Commit e727d249 authored by wx002's avatar wx002

clean repo

parent f3db7a5e
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# All of that stuff above is unnecccccessary\n",
"## we just need to do this \\/"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"from urllib.request import urlopen\n",
"import pandas as pd\n",
"import geocoder\n",
"import json\n",
"import re\n",
"\n",
"API_KEY = \"AIzaSyCJkjXe-dVvbhcxfdwrsMzXbn2qmzZ0sJM\"\n",
"API_KEY2 = \"AIzaSyCRaRfZZvRUAGTz6EsRpii5AJMPDrEEklo\"\n",
"API_KEY3 = 'AIzaSyC3bTGeppurTHGv1Z5jF7SEMAJSBdx4RgA'"
"import gmplot"
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def get_loc_json(lat, long, api_key):\n",
" url_suffix = \"https://maps.googleapis.com/maps/api/geocode/json?latlng=\"\n",
" url_end = \"&key=\" + api_key\n",
" coord = str(lat) + ','+str(long)\n",
" full_url = url_suffix + coord + url_end\n",
" print(full_url)\n",
" resp = urlopen(full_url)\n",
" data = resp.read().decode('utf-8')\n",
" return json.loads(data)"
"dataset = pd.read_csv('Datasets/Brightkite.csv', index_col=1)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 14,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"Index(['UserID', 'Latitude', 'Longitude', 'PlaceID'], dtype='object')"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"class user:\n",
" def __init__(self, ID):\n",
" \"\"\"\n",
" ID is user ID\n",
" t is time that a measurement was taken\n",
" \"\"\"\n",
" self.ID = ID\n",
" self.times = []\n",
" self.lats = []\n",
" self.longs = []\n",
" self.places = [] #later on we can connect this to a dictionary and then we won't have to look up every location we are given\n",
"\n",
" def __str__(self):\n",
" return 'User ID: {} Created On: {} Places Visted: {}'.format(self.ID, self.times[0], len(self.places))\n",
" \n",
" def add_location(self, time, lat, long, place):\n",
" \"\"\"\n",
" Adds new values to user location dataset\n",
" \"\"\"\n",
" self.times.append(time)\n",
" self.lats.append(lat)\n",
" self.longs.append(long)\n",
" self.places.append(place)"
"dataset.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"array([ 0, 1, 2, ..., 2964, 2965, 2966], dtype=int64)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"class data_process:\n",
"\n",
" def __init__(self):\n",
" pass\n",
"\n",
" def data_reader(self, filename):\n",
" file = open(filename)\n",
" lines = file.readlines()\n",
" return lines\n",
"\n",
" def parseLine(self, line, regex, split):\n",
" lineList = re.split(regex, line, maxsplit = split)\n",
" return int(lineList[0]), str(lineList[1]), float(lineList[2]), float(lineList[3]), str(lineList[4])\n",
"\n",
" def create_user(self, ID):\n",
" '''\n",
" 0 - user ID\n",
" 1 - time\n",
" 2 - lat\n",
" 3 - long\n",
" 4 - place ID\n",
" '''\n",
" return user(int(ID))\n",
"\n",
" def create_user_set(self, regex, datafile):\n",
" user_set = {}\n",
" lines = self.data_reader(datafile)\n",
" for line in lines:\n",
" ID, time, lat, long, place = self.parseLine(line, regex, 5)\n",
" if ID not in user_set:\n",
" u = self.create_user(ID)\n",
" u.add_location(time, lat, long, place)\n",
" user_set[u.ID] = u\n",
" print(user_set[u.ID])\n",
" else:\n",
" user_set[u.ID].add_location(time, lat, long, place)\n",
"\n",
" return user_set"
"dataset['UserID'].unique()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"p = data_process()\n",
"bright_kite = p.create_user_set(r'\\t+', 'Datasets/Brightkite.txt')\n",
"print(a)"
"key = 'AiEfap-qUoZalL1qK8ollM-SwVdoJFemh60tHo0EeraVYP8V4WPJXAVD2YjqzgA1'\n",
"#geo = geocoder.bing([45.15,-75.14], method='reverse', key = key)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dict_keys(['address', 'bbox', 'city', 'confidence', 'country', 'lat', 'lng', 'ok', 'postal', 'quality', 'raw', 'state', 'status', 'street'])\n"
]
}
],
"source": [
"dic = geo.json\n",
"print(dic.keys())\n",
"test = gmplot.GoogleMapPlotter(30.3164945, \n",
" 78.03219179999999, 13)\n",
"test.draw('testmap.html')"
]
}
],
"metadata": {
"language_info": {
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.7.1"
}
},
"nbformat": 4,
from urllib.request import urlopen
import json
import re
API_KEY = "AIzaSyCJkjXe-dVvbhcxfdwrsMzXbn2qmzZ0sJM"
API_KEY2 = "AIzaSyCRaRfZZvRUAGTz6EsRpii5AJMPDrEEklo"
API_KEY3 = 'AIzaSyC3bTGeppurTHGv1Z5jF7SEMAJSBdx4RgA'
def get_loc_json(lat, long, api_key):
    """Reverse-geocode a (lat, long) pair with the Google Geocoding API and return the parsed JSON response."""
    url_suffix = "https://maps.googleapis.com/maps/api/geocode/json?latlng="
    url_end = "&key=" + api_key
    coord = str(lat) + ',' + str(long)
    full_url = url_suffix + coord + url_end
    print(full_url)
    resp = urlopen(full_url)
    data = resp.read().decode('utf-8')
    return json.loads(data)
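# Illustrative sketch (not part of the original file): one way the reverse-geocoding
# response returned by get_loc_json might be used. The coordinates below are an
# arbitrary example; 'status', 'results' and 'formatted_address' are standard fields
# of a Google Geocoding API response.
example_resp = get_loc_json(40.7128, -74.0060, API_KEY)
if example_resp.get('status') == 'OK':
    for result in example_resp['results'][:3]:
        print(result['formatted_address'])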
class user:
    def __init__(self, ID, time, lat, long):
        self.ID = ID
        self._t = time       # stored under a private name so the t property below can expose it
        self.lat = lat
        self.long = long

    def __str__(self):
        return 'ID: {} time: {} lat: {} long: {}'.format(self.ID, self._t, self.lat, self.long)

    @property
    def id(self):
        return self.ID

    @property
    def t(self):
        return self._t

    @property
    def coordinates(self):
        return (self.lat, self.long)
class data_process:
    def __init__(self):
        pass

    def data_reader(self, filename):
        with open(filename) as file:   # close the file once the lines are read
            lines = file.readlines()
        return lines

    def parseLine(self, line, regex, split):
        lineList = re.split(regex, line, maxsplit=split)
        return lineList

    def create_user(self, lineList):
        '''
        0 - user ID
        1 - time
        2 - lat
        3 - long
        4 - place ID
        '''
        if len(lineList) >= 4:
            return user(int(lineList[0]), str(lineList[1]), float(lineList[2]), float(lineList[3]))
        return None                    # line could not be parsed into a full record

    def create_user_set(self, regex, datafile):
        user_set = {}
        lines = self.data_reader(datafile)
        for line in lines:
            lineList = self.parseLine(line, regex, 4)
            u = self.create_user(lineList)
            if u is None:              # skip malformed lines
                continue
            if u.id not in user_set:
                user_set[u.id] = []
            user_set[u.id].append(u)   # record every check-in, including the first one for a user
        return user_set
p = data_process()
bright_kite = p.create_user_set(r'\t+', 'Datasets/Brightkite.txt')
a = user(0, 'aaaa', 40, 100.000005)
print(a)
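# Illustrative sketch (not part of the original file): a quick way to inspect the
# parsed Brightkite data, assuming create_user_set completed. Each user ID maps to a
# list of user records, each exposing the t and coordinates properties.
for uid in list(bright_kite)[:5]:
    checkins = bright_kite[uid]
    print('user {}: {} check-ins, first seen at {}'.format(uid, len(checkins), checkins[0].t))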
import pandas as pd
import numpy as np
import json
import geocoder
import time
# import folium
with open('Datasets/phone_history.json') as f:
    file_str = f.read()
phone_data = json.loads(file_str)['locations']
# process into a normal txt
phone_data_new = open('Datasets/phone_data.txt', 'w+')
phone_data_new.write('Dates\tLat\tLong\tAccuracy\n')
for d in phone_data:
    ts = str(pd.to_datetime(d['timestampMs'], unit='ms'))   # renamed from 'time' to avoid shadowing the time module
    lat = int(d['latitudeE7'])/(10**7)
    lon = int(d['longitudeE7'])/(10**7)
    acc = d['accuracy']
    line = '{}\t{}\t{}\t{}\n'.format(ts, lat, lon, acc)
    phone_data_new.write(line)
phone_data_new.close()
print('finished making file!')
'''
phone_df = pd.read_csv('Datasets/phone_data.txt', sep='\t')
#coordinates = pd.to_numeric(list(phone_df['coordinates']))
latList = phone_df['Lat']
longList = phone_df['Long']
coordinates = [list(a) for a in list(zip(latList, longList))]
#print(coordinates)
# build unique coordinates
def get_unique_cord(cord_list):
    unique = []
    for cord in cord_list:
        if cord not in unique:
            unique.append(cord)
    return unique
# bing api below --------------------------------------------------------
bing_key = 'AiEfap-qUoZalL1qK8ollM-SwVdoJFemh60tHo0EeraVYP8V4WPJXAVD2YjqzgA1'
#g = geocoder.bing(coordinates[0], method = 'reverse', key = bing_key)
addr_file = open('Datasets/phone_address.txt', 'w+')
unique = get_unique_cord(coordinates)
coord_dic = {}
for i in range(len(unique)):
    try:
        g = geocoder.bing(unique[i], method='reverse', key=bing_key)
        for r in g:
            line_str = r.address + ',' + r.city + ',' + r.country + '\n'
            addr_file.write(str(unique[i]) + ' : ' + line_str)
            print('{} : {}'.format(unique[i], line_str))
        time.sleep(1)
    except Exception as ex:
        print('current Index: {} '.format(i))
        if hasattr(ex, 'message'):
            print(ex.message)
        else:
            print(ex)
        break
addr_file.close()
print('finished address file!')
'''
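# Illustrative sketch (not part of the original file): the commented-out block above
# builds coord_dic but never fills it. One possible use is as a cache so each unique
# coordinate is reverse-geocoded at most once; this assumes the same geocoder.bing
# reverse lookup and bing_key shown above.
#
# def reverse_geocode_cached(coord, cache, key):
#     coord_key = tuple(coord)
#     if coord_key not in cache:
#         result = geocoder.bing(list(coord_key), method='reverse', key=key)
#         cache[coord_key] = result.address
#     return cache[coord_key]
#
# coord_dic = {}
# for cord in unique:
#     print(reverse_geocode_cached(cord, coord_dic, bing_key))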