In [1]:
#load libraries and APIs
import time
start = time.time() # start time
import pandas as pd
import geopandas as gpd
import numpy as np
import shapely
from shapely.geometry import Point, Polygon
import fiona
import folium
import os
import math
In [2]:
#import data
df = pd.read_csv('1_jan_16-31_aug_16.csv')

# Cast boolean columns to string so the later shapefile/GeoJSON export
# does not choke on the bool dtype.
bool_columns = df.select_dtypes(include='bool').columns
df[bool_columns] = df[bool_columns].astype('str')
In [3]:
#Change dataframe to geospatial and check by exporting to shp
# Points are built as (x=Longitude, y=Latitude), the GeoJSON/EPSG:4326 convention.
geometry = [Point(xy) for xy in zip(df.Longitude, df.Latitude)]
# The {'init': 'epsg:XXXX'} dict form is deprecated in pyproj/geopandas;
# pass the authority string directly.
crs = 'EPSG:4326'
geo_df = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)
#geo_df.to_file(driver='ESRI Shapefile', filename='data.shp')
#geo_df.to_file(driver='GeoJSON', filename='data.json')
In [4]:
#create dummy variables
# One indicator column per Ownership category (e.g. 'Condo', 'Fee Simple').
dummy_df = pd.get_dummies(geo_df['Ownership'])
#dummy_df = dummy_df.add_prefix('Ownership is')
# join_axes was removed from pd.concat in pandas 1.0; dummy_df shares
# geo_df's index, so a plain axis=1 concat gives the same aligned result.
geo_df = pd.concat([geo_df, dummy_df], axis=1)
In [5]:
#Create All the Maps for Various Categories
dummy_vars = geo_df['Ownership'].unique()
bin_labels = ["verylow", "low", "average", "high", "veryhigh"]
bin_count = len(bin_labels)
# Map each bin label to an integer style rank 1..bin_count (drives the hue).
style_map = {label: rank for rank, label in enumerate(bin_labels, start=1)}
for var in dummy_vars: # conserve memory
    # Subset the listings belonging to this ownership category
    # (the dummy column is 1 exactly for members, so == 1 suffices).
    var_geo_df = geo_df[geo_df[var] == 1]
    # Bin ClosePrice into 5 equal-width price bands.
    var_binned_geo_df = pd.DataFrame(pd.cut(var_geo_df['ClosePrice'], 5, labels=bin_labels))
    var_binned_geo_df.columns = ['category']
    # join_axes was removed from pd.concat in pandas 1.0; both frames share
    # the same index, so a plain axis=1 concat is equivalent.
    var_geo_df = pd.concat([var_geo_df, var_binned_geo_df], axis=1)
    var_geo_df['style'] = var_geo_df['category'].map(style_map).astype(int)
    fmap = folium.Map(location=[39.045753, -76.641273])  # avoid shadowing builtin map()
    for_map_df = var_geo_df.head(1000) #clip results
    for index, row in for_map_df.iterrows():
        # Hue 0-100 spread across the price bins (HSL color space).
        fill_color = 'hsla('+str(int(100*row['style']/bin_count))+', 100%, 50%, 1)'
        folium.CircleMarker(
            location = [ row['Latitude'], row['Longitude'] ],
            radius= 5,
            color= '#ffffff',
            weight= 1,
            opacity= 0.8,
            fill= True,            # 'fill' is a boolean flag, not a color
            fill_color= fill_color,
            fill_opacity= 0.8,     # (removed duplicate/invalid 'fillOpacity' kwarg)
        ).add_to(fmap)
    # Zoom the map to the full category extent, not just the clipped sample.
    bounds = [ [ var_geo_df['Latitude'].min(), var_geo_df['Longitude'].min() ], [ var_geo_df['Latitude'].max(), var_geo_df['Longitude'].max() ] ]
    fmap.fit_bounds(bounds, max_zoom=18)
    var_map = var.replace(" ", "_")+"_map"
    map_export_path = os.path.join('results', var+'_folium.html')
    # Expose the map under e.g. Fee_Simple_map so later cells can display it.
    vars()[var_map] = fmap
    fmap.save(map_export_path)
    print( var_map, ' saved to ', map_export_path )
Ground_Rent_map  saved to  results\Ground Rent_folium.html
Fee_Simple_map  saved to  results\Fee Simple_folium.html
Condo_map  saved to  results\Condo_folium.html
Coop_map  saved to  results\Coop_folium.html
Rental_Apartment_map  saved to  results\Rental Apartment_folium.html
In [19]:
Ground_Rent_map
Out[19]:
In [20]:
Fee_Simple_map
Out[20]:
In [21]:
Condo_map
Out[21]:
In [22]:
Coop_map
Out[22]:
In [18]:
Rental_Apartment_map
Out[18]:
In [6]:
#Algorithm for Convex Hull around points
def split(u, v, points):
    """Return the subset of `points` strictly on the left side of the
    directed line U->V (negative z-component of the 2-D cross product).

    np.cross on 2-element vectors is deprecated since NumPy 1.25 and removed
    in NumPy 2.0, so the z-component is computed directly instead.
    """
    def cross2d(a, b):
        # z-component of the 3-D cross product of two 2-D vectors
        return a[0] * b[1] - a[1] * b[0]
    return [p for p in points if cross2d(p - u, v - u) < 0]

def extend(u, v, points):
    """Quickhull recursion: return the hull vertices between hull points U
    and V (exclusive), searching only among `points` on that side.

    np.cross on 2-element vectors is deprecated since NumPy 1.25 and removed
    in NumPy 2.0, so the z-component is computed directly instead.
    """
    if not points:
        return []

    def cross2d(a, b):
        # z-component of the 3-D cross product of two 2-D vectors
        return a[0] * b[1] - a[1] * b[0]

    # find furthest point W (most negative cross product), and split search to WV, UW
    w = min(points, key=lambda p: cross2d(p - u, v - u))
    p1, p2 = split(w, v, points), split(u, w, points)
    return extend(w, v, p1) + [w] + extend(u, w, p2)

def convex_hull(points):
    """Quickhull: return the convex hull of 2-D `points` as a closed ring.

    The leftmost and rightmost points (by x-coordinate) are guaranteed hull
    vertices; the rest are found by recursive search on each side of the line
    between them. The first vertex is repeated at the end so the result forms
    a closed polygon ring.
    """
    leftmost = min(points, key=lambda p: p[0])
    rightmost = max(points, key=lambda p: p[0])
    above = split(leftmost, rightmost, points)
    below = split(rightmost, leftmost, points)

    # assemble both half-hulls into one closed ring
    return ([rightmost] + extend(leftmost, rightmost, above)
            + [leftmost] + extend(rightmost, leftmost, below)
            + [rightmost])
In [7]:
#Calculations and inputs to Polygons for Fishnet
points = geo_df[['Latitude', 'Longitude']].values
polygon = np.array(convex_hull(points))
# Hull coords are (lat, lon), so .bounds = (minLat, minLon, maxLat, maxLon).
# Re-label so that x = longitude and y = latitude from here on (the order
# used when building the fishnet polygons and matching geo_df's geometry).
bounds = Polygon(polygon).bounds
max_x = bounds[3]  # max longitude
max_y = bounds[2]  # max latitude
min_x = bounds[1]  # min longitude
min_y = bounds[0]  # min latitude
# Length of one degree at the mid-latitude, in miles. A degree of latitude is
# ~constant (69.172 mi); a degree of longitude shrinks as cos(latitude).
# (Fix: the original multiplied by the latitude in radians instead of its cosine.)
mid_lat_radians = ((max_y + min_y)/2) * math.pi / 180
lat_degree_length = 69.172
lon_degree_length = math.cos(mid_lat_radians) * 69.172
grid_area = .33 #0.33 square miles per grid cell
grid_size = math.sqrt(grid_area)  # cell edge length in miles
dy = grid_size/lat_degree_length  # cell height in degrees latitude
dx = grid_size/lon_degree_length  # cell width in degrees longitude
nx = int(math.ceil(abs((max_x - min_x))/dx))
ny = int(math.ceil(abs((max_y - min_y))/dy))
In [8]:
#Generate Polygons for Fishnet
# Build one rectangle (4 vertices, counter-clockwise) per grid cell,
# clamping every coordinate to the outer bounds.
polygons_array = []
for row in range(ny):
    y0 = min(min_y + row*dy, max_y)
    y1 = min(min_y + (row + 1)*dy, max_y)
    for col in range(nx):
        x0 = min(min_x + col*dx, max_x)
        x1 = min(min_x + (col + 1)*dx, max_x)
        polygons_array.append([[x0, y0], [x0, y1], [x1, y1], [x1, y0]])
# Next free id; the following cell builds the id column as range(1, id).
id = nx*ny + 1
In [9]:
#Create Fishnet GeoDataFrame
# One row per grid cell, with accumulators filled in by the join cell below.
df = pd.DataFrame(np.arange(1, id), columns=['id'])
df['ClosePrice'] = 0
df['intersections'] = 0
geometry = [Polygon(p) for p in polygons_array]
# The {'init': 'epsg:XXXX'} dict form is deprecated in pyproj/geopandas;
# pass the authority string directly.
crs = 'EPSG:4326'
fishnet_df = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)
In [10]:
# Screen All polygons that are not needed
# Union all listing points once, then keep only grid cells touching that union.
intersects_df = pd.DataFrame(fishnet_df.intersects(geo_df.geometry.unary_union), columns=['intersects'])
# join_axes was removed from pd.concat in pandas 1.0; both frames share
# fishnet_df's index, so a plain axis=1 concat is equivalent.
fishnet_df = pd.concat([fishnet_df, intersects_df], axis=1)
fishnet_df = fishnet_df[fishnet_df['intersects']]
#fishnet_df = fishnet_df.drop(columns=['intersects'])
In [11]:
#Join Points with Fishnet Polygons
# Brute-force point-in-cell join: accumulate each listing's ClosePrice into
# every fishnet cell its point intersects. O(cells x points) — acceptable for
# the clipped sample, but geopandas.sjoin would scale much better.
geo_df_clipped = geo_df.head(5000)
for index, row in fishnet_df.iterrows():
    price_sum = 0
    hits = 0
    for index1, row1 in geo_df_clipped.iterrows():
        if row.geometry.intersects(row1.geometry):
            price_sum += row1['ClosePrice']
            hits += 1
    if hits:
        # Single .loc write per cell instead of one per matching point
        # (also removes the original's no-op `row1['ClosePrice']` statement).
        fishnet_df.loc[index, 'ClosePrice'] += price_sum
        fishnet_df.loc[index, 'intersections'] += hits
In [12]:
#Now we will average 'ClosePrice' to be used in Final map
# Mean sale price per cell; cells with no listings (0/0 -> NaN) become 0.
fishnet_df['AvgClosePrice'] =  (fishnet_df['ClosePrice']/fishnet_df['intersections']).fillna(0)
var_binned_geo_df = pd.DataFrame(pd.cut(fishnet_df['AvgClosePrice'], 5, labels=bin_labels))#.applymap(str)
var_binned_geo_df.columns = ['category']
# join_axes was removed from pd.concat in pandas 1.0; the frames already
# share an index, so a plain axis=1 concat is equivalent.
fishnet_df = pd.concat([fishnet_df, var_binned_geo_df], axis=1)
fishnet_df['stylecat'] = fishnet_df['category'].map(style_map).astype(int)
In [13]:
#Prepare Data for Map

# Limit how many polygons go into the final map to keep the HTML small.
for_map_df = fishnet_df.head(2000) #clip results

# NOTE(review): the block below is disabled code kept for reference; as a bare
# triple-quoted expression it only echoes itself as this cell's output.
'''
#Export to GeoJSON
for_geojson_df = for_map_df
#for_geojson_df = for_geojson_df.drop(columns=['style'])
#Change datatype of conflicting data types
for column in for_geojson_df:
    if for_geojson_df[column].dtype not in [ 'int32', 'int64', 'float32', 'float64' ,'object', 'str']:
        for_geojson_df[column] = for_geojson_df[column].astype('str')
#for_geojson_df.applymap(str).to_file(driver='GeoJSON', filename='data.json')
'''
Out[13]:
"\n#Export to GeoJSON\nfor_geojson_df = for_map_df\n#for_geojson_df = for_geojson_df.drop(columns=['style'])\n#Change datatype of conflicting data types\nfor column in for_geojson_df:\n    if for_geojson_df[column].dtype not in [ 'int32', 'int64', 'float32', 'float64' ,'object', 'str']:\n        for_geojson_df[column] = for_geojson_df[column].astype('str')\n#for_geojson_df.applymap(str).to_file(driver='GeoJSON', filename='data.json')\n"
In [14]:
# Build the final fishnet map: one GeoJson layer colored by price bin.
map = folium.Map(location=[39.045753, -76.641273])
''' #clorepath example
map.choropleth(
    geo_data = for_map_df.to_json(),
    name='Polygons',
    data = for_map_df,    
    columns=['id','AvgClosePrice'],
    key_on='feature.properties.id',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name= 'AvgClosePrice'
)
'''
# Color each grid cell by its 'stylecat' rank: hue 0-100 spread over the bins,
# matching the per-category point maps above.
folium.GeoJson(
    for_map_df.to_json(),
    name='geojson',
    style_function= lambda x:{ 'color': '#ffffff', 'weight': 1,'opacity': 0.8, 'fillOpacity': 0.5, 'fillColor':'hsla('+str(int(100*x['properties']['stylecat']/bin_count))+', 100%, 50%, 1)' }
).add_to(map)

folium.LayerControl().add_to(map)

#set map to layer bounds
# total_bounds is (minx, miny, maxx, maxy) = (west, south, east, north);
# folium.fit_bounds wants [[south, west], [north, east]].
bounds = for_map_df.total_bounds
bounds = [ [ bounds[1], bounds[0] ] , [ bounds[3], bounds[2] ] ]
map.fit_bounds(bounds, max_zoom=18)  
map
Out[14]:
In [15]:
# Persist the fishnet map next to the per-category maps in results/.
export_path = os.path.join('results', 'Fishnet_folium.html')
map.save(export_path)
print( 'Fishnet Map saved to ', export_path )
Fishnet Map saved to  results\Fishnet_folium.html
In [16]:
# Total wall-clock runtime of the notebook, in seconds (paired with the
# `start = time.time()` captured in the imports cell).
totaltime = time.time() - start
print(totaltime)
119.31781339645386