In [2]:
#load libraries and APIs
import time
start = time.time() # start time
import pandas as pd
import geopandas as gpd
import numpy as np
import shapely
from shapely.geometry import Point, Polygon
import fiona
import folium
import os
import math
In [3]:
# Path to data
## Inputs: the sample point CSV and the two district boundary shapefiles, all under Project_folder
Project_folder = "Sample_Files"
sample_points = os.path.join(Project_folder, "eMerges 21 Test Longitudes and Lattitude Records for UpWork.csv")
dist_cong = os.path.join(Project_folder, "Maryland_Election_Boundaries__US_Congressional_Districts_2011.shp")
dist_legi = os.path.join(Project_folder, "Maryland_Election_Boundaries__Maryland_Legislative_Districts_2012.shp")
## Output: create the folder up front. exist_ok=True makes re-running the cell a no-op and
## removes the gap between "check it exists" and "create it" that the explicit check had.
output_folder = 'data'
os.makedirs(output_folder, exist_ok=True)
In [4]:
#import data
## Read Sample Points
df = pd.read_csv(sample_points)
### Cast boolean columns to strings (conflicting data types): collect them first, then convert
bool_columns = [name for name in df.columns if df[name].dtype == 'bool']
for name in bool_columns:
    df[name] = df[name].astype('str')
## Change dataframe to geospatial
### One shapely Point per record, built as (x, y) = (longitude, latitude)
longitudes = df['Mailing_Addresses_Longitude']
latitudes = df['Mailing_Addresses_Latitude']
geometry = [Point(lon, lat) for lon, lat in zip(longitudes, latitudes)]
crs = {'init': 'epsg:4326'}
geo_df = gpd.GeoDataFrame(df, geometry=geometry, crs=crs)
## Read Spatial Data
### The two district layers are independent of each other and of the point layer
dist_legi_df = gpd.read_file(dist_legi)
dist_cong_df = gpd.read_file(dist_cong)
In [7]:
# Quick visual check of the congressional district layer read from the dist_cong shapefile
dist_cong_df.plot()
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x2415f6fbb38>
In [6]:
# Quick visual check of the legislative district layer read from the dist_legi shapefile
dist_legi_df.plot()
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x2415f56db70>
In [50]:
# Spatial Joins for the Data
def _tag_district(points_gdf, districts_gdf, out_column):
    """Attach the DISTRICT value of the intersecting district to each point.

    Parameters
    ----------
    points_gdf : GeoDataFrame
        Point records to tag. All of its columns are carried through unchanged.
    districts_gdf : GeoDataFrame
        District layer; must contain a 'DISTRICT' column.
    out_column : str
        Name of the column that receives the district value. An existing column
        with that name is overwritten, so the cell can be re-run.

    Returns
    -------
    GeoDataFrame
        The columns of points_gdf plus out_column. The join is an inner join, so
        points that intersect no district are not present in the result.
    """
    # Carry only DISTRICT + geometry across so no other district attribute can
    # collide with (and get suffixed against) a column of the point table.
    district_shapes = districts_gdf[['DISTRICT', districts_gdf.geometry.name]]
    # sjoin compares raw coordinates; bring the districts into the points' CRS
    # when both are known and differ, leaving the point geometries untouched.
    if points_gdf.crs and district_shapes.crs and district_shapes.crs != points_gdf.crs:
        district_shapes = district_shapes.to_crs(points_gdf.crs)
    # NOTE(review): geopandas >= 0.10 names this keyword `predicate`; switch when upgrading.
    joined = gpd.sjoin(points_gdf, district_shapes, how="inner", op='intersects')
    joined[out_column] = joined['DISTRICT']
    # Drop the join helpers so the result can be fed straight into the next sjoin
    # (a leftover 'index_right' / 'DISTRICT' would clash with the next join's columns).
    return joined.drop(columns=['DISTRICT', 'index_right'])


# The new district column is kept explicitly; re-selecting the pre-join column
# list after each join would discard it again.
geo_df = _tag_district(geo_df, dist_cong_df, 'Congressional District#')
geo_df = _tag_district(geo_df, dist_legi_df, 'Legislative District#')
# Drop Geometry Column
df = geo_df.drop(columns=['geometry'])
In [8]:
# Plot the point layer held in geo_df.
# NOTE(review): this cell's execution count (In [8]) precedes the spatial-join cell (In [50]),
# so the saved figure may not reflect the joined data; re-run top to bottom to confirm.
geo_df.plot()
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x2415f949c18>
In [51]:
# Write output files
## Save the attribute table as a UTF-8 CSV in the output folder, leaving the row index out
outcsv = os.path.join(output_folder, "Sample_Points_Joined.csv")
df.to_csv(path_or_buf=outcsv, index=False, encoding='utf-8')
In [52]:
# Display the table that was written to outcsv, for inspection
df
Out[52]:
Voters_StateVoterID Mailing_Addresses_Latitude Mailing_Addresses_Longitude Congressional District# Legislative District#
0 3596440 39.690390 -78.396180 06 1C
1 3596431 39.690390 -78.396180 06 1C
2 500590677 39.690390 -78.396180 06 1C
3 500729045 39.690390 -78.396180 06 1C
4 3596500 39.691260 -78.396800 06 1C
5 3596503 39.691260 -78.396800 06 1C
6 3596098 39.693230 -78.394710 06 1C
7 501650184 39.693230 -78.394710 06 1C
8 501124085 39.693230 -78.394710 06 1C
9 3596099 39.693230 -78.394710 06 1C
10 3596132 39.625670 -78.390240 06 1C
11 750074813 39.623838 -78.392240 06 1C
12 1272723 39.622340 -78.393140 06 1C
13 985977 39.621584 -78.392492 06 1C
14 3623638 39.621170 -78.391800 06 1C
15 3623637 39.621170 -78.391800 06 1C
16 3633818 39.619820 -78.393670 06 1C
17 3140850 39.705870 -78.353200 06 1C
18 500710183 39.708450 -78.351670 06 1C
19 3596575 39.708450 -78.351670 06 1C
In [ ]: