In [1]:
# Import Libraries
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
import lxml
import os
import glob
import time
import datetime
import json
import itertools
from fuzzywuzzy import process
from fuzzywuzzy import fuzz
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\fuzzywuzzy\fuzz.py:35: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
  warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
In [2]:
# Set Output Folder
output_folder = os.path.abspath("output")
if not os.path.exists(output_folder):
    os.makedirs(output_folder)
In [3]:
# Import data folders
data_folder = os.path.abspath("data")
In [4]:
# Helping Functions
def remove_consecutive_duplicates(x):
    return ''.join(i for i, _ in itertools.groupby(x))
In [7]:
# Read Constituency Data
ac_gdf = None
ac_filepath = os.path.join(data_folder, "AC", "India_AC.shp")
ac_gdf = gpd.read_file(ac_filepath)
ac_gdf
Out[7]:
OBJECTID ST_CODE ST_NAME DT_CODE DIST_NAME AC_NO AC_NAME PC_NO PC_NAME PC_ID STATUS Shape_Leng Shape_Area geometry
0 1 13 NAGALAND 1.0 MON 41 Tizit 1 NAGALAND 1301 Pre delimitation 1.381854 0.055845 POLYGON ((94.9457548574008 26.93518316946773, ...
1 1 13 NAGALAND 1.0 MON 43 Tapi 1 NAGALAND 1301 Pre delimitation 1.056157 0.030387 POLYGON ((95.22324386371338 26.75963707718944,...
2 1 13 NAGALAND 1.0 MON 42 Wakching 1 NAGALAND 1301 Pre delimitation 0.980303 0.018828 POLYGON ((94.86775129009004 26.82831138606167,...
3 1 13 NAGALAND 2.0 TUENSANG 49 Tamlu 1 NAGALAND 1301 Pre delimitation 1.133296 0.021899 POLYGON ((94.73862639329349 26.76868496204139,...
4 1 13 NAGALAND 3.0 MOKOKCHUNG 21 Tuli 1 NAGALAND 1301 Pre delimitation 0.965989 0.022397 POLYGON ((94.73862639329349 26.76868496204139,...
5 1 13 NAGALAND 1.0 MON 44 Phomching 1 NAGALAND 1301 Pre delimitation 0.356973 0.007281 POLYGON ((95.20991815389976 26.72866549251853,...
6 1 13 NAGALAND 1.0 MON 46 Mon Town 1 NAGALAND 1301 Pre delimitation 0.492124 0.011957 POLYGON ((94.96949610236328 26.72373432897615,...
7 1 13 NAGALAND 1.0 MON 47 Aboi 1 NAGALAND 1301 Pre delimitation 0.664703 0.016869 POLYGON ((94.99003335425596 26.62768812660579,...
8 1 13 NAGALAND 3.0 MOKOKCHUNG 30 Alungtaki 1 NAGALAND 1301 Pre delimitation 1.111287 0.031180 POLYGON ((94.52707905005815 26.69958256688199,...
9 1 13 NAGALAND 1.0 MON 45 Tehok 1 NAGALAND 1301 Pre delimitation 0.580774 0.011101 POLYGON ((95.12053707632509 26.62556110945769,...
10 1 13 NAGALAND 2.0 TUENSANG 50 Longleng 1 NAGALAND 1301 Pre delimitation 0.725516 0.022976 POLYGON ((94.73277705341832 26.59490452672259,...
11 1 13 NAGALAND 3.0 MOKOKCHUNG 22 Arkakong 1 NAGALAND 1301 Pre delimitation 0.576670 0.016012 POLYGON ((94.73272983001772 26.59489452536212,...
12 1 13 NAGALAND 3.0 MOKOKCHUNG 29 Jangpetkong 1 NAGALAND 1301 Pre delimitation 0.790225 0.019501 POLYGON ((94.59166983429549 26.54138626746231,...
13 1 13 NAGALAND 1.0 MON 48 Moka 1 NAGALAND 1301 Pre delimitation 0.613600 0.013579 POLYGON ((95.11353174969054 26.53153183372717,...
14 1 13 NAGALAND 5.0 WOKHA 40 Bhandari 1 NAGALAND 1301 Pre delimitation 2.105271 0.078391 POLYGON ((94.22785725650726 26.50144543915576,...
15 1 13 NAGALAND 2.0 TUENSANG 51 Noksen 1 NAGALAND 1301 Pre delimitation 0.883926 0.020589 POLYGON ((94.68651101617053 26.5247634442776, ...
16 1 13 NAGALAND 2.0 TUENSANG 55 Tobu 1 NAGALAND 1301 Pre delimitation 0.675198 0.024041 POLYGON ((95.02478163826203 26.4871035152151, ...
17 1 13 NAGALAND 3.0 MOKOKCHUNG 24 Angetyongpang 1 NAGALAND 1301 Pre delimitation 0.680903 0.013847 POLYGON ((94.69043964607289 26.4755964063491, ...
18 1 13 NAGALAND 3.0 MOKOKCHUNG 23 Impur 1 NAGALAND 1301 Pre delimitation 0.449900 0.013073 POLYGON ((94.60467622003159 26.4561825093154, ...
19 1 13 NAGALAND 2.0 TUENSANG 56 Noklak 1 NAGALAND 1301 Pre delimitation 0.876690 0.032109 POLYGON ((95.10811841912982 26.39522082138387,...
20 1 13 NAGALAND 3.0 MOKOKCHUNG 28 Koridang 1 NAGALAND 1301 Pre delimitation 0.752880 0.016517 POLYGON ((94.4926679209878 26.34862624470549, ...
21 1 13 NAGALAND 2.0 TUENSANG 52 Longkhim Chare 1 NAGALAND 1301 Pre delimitation 0.784452 0.027747 POLYGON ((94.65750690711815 26.34742268291052,...
22 1 13 NAGALAND 2.0 TUENSANG 54 Tuensang Sadar-II 1 NAGALAND 1301 Pre delimitation 1.078362 0.043857 POLYGON ((94.91999992425718 26.34637312911667,...
23 1 13 NAGALAND 3.0 MOKOKCHUNG 25 Monguya 1 NAGALAND 1301 Pre delimitation 0.608629 0.008306 POLYGON ((94.62470017936982 26.32930194712719,...
24 1 13 NAGALAND 3.0 MOKOKCHUNG 27 Mokokchung Town 1 NAGALAND 1301 Pre delimitation 0.131384 0.001076 POLYGON ((94.54252940101111 26.33431455507269,...
25 1 13 NAGALAND 3.0 MOKOKCHUNG 26 Aonglenden 1 NAGALAND 1301 Pre delimitation 0.314304 0.003474 POLYGON ((94.52133995267673 26.30919024633283,...
26 1 13 NAGALAND 2.0 TUENSANG 53 Tuensang Sadar-I 1 NAGALAND 1301 Pre delimitation 0.288725 0.004838 POLYGON ((94.79527878988432 26.30946500720438,...
27 1 13 NAGALAND 5.0 WOKHA 39 Sanis 1 NAGALAND 1301 Pre delimitation 1.148655 0.030105 POLYGON ((94.34819933373234 26.25056504923771,...
28 1 13 NAGALAND 4.0 ZUNHEBOTO 31 Akuluto 1 NAGALAND 1301 Pre delimitation 0.620547 0.014271 POLYGON ((94.48322360598468 26.29239831283195,...
29 1 13 NAGALAND 4.0 ZUNHEBOTO 33 Suruhuto 1 NAGALAND 1301 Pre delimitation 0.568226 0.013466 POLYGON ((94.5081128643173 26.21370854751149, ...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4152 78 9 UTTAR PRADESH 45.0 ALLAHABAD 258 Handia 78 BHADOHI 978 None 1.221541 0.026459 POLYGON ((82.33191633527071 25.44923534473543,...
4153 52 9 UTTAR PRADESH 45.0 ALLAHABAD 263 Allahabad South 52 ALLAHABAD 952 None 0.308595 0.002585 POLYGON ((81.87433344513244 25.45406652075337,...
4154 76 9 UTTAR PRADESH 67.0 VARANASI 386 Shivpur 76 CHANDAULI 976 None 1.126150 0.024123 POLYGON ((83.12710597246274 25.44357179220452,...
4155 78 9 UTTAR PRADESH 68.0 SANT RAVIDAS NAGAR * 393 Gyanpur 78 BHADOHI 978 None 1.422333 0.030785 POLYGON ((82.38944645219209 25.44594297618744,...
4156 52 9 UTTAR PRADESH 45.0 ALLAHABAD 260 Karachhana 52 ALLAHABAD 952 None 1.157874 0.032931 POLYGON ((81.82137715063504 25.41293949925853,...
4157 48 9 UTTAR PRADESH 40.0 BANDA 234 Naraini (SC) 48 BANDA 948 None 3.475376 0.107559 POLYGON ((80.74833097129732 25.40767615672928,...
4158 77 9 UTTAR PRADESH 67.0 VARANASI 391 Sevapuri 77 VARANASI 977 None 1.211015 0.024156 POLYGON ((82.82223038402503 25.38401144946886,...
4159 48 9 UTTAR PRADESH 41.0 CHITRAKOOT * 237 Manikpur 48 BANDA 948 None 3.417139 0.175099 POLYGON ((81.49776040481891 25.26790492816997,...
4160 77 9 UTTAR PRADESH 67.0 VARANASI 387 Rohaniya 77 VARANASI 977 None 0.985821 0.020446 POLYGON ((82.86353564485421 25.37710985873304,...
4161 77 9 UTTAR PRADESH 67.0 VARANASI 388 Varanasi North 77 VARANASI 977 None 0.340793 0.004158 POLYGON ((83.03745467379423 25.33332910065752,...
4162 78 9 UTTAR PRADESH 68.0 SANT RAVIDAS NAGAR * 394 Aurai (SC) 78 BHADOHI 978 None 1.053042 0.019486 POLYGON ((82.47639097046698 25.35226362756441,...
4163 52 9 UTTAR PRADESH 45.0 ALLAHABAD 264 Bara (SC) 52 ALLAHABAD 952 None 1.644742 0.084295 POLYGON ((81.74993347485002 25.34606069764311,...
4164 77 9 UTTAR PRADESH 67.0 VARANASI 389 Varanasi South 77 VARANASI 977 None 0.122368 0.000584 POLYGON ((83.03745467379423 25.33332910065752,...
4165 76 9 UTTAR PRADESH 66.0 CHANDAULI * 380 Mughalsarai 76 CHANDAULI 976 None 1.052822 0.022884 POLYGON ((83.13732327095227 25.32277610952985,...
4166 52 9 UTTAR PRADESH 45.0 ALLAHABAD 259 Meja 52 ALLAHABAD 952 None 1.208045 0.044396 POLYGON ((82.22175459799638 25.27342988168374,...
4167 77 9 UTTAR PRADESH 67.0 VARANASI 390 Varanasi Cantt. 77 VARANASI 977 None 0.336326 0.003619 POLYGON ((83.00090658107393 25.31745955051787,...
4168 79 9 UTTAR PRADESH 69.0 MIRZAPUR 397 Majhawan 79 MIRZAPUR 979 None 1.703413 0.061997 POLYGON ((82.64149998793414 25.2505412587721, ...
4169 79 9 UTTAR PRADESH 69.0 MIRZAPUR 395 Chhanbey (SC) 79 MIRZAPUR 979 None 3.353915 0.134513 POLYGON ((82.34662684725123 25.20696243626747,...
4170 46 9 UTTAR PRADESH 37.0 LALITPUR 226 Lalitpur 46 JHANSI 946 None 2.682238 0.192485 POLYGON ((78.58016882513124 25.19122700562184,...
4171 80 9 UTTAR PRADESH 66.0 CHANDAULI * 383 Chakia (SC) 80 ROBERTSGANJ (SC) 980 None 2.112128 0.119383 POLYGON ((83.31480632035544 25.2531978102308, ...
4172 79 9 UTTAR PRADESH 69.0 MIRZAPUR 396 Mirzapur 79 MIRZAPUR 979 None 1.020419 0.024429 POLYGON ((82.57645444171919 25.24636920127767,...
4173 79 9 UTTAR PRADESH 69.0 MIRZAPUR 398 Chunar 79 MIRZAPUR 979 None 1.443258 0.037048 POLYGON ((83.03014026905822 25.24342130365028,...
4174 52 9 UTTAR PRADESH 45.0 ALLAHABAD 265 Koraon (SC) 52 ALLAHABAD 952 None 1.939929 0.108972 POLYGON ((81.96441033357007 25.1558874632359, ...
4175 79 9 UTTAR PRADESH 69.0 MIRZAPUR 399 Marihan 79 MIRZAPUR 979 None 2.946889 0.145359 POLYGON ((83.13272370903201 25.12748348347606,...
4176 46 9 UTTAR PRADESH 37.0 LALITPUR 227 Mehroni (SC) 46 JHANSI 946 None 3.339006 0.256947 POLYGON ((78.77402903711044 24.85108140048629,...
4177 80 9 UTTAR PRADESH 70.0 SONBHADRA 400 Ghorawal 80 ROBERTSGANJ (SC) 980 None 2.354558 0.104874 POLYGON ((83.09301859303372 24.79781630937521,...
4178 80 9 UTTAR PRADESH 70.0 SONBHADRA 401 Robertsganj 80 ROBERTSGANJ (SC) 980 None 2.316555 0.161227 POLYGON ((83.39349552090164 24.78226446543277,...
4179 80 9 UTTAR PRADESH 70.0 SONBHADRA 402 Obra 80 ROBERTSGANJ (SC) 980 None 2.437449 0.135870 POLYGON ((83.09499108776856 24.65526460727585,...
4180 80 9 UTTAR PRADESH 70.0 SONBHADRA 403 Duddhi (SC) 80 ROBERTSGANJ (SC) 980 None 2.631747 0.202402 POLYGON ((83.27368541514983 24.36496680167659,...
4181 29 33 TAMIL NADU 20.0 THIRUVARUR 169 Nannilam 29 NAGAPATTINAM (SC) 3329 None 1.453697 0.033369 POLYGON ((79.74810744410331 10.99637622558237,...

4182 rows × 14 columns

In [8]:
# States of India
states_file = os.path.join(data_folder, 'allStateofIndia2018_07_15_05_49_44_241.csv')
states_df = pd.read_csv(states_file, delimiter=';')
states_df = states_df[['State Name(In English)', 'Census 2011 Code']]
states_df.columns = ['State Name', 'state code 2011']
states_df.loc[states_df['State Name'] == 'TELANGANA', 'state code 2011'] = 28 #Telangana Fix
states_df['State Name'] = states_df['State Name'].str.upper()
states_df
Out[8]:
State Name state code 2011
0 ANDAMAN AND NICOBAR ISLANDS 35
1 ANDHRA PRADESH 28
2 ARUNACHAL PRADESH 12
3 ASSAM 18
4 BIHAR 10
5 CHANDIGARH 4
6 CHHATTISGARH 22
7 DADRA AND NAGAR HAVELI 26
8 DAMAN AND DIU 25
9 DELHI 7
10 GOA 30
11 GUJARAT 24
12 HARYANA 6
13 HIMACHAL PRADESH 2
14 JAMMU AND KASHMIR 1
15 JHARKHAND 20
16 KARNATAKA 29
17 KERALA 32
18 LAKSHADWEEP 31
19 MADHYA PRADESH 23
20 MAHARASHTRA 27
21 MANIPUR 14
22 MEGHALAYA 17
23 MIZORAM 15
24 NAGALAND 13
25 ODISHA 21
26 PUDUCHERRY 34
27 PUNJAB 3
28 RAJASTHAN 8
29 SIKKIM 11
30 TAMIL NADU 33
31 TELANGANA 28
32 TRIPURA 16
33 UTTARAKHAND 5
34 UTTAR PRADESH 9
35 WEST BENGAL 19
In [9]:
# CSV Geocoded Villages
csv_geocoded_file = os.path.join(data_folder, 'geocoded_villages.csv')
csv_geocoded_df = pd.read_csv(csv_geocoded_file)
#csv_google_df = google_df[google_df.columns[0:16]]
csv_geocoded_df = csv_geocoded_df[['Village', 'District', 'State', 'Assembly Constituency ECI Code', 'Assembly Constituency Name', 'State Name']]
csv_geocoded_df = pd.merge(csv_geocoded_df, states_df,  how='left', left_on=['State Name'], right_on = ['State Name'])
csv_geocoded_df.columns = ['village', 'district', 'state', 'ac code', 'ac name', 'state name', 'state code']
csv_geocoded_df
#csv_geocoded_df.drop
Out[9]:
village district state ac code ac name state name state code
0 Pub Kathal Muri BARPETA ASSAM 42 Patacharkuchi ASSAM 18
1 Pub-rehabari BARPETA ASSAM 42 Patacharkuchi ASSAM 18
2 Bar Manikpur BARPETA ASSAM 42 Patacharkuchi ASSAM 18
3 Chaibari BARPETA ASSAM 42 Patacharkuchi ASSAM 18
4 Chemtia BARPETA ASSAM 42 Patacharkuchi ASSAM 18
5 Batia Mari BARPETA ASSAM 42 Patacharkuchi ASSAM 18
6 Baghmara BARPETA ASSAM 42 Patacharkuchi ASSAM 18
7 Pachim Kathalmuri(kathalguri) BARPETA ASSAM 42 Patacharkuchi ASSAM 18
8 Gergeria BARPETA ASSAM 42 Patacharkuchi ASSAM 18
9 Madhapur BARPETA ASSAM 41 Bhawanipur ASSAM 18
10 Madha Pur BARPETA ASSAM 41 Bhawanipur ASSAM 18
11 Kathalmuri Ghat BARPETA ASSAM 41 Bhawanipur ASSAM 18
12 Dakshin Rehabari BARPETA ASSAM 42 Patacharkuchi ASSAM 18
13 Hallang Bari BARPETA ASSAM 41 Bhawanipur ASSAM 18
14 Barsidhani BARPETA ASSAM 42 Patacharkuchi ASSAM 18
15 Chengli Mari BARPETA ASSAM 41 Bhawanipur ASSAM 18
16 Chungapota BONGAIGAON ASSAM 32 Bongaigaon ASSAM 18
17 Mohina NALBARI ASSAM 59 Nalbari ASSAM 18
18 Dhamdhama (dhemdhema) NALBARI ASSAM 59 Nalbari ASSAM 18
19 Dipta NALBARI ASSAM 62 Barama ASSAM 18
20 Borjhar NALBARI ASSAM 62 Barama ASSAM 18
21 Atoukhong THOUBAL MANIPUR 30 Lilong MANIPUR 14
22 Thoudam THOUBAL MANIPUR 30 Lilong MANIPUR 14
23 Laiphrakpam THOUBAL MANIPUR 30 Lilong MANIPUR 14
24 Irong Thokchom THOUBAL MANIPUR 30 Lilong MANIPUR 14
25 Nungei THOUBAL MANIPUR 30 Lilong MANIPUR 14
26 Oinam THOUBAL MANIPUR 30 Lilong MANIPUR 14
27 Leisangthem THOUBAL MANIPUR 30 Lilong MANIPUR 14
28 Khekman THOUBAL MANIPUR 30 Lilong MANIPUR 14
29 Moijing THOUBAL MANIPUR 30 Lilong MANIPUR 14
... ... ... ... ... ... ... ...
176813 Ragaboina Gudem(007 ) BHADRADRI KOTHAGUDEM(02) TELANGANA 111 Yellandu TELANGANA 28
176814 Sudimalla(005 ) BHADRADRI KOTHAGUDEM(02) TELANGANA 111 Yellandu TELANGANA 28
176815 Annasagar(011 ) KAMAREDDY(07) TELANGANA 15 Yellareddy TELANGANA 28
176816 Vellutla(003 ) KAMAREDDY(07) TELANGANA 15 Yellareddy TELANGANA 28
176817 Lingareddipet(009 ) KAMAREDDY(07) TELANGANA 15 Yellareddy TELANGANA 28
176818 Advilingal(004 ) KAMAREDDY(07) TELANGANA 15 Yellareddy TELANGANA 28
176819 Obulapur(016 ) JANGOAN(04) TELANGANA 99 Ghanpur (Station) TELANGANA 28
176820 Suraram(013 ) JANGOAN(04) TELANGANA 99 Ghanpur (Station) TELANGANA 28
176821 Thimmampet(007 ) JANGOAN(04) TELANGANA 99 Ghanpur (Station) TELANGANA 28
176822 Aliyabad(018) JANGOAN(04) TELANGANA 99 Ghanpur (Station) TELANGANA 28
176823 Zaffergadh(011) JANGOAN(04) TELANGANA 99 Ghanpur (Station) TELANGANA 28
176824 Zaffergadh(011 ) JANGOAN(04) TELANGANA 99 Ghanpur (Station) TELANGANA 28
176825 Thimmapur(017 ) JANGOAN(04) TELANGANA 99 Ghanpur (Station) TELANGANA 28
176826 Sagaram(010 ) JANGOAN(04) TELANGANA 99 Ghanpur (Station) TELANGANA 28
176827 Uppugal(001 ) JANGOAN(04) TELANGANA 99 Ghanpur (Station) TELANGANA 28
176828 Thidugu(009 ) JANGOAN(04) TELANGANA 99 Ghanpur (Station) TELANGANA 28
176829 Hothi-b(026) SANGAREDDY(23) TELANGANA 38 Zahirabad TELANGANA 28
176830 Malchelma(038 ) SANGAREDDY(23) TELANGANA 38 Zahirabad TELANGANA 28
176831 Gousabad(021 ) SANGAREDDY(23) TELANGANA 38 Zahirabad TELANGANA 28
176832 Hugelli(032 ) SANGAREDDY(23) TELANGANA 38 Zahirabad TELANGANA 28
176833 Anegunta(034 ) SANGAREDDY(23) TELANGANA 38 Zahirabad TELANGANA 28
176834 Mannapur(024 ) SANGAREDDY(23) TELANGANA 38 Zahirabad TELANGANA 28
176835 Khanjamalpur(017) SANGAREDDY(23) TELANGANA 36 Andole TELANGANA 28
176836 Auranganagar(020 ) SANGAREDDY(23) TELANGANA 38 Zahirabad TELANGANA 28
176837 Didgi(008 ) SANGAREDDY(23) TELANGANA 38 Zahirabad TELANGANA 28
176838 Asadgunj(014 ) SANGAREDDY(23) TELANGANA 38 Zahirabad TELANGANA 28
176839 Chinna Hyderabad(027) SANGAREDDY(23) TELANGANA 38 Zahirabad TELANGANA 28
176840 Madgi(001 ) SANGAREDDY(23) TELANGANA 38 Zahirabad TELANGANA 28
176841 Kothur-b(007) SANGAREDDY(23) TELANGANA 38 Zahirabad TELANGANA 28
176842 Dhanasiri(018 ) SANGAREDDY(23) TELANGANA 38 Zahirabad TELANGANA 28

176843 rows × 7 columns

In [10]:
# Google Geocoded villages
google_geocoded_file = os.path.join('google geocoding', 'output1.csv')
google_df = pd.read_csv(google_geocoded_file)
google_df_columns = list(google_df.columns)
google_df = google_df[google_df_columns[0:3] + google_df_columns[14:16]]
#Change Google Geocoded villages dataframe to geospatial and get AC data
geometry = [Point(xy) for xy in zip(google_df._longitude.apply(pd.to_numeric, errors='coerce'), google_df._latitude.apply(pd.to_numeric, errors='coerce'))]
crs = {'init': 'epsg:4326'}
google_geo_df = gpd.GeoDataFrame(google_df, crs=crs, geometry=geometry)
# Spatial Join
google_geo_df = gpd.sjoin(google_geo_df, ac_gdf, how='inner', op='within')
google_geo_df_columns = list(google_geo_df.columns)
google_geo_df = google_geo_df[google_geo_df_columns[0:3] + google_geo_df_columns[12:14] + google_geo_df_columns[8:10]]
google_geo_df.columns = ['village', 'district', 'state', 'ac code', 'ac name', 'state code', 'state name']
google_geo_df
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\IPython\core\interactiveshell.py:2785: DtypeWarning: Columns (5,10,11,12,13,14,16,17,18,19) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\numpy\lib\function_base.py:2831: RuntimeWarning: invalid value encountered in ? (vectorized)
  outputs = ufunc(*inputs)
Out[10]:
village district state ac code ac name state code state name
0 Urwa RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
8788 Kamalpur RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
14320 Bheelampur Chhapra AZAMGARH UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
17500 Mahmadpur RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
19996 Mahmadpur RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
20001 Nigohan RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
32777 Bahrampur RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
60896 Baherawa SIDDHARTH NAGAR UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
84657 Makhadumpur Urf Munimabad RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
84658 Manihar Sharki RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
84659 Said Alipur RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
84660 Mutawallipur Rana Sahab RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
84661 Purey Nasiran RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
87583 Khodaypur RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
87586 Pakharauli RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
87598 Gaffoorpur Urf Jalalabad RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
87599 Barara Bujurg RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
87601 Surasana RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
87614 Kadha Chak Sagunpur RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
87637 Surajoopur RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
88900 Ambara Mathai RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
88901 Bansi Rihayak RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
88902 Chandpur Look Mu. RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
88903 Tarapur Bansi RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
88904 Rampur Gauri Mu. RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
88905 Charuhar Ashanandpur RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
88907 Payagpur Mu. RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
88909 Tikar Agachipur RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
88910 Daudpur Garhai RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
88912 Jharaha RAE BARELI UTTAR PRADESH 183 Unchahar 9 UTTAR PRADESH
... ... ... ... ... ... ... ...
234495 Kalamboor ERNAKULAM KERALA 94 Kaduthuruthy 32 KERALA
234507 Kalamboor West ERNAKULAM KERALA 94 Kaduthuruthy 32 KERALA
236443 Marangattupally KOTTAYAM KERALA 94 Kaduthuruthy 32 KERALA
236444 Kummannoor KOTTAYAM KERALA 94 Kaduthuruthy 32 KERALA
236445 Kozha KOTTAYAM KERALA 94 Kaduthuruthy 32 KERALA
236449 Chempilavu KOTTAYAM KERALA 94 Kaduthuruthy 32 KERALA
229722 Narath KANNUR KERALA 10 Azhikode 32 KERALA
230185 Pappinissery KANNUR KERALA 10 Azhikode 32 KERALA
230186 Puzhathi KANNUR KERALA 10 Azhikode 32 KERALA
230187 Chirakkal KANNUR KERALA 10 Azhikode 32 KERALA
230188 Pallikunnu KANNUR KERALA 10 Azhikode 32 KERALA
230189 Valapattanm KANNUR KERALA 10 Azhikode 32 KERALA
230191 Asheekode KANNUR KERALA 10 Azhikode 32 KERALA
230137 Mukkaly KOTTAYAM KERALA 54 Mannarkkad 32 KERALA
231340 Mannarkkad 1 PALAKKAD KERALA 54 Mannarkkad 32 KERALA
231341 Mannarkkad 2 PALAKKAD KERALA 54 Mannarkkad 32 KERALA
230621 Malkapet(017 ) RAJANNA SIRICILLA(21) TELANGANA 66 Charminar 28 ANDHRA PRADESH
231436 Wadeshwar PUNE MAHARASHTRA 209 Shivajinagar 27 MAHARASHTRA
231646 Dhan Ch Para KHOWAI TRIPURA 46 SURMA (SC) 16 TRIPURA
231649 Kallapally Beloor(003 ) SANGAREDDY(23) TELANGANA 119 Kundapura 29 KARNATAKA
232332 Khali Mahuvar UDHAM SINGH NAGAR UTTARAKHAND 174 Jalalpore 24 GUJARAT
234023 Kumharpara KONDAGAON CHHATTISGARH 50 Raipur City North 22 CHHATTISGARH
234537 Chokli KANNUR KERALA 13 Thalassery 32 KERALA
234542 Kathirur KANNUR KERALA 13 Thalassery 32 KERALA
236138 New Mahi KANNUR KERALA 13 Thalassery 32 KERALA
236140 Eranholi KANNUR KERALA 13 Thalassery 32 KERALA
236146 Peringadi KANNUR KERALA 13 Thalassery 32 KERALA
234540 Panniyannur KANNUR KERALA 29 Mahe 34 PUDUCHERRY
235385 Dwarika KANGPOKPI MANIPUR 82 Dwarka 24 GUJARAT
236481 Govindapuram KOTTAYAM KERALA 28 Kozhikode South 32 KERALA

235924 rows × 7 columns

In [11]:
#Compile all the geocodes
geocoded_df = pd.concat([csv_geocoded_df, google_geo_df])
geocoded_df = geocoded_df[['village', 'district', 'state', 'ac code', 'ac name', 'state code', 'state name']]
geocoded_df
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\ipykernel_launcher.py:2: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  
Out[11]:
village district state ac code ac name state code state name
0 Pub Kathal Muri BARPETA ASSAM 42 Patacharkuchi 18 ASSAM
1 Pub-rehabari BARPETA ASSAM 42 Patacharkuchi 18 ASSAM
2 Bar Manikpur BARPETA ASSAM 42 Patacharkuchi 18 ASSAM
3 Chaibari BARPETA ASSAM 42 Patacharkuchi 18 ASSAM
4 Chemtia BARPETA ASSAM 42 Patacharkuchi 18 ASSAM
5 Batia Mari BARPETA ASSAM 42 Patacharkuchi 18 ASSAM
6 Baghmara BARPETA ASSAM 42 Patacharkuchi 18 ASSAM
7 Pachim Kathalmuri(kathalguri) BARPETA ASSAM 42 Patacharkuchi 18 ASSAM
8 Gergeria BARPETA ASSAM 42 Patacharkuchi 18 ASSAM
9 Madhapur BARPETA ASSAM 41 Bhawanipur 18 ASSAM
10 Madha Pur BARPETA ASSAM 41 Bhawanipur 18 ASSAM
11 Kathalmuri Ghat BARPETA ASSAM 41 Bhawanipur 18 ASSAM
12 Dakshin Rehabari BARPETA ASSAM 42 Patacharkuchi 18 ASSAM
13 Hallang Bari BARPETA ASSAM 41 Bhawanipur 18 ASSAM
14 Barsidhani BARPETA ASSAM 42 Patacharkuchi 18 ASSAM
15 Chengli Mari BARPETA ASSAM 41 Bhawanipur 18 ASSAM
16 Chungapota BONGAIGAON ASSAM 32 Bongaigaon 18 ASSAM
17 Mohina NALBARI ASSAM 59 Nalbari 18 ASSAM
18 Dhamdhama (dhemdhema) NALBARI ASSAM 59 Nalbari 18 ASSAM
19 Dipta NALBARI ASSAM 62 Barama 18 ASSAM
20 Borjhar NALBARI ASSAM 62 Barama 18 ASSAM
21 Atoukhong THOUBAL MANIPUR 30 Lilong 14 MANIPUR
22 Thoudam THOUBAL MANIPUR 30 Lilong 14 MANIPUR
23 Laiphrakpam THOUBAL MANIPUR 30 Lilong 14 MANIPUR
24 Irong Thokchom THOUBAL MANIPUR 30 Lilong 14 MANIPUR
25 Nungei THOUBAL MANIPUR 30 Lilong 14 MANIPUR
26 Oinam THOUBAL MANIPUR 30 Lilong 14 MANIPUR
27 Leisangthem THOUBAL MANIPUR 30 Lilong 14 MANIPUR
28 Khekman THOUBAL MANIPUR 30 Lilong 14 MANIPUR
29 Moijing THOUBAL MANIPUR 30 Lilong 14 MANIPUR
... ... ... ... ... ... ... ...
234495 Kalamboor ERNAKULAM KERALA 94 Kaduthuruthy 32 KERALA
234507 Kalamboor West ERNAKULAM KERALA 94 Kaduthuruthy 32 KERALA
236443 Marangattupally KOTTAYAM KERALA 94 Kaduthuruthy 32 KERALA
236444 Kummannoor KOTTAYAM KERALA 94 Kaduthuruthy 32 KERALA
236445 Kozha KOTTAYAM KERALA 94 Kaduthuruthy 32 KERALA
236449 Chempilavu KOTTAYAM KERALA 94 Kaduthuruthy 32 KERALA
229722 Narath KANNUR KERALA 10 Azhikode 32 KERALA
230185 Pappinissery KANNUR KERALA 10 Azhikode 32 KERALA
230186 Puzhathi KANNUR KERALA 10 Azhikode 32 KERALA
230187 Chirakkal KANNUR KERALA 10 Azhikode 32 KERALA
230188 Pallikunnu KANNUR KERALA 10 Azhikode 32 KERALA
230189 Valapattanm KANNUR KERALA 10 Azhikode 32 KERALA
230191 Asheekode KANNUR KERALA 10 Azhikode 32 KERALA
230137 Mukkaly KOTTAYAM KERALA 54 Mannarkkad 32 KERALA
231340 Mannarkkad 1 PALAKKAD KERALA 54 Mannarkkad 32 KERALA
231341 Mannarkkad 2 PALAKKAD KERALA 54 Mannarkkad 32 KERALA
230621 Malkapet(017 ) RAJANNA SIRICILLA(21) TELANGANA 66 Charminar 28 ANDHRA PRADESH
231436 Wadeshwar PUNE MAHARASHTRA 209 Shivajinagar 27 MAHARASHTRA
231646 Dhan Ch Para KHOWAI TRIPURA 46 SURMA (SC) 16 TRIPURA
231649 Kallapally Beloor(003 ) SANGAREDDY(23) TELANGANA 119 Kundapura 29 KARNATAKA
232332 Khali Mahuvar UDHAM SINGH NAGAR UTTARAKHAND 174 Jalalpore 24 GUJARAT
234023 Kumharpara KONDAGAON CHHATTISGARH 50 Raipur City North 22 CHHATTISGARH
234537 Chokli KANNUR KERALA 13 Thalassery 32 KERALA
234542 Kathirur KANNUR KERALA 13 Thalassery 32 KERALA
236138 New Mahi KANNUR KERALA 13 Thalassery 32 KERALA
236140 Eranholi KANNUR KERALA 13 Thalassery 32 KERALA
236146 Peringadi KANNUR KERALA 13 Thalassery 32 KERALA
234540 Panniyannur KANNUR KERALA 29 Mahe 34 PUDUCHERRY
235385 Dwarika KANGPOKPI MANIPUR 82 Dwarka 24 GUJARAT
236481 Govindapuram KOTTAYAM KERALA 28 Kozhikode South 32 KERALA

412767 rows × 7 columns

In [13]:
# Read Scrapped Data
scrapped_folder = os.path.join(data_folder, "Scrapped")
scrapped_files = os.listdir(scrapped_folder)
print("data found for these years:")
i = 1
for file in scrapped_files:
    print(i,") ",file)
    file_path = os.path.join(scrapped_folder, file)
    df = pd.read_csv(file_path, low_memory=False)
    df = df[df['Village'] != '-999']
    df = pd.merge(df, geocoded_df, left_on=['Village', 'District', 'State'], right_on = ['village', 'district', 'state'])
    outfile = os.path.join(output_folder, 'geocoded_' + file)
    df.to_csv(outfile, encoding='utf-8', index=False)
    i+=1
data found for these years:
1 )  2009-2010_Scrapped_Cleaned_Data.csv
2 )  2010-2011_Scrapped_Cleaned_Data.csv
3 )  2011-2012_Scrapped_Cleaned_Data.csv
4 )  2012-2013_Scrapped_Cleaned_Data.csv
5 )  2013-2014_Scrapped_Cleaned_Data.csv
6 )  2014-2015_Scrapped_Cleaned_Data.csv
7 )  2015-2016_Scrapped_Cleaned_Data.csv
8 )  2016-2017_Scrapped_Cleaned_Data.csv
9 )  2017-2018_Scrapped_Cleaned_Data.csv
In [ ]:
 
In [ ]:
 
In [ ]: