# Import Libraries
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
import lxml
import os
import glob
import time
import datetime
import json
import itertools
from fuzzywuzzy import process
from fuzzywuzzy import fuzz
# Set Output Folder
# exist_ok=True creates the directory only when it is missing, without the
# separate exists() check that could race with another process.
output_folder = os.path.abspath("output")
os.makedirs(output_folder, exist_ok=True)

# Import data folders
# All input data sets read further down are resolved relative to this folder.
data_folder = os.path.abspath("data")
# Helper Functions
def remove_consecutive_duplicates(x):
    """Collapse every run of repeated characters in *x* to one character.

    Example: ``"KOTTAYAM"`` becomes ``"KOTAYAM"``.

    Args:
        x: The string to collapse. Input that cannot be iterated (``None``,
            a float NaN) or that does not yield strings is tolerated.

    Returns:
        The collapsed string, or ``''`` when *x* cannot be processed.
    """
    try:
        return ''.join(key for key, _ in itertools.groupby(x))
    except TypeError:
        # groupby() raises TypeError for non-iterable input and join() raises
        # it for non-string items; both are treated as "no usable text".
        return ''
# Read Constituency Data
# The constituency polygons live in data/AC/India_AC.shp.
ac_filepath = os.path.join(data_folder, "AC", "India_AC.shp")
ac_gdf = None  # placeholder until the shapefile has been read
ac_gdf = gpd.read_file(ac_filepath)
ac_gdf  # bare expression: displays the frame when run as a notebook cell
OBJECTID | ST_CODE | ST_NAME | DT_CODE | DIST_NAME | AC_NO | AC_NAME | PC_NO | PC_NAME | PC_ID | STATUS | Shape_Leng | Shape_Area | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 13 | NAGALAND | 1.0 | MON | 41 | Tizit | 1 | NAGALAND | 1301 | Pre delimitation | 1.381854 | 0.055845 | POLYGON ((94.9457548574008 26.93518316946773, ... |
1 | 1 | 13 | NAGALAND | 1.0 | MON | 43 | Tapi | 1 | NAGALAND | 1301 | Pre delimitation | 1.056157 | 0.030387 | POLYGON ((95.22324386371338 26.75963707718944,... |
2 | 1 | 13 | NAGALAND | 1.0 | MON | 42 | Wakching | 1 | NAGALAND | 1301 | Pre delimitation | 0.980303 | 0.018828 | POLYGON ((94.86775129009004 26.82831138606167,... |
3 | 1 | 13 | NAGALAND | 2.0 | TUENSANG | 49 | Tamlu | 1 | NAGALAND | 1301 | Pre delimitation | 1.133296 | 0.021899 | POLYGON ((94.73862639329349 26.76868496204139,... |
4 | 1 | 13 | NAGALAND | 3.0 | MOKOKCHUNG | 21 | Tuli | 1 | NAGALAND | 1301 | Pre delimitation | 0.965989 | 0.022397 | POLYGON ((94.73862639329349 26.76868496204139,... |
5 | 1 | 13 | NAGALAND | 1.0 | MON | 44 | Phomching | 1 | NAGALAND | 1301 | Pre delimitation | 0.356973 | 0.007281 | POLYGON ((95.20991815389976 26.72866549251853,... |
6 | 1 | 13 | NAGALAND | 1.0 | MON | 46 | Mon Town | 1 | NAGALAND | 1301 | Pre delimitation | 0.492124 | 0.011957 | POLYGON ((94.96949610236328 26.72373432897615,... |
7 | 1 | 13 | NAGALAND | 1.0 | MON | 47 | Aboi | 1 | NAGALAND | 1301 | Pre delimitation | 0.664703 | 0.016869 | POLYGON ((94.99003335425596 26.62768812660579,... |
8 | 1 | 13 | NAGALAND | 3.0 | MOKOKCHUNG | 30 | Alungtaki | 1 | NAGALAND | 1301 | Pre delimitation | 1.111287 | 0.031180 | POLYGON ((94.52707905005815 26.69958256688199,... |
9 | 1 | 13 | NAGALAND | 1.0 | MON | 45 | Tehok | 1 | NAGALAND | 1301 | Pre delimitation | 0.580774 | 0.011101 | POLYGON ((95.12053707632509 26.62556110945769,... |
10 | 1 | 13 | NAGALAND | 2.0 | TUENSANG | 50 | Longleng | 1 | NAGALAND | 1301 | Pre delimitation | 0.725516 | 0.022976 | POLYGON ((94.73277705341832 26.59490452672259,... |
11 | 1 | 13 | NAGALAND | 3.0 | MOKOKCHUNG | 22 | Arkakong | 1 | NAGALAND | 1301 | Pre delimitation | 0.576670 | 0.016012 | POLYGON ((94.73272983001772 26.59489452536212,... |
12 | 1 | 13 | NAGALAND | 3.0 | MOKOKCHUNG | 29 | Jangpetkong | 1 | NAGALAND | 1301 | Pre delimitation | 0.790225 | 0.019501 | POLYGON ((94.59166983429549 26.54138626746231,... |
13 | 1 | 13 | NAGALAND | 1.0 | MON | 48 | Moka | 1 | NAGALAND | 1301 | Pre delimitation | 0.613600 | 0.013579 | POLYGON ((95.11353174969054 26.53153183372717,... |
14 | 1 | 13 | NAGALAND | 5.0 | WOKHA | 40 | Bhandari | 1 | NAGALAND | 1301 | Pre delimitation | 2.105271 | 0.078391 | POLYGON ((94.22785725650726 26.50144543915576,... |
15 | 1 | 13 | NAGALAND | 2.0 | TUENSANG | 51 | Noksen | 1 | NAGALAND | 1301 | Pre delimitation | 0.883926 | 0.020589 | POLYGON ((94.68651101617053 26.5247634442776, ... |
16 | 1 | 13 | NAGALAND | 2.0 | TUENSANG | 55 | Tobu | 1 | NAGALAND | 1301 | Pre delimitation | 0.675198 | 0.024041 | POLYGON ((95.02478163826203 26.4871035152151, ... |
17 | 1 | 13 | NAGALAND | 3.0 | MOKOKCHUNG | 24 | Angetyongpang | 1 | NAGALAND | 1301 | Pre delimitation | 0.680903 | 0.013847 | POLYGON ((94.69043964607289 26.4755964063491, ... |
18 | 1 | 13 | NAGALAND | 3.0 | MOKOKCHUNG | 23 | Impur | 1 | NAGALAND | 1301 | Pre delimitation | 0.449900 | 0.013073 | POLYGON ((94.60467622003159 26.4561825093154, ... |
19 | 1 | 13 | NAGALAND | 2.0 | TUENSANG | 56 | Noklak | 1 | NAGALAND | 1301 | Pre delimitation | 0.876690 | 0.032109 | POLYGON ((95.10811841912982 26.39522082138387,... |
20 | 1 | 13 | NAGALAND | 3.0 | MOKOKCHUNG | 28 | Koridang | 1 | NAGALAND | 1301 | Pre delimitation | 0.752880 | 0.016517 | POLYGON ((94.4926679209878 26.34862624470549, ... |
21 | 1 | 13 | NAGALAND | 2.0 | TUENSANG | 52 | Longkhim Chare | 1 | NAGALAND | 1301 | Pre delimitation | 0.784452 | 0.027747 | POLYGON ((94.65750690711815 26.34742268291052,... |
22 | 1 | 13 | NAGALAND | 2.0 | TUENSANG | 54 | Tuensang Sadar-II | 1 | NAGALAND | 1301 | Pre delimitation | 1.078362 | 0.043857 | POLYGON ((94.91999992425718 26.34637312911667,... |
23 | 1 | 13 | NAGALAND | 3.0 | MOKOKCHUNG | 25 | Monguya | 1 | NAGALAND | 1301 | Pre delimitation | 0.608629 | 0.008306 | POLYGON ((94.62470017936982 26.32930194712719,... |
24 | 1 | 13 | NAGALAND | 3.0 | MOKOKCHUNG | 27 | Mokokchung Town | 1 | NAGALAND | 1301 | Pre delimitation | 0.131384 | 0.001076 | POLYGON ((94.54252940101111 26.33431455507269,... |
25 | 1 | 13 | NAGALAND | 3.0 | MOKOKCHUNG | 26 | Aonglenden | 1 | NAGALAND | 1301 | Pre delimitation | 0.314304 | 0.003474 | POLYGON ((94.52133995267673 26.30919024633283,... |
26 | 1 | 13 | NAGALAND | 2.0 | TUENSANG | 53 | Tuensang Sadar-I | 1 | NAGALAND | 1301 | Pre delimitation | 0.288725 | 0.004838 | POLYGON ((94.79527878988432 26.30946500720438,... |
27 | 1 | 13 | NAGALAND | 5.0 | WOKHA | 39 | Sanis | 1 | NAGALAND | 1301 | Pre delimitation | 1.148655 | 0.030105 | POLYGON ((94.34819933373234 26.25056504923771,... |
28 | 1 | 13 | NAGALAND | 4.0 | ZUNHEBOTO | 31 | Akuluto | 1 | NAGALAND | 1301 | Pre delimitation | 0.620547 | 0.014271 | POLYGON ((94.48322360598468 26.29239831283195,... |
29 | 1 | 13 | NAGALAND | 4.0 | ZUNHEBOTO | 33 | Suruhuto | 1 | NAGALAND | 1301 | Pre delimitation | 0.568226 | 0.013466 | POLYGON ((94.5081128643173 26.21370854751149, ... |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4152 | 78 | 9 | UTTAR PRADESH | 45.0 | ALLAHABAD | 258 | Handia | 78 | BHADOHI | 978 | None | 1.221541 | 0.026459 | POLYGON ((82.33191633527071 25.44923534473543,... |
4153 | 52 | 9 | UTTAR PRADESH | 45.0 | ALLAHABAD | 263 | Allahabad South | 52 | ALLAHABAD | 952 | None | 0.308595 | 0.002585 | POLYGON ((81.87433344513244 25.45406652075337,... |
4154 | 76 | 9 | UTTAR PRADESH | 67.0 | VARANASI | 386 | Shivpur | 76 | CHANDAULI | 976 | None | 1.126150 | 0.024123 | POLYGON ((83.12710597246274 25.44357179220452,... |
4155 | 78 | 9 | UTTAR PRADESH | 68.0 | SANT RAVIDAS NAGAR * | 393 | Gyanpur | 78 | BHADOHI | 978 | None | 1.422333 | 0.030785 | POLYGON ((82.38944645219209 25.44594297618744,... |
4156 | 52 | 9 | UTTAR PRADESH | 45.0 | ALLAHABAD | 260 | Karachhana | 52 | ALLAHABAD | 952 | None | 1.157874 | 0.032931 | POLYGON ((81.82137715063504 25.41293949925853,... |
4157 | 48 | 9 | UTTAR PRADESH | 40.0 | BANDA | 234 | Naraini (SC) | 48 | BANDA | 948 | None | 3.475376 | 0.107559 | POLYGON ((80.74833097129732 25.40767615672928,... |
4158 | 77 | 9 | UTTAR PRADESH | 67.0 | VARANASI | 391 | Sevapuri | 77 | VARANASI | 977 | None | 1.211015 | 0.024156 | POLYGON ((82.82223038402503 25.38401144946886,... |
4159 | 48 | 9 | UTTAR PRADESH | 41.0 | CHITRAKOOT * | 237 | Manikpur | 48 | BANDA | 948 | None | 3.417139 | 0.175099 | POLYGON ((81.49776040481891 25.26790492816997,... |
4160 | 77 | 9 | UTTAR PRADESH | 67.0 | VARANASI | 387 | Rohaniya | 77 | VARANASI | 977 | None | 0.985821 | 0.020446 | POLYGON ((82.86353564485421 25.37710985873304,... |
4161 | 77 | 9 | UTTAR PRADESH | 67.0 | VARANASI | 388 | Varanasi North | 77 | VARANASI | 977 | None | 0.340793 | 0.004158 | POLYGON ((83.03745467379423 25.33332910065752,... |
4162 | 78 | 9 | UTTAR PRADESH | 68.0 | SANT RAVIDAS NAGAR * | 394 | Aurai (SC) | 78 | BHADOHI | 978 | None | 1.053042 | 0.019486 | POLYGON ((82.47639097046698 25.35226362756441,... |
4163 | 52 | 9 | UTTAR PRADESH | 45.0 | ALLAHABAD | 264 | Bara (SC) | 52 | ALLAHABAD | 952 | None | 1.644742 | 0.084295 | POLYGON ((81.74993347485002 25.34606069764311,... |
4164 | 77 | 9 | UTTAR PRADESH | 67.0 | VARANASI | 389 | Varanasi South | 77 | VARANASI | 977 | None | 0.122368 | 0.000584 | POLYGON ((83.03745467379423 25.33332910065752,... |
4165 | 76 | 9 | UTTAR PRADESH | 66.0 | CHANDAULI * | 380 | Mughalsarai | 76 | CHANDAULI | 976 | None | 1.052822 | 0.022884 | POLYGON ((83.13732327095227 25.32277610952985,... |
4166 | 52 | 9 | UTTAR PRADESH | 45.0 | ALLAHABAD | 259 | Meja | 52 | ALLAHABAD | 952 | None | 1.208045 | 0.044396 | POLYGON ((82.22175459799638 25.27342988168374,... |
4167 | 77 | 9 | UTTAR PRADESH | 67.0 | VARANASI | 390 | Varanasi Cantt. | 77 | VARANASI | 977 | None | 0.336326 | 0.003619 | POLYGON ((83.00090658107393 25.31745955051787,... |
4168 | 79 | 9 | UTTAR PRADESH | 69.0 | MIRZAPUR | 397 | Majhawan | 79 | MIRZAPUR | 979 | None | 1.703413 | 0.061997 | POLYGON ((82.64149998793414 25.2505412587721, ... |
4169 | 79 | 9 | UTTAR PRADESH | 69.0 | MIRZAPUR | 395 | Chhanbey (SC) | 79 | MIRZAPUR | 979 | None | 3.353915 | 0.134513 | POLYGON ((82.34662684725123 25.20696243626747,... |
4170 | 46 | 9 | UTTAR PRADESH | 37.0 | LALITPUR | 226 | Lalitpur | 46 | JHANSI | 946 | None | 2.682238 | 0.192485 | POLYGON ((78.58016882513124 25.19122700562184,... |
4171 | 80 | 9 | UTTAR PRADESH | 66.0 | CHANDAULI * | 383 | Chakia (SC) | 80 | ROBERTSGANJ (SC) | 980 | None | 2.112128 | 0.119383 | POLYGON ((83.31480632035544 25.2531978102308, ... |
4172 | 79 | 9 | UTTAR PRADESH | 69.0 | MIRZAPUR | 396 | Mirzapur | 79 | MIRZAPUR | 979 | None | 1.020419 | 0.024429 | POLYGON ((82.57645444171919 25.24636920127767,... |
4173 | 79 | 9 | UTTAR PRADESH | 69.0 | MIRZAPUR | 398 | Chunar | 79 | MIRZAPUR | 979 | None | 1.443258 | 0.037048 | POLYGON ((83.03014026905822 25.24342130365028,... |
4174 | 52 | 9 | UTTAR PRADESH | 45.0 | ALLAHABAD | 265 | Koraon (SC) | 52 | ALLAHABAD | 952 | None | 1.939929 | 0.108972 | POLYGON ((81.96441033357007 25.1558874632359, ... |
4175 | 79 | 9 | UTTAR PRADESH | 69.0 | MIRZAPUR | 399 | Marihan | 79 | MIRZAPUR | 979 | None | 2.946889 | 0.145359 | POLYGON ((83.13272370903201 25.12748348347606,... |
4176 | 46 | 9 | UTTAR PRADESH | 37.0 | LALITPUR | 227 | Mehroni (SC) | 46 | JHANSI | 946 | None | 3.339006 | 0.256947 | POLYGON ((78.77402903711044 24.85108140048629,... |
4177 | 80 | 9 | UTTAR PRADESH | 70.0 | SONBHADRA | 400 | Ghorawal | 80 | ROBERTSGANJ (SC) | 980 | None | 2.354558 | 0.104874 | POLYGON ((83.09301859303372 24.79781630937521,... |
4178 | 80 | 9 | UTTAR PRADESH | 70.0 | SONBHADRA | 401 | Robertsganj | 80 | ROBERTSGANJ (SC) | 980 | None | 2.316555 | 0.161227 | POLYGON ((83.39349552090164 24.78226446543277,... |
4179 | 80 | 9 | UTTAR PRADESH | 70.0 | SONBHADRA | 402 | Obra | 80 | ROBERTSGANJ (SC) | 980 | None | 2.437449 | 0.135870 | POLYGON ((83.09499108776856 24.65526460727585,... |
4180 | 80 | 9 | UTTAR PRADESH | 70.0 | SONBHADRA | 403 | Duddhi (SC) | 80 | ROBERTSGANJ (SC) | 980 | None | 2.631747 | 0.202402 | POLYGON ((83.27368541514983 24.36496680167659,... |
4181 | 29 | 33 | TAMIL NADU | 20.0 | THIRUVARUR | 169 | Nannilam | 29 | NAGAPATTINAM (SC) | 3329 | None | 1.453697 | 0.033369 | POLYGON ((79.74810744410331 10.99637622558237,... |
4182 rows × 14 columns
# States of India
# Lookup table: upper-case state name -> Census 2011 state code.
states_file = os.path.join(data_folder, 'allStateofIndia2018_07_15_05_49_44_241.csv')
states_df = pd.read_csv(states_file, delimiter=';')
# .copy() so the edits below act on an independent frame rather than a slice.
states_df = states_df[['State Name(In English)', 'Census 2011 Code']].copy()
states_df.columns = ['State Name', 'state code 2011']
# Upper-case the names BEFORE the Telangana fix, so the comparison below does
# not depend on how the CSV happens to capitalise the name.
states_df['State Name'] = states_df['State Name'].str.upper()
# Telangana Fix: force code 28, the code ANDHRA PRADESH carries in this table.
# NOTE(review): presumably because the constituency shapefile still files
# Telangana seats under Andhra Pradesh - confirm.
states_df.loc[states_df['State Name'] == 'TELANGANA', 'state code 2011'] = 28
states_df
State Name | state code 2011 | |
---|---|---|
0 | ANDAMAN AND NICOBAR ISLANDS | 35 |
1 | ANDHRA PRADESH | 28 |
2 | ARUNACHAL PRADESH | 12 |
3 | ASSAM | 18 |
4 | BIHAR | 10 |
5 | CHANDIGARH | 4 |
6 | CHHATTISGARH | 22 |
7 | DADRA AND NAGAR HAVELI | 26 |
8 | DAMAN AND DIU | 25 |
9 | DELHI | 7 |
10 | GOA | 30 |
11 | GUJARAT | 24 |
12 | HARYANA | 6 |
13 | HIMACHAL PRADESH | 2 |
14 | JAMMU AND KASHMIR | 1 |
15 | JHARKHAND | 20 |
16 | KARNATAKA | 29 |
17 | KERALA | 32 |
18 | LAKSHADWEEP | 31 |
19 | MADHYA PRADESH | 23 |
20 | MAHARASHTRA | 27 |
21 | MANIPUR | 14 |
22 | MEGHALAYA | 17 |
23 | MIZORAM | 15 |
24 | NAGALAND | 13 |
25 | ODISHA | 21 |
26 | PUDUCHERRY | 34 |
27 | PUNJAB | 3 |
28 | RAJASTHAN | 8 |
29 | SIKKIM | 11 |
30 | TAMIL NADU | 33 |
31 | TELANGANA | 28 |
32 | TRIPURA | 16 |
33 | UTTARAKHAND | 5 |
34 | UTTAR PRADESH | 9 |
35 | WEST BENGAL | 19 |
# CSV Geocoded Villages
# Villages whose constituency is already recorded in the CSV; the state code
# is attached by looking the state name up in states_df.
csv_geocoded_file = os.path.join(data_folder, 'geocoded_villages.csv')
village_source_columns = [
    'Village',
    'District',
    'State',
    'Assembly Constituency ECI Code',
    'Assembly Constituency Name',
    'State Name',
]
csv_geocoded_df = pd.read_csv(csv_geocoded_file)[village_source_columns]
# Left join keeps every village even when its state name has no match.
csv_geocoded_df = csv_geocoded_df.merge(states_df, how='left', on='State Name')
csv_geocoded_df.columns = ['village', 'district', 'state', 'ac code', 'ac name', 'state name', 'state code']
csv_geocoded_df
village | district | state | ac code | ac name | state name | state code | |
---|---|---|---|---|---|---|---|
0 | Pub Kathal Muri | BARPETA | ASSAM | 42 | Patacharkuchi | ASSAM | 18 |
1 | Pub-rehabari | BARPETA | ASSAM | 42 | Patacharkuchi | ASSAM | 18 |
2 | Bar Manikpur | BARPETA | ASSAM | 42 | Patacharkuchi | ASSAM | 18 |
3 | Chaibari | BARPETA | ASSAM | 42 | Patacharkuchi | ASSAM | 18 |
4 | Chemtia | BARPETA | ASSAM | 42 | Patacharkuchi | ASSAM | 18 |
5 | Batia Mari | BARPETA | ASSAM | 42 | Patacharkuchi | ASSAM | 18 |
6 | Baghmara | BARPETA | ASSAM | 42 | Patacharkuchi | ASSAM | 18 |
7 | Pachim Kathalmuri(kathalguri) | BARPETA | ASSAM | 42 | Patacharkuchi | ASSAM | 18 |
8 | Gergeria | BARPETA | ASSAM | 42 | Patacharkuchi | ASSAM | 18 |
9 | Madhapur | BARPETA | ASSAM | 41 | Bhawanipur | ASSAM | 18 |
10 | Madha Pur | BARPETA | ASSAM | 41 | Bhawanipur | ASSAM | 18 |
11 | Kathalmuri Ghat | BARPETA | ASSAM | 41 | Bhawanipur | ASSAM | 18 |
12 | Dakshin Rehabari | BARPETA | ASSAM | 42 | Patacharkuchi | ASSAM | 18 |
13 | Hallang Bari | BARPETA | ASSAM | 41 | Bhawanipur | ASSAM | 18 |
14 | Barsidhani | BARPETA | ASSAM | 42 | Patacharkuchi | ASSAM | 18 |
15 | Chengli Mari | BARPETA | ASSAM | 41 | Bhawanipur | ASSAM | 18 |
16 | Chungapota | BONGAIGAON | ASSAM | 32 | Bongaigaon | ASSAM | 18 |
17 | Mohina | NALBARI | ASSAM | 59 | Nalbari | ASSAM | 18 |
18 | Dhamdhama (dhemdhema) | NALBARI | ASSAM | 59 | Nalbari | ASSAM | 18 |
19 | Dipta | NALBARI | ASSAM | 62 | Barama | ASSAM | 18 |
20 | Borjhar | NALBARI | ASSAM | 62 | Barama | ASSAM | 18 |
21 | Atoukhong | THOUBAL | MANIPUR | 30 | Lilong | MANIPUR | 14 |
22 | Thoudam | THOUBAL | MANIPUR | 30 | Lilong | MANIPUR | 14 |
23 | Laiphrakpam | THOUBAL | MANIPUR | 30 | Lilong | MANIPUR | 14 |
24 | Irong Thokchom | THOUBAL | MANIPUR | 30 | Lilong | MANIPUR | 14 |
25 | Nungei | THOUBAL | MANIPUR | 30 | Lilong | MANIPUR | 14 |
26 | Oinam | THOUBAL | MANIPUR | 30 | Lilong | MANIPUR | 14 |
27 | Leisangthem | THOUBAL | MANIPUR | 30 | Lilong | MANIPUR | 14 |
28 | Khekman | THOUBAL | MANIPUR | 30 | Lilong | MANIPUR | 14 |
29 | Moijing | THOUBAL | MANIPUR | 30 | Lilong | MANIPUR | 14 |
... | ... | ... | ... | ... | ... | ... | ... |
176813 | Ragaboina Gudem(007 ) | BHADRADRI KOTHAGUDEM(02) | TELANGANA | 111 | Yellandu | TELANGANA | 28 |
176814 | Sudimalla(005 ) | BHADRADRI KOTHAGUDEM(02) | TELANGANA | 111 | Yellandu | TELANGANA | 28 |
176815 | Annasagar(011 ) | KAMAREDDY(07) | TELANGANA | 15 | Yellareddy | TELANGANA | 28 |
176816 | Vellutla(003 ) | KAMAREDDY(07) | TELANGANA | 15 | Yellareddy | TELANGANA | 28 |
176817 | Lingareddipet(009 ) | KAMAREDDY(07) | TELANGANA | 15 | Yellareddy | TELANGANA | 28 |
176818 | Advilingal(004 ) | KAMAREDDY(07) | TELANGANA | 15 | Yellareddy | TELANGANA | 28 |
176819 | Obulapur(016 ) | JANGOAN(04) | TELANGANA | 99 | Ghanpur (Station) | TELANGANA | 28 |
176820 | Suraram(013 ) | JANGOAN(04) | TELANGANA | 99 | Ghanpur (Station) | TELANGANA | 28 |
176821 | Thimmampet(007 ) | JANGOAN(04) | TELANGANA | 99 | Ghanpur (Station) | TELANGANA | 28 |
176822 | Aliyabad(018) | JANGOAN(04) | TELANGANA | 99 | Ghanpur (Station) | TELANGANA | 28 |
176823 | Zaffergadh(011) | JANGOAN(04) | TELANGANA | 99 | Ghanpur (Station) | TELANGANA | 28 |
176824 | Zaffergadh(011 ) | JANGOAN(04) | TELANGANA | 99 | Ghanpur (Station) | TELANGANA | 28 |
176825 | Thimmapur(017 ) | JANGOAN(04) | TELANGANA | 99 | Ghanpur (Station) | TELANGANA | 28 |
176826 | Sagaram(010 ) | JANGOAN(04) | TELANGANA | 99 | Ghanpur (Station) | TELANGANA | 28 |
176827 | Uppugal(001 ) | JANGOAN(04) | TELANGANA | 99 | Ghanpur (Station) | TELANGANA | 28 |
176828 | Thidugu(009 ) | JANGOAN(04) | TELANGANA | 99 | Ghanpur (Station) | TELANGANA | 28 |
176829 | Hothi-b(026) | SANGAREDDY(23) | TELANGANA | 38 | Zahirabad | TELANGANA | 28 |
176830 | Malchelma(038 ) | SANGAREDDY(23) | TELANGANA | 38 | Zahirabad | TELANGANA | 28 |
176831 | Gousabad(021 ) | SANGAREDDY(23) | TELANGANA | 38 | Zahirabad | TELANGANA | 28 |
176832 | Hugelli(032 ) | SANGAREDDY(23) | TELANGANA | 38 | Zahirabad | TELANGANA | 28 |
176833 | Anegunta(034 ) | SANGAREDDY(23) | TELANGANA | 38 | Zahirabad | TELANGANA | 28 |
176834 | Mannapur(024 ) | SANGAREDDY(23) | TELANGANA | 38 | Zahirabad | TELANGANA | 28 |
176835 | Khanjamalpur(017) | SANGAREDDY(23) | TELANGANA | 36 | Andole | TELANGANA | 28 |
176836 | Auranganagar(020 ) | SANGAREDDY(23) | TELANGANA | 38 | Zahirabad | TELANGANA | 28 |
176837 | Didgi(008 ) | SANGAREDDY(23) | TELANGANA | 38 | Zahirabad | TELANGANA | 28 |
176838 | Asadgunj(014 ) | SANGAREDDY(23) | TELANGANA | 38 | Zahirabad | TELANGANA | 28 |
176839 | Chinna Hyderabad(027) | SANGAREDDY(23) | TELANGANA | 38 | Zahirabad | TELANGANA | 28 |
176840 | Madgi(001 ) | SANGAREDDY(23) | TELANGANA | 38 | Zahirabad | TELANGANA | 28 |
176841 | Kothur-b(007) | SANGAREDDY(23) | TELANGANA | 38 | Zahirabad | TELANGANA | 28 |
176842 | Dhanasiri(018 ) | SANGAREDDY(23) | TELANGANA | 38 | Zahirabad | TELANGANA | 28 |
176843 rows × 7 columns
# Google Geocoded villages
google_geocoded_file = os.path.join('google geocoding', 'output1.csv')
# low_memory=False parses each column in one pass, which avoids the
# mixed-type DtypeWarning raised by chunked type inference.
google_df = pd.read_csv(google_geocoded_file, low_memory=False)
google_df_columns = list(google_df.columns)
# Keep the three leading identifier columns plus the two coordinate columns.
google_df = google_df[google_df_columns[0:3] + google_df_columns[14:16]]

# Change Google Geocoded villages dataframe to geospatial and get AC data
# Unparseable coordinates become NaN (errors='coerce'); to_numeric is applied
# to the whole column at once rather than element by element.
longitude = pd.to_numeric(google_df['_longitude'], errors='coerce')
latitude = pd.to_numeric(google_df['_latitude'], errors='coerce')
# Rows without a usable coordinate pair cannot be placed inside any polygon,
# so leave them out instead of building NaN points for them.
has_coordinates = longitude.notna() & latitude.notna()
located_df = google_df[has_coordinates].copy()
# Index the points like located_df so geometries line up with their rows.
geometry = gpd.GeoSeries(
    [Point(xy) for xy in zip(longitude[has_coordinates], latitude[has_coordinates])],
    index=located_df.index,
)
crs = {'init': 'epsg:4326'}
google_geo_df = gpd.GeoDataFrame(located_df, crs=crs, geometry=geometry)

# Spatial Join
# Inner join: only villages that fall within a constituency polygon survive.
# NOTE: newer geopandas releases call this keyword `predicate`; `op` is kept
# to match the geopandas version this notebook was written against.
google_geo_df = gpd.sjoin(google_geo_df, ac_gdf, how='inner', op='within')
google_geo_df_columns = list(google_geo_df.columns)
# Constituency and state columns are picked by name instead of by their
# position in the joined frame.
google_geo_df = google_geo_df[google_geo_df_columns[0:3] + ['AC_NO', 'AC_NAME', 'ST_CODE', 'ST_NAME']]
google_geo_df.columns = ['village', 'district', 'state', 'ac code', 'ac name', 'state code', 'state name']
google_geo_df
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\IPython\core\interactiveshell.py:2785: DtypeWarning: Columns (5,10,11,12,13,14,16,17,18,19) have mixed types. Specify dtype option on import or set low_memory=False. interactivity=interactivity, compiler=compiler, result=result) C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\numpy\lib\function_base.py:2831: RuntimeWarning: invalid value encountered in ? (vectorized) outputs = ufunc(*inputs)
village | district | state | ac code | ac name | state code | state name | |
---|---|---|---|---|---|---|---|
0 | Urwa | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
8788 | Kamalpur | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
14320 | Bheelampur Chhapra | AZAMGARH | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
17500 | Mahmadpur | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
19996 | Mahmadpur | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
20001 | Nigohan | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
32777 | Bahrampur | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
60896 | Baherawa | SIDDHARTH NAGAR | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
84657 | Makhadumpur Urf Munimabad | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
84658 | Manihar Sharki | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
84659 | Said Alipur | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
84660 | Mutawallipur Rana Sahab | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
84661 | Purey Nasiran | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
87583 | Khodaypur | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
87586 | Pakharauli | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
87598 | Gaffoorpur Urf Jalalabad | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
87599 | Barara Bujurg | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
87601 | Surasana | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
87614 | Kadha Chak Sagunpur | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
87637 | Surajoopur | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
88900 | Ambara Mathai | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
88901 | Bansi Rihayak | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
88902 | Chandpur Look Mu. | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
88903 | Tarapur Bansi | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
88904 | Rampur Gauri Mu. | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
88905 | Charuhar Ashanandpur | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
88907 | Payagpur Mu. | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
88909 | Tikar Agachipur | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
88910 | Daudpur Garhai | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
88912 | Jharaha | RAE BARELI | UTTAR PRADESH | 183 | Unchahar | 9 | UTTAR PRADESH |
... | ... | ... | ... | ... | ... | ... | ... |
234495 | Kalamboor | ERNAKULAM | KERALA | 94 | Kaduthuruthy | 32 | KERALA |
234507 | Kalamboor West | ERNAKULAM | KERALA | 94 | Kaduthuruthy | 32 | KERALA |
236443 | Marangattupally | KOTTAYAM | KERALA | 94 | Kaduthuruthy | 32 | KERALA |
236444 | Kummannoor | KOTTAYAM | KERALA | 94 | Kaduthuruthy | 32 | KERALA |
236445 | Kozha | KOTTAYAM | KERALA | 94 | Kaduthuruthy | 32 | KERALA |
236449 | Chempilavu | KOTTAYAM | KERALA | 94 | Kaduthuruthy | 32 | KERALA |
229722 | Narath | KANNUR | KERALA | 10 | Azhikode | 32 | KERALA |
230185 | Pappinissery | KANNUR | KERALA | 10 | Azhikode | 32 | KERALA |
230186 | Puzhathi | KANNUR | KERALA | 10 | Azhikode | 32 | KERALA |
230187 | Chirakkal | KANNUR | KERALA | 10 | Azhikode | 32 | KERALA |
230188 | Pallikunnu | KANNUR | KERALA | 10 | Azhikode | 32 | KERALA |
230189 | Valapattanm | KANNUR | KERALA | 10 | Azhikode | 32 | KERALA |
230191 | Asheekode | KANNUR | KERALA | 10 | Azhikode | 32 | KERALA |
230137 | Mukkaly | KOTTAYAM | KERALA | 54 | Mannarkkad | 32 | KERALA |
231340 | Mannarkkad 1 | PALAKKAD | KERALA | 54 | Mannarkkad | 32 | KERALA |
231341 | Mannarkkad 2 | PALAKKAD | KERALA | 54 | Mannarkkad | 32 | KERALA |
230621 | Malkapet(017 ) | RAJANNA SIRICILLA(21) | TELANGANA | 66 | Charminar | 28 | ANDHRA PRADESH |
231436 | Wadeshwar | PUNE | MAHARASHTRA | 209 | Shivajinagar | 27 | MAHARASHTRA |
231646 | Dhan Ch Para | KHOWAI | TRIPURA | 46 | SURMA (SC) | 16 | TRIPURA |
231649 | Kallapally Beloor(003 ) | SANGAREDDY(23) | TELANGANA | 119 | Kundapura | 29 | KARNATAKA |
232332 | Khali Mahuvar | UDHAM SINGH NAGAR | UTTARAKHAND | 174 | Jalalpore | 24 | GUJARAT |
234023 | Kumharpara | KONDAGAON | CHHATTISGARH | 50 | Raipur City North | 22 | CHHATTISGARH |
234537 | Chokli | KANNUR | KERALA | 13 | Thalassery | 32 | KERALA |
234542 | Kathirur | KANNUR | KERALA | 13 | Thalassery | 32 | KERALA |
236138 | New Mahi | KANNUR | KERALA | 13 | Thalassery | 32 | KERALA |
236140 | Eranholi | KANNUR | KERALA | 13 | Thalassery | 32 | KERALA |
236146 | Peringadi | KANNUR | KERALA | 13 | Thalassery | 32 | KERALA |
234540 | Panniyannur | KANNUR | KERALA | 29 | Mahe | 34 | PUDUCHERRY |
235385 | Dwarika | KANGPOKPI | MANIPUR | 82 | Dwarka | 24 | GUJARAT |
236481 | Govindapuram | KOTTAYAM | KERALA | 28 | Kozhikode South | 32 | KERALA |
235924 rows × 7 columns
# Compile all the geocodes
# Both sources are put into the same column order before concatenation, so
# the column axes are aligned and pandas has nothing to sort or warn about.
geocode_columns = ['village', 'district', 'state', 'ac code', 'ac name', 'state code', 'state name']
geocoded_df = pd.concat([csv_geocoded_df[geocode_columns], google_geo_df[geocode_columns]])

# Join keys: upper-case, letters only, runs of repeated letters collapsed.
# regex=True is explicit because the pattern must be treated as a regular
# expression; newer pandas versions default to a literal replacement.
geocoded_df['village_y'] = (
    geocoded_df['village']
    .str.upper()
    .str.replace('([^A-Za-z]+)', '', regex=True)
    .apply(remove_consecutive_duplicates)
)
geocoded_df['district_y'] = (
    geocoded_df['district']
    .str.upper()
    .str.replace('([^A-Za-z]+)', '', regex=True)
    .apply(remove_consecutive_duplicates)
)
geocoded_df
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\ipykernel_launcher.py:2: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version of pandas will change to not sort by default. To accept the future behavior, pass 'sort=False'. To retain the current behavior and silence the warning, pass 'sort=True'.
village | district | state | ac code | ac name | state code | state name | village_y | district_y | |
---|---|---|---|---|---|---|---|---|---|
0 | Pub Kathal Muri | BARPETA | ASSAM | 42 | Patacharkuchi | 18 | ASSAM | PUBKATHALMURI | BARPETA |
1 | Pub-rehabari | BARPETA | ASSAM | 42 | Patacharkuchi | 18 | ASSAM | PUBREHABARI | BARPETA |
2 | Bar Manikpur | BARPETA | ASSAM | 42 | Patacharkuchi | 18 | ASSAM | BARMANIKPUR | BARPETA |
3 | Chaibari | BARPETA | ASSAM | 42 | Patacharkuchi | 18 | ASSAM | CHAIBARI | BARPETA |
4 | Chemtia | BARPETA | ASSAM | 42 | Patacharkuchi | 18 | ASSAM | CHEMTIA | BARPETA |
5 | Batia Mari | BARPETA | ASSAM | 42 | Patacharkuchi | 18 | ASSAM | BATIAMARI | BARPETA |
6 | Baghmara | BARPETA | ASSAM | 42 | Patacharkuchi | 18 | ASSAM | BAGHMARA | BARPETA |
7 | Pachim Kathalmuri(kathalguri) | BARPETA | ASSAM | 42 | Patacharkuchi | 18 | ASSAM | PACHIMKATHALMURIKATHALGURI | BARPETA |
8 | Gergeria | BARPETA | ASSAM | 42 | Patacharkuchi | 18 | ASSAM | GERGERIA | BARPETA |
9 | Madhapur | BARPETA | ASSAM | 41 | Bhawanipur | 18 | ASSAM | MADHAPUR | BARPETA |
10 | Madha Pur | BARPETA | ASSAM | 41 | Bhawanipur | 18 | ASSAM | MADHAPUR | BARPETA |
11 | Kathalmuri Ghat | BARPETA | ASSAM | 41 | Bhawanipur | 18 | ASSAM | KATHALMURIGHAT | BARPETA |
12 | Dakshin Rehabari | BARPETA | ASSAM | 42 | Patacharkuchi | 18 | ASSAM | DAKSHINREHABARI | BARPETA |
13 | Hallang Bari | BARPETA | ASSAM | 41 | Bhawanipur | 18 | ASSAM | HALANGBARI | BARPETA |
14 | Barsidhani | BARPETA | ASSAM | 42 | Patacharkuchi | 18 | ASSAM | BARSIDHANI | BARPETA |
15 | Chengli Mari | BARPETA | ASSAM | 41 | Bhawanipur | 18 | ASSAM | CHENGLIMARI | BARPETA |
16 | Chungapota | BONGAIGAON | ASSAM | 32 | Bongaigaon | 18 | ASSAM | CHUNGAPOTA | BONGAIGAON |
17 | Mohina | NALBARI | ASSAM | 59 | Nalbari | 18 | ASSAM | MOHINA | NALBARI |
18 | Dhamdhama (dhemdhema) | NALBARI | ASSAM | 59 | Nalbari | 18 | ASSAM | DHAMDHAMADHEMDHEMA | NALBARI |
19 | Dipta | NALBARI | ASSAM | 62 | Barama | 18 | ASSAM | DIPTA | NALBARI |
20 | Borjhar | NALBARI | ASSAM | 62 | Barama | 18 | ASSAM | BORJHAR | NALBARI |
21 | Atoukhong | THOUBAL | MANIPUR | 30 | Lilong | 14 | MANIPUR | ATOUKHONG | THOUBAL |
22 | Thoudam | THOUBAL | MANIPUR | 30 | Lilong | 14 | MANIPUR | THOUDAM | THOUBAL |
23 | Laiphrakpam | THOUBAL | MANIPUR | 30 | Lilong | 14 | MANIPUR | LAIPHRAKPAM | THOUBAL |
24 | Irong Thokchom | THOUBAL | MANIPUR | 30 | Lilong | 14 | MANIPUR | IRONGTHOKCHOM | THOUBAL |
25 | Nungei | THOUBAL | MANIPUR | 30 | Lilong | 14 | MANIPUR | NUNGEI | THOUBAL |
26 | Oinam | THOUBAL | MANIPUR | 30 | Lilong | 14 | MANIPUR | OINAM | THOUBAL |
27 | Leisangthem | THOUBAL | MANIPUR | 30 | Lilong | 14 | MANIPUR | LEISANGTHEM | THOUBAL |
28 | Khekman | THOUBAL | MANIPUR | 30 | Lilong | 14 | MANIPUR | KHEKMAN | THOUBAL |
29 | Moijing | THOUBAL | MANIPUR | 30 | Lilong | 14 | MANIPUR | MOIJING | THOUBAL |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
234495 | Kalamboor | ERNAKULAM | KERALA | 94 | Kaduthuruthy | 32 | KERALA | KALAMBOR | ERNAKULAM |
234507 | Kalamboor West | ERNAKULAM | KERALA | 94 | Kaduthuruthy | 32 | KERALA | KALAMBORWEST | ERNAKULAM |
236443 | Marangattupally | KOTTAYAM | KERALA | 94 | Kaduthuruthy | 32 | KERALA | MARANGATUPALY | KOTAYAM |
236444 | Kummannoor | KOTTAYAM | KERALA | 94 | Kaduthuruthy | 32 | KERALA | KUMANOR | KOTAYAM |
236445 | Kozha | KOTTAYAM | KERALA | 94 | Kaduthuruthy | 32 | KERALA | KOZHA | KOTAYAM |
236449 | Chempilavu | KOTTAYAM | KERALA | 94 | Kaduthuruthy | 32 | KERALA | CHEMPILAVU | KOTAYAM |
229722 | Narath | KANNUR | KERALA | 10 | Azhikode | 32 | KERALA | NARATH | KANUR |
230185 | Pappinissery | KANNUR | KERALA | 10 | Azhikode | 32 | KERALA | PAPINISERY | KANUR |
230186 | Puzhathi | KANNUR | KERALA | 10 | Azhikode | 32 | KERALA | PUZHATHI | KANUR |
230187 | Chirakkal | KANNUR | KERALA | 10 | Azhikode | 32 | KERALA | CHIRAKAL | KANUR |
230188 | Pallikunnu | KANNUR | KERALA | 10 | Azhikode | 32 | KERALA | PALIKUNU | KANUR |
230189 | Valapattanm | KANNUR | KERALA | 10 | Azhikode | 32 | KERALA | VALAPATANM | KANUR |
230191 | Asheekode | KANNUR | KERALA | 10 | Azhikode | 32 | KERALA | ASHEKODE | KANUR |
230137 | Mukkaly | KOTTAYAM | KERALA | 54 | Mannarkkad | 32 | KERALA | MUKALY | KOTAYAM |
231340 | Mannarkkad 1 | PALAKKAD | KERALA | 54 | Mannarkkad | 32 | KERALA | MANARKAD | PALAKAD |
231341 | Mannarkkad 2 | PALAKKAD | KERALA | 54 | Mannarkkad | 32 | KERALA | MANARKAD | PALAKAD |
230621 | Malkapet(017 ) | RAJANNA SIRICILLA(21) | TELANGANA | 66 | Charminar | 28 | ANDHRA PRADESH | MALKAPET | RAJANASIRICILA |
231436 | Wadeshwar | PUNE | MAHARASHTRA | 209 | Shivajinagar | 27 | MAHARASHTRA | WADESHWAR | PUNE |
231646 | Dhan Ch Para | KHOWAI | TRIPURA | 46 | SURMA (SC) | 16 | TRIPURA | DHANCHPARA | KHOWAI |
231649 | Kallapally Beloor(003 ) | SANGAREDDY(23) | TELANGANA | 119 | Kundapura | 29 | KARNATAKA | KALAPALYBELOR | SANGAREDY |
232332 | Khali Mahuvar | UDHAM SINGH NAGAR | UTTARAKHAND | 174 | Jalalpore | 24 | GUJARAT | KHALIMAHUVAR | UDHAMSINGHNAGAR |
234023 | Kumharpara | KONDAGAON | CHHATTISGARH | 50 | Raipur City North | 22 | CHHATTISGARH | KUMHARPARA | KONDAGAON |
234537 | Chokli | KANNUR | KERALA | 13 | Thalassery | 32 | KERALA | CHOKLI | KANUR |
234542 | Kathirur | KANNUR | KERALA | 13 | Thalassery | 32 | KERALA | KATHIRUR | KANUR |
236138 | New Mahi | KANNUR | KERALA | 13 | Thalassery | 32 | KERALA | NEWMAHI | KANUR |
236140 | Eranholi | KANNUR | KERALA | 13 | Thalassery | 32 | KERALA | ERANHOLI | KANUR |
236146 | Peringadi | KANNUR | KERALA | 13 | Thalassery | 32 | KERALA | PERINGADI | KANUR |
234540 | Panniyannur | KANNUR | KERALA | 29 | Mahe | 34 | PUDUCHERRY | PANIYANUR | KANUR |
235385 | Dwarika | KANGPOKPI | MANIPUR | 82 | Dwarka | 24 | GUJARAT | DWARIKA | KANGPOKPI |
236481 | Govindapuram | KOTTAYAM | KERALA | 28 | Kozhikode South | 32 | KERALA | GOVINDAPURAM | KOTAYAM |
412767 rows × 9 columns
# Read Scraped Data
# Each file in data/downloaded is matched against geocoded_df on the
# normalised (village_y, district_y) keys and written to the output folder
# as geocoded_<original file name>.
downloaded_folder = os.path.join(data_folder, "downloaded")
# os.listdir() returns entries in arbitrary order; sort for a stable listing.
downloaded_files = sorted(os.listdir(downloaded_folder))
print("data found for these years:")
for i, file in enumerate(downloaded_files, start=1):
    print(i,") ",file)
    file_path = os.path.join(downloaded_folder, file)
    # Malformed lines are skipped rather than aborting the whole file.
    # NOTE: newer pandas versions replace error_bad_lines with on_bad_lines;
    # the keyword is kept to match the pandas version this was written against.
    df = pd.read_csv(file_path, low_memory=False, error_bad_lines=False, encoding = "ISO-8859-1")
    # Drop rows whose village name is the '-999' placeholder; .copy() so the
    # new columns below are added to an independent frame, not a slice.
    df = df[df['Village Name'] != '-999'].copy()
    # Same key normalisation as geocoded_df: upper-case, letters only, runs of
    # repeated letters collapsed. regex=True keeps the pattern a regular
    # expression on pandas versions whose default is a literal replacement.
    df['district_y'] = (
        df['District Name']
        .str.upper()
        .str.replace('([^A-Za-z]+)', '', regex=True)
        .apply(remove_consecutive_duplicates)
    )
    df['village_y'] = (
        df['Village Name']
        .str.upper()
        .str.replace('([^A-Za-z]+)', '', regex=True)
        .apply(remove_consecutive_duplicates)
    )
    # NOTE(review): geocoded_df can hold several rows for one
    # (village_y, district_y) key, in which case this inner merge emits one
    # output row per match - confirm that is intended.
    df = pd.merge(df, geocoded_df, on=['village_y', 'district_y'])
    outfile = os.path.join(output_folder, 'geocoded_' + file)
    df.to_csv(outfile, encoding='utf-8', index=False)
data found for these years: 1 ) BASIC_HABITATION_INFORMATION_AS_ON_1_APR_09.csv
b'Skipping line 1651336: expected 16 fields, saw 17\n'
2 ) BASIC_HABITATION_INFORMATION_AS_ON_1_APR_10.csv 3 ) BASIC_HABITATION_INFORMATION_AS_ON_1_APR_11.csv
b'Skipping line 1647302: expected 16 fields, saw 17\n'
4 ) BASIC_HABITATION_INFORMATION_AS_ON_1_APR_12.csv
b'Skipping line 1644388: expected 16 fields, saw 17\n'