In [1]:
# Import Libraries
import pandas as pd
import geopandas as gpd
import os
import itertools
import jellyfish
In [2]:
# Import folders
data_folder = os.path.abspath('data')

# Set Output Folder
output_folder = os.path.abspath("output")
if not os.path.exists(output_folder):
    os.makedirs(output_folder)
In [3]:
# Helping Functions
def remove_consecutive_duplicates(x):
    return ''.join(i for i, _ in itertools.groupby(x))
In [4]:
# Merge all files
files = os.listdir(os.path.join(data_folder, 'Scrapped'))
df_list = []
for file in files:
    filepath = os.path.join(data_folder, 'Scrapped', file)
    df = pd.read_csv(filepath)
    df_list.append(df)
# Merge Files
df = pd.concat(df_list)
df
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\ipykernel_launcher.py:9: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  if __name__ == '__main__':
Out[4]:
Amount disbursed to bank accounts Amount disbursed to post office accounts Amount sanctioned (Rs. In Lakhs) Approved_Work_status_Amount sanctioned (Rs. In Lakhs) Approved_Work_status_Total works Block Name Completed_Work_status_Amount sanctioned (Rs. In Lakhs) Completed_Work_status_Total works District Name Labour exp. (disbursed + pending, Rs. In Lakhs) ... Total person-days worked by SCs Total person-days worked by STs Total person-days worked by women Total persons allotted work Total persons demanded work Total persons with disability Total persons worked Total post office accounts Total works financial_year
0 48381 1195 327 0.0 17.0 Campbell Bay 1.0 6.0 NICOBARS 1 ... 26 5 273 56 57 0 36 2 1039 2012
1 412497 0 12 0.0 12.0 Campbell Bay 0.0 0.0 NICOBARS 6 ... 0 0 1387 92 92 0 92 0 82 2012
2 21546 0 7 0.0 0.0 Campbell Bay 0.0 0.0 NICOBARS 2 ... 0 916 124 25 25 0 25 0 17 2012
3 288142 0 10 0.0 12.0 Campbell Bay 0.0 1.0 NICOBARS 6 ... 0 0 1508 123 123 1 122 0 61 2012
4 0 0 7 0.0 0.0 Nancowry 3.0 3.0 NICOBARS 3 ... 0 1659 176 61 61 0 61 0 27 2012
5 784143 0 20 0.0 14.0 Nancowry 10.0 5.0 NICOBARS 10 ... 0 5031 1180 151 151 0 148 0 67 2012
6 0 0 8 0.0 4.0 Nancowry 5.0 5.0 NICOBARS 5 ... 0 2577 714 92 92 1 88 0 43 2012
7 882819 0 14 0.0 3.0 Nancowry 12.0 8.0 NICOBARS 12 ... 0 6204 2127 190 190 1 184 0 59 2012
8 0 0 8 0.0 1.0 Nancowry 7.0 4.0 NICOBARS 6 ... 0 3816 520 108 108 0 105 0 25 2012
9 188940 0 202 0.0 10.0 Nicobar 15.0 9.0 NICOBARS 51 ... 0 23372 9779 792 792 1 749 0 332 2012
10 164828 0 30 0.0 0.0 Diglipur 23.0 5.0 NORTH AND MIDDLE ANDAMAN 15 ... 0 0 5590 280 280 0 254 0 38 2012
11 78142 0 42 0.0 0.0 Diglipur 28.0 3.0 NORTH AND MIDDLE ANDAMAN 20 ... 0 0 4157 402 402 0 344 0 38 2012
12 0 0 82 0.0 1.0 Diglipur 42.0 10.0 NORTH AND MIDDLE ANDAMAN 19 ... 0 0 5506 335 334 1 297 0 77 2012
13 0 0 40 0.0 0.0 Diglipur 17.0 2.0 NORTH AND MIDDLE ANDAMAN 17 ... 0 0 6169 348 346 1 296 0 36 2012
14 56426 182628 47 0.0 2.0 Diglipur 14.0 7.0 NORTH AND MIDDLE ANDAMAN 16 ... 0 0 4913 594 594 3 536 91 76 2012
15 0 0 31 0.0 2.0 Diglipur 24.0 6.0 NORTH AND MIDDLE ANDAMAN 21 ... 0 0 4185 351 349 10 309 0 58 2012
16 3916 0 46 0.0 2.0 Diglipur 37.0 5.0 NORTH AND MIDDLE ANDAMAN 35 ... 0 0 9954 646 636 0 552 0 48 2012
17 0 0 31 0.0 2.0 Diglipur 21.0 5.0 NORTH AND MIDDLE ANDAMAN 22 ... 0 0 4771 334 334 4 304 0 44 2012
18 0 0 47 0.0 0.0 Diglipur 23.0 1.0 NORTH AND MIDDLE ANDAMAN 17 ... 0 0 1808 260 255 0 242 0 10 2012
19 244394 0 92 0.0 2.0 Diglipur 37.0 9.0 NORTH AND MIDDLE ANDAMAN 37 ... 0 0 11853 742 739 0 685 0 83 2012
20 1075832 0 70 0.0 0.0 Diglipur 13.0 3.0 NORTH AND MIDDLE ANDAMAN 33 ... 0 0 11613 446 440 0 408 0 38 2012
21 0 0 41 0.0 3.0 Diglipur 32.0 5.0 NORTH AND MIDDLE ANDAMAN 19 ... 0 0 4063 316 316 2 298 0 46 2012
22 0 0 54 0.0 0.0 Diglipur 21.0 11.0 NORTH AND MIDDLE ANDAMAN 21 ... 0 0 9496 555 551 1 460 0 72 2012
23 150410 6230 45 0.0 1.0 Diglipur 29.0 4.0 NORTH AND MIDDLE ANDAMAN 27 ... 0 0 7532 378 374 3 354 4 51 2012
24 74048 0 15 0.0 1.0 Diglipur 10.0 3.0 NORTH AND MIDDLE ANDAMAN 7 ... 0 0 2003 226 222 0 173 0 25 2012
25 14240 0 4341917 0.0 7.0 Mayabunder 3864203.0 5.0 NORTH AND MIDDLE ANDAMAN 37 ... 0 0 8578 434 434 0 433 0 48 2012
26 1290144 0 7114028 0.0 3.0 Mayabunder 1878501.0 4.0 NORTH AND MIDDLE ANDAMAN 38 ... 0 0 7008 386 388 0 385 0 46 2012
27 16910 0 3679558 0.0 0.0 Mayabunder 1377881.0 6.0 NORTH AND MIDDLE ANDAMAN 16 ... 0 0 2731 150 150 0 150 0 47 2012
28 340336 0 7608985 0.0 10.0 Mayabunder 2869372.0 4.0 NORTH AND MIDDLE ANDAMAN 47 ... 0 0 11100 570 571 0 568 0 54 2012
29 532932 0 1340646 0.0 4.0 Mayabunder 856650.0 4.0 NORTH AND MIDDLE ANDAMAN 10 ... 0 0 9360 294 294 0 294 0 43 2012
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
106924 10334569 97888 848 NaN NaN RAGHUNATHPUR-II NaN NaN PURULIA 103 ... 7923 21956 24195 2805 2805 25 1727 1934 5816 2017
106925 8168598 2247196 438 NaN NaN RAGHUNATHPUR-II NaN NaN PURULIA 105 ... 37393 3441 23370 3982 3984 26 1982 2725 3217 2017
106926 3553754 0 360 NaN NaN SANTURI NaN NaN PURULIA 36 ... 16922 4369 11933 1340 1339 14 722 1739 2086 2017
106927 14342705 5521 1057 NaN NaN SANTURI NaN NaN PURULIA 147 ... 18995 40825 42956 3542 3542 38 2801 700 2500 2017
106928 8197670 0 444 NaN NaN SANTURI NaN NaN PURULIA 82 ... 10108 13057 15003 1592 1592 9 1313 281 3310 2017
106929 6753142 428040 566 NaN NaN SANTURI NaN NaN PURULIA 72 ... 30925 2127 27008 2550 2550 32 1945 1366 3613 2017
106930 5518460 0 457 NaN NaN SANTURI NaN NaN PURULIA 56 ... 7104 25503 17819 1979 1979 56 1396 487 2573 2017
106931 3540582 180484 385 NaN NaN SANTURI NaN NaN PURULIA 38 ... 9863 12513 11092 1528 1528 11 857 1598 2511 2017
106932 13625280 0 877 NaN NaN Khoribari NaN NaN SILIGURI MAHAKUMA PARISAD 137 ... 40347 13973 47066 1847 1847 19 1730 381 5717 2017
106933 17096940 0 1027 NaN NaN Khoribari NaN NaN SILIGURI MAHAKUMA PARISAD 170 ... 57306 25075 52795 2097 2097 0 1970 1802 3139 2017
106934 14412240 0 731 NaN NaN Khoribari NaN NaN SILIGURI MAHAKUMA PARISAD 144 ... 55862 8392 60938 1780 1780 25 1735 281 2534 2017
106935 16777256 0 884 NaN NaN Khoribari NaN NaN SILIGURI MAHAKUMA PARISAD 170 ... 72657 10651 54664 2178 2178 0 2081 1332 4067 2017
106936 7774024 0 468 NaN NaN Matigara NaN NaN SILIGURI MAHAKUMA PARISAD 78 ... 36469 1083 25972 1094 1094 1 1065 551 1412 2017
106937 11880180 0 669 NaN NaN Matigara NaN NaN SILIGURI MAHAKUMA PARISAD 121 ... 21585 22408 52214 853 853 2 831 297 965 2017
106938 8598620 0 571 NaN NaN Matigara NaN NaN SILIGURI MAHAKUMA PARISAD 86 ... 32764 1906 37590 770 770 0 668 359 935 2017
106939 12731752 0 667 NaN NaN Matigara NaN NaN SILIGURI MAHAKUMA PARISAD 127 ... 41995 126 57034 827 827 13 751 244 639 2017
106940 13612488 0 710 NaN NaN Matigara NaN NaN SILIGURI MAHAKUMA PARISAD 135 ... 32858 31088 41350 1423 1426 4 1325 298 1825 2017
106941 17647920 0 1435 NaN NaN Naxalbari NaN NaN SILIGURI MAHAKUMA PARISAD 175 ... 74050 3742 77052 1817 1817 12 1768 572 1687 2017
106942 20450660 0 847 NaN NaN Naxalbari NaN NaN SILIGURI MAHAKUMA PARISAD 205 ... 20202 65415 61640 2023 2023 0 2006 85 2492 2017
106943 9795060 0 519 NaN NaN Naxalbari NaN NaN SILIGURI MAHAKUMA PARISAD 99 ... 28897 19272 46761 675 675 1 666 22 1032 2017
106944 24039180 0 1708 NaN NaN Naxalbari NaN NaN SILIGURI MAHAKUMA PARISAD 244 ... 41961 28614 61380 2279 2280 14 2245 362 3034 2017
106945 17662140 0 1257 NaN NaN Naxalbari NaN NaN SILIGURI MAHAKUMA PARISAD 178 ... 43698 16947 63556 2277 2277 3 2209 0 1693 2017
106946 7904112 0 418 NaN NaN Naxalbari NaN NaN SILIGURI MAHAKUMA PARISAD 79 ... 3790 25626 40830 567 567 0 566 612 1049 2017
106947 12777216 0 1023 NaN NaN Phansidewa NaN NaN SILIGURI MAHAKUMA PARISAD 128 ... 12328 15593 20966 1804 1808 1 1517 947 2267 2017
106948 20092576 0 1654 NaN NaN Phansidewa NaN NaN SILIGURI MAHAKUMA PARISAD 198 ... 29500 49551 42795 2304 2304 11 2191 539 1523 2017
106949 16286338 8800 523 NaN NaN Phansidewa NaN NaN SILIGURI MAHAKUMA PARISAD 159 ... 6101 4368 23659 2363 2363 0 2198 774 4539 2017
106950 15877824 19360 1229 NaN NaN Phansidewa NaN NaN SILIGURI MAHAKUMA PARISAD 155 ... 27076 37795 33354 2187 2187 4 2019 612 1719 2017
106951 20107700 0 1048 NaN NaN Phansidewa NaN NaN SILIGURI MAHAKUMA PARISAD 201 ... 11479 69106 49520 2401 2402 6 2182 702 1260 2017
106952 16102484 0 1350 NaN NaN Phansidewa NaN NaN SILIGURI MAHAKUMA PARISAD 161 ... 48751 5241 52190 2336 2359 0 2102 920 2074 2017
106953 21957688 0 733 NaN NaN Phansidewa NaN NaN SILIGURI MAHAKUMA PARISAD 216 ... 46217 7608 43426 2689 2689 19 2494 866 1424 2017

981716 rows × 55 columns

In [5]:
# Get unique records
togeocode_df = df[['District Name', 'Block Name', 'Panchayat Name']].drop_duplicates(subset=None, keep="first", inplace=False).reset_index(drop=True)
togeocode_df['district_x'] = togeocode_df['District Name'].str.upper().replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
togeocode_df['block_x'] = togeocode_df['Block Name'].str.upper().replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
togeocode_df['panchayat_x'] = togeocode_df['Panchayat Name'].str.upper().replace('PANCHAYAT', '').str.replace('GP', '').replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
togeocode_df['id'] = togeocode_df.index+1000000
togeocode_df
Out[5]:
District Name Block Name Panchayat Name district_x block_x panchayat_x id
0 NICOBARS Campbell Bay CAMPBELL BAY NICOBARS CAMPBEL BAY CAMPBEL BAY 1000000
1 NICOBARS Campbell Bay GOVINDNAGAR NICOBARS CAMPBEL BAY GOVINDNAGAR 1000001
2 NICOBARS Campbell Bay Great & Little Nicobar NICOBARS CAMPBEL BAY GREAT & LITLE NICOBAR 1000002
3 NICOBARS Campbell Bay LAXMI NAGAR NICOBARS CAMPBEL BAY LAXMI NAGAR 1000003
4 NICOBARS Nancowry CHOWRA TC NICOBARS NANCOWRY CHOWRA TC 1000004
5 NICOBARS Nancowry KAMORTA TC NICOBARS NANCOWRY KAMORTA TC 1000005
6 NICOBARS Nancowry KATCHAL TC NICOBARS NANCOWRY KATCHAL TC 1000006
7 NICOBARS Nancowry NANCOWRY TC NICOBARS NANCOWRY NANCOWRY TC 1000007
8 NICOBARS Nancowry TERESSA TC NICOBARS NANCOWRY TERESA TC 1000008
9 NICOBARS Nicobar TRIBAL COUNCIL NICOBARS NICOBAR TRIBAL COUNCIL 1000009
10 NORTH AND MIDDLE ANDAMAN Diglipur Diglipur NORTH AND MIDLE ANDAMAN DIGLIPUR DIGLIPUR 1000010
11 NORTH AND MIDDLE ANDAMAN Diglipur Gandhinagar NORTH AND MIDLE ANDAMAN DIGLIPUR GANDHINAGAR 1000011
12 NORTH AND MIDDLE ANDAMAN Diglipur Kalighat NORTH AND MIDLE ANDAMAN DIGLIPUR KALIGHAT 1000012
13 NORTH AND MIDDLE ANDAMAN Diglipur Keralapuram NORTH AND MIDLE ANDAMAN DIGLIPUR KERALAPURAM 1000013
14 NORTH AND MIDDLE ANDAMAN Diglipur Kishori Nagar NORTH AND MIDLE ANDAMAN DIGLIPUR KISHORI NAGAR 1000014
15 NORTH AND MIDDLE ANDAMAN Diglipur Laxmipur NORTH AND MIDLE ANDAMAN DIGLIPUR LAXMIPUR 1000015
16 NORTH AND MIDDLE ANDAMAN Diglipur Madhupur NORTH AND MIDLE ANDAMAN DIGLIPUR MADHUPUR 1000016
17 NORTH AND MIDDLE ANDAMAN Diglipur Nabagram NORTH AND MIDLE ANDAMAN DIGLIPUR NABAGRAM 1000017
18 NORTH AND MIDDLE ANDAMAN Diglipur Paschimsagar NORTH AND MIDLE ANDAMAN DIGLIPUR PASCHIMSAGAR 1000018
19 NORTH AND MIDDLE ANDAMAN Diglipur Radha Nagar NORTH AND MIDLE ANDAMAN DIGLIPUR RADHA NAGAR 1000019
20 NORTH AND MIDDLE ANDAMAN Diglipur Rama Krishnagram NORTH AND MIDLE ANDAMAN DIGLIPUR RAMA KRISHNAGRAM 1000020
21 NORTH AND MIDDLE ANDAMAN Diglipur Ramnagar NORTH AND MIDLE ANDAMAN DIGLIPUR RAMNAGAR 1000021
22 NORTH AND MIDDLE ANDAMAN Diglipur Shibpur NORTH AND MIDLE ANDAMAN DIGLIPUR SHIBPUR 1000022
23 NORTH AND MIDDLE ANDAMAN Diglipur Sitanagar NORTH AND MIDLE ANDAMAN DIGLIPUR SITANAGAR 1000023
24 NORTH AND MIDDLE ANDAMAN Diglipur Subashgram NORTH AND MIDLE ANDAMAN DIGLIPUR SUBASHGRAM 1000024
25 NORTH AND MIDDLE ANDAMAN Mayabunder Basantipur NORTH AND MIDLE ANDAMAN MAYABUNDER BASANTIPUR 1000025
26 NORTH AND MIDDLE ANDAMAN Mayabunder Chainpur NORTH AND MIDLE ANDAMAN MAYABUNDER CHAINPUR 1000026
27 NORTH AND MIDDLE ANDAMAN Mayabunder De-Reserve Area NORTH AND MIDLE ANDAMAN MAYABUNDER DE-RESERVE AREA 1000027
28 NORTH AND MIDDLE ANDAMAN Mayabunder Harinagar NORTH AND MIDLE ANDAMAN MAYABUNDER HARINAGAR 1000028
29 NORTH AND MIDDLE ANDAMAN Mayabunder Mayabunder NORTH AND MIDLE ANDAMAN MAYABUNDER MAYABUNDER 1000029
... ... ... ... ... ... ... ...
261011 JHARGRAM JHARGRAM LODHASULI JHARGRAM JHARGRAM LODHASULI 1261011
261012 JHARGRAM JHARGRAM MANIKPARA JHARGRAM JHARGRAM MANIKPARA 1261012
261013 JHARGRAM JHARGRAM NEDABAHARA JHARGRAM JHARGRAM NEDABAHARA 1261013
261014 JHARGRAM JHARGRAM PATASHIMUL JHARGRAM JHARGRAM PATASHIMUL 1261014
261015 JHARGRAM JHARGRAM RADHANAGAR JHARGRAM JHARGRAM RADHANAGAR 1261015
261016 JHARGRAM JHARGRAM SAPDHARA JHARGRAM JHARGRAM SAPDHARA 1261016
261017 JHARGRAM JHARGRAM SARDIHA JHARGRAM JHARGRAM SARDIHA 1261017
261018 JHARGRAM JHARGRAM SHALBONI JHARGRAM JHARGRAM SHALBONI 1261018
261019 JHARGRAM NAYAGRAM ARA JHARGRAM NAYAGRAM ARA 1261019
261020 JHARGRAM NAYAGRAM BALIGERIA JHARGRAM NAYAGRAM BALIGERIA 1261020
261021 JHARGRAM NAYAGRAM BARAKHAKRI JHARGRAM NAYAGRAM BARAKHAKRI 1261021
261022 JHARGRAM NAYAGRAM BARANEGUI JHARGRAM NAYAGRAM BARANEGUI 1261022
261023 JHARGRAM NAYAGRAM BERAJAL JHARGRAM NAYAGRAM BERAJAL 1261023
261024 JHARGRAM NAYAGRAM CHANDABILLA JHARGRAM NAYAGRAM CHANDABILA 1261024
261025 JHARGRAM NAYAGRAM CHANDRAREKHA JHARGRAM NAYAGRAM CHANDRAREKHA 1261025
261026 JHARGRAM NAYAGRAM JAMIRAPAL JHARGRAM NAYAGRAM JAMIRAPAL 1261026
261027 JHARGRAM NAYAGRAM KHARIKAMATHANI JHARGRAM NAYAGRAM KHARIKAMATHANI 1261027
261028 JHARGRAM NAYAGRAM MALAM JHARGRAM NAYAGRAM MALAM 1261028
261029 JHARGRAM NAYAGRAM NAYAGRAM JHARGRAM NAYAGRAM NAYAGRAM 1261029
261030 JHARGRAM NAYAGRAM PATINA JHARGRAM NAYAGRAM PATINA 1261030
261031 JHARGRAM SANKRAIL ANDHARI JHARGRAM SANKRAIL ANDHARI 1261031
261032 JHARGRAM SANKRAIL CHHATRI JHARGRAM SANKRAIL CHATRI 1261032
261033 JHARGRAM SANKRAIL DHANGHORI JHARGRAM SANKRAIL DHANGHORI 1261033
261034 JHARGRAM SANKRAIL KHUDMORAI JHARGRAM SANKRAIL KHUDMORAI 1261034
261035 JHARGRAM SANKRAIL KULTIKRI JHARGRAM SANKRAIL KULTIKRI 1261035
261036 JHARGRAM SANKRAIL LAUDAHA JHARGRAM SANKRAIL LAUDAHA 1261036
261037 JHARGRAM SANKRAIL PATHRA JHARGRAM SANKRAIL PATHRA 1261037
261038 JHARGRAM SANKRAIL RAGRAH JHARGRAM SANKRAIL RAGRAH 1261038
261039 JHARGRAM SANKRAIL ROHINI JHARGRAM SANKRAIL ROHINI 1261039
261040 JHARGRAM SANKRAIL SANKRAIL JHARGRAM SANKRAIL SANKRAIL 1261040

261041 rows × 7 columns

In [6]:
# Geocode using grampanghayat file
gp_point_shp = os.path.join(data_folder, 'shapefiles', 'gp-point-shp.shp')
gp_point_df = gpd.read_file(gp_point_shp)
gp_point_df = gp_point_df[gp_point_df['dtname'].notnull()]
gp_point_df = gp_point_df[gp_point_df['gp_name'].notnull()]
gp_point_df['district_y'] = gp_point_df['dtname'].str.upper().replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
gp_point_df['block_y'] = gp_point_df['block_name'].str.upper().replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
gp_point_df['panchayat_y'] = gp_point_df['gp_name'].str.upper().replace('PANCHAYAT', '').str.replace('GP', '').replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
gp_point_df['lat'] = gp_point_df.geometry.y
gp_point_df['lon'] = gp_point_df.geometry.x
gp_geocoded_df = pd.merge(togeocode_df, gp_point_df, how='inner', left_on=['district_x', 'block_x', 'panchayat_x'], right_on = ['district_y', 'block_y', 'panchayat_y'])
gp_geocoded_df = gp_geocoded_df[['District Name', 'Block Name', 'Panchayat Name', 'lat', 'lon', 'id']].drop_duplicates(subset='id', keep="first", inplace=False).reset_index(drop=True)
gp_geocoded_df
Out[6]:
District Name Block Name Panchayat Name lat lon id
0 ANANTAPUR Agali HULIKERADEVARAHALLI 13.768624 77.009729 1000080
1 ANANTAPUR Agali MADHUDI 13.810206 77.025746 1000084
2 ANANTAPUR Agali RAVUDI 13.841146 77.019506 1000088
3 ANANTAPUR Amadagur CHEEKIREVULAPALLE 13.874734 78.041962 1000090
4 ANANTAPUR Amadagur KASSAMUDRAM 13.933423 78.014464 1000094
5 ANANTAPUR Amadagur MOHAMMADABAD 13.967322 78.010266 1000095
6 ANANTAPUR Amadagur THUMMALA 13.929267 78.064326 1000097
7 ANANTAPUR Amarapuram HALUKURU 14.090250 77.001711 1000102
8 ANANTAPUR Amarapuram HEMAVATHI 14.033064 76.986075 1000103
9 ANANTAPUR Amarapuram NIDRAGATTA 14.037968 76.954373 1000105
10 ANANTAPUR Amarapuram SIVARAM 14.032437 77.023079 1000106
11 ANANTAPUR Anantapur KATIGANIKALVA 14.612996 77.554218 1000121
12 ANANTAPUR Anantapur KURUGUNTA 14.674663 77.519245 1000124
13 ANANTAPUR Anantapur MANNILA 14.556448 77.686479 1000125
14 ANANTAPUR Anantapur THATICHERLA 14.752598 77.571765 1000132
15 ANANTAPUR Atmakur MADIGUBBA 14.638847 77.453057 1000137
16 ANANTAPUR Atmakur THOPUDURTHI 14.552931 77.492462 1000147
17 ANANTAPUR Bathalapalle APPARACHERUVU 14.479485 77.798019 1000149
18 ANANTAPUR Bathalapalle BATHALAPALLE 14.504290 77.765405 1000150
19 ANANTAPUR Bathalapalle MALYAVANTHAM 14.569889 77.768676 1000154
20 ANANTAPUR Bathalapalle OBULAPURAM 14.458996 77.843963 1000157
21 ANANTAPUR Beluguppa AVULENNA 14.683234 77.153240 1000162
22 ANANTAPUR Beluguppa BELUGUPPA 14.712986 77.136826 1000163
23 ANANTAPUR Beluguppa DUDDEKUNTA 14.680413 77.220876 1000164
24 ANANTAPUR Beluguppa GANGAVARAM 14.640860 77.207144 1000165
25 ANANTAPUR Beluguppa KALVAPALLE 14.625519 77.267957 1000167
26 ANANTAPUR Beluguppa KONAMPALLE 14.669387 77.262099 1000168
27 ANANTAPUR Beluguppa THAGGUPARTHY 14.725207 77.198641 1000176
28 ANANTAPUR Beluguppa YERRAGUDI 14.702239 77.176537 1000178
29 ANANTAPUR Bommanahal BANDUR 14.949310 77.008596 1000179
... ... ... ... ... ... ...
47874 RAE BARELI DALMAU TERUKHA 26.154272 81.105019 1259028
47875 RAE BARELI DALMAU TIKRAN 26.215372 81.008023 1259029
47876 RAE BARELI SHIVGARH ACHAI 26.515666 81.345122 1259033
47877 RAE BARELI SHIVGARH BAHUDA KHURD 26.589912 81.176757 1259036
47878 RAE BARELI SHIVGARH BAITI 26.568444 81.234854 1259037
47879 RAE BARELI SHIVGARH BEDARU 26.568102 81.187406 1259041
47880 RAE BARELI SHIVGARH BHAUSI 26.496343 81.208293 1259042
47881 RAE BARELI SHIVGARH CHITWANIYA 26.559720 81.217836 1259044
47882 RAE BARELI SHIVGARH GOVINDPUR 26.477405 81.218719 1259050
47883 RAE BARELI SHIVGARH KASNA 26.524071 81.281588 1259054
47884 RAE BARELI SHIVGARH KUMBHI 26.571530 81.261130 1259057
47885 RAE BARELI SHIVGARH KUMHARAWAN 26.548436 81.274146 1259058
47886 RAE BARELI SHIVGARH OSAH 26.473133 81.238772 1259061
47887 RAE BARELI SHIVGARH PADARIYA 26.505045 81.280267 1259062
47888 RAE BARELI SHIVGARH RAIPUR 26.513434 81.357935 1259065
47889 RAE BARELI SHIVGARH RAJAPUR 26.517628 81.274542 1259067
47890 RAE BARELI SHIVGARH SURAJPUR 26.591872 81.221682 1259075
47891 PITHORAGARH GANGOLIHAT JARTOLA 29.534581 80.093678 1259118
47892 AMRITSAR HARSHA CHHINA NEPAL 31.833863 74.696878 1259139
47893 GORAKHPUR BANSGAON GOCHHARAN 26.562101 83.354279 1260511
47894 HATHRAS MURSAN MOHANPUR 27.568727 78.003088 1260513
47895 HATHRAS MURSAN NAGLA NANDU 27.589662 77.993367 1260515
47896 JAUNPUR BAKSHA KHUNSHAPUR 25.805381 82.483548 1260517
47897 JAUNPUR BAKSHA MARGUPUR 25.807982 82.444158 1260519
47898 JAUNPUR BAKSHA NARAYANPUR 25.856477 82.450771 1260520
47899 SITAPUR GONDLAMAU BAROY 27.301445 80.732414 1260529
47900 SITAPUR GONDLAMAU KABIRPUR 27.254446 80.674254 1260533
47901 SITAPUR GONDLAMAU KAMLAPUR 27.284050 80.723617 1260534
47902 SITAPUR GONDLAMAU KODIKAPUR 27.304422 80.623020 1260536
47903 SITAPUR GONDLAMAU MEERAPUR 27.334557 80.552864 1260540

47904 rows × 6 columns

In [7]:
# Filter non geocoded records
non_geocoded_df = togeocode_df[~togeocode_df['id'].isin(gp_geocoded_df.id)]
In [8]:
# Geocode using Census Village file
census_village_shp = os.path.join(data_folder, 'shapefiles', 'Census Villages.shp')
cenvil_df = gpd.read_file(census_village_shp)
cenvil_df = cenvil_df[cenvil_df['dtname'].notnull()]
cenvil_df = cenvil_df[cenvil_df['sdtname'].notnull()]
cenvil_df = cenvil_df[cenvil_df['vilname'].notnull()]
cenvil_df['district_y'] = cenvil_df['dtname'].str.upper().replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
cenvil_df['block_y'] = cenvil_df['sdtname'].str.upper().replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
cenvil_df['village_y'] = cenvil_df['vilname'].str.upper().replace('PANCHAYAT', '').str.replace('GP', '').replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
cenvil_df['lat'] = cenvil_df.geometry.y
cenvil_df['lon'] = cenvil_df.geometry.x
cenvil_geocoded_df = pd.merge(non_geocoded_df, cenvil_df, how='inner', left_on=['district_x', 'block_x', 'panchayat_x'], right_on = ['district_y', 'block_y', 'village_y'])
cenvil_geocoded_df = cenvil_geocoded_df[['District Name', 'Block Name', 'Panchayat Name', 'lat', 'lon', 'id']].drop_duplicates(subset='id', keep="first", inplace=False).reset_index(drop=True)
cenvil_geocoded_df
Out[8]:
District Name Block Name Panchayat Name lat lon id
0 ANANTAPUR Agali AGALI 13.784665 77.054368 1000078
1 ANANTAPUR Agali INAGALORE 13.835137 77.042107 1000081
2 ANANTAPUR Agali KODIHALLI 13.781844 77.072040 1000083
3 ANANTAPUR Agali NARASAMBUDI 13.807735 77.044694 1000085
4 ANANTAPUR Amadagur AMADAGUR 13.888526 78.021691 1000089
5 ANANTAPUR Amarapuram AMARAPURAM 14.131109 76.976506 1000100
6 ANANTAPUR Amarapuram BASAVANAHALLI 14.024649 77.062410 1000101
7 ANANTAPUR Amarapuram VALASA 14.164192 76.946107 1000108
8 ANANTAPUR Anantapur ALAMURU 14.647386 77.535765 1000112
9 ANANTAPUR Anantapur CHIYYEDU 14.605295 77.706863 1000115
10 ANANTAPUR Anantapur ITIKALAPALLE 14.601605 77.657618 1000116
11 ANANTAPUR Anantapur KODIMI 14.709472 77.548963 1000123
12 ANANTAPUR Anantapur RACHANAPALLE 14.701827 77.551172 1000128
13 ANANTAPUR Atmakur ATMAKUR 14.647519 77.357418 1000134
14 ANANTAPUR Atmakur GORIDINDLA 14.573467 77.376384 1000136
15 ANANTAPUR Atmakur SANAPA 14.606638 77.469307 1000144
16 ANANTAPUR Bathalapalle D.CHERLOPALLE 14.441108 77.874763 1000151
17 ANANTAPUR Bathalapalle DAMPETLA 14.463522 77.889151 1000152
18 ANANTAPUR Bathalapalle SANGALA 14.448167 77.818805 1000158
19 ANANTAPUR Beluguppa ANKAMPALLE 14.703058 77.237416 1000161
20 ANANTAPUR Beluguppa NARASAPURAM 14.652366 77.065626 1000171
21 ANANTAPUR Beluguppa SEERPI 14.666179 77.156520 1000174
22 ANANTAPUR Bommanahal BOLLANAGUDAM 14.995705 77.049252 1000180
23 ANANTAPUR Bommanahal KOLAGANAHALLI 14.917239 76.948635 1000188
24 ANANTAPUR Brahmasamudram BHAIRASAMUDRAM 14.567910 76.944043 1000198
25 ANANTAPUR Brahmasamudram PILLALAPALLE 14.570944 77.012727 1000208
26 ANANTAPUR Brahmasamudram THEETAKAL 14.639004 77.021317 1000211
27 ANANTAPUR Bukkapatnam KOTHAKOTA 14.240095 77.940510 1000218
28 ANANTAPUR Bukkapatnam MARALA 14.306887 77.928113 1000220
29 ANANTAPUR Bukkapatnam SIDDARAMPURAM 14.275607 77.884052 1000223
... ... ... ... ... ... ...
22518 AMETHI TILOI RAJANPUR 26.408847 81.499132 1260503
22519 AMETHI TILOI RAJAPUR HALIM 26.435741 81.413751 1260504
22520 AMETHI TILOI SANGRAMPUR 26.327451 81.465089 1260505
22521 AMETHI TILOI SAVITAPUR 26.317198 81.450202 1260506
22522 AMETHI TILOI SIJNI 26.339191 81.372623 1260507
22523 ALIPURDUAR FALAKATA DALGAON 26.671157 89.164596 1260567
22524 ALIPURDUAR FALAKATA MAIRADANGA 26.577557 89.233014 1260576
22525 ALIPURDUAR KALCHINI CHUAPARA 26.732861 89.422926 1260579
22526 ALIPURDUAR KUMARGRAM CHENGMARI 26.581170 89.821368 1260590
22527 ALIPURDUAR KUMARGRAM KUMARGRAM 26.612292 89.829210 1260595
22528 EAST DISTRICT PAKYONG TAZA 27.209626 88.618207 1260703
22529 JHARGRAM JHARGRAM CHANDRI 22.326953 86.921443 1261008
22530 JHARGRAM JHARGRAM CHUBKA 22.396438 87.193081 1261009
22531 JHARGRAM JHARGRAM MANIKPARA 22.369905 87.125101 1261012
22532 JHARGRAM JHARGRAM PATASHIMUL 22.285662 86.980339 1261014
22533 JHARGRAM JHARGRAM RADHANAGAR 22.477238 87.010400 1261015
22534 JHARGRAM JHARGRAM SAPDHARA 22.377526 86.926356 1261016
22535 JHARGRAM JHARGRAM SARDIHA 22.375892 87.153331 1261017
22536 JHARGRAM NAYAGRAM ARA 21.900784 87.050799 1261019
22537 JHARGRAM NAYAGRAM CHANDABILLA 22.086912 87.017334 1261024
22538 JHARGRAM NAYAGRAM MALAM 22.066601 87.152645 1261028
22539 JHARGRAM NAYAGRAM NAYAGRAM 22.035067 87.170341 1261029
22540 JHARGRAM NAYAGRAM PATINA 22.162124 87.006604 1261030
22541 JHARGRAM SANKRAIL ANDHARI 22.154164 87.092958 1261031
22542 JHARGRAM SANKRAIL CHHATRI 22.237054 87.089601 1261032
22543 JHARGRAM SANKRAIL KULTIKRI 22.175490 87.165471 1261035
22544 JHARGRAM SANKRAIL LAUDAHA 22.143824 87.123349 1261036
22545 JHARGRAM SANKRAIL PATHRA 22.260671 87.159645 1261037
22546 JHARGRAM SANKRAIL ROHINI 22.165943 87.084149 1261039
22547 JHARGRAM SANKRAIL SANKRAIL 22.196941 87.138044 1261040

22548 rows × 6 columns

In [9]:
# Filter non geocoded records
non_geocoded_df = non_geocoded_df[~non_geocoded_df['id'].isin(cenvil_geocoded_df.id)]
non_geocoded_df
Out[9]:
District Name Block Name Panchayat Name district_x block_x panchayat_x id
0 NICOBARS Campbell Bay CAMPBELL BAY NICOBARS CAMPBEL BAY CAMPBEL BAY 1000000
1 NICOBARS Campbell Bay GOVINDNAGAR NICOBARS CAMPBEL BAY GOVINDNAGAR 1000001
2 NICOBARS Campbell Bay Great & Little Nicobar NICOBARS CAMPBEL BAY GREAT & LITLE NICOBAR 1000002
3 NICOBARS Campbell Bay LAXMI NAGAR NICOBARS CAMPBEL BAY LAXMI NAGAR 1000003
4 NICOBARS Nancowry CHOWRA TC NICOBARS NANCOWRY CHOWRA TC 1000004
5 NICOBARS Nancowry KAMORTA TC NICOBARS NANCOWRY KAMORTA TC 1000005
6 NICOBARS Nancowry KATCHAL TC NICOBARS NANCOWRY KATCHAL TC 1000006
7 NICOBARS Nancowry NANCOWRY TC NICOBARS NANCOWRY NANCOWRY TC 1000007
8 NICOBARS Nancowry TERESSA TC NICOBARS NANCOWRY TERESA TC 1000008
9 NICOBARS Nicobar TRIBAL COUNCIL NICOBARS NICOBAR TRIBAL COUNCIL 1000009
10 NORTH AND MIDDLE ANDAMAN Diglipur Diglipur NORTH AND MIDLE ANDAMAN DIGLIPUR DIGLIPUR 1000010
11 NORTH AND MIDDLE ANDAMAN Diglipur Gandhinagar NORTH AND MIDLE ANDAMAN DIGLIPUR GANDHINAGAR 1000011
12 NORTH AND MIDDLE ANDAMAN Diglipur Kalighat NORTH AND MIDLE ANDAMAN DIGLIPUR KALIGHAT 1000012
13 NORTH AND MIDDLE ANDAMAN Diglipur Keralapuram NORTH AND MIDLE ANDAMAN DIGLIPUR KERALAPURAM 1000013
14 NORTH AND MIDDLE ANDAMAN Diglipur Kishori Nagar NORTH AND MIDLE ANDAMAN DIGLIPUR KISHORI NAGAR 1000014
15 NORTH AND MIDDLE ANDAMAN Diglipur Laxmipur NORTH AND MIDLE ANDAMAN DIGLIPUR LAXMIPUR 1000015
16 NORTH AND MIDDLE ANDAMAN Diglipur Madhupur NORTH AND MIDLE ANDAMAN DIGLIPUR MADHUPUR 1000016
17 NORTH AND MIDDLE ANDAMAN Diglipur Nabagram NORTH AND MIDLE ANDAMAN DIGLIPUR NABAGRAM 1000017
18 NORTH AND MIDDLE ANDAMAN Diglipur Paschimsagar NORTH AND MIDLE ANDAMAN DIGLIPUR PASCHIMSAGAR 1000018
19 NORTH AND MIDDLE ANDAMAN Diglipur Radha Nagar NORTH AND MIDLE ANDAMAN DIGLIPUR RADHA NAGAR 1000019
20 NORTH AND MIDDLE ANDAMAN Diglipur Rama Krishnagram NORTH AND MIDLE ANDAMAN DIGLIPUR RAMA KRISHNAGRAM 1000020
21 NORTH AND MIDDLE ANDAMAN Diglipur Ramnagar NORTH AND MIDLE ANDAMAN DIGLIPUR RAMNAGAR 1000021
22 NORTH AND MIDDLE ANDAMAN Diglipur Shibpur NORTH AND MIDLE ANDAMAN DIGLIPUR SHIBPUR 1000022
23 NORTH AND MIDDLE ANDAMAN Diglipur Sitanagar NORTH AND MIDLE ANDAMAN DIGLIPUR SITANAGAR 1000023
24 NORTH AND MIDDLE ANDAMAN Diglipur Subashgram NORTH AND MIDLE ANDAMAN DIGLIPUR SUBASHGRAM 1000024
25 NORTH AND MIDDLE ANDAMAN Mayabunder Basantipur NORTH AND MIDLE ANDAMAN MAYABUNDER BASANTIPUR 1000025
26 NORTH AND MIDDLE ANDAMAN Mayabunder Chainpur NORTH AND MIDLE ANDAMAN MAYABUNDER CHAINPUR 1000026
27 NORTH AND MIDDLE ANDAMAN Mayabunder De-Reserve Area NORTH AND MIDLE ANDAMAN MAYABUNDER DE-RESERVE AREA 1000027
28 NORTH AND MIDDLE ANDAMAN Mayabunder Harinagar NORTH AND MIDLE ANDAMAN MAYABUNDER HARINAGAR 1000028
29 NORTH AND MIDDLE ANDAMAN Mayabunder Mayabunder NORTH AND MIDLE ANDAMAN MAYABUNDER MAYABUNDER 1000029
... ... ... ... ... ... ... ...
260992 JHARGRAM GOPIBALLAVPUR-I KENDUGARI JHARGRAM GOPIBALAVPUR-I KENDUGARI 1260992
260993 JHARGRAM GOPIBALLAVPUR-I SARIA JHARGRAM GOPIBALAVPUR-I SARIA 1260993
260994 JHARGRAM GOPIBALLAVPUR-I SATMA JHARGRAM GOPIBALAVPUR-I SATMA 1260994
260995 JHARGRAM GOPIBALLAVPUR-I SHASHRHA JHARGRAM GOPIBALAVPUR-I SHASHRHA 1260995
260996 JHARGRAM JAMBANI CHILKIGARH JHARGRAM JAMBANI CHILKIGARH 1260996
260997 JHARGRAM JAMBANI CHINCHRA JHARGRAM JAMBANI CHINCHRA 1260997
260998 JHARGRAM JAMBANI DHARSA JHARGRAM JAMBANI DHARSA 1260998
260999 JHARGRAM JAMBANI DUBRA JHARGRAM JAMBANI DUBRA 1260999
261000 JHARGRAM JAMBANI GIDHNI JHARGRAM JAMBANI GIDHNI 1261000
261001 JHARGRAM JAMBANI JAMBONI JHARGRAM JAMBANI JAMBONI 1261001
261002 JHARGRAM JAMBANI KAPGARI JHARGRAM JAMBANI KAPGARI 1261002
261003 JHARGRAM JAMBANI KENDADANGRI JHARGRAM JAMBANI KENDADANGRI 1261003
261004 JHARGRAM JAMBANI LALBANDH JHARGRAM JAMBANI LALBANDH 1261004
261005 JHARGRAM JAMBANI PARIHATI JHARGRAM JAMBANI PARIHATI 1261005
261006 JHARGRAM JHARGRAM AGUIBONI JHARGRAM JHARGRAM AGUIBONI 1261006
261007 JHARGRAM JHARGRAM BANDHGORA JHARGRAM JHARGRAM BANDHGORA 1261007
261010 JHARGRAM JHARGRAM DUDHKUNDI JHARGRAM JHARGRAM DUDHKUNDI 1261010
261011 JHARGRAM JHARGRAM LODHASULI JHARGRAM JHARGRAM LODHASULI 1261011
261013 JHARGRAM JHARGRAM NEDABAHARA JHARGRAM JHARGRAM NEDABAHARA 1261013
261018 JHARGRAM JHARGRAM SHALBONI JHARGRAM JHARGRAM SHALBONI 1261018
261020 JHARGRAM NAYAGRAM BALIGERIA JHARGRAM NAYAGRAM BALIGERIA 1261020
261021 JHARGRAM NAYAGRAM BARAKHAKRI JHARGRAM NAYAGRAM BARAKHAKRI 1261021
261022 JHARGRAM NAYAGRAM BARANEGUI JHARGRAM NAYAGRAM BARANEGUI 1261022
261023 JHARGRAM NAYAGRAM BERAJAL JHARGRAM NAYAGRAM BERAJAL 1261023
261025 JHARGRAM NAYAGRAM CHANDRAREKHA JHARGRAM NAYAGRAM CHANDRAREKHA 1261025
261026 JHARGRAM NAYAGRAM JAMIRAPAL JHARGRAM NAYAGRAM JAMIRAPAL 1261026
261027 JHARGRAM NAYAGRAM KHARIKAMATHANI JHARGRAM NAYAGRAM KHARIKAMATHANI 1261027
261033 JHARGRAM SANKRAIL DHANGHORI JHARGRAM SANKRAIL DHANGHORI 1261033
261034 JHARGRAM SANKRAIL KHUDMORAI JHARGRAM SANKRAIL KHUDMORAI 1261034
261038 JHARGRAM SANKRAIL RAGRAH JHARGRAM SANKRAIL RAGRAH 1261038

190589 rows × 7 columns

In [10]:
# Second Phase of Grampanchayat join data using phenoitic join
## Prepare GP Data
gp_point_df['district_y'] = gp_point_df['dtname'].str.upper().replace('([^A-Za-z]+)', '').apply(lambda x: jellyfish.soundex(x))
gp_point_df['block_y'] = gp_point_df['block_name'].str.upper().replace('([^A-Za-z]+)', '').apply(lambda x: jellyfish.soundex(x))
gp_point_df['panchayat_y'] = gp_point_df['gp_name'].str.upper().replace('PANCHAYAT', '').str.replace('GP', '').replace('([^A-Za-z]+)', '').apply(lambda x: jellyfish.soundex(x))
## Prepare Non Geocoded Data
non_geocoded_df['district_x'] = non_geocoded_df['District Name'].str.upper().replace('([^A-Za-z]+)', '').apply(lambda x: jellyfish.soundex(x))
non_geocoded_df['block_x'] = non_geocoded_df['Block Name'].str.upper().replace('([^A-Za-z]+)', '').apply(lambda x: jellyfish.soundex(x))
non_geocoded_df['panchayat_x'] = non_geocoded_df['Panchayat Name'].str.upper().replace('PANCHAYAT', '').str.replace('GP', '').replace('([^A-Za-z]+)', '').apply(lambda x: jellyfish.soundex(x))
## Merge
soundex_gpgeocoded_df = pd.merge(non_geocoded_df, gp_point_df, how='inner', left_on=['district_x', 'block_x', 'panchayat_x'], right_on = ['district_y', 'block_y', 'panchayat_y'])
## Check for quality
def jaro_winkler_match(x):
    return jellyfish.jaro_winkler(x['panchayat_x'], x['panchayat_y'])
soundex_gpgeocoded_df['is_match'] = soundex_gpgeocoded_df[['panchayat_x', 'panchayat_y']].apply(jaro_winkler_match, axis=1)
soundex_gpgeocoded_df = soundex_gpgeocoded_df[soundex_gpgeocoded_df['is_match'] == 1.0]
soundex_gpgeocoded_df = soundex_gpgeocoded_df[['District Name', 'Block Name', 'Panchayat Name', 'lat', 'lon', 'id']].drop_duplicates(subset='id', keep="first", inplace=False).reset_index(drop=True)
soundex_gpgeocoded_df
Out[10]:
District Name Block Name Panchayat Name lat lon id
0 NORTH AND MIDDLE ANDAMAN Diglipur Laxmipur 13.320263 92.935452 1000015
1 NORTH AND MIDDLE ANDAMAN Diglipur Radha Nagar 13.398258 92.920165 1000019
2 NORTH AND MIDDLE ANDAMAN Diglipur Rama Krishnagram 13.246140 92.982946 1000020
3 NORTH AND MIDDLE ANDAMAN Diglipur Ramnagar 13.246140 92.982946 1000021
4 NORTH AND MIDDLE ANDAMAN Diglipur Sitanagar 13.246564 92.934633 1000023
5 NORTH AND MIDDLE ANDAMAN Diglipur Subashgram 13.251730 92.958664 1000024
6 NORTH AND MIDDLE ANDAMAN Mayabunder Pahalgaon 12.845044 92.862707 1000030
7 NORTH AND MIDDLE ANDAMAN Mayabunder Pokadera 12.905950 92.907320 1000031
8 NORTH AND MIDDLE ANDAMAN Rangat Kadamtala 12.354658 92.769528 1000036
9 NORTH AND MIDDLE ANDAMAN Rangat Long Island 12.395201 92.937125 1000038
10 NORTH AND MIDDLE ANDAMAN Rangat Parnasala 12.516090 92.906369 1000041
11 NORTH AND MIDDLE ANDAMAN Rangat Sundergarh 12.248895 92.817250 1000045
12 NORTH AND MIDDLE ANDAMAN Rangat Urmilapur 12.542299 92.862041 1000046
13 ANANTAPUR Agali Hallikera 13.768624 77.009729 1000079
14 ANANTAPUR Agali P.BYADIGERA 13.798521 76.999590 1000086
15 ANANTAPUR Amadagur JOWKULAKOTHAPALLI 13.980832 78.028448 1000093
16 ANANTAPUR Amarapuram THAMMEDAHALLI 14.157010 76.998959 1000107
17 ANANTAPUR Anantapur KAMARUPALLI 14.655622 77.491123 1000119
18 ANANTAPUR Anantapur KATTAKINGAPALLI 14.612996 77.554218 1000122
19 ANANTAPUR Anantapur UPPARAPALLI 14.656695 77.638550 1000133
20 ANANTAPUR Bathalapalle GARISALAPALLI 14.472745 77.888437 1000153
21 ANANTAPUR Bommanahal UNTHAKAL 14.980956 76.951567 1000196
22 ANANTAPUR Brahmasamudram VEPALAPARTHI 14.553595 76.898218 1000212
23 ANANTAPUR Bukkarayasamudram CHEDULLA 14.758668 77.684237 1000228
24 ANANTAPUR Bukkarayasamudram DANDUVARIPALLE 14.660011 77.710380 1000230
25 ANANTAPUR Bukkarayasamudram GOVINDAPALLE 14.735162 77.626619 1000232
26 ANANTAPUR Bukkarayasamudram JANTHULUR 14.756311 77.655562 1000233
27 ANANTAPUR Bukkarayasamudram KORRAPADU 14.736670 77.706004 1000235
28 ANANTAPUR Bukkarayasamudram REDDIPALLE 14.706014 77.675588 1000239
29 ANANTAPUR Chenne Kothapalle CHENNAKOTHAPALLE 14.275225 77.621588 1000246
... ... ... ... ... ... ...
55416 RAE BARELI DALMAU UBARANI 26.109635 81.157392 1259030
55417 RAE BARELI DALMAU UMRAMAU 26.175899 81.060630 1259031
55418 RAE BARELI SHIVGARH BADAVAR 26.530307 81.271646 1259035
55419 RAE BARELI SHIVGARH BANKAGARH 26.544911 81.169259 1259038
55420 RAE BARELI SHIVGARH BHAWANIGARH 26.534063 81.222904 1259043
55421 RAE BARELI SHIVGARH DEHLI 26.585485 81.209077 1259046
55422 RAE BARELI SHIVGARH DHODHWAPUR 26.549008 81.217018 1259048
55423 RAE BARELI SHIVGARH JAGDEESHPUR 26.502278 81.253706 1259053
55424 RAE BARELI SHIVGARH NARAINPUR 26.490754 81.234027 1259059
55425 RAE BARELI SHIVGARH NARETHUWA 26.534429 81.325634 1259060
55426 RAE BARELI SHIVGARH PIPARI 26.551922 81.252683 1259064
55427 RAE BARELI SHIVGARH RAIPUR NERUA 26.518928 81.234014 1259066
55428 RAE BARELI SHIVGARH RAMPUR KHAS 26.506288 81.244942 1259068
55429 RAE BARELI SHIVGARH RAMPUR TIKRA 26.506288 81.244942 1259069
55430 RAE BARELI SHIVGARH SHIVGARH 26.544690 81.243923 1259073
55431 RAE BARELI SHIVGARH SHIVLI 26.548611 81.231550 1259074
55432 SANT RAVIDAS NAGAR Gyanpur Geraee 25.406200 82.431304 1259076
55433 SANT RAVIDAS NAGAR Gyanpur Joginka 25.395939 82.470273 1259077
55434 SANT RAVIDAS NAGAR Gyanpur Sonkhari 25.341214 82.409071 1259081
55435 UDAM SINGH NAGAR Bajpur Barwala 29.105141 79.181473 1259119
55436 UDAM SINGH NAGAR Sitarganj Salmata 28.905476 79.755871 1259121
55437 SANGRUR DHURI BHASOURH 30.354323 75.782513 1259147
55438 BAREILLY BITHIRI CHAINPUR KOHNI 28.375967 79.532901 1260508
55439 JAUNPUR BAKSHA MAHIMAPUR DEEH 25.821474 82.542160 1260518
55440 JAUNPUR BAKSHA SHESHPURA 25.817252 82.470473 1260524
55441 SITAPUR GONDLAMAU CHAUPARIAY 27.295590 80.696476 1260531
55442 SITAPUR GONDLAMAU KUMAUGRANT 27.390243 80.632652 1260537
55443 SITAPUR GONDLAMAU MAHMADPUR JHABRA 27.281080 80.661790 1260539
55444 UNNAO BIGHAPUR RAJAPUR PIPRAHA 26.303163 80.703166 1260544
55445 NAWANSHAHR NAWANSHAHR BHAURA 31.120617 76.200179 1260662

55446 rows × 6 columns

In [11]:
# Filter non geocoded records
non_geocoded_df = non_geocoded_df[~non_geocoded_df['id'].isin(soundex_gpgeocoded_df.id)]
non_geocoded_df
Out[11]:
District Name Block Name Panchayat Name district_x block_x panchayat_x id
0 NICOBARS Campbell Bay CAMPBELL BAY N216 C514 C514 1000000
1 NICOBARS Campbell Bay GOVINDNAGAR N216 C514 G153 1000001
2 NICOBARS Campbell Bay Great & Little Nicobar N216 C514 G634 1000002
3 NICOBARS Campbell Bay LAXMI NAGAR N216 C514 L255 1000003
4 NICOBARS Nancowry CHOWRA TC N216 N526 C632 1000004
5 NICOBARS Nancowry KAMORTA TC N216 N526 K563 1000005
6 NICOBARS Nancowry KATCHAL TC N216 N526 K324 1000006
7 NICOBARS Nancowry NANCOWRY TC N216 N526 N526 1000007
8 NICOBARS Nancowry TERESSA TC N216 N526 T623 1000008
9 NICOBARS Nicobar TRIBAL COUNCIL N216 N216 T614 1000009
10 NORTH AND MIDDLE ANDAMAN Diglipur Diglipur N635 D241 D241 1000010
11 NORTH AND MIDDLE ANDAMAN Diglipur Gandhinagar N635 D241 G535 1000011
12 NORTH AND MIDDLE ANDAMAN Diglipur Kalighat N635 D241 K423 1000012
13 NORTH AND MIDDLE ANDAMAN Diglipur Keralapuram N635 D241 K641 1000013
14 NORTH AND MIDDLE ANDAMAN Diglipur Kishori Nagar N635 D241 K265 1000014
16 NORTH AND MIDDLE ANDAMAN Diglipur Madhupur N635 D241 M316 1000016
17 NORTH AND MIDDLE ANDAMAN Diglipur Nabagram N635 D241 N126 1000017
18 NORTH AND MIDDLE ANDAMAN Diglipur Paschimsagar N635 D241 P252 1000018
22 NORTH AND MIDDLE ANDAMAN Diglipur Shibpur N635 D241 S160 1000022
25 NORTH AND MIDDLE ANDAMAN Mayabunder Basantipur N635 M153 B253 1000025
26 NORTH AND MIDDLE ANDAMAN Mayabunder Chainpur N635 M153 C516 1000026
27 NORTH AND MIDDLE ANDAMAN Mayabunder De-Reserve Area N635 M153 D626 1000027
28 NORTH AND MIDDLE ANDAMAN Mayabunder Harinagar N635 M153 H652 1000028
29 NORTH AND MIDDLE ANDAMAN Mayabunder Mayabunder N635 M153 M153 1000029
32 NORTH AND MIDDLE ANDAMAN Mayabunder Rampur N635 M153 R516 1000032
33 NORTH AND MIDDLE ANDAMAN Mayabunder Swadesh Nagar N635 M153 S325 1000033
34 NORTH AND MIDDLE ANDAMAN Rangat Bakultala N635 R523 B243 1000034
35 NORTH AND MIDDLE ANDAMAN Rangat Dasarathpur N635 R523 D263 1000035
37 NORTH AND MIDDLE ANDAMAN Rangat Kaushalya Nagar N635 R523 K245 1000037
39 NORTH AND MIDDLE ANDAMAN Rangat Nilambur N635 R523 N451 1000039
... ... ... ... ... ... ... ...
260992 JHARGRAM GOPIBALLAVPUR-I KENDUGARI J626 G114 K532 1260992
260993 JHARGRAM GOPIBALLAVPUR-I SARIA J626 G114 S600 1260993
260994 JHARGRAM GOPIBALLAVPUR-I SATMA J626 G114 S350 1260994
260995 JHARGRAM GOPIBALLAVPUR-I SHASHRHA J626 G114 S260 1260995
260996 JHARGRAM JAMBANI CHILKIGARH J626 J515 C422 1260996
260997 JHARGRAM JAMBANI CHINCHRA J626 J515 C526 1260997
260998 JHARGRAM JAMBANI DHARSA J626 J515 D620 1260998
260999 JHARGRAM JAMBANI DUBRA J626 J515 D160 1260999
261000 JHARGRAM JAMBANI GIDHNI J626 J515 G350 1261000
261001 JHARGRAM JAMBANI JAMBONI J626 J515 J515 1261001
261002 JHARGRAM JAMBANI KAPGARI J626 J515 K126 1261002
261003 JHARGRAM JAMBANI KENDADANGRI J626 J515 K533 1261003
261004 JHARGRAM JAMBANI LALBANDH J626 J515 L415 1261004
261005 JHARGRAM JAMBANI PARIHATI J626 J515 P630 1261005
261006 JHARGRAM JHARGRAM AGUIBONI J626 J626 A215 1261006
261007 JHARGRAM JHARGRAM BANDHGORA J626 J626 B532 1261007
261010 JHARGRAM JHARGRAM DUDHKUNDI J626 J626 D325 1261010
261011 JHARGRAM JHARGRAM LODHASULI J626 J626 L324 1261011
261013 JHARGRAM JHARGRAM NEDABAHARA J626 J626 N316 1261013
261018 JHARGRAM JHARGRAM SHALBONI J626 J626 S415 1261018
261020 JHARGRAM NAYAGRAM BALIGERIA J626 N265 B426 1261020
261021 JHARGRAM NAYAGRAM BARAKHAKRI J626 N265 B622 1261021
261022 JHARGRAM NAYAGRAM BARANEGUI J626 N265 B652 1261022
261023 JHARGRAM NAYAGRAM BERAJAL J626 N265 B624 1261023
261025 JHARGRAM NAYAGRAM CHANDRAREKHA J626 N265 C536 1261025
261026 JHARGRAM NAYAGRAM JAMIRAPAL J626 N265 J561 1261026
261027 JHARGRAM NAYAGRAM KHARIKAMATHANI J626 N265 K625 1261027
261033 JHARGRAM SANKRAIL DHANGHORI J626 S526 D526 1261033
261034 JHARGRAM SANKRAIL KHUDMORAI J626 S526 K356 1261034
261038 JHARGRAM SANKRAIL RAGRAH J626 S526 R260 1261038

135143 rows × 7 columns

In [12]:
# Second Phase of Census Villages data join  using phenoitic join
## Prepare Census Villages Data
cenvil_df['district_y'] = cenvil_df['dtname'].str.upper().replace('([^A-Za-z]+)', '').apply(lambda x: jellyfish.soundex(x))
cenvil_df['block_y'] = cenvil_df['sdtname'].str.upper().replace('([^A-Za-z]+)', '').apply(lambda x: jellyfish.soundex(x))
cenvil_df['village_y'] = cenvil_df['vilname'].str.upper().replace('([^A-Za-z]+)', '').apply(lambda x: jellyfish.soundex(x))
soundex_cenvilgeocoded_df = pd.merge(non_geocoded_df, cenvil_df, how='inner', left_on=['district_x', 'panchayat_x'], right_on = ['district_y', 'village_y'])
## Check for quality
def jaro_winkler_match(x):
    return jellyfish.jaro_winkler(x['panchayat_x'], x['village_y'])
soundex_cenvilgeocoded_df['is_match'] = soundex_cenvilgeocoded_df[['panchayat_x', 'village_y']].apply(jaro_winkler_match, axis=1)
soundex_cenvilgeocoded_df = soundex_cenvilgeocoded_df[soundex_cenvilgeocoded_df['is_match'] == 1.0]
soundex_cenvilgeocoded_df = soundex_cenvilgeocoded_df[['District Name', 'Block Name', 'Panchayat Name', 'lat', 'lon', 'id']].drop_duplicates(subset='id', keep="first", inplace=False).reset_index(drop=True)
soundex_cenvilgeocoded_df
Out[12]:
District Name Block Name Panchayat Name lat lon id
0 NICOBARS Campbell Bay CAMPBELL BAY 7.009279 93.926589 1000000
1 NICOBARS Campbell Bay GOVINDNAGAR 6.996494 93.896547 1000001
2 NICOBARS Campbell Bay LAXMI NAGAR 6.883300 93.889787 1000003
3 NICOBARS Nancowry KAMORTA TC 21.065458 79.498997 1000005
4 NORTH AND MIDDLE ANDAMAN Diglipur Diglipur 13.245422 92.971099 1000010
5 NORTH AND MIDDLE ANDAMAN Diglipur Kalighat 13.119377 92.952089 1000012
6 NORTH AND MIDDLE ANDAMAN Diglipur Keralapuram 13.255532 93.012199 1000013
7 NORTH AND MIDDLE ANDAMAN Diglipur Kishori Nagar 13.190907 92.870359 1000014
8 NORTH AND MIDDLE ANDAMAN Diglipur Madhupur 13.264025 92.969082 1000016
9 NORTH AND MIDDLE ANDAMAN Diglipur Nabagram 13.166283 92.941103 1000017
10 NORTH AND MIDDLE ANDAMAN Mayabunder Basantipur 12.743961 92.877087 1000025
11 NORTH AND MIDDLE ANDAMAN Mayabunder Chainpur 12.708116 92.806193 1000026
12 NORTH AND MIDDLE ANDAMAN Mayabunder Harinagar 12.671905 92.878907 1000028
13 NORTH AND MIDDLE ANDAMAN Mayabunder Mayabunder 12.921971 92.897331 1000029
14 NORTH AND MIDDLE ANDAMAN Mayabunder Rampur 12.518138 92.909912 1000032
15 NORTH AND MIDDLE ANDAMAN Mayabunder Swadesh Nagar 12.660179 92.887343 1000033
16 NORTH AND MIDDLE ANDAMAN Rangat Bakultala 12.503236 92.857219 1000034
17 NORTH AND MIDDLE ANDAMAN Rangat Dasarathpur 12.495893 92.914466 1000035
18 NORTH AND MIDDLE ANDAMAN Rangat Kaushalya Nagar 12.534930 92.824345 1000037
19 NORTH AND MIDDLE ANDAMAN Rangat Nilambur 12.170486 92.764054 1000039
20 NORTH AND MIDDLE ANDAMAN Rangat Rangat 12.506387 92.907397 1000042
21 NORTH AND MIDDLE ANDAMAN Rangat Shivapuram 12.614373 92.930490 1000044
22 SOUTH ANDAMAN Ferrargunj BINDRABAN 11.719855 92.655212 1000050
23 SOUTH ANDAMAN Ferrargunj CHOULDARI 11.630423 92.665056 1000051
24 SOUTH ANDAMAN Ferrargunj COLLINPUR 11.691887 92.603782 1000052
25 SOUTH ANDAMAN Ferrargunj FERRARAGUNJ 11.715603 92.652890 1000053
26 SOUTH ANDAMAN Ferrargunj GUPTAPARA 11.561501 92.658138 1000054
27 SOUTH ANDAMAN Ferrargunj HOPE TOWN 11.699442 92.726428 1000055
28 SOUTH ANDAMAN Ferrargunj HUMFRYGUNJ 11.593297 92.659985 1000056
29 SOUTH ANDAMAN Ferrargunj KANYAPURAM 11.732659 92.698093 1000057
... ... ... ... ... ... ...
83292 JHARGRAM GOPIBALLAVPUR-I GOPIBALLAVPUR 22.207152 86.901588 1260991
83293 JHARGRAM GOPIBALLAVPUR-I KENDUGARI 21.975726 87.200100 1260992
83294 JHARGRAM GOPIBALLAVPUR-I SARIA 22.124500 86.855235 1260993
83295 JHARGRAM GOPIBALLAVPUR-I SHASHRHA 22.189836 86.796338 1260995
83296 JHARGRAM JAMBANI CHILKIGARH 22.449344 86.872883 1260996
83297 JHARGRAM JAMBANI CHINCHRA 22.280531 86.905032 1260997
83298 JHARGRAM JAMBANI DHARSA 22.519595 86.816080 1260998
83299 JHARGRAM JAMBANI DUBRA 22.136627 87.120604 1260999
83300 JHARGRAM JAMBANI GIDHNI 22.485793 86.858759 1261000
83301 JHARGRAM JAMBANI JAMBONI 21.871876 87.060891 1261001
83302 JHARGRAM JAMBANI KAPGARI 22.523286 86.876195 1261002
83303 JHARGRAM JAMBANI KENDADANGRI 22.173594 87.073594 1261003
83304 JHARGRAM JAMBANI LALBANDH 22.420880 86.830344 1261004
83305 JHARGRAM JAMBANI PARIHATI 22.284173 86.930019 1261005
83306 JHARGRAM JHARGRAM AGUIBONI 22.311781 86.943780 1261006
83307 JHARGRAM JHARGRAM BANDHGORA 22.189921 86.852231 1261007
83308 JHARGRAM JHARGRAM DUDHKUNDI 22.236526 87.109333 1261010
83309 JHARGRAM JHARGRAM LODHASULI 22.385609 86.996699 1261011
83310 JHARGRAM JHARGRAM NEDABAHARA 22.380763 86.995109 1261013
83311 JHARGRAM JHARGRAM SHALBONI 21.873296 87.084168 1261018
83312 JHARGRAM NAYAGRAM BALIGERIA 21.896406 87.079674 1261020
83313 JHARGRAM NAYAGRAM BARAKHAKRI 22.372433 87.154985 1261021
83314 JHARGRAM NAYAGRAM BARANEGUI 21.985373 87.111380 1261022
83315 JHARGRAM NAYAGRAM BERAJAL 21.979447 87.148221 1261023
83316 JHARGRAM NAYAGRAM CHANDRAREKHA 21.960476 87.063864 1261025
83317 JHARGRAM NAYAGRAM JAMIRAPAL 21.983613 87.210078 1261026
83318 JHARGRAM NAYAGRAM KHARIKAMATHANI 22.005273 87.146044 1261027
83319 JHARGRAM SANKRAIL DHANGHORI 22.147340 87.143844 1261033
83320 JHARGRAM SANKRAIL KHUDMORAI 21.976395 87.212614 1261034
83321 JHARGRAM SANKRAIL RAGRAH 22.181524 87.037965 1261038

83322 rows × 6 columns

In [13]:
# Filter non geocoded records
non_geocoded_df = non_geocoded_df[~non_geocoded_df['id'].isin(soundex_cenvilgeocoded_df.id)]
non_geocoded_df
Out[13]:
District Name Block Name Panchayat Name district_x block_x panchayat_x id
2 NICOBARS Campbell Bay Great & Little Nicobar N216 C514 G634 1000002
4 NICOBARS Nancowry CHOWRA TC N216 N526 C632 1000004
6 NICOBARS Nancowry KATCHAL TC N216 N526 K324 1000006
7 NICOBARS Nancowry NANCOWRY TC N216 N526 N526 1000007
8 NICOBARS Nancowry TERESSA TC N216 N526 T623 1000008
9 NICOBARS Nicobar TRIBAL COUNCIL N216 N216 T614 1000009
11 NORTH AND MIDDLE ANDAMAN Diglipur Gandhinagar N635 D241 G535 1000011
18 NORTH AND MIDDLE ANDAMAN Diglipur Paschimsagar N635 D241 P252 1000018
22 NORTH AND MIDDLE ANDAMAN Diglipur Shibpur N635 D241 S160 1000022
27 NORTH AND MIDDLE ANDAMAN Mayabunder De-Reserve Area N635 M153 D626 1000027
40 NORTH AND MIDDLE ANDAMAN Rangat Nimbutala N635 R523 N513 1000040
43 NORTH AND MIDDLE ANDAMAN Rangat Sabari N635 R523 S160 1000043
47 NORTH AND MIDDLE ANDAMAN Rangat Uttara N635 R523 U360 1000047
48 SOUTH ANDAMAN Ferrargunj BAMBOOFLAT S353 F662 B511 1000048
49 SOUTH ANDAMAN Ferrargunj BAMBOOFLAT-II S353 F662 B511 1000049
61 SOUTH ANDAMAN Ferrargunj SHOAL BAY S353 F662 S410 1000061
67 SOUTH ANDAMAN Little Andaman HUT BAY S353 L345 H310 1000067
69 SOUTH ANDAMAN Little Andaman R.K.PUR S353 L345 R216 1000069
71 SOUTH ANDAMAN Little Andaman V.K PUR S353 L345 V216 1000071
87 ANANTAPUR Agali RAGELINGANAHALLI A553 A240 R245 1000087
99 ANANTAPUR Amarapuram ALDAHALLI A553 A561 A434 1000099
109 ANANTAPUR Anantapur A.NARAYANAPURAM A553 A553 A565 1000109
110 ANANTAPUR Anantapur AKKAMPALLI A553 A553 A251 1000110
111 ANANTAPUR Anantapur AKUTHOTAPALLI A553 A553 A233 1000111
129 ANANTAPUR Anantapur RAJIV COLONY A553 A553 R212 1000129
130 ANANTAPUR Anantapur RUDRAMPETA A553 A553 R365 1000130
131 ANANTAPUR Anantapur SOMALADODDI A553 A553 S543 1000131
135 ANANTAPUR Atmakur B.YALERU A553 A352 B460 1000135
139 ANANTAPUR Atmakur ODDUPALLI A553 A352 O314 1000139
140 ANANTAPUR Atmakur P.SIDDARAMPURAM A553 A352 P236 1000140
... ... ... ... ... ... ... ...
260913 PURBA BARDHAMAN RAINA-II BARABAINAN P611 R500 B615 1260913
260914 PURBA BARDHAMAN RAINA-II GOTAN P611 R500 G350 1260914
260915 PURBA BARDHAMAN RAINA-II KAITY P611 R500 K300 1260915
260916 PURBA BARDHAMAN RAINA-II PAHALANPUR P611 R500 P451 1260916
260917 PURBA BARDHAMAN RAINA-II PAINTA-I P611 R500 P530 1260917
260918 PURBA BARDHAMAN RAINA-II PAINTA-II P611 R500 P530 1260918
260919 PURBA BARDHAMAN RAINA-II UCHALAN P611 R500 U245 1260919
260920 KALIMPONG GARUBATHAN AAHALEY K451 G613 A400 1260920
260921 KALIMPONG GARUBATHAN DALIM K451 G613 D450 1260921
260924 KALIMPONG GARUBATHAN KUMAI K451 G613 K500 1260924
260925 KALIMPONG GARUBATHAN NIM K451 G613 N500 1260925
260927 KALIMPONG GARUBATHAN POKHREYBONG K451 G613 P261 1260927
260928 KALIMPONG GARUBATHAN RONGO K451 G613 R520 1260928
260932 KALIMPONG KALIMPONG-I BONG K451 K451 B520 1260932
260933 KALIMPONG KALIMPONG-I DR. GRAHAMS HOMES K451 K451 D626 1260933
260937 KALIMPONG KALIMPONG-I LOWER ECHHAY K451 K451 L620 1260937
260939 KALIMPONG KALIMPONG-I PABRINGTAR K451 K451 P165 1260939
260943 KALIMPONG KALIMPONG-I SEOKBIR K451 K451 S216 1260943
260947 KALIMPONG KALIMPONG-I TISTA K451 K451 T230 1260947
260948 KALIMPONG KALIMPONG-I UPPER ECHHAY K451 K451 U162 1260948
260950 KALIMPONG KALIMPONG-II GITABLING K451 K451 G314 1260950
260951 KALIMPONG KALIMPONG-II KAGE K451 K451 K200 1260951
260953 KALIMPONG KALIMPONG-II LAVA-GITABEONG K451 K451 L123 1260953
260954 KALIMPONG KALIMPONG-II LINGSEY K451 K451 L520 1260954
260956 KALIMPONG KALIMPONG-II LOLEY K451 K451 L400 1260956
260957 KALIMPONG KALIMPONG-II PAYONG K451 K451 P520 1260957
260959 KALIMPONG KALIMPONG-II SHANGSE K451 K451 S520 1260959
260975 JHARGRAM BINPUR-II BOSHPAHARI J626 B516 B216 1260975
260980 JHARGRAM BINPUR-II SHILDA J626 B516 S430 1260980
260994 JHARGRAM GOPIBALLAVPUR-I SATMA J626 G114 S350 1260994

51821 rows × 7 columns

In [14]:
# To be Geocoded by google
non_geocoded_df.to_csv(os.path.join(output_folder, 'tobe_google_geocoded.csv'), encoding='utf-8', index=False)
non_geocoded_df
Out[14]:
District Name Block Name Panchayat Name district_x block_x panchayat_x id
2 NICOBARS Campbell Bay Great & Little Nicobar N216 C514 G634 1000002
4 NICOBARS Nancowry CHOWRA TC N216 N526 C632 1000004
6 NICOBARS Nancowry KATCHAL TC N216 N526 K324 1000006
7 NICOBARS Nancowry NANCOWRY TC N216 N526 N526 1000007
8 NICOBARS Nancowry TERESSA TC N216 N526 T623 1000008
9 NICOBARS Nicobar TRIBAL COUNCIL N216 N216 T614 1000009
11 NORTH AND MIDDLE ANDAMAN Diglipur Gandhinagar N635 D241 G535 1000011
18 NORTH AND MIDDLE ANDAMAN Diglipur Paschimsagar N635 D241 P252 1000018
22 NORTH AND MIDDLE ANDAMAN Diglipur Shibpur N635 D241 S160 1000022
27 NORTH AND MIDDLE ANDAMAN Mayabunder De-Reserve Area N635 M153 D626 1000027
40 NORTH AND MIDDLE ANDAMAN Rangat Nimbutala N635 R523 N513 1000040
43 NORTH AND MIDDLE ANDAMAN Rangat Sabari N635 R523 S160 1000043
47 NORTH AND MIDDLE ANDAMAN Rangat Uttara N635 R523 U360 1000047
48 SOUTH ANDAMAN Ferrargunj BAMBOOFLAT S353 F662 B511 1000048
49 SOUTH ANDAMAN Ferrargunj BAMBOOFLAT-II S353 F662 B511 1000049
61 SOUTH ANDAMAN Ferrargunj SHOAL BAY S353 F662 S410 1000061
67 SOUTH ANDAMAN Little Andaman HUT BAY S353 L345 H310 1000067
69 SOUTH ANDAMAN Little Andaman R.K.PUR S353 L345 R216 1000069
71 SOUTH ANDAMAN Little Andaman V.K PUR S353 L345 V216 1000071
87 ANANTAPUR Agali RAGELINGANAHALLI A553 A240 R245 1000087
99 ANANTAPUR Amarapuram ALDAHALLI A553 A561 A434 1000099
109 ANANTAPUR Anantapur A.NARAYANAPURAM A553 A553 A565 1000109
110 ANANTAPUR Anantapur AKKAMPALLI A553 A553 A251 1000110
111 ANANTAPUR Anantapur AKUTHOTAPALLI A553 A553 A233 1000111
129 ANANTAPUR Anantapur RAJIV COLONY A553 A553 R212 1000129
130 ANANTAPUR Anantapur RUDRAMPETA A553 A553 R365 1000130
131 ANANTAPUR Anantapur SOMALADODDI A553 A553 S543 1000131
135 ANANTAPUR Atmakur B.YALERU A553 A352 B460 1000135
139 ANANTAPUR Atmakur ODDUPALLI A553 A352 O314 1000139
140 ANANTAPUR Atmakur P.SIDDARAMPURAM A553 A352 P236 1000140
... ... ... ... ... ... ... ...
260913 PURBA BARDHAMAN RAINA-II BARABAINAN P611 R500 B615 1260913
260914 PURBA BARDHAMAN RAINA-II GOTAN P611 R500 G350 1260914
260915 PURBA BARDHAMAN RAINA-II KAITY P611 R500 K300 1260915
260916 PURBA BARDHAMAN RAINA-II PAHALANPUR P611 R500 P451 1260916
260917 PURBA BARDHAMAN RAINA-II PAINTA-I P611 R500 P530 1260917
260918 PURBA BARDHAMAN RAINA-II PAINTA-II P611 R500 P530 1260918
260919 PURBA BARDHAMAN RAINA-II UCHALAN P611 R500 U245 1260919
260920 KALIMPONG GARUBATHAN AAHALEY K451 G613 A400 1260920
260921 KALIMPONG GARUBATHAN DALIM K451 G613 D450 1260921
260924 KALIMPONG GARUBATHAN KUMAI K451 G613 K500 1260924
260925 KALIMPONG GARUBATHAN NIM K451 G613 N500 1260925
260927 KALIMPONG GARUBATHAN POKHREYBONG K451 G613 P261 1260927
260928 KALIMPONG GARUBATHAN RONGO K451 G613 R520 1260928
260932 KALIMPONG KALIMPONG-I BONG K451 K451 B520 1260932
260933 KALIMPONG KALIMPONG-I DR. GRAHAMS HOMES K451 K451 D626 1260933
260937 KALIMPONG KALIMPONG-I LOWER ECHHAY K451 K451 L620 1260937
260939 KALIMPONG KALIMPONG-I PABRINGTAR K451 K451 P165 1260939
260943 KALIMPONG KALIMPONG-I SEOKBIR K451 K451 S216 1260943
260947 KALIMPONG KALIMPONG-I TISTA K451 K451 T230 1260947
260948 KALIMPONG KALIMPONG-I UPPER ECHHAY K451 K451 U162 1260948
260950 KALIMPONG KALIMPONG-II GITABLING K451 K451 G314 1260950
260951 KALIMPONG KALIMPONG-II KAGE K451 K451 K200 1260951
260953 KALIMPONG KALIMPONG-II LAVA-GITABEONG K451 K451 L123 1260953
260954 KALIMPONG KALIMPONG-II LINGSEY K451 K451 L520 1260954
260956 KALIMPONG KALIMPONG-II LOLEY K451 K451 L400 1260956
260957 KALIMPONG KALIMPONG-II PAYONG K451 K451 P520 1260957
260959 KALIMPONG KALIMPONG-II SHANGSE K451 K451 S520 1260959
260975 JHARGRAM BINPUR-II BOSHPAHARI J626 B516 B216 1260975
260980 JHARGRAM BINPUR-II SHILDA J626 B516 S430 1260980
260994 JHARGRAM GOPIBALLAVPUR-I SATMA J626 G114 S350 1260994

51821 rows × 7 columns

In [15]:
# Read Google geocoded data
g1 = pd.read_csv(os.path.join(data_folder, 'google', 'geocoded1.csv'),  encoding = "ISO-8859-1")
g2 = pd.read_csv(os.path.join(data_folder, 'google', 'geocoded2.csv'),  encoding = "ISO-8859-1")
g3 = pd.read_csv(os.path.join(data_folder, 'google', 'geocoded3.csv'),  encoding = "ISO-8859-1")
g4 = pd.read_csv(os.path.join(data_folder, 'google', 'geocoded4.csv'),  encoding = "ISO-8859-1")
google_geocoded_df = pd.concat([g1, g2, g3, g4])
google_geocoded_df['lat'] = google_geocoded_df['_latitude']
google_geocoded_df['lon'] = google_geocoded_df['_longitude']
google_geocoded_df = google_geocoded_df[['id', 'lat', 'lon']]
google_geocoded_df = pd.merge(non_geocoded_df, google_geocoded_df, how='inner', on='id')
google_geocoded_df = google_geocoded_df[['District Name', 'Block Name', 'Panchayat Name', 'lat', 'lon', 'id']].drop_duplicates(subset='id', keep="first", inplace=False).reset_index(drop=True)
google_geocoded_df
Out[15]:
District Name Block Name Panchayat Name lat lon id
0 NICOBARS Campbell Bay Great & Little Nicobar 7.325778 93.692753 1000002
1 NICOBARS Nancowry CHOWRA TC 8.456021 93.042598 1000004
2 NICOBARS Nancowry KATCHAL TC 7.967549 93.359045 1000006
3 NICOBARS Nancowry NANCOWRY TC 7.976840 93.550907 1000007
4 NICOBARS Nancowry TERESSA TC 8.241989 93.127785 1000008
5 NICOBARS Nicobar TRIBAL COUNCIL 7.120540 93.784150 1000009
6 NORTH AND MIDDLE ANDAMAN Diglipur Gandhinagar 23.215635 72.636941 1000011
7 NORTH AND MIDDLE ANDAMAN Diglipur Paschimsagar 13.264189 92.882137 1000018
8 NORTH AND MIDDLE ANDAMAN Diglipur Shibpur 13.242794 93.030098 1000022
9 NORTH AND MIDDLE ANDAMAN Mayabunder De-Reserve Area 12.913057 92.897743 1000027
10 NORTH AND MIDDLE ANDAMAN Rangat Nimbutala 12.500218 92.961879 1000040
11 NORTH AND MIDDLE ANDAMAN Rangat Sabari 12.506952 92.913850 1000043
12 NORTH AND MIDDLE ANDAMAN Rangat Uttara 12.506952 92.913850 1000047
13 SOUTH ANDAMAN Ferrargunj BAMBOOFLAT 11.720196 92.656157 1000048
14 SOUTH ANDAMAN Ferrargunj BAMBOOFLAT-II 11.720196 92.656157 1000049
15 SOUTH ANDAMAN Ferrargunj SHOAL BAY 11.720196 92.656157 1000061
16 SOUTH ANDAMAN Little Andaman HUT BAY 10.744887 92.499992 1000067
17 SOUTH ANDAMAN Little Andaman R.K.PUR 10.696474 92.566856 1000069
18 SOUTH ANDAMAN Little Andaman V.K PUR 10.744887 92.499992 1000071
19 ANANTAPUR Agali RAGELINGANAHALLI 13.785969 77.052848 1000087
20 ANANTAPUR Amarapuram ALDAHALLI 14.128138 76.980148 1000099
21 ANANTAPUR Anantapur A.NARAYANAPURAM 14.703305 77.583434 1000109
22 ANANTAPUR Anantapur AKKAMPALLI 14.690438 77.544029 1000110
23 ANANTAPUR Anantapur AKUTHOTAPALLI 14.618106 77.635714 1000111
24 ANANTAPUR Anantapur RAJIV COLONY 14.698476 77.601766 1000129
25 ANANTAPUR Anantapur RUDRAMPETA 14.670000 77.576270 1000130
26 ANANTAPUR Anantapur SOMALADODDI 14.725955 77.602347 1000131
27 ANANTAPUR Atmakur B.YALERU 14.629375 77.507526 1000135
28 ANANTAPUR Atmakur ODDUPALLI 14.646694 77.358284 1000139
29 ANANTAPUR Atmakur P.SIDDARAMPURAM 14.270619 77.865313 1000140
... ... ... ... ... ... ...
44068 PURBA BARDHAMAN RAINA-II BARABAINAN 23.008667 87.935931 1260913
44069 PURBA BARDHAMAN RAINA-II GOTAN 22.978767 87.926126 1260914
44070 PURBA BARDHAMAN RAINA-II KAITY 23.014856 87.841469 1260915
44071 PURBA BARDHAMAN RAINA-II PAHALANPUR 22.960127 87.867251 1260916
44072 PURBA BARDHAMAN RAINA-II PAINTA-I 22.978526 87.797236 1260917
44073 PURBA BARDHAMAN RAINA-II PAINTA-II 22.978526 87.797236 1260918
44074 PURBA BARDHAMAN RAINA-II UCHALAN 23.030366 87.778793 1260919
44075 KALIMPONG GARUBATHAN AAHALEY 26.954219 88.695244 1260920
44076 KALIMPONG GARUBATHAN DALIM 26.954219 88.695244 1260921
44077 KALIMPONG GARUBATHAN KUMAI 27.002462 88.827501 1260924
44078 KALIMPONG GARUBATHAN NIM 26.954219 88.695244 1260925
44079 KALIMPONG GARUBATHAN POKHREYBONG 26.958845 88.180356 1260927
44080 KALIMPONG GARUBATHAN RONGO 27.042234 88.833503 1260928
44081 KALIMPONG KALIMPONG-I BONG 27.059356 88.469454 1260932
44082 KALIMPONG KALIMPONG-I DR. GRAHAMS HOMES 27.083741 88.491369 1260933
44083 KALIMPONG KALIMPONG-I LOWER ECHHAY 27.059356 88.469454 1260937
44084 KALIMPONG KALIMPONG-I PABRINGTAR 27.059356 88.469454 1260939
44085 KALIMPONG KALIMPONG-I SEOKBIR 27.034829 88.519896 1260943
44086 KALIMPONG KALIMPONG-I TISTA 27.057825 88.465992 1260947
44087 KALIMPONG KALIMPONG-I UPPER ECHHAY 27.079489 88.518920 1260948
44088 KALIMPONG KALIMPONG-II GITABLING 27.059356 88.469454 1260950
44089 KALIMPONG KALIMPONG-II KAGE 27.059356 88.469454 1260951
44090 KALIMPONG KALIMPONG-II LAVA-GITABEONG 27.081617 88.520462 1260953
44091 KALIMPONG KALIMPONG-II LINGSEY 27.059356 88.469454 1260954
44092 KALIMPONG KALIMPONG-II LOLEY 27.059356 88.469454 1260956
44093 KALIMPONG KALIMPONG-II PAYONG 27.059356 88.469454 1260957
44094 KALIMPONG KALIMPONG-II SHANGSE 27.059356 88.469454 1260959
44095 JHARGRAM BINPUR-II BOSHPAHARI 22.583006 86.918585 1260975
44096 JHARGRAM BINPUR-II SHILDA 22.583006 86.918585 1260980
44097 JHARGRAM GOPIBALLAVPUR-I SATMA 22.207909 86.765519 1260994

44098 rows × 6 columns

In [16]:
# Agregate all geocodes
#google_geocoded_df = pd.read_csv(os.path.join(data_folder, 'google_geocoded.csv'))
geocoded_df = pd.concat([gp_geocoded_df, cenvil_geocoded_df, google_geocoded_df, soundex_cenvilgeocoded_df, soundex_gpgeocoded_df])
geocoded_df = geocoded_df.drop_duplicates(subset='id', keep="first", inplace=False).reset_index(drop=True)
geocoded_df.to_csv(os.path.join(output_folder, 'geocoded_panchayats.csv'), encoding='utf-8', index=False)
geocoded_df
Out[16]:
District Name Block Name Panchayat Name lat lon id
0 ANANTAPUR Agali HULIKERADEVARAHALLI 13.768624 77.009729 1000080
1 ANANTAPUR Agali MADHUDI 13.810206 77.025746 1000084
2 ANANTAPUR Agali RAVUDI 13.841146 77.019506 1000088
3 ANANTAPUR Amadagur CHEEKIREVULAPALLE 13.874734 78.041962 1000090
4 ANANTAPUR Amadagur KASSAMUDRAM 13.933423 78.014464 1000094
5 ANANTAPUR Amadagur MOHAMMADABAD 13.967322 78.010266 1000095
6 ANANTAPUR Amadagur THUMMALA 13.929267 78.064326 1000097
7 ANANTAPUR Amarapuram HALUKURU 14.090250 77.001711 1000102
8 ANANTAPUR Amarapuram HEMAVATHI 14.033064 76.986075 1000103
9 ANANTAPUR Amarapuram NIDRAGATTA 14.037968 76.954373 1000105
10 ANANTAPUR Amarapuram SIVARAM 14.032437 77.023079 1000106
11 ANANTAPUR Anantapur KATIGANIKALVA 14.612996 77.554218 1000121
12 ANANTAPUR Anantapur KURUGUNTA 14.674663 77.519245 1000124
13 ANANTAPUR Anantapur MANNILA 14.556448 77.686479 1000125
14 ANANTAPUR Anantapur THATICHERLA 14.752598 77.571765 1000132
15 ANANTAPUR Atmakur MADIGUBBA 14.638847 77.453057 1000137
16 ANANTAPUR Atmakur THOPUDURTHI 14.552931 77.492462 1000147
17 ANANTAPUR Bathalapalle APPARACHERUVU 14.479485 77.798019 1000149
18 ANANTAPUR Bathalapalle BATHALAPALLE 14.504290 77.765405 1000150
19 ANANTAPUR Bathalapalle MALYAVANTHAM 14.569889 77.768676 1000154
20 ANANTAPUR Bathalapalle OBULAPURAM 14.458996 77.843963 1000157
21 ANANTAPUR Beluguppa AVULENNA 14.683234 77.153240 1000162
22 ANANTAPUR Beluguppa BELUGUPPA 14.712986 77.136826 1000163
23 ANANTAPUR Beluguppa DUDDEKUNTA 14.680413 77.220876 1000164
24 ANANTAPUR Beluguppa GANGAVARAM 14.640860 77.207144 1000165
25 ANANTAPUR Beluguppa KALVAPALLE 14.625519 77.267957 1000167
26 ANANTAPUR Beluguppa KONAMPALLE 14.669387 77.262099 1000168
27 ANANTAPUR Beluguppa THAGGUPARTHY 14.725207 77.198641 1000176
28 ANANTAPUR Beluguppa YERRAGUDI 14.702239 77.176537 1000178
29 ANANTAPUR Bommanahal BANDUR 14.949310 77.008596 1000179
... ... ... ... ... ... ...
253288 RAE BARELI DALMAU UBARANI 26.109635 81.157392 1259030
253289 RAE BARELI DALMAU UMRAMAU 26.175899 81.060630 1259031
253290 RAE BARELI SHIVGARH BADAVAR 26.530307 81.271646 1259035
253291 RAE BARELI SHIVGARH BANKAGARH 26.544911 81.169259 1259038
253292 RAE BARELI SHIVGARH BHAWANIGARH 26.534063 81.222904 1259043
253293 RAE BARELI SHIVGARH DEHLI 26.585485 81.209077 1259046
253294 RAE BARELI SHIVGARH DHODHWAPUR 26.549008 81.217018 1259048
253295 RAE BARELI SHIVGARH JAGDEESHPUR 26.502278 81.253706 1259053
253296 RAE BARELI SHIVGARH NARAINPUR 26.490754 81.234027 1259059
253297 RAE BARELI SHIVGARH NARETHUWA 26.534429 81.325634 1259060
253298 RAE BARELI SHIVGARH PIPARI 26.551922 81.252683 1259064
253299 RAE BARELI SHIVGARH RAIPUR NERUA 26.518928 81.234014 1259066
253300 RAE BARELI SHIVGARH RAMPUR KHAS 26.506288 81.244942 1259068
253301 RAE BARELI SHIVGARH RAMPUR TIKRA 26.506288 81.244942 1259069
253302 RAE BARELI SHIVGARH SHIVGARH 26.544690 81.243923 1259073
253303 RAE BARELI SHIVGARH SHIVLI 26.548611 81.231550 1259074
253304 SANT RAVIDAS NAGAR Gyanpur Geraee 25.406200 82.431304 1259076
253305 SANT RAVIDAS NAGAR Gyanpur Joginka 25.395939 82.470273 1259077
253306 SANT RAVIDAS NAGAR Gyanpur Sonkhari 25.341214 82.409071 1259081
253307 UDAM SINGH NAGAR Bajpur Barwala 29.105141 79.181473 1259119
253308 UDAM SINGH NAGAR Sitarganj Salmata 28.905476 79.755871 1259121
253309 SANGRUR DHURI BHASOURH 30.354323 75.782513 1259147
253310 BAREILLY BITHIRI CHAINPUR KOHNI 28.375967 79.532901 1260508
253311 JAUNPUR BAKSHA MAHIMAPUR DEEH 25.821474 82.542160 1260518
253312 JAUNPUR BAKSHA SHESHPURA 25.817252 82.470473 1260524
253313 SITAPUR GONDLAMAU CHAUPARIAY 27.295590 80.696476 1260531
253314 SITAPUR GONDLAMAU KUMAUGRANT 27.390243 80.632652 1260537
253315 SITAPUR GONDLAMAU MAHMADPUR JHABRA 27.281080 80.661790 1260539
253316 UNNAO BIGHAPUR RAJAPUR PIPRAHA 26.303163 80.703166 1260544
253317 NAWANSHAHR NAWANSHAHR BHAURA 31.120617 76.200179 1260662

253318 rows × 6 columns