# Import Libraries
import pandas as pd
import geopandas as gpd
import os
import itertools
import jellyfish
# Input data folder (resolved against the current working directory)
data_folder = os.path.abspath('data')
# Set Output Folder
output_folder = os.path.abspath("output")
# exist_ok=True creates the folder only when it is missing, without the
# separate exists() check that can race with another process creating it.
os.makedirs(output_folder, exist_ok=True)
# Helper functions
def remove_consecutive_duplicates(x):
    """Collapse every run of identical consecutive items in *x* to one item.

    For a string this squeezes repeated letters, e.g. ``"CAMPBELL BAY"``
    becomes ``"CAMPBEL BAY"``.

    Values that cannot be iterated (``None``, a float ``NaN`` standing for a
    missing name, ...) are returned unchanged instead of raising
    ``TypeError``, so the function can be applied to a column that contains
    missing values.
    """
    try:
        items = iter(x)
    except TypeError:
        # Not a string/iterable: nothing to collapse, hand the value back.
        return x
    # groupby yields one key per run of equal consecutive items.
    return ''.join(key for key, _ in itertools.groupby(items))
# Merge all files
scrapped_folder = os.path.join(data_folder, 'Scrapped')
# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the merged frame (and anything derived from row positions) reproducible.
files = sorted(os.listdir(scrapped_folder))
df_list = []
for file in files:
    filepath = os.path.join(scrapped_folder, file)
    if not os.path.isfile(filepath):
        # Sub-directories cannot be parsed as CSV; skip them.
        continue
    df_list.append(pd.read_csv(filepath))
# Merge Files
# ignore_index=True gives every row a unique label instead of repeating each
# file's own 0..n-1 index; sort=True keeps the alphabetical column order the
# implicit default produced and silences the pandas FutureWarning about it.
df = pd.concat(df_list, ignore_index=True, sort=True)
df
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\ipykernel_launcher.py:9: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version of pandas will change to not sort by default. To accept the future behavior, pass 'sort=False'. To retain the current behavior and silence the warning, pass 'sort=True'. if __name__ == '__main__':
Amount disbursed to bank accounts | Amount disbursed to post office accounts | Amount sanctioned (Rs. In Lakhs) | Approved_Work_status_Amount sanctioned (Rs. In Lakhs) | Approved_Work_status_Total works | Block Name | Completed_Work_status_Amount sanctioned (Rs. In Lakhs) | Completed_Work_status_Total works | District Name | Labour exp. (disbursed + pending, Rs. In Lakhs) | ... | Total person-days worked by SCs | Total person-days worked by STs | Total person-days worked by women | Total persons allotted work | Total persons demanded work | Total persons with disability | Total persons worked | Total post office accounts | Total works | financial_year | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 48381 | 1195 | 327 | 0.0 | 17.0 | Campbell Bay | 1.0 | 6.0 | NICOBARS | 1 | ... | 26 | 5 | 273 | 56 | 57 | 0 | 36 | 2 | 1039 | 2012 |
1 | 412497 | 0 | 12 | 0.0 | 12.0 | Campbell Bay | 0.0 | 0.0 | NICOBARS | 6 | ... | 0 | 0 | 1387 | 92 | 92 | 0 | 92 | 0 | 82 | 2012 |
2 | 21546 | 0 | 7 | 0.0 | 0.0 | Campbell Bay | 0.0 | 0.0 | NICOBARS | 2 | ... | 0 | 916 | 124 | 25 | 25 | 0 | 25 | 0 | 17 | 2012 |
3 | 288142 | 0 | 10 | 0.0 | 12.0 | Campbell Bay | 0.0 | 1.0 | NICOBARS | 6 | ... | 0 | 0 | 1508 | 123 | 123 | 1 | 122 | 0 | 61 | 2012 |
4 | 0 | 0 | 7 | 0.0 | 0.0 | Nancowry | 3.0 | 3.0 | NICOBARS | 3 | ... | 0 | 1659 | 176 | 61 | 61 | 0 | 61 | 0 | 27 | 2012 |
5 | 784143 | 0 | 20 | 0.0 | 14.0 | Nancowry | 10.0 | 5.0 | NICOBARS | 10 | ... | 0 | 5031 | 1180 | 151 | 151 | 0 | 148 | 0 | 67 | 2012 |
6 | 0 | 0 | 8 | 0.0 | 4.0 | Nancowry | 5.0 | 5.0 | NICOBARS | 5 | ... | 0 | 2577 | 714 | 92 | 92 | 1 | 88 | 0 | 43 | 2012 |
7 | 882819 | 0 | 14 | 0.0 | 3.0 | Nancowry | 12.0 | 8.0 | NICOBARS | 12 | ... | 0 | 6204 | 2127 | 190 | 190 | 1 | 184 | 0 | 59 | 2012 |
8 | 0 | 0 | 8 | 0.0 | 1.0 | Nancowry | 7.0 | 4.0 | NICOBARS | 6 | ... | 0 | 3816 | 520 | 108 | 108 | 0 | 105 | 0 | 25 | 2012 |
9 | 188940 | 0 | 202 | 0.0 | 10.0 | Nicobar | 15.0 | 9.0 | NICOBARS | 51 | ... | 0 | 23372 | 9779 | 792 | 792 | 1 | 749 | 0 | 332 | 2012 |
10 | 164828 | 0 | 30 | 0.0 | 0.0 | Diglipur | 23.0 | 5.0 | NORTH AND MIDDLE ANDAMAN | 15 | ... | 0 | 0 | 5590 | 280 | 280 | 0 | 254 | 0 | 38 | 2012 |
11 | 78142 | 0 | 42 | 0.0 | 0.0 | Diglipur | 28.0 | 3.0 | NORTH AND MIDDLE ANDAMAN | 20 | ... | 0 | 0 | 4157 | 402 | 402 | 0 | 344 | 0 | 38 | 2012 |
12 | 0 | 0 | 82 | 0.0 | 1.0 | Diglipur | 42.0 | 10.0 | NORTH AND MIDDLE ANDAMAN | 19 | ... | 0 | 0 | 5506 | 335 | 334 | 1 | 297 | 0 | 77 | 2012 |
13 | 0 | 0 | 40 | 0.0 | 0.0 | Diglipur | 17.0 | 2.0 | NORTH AND MIDDLE ANDAMAN | 17 | ... | 0 | 0 | 6169 | 348 | 346 | 1 | 296 | 0 | 36 | 2012 |
14 | 56426 | 182628 | 47 | 0.0 | 2.0 | Diglipur | 14.0 | 7.0 | NORTH AND MIDDLE ANDAMAN | 16 | ... | 0 | 0 | 4913 | 594 | 594 | 3 | 536 | 91 | 76 | 2012 |
15 | 0 | 0 | 31 | 0.0 | 2.0 | Diglipur | 24.0 | 6.0 | NORTH AND MIDDLE ANDAMAN | 21 | ... | 0 | 0 | 4185 | 351 | 349 | 10 | 309 | 0 | 58 | 2012 |
16 | 3916 | 0 | 46 | 0.0 | 2.0 | Diglipur | 37.0 | 5.0 | NORTH AND MIDDLE ANDAMAN | 35 | ... | 0 | 0 | 9954 | 646 | 636 | 0 | 552 | 0 | 48 | 2012 |
17 | 0 | 0 | 31 | 0.0 | 2.0 | Diglipur | 21.0 | 5.0 | NORTH AND MIDDLE ANDAMAN | 22 | ... | 0 | 0 | 4771 | 334 | 334 | 4 | 304 | 0 | 44 | 2012 |
18 | 0 | 0 | 47 | 0.0 | 0.0 | Diglipur | 23.0 | 1.0 | NORTH AND MIDDLE ANDAMAN | 17 | ... | 0 | 0 | 1808 | 260 | 255 | 0 | 242 | 0 | 10 | 2012 |
19 | 244394 | 0 | 92 | 0.0 | 2.0 | Diglipur | 37.0 | 9.0 | NORTH AND MIDDLE ANDAMAN | 37 | ... | 0 | 0 | 11853 | 742 | 739 | 0 | 685 | 0 | 83 | 2012 |
20 | 1075832 | 0 | 70 | 0.0 | 0.0 | Diglipur | 13.0 | 3.0 | NORTH AND MIDDLE ANDAMAN | 33 | ... | 0 | 0 | 11613 | 446 | 440 | 0 | 408 | 0 | 38 | 2012 |
21 | 0 | 0 | 41 | 0.0 | 3.0 | Diglipur | 32.0 | 5.0 | NORTH AND MIDDLE ANDAMAN | 19 | ... | 0 | 0 | 4063 | 316 | 316 | 2 | 298 | 0 | 46 | 2012 |
22 | 0 | 0 | 54 | 0.0 | 0.0 | Diglipur | 21.0 | 11.0 | NORTH AND MIDDLE ANDAMAN | 21 | ... | 0 | 0 | 9496 | 555 | 551 | 1 | 460 | 0 | 72 | 2012 |
23 | 150410 | 6230 | 45 | 0.0 | 1.0 | Diglipur | 29.0 | 4.0 | NORTH AND MIDDLE ANDAMAN | 27 | ... | 0 | 0 | 7532 | 378 | 374 | 3 | 354 | 4 | 51 | 2012 |
24 | 74048 | 0 | 15 | 0.0 | 1.0 | Diglipur | 10.0 | 3.0 | NORTH AND MIDDLE ANDAMAN | 7 | ... | 0 | 0 | 2003 | 226 | 222 | 0 | 173 | 0 | 25 | 2012 |
25 | 14240 | 0 | 4341917 | 0.0 | 7.0 | Mayabunder | 3864203.0 | 5.0 | NORTH AND MIDDLE ANDAMAN | 37 | ... | 0 | 0 | 8578 | 434 | 434 | 0 | 433 | 0 | 48 | 2012 |
26 | 1290144 | 0 | 7114028 | 0.0 | 3.0 | Mayabunder | 1878501.0 | 4.0 | NORTH AND MIDDLE ANDAMAN | 38 | ... | 0 | 0 | 7008 | 386 | 388 | 0 | 385 | 0 | 46 | 2012 |
27 | 16910 | 0 | 3679558 | 0.0 | 0.0 | Mayabunder | 1377881.0 | 6.0 | NORTH AND MIDDLE ANDAMAN | 16 | ... | 0 | 0 | 2731 | 150 | 150 | 0 | 150 | 0 | 47 | 2012 |
28 | 340336 | 0 | 7608985 | 0.0 | 10.0 | Mayabunder | 2869372.0 | 4.0 | NORTH AND MIDDLE ANDAMAN | 47 | ... | 0 | 0 | 11100 | 570 | 571 | 0 | 568 | 0 | 54 | 2012 |
29 | 532932 | 0 | 1340646 | 0.0 | 4.0 | Mayabunder | 856650.0 | 4.0 | NORTH AND MIDDLE ANDAMAN | 10 | ... | 0 | 0 | 9360 | 294 | 294 | 0 | 294 | 0 | 43 | 2012 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
106924 | 10334569 | 97888 | 848 | NaN | NaN | RAGHUNATHPUR-II | NaN | NaN | PURULIA | 103 | ... | 7923 | 21956 | 24195 | 2805 | 2805 | 25 | 1727 | 1934 | 5816 | 2017 |
106925 | 8168598 | 2247196 | 438 | NaN | NaN | RAGHUNATHPUR-II | NaN | NaN | PURULIA | 105 | ... | 37393 | 3441 | 23370 | 3982 | 3984 | 26 | 1982 | 2725 | 3217 | 2017 |
106926 | 3553754 | 0 | 360 | NaN | NaN | SANTURI | NaN | NaN | PURULIA | 36 | ... | 16922 | 4369 | 11933 | 1340 | 1339 | 14 | 722 | 1739 | 2086 | 2017 |
106927 | 14342705 | 5521 | 1057 | NaN | NaN | SANTURI | NaN | NaN | PURULIA | 147 | ... | 18995 | 40825 | 42956 | 3542 | 3542 | 38 | 2801 | 700 | 2500 | 2017 |
106928 | 8197670 | 0 | 444 | NaN | NaN | SANTURI | NaN | NaN | PURULIA | 82 | ... | 10108 | 13057 | 15003 | 1592 | 1592 | 9 | 1313 | 281 | 3310 | 2017 |
106929 | 6753142 | 428040 | 566 | NaN | NaN | SANTURI | NaN | NaN | PURULIA | 72 | ... | 30925 | 2127 | 27008 | 2550 | 2550 | 32 | 1945 | 1366 | 3613 | 2017 |
106930 | 5518460 | 0 | 457 | NaN | NaN | SANTURI | NaN | NaN | PURULIA | 56 | ... | 7104 | 25503 | 17819 | 1979 | 1979 | 56 | 1396 | 487 | 2573 | 2017 |
106931 | 3540582 | 180484 | 385 | NaN | NaN | SANTURI | NaN | NaN | PURULIA | 38 | ... | 9863 | 12513 | 11092 | 1528 | 1528 | 11 | 857 | 1598 | 2511 | 2017 |
106932 | 13625280 | 0 | 877 | NaN | NaN | Khoribari | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 137 | ... | 40347 | 13973 | 47066 | 1847 | 1847 | 19 | 1730 | 381 | 5717 | 2017 |
106933 | 17096940 | 0 | 1027 | NaN | NaN | Khoribari | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 170 | ... | 57306 | 25075 | 52795 | 2097 | 2097 | 0 | 1970 | 1802 | 3139 | 2017 |
106934 | 14412240 | 0 | 731 | NaN | NaN | Khoribari | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 144 | ... | 55862 | 8392 | 60938 | 1780 | 1780 | 25 | 1735 | 281 | 2534 | 2017 |
106935 | 16777256 | 0 | 884 | NaN | NaN | Khoribari | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 170 | ... | 72657 | 10651 | 54664 | 2178 | 2178 | 0 | 2081 | 1332 | 4067 | 2017 |
106936 | 7774024 | 0 | 468 | NaN | NaN | Matigara | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 78 | ... | 36469 | 1083 | 25972 | 1094 | 1094 | 1 | 1065 | 551 | 1412 | 2017 |
106937 | 11880180 | 0 | 669 | NaN | NaN | Matigara | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 121 | ... | 21585 | 22408 | 52214 | 853 | 853 | 2 | 831 | 297 | 965 | 2017 |
106938 | 8598620 | 0 | 571 | NaN | NaN | Matigara | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 86 | ... | 32764 | 1906 | 37590 | 770 | 770 | 0 | 668 | 359 | 935 | 2017 |
106939 | 12731752 | 0 | 667 | NaN | NaN | Matigara | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 127 | ... | 41995 | 126 | 57034 | 827 | 827 | 13 | 751 | 244 | 639 | 2017 |
106940 | 13612488 | 0 | 710 | NaN | NaN | Matigara | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 135 | ... | 32858 | 31088 | 41350 | 1423 | 1426 | 4 | 1325 | 298 | 1825 | 2017 |
106941 | 17647920 | 0 | 1435 | NaN | NaN | Naxalbari | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 175 | ... | 74050 | 3742 | 77052 | 1817 | 1817 | 12 | 1768 | 572 | 1687 | 2017 |
106942 | 20450660 | 0 | 847 | NaN | NaN | Naxalbari | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 205 | ... | 20202 | 65415 | 61640 | 2023 | 2023 | 0 | 2006 | 85 | 2492 | 2017 |
106943 | 9795060 | 0 | 519 | NaN | NaN | Naxalbari | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 99 | ... | 28897 | 19272 | 46761 | 675 | 675 | 1 | 666 | 22 | 1032 | 2017 |
106944 | 24039180 | 0 | 1708 | NaN | NaN | Naxalbari | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 244 | ... | 41961 | 28614 | 61380 | 2279 | 2280 | 14 | 2245 | 362 | 3034 | 2017 |
106945 | 17662140 | 0 | 1257 | NaN | NaN | Naxalbari | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 178 | ... | 43698 | 16947 | 63556 | 2277 | 2277 | 3 | 2209 | 0 | 1693 | 2017 |
106946 | 7904112 | 0 | 418 | NaN | NaN | Naxalbari | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 79 | ... | 3790 | 25626 | 40830 | 567 | 567 | 0 | 566 | 612 | 1049 | 2017 |
106947 | 12777216 | 0 | 1023 | NaN | NaN | Phansidewa | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 128 | ... | 12328 | 15593 | 20966 | 1804 | 1808 | 1 | 1517 | 947 | 2267 | 2017 |
106948 | 20092576 | 0 | 1654 | NaN | NaN | Phansidewa | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 198 | ... | 29500 | 49551 | 42795 | 2304 | 2304 | 11 | 2191 | 539 | 1523 | 2017 |
106949 | 16286338 | 8800 | 523 | NaN | NaN | Phansidewa | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 159 | ... | 6101 | 4368 | 23659 | 2363 | 2363 | 0 | 2198 | 774 | 4539 | 2017 |
106950 | 15877824 | 19360 | 1229 | NaN | NaN | Phansidewa | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 155 | ... | 27076 | 37795 | 33354 | 2187 | 2187 | 4 | 2019 | 612 | 1719 | 2017 |
106951 | 20107700 | 0 | 1048 | NaN | NaN | Phansidewa | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 201 | ... | 11479 | 69106 | 49520 | 2401 | 2402 | 6 | 2182 | 702 | 1260 | 2017 |
106952 | 16102484 | 0 | 1350 | NaN | NaN | Phansidewa | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 161 | ... | 48751 | 5241 | 52190 | 2336 | 2359 | 0 | 2102 | 920 | 2074 | 2017 |
106953 | 21957688 | 0 | 733 | NaN | NaN | Phansidewa | NaN | NaN | SILIGURI MAHAKUMA PARISAD | 216 | ... | 46217 | 7608 | 43426 | 2689 | 2689 | 19 | 2494 | 866 | 1424 | 2017 |
981716 rows × 55 columns
# Get unique records
# One row per distinct (district, block, panchayat) name combination.
name_columns = ['District Name', 'Block Name', 'Panchayat Name']
togeocode_df = df[name_columns].drop_duplicates().reset_index(drop=True)
# Build the upper-cased, repeated-letter-collapsed match keys.
# NOTE(review): Series.replace(pattern, '') without regex=True only replaces
# cells whose whole value equals the pattern, so the '([^A-Za-z]+)' and
# 'PANCHAYAT' calls below appear to leave the names unchanged (only the
# .str.replace('GP', '') call edits substrings). Confirm the intent before
# changing it; the shapefile keys these are merged against are built with the
# same expressions and must stay in step.
for source_column, key_column in (('District Name', 'district_x'), ('Block Name', 'block_x')):
    togeocode_df[key_column] = togeocode_df[source_column].str.upper().replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
panchayat_upper = togeocode_df['Panchayat Name'].str.upper()
panchayat_clean = panchayat_upper.replace('PANCHAYAT', '').str.replace('GP', '').replace('([^A-Za-z]+)', '')
togeocode_df['panchayat_x'] = panchayat_clean.apply(remove_consecutive_duplicates)
# Synthetic record id: row position offset by one million.
togeocode_df['id'] = togeocode_df.index+1000000
togeocode_df
District Name | Block Name | Panchayat Name | district_x | block_x | panchayat_x | id | |
---|---|---|---|---|---|---|---|
0 | NICOBARS | Campbell Bay | CAMPBELL BAY | NICOBARS | CAMPBEL BAY | CAMPBEL BAY | 1000000 |
1 | NICOBARS | Campbell Bay | GOVINDNAGAR | NICOBARS | CAMPBEL BAY | GOVINDNAGAR | 1000001 |
2 | NICOBARS | Campbell Bay | Great & Little Nicobar | NICOBARS | CAMPBEL BAY | GREAT & LITLE NICOBAR | 1000002 |
3 | NICOBARS | Campbell Bay | LAXMI NAGAR | NICOBARS | CAMPBEL BAY | LAXMI NAGAR | 1000003 |
4 | NICOBARS | Nancowry | CHOWRA TC | NICOBARS | NANCOWRY | CHOWRA TC | 1000004 |
5 | NICOBARS | Nancowry | KAMORTA TC | NICOBARS | NANCOWRY | KAMORTA TC | 1000005 |
6 | NICOBARS | Nancowry | KATCHAL TC | NICOBARS | NANCOWRY | KATCHAL TC | 1000006 |
7 | NICOBARS | Nancowry | NANCOWRY TC | NICOBARS | NANCOWRY | NANCOWRY TC | 1000007 |
8 | NICOBARS | Nancowry | TERESSA TC | NICOBARS | NANCOWRY | TERESA TC | 1000008 |
9 | NICOBARS | Nicobar | TRIBAL COUNCIL | NICOBARS | NICOBAR | TRIBAL COUNCIL | 1000009 |
10 | NORTH AND MIDDLE ANDAMAN | Diglipur | Diglipur | NORTH AND MIDLE ANDAMAN | DIGLIPUR | DIGLIPUR | 1000010 |
11 | NORTH AND MIDDLE ANDAMAN | Diglipur | Gandhinagar | NORTH AND MIDLE ANDAMAN | DIGLIPUR | GANDHINAGAR | 1000011 |
12 | NORTH AND MIDDLE ANDAMAN | Diglipur | Kalighat | NORTH AND MIDLE ANDAMAN | DIGLIPUR | KALIGHAT | 1000012 |
13 | NORTH AND MIDDLE ANDAMAN | Diglipur | Keralapuram | NORTH AND MIDLE ANDAMAN | DIGLIPUR | KERALAPURAM | 1000013 |
14 | NORTH AND MIDDLE ANDAMAN | Diglipur | Kishori Nagar | NORTH AND MIDLE ANDAMAN | DIGLIPUR | KISHORI NAGAR | 1000014 |
15 | NORTH AND MIDDLE ANDAMAN | Diglipur | Laxmipur | NORTH AND MIDLE ANDAMAN | DIGLIPUR | LAXMIPUR | 1000015 |
16 | NORTH AND MIDDLE ANDAMAN | Diglipur | Madhupur | NORTH AND MIDLE ANDAMAN | DIGLIPUR | MADHUPUR | 1000016 |
17 | NORTH AND MIDDLE ANDAMAN | Diglipur | Nabagram | NORTH AND MIDLE ANDAMAN | DIGLIPUR | NABAGRAM | 1000017 |
18 | NORTH AND MIDDLE ANDAMAN | Diglipur | Paschimsagar | NORTH AND MIDLE ANDAMAN | DIGLIPUR | PASCHIMSAGAR | 1000018 |
19 | NORTH AND MIDDLE ANDAMAN | Diglipur | Radha Nagar | NORTH AND MIDLE ANDAMAN | DIGLIPUR | RADHA NAGAR | 1000019 |
20 | NORTH AND MIDDLE ANDAMAN | Diglipur | Rama Krishnagram | NORTH AND MIDLE ANDAMAN | DIGLIPUR | RAMA KRISHNAGRAM | 1000020 |
21 | NORTH AND MIDDLE ANDAMAN | Diglipur | Ramnagar | NORTH AND MIDLE ANDAMAN | DIGLIPUR | RAMNAGAR | 1000021 |
22 | NORTH AND MIDDLE ANDAMAN | Diglipur | Shibpur | NORTH AND MIDLE ANDAMAN | DIGLIPUR | SHIBPUR | 1000022 |
23 | NORTH AND MIDDLE ANDAMAN | Diglipur | Sitanagar | NORTH AND MIDLE ANDAMAN | DIGLIPUR | SITANAGAR | 1000023 |
24 | NORTH AND MIDDLE ANDAMAN | Diglipur | Subashgram | NORTH AND MIDLE ANDAMAN | DIGLIPUR | SUBASHGRAM | 1000024 |
25 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Basantipur | NORTH AND MIDLE ANDAMAN | MAYABUNDER | BASANTIPUR | 1000025 |
26 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Chainpur | NORTH AND MIDLE ANDAMAN | MAYABUNDER | CHAINPUR | 1000026 |
27 | NORTH AND MIDDLE ANDAMAN | Mayabunder | De-Reserve Area | NORTH AND MIDLE ANDAMAN | MAYABUNDER | DE-RESERVE AREA | 1000027 |
28 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Harinagar | NORTH AND MIDLE ANDAMAN | MAYABUNDER | HARINAGAR | 1000028 |
29 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Mayabunder | NORTH AND MIDLE ANDAMAN | MAYABUNDER | MAYABUNDER | 1000029 |
... | ... | ... | ... | ... | ... | ... | ... |
261011 | JHARGRAM | JHARGRAM | LODHASULI | JHARGRAM | JHARGRAM | LODHASULI | 1261011 |
261012 | JHARGRAM | JHARGRAM | MANIKPARA | JHARGRAM | JHARGRAM | MANIKPARA | 1261012 |
261013 | JHARGRAM | JHARGRAM | NEDABAHARA | JHARGRAM | JHARGRAM | NEDABAHARA | 1261013 |
261014 | JHARGRAM | JHARGRAM | PATASHIMUL | JHARGRAM | JHARGRAM | PATASHIMUL | 1261014 |
261015 | JHARGRAM | JHARGRAM | RADHANAGAR | JHARGRAM | JHARGRAM | RADHANAGAR | 1261015 |
261016 | JHARGRAM | JHARGRAM | SAPDHARA | JHARGRAM | JHARGRAM | SAPDHARA | 1261016 |
261017 | JHARGRAM | JHARGRAM | SARDIHA | JHARGRAM | JHARGRAM | SARDIHA | 1261017 |
261018 | JHARGRAM | JHARGRAM | SHALBONI | JHARGRAM | JHARGRAM | SHALBONI | 1261018 |
261019 | JHARGRAM | NAYAGRAM | ARA | JHARGRAM | NAYAGRAM | ARA | 1261019 |
261020 | JHARGRAM | NAYAGRAM | BALIGERIA | JHARGRAM | NAYAGRAM | BALIGERIA | 1261020 |
261021 | JHARGRAM | NAYAGRAM | BARAKHAKRI | JHARGRAM | NAYAGRAM | BARAKHAKRI | 1261021 |
261022 | JHARGRAM | NAYAGRAM | BARANEGUI | JHARGRAM | NAYAGRAM | BARANEGUI | 1261022 |
261023 | JHARGRAM | NAYAGRAM | BERAJAL | JHARGRAM | NAYAGRAM | BERAJAL | 1261023 |
261024 | JHARGRAM | NAYAGRAM | CHANDABILLA | JHARGRAM | NAYAGRAM | CHANDABILA | 1261024 |
261025 | JHARGRAM | NAYAGRAM | CHANDRAREKHA | JHARGRAM | NAYAGRAM | CHANDRAREKHA | 1261025 |
261026 | JHARGRAM | NAYAGRAM | JAMIRAPAL | JHARGRAM | NAYAGRAM | JAMIRAPAL | 1261026 |
261027 | JHARGRAM | NAYAGRAM | KHARIKAMATHANI | JHARGRAM | NAYAGRAM | KHARIKAMATHANI | 1261027 |
261028 | JHARGRAM | NAYAGRAM | MALAM | JHARGRAM | NAYAGRAM | MALAM | 1261028 |
261029 | JHARGRAM | NAYAGRAM | NAYAGRAM | JHARGRAM | NAYAGRAM | NAYAGRAM | 1261029 |
261030 | JHARGRAM | NAYAGRAM | PATINA | JHARGRAM | NAYAGRAM | PATINA | 1261030 |
261031 | JHARGRAM | SANKRAIL | ANDHARI | JHARGRAM | SANKRAIL | ANDHARI | 1261031 |
261032 | JHARGRAM | SANKRAIL | CHHATRI | JHARGRAM | SANKRAIL | CHATRI | 1261032 |
261033 | JHARGRAM | SANKRAIL | DHANGHORI | JHARGRAM | SANKRAIL | DHANGHORI | 1261033 |
261034 | JHARGRAM | SANKRAIL | KHUDMORAI | JHARGRAM | SANKRAIL | KHUDMORAI | 1261034 |
261035 | JHARGRAM | SANKRAIL | KULTIKRI | JHARGRAM | SANKRAIL | KULTIKRI | 1261035 |
261036 | JHARGRAM | SANKRAIL | LAUDAHA | JHARGRAM | SANKRAIL | LAUDAHA | 1261036 |
261037 | JHARGRAM | SANKRAIL | PATHRA | JHARGRAM | SANKRAIL | PATHRA | 1261037 |
261038 | JHARGRAM | SANKRAIL | RAGRAH | JHARGRAM | SANKRAIL | RAGRAH | 1261038 |
261039 | JHARGRAM | SANKRAIL | ROHINI | JHARGRAM | SANKRAIL | ROHINI | 1261039 |
261040 | JHARGRAM | SANKRAIL | SANKRAIL | JHARGRAM | SANKRAIL | SANKRAIL | 1261040 |
261041 rows × 7 columns
# Geocode using gram panchayat file
gp_point_shp = os.path.join(data_folder, 'shapefiles', 'gp-point-shp.shp')
gp_point_df = gpd.read_file(gp_point_shp)
# Keep only points whose district, block and panchayat names are all present:
# a missing name cannot be normalised by remove_consecutive_duplicates.
# .copy() makes the filtered frame independent so the column assignments below
# do not trigger SettingWithCopyWarning.
gp_has_names = gp_point_df[['dtname', 'block_name', 'gp_name']].notnull().all(axis=1)
gp_point_df = gp_point_df[gp_has_names].copy()
# Match keys, built with the same expressions as the *_x keys on togeocode_df.
# NOTE(review): Series.replace(pattern, '') without regex=True only replaces
# cells whose whole value equals the pattern, so the '([^A-Za-z]+)' and
# 'PANCHAYAT' calls appear to be no-ops. Left as-is on purpose: these keys must
# stay identical in form to the *_x keys they are merged against.
gp_point_df['district_y'] = gp_point_df['dtname'].str.upper().replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
gp_point_df['block_y'] = gp_point_df['block_name'].str.upper().replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
gp_point_df['panchayat_y'] = gp_point_df['gp_name'].str.upper().replace('PANCHAYAT', '').str.replace('GP', '').replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
# Coordinates are read from the geometry's y / x attributes.
gp_point_df['lat'] = gp_point_df.geometry.y
gp_point_df['lon'] = gp_point_df.geometry.x
# Exact match on all three normalised names.
gp_geocoded_df = pd.merge(togeocode_df, gp_point_df, how='inner', left_on=['district_x', 'block_x', 'panchayat_x'], right_on=['district_y', 'block_y', 'panchayat_y'])
# A record can match several points; keep the first match per id.
gp_geocoded_df = gp_geocoded_df[['District Name', 'Block Name', 'Panchayat Name', 'lat', 'lon', 'id']].drop_duplicates(subset='id', keep="first").reset_index(drop=True)
gp_geocoded_df
District Name | Block Name | Panchayat Name | lat | lon | id | |
---|---|---|---|---|---|---|
0 | ANANTAPUR | Agali | HULIKERADEVARAHALLI | 13.768624 | 77.009729 | 1000080 |
1 | ANANTAPUR | Agali | MADHUDI | 13.810206 | 77.025746 | 1000084 |
2 | ANANTAPUR | Agali | RAVUDI | 13.841146 | 77.019506 | 1000088 |
3 | ANANTAPUR | Amadagur | CHEEKIREVULAPALLE | 13.874734 | 78.041962 | 1000090 |
4 | ANANTAPUR | Amadagur | KASSAMUDRAM | 13.933423 | 78.014464 | 1000094 |
5 | ANANTAPUR | Amadagur | MOHAMMADABAD | 13.967322 | 78.010266 | 1000095 |
6 | ANANTAPUR | Amadagur | THUMMALA | 13.929267 | 78.064326 | 1000097 |
7 | ANANTAPUR | Amarapuram | HALUKURU | 14.090250 | 77.001711 | 1000102 |
8 | ANANTAPUR | Amarapuram | HEMAVATHI | 14.033064 | 76.986075 | 1000103 |
9 | ANANTAPUR | Amarapuram | NIDRAGATTA | 14.037968 | 76.954373 | 1000105 |
10 | ANANTAPUR | Amarapuram | SIVARAM | 14.032437 | 77.023079 | 1000106 |
11 | ANANTAPUR | Anantapur | KATIGANIKALVA | 14.612996 | 77.554218 | 1000121 |
12 | ANANTAPUR | Anantapur | KURUGUNTA | 14.674663 | 77.519245 | 1000124 |
13 | ANANTAPUR | Anantapur | MANNILA | 14.556448 | 77.686479 | 1000125 |
14 | ANANTAPUR | Anantapur | THATICHERLA | 14.752598 | 77.571765 | 1000132 |
15 | ANANTAPUR | Atmakur | MADIGUBBA | 14.638847 | 77.453057 | 1000137 |
16 | ANANTAPUR | Atmakur | THOPUDURTHI | 14.552931 | 77.492462 | 1000147 |
17 | ANANTAPUR | Bathalapalle | APPARACHERUVU | 14.479485 | 77.798019 | 1000149 |
18 | ANANTAPUR | Bathalapalle | BATHALAPALLE | 14.504290 | 77.765405 | 1000150 |
19 | ANANTAPUR | Bathalapalle | MALYAVANTHAM | 14.569889 | 77.768676 | 1000154 |
20 | ANANTAPUR | Bathalapalle | OBULAPURAM | 14.458996 | 77.843963 | 1000157 |
21 | ANANTAPUR | Beluguppa | AVULENNA | 14.683234 | 77.153240 | 1000162 |
22 | ANANTAPUR | Beluguppa | BELUGUPPA | 14.712986 | 77.136826 | 1000163 |
23 | ANANTAPUR | Beluguppa | DUDDEKUNTA | 14.680413 | 77.220876 | 1000164 |
24 | ANANTAPUR | Beluguppa | GANGAVARAM | 14.640860 | 77.207144 | 1000165 |
25 | ANANTAPUR | Beluguppa | KALVAPALLE | 14.625519 | 77.267957 | 1000167 |
26 | ANANTAPUR | Beluguppa | KONAMPALLE | 14.669387 | 77.262099 | 1000168 |
27 | ANANTAPUR | Beluguppa | THAGGUPARTHY | 14.725207 | 77.198641 | 1000176 |
28 | ANANTAPUR | Beluguppa | YERRAGUDI | 14.702239 | 77.176537 | 1000178 |
29 | ANANTAPUR | Bommanahal | BANDUR | 14.949310 | 77.008596 | 1000179 |
... | ... | ... | ... | ... | ... | ... |
47874 | RAE BARELI | DALMAU | TERUKHA | 26.154272 | 81.105019 | 1259028 |
47875 | RAE BARELI | DALMAU | TIKRAN | 26.215372 | 81.008023 | 1259029 |
47876 | RAE BARELI | SHIVGARH | ACHAI | 26.515666 | 81.345122 | 1259033 |
47877 | RAE BARELI | SHIVGARH | BAHUDA KHURD | 26.589912 | 81.176757 | 1259036 |
47878 | RAE BARELI | SHIVGARH | BAITI | 26.568444 | 81.234854 | 1259037 |
47879 | RAE BARELI | SHIVGARH | BEDARU | 26.568102 | 81.187406 | 1259041 |
47880 | RAE BARELI | SHIVGARH | BHAUSI | 26.496343 | 81.208293 | 1259042 |
47881 | RAE BARELI | SHIVGARH | CHITWANIYA | 26.559720 | 81.217836 | 1259044 |
47882 | RAE BARELI | SHIVGARH | GOVINDPUR | 26.477405 | 81.218719 | 1259050 |
47883 | RAE BARELI | SHIVGARH | KASNA | 26.524071 | 81.281588 | 1259054 |
47884 | RAE BARELI | SHIVGARH | KUMBHI | 26.571530 | 81.261130 | 1259057 |
47885 | RAE BARELI | SHIVGARH | KUMHARAWAN | 26.548436 | 81.274146 | 1259058 |
47886 | RAE BARELI | SHIVGARH | OSAH | 26.473133 | 81.238772 | 1259061 |
47887 | RAE BARELI | SHIVGARH | PADARIYA | 26.505045 | 81.280267 | 1259062 |
47888 | RAE BARELI | SHIVGARH | RAIPUR | 26.513434 | 81.357935 | 1259065 |
47889 | RAE BARELI | SHIVGARH | RAJAPUR | 26.517628 | 81.274542 | 1259067 |
47890 | RAE BARELI | SHIVGARH | SURAJPUR | 26.591872 | 81.221682 | 1259075 |
47891 | PITHORAGARH | GANGOLIHAT | JARTOLA | 29.534581 | 80.093678 | 1259118 |
47892 | AMRITSAR | HARSHA CHHINA | NEPAL | 31.833863 | 74.696878 | 1259139 |
47893 | GORAKHPUR | BANSGAON | GOCHHARAN | 26.562101 | 83.354279 | 1260511 |
47894 | HATHRAS | MURSAN | MOHANPUR | 27.568727 | 78.003088 | 1260513 |
47895 | HATHRAS | MURSAN | NAGLA NANDU | 27.589662 | 77.993367 | 1260515 |
47896 | JAUNPUR | BAKSHA | KHUNSHAPUR | 25.805381 | 82.483548 | 1260517 |
47897 | JAUNPUR | BAKSHA | MARGUPUR | 25.807982 | 82.444158 | 1260519 |
47898 | JAUNPUR | BAKSHA | NARAYANPUR | 25.856477 | 82.450771 | 1260520 |
47899 | SITAPUR | GONDLAMAU | BAROY | 27.301445 | 80.732414 | 1260529 |
47900 | SITAPUR | GONDLAMAU | KABIRPUR | 27.254446 | 80.674254 | 1260533 |
47901 | SITAPUR | GONDLAMAU | KAMLAPUR | 27.284050 | 80.723617 | 1260534 |
47902 | SITAPUR | GONDLAMAU | KODIKAPUR | 27.304422 | 80.623020 | 1260536 |
47903 | SITAPUR | GONDLAMAU | MEERAPUR | 27.334557 | 80.552864 | 1260540 |
47904 rows × 6 columns
# Filter non geocoded records
# .copy() makes the remainder an independent frame, so assigning columns on it
# is unambiguous and does not trigger SettingWithCopyWarning.
non_geocoded_df = togeocode_df[~togeocode_df['id'].isin(gp_geocoded_df['id'])].copy()
# Geocode using Census Village file
census_village_shp = os.path.join(data_folder, 'shapefiles', 'Census Villages.shp')
cenvil_df = gpd.read_file(census_village_shp)
# Keep only villages whose district, sub-district and village names are all
# present; .copy() for the same reason as above.
cenvil_has_names = cenvil_df[['dtname', 'sdtname', 'vilname']].notnull().all(axis=1)
cenvil_df = cenvil_df[cenvil_has_names].copy()
# Match keys, built with the same expressions as the *_x keys on togeocode_df.
# NOTE(review): Series.replace(pattern, '') without regex=True only replaces
# cells whose whole value equals the pattern, so the '([^A-Za-z]+)' and
# 'PANCHAYAT' calls appear to be no-ops. Left as-is on purpose: these keys must
# stay identical in form to the *_x keys they are merged against.
cenvil_df['district_y'] = cenvil_df['dtname'].str.upper().replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
cenvil_df['block_y'] = cenvil_df['sdtname'].str.upper().replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
cenvil_df['village_y'] = cenvil_df['vilname'].str.upper().replace('PANCHAYAT', '').str.replace('GP', '').replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
# Coordinates are read from the geometry's y / x attributes.
cenvil_df['lat'] = cenvil_df.geometry.y
cenvil_df['lon'] = cenvil_df.geometry.x
# Exact match of the panchayat key against the village key within the same
# district and block keys.
cenvil_geocoded_df = pd.merge(non_geocoded_df, cenvil_df, how='inner', left_on=['district_x', 'block_x', 'panchayat_x'], right_on=['district_y', 'block_y', 'village_y'])
# A record can match several villages; keep the first match per id.
cenvil_geocoded_df = cenvil_geocoded_df[['District Name', 'Block Name', 'Panchayat Name', 'lat', 'lon', 'id']].drop_duplicates(subset='id', keep="first").reset_index(drop=True)
cenvil_geocoded_df
District Name | Block Name | Panchayat Name | lat | lon | id | |
---|---|---|---|---|---|---|
0 | ANANTAPUR | Agali | AGALI | 13.784665 | 77.054368 | 1000078 |
1 | ANANTAPUR | Agali | INAGALORE | 13.835137 | 77.042107 | 1000081 |
2 | ANANTAPUR | Agali | KODIHALLI | 13.781844 | 77.072040 | 1000083 |
3 | ANANTAPUR | Agali | NARASAMBUDI | 13.807735 | 77.044694 | 1000085 |
4 | ANANTAPUR | Amadagur | AMADAGUR | 13.888526 | 78.021691 | 1000089 |
5 | ANANTAPUR | Amarapuram | AMARAPURAM | 14.131109 | 76.976506 | 1000100 |
6 | ANANTAPUR | Amarapuram | BASAVANAHALLI | 14.024649 | 77.062410 | 1000101 |
7 | ANANTAPUR | Amarapuram | VALASA | 14.164192 | 76.946107 | 1000108 |
8 | ANANTAPUR | Anantapur | ALAMURU | 14.647386 | 77.535765 | 1000112 |
9 | ANANTAPUR | Anantapur | CHIYYEDU | 14.605295 | 77.706863 | 1000115 |
10 | ANANTAPUR | Anantapur | ITIKALAPALLE | 14.601605 | 77.657618 | 1000116 |
11 | ANANTAPUR | Anantapur | KODIMI | 14.709472 | 77.548963 | 1000123 |
12 | ANANTAPUR | Anantapur | RACHANAPALLE | 14.701827 | 77.551172 | 1000128 |
13 | ANANTAPUR | Atmakur | ATMAKUR | 14.647519 | 77.357418 | 1000134 |
14 | ANANTAPUR | Atmakur | GORIDINDLA | 14.573467 | 77.376384 | 1000136 |
15 | ANANTAPUR | Atmakur | SANAPA | 14.606638 | 77.469307 | 1000144 |
16 | ANANTAPUR | Bathalapalle | D.CHERLOPALLE | 14.441108 | 77.874763 | 1000151 |
17 | ANANTAPUR | Bathalapalle | DAMPETLA | 14.463522 | 77.889151 | 1000152 |
18 | ANANTAPUR | Bathalapalle | SANGALA | 14.448167 | 77.818805 | 1000158 |
19 | ANANTAPUR | Beluguppa | ANKAMPALLE | 14.703058 | 77.237416 | 1000161 |
20 | ANANTAPUR | Beluguppa | NARASAPURAM | 14.652366 | 77.065626 | 1000171 |
21 | ANANTAPUR | Beluguppa | SEERPI | 14.666179 | 77.156520 | 1000174 |
22 | ANANTAPUR | Bommanahal | BOLLANAGUDAM | 14.995705 | 77.049252 | 1000180 |
23 | ANANTAPUR | Bommanahal | KOLAGANAHALLI | 14.917239 | 76.948635 | 1000188 |
24 | ANANTAPUR | Brahmasamudram | BHAIRASAMUDRAM | 14.567910 | 76.944043 | 1000198 |
25 | ANANTAPUR | Brahmasamudram | PILLALAPALLE | 14.570944 | 77.012727 | 1000208 |
26 | ANANTAPUR | Brahmasamudram | THEETAKAL | 14.639004 | 77.021317 | 1000211 |
27 | ANANTAPUR | Bukkapatnam | KOTHAKOTA | 14.240095 | 77.940510 | 1000218 |
28 | ANANTAPUR | Bukkapatnam | MARALA | 14.306887 | 77.928113 | 1000220 |
29 | ANANTAPUR | Bukkapatnam | SIDDARAMPURAM | 14.275607 | 77.884052 | 1000223 |
... | ... | ... | ... | ... | ... | ... |
22518 | AMETHI | TILOI | RAJANPUR | 26.408847 | 81.499132 | 1260503 |
22519 | AMETHI | TILOI | RAJAPUR HALIM | 26.435741 | 81.413751 | 1260504 |
22520 | AMETHI | TILOI | SANGRAMPUR | 26.327451 | 81.465089 | 1260505 |
22521 | AMETHI | TILOI | SAVITAPUR | 26.317198 | 81.450202 | 1260506 |
22522 | AMETHI | TILOI | SIJNI | 26.339191 | 81.372623 | 1260507 |
22523 | ALIPURDUAR | FALAKATA | DALGAON | 26.671157 | 89.164596 | 1260567 |
22524 | ALIPURDUAR | FALAKATA | MAIRADANGA | 26.577557 | 89.233014 | 1260576 |
22525 | ALIPURDUAR | KALCHINI | CHUAPARA | 26.732861 | 89.422926 | 1260579 |
22526 | ALIPURDUAR | KUMARGRAM | CHENGMARI | 26.581170 | 89.821368 | 1260590 |
22527 | ALIPURDUAR | KUMARGRAM | KUMARGRAM | 26.612292 | 89.829210 | 1260595 |
22528 | EAST DISTRICT | PAKYONG | TAZA | 27.209626 | 88.618207 | 1260703 |
22529 | JHARGRAM | JHARGRAM | CHANDRI | 22.326953 | 86.921443 | 1261008 |
22530 | JHARGRAM | JHARGRAM | CHUBKA | 22.396438 | 87.193081 | 1261009 |
22531 | JHARGRAM | JHARGRAM | MANIKPARA | 22.369905 | 87.125101 | 1261012 |
22532 | JHARGRAM | JHARGRAM | PATASHIMUL | 22.285662 | 86.980339 | 1261014 |
22533 | JHARGRAM | JHARGRAM | RADHANAGAR | 22.477238 | 87.010400 | 1261015 |
22534 | JHARGRAM | JHARGRAM | SAPDHARA | 22.377526 | 86.926356 | 1261016 |
22535 | JHARGRAM | JHARGRAM | SARDIHA | 22.375892 | 87.153331 | 1261017 |
22536 | JHARGRAM | NAYAGRAM | ARA | 21.900784 | 87.050799 | 1261019 |
22537 | JHARGRAM | NAYAGRAM | CHANDABILLA | 22.086912 | 87.017334 | 1261024 |
22538 | JHARGRAM | NAYAGRAM | MALAM | 22.066601 | 87.152645 | 1261028 |
22539 | JHARGRAM | NAYAGRAM | NAYAGRAM | 22.035067 | 87.170341 | 1261029 |
22540 | JHARGRAM | NAYAGRAM | PATINA | 22.162124 | 87.006604 | 1261030 |
22541 | JHARGRAM | SANKRAIL | ANDHARI | 22.154164 | 87.092958 | 1261031 |
22542 | JHARGRAM | SANKRAIL | CHHATRI | 22.237054 | 87.089601 | 1261032 |
22543 | JHARGRAM | SANKRAIL | KULTIKRI | 22.175490 | 87.165471 | 1261035 |
22544 | JHARGRAM | SANKRAIL | LAUDAHA | 22.143824 | 87.123349 | 1261036 |
22545 | JHARGRAM | SANKRAIL | PATHRA | 22.260671 | 87.159645 | 1261037 |
22546 | JHARGRAM | SANKRAIL | ROHINI | 22.165943 | 87.084149 | 1261039 |
22547 | JHARGRAM | SANKRAIL | SANKRAIL | 22.196941 | 87.138044 | 1261040 |
22548 rows × 6 columns
# Filter non geocoded records
# Drop every record whose id was matched against the census village file.
matched_by_village = non_geocoded_df['id'].isin(cenvil_geocoded_df['id'])
non_geocoded_df = non_geocoded_df[~matched_by_village]
non_geocoded_df
District Name | Block Name | Panchayat Name | district_x | block_x | panchayat_x | id | |
---|---|---|---|---|---|---|---|
0 | NICOBARS | Campbell Bay | CAMPBELL BAY | NICOBARS | CAMPBEL BAY | CAMPBEL BAY | 1000000 |
1 | NICOBARS | Campbell Bay | GOVINDNAGAR | NICOBARS | CAMPBEL BAY | GOVINDNAGAR | 1000001 |
2 | NICOBARS | Campbell Bay | Great & Little Nicobar | NICOBARS | CAMPBEL BAY | GREAT & LITLE NICOBAR | 1000002 |
3 | NICOBARS | Campbell Bay | LAXMI NAGAR | NICOBARS | CAMPBEL BAY | LAXMI NAGAR | 1000003 |
4 | NICOBARS | Nancowry | CHOWRA TC | NICOBARS | NANCOWRY | CHOWRA TC | 1000004 |
5 | NICOBARS | Nancowry | KAMORTA TC | NICOBARS | NANCOWRY | KAMORTA TC | 1000005 |
6 | NICOBARS | Nancowry | KATCHAL TC | NICOBARS | NANCOWRY | KATCHAL TC | 1000006 |
7 | NICOBARS | Nancowry | NANCOWRY TC | NICOBARS | NANCOWRY | NANCOWRY TC | 1000007 |
8 | NICOBARS | Nancowry | TERESSA TC | NICOBARS | NANCOWRY | TERESA TC | 1000008 |
9 | NICOBARS | Nicobar | TRIBAL COUNCIL | NICOBARS | NICOBAR | TRIBAL COUNCIL | 1000009 |
10 | NORTH AND MIDDLE ANDAMAN | Diglipur | Diglipur | NORTH AND MIDLE ANDAMAN | DIGLIPUR | DIGLIPUR | 1000010 |
11 | NORTH AND MIDDLE ANDAMAN | Diglipur | Gandhinagar | NORTH AND MIDLE ANDAMAN | DIGLIPUR | GANDHINAGAR | 1000011 |
12 | NORTH AND MIDDLE ANDAMAN | Diglipur | Kalighat | NORTH AND MIDLE ANDAMAN | DIGLIPUR | KALIGHAT | 1000012 |
13 | NORTH AND MIDDLE ANDAMAN | Diglipur | Keralapuram | NORTH AND MIDLE ANDAMAN | DIGLIPUR | KERALAPURAM | 1000013 |
14 | NORTH AND MIDDLE ANDAMAN | Diglipur | Kishori Nagar | NORTH AND MIDLE ANDAMAN | DIGLIPUR | KISHORI NAGAR | 1000014 |
15 | NORTH AND MIDDLE ANDAMAN | Diglipur | Laxmipur | NORTH AND MIDLE ANDAMAN | DIGLIPUR | LAXMIPUR | 1000015 |
16 | NORTH AND MIDDLE ANDAMAN | Diglipur | Madhupur | NORTH AND MIDLE ANDAMAN | DIGLIPUR | MADHUPUR | 1000016 |
17 | NORTH AND MIDDLE ANDAMAN | Diglipur | Nabagram | NORTH AND MIDLE ANDAMAN | DIGLIPUR | NABAGRAM | 1000017 |
18 | NORTH AND MIDDLE ANDAMAN | Diglipur | Paschimsagar | NORTH AND MIDLE ANDAMAN | DIGLIPUR | PASCHIMSAGAR | 1000018 |
19 | NORTH AND MIDDLE ANDAMAN | Diglipur | Radha Nagar | NORTH AND MIDLE ANDAMAN | DIGLIPUR | RADHA NAGAR | 1000019 |
20 | NORTH AND MIDDLE ANDAMAN | Diglipur | Rama Krishnagram | NORTH AND MIDLE ANDAMAN | DIGLIPUR | RAMA KRISHNAGRAM | 1000020 |
21 | NORTH AND MIDDLE ANDAMAN | Diglipur | Ramnagar | NORTH AND MIDLE ANDAMAN | DIGLIPUR | RAMNAGAR | 1000021 |
22 | NORTH AND MIDDLE ANDAMAN | Diglipur | Shibpur | NORTH AND MIDLE ANDAMAN | DIGLIPUR | SHIBPUR | 1000022 |
23 | NORTH AND MIDDLE ANDAMAN | Diglipur | Sitanagar | NORTH AND MIDLE ANDAMAN | DIGLIPUR | SITANAGAR | 1000023 |
24 | NORTH AND MIDDLE ANDAMAN | Diglipur | Subashgram | NORTH AND MIDLE ANDAMAN | DIGLIPUR | SUBASHGRAM | 1000024 |
25 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Basantipur | NORTH AND MIDLE ANDAMAN | MAYABUNDER | BASANTIPUR | 1000025 |
26 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Chainpur | NORTH AND MIDLE ANDAMAN | MAYABUNDER | CHAINPUR | 1000026 |
27 | NORTH AND MIDDLE ANDAMAN | Mayabunder | De-Reserve Area | NORTH AND MIDLE ANDAMAN | MAYABUNDER | DE-RESERVE AREA | 1000027 |
28 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Harinagar | NORTH AND MIDLE ANDAMAN | MAYABUNDER | HARINAGAR | 1000028 |
29 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Mayabunder | NORTH AND MIDLE ANDAMAN | MAYABUNDER | MAYABUNDER | 1000029 |
... | ... | ... | ... | ... | ... | ... | ... |
260992 | JHARGRAM | GOPIBALLAVPUR-I | KENDUGARI | JHARGRAM | GOPIBALAVPUR-I | KENDUGARI | 1260992 |
260993 | JHARGRAM | GOPIBALLAVPUR-I | SARIA | JHARGRAM | GOPIBALAVPUR-I | SARIA | 1260993 |
260994 | JHARGRAM | GOPIBALLAVPUR-I | SATMA | JHARGRAM | GOPIBALAVPUR-I | SATMA | 1260994 |
260995 | JHARGRAM | GOPIBALLAVPUR-I | SHASHRHA | JHARGRAM | GOPIBALAVPUR-I | SHASHRHA | 1260995 |
260996 | JHARGRAM | JAMBANI | CHILKIGARH | JHARGRAM | JAMBANI | CHILKIGARH | 1260996 |
260997 | JHARGRAM | JAMBANI | CHINCHRA | JHARGRAM | JAMBANI | CHINCHRA | 1260997 |
260998 | JHARGRAM | JAMBANI | DHARSA | JHARGRAM | JAMBANI | DHARSA | 1260998 |
260999 | JHARGRAM | JAMBANI | DUBRA | JHARGRAM | JAMBANI | DUBRA | 1260999 |
261000 | JHARGRAM | JAMBANI | GIDHNI | JHARGRAM | JAMBANI | GIDHNI | 1261000 |
261001 | JHARGRAM | JAMBANI | JAMBONI | JHARGRAM | JAMBANI | JAMBONI | 1261001 |
261002 | JHARGRAM | JAMBANI | KAPGARI | JHARGRAM | JAMBANI | KAPGARI | 1261002 |
261003 | JHARGRAM | JAMBANI | KENDADANGRI | JHARGRAM | JAMBANI | KENDADANGRI | 1261003 |
261004 | JHARGRAM | JAMBANI | LALBANDH | JHARGRAM | JAMBANI | LALBANDH | 1261004 |
261005 | JHARGRAM | JAMBANI | PARIHATI | JHARGRAM | JAMBANI | PARIHATI | 1261005 |
261006 | JHARGRAM | JHARGRAM | AGUIBONI | JHARGRAM | JHARGRAM | AGUIBONI | 1261006 |
261007 | JHARGRAM | JHARGRAM | BANDHGORA | JHARGRAM | JHARGRAM | BANDHGORA | 1261007 |
261010 | JHARGRAM | JHARGRAM | DUDHKUNDI | JHARGRAM | JHARGRAM | DUDHKUNDI | 1261010 |
261011 | JHARGRAM | JHARGRAM | LODHASULI | JHARGRAM | JHARGRAM | LODHASULI | 1261011 |
261013 | JHARGRAM | JHARGRAM | NEDABAHARA | JHARGRAM | JHARGRAM | NEDABAHARA | 1261013 |
261018 | JHARGRAM | JHARGRAM | SHALBONI | JHARGRAM | JHARGRAM | SHALBONI | 1261018 |
261020 | JHARGRAM | NAYAGRAM | BALIGERIA | JHARGRAM | NAYAGRAM | BALIGERIA | 1261020 |
261021 | JHARGRAM | NAYAGRAM | BARAKHAKRI | JHARGRAM | NAYAGRAM | BARAKHAKRI | 1261021 |
261022 | JHARGRAM | NAYAGRAM | BARANEGUI | JHARGRAM | NAYAGRAM | BARANEGUI | 1261022 |
261023 | JHARGRAM | NAYAGRAM | BERAJAL | JHARGRAM | NAYAGRAM | BERAJAL | 1261023 |
261025 | JHARGRAM | NAYAGRAM | CHANDRAREKHA | JHARGRAM | NAYAGRAM | CHANDRAREKHA | 1261025 |
261026 | JHARGRAM | NAYAGRAM | JAMIRAPAL | JHARGRAM | NAYAGRAM | JAMIRAPAL | 1261026 |
261027 | JHARGRAM | NAYAGRAM | KHARIKAMATHANI | JHARGRAM | NAYAGRAM | KHARIKAMATHANI | 1261027 |
261033 | JHARGRAM | SANKRAIL | DHANGHORI | JHARGRAM | SANKRAIL | DHANGHORI | 1261033 |
261034 | JHARGRAM | SANKRAIL | KHUDMORAI | JHARGRAM | SANKRAIL | KHUDMORAI | 1261034 |
261038 | JHARGRAM | SANKRAIL | RAGRAH | JHARGRAM | SANKRAIL | RAGRAH | 1261038 |
190589 rows × 7 columns
# Second phase of Gram Panchayat join: phonetic (Soundex) join
def _soundex_key(names, drop_words=()):
    """Return the Soundex code of each name in the Series *names*.

    Names are upper-cased, every substring listed in *drop_words* is
    removed, and all non-letter characters are stripped before encoding.
    """
    cleaned = names.str.upper()
    for word in drop_words:
        cleaned = cleaned.str.replace(word, '', regex=False)
    # Series.replace() without regex=True only swaps cells whose whole value
    # equals the pattern text, so the stripping has to go through the
    # .str accessor to act on substrings.
    cleaned = cleaned.str.replace(r'[^A-Za-z]+', '', regex=True)
    return cleaned.apply(jellyfish.soundex)


## Prepare GP Data
gp_point_df['district_y'] = _soundex_key(gp_point_df['dtname'])
gp_point_df['block_y'] = _soundex_key(gp_point_df['block_name'])
gp_point_df['panchayat_y'] = _soundex_key(gp_point_df['gp_name'], drop_words=('PANCHAYAT', 'GP'))

## Prepare Non Geocoded Data
# assign() returns a new frame, so no write happens on a filtered slice.
non_geocoded_df = non_geocoded_df.assign(
    district_x=_soundex_key(non_geocoded_df['District Name']),
    block_x=_soundex_key(non_geocoded_df['Block Name']),
    panchayat_x=_soundex_key(non_geocoded_df['Panchayat Name'], drop_words=('PANCHAYAT', 'GP')),
)

## Merge
soundex_gpgeocoded_df = pd.merge(
    non_geocoded_df,
    gp_point_df,
    how='inner',
    left_on=['district_x', 'block_x', 'panchayat_x'],
    right_on=['district_y', 'block_y', 'panchayat_y'],
)


## Check for quality
def jaro_winkler_match(x):
    """Return the Jaro-Winkler similarity of the two panchayat codes in row *x*."""
    return jellyfish.jaro_winkler(x['panchayat_x'], x['panchayat_y'])


# NOTE(review): 'panchayat_x' and 'panchayat_y' are the Soundex codes the merge
# above joined on, so they are identical in every row and this check cannot
# reject a phonetic false positive. A meaningful check would compare the
# original names ('Panchayat Name' vs 'gp_name') against a similarity
# threshold -- TODO decide the threshold and switch the columns.
soundex_gpgeocoded_df['is_match'] = soundex_gpgeocoded_df[['panchayat_x', 'panchayat_y']].apply(jaro_winkler_match, axis=1)
soundex_gpgeocoded_df = soundex_gpgeocoded_df[soundex_gpgeocoded_df['is_match'] == 1.0]

# Keep one coordinate pair per source record.
soundex_gpgeocoded_df = soundex_gpgeocoded_df[['District Name', 'Block Name', 'Panchayat Name', 'lat', 'lon', 'id']].drop_duplicates(subset='id', keep="first", inplace=False).reset_index(drop=True)
soundex_gpgeocoded_df
District Name | Block Name | Panchayat Name | lat | lon | id | |
---|---|---|---|---|---|---|
0 | NORTH AND MIDDLE ANDAMAN | Diglipur | Laxmipur | 13.320263 | 92.935452 | 1000015 |
1 | NORTH AND MIDDLE ANDAMAN | Diglipur | Radha Nagar | 13.398258 | 92.920165 | 1000019 |
2 | NORTH AND MIDDLE ANDAMAN | Diglipur | Rama Krishnagram | 13.246140 | 92.982946 | 1000020 |
3 | NORTH AND MIDDLE ANDAMAN | Diglipur | Ramnagar | 13.246140 | 92.982946 | 1000021 |
4 | NORTH AND MIDDLE ANDAMAN | Diglipur | Sitanagar | 13.246564 | 92.934633 | 1000023 |
5 | NORTH AND MIDDLE ANDAMAN | Diglipur | Subashgram | 13.251730 | 92.958664 | 1000024 |
6 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Pahalgaon | 12.845044 | 92.862707 | 1000030 |
7 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Pokadera | 12.905950 | 92.907320 | 1000031 |
8 | NORTH AND MIDDLE ANDAMAN | Rangat | Kadamtala | 12.354658 | 92.769528 | 1000036 |
9 | NORTH AND MIDDLE ANDAMAN | Rangat | Long Island | 12.395201 | 92.937125 | 1000038 |
10 | NORTH AND MIDDLE ANDAMAN | Rangat | Parnasala | 12.516090 | 92.906369 | 1000041 |
11 | NORTH AND MIDDLE ANDAMAN | Rangat | Sundergarh | 12.248895 | 92.817250 | 1000045 |
12 | NORTH AND MIDDLE ANDAMAN | Rangat | Urmilapur | 12.542299 | 92.862041 | 1000046 |
13 | ANANTAPUR | Agali | Hallikera | 13.768624 | 77.009729 | 1000079 |
14 | ANANTAPUR | Agali | P.BYADIGERA | 13.798521 | 76.999590 | 1000086 |
15 | ANANTAPUR | Amadagur | JOWKULAKOTHAPALLI | 13.980832 | 78.028448 | 1000093 |
16 | ANANTAPUR | Amarapuram | THAMMEDAHALLI | 14.157010 | 76.998959 | 1000107 |
17 | ANANTAPUR | Anantapur | KAMARUPALLI | 14.655622 | 77.491123 | 1000119 |
18 | ANANTAPUR | Anantapur | KATTAKINGAPALLI | 14.612996 | 77.554218 | 1000122 |
19 | ANANTAPUR | Anantapur | UPPARAPALLI | 14.656695 | 77.638550 | 1000133 |
20 | ANANTAPUR | Bathalapalle | GARISALAPALLI | 14.472745 | 77.888437 | 1000153 |
21 | ANANTAPUR | Bommanahal | UNTHAKAL | 14.980956 | 76.951567 | 1000196 |
22 | ANANTAPUR | Brahmasamudram | VEPALAPARTHI | 14.553595 | 76.898218 | 1000212 |
23 | ANANTAPUR | Bukkarayasamudram | CHEDULLA | 14.758668 | 77.684237 | 1000228 |
24 | ANANTAPUR | Bukkarayasamudram | DANDUVARIPALLE | 14.660011 | 77.710380 | 1000230 |
25 | ANANTAPUR | Bukkarayasamudram | GOVINDAPALLE | 14.735162 | 77.626619 | 1000232 |
26 | ANANTAPUR | Bukkarayasamudram | JANTHULUR | 14.756311 | 77.655562 | 1000233 |
27 | ANANTAPUR | Bukkarayasamudram | KORRAPADU | 14.736670 | 77.706004 | 1000235 |
28 | ANANTAPUR | Bukkarayasamudram | REDDIPALLE | 14.706014 | 77.675588 | 1000239 |
29 | ANANTAPUR | Chenne Kothapalle | CHENNAKOTHAPALLE | 14.275225 | 77.621588 | 1000246 |
... | ... | ... | ... | ... | ... | ... |
55416 | RAE BARELI | DALMAU | UBARANI | 26.109635 | 81.157392 | 1259030 |
55417 | RAE BARELI | DALMAU | UMRAMAU | 26.175899 | 81.060630 | 1259031 |
55418 | RAE BARELI | SHIVGARH | BADAVAR | 26.530307 | 81.271646 | 1259035 |
55419 | RAE BARELI | SHIVGARH | BANKAGARH | 26.544911 | 81.169259 | 1259038 |
55420 | RAE BARELI | SHIVGARH | BHAWANIGARH | 26.534063 | 81.222904 | 1259043 |
55421 | RAE BARELI | SHIVGARH | DEHLI | 26.585485 | 81.209077 | 1259046 |
55422 | RAE BARELI | SHIVGARH | DHODHWAPUR | 26.549008 | 81.217018 | 1259048 |
55423 | RAE BARELI | SHIVGARH | JAGDEESHPUR | 26.502278 | 81.253706 | 1259053 |
55424 | RAE BARELI | SHIVGARH | NARAINPUR | 26.490754 | 81.234027 | 1259059 |
55425 | RAE BARELI | SHIVGARH | NARETHUWA | 26.534429 | 81.325634 | 1259060 |
55426 | RAE BARELI | SHIVGARH | PIPARI | 26.551922 | 81.252683 | 1259064 |
55427 | RAE BARELI | SHIVGARH | RAIPUR NERUA | 26.518928 | 81.234014 | 1259066 |
55428 | RAE BARELI | SHIVGARH | RAMPUR KHAS | 26.506288 | 81.244942 | 1259068 |
55429 | RAE BARELI | SHIVGARH | RAMPUR TIKRA | 26.506288 | 81.244942 | 1259069 |
55430 | RAE BARELI | SHIVGARH | SHIVGARH | 26.544690 | 81.243923 | 1259073 |
55431 | RAE BARELI | SHIVGARH | SHIVLI | 26.548611 | 81.231550 | 1259074 |
55432 | SANT RAVIDAS NAGAR | Gyanpur | Geraee | 25.406200 | 82.431304 | 1259076 |
55433 | SANT RAVIDAS NAGAR | Gyanpur | Joginka | 25.395939 | 82.470273 | 1259077 |
55434 | SANT RAVIDAS NAGAR | Gyanpur | Sonkhari | 25.341214 | 82.409071 | 1259081 |
55435 | UDAM SINGH NAGAR | Bajpur | Barwala | 29.105141 | 79.181473 | 1259119 |
55436 | UDAM SINGH NAGAR | Sitarganj | Salmata | 28.905476 | 79.755871 | 1259121 |
55437 | SANGRUR | DHURI | BHASOURH | 30.354323 | 75.782513 | 1259147 |
55438 | BAREILLY | BITHIRI CHAINPUR | KOHNI | 28.375967 | 79.532901 | 1260508 |
55439 | JAUNPUR | BAKSHA | MAHIMAPUR DEEH | 25.821474 | 82.542160 | 1260518 |
55440 | JAUNPUR | BAKSHA | SHESHPURA | 25.817252 | 82.470473 | 1260524 |
55441 | SITAPUR | GONDLAMAU | CHAUPARIAY | 27.295590 | 80.696476 | 1260531 |
55442 | SITAPUR | GONDLAMAU | KUMAUGRANT | 27.390243 | 80.632652 | 1260537 |
55443 | SITAPUR | GONDLAMAU | MAHMADPUR JHABRA | 27.281080 | 80.661790 | 1260539 |
55444 | UNNAO | BIGHAPUR | RAJAPUR PIPRAHA | 26.303163 | 80.703166 | 1260544 |
55445 | NAWANSHAHR | NAWANSHAHR | BHAURA | 31.120617 | 76.200179 | 1260662 |
55446 rows × 6 columns
# Keep only the records that the Soundex GP join did not geocode
matched_by_gp = non_geocoded_df['id'].isin(soundex_gpgeocoded_df['id'])
non_geocoded_df = non_geocoded_df[~matched_by_gp]
non_geocoded_df
District Name | Block Name | Panchayat Name | district_x | block_x | panchayat_x | id | |
---|---|---|---|---|---|---|---|
0 | NICOBARS | Campbell Bay | CAMPBELL BAY | N216 | C514 | C514 | 1000000 |
1 | NICOBARS | Campbell Bay | GOVINDNAGAR | N216 | C514 | G153 | 1000001 |
2 | NICOBARS | Campbell Bay | Great & Little Nicobar | N216 | C514 | G634 | 1000002 |
3 | NICOBARS | Campbell Bay | LAXMI NAGAR | N216 | C514 | L255 | 1000003 |
4 | NICOBARS | Nancowry | CHOWRA TC | N216 | N526 | C632 | 1000004 |
5 | NICOBARS | Nancowry | KAMORTA TC | N216 | N526 | K563 | 1000005 |
6 | NICOBARS | Nancowry | KATCHAL TC | N216 | N526 | K324 | 1000006 |
7 | NICOBARS | Nancowry | NANCOWRY TC | N216 | N526 | N526 | 1000007 |
8 | NICOBARS | Nancowry | TERESSA TC | N216 | N526 | T623 | 1000008 |
9 | NICOBARS | Nicobar | TRIBAL COUNCIL | N216 | N216 | T614 | 1000009 |
10 | NORTH AND MIDDLE ANDAMAN | Diglipur | Diglipur | N635 | D241 | D241 | 1000010 |
11 | NORTH AND MIDDLE ANDAMAN | Diglipur | Gandhinagar | N635 | D241 | G535 | 1000011 |
12 | NORTH AND MIDDLE ANDAMAN | Diglipur | Kalighat | N635 | D241 | K423 | 1000012 |
13 | NORTH AND MIDDLE ANDAMAN | Diglipur | Keralapuram | N635 | D241 | K641 | 1000013 |
14 | NORTH AND MIDDLE ANDAMAN | Diglipur | Kishori Nagar | N635 | D241 | K265 | 1000014 |
16 | NORTH AND MIDDLE ANDAMAN | Diglipur | Madhupur | N635 | D241 | M316 | 1000016 |
17 | NORTH AND MIDDLE ANDAMAN | Diglipur | Nabagram | N635 | D241 | N126 | 1000017 |
18 | NORTH AND MIDDLE ANDAMAN | Diglipur | Paschimsagar | N635 | D241 | P252 | 1000018 |
22 | NORTH AND MIDDLE ANDAMAN | Diglipur | Shibpur | N635 | D241 | S160 | 1000022 |
25 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Basantipur | N635 | M153 | B253 | 1000025 |
26 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Chainpur | N635 | M153 | C516 | 1000026 |
27 | NORTH AND MIDDLE ANDAMAN | Mayabunder | De-Reserve Area | N635 | M153 | D626 | 1000027 |
28 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Harinagar | N635 | M153 | H652 | 1000028 |
29 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Mayabunder | N635 | M153 | M153 | 1000029 |
32 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Rampur | N635 | M153 | R516 | 1000032 |
33 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Swadesh Nagar | N635 | M153 | S325 | 1000033 |
34 | NORTH AND MIDDLE ANDAMAN | Rangat | Bakultala | N635 | R523 | B243 | 1000034 |
35 | NORTH AND MIDDLE ANDAMAN | Rangat | Dasarathpur | N635 | R523 | D263 | 1000035 |
37 | NORTH AND MIDDLE ANDAMAN | Rangat | Kaushalya Nagar | N635 | R523 | K245 | 1000037 |
39 | NORTH AND MIDDLE ANDAMAN | Rangat | Nilambur | N635 | R523 | N451 | 1000039 |
... | ... | ... | ... | ... | ... | ... | ... |
260992 | JHARGRAM | GOPIBALLAVPUR-I | KENDUGARI | J626 | G114 | K532 | 1260992 |
260993 | JHARGRAM | GOPIBALLAVPUR-I | SARIA | J626 | G114 | S600 | 1260993 |
260994 | JHARGRAM | GOPIBALLAVPUR-I | SATMA | J626 | G114 | S350 | 1260994 |
260995 | JHARGRAM | GOPIBALLAVPUR-I | SHASHRHA | J626 | G114 | S260 | 1260995 |
260996 | JHARGRAM | JAMBANI | CHILKIGARH | J626 | J515 | C422 | 1260996 |
260997 | JHARGRAM | JAMBANI | CHINCHRA | J626 | J515 | C526 | 1260997 |
260998 | JHARGRAM | JAMBANI | DHARSA | J626 | J515 | D620 | 1260998 |
260999 | JHARGRAM | JAMBANI | DUBRA | J626 | J515 | D160 | 1260999 |
261000 | JHARGRAM | JAMBANI | GIDHNI | J626 | J515 | G350 | 1261000 |
261001 | JHARGRAM | JAMBANI | JAMBONI | J626 | J515 | J515 | 1261001 |
261002 | JHARGRAM | JAMBANI | KAPGARI | J626 | J515 | K126 | 1261002 |
261003 | JHARGRAM | JAMBANI | KENDADANGRI | J626 | J515 | K533 | 1261003 |
261004 | JHARGRAM | JAMBANI | LALBANDH | J626 | J515 | L415 | 1261004 |
261005 | JHARGRAM | JAMBANI | PARIHATI | J626 | J515 | P630 | 1261005 |
261006 | JHARGRAM | JHARGRAM | AGUIBONI | J626 | J626 | A215 | 1261006 |
261007 | JHARGRAM | JHARGRAM | BANDHGORA | J626 | J626 | B532 | 1261007 |
261010 | JHARGRAM | JHARGRAM | DUDHKUNDI | J626 | J626 | D325 | 1261010 |
261011 | JHARGRAM | JHARGRAM | LODHASULI | J626 | J626 | L324 | 1261011 |
261013 | JHARGRAM | JHARGRAM | NEDABAHARA | J626 | J626 | N316 | 1261013 |
261018 | JHARGRAM | JHARGRAM | SHALBONI | J626 | J626 | S415 | 1261018 |
261020 | JHARGRAM | NAYAGRAM | BALIGERIA | J626 | N265 | B426 | 1261020 |
261021 | JHARGRAM | NAYAGRAM | BARAKHAKRI | J626 | N265 | B622 | 1261021 |
261022 | JHARGRAM | NAYAGRAM | BARANEGUI | J626 | N265 | B652 | 1261022 |
261023 | JHARGRAM | NAYAGRAM | BERAJAL | J626 | N265 | B624 | 1261023 |
261025 | JHARGRAM | NAYAGRAM | CHANDRAREKHA | J626 | N265 | C536 | 1261025 |
261026 | JHARGRAM | NAYAGRAM | JAMIRAPAL | J626 | N265 | J561 | 1261026 |
261027 | JHARGRAM | NAYAGRAM | KHARIKAMATHANI | J626 | N265 | K625 | 1261027 |
261033 | JHARGRAM | SANKRAIL | DHANGHORI | J626 | S526 | D526 | 1261033 |
261034 | JHARGRAM | SANKRAIL | KHUDMORAI | J626 | S526 | K356 | 1261034 |
261038 | JHARGRAM | SANKRAIL | RAGRAH | J626 | S526 | R260 | 1261038 |
135143 rows × 7 columns
# Second phase of Census Villages join: phonetic (Soundex) join
def _soundex_key(names, drop_words=()):
    """Return the Soundex code of each name in the Series *names*.

    Names are upper-cased, every substring listed in *drop_words* is
    removed, and all non-letter characters are stripped before encoding.
    """
    cleaned = names.str.upper()
    for word in drop_words:
        cleaned = cleaned.str.replace(word, '', regex=False)
    # Series.replace() without regex=True only swaps cells whose whole value
    # equals the pattern text, so the stripping has to go through the
    # .str accessor to act on substrings.
    cleaned = cleaned.str.replace(r'[^A-Za-z]+', '', regex=True)
    return cleaned.apply(jellyfish.soundex)


## Prepare Census Villages Data
cenvil_df['district_y'] = _soundex_key(cenvil_df['dtname'])
# NOTE(review): block_y is computed but is not one of the join keys below.
cenvil_df['block_y'] = _soundex_key(cenvil_df['sdtname'])
cenvil_df['village_y'] = _soundex_key(cenvil_df['vilname'])

# Rebuild the left-hand join keys with the same normalisation so that both
# sides of the merge are encoded identically. assign() returns a new frame,
# so no write happens on a filtered slice.
non_geocoded_df = non_geocoded_df.assign(
    district_x=_soundex_key(non_geocoded_df['District Name']),
    panchayat_x=_soundex_key(non_geocoded_df['Panchayat Name'], drop_words=('PANCHAYAT', 'GP')),
)

soundex_cenvilgeocoded_df = pd.merge(
    non_geocoded_df,
    cenvil_df,
    how='inner',
    left_on=['district_x', 'panchayat_x'],
    right_on=['district_y', 'village_y'],
)


## Check for quality
def jaro_winkler_match(x):
    """Return the Jaro-Winkler similarity of the panchayat and village codes in row *x*."""
    return jellyfish.jaro_winkler(x['panchayat_x'], x['village_y'])


# NOTE(review): 'panchayat_x' and 'village_y' are the Soundex codes the merge
# above joined on, so they are identical in every row and this check cannot
# reject a phonetic false positive (nor a district-code collision, since only
# the district and village codes are joined). A meaningful check would compare
# the original names ('Panchayat Name' vs 'vilname') against a similarity
# threshold -- TODO decide the threshold and switch the columns.
soundex_cenvilgeocoded_df['is_match'] = soundex_cenvilgeocoded_df[['panchayat_x', 'village_y']].apply(jaro_winkler_match, axis=1)
soundex_cenvilgeocoded_df = soundex_cenvilgeocoded_df[soundex_cenvilgeocoded_df['is_match'] == 1.0]

# Keep one coordinate pair per source record.
soundex_cenvilgeocoded_df = soundex_cenvilgeocoded_df[['District Name', 'Block Name', 'Panchayat Name', 'lat', 'lon', 'id']].drop_duplicates(subset='id', keep="first", inplace=False).reset_index(drop=True)
soundex_cenvilgeocoded_df
District Name | Block Name | Panchayat Name | lat | lon | id | |
---|---|---|---|---|---|---|
0 | NICOBARS | Campbell Bay | CAMPBELL BAY | 7.009279 | 93.926589 | 1000000 |
1 | NICOBARS | Campbell Bay | GOVINDNAGAR | 6.996494 | 93.896547 | 1000001 |
2 | NICOBARS | Campbell Bay | LAXMI NAGAR | 6.883300 | 93.889787 | 1000003 |
3 | NICOBARS | Nancowry | KAMORTA TC | 21.065458 | 79.498997 | 1000005 |
4 | NORTH AND MIDDLE ANDAMAN | Diglipur | Diglipur | 13.245422 | 92.971099 | 1000010 |
5 | NORTH AND MIDDLE ANDAMAN | Diglipur | Kalighat | 13.119377 | 92.952089 | 1000012 |
6 | NORTH AND MIDDLE ANDAMAN | Diglipur | Keralapuram | 13.255532 | 93.012199 | 1000013 |
7 | NORTH AND MIDDLE ANDAMAN | Diglipur | Kishori Nagar | 13.190907 | 92.870359 | 1000014 |
8 | NORTH AND MIDDLE ANDAMAN | Diglipur | Madhupur | 13.264025 | 92.969082 | 1000016 |
9 | NORTH AND MIDDLE ANDAMAN | Diglipur | Nabagram | 13.166283 | 92.941103 | 1000017 |
10 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Basantipur | 12.743961 | 92.877087 | 1000025 |
11 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Chainpur | 12.708116 | 92.806193 | 1000026 |
12 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Harinagar | 12.671905 | 92.878907 | 1000028 |
13 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Mayabunder | 12.921971 | 92.897331 | 1000029 |
14 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Rampur | 12.518138 | 92.909912 | 1000032 |
15 | NORTH AND MIDDLE ANDAMAN | Mayabunder | Swadesh Nagar | 12.660179 | 92.887343 | 1000033 |
16 | NORTH AND MIDDLE ANDAMAN | Rangat | Bakultala | 12.503236 | 92.857219 | 1000034 |
17 | NORTH AND MIDDLE ANDAMAN | Rangat | Dasarathpur | 12.495893 | 92.914466 | 1000035 |
18 | NORTH AND MIDDLE ANDAMAN | Rangat | Kaushalya Nagar | 12.534930 | 92.824345 | 1000037 |
19 | NORTH AND MIDDLE ANDAMAN | Rangat | Nilambur | 12.170486 | 92.764054 | 1000039 |
20 | NORTH AND MIDDLE ANDAMAN | Rangat | Rangat | 12.506387 | 92.907397 | 1000042 |
21 | NORTH AND MIDDLE ANDAMAN | Rangat | Shivapuram | 12.614373 | 92.930490 | 1000044 |
22 | SOUTH ANDAMAN | Ferrargunj | BINDRABAN | 11.719855 | 92.655212 | 1000050 |
23 | SOUTH ANDAMAN | Ferrargunj | CHOULDARI | 11.630423 | 92.665056 | 1000051 |
24 | SOUTH ANDAMAN | Ferrargunj | COLLINPUR | 11.691887 | 92.603782 | 1000052 |
25 | SOUTH ANDAMAN | Ferrargunj | FERRARAGUNJ | 11.715603 | 92.652890 | 1000053 |
26 | SOUTH ANDAMAN | Ferrargunj | GUPTAPARA | 11.561501 | 92.658138 | 1000054 |
27 | SOUTH ANDAMAN | Ferrargunj | HOPE TOWN | 11.699442 | 92.726428 | 1000055 |
28 | SOUTH ANDAMAN | Ferrargunj | HUMFRYGUNJ | 11.593297 | 92.659985 | 1000056 |
29 | SOUTH ANDAMAN | Ferrargunj | KANYAPURAM | 11.732659 | 92.698093 | 1000057 |
... | ... | ... | ... | ... | ... | ... |
83292 | JHARGRAM | GOPIBALLAVPUR-I | GOPIBALLAVPUR | 22.207152 | 86.901588 | 1260991 |
83293 | JHARGRAM | GOPIBALLAVPUR-I | KENDUGARI | 21.975726 | 87.200100 | 1260992 |
83294 | JHARGRAM | GOPIBALLAVPUR-I | SARIA | 22.124500 | 86.855235 | 1260993 |
83295 | JHARGRAM | GOPIBALLAVPUR-I | SHASHRHA | 22.189836 | 86.796338 | 1260995 |
83296 | JHARGRAM | JAMBANI | CHILKIGARH | 22.449344 | 86.872883 | 1260996 |
83297 | JHARGRAM | JAMBANI | CHINCHRA | 22.280531 | 86.905032 | 1260997 |
83298 | JHARGRAM | JAMBANI | DHARSA | 22.519595 | 86.816080 | 1260998 |
83299 | JHARGRAM | JAMBANI | DUBRA | 22.136627 | 87.120604 | 1260999 |
83300 | JHARGRAM | JAMBANI | GIDHNI | 22.485793 | 86.858759 | 1261000 |
83301 | JHARGRAM | JAMBANI | JAMBONI | 21.871876 | 87.060891 | 1261001 |
83302 | JHARGRAM | JAMBANI | KAPGARI | 22.523286 | 86.876195 | 1261002 |
83303 | JHARGRAM | JAMBANI | KENDADANGRI | 22.173594 | 87.073594 | 1261003 |
83304 | JHARGRAM | JAMBANI | LALBANDH | 22.420880 | 86.830344 | 1261004 |
83305 | JHARGRAM | JAMBANI | PARIHATI | 22.284173 | 86.930019 | 1261005 |
83306 | JHARGRAM | JHARGRAM | AGUIBONI | 22.311781 | 86.943780 | 1261006 |
83307 | JHARGRAM | JHARGRAM | BANDHGORA | 22.189921 | 86.852231 | 1261007 |
83308 | JHARGRAM | JHARGRAM | DUDHKUNDI | 22.236526 | 87.109333 | 1261010 |
83309 | JHARGRAM | JHARGRAM | LODHASULI | 22.385609 | 86.996699 | 1261011 |
83310 | JHARGRAM | JHARGRAM | NEDABAHARA | 22.380763 | 86.995109 | 1261013 |
83311 | JHARGRAM | JHARGRAM | SHALBONI | 21.873296 | 87.084168 | 1261018 |
83312 | JHARGRAM | NAYAGRAM | BALIGERIA | 21.896406 | 87.079674 | 1261020 |
83313 | JHARGRAM | NAYAGRAM | BARAKHAKRI | 22.372433 | 87.154985 | 1261021 |
83314 | JHARGRAM | NAYAGRAM | BARANEGUI | 21.985373 | 87.111380 | 1261022 |
83315 | JHARGRAM | NAYAGRAM | BERAJAL | 21.979447 | 87.148221 | 1261023 |
83316 | JHARGRAM | NAYAGRAM | CHANDRAREKHA | 21.960476 | 87.063864 | 1261025 |
83317 | JHARGRAM | NAYAGRAM | JAMIRAPAL | 21.983613 | 87.210078 | 1261026 |
83318 | JHARGRAM | NAYAGRAM | KHARIKAMATHANI | 22.005273 | 87.146044 | 1261027 |
83319 | JHARGRAM | SANKRAIL | DHANGHORI | 22.147340 | 87.143844 | 1261033 |
83320 | JHARGRAM | SANKRAIL | KHUDMORAI | 21.976395 | 87.212614 | 1261034 |
83321 | JHARGRAM | SANKRAIL | RAGRAH | 22.181524 | 87.037965 | 1261038 |
83322 rows × 6 columns
# Keep only the records that the Soundex census-village join did not geocode
matched_by_village = non_geocoded_df['id'].isin(soundex_cenvilgeocoded_df['id'])
non_geocoded_df = non_geocoded_df[~matched_by_village]
non_geocoded_df
District Name | Block Name | Panchayat Name | district_x | block_x | panchayat_x | id | |
---|---|---|---|---|---|---|---|
2 | NICOBARS | Campbell Bay | Great & Little Nicobar | N216 | C514 | G634 | 1000002 |
4 | NICOBARS | Nancowry | CHOWRA TC | N216 | N526 | C632 | 1000004 |
6 | NICOBARS | Nancowry | KATCHAL TC | N216 | N526 | K324 | 1000006 |
7 | NICOBARS | Nancowry | NANCOWRY TC | N216 | N526 | N526 | 1000007 |
8 | NICOBARS | Nancowry | TERESSA TC | N216 | N526 | T623 | 1000008 |
9 | NICOBARS | Nicobar | TRIBAL COUNCIL | N216 | N216 | T614 | 1000009 |
11 | NORTH AND MIDDLE ANDAMAN | Diglipur | Gandhinagar | N635 | D241 | G535 | 1000011 |
18 | NORTH AND MIDDLE ANDAMAN | Diglipur | Paschimsagar | N635 | D241 | P252 | 1000018 |
22 | NORTH AND MIDDLE ANDAMAN | Diglipur | Shibpur | N635 | D241 | S160 | 1000022 |
27 | NORTH AND MIDDLE ANDAMAN | Mayabunder | De-Reserve Area | N635 | M153 | D626 | 1000027 |
40 | NORTH AND MIDDLE ANDAMAN | Rangat | Nimbutala | N635 | R523 | N513 | 1000040 |
43 | NORTH AND MIDDLE ANDAMAN | Rangat | Sabari | N635 | R523 | S160 | 1000043 |
47 | NORTH AND MIDDLE ANDAMAN | Rangat | Uttara | N635 | R523 | U360 | 1000047 |
48 | SOUTH ANDAMAN | Ferrargunj | BAMBOOFLAT | S353 | F662 | B511 | 1000048 |
49 | SOUTH ANDAMAN | Ferrargunj | BAMBOOFLAT-II | S353 | F662 | B511 | 1000049 |
61 | SOUTH ANDAMAN | Ferrargunj | SHOAL BAY | S353 | F662 | S410 | 1000061 |
67 | SOUTH ANDAMAN | Little Andaman | HUT BAY | S353 | L345 | H310 | 1000067 |
69 | SOUTH ANDAMAN | Little Andaman | R.K.PUR | S353 | L345 | R216 | 1000069 |
71 | SOUTH ANDAMAN | Little Andaman | V.K PUR | S353 | L345 | V216 | 1000071 |
87 | ANANTAPUR | Agali | RAGELINGANAHALLI | A553 | A240 | R245 | 1000087 |
99 | ANANTAPUR | Amarapuram | ALDAHALLI | A553 | A561 | A434 | 1000099 |
109 | ANANTAPUR | Anantapur | A.NARAYANAPURAM | A553 | A553 | A565 | 1000109 |
110 | ANANTAPUR | Anantapur | AKKAMPALLI | A553 | A553 | A251 | 1000110 |
111 | ANANTAPUR | Anantapur | AKUTHOTAPALLI | A553 | A553 | A233 | 1000111 |
129 | ANANTAPUR | Anantapur | RAJIV COLONY | A553 | A553 | R212 | 1000129 |
130 | ANANTAPUR | Anantapur | RUDRAMPETA | A553 | A553 | R365 | 1000130 |
131 | ANANTAPUR | Anantapur | SOMALADODDI | A553 | A553 | S543 | 1000131 |
135 | ANANTAPUR | Atmakur | B.YALERU | A553 | A352 | B460 | 1000135 |
139 | ANANTAPUR | Atmakur | ODDUPALLI | A553 | A352 | O314 | 1000139 |
140 | ANANTAPUR | Atmakur | P.SIDDARAMPURAM | A553 | A352 | P236 | 1000140 |
... | ... | ... | ... | ... | ... | ... | ... |
260913 | PURBA BARDHAMAN | RAINA-II | BARABAINAN | P611 | R500 | B615 | 1260913 |
260914 | PURBA BARDHAMAN | RAINA-II | GOTAN | P611 | R500 | G350 | 1260914 |
260915 | PURBA BARDHAMAN | RAINA-II | KAITY | P611 | R500 | K300 | 1260915 |
260916 | PURBA BARDHAMAN | RAINA-II | PAHALANPUR | P611 | R500 | P451 | 1260916 |
260917 | PURBA BARDHAMAN | RAINA-II | PAINTA-I | P611 | R500 | P530 | 1260917 |
260918 | PURBA BARDHAMAN | RAINA-II | PAINTA-II | P611 | R500 | P530 | 1260918 |
260919 | PURBA BARDHAMAN | RAINA-II | UCHALAN | P611 | R500 | U245 | 1260919 |
260920 | KALIMPONG | GARUBATHAN | AAHALEY | K451 | G613 | A400 | 1260920 |
260921 | KALIMPONG | GARUBATHAN | DALIM | K451 | G613 | D450 | 1260921 |
260924 | KALIMPONG | GARUBATHAN | KUMAI | K451 | G613 | K500 | 1260924 |
260925 | KALIMPONG | GARUBATHAN | NIM | K451 | G613 | N500 | 1260925 |
260927 | KALIMPONG | GARUBATHAN | POKHREYBONG | K451 | G613 | P261 | 1260927 |
260928 | KALIMPONG | GARUBATHAN | RONGO | K451 | G613 | R520 | 1260928 |
260932 | KALIMPONG | KALIMPONG-I | BONG | K451 | K451 | B520 | 1260932 |
260933 | KALIMPONG | KALIMPONG-I | DR. GRAHAMS HOMES | K451 | K451 | D626 | 1260933 |
260937 | KALIMPONG | KALIMPONG-I | LOWER ECHHAY | K451 | K451 | L620 | 1260937 |
260939 | KALIMPONG | KALIMPONG-I | PABRINGTAR | K451 | K451 | P165 | 1260939 |
260943 | KALIMPONG | KALIMPONG-I | SEOKBIR | K451 | K451 | S216 | 1260943 |
260947 | KALIMPONG | KALIMPONG-I | TISTA | K451 | K451 | T230 | 1260947 |
260948 | KALIMPONG | KALIMPONG-I | UPPER ECHHAY | K451 | K451 | U162 | 1260948 |
260950 | KALIMPONG | KALIMPONG-II | GITABLING | K451 | K451 | G314 | 1260950 |
260951 | KALIMPONG | KALIMPONG-II | KAGE | K451 | K451 | K200 | 1260951 |
260953 | KALIMPONG | KALIMPONG-II | LAVA-GITABEONG | K451 | K451 | L123 | 1260953 |
260954 | KALIMPONG | KALIMPONG-II | LINGSEY | K451 | K451 | L520 | 1260954 |
260956 | KALIMPONG | KALIMPONG-II | LOLEY | K451 | K451 | L400 | 1260956 |
260957 | KALIMPONG | KALIMPONG-II | PAYONG | K451 | K451 | P520 | 1260957 |
260959 | KALIMPONG | KALIMPONG-II | SHANGSE | K451 | K451 | S520 | 1260959 |
260975 | JHARGRAM | BINPUR-II | BOSHPAHARI | J626 | B516 | B216 | 1260975 |
260980 | JHARGRAM | BINPUR-II | SHILDA | J626 | B516 | S430 | 1260980 |
260994 | JHARGRAM | GOPIBALLAVPUR-I | SATMA | J626 | G114 | S350 | 1260994 |
51821 rows × 7 columns
# Export the still-unmatched records so they can be geocoded with Google
google_input_path = os.path.join(output_folder, 'tobe_google_geocoded.csv')
non_geocoded_df.to_csv(google_input_path, encoding='utf-8', index=False)
non_geocoded_df
District Name | Block Name | Panchayat Name | district_x | block_x | panchayat_x | id | |
---|---|---|---|---|---|---|---|
2 | NICOBARS | Campbell Bay | Great & Little Nicobar | N216 | C514 | G634 | 1000002 |
4 | NICOBARS | Nancowry | CHOWRA TC | N216 | N526 | C632 | 1000004 |
6 | NICOBARS | Nancowry | KATCHAL TC | N216 | N526 | K324 | 1000006 |
7 | NICOBARS | Nancowry | NANCOWRY TC | N216 | N526 | N526 | 1000007 |
8 | NICOBARS | Nancowry | TERESSA TC | N216 | N526 | T623 | 1000008 |
9 | NICOBARS | Nicobar | TRIBAL COUNCIL | N216 | N216 | T614 | 1000009 |
11 | NORTH AND MIDDLE ANDAMAN | Diglipur | Gandhinagar | N635 | D241 | G535 | 1000011 |
18 | NORTH AND MIDDLE ANDAMAN | Diglipur | Paschimsagar | N635 | D241 | P252 | 1000018 |
22 | NORTH AND MIDDLE ANDAMAN | Diglipur | Shibpur | N635 | D241 | S160 | 1000022 |
27 | NORTH AND MIDDLE ANDAMAN | Mayabunder | De-Reserve Area | N635 | M153 | D626 | 1000027 |
40 | NORTH AND MIDDLE ANDAMAN | Rangat | Nimbutala | N635 | R523 | N513 | 1000040 |
43 | NORTH AND MIDDLE ANDAMAN | Rangat | Sabari | N635 | R523 | S160 | 1000043 |
47 | NORTH AND MIDDLE ANDAMAN | Rangat | Uttara | N635 | R523 | U360 | 1000047 |
48 | SOUTH ANDAMAN | Ferrargunj | BAMBOOFLAT | S353 | F662 | B511 | 1000048 |
49 | SOUTH ANDAMAN | Ferrargunj | BAMBOOFLAT-II | S353 | F662 | B511 | 1000049 |
61 | SOUTH ANDAMAN | Ferrargunj | SHOAL BAY | S353 | F662 | S410 | 1000061 |
67 | SOUTH ANDAMAN | Little Andaman | HUT BAY | S353 | L345 | H310 | 1000067 |
69 | SOUTH ANDAMAN | Little Andaman | R.K.PUR | S353 | L345 | R216 | 1000069 |
71 | SOUTH ANDAMAN | Little Andaman | V.K PUR | S353 | L345 | V216 | 1000071 |
87 | ANANTAPUR | Agali | RAGELINGANAHALLI | A553 | A240 | R245 | 1000087 |
99 | ANANTAPUR | Amarapuram | ALDAHALLI | A553 | A561 | A434 | 1000099 |
109 | ANANTAPUR | Anantapur | A.NARAYANAPURAM | A553 | A553 | A565 | 1000109 |
110 | ANANTAPUR | Anantapur | AKKAMPALLI | A553 | A553 | A251 | 1000110 |
111 | ANANTAPUR | Anantapur | AKUTHOTAPALLI | A553 | A553 | A233 | 1000111 |
129 | ANANTAPUR | Anantapur | RAJIV COLONY | A553 | A553 | R212 | 1000129 |
130 | ANANTAPUR | Anantapur | RUDRAMPETA | A553 | A553 | R365 | 1000130 |
131 | ANANTAPUR | Anantapur | SOMALADODDI | A553 | A553 | S543 | 1000131 |
135 | ANANTAPUR | Atmakur | B.YALERU | A553 | A352 | B460 | 1000135 |
139 | ANANTAPUR | Atmakur | ODDUPALLI | A553 | A352 | O314 | 1000139 |
140 | ANANTAPUR | Atmakur | P.SIDDARAMPURAM | A553 | A352 | P236 | 1000140 |
... | ... | ... | ... | ... | ... | ... | ... |
260913 | PURBA BARDHAMAN | RAINA-II | BARABAINAN | P611 | R500 | B615 | 1260913 |
260914 | PURBA BARDHAMAN | RAINA-II | GOTAN | P611 | R500 | G350 | 1260914 |
260915 | PURBA BARDHAMAN | RAINA-II | KAITY | P611 | R500 | K300 | 1260915 |
260916 | PURBA BARDHAMAN | RAINA-II | PAHALANPUR | P611 | R500 | P451 | 1260916 |
260917 | PURBA BARDHAMAN | RAINA-II | PAINTA-I | P611 | R500 | P530 | 1260917 |
260918 | PURBA BARDHAMAN | RAINA-II | PAINTA-II | P611 | R500 | P530 | 1260918 |
260919 | PURBA BARDHAMAN | RAINA-II | UCHALAN | P611 | R500 | U245 | 1260919 |
260920 | KALIMPONG | GARUBATHAN | AAHALEY | K451 | G613 | A400 | 1260920 |
260921 | KALIMPONG | GARUBATHAN | DALIM | K451 | G613 | D450 | 1260921 |
260924 | KALIMPONG | GARUBATHAN | KUMAI | K451 | G613 | K500 | 1260924 |
260925 | KALIMPONG | GARUBATHAN | NIM | K451 | G613 | N500 | 1260925 |
260927 | KALIMPONG | GARUBATHAN | POKHREYBONG | K451 | G613 | P261 | 1260927 |
260928 | KALIMPONG | GARUBATHAN | RONGO | K451 | G613 | R520 | 1260928 |
260932 | KALIMPONG | KALIMPONG-I | BONG | K451 | K451 | B520 | 1260932 |
260933 | KALIMPONG | KALIMPONG-I | DR. GRAHAMS HOMES | K451 | K451 | D626 | 1260933 |
260937 | KALIMPONG | KALIMPONG-I | LOWER ECHHAY | K451 | K451 | L620 | 1260937 |
260939 | KALIMPONG | KALIMPONG-I | PABRINGTAR | K451 | K451 | P165 | 1260939 |
260943 | KALIMPONG | KALIMPONG-I | SEOKBIR | K451 | K451 | S216 | 1260943 |
260947 | KALIMPONG | KALIMPONG-I | TISTA | K451 | K451 | T230 | 1260947 |
260948 | KALIMPONG | KALIMPONG-I | UPPER ECHHAY | K451 | K451 | U162 | 1260948 |
260950 | KALIMPONG | KALIMPONG-II | GITABLING | K451 | K451 | G314 | 1260950 |
260951 | KALIMPONG | KALIMPONG-II | KAGE | K451 | K451 | K200 | 1260951 |
260953 | KALIMPONG | KALIMPONG-II | LAVA-GITABEONG | K451 | K451 | L123 | 1260953 |
260954 | KALIMPONG | KALIMPONG-II | LINGSEY | K451 | K451 | L520 | 1260954 |
260956 | KALIMPONG | KALIMPONG-II | LOLEY | K451 | K451 | L400 | 1260956 |
260957 | KALIMPONG | KALIMPONG-II | PAYONG | K451 | K451 | P520 | 1260957 |
260959 | KALIMPONG | KALIMPONG-II | SHANGSE | K451 | K451 | S520 | 1260959 |
260975 | JHARGRAM | BINPUR-II | BOSHPAHARI | J626 | B516 | B216 | 1260975 |
260980 | JHARGRAM | BINPUR-II | SHILDA | J626 | B516 | S430 | 1260980 |
260994 | JHARGRAM | GOPIBALLAVPUR-I | SATMA | J626 | G114 | S350 | 1260994 |
51821 rows × 7 columns
# Read Google geocoded data
# The results are stored as four CSV parts; load them in order and stack them.
google_parts = [
    pd.read_csv(os.path.join(data_folder, 'google', part_name), encoding="ISO-8859-1")
    for part_name in ('geocoded1.csv', 'geocoded2.csv', 'geocoded3.csv', 'geocoded4.csv')
]
google_geocoded_df = pd.concat(google_parts)

# Keep the record id and the coordinates, under the column names used by the
# other geocoded frames.
google_geocoded_df = google_geocoded_df[['id', '_latitude', '_longitude']].rename(
    columns={'_latitude': 'lat', '_longitude': 'lon'}
)

# Attach the coordinates to the records that were still unmatched.
# NOTE(review): the coordinates are accepted as returned; nothing here checks
# that a point falls inside the record's district.
google_geocoded_df = pd.merge(non_geocoded_df, google_geocoded_df, how='inner', on='id')
output_columns = ['District Name', 'Block Name', 'Panchayat Name', 'lat', 'lon', 'id']
google_geocoded_df = google_geocoded_df[output_columns].drop_duplicates(subset='id', keep="first", inplace=False).reset_index(drop=True)
google_geocoded_df
District Name | Block Name | Panchayat Name | lat | lon | id | |
---|---|---|---|---|---|---|
0 | NICOBARS | Campbell Bay | Great & Little Nicobar | 7.325778 | 93.692753 | 1000002 |
1 | NICOBARS | Nancowry | CHOWRA TC | 8.456021 | 93.042598 | 1000004 |
2 | NICOBARS | Nancowry | KATCHAL TC | 7.967549 | 93.359045 | 1000006 |
3 | NICOBARS | Nancowry | NANCOWRY TC | 7.976840 | 93.550907 | 1000007 |
4 | NICOBARS | Nancowry | TERESSA TC | 8.241989 | 93.127785 | 1000008 |
5 | NICOBARS | Nicobar | TRIBAL COUNCIL | 7.120540 | 93.784150 | 1000009 |
6 | NORTH AND MIDDLE ANDAMAN | Diglipur | Gandhinagar | 23.215635 | 72.636941 | 1000011 |
7 | NORTH AND MIDDLE ANDAMAN | Diglipur | Paschimsagar | 13.264189 | 92.882137 | 1000018 |
8 | NORTH AND MIDDLE ANDAMAN | Diglipur | Shibpur | 13.242794 | 93.030098 | 1000022 |
9 | NORTH AND MIDDLE ANDAMAN | Mayabunder | De-Reserve Area | 12.913057 | 92.897743 | 1000027 |
10 | NORTH AND MIDDLE ANDAMAN | Rangat | Nimbutala | 12.500218 | 92.961879 | 1000040 |
11 | NORTH AND MIDDLE ANDAMAN | Rangat | Sabari | 12.506952 | 92.913850 | 1000043 |
12 | NORTH AND MIDDLE ANDAMAN | Rangat | Uttara | 12.506952 | 92.913850 | 1000047 |
13 | SOUTH ANDAMAN | Ferrargunj | BAMBOOFLAT | 11.720196 | 92.656157 | 1000048 |
14 | SOUTH ANDAMAN | Ferrargunj | BAMBOOFLAT-II | 11.720196 | 92.656157 | 1000049 |
15 | SOUTH ANDAMAN | Ferrargunj | SHOAL BAY | 11.720196 | 92.656157 | 1000061 |
16 | SOUTH ANDAMAN | Little Andaman | HUT BAY | 10.744887 | 92.499992 | 1000067 |
17 | SOUTH ANDAMAN | Little Andaman | R.K.PUR | 10.696474 | 92.566856 | 1000069 |
18 | SOUTH ANDAMAN | Little Andaman | V.K PUR | 10.744887 | 92.499992 | 1000071 |
19 | ANANTAPUR | Agali | RAGELINGANAHALLI | 13.785969 | 77.052848 | 1000087 |
20 | ANANTAPUR | Amarapuram | ALDAHALLI | 14.128138 | 76.980148 | 1000099 |
21 | ANANTAPUR | Anantapur | A.NARAYANAPURAM | 14.703305 | 77.583434 | 1000109 |
22 | ANANTAPUR | Anantapur | AKKAMPALLI | 14.690438 | 77.544029 | 1000110 |
23 | ANANTAPUR | Anantapur | AKUTHOTAPALLI | 14.618106 | 77.635714 | 1000111 |
24 | ANANTAPUR | Anantapur | RAJIV COLONY | 14.698476 | 77.601766 | 1000129 |
25 | ANANTAPUR | Anantapur | RUDRAMPETA | 14.670000 | 77.576270 | 1000130 |
26 | ANANTAPUR | Anantapur | SOMALADODDI | 14.725955 | 77.602347 | 1000131 |
27 | ANANTAPUR | Atmakur | B.YALERU | 14.629375 | 77.507526 | 1000135 |
28 | ANANTAPUR | Atmakur | ODDUPALLI | 14.646694 | 77.358284 | 1000139 |
29 | ANANTAPUR | Atmakur | P.SIDDARAMPURAM | 14.270619 | 77.865313 | 1000140 |
... | ... | ... | ... | ... | ... | ... |
44068 | PURBA BARDHAMAN | RAINA-II | BARABAINAN | 23.008667 | 87.935931 | 1260913 |
44069 | PURBA BARDHAMAN | RAINA-II | GOTAN | 22.978767 | 87.926126 | 1260914 |
44070 | PURBA BARDHAMAN | RAINA-II | KAITY | 23.014856 | 87.841469 | 1260915 |
44071 | PURBA BARDHAMAN | RAINA-II | PAHALANPUR | 22.960127 | 87.867251 | 1260916 |
44072 | PURBA BARDHAMAN | RAINA-II | PAINTA-I | 22.978526 | 87.797236 | 1260917 |
44073 | PURBA BARDHAMAN | RAINA-II | PAINTA-II | 22.978526 | 87.797236 | 1260918 |
44074 | PURBA BARDHAMAN | RAINA-II | UCHALAN | 23.030366 | 87.778793 | 1260919 |
44075 | KALIMPONG | GARUBATHAN | AAHALEY | 26.954219 | 88.695244 | 1260920 |
44076 | KALIMPONG | GARUBATHAN | DALIM | 26.954219 | 88.695244 | 1260921 |
44077 | KALIMPONG | GARUBATHAN | KUMAI | 27.002462 | 88.827501 | 1260924 |
44078 | KALIMPONG | GARUBATHAN | NIM | 26.954219 | 88.695244 | 1260925 |
44079 | KALIMPONG | GARUBATHAN | POKHREYBONG | 26.958845 | 88.180356 | 1260927 |
44080 | KALIMPONG | GARUBATHAN | RONGO | 27.042234 | 88.833503 | 1260928 |
44081 | KALIMPONG | KALIMPONG-I | BONG | 27.059356 | 88.469454 | 1260932 |
44082 | KALIMPONG | KALIMPONG-I | DR. GRAHAMS HOMES | 27.083741 | 88.491369 | 1260933 |
44083 | KALIMPONG | KALIMPONG-I | LOWER ECHHAY | 27.059356 | 88.469454 | 1260937 |
44084 | KALIMPONG | KALIMPONG-I | PABRINGTAR | 27.059356 | 88.469454 | 1260939 |
44085 | KALIMPONG | KALIMPONG-I | SEOKBIR | 27.034829 | 88.519896 | 1260943 |
44086 | KALIMPONG | KALIMPONG-I | TISTA | 27.057825 | 88.465992 | 1260947 |
44087 | KALIMPONG | KALIMPONG-I | UPPER ECHHAY | 27.079489 | 88.518920 | 1260948 |
44088 | KALIMPONG | KALIMPONG-II | GITABLING | 27.059356 | 88.469454 | 1260950 |
44089 | KALIMPONG | KALIMPONG-II | KAGE | 27.059356 | 88.469454 | 1260951 |
44090 | KALIMPONG | KALIMPONG-II | LAVA-GITABEONG | 27.081617 | 88.520462 | 1260953 |
44091 | KALIMPONG | KALIMPONG-II | LINGSEY | 27.059356 | 88.469454 | 1260954 |
44092 | KALIMPONG | KALIMPONG-II | LOLEY | 27.059356 | 88.469454 | 1260956 |
44093 | KALIMPONG | KALIMPONG-II | PAYONG | 27.059356 | 88.469454 | 1260957 |
44094 | KALIMPONG | KALIMPONG-II | SHANGSE | 27.059356 | 88.469454 | 1260959 |
44095 | JHARGRAM | BINPUR-II | BOSHPAHARI | 22.583006 | 86.918585 | 1260975 |
44096 | JHARGRAM | BINPUR-II | SHILDA | 22.583006 | 86.918585 | 1260980 |
44097 | JHARGRAM | GOPIBALLAVPUR-I | SATMA | 22.207909 | 86.765519 | 1260994 |
44098 rows × 6 columns
# Aggregate all geocodes
# The list order sets precedence: after stacking, only the first row found
# for each id is kept, so earlier sources win over later ones.
# Disabled: load the Google results from a saved CSV instead.
#google_geocoded_df = pd.read_csv(os.path.join(data_folder, 'google_geocoded.csv'))
geocode_sources = [
    gp_geocoded_df,
    cenvil_geocoded_df,
    google_geocoded_df,
    soundex_cenvilgeocoded_df,
    soundex_gpgeocoded_df,
]
geocoded_df = (
    pd.concat(geocode_sources)
    .drop_duplicates(subset='id', keep='first')
    .reset_index(drop=True)
)
# Persist the combined table (UTF-8, without the index column).
geocoded_csv_path = os.path.join(output_folder, 'geocoded_panchayats.csv')
geocoded_df.to_csv(geocoded_csv_path, encoding='utf-8', index=False)
geocoded_df
District Name | Block Name | Panchayat Name | lat | lon | id | |
---|---|---|---|---|---|---|
0 | ANANTAPUR | Agali | HULIKERADEVARAHALLI | 13.768624 | 77.009729 | 1000080 |
1 | ANANTAPUR | Agali | MADHUDI | 13.810206 | 77.025746 | 1000084 |
2 | ANANTAPUR | Agali | RAVUDI | 13.841146 | 77.019506 | 1000088 |
3 | ANANTAPUR | Amadagur | CHEEKIREVULAPALLE | 13.874734 | 78.041962 | 1000090 |
4 | ANANTAPUR | Amadagur | KASSAMUDRAM | 13.933423 | 78.014464 | 1000094 |
5 | ANANTAPUR | Amadagur | MOHAMMADABAD | 13.967322 | 78.010266 | 1000095 |
6 | ANANTAPUR | Amadagur | THUMMALA | 13.929267 | 78.064326 | 1000097 |
7 | ANANTAPUR | Amarapuram | HALUKURU | 14.090250 | 77.001711 | 1000102 |
8 | ANANTAPUR | Amarapuram | HEMAVATHI | 14.033064 | 76.986075 | 1000103 |
9 | ANANTAPUR | Amarapuram | NIDRAGATTA | 14.037968 | 76.954373 | 1000105 |
10 | ANANTAPUR | Amarapuram | SIVARAM | 14.032437 | 77.023079 | 1000106 |
11 | ANANTAPUR | Anantapur | KATIGANIKALVA | 14.612996 | 77.554218 | 1000121 |
12 | ANANTAPUR | Anantapur | KURUGUNTA | 14.674663 | 77.519245 | 1000124 |
13 | ANANTAPUR | Anantapur | MANNILA | 14.556448 | 77.686479 | 1000125 |
14 | ANANTAPUR | Anantapur | THATICHERLA | 14.752598 | 77.571765 | 1000132 |
15 | ANANTAPUR | Atmakur | MADIGUBBA | 14.638847 | 77.453057 | 1000137 |
16 | ANANTAPUR | Atmakur | THOPUDURTHI | 14.552931 | 77.492462 | 1000147 |
17 | ANANTAPUR | Bathalapalle | APPARACHERUVU | 14.479485 | 77.798019 | 1000149 |
18 | ANANTAPUR | Bathalapalle | BATHALAPALLE | 14.504290 | 77.765405 | 1000150 |
19 | ANANTAPUR | Bathalapalle | MALYAVANTHAM | 14.569889 | 77.768676 | 1000154 |
20 | ANANTAPUR | Bathalapalle | OBULAPURAM | 14.458996 | 77.843963 | 1000157 |
21 | ANANTAPUR | Beluguppa | AVULENNA | 14.683234 | 77.153240 | 1000162 |
22 | ANANTAPUR | Beluguppa | BELUGUPPA | 14.712986 | 77.136826 | 1000163 |
23 | ANANTAPUR | Beluguppa | DUDDEKUNTA | 14.680413 | 77.220876 | 1000164 |
24 | ANANTAPUR | Beluguppa | GANGAVARAM | 14.640860 | 77.207144 | 1000165 |
25 | ANANTAPUR | Beluguppa | KALVAPALLE | 14.625519 | 77.267957 | 1000167 |
26 | ANANTAPUR | Beluguppa | KONAMPALLE | 14.669387 | 77.262099 | 1000168 |
27 | ANANTAPUR | Beluguppa | THAGGUPARTHY | 14.725207 | 77.198641 | 1000176 |
28 | ANANTAPUR | Beluguppa | YERRAGUDI | 14.702239 | 77.176537 | 1000178 |
29 | ANANTAPUR | Bommanahal | BANDUR | 14.949310 | 77.008596 | 1000179 |
... | ... | ... | ... | ... | ... | ... |
253288 | RAE BARELI | DALMAU | UBARANI | 26.109635 | 81.157392 | 1259030 |
253289 | RAE BARELI | DALMAU | UMRAMAU | 26.175899 | 81.060630 | 1259031 |
253290 | RAE BARELI | SHIVGARH | BADAVAR | 26.530307 | 81.271646 | 1259035 |
253291 | RAE BARELI | SHIVGARH | BANKAGARH | 26.544911 | 81.169259 | 1259038 |
253292 | RAE BARELI | SHIVGARH | BHAWANIGARH | 26.534063 | 81.222904 | 1259043 |
253293 | RAE BARELI | SHIVGARH | DEHLI | 26.585485 | 81.209077 | 1259046 |
253294 | RAE BARELI | SHIVGARH | DHODHWAPUR | 26.549008 | 81.217018 | 1259048 |
253295 | RAE BARELI | SHIVGARH | JAGDEESHPUR | 26.502278 | 81.253706 | 1259053 |
253296 | RAE BARELI | SHIVGARH | NARAINPUR | 26.490754 | 81.234027 | 1259059 |
253297 | RAE BARELI | SHIVGARH | NARETHUWA | 26.534429 | 81.325634 | 1259060 |
253298 | RAE BARELI | SHIVGARH | PIPARI | 26.551922 | 81.252683 | 1259064 |
253299 | RAE BARELI | SHIVGARH | RAIPUR NERUA | 26.518928 | 81.234014 | 1259066 |
253300 | RAE BARELI | SHIVGARH | RAMPUR KHAS | 26.506288 | 81.244942 | 1259068 |
253301 | RAE BARELI | SHIVGARH | RAMPUR TIKRA | 26.506288 | 81.244942 | 1259069 |
253302 | RAE BARELI | SHIVGARH | SHIVGARH | 26.544690 | 81.243923 | 1259073 |
253303 | RAE BARELI | SHIVGARH | SHIVLI | 26.548611 | 81.231550 | 1259074 |
253304 | SANT RAVIDAS NAGAR | Gyanpur | Geraee | 25.406200 | 82.431304 | 1259076 |
253305 | SANT RAVIDAS NAGAR | Gyanpur | Joginka | 25.395939 | 82.470273 | 1259077 |
253306 | SANT RAVIDAS NAGAR | Gyanpur | Sonkhari | 25.341214 | 82.409071 | 1259081 |
253307 | UDAM SINGH NAGAR | Bajpur | Barwala | 29.105141 | 79.181473 | 1259119 |
253308 | UDAM SINGH NAGAR | Sitarganj | Salmata | 28.905476 | 79.755871 | 1259121 |
253309 | SANGRUR | DHURI | BHASOURH | 30.354323 | 75.782513 | 1259147 |
253310 | BAREILLY | BITHIRI CHAINPUR | KOHNI | 28.375967 | 79.532901 | 1260508 |
253311 | JAUNPUR | BAKSHA | MAHIMAPUR DEEH | 25.821474 | 82.542160 | 1260518 |
253312 | JAUNPUR | BAKSHA | SHESHPURA | 25.817252 | 82.470473 | 1260524 |
253313 | SITAPUR | GONDLAMAU | CHAUPARIAY | 27.295590 | 80.696476 | 1260531 |
253314 | SITAPUR | GONDLAMAU | KUMAUGRANT | 27.390243 | 80.632652 | 1260537 |
253315 | SITAPUR | GONDLAMAU | MAHMADPUR JHABRA | 27.281080 | 80.661790 | 1260539 |
253316 | UNNAO | BIGHAPUR | RAJAPUR PIPRAHA | 26.303163 | 80.703166 | 1260544 |
253317 | NAWANSHAHR | NAWANSHAHR | BHAURA | 31.120617 | 76.200179 | 1260662 |
253318 rows × 6 columns