# Import Libraries
import pandas as pd
import geopandas as gpd
import lxml
import os
import glob
import time
import datetime
import json
import itertools
# Set output folder (absolute path of ./output under the current working directory)
output_folder = os.path.abspath("output")
# exist_ok=True creates the folder when missing and is a no-op when it is
# already there, avoiding the check-then-create race of an os.path.exists test.
os.makedirs(output_folder, exist_ok=True)
# Input data folder (absolute path of ./data); it is only referenced here, not created
data_folder = os.path.abspath("data")
# Helper functions
def remove_consecutive_duplicates(x):
    """Collapse every run of equal adjacent items in ``x`` to a single item.

    For a string this squeezes doubled letters, e.g. ``"UNNEKURI"`` becomes
    ``"UNEKURI"``. Any iterable of strings is accepted and the surviving
    items are joined into one string.

    Missing values (``None`` or a float NaN) are returned unchanged instead
    of raising ``TypeError``, so the function can be used with
    ``Series.apply`` on a column that contains missing cells.
    """
    # ``x != x`` is only true for NaN; the isinstance guard keeps the
    # comparison away from non-scalar inputs such as lists.
    if x is None or (isinstance(x, float) and x != x):
        return x
    return ''.join(key for key, _ in itertools.groupby(x))
# Read scraped data: every file found in <data_folder>/Scrapped is loaded as a CSV
scrapped_folder = os.path.join(data_folder, "Scrapped")
# os.listdir returns entries in arbitrary order; sorting makes the printed
# listing and the row order of the merged frame reproducible.
scrapped_files = sorted(os.listdir(scrapped_folder))
print("data found for these years:")
df_list = []
for i, file in enumerate(scrapped_files, start=1):
    print(i,") ",file)
    file_path = os.path.join(scrapped_folder, file)
    # low_memory=False reads each file in one pass instead of in chunks
    df_list.append(pd.read_csv(file_path, low_memory=False))
## merge all the files
# The files do not all share the same columns. sort=True pins the
# column-sorting behaviour explicitly, so the result does not depend on the
# pandas version's default and the FutureWarning is silenced.
df = pd.concat(df_list, sort=True).reset_index(drop=True)
data found for these years: 1 ) 2009-2010_Scrapped_Cleaned_Data.csv 2 ) 2010-2011_Scrapped_Cleaned_Data.csv 3 ) 2011-2012_Scrapped_Cleaned_Data.csv 4 ) 2012-2013_Scrapped_Cleaned_Data.csv 5 ) 2013-2014_Scrapped_Cleaned_Data.csv 6 ) 2014-2015_Scrapped_Cleaned_Data.csv 7 ) 2015-2016_Scrapped_Cleaned_Data.csv 8 ) 2016-2017_Scrapped_Cleaned_Data.csv 9 ) 2017-2018_Scrapped_Cleaned_Data.csv
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\ipykernel_launcher.py:14: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version of pandas will change to not sort by default. To accept the future behavior, pass 'sort=False'. To retain the current behavior and silence the warning, pass 'sort=True'.
# Notebook cell expression: displays the merged DataFrame (rendered output follows)
df
Block | District | Financial Year | GEN Pop | Habit Category | Habitation | LPCD as on 01/04/2013 | LWE | Minority Blocks | Minority Districts | ... | Status as on 1/4/2013 | Status as on 1/4/2014 | Status as on 1/4/2015 | Status as on 1/4/2016 | Status as on 1/4/2017 | Status as on date | Sub Category | Tot Pop | Unnamed: 17 | Village | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -999 | BAHRAICH | 2009-2010 | 40.0 | No. Of Habitation With 100% Population Coverage | Abdullah Ganj?nanpara | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 42 | NaN | -999 |
1 | -999 | BAHRAICH | 2009-2010 | 52.0 | No. Of Habitation With 100% Population Coverage | Chakia/rupaideeha | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 58 | NaN | -999 |
2 | -999 | BAHRAICH | 2009-2010 | 71.0 | No. Of Habitation With 100% Population Coverage | Motipurr Renj | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 86 | NaN | -999 |
3 | -999 | BAHRAICH | 2009-2010 | 91.0 | No. Of Habitation With 100% Population Coverage | Dharampur/ Murtiha | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 115 | NaN | -999 |
4 | -999 | BAHRAICH | 2009-2010 | 409.0 | No. Of Habitation With 100% Population Coverage | Nishan Gara Renj | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 522 | NaN | -999 |
5 | -999 | BAHRAICH | 2009-2010 | 1514.0 | No. Of Habitation With 100% Population Coverage | Kakraha Renj | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 1761 | NaN | -999 |
6 | -999 | BAHRAICH | 2009-2010 | 4874.0 | No. Of Habitation With 100% Population Coverage | Katarnia Ghat | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 5347 | NaN | -999 |
7 | -999 | BAKSHA | 2009-2010 | 0.0 | No. Of Habitation With 100% Population Coverage | Boro Suba | NaN | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 350 | NaN | Tatikuchi |
8 | -999 | BAKSHA | 2009-2010 | 0.0 | No. Of Habitation With 100% Population Coverage | Dakhin Suba | NaN | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 116 | NaN | -999 |
9 | -999 | BAKSHA | 2009-2010 | 0.0 | No. Of Habitation With Population Coverage >= ... | Sathisamuka | NaN | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 349 | NaN | Majar Khat |
10 | -999 | BAKSHA | 2009-2010 | 18.0 | No. Of Habitation With 100% Population Coverage | Pub Suba (boro Suba) | NaN | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 108 | NaN | -999 |
11 | -999 | BAKSHA | 2009-2010 | 45.0 | No. Of Habitation With 100% Population Coverage | Uttar Suba Majorkuchi | NaN | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 284 | NaN | -999 |
12 | -999 | BAKSHA | 2009-2010 | 96.0 | No. Of Habitation With 100% Population Coverage | Karkabari Suba | NaN | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 299 | NaN | Majar Khat |
13 | -999 | BAKSHA | 2009-2010 | 100.0 | No. Of Habitation With Population Coverage >= ... | Boro Suba Majorbari | NaN | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 543 | NaN | Bebejiapara |
14 | -999 | BAKSHA | 2009-2010 | 133.0 | No. Of Habitation With Population Coverage >= ... | Pub Bebejiapara Suba(bagaribari) | NaN | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 733 | NaN | Bebejiapara |
15 | -999 | BAKSHA | 2009-2010 | 142.0 | No. Of Habitation With Population Coverage >= ... | Pachim Bebejiapara Suba | NaN | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 508 | NaN | Bebejiapara |
16 | -999 | BAKSHA | 2009-2010 | 192.0 | No. Of Habitation With Population Coverage >= ... | Majorkhat Suba | NaN | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 395 | NaN | Majar Khat |
17 | -999 | BAKSHA | 2009-2010 | 240.0 | No. Of Habitation With Population Coverage >= ... | Auniati Suba | NaN | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 587 | NaN | Bebejiapara |
18 | -999 | BAKSHA | 2009-2010 | 466.0 | No. Of Habitation With Population Coverage >= ... | Ganakpara | NaN | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 466 | NaN | Bebejiapara |
19 | -999 | BAKSHA | 2009-2010 | 980.0 | No. Of Habitation With 100% Population Coverage | Unnekuri | NaN | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 1024 | NaN | Unnekuri |
20 | -999 | BARPETA | 2009-2010 | 0.0 | No. Of Habitation With 100% Population Coverage | Pub Kathalmuri | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 156 | NaN | Pub Kathal Muri |
21 | -999 | BARPETA | 2009-2010 | 21.0 | No. Of Habitation With Population Coverage >= ... | Ulubari Suba | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 283 | NaN | Pub-rehabari |
22 | -999 | BARPETA | 2009-2010 | 59.0 | No. Of Habitation With Population Coverage >= ... | Pub Boro Suba | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 273 | NaN | Pub-rehabari |
23 | -999 | BARPETA | 2009-2010 | 107.0 | No. Of Habitation With 100% Population Coverage | Das Suba | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 152 | NaN | Bar Manikpur |
24 | -999 | BARPETA | 2009-2010 | 107.0 | No. Of Habitation With 100% Population Coverage | Pama Suba (bar Manikpur) | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 107 | NaN | Bar Manikpur |
25 | -999 | BARPETA | 2009-2010 | 107.0 | No. Of Habitation With 100% Population Coverage | Rehabari Suba (pachim) | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 107 | NaN | Pub-rehabari |
26 | -999 | BARPETA | 2009-2010 | 107.0 | No. Of Habitation With Population Coverage >= ... | Dongpar Suba | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 107 | NaN | Chaibari |
27 | -999 | BARPETA | 2009-2010 | 112.0 | No. Of Habitation With 100% Population Coverage | Gomura Suba (chemtia) | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 112 | NaN | Chemtia |
28 | -999 | BARPETA | 2009-2010 | 115.0 | No. Of Habitation With 100% Population Coverage | Musalman Suba | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 115 | NaN | Chemtia |
29 | -999 | BARPETA | 2009-2010 | 130.0 | No. Of Habitation With 100% Population Coverage | Bangali Suba | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | NaN | -999 | As on 01/04/2009 | 162 | NaN | Batia Mari |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
7365227 | Ziro-ii Cd Block | LOWER SUBANSIRI | 2017-2018 | 3.0 | No. Of Habitation With 100% Population Coverage | Lower Talo | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | FC | -999 | As on 01/04/2017 | 268 | NaN | Lower Talo |
7365228 | Ziro-ii Cd Block | LOWER SUBANSIRI | 2017-2018 | 3.0 | No. Of Habitation With Population Coverage >= ... | Upper Talo | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 148 | NaN | Upper Talo |
7365229 | Ziro-ii Cd Block | LOWER SUBANSIRI | 2017-2018 | 4.0 | No. Of Habitation With 100% Population Coverage | Pochu | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | FC | -999 | As on 01/04/2017 | 67 | NaN | Pochu |
7365230 | Ziro-ii Cd Block | LOWER SUBANSIRI | 2017-2018 | 9.0 | No. Of Habitation With Population Coverage > 0... | Yuj | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 169 | NaN | Yuj |
7365231 | Ziro-ii Cd Block | LOWER SUBANSIRI | 2017-2018 | 21.0 | No. Of Habitation With Population Coverage > 0... | Puk | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 227 | NaN | Puk |
7365232 | Ziro-ii Cd Block | LOWER SUBANSIRI | 2017-2018 | 108.0 | No. Of Habitation With Population Coverage >= ... | Lumri | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 108 | NaN | Lumri |
7365233 | Ziro-ii Cd Block | LOWER SUBANSIRI | 2017-2018 | 159.0 | No. Of Habitation With Population Coverage >= ... | Possa | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 503 | NaN | Possa |
7365234 | Ziro-ii Cd Block | LOWER SUBANSIRI | 2017-2018 | 230.0 | No. Of Habitation With 100% Population Coverage | Peni | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | FC | -999 | As on 01/04/2017 | 529 | NaN | Peni |
7365235 | Ziro-ii Cd Block | LOWER SUBANSIRI | 2017-2018 | 670.0 | No. Of Habitation With Population Coverage >= ... | Neepco Camp | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 793 | NaN | Neepco Camp. |
7365236 | Ziro-ii Cd Block | LOWER SUBANSIRI | 2017-2018 | 788.0 | No. Of Habitation With Population Coverage >= ... | Yazali T/ship | NaN | 0 | 0 | 1 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 1768 | NaN | Yazali T/ship |
7365237 | Zunheboto | ZUNHEBOTO | 2017-2018 | 0.0 | No. Of Habitation With Population Coverage > 0... | Hekiye | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 130 | NaN | Hekiye |
7365238 | Zunheboto | ZUNHEBOTO | 2017-2018 | 0.0 | No. Of Habitation With Population Coverage > 0... | Kheshepu | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 91 | NaN | Kheshepu |
7365239 | Zunheboto | ZUNHEBOTO | 2017-2018 | 0.0 | No. Of Habitation With Population Coverage > 0... | Newland | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 291 | NaN | New Land |
7365240 | Zunheboto | ZUNHEBOTO | 2017-2018 | 1.0 | No. Of Habitation With 100% Population Coverage | Baimbho | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | FC | -999 | As on 01/04/2017 | 549 | NaN | Baimbho |
7365241 | Zunheboto | ZUNHEBOTO | 2017-2018 | 1.0 | No. Of Habitation With 100% Population Coverage | Natha Old | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | FC | -999 | As on 01/04/2017 | 546 | NaN | Natha Old |
7365242 | Zunheboto | ZUNHEBOTO | 2017-2018 | 1.0 | No. Of Habitation With Population Coverage >= ... | Lochomi | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 921 | NaN | Lochomi |
7365243 | Zunheboto | ZUNHEBOTO | 2017-2018 | 1.0 | No. Of Habitation With Population Coverage >= ... | Lizu New | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 513 | NaN | Lizu New |
7365244 | Zunheboto | ZUNHEBOTO | 2017-2018 | 2.0 | No. Of Habitation With 100% Population Coverage | Kawoto | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | FC | -999 | As on 01/04/2017 | 381 | NaN | Kawoto |
7365245 | Zunheboto | ZUNHEBOTO | 2017-2018 | 2.0 | No. Of Habitation With 100% Population Coverage | Lizu Old | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | FC | -999 | As on 01/04/2017 | 770 | NaN | Lizu Old |
7365246 | Zunheboto | ZUNHEBOTO | 2017-2018 | 2.0 | No. Of Habitation With 100% Population Coverage | Lizu Old | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | FC | FC | Ach | 770 | NaN | Lizu Old |
7365247 | Zunheboto | ZUNHEBOTO | 2017-2018 | 2.0 | No. Of Habitation With Population Coverage > 0... | Yezami | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 773 | NaN | Yezami |
7365248 | Zunheboto | ZUNHEBOTO | 2017-2018 | 3.0 | No. Of Habitation With 100% Population Coverage | Shotomi | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | FC | -999 | As on 01/04/2017 | 407 | NaN | Shotomi |
7365249 | Zunheboto | ZUNHEBOTO | 2017-2018 | 5.0 | No. Of Habitation With Population Coverage >= ... | Asukhumi | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 1406 | NaN | Asukhumi |
7365250 | Zunheboto | ZUNHEBOTO | 2017-2018 | 5.0 | No. Of Habitation With Population Coverage >= ... | Asukhumi | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | PC | FC | Ach | 1406 | NaN | Asukhumi |
7365251 | Zunheboto | ZUNHEBOTO | 2017-2018 | 6.0 | No. Of Habitation With Population Coverage >= ... | Lizu Aviqato | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 512 | NaN | Lizu Aviqato |
7365252 | Zunheboto | ZUNHEBOTO | 2017-2018 | 9.0 | No. Of Habitation With Population Coverage > 0... | Yemishe | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 702 | NaN | Yemishe |
7365253 | Zunheboto | ZUNHEBOTO | 2017-2018 | 9.0 | No. Of Habitation With Population Coverage >= ... | Sheipu | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 980 | NaN | Sheipu |
7365254 | Zunheboto | ZUNHEBOTO | 2017-2018 | 10.0 | No. Of Habitation With Population Coverage >= ... | Natha New | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 466 | NaN | Natha New |
7365255 | Zunheboto | ZUNHEBOTO | 2017-2018 | 11.0 | No. Of Habitation With Population Coverage > 0... | Lizu Naghuto | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 631 | NaN | Lizu Naghuto |
7365256 | Zunheboto | ZUNHEBOTO | 2017-2018 | 23.0 | No. Of Habitation With Population Coverage > 0... | Sukhalu | NaN | 0 | 1 | 0 | ... | NaN | NaN | NaN | NaN | PC | -999 | As on 01/04/2017 | 523 | NaN | Sukhalu |
7365257 rows × 31 columns
# For Geocoding
# Keep only the location columns and drop rows whose Village is the string
# '-999' (appears to be the missing-value marker in the data -- confirm),
# then keep one row per distinct (Village, District, State) combination.
df = df[['Village', 'District', 'State']]
df = df[df['Village'] != '-999']
df = df.drop_duplicates().reset_index(drop=True)
to_geocode_df = df
df = to_geocode_df.copy()
# Build normalised matching keys: upper-case, strip every non-letter character,
# then collapse repeated adjacent letters.
# regex=True is required: since pandas 2.0 Series.str.replace treats the
# pattern as a literal string by default, which would strip nothing.
df['Village_x'] = df['Village'].str.upper().str.replace('([^A-Za-z]+)', '', regex=True).apply(remove_consecutive_duplicates)
df['District_x'] = df['District'].str.upper().str.replace('([^A-Za-z]+)', '', regex=True).apply(remove_consecutive_duplicates)
# 1-based row identifier, used later to de-duplicate merge results
df['code_x'] = df.index + 1
## Output to CSV
#df.to_csv(os.path.join(output_folder, "gp_to_geocode.csv"), encoding='utf-8', index=False)
df
Village | District | State | Village_x | District_x | code_x | |
---|---|---|---|---|---|---|
0 | Tatikuchi | BAKSHA | ASSAM | TATIKUCHI | BAKSHA | 1 |
1 | Majar Khat | BAKSHA | ASSAM | MAJARKHAT | BAKSHA | 2 |
2 | Bebejiapara | BAKSHA | ASSAM | BEBEJIAPARA | BAKSHA | 3 |
3 | Unnekuri | BAKSHA | ASSAM | UNEKURI | BAKSHA | 4 |
4 | Pub Kathal Muri | BARPETA | ASSAM | PUBKATHALMURI | BARPETA | 5 |
5 | Pub-rehabari | BARPETA | ASSAM | PUBREHABARI | BARPETA | 6 |
6 | Bar Manikpur | BARPETA | ASSAM | BARMANIKPUR | BARPETA | 7 |
7 | Chaibari | BARPETA | ASSAM | CHAIBARI | BARPETA | 8 |
8 | Chemtia | BARPETA | ASSAM | CHEMTIA | BARPETA | 9 |
9 | Batia Mari | BARPETA | ASSAM | BATIAMARI | BARPETA | 10 |
10 | Baghmara | BARPETA | ASSAM | BAGHMARA | BARPETA | 11 |
11 | Pachim Kathalmuri(kathalguri) | BARPETA | ASSAM | PACHIMKATHALMURIKATHALGURI | BARPETA | 12 |
12 | Gergeria | BARPETA | ASSAM | GERGERIA | BARPETA | 13 |
13 | Madhapur | BARPETA | ASSAM | MADHAPUR | BARPETA | 14 |
14 | Kathalmuri Ghat | BARPETA | ASSAM | KATHALMURIGHAT | BARPETA | 15 |
15 | Dakshin Rehabari | BARPETA | ASSAM | DAKSHINREHABARI | BARPETA | 16 |
16 | Hallang Bari | BARPETA | ASSAM | HALANGBARI | BARPETA | 17 |
17 | Barsidhani | BARPETA | ASSAM | BARSIDHANI | BARPETA | 18 |
18 | Chengli Mari | BARPETA | ASSAM | CHENGLIMARI | BARPETA | 19 |
19 | Chungapota | BONGAIGAON | ASSAM | CHUNGAPOTA | BONGAIGAON | 20 |
20 | Tirimari | CHIRANG | ASSAM | TIRIMARI | CHIRANG | 21 |
21 | Nalbari | CHIRANG | ASSAM | NALBARI | CHIRANG | 22 |
22 | Khagrabari | CHIRANG | ASSAM | KHAGRABARI | CHIRANG | 23 |
23 | Basugaon | CHIRANG | ASSAM | BASUGAON | CHIRANG | 24 |
24 | Bagaribari | DARRANG | ASSAM | BAGARIBARI | DARANG | 25 |
25 | Saikiapara | DARRANG | ASSAM | SAIKIAPARA | DARANG | 26 |
26 | Borzamuguri | DARRANG | ASSAM | BORZAMUGURI | DARANG | 27 |
27 | Bez Bhageti | DARRANG | ASSAM | BEZBHAGETI | DARANG | 28 |
28 | Khagrabari (khagrabari No.2) | DARRANG | ASSAM | KHAGRABARIKHAGRABARINO | DARANG | 29 |
29 | Dewanpukhuri | DARRANG | ASSAM | DEWANPUKHURI | DARANG | 30 |
... | ... | ... | ... | ... | ... | ... |
416483 | Anegunta(034 ) | SANGAREDDY(23) | TELANGANA | ANEGUNTA | SANGAREDY | 416484 |
416484 | Govindpur(039 ) | SANGAREDDY(23) | TELANGANA | GOVINDPUR | SANGAREDY | 416485 |
416485 | Jadimalkapur(036) | SANGAREDDY(23) | TELANGANA | JADIMALKAPUR | SANGAREDY | 416486 |
416486 | Mannapur(024 ) | SANGAREDDY(23) | TELANGANA | MANAPUR | SANGAREDY | 416487 |
416487 | Mogudampally(022 ) | SANGAREDDY(23) | TELANGANA | MOGUDAMPALY | SANGAREDY | 416488 |
416488 | Raipally-t(031) | SANGAREDDY(23) | TELANGANA | RAIPALYT | SANGAREDY | 416489 |
416489 | Parwathapur(035) | SANGAREDDY(23) | TELANGANA | PARWATHAPUR | SANGAREDY | 416490 |
416490 | Thumkunta(006 ) | SANGAREDDY(23) | TELANGANA | THUMKUNTA | SANGAREDY | 416491 |
416491 | Pastapur(010 ) | SANGAREDDY(23) | TELANGANA | PASTAPUR | SANGAREDY | 416492 |
416492 | Godgarpally(016 ) | SANGAREDDY(23) | TELANGANA | GODGARPALY | SANGAREDY | 416493 |
416493 | Khanjamalpur(017) | SANGAREDDY(23) | TELANGANA | KHANJAMALPUR | SANGAREDY | 416494 |
416494 | Raipally D(011) | SANGAREDDY(23) | TELANGANA | RAIPALYD | SANGAREDY | 416495 |
416495 | Ippepally(023 ) | SANGAREDDY(23) | TELANGANA | IPEPALY | SANGAREDY | 416496 |
416496 | Auranganagar(020 ) | SANGAREDDY(23) | TELANGANA | AURANGANAGAR | SANGAREDY | 416497 |
416497 | Didgi(008 ) | SANGAREDDY(23) | TELANGANA | DIDGI | SANGAREDY | 416498 |
416498 | Burdipad(005 ) | SANGAREDDY(23) | TELANGANA | BURDIPAD | SANGAREDY | 416499 |
416499 | Asadgunj(014 ) | SANGAREDDY(23) | TELANGANA | ASADGUNJ | SANGAREDY | 416500 |
416500 | Gopanpalluy(015 ) | SANGAREDDY(23) | TELANGANA | GOPANPALUY | SANGAREDY | 416501 |
416501 | Khasimpur(013 ) | SANGAREDDY(23) | TELANGANA | KHASIMPUR | SANGAREDY | 416502 |
416502 | Algol(009 ) | SANGAREDDY(23) | TELANGANA | ALGOL | SANGAREDY | 416503 |
416503 | Chinna Hyderabad(027) | SANGAREDDY(23) | TELANGANA | CHINAHYDERABAD | SANGAREDY | 416504 |
416504 | Madgi(001 ) | SANGAREDDY(23) | TELANGANA | MADGI | SANGAREDY | 416505 |
416505 | Kothur-b(007) | SANGAREDDY(23) | TELANGANA | KOTHURB | SANGAREDY | 416506 |
416506 | Jadimalkapur (037) | SANGAREDDY(23) | TELANGANA | JADIMALKAPUR | SANGAREDY | 416507 |
416507 | Chiragpally(002 ) | SANGAREDDY(23) | TELANGANA | CHIRAGPALY | SANGAREDY | 416508 |
416508 | Buchinelly(004 ) | SANGAREDDY(23) | TELANGANA | BUCHINELY | SANGAREDY | 416509 |
416509 | Satwar(003 ) | SANGAREDDY(23) | TELANGANA | SATWAR | SANGAREDY | 416510 |
416510 | Dhanasiri(018 ) | SANGAREDDY(23) | TELANGANA | DHANASIRI | SANGAREDY | 416511 |
416511 | Ranjhole(030 ) | SANGAREDDY(23) | TELANGANA | RANJHOLE | SANGAREDY | 416512 |
416512 | Allipur(029 ) | SANGAREDDY(23) | TELANGANA | ALIPUR | SANGAREDY | 416513 |
416513 rows × 6 columns
# Read the village / assembly-constituency match table (merge.xlsx)
match_folder = os.path.join(data_folder, "VILLAGE_ASSEMBLY_PC_match")
file_path = os.path.join(match_folder, "merge.xlsx")
match_df = pd.read_excel(file_path)
#match_df = match_df[['District Name', 'Village Name']]
# Build normalised matching keys: upper-case, strip every non-letter character,
# then collapse repeated adjacent letters.
# regex=True is required: since pandas 2.0 Series.str.replace treats the
# pattern as a literal string by default, which would strip nothing.
match_df['District_y'] = match_df['District Name'].str.upper().str.replace('([^A-Za-z]+)', '', regex=True).apply(remove_consecutive_duplicates)
match_df['Village_y'] = match_df['Village Name'].str.upper().str.replace('([^A-Za-z]+)', '', regex=True).apply(remove_consecutive_duplicates)
# 1-based row identifier for the match table
match_df['code_y'] = match_df.index + 1
match_df
S.No. | Assembly Constituency Code | Assembly Constituency ECI Code | Assembly Constituency Name | Block Code | Block Name | District Census 2011 Code | District Code | District Name | Localbody Code | ... | State Name | Subdistrict Census 2011 Code | Subdistrict Code | Subdistrict Name | Village Census 2011 Code | Village Code | Village Name | District_y | Village_y | code_y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 198997 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587861 | 587861 | Allavaram | EASTGODAVARI | ALAVARAM | 1 |
1 | 2 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199007 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587861 | 587861 | Allavaram | EASTGODAVARI | ALAVARAM | 2 |
2 | 3 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 198998 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587871 | 587871 | Bendamurulanka | EASTGODAVARI | BENDAMURULANKA | 3 |
3 | 4 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199016 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587871 | 587871 | Bendamurulanka | EASTGODAVARI | BENDAMURULANKA | 4 |
4 | 5 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 198999 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587860 | 587860 | Bodasakurru | EASTGODAVARI | BODASAKURU | 5 |
5 | 6 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199000 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587865 | 587865 | Devaguptam | EASTGODAVARI | DEVAGUPTAM | 6 |
6 | 7 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199006 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587865 | 587865 | Devaguptam | EASTGODAVARI | DEVAGUPTAM | 7 |
7 | 8 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199001 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587865 | 587865 | Devaguptam | EASTGODAVARI | DEVAGUPTAM | 8 |
8 | 9 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199002 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587868 | 587868 | Godi | EASTGODAVARI | GODI | 9 |
9 | 10 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199004 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587868 | 587868 | Godi | EASTGODAVARI | GODI | 10 |
10 | 11 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199003 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587867 | 587867 | Godilanka | EASTGODAVARI | GODILANKA | 11 |
11 | 12 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199005 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587862 | 587862 | Gudala | EASTGODAVARI | GUDALA | 12 |
12 | 13 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199008 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587872 | 587872 | Komaragiripatnam | EASTGODAVARI | KOMARAGIRIPATNAM | 13 |
13 | 14 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199014 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587872 | 587872 | Komaragiripatnam | EASTGODAVARI | KOMARAGIRIPATNAM | 14 |
14 | 15 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199009 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587870 | 587870 | Mogallamuru | EASTGODAVARI | MOGALAMURU | 15 |
15 | 16 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199011 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587866 | 587866 | Rellugadda | EASTGODAVARI | RELUGADA | 16 |
16 | 17 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199010 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587873 | 587873 | Samanthakuru | EASTGODAVARI | SAMANTHAKURU | 17 |
17 | 18 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199012 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587873 | 587873 | Samanthakuru | EASTGODAVARI | SAMANTHAKURU | 18 |
18 | 19 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199013 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587863 | 587863 | Tadikona | EASTGODAVARI | TADIKONA | 19 |
19 | 20 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199015 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587869 | 587869 | Thurupulanka | EASTGODAVARI | THURUPULANKA | 20 |
20 | 21 | 3166 | 163 | Amalapuram | 4868 | ALLAVARAM | 545 | 505 | EAST GODAVARI | 199017 | ... | ANDHRA PRADESH | 4940 | 4940 | Allavaram | 587864 | 587864 | Yentrikona | EASTGODAVARI | YENTRIKONA | 21 |
21 | 22 | 3166 | 163 | Amalapuram | 4869 | AMALAPURAM | 545 | 505 | EAST GODAVARI | 199018 | ... | ANDHRA PRADESH | 4941 | 4941 | Amalapuram | 587877 | 587877 | A. Vemavaram | EASTGODAVARI | AVEMAVARAM | 22 |
22 | 23 | 3166 | 163 | Amalapuram | 4869 | AMALAPURAM | 545 | 505 | EAST GODAVARI | 199019 | ... | ANDHRA PRADESH | 4941 | 4941 | Amalapuram | 0 | 912647 | A. Vemavarappadu | EASTGODAVARI | AVEMAVARAPADU | 23 |
23 | 24 | 3166 | 163 | Amalapuram | 4869 | AMALAPURAM | 545 | 505 | EAST GODAVARI | 199020 | ... | ANDHRA PRADESH | 4941 | 4941 | Amalapuram | 0 | 912648 | Bandarulanka | EASTGODAVARI | BANDARULANKA | 24 |
24 | 25 | 3166 | 163 | Amalapuram | 4869 | AMALAPURAM | 545 | 505 | EAST GODAVARI | 199021 | ... | ANDHRA PRADESH | 4941 | 4941 | Amalapuram | 587878 | 587878 | Bhatnavilli | EASTGODAVARI | BHATNAVILI | 25 |
25 | 26 | 3166 | 163 | Amalapuram | 4869 | AMALAPURAM | 545 | 505 | EAST GODAVARI | 199033 | ... | ANDHRA PRADESH | 4941 | 4941 | Amalapuram | 587878 | 587878 | Bhatnavilli | EASTGODAVARI | BHATNAVILI | 26 |
26 | 27 | 3166 | 163 | Amalapuram | 4869 | AMALAPURAM | 545 | 505 | EAST GODAVARI | 199023 | ... | ANDHRA PRADESH | 4941 | 4941 | Amalapuram | 587881 | 587881 | Edarapalle | EASTGODAVARI | EDARAPALE | 27 |
27 | 28 | 3166 | 163 | Amalapuram | 4869 | AMALAPURAM | 545 | 505 | EAST GODAVARI | 199024 | ... | ANDHRA PRADESH | 4941 | 4941 | Amalapuram | 587883 | 587883 | Gunnapalle Agraharam | EASTGODAVARI | GUNAPALEAGRAHARAM | 28 |
28 | 29 | 3166 | 163 | Amalapuram | 4869 | AMALAPURAM | 545 | 505 | EAST GODAVARI | 199025 | ... | ANDHRA PRADESH | 4941 | 4941 | Amalapuram | 587886 | 587886 | Immidivarappadu | EASTGODAVARI | IMIDIVARAPADU | 29 |
29 | 30 | 3166 | 163 | Amalapuram | 4869 | AMALAPURAM | 545 | 505 | EAST GODAVARI | 199026 | ... | ANDHRA PRADESH | 4941 | 4941 | Amalapuram | 587882 | 587882 | Indupalle | EASTGODAVARI | INDUPALE | 30 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
477590 | 65503 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 79890 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212463 | 212463 | Meerapur | MIRZAPUR | MERAPUR | 477591 |
477591 | 65504 | 996 | 398 | Chunar | 1411 | SHIKHAR | 199 | 170 | MIRZAPUR | 80088 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212381 | 212381 | Meghupur | MIRZAPUR | MEGHUPUR | 477592 |
477592 | 65505 | 996 | 398 | Chunar | 1411 | SHIKHAR | 199 | 170 | MIRZAPUR | 80081 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212402 | 212402 | Meria | MIRZAPUR | MERIA | 477593 |
477593 | 65506 | 996 | 398 | Chunar | 1402 | JAMALPUR | 199 | 170 | MIRZAPUR | 79571 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212762 | 212762 | Milki Mu. Barawa | MIRZAPUR | MILKIMUBARAWA | 477594 |
477594 | 65507 | 996 | 398 | Chunar | 1402 | JAMALPUR | 199 | 170 | MIRZAPUR | 79520 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 0 | 904102 | Milki Nip | MIRZAPUR | MILKINIP | 477595 |
477595 | 65508 | 996 | 398 | Chunar | 1402 | JAMALPUR | 199 | 170 | MIRZAPUR | 79537 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212755 | 212755 | Milki Nisf Mu. Gogahara | MIRZAPUR | MILKINISFMUGOGAHARA | 477596 |
477596 | 65509 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 79853 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212627 | 212627 | Mirapur | MIRZAPUR | MIRAPUR | 477597 |
477597 | 65510 | 996 | 398 | Chunar | 1402 | JAMALPUR | 199 | 170 | MIRZAPUR | 79573 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212644 | 212644 | Mirzapur Khurd | MIRZAPUR | MIRZAPURKHURD | 477598 |
477598 | 65511 | 996 | 398 | Chunar | 1411 | SHIKHAR | 199 | 170 | MIRZAPUR | 80080 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212406 | 212406 | Misirpur | MIRZAPUR | MISIRPUR | 477599 |
477599 | 65512 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 79856 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212550 | 212550 | Mohamadabad | MIRZAPUR | MOHAMADABAD | 477600 |
477600 | 65513 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 79825 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212461 | 212461 | Mohammadpur | MIRZAPUR | MOHAMADPUR | 477601 |
477601 | 65514 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 79883 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212565 | 212565 | Mohammadpur Urf Dayalpur | MIRZAPUR | MOHAMADPURURFDAYALPUR | 477602 |
477602 | 65515 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 79854 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212629 | 212629 | Moharpur | MIRZAPUR | MOHARPUR | 477603 |
477603 | 65516 | 996 | 398 | Chunar | 1402 | JAMALPUR | 199 | 170 | MIRZAPUR | 79565 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 0 | 904112 | Mohinipur | MIRZAPUR | MOHINIPUR | 477604 |
477604 | 65517 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 79854 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212630 | 212630 | Moinuddinpur | MIRZAPUR | MOINUDINPUR | 477605 |
477605 | 65518 | 996 | 398 | Chunar | 1411 | SHIKHAR | 199 | 170 | MIRZAPUR | 80062 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212392 | 212392 | Molnapur | MIRZAPUR | MOLNAPUR | 477606 |
477606 | 65519 | 996 | 398 | Chunar | 1411 | SHIKHAR | 199 | 170 | MIRZAPUR | 267093 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212403 | 212403 | Muinuddinpur | MIRZAPUR | MUINUDINPUR | 477607 |
477607 | 65520 | 996 | 398 | Chunar | 1402 | JAMALPUR | 199 | 170 | MIRZAPUR | 79504 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212651 | 212651 | Mujaffarpur | MIRZAPUR | MUJAFARPUR | 477608 |
477608 | 65521 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 79855 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212637 | 212637 | Mujdih | MIRZAPUR | MUJDIH | 477609 |
477609 | 65522 | 996 | 398 | Chunar | 1411 | SHIKHAR | 199 | 170 | MIRZAPUR | 80082 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212339 | 212339 | Mukundpur | MIRZAPUR | MUKUNDPUR | 477610 |
477610 | 65523 | 996 | 398 | Chunar | 1402 | JAMALPUR | 199 | 170 | MIRZAPUR | 79574 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212758 | 212758 | Murhua | MIRZAPUR | MURHUA | 477611 |
477611 | 65524 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 79856 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212532 | 212532 | Musepur | MIRZAPUR | MUSEPUR | 477612 |
477612 | 65525 | 996 | 398 | Chunar | 1411 | SHIKHAR | 199 | 170 | MIRZAPUR | 80074 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212428 | 212428 | Muzahidpur | MIRZAPUR | MUZAHIDPUR | 477613 |
477613 | 65526 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 79841 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212534 | 212534 | Naithi Pachawa | MIRZAPUR | NAITHIPACHAWA | 477614 |
477614 | 65527 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 79857 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212584 | 212584 | Nakahara | MIRZAPUR | NAKAHARA | 477615 |
477615 | 65528 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 79858 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212506 | 212506 | Narayanpur | MIRZAPUR | NARAYANPUR | 477616 |
477616 | 65529 | 996 | 398 | Chunar | 1402 | JAMALPUR | 199 | 170 | MIRZAPUR | 79588 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212785 | 212785 | Naudiha | MIRZAPUR | NAUDIHA | 477617 |
477617 | 65530 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 269179 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212579 | 212579 | Naugaraha | MIRZAPUR | NAUGARAHA | 477618 |
477618 | 65531 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 79887 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212616 | 212616 | Newada | MIRZAPUR | NEWADA | 477619 |
477619 | 65532 | 996 | 398 | Chunar | 1407 | NARAINPUR | 199 | 170 | MIRZAPUR | 79859 | ... | UTTAR PRADESH | 1003 | 1003 | Chunar | 212609 | 212609 | Nibi | MIRZAPUR | NIBI | 477620 |
477620 rows × 24 columns
# Inner-join the scraped rows to match_df on the (village, district) key pair,
# then keep only the first joined row for each code_x value so that no code_x
# appears more than once in the result.
geocoded_df = (
    df.merge(
        match_df,
        how='inner',
        left_on=['Village_x', 'District_x'],
        right_on=['Village_y', 'District_y'],
    )
    .drop_duplicates(subset='code_x')
)
# Output to CSV
geocoded_df
geocoded_csv_path = os.path.join(output_folder, "geocoded_villages.csv")
geocoded_df.to_csv(geocoded_csv_path, encoding='utf-8', index=False)
# Rows of df whose code_x is absent from geocoded_df found no match in the
# merge and still need geocoding. The boolean-mask selection is copied so the
# column assignments below act on an independent frame; assigning onto the
# bare slice is what triggers pandas' SettingWithCopyWarning.
non_geocoded_df = df[~df['code_x'].isin(geocoded_df['code_x'])].copy()
# Output to CSV
# Add an upper-cased "<name> to geocode" column for each name column, with
# every character other than ASCII letters and spaces removed
# (e.g. "JANGOAN(04)" -> "JANGOAN"). regex=True is passed explicitly because
# pandas >= 2.0 treats the pattern as a literal string by default, which
# would silently leave the values uncleaned.
for name_col in ('District', 'Village', 'State'):
    non_geocoded_df[name_col + ' to geocode'] = (
        non_geocoded_df[name_col]
        .str.upper()
        .str.replace(r'([^A-Za-z ]+)', '', regex=True)
    )
non_geocoded_df.to_csv(os.path.join(output_folder, "non_geocoded_villages.csv"), encoding='utf-8', index=False)
non_geocoded_df
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\ipykernel_launcher.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy This is separate from the ipykernel package so we can avoid doing imports until C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\ipykernel_launcher.py:4: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy after removing the cwd from sys.path. C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\ipykernel_launcher.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy """
Village | District | State | Village_x | District_x | code_x | District to geocode | Village to geocode | State to geocode | |
---|---|---|---|---|---|---|---|---|---|
0 | Tatikuchi | BAKSHA | ASSAM | TATIKUCHI | BAKSHA | 1 | BAKSHA | TATIKUCHI | ASSAM |
1 | Majar Khat | BAKSHA | ASSAM | MAJARKHAT | BAKSHA | 2 | BAKSHA | MAJAR KHAT | ASSAM |
2 | Bebejiapara | BAKSHA | ASSAM | BEBEJIAPARA | BAKSHA | 3 | BAKSHA | BEBEJIAPARA | ASSAM |
3 | Unnekuri | BAKSHA | ASSAM | UNEKURI | BAKSHA | 4 | BAKSHA | UNNEKURI | ASSAM |
20 | Tirimari | CHIRANG | ASSAM | TIRIMARI | CHIRANG | 21 | CHIRANG | TIRIMARI | ASSAM |
21 | Nalbari | CHIRANG | ASSAM | NALBARI | CHIRANG | 22 | CHIRANG | NALBARI | ASSAM |
22 | Khagrabari | CHIRANG | ASSAM | KHAGRABARI | CHIRANG | 23 | CHIRANG | KHAGRABARI | ASSAM |
23 | Basugaon | CHIRANG | ASSAM | BASUGAON | CHIRANG | 24 | CHIRANG | BASUGAON | ASSAM |
24 | Bagaribari | DARRANG | ASSAM | BAGARIBARI | DARANG | 25 | DARRANG | BAGARIBARI | ASSAM |
25 | Saikiapara | DARRANG | ASSAM | SAIKIAPARA | DARANG | 26 | DARRANG | SAIKIAPARA | ASSAM |
26 | Borzamuguri | DARRANG | ASSAM | BORZAMUGURI | DARANG | 27 | DARRANG | BORZAMUGURI | ASSAM |
27 | Bez Bhageti | DARRANG | ASSAM | BEZBHAGETI | DARANG | 28 | DARRANG | BEZ BHAGETI | ASSAM |
28 | Khagrabari (khagrabari No.2) | DARRANG | ASSAM | KHAGRABARIKHAGRABARINO | DARANG | 29 | DARRANG | KHAGRABARI KHAGRABARI NO | ASSAM |
29 | Dewanpukhuri | DARRANG | ASSAM | DEWANPUKHURI | DARANG | 30 | DARRANG | DEWANPUKHURI | ASSAM |
30 | Kawi Mari | DARRANG | ASSAM | KAWIMARI | DARANG | 31 | DARRANG | KAWI MARI | ASSAM |
31 | Barara | DARRANG | ASSAM | BARARA | DARANG | 32 | DARRANG | BARARA | ASSAM |
32 | Sub-mahaliapara | DARRANG | ASSAM | SUBMAHALIAPARA | DARANG | 33 | DARRANG | SUBMAHALIAPARA | ASSAM |
33 | Kuhiarkuchi | DARRANG | ASSAM | KUHIARKUCHI | DARANG | 34 | DARRANG | KUHIARKUCHI | ASSAM |
34 | Gomthapara | DARRANG | ASSAM | GOMTHAPARA | DARANG | 35 | DARRANG | GOMTHAPARA | ASSAM |
35 | Bholaguri | DARRANG | ASSAM | BHOLAGURI | DARANG | 36 | DARRANG | BHOLAGURI | ASSAM |
36 | Sabukdhara | DARRANG | ASSAM | SABUKDHARA | DARANG | 37 | DARRANG | SABUKDHARA | ASSAM |
37 | No.1 Bainara Satra | DARRANG | ASSAM | NOBAINARASATRA | DARANG | 38 | DARRANG | NO BAINARA SATRA | ASSAM |
38 | Sareng | DARRANG | ASSAM | SARENG | DARANG | 39 | DARRANG | SARENG | ASSAM |
39 | No.2 Boinara Satra | DARRANG | ASSAM | NOBOINARASATRA | DARANG | 40 | DARRANG | NO BOINARA SATRA | ASSAM |
40 | Jhargaon | DARRANG | ASSAM | JHARGAON | DARANG | 41 | DARRANG | JHARGAON | ASSAM |
41 | Barnadijhar | DARRANG | ASSAM | BARNADIJHAR | DARANG | 42 | DARRANG | BARNADIJHAR | ASSAM |
42 | Athiabari Khat | DARRANG | ASSAM | ATHIABARIKHAT | DARANG | 43 | DARRANG | ATHIABARI KHAT | ASSAM |
43 | Barangabari | DARRANG | ASSAM | BARANGABARI | DARANG | 44 | DARRANG | BARANGABARI | ASSAM |
44 | Salaipara | DARRANG | ASSAM | SALAIPARA | DARANG | 45 | DARRANG | SALAIPARA | ASSAM |
45 | S. Hetoi | DIMAPUR | NAGALAND | SHETOI | DIMAPUR | 46 | DIMAPUR | S HETOI | NAGALAND |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
416468 | Venkatapur(003 ) | JANGOAN(04) | TELANGANA | VENKATAPUR | JANGOAN | 416469 | JANGOAN | VENKATAPUR | TELANGANA |
416469 | Shapally(014 ) | JANGOAN(04) | TELANGANA | SHAPALY | JANGOAN | 416470 | JANGOAN | SHAPALLY | TELANGANA |
416471 | Raghunathapally(002 ) | JANGOAN(04) | TELANGANA | RAGHUNATHAPALY | JANGOAN | 416472 | JANGOAN | RAGHUNATHAPALLY | TELANGANA |
416472 | Thammadapalli-i(006) | JANGOAN(04) | TELANGANA | THAMADAPALI | JANGOAN | 416473 | JANGOAN | THAMMADAPALLII | TELANGANA |
416473 | Thammadapally-g(015) | JANGOAN(04) | TELANGANA | THAMADAPALYG | JANGOAN | 416474 | JANGOAN | THAMMADAPALLYG | TELANGANA |
416474 | Kunur(005 ) | JANGOAN(04) | TELANGANA | KUNUR | JANGOAN | 416475 | JANGOAN | KUNUR | TELANGANA |
416476 | Gudpally-t(019) | SANGAREDDY(23) | TELANGANA | GUDPALYT | SANGAREDY | 416477 | SANGAREDDY | GUDPALLYT | TELANGANA |
416477 | Mogudampally(022) | SANGAREDDY(23) | TELANGANA | MOGUDAMPALY | SANGAREDY | 416478 | SANGAREDDY | MOGUDAMPALLY | TELANGANA |
416478 | Shaikapur(033 ) | SANGAREDDY(23) | TELANGANA | SHAIKAPUR | SANGAREDY | 416479 | SANGAREDDY | SHAIKAPUR | TELANGANA |
416480 | Hothi-k(028) | SANGAREDDY(23) | TELANGANA | HOTHIK | SANGAREDY | 416481 | SANGAREDDY | HOTHIK | TELANGANA |
416484 | Govindpur(039 ) | SANGAREDDY(23) | TELANGANA | GOVINDPUR | SANGAREDY | 416485 | SANGAREDDY | GOVINDPUR | TELANGANA |
416485 | Jadimalkapur(036) | SANGAREDDY(23) | TELANGANA | JADIMALKAPUR | SANGAREDY | 416486 | SANGAREDDY | JADIMALKAPUR | TELANGANA |
416487 | Mogudampally(022 ) | SANGAREDDY(23) | TELANGANA | MOGUDAMPALY | SANGAREDY | 416488 | SANGAREDDY | MOGUDAMPALLY | TELANGANA |
416488 | Raipally-t(031) | SANGAREDDY(23) | TELANGANA | RAIPALYT | SANGAREDY | 416489 | SANGAREDDY | RAIPALLYT | TELANGANA |
416489 | Parwathapur(035) | SANGAREDDY(23) | TELANGANA | PARWATHAPUR | SANGAREDY | 416490 | SANGAREDDY | PARWATHAPUR | TELANGANA |
416490 | Thumkunta(006 ) | SANGAREDDY(23) | TELANGANA | THUMKUNTA | SANGAREDY | 416491 | SANGAREDDY | THUMKUNTA | TELANGANA |
416491 | Pastapur(010 ) | SANGAREDDY(23) | TELANGANA | PASTAPUR | SANGAREDY | 416492 | SANGAREDDY | PASTAPUR | TELANGANA |
416492 | Godgarpally(016 ) | SANGAREDDY(23) | TELANGANA | GODGARPALY | SANGAREDY | 416493 | SANGAREDDY | GODGARPALLY | TELANGANA |
416494 | Raipally D(011) | SANGAREDDY(23) | TELANGANA | RAIPALYD | SANGAREDY | 416495 | SANGAREDDY | RAIPALLY D | TELANGANA |
416495 | Ippepally(023 ) | SANGAREDDY(23) | TELANGANA | IPEPALY | SANGAREDY | 416496 | SANGAREDDY | IPPEPALLY | TELANGANA |
416498 | Burdipad(005 ) | SANGAREDDY(23) | TELANGANA | BURDIPAD | SANGAREDY | 416499 | SANGAREDDY | BURDIPAD | TELANGANA |
416500 | Gopanpalluy(015 ) | SANGAREDDY(23) | TELANGANA | GOPANPALUY | SANGAREDY | 416501 | SANGAREDDY | GOPANPALLUY | TELANGANA |
416501 | Khasimpur(013 ) | SANGAREDDY(23) | TELANGANA | KHASIMPUR | SANGAREDY | 416502 | SANGAREDDY | KHASIMPUR | TELANGANA |
416502 | Algol(009 ) | SANGAREDDY(23) | TELANGANA | ALGOL | SANGAREDY | 416503 | SANGAREDDY | ALGOL | TELANGANA |
416506 | Jadimalkapur (037) | SANGAREDDY(23) | TELANGANA | JADIMALKAPUR | SANGAREDY | 416507 | SANGAREDDY | JADIMALKAPUR | TELANGANA |
416507 | Chiragpally(002 ) | SANGAREDDY(23) | TELANGANA | CHIRAGPALY | SANGAREDY | 416508 | SANGAREDDY | CHIRAGPALLY | TELANGANA |
416508 | Buchinelly(004 ) | SANGAREDDY(23) | TELANGANA | BUCHINELY | SANGAREDY | 416509 | SANGAREDDY | BUCHINELLY | TELANGANA |
416509 | Satwar(003 ) | SANGAREDDY(23) | TELANGANA | SATWAR | SANGAREDY | 416510 | SANGAREDDY | SATWAR | TELANGANA |
416511 | Ranjhole(030 ) | SANGAREDDY(23) | TELANGANA | RANJHOLE | SANGAREDY | 416512 | SANGAREDDY | RANJHOLE | TELANGANA |
416512 | Allipur(029 ) | SANGAREDDY(23) | TELANGANA | ALIPUR | SANGAREDY | 416513 | SANGAREDDY | ALLIPUR | TELANGANA |
239670 rows × 9 columns