In [22]:
# Import Libraries
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
import lxml
import os
import glob
import time
import datetime
import json
import itertools
In [23]:
# Set Output Folder
output_folder = os.path.abspath("output")
if not os.path.exists(output_folder):
    os.makedirs(output_folder)
In [24]:
# Import data folders
data_folder = os.path.abspath("data")
In [25]:
# Helping Functions
def remove_consecutive_duplicates(x):
    return ''.join(i for i, _ in itertools.groupby(x))
In [26]:
# States of India
states_file = os.path.join(data_folder, 'allStateofIndia2018_07_15_05_49_44_241.csv')
states_df = pd.read_csv(states_file, delimiter=';')
states_df = states_df[['State Name(In English)', 'Census 2011 Code']]
states_df.columns = ['State Name', 'state code 2011']
states_df.loc[states_df['State Name'] == 'TELANGANA', 'state code 2011'] = 28 #Telangana Fix
states_df['State Name'] = states_df['State Name'].str.upper()
states_df
Out[26]:
State Name state code 2011
0 ANDAMAN AND NICOBAR ISLANDS 35
1 ANDHRA PRADESH 28
2 ARUNACHAL PRADESH 12
3 ASSAM 18
4 BIHAR 10
5 CHANDIGARH 4
6 CHHATTISGARH 22
7 DADRA AND NAGAR HAVELI 26
8 DAMAN AND DIU 25
9 DELHI 7
10 GOA 30
11 GUJARAT 24
12 HARYANA 6
13 HIMACHAL PRADESH 2
14 JAMMU AND KASHMIR 1
15 JHARKHAND 20
16 KARNATAKA 29
17 KERALA 32
18 LAKSHADWEEP 31
19 MADHYA PRADESH 23
20 MAHARASHTRA 27
21 MANIPUR 14
22 MEGHALAYA 17
23 MIZORAM 15
24 NAGALAND 13
25 ODISHA 21
26 PUDUCHERRY 34
27 PUNJAB 3
28 RAJASTHAN 8
29 SIKKIM 11
30 TAMIL NADU 33
31 TELANGANA 28
32 TRIPURA 16
33 UTTARAKHAND 5
34 UTTAR PRADESH 9
35 WEST BENGAL 19
In [27]:
# Read physical progress Data
physical_progress_folder = os.path.join(data_folder, "Physical Progress Reports", "output")
physical_progress_files = os.listdir(physical_progress_folder)
physical_progress_files = [os.path.join(physical_progress_folder, x)  for x in physical_progress_files if x.split('.')[-1] == 'csv'][:-1]
df_list = []
for file in physical_progress_files:
    df = pd.read_csv(file, usecols=[0, 7, 8, 9, 10, 11])
    df.columns = ['Panchayat Name', 'Total Houses completed', 'Financial Year', 'Scheme', 'State Name', 'District Name']
    df_list.append(df)
    print(file)
physical_df = pd.concat(df_list).reset_index()
physical_df = physical_df[physical_df['Panchayat Name'].notnull()]
physical_df['Panchayat Name'] = physical_df['Panchayat Name'].str.upper()
physical_df['Panchayat Name'] = physical_df['Panchayat Name'].str.replace('PANCHAYAT', '').str.replace('GP', '')
physical_df['District Name'] = physical_df['District Name'].str.upper()
physical_df['State Name'] = physical_df['State Name'].str.upper()
physical_df = physical_df[['Panchayat Name','State Name', 'District Name']]
physical_df
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2010-2011_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2011-2012_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2012-2013_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2013-2014_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2014-2015_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2015-2016_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2016-2017_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2017-2019_Scrapped_Data.csv
Out[27]:
Panchayat Name State Name District Name
0 ARARIA BASTI BIHAR ARARIA
1 BANGAMA BIHAR ARARIA
2 BANSBARI BIHAR ARARIA
3 BASANTPUR BIHAR ARARIA
4 BATURBARI BIHAR ARARIA
5 BELWA BIHAR ARARIA
6 BOCHI BIHAR ARARIA
7 CHANDARDEI BIHAR ARARIA
8 CHATAR BIHAR ARARIA
9 CHIKNI BIHAR ARARIA
10 DIYARI BIHAR ARARIA
11 GAIRA BIHAR ARARIA
12 GAIYARI BIHAR ARARIA
13 HARIYA BIHAR ARARIA
14 HAYATPUR BIHAR ARARIA
15 JAMUA BIHAR ARARIA
16 JHAMTA BIHAR ARARIA
17 KAMALDAHA BIHAR ARARIA
18 KISMAT KHAWASPUR BIHAR ARARIA
19 KUSIYARGAON BIHAR ARARIA
20 MADANPUR (E) BIHAR ARARIA
21 MADANPUR (W) BIHAR ARARIA
22 PAIKTOLA BIHAR ARARIA
23 PAKHARIYA BIHAR ARARIA
24 RAMPUR KODARKATTI BIHAR ARARIA
25 RAMPUR MOHANPUR (E) BIHAR ARARIA
26 RAMPUR MOHANPUR (W) BIHAR ARARIA
27 SAHASMAL BIHAR ARARIA
28 SHARANPUR BIHAR ARARIA
29 TATOUNA BHOJPUR BIHAR ARARIA
... ... ... ...
3204523 RAMPUR KHURAD PUNJAB PATIALA
3204524 RANGIAN PUNJAB PATIALA
3204525 SADHROR PUNJAB PATIALA
3204526 SADHROR MAJARI PUNJAB PATIALA
3204527 SAFDALPUR PUNJAB PATIALA
3204528 SAID KHERI PUNJAB PATIALA
3204529 SALEMPUR URF NANGAL PUNJAB PATIALA
3204530 SARAI BANJARA PUNJAB PATIALA
3204531 SATNAM NAGAR PUNJAB PATIALA
3204532 SEHRA PUNJAB PATIALA
3204533 SEHRI PUNJAB PATIALA
3204534 SHAMDO PUNJAB PATIALA
3204535 SHAMDO CAMP PUNJAB PATIALA
3204536 SURAJGARH PUNJAB PATIALA
3204537 SURAL KALAN PUNJAB PATIALA
3204538 SURAL KHURAD PUNJAB PATIALA
3204539 TAKHU MAJRA PUNJAB PATIALA
3204540 TASOULI PUNJAB PATIALA
3204541 TEHALPURA PUNJAB PATIALA
3204542 THUHA PUNJAB PATIALA
3204543 UCHA KHERA PUNJAB PATIALA
3204544 UGANA PUNJAB PATIALA
3204545 UGANI PUNJAB PATIALA
3204546 UKSI PUNJAB PATIALA
3204547 UKSI JATTAN PUNJAB PATIALA
3204548 UPAL HARI PUNJAB PATIALA
3204549 UPALHERI KHURD PUNJAB PATIALA
3204550 URDHAN PUNJAB PATIALA
3204551 URNA PUNJAB PATIALA
3204552 WAZIRABAD PUNJAB PATIALA

3204550 rows × 3 columns

In [28]:
# Read All Central Scheme Financial progress Data
centr_folder = os.path.join(data_folder, "Financial Progress Reports", "All Central Scheme-done", "output")
centr_files = os.listdir(centr_folder)
centr_files = [os.path.join(centr_folder, x)  for x in centr_files if x.split('.')[-1] == 'csv'][:-1]
df_list = []
for file in centr_files:
    df = pd.read_csv(file)
    df_list.append(df)
    print(file)
centr_df = pd.concat(df_list).reset_index()
centr_df['Scheme'] = 'All Central Scheme'
centr_df['Panchayat Name'] = centr_df['A']
centr_df = centr_df[centr_df['Panchayat Name'].notnull()]
centr_df['Panchayat Name'] = centr_df['Panchayat Name'].str.upper()
centr_df['Panchayat Name'] = centr_df['Panchayat Name'].str.replace('PANCHAYAT', '').str.replace('GP', '')
centr_df['District Name'] = centr_df['District Name'].str.upper()
centr_df['State Name'] = centr_df['State Name'].str.upper()
centr_df = centr_df[['Panchayat Name','State Name', 'District Name']]
centr_df
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2010-2011_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2011-2012_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2012-2013_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2013-2014_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2014-2015_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2015-2016_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2016-2017_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2017-2018_Scrapped_Data.csv
Out[28]:
Panchayat Name State Name District Name
0 HALDHIA ASSAM BARPETA
1 MISAMARA ASSAM DHEMAJI
2 KANKALASH-BASHAIL ASSAM KARIMGANJ
3 SOUTH BADARPUR ASSAM KARIMGANJ
4 GOPAL NAGAR ASSAM NAGAON
5 BORGANG ASSAM SONITPUR
6 NO.1 BIHAGURI ASSAM SONITPUR
7 ARARIA BASTI BIHAR ARARIA
8 BANGAMA BIHAR ARARIA
9 BANSBARI BIHAR ARARIA
10 BELWA BIHAR ARARIA
11 BOCHI BIHAR ARARIA
12 CHANDARDEI BIHAR ARARIA
13 CHATAR BIHAR ARARIA
14 CHIKNI BIHAR ARARIA
15 GAIYARI BIHAR ARARIA
16 HAYATPUR BIHAR ARARIA
17 JAMUA BIHAR ARARIA
18 JHAMTA BIHAR ARARIA
19 KAMALDAHA BIHAR ARARIA
20 KISMAT KHAWASPUR BIHAR ARARIA
21 KUSIYARGAON BIHAR ARARIA
22 MADANPUR (E) BIHAR ARARIA
23 MADANPUR (W) BIHAR ARARIA
24 PAIKTOLA BIHAR ARARIA
25 PAKHARIYA BIHAR ARARIA
26 RAMPUR MOHANPUR (E) BIHAR ARARIA
27 SAHASMAL BIHAR ARARIA
28 TATOUNA BHOJPUR BIHAR ARARIA
29 BIRNAGAR (E) BIHAR ARARIA
... ... ... ...
1034033 BANDAHALLI KARNATAKA YADGIRI
1034034 BELAGUNDI KARNATAKA YADGIRI
1034035 CHANDRAKI KARNATAKA YADGIRI
1034036 CHAPATLA KARNATAKA YADGIRI
1034037 CHINNAKARA KARNATAKA YADGIRI
1034038 GAJARKOT KARNATAKA YADGIRI
1034039 HALIGERA KARNATAKA YADGIRI
1034040 HATTIKUNI KARNATAKA YADGIRI
1034041 JAIGRAM KARNATAKA YADGIRI
1034042 KADECHUR KARNATAKA YADGIRI
1034043 KALEBELGUNDA KARNATAKA YADGIRI
1034044 KANDKURU KARNATAKA YADGIRI
1034045 KILLANKERA KARNATAKA YADGIRI
1034046 KONKAL KARNATAKA YADGIRI
1034047 KOULURU KARNATAKA YADGIRI
1034048 MADWAR KARNATAKA YADGIRI
1034049 MALHAR KARNATAKA YADGIRI
1034050 MINASPUR KARNATAKA YADGIRI
1034051 MOTNALLI KARNATAKA YADGIRI
1034052 MUDNAL KARNATAKA YADGIRI
1034053 MUNDARGI KARNATAKA YADGIRI
1034054 PASPOOL KARNATAKA YADGIRI
1034055 PUTPAK KARNATAKA YADGIRI
1034056 RAMSAMUDRA KARNATAKA YADGIRI
1034057 THANAGUNDI KARNATAKA YADGIRI
1034058 VARKANALLI KARNATAKA YADGIRI
1034059 YALSATTI KARNATAKA YADGIRI
1034060 YARGOL KARNATAKA YADGIRI
1034061 YEMPHED KARNATAKA YADGIRI
1034062 YLERI KARNATAKA YADGIRI

1034060 rows × 3 columns

In [29]:
# Read All IAY New Construction Data
iay_folder = os.path.join(data_folder, "Financial Progress Reports", "IAY New Construction -done", "output")
iay_files = os.listdir(iay_folder)
iay_files = [os.path.join(iay_folder, x)  for x in iay_files if x.split('.')[-1] == 'csv'][:-1]
df_list = []
for file in iay_files:
    df = pd.read_csv(file)
    df_list.append(df)
    print(file)
iay_df = pd.concat(df_list).reset_index()
iay_df['Scheme'] = 'IAY New Construction'
iay_df = iay_df[iay_df.columns[1:-1]]
iay_df['Panchayat Name'] = iay_df['A']
iay_df = iay_df[iay_df.columns[1:]]
iay_df = iay_df[iay_df['Panchayat Name'].notnull()]
iay_df['Panchayat Name'] = iay_df['Panchayat Name'].str.upper()
iay_df['Panchayat Name'] = iay_df['Panchayat Name'].str.replace('PANCHAYAT', '').str.replace('GP', '')
iay_df['District Name'] = iay_df['District Name'].str.upper()
iay_df['State Name'] = iay_df['State Name'].str.upper()
iay_df = iay_df[['Panchayat Name','State Name', 'District Name']]
iay_df
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\IAY New Construction -done\output\2010-2011_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\IAY New Construction -done\output\2011-2012_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\IAY New Construction -done\output\2012-2013_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\IAY New Construction -done\output\2013-2014_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\IAY New Construction -done\output\2014-2015_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\IAY New Construction -done\output\2015-2016_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\IAY New Construction -done\output\2016-2017_Scrapped_Data.csv
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\ipykernel_launcher.py:10: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  # Remove the CWD from sys.path while we load stuff.
Out[29]:
Panchayat Name State Name District Name
0 HALDHIA ASSAM BARPETA
1 MISAMARA ASSAM DHEMAJI
2 KANKALASH-BASHAIL ASSAM KARIMGANJ
3 SOUTH BADARPUR ASSAM KARIMGANJ
4 GOPAL NAGAR ASSAM NAGAON
5 BORGANG ASSAM SONITPUR
6 NO.1 BIHAGURI ASSAM SONITPUR
7 ARARIA BASTI BIHAR ARARIA
8 BANGAMA BIHAR ARARIA
9 BANSBARI BIHAR ARARIA
10 BELWA BIHAR ARARIA
11 BOCHI BIHAR ARARIA
12 CHANDARDEI BIHAR ARARIA
13 CHATAR BIHAR ARARIA
14 CHIKNI BIHAR ARARIA
15 DIYARI BIHAR ARARIA
16 GAIYARI BIHAR ARARIA
17 HAYATPUR BIHAR ARARIA
18 JAMUA BIHAR ARARIA
19 JHAMTA BIHAR ARARIA
20 KAMALDAHA BIHAR ARARIA
21 KISMAT KHAWASPUR BIHAR ARARIA
22 KUSIYARGAON BIHAR ARARIA
23 MADANPUR (E) BIHAR ARARIA
24 MADANPUR (W) BIHAR ARARIA
25 PAIKTOLA BIHAR ARARIA
26 PAKHARIYA BIHAR ARARIA
27 RAMPUR MOHANPUR (E) BIHAR ARARIA
28 SAHASMAL BIHAR ARARIA
29 TATOUNA BHOJPUR BIHAR ARARIA
... ... ... ...
815254 PERIKIPALEM ANDHRA PRADESH WEST GODAVARI
815255 RAYAKUDURU ANDHRA PRADESH WEST GODAVARI
815256 UTTARAPALEM ANDHRA PRADESH WEST GODAVARI
815257 VEERAVASARAM ANDHRA PRADESH WEST GODAVARI
815258 ADAVIPALEM ANDHRA PRADESH WEST GODAVARI
815259 ARYAPETA ANDHRA PRADESH WEST GODAVARI
815260 BADEVA ANDHRA PRADESH WEST GODAVARI
815261 BURUGUPALLE ANDHRA PRADESH WEST GODAVARI
815262 CHINCHINADA ANDHRA PRADESH WEST GODAVARI
815263 DODDIPATLA ANDHRA PRADESH WEST GODAVARI
815264 ILAPAKURRU ANDHRA PRADESH WEST GODAVARI
815265 KALAGAMPUDI ANDHRA PRADESH WEST GODAVARI
815266 KAMBLEEPALEM ANDHRA PRADESH WEST GODAVARI
815267 KANAKAYALANKA ANDHRA PRADESH WEST GODAVARI
815268 KATTIPALEM ANDHRA PRADESH WEST GODAVARI
815269 KAZA EAST ANDHRA PRADESH WEST GODAVARI
815270 KAZA WEST ANDHRA PRADESH WEST GODAVARI
815271 KONTHERU ANDHRA PRADESH WEST GODAVARI
815272 LAKSHMIPALEM ANDHRA PRADESH WEST GODAVARI
815273 MATTAPALEM ANDHRA PRADESH WEST GODAVARI
815274 MEDAPADU ANDHRA PRADESH WEST GODAVARI
815275 NARIMAVARIMERAKA ANDHRA PRADESH WEST GODAVARI
815276 NEREDUMALLI ANDHRA PRADESH WEST GODAVARI
815277 PEDALANKA ANDHRA PRADESH WEST GODAVARI
815278 PENUMARRU ANDHRA PRADESH WEST GODAVARI
815279 SIRAGALAPALLE ANDHRA PRADESH WEST GODAVARI
815280 UTADA ANDHRA PRADESH WEST GODAVARI
815281 VADDILANKA ANDHRA PRADESH WEST GODAVARI
815282 Y.V.LANKA ANDHRA PRADESH WEST GODAVARI
815283 YELAMANCHILI ANDHRA PRADESH WEST GODAVARI

815281 rows × 3 columns

In [30]:
# Read All PMAY New Construction Data
pmay_folder = os.path.join(data_folder, "Financial Progress Reports", "Pradhan Mantri Awaas Yojana -done")
pmay_files = os.listdir(pmay_folder)
pmay_files = [os.path.join(pmay_folder, x)  for x in pmay_files if x.split('.')[-1] == 'csv'][:-1]
df_list = []
for file in pmay_files:
    df = pd.read_csv(file)
    df_list.append(df)
    print(file)
pmay_df = pd.concat(df_list).reset_index()
pmay_df['Scheme'] = 'Pradhan Mantri Awaas Yojana'
pmay_df = pmay_df[pmay_df.columns[1:-1]]
pmay_df['Panchayat Name'] = pmay_df['A']
pmay_df = pmay_df[pmay_df.columns[1:]]
pmay_df = pmay_df[pmay_df['Panchayat Name'].notnull()]
pmay_df['Panchayat Name'] = pmay_df['Panchayat Name'].str.upper()
pmay_df['Panchayat Name'] = pmay_df['Panchayat Name'].str.replace('PANCHAYAT', '').str.replace('GP', '')
pmay_df['District Name'] = pmay_df['District Name'].str.upper()
pmay_df['State Name'] = pmay_df['State Name'].str.upper()
pmay_df = pmay_df[['Panchayat Name','State Name', 'District Name']]
pmay_df
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\Pradhan Mantri Awaas Yojana -done\2016-2017_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\Pradhan Mantri Awaas Yojana -done\2017-2018_Scrapped_Data.csv
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\ipykernel_launcher.py:10: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  # Remove the CWD from sys.path while we load stuff.
Out[30]:
Panchayat Name State Name District Name
0 BARAMA ASSAM BAKSA
1 DEBACHARA ASSAM BAKSA
2 KAKLABARI ASSAM BAKSA
3 KHARUAJAN ASSAM BAKSA
4 MERKUCHI ASSAM BAKSA
5 PURANSRIPUR ASSAM BAKSA
6 AMBARI ASSAM BAKSA
7 BATHOUGURI ASSAM BAKSA
8 BHOARA ASSAM BAKSA
9 THAMNA ASSAM BAKSA
10 BAHBARI ASSAM BAKSA
11 BARAPETA ASSAM BAKSA
12 BONMAJA ASSAM BAKSA
13 CHUNBARI ASSAM BAKSA
14 DHEKIAJANI ASSAM BAKSA
15 KHUSRABARI ASSAM BAKSA
16 MAINAMATA PATHAR ASSAM BAKSA
17 MAIRAJHAR PATHAR ASSAM BAKSA
18 NIMUA ASSAM BAKSA
19 PASCHIM HOWLY ASSAM BAKSA
20 PUB HOWLY ASSAM BAKSA
21 UTTAR HOWLY ASSAM BAKSA
22 UTTAR KHARIJA BIJNI ASSAM BAKSA
23 BAGHDOBA ASSAM BAKSA
24 BALABARI ASSAM BAKSA
25 BETNA ASSAM BAKSA
26 KAURBAHA ASSAM BAKSA
27 TENGAJHAR ASSAM BAKSA
28 JALAH GAON ASSAM BAKSA
29 SALBARI ASSAM BAKSA
... ... ... ...
259203 BANDAHALLI KARNATAKA YADGIRI
259204 BELAGUNDI KARNATAKA YADGIRI
259205 CHANDRAKI KARNATAKA YADGIRI
259206 CHAPATLA KARNATAKA YADGIRI
259207 CHINNAKARA KARNATAKA YADGIRI
259208 GAJARKOT KARNATAKA YADGIRI
259209 HALIGERA KARNATAKA YADGIRI
259210 HATTIKUNI KARNATAKA YADGIRI
259211 JAIGRAM KARNATAKA YADGIRI
259212 KADECHUR KARNATAKA YADGIRI
259213 KALEBELGUNDA KARNATAKA YADGIRI
259214 KANDKURU KARNATAKA YADGIRI
259215 KILLANKERA KARNATAKA YADGIRI
259216 KONKAL KARNATAKA YADGIRI
259217 KOULURU KARNATAKA YADGIRI
259218 MADWAR KARNATAKA YADGIRI
259219 MALHAR KARNATAKA YADGIRI
259220 MINASPUR KARNATAKA YADGIRI
259221 MOTNALLI KARNATAKA YADGIRI
259222 MUDNAL KARNATAKA YADGIRI
259223 MUNDARGI KARNATAKA YADGIRI
259224 PASPOOL KARNATAKA YADGIRI
259225 PUTPAK KARNATAKA YADGIRI
259226 RAMSAMUDRA KARNATAKA YADGIRI
259227 THANAGUNDI KARNATAKA YADGIRI
259228 VARKANALLI KARNATAKA YADGIRI
259229 YALSATTI KARNATAKA YADGIRI
259230 YARGOL KARNATAKA YADGIRI
259231 YEMPHED KARNATAKA YADGIRI
259232 YLERI KARNATAKA YADGIRI

259233 rows × 3 columns

In [31]:
togeocode_df = pd.concat([physical_df, centr_df, iay_df, pmay_df])
togeocode_df = togeocode_df.drop_duplicates().reset_index()
togeocode_df
Out[31]:
index Panchayat Name State Name District Name
0 0 ARARIA BASTI BIHAR ARARIA
1 1 BANGAMA BIHAR ARARIA
2 2 BANSBARI BIHAR ARARIA
3 3 BASANTPUR BIHAR ARARIA
4 4 BATURBARI BIHAR ARARIA
5 5 BELWA BIHAR ARARIA
6 6 BOCHI BIHAR ARARIA
7 7 CHANDARDEI BIHAR ARARIA
8 8 CHATAR BIHAR ARARIA
9 9 CHIKNI BIHAR ARARIA
10 10 DIYARI BIHAR ARARIA
11 11 GAIRA BIHAR ARARIA
12 12 GAIYARI BIHAR ARARIA
13 13 HARIYA BIHAR ARARIA
14 14 HAYATPUR BIHAR ARARIA
15 15 JAMUA BIHAR ARARIA
16 16 JHAMTA BIHAR ARARIA
17 17 KAMALDAHA BIHAR ARARIA
18 18 KISMAT KHAWASPUR BIHAR ARARIA
19 19 KUSIYARGAON BIHAR ARARIA
20 20 MADANPUR (E) BIHAR ARARIA
21 21 MADANPUR (W) BIHAR ARARIA
22 22 PAIKTOLA BIHAR ARARIA
23 23 PAKHARIYA BIHAR ARARIA
24 24 RAMPUR KODARKATTI BIHAR ARARIA
25 25 RAMPUR MOHANPUR (E) BIHAR ARARIA
26 26 RAMPUR MOHANPUR (W) BIHAR ARARIA
27 27 SAHASMAL BIHAR ARARIA
28 28 SHARANPUR BIHAR ARARIA
29 29 TATOUNA BHOJPUR BIHAR ARARIA
... ... ... ... ...
263467 947168 DARGAAPUR GHARBI PUNJAB TARAN TARAN
263468 947169 JALLEWAL PUNJAB TARAN TARAN
263469 947170 JAURA PUNJAB TARAN TARAN
263470 947171 KALER PUNJAB TARAN TARAN
263471 947172 KIRIYAN PUNJAB TARAN TARAN
263472 947173 MARHANA PUNJAB TARAN TARAN
263473 947174 RURIWALA PUNJAB TARAN TARAN
263474 947175 SHAHABPUR PUNJAB TARAN TARAN
263475 947176 SHAKRI PUNJAB TARAN TARAN
263476 947177 SIRHALI KHURD PUNJAB TARAN TARAN
263477 947178 THATHIAN MAHANTAN PUNJAB TARAN TARAN
263478 947179 TIKHU CHAK PUNJAB TARAN TARAN
263479 947212 ALAWALPUR PUNJAB TARAN TARAN
263480 947213 BAINI MATUA PUNJAB TARAN TARAN
263481 947214 GULALIPUR PUNJAB TARAN TARAN
263482 947215 KAIRON WAL PUNJAB TARAN TARAN
263483 947216 NORANGABAD PUNJAB TARAN TARAN
263484 947217 PAKHOKE PUNJAB TARAN TARAN
263485 1023979 AMBOLI DADRA AND NAGAR HAVELI DADRA AND NAGAR HAVELI
263486 1023980 DADRA DADRA AND NAGAR HAVELI DADRA AND NAGAR HAVELI
263487 1023981 GALONDA DADRA AND NAGAR HAVELI DADRA AND NAGAR HAVELI
263488 1023982 KHANVEL DADRA AND NAGAR HAVELI DADRA AND NAGAR HAVELI
263489 1023983 KHARADPADA DADRA AND NAGAR HAVELI DADRA AND NAGAR HAVELI
263490 1023984 KHERDI DADRA AND NAGAR HAVELI DADRA AND NAGAR HAVELI
263491 1023985 KILVANI DADRA AND NAGAR HAVELI DADRA AND NAGAR HAVELI
263492 1023986 MANDONI DADRA AND NAGAR HAVELI DADRA AND NAGAR HAVELI
263493 1023987 NAROLI DADRA AND NAGAR HAVELI DADRA AND NAGAR HAVELI
263494 1023988 RANDHA DADRA AND NAGAR HAVELI DADRA AND NAGAR HAVELI
263495 1023989 SAYLI DADRA AND NAGAR HAVELI DADRA AND NAGAR HAVELI
263496 1023990 SURANGI DADRA AND NAGAR HAVELI DADRA AND NAGAR HAVELI

263497 rows × 4 columns

In [32]:
# Read Scrapped Data
match_folder = os.path.join(data_folder, "VILLAGE_ASSEMBLY_PC_match")
file_path = os.path.join(match_folder, "merge.xlsx")
match_df = pd.read_excel(file_path)
match_df = match_df[['Village Name', 'Localbody Name', 'District Name', 'State Name', 'Assembly Constituency ECI Code']]
match_df = match_df.drop_duplicates().reset_index()
match_df = pd.merge(match_df, states_df, how='inner', on=['State Name']).drop_duplicates().reset_index()
match_df = match_df[match_df.columns[2:]]
match_df
Out[32]:
Village Name Localbody Name District Name State Name Assembly Constituency ECI Code state code 2011
0 Allavaram ALLAVARAM EAST GODAVARI ANDHRA PRADESH 163 28
1 Allavaram KODURUPADU EAST GODAVARI ANDHRA PRADESH 163 28
2 Bendamurulanka BENDAMURLANKA EAST GODAVARI ANDHRA PRADESH 163 28
3 Bendamurulanka VODALAREVU EAST GODAVARI ANDHRA PRADESH 163 28
4 Bodasakurru BODASAKURRU EAST GODAVARI ANDHRA PRADESH 163 28
5 Devaguptam DEVAGUPTAM EAST GODAVARI ANDHRA PRADESH 163 28
6 Devaguptam GUNDIPUDI EAST GODAVARI ANDHRA PRADESH 163 28
7 Devaguptam DEVAGUPTAMRAVULAPALEM EAST GODAVARI ANDHRA PRADESH 163 28
8 Godi GODI EAST GODAVARI ANDHRA PRADESH 163 28
9 Godi GODITHIPPA EAST GODAVARI ANDHRA PRADESH 163 28
10 Godilanka GODILANKA EAST GODAVARI ANDHRA PRADESH 163 28
11 Gudala GUDALA EAST GODAVARI ANDHRA PRADESH 163 28
12 Komaragiripatnam KOMARAGIRIPATNAM EAST GODAVARI ANDHRA PRADESH 163 28
13 Komaragiripatnam THUMMALAPALLE EAST GODAVARI ANDHRA PRADESH 163 28
14 Mogallamuru MOGALLAMURU EAST GODAVARI ANDHRA PRADESH 163 28
15 Rellugadda RELLUGADDA EAST GODAVARI ANDHRA PRADESH 163 28
16 Samanthakuru PALLIPALEM EAST GODAVARI ANDHRA PRADESH 163 28
17 Samanthakuru SAMANTHAKURRU EAST GODAVARI ANDHRA PRADESH 163 28
18 Tadikona TADIKONA EAST GODAVARI ANDHRA PRADESH 163 28
19 Thurupulanka THURPULANKA EAST GODAVARI ANDHRA PRADESH 163 28
20 Yentrikona YENTRIKONA EAST GODAVARI ANDHRA PRADESH 163 28
21 A. Vemavaram A.VEMAVARAM EAST GODAVARI ANDHRA PRADESH 163 28
22 A. Vemavarappadu A.VEMAVARAPADU EAST GODAVARI ANDHRA PRADESH 163 28
23 Bandarulanka BANDARULANKA EAST GODAVARI ANDHRA PRADESH 163 28
24 Bhatnavilli BHATNAVILLI EAST GODAVARI ANDHRA PRADESH 163 28
25 Bhatnavilli REDDIPALLI EAST GODAVARI ANDHRA PRADESH 163 28
26 Edarapalle EDARAPALLE EAST GODAVARI ANDHRA PRADESH 163 28
27 Gunnapalle Agraharam G.AGRAHARAM EAST GODAVARI ANDHRA PRADESH 163 28
28 Immidivarappadu IMMIDIVARAPPADU EAST GODAVARI ANDHRA PRADESH 163 28
29 Indupalle INDUPALLE EAST GODAVARI ANDHRA PRADESH 163 28
... ... ... ... ... ... ...
474801 Meerapur SUKULPUR MIRZAPUR UTTAR PRADESH 398 9
474802 Meghupur SHILPI MIRZAPUR UTTAR PRADESH 398 9
474803 Meria MEDIYA MIRZAPUR UTTAR PRADESH 398 9
474804 Milki Mu. Barawa MANUR PURAB PATTI MIRZAPUR UTTAR PRADESH 398 9
474805 Milki Nip CHAINPURA MIRZAPUR UTTAR PRADESH 398 9
474806 Milki Nisf Mu. Gogahara GOGAHARA MIRZAPUR UTTAR PRADESH 398 9
474807 Mirapur MIRAPUR MIRZAPUR UTTAR PRADESH 398 9
474808 Mirzapur Khurd MIRZAPUR KHURD MIRZAPUR UTTAR PRADESH 398 9
474809 Misirpur MAWAIYA MIRZAPUR UTTAR PRADESH 398 9
474810 Mohamadabad MUSEPUR MIRZAPUR UTTAR PRADESH 398 9
474811 Mohammadpur DAYALPUR MIRZAPUR UTTAR PRADESH 398 9
474812 Mohammadpur Urf Dayalpur SARAIYA SIKANDARPUR MIRZAPUR UTTAR PRADESH 398 9
474813 Moharpur MOHIDDINPUR MIRZAPUR UTTAR PRADESH 398 9
474814 Mohinipur MADAPUR MIRZAPUR UTTAR PRADESH 398 9
474815 Moinuddinpur MOHIDDINPUR MIRZAPUR UTTAR PRADESH 398 9
474816 Molnapur BASARATAPUR MIRZAPUR UTTAR PRADESH 398 9
474817 Muinuddinpur ISWAR PATTI MIRZAPUR UTTAR PRADESH 398 9
474818 Mujaffarpur BARAIPUR MIRZAPUR UTTAR PRADESH 398 9
474819 Mujdih MUJDIH MIRZAPUR UTTAR PRADESH 398 9
474820 Mukundpur MUKUNDPUR MIRZAPUR UTTAR PRADESH 398 9
474821 Murhua MUDHUAA MIRZAPUR UTTAR PRADESH 398 9
474822 Musepur MUSEPUR MIRZAPUR UTTAR PRADESH 398 9
474823 Muzahidpur KELA BELA MIRZAPUR UTTAR PRADESH 398 9
474824 Naithi Pachawa HASAPUR MIRZAPUR UTTAR PRADESH 398 9
474825 Nakahara NAKAHARA MIRZAPUR UTTAR PRADESH 398 9
474826 Narayanpur NARAYANPUR MIRZAPUR UTTAR PRADESH 398 9
474827 Naudiha SHERWA MIRZAPUR UTTAR PRADESH 398 9
474828 Naugaraha BELA MIRZAPUR UTTAR PRADESH 398 9
474829 Newada SHREEPUR MIRZAPUR UTTAR PRADESH 398 9
474830 Nibi NIBI DEORIA MIRZAPUR UTTAR PRADESH 398 9

474831 rows × 6 columns

In [34]:
# Join by the column and try to filter non geocodes
match_df['district_y'] = match_df['District Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
match_df['village_y'] = match_df['Localbody Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
togeocode_df['district_y'] = togeocode_df['District Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
togeocode_df['village_y'] = togeocode_df['Panchayat Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
geocoded_df = pd.merge(togeocode_df, match_df, how='left', left_on=['village_y', 'district_y'], right_on = ['village_y', 'district_y'])
nongeocoded_df = geocoded_df[geocoded_df['state code 2011'].isnull()]
nongeocoded_df = nongeocoded_df[nongeocoded_df.columns[1:6]]
nongeocoded_df = nongeocoded_df.drop_duplicates().reset_index()
geocoded_df = geocoded_df[geocoded_df['state code 2011'].notnull()][['Panchayat Name', 'District Name_x', 'State Name_x', 'Assembly Constituency ECI Code', 'state code 2011']]
geocoded_df.columns = ['panchayat', 'district', 'state', 'ac_code', 'state_code']
geocoded_df = geocoded_df.drop_duplicates().reset_index()
match_df['village_y'] = match_df['Village Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
geocoded_df1 = pd.merge(nongeocoded_df, match_df, how='left', left_on=['village_y', 'district_y'], right_on = ['village_y', 'district_y'])
nongeocoded_df = geocoded_df1[geocoded_df1['state code 2011'].isnull()]
nongeocoded_df = nongeocoded_df[nongeocoded_df.columns[1:6]]
nongeocoded_df = nongeocoded_df.drop_duplicates().reset_index()
nongeocoded_df = nongeocoded_df[nongeocoded_df.columns[1:4]]
nongeocoded_df.columns = ['panchayat', 'state', 'district']
geocoded_df1 = geocoded_df1[geocoded_df1['state code 2011'].notnull()][['Panchayat Name', 'District Name_x', 'State Name_x', 'Assembly Constituency ECI Code', 'state code 2011']]
geocoded_df1.columns = ['panchayat', 'district', 'state', 'ac_code', 'state_code']
geocoded_df = pd.concat([geocoded_df, geocoded_df1]).drop_duplicates().reset_index()
geocoded_df = geocoded_df[['panchayat', 'district', 'state', 'ac_code', 'state_code']]
geocoded_df
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\ipykernel_launcher.py:22: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

Out[34]:
panchayat district state ac_code state_code
0 ARARIA BASTI ARARIA BIHAR 49.0 10.0
1 BANGAMA ARARIA BIHAR 49.0 10.0
2 BANSBARI ARARIA BIHAR 49.0 10.0
3 BASANTPUR ARARIA BIHAR 49.0 10.0
4 BATURBARI ARARIA BIHAR 49.0 10.0
5 BELWA ARARIA BIHAR 49.0 10.0
6 BOCHI ARARIA BIHAR 49.0 10.0
7 CHATAR ARARIA BIHAR 49.0 10.0
8 DIYARI ARARIA BIHAR 49.0 10.0
9 GAIYARI ARARIA BIHAR 49.0 10.0
10 HAYATPUR ARARIA BIHAR 49.0 10.0
11 JAMUA ARARIA BIHAR 49.0 10.0
12 JHAMTA ARARIA BIHAR 49.0 10.0
13 KAMALDAHA ARARIA BIHAR 49.0 10.0
14 KAMALDAHA ARARIA BIHAR 51.0 10.0
15 KISMAT KHAWASPUR ARARIA BIHAR 49.0 10.0
16 PAIKTOLA ARARIA BIHAR 49.0 10.0
17 RAMPUR KODARKATTI ARARIA BIHAR 49.0 10.0
18 SAHASMAL ARARIA BIHAR 49.0 10.0
19 SHARANPUR ARARIA BIHAR 49.0 10.0
20 BAGNAGAR ARARIA BIHAR 50.0 10.0
21 BARA ISTAMBARAR ARARIA BIHAR 50.0 10.0
22 BHAGWANPUR ARARIA BIHAR 50.0 10.0
23 BHUNA MAJGAMA ARARIA BIHAR 50.0 10.0
24 CHAKAI ARARIA BIHAR 50.0 10.0
25 CHIRAH ARARIA BIHAR 50.0 10.0
26 DUBBA ARARIA BIHAR 50.0 10.0
27 GIRDA ARARIA BIHAR 50.0 10.0
28 HARDAR ARARIA BIHAR 50.0 10.0
29 KAKAN ARARIA BIHAR 50.0 10.0
... ... ... ... ... ...
123628 BOJJAGUDEM THANDA NALGONDA TELANGANA 95.0 28.0
123629 MAMILLAGUDEM NALGONDA TELANGANA 92.0 28.0
123630 THUMMALA PALLE NALGONDA TELANGANA 93.0 28.0
123631 KANCHAN PALLE NALGONDA TELANGANA 92.0 28.0
123632 KESHAVAPURAM NALGONDA TELANGANA 88.0 28.0
123633 KESHAVAPURAM NALGONDA TELANGANA 92.0 28.0
123634 YELLA PURAM NALGONDA TELANGANA 86.0 28.0
123635 YELLA PURAM NALGONDA TELANGANA 87.0 28.0
123636 LINGAM PALLE NALGONDA TELANGANA 87.0 28.0
123637 POCHAMPALLE NALGONDA TELANGANA 87.0 28.0
123638 RAJAPET NALGONDA TELANGANA 92.0 28.0
123639 RAMACHANDRA PURAM NALGONDA TELANGANA 95.0 28.0
123640 SUNKI SHALA NALGONDA TELANGANA 93.0 28.0
123641 SUNKI SHALA NALGONDA TELANGANA 87.0 28.0
123642 KACHARAM NALGONDA TELANGANA 86.0 28.0
123643 TIRMALAPUR NIZAMABAD TELANGANA 14.0 28.0
123644 BASWAPUR NIZAMABAD TELANGANA 19.0 28.0
123645 BASWAPUR NIZAMABAD TELANGANA 14.0 28.0
123646 MIRZAPUR NIZAMABAD TELANGANA 14.0 28.0
123647 MALKAPUR NIZAMABAD TELANGANA 18.0 28.0
123648 MALKAPUR NIZAMABAD TELANGANA 17.0 28.0
123649 MALKAPUR NIZAMABAD TELANGANA 14.0 28.0
123650 HANGARGA NIZAMABAD TELANGANA 12.0 28.0
123651 DEVANPALLE NIZAMABAD TELANGANA 19.0 28.0
123652 AREPALLE NIZAMABAD TELANGANA 18.0 28.0
123653 MACHAPUR NIZAMABAD TELANGANA 12.0 28.0
123654 GOBINDAPUR PASCHIM BARDHAMAN WEST BENGAL 275.0 19.0
123655 BHURI PASCHIM BARDHAMAN WEST BENGAL 279.0 19.0
123656 KHASA AMRITSAR PUNJAB 20.0 3.0
123657 HEIR AMRITSAR PUNJAB 20.0 3.0

123658 rows × 5 columns

In [35]:
nongeocoded_df
Out[35]:
panchayat state district
0 CHANDARDEI BIHAR ARARIA
1 GAIRA BIHAR ARARIA
2 HARIYA BIHAR ARARIA
3 KUSIYARGAON BIHAR ARARIA
4 MADANPUR (E) BIHAR ARARIA
5 MADANPUR (W) BIHAR ARARIA
6 PAKHARIYA BIHAR ARARIA
7 RAMPUR MOHANPUR (E) BIHAR ARARIA
8 RAMPUR MOHANPUR (W) BIHAR ARARIA
9 TATOUNA BHOJPUR BIHAR ARARIA
10 BAGDAHRA BIHAR ARARIA
11 BHANSIYA BIHAR ARARIA
12 CHAINPUR MASURIYA BIHAR ARARIA
13 CHILHANIYAN BIHAR ARARIA
14 CHOUKTA BIHAR ARARIA
15 GAIRKI MASIRIYA BIHAR ARARIA
16 MAHALGOAN BIHAR ARARIA
17 PRASADPUR BIHAR ARARIA
18 SIMARIYA BIHAR ARARIA
19 SISOUNA BIHAR ARARIA
20 CHOURI BIHAR ARARIA
21 DEHTI (N) BIHAR ARARIA
22 DEHTI (S) BIHAR ARARIA
23 DIGHLI BIHAR ARARIA
24 MIYANPUR BIHAR ARARIA
25 PIPRA BIJWAR BIHAR ARARIA
26 SUKSAINA BIHAR ARARIA
27 DAHGMA BIHAR ARARIA
28 THENGAPUR PIPRA BIHAR ARARIA
29 ARWAL SIPAH BIHAR ARWAL
... ... ... ...
159862 PHULA PUNJAB TARAN TARAN
159863 SUR SINGH PUNJAB TARAN TARAN
159864 BARHAMPUR PUNJAB TARAN TARAN
159865 GUJARPURA PUNJAB TARAN TARAN
159866 SANGATPUR PUNJAB TARAN TARAN
159867 WARYAH PUNJAB TARAN TARAN
159868 GOBINDWAL PUNJAB TARAN TARAN
159869 JAWANDPUR PUNJAB TARAN TARAN
159870 MAL CHAK PUNJAB TARAN TARAN
159871 CHAUDHRIWALA PUNJAB TARAN TARAN
159872 DARGAAPUR GHARBI PUNJAB TARAN TARAN
159873 JALLEWAL PUNJAB TARAN TARAN
159874 JAURA PUNJAB TARAN TARAN
159875 KALER PUNJAB TARAN TARAN
159876 KIRIYAN PUNJAB TARAN TARAN
159877 MARHANA PUNJAB TARAN TARAN
159878 RURIWALA PUNJAB TARAN TARAN
159879 SHAHABPUR PUNJAB TARAN TARAN
159880 SHAKRI PUNJAB TARAN TARAN
159881 SIRHALI KHURD PUNJAB TARAN TARAN
159882 THATHIAN MAHANTAN PUNJAB TARAN TARAN
159883 TIKHU CHAK PUNJAB TARAN TARAN
159884 ALAWALPUR PUNJAB TARAN TARAN
159885 BAINI MATUA PUNJAB TARAN TARAN
159886 GULALIPUR PUNJAB TARAN TARAN
159887 KAIRON WAL PUNJAB TARAN TARAN
159888 NORANGABAD PUNJAB TARAN TARAN
159889 PAKHOKE PUNJAB TARAN TARAN
159890 GALONDA DADRA AND NAGAR HAVELI DADRA AND NAGAR HAVELI
159891 SURANGI DADRA AND NAGAR HAVELI DADRA AND NAGAR HAVELI

159892 rows × 3 columns

In [36]:
#Save to CSV
geocoded_df.to_csv(os.path.join(output_folder, 'match_geocoded.csv'), encoding='utf-8', index=False)
nongeocoded_df.to_csv(os.path.join(output_folder, 'nongeocoded.csv'), encoding='utf-8', index=False)