In [13]:
#Import Libraries
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
import json
import requests
import math
import os
import time
import datetime
import itertools
In [14]:
# Import folders
data_folder = os.path.abspath('data')
# Set Output Folder
output_folder = os.path.abspath("output")
if not os.path.exists(output_folder):
    os.makedirs(output_folder)
In [15]:
# Helping Functions
def remove_consecutive_duplicates(x):
    return ''.join(i for i, _ in itertools.groupby(x))
In [16]:
# Read Constituency Data
ac_gdf = None
ac_filepath = os.path.join(data_folder, "AC", "India_AC.shp")
ac_gdf = gpd.read_file(ac_filepath)
ac_gdf

# Read Constituency Data
ac_file = os.path.join(data_folder, "General_Later_Ashoka_alldata.csv")
acdf = pd.read_csv(ac_file)
acdf['state_name'] = acdf['state_name'].str.replace("_", " ")
#acdf = acdf[['state_name', 'constituency_no', 'constituency_name', 'year', 'month']]
acdf = acdf.drop_duplicates(subset=None, keep="first", inplace=False).reset_index(drop=True)
acdf.loc[acdf['newstate_code'] == 36, 'newstate_code'] = 28 #Telangana  Fix
acdf = acdf[['state_name', 'state_code', 'constituency_no', 'year', 'month']]
acdf['day'] = 1
acdf['dyear'] = 0
acdf.loc[(acdf['month'] > 9) | (acdf['year'] == 2008), 'dyear'] = 1
acdf['dyear'] = acdf['year'] + acdf['dyear']
acdf['datetime'] = pd.to_datetime(acdf[['year', 'month', 'day']])
acdf = acdf.drop_duplicates().reset_index(drop=True)
acdf = pd.merge(acdf, ac_gdf,  how='inner', left_on=['state_code', 'constituency_no'], right_on = ['ST_CODE','AC_NO'])#[['ST_CODE', 'ST_NAME','DT_CODE', 'DIST_NAME', 'AC_NO', 'AC_NAME', 'PC_NO', 'PC_NAME']]
acdf = acdf[acdf.columns[:-5]]
acdf
Out[16]:
state_name state_code constituency_no year month day dyear datetime OBJECTID ST_CODE ST_NAME DT_CODE DIST_NAME AC_NO AC_NAME PC_NO PC_NAME
0 Jammu & Kashmir 1 1 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 1.0 KUPWARA 1 KARNAH 1 BARAMULLA
1 Jammu & Kashmir 1 1 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 1.0 KUPWARA 1 KARNAH 1 BARAMULLA
2 Jammu & Kashmir 1 2 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 1.0 KUPWARA 2 KUPWARA 1 BARAMULLA
3 Jammu & Kashmir 1 2 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 1.0 KUPWARA 2 KUPWARA 1 BARAMULLA
4 Jammu & Kashmir 1 3 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 1.0 KUPWARA 3 LOLAB 1 BARAMULLA
5 Jammu & Kashmir 1 3 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 1.0 KUPWARA 3 LOLAB 1 BARAMULLA
6 Jammu & Kashmir 1 4 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 1.0 KUPWARA 4 HANDWARA 1 BARAMULLA
7 Jammu & Kashmir 1 4 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 1.0 KUPWARA 4 HANDWARA 1 BARAMULLA
8 Jammu & Kashmir 1 5 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 1.0 KUPWARA 5 LANGATE 1 BARAMULLA
9 Jammu & Kashmir 1 5 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 1.0 KUPWARA 5 LANGATE 1 BARAMULLA
10 Jammu & Kashmir 1 6 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 6 URI 1 BARAMULLA
11 Jammu & Kashmir 1 6 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 6 URI 1 BARAMULLA
12 Jammu & Kashmir 1 7 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 7 RAFIABAD 1 BARAMULLA
13 Jammu & Kashmir 1 7 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 7 RAFIABAD 1 BARAMULLA
14 Jammu & Kashmir 1 8 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 8 SOPORE 1 BARAMULLA
15 Jammu & Kashmir 1 8 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 8 SOPORE 1 BARAMULLA
16 Jammu & Kashmir 1 9 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 9 GUREZ 1 BARAMULLA
17 Jammu & Kashmir 1 9 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 9 GUREZ 1 BARAMULLA
18 Jammu & Kashmir 1 10 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 10 BANDIPORA 1 BARAMULLA
19 Jammu & Kashmir 1 10 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 10 BANDIPORA 1 BARAMULLA
20 Jammu & Kashmir 1 11 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 11 SONAWARI 1 BARAMULLA
21 Jammu & Kashmir 1 11 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 11 SONAWARI 1 BARAMULLA
22 Jammu & Kashmir 1 12 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 12 SANGRAMA 1 BARAMULLA
23 Jammu & Kashmir 1 12 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 12 SANGRAMA 1 BARAMULLA
24 Jammu & Kashmir 1 13 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 13 BARAMULA 1 BARAMULLA
25 Jammu & Kashmir 1 13 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 13 BARAMULA 1 BARAMULLA
26 Jammu & Kashmir 1 14 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 14 GULMARG 1 BARAMULLA
27 Jammu & Kashmir 1 14 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 14 GULMARG 1 BARAMULLA
28 Jammu & Kashmir 1 15 2008 12 1 2009 2008-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 15 PATTAN 1 BARAMULLA
29 Jammu & Kashmir 1 15 2014 12 1 2015 2014-12-01 1 1 JAMMU & KASHMIR 2.0 BARAMULA 15 PATTAN 1 BARAMULLA
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8328 Puducherry 34 16 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 16 Orleampeth 1 PONDICHERRY
8329 Puducherry 34 16 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 16 Orleampeth 1 PONDICHERRY
8330 Puducherry 34 17 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 17 Nellithope 1 PONDICHERRY
8331 Puducherry 34 17 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 17 Nellithope 1 PONDICHERRY
8332 Puducherry 34 18 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 18 Mudaliarpet 1 PONDICHERRY
8333 Puducherry 34 18 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 18 Mudaliarpet 1 PONDICHERRY
8334 Puducherry 34 19 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 19 Ariankuppam 1 PONDICHERRY
8335 Puducherry 34 19 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 19 Ariankuppam 1 PONDICHERRY
8336 Puducherry 34 20 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 20 Manavely 1 PONDICHERRY
8337 Puducherry 34 20 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 20 Manavely 1 PONDICHERRY
8338 Puducherry 34 21 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 21 Embalam (SC) 1 PONDICHERRY
8339 Puducherry 34 21 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 21 Embalam (SC) 1 PONDICHERRY
8340 Puducherry 34 22 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 22 Nettapakkam (SC) 1 PONDICHERRY
8341 Puducherry 34 22 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 22 Nettapakkam (SC) 1 PONDICHERRY
8342 Puducherry 34 23 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 23 Bahour 1 PONDICHERRY
8343 Puducherry 34 23 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 2.0 PONDICHERRY 23 Bahour 1 PONDICHERRY
8344 Puducherry 34 24 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 4.0 KARAIKAL 24 Nedungadu (SC) 1 PONDICHERRY
8345 Puducherry 34 24 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 4.0 KARAIKAL 24 Nedungadu (SC) 1 PONDICHERRY
8346 Puducherry 34 25 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 4.0 KARAIKAL 25 Thirunallar 1 PONDICHERRY
8347 Puducherry 34 25 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 4.0 KARAIKAL 25 Thirunallar 1 PONDICHERRY
8348 Puducherry 34 26 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 4.0 KARAIKAL 26 Karaikal North 1 PONDICHERRY
8349 Puducherry 34 26 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 4.0 KARAIKAL 26 Karaikal North 1 PONDICHERRY
8350 Puducherry 34 27 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 4.0 KARAIKAL 27 Karaikal South 1 PONDICHERRY
8351 Puducherry 34 27 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 4.0 KARAIKAL 27 Karaikal South 1 PONDICHERRY
8352 Puducherry 34 28 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 4.0 KARAIKAL 28 Neravy- T.R. Pattin 1 PONDICHERRY
8353 Puducherry 34 28 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 4.0 KARAIKAL 28 Neravy- T.R. Pattin 1 PONDICHERRY
8354 Puducherry 34 29 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 3.0 MAHE 29 Mahe 1 PONDICHERRY
8355 Puducherry 34 29 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 3.0 MAHE 29 Mahe 1 PONDICHERRY
8356 Puducherry 34 30 2011 5 1 2011 2011-05-01 1 34 PUDUCHERRY 1.0 YANAM 30 Yanam 1 PONDICHERRY
8357 Puducherry 34 30 2016 5 1 2016 2016-05-01 1 34 PUDUCHERRY 1.0 YANAM 30 Yanam 1 PONDICHERRY

8358 rows × 17 columns

In [17]:
# Read geocoded Scrapped Data
geocoded_file = os.path.join(data_folder, 'geocoded.csv')
geocoded_df = pd.read_csv(geocoded_file)
geocoded_df['district_y'] = geocoded_df['district'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
geocoded_df['panchayat_y'] = geocoded_df['panchayat'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
geocoded_df
Out[17]:
ac_code district panchayat state state_code district_y panchayat_y
0 49.0 ARARIA ARARIA BASTI BIHAR 10.0 ARARIA ARARIABASTI
1 49.0 ARARIA BANGAMA BIHAR 10.0 ARARIA BANGAMA
2 49.0 ARARIA BANSBARI BIHAR 10.0 ARARIA BANSBARI
3 49.0 ARARIA BASANTPUR BIHAR 10.0 ARARIA BASANTPUR
4 49.0 ARARIA BATURBARI BIHAR 10.0 ARARIA BATURBARI
5 49.0 ARARIA BELWA BIHAR 10.0 ARARIA BELWA
6 49.0 ARARIA BOCHI BIHAR 10.0 ARARIA BOCHI
7 49.0 ARARIA CHATAR BIHAR 10.0 ARARIA CHATAR
8 49.0 ARARIA DIYARI BIHAR 10.0 ARARIA DIYARI
9 49.0 ARARIA GAIYARI BIHAR 10.0 ARARIA GAIYARI
10 49.0 ARARIA HAYATPUR BIHAR 10.0 ARARIA HAYATPUR
11 49.0 ARARIA JAMUA BIHAR 10.0 ARARIA JAMUA
12 49.0 ARARIA JHAMTA BIHAR 10.0 ARARIA JHAMTA
13 49.0 ARARIA KAMALDAHA BIHAR 10.0 ARARIA KAMALDAHA
14 51.0 ARARIA KAMALDAHA BIHAR 10.0 ARARIA KAMALDAHA
15 49.0 ARARIA KISMAT KHAWASPUR BIHAR 10.0 ARARIA KISMATKHAWASPUR
16 49.0 ARARIA PAIKTOLA BIHAR 10.0 ARARIA PAIKTOLA
17 49.0 ARARIA RAMPUR KODARKATTI BIHAR 10.0 ARARIA RAMPURKODARKATI
18 49.0 ARARIA SAHASMAL BIHAR 10.0 ARARIA SAHASMAL
19 49.0 ARARIA SHARANPUR BIHAR 10.0 ARARIA SHARANPUR
20 50.0 ARARIA BAGNAGAR BIHAR 10.0 ARARIA BAGNAGAR
21 50.0 ARARIA BARA ISTAMBARAR BIHAR 10.0 ARARIA BARAISTAMBARAR
22 50.0 ARARIA BHAGWANPUR BIHAR 10.0 ARARIA BHAGWANPUR
23 50.0 ARARIA BHUNA MAJGAMA BIHAR 10.0 ARARIA BHUNAMAJGAMA
24 50.0 ARARIA CHAKAI BIHAR 10.0 ARARIA CHAKAI
25 50.0 ARARIA CHIRAH BIHAR 10.0 ARARIA CHIRAH
26 50.0 ARARIA DUBBA BIHAR 10.0 ARARIA DUBA
27 50.0 ARARIA GIRDA BIHAR 10.0 ARARIA GIRDA
28 50.0 ARARIA HARDAR BIHAR 10.0 ARARIA HARDAR
29 50.0 ARARIA KAKAN BIHAR 10.0 ARARIA KAKAN
... ... ... ... ... ... ... ...
278624 7.0 SENAPATI MOLKONBUNG MANIPUR 14.0 SENAPATI MOLKONBUNG
278625 31.0 SENAPATI POIROU TANGKHUL MANIPUR 14.0 SENAPATI POIROUTANGKHUL
278626 9.0 GURDASPUR DHIR PUNJAB 3.0 GURDASPUR DHIR
278627 9.0 GURDASPUR LONGOWAL PUNJAB 3.0 GURDASPUR LONGOWAL
278628 9.0 PATHANKOT HABAT PINDI PUNJAB 3.0 PATHANKOT HABATPINDI
278629 8.0 GURDASPUR KOTLA BAJJA SINGH PUNJAB 3.0 GURDASPUR KOTLABAJASINGH
278630 8.0 GURDASPUR KHUNDI PUNJAB 3.0 GURDASPUR KHUNDI
278631 43.0 HOSHIARPUR CHEEMA PUNJAB 3.0 HOSHIARPUR CHEMA
278632 43.0 HOSHIARPUR DAGAN PUNJAB 3.0 HOSHIARPUR DAGAN
278633 43.0 HOSHIARPUR KOLPUR PUNJAB 3.0 HOSHIARPUR KOLPUR
278634 43.0 HOSHIARPUR ULAH PUNJAB 3.0 HOSHIARPUR ULAH
278635 43.0 HOSHIARPUR BAROTI PUNJAB 3.0 HOSHIARPUR BAROTI
278636 43.0 HOSHIARPUR BASSI GULAM HUSSAIN PUNJAB 3.0 HOSHIARPUR BASIGULAMHUSAIN
278637 43.0 HOSHIARPUR SAINCHAN PUNJAB 3.0 HOSHIARPUR SAINCHAN
278638 43.0 HOSHIARPUR SARAIN PUNJAB 3.0 HOSHIARPUR SARAIN
278639 43.0 HOSHIARPUR BAHADURPUR BAHIAN PUNJAB 3.0 HOSHIARPUR BAHADURPURBAHIAN
278640 43.0 HOSHIARPUR CHAK SADHU PUNJAB 3.0 HOSHIARPUR CHAKSADHU
278641 43.0 HOSHIARPUR CHOHAL PUNJAB 3.0 HOSHIARPUR CHOHAL
278642 43.0 HOSHIARPUR DADA PUNJAB 3.0 HOSHIARPUR DADA
278643 43.0 HOSHIARPUR HARMOYA PUNJAB 3.0 HOSHIARPUR HARMOYA
278644 43.0 HOSHIARPUR HUKRAN PUNJAB 3.0 HOSHIARPUR HUKRAN
278645 43.0 HOSHIARPUR NARI PUNJAB 3.0 HOSHIARPUR NARI
278646 43.0 HOSHIARPUR BHANGALA NEW PUNJAB 3.0 HOSHIARPUR BHANGALANEW
278647 43.0 HOSHIARPUR DEVI DAS PUNJAB 3.0 HOSHIARPUR DEVIDAS
278648 43.0 HOSHIARPUR BATWARA PUNJAB 3.0 HOSHIARPUR BATWARA
278649 43.0 HOSHIARPUR BEH JOGAN PUNJAB 3.0 HOSHIARPUR BEHJOGAN
278650 43.0 HOSHIARPUR BHAVNAUR PUNJAB 3.0 HOSHIARPUR BHAVNAUR
278651 60.0 LUDHIANA TAJPUR BET PUNJAB 3.0 LUDHIANA TAJPURBET
278652 60.0 LUDHIANA GHARKHAN PUNJAB 3.0 LUDHIANA GHARKHAN
278653 60.0 LUDHIANA LALLAURI KALAN PUNJAB 3.0 LUDHIANA LALAURIKALAN

278654 rows × 7 columns

In [18]:
# Read physical progress Data
physical_progress_folder = os.path.join(data_folder, "Physical Progress Reports", "output")
physical_progress_files = os.listdir(physical_progress_folder)
physical_progress_files = [os.path.join(physical_progress_folder, x)  for x in physical_progress_files if x.split('.')[-1] == 'csv'][:-1]
df_list = []
for file in physical_progress_files:
    df = pd.read_csv(file, usecols=[0, 7, 8, 9, 10, 11])
    df.columns = ['Panchayat Name', 'Total Houses completed', 'Financial Year', 'Scheme', 'State Name', 'District Name']
    df_list.append(df)
    print(file)
physical_df = pd.concat(df_list).reset_index()
df_list = None
physical_df = physical_df[physical_df['Panchayat Name'].notnull()]
physical_df['Panchayat Name'] = physical_df['Panchayat Name'].str.upper()
physical_df['Panchayat Name'] = physical_df['Panchayat Name'].str.replace('PANCHAYAT', '').str.replace('GP', '')
physical_df['District Name'] = physical_df['District Name'].str.upper()
physical_df['State Name'] = physical_df['State Name'].str.upper()
physical_df['dyear'] = physical_df['Financial Year'].str.split('-').str[0].fillna(0).astype(int)
physical_df['district_y'] = physical_df['District Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
physical_df['panchayat_y'] = physical_df['Panchayat Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
physical_df = pd.merge(physical_df, geocoded_df, how='left', on=['panchayat_y', 'district_y'])
physical_df = physical_df[physical_df['ac_code'].notnull()]
physical_df = physical_df[['Total Houses completed', 'Scheme', 'dyear', 'ac_code', 'state_code']]
physical_df = physical_df.groupby(['ac_code', 'state_code', 'dyear', 'Scheme']).sum().reset_index()
physical_df
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2010-2011_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2011-2012_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2012-2013_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2013-2014_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2014-2015_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2015-2016_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2016-2017_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Physical Progress Reports\output\2017-2019_Scrapped_Data.csv
Out[18]:
ac_code state_code dyear Scheme Total Houses completed
0 0.0 1.0 2011 ALL CENTRAL SCHEMES 0
1 0.0 1.0 2011 IAY NEW CONSTRUCTION 0
2 0.0 1.0 2012 ALL CENTRAL SCHEMES 2
3 0.0 1.0 2012 IAY NEW CONSTRUCTION 2
4 0.0 1.0 2012 UPGRADATION 0
5 0.0 1.0 2013 ALL CENTRAL SCHEMES 11
6 0.0 1.0 2013 IAY NEW CONSTRUCTION 11
7 0.0 1.0 2014 ALL CENTRAL SCHEMES 0
8 0.0 1.0 2014 IAY NEW CONSTRUCTION 0
9 0.0 1.0 2015 ALL CENTRAL SCHEMES 0
10 0.0 1.0 2015 IAY NEW CONSTRUCTION 0
11 0.0 1.0 2016 ALL CENTRAL SCHEMES 9
12 0.0 1.0 2017 ALL CENTRAL SCHEMES 56
13 0.0 1.0 2017 PRADHAN MANTRI AWAAS YOJANA GRAMIN 56
14 0.0 11.0 2014 ALL CENTRAL SCHEMES 22
15 0.0 11.0 2014 IAY NEW CONSTRUCTION 22
16 0.0 11.0 2015 ALL CENTRAL SCHEMES 4
17 0.0 11.0 2015 IAY NEW CONSTRUCTION 4
18 0.0 11.0 2016 ALL CENTRAL SCHEMES 8
19 0.0 11.0 2017 ALL CENTRAL SCHEMES 0
20 0.0 11.0 2017 PRADHAN MANTRI AWAAS YOJANA GRAMIN 0
21 0.0 24.0 2012 ALL CENTRAL SCHEMES 12
22 0.0 24.0 2012 IAY NEW CONSTRUCTION 12
23 0.0 24.0 2013 ALL CENTRAL SCHEMES 0
24 0.0 24.0 2013 IAY NEW CONSTRUCTION 0
25 0.0 24.0 2014 ALL CENTRAL SCHEMES 42
26 0.0 24.0 2014 IAY NEW CONSTRUCTION 42
27 0.0 24.0 2015 ALL CENTRAL SCHEMES 21
28 0.0 24.0 2015 IAY NEW CONSTRUCTION 21
29 0.0 24.0 2016 ALL CENTRAL SCHEMES 11
... ... ... ... ... ...
58139 402.0 9.0 2014 LOHIA GRAMIN AWAS YOJANA 0
58140 402.0 9.0 2015 ALL CENTRAL SCHEMES 1072
58141 402.0 9.0 2015 IAY NEW CONSTRUCTION 1072
58142 402.0 9.0 2016 ALL CENTRAL SCHEMES 2056
58143 402.0 9.0 2017 ALL CENTRAL SCHEMES 3272
58144 402.0 9.0 2017 PRADHAN MANTRI AWAAS YOJANA GRAMIN 3269
58145 403.0 9.0 2011 ALL CENTRAL SCHEMES 88
58146 403.0 9.0 2011 IAY NEW CONSTRUCTION 88
58147 403.0 9.0 2012 ALL CENTRAL SCHEMES 358
58148 403.0 9.0 2012 IAY NEW CONSTRUCTION 358
58149 403.0 9.0 2013 ALL CENTRAL SCHEMES 234
58150 403.0 9.0 2013 ALL STATE SCHEMES 0
58151 403.0 9.0 2013 IAY NEW CONSTRUCTION 234
58152 403.0 9.0 2013 LOHIA GRAMIN AWAS YOJANA 0
58153 403.0 9.0 2013 PRIYADARSHINI AWAAS YOJANA 0
58154 403.0 9.0 2014 ALL CENTRAL SCHEMES 1386
58155 403.0 9.0 2014 ALL STATE SCHEMES 0
58156 403.0 9.0 2014 IAY NEW CONSTRUCTION 1386
58157 403.0 9.0 2014 PRIYADARSHINI AWAAS YOJANA 0
58158 403.0 9.0 2015 ALL CENTRAL SCHEMES 1905
58159 403.0 9.0 2015 ALL STATE SCHEMES 0
58160 403.0 9.0 2015 IAY NEW CONSTRUCTION 1905
58161 403.0 9.0 2015 PRIYADARSHINI AWAAS YOJANA 0
58162 403.0 9.0 2016 ALL CENTRAL SCHEMES 3025
58163 403.0 9.0 2016 ALL STATE SCHEMES 0
58164 403.0 9.0 2016 PRIYADARSHINI AWAAS YOJANA 0
58165 403.0 9.0 2017 ALL CENTRAL SCHEMES 3637
58166 403.0 9.0 2017 ALL STATE SCHEMES 0
58167 403.0 9.0 2017 PRADHAN MANTRI AWAAS YOJANA GRAMIN 3632
58168 403.0 9.0 2017 PRIYADARSHINI AWAAS YOJANA 0

58169 rows × 5 columns

In [19]:
# Read All Central Scheme Financial progress Data
centr_folder = os.path.join(data_folder, "Financial Progress Reports", "All Central Scheme-done", "output")
centr_files = os.listdir(centr_folder)
centr_files = [os.path.join(centr_folder, x)  for x in centr_files if x.split('.')[-1] == 'csv'][:-1]
df_list = []
for file in centr_files:
    df = pd.read_csv(file)
    df_list.append(df)
    print(file)
centr_df = pd.concat(df_list).reset_index()
centr_df['Scheme'] = 'All Central Scheme'
centr_df['Panchayat Name'] = centr_df['A']
centr_df = centr_df[centr_df['Panchayat Name'].notnull()]
centr_df['Panchayat Name'] = centr_df['Panchayat Name'].str.upper()
centr_df['Panchayat Name'] = centr_df['Panchayat Name'].str.replace('PANCHAYAT', '').str.replace('GP', '')
centr_df['District Name'] = centr_df['District Name'].str.upper()
centr_df['State Name'] = centr_df['State Name'].str.upper()
centr_df['dyear'] = centr_df['Financial Year'].str.split('-').str[0].fillna(0).astype(int)
centr_df['district_y'] = centr_df['District Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
centr_df['panchayat_y'] = centr_df['Panchayat Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
centr_df = pd.merge(centr_df, geocoded_df, how='left', on=['panchayat_y', 'district_y'])
centr_df = centr_df[centr_df['ac_code'].notnull()]
centr_df = centr_df[['B', 'C', 'D', 'E', 'F=B+C+D+E', 'dyear', 'ac_code', 'state_code']]
centr_df.columns = ['centr_b', 'centr_c', 'centr_d', 'centr_e', 'centr_f', 'dyear', 'ac_code', 'state_code']
centr_df = centr_df.groupby(['ac_code', 'state_code', 'dyear']).sum().reset_index()
centr_df
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2010-2011_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2011-2012_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2012-2013_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2013-2014_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2014-2015_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2015-2016_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2016-2017_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\All Central Scheme-done\output\2017-2018_Scrapped_Data.csv
Out[19]:
ac_code state_code dyear centr_b centr_c centr_d centr_e centr_f
0 0.0 1.0 2010 0.000 0.000 0.000 0.285 0.285
1 0.0 1.0 2012 1.235 0.000 0.000 0.750 1.985
2 0.0 1.0 2013 4.500 5.063 0.000 0.500 10.063
3 0.0 1.0 2014 1.950 0.250 0.000 0.200 2.400
4 0.0 1.0 2015 5.450 6.388 0.938 1.150 13.926
5 0.0 1.0 2017 50.700 3.500 6.000 16.500 76.700
6 0.0 11.0 2014 0.000 0.000 85.100 0.000 85.100
7 0.0 11.0 2016 0.000 0.000 4.500 0.000 4.500
8 0.0 11.0 2017 0.000 2.000 1.500 0.000 3.500
9 0.0 24.0 2010 0.000 4.950 0.000 0.450 5.400
10 0.0 24.0 2012 0.000 0.720 0.720 0.000 1.440
11 0.0 24.0 2013 0.595 5.880 0.000 3.815 10.290
12 0.0 24.0 2014 0.805 3.920 1.925 16.485 23.135
13 0.0 24.0 2015 0.175 12.600 0.000 1.050 13.825
14 0.0 24.0 2016 0.525 0.000 0.525 0.700 1.750
15 0.0 24.0 2017 0.000 16.500 0.000 14.605 31.105
16 0.0 27.0 2013 0.000 16.080 308.530 0.000 324.610
17 0.0 27.0 2014 0.000 0.950 0.000 3.800 4.750
18 1.0 1.0 2013 0.000 7.900 0.000 7.200 15.100
19 1.0 1.0 2014 0.000 1.876 0.000 2.626 4.502
20 1.0 1.0 2015 0.000 2.700 0.000 5.054 7.754
21 1.0 1.0 2017 0.000 0.000 0.000 4.500 4.500
22 1.0 2.0 2012 0.540 0.485 0.000 0.266 1.291
23 1.0 2.0 2013 2.700 4.725 0.000 0.000 7.425
24 1.0 2.0 2014 1.575 1.200 0.000 0.000 2.775
25 1.0 2.0 2015 1.200 1.575 0.000 0.375 3.150
26 1.0 2.0 2016 2.975 0.750 0.000 2.975 6.700
27 1.0 2.0 2017 6.890 0.000 0.650 6.110 13.650
28 1.0 3.0 2017 10.440 0.000 0.000 12.780 23.220
29 1.0 5.0 2012 64.765 0.000 2.380 43.030 110.175
... ... ... ... ... ... ... ... ...
22761 400.0 9.0 2011 6.098 5.777 24.644 2.819 39.338
22762 400.0 9.0 2012 100.054 13.702 28.546 72.542 214.844
22763 400.0 9.0 2013 395.988 58.125 29.338 21.849 505.300
22764 400.0 9.0 2014 551.150 128.250 52.717 78.000 810.117
22765 400.0 9.0 2015 161.425 55.875 18.650 38.975 274.925
22766 400.0 9.0 2016 134.900 52.125 24.300 38.600 249.925
22767 400.0 9.0 2017 1807.150 469.175 203.275 499.295 2978.895
22768 401.0 9.0 2011 8.682 48.630 0.000 3.091 60.403
22769 401.0 9.0 2012 32.468 66.688 12.247 55.984 167.387
22770 401.0 9.0 2013 176.131 144.860 3.860 37.356 362.207
22771 401.0 9.0 2014 240.675 342.000 33.000 34.175 649.850
22772 401.0 9.0 2015 88.400 142.825 10.800 18.850 260.875
22773 401.0 9.0 2016 88.225 132.350 38.925 19.200 278.700
22774 401.0 9.0 2017 1314.155 550.160 290.940 391.885 2547.140
22775 402.0 9.0 2010 0.364 0.000 0.000 0.000 0.364
22776 402.0 9.0 2011 33.907 33.465 0.970 4.576 72.918
22777 402.0 9.0 2012 61.572 90.804 9.311 47.167 208.854
22778 402.0 9.0 2013 179.843 270.592 7.575 35.730 493.740
22779 402.0 9.0 2014 516.300 623.625 60.825 45.425 1246.175
22780 402.0 9.0 2015 274.950 283.500 11.050 32.550 602.050
22781 402.0 9.0 2016 214.475 279.750 16.625 26.925 537.775
22782 402.0 9.0 2017 2236.815 1627.620 75.895 615.180 4555.510
22783 403.0 9.0 2010 0.000 0.364 0.000 0.000 0.364
22784 403.0 9.0 2011 57.827 106.336 60.140 15.154 239.457
22785 403.0 9.0 2012 68.757 126.544 60.056 76.877 332.234
22786 403.0 9.0 2013 382.750 771.792 62.756 19.687 1236.985
22787 403.0 9.0 2014 581.425 1026.375 86.900 130.175 1824.875
22788 403.0 9.0 2015 221.025 462.375 17.375 77.675 778.450
22789 403.0 9.0 2016 205.075 427.500 10.275 45.675 688.525
22790 403.0 9.0 2017 1736.045 2109.270 317.445 824.650 4987.410

22791 rows × 8 columns

In [20]:
# Read All IAY New Construction Data
iay_folder = os.path.join(data_folder, "Financial Progress Reports", "IAY New Construction -done", "output")
iay_files = os.listdir(iay_folder)
iay_files = [os.path.join(iay_folder, x)  for x in iay_files if x.split('.')[-1] == 'csv'][:-1]
df_list = []
for file in iay_files:
    df = pd.read_csv(file)
    df_list.append(df)
    print(file)
iay_df = pd.concat(df_list).reset_index()
iay_df['Scheme'] = 'IAY New Construction'
iay_df = iay_df[iay_df.columns[1:-1]]
iay_df['Panchayat Name'] = iay_df['A']
iay_df = iay_df[iay_df.columns[1:]]
iay_df = iay_df[iay_df['Panchayat Name'].notnull()]
iay_df['Panchayat Name'] = iay_df['Panchayat Name'].str.upper()
iay_df['Panchayat Name'] = iay_df['Panchayat Name'].str.replace('PANCHAYAT', '').str.replace('GP', '')
iay_df['District Name'] = iay_df['District Name'].str.upper()
iay_df['State Name'] = iay_df['State Name'].str.upper()
iay_df['dyear'] = iay_df['Financial Year'].str.split('-').str[0].fillna(0).astype(int)
iay_df['district_y'] = iay_df['District Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
iay_df['panchayat_y'] = iay_df['Panchayat Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
iay_df = pd.merge(iay_df, geocoded_df, how='left', on=['panchayat_y', 'district_y'])
iay_df = iay_df[iay_df['ac_code'].notnull()]
iay_df = iay_df[['B', 'C', 'D', 'E', 'F=B+C+D+E', 'dyear', 'ac_code', 'state_code']]
iay_df.columns = ['iay_b', 'iay_c', 'iay_d', 'iay_e', 'iay_f', 'dyear', 'ac_code', 'state_code']
iay_df = iay_df.groupby(['ac_code', 'state_code', 'dyear']).sum().reset_index()
iay_df
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\IAY New Construction -done\output\2010-2011_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\IAY New Construction -done\output\2011-2012_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\IAY New Construction -done\output\2012-2013_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\IAY New Construction -done\output\2013-2014_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\IAY New Construction -done\output\2014-2015_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\IAY New Construction -done\output\2015-2016_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\IAY New Construction -done\output\2016-2017_Scrapped_Data.csv
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\ipykernel_launcher.py:10: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  # Remove the CWD from sys.path while we load stuff.
Out[20]:
ac_code state_code dyear iay_b iay_c iay_d iay_e iay_f
0 0.0 1.0 2010 0.000 0.000 0.000 0.285 0.285
1 0.0 1.0 2012 1.235 0.000 0.000 0.750 1.985
2 0.0 1.0 2013 4.500 5.063 0.000 0.500 10.063
3 0.0 1.0 2014 1.950 0.250 0.000 0.200 2.400
4 0.0 1.0 2015 5.450 6.388 0.938 1.150 13.926
5 0.0 11.0 2014 0.000 0.000 85.100 0.000 85.100
6 0.0 11.0 2016 0.000 0.000 4.500 0.000 4.500
7 0.0 24.0 2010 0.000 4.950 0.000 0.450 5.400
8 0.0 24.0 2012 0.000 0.720 0.720 0.000 1.440
9 0.0 24.0 2013 0.595 5.880 0.000 3.815 10.290
10 0.0 24.0 2014 0.805 3.920 1.925 16.485 23.135
11 0.0 24.0 2015 0.175 12.600 0.000 1.050 13.825
12 0.0 24.0 2016 0.525 0.000 0.525 0.700 1.750
13 0.0 27.0 2013 0.000 16.080 308.530 0.000 324.610
14 0.0 27.0 2014 0.000 0.950 0.000 3.800 4.750
15 1.0 1.0 2013 0.000 7.900 0.000 7.200 15.100
16 1.0 1.0 2014 0.000 1.876 0.000 2.626 4.502
17 1.0 1.0 2015 0.000 2.700 0.000 5.054 7.754
18 1.0 2.0 2012 0.540 0.485 0.000 0.266 1.291
19 1.0 2.0 2013 2.700 4.725 0.000 0.000 7.425
20 1.0 2.0 2014 1.575 1.200 0.000 0.000 2.775
21 1.0 2.0 2015 1.200 1.575 0.000 0.375 3.150
22 1.0 2.0 2016 0.375 0.750 0.000 0.375 1.500
23 1.0 5.0 2012 64.765 0.000 2.380 43.030 110.175
24 1.0 5.0 2013 21.083 0.000 0.045 9.332 30.460
25 1.0 5.0 2014 56.438 1.576 0.750 8.321 67.085
26 1.0 5.0 2015 34.650 0.000 0.000 13.050 47.700
27 1.0 5.0 2016 18.141 0.112 0.000 9.258 27.511
28 1.0 6.0 2010 0.203 0.000 0.000 0.000 0.203
29 1.0 6.0 2011 19.390 0.000 1.961 11.497 32.848
... ... ... ... ... ... ... ... ...
19027 399.0 9.0 2013 437.749 3.375 15.519 32.762 489.405
19028 399.0 9.0 2014 308.025 2.625 60.200 47.975 418.825
19029 399.0 9.0 2015 555.700 20.250 76.175 28.450 680.575
19030 399.0 9.0 2016 331.125 18.750 27.600 25.100 402.575
19031 400.0 9.0 2011 6.098 5.777 24.644 2.819 39.338
19032 400.0 9.0 2012 100.054 13.702 28.546 72.542 214.844
19033 400.0 9.0 2013 395.988 58.125 29.338 21.849 505.300
19034 400.0 9.0 2014 551.150 128.250 52.717 78.000 810.117
19035 400.0 9.0 2015 161.425 55.875 18.650 38.975 274.925
19036 400.0 9.0 2016 134.900 52.125 24.300 38.600 249.925
19037 401.0 9.0 2011 8.682 48.630 0.000 3.091 60.403
19038 401.0 9.0 2012 32.468 66.203 12.247 55.984 166.902
19039 401.0 9.0 2013 176.131 144.860 3.860 37.356 362.207
19040 401.0 9.0 2014 240.675 342.000 33.000 34.175 649.850
19041 401.0 9.0 2015 88.400 142.825 10.800 18.850 260.875
19042 401.0 9.0 2016 87.825 131.950 38.925 19.200 277.900
19043 402.0 9.0 2010 0.364 0.000 0.000 0.000 0.364
19044 402.0 9.0 2011 33.907 33.465 0.970 4.576 72.918
19045 402.0 9.0 2012 61.572 90.804 9.311 47.167 208.854
19046 402.0 9.0 2013 179.843 270.592 7.575 35.730 493.740
19047 402.0 9.0 2014 516.300 623.625 60.825 45.425 1246.175
19048 402.0 9.0 2015 274.950 283.500 11.050 32.550 602.050
19049 402.0 9.0 2016 214.475 279.750 16.625 26.925 537.775
19050 403.0 9.0 2010 0.000 0.364 0.000 0.000 0.364
19051 403.0 9.0 2011 57.827 106.336 60.140 15.154 239.457
19052 403.0 9.0 2012 68.757 126.544 60.056 76.877 332.234
19053 403.0 9.0 2013 382.750 771.792 62.756 19.687 1236.985
19054 403.0 9.0 2014 581.425 1026.375 86.900 130.175 1824.875
19055 403.0 9.0 2015 221.025 462.375 17.375 77.675 778.450
19056 403.0 9.0 2016 205.075 427.500 10.275 45.675 688.525

19057 rows × 8 columns

In [21]:
# Read All PMAY New Construction Data
pmay_folder = os.path.join(data_folder, "Financial Progress Reports", "Pradhan Mantri Awaas Yojana -done")
pmay_files = os.listdir(pmay_folder)
pmay_files = [os.path.join(pmay_folder, x)  for x in pmay_files if x.split('.')[-1] == 'csv'][:-1]
df_list = []
for file in pmay_files:
    df = pd.read_csv(file)
    df_list.append(df)
    print(file)
pmay_df = pd.concat(df_list).reset_index()
pmay_df['Scheme'] = 'Pradhan Mantri Awaas Yojana'
pmay_df = pmay_df[pmay_df.columns[1:-1]]
pmay_df['Panchayat Name'] = pmay_df['A']
pmay_df = pmay_df[pmay_df.columns[1:]]
pmay_df = pmay_df[pmay_df['Panchayat Name'].notnull()]
pmay_df['Panchayat Name'] = pmay_df['Panchayat Name'].str.upper()
pmay_df['Panchayat Name'] = pmay_df['Panchayat Name'].str.replace('PANCHAYAT', '').str.replace('GP', '')
pmay_df['District Name'] = pmay_df['District Name'].str.upper()
pmay_df['State Name'] = pmay_df['State Name'].str.upper()
pmay_df['dyear'] = pmay_df['Financial Year'].str.split('-').str[0].fillna(0).astype(int)
pmay_df['district_y'] = pmay_df['District Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
pmay_df['panchayat_y'] = pmay_df['Panchayat Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
pmay_df = pd.merge(pmay_df, geocoded_df, how='left', on=['panchayat_y', 'district_y'])
pmay_df = pmay_df[pmay_df['ac_code'].notnull()]
pmay_df = pmay_df[['B', 'C', 'D', 'E', 'F=B+C+D+E', 'dyear', 'ac_code', 'state_code']]
pmay_df.columns = ['pmay_b', 'pmay_c', 'pmay_d', 'pmay_e', 'pmay_f', 'dyear', 'ac_code', 'state_code']
pmay_df = pmay_df.groupby(['ac_code', 'state_code', 'dyear']).sum().reset_index()
pmay_df
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\Pradhan Mantri Awaas Yojana -done\2016-2017_Scrapped_Data.csv
E:\workspace\sourav sarkar\Task 13 - Prepare Awas Yojana dataset at assembly constituency level\data\Financial Progress Reports\Pradhan Mantri Awaas Yojana -done\2017-2018_Scrapped_Data.csv
C:\Users\sandyjones\AppData\Local\conda\conda\envs\geo\lib\site-packages\ipykernel_launcher.py:10: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  # Remove the CWD from sys.path while we load stuff.
Out[21]:
ac_code state_code dyear pmay_b pmay_c pmay_d pmay_e pmay_f
0 0.0 1.0 2017 50.700 3.500 6.00 16.500 76.700
1 0.0 11.0 2017 0.000 2.000 0.00 0.000 2.000
2 0.0 24.0 2017 0.000 16.500 0.00 14.500 31.000
3 1.0 1.0 2017 0.000 0.000 0.00 4.500 4.500
4 1.0 2.0 2016 2.600 0.000 0.00 2.600 5.200
5 1.0 2.0 2017 6.890 0.000 0.65 6.110 13.650
6 1.0 3.0 2017 10.440 0.000 0.00 12.780 23.220
7 1.0 5.0 2016 125.400 0.600 0.00 1.200 127.200
8 1.0 5.0 2017 187.000 0.700 11.40 9.200 208.300
9 1.0 6.0 2016 7.200 0.000 1.35 25.650 34.200
10 1.0 6.0 2017 22.320 0.000 2.46 46.800 71.580
11 1.0 8.0 2016 85732.500 207.000 7038.00 35431.500 128409.000
12 1.0 8.0 2017 1069204.200 8576.700 80385.00 405540.600 1563706.500
13 1.0 9.0 2016 2.800 0.000 11.20 1.600 15.600
14 1.0 9.0 2017 70.200 0.000 153.10 36.100 259.400
15 1.0 10.0 2017 761.350 1252.500 417.15 800.150 3231.150
16 1.0 11.0 2017 8.800 70.600 6.50 66.700 152.600
17 1.0 14.0 2017 19.380 18.780 58.44 294.300 390.900
18 1.0 15.0 2017 0.000 58.630 0.00 0.000 58.630
19 1.0 16.0 2016 0.970 7.269 0.00 2.423 10.662
20 1.0 16.0 2017 31.415 145.905 0.00 63.775 241.095
21 1.0 17.0 2017 0.000 155.610 0.00 0.000 155.610
22 1.0 18.0 2017 1041.950 215.475 316.42 530.400 2104.245
23 1.0 19.0 2016 403.600 8.400 234.00 33.600 679.600
24 1.0 19.0 2017 2338.800 40.600 1379.50 185.600 3944.500
25 1.0 20.0 2016 21.240 52.320 100.14 181.260 354.960
26 1.0 20.0 2017 309.000 689.220 1880.52 2163.420 5042.160
27 1.0 21.0 2016 40.200 54.140 0.00 28.100 122.440
28 1.0 21.0 2017 1706.750 2634.100 1.70 2082.810 6425.360
29 1.0 22.0 2016 73.200 560.560 3.56 189.520 826.840
... ... ... ... ... ... ... ... ...
5574 386.0 9.0 2016 1.200 0.000 0.00 0.000 1.200
5575 386.0 9.0 2017 1043.200 34.700 50.50 499.300 1627.700
5576 387.0 9.0 2016 5.280 0.000 0.00 3.960 9.240
5577 387.0 9.0 2017 417.500 21.600 21.40 366.040 826.540
5578 388.0 9.0 2017 1036.200 23.700 87.70 455.900 1603.500
5579 389.0 9.0 2017 64.500 1.200 0.00 23.900 89.600
5580 390.0 9.0 2016 0.800 0.000 0.00 0.400 1.200
5581 390.0 9.0 2017 149.800 0.000 10.80 115.500 276.100
5582 391.0 9.0 2016 2.200 0.000 0.00 0.000 2.200
5583 391.0 9.0 2017 564.400 23.100 44.90 437.000 1069.400
5584 392.0 9.0 2016 8.400 0.000 3.20 1.200 12.800
5585 392.0 9.0 2017 2373.800 3.700 267.90 856.640 3502.040
5586 393.0 9.0 2016 0.800 0.000 0.00 0.000 0.800
5587 393.0 9.0 2017 3901.700 0.000 229.20 1500.800 5631.700
5588 394.0 9.0 2017 1254.100 0.000 82.30 542.700 1879.100
5589 395.0 9.0 2016 293.480 2.200 16.28 45.320 357.280
5590 395.0 9.0 2017 7242.450 47.680 918.96 1843.180 10052.270
5591 396.0 9.0 2016 33.000 0.000 0.00 0.000 33.000
5592 396.0 9.0 2017 794.160 13.000 75.90 337.400 1220.460
5593 397.0 9.0 2016 284.240 3.960 28.80 45.980 362.980
5594 397.0 9.0 2017 6752.330 131.080 686.80 2520.300 10090.510
5595 398.0 9.0 2016 205.400 3.960 26.40 85.360 321.120
5596 398.0 9.0 2017 2876.150 54.360 514.54 1949.440 5394.490
5597 399.0 9.0 2016 123.000 7.040 13.00 7.480 150.520
5598 399.0 9.0 2017 2534.380 84.540 200.74 590.280 3409.940
5599 400.0 9.0 2017 1803.400 468.800 202.90 498.920 2974.020
5600 401.0 9.0 2016 0.400 0.400 0.00 0.000 0.800
5601 401.0 9.0 2017 1313.030 541.160 290.94 390.110 2535.240
5602 402.0 9.0 2017 2230.840 1622.370 75.52 613.680 4542.410
5603 403.0 9.0 2017 1732.670 2096.520 316.32 824.650 4970.160

5604 rows × 8 columns

In [22]:
variables = [
    'tot_houses',
    'centr_houses',
    'state_houses',
    'centr_b',
    'centr_c',
    'centr_d',
    'centr_e',
    'centr_f',
    'iay_b',
    'iay_c',
    'iay_d',
    'iay_e',
    'iay_f',
    'pmay_b',
    'pmay_c',
    'pmay_d',
    'pmay_e',
    'pmay_f'
]
#Get all variables
def get_ac_variables(row):
    #print(pd.DataFrame(row).T)
    next_elect = acdf[(acdf['state_code'] == row['state_code']) & (acdf['constituency_no'] == row['constituency_no']) & (acdf['datetime'] > row['datetime'])][0:1]
    if len(next_elect) < 1:
        next_elect = pd.DataFrame(row).T
        next_elect['dyear'] = next_elect['dyear'] + 5
        next_elect['datetime'] = next_elect['datetime'] + pd.DateOffset(years=5)
    #print(next_elect)
    try:
        df1 = physical_df[(physical_df['state_code'] == row['state_code']) & (physical_df['ac_code'] == row['constituency_no']) & (physical_df['dyear'] >= row['dyear']) & (physical_df['dyear'] < next_elect['dyear'].values[0])]
        tot_houses = df1['Total Houses completed'].sum()
        centr_houses = df1[df1['Scheme'] == 'ALL CENTRAL SCHEMES']['Total Houses completed'].sum()
        state_houses = df1[df1['Scheme'] == 'ALL STATE SCHEMES']['Total Houses completed'].sum()
        df2 = centr_df[(centr_df['state_code'] == row['state_code']) & (centr_df['ac_code'] == row['constituency_no']) & (centr_df['dyear'] >= row['dyear']) & (centr_df['dyear'] < next_elect['dyear'].values[0])]
        centr_b = df2['centr_b'].sum()
        centr_c = df2['centr_c'].sum() 
        centr_d = df2['centr_d'].sum() 
        centr_e = df2['centr_e'].sum() 
        centr_f = df2['centr_f'].sum() 
        df3 = iay_df[(iay_df['state_code'] == row['state_code']) & (iay_df['ac_code'] == row['constituency_no']) & (iay_df['dyear'] >= row['dyear']) & (iay_df['dyear'] < next_elect['dyear'].values[0])]
        iay_b = df3['iay_b'].sum()
        iay_c = df3['iay_c'].sum() 
        iay_d = df3['iay_d'].sum() 
        iay_e = df3['iay_e'].sum() 
        iay_f = df3['iay_f'].sum()
        df4 = pmay_df[(pmay_df['state_code'] == row['state_code']) & (pmay_df['ac_code'] == row['constituency_no']) & (pmay_df['dyear'] >= row['dyear']) & (pmay_df['dyear'] < next_elect['dyear'].values[0])]
        pmay_b = df4['pmay_b'].sum()
        pmay_c = df4['pmay_c'].sum() 
        pmay_d = df4['pmay_d'].sum() 
        pmay_e = df4['pmay_e'].sum() 
        pmay_f = df4['pmay_f'].sum() 
        #Return Now
        return tot_houses,centr_houses,state_houses,centr_b,centr_c,centr_d,centr_e,centr_f,iay_b,iay_c,iay_d,iay_e,iay_f,pmay_b,pmay_c,pmay_d,pmay_e,pmay_f
    except:
        return 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    return 1
#Apply This Function
acdf[variables] = acdf.apply(get_ac_variables, axis=1, result_type='expand')
acdf
Out[22]:
state_name state_code constituency_no year month day dyear datetime OBJECTID ST_CODE ... iay_b iay_c iay_d iay_e iay_f pmay_b pmay_c pmay_d pmay_e pmay_f
0 Jammu & Kashmir 1 1 2008 12 1 2009 2008-12-01 1 1 ... 0.000 9.776 0.000 9.826 19.602 0.000 0.0 0.000 0.000 0.000
1 Jammu & Kashmir 1 1 2014 12 1 2015 2014-12-01 1 1 ... 0.000 2.700 0.000 5.054 7.754 0.000 0.0 0.000 4.500 4.500
2 Jammu & Kashmir 1 2 2008 12 1 2009 2008-12-01 1 1 ... 1.499 79.563 3.625 50.878 135.565 0.000 0.0 0.000 0.000 0.000
3 Jammu & Kashmir 1 2 2014 12 1 2015 2014-12-01 1 1 ... 0.562 23.076 0.000 23.844 47.482 4.000 65.0 0.000 177.100 246.100
4 Jammu & Kashmir 1 3 2008 12 1 2009 2008-12-01 1 1 ... 0.000 31.487 0.500 8.053 40.040 0.000 0.0 0.000 0.000 0.000
5 Jammu & Kashmir 1 3 2014 12 1 2015 2014-12-01 1 1 ... 0.000 2.600 0.000 3.090 5.690 0.000 5.5 2.000 22.000 29.500
6 Jammu & Kashmir 1 4 2008 12 1 2009 2008-12-01 1 1 ... 0.750 65.382 2.750 81.079 149.961 0.000 0.0 0.000 0.000 0.000
7 Jammu & Kashmir 1 4 2014 12 1 2015 2014-12-01 1 1 ... 0.188 12.291 0.000 30.302 42.781 0.000 42.5 0.000 223.500 266.000
8 Jammu & Kashmir 1 5 2008 12 1 2009 2008-12-01 1 1 ... 0.000 7.801 0.000 22.160 29.961 0.000 0.0 0.000 0.000 0.000
9 Jammu & Kashmir 1 5 2014 12 1 2015 2014-12-01 1 1 ... 0.000 0.938 0.000 8.067 9.005 0.000 5.5 0.000 75.000 80.500
10 Jammu & Kashmir 1 6 2008 12 1 2009 2008-12-01 1 1 ... 0.000 4.552 0.000 28.435 32.987 0.000 0.0 0.000 0.000 0.000
11 Jammu & Kashmir 1 6 2014 12 1 2015 2014-12-01 1 1 ... 0.000 14.237 0.000 22.886 37.123 0.000 2.3 0.000 19.700 22.000
12 Jammu & Kashmir 1 7 2008 12 1 2009 2008-12-01 1 1 ... 1.124 6.935 0.438 27.061 35.558 0.000 0.0 0.000 0.000 0.000
13 Jammu & Kashmir 1 7 2014 12 1 2015 2014-12-01 1 1 ... 0.000 9.136 1.438 50.701 61.275 0.000 0.0 0.000 42.000 42.000
14 Jammu & Kashmir 1 8 2008 12 1 2009 2008-12-01 1 1 ... 0.000 9.625 0.000 35.216 44.841 0.000 0.0 0.000 0.000 0.000
15 Jammu & Kashmir 1 8 2014 12 1 2015 2014-12-01 1 1 ... 0.500 14.350 0.000 39.820 54.670 0.000 0.0 0.000 41.500 41.500
16 Jammu & Kashmir 1 9 2008 12 1 2009 2008-12-01 1 1 ... 0.000 22.686 0.000 0.000 22.686 0.000 0.0 0.000 0.000 0.000
17 Jammu & Kashmir 1 9 2014 12 1 2015 2014-12-01 1 1 ... 0.000 18.938 0.000 0.000 18.938 0.000 5.5 0.000 0.500 6.000
18 Jammu & Kashmir 1 10 2008 12 1 2009 2008-12-01 1 1 ... 0.000 16.451 0.000 80.617 97.068 0.000 0.0 0.000 0.000 0.000
19 Jammu & Kashmir 1 10 2014 12 1 2015 2014-12-01 1 1 ... 0.000 68.805 0.000 25.805 94.610 0.000 53.5 0.000 59.500 113.000
20 Jammu & Kashmir 1 11 2008 12 1 2009 2008-12-01 1 1 ... 1.250 4.750 0.000 20.091 26.091 0.000 0.0 0.000 0.000 0.000
21 Jammu & Kashmir 1 11 2014 12 1 2015 2014-12-01 1 1 ... 1.250 9.960 0.000 19.674 30.884 0.000 1.0 0.000 35.000 36.000
22 Jammu & Kashmir 1 12 2008 12 1 2009 2008-12-01 1 1 ... 0.000 0.000 0.250 13.164 13.414 0.000 0.0 0.000 0.000 0.000
23 Jammu & Kashmir 1 12 2014 12 1 2015 2014-12-01 1 1 ... 0.000 3.000 0.000 39.832 42.832 0.000 0.0 0.000 15.200 15.200
24 Jammu & Kashmir 1 13 2008 12 1 2009 2008-12-01 1 1 ... 3.130 67.961 1.200 137.428 209.719 0.000 0.0 0.000 0.000 0.000
25 Jammu & Kashmir 1 13 2014 12 1 2015 2014-12-01 1 1 ... 0.000 75.537 1.638 141.817 218.992 0.000 11.8 1.500 79.700 93.000
26 Jammu & Kashmir 1 14 2008 12 1 2009 2008-12-01 1 1 ... 1.000 6.075 1.409 83.041 91.525 0.000 0.0 0.000 0.000 0.000
27 Jammu & Kashmir 1 14 2014 12 1 2015 2014-12-01 1 1 ... 0.250 14.150 0.000 91.704 106.104 0.000 3.0 0.000 57.500 60.500
28 Jammu & Kashmir 1 15 2008 12 1 2009 2008-12-01 1 1 ... 0.985 0.000 1.710 35.374 38.069 0.000 0.0 0.000 0.000 0.000
29 Jammu & Kashmir 1 15 2014 12 1 2015 2014-12-01 1 1 ... 0.000 0.000 0.250 41.290 41.540 0.500 0.0 0.000 11.500 12.000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8328 Puducherry 34 16 2011 5 1 2011 2011-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8329 Puducherry 34 16 2016 5 1 2016 2016-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8330 Puducherry 34 17 2011 5 1 2011 2011-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8331 Puducherry 34 17 2016 5 1 2016 2016-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8332 Puducherry 34 18 2011 5 1 2011 2011-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8333 Puducherry 34 18 2016 5 1 2016 2016-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8334 Puducherry 34 19 2011 5 1 2011 2011-05-01 1 34 ... 17.996 0.000 0.430 3.639 22.065 0.000 0.0 0.000 0.000 0.000
8335 Puducherry 34 19 2016 5 1 2016 2016-05-01 1 34 ... 1.287 0.000 0.000 0.000 1.287 18.934 0.0 0.000 1.301 20.235
8336 Puducherry 34 20 2011 5 1 2011 2011-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8337 Puducherry 34 20 2016 5 1 2016 2016-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8338 Puducherry 34 21 2011 5 1 2011 2011-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8339 Puducherry 34 21 2016 5 1 2016 2016-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8340 Puducherry 34 22 2011 5 1 2011 2011-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8341 Puducherry 34 22 2016 5 1 2016 2016-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8342 Puducherry 34 23 2011 5 1 2011 2011-05-01 1 34 ... 17.793 0.000 0.000 2.010 19.803 0.000 0.0 0.000 0.000 0.000
8343 Puducherry 34 23 2016 5 1 2016 2016-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 2.610 0.0 0.000 0.000 2.610
8344 Puducherry 34 24 2011 5 1 2011 2011-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8345 Puducherry 34 24 2016 5 1 2016 2016-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8346 Puducherry 34 25 2011 5 1 2011 2011-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8347 Puducherry 34 25 2016 5 1 2016 2016-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8348 Puducherry 34 26 2011 5 1 2011 2011-05-01 1 34 ... 0.000 0.450 0.450 0.000 0.900 0.000 0.0 0.000 0.000 0.000
8349 Puducherry 34 26 2016 5 1 2016 2016-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8350 Puducherry 34 27 2011 5 1 2011 2011-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8351 Puducherry 34 27 2016 5 1 2016 2016-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8352 Puducherry 34 28 2011 5 1 2011 2011-05-01 1 34 ... 15.108 0.000 0.910 1.949 17.967 0.000 0.0 0.000 0.000 0.000
8353 Puducherry 34 28 2016 5 1 2016 2016-05-01 1 34 ... 0.540 0.000 0.000 0.000 0.540 21.480 0.0 0.794 6.714 28.988
8354 Puducherry 34 29 2011 5 1 2011 2011-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8355 Puducherry 34 29 2016 5 1 2016 2016-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8356 Puducherry 34 30 2011 5 1 2011 2011-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000
8357 Puducherry 34 30 2016 5 1 2016 2016-05-01 1 34 ... 0.000 0.000 0.000 0.000 0.000 0.000 0.0 0.000 0.000 0.000

8358 rows × 35 columns

In [23]:
# Output to CSV
acdf.to_csv(os.path.join(output_folder, "Assembly_Constituencies_Variables_Categories.csv"), encoding='utf-8', index=False)