#Import Libraries
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
import json
import requests
import math
import os
import time
import datetime
import itertools
# Import folders
data_folder = os.path.abspath('data')
# Set Output Folder
output_folder = os.path.abspath("output")
if not os.path.exists(output_folder):
os.makedirs(output_folder)
# Helping Functions
def remove_consecutive_duplicates(x):
return ''.join(i for i, _ in itertools.groupby(x))
# Read Constituency Data
ac_gdf = None
ac_filepath = os.path.join(data_folder, "AC", "India_AC.shp")
ac_gdf = gpd.read_file(ac_filepath)
ac_gdf
# Read Constituency Data
ac_file = os.path.join(data_folder, "General_Later_Ashoka_alldata.csv")
acdf = pd.read_csv(ac_file)
acdf['state_name'] = acdf['state_name'].str.replace("_", " ")
#acdf = acdf[['state_name', 'constituency_no', 'constituency_name', 'year', 'month']]
acdf = acdf.drop_duplicates(subset=None, keep="first", inplace=False).reset_index(drop=True)
acdf.loc[acdf['newstate_code'] == 36, 'newstate_code'] = 28 #Telangana Fix
acdf = acdf[['state_name', 'state_code', 'constituency_no', 'year', 'month']]
acdf['day'] = 1
acdf['dyear'] = 0
acdf.loc[(acdf['month'] > 9) | (acdf['year'] == 2008), 'dyear'] = 1
acdf['dyear'] = acdf['year'] + acdf['dyear']
acdf['datetime'] = pd.to_datetime(acdf[['year', 'month', 'day']])
acdf = acdf.drop_duplicates().reset_index(drop=True)
acdf = pd.merge(acdf, ac_gdf, how='inner', left_on=['state_code', 'constituency_no'], right_on = ['ST_CODE','AC_NO'])#[['ST_CODE', 'ST_NAME','DT_CODE', 'DIST_NAME', 'AC_NO', 'AC_NAME', 'PC_NO', 'PC_NAME']]
acdf = acdf[acdf.columns[:-5]]
acdf
# Read geocoded Scrapped Data
geocoded_file = os.path.join(data_folder, 'geocoded.csv')
geocoded_df = pd.read_csv(geocoded_file)
geocoded_df['district_y'] = geocoded_df['district'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
geocoded_df['panchayat_y'] = geocoded_df['panchayat'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
geocoded_df
# Read physical progress Data
physical_progress_folder = os.path.join(data_folder, "Physical Progress Reports", "output")
physical_progress_files = os.listdir(physical_progress_folder)
physical_progress_files = [os.path.join(physical_progress_folder, x) for x in physical_progress_files if x.split('.')[-1] == 'csv'][:-1]
df_list = []
for file in physical_progress_files:
df = pd.read_csv(file, usecols=[0, 7, 8, 9, 10, 11])
df.columns = ['Panchayat Name', 'Total Houses completed', 'Financial Year', 'Scheme', 'State Name', 'District Name']
df_list.append(df)
print(file)
physical_df = pd.concat(df_list).reset_index()
df_list = None
physical_df = physical_df[physical_df['Panchayat Name'].notnull()]
physical_df['Panchayat Name'] = physical_df['Panchayat Name'].str.upper()
physical_df['Panchayat Name'] = physical_df['Panchayat Name'].str.replace('PANCHAYAT', '').str.replace('GP', '')
physical_df['District Name'] = physical_df['District Name'].str.upper()
physical_df['State Name'] = physical_df['State Name'].str.upper()
physical_df['dyear'] = physical_df['Financial Year'].str.split('-').str[0].fillna(0).astype(int)
physical_df['district_y'] = physical_df['District Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
physical_df['panchayat_y'] = physical_df['Panchayat Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
physical_df = pd.merge(physical_df, geocoded_df, how='left', on=['panchayat_y', 'district_y'])
physical_df = physical_df[physical_df['ac_code'].notnull()]
physical_df = physical_df[['Total Houses completed', 'Scheme', 'dyear', 'ac_code', 'state_code']]
physical_df = physical_df.groupby(['ac_code', 'state_code', 'dyear', 'Scheme']).sum().reset_index()
physical_df
# Read All Central Scheme Financial progress Data
centr_folder = os.path.join(data_folder, "Financial Progress Reports", "All Central Scheme-done", "output")
centr_files = os.listdir(centr_folder)
centr_files = [os.path.join(centr_folder, x) for x in centr_files if x.split('.')[-1] == 'csv'][:-1]
df_list = []
for file in centr_files:
df = pd.read_csv(file)
df_list.append(df)
print(file)
centr_df = pd.concat(df_list).reset_index()
centr_df['Scheme'] = 'All Central Scheme'
centr_df['Panchayat Name'] = centr_df['A']
centr_df = centr_df[centr_df['Panchayat Name'].notnull()]
centr_df['Panchayat Name'] = centr_df['Panchayat Name'].str.upper()
centr_df['Panchayat Name'] = centr_df['Panchayat Name'].str.replace('PANCHAYAT', '').str.replace('GP', '')
centr_df['District Name'] = centr_df['District Name'].str.upper()
centr_df['State Name'] = centr_df['State Name'].str.upper()
centr_df['dyear'] = centr_df['Financial Year'].str.split('-').str[0].fillna(0).astype(int)
centr_df['district_y'] = centr_df['District Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
centr_df['panchayat_y'] = centr_df['Panchayat Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
centr_df = pd.merge(centr_df, geocoded_df, how='left', on=['panchayat_y', 'district_y'])
centr_df = centr_df[centr_df['ac_code'].notnull()]
centr_df = centr_df[['B', 'C', 'D', 'E', 'F=B+C+D+E', 'dyear', 'ac_code', 'state_code']]
centr_df.columns = ['centr_b', 'centr_c', 'centr_d', 'centr_e', 'centr_f', 'dyear', 'ac_code', 'state_code']
centr_df = centr_df.groupby(['ac_code', 'state_code', 'dyear']).sum().reset_index()
centr_df
# Read All IAY New Construction Data
iay_folder = os.path.join(data_folder, "Financial Progress Reports", "IAY New Construction -done", "output")
iay_files = os.listdir(iay_folder)
iay_files = [os.path.join(iay_folder, x) for x in iay_files if x.split('.')[-1] == 'csv'][:-1]
df_list = []
for file in iay_files:
df = pd.read_csv(file)
df_list.append(df)
print(file)
iay_df = pd.concat(df_list).reset_index()
iay_df['Scheme'] = 'IAY New Construction'
iay_df = iay_df[iay_df.columns[1:-1]]
iay_df['Panchayat Name'] = iay_df['A']
iay_df = iay_df[iay_df.columns[1:]]
iay_df = iay_df[iay_df['Panchayat Name'].notnull()]
iay_df['Panchayat Name'] = iay_df['Panchayat Name'].str.upper()
iay_df['Panchayat Name'] = iay_df['Panchayat Name'].str.replace('PANCHAYAT', '').str.replace('GP', '')
iay_df['District Name'] = iay_df['District Name'].str.upper()
iay_df['State Name'] = iay_df['State Name'].str.upper()
iay_df['dyear'] = iay_df['Financial Year'].str.split('-').str[0].fillna(0).astype(int)
iay_df['district_y'] = iay_df['District Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
iay_df['panchayat_y'] = iay_df['Panchayat Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
iay_df = pd.merge(iay_df, geocoded_df, how='left', on=['panchayat_y', 'district_y'])
iay_df = iay_df[iay_df['ac_code'].notnull()]
iay_df = iay_df[['B', 'C', 'D', 'E', 'F=B+C+D+E', 'dyear', 'ac_code', 'state_code']]
iay_df.columns = ['iay_b', 'iay_c', 'iay_d', 'iay_e', 'iay_f', 'dyear', 'ac_code', 'state_code']
iay_df = iay_df.groupby(['ac_code', 'state_code', 'dyear']).sum().reset_index()
iay_df
# Read All PMAY New Construction Data
pmay_folder = os.path.join(data_folder, "Financial Progress Reports", "Pradhan Mantri Awaas Yojana -done")
pmay_files = os.listdir(pmay_folder)
pmay_files = [os.path.join(pmay_folder, x) for x in pmay_files if x.split('.')[-1] == 'csv'][:-1]
df_list = []
for file in pmay_files:
df = pd.read_csv(file)
df_list.append(df)
print(file)
pmay_df = pd.concat(df_list).reset_index()
pmay_df['Scheme'] = 'Pradhan Mantri Awaas Yojana'
pmay_df = pmay_df[pmay_df.columns[1:-1]]
pmay_df['Panchayat Name'] = pmay_df['A']
pmay_df = pmay_df[pmay_df.columns[1:]]
pmay_df = pmay_df[pmay_df['Panchayat Name'].notnull()]
pmay_df['Panchayat Name'] = pmay_df['Panchayat Name'].str.upper()
pmay_df['Panchayat Name'] = pmay_df['Panchayat Name'].str.replace('PANCHAYAT', '').str.replace('GP', '')
pmay_df['District Name'] = pmay_df['District Name'].str.upper()
pmay_df['State Name'] = pmay_df['State Name'].str.upper()
pmay_df['dyear'] = pmay_df['Financial Year'].str.split('-').str[0].fillna(0).astype(int)
pmay_df['district_y'] = pmay_df['District Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
pmay_df['panchayat_y'] = pmay_df['Panchayat Name'].str.upper().str.replace('([^A-Za-z]+)', '').apply(remove_consecutive_duplicates)
pmay_df = pd.merge(pmay_df, geocoded_df, how='left', on=['panchayat_y', 'district_y'])
pmay_df = pmay_df[pmay_df['ac_code'].notnull()]
pmay_df = pmay_df[['B', 'C', 'D', 'E', 'F=B+C+D+E', 'dyear', 'ac_code', 'state_code']]
pmay_df.columns = ['pmay_b', 'pmay_c', 'pmay_d', 'pmay_e', 'pmay_f', 'dyear', 'ac_code', 'state_code']
pmay_df = pmay_df.groupby(['ac_code', 'state_code', 'dyear']).sum().reset_index()
pmay_df
variables = [
'tot_houses',
'centr_houses',
'state_houses',
'centr_b',
'centr_c',
'centr_d',
'centr_e',
'centr_f',
'iay_b',
'iay_c',
'iay_d',
'iay_e',
'iay_f',
'pmay_b',
'pmay_c',
'pmay_d',
'pmay_e',
'pmay_f'
]
#Get all variables
def get_ac_variables(row):
#print(pd.DataFrame(row).T)
next_elect = acdf[(acdf['state_code'] == row['state_code']) & (acdf['constituency_no'] == row['constituency_no']) & (acdf['datetime'] > row['datetime'])][0:1]
if len(next_elect) < 1:
next_elect = pd.DataFrame(row).T
next_elect['dyear'] = next_elect['dyear'] + 5
next_elect['datetime'] = next_elect['datetime'] + pd.DateOffset(years=5)
#print(next_elect)
try:
df1 = physical_df[(physical_df['state_code'] == row['state_code']) & (physical_df['ac_code'] == row['constituency_no']) & (physical_df['dyear'] >= row['dyear']) & (physical_df['dyear'] < next_elect['dyear'].values[0])]
tot_houses = df1['Total Houses completed'].sum()
centr_houses = df1[df1['Scheme'] == 'ALL CENTRAL SCHEMES']['Total Houses completed'].sum()
state_houses = df1[df1['Scheme'] == 'ALL STATE SCHEMES']['Total Houses completed'].sum()
df2 = centr_df[(centr_df['state_code'] == row['state_code']) & (centr_df['ac_code'] == row['constituency_no']) & (centr_df['dyear'] >= row['dyear']) & (centr_df['dyear'] < next_elect['dyear'].values[0])]
centr_b = df2['centr_b'].sum()
centr_c = df2['centr_c'].sum()
centr_d = df2['centr_d'].sum()
centr_e = df2['centr_e'].sum()
centr_f = df2['centr_f'].sum()
df3 = iay_df[(iay_df['state_code'] == row['state_code']) & (iay_df['ac_code'] == row['constituency_no']) & (iay_df['dyear'] >= row['dyear']) & (iay_df['dyear'] < next_elect['dyear'].values[0])]
iay_b = df3['iay_b'].sum()
iay_c = df3['iay_c'].sum()
iay_d = df3['iay_d'].sum()
iay_e = df3['iay_e'].sum()
iay_f = df3['iay_f'].sum()
df4 = pmay_df[(pmay_df['state_code'] == row['state_code']) & (pmay_df['ac_code'] == row['constituency_no']) & (pmay_df['dyear'] >= row['dyear']) & (pmay_df['dyear'] < next_elect['dyear'].values[0])]
pmay_b = df4['pmay_b'].sum()
pmay_c = df4['pmay_c'].sum()
pmay_d = df4['pmay_d'].sum()
pmay_e = df4['pmay_e'].sum()
pmay_f = df4['pmay_f'].sum()
#Return Now
return tot_houses,centr_houses,state_houses,centr_b,centr_c,centr_d,centr_e,centr_f,iay_b,iay_c,iay_d,iay_e,iay_f,pmay_b,pmay_c,pmay_d,pmay_e,pmay_f
except:
return 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
return 1
#Apply This Function
acdf[variables] = acdf.apply(get_ac_variables, axis=1, result_type='expand')
acdf
# Output to CSV
acdf.to_csv(os.path.join(output_folder, "Assembly_Constituencies_Variables_Categories.csv"), encoding='utf-8', index=False)