#Import Libraries
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
import json
import requests
import math
import os
import time
import datetime
# Import folders
data_folder = os.path.abspath('data')
# Set Output Folder
output_folder = os.path.abspath("output")
if not os.path.exists(output_folder):
os.makedirs(output_folder)
# Read Constituency Data
ac_gdf = None
ac_filepath = os.path.join(data_folder, "AC", "India_AC.shp")
ac_gdf = gpd.read_file(ac_filepath)
ac_gdf
# Read Constituency Data
ac_file = os.path.join(data_folder, "General_Later_Ashoka_alldata.csv")
acdf = pd.read_csv(ac_file)
acdf['state_name'] = acdf['state_name'].str.replace("_", " ")
#acdf = acdf[['state_name', 'constituency_no', 'constituency_name', 'year', 'month']]
acdf = acdf.drop_duplicates(subset=None, keep="first", inplace=False).reset_index(drop=True)
acdf.loc[acdf['newstate_code'] == 36, 'newstate_code'] = 28 #Telangana Fix
acdf = acdf[['state_name', 'state_code', 'constituency_no', 'year', 'month']]
acdf['day'] = 1
acdf['dyear'] = 0
acdf.loc[(acdf['month'] > 9) | (acdf['year'] == 2008), 'dyear'] = 1
acdf['dyear'] = acdf['year'] + acdf['dyear']
acdf['datetime'] = pd.to_datetime(acdf[['year', 'month', 'day']])
acdf = acdf.drop_duplicates().reset_index(drop=True)
acdf = pd.merge(acdf, ac_gdf, how='inner', left_on=['state_code', 'constituency_no'], right_on = ['ST_CODE','AC_NO'])#[['ST_CODE', 'ST_NAME','DT_CODE', 'DIST_NAME', 'AC_NO', 'AC_NAME', 'PC_NO', 'PC_NAME']]
acdf = acdf[acdf.columns[:-5]]
acdf
# Read geocoded Scrapped Data
geocoded_folder = os.path.join(data_folder, "Scrapped", "geocoded")
geocoded_files = os.listdir(geocoded_folder)
geocoded_files = [os.path.join(geocoded_folder, x) for x in geocoded_files if x.split('.')[-1] == 'csv'][4:]
df_list = []
for file in geocoded_files:
df = pd.read_csv(file)
statuscol = 'Status as on 1/4/' + file.split('-')[-2].split('_')[-1]
print(statuscol)
df = df[['Financial Year', 'Habit Category', statuscol, 'Tot Pop', 'ac code', 'state code']]
df.columns = ['Financial Year', 'Habit Category', 'Status', 'Tot Pop', 'ac code', 'state code']
#df = df[['Financial Year', 'Habit Category', 'Tot Pop', 'ac code', 'state code']]
df_list.append(df)
# Merge All Data
water_df = pd.concat(df_list).reset_index(drop=True)
#water_df = water_df[water_df['Status'].isin(['FC', 'PC', 'QA'])]
water_df['dyear'] = water_df['Financial Year'].str.split('-').str[0].fillna(0).astype(int)
water_df = water_df[water_df.columns[1:]]
water_df['Tot Pop'] = water_df['Tot Pop'].apply(pd.to_numeric, errors='ceorce').fillna(0).astype(int)
#water_df = water_df.groupby(['Habit Category', 'Status', 'ac code', 'state code', 'dyear']).sum().reset_index()
#water_df['Status'] = 'Partially Covered'
#water_df[water_df['Habit Category'] == 'No. Of Habitation With 100% Population Coverage']['Status'] = 'Fully Covered'
water_df.loc[water_df['Status'] == 'FC', 'Status'] = 'Fully Covered'
water_df.loc[water_df['Status'] != 'Fully Covered', 'Status'] = 'Partially Covered'
water_df = water_df[['Status', 'ac code', 'state code', 'dyear', 'Tot Pop']]
water_df = water_df.groupby(['Status', 'ac code', 'state code', 'dyear']).sum().reset_index()
water_df
# Read geocoded downloaded Data
downloaded_folder = os.path.join(data_folder, "downloaded", "geocoded")
downloaded_files = os.listdir(downloaded_folder)
downloaded_files = [os.path.join(downloaded_folder, x) for x in downloaded_files if x.split('.')[-1] == 'csv']
df_list = []
for file in downloaded_files:
print(file)
df = pd.read_csv(file)
df = df[['Year', 'Status', 'SC Current Population', 'ST Current Population', 'GENERAL Current Population', 'ac code', 'state code']]
# df.columns = ['Financial Year', 'Habit Category', 'Status', 'Tot Pop', 'ac code', 'state code']
df_list.append(df)
# Merge All Data
downloaded_df = pd.concat(df_list).reset_index(drop=True)
downloaded_df = downloaded_df[downloaded_df['Status'].isin(['Partially Covered', 'Fully Covered'])]
downloaded_df['dyear'] = downloaded_df['Year'].str.split('_').str[-1].fillna(0).astype(int)
downloaded_df['Tot Pop'] = downloaded_df['SC Current Population'].apply(pd.to_numeric, errors='ceorce').fillna(0).astype(int) + downloaded_df['ST Current Population'].apply(pd.to_numeric, errors='ceorce').fillna(0).astype(int) + downloaded_df['GENERAL Current Population'].apply(pd.to_numeric, errors='ceorce').fillna(0).astype(int)
downloaded_df = downloaded_df[['Status', 'ac code', 'state code', 'dyear', 'Tot Pop']]
downloaded_df = downloaded_df.groupby(['Status', 'ac code', 'state code', 'dyear']).sum().reset_index()
downloaded_df
water_df = pd.concat([water_df, downloaded_df])
water_df
variables = [
'tot_pop_start',
'tot_pop1',
'tot_pop2',
'tot_pop3',
'tot_pop4',
'tot_pop5',
'tot_pop_end',
'full_cvg_start',
'full_cvg1',
'full_cvg2',
'full_cvg3',
'full_cvg4',
'full_cvg5',
'full_cvg_end',
'partial_cvg_start',
'partial_cvg1',
'partial_cvg2',
'partial_cvg3',
'partial_cvg4',
'partial_cvg5',
'partial_cvg_end',
]
#Get all variables
def get_ac_variables(row):
#print(pd.DataFrame(row).T)
next_elect = acdf[(acdf['state_code'] == row['state_code']) & (acdf['constituency_no'] == row['constituency_no']) & (acdf['datetime'] > row['datetime'])][0:1]
if len(next_elect) < 1:
next_elect = pd.DataFrame(row).T
next_elect['dyear'] = next_elect['dyear'] + 5
next_elect['datetime'] = next_elect['datetime'] + pd.DateOffset(years=5)
#print(next_elect)
delta_year = math.floor((next_elect['datetime'].dt.year - row['datetime'].year)/5)
if not delta_year:
delta_year = 1
#print(delta_year)
#Starting of Election Term
try:
df1 = water_df[(water_df['state code'] == row['state_code']) & (water_df['ac code'] == row['constituency_no']) & (water_df['dyear'] >= row['dyear']) & (water_df['dyear'] < next_elect['dyear'].values[0])]
tot_pop_start = df1[df1['dyear'] == row['dyear']]['Tot Pop'].sum()
tot_pop1 = df1[df1['dyear'] == (row['dyear'] + delta_year-1)]['Tot Pop'].sum()
tot_pop2 = df1[df1['dyear'] == (row['dyear'] + delta_year+0)]['Tot Pop'].sum()
tot_pop3 = df1[df1['dyear'] == (row['dyear'] + delta_year+1)]['Tot Pop'].sum()
tot_pop4 = df1[df1['dyear'] == (row['dyear'] + delta_year+2)]['Tot Pop'].sum()
tot_pop_end = tot_pop5 = df1[df1['dyear'] > (row['dyear'] + delta_year+3)]['Tot Pop'].sum()
df2 = df1[df1['Status'] == 'Fully Covered']
full_cvg_start = df2[df2['dyear'] == row['dyear']]['Tot Pop'].sum()
full_cvg1 = df2[df2['dyear'] == (row['dyear'] + delta_year-1)]['Tot Pop'].sum()
full_cvg2 = df2[df2['dyear'] == (row['dyear'] + delta_year+0)]['Tot Pop'].sum()
full_cvg3 = df2[df2['dyear'] == (row['dyear'] + delta_year+1)]['Tot Pop'].sum()
full_cvg4 = df2[df2['dyear'] == (row['dyear'] + delta_year+2)]['Tot Pop'].sum()
full_cvg_end = full_cvg5 = df2[df2['dyear'] > (row['dyear'] + delta_year+3)]['Tot Pop'].sum()
df3 = df1[df1['Status'] == 'Partially Covered']
partial_cvg_start = df3[df3['dyear'] == row['dyear']]['Tot Pop'].sum()
partial_cvg1 = df3[df3['dyear'] == (row['dyear'] + delta_year-1)]['Tot Pop'].sum()
partial_cvg2 = df3[df3['dyear'] == (row['dyear'] + delta_year+0)]['Tot Pop'].sum()
partial_cvg3 = df3[df3['dyear'] == (row['dyear'] + delta_year+1)]['Tot Pop'].sum()
partial_cvg4 = df3[df3['dyear'] == (row['dyear'] + delta_year+2)]['Tot Pop'].sum()
partial_cvg_end = partial_cvg5 = df3[df3['dyear'] > (row['dyear'] + delta_year+3)]['Tot Pop'].sum()
'''
df1 = df1[df1['dyear'] == np.sort(df1['dyear'].unique())[0]]
var1 = df1[df1['Status'] == 'Fully Covered']['Tot Pop'].sum()
var2 = df1[df1['Status'] == 'Partially Covered']['Tot Pop'].sum()
var3 = df1['Tot Pop'].sum()
#End of Election Term
df2 = water_df[(water_df['state code'] == row['state_code']) & (water_df['ac code'] == row['constituency_no']) & (water_df['dyear'] > row['dyear']) & (water_df['dyear'] <= next_elect['dyear'].values[0])]
df2 = df2[df2['dyear'] == np.sort(df2['dyear'].unique())[-1]]
#print(df2)
var4 = df2[df2['Status'] == 'Fully Covered']['Tot Pop'].sum()
var5 = df2[df2['Status'] == 'Partially Covered']['Tot Pop'].sum()
var6 = df2['Tot Pop'].sum()
'''
#Return Now
return tot_pop_start, tot_pop1, tot_pop2, tot_pop3, tot_pop4, tot_pop5, tot_pop_end, full_cvg_start, full_cvg1, full_cvg2, full_cvg3, full_cvg4, full_cvg5, full_cvg_end, partial_cvg_start, partial_cvg1, partial_cvg2, partial_cvg3, partial_cvg4, partial_cvg5, partial_cvg_end
except:
return 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#Apply This Function
acdf[variables] = acdf.apply(get_ac_variables, axis=1, result_type='expand')
acdf
# Output to CSV
acdf.to_csv(os.path.join(output_folder, "Assembly_Constituencies_Variables_Categories.csv"), encoding='utf-8', index=False)