%matplotlib inline
#Import Modules
import geopandas as gpd
import pandas as pd
pd.options.display.float_format = '{:.2f}'.format
import numpy as np
import matplotlib.pyplot as plt
import math
import calendar
from datetime import datetime, timedelta
#Rain fall
df = pd.read_csv('Data/Jaipur_Rainfall_Data.csv')
#get Hour columns for use in analysis
hour_cols = list(pd.to_numeric(df.columns, errors = 'coerce').dropna().astype(int))
hour_cols = list(map(str, hour_cols))
#convert month string to month number
map_month = dict((v.upper(),k) for k,v in enumerate(calendar.month_abbr)) #mapping for months
df['MONTH'] = df['MONTH'].map(map_month)
#Convert Year Month Date to DATETIME
df['DATETIME'] = pd.to_datetime((df.YEAR*10000+df.MONTH*100+df.DAY).apply(str),format='%Y%m%d')
#convert hour column to unpivot table
df = pd.melt(df, id_vars=['DATETIME'], value_vars=hour_cols)
#Add Hour to time
df['DATETIME'] = df['DATETIME'] + pd.TimedeltaIndex((df.variable.astype(float) - 1), 'H')
df = df.drop(columns=['variable'])
df.columns = ['DATETIME', 'RAINFALL']
df['RAINFALL'] = pd.to_numeric(df['RAINFALL'], errors='coerce')
#Recover recoverable data by month mean
df = df.fillna(df.groupby(df['DATETIME'].dt.month).transform('mean'))
#Remove Bad Missing data year
#df = df[df['DATETIME'].dt.year == 2011].reset_index(drop=True) #2011 has missing data for monsoon season
df.index = df['DATETIME']
#driver
driver="FileGDB"
#Declare Data Locations
GIS_Data = "../GIS/GIS Data/"
Main_GDB = GIS_Data + "Thesis-GIS-Data.gdb"
Watershed_GDB = GIS_Data + "Watershed.gdb"
Scratch_GDB = GIS_Data + "Scratch.gdb"
#Extra
Landuse_Landcover_shp = GIS_Data + "Exported/Landuse_Landcover.shp"
#Read Layers
Lulc_df = gpd.read_file(Main_GDB, layer='Landuse_Landcover_Join_Soil_Geom_Zone')
#Soil_df = gpd.read_file(Main_GDB, layer='Soil_50K')
#Geom_df = gpd.read_file(Main_GDB, layer='Geomorphology_50K')
#Stream_Lines_w_Order = gpd.read_file(Watershed_GDB, layer='Stream_Lines_w_Order')
#reference for Soil
'''
Type of Soil in Jaipur
#Soil_df['FAMILY_TEX'].unique()
array(['Loamy skeletal', 'Fine loamy', 'Coarse loamy', 'Fine',
'Sandy skeletal'], dtype=object)
so given below classes 1 to 4 top to bottom high infiltration
class 1 : 'Sandy skeletal'
class 2 : 'Coarse loamy','Loamy skeletal'
class 3 : 'Fine loamy'
class 4 : 'Fine'
'''
soil_map = { 'Sandy skeletal':1, 'Loamy skeletal':2, 'Coarse loamy':2, 'Fine loamy':3, 'Fine':4 }
#map the Soil Data to number
Lulc_df['soil_map'] = Lulc_df['FAMILY_TEX'].map(soil_map)
#test data
#Lulc_df[~Lulc_df['Level_1'].isin(['Agriculture', 'Forest', 'Industry', 'Open Space', 'Waterbodies', 'Wetland', 'Miscellaneous', 'Waste Land'])].sort_values(by=['Level_1', 'Level_2']).groupby(['Level_1', 'Level_2']).mean()
#Function to Determine Curve Number
def Curve_Number(row):
Curve_Number = 80
if ( row['Level_1'] == 'Agriculture' ):
if ( row['Level_2'] in ['Crop Land', 'Horticulture', 'Plant Nursery'] ):
if row['soil_map'] == 1: Curve_Number = 65
elif row['soil_map'] == 2: Curve_Number = 75
elif row['soil_map'] == 3: Curve_Number = 80
elif row['soil_map'] == 4: Curve_Number = 85
elif ( row['Level_2'] in ['Orchard', 'Plantation'] ):
if row['soil_map'] == 1: Curve_Number = 45
elif row['soil_map'] == 2: Curve_Number = 55
elif row['soil_map'] == 3: Curve_Number = 65
elif row['soil_map'] == 4: Curve_Number = 75
else :
if row['soil_map'] == 1: Curve_Number = 76
elif row['soil_map'] == 2: Curve_Number = 85
elif row['soil_map'] == 3: Curve_Number = 90
elif row['soil_map'] == 4: Curve_Number = 93
elif ( row['Level_1'] == 'Built-up' ):
if ( row['Level_2'] in ['Vacant Land', 'Recreational', 'Slum'] ):
if row['soil_map'] == 1: Curve_Number = 49
elif row['soil_map'] == 2: Curve_Number = 69
elif row['soil_map'] == 3: Curve_Number = 79
elif row['soil_map'] == 4: Curve_Number = 84
elif ( row['Level_2'] in ['Transportation Node', 'Utilities', 'Transport'] ):
if row['soil_map'] == 1: Curve_Number = 92
elif row['soil_map'] == 2: Curve_Number = 94
elif row['soil_map'] == 3: Curve_Number = 96
elif row['soil_map'] == 4: Curve_Number = 98
else :
if row['soil_map'] == 1: Curve_Number = 72
elif row['soil_map'] == 2: Curve_Number = 79
elif row['soil_map'] == 3: Curve_Number = 84
elif row['soil_map'] == 4: Curve_Number = 88
elif ( row['Level_1'] == 'Forest' ):
if ( row['Level_2'] in ['Dense Forest'] ):
if row['soil_map'] == 1: Curve_Number = 36
elif row['soil_map'] == 2: Curve_Number = 60
elif row['soil_map'] == 3: Curve_Number = 73
elif row['soil_map'] == 4: Curve_Number = 79
else :
if row['soil_map'] == 1: Curve_Number = 43
elif row['soil_map'] == 2: Curve_Number = 65
elif row['soil_map'] == 3: Curve_Number = 72
elif row['soil_map'] == 4: Curve_Number = 82
elif ( row['Level_1'] == 'Industry' ):
if row['soil_map'] == 1: Curve_Number = 81
elif row['soil_map'] == 2: Curve_Number = 88
elif row['soil_map'] == 3: Curve_Number = 91
elif row['soil_map'] == 4: Curve_Number = 93
elif ( row['Level_1'] == 'Open Space' ):
if ( row['Level_2'] in ['Grazing Land'] ):
if row['soil_map'] == 1: Curve_Number = 49
elif row['soil_map'] == 2: Curve_Number = 69
elif row['soil_map'] == 3: Curve_Number = 79
elif row['soil_map'] == 4: Curve_Number = 84
elif ( row['Level_2'] in ['Tree Clad / Tree Cover'] ):
if row['soil_map'] == 1: Curve_Number = 30
elif row['soil_map'] == 2: Curve_Number = 58
elif row['soil_map'] == 3: Curve_Number = 71
elif row['soil_map'] == 4: Curve_Number = 78
else :
if row['soil_map'] == 1: Curve_Number = 43
elif row['soil_map'] == 2: Curve_Number = 65
elif row['soil_map'] == 3: Curve_Number = 72
elif row['soil_map'] == 4: Curve_Number = 82
elif ( row['Level_1'] == 'Waste Land' ):
if ( row['Level_2'] in ['Barren'] ):
if row['soil_map'] == 1: Curve_Number = 43
elif row['soil_map'] == 2: Curve_Number = 65
elif row['soil_map'] == 3: Curve_Number = 72
elif row['soil_map'] == 4: Curve_Number = 82
elif ( row['Level_2'] in ['Scrub Land'] ):
if row['soil_map'] == 1: Curve_Number = 49
elif row['soil_map'] == 2: Curve_Number = 69
elif row['soil_map'] == 3: Curve_Number = 79
elif row['soil_map'] == 4: Curve_Number = 84
elif ( row['Level_2'] in ['Rocky Area / Mountain'] ):
if row['soil_map'] == 1: Curve_Number = 75
elif row['soil_map'] == 2: Curve_Number = 82
elif row['soil_map'] == 3: Curve_Number = 86
elif row['soil_map'] == 4: Curve_Number = 88
else :
if row['soil_map'] == 1: Curve_Number = 43
elif row['soil_map'] == 2: Curve_Number = 65
elif row['soil_map'] == 3: Curve_Number = 72
elif row['soil_map'] == 4: Curve_Number = 82
elif ( row['Level_1'] in ['Waterbodies', 'Wetland'] ):
Curve_Number = 98
else :
if row['soil_map'] == 1: Curve_Number = 65
elif row['soil_map'] == 2: Curve_Number = 75
elif row['soil_map'] == 3: Curve_Number = 82
elif row['soil_map'] == 4: Curve_Number = 86
return Curve_Number
#Apply Curve Map
#Lulc_df = Lulc_df.head(1) #for testing
Lulc_df['Curve_Number'] = Lulc_df.apply(lambda row: Curve_Number(row), axis=1)
Lulc_df['PMR'] = (1000 / Lulc_df['Curve_Number']) - 10 #Potential Maximum Retention
#LULC Ready Now
#Lulc_df.to_file("export/Lulc_df.shp")
''' #Old Basic Script
def Runoff_Liters(CN, SA, P):
#CN = Curve Number # Curve Number
#SA = Shape Area # Area
#P = RAINFALL # Precipitation
PMR = (1000 / CN) - 10 #Potential Maximum Retention
PE = math.pow((P - (.2*PMR)), 2) / (P + (.8*PMR)) #Precipitation Excess
return PE*SA # Milimeter * Meter = Litres
def Get_Runoff_Liters_old(RAINFALL):
if (RAINFALL > 0):
return Lulc_df.apply(lambda row: Runoff_Liters(row['Curve_Number'], row['Shape_Area'], RAINFALL), axis=1).sum()
else :
return 0
df['Runoff_Liters'] = df['RAINFALL'].apply(lambda x: Get_Runoff_Liters_old(x))
'''
def Get_Runoff_Liters(P): # Fast Vectorised
#P = RAINFALL # Precipitation
if (P > 0):
#print(P)
Lulc_df['PE'] = (P - (.2*Lulc_df['PMR'])) #Precipitation Excess
Lulc_df.loc[ Lulc_df['PE'] < 0 , 'PE' ] = 0
Lulc_df[ 'PE' ] = (Lulc_df['PE'] ** 2) / (P + (.8*Lulc_df['PMR']))
Lulc_df[ 'RL' ] = Lulc_df['PE']*Lulc_df['Shape_Area'] #Runoff Litres
#return Lulc_df[ 'RL' ].sum()
return Lulc_df.groupby(['Zone_ID'])['RL'].sum()
#return pd.DataFrame(Lulc_df.groupby(['Zone_ID'])['RL'].sum()).T
else :
return 0
#Get List of Zones
Zones = np.sort(Lulc_df['Zone_ID'].unique(), axis=None)
Zones = ['RL_Zone_' + str(s) for s in Zones]
for Zone in Zones:
df[Zone] = 0
#Call Function to load data for each zone RL
R_df = df[ df['RAINFALL'] > 0 ]
df.loc[ df['RAINFALL'] > 0, Zones ] = R_df['RAINFALL'].apply(Get_Runoff_Liters).values
#df['RUNOFF_LITRES'] = pd.DataFrame(df[Zones].sum(axis=1))
df.to_csv('Runoff_Analyzed.csv', encoding='utf-8', index=True)
df.resample('Y').sum().mean().reset_index().to_csv('Runoff_ZOnes_Analyzed.csv', encoding='utf-8', index=True)
df[df['RAINFALL'] > 0].sum()
df1 = df.resample('d').sum().groupby([df['DATETIME'].dt.dayofyear]).mean()
df1[df1['RUNOFF_LITRES'] > 0]
df.resample('M').sum().plot(kind='line'), legend=False) #overall Runoff Liters
df.groupby([df['DATETIME'].dt.day, df['DATETIME'].dt.year]).sum().groupby('DATETIME').mean().plot(kind='line')#, legend=False)
df[(df['DATETIME'].dt.year == 2011) & (df['DATETIME'].dt.month == 8)][ df['RAINFALL'] > 0 ].resample('d').sum()
df[df['RAINFALL'] > 0]
#Forward to Demand Supply
#Take backup of original dataframe
checkpt1_df = df.copy()
#Read Data for moving forward
#Use backup when needed
df = checkpt1_df.copy()
#Convert data to daily values
df = df.resample('d').sum().groupby([df['DATETIME'].dt.dayofyear]).mean()
#Read Demand Data for all zones
demand_csv = 'Zones_of_Analysis_from_Excel.csv'
demand_df = pd.read_csv(demand_csv)
zones = list(demand_df['Zone'].values)
#Constants for Calculating supply demand
evap = .3 #30% evaporation loss in urban environement.
trns = .25 #20% Transmition loss
potw = 65/135
nonpotw = 35/135 #35 lpcd per capita
bulkw = 35/135 #35 lpcd per capita
sec_wtr = 68 #Secondary treatment cost in Lakhs per MLD
ter_wtr = 40 #Tertiary treatment cost in Lakhs per MLD
dam_wtr = 40 #Dam water cost till CWR in Lakhs per MLD
#Calculate Water demand supply and availability in all zones
for zone in zones:
#Prepare Variables
df1 = demand_df[ demand_df['Zone'] == zone ]
WD = df1['Water_Demand_liters'].values[0]
RA = df1['RESIDENTIAL_Area'].values[0]
CA = df1['COMMERCIAL_Area'].values[0]
IA = df1['INDUSTRY_Area'].values[0]
TA = RA+CA+IA
#Calculate Water Demand Supply Values
##Demands
df[zone+"_Water_Demand"] = WD
df[zone+"_Pot_Water_Demand"] = WD * potw
df[zone+"_Nonpot_Water_Demand"] = WD * nonpotw
df[zone+"_Bulk_Water_Demand"] = WD * bulkw
##Potentials
df[zone+"_Excess_Runoff"] = df["RL_"+zone]
df[zone+"_Potential_Treated_Runoff"] = df[zone+"_Excess_Runoff"] * ( 1 - evap )
df[zone+"_Potential_Treated_WW"] = WD * ( 1 - ( trns * ( (RA*1+CA*.75+IA*.5) / TA ) ) )
##Potable Tertiary Treatment
df[zone+"_Treated_Runoff_Demand"] = df[ [ zone+"_Pot_Water_Demand", zone+"_Potential_Treated_Runoff" ] ].min(axis=1)
df[zone+"_Dam_Water_Demand_Potable"] = df[zone+"_Pot_Water_Demand"] - df[zone+"_Treated_Runoff_Demand"]
##Non Potable Secondary Treatment
df[zone+"_Potential_Raw_Water"] = df[zone+"_Potential_Treated_Runoff"] - df[zone+"_Treated_Runoff_Demand"] + df[zone+"_Potential_Treated_WW"]
df[zone+"_Raw_Water_Demand"] = df[zone+"_Nonpot_Water_Demand"] + df[zone+"_Bulk_Water_Demand"]
df[zone+"_Dam_Water_Demand_Nonpot"] = df[zone+"_Raw_Water_Demand"] - df[zone+"_Potential_Raw_Water"]
df.loc[df[zone+"_Dam_Water_Demand_Nonpot"] < 0 , [zone+"_Dam_Water_Demand_Nonpot"]] = 0
#df[zone+"_Raw_Water_Demand"] = df[ [ zone+"_Raw_Water_Demand", zone+"_Potential_Raw_Water" ] ].min(axis=1)
df[zone+"_Excess_Raw_Water"] = df[zone+"_Potential_Raw_Water"] - df[zone+"_Raw_Water_Demand"]
##Water Savings from Dam
df[zone+"_Savings_Dam_Water"] = df[zone+"_Water_Demand"] - df[zone+"_Dam_Water_Demand_Potable"] - df[zone+"_Dam_Water_Demand_Nonpot"]
#Calculate Cost Comparision Values
##Original
df[zone+"_Original_Water_Treatment_Cost"] = ( df[zone+"_Water_Demand"] / 1000000 ) * dam_wtr
df[zone+"_Original_WW_Treatment_Cost"] = ( df[zone+"_Potential_Treated_WW"] / 1000000 ) * sec_wtr
df[zone+"_Original_Total_Treatment_Cost"] = df[zone+"_Original_Water_Treatment_Cost"] + df[zone+"_Original_WW_Treatment_Cost"]
##Managed
df[zone+"_Managed_Water_Treatment_Cost"] = ( df[zone+"_Treated_Runoff_Demand"] / 1000000 * ter_wtr ) + ( df[zone+"_Dam_Water_Demand_Potable"] / 1000000 * dam_wtr )
df[zone+"_Managed_WW_Treatment_Cost"] = ( df[zone+"_Potential_Treated_WW"] / 1000000 ) * sec_wtr
df[zone+"_Managed_Total_Treatment_Cost"] = df[zone+"_Managed_Water_Treatment_Cost"] + df[zone+"_Managed_WW_Treatment_Cost"]
##Savings in terms of Cost
df[zone+"_Savings_Total_Treatment_Cost"] = df[zone+"_Original_Total_Treatment_Cost"] - df[zone+"_Managed_Total_Treatment_Cost"]
#print(zone)
df
df.to_csv('Water_Management_Analysis.csv', encoding='utf-8', index=True)