import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import calendar
from datetime import datetime, timedelta
df = pd.read_csv(r'Data/Jaipur_Rainfall_Data.csv')
#get Hour columns for use in analysis
hour_cols = list(pd.to_numeric(df.columns, errors = 'coerce').dropna().astype(int))
hour_cols = list(map(str, hour_cols))
#convert month string to month number
map_month = dict((v.upper(),k) for k,v in enumerate(calendar.month_abbr)) #mapping for months
df['MONTH'] = df['MONTH'].map(map_month)
#Convert Year Month Date to DATETIME
df['DATETIME'] = pd.to_datetime((df.YEAR*10000+df.MONTH*100+df.DAY).apply(str),format='%Y%m%d')
#convert hour column to unpivot table
df = pd.melt(df, id_vars=['DATETIME'], value_vars=hour_cols)
#Add Hour to time
df['DATETIME'] = df['DATETIME'] + pd.TimedeltaIndex((df.variable.astype(float) - 1), 'H')
df = df.drop(columns=['variable'])
df.columns = ['DATETIME', 'RAINFALL']
df['RAINFALL'] = pd.to_numeric(df['RAINFALL'], errors='coerce')
#Recover recoverable data by month mean
df = df.fillna(df.groupby(df['DATETIME'].dt.month).transform('mean'))
#Remove Bad Missing data year
#df = df[df['DATETIME'].dt.year == 2011].reset_index(drop=True) #2011 has missing data for monsoon season
df.index = df['DATETIME']
df.resample('h').sum().plot(kind='line') #Hourly
df.resample('d').sum().plot(kind='line') #Daily
df.resample('w').sum().plot(kind='line') #Weekly
df.resample('M').sum().plot(kind='line') #Monthly
df.resample('Y').sum().plot(kind='line') #Yearly
df.resample('D').sum().groupby(df['DATETIME'].dt.weekday_name).mean() #Monthly
df.resample('w').sum().groupby([df['DATETIME'].dt.week, df['DATETIME'].dt.month.apply(lambda x: calendar.month_abbr[x])]).mean() #Weekly
df.resample('M').sum().groupby(df['DATETIME'].dt.month.apply(lambda x: calendar.month_abbr[x])).mean() #Monthly
df.groupby(df['DATETIME'].dt.year).sum() #yearly
df.resample('Y').sum().mean() #overall Mean Rainfall