import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import calendar
from datetime import datetime, timedelta

df = pd.read_csv(r'Data/Jaipur_Rainfall_Data.csv')

#get Hour columns for use in analysis
hour_cols = list(pd.to_numeric(df.columns, errors = 'coerce').dropna().astype(int))
hour_cols = list(map(str, hour_cols))

#convert month string to month number
map_month = dict((v.upper(),k) for k,v in enumerate(calendar.month_abbr)) #mapping for months
df['MONTH'] = df['MONTH'].map(map_month)

#Convert Year Month Date to DATETIME
df['DATETIME'] = pd.to_datetime((df.YEAR*10000+df.MONTH*100+df.DAY).apply(str),format='%Y%m%d')

#convert hour column to unpivot table
df = pd.melt(df, id_vars=['DATETIME'], value_vars=hour_cols)

#Add Hour to time
df['DATETIME'] =  df['DATETIME'] + pd.TimedeltaIndex((df.variable.astype(float) - 1), 'H')
df = df.drop(columns=['variable'])
df.columns = ['DATETIME', 'RAINFALL']
df['RAINFALL'] =  pd.to_numeric(df['RAINFALL'], errors='coerce')

#Recover recoverable data by month mean
df = df.fillna(df.groupby(df['DATETIME'].dt.month).transform('mean'))

#Remove Bad Missing data year
#df = df[df['DATETIME'].dt.year == 2011].reset_index(drop=True) #2011 has missing data for monsoon season
df.index = df['DATETIME']

df.resample('h').sum().plot(kind='line') #Hourly

<matplotlib.axes._subplots.AxesSubplot at 0x286be4ae780>

df.resample('d').sum().plot(kind='line') #Daily

<matplotlib.axes._subplots.AxesSubplot at 0x286bffa60f0>

df.resample('w').sum().plot(kind='line') #Weekly

<matplotlib.axes._subplots.AxesSubplot at 0x286c0088550>

df.resample('M').sum().plot(kind='line') #Monthly

<matplotlib.axes._subplots.AxesSubplot at 0x286c005ee10>

df.resample('Y').sum().plot(kind='line') #Yearly

<matplotlib.axes._subplots.AxesSubplot at 0x286c00da160>

df.resample('D').sum().groupby(df['DATETIME'].dt.weekday_name).mean() #Monthly

df.resample('w').sum().groupby([df['DATETIME'].dt.week, df['DATETIME'].dt.month.apply(lambda x: calendar.month_abbr[x])]).mean() #Weekly

df.resample('M').sum().groupby(df['DATETIME'].dt.month.apply(lambda x: calendar.month_abbr[x])).mean() #Monthly

df.groupby(df['DATETIME'].dt.year).sum() #yearly

df.resample('Y').sum().mean() #overall Mean Rainfall

RAINFALL    528.761192
dtype: float64

	RAINFALL
DATETIME
Friday	1.280612
Monday	1.434431
Saturday	0.928189
Sunday	1.629153
Thursday	1.388936
Tuesday	1.588853
Wednesday	1.882058

		RAINFALL
DATETIME	DATETIME
1.0	Jan	0.000000
2.0	Jan	0.500000
3.0	Jan	0.028571
4.0	Jan	0.000000
5.0	Feb	0.728571
6.0	Feb	3.142857
7.0	Feb	3.671429
8.0	Feb	0.528571
9.0	Mar	1.378187
10.0	Mar	1.563748
11.0	Mar	2.178034
12.0	Mar	0.649462
13.0	Apr	0.230573
13.0	Mar	0.000000
14.0	Apr	1.802778
15.0	Apr	2.759921
16.0	Apr	0.902778
17.0	Apr	0.363889
17.0	May	1.150000
18.0	May	1.428571
19.0	May	3.157143
20.0	May	4.571429
21.0	May	3.257143
22.0	Jun	7.443204
22.0	May	14.600000
23.0	Jun	2.618516
24.0	Jun	4.557143
25.0	Jun	31.642857
26.0	Jul	13.759710
26.0	Jun	30.500000
...	...	...
29.0	Jul	21.869119
30.0	Aug	48.300000
30.0	Jul	27.026809
31.0	Aug	32.975576
32.0	Aug	36.761290
33.0	Aug	51.089862
34.0	Aug	67.218433
35.0	Aug	27.200000
35.0	Sep	40.095657
36.0	Sep	27.741261
37.0	Sep	18.205746
38.0	Sep	18.248603
39.0	Oct	4.081006
39.0	Sep	0.000000
40.0	Oct	0.071429
41.0	Oct	0.971429
42.0	Oct	2.014286
43.0	Oct	0.042857
44.0	Nov	0.000000
45.0	Nov	0.528571
46.0	Nov	3.614286
47.0	Nov	7.671429
48.0	Dec	0.000000
48.0	Nov	0.000000
49.0	Dec	0.071429
50.0	Dec	0.000000
51.0	Dec	0.000000
52.0	Dec	0.000000
52.0	Jan	0.166667
53.0	Jan	2.000000

	RAINFALL
DATETIME
Apr	6.083333
Aug	215.800000
Dec	0.071429
Feb	8.700000
Jan	1.528571
Jul	127.812361
Jun	60.888350
Mar	4.633333
May	14.871429
Nov	11.814286
Oct	3.100000
Sep	73.458101

	RAINFALL
DATETIME
2006	295.258093
2007	510.748603
2008	470.051189
2009	376.500000
2010	776.100000
2011	555.070462
2012	717.600000