# Reset View to India level
def reset_view():
    """Reload the start page and pick the first entry of the global state filter.

    The page at ``url`` is loaded, the "change filters" label is clicked to
    reveal the global filters, and the first ``<option>`` of the
    ``ddlGlobalState`` drop-down is selected (presumably the all-India
    entry -- confirm against the page).

    If the label cannot be clicked on the first attempt, the page's own
    ``toggleGlobalFilters()`` JavaScript function is invoked and the click is
    retried once.

    Raises:
        selenium.common.exceptions.TimeoutException: if a control does not
            become clickable within 10 seconds (after the retry, for the
            filter label).
        IndexError: if the state drop-down contains no options.
    """
    # Local import so the block does not rely on the file's import section.
    from selenium.common.exceptions import WebDriverException

    def click_change_filters():
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, 'lblChangeFilters'))
        ).click()

    driver.get(url)
    try:
        click_change_filters()
    except WebDriverException:
        # Only Selenium failures trigger the fallback; KeyboardInterrupt and
        # programming errors are no longer swallowed by a bare ``except``.
        driver.execute_script('toggleGlobalFilters()')
        time.sleep(1)
        click_change_filters()

    # Fixed pause before touching the drop-down, kept from the original flow.
    time.sleep(2)
    state_dropdown = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.ID, 'ddlGlobalState'))
    )
    # find_elements(By.XPATH, ...) works on old and current Selenium releases;
    # the find_elements_by_xpath shortcut was removed in Selenium 4.3.
    state_dropdown.find_elements(By.XPATH, ".//option")[0].click()
# Open the Physical Progress table page
def physical_progress():
    """Ask the page's ``LoadPage`` JavaScript function to open the Physical Progress work layout."""
    load_page_call = 'LoadPage("/NationalArea/National/PhysicalProgressWorkLayout")'
    driver.execute_script(load_page_call)
def select_state(state_code):
    """Select a state in the Physical Progress form and wait for its districts.

    The state drop-down is first set to the option with value ``-1``, the
    district list is emptied, and only then is the requested state chosen.
    Emptying the list presumably makes the final wait meaningful: the
    ``value='0'`` district option can only be present again once the page has
    repopulated the list for the new state.

    Args:
        state_code: value of the ``<option>`` to select in
            ``StateList_PhyProgressWorkDetails``; converted with ``str()``.

    Raises:
        selenium.common.exceptions.TimeoutException: if the drop-down is not
            clickable, or the district list is not populated, within 10 s.
        IndexError: if the drop-down has no option with the requested value.
    """
    def click_state_option(option_value):
        state_dropdown = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, 'StateList_PhyProgressWorkDetails'))
        )
        # find_elements(By.XPATH, ...) replaces the find_elements_by_xpath
        # shortcut, which was removed in Selenium 4.3.
        state_dropdown.find_elements(
            By.XPATH, ".//option[@value='" + option_value + "']"
        )[0].click()

    def wait_for_district_list():
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, ".//select[@id='DistrictList_PhyProgressWorkDetails']/option[@value='0']"))
        )

    time.sleep(1)
    click_state_option('-1')
    wait_for_district_list()

    state_code = str(state_code)
    # Clear the district options so the wait below cannot be satisfied by
    # entries left over from the previous selection.
    driver.execute_script('document.getElementById("DistrictList_PhyProgressWorkDetails").innerHTML = "";')
    time.sleep(1)
    click_state_option(state_code)
    wait_for_district_list()
def select_year(year):
    """Select a year in the Physical Progress form's year drop-down.

    Args:
        year: value of the ``<option>`` to select in
            ``YearList_PhyProgressWorkDetails``; converted with ``str()``.

    Raises:
        selenium.common.exceptions.TimeoutException: if the drop-down does
            not become clickable within 10 seconds.
        IndexError: if the drop-down has no option with the requested value.
    """
    year_dropdown = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.ID, 'YearList_PhyProgressWorkDetails'))
    )
    # find_elements(By.XPATH, ...) replaces the find_elements_by_xpath
    # shortcut, which was removed in Selenium 4.3.
    year_dropdown.find_elements(
        By.XPATH, ".//option[@value='" + str(year) + "']"
    )[0].click()
# Trigger the table load
def load_table():
    """Click the view button, then block until the report iframe is clickable.

    Both waits use a 10 second timeout.
    """
    view_button_locator = (By.ID, 'btnViewPhyProgressWork')
    report_frame_locator = (By.XPATH, ".//div[@id='loadReport']/iframe")

    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable(view_button_locator)
    ).click()
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable(report_frame_locator)
    )
# Scrape table up to block level
_REPORT_VIEWER_ID = 'VisibleReportContentReportViewer_ctl09'

# Drill-down links, one XPath per level of the report.
_STATE_LINK_XPATH = ".//td/div[@title='Please click here for Districtwise Details']/a"
_DISTRICT_LINK_XPATH = ".//td/div[@title='Please click here for Blockwise Details']/a"
_BLOCK_LINK_XPATH = ".//td/div[@title='Click here to view Road wise Details.']/a"

# Columns a continuation row takes over from the road row it belongs to.
_PARENT_ROAD_COLUMNS = (
    'Sr.No.', 'Packages', 'Sanctioned Year', 'Road Name', 'Upgrade / New',
    'Surface Type', 'Length', 'Pavement Cost', 'No. of CD Works',
    'CD Work Cost', 'LSB Cost', 'LSB State Cost', 'Protection Work',
    'Other Works', 'Present Status', 'Completed Length',
    'Expenditure Till Date', 'Total Cost', 'View',
)


def _report_table():
    """Return the 19-column table currently shown inside the report viewer."""
    viewer = driver.find_element(By.ID, _REPORT_VIEWER_ID)
    return viewer.find_element(By.XPATH, ".//table[@cols='19']")


def _wait_for_first_link(link_xpath):
    """Wait up to 10 s for the ``tabindex='1'`` link matching *link_xpath*."""
    return WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, link_xpath + "[@tabindex='1']"))
    )


def _fill_rowspan_rows(df):
    """Complete rows that lost their leading cells to a multi-row span.

    Rows without a 'Road Name' are treated as continuations of the nearest
    row above that has one. In such a row the habitation values sit in the
    first three columns, so they are moved to their own columns and every
    road-level column is copied from the parent row.

    Raises:
        IndexError: if a continuation row has no road row above it.
    """
    is_continuation = df['Road Name'].isnull()
    if not is_continuation.any():
        return df
    road_rows = df[~is_continuation]

    def fill_from_parent(row):
        row = row.copy()
        # The habitation cells were parsed into the first three columns.
        row['Habitation Name'] = row['Sr.No.']
        row['Population'] = row['Packages']
        row['SC/ST Population'] = row['Sanctioned Year']
        # The parent is the closest road row *above this row*. Searching only
        # a fixed window at the top of the table would attach every
        # continuation row further down to the wrong road.
        parent = road_rows[road_rows.index < row.name].iloc[-1]
        for column in _PARENT_ROAD_COLUMNS:
            row[column] = parent[column]
        return row

    df[is_continuation] = df[is_continuation].apply(
        fill_from_parent, axis=1, result_type='expand'
    )
    return df


def _parse_road_table(html_table, state, district, block):
    """Turn one road-wise HTML table into a DataFrame tagged with its location."""
    # Local import so the block does not rely on the file's import section.
    from io import StringIO

    # Recent pandas releases deprecate passing literal HTML to read_html;
    # a file-like wrapper is accepted by old and new releases alike.
    df = pd.read_html(StringIO(html_table), skiprows=0, header=2)[0]
    # Drop the first column and the last row, as the original did (the last
    # row is presumably a totals line -- confirm against the report).
    df = df[df.columns[1:]][:-1].copy()
    df['Block'] = block
    df['District'] = district
    df['State'] = state
    df['index_value'] = df.index
    return _fill_rowspan_rows(df)


def scrape_table():
    """Scrape the loaded report down to block level.

    Switches into the report iframe, opens the state link, then every
    district and every block in turn, and collects each block's road-wise
    table. The driver is switched back to the top-level document before
    returning.

    Returns:
        A DataFrame concatenating the tables of all blocks, with 'Block',
        'District', 'State' and 'index_value' columns added, or ``None`` when
        no state link appears within 10 seconds.

    Raises:
        selenium.common.exceptions.TimeoutException: if an expected table or
            link does not appear within 10 seconds during the drill-down.
    """
    # Local import so the block does not rely on the file's import section.
    from selenium.common.exceptions import TimeoutException

    driver.switch_to.default_content()
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, ".//div[@id='loadReport']/iframe"))
    )
    driver.switch_to.frame(0)
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, _REPORT_VIEWER_ID))
    )

    try:
        _wait_for_first_link(_STATE_LINK_XPATH)
    except TimeoutException:
        # Only a timed-out wait means "no data"; other failures propagate.
        print('-- State has no Data')
        # Reset focus to main page
        driver.switch_to.default_content()
        return None

    state_link = _report_table().find_element(By.XPATH, ".//td/div/a[@tabindex='1']")
    state = state_link.text
    print('-- Scraping State: ', state)
    state_link.click()
    _wait_for_first_link(_DISTRICT_LINK_XPATH)

    district_count = len(_report_table().find_elements(By.XPATH, _DISTRICT_LINK_XPATH))
    print('--# no of districts to scrape data for: ', district_count)

    block_frames = []
    for district_no in range(1, district_count + 1):
        # Open district detail. Elements are looked up again on every pass
        # because the report is re-rendered after each navigation.
        district_link = _report_table().find_element(
            By.XPATH, _DISTRICT_LINK_XPATH + "[@tabindex='" + str(district_no) + "']"
        )
        district = district_link.text
        print('--#-- Scraping District: ', district)
        district_link.click()
        _wait_for_first_link(_BLOCK_LINK_XPATH)

        block_count = len(_report_table().find_elements(By.XPATH, _BLOCK_LINK_XPATH))
        print('--#--# no of blocks to scrape data for: ', block_count)

        for block_no in range(1, block_count + 1):
            # Open block detail
            block_link = _report_table().find_element(
                By.XPATH, _BLOCK_LINK_XPATH + "[@tabindex='" + str(block_no) + "']"
            )
            block = block_link.text
            print('--#--#-- Scraping Block: ', block)
            block_link.click()

            road_table = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, ".//table[@cols='23']"))
            )
            html_table = road_table.get_attribute('outerHTML')
            block_frames.append(_parse_road_table(html_table, state, district, block))

            # Back to blocks table
            go_back()
            _wait_for_first_link(_BLOCK_LINK_XPATH)

        # Back to districts table
        go_back()
        _wait_for_first_link(_DISTRICT_LINK_XPATH)

    driver.switch_to.default_content()
    return pd.concat(block_frames)
# Send table up one level
def go_back():
    """Click the report viewer control that returns to the parent table.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if the control is
            not present in the current frame.
    """
    # find_element(By.ID, ...) works on old and current Selenium releases;
    # the find_element_by_id shortcut was removed in Selenium 4.3.
    driver.find_element(By.ID, 'ReportViewer_ctl05_ctl01_ctl00_ctl00_ctl00').click()