In [9]:
#Import Libraries
import numpy as np
import pandas as pd
import geopandas as gpd
import json
import requests
import math
import os
import time


In [10]:
# Folders: both paths are resolved against the current working directory
data_folder = os.path.abspath("data")
output_folder = os.path.abspath("output")
# Create the output folder when it is missing. exist_ok=True makes this safe to
# re-run and avoids the race between a separate existence check and the creation.
os.makedirs(output_folder, exist_ok=True)

In [11]:
# Declarations
# Response formats: req_frmt is appended to the metadata and count requests,
# geo_frmt to the feature download requests.
req_frmt, geo_frmt = 'pjson', 'geojson'
# NOTE(review): the {'init': ...} CRS form is deprecated in recent pyproj/geopandas
# releases; crs is not used in the cells visible here, so it is kept unchanged.
crs = dict(init='epsg:4326')

# Request headers sent with every call (referer + browser-like user agent)
my_referer = 'http://webgis.nic.in/'
UserAgent = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/39.0.2171.95 Safari/537.36'
)

# Layer endpoints to scrape, built from the shared MapServer base URL
Layers = [
    'https://webgis.nic.in/publishing/rest/services/bharatmaps/admin17new/MapServer/' + layer_id
    for layer_id in ('4', '9')
]

# To skip layers, slice the list, e.g.:
#Layers = Layers[0:]

In [12]:
def getmetadata(_url):
    """Fetch the JSON metadata of a layer.

    Parameters
    ----------
    _url : str
        Layer endpoint; '?f=' + req_frmt is appended to it.

    Returns
    -------
    dict or int
        The parsed metadata on success, or 0 on any failure (request error,
        non-200 status, non-JSON body, or a body that carries an 'error'
        object). Callers test the result against 0.
    """
    url = _url + '?f=' + req_frmt
    print("--Getting Metadata for layer with url", url)
    try:
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept as in the original - confirm it is still required for this host.
        response = requests.get(
            url,
            headers={'referer': my_referer, 'User-Agent': UserAgent},
            verify=False,
            timeout=10,
        )
    except requests.exceptions.RequestException as exc:
        # Timeouts / connection errors must not abort the whole scrape
        print("--Error in getting Metadata for layer", "| request failed:", exc)
        return 0
    if response.status_code != 200:
        print("--Error in getting Metadata for layer")
        return 0
    try:
        metadata = response.json()
    except ValueError as exc:
        # Body was not valid JSON (e.g. an HTML error page)
        print("--Error in getting Metadata for layer", "| invalid JSON:", exc)
        return 0
    # The scrape loop in this notebook checks HTTP 200 responses for an 'error'
    # object, so a payload carrying one is treated as a failure here as well.
    if not isinstance(metadata, dict) or 'error' in metadata:
        print("--Error in getting Metadata for layer", "| server response:", metadata)
        return 0
    return metadata
    
def getfeatcount(_url):
    """Ask the server how many features a layer holds.

    Sends a query with where=1=1 and returnCountOnly=true and reads the
    'count' field of the JSON answer.

    Parameters
    ----------
    _url : str
        Layer endpoint; '/query?...' is appended to it.

    Returns
    -------
    int
        The value of 'count' on success, or 0 on any failure (request error,
        non-200 status, non-JSON body, or a body without 'count').
    """
    query = 'where=1%3D1&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&returnIdsOnly=false&returnCountOnly=true&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&resultOffset=&resultRecordCount='
    url = _url + '/query?' + query + '&f=' + req_frmt
    print("--Getting feature count for layer with url", url)
    try:
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept as in the original - confirm it is still required for this host.
        response = requests.get(
            url,
            headers={'referer': my_referer, 'User-Agent': UserAgent},
            verify=False,
            timeout=120,
        )
    except requests.exceptions.RequestException as exc:
        print("--Error in getting Featurecount for layer:", _url, "| request failed:", exc)
        return 0
    if response.status_code != 200:
        # Report the URL that was passed in: the original message read the
        # global `layername`, which is not defined by this function.
        print("--Error in getting Featurecount for layer:", _url)
        return 0
    try:
        payload = response.json()
    except ValueError as exc:
        print("--Error in getting Featurecount for layer:", _url, "| invalid JSON:", exc)
        return 0
    if not isinstance(payload, dict) or 'count' not in payload:
        # e.g. an error object returned with HTTP 200
        print("--Error in getting Featurecount for layer:", _url, "| server response:", payload)
        return 0
    return payload['count']

In [None]:
# Scrape Layers
# Every layer is downloaded page by page (resultOffset / resultRecordCount) and
# each page is written to <output_folder>/<layer name>/query-<n>.json.
# A failed page is retried with half the page size; once the page size reaches
# zero the round starts again at the full size.
RETRY_PAUSE_SEC = 5    # pause after every failed attempt so the server is not hammered
MAX_RETRY_ROUNDS = 5   # consecutive failed halving rounds tolerated before giving up on a layer

for _url in Layers:
    # Get Metadata
    metadata = getmetadata(_url)
    if metadata == 0:
        # getmetadata already reported the problem; move on to the next layer
        continue

    # Metadata to variables
    layername = metadata["name"]
    layer_folder = os.path.join(output_folder, layername)
    # Create the layer folder if needed (safe to re-run)
    os.makedirs(layer_folder, exist_ok=True)
    _max_rec = int(metadata['maxRecordCount'])
    idfield = metadata["fields"][0]['name']
    feat_count = getfeatcount(_url)

    # Start scraping from server
    print("---Starting to scrape data for layer:", layername)
    offset = 0
    i = 1
    failed_rounds = 0
    while offset < feat_count:
        max_rec = _max_rec
        scraped = 0
        while scraped == 0 and max_rec >= 1:
            # Query Server
            url = _url + '/query?resultOffset=' + str(offset) + '&resultRecordCount=' + str(max_rec) + '&f=' + geo_frmt + '&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true'
            print("---", url)

            # Get data to memory. All three values are reset on every attempt
            # so nothing is read from a response left over from an earlier one.
            status_code = 0
            err_code = 0
            payload = None
            try:
                # NOTE(review): verify=False disables TLS certificate verification;
                # kept as in the original - confirm it is still required.
                response = requests.get(url, headers={'referer': my_referer, 'User-Agent': UserAgent}, verify=False, timeout=240)
                status_code = response.status_code
                payload = response.json()  # parsed once, reused below
            except requests.exceptions.RequestException as exc:
                print("---Request failed:", exc)
            except ValueError as exc:
                print("---Response is not valid JSON:", exc)

            # The server may answer HTTP 200 with an 'error' object in the body
            if isinstance(payload, dict) and 'error' in payload:
                error_info = payload['error']
                # -1 marks an error object without a usable code
                err_code = error_info.get('code', -1) if isinstance(error_info, dict) else -1

            print("--Status Code: ",status_code," | error code:",err_code)
            if status_code == 200 and err_code == 0 and payload is not None:
                # Save json
                out_file = os.path.join(layer_folder, 'query-' + str(i) + '.json')
                with open(out_file, 'w') as outfile:
                    json.dump(payload, outfile)
                # All done for this page
                scraped = 1
                failed_rounds = 0
                # Increase variables for next loop
                i += 1
                offset += max_rec
                print ("---Scraping ",layername," now progress: ", min(offset, feat_count), " Scrapped out of Total ", feat_count)
            else:
                print("---Error in Scraping ", layername, " | Url: ", url, " | Rescraping ")
                max_rec = max_rec // 2
                time.sleep(RETRY_PAUSE_SEC)

        if scraped == 0:
            # A whole halving round failed (or the page size was 0 to begin with).
            # Without a limit this loop would retry the same offset forever.
            failed_rounds += 1
            if failed_rounds >= MAX_RETRY_ROUNDS:
                print("---Giving up on ", layername, " at offset ", offset, " after ", failed_rounds, " failed rounds")
                break
    print("\n")

--Getting Metadata fo layer with url https://webgis.nic.in/publishing/rest/services/bharatmaps/admin17new/MapServer/9?f=pjson




---Starting to scrape data for layer: Villages
--- https://webgis.nic.in/publishing/rest/services/bharatmaps/admin17new/MapServer/9/query?resultOffset=0&resultRecordCount=1000&f=geojson&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true




--Status Code:  200  | error code: 0
---Scraping  Villages  now progress:  1000  Scrapped out of Total  575265
--- https://webgis.nic.in/publishing/rest/services/bharatmaps/admin17new/MapServer/9/query?resultOffset=1000&resultRecordCount=1000&f=geojson&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true




--Status Code:  200  | error code: 0
---Scraping  Villages  now progress:  2000  Scrapped out of Total  575265
--- https://webgis.nic.in/publishing/rest/services/bharatmaps/admin17new/MapServer/9/query?resultOffset=2000&resultRecordCount=1000&f=geojson&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true




--Status Code:  0  | error code: 0
---Error in Scraping  Villages  | Url:  https://webgis.nic.in/publishing/rest/services/bharatmaps/admin17new/MapServer/9/query?resultOffset=2000&resultRecordCount=1000&f=geojson&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true  | Rescraping 
--- https://webgis.nic.in/publishing/rest/services/bharatmaps/admin17new/MapServer/9/query?resultOffset=2000&resultRecordCount=500&f=geojson&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true


