In [7]:
#Import Libraries
import numpy as np
import pandas as pd
import geopandas as gpd
import json
import requests
import math
import os
import time
In [8]:
# Import Folders
# Both paths are resolved against the current working directory of the kernel.
data_folder = os.path.abspath("data")
output_folder = os.path.abspath("output")
# Create the output folder when it is missing. exist_ok=True makes this a
# single race-free call (no separate exists() check) and keeps the cell safe
# to re-run; it still raises if "output" exists but is not a directory.
os.makedirs(output_folder, exist_ok=True)
In [9]:
#Declarations
# Response formats appended to request URLs as the `f=` parameter:
# `req_frmt` for metadata/count requests, `geo_frmt` for feature pages.
req_frmt, geo_frmt = 'pjson', 'geojson'
# NOTE(review): `crs` is not referenced in the cells visible here. The
# {'init': ...} spelling is reported as deprecated by recent pyproj/geopandas
# releases -- confirm against the installed versions before relying on it.
crs = dict(init='epsg:4326')

#Playing with headers
# Sent with every request as the `referer` and `User-Agent` headers.
UserAgent = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) '
             'AppleWebKit/537.36 (KHTML, like Gecko) '
             'Chrome/39.0.2171.95 Safari/537.36')
my_referer = 'http://www.myplan.ie/'

#layers
# Every layer lives under the same MapServer endpoint and differs only by
# its trailing numeric id (0 through 7).
_LAYER_ROOT = 'https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer'
_LAYER_IDS = range(8)
Layers = [_LAYER_ROOT + '/' + str(layer_id) for layer_id in _LAYER_IDS]

#Skip layers by this
# Raise the slice start to resume a run from a later layer.
Layers = Layers[0:]
In [10]:
def getmetadata(_url):
    """Fetch the JSON description of one map-service layer.

    Parameters
    ----------
    _url : str
        Layer endpoint URL without a query string; ``?f=<req_frmt>`` is
        appended to it.

    Returns
    -------
    dict or int
        The parsed JSON document on success. ``0`` when the request raises,
        the HTTP status is not 200, the body is not a JSON object, or the
        body carries an ``error`` member. Callers test for ``0``.
    """
    url = _url + '?f=' + req_frmt
    print("--Getting Metadata fo layer with url", url)
    try:
        response = requests.get(url, headers={'referer': my_referer, 'User-Agent':UserAgent}, timeout=10)
    except requests.RequestException as exc:
        # Timeouts / connection failures must not abort the whole scrape;
        # report them through the same sentinel as an HTTP error.
        print("--Error in getting Metadata for layer", exc)
        return 0
    if response.status_code != 200:
        print("--Error in getting Metadata for layer")
        return 0
    try:
        metadata = response.json()
    except ValueError:
        # Body was not valid JSON (e.g. an HTML error page).
        print("--Error in getting Metadata for layer")
        return 0
    # The server can report a failure inside a 200 response as an
    # {"error": {...}} document; that is not usable metadata.
    if not isinstance(metadata, dict) or 'error' in metadata:
        print("--Error in getting Metadata for layer")
        return 0
    return metadata
    
def getfeatcount(_url, layername=None):
    """Ask the layer's ``/query`` endpoint how many features match ``1=1``.

    Parameters
    ----------
    _url : str
        Layer endpoint URL without a query string.
    layername : str, optional
        Label used in the error message. Defaults to ``_url`` so the
        function no longer depends on a global variable being defined.

    Returns
    -------
    int
        The ``count`` member of the JSON response, or ``0`` when the request
        raises, the HTTP status is not 200, or the body has no ``count``
        (for example an ``error`` document or non-JSON content).
    """
    query = 'where=1%3D1&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&returnIdsOnly=false&returnCountOnly=true&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&resultOffset=&resultRecordCount='
    url = _url + '/query?' + query + '&f=' + req_frmt
    label = _url if layername is None else layername
    try:
        response = requests.get(url, headers={'referer': my_referer, 'User-Agent':UserAgent}, timeout=120)
    except requests.RequestException as exc:
        # Network failure: report it and fall back to the 0 sentinel.
        print("--Error in getting Featurecount for layer:", label, exc)
        return 0
    if response.status_code != 200:
        print("--Error in getting Featurecount for layer:", label)
        return 0
    try:
        feat_count = response.json()['count']
    except (ValueError, KeyError, TypeError):
        # Non-JSON body, or a JSON body without a "count" member.
        print("--Error in getting Featurecount for layer:", label)
        return 0
    return feat_count
In [ ]:
# Scrape Layers
def _fetch_page(url):
    """Request one page of features and classify the outcome.

    Returns a ``(status_code, err_code, payload)`` tuple:

    * ``status_code`` -- HTTP status, or 0 when the request itself raised.
    * ``err_code`` -- ``error.code`` from the JSON body, -1 when an ``error``
      object is present without a code, 0 when no error object is present.
    * ``payload`` -- the parsed JSON body, or ``None`` when there is none.
    """
    try:
        response = requests.get(url, headers={'referer': my_referer, 'User-Agent':UserAgent}, timeout=240)
    except requests.RequestException as exc:
        print("---Request failed:", exc)
        return 0, 0, None
    try:
        payload = response.json()
    except ValueError:
        # Body is not JSON (e.g. an HTML error page): nothing to save.
        return response.status_code, 0, None
    err_code = 0
    if isinstance(payload, dict) and isinstance(payload.get('error'), dict):
        err_code = payload['error'].get('code', -1)
    return response.status_code, err_code, payload


for _url in Layers:
    # Get Metadata
    metadata = getmetadata(_url)
    if metadata == 0:
        # getmetadata already reported the failure; move on to the next layer.
        continue
    if "name" not in metadata:
        print("--Metadata has no layer name, skipping url", _url)
        continue
    # Metadata to variables
    layername = metadata["name"]
    layer_folder = os.path.join(output_folder, layername)
    # One sub-folder per layer; exist_ok keeps the cell re-runnable.
    os.makedirs(layer_folder, exist_ok=True)
    _max_rec = int(metadata['maxRecordCount'])#/2)
    idfield = metadata["fields"][0]['name']
    feat_count = getfeatcount(_url)
    #Start scraping From server
    print("---Starting to scrape data for layer:", layername)
    offset = 0
    i = 1
    while offset < feat_count: #loop
        # Every page starts again from the server's advertised page size and
        # is halved after each failed attempt.
        max_rec = _max_rec
        scraped = False
        while not scraped and max_rec >= 1:
            #Query Server
            url = _url + '/query?resultOffset=' +  str(offset) + '&resultRecordCount=' + str(max_rec) + '&f=' + geo_frmt + '&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true'
            print("---",url)
            #get Data to Memory
            status_code, err_code, payload = _fetch_page(url)
            print("--Status Code: ",status_code," | error code:",err_code)
            if status_code == 200 and err_code == 0 and payload is not None:
                #save json
                out_file = os.path.join(layer_folder, 'query-' + str(i) + '.json')
                with open(out_file, 'w') as outfile:
                    json.dump(payload, outfile)
                #all done
                scraped = True
                #Increase variable for next loop
                i+=1
                offset+=max_rec
                print ("---Scraping ",layername," now progress: ", min(offset, feat_count), " Scrapped out of Total ", feat_count)
            else:
                print("---Error in Scraping ", layername, " | Url: ", url, " | Rescraping ")
                max_rec = max_rec // 2
        if not scraped:
            # Every batch size down to a single record failed. Without this
            # exit the outer loop would retry the same offset forever.
            print("---Giving up on layer ", layername, " at offset ", offset, " | all batch sizes failed")
            break
    print("\n")
--Getting Metadata fo layer with url https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/0?f=pjson
---Starting to scrape data for layer: Regional Assemblies (NUTS II)
--- https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/0/query?resultOffset=0&resultRecordCount=1000&f=geojson&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true
--Status Code:  200  | error code: 0
---Scraping  Regional Assemblies (NUTS II)  now progress:  2  Scrapped out of Total  2


--Getting Metadata fo layer with url https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/1?f=pjson
---Starting to scrape data for layer: Regional Authorities (NUTS III)
--- https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/1/query?resultOffset=0&resultRecordCount=1000&f=geojson&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true
--Status Code:  200  | error code: 0
---Scraping  Regional Authorities (NUTS III)  now progress:  9  Scrapped out of Total  9


--Getting Metadata fo layer with url https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/2?f=pjson
---Starting to scrape data for layer: Provinces
--- https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/2/query?resultOffset=0&resultRecordCount=1000&f=geojson&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true
--Status Code:  200  | error code: 0
---Scraping  Provinces  now progress:  4  Scrapped out of Total  4


--Getting Metadata fo layer with url https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/3?f=pjson
---Starting to scrape data for layer: Counties (NUTS IV)
--- https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/3/query?resultOffset=0&resultRecordCount=1000&f=geojson&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true
--Status Code:  200  | error code: 0
---Scraping  Counties (NUTS IV)  now progress:  27  Scrapped out of Total  27


--Getting Metadata fo layer with url https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/4?f=pjson
---Starting to scrape data for layer: Gaeltacht Areas
--- https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/4/query?resultOffset=0&resultRecordCount=1000&f=geojson&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true
--Status Code:  200  | error code: 0
---Scraping  Gaeltacht Areas  now progress:  9  Scrapped out of Total  9


--Getting Metadata fo layer with url https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/5?f=pjson
---Starting to scrape data for layer: Electoral Divisions (NUTS V)
--- https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/5/query?resultOffset=0&resultRecordCount=1000&f=geojson&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true
--Status Code:  500  | error code: 0
---Error in Scraping  Electoral Divisions (NUTS V)  | Url:  https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/5/query?resultOffset=0&resultRecordCount=1000&f=geojson&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true  | Rescraping 
--- https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/5/query?resultOffset=0&resultRecordCount=500&f=geojson&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true
--Status Code:  200  | error code: 0
---Scraping  Electoral Divisions (NUTS V)  now progress:  500  Scrapped out of Total  3441
--- https://maps.housing.gov.ie/arcgis/rest/services/MyPlan/AdministrativeBoundaries/MapServer/5/query?resultOffset=500&resultRecordCount=1000&f=geojson&where=1%3D1&outFields=*&returnGeometry=true&outSR=4326&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&returnTrueCurves=true