diff --git a/major_projects_grabber/major_projects_grabber.py b/major_projects_grabber/major_projects_grabber.py index 1c79a34..d449d64 100644 --- a/major_projects_grabber/major_projects_grabber.py +++ b/major_projects_grabber/major_projects_grabber.py @@ -3,19 +3,13 @@ Download documents from the NSW DPE Major Projects website. Example usage: # Grab a single project modification using its job id, and save in 'files' - python major_projects_grabber.py -o files -i 1019 + major_projects_grabber -o files -i 1746 # Grab all modifications in search results page, and save in current folder - python major_projects_grabber.py -o . http:// + major_projects_grabber "http://majorprojects.planning.nsw.gov.au/index.pl?action=search&authority_id=547" """ -# search url -# http://majorprojects.planning.nsw.gov.au/index.pl?action=search&page_id=&search=&authority_id=&search_site_type_id=10&reference_table=&status_id=&decider=&from_date=&to_date=&x=31&y=16 - -# mod url -# http://majorprojects.planning.nsw.gov.au/index.pl?action=view_job&job_id=9503 - import os import re import sys @@ -25,6 +19,7 @@ import requests import argparse import pandas as pd from lxml import html +from tqdm import tqdm from requests.exceptions import ConnectionError, InvalidURL @@ -54,7 +49,7 @@ def get_documents(mod_id, output_dir): # Add note if no documents are found on portal if not folders: txt_name = 'No documents on DPE portal for this modification.txt' - open(os.path.join(mod_dir,txt_name), 'a').close() + open(os.path.join(mod_dir, txt_name), 'a').close() # Create link to DPE Major Projects page for current modification text = """ @@ -126,10 +121,10 @@ def main(): example_text = """examples: # Grab a single project modification using its job id, and save in 'files' - python major_projects_grabber.py -o files -i 1019 + major_projects_grabber -i 1746 -o files # Grab all modifications in search results page, and save in current folder - python major_projects_grabber.py -o . 
http:// + major_projects_grabber "http://majorprojects.planning.nsw.gov.au/index.pl?action=search&authority_id=547" """ # Set up command line arguments @@ -168,7 +163,7 @@ def main(): mod_ids.extend(search_mod_ids) # Download documents from given modification ids - for mod_id in mod_ids: + for mod_id in tqdm(mod_ids): get_documents(mod_id, output_dir)