From fec0e858c63f4a922519ba4046bbe3752904d3f3 Mon Sep 17 00:00:00 2001 From: Dan Howe Date: Mon, 18 Mar 2019 22:23:34 +1100 Subject: [PATCH] Allow download directory to not be specified --- README.md | 15 ++++---------- waternsw_grabber/waternsw_grabber.py | 31 +++++++++++++++++++++------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 16bc0eb..c82d236 100644 --- a/README.md +++ b/README.md @@ -25,19 +25,12 @@ https://github.com/mozilla/geckodriver/releases ```python from waternsw_grabber import telemetered_bore_downloader, extract_records -download_dir = 'downloads' -output_dir = 'csv' -start_date = '1900-01-01' -end_date = '2019-02-14' -bore_ids = [ - 'GW039102.1.1', - 'GW039114.1.1', - 'GW039117.1.1', -] +basin_name = 'Bega River Basin' +download_dir = '.' # Download bore records -telemetered_bore_downloader(bore_ids, start_date, end_date, download_dir) +telemetered_bore_downloader(basin_name, download_dir) # Extract zip archives -extract_records(download_dir, output_dir, clean_up=False) +extract_records(basin_name, download_dir) ``` diff --git a/waternsw_grabber/waternsw_grabber.py b/waternsw_grabber/waternsw_grabber.py index b8bdc50..a0a6038 100644 --- a/waternsw_grabber/waternsw_grabber.py +++ b/waternsw_grabber/waternsw_grabber.py @@ -258,7 +258,10 @@ def telemetered_bore_downloader(basin_name, bore_ids = basins[basins['Basin name'] == basin_name].index.values # Add basin name to root download directory - download_dir = os.path.join(download_dir, basin_name) + if not download_dir: + download_dir = basin_name + else: + download_dir = os.path.join(download_dir, basin_name) # Open browser driver = open_browser(download_dir) @@ -270,7 +273,7 @@ def telemetered_bore_downloader(basin_name, # Download bore logs pbar = tqdm(bore_ids) for bore_id in pbar: - pbar.set_description(bore_id) + pbar.set_description('Downloading {}'.format(bore_id)) try: get_telemetered_bore(driver, bore_id, start_date, end_date) except ValueError as e: @@ -315,7 +318,7 @@ def get_basins(): return basins -def extract_definitions(basin_name, download_dir): +def extract_definitions(basin_name, download_dir=None): """Extract variable and quality metadata from bore records. Args: @@ -326,8 +329,13 @@ def extract_definitions(basin_name, download_dir): # Get basin info for telemetered site data basins = get_basins() + # Check if download directory was provided + if not download_dir: + output_dir = basin_name + else: + output_dir = os.path.join(download_dir, basin_name) + # Prepare output directory - output_dir = os.path.join(download_dir, basin_name) os.makedirs(output_dir, exist_ok=True) # Find zip files @@ -449,7 +457,7 @@ def extract_definitions(basin_name, download_dir): return sites -def extract_records(basin_name, download_dir, clean_up=False): +def extract_records(basin_name, download_dir=None, clean_up=False): """Extract downloaded bore records. Args: @@ -464,8 +472,13 @@ def extract_records(basin_name, download_dir, clean_up=False): # Keep unique basin codes basin_codes = sites['Basin code'].unique() - # Find zip files - output_dir = os.path.join(download_dir, basin_name) + # Check if download directory was provided + if not download_dir: + output_dir = basin_name + else: + output_dir = os.path.join(download_dir, basin_name) + + # List zip files zip_names = [f for f in os.listdir(output_dir) if f.endswith('.zip')] # Prepare output directory @@ -479,7 +492,9 @@ def extract_records(basin_name, download_dir, clean_up=False): for period in periods: master[basin_code][period] = pd.DataFrame() - for zip_name in tqdm(zip_names): + pbar = tqdm(zip_names) + for zip_name in pbar: + pbar.set_description('Extracting {}'.format(zip_name)) # Skip duplicate downloads if re.search(r'\([0-9]+\)', zip_name): continue