From 6d2e0f89c5480d599d53d490fac355ff0e2c4d41 Mon Sep 17 00:00:00 2001 From: Daniel Howe Date: Wed, 13 Mar 2019 07:08:46 +1100 Subject: [PATCH] Download by basin, rather than by bore --- waternsw_grabber/waternsw_grabber.py | 46 ++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/waternsw_grabber/waternsw_grabber.py b/waternsw_grabber/waternsw_grabber.py index 6093814..7865cfc 100644 --- a/waternsw_grabber/waternsw_grabber.py +++ b/waternsw_grabber/waternsw_grabber.py @@ -218,18 +218,43 @@ def open_browser(download_dir): return driver -def telemetered_bore_downloader(bore_ids, start_date, end_date, download_dir): +def telemetered_bore_downloader(basin, + download_dir=None, + start_date=None, + end_date=None): """Download multiple records from telemetered bore. Args: - bore_ids: bore ID values (array-like) + basin: basin name or code (string) + download_dir: path to where downloaded files will be saved start_date: start date (string YYYY-MM-DD format) end_date: end date (string YYYY-MM-DD format) - download_dir: path to where downloaded files will be saved Raises: ValueError when bore ID is invalid """ + + # Get full name of basin + basins = get_basins() + + # Check if full basin name was provided + if basin in basins['Basin name'].values: + basin_name = basin + else: + try: + # Check if two-letter basin code was provided + basin_name = basins.groupby('Basin code').first().loc[basin][ + 'Basin name'] + except KeyError: + raise KeyError('Basin {} not found'.format(basin)) + + # Get list of bore IDs from selected basin + bore_ids = basins[basins['Basin name'] == basin_name].index.values + + # Add basin name to root download directory + download_dir = os.path.join(download_dir, basin_name) + + # Open browser driver = open_browser(download_dir) # Set up log File @@ -270,6 +295,17 @@ def telemetered_bore_downloader(bore_ids, start_date, end_date, download_dir): driver.quit() +def get_basins(): + """Load basin definitions file.""" + 
+ # Get basin info for telemetered site data + csv_name = os.path.join( + os.path.dirname(__file__), 'data', 'telemetered-sites.csv') + basins = pd.read_csv(csv_name, index_col=0) + + return basins + + def extract_definitions(input_dir, output_dir): """Extract variable and quality metadata from bore records. @@ -279,9 +315,7 @@ def extract_definitions(input_dir, output_dir): """ # Get basin info for telemetered site data - csv_name = os.path.join( - os.path.dirname(__file__), 'data', 'telemetered-sites.csv') - basins = pd.read_csv(csv_name, index_col=0) + basins = get_basins() # Find zip files zip_names = [f for f in os.listdir(input_dir) if f.endswith('.zip')]