Allow the download directory to be omitted

master
Dan Howe 6 years ago
parent 4529dbdc28
commit fec0e858c6

@ -25,19 +25,12 @@ https://github.com/mozilla/geckodriver/releases
```python
from waternsw_grabber import telemetered_bore_downloader, extract_records
download_dir = 'downloads'
output_dir = 'csv'
start_date = '1900-01-01'
end_date = '2019-02-14'
bore_ids = [
'GW039102.1.1',
'GW039114.1.1',
'GW039117.1.1',
]
basin_name = 'Bega River Basin'
download_dir = '.'
# Download bore records
telemetered_bore_downloader(bore_ids, start_date, end_date, download_dir)
telemetered_bore_downloader(basin_name, download_dir)
# Extract zip archives
extract_records(download_dir, output_dir, clean_up=False)
extract_records(basin_name, download_dir)
```

@ -258,7 +258,10 @@ def telemetered_bore_downloader(basin_name,
bore_ids = basins[basins['Basin name'] == basin_name].index.values
# Add basin name to root download directory
download_dir = os.path.join(download_dir, basin_name)
if not download_dir:
download_dir = basin_name
else:
download_dir = os.path.join(download_dir, basin_name)
# Open browser
driver = open_browser(download_dir)
@ -270,7 +273,7 @@ def telemetered_bore_downloader(basin_name,
# Download bore logs
pbar = tqdm(bore_ids)
for bore_id in pbar:
pbar.set_description(bore_id)
pbar.set_description('Downloading {}'.format(bore_id))
try:
get_telemetered_bore(driver, bore_id, start_date, end_date)
except ValueError as e:
@ -315,7 +318,7 @@ def get_basins():
return basins
def extract_definitions(basin_name, download_dir):
def extract_definitions(basin_name, download_dir=None):
"""Extract variable and quality metadata from bore records.
Args:
@ -326,8 +329,13 @@ def extract_definitions(basin_name, download_dir):
# Get basin info for telemetered site data
basins = get_basins()
# Check if download directory was provided
if not download_dir:
output_dir = basin_name
else:
output_dir = os.path.join(download_dir, basin_name)
# Prepare output directory
output_dir = os.path.join(download_dir, basin_name)
os.makedirs(output_dir, exist_ok=True)
# Find zip files
@ -449,7 +457,7 @@ def extract_definitions(basin_name, download_dir):
return sites
def extract_records(basin_name, download_dir, clean_up=False):
def extract_records(basin_name, download_dir=None, clean_up=False):
"""Extract downloaded bore records.
Args:
@ -464,8 +472,13 @@ def extract_records(basin_name, download_dir, clean_up=False):
# Keep unique basin codes
basin_codes = sites['Basin code'].unique()
# Find zip files
output_dir = os.path.join(download_dir, basin_name)
# Check if download directory was provided
if not download_dir:
output_dir = basin_name
else:
output_dir = os.path.join(download_dir, basin_name)
# List zip files
zip_names = [f for f in os.listdir(output_dir) if f.endswith('.zip')]
# Prepare output directory
@ -479,7 +492,9 @@ def extract_records(basin_name, download_dir, clean_up=False):
for period in periods:
master[basin_code][period] = pd.DataFrame()
for zip_name in tqdm(zip_names):
pbar = tqdm(zip_names)
for zip_name in pbar:
pbar.set_description('Extracting {}'.format(zip_name))
# Skip duplicate downloads
if re.search(r'\([0-9]+\)', zip_name):
continue

Loading…
Cancel
Save