|
|
|
@ -231,3 +231,81 @@ def telemetered_bore_downloader(bore_ids, start_date, end_date, download_dir):
|
|
|
|
|
os.remove(log_name)
|
|
|
|
|
|
|
|
|
|
driver.quit()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_records(input_dir, output_dir):
    """Extract downloaded bore records.

    Reads the csv file inside every zip archive found in ``input_dir``,
    adds the bore details parsed from the file's metadata cell as extra
    columns, and writes one csv file per archive to ``output_dir``
    (created if it does not exist).

    Args:
        input_dir:  path to downloaded zip archives
        output_dir: path to save csv files

    Raises:
        AttributeError: if the metadata cell does not contain one of the
            expected 'Lat:', 'Long:' or 'Elev:' fields.
        ValueError: if a parsed value is not numeric, or the bore id does
            not have exactly three dot-separated parts.
    """

    # Find zip files
    zip_names = [f for f in os.listdir(input_dir) if f.endswith('.zip')]

    # Prepare output directory
    os.makedirs(output_dir, exist_ok=True)

    for zip_name in tqdm(zip_names):
        # Skip duplicate downloads (names carrying a '(1)'-style counter)
        if re.search(r'\([0-9]+\)', zip_name):
            continue

        # Use '.part' file if zip was not correctly downloaded.
        # The original name is kept untouched for naming the output file.
        archive_name = zip_name
        if os.path.getsize(os.path.join(input_dir, zip_name)) == 0:
            archive_name += '.part'

        # Read csv file inside zip archive
        # NOTE(review): header=2 / skiprows=[3] presumably skip a preamble
        # and a units row -- confirm against a sample download.
        df = pd.read_csv(
            os.path.join(input_dir, archive_name),
            header=2,
            skiprows=[3],
            parse_dates=['Date'],
            compression='zip',
            dayfirst=True)

        # Get bore specifics from the metadata string stored in the last
        # column of the second data row
        meta = df.iloc[1, -1]
        lat = float(re.search(r'(?<=Lat:)\S+', meta).group())
        lon = float(re.search(r'(?<=Long:)\S+', meta).group())
        elev = float(re.search(r'(?<=Elev:).+(?=m)', meta).group())
        bore_id = re.search(r'^\S+', meta).group()
        site, hole, pipe = bore_id.split('.')

        # Add bore specifics to dataframe
        df['Site'] = site
        df['Hole'] = hole
        df['Pipe'] = pipe
        df['Lat'] = lat
        df['Lon'] = lon
        df['Elev'] = elev

        # Rename columns
        df = df.rename(
            columns={
                'Date': 'Date time',
                'Bore level below MP': 'Below Measuring Point',
                'GW Level - m AHD': 'Above Sea Level'
            })

        # Select output columns
        df = df[[
            'Site',
            'Hole',
            'Pipe',
            'Date time',
            'Below Measuring Point',
            'Above Sea Level',
            'Lat',
            'Lon',
            'Elev',
        ]]

        # Get csv name from zip archive: swap the '.zip' extension for '.csv'
        csv_name = os.path.join(output_dir,
                                os.path.splitext(zip_name)[0] + '.csv')

        # Export to csv
        df.to_csv(csv_name, float_format='%0.3f')
|
|
|
|
|