|
|
|
@ -249,6 +249,12 @@ def extract_records(input_dir, output_dir, clean_up=False):
|
|
|
|
|
# Prepare output directory
|
|
|
|
|
os.makedirs(output_dir, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
# Create one empty master dataframe per period, keyed by period name
|
|
|
|
|
periods = ['all', 'weekly']
|
|
|
|
|
master = {}
|
|
|
|
|
for period in periods:
|
|
|
|
|
master[period] = pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
for zip_name in tqdm(zip_names):
|
|
|
|
|
# Skip duplicate downloads (file names containing a parenthesised number, e.g. '(1)')
|
|
|
|
|
if re.search('\([0-9]+\)', zip_name):
|
|
|
|
@ -278,13 +284,8 @@ def extract_records(input_dir, output_dir, clean_up=False):
|
|
|
|
|
bore_id = re.search('^\S+', meta).group()
|
|
|
|
|
site, hole, pipe = bore_id.split('.')
|
|
|
|
|
|
|
|
|
|
# Add bore specifics to dataframe
|
|
|
|
|
df['Site'] = site
|
|
|
|
|
df['Hole'] = hole
|
|
|
|
|
df['Pipe'] = pipe
|
|
|
|
|
df['Lat'] = lat
|
|
|
|
|
df['Lon'] = lon
|
|
|
|
|
df['Elev'] = elev
|
|
|
|
|
# FIXME: detect basin automatically
|
|
|
|
|
basin_id = 'MB'
|
|
|
|
|
|
|
|
|
|
# Rename columns
|
|
|
|
|
df = df.rename(
|
|
|
|
@ -296,23 +297,47 @@ def extract_records(input_dir, output_dir, clean_up=False):
|
|
|
|
|
|
|
|
|
|
# Select output columns
|
|
|
|
|
df = df[[
|
|
|
|
|
'Site',
|
|
|
|
|
'Hole',
|
|
|
|
|
'Pipe',
|
|
|
|
|
'Date time',
|
|
|
|
|
'Below Measuring Point',
|
|
|
|
|
'Above Sea Level',
|
|
|
|
|
'Lat',
|
|
|
|
|
'Lon',
|
|
|
|
|
'Elev',
|
|
|
|
|
]]
|
|
|
|
|
|
|
|
|
|
# Get csv name from zip archive
|
|
|
|
|
csv_name = os.path.join(output_dir, zip_name.replace('.zip', '.csv'))
|
|
|
|
|
# Set date index for resampling
|
|
|
|
|
df.index = df['Date time']
|
|
|
|
|
|
|
|
|
|
# Export to csv
|
|
|
|
|
df.to_csv(csv_name, float_format='%0.3f')
|
|
|
|
|
# Append to master dataframe
|
|
|
|
|
for period in periods:
|
|
|
|
|
if period == 'weekly':
|
|
|
|
|
# Resample to weekly timestamps
|
|
|
|
|
df = df.resample('1w').mean()
|
|
|
|
|
df['Date time'] = df.index
|
|
|
|
|
|
|
|
|
|
# Add bore specifics to dataframe
|
|
|
|
|
df['Site'] = site
|
|
|
|
|
df['Hole'] = hole
|
|
|
|
|
df['Pipe'] = pipe
|
|
|
|
|
df['Lat'] = lat
|
|
|
|
|
df['Lon'] = lon
|
|
|
|
|
df['Elev'] = elev
|
|
|
|
|
df['Basin'] = basin_id
|
|
|
|
|
|
|
|
|
|
master[period] = pd.concat([master[period], df])
|
|
|
|
|
|
|
|
|
|
if clean_up:
|
|
|
|
|
# Remove original zip archive
|
|
|
|
|
os.remove(os.path.join(input_dir, zip_name))
|
|
|
|
|
|
|
|
|
|
for period in periods:
|
|
|
|
|
# Set column order
|
|
|
|
|
master[period] = master[period][[
|
|
|
|
|
'Date time', 'Basin', 'Site', 'Hole', 'Pipe',
|
|
|
|
|
'Below Measuring Point', 'Above Sea Level', 'Lat', 'Lon', 'Elev'
|
|
|
|
|
]]
|
|
|
|
|
|
|
|
|
|
# Get date of the last row for the file name (NOTE: assumes the last row holds the latest date)
|
|
|
|
|
latest_date = master[period]['Date time'].iloc[-1].strftime('%Y-%m-%d')
|
|
|
|
|
csv_name = os.path.join(
|
|
|
|
|
output_dir, '{}-{}-{}.csv'.format(basin_id, latest_date, period))
|
|
|
|
|
|
|
|
|
|
# Export to csv
|
|
|
|
|
master[period].to_csv(csv_name, index=False, float_format='%0.3f')
|
|
|
|
|