Add basin code and bore details to output

Branch: master
Author: Dan Howe, 6 years ago
Parent: 6f66bde339
Commit: 511de588aa
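For context, a minimal pandas sketch of what this change does to each bore record before it is appended to the per-basin output. The bore ID, site/hole/pipe numbers and basin code below are illustrative placeholders, not values from the real definition tables.

import pandas as pd

# Hypothetical bore lookup table in the shape extract_definitions() now
# returns: one row per bore ID with its site/hole/pipe numbers and basin code.
sites = pd.DataFrame(
    {'Site': [36000], 'Hole': [1], 'Pipe': [1], 'Basin code': ['LACHLAN']},
    index=['GW036000.1.1'])

# Toy record frame standing in for one extracted bore record
columns = ['SWL (m)', 'Quality']
df = pd.DataFrame(
    {'SWL (m)': [1.23, 1.25], 'Quality': [1, 1]},
    index=pd.to_datetime(['2019-01-01', '2019-01-02']))

bore_id = 'GW036000.1.1'

# The commit prepends the bore details to every record before it is
# appended to the per-basin master dataframe.
df['Site'] = sites.loc[bore_id, 'Site']
df['Hole'] = sites.loc[bore_id, 'Hole']
df['Pipe'] = sites.loc[bore_id, 'Pipe']
df['Basin'] = sites.loc[bore_id, 'Basin code']
df = df[['Site', 'Hole', 'Pipe', 'Basin'] + columns]
print(df)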

@@ -363,6 +363,9 @@ def extract_definitions(input_dir, output_dir):
     variables.to_csv(csv_name_v)
     qualities.to_csv(csv_name_q)
 
+    sites = sites[~sites.index.duplicated(keep='first')]
+    return sites
+
 
 def extract_records(input_dir, output_dir, clean_up=False):
     """Extract downloaded bore records.
@@ -374,26 +377,14 @@ def extract_records(input_dir, output_dir, clean_up=False):
     """
     # Update definition tables
-    extract_definitions(input_dir, output_dir)
+    sites = extract_definitions(input_dir, output_dir)
 
-    # Get basin info for telemetered site data
-    csv_name = os.path.join(
-        os.path.dirname(os.path.dirname(__file__)), 'data',
-        'telemetered-sites.csv')
-    basins = pd.read_csv(csv_name, index_col=0)
+    # Keep unique basin codes
+    basin_codes = sites['Basin code'].unique()
 
     # Find zip files
     zip_names = [f for f in os.listdir(input_dir) if f.endswith('.zip')]
 
-    # Get basin IDs for all zip files
-    basin_codes = []
-    for zip_name in zip_names:
-        bore_id = os.path.splitext(zip_name)[0].replace('cf', '')
-        basin_codes.append(basins.loc[bore_id, 'Basin code'])
-
-    # Keep uniue basin codes
-    basin_codes = list(set(basin_codes))
-
     # Prepare output directory
     os.makedirs(output_dir, exist_ok=True)
@@ -441,13 +432,15 @@ def extract_records(input_dir, output_dir, clean_up=False):
             parse_dates=['Date'],
             index_col=['Date'],
             compression='zip',
-            dayfirst=True,
-            nrows=100)
+            dayfirst=True)
 
-        # FIXME: convert quality codes to integers
+        # Convert quality codes to integers
+        for col in df.columns:
+            if 'Quality' in col:
+                df[col] = df[col].astype(int)
 
         # Update column names
-        df.columns = columns + ['Metadata']
+        df.columns = columns + ['Metadata']
 
         # Get bore specifics
         meta = df['Metadata'].iloc[1]
@@ -456,21 +449,31 @@ def extract_records(input_dir, output_dir, clean_up=False):
         df = df.drop(columns='Metadata')
 
         # Get basin ID
-        basin_code = basins.loc[bore_id, 'Basin code']
+        basin_code = sites.loc[bore_id, 'Basin code']
 
-        # Append to master dataframe
+        # Resample if necessary
         for period in periods:
             if period == 'daily':
                 # Resample to daily timestamps
                 df = df.resample('1d').mean()
-                # FIXME: add bore IDs
             elif period == 'weekly':
                 # Resample to weekly timestamps
                 df = df.resample('1w').mean()
-                # FIXME: add bore IDs
-            master[basin_code][period] = pd.concat([master[basin_code][period], df])
+
+            # Add specific borehole details
+            df['Site'] = sites.loc[bore_id, 'Site']
+            df['Hole'] = sites.loc[bore_id, 'Hole']
+            df['Pipe'] = sites.loc[bore_id, 'Pipe']
+            df['Basin'] = sites.loc[bore_id, 'Basin code']
+            df = df[['Site', 'Hole', 'Pipe', 'Basin'] + columns]
+
+            # Remove empty rows
+            df = df.dropna()
+
+            # Add to master dataframe
+            master[basin_code][period] = pd.concat(
+                [master[basin_code][period], df])
 
         if clean_up:
             # Remove original zip archive
@@ -479,9 +482,12 @@ def extract_records(input_dir, output_dir, clean_up=False):
     for basin_code in basin_codes:
        for period in periods:
            # Get latest date from dataframe
-            latest_date = master[basin_code][period].index[-1].strftime('%Y-%m-%d')
+            latest_date = master[basin_code][period].index[-1].strftime(
+                '%Y-%m-%d')
            csv_name = os.path.join(
-                output_dir, '{}-{}-{}.csv'.format(basin_code, latest_date, period))
+                output_dir, '{}-{}-{}.csv'.format(basin_code, latest_date,
+                                                  period))
 
            # Export to csv
-            master[basin_code][period].to_csv(csv_name, index=True, float_format='%0.3f')
+            master[basin_code][period].to_csv(
+                csv_name, index=True, float_format='%0.3f')
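For completeness, a rough sketch of the resample-and-export pattern the loops above follow. The master structure, basin code, periods and output directory here are illustrative stand-ins for the module's real configuration, and a separate resampled variable is used per period for clarity.

import os
import pandas as pd

periods = ['daily', 'weekly']
basin_code = 'LACHLAN'  # illustrative basin code
output_dir = 'output'   # illustrative output directory

# Per-basin, per-period accumulator in the shape the loop assumes
master = {basin_code: {period: pd.DataFrame() for period in periods}}

# Toy record frame for a single bore
df = pd.DataFrame(
    {'SWL (m)': [1.23, 1.25, 1.30]},
    index=pd.to_datetime(['2019-01-01', '2019-01-02', '2019-01-08']))

for period in periods:
    if period == 'daily':
        resampled = df.resample('1d').mean()
    elif period == 'weekly':
        resampled = df.resample('1w').mean()
    # Drop empty rows and append to the running master dataframe
    resampled = resampled.dropna()
    master[basin_code][period] = pd.concat(
        [master[basin_code][period], resampled])

# Export follows the '{basin code}-{latest date}-{period}.csv' naming pattern
os.makedirs(output_dir, exist_ok=True)
for period in periods:
    latest_date = master[basin_code][period].index[-1].strftime('%Y-%m-%d')
    csv_name = os.path.join(
        output_dir, '{}-{}-{}.csv'.format(basin_code, latest_date, period))
    master[basin_code][period].to_csv(
        csv_name, index=True, float_format='%0.3f')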
