Add extract_records() function

master
Dan Howe 6 years ago
parent 74eed20c20
commit 6f020db6c6

@ -1 +1,2 @@
from .waternsw_grabber import open_browser, get_telemetered_bore, telemetered_bore_downloader
from .waternsw_grabber import (telemetered_bore_downloader,
extract_records)

@ -231,3 +231,81 @@ def telemetered_bore_downloader(bore_ids, start_date, end_date, download_dir):
os.remove(log_name)
driver.quit()
def extract_records(input_dir, output_dir):
    """Extract downloaded bore records.

    Every zip archive in ``input_dir`` is read as a csv, the bore
    identifiers and location are parsed from the metadata string embedded
    in the file, and the selected columns are written to ``output_dir`` as
    ``<archive name>.csv``.

    Archives whose name contains a browser duplicate marker such as
    ``(1)`` are skipped. If an archive is empty (0 bytes), the matching
    ``.part`` file is read instead.

    Args:
        input_dir: path to downloaded zip archives
        output_dir: path to save csv files

    Raises:
        AttributeError: if the metadata string lacks an expected field
            (latitude, longitude, elevation or bore id).
        ValueError: if a parsed coordinate is not a number, or the bore id
            is not made of exactly three dot-separated parts.
    """
    # Find zip files
    zip_names = [f for f in os.listdir(input_dir) if f.endswith('.zip')]

    # Prepare output directory
    os.makedirs(output_dir, exist_ok=True)

    for zip_name in tqdm(zip_names):
        # Skip duplicate downloads, e.g. 'name (1).zip'
        if re.search(r'\([0-9]+\)', zip_name):
            continue

        # Use '.part' file if zip was not correctly downloaded
        archive_path = os.path.join(input_dir, zip_name)
        if os.path.getsize(archive_path) == 0:
            archive_path += '.part'

        # Read csv file inside zip archive
        df = pd.read_csv(
            archive_path,
            header=2,
            skiprows=[3],
            parse_dates=['Date'],
            compression='zip',
            dayfirst=True)

        # Get bore specifics. Judging by the patterns below, the metadata
        # string is expected to look like
        # '<site>.<hole>.<pipe> - <address> Lat:<lat> Long:<lon> Elev:<elev>m'
        meta = df.iloc[1, -1]
        lat = float(re.search(r'(?<=Lat:)\S+', meta).group())
        lon = float(re.search(r'(?<=Long:)\S+', meta).group())
        # Non-greedy, so the match stops at the unit suffix directly after
        # the number rather than at the last 'm' in the string
        elev = float(re.search(r'(?<=Elev:).+?(?=m)', meta).group())
        bore_id = re.search(r'^\S+', meta).group()
        site, hole, pipe = bore_id.split('.')

        # Add bore specifics to dataframe
        df['Site'] = site
        df['Hole'] = hole
        df['Pipe'] = pipe
        df['Lat'] = lat
        df['Lon'] = lon
        df['Elev'] = elev

        # Rename columns
        df = df.rename(
            columns={
                'Date': 'Date time',
                'Bore level below MP': 'Below Measuring Point',
                'GW Level - m AHD': 'Above Sea Level'
            })

        # Select output columns
        df = df[[
            'Site',
            'Hole',
            'Pipe',
            'Date time',
            'Below Measuring Point',
            'Above Sea Level',
            'Lat',
            'Lon',
            'Elev',
        ]]

        # Get csv name from zip archive. 'zip_name' always ends in '.zip'
        # here (the '.part' suffix only ever goes on 'archive_path'), so
        # swapping the extension gives '<name>.csv'.
        csv_name = os.path.join(
            output_dir, os.path.splitext(zip_name)[0] + '.csv')

        # Export to csv
        df.to_csv(csv_name, float_format='%0.3f')

Loading…
Cancel
Save