From 23508dba69113fe1f28eee2c4ca6c54b3b1cb761 Mon Sep 17 00:00:00 2001
From: Dan Howe
Date: Mon, 16 Sep 2019 13:19:30 +1000
Subject: [PATCH] Process multiple stations

---
 port-authority-nsw/port_authority_nsw.py | 77 +++++++++++++++---------
 1 file changed, 49 insertions(+), 28 deletions(-)

diff --git a/port-authority-nsw/port_authority_nsw.py b/port-authority-nsw/port_authority_nsw.py
index 05b2a9e..607f3be 100644
--- a/port-authority-nsw/port_authority_nsw.py
+++ b/port-authority-nsw/port_authority_nsw.py
@@ -35,10 +35,30 @@ import requests
 import pandas as pd
 from lxml import html
 
-# Set station as Fort Denison tide
-stn_id = 16
-
 output_dir = 'csv'
+stn_ids = [
+    2,   # Offshore (Directional) Wave
+    3,   # Bombora (Directional) Wave
+    4,   # Captain Cook Channel (SG) Wave
+    5,   # Kurnell (SG) Wave
+    6,   # Molineaux Point Wind
+    7,   # Sydney Airport (Main Runway BOM) Wind
+    8,   # Brotherson Emergency Response Jetty Tide
+    9,   # Caltex (Directional) Current
+    12,  # Western Wedding Cake Wind
+    13,  # Fort Denison (Sth end BOM) Wind
+    14,  # Overseas Passenger Terminal Wind
+    15,  # Glebe Island Wind
+    16,  # Fort Denison-Primary (Nth end) Tide
+    17,  # Fort Denison-Secondary (Vegapuls64) Tide
+    18,  # Circular Quay ADCP Current
+    19,  # Balls Head Current
+    22,  # Twofold Bay - Munganno Point Wave
+    23,  # Twofold Bay - Multipurpose Wharf Wind
+    24,  # Breakwater Wharf Wind
+    27,  # Middle Wall (Vegapulse WL61) Tide
+    28,  # Goodwood (Vegapulse WL61) Tide
+]
 
 
 def update_master(output_dir, csv_name, df):
@@ -77,28 +97,29 @@ url = 'http://wavewindtide.portauthoritynsw.com.au/'
 page = requests.get(url)
 tree = html.fromstring(page.content)
 
-# Get elements from selected station
-t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
-meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
-description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')
-
-# Parse column names
-text = re.split(':\s|(m|s|knots|deg)\s', meas.text + ' ')
-parts = [t for t in text if t]
-parameters = [p.strip() for p in parts[::3]]
-values = [float(p) for p in parts[1::3]]
-units = parts[2::3]
-columns = [f'{p} ({u})' for p, u in zip(parameters, units)]
-
-# Parse time
-time = re.search('at ([0-9]{4})', t_raw.text).group(1)
-date = t_raw.text.split(',')[1].strip()
-t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')
-
-# Create dataframe
-df = pd.DataFrame({c: v for c, v in zip(columns, values)}, index=[t])
-df.index.name = 'datetime'
-
-# Update master dataframe
-csv_name = description.text + '.csv'
-update_master(output_dir, csv_name, df)
+for stn_id in stn_ids:
+    # Get elements from selected station
+    t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
+    meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
+    description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')
+
+    # Parse column names (use raw strings so the regex escapes are literal)
+    text = re.split(r':\s|(m|s|knots|deg)\s', meas.text + ' ')
+    parts = [t for t in text if t]
+    parameters = [p.strip() for p in parts[::3]]
+    values = [float(p) for p in parts[1::3]]
+    units = parts[2::3]
+    columns = [f'{p} ({u})' for p, u in zip(parameters, units)]
+
+    # Parse time
+    time = re.search(r'at ([0-9]{4})', t_raw.text).group(1)
+    date = t_raw.text.split(',')[1].strip()
+    t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')
+
+    # Create dataframe
+    df = pd.DataFrame({c: v for c, v in zip(columns, values)}, index=[t])
+    df.index.name = 'datetime'
+
+    # Update master dataframe
+    csv_name = description.text + '.csv'
+    update_master(output_dir, csv_name, df)
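
For reference, the summary-parsing block in the loop splits each station's one-line summary on either ': ' or a unit followed by whitespace. Here is a self-contained walkthrough on a made-up summary string; the wording and parameters on the live page may differ, so treat the values below as illustrative only:

import re

# Hypothetical summary text in the shape the scraper expects
summary = 'Hsig: 1.2 m Hmax: 2.1 m Tp: 8.5 s Direction: 135 deg'

# The trailing space lets the unit alternatives (which require a
# following whitespace character) match at the end of the string
text = re.split(r':\s|(m|s|knots|deg)\s', summary + ' ')

# re.split() inserts None where the capturing group did not
# participate, plus a trailing ''; the filter drops both
parts = [t for t in text if t]

parameters = [p.strip() for p in parts[::3]]   # ['Hsig', 'Hmax', 'Tp', 'Direction']
values = [float(p) for p in parts[1::3]]       # [1.2, 2.1, 8.5, 135.0]
units = parts[2::3]                            # ['m', 'm', 's', 'deg']
columns = [f'{p} ({u})' for p, u in zip(parameters, units)]
print(columns)  # ['Hsig (m)', 'Hmax (m)', 'Tp (s)', 'Direction (deg)']

Every third element of the filtered list is a parameter name, followed by its value and its unit, which is why the three stride-3 slices recover the columns.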
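The time parsing similarly assumes the record-date label contains a 24-hour time after the word 'at' and a 'DD Mon YYYY' date after a comma. A minimal illustration with a hypothetical label string (the real page text may be worded differently):

import datetime
import re

# Hypothetical record-date label; assumed to contain 'at HHMM'
# and ', DD Mon YYYY' like the scraper expects
label = 'Measured at 1310 on Monday, 16 Sep 2019'

time = re.search(r'at ([0-9]{4})', label).group(1)  # '1310'
date = label.split(',')[1].strip()                  # '16 Sep 2019'
t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')
print(t)  # 2019-09-16 13:10:00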
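The body of update_master() is not part of this patch; only its signature appears as hunk context. A minimal sketch of an append-and-deduplicate helper consistent with that signature might look like the following. This is an assumption for illustration, not the actual implementation:

import os
import pandas as pd

def update_master(output_dir, csv_name, df):
    # Sketch only: the real function body is not shown in the patch
    csv_path = os.path.join(output_dir, csv_name)
    os.makedirs(output_dir, exist_ok=True)

    if os.path.exists(csv_path):
        master = pd.read_csv(csv_path, index_col='datetime', parse_dates=True)
        master = pd.concat([master, df])
        # Keep the latest reading for any repeated timestamp
        master = master[~master.index.duplicated(keep='last')].sort_index()
    else:
        master = df

    master.to_csv(csv_path)

Whatever the real implementation does, the loop calls it once per station with one master CSV per station title, and all 21 stations are parsed from the single HTTP request made before the loop.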