Process multiple stations

6 years ago · 23508dba69
parent 14e6a9021f
commit 23508dba69
1 changed file with 49 additions and 28 deletions
--- a/port-authority-nsw/port_authority_nsw.py
+++ b/port-authority-nsw/port_authority_nsw.py
@@ -35,10 +35,30 @@ import requests
 import pandas as pd
 from lxml import html

-# Set station as Fort Denison tide
-stn_id = 16
-
 output_dir = 'csv'
+stn_ids = [
+    2,   # Offshore (Directional) Wave
+    3,   # Bombora (Directional) Wave
+    4,   # Captain Cook Channel (SG) Wave
+    5,   # Kurnell (SG) Wave
+    6,   # Molineaux Point Wind
+    7,   # Sydney Airport (Main Runway BOM) Wind
+    8,   # Brotherson Emergency Response Jetty Tide
+    9,   # Caltex (Directional) Current
+    12,  # Western Wedding Cake Wind
+    13,  # Fort Denison (Sth end BOM) Wind
+    14,  # Overseas Passenger Terminal Wind
+    15,  # Glebe Island Wind
+    16,  # Fort Denison-Primary (Nth end) Tide
+    17,  # Fort Denison-Secondary (Vegapuls64) Tide
+    18,  # Circular Quay ADCP Current
+    19,  # Balls Head Current
+    22,  # Twofold Bay - Munganno Point Wave
+    23,  # Twofold Bay - Multipurpose Wharf Wind
+    24,  # Breakwater Wharf Wind
+    27,  # Middle Wall (Vegapulse WL61) Tide
+    28,  # Goodwood (Vegapulse WL61) Tide
+]


 def update_master(output_dir, csv_name, df):
@@ -77,28 +97,29 @@ url = 'http://wavewindtide.portauthoritynsw.com.au/'
 page = requests.get(url)
 tree = html.fromstring(page.content)

-# Get elements from selected station
-t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
-meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
-description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')
-
-# Parse column names
-text = re.split(':\s|(m|s|knots|deg)\s', meas.text + ' ')
-parts = [t for t in text if t]
-parameters = [p.strip() for p in parts[::3]]
-values = [float(p) for p in parts[1::3]]
-units = parts[2::3]
-columns = [f'{p} ({u})' for p, u in zip(parameters, units)]
-
-# Parse time
-time = re.search('at ([0-9]{4})', t_raw.text).group(1)
-date = t_raw.text.split(',')[1].strip()
-t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')
-
-# Create dataframe
-df = pd.DataFrame({c: v for c, v in zip(columns, values)}, index=[t])
-df.index.name = 'datetime'
-
-# Update master dataframe
-csv_name = description.text + '.csv'
-update_master(output_dir, csv_name, df)
+for stn_id in stn_ids:
+    # Get elements from selected station
+    t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
+    meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
+    description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')
+
+    # Parse column names
+    text = re.split(':\s|(m|s|knots|deg)\s', meas.text + ' ')
+    parts = [t for t in text if t]
+    parameters = [p.strip() for p in parts[::3]]
+    values = [float(p) for p in parts[1::3]]
+    units = parts[2::3]
+    columns = [f'{p} ({u})' for p, u in zip(parameters, units)]
+
+    # Parse time
+    time = re.search('at ([0-9]{4})', t_raw.text).group(1)
+    date = t_raw.text.split(',')[1].strip()
+    t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')
+
+    # Create dataframe
+    df = pd.DataFrame({c: v for c, v in zip(columns, values)}, index=[t])
+    df.index.name = 'datetime'
+
+    # Update master dataframe
+    csv_name = description.text + '.csv'
+    update_master(output_dir, csv_name, df)