Process multiple stations

7 years ago · 23508dba69
parent 14e6a9021f
commit 23508dba69
1 changed files with 49 additions and 28 deletions
--- a/port-authority-nsw/port_authority_nsw.py
+++ b/port-authority-nsw/port_authority_nsw.py
@@ -35,10 +35,30 @@ import requests
 import pandas as pd
 from lxml import html
 # Set station as Fort Denison tide
 stn_id = 16
 output_dir = 'csv'
+stn_ids = [
+    2,   # Offshore (Directional) Wave
+    3,   # Bombora (Directional) Wave
+    4,   # Captain Cook Channel (SG) Wave
+    5,   # Kurnell (SG) Wave
+    6,   # Molineaux Point Wind
+    7,   # Sydney Airport (Main Runway BOM) Wind
+    8,   # Brotherson Emergency Response Jetty Tide
+    9,   # Caltex (Directional) Current
+    12,  # Western Wedding Cake Wind
+    13,  # Fort Denison (Sth end BOM) Wind
+    14,  # Overseas Passenger Terminal Wind
+    15,  # Glebe Island Wind
+    16,  # Fort Denison-Primary (Nth end) Tide
+    17,  # Fort Denison-Secondary (Vegapuls64) Tide
+    18,  # Circular Quay ADCP Current
+    19,  # Balls Head Current
+    22,  # Twofold Bay - Munganno Point Wave
+    23,  # Twofold Bay - Multipurpose Wharf Wind
+    24,  # Breakwater Wharf Wind
+    27,  # Middle Wall (Vegapulse WL61) Tide
+    28,  # Goodwood (Vegapulse WL61) Tide
+]
 def update_master(output_dir, csv_name, df):
@@ -77,28 +97,29 @@ url = 'http://wavewindtide.portauthoritynsw.com.au/'
 page = requests.get(url)
 tree = html.fromstring(page.content)
-# Get elements from selected station
+for stn_id in stn_ids:
-t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
+    # Get elements from selected station
-meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
+    t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
-description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')
+    meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
-
+    description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')
-# Parse column names
+
-text = re.split(':\s|(m|s|knots|deg)\s', meas.text + ' ')
+    # Parse column names
-parts = [t for t in text if t]
+    text = re.split(':\s|(m|s|knots|deg)\s', meas.text + ' ')
-parameters = [p.strip() for p in parts[::3]]
+    parts = [t for t in text if t]
-values = [float(p) for p in parts[1::3]]
+    parameters = [p.strip() for p in parts[::3]]
-units = parts[2::3]
+    values = [float(p) for p in parts[1::3]]
-columns = [f'{p} ({u})' for p, u in zip(parameters, units)]
+    units = parts[2::3]
-
+    columns = [f'{p} ({u})' for p, u in zip(parameters, units)]
-# Parse time
+
-time = re.search('at ([0-9]{4})', t_raw.text).group(1)
+    # Parse time
-date = t_raw.text.split(',')[1].strip()
+    time = re.search('at ([0-9]{4})', t_raw.text).group(1)
-t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')
+    date = t_raw.text.split(',')[1].strip()
-
+    t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')
-# Create dataframe
+
-df = pd.DataFrame({c: v for c, v in zip(columns, values)}, index=[t])
+    # Create dataframe
-df.index.name = 'datetime'
+    df = pd.DataFrame({c: v for c, v in zip(columns, values)}, index=[t])
-
+    df.index.name = 'datetime'
-# Update master dataframe
+
-csv_name = description.text + '.csv'
+    # Update master dataframe
-update_master(output_dir, csv_name, df)
+    csv_name = description.text + '.csv'
+    update_master(output_dir, csv_name, df)