From b4d722191854ccf87c94a20af3bf89d6b2625d28 Mon Sep 17 00:00:00 2001
From: Dan Howe <d.howe@wrl.unsw.edu.au>
Date: Mon, 16 Sep 2019 09:33:13 +1000
Subject: [PATCH] Add new script

---
 port-authority-nsw/port_authority_nsw.py | 101 +++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 port-authority-nsw/port_authority_nsw.py

diff --git a/port-authority-nsw/port_authority_nsw.py b/port-authority-nsw/port_authority_nsw.py
new file mode 100644
index 0000000..4709a6d
--- /dev/null
+++ b/port-authority-nsw/port_authority_nsw.py
@@ -0,0 +1,101 @@
+"""Get latest observations from Port Authority of NSW and update local copy.
+
+Station IDs are below:
+
+02: Offshore (Directional) Wave
+03: Bombora (Directional) Wave
+04: Captain Cook Channel (SG) Wave
+05: Kurnell (SG) Wave
+06: Molineaux Point Wind
+07: Sydney Airport (Main Runway BOM) Wind
+08: Brotherson Emergency Response Jetty Tide
+09: Caltex (Directional) Current
+12: Western Wedding Cake Wind
+13: Fort Denison (Sth end BOM) Wind
+14: Overseas Passenger Terminal Wind
+15: Glebe Island Wind
+16: Fort Denison-Primary (Nth end) Tide
+17: Fort Denison-Secondary (Vegapuls64) Tide
+18: Circular Quay ADCP Current
+19: Balls Head Current
+22: Twofold Bay - Munganno Point Wave
+23: Twofold Bay - Multipurpose Wharf Wind
+24: Breakwater Wharf Wind
+27: Middle Wall (Vegapulse WL61) Tide
+28: Goodwood (Vegapulse WL61) Tide
+"""
+
+import os
+import re
+import datetime
+import requests
+import pandas as pd
+from lxml import html
+
+# Station to scrape: 16 = Fort Denison-Primary (Nth end) Tide (see list above)
+stn_id = 16
+
+output_dir = 'csv'  # directory passed to update_master for the master csv
+
+
+def update_master(output_dir, csv_name, df):
+    """Merge new observations into the master csv time series.
+
+    Rows of ``df`` whose timestamps already exist in the master file are
+    skipped. The output directory and file are created when missing.
+
+    Args:
+        output_dir (str): path to time series directory
+        csv_name (str):   name of time series file
+        df (dataframe):   dataframe with datetime index
+
+    Returns:
+        None
+    """
+    csv_path = os.path.join(output_dir, csv_name)
+    try:
+        # Load local master table if it exists
+        master = pd.read_csv(csv_path, index_col=0, parse_dates=True)
+    except FileNotFoundError:
+        # First run: make sure the directory exists, then start a new table
+        os.makedirs(output_dir or '.', exist_ok=True)
+        master = df
+    else:
+        # Append only timestamps that are not already stored; pd.concat
+        # replaces DataFrame.append, which was removed in pandas 2.0
+        master = pd.concat([master, df[~df.index.isin(master.index)]])
+
+    # Export master table
+    master.to_csv(csv_path)
+
+
+# Get main page; fail fast on network stalls and HTTP error responses
+url = 'http://wavewindtide.portauthoritynsw.com.au/'
+page = requests.get(url, timeout=30)
+page.raise_for_status()
+tree = html.fromstring(page.content)
+
+# Get elements from selected station
+t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
+meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
+description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')
+
+# Parse column names: ':'-separated fields, name then value+unit, in threes
+text = meas.text.split(':')
+parameters = text[::3]
+entries = [re.split(r'(\D+)$', t) for t in text[1::3]]
+values = [float(e[0]) for e in entries]
+units = [e[1] for e in entries]
+columns = [f'{p} ({u})' for p, u in zip(parameters, units)]
+
+# Parse time: 4-digit HHMM after 'at', date after the first comma
+time = re.search(r'at ([0-9]{4})', t_raw.text).group(1)
+date = t_raw.text.split(',')[1].strip()
+t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')
+
+# Create single-row dataframe indexed by the observation time
+df = pd.DataFrame(dict(zip(columns, values)), index=[t])
+df.index.name = 'datetime'
+
+# Update master dataframe (file is named after the station title)
+update_master(output_dir, description.text + '.csv', df)