diff --git a/port-authority-nsw/port_authority_nsw.py b/port-authority-nsw/port_authority_nsw.py
new file mode 100644
index 0000000..4709a6d
--- /dev/null
+++ b/port-authority-nsw/port_authority_nsw.py
@@ -0,0 +1,103 @@
+"""Get latest observations from Port Authority of NSW and update local copy.
+
+Station IDs are listed below:
+
+02: Offshore (Directional) Wave
+03: Bombora (Directional) Wave
+04: Captain Cook Channel (SG) Wave
+05: Kurnell (SG) Wave
+06: Molineaux Point Wind
+07: Sydney Airport (Main Runway BOM) Wind
+08: Brotherson Emergency Response Jetty Tide
+09: Caltex (Directional) Current
+12: Western Wedding Cake Wind
+13: Fort Denison (Sth end BOM) Wind
+14: Overseas Passenger Terminal Wind
+15: Glebe Island Wind
+16: Fort Denison-Primary (Nth end) Tide
+17: Fort Denison-Secondary (Vegapuls64) Tide
+18: Circular Quay ADCP Current
+19: Balls Head Current
+22: Twofold Bay - Munganno Point Wave
+23: Twofold Bay - Multipurpose Wharf Wind
+24: Breakwater Wharf Wind
+27: Middle Wall (Vegapulse WL61) Tide
+28: Goodwood (Vegapulse WL61) Tide
+"""
+
+import os
+import re
+import datetime
+import requests
+import pandas as pd
+from lxml import html
+
+# Set station as Fort Denison tide
+stn_id = 16
+
+output_dir = 'csv'
+
+
+def update_master(output_dir, csv_name, df):
+    """Update master csv time series.
+
+    Args:
+        output_dir (str): path to time series directory
+        csv_name (str): name of time series file
+        df (DataFrame): dataframe with datetime index
+
+    Returns:
+        None
+    """
+    try:
+        # Load local master table if it exists
+        master = pd.read_csv(os.path.join(output_dir, csv_name),
+                             index_col=0,
+                             parse_dates=True)
+
+        # Only include timestamps that do not already exist
+        df = df[~df.index.isin(master.index)]
+
+        # Append new observations to master
+        master = pd.concat([master, df])
+
+    except FileNotFoundError:
+        # Create new master table if none exists
+        master = df
+
+    # Export master table, creating the output directory if needed
+    os.makedirs(output_dir, exist_ok=True)
+    master.to_csv(os.path.join(output_dir, csv_name))
+
+
+# Get main page
+url = 'http://wavewindtide.portauthoritynsw.com.au/'
+page = requests.get(url, timeout=30)
+page.raise_for_status()
+tree = html.fromstring(page.content)
+
+# Get elements from selected station
+t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
+meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
+description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')
+
+# Parse parameter names, values and units from the summary text
+text = re.split(':', meas.text)
+parameters = text[::3]
+entries = [re.split(r'(\D+)$', t) for t in text[1::3]]
+values = [float(e[0]) for e in entries]
+units = [e[1] for e in entries]
+columns = [f'{p} ({u})' for p, u in zip(parameters, units)]
+
+# Parse observation time
+time = re.search('at ([0-9]{4})', t_raw.text).group(1)
+date = t_raw.text.split(',')[1].strip()
+t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')
+
+# Create single-row dataframe indexed by observation time
+df = pd.DataFrame({c: v for c, v in zip(columns, values)}, index=[t])
+df.index.name = 'datetime'
+
+# Update master dataframe
+csv_name = description.text + '.csv'
+update_master(output_dir, csv_name, df)
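
A few notes on the parsing logic added above. First, the value/unit split: re.split with a captured trailing non-digit group separates a reading into its number and unit. The sample input '1.05m' below is an assumption, since the live page's summary format is not shown in this diff.

    import re

    entry = re.split(r'(\D+)$', '1.05m')  # assumed sample reading -> ['1.05', 'm', '']
    value, unit = float(entry[0]), entry[1]
    print(value, unit)  # 1.05 m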
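
Second, the timestamp parsing expects the record-date label to contain a 24-hour time after the word 'at' and a comma-separated date. A minimal sketch, assuming hypothetical label wording:

    import re
    import datetime

    t_text = 'Recorded at 1030, 17 May 2019'  # hypothetical; actual wording may differ
    time = re.search('at ([0-9]{4})', t_text).group(1)  # '1030'
    date = t_text.split(',')[1].strip()  # '17 May 2019'
    print(datetime.datetime.strptime(date + time, '%d %b %Y%H%M'))
    # 2019-05-17 10:30:00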
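
Finally, a runnable sketch of the deduplication behaviour in update_master: re-running the script with an overlapping observation should not duplicate rows. The tide values here are made up for illustration.

    import pandas as pd

    old = pd.DataFrame({'Tide (m)': [1.2]},
                       index=pd.to_datetime(['2019-05-17 10:15']))
    new = pd.DataFrame({'Tide (m)': [1.2, 1.3]},
                       index=pd.to_datetime(['2019-05-17 10:15',
                                             '2019-05-17 10:30']))

    new = new[~new.index.isin(old.index)]  # drop the already-stored 10:15 row
    print(pd.concat([old, new]))  # one row per timestamp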