diff --git a/port-authority-nsw/README.md b/port-authority-nsw/README.md index 99678ab..92420eb 100644 --- a/port-authority-nsw/README.md +++ b/port-authority-nsw/README.md @@ -1,4 +1,4 @@ -# pansw_tide.py +# port_authority_nsw.py Download latest time series data from Port Authority of NSW and update local copy. [wavewindtide.portauthoritynsw.com.au](http://wavewindtide.portauthoritynsw.com.au/) diff --git a/port-authority-nsw/pansw_tide.py b/port-authority-nsw/pansw_tide.py deleted file mode 100644 index 4709a6d..0000000 --- a/port-authority-nsw/pansw_tide.py +++ /dev/null @@ -1,101 +0,0 @@ -"""Get latest observations from Port Authority of NSW and update local copy. - -Station IDs are below: - -02: Offshore (Directional) Wave -03: Bombora (Directional) Wave -04: Captain Cook Channel (SG) Wave -05: Kurnell (SG) Wave -06: Molineaux Point Wind -07: Sydney Airport (Main Runway BOM) Wind -08: Brotherson Emergency Response Jetty Tide -09: Caltex (Directional) Current -12: Western Wedding Cake Wind -13: Fort Denison (Sth end BOM) Wind -14: Overseas Passenger Terminal Wind -15: Glebe Island Wind -16: Fort Denison-Primary (Nth end) Tide -17: Fort Denison-Secondary (Vegapuls64) Tide -18: Circular Quay ADCP Current -19: Balls Head Current -22: Twofold Bay - Munganno Point Wave -23: Twofold Bay - Multipurpose Wharf Wind -24: Breakwater Wharf Wind -27: Middle Wall (Vegapulse WL61) Tide -28: Goodwood (Vegapulse WL61) Tide -""" - -import os -import re -import datetime -import requests -import pandas as pd -from lxml import html - -# Set station as Fort Denison tide -stn_id = 16 - -output_dir = 'csv' - - -def update_master(output_dir, csv_name, df): - """Update master csv time series. - - Args: - output_dir (str): path to time series directory - csv_name (str): name of time series file - df (dataframe): dataframe with datetime index - - Returns: - None - """ - try: - # Load local master table if it exists - master = pd.read_csv(os.path.join(output_dir, csv_name), - index_col=0, - parse_dates=True) - - # Only include timestamps that do not already exist - df = df[~df.index.isin(master.index)] - - # Update master - master = master.append(df) - - except FileNotFoundError: - # Create new master table if none exists - master = df - - # Export master table - master.to_csv(os.path.join(output_dir, csv_name)) - - -# Get main page -url = 'http://wavewindtide.portauthoritynsw.com.au/' -page = requests.get(url) -tree = html.fromstring(page.content) - -# Get elements from selected station -t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate') -meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary') -description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle') - -# Parse column names -text = re.split(':', meas.text) -parameters = text[::3] -entries = [re.split('(\D+)$', t) for t in text[1::3]] -values = [float(e[0]) for e in entries] -units = [e[1] for e in entries] -columns = [f'{p} ({u})' for p, u in zip(parameters, units)] - -# Parse time -time = re.search('at ([0-9]{4})', t_raw.text).group(1) -date = t_raw.text.split(',')[1].strip() -t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M') - -# Create dataframe -df = pd.DataFrame({c: v for c, v in zip(columns, values)}, index=[t]) -df.index.name = 'datetime' - -# Update master dataframe -csv_name = description.text + '.csv' -update_master(output_dir, csv_name, df)