Add new script

7 years ago · b4d7221918
parent 391551d8aa
commit b4d7221918
1 changed files with 101 additions and 0 deletions
--- a/port-authority-nsw/port_authority_nsw.py
+++ b/port-authority-nsw/port_authority_nsw.py
@@ -0,0 +1,101 @@
 """Get latest observations from Port Authority of NSW and update local copy.
 Station IDs are below:
 02: Offshore (Directional) Wave
 03: Bombora (Directional) Wave
 04: Captain Cook Channel (SG) Wave
 05: Kurnell (SG) Wave
 06: Molineaux Point Wind
 07: Sydney Airport (Main Runway BOM) Wind
 08: Brotherson Emergency Response Jetty Tide
 09: Caltex (Directional) Current
 12: Western Wedding Cake Wind
 13: Fort Denison (Sth end BOM) Wind
 14: Overseas Passenger Terminal Wind
 15: Glebe Island Wind
 16: Fort Denison-Primary (Nth end) Tide
 17: Fort Denison-Secondary (Vegapuls64) Tide
 18: Circular Quay ADCP Current
 19: Balls Head Current
 22: Twofold Bay - Munganno Point Wave
 23: Twofold Bay - Multipurpose Wharf Wind
 24: Breakwater Wharf Wind
 27: Middle Wall (Vegapulse WL61) Tide
 28: Goodwood (Vegapulse WL61) Tide
 """
 import os
 import re
 import datetime
 import requests
 import pandas as pd
 from lxml import html
 # Station to download, using the IDs listed in the module docstring
 # (16 = Fort Denison-Primary (Nth end) Tide)
 stn_id = 16
 # Directory holding the master csv; passed to update_master() below
 output_dir = 'csv'
 def update_master(output_dir, csv_name, df):
     """Update master csv time series.

     Rows of ``df`` whose timestamps are not already in the master csv are
     appended to it. If no master csv exists yet, one is created from ``df``.

     Args:
         output_dir (str): path to time series directory (created if missing)
         csv_name (str):   name of time series file
         df (dataframe):   dataframe with datetime index

     Returns:
         None
     """
     csv_path = os.path.join(output_dir, csv_name)

     try:
         # Load local master table if it exists
         master = pd.read_csv(csv_path, index_col=0, parse_dates=True)
     except FileNotFoundError:
         # Create new master table if none exists
         master = df
     else:
         # Only include timestamps that do not already exist
         new_rows = df[~df.index.isin(master.index)]
         # DataFrame.append was removed in pandas 2.0; pd.concat is the
         # supported replacement. Skip it when there is nothing to add so
         # the master table is written back unchanged.
         if not new_rows.empty:
             master = pd.concat([master, new_rows])

     # to_csv does not create directories, so make sure the target exists
     # (an empty output_dir means the current directory).
     if output_dir:
         os.makedirs(output_dir, exist_ok=True)

     # Export master table
     master.to_csv(csv_path)
 # Get main page
 url = 'http://wavewindtide.portauthoritynsw.com.au/'
 # Bound the request so an unresponsive server cannot hang the script
 page = requests.get(url, timeout=30)
 # Fail on an HTTP error here rather than on a missing-element lookup below
 page.raise_for_status()
 tree = html.fromstring(page.content)

 # Get elements from selected station (element ids embed the zero-padded
 # station id)
 t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
 meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
 description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')

 # Parse column names: the summary text is split on ':'; every third field
 # starting at 0 is taken as a parameter name and every third field starting
 # at 1 as its reading.
 text = meas.text.split(':')
 parameters = text[::3]
 # Split each reading into its leading value and the trailing run of
 # non-digit characters, which is used as the unit (raw string keeps \D a
 # regex escape rather than an invalid string escape).
 entries = [re.split(r'(\D+)$', t) for t in text[1::3]]
 values = [float(e[0]) for e in entries]
 units = [e[1] for e in entries]
 columns = [f'{p} ({u})' for p, u in zip(parameters, units)]

 # Parse time: a 4-digit HHMM after 'at', and the date from the second
 # comma-separated field of the record-date label
 time = re.search(r'at ([0-9]{4})', t_raw.text).group(1)
 date = t_raw.text.split(',')[1].strip()
 t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')

 # Create single-row dataframe indexed by the observation time
 df = pd.DataFrame(dict(zip(columns, values)), index=[t])
 df.index.name = 'datetime'

 # Update master dataframe
 # NOTE(review): the station title is used verbatim as the file name --
 # confirm it can never contain path separators.
 csv_name = description.text + '.csv'
 update_master(output_dir, csv_name, df)