# python-snippets/port-authority-nsw/port_authority_nsw.py

"""Get latest observations from Port Authority of NSW and update local copy.

Station IDs are below:

02: Offshore (Directional) Wave
03: Bombora (Directional) Wave
04: Captain Cook Channel (SG) Wave
05: Kurnell (SG) Wave
06: Molineaux Point Wind
07: Sydney Airport (Main Runway BOM) Wind
08: Brotherson Emergency Response Jetty Tide
09: Caltex (Directional) Current
12: Western Wedding Cake Wind
13: Fort Denison (Sth end BOM) Wind
14: Overseas Passenger Terminal Wind
15: Glebe Island Wind
16: Fort Denison-Primary (Nth end) Tide
17: Fort Denison-Secondary (Vegapuls64) Tide
18: Circular Quay ADCP Current
19: Balls Head Current
22: Twofold Bay - Munganno Point Wave
23: Twofold Bay - Multipurpose Wharf Wind
24: Breakwater Wharf Wind
27: Middle Wall (Vegapulse WL61) Tide
28: Goodwood (Vegapulse WL61) Tide
"""

import os
import re
import datetime
import requests
import pandas as pd
from lxml import html

# Station to fetch, as a numeric id from the station list in the docstring
# above (16 is "Fort Denison-Primary (Nth end) Tide"). The id is zero-padded
# to two digits when building the HTML element ids further down.
stn_id = 16

# Directory holding the master csv time series (passed to update_master)
output_dir = 'csv'


def update_master(output_dir, csv_name, df):
    """Update master csv time series.

    If the master file already exists, rows of ``df`` whose index values are
    already present in the master are dropped (the stored rows win) and the
    remaining rows are appended. If the file does not exist, ``df`` becomes
    the new master. The master table is then written back to disk, creating
    ``output_dir`` first if necessary.

    Args:
        output_dir (str): path to time series directory
        csv_name (str):   name of time series file
        df (dataframe):   dataframe with datetime index

    Returns:
        None
    """
    csv_path = os.path.join(output_dir, csv_name)

    try:
        # Load local master table if it exists
        master = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    except FileNotFoundError:
        # Create new master table if none exists
        master = df
    else:
        # Only include timestamps that do not already exist
        new_rows = df[~df.index.isin(master.index)]

        # DataFrame.append was removed in pandas 2.0, so use pd.concat.
        # Skipping the concat when nothing is new leaves master untouched.
        if not new_rows.empty:
            master = pd.concat([master, new_rows])

    # to_csv cannot write into a missing directory, so make sure it exists
    # (an empty output_dir means the current directory; nothing to create)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Export master table
    master.to_csv(csv_path)


# Get main page
url = 'http://wavewindtide.portauthoritynsw.com.au/'
# A timeout stops the script hanging indefinitely on an unresponsive server
page = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page
page.raise_for_status()
tree = html.fromstring(page.content)

# Get elements from selected station. The three element ids share a prefix
# built from the station id, zero-padded to two digits.
id_prefix = f'MainContent_ctl{stn_id:02}'
t_raw = tree.get_element_by_id(f'{id_prefix}_lblRecordDate')
meas = tree.get_element_by_id(f'{id_prefix}_lblSummary')
description = tree.get_element_by_id(f'{id_prefix}_lblTitle')

# Parse column names. The summary text is split on ':'; every third field
# starting at 0 is taken as a parameter name, and every third field starting
# at 1 as its value with a trailing unit.
text = meas.text.split(':')
parameters = text[::3]
# Split each value field into its leading part and the trailing run of
# non-digit characters (raw string so that \D is a regex escape)
entries = [re.split(r'(\D+)$', t) for t in text[1::3]]
values = [float(e[0]) for e in entries]
units = [e[1] for e in entries]
columns = [f'{p} ({u})' for p, u in zip(parameters, units)]

# Parse time: a four-digit HHMM group following 'at ', and the date taken
# from the text after the first comma
time_match = re.search('at ([0-9]{4})', t_raw.text)
if time_match is None:
    raise ValueError(f'Could not find observation time in {t_raw.text!r}')
time = time_match.group(1)
date = t_raw.text.split(',')[1].strip()
t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')

# Create single-row dataframe indexed by the observation time
df = pd.DataFrame(dict(zip(columns, values)), index=[t])
df.index.name = 'datetime'

# Update master dataframe (one csv per station, named after its title)
csv_name = description.text + '.csv'
update_master(output_dir, csv_name, df)
Add new script 5 years ago			`"""Get latest observations from Port Authority of NSW and update local copy.`

			`Station IDs are below:`

			`02: Offshore (Directional) Wave`
			`03: Bombora (Directional) Wave`
			`04: Captain Cook Channel (SG) Wave`
			`05: Kurnell (SG) Wave`
			`06: Molineaux Point Wind`
			`07: Sydney Airport (Main Runway BOM) Wind`
			`08: Brotherson Emergency Response Jetty Tide`
			`09: Caltex (Directional) Current`
			`12: Western Wedding Cake Wind`
			`13: Fort Denison (Sth end BOM) Wind`
			`14: Overseas Passenger Terminal Wind`
			`15: Glebe Island Wind`
			`16: Fort Denison-Primary (Nth end) Tide`
			`17: Fort Denison-Secondary (Vegapuls64) Tide`
			`18: Circular Quay ADCP Current`
			`19: Balls Head Current`
			`22: Twofold Bay - Munganno Point Wave`
			`23: Twofold Bay - Multipurpose Wharf Wind`
			`24: Breakwater Wharf Wind`
			`27: Middle Wall (Vegapulse WL61) Tide`
			`28: Goodwood (Vegapulse WL61) Tide`
			`"""`

			`import os`
			`import re`
			`import datetime`
			`import requests`
			`import pandas as pd`
			`from lxml import html`

			`# Set station as Fort Denison tide`
			`stn_id = 16`

			`output_dir = 'csv'`


			`def update_master(output_dir, csv_name, df):`
			`"""Update master csv time series.`

			`Args:`
			`output_dir (str): path to time series directory`
			`csv_name (str): name of time series file`
			`df (dataframe): dataframe with datetime index`

			`Returns:`
			`None`
			`"""`
			`try:`
			`# Load local master table if it exists`
			`master = pd.read_csv(os.path.join(output_dir, csv_name),`
			`index_col=0,`
			`parse_dates=True)`

			`# Only include timestamps that do not already exist`
			`df = df[~df.index.isin(master.index)]`

			`# Update master`
			`master = master.append(df)`

			`except FileNotFoundError:`
			`# Create new master table if none exists`
			`master = df`

			`# Export master table`
			`master.to_csv(os.path.join(output_dir, csv_name))`


			`# Get main page`
			`url = 'http://wavewindtide.portauthoritynsw.com.au/'`
			`page = requests.get(url)`
			`tree = html.fromstring(page.content)`

			`# Get elements from selected station`
			`t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')`
			`meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')`
			`description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')`

			`# Parse column names`
			`text = re.split(':', meas.text)`
			`parameters = text[::3]`
			`entries = [re.split('(\D+)$', t) for t in text[1::3]]`
			`values = [float(e[0]) for e in entries]`
			`units = [e[1] for e in entries]`
			`columns = [f'{p} ({u})' for p, u in zip(parameters, units)]`

			`# Parse time`
			`time = re.search('at ([0-9]{4})', t_raw.text).group(1)`
			`date = t_raw.text.split(',')[1].strip()`
			`t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')`

			`# Create dataframe`
			`df = pd.DataFrame({c: v for c, v in zip(columns, values)}, index=[t])`
			`df.index.name = 'datetime'`

			`# Update master dataframe`
			`csv_name = description.text + '.csv'`
			`update_master(output_dir, csv_name, df)`