Add new script
parent
391551d8aa
commit
b4d7221918
@ -0,0 +1,101 @@
|
|||||||
|
"""Get latest observations from Port Authority of NSW and update local copy.

Station IDs are below:

    02: Offshore (Directional) Wave
    03: Bombora (Directional) Wave
    04: Captain Cook Channel (SG) Wave
    05: Kurnell (SG) Wave
    06: Molineaux Point Wind
    07: Sydney Airport (Main Runway BOM) Wind
    08: Brotherson Emergency Response Jetty Tide
    09: Caltex (Directional) Current
    12: Western Wedding Cake Wind
    13: Fort Denison (Sth end BOM) Wind
    14: Overseas Passenger Terminal Wind
    15: Glebe Island Wind
    16: Fort Denison-Primary (Nth end) Tide
    17: Fort Denison-Secondary (Vegapuls64) Tide
    18: Circular Quay ADCP Current
    19: Balls Head Current
    22: Twofold Bay - Munganno Point Wave
    23: Twofold Bay - Multipurpose Wharf Wind
    24: Breakwater Wharf Wind
    27: Middle Wall (Vegapulse WL61) Tide
    28: Goodwood (Vegapulse WL61) Tide
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
import requests
|
||||||
|
import pandas as pd
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
# Set station as Fort Denison tide
# (the integer is zero-padded to two digits when building element ids)
stn_id = 16

# Directory that the station's csv time series is written to
output_dir = 'csv'
|
||||||
|
|
||||||
|
|
||||||
|
def update_master(output_dir, csv_name, df):
    """Update master csv time series.

    Rows of ``df`` whose index values are not already present in the master
    file are appended to it. If the master file does not exist yet, it is
    created from ``df``. The output directory is created when missing.

    Args:
        output_dir (str): path to time series directory
        csv_name (str): name of time series file
        df (dataframe): dataframe with datetime index

    Returns:
        None
    """
    csv_path = os.path.join(output_dir, csv_name)

    try:
        # Load local master table if it exists
        master = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    except FileNotFoundError:
        # Create new master table if none exists
        master = df
    else:
        # Only include timestamps that do not already exist
        new_rows = df[~df.index.isin(master.index)]

        # DataFrame.append was removed in pandas 2.0, so use pd.concat.
        # Skip the concat entirely when there is nothing new to add.
        if not new_rows.empty:
            master = pd.concat([master, new_rows])

    # Make sure the target directory exists before writing
    # (an empty output_dir means the current directory)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Export master table
    master.to_csv(csv_path)
|
||||||
|
|
||||||
|
|
||||||
|
# Get main page
url = 'http://wavewindtide.portauthoritynsw.com.au/'
# A timeout stops the script from hanging forever on an unresponsive server
page = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page
page.raise_for_status()
tree = html.fromstring(page.content)

# Get elements from selected station
# (element ids embed the station id, zero-padded to two digits)
t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')

# Parse column names
# The summary text is split on ':'; every third field (starting at 0) is
# used as a parameter name and the field after it as "<number><unit>".
# NOTE(review): this relies on the page's current summary layout -- confirm
# if the site changes.
text = meas.text.split(':')
parameters = text[::3]
# Split each entry into its leading number and trailing non-digit unit
entries = [re.split(r'(\D+)$', t) for t in text[1::3]]
values = [float(e[0]) for e in entries]
units = [e[1] for e in entries]
columns = [f'{p} ({u})' for p, u in zip(parameters, units)]

# Parse time
# The four digits after 'at ' are read as HHMM, and the text after the
# first comma is read as a '%d %b %Y' date.
time = re.search(r'at ([0-9]{4})', t_raw.text).group(1)
date = t_raw.text.split(',')[1].strip()
t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')

# Create dataframe (a single row indexed by the observation time)
df = pd.DataFrame(dict(zip(columns, values)), index=[t])
df.index.name = 'datetime'

# Update master dataframe
# The csv file is named after the station title shown on the page
csv_name = description.text + '.csv'
update_master(output_dir, csv_name, df)
|
Loading…
Reference in New Issue