Add new script
parent
391551d8aa
commit
b4d7221918
@ -0,0 +1,101 @@
|
||||
"""Get latest observations from Port Authority of NSW and update local copy.
|
||||
|
||||
Station IDs are below:
|
||||
|
||||
02: Offshore (Directional) Wave
|
||||
03: Bombora (Directional) Wave
|
||||
04: Captain Cook Channel (SG) Wave
|
||||
05: Kurnell (SG) Wave
|
||||
06: Molineaux Point Wind
|
||||
07: Sydney Airport (Main Runway BOM) Wind
|
||||
08: Brotherson Emergency Response Jetty Tide
|
||||
09: Caltex (Directional) Current
|
||||
12: Western Wedding Cake Wind
|
||||
13: Fort Denison (Sth end BOM) Wind
|
||||
14: Overseas Passenger Terminal Wind
|
||||
15: Glebe Island Wind
|
||||
16: Fort Denison-Primary (Nth end) Tide
|
||||
17: Fort Denison-Secondary (Vegapuls64) Tide
|
||||
18: Circular Quay ADCP Current
|
||||
19: Balls Head Current
|
||||
22: Twofold Bay - Munganno Point Wave
|
||||
23: Twofold Bay - Multipurpose Wharf Wind
|
||||
24: Breakwater Wharf Wind
|
||||
27: Middle Wall (Vegapulse WL61) Tide
|
||||
28: Goodwood (Vegapulse WL61) Tide
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import datetime
|
||||
import requests
|
||||
import pandas as pd
|
||||
from lxml import html
|
||||
|
||||
# Set station as Fort Denison tide
# (station 16 = Fort Denison-Primary (Nth end) Tide; see module docstring)
stn_id = 16

# Directory where master CSV time series files are stored/updated
output_dir = 'csv'
|
||||
|
||||
|
||||
def update_master(output_dir, csv_name, df):
    """Update master csv time series.

    Appends the new observations in ``df`` to the local master table,
    skipping any timestamps already present, then writes the table back
    to ``output_dir/csv_name``. Creates a new table if none exists.

    Args:
        output_dir (str): path to time series directory
        csv_name (str): name of time series file
        df (dataframe): dataframe with datetime index

    Returns:
        None
    """
    try:
        # Load local master table if it exists
        master = pd.read_csv(os.path.join(output_dir, csv_name),
                             index_col=0,
                             parse_dates=True)

        # Only include timestamps that do not already exist
        df = df[~df.index.isin(master.index)]

        # Update master. DataFrame.append was deprecated in pandas 1.4
        # and removed in pandas 2.0; pd.concat is the supported equivalent.
        master = pd.concat([master, df])

    except FileNotFoundError:
        # Create new master table if none exists
        master = df

    # Export master table
    master.to_csv(os.path.join(output_dir, csv_name))
|
||||
|
||||
|
||||
# Get main page
url = 'http://wavewindtide.portauthoritynsw.com.au/'
page = requests.get(url)
# Fail fast on HTTP errors rather than trying to parse an error page
page.raise_for_status()
tree = html.fromstring(page.content)

# Get elements from selected station
t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')

# Parse column names
# NOTE(review): summary text appears to be ':'-separated fields where every
# third token is a parameter name and the following token is 'value<unit>' --
# verify against the live page if parsing breaks.
text = re.split(':', meas.text)
parameters = text[::3]
# Raw string: '\D' in a non-raw literal is an invalid escape sequence
# (DeprecationWarning now, error in future Python versions)
entries = [re.split(r'(\D+)$', t) for t in text[1::3]]
values = [float(e[0]) for e in entries]
units = [e[1] for e in entries]
columns = [f'{p} ({u})' for p, u in zip(parameters, units)]

# Parse time (e.g. '... at 0930' + date after the first comma)
time = re.search(r'at ([0-9]{4})', t_raw.text).group(1)
date = t_raw.text.split(',')[1].strip()
t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')

# Create single-row dataframe for this observation time
df = pd.DataFrame({c: v for c, v in zip(columns, values)}, index=[t])
df.index.name = 'datetime'

# Update master dataframe (one CSV per station, named after its title)
csv_name = description.text + '.csv'
update_master(output_dir, csv_name, df)
|
Loading…
Reference in New Issue