Rename pansw script

master
Dan Howe 5 years ago
parent 5891431a37
commit 391551d8aa

@ -1,4 +1,4 @@
# pansw_tide.py
# port_authority_nsw.py
Download latest time series data from Port Authority of NSW and update local copy.
[wavewindtide.portauthoritynsw.com.au](http://wavewindtide.portauthoritynsw.com.au/)

@ -1,101 +0,0 @@
"""Get latest observations from Port Authority of NSW and update local copy.
Station IDs are below:
02: Offshore (Directional) Wave
03: Bombora (Directional) Wave
04: Captain Cook Channel (SG) Wave
05: Kurnell (SG) Wave
06: Molineaux Point Wind
07: Sydney Airport (Main Runway BOM) Wind
08: Brotherson Emergency Response Jetty Tide
09: Caltex (Directional) Current
12: Western Wedding Cake Wind
13: Fort Denison (Sth end BOM) Wind
14: Overseas Passenger Terminal Wind
15: Glebe Island Wind
16: Fort Denison-Primary (Nth end) Tide
17: Fort Denison-Secondary (Vegapuls64) Tide
18: Circular Quay ADCP Current
19: Balls Head Current
22: Twofold Bay - Munganno Point Wave
23: Twofold Bay - Multipurpose Wharf Wind
24: Breakwater Wharf Wind
27: Middle Wall (Vegapulse WL61) Tide
28: Goodwood (Vegapulse WL61) Tide
"""
import os
import re
import datetime
import requests
import pandas as pd
from lxml import html
# Station to scrape: 16 = Fort Denison-Primary (Nth end) tide gauge
# (see the station ID list in the module docstring)
stn_id = 16
# Directory where the per-station master CSV time series are kept
output_dir = 'csv'
def update_master(output_dir, csv_name, df):
    """Update master csv time series.

    Appends the rows of ``df`` whose timestamps are not already present in
    the master CSV, creating the master file if it does not exist yet.

    Args:
        output_dir (str): path to time series directory
        csv_name (str): name of time series file
        df (dataframe): dataframe with datetime index

    Returns:
        None
    """
    csv_path = os.path.join(output_dir, csv_name)
    try:
        # Load local master table if it exists
        master = pd.read_csv(csv_path, index_col=0, parse_dates=True)
        # Only include timestamps that do not already exist
        df = df[~df.index.isin(master.index)]
        # Append new rows; DataFrame.append was removed in pandas 2.0,
        # so use pd.concat instead
        master = pd.concat([master, df])
    except FileNotFoundError:
        # Create new master table if none exists
        master = df
    # Export master table
    master.to_csv(csv_path)
# Get main page (timeout so an unresponsive server cannot hang the script)
url = 'http://wavewindtide.portauthoritynsw.com.au/'
page = requests.get(url, timeout=30)
tree = html.fromstring(page.content)

# Get elements from selected station; the page exposes each station's
# fields through ids of the form 'MainContent_ctlNN_lbl...'
t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')

# Parse column names: the summary text is 'name: value unit' triplets
# separated by colons
text = re.split(':', meas.text)
parameters = text[::3]
# Raw string for the regex — '\D' in a plain string is an invalid
# escape sequence (SyntaxWarning in modern Python)
entries = [re.split(r'(\D+)$', t) for t in text[1::3]]
values = [float(e[0]) for e in entries]
units = [e[1] for e in entries]
columns = [f'{p} ({u})' for p, u in zip(parameters, units)]

# Parse observation time, e.g. '... , 01 Jan 2020 at 0930'
time = re.search(r'at ([0-9]{4})', t_raw.text).group(1)
date = t_raw.text.split(',')[1].strip()
t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')

# Create single-row dataframe indexed by observation timestamp
df = pd.DataFrame({c: v for c, v in zip(columns, values)}, index=[t])
df.index.name = 'datetime'

# Update master dataframe on disk for this station
csv_name = description.text + '.csv'
update_master(output_dir, csv_name, df)
Loading…
Cancel
Save