|
|
|
@ -35,10 +35,30 @@ import requests
|
|
|
|
|
import pandas as pd
|
|
|
|
|
from lxml import html
|
|
|
|
|
|
|
|
|
|
# Select the Fort Denison primary tide gauge (station ID 16)
|
|
|
|
|
stn_id = 16
|
|
|
|
|
|
|
|
|
|
# Directory handed to update_master() for every station's CSV file.
output_dir = 'csv'

# Station table: (numeric station ID, description of the station).
_STATIONS = (
    (2, 'Offshore (Directional) Wave'),
    (3, 'Bombora (Directional) Wave'),
    (4, 'Captain Cook Channel (SG) Wave'),
    (5, 'Kurnell (SG) Wave'),
    (6, 'Molineaux Point Wind'),
    (7, 'Sydney Airport (Main Runway BOM) Wind'),
    (8, 'Brotherson Emergency Response Jetty Tide'),
    (9, 'Caltex (Directional) Current'),
    (12, 'Western Wedding Cake Wind'),
    (13, 'Fort Denison (Sth end BOM) Wind'),
    (14, 'Overseas Passenger Terminal Wind'),
    (15, 'Glebe Island Wind'),
    (16, 'Fort Denison-Primary (Nth end) Tide'),
    (17, 'Fort Denison-Secondary (Vegapuls64) Tide'),
    (18, 'Circular Quay ADCP Current'),
    (19, 'Balls Head Current'),
    (22, 'Twofold Bay - Munganno Point Wave'),
    (23, 'Twofold Bay - Multipurpose Wharf Wind'),
    (24, 'Breakwater Wharf Wind'),
    (27, 'Middle Wall (Vegapulse WL61) Tide'),
    (28, 'Goodwood (Vegapulse WL61) Tide'),
)

# IDs of the stations to process, in table order.
stn_ids = [station_id for station_id, _ in _STATIONS]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def update_master(output_dir, csv_name, df):
|
|
|
|
@ -77,28 +97,29 @@ url = 'http://wavewindtide.portauthoritynsw.com.au/'
|
|
|
|
|
# Download the observations page. The timeout stops the script from hanging
# indefinitely if the server never answers, and raise_for_status() fails
# fast on an HTTP error instead of parsing an error page as station data.
page = requests.get(url, timeout=30)
page.raise_for_status()

# Parse the raw response bytes into an lxml element tree for ID lookups.
tree = html.fromstring(page.content)
|
|
|
|
|
|
|
|
|
|
# Get elements from selected station. Each station's labels are looked up by
# an element ID built from the station number, zero-padded to two digits.
t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')

# Parse column names
# The summary text is split on ': ' and on a unit followed by whitespace (the
# unit is captured, so it is kept). After dropping empty/None pieces the list
# is read as repeating (name, value, unit) triples. A trailing space is
# appended so the final unit is also followed by whitespace.
# The pattern is a raw string: '\s' in a normal literal is an invalid escape.
text = re.split(r':\s|(m|s|knots|deg)\s', meas.text + ' ')
parts = [t for t in text if t]
parameters = [p.strip() for p in parts[::3]]
values = [float(p) for p in parts[1::3]]
units = parts[2::3]
columns = [f'{p} ({u})' for p, u in zip(parameters, units)]

# Parse time
# The four digits after 'at ' are HHMM; the date is the text after the first
# comma, in '%d %b %Y' form.
time = re.search(r'at ([0-9]{4})', t_raw.text).group(1)
date = t_raw.text.split(',')[1].strip()
t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')

# Create dataframe: a single row indexed by the observation time.
df = pd.DataFrame(dict(zip(columns, values)), index=[t])
df.index.name = 'datetime'

# Update master dataframe; the CSV is named after the station title label.
csv_name = description.text + '.csv'
update_master(output_dir, csv_name, df)
|
|
|
|
|
# Patterns are compiled once, outside the loop. Raw strings are required:
# '\s' in a normal literal is an invalid escape sequence.
_fields_re = re.compile(r':\s|(m|s|knots|deg)\s')
_time_re = re.compile(r'at ([0-9]{4})')

# Scrape every configured station and append its latest reading to that
# station's CSV. A station that is missing from the page, or whose labels
# are empty, is skipped so it cannot abort the remaining stations.
for stn_id in stn_ids:
    # Get elements from selected station. Element IDs are built from the
    # station number, zero-padded to two digits.
    prefix = f'MainContent_ctl{stn_id:02}'
    try:
        t_raw = tree.get_element_by_id(f'{prefix}_lblRecordDate')
        meas = tree.get_element_by_id(f'{prefix}_lblSummary')
        description = tree.get_element_by_id(f'{prefix}_lblTitle')
    except KeyError:
        # lxml raises KeyError when no element carries the requested ID.
        print(f'Station {stn_id}: not present on page, skipping')
        continue

    if not (t_raw.text and meas.text and description.text):
        # Without text there is nothing to parse (None would raise below).
        print(f'Station {stn_id}: empty label, skipping')
        continue

    # Parse column names
    # The summary text is split on ': ' and on a unit followed by whitespace
    # (the unit is captured, so it is kept). After dropping empty/None pieces
    # the list is read as repeating (name, value, unit) triples. A trailing
    # space is appended so the final unit is also followed by whitespace.
    text = _fields_re.split(meas.text + ' ')
    parts = [t for t in text if t]
    parameters = [p.strip() for p in parts[::3]]
    values = [float(p) for p in parts[1::3]]
    units = parts[2::3]
    columns = [f'{p} ({u})' for p, u in zip(parameters, units)]

    # Parse time
    # The four digits after 'at ' are HHMM; the date is the text after the
    # first comma, in '%d %b %Y' form.
    time = _time_re.search(t_raw.text).group(1)
    date = t_raw.text.split(',')[1].strip()
    t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')

    # Create dataframe: a single row indexed by the observation time.
    df = pd.DataFrame(dict(zip(columns, values)), index=[t])
    df.index.name = 'datetime'

    # Update master dataframe; the CSV is named after the station title label.
    csv_name = description.text + '.csv'
    update_master(output_dir, csv_name, df)
|
|
|
|
|