Process multiple stations

master
Dan Howe 5 years ago
parent 14e6a9021f
commit 23508dba69

@@ -35,10 +35,30 @@ import requests
 import pandas as pd
 from lxml import html
 
-# Set station as Fort Denison tide
-stn_id = 16
-
 output_dir = 'csv'
+stn_ids = [
+    2, # Offshore (Directional) Wave
+    3, # Bombora (Directional) Wave
+    4, # Captain Cook Channel (SG) Wave
+    5, # Kurnell (SG) Wave
+    6, # Molineaux Point Wind
+    7, # Sydney Airport (Main Runway BOM) Wind
+    8, # Brotherson Emergency Response Jetty Tide
+    9, # Caltex (Directional) Current
+    12, # Western Wedding Cake Wind
+    13, # Fort Denison (Sth end BOM) Wind
+    14, # Overseas Passenger Terminal Wind
+    15, # Glebe Island Wind
+    16, # Fort Denison-Primary (Nth end) Tide
+    17, # Fort Denison-Secondary (Vegapuls64) Tide
+    18, # Circular Quay ADCP Current
+    19, # Balls Head Current
+    22, # Twofold Bay - Munganno Point Wave
+    23, # Twofold Bay - Multipurpose Wharf Wind
+    24, # Breakwater Wharf Wind
+    27, # Middle Wall (Vegapulse WL61) Tide
+    28, # Goodwood (Vegapulse WL61) Tide
+]
 
 
 def update_master(output_dir, csv_name, df):
@@ -77,28 +97,29 @@ url = 'http://wavewindtide.portauthoritynsw.com.au/'
 page = requests.get(url)
 tree = html.fromstring(page.content)
 
-# Get elements from selected station
-t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
-meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
-description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')
-
-# Parse column names
-text = re.split(':\s|(m|s|knots|deg)\s', meas.text + ' ')
-parts = [t for t in text if t]
-parameters = [p.strip() for p in parts[::3]]
-values = [float(p) for p in parts[1::3]]
-units = parts[2::3]
-columns = [f'{p} ({u})' for p, u in zip(parameters, units)]
-
-# Parse time
-time = re.search('at ([0-9]{4})', t_raw.text).group(1)
-date = t_raw.text.split(',')[1].strip()
-t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')
-
-# Create dataframe
-df = pd.DataFrame({c: v for c, v in zip(columns, values)}, index=[t])
-df.index.name = 'datetime'
-
-# Update master dataframe
-csv_name = description.text + '.csv'
-update_master(output_dir, csv_name, df)
+for stn_id in stn_ids:
+    # Get elements from selected station
+    t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
+    meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
+    description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')
+
+    # Parse column names
+    text = re.split(':\s|(m|s|knots|deg)\s', meas.text + ' ')
+    parts = [t for t in text if t]
+    parameters = [p.strip() for p in parts[::3]]
+    values = [float(p) for p in parts[1::3]]
+    units = parts[2::3]
+    columns = [f'{p} ({u})' for p, u in zip(parameters, units)]
+
+    # Parse time
+    time = re.search('at ([0-9]{4})', t_raw.text).group(1)
+    date = t_raw.text.split(',')[1].strip()
+    t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')
+
+    # Create dataframe
+    df = pd.DataFrame({c: v for c, v in zip(columns, values)}, index=[t])
+    df.index.name = 'datetime'
+
+    # Update master dataframe
+    csv_name = description.text + '.csv'
+    update_master(output_dir, csv_name, df)

Loading…
Cancel
Save