Rename pansw script

master
Dan Howe 5 years ago
parent 5891431a37
commit 391551d8aa

@ -1,4 +1,4 @@
# pansw_tide.py
# port_authority_nsw.py
Download latest time series data from Port Authority of NSW and update local copy.
[wavewindtide.portauthoritynsw.com.au](http://wavewindtide.portauthoritynsw.com.au/)

@ -1,101 +0,0 @@
"""Get latest observations from Port Authority of NSW and update local copy.
Station IDs are below:
02: Offshore (Directional) Wave
03: Bombora (Directional) Wave
04: Captain Cook Channel (SG) Wave
05: Kurnell (SG) Wave
06: Molineaux Point Wind
07: Sydney Airport (Main Runway BOM) Wind
08: Brotherson Emergency Response Jetty Tide
09: Caltex (Directional) Current
12: Western Wedding Cake Wind
13: Fort Denison (Sth end BOM) Wind
14: Overseas Passenger Terminal Wind
15: Glebe Island Wind
16: Fort Denison-Primary (Nth end) Tide
17: Fort Denison-Secondary (Vegapuls64) Tide
18: Circular Quay ADCP Current
19: Balls Head Current
22: Twofold Bay - Munganno Point Wave
23: Twofold Bay - Multipurpose Wharf Wind
24: Breakwater Wharf Wind
27: Middle Wall (Vegapulse WL61) Tide
28: Goodwood (Vegapulse WL61) Tide
"""
import os
import re
import datetime
import requests
import pandas as pd
from lxml import html
# Station to scrape: 16 = Fort Denison-Primary (Nth end) tide gauge
# (see the station ID list in the module docstring)
stn_id = 16
# Directory where the per-station master CSV time series are kept
output_dir = 'csv'
def update_master(output_dir, csv_name, df):
    """Update master csv time series.

    Appends the rows of ``df`` whose timestamps are not already present in
    the master CSV, creating the master file if it does not exist yet.

    Args:
        output_dir (str): path to time series directory
        csv_name (str): name of time series file
        df (dataframe): dataframe with datetime index

    Returns:
        None
    """
    csv_path = os.path.join(output_dir, csv_name)
    try:
        # Load local master table if it exists
        master = pd.read_csv(csv_path, index_col=0, parse_dates=True)
        # Only include timestamps that do not already exist
        df = df[~df.index.isin(master.index)]
        # Append new rows; DataFrame.append was removed in pandas 2.0,
        # so use pd.concat instead
        master = pd.concat([master, df])
    except FileNotFoundError:
        # Create new master table if none exists
        master = df
    # Export master table
    master.to_csv(csv_path)
# Get main page (timeout so an unresponsive server cannot hang the script)
url = 'http://wavewindtide.portauthoritynsw.com.au/'
page = requests.get(url, timeout=30)
tree = html.fromstring(page.content)

# Get elements from selected station; the page exposes each station's
# fields through ids of the form 'MainContent_ctlNN_lbl...'
t_raw = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblRecordDate')
meas = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblSummary')
description = tree.get_element_by_id(f'MainContent_ctl{stn_id:02}_lblTitle')

# Parse column names: the summary text is 'name: value unit' triplets
# separated by colons
text = re.split(':', meas.text)
parameters = text[::3]
# Raw string for the regex — '\D' in a plain string is an invalid
# escape sequence (SyntaxWarning in modern Python)
entries = [re.split(r'(\D+)$', t) for t in text[1::3]]
values = [float(e[0]) for e in entries]
units = [e[1] for e in entries]
columns = [f'{p} ({u})' for p, u in zip(parameters, units)]

# Parse observation time, e.g. '... , 01 Jan 2020 at 0930'
time = re.search(r'at ([0-9]{4})', t_raw.text).group(1)
date = t_raw.text.split(',')[1].strip()
t = datetime.datetime.strptime(date + time, '%d %b %Y%H%M')

# Create single-row dataframe indexed by observation timestamp
df = pd.DataFrame({c: v for c, v in zip(columns, values)}, index=[t])
df.index.name = 'datetime'

# Update master dataframe on disk for this station
csv_name = description.text + '.csv'
update_master(output_dir, csv_name, df)
Loading…
Cancel
Save