|
|
|
"""Download latest time series data from MHL website and update local copy.
|
|
|
|
D. Howe
|
|
|
|
2019-08-30
|
|
|
|
"""
|
|
|
|
import os
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
# Directory holding the local master copies of each time series
output_dir = 'csv'

# Base URL of the MHL realtime time series endpoint, plus the
# individual data files to mirror (wave height, wave period, tide level)
url = 'http://www.mhl.nsw.gov.au/data/realtime/timeseries/'

txt_names = [
    'SYDDOW.WaveHeight.csv.dat.txt',
    'SYDDOW.WavePeriod.csv.dat.txt',
    'OceanTide-213470.Level.csv.dat.txt',
]
|
|
|
|
|
|
|
|
|
|
|
|
def update_master(output_dir, csv_name, df):
    """Update master csv time series with new data.

    Rows in the existing master whose timestamps also appear in ``df``
    are replaced by the new values, and the merged table is kept sorted
    by timestamp. If no master file exists yet, ``df`` becomes the new
    master. The result is written back to ``output_dir/csv_name``.

    Args:
        output_dir (str): path to time series directory
        csv_name (str): name of time series file
        df (dataframe): dataframe with new time series data

    Returns:
        dataframe: the updated master table (also exported to csv)
    """
    try:
        # Load local master table if it exists
        master = pd.read_csv(os.path.join(output_dir, csv_name),
                             index_col=0,
                             parse_dates=True)

        # Remove existing rows if same timestamps appear in new dataframe
        master = master[~master.index.isin(df.index)]

        # Update master; DataFrame.append() was removed in pandas 2.0,
        # so concatenate instead
        master = pd.concat([master, df])

        # Make sure timestamps are in order
        master = master.sort_index()

    except FileNotFoundError:
        # Create new master table if none exists
        master = df

    # Export master table
    master.to_csv(os.path.join(output_dir, csv_name))

    return master
|
|
|
|
|
|
|
|
|
|
|
|
# Make sure the output directory exists before any csv is exported,
# otherwise to_csv() raises on a fresh checkout
os.makedirs(output_dir, exist_ok=True)

for txt_name in txt_names:
    # Read text file from MHL website
    df = pd.read_csv(url + txt_name, index_col=0, parse_dates=True)

    # Update local copy (update_master writes the merged csv itself;
    # its return value is not needed here)
    csv_name = txt_name.replace('.dat.txt', '')
    update_master(output_dir, csv_name, df)
|