You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

61 lines
1.6 KiB
Python

"""Download latest time series data from MHL website and update local copy.
D. Howe
2019-08-30
"""
import os
import pandas as pd
# Directory (relative to the working directory) that holds the local csv copies
output_dir = 'csv'

# Base URL of the MHL real-time time series files; each file name listed
# below is appended to it to form the full download address
url = 'http://www.mhl.nsw.gov.au/data/realtime/timeseries/'

# Remote text files to fetch on every run
txt_names = [
    'SYDDOW.WaveHeight.csv.dat.txt',
    'SYDDOW.WavePeriod.csv.dat.txt',
    'OceanTide-213470.Level.csv.dat.txt',
]
def update_master(output_dir, csv_name, df):
    """Update master csv time series with new data.

    If a master file already exists, any of its rows whose timestamps also
    appear in ``df`` are replaced by the rows from ``df``, the remaining new
    rows are added, and the result is sorted by timestamp. If no master file
    exists yet, ``df`` is written out unchanged as the new master.

    Args:
        output_dir (str): path to time series directory (created if missing)
        csv_name (str): name of time series file
        df (dataframe): dataframe with new time series data, indexed by
            timestamp

    Returns:
        None
    """
    csv_path = os.path.join(output_dir, csv_name)

    # Make sure the destination exists so the very first export does not fail.
    # An empty output_dir means "current directory", which needs no creation.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    try:
        # Load local master table if it exists
        master = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    except FileNotFoundError:
        # Create new master table if none exists
        master = df
    else:
        # Remove existing rows if same timestamps appear in new dataframe,
        # so the freshly downloaded values take precedence
        master = master[~master.index.isin(df.index)]

        # Update master (DataFrame.append was removed in pandas 2.0)
        master = pd.concat([master, df])

        # Make sure timestamps are in order
        master = master.sort_index()

    # Export master table
    master.to_csv(csv_path)
for txt_name in txt_names:
    # Read text file from MHL website; the first column is used as the index
    # and parsed as dates
    df = pd.read_csv(url + txt_name, index_col=0, parse_dates=True)

    # Drop the '.dat.txt' suffix so the local copy is named '<series>.csv'
    csv_name = txt_name.replace('.dat.txt', '')

    # Update local copy. update_master writes the file itself and returns
    # None, so its result is not kept
    update_master(output_dir, csv_name, df)