"""Download latest time series data from MHL website and update local copy. D. Howe 2019-08-30 """ import os import pandas as pd # Set output directory output_dir = 'csv' # Define data URLs url = 'http://www.mhl.nsw.gov.au/data/realtime/timeseries/' txt_names = [ 'SYDDOW.WaveHeight.csv.dat.txt', 'SYDDOW.WavePeriod.csv.dat.txt', 'OceanTide-213470.Level.csv.dat.txt', ] def update_master(output_dir, csv_name, df): """Update master csv time series with new data. Args: output_dir (str): path to time series directory csv_name (str): name of time series file df (dataframe): dataframe with new time seies data Returns: None """ try: # Load local master table if it exists master = pd.read_csv(os.path.join(output_dir, csv_name), index_col=0, parse_dates=True) # Remove existing rows if same timestamps appear in new dataframe master = master[~master.index.isin(df.index)] # Update master master = master.append(df) # Make sure timestamps are in order master = master.sort_index() except FileNotFoundError: # Create new master table if none exists master = df # Export master table master.to_csv(os.path.join(output_dir, csv_name)) for txt_name in txt_names: # Read text file from MHL website df = pd.read_csv(url + txt_name, index_col=0, parse_dates=True) # Update local copy csv_name = txt_name.replace('.dat.txt', '') master = update_master(output_dir, csv_name, df)