diff --git a/mhl-waverider/mhl_waverider.py b/mhl-waverider/mhl_waverider.py
index 9f5f45d..93f59e4 100644
--- a/mhl-waverider/mhl_waverider.py
+++ b/mhl-waverider/mhl_waverider.py
@@ -13,27 +13,48 @@
 url = 'http://www.mhl.nsw.gov.au/data/realtime/timeseries/'
 txt_names = [
     'SYDDOW.WaveHeight.csv.dat.txt',
     'SYDDOW.WavePeriod.csv.dat.txt',
+    'OceanTide-213470.Level.csv.dat.txt',
 ]
 
-for txt_name in txt_names:
-    # Read text file from MHL website
-    df = pd.read_csv(url + txt_name, index_col=0, parse_dates=True)
-    # Create name for exported data
-    csv_name = os.path.join(input_dir, txt_name.replace('.dat.txt', ''))
+def update_master(output_dir, csv_name, df):
+    """Update master csv time series with new data.
+
+    Args:
+        output_dir (str): path to time series directory
+        csv_name (str): name of time series file
+        df (dataframe): dataframe with new time series data
+
+    Returns:
+        None
+    """
     try:
-        # Load local master table
-        master = pd.read_csv(csv_name, index_col=0, parse_dates=True)
+        # Load local master table if it exists
+        master = pd.read_csv(os.path.join(output_dir, csv_name),
+                             index_col=0,
+                             parse_dates=True)
 
-        # Only include timestamps that do not already exist
-        df = df[~df.index.isin(master.index)]
+        # Remove existing rows if same timestamps appear in new dataframe
+        master = master[~master.index.isin(df.index)]
 
         # Update master
         master = master.append(df)
 
+        # Make sure timestamps are in order
+        master = master.sort_index()
+
     except FileNotFoundError:
         # Create new master table if none exists
         master = df
 
     # Export master table
-    master.to_csv(csv_name)
+    master.to_csv(os.path.join(output_dir, csv_name))
+
+
+for txt_name in txt_names:
+    # Read text file from MHL website
+    df = pd.read_csv(url + txt_name, index_col=0, parse_dates=True)
+
+    # Update local copy
+    csv_name = txt_name.replace('.dat.txt', '')
+    update_master(output_dir, csv_name, df)
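
A minimal, self-contained sketch (not part of the patch) of the merge behaviour `update_master()` implements, using made-up timestamps and values. Note the unchanged context line still calls `DataFrame.append()`, which was removed in pandas 2.0; the sketch uses `pd.concat()` as the forward-compatible equivalent of that step.

```python
import pandas as pd

# Existing master table on disk (illustrative values only)
master = pd.DataFrame(
    {'WaveHeight': [1.2, 1.4]},
    index=pd.to_datetime(['2023-01-01 00:00', '2023-01-01 01:00']),
)

# Freshly downloaded data; the 01:00 timestamp overlaps the master table
new = pd.DataFrame(
    {'WaveHeight': [1.5, 1.6]},
    index=pd.to_datetime(['2023-01-01 01:00', '2023-01-01 02:00']),
)

# Drop master rows that are superseded by the new download,
# mirroring master[~master.index.isin(df.index)] in the patch
master = master[~master.index.isin(new.index)]

# Append the new rows and keep timestamps in order.
# DataFrame.append() was removed in pandas 2.0, so pd.concat() is used here.
master = pd.concat([master, new]).sort_index()

print(master)
# Expected output:
#                      WaveHeight
# 2023-01-01 00:00:00         1.2
# 2023-01-01 01:00:00         1.5
# 2023-01-01 02:00:00         1.6
```

The net effect is that a timestamp downloaded twice keeps its most recent value, while earlier rows already in the master CSV are preserved.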