"""Download latest time series data from MHL website and update local copy. D. Howe 2019-08-30 """ import os import pandas as pd # Set output directory output_dir = 'csv' # Define data URLs url = 'http://www.mhl.nsw.gov.au/data/realtime/timeseries/' txt_names = [ 'SYDDOW.WaveHeight.csv.dat.txt', 'SYDDOW.WavePeriod.csv.dat.txt', ] for txt_name in txt_names: # Read text file from MHL website df = pd.read_csv(url + txt_name, index_col=0, parse_dates=True) # Create name for exported data csv_name = os.path.join(input_dir, txt_name.replace('.dat.txt', '')) try: # Load local master table master = pd.read_csv(csv_name, index_col=0, parse_dates=True) # Only include timestamps that do not already exist df = df[~df.index.isin(master.index)] # Update master master = master.append(df) except FileNotFoundError: # Create new master table if none exists master = df # Export master table master.to_csv(csv_name)