"""Download latest time series data from MHL website and update local copy.
D. Howe
2019-08-30
"""
import os
import pandas as pd

# Set output directory (a relative path, so it is resolved against the
# current working directory) where the local master csv files are kept
output_dir = 'csv'

# Define data URLs: base address of the MHL real-time time series files,
# followed by the file names to fetch (each name is appended to `url`)
url = 'http://www.mhl.nsw.gov.au/data/realtime/timeseries/'
txt_names = [
    'SYDDOW.WaveHeight.csv.dat.txt',
    'SYDDOW.WavePeriod.csv.dat.txt',
    'OceanTide-213470.Level.csv.dat.txt',
]


def update_master(output_dir, csv_name, df):
    """Update master csv time series with new data.

    Rows of the existing master file whose timestamps also appear in ``df``
    are replaced by the rows from ``df``; the merged table is then sorted by
    timestamp and written back. If no master file exists yet, ``df`` is
    written as-is, and ``output_dir`` is created first if it is missing.

    Args:
        output_dir (str): path to time series directory
        csv_name (str):   name of time series file
        df (dataframe):   dataframe with new time series data, indexed by
                          timestamp

    Returns:
        None
    """
    csv_path = os.path.join(output_dir, csv_name)

    try:
        # Load local master table if it exists (only the read is guarded, so
        # errors from the merge steps below are not mistaken for a missing
        # file)
        master = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    except FileNotFoundError:
        # Create new master table if none exists
        master = df
    else:
        # Remove existing rows if same timestamps appear in new dataframe
        master = master[~master.index.isin(df.index)]

        # Update master (DataFrame.append was removed in pandas 2.0, so the
        # rows are combined with pd.concat instead)
        master = pd.concat([master, df])

        # Make sure timestamps are in order
        master = master.sort_index()

    # Make sure the output directory exists, otherwise the very first export
    # would fail. An empty output_dir means the current directory, which
    # os.makedirs cannot be asked to create.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Export master table
    master.to_csv(csv_path)


for txt_name in txt_names:
    # Read text file from MHL website, using the first column as a
    # datetime index
    df = pd.read_csv(url + txt_name, index_col=0, parse_dates=True)

    # The local file keeps the remote name with '.dat.txt' removed
    csv_name = txt_name.replace('.dat.txt', '')

    # Update local copy (update_master returns None, so there is no result
    # worth binding to a name)
    update_master(output_dir, csv_name, df)