# python-snippets/bom-wind/bom_wind.py

"""Download latest time series data from BOM website and update local copy.

BOM does not allow direct http requests to data files unless it believes the
request comes from a browser. For example:

>>> pd.read_csv(url)  # This does not work
HTTPError: HTTP Error 403: Forbidden

>>> requests.get(url)  # This works!
<Response [200]>

D. Howe
2019-09-04
"""
import os
import json
import requests
import pandas as pd

# Output directory for the local time series copy
output_dir = 'bom'

# JSON product URL for North Head; the matching product page is
# http://www.bom.gov.au/products/IDN60901/IDN60901.95768.shtml
json_url = 'http://www.bom.gov.au/fwo/IDN60901/IDN60901.95768.json'

# Columns to keep in the output
cols = ['wind_dir', 'wind_spd_kmh', 'gust_kmh']


def update_master(output_dir, csv_name, df):
    """Update master csv time series.

    Rows of ``df`` whose timestamps are not already present in the master
    csv are appended to it. If the master csv does not exist yet, it is
    created from ``df``.

    Args:
        output_dir (str): path to time series directory (created if missing)
        csv_name (str):   name of time series file
        df (dataframe):   dataframe with datetime index

    Returns:
        None
    """
    csv_path = os.path.join(output_dir, csv_name)

    try:
        # Load local master table if it exists
        master = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    except FileNotFoundError:
        # Create new master table if none exists
        master = df
    else:
        # Only include timestamps that do not already exist
        new_rows = df[~df.index.isin(master.index)]

        # DataFrame.append() was removed in pandas 2.0; pd.concat is the
        # supported equivalent. Skip it when there is nothing new to add.
        if not new_rows.empty:
            master = pd.concat([master, new_rows])

    # Make sure the output directory exists, otherwise the first export
    # fails. An empty output_dir means the current working directory.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Export master table
    master.to_csv(csv_path)


# Download JSON data. The timeout stops the script from hanging forever if
# the server never responds, and raise_for_status() surfaces HTTP errors
# (e.g. 403 Forbidden) directly instead of as a confusing JSON parse error.
response = requests.get(json_url, timeout=30)
response.raise_for_status()
json_data = json.loads(response.content)

# Create dataframe
df = pd.DataFrame(json_data['observations']['data'])

# Set local time as index
df.index = pd.to_datetime(df['local_date_time_full'])
df = df.sort_index()

# Extract columns of interest
df = df[cols]

# Update master table; the csv is named after the JSON product file
csv_name = json_url.split('/')[-1].replace('.json', '.csv')
update_master(output_dir, csv_name, df)
Add 'bom_wind' 5 years ago			`"""Download latest time series data from BOM website and update local copy.`

Simplify http request 5 years ago			`BOM does not allow direct http requests to data files unless it believes the`
			`request comes from a browser. For example:`

			`>>> pd.read_csv(url) # This does not work`
			`HTTPError: HTTP Error 403: Forbidden`

			`>>> requests.get(url) # This works!`
			`<Response [200]>`
Add 'bom_wind' 5 years ago
			`D. Howe`
			`2019-09-04`
			`"""`
			`import os`
			`import json`
			`import requests`
			`import pandas as pd`

Simplify http request 5 years ago			`# Set json product URL for North Head`
			`# http://www.bom.gov.au/products/IDN60901/IDN60901.95768.shtml`
			`json_url = 'http://www.bom.gov.au/fwo/IDN60901/IDN60901.95768.json'`
Add 'bom_wind' 5 years ago
			`# Set output directory`
			`output_dir = 'bom'`

			`# Set output column names`
			`cols = [`
			`'wind_dir',`
			`'wind_spd_kmh',`
			`'gust_kmh',`
			`]`


			`def update_master(output_dir, csv_name, df):`
			`"""Update master csv time series.`

			`Args:`
			`output_dir (str): path to time series directory`
			`csv_name (str): name of time series file`
			`df (dataframe): dataframe with datetime index`

			`Returns:`
			`None`
			`"""`
			`try:`
			`# Load local master table if it exists`
			`master = pd.read_csv(os.path.join(output_dir, csv_name),`
			`index_col=0,`
			`parse_dates=True)`

			`# Only include timestamps that do not already exist`
			`df = df[~df.index.isin(master.index)]`

			`# Update master`
			`master = master.append(df)`

			`except FileNotFoundError:`
			`# Create new master table if none exists`
			`master = df`

			`# Export master table`
			`master.to_csv(os.path.join(output_dir, csv_name))`


Simplify http request 5 years ago			`# Download JSON data`
			`json_data = json.loads(requests.get(json_url).content)`
Add 'bom_wind' 5 years ago
			`# Create dataframe`
			`df = pd.DataFrame(json_data['observations']['data'])`

			`# Set local time as index`
			`df.index = pd.to_datetime(df['local_date_time_full'])`
Fix timestamp sorting 5 years ago			`df = df.sort_index()`
Add 'bom_wind' 5 years ago
			`# Extract columns of interest`
			`df = df[cols]`

			`# Update master table`
Simplify http request 5 years ago			`csv_name = json_url.split('/')[-1].replace('.json', '.csv')`
Add 'bom_wind' 5 years ago			`update_master(output_dir, csv_name, df)`