Add 'bom_wind'
parent
e21b07f466
commit
dece15191b
@ -0,0 +1,6 @@
|
|||||||
|
# bom_wind.py
|
||||||
|
|
||||||
|
Download latest time series data from BOM website and update local copy.
|
||||||
|
|
||||||
|
BOM does not allow direct http requests to data files, so a requests session
|
||||||
|
is opened on the product page before accessing the resource.
|
@ -0,0 +1,86 @@
|
|||||||
|
"""Download latest time series data from BOM website and update local copy.
|
||||||
|
|
||||||
|
BOM does not allow direct http requests to data files, so a requests session
|
||||||
|
is opened on the product page before accessing the resource.
|
||||||
|
|
||||||
|
D. Howe
|
||||||
|
2019-09-04
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
import pandas as pd
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
# Base URL for all BOM requests (product page and JSON resource share it)
bom_url = 'http://www.bom.gov.au/'

# Product page for the North Head observation station (IDN60901.95768)
product_url = 'products/IDN60901/IDN60901.95768.shtml'

# Local directory where master time series csv files are kept
output_dir = 'bom'

# Observation columns kept in the output:
# wind direction, wind speed (km/h) and gust speed (km/h)
cols = [
    'wind_dir',
    'wind_spd_kmh',
    'gust_kmh',
]
|
||||||
|
|
||||||
|
|
||||||
|
def update_master(output_dir, csv_name, df):
    """Update master csv time series with new observations.

    New rows from `df` are merged into the existing master csv (if any);
    timestamps already present in the master are left untouched.

    Args:
        output_dir (str): path to time series directory
        csv_name (str): name of time series file
        df (pandas.DataFrame): dataframe with datetime index

    Returns:
        None
    """
    csv_path = os.path.join(output_dir, csv_name)

    try:
        # Load local master table if it exists
        master = pd.read_csv(csv_path, index_col=0, parse_dates=True)

        # Only include timestamps that do not already exist in the master
        df = df[~df.index.isin(master.index)]

        # Update master; DataFrame.append was removed in pandas 2.0,
        # so use pd.concat instead
        master = pd.concat([master, df])

    except FileNotFoundError:
        # Create new master table if none exists
        master = df

    # Ensure the output directory exists before exporting
    os.makedirs(output_dir, exist_ok=True)

    # Export master table
    master.to_csv(csv_path)
|
||||||
|
|
||||||
|
|
||||||
|
# BOM blocks direct http requests to data files, so a session is opened
# on the product page first; the JSON resource is then fetched through
# the same session.
with requests.session() as session:
    # Fetch the product page and parse its HTML
    product_page = session.get(bom_url + product_url)
    page_tree = html.fromstring(product_page.content)

    # Locate the JSON data link on the page and download it
    # NOTE(review): xpath position (p[4]) is tied to current page layout
    json_anchor = page_tree.xpath('//*[@id="content"]/p[4]/a')[0]
    json_url = json_anchor.get('href')
    json_response = session.get(bom_url + json_url)
    json_data = json.loads(json_response.content)

    # Derive the csv file base name from the JSON resource name
    csv_name = json_url.split('/')[-1].replace('.json', '.csv')

    # Build a dataframe of observations, indexed by local time
    df = pd.DataFrame(json_data['observations']['data'])
    df.index = pd.to_datetime(df['local_date_time_full'])

    # Keep only the wind columns of interest
    df = df[cols]

    # Merge the new observations into the local master table
    update_master(output_dir, csv_name, df)
|
Loading…
Reference in New Issue