Add 'bom_wind'
parent
e21b07f466
commit
dece15191b
@ -0,0 +1,6 @@
|
||||
# bom_wind.py
|
||||
|
||||
Download latest time series data from BOM website and update local copy.
|
||||
|
||||
BOM does not allow direct http requests to data files, so a requests session
|
||||
is opened on the product page before accessing the resource.
|
@ -0,0 +1,86 @@
|
||||
"""Download latest time series data from BOM website and update local copy.
|
||||
|
||||
BOM does not allow direct http requests to data files, so a requests session
|
||||
is opened on the product page before accessing the resource.
|
||||
|
||||
D. Howe
|
||||
2019-09-04
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
import pandas as pd
|
||||
from lxml import html
|
||||
|
||||
# Base URL for all BOM (Australian Bureau of Meteorology) requests
bom_url = 'http://www.bom.gov.au/'

# Observation product page for the North Head station
# (product IDN60901, station id 95768)
product_url = 'products/IDN60901/IDN60901.95768.shtml'

# Local directory where the master csv time series is kept
output_dir = 'bom'

# Columns extracted from each BOM observation record
cols = [
    'wind_dir',
    'wind_spd_kmh',
    'gust_kmh',
]
|
||||
|
||||
|
||||
def update_master(output_dir, csv_name, df):
    """Update master csv time series.

    Appends new observations to the local master csv, skipping any
    timestamps already present so the series never holds duplicates.
    Creates the output directory and/or master file if missing.

    Args:
        output_dir (str): path to time series directory
        csv_name (str): name of time series file
        df (dataframe): dataframe with datetime index

    Returns:
        None
    """
    # Ensure destination directory exists (first run, fresh checkout)
    os.makedirs(output_dir, exist_ok=True)
    csv_path = os.path.join(output_dir, csv_name)

    try:
        # Load local master table if it exists
        master = pd.read_csv(csv_path, index_col=0, parse_dates=True)

        # Only include timestamps that do not already exist
        df = df[~df.index.isin(master.index)]

        # Update master. DataFrame.append() was deprecated in pandas 1.4
        # and removed in 2.0; pd.concat is the supported equivalent.
        master = pd.concat([master, df])

    except FileNotFoundError:
        # Create new master table if none exists
        master = df

    # Export master table
    master.to_csv(csv_path)
|
||||
|
||||
|
||||
# Open new session with BOM website. BOM rejects direct http requests to
# the data files, so session cookies must be established on the product
# page before the JSON resource can be fetched.
# Session() is the documented class; the lowercase session() used
# previously is a deprecated alias.
with requests.Session() as session:
    # Load product page. Always pass a timeout: requests' default is to
    # wait forever on an unresponsive server.
    page = session.get(bom_url + product_url, timeout=30)
    page.raise_for_status()
    tree = html.fromstring(page.content)

    # Find and download JSON data. The link is located by position on the
    # page — NOTE(review): brittle; confirm p[4] still points at the JSON
    # link if this starts raising IndexError.
    json_url = tree.xpath('//*[@id="content"]/p[4]/a')[0].get('href')
    response = session.get(bom_url + json_url, timeout=30)
    response.raise_for_status()
    json_data = json.loads(response.content)

    # Derive local csv name from the remote JSON file name
    csv_name = json_url.split('/')[-1].replace('.json', '.csv')

    # Create dataframe of observation records
    df = pd.DataFrame(json_data['observations']['data'])

    # Set local time as index
    df.index = pd.to_datetime(df['local_date_time_full'])

    # Extract columns of interest
    df = df[cols]

    # Update master table
    update_master(output_dir, csv_name, df)
|
Loading…
Reference in New Issue