"""Download latest time series data from BOM website and update local copy.

BOM does not allow direct HTTP requests to data files unless it believes the
request comes from a browser. For example:

    >>> pd.read_csv(url)  # This does not work
    HTTPError: HTTP Error 403: Forbidden

    >>> requests.get(url)  # This works!
    <Response [200]>

D. Howe
2019-09-04
"""
import os
import json
import requests
import pandas as pd
# Set json product URL for North Head
# http://www.bom.gov.au/products/IDN60901/IDN60901.95768.shtml
json_url = 'http://www.bom.gov.au/fwo/IDN60901/IDN60901.95768.json'

# Set output directory (the master csv is written inside it)
output_dir = 'bom'

# Set output column names (the subset of observation fields that is kept)
cols = [
    'wind_dir',
    'wind_spd_kmh',
    'gust_kmh',
]
def update_master(output_dir, csv_name, df):
    """Update master csv time series.

    Rows of ``df`` whose index values are not already present in the master
    file are added to it. If the master file does not exist yet, it is
    created from ``df``.

    Args:
        output_dir (str): path to time series directory
        csv_name (str): name of time series file
        df (dataframe): dataframe with datetime index

    Returns:
        None
    """
    csv_path = os.path.join(output_dir, csv_name)

    try:
        # Load local master table if it exists
        master = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    except FileNotFoundError:
        # Create new master table if none exists
        master = df
    else:
        # Only include timestamps that do not already exist
        new_rows = df[~df.index.isin(master.index)]
        if not new_rows.empty:
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # supported way to stack the new rows under the existing ones.
            master = pd.concat([master, new_rows])

    # The output directory may not exist on the first run; to_csv does not
    # create it. An empty output_dir means the current directory.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Export master table
    master.to_csv(csv_path)
# Download JSON data
# A timeout stops the script hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors (e.g. 403 Forbidden) before the
# response body is handed to the JSON parser.
response = requests.get(json_url, timeout=30)
response.raise_for_status()
json_data = json.loads(response.content)

# Create dataframe
df = pd.DataFrame(json_data['observations']['data'])

# Set local time as index
df.index = pd.to_datetime(df['local_date_time_full'])
df = df.sort_index()

# Extract columns of interest
df = df[cols]

# Update master table (csv is named after the json product file)
csv_name = json_url.split('/')[-1].replace('.json', '.csv')
update_master(output_dir, csv_name, df)