Initial commit

master
Chris Leaman 6 years ago
commit 85fa9e55c2

.gitignore vendored (+11 lines)

@@ -0,0 +1,11 @@
# Jupyter NB Checkpoints
.ipynb_checkpoints/
# exclude data from source control by default
/data/
# Pycharm
.idea
# DotEnv configuration
.env

Makefile

@@ -0,0 +1,45 @@
#################################################################################
# PROJECT RULES #
#################################################################################
.PHONY: mat-to-csv
mat-to-csv: ##@data Converts raw .mat files to .csv for python
	cd ./src/data/ && python mat_to_csv.py

.PHONY: sites-csv-to-shp
sites-csv-to-shp: ##@data Converts sites.csv to sites.shp for QGIS
	cd ./src/data && python csv_to_shp.py
#################################################################################
# Self Documenting Commands #
#################################################################################
.DEFAULT_GOAL := help
.PHONY: help
# Refer to https://gist.github.com/prwhite/8168133
#COLORS
GREEN := $(shell tput -Txterm setaf 2)
WHITE := $(shell tput -Txterm setaf 7)
YELLOW := $(shell tput -Txterm setaf 3)
RESET := $(shell tput -Txterm sgr0)
# Add the following 'help' target to your Makefile
# And add help text after each target name starting with '\#\#'
# A category can be added with @category
HELP_FUN = \
%help; \
while(<>) { push @{$$help{$$2 // 'options'}}, [$$1, $$3] if /^([a-zA-Z\-]+)\s*:.*\#\#(?:@([a-zA-Z\-]+))?\s(.*)$$/ }; \
print "usage: make [target]\n\n"; \
for (sort keys %help) { \
print "${WHITE}$$_:${RESET}\n"; \
for (@{$$help{$$_}}) { \
$$sep = " " x (32 - length $$_->[0]); \
print " ${YELLOW}$$_->[0]${RESET}$$sep${GREEN}$$_->[1]${RESET}\n"; \
}; \
print "\n"; }
help: ##@other Show this help.
@perl -e '$(HELP_FUN)' $(MAKEFILE_LIST)
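For reference, with the targets above, `make help` should print something like the following (terminal colours omitted; the spacing comes from the 32-character padding in HELP_FUN):

    usage: make [target]

    data:
      mat-to-csv                      Converts raw .mat files to .csv for python
      sites-csv-to-shp                Converts sites.csv to sites.shp for QGIS

    other:
      help                            Show this help.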

README.md

@@ -0,0 +1,21 @@
# 2016 Narrabeen Storm EWS Performance
This repository investigates whether the storm impacts (as defined by Sallenger, 2000) of the June 2016 Narrabeen
storm could have been forecast in advance.
## Repository and analysis format
This repository follows the [Cookiecutter Data Science](https://drivendata.github.io/cookiecutter-data-science/)
structure where possible. The analysis is done in Python (see the `/src/` folder), with some interactive, exploratory notebooks located in `/notebooks`.
## Where to start?
Check out the Jupyter notebook `./notebooks/01_exploration.ipynb`, which has an example of how to import the data and some interactive widgets.
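If you prefer working from a plain Python script, the following is a minimal sketch of loading the interim data from the repository root (it assumes the interim `.csv` files have already been generated by `make mat-to-csv`; the site id `NARRA0001` is purely illustrative):

```python
import os
import pandas as pd

data_folder = './data/interim'

# Each file is indexed the same way it is written by src/data/mat_to_csv.py
df_waves = pd.read_csv(os.path.join(data_folder, 'waves.csv'), index_col=[0, 1])  # (site_id, datetime)
df_tides = pd.read_csv(os.path.join(data_folder, 'tides.csv'), index_col=[0, 1])  # (site_id, datetime)
df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0, 1, 2])  # (site_id, profile_type, x)
df_sites = pd.read_csv(os.path.join(data_folder, 'sites.csv'), index_col=[0])  # site_id

# Select the prestorm cross section at a single site
prestorm_profile = df_profiles.loc[('NARRA0001', 'prestorm')]
```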
## Available data
Raw, interim and processed data used in this analysis is kept in the `/data/` folder.
- `/data/raw/processed_shorelines`: This data was received from Tom Beuzen in October 2018. It consists of pre- and post-storm profiles at 100 m intervals along beaches ranging from Dee Why to Nambucca. Profiles are based on raw aerial LIDAR and were processed by Mitch Harley. Tides and waves (10 m contour and reverse-shoaled deepwater) for each individual 100 m section are also provided.
- `/data/raw/raw_lidar`: This is the raw pre- and post-storm aerial LIDAR captured for the June 2016 storm. The `.las` files are the raw files, which have been processed into `.tiff` files using `PDAL`. Note that these files have not been corrected for systematic errors, so actual elevations should be taken from the `processed_shorelines` folder. Obtained in November 2018 from Mitch Harley from the black external HDD labeled "UNSW LIDAR".
- `/data/raw/profile_features`: Dune toe and crest locations based on prestorm LIDAR. Refer to `/notebooks/qgis.qgz`, which shows how they were manually extracted. Note that the shapefiles only show the location (lat/lon) of the dune crest and toe; for actual elevations, these locations need to be related to the processed shorelines. A sketch of reading these files follows this list.
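Here is a hedged sketch of reading the dune crest shapefile into shapely geometries (it mirrors the `shapes_from_shp` helper included in this commit):

```python
import fiona
from shapely.geometry import shape

# Each feature in the shapefile becomes a shapely geometry
with fiona.open('./data/raw/profile_features/dune_crests.shp') as src:
    dune_crests = [shape(feat['geometry']) for feat in src]
```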
## Notebooks
- `/notebooks/01_exploration.ipynb`: Shows how to import processed shorelines, waves and tides. An interactive widget plots the location and cross sections.
- `/notebooks/qgis.qgz`: A QGIS file which is used to explore the aerial LIDAR data in `/data/raw/raw_lidar`. By examining the pre-storm LIDAR, dune crest and dune toe lines are manually extracted. These are stored in `/data/raw/profile_features/`.

File diff suppressed because one or more lines are too long

Binary file not shown.

@@ -0,0 +1,13 @@
"""
Loads the interim .csv data files produced by mat_to_csv.py into pandas dataframes.
"""
import os

import pandas as pd


def main():
    data_folder = './data/interim'

    # Indexes match those set in src/data/mat_to_csv.py
    df_waves = pd.read_csv(os.path.join(data_folder, 'waves.csv'), index_col=[0, 1])
    df_tides = pd.read_csv(os.path.join(data_folder, 'tides.csv'), index_col=[0, 1])
    df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0, 1, 2])
    df_sites = pd.read_csv(os.path.join(data_folder, 'sites.csv'), index_col=[0])


if __name__ == '__main__':
    main()

src/data/csv_to_shp.py

@@ -0,0 +1,38 @@
"""
Converts .csv files to .shape files
"""
from fiona.crs import from_epsg
import fiona
from shapely.geometry import Point, mapping
from fiona import collection
import pandas as pd
import os
def sites_csv_to_shp(input_csv='.\data\interim\sites.csv', output_shp='.\data\interim\sites.shp'):
"""
Converts our dataframe of sites to .shp to load in QGis
:param input_csv:
:param output_shp:
:return:
"""
df_sites = pd.read_csv(input_csv, index_col=[0])
schema = {
'geometry': 'Point',
'properties': {
'beach': 'str',
'site_id': 'str'
}
}
with fiona.open(output_shp, 'w', crs=from_epsg(4326), driver='ESRI Shapefile', schema=schema) as output:
for index, row in df_sites.iterrows():
point = Point(row['lon'], row['lat'])
prop = {
'beach': row['beach'],
'site_id': index,
}
output.write({'geometry': mapping(point), 'properties': prop})
if __name__ == '__main__':
sites_csv_to_shp()

src/data/mat_to_csv.py

@@ -0,0 +1,180 @@
"""
Converts raw .mat files into a flattened .csv structure which can be imported into python pandas.
"""
import logging.config
from datetime import datetime, timedelta
import pandas as pd
from mat4py import loadmat
logging.config.fileConfig('../logging.conf', disable_existing_loggers=False)
logger = logging.getLogger(__name__)
def parse_waves(waves_mat):
"""
Parses the raw waves.mat file and returns a pandas dataframe
:param waves_mat:
:return:
"""
logger.info('Parsing %s', waves_mat)
mat_data = loadmat(waves_mat)['data']
rows = []
for i in range(0, len(mat_data['site'])):
for j in range(0, len(mat_data['dates'][i])):
rows.append({
'beach': mat_data['site'][i],
'lon': mat_data['lon'][i],
'lat': mat_data['lat'][i],
'datetime': matlab_datenum_to_datetime(mat_data['dates'][i][j][0]),
'Hs': mat_data['H'][i][j][0],
'Hs0': mat_data['Ho'][i][j][0],
'Tp': mat_data['T'][i][j][0],
'dir': mat_data['D'][i][j][0],
'E': mat_data['E'][i][j][0],
'P': mat_data['P'][i][j][0],
'Exs': mat_data['Exs'][i][j][0],
'Pxs': mat_data['Pxs'][i][j][0],
})
df = pd.DataFrame(rows)
df['datetime'] = df['datetime'].dt.round('1s')
return df
def parse_tides(tides_mat):
"""
Parses the raw tides.mat file and returns a pandas dataframe
:param tides_mat:
:return:
"""
logger.info('Parsing %s', tides_mat)
mat_data = loadmat(tides_mat)['data']
rows = []
for i in range(0, len(mat_data['site'])):
for j in range(0, len(mat_data['time'])):
rows.append({
'beach': mat_data['site'][i][0],
'lon': mat_data['lons'][i][0],
'lat': mat_data['lats'][i][0],
'datetime': matlab_datenum_to_datetime(mat_data['time'][j][0]),
'tide': mat_data['tide'][i][j]
})
df = pd.DataFrame(rows)
df['datetime'] = df['datetime'].dt.round('1s')
return df
def parse_profiles(profiles_mat):
"""
Parses the raw profiles.mat file and returns a pandas dataframe
    :param profiles_mat:
:return:
"""
logger.info('Parsing %s', profiles_mat)
mat_data = loadmat(profiles_mat)['data']
rows = []
for i in range(0, len(mat_data['site'])):
for j in range(0, len(mat_data['pfx'][i])):
for profile_type in ['prestorm', 'poststorm']:
if profile_type == 'prestorm':
z = mat_data['pf1'][i][j][0]
if profile_type == 'poststorm':
z = mat_data['pf2'][i][j][0]
rows.append({
'beach': mat_data['site'][i],
'lon': mat_data['lon'][i],
'lat': mat_data['lat'][i],
'profile_type': profile_type,
'x': mat_data['pfx'][i][j][0],
'z': z,
})
df = pd.DataFrame(rows)
return df
def matlab_datenum_to_datetime(matlab_datenum):
# https://stackoverflow.com/a/13965852
return datetime.fromordinal(int(matlab_datenum)) + timedelta(days=matlab_datenum % 1) - timedelta(
days=366)
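# Worked example (value derived from the definition above):
#   matlab_datenum_to_datetime(736482.5) -> datetime(2016, 6, 1, 12, 0)
# MATLAB datenums count days from 00-Jan-0000 while python ordinals count from
# 01-Jan-0001, hence the 366 day correction.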
def get_unique_sites(dfs, cols=['beach', 'lat', 'lon']):
"""
Generates a dataframe of unique sites based on beach names, lats and lons. Creates a unique site ID for each.
:param dfs:
:param cols:
:return:
"""
rows = []
df_all = pd.concat([df[cols] for df in dfs])
beach_groups = df_all.groupby(['beach'])
for beach_name, beach_group in beach_groups:
site_groups = beach_group.groupby(['lat', 'lon'])
        site_no = 1
        for site_name, site_group in site_groups:
            site = '{}{:04d}'.format(beach_name, site_no)
            rows.append({'site_id': site,
                         'lat': site_name[0],
                         'lon': site_name[1],
                         'beach': beach_name})
            site_no += 1
df = pd.DataFrame(rows)
return df
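# For example (beach name purely illustrative): a beach 'NARRA' with three
# unique lat/lon pairs yields site_ids 'NARRA0001', 'NARRA0002' and 'NARRA0003'.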
def replace_unique_sites(df, df_sites, cols=['beach', 'lat', 'lon']):
"""
Replaces beach/lat/lon columns with the unique site_id
    :param df:
    :param df_sites:
    :param cols:
:return:
"""
df_merged = df.merge(df_sites, on=cols)
# Check that all our records have a unique site identifier
n_unmatched = len(df) - len(df_merged)
if n_unmatched > 0:
        logger.warning('%d of %d records could not be matched with a unique site', n_unmatched, len(df))
df_merged = df_merged.drop(columns=cols)
return df_merged
def main():
    df_waves = parse_waves(waves_mat='../../data/raw/waves.mat')
    df_tides = parse_tides(tides_mat='../../data/raw/tides.mat')
    df_profiles = parse_profiles(profiles_mat='../../data/raw/profiles.mat')

    logger.info('Identifying unique sites')
    df_sites = get_unique_sites(dfs=[df_waves, df_tides, df_profiles])
    df_waves = replace_unique_sites(df_waves, df_sites)
    df_tides = replace_unique_sites(df_tides, df_sites)
    df_profiles = replace_unique_sites(df_profiles, df_sites)
logger.info('Setting pandas index')
df_profiles.set_index(['site_id', 'profile_type', 'x'], inplace=True)
df_waves.set_index(['site_id', 'datetime'], inplace=True)
df_tides.set_index(['site_id', 'datetime'], inplace=True)
df_sites.set_index(['site_id'], inplace=True)
logger.info('Outputting .csv files')
df_profiles.to_csv('../../data/interim/profiles.csv')
df_tides.to_csv('../../data/interim/tides.csv')
df_waves.to_csv('../../data/interim/waves.csv')
df_sites.to_csv('../../data/interim/sites.csv')
logger.info('Done!')
if __name__ == '__main__':
main()

@@ -0,0 +1,76 @@
import pandas as pd
import os
import fiona
from shapely.geometry import LineString, Point
from shapely.geometry import shape
from shapely.ops import transform
import pyproj
from functools import partial
import numpy as np
def shapes_from_shp(shp_file):
"""
Parses a shape file and returns a list of shapely shapes
:param shp_file:
:return:
"""
shapes = []
for feat in fiona.open(shp_file):
shapes.append(shape(feat['geometry']))
return shapes
def convert_coord_systems(g1, in_coord_system='EPSG:4326', out_coord_system='EPSG:28356'):
    """
    Converts coordinates from one coordinate system to another. Needed because shapefiles are usually defined in
    lat/lon but should be converted to a projected coordinate system to calculate distances in metres.
    https://gis.stackexchange.com/a/127432
    :param g1: shapely geometry to reproject
    :param in_coord_system: Default is lat/lon WGS84
    :param out_coord_system: Default is GDA94 / MGA Zone 56 for the NSW coastline
    :return:
    """
project = partial(
pyproj.transform,
pyproj.Proj(init=in_coord_system), # source coordinate system
pyproj.Proj(init=out_coord_system)) # destination coordinate system
g2 = transform(project, g1) # apply projection
return g2
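# Usage sketch (illustrative): reproject a WGS84 lon/lat point so that shapely
# .distance() calls return metres rather than degrees:
#   point_mga = convert_coord_systems(Point(lon, lat))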
def distance_to_intersection(lat, lon, orientation, line_strings):
    """
    Returns the distance at which a line drawn from a lat/lon at an orientation intersects a line string
    :param lat:
    :param lon:
    :param orientation: Angle of the cross-shore profile line facing towards the land, in degrees, measured
    clockwise positive from true north.
    :param line_strings:
    :return:
    """
    start_point = Point(lon, lat)
    start_point = convert_coord_systems(start_point)

    distance = 1000  # m, look up to 1000 m landward for an intersection

    # The orientation is a compass bearing (clockwise from true north), so the
    # x component uses sin and the y component uses cos.
    new_point = Point(start_point.x + distance * np.sin(np.deg2rad(orientation)),
                      start_point.y + distance * np.cos(np.deg2rad(orientation)))
    profile_line = LineString([start_point, new_point])
# Check whether profile_line intersects with any lines in line_string
for line_string in line_strings:
intersection_points = profile_line.intersection(line_string)
if not intersection_points.is_empty:
return intersection_points.distance(start_point)
return None
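# Usage sketch (values illustrative): for an east-facing beach the landward
# direction is roughly west (bearing 270 degrees), so
#   distance_to_intersection(-33.71, 151.30, 270, dune_crest_line_strings)
# returns the distance in metres to the first crossed line, or None if nothing
# intersects within 1000 m.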
def get_sites_dune_crest_toe():
data_folder = './data/interim'
    df_sites = pd.read_csv(os.path.join(data_folder, 'sites.csv'), index_col=[0])

    # Import the dune crest shapefile and reproject it so distances are in metres
    for f in ['./data/raw/profile_features/dune_crests.shp']:
shapes = shapes_from_shp(f)
shapes = [convert_coord_systems(x) for x in shapes]
# Iterate through each site

src/logging.conf

@@ -0,0 +1,27 @@
[loggers]
keys=root, matplotlib
[handlers]
keys=consoleHandler
[formatters]
keys=simpleFormatter
[logger_root]
level=DEBUG
handlers=consoleHandler
[logger_matplotlib]
level=WARNING
handlers=consoleHandler
qualname=matplotlib
[handler_consoleHandler]
class=StreamHandler
level=DEBUG
formatter=simpleFormatter
args=(sys.stdout,)
[formatter_simpleFormatter]
format=%(asctime)s %(name)-17s %(levelname)-8s %(message)s
datefmt=%a, %d %b %Y %H:%M:%S
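For reference, simpleFormatter renders a record from mat_to_csv.py (run as a script, so the logger name is `__main__`) along these lines; the timestamp is illustrative:

    Mon, 05 Nov 2018 09:30:00 __main__          INFO     Parsing ../../data/raw/waves.mat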