Merge branch 'feature/twl-exceedence-time'

Chris Leaman, commit f23b273de0 (develop)
.env

@@ -0,0 +1,18 @@
# Environment variables go here; these will be automatically read when using "pipenv run python 'file.py'"
# Location where data is backed up to. A copy of the data set can be pulled from here.
DATA_BACKUP_DIR="J:/Coastal/Temp/CKL/nsw_2016_storm_impact/data"
# Location where the matlab interpreter is located. Required for a couple of data processing scripts.
MATLAB_PATH="C:/Program Files/MATLAB/R2016b/bin/win64/MATLAB.exe"
# Number of threads to use for multi-core processing. Used when calculating time-varying beach slope when estimating
# total water level.
MULTIPROCESS_THREADS=2
# The settings below should be left as is unless you know what you're doing.
# Need to set PYTHONPATH so that relative imports can be properly used with pipenv
# Refer to https://stackoverflow.com/q/52986500 and https://stackoverflow.com/a/49797761
# PYTHONPATH=${PWD}
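For reference, a minimal sketch (standard library only, hypothetical fallback values) of how these variables are read from Python once pipenv has loaded the `.env` file; the TWL forecasting script in this commit reads `MULTIPROCESS_THREADS` in exactly this way:

```
import os

# Values come from .env via "pipenv run"; the second argument is a fallback
# for environments where the file has not been loaded.
DATA_BACKUP_DIR = os.environ.get("DATA_BACKUP_DIR", "./data")
MATLAB_PATH = os.environ.get("MATLAB_PATH", "matlab")
MULTIPROCESS_THREADS = int(os.environ.get("MULTIPROCESS_THREADS", 4))
```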

.gitignore

@@ -1,5 +1,7 @@
# Jupyter NB Checkpoints
.ipynb_checkpoints/
/notebooks/*.png
# exclude data from source control by default
/data/
@@ -7,11 +9,15 @@
# Pycharm
.idea
# Matlab
*.asv
# DotEnv configuration
# .env
# Python
__pycache__/
*.py[cod]
*$py.class
/.venv/
*.log

Makefile

@@ -1,9 +1,44 @@
SHELL=cmd
#################################################################################
# PROJECT RULES #
#################################################################################
.PHONY: push-data mat-to-csv sites-csv-to-shp
###############################
# Load environment variables
include .env
export $(shell sed 's/=.*//' .env)
CURRENT_DIR = $(shell pwd)
###############################
# Create python virtual environment
.PHONY: venv-init
venv-init: ##@environment Setup virtual environment
conda env create -f environment.yml --prefix=.venv python=3.6
.PHONY: venv-activate
venv-activate: ##@environment Activates the virtual environment
activate $(CURRENT_DIR)/.venv
.PHONY: venv-update
venv-update: ##@environment Updates to latest packages
conda update ipykernel && conda update --prefix .venv --all
.PHONY: venv-requirements-install
venv-requirements-install: ##@environment Ensures environment.yml packages are installed
conda env update
# The environment.yml file should really be created by hand, but
# this provides a good starting point.
.PHONY: venv-requirements-export
venv-requirements-export: ##@environment Exports current environment to environment.yml
conda env export --file environment.yml
# To install new packages: conda install --prefix .venv PACKAGE
###############################
# Get data from network drive
.PHONY: push-data pull-data
push-data: ##@data Copies data from ./data/ to data backup directory
rclone copy ./data/ $(DATA_BACKUP_DIR) --exclude "*.las" --progress
@@ -12,15 +47,136 @@ pull-data: ##@data Copies data from data backup directory to ./data/
# We probably don't want to pull the raw LIDAR .las files, so let's exclude them
rclone copy $(DATA_BACKUP_DIR) ./data/ --exclude "*.las" --progress
mat-to-csv: ##@data Converts raw .mat files to .csv for python
cd ./src/data/ && python mat_to_csv.py
sites-csv-to-shp: ##@data Create the sites.shp from sites.csv
cd ./src/data && python csv_to_shp.py sites_csv_to_shp "..\..\data\interim\sites.csv" "..\..\data\interim\sites.shp"
###############################
# Process data
.PHONY: process-mat
impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/interim/impacts_forecasted_mean_slope_sto06.csv ./data/interim/impacts_observed.csv ##@products makes observed and forecasted impacts
# Calculates beach orientations at each profile
./data/raw/processed_shorelines/orientations.mat: ./data/raw/processed_shorelines/profiles.mat
$(MATLAB_PATH) -nosplash -r "cd $(CURRENT_DIR); run('./src/data/beach_orientations.m'); quit"
# # Produces a .csv of sites where our beach cross-sections are located
# ./data/interim/sites.csv ./data/interim/profiles.csv: ./data/raw/processed_shorelines/profiles.mat
# activate ./.venv && python ./src/data/parse_mat.py create-sites-and-profiles-csv \
# --profiles-mat "./data/raw/processed_shorelines/profiles.mat" \
# --profiles-output-file "./data/interim/profiles.csv" \
# --sites-output-file "./data/interim/sites.csv"
# Produces a .csv of sites where our beach cross-sections are located
./data/interim/sites.csv ./data/interim/profiles.csv: ./data/raw/processed_shorelines/profiles.mat
activate ./.venv && python ./src/cli.py create-sites-and-profiles-csv \
--profiles-mat "./data/raw/processed_shorelines/profiles.mat" \
--profiles-output-file "./data/interim/profiles.csv" \
--sites-output-file "./data/interim/sites.csv"
# Produces a .csv of waves for each site
./data/interim/waves.csv: ./data/interim/sites.csv ./data/raw/processed_shorelines/waves.mat
activate ./.venv && python ./src/cli.py create-waves-csv \
--waves-mat "./data/raw/processed_shorelines/waves.mat" \
--sites-csv "./data/interim/sites.csv" \
--output-file "./data/interim/waves.csv"
# Produces a .csv of tides for each site
./data/interim/tides.csv: ./data/interim/sites.csv ./data/raw/processed_shorelines/tides.mat
activate ./.venv && python ./src/cli.py create-tides-csv \
--tides-mat "./data/raw/processed_shorelines/tides.mat" \
--sites-csv "./data/interim/sites.csv" \
--output-file "./data/interim/tides.csv"
# Creates a .shp of our sites to load into QGis
./data/interim/sites.shp: ./data/interim/sites.csv
activate ./.venv && python ./src/cli.py sites-csv-to-shp \
--input-csv "./data/interim/sites.csv" \
--output-shp "./data/interim/sites.shp"
# # Creates a .csv of our dune toe and crest profile features from .shp file
# ./data/interim/profile_features.csv: ./data/raw/profile_features/dune_crests.shp ./data/raw/profile_features/dune_toes.shp ./data/interim/sites.csv ./data/interim/profiles.csv
# activate ./.venv && python ./src/cli.py create-profile-features \
# --dune-crest-shp "./data/raw/profile_features/dune_crests.shp" \
# --dune-toe-shp "./data/raw/profile_features/dune_toes.shp" \
# --sites-csv "./data/interim/sites.csv" \
# --profiles-csv "./data/interim/profiles.csv" \
# --output-csv "./data/interim/profile_features.csv"
# Create a .csv of our dune toe and crest profile features from Tom Beuzen's .mat file
./data/interim/profile_features.csv: ./data/raw/profile_features_tom_beuzen/*.mat ./data/interim/sites.csv
activate ./.venv && python ./src/cli.py create-profile-features \
--crest-mat "./data/raw/profile_features_tom_beuzen/J16_DuneCrest.mat" \
--toe-mat "./data/raw/profile_features_tom_beuzen/J16_DuneToe.mat" \
--sites-csv "./data/interim/sites.csv" \
--output-file "./data/interim/profile_features.csv"
# Creates a forecast of TWL using sto06 and the time-varying prestorm foreshore slope
./data/interim/twl_foreshore_slope_sto06.csv: ./data/interim/waves.csv ./data/interim/tides.csv ./data/interim/profiles.csv ./data/interim/sites.csv ./data/interim/profile_features.csv
activate ./.venv && python ./src/cli.py create-twl-forecast \
--waves-csv "./data/interim/waves.csv" \
--tides-csv "./data/interim/tides.csv" \
--profiles-csv "./data/interim/profiles.csv" \
--profile-features-csv "./data/interim/profile_features.csv" \
--runup-function "sto06" \
--slope "foreshore" \
--profile-type "prestorm" \
--output-file "./data/interim/twl_foreshore_slope_sto06.csv"
./data/interim/twl_mean_slope_sto06.csv: ./data/interim/waves.csv ./data/interim/tides.csv ./data/interim/profiles.csv ./data/interim/sites.csv ./data/interim/profile_features.csv
activate ./.venv && python ./src/cli.py create-twl-forecast \
--waves-csv "./data/interim/waves.csv" \
--tides-csv "./data/interim/tides.csv" \
--profiles-csv "./data/interim/profiles.csv" \
--profile-features-csv "./data/interim/profile_features.csv" \
--runup-function "sto06" \
--slope "mean" \
--profile-type "prestorm" \
--output-file "./data/interim/twl_mean_slope_sto06.csv"
./data/interim/twl_poststorm_mean_slope_sto06.csv: ./data/interim/waves.csv ./data/interim/tides.csv ./data/interim/profiles.csv ./data/interim/sites.csv ./data/interim/profile_features.csv
activate ./.venv && python ./src/cli.py create-twl-forecast \
--waves-csv "./data/interim/waves.csv" \
--tides-csv "./data/interim/tides.csv" \
--profiles-csv "./data/interim/profiles.csv" \
--profile-features-csv "./data/interim/profile_features.csv" \
--runup-function "sto06" \
--slope "mean" \
--profile-type "poststorm" \
--output-file "./data/interim/twl_poststorm_mean_slope_sto06.csv"
./data/interim/impacts_observed.csv: ./data/interim/profiles.csv ./data/interim/profile_features.csv
activate ./.venv && python ./src/cli.py create-observed-impacts \
--profiles-csv "./data/interim/profiles.csv" \
--profile-features-csv "./data/interim/profile_features.csv" \
--output-file "./data/interim/impacts_observed.csv"
./data/interim/impacts_forecasted_mean_slope_sto06.csv: ./data/interim/profile_features.csv ./data/interim/twl_mean_slope_sto06.csv
activate ./.venv && python ./src/cli.py create-forecasted-impacts \
--profile-features-csv "./data/interim/profile_features.csv" \
--forecasted-twl-csv "./data/interim/twl_mean_slope_sto06.csv" \
--output-file "./data/interim/impacts_forecasted_mean_slope_sto06.csv"
./data/interim/impacts_forecasted_foreshore_slope_sto06.csv: ./data/interim/profile_features.csv ./data/interim/twl_foreshore_slope_sto06.csv
activate ./.venv && python ./src/cli.py create-forecasted-impacts \
--profile-features-csv "./data/interim/profile_features.csv" \
--forecasted-twl-csv "./data/interim/twl_foreshore_slope_sto06.csv" \
--output-file "./data/interim/impacts_forecasted_foreshore_slope_sto06.csv"
./data/interim/impacts_forecasted_poststorm_mean_slope_sto06.csv: ./data/interim/profile_features.csv ./data/interim/twl_poststorm_mean_slope_sto06.csv
activate ./.venv && python ./src/cli.py create-forecasted-impacts \
--profile-features-csv "./data/interim/profile_features.csv" \
--forecasted-twl-csv "./data/interim/twl_poststorm_mean_slope_sto06.csv" \
--output-file "./data/interim/impacts_forecasted_poststorm_mean_slope_sto06.csv"
###############################
# Misc commands
format: ./src/*.py ##@misc Format python files using black
activate ./.venv && black --line-length 120 "src/"
###############################
# Help command
#################################################################################
# Self Documenting Commands #
#################################################################################
.DEFAULT_GOAL := help
.PHONY: help
@@ -49,5 +205,3 @@ HELP_FUN = \
help: ##@other Show this help.
@perl -e '$(HELP_FUN)' $(MAKEFILE_LIST)

README.md

@@ -1,56 +1,77 @@
# 2016 Narrabeen Storm EWS Performance
This repository investigates whether the storm impacts (i.e. Sallenger, 2000) of the June 2016 Narrabeen Storm could have been forecasted in advance.
## Repository and analysis format
This repository follows the [Cookiecutter Data Science](https://drivendata.github.io/cookiecutter-data-science/) structure where possible. The analysis is done in python (look at the `/src/` folder) with some interactive, exploratory notebooks located at `/notebooks`.
Development is conducted using a [gitflow](https://www.atlassian.com/git/tutorials/comparing-workflows/gitflow-workflow) approach. The `master` branch stores the official release history and the `develop` branch serves as an integration branch for features. Other `hotfix` and `feature` branches should be created and merged as necessary.
## How to start?
#### Getting software requirements
The following requirements are needed to run various bits:
- [Anaconda](https://www.anaconda.com/download/): Used for processing and analysing data. The Anaconda distribution is used for managing environments and is available for Windows, Mac and Linux. Jupyter notebooks are used for exploratory analysis and communication.
- [QGIS](https://www.qgis.org/en/site/forusers/download): Used for looking at raw LIDAR pre/post storm surveys and extracting dune crests/toes
- [rclone](https://rclone.org/downloads/): Data is not tracked by this repository, but is backed up to a remote Chris Leaman working directory located on the WRL coastal drive. Rclone is used to sync local and remote copies. Ensure rclone.exe is located on your `PATH` environment.
- [gnuMake](http://gnuwin32.sourceforge.net/packages/make.htm): A list of commands for processing data is provided in the `./Makefile`. Use gnuMake to launch these commands. Ensure make.exe is located on your `PATH` environment.
- git
#### Getting the repository
Clone the repository into your local environment:
```
git clone http://git.wrl.unsw.edu.au:3000/chrisl/nsw-2016-storm-impact.git
cd nsw-2016-storm-impact
```
#### Getting the python environment set up
Commands for setting up the python environment are provided in the `Makefile`. Simply run the following commands in the repo root directory:
```
make venv-init
make venv-activate
make venv-requirements-install
```
You can see what these commands are actually running by inspecting the `Makefile`.
#### Pull data
In line with good git practice, the raw, interim and processed data are not tracked by the repository. A copy of the raw data is stored on the WRL Coastal J:\ drive and can be copied using the following command.
```
make pull-data
```
If you have updated the data and want to copy it back to the J:\ drive, use the following command. Note that it is probably not a good idea to modify data stored in `./data/raw`.
```
make push-data
```
#### View notebooks
Jupyter notebooks have been set up to help explore the data. Once you have set up your environment and pulled the data, this is probably a good place to start. To run the notebooks, use the following command and navigate to the `./notebooks` folder.
```
jupyter notebook
```
## Available data
Raw, interim and processed data used in this analysis is kept in the `/data/` folder. Data is not tracked in the repository due to size constraints, but stored locally. A mirror is kept of the coastal folder J drive which you can
use to push/pull to, using rclone. In order to get the data, run `make pull-data`.
List of data:
- `/data/raw/processed_shorelines`: This data was received from Tom Beuzen in October 2018. It consists of pre/post storm profiles at every 100 m section along beaches ranging from Dee Why to Nambucca. Profiles are based on raw aerial LIDAR and were processed by Mitch Harley. Tides and waves (10 m contour and reverse shoaled deepwater) for each individual 100 m section are also provided.
- `/data/raw/raw_lidar`: This is the raw pre/post storm aerial LIDAR which was taken for the June 2016 storm. `.las` files are the raw files which have been processed into `.tiff` files using `PDAL`. Note that these files have not
been corrected for systematic errors, so actual elevations should be taken from the `processed_shorelines` folder. Obtained November 2018 from Mitch Harley from the black external HDD labeled "UNSW LIDAR".
- `/data/raw/profile_features`: Dune toe and crest locations based on prestorm LIDAR. Refer to `/notebooks/qgis.qgz`, which shows how they were manually extracted. Note that the shapefiles only show the location (lat/lon) of the dune crest and toe. For actual elevations, these locations need to be related to the processed shorelines.
## Notebooks
- `/notebooks/01_exploration.ipynb`: Shows how to import processed shorelines, waves and tides. An interactive widget plots the location and cross sections.
- `/notebooks/qgis.qgz`: A QGIS file which is used to explore the aerial LIDAR data in `/data/raw/raw_lidar`. By examining the pre-storm LIDAR, dune crest and dune toe lines are manually extracted. These are stored in `/data/profile_features/`.
## TODO
- [ ] Mitch updated the raw profiles.mat to include more information about the survey time. Our data scripts should be updated to parse this new information and include it in our dataframes.
- [ ] Set up a pre-commit hook for automatic code formatting using [black](https://ljvmiranda921.github.io/notebook/2018/06/21/precommits-using-black-and-flake8/). Low priority, as black can be run with the command `make format`.
- [ ] Raw tide WLs are interpolated based on location from tide gauges. This probably isn't the most accurate method, but should have a small effect since surge elevation was low during this event. Need to assess the effect of this method.
- [ ] Estimate max TWL from the elevation where pre-storm and post-storm profiles are the same. Need to think more about this, as runup impacting the dune toe will move the dune face back, incorrectly raising the observed TWL. Perhaps this estimation of max TWL is only useful for the swash regime.
- [ ] Implement [bayesian change detection algorithm](https://github.com/hildensia/bayesian_changepoint_detection) to help detect dune crests and toes from profiles. Probably low priority at the moment since we are doing manual detection.
- [ ] Implement dune impact calculations as per Palmsten & Holman. Calculation should be done in a new dataframe.
- [ ] Implement data/interim/*.csv file checking using py.test. Check for correct columns, number of nans etc. Testing of code is probably a lower priority than just checking the interim data files at the moment. Some functions which should be tested are the slope functions in `forecast_twl.py`, as these can be tricky with different profiles.
- [ ] Investigate using [modin](https://github.com/modin-project/modin) to help speed up analysis.
- [X] Need to think about how relative imports are handled, see [here](https://chrisyeh96.github.io/2017/08/08/definitive-guide-python-imports.html). Maybe the click CLI interface should be moved to the `./src/` folder and it can import all the other packages?
- [ ] Simplify runup_models in Stockdon06 - we should really only have one function for each runup model. Need to make it work with individual values or entire dataframe. Use [np.maskedarray](https://docs.scipy.org/doc/numpy-1.15.0/reference/maskedarray.generic.html)

environment.yml

@@ -0,0 +1,161 @@
name: C:\Users\z5189959\Desktop\nsw-2016-storm-impact\.venv
channels:
- plotly
- defaults
- conda-forge
dependencies:
- black=18.9b0=py_0
- colorlover=0.2.1=py_0
- jupyter_contrib_core=0.3.3=py_2
- jupyter_contrib_nbextensions=0.5.0=py36_1000
- jupyter_highlight_selected_word=0.2.0=py36_1000
- jupyter_latex_envs=1.4.4=py36_1000
- jupyter_nbextensions_configurator=0.4.0=py36_1000
- matplotlib-base=3.0.2=py36h3e3dc42_1001
- appdirs=1.4.3=py36h28b3542_0
- asn1crypto=0.24.0=py36_0
- attrs=18.2.0=py36h28b3542_0
- autopep8=1.4.3=py36_0
- backcall=0.1.0=py36_0
- blas=1.0=mkl
- bleach=3.0.2=py36_0
- boost=1.67.0=py36_4
- boost-cpp=1.67.0=hfa6e2cd_4
- ca-certificates=2018.03.07=0
- certifi=2018.10.15=py36_0
- cffi=1.11.5=py36h74b6da3_1
- chardet=3.0.4=py36_1
- click=7.0=py36_0
- click-plugins=1.0.4=py36_0
- cligj=0.5.0=py36_0
- colorama=0.4.0=py36_0
- cryptography=2.3.1=py36h74b6da3_0
- curl=7.62.0=h2a8f88b_0
- cycler=0.10.0=py36h009560c_0
- decorator=4.3.0=py36_0
- entrypoints=0.2.3=py36_2
- expat=2.2.5=he025d50_0
- fiona=1.7.10=py36h5bf8d1d_0
- freetype=2.9.1=ha9979f8_1
- freexl=1.0.5=hfa6e2cd_0
- gdal=2.2.2=py36hcebd033_1
- geos=3.6.2=h9ef7328_2
- geotiff=1.4.2=hd5bfa41_0
- hdf4=4.2.13=h712560f_2
- hdf5=1.10.1=h98b8871_1
- icc_rt=2017.0.4=h97af966_0
- icu=58.2=ha66f8fd_1
- idna=2.7=py36_0
- intel-openmp=2019.1=144
- ipykernel=5.1.0=py36h39e3cac_0
- ipython=7.2.0=py36h39e3cac_0
- ipython_genutils=0.2.0=py36h3c5d0ee_0
- ipywidgets=7.4.2=py36_0
- jedi=0.13.1=py36_0
- jinja2=2.10=py36_0
- jpeg=9b=hb83a4c4_2
- jsonschema=2.6.0=py36h7636477_0
- jupyter_client=5.2.3=py36_0
- jupyter_core=4.4.0=py36_0
- kealib=1.4.7=ha5b336b_5
- kiwisolver=1.0.1=py36h6538335_0
- krb5=1.16.1=h038dc86_6
- libboost=1.67.0=hfd51bdf_4
- libcurl=7.62.0=h2a8f88b_0
- libgdal=2.2.2=h2727f2b_1
- libiconv=1.15=h1df5818_7
- libkml=1.3.0=he5f2a48_4
- libnetcdf=4.4.1.1=h825a56a_8
- libpng=1.6.35=h2a8f88b_0
- libpq=10.5=h5fe2233_0
- libsodium=1.0.16=h9d3ae62_0
- libspatialite=4.3.0a=h383548d_18
- libssh2=1.8.0=hd619d38_4
- libtiff=4.0.9=h36446d0_2
- libxml2=2.9.8=hadb2253_1
- libxslt=1.1.32=hf6f1972_0
- lxml=4.2.5=py36hef2cd61_0
- m2w64-gcc-libgfortran=5.3.0=6
- m2w64-gcc-libs=5.3.0=7
- m2w64-gcc-libs-core=5.3.0=7
- m2w64-gmp=6.1.0=2
- m2w64-libwinpthread-git=5.0.0.4634.697f757=2
- markupsafe=1.1.0=py36he774522_0
- matplotlib=3.0.1=py36hc8f65d3_0
- mistune=0.8.4=py36he774522_0
- mkl=2018.0.3=1
- mkl_fft=1.0.6=py36hdbbee80_0
- mkl_random=1.0.1=py36h77b88f5_1
- msys2-conda-epoch=20160418=1
- munch=2.3.2=py36_0
- nbconvert=5.3.1=py36_0
- nbformat=4.4.0=py36h3a5bc1b_0
- notebook=5.7.2=py36_0
- numpy=1.15.4=py36ha559c80_0
- numpy-base=1.15.4=py36h8128ebf_0
- openjpeg=2.3.0=h5ec785f_1
- openssl=1.0.2p=hfa6e2cd_0
- pandas=0.23.4=py36h830ac7b_0
- pandoc=2.2.3.2=0
- pandocfilters=1.4.2=py36_1
- parso=0.3.1=py36_0
- pickleshare=0.7.5=py36_0
- pip=18.1=py36_0
- plotly=3.4.2=py36h28b3542_0
- proj4=4.9.3=hcf24537_7
- prometheus_client=0.4.2=py36_0
- prompt_toolkit=2.0.7=py36_0
- psutil=5.4.8=py36he774522_0
- py-boost=1.67.0=py36h8300f20_4
- pycodestyle=2.4.0=py36_0
- pycparser=2.19=py36_0
- pygments=2.2.0=py36hb010967_0
- pyopenssl=18.0.0=py36_0
- pyparsing=2.3.0=py36_0
- pyproj=1.9.5.1=py36_0
- pyqt=5.9.2=py36h6538335_2
- pysocks=1.6.8=py36_0
- python=3.6.7=h33f27b4_1
- python-dateutil=2.7.5=py36_0
- pytz=2018.7=py36_0
- pywinpty=0.5.4=py36_0
- pyyaml=3.13=py36hfa6e2cd_0
- pyzmq=17.1.2=py36hfa6e2cd_0
- qt=5.9.6=vc14h1e9a669_2
- requests=2.20.1=py36_0
- retrying=1.3.3=py36_2
- scikit-learn=0.20.1=py36hb854c30_0
- scipy=1.1.0=py36h4f6bf74_1
- send2trash=1.5.0=py36_0
- setuptools=40.6.2=py36_0
- shapely=1.6.4=py36hc90234e_0
- sip=4.19.8=py36h6538335_0
- six=1.11.0=py36_1
- sqlite=3.25.3=he774522_0
- terminado=0.8.1=py36_1
- testpath=0.4.2=py36_0
- tk=8.6.8=hfa6e2cd_0
- toml=0.10.0=py36h28b3542_0
- tornado=5.1.1=py36hfa6e2cd_0
- traitlets=4.3.2=py36h096827d_0
- urllib3=1.23=py36_0
- vc=14.1=h0510ff6_4
- vs2015_runtime=14.15.26706=h3a45250_0
- wcwidth=0.1.7=py36h3d5aa90_0
- webencodings=0.5.1=py36_1
- wheel=0.32.3=py36_0
- widgetsnbextension=3.4.2=py36_0
- win_inet_pton=1.0.1=py36_1
- wincertstore=0.2=py36h7fe50ca_0
- winpty=0.4.3=4
- xerces-c=3.2.2=ha925a31_0
- xz=5.2.4=h2fa13f4_4
- yaml=0.1.7=hc54c509_2
- yapf=0.25.0=py36_0
- zeromq=4.2.5=he025d50_1
- zlib=1.2.11=h62dcd97_3
- plotly-orca=1.1.1=1
- pip:
- blackcellmagic==0.0.1
- mat4py==0.4.1
prefix: C:\Users\z5189959\Desktop\nsw-2016-storm-impact\.venv

File diff suppressed because one or more lines are too long

@@ -0,0 +1,627 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Investigate \"collision protection volume\" concept"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-05T02:45:14.908283Z",
"start_time": "2018-12-05T02:45:14.556163Z"
}
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"%reload_ext autoreload\n",
"%autoreload"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-05T02:45:34.323928Z",
"start_time": "2018-12-05T02:45:14.911088Z"
}
},
"outputs": [],
"source": [
"from IPython.core.debugger import set_trace\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"\n",
"import plotly\n",
"import plotly.graph_objs as go\n",
"import plotly.plotly as py\n",
"import plotly.tools as tls\n",
"import plotly.figure_factory as ff\n",
"import plotly.io as pio"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data\n",
"Load data from the `./data/interim/` folder and parse into `pandas` dataframes."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-05T02:45:53.010702Z",
"start_time": "2018-12-05T02:45:34.324930Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Importing profiles.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\z5189959\\Desktop\\nsw-2016-storm-impact\\.venv\\lib\\site-packages\\numpy\\lib\\arraysetops.py:522: FutureWarning:\n",
"\n",
"elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Importing profile_features.csv\n",
"Importing impacts_forecasted_foreshore_slope_sto06.csv\n",
"Importing impacts_forecasted_mean_slope_sto06.csv\n",
"Importing impacts_observed.csv\n",
"Importing twl_foreshore_slope_sto06.csv\n",
"Importing twl_mean_slope_sto06.csv\n",
"Done!\n"
]
}
],
"source": [
"def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
" print('Importing {}'.format(csv))\n",
" return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n",
"\n",
"df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n",
"df_profile_features = df_from_csv('profile_features.csv', index_col=[0])\n",
"\n",
"impacts = {\n",
" 'forecasted': {\n",
" 'foreshore_slope_sto06': df_from_csv('impacts_forecasted_foreshore_slope_sto06.csv', index_col=[0]),\n",
" 'mean_slope_sto06': df_from_csv('impacts_forecasted_mean_slope_sto06.csv', index_col=[0]),\n",
" },\n",
" 'observed': df_from_csv('impacts_observed.csv', index_col=[0])\n",
" }\n",
"\n",
"twls = {\n",
" 'forecasted': {\n",
" 'foreshore_slope_sto06': df_from_csv('twl_foreshore_slope_sto06.csv', index_col=[0, 1]),\n",
" 'mean_slope_sto06':df_from_csv('twl_mean_slope_sto06.csv', index_col=[0, 1]),\n",
" }\n",
"}\n",
"\n",
"print('Done!')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Lets define a function to calculate the \"collision protection volume\" based on prestorm profiles."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Get berm feature functions\n",
"Define a couple of functions which are going to help us get features of our berms."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-05T03:01:56.646213Z",
"start_time": "2018-12-05T03:01:56.366466Z"
},
"code_folding": []
},
"outputs": [],
"source": [
"from shapely.geometry import Point, LineString, Polygon\n",
"\n",
"\n",
"def collision_protection_vol(x, z, d_low_x, d_low_z, lower_z, angle):\n",
" # First, get the bounding line strings of our protection volume\n",
" lower_line = LineString([Point(min(x), lower_z), Point(max(x), lower_z)])\n",
" profile_line = LineString([Point(x_coord, z_coord) for x_coord, z_coord in zip(x, z)\n",
" if all([not np.isnan(x_coord), not np.isnan(z_coord)])])\n",
" slope_line = LineString([Point(d_low_x, d_low_z),\n",
" Point(max(x), d_low_z - max(x) * np.sin(np.deg2rad(angle)))])\n",
"\n",
" # Work out where our lower line and slope line intersect\n",
" lower_profile_intersection = lower_line.intersection(profile_line)\n",
" d_protected_intersection = lower_line.intersection(slope_line)\n",
"\n",
" # Define the perimeter of the protection area\n",
" profile_protected = LineString([Point(x_coord, z_coord) for x_coord, z_coord\n",
" in zip(profile_line.xy[0], profile_line.xy[1])\n",
" if d_low_x < x_coord < lower_profile_intersection.xy[0][0]]\n",
" + [lower_profile_intersection]\n",
" + [d_protected_intersection]\n",
" + [Point(d_low_x, d_low_z)])\n",
"\n",
" # Convert to polygon and return the area (m3/m)\n",
" protection_area_poly = Polygon(profile_protected)\n",
" protection_area_vol = protection_area_poly.area\n",
" return protection_area_vol\n",
"\n",
"\n",
"def get_berm_width(z, d_low_x):\n",
" \"\"\"\n",
" Returns the width of the berm, defined by the distance between dune toe to z=0\n",
" \"\"\"\n",
" x_seaward_limit = z.dropna().tail(1).reset_index().x[0]\n",
" return x_seaward_limit - d_low_x\n",
"\n",
"\n",
"\n",
"\n",
"site_id = 'NARRA0018'\n",
"profile_type = 'prestorm'\n",
"query = \"site_id == '{}' and profile_type == '{}'\".format(\n",
" site_id, profile_type)\n",
"prestorm_profile = df_profiles.query(query)\n",
"profile_features = df_profile_features.query(query)\n",
"\n",
"x = prestorm_profile.index.get_level_values('x')\n",
"z = prestorm_profile.z\n",
"d_low_x = profile_features.dune_toe_x.tolist()[0]\n",
"d_low_z = profile_features.dune_toe_z.tolist()[0]\n",
"angle = 60 # degrees from the horizontal\n",
"lower_z = 0.5 # from mhw\n",
"\n",
"vol = collision_protection_vol(x, z, d_low_x, d_low_z, lower_z, angle)\n",
"berm_width = get_berm_width(z, d_low_x)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-05T02:45:54.224110Z",
"start_time": "2018-12-05T02:45:54.030142Z"
}
},
"outputs": [],
"source": [
"from datetime import timedelta\n",
"\n",
"def wl_time(t, wl, z_lower, z_upper):\n",
" \"\"\"\n",
" Returns the amount of time the water level is between two elevations.\n",
" \"\"\"\n",
" df_wl = pd.DataFrame.from_records([(t_val, R2_val) for t_val, R2_val in zip(t,R2)], columns=['datetime','wl'])\n",
" df_wl.set_index(pd.DatetimeIndex(df_wl['datetime']),inplace=True)\n",
" df_wl.drop(columns=['datetime'], inplace=True)\n",
" \n",
" # Assumes that each record is one hour... probably need to check this\n",
" hours = len(df_wl.query('{} < wl < {}'.format(z_lower, z_upper)))\n",
" return timedelta(hours=hours)\n",
"\n",
"def wave_power(t, wl, z_lower, z_upper, Hs0, Tp):\n",
" \"\"\"\n",
" Returns the cumulative wave power when the water level is between two elevations.\n",
" \"\"\"\n",
" df_wl = pd.DataFrame.from_records([(t_val, R2_val,Hs0_val,Tp_val) for t_val, R2_val,Hs0_val,Tp_val in zip(t,R2,Hs0,Tp)], columns=['datetime','wl', 'Hs0','Tp'])\n",
" df_wl.set_index(pd.DatetimeIndex(df_wl['datetime']),inplace=True)\n",
" df_wl.drop(columns=['datetime'], inplace=True)\n",
" \n",
" # Assumes that each record is one hour... probably need to check this\n",
" rho = 1025 # kg/m3\n",
" g = 9.8 # m/s2\n",
" df_wl_times = df_wl.query('{} < wl < {}'.format(z_lower, z_upper))\n",
" power = rho * g ** 2 / 64 / np.pi * df_wl_times.Hs0 ** 2 * df_wl_times.Tp\n",
" return power.sum()\n",
"\n",
"df_twl = twls['forecasted']['mean_slope_sto06']\n",
"df_twl_site = df_twl.query(\"site_id == '{}'\".format(site_id))\n",
"\n",
"R2 = df_twl_site.R2.tolist()\n",
"t = df_twl_site.index.get_level_values('datetime')\n",
"z_lower = 0.5\n",
"z_upper = d_low_z\n",
"\n",
"exposed_time = wl_time(t, R2, z_lower,z_upper)\n",
"\n",
"Hs0 = df_twl.Hs0.tolist()\n",
"Tp = df_twl.Tp.tolist()\n",
"wave_p = wave_power(t, R2, z_lower,z_upper,Hs0, Tp)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-05T02:45:54.231129Z",
"start_time": "2018-12-05T02:45:54.225660Z"
}
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-05T03:37:45.472885Z",
"start_time": "2018-12-05T03:37:45.462857Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0.96"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def dune_toe_elevation_change(site_id, df_profile_features):\n",
" query = \"site_id == '{}'\".format(site_id)\n",
" profile_features = df_profile_features.query(query)\n",
" prestorm_dune_toe_z = profile_features.query(\"profile_type=='prestorm'\").dune_toe_z.tolist()[0]\n",
" poststorm_dune_toe_z = profile_features.query(\"profile_type=='poststorm'\").dune_toe_z.tolist()[0]\n",
" return prestorm_dune_toe_z - poststorm_dune_toe_z\n",
"\n",
"toe_ele_change = dune_toe_elevation_change(\"MANNING0081\", df_profile_features)\n",
"toe_ele_change"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-05T03:45:45.203827Z",
"start_time": "2018-12-05T03:45:13.608478Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 of 816\n",
"20 of 816\n",
"40 of 816\n",
"60 of 816\n",
"80 of 816\n",
"100 of 816\n"
]
}
],
"source": [
"vols = []\n",
"exposed_times = []\n",
"toe_ele_changes = []\n",
"wave_powers = []\n",
"berm_widths = []\n",
"swash_vol_changes = []\n",
"dune_face_vol_changes = []\n",
"site_ids_to_plot = []\n",
"\n",
"# Get site ids where we observed collision\n",
"observed_site_ids = impacts['observed'].query(\"storm_regime=='collision'\").index.get_level_values('site_id').unique()\n",
"\n",
"# # Get site ids where we forecast swash\n",
"# forecasted_site_ids = impacts['forecasted']['mean_slope_sto06'].query(\"storm_regime=='swash'\").index.get_level_values('site_id').unique()\n",
"\n",
"# site_ids = set(observed_site_ids).intersection(set(forecasted_site_ids))\n",
"\n",
"site_ids = observed_site_ids\n",
"\n",
"# Calculate for each site\n",
"\n",
"for n, site_id in enumerate(site_ids):\n",
" \n",
" if n%20 ==0:\n",
" print('{} of {}'.format(n, len(site_ids)))\n",
" \n",
" try:\n",
" query = \"site_id == '{}' and profile_type == '{}'\".format(site_id, 'prestorm')\n",
" prestorm_profile = df_profiles.query(query)\n",
" profile_features = df_profile_features.query(query)\n",
"\n",
" vol = collision_protection_vol(x = prestorm_profile.index.get_level_values('x'),\n",
" z = prestorm_profile.z,\n",
" d_low_x = profile_features.dune_toe_x.tolist()[0],\n",
" d_low_z = profile_features.dune_toe_z.tolist()[0],\n",
" lower_z = profile_features.dune_toe_z.tolist()[0] - 2, # from mhw\n",
" angle = 60, # degrees from the horizontal\n",
" )\n",
" \n",
" df_twl = twls['forecasted']['mean_slope_sto06']\n",
" df_twl_site = df_twl.query(\"site_id == '{}'\".format(site_id))\n",
" \n",
" berm_width = get_berm_width(z = prestorm_profile.z,\n",
" d_low_x = profile_features.dune_toe_x.tolist()[0]) \n",
" \n",
" exposed_time = wl_time(t = df_twl_site.index.get_level_values('datetime'),\n",
" wl = df_twl_site.R2.tolist(),\n",
" z_lower = profile_features.dune_toe_z.tolist()[0] -2,\n",
" z_upper = profile_features.dune_toe_z.tolist()[0],\n",
" )\n",
" swash_vol_change = impacts['observed'].query(\"site_id == '{}'\".format(site_id)).swash_vol_change.tolist()[0]\n",
" dune_face_vol_change = impacts['observed'].query(\"site_id == '{}'\".format(site_id)).dune_face_vol_change.tolist()[0]\n",
" \n",
" power = wave_power(t = df_twl_site.index.get_level_values('datetime'),\n",
" wl = df_twl_site.R2.tolist(),\n",
" z_lower = profile_features.dune_toe_z.tolist()[0] -2,\n",
" z_upper = profile_features.dune_toe_z.tolist()[0],\n",
" Hs0=df_twl_site.Hs0.tolist(),\n",
" Tp=df_twl_site.Tp.tolist())\n",
" \n",
" toe_ele_change = dune_toe_elevation_change(site_id, df_profile_features)\n",
" except:\n",
" continue\n",
" \n",
"# print(site_id, toe_ele_change)\n",
" vols.append(vol)\n",
" exposed_times.append(exposed_time)\n",
" toe_ele_changes.append(toe_ele_change)\n",
" wave_powers.append(power)\n",
" berm_widths.append(berm_width)\n",
" swash_vol_changes.append(swash_vol_change)\n",
" dune_face_vol_changes.append(dune_face_vol_change)\n",
" site_ids_to_plot.append(site_id)\n",
" \n",
" if n>100:\n",
" break\n",
"\n",
" \n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-03T03:12:11.598150Z",
"start_time": "2018-12-03T03:12:11.590128Z"
}
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-05T05:03:39.147413Z",
"start_time": "2018-12-05T05:03:39.070207Z"
}
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "225855bac0d0464d9be74917812c19ac",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FigureWidget({\n",
" 'data': [{'marker': {'size': 4},\n",
" 'mode': 'markers',\n",
" 'text': [-0…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"trace1 = go.Scatter(\n",
" x=berm_widths,\n",
" y=dune_face_vol_changes,\n",
" text = ['{}<br>{}'.format(ele, site_id) for ele,site_id in zip(toe_ele_changes,site_ids_to_plot)],\n",
" mode='markers',\n",
" marker=dict(\n",
" size=4,\n",
"# color = [-1 if x<0 else 1 for x in toe_ele_changes],\n",
"# color = toe_ele_changes,\n",
"# color = dune_face_vol_changes,\n",
"# color = [x.total_seconds() / 60 / 60 for x in exposed_times],\n",
"# colorscale='Viridis',\n",
"# showscale=True\n",
" ))\n",
"\n",
"layout = go.Layout(\n",
" title='Dune Collision Protection',\n",
"# height=300,\n",
"# legend=dict(font={'size': 10}),\n",
"# margin=dict(t=50, b=50, l=50, r=20),\n",
" xaxis=dict(\n",
" title='Berm width',\n",
" autorange=True,\n",
" showgrid=True,\n",
" zeroline=True,\n",
" showline=True,\n",
" ),\n",
" yaxis=dict(\n",
" title='Dune face vol change',\n",
" autorange=True,\n",
" showgrid=True,\n",
" zeroline=True,\n",
" showline=True,\n",
" ))\n",
"\n",
"g_plot = go.FigureWidget(data=[trace1], layout=layout)\n",
"g_plot"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"ExecuteTime": {
"end_time": "2018-12-05T03:15:46.517975Z",
"start_time": "2018-12-05T03:15:46.512936Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[64.5799,\n",
" 21.0163,\n",
" 38.106,\n",
" 28.101,\n",
" 58.7247,\n",
" 33.5534,\n",
" 71.1675,\n",
" 52.6043,\n",
" 50.5765,\n",
" 39.9074,\n",
" 67.8385,\n",
" 43.9043,\n",
" 39.8181,\n",
" 37.7153,\n",
" 20.4454,\n",
" 39.7757,\n",
" 42.1843,\n",
" 33.6152,\n",
" 42.9587,\n",
" 39.9773,\n",
" 35.7835,\n",
" 31.2884,\n",
" -0.4618,\n",
" 31.0094,\n",
" 33.3479,\n",
" 47.8394,\n",
" 32.3566,\n",
" 36.5205,\n",
" 45.7109,\n",
" 16.0687,\n",
" 35.4375,\n",
" 43.327,\n",
" 53.5016,\n",
" 31.0357,\n",
" 47.6528,\n",
" 25.5658,\n",
" 41.0514,\n",
" 28.1645,\n",
" 44.5443,\n",
" 42.925,\n",
" 33.9535,\n",
" 36.2626,\n",
" 35.2536]"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# impacts['observed']\n",
"swash_vol_changes"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": true
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
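The `wave_power` cell above integrates the deep-water wave energy flux, P = rho * g^2 * Hs0^2 * Tp / (64 * pi), over the hours the forecast water level sits between two elevations. A minimal self-contained sketch of the flux calculation, using illustrative wave values rather than data from this study:

```
import numpy as np

rho = 1025   # seawater density, kg/m3 (as in the notebook)
g = 9.8      # gravitational acceleration, m/s2
Hs0 = 3.5    # deep-water significant wave height, m (illustrative)
Tp = 12.0    # peak wave period, s (illustrative)

# Deep-water wave energy flux per metre of wave crest
power = rho * g ** 2 / 64 / np.pi * Hs0 ** 2 * Tp
print(round(power))  # ~72000 W/m for these values
```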

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

Binary file not shown.

@@ -1,13 +0,0 @@
import pandas as pd
import os
def main():
data_folder = './data/interim'
df_waves = pd.read_csv(os.path.join(data_folder, 'waves.csv'), index_col=[0,1])
df_tides = pd.read_csv(os.path.join(data_folder, 'tides.csv'), index_col=[0,1])
df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0,1,2])
df_sites = pd.read_csv(os.path.join(data_folder, 'sites.csv'),index_col=[0])
if __name__ == '__main__':
main()

@@ -2,13 +2,13 @@
Compares forecasted and observed impacts, putting them into one data frame and exporting the results.
"""
import logging.config
import os
import pandas as pd
from utils import setup_logging
logger = setup_logging()
def compare_impacts(df_forecasted, df_observed):
@@ -18,15 +18,16 @@ def compare_impacts(df_forecasted, df_observed):
:param df_observed:
:return:
"""
df_compared = df_forecasted.merge(
df_observed, left_index=True, right_index=True, suffixes=["_forecasted", "_observed"]
)
return df_compared
if __name__ == "__main__":
logger.info("Importing existing data")
data_folder = "./data/interim"
df_forecasted = pd.read_csv(os.path.join(data_folder, "impacts_forecasted_mean_slope_sto06.csv"), index_col=[0])
df_observed = pd.read_csv(os.path.join(data_folder, "impacts_observed.csv"), index_col=[0])
df_compared = compare_impacts(df_forecasted, df_observed)
df_compared.to_csv(os.path.join(data_folder, "impacts_observed_vs_forecasted_mean_slope_sto06.csv"))

forecast_twl.py

@@ -1,67 +1,83 @@
import logging.config
import os
from multiprocessing import Pool
import click
import numpy as np
import numpy.ma as ma
import pandas as pd
from scipy import stats
from analysis import runup_models
from utils import setup_logging
logger = setup_logging()
MULTIPROCESS_THREADS = int(os.environ.get("MULTIPROCESS_THREADS", 4))
def forecast_twl(
df_tides,
df_profiles,
df_waves,
df_profile_features,
runup_function,
n_processes=MULTIPROCESS_THREADS,
slope="foreshore",
profile_type="prestorm",
):
# Use df_waves as a base
df_twl = df_waves.copy()
# Merge tides
logger.info("Merging tides")
df_twl = df_twl.merge(df_tides, left_index=True, right_index=True)
# Estimate foreshore slope. Do the analysis per site_id. This is so we only have to query the x and z
# cross-section profiles once per site.
logger.info("Calculating beach slopes")
site_ids = df_twl.index.get_level_values("site_id").unique()
if slope == "foreshore":
# Process each site_id with a different process and combine results at the end
with Pool(processes=n_processes) as pool:
results = pool.starmap(
foreshore_slope_for_site_id, [(site_id, df_twl, df_profiles) for site_id in site_ids]
)
df_twl["beta"] = pd.concat(results)
elif slope == "mean":
df_temp = df_twl.join(
df_profile_features.query("profile_type=='{}'".format(profile_type)).reset_index(level="profile_type"),
how="inner",
)
df_temp["mhw"] = 0.5
with Pool(processes=n_processes) as pool:
results = pool.starmap(
mean_slope_for_site_id,
[(site_id, df_temp, df_profiles, "dune_toe_z", "dune_toe_x", "mhw") for site_id in site_ids],
)
df_twl["beta"] = pd.concat(results)
# Estimate runup
R2, setup, S_total, S_inc, S_ig = runup_function(Hs0=df_twl['Hs0'].tolist(), Tp=df_twl["Tp"].tolist(), beta=df_twl["beta"].tolist())
df_twl["R2"] = R2
df_twl["setup"] = setup
df_twl["S_total"] = S_total
# Estimate TWL
df_twl["R_high"] = df_twl["tide"] + df_twl["R2"]
df_twl["R_low"] = df_twl["tide"] + 1.1 * df_twl["setup"] - 1.1 / 2 * df_twl["S_total"]
# Drop unneeded columns
# df_twl.drop(columns=["E", "Exs", "P", "Pxs", "dir"], inplace=True, errors="ignore")
return df_twl
def mean_slope_for_site_id(
site_id, df_twl, df_profiles, top_elevation_col, top_x_col, btm_elevation_col, profile_type="prestorm"
):
"""
Calculates the foreshore slope values for a given site_id. Returns a series (with the same indices as df_twl) of
foreshore slopes. This function is used to parallelize getting foreshore slopes as it is computationally
@@ -73,16 +89,23 @@ def mean_slope_for_site_id(site_id, df_twl, df_profiles, top_elevation_col, btm_
"""
# Get the prestorm beach profile
profile = df_profiles.query("site_id =='{}' and profile_type == 'prestorm'".format(site_id))
profile_x = profile.index.get_level_values('x').tolist()
profile = df_profiles.query("site_id =='{}' and profile_type == '{}'".format(site_id, profile_type))
profile_x = profile.index.get_level_values("x").tolist()
profile_z = profile.z.tolist()
df_twl_site = df_twl.query("site_id == '{}'".format(site_id))
df_beta = df_twl_site.apply(
lambda row: slope_from_profile(
profile_x=profile_x,
profile_z=profile_z,
top_elevation=row[top_elevation_col],
btm_elevation=row[btm_elevation_col],
method="end_points",
top_x=row[top_x_col],
),
axis=1,
)
return df_beta
@@ -99,16 +122,22 @@ def foreshore_slope_for_site_id(site_id, df_twl, df_profiles):
# Get the prestorm beach profile
profile = df_profiles.query("site_id =='{}' and profile_type == 'prestorm'".format(site_id))
profile_x = profile.index.get_level_values("x").tolist()
profile_z = profile.z.tolist()
df_twl_site = df_twl.query("site_id == '{}'".format(site_id))
df_beta = df_twl_site.apply(
lambda row: foreshore_slope_from_profile(
profile_x=profile_x,
profile_z=profile_z,
tide=row.tide,
runup_function=runup_models.sto06,
Hs0=row.Hs0,
Tp=row.Tp,
),
axis=1,
)
return df_beta
@@ -130,16 +159,22 @@ def foreshore_slope_from_profile(profile_x, profile_z, tide, runup_function, **k
return None
# Initalize estimates
max_number_iterations = 30
iteration_count = 0
averaged_accuracy = 0.03 # if slopes within this amount, average after max number of iterations
acceptable_accuracy = 0.01 # if slopes within this amount, accept after max number of iterations
preferred_accuracy = 0.001 # if slopes within this amount, accept
beta = 0.05
while True:
R2, setup, S_total, _, _ = runup_function(beta=beta, **kwargs)
beta_new = slope_from_profile(
profile_x=profile_x,
profile_z=profile_z,
method="end_points",
top_elevation=tide + setup + S_total / 2,
btm_elevation=tide + setup - S_total / 2,
)
# Return None if we can't find a slope, usually because the elevations we've specified are above/below our
# profile x and z coordinates.
@@ -147,18 +182,23 @@ def foreshore_slope_from_profile(profile_x, profile_z, tide, runup_function, **k
return None
# If slopes do not change much between iterations, return the slope
if abs(beta_new - beta) < preferred_accuracy:
return beta
# If we can't converge within the iteration limit, accept slopes that are close enough, otherwise return None
if iteration_count > max_number_iterations:
if abs(beta_new - beta) < acceptable_accuracy:
return beta
elif abs(beta_new - beta) < averaged_accuracy:
return (beta_new + beta) / 2
else:
return None
beta = beta_new
iteration_count += 1
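# The loop above is a fixed-point iteration: evaluate runup with the current
# slope estimate, re-measure the slope between the resulting setup +/- S_total/2
# elevations, and repeat. Successive estimates within preferred_accuracy are
# accepted immediately; once the iteration limit is reached, estimates within
# acceptable_accuracy are accepted and those within averaged_accuracy are averaged.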
def slope_from_profile(profile_x, profile_z, top_elevation, btm_elevation, method="end_points", top_x=None, btm_x=None):
"""
Returns a slope (beta) from a bed profile, given the top and bottom elevations of where the slope should be taken.
:param profile_x: List of x bed profile coordinates
@@ -166,6 +206,9 @@ def slope_from_profile(profile_x, profile_z, top_elevation, btm_elevation, metho
:param top_elevation: Top elevation of where to take the slope
:param btm_elevation: Bottom elevation of where to take the slope
:param method: Method used to calculate slope (end_points or least_squares)
:param top_x: x-coordinate of the top end point. May be needed, as there may be multiple crossings of the
top_elevation.
:param btm_x: x-coordinate of the bottom end point
:return:
"""
@@ -173,16 +216,19 @@ def slope_from_profile(profile_x, profile_z, top_elevation, btm_elevation, metho
if any([x is None for x in [profile_x, profile_z, top_elevation, btm_elevation]]):
return None
end_points = {
'top': {
'z': top_elevation,
},
'btm': {
'z': btm_elevation,
}}
end_points = {"top": {"z": top_elevation}, "btm": {"z": btm_elevation}}
for end_type in end_points.keys():
# Add x coordinates if they are specified
if top_x and end_type == "top":
end_points["top"]["x"] = top_x
continue
if btm_x and end_type == "top":
end_points["btm"]["x"] = btm_x
continue
elevation = end_points[end_type]["z"]
intersection_x = crossings(profile_x, profile_z, elevation)
# No intersections found
@@ -191,26 +237,31 @@ def slope_from_profile(profile_x, profile_z, top_elevation, btm_elevation, metho
# One intersection
elif len(intersection_x) == 1:
end_points[end_type]["x"] = intersection_x[0]
# More than one intersection
else:
if end_type == "top":
# For top elevation, take most seaward intersection
end_points[end_type]["x"] = intersection_x[-1]
else:
# For bottom elevation, take most landward intersection that is seaward of top elevation
end_point_btm = [x for x in intersection_x if x > end_points["top"]["x"]]
if len(end_point_btm) == 0:
# If there doesn't seem to be an intersection seaward of the top elevation, return none.
return None
else:
end_points[end_type]["x"] = end_point_btm[0]
if method == "end_points":
x_top = end_points["top"]["x"]
x_btm = end_points["btm"]["x"]
z_top = end_points["top"]["z"]
z_btm = end_points["btm"]["z"]
return -(z_top - z_btm) / (x_top - x_btm)
elif method == "least_squares":
profile_mask = [True if end_points["top"]["x"] < pts < end_points["btm"]["x"] else False for pts in profile_x]
slope_x = np.array(profile_x)[profile_mask].tolist()
slope_z = np.array(profile_z)[profile_mask].tolist()
slope, _, _, _, _ = stats.linregress(slope_x, slope_z)
@@ -239,29 +290,43 @@ def crossings(profile_x, profile_z, constant_z):
z = np.subtract(profile_z, constant_z)
# Find all indices right before any crossing.
# TODO Sometimes this can give a runtime warning https://stackoverflow.com/a/36489085
indicies = np.where(z[:-1] * z[1:] < 0)[0]
# Use linear interpolation to find intersample crossings.
return [profile_x[i] - (profile_x[i] - profile_x[i + 1]) / (z[i] - z[i + 1]) * (z[i]) for i in indicies]
@click.command()
@click.option("--waves-csv", required=True, help="")
@click.option("--tides-csv", required=True, help="")
@click.option("--profiles-csv", required=True, help="")
@click.option("--profile-features-csv", required=True, help="")
@click.option("--runup-function", required=True, help="", type=click.Choice(["sto06"]))
@click.option("--slope", required=True, help="", type=click.Choice(["foreshore", "mean"]))
@click.option("--profile-type", required=True, help="", type=click.Choice(["prestorm", "poststorm"]))
@click.option("--output-file", required=True, help="")
def create_twl_forecast(
waves_csv, tides_csv, profiles_csv, profile_features_csv, runup_function, slope, profile_type, output_file
):
logger.info("Creating forecast of total water levels")
logger.info("Importing data")
df_waves = pd.read_csv(waves_csv, index_col=[0, 1])
df_tides = pd.read_csv(tides_csv, index_col=[0, 1])
df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2])
df_profile_features = pd.read_csv(profile_features_csv, index_col=[0, 1])
logger.info("Forecasting TWL")
df_twl = forecast_twl(
df_tides,
df_profiles,
df_waves,
df_profile_features,
runup_function=getattr(runup_models, runup_function),
slope=slope,
profile_type=profile_type,
)
df_twl.to_csv(output_file)
logger.info("Saved to %s", output_file)
logger.info("Done!")

@ -2,13 +2,12 @@
Estimates the forecasted storm impacts based on the forecasted water level and dune crest/toe.
"""
import logging.config
import os
import click
import pandas as pd
logging.config.fileConfig('./src/logging.conf', disable_existing_loggers=False)
logger = logging.getLogger(__name__)
from utils import setup_logging
logger = setup_logging()
def forecasted_impacts(df_profile_features, df_forecasted_twl):
@ -19,20 +18,24 @@ def forecasted_impacts(df_profile_features, df_forecasted_twl):
:param df_forecasted_twl:
:return:
"""
logger.info('Getting forecasted storm regimes')
logger.info("Getting forecasted storm impacts")
df_forecasted_impacts = pd.DataFrame(index=df_profile_features.index)
df_forecasted_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique())
# For each site, find the maximum R_high value and the corresponding R_low value.
idx = df_forecasted_twl.groupby(level=['site_id'])['R_high'].idxmax().dropna()
df_r_vals = df_forecasted_twl.loc[idx, ['R_high', 'R_low']].reset_index(['datetime'])
df_forecasted_impacts = df_forecasted_impacts.merge(df_r_vals, how='left', left_index=True, right_index=True)
idx = df_forecasted_twl.groupby(level=["site_id"])["R_high"].idxmax().dropna()
df_r_vals = df_forecasted_twl.loc[idx, ["R_high", "R_low"]].reset_index(["datetime"])
df_forecasted_impacts = df_forecasted_impacts.merge(df_r_vals, how="left", left_index=True, right_index=True)
# Join with df_profile features to find dune toe and crest elevations
df_forecasted_impacts = df_forecasted_impacts.merge(df_profile_features[['dune_toe_z', 'dune_crest_z']],
how='left',
left_index=True,
right_index=True)
df_forecasted_impacts = df_forecasted_impacts.merge(
df_profile_features.query("profile_type=='prestorm'").reset_index("profile_type")[
["dune_toe_z", "dune_crest_z"]
],
how="left",
left_on=["site_id"],
right_on=["site_id"],
)
# Compare R_high and R_low with dune crest and toe elevations
df_forecasted_impacts = storm_regime(df_forecasted_impacts)
@ -47,27 +50,73 @@ def storm_regime(df_forecasted_impacts):
:param df_forecasted_impacts:
:return:
"""
logger.info('Getting forecasted storm regimes')
logger.info("Getting forecasted storm regimes")
df_forecasted_impacts.loc[
df_forecasted_impacts.R_high <= df_forecasted_impacts.dune_toe_z, "storm_regime"
] = "swash"
df_forecasted_impacts.loc[
df_forecasted_impacts.R_high <= df_forecasted_impacts.dune_toe_z, 'storm_regime'] = 'swash'
df_forecasted_impacts.dune_toe_z <= df_forecasted_impacts.R_high, "storm_regime"
] = "collision"
df_forecasted_impacts.loc[
df_forecasted_impacts.dune_toe_z <= df_forecasted_impacts.R_high, 'storm_regime'] = 'collision'
df_forecasted_impacts.loc[(df_forecasted_impacts.dune_crest_z <= df_forecasted_impacts.R_high) &
(df_forecasted_impacts.R_low <= df_forecasted_impacts.dune_crest_z),
'storm_regime'] = 'overwash'
df_forecasted_impacts.loc[(df_forecasted_impacts.dune_crest_z <= df_forecasted_impacts.R_low) &
(df_forecasted_impacts.dune_crest_z <= df_forecasted_impacts.R_high),
'storm_regime'] = 'inundation'
(df_forecasted_impacts.dune_crest_z <= df_forecasted_impacts.R_high)
& (df_forecasted_impacts.R_low <= df_forecasted_impacts.dune_crest_z),
"storm_regime",
] = "overwash"
df_forecasted_impacts.loc[
(df_forecasted_impacts.dune_crest_z <= df_forecasted_impacts.R_low)
& (df_forecasted_impacts.dune_crest_z <= df_forecasted_impacts.R_high),
"storm_regime",
] = "inundation"
return df_forecasted_impacts
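# Illustrative classification (toy values; dune toe 3 m, crest 6 m AHD;
# names hypothetical, not part of this module):
# _toy = pd.DataFrame({"R_high": [2.5, 4.0, 7.0, 8.0],
#                      "R_low": [1.0, 2.0, 5.0, 7.0],
#                      "dune_toe_z": 3.0, "dune_crest_z": 6.0})
# storm_regime(_toy).storm_regime.tolist()
# -> ["swash", "collision", "overwash", "inundation"]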
if __name__ == '__main__':
logger.info('Importing existing data')
data_folder = './data/interim'
df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0, 1, 2])
df_profile_features = pd.read_csv(os.path.join(data_folder, 'profile_features.csv'), index_col=[0])
df_forecasted_twl = pd.read_csv(os.path.join(data_folder, 'twl_mean_slope_sto06.csv'), index_col=[0, 1])
def twl_exceedence_time(df_profile_features, df_forecasted_twl, z_twl_col="R_high", z_exceedence_col="dune_toe_z"):
"""
Returns a dataframe of the number of hours the TWL exceeded a given elevation.
May need to use this https://stackoverflow.com/a/53656968 if datetimes are not consistent.
:param df_profile_features:
:param df_forecasted_twl:
:param z_twl_col:
:param z_exceedence_col:
:return:
"""
logger.info("Getting twl exceedence time")
# Get a dataframe of prestorm dune toes organised by site_id
df_dune_toes = (
df_profile_features.query('profile_type=="prestorm"').reset_index("profile_type")[z_exceedence_col].to_frame()
)
# Merge dune toes into site_id
df_merged = df_forecasted_twl.merge(df_dune_toes, left_on=["site_id"], right_on=["site_id"])
# Return the sum of hours that twl exceeded the level
return (
(df_merged[z_twl_col] >= df_merged[z_exceedence_col])
.groupby("site_id")
.sum()
.rename("twl_{}_exceedance_hrs".format(z_exceedence_col))
.to_frame()
)
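# Minimal check of the boolean-sum-as-hours idea used above (illustrative;
# the toy names below are hypothetical and not part of this module):
_twl_example = pd.Series([1.5, 2.5, 3.0])  # hourly R_high at one site (m AHD)
assert (_twl_example >= 2.0).sum() == 2    # dune toe at 2.0 m -> 2 hrs exceedance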
@click.command()
@click.option("--profile-features-csv", required=True, help="")
@click.option("--forecasted-twl-csv", required=True, help="")
@click.option("--output-file", required=True, help="")
def create_forecasted_impacts(profile_features_csv, forecasted_twl_csv, output_file):
logger.info("Creating observed wave impacts")
logger.info("Importing existing data")
df_profile_features = pd.read_csv(profile_features_csv, index_col=[0, 1])
df_forecasted_twl = pd.read_csv(forecasted_twl_csv, index_col=[0, 1])
df_forecasted_impacts = forecasted_impacts(df_profile_features, df_forecasted_twl)
df_forecasted_impacts.to_csv(os.path.join(data_folder, 'impacts_forecasted_mean_slope_sto06.csv'))
df_forecasted_impacts = df_forecasted_impacts.merge(
twl_exceedence_time(df_profile_features, df_forecasted_twl), left_on=["site_id"], right_on=["site_id"]
)
df_forecasted_impacts.to_csv(output_file)
logger.info("Saved to %s", output_file)
logger.info("Done!")

@ -1,12 +1,11 @@
import logging.config
import os
import click
import numpy as np
import pandas as pd
from scipy.integrate import simps
logging.config.fileConfig('./src/logging.conf', disable_existing_loggers=False)
logger = logging.getLogger(__name__)
from utils import setup_logging
logger = setup_logging()
def return_first_or_nan(l):
@ -29,16 +28,17 @@ def volume_change(df_profiles, df_profile_features, zone):
:param zone: Either 'swash' or 'dune_face'
:return:
"""
logger.info('Calculating change in beach volume in {} zone'.format(zone))
logger.info("Calculating change in beach volume in {} zone".format(zone))
df_vol_changes = pd.DataFrame(index=df_profile_features.index)
df_vol_changes = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique())
df_profiles = df_profiles.sort_index()
sites = df_profiles.groupby(level=['site_id'])
sites = df_profiles.groupby(level=["site_id"])
for site_id, df_site in sites:
logger.debug('Calculating change in beach volume at {} in {} zone'.format(site_id, zone))
prestorm_dune_toe_x = df_profile_features.loc[df_profile_features.index == site_id].dune_toe_x.tolist()
prestorm_dune_crest_x = df_profile_features.loc[df_profile_features.index == site_id].dune_crest_x.tolist()
logger.debug("Calculating change in beach volume at {} in {} zone".format(site_id, zone))
query = "site_id=='{}'&profile_type=='prestorm'".format(site_id)
prestorm_dune_toe_x = df_profile_features.query(query).dune_toe_x.tolist()
prestorm_dune_crest_x = df_profile_features.query(query).dune_crest_x.tolist()
# We may not have a dune toe or crest defined, or there may be multiple defined.
prestorm_dune_crest_x = return_first_or_nan(prestorm_dune_crest_x)
@ -48,38 +48,66 @@ def volume_change(df_profiles, df_profile_features, zone):
if np.isnan(prestorm_dune_toe_x):
prestorm_dune_toe_x = prestorm_dune_crest_x
# If no prestorm and poststorm profiles, skip site and continue
profile_lengths = [len(df_site.query("profile_type == '{}'".format(x))) for x in ["prestorm", "poststorm"]]
if any([length == 0 for length in profile_lengths]):
continue
# Find last x coordinate where we have both prestorm and poststorm measurements. If we don't do this,
# the prestorm and poststorm values are going to be calculated over different lengths.
df_zone = df_site.dropna(subset=['z'])
x_last_obs = min([max(df_zone.query("profile_type == '{}'".format(profile_type)).index.get_level_values('x'))
for profile_type in ['prestorm', 'poststorm']])
df_zone = df_site.dropna(subset=["z"])
x_last_obs = min(
[
max(df_zone.query("profile_type == '{}'".format(profile_type)).index.get_level_values("x"))
for profile_type in ["prestorm", "poststorm"]
]
)
x_first_obs = max(
[
min(df_zone.query("profile_type == '{}'".format(profile_type)).index.get_level_values("x"))
for profile_type in ["prestorm", "poststorm"]
]
)
# Where we measure pre and post storm volume depends on the zone selected
if zone == 'swash':
x_min = prestorm_dune_toe_x
if zone == "swash":
x_min = max(prestorm_dune_toe_x, x_first_obs)
x_max = x_last_obs
elif zone == 'dune_face':
x_min = prestorm_dune_crest_x
x_max = prestorm_dune_toe_x
elif zone == "dune_face":
x_min = max(prestorm_dune_crest_x, x_first_obs)
x_max = min(prestorm_dune_toe_x, x_last_obs)
else:
logger.warning('Zone argument not properly specified. Please check')
logger.warning("Zone argument not properly specified. Please check")
x_min = None
x_max = None
# Now, compute the volume of sand between the x-coordinates x_min and x_max for both the prestorm
# and poststorm profiles.
prestorm_vol = beach_volume(x=df_zone.query("profile_type=='prestorm'").index.get_level_values('x'),
z=df_zone.query("profile_type=='prestorm'").z,
x_min=x_min,
x_max=x_max)
poststorm_vol = beach_volume(x=df_zone.query("profile_type=='poststorm'").index.get_level_values('x'),
z=df_zone.query("profile_type=='poststorm'").z,
x_min=x_min,
x_max=x_max)
df_vol_changes.loc[site_id, 'prestorm_{}_vol'.format(zone)] = prestorm_vol
df_vol_changes.loc[site_id, 'poststorm_{}_vol'.format(zone)] = poststorm_vol
df_vol_changes.loc[site_id, '{}_vol_change'.format(zone)] = prestorm_vol - poststorm_vol
prestorm_vol = beach_volume(
x=df_zone.query("profile_type=='prestorm'").index.get_level_values("x"),
z=df_zone.query("profile_type=='prestorm'").z,
x_min=x_min,
x_max=x_max,
)
poststorm_vol = beach_volume(
x=df_zone.query("profile_type=='poststorm'").index.get_level_values("x"),
z=df_zone.query("profile_type=='poststorm'").z,
x_min=x_min,
x_max=x_max,
)
# Volume change needs to be calculated with a tolerance for LIDAR accuracy. If the difference between
# the profiles is less than 20 cm, consider it zero difference.
prestorm_z = df_zone.query("profile_type=='prestorm'").reset_index("profile_type").z
poststorm_z = df_zone.query("profile_type=='poststorm'").reset_index("profile_type").z
diff_z = prestorm_z - poststorm_z
diff_z[abs(diff_z) < 0.2] = 0
diff_vol = beach_volume(x=diff_z.index.get_level_values("x"), z=diff_z, x_min=x_min, x_max=x_max)
df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = prestorm_vol
df_vol_changes.loc[site_id, "poststorm_{}_vol".format(zone)] = poststorm_vol
df_vol_changes.loc[site_id, "{}_vol_change".format(zone)] = diff_vol
df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = diff_vol / prestorm_vol * 100
return df_vol_changes
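# Sketch of the 20 cm LIDAR-tolerance step above (illustrative toy numbers,
# not part of the original function):
_diff_example = np.array([0.05, -0.15, 0.60, 0.30])  # prestorm minus poststorm (m)
_diff_example[np.abs(_diff_example) < 0.2] = 0        # -> [0.0, 0.0, 0.6, 0.3]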
@ -110,28 +138,40 @@ def storm_regime(df_observed_impacts):
:param df_observed_impacts:
:return:
"""
logger.info('Getting observed storm regimes')
df_observed_impacts.loc[df_observed_impacts.swash_vol_change < 3, 'storm_regime'] = 'swash'
df_observed_impacts.loc[df_observed_impacts.dune_face_vol_change > 3, 'storm_regime'] = 'collision'
logger.info("Getting observed storm regimes")
swash = (df_observed_impacts.dune_face_pct_change <= 2) & (df_observed_impacts.dune_face_vol_change <= 3)
collision = (df_observed_impacts.dune_face_pct_change >= 2) | (df_observed_impacts.dune_face_vol_change > 3)
df_observed_impacts.loc[swash, "storm_regime"] = "swash"
df_observed_impacts.loc[collision, "storm_regime"] = "collision"
return df_observed_impacts
if __name__ == '__main__':
logger.info('Importing existing data')
data_folder = './data/interim'
df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0, 1, 2])
df_profile_features = pd.read_csv(os.path.join(data_folder, 'profile_features.csv'), index_col=[0])
@click.command()
@click.option("--profiles-csv", required=True, help="")
@click.option("--profile-features-csv", required=True, help="")
@click.option("--output-file", required=True, help="")
def create_observed_impacts(profiles_csv, profile_features_csv, output_file):
logger.info("Creating observed wave impacts")
logger.info("Importing data")
df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2])
df_profile_features = pd.read_csv(profile_features_csv, index_col=[0, 1])
logger.info('Creating new dataframe for observed impacts')
df_observed_impacts = pd.DataFrame(index=df_profile_features.index)
logger.info("Creating new dataframe for observed impacts")
df_observed_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique())
logger.info('Getting pre/post storm volumes')
df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone='swash')
df_dune_face_vol_changes = volume_change(df_profiles, df_profile_features, zone='dune_face')
logger.info("Getting pre/post storm volumes")
df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash")
df_dune_face_vol_changes = volume_change(df_profiles, df_profile_features, zone="dune_face")
df_observed_impacts = df_observed_impacts.join([df_swash_vol_changes, df_dune_face_vol_changes])
# Classify regime based on volume changes
df_observed_impacts = storm_regime(df_observed_impacts)
# Save dataframe to csv
df_observed_impacts.to_csv(os.path.join(data_folder, 'impacts_observed.csv'))
df_observed_impacts.to_csv(output_file, float_format="%.4f")
logger.info("Saved to %s", output_file)
logger.info("Done!")

@ -1,51 +1,53 @@
import numpy as np
import pandas as pd
def sto06_individual(Hs0, Tp, beta):
Lp = 9.8 * Tp ** 2 / 2 / np.pi
def sto06(Hs0, Tp, beta):
"""
:param Hs0: List or float of offshore significant wave height values
:param Tp: List or float of peak wave period
:param beta: List or float of beach slope
:return: Float or list of R2, setup, S_total, S_inc and S_ig values
"""
S_ig = 0.06 * np.sqrt(Hs0 * Lp)
S_inc = 0.75 * beta * np.sqrt(Hs0 * Lp)
df = pd.DataFrame({"Hs0": Hs0, "Tp": Tp, "beta": beta}, index=[x for x in range(0, np.size(Hs0))])
df["Lp"] = 9.8 * df['Tp'] ** 2 / 2 / np.pi
# General equation
df["S_ig"] = pd.to_numeric(0.06 * np.sqrt(df["Hs0"] * df["Lp"]), errors="coerce")
df["S_inc"] = pd.to_numeric(0.75 * df["beta"] * np.sqrt(df["Hs0"] * df["Lp"]), errors="coerce")
df["setup"] = pd.to_numeric(0.35 * df["beta"] * np.sqrt(df["Hs0"] * df["Lp"]), errors="coerce")
df["S_total"] = np.sqrt(df["S_inc"] ** 2 + df["S_ig"] ** 2)
df["R2"] = 1.1 * (df["setup"] + df["S_total"] / 2)
# Dissipative conditions
if beta / (Hs0/Lp)**(0.5) <= 0.3:
setup = 0.016 * (Hs0 * Lp) ** 0.5
S_total = 0.046 * (Hs0 * Lp) ** 0.5
R2 = 0.043 * (Hs0 * Lp) ** 0.5
else:
setup = 0.35 * beta * (Hs0 * Lp) ** 0.5
S_total = np.sqrt(S_inc ** 2 + S_ig **2)
R2 = 1.1 * (setup + S_total / 2)
dissipative = df["beta"] / (df["Hs0"] / df["Lp"]) ** (0.5) <= 0.3
df.loc[dissipative, "setup"] = 0.016 * (df["Hs0"] * df["Lp"]) ** (0.5) # eqn 16
df.loc[dissipative, "S_total"] = 0.046 * (df["Hs0"] * df["Lp"]) ** (0.5) # eqn 17
df.loc[dissipative, "R2"] = 0.043 * (df["Hs0"] * df["Lp"]) ** (0.5) # eqn 18
return R2, setup, S_total, S_inc, S_ig
return (
float_or_list(df["R2"].tolist()),
float_or_list(df["setup"].tolist()),
float_or_list(df["S_total"].tolist()),
float_or_list(df["S_inc"].tolist()),
float_or_list(df["S_ig"].tolist()),
)
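# Worked example (illustrative): Hs0 = 3 m, Tp = 10 s, beta = 0.1 gives
# Lp = 9.8 * 10**2 / (2 * pi) ~ 156 m and beta / sqrt(Hs0 / Lp) ~ 0.72 > 0.3,
# so the general (non-dissipative) equations apply:
# R2, setup, S_total, S_inc, S_ig = sto06(Hs0=3.0, Tp=10.0, beta=0.1)
# round(R2, 1)  # -> 2.0 m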
def sto06(df, Hs0_col, Tp_col, beta_col):
def float_or_list(a):
"""
Vectorized version of Stockdon06 which can be used with dataframes
:param df:
:param Hs0_col:
:param Tp_col:
:param beta_col:
If only one value in the array, return the float, else return a list
:param a:
:return:
"""
if len(a) == 1:
return a[0]
else:
return list(a)
Lp = 9.8 * df[Tp_col] ** 2 / 2 / np.pi
# General equation
S_ig = pd.to_numeric(0.06 * np.sqrt(df[Hs0_col] * Lp), errors='coerce')
S_inc = pd.to_numeric(0.75 * df[beta_col] * np.sqrt(df[Hs0_col] * Lp), errors='coerce')
setup = pd.to_numeric(0.35 * df[beta_col] * np.sqrt(df[Hs0_col] * Lp), errors='coerce')
S_total = np.sqrt(S_inc ** 2 + S_ig ** 2)
R2 = 1.1 * (setup + S_total / 2)
# Dissipative conditions
dissipative = df[beta_col] / (df[Hs0_col] / Lp)**(0.5) <= 0.3
setup.loc[dissipative,:] = 0.016 * (df[Hs0_col] * Lp) ** (0.5) # eqn 16
S_total.loc[dissipative,:] = 0.046 * (df[Hs0_col] * Lp) ** (0.5) # eqn 17
R2.loc[dissipative,:] = 0.043 * (df[Hs0_col] * Lp) ** (0.5) # eqn 18
return R2, setup, S_total, S_inc, S_ig
if __name__ == '__main__':
if __name__ == "__main__":
pass

@ -0,0 +1,35 @@
"""
Entry point to run data processing and analysis commands.
"""
import click
import data.parse_mat as parse_mat
import data.profile_features as profile_features
import data.csv_to_shp as csv_to_shp
import analysis.forecast_twl as forecast_twl
import analysis.forecasted_storm_impacts as forecasted_storm_impacts
import analysis.observed_storm_impacts as observed_storm_impacts
# Disable numpy warnings
import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)
@click.group()
def cli():
pass
if __name__ == "__main__":
cli.add_command(parse_mat.create_waves_csv)
cli.add_command(parse_mat.create_sites_and_profiles_csv)
cli.add_command(parse_mat.create_tides_csv)
cli.add_command(parse_mat.create_profile_features)
# cli.add_command(profile_features.create_profile_features)
cli.add_command(csv_to_shp.sites_csv_to_shp)
cli.add_command(forecast_twl.create_twl_forecast)
cli.add_command(forecasted_storm_impacts.create_forecasted_impacts)
cli.add_command(observed_storm_impacts.create_observed_impacts)
cli()
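# With click >= 7, command names are derived from the function names
# (underscores become dashes), so this entry point is invoked like
# (hypothetical invocation, paths as used elsewhere in this repo):
#   pipenv run python ./src/main.py create-waves-csv \
#       --waves-mat "./data/raw/processed_shorelines/waves.mat" \
#       --sites-csv "./data/interim/sites.csv" \
#       --output-file "./data/interim/waves.csv"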

@ -1,19 +1,39 @@
% Calculate orientation the beach profile at each unique site and save to .mat file
% Calculate the orientation of the beach profile at each unique site and save to .mat file. Orientation is
% the number of degrees, anticlockwise from east, perpendicular to the shoreline (pointing towards
% land).
%% Setup
% Needs the following coastal tools:
% J:\Coastal\Tools\MALT Logspiral Transformation
% J:\Coastal\Tools\Coordinate Transformations
addpath('J:\Coastal\Tools\MALT Logspiral Transformation')
addpath('J:\Coastal\Tools\Coordinate Transformations')
clear
clc
%% Options
% Where is the profiles file located? This should contain a structure including the .lat and .lon
% for each analysed cross section
profilesFile = '..\..\data\raw\processed_shorelines\profiles.mat';
% Where should we store the processed beach orientations?
outputFile = '..\..\data\raw\processed_shorelines\orientations.mat';
% How far in meters does the profile extend towards land and sea? Used to provide end points of the
% cross section
distance = 200;
%% Script
% Load profile data, this is where we want to calculate orientations.
warning('off','all')
data = load('C:\Users\z5189959\Desktop\nsw_2016_storm_impact\data\raw\processed_shorelines\profiles.mat');
data = load(profilesFile);
data = data.data;
% Save results to variable
output = [];
for ii = 1:n
for ii = 1:length(data)
disp(num2str(ii))
lat = data(ii).lat;
lon = data(ii).lon;
@ -21,14 +41,41 @@ for ii = 1:n
[x,y,utmzone] = deg2utm(lat,lon);
if strcmp(beach, 'BOOM') == 1 || strcmp(beach, 'HARGn') == 1 || strcmp(beach, 'BILG') == 1 || strcmp(beach, 'HARGs') == 1 || strcmp(beach, 'DEEWHYn') == 1
% log spiral transformation file is out of date. Talk to Mitch
continue
end
% These are straight beaches, load the transformation file directly and read the rotation angle.
parameterDir = 'J:\Coastal\Tools\MALT Logspiral Transformation';
parameterFile = [parameterDir, filesep, beach, '.mat'];
parameterMat = load(parameterFile);
fields = fieldnames(parameterMat);
field = fields(1,1);
site = getfield(parameterMat, field{1});
rot_angle = site.rot_angle; % Angle of the shoreline counter clockwise from east
% Figure out end points in utm coordinates
x_land = x - distance * cos(deg2rad(rot_angle));
y_land = y + distance * sin(deg2rad(rot_angle));
x_sea = x + distance * cos(deg2rad(rot_angle));
y_sea = y - distance * sin(deg2rad(rot_angle));
[lat_land,lon_land] = utm2deg(x_land,y_land,utmzone);
[lat_sea,lon_sea] = utm2deg(x_sea,y_sea,utmzone);
if strcmp(beach, 'AVOCAs') == 1
% negative solution. Talk to Mitch
row.lat_center = lat;
row.lon_center = lon;
row.lat_land = lat_land;
row.lon_land = lon_land;
row.lat_sea = lat_sea;
row.lon_sea = lon_sea;
row.orientation = rot_angle + 90; % Perpendicular to shoreline, pointing towards land
row.beach = beach;
output = [output; row];
continue
end
% if strcmp(beach, 'AVOCAs') == 1
% % negative solution. Talk to Mitch
% continue
% end
% Get the sp log spiral transformed coordinates
xyz.x = x;
@ -56,7 +103,12 @@ for ii = 1:n
[lat_sea,lon_sea] = utm2deg(xyz_sea.x,xyz_sea.y,utmzone);
% Orientation in degrees anticlockwise from east, pointing towards land
orientation = radtodeg(atan2((xyz_land.y - xyz_sea.y), (xyz_land.x - xyz_sea.x)));
try
orientation = radtodeg(atan2((xyz_land.y - xyz_sea.y), (xyz_land.x - xyz_sea.x)));
catch
disp(['Cannot calculate orientation: ' beach])
continue
end
row.lat_center = lat;
row.lon_center = lon;
@ -70,4 +122,4 @@ for ii = 1:n
end
warning('on','all')
save('orientations.mat','output','-v7')
save(outputFile,'output','-v7')

@ -1,17 +1,20 @@
"""
Converts .csv files to .shp files
"""
import os
import click
import fiona
import pandas as pd
from fiona.crs import from_epsg
from shapely.geometry import Point, mapping
from utils import setup_logging
logger = setup_logging()
@click.command()
@click.argument('input_csv')
@click.argument('output_shp')
@click.option("--input-csv", required=True, help=".csv file to convert")
@click.option("--output-shp", required=True, help="where to store .shp file")
def sites_csv_to_shp(input_csv, output_shp):
"""
Converts our dataframe of sites to .shp to load in QGis
@ -19,30 +22,13 @@ def sites_csv_to_shp(input_csv, output_shp):
:param output_shp:
:return:
"""
logger.info("Converting %s to %s", input_csv, output_shp)
df_sites = pd.read_csv(input_csv, index_col=[0])
schema = {
'geometry': 'Point',
'properties': {
'beach': 'str',
'site_id': 'str'
}
}
with fiona.open(output_shp, 'w', crs=from_epsg(4326), driver='ESRI Shapefile', schema=schema) as output:
logger.debug("GDAL_DATA: %s", os.environ.get("GDAL_DATA"))
schema = {"geometry": "Point", "properties": {"beach": "str", "site_id": "str"}}
with fiona.open(output_shp, "w", crs=from_epsg(4326), driver="ESRI Shapefile", schema=schema) as output:
for index, row in df_sites.iterrows():
point = Point(row['lon'], row['lat'])
prop = {
'beach': row['beach'],
'site_id': index,
}
output.write({'geometry': mapping(point), 'properties': prop})
@click.group()
def cli():
pass
if __name__ == '__main__':
cli.add_command(sites_csv_to_shp)
cli()
point = Point(row["x_200_lon"], row["x_200_lat"])
prop = {"beach": row["beach"], "site_id": index}
output.write({"geometry": mapping(point), "properties": prop})
logger.info("Done!")

@ -1,263 +0,0 @@
"""
Converts raw .mat files into a flattened .csv structure which can be imported into python pandas.
"""
import logging.config
from datetime import datetime, timedelta
import pandas as pd
from mat4py import loadmat
import numpy as np
logging.config.fileConfig('./src/logging.conf', disable_existing_loggers=False)
logger = logging.getLogger(__name__)
def parse_orientations(orientations_mat):
"""
Parses the raw orientations.mat file and returns a pandas dataframe. Note that orientations are the direction
towards land measured in degrees anti-clockwise from east.
:param orientations_mat:
:return:
"""
logger.info('Parsing %s', orientations_mat)
mat_data = loadmat(orientations_mat)['output']
rows = []
for i in range(0, len(mat_data['beach'])):
rows.append({
'beach': mat_data['beach'][i],
'orientation': mat_data['orientation'][i],
'lat_center': mat_data['lat_center'][i],
'lon_center': mat_data['lon_center'][i],
'lat_land': mat_data['lat_land'][i],
'lon_land': mat_data['lon_land'][i],
'lat_sea': mat_data['lat_sea'][i],
'lon_sea': mat_data['lon_sea'][i],
})
df = pd.DataFrame(rows)
return df
def combine_sites_and_orientaions(df_sites, df_orientations):
"""
Replaces beach/lat/lon columns with the unique site_id.
:param dfs:
:param df_sites:
:return:
"""
df_merged_sites = df_sites.merge(df_orientations[['beach', 'lat_center', 'lon_center', 'orientation']],
left_on=['beach', 'lat', 'lon'],
right_on=['beach', 'lat_center', 'lon_center'])
# Check that all our records have a unique site identifier
n_unmatched = len(df_sites) - len(df_merged_sites)
if n_unmatched > 0:
logger.warning('Not all records (%d of %d) matched with an orientation', n_unmatched, len(df_sites))
# Drop extra columns
df_merged_sites = df_merged_sites.drop(columns = ['lat_center', 'lon_center'])
return df_merged_sites
def specify_lat_lon_profile_center(df_sites, x_val=200):
"""
Specify which x-coordinate in the beach profile cross section the lat/lon corresponds to
:param df_sites:
:return:
"""
df_sites['profile_x_lat_lon'] = x_val
return df_sites
def parse_waves(waves_mat):
"""
Parses the raw waves.mat file and returns a pandas dataframe
:param waves_mat:
:return:
"""
logger.info('Parsing %s', waves_mat)
mat_data = loadmat(waves_mat)['data']
rows = []
for i in range(0, len(mat_data['site'])):
for j in range(0, len(mat_data['dates'][i])):
rows.append({
'beach': mat_data['site'][i],
'lon': mat_data['lon'][i],
'lat': mat_data['lat'][i],
'datetime': matlab_datenum_to_datetime(mat_data['dates'][i][j][0]),
'Hs': mat_data['H'][i][j][0],
'Hs0': mat_data['Ho'][i][j][0],
'Tp': mat_data['T'][i][j][0],
'dir': mat_data['D'][i][j][0],
'E': mat_data['E'][i][j][0],
'P': mat_data['P'][i][j][0],
'Exs': mat_data['Exs'][i][j][0],
'Pxs': mat_data['Pxs'][i][j][0],
})
df = pd.DataFrame(rows)
df['datetime'] = df['datetime'].dt.round('1s')
return df
def parse_tides(tides_mat):
"""
Parses the raw tides.mat file and returns a pandas dataframe
:param tides_mat:
:return:
"""
logger.info('Parsing %s', tides_mat)
mat_data = loadmat(tides_mat)['data']
rows = []
for i in range(0, len(mat_data['site'])):
for j in range(0, len(mat_data['time'])):
rows.append({
'beach': mat_data['site'][i][0],
'lon': mat_data['lons'][i][0],
'lat': mat_data['lats'][i][0],
'datetime': matlab_datenum_to_datetime(mat_data['time'][j][0]),
'tide': mat_data['tide'][i][j]
})
df = pd.DataFrame(rows)
df['datetime'] = df['datetime'].dt.round('1s')
return df
def parse_profiles(profiles_mat):
"""
Parses the raw profiles.mat file and returns a pandas dataframe
:param tides_mat:
:return:
"""
logger.info('Parsing %s', profiles_mat)
mat_data = loadmat(profiles_mat)['data']
rows = []
for i in range(0, len(mat_data['site'])):
for j in range(0, len(mat_data['pfx'][i])):
for profile_type in ['prestorm', 'poststorm']:
if profile_type == 'prestorm':
z = mat_data['pf1'][i][j][0]
if profile_type == 'poststorm':
z = mat_data['pf2'][i][j][0]
rows.append({
'beach': mat_data['site'][i],
'lon': mat_data['lon'][i],
'lat': mat_data['lat'][i],
'profile_type': profile_type,
'x': mat_data['pfx'][i][j][0],
'z': z,
})
df = pd.DataFrame(rows)
return df
def remove_zeros(df_profiles):
"""
When parsing the pre/post storm profiles, the ends of some profiles have constant values of zero. Let's change
these to NaNs for consistency. Didn't use pandas fillna because 0 may still be a valid value.
:param df:
:return:
"""
df_profiles = df_profiles.sort_index()
groups = df_profiles.groupby(level=['site_id','profile_type'])
for key, _ in groups:
logger.debug('Removing zeros from {} profile at {}'.format(key[1], key[0]))
idx_site = (df_profiles.index.get_level_values('site_id') == key[0]) & \
(df_profiles.index.get_level_values('profile_type') == key[1])
df_profile = df_profiles[idx_site]
x_last_ele = df_profile[df_profile.z!=0].index.get_level_values('x')[-1]
df_profiles.loc[idx_site & (df_profiles.index.get_level_values('x')>x_last_ele), 'z'] = np.nan
return df_profiles
def matlab_datenum_to_datetime(matlab_datenum):
# https://stackoverflow.com/a/13965852
return datetime.fromordinal(int(matlab_datenum)) + timedelta(days=matlab_datenum % 1) - timedelta(
days=366)
def get_unique_sites(dfs, cols=['beach', 'lat', 'lon']):
"""
Generates a dataframe of unique sites based on beach names, lats and lons. Creates a unique site ID for each.
:param dfs:
:param cols:
:return:
"""
rows = []
df_all = pd.concat([df[cols] for df in dfs])
beach_groups = df_all.groupby(['beach'])
for beach_name, beach_group in beach_groups:
site_groups = beach_group.groupby(['lat', 'lon'])
siteNo = 1
for site_name, site_group in site_groups:
site = '{}{:04d}'.format(beach_name, siteNo)
rows.append({'site_id': site,
'lat': site_name[0],
'lon': site_name[1],
'beach': beach_name})
siteNo += 1
df = pd.DataFrame(rows)
return df
def replace_unique_sites(df, df_sites, cols=['beach', 'lat', 'lon']):
"""
Replaces beach/lat/lon columns with the unique site_id
:param dfs:
:param df_sites:
:return:
"""
df_merged = df.merge(df_sites, on=cols)
# Check that all our records have a unique site identifier
n_unmatched = len(df) - len(df_merged)
if n_unmatched > 0:
logger.warning('Not all records (%d of %d) matched with a unique site', n_unmatched, len(df))
df_merged = df_merged.drop(columns=cols)
return df_merged
def main():
df_waves = parse_waves(waves_mat='./data/raw/processed_shorelines/waves.mat')
df_tides = parse_tides(tides_mat='./data/raw/processed_shorelines/tides.mat')
df_profiles = parse_profiles(profiles_mat='./data/raw/processed_shorelines/profiles.mat')
df_sites = get_unique_sites(dfs=[df_waves, df_tides, df_profiles])
df_orientations = parse_orientations(orientations_mat='./data/raw/processed_shorelines/orientations.mat')
logger.info('Identifying unique sites')
df_waves = replace_unique_sites(df_waves, df_sites)
df_tides = replace_unique_sites(df_tides, df_sites)
df_profiles = replace_unique_sites(df_profiles, df_sites)
logger.info('Combine orientations into sites')
df_sites = combine_sites_and_orientaions(df_sites, df_orientations)
df_sites = specify_lat_lon_profile_center(df_sites)
logger.info('Setting pandas index')
df_profiles.set_index(['site_id', 'profile_type', 'x'], inplace=True)
df_waves.set_index(['site_id', 'datetime'], inplace=True)
df_tides.set_index(['site_id', 'datetime'], inplace=True)
df_sites.set_index(['site_id'], inplace=True)
logger.info('Nanning profile zero elevations')
df_profiles = remove_zeros(df_profiles)
logger.info('Outputting .csv files')
df_profiles.to_csv('./data/interim/profiles.csv')
df_tides.to_csv('./data/interim/tides.csv')
df_waves.to_csv('./data/interim/waves.csv')
df_sites.to_csv('./data/interim/sites.csv')
logger.info('Done!')
if __name__ == '__main__':
main()

@ -0,0 +1,440 @@
"""
Converts raw .mat files into a flattened .csv structure which can be imported into python pandas.
"""
import math
from datetime import datetime, timedelta
import click
import numpy as np
import pandas as pd
from mat4py import loadmat
from shapely.geometry import Point
from data.profile_features import convert_coord_systems
from utils import setup_logging
logger = setup_logging()
def parse_orientations(orientations_mat):
"""
Parses the raw orientations.mat file and returns a pandas dataframe. Note that orientations are the direction
towards land measured in degrees anti-clockwise from east.
:param orientations_mat:
:return:
"""
logger.info("Parsing %s", orientations_mat)
mat_data = loadmat(orientations_mat)["output"]
rows = []
for i in range(0, len(mat_data["beach"])):
rows.append(
{
"beach": mat_data["beach"][i],
"orientation": mat_data["orientation"][i],
"lat_center": mat_data["lat_center"][i],
"lon_center": mat_data["lon_center"][i],
"lat_land": mat_data["lat_land"][i],
"lon_land": mat_data["lon_land"][i],
"lat_sea": mat_data["lat_sea"][i],
"lon_sea": mat_data["lon_sea"][i],
}
)
df = pd.DataFrame(rows)
return df
def parse_dune_crest_toes(df_sites, crest_mat, toe_mat):
"""
:param df_sites:
:param crest_mat:
:param toe_mat:
:return:
"""
logger.info("Parsing dune crests and toes")
rows = []
crest_data = loadmat(crest_mat)
toe_data = loadmat(toe_mat)
for n, _ in enumerate(crest_data["xc1"]):
rows.extend(
[
{
"dune_crest_x": crest_data["xc1"][n],
"dune_crest_z": crest_data["zc1"][n],
"dune_toe_x": toe_data["xt1"][n],
"dune_toe_z": toe_data["zt1"][n],
"profile_type": "prestorm",
"site_no": n + 1,
},
{
"dune_crest_x": crest_data["xc2"][n],
"dune_crest_z": crest_data["zc2"][n],
"dune_toe_x": toe_data["xt2"][n],
"dune_toe_z": toe_data["zt2"][n],
"profile_type": "poststorm",
"site_no": n + 1,
},
]
)
df_profile_features = pd.DataFrame(rows)
# Want the site_id instead of the site_no, so merge in df_sites
df_sites.reset_index(inplace=True)
df_profile_features = df_sites[["site_no", "site_id"]].merge(df_profile_features, how="outer", on=["site_no"])
df_profile_features.drop(columns=["site_no"], inplace=True)
df_profile_features.set_index(["site_id", "profile_type"], inplace=True)
df_profile_features.sort_index(inplace=True)
df_profile_features = df_profile_features.round(3)
return df_profile_features
def combine_sites_and_orientaions(df_sites, df_orientations):
"""
Replaces beach/lat/lon columns with the unique site_id.
:param dfs:
:param df_sites:
:return:
"""
df_merged_sites = df_sites.merge(
df_orientations[["beach", "lat_center", "lon_center", "orientation"]],
left_on=["beach", "lat", "lon"],
right_on=["beach", "lat_center", "lon_center"],
)
# Check that all our records have a unique site identifier
n_unmatched = len(df_sites) - len(df_merged_sites)
if n_unmatched > 0:
logger.warning("Not all records (%d of %d) matched with an orientation", n_unmatched, len(df_sites))
# Drop extra columns
df_merged_sites = df_merged_sites.drop(columns=["lat_center", "lon_center"])
return df_merged_sites
def specify_lat_lon_profile_center(df_sites, x_val=200):
"""
Specify which x-coordinate in the beach profile cross section the lat/lon corresponds to
:param df_sites:
:return:
"""
df_sites["profile_x_lat_lon"] = x_val
return df_sites
def parse_waves(waves_mat):
"""
Parses the raw waves.mat file and returns a pandas dataframe
:param waves_mat:
:return:
"""
logger.info("Parsing %s", waves_mat)
mat_data = loadmat(waves_mat)["data"]
rows = []
for i in range(0, len(mat_data["site"])):
for j in range(0, len(mat_data["dates"][i])):
rows.append(
{
"beach": mat_data["site"][i],
"lon": mat_data["lon"][i],
"lat": mat_data["lat"][i],
"datetime": matlab_datenum_to_datetime(mat_data["dates"][i][j][0]),
"Hs": mat_data["H"][i][j][0],
"Hs0": mat_data["Ho"][i][j][0],
"Tp": mat_data["T"][i][j][0],
"dir": mat_data["D"][i][j][0],
"E": mat_data["E"][i][j][0],
"P": mat_data["P"][i][j][0],
"Exs": mat_data["Exs"][i][j][0],
"Pxs": mat_data["Pxs"][i][j][0],
}
)
df = pd.DataFrame(rows)
df["datetime"] = df["datetime"].dt.round("1s")
return df
def parse_tides(tides_mat):
"""
Parses the raw tides.mat file and returns a pandas dataframe
:param tides_mat:
:return:
"""
logger.info("Parsing %s", tides_mat)
mat_data = loadmat(tides_mat)["data"]
rows = []
for i in range(0, len(mat_data["site"])):
for j in range(0, len(mat_data["time"])):
rows.append(
{
"beach": mat_data["site"][i][0],
"lon": mat_data["lons"][i][0],
"lat": mat_data["lats"][i][0],
"datetime": matlab_datenum_to_datetime(mat_data["time"][j][0]),
"tide": mat_data["tide"][i][j],
}
)
df = pd.DataFrame(rows)
df["datetime"] = df["datetime"].dt.round("1s")
return df
def parse_profiles_and_sites(profiles_mat):
"""
Parses the raw profiles.mat file and returns a pandas dataframe
:param tides_mat:
:return:
"""
logger.info("Parsing %s", profiles_mat)
mat_data = loadmat(profiles_mat)["data"]
profile_rows = []
site_rows = []
site_counter = 0
for i, site in enumerate(mat_data["site"]):
logger.debug("Processing site {} of {}".format(i + 1, len(mat_data["site"])))
# Give each site a unique id
if len(site_rows) == 0 or site_rows[-1]["beach"] != site:
site_counter = 1
else:
site_counter += 1
site_id = "{}{:04d}".format(site, site_counter)
# Initialize location of x=200m latitude and longitude
x_200_lat = np.nan
x_200_lon = np.nan
# Want to calculate the orientation
orientation = {}
for x, lat, lon, z_prestorm, z_poststorm, easting, northing in zip(
mat_data["x"][i],
mat_data["lats"][i],
mat_data["lons"][i],
mat_data["Zpre"][i],
mat_data["Zpost"][i],
mat_data["eastings"][i],
mat_data["northings"][i],
):
# Only extract pre and post storm profiles
for j, profile_type in enumerate(["prestorm", "poststorm"]):
if mat_data["isgood"][i][j] == 1:
land_lim = mat_data["landlims"][i][j]
survey_datetime = matlab_datenum_to_datetime(mat_data["surveydates"][i][j])
if profile_type == "prestorm":
z = z_prestorm
else:
z = z_poststorm
# Keep a record of where the center of the profile is located, and the locations of the land
# and sea ends
# TODO: This code isn't very transferable. What if we don't have lat/lons at 200 m? Relook at this
if x[0] == 200:
x_200_lat = lat[0]
x_200_lon = lon[0]
elif x[0] == 0:
orientation["land_easting"] = easting[0]
orientation["land_northing"] = northing[0]
elif x[0] == 400:
orientation["sea_easting"] = easting[0]
orientation["sea_northing"] = northing[0]
profile_rows.append(
{
"site_id": site_id,
"lon": lon[0],
"lat": lat[0],
"profile_type": profile_type,
"x": x[0],
"z": z[0],
"land_lim": land_lim,
"survey_datetime": survey_datetime,
}
)
orientation = math.degrees(
math.atan2(
orientation["land_northing"] - orientation["sea_northing"],
orientation["land_easting"] - orientation["sea_easting"],
)
)
site_rows.append(
{
"site_id": site_id,
"site_no": i + 1,
"beach": site,
"lat": x_200_lat,
"lon": x_200_lon,
"orientation": orientation,
"profile_x_lat_lon": 200,
}
)
df_profiles = pd.DataFrame(profile_rows)
df_sites = pd.DataFrame(site_rows)
logger.info("Parsed profiles and sites")
return df_profiles, df_sites
def remove_zeros(df_profiles):
"""
When parsing the pre/post storm profiles, the ends of some profiles have constant values of zero. Let's change
these to NaNs for consistency. Didn't use pandas fillna because 0 may still be a valid value.
:param df_profiles:
:return:
"""
logger.info("Removing zeros from end of profiles")
df_profiles = df_profiles.sort_index()
groups = df_profiles.groupby(level=["site_id", "profile_type"])
for key, _ in groups:
logger.debug("Removing zeros from {} profile at {}".format(key[1], key[0]))
idx_site = (df_profiles.index.get_level_values("site_id") == key[0]) & (
df_profiles.index.get_level_values("profile_type") == key[1]
)
df_profile = df_profiles[idx_site]
x_last_ele = df_profile[df_profile.z != 0].index.get_level_values("x")[-1]
df_profiles.loc[idx_site & (df_profiles.index.get_level_values("x") > x_last_ele), "z"] = np.nan
logger.info("Removed zeros from end of profiles")
return df_profiles
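# Toy illustration of the trailing-zero rule above (not in the original file):
# for z = [3.0, 1.5, 0.0, 0.0] at x = [0, 10, 20, 30], the last nonzero z sits
# at x = 10, so z at x = 20 and x = 30 become NaN, while a genuine zero
# elevation earlier in a profile (before the last nonzero point) is kept.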
def matlab_datenum_to_datetime(matlab_datenum):
"""
Adapted from https://stackoverflow.com/a/13965852
:param matlab_datenum:
:return:
"""
return datetime.fromordinal(int(matlab_datenum)) + timedelta(days=matlab_datenum % 1) - timedelta(days=366)
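# Sanity check (illustrative): MATLAB datenums count days from year 0, Python
# ordinals from year 1, hence the 366-day correction above. For example:
# matlab_datenum_to_datetime(736503.5)  # -> datetime(2016, 6, 22, 12, 0)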
def replace_unique_sites(df, df_sites):
"""
Replaces beach/lat/lon columns with the unique site_id
:param dfs:
:param df_sites:
:return:
"""
# Make the sites index a column, so it can be merged into df
df_sites["site_id"] = df_sites.index.get_level_values("site_id")
# Create eastings and northings so we can calculate distances
site_points = [convert_coord_systems(Point(lon, lat)).xy for lon, lat in zip(df_sites["lon"], df_sites["lat"])]
df_sites["easting"] = [x[0][0] for x in site_points]
df_sites["northing"] = [x[1][0] for x in site_points]
# Process each unique lat/lon combination in groups
groups = df.groupby(["lat", "lon"])
for (lat, lon), df_group in groups:
# Calculate distances from each point to each site and determine closest site
easting, northing = [x[0] for x in convert_coord_systems(Point(lon, lat)).xy]
distances_to_sites = np.sqrt((df_sites["easting"] - easting) ** 2 + (df_sites["northing"] - northing) ** 2)
min_distance = distances_to_sites.min()
closest_site = distances_to_sites.idxmin()
# Do some logging so we can check later.
if min_distance > 1:
logger.warning("Closest site to (%.4f,%.4f) is %s (%.2f m away)", lat, lon, closest_site, min_distance)
else:
logger.info("Closest site to (%.4f,%.4f) is %s (%.2f m away)", lat, lon, closest_site, min_distance)
# Assign site_id based on closest site
df.loc[df_group.index, "site_id"] = closest_site
nan_count = df.site_id.isna().sum()
if nan_count > 0:
logger.warning("Not all records (%d of %d) matched with a unique site", nan_count, len(df))
df = df.drop(columns=["lat", "lon", "beach"])
return df
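# Nearest-site sketch (illustrative; toy one-dimensional eastings, names
# hypothetical and not part of this module): a record at easting 30 m is
# matched to the site at 0 m.
_site_eastings = np.array([0.0, 100.0])
_closest_idx = int(np.argmin(np.abs(_site_eastings - 30.0)))  # -> 0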
@click.command(short_help="create waves.csv")
@click.option("--waves-mat", required=True, help=".mat file containing wave records")
@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
@click.option("--output-file", required=True, help="where to save waves.csv")
def create_waves_csv(waves_mat, sites_csv, output_file):
logger.info("Creating %s", output_file)
df_waves = parse_waves(waves_mat=waves_mat)
df_sites = pd.read_csv(sites_csv, index_col=[0])
df_waves = replace_unique_sites(df_waves, df_sites)
df_waves.set_index(["site_id", "datetime"], inplace=True)
df_waves.sort_index(inplace=True)
df_waves.to_csv(output_file)
logger.info("Created %s", output_file)
@click.command(short_help="create profile_features.csv")
@click.option("--crest-mat", required=True, help=".mat file containing wave records")
@click.option("--toe-mat", required=True, help=".mat file containing wave records")
@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
@click.option("--output-file", required=True, help="where to save waves.csv")
def create_profile_features(crest_mat, toe_mat, sites_csv, output_file):
logger.info("Creating %s", output_file)
df_sites = pd.read_csv(sites_csv, index_col=[0])
df_profile_features = parse_dune_crest_toes(df_sites, crest_mat, toe_mat)
df_profile_features.to_csv(output_file)
logger.info("Created %s", output_file)
@click.command(short_help="create profiles.csv")
@click.option("--profiles-mat", required=True, help=".mat file containing beach profiles")
@click.option("--profiles-output-file", required=True, help="where to save profiles.csv")
@click.option("--sites-output-file", required=True, help="where to save sites.csv")
def create_sites_and_profiles_csv(profiles_mat, profiles_output_file, sites_output_file):
logger.info("Creating sites and profiles csvs")
df_profiles, df_sites = parse_profiles_and_sites(profiles_mat=profiles_mat)
df_profiles.set_index(["site_id", "profile_type", "x"], inplace=True)
df_profiles.sort_index(inplace=True)
df_profiles = remove_zeros(df_profiles)
df_sites.set_index(["site_id"], inplace=True)
df_sites.sort_index(inplace=True)
df_profiles.to_csv(profiles_output_file)
logger.info("Created %s", profiles_output_file)
df_sites.to_csv(sites_output_file)
logger.info("Created %s", sites_output_file)
@click.command(short_help="create profiles.csv")
@click.option("--tides-mat", required=True, help=".mat file containing tides")
@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
@click.option("--output-file", required=True, help="where to save tides.csv")
def create_tides_csv(tides_mat, sites_csv, output_file):
logger.info("Creating %s", output_file)
df_tides = parse_tides(tides_mat=tides_mat)
df_sites = pd.read_csv(sites_csv, index_col=[0])
df_tides = replace_unique_sites(df_tides, df_sites)
df_tides.set_index(["site_id", "datetime"], inplace=True)
df_tides.sort_index(inplace=True)
df_tides.to_csv(output_file)
logger.info("Created %s", output_file)
@click.group()
def cli():
pass
if __name__ == "__main__":
cli.add_command(create_waves_csv)
cli.add_command(create_sites_and_profiles_csv)
cli.add_command(create_tides_csv)
cli()

@ -1,6 +1,6 @@
import os
from functools import partial
import click
import fiona
import numpy as np
import pandas as pd
@ -9,24 +9,12 @@ from shapely.geometry import LineString, Point
from shapely.geometry import shape
from shapely.ops import transform
from utils import setup_logging
def shapes_from_shp(shp_file):
"""
Parses a shape file and returns a list of shapely shapes, ids and properties
:param shp_file:
:return:
"""
shapes = []
ids = []
properties = []
for feat in fiona.open(shp_file, 'r'):
shapes.append(shape(feat['geometry']))
ids.append(feat['id'])
properties.append(feat['properties'])
return shapes, ids, properties
logger = setup_logging()
def convert_coord_systems(g1, in_coord_system='EPSG:4326', out_coord_system='EPSG:28356'):
def convert_coord_systems(g1, in_coord_system="EPSG:4326", out_coord_system="EPSG:28356"):
"""
Converts coordinates from one coordinate system to another. Needed because shapefiles are usually defined in
lat/lon but should be converted to GDA to calculate distances.
@ -38,12 +26,29 @@ def convert_coord_systems(g1, in_coord_system='EPSG:4326', out_coord_system='EPS
project = partial(
pyproj.transform,
pyproj.Proj(init=in_coord_system), # source coordinate system
pyproj.Proj(init=out_coord_system)) # destination coordinate system
pyproj.Proj(init=out_coord_system),
) # destination coordinate system
g2 = transform(project, g1) # apply projection
return g2
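# Usage sketch (illustrative): project a lon/lat Point near Sydney into
# GDA94 / MGA zone 56 (EPSG:28356) so that distances come out in metres.
# pt = convert_coord_systems(Point(151.25, -33.90))
# pt.x, pt.y  # -> easting/northing in metres (roughly 3.4e5, 6.2e6)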
def shapes_from_shp(shp_file):
"""
Parses a shape file and returns a list of shapely shapes, ids and properties
:param shp_file:
:return:
"""
shapes = []
ids = []
properties = []
for feat in fiona.open(shp_file, "r"):
shapes.append(shape(feat["geometry"]))
ids.append(feat["id"])
properties.append(feat["properties"])
return shapes, ids, properties
def distance_to_intersection(lat, lon, landward_orientation, beach, line_strings, line_properties):
"""
Returns the distance at which a line drawn from a lat/lon at an orientation intersects a line string
@ -59,15 +64,19 @@ def distance_to_intersection(lat, lon, landward_orientation, beach, line_strings
start_point = convert_coord_systems(start_point)
distance = 1000 # m look up to 1000m for an intersection
landward_point = Point(start_point.coords.xy[0] + distance * np.cos(np.deg2rad(landward_orientation)),
start_point.coords.xy[1] + distance * np.sin(np.deg2rad(landward_orientation)))
landward_point = Point(
start_point.coords.xy[0] + distance * np.cos(np.deg2rad(landward_orientation)),
start_point.coords.xy[1] + distance * np.sin(np.deg2rad(landward_orientation)),
)
landward_line = LineString([start_point, landward_point])
seaward_point = Point(start_point.coords.xy[0] - distance * np.cos(np.deg2rad(landward_orientation)),
start_point.coords.xy[1] - distance * np.sin(np.deg2rad(landward_orientation)))
seaward_point = Point(
start_point.coords.xy[0] - distance * np.cos(np.deg2rad(landward_orientation)),
start_point.coords.xy[1] - distance * np.sin(np.deg2rad(landward_orientation)),
)
seaward_line = LineString([start_point, seaward_point])
# Look at relevant line_strings which have the same beach property in order to reduce computation time
line_strings = [s for s, p in zip(line_strings, line_properties) if p['beach'] == beach]
line_strings = [s for s, p in zip(line_strings, line_properties) if p["beach"] == beach]
# Check whether profile_line intersects with any lines in line_string. If intersection point is landwards,
# consider this negative, otherwise seawards is positive.
@ -99,7 +108,7 @@ def beach_profile_elevation(x_coord, df_profiles, profile_type, site_id):
# Get profile
df_profile = df_profiles.query('profile_type == "{}" and site_id =="{}"'.format(profile_type, site_id))
return np.interp(x_coord, df_profile.index.get_level_values('x'), df_profile['z'])
return np.interp(x_coord, df_profile.index.get_level_values("x"), df_profile["z"])
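# np.interp sketch (illustrative): the elevation at x = 15 on a two-point
# profile with z = 4.0 at x = 10 and z = 2.0 at x = 20 is
# np.interp(15, [10, 20], [4.0, 2.0])  # -> 3.0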
def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp):
@ -111,51 +120,42 @@ def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp):
# Get site information. Base our profile features on each site
df_profile_features = df_sites
features = {
'dune_crest':
{
'file': dune_crest_shp
},
'dune_toe':
{
'file': dune_toe_shp
},
}
features = {"dune_crest": {"file": dune_crest_shp}, "dune_toe": {"file": dune_toe_shp}}
# Import our dune crest and toes
for feat in features.keys():
shapes, _, properties = shapes_from_shp(features[feat]['file'])
shapes, _, properties = shapes_from_shp(features[feat]["file"])
shapes = [convert_coord_systems(x) for x in shapes]
# Figure out the x coordinates of our crest and toes, by looking at where our beach sections intersect our
# shape files.
col_name = '{}_x'.format(feat)
df_profile_features[col_name] = df_profile_features['profile_x_lat_lon'] + \
df_profile_features.apply(lambda row:
distance_to_intersection(
row['lat'], row['lon'], row['orientation'],
row['beach'], shapes, properties),
axis=1)
col_name = "{}_x".format(feat)
df_profile_features[col_name] = df_profile_features["profile_x_lat_lon"] + df_profile_features.apply(
lambda row: distance_to_intersection(
row["lat"], row["lon"], row["orientation"], row["beach"], shapes, properties
),
axis=1,
)
# Get the elevations of the crest and toe
col_name = '{}_z'.format(feat)
df_profile_features[col_name] = df_profile_features.apply(lambda row:
beach_profile_elevation(
row['{}_x'.format(feat)],
df_profiles,
'prestorm',
row.name),
axis=1)
df_profile_features = df_profile_features.drop(columns=['beach', 'lat', 'lon', 'orientation'])
return df_profile_features
col_name = "{}_z".format(feat)
df_profile_features[col_name] = df_profile_features.apply(
lambda row: beach_profile_elevation(row["{}_x".format(feat)], df_profiles, "prestorm", row.name), axis=1
)
df_profile_features = df_profile_features.drop(columns=["beach", "lat", "lon", "orientation", "profile_x_lat_lon"])
return df_profile_features
if __name__ == '__main__':
data_folder = './data/interim'
df_sites = pd.read_csv(os.path.join(data_folder, 'sites.csv'), index_col=[0])
df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0, 1, 2])
dune_crest_shp = './data/raw/profile_features/dune_crests.shp'
dune_toe_shp = './data/raw/profile_features/dune_toes.shp'
@click.command(short_help="create .csv of dune toe and crest positions")
@click.option("--dune-crest-shp", required=True, help=".csv file to convert")
@click.option("--dune-toe-shp", required=True, help="where to store .shp file")
@click.option("--sites-csv", required=True, help="where to store .shp file")
@click.option("--profiles-csv", required=True, help="where to store .shp file")
@click.option("--output-csv", required=True, help="where to store .shp file")
def create_profile_features(dune_crest_shp, dune_toe_shp, sites_csv, profiles_csv, output_csv):
logger.info("Creating .csv of dune crests and toes")
df_sites = pd.read_csv(sites_csv, index_col=[0])
df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2])
df_profile_features = parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp)
df_profile_features.to_csv('./data/interim/profile_features.csv')
df_profile_features.to_csv(output_csv)
logger.info("Done!")

@ -1,27 +0,0 @@
[loggers]
keys=root, matplotlib
[handlers]
keys=consoleHandler
[formatters]
keys=simpleFormatter
[logger_root]
level=DEBUG
handlers=consoleHandler
[logger_matplotlib]
level=WARNING
handlers=consoleHandler
qualname=matplotlib
[handler_consoleHandler]
class=StreamHandler
level=DEBUG
formatter=simpleFormatter
args=(sys.stdout,)
[formatter_simpleFormatter]
format=%(asctime)s %(name)-17s %(levelname)-8s %(message)s
datefmt=%a, %d %b %Y %H:%M:%S

@ -0,0 +1,51 @@
---
version: 1
disable_existing_loggers: False
formatters:
simple:
format: "[%(asctime)s] [%(filename)15.15s:%(lineno)4.4s %(funcName)15.15s] [%(levelname)-4.4s] %(message)s"
datefmt: "%Y-%m-%d %H:%M:%S"
handlers:
console:
class: logging.StreamHandler
level: DEBUG
formatter: simple
stream: ext://sys.stdout
info_file_handler:
class: logging.handlers.RotatingFileHandler
level: INFO
formatter: simple
filename: info.log
maxBytes: 10485760 # 10MB
backupCount: 3
encoding: utf8
warning_file_handler:
class: logging.handlers.RotatingFileHandler
level: WARNING
formatter: simple
filename: warning.log
maxBytes: 10485760 # 10MB
backupCount: 3
encoding: utf8
error_file_handler:
class: logging.handlers.RotatingFileHandler
level: ERROR
formatter: simple
filename: error.log
maxBytes: 10485760 # 10MB
backupCount: 3
encoding: utf8
loggers:
my_module:
level: ERROR
handlers: [console]
propagate: no
root:
level: DEBUG
handlers: [console, info_file_handler, error_file_handler, warning_file_handler]

@ -0,0 +1,16 @@
import logging.config
import os
import yaml
def setup_logging(path="./src/logging.yaml", default_level=logging.INFO):
"""
Setup logging configuration
"""
if os.path.exists(path):
with open(path, "rt") as f:
config = yaml.safe_load(f.read())
logging.config.dictConfig(config)
else:
logging.basicConfig(level=default_level)
return logging.getLogger(__name__)
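# Minimal smoke test (illustrative addition, not part of the original module):
if __name__ == "__main__":
    log = setup_logging()
    log.info("Logging configured from ./src/logging.yaml")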