Compare commits

..

12 Commits

.gitattributes vendored

@@ -1,3 +1,5 @@
*.ipynb filter=nbstripout
*.ipynb diff=ipynb

.gitignore vendored

@@ -1,6 +1,7 @@
# Jupyter NB Checkpoints
.ipynb_checkpoints/
/notebooks/*.png
/notebooks/*.csv
# exclude data from source control by default

@@ -67,11 +67,12 @@ impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/inte
--profiles-output-file "./data/interim/profiles.csv" \
--sites-output-file "./data/interim/sites.csv"
./data/interim/waves.csv: ./data/interim/sites.csv ./data/raw/processed_shorelines/waves.mat
./data/interim/waves.csv: ./data/interim/sites.csv ./data/raw/processed_shorelines/waves.mat ./data/raw/processed_shorelines/sites_waves.mat
$(PYTHON_CLI) create-waves-csv \
--waves-mat "./data/raw/processed_shorelines/waves.mat" \
--sites-csv "./data/interim/sites.csv" \
--output-file "./data/interim/waves.csv"
--waves-output-file "./data/interim/waves.csv" \
--sites-waves-output-file "./data/interim/sites_waves.csv"
./data/interim/tides.csv: ./data/interim/sites.csv ./data/raw/processed_shorelines/tides.mat
$(PYTHON_CLI) create-tides-csv \
@@ -85,6 +86,11 @@ impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/inte
--profiles-csv "./data/interim/profiles.csv" \
--output-file "./data/interim/profile_features_crest_toes.csv" \
./data/interim/sites_grain_size.csv : ./data/raw/grain_size/sites_grain_size.csv
$(PYTHON_CLI) create-grain-size-csv \
--grain-size-csv "./data/raw/grain_size/sites_grain_size.csv" \
--output-file "./data/interim/sites_grain_size.csv" \
### TWLs
@@ -110,6 +116,27 @@ impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/inte
--profile-type "prestorm" \
--output-file "./data/interim/twl_mean_slope_sto06.csv"
./data/interim/twl_mean_slope_hol86.csv: ./data/interim/waves.csv ./data/interim/tides.csv ./data/interim/profiles.csv ./data/interim/sites.csv ./data/interim/profile_features_crest_toes.csv
$(PYTHON_CLI) create-twl-forecast \
--waves-csv "./data/interim/waves.csv" \
--tides-csv "./data/interim/tides.csv" \
--profiles-csv "./data/interim/profiles.csv" \
--profile-features-csv "./data/interim/profile_features_crest_toes.csv" \
--runup-function "hol86" \
--slope "mean" \
--profile-type "prestorm" \
--output-file "./data/interim/twl_mean_slope_hol86.csv"
./data/interim/twl_mean_slope_nie91.csv: ./data/interim/waves.csv ./data/interim/tides.csv ./data/interim/profiles.csv ./data/interim/sites.csv ./data/interim/profile_features_crest_toes.csv
$(PYTHON_CLI) create-twl-forecast \
--waves-csv "./data/interim/waves.csv" \
--tides-csv "./data/interim/tides.csv" \
--profiles-csv "./data/interim/profiles.csv" \
--profile-features-csv "./data/interim/profile_features_crest_toes.csv" \
--runup-function "nie91" \
--slope "mean" \
--profile-type "prestorm" \
--output-file "./data/interim/twl_mean_slope_nie91.csv"
### IMPACTS
@@ -132,10 +159,21 @@ impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/inte
--forecasted-twl-csv "./data/interim/twl_foreshore_slope_sto06.csv" \
--output-file "./data/interim/impacts_forecasted_foreshore_slope_sto06.csv"
./data/interim/impacts_forecasted_mean_slope_hol86.csv: ./data/interim/profile_features_crest_toes.csv ./data/interim/twl_mean_slope_hol86.csv
$(PYTHON_CLI) create-forecasted-impacts \
--profile-features-csv "./data/interim/profile_features_crest_toes.csv" \
--forecasted-twl-csv "./data/interim/twl_mean_slope_hol86.csv" \
--output-file "./data/interim/impacts_forecasted_mean_slope_hol86.csv"
./data/interim/impacts_forecasted_mean_slope_nie91.csv: ./data/interim/profile_features_crest_toes.csv ./data/interim/twl_mean_slope_nie91.csv
$(PYTHON_CLI) create-forecasted-impacts \
--profile-features-csv "./data/interim/profile_features_crest_toes.csv" \
--forecasted-twl-csv "./data/interim/twl_mean_slope_nie91.csv" \
--output-file "./data/interim/impacts_forecasted_mean_slope_nie91.csv"
### GEOJSONs
geojsons: ./data/interim/impacts_forecasted_mean_slope_sto06.geojson ./data/interim/impacts_forecasted_mean_slope_sto06_R_high.geojson ./data/interim/profile_features_crest_toes.geojson ./data/interim/sites.geojson
geojsons: ./data/interim/impacts_forecasted_mean_slope_nie91.geojson ./data/interim/impacts_forecasted_mean_slope_nie91_R_high.geojson ./data/interim/impacts_forecasted_mean_slope_hol86.geojson ./data/interim/impacts_forecasted_mean_slope_hol86_R_high.geojson ./data/interim/impacts_forecasted_mean_slope_sto06.geojson ./data/interim/impacts_forecasted_mean_slope_sto06_R_high.geojson ./data/interim/profile_features_crest_toes.geojson ./data/interim/sites.geojson
./data/interim/impacts_forecasted_mean_slope_sto06.geojson: ./data/interim/impacts_forecasted_mean_slope_sto06.csv ./data/interim/impacts_observed.csv
$(PYTHON_CLI) impacts-to-geojson \
@@ -152,6 +190,36 @@ geojsons: ./data/interim/impacts_forecasted_mean_slope_sto06.geojson ./data/inte
--impacts-csv "./data/interim/impacts_forecasted_mean_slope_sto06.csv" \
--output-geojson "./data/interim/impacts_forecasted_mean_slope_sto06_R_high.geojson"
./data/interim/impacts_forecasted_mean_slope_hol86.geojson: ./data/interim/impacts_forecasted_mean_slope_hol86.csv ./data/interim/impacts_observed.csv
$(PYTHON_CLI) impacts-to-geojson \
--sites-csv "./data/interim/sites.csv" \
--observed-impacts-csv "./data/interim/impacts_observed.csv" \
--forecast-impacts-csv "./data/interim/impacts_forecasted_mean_slope_hol86.csv" \
--output-geojson "./data/interim/impacts_forecasted_mean_slope_hol86.geojson"
./data/interim/impacts_forecasted_mean_slope_hol86_R_high.geojson: ./data/interim/impacts_forecasted_mean_slope_hol86.csv
$(PYTHON_CLI) r-high-to-geojson \
--sites-csv "./data/interim/sites.csv" \
--profiles-csv "./data/interim/profiles.csv" \
--crest-toes-csv "./data/interim/profile_features_crest_toes.csv" \
--impacts-csv "./data/interim/impacts_forecasted_mean_slope_hol86.csv" \
--output-geojson "./data/interim/impacts_forecasted_mean_slope_hol86_R_high.geojson"
./data/interim/impacts_forecasted_mean_slope_nie91.geojson: ./data/interim/impacts_forecasted_mean_slope_nie91.csv ./data/interim/impacts_observed.csv
$(PYTHON_CLI) impacts-to-geojson \
--sites-csv "./data/interim/sites.csv" \
--observed-impacts-csv "./data/interim/impacts_observed.csv" \
--forecast-impacts-csv "./data/interim/impacts_forecasted_mean_slope_nie91.csv" \
--output-geojson "./data/interim/impacts_forecasted_mean_slope_nie91.geojson"
./data/interim/impacts_forecasted_mean_slope_nie91_R_high.geojson: ./data/interim/impacts_forecasted_mean_slope_nie91.csv
$(PYTHON_CLI) r-high-to-geojson \
--sites-csv "./data/interim/sites.csv" \
--profiles-csv "./data/interim/profiles.csv" \
--crest-toes-csv "./data/interim/profile_features_crest_toes.csv" \
--impacts-csv "./data/interim/impacts_forecasted_mean_slope_nie91.csv" \
--output-geojson "./data/interim/impacts_forecasted_mean_slope_nie91_R_high.geojson"
./data/interim/profile_features_crest_toes.geojson: ./data/interim/profile_features_crest_toes.csv
$(PYTHON_CLI) profile-features-crest-toes-to-geojson \
--sites-csv "./data/interim/sites.csv" \
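
A side note on the new TWL rules above: the --runup-function flag now selects between three runup parameterisations (sto06, hol86, nie91). The sketch below is only an assumption about how such a dispatch could look inside the CLI, not the repository's actual implementation; only the Stockdon et al. (2006) R2% form is written out, the other two are stubbed.

import numpy as np

def sto06(Hs0, Tp, beta):
    # R2% runup per Stockdon et al. (2006) for a given beach slope beta
    L0 = 9.81 * Tp ** 2 / (2 * np.pi)            # deep-water wavelength
    setup = 0.35 * beta * np.sqrt(Hs0 * L0)
    swash = np.sqrt(Hs0 * L0 * (0.563 * beta ** 2 + 0.004)) / 2
    return 1.1 * (setup + swash)

# hol86 (Holman, 1986) and nie91 (Nielsen & Hanslow, 1991) would follow the
# same pattern; their exact forms live in the repository's source and are not
# reproduced here.
RUNUP_FUNCTIONS = {"sto06": sto06}

print(round(RUNUP_FUNCTIONS["sto06"](Hs0=3.0, Tp=10.0, beta=0.1), 2))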

@@ -4,6 +4,7 @@ channels:
dependencies:
- python=3.6
- attrs
- pre-commit
- autopep8
- black
- click
@@ -29,11 +30,14 @@ dependencies:
- python-dateutil
- pytz
- pyyaml
- psutil
- requests
- scikit-learn
- scipy
- setuptools
- shapely
- statsmodels
- tqdm
- yaml
- yapf
- pip:

@@ -611,7 +611,46 @@
"metadata": {},
"outputs": [],
"source": [
"df_sites.site_no.to_csv('temp.csv')"
"# Get twl information at maximum R_high for a site\n",
"# site_id = 'NARRA0008'\n",
"site_id = 'NARRA0012'\n",
"\n",
"print('TWLs:')\n",
"t = twls['forecasted']['mean_slope_sto06'].xs(site_id,level='site_id').R_high.idxmax()\n",
"print(twls['forecasted']['mean_slope_sto06'].loc[(site_id, t)])\n",
"\n",
"print('\\nforecast regime:')\n",
"print(impacts['forecasted']['mean_slope_sto06'].loc[site_id])\n",
"\n",
"print('\\nobserved regime:')\n",
"print(impacts['observed'].loc[site_id])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"site_id = 'NARRA0010'\n",
"\n",
"print(site_id)\n",
"# Storm duration\n",
"Hs0_storm = 3.0\n",
"df_twls = twls['forecasted']['mean_slope_sto06'].xs(site_id,level='site_id')\n",
"t_start = np.argmax(df_twls.Hs0 > Hs0_storm)\n",
"t_end = np.argmax((df_twls.Hs0 > Hs0_storm)[::-1])\n",
"i_start = df_twls.index.get_loc(t_start)\n",
"i_end = df_twls.index.get_loc(t_end)\n",
"\n",
"df_storm = df_twls.iloc[i_start:i_end]\n",
"print('Storm length: {} hrs'.format(len(df_storm)))\n",
"\n",
"# Get hours above a certain elevation\n",
"z_critical = 2.4\n",
"\n",
"n_impact_hrs = np.sum(df_storm.R_high >z_critical)\n",
"print('Number of hours before peak water level with R_high > {}m: {}hrs'.format(z_critical,n_impact_hrs))\n"
]
},
{
@@ -879,10 +918,10 @@
"height": "656px",
"left": "508px",
"top": "90px",
"width": "218.797px"
"width": "282.797px"
},
"toc_section_display": true,
"toc_window_display": true
"toc_window_display": false
},
"varInspector": {
"cols": {

File diff suppressed because it is too large

@@ -0,0 +1,242 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Profile picker"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true
},
"source": [
"## Setup notebook"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"# Enable autoreloading of our modules. \n",
"# Most of the code will be located in the /src/ folder, \n",
"# and then called from the notebook.\n",
"%matplotlib inline\n",
"%reload_ext autoreload\n",
"%autoreload"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"from IPython.core.debugger import set_trace\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import decimal\n",
"import plotly\n",
"import plotly.graph_objs as go\n",
"import plotly.plotly as py\n",
"import plotly.tools as tls\n",
"import plotly.figure_factory as ff\n",
"from plotly import tools\n",
"import plotly.io as pio\n",
"from scipy import stats\n",
"import math\n",
"import matplotlib\n",
"from matplotlib import cm\n",
"import colorlover as cl\n",
"import numpy.ma as ma\n",
"\n",
"from ipywidgets import widgets, Output\n",
"from IPython.display import display, clear_output, Image, HTML\n",
"\n",
"from sklearn.metrics import confusion_matrix\n",
"\n",
"import numpy as np\n",
"from matplotlib import pyplot as plt\n",
"\n",
"from sklearn import linear_model, datasets\n",
"\n",
"from scipy.interpolate import UnivariateSpline\n",
"from scipy.interpolate import interp1d\n",
"from scipy.interpolate import splrep, splev\n",
"from scipy.integrate import simps\n",
"from scipy.stats import linregress\n",
"from scipy.signal import find_peaks\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"# Matplot lib default settings\n",
"plt.rcParams[\"figure.figsize\"] = (10,6)\n",
"plt.rcParams['axes.grid']=True\n",
"plt.rcParams['grid.alpha'] = 0.5\n",
"plt.rcParams['grid.color'] = \"grey\"\n",
"plt.rcParams['grid.linestyle'] = \"--\"\n",
"plt.rcParams['axes.grid']=True\n",
"\n",
"# https://stackoverflow.com/a/20709149\n",
"matplotlib.rcParams['text.usetex'] = True\n",
"\n",
"matplotlib.rcParams['text.latex.preamble'] = [\n",
" r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n",
" r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n",
" r'\\usepackage{helvet}', # set the normal font here\n",
" r'\\usepackage{amsmath}',\n",
" r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n",
" r'\\sansmath', # <- tricky! -- gotta actually tell tex to use!\n",
"] "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import data\n",
"Let's first import data from our pre-processed interim data folder."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
" print('Importing {}'.format(csv))\n",
" return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n",
"\n",
"df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n",
"df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])\n",
"\n",
"print('Done!')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Manually pick features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib notebook\n",
"\n",
"sites = df_profiles.index.get_level_values('site_id').unique()\n",
"\n",
"\n",
"fig =plt.figure(figsize=(10, 3))\n",
"\n",
"df_prestorm = df_profiles.xs((sites[0],'prestorm'),level=('site_id','profile_type'))\n",
"df_poststorm = df_profiles.xs((sites[0],'poststorm'),level=('site_id','profile_type'))\n",
"line_prestorm, = plt.plot(df_prestorm.index, df_prestorm.z, label='prestorm')\n",
"line_poststorm, = plt.plot(df_prestorm.index, df_prestorm.z, label='poststorm')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# df_profiles.xs((sites[0],'prestorm'),level=('site_id','profile_type'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
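
A note on the new profile picker notebook above: the diff stops after loading the interim profiles and plotting the prestorm/poststorm traces under %matplotlib notebook, so no picking logic is shown yet. One plausible direction, sketched purely as an assumption (this handler and the site_id are not from the repository), is to capture clicked (x, z) pairs with a matplotlib event callback:

import matplotlib.pyplot as plt

picked = {}  # site_id -> list of clicked (x, z) pairs

def on_click(event, site_id="EXAMPLE0001"):   # hypothetical site_id
    if event.xdata is None:                   # click landed outside the axes
        return
    picked.setdefault(site_id, []).append((event.xdata, event.ydata))
    print("picked x={:.1f} m, z={:.2f} m".format(event.xdata, event.ydata))

fig, ax = plt.subplots(figsize=(10, 3))
ax.plot([0, 50, 100], [6.0, 3.0, 0.5], label="prestorm")  # dummy profile
ax.legend()
fig.canvas.mpl_connect("button_press_event", on_click)
plt.show()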

@@ -52,11 +52,41 @@
"import matplotlib\n",
"from matplotlib import cm\n",
"import colorlover as cl\n",
"\n",
"from tqdm import tqdm_notebook\n",
"from ipywidgets import widgets, Output\n",
"from IPython.display import display, clear_output, Image, HTML\n",
"from scipy import stats\n",
"from sklearn.metrics import confusion_matrix\n",
"import matplotlib.pyplot as plt\n",
"from scipy.interpolate import interp1d"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Matplot lib default settings\n",
"plt.rcParams[\"figure.figsize\"] = (10,6)\n",
"plt.rcParams['axes.grid']=True\n",
"plt.rcParams['grid.alpha'] = 0.5\n",
"plt.rcParams['grid.color'] = \"grey\"\n",
"plt.rcParams['grid.linestyle'] = \"--\"\n",
"\n",
"\n",
"from sklearn.metrics import confusion_matrix"
"# https://stackoverflow.com/a/20709149\n",
"matplotlib.rcParams['text.usetex'] = True\n",
"matplotlib.rcParams['font.family'] = 'sans-serif'\n",
"matplotlib.rcParams['font.sans-serif'] = 'Helvetica'\n",
"\n",
"matplotlib.rcParams['text.latex.preamble'] = [\n",
" r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n",
" r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n",
" r'\\usepackage{helvet}', # set the normal font here\n",
" r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n",
" r'\\sansmath' # <- tricky! -- gotta actually tell tex to use!\n",
"] "
]
},
{
@@ -105,8 +135,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Calculate vertical distribution of wave count\n",
"For each site, calculate how many waves reached a certain elevation (store as a binned histogram)."
"## Calculate impact hours\n",
"- For each site_id, determine the R2 elevation."
]
},
{
@@ -115,11 +145,236 @@
"metadata": {},
"outputs": [],
"source": [
"# Helper functions\n",
"def find_nearest(array, value):\n",
" array = np.asarray(array)\n",
" idx = np.nanargmin((np.abs(array - value)))\n",
" return array[idx], idx"
"# Create figure to plot results\n",
"fig = tools.make_subplots(\n",
" rows=2,\n",
" cols=2,\n",
" specs=[[{}, {}], [{}, {}]],\n",
" subplot_titles=('Swash/Swash', 'Swash/Collision', 'Collision/Swash',\n",
" 'Collision/Collision'),\n",
" shared_xaxes=True,\n",
" shared_yaxes=True,\n",
" horizontal_spacing=0.05,\n",
" vertical_spacing=0.1,\n",
" print_grid=False)\n",
"\n",
"# Iterate through each site\n",
"print('Calculating cumulate frequency of R_high for each site:')\n",
"site_ids = twls['forecasted']['mean_slope_sto06'].index.get_level_values(\n",
" 'site_id').unique().values\n",
"for site_id in tqdm_notebook(site_ids):\n",
"\n",
" # Put data into a temporary dataframe, shorter name is easier to work with\n",
" df_impacts = impacts['forecasted']['mean_slope_sto06'].loc[site_id]\n",
" df_twls = twls['forecasted']['mean_slope_sto06'].loc[site_id]\n",
"\n",
" D_low = df_impacts.dune_toe_z\n",
" if np.isnan(D_low):\n",
" continue\n",
"\n",
" # Get R_high elevations minus dune toe\n",
" R_high_ts = df_twls.R_high.dropna().values\n",
" R_high_D_low_ts = R_high_ts - D_low\n",
"\n",
" # Get SWL minus dune toe\n",
" SWL_D_low_ts = df_twls['tide'].dropna().values - D_low\n",
" DSWL_D_low_ts = (df_twls['tide'] + df_twls['setup']).dropna().values - D_low\n",
"\n",
" # Get cumulative freq\n",
" cumfreq = stats.cumfreq(R_high_D_low_ts, numbins=100)\n",
"# cumfreq = stats.cumfreq(DSWL_D_low_ts, numbins=100)\n",
"\n",
" # Calculate space of values for x\n",
" bin_vals = cumfreq.lowerlimit + np.linspace(\n",
" 0, cumfreq.binsize * cumfreq.cumcount.size, cumfreq.cumcount.size)\n",
"\n",
" # Check which subplot we should put this site on\n",
" forecasted_regime = impacts['forecasted']['mean_slope_sto06'].loc[\n",
" site_id].storm_regime\n",
" observed_regime = impacts['observed'].loc[site_id].storm_regime\n",
"\n",
" if forecasted_regime == 'swash' and observed_regime == 'swash':\n",
" x_col = 1\n",
" y_col = 1\n",
" elif forecasted_regime == 'collision' and observed_regime == 'collision':\n",
" x_col = 2\n",
" y_col = 2\n",
" elif forecasted_regime == 'swash' and observed_regime == 'collision':\n",
" x_col = 2\n",
" y_col = 1\n",
" elif forecasted_regime == 'collision' and observed_regime == 'swash':\n",
" x_col = 1\n",
" y_col = 2\n",
" else:\n",
" continue\n",
"\n",
" fig.append_trace(\n",
" go.Scattergl(\n",
" x=bin_vals,\n",
" y=[max(cumfreq.cumcount) - x for x in cumfreq.cumcount],\n",
" name=site_id,\n",
" line=dict(\n",
" color=('rgba(22, 22, 22, 0.2)'),\n",
" width=0.5,\n",
" )), x_col, y_col)\n",
"\n",
"print('Finalizing plot:')\n",
"# Change some formatting for the plot\n",
"layout = go.Layout(\n",
" xaxis=dict(domain=[0, 0.45]),\n",
" yaxis=dict(\n",
" domain=[0, 0.45],\n",
" type='log',\n",
" ),\n",
" xaxis2=dict(domain=[0.55, 1]),\n",
" xaxis4=dict(domain=[0.55, 1], anchor='y4'),\n",
" yaxis3=dict(\n",
" domain=[0.55, 1],\n",
" type='log',\n",
" ),\n",
" yaxis4=dict(\n",
" domain=[0.55, 1],\n",
" anchor='x4',\n",
" type='log',\n",
" ))\n",
"\n",
"fig['layout'].update(\n",
" showlegend=False,\n",
" title='Impact hours',\n",
" height=800,\n",
")\n",
"\n",
"for ax in ['yaxis', 'yaxis2']:\n",
"# fig['layout'][ax]['range'] = [0, 400]\n",
" fig['layout'][ax]['range'] = [0, 15]\n",
"\n",
"for ax in ['xaxis', 'xaxis2']:\n",
"# fig['layout'][ax]['range'] = [-2.5, 2.5]\n",
" fig['layout'][ax]['range'] = [-1, 1]\n",
"\n",
"fig['layout']['xaxis'].update(title='R_high - D_low')\n",
"fig['layout']['xaxis2'].update(title='R_high - D_low')\n",
"fig['layout']['yaxis'].update(title='No. of Hours')\n",
"fig['layout']['yaxis2'].update(title='No. of Hours')\n",
"\n",
"# pio.write_image(fig, 'fig2.png')\n",
"\n",
"go.FigureWidget(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This gives an overview of the distribution of impact hours. Try to calculate the confidence interval bounds for each prediction/observed combination.\n",
"\n",
"The following cell looks at combining all the observations from each CDF into one large CDF and calculating a confidence interval from it, but I'm not sure if this is a valid method."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [
3
]
},
"outputs": [],
"source": [
"from statsmodels.distributions.empirical_distribution import ECDF\n",
"from statsmodels.distributions.empirical_distribution import _conf_set\n",
"\n",
"df_twls = twls['forecasted']['mean_slope_sto06']\n",
"df_forecasted_impacts = impacts['forecasted']['mean_slope_sto06']\n",
"df_observed_impacts = impacts['observed']\n",
"\n",
"plt.figure(figsize=(6,8))\n",
"\n",
"# Do some data rearranging and joining to make it easier\n",
"df_joined = df_twls.reset_index()\n",
"df_joined = df_joined.set_index('site_id')\n",
"df_joined = df_joined.merge(\n",
" df_observed_impacts[['storm_regime']],\n",
" left_on='site_id',\n",
" right_on='site_id').rename({\n",
" 'storm_regime': 'observed_regime'\n",
" },\n",
" axis='columns')\n",
"df_joined = df_joined.merge(\n",
" df_forecasted_impacts[['storm_regime', 'dune_toe_z']],\n",
" left_on='site_id',\n",
" right_on='site_id').rename({\n",
" 'storm_regime': 'forecasted_regime'\n",
" },\n",
" axis='columns')\n",
"\n",
"regime_combinations = [\n",
" ('swash', 'swash', '#2b83ba'),\n",
" ('collision', 'swash', '#abdda4'),\n",
" ('swash', 'collision', '#fdae61'),\n",
" ('collision', 'collision', '#d7191c'),\n",
"]\n",
"\n",
"for comb in regime_combinations:\n",
"\n",
" observed_regime = comb[0]\n",
" forecasted_regime = comb[1]\n",
" color = comb[2]\n",
"\n",
" # Get values of elevation difference to plot\n",
" query = '(observed_regime==\"{}\") & (forecasted_regime==\"{}\")'.format(\n",
" observed_regime, forecasted_regime)\n",
" df = df_joined.query(query)\n",
" R_high_D_low = (df.R_high - df.dune_toe_z).values\n",
" R_high_D_low = R_high_D_low[~np.isnan(R_high_D_low)]\n",
"\n",
" ecdf = ECDF(R_high_D_low)\n",
"\n",
" y = ecdf.y\n",
" lower, upper = _conf_set(y, alpha=0.05)\n",
" x = ecdf.x\n",
"\n",
" avg_hrs = df.groupby('site_id').count().R_high.mean()\n",
" y = [avg_hrs - v * avg_hrs for v in y]\n",
" lower = [avg_hrs - v * avg_hrs for v in lower]\n",
" upper = [avg_hrs - v * avg_hrs for v in upper]\n",
"\n",
" plt.step(\n",
" x,\n",
" y,\n",
" color=color,\n",
" label='Pred={}, Obs={}'.format(forecasted_regime, observed_regime))\n",
" plt.fill_between(\n",
" x, y, upper, color='grey', alpha=0.2, interpolate=False, step='pre')\n",
" plt.fill_between(\n",
" x, y, lower, color='grey', alpha=0.2, interpolate=False, step='pre')\n",
"\n",
"# # Plot for checking\n",
"\n",
"plt.title('Empirical CDF with 95\\% confidence intervals')\n",
"plt.xlabel('$R_{high} - D_{low} (m)$')\n",
"plt.ylabel('Hours of Elevation Exceedence')\n",
"plt.xlim([-1, 1])\n",
"plt.ylim([0, 25])\n",
"plt.legend(loc='best')\n",
"\n",
"# Print to figure\n",
"plt.savefig('05-empirical-cdf.png', dpi=600, bbox_inches='tight') \n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The plot above shows:\n",
"- collision if R_high - D_low > 0.25 m for 6 hours\n",
"- swash if R_high - D_low < -0.8m for 7 hours\n",
"\n",
"additionaly:\n",
"- collision if R_high > D_low for more than 10 hours\n",
" \n",
"Let's test how these new critera would perform."
]
},
{
@@ -128,7 +383,68 @@
"metadata": {},
"outputs": [],
"source": [
"df_profile_features_crest_toes.loc[(site_id,'prestorm'),'dune_toe_z']"
"# Calculate elevation exceedence for each hours we are interested in\n",
"ele_exceedence_6hr = twls['forecasted']['mean_slope_sto06'].sort_values(['R_high'],ascending=False).groupby('site_id').R_high.nth(6-1).rename('ele_exceedence_6hr')\n",
"\n",
"ele_exceedence_7hr = twls['forecasted']['mean_slope_sto06'].sort_values(['R_high'],ascending=False).groupby('site_id').R_high.nth(7-1).rename('ele_exceedence_7hr')\n",
"\n",
"\n",
"ele_exceedence_2hr = twls['forecasted']['mean_slope_sto06'].sort_values(['R_high'],ascending=False).groupby('site_id').R_high.nth(2-1).rename('ele_exceedence_2hr')\n",
"\n",
"ele_exceedence_1hr = twls['forecasted']['mean_slope_sto06'].sort_values(['R_high'],ascending=False).groupby('site_id').R_high.nth(0).rename('ele_exceedence_1hr')\n",
"\n",
"\n",
"# Get our dune toes\n",
"dune_toes = df_profile_features_crest_toes.xs('prestorm',level='profile_type')['dune_toe_z']\n",
"\n",
"# Get our observed regimes\n",
"observed_regime = impacts['observed'].storm_regime.rename('observed_regime')\n",
"\n",
"# Concat into one data frame\n",
"df = pd.concat([dune_toes, ele_exceedence_6hr, ele_exceedence_7hr, ele_exceedence_1hr, ele_exceedence_2hr, observed_regime],axis=1)\n",
"\n",
"# Get predicted regime based on old criteria\n",
"df.loc[df.ele_exceedence_1hr < df.dune_toe_z, 'forecasted_regime'] = 'swash'\n",
"df.loc[df.ele_exceedence_1hr > df.dune_toe_z , 'forecasted_regime'] = 'collision'\n",
"\n",
"\n",
"regime_combinations = [\n",
" ('swash','swash'),\n",
" ('collision','swash'),\n",
" ('swash','collision'),\n",
" ('collision','collision'),\n",
"]\n",
"\n",
"print('Original')\n",
"for comb in regime_combinations:\n",
" query = 'forecasted_regime==\"{}\" & observed_regime==\"{}\"'.format(comb[0], comb[1])\n",
" print('Forecasted: {}, Observed: {}, Count: {}'.format(comb[0], comb[1], len(df.query(query))))\n",
"\n",
"\n",
"# Get predicted regime based on our new criteria\n",
"\n",
"adjust_swash_criteria = (df.forecasted_regime == 'swash') & (df.ele_exceedence_7hr - df.dune_toe_z > -0.8)\n",
"adjust_collision_criteria = (df.forecasted_regime == 'collision') & (df.ele_exceedence_6hr - df.dune_toe_z < 0.25)\n",
"df.loc[adjust_swash_criteria, 'forecasted_regime'] = 'collision'\n",
"df.loc[adjust_collision_criteria, 'forecasted_regime'] = 'swash'\n",
"\n",
"# df.loc[(df.ele_exceedence_1hr - df.dune_toe_z <= -0.15 ),'forecasted_regime'] = 'swash'\n",
"# df.loc[(df.ele_exceedence_1hr - df.dune_toe_z > -0.15 ),'forecasted_regime'] = 'collision'\n",
"\n",
"\n",
"print('\\nAfter adjustment')\n",
"for comb in regime_combinations:\n",
" query = 'forecasted_regime==\"{}\" & observed_regime==\"{}\"'.format(comb[0], comb[1])\n",
" print('Forecasted: {}, Observed: {}, Count: {}'.format(comb[0], comb[1], len(df.query(query))))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Looking at the adjusted values, we can see these criteria actually make it worse. There must be something wrong with the technique - maybe the way of calculating the confidence intervals is wrong? Let's try calculate confidence intervals for each regime combination.\n",
"\n",
"*This cell I don't think is used...*\n"
]
},
{
@@ -136,6 +452,510 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def mean_confidence_interval(data, confidence=0.95):\n",
" a = 1.0 * np.array(data)\n",
" n = len(a)\n",
" m, se = np.mean(a), stats.sem(a)\n",
" h = se * stats.t.ppf((1 + confidence) / 2., n-1)\n",
" return m, m-h, m+h\n",
"\n",
"# Add columns indicating how many n hrs was this the largest record\n",
"df = twls['forecasted']['mean_slope_sto06'].sort_values(['R_high'],ascending=False)\n",
"df['n_hrs_largest']= df.groupby('site_id').cumcount()+1\n",
"\n",
"# Join observed and forecast impacts and dune toe elevation\n",
"observed_regime = impacts['observed'].storm_regime.rename('observed_regime').to_frame()\n",
"forecasted_regime = impacts['forecasted']['mean_slope_sto06'].storm_regime.rename('forecasted_regime').to_frame()\n",
"dune_info = df_profile_features_crest_toes.xs('prestorm', level='profile_type')\n",
"\n",
"df['datetime'] = df.index.get_level_values('datetime')\n",
"df = df.merge(observed_regime,left_on=['site_id'],right_on='site_id')\n",
"df = df.merge(forecasted_regime,left_on=['site_id'],right_on='site_id')\n",
"df = df.merge(dune_info,left_on=['site_id'],right_on='site_id')\n",
"\n",
"# Make new column for R_high minus D_low\n",
"df['R_high_D_low_diff'] = df.R_high - df.dune_toe_z\n",
"\n",
"\n",
"regime_combinations = [\n",
" ('swash','swash'),\n",
" ('swash','collision'),\n",
" ('collision','swash'),\n",
" ('collision','collision'),\n",
"]\n",
"\n",
"print('Calculating hr exceedence elevations for each combination:')\n",
"exceedence_data = []\n",
"for hr in tqdm_notebook([x for x in range(1,101)]):\n",
" \n",
" for comb in regime_combinations:\n",
" \n",
" vals = df.loc[(df.n_hrs_largest==hr) & (df.observed_regime==comb[0]) & (df.forecasted_regime==comb[1])].R_high_D_low_diff.dropna().values\n",
" \n",
" ci = mean_confidence_interval(vals)\n",
"\n",
" exceedence_data.append({\n",
" 'observed_regime': comb[0],\n",
" 'forecasted_regime': comb[1],\n",
" 'exceedence_hr': hr,\n",
" 'ci_mean': ci[0],\n",
" 'ci_lower': ci[1],\n",
" 'ci_upper': ci[2],\n",
" })\n",
" \n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's try a different apporach and try split the observed swash and collision regimes at each impact duration hour. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from scipy.stats import norm\n",
"\n",
"best_split = []\n",
"exceedence_hrs = []\n",
"swash_mean = []\n",
"swash_95_lower = []\n",
"swash_95_upper = []\n",
"collision_mean = []\n",
"collision_95_lower = []\n",
"collision_95_upper = []\n",
"swash_median = []\n",
"swash_q1 = []\n",
"swash_q3 = []\n",
"collision_median = []\n",
"collision_q1 = []\n",
"collision_q3 = []\n",
"\n",
"for hr in tqdm_notebook([x for x in range(1,101)]):\n",
" \n",
" dists = []\n",
" plt.figure(figsize=(10,2))\n",
" for observed_regime in ['swash','collision']:\n",
" \n",
" vals = df.loc[(df.n_hrs_largest==hr) &\n",
" (df.observed_regime==observed_regime)].R_high_D_low_diff.dropna().values\n",
" \n",
" if observed_regime =='collision':\n",
" color = 'red'\n",
" label='collision'\n",
" else:\n",
" color = 'blue'\n",
" label='swash'\n",
" \n",
" plt.hist(vals, bins='auto',color=color, alpha=0.5,label=label) \n",
" plt.title(\"{} hour exceedence TWL\".format(hr))\n",
" plt.xlim([-2.5,2.5])\n",
" \n",
" dists.append(norm.fit(vals))\n",
" \n",
" # Find which elevation best splits swash and collision\n",
"# eles = [x for x in np.linspace(-2,2,1000)]\n",
"# total_cdfs = []\n",
"# for ele in eles:\n",
"# swash_cdf = norm.cdf(ele,*dists[0])\n",
"# collision_cdf = 1 - norm.cdf(ele,*dists[1])\n",
"# total_cdfs.append(swash_cdf + collision_cdf)\n",
"\n",
"# i_max = np.argmax(total_cdfs)\n",
"# best_ele = eles[i_max]\n",
"\n",
"# exceedence_hrs.append(hr)\n",
"# best_split.append(best_ele)\n",
"\n",
" # Find which elevation best splits swash and collision\n",
" eles = [x for x in np.linspace(-2,2,100)]\n",
" total_cdfs = []\n",
" swash_vals = df.loc[(df.n_hrs_largest==hr) &\n",
" (df.observed_regime=='swash')].R_high_D_low_diff.dropna().values\n",
" collision_vals = df.loc[(df.n_hrs_largest==hr) &\n",
" (df.observed_regime=='collision')].R_high_D_low_diff.dropna().values\n",
" for ele in eles:\n",
" swash_samples = np.sum( swash_vals < ele) / len(swash_vals)\n",
" collision_samples = np.sum( collision_vals > ele) / len(collision_vals) \n",
" total_cdfs.append(swash_samples + collision_samples)\n",
" \n",
" i_max = np.argmax(total_cdfs)\n",
" best_ele = eles[i_max]\n",
"\n",
" exceedence_hrs.append(hr)\n",
" best_split.append(best_ele) \n",
" \n",
" \n",
" # Store stastistics\n",
" swash_mean.append(dists[0][0])\n",
" swash_95_lower.append(norm.interval(0.5, *dists[0])[0])\n",
" swash_95_upper.append(norm.interval(0.5, *dists[0])[1])\n",
" collision_mean.append(dists[1][0])\n",
" collision_95_lower.append(norm.interval(0.5, *dists[1])[0])\n",
" collision_95_upper.append(norm.interval(0.5, *dists[1])[1])\n",
" \n",
" swash_median.append(np.percentile(swash_vals, 50))\n",
" swash_q1.append(np.percentile(swash_vals, 25))\n",
" swash_q3.append(np.percentile(swash_vals, 75))\n",
" collision_median.append(np.percentile(collision_vals, 50))\n",
" collision_q1.append(np.percentile(collision_vals, 25))\n",
" collision_q3.append(np.percentile(collision_vals, 75)) \n",
" \n",
" plt.axvline(best_ele, label='Best split (x={:.2f} m)'.format(best_ele))\n",
" plt.legend(loc='upper right', prop={'size': 10} )\n",
" plt.xlabel('$R_{high} - D_{low}$')\n",
" plt.ylabel('No. of sites')\n",
" plt.xlim([-2,2])\n",
" if hr == 80 or hr < 5 or hr==90:\n",
" plt.show()\n",
" \n",
" plt.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, let's plot our distributions for swash/collision and the best seperation between them."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(5,5))\n",
"plt.plot(best_split,exceedence_hrs, label='Best split', color='#000000', linestyle='--')\n",
"\n",
"# plt.plot(swash_mean, exceedence_hrs, label='Swash', color='#2b83ba')\n",
"# plt.fill_betweenx(exceedence_hrs,swash_95_lower,swash_95_upper, color='#2b83ba', alpha=0.2, interpolate=False)\n",
"\n",
"# plt.plot(collision_mean, exceedence_hrs, label='Collision', color='#d7191c')\n",
"# plt.fill_betweenx(exceedence_hrs,collision_95_lower,collision_95_upper, color='#d7191c', alpha=0.2, interpolate=False,label='plus 50')\n",
"\n",
"\n",
"plt.plot(swash_median, exceedence_hrs, label='Swash', color='#2b83ba')\n",
"plt.fill_betweenx(exceedence_hrs,swash_q1,swash_q3, color='#2b83ba', alpha=0.2, interpolate=False,label='Swash IQR')\n",
"\n",
"plt.plot(collision_median, exceedence_hrs, label='Collision', color='#d7191c')\n",
"plt.fill_betweenx(exceedence_hrs,collision_q1,collision_q3, color='#d7191c', alpha=0.2, interpolate=False,label='Collision IQR')\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"#===\n",
"# # Let's plot one site as well, just to check\n",
"# import random\n",
"# site_ids = list(impacts['observed'].index.unique().values)\n",
"# site_id = random.choice(site_ids)\n",
"\n",
"\n",
"# site_id = 'TREACH0011'\n",
"# site_predicted_regime = impacts['forecasted']['mean_slope_sto06'].loc[site_id].storm_regime\n",
"# site_observed_regime = impacts['observed'].loc[site_id].storm_regime\n",
"# df_site = df.loc[site_id]\n",
"# plt.plot(df_site.R_high_D_low_diff, df_site.n_hrs_largest,label='site_id={}\\n(pred={},obs={})'.format(site_id,site_predicted_regime, site_observed_regime),color='#ffffff', linestyle='--')\n",
"\n",
"\n",
"plt.title('Observed Swash/Collision - Best Split')\n",
"plt.xlabel('$R_{high} - D_{low}$ (m)')\n",
"plt.ylabel('Exceedance hours')\n",
"plt.ylim([0,100])\n",
"plt.xlim([-2,2])\n",
"plt.legend()\n",
"\n",
"# Print to figure\n",
"plt.savefig('05-best-split.png', dpi=600, bbox_inches='tight') \n",
"\n",
"plt.show()\n",
"plt.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot above shows that if Rhigh = Dlow plus/minus 0.25m, we should say the storm regime is uncertain, rather than trying to make an incorrect prediction."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_for = impacts['forecasted']['mean_slope_sto06']\n",
"df_obs = impacts['observed']\n",
"\n",
"# Join forecasted and observed impacts into same dataframe\n",
"df_for = df_for.rename(columns={'storm_regime': 'forecasted_regime'})\n",
"df_obs = df_obs.rename(columns={'storm_regime': 'observed_regime'})\n",
"df_for = df_for.merge(\n",
" df_obs.observed_regime.to_frame(), left_index=True, right_index=True)\n",
"\n",
"# Get wrong forecasts\n",
"incorrect_for = df_for.forecasted_regime != df_for.observed_regime\n",
"\n",
"# How many wrong/correct forecasts\n",
"print('There were {} correct forecasts'.format(len(df_for[~incorrect_for])))\n",
"print('There were {} incorrect forecasts'.format(len(df_for[incorrect_for])))\n",
"print('')\n",
"\n",
"# How many of these forecasts were where R_high was near D_low?\n",
"close_eles = ((df.R_high > df.dune_toe_z - 0.25) &\n",
" (df.R_high < df.dune_toe_z + 0.25))\n",
"\n",
"s = 'R_high and D_low elevations were close at {} correctly forecasted sites'\n",
"print(s.format(len(df_for[~incorrect_for & close_eles])))\n",
"\n",
"s = 'R_high and D_low elevations were close at {} wrongly forecasted sites'\n",
"print(s.format(len(df_for[incorrect_for & close_eles])))\n",
"\n",
"# df[(df.R_high>df.dune_toe_z-0.25)&(df.R_high<df.dune_toe_z+0.25)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"So we can see more than half the number of incorrect predictions by saying they're unknown, but a quarter of correct predictions will say they're unknown."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# df_exceedence = pd.DataFrame(exceedence_data)\n",
"# df_exceedence = df_exceedence.set_index(['observed_regime','forecasted_regime','exceedence_hr'])\n",
"\n",
"# import random\n",
"# site_ids = list(impacts['observed'].index.unique().values)\n",
"# for site_id in random.sample(site_ids, 5):\n",
"\n",
"# # Plot mean ele exceedence hours for each combination\n",
"# plt.figure(figsize=(10,4))\n",
"# regime_combinations = [\n",
"# ('swash','swash'),\n",
"# ('swash','collision'),\n",
"# ('collision','swash'),\n",
"# ('collision','collision'),\n",
"# ]\n",
"\n",
"# for comb in regime_combinations:\n",
"# df_plot = df_exceedence.xs((comb[0], comb[1]), level=['observed_regime','forecasted_regime'])\n",
"# plt.plot(df_plot.ci_mean, df_plot.index.values,label='obs={}, pred={}'.format(comb[0],comb[1]))\n",
"# plt.fill_betweenx(df_plot.index.values, df_plot.ci_lower, df_plot.ci_upper, color='grey', alpha=0.2, interpolate=False)\n",
"\n",
"# plt.xlim([-2,1])\n",
"# plt.ylim([0,100])\n",
"\n",
"# # Let's plot one site as well, just to check\n",
"# site_predicted_regime = impacts['forecasted']['mean_slope_sto06'].loc[site_id].storm_regime\n",
"# site_observed_regime = impacts['observed'].loc[site_id].storm_regime\n",
"# df_site = df.loc[site_id]\n",
"# plt.plot(df_site.R_high_D_low_diff, df_site.n_hrs_largest,label='site_id={} (pred={},obs={})'.format(site_id,site_predicted_regime, site_observed_regime))\n",
"\n",
"# plt.legend(loc='upper right', prop={'size': 8})\n",
"# plt.show()\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true
},
"source": [
"# Other stuff which hasn't been tidied up"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Check the relationship between SWL-Dtoe, DSWL-Dtoe, R_high-Dtoe\n",
"Use 3D scatter plot to check the relationship between SWL-Dtoe, DSWL-Dtoe, R_high-Dtoe.\n",
"\n",
"This is moving away from time dependence..."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"[x[1] for x in df.query('forecasted_regime==\"swash\" & observed_regime==\"swash\"').iterrows()][0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"data = []\n",
"\n",
"# Iterate through each site\n",
"print('Calculating cumulate frequency of R_high for each site:')\n",
"site_ids = twls['forecasted']['mean_slope_sto06'].index.get_level_values(\n",
" 'site_id').unique().values\n",
"for site_id in tqdm_notebook(site_ids):\n",
"\n",
" # Put data into a temporary dataframe, shorter name is easier to work with\n",
" df_impacts = impacts['forecasted']['mean_slope_sto06'].loc[site_id]\n",
" df_twls = twls['forecasted']['mean_slope_sto06'].loc[site_id]\n",
"\n",
" D_low = df_impacts.dune_toe_z\n",
" if np.isnan(D_low):\n",
" continue\n",
"\n",
" # Find time where R_max is the highest\n",
" t = df_twls.R_high.idxmax()\n",
"\n",
" # Get R_high, tide and setup at that time\n",
" R_high = df_twls.loc[t].R_high\n",
" tide = df_twls.loc[t].tide\n",
" setup = df_twls.loc[t].setup\n",
"\n",
" # Calculate differences in elevation\n",
" R_high_D_low = R_high - D_low\n",
" SWL_D_low = tide - D_low\n",
" DSWL_D_low = tide + setup - D_low\n",
"\n",
" # Check which subplot we should put this site on\n",
" forecasted_regime = impacts['forecasted']['mean_slope_sto06'].loc[\n",
" site_id].storm_regime\n",
" observed_regime = impacts['observed'].loc[site_id].storm_regime\n",
"\n",
" data.append({\n",
" 'R_high_D_low': R_high_D_low,\n",
" 'SWL_D_low': SWL_D_low,\n",
" 'DSWL_D_low': DSWL_D_low,\n",
" 'forecasted_regime': forecasted_regime,\n",
" 'observed_regime': observed_regime\n",
" })\n",
"\n",
"# Turn data into a dataframe and plot\n",
"df = pd.DataFrame(data)\n",
"\n",
"# Plot swash/swash\n",
"query='forecasted_regime==\"swash\" & observed_regime==\"swash\"'\n",
"trace1 = go.Scatter3d(\n",
" x=[x[1].R_high_D_low for x in df.query(query).iterrows()],\n",
" y=[x[1].SWL_D_low for x in df.query(query).iterrows()],\n",
" z=[x[1].DSWL_D_low for x in df.query(query).iterrows()],\n",
" name='Swash/Swash',\n",
" mode='markers',\n",
" marker=dict(\n",
" size=6,\n",
" color='rgb(26,150,65)',\n",
" opacity=0.8))\n",
"\n",
"query='forecasted_regime==\"swash\" & observed_regime==\"collision\"'\n",
"trace2 = go.Scatter3d(\n",
" x=[x[1].R_high_D_low for x in df.query(query).iterrows()],\n",
" y=[x[1].SWL_D_low for x in df.query(query).iterrows()],\n",
" z=[x[1].DSWL_D_low for x in df.query(query).iterrows()],\n",
" name='Swash/Collision',\n",
" mode='markers',\n",
" marker=dict(\n",
" size=6,\n",
" color='rgb(253,174,97)',\n",
" opacity=0.8))\n",
"\n",
"query='forecasted_regime==\"collision\" & observed_regime==\"swash\"'\n",
"trace3 = go.Scatter3d(\n",
" x=[x[1].R_high_D_low for x in df.query(query).iterrows()],\n",
" y=[x[1].SWL_D_low for x in df.query(query).iterrows()],\n",
" z=[x[1].DSWL_D_low for x in df.query(query).iterrows()],\n",
" name='Collision/Swash',\n",
" mode='markers',\n",
" marker=dict(\n",
" size=6,\n",
" color='rgb(166,217,106)',\n",
" opacity=0.8))\n",
"\n",
"query='forecasted_regime==\"collision\" & observed_regime==\"collision\"'\n",
"trace4 = go.Scatter3d(\n",
" x=[x[1].R_high_D_low for x in df.query(query).iterrows()],\n",
" y=[x[1].SWL_D_low for x in df.query(query).iterrows()],\n",
" z=[x[1].DSWL_D_low for x in df.query(query).iterrows()],\n",
" name='Collsion/Collision',\n",
" mode='markers',\n",
" marker=dict(\n",
" size=6,\n",
" color='rgb(215,25,28)',\n",
" opacity=0.8))\n",
"\n",
"layout = go.Layout(\n",
" autosize=False,\n",
" width=1000,\n",
" height=700,\n",
" margin=go.layout.Margin(\n",
" l=50,\n",
" r=50,\n",
" b=100,\n",
" t=100,\n",
" pad=4\n",
" ),\n",
")\n",
"\n",
"fig = go.Figure(data=[trace1,trace2,trace3,trace4], layout=layout)\n",
"go.FigureWidget(fig)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"hidden": true
},
"source": [
"## Calculate vertical distribution of wave count SS\n",
"For each site, calculate how many waves reached a certain elevation (store as a binned histogram)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"# Helper functions\n",
"def find_nearest(array, value):\n",
" array = np.asarray(array)\n",
" idx = np.nanargmin((np.abs(array - value)))\n",
" return array[idx], idx"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"data = []\n",
"for site_id, df_site_twl in twls['forecasted']['mean_slope_sto06'].groupby('site_id'):\n",
@@ -194,25 +1014,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"counts, bin_edges = np.histogram (data_twl[0]['twl_levels'], bins=50) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"list(np.asarray(twl_eles_per_wave)[~np.isfinite(twl_eles_per_wave)])"
]
"metadata": {
"hidden": true
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = tools.make_subplots(\n",
@@ -288,7 +1092,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"fig['layout']['yaxis']"

@@ -57,10 +57,37 @@
"\n",
"from ipywidgets import widgets, Output\n",
"from IPython.display import display, clear_output, Image, HTML\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import confusion_matrix"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Matplot lib default settings\n",
"plt.rcParams[\"figure.figsize\"] = (10,6)\n",
"plt.rcParams['axes.grid']=True\n",
"plt.rcParams['grid.alpha'] = 0.5\n",
"plt.rcParams['grid.color'] = \"grey\"\n",
"plt.rcParams['grid.linestyle'] = \"--\"\n",
"plt.rcParams['axes.grid']=True\n",
"\n",
"# https://stackoverflow.com/a/20709149\n",
"matplotlib.rcParams['text.usetex'] = True\n",
"\n",
"matplotlib.rcParams['text.latex.preamble'] = [\n",
" r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n",
" r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n",
" r'\\usepackage{helvet}', # set the normal font here\n",
" r'\\usepackage{amsmath}',\n",
" r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n",
" r'\\sansmath', # <- tricky! -- gotta actually tell tex to use!\n",
"] "
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -140,6 +167,12 @@
"# Get x and z at poststorm dune toe for each site\n",
"df_dune_toe_poststorm = df_profile_features_crest_toes.xs('poststorm', level='profile_type')[['dune_toe_x','dune_toe_z']]\n",
"\n",
"# If there is no poststorm dune toe defined, use the dune crest\n",
"df_dune_crest_poststorm = df_profile_features_crest_toes.xs('poststorm', level='profile_type')[['dune_crest_x','dune_crest_z']]\n",
"df_dune_toe_poststorm.dune_toe_x = df_dune_toe_poststorm.dune_toe_x.fillna(df_dune_crest_poststorm.dune_crest_x)\n",
"df_dune_toe_poststorm.dune_toe_z = df_dune_toe_poststorm.dune_toe_z.fillna(df_dune_crest_poststorm.dune_crest_z)\n",
"\n",
"\n",
"# Join df for mhw and dune toe\n",
"df = df_mhw_poststorm.join(df_dune_toe_poststorm)\n",
"df['beta'] = -(df['dune_toe_z'] - df['z_mhw']) / (df['dune_toe_x'] -df['x_mhw'])\n",
@@ -176,26 +209,89 @@
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We also should add the change in beach width between prestorm and post storm profiles"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_data.index"
"ele = 0.7\n",
"data = []\n",
"for site_id, df_site in df_profiles.groupby('site_id'):\n",
" \n",
" # Beach width should be measured from dune toe (or crest if doesn't exist) to MHW\n",
" \n",
" dune_toe_x = np.nanmax([\n",
" df_profile_features_crest_toes.loc[(site_id,'prestorm')].dune_crest_x,\n",
" df_profile_features_crest_toes.loc[(site_id,'prestorm')].dune_toe_x\n",
" ])\n",
" \n",
" \n",
" # TODO This probably should take the closest value to ele starting from the seaward end of the profile\n",
" temp = df_site.xs('prestorm',level='profile_type').dropna(subset=['z'])\n",
" prestorm_width = temp.iloc[(temp.z - ele).abs().argsort()[0]].name[1] - dune_toe_x\n",
" \n",
" temp = df_site.xs('poststorm',level='profile_type').dropna(subset=['z'])\n",
" poststorm_width = temp.iloc[(temp.z - ele).abs().argsort()[0]].name[1] - dune_toe_x\n",
" \n",
" width_change = prestorm_width - poststorm_width\n",
" data.append(\n",
" {\n",
" 'site_id': site_id,\n",
" 'width_change': width_change,\n",
" 'prestorm_width': prestorm_width,\n",
" 'poststorm_width': poststorm_width\n",
" })\n",
" \n",
" \n",
" \n",
" \n",
"df_width_change = pd.DataFrame(data)\n",
"df_width_change = df_width_change.set_index(['site_id'])\n",
"\n",
"# Join with the data\n",
"df = df.merge(df_width_change, left_on=['site_id'], right_on=['site_id'])\n"
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true
},
"source": [
"Plot our data"
"## Plot our data in a confusion matrix\n",
"Superseded"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"hidden": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"fig = tools.make_subplots(\n",
@@ -279,7 +375,9 @@
},
{
"cell_type": "markdown",
"metadata": {},
"metadata": {
"hidden": true
},
"source": [
"Looking at the above plot:\n",
"- In general, we can see that the prestorm mean slope is flatter than the poststorm mean slope. This can be explained by the presence of prestorm berms, which increase the prestorm mean slope. During the storm, these berms get eroded and decrease the slope.\n",
@@ -287,6 +385,166 @@
"- **Swash/Collision**: Where we predict collision but observe swash, we can see that the prestorm mean slopes >0.1 generate high TWLs. \n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot our data in a confusion matrix\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[cc_mask].loc[df[cc_mask].poststorm_beta+0.05< df[cc_mask].prestorm_beta]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"f, ([ax1, ax2], [ax3, ax4],) = plt.subplots(\n",
" 2,\n",
" 2,\n",
" sharey=True,\n",
" sharex=True,\n",
" figsize=(8, 7))\n",
"\n",
"\n",
"ss_mask = (df.observed_regime=='swash') & (df.forecasted_regime=='swash')\n",
"sc_mask = (df.observed_regime=='swash') & (df.forecasted_regime=='collision')\n",
"cs_mask = (df.observed_regime=='collision') & (df.forecasted_regime=='swash')\n",
"cc_mask = (df.observed_regime=='collision') & (df.forecasted_regime=='collision')\n",
"\n",
"# Define colormap for our observations\n",
"cm = plt.cm.get_cmap('plasma')\n",
"\n",
"params = {'edgecolors': '#999999',\n",
" 's': 12,\n",
" 'linewidth': 0.1, \n",
" 'cmap':cm,\n",
" 'vmin':0, \n",
" 'vmax':60\n",
" }\n",
"\n",
"sc=ax1.scatter(df[ss_mask].prestorm_beta, df[ss_mask].poststorm_beta, c=df[ss_mask].width_change,**params)\n",
"ax1.set_title('Swash/Swash')\n",
"ax1.set_ylabel('Observed swash')\n",
"\n",
"ax2.scatter(df[sc_mask].prestorm_beta, df[sc_mask].poststorm_beta, c=df[sc_mask].width_change,**params)\n",
"ax2.set_title('Swash/Collision')\n",
"\n",
"ax3.scatter(df[cs_mask].prestorm_beta, df[cs_mask].poststorm_beta, c=df[cs_mask].width_change,**params)\n",
"ax3.set_title('Collision/Swash')\n",
"ax3.set_ylabel('Observed collision')\n",
"ax3.set_xlabel('Predicted swash')\n",
"\n",
"ax4.scatter(df[cc_mask].prestorm_beta, df[cc_mask].poststorm_beta, c=df[cc_mask].width_change,**params)\n",
"ax4.set_title('Collision/Collision')\n",
"ax4.set_xlabel('Predicted collision')\n",
"\n",
"for ax in [ax1,ax2,ax3,ax4]:\n",
" ax.plot([0,0.2],[0,0.2], 'k--')\n",
" ax.set_xlim([0,0.2])\n",
" ax.set_ylim([0,0.2])\n",
"\n",
" \n",
"# Create a big ax so we can use common axis labels\n",
"# https://stackoverflow.com/a/36542971\n",
"f.add_subplot(111, frameon=False)\n",
"plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)\n",
"plt.grid(False)\n",
"plt.xlabel(\"Prestorm mean slope (-)\", labelpad=25)\n",
"plt.ylabel(\"Poststorm mean slope (-)\", labelpad=25)\n",
" \n",
"# Layout adjustment\n",
"plt.tight_layout()\n",
"plt.subplots_adjust(hspace=0.25, bottom=0.1,right=0.9)\n",
"\n",
"# Add colorbar\n",
"cbar_ax = f.add_axes([0.95, 0.15, 0.05, 0.7])\n",
"cb = f.colorbar(sc, cax=cbar_ax)\n",
"cb.set_label(r'$\\varDelta$ beach width at MHW (m)')\n",
"\n",
"# Save and show figure\n",
"plt.savefig('06-confusion-change-in-slope.png'.format(beach), dpi=600, bbox_inches='tight') \n",
"plt.show()\n",
"plt.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot for single beach"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"beach = 'NARRA'\n",
"\n",
"df_beach = df.loc[df.index.str.contains(beach)]\n",
"\n",
"# Get index\n",
"n = [x for x in range(len(df_beach))][::-1]\n",
"n_sites = [x for x in df_beach.index][::-1]\n",
"\n",
"f, (ax1,ax2,ax3,ax4) = plt.subplots(1,4, sharey=True,figsize=(10, 8))\n",
"\n",
"ax1.plot(df_beach.prestorm_beta,n,label='Prestorm slope',color='#4d9221')\n",
"ax1.plot(df_beach.poststorm_beta,n,label='Poststorm slope',color='#c51b7d')\n",
"ax1.set_title('Mean beach slope')\n",
"ax1.legend(loc='center', bbox_to_anchor=(0.5, -0.15))\n",
"\n",
"# Replace yticks with site_ids\n",
"yticks = ax1.get_yticks().tolist()\n",
"yticks = [n_sites[int(y)] if 0 <= y <= len(n_sites) else y for y in yticks ]\n",
"ax1.set_yticklabels(yticks)\n",
"ax1.set_xlabel(r'Slope (-)')\n",
"\n",
"ax2.plot(df_beach.prestorm_width,n,label='Prestorm width',color='#4d9221')\n",
"ax2.plot(df_beach.poststorm_width,n, label='Poststorm width',color='#c51b7d')\n",
"# ax2.set_xlim([200,300])\n",
"ax2.set_xlabel(r'Beach width (m)')\n",
"ax2.set_title('Beach width\\nat MHW')\n",
"ax2.legend(loc='center', bbox_to_anchor=(0.5, -0.15))\n",
"\n",
"ax3.plot(df_beach.width_change,n,color='#999999')\n",
"ax3.set_xlim([0,75])\n",
"ax3.set_title('Change in MHW\\nbeach width')\n",
"ax3.set_xlabel(r'$\\varDelta$ Beach width (m)')\n",
"\n",
"\n",
"ax4.plot(df_beach.poststorm_beta / df_beach.prestorm_beta,n,color='#999999')\n",
"ax4.set_title('Ratio between pre and\\npost storm mean slopes')\n",
"\n",
"plt.tight_layout()\n",
"f.subplots_adjust(top=0.88)\n",
"f.suptitle(beach)\n",
"\n",
"# Print to figure\n",
"plt.savefig('06-change-in-slope-{}.png'.format(beach), dpi=600, bbox_inches='tight') \n",
"plt.show()\n",
"plt.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_beach"
]
}
],
"metadata": {

@@ -0,0 +1,348 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Longshore plots of each beach\n",
"- Need to create a longshore plot of each beach to see how the variables change alongshore."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup notebook"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Enable autoreloading of our modules. \n",
"# Most of the code will be located in the /src/ folder, \n",
"# and then called from the notebook.\n",
"%matplotlib inline\n",
"%reload_ext autoreload\n",
"%autoreload"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.core.debugger import set_trace\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import decimal\n",
"import plotly\n",
"import plotly.graph_objs as go\n",
"import plotly.plotly as py\n",
"import plotly.tools as tls\n",
"import plotly.figure_factory as ff\n",
"from plotly import tools\n",
"import plotly.io as pio\n",
"from scipy import stats\n",
"import math\n",
"import matplotlib\n",
"from matplotlib import cm\n",
"import colorlover as cl\n",
"from tqdm import tqdm_notebook\n",
"from ipywidgets import widgets, Output\n",
"from IPython.display import display, clear_output, Image, HTML\n",
"from scipy import stats\n",
"from sklearn.metrics import confusion_matrix\n",
"import matplotlib.pyplot as plt\n",
"from scipy.interpolate import interp1d\n",
"from pandas.api.types import CategoricalDtype"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Matplot lib default settings\n",
"plt.rcParams[\"figure.figsize\"] = (10,6)\n",
"plt.rcParams['axes.grid']=True\n",
"plt.rcParams['grid.alpha'] = 0.5\n",
"plt.rcParams['grid.color'] = \"grey\"\n",
"plt.rcParams['grid.linestyle'] = \"--\"\n",
"plt.rcParams['axes.grid']=True\n",
"\n",
"# https://stackoverflow.com/a/20709149\n",
"matplotlib.rcParams['text.usetex'] = True\n",
"\n",
"matplotlib.rcParams['text.latex.preamble'] = [\n",
" r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n",
" r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n",
" r'\\usepackage{helvet}', # set the normal font here\n",
" r'\\usepackage{amsmath}',\n",
" r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n",
" r'\\sansmath', # <- tricky! -- gotta actually tell tex to use!\n",
"] "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
" print('Importing {}'.format(csv))\n",
" return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n",
"\n",
"df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n",
"df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n",
"df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n",
"df_sites = df_from_csv('sites.csv', index_col=[0])\n",
"df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])\n",
"\n",
"# Note that the forecasted data sets should be in the same order for impacts and twls\n",
"impacts = {\n",
" 'forecasted': {\n",
" 'foreshore_slope_sto06': df_from_csv('impacts_forecasted_foreshore_slope_sto06.csv', index_col=[0]),\n",
" 'mean_slope_sto06': df_from_csv('impacts_forecasted_mean_slope_sto06.csv', index_col=[0]),\n",
" },\n",
" 'observed': df_from_csv('impacts_observed.csv', index_col=[0])\n",
" }\n",
"\n",
"\n",
"twls = {\n",
" 'forecasted': {\n",
" 'foreshore_slope_sto06': df_from_csv('twl_foreshore_slope_sto06.csv', index_col=[0, 1]),\n",
" 'mean_slope_sto06':df_from_csv('twl_mean_slope_sto06.csv', index_col=[0, 1]),\n",
" }\n",
"}\n",
"print('Done!')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generate plot for each beach"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"beach = 'NARRA'\n",
"\n",
"# Get the dataframe\n",
"df = impacts['forecasted']['mean_slope_sto06']\n",
"df = df.rename(columns={'storm_regime': 'forecasted_regime'})\n",
"\n",
"df_beach = df.loc[df.index.str.contains(beach)]\n",
"\n",
"# Add information about hydrodynamics at max(R_high) time\n",
"df_beach = df_beach.merge(\n",
" twls['forecasted']['mean_slope_sto06'].drop(columns=['R_high', 'R_low']),\n",
" left_on=['site_id', 'datetime'],\n",
" right_on=['site_id', 'datetime'])\n",
"\n",
"# Add information about observed impacts\n",
"obs_impacts = impacts['observed'].rename(columns={\n",
" 'storm_regime': 'observed_regime'\n",
"}).observed_regime.to_frame()\n",
"df_beach = df_beach.merge(obs_impacts, left_on='site_id', right_on='site_id')\n",
"\n",
"# Convert storm regimes to categorical datatype\n",
"cat_type = CategoricalDtype(\n",
" categories=['swash', 'collision', 'overwash', 'inundation'], ordered=True)\n",
"df_beach.forecasted_regime = df_beach.forecasted_regime.astype(cat_type)\n",
"df_beach.observed_regime = df_beach.observed_regime.astype(cat_type)\n",
"\n",
"# Get index\n",
"n = [x for x in range(len(df_beach))][::-1]\n",
"n_sites = [x for x in df_beach.index][::-1]\n",
"\n",
"f, (ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8) = plt.subplots(\n",
" 1,\n",
" 8,\n",
" sharey=True,\n",
" figsize=(14, 8),\n",
" gridspec_kw={'width_ratios': [4, 4, 2, 2, 2, 2,2,2]})\n",
"\n",
"# Specify colors for storm regimes\n",
"cmap = {\n",
" 'swash': '#1a9850',\n",
" 'collision': '#fee08b',\n",
" 'overwash': '#d73027'\n",
"}\n",
"\n",
"colors = [cmap.get(x) for x in df_beach.observed_regime]\n",
"colors = ['#d73027' if c is None else c for c in colors]\n",
"\n",
"# Plot forecasted and observed storm regime\n",
"ax1.scatter(\n",
" df_beach.observed_regime.cat.codes.replace(-1,np.NaN),\n",
" n,\n",
" color=colors,\n",
" marker='o',\n",
" label='Observed regime')\n",
"\n",
"ax1.scatter(\n",
" df_beach.forecasted_regime.cat.codes.replace(-1,np.NaN),\n",
" n,\n",
" color='b',\n",
" marker='o',\n",
" edgecolors='black',\n",
" facecolors='none',\n",
" label='Forecasted regime')\n",
"\n",
"ax1.set_title('Storm\\nregime')\n",
"ax1.set_xticks([0,1,2,3])\n",
"ax1.set_xticklabels(['swash','collision','overwash','inundation'])\n",
"ax1.tick_params(axis='x', rotation=45)\n",
"ax1.legend(loc='center', bbox_to_anchor=(0.5, -0.15))\n",
"\n",
"# Replace yticks with site_ids\n",
"yticks = ax1.get_yticks().tolist()\n",
"yticks = [n_sites[int(y)] if 0 <= y <= len(n_sites) else y for y in yticks ]\n",
"ax1.set_yticklabels(yticks)\n",
"\n",
"# Water levels\n",
"ax2.plot(df_beach.R_high, n, color='#2c7bb6')\n",
"ax2.plot(df_beach.R_low, n, color='#2c7bb6')\n",
"ax2.fill_betweenx(\n",
" n, df_beach.R_low, df_beach.R_high, alpha=0.2, color='#2c7bb6', label='$R_{low}$ to $R_{high}$')\n",
"\n",
"# Dune elevations\n",
"ax2.plot(df_beach.dune_crest_z, n, color='#fdae61')\n",
"ax2.plot(df_beach.dune_toe_z, n, color='#fdae61')\n",
"ax2.fill_betweenx(\n",
" n, df_beach.dune_toe_z, df_beach.dune_crest_z, alpha=0.2, color='#fdae61', label='$D_{low}$ to $D_{high}$')\n",
"\n",
"ax2.set_title('TWL \\& Dune\\nElevations')\n",
"ax2.legend(loc='center',bbox_to_anchor=(0.5,-0.15))\n",
"ax2.set_xlabel('Elevation (m AHD)')\n",
"\n",
"# Plot R_high - D_low\n",
"ax3.plot(df_beach.R_high - df_beach.dune_toe_z,n,color='#999999')\n",
"ax3.axvline(x=0,color='black',linestyle=':')\n",
"ax3.set_title('$R_{high}$ - $D_{low}$')\n",
"ax3.set_xlabel('Height (m)')\n",
"ax3.set_xlim([-2,2])\n",
"\n",
"# Wave height, wave period, beach slope\n",
"ax4.plot(df_beach.Hs0, n,color='#377eb8')\n",
"ax4.set_title('$H_{s0}$')\n",
"ax4.set_xlabel('Sig. wave height (m)')\n",
"ax4.set_xlim([3,5])\n",
"\n",
"ax5.plot(df_beach.Tp, n,color='#e41a1c')\n",
"ax5.set_title('$T_{p}$')\n",
"ax5.set_xlabel('Peak wave period (s)')\n",
"ax5.set_xlim([8,14])\n",
"\n",
"ax6.plot(df_beach.tide, n,color='#a6cee3')\n",
"ax6.set_title('Tide')\n",
"ax6.set_xlabel('Elevation (m AHD)')\n",
"ax6.set_xlim([0,2])\n",
"\n",
"ax7.plot(df_beach.beta, n,color='#4daf4a')\n",
"ax7.set_title(r'$\\beta$')\n",
"ax7.set_xlabel('Mean prestorm\\nbeach slope')\n",
"ax7.set_xlim([0,0.15])\n",
"\n",
"ax8.plot(df_beach.R2, n,color='#6a3d9a')\n",
"ax8.set_title(r'$R_{2\\%}$')\n",
"ax8.set_xlabel('Height (m)')\n",
"\n",
"plt.tight_layout()\n",
"f.subplots_adjust(top=0.88)\n",
"f.suptitle(beach)\n",
"\n",
"\n",
"# Print to figure\n",
"plt.savefig('07-{}.png'.format(beach), dpi=600, bbox_inches='tight') \n",
"\n",
"plt.show()\n",
"plt.close()"
]
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -0,0 +1,767 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Narrabeen Slope Test\n",
"With full topo and bathy combined"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup notebook"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Enable autoreloading of our modules. \n",
"# Most of the code will be located in the /src/ folder, \n",
"# and then called from the notebook.\n",
"%matplotlib inline\n",
"%reload_ext autoreload\n",
"%autoreload"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.core.debugger import set_trace\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import decimal\n",
"import plotly\n",
"import plotly.graph_objs as go\n",
"import plotly.plotly as py\n",
"import plotly.tools as tls\n",
"import plotly.figure_factory as ff\n",
"from plotly import tools\n",
"import plotly.io as pio\n",
"from scipy import stats\n",
"import math\n",
"import matplotlib\n",
"from matplotlib import cm\n",
"import colorlover as cl\n",
"from tqdm import tqdm_notebook\n",
"from ipywidgets import widgets, Output\n",
"from IPython.display import display, clear_output, Image, HTML\n",
"from scipy import stats\n",
"from sklearn.metrics import confusion_matrix\n",
"import matplotlib.pyplot as plt\n",
"from scipy.interpolate import interp1d\n",
"from pandas.api.types import CategoricalDtype\n",
"from scipy.interpolate import UnivariateSpline\n",
"from shapely.geometry import Point, LineString"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Matplot lib default settings\n",
"plt.rcParams[\"figure.figsize\"] = (10,6)\n",
"plt.rcParams['axes.grid']=True\n",
"plt.rcParams['grid.alpha'] = 0.5\n",
"plt.rcParams['grid.color'] = \"grey\"\n",
"plt.rcParams['grid.linestyle'] = \"--\"\n",
"plt.rcParams['axes.grid']=True\n",
"\n",
"# https://stackoverflow.com/a/20709149\n",
"matplotlib.rcParams['text.usetex'] = True\n",
"\n",
"matplotlib.rcParams['text.latex.preamble'] = [\n",
" r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n",
" r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n",
" r'\\usepackage{helvet}', # set the normal font here\n",
" r'\\usepackage{amsmath}',\n",
" r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n",
" r'\\sansmath', # <- tricky! -- gotta actually tell tex to use!\n",
"] "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import .csv data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data_filename = '08-narr-topo-bathy-slope-test-full-profiles.csv'\n",
"\n",
"df_profiles = pd.read_csv(data_filename).set_index(['site_id','x'])\n",
"df_profiles = df_profiles[~df_profiles.index.duplicated(keep='first')]\n",
"print('df_profiles:')\n",
"df_profiles.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Manually cut off the prestorm topo \n",
"cuts = {'NARRA0004': {'prestorm_topo_max_x': 330,\n",
" 'poststorm_topo_max_x': 250},\n",
" 'NARRA0008': {'prestorm_topo_max_x': 290,\n",
" 'poststorm_topo_max_x': 250},\n",
" 'NARRA0012': {'prestorm_topo_max_x': 300,\n",
" 'poststorm_topo_max_x': 250},\n",
" 'NARRA0016': {'prestorm_topo_max_x': 300,\n",
" 'poststorm_topo_max_x': 225},\n",
" 'NARRA0021': {'prestorm_topo_max_x': 280,\n",
" 'poststorm_topo_max_x': 225},\n",
" 'NARRA0023': {'prestorm_topo_max_x': 275,\n",
" 'poststorm_topo_max_x': 215},\n",
" 'NARRA0027': {'prestorm_topo_max_x': 260,\n",
" 'poststorm_topo_max_x': 225},\n",
" 'NARRA0031': {'prestorm_topo_max_x': 260,\n",
" 'poststorm_topo_max_x': 225},\n",
" }\n",
"\n",
"for site_id in cuts:\n",
" mask1 = df_profiles.index.get_level_values('site_id') == site_id\n",
" mask2 = df_profiles.index.get_level_values('x') > cuts[site_id]['prestorm_topo_max_x']\n",
" df_profiles.loc[(mask1)&(mask2), 'pre_topo'] = np.nan\n",
" \n",
" mask3 = df_profiles.index.get_level_values('x') > cuts[site_id]['poststorm_topo_max_x']\n",
" df_profiles.loc[(mask1)&(mask3), 'post_topo'] = np.nan\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# for site_id,df_site in df_profiles.groupby('site_id'):\n",
"# f, (ax1) = plt.subplots(1,1, figsize=(6, 3))\n",
"# ax1.set_title(site_id)\n",
" \n",
"# ax1.plot(df_site.index.get_level_values('x'),\n",
"# df_site.pre_topo,\n",
"# label='Pre Topo',\n",
"# color='#2c7bb6')\n",
"# ax1.plot(df_site.index.get_level_values('x'),\n",
"# df_site.pre_bathy,\n",
"# label='Pre Bathy',\n",
"# color='#abd9e9')\n",
"\n",
"# ax1.plot(df_site.index.get_level_values('x'),\n",
"# df_site.post_topo,\n",
"# label='Post Topo',\n",
"# color='#d7191c')\n",
"# ax1.plot(df_site.index.get_level_values('x'),\n",
"# df_site.post_bathy,\n",
"# label='Post Bathy',\n",
"# color='#fdae61')\n",
"\n",
"# ax1.legend()\n",
"# plt.show()\n",
"# plt.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_profiles = df_profiles.dropna(\n",
" subset=['post_topo', 'post_bathy', 'pre_bathy', 'pre_topo'], how='all')\n",
"\n",
"df_profiles = df_profiles.reset_index()\n",
"df_profiles = df_profiles.melt(id_vars=['site_id','x','lat','lon'],\n",
" value_vars=['post_topo','post_bathy','pre_bathy','pre_topo']).rename(columns={'variable':'profile_type', 'value':'z'})\n",
"\n",
"df_profiles = df_profiles.dropna(subset=['z'])\n",
"\n",
"df_profiles.loc[df_profiles.profile_type=='post_topo','profile_type']='poststorm'\n",
"df_profiles.loc[df_profiles.profile_type=='post_bathy','profile_type']='poststorm'\n",
"df_profiles.loc[df_profiles.profile_type=='pre_topo','profile_type']='prestorm'\n",
"df_profiles.loc[df_profiles.profile_type=='pre_bathy','profile_type']='prestorm'\n",
"\n",
"df_profiles = df_profiles.set_index(['site_id', 'profile_type', 'x'])\n",
"df_profiles = df_profiles[~df_profiles.index.duplicated(keep='first')]\n",
"\n",
"df_profiles = df_profiles.sort_index()\n",
"\n",
"print('df_profiles:')\n",
"df_profiles.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Just plots each site's x and z values\n",
"for site_id,df_site in df_profiles.groupby('site_id'):\n",
" f, (ax1) = plt.subplots(1,1, figsize=(6, 3))\n",
" ax1.set_title(site_id)\n",
" \n",
" prestorm=df_site.index.get_level_values('profile_type') == 'prestorm'\n",
" ax1.plot(df_site[prestorm].index.get_level_values('x'),\n",
" df_site[prestorm].z,\n",
" label='Pre Topo',\n",
" color='#2c7bb6')\n",
"\n",
" \n",
" poststorm=df_site.index.get_level_values('profile_type') == 'poststorm'\n",
" ax1.plot(df_site[poststorm].index.get_level_values('x'),\n",
" df_site[poststorm].z,\n",
" label='Post Topo',\n",
" color='#d7191c')\n",
"\n",
"\n",
" ax1.legend()\n",
" plt.show()\n",
" plt.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get dune faces"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": []
},
"outputs": [],
"source": [
"# Manually define dune x coordinates and work out slope\n",
"\n",
"dune_data = [\n",
" {\n",
" 'site_id': 'NARRA0004',\n",
" 'dune_crest_x': 180,\n",
" 'dune_toe_x': 205\n",
" },\n",
" {\n",
" 'site_id': 'NARRA0008',\n",
" 'dune_crest_x': 180,\n",
" 'dune_toe_x': 205\n",
" },\n",
" {\n",
" 'site_id': 'NARRA0012',\n",
" 'dune_crest_x': 195,\n",
" 'dune_toe_x': 205\n",
" },\n",
" {\n",
" 'site_id': 'NARRA0016',\n",
" 'dune_crest_x': 190,\n",
" 'dune_toe_x': 200\n",
" },\n",
" {\n",
" 'site_id': 'NARRA0021',\n",
" 'dune_crest_x': 205,\n",
" 'dune_toe_x': 210\n",
" },\n",
" {\n",
" 'site_id': 'NARRA0023',\n",
" 'dune_crest_x': 205,\n",
" 'dune_toe_x': 215\n",
" },\n",
" {\n",
" 'site_id': 'NARRA0027',\n",
" 'dune_crest_x': 210,\n",
" 'dune_toe_x': 219\n",
" },\n",
" {\n",
" 'site_id': 'NARRA0031',\n",
" 'dune_crest_x': 210,\n",
" 'dune_toe_x': 218\n",
" },\n",
"]\n",
"\n",
"for site_dune in dune_data:\n",
" df_site = df_profiles.xs(site_dune['site_id'], level='site_id').xs('prestorm',level='profile_type')\n",
" \n",
" dune_crest_x = site_dune['dune_crest_x']\n",
" dune_toe_x = site_dune['dune_toe_x']\n",
" dune_crest_z = df_site.iloc[df_site.index.get_loc(site_dune['dune_crest_x'],method='nearest')].z\n",
" dune_toe_z = df_site.iloc[df_site.index.get_loc(site_dune['dune_toe_x'],method='nearest')].z\n",
"\n",
" dune_slope = (dune_crest_z - dune_toe_z)/(dune_crest_x - dune_toe_x)\n",
" \n",
" site_dune['dune_crest_z'] = dune_crest_z\n",
" site_dune['dune_toe_z'] = dune_toe_z\n",
" site_dune['dune_slope'] = dune_slope\n",
" \n",
" \n",
"# Join back into main data\n",
"df_dunes = pd.DataFrame(dune_data).set_index('site_id')\n",
"print('df_dunes:')\n",
"df_dunes.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# # Just plots each site's x and z values\n",
"# for site_id,df_site in df_profiles.xs('prestorm',level='profile_type').groupby('site_id'):\n",
"# f, (ax1) = plt.subplots(1,1, figsize=(6, 3))\n",
"# ax1.set_title(site_id)\n",
"# ax1.plot(df_site.index.get_level_values('x'),\n",
"# df_site.z)\n",
"# ax1.plot([df_dunes.loc[site_id].dune_crest_x, df_dunes.loc[site_id].dune_toe_x],\n",
"# [df_dunes.loc[site_id].dune_crest_z, df_dunes.loc[site_id].dune_toe_z],\n",
"# 'r.-')\n",
"# ax1.set_xlim([150,250])\n",
"# ax1.set_ylim([0,15])\n",
"# plt.show()\n",
"# plt.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get prestorm slope"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"z_ele = 0.7\n",
"debug=False\n",
"\n",
"def find_nearest_idx(array, value):\n",
" array = np.asarray(array)\n",
" idx = (np.abs(array - value)).argmin()\n",
" return idx\n",
"\n",
"prestorm_slope_data =[]\n",
"for site_id, df_site in df_profiles.xs('prestorm',level='profile_type').groupby('site_id'):\n",
" \n",
" # Find index of our z_ele\n",
" idx = np.where(df_site.z.values>=z_ele)[0][-1]\n",
" \n",
" prestorm_end_x = df_site.iloc[idx].name[1]\n",
" prestorm_end_z = df_site.iloc[idx].z\n",
" \n",
" prestorm_start_x = df_dunes.loc[site_id].dune_toe_x\n",
" prestorm_start_z = df_dunes.loc[site_id].dune_toe_z\n",
" \n",
" prestorm_slope = (prestorm_end_z-prestorm_start_z)/(prestorm_end_x-prestorm_start_x)\n",
" \n",
" prestorm_slope_data.append({\n",
" 'site_id': site_id,\n",
" 'prestorm_end_x': prestorm_end_x,\n",
" 'prestorm_end_z': prestorm_end_z,\n",
" 'prestorm_start_x': prestorm_start_x,\n",
" 'prestorm_start_z': prestorm_start_z,\n",
" 'prestorm_slope': prestorm_slope\n",
" })\n",
" \n",
"df_prestorm_slope = pd.DataFrame(prestorm_slope_data).set_index(['site_id'])\n",
"print('df_prestorm_slope:')\n",
"df_prestorm_slope.head()\n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get shelf slope\n",
"At 10 m contour"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": []
},
"outputs": [],
"source": [
"# Elevation to take shelf slope at\n",
"z_ele = -9\n",
"debug=False\n",
"\n",
"def find_nearest_idx(array, value):\n",
" array = np.asarray(array)\n",
" idx = (np.abs(array - value)).argmin()\n",
" return idx\n",
"\n",
"def slope_at_point(x, z, z_ele,debug=False):\n",
" # Smooth profile a bit\n",
" # TODO the smoothing factor will change based on the number of data points\n",
" # Need to fix\n",
" s = UnivariateSpline(x, z, s=50)\n",
" xs = np.linspace(min(x),max(x),1000)\n",
" zs = s(xs)\n",
"\n",
" # Calculate derivates of spline\n",
" dzdx = np.diff(zs)/np.diff(xs)\n",
"\n",
" # Find index of z_ele\n",
" idx = find_nearest_idx(zs, z_ele)\n",
" slope = dzdx[idx]\n",
" shelf_x = xs[idx]\n",
"\n",
"\n",
" \n",
" # For checking how much smoothing is going on\n",
" if debug:\n",
" f, (ax1) = plt.subplots(1,1, figsize=(6, 3))\n",
" ax1.plot(x,z)\n",
" ax1.plot(xs,zs)\n",
" plt.show()\n",
" plt.close()\n",
" \n",
" return slope, shelf_x, z_ele\n",
" \n",
"shelf_data = []\n",
"for site_id, df_site in df_profiles.xs('prestorm',level='profile_type').groupby('site_id'):\n",
" shelf_slope, shelf_x, shelf_z = slope_at_point(df_site.index.get_level_values('x').values,\n",
" df_site.z, \n",
" z_ele, debug=debug)\n",
" shelf_data.append({\n",
" 'site_id': site_id,\n",
" 'shelf_slope': shelf_slope,\n",
" 'shelf_x': shelf_x,\n",
" 'shelf_z': shelf_z\n",
" })\n",
" \n",
"df_shelf = pd.DataFrame(shelf_data).set_index(['site_id'])\n",
"\n",
"df_shelf.loc['NARRA0004','shelf_slope'] = -0.02\n",
"\n",
"print('df_shelf:')\n",
"df_shelf.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Do geometry\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"df_site"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for site_id, df_site in df_profiles.groupby('site_id'):\n",
"\n",
" # Project the dune face outwards\n",
" dune_face_toe = Point(df_dunes.loc[site_id].dune_toe_x,\n",
" df_dunes.loc[site_id].dune_toe_z)\n",
" dune_face_sea = Point(\n",
" df_dunes.loc[site_id].dune_toe_x + 1000,\n",
" # df_dunes.loc[site_id].dune_toe_z +1000 * -1\n",
" df_dunes.loc[site_id].dune_toe_z +\n",
" 1000 * df_dunes.loc[site_id].dune_slope)\n",
" dune_line = LineString([dune_face_toe, dune_face_sea])\n",
"\n",
" # Project the shelf slope landwards\n",
" shelf_point = Point(df_shelf.loc[site_id].shelf_x,\n",
" df_shelf.loc[site_id].shelf_z)\n",
" shelf_land = Point(\n",
" df_shelf.loc[site_id].shelf_x - 1000, df_shelf.loc[site_id].shelf_z -\n",
" 1000 * df_shelf.loc[site_id].shelf_slope)\n",
" shelf_sea = Point(\n",
" df_shelf.loc[site_id].shelf_x + 1000, df_shelf.loc[site_id].shelf_z +\n",
" 1000 * df_shelf.loc[site_id].shelf_slope)\n",
" shelf_line = LineString([shelf_land, shelf_point, shelf_sea])\n",
"\n",
" # Find intersection between to lines\n",
" dune_shelf_int = dune_line.intersection(shelf_line)\n",
" dist_toe_to_int = dune_face_toe.distance(dune_shelf_int)\n",
"\n",
" # Plots\n",
" f, (ax1) = plt.subplots(1, 1, figsize=(12, 4))\n",
"\n",
" # Raw profile prestorm\n",
" ax1.plot(\n",
" df_site.xs('prestorm',\n",
" level='profile_type').index.get_level_values('x'),\n",
" df_site.xs('prestorm', level='profile_type').z,\n",
" label='Prestorm profile')\n",
"\n",
" # Raw profile poststorm\n",
" ax1.plot(\n",
" df_site.xs('poststorm',\n",
" level='profile_type').index.get_level_values('x'),\n",
" df_site.xs('poststorm', level='profile_type').z,\n",
" label='Poststorm profile')\n",
"\n",
" # Dune face\n",
" ax1.plot(\n",
" [df_dunes.loc[site_id].dune_crest_x, df_dunes.loc[site_id].dune_toe_x],\n",
" [df_dunes.loc[site_id].dune_crest_z, df_dunes.loc[site_id].dune_toe_z],\n",
" linestyle=':',\n",
" color='#999999',\n",
" label='Dune face ({:.2f})'.format(-df_dunes.loc[site_id].dune_slope))\n",
"\n",
" # Projected dune face\n",
" ax1.plot(\n",
" dune_line.xy[0],\n",
" dune_line.xy[1],\n",
" linestyle='--',\n",
" color='#999999',\n",
" label='Dune face (projected)')\n",
"\n",
" # Projected shelf slope\n",
" ax1.plot(\n",
" shelf_line.xy[0],\n",
" shelf_line.xy[1],\n",
" linestyle='--',\n",
" color='#999999',\n",
" label='Shelf slope (projected)')\n",
"\n",
" # Intersection\n",
" ax1.scatter(\n",
" dune_shelf_int.xy[0],\n",
" dune_shelf_int.xy[1],\n",
" marker='x',\n",
" color='#999999',\n",
" label='Dune/shelf projected intersection')\n",
"\n",
" # Prestorm slope\n",
" ax1.plot([\n",
" df_prestorm_slope.loc[site_id].prestorm_start_x,\n",
" df_prestorm_slope.loc[site_id].prestorm_end_x\n",
" ], [\n",
" df_prestorm_slope.loc[site_id].prestorm_start_z,\n",
" df_prestorm_slope.loc[site_id].prestorm_end_z\n",
" ],\n",
" color='violet',\n",
" label='Prestorm slope ({:.2f})'.format(\n",
" -df_prestorm_slope.loc[site_id].prestorm_slope))\n",
"\n",
" # # Find best slope based on distance form toe to intersection?\n",
" # best_slope_toe = shelf_line.interpolate(\n",
" # shelf_line.project(intersection) - 4 * dist_toe_to_int)\n",
" # best_slope = (dune_face_toe.xy[1][0] - best_slope_toe.xy[1][0]) / (\n",
" # dune_face_toe.xy[0][0] - best_slope_toe.xy[0][0])\n",
"\n",
" # # Best slope toe\n",
" # ax1.scatter(\n",
" # best_slope_toe.xy[0], best_slope_toe.xy[1], marker='o', color='g')\n",
"\n",
" # # Best slope\n",
" # ax1.plot([dune_face_toe.xy[0], best_slope_toe.xy[0]],\n",
" # [dune_face_toe.xy[1], best_slope_toe.xy[1]],\n",
" # color='g',\n",
" # label='Best slope ({:.3f})'.format(-best_slope))\n",
"\n",
" # Find best slope based on intersection of prestorm slope and surf zone slope\n",
" prestorm_slope_line = LineString([\n",
" Point(\n",
" df_prestorm_slope.loc[site_id].prestorm_start_x,\n",
" df_prestorm_slope.loc[site_id].prestorm_start_z,\n",
" ),\n",
" Point(\n",
" df_prestorm_slope.loc[site_id].prestorm_start_x + 10000,\n",
" df_prestorm_slope.loc[site_id].prestorm_start_z +\n",
" 10000 * df_prestorm_slope.loc[site_id].prestorm_slope)\n",
" ])\n",
"\n",
" # Where prestorm slope projection intersects shelf line\n",
" prestorm_slope_shelf_int = prestorm_slope_line.intersection(shelf_line)\n",
"\n",
" # Distance between dune/shelf intersection and prestorm/shelf intersection\n",
" dist_shelf_prestorm_ints = prestorm_slope_shelf_int.distance(\n",
" dune_shelf_int)\n",
"\n",
" best_slope_pt = shelf_line.interpolate(\n",
" shelf_line.project(dune_shelf_int) + 0.3 * (shelf_line.project(prestorm_slope_shelf_int) -\n",
" shelf_line.project(dune_shelf_int)))\n",
" \n",
" best_slope =(df_prestorm_slope.loc[site_id].prestorm_start_z-best_slope_pt.xy[1][0])/(df_prestorm_slope.loc[site_id].prestorm_start_x-best_slope_pt.xy[0][0])\n",
" \n",
" if not prestorm_slope_shelf_int.is_empty:\n",
" ax1.plot(\n",
" prestorm_slope_shelf_int.xy[0],\n",
" prestorm_slope_shelf_int.xy[1],\n",
" marker='x',\n",
" color='#999999',\n",
" label='Prestorm slope/shelf\\nprojected intersection')\n",
" ax1.plot(\n",
" prestorm_slope_line.xy[0],\n",
" prestorm_slope_line.xy[1],\n",
" color='#999999',\n",
" linestyle='--',\n",
" label='Prestorm slope projected line')\n",
" ax1.plot(\n",
" [df_prestorm_slope.loc[site_id].prestorm_start_x,\n",
" best_slope_pt.xy[0][0]],\n",
" [df_prestorm_slope.loc[site_id].prestorm_start_z,\n",
" best_slope_pt.xy[1][0]],\n",
" color='red',\n",
" linestyle='--',\n",
" label='Best slope ({:.3f})'.format(-best_slope))\n",
" \n",
" # TEMP Target slopes\n",
" target_slopes = {\n",
" 'NARRA0004': 0.076,\n",
" 'NARRA0008': 0.093,\n",
" 'NARRA0012': 0.060,\n",
" 'NARRA0016': 0.11,\n",
" 'NARRA0021': 0.063,\n",
" 'NARRA0023': 0.061,\n",
" 'NARRA0027': 0.060,\n",
" 'NARRA0031': 0.057,\n",
" }\n",
"\n",
" target_direction = {\n",
" 'NARRA0004': \"flatter\",\n",
" 'NARRA0008': \"steeper\",\n",
" 'NARRA0012': \"flatter\",\n",
" 'NARRA0016': \"flatter\",\n",
" 'NARRA0021': \"steeper\",\n",
" 'NARRA0023': \"steeper\",\n",
" 'NARRA0027': \"steeper\",\n",
" 'NARRA0031': \"steeper\",\n",
" }\n",
" ax1.plot([dune_face_toe.xy[0][0], dune_face_toe.xy[0][0] + 1000], [\n",
" dune_face_toe.xy[1][0],\n",
" dune_face_toe.xy[1][0] - 1000 * target_slopes[site_id]\n",
" ],\n",
" color='red',\n",
" label='Target slope\\n({} than {:.3f})'.format(\n",
" target_direction[site_id], target_slopes[site_id]))\n",
"\n",
" ax1.set_xlim([100, 800])\n",
" ax1.set_ylim([-15, 12])\n",
"# ax1.set_xlim([100, 600])\n",
"# ax1.set_ylim([-10, 12])\n",
"\n",
" # ax1.set_xlim([df_dunes.loc[site_id].dune_crest_x - 50,\n",
" # intersection.xy[0][0] + 50])\n",
" # ax1.set_ylim([intersection.xy[1][0] -3,\n",
" # df_dunes.loc[site_id].dune_crest_z + 3])\n",
"\n",
" ax1.set_title(site_id)\n",
" ax1.legend(loc='upper right', prop={'size': 10})\n",
" f.savefig('08-{}.png'.format(site_id), dpi=600)\n",
" plt.show()\n",
" plt.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dune_shelf_int"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -0,0 +1,313 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Run comparison\n",
"Create a comparison between different runs by looking at the different R_high values and storm regimes."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup notebook"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Enable autoreloading of our modules. \n",
"# Most of the code will be located in the /src/ folder, \n",
"# and then called from the notebook.\n",
"%matplotlib inline\n",
"%reload_ext autoreload\n",
"%autoreload"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.core.debugger import set_trace\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import decimal\n",
"import plotly\n",
"import plotly.graph_objs as go\n",
"import plotly.plotly as py\n",
"import plotly.tools as tls\n",
"import plotly.figure_factory as ff\n",
"from plotly import tools\n",
"import plotly.io as pio\n",
"from scipy import stats\n",
"import math\n",
"import matplotlib\n",
"from matplotlib import cm\n",
"import colorlover as cl\n",
"from tqdm import tqdm_notebook\n",
"from ipywidgets import widgets, Output\n",
"from IPython.display import display, clear_output, Image, HTML\n",
"from scipy import stats\n",
"from sklearn.metrics import confusion_matrix\n",
"import matplotlib.pyplot as plt\n",
"from scipy.interpolate import interp1d\n",
"from pandas.api.types import CategoricalDtype\n",
"from scipy.interpolate import UnivariateSpline\n",
"from shapely.geometry import Point, LineString"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Matplot lib default settings\n",
"plt.rcParams[\"figure.figsize\"] = (10,6)\n",
"plt.rcParams['axes.grid']=True\n",
"plt.rcParams['grid.alpha'] = 0.5\n",
"plt.rcParams['grid.color'] = \"grey\"\n",
"plt.rcParams['grid.linestyle'] = \"--\"\n",
"plt.rcParams['axes.grid']=True\n",
"\n",
"# https://stackoverflow.com/a/20709149\n",
"matplotlib.rcParams['text.usetex'] = True\n",
"\n",
"matplotlib.rcParams['text.latex.preamble'] = [\n",
" r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n",
" r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n",
" r'\\usepackage{helvet}', # set the normal font here\n",
" r'\\usepackage{amsmath}',\n",
" r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n",
" r'\\sansmath', # <- tricky! -- gotta actually tell tex to use!\n",
"] "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
" print('Importing {}'.format(csv))\n",
" return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n",
"\n",
"df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n",
"df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n",
"df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n",
"df_sites = df_from_csv('sites.csv', index_col=[0])\n",
"df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0])\n",
"\n",
"# Note that the forecasted data sets should be in the same order for impacts and twls\n",
"impacts = {\n",
" 'forecasted': {\n",
" 'foreshore_slope_sto06': df_from_csv('impacts_forecasted_foreshore_slope_sto06.csv', index_col=[0]),\n",
" 'mean_slope_sto06': df_from_csv('impacts_forecasted_mean_slope_sto06.csv', index_col=[0]),\n",
" 'mean_slope_nie91': df_from_csv('impacts_forecasted_mean_slope_nie91.csv', index_col=[0]),\n",
" 'mean_slope_hol86': df_from_csv('impacts_forecasted_mean_slope_hol86.csv', index_col=[0]),\n",
" },\n",
" 'observed': df_from_csv('impacts_observed.csv', index_col=[0])\n",
" }\n",
"\n",
"\n",
"twls = {\n",
" 'forecasted': {\n",
" 'foreshore_slope_sto06': df_from_csv('twl_foreshore_slope_sto06.csv', index_col=[0, 1]),\n",
" 'mean_slope_sto06':df_from_csv('twl_mean_slope_sto06.csv', index_col=[0, 1]),\n",
" 'mean_slope_nie91':df_from_csv('twl_mean_slope_nie91.csv', index_col=[0, 1]),\n",
" 'mean_slope_hol86':df_from_csv('twl_mean_slope_hol86.csv', index_col=[0, 1]),\n",
" }\n",
"}\n",
"print('Done!')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get prediction accuracy\n",
"Use [scikit-learn](https://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics) model evaluation metrics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pprint\n",
"pp = pprint.PrettyPrinter(indent=2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sklearn.metrics\n",
"\n",
"# Encode the storm regimes values as categorical intgers so we can compare them\n",
"cat_type = CategoricalDtype(\n",
" categories=[\"swash\", \"collision\", \"overwash\", \"inundation\"], ordered=True)\n",
"correct_regime = impacts['observed'].storm_regime.astype(\n",
" cat_type).cat.codes.values\n",
"\n",
"# Define our forecast model names\n",
"models = [model for model in impacts['forecasted']]\n",
"\n",
"# Define the metric we want to calculate for each forecast model\n",
"metrics = [\n",
" 'accuracy_score', 'balanced_accuracy_score', 'confusion_matrix',\n",
" 'classification_report', 'f1_score', 'fbeta_score', 'precision_score', 'recall_score'\n",
"]\n",
"\n",
"# Store results in a nested dictionary by metric\n",
"performance = {metric: {} for metric in metrics}\n",
"\n",
"for model, metric in itertools.product(models, metrics):\n",
"\n",
" # Get predicted storm regims\n",
" df_pred = impacts['forecasted'][model]\n",
" predicted_regime = df_pred.storm_regime.astype(cat_type).cat.codes.values\n",
"\n",
" if metric == 'accuracy_score':\n",
" m = sklearn.metrics.accuracy_score(correct_regime, predicted_regime)\n",
"\n",
" if metric == 'balanced_accuracy_score':\n",
" m = sklearn.metrics.balanced_accuracy_score(correct_regime,\n",
" predicted_regime)\n",
"\n",
" if metric == 'confusion_matrix':\n",
" m = sklearn.metrics.confusion_matrix(\n",
" correct_regime, predicted_regime, labels=[0, 1, 2, 3])\n",
" \n",
" if metric == 'f1_score':\n",
" m = sklearn.metrics.f1_score(correct_regime, predicted_regime, average='weighted')\n",
" \n",
" if metric == 'fbeta_score':\n",
" m = sklearn.metrics.fbeta_score(correct_regime, predicted_regime, average='weighted', beta=1)\n",
" \n",
" if metric == 'precision_score':\n",
" m = sklearn.metrics.precision_score(correct_regime, predicted_regime, average='weighted')\n",
" \n",
" if metric == 'recall_score':\n",
" m = sklearn.metrics.recall_score(correct_regime, predicted_regime, average='weighted')\n",
"# m=1\n",
" \n",
" if metric == 'classification_report':\n",
"# m = sklearn.metrics.classification_report(\n",
"# correct_regime,\n",
"# predicted_regime,\n",
"# labels=[0, 1, 2, 3],\n",
"# target_names=['swash', 'collision', 'overwash', 'inundation'])\n",
"# print(m)\n",
" continue\n",
"\n",
" # Store metric in results dictionary\n",
" performance[metric][model] = m\n",
"\n",
"pp.pprint(performance)"
]
},
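{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cell below is a small sketch (not part of the original analysis) that reprints each model's confusion matrix from `performance` as a labelled dataframe, using the same regime ordering as the categorical encoding above. Rows are observed regimes and columns are predicted regimes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: label the confusion matrices computed above.\n",
"# Assumes the previous cell has run and populated performance['confusion_matrix'].\n",
"regime_labels = ['swash', 'collision', 'overwash', 'inundation']\n",
"for model, cm in performance['confusion_matrix'].items():\n",
"    print(model)\n",
"    display(pd.DataFrame(cm, index=regime_labels, columns=regime_labels))"
]
},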
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"predicted_regime"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Scatter plot matirx\n",
" - Use [Altair](https://altair-viz.github.io/getting_started/installation.html) for interactivity?\n",
" - Or maybe [Holoviews](https://towardsdatascience.com/pyviz-simplifying-the-data-visualisation-process-in-python-1b6d2cb728f1)?"
]
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -19,7 +19,10 @@ def compare_impacts(df_forecasted, df_observed):
:return:
"""
df_compared = df_forecasted.merge(
df_observed, left_index=True, right_index=True, suffixes=["_forecasted", "_observed"]
df_observed,
left_index=True,
right_index=True,
suffixes=["_forecasted", "_observed"],
)
return df_compared
@ -27,7 +30,14 @@ def compare_impacts(df_forecasted, df_observed):
if __name__ == "__main__":
logger.info("Importing existing data")
data_folder = "./data/interim"
df_forecasted = pd.read_csv(os.path.join(data_folder, "impacts_forecasted_mean_slope_sto06.csv"), index_col=[0])
df_observed = pd.read_csv(os.path.join(data_folder, "impacts_observed.csv"), index_col=[0])
df_forecasted = pd.read_csv(
os.path.join(data_folder, "impacts_forecasted_mean_slope_sto06.csv"),
index_col=[0],
)
df_observed = pd.read_csv(
os.path.join(data_folder, "impacts_observed.csv"), index_col=[0]
)
df_compared = compare_impacts(df_forecasted, df_observed)
df_compared.to_csv(os.path.join(data_folder, "impacts_observed_vs_forecasted_mean_slope_sto06.csv"))
df_compared.to_csv(
os.path.join(data_folder, "impacts_observed_vs_forecasted_mean_slope_sto06.csv")
)

@ -41,13 +41,16 @@ def forecast_twl(
# Process each site_id with a different process and combine results at the end
with Pool(processes=n_processes) as pool:
results = pool.starmap(
foreshore_slope_for_site_id, [(site_id, df_twl, df_profiles) for site_id in site_ids]
foreshore_slope_for_site_id,
[(site_id, df_twl, df_profiles) for site_id in site_ids],
)
df_twl["beta"] = pd.concat(results)
elif slope == "mean":
df_temp = df_twl.join(
df_profile_features.query("profile_type=='{}'".format(profile_type)).reset_index(level="profile_type"),
df_profile_features.query(
"profile_type=='{}'".format(profile_type)
).reset_index(level="profile_type"),
how="inner",
)
df_temp["mhw"] = 0.5
@ -59,19 +62,26 @@ def forecast_twl(
df_temp.dune_toe_z.isnull(), "dune_crest_z"
]
df_temp["top_x"] = df_temp["dune_toe_x"]
df_temp.loc[df_temp.dune_toe_x.isnull(), "top_x"] = df_temp.loc[df_temp.dune_toe_x.isnull(), "dune_crest_x"]
df_temp.loc[df_temp.dune_toe_x.isnull(), "top_x"] = df_temp.loc[
df_temp.dune_toe_x.isnull(), "dune_crest_x"
]
with Pool(processes=n_processes) as pool:
results = pool.starmap(
mean_slope_for_site_id,
[(site_id, df_temp, df_profiles, "top_elevation", "top_x", "mhw") for site_id in site_ids],
[
(site_id, df_temp, df_profiles, "top_elevation", "top_x", "mhw")
for site_id in site_ids
],
)
df_twl["beta"] = pd.concat(results)
# Estimate runup
R2, setup, S_total, S_inc, S_ig = runup_function(
Hs0=df_twl["Hs0"].tolist(), Tp=df_twl["Tp"].tolist(), beta=df_twl["beta"].tolist()
Hs0=df_twl["Hs0"].tolist(),
Tp=df_twl["Tp"].tolist(),
beta=df_twl["beta"].tolist(),
)
df_twl["R2"] = R2
@ -80,7 +90,9 @@ def forecast_twl(
# Estimate TWL
df_twl["R_high"] = df_twl["tide"] + df_twl["R2"]
df_twl["R_low"] = df_twl["tide"] + 1.1 * df_twl["setup"] - 1.1 / 2 * df_twl["S_total"]
df_twl["R_low"] = (
df_twl["tide"] + 1.1 * df_twl["setup"] - 1.1 / 2 * df_twl["S_total"]
)
# Drop unneeded columns
# df_twl.drop(columns=["E", "Exs", "P", "Pxs", "dir"], inplace=True, errors="ignore")
@ -89,7 +101,13 @@ def forecast_twl(
def mean_slope_for_site_id(
site_id, df_twl, df_profiles, top_elevation_col, top_x_col, btm_elevation_col, profile_type="prestorm"
site_id,
df_twl,
df_profiles,
top_elevation_col,
top_x_col,
btm_elevation_col,
profile_type="prestorm",
):
"""
Calculates the foreshore slope values for a given site_id. Returns a series (with the same indices as df_twl) of
@ -135,7 +153,9 @@ def foreshore_slope_for_site_id(site_id, df_twl, df_profiles):
"""
# Get the prestorm beach profile
profile = df_profiles.query("site_id =='{}' and profile_type == 'prestorm'".format(site_id))
profile = df_profiles.query(
"site_id =='{}' and profile_type == 'prestorm'".format(site_id)
)
profile_x = profile.index.get_level_values("x").tolist()
profile_z = profile.z.tolist()
@ -175,8 +195,12 @@ def foreshore_slope_from_profile(profile_x, profile_z, tide, runup_function, **k
# Initialize estimates
max_number_iterations = 30
iteration_count = 0
averaged_accuracy = 0.03 # if slopes within this amount, average after max number of iterations
acceptable_accuracy = 0.01 # if slopes within this amount, accept after max number of iterations
averaged_accuracy = (
0.03
) # if slopes within this amount, average after max number of iterations
acceptable_accuracy = (
0.01
) # if slopes within this amount, accept after max number of iterations
preferred_accuracy = 0.001 # if slopes within this amount, accept
beta = 0.05
@ -212,7 +236,15 @@ def foreshore_slope_from_profile(profile_x, profile_z, tide, runup_function, **k
iteration_count += 1
def slope_from_profile(profile_x, profile_z, top_elevation, btm_elevation, method="end_points", top_x=None, btm_x=None):
def slope_from_profile(
profile_x,
profile_z,
top_elevation,
btm_elevation,
method="end_points",
top_x=None,
btm_x=None,
):
"""
Returns a slope (beta) from a bed profile, given the top and bottom elevations of where the slope should be taken.
:param x: List of x bed profile coordinates
@ -260,7 +292,9 @@ def slope_from_profile(profile_x, profile_z, top_elevation, btm_elevation, metho
end_points[end_type]["x"] = intersection_x[-1]
else:
# For bottom elevation, take most landward intersection that is seaward of top elevation
end_point_btm = [x for x in intersection_x if x > end_points["top"]["x"]]
end_point_btm = [
x for x in intersection_x if x > end_points["top"]["x"]
]
if len(end_point_btm) == 0:
# If there doesn't seem to be an intersection seaward of the top elevation, return none.
return None
@ -275,7 +309,10 @@ def slope_from_profile(profile_x, profile_z, top_elevation, btm_elevation, metho
return -(z_top - z_btm) / (x_top - x_btm)
elif method == "least_squares":
profile_mask = [True if end_points["top"]["x"] < pts < end_points["btm"]["x"] else False for pts in profile_x]
profile_mask = [
True if end_points["top"]["x"] < pts < end_points["btm"]["x"] else False
for pts in profile_x
]
slope_x = np.array(profile_x)[profile_mask].tolist()
slope_z = np.array(profile_z)[profile_mask].tolist()
slope, _, _, _, _ = stats.linregress(slope_x, slope_z)
@ -287,12 +324,31 @@ def slope_from_profile(profile_x, profile_z, top_elevation, btm_elevation, metho
@click.option("--tides-csv", required=True, help="")
@click.option("--profiles-csv", required=True, help="")
@click.option("--profile-features-csv", required=True, help="")
@click.option("--runup-function", required=True, help="", type=click.Choice(["sto06"]))
@click.option("--slope", required=True, help="", type=click.Choice(["foreshore", "mean"]))
@click.option("--profile-type", required=True, help="", type=click.Choice(["prestorm", "poststorm"]))
@click.option(
"--runup-function",
required=True,
help="",
type=click.Choice(["sto06", "hol86", "nie91"]),
)
@click.option(
"--slope", required=True, help="", type=click.Choice(["foreshore", "mean"])
)
@click.option(
"--profile-type",
required=True,
help="",
type=click.Choice(["prestorm", "poststorm"]),
)
@click.option("--output-file", required=True, help="")
def create_twl_forecast(
waves_csv, tides_csv, profiles_csv, profile_features_csv, runup_function, slope, profile_type, output_file
waves_csv,
tides_csv,
profiles_csv,
profile_features_csv,
runup_function,
slope,
profile_type,
output_file,
):
logger.info("Creating forecast of total water levels")
logger.info("Importing data")

@ -20,18 +20,24 @@ def forecasted_impacts(df_profile_features, df_forecasted_twl):
"""
logger.info("Getting forecasted storm impacts")
df_forecasted_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique())
df_forecasted_impacts = pd.DataFrame(
index=df_profile_features.index.get_level_values("site_id").unique()
)
# For each site, find the maximum R_high value and the corresponding R_low value.
idx = df_forecasted_twl.groupby(level=["site_id"])["R_high"].idxmax().dropna()
df_r_vals = df_forecasted_twl.loc[idx, ["R_high", "R_low"]].reset_index(["datetime"])
df_forecasted_impacts = df_forecasted_impacts.merge(df_r_vals, how="left", left_index=True, right_index=True)
df_r_vals = df_forecasted_twl.loc[idx, ["R_high", "R_low"]].reset_index(
["datetime"]
)
df_forecasted_impacts = df_forecasted_impacts.merge(
df_r_vals, how="left", left_index=True, right_index=True
)
# Join with df_profile features to find dune toe and crest elevations
df_forecasted_impacts = df_forecasted_impacts.merge(
df_profile_features.query("profile_type=='prestorm'").reset_index("profile_type")[
["dune_toe_z", "dune_crest_z"]
],
df_profile_features.query("profile_type=='prestorm'").reset_index(
"profile_type"
)[["dune_toe_z", "dune_crest_z"]],
how="left",
left_on=["site_id"],
right_on=["site_id"],
@ -71,7 +77,12 @@ def storm_regime(df_forecasted_impacts):
return df_forecasted_impacts
def twl_exceedence_time(df_profile_features, df_forecasted_twl, z_twl_col="R_high", z_exceedence_col="dune_toe_z"):
def twl_exceedence_time(
df_profile_features,
df_forecasted_twl,
z_twl_col="R_high",
z_exceedence_col="dune_toe_z",
):
"""
Returns a dataframe of the number of hours the TWL exceeded a given z elevation.
May need to use this https://stackoverflow.com/a/53656968 if datetimes are not consistent.
@ -85,11 +96,15 @@ def twl_exceedence_time(df_profile_features, df_forecasted_twl, z_twl_col="R_hig
# Get a dataframe of prestorm dune toes organised by site_id
df_dune_toes = (
df_profile_features.query('profile_type=="prestorm"').reset_index("profile_type")[z_exceedence_col].to_frame()
df_profile_features.query('profile_type=="prestorm"')
.reset_index("profile_type")[z_exceedence_col]
.to_frame()
)
# Merge dune toes into site_id
df_merged = df_forecasted_twl.merge(df_dune_toes, left_on=["site_id"], right_on=["site_id"])
df_merged = df_forecasted_twl.merge(
df_dune_toes, left_on=["site_id"], right_on=["site_id"]
)
# Return the sum of hours that twl exceeded the level
return (
@ -114,7 +129,9 @@ def create_forecasted_impacts(profile_features_csv, forecasted_twl_csv, output_f
df_forecasted_impacts = forecasted_impacts(df_profile_features, df_forecasted_twl)
df_forecasted_impacts = df_forecasted_impacts.merge(
twl_exceedence_time(df_profile_features, df_forecasted_twl), left_on=["site_id"], right_on=["site_id"]
twl_exceedence_time(df_profile_features, df_forecasted_twl),
left_on=["site_id"],
right_on=["site_id"],
)
df_forecasted_impacts.to_csv(output_file)

@ -30,12 +30,16 @@ def volume_change(df_profiles, df_profile_features, zone):
"""
logger.info("Calculating change in beach volume in {} zone".format(zone))
df_vol_changes = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique())
df_vol_changes = pd.DataFrame(
index=df_profile_features.index.get_level_values("site_id").unique()
)
df_profiles = df_profiles.sort_index()
sites = df_profiles.groupby(level=["site_id"])
for site_id, df_site in sites:
logger.debug("Calculating change in beach volume at {} in {} zone".format(site_id, zone))
logger.debug(
"Calculating change in beach volume at {} in {} zone".format(site_id, zone)
)
# TODO Change this query to an index
query = "site_id=='{}'&profile_type=='prestorm'".format(site_id)
@ -51,7 +55,10 @@ def volume_change(df_profiles, df_profile_features, zone):
prestorm_dune_toe_x = prestorm_dune_crest_x
# If no prestorm and poststorm profiles, skip site and continue
profile_lengths = [len(df_site.query("profile_type == '{}'".format(x))) for x in ["prestorm", "poststorm"]]
profile_lengths = [
len(df_site.query("profile_type == '{}'".format(x)))
for x in ["prestorm", "poststorm"]
]
if any([length == 0 for length in profile_lengths]):
continue
@ -60,13 +67,21 @@ def volume_change(df_profiles, df_profile_features, zone):
df_zone = df_site.dropna(subset=["z"])
x_last_obs = min(
[
max(df_zone.query("profile_type == '{}'".format(profile_type)).index.get_level_values("x"))
max(
df_zone.query(
"profile_type == '{}'".format(profile_type)
).index.get_level_values("x")
)
for profile_type in ["prestorm", "poststorm"]
]
)
x_first_obs = max(
[
min(df_zone.query("profile_type == '{}'".format(profile_type)).index.get_level_values("x"))
min(
df_zone.query(
"profile_type == '{}'".format(profile_type)
).index.get_level_values("x")
)
for profile_type in ["prestorm", "poststorm"]
]
)
@ -100,16 +115,24 @@ def volume_change(df_profiles, df_profile_features, zone):
# Volume change needs to be calculated including a tolerance for LIDAR accuracy. If difference between
# profiles is less than 20 cm, consider them as zero difference.
prestorm_z = df_zone.query("profile_type=='prestorm'").reset_index("profile_type").z
poststorm_z = df_zone.query("profile_type=='poststorm'").reset_index("profile_type").z
prestorm_z = (
df_zone.query("profile_type=='prestorm'").reset_index("profile_type").z
)
poststorm_z = (
df_zone.query("profile_type=='poststorm'").reset_index("profile_type").z
)
diff_z = prestorm_z - poststorm_z
diff_z[abs(diff_z) < 0.2] = 0
diff_vol = beach_volume(x=diff_z.index.get_level_values("x"), z=diff_z, x_min=x_min, x_max=x_max)
diff_vol = beach_volume(
x=diff_z.index.get_level_values("x"), z=diff_z, x_min=x_min, x_max=x_max
)
df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = prestorm_vol
df_vol_changes.loc[site_id, "poststorm_{}_vol".format(zone)] = poststorm_vol
df_vol_changes.loc[site_id, "{}_vol_change".format(zone)] = diff_vol
df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = diff_vol / prestorm_vol * 100
df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = (
diff_vol / prestorm_vol * 100
)
return df_vol_changes
@ -142,8 +165,12 @@ def storm_regime(df_observed_impacts):
"""
logger.info("Getting observed storm regimes")
swash = (df_observed_impacts.dune_face_pct_change <= 2) & (df_observed_impacts.dune_face_vol_change <= 3)
collision = (df_observed_impacts.dune_face_pct_change >= 2) | (df_observed_impacts.dune_face_vol_change > 3)
swash = (df_observed_impacts.dune_face_pct_change <= 2) & (
df_observed_impacts.dune_face_vol_change <= 3
)
collision = (df_observed_impacts.dune_face_pct_change >= 2) | (
df_observed_impacts.dune_face_vol_change > 3
)
df_observed_impacts.loc[swash, "storm_regime"] = "swash"
df_observed_impacts.loc[collision, "storm_regime"] = "collision"
@ -160,7 +187,9 @@ def overwrite_impacts(df_observed_impacts, df_raw_features):
:param df_raw_profile_features:
:return:
"""
df_observed_impacts.update(df_raw_features.rename(columns={"observed_storm_regime": "storm_regime"}))
df_observed_impacts.update(
df_raw_features.rename(columns={"observed_storm_regime": "storm_regime"})
)
return df_observed_impacts
@ -169,11 +198,15 @@ def overwrite_impacts(df_observed_impacts, df_raw_features):
@click.option("--profile-features-crest-toes-csv", required=True, help="")
@click.option("--raw-profile-features-csv", required=True, help="")
@click.option("--output-file", required=True, help="")
def create_observed_impacts(profiles_csv, profile_features_crest_toes_csv, raw_profile_features_csv, output_file):
def create_observed_impacts(
profiles_csv, profile_features_crest_toes_csv, raw_profile_features_csv, output_file
):
profiles_csv = "./data/interim/profiles.csv"
profile_features_crest_toes_csv = "./data/interim/profile_features_crest_toes.csv"
raw_profile_features_csv = "./data/raw/profile_features_chris_leaman/profile_features_chris_leaman.csv"
raw_profile_features_csv = (
"./data/raw/profile_features_chris_leaman/profile_features_chris_leaman.csv"
)
logger.info("Creating observed wave impacts")
logger.info("Importing data")
@ -181,12 +214,18 @@ def create_observed_impacts(profiles_csv, profile_features_crest_toes_csv, raw_p
df_profile_features = pd.read_csv(profile_features_crest_toes_csv, index_col=[0, 1])
logger.info("Creating new dataframe for observed impacts")
df_observed_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique())
df_observed_impacts = pd.DataFrame(
index=df_profile_features.index.get_level_values("site_id").unique()
)
logger.info("Getting pre/post storm volumes")
df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash")
df_dune_face_vol_changes = volume_change(df_profiles, df_profile_features, zone="dune_face")
df_observed_impacts = df_observed_impacts.join([df_swash_vol_changes, df_dune_face_vol_changes])
df_dune_face_vol_changes = volume_change(
df_profiles, df_profile_features, zone="dune_face"
)
df_observed_impacts = df_observed_impacts.join(
[df_swash_vol_changes, df_dune_face_vol_changes]
)
# Classify regime based on volume changes
df_observed_impacts = storm_regime(df_observed_impacts)

@ -10,14 +10,20 @@ def sto06(Hs0, Tp, beta):
:return: Float or list of R2, setup, S_total, S_inc and S_ig values
"""
df = pd.DataFrame({"Hs0": Hs0, "Tp": Tp, "beta": beta}, index=[x for x in range(0, np.size(Hs0))])
df = pd.DataFrame(
{"Hs0": Hs0, "Tp": Tp, "beta": beta}, index=[x for x in range(0, np.size(Hs0))]
)
df["Lp"] = 9.8 * df["Tp"] ** 2 / 2 / np.pi
# General equation
df["S_ig"] = pd.to_numeric(0.06 * np.sqrt(df["Hs0"] * df["Lp"]), errors="coerce")
df["S_inc"] = pd.to_numeric(0.75 * df["beta"] * np.sqrt(df["Hs0"] * df["Lp"]), errors="coerce")
df["setup"] = pd.to_numeric(0.35 * df["beta"] * np.sqrt(df["Hs0"] * df["Lp"]), errors="coerce")
df["S_inc"] = pd.to_numeric(
0.75 * df["beta"] * np.sqrt(df["Hs0"] * df["Lp"]), errors="coerce"
)
df["setup"] = pd.to_numeric(
0.35 * df["beta"] * np.sqrt(df["Hs0"] * df["Lp"]), errors="coerce"
)
df["S_total"] = np.sqrt(df["S_inc"] ** 2 + df["S_ig"] ** 2)
df["R2"] = 1.1 * (df["setup"] + df["S_total"] / 2)
@ -37,6 +43,76 @@ def sto06(Hs0, Tp, beta):
)
def hol86(Hs0, Tp, beta):
df = pd.DataFrame(
{"Hs0": Hs0, "Tp": Tp, "beta": beta}, index=[x for x in range(0, np.size(Hs0))]
)
df["Lp"] = 9.8 * df["Tp"] ** 2 / 2 / np.pi
df["setup"] = 0.2 * df["Hs0"]
df["R2"] = 0.83 * df["beta"] * np.sqrt(df["Hs0"] * df["Lp"]) + df["setup"]
df["S_ig"] = np.nan
df["S_inc"] = np.nan
df["S_total"] = np.nan
return (
float_or_list(df["R2"].tolist()),
float_or_list(df["setup"].tolist()),
float_or_list(df["S_total"].tolist()),
float_or_list(df["S_inc"].tolist()),
float_or_list(df["S_ig"].tolist()),
)
def nie91(Hs0, Tp, beta):
df = pd.DataFrame(
{"Hs0": Hs0, "Tp": Tp, "beta": beta}, index=[x for x in range(0, np.size(Hs0))]
)
df["Lp"] = 9.8 * df["Tp"] ** 2 / 2 / np.pi
df["Ls"] = df["Lp"] # Need to make this approximation, refer to Atkinson 2017
df["Hrms"] = df["Hs0"] / np.sqrt(
2
) # Acceptable approximation, refer to Atkinson 2017
df.loc[df.beta >= 0.1, "LR"] = 0.6 * df["beta"] * np.sqrt(df["Hrms"] * df["Ls"])
df.loc[df.beta < 0.1, "LR"] = 0.06 * np.sqrt(df["Hrms"] * df["Ls"])
df["R2"] = 1.98 * df["LR"]
# Note that this should be the level above Z100%, which in this case is taken as the time varying tide level,
# even though minimum run-down can still occur below tide SWL.
df["setup"] = np.nan
df["S_ig"] = np.nan
df["S_inc"] = np.nan
df["S_total"] = np.nan
return (
float_or_list(df["R2"].tolist()),
float_or_list(df["setup"].tolist()),
float_or_list(df["S_total"].tolist()),
float_or_list(df["S_inc"].tolist()),
float_or_list(df["S_ig"].tolist()),
)
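# Usage sketch (illustrative values only, not from the original module): each of these
# runup functions takes Hs0, Tp and beta as floats or equal-length lists and returns a
# 5-tuple of (R2, setup, S_total, S_inc, S_ig), e.g.
#
#     R2, setup, S_total, S_inc, S_ig = nie91(Hs0=[4.0], Tp=[12.0], beta=[0.1])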
def atk18(Hs0, Tp, beta):
pass
def pow18(Hs0, Tp, beta):
pass
def beu(Hs0, Tp, beta):
pass
def float_or_list(a):
"""
If only one value in the array, return the float, else return a list

@ -33,4 +33,5 @@ if __name__ == "__main__":
cli.add_command(parse_mat.create_sites_and_profiles_csv)
cli.add_command(parse_mat.create_tides_csv)
cli.add_command(parse_mat.create_waves_csv)
cli.add_command(parse_mat.create_grain_size_csv)
cli()

@ -16,7 +16,9 @@ from logs import setup_logging
logger = setup_logging()
def lat_lon_from_profile_x_coord(center_lat_lon, orientation, center_profile_x, x_coord):
def lat_lon_from_profile_x_coord(
center_lat_lon, orientation, center_profile_x, x_coord
):
"""
Returns the lat/lon of a point on a profile with the given x_coord
:param center_lat_lon: Shapely point of lat/lon of profile center
@ -31,7 +33,9 @@ def lat_lon_from_profile_x_coord(center_lat_lon, orientation, center_profile_x,
point_x = center_x + (center_profile_x - x_coord) * np.cos(np.deg2rad(orientation))
point_y = center_y + (center_profile_x - x_coord) * np.sin(np.deg2rad(orientation))
point_xy = Point(point_x, point_y)
point_lat_lon = convert_coord_systems(point_xy, in_coord_system="EPSG:28356", out_coord_system="EPSG:4326")
point_lat_lon = convert_coord_systems(
point_xy, in_coord_system="EPSG:28356", out_coord_system="EPSG:4326"
)
return point_lat_lon
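
For a quick numeric instance of the offset above (all values invented): with the profile centre at easting/northing (342000, 6266000) in EPSG:28356, an orientation of 60 degrees and a centre chainage of 200 m, a point at chainage 150 m sits 50 m along the profile azimuth.

import numpy as np

center_x, center_y = 342000.0, 6266000.0        # illustrative MGA Zone 56 coordinates
orientation, center_profile_x, x_coord = 60.0, 200.0, 150.0

point_x = center_x + (center_profile_x - x_coord) * np.cos(np.deg2rad(orientation))
point_y = center_y + (center_profile_x - x_coord) * np.sin(np.deg2rad(orientation))
print(point_x, point_y)  # 342025.0, ~6266043.3
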
@ -41,7 +45,9 @@ def lat_lon_from_profile_x_coord(center_lat_lon, orientation, center_profile_x,
@click.option("--crest-toes-csv", required=True, help=".csv file to convert")
@click.option("--impacts-csv", required=True, help=".csv file to convert")
@click.option("--output-geojson", required=True, help="where to store .geojson file")
def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, output_geojson):
def R_high_to_geojson(
sites_csv, profiles_csv, crest_toes_csv, impacts_csv, output_geojson
):
"""
Converts impact R_high into a lat/lon geojson that we can plot in QGIS
:param sites_csv:
@ -58,10 +64,14 @@ def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, outp
# Create geojson file
schema = {
"geometry": "Point",
"properties": OrderedDict([("beach", "str"), ("site_id", "str"), ("elevation", "float")]),
"properties": OrderedDict(
[("beach", "str"), ("site_id", "str"), ("elevation", "float")]
),
}
with fiona.open(output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema) as output:
with fiona.open(
output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema
) as output:
for index, row in df_impacts.iterrows():
site_id = index
@ -72,12 +82,18 @@ def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, outp
# Get prestorm profile
df_profile = df_profiles.loc[(site_id, "prestorm")]
int_x = crossings(df_profile.index.get_level_values("x").tolist(), df_profile.z.tolist(), R_high_z)
int_x = crossings(
df_profile.index.get_level_values("x").tolist(),
df_profile.z.tolist(),
R_high_z,
)
# Take the intersection closest to the dune face.
try:
x_cols = [x for x in df_crest_toes.columns if '_x' in x]
dune_face_x = np.mean(df_crest_toes.loc[(site_id, "prestorm"),x_cols].tolist())
x_cols = [x for x in df_crest_toes.columns if "_x" in x]
dune_face_x = np.mean(
df_crest_toes.loc[(site_id, "prestorm"), x_cols].tolist()
)
int_x = min(int_x, key=lambda x: abs(x - dune_face_x))
except:
continue
@ -91,7 +107,9 @@ def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, outp
x_coord=int_x,
)
prop = OrderedDict([("beach", beach), ("site_id", site_id), ("elevation", R_high_z)])
prop = OrderedDict(
[("beach", beach), ("site_id", site_id), ("elevation", R_high_z)]
)
output.write({"geometry": mapping(point_lat_lon), "properties": prop})
@ -99,7 +117,9 @@ def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, outp
@click.option("--sites-csv", required=True, help=".csv file to convert")
@click.option("--profile-features-csv", required=True, help=".csv file to convert")
@click.option("--output-geojson", required=True, help="where to store .geojson file")
def profile_features_crest_toes_to_geojson(sites_csv, profile_features_csv, output_geojson):
def profile_features_crest_toes_to_geojson(
sites_csv, profile_features_csv, output_geojson
):
"""
Converts profile_features containing dune toe and crest locations to a geojson we can load into QGIS
:param sites_csv:
@ -131,7 +151,9 @@ def profile_features_crest_toes_to_geojson(sites_csv, profile_features_csv, outp
),
}
with fiona.open(output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema) as output:
with fiona.open(
output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema
) as output:
for index, row in df_profile_features.iterrows():
beach = index[:-4]
site_id = index
@ -185,9 +207,14 @@ def sites_csv_to_geojson(input_csv, output_geojson):
df_sites = pd.read_csv(input_csv, index_col=[0])
logger.info(os.environ.get("GDAL_DATA", None))
schema = {"geometry": "LineString", "properties": OrderedDict([("beach", "str"), ("site_id", "str")])}
schema = {
"geometry": "LineString",
"properties": OrderedDict([("beach", "str"), ("site_id", "str")]),
}
with fiona.open(output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema) as output:
with fiona.open(
output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema
) as output:
for index, row in df_sites.iterrows():
center_lat_lon = Point(row["lon"], row["lat"])
@ -215,10 +242,16 @@ def sites_csv_to_geojson(input_csv, output_geojson):
@click.command()
@click.option("--sites-csv", required=True, help="sites.csv file to convert")
@click.option("--observed-impacts-csv", required=True, help="impacts-observed.csv file to convert")
@click.option("--forecast-impacts-csv", required=True, help="impacts-forecast.csv file to convert")
@click.option(
"--observed-impacts-csv", required=True, help="impacts-observed.csv file to convert"
)
@click.option(
"--forecast-impacts-csv", required=True, help="impacts-forecast.csv file to convert"
)
@click.option("--output-geojson", required=True, help="where to store .geojson file")
def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, output_geojson):
def impacts_to_geojson(
sites_csv, observed_impacts_csv, forecast_impacts_csv, output_geojson
):
"""
Converts impacts observed and forecasted to a geojson for visualization in QGIS
:param sites_csv:
@ -231,7 +264,9 @@ def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, ou
# Get information from .csv and read into pandas dataframe
df_sites = pd.read_csv(sites_csv, index_col=[0])
df_observed = pd.read_csv(observed_impacts_csv, index_col=[0])
df_forecast = pd.read_csv(forecast_impacts_csv, index_col=[0]).rename({"storm_regime": "forecast_storm_regime"})
df_forecast = pd.read_csv(forecast_impacts_csv, index_col=[0]).rename(
{"storm_regime": "forecast_storm_regime"}
)
# Rename columns, so we can distinguish between forecast and observed
df_observed = df_observed.rename(columns={"storm_regime": "observed_storm_regime"})
@ -241,7 +276,9 @@ def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, ou
df = pd.concat([df_sites, df_observed, df_forecast], sort=True, axis=1)
# Make new column for accuracy of forecast. Use underpredict/correct/overpredict classes
df.loc[df.observed_storm_regime == df.forecast_storm_regime, "forecast_accuray"] = "correct"
df.loc[
df.observed_storm_regime == df.forecast_storm_regime, "forecast_accuracy"
] = "correct"
# Observed/Forecasted/Class for each combination
classes = [
@ -256,7 +293,10 @@ def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, ou
("overwash", "overwash", "correct"),
]
for c in classes:
df.loc[(df.observed_storm_regime == c[0]) & (df.forecast_storm_regime == c[1]), "forecast_accuracy"] = c[2]
df.loc[
(df.observed_storm_regime == c[0]) & (df.forecast_storm_regime == c[1]),
"forecast_accuracy",
] = c[2]
schema = {
"geometry": "Point",
@ -271,7 +311,9 @@ def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, ou
),
}
with fiona.open(output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema) as output:
with fiona.open(
output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema
) as output:
for index, row in df.iterrows():
# Locate the marker at the seaward end of the profile to avoid cluttering the coastline.

@ -27,10 +27,19 @@ def parse_crest_toes(df_raw_features, df_profiles):
# Puts profiles_features_csv into format expected by rest of analysis
df_crest_toes = df_raw_features.reset_index().melt(
id_vars=["site_id"],
value_vars=["prestorm_dune_crest_x", "prestorm_dune_toe_x", "poststorm_dune_crest_x", "poststorm_dune_toe_x"],
value_vars=[
"prestorm_dune_crest_x",
"prestorm_dune_toe_x",
"poststorm_dune_crest_x",
"poststorm_dune_toe_x",
],
)
df_crest_toes["profile_type"] = df_crest_toes.variable.str.extract(
r"(prestorm|poststorm)"
)
df_crest_toes["point_type"] = df_crest_toes.variable.str.extract(
r"(dune_crest_x|dune_toe_x)"
)
df_crest_toes["profile_type"] = df_crest_toes.variable.str.extract(r"(prestorm|poststorm)")
df_crest_toes["point_type"] = df_crest_toes.variable.str.extract(r"(dune_crest_x|dune_toe_x)")
df_crest_toes = df_crest_toes.drop(columns=["variable"])
df_crest_toes = df_crest_toes.sort_values("site_id")
df_crest_toes = df_crest_toes.set_index(["site_id", "profile_type", "point_type"])
@ -52,7 +61,9 @@ def parse_crest_toes(df_raw_features, df_profiles):
x_val = df_crest_toes.loc[(site_id, param), "dune_{}_x".format(loc)]
if np.isnan(x_val):
df_crest_toes.loc[(site_id, param), "dune_{}_z".format(loc)] = np.nan
df_crest_toes.loc[
(site_id, param), "dune_{}_z".format(loc)
] = np.nan
continue
# Try to get the value from the other profile if we get a nan or an empty dataframe
@ -121,7 +132,9 @@ def parse_dune_crest_toes(df_sites, crest_mat, toe_mat):
# Want the site_id instead of the site_no, so merge in df_sites
df_sites.reset_index(inplace=True)
df_profile_features = df_sites[["site_no", "site_id"]].merge(df_profile_features, how="outer", on=["site_no"])
df_profile_features = df_sites[["site_no", "site_id"]].merge(
df_profile_features, how="outer", on=["site_no"]
)
df_profile_features.drop(columns=["site_no"], inplace=True)
df_profile_features.set_index(["site_id", "profile_type"], inplace=True)
df_profile_features.sort_index(inplace=True)
@ -155,6 +168,10 @@ def parse_waves(waves_mat):
"P": mat_data["P"][i][j][0],
"Exs": mat_data["Exs"][i][j][0],
"Pxs": mat_data["Pxs"][i][j][0],
"Ecum": mat_data["Ecum"][i],
"Exscum": mat_data["Exscum"][i],
"Pcum": mat_data["Pxscum"][i],
"Pxscum": mat_data["Pxscum"][i],
}
)
@ -257,7 +274,9 @@ def parse_profiles_and_sites(profiles_mat):
z = z_site[j]
land_lim = mat_data["landlims"][i][j]
survey_datetime = matlab_datenum_to_datetime(mat_data["surveydates"][i][j])
survey_datetime = matlab_datenum_to_datetime(
mat_data["surveydates"][i][j]
)
# Keep a record of where the center of the profile is located, and the locations of the land
# and sea
@ -333,7 +352,9 @@ def remove_zeros(df_profiles):
)
df_profile = df_profiles[idx_site]
x_last_ele = df_profile[df_profile.z == 0].index.get_level_values("x")[0]
df_profiles.loc[idx_site & (df_profiles.index.get_level_values("x") > x_last_ele), "z"] = np.nan
df_profiles.loc[
idx_site & (df_profiles.index.get_level_values("x") > x_last_ele), "z"
] = np.nan
logger.info("Removed zeros from end of profiles")
return df_profiles
@ -345,7 +366,11 @@ def matlab_datenum_to_datetime(matlab_datenum):
:param matlab_datenum:
:return:
"""
return datetime.fromordinal(int(matlab_datenum)) + timedelta(days=matlab_datenum % 1) - timedelta(days=366)
return (
datetime.fromordinal(int(matlab_datenum))
+ timedelta(days=matlab_datenum % 1)
- timedelta(days=366)
)
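
The 366-day subtraction above exists because MATLAB datenums count days from the start of year 0 while Python ordinals count from 0001-01-01, and the fractional part of a datenum carries the time of day. A minimal round-trip check (timestamp chosen arbitrarily; the helper is restated so the snippet is self-contained):

from datetime import datetime, timedelta

def matlab_datenum_to_datetime(matlab_datenum):
    return (
        datetime.fromordinal(int(matlab_datenum))
        + timedelta(days=matlab_datenum % 1)
        - timedelta(days=366)
    )

dt = datetime(2016, 6, 3, 12, 0)
datenum = dt.toordinal() + 366 + 0.5   # 12:00 is half a day past midnight
assert matlab_datenum_to_datetime(datenum) == dt
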
def replace_unique_sites(df, df_sites):
@ -359,7 +384,10 @@ def replace_unique_sites(df, df_sites):
df_sites["site_id"] = df_sites.index.get_level_values("site_id")
# Create eastings and northings so we can calculate distances
site_points = [convert_coord_systems(Point(lon, lat)).xy for lon, lat in zip(df_sites["lon"], df_sites["lat"])]
site_points = [
convert_coord_systems(Point(lon, lat)).xy
for lon, lat in zip(df_sites["lon"], df_sites["lat"])
]
df_sites["easting"] = [x[0][0] for x in site_points]
df_sites["northing"] = [x[1][0] for x in site_points]
@ -369,41 +397,80 @@ def replace_unique_sites(df, df_sites):
# Calculate distances from each point to each site and determine closest site
easting, northing = [x[0] for x in convert_coord_systems(Point(lon, lat)).xy]
distances_to_sites = np.sqrt((df_sites["easting"] - easting) ** 2 + (df_sites["northing"] - northing) ** 2)
distances_to_sites = np.sqrt(
(df_sites["easting"] - easting) ** 2
+ (df_sites["northing"] - northing) ** 2
)
min_distance = distances_to_sites.min()
closest_site = distances_to_sites.idxmin()
# Do some logging so we can check later.
if min_distance > 1:
logger.warning("Closest site to (%.4f,%.4f) is %s (%.2f m away)", lat, lon, closest_site, min_distance)
logger.warning(
"Closest site to (%.4f,%.4f) is %s (%.2f m away)",
lat,
lon,
closest_site,
min_distance,
)
else:
logger.info("Closest site to (%.4f,%.4f) is %s (%.2f m away)", lat, lon, closest_site, min_distance)
logger.info(
"Closest site to (%.4f,%.4f) is %s (%.2f m away)",
lat,
lon,
closest_site,
min_distance,
)
# Assign site_id based on closest site
df.loc[df_group.index, "site_id"] = closest_site
nan_count = df.site_id.isna().sum()
if nan_count > 0:
logger.warning("Not all records (%d of %d) matched with a unique site", nan_count, len(df))
logger.warning(
"Not all records (%d of %d) matched with a unique site", nan_count, len(df)
)
df = df.drop(columns=["lat", "lon", "beach"])
return df
def split_site_wave_params(df_waves):
"""
When we parse the waves.mat file, the cumulative wave energy and power properties are repeated for every time step.
This is unnecessary, so let's extract them from the dataframe and put them in their own separate, per-site dataframe.
:param df_waves:
:return:
"""
cols_to_extract = ["Ecum", "Exscum", "Pcum", "Pxscum"]
df_sites_waves = df_waves.loc[:, cols_to_extract].groupby(["site_id"]).first()
df_waves = df_waves.drop(columns=cols_to_extract, errors="ignore")
return df_waves, df_sites_waves
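
The split above relies on the cumulative columns being constant for a given site, so taking the first value per site is enough. A toy sketch of the same pattern (the site IDs, wave heights and cumulative values below are invented):

import pandas as pd

idx = pd.MultiIndex.from_product(
    [["SITEA0001", "SITEB0001"], pd.to_datetime(["2016-06-04 00:00", "2016-06-04 01:00"])],
    names=["site_id", "datetime"],
)
df_waves = pd.DataFrame(
    {
        "Hs0": [2.1, 2.3, 1.9, 2.0],            # varies per time step
        "Ecum": [5.0e6, 5.0e6, 4.2e6, 4.2e6],   # repeated for every time step
        "Pxscum": [1.1e6, 1.1e6, 0.9e6, 0.9e6],
    },
    index=idx,
)

cols_to_extract = ["Ecum", "Pxscum"]
df_sites_waves = df_waves.loc[:, cols_to_extract].groupby(["site_id"]).first()
df_waves = df_waves.drop(columns=cols_to_extract)
print(df_sites_waves)   # one row per site, cumulative values only
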
@click.command(short_help="create waves.csv")
@click.option("--waves-mat", required=True, help=".mat file containing wave records")
@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
@click.option("--output-file", required=True, help="where to save waves.csv")
def create_waves_csv(waves_mat, sites_csv, output_file):
logger.info("Creating %s", output_file)
@click.option(
"--sites-csv", required=True, help=".csv file description of cross section sites"
)
@click.option("--waves-output-file", required=True, help="where to save waves.csv")
@click.option(
"--sites-waves-output-file", required=True, help="where to save sites_waves.csv"
)
def create_waves_csv(waves_mat, sites_csv, waves_output_file, sites_waves_output_file):
logger.info("Creating %s", waves_output_file)
df_waves = parse_waves(waves_mat=waves_mat)
df_sites = pd.read_csv(sites_csv, index_col=[0])
df_waves = replace_unique_sites(df_waves, df_sites)
df_waves.set_index(["site_id", "datetime"], inplace=True)
df_waves.sort_index(inplace=True)
df_waves.to_csv(output_file)
logger.info("Created %s", output_file)
df_waves, df_sites_waves = split_site_wave_params(df_waves)
df_waves.to_csv(waves_output_file)
df_sites_waves.to_csv(sites_waves_output_file)
logger.info("Created %s", waves_output_file)
logger.info("Created %s", sites_waves_output_file)
# @click.command(short_help="create profile_features.csv")
@ -420,7 +487,9 @@ def create_waves_csv(waves_mat, sites_csv, output_file):
@click.command(short_help="create profile_features.csv")
@click.option("--profile-features-csv", required=True, help=".mat file containing wave records")
@click.option(
"--profile-features-csv", required=True, help=".csv file of dune crest and toe locations"
)
@click.option("--profiles-csv", required=True, help=".csv file containing beach profiles")
@click.option("--output-file", required=True, help="where to save profile_features_crest_toes.csv")
def create_crest_toes(profile_features_csv, profiles_csv, output_file):
@ -435,10 +504,16 @@ def create_crest_toes(profile_features_csv, profiles_csv, output_file):
@click.command(short_help="create profiles.csv")
@click.option("--profiles-mat", required=True, help=".mat file containing beach profiles")
@click.option("--profiles-output-file", required=True, help="where to save profiles.csv")
@click.option(
"--profiles-mat", required=True, help=".mat file containing beach profiles"
)
@click.option(
"--profiles-output-file", required=True, help="where to save profiles.csv"
)
@click.option("--sites-output-file", required=True, help="where to save sites.csv")
def create_sites_and_profiles_csv(profiles_mat, profiles_output_file, sites_output_file):
def create_sites_and_profiles_csv(
profiles_mat, profiles_output_file, sites_output_file
):
logger.info("Creating sites and profiles csvs")
df_profiles, df_sites = parse_profiles_and_sites(profiles_mat=profiles_mat)
df_profiles.set_index(["site_id", "profile_type", "x"], inplace=True)
@ -456,7 +531,9 @@ def create_sites_and_profiles_csv(profiles_mat, profiles_output_file, sites_outp
@click.command(short_help="create profiles.csv")
@click.option("--tides-mat", required=True, help=".mat file containing tides")
@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
@click.option(
"--sites-csv", required=True, help=".csv file description of cross section sites"
)
@click.option("--output-file", required=True, help="where to save tides.csv")
def create_tides_csv(tides_mat, sites_csv, output_file):
logger.info("Creating %s", output_file)
@ -469,6 +546,23 @@ def create_tides_csv(tides_mat, sites_csv, output_file):
logger.info("Created %s", output_file)
@click.command(short_help="create sites_grain_size.csv")
@click.option(
"--grain-size-csv",
required=True,
help=".csv file description of cross section sites",
)
@click.option("--output-file", required=True, help="where to save sites_grain_size.csv")
def create_grain_size_csv(grain_size_csv, output_file):
logger.info("Creating %s", output_file)
df_sites_grain_size = pd.read_csv(grain_size_csv, index_col=[0])
# Calculate roughness, refer to Power et al (2018)
df_sites_grain_size["r"] = 2.5 * df_sites_grain_size["d_50"]
df_sites_grain_size.to_csv(output_file)
logger.info("Created %s", output_file)
@click.group()
def cli():
pass

@ -31,7 +31,9 @@ def shapes_from_shp(shp_file):
return shapes, ids, properties
def distance_to_intersection(lat, lon, landward_orientation, beach, line_strings, line_properties):
def distance_to_intersection(
lat, lon, landward_orientation, beach, line_strings, line_properties
):
"""
Returns the distance at which a line drawn from a lat/lon at a given orientation intersects a line string
:param lat:
@ -58,7 +60,9 @@ def distance_to_intersection(lat, lon, landward_orientation, beach, line_strings
seaward_line = LineString([start_point, seaward_point])
# Look at relevant line_strings which have the same beach property in order to reduce computation time
line_strings = [s for s, p in zip(line_strings, line_properties) if p["beach"] == beach]
line_strings = [
s for s, p in zip(line_strings, line_properties) if p["beach"] == beach
]
# Check whether the profile line intersects with any lines in line_strings. If the intersection point is landwards,
# consider the distance negative; otherwise, seawards is positive.
@ -89,7 +93,9 @@ def beach_profile_elevation(x_coord, df_profiles, profile_type, site_id):
return None
# Get profile
df_profile = df_profiles.query('profile_type == "{}" and site_id =="{}"'.format(profile_type, site_id))
df_profile = df_profiles.query(
'profile_type == "{}" and site_id =="{}"'.format(profile_type, site_id)
)
return np.interp(x_coord, df_profile.index.get_level_values("x"), df_profile["z"])
@ -102,7 +108,10 @@ def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp):
# Get site information. Base our profile features on each site
df_profile_features = df_sites
features = {"dune_crest": {"file": dune_crest_shp}, "dune_toe": {"file": dune_toe_shp}}
features = {
"dune_crest": {"file": dune_crest_shp},
"dune_toe": {"file": dune_toe_shp},
}
# Import our dune crest and toes
for feat in features.keys():
@ -112,19 +121,31 @@ def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp):
# Figure out the x coordinates of our crest and toes, by looking at where our beach sections intersect our
# shape files.
col_name = "{}_x".format(feat)
df_profile_features[col_name] = df_profile_features["profile_x_lat_lon"] + df_profile_features.apply(
df_profile_features[col_name] = df_profile_features[
"profile_x_lat_lon"
] + df_profile_features.apply(
lambda row: distance_to_intersection(
row["lat"], row["lon"], row["orientation"], row["beach"], shapes, properties
row["lat"],
row["lon"],
row["orientation"],
row["beach"],
shapes,
properties,
),
axis=1,
)
# Get the elevations of the crest and toe
col_name = "{}_z".format(feat)
df_profile_features[col_name] = df_profile_features.apply(
lambda row: beach_profile_elevation(row["{}_x".format(feat)], df_profiles, "prestorm", row.name), axis=1
lambda row: beach_profile_elevation(
row["{}_x".format(feat)], df_profiles, "prestorm", row.name
),
axis=1,
)
df_profile_features = df_profile_features.drop(columns=["beach", "lat", "lon", "orientation", "profile_x_lat_lon"])
df_profile_features = df_profile_features.drop(
columns=["beach", "lat", "lon", "orientation", "profile_x_lat_lon"]
)
return df_profile_features
@ -134,10 +155,14 @@ def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp):
@click.option("--sites-csv", required=True, help="where to store .shp file")
@click.option("--profiles-csv", required=True, help="where to store .shp file")
@click.option("--output-csv", required=True, help="where to store .shp file")
def create_profile_features(dune_crest_shp, dune_toe_shp, sites_csv, profiles_csv, output_csv):
def create_profile_features(
dune_crest_shp, dune_toe_shp, sites_csv, profiles_csv, output_csv
):
logger.info("Creating .csv of dune crests and toes")
df_sites = pd.read_csv(sites_csv, index_col=[0])
df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2])
df_profile_features = parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp)
df_profile_features = parse_profile_features(
df_sites, df_profiles, dune_crest_shp, dune_toe_shp
)
df_profile_features.to_csv(output_csv)
logger.info("Done!")

@ -32,11 +32,16 @@ def crossings(profile_x, profile_z, constant_z):
indicies = np.where(z[:-1] * z[1:] < 0)[0]
# Use linear interpolation to find intersample crossings.
return [profile_x[i] - (profile_x[i] - profile_x[i + 1]) / (z[i] - z[i + 1]) * (z[i]) for i in indicies]
return [
profile_x[i] - (profile_x[i] - profile_x[i + 1]) / (z[i] - z[i + 1]) * (z[i])
for i in indicies
]
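
For a worked instance of the crossing interpolation above, the sketch below repeats the calculation in self-contained form (the subtraction of constant_z happens earlier in the real function, outside this hunk); the profile values are invented.

import numpy as np

def crossings(profile_x, profile_z, constant_z):
    z = np.subtract(profile_z, constant_z)       # shift so the target contour sits at z = 0
    indices = np.where(z[:-1] * z[1:] < 0)[0]    # sign changes bracket each crossing
    return [
        profile_x[i] - (profile_x[i] - profile_x[i + 1]) / (z[i] - z[i + 1]) * (z[i])
        for i in indices
    ]

x = [0, 10, 20, 30]
z = [4.0, 3.0, 1.0, 0.0]
print(crossings(x, z, 2.0))   # [15.0] -> the 2 m contour lies halfway between x=10 and x=20
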
# TODO Think of a better way to do this than having to manually specify the coordinate systems
def convert_coord_systems(g1, in_coord_system="EPSG:4326", out_coord_system="EPSG:28356"):
def convert_coord_systems(
g1, in_coord_system="EPSG:4326", out_coord_system="EPSG:28356"
):
"""
Converts coordinates from one coordinate system to another. Needed because shapefiles are usually defined in
lat/lon but should be converted to GDA to calculate distances.
