From 46589da736405282cc985619c4d8df4faa98485d Mon Sep 17 00:00:00 2001
From: Chris Leaman
Date: Thu, 28 Feb 2019 11:27:33 +1100
Subject: [PATCH] Update notebooks

---
 notebooks/07_evaluate_model_performance.ipynb | 492 ++++++++++++++++++
 notebooks/07_longshore_plotting.ipynb         | 348 -------------
 ...ynb => 09_superseded_run_comparison.ipynb} |   0
 notebooks/10_profile_clustering.ipynb         | 186 +++++++
 4 files changed, 678 insertions(+), 348 deletions(-)
 create mode 100644 notebooks/07_evaluate_model_performance.ipynb
 delete mode 100644 notebooks/07_longshore_plotting.ipynb
 rename notebooks/{09_run_comparison.ipynb => 09_superseded_run_comparison.ipynb} (100%)
 create mode 100644 notebooks/10_profile_clustering.ipynb

diff --git a/notebooks/07_evaluate_model_performance.ipynb b/notebooks/07_evaluate_model_performance.ipynb
new file mode 100644
index 0000000..efa049b
--- /dev/null
+++ b/notebooks/07_evaluate_model_performance.ipynb
@@ -0,0 +1,492 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Evaluate prediction metrics \n",
+    "- This notebook is used to check how each of our storm impact prediction models performed in comparison to our observed storm impacts."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "heading_collapsed": true
+   },
+   "source": [
+    "## Setup notebook\n",
+    "Import our required packages and set default plotting options."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "hidden": true
+   },
+   "outputs": [],
+   "source": [
+    "# Enable autoreloading of our modules. \n",
+    "# Most of the code will be located in the /src/ folder, \n",
+    "# and then called from the notebook.\n",
+    "%matplotlib inline\n",
+    "%reload_ext autoreload\n",
+    "%autoreload"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "hidden": true
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.debugger import set_trace\n",
+    "\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import decimal\n",
+    "import plotly\n",
+    "import plotly.graph_objs as go\n",
+    "import plotly.plotly as py\n",
+    "import plotly.tools as tls\n",
+    "import plotly.figure_factory as ff\n",
+    "from plotly import tools\n",
+    "import plotly.io as pio\n",
+    "from scipy import stats\n",
+    "import math\n",
+    "import matplotlib\n",
+    "from matplotlib import cm\n",
+    "import colorlover as cl\n",
+    "from tqdm import tqdm_notebook\n",
+    "from ipywidgets import widgets, Output\n",
+    "from IPython.display import display, clear_output, Image, HTML\n",
+    "from scipy import stats\n",
+    "from sklearn.metrics import confusion_matrix\n",
+    "import matplotlib.pyplot as plt\n",
+    "from matplotlib.lines import Line2D\n",
+    "from cycler import cycler\n",
+    "from scipy.interpolate import interp1d\n",
+    "from pandas.api.types import CategoricalDtype"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "hidden": true
+   },
+   "outputs": [],
+   "source": [
+    "# Matplotlib default settings\n",
+    "plt.rcParams[\"figure.figsize\"] = (10,6)\n",
+    "plt.rcParams['axes.grid']=True\n",
+    "plt.rcParams['grid.alpha'] = 0.5\n",
+    "plt.rcParams['grid.color'] = \"grey\"\n",
+    "plt.rcParams['grid.linestyle'] = \"--\"\n",
+    "plt.rcParams['axes.grid']=True\n",
+    "\n",
+    "# https://stackoverflow.com/a/20709149\n",
+    "matplotlib.rcParams['text.usetex'] = True\n",
+    "\n",
+    "matplotlib.rcParams['text.latex.preamble'] = [\n",
+    "    r'\\usepackage{siunitx}',   # I need upright \\micro symbols, but you need...\n",
+    "    
r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n", + " r'\\usepackage{helvet}', # set the normal font here\n", + " r'\\usepackage{amsmath}',\n", + " r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n", + " r'\\sansmath', # <- tricky! -- gotta actually tell tex to use!\n", + "] " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import data\n", + "Import our data from the `./data/interim/` folder and load it into pandas dataframes. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def df_from_csv(csv, index_col, data_folder='../data/interim'):\n", + " print('Importing {}'.format(csv))\n", + " return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n", + "\n", + "df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n", + "df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n", + "df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n", + "df_sites = df_from_csv('sites.csv', index_col=[0])\n", + "df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])\n", + "\n", + "# Note that the forecasted data sets should be in the same order for impacts and twls\n", + "impacts = {\n", + " 'forecasted': {\n", + " 'foreshore_slope_sto06': df_from_csv('impacts_forecasted_foreshore_slope_sto06.csv',index_col=[0]),\n", + " 'mean_slope_hol86': df_from_csv('impacts_forecasted_mean_slope_hol86.csv',index_col=[0]),\n", + " 'mean_slope_nie91': df_from_csv('impacts_forecasted_mean_slope_nie91.csv',index_col=[0]),\n", + " 'mean_slope_pow18': df_from_csv('impacts_forecasted_mean_slope_pow18.csv',index_col=[0]),\n", + " 'mean_slope_sto06': df_from_csv('impacts_forecasted_mean_slope_sto06.csv',index_col=[0]),\n", + " },\n", + " 'observed': df_from_csv('impacts_observed.csv', index_col=[0])\n", + " }\n", + "\n", + "\n", + "twls = {\n", + " 'forecasted': {\n", + " 'foreshore_slope_sto06': df_from_csv('twl_foreshore_slope_sto06.csv',index_col=[0,1]),\n", + " 'mean_slope_hol86': df_from_csv('twl_mean_slope_hol86.csv',index_col=[0,1]),\n", + " 'mean_slope_nie91': df_from_csv('twl_mean_slope_nie91.csv',index_col=[0,1]),\n", + " 'mean_slope_pow18': df_from_csv('twl_mean_slope_pow18.csv',index_col=[0,1]),\n", + " 'mean_slope_sto06': df_from_csv('twl_mean_slope_sto06.csv',index_col=[0,1]),\n", + " }\n", + "}\n", + "print('Done!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "## Generate longshore plots for each beach" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "beach = 'NARRA'\n", + "\n", + "df_obs_impacts = impacts['observed'].loc[impacts['observed'].index.str.\n", + " contains(beach)]\n", + "\n", + "# Get index for each site on the beach\n", + "n = [x for x in range(len(df_obs_impacts))][::-1]\n", + "n_sites = [x for x in df_obs_impacts.index][::-1]\n", + "\n", + "# Convert storm regimes to categorical datatype\n", + "cat_type = CategoricalDtype(\n", + " categories=['swash', 'collision', 'overwash', 'inundation'], ordered=True)\n", + "df_obs_impacts.storm_regime = df_obs_impacts.storm_regime.astype(cat_type)\n", + "\n", + "# Create figure\n", + "f, (ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8) = plt.subplots(\n", + " 1,\n", + " 8,\n", + " sharey=True,\n", + " figsize=(18, 8),\n", + " gridspec_kw={'width_ratios': [4, 4, 2, 2, 2, 2, 2, 2]})\n", + "\n", + "# ax1: 
Impacts\n", + "\n", + "# Define colors for storm regime\n", + "cmap = {'swash': '#1a9850', 'collision': '#fee08b', 'overwash': '#d73027'}\n", + "\n", + "# Common marker style\n", + "marker_style = {\n", + " 's': 60,\n", + " 'linewidths': 0.7,\n", + " 'alpha': 1,\n", + " 'edgecolors': 'k',\n", + " 'marker': 'o',\n", + "}\n", + "\n", + "# Plot observed impacts\n", + "colors = [cmap.get(x) for x in df_obs_impacts.storm_regime]\n", + "colors = ['#d73027' if c is None else c for c in colors]\n", + "ax1.scatter([0 for x in n], n, color=colors, **marker_style)\n", + "\n", + "# Plot model impacts\n", + "for i, model in enumerate(impacts['forecasted']):\n", + "\n", + " # Only get model results for this beach\n", + " df_model = impacts['forecasted'][model].loc[impacts['forecasted'][model].\n", + " index.str.contains(beach)]\n", + "\n", + " # Recast storm regimes as categorical data\n", + " df_model.storm_regime = df_model.storm_regime.astype(cat_type)\n", + "\n", + " # Assign colors\n", + " colors = [cmap.get(x) for x in df_model.storm_regime]\n", + " colors = ['#aaaaaa' if c is None else c for c in colors]\n", + " ax1.scatter([i + 1 for x in n], n, color=colors, **marker_style)\n", + "\n", + "# Add model names to each impact on x axis\n", + "ax1.set_xticks(range(len(impacts['forecasted']) + 1))\n", + "ax1.set_xticklabels(['observed'] +\n", + " [x.replace('_', '\\_') for x in impacts['forecasted']])\n", + "ax1.xaxis.set_tick_params(rotation=90)\n", + "\n", + "# Add title\n", + "ax1.set_title('Storm regime')\n", + "\n", + "# Create custom legend\n", + "legend_elements = [\n", + " Line2D([0], [0],\n", + " marker='o',\n", + " color='w',\n", + " label='Swash',\n", + " markerfacecolor='#1a9850',\n", + " markersize=8,\n", + " markeredgewidth=1.0,\n", + " markeredgecolor='k'),\n", + " Line2D([0], [0],\n", + " marker='o',\n", + " color='w',\n", + " label='Collision',\n", + " markerfacecolor='#fee08b',\n", + " markersize=8,\n", + " markeredgewidth=1.0,\n", + " markeredgecolor='k'),\n", + " Line2D([0], [0],\n", + " marker='o',\n", + " color='w',\n", + " label='Overwash',\n", + " markerfacecolor='#d73027',\n", + " markersize=8,\n", + " markeredgewidth=1.0,\n", + " markeredgecolor='k'),\n", + "]\n", + "ax1.legend(\n", + " handles=legend_elements, loc='lower center', bbox_to_anchor=(0.5, 1.1))\n", + "\n", + "# Replace yticks with site_ids\n", + "yticks = ax1.get_yticks().tolist()\n", + "yticks = [n_sites[int(y)] if 0 <= y <= len(n_sites) else y for y in yticks]\n", + "ax1.set_yticklabels(yticks)\n", + "\n", + "# ax2: elevations\n", + "\n", + "# Dune elevations\n", + "df_feats = df_profile_features_crest_toes.xs(['prestorm'],\n", + " level=['profile_type'])\n", + "df_feats = df_feats.loc[df_feats.index.str.contains(beach)]\n", + "\n", + "ax2.plot(df_feats.dune_crest_z, n, color='#fdae61')\n", + "ax2.plot(df_feats.dune_toe_z, n, color='#fdae61')\n", + "ax2.fill_betweenx(\n", + " n,\n", + " df_feats.dune_toe_z,\n", + " df_feats.dune_crest_z,\n", + " alpha=0.2,\n", + " color='#fdae61',\n", + " label='$D_{low}$ to $D_{high}$')\n", + "\n", + "model_colors = [\n", + " '#1f78b4',\n", + " '#33a02c',\n", + " '#e31a1c',\n", + " '#6a3d9a',\n", + " '#a6cee3',\n", + " '#b2df8a',\n", + " '#fb9a99',\n", + " '#cab2d6',\n", + " '#ffff99',\n", + " ]\n", + "\n", + "# Define colors to cycle through for our R_high\n", + "ax2.set_prop_cycle(cycler('color', model_colors))\n", + "\n", + "# Plot R_high values\n", + "for model in impacts['forecasted']:\n", + "\n", + " # Only get model results for this beach\n", + " df_model = 
impacts['forecasted'][model].\n",
+    "                                  index.str.contains(beach)]\n",
+    "\n",
+    "    # Plot this model's R_high values\n",
+    "    ax2.plot(df_model.R_high, n, label=model.replace('_', '\\_'))\n",
+    "\n",
+    "# Set title, legend and labels\n",
+    "ax2.set_title('TWL \\& dune\\nelevations')\n",
+    "ax2.legend(loc='lower center', bbox_to_anchor=(0.5, 1.1))\n",
+    "ax2.set_xlabel('Elevation (m AHD)')\n",
+    "\n",
+    "\n",
+    "# ax3: Plot R_high - D_low\n",
+    "\n",
+    "# Define colors to cycle through for our R_high\n",
+    "ax3.set_prop_cycle(cycler('color', model_colors))\n",
+    "\n",
+    "# Plot R_high values\n",
+    "for model in impacts['forecasted']:\n",
+    "    \n",
+    "    df_model = impacts['forecasted'][model].loc[impacts['forecasted'][model].\n",
+    "                                                index.str.contains(beach)]\n",
+    "    # R_high - D_low\n",
+    "    ax3.plot(df_model.R_high - df_feats.dune_toe_z, n, label=model.replace('_', '\\_'))\n",
+    "\n",
+    "ax3.axvline(x=0,color='black',linestyle=':')\n",
+    "ax3.set_title('$R_{high}$ - $D_{low}$')\n",
+    "ax3.set_xlabel('Height (m)')\n",
+    "ax3.set_xlim([-2,2])\n",
+    "\n",
+    "\n",
+    "\n",
+    "# Define colors to cycle through for our R2\n",
+    "ax4.set_prop_cycle(cycler('color', model_colors))\n",
+    "\n",
+    "# ax4: Plot R2 for each model\n",
+    "for model in impacts['forecasted']:\n",
+    "    df_R2 = impacts['forecasted'][model].merge(twls['forecasted'][model],on=['site_id','datetime'])\n",
+    "    df_R2 = df_R2.loc[df_R2.index.str.contains(beach)]\n",
+    "    ax4.plot(df_R2.R2, n, label=model.replace('_', '\\_'))\n",
+    "\n",
+    "ax4.set_title(r'$R_{2\\%}$')\n",
+    "ax4.set_xlabel('Height (m)')\n",
+    "\n",
+    "# Need to choose a model to extract environmental parameters at maximum R_high time\n",
+    "model = 'mean_slope_sto06'\n",
+    "df_beach = impacts['forecasted'][model].merge(twls['forecasted'][model], on=['site_id','datetime'])\n",
+    "df_beach = df_beach.loc[df_beach.index.str.contains(beach)]\n",
+    "\n",
+    "# Wave height, wave period, beach slope\n",
+    "ax5.plot(df_beach.beta, n,color='#4daf4a')\n",
+    "ax5.set_title(r'$\\beta$')\n",
+    "ax5.set_xlabel('Mean prestorm\\nbeach slope')\n",
+    "ax5.set_xlim([0,0.15])\n",
+    "\n",
+    "ax6.plot(df_beach.Hs0, n,color='#999999')\n",
+    "ax6.set_title('$H_{s0}$')\n",
+    "ax6.set_xlabel('Sig. wave height (m)')\n",
+    "ax6.set_xlim([3,5])\n",
+    "\n",
+    "ax7.plot(df_beach.Tp, n,color='#999999')\n",
+    "ax7.set_title('$T_{p}$')\n",
+    "ax7.set_xlabel('Peak wave period (s)')\n",
+    "ax7.set_xlim([8,14])\n",
+    "\n",
+    "ax8.plot(df_beach.tide, n,color='#999999')\n",
+    "ax8.set_title('Tide \\& surge')\n",
+    "ax8.set_xlabel('Elevation (m AHD)')\n",
+    "ax8.set_xlim([0,2])\n",
+    "\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "f.subplots_adjust(top=0.88)\n",
+    "f.suptitle(beach)\n",
+    "\n",
+    "# Print to figure\n",
+    "plt.savefig('07_{}.png'.format(beach), dpi=600, bbox_inches='tight')\n",
+    "\n",
+    "plt.show()\n",
+    "plt.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Generate classification reports for each model\n",
+    "Use sklearn metrics to generate classification reports for each forecasting model."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sklearn.metrics\n", + "\n", + "# Get observed impacts\n", + "df_obs = impacts['observed']\n", + "\n", + "# Convert storm regimes to categorical datatype\n", + "cat_type = CategoricalDtype(\n", + " categories=['swash', 'collision', 'overwash', 'inundation'], ordered=True)\n", + "df_obs.storm_regime = df_obs_impacts.storm_regime.astype(cat_type)\n", + "\n", + "for model in impacts['forecasted']:\n", + " df_for = impacts['forecasted'][model]\n", + " df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n", + "\n", + " m = sklearn.metrics.classification_report(\n", + " df_obs.storm_regime.astype(cat_type).cat.codes.values,\n", + " df_for.storm_regime.astype(cat_type).cat.codes.values,\n", + " labels=[0, 1, 2, 3],\n", + " target_names=['swash', 'collision', 'overwash', 'inundation'])\n", + " print(model)\n", + " print(m)\n", + " print()\n", + " " + ] + } + ], + "metadata": { + "hide_input": false, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/07_longshore_plotting.ipynb b/notebooks/07_longshore_plotting.ipynb deleted file mode 100644 index 1f5a954..0000000 --- a/notebooks/07_longshore_plotting.ipynb +++ /dev/null @@ -1,348 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Longshore plots of each beach\n", - "- Need to create a longshore plot of each beach to see how the variables change alongshore." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup notebook" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Enable autoreloading of our modules. 
\n", - "# Most of the code will be located in the /src/ folder, \n", - "# and then called from the notebook.\n", - "%matplotlib inline\n", - "%reload_ext autoreload\n", - "%autoreload" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.core.debugger import set_trace\n", - "\n", - "import pandas as pd\n", - "import numpy as np\n", - "import os\n", - "import decimal\n", - "import plotly\n", - "import plotly.graph_objs as go\n", - "import plotly.plotly as py\n", - "import plotly.tools as tls\n", - "import plotly.figure_factory as ff\n", - "from plotly import tools\n", - "import plotly.io as pio\n", - "from scipy import stats\n", - "import math\n", - "import matplotlib\n", - "from matplotlib import cm\n", - "import colorlover as cl\n", - "from tqdm import tqdm_notebook\n", - "from ipywidgets import widgets, Output\n", - "from IPython.display import display, clear_output, Image, HTML\n", - "from scipy import stats\n", - "from sklearn.metrics import confusion_matrix\n", - "import matplotlib.pyplot as plt\n", - "from scipy.interpolate import interp1d\n", - "from pandas.api.types import CategoricalDtype" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Matplot lib default settings\n", - "plt.rcParams[\"figure.figsize\"] = (10,6)\n", - "plt.rcParams['axes.grid']=True\n", - "plt.rcParams['grid.alpha'] = 0.5\n", - "plt.rcParams['grid.color'] = \"grey\"\n", - "plt.rcParams['grid.linestyle'] = \"--\"\n", - "plt.rcParams['axes.grid']=True\n", - "\n", - "# https://stackoverflow.com/a/20709149\n", - "matplotlib.rcParams['text.usetex'] = True\n", - "\n", - "matplotlib.rcParams['text.latex.preamble'] = [\n", - " r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n", - " r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n", - " r'\\usepackage{helvet}', # set the normal font here\n", - " r'\\usepackage{amsmath}',\n", - " r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n", - " r'\\sansmath', # <- tricky! 
-- gotta actually tell tex to use!\n", - "] " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def df_from_csv(csv, index_col, data_folder='../data/interim'):\n", - " print('Importing {}'.format(csv))\n", - " return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n", - "\n", - "df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n", - "df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n", - "df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n", - "df_sites = df_from_csv('sites.csv', index_col=[0])\n", - "df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])\n", - "\n", - "# Note that the forecasted data sets should be in the same order for impacts and twls\n", - "impacts = {\n", - " 'forecasted': {\n", - " 'foreshore_slope_sto06': df_from_csv('impacts_forecasted_foreshore_slope_sto06.csv', index_col=[0]),\n", - " 'mean_slope_sto06': df_from_csv('impacts_forecasted_mean_slope_sto06.csv', index_col=[0]),\n", - " },\n", - " 'observed': df_from_csv('impacts_observed.csv', index_col=[0])\n", - " }\n", - "\n", - "\n", - "twls = {\n", - " 'forecasted': {\n", - " 'foreshore_slope_sto06': df_from_csv('twl_foreshore_slope_sto06.csv', index_col=[0, 1]),\n", - " 'mean_slope_sto06':df_from_csv('twl_mean_slope_sto06.csv', index_col=[0, 1]),\n", - " }\n", - "}\n", - "print('Done!')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Generate plot for each beach" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "beach = 'NARRA'\n", - "\n", - "# Get the dataframe\n", - "df = impacts['forecasted']['mean_slope_sto06']\n", - "df = df.rename(columns={'storm_regime': 'forecasted_regime'})\n", - "\n", - "df_beach = df.loc[df.index.str.contains(beach)]\n", - "\n", - "# Add information about hydrodynamics at max(R_high) time\n", - "df_beach = df_beach.merge(\n", - " twls['forecasted']['mean_slope_sto06'].drop(columns=['R_high', 'R_low']),\n", - " left_on=['site_id', 'datetime'],\n", - " right_on=['site_id', 'datetime'])\n", - "\n", - "# Add information about observed impacts\n", - "obs_impacts = impacts['observed'].rename(columns={\n", - " 'storm_regime': 'observed_regime'\n", - "}).observed_regime.to_frame()\n", - "df_beach = df_beach.merge(obs_impacts, left_on='site_id', right_on='site_id')\n", - "\n", - "# Convert storm regimes to categorical datatype\n", - "cat_type = CategoricalDtype(\n", - " categories=['swash', 'collision', 'overwash', 'inundation'], ordered=True)\n", - "df_beach.forecasted_regime = df_beach.forecasted_regime.astype(cat_type)\n", - "df_beach.observed_regime = df_beach.observed_regime.astype(cat_type)\n", - "\n", - "# Get index\n", - "n = [x for x in range(len(df_beach))][::-1]\n", - "n_sites = [x for x in df_beach.index][::-1]\n", - "\n", - "f, (ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8) = plt.subplots(\n", - " 1,\n", - " 8,\n", - " sharey=True,\n", - " figsize=(14, 8),\n", - " gridspec_kw={'width_ratios': [4, 4, 2, 2, 2, 2,2,2]})\n", - "\n", - "# Specify colors for storm regimes\n", - "cmap = {\n", - " 'swash': '#1a9850',\n", - " 'collision': '#fee08b',\n", - " 'overwash': '#d73027'\n", - "}\n", - "\n", - "colors = [cmap.get(x) for x in df_beach.observed_regime]\n", - "colors = ['#d73027' if c is None else c for c in colors]\n", - "\n", - "# Plot forecasted and observed 
storm regime\n", - "ax1.scatter(\n", - " df_beach.observed_regime.cat.codes.replace(-1,np.NaN),\n", - " n,\n", - " color=colors,\n", - " marker='o',\n", - " label='Observed regime')\n", - "\n", - "ax1.scatter(\n", - " df_beach.forecasted_regime.cat.codes.replace(-1,np.NaN),\n", - " n,\n", - " color='b',\n", - " marker='o',\n", - " edgecolors='black',\n", - " facecolors='none',\n", - " label='Forecasted regime')\n", - "\n", - "ax1.set_title('Storm\\nregime')\n", - "ax1.set_xticks([0,1,2,3])\n", - "ax1.set_xticklabels(['swash','collision','overwash','inundation'])\n", - "ax1.tick_params(axis='x', rotation=45)\n", - "ax1.legend(loc='center', bbox_to_anchor=(0.5, -0.15))\n", - "\n", - "# Replace yticks with site_ids\n", - "yticks = ax1.get_yticks().tolist()\n", - "yticks = [n_sites[int(y)] if 0 <= y <= len(n_sites) else y for y in yticks ]\n", - "ax1.set_yticklabels(yticks)\n", - "\n", - "# Water levels\n", - "ax2.plot(df_beach.R_high, n, color='#2c7bb6')\n", - "ax2.plot(df_beach.R_low, n, color='#2c7bb6')\n", - "ax2.fill_betweenx(\n", - " n, df_beach.R_low, df_beach.R_high, alpha=0.2, color='#2c7bb6', label='$R_{low}$ to $R_{high}$')\n", - "\n", - "# Dune elevations\n", - "ax2.plot(df_beach.dune_crest_z, n, color='#fdae61')\n", - "ax2.plot(df_beach.dune_toe_z, n, color='#fdae61')\n", - "ax2.fill_betweenx(\n", - " n, df_beach.dune_toe_z, df_beach.dune_crest_z, alpha=0.2, color='#fdae61', label='$D_{low}$ to $D_{high}$')\n", - "\n", - "ax2.set_title('TWL \\& Dune\\nElevations')\n", - "ax2.legend(loc='center',bbox_to_anchor=(0.5,-0.15))\n", - "ax2.set_xlabel('Elevation (m AHD)')\n", - "\n", - "# Plot R_high - D_low\n", - "ax3.plot(df_beach.R_high - df_beach.dune_toe_z,n,color='#999999')\n", - "ax3.axvline(x=0,color='black',linestyle=':')\n", - "ax3.set_title('$R_{high}$ - $D_{low}$')\n", - "ax3.set_xlabel('Height (m)')\n", - "ax3.set_xlim([-2,2])\n", - "\n", - "# Wave height, wave period, beach slope\n", - "ax4.plot(df_beach.Hs0, n,color='#377eb8')\n", - "ax4.set_title('$H_{s0}$')\n", - "ax4.set_xlabel('Sig. 
wave height (m)')\n", - "ax4.set_xlim([3,5])\n", - "\n", - "ax5.plot(df_beach.Tp, n,color='#e41a1c')\n", - "ax5.set_title('$T_{p}$')\n", - "ax5.set_xlabel('Peak wave period (s)')\n", - "ax5.set_xlim([8,14])\n", - "\n", - "ax6.plot(df_beach.tide, n,color='#a6cee3')\n", - "ax6.set_title('Tide')\n", - "ax6.set_xlabel('Elevation (m AHD)')\n", - "ax6.set_xlim([0,2])\n", - "\n", - "ax7.plot(df_beach.beta, n,color='#4daf4a')\n", - "ax7.set_title(r'$\\beta$')\n", - "ax7.set_xlabel('Mean prestorm\\nbeach slope')\n", - "ax7.set_xlim([0,0.15])\n", - "\n", - "ax8.plot(df_beach.R2, n,color='#6a3d9a')\n", - "ax8.set_title(r'$R_{2\\%}$')\n", - "ax8.set_xlabel('Height (m)')\n", - "\n", - "plt.tight_layout()\n", - "f.subplots_adjust(top=0.88)\n", - "f.suptitle(beach)\n", - "\n", - "\n", - "# Print to figure\n", - "plt.savefig('07-{}.png'.format(beach), dpi=600, bbox_inches='tight') \n", - "\n", - "plt.show()\n", - "plt.close()" - ] - } - ], - "metadata": { - "hide_input": false, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - "library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/09_run_comparison.ipynb b/notebooks/09_superseded_run_comparison.ipynb similarity index 100% rename from notebooks/09_run_comparison.ipynb rename to notebooks/09_superseded_run_comparison.ipynb diff --git a/notebooks/10_profile_clustering.ipynb b/notebooks/10_profile_clustering.ipynb new file mode 100644 index 0000000..14e486f --- /dev/null +++ b/notebooks/10_profile_clustering.ipynb @@ -0,0 +1,186 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "\n", + "from dtaidistance import dtw\n", + "from dtaidistance import dtw_visualisation as dtwvis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import data\n", + "def df_from_csv(csv, index_col, data_folder='../data/interim'):\n", + " print('Importing {}'.format(csv))\n", + " return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n", + "\n", + "df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n", + "\n", + "print('Done!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use dtaidistance package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "p1 = df_profiles.dropna(subset=['z']).xs(['AVOCAn0003','prestorm'],level=['site_id','profile_type']).z.values\n", + "p2 = df_profiles.dropna(subset=['z']).xs(['AVOCAn0004','prestorm'],level=['site_id','profile_type']).z.values\n", + "path = dtw.warping_path(p1,p2)\n", + "dtwvis.plot_warping(p1,p2,path)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use kshape package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "profiles = df_profiles.dropna(subset=['z'])\\\n", + " .xs(['prestorm'],level=['profile_type'])\\\n", + " .groupby('site_id').z\\\n", + " .apply(list).tolist()\n", + "\n", + "\n", + "# profiles = [x[-50:] for x in profiles]\n", + "# print(min(len(x) for x in profiles))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from kshape.core import kshape, zscore\n", + "\n", + "time_series = [[1,2,3,4], [0,1,2,3], [0,1,2,3], [1,2,2,3]]\n", + "cluster_num = 4\n", + "clusters = kshape(zscore(profiles, axis=1), cluster_num)\n", + "# print(clusters)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cluster_no = 0\n", + "\n", + "# Plot shape of all clusters\n", + "plt.figure(0)\n", + "for n,cluster in enumerate(clusters):\n", + " plt.plot(cluster[0],label=n)\n", + "plt.legend()\n", + "\n", + "plt.figure(1)\n", + "# Plot all profiles in partiuclar cluster\n", + "for profile_no in clusters[cluster_no][1]:\n", + " plt.plot(profiles[profile_no])\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "a = [1,2,3,4,5,6]\n", + "a[-1:]" + ] + } + ], + "metadata": { + "hide_input": false, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}