Compare commits


No commits in common. 'bb17d6c786b0902f4d49a6a1b0715f6afe7d1661' and 'e6bb50c00ec75f2eb23797363d1a4b47e4433f24' have entirely different histories.

.gitattributes

@@ -1,3 +0,0 @@
*.ipynb filter=nbstripout
*.ipynb diff=ipynb

@@ -29,10 +29,6 @@ venv-requirements-install: ##@environment Ensures environment.yml packages are i
 # To install new packages: conda install --prefix .venv PACKAGE
-###############################
-notebook: ##@notebooks Open jupyter notebook in venv
-	activate ./.venv && jupyter notebook
-
 ###############################
 # Get data from network drive

@@ -44,11 +44,10 @@ make push-data
 ```
 #### View notebooks
-Jupyter notebooks have been set up to help explore the data. Once you have set up your environment and pulled the data, this is probably a good place to start. To run the notebook, use the following command and navigate to the `./notebooks` folder once the jupyter interface opens in your web browser.
+Jupyter notebooks have been set up to help explore the data. Once you have set up your environment and pulled the data, this is probably a good place to start. To run the notebook, use the following command and navigate to the `./notebooks` folder.
 ```
-make notebook
+jupyter notebook
 ```
-Notebooks use nbstripout to remove output before committing.
 ## Available data
 Raw, interim and processed data used in this analysis is kept in the `/data/` folder. Data is not tracked in the repository due to size constraints, but stored locally. A mirror is kept of the coastal folder J drive which you can

@@ -16,7 +16,6 @@ dependencies:
 - matplotlib
 - line_profiler
 - nbformat
-- nbstripout
 - notebook
 - numpy
 - pandas

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

@@ -1,407 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os\n",
"import numpy.ma as ma\n",
"\n",
"import numpy\n",
"from pyearth import Earth\n",
"from matplotlib import pyplot\n",
"\n",
"np.random.seed(2017)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
" print('Importing {}'.format(csv))\n",
" return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n",
"\n",
"df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Try using pyearth"
]
},
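{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch only (synthetic data, assumed parameters): fit a MARS model\n",
"# with pyearth and read the hinge locations out of its summary. Unpruned hinge\n",
"# terms h(x0-c) / h(c-x0) correspond to slope breakpoints at x = c, which is\n",
"# what get_breakpoints() below extracts with a regex.\n",
"import numpy as np\n",
"from pyearth import Earth\n",
"x_demo = np.linspace(0, 100, 200)\n",
"z_demo = np.where(x_demo < 40, 5 - 0.02 * x_demo, 5 - 0.02 * x_demo - 0.12 * (x_demo - 40))\n",
"demo_model = Earth(penalty=3, thresh=0.0005)\n",
"demo_model.fit(x_demo, z_demo)\n",
"print(demo_model.summary())  # expect an unpruned hinge term near x = 40"
]
},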
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [
5,
20,
31,
40
]
},
"outputs": [],
"source": [
"from scipy.signal import savgol_filter\n",
"import re\n",
"from scipy.stats import linregress\n",
"import warnings\n",
"warnings.simplefilter(action='ignore', category=FutureWarning)\n",
"\n",
"def get_breakpoints(model, min_distance=20):\n",
" # Get breakpoints\n",
" breakpoints = []\n",
" for line in model.summary().split('\\n'):\n",
" # Get unpruned lines\n",
" if 'No' in line and 'None' not in line:\n",
" # Get break points\n",
" m = re.search(\"h\\(x0-(\\d*\\.?\\d+)\\)\", line)\n",
" if m:\n",
" breakpoints.append(float(m.groups()[0]))\n",
" m = re.search(\"h\\((\\d*\\.?\\d+)-x0\\)\", line)\n",
" if m:\n",
" breakpoints.append(float(m.groups()[0]))\n",
" return sorted(list(set(breakpoints)))\n",
" \n",
"def get_segments(breakpoints, x_min, x_max):\n",
" segments = []\n",
" breakpoints = [x_min] + breakpoints + [x_max]\n",
"\n",
" for x1, x2 in zip(breakpoints, breakpoints[1:]):\n",
" segments.append({\n",
" 'x_start': x1,\n",
" 'x_end': x2\n",
" })\n",
" return segments \n",
"\n",
"def get_segment_slopes(segments, x, z):\n",
" for segment in segments:\n",
" mask = ma.masked_where((segment['x_start'] < x) & (x < segment['x_end']),x ).mask\n",
" segment['z_mean'] = np.mean(z[mask])\n",
" segment['z_start'] = np.mean(z[mask][0])\n",
" segment['z_end'] = np.mean(z[mask][-1])\n",
" segment['slope'] = -linregress(x[mask], z[mask]).slope\n",
" return segments\n",
" \n",
"def classify_segments(segments, x,z):\n",
" \n",
" # Most seaward slope must be foreshore\n",
" segments[-1]['type'] = 'foreshore'\n",
" \n",
" # Most landward slope must be land\n",
" segments[0]['type'] = 'land'\n",
" \n",
" # Segments with really high slopes must be structures\n",
" for seg in segments:\n",
" if seg['slope'] > 2.0:\n",
" seg['type'] = 'structure'\n",
" \n",
" # Segments with large change of slope and \n",
" # Segment with max slope should be dune face\n",
"# dune_face_idx = [n for n, seg in enumerate(segments) if seg['slope']==max(x['slope'] for x in segments)][0]\n",
"# segments[dune_face_idx]['type'] = 'dune_face'\n",
" \n",
" # Pick out berms \n",
" for seg in segments:\n",
" if (-0.03 < seg['slope'] < 0.03 # berms should be relatively flat\n",
" and 0 < seg['z_mean'] < 4 # berms should be located between 0-4 m AHD\n",
" ): # berms should be seaward of dune face\n",
" seg['type'] = 'berm'\n",
" \n",
"# slope = None\n",
"# for seg in reversed(segments):\n",
"# if slope is None:\n",
"# continue\n",
"# elif slope - 0.03 < seg['slope'] < slope + 0.03:\n",
"# seg['type'] = 'foreshore'\n",
"# else:\n",
"# break\n",
" \n",
" return segments\n",
"\n",
"def get_piecewise_linear_model(x,z):\n",
" #Fit an Earth model\n",
" model = Earth(penalty=3,thresh=0.0005)\n",
" model.fit(x,z)\n",
" return model\n",
"\n",
"def plot_profile_classification(site_id, profile_type):\n",
" df_profile = df_profiles.query(\"site_id == '{}' and profile_type == '{}'\".format(site_id, profile_type))\n",
" x = np.array(df_profile.index.get_level_values('x').tolist())\n",
" z = np.array(df_profile.z.tolist()) \n",
" \n",
" nan_mask = ma.masked_invalid(z).mask\n",
" x = x[~nan_mask]\n",
" z_unfiltered = z[~nan_mask]\n",
" z = savgol_filter(z_unfiltered, 51, 3)\n",
" \n",
" model = get_piecewise_linear_model(x,z)\n",
" breakpoints = get_breakpoints(model)\n",
" segments = get_segments(breakpoints, x_min=x.min(), x_max=x.max())\n",
" segments = get_segment_slopes(segments, x=x, z=z)\n",
"# segments = merge_similar_segments(segments)\n",
" segments = classify_segments(segments, x=x, z=z)\n",
" \n",
" pyplot.figure()\n",
" pyplot.plot(x,z_unfiltered, color='0.5',marker='.', alpha=.2, ms=10,linestyle=\"None\")\n",
"\n",
" # Plot different segments\n",
" foreshore_segments = [x for x in segments if x.get('type') == 'foreshore']\n",
" for seg in foreshore_segments:\n",
" pyplot.plot([seg['x_start'], seg['x_end']],\n",
" [seg['z_start'], seg['z_end']],\n",
" linewidth=4, \n",
" color='b')\n",
"\n",
" land_segments = [x for x in segments if x.get('type') == 'land']\n",
" for seg in land_segments:\n",
" pyplot.plot([seg['x_start'], seg['x_end']],\n",
" [seg['z_start'], seg['z_end']],\n",
" linewidth=4, \n",
" color='g')\n",
"\n",
" berm_segments = [x for x in segments if x.get('type') == 'berm']\n",
" for seg in berm_segments:\n",
" pyplot.plot([seg['x_start'], seg['x_end']],\n",
" [seg['z_start'], seg['z_end']],\n",
" linewidth=4, \n",
" color='y')\n",
"\n",
" dune_face_segments = [x for x in segments if x.get('type') == 'dune_face']\n",
" for seg in dune_face_segments:\n",
" pyplot.plot([seg['x_start'], seg['x_end']],\n",
" [seg['z_start'], seg['z_end']],\n",
" linewidth=4, \n",
" color='r')\n",
" \n",
" structure_segments = [x for x in segments if x.get('type') == 'structure']\n",
" for seg in structure_segments:\n",
" pyplot.plot([seg['x_start'], seg['x_end']],\n",
" [seg['z_start'], seg['z_end']],\n",
" linewidth=4, \n",
" color='m')\n",
" \n",
" unclassified_segments = [x for x in segments if x.get('type') is None]\n",
" for seg in unclassified_segments:\n",
" pyplot.plot([seg['x_start'], seg['x_end']],\n",
" [seg['z_start'], seg['z_end']],\n",
" linewidth=4, \n",
" color='0.4')\n",
"\n",
" pyplot.xlabel('x (m)')\n",
" pyplot.ylabel('z (m AHD)')\n",
" pyplot.title('{} profile at {}'.format(profile_type, site_id))\n",
" pyplot.show()\n",
"\n",
" import pprint\n",
" pp = pprint.PrettyPrinter(indent=4)\n",
" pp.pprint(segments)\n",
"\n",
"plot_profile_classification('NARRA0018', 'prestorm')\n",
"plot_profile_classification('NARRA0019', 'prestorm')\n",
"plot_profile_classification('CRESn0017', 'poststorm')"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true
},
"source": [
"## Try lmfit"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [
0
],
"hidden": true
},
"outputs": [],
"source": [
"from lmfit import Model, Parameters\n",
"\n",
"def get_data():\n",
" site_id='NARRA0018'\n",
" profile_type='prestorm'\n",
" df_profile = df_profiles.query(\"site_id == '{}' and profile_type == '{}'\".format(site_id, profile_type))\n",
" x = np.array(df_profile.index.get_level_values('x').tolist())\n",
" z = np.array(df_profile.z.tolist()) \n",
"\n",
" nan_mask = ma.masked_invalid(z).mask\n",
" x = x[~nan_mask]\n",
" z = z[~nan_mask]\n",
" return x,z\n",
"\n",
"# def piecewise_linear(x, x0, x1, b, k1, k2, k3):\n",
"# condlist = [x < x0, (x >= x0) & (x < x1), x >= x1]\n",
"# funclist = [lambda x: k1*x + b, lambda x: k1*x + b + k2*(x-x0), lambda x: k1*x + b + k2*(x-x0) + k3*(x - x1)]\n",
"# return np.piecewise(x, condlist, funclist)\n",
"\n",
"# x,z = get_data()\n",
"\n",
"# fmodel = Model(piecewise_linear)\n",
"# params = Parameters()\n",
"# params.add('x0', value=0, vary=True, min=min(x), max=max(x))\n",
"# params.add('x1', value=0, vary=True, min=min(x), max=max(x))\n",
"# params.add('b', value=0, vary=True)\n",
"# params.add('k1', value=0, vary=True, min=-0.01, max=0.01)\n",
"# params.add('k2', value=0, vary=True, min=-0.1, max=-0.5)\n",
"# params.add('k3', value=0, vary=True, min=0.1, max=0.5)\n",
"\n",
"def piecewise_linear(x, x0, x1, x2, b, k1, k2, k3,k4):\n",
" condlist = [x < x0, (x >= x0) & (x < x1), (x >= x1) & (x < x2), x >= x2]\n",
" funclist = [lambda x: k1*x + b, lambda x: k1*x + b + k2*(x-x0), lambda x: k1*x + b + k2*(x-x0) + k3*(x - x1), lambda x: k1*x + b + k2*(x-x0) + k3*(x - x1) +k4*(x-x2)]\n",
" return np.piecewise(x, condlist, funclist)\n",
"\n",
"x,z = get_data()\n",
"\n",
"fmodel = Model(piecewise_linear)\n",
"params = Parameters()\n",
"params.add('x0', value=0, vary=True, min=min(x), max=max(x))\n",
"params.add('x1', value=0, vary=True, min=min(x), max=max(x))\n",
"params.add('x2', value=0, vary=True, min=min(x), max=max(x))\n",
"params.add('b', value=0, vary=True)\n",
"params.add('k1', value=0, vary=True, min=-0.5, max=0.5)\n",
"params.add('k2', value=0, vary=True, min=-0.5, max=0.5)\n",
"params.add('k3', value=0, vary=True, min=-0.5, max=0.5)\n",
"params.add('k4', value=0, vary=True, min=-0.5, max=0.5)\n",
"\n",
"\n",
"result = fmodel.fit(z, params, x=x,method='ampgo')\n",
"\n",
"\n",
"pyplot.figure()\n",
"pyplot.plot(x,z, color='0.5',marker='.', alpha=.2, ms=10,linestyle=\"None\")\n",
"pyplot.plot(x,result.best_fit, color='r')\n",
"pyplot.show()\n",
"print(result.fit_report())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Try spline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [
2
]
},
"outputs": [],
"source": [
"from scipy.signal import savgol_filter\n",
"\n",
"def get_data():\n",
" site_id='NARRA0018'\n",
" profile_type='prestorm'\n",
" df_profile = df_profiles.query(\"site_id == '{}' and profile_type == '{}'\".format(site_id, profile_type))\n",
" x = np.array(df_profile.index.get_level_values('x').tolist())\n",
" z = np.array(df_profile.z.tolist()) \n",
"\n",
" nan_mask = ma.masked_invalid(z).mask\n",
" x = x[~nan_mask]\n",
" z = z[~nan_mask]\n",
" return x,z\n",
"\n",
"x,z = get_data()\n",
"\n",
"z_filtered = savgol_filter(z, 31, 3)\n",
"\n",
"\n",
"pyplot.figure()\n",
"pyplot.plot(x,z, color='0.5',marker='.', alpha=.2, ms=10,linestyle=\"None\")\n",
"pyplot.plot(x,z_filtered, color='r')\n",
"pyplot.show()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because it is too large

@@ -1,362 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TWL Exceedance"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup notebook"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Enable autoreloading of our modules. \n",
"# Most of the code will be located in the /src/ folder, \n",
"# and then called from the notebook.\n",
"%matplotlib inline\n",
"%reload_ext autoreload\n",
"%autoreload"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.core.debugger import set_trace\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import decimal\n",
"import plotly\n",
"import plotly.graph_objs as go\n",
"import plotly.plotly as py\n",
"import plotly.tools as tls\n",
"import plotly.figure_factory as ff\n",
"from plotly import tools\n",
"import plotly.io as pio\n",
"from scipy import stats\n",
"import math\n",
"import matplotlib\n",
"from matplotlib import cm\n",
"import colorlover as cl\n",
"\n",
"from ipywidgets import widgets, Output\n",
"from IPython.display import display, clear_output, Image, HTML\n",
"\n",
"from sklearn.metrics import confusion_matrix"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
" print('Importing {}'.format(csv))\n",
" return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n",
"\n",
"df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n",
"df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n",
"df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n",
"df_sites = df_from_csv('sites.csv', index_col=[0])\n",
"df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])\n",
"\n",
"# Note that the forecasted data sets should be in the same order for impacts and twls\n",
"impacts = {\n",
" 'forecasted': {\n",
" 'foreshore_slope_sto06': df_from_csv('impacts_forecasted_foreshore_slope_sto06.csv', index_col=[0]),\n",
" 'mean_slope_sto06': df_from_csv('impacts_forecasted_mean_slope_sto06.csv', index_col=[0]),\n",
" },\n",
" 'observed': df_from_csv('impacts_observed.csv', index_col=[0])\n",
" }\n",
"\n",
"\n",
"twls = {\n",
" 'forecasted': {\n",
" 'foreshore_slope_sto06': df_from_csv('twl_foreshore_slope_sto06.csv', index_col=[0, 1]),\n",
" 'mean_slope_sto06':df_from_csv('twl_mean_slope_sto06.csv', index_col=[0, 1]),\n",
" }\n",
"}\n",
"print('Done!')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Calculate vertical distribution of wave count\n",
"For each site, calculate how many waves reached a certain elevation (store as a binned histogram)."
]
},
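{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal sketch (made-up values) of the per-hour wave sampling used below: assume\n",
"# roughly 3600/Tp waves per hour, with individual wave TWLs normally distributed\n",
"# about tide + setup with sigma = S_total/4. Evaluating the inverse CDF (ppf) at\n",
"# each wave's exceedance probability gives one TWL elevation per wave. Note the\n",
"# last wave gets ppf(0) = -inf, which is why non-finite values are dropped below.\n",
"from scipy import stats\n",
"tide, setup, S_total, Tp = 0.8, 0.5, 1.2, 10.0\n",
"n_waves = int(3600 / Tp)\n",
"dist = stats.norm(loc=tide + setup, scale=S_total / 4)\n",
"twl_per_wave = sorted((dist.ppf(1 - x / n_waves) for x in range(1, n_waves + 1)), reverse=True)\n",
"print(twl_per_wave[:5])  # the five highest wave TWLs in this hour"
]
},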
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Helper functions\n",
"def find_nearest(array, value):\n",
" array = np.asarray(array)\n",
" idx = np.nanargmin((np.abs(array - value)))\n",
" return array[idx], idx"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_profile_features_crest_toes.loc[(site_id,'prestorm'),'dune_toe_z']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data = []\n",
"for site_id, df_site_twl in twls['forecasted']['mean_slope_sto06'].groupby('site_id'):\n",
" \n",
" twl_eles_per_wave = []\n",
" \n",
" # Iterate through each timestamp and calculate the number of waves at each interavl.\n",
" # THIS LOOP IS SLOW\n",
" for row in df_site_twl.itertuples():\n",
" \n",
" distribution = stats.norm(loc=row.tide+row.setup, scale=row.S_total/4) # CHECK\n",
"\n",
" # Total number of waves we expect in this period\n",
" n_waves = int(3600 / row.Tp) # Check that we have 1 hour\n",
" \n",
" # Get z elevation of each wave twl in this hour and append to list\n",
" twl_eles_per_wave.extend([distribution.ppf(1-x/n_waves) for x in range(1,n_waves+1)])\n",
" \n",
" # Remove nans and infs # CHECK WHY INF\n",
" twl_eles_per_wave = list(np.asarray(twl_eles_per_wave)[np.isfinite(twl_eles_per_wave)])\n",
" \n",
" # Sort wave twl z elevations in descending list\n",
" twl_eles_per_wave.sort(reverse=True) \n",
" \n",
" # Get index of closest value of dune toe. This is the number of waves that exceeded the the dune toe\n",
" try:\n",
" _, idx = find_nearest(twl_eles_per_wave, dune_toe_z)\n",
" except:\n",
" continue\n",
" \n",
" # Get forecasted and observed impacts\n",
" forecasted_regime = impacts['forecasted']['mean_slope_sto06'].loc[site_id,'storm_regime']\n",
" observed_regime = impacts['observed'].loc[site_id,'storm_regime']\n",
" \n",
" counts, bin_edges = np.histogram(twl_eles_per_wave, bins=100) \n",
" \n",
" data.append({\n",
" 'site_id': site_id,\n",
" 'forecasted_regime': forecasted_regime,\n",
" 'observed_regime': observed_regime,\n",
" 'n_waves_exceeding_dune_toe': idx,\n",
" 'n_waves': [x for x in range(0,500,1)],\n",
" 'truncated_twl_levels': [twl_eles_per_wave[x] for x in range(0,500,1)],\n",
" 'truncated_dune_toe_z': df_profile_features_crest_toes.loc[(site_id,'prestorm'),'dune_toe_z'],\n",
" 'full_counts': counts,\n",
" 'full_bin_edges': bin_edges,\n",
" })\n",
" \n",
" print('Done {}'.format(site_id))\n",
"\n",
"data_twl = data\n",
"# df = pd.DataFrame(data)\n",
"# df = df.set_index('site_id')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"counts, bin_edges = np.histogram (data_twl[0]['twl_levels'], bins=50) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"list(np.asarray(twl_eles_per_wave)[~np.isfinite(twl_eles_per_wave)])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = tools.make_subplots(\n",
" rows=2,\n",
" cols=2,\n",
" specs=[[{}, {}], [{}, {}]],\n",
" subplot_titles=('Swash/Swash', 'Swash/Collision', \n",
" 'Collision/Swash', 'Collision/Collision'),\n",
" shared_xaxes=True, shared_yaxes=True,)\n",
"\n",
"data = []\n",
"for site in data_twl:\n",
" if site['forecasted_regime'] == 'swash' and site[\n",
" 'observed_regime'] == 'swash':\n",
" x_col = 1\n",
" y_col = 1\n",
" elif site['forecasted_regime'] == 'collision' and site[\n",
" 'observed_regime'] == 'collision':\n",
" x_col = 2\n",
" y_col = 2\n",
" elif site['forecasted_regime'] == 'swash' and site[\n",
" 'observed_regime'] == 'collision':\n",
" x_col = 2\n",
" y_col = 1\n",
" elif site['forecasted_regime'] == 'collision' and site[\n",
" 'observed_regime'] == 'swash':\n",
" x_col = 1\n",
" y_col = 2\n",
" else:\n",
" continue\n",
"\n",
" fig.append_trace(\n",
" go.Scattergl(\n",
" x=[x - site['dune_toe_z'] for x in site['twl_levels']],\n",
" y=site['n_waves'],\n",
" name=site['site_id'],\n",
" line = dict(\n",
" color = ('rgba(22, 22, 22, 0.2)'),\n",
" width = 0.5,)),\n",
" x_col,\n",
" y_col)\n",
"\n",
"# layout = go.Layout(\n",
"# xaxis=dict(domain=[0, 0.45]),\n",
"# yaxis=dict(\n",
"# domain=[0, 0.45],\n",
"# type='log',\n",
"# ),\n",
"# xaxis2=dict(domain=[0.55, 1]),\n",
"# xaxis4=dict(domain=[0.55, 1], anchor='y4'),\n",
"# yaxis3=dict(\n",
"# domain=[0.55, 1],\n",
"# type='log',\n",
"# ),\n",
"# yaxis4=dict(\n",
"# domain=[0.55, 1],\n",
"# anchor='x4',\n",
"# type='log',\n",
"# ))\n",
"\n",
"fig['layout'].update(showlegend=False, title='Specs with Subplot Title',height=800,)\n",
"\n",
"for ax in ['yaxis','yaxis2']:\n",
"# fig['layout'][ax]['type']='log'\n",
" fig['layout'][ax]['range']= [0,100]\n",
"\n",
"for ax in ['xaxis', 'xaxis2']:\n",
" fig['layout'][ax]['range']= [-1,1]\n",
"\n",
"go.FigureWidget(fig)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig['layout']['yaxis']"
]
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@@ -1,356 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Check change in mean slope\n",
"- Check the effect of changes in prestorm and poststorm mean slope.\n",
"- If there is a large berm, the prestorm mean slope (between dune toe and MHW) could be too small, and underpredict wave runup and TWL.\n"
]
},
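{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch (assumed values): the mean slope beta is rise over run between\n",
"# the dune toe and MHW (z = 0.7 m AHD here). A wide, flat berm adds horizontal\n",
"# distance without adding elevation, so it reduces beta and hence the forecast\n",
"# runup and TWL.\n",
"dune_toe_x, dune_toe_z = 60.0, 3.0\n",
"x_mhw, z_mhw = 100.0, 0.7\n",
"beta = (dune_toe_z - z_mhw) / (x_mhw - dune_toe_x)\n",
"print('mean slope beta = {:.3f}'.format(beta))  # 2.3 m over 40 m -> 0.058\n",
"\n",
"# Same profile with a 40 m berm between the dune toe and MHW:\n",
"x_mhw_berm = 140.0\n",
"beta_berm = (dune_toe_z - z_mhw) / (x_mhw_berm - dune_toe_x)\n",
"print('with berm, beta = {:.3f}'.format(beta_berm))  # 2.3 m over 80 m -> 0.029"
]
},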
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup notebook"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Enable autoreloading of our modules. \n",
"# Most of the code will be located in the /src/ folder, \n",
"# and then called from the notebook.\n",
"%matplotlib inline\n",
"%reload_ext autoreload\n",
"%autoreload"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.core.debugger import set_trace\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"\n",
"import plotly\n",
"import plotly.graph_objs as go\n",
"import plotly.plotly as py\n",
"import plotly.tools as tools\n",
"import plotly.figure_factory as ff\n",
"import plotly.io as pio\n",
"\n",
"import itertools\n",
"\n",
"import matplotlib\n",
"from matplotlib import cm\n",
"import colorlover as cl\n",
"\n",
"from ipywidgets import widgets, Output\n",
"from IPython.display import display, clear_output, Image, HTML\n",
"\n",
"from sklearn.metrics import confusion_matrix"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import data\n",
"Import our data into pandas Dataframes for the analysis. Data files are `.csv` files which are stored in the `./data/interim/` folder."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
" print('Importing {}'.format(csv))\n",
" return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n",
"\n",
"df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n",
"df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n",
"df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n",
"df_sites = df_from_csv('sites.csv', index_col=[0])\n",
"df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])\n",
"\n",
"# Note that the forecasted data sets should be in the same order for impacts and twls\n",
"impacts = {\n",
" 'forecasted': {\n",
" 'foreshore_slope_sto06': df_from_csv('impacts_forecasted_foreshore_slope_sto06.csv', index_col=[0]),\n",
" 'mean_slope_sto06': df_from_csv('impacts_forecasted_mean_slope_sto06.csv', index_col=[0]),\n",
" },\n",
" 'observed': df_from_csv('impacts_observed.csv', index_col=[0])\n",
" }\n",
"\n",
"\n",
"twls = {\n",
" 'forecasted': {\n",
" 'foreshore_slope_sto06': df_from_csv('twl_foreshore_slope_sto06.csv', index_col=[0, 1]),\n",
" 'mean_slope_sto06':df_from_csv('twl_mean_slope_sto06.csv', index_col=[0, 1]),\n",
" }\n",
"}\n",
"print('Done!')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Plot prestorm vs poststorm mean slopes\n",
"Prestorm slopes have already been calculated as part of the TWL forecasting, however we'll need to extract the poststorm mean slopes from our profiles at each site."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Prestorm slopes are easy as we have already calculated this as part of the \n",
"df_slopes_prestorm = twls['forecasted']['mean_slope_sto06'].groupby('site_id').head(1).reset_index().set_index(['site_id']).beta.to_frame()\n",
"\n",
"# Get x and z at mhw (z=0.7m) for each site\n",
"z_mhw = 0.7\n",
"mhw_poststorm = []\n",
"for site, df in df_profiles.xs('poststorm', level='profile_type').groupby('site_id'):\n",
" df = df.dropna(subset=['z'])\n",
" df = df.iloc[(df['z']-z_mhw).abs().argsort().head(1)].reset_index()\n",
" df = df.iloc[0]\n",
" mhw_poststorm.append({\n",
" 'site_id': df.site_id,\n",
" 'x_mhw': df.x,\n",
" 'z_mhw': df.z\n",
" })\n",
"# break\n",
"df_mhw_poststorm = pd.DataFrame(mhw_poststorm)\n",
"df_mhw_poststorm = df_mhw_poststorm.set_index('site_id')\n",
"\n",
"# Get x and z at poststorm dune toe for each site\n",
"df_dune_toe_poststorm = df_profile_features_crest_toes.xs('poststorm', level='profile_type')[['dune_toe_x','dune_toe_z']]\n",
"\n",
"# Join df for mhw and dune toe\n",
"df = df_mhw_poststorm.join(df_dune_toe_poststorm)\n",
"df['beta'] = -(df['dune_toe_z'] - df['z_mhw']) / (df['dune_toe_x'] -df['x_mhw'])\n",
"df_slopes_poststorm = df['beta'].to_frame()\n",
"\n",
"# Count how many nans\n",
"print('Number of nans: {}'.format(df_slopes_poststorm.beta.isna().sum()))\n",
"\n",
"# Display dataframe\n",
"print('df_slopes_poststorm:')\n",
"df_slopes_poststorm"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, let's join our post storm slopes, prestorm slopes, observed and forecasted impacts into one data frame to make it easier to plot."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dfs = [df_slopes_poststorm.rename(columns={'beta':'poststorm_beta'}),\n",
" df_slopes_prestorm.rename(columns={'beta':'prestorm_beta'}),\n",
" impacts['observed']['storm_regime'].to_frame().rename(columns={'storm_regime': 'observed_regime'}),\n",
" impacts['forecasted']['mean_slope_sto06']['storm_regime'].to_frame().rename(columns={'storm_regime': 'forecasted_regime'})\n",
" ]\n",
"\n",
"df = pd.concat(dfs, axis='columns')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_data.index"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot our data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = tools.make_subplots(\n",
" rows=2,\n",
" cols=2,\n",
" specs=[[{}, {}], [{}, {}]],\n",
" subplot_titles=('Swash/Swash', 'Swash/Collision', \n",
" 'Collision/Swash', 'Collision/Collision'),\n",
" shared_xaxes=True, shared_yaxes=True,)\n",
"\n",
"\n",
"# Loop through combinations of observed/forecasted swash/collision\n",
"data = []\n",
"for forecasted_regime, observed_regime in itertools.product(['swash','collision'],repeat=2):\n",
" \n",
" # Get data for this combination \n",
" query = 'forecasted_regime==\"{}\" & observed_regime==\"{}\"'.format(forecasted_regime, observed_regime)\n",
" df_data = df.query(query)\n",
" print(query)\n",
" \n",
" \n",
" # Determine which subplot to plot results in\n",
" if forecasted_regime == 'swash' and observed_regime == 'swash':\n",
" x_col = 1\n",
" y_col = 1\n",
" elif forecasted_regime == 'collision' and observed_regime == 'collision':\n",
" x_col = 2\n",
" y_col = 2\n",
" elif forecasted_regime == 'swash' and observed_regime == 'collision':\n",
" x_col = 2\n",
" y_col = 1\n",
" elif forecasted_regime == 'collision' and observed_regime == 'swash':\n",
" x_col = 1\n",
" y_col = 2\n",
" else:\n",
" print('something went wrong')\n",
" continue\n",
"\n",
" fig.append_trace(\n",
" go.Scatter(\n",
" x=df_data.prestorm_beta,\n",
" y=df_data.poststorm_beta,\n",
" text = df_data.index.tolist(),\n",
" hoverinfo = 'text',\n",
" mode = 'markers',\n",
" line = dict(\n",
" color = ('rgba(22, 22, 22, 0.2)'),\n",
" width = 0.5,)),\n",
" x_col,\n",
" y_col)\n",
"\n",
"# layout = go.Layout(\n",
"# xaxis=dict(domain=[0, 0.45]),\n",
"# yaxis=dict(\n",
"# domain=[0, 0.45],\n",
"# type='log',\n",
"# ),\n",
"# xaxis2=dict(domain=[0.55, 1]),\n",
"# xaxis4=dict(domain=[0.55, 1], anchor='y4'),\n",
"# yaxis3=dict(\n",
"# domain=[0.55, 1],\n",
"# type='log',\n",
"# ),\n",
"# yaxis4=dict(\n",
"# domain=[0.55, 1],\n",
"# anchor='x4',\n",
"# type='log',\n",
"# ))\n",
"\n",
"fig['layout'].update(showlegend=False, title='Specs with Subplot Title',height=800,)\n",
"\n",
"for ax in ['yaxis','yaxis2']:\n",
"# fig['layout'][ax]['type']='log'\n",
" fig['layout'][ax]['range']= [0,0.2]\n",
"\n",
"for ax in ['xaxis', 'xaxis2']:\n",
" fig['layout'][ax]['range']= [0,0.2]\n",
"\n",
"go.FigureWidget(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Looking at the above plot:\n",
"- In general, we can see that the prestorm mean slope is flatter than the poststorm mean slope. This can be explained by the presence of prestorm berms, which increase the prestorm mean slope. During the storm, these berms get eroded and decrease the slope.\n",
"- **Collision/Collision**: Where we observe and predict collision, we see steeper prestorm slopes. This is to be expected since larger slopes will generate more runup and higher TWLs.\n",
"- **Swash/Collision**: Where we predict collision but observe swash, we can see that the prestorm mean slopes >0.1 generate high TWLs. \n",
"\n"
]
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@@ -36,8 +36,6 @@ def volume_change(df_profiles, df_profile_features, zone):
     for site_id, df_site in sites:
         logger.debug("Calculating change in beach volume at {} in {} zone".format(site_id, zone))
-        # TODO Change this query to an index
-
         query = "site_id=='{}'&profile_type=='prestorm'".format(site_id)
         prestorm_dune_toe_x = df_profile_features.query(query).dune_toe_x.tolist()
         prestorm_dune_crest_x = df_profile_features.query(query).dune_crest_x.tolist()
@@ -46,7 +44,7 @@ def volume_change(df_profiles, df_profile_features, zone):
         prestorm_dune_crest_x = return_first_or_nan(prestorm_dune_crest_x)
         prestorm_dune_toe_x = return_first_or_nan(prestorm_dune_toe_x)
-        # If no dune toe has been defined, Dlow = Dhigh. Refer to Sallenger (2000).
+        # If no dune to has been defined, Dlow = Dhigh. Refer to Sallenger (2000).
         if np.isnan(prestorm_dune_toe_x):
             prestorm_dune_toe_x = prestorm_dune_crest_x
@@ -147,10 +145,6 @@ def storm_regime(df_observed_impacts):
     df_observed_impacts.loc[swash, "storm_regime"] = "swash"
     df_observed_impacts.loc[collision, "storm_regime"] = "collision"
-
-    # TODO We may be able to identify observed regimes by looking at the change in crest and toe elevation. This would be useful for
-    # locations where we have overwash and cannot calculate the change in volume correctly. Otherwise, maybe it's better to put it in manually.
-
     return df_observed_impacts
