{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup " ] }, { "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "### Import packages" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T04:02:38.872624Z", "start_time": "2018-12-10T04:02:38.448908Z" }, "hidden": true }, "outputs": [], "source": [ "%matplotlib inline\n", "%reload_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T04:03:27.147221Z", "start_time": "2018-12-10T04:03:27.141204Z" }, "hidden": true }, "outputs": [], "source": [ "from IPython.core.debugger import set_trace\n", "\n", "import pandas as pd\n", "import numpy as np\n", "import os\n", "import matplotlib\n", "import plotly\n", "import plotly.graph_objs as go\n", "import plotly.plotly as py\n", "import plotly.tools as tls\n", "import plotly.figure_factory as ff\n", "import plotly.io as pio\n", "from plotly import tools\n", "\n", "from copy import copy\n", "import scipy\n", "from sklearn import svm\n", "\n", "# Suppress FutureWarnings raised by pandas/numpy\n", "import warnings\n", "warnings.simplefilter(action=\"ignore\", category=FutureWarning)" ] }, { "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "### Load data\n", "Load data from the `../data/interim/` folder and parse into `pandas` dataframes."
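The loading helper in the next cell relies on `index_col=[0, 1, 2]` producing a `MultiIndex`, which later cells interrogate with `.query()` and `get_level_values()`. A minimal, self-contained sketch of that behaviour; the column names here are assumed to mirror `profiles.csv` and the site id is made up:

```python
import io
import pandas as pd

# Toy CSV mimicking the assumed layout of profiles.csv:
# site_id, profile_type, x, z
csv_text = """site_id,profile_type,x,z
site_a,prestorm,0,5.2
site_a,prestorm,1,5.0
site_a,poststorm,0,5.1
"""

# index_col=[0, 1, 2] builds a three-level MultiIndex over the
# first three columns, leaving z as the only data column
df_demo = pd.read_csv(io.StringIO(csv_text), index_col=[0, 1, 2])

# .query() can filter directly on an index level by name
print(list(df_demo.index.names))
print(df_demo.query("profile_type == 'prestorm'").shape[0])
```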
] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T04:03:40.638982Z", "start_time": "2018-12-10T04:03:31.765531Z" }, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Importing profiles.csv\n", "Importing profile_features.csv\n", "Importing impacts_forecasted_foreshore_slope_sto06.csv\n", "Importing impacts_forecasted_mean_slope_sto06.csv\n", "Importing impacts_observed.csv\n", "Importing twl_foreshore_slope_sto06.csv\n", "Importing twl_mean_slope_sto06.csv\n", "Done!\n" ] } ], "source": [ "def df_from_csv(csv, index_col, data_folder='../data/interim'):\n", " print('Importing {}'.format(csv))\n", " return pd.read_csv(os.path.join(data_folder, csv), index_col=index_col)\n", "\n", "df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n", "df_profile_features = df_from_csv('profile_features.csv', index_col=[0])\n", "\n", "impacts = {\n", " 'forecasted': {\n", " 'foreshore_slope_sto06': df_from_csv('impacts_forecasted_foreshore_slope_sto06.csv', index_col=[0]),\n", " 'mean_slope_sto06': df_from_csv('impacts_forecasted_mean_slope_sto06.csv', index_col=[0]),\n", " },\n", " 'observed': df_from_csv('impacts_observed.csv', index_col=[0])\n", " }\n", "\n", "twls = {\n", " 'forecasted': {\n", " 'foreshore_slope_sto06': df_from_csv('twl_foreshore_slope_sto06.csv', index_col=[0, 1]),\n", " 'mean_slope_sto06': df_from_csv('twl_mean_slope_sto06.csv', index_col=[0, 1]),\n", " }\n", "}\n", "\n", "print('Done!')" ] }, { "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "## Difference between $R_{high}$ and $D_{low}$\n", "Since the Storm Impact Regime is so dependent on the predicted $R_{high}$ and observed $D_{low}$ levels, let's investigate the difference between these two variables in more detail.\n", "\n" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "### Gather data\n", "\n", "First, let's split the `site_ids` by whether 
we observed swash or collision and by whether we predicted swash or collision. We want to identify if there are any differences between these four groups." ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T04:05:15.741391Z", "start_time": "2018-12-10T04:05:15.734352Z" }, "hidden": true }, "outputs": [], "source": [ "def get_site_ids(df_forecasted, df_observed, forecasted_regime,\n", " observed_regime):\n", " \"\"\"\n", " Returns list of site_ids which match the given forecasted and observed regime\n", " \"\"\"\n", " set1 = set(\n", " df_forecasted.query(\"storm_regime == '{}'\".format(forecasted_regime)).\n", " index.get_level_values('site_id'))\n", " set2 = set(\n", " df_observed.query(\"storm_regime == '{}'\".format(observed_regime)).\n", " index.get_level_values('site_id'))\n", " return sorted(list(set1.intersection(set2)))\n", "\n", "\n", "def get_R_high_D_low_diff(site_ids, df_profile_features, df_twls):\n", " \"\"\"\n", " Returns a series of the difference between R_high and D_low at each site.
\n", " Positive values indicate R_high is larger than D_low.\n", " \"\"\"\n", " # Get dune toes at these sites and predicted max R_high\n", " df_toes = df_profile_features.loc[site_ids].query(\n", " 'profile_type==\"prestorm\"').dune_toe_z\n", " df_R_highs = df_twls.loc[site_ids].groupby('site_id')['R_high'].max()\n", "\n", " # Join into one dataframe\n", " df_twl_toes = pd.concat([df_toes, df_R_highs], axis=1, sort=True)\n", " df_twl_toes['diff'] = df_twl_toes['R_high'] - df_twl_toes['dune_toe_z']\n", " return df_twl_toes['diff']" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T04:13:00.035545Z", "start_time": "2018-12-10T04:12:59.176352Z" }, "hidden": true }, "outputs": [], "source": [ "# Identify sites where swash regime was correctly or overpredicted\n", "\n", "swash_overpredicted_site_ids = get_site_ids(\n", " df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n", " df_observed=impacts['observed'],\n", " forecasted_regime='collision',\n", " observed_regime='swash')\n", "swash_overpredicted_diffs = get_R_high_D_low_diff(\n", " site_ids=swash_overpredicted_site_ids,\n", " df_profile_features=df_profile_features,\n", " df_twls=twls['forecasted']['mean_slope_sto06'])\n", "\n", "swash_correct_site_ids = get_site_ids(\n", " df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n", " df_observed=impacts['observed'],\n", " forecasted_regime='swash',\n", " observed_regime='swash')\n", "swash_correct_diffs = get_R_high_D_low_diff(\n", " site_ids=swash_correct_site_ids,\n", " df_profile_features=df_profile_features,\n", " df_twls=twls['forecasted']['mean_slope_sto06'])" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T04:12:58.434634Z", "start_time": "2018-12-10T04:12:57.096839Z" }, "hidden": true }, "outputs": [], "source": [ "# Identify sites where collision regime was correctly or underpredicted\n", "\n", "collision_underpredicted_site_ids = 
get_site_ids(\n", " df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n", " df_observed=impacts['observed'],\n", " forecasted_regime='swash',\n", " observed_regime='collision')\n", "collision_underpredicted_diffs = get_R_high_D_low_diff(\n", " site_ids=collision_underpredicted_site_ids,\n", " df_profile_features=df_profile_features,\n", " df_twls=twls['forecasted']['mean_slope_sto06'])\n", "\n", "collision_correct_site_ids = get_site_ids(\n", " df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n", " df_observed=impacts['observed'],\n", " forecasted_regime='collision',\n", " observed_regime='collision')\n", "collision_correct_diffs = get_R_high_D_low_diff(\n", " site_ids=collision_correct_site_ids,\n", " df_profile_features=df_profile_features,\n", " df_twls=twls['forecasted']['mean_slope_sto06'])" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "### Plot difference in $R_{high}$ and $D_{low}$ for swash and collision regimes\n", "What does the distribution of elevations look like when we observe swash and collision regimes? Are there any differences between correctly and incorrectly predicted swash regime impacts?"
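Beyond eyeballing the histograms below, a distribution-level test can quantify whether the correctly and incorrectly predicted groups differ. This is a sketch on synthetic stand-ins (not the real `swash_correct_diffs`/`swash_overpredicted_diffs`), using `scipy.stats.mannwhitneyu`, which is an addition here rather than part of the original analysis:

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(42)

# Synthetic stand-ins for the R_high - D_low differences of the
# correctly predicted and overpredicted swash groups
correct_diffs = rng.normal(loc=-0.5, scale=0.3, size=200)
overpredicted_diffs = rng.normal(loc=0.3, scale=0.3, size=50)

# Non-parametric test: could the two samples come from one distribution?
stat, p = stats.mannwhitneyu(correct_diffs, overpredicted_diffs,
                             alternative='two-sided')
print('U = {:.0f}, p = {:.3g}'.format(stat, p))
```

A small p-value would support the visual impression that the two groups occupy different parts of the $R_{high} - D_{low}$ axis.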
] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T04:06:02.634355Z", "start_time": "2018-12-10T04:05:42.644585Z" }, "hidden": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f53f0ffc577b406ab56c1357c1145683", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FigureWidget({\n", " 'data': [{'marker': {'color': '#ef8a62'},\n", " 'name': 'Overpredicted',\n", " …" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trace1 = go.Histogram(\n", " y=swash_correct_diffs.tolist(),\n", " opacity=0.75,\n", " name='Correctly predicted',\n", " marker=dict(color='#67a9cf', ),\n", " ybins=dict(size=0.1),\n", ")\n", "trace2 = go.Histogram(\n", " y=swash_overpredicted_diffs.tolist(),\n", " opacity=0.75,\n", " name='Overpredicted',\n", " marker=dict(color='#ef8a62', ),\n", " ybins=dict(size=0.1),\n", ")\n", "\n", "layout = go.Layout(\n", " title='R_high - D_low
<br>Swash Regime',\n", " barmode='overlay',\n", " yaxis=dict(title='z (m AHD)'),\n", " xaxis=dict(title='Count'),\n", " bargap=0.2,\n", " bargroupgap=0.1,\n", " legend=dict(x=.6, y=1))\n", "\n", "g_plot_swash = go.FigureWidget(data=[trace2, trace1], layout=layout)\n", "\n", "# To output to file\n", "img_bytes = pio.write_image(\n", " g_plot_swash,\n", " '02_R_high_D_low_swash.png',\n", " format='png',\n", " width=600,\n", " height=400,\n", " scale=5)\n", "\n", "g_plot_swash" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "The plot above shows that when $R_{high}$ - $D_{low}$ is $<0$, swash is correctly predicted. This is true by definition, so it is not surprising. The largest concentration of $R_{high}$ - $D_{low}$ values sits slightly below $0$, so it appears that we have correctly predicted a majority of the observed swash regime events. \n", "\n", "Let's do the same thing, now considering `site_ids` where we have observed collision." ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T04:13:03.703119Z", "start_time": "2018-12-10T04:13:03.463485Z" }, "hidden": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4ec08bf2ea6f482ea3c52aa3348a05e2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FigureWidget({\n", " 'data': [{'marker': {'color': '#ef8a62'},\n", " 'name': 'Underpredicted',\n", " …" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trace1 = go.Histogram(\n", " y=collision_correct_diffs.tolist(),\n", " opacity=0.75,\n", " name='Correctly predicted',\n", " marker=dict(color='#67a9cf', ),\n", " ybins=dict(size=0.1),\n", ")\n", "trace2 = go.Histogram(\n", " y=collision_underpredicted_diffs.tolist(),\n", " opacity=0.75,\n", " name='Underpredicted',\n", " marker=dict(color='#ef8a62', ),\n", " ybins=dict(size=0.1),\n", ")\n", "\n", "layout = go.Layout(\n", " title='R_high - D_low
<br>Collision Regime',\n", " barmode='overlay',\n", " yaxis=dict(title='z (m AHD)'),\n", " xaxis=dict(title='Count'),\n", " bargap=0.2,\n", " bargroupgap=0.1,\n", " legend=dict(x=.6, y=1))\n", "\n", "g_plot_collision = go.FigureWidget(data=[trace2, trace1], layout=layout)\n", "\n", "# To output to file\n", "img_bytes = pio.write_image(\n", " g_plot_collision,\n", " '02_R_high_D_low_collision.png',\n", " format='png',\n", " width=600,\n", " height=400,\n", " scale=5)\n", "\n", "g_plot_collision" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "We can see a trend similar to the swash regime, except flipped. A majority of the correctly forecasted collision regimes occur when $R_{high}$ - $D_{low}$ is $>0$, by definition. " ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "### TODO Does dune toe lower?\n", "Are there any patterns that dictate whether the dune toe rises or lowers when subjected to collision? Is it just based on the peak $R_{high}$ level, similar to equilibrium theory?\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Relationship between parameters\n", "Let's further investigate the relationship between hydrodynamic and morphodynamic parameters and see if they can tell us any more about how the storm regime may be correctly or incorrectly predicted." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Add functions for adding parameters to the dataframe\n", "We need some additional functions which will add parameters to our dataframe so we can plot them."
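Each helper defined in the next cell follows the same pattern: build a single-column frame carrying the new parameter, then `merge` it onto the main dataframe on the index so rows align by `site_id`. A minimal sketch of that pattern with toy data (all names here are illustrative only):

```python
import pandas as pd

# Main per-site dataframe, indexed by site_id
df_main = pd.DataFrame({'R_high': [2.1, 3.4]},
                       index=pd.Index(['site_a', 'site_b'], name='site_id'))

# A derived parameter as a single-column frame with the same index
berm_width = pd.Series([35.0, 52.0],
                       index=df_main.index,
                       name='berm_width').to_frame()

# left_index/right_index aligns rows by site_id, adding the new column;
# sites missing from either side would be dropped (inner join by default)
df_main = df_main.merge(berm_width, left_index=True, right_index=True)
print(df_main.columns.tolist())  # ['R_high', 'berm_width']
```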
] }, { "cell_type": "code", "execution_count": 25, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T04:55:43.229433Z", "start_time": "2018-12-10T04:55:43.218402Z" } }, "outputs": [], "source": [ "def add_berm_width(df, df_profiles, df_profile_features):\n", " \"\"\"\n", " Adds a new column to the dataframe with the prestorm berm width\n", " \"\"\"\n", " # Get x coordinates of dune toe and limit of survey (z=0)\n", " df_profile_end_x = df_profiles.query('profile_type==\"prestorm\"').dropna(\n", " subset=['z']).groupby('site_id').tail(1).reset_index(\n", " ['profile_type', 'x']).x.rename('x_end').to_frame()\n", " df_profile_dune_toe_x = df_profile_features.query(\n", " 'profile_type==\"prestorm\"').dune_toe_x.to_frame()\n", "\n", " # Merge and take the difference to calculate berm width\n", " df_merged = df_profile_end_x.merge(\n", " df_profile_dune_toe_x, left_index=True, right_index=True)\n", " berm_width = (df_merged['x_end'] -\n", " df_merged['dune_toe_x']).rename('berm_width').to_frame()\n", "\n", " # Return the dataframe with the berm_width col merged\n", " return df.merge(berm_width, left_index=True, right_index=True)\n", "\n", "\n", "def add_observed_regime(df, df_observed, new_col='observed_storm_regime'):\n", " \"\"\"\n", " Adds a new column to the dataframe with the observed storm regime\n", " \"\"\"\n", " return df.merge(\n", " df_observed.storm_regime.rename(new_col).to_frame(),\n", " left_index=True,\n", " right_index=True)\n", "\n", "\n", "def add_mean_slope(df, df_twl):\n", " \"\"\"\n", " Adds a new column to the dataframe with prestorm mean slope\n", " \"\"\"\n", " df_mean_slope = df_twl.groupby('site_id').first().beta.rename(\n", " 'mean_slope').to_frame()\n", " return df.merge(df_mean_slope, left_index=True, right_index=True)\n", "\n", "\n", "def add_prestorm_berm_vol(df, df_impacts_observed):\n", " \"\"\"\n", " Adds a new column to the dataframe with prestorm berm volume\n", " \"\"\"\n", " return df.merge(\n", " 
df_impacts_observed.prestorm_swash_vol.rename('prestorm_berm_vol').\n", " to_frame(),\n", " left_index=True,\n", " right_index=True)\n", "\n", "\n", "def add_prediction_class(df_impacts, prediction_classes):\n", " \"\"\"\n", " Adds a column which groups site_ids into the predicted and observed storm regime combination\n", " \"\"\"\n", " for prediction_class in prediction_classes:\n", " df_impacts.loc[df_impacts.index.isin(prediction_class['site_ids']),\n", " 'prediction_class'] = prediction_class['class']\n", " return df_impacts" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create the dataframe\n", "We need to combine and add data into a single dataframe for comparison purposes." ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T04:55:45.293320Z", "start_time": "2018-12-10T04:55:44.802910Z" } }, "outputs": [], "source": [ "# Start with the forecasted impact dataframe\n", "df = impacts['forecasted']['mean_slope_sto06']\n", "\n", "# Add a column which groups site_ids into the predicted and observed storm regime combination\n", "prediction_classes = [\n", " {\n", " 'site_ids': collision_underpredicted_site_ids,\n", " 'class': 'collision_underpredicted'\n", " },\n", " {\n", " 'site_ids': collision_correct_site_ids,\n", " 'class': 'collision_correct'\n", " },\n", " {\n", " 'site_ids': swash_overpredicted_site_ids,\n", " 'class': 'swash_overpredicted'\n", " },\n", " {\n", " 'site_ids': swash_correct_site_ids,\n", " 'class': 'swash_correct'\n", " },\n", "]\n", "df = add_prediction_class(df, prediction_classes)\n", "\n", "# Drop site_ids where we do not have a prediction class (caused by NaNs)\n", "df = df.dropna(subset=['prediction_class'])\n", "\n", "# Add additional parameters\n", "df = add_observed_regime(df, impacts['observed'])\n", "df = add_berm_width(df, df_profiles, df_profile_features)\n", "df = add_mean_slope(df, df_twl=twls['forecasted']['mean_slope_sto06'])\n", "df = 
add_prestorm_berm_vol(df, df_impacts_observed=impacts['observed'])\n", "df['R_high_dune_toe_diff'] = df['R_high'] - df['dune_toe_z']\n", "df['R_high_dune_toe_ratio'] = df['R_high'] / df['dune_toe_z']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create scatter plot matrix of parameter interactions\n", "Plot each hydrodynamic and morphodynamic parameter against each other and see if we can identify any patterns." ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T05:52:23.384061Z", "start_time": "2018-12-10T05:52:21.345652Z" } }, "outputs": [], "source": [ "# Setup colors for different classes\n", "text = df['prediction_class'].tolist()\n", "class_code = {x['class']: n for n, x in enumerate(prediction_classes)}\n", "color_vals = [class_code[cl] for cl in df['prediction_class']]\n", "\n", "# Each prediction class will have its own color\n", "pl_colorscale = [[0.0, '#d7191c'], [0.25, '#d7191c'], [0.25, '#fdae61'],\n", " [0.5, '#fdae61'], [0.5, '#2c7bb6'], [0.75, '#2c7bb6'],\n", " [0.75, '#abd9e9'], [1, '#abd9e9']]\n", "\n", "# Setup plotly scatterplot matrix\n", "trace1 = go.Splom(\n", " dimensions=[\n", " dict(label='dune_toe_z', values=df['dune_toe_z']),\n", " dict(label='R_high', values=df['R_high']),\n", " dict(label='berm_width', values=df['berm_width']),\n", " dict(\n", " label='twl_dune_toe_z_exceedance_hrs',\n", " values=df['twl_dune_toe_z_exceedance_hrs']),\n", " dict(label='R_high_dune_toe_diff', values=df['R_high_dune_toe_diff']),\n", " dict(\n", " label='R_high_dune_toe_ratio', values=df['R_high_dune_toe_ratio']),\n", " dict(label='mean_slope', values=df['mean_slope']),\n", " dict(label='prestorm_berm_vol', values=df['prestorm_berm_vol']),\n", " ],\n", " text=text,\n", " diagonal=dict(visible=False),\n", " showupperhalf=False,\n", " marker=dict(\n", " color=color_vals,\n", " size=2,\n", " colorscale=pl_colorscale,\n", " showscale=False,\n", " line=dict(width=0.1, 
color='rgb(230,230,230)')))\n", "\n", "axis = dict(showline=True, zeroline=False, gridcolor='#fff', ticklen=4)\n", "\n", "layout = go.Layout(\n", " title='Storm Impact Scatter Plot Matrix',\n", " dragmode='select',\n", " width=800,\n", " height=800,\n", " autosize=False,\n", " hovermode='closest',\n", " plot_bgcolor='rgba(240,240,240, 0.95)',\n", " xaxis1=dict(axis),\n", " xaxis2=dict(axis),\n", " xaxis3=dict(axis),\n", " xaxis4=dict(axis),\n", " xaxis5=dict(axis),\n", " xaxis6=dict(axis),\n", " xaxis7=dict(axis),\n", " xaxis8=dict(axis),\n", " yaxis1=dict(axis),\n", " yaxis2=dict(axis),\n", " yaxis3=dict(axis),\n", " yaxis4=dict(axis),\n", " yaxis5=dict(axis),\n", " yaxis6=dict(axis),\n", " yaxis7=dict(axis),\n", " yaxis8=dict(axis),\n", ")\n", "\n", "# Change font of axis labels\n", "for ax in layout:\n", " if 'xaxis' in ax or 'yaxis' in ax:\n", " layout[ax]['titlefont'] = {'size': 12}\n", "\n", "fig_scatter = go.FigureWidget(data=[trace1], layout=layout)\n", "\n", "# To output to file\n", "img_bytes = pio.write_image(\n", " fig_scatter,\n", " '02_scatter_plot.png',\n", " format='png',\n", " width=1500,\n", " height=1500,\n", " scale=5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Jupyter gets a little bit slow when trying to display this plot interactively, so let's output it as an image to view.\n", "![02_scatter_plot.png](02_scatter_plot.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create parameter confusion matrix\n", "It's a bit hard to see the differences between the categories, so let's build a confusion matrix of plots, showing two variables on each axis. 
First, list all the different parameters we have available to plot against:" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T05:27:34.225427Z", "start_time": "2018-12-10T05:27:34.218427Z" } }, "outputs": [ { "data": { "text/plain": [ "Index(['datetime', 'R_high', 'R_low', 'dune_toe_z', 'dune_crest_z',\n", " 'storm_regime', 'twl_dune_toe_z_exceedance_hrs', 'prediction_class',\n", " 'observed_storm_regime', 'berm_width', 'mean_slope',\n", " 'prestorm_berm_vol', 'R_high_dune_toe_diff', 'R_high_dune_toe_ratio'],\n", " dtype='object')" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T05:29:42.927017Z", "start_time": "2018-12-10T05:29:38.603905Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "This is the format of your plot grid:\n", "[ (1,1) x1,y1 ] [ (1,2) x2,y2 ]\n", "[ (2,1) x3,y3 ] [ (2,2) x4,y4 ]\n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "397f170343fe45b4acf9aded46595ac8", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FigureWidget({\n", " 'data': [{'marker': {'color': 'rgb(200,200,200)', 'size': 4},\n", " 'mode': 'marker…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Define which columns we want to plot\n", "x_col = 'prestorm_berm_vol'\n", "y_col = 'R_high_dune_toe_diff'\n", "marker_size = 4\n", "\n", "# Create 2x2 subplot figure confusion matrix\n", "fig = tools.make_subplots(\n", " rows=2,\n", " cols=2,\n", " vertical_spacing=0.09,\n", " subplot_titles=(\n", " 'Predicted Swash',\n", " 'Predicted Collision',\n", " '',\n", " '',\n", " ))\n", "\n", "# Get data for all traces\n", "x_all = df.loc[:, x_col]\n", "y_all = df.loc[:, y_col]\n", "\n", "# Create underlying grey traces of all data, so we can compare each subplot with the all the 
data.\n", "trace5 = go.Scatter(\n", " mode='markers',\n", " x=x_all,\n", " y=y_all,\n", " showlegend=False,\n", " marker=dict(\n", " color='rgb(200,200,200)',\n", " size=marker_size,\n", " ))\n", "fig.append_trace(trace5, 1, 1)\n", "\n", "trace6 = copy(trace5)\n", "trace6.xaxis = 'x2'\n", "trace6.yaxis = 'y'\n", "fig.append_trace(trace6, 1, 2)\n", "\n", "trace7 = copy(trace5)\n", "trace7.xaxis = 'x'\n", "trace7.yaxis = 'y2'\n", "fig.append_trace(trace7, 2, 1)\n", "\n", "trace8 = copy(trace5)\n", "trace8.xaxis = 'x2'\n", "trace8.yaxis = 'y2'\n", "fig.append_trace(trace8, 2, 2)\n", "\n", "# Add actual data for each subplot\n", "\n", "# Predicted swash, observed collision\n", "trace1 = go.Scatter(\n", " mode='markers',\n", " x=df.loc[df.index.isin(collision_underpredicted_site_ids), x_col],\n", " y=df.loc[df.index.isin(collision_underpredicted_site_ids), y_col],\n", " marker=dict(\n", " color='#fc8d59',\n", " size=marker_size,\n", " line=dict(color='rgb(231,231,231)', width=0.5)))\n", "fig.append_trace(trace1, 2, 1)\n", "\n", "# Predicted collision, observed collision\n", "trace2 = go.Scatter(\n", " mode='markers',\n", " x=df.loc[df.index.isin(collision_correct_site_ids), x_col],\n", " y=df.loc[df.index.isin(collision_correct_site_ids), y_col],\n", " marker=dict(\n", " color='#fc8d59',\n", " size=marker_size,\n", " line=dict(color='rgb(231,231,231)', width=0.5)))\n", "fig.append_trace(trace2, 2, 2)\n", "\n", "# Predicted swash, observed swash\n", "trace3 = go.Scatter(\n", " mode='markers',\n", " x=df.loc[df.index.isin(swash_correct_site_ids), x_col],\n", " y=df.loc[df.index.isin(swash_correct_site_ids), y_col],\n", " marker=dict(\n", " color='#3182bd',\n", " size=marker_size,\n", " line=dict(color='rgb(231,231,231)', width=0.5)))\n", "fig.append_trace(trace3, 1, 1)\n", "\n", "# Predicted collision, observed swash\n", "trace4 = go.Scatter(\n", " mode='markers',\n", " x=df.loc[df.index.isin(swash_overpredicted_site_ids), x_col],\n", " 
y=df.loc[df.index.isin(swash_overpredicted_site_ids), y_col],\n", " marker=dict(\n", " color='#3182bd',\n", " size=marker_size,\n", " line=dict(color='rgb(231,231,231)', width=0.5)))\n", "fig.append_trace(trace4, 1, 2)\n", "\n", "# Update formatting, titles, sizes etc.\n", "fig['layout']['yaxis1'].update(\n", " title='{}
<br>{}'.format('Observed swash', y_col))\n", "fig['layout']['yaxis3'].update(\n", " title='{}
<br>{}'.format('Observed collision', y_col))\n", "fig['layout']['xaxis3'].update(title=x_col)\n", "fig['layout']['xaxis4'].update(title=x_col)\n", "fig['layout'].update(\n", " height=700, width=900, title='Storm Regime Confusion Matrix')\n", "fig['layout']['showlegend'] = False\n", "fig_to_plot = go.FigureWidget(data=fig.data, layout=fig.layout)\n", "\n", "# To output to file\n", "img_bytes = pio.write_image(\n", " fig_to_plot,\n", " '02_storm_regime_confusion.png',\n", " format='png',\n", " width=600,\n", " height=600,\n", " scale=5)\n", "\n", "fig_to_plot" ] }, { "cell_type": "markdown", "metadata": { "ExecuteTime": { "end_time": "2018-12-09T00:30:34.940581Z", "start_time": "2018-12-09T00:30:34.825247Z" } }, "source": [ "Plotting `prestorm_berm_vol` vs `R_high_dune_toe_diff` shows there is a difference between observed swash and collision. It appears that when the `prestorm_berm_vol` is smaller than about 80 m³/m, we get collision, regardless of whether `R_high_dune_toe_diff` is greater than 0 m. Let's check whether a line can separate these two regimes." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create regime predictor which includes berm volume\n", "\n", "The technique for finding the boundary between two clusters is taken from [StackExchange](https://stackoverflow.com/a/22356267)."
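The next cell fits a linear SVM and converts its coefficients into a boundary line, per the StackExchange approach cited above: for a fitted `svm.SVC(kernel='linear')`, the boundary is $w_0 x + w_1 y + b = 0$, i.e. $x = -(w_1 y + b) / w_0$. A self-contained sketch of that conversion on toy clusters (not the real swash/collision samples):

```python
import numpy as np
from sklearn import svm

# Two toy clusters, separable along the x axis
rng = np.random.default_rng(0)
cluster_a = rng.normal(loc=[0.0, 0.0], scale=0.3, size=(50, 2))
cluster_b = rng.normal(loc=[3.0, 0.0], scale=0.3, size=(50, 2))

X = np.concatenate([cluster_a, cluster_b])
Y = np.array([0] * 50 + [1] * 50)

# Linear kernel, so the boundary is the straight line w0*x + w1*y + b = 0
clf = svm.SVC(kernel='linear', C=1.0)
clf.fit(X, Y)

w = clf.coef_[0]
b = clf.intercept_[0]

# Express the boundary as x in terms of y: x = -(w1*y + b) / w0
yy = np.linspace(-1, 1, 20)
xx = -(w[1] * yy + b) / w[0]

# Every point on the line sits exactly on the decision boundary,
# so the decision function evaluates to ~0 along it
vals = clf.decision_function(np.column_stack([xx, yy]))
print(np.allclose(vals, 0, atol=1e-6))  # True
```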
] }, { "cell_type": "code", "execution_count": 36, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T05:37:35.785659Z", "start_time": "2018-12-10T05:37:34.355843Z" } }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "09471a2157774aa2b17f5b88c2bfaab4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FigureWidget({\n", " 'data': [{'marker': {'color': '#3182bd', 'line': {'color': 'rgb(231,231,231)', 'width': 0.5…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Get data\n", "df = df.dropna(subset=['prestorm_berm_vol', 'R_high_dune_toe_diff'])\n", "\n", "swash_x = df.query('observed_storm_regime==\"swash\"').prestorm_berm_vol\n", "swash_y = df.query('observed_storm_regime==\"swash\"').R_high_dune_toe_diff\n", "swash_samples = np.array([[x, y] for x, y in zip(swash_x, swash_y)])\n", "\n", "collision_x = df.query('observed_storm_regime==\"collision\"').prestorm_berm_vol\n", "collision_y = df.query(\n", " 'observed_storm_regime==\"collision\"').R_high_dune_toe_diff\n", "collision_samples = np.array(\n", " [[x, y] for x, y in zip(collision_x, collision_y)])\n", "\n", "# Fit SVM\n", "X = np.concatenate((swash_samples, collision_samples), axis=0)\n", "Y = np.array([0] * swash_x.shape[0] + [1] * collision_x.shape[0])\n", "\n", "C = 1.0 # SVM regularization parameter\n", "clf = svm.SVC(kernel='linear', gamma=0.7, C=C)\n", "clf.fit(X, Y)\n", "\n", "w = clf.coef_[0]\n", "a = -w[0] / w[1]\n", "y_vals = swash_y.tolist() + collision_y.tolist()\n", "yy = np.linspace(min(y_vals), max(y_vals))\n", "xx = (yy + (clf.intercept_[0]) / w[1]) / a\n", "\n", "# Prepare plot\n", "trace_swash = go.Scatter(\n", " x=swash_x,\n", " y=swash_y,\n", " name='Swash',\n", " mode='markers',\n", " marker=dict(\n", " color='#3182bd',\n", " size=marker_size,\n", " line=dict(color='rgb(231,231,231)', width=0.5)))\n", "\n", "trace_collision = go.Scatter(\n", " x=collision_x,\n", " y=collision_y,\n", " name='Collision',\n", " 
mode='markers',\n", " marker=dict(\n", " color='#fc8d59',\n", " size=marker_size,\n", " line=dict(color='rgb(231,231,231)', width=0.5)))\n", "\n", "trace_split = go.Scatter(\n", " x=xx,\n", " y=yy,\n", " name='Split (y={:.1f}x-{:.1f})'.format(a, (clf.intercept_[0]) / w[1]),\n", ")\n", "\n", "layout = dict(\n", " title='Observed Swash/Collision Regime Split',\n", " xaxis=dict(title='Prestorm berm volume', ),\n", " yaxis=dict(title='R_high - D_low'),\n", " legend=dict(x=.6, y=1.))\n", "\n", "fig_to_plot = go.FigureWidget(\n", " data=[trace_swash, trace_collision, trace_split], layout=layout)\n", "\n", "# To output to file\n", "img_bytes = pio.write_image(\n", " fig_to_plot,\n", " '02_storm_regime_split.png',\n", " format='png',\n", " width=600,\n", " height=600,\n", " scale=5)\n", "\n", "fig_to_plot" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Looking at the plot above, it appears that when the `prestorm_berm_vol` is less than about 80 m³/m, we should classify the regime as collision, even if wave runup does not reach the toe of the dune." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Test new berm vol predictor\n", "\n", "Now let's go back to our predicted forecasts and see if our confusion matrix improves when we adopt this new criterion for differentiating between swash and collision.\n", "\n", "First, define a custom function to get a colormap for our confusion matrix."
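The hard-coded `2.7` and `-221.1` in the reclassification cell below presumably come from the slope `a` and intercept term of the SVM fit above. A sketch of how such a fitted line reduces to a simple threshold rule (the numbers are taken from that cell, and `predict_regime` is a hypothetical helper, not part of the original analysis):

```python
# Coefficients of the dividing line y = a*x + c, assumed to come
# from the SVM fit (a = slope, c = intercept term)
a = 2.7
c = -221.1

def predict_regime(prestorm_berm_vol, r_high_dune_toe_diff):
    """Classify a site using the fitted line: points on or above
    the line are collision, points below it are swash."""
    boundary = a * prestorm_berm_vol + c
    return 'collision' if r_high_dune_toe_diff >= boundary else 'swash'

# A site with a small berm classifies as collision even when runup
# stays below the dune toe (diff < 0), matching the text's observation
print(predict_regime(prestorm_berm_vol=60, r_high_dune_toe_diff=-0.5))   # collision
print(predict_regime(prestorm_berm_vol=120, r_high_dune_toe_diff=-0.5))  # swash
```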
] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T02:18:08.658221Z", "start_time": "2018-12-10T02:18:08.653186Z" } }, "outputs": [], "source": [ "def matplotlib_to_plotly(cmap_name='RdYlGn', pl_entries=255):\n", " \"\"\"\n", " Function to convert matplotlib colorscale to plotly\n", " \"\"\"\n", " cmap = matplotlib.cm.get_cmap(cmap_name)\n", " h = 1.0 / (pl_entries - 1)\n", " pl_colorscale = []\n", "\n", " for k in range(pl_entries):\n", " C = list(map(np.uint8, np.array(cmap(k * h)[:3]) * 255))\n", " pl_colorscale.append([k * h, 'rgb' + str((C[0], C[1], C[2]))])\n", "\n", " return pl_colorscale" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T05:44:27.306253Z", "start_time": "2018-12-10T05:44:25.791799Z" }, "code_folding": [] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "97d8283101e748dd8e2fd7811c97973b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FigureWidget({\n", " 'data': [{'colorscale': [[0.0, 'rgb(165, 0, 38)'], [0.003937007874015748,\n", " …" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from sklearn.metrics import confusion_matrix\n", "\n", "# Create colorscale\n", "rdylgr = matplotlib_to_plotly()\n", "\n", "# Add new column with our new prediction technique.\n", "df.loc[df['prestorm_berm_vol'] * 2.7 - 221.1 <= df['R_high_dune_toe_diff'],\n", " 'new_predicted_storm_regime'] = 'collision'\n", "df.loc[df['prestorm_berm_vol'] * 2.7 - 221.1 > df['R_high_dune_toe_diff'],\n", " 'new_predicted_storm_regime'] = 'swash'\n", "\n", "# Get observed and forecasted regimes, and merge\n", "observed_regimes = df.observed_storm_regime.rename(\n", " 'observed_regime').to_frame()\n", "forecasted_regimes = df.new_predicted_storm_regime.rename(\n", " 'forecasted_regime').to_frame()\n", "df_compare = pd.concat([observed_regimes, forecasted_regimes],\n", " axis='columns',\n", " 
sort=True)\n", "df_compare.dropna(axis='index', inplace=True)\n", "\n", "# Create a confusion matrix based on the observed/forecasted regimes.\n", "# Need to do some flipping to get it in the correct\n", "# order for the plotly heatmap.\n", "z = confusion_matrix(\n", " df_compare.observed_regime.tolist(),\n", " df_compare.forecasted_regime.tolist(),\n", " labels=['swash', 'collision', 'overwash', 'inundation'])\n", "z = np.flip(z, axis=0)\n", "\n", "# Make incorrect values negative, so they get assigned a different color.\n", "# Better for visualization\n", "z_neg_incorrect = np.flip(np.identity(4), axis=0)\n", "z_neg_incorrect[z_neg_incorrect == 0] = -1\n", "z_neg_incorrect = (z * z_neg_incorrect).tolist()\n", "\n", "# Change the text on the heatmap so it also displays percentages.\n", "z_with_pct = []\n", "for row in z:\n", " new_row = []\n", " for val in row:\n", " new_row.append('{}<br>
({}%)'.format(\n", " val, np.around(val / np.sum(z) * 100, 1)))\n", " z_with_pct.append(new_row)\n", "\n", "# Create the heatmap figure\n", "x = ['swash', 'collision', 'overwash', 'inundation']\n", "y = list(reversed(x))\n", "fig = ff.create_annotated_heatmap(\n", " z_neg_incorrect, x=x, y=y, annotation_text=z_with_pct, colorscale=rdylgr)\n", "heatmap = go.FigureWidget(data=fig.data, layout=fig.layout)\n", "\n", "# Update axis titles\n", "heatmap.layout.xaxis.update(title='Predicted')\n", "heatmap.layout.yaxis.update(title='Observed')\n", "\n", "# Write to file\n", "img_bytes = pio.write_image(\n", " heatmap,\n", " '02_confusion_matrix_berm_vol_predictor.png',\n", " format='png',\n", " width=600,\n", " height=600,\n", " scale=5)\n", "\n", "heatmap" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## TODO Compare predicted and underpredicted profiles" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Define a function for getting the average beach profile for a number of given site_ids:" ] }, { "cell_type": "code", "execution_count": 224, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T03:08:49.904157Z", "start_time": "2018-12-10T03:08:49.896136Z" } }, "outputs": [], "source": [ "def get_avg_profile(site_ids, debug=False):\n", " rows = []\n", " for n,site_id in enumerate(site_ids):\n", " profile = df_profiles.query(\"site_id == '{}' and profile_type == 'prestorm'\".format(site_id))\n", " profile_z = np.array(profile.z.tolist())\n", " profile_x = np.array(profile.index.get_level_values('x').tolist())\n", " \n", " # Let's center the profile based on the z=0 location\n", " idx_last_z_val = max(np.argwhere(~np.isnan(profile_z)==True))[0]\n", " x_last_val = profile_x[idx_last_z_val]\n", " profile_x = [x - x_last_val for x in profile_x]\n", " \n", " # Put values into a dictionary\n", " for x,z in zip(profile_x, profile_z):\n", " rows.append({'x':x, 'z': z})\n", "\n", " # Return early for debugging\n", " if debug and n>3:\n", " break\n", " 
\n", " # Create a dataframe from the rows\n", " df = pd.DataFrame(rows)\n", " avg_profile = df.groupby('x').agg({'z': [np.nanmean, np.nanstd]}).reset_index()\n", "\n", " return {\n", " 'x': avg_profile.x.tolist(),\n", " 'z': avg_profile.z.nanmean.tolist(),\n", " 'std': avg_profile.z.nanstd.tolist(),\n", " 'n': n + 1 # number of profiles\n", " }" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now, let's look at whether there is a difference between the average beach profiles of correctly and incorrectly forecasted site_ids. First, let's look at sites where the swash regime was observed." ] }, { "cell_type": "code", "execution_count": 225, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T03:11:20.818875Z", "start_time": "2018-12-10T03:08:49.906163Z" }, "code_folding": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\z5189959\\Desktop\\nsw-2016-storm-impact\\.venv\\lib\\site-packages\\pandas\\core\\groupby\\groupby.py:1062: RuntimeWarning:\n", "\n", "Mean of empty slice\n", "\n", "C:\\Users\\z5189959\\Desktop\\nsw-2016-storm-impact\\.venv\\lib\\site-packages\\numpy\\lib\\nanfunctions.py:1545: RuntimeWarning:\n", "\n", "Degrees of freedom <= 0 for slice.\n", "\n" ] } ], "source": [ "overpredicted = get_avg_profile(swash_overpredicted_site_ids)\n", "correct = get_avg_profile(swash_correct_site_ids)\n", "\n", "# Add mean profiles\n", "trace_overpredicted_mean = go.Scatter(\n", " x=overpredicted['x'],\n", " y=overpredicted['z'],\n", " opacity=1,\n", " mode='lines',\n", " name='Mean overpredicted profile (n={})'.format(overpredicted['n']),\n", " line=dict(\n", " color=('rgb(205, 0, 0)'),\n", " width=2)\n", ")\n", "\n", "trace_overpredicted_std_top = go.Scatter(\n", " x=overpredicted['x'],\n", " y=np.add(overpredicted['z'], overpredicted['std']),\n", " opacity=1,\n", " hoverinfo='none',\n", " showlegend=False,\n", " mode='lines',\n", " line=dict(\n", " color=('rgb(205, 0, 0)'),\n", " width=0.5,\n", " dash='dash')\n", ")\n", "\n", "trace_overpredicted_std_btm = go.Scatter(\n", " x=overpredicted['x'],\n", " y=np.subtract(overpredicted['z'], overpredicted['std']),\n", " opacity=1,\n", " hoverinfo='none',\n", " mode='lines',\n", " showlegend=False,\n", " line=dict(\n", " color=('rgb(205, 0, 0)'),\n", " width=0.5,\n", " dash='dash')\n", ")\n", "\n", "trace_correct_mean = go.Scatter(\n", " x=correct['x'],\n", " y=correct['z'],\n", " opacity=1,\n", " mode='lines',\n", " name='Mean correct profile (n={})'.format(correct['n']),\n", " line=dict(\n", " color=('rgb(0, 205, 0)'),\n", " width=2)\n", ")\n", "\n", "trace_correct_std_top = go.Scatter(\n", " x=correct['x'],\n", " y=np.add(correct['z'], correct['std']),\n", " opacity=1,\n", " hoverinfo='none',\n", " showlegend=False,\n", " mode='lines',\n", " line=dict(\n", " color=('rgb(0, 205, 0)'),\n", " width=0.5,\n", " dash='dash')\n", ")\n", "\n", "trace_correct_std_btm = go.Scatter(\n", " x=correct['x'],\n", " y=np.subtract(correct['z'], correct['std']),\n", " opacity=1,\n", " hoverinfo='none',\n", " mode='lines',\n", " showlegend=False,\n", " line=dict(\n", " color=('rgb(0, 205, 0)'),\n", " width=0.5,\n", " dash='dash')\n", ")\n", "\n", "layout = dict(showlegend=True,\n", " title='Observed Swash Impact Regime',\n", " legend=dict(x=.6, y=1),\n", " xaxis=dict(\n", " range=[-150, 0]),\n", " yaxis=dict(\n", " range=[0, 10]))\n", "\n", "fig = go.FigureWidget(data=[trace_overpredicted_mean,\n", " trace_overpredicted_std_top,\n", " trace_overpredicted_std_btm,\n", " trace_correct_mean,\n", " trace_correct_std_top,\n", " trace_correct_std_btm],\n", " layout=layout)\n", "\n", "# To output to file\n", "img_bytes = pio.write_image(\n", " fig, 'mean_profiles_swash.png', format='png', width=600, height=600, scale=5)\n", "\n", "fig" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can see that the difference is minimal. For cases where we predicted collision but observed swash (overprediction), the mean profile is slightly more concave than at correctly predicted sites."
] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-12-10T03:11:20.824874Z", "start_time": "2018-12-10T03:08:27.623Z" } }, "outputs": [], "source": [ "underpredicted = get_avg_profile(collision_underpredicted_site_ids)\n", "correct = get_avg_profile(collision_correct_site_ids)\n", "\n", "# Add mean profiles\n", "trace_underpredicted_mean = go.Scatter(\n", " x=underpredicted['x'],\n", " y=underpredicted['z'],\n", " opacity=1,\n", " mode='lines',\n", " name='Mean underpredicted profile (n={})'.format(underpredicted['n']),\n", " line=dict(\n", " color=('rgb(205, 0, 0)'),\n", " width=2)\n", ")\n", "\n", "trace_underpredicted_std_top = go.Scatter(\n", " x=underpredicted['x'],\n", " y=np.add(underpredicted['z'], underpredicted['std']),\n", " opacity=1,\n", " hoverinfo='none',\n", " showlegend=False,\n", " mode='lines',\n", " line=dict(\n", " color=('rgb(205, 0, 0)'),\n", " width=0.5,\n", " dash='dash')\n", ")\n", "\n", "trace_underpredicted_std_btm = go.Scatter(\n", " x=underpredicted['x'],\n", " y=np.subtract(underpredicted['z'], underpredicted['std']),\n", " opacity=1,\n", " hoverinfo='none',\n", " mode='lines',\n", " showlegend=False,\n", " line=dict(\n", " color=('rgb(205, 0, 0)'),\n", " width=0.5,\n", " dash='dash')\n", ")\n", "\n", "trace_correct_mean = go.Scatter(\n", " x=correct['x'],\n", " y=correct['z'],\n", " opacity=1,\n", " mode='lines',\n", " name='Mean correct profile (n={})'.format(correct['n']),\n", " line=dict(\n", " color=('rgb(0, 205, 0)'),\n", " width=2)\n", ")\n", "\n", "trace_correct_std_top = go.Scatter(\n", " x=correct['x'],\n", " y=np.add(correct['z'], correct['std']),\n", " opacity=1,\n", " hoverinfo='none',\n", " showlegend=False,\n", " mode='lines',\n", " line=dict(\n", " color=('rgb(0, 205, 0)'),\n", " width=0.5,\n", " dash='dash')\n", ")\n", "\n", "trace_correct_std_btm = go.Scatter(\n", " x=correct['x'],\n", " y=np.subtract(correct['z'], correct['std']),\n", " opacity=1,\n", " hoverinfo='none',\n", " mode='lines',\n", " showlegend=False,\n", " line=dict(\n", " color=('rgb(0, 205, 0)'),\n", " width=0.5,\n", " dash='dash')\n", ")\n", "\n", "layout = dict(showlegend=True,\n", " title='Observed Collision Impact Regime',\n", " legend=dict(x=.6, y=1),\n", " xaxis=dict(\n", " range=[-150, 0]),\n", " yaxis=dict(\n", " range=[0, 10]))\n", "\n", "fig = go.FigureWidget(data=[trace_underpredicted_mean,\n", " trace_underpredicted_std_top,\n", " trace_underpredicted_std_btm,\n", " trace_correct_mean,\n", " trace_correct_std_top,\n", " trace_correct_std_btm],\n", " layout=layout)\n", "\n", "# To output to file\n", "img_bytes = pio.write_image(\n", " fig, 'mean_profiles_collision.png', format='png', width=600, height=600, scale=5)\n", "\n", "fig" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This plot is more interesting. It shows that collision is forecast correctly when the profile is more accreted (convex), but when the profile is more eroded (concave), the water level is underpredicted. Why is this?
" ] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.7" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "232.391px" }, "toc_section_display": true, "toc_window_display": true }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }