diff --git a/notebooks/01_exploration.ipynb b/notebooks/01_exploration.ipynb
index 07a9649..deb0348 100644
--- a/notebooks/01_exploration.ipynb
+++ b/notebooks/01_exploration.ipynb
@@ -1001,9 +1001,1012 @@
     "    g_profiles\n",
     "])"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Scatter plot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2018-12-07T05:08:12.117885Z",
+     "start_time": "2018-12-07T05:08:12.078780Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>prestorm_swash_vol</th>\n",
+       "      <th>poststorm_swash_vol</th>\n",
+       "      <th>swash_vol_change</th>\n",
+       "      <th>swash_pct_change</th>\n",
+       "      <th>prestorm_dune_face_vol</th>\n",
+       "      <th>poststorm_dune_face_vol</th>\n",
+       "      <th>dune_face_vol_change</th>\n",
+       "      <th>dune_face_pct_change</th>\n",
+       "      <th>storm_regime</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>site_id</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>AVOCAn0001</th>\n",
+       "      <td>113.9091</td>\n",
+       "      <td>77.9830</td>\n",
+       "      <td>35.6103</td>\n",
+       "      <td>31.2620</td>\n",
+       "      <td>165.4760</td>\n",
+       "      <td>166.7296</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAn0002</th>\n",
+       "      <td>106.8959</td>\n",
+       "      <td>67.0913</td>\n",
+       "      <td>39.6373</td>\n",
+       "      <td>37.0803</td>\n",
+       "      <td>256.4137</td>\n",
+       "      <td>258.9174</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAn0003</th>\n",
+       "      <td>99.0484</td>\n",
+       "      <td>53.6563</td>\n",
+       "      <td>45.1621</td>\n",
+       "      <td>45.5960</td>\n",
+       "      <td>372.7031</td>\n",
+       "      <td>373.9198</td>\n",
+       "      <td>-0.3147</td>\n",
+       "      <td>-0.0844</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAn0004</th>\n",
+       "      <td>74.7543</td>\n",
+       "      <td>29.3280</td>\n",
+       "      <td>45.4262</td>\n",
+       "      <td>60.7674</td>\n",
+       "      <td>275.1689</td>\n",
+       "      <td>276.0476</td>\n",
+       "      <td>0.4104</td>\n",
+       "      <td>0.1492</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAn0005</th>\n",
+       "      <td>70.5968</td>\n",
+       "      <td>24.1071</td>\n",
+       "      <td>46.4897</td>\n",
+       "      <td>65.8524</td>\n",
+       "      <td>268.5194</td>\n",
+       "      <td>263.4262</td>\n",
+       "      <td>7.4196</td>\n",
+       "      <td>2.7631</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAn0006</th>\n",
+       "      <td>68.7583</td>\n",
+       "      <td>23.9665</td>\n",
+       "      <td>44.7918</td>\n",
+       "      <td>65.1438</td>\n",
+       "      <td>202.6770</td>\n",
+       "      <td>198.2397</td>\n",
+       "      <td>4.7944</td>\n",
+       "      <td>2.3655</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAn0007</th>\n",
+       "      <td>75.7895</td>\n",
+       "      <td>27.2715</td>\n",
+       "      <td>48.5180</td>\n",
+       "      <td>64.0168</td>\n",
+       "      <td>149.8479</td>\n",
+       "      <td>143.1312</td>\n",
+       "      <td>7.1323</td>\n",
+       "      <td>4.7597</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAn0008</th>\n",
+       "      <td>93.3107</td>\n",
+       "      <td>42.4968</td>\n",
+       "      <td>50.8139</td>\n",
+       "      <td>54.4567</td>\n",
+       "      <td>187.9201</td>\n",
+       "      <td>187.3459</td>\n",
+       "      <td>2.8297</td>\n",
+       "      <td>1.5058</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAn0009</th>\n",
+       "      <td>3.6955</td>\n",
+       "      <td>0.1038</td>\n",
+       "      <td>3.5917</td>\n",
+       "      <td>97.1908</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAs0001</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAs0002</th>\n",
+       "      <td>97.3864</td>\n",
+       "      <td>26.6619</td>\n",
+       "      <td>70.7246</td>\n",
+       "      <td>72.6226</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAs0003</th>\n",
+       "      <td>70.7401</td>\n",
+       "      <td>40.0608</td>\n",
+       "      <td>30.7919</td>\n",
+       "      <td>43.5282</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAs0004</th>\n",
+       "      <td>97.8389</td>\n",
+       "      <td>45.4845</td>\n",
+       "      <td>52.2157</td>\n",
+       "      <td>53.3691</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAs0005</th>\n",
+       "      <td>95.1711</td>\n",
+       "      <td>54.9722</td>\n",
+       "      <td>40.1706</td>\n",
+       "      <td>42.2088</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAs0006</th>\n",
+       "      <td>112.5818</td>\n",
+       "      <td>67.8718</td>\n",
+       "      <td>44.8252</td>\n",
+       "      <td>39.8157</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAs0007</th>\n",
+       "      <td>65.3531</td>\n",
+       "      <td>44.1537</td>\n",
+       "      <td>21.5228</td>\n",
+       "      <td>32.9331</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVOCAs0008</th>\n",
+       "      <td>52.3940</td>\n",
+       "      <td>44.9152</td>\n",
+       "      <td>7.4803</td>\n",
+       "      <td>14.2770</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BILG0001</th>\n",
+       "      <td>19.4177</td>\n",
+       "      <td>7.5746</td>\n",
+       "      <td>11.8431</td>\n",
+       "      <td>60.9913</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BILG0002</th>\n",
+       "      <td>155.6998</td>\n",
+       "      <td>98.1693</td>\n",
+       "      <td>57.4340</td>\n",
+       "      <td>36.8876</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BILG0003</th>\n",
+       "      <td>83.5219</td>\n",
+       "      <td>52.8059</td>\n",
+       "      <td>30.5537</td>\n",
+       "      <td>36.5817</td>\n",
+       "      <td>41.1469</td>\n",
+       "      <td>40.6081</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BILG0004</th>\n",
+       "      <td>158.6283</td>\n",
+       "      <td>116.6189</td>\n",
+       "      <td>42.1178</td>\n",
+       "      <td>26.5512</td>\n",
+       "      <td>11.2211</td>\n",
+       "      <td>11.0892</td>\n",
+       "      <td>-0.0132</td>\n",
+       "      <td>-0.1179</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BILG0005</th>\n",
+       "      <td>212.8478</td>\n",
+       "      <td>164.0044</td>\n",
+       "      <td>48.4312</td>\n",
+       "      <td>22.7539</td>\n",
+       "      <td>613.6156</td>\n",
+       "      <td>606.2766</td>\n",
+       "      <td>5.7738</td>\n",
+       "      <td>0.9410</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BLUEYS0001</th>\n",
+       "      <td>65.4628</td>\n",
+       "      <td>19.2938</td>\n",
+       "      <td>46.1690</td>\n",
+       "      <td>70.5270</td>\n",
+       "      <td>130.7447</td>\n",
+       "      <td>120.5446</td>\n",
+       "      <td>9.5601</td>\n",
+       "      <td>7.3121</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BLUEYS0002</th>\n",
+       "      <td>50.2084</td>\n",
+       "      <td>10.3009</td>\n",
+       "      <td>39.9074</td>\n",
+       "      <td>79.4836</td>\n",
+       "      <td>512.0154</td>\n",
+       "      <td>477.1774</td>\n",
+       "      <td>33.2825</td>\n",
+       "      <td>6.5003</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BLUEYS0003</th>\n",
+       "      <td>50.6308</td>\n",
+       "      <td>11.1682</td>\n",
+       "      <td>39.4625</td>\n",
+       "      <td>77.9418</td>\n",
+       "      <td>443.0853</td>\n",
+       "      <td>414.3901</td>\n",
+       "      <td>24.8870</td>\n",
+       "      <td>5.6167</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BLUEYS0004</th>\n",
+       "      <td>95.1608</td>\n",
+       "      <td>31.3330</td>\n",
+       "      <td>63.8279</td>\n",
+       "      <td>67.0737</td>\n",
+       "      <td>287.5805</td>\n",
+       "      <td>272.4267</td>\n",
+       "      <td>12.9641</td>\n",
+       "      <td>4.5080</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BLUEYS0005</th>\n",
+       "      <td>141.0643</td>\n",
+       "      <td>58.2545</td>\n",
+       "      <td>82.8098</td>\n",
+       "      <td>58.7036</td>\n",
+       "      <td>539.3864</td>\n",
+       "      <td>520.0732</td>\n",
+       "      <td>12.0470</td>\n",
+       "      <td>2.2335</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BLUEYS0006</th>\n",
+       "      <td>88.4207</td>\n",
+       "      <td>51.6205</td>\n",
+       "      <td>36.2553</td>\n",
+       "      <td>41.0032</td>\n",
+       "      <td>271.6036</td>\n",
+       "      <td>267.1954</td>\n",
+       "      <td>3.6045</td>\n",
+       "      <td>1.3271</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BOAT0001</th>\n",
+       "      <td>23.8510</td>\n",
+       "      <td>23.5660</td>\n",
+       "      <td>-0.0264</td>\n",
+       "      <td>-0.1108</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BOAT0002</th>\n",
+       "      <td>37.6524</td>\n",
+       "      <td>14.0209</td>\n",
+       "      <td>23.6316</td>\n",
+       "      <td>62.7624</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>TREACH0014</th>\n",
+       "      <td>97.5323</td>\n",
+       "      <td>46.2994</td>\n",
+       "      <td>51.0816</td>\n",
+       "      <td>52.3740</td>\n",
+       "      <td>508.7400</td>\n",
+       "      <td>505.7877</td>\n",
+       "      <td>0.4254</td>\n",
+       "      <td>0.0836</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>TREACH0015</th>\n",
+       "      <td>96.8327</td>\n",
+       "      <td>45.1962</td>\n",
+       "      <td>51.6364</td>\n",
+       "      <td>53.3254</td>\n",
+       "      <td>690.8275</td>\n",
+       "      <td>683.4458</td>\n",
+       "      <td>1.5086</td>\n",
+       "      <td>0.2184</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>TREACH0016</th>\n",
+       "      <td>106.9083</td>\n",
+       "      <td>66.0567</td>\n",
+       "      <td>40.3629</td>\n",
+       "      <td>37.7547</td>\n",
+       "      <td>508.0014</td>\n",
+       "      <td>499.6315</td>\n",
+       "      <td>0.3386</td>\n",
+       "      <td>0.0667</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0001</th>\n",
+       "      <td>132.3413</td>\n",
+       "      <td>143.4459</td>\n",
+       "      <td>-9.7255</td>\n",
+       "      <td>-7.3488</td>\n",
+       "      <td>665.9898</td>\n",
+       "      <td>667.5923</td>\n",
+       "      <td>0.0410</td>\n",
+       "      <td>0.0062</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0002</th>\n",
+       "      <td>151.1833</td>\n",
+       "      <td>126.9844</td>\n",
+       "      <td>23.9548</td>\n",
+       "      <td>15.8449</td>\n",
+       "      <td>385.8467</td>\n",
+       "      <td>386.7284</td>\n",
+       "      <td>-0.0449</td>\n",
+       "      <td>-0.0116</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0003</th>\n",
+       "      <td>154.1788</td>\n",
+       "      <td>117.9441</td>\n",
+       "      <td>36.2425</td>\n",
+       "      <td>23.5068</td>\n",
+       "      <td>694.2226</td>\n",
+       "      <td>700.5105</td>\n",
+       "      <td>-4.2136</td>\n",
+       "      <td>-0.6070</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0004</th>\n",
+       "      <td>137.8449</td>\n",
+       "      <td>76.6007</td>\n",
+       "      <td>61.2725</td>\n",
+       "      <td>44.4503</td>\n",
+       "      <td>559.5485</td>\n",
+       "      <td>569.8591</td>\n",
+       "      <td>-4.4590</td>\n",
+       "      <td>-0.7969</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0005</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0006</th>\n",
+       "      <td>205.8453</td>\n",
+       "      <td>186.0784</td>\n",
+       "      <td>22.5892</td>\n",
+       "      <td>10.9739</td>\n",
+       "      <td>55.0898</td>\n",
+       "      <td>55.8919</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0007</th>\n",
+       "      <td>80.4674</td>\n",
+       "      <td>35.4614</td>\n",
+       "      <td>45.0059</td>\n",
+       "      <td>55.9307</td>\n",
+       "      <td>178.1005</td>\n",
+       "      <td>178.5439</td>\n",
+       "      <td>0.4727</td>\n",
+       "      <td>0.2654</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0008</th>\n",
+       "      <td>88.4574</td>\n",
+       "      <td>40.3200</td>\n",
+       "      <td>48.1374</td>\n",
+       "      <td>54.4187</td>\n",
+       "      <td>258.7513</td>\n",
+       "      <td>258.3849</td>\n",
+       "      <td>-1.2073</td>\n",
+       "      <td>-0.4666</td>\n",
+       "      <td>swash</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0009</th>\n",
+       "      <td>70.9159</td>\n",
+       "      <td>26.1742</td>\n",
+       "      <td>44.7418</td>\n",
+       "      <td>63.0913</td>\n",
+       "      <td>267.3725</td>\n",
+       "      <td>258.3720</td>\n",
+       "      <td>9.8041</td>\n",
+       "      <td>3.6668</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0010</th>\n",
+       "      <td>58.6604</td>\n",
+       "      <td>18.0418</td>\n",
+       "      <td>40.6186</td>\n",
+       "      <td>69.2437</td>\n",
+       "      <td>187.5259</td>\n",
+       "      <td>161.9748</td>\n",
+       "      <td>25.3087</td>\n",
+       "      <td>13.4961</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0011</th>\n",
+       "      <td>59.2415</td>\n",
+       "      <td>16.3165</td>\n",
+       "      <td>42.9250</td>\n",
+       "      <td>72.4577</td>\n",
+       "      <td>197.0129</td>\n",
+       "      <td>175.2512</td>\n",
+       "      <td>21.9882</td>\n",
+       "      <td>11.1608</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0012</th>\n",
+       "      <td>74.4189</td>\n",
+       "      <td>23.0232</td>\n",
+       "      <td>51.3957</td>\n",
+       "      <td>69.0627</td>\n",
+       "      <td>178.4783</td>\n",
+       "      <td>168.3475</td>\n",
+       "      <td>10.0386</td>\n",
+       "      <td>5.6246</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0013</th>\n",
+       "      <td>70.4964</td>\n",
+       "      <td>22.7546</td>\n",
+       "      <td>47.7419</td>\n",
+       "      <td>67.7224</td>\n",
+       "      <td>231.1513</td>\n",
+       "      <td>195.2581</td>\n",
+       "      <td>35.8072</td>\n",
+       "      <td>15.4908</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0014</th>\n",
+       "      <td>68.0896</td>\n",
+       "      <td>24.1853</td>\n",
+       "      <td>43.9043</td>\n",
+       "      <td>64.4802</td>\n",
+       "      <td>82.4268</td>\n",
+       "      <td>61.2601</td>\n",
+       "      <td>21.1718</td>\n",
+       "      <td>25.6856</td>\n",
+       "      <td>collision</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0015</th>\n",
+       "      <td>55.0780</td>\n",
+       "      <td>16.0119</td>\n",
+       "      <td>39.0660</td>\n",
+       "      <td>70.9286</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0016</th>\n",
+       "      <td>96.7687</td>\n",
+       "      <td>39.8224</td>\n",
+       "      <td>56.9463</td>\n",
+       "      <td>58.8479</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0017</th>\n",
+       "      <td>35.2987</td>\n",
+       "      <td>8.5140</td>\n",
+       "      <td>26.7847</td>\n",
+       "      <td>75.8801</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0018</th>\n",
+       "      <td>40.9407</td>\n",
+       "      <td>10.5147</td>\n",
+       "      <td>30.4260</td>\n",
+       "      <td>74.3173</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0019</th>\n",
+       "      <td>38.2838</td>\n",
+       "      <td>9.2156</td>\n",
+       "      <td>29.0682</td>\n",
+       "      <td>75.9282</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0020</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0021</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0022</th>\n",
+       "      <td>0.5516</td>\n",
+       "      <td>0.2840</td>\n",
+       "      <td>0.2675</td>\n",
+       "      <td>48.5063</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0023</th>\n",
+       "      <td>3.3761</td>\n",
+       "      <td>0.3020</td>\n",
+       "      <td>3.0741</td>\n",
+       "      <td>91.0554</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0024</th>\n",
+       "      <td>60.8648</td>\n",
+       "      <td>31.2794</td>\n",
+       "      <td>29.5854</td>\n",
+       "      <td>48.6084</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0025</th>\n",
+       "      <td>45.1055</td>\n",
+       "      <td>14.6028</td>\n",
+       "      <td>30.5028</td>\n",
+       "      <td>67.6253</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0026</th>\n",
+       "      <td>32.1502</td>\n",
+       "      <td>12.9335</td>\n",
+       "      <td>19.2167</td>\n",
+       "      <td>59.7716</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WAMBE0027</th>\n",
+       "      <td>26.2310</td>\n",
+       "      <td>18.6828</td>\n",
+       "      <td>7.5482</td>\n",
+       "      <td>28.7759</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>1768 rows × 9 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            prestorm_swash_vol  poststorm_swash_vol  swash_vol_change  \\\n",
+       "site_id                                                                 \n",
+       "AVOCAn0001            113.9091              77.9830           35.6103   \n",
+       "AVOCAn0002            106.8959              67.0913           39.6373   \n",
+       "AVOCAn0003             99.0484              53.6563           45.1621   \n",
+       "AVOCAn0004             74.7543              29.3280           45.4262   \n",
+       "AVOCAn0005             70.5968              24.1071           46.4897   \n",
+       "AVOCAn0006             68.7583              23.9665           44.7918   \n",
+       "AVOCAn0007             75.7895              27.2715           48.5180   \n",
+       "AVOCAn0008             93.3107              42.4968           50.8139   \n",
+       "AVOCAn0009              3.6955               0.1038            3.5917   \n",
+       "AVOCAs0001                 NaN                  NaN               NaN   \n",
+       "AVOCAs0002             97.3864              26.6619           70.7246   \n",
+       "AVOCAs0003             70.7401              40.0608           30.7919   \n",
+       "AVOCAs0004             97.8389              45.4845           52.2157   \n",
+       "AVOCAs0005             95.1711              54.9722           40.1706   \n",
+       "AVOCAs0006            112.5818              67.8718           44.8252   \n",
+       "AVOCAs0007             65.3531              44.1537           21.5228   \n",
+       "AVOCAs0008             52.3940              44.9152            7.4803   \n",
+       "BILG0001               19.4177               7.5746           11.8431   \n",
+       "BILG0002              155.6998              98.1693           57.4340   \n",
+       "BILG0003               83.5219              52.8059           30.5537   \n",
+       "BILG0004              158.6283             116.6189           42.1178   \n",
+       "BILG0005              212.8478             164.0044           48.4312   \n",
+       "BLUEYS0001             65.4628              19.2938           46.1690   \n",
+       "BLUEYS0002             50.2084              10.3009           39.9074   \n",
+       "BLUEYS0003             50.6308              11.1682           39.4625   \n",
+       "BLUEYS0004             95.1608              31.3330           63.8279   \n",
+       "BLUEYS0005            141.0643              58.2545           82.8098   \n",
+       "BLUEYS0006             88.4207              51.6205           36.2553   \n",
+       "BOAT0001               23.8510              23.5660           -0.0264   \n",
+       "BOAT0002               37.6524              14.0209           23.6316   \n",
+       "...                        ...                  ...               ...   \n",
+       "TREACH0014             97.5323              46.2994           51.0816   \n",
+       "TREACH0015             96.8327              45.1962           51.6364   \n",
+       "TREACH0016            106.9083              66.0567           40.3629   \n",
+       "WAMBE0001             132.3413             143.4459           -9.7255   \n",
+       "WAMBE0002             151.1833             126.9844           23.9548   \n",
+       "WAMBE0003             154.1788             117.9441           36.2425   \n",
+       "WAMBE0004             137.8449              76.6007           61.2725   \n",
+       "WAMBE0005                  NaN                  NaN               NaN   \n",
+       "WAMBE0006             205.8453             186.0784           22.5892   \n",
+       "WAMBE0007              80.4674              35.4614           45.0059   \n",
+       "WAMBE0008              88.4574              40.3200           48.1374   \n",
+       "WAMBE0009              70.9159              26.1742           44.7418   \n",
+       "WAMBE0010              58.6604              18.0418           40.6186   \n",
+       "WAMBE0011              59.2415              16.3165           42.9250   \n",
+       "WAMBE0012              74.4189              23.0232           51.3957   \n",
+       "WAMBE0013              70.4964              22.7546           47.7419   \n",
+       "WAMBE0014              68.0896              24.1853           43.9043   \n",
+       "WAMBE0015              55.0780              16.0119           39.0660   \n",
+       "WAMBE0016              96.7687              39.8224           56.9463   \n",
+       "WAMBE0017              35.2987               8.5140           26.7847   \n",
+       "WAMBE0018              40.9407              10.5147           30.4260   \n",
+       "WAMBE0019              38.2838               9.2156           29.0682   \n",
+       "WAMBE0020                  NaN                  NaN               NaN   \n",
+       "WAMBE0021                  NaN                  NaN               NaN   \n",
+       "WAMBE0022               0.5516               0.2840            0.2675   \n",
+       "WAMBE0023               3.3761               0.3020            3.0741   \n",
+       "WAMBE0024              60.8648              31.2794           29.5854   \n",
+       "WAMBE0025              45.1055              14.6028           30.5028   \n",
+       "WAMBE0026              32.1502              12.9335           19.2167   \n",
+       "WAMBE0027              26.2310              18.6828            7.5482   \n",
+       "\n",
+       "            swash_pct_change  prestorm_dune_face_vol  poststorm_dune_face_vol  \\\n",
+       "site_id                                                                         \n",
+       "AVOCAn0001           31.2620                165.4760                 166.7296   \n",
+       "AVOCAn0002           37.0803                256.4137                 258.9174   \n",
+       "AVOCAn0003           45.5960                372.7031                 373.9198   \n",
+       "AVOCAn0004           60.7674                275.1689                 276.0476   \n",
+       "AVOCAn0005           65.8524                268.5194                 263.4262   \n",
+       "AVOCAn0006           65.1438                202.6770                 198.2397   \n",
+       "AVOCAn0007           64.0168                149.8479                 143.1312   \n",
+       "AVOCAn0008           54.4567                187.9201                 187.3459   \n",
+       "AVOCAn0009           97.1908                     NaN                      NaN   \n",
+       "AVOCAs0001               NaN                     NaN                      NaN   \n",
+       "AVOCAs0002           72.6226                     NaN                      NaN   \n",
+       "AVOCAs0003           43.5282                     NaN                      NaN   \n",
+       "AVOCAs0004           53.3691                     NaN                      NaN   \n",
+       "AVOCAs0005           42.2088                     NaN                      NaN   \n",
+       "AVOCAs0006           39.8157                     NaN                      NaN   \n",
+       "AVOCAs0007           32.9331                     NaN                      NaN   \n",
+       "AVOCAs0008           14.2770                     NaN                      NaN   \n",
+       "BILG0001             60.9913                     NaN                      NaN   \n",
+       "BILG0002             36.8876                     NaN                      NaN   \n",
+       "BILG0003             36.5817                 41.1469                  40.6081   \n",
+       "BILG0004             26.5512                 11.2211                  11.0892   \n",
+       "BILG0005             22.7539                613.6156                 606.2766   \n",
+       "BLUEYS0001           70.5270                130.7447                 120.5446   \n",
+       "BLUEYS0002           79.4836                512.0154                 477.1774   \n",
+       "BLUEYS0003           77.9418                443.0853                 414.3901   \n",
+       "BLUEYS0004           67.0737                287.5805                 272.4267   \n",
+       "BLUEYS0005           58.7036                539.3864                 520.0732   \n",
+       "BLUEYS0006           41.0032                271.6036                 267.1954   \n",
+       "BOAT0001             -0.1108                     NaN                      NaN   \n",
+       "BOAT0002             62.7624                     NaN                      NaN   \n",
+       "...                      ...                     ...                      ...   \n",
+       "TREACH0014           52.3740                508.7400                 505.7877   \n",
+       "TREACH0015           53.3254                690.8275                 683.4458   \n",
+       "TREACH0016           37.7547                508.0014                 499.6315   \n",
+       "WAMBE0001            -7.3488                665.9898                 667.5923   \n",
+       "WAMBE0002            15.8449                385.8467                 386.7284   \n",
+       "WAMBE0003            23.5068                694.2226                 700.5105   \n",
+       "WAMBE0004            44.4503                559.5485                 569.8591   \n",
+       "WAMBE0005                NaN                     NaN                      NaN   \n",
+       "WAMBE0006            10.9739                 55.0898                  55.8919   \n",
+       "WAMBE0007            55.9307                178.1005                 178.5439   \n",
+       "WAMBE0008            54.4187                258.7513                 258.3849   \n",
+       "WAMBE0009            63.0913                267.3725                 258.3720   \n",
+       "WAMBE0010            69.2437                187.5259                 161.9748   \n",
+       "WAMBE0011            72.4577                197.0129                 175.2512   \n",
+       "WAMBE0012            69.0627                178.4783                 168.3475   \n",
+       "WAMBE0013            67.7224                231.1513                 195.2581   \n",
+       "WAMBE0014            64.4802                 82.4268                  61.2601   \n",
+       "WAMBE0015            70.9286                     NaN                      NaN   \n",
+       "WAMBE0016            58.8479                     NaN                      NaN   \n",
+       "WAMBE0017            75.8801                     NaN                      NaN   \n",
+       "WAMBE0018            74.3173                     NaN                      NaN   \n",
+       "WAMBE0019            75.9282                     NaN                      NaN   \n",
+       "WAMBE0020                NaN                     NaN                      NaN   \n",
+       "WAMBE0021                NaN                     NaN                      NaN   \n",
+       "WAMBE0022            48.5063                     NaN                      NaN   \n",
+       "WAMBE0023            91.0554                     NaN                      NaN   \n",
+       "WAMBE0024            48.6084                     NaN                      NaN   \n",
+       "WAMBE0025            67.6253                     NaN                      NaN   \n",
+       "WAMBE0026            59.7716                     NaN                      NaN   \n",
+       "WAMBE0027            28.7759                     NaN                      NaN   \n",
+       "\n",
+       "            dune_face_vol_change  dune_face_pct_change storm_regime  \n",
+       "site_id                                                              \n",
+       "AVOCAn0001                0.0000                0.0000        swash  \n",
+       "AVOCAn0002                0.0000                0.0000        swash  \n",
+       "AVOCAn0003               -0.3147               -0.0844        swash  \n",
+       "AVOCAn0004                0.4104                0.1492        swash  \n",
+       "AVOCAn0005                7.4196                2.7631    collision  \n",
+       "AVOCAn0006                4.7944                2.3655    collision  \n",
+       "AVOCAn0007                7.1323                4.7597    collision  \n",
+       "AVOCAn0008                2.8297                1.5058    collision  \n",
+       "AVOCAn0009                   NaN                   NaN          NaN  \n",
+       "AVOCAs0001                   NaN                   NaN          NaN  \n",
+       "AVOCAs0002                   NaN                   NaN          NaN  \n",
+       "AVOCAs0003                   NaN                   NaN          NaN  \n",
+       "AVOCAs0004                   NaN                   NaN          NaN  \n",
+       "AVOCAs0005                   NaN                   NaN          NaN  \n",
+       "AVOCAs0006                   NaN                   NaN          NaN  \n",
+       "AVOCAs0007                   NaN                   NaN          NaN  \n",
+       "AVOCAs0008                   NaN                   NaN          NaN  \n",
+       "BILG0001                     NaN                   NaN          NaN  \n",
+       "BILG0002                     NaN                   NaN          NaN  \n",
+       "BILG0003                  0.0000                0.0000        swash  \n",
+       "BILG0004                 -0.0132               -0.1179        swash  \n",
+       "BILG0005                  5.7738                0.9410    collision  \n",
+       "BLUEYS0001                9.5601                7.3121    collision  \n",
+       "BLUEYS0002               33.2825                6.5003    collision  \n",
+       "BLUEYS0003               24.8870                5.6167    collision  \n",
+       "BLUEYS0004               12.9641                4.5080    collision  \n",
+       "BLUEYS0005               12.0470                2.2335    collision  \n",
+       "BLUEYS0006                3.6045                1.3271    collision  \n",
+       "BOAT0001                     NaN                   NaN          NaN  \n",
+       "BOAT0002                     NaN                   NaN          NaN  \n",
+       "...                          ...                   ...          ...  \n",
+       "TREACH0014                0.4254                0.0836        swash  \n",
+       "TREACH0015                1.5086                0.2184          NaN  \n",
+       "TREACH0016                0.3386                0.0667        swash  \n",
+       "WAMBE0001                 0.0410                0.0062        swash  \n",
+       "WAMBE0002                -0.0449               -0.0116        swash  \n",
+       "WAMBE0003                -4.2136               -0.6070        swash  \n",
+       "WAMBE0004                -4.4590               -0.7969        swash  \n",
+       "WAMBE0005                    NaN                   NaN          NaN  \n",
+       "WAMBE0006                 0.0000                0.0000        swash  \n",
+       "WAMBE0007                 0.4727                0.2654        swash  \n",
+       "WAMBE0008                -1.2073               -0.4666        swash  \n",
+       "WAMBE0009                 9.8041                3.6668    collision  \n",
+       "WAMBE0010                25.3087               13.4961    collision  \n",
+       "WAMBE0011                21.9882               11.1608    collision  \n",
+       "WAMBE0012                10.0386                5.6246    collision  \n",
+       "WAMBE0013                35.8072               15.4908    collision  \n",
+       "WAMBE0014                21.1718               25.6856    collision  \n",
+       "WAMBE0015                    NaN                   NaN          NaN  \n",
+       "WAMBE0016                    NaN                   NaN          NaN  \n",
+       "WAMBE0017                    NaN                   NaN          NaN  \n",
+       "WAMBE0018                    NaN                   NaN          NaN  \n",
+       "WAMBE0019                    NaN                   NaN          NaN  \n",
+       "WAMBE0020                    NaN                   NaN          NaN  \n",
+       "WAMBE0021                    NaN                   NaN          NaN  \n",
+       "WAMBE0022                    NaN                   NaN          NaN  \n",
+       "WAMBE0023                    NaN                   NaN          NaN  \n",
+       "WAMBE0024                    NaN                   NaN          NaN  \n",
+       "WAMBE0025                    NaN                   NaN          NaN  \n",
+       "WAMBE0026                    NaN                   NaN          NaN  \n",
+       "WAMBE0027                    NaN                   NaN          NaN  \n",
+       "\n",
+       "[1768 rows x 9 columns]"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": []
   }
  ],
  "metadata": {
+  "hide_input": false,
   "kernelspec": {
    "display_name": "Python 3",
    "language": "python",
@@ -1033,9 +2036,14 @@
    "title_cell": "Table of Contents",
    "title_sidebar": "Contents",
    "toc_cell": false,
-   "toc_position": {},
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "275.797px"
+   },
    "toc_section_display": true,
-   "toc_window_display": false
+   "toc_window_display": true
   },
   "varInspector": {
    "cols": {
diff --git a/notebooks/03_dune_to_vs_runup.ipynb b/notebooks/03_dune_to_vs_runup.ipynb
index b99ab01..86154e4 100644
--- a/notebooks/03_dune_to_vs_runup.ipynb
+++ b/notebooks/03_dune_to_vs_runup.ipynb
@@ -4,17 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Investigate how dune toe compares to R_high"
+    "## Setup "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "heading_collapsed": true
+   },
+   "source": [
+    "### Import packages"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2018-12-03T23:04:57.331037Z",
-     "start_time": "2018-12-03T23:04:57.006071Z"
-    }
+     "end_time": "2018-12-10T04:02:38.872624Z",
+     "start_time": "2018-12-10T04:02:38.448908Z"
+    },
+    "hidden": true
    },
    "outputs": [],
    "source": [
@@ -25,12 +35,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 7,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2018-12-03T23:04:58.749827Z",
-     "start_time": "2018-12-03T23:04:57.333943Z"
-    }
+     "end_time": "2018-12-10T04:03:27.147221Z",
+     "start_time": "2018-12-10T04:03:27.141204Z"
+    },
+    "hidden": true
    },
    "outputs": [],
    "source": [
@@ -39,18 +50,29 @@
     "import pandas as pd\n",
     "import numpy as np\n",
     "import os\n",
-    "\n",
+    "import matplotlib\n",
     "import plotly\n",
     "import plotly.graph_objs as go\n",
     "import plotly.plotly as py\n",
     "import plotly.tools as tls\n",
     "import plotly.figure_factory as ff\n",
-    "import plotly.io as pio"
+    "import plotly.io as pio\n",
+    "from plotly import tools\n",
+    "\n",
+    "from copy import copy\n",
+    "import scipy\n",
+    "from sklearn import svm\n",
+    "\n",
+    "# Suppress FutureWarning messages (e.g. numpy's elementwise-comparison warning)\n",
+    "import warnings\n",
+    "warnings.simplefilter(action=\"ignore\", category=FutureWarning)"
    ]
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "heading_collapsed": true
+   },
    "source": [
     "### Load data\n",
     "Load data from the `./data/interim/` folder and parse into `pandas` dataframes."
@@ -58,35 +80,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 8,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2018-12-03T23:05:05.800496Z",
-     "start_time": "2018-12-03T23:04:58.751721Z"
-    }
+     "end_time": "2018-12-10T04:03:40.638982Z",
+     "start_time": "2018-12-10T04:03:31.765531Z"
+    },
+    "hidden": true
    },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Importing profiles.csv\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\z5189959\\Desktop\\nsw-2016-storm-impact\\.venv\\lib\\site-packages\\numpy\\lib\\arraysetops.py:522: FutureWarning:\n",
-      "\n",
-      "elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "Importing profiles.csv\n",
       "Importing profile_features.csv\n",
       "Importing impacts_forecasted_foreshore_slope_sto06.csv\n",
       "Importing impacts_forecasted_mean_slope_sto06.csv\n",
@@ -125,31 +132,49 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "heading_collapsed": true
+   },
    "source": [
-    "### Compare predicted R_high with D_low\n",
-    "Let's see what the distribution of R_high is compared with D_low. How far off are the predicted water levels compared with the dune toes?"
+    "## Difference between $R_{high}$ and $D_{low}$\n",
+    "Since the Storm Impact Regime is so dependent on the predicted $R_{high}$ and observed $D_{low}$ levels, let's investigate the difference between these two variables in more detail.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "hidden": true
+   },
+   "source": [
+    "### Gather data\n",
+    "\n",
+    "First, let's split the `site_ids` by whether we observed swash or collision and by whether we predicted swash or collision. We want to identify whether there are any differences between these four groups."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 10,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2018-12-04T02:20:58.446500Z",
-     "start_time": "2018-12-04T02:20:58.439480Z"
-    }
+     "end_time": "2018-12-10T04:05:15.741391Z",
+     "start_time": "2018-12-10T04:05:15.734352Z"
+    },
+    "hidden": true
    },
    "outputs": [],
    "source": [
-    "def get_site_ids(df_forecasted, df_observed, forecasted_regime, observed_regime):\n",
+    "def get_site_ids(df_forecasted, df_observed, forecasted_regime,\n",
+    "                 observed_regime):\n",
     "    \"\"\"\n",
     "    Returns list of site_ids which match the given forecasted and observed regime\n",
     "    \"\"\"\n",
-    "    set1 = set(df_forecasted.query(\"storm_regime == '{}'\".format(\n",
-    "        forecasted_regime)).index.get_level_values('site_id'))\n",
-    "    set2 = set(df_observed.query(\"storm_regime == '{}'\".format(\n",
-    "        observed_regime)).index.get_level_values('site_id'))\n",
+    "    set1 = set(\n",
+    "        df_forecasted.query(\"storm_regime == '{}'\".format(forecasted_regime)).\n",
+    "        index.get_level_values('site_id'))\n",
+    "    set2 = set(\n",
+    "        df_observed.query(\"storm_regime == '{}'\".format(observed_regime)).\n",
+    "        index.get_level_values('site_id'))\n",
     "    return sorted(list(set1.intersection(set2)))\n",
     "\n",
     "\n",
@@ -166,23 +191,104 @@
     "    # Join into one dataframe\n",
     "    df_twl_toes = pd.concat([df_toes, df_R_highs], axis=1, sort=True)\n",
     "    df_twl_toes['diff'] = df_twl_toes['R_high'] - df_twl_toes['dune_toe_z']\n",
-    "    return df_twl_toes['diff']\n"
+    "    return df_twl_toes['diff']"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 15,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2018-12-04T03:55:51.858020Z",
-     "start_time": "2018-12-04T03:55:50.879155Z"
-    }
+     "end_time": "2018-12-10T04:13:00.035545Z",
+     "start_time": "2018-12-10T04:12:59.176352Z"
+    },
+    "hidden": true
+   },
+   "outputs": [],
+   "source": [
+    "# Identify sites with an observed swash regime, split into correctly predicted (swash) and overpredicted (collision) forecasts\n",
+    "\n",
+    "swash_overpredicted_site_ids = get_site_ids(\n",
+    "    df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
+    "    df_observed=impacts['observed'],\n",
+    "    forecasted_regime='collision',\n",
+    "    observed_regime='swash')\n",
+    "swash_overpredicted_diffs = get_R_high_D_low_diff(\n",
+    "    site_ids=swash_overpredicted_site_ids,\n",
+    "    df_profile_features=df_profile_features,\n",
+    "    df_twls=twls['forecasted']['mean_slope_sto06'])\n",
+    "\n",
+    "swash_correct_site_ids = get_site_ids(\n",
+    "    df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
+    "    df_observed=impacts['observed'],\n",
+    "    forecasted_regime='swash',\n",
+    "    observed_regime='swash')\n",
+    "swash_correct_diffs = get_R_high_D_low_diff(\n",
+    "    site_ids=swash_correct_site_ids,\n",
+    "    df_profile_features=df_profile_features,\n",
+    "    df_twls=twls['forecasted']['mean_slope_sto06'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2018-12-10T04:12:58.434634Z",
+     "start_time": "2018-12-10T04:12:57.096839Z"
+    },
+    "hidden": true
+   },
+   "outputs": [],
+   "source": [
+    "# Identify sites with an observed collision regime, split into correctly predicted (collision) and underpredicted (swash) forecasts\n",
+    "\n",
+    "collision_underpredicted_site_ids = get_site_ids(\n",
+    "    df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
+    "    df_observed=impacts['observed'],\n",
+    "    forecasted_regime='swash',\n",
+    "    observed_regime='collision')\n",
+    "collision_underpredicted_diffs = get_R_high_D_low_diff(\n",
+    "    site_ids=collision_underpredicted_site_ids,\n",
+    "    df_profile_features=df_profile_features,\n",
+    "    df_twls=twls['forecasted']['mean_slope_sto06'])\n",
+    "\n",
+    "collision_correct_site_ids = get_site_ids(\n",
+    "    df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
+    "    df_observed=impacts['observed'],\n",
+    "    forecasted_regime='collision',\n",
+    "    observed_regime='collision')\n",
+    "collision_correct_diffs = get_R_high_D_low_diff(\n",
+    "    site_ids=collision_correct_site_ids,\n",
+    "    df_profile_features=df_profile_features,\n",
+    "    df_twls=twls['forecasted']['mean_slope_sto06'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "hidden": true
+   },
+   "source": [
+    "### Plot difference in $R_{high}$ and $D_{low}$ for swash and collision regimes\n",
+    "What does the distribution of elevations look like when we observe swash and collision regimes? Are there any differences between correctly and incorrectly predicted swash regime impacts?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2018-12-10T04:06:02.634355Z",
+     "start_time": "2018-12-10T04:05:42.644585Z"
+    },
+    "hidden": true
    },
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "94883b85733444528fe8a73379ce4611",
+       "model_id": "f53f0ffc577b406ab56c1357c1145683",
        "version_major": 2,
        "version_minor": 0
       },
@@ -198,79 +304,70 @@
     }
    ],
    "source": [
-    "swash_overpredicted_site_ids = get_site_ids(df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
-    "                                            df_observed=impacts['observed'],\n",
-    "                                            forecasted_regime='collision',\n",
-    "                                            observed_regime='swash')\n",
-    "swash_overpredicted_diffs = get_R_high_D_low_diff(site_ids=swash_overpredicted_site_ids,\n",
-    "                                                  df_profile_features=df_profile_features,\n",
-    "                                                  df_twls=twls['forecasted']['mean_slope_sto06'])\n",
-    "\n",
-    "swash_correct_site_ids = get_site_ids(df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
-    "                                      df_observed=impacts['observed'],\n",
-    "                                      forecasted_regime='swash',\n",
-    "                                      observed_regime='swash')\n",
-    "swash_correct_diffs = get_R_high_D_low_diff(site_ids=swash_correct_site_ids,\n",
-    "                                            df_profile_features=df_profile_features,\n",
-    "                                            df_twls=twls['forecasted']['mean_slope_sto06'])\n",
-    "\n",
-    "\n",
-    "trace1 = go.Histogram(y=swash_correct_diffs.tolist(),\n",
-    "                      opacity=0.75,\n",
-    "                      name='Correctly predicted',\n",
-    "                      marker=dict(\n",
-    "                          color='#67a9cf',\n",
-    "                        ),\n",
-    "                    ybins=dict(\n",
-    "                size=0.1\n",
-    "),)\n",
-    "trace2 = go.Histogram(y=swash_overpredicted_diffs.tolist(),\n",
-    "                      opacity=0.75,\n",
-    "                      name='Overpredicted',\n",
-    "                      marker=dict(\n",
-    "                          color='#ef8a62',\n",
-    "),\n",
-    "                    ybins=dict(\n",
-    "                size=0.1\n",
-    "),)\n",
+    "trace1 = go.Histogram(\n",
+    "    y=swash_correct_diffs.tolist(),\n",
+    "    opacity=0.75,\n",
+    "    name='Correctly predicted',\n",
+    "    marker=dict(color='#67a9cf', ),\n",
+    "    ybins=dict(size=0.1),\n",
+    ")\n",
+    "trace2 = go.Histogram(\n",
+    "    y=swash_overpredicted_diffs.tolist(),\n",
+    "    opacity=0.75,\n",
+    "    name='Overpredicted',\n",
+    "    marker=dict(color='#ef8a62', ),\n",
+    "    ybins=dict(size=0.1),\n",
+    ")\n",
     "\n",
     "layout = go.Layout(\n",
     "    title='R_high - D_low<br>Swash Regime',\n",
     "    barmode='overlay',\n",
-    "    yaxis=dict(\n",
-    "        title='z (m AHD)'\n",
-    "    ),\n",
-    "    xaxis=dict(\n",
-    "        title='Count'\n",
-    "    ),\n",
+    "    yaxis=dict(title='z (m AHD)'),\n",
+    "    xaxis=dict(title='Count'),\n",
     "    bargap=0.2,\n",
     "    bargroupgap=0.1,\n",
-    "    legend=dict(x=.6, y=1)\n",
-    ")\n",
+    "    legend=dict(x=.6, y=1))\n",
     "\n",
     "g_plot_swash = go.FigureWidget(data=[trace2, trace1], layout=layout)\n",
     "\n",
     "# To output to file\n",
-    "img_bytes = pio.write_image(g_plot_swash, 'g_plot_swash.png',format='png', width=600, height=400, scale=5)\n",
+    "img_bytes = pio.write_image(\n",
+    "    g_plot_swash,\n",
+    "    '02_R_high_D_low_swash.png',\n",
+    "    format='png',\n",
+    "    width=600,\n",
+    "    height=400,\n",
+    "    scale=5)\n",
     "\n",
-    "g_plot_swash\n",
-    "\n"
+    "g_plot_swash"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "hidden": true
+   },
+   "source": [
+    "The plot above shows that when $R_{high}$ - $D_{low}$ is $<0$, swash is correctly predicted. This is by definition, so it is not surprising. The most common value of $R_{high}$ - $D_{low}$ is slightly below $0$, so it appears that we have correctly predicted a majority of the observed swash regime events. \n",
+    "\n",
+    "Let's do the same thing, now considering `site_ids` where we have observed collision."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 16,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2018-12-04T04:10:47.339268Z",
-     "start_time": "2018-12-04T04:10:45.796887Z"
-    }
+     "end_time": "2018-12-10T04:13:03.703119Z",
+     "start_time": "2018-12-10T04:13:03.463485Z"
+    },
+    "hidden": true
    },
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3933da9295fe446f9413bca8842100c2",
+       "model_id": "4ec08bf2ea6f482ea3c52aa3348a05e2",
        "version_major": 2,
        "version_minor": 0
       },
@@ -286,105 +383,794 @@
     }
    ],
    "source": [
-    "collision_underpredicted_site_ids = get_site_ids(df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
-    "                                       df_observed=impacts['observed'],\n",
-    "                                       forecasted_regime='swash',\n",
-    "                                       observed_regime='collision')\n",
-    "collision_underpredicted_diffs = get_R_high_D_low_diff(site_ids=collision_underpredicted_site_ids,\n",
-    "                                             df_profile_features=df_profile_features,\n",
-    "                                             df_twls=twls['forecasted']['mean_slope_sto06'])\n",
-    "\n",
-    "collision_correct_site_ids = get_site_ids(df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
-    "                                df_observed=impacts['observed'],\n",
-    "                                forecasted_regime='collision',\n",
-    "                                observed_regime='collision')\n",
-    "collision_correct_diffs = get_R_high_D_low_diff(site_ids=collision_correct_site_ids,\n",
-    "                                      df_profile_features=df_profile_features,\n",
-    "                                      df_twls=twls['forecasted']['mean_slope_sto06'])\n",
-    "\n",
-    "\n",
-    "trace1 = go.Histogram(y=collision_correct_diffs.tolist(),\n",
-    "                     opacity=0.75,\n",
-    "                     name='Correctly predicted',\n",
-    "                      marker=dict(\n",
-    "                          color='#67a9cf',\n",
-    "                       ),\n",
-    "                    ybins=dict(\n",
-    "                size=0.1\n",
-    "),)\n",
-    "trace2 = go.Histogram(y=collision_underpredicted_diffs.tolist(),\n",
-    "                     opacity=0.75,\n",
-    "                     name='Underpredicted',\n",
-    "                      marker=dict(\n",
-    "                          color='#ef8a62',\n",
-    "                       ),\n",
-    "                    ybins=dict(\n",
-    "                size=0.1\n",
-    "),)\n",
+    "trace1 = go.Histogram(\n",
+    "    y=collision_correct_diffs.tolist(),\n",
+    "    opacity=0.75,\n",
+    "    name='Correctly predicted',\n",
+    "    marker=dict(color='#67a9cf', ),\n",
+    "    ybins=dict(size=0.1),\n",
+    ")\n",
+    "trace2 = go.Histogram(\n",
+    "    y=collision_underpredicted_diffs.tolist(),\n",
+    "    opacity=0.75,\n",
+    "    name='Underpredicted',\n",
+    "    marker=dict(color='#ef8a62', ),\n",
+    "    ybins=dict(size=0.1),\n",
+    ")\n",
     "\n",
     "layout = go.Layout(\n",
     "    title='R_high - D_low<br>Collision Regime',\n",
     "    barmode='overlay',\n",
-    "    yaxis=dict(\n",
-    "        title='z (m AHD)'\n",
-    "    ),\n",
-    "    xaxis=dict(\n",
-    "        title='Count'\n",
-    "    ),\n",
+    "    yaxis=dict(title='z (m AHD)'),\n",
+    "    xaxis=dict(title='Count'),\n",
     "    bargap=0.2,\n",
     "    bargroupgap=0.1,\n",
-    "    legend=dict(x=.6, y=1)\n",
-    ")\n",
+    "    legend=dict(x=.6, y=1))\n",
     "\n",
     "g_plot_collision = go.FigureWidget(data=[trace2, trace1], layout=layout)\n",
     "\n",
     "# To output to file\n",
-    "img_bytes = pio.write_image(g_plot_collision, 'g_plot_collision.png',format='png', width=600, height=400, scale=5)\n",
+    "img_bytes = pio.write_image(\n",
+    "    g_plot_collision,\n",
+    "    '02_R_high_D_low_collision.png',\n",
+    "    format='png',\n",
+    "    width=600,\n",
+    "    height=400,\n",
+    "    scale=5)\n",
     "\n",
     "g_plot_collision"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "hidden": true
+   },
+   "source": [
+    "We can see a trend similar to the swash regime, except flipped. A majority of the correctly forecasted collision regimes occur when $R_{high}$ - $D_{low}$ is $>0$, by definition. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "hidden": true
+   },
+   "source": [
+    "### TODO Does dune toe lower?\n",
+    "Are there any patterns that dictate whether the dune toe rises or lowers when subject to collision? Is it just based on the peak $R_{high}$ level, similar to equilibrium theory?\n"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Does dune toe lower?\n"
+    "## Relationship between parameters\n",
+    "Let's further investigate the relationship between hydrodynamic and morphodynamic parameters and see if they can tell us any more about how the storm regime may be correctly or incorrectly predicted."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Add functions for adding parameters to the dataframe\n",
+    "We need some additional functions which will add parameters to our dataframe so we can plot them."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 25,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2018-12-10T04:55:43.229433Z",
+     "start_time": "2018-12-10T04:55:43.218402Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def add_berm_width(df, df_profiles, df__profile_features):\n",
+    "    \"\"\"\n",
+    "    Adds a new column to the dataframe, with the prestorm berm width\n",
+    "    \"\"\"\n",
+    "    # Get x coordinates of dune toe and limit of survey (z=0)\n",
+    "    df_profile_end_x = df_profiles.query('profile_type==\"prestorm\"').dropna(\n",
+    "        subset=['z']).groupby('site_id').tail(1).reset_index(\n",
+    "            ['profile_type', 'x']).x.rename('x_end').to_frame()\n",
+    "    df_profile_dune_toe_x = df_profile_features.query(\n",
+    "        'profile_type==\"prestorm\"').dune_toe_x.to_frame()\n",
+    "\n",
+    "    # Merge and take the difference to calculate berm width\n",
+    "    df_merged = df_profile_end_x.merge(\n",
+    "        df_profile_dune_toe_x, left_index=True, right_index=True)\n",
+    "    berm_width = (df_merged['x_end'] -\n",
+    "                  df_merged['dune_toe_x']).rename('berm_width').to_frame()\n",
+    "\n",
+    "    # Return the dataframe with the berm_width col merged\n",
+    "    return df.merge(berm_width, left_index=True, right_index=True)\n",
+    "\n",
+    "\n",
+    "def add_observed_regime(df, df_observed, new_col='observed_storm_regime'):\n",
+    "    \"\"\"\n",
+    "    Adds a new column to the dataframe, with the observed storm regime\n",
+    "    \"\"\"\n",
+    "    return df.merge(\n",
+    "        impacts['observed'].storm_regime.rename(new_col).to_frame(),\n",
+    "        left_index=True,\n",
+    "        right_index=True)\n",
+    "\n",
+    "\n",
+    "def add_mean_slope(df, df_twl):\n",
+    "    \"\"\"\n",
+    "    Adds a new column to the dataframe with prestorm mean slope\n",
+    "    \"\"\"\n",
+    "    df_mean_slope = df_twl.groupby('site_id').first().beta.rename(\n",
+    "        'mean_slope').to_frame()\n",
+    "    return df.merge(df_mean_slope, left_index=True, right_index=True)\n",
+    "\n",
+    "\n",
+    "def add_prestorm_berm_vol(df, df_impacts_observed):\n",
+    "    \"\"\"\n",
+    "    Adds a new column to the dataframe with prestorm berm volume\n",
+    "    \"\"\"\n",
+    "    return df.merge(\n",
+    "        df_impacts_observed.prestorm_swash_vol.rename('prestorm_berm_vol').\n",
+    "        to_frame(),\n",
+    "        left_index=True,\n",
+    "        right_index=True)\n",
+    "\n",
+    "\n",
+    "def add_prediction_class(df_impacts, prediction_classes):\n",
+    "    \"\"\"\n",
+    "    Adds a column which groups site_ids into the predicted and observed storm regime combination\n",
+    "    \"\"\"\n",
+    "    for prediction_class in prediction_classes:\n",
+    "        df_impacts.loc[df_impacts.index.isin(prediction_class['site_ids']),\n",
+    "                       'prediction_class'] = prediction_class['class']\n",
+    "    return df_impacts"
+   ]
+  },
+  {
+   "cell_type": "markdown",
    "metadata": {},
+   "source": [
+    "### Create the dataframe\n",
+    "We need to combine and add data into a single dataframe for comparison purposes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2018-12-10T04:55:45.293320Z",
+     "start_time": "2018-12-10T04:55:44.802910Z"
+    }
+   },
    "outputs": [],
-   "source": []
+   "source": [
+    "# Start with the forecasted impact dataframe\n",
+    "df = impacts['forecasted']['mean_slope_sto06']\n",
+    "\n",
+    "# Add a column which groups site_ids into the predicted and observed storm regime combination\n",
+    "prediction_classes = [\n",
+    "    {\n",
+    "        'site_ids': collision_underpredicted_site_ids,\n",
+    "        'class': 'collision_underpredicted'\n",
+    "    },\n",
+    "    {\n",
+    "        'site_ids': collision_correct_site_ids,\n",
+    "        'class': 'collision_correct'\n",
+    "    },\n",
+    "    {\n",
+    "        'site_ids': swash_overpredicted_site_ids,\n",
+    "        'class': 'swash_overpredicted'\n",
+    "    },\n",
+    "    {\n",
+    "        'site_ids': swash_correct_site_ids,\n",
+    "        'class': 'swash_correct'\n",
+    "    },\n",
+    "]\n",
+    "df = add_prediction_class(df, prediction_classes)\n",
+    "\n",
+    "# Drop site_ids where we do not have a prediction class (caused by NaNs)\n",
+    "df = df.dropna(subset=['prediction_class'])\n",
+    "\n",
+    "# Add additional parameters\n",
+    "df = add_observed_regime(df, impacts['observed'])\n",
+    "df = add_berm_width(df, df_profiles, df_profile_features)\n",
+    "df = add_mean_slope(df, df_twl=twls['forecasted']['mean_slope_sto06'])\n",
+    "df = add_prestorm_berm_vol(df, df_impacts_observed=impacts['observed'])\n",
+    "df['R_high_dune_toe_diff'] = df['R_high'] - df['dune_toe_z']\n",
+    "df['R_high_dune_toe_ratio'] = df['R_high'] / df['dune_toe_z']"
+   ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create scatter plot matrix of parameter interactions\n",
+    "Plot each hydrodynamic and morphodynamic parameter against each other and see if we can identify any patterns."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
    "metadata": {
-    "heading_collapsed": true
+    "ExecuteTime": {
+     "end_time": "2018-12-10T05:52:23.384061Z",
+     "start_time": "2018-12-10T05:52:21.345652Z"
+    }
    },
+   "outputs": [],
+   "source": [
+    "# Setup colors for different classes\n",
+    "text = df['prediction_class'].tolist()\n",
+    "class_code = {x['class']: n for n, x in enumerate(prediction_classes)}\n",
+    "color_vals = [class_code[cl] for cl in df['prediction_class']]\n",
+    "\n",
+    "# Each prediction class will have its own color\n",
+    "pl_colorscale = [[0.0, '#d7191c'], [0.25, '#d7191c'], [0.25, '#fdae61'],\n",
+    "                 [0.5, '#fdae61'], [0.5, '#2c7bb6'], [0.75, '#2c7bb6'],\n",
+    "                 [0.75, '#abd9e9'], [1, '#abd9e9']]\n",
+    "\n",
+    "# Setup plotly scatterplot matrix\n",
+    "trace1 = go.Splom(\n",
+    "    dimensions=[\n",
+    "        dict(label='dune_toe_z', values=df['dune_toe_z']),\n",
+    "        dict(label='R_high', values=df['R_high']),\n",
+    "        dict(label='berm_width', values=df['berm_width']),\n",
+    "        dict(\n",
+    "            label='twl_dune_toe_z_exceedance_hrs',\n",
+    "            values=df['twl_dune_toe_z_exceedance_hrs']),\n",
+    "        dict(label='R_high_dune_toe_diff', values=df['R_high_dune_toe_diff']),\n",
+    "        dict(\n",
+    "            label='R_high_dune_toe_ratio', values=df['R_high_dune_toe_ratio']),\n",
+    "        dict(label='mean_slope', values=df['mean_slope']),\n",
+    "        dict(label='prestorm_berm_vol', values=df['prestorm_berm_vol']),\n",
+    "    ],\n",
+    "    text=text,\n",
+    "    diagonal=dict(visible=False),\n",
+    "    showupperhalf=False,\n",
+    "    marker=dict(\n",
+    "        color=color_vals,\n",
+    "        size=2,\n",
+    "        colorscale=pl_colorscale,\n",
+    "        showscale=False,\n",
+    "        line=dict(width=0.1, color='rgb(230,230,230)')))\n",
+    "\n",
+    "axis = dict(showline=True, zeroline=False, gridcolor='#fff', ticklen=4)\n",
+    "\n",
+    "layout = go.Layout(\n",
+    "    title='Storm Impact Scatter Plot Matrix',\n",
+    "    dragmode='select',\n",
+    "    width=800,\n",
+    "    height=800,\n",
+    "    autosize=False,\n",
+    "    hovermode='closest',\n",
+    "    plot_bgcolor='rgba(240,240,240, 0.95)',\n",
+    "    xaxis1=dict(axis),\n",
+    "    xaxis2=dict(axis),\n",
+    "    xaxis3=dict(axis),\n",
+    "    xaxis4=dict(axis),\n",
+    "    xaxis5=dict(axis),\n",
+    "    xaxis6=dict(axis),\n",
+    "    xaxis7=dict(axis),\n",
+    "    xaxis8=dict(axis),\n",
+    "    yaxis1=dict(axis),\n",
+    "    yaxis2=dict(axis),\n",
+    "    yaxis3=dict(axis),\n",
+    "    yaxis4=dict(axis),\n",
+    "    yaxis5=dict(axis),\n",
+    "    yaxis6=dict(axis),\n",
+    "    yaxis7=dict(axis),\n",
+    "    yaxis8=dict(axis),\n",
+    ")\n",
+    "\n",
+    "# Change font of axis labels\n",
+    "for ax in layout:\n",
+    "    if 'xaxis' in ax or 'yaxis' in ax:\n",
+    "        layout[ax]['titlefont'] = {'size': 12}\n",
+    "\n",
+    "fig_scatter = go.FigureWidget(data=[trace1], layout=layout)\n",
+    "\n",
+    "# To output to file\n",
+    "img_bytes = pio.write_image(\n",
+    "    fig_scatter,\n",
+    "    '02_scatter_plot.png',\n",
+    "    format='png',\n",
+    "    width=1500,\n",
+    "    height=1500,\n",
+    "    scale=5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
    "source": [
-    "### What do over predicted and underpredicted profiles look like?"
+    "Jupyter gets a little bit slow when trying to display this plot interactively, so let's output it as an image to view.\n",
+    "![02_scatter_plot.png](02_scatter_plot.png)"
    ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create parameter confusion matrix\n",
+    "It's a bit hard to see the difference between the different categories, so let's do a confusion matrix, but with each cell being a plot of two variables against each other. First, list all the different parameters we have available to plot against:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
    "metadata": {
-    "hidden": true
+    "ExecuteTime": {
+     "end_time": "2018-12-10T05:27:34.225427Z",
+     "start_time": "2018-12-10T05:27:34.218427Z"
+    }
    },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['datetime', 'R_high', 'R_low', 'dune_toe_z', 'dune_crest_z',\n",
+       "       'storm_regime', 'twl_dune_toe_z_exceedance_hrs', 'prediction_class',\n",
+       "       'observed_storm_regime', 'berm_width', 'mean_slope',\n",
+       "       'prestorm_berm_vol', 'R_high_dune_toe_diff', 'R_high_dune_toe_ratio'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "Define a function for getting the average beach profile for a number of given site_ids:"
+    "df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2018-12-10T05:29:42.927017Z",
+     "start_time": "2018-12-10T05:29:38.603905Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "This is the format of your plot grid:\n",
+      "[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]\n",
+      "[ (2,1) x3,y3 ]  [ (2,2) x4,y4 ]\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "397f170343fe45b4acf9aded46595ac8",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "FigureWidget({\n",
+       "    'data': [{'marker': {'color': 'rgb(200,200,200)', 'size': 4},\n",
+       "              'mode': 'marker…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Define which columns we want to plot\n",
+    "x_col = 'prestorm_berm_vol'\n",
+    "y_col = 'R_high_dune_toe_diff'\n",
+    "marker_size = 4\n",
+    "\n",
+    "# Create 2x2 subplot figure confusion matrix\n",
+    "fig = tools.make_subplots(\n",
+    "    rows=2,\n",
+    "    cols=2,\n",
+    "    vertical_spacing=0.09,\n",
+    "    subplot_titles=(\n",
+    "        'Predicted Swash',\n",
+    "        'Predicted Collision',\n",
+    "        '',\n",
+    "        '',\n",
+    "    ))\n",
+    "\n",
+    "# Get data for all traces\n",
+    "x_all = df.loc[:, x_col]\n",
+    "y_all = df.loc[:, y_col]\n",
+    "\n",
+    "# Create underlying grey traces of all data, so we can compare each subplot with all the data.\n",
+    "trace5 = go.Scatter(\n",
+    "    mode='markers',\n",
+    "    x=x_all,\n",
+    "    y=y_all,\n",
+    "    showlegend=False,\n",
+    "    marker=dict(\n",
+    "        color='rgb(200,200,200)',\n",
+    "        size=marker_size,\n",
+    "    ))\n",
+    "fig.append_trace(trace5, 1, 1)\n",
+    "\n",
+    "trace6 = copy(trace5)\n",
+    "trace6.xaxis = 'x2'\n",
+    "trace6.yaxis = 'y'\n",
+    "fig.append_trace(trace6, 1, 2)\n",
+    "\n",
+    "trace7 = copy(trace5)\n",
+    "trace7.xaxis = 'x'\n",
+    "trace7.yaxis = 'y2'\n",
+    "fig.append_trace(trace7, 2, 1)\n",
+    "\n",
+    "trace8 = copy(trace5)\n",
+    "trace8.xaxis = 'x2'\n",
+    "trace8.yaxis = 'y2'\n",
+    "fig.append_trace(trace8, 2, 2)\n",
+    "\n",
+    "# Add actual data for each subplot\n",
+    "\n",
+    "# Predicted swash, observed collision\n",
+    "trace1 = go.Scatter(\n",
+    "    mode='markers',\n",
+    "    x=df.loc[df.index.isin(collision_underpredicted_site_ids), x_col],\n",
+    "    y=df.loc[df.index.isin(collision_underpredicted_site_ids), y_col],\n",
+    "    marker=dict(\n",
+    "        color='#fc8d59',\n",
+    "        size=marker_size,\n",
+    "        line=dict(color='rgb(231,231,231)', width=0.5)))\n",
+    "fig.append_trace(trace1, 2, 1)\n",
+    "\n",
+    "# Predicted collision, observed collision\n",
+    "trace2 = go.Scatter(\n",
+    "    mode='markers',\n",
+    "    x=df.loc[df.index.isin(collision_correct_site_ids), x_col],\n",
+    "    y=df.loc[df.index.isin(collision_correct_site_ids), y_col],\n",
+    "    marker=dict(\n",
+    "        color='#fc8d59',\n",
+    "        size=marker_size,\n",
+    "        line=dict(color='rgb(231,231,231)', width=0.5)))\n",
+    "fig.append_trace(trace2, 2, 2)\n",
+    "\n",
+    "# Predicted swash, observed swash\n",
+    "trace3 = go.Scatter(\n",
+    "    mode='markers',\n",
+    "    x=df.loc[df.index.isin(swash_correct_site_ids), x_col],\n",
+    "    y=df.loc[df.index.isin(swash_correct_site_ids), y_col],\n",
+    "    marker=dict(\n",
+    "        color='#3182bd',\n",
+    "        size=marker_size,\n",
+    "        line=dict(color='rgb(231,231,231)', width=0.5)))\n",
+    "fig.append_trace(trace3, 1, 1)\n",
+    "\n",
+    "# Predicted collision, observed swash\n",
+    "trace4 = go.Scatter(\n",
+    "    mode='markers',\n",
+    "    x=df.loc[df.index.isin(swash_overpredicted_site_ids), x_col],\n",
+    "    y=df.loc[df.index.isin(swash_overpredicted_site_ids), y_col],\n",
+    "    marker=dict(\n",
+    "        color='#3182bd',\n",
+    "        size=marker_size,\n",
+    "        line=dict(color='rgb(231,231,231)', width=0.5)))\n",
+    "fig.append_trace(trace4, 1, 2)\n",
+    "\n",
+    "# Update formatting, titles, sizes etc.\n",
+    "fig['layout']['yaxis1'].update(\n",
+    "    title='{}<br>{}'.format('Observed swash', y_col))\n",
+    "fig['layout']['yaxis3'].update(\n",
+    "    title='{}<br>{}'.format('Observed collision', y_col))\n",
+    "fig['layout']['xaxis3'].update(title=x_col)\n",
+    "fig['layout']['xaxis4'].update(title=x_col)\n",
+    "fig['layout'].update(\n",
+    "    height=700, width=900, title='Storm Regime Confusion Matrix')\n",
+    "fig['layout']['showlegend'] = False\n",
+    "fig_to_plot = go.FigureWidget(data=fig.data, layout=fig.layout)\n",
+    "\n",
+    "# To output to file\n",
+    "img_bytes = pio.write_image(\n",
+    "    fig_to_plot,\n",
+    "    '02_storm_regime_confusion.png',\n",
+    "    format='png',\n",
+    "    width=600,\n",
+    "    height=600,\n",
+    "    scale=5)\n",
+    "\n",
+    "fig_to_plot"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2018-12-09T00:30:34.940581Z",
+     "start_time": "2018-12-09T00:30:34.825247Z"
+    }
+   },
+   "source": [
+    "Plotting `prestorm_berm_vol` vs `R_high_dune_toe_diff` shows there is a difference between observed swash and collision. It appears that when the `prestorm_berm_vol` is smaller than 80 m, we will get collision, regardless of whether `R_high_dune_toe_diff` is greater than 0 m. Let's confirm that there is a vertical line which differentiates between these two regimes."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create regime predictor which includes berm volume\n",
+    "\n",
+    "The technique for finding the boundary between two clusters is taken from [Stack Overflow](https://stackoverflow.com/a/22356267)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2018-12-10T05:37:35.785659Z",
+     "start_time": "2018-12-10T05:37:34.355843Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "09471a2157774aa2b17f5b88c2bfaab4",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "FigureWidget({\n",
+       "    'data': [{'marker': {'color': '#3182bd', 'line': {'color': 'rgb(231,231,231)', 'width': 0.5…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Get data\n",
+    "df = df.dropna(subset=['prestorm_berm_vol', 'R_high_dune_toe_diff'])\n",
+    "\n",
+    "swash_x = df.query('observed_storm_regime==\"swash\"').prestorm_berm_vol\n",
+    "swash_y = df.query('observed_storm_regime==\"swash\"').R_high_dune_toe_diff\n",
+    "swash_samples = np.array([[x, y] for x, y in zip(swash_x, swash_y)])\n",
+    "\n",
+    "collision_x = df.query('observed_storm_regime==\"collision\"').prestorm_berm_vol\n",
+    "collision_y = df.query(\n",
+    "    'observed_storm_regime==\"collision\"').R_high_dune_toe_diff\n",
+    "collision_samples = np.array(\n",
+    "    [[x, y] for x, y in zip(collision_x, collision_y)])\n",
+    "\n",
+    "# Fit SVM\n",
+    "X = np.concatenate((swash_samples, collision_samples), axis=0)\n",
+    "Y = np.array([0] * swash_x.shape[0] + [1] * collision_x.shape[0])\n",
+    "\n",
+    "C = 1.0  # SVM regularization parameter\n",
+    "clf = svm.SVC(kernel='linear', gamma=0.7, C=C)\n",
+    "clf.fit(X, Y)\n",
+    "\n",
+    "w = clf.coef_[0]\n",
+    "a = -w[0] / w[1]\n",
+    "y_vals = swash_y.tolist() + collision_y.tolist()\n",
+    "yy = np.linspace(min(y_vals), max(y_vals))\n",
+    "xx = (yy + (clf.intercept_[0]) / w[1]) / a\n",
+    "\n",
+    "# Prepare plot\n",
+    "trace_swash = go.Scatter(\n",
+    "    x=swash_x,\n",
+    "    y=swash_y,\n",
+    "    name='Swash',\n",
+    "    mode='markers',\n",
+    "    marker=dict(\n",
+    "        color='#3182bd',\n",
+    "        size=marker_size,\n",
+    "        line=dict(color='rgb(231,231,231)', width=0.5)))\n",
+    "\n",
+    "trace_collision = go.Scatter(\n",
+    "    x=collision_x,\n",
+    "    y=collision_y,\n",
+    "    name='Collision',\n",
+    "    mode='markers',\n",
+    "    marker=dict(\n",
+    "        color='#fc8d59',\n",
+    "        size=marker_size,\n",
+    "        line=dict(color='rgb(231,231,231)', width=0.5)))\n",
+    "\n",
+    "trace_split = go.Scatter(\n",
+    "    x=xx,\n",
+    "    y=yy,\n",
+    "    name='Split (y={:.1f}x-{:.1f})'.format(a, (clf.intercept_[0]) / w[1]),\n",
+    ")\n",
+    "\n",
+    "layout = dict(\n",
+    "    title='Observed Swash/Collision Regime Split',\n",
+    "    xaxis=dict(title='Prestorm berm volume', ),\n",
+    "    yaxis=dict(title='R_high - D_low'),\n",
+    "    legend=dict(x=.6, y=1.))\n",
+    "\n",
+    "fig_to_plot = go.FigureWidget(\n",
+    "    data=[trace_swash, trace_collision, trace_split], layout=layout)\n",
+    "\n",
+    "# To output to file\n",
+    "img_bytes = pio.write_image(\n",
+    "    fig_to_plot,\n",
+    "    '02_storm_regime_split.png',\n",
+    "    format='png',\n",
+    "    width=600,\n",
+    "    height=600,\n",
+    "    scale=5)\n",
+    "\n",
+    "fig_to_plot"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Looking at the plot above, it appears that when the `prestorm_berm_vol` is less than 80 m, we should classify it as collision, even if wave runup does not reach the toe of the dune."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Test new berm vol predictor\n",
+    "\n",
+    "Now let's go back to our predicted forecasts and see if our confusion matrix improves if we adopt this new criterion for differentiating between swash and collision.\n",
+    "\n",
+    "First define a custom function to get colormap for our confusion matrix."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2018-12-10T02:18:08.658221Z",
+     "start_time": "2018-12-10T02:18:08.653186Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def matplotlib_to_plotly(cmap_name='RdYlGn', pl_entries=255):\n",
+    "    \"\"\"\n",
+    "    Function to convert matplotlib colorscale to plotly\n",
+    "    \"\"\"\n",
+    "    cmap = matplotlib.cm.get_cmap(cmap_name)\n",
+    "    h = 1.0 / (pl_entries - 1)\n",
+    "    pl_colorscale = []\n",
+    "\n",
+    "    for k in range(pl_entries):\n",
+    "        C = list(map(np.uint8, np.array(cmap(k * h)[:3]) * 255))\n",
+    "        pl_colorscale.append([k * h, 'rgb' + str((C[0], C[1], C[2]))])\n",
+    "\n",
+    "    return pl_colorscale"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 156,
+   "execution_count": 40,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2018-12-04T23:11:08.853877Z",
-     "start_time": "2018-12-04T23:11:08.846876Z"
+     "end_time": "2018-12-10T05:44:27.306253Z",
+     "start_time": "2018-12-10T05:44:25.791799Z"
     },
-    "hidden": true
+    "code_folding": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "97d8283101e748dd8e2fd7811c97973b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "FigureWidget({\n",
+       "    'data': [{'colorscale': [[0.0, 'rgb(165, 0, 38)'], [0.003937007874015748,\n",
+       "                 …"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from sklearn.metrics import confusion_matrix\n",
+    "\n",
+    "# Create colorscale\n",
+    "rdylgr = matplotlib_to_plotly()\n",
+    "\n",
+    "# Add new column with our new prediction technique.\n",
+    "df.loc[df['prestorm_berm_vol'] * 2.7 - 221.1 <= df['R_high_dune_toe_diff'],\n",
+    "       'new_predicted_storm_regime'] = 'collision'\n",
+    "df.loc[df['prestorm_berm_vol'] * 2.7 - 221.1 > df['R_high_dune_toe_diff'],\n",
+    "       'new_predicted_storm_regime'] = 'swash'\n",
+    "\n",
+    "# Get observed and forecasted regimes, and merge\n",
+    "observed_regimes = df.observed_storm_regime.rename(\n",
+    "    'observed_regime').to_frame()\n",
+    "forecasted_regimes = df.new_predicted_storm_regime.rename(\n",
+    "    'forecasted_regime').to_frame()\n",
+    "df_compare = pd.concat([observed_regimes, forecasted_regimes],\n",
+    "                       axis='columns',\n",
+    "                       names=['a', 'b'],\n",
+    "                       sort=True)\n",
+    "df_compare.dropna(axis='index', inplace=True)\n",
+    "\n",
+    "# Create a confusion matrix based on the observed/forecasted regimes.\n",
+    "# Need to do some flipping and reversing to get it in the correct\n",
+    "# order for the plotly heatmap.\n",
+    "z = confusion_matrix(\n",
+    "    df_compare.observed_regime.tolist(),\n",
+    "    df_compare.forecasted_regime.tolist(),\n",
+    "    labels=['swash', 'collision', 'overwash', 'inundation'])\n",
+    "z = np.flip(z, axis=0)\n",
+    "z_list = list(reversed(z.tolist()))\n",
+    "\n",
+    "# Make incorrect values negative, so they get assigned a different color.\n",
+    "# Better for visualization\n",
+    "z_neg_incorrect = np.flip(np.identity(4), axis=0)\n",
+    "z_neg_incorrect[z_neg_incorrect == 0] = -1\n",
+    "z_neg_incorrect = (z * z_neg_incorrect).tolist()\n",
+    "\n",
+    "# Change the text on the heatmap so it also displays percentages.\n",
+    "z_with_pct = []\n",
+    "for row in z:\n",
+    "    new_row = []\n",
+    "    for val in row:\n",
+    "        new_row.append('{}<br>({}%)'.format(\n",
+    "            val, np.around(val / np.sum(z) * 100, 1)))\n",
+    "    z_with_pct.append(new_row)\n",
+    "\n",
+    "# Create the heatmap figure\n",
+    "x = ['swash', 'collision', 'overwash', 'inundation']\n",
+    "y = list(reversed(x))\n",
+    "fig = ff.create_annotated_heatmap(\n",
+    "    z_neg_incorrect, x=x, y=y, annotation_text=z_with_pct, colorscale=rdylgr)\n",
+    "heatmap = go.FigureWidget(data=fig.data, layout=fig.layout)\n",
+    "\n",
+    "# Update axis titles\n",
+    "heatmap.layout.xaxis.update(title='Predicted')\n",
+    "heatmap.layout.yaxis.update(title='Observed')\n",
+    "\n",
+    "# Write to file\n",
+    "img_bytes = pio.write_image(\n",
+    "    heatmap,\n",
+    "    '02_confusion_matrix_berm_vol_predictor.png',\n",
+    "    format='png',\n",
+    "    width=600,\n",
+    "    height=600,\n",
+    "    scale=5)\n",
+    "\n",
+    "heatmap"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## TODO Compare predicted and underpredicted profiles"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Define a function for getting the average beach profile for a number of given site_ids:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 224,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2018-12-10T03:08:49.904157Z",
+     "start_time": "2018-12-10T03:08:49.896136Z"
+    }
    },
    "outputs": [],
    "source": [
@@ -422,41 +1208,46 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {
-    "hidden": true
-   },
+   "metadata": {},
    "source": [
     "Now, let's look at whether there is a difference between the average beach profile of correctly forecasted site_ids and incorrectly forecasted site_ids. First, looking at sites where we observed swash regime."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 161,
+   "execution_count": 225,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2018-12-05T02:00:36.853374Z",
-     "start_time": "2018-12-05T01:58:21.839366Z"
+     "end_time": "2018-12-10T03:11:20.818875Z",
+     "start_time": "2018-12-10T03:08:49.906163Z"
     },
-    "code_folding": [],
-    "hidden": true
+    "code_folding": []
    },
    "outputs": [
     {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "03f2e99d20a347f3922a0e6a36f99ccd",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "FigureWidget({\n",
-       "    'data': [{'line': {'color': 'rgb(205, 0, 0)', 'width': 2},\n",
-       "              'mode': 'lines',\n",
-       " …"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\z5189959\\Desktop\\nsw-2016-storm-impact\\.venv\\lib\\site-packages\\pandas\\core\\groupby\\groupby.py:1062: RuntimeWarning:\n",
+      "\n",
+      "Mean of empty slice\n",
+      "\n",
+      "C:\\Users\\z5189959\\Desktop\\nsw-2016-storm-impact\\.venv\\lib\\site-packages\\numpy\\lib\\nanfunctions.py:1545: RuntimeWarning:\n",
+      "\n",
+      "Degrees of freedom <= 0 for slice.\n",
+      "\n"
+     ]
+    },
+    {
+     "ename": "NameError",
+     "evalue": "name 'avg_correct_x' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[1;32m<ipython-input-225-a35c8feaf0e6>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     41\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     42\u001b[0m trace_correct_mean = go.Scatter(\n\u001b[1;32m---> 43\u001b[1;33m     \u001b[0mx\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mavg_correct_x\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     44\u001b[0m     \u001b[0my\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mavg_correct_z\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     45\u001b[0m     \u001b[0mopacity\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;31mNameError\u001b[0m: name 'avg_correct_x' is not defined"
+     ]
     }
    ],
    "source": [
@@ -563,42 +1354,21 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {
-    "hidden": true
-   },
+   "metadata": {},
    "source": [
     "We can see that the difference is pretty minimal. For cases where we predicted collision, but observed swash (overprediction), we see that overpredicted profiles are slightly more concave than correctly predicted sites."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 162,
+   "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2018-12-05T02:03:38.394415Z",
-     "start_time": "2018-12-05T02:00:37.335377Z"
-    },
-    "hidden": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "1255bccc024e4690b4b8ff4ccc8e9e35",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "FigureWidget({\n",
-       "    'data': [{'line': {'color': 'rgb(205, 0, 0)', 'width': 2},\n",
-       "              'mode': 'lines',\n",
-       " …"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "end_time": "2018-12-10T03:11:20.824874Z",
+     "start_time": "2018-12-10T03:08:27.623Z"
     }
-   ],
+   },
+   "outputs": [],
    "source": [
     "underpredicted = get_avg_profile(collision_underpredicted_site_ids)\n",
     "correct = get_avg_profile(collision_correct_site_ids)\n",
@@ -703,15 +1473,14 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {
-    "hidden": true
-   },
+   "metadata": {},
    "source": [
     "This plot is a bit more interesting. It shows that we are correctly forecasting collision when the profile is more accreted/convex, but when the profile is more eroded/concave, the water level is underpredicted. Why is this? "
    ]
   }
  ],
  "metadata": {
+  "hide_input": false,
   "kernelspec": {
    "display_name": "Python 3",
    "language": "python",

	prestorm_swash_vol	poststorm_swash_vol	swash_vol_change	swash_pct_change	prestorm_dune_face_vol	poststorm_dune_face_vol	dune_face_vol_change	dune_face_pct_change	storm_regime
site_id
AVOCAn0001	113.9091	77.9830	35.6103	31.2620	165.4760	166.7296	0.0000	0.0000	swash
AVOCAn0002	106.8959	67.0913	39.6373	37.0803	256.4137	258.9174	0.0000	0.0000	swash
AVOCAn0003	99.0484	53.6563	45.1621	45.5960	372.7031	373.9198	-0.3147	-0.0844	swash
AVOCAn0004	74.7543	29.3280	45.4262	60.7674	275.1689	276.0476	0.4104	0.1492	swash
AVOCAn0005	70.5968	24.1071	46.4897	65.8524	268.5194	263.4262	7.4196	2.7631	collision
AVOCAn0006	68.7583	23.9665	44.7918	65.1438	202.6770	198.2397	4.7944	2.3655	collision
AVOCAn0007	75.7895	27.2715	48.5180	64.0168	149.8479	143.1312	7.1323	4.7597	collision
AVOCAn0008	93.3107	42.4968	50.8139	54.4567	187.9201	187.3459	2.8297	1.5058	collision
AVOCAn0009	3.6955	0.1038	3.5917	97.1908	NaN	NaN	NaN	NaN	NaN
AVOCAs0001	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
AVOCAs0002	97.3864	26.6619	70.7246	72.6226	NaN	NaN	NaN	NaN	NaN
AVOCAs0003	70.7401	40.0608	30.7919	43.5282	NaN	NaN	NaN	NaN	NaN
AVOCAs0004	97.8389	45.4845	52.2157	53.3691	NaN	NaN	NaN	NaN	NaN
AVOCAs0005	95.1711	54.9722	40.1706	42.2088	NaN	NaN	NaN	NaN	NaN
AVOCAs0006	112.5818	67.8718	44.8252	39.8157	NaN	NaN	NaN	NaN	NaN
AVOCAs0007	65.3531	44.1537	21.5228	32.9331	NaN	NaN	NaN	NaN	NaN
AVOCAs0008	52.3940	44.9152	7.4803	14.2770	NaN	NaN	NaN	NaN	NaN
BILG0001	19.4177	7.5746	11.8431	60.9913	NaN	NaN	NaN	NaN	NaN
BILG0002	155.6998	98.1693	57.4340	36.8876	NaN	NaN	NaN	NaN	NaN
BILG0003	83.5219	52.8059	30.5537	36.5817	41.1469	40.6081	0.0000	0.0000	swash
BILG0004	158.6283	116.6189	42.1178	26.5512	11.2211	11.0892	-0.0132	-0.1179	swash
BILG0005	212.8478	164.0044	48.4312	22.7539	613.6156	606.2766	5.7738	0.9410	collision
BLUEYS0001	65.4628	19.2938	46.1690	70.5270	130.7447	120.5446	9.5601	7.3121	collision
BLUEYS0002	50.2084	10.3009	39.9074	79.4836	512.0154	477.1774	33.2825	6.5003	collision
BLUEYS0003	50.6308	11.1682	39.4625	77.9418	443.0853	414.3901	24.8870	5.6167	collision
BLUEYS0004	95.1608	31.3330	63.8279	67.0737	287.5805	272.4267	12.9641	4.5080	collision
BLUEYS0005	141.0643	58.2545	82.8098	58.7036	539.3864	520.0732	12.0470	2.2335	collision
BLUEYS0006	88.4207	51.6205	36.2553	41.0032	271.6036	267.1954	3.6045	1.3271	collision
BOAT0001	23.8510	23.5660	-0.0264	-0.1108	NaN	NaN	NaN	NaN	NaN
BOAT0002	37.6524	14.0209	23.6316	62.7624	NaN	NaN	NaN	NaN	NaN
...	...	...	...	...	...	...	...	...	...
TREACH0014	97.5323	46.2994	51.0816	52.3740	508.7400	505.7877	0.4254	0.0836	swash
TREACH0015	96.8327	45.1962	51.6364	53.3254	690.8275	683.4458	1.5086	0.2184	NaN
TREACH0016	106.9083	66.0567	40.3629	37.7547	508.0014	499.6315	0.3386	0.0667	swash
WAMBE0001	132.3413	143.4459	-9.7255	-7.3488	665.9898	667.5923	0.0410	0.0062	swash
WAMBE0002	151.1833	126.9844	23.9548	15.8449	385.8467	386.7284	-0.0449	-0.0116	swash
WAMBE0003	154.1788	117.9441	36.2425	23.5068	694.2226	700.5105	-4.2136	-0.6070	swash
WAMBE0004	137.8449	76.6007	61.2725	44.4503	559.5485	569.8591	-4.4590	-0.7969	swash
WAMBE0005	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
WAMBE0006	205.8453	186.0784	22.5892	10.9739	55.0898	55.8919	0.0000	0.0000	swash
WAMBE0007	80.4674	35.4614	45.0059	55.9307	178.1005	178.5439	0.4727	0.2654	swash
WAMBE0008	88.4574	40.3200	48.1374	54.4187	258.7513	258.3849	-1.2073	-0.4666	swash
WAMBE0009	70.9159	26.1742	44.7418	63.0913	267.3725	258.3720	9.8041	3.6668	collision
WAMBE0010	58.6604	18.0418	40.6186	69.2437	187.5259	161.9748	25.3087	13.4961	collision
WAMBE0011	59.2415	16.3165	42.9250	72.4577	197.0129	175.2512	21.9882	11.1608	collision
WAMBE0012	74.4189	23.0232	51.3957	69.0627	178.4783	168.3475	10.0386	5.6246	collision
WAMBE0013	70.4964	22.7546	47.7419	67.7224	231.1513	195.2581	35.8072	15.4908	collision
WAMBE0014	68.0896	24.1853	43.9043	64.4802	82.4268	61.2601	21.1718	25.6856	collision
WAMBE0015	55.0780	16.0119	39.0660	70.9286	NaN	NaN	NaN	NaN	NaN
WAMBE0016	96.7687	39.8224	56.9463	58.8479	NaN	NaN	NaN	NaN	NaN
WAMBE0017	35.2987	8.5140	26.7847	75.8801	NaN	NaN	NaN	NaN	NaN
WAMBE0018	40.9407	10.5147	30.4260	74.3173	NaN	NaN	NaN	NaN	NaN
WAMBE0019	38.2838	9.2156	29.0682	75.9282	NaN	NaN	NaN	NaN	NaN
WAMBE0020	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
WAMBE0021	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
WAMBE0022	0.5516	0.2840	0.2675	48.5063	NaN	NaN	NaN	NaN	NaN
WAMBE0023	3.3761	0.3020	3.0741	91.0554	NaN	NaN	NaN	NaN	NaN
WAMBE0024	60.8648	31.2794	29.5854	48.6084	NaN	NaN	NaN	NaN	NaN
WAMBE0025	45.1055	14.6028	30.5028	67.6253	NaN	NaN	NaN	NaN	NaN
WAMBE0026	32.1502	12.9335	19.2167	59.7716	NaN	NaN	NaN	NaN	NaN
WAMBE0027	26.2310	18.6828	7.5482	28.7759	NaN	NaN	NaN	NaN	NaN