diff --git a/notebooks/01_exploration.ipynb b/notebooks/01_exploration.ipynb
index 07a9649..deb0348 100644
--- a/notebooks/01_exploration.ipynb
+++ b/notebooks/01_exploration.ipynb
@@ -1001,9 +1001,1012 @@
" g_profiles\n",
"])"
]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Scatter plot"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-12-07T05:08:12.117885Z",
+ "start_time": "2018-12-07T05:08:12.078780Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " prestorm_swash_vol | \n",
+ " poststorm_swash_vol | \n",
+ " swash_vol_change | \n",
+ " swash_pct_change | \n",
+ " prestorm_dune_face_vol | \n",
+ " poststorm_dune_face_vol | \n",
+ " dune_face_vol_change | \n",
+ " dune_face_pct_change | \n",
+ " storm_regime | \n",
+ "
\n",
+ " \n",
+ " site_id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " AVOCAn0001 | \n",
+ " 113.9091 | \n",
+ " 77.9830 | \n",
+ " 35.6103 | \n",
+ " 31.2620 | \n",
+ " 165.4760 | \n",
+ " 166.7296 | \n",
+ " 0.0000 | \n",
+ " 0.0000 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " AVOCAn0002 | \n",
+ " 106.8959 | \n",
+ " 67.0913 | \n",
+ " 39.6373 | \n",
+ " 37.0803 | \n",
+ " 256.4137 | \n",
+ " 258.9174 | \n",
+ " 0.0000 | \n",
+ " 0.0000 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " AVOCAn0003 | \n",
+ " 99.0484 | \n",
+ " 53.6563 | \n",
+ " 45.1621 | \n",
+ " 45.5960 | \n",
+ " 372.7031 | \n",
+ " 373.9198 | \n",
+ " -0.3147 | \n",
+ " -0.0844 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " AVOCAn0004 | \n",
+ " 74.7543 | \n",
+ " 29.3280 | \n",
+ " 45.4262 | \n",
+ " 60.7674 | \n",
+ " 275.1689 | \n",
+ " 276.0476 | \n",
+ " 0.4104 | \n",
+ " 0.1492 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " AVOCAn0005 | \n",
+ " 70.5968 | \n",
+ " 24.1071 | \n",
+ " 46.4897 | \n",
+ " 65.8524 | \n",
+ " 268.5194 | \n",
+ " 263.4262 | \n",
+ " 7.4196 | \n",
+ " 2.7631 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " AVOCAn0006 | \n",
+ " 68.7583 | \n",
+ " 23.9665 | \n",
+ " 44.7918 | \n",
+ " 65.1438 | \n",
+ " 202.6770 | \n",
+ " 198.2397 | \n",
+ " 4.7944 | \n",
+ " 2.3655 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " AVOCAn0007 | \n",
+ " 75.7895 | \n",
+ " 27.2715 | \n",
+ " 48.5180 | \n",
+ " 64.0168 | \n",
+ " 149.8479 | \n",
+ " 143.1312 | \n",
+ " 7.1323 | \n",
+ " 4.7597 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " AVOCAn0008 | \n",
+ " 93.3107 | \n",
+ " 42.4968 | \n",
+ " 50.8139 | \n",
+ " 54.4567 | \n",
+ " 187.9201 | \n",
+ " 187.3459 | \n",
+ " 2.8297 | \n",
+ " 1.5058 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " AVOCAn0009 | \n",
+ " 3.6955 | \n",
+ " 0.1038 | \n",
+ " 3.5917 | \n",
+ " 97.1908 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " AVOCAs0001 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " AVOCAs0002 | \n",
+ " 97.3864 | \n",
+ " 26.6619 | \n",
+ " 70.7246 | \n",
+ " 72.6226 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " AVOCAs0003 | \n",
+ " 70.7401 | \n",
+ " 40.0608 | \n",
+ " 30.7919 | \n",
+ " 43.5282 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " AVOCAs0004 | \n",
+ " 97.8389 | \n",
+ " 45.4845 | \n",
+ " 52.2157 | \n",
+ " 53.3691 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " AVOCAs0005 | \n",
+ " 95.1711 | \n",
+ " 54.9722 | \n",
+ " 40.1706 | \n",
+ " 42.2088 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " AVOCAs0006 | \n",
+ " 112.5818 | \n",
+ " 67.8718 | \n",
+ " 44.8252 | \n",
+ " 39.8157 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " AVOCAs0007 | \n",
+ " 65.3531 | \n",
+ " 44.1537 | \n",
+ " 21.5228 | \n",
+ " 32.9331 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " AVOCAs0008 | \n",
+ " 52.3940 | \n",
+ " 44.9152 | \n",
+ " 7.4803 | \n",
+ " 14.2770 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " BILG0001 | \n",
+ " 19.4177 | \n",
+ " 7.5746 | \n",
+ " 11.8431 | \n",
+ " 60.9913 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " BILG0002 | \n",
+ " 155.6998 | \n",
+ " 98.1693 | \n",
+ " 57.4340 | \n",
+ " 36.8876 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " BILG0003 | \n",
+ " 83.5219 | \n",
+ " 52.8059 | \n",
+ " 30.5537 | \n",
+ " 36.5817 | \n",
+ " 41.1469 | \n",
+ " 40.6081 | \n",
+ " 0.0000 | \n",
+ " 0.0000 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " BILG0004 | \n",
+ " 158.6283 | \n",
+ " 116.6189 | \n",
+ " 42.1178 | \n",
+ " 26.5512 | \n",
+ " 11.2211 | \n",
+ " 11.0892 | \n",
+ " -0.0132 | \n",
+ " -0.1179 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " BILG0005 | \n",
+ " 212.8478 | \n",
+ " 164.0044 | \n",
+ " 48.4312 | \n",
+ " 22.7539 | \n",
+ " 613.6156 | \n",
+ " 606.2766 | \n",
+ " 5.7738 | \n",
+ " 0.9410 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " BLUEYS0001 | \n",
+ " 65.4628 | \n",
+ " 19.2938 | \n",
+ " 46.1690 | \n",
+ " 70.5270 | \n",
+ " 130.7447 | \n",
+ " 120.5446 | \n",
+ " 9.5601 | \n",
+ " 7.3121 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " BLUEYS0002 | \n",
+ " 50.2084 | \n",
+ " 10.3009 | \n",
+ " 39.9074 | \n",
+ " 79.4836 | \n",
+ " 512.0154 | \n",
+ " 477.1774 | \n",
+ " 33.2825 | \n",
+ " 6.5003 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " BLUEYS0003 | \n",
+ " 50.6308 | \n",
+ " 11.1682 | \n",
+ " 39.4625 | \n",
+ " 77.9418 | \n",
+ " 443.0853 | \n",
+ " 414.3901 | \n",
+ " 24.8870 | \n",
+ " 5.6167 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " BLUEYS0004 | \n",
+ " 95.1608 | \n",
+ " 31.3330 | \n",
+ " 63.8279 | \n",
+ " 67.0737 | \n",
+ " 287.5805 | \n",
+ " 272.4267 | \n",
+ " 12.9641 | \n",
+ " 4.5080 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " BLUEYS0005 | \n",
+ " 141.0643 | \n",
+ " 58.2545 | \n",
+ " 82.8098 | \n",
+ " 58.7036 | \n",
+ " 539.3864 | \n",
+ " 520.0732 | \n",
+ " 12.0470 | \n",
+ " 2.2335 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " BLUEYS0006 | \n",
+ " 88.4207 | \n",
+ " 51.6205 | \n",
+ " 36.2553 | \n",
+ " 41.0032 | \n",
+ " 271.6036 | \n",
+ " 267.1954 | \n",
+ " 3.6045 | \n",
+ " 1.3271 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " BOAT0001 | \n",
+ " 23.8510 | \n",
+ " 23.5660 | \n",
+ " -0.0264 | \n",
+ " -0.1108 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " BOAT0002 | \n",
+ " 37.6524 | \n",
+ " 14.0209 | \n",
+ " 23.6316 | \n",
+ " 62.7624 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " TREACH0014 | \n",
+ " 97.5323 | \n",
+ " 46.2994 | \n",
+ " 51.0816 | \n",
+ " 52.3740 | \n",
+ " 508.7400 | \n",
+ " 505.7877 | \n",
+ " 0.4254 | \n",
+ " 0.0836 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " TREACH0015 | \n",
+ " 96.8327 | \n",
+ " 45.1962 | \n",
+ " 51.6364 | \n",
+ " 53.3254 | \n",
+ " 690.8275 | \n",
+ " 683.4458 | \n",
+ " 1.5086 | \n",
+ " 0.2184 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " TREACH0016 | \n",
+ " 106.9083 | \n",
+ " 66.0567 | \n",
+ " 40.3629 | \n",
+ " 37.7547 | \n",
+ " 508.0014 | \n",
+ " 499.6315 | \n",
+ " 0.3386 | \n",
+ " 0.0667 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " WAMBE0001 | \n",
+ " 132.3413 | \n",
+ " 143.4459 | \n",
+ " -9.7255 | \n",
+ " -7.3488 | \n",
+ " 665.9898 | \n",
+ " 667.5923 | \n",
+ " 0.0410 | \n",
+ " 0.0062 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " WAMBE0002 | \n",
+ " 151.1833 | \n",
+ " 126.9844 | \n",
+ " 23.9548 | \n",
+ " 15.8449 | \n",
+ " 385.8467 | \n",
+ " 386.7284 | \n",
+ " -0.0449 | \n",
+ " -0.0116 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " WAMBE0003 | \n",
+ " 154.1788 | \n",
+ " 117.9441 | \n",
+ " 36.2425 | \n",
+ " 23.5068 | \n",
+ " 694.2226 | \n",
+ " 700.5105 | \n",
+ " -4.2136 | \n",
+ " -0.6070 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " WAMBE0004 | \n",
+ " 137.8449 | \n",
+ " 76.6007 | \n",
+ " 61.2725 | \n",
+ " 44.4503 | \n",
+ " 559.5485 | \n",
+ " 569.8591 | \n",
+ " -4.4590 | \n",
+ " -0.7969 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " WAMBE0005 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " WAMBE0006 | \n",
+ " 205.8453 | \n",
+ " 186.0784 | \n",
+ " 22.5892 | \n",
+ " 10.9739 | \n",
+ " 55.0898 | \n",
+ " 55.8919 | \n",
+ " 0.0000 | \n",
+ " 0.0000 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " WAMBE0007 | \n",
+ " 80.4674 | \n",
+ " 35.4614 | \n",
+ " 45.0059 | \n",
+ " 55.9307 | \n",
+ " 178.1005 | \n",
+ " 178.5439 | \n",
+ " 0.4727 | \n",
+ " 0.2654 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " WAMBE0008 | \n",
+ " 88.4574 | \n",
+ " 40.3200 | \n",
+ " 48.1374 | \n",
+ " 54.4187 | \n",
+ " 258.7513 | \n",
+ " 258.3849 | \n",
+ " -1.2073 | \n",
+ " -0.4666 | \n",
+ " swash | \n",
+ "
\n",
+ " \n",
+ " WAMBE0009 | \n",
+ " 70.9159 | \n",
+ " 26.1742 | \n",
+ " 44.7418 | \n",
+ " 63.0913 | \n",
+ " 267.3725 | \n",
+ " 258.3720 | \n",
+ " 9.8041 | \n",
+ " 3.6668 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " WAMBE0010 | \n",
+ " 58.6604 | \n",
+ " 18.0418 | \n",
+ " 40.6186 | \n",
+ " 69.2437 | \n",
+ " 187.5259 | \n",
+ " 161.9748 | \n",
+ " 25.3087 | \n",
+ " 13.4961 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " WAMBE0011 | \n",
+ " 59.2415 | \n",
+ " 16.3165 | \n",
+ " 42.9250 | \n",
+ " 72.4577 | \n",
+ " 197.0129 | \n",
+ " 175.2512 | \n",
+ " 21.9882 | \n",
+ " 11.1608 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " WAMBE0012 | \n",
+ " 74.4189 | \n",
+ " 23.0232 | \n",
+ " 51.3957 | \n",
+ " 69.0627 | \n",
+ " 178.4783 | \n",
+ " 168.3475 | \n",
+ " 10.0386 | \n",
+ " 5.6246 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " WAMBE0013 | \n",
+ " 70.4964 | \n",
+ " 22.7546 | \n",
+ " 47.7419 | \n",
+ " 67.7224 | \n",
+ " 231.1513 | \n",
+ " 195.2581 | \n",
+ " 35.8072 | \n",
+ " 15.4908 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " WAMBE0014 | \n",
+ " 68.0896 | \n",
+ " 24.1853 | \n",
+ " 43.9043 | \n",
+ " 64.4802 | \n",
+ " 82.4268 | \n",
+ " 61.2601 | \n",
+ " 21.1718 | \n",
+ " 25.6856 | \n",
+ " collision | \n",
+ "
\n",
+ " \n",
+ " WAMBE0015 | \n",
+ " 55.0780 | \n",
+ " 16.0119 | \n",
+ " 39.0660 | \n",
+ " 70.9286 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " WAMBE0016 | \n",
+ " 96.7687 | \n",
+ " 39.8224 | \n",
+ " 56.9463 | \n",
+ " 58.8479 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " WAMBE0017 | \n",
+ " 35.2987 | \n",
+ " 8.5140 | \n",
+ " 26.7847 | \n",
+ " 75.8801 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " WAMBE0018 | \n",
+ " 40.9407 | \n",
+ " 10.5147 | \n",
+ " 30.4260 | \n",
+ " 74.3173 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " WAMBE0019 | \n",
+ " 38.2838 | \n",
+ " 9.2156 | \n",
+ " 29.0682 | \n",
+ " 75.9282 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " WAMBE0020 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " WAMBE0021 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " WAMBE0022 | \n",
+ " 0.5516 | \n",
+ " 0.2840 | \n",
+ " 0.2675 | \n",
+ " 48.5063 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " WAMBE0023 | \n",
+ " 3.3761 | \n",
+ " 0.3020 | \n",
+ " 3.0741 | \n",
+ " 91.0554 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " WAMBE0024 | \n",
+ " 60.8648 | \n",
+ " 31.2794 | \n",
+ " 29.5854 | \n",
+ " 48.6084 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " WAMBE0025 | \n",
+ " 45.1055 | \n",
+ " 14.6028 | \n",
+ " 30.5028 | \n",
+ " 67.6253 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " WAMBE0026 | \n",
+ " 32.1502 | \n",
+ " 12.9335 | \n",
+ " 19.2167 | \n",
+ " 59.7716 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " WAMBE0027 | \n",
+ " 26.2310 | \n",
+ " 18.6828 | \n",
+ " 7.5482 | \n",
+ " 28.7759 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1768 rows × 9 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " prestorm_swash_vol poststorm_swash_vol swash_vol_change \\\n",
+ "site_id \n",
+ "AVOCAn0001 113.9091 77.9830 35.6103 \n",
+ "AVOCAn0002 106.8959 67.0913 39.6373 \n",
+ "AVOCAn0003 99.0484 53.6563 45.1621 \n",
+ "AVOCAn0004 74.7543 29.3280 45.4262 \n",
+ "AVOCAn0005 70.5968 24.1071 46.4897 \n",
+ "AVOCAn0006 68.7583 23.9665 44.7918 \n",
+ "AVOCAn0007 75.7895 27.2715 48.5180 \n",
+ "AVOCAn0008 93.3107 42.4968 50.8139 \n",
+ "AVOCAn0009 3.6955 0.1038 3.5917 \n",
+ "AVOCAs0001 NaN NaN NaN \n",
+ "AVOCAs0002 97.3864 26.6619 70.7246 \n",
+ "AVOCAs0003 70.7401 40.0608 30.7919 \n",
+ "AVOCAs0004 97.8389 45.4845 52.2157 \n",
+ "AVOCAs0005 95.1711 54.9722 40.1706 \n",
+ "AVOCAs0006 112.5818 67.8718 44.8252 \n",
+ "AVOCAs0007 65.3531 44.1537 21.5228 \n",
+ "AVOCAs0008 52.3940 44.9152 7.4803 \n",
+ "BILG0001 19.4177 7.5746 11.8431 \n",
+ "BILG0002 155.6998 98.1693 57.4340 \n",
+ "BILG0003 83.5219 52.8059 30.5537 \n",
+ "BILG0004 158.6283 116.6189 42.1178 \n",
+ "BILG0005 212.8478 164.0044 48.4312 \n",
+ "BLUEYS0001 65.4628 19.2938 46.1690 \n",
+ "BLUEYS0002 50.2084 10.3009 39.9074 \n",
+ "BLUEYS0003 50.6308 11.1682 39.4625 \n",
+ "BLUEYS0004 95.1608 31.3330 63.8279 \n",
+ "BLUEYS0005 141.0643 58.2545 82.8098 \n",
+ "BLUEYS0006 88.4207 51.6205 36.2553 \n",
+ "BOAT0001 23.8510 23.5660 -0.0264 \n",
+ "BOAT0002 37.6524 14.0209 23.6316 \n",
+ "... ... ... ... \n",
+ "TREACH0014 97.5323 46.2994 51.0816 \n",
+ "TREACH0015 96.8327 45.1962 51.6364 \n",
+ "TREACH0016 106.9083 66.0567 40.3629 \n",
+ "WAMBE0001 132.3413 143.4459 -9.7255 \n",
+ "WAMBE0002 151.1833 126.9844 23.9548 \n",
+ "WAMBE0003 154.1788 117.9441 36.2425 \n",
+ "WAMBE0004 137.8449 76.6007 61.2725 \n",
+ "WAMBE0005 NaN NaN NaN \n",
+ "WAMBE0006 205.8453 186.0784 22.5892 \n",
+ "WAMBE0007 80.4674 35.4614 45.0059 \n",
+ "WAMBE0008 88.4574 40.3200 48.1374 \n",
+ "WAMBE0009 70.9159 26.1742 44.7418 \n",
+ "WAMBE0010 58.6604 18.0418 40.6186 \n",
+ "WAMBE0011 59.2415 16.3165 42.9250 \n",
+ "WAMBE0012 74.4189 23.0232 51.3957 \n",
+ "WAMBE0013 70.4964 22.7546 47.7419 \n",
+ "WAMBE0014 68.0896 24.1853 43.9043 \n",
+ "WAMBE0015 55.0780 16.0119 39.0660 \n",
+ "WAMBE0016 96.7687 39.8224 56.9463 \n",
+ "WAMBE0017 35.2987 8.5140 26.7847 \n",
+ "WAMBE0018 40.9407 10.5147 30.4260 \n",
+ "WAMBE0019 38.2838 9.2156 29.0682 \n",
+ "WAMBE0020 NaN NaN NaN \n",
+ "WAMBE0021 NaN NaN NaN \n",
+ "WAMBE0022 0.5516 0.2840 0.2675 \n",
+ "WAMBE0023 3.3761 0.3020 3.0741 \n",
+ "WAMBE0024 60.8648 31.2794 29.5854 \n",
+ "WAMBE0025 45.1055 14.6028 30.5028 \n",
+ "WAMBE0026 32.1502 12.9335 19.2167 \n",
+ "WAMBE0027 26.2310 18.6828 7.5482 \n",
+ "\n",
+ " swash_pct_change prestorm_dune_face_vol poststorm_dune_face_vol \\\n",
+ "site_id \n",
+ "AVOCAn0001 31.2620 165.4760 166.7296 \n",
+ "AVOCAn0002 37.0803 256.4137 258.9174 \n",
+ "AVOCAn0003 45.5960 372.7031 373.9198 \n",
+ "AVOCAn0004 60.7674 275.1689 276.0476 \n",
+ "AVOCAn0005 65.8524 268.5194 263.4262 \n",
+ "AVOCAn0006 65.1438 202.6770 198.2397 \n",
+ "AVOCAn0007 64.0168 149.8479 143.1312 \n",
+ "AVOCAn0008 54.4567 187.9201 187.3459 \n",
+ "AVOCAn0009 97.1908 NaN NaN \n",
+ "AVOCAs0001 NaN NaN NaN \n",
+ "AVOCAs0002 72.6226 NaN NaN \n",
+ "AVOCAs0003 43.5282 NaN NaN \n",
+ "AVOCAs0004 53.3691 NaN NaN \n",
+ "AVOCAs0005 42.2088 NaN NaN \n",
+ "AVOCAs0006 39.8157 NaN NaN \n",
+ "AVOCAs0007 32.9331 NaN NaN \n",
+ "AVOCAs0008 14.2770 NaN NaN \n",
+ "BILG0001 60.9913 NaN NaN \n",
+ "BILG0002 36.8876 NaN NaN \n",
+ "BILG0003 36.5817 41.1469 40.6081 \n",
+ "BILG0004 26.5512 11.2211 11.0892 \n",
+ "BILG0005 22.7539 613.6156 606.2766 \n",
+ "BLUEYS0001 70.5270 130.7447 120.5446 \n",
+ "BLUEYS0002 79.4836 512.0154 477.1774 \n",
+ "BLUEYS0003 77.9418 443.0853 414.3901 \n",
+ "BLUEYS0004 67.0737 287.5805 272.4267 \n",
+ "BLUEYS0005 58.7036 539.3864 520.0732 \n",
+ "BLUEYS0006 41.0032 271.6036 267.1954 \n",
+ "BOAT0001 -0.1108 NaN NaN \n",
+ "BOAT0002 62.7624 NaN NaN \n",
+ "... ... ... ... \n",
+ "TREACH0014 52.3740 508.7400 505.7877 \n",
+ "TREACH0015 53.3254 690.8275 683.4458 \n",
+ "TREACH0016 37.7547 508.0014 499.6315 \n",
+ "WAMBE0001 -7.3488 665.9898 667.5923 \n",
+ "WAMBE0002 15.8449 385.8467 386.7284 \n",
+ "WAMBE0003 23.5068 694.2226 700.5105 \n",
+ "WAMBE0004 44.4503 559.5485 569.8591 \n",
+ "WAMBE0005 NaN NaN NaN \n",
+ "WAMBE0006 10.9739 55.0898 55.8919 \n",
+ "WAMBE0007 55.9307 178.1005 178.5439 \n",
+ "WAMBE0008 54.4187 258.7513 258.3849 \n",
+ "WAMBE0009 63.0913 267.3725 258.3720 \n",
+ "WAMBE0010 69.2437 187.5259 161.9748 \n",
+ "WAMBE0011 72.4577 197.0129 175.2512 \n",
+ "WAMBE0012 69.0627 178.4783 168.3475 \n",
+ "WAMBE0013 67.7224 231.1513 195.2581 \n",
+ "WAMBE0014 64.4802 82.4268 61.2601 \n",
+ "WAMBE0015 70.9286 NaN NaN \n",
+ "WAMBE0016 58.8479 NaN NaN \n",
+ "WAMBE0017 75.8801 NaN NaN \n",
+ "WAMBE0018 74.3173 NaN NaN \n",
+ "WAMBE0019 75.9282 NaN NaN \n",
+ "WAMBE0020 NaN NaN NaN \n",
+ "WAMBE0021 NaN NaN NaN \n",
+ "WAMBE0022 48.5063 NaN NaN \n",
+ "WAMBE0023 91.0554 NaN NaN \n",
+ "WAMBE0024 48.6084 NaN NaN \n",
+ "WAMBE0025 67.6253 NaN NaN \n",
+ "WAMBE0026 59.7716 NaN NaN \n",
+ "WAMBE0027 28.7759 NaN NaN \n",
+ "\n",
+ " dune_face_vol_change dune_face_pct_change storm_regime \n",
+ "site_id \n",
+ "AVOCAn0001 0.0000 0.0000 swash \n",
+ "AVOCAn0002 0.0000 0.0000 swash \n",
+ "AVOCAn0003 -0.3147 -0.0844 swash \n",
+ "AVOCAn0004 0.4104 0.1492 swash \n",
+ "AVOCAn0005 7.4196 2.7631 collision \n",
+ "AVOCAn0006 4.7944 2.3655 collision \n",
+ "AVOCAn0007 7.1323 4.7597 collision \n",
+ "AVOCAn0008 2.8297 1.5058 collision \n",
+ "AVOCAn0009 NaN NaN NaN \n",
+ "AVOCAs0001 NaN NaN NaN \n",
+ "AVOCAs0002 NaN NaN NaN \n",
+ "AVOCAs0003 NaN NaN NaN \n",
+ "AVOCAs0004 NaN NaN NaN \n",
+ "AVOCAs0005 NaN NaN NaN \n",
+ "AVOCAs0006 NaN NaN NaN \n",
+ "AVOCAs0007 NaN NaN NaN \n",
+ "AVOCAs0008 NaN NaN NaN \n",
+ "BILG0001 NaN NaN NaN \n",
+ "BILG0002 NaN NaN NaN \n",
+ "BILG0003 0.0000 0.0000 swash \n",
+ "BILG0004 -0.0132 -0.1179 swash \n",
+ "BILG0005 5.7738 0.9410 collision \n",
+ "BLUEYS0001 9.5601 7.3121 collision \n",
+ "BLUEYS0002 33.2825 6.5003 collision \n",
+ "BLUEYS0003 24.8870 5.6167 collision \n",
+ "BLUEYS0004 12.9641 4.5080 collision \n",
+ "BLUEYS0005 12.0470 2.2335 collision \n",
+ "BLUEYS0006 3.6045 1.3271 collision \n",
+ "BOAT0001 NaN NaN NaN \n",
+ "BOAT0002 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "TREACH0014 0.4254 0.0836 swash \n",
+ "TREACH0015 1.5086 0.2184 NaN \n",
+ "TREACH0016 0.3386 0.0667 swash \n",
+ "WAMBE0001 0.0410 0.0062 swash \n",
+ "WAMBE0002 -0.0449 -0.0116 swash \n",
+ "WAMBE0003 -4.2136 -0.6070 swash \n",
+ "WAMBE0004 -4.4590 -0.7969 swash \n",
+ "WAMBE0005 NaN NaN NaN \n",
+ "WAMBE0006 0.0000 0.0000 swash \n",
+ "WAMBE0007 0.4727 0.2654 swash \n",
+ "WAMBE0008 -1.2073 -0.4666 swash \n",
+ "WAMBE0009 9.8041 3.6668 collision \n",
+ "WAMBE0010 25.3087 13.4961 collision \n",
+ "WAMBE0011 21.9882 11.1608 collision \n",
+ "WAMBE0012 10.0386 5.6246 collision \n",
+ "WAMBE0013 35.8072 15.4908 collision \n",
+ "WAMBE0014 21.1718 25.6856 collision \n",
+ "WAMBE0015 NaN NaN NaN \n",
+ "WAMBE0016 NaN NaN NaN \n",
+ "WAMBE0017 NaN NaN NaN \n",
+ "WAMBE0018 NaN NaN NaN \n",
+ "WAMBE0019 NaN NaN NaN \n",
+ "WAMBE0020 NaN NaN NaN \n",
+ "WAMBE0021 NaN NaN NaN \n",
+ "WAMBE0022 NaN NaN NaN \n",
+ "WAMBE0023 NaN NaN NaN \n",
+ "WAMBE0024 NaN NaN NaN \n",
+ "WAMBE0025 NaN NaN NaN \n",
+ "WAMBE0026 NaN NaN NaN \n",
+ "WAMBE0027 NaN NaN NaN \n",
+ "\n",
+ "[1768 rows x 9 columns]"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": []
}
],
"metadata": {
+ "hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
@@ -1033,9 +2036,14 @@
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
- "toc_position": {},
+ "toc_position": {
+ "height": "calc(100% - 180px)",
+ "left": "10px",
+ "top": "150px",
+ "width": "275.797px"
+ },
"toc_section_display": true,
- "toc_window_display": false
+ "toc_window_display": true
},
"varInspector": {
"cols": {
diff --git a/notebooks/03_dune_to_vs_runup.ipynb b/notebooks/03_dune_to_vs_runup.ipynb
index b99ab01..86154e4 100644
--- a/notebooks/03_dune_to_vs_runup.ipynb
+++ b/notebooks/03_dune_to_vs_runup.ipynb
@@ -4,17 +4,27 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Investigate how dune toe compares to R_high"
+ "## Setup "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "heading_collapsed": true
+ },
+ "source": [
+ "### Import packages"
]
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 3,
"metadata": {
"ExecuteTime": {
- "end_time": "2018-12-03T23:04:57.331037Z",
- "start_time": "2018-12-03T23:04:57.006071Z"
- }
+ "end_time": "2018-12-10T04:02:38.872624Z",
+ "start_time": "2018-12-10T04:02:38.448908Z"
+ },
+ "hidden": true
},
"outputs": [],
"source": [
@@ -25,12 +35,13 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 7,
"metadata": {
"ExecuteTime": {
- "end_time": "2018-12-03T23:04:58.749827Z",
- "start_time": "2018-12-03T23:04:57.333943Z"
- }
+ "end_time": "2018-12-10T04:03:27.147221Z",
+ "start_time": "2018-12-10T04:03:27.141204Z"
+ },
+ "hidden": true
},
"outputs": [],
"source": [
@@ -39,18 +50,29 @@
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
- "\n",
+ "import matplotlib\n",
"import plotly\n",
"import plotly.graph_objs as go\n",
"import plotly.plotly as py\n",
"import plotly.tools as tls\n",
"import plotly.figure_factory as ff\n",
- "import plotly.io as pio"
+ "import plotly.io as pio\n",
+ "from plotly import tools\n",
+ "\n",
+ "from copy import copy\n",
+ "import scipy\n",
+ "from sklearn import svm\n",
+ "\n",
+ "# Disable numpy warnings\n",
+ "import warnings\n",
+ "warnings.simplefilter(action=\"ignore\", category=FutureWarning)"
]
},
{
"cell_type": "markdown",
- "metadata": {},
+ "metadata": {
+ "heading_collapsed": true
+ },
"source": [
"### Load data\n",
"Load data from the `./data/interim/` folder and parse into `pandas` dataframes."
@@ -58,35 +80,20 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 8,
"metadata": {
"ExecuteTime": {
- "end_time": "2018-12-03T23:05:05.800496Z",
- "start_time": "2018-12-03T23:04:58.751721Z"
- }
+ "end_time": "2018-12-10T04:03:40.638982Z",
+ "start_time": "2018-12-10T04:03:31.765531Z"
+ },
+ "hidden": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Importing profiles.csv\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "C:\\Users\\z5189959\\Desktop\\nsw-2016-storm-impact\\.venv\\lib\\site-packages\\numpy\\lib\\arraysetops.py:522: FutureWarning:\n",
- "\n",
- "elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
- "\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
+ "Importing profiles.csv\n",
"Importing profile_features.csv\n",
"Importing impacts_forecasted_foreshore_slope_sto06.csv\n",
"Importing impacts_forecasted_mean_slope_sto06.csv\n",
@@ -125,31 +132,49 @@
},
{
"cell_type": "markdown",
- "metadata": {},
+ "metadata": {
+ "heading_collapsed": true
+ },
"source": [
- "### Compare predicted R_high with D_low\n",
- "Let's see what the distribution of R_high is compared with D_low. How far off are the predicted water levels compared with the dune toes?"
+ "## Difference between $R_{high}$ and $D_{low}$\n",
+ "Since the Storm Impact Regime is so dependent on the predicted $R_{high}$ and observed $D_{low}$ levels, let's investigate the difference between these two variables in more detail.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "hidden": true
+ },
+ "source": [
+ "### Gather data\n",
+ "\n",
+ "First, let's split the `site_ids` by whether we observed swash or collision and by whether we predicted swash or collision. We want to identify if there are any differences between these four groups."
]
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 10,
"metadata": {
"ExecuteTime": {
- "end_time": "2018-12-04T02:20:58.446500Z",
- "start_time": "2018-12-04T02:20:58.439480Z"
- }
+ "end_time": "2018-12-10T04:05:15.741391Z",
+ "start_time": "2018-12-10T04:05:15.734352Z"
+ },
+ "hidden": true
},
"outputs": [],
"source": [
- "def get_site_ids(df_forecasted, df_observed, forecasted_regime, observed_regime):\n",
+ "def get_site_ids(df_forecasted, df_observed, forecasted_regime,\n",
+ " observed_regime):\n",
" \"\"\"\n",
" Returns list of site_ids which match the given forecasted and observed regime\n",
" \"\"\"\n",
- " set1 = set(df_forecasted.query(\"storm_regime == '{}'\".format(\n",
- " forecasted_regime)).index.get_level_values('site_id'))\n",
- " set2 = set(df_observed.query(\"storm_regime == '{}'\".format(\n",
- " observed_regime)).index.get_level_values('site_id'))\n",
+ " set1 = set(\n",
+ " df_forecasted.query(\"storm_regime == '{}'\".format(forecasted_regime)).\n",
+ " index.get_level_values('site_id'))\n",
+ " set2 = set(\n",
+ " df_observed.query(\"storm_regime == '{}'\".format(observed_regime)).\n",
+ " index.get_level_values('site_id'))\n",
" return sorted(list(set1.intersection(set2)))\n",
"\n",
"\n",
@@ -166,23 +191,104 @@
" # Join into one dataframe\n",
" df_twl_toes = pd.concat([df_toes, df_R_highs], axis=1, sort=True)\n",
" df_twl_toes['diff'] = df_twl_toes['R_high'] - df_twl_toes['dune_toe_z']\n",
- " return df_twl_toes['diff']\n"
+ " return df_twl_toes['diff']"
]
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 15,
"metadata": {
"ExecuteTime": {
- "end_time": "2018-12-04T03:55:51.858020Z",
- "start_time": "2018-12-04T03:55:50.879155Z"
- }
+ "end_time": "2018-12-10T04:13:00.035545Z",
+ "start_time": "2018-12-10T04:12:59.176352Z"
+ },
+ "hidden": true
+ },
+ "outputs": [],
+ "source": [
+ "# Identify sites where swash regime was correctly or overpredicted\n",
+ "\n",
+ "swash_overpredicted_site_ids = get_site_ids(\n",
+ " df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
+ " df_observed=impacts['observed'],\n",
+ " forecasted_regime='collision',\n",
+ " observed_regime='swash')\n",
+ "swash_overpredicted_diffs = get_R_high_D_low_diff(\n",
+ " site_ids=swash_overpredicted_site_ids,\n",
+ " df_profile_features=df_profile_features,\n",
+ " df_twls=twls['forecasted']['mean_slope_sto06'])\n",
+ "\n",
+ "swash_correct_site_ids = get_site_ids(\n",
+ " df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
+ " df_observed=impacts['observed'],\n",
+ " forecasted_regime='swash',\n",
+ " observed_regime='swash')\n",
+ "swash_correct_diffs = get_R_high_D_low_diff(\n",
+ " site_ids=swash_correct_site_ids,\n",
+ " df_profile_features=df_profile_features,\n",
+ " df_twls=twls['forecasted']['mean_slope_sto06'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-12-10T04:12:58.434634Z",
+ "start_time": "2018-12-10T04:12:57.096839Z"
+ },
+ "hidden": true
+ },
+ "outputs": [],
+ "source": [
+ "# Identify sites where collision regime was correctly or underpredicted\n",
+ "\n",
+ "collision_underpredicted_site_ids = get_site_ids(\n",
+ " df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
+ " df_observed=impacts['observed'],\n",
+ " forecasted_regime='swash',\n",
+ " observed_regime='collision')\n",
+ "collision_underpredicted_diffs = get_R_high_D_low_diff(\n",
+ " site_ids=collision_underpredicted_site_ids,\n",
+ " df_profile_features=df_profile_features,\n",
+ " df_twls=twls['forecasted']['mean_slope_sto06'])\n",
+ "\n",
+ "collision_correct_site_ids = get_site_ids(\n",
+ " df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
+ " df_observed=impacts['observed'],\n",
+ " forecasted_regime='collision',\n",
+ " observed_regime='collision')\n",
+ "collision_correct_diffs = get_R_high_D_low_diff(\n",
+ " site_ids=collision_correct_site_ids,\n",
+ " df_profile_features=df_profile_features,\n",
+ " df_twls=twls['forecasted']['mean_slope_sto06'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "hidden": true
+ },
+ "source": [
+ "### Plot difference in $R_{high}$ and $D_{low}$ for swash and collision regimes\n",
+ "What does the distribution of elevations look like when we observe swash and collision regimes? Are there any differences between correctly and incorrectly predicted swash regime impacts?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-12-10T04:06:02.634355Z",
+ "start_time": "2018-12-10T04:05:42.644585Z"
+ },
+ "hidden": true
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "94883b85733444528fe8a73379ce4611",
+ "model_id": "f53f0ffc577b406ab56c1357c1145683",
"version_major": 2,
"version_minor": 0
},
@@ -198,79 +304,70 @@
}
],
"source": [
- "swash_overpredicted_site_ids = get_site_ids(df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
- " df_observed=impacts['observed'],\n",
- " forecasted_regime='collision',\n",
- " observed_regime='swash')\n",
- "swash_overpredicted_diffs = get_R_high_D_low_diff(site_ids=swash_overpredicted_site_ids,\n",
- " df_profile_features=df_profile_features,\n",
- " df_twls=twls['forecasted']['mean_slope_sto06'])\n",
- "\n",
- "swash_correct_site_ids = get_site_ids(df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
- " df_observed=impacts['observed'],\n",
- " forecasted_regime='swash',\n",
- " observed_regime='swash')\n",
- "swash_correct_diffs = get_R_high_D_low_diff(site_ids=swash_correct_site_ids,\n",
- " df_profile_features=df_profile_features,\n",
- " df_twls=twls['forecasted']['mean_slope_sto06'])\n",
- "\n",
- "\n",
- "trace1 = go.Histogram(y=swash_correct_diffs.tolist(),\n",
- " opacity=0.75,\n",
- " name='Correctly predicted',\n",
- " marker=dict(\n",
- " color='#67a9cf',\n",
- " ),\n",
- " ybins=dict(\n",
- " size=0.1\n",
- "),)\n",
- "trace2 = go.Histogram(y=swash_overpredicted_diffs.tolist(),\n",
- " opacity=0.75,\n",
- " name='Overpredicted',\n",
- " marker=dict(\n",
- " color='#ef8a62',\n",
- "),\n",
- " ybins=dict(\n",
- " size=0.1\n",
- "),)\n",
+ "trace1 = go.Histogram(\n",
+ " y=swash_correct_diffs.tolist(),\n",
+ " opacity=0.75,\n",
+ " name='Correctly predicted',\n",
+ " marker=dict(color='#67a9cf', ),\n",
+ " ybins=dict(size=0.1),\n",
+ ")\n",
+ "trace2 = go.Histogram(\n",
+ " y=swash_overpredicted_diffs.tolist(),\n",
+ " opacity=0.75,\n",
+ " name='Overpredicted',\n",
+ " marker=dict(color='#ef8a62', ),\n",
+ " ybins=dict(size=0.1),\n",
+ ")\n",
"\n",
"layout = go.Layout(\n",
" title='R_high - D_low
Swash Regime',\n",
" barmode='overlay',\n",
- " yaxis=dict(\n",
- " title='z (m AHD)'\n",
- " ),\n",
- " xaxis=dict(\n",
- " title='Count'\n",
- " ),\n",
+ " yaxis=dict(title='z (m AHD)'),\n",
+ " xaxis=dict(title='Count'),\n",
" bargap=0.2,\n",
" bargroupgap=0.1,\n",
- " legend=dict(x=.6, y=1)\n",
- ")\n",
+ " legend=dict(x=.6, y=1))\n",
"\n",
"g_plot_swash = go.FigureWidget(data=[trace2, trace1], layout=layout)\n",
"\n",
"# To output to file\n",
- "img_bytes = pio.write_image(g_plot_swash, 'g_plot_swash.png',format='png', width=600, height=400, scale=5)\n",
+ "img_bytes = pio.write_image(\n",
+ " g_plot_swash,\n",
+ " '02_R_high_D_low_swash.png',\n",
+ " format='png',\n",
+ " width=600,\n",
+ " height=400,\n",
+ " scale=5)\n",
"\n",
- "g_plot_swash\n",
- "\n"
+ "g_plot_swash"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "hidden": true
+ },
+ "source": [
+ "The plot above shows that when $R_{high}$ - $D_{low}$ is $<0$, swash is correctly predicted. This is by definition, so it is not surprising. The most frequent value of $R_{high}$ - $D_{low}$ is slightly below $0$, so it appears that we have correctly predicted a majority of the observed swash regime events. \n",
+ "\n",
+ "Let's do the same thing, now considering `site_ids` where we have observed collision."
]
},
{
"cell_type": "code",
- "execution_count": 54,
+ "execution_count": 16,
"metadata": {
"ExecuteTime": {
- "end_time": "2018-12-04T04:10:47.339268Z",
- "start_time": "2018-12-04T04:10:45.796887Z"
- }
+ "end_time": "2018-12-10T04:13:03.703119Z",
+ "start_time": "2018-12-10T04:13:03.463485Z"
+ },
+ "hidden": true
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "3933da9295fe446f9413bca8842100c2",
+ "model_id": "4ec08bf2ea6f482ea3c52aa3348a05e2",
"version_major": 2,
"version_minor": 0
},
@@ -286,105 +383,794 @@
}
],
"source": [
- "collision_underpredicted_site_ids = get_site_ids(df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
- " df_observed=impacts['observed'],\n",
- " forecasted_regime='swash',\n",
- " observed_regime='collision')\n",
- "collision_underpredicted_diffs = get_R_high_D_low_diff(site_ids=collision_underpredicted_site_ids,\n",
- " df_profile_features=df_profile_features,\n",
- " df_twls=twls['forecasted']['mean_slope_sto06'])\n",
- "\n",
- "collision_correct_site_ids = get_site_ids(df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n",
- " df_observed=impacts['observed'],\n",
- " forecasted_regime='collision',\n",
- " observed_regime='collision')\n",
- "collision_correct_diffs = get_R_high_D_low_diff(site_ids=collision_correct_site_ids,\n",
- " df_profile_features=df_profile_features,\n",
- " df_twls=twls['forecasted']['mean_slope_sto06'])\n",
- "\n",
- "\n",
- "trace1 = go.Histogram(y=collision_correct_diffs.tolist(),\n",
- " opacity=0.75,\n",
- " name='Correctly predicted',\n",
- " marker=dict(\n",
- " color='#67a9cf',\n",
- " ),\n",
- " ybins=dict(\n",
- " size=0.1\n",
- "),)\n",
- "trace2 = go.Histogram(y=collision_underpredicted_diffs.tolist(),\n",
- " opacity=0.75,\n",
- " name='Underpredicted',\n",
- " marker=dict(\n",
- " color='#ef8a62',\n",
- " ),\n",
- " ybins=dict(\n",
- " size=0.1\n",
- "),)\n",
+ "trace1 = go.Histogram(\n",
+ " y=collision_correct_diffs.tolist(),\n",
+ " opacity=0.75,\n",
+ " name='Correctly predicted',\n",
+ " marker=dict(color='#67a9cf', ),\n",
+ " ybins=dict(size=0.1),\n",
+ ")\n",
+ "trace2 = go.Histogram(\n",
+ " y=collision_underpredicted_diffs.tolist(),\n",
+ " opacity=0.75,\n",
+ " name='Underpredicted',\n",
+ " marker=dict(color='#ef8a62', ),\n",
+ " ybins=dict(size=0.1),\n",
+ ")\n",
"\n",
"layout = go.Layout(\n",
" title='R_high - D_low
Collision Regime',\n",
" barmode='overlay',\n",
- " yaxis=dict(\n",
- " title='z (m AHD)'\n",
- " ),\n",
- " xaxis=dict(\n",
- " title='Count'\n",
- " ),\n",
+ " yaxis=dict(title='z (m AHD)'),\n",
+ " xaxis=dict(title='Count'),\n",
" bargap=0.2,\n",
" bargroupgap=0.1,\n",
- " legend=dict(x=.6, y=1)\n",
- ")\n",
+ " legend=dict(x=.6, y=1))\n",
"\n",
"g_plot_collision = go.FigureWidget(data=[trace2, trace1], layout=layout)\n",
"\n",
"# To output to file\n",
- "img_bytes = pio.write_image(g_plot_collision, 'g_plot_collision.png',format='png', width=600, height=400, scale=5)\n",
+ "img_bytes = pio.write_image(\n",
+ " g_plot_collision,\n",
+ " '02_R_high_D_low_collision.png',\n",
+ " format='png',\n",
+ " width=600,\n",
+ " height=400,\n",
+ " scale=5)\n",
"\n",
"g_plot_collision"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "hidden": true
+ },
+ "source": [
+ "We can see a trend similar to the swash regime, except flipped. A majority of the correctly forecasted collision regimes occur when $R_{high}$ - $D_{low}$ is $>0$, by definition. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "hidden": true
+ },
+ "source": [
+ "### TODO Does dune toe lower?\n",
+ "Are there any patterns that dictate whether the dune toe rises or lowers when subject to collision? Is it just based on the peak $R_{high}$ level, similar to equilibrium theory?\n"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Does dune toe lower?\n"
+ "## Relationship between parameters\n",
+ "Let's further investigate the relationship between hydrodynamic and morphodynamic parameters and see if they can tell us any more about how the storm regime may be correctly or incorrectly predicted."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Add functions for adding parameters to the dataframe\n",
+ "We need some additional functions which will add parameters to our dataframe so we can plot them."
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 25,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-12-10T04:55:43.229433Z",
+ "start_time": "2018-12-10T04:55:43.218402Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def add_berm_width(df, df_profiles, df__profile_features):\n",
+ "    \"\"\"\n",
+ "    Adds a new column to the dataframe, with the prestorm berm width.\n",
+ "\n",
+ "    Berm width is the x coordinate of the last prestorm profile point with a\n",
+ "    non-null z, minus the x coordinate of the prestorm dune toe.\n",
+ "\n",
+ "    :param df: dataframe to add the column to (merged on its index)\n",
+ "    :param df_profiles: profiles, filtered on profile_type and grouped by site_id\n",
+ "    :param df__profile_features: profile features providing dune_toe_x\n",
+ "    :return: df merged with a new berm_width column\n",
+ "    \"\"\"\n",
+ "    # Get x coordinates of dune toe and limit of survey (z=0)\n",
+ "    df_profile_end_x = df_profiles.query('profile_type==\"prestorm\"').dropna(\n",
+ "        subset=['z']).groupby('site_id').tail(1).reset_index(\n",
+ "            ['profile_type', 'x']).x.rename('x_end').to_frame()\n",
+ "    # Read from the argument rather than a notebook-level global of a similar name\n",
+ "    df_profile_dune_toe_x = df__profile_features.query(\n",
+ "        'profile_type==\"prestorm\"').dune_toe_x.to_frame()\n",
+ "\n",
+ "    # Merge and take the difference to calculate berm width\n",
+ "    df_merged = df_profile_end_x.merge(\n",
+ "        df_profile_dune_toe_x, left_index=True, right_index=True)\n",
+ "    berm_width = (df_merged['x_end'] -\n",
+ "                  df_merged['dune_toe_x']).rename('berm_width').to_frame()\n",
+ "\n",
+ "    # Return the dataframe with the berm_width col merged\n",
+ "    return df.merge(berm_width, left_index=True, right_index=True)\n",
+ "\n",
+ "\n",
+ "def add_observed_regime(df, df_observed, new_col='observed_storm_regime'):\n",
+ "    \"\"\"\n",
+ "    Adds a new column to the dataframe, with the observed storm regime.\n",
+ "\n",
+ "    :param df: dataframe to add the column to (merged on its index)\n",
+ "    :param df_observed: observed impacts, providing the storm_regime column\n",
+ "    :param new_col: name given to the added column\n",
+ "    :return: df merged with the observed storm regime under new_col\n",
+ "    \"\"\"\n",
+ "    # Use the frame that was passed in, not the notebook-level impacts dict\n",
+ "    return df.merge(\n",
+ "        df_observed.storm_regime.rename(new_col).to_frame(),\n",
+ "        left_index=True,\n",
+ "        right_index=True)\n",
+ "\n",
+ "\n",
+ "def add_mean_slope(df, df_twl):\n",
+ "    \"\"\"\n",
+ "    Merge a mean_slope column onto df, taken from the first beta value\n",
+ "    recorded for each site_id in df_twl.\n",
+ "    \"\"\"\n",
+ "    first_beta = df_twl.groupby('site_id').first()['beta']\n",
+ "    slope_frame = first_beta.to_frame(name='mean_slope')\n",
+ "    return df.merge(slope_frame, left_index=True, right_index=True)\n",
+ "\n",
+ "\n",
+ "def add_prestorm_berm_vol(df, df_impacts_observed):\n",
+ "    \"\"\"\n",
+ "    Merge a prestorm_berm_vol column onto df, taken from the\n",
+ "    prestorm_swash_vol column of the observed impacts.\n",
+ "    \"\"\"\n",
+ "    berm_vol = df_impacts_observed['prestorm_swash_vol'].to_frame(\n",
+ "        name='prestorm_berm_vol')\n",
+ "    return df.merge(berm_vol, left_index=True, right_index=True)\n",
+ "\n",
+ "\n",
+ "def add_prediction_class(df_impacts, prediction_classes):\n",
+ "    \"\"\"\n",
+ "    Adds a column which groups site_ids into the predicted and observed storm regime combination.\n",
+ "\n",
+ "    :param df_impacts: dataframe indexed by site_id; it is not modified\n",
+ "    :param prediction_classes: list of dicts, each with a 'site_ids' collection\n",
+ "        and the 'class' label to give those sites. If a site appears in more\n",
+ "        than one entry, the last entry wins.\n",
+ "    :return: copy of df_impacts with a prediction_class column; sites that are\n",
+ "        in none of the classes are left without a value\n",
+ "    \"\"\"\n",
+ "    # Work on a copy so the caller's frame is left untouched, matching the\n",
+ "    # other add_* helpers which return a new merged frame\n",
+ "    df_impacts = df_impacts.copy()\n",
+ "    for prediction_class in prediction_classes:\n",
+ "        df_impacts.loc[df_impacts.index.isin(prediction_class['site_ids']),\n",
+ "                       'prediction_class'] = prediction_class['class']\n",
+ "    return df_impacts"
+ ]
+ },
+ {
+ "cell_type": "markdown",
"metadata": {},
+ "source": [
+ "### Create the dataframe\n",
+ "We need to combine and add data into a single dataframe for comparison purposes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-12-10T04:55:45.293320Z",
+ "start_time": "2018-12-10T04:55:44.802910Z"
+ }
+ },
"outputs": [],
- "source": []
+ "source": [
+ "# Start with a copy of the forecasted impact dataframe, so the columns added\n",
+ "# below do not leak back into impacts['forecasted']['mean_slope_sto06'] and\n",
+ "# this cell gives the same result every time it is re-run\n",
+ "df = impacts['forecasted']['mean_slope_sto06'].copy()\n",
+ "\n",
+ "# Add a column which groups site_ids into the predicted and observed storm regime combination\n",
+ "prediction_classes = [\n",
+ "    {\n",
+ "        'site_ids': collision_underpredicted_site_ids,\n",
+ "        'class': 'collision_underpredicted'\n",
+ "    },\n",
+ "    {\n",
+ "        'site_ids': collision_correct_site_ids,\n",
+ "        'class': 'collision_correct'\n",
+ "    },\n",
+ "    {\n",
+ "        'site_ids': swash_overpredicted_site_ids,\n",
+ "        'class': 'swash_overpredicted'\n",
+ "    },\n",
+ "    {\n",
+ "        'site_ids': swash_correct_site_ids,\n",
+ "        'class': 'swash_correct'\n",
+ "    },\n",
+ "]\n",
+ "df = add_prediction_class(df, prediction_classes)\n",
+ "\n",
+ "# Drop site_ids where we do not have a prediction class (caused by NaNs)\n",
+ "df = df.dropna(subset=['prediction_class'])\n",
+ "\n",
+ "# Add additional parameters\n",
+ "df = add_observed_regime(df, impacts['observed'])\n",
+ "df = add_berm_width(df, df_profiles, df_profile_features)\n",
+ "df = add_mean_slope(df, df_twl=twls['forecasted']['mean_slope_sto06'])\n",
+ "df = add_prestorm_berm_vol(df, df_impacts_observed=impacts['observed'])\n",
+ "\n",
+ "# R_high relative to the dune toe elevation, as a difference and as a ratio\n",
+ "df['R_high_dune_toe_diff'] = df['R_high'] - df['dune_toe_z']\n",
+ "df['R_high_dune_toe_ratio'] = df['R_high'] / df['dune_toe_z']"
+ ]
},
{
"cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Create scatter plot matrix of parameter interactions\n",
+ "Plot each hydrodynamic and morphodynamic parameter against each other and see if we can identify any patterns."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
"metadata": {
- "heading_collapsed": true
+ "ExecuteTime": {
+ "end_time": "2018-12-10T05:52:23.384061Z",
+ "start_time": "2018-12-10T05:52:21.345652Z"
+ }
},
+ "outputs": [],
+ "source": [
+ "# Setup colors for different classes\n",
+ "text = df['prediction_class'].tolist()\n",
+ "class_code = {x['class']: n for n, x in enumerate(prediction_classes)}\n",
+ "color_vals = [class_code[cl] for cl in df['prediction_class']]\n",
+ "\n",
+ "# Each prediction class will have its own color\n",
+ "pl_colorscale = [[0.0, '#d7191c'], [0.25, '#d7191c'], [0.25, '#fdae61'],\n",
+ " [0.5, '#fdae61'], [0.5, '#2c7bb6'], [0.75, '#2c7bb6'],\n",
+ " [0.75, '#abd9e9'], [1, '#abd9e9']]\n",
+ "\n",
+ "# Setup plotly scatterplot matrix\n",
+ "trace1 = go.Splom(\n",
+ " dimensions=[\n",
+ " dict(label='dune_toe_z', values=df['dune_toe_z']),\n",
+ " dict(label='R_high', values=df['R_high']),\n",
+ " dict(label='berm_width', values=df['berm_width']),\n",
+ " dict(\n",
+ " label='twl_dune_toe_z_exceedance_hrs',\n",
+ " values=df['twl_dune_toe_z_exceedance_hrs']),\n",
+ " dict(label='R_high_dune_toe_diff', values=df['R_high_dune_toe_diff']),\n",
+ " dict(\n",
+ " label='R_high_dune_toe_ratio', values=df['R_high_dune_toe_ratio']),\n",
+ " dict(label='mean_slope', values=df['mean_slope']),\n",
+ " dict(label='prestorm_berm_vol', values=df['prestorm_berm_vol']),\n",
+ " ],\n",
+ " text=text,\n",
+ " diagonal=dict(visible=False),\n",
+ " showupperhalf=False,\n",
+ " marker=dict(\n",
+ " color=color_vals,\n",
+ " size=2,\n",
+ " colorscale=pl_colorscale,\n",
+ " showscale=False,\n",
+ " line=dict(width=0.1, color='rgb(230,230,230)')))\n",
+ "\n",
+ "axis = dict(showline=True, zeroline=False, gridcolor='#fff', ticklen=4)\n",
+ "\n",
+ "# Every x and y axis of the 8 Splom dimensions gets its own copy of the\n",
+ "# shared axis styling (xaxis1..xaxis8, then yaxis1..yaxis8)\n",
+ "axis_kwargs = {\n",
+ "    '{}axis{}'.format(direction, n): dict(axis)\n",
+ "    for direction in ('x', 'y') for n in range(1, 9)\n",
+ "}\n",
+ "\n",
+ "layout = go.Layout(\n",
+ "    title='Storm Impact Scatter Plot Matrix',\n",
+ "    dragmode='select',\n",
+ "    width=800,\n",
+ "    height=800,\n",
+ "    autosize=False,\n",
+ "    hovermode='closest',\n",
+ "    plot_bgcolor='rgba(240,240,240, 0.95)',\n",
+ "    **axis_kwargs)\n",
+ "\n",
+ "# Change font of axis labels\n",
+ "for ax in layout:\n",
+ " if 'xaxis' in ax or 'yaxis' in ax:\n",
+ " layout[ax]['titlefont'] = {'size': 12}\n",
+ "\n",
+ "fig_scatter = go.FigureWidget(data=[trace1], layout=layout)\n",
+ "\n",
+ "# To output to file\n",
+ "img_bytes = pio.write_image(\n",
+ " fig_scatter,\n",
+ " '02_scatter_plot.png',\n",
+ " format='png',\n",
+ " width=1500,\n",
+ " height=1500,\n",
+ " scale=5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
"source": [
- "### What do over predicted and underpredicted profiles look like?"
+ "Jupyter gets a little bit slow when trying to display this plot interactively, so let's output it as an image to view.\n",
+ "![02_scatter_plot.png](02_scatter_plot.png)"
]
},
{
"cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Create parameter confusion matrix\n",
+ "It's a bit hard to see the difference between the different categories, so let's do a confusion matrix, but with plots showing two variables on each axis. First, list all the different parameters we have available to plot against:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
"metadata": {
- "hidden": true
+ "ExecuteTime": {
+ "end_time": "2018-12-10T05:27:34.225427Z",
+ "start_time": "2018-12-10T05:27:34.218427Z"
+ }
},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['datetime', 'R_high', 'R_low', 'dune_toe_z', 'dune_crest_z',\n",
+ " 'storm_regime', 'twl_dune_toe_z_exceedance_hrs', 'prediction_class',\n",
+ " 'observed_storm_regime', 'berm_width', 'mean_slope',\n",
+ " 'prestorm_berm_vol', 'R_high_dune_toe_diff', 'R_high_dune_toe_ratio'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "Define a function for getting the average beach profile for a number of given site_ids:"
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-12-10T05:29:42.927017Z",
+ "start_time": "2018-12-10T05:29:38.603905Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "This is the format of your plot grid:\n",
+ "[ (1,1) x1,y1 ] [ (1,2) x2,y2 ]\n",
+ "[ (2,1) x3,y3 ] [ (2,2) x4,y4 ]\n",
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "397f170343fe45b4acf9aded46595ac8",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FigureWidget({\n",
+ " 'data': [{'marker': {'color': 'rgb(200,200,200)', 'size': 4},\n",
+ " 'mode': 'marker…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Define which columns we want to plot\n",
+ "x_col = 'prestorm_berm_vol'\n",
+ "y_col = 'R_high_dune_toe_diff'\n",
+ "marker_size = 4\n",
+ "\n",
+ "# Create 2x2 subplot figure confusion matrix\n",
+ "fig = tools.make_subplots(\n",
+ " rows=2,\n",
+ " cols=2,\n",
+ " vertical_spacing=0.09,\n",
+ " subplot_titles=(\n",
+ " 'Predicted Swash',\n",
+ " 'Predicted Collision',\n",
+ " '',\n",
+ " '',\n",
+ " ))\n",
+ "\n",
+ "# Get data for all traces\n",
+ "x_all = df.loc[:, x_col]\n",
+ "y_all = df.loc[:, y_col]\n",
+ "\n",
+ "# Create underlying grey traces of all data, so we can compare each subplot with the all the data.\n",
+ "trace5 = go.Scatter(\n",
+ " mode='markers',\n",
+ " x=x_all,\n",
+ " y=y_all,\n",
+ " showlegend=False,\n",
+ " marker=dict(\n",
+ " color='rgb(200,200,200)',\n",
+ " size=marker_size,\n",
+ " ))\n",
+ "fig.append_trace(trace5, 1, 1)\n",
+ "\n",
+ "trace6 = copy(trace5)\n",
+ "trace6.xaxis = 'x2'\n",
+ "trace6.yaxis = 'y'\n",
+ "fig.append_trace(trace6, 1, 2)\n",
+ "\n",
+ "trace7 = copy(trace5)\n",
+ "trace7.xaxis = 'x'\n",
+ "trace7.yaxis = 'y2'\n",
+ "fig.append_trace(trace7, 2, 1)\n",
+ "\n",
+ "trace8 = copy(trace5)\n",
+ "trace8.xaxis = 'x2'\n",
+ "trace8.yaxis = 'y2'\n",
+ "fig.append_trace(trace8, 2, 2)\n",
+ "\n",
+ "# Add actual data for each subplot\n",
+ "\n",
+ "# Predicted swash, observed collision\n",
+ "trace1 = go.Scatter(\n",
+ " mode='markers',\n",
+ " x=df.loc[df.index.isin(collision_underpredicted_site_ids), x_col],\n",
+ " y=df.loc[df.index.isin(collision_underpredicted_site_ids), y_col],\n",
+ " marker=dict(\n",
+ " color='#fc8d59',\n",
+ " size=marker_size,\n",
+ " line=dict(color='rgb(231,231,231)', width=0.5)))\n",
+ "fig.append_trace(trace1, 2, 1)\n",
+ "\n",
+ "# Predicted collision, observed collision\n",
+ "trace2 = go.Scatter(\n",
+ " mode='markers',\n",
+ " x=df.loc[df.index.isin(collision_correct_site_ids), x_col],\n",
+ " y=df.loc[df.index.isin(collision_correct_site_ids), y_col],\n",
+ " marker=dict(\n",
+ " color='#fc8d59',\n",
+ " size=marker_size,\n",
+ " line=dict(color='rgb(231,231,231)', width=0.5)))\n",
+ "fig.append_trace(trace2, 2, 2)\n",
+ "\n",
+ "# Predicted swash, observed swash\n",
+ "trace3 = go.Scatter(\n",
+ " mode='markers',\n",
+ " x=df.loc[df.index.isin(swash_correct_site_ids), x_col],\n",
+ " y=df.loc[df.index.isin(swash_correct_site_ids), y_col],\n",
+ " marker=dict(\n",
+ " color='#3182bd',\n",
+ " size=marker_size,\n",
+ " line=dict(color='rgb(231,231,231)', width=0.5)))\n",
+ "fig.append_trace(trace3, 1, 1)\n",
+ "\n",
+ "# Predicted collision, observed swash\n",
+ "trace4 = go.Scatter(\n",
+ " mode='markers',\n",
+ " x=df.loc[df.index.isin(swash_overpredicted_site_ids), x_col],\n",
+ " y=df.loc[df.index.isin(swash_overpredicted_site_ids), y_col],\n",
+ " marker=dict(\n",
+ " color='#3182bd',\n",
+ " size=marker_size,\n",
+ " line=dict(color='rgb(231,231,231)', width=0.5)))\n",
+ "fig.append_trace(trace4, 1, 2)\n",
+ "\n",
+ "# Update formatting, titles, sizes etc.\n",
+ "fig['layout']['yaxis1'].update(\n",
+ "    title='{}<br>{}'.format('Observed swash', y_col))\n",
+ "fig['layout']['yaxis3'].update(\n",
+ "    title='{}<br>{}'.format('Observed collision', y_col))\n",
+ "fig['layout']['xaxis3'].update(title=x_col)\n",
+ "fig['layout']['xaxis4'].update(title=x_col)\n",
+ "fig['layout'].update(\n",
+ " height=700, width=900, title='Storm Regime Confusion Matrix')\n",
+ "fig['layout']['showlegend'] = False\n",
+ "fig_to_plot = go.FigureWidget(data=fig.data, layout=fig.layout)\n",
+ "\n",
+ "# To output to file\n",
+ "img_bytes = pio.write_image(\n",
+ " fig_to_plot,\n",
+ " '02_storm_regime_confusion.png',\n",
+ " format='png',\n",
+ " width=600,\n",
+ " height=600,\n",
+ " scale=5)\n",
+ "\n",
+ "fig_to_plot"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-12-09T00:30:34.940581Z",
+ "start_time": "2018-12-09T00:30:34.825247Z"
+ }
+ },
+ "source": [
+ "Plotting `prestorm_berm_vol` vs `R_high_dune_toe_diff` shows there is a difference between observed swash and collision. It appears when the `prestorm_berm_vol` is smaller than 80 m, we will get collision, regardless of whether `R_high_dune_toe_diff` is greater than 0 m. Let's confirm that there is a vertical line that differentiates between these two regimes."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Create regime predictor which includes berm volume\n",
+ "\n",
+ "The technique for finding the boundary between two clusters is taken from [Stack Overflow](https://stackoverflow.com/a/22356267)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-12-10T05:37:35.785659Z",
+ "start_time": "2018-12-10T05:37:34.355843Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "09471a2157774aa2b17f5b88c2bfaab4",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FigureWidget({\n",
+ " 'data': [{'marker': {'color': '#3182bd', 'line': {'color': 'rgb(231,231,231)', 'width': 0.5…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Get data\n",
+ "df = df.dropna(subset=['prestorm_berm_vol', 'R_high_dune_toe_diff'])\n",
+ "\n",
+ "swash_x = df.query('observed_storm_regime==\"swash\"').prestorm_berm_vol\n",
+ "swash_y = df.query('observed_storm_regime==\"swash\"').R_high_dune_toe_diff\n",
+ "swash_samples = np.array([[x, y] for x, y in zip(swash_x, swash_y)])\n",
+ "\n",
+ "collision_x = df.query('observed_storm_regime==\"collision\"').prestorm_berm_vol\n",
+ "collision_y = df.query(\n",
+ " 'observed_storm_regime==\"collision\"').R_high_dune_toe_diff\n",
+ "collision_samples = np.array(\n",
+ " [[x, y] for x, y in zip(collision_x, collision_y)])\n",
+ "\n",
+ "# Fit SVM\n",
+ "X = np.concatenate((swash_samples, collision_samples), axis=0)\n",
+ "Y = np.array([0] * swash_x.shape[0] + [1] * collision_x.shape[0])  # 0 = swash, 1 = collision\n",
+ "\n",
+ "C = 1.0  # SVM regularization parameter\n",
+ "# A linear kernel has no gamma coefficient, so only C is passed\n",
+ "clf = svm.SVC(kernel='linear', C=C)\n",
+ "clf.fit(X, Y)\n",
+ "\n",
+ "# The fitted boundary is w[0] * x + w[1] * y + intercept = 0, i.e.\n",
+ "# y = a * x - intercept / w[1]\n",
+ "w = clf.coef_[0]\n",
+ "a = -w[0] / w[1]\n",
+ "y_vals = swash_y.tolist() + collision_y.tolist()\n",
+ "yy = np.linspace(min(y_vals), max(y_vals))\n",
+ "# Solve the boundary for x without dividing by w[1], so a near-vertical split still plots\n",
+ "xx = -(w[1] * yy + clf.intercept_[0]) / w[0]\n",
+ "\n",
+ "# Prepare plot\n",
+ "trace_swash = go.Scatter(\n",
+ " x=swash_x,\n",
+ " y=swash_y,\n",
+ " name='Swash',\n",
+ " mode='markers',\n",
+ " marker=dict(\n",
+ " color='#3182bd',\n",
+ " size=marker_size,\n",
+ " line=dict(color='rgb(231,231,231)', width=0.5)))\n",
+ "\n",
+ "trace_collision = go.Scatter(\n",
+ " x=collision_x,\n",
+ " y=collision_y,\n",
+ " name='Collision',\n",
+ " mode='markers',\n",
+ " marker=dict(\n",
+ " color='#fc8d59',\n",
+ " size=marker_size,\n",
+ " line=dict(color='rgb(231,231,231)', width=0.5)))\n",
+ "\n",
+ "trace_split = go.Scatter(\n",
+ " x=xx,\n",
+ " y=yy,\n",
+ " name='Split (y={:.1f}x-{:.1f})'.format(a, (clf.intercept_[0]) / w[1]),\n",
+ ")\n",
+ "\n",
+ "layout = dict(\n",
+ " title='Observed Swash/Collision Regime Split',\n",
+ " xaxis=dict(title='Prestorm berm volume', ),\n",
+ " yaxis=dict(title='R_high - D_low'),\n",
+ " legend=dict(x=.6, y=1.))\n",
+ "\n",
+ "fig_to_plot = go.FigureWidget(\n",
+ " data=[trace_swash, trace_collision, trace_split], layout=layout)\n",
+ "\n",
+ "# To output to file\n",
+ "img_bytes = pio.write_image(\n",
+ " fig_to_plot,\n",
+ " '02_storm_regime_split.png',\n",
+ " format='png',\n",
+ " width=600,\n",
+ " height=600,\n",
+ " scale=5)\n",
+ "\n",
+ "fig_to_plot"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Looking at the plot above, it appears when the `prestorm_berm_vol` is less than 80 m, then we should classify it as collision, even if wave runup does not reach the toe of the dune."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Test new berm vol predictor\n",
+ "\n",
+ "Now let's go back to our predicted forecasts and see if our confusion matrix improves if we adopt this new criterion for differentiating between swash and collision.\n",
+ "\n",
+ "First define a custom function to get colormap for our confusion matrix."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-12-10T02:18:08.658221Z",
+ "start_time": "2018-12-10T02:18:08.653186Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def matplotlib_to_plotly(cmap_name='RdYlGn', pl_entries=255):\n",
+ "    \"\"\"\n",
+ "    Function to convert matplotlib colorscale to plotly\n",
+ "\n",
+ "    :param cmap_name: name of the matplotlib colormap to sample\n",
+ "    :param pl_entries: number of evenly spaced samples to take (must be >= 2)\n",
+ "    :return: list of [position, 'rgb(r, g, b)'] pairs, with position running\n",
+ "        from 0 to 1\n",
+ "    \"\"\"\n",
+ "    cmap = matplotlib.cm.get_cmap(cmap_name)\n",
+ "    h = 1.0 / (pl_entries - 1)\n",
+ "    pl_colorscale = []\n",
+ "\n",
+ "    for k in range(pl_entries):\n",
+ "        # Use plain Python ints: str() of numpy scalars can render as e.g.\n",
+ "        # 'np.uint8(165)', which is not a valid colour string\n",
+ "        r, g, b = (int(channel * 255) for channel in cmap(k * h)[:3])\n",
+ "        pl_colorscale.append([k * h, 'rgb({}, {}, {})'.format(r, g, b)])\n",
+ "\n",
+ "    return pl_colorscale"
]
},
{
"cell_type": "code",
- "execution_count": 156,
+ "execution_count": 40,
"metadata": {
"ExecuteTime": {
- "end_time": "2018-12-04T23:11:08.853877Z",
- "start_time": "2018-12-04T23:11:08.846876Z"
+ "end_time": "2018-12-10T05:44:27.306253Z",
+ "start_time": "2018-12-10T05:44:25.791799Z"
},
- "hidden": true
+ "code_folding": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "97d8283101e748dd8e2fd7811c97973b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FigureWidget({\n",
+ " 'data': [{'colorscale': [[0.0, 'rgb(165, 0, 38)'], [0.003937007874015748,\n",
+ " …"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from sklearn.metrics import confusion_matrix\n",
+ "\n",
+ "# Create colorscale\n",
+ "rdylgr = matplotlib_to_plotly()\n",
+ "\n",
+ "# Classify each site against the split line y = 2.7x - 221.1, where x is the\n",
+ "# prestorm berm volume and y is R_high minus the dune toe elevation: on or\n",
+ "# above the line is collision, below it is swash.\n",
+ "split_line = df['prestorm_berm_vol'] * 2.7 - 221.1\n",
+ "runup_excess = df['R_high_dune_toe_diff']\n",
+ "df.loc[split_line <= runup_excess, 'new_predicted_storm_regime'] = 'collision'\n",
+ "df.loc[split_line > runup_excess, 'new_predicted_storm_regime'] = 'swash'\n",
+ "\n",
+ "# Get observed and forecasted regimes, and merge\n",
+ "observed_regimes = df.observed_storm_regime.rename(\n",
+ " 'observed_regime').to_frame()\n",
+ "forecasted_regimes = df.new_predicted_storm_regime.rename(\n",
+ " 'forecasted_regime').to_frame()\n",
+ "df_compare = pd.concat([observed_regimes, forecasted_regimes],\n",
+ " axis='columns',\n",
+ " names=['a', 'b'],\n",
+ " sort=True)\n",
+ "df_compare.dropna(axis='index', inplace=True)\n",
+ "\n",
+ "# Create a confusion matrix based on the observed/forecasted regimes.\n",
+ "# Need to do some flipping and reversing to get it in the correct\n",
+ "# order for the plotly heatmap.\n",
+ "z = confusion_matrix(\n",
+ " df_compare.observed_regime.tolist(),\n",
+ " df_compare.forecasted_regime.tolist(),\n",
+ " labels=['swash', 'collision', 'overwash', 'inundation'])\n",
+ "z = np.flip(z, axis=0)\n",
+ "z_list = list(reversed(z.tolist()))\n",
+ "\n",
+ "# Make incorrect values negative, so they get assigned a different color.\n",
+ "# Better for visualization\n",
+ "z_neg_incorrect = np.flip(np.identity(4), axis=0)\n",
+ "z_neg_incorrect[z_neg_incorrect == 0] = -1\n",
+ "z_neg_incorrect = (z * z_neg_incorrect).tolist()\n",
+ "\n",
+ "# Change the text on the heatmap so it also displays percentages.\n",
+ "z_with_pct = []\n",
+ "for row in z:\n",
+ " new_row = []\n",
+ " for val in row:\n",
+ "        new_row.append('{}<br>({}%)'.format(\n",
+ " val, np.around(val / np.sum(z) * 100, 1)))\n",
+ " z_with_pct.append(new_row)\n",
+ "\n",
+ "# Create the heatmap figure\n",
+ "x = ['swash', 'collision', 'overwash', 'inundation']\n",
+ "y = list(reversed(x))\n",
+ "fig = ff.create_annotated_heatmap(\n",
+ " z_neg_incorrect, x=x, y=y, annotation_text=z_with_pct, colorscale=rdylgr)\n",
+ "heatmap = go.FigureWidget(data=fig.data, layout=fig.layout)\n",
+ "\n",
+ "# Update axis titles\n",
+ "heatmap.layout.xaxis.update(title='Predicted')\n",
+ "heatmap.layout.yaxis.update(title='Observed')\n",
+ "\n",
+ "# Write to file\n",
+ "img_bytes = pio.write_image(\n",
+ " heatmap,\n",
+ " '02_confusion_matrix_berm_vol_predictor.png',\n",
+ " format='png',\n",
+ " width=600,\n",
+ " height=600,\n",
+ " scale=5)\n",
+ "\n",
+ "heatmap"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## TODO Compare predicted and underpredicted profiles"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Define a function for getting the average beach profile for a number of given site_ids:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 224,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-12-10T03:08:49.904157Z",
+ "start_time": "2018-12-10T03:08:49.896136Z"
+ }
},
"outputs": [],
"source": [
@@ -422,41 +1208,46 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "hidden": true
- },
+ "metadata": {},
"source": [
"Now, let's look at whether there is a difference between the average beach profile of correctly forecasted site_ids and incorrectly forecasted site_ids. First, looking at sites where we observed swash regime."
]
},
{
"cell_type": "code",
- "execution_count": 161,
+ "execution_count": 225,
"metadata": {
"ExecuteTime": {
- "end_time": "2018-12-05T02:00:36.853374Z",
- "start_time": "2018-12-05T01:58:21.839366Z"
+ "end_time": "2018-12-10T03:11:20.818875Z",
+ "start_time": "2018-12-10T03:08:49.906163Z"
},
- "code_folding": [],
- "hidden": true
+ "code_folding": []
},
"outputs": [
{
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "03f2e99d20a347f3922a0e6a36f99ccd",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "FigureWidget({\n",
- " 'data': [{'line': {'color': 'rgb(205, 0, 0)', 'width': 2},\n",
- " 'mode': 'lines',\n",
- " …"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\z5189959\\Desktop\\nsw-2016-storm-impact\\.venv\\lib\\site-packages\\pandas\\core\\groupby\\groupby.py:1062: RuntimeWarning:\n",
+ "\n",
+ "Mean of empty slice\n",
+ "\n",
+ "C:\\Users\\z5189959\\Desktop\\nsw-2016-storm-impact\\.venv\\lib\\site-packages\\numpy\\lib\\nanfunctions.py:1545: RuntimeWarning:\n",
+ "\n",
+ "Degrees of freedom <= 0 for slice.\n",
+ "\n"
+ ]
+ },
+ {
+ "ename": "NameError",
+ "evalue": "name 'avg_correct_x' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 41\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 42\u001b[0m trace_correct_mean = go.Scatter(\n\u001b[1;32m---> 43\u001b[1;33m \u001b[0mx\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mavg_correct_x\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 44\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mavg_correct_z\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 45\u001b[0m \u001b[0mopacity\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;31mNameError\u001b[0m: name 'avg_correct_x' is not defined"
+ ]
}
],
"source": [
@@ -563,42 +1354,21 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "hidden": true
- },
+ "metadata": {},
"source": [
"We can see that the difference is pretty minimal. For cases where we predicted collision, but observed swash (overprediction), we see that overpredicted profiles are slightly more concave than correctly predicted sites."
]
},
{
"cell_type": "code",
- "execution_count": 162,
+ "execution_count": null,
"metadata": {
"ExecuteTime": {
- "end_time": "2018-12-05T02:03:38.394415Z",
- "start_time": "2018-12-05T02:00:37.335377Z"
- },
- "hidden": true
- },
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "1255bccc024e4690b4b8ff4ccc8e9e35",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "FigureWidget({\n",
- " 'data': [{'line': {'color': 'rgb(205, 0, 0)', 'width': 2},\n",
- " 'mode': 'lines',\n",
- " …"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "end_time": "2018-12-10T03:11:20.824874Z",
+ "start_time": "2018-12-10T03:08:27.623Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"underpredicted = get_avg_profile(collision_underpredicted_site_ids)\n",
"correct = get_avg_profile(collision_correct_site_ids)\n",
@@ -703,15 +1473,14 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "hidden": true
- },
+ "metadata": {},
"source": [
"This plot is a bit more interesting. It shows that we are correctly forecasting collision when the profile is more accreted/convex, but when the profile is more eroded/concave, the water level is underpredicted. Why is this? "
]
}
],
"metadata": {
+ "hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",