diff --git a/Makefile b/Makefile index 74643fe..eb12728 100644 --- a/Makefile +++ b/Makefile @@ -118,6 +118,7 @@ impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/inte --profile-features-csv "./data/interim/profile_features.csv" \ --runup-function "sto06" \ --slope "foreshore" \ + --profile-type "prestorm" \ --output-file "./data/interim/twl_foreshore_slope_sto06.csv" ./data/interim/twl_mean_slope_sto06.csv: ./data/interim/waves.csv ./data/interim/tides.csv ./data/interim/profiles.csv ./data/interim/sites.csv ./data/interim/profile_features.csv @@ -128,6 +129,7 @@ impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/inte --profile-features-csv "./data/interim/profile_features.csv" \ --runup-function "sto06" \ --slope "mean" \ + --profile-type "prestorm" \ --output-file "./data/interim/twl_mean_slope_sto06.csv" ./data/interim/twl_poststorm_mean_slope_sto06.csv: ./data/interim/waves.csv ./data/interim/tides.csv ./data/interim/profiles.csv ./data/interim/sites.csv ./data/interim/profile_features.csv diff --git a/notebooks/01_exploration.ipynb b/notebooks/01_exploration.ipynb index 07a9649..deb0348 100644 --- a/notebooks/01_exploration.ipynb +++ b/notebooks/01_exploration.ipynb @@ -1001,9 +1001,1012 @@ " g_profiles\n", "])" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Scatter plot" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "ExecuteTime": { + "end_time": "2018-12-07T05:08:12.117885Z", + "start_time": "2018-12-07T05:08:12.078780Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
prestorm_swash_volpoststorm_swash_volswash_vol_changeswash_pct_changeprestorm_dune_face_volpoststorm_dune_face_voldune_face_vol_changedune_face_pct_changestorm_regime
site_id
AVOCAn0001113.909177.983035.610331.2620165.4760166.72960.00000.0000swash
AVOCAn0002106.895967.091339.637337.0803256.4137258.91740.00000.0000swash
AVOCAn000399.048453.656345.162145.5960372.7031373.9198-0.3147-0.0844swash
AVOCAn000474.754329.328045.426260.7674275.1689276.04760.41040.1492swash
AVOCAn000570.596824.107146.489765.8524268.5194263.42627.41962.7631collision
AVOCAn000668.758323.966544.791865.1438202.6770198.23974.79442.3655collision
AVOCAn000775.789527.271548.518064.0168149.8479143.13127.13234.7597collision
AVOCAn000893.310742.496850.813954.4567187.9201187.34592.82971.5058collision
AVOCAn00093.69550.10383.591797.1908NaNNaNNaNNaNNaN
AVOCAs0001NaNNaNNaNNaNNaNNaNNaNNaNNaN
AVOCAs000297.386426.661970.724672.6226NaNNaNNaNNaNNaN
AVOCAs000370.740140.060830.791943.5282NaNNaNNaNNaNNaN
AVOCAs000497.838945.484552.215753.3691NaNNaNNaNNaNNaN
AVOCAs000595.171154.972240.170642.2088NaNNaNNaNNaNNaN
AVOCAs0006112.581867.871844.825239.8157NaNNaNNaNNaNNaN
AVOCAs000765.353144.153721.522832.9331NaNNaNNaNNaNNaN
AVOCAs000852.394044.91527.480314.2770NaNNaNNaNNaNNaN
BILG000119.41777.574611.843160.9913NaNNaNNaNNaNNaN
BILG0002155.699898.169357.434036.8876NaNNaNNaNNaNNaN
BILG000383.521952.805930.553736.581741.146940.60810.00000.0000swash
BILG0004158.6283116.618942.117826.551211.221111.0892-0.0132-0.1179swash
BILG0005212.8478164.004448.431222.7539613.6156606.27665.77380.9410collision
BLUEYS000165.462819.293846.169070.5270130.7447120.54469.56017.3121collision
BLUEYS000250.208410.300939.907479.4836512.0154477.177433.28256.5003collision
BLUEYS000350.630811.168239.462577.9418443.0853414.390124.88705.6167collision
BLUEYS000495.160831.333063.827967.0737287.5805272.426712.96414.5080collision
BLUEYS0005141.064358.254582.809858.7036539.3864520.073212.04702.2335collision
BLUEYS000688.420751.620536.255341.0032271.6036267.19543.60451.3271collision
BOAT000123.851023.5660-0.0264-0.1108NaNNaNNaNNaNNaN
BOAT000237.652414.020923.631662.7624NaNNaNNaNNaNNaN
..............................
TREACH001497.532346.299451.081652.3740508.7400505.78770.42540.0836swash
TREACH001596.832745.196251.636453.3254690.8275683.44581.50860.2184NaN
TREACH0016106.908366.056740.362937.7547508.0014499.63150.33860.0667swash
WAMBE0001132.3413143.4459-9.7255-7.3488665.9898667.59230.04100.0062swash
WAMBE0002151.1833126.984423.954815.8449385.8467386.7284-0.0449-0.0116swash
WAMBE0003154.1788117.944136.242523.5068694.2226700.5105-4.2136-0.6070swash
WAMBE0004137.844976.600761.272544.4503559.5485569.8591-4.4590-0.7969swash
WAMBE0005NaNNaNNaNNaNNaNNaNNaNNaNNaN
WAMBE0006205.8453186.078422.589210.973955.089855.89190.00000.0000swash
WAMBE000780.467435.461445.005955.9307178.1005178.54390.47270.2654swash
WAMBE000888.457440.320048.137454.4187258.7513258.3849-1.2073-0.4666swash
WAMBE000970.915926.174244.741863.0913267.3725258.37209.80413.6668collision
WAMBE001058.660418.041840.618669.2437187.5259161.974825.308713.4961collision
WAMBE001159.241516.316542.925072.4577197.0129175.251221.988211.1608collision
WAMBE001274.418923.023251.395769.0627178.4783168.347510.03865.6246collision
WAMBE001370.496422.754647.741967.7224231.1513195.258135.807215.4908collision
WAMBE001468.089624.185343.904364.480282.426861.260121.171825.6856collision
WAMBE001555.078016.011939.066070.9286NaNNaNNaNNaNNaN
WAMBE001696.768739.822456.946358.8479NaNNaNNaNNaNNaN
WAMBE001735.29878.514026.784775.8801NaNNaNNaNNaNNaN
WAMBE001840.940710.514730.426074.3173NaNNaNNaNNaNNaN
WAMBE001938.28389.215629.068275.9282NaNNaNNaNNaNNaN
WAMBE0020NaNNaNNaNNaNNaNNaNNaNNaNNaN
WAMBE0021NaNNaNNaNNaNNaNNaNNaNNaNNaN
WAMBE00220.55160.28400.267548.5063NaNNaNNaNNaNNaN
WAMBE00233.37610.30203.074191.0554NaNNaNNaNNaNNaN
WAMBE002460.864831.279429.585448.6084NaNNaNNaNNaNNaN
WAMBE002545.105514.602830.502867.6253NaNNaNNaNNaNNaN
WAMBE002632.150212.933519.216759.7716NaNNaNNaNNaNNaN
WAMBE002726.231018.68287.548228.7759NaNNaNNaNNaNNaN
\n", + "

1768 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " prestorm_swash_vol poststorm_swash_vol swash_vol_change \\\n", + "site_id \n", + "AVOCAn0001 113.9091 77.9830 35.6103 \n", + "AVOCAn0002 106.8959 67.0913 39.6373 \n", + "AVOCAn0003 99.0484 53.6563 45.1621 \n", + "AVOCAn0004 74.7543 29.3280 45.4262 \n", + "AVOCAn0005 70.5968 24.1071 46.4897 \n", + "AVOCAn0006 68.7583 23.9665 44.7918 \n", + "AVOCAn0007 75.7895 27.2715 48.5180 \n", + "AVOCAn0008 93.3107 42.4968 50.8139 \n", + "AVOCAn0009 3.6955 0.1038 3.5917 \n", + "AVOCAs0001 NaN NaN NaN \n", + "AVOCAs0002 97.3864 26.6619 70.7246 \n", + "AVOCAs0003 70.7401 40.0608 30.7919 \n", + "AVOCAs0004 97.8389 45.4845 52.2157 \n", + "AVOCAs0005 95.1711 54.9722 40.1706 \n", + "AVOCAs0006 112.5818 67.8718 44.8252 \n", + "AVOCAs0007 65.3531 44.1537 21.5228 \n", + "AVOCAs0008 52.3940 44.9152 7.4803 \n", + "BILG0001 19.4177 7.5746 11.8431 \n", + "BILG0002 155.6998 98.1693 57.4340 \n", + "BILG0003 83.5219 52.8059 30.5537 \n", + "BILG0004 158.6283 116.6189 42.1178 \n", + "BILG0005 212.8478 164.0044 48.4312 \n", + "BLUEYS0001 65.4628 19.2938 46.1690 \n", + "BLUEYS0002 50.2084 10.3009 39.9074 \n", + "BLUEYS0003 50.6308 11.1682 39.4625 \n", + "BLUEYS0004 95.1608 31.3330 63.8279 \n", + "BLUEYS0005 141.0643 58.2545 82.8098 \n", + "BLUEYS0006 88.4207 51.6205 36.2553 \n", + "BOAT0001 23.8510 23.5660 -0.0264 \n", + "BOAT0002 37.6524 14.0209 23.6316 \n", + "... ... ... ... \n", + "TREACH0014 97.5323 46.2994 51.0816 \n", + "TREACH0015 96.8327 45.1962 51.6364 \n", + "TREACH0016 106.9083 66.0567 40.3629 \n", + "WAMBE0001 132.3413 143.4459 -9.7255 \n", + "WAMBE0002 151.1833 126.9844 23.9548 \n", + "WAMBE0003 154.1788 117.9441 36.2425 \n", + "WAMBE0004 137.8449 76.6007 61.2725 \n", + "WAMBE0005 NaN NaN NaN \n", + "WAMBE0006 205.8453 186.0784 22.5892 \n", + "WAMBE0007 80.4674 35.4614 45.0059 \n", + "WAMBE0008 88.4574 40.3200 48.1374 \n", + "WAMBE0009 70.9159 26.1742 44.7418 \n", + "WAMBE0010 58.6604 18.0418 40.6186 \n", + "WAMBE0011 59.2415 16.3165 42.9250 \n", + "WAMBE0012 74.4189 23.0232 51.3957 \n", + "WAMBE0013 70.4964 22.7546 47.7419 \n", + "WAMBE0014 68.0896 24.1853 43.9043 \n", + "WAMBE0015 55.0780 16.0119 39.0660 \n", + "WAMBE0016 96.7687 39.8224 56.9463 \n", + "WAMBE0017 35.2987 8.5140 26.7847 \n", + "WAMBE0018 40.9407 10.5147 30.4260 \n", + "WAMBE0019 38.2838 9.2156 29.0682 \n", + "WAMBE0020 NaN NaN NaN \n", + "WAMBE0021 NaN NaN NaN \n", + "WAMBE0022 0.5516 0.2840 0.2675 \n", + "WAMBE0023 3.3761 0.3020 3.0741 \n", + "WAMBE0024 60.8648 31.2794 29.5854 \n", + "WAMBE0025 45.1055 14.6028 30.5028 \n", + "WAMBE0026 32.1502 12.9335 19.2167 \n", + "WAMBE0027 26.2310 18.6828 7.5482 \n", + "\n", + " swash_pct_change prestorm_dune_face_vol poststorm_dune_face_vol \\\n", + "site_id \n", + "AVOCAn0001 31.2620 165.4760 166.7296 \n", + "AVOCAn0002 37.0803 256.4137 258.9174 \n", + "AVOCAn0003 45.5960 372.7031 373.9198 \n", + "AVOCAn0004 60.7674 275.1689 276.0476 \n", + "AVOCAn0005 65.8524 268.5194 263.4262 \n", + "AVOCAn0006 65.1438 202.6770 198.2397 \n", + "AVOCAn0007 64.0168 149.8479 143.1312 \n", + "AVOCAn0008 54.4567 187.9201 187.3459 \n", + "AVOCAn0009 97.1908 NaN NaN \n", + "AVOCAs0001 NaN NaN NaN \n", + "AVOCAs0002 72.6226 NaN NaN \n", + "AVOCAs0003 43.5282 NaN NaN \n", + "AVOCAs0004 53.3691 NaN NaN \n", + "AVOCAs0005 42.2088 NaN NaN \n", + "AVOCAs0006 39.8157 NaN NaN \n", + "AVOCAs0007 32.9331 NaN NaN \n", + "AVOCAs0008 14.2770 NaN NaN \n", + "BILG0001 60.9913 NaN NaN \n", + "BILG0002 36.8876 NaN NaN \n", + "BILG0003 36.5817 41.1469 40.6081 \n", + "BILG0004 26.5512 11.2211 11.0892 \n", + 
"BILG0005 22.7539 613.6156 606.2766 \n", + "BLUEYS0001 70.5270 130.7447 120.5446 \n", + "BLUEYS0002 79.4836 512.0154 477.1774 \n", + "BLUEYS0003 77.9418 443.0853 414.3901 \n", + "BLUEYS0004 67.0737 287.5805 272.4267 \n", + "BLUEYS0005 58.7036 539.3864 520.0732 \n", + "BLUEYS0006 41.0032 271.6036 267.1954 \n", + "BOAT0001 -0.1108 NaN NaN \n", + "BOAT0002 62.7624 NaN NaN \n", + "... ... ... ... \n", + "TREACH0014 52.3740 508.7400 505.7877 \n", + "TREACH0015 53.3254 690.8275 683.4458 \n", + "TREACH0016 37.7547 508.0014 499.6315 \n", + "WAMBE0001 -7.3488 665.9898 667.5923 \n", + "WAMBE0002 15.8449 385.8467 386.7284 \n", + "WAMBE0003 23.5068 694.2226 700.5105 \n", + "WAMBE0004 44.4503 559.5485 569.8591 \n", + "WAMBE0005 NaN NaN NaN \n", + "WAMBE0006 10.9739 55.0898 55.8919 \n", + "WAMBE0007 55.9307 178.1005 178.5439 \n", + "WAMBE0008 54.4187 258.7513 258.3849 \n", + "WAMBE0009 63.0913 267.3725 258.3720 \n", + "WAMBE0010 69.2437 187.5259 161.9748 \n", + "WAMBE0011 72.4577 197.0129 175.2512 \n", + "WAMBE0012 69.0627 178.4783 168.3475 \n", + "WAMBE0013 67.7224 231.1513 195.2581 \n", + "WAMBE0014 64.4802 82.4268 61.2601 \n", + "WAMBE0015 70.9286 NaN NaN \n", + "WAMBE0016 58.8479 NaN NaN \n", + "WAMBE0017 75.8801 NaN NaN \n", + "WAMBE0018 74.3173 NaN NaN \n", + "WAMBE0019 75.9282 NaN NaN \n", + "WAMBE0020 NaN NaN NaN \n", + "WAMBE0021 NaN NaN NaN \n", + "WAMBE0022 48.5063 NaN NaN \n", + "WAMBE0023 91.0554 NaN NaN \n", + "WAMBE0024 48.6084 NaN NaN \n", + "WAMBE0025 67.6253 NaN NaN \n", + "WAMBE0026 59.7716 NaN NaN \n", + "WAMBE0027 28.7759 NaN NaN \n", + "\n", + " dune_face_vol_change dune_face_pct_change storm_regime \n", + "site_id \n", + "AVOCAn0001 0.0000 0.0000 swash \n", + "AVOCAn0002 0.0000 0.0000 swash \n", + "AVOCAn0003 -0.3147 -0.0844 swash \n", + "AVOCAn0004 0.4104 0.1492 swash \n", + "AVOCAn0005 7.4196 2.7631 collision \n", + "AVOCAn0006 4.7944 2.3655 collision \n", + "AVOCAn0007 7.1323 4.7597 collision \n", + "AVOCAn0008 2.8297 1.5058 collision \n", + "AVOCAn0009 NaN NaN NaN \n", + "AVOCAs0001 NaN NaN NaN \n", + "AVOCAs0002 NaN NaN NaN \n", + "AVOCAs0003 NaN NaN NaN \n", + "AVOCAs0004 NaN NaN NaN \n", + "AVOCAs0005 NaN NaN NaN \n", + "AVOCAs0006 NaN NaN NaN \n", + "AVOCAs0007 NaN NaN NaN \n", + "AVOCAs0008 NaN NaN NaN \n", + "BILG0001 NaN NaN NaN \n", + "BILG0002 NaN NaN NaN \n", + "BILG0003 0.0000 0.0000 swash \n", + "BILG0004 -0.0132 -0.1179 swash \n", + "BILG0005 5.7738 0.9410 collision \n", + "BLUEYS0001 9.5601 7.3121 collision \n", + "BLUEYS0002 33.2825 6.5003 collision \n", + "BLUEYS0003 24.8870 5.6167 collision \n", + "BLUEYS0004 12.9641 4.5080 collision \n", + "BLUEYS0005 12.0470 2.2335 collision \n", + "BLUEYS0006 3.6045 1.3271 collision \n", + "BOAT0001 NaN NaN NaN \n", + "BOAT0002 NaN NaN NaN \n", + "... ... ... ... 
\n", + "TREACH0014 0.4254 0.0836 swash \n", + "TREACH0015 1.5086 0.2184 NaN \n", + "TREACH0016 0.3386 0.0667 swash \n", + "WAMBE0001 0.0410 0.0062 swash \n", + "WAMBE0002 -0.0449 -0.0116 swash \n", + "WAMBE0003 -4.2136 -0.6070 swash \n", + "WAMBE0004 -4.4590 -0.7969 swash \n", + "WAMBE0005 NaN NaN NaN \n", + "WAMBE0006 0.0000 0.0000 swash \n", + "WAMBE0007 0.4727 0.2654 swash \n", + "WAMBE0008 -1.2073 -0.4666 swash \n", + "WAMBE0009 9.8041 3.6668 collision \n", + "WAMBE0010 25.3087 13.4961 collision \n", + "WAMBE0011 21.9882 11.1608 collision \n", + "WAMBE0012 10.0386 5.6246 collision \n", + "WAMBE0013 35.8072 15.4908 collision \n", + "WAMBE0014 21.1718 25.6856 collision \n", + "WAMBE0015 NaN NaN NaN \n", + "WAMBE0016 NaN NaN NaN \n", + "WAMBE0017 NaN NaN NaN \n", + "WAMBE0018 NaN NaN NaN \n", + "WAMBE0019 NaN NaN NaN \n", + "WAMBE0020 NaN NaN NaN \n", + "WAMBE0021 NaN NaN NaN \n", + "WAMBE0022 NaN NaN NaN \n", + "WAMBE0023 NaN NaN NaN \n", + "WAMBE0024 NaN NaN NaN \n", + "WAMBE0025 NaN NaN NaN \n", + "WAMBE0026 NaN NaN NaN \n", + "WAMBE0027 NaN NaN NaN \n", + "\n", + "[1768 rows x 9 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] } ], "metadata": { + "hide_input": false, "kernelspec": { "display_name": "Python 3", "language": "python", @@ -1033,9 +2036,14 @@ "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, - "toc_position": {}, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "275.797px" + }, "toc_section_display": true, - "toc_window_display": false + "toc_window_display": true }, "varInspector": { "cols": { diff --git a/notebooks/03_dune_to_vs_runup.ipynb b/notebooks/03_dune_to_vs_runup.ipynb index b99ab01..86154e4 100644 --- a/notebooks/03_dune_to_vs_runup.ipynb +++ b/notebooks/03_dune_to_vs_runup.ipynb @@ -4,17 +4,27 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Investigate how dune toe compares to R_high" + "## Setup " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "### Import packages" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2018-12-03T23:04:57.331037Z", - "start_time": "2018-12-03T23:04:57.006071Z" - } + "end_time": "2018-12-10T04:02:38.872624Z", + "start_time": "2018-12-10T04:02:38.448908Z" + }, + "hidden": true }, "outputs": [], "source": [ @@ -25,12 +35,13 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 7, "metadata": { "ExecuteTime": { - "end_time": "2018-12-03T23:04:58.749827Z", - "start_time": "2018-12-03T23:04:57.333943Z" - } + "end_time": "2018-12-10T04:03:27.147221Z", + "start_time": "2018-12-10T04:03:27.141204Z" + }, + "hidden": true }, "outputs": [], "source": [ @@ -39,18 +50,29 @@ "import pandas as pd\n", "import numpy as np\n", "import os\n", - "\n", + "import matplotlib\n", "import plotly\n", "import plotly.graph_objs as go\n", "import plotly.plotly as py\n", "import plotly.tools as tls\n", "import plotly.figure_factory as ff\n", - "import plotly.io as pio" + "import plotly.io as pio\n", + "from plotly import tools\n", + "\n", + "from copy import copy\n", + "import scipy\n", + "from sklearn import svm\n", + "\n", + "# Disable numpy warnings\n", + "import warnings\n", + "warnings.simplefilter(action=\"ignore\", category=FutureWarning)" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "heading_collapsed": true + }, 
"source": [ "### Load data\n", "Load data from the `./data/interim/` folder and parse into `pandas` dataframes." @@ -58,35 +80,20 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "metadata": { "ExecuteTime": { - "end_time": "2018-12-03T23:05:05.800496Z", - "start_time": "2018-12-03T23:04:58.751721Z" - } + "end_time": "2018-12-10T04:03:40.638982Z", + "start_time": "2018-12-10T04:03:31.765531Z" + }, + "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Importing profiles.csv\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\z5189959\\Desktop\\nsw-2016-storm-impact\\.venv\\lib\\site-packages\\numpy\\lib\\arraysetops.py:522: FutureWarning:\n", - "\n", - "elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "Importing profiles.csv\n", "Importing profile_features.csv\n", "Importing impacts_forecasted_foreshore_slope_sto06.csv\n", "Importing impacts_forecasted_mean_slope_sto06.csv\n", @@ -125,31 +132,49 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "heading_collapsed": true + }, "source": [ - "### Compare predicted R_high with D_low\n", - "Let's see what the distribution of R_high is compared with D_low. How far off are the predicted water levels compared with the dune toes?" + "## Difference between $R_{high}$ and $D_{low}$\n", + "Since the Storm Impact Regime is so dependant on the predicted $R_{high}$ and obsereved $D_{low}$ levels, let's investigate the difference between these two variables in more detail.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "### Gather data\n", + "\n", + "First, let's split the `site_ids` by whether we observed swash or collision and by whether we predicted swash or collision. We want to identify if there are any difference between these four groups." 
] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 10, "metadata": { "ExecuteTime": { - "end_time": "2018-12-04T02:20:58.446500Z", - "start_time": "2018-12-04T02:20:58.439480Z" - } + "end_time": "2018-12-10T04:05:15.741391Z", + "start_time": "2018-12-10T04:05:15.734352Z" + }, + "hidden": true }, "outputs": [], "source": [ - "def get_site_ids(df_forecasted, df_observed, forecasted_regime, observed_regime):\n", + "def get_site_ids(df_forecasted, df_observed, forecasted_regime,\n", + " observed_regime):\n", " \"\"\"\n", " Returns list of site_ids which match the given forecasted and observed regime\n", " \"\"\"\n", - " set1 = set(df_forecasted.query(\"storm_regime == '{}'\".format(\n", - " forecasted_regime)).index.get_level_values('site_id'))\n", - " set2 = set(df_observed.query(\"storm_regime == '{}'\".format(\n", - " observed_regime)).index.get_level_values('site_id'))\n", + " set1 = set(\n", + " df_forecasted.query(\"storm_regime == '{}'\".format(forecasted_regime)).\n", + " index.get_level_values('site_id'))\n", + " set2 = set(\n", + " df_observed.query(\"storm_regime == '{}'\".format(observed_regime)).\n", + " index.get_level_values('site_id'))\n", " return sorted(list(set1.intersection(set2)))\n", "\n", "\n", @@ -166,23 +191,104 @@ " # Join into one dataframe\n", " df_twl_toes = pd.concat([df_toes, df_R_highs], axis=1, sort=True)\n", " df_twl_toes['diff'] = df_twl_toes['R_high'] - df_twl_toes['dune_toe_z']\n", - " return df_twl_toes['diff']\n" + " return df_twl_toes['diff']" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 15, "metadata": { "ExecuteTime": { - "end_time": "2018-12-04T03:55:51.858020Z", - "start_time": "2018-12-04T03:55:50.879155Z" - } + "end_time": "2018-12-10T04:13:00.035545Z", + "start_time": "2018-12-10T04:12:59.176352Z" + }, + "hidden": true + }, + "outputs": [], + "source": [ + "# Identify sites where swash regime was correctly or overpredicted\n", + "\n", + "swash_overpredicted_site_ids = get_site_ids(\n", + " df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n", + " df_observed=impacts['observed'],\n", + " forecasted_regime='collision',\n", + " observed_regime='swash')\n", + "swash_overpredicted_diffs = get_R_high_D_low_diff(\n", + " site_ids=swash_overpredicted_site_ids,\n", + " df_profile_features=df_profile_features,\n", + " df_twls=twls['forecasted']['mean_slope_sto06'])\n", + "\n", + "swash_correct_site_ids = get_site_ids(\n", + " df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n", + " df_observed=impacts['observed'],\n", + " forecasted_regime='swash',\n", + " observed_regime='swash')\n", + "swash_correct_diffs = get_R_high_D_low_diff(\n", + " site_ids=swash_correct_site_ids,\n", + " df_profile_features=df_profile_features,\n", + " df_twls=twls['forecasted']['mean_slope_sto06'])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2018-12-10T04:12:58.434634Z", + "start_time": "2018-12-10T04:12:57.096839Z" + }, + "hidden": true + }, + "outputs": [], + "source": [ + "# Identify sites where collision regime was correctly or underpredicted\n", + "\n", + "collision_underpredicted_site_ids = get_site_ids(\n", + " df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n", + " df_observed=impacts['observed'],\n", + " forecasted_regime='swash',\n", + " observed_regime='collision')\n", + "collision_underpredicted_diffs = get_R_high_D_low_diff(\n", + " site_ids=collision_underpredicted_site_ids,\n", + " 
df_profile_features=df_profile_features,\n", + "    df_twls=twls['forecasted']['mean_slope_sto06'])\n", + "\n", + "collision_correct_site_ids = get_site_ids(\n", + "    df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n", + "    df_observed=impacts['observed'],\n", + "    forecasted_regime='collision',\n", + "    observed_regime='collision')\n", + "collision_correct_diffs = get_R_high_D_low_diff(\n", + "    site_ids=collision_correct_site_ids,\n", + "    df_profile_features=df_profile_features,\n", + "    df_twls=twls['forecasted']['mean_slope_sto06'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "### Plot difference in $R_{high}$ and $D_{low}$ for swash and collision regimes\n", + "What does the distribution of elevations look like when we observe swash and collision regimes? Are there any differences between correctly and incorrectly predicted swash regime impacts?" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 12, "metadata": { "ExecuteTime": { - "end_time": "2018-12-04T03:55:51.858020Z", - "start_time": "2018-12-04T03:55:50.879155Z" - } + "end_time": "2018-12-10T04:06:02.634355Z", + "start_time": "2018-12-10T04:05:42.644585Z" + }, + "hidden": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "94883b85733444528fe8a73379ce4611", + "model_id": "f53f0ffc577b406ab56c1357c1145683", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FigureWidget({\n", "    'data': [{'marker': {'color': '#ef8a62'},\n", "              'name': 'Overpredicted',\n", "              …" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ - "swash_overpredicted_site_ids = get_site_ids(df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n", - " df_observed=impacts['observed'],\n", - " forecasted_regime='collision',\n", - " observed_regime='swash')\n", - "swash_overpredicted_diffs = get_R_high_D_low_diff(site_ids=swash_overpredicted_site_ids,\n", - " df_profile_features=df_profile_features,\n", - " df_twls=twls['forecasted']['mean_slope_sto06'])\n", - "\n", - "swash_correct_site_ids = get_site_ids(df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n", - " df_observed=impacts['observed'],\n", - " forecasted_regime='swash',\n", - " observed_regime='swash')\n", - "swash_correct_diffs = get_R_high_D_low_diff(site_ids=swash_correct_site_ids,\n", - " df_profile_features=df_profile_features,\n", - " df_twls=twls['forecasted']['mean_slope_sto06'])\n", - "\n", - "\n", - "trace1 = go.Histogram(y=swash_correct_diffs.tolist(),\n", - " opacity=0.75,\n", - " name='Correctly predicted',\n", - " marker=dict(\n", - " color='#67a9cf',\n", - " ),\n", - " ybins=dict(\n", - " size=0.1\n", - "),)\n", - "trace2 = go.Histogram(y=swash_overpredicted_diffs.tolist(),\n", - " opacity=0.75,\n", - " name='Overpredicted',\n", - " marker=dict(\n", - " color='#ef8a62',\n", - "),\n", - " ybins=dict(\n", - " size=0.1\n", - "),)\n", + "trace1 = go.Histogram(\n", + "    y=swash_correct_diffs.tolist(),\n", + "    opacity=0.75,\n", + "    name='Correctly predicted',\n", + "    marker=dict(color='#67a9cf', ),\n", + "    ybins=dict(size=0.1),\n", + ")\n", + "trace2 = go.Histogram(\n", + "    y=swash_overpredicted_diffs.tolist(),\n", + "    opacity=0.75,\n", + "    name='Overpredicted',\n", + "    marker=dict(color='#ef8a62', ),\n", + "    ybins=dict(size=0.1),\n", + ")\n", "\n", "layout = go.Layout(\n", "    title='R_high - D_low<br>
Swash Regime',\n", "    barmode='overlay',\n", - "    yaxis=dict(\n", - "        title='z (m AHD)'\n", - "    ),\n", - "    xaxis=dict(\n", - "        title='Count'\n", - "    ),\n", + "    yaxis=dict(title='z (m AHD)'),\n", + "    xaxis=dict(title='Count'),\n", "    bargap=0.2,\n", "    bargroupgap=0.1,\n", - "    legend=dict(x=.6, y=1)\n", - ")\n", + "    legend=dict(x=.6, y=1))\n", "\n", "g_plot_swash = go.FigureWidget(data=[trace2, trace1], layout=layout)\n", "\n", "# To output to file\n", - "img_bytes = pio.write_image(g_plot_swash, 'g_plot_swash.png',format='png', width=600, height=400, scale=5)\n", + "img_bytes = pio.write_image(\n", + "    g_plot_swash,\n", + "    '02_R_high_D_low_swash.png',\n", + "    format='png',\n", + "    width=600,\n", + "    height=400,\n", + "    scale=5)\n", "\n", - "g_plot_swash\n", - "\n" + "g_plot_swash" ] }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "The plot above shows that when $R_{high}$ - $D_{low}$ is $<0$, swash is correctly predicted. This is true by definition, so it is not surprising. The peak of the $R_{high}$ - $D_{low}$ distribution sits slightly below $0$, so it appears that we have correctly predicted a majority of the observed swash regime events. \n", + "\n", + "Let's do the same thing, now considering `site_ids` where we have observed collision." + ] + }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 16, "metadata": { "ExecuteTime": { - "end_time": "2018-12-04T04:10:47.339268Z", - "start_time": "2018-12-04T04:10:45.796887Z" - } + "end_time": "2018-12-10T04:13:03.703119Z", + "start_time": "2018-12-10T04:13:03.463485Z" + }, + "hidden": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3933da9295fe446f9413bca8842100c2", + "model_id": "4ec08bf2ea6f482ea3c52aa3348a05e2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FigureWidget({\n", "    'data': [{'marker': {'color': '#67a9cf'},\n", "              'name': 'Correctly predicted',\n", "              …" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ - "collision_underpredicted_site_ids = get_site_ids(df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n", - " df_observed=impacts['observed'],\n", - " forecasted_regime='swash',\n", - " observed_regime='collision')\n", - "collision_underpredicted_diffs = get_R_high_D_low_diff(site_ids=collision_underpredicted_site_ids,\n", - " df_profile_features=df_profile_features,\n", - " df_twls=twls['forecasted']['mean_slope_sto06'])\n", - "\n", - "collision_correct_site_ids = get_site_ids(df_forecasted=impacts['forecasted']['mean_slope_sto06'],\n", - " df_observed=impacts['observed'],\n", - " forecasted_regime='collision',\n", - " observed_regime='collision')\n", - "collision_correct_diffs = get_R_high_D_low_diff(site_ids=collision_correct_site_ids,\n", - " df_profile_features=df_profile_features,\n", - " df_twls=twls['forecasted']['mean_slope_sto06'])\n", - "\n", - "\n", - "trace1 = go.Histogram(y=collision_correct_diffs.tolist(),\n", - " opacity=0.75,\n", - " name='Correctly predicted',\n", - " marker=dict(\n", - " color='#67a9cf',\n", - " ),\n", - " ybins=dict(\n", - " size=0.1\n", - "),)\n", - "trace2 = go.Histogram(y=collision_underpredicted_diffs.tolist(),\n", - " opacity=0.75,\n", - " name='Underpredicted',\n", - " marker=dict(\n", - " color='#ef8a62',\n", - " ),\n", - " ybins=dict(\n", - " size=0.1\n", - "),)\n", + "trace1 = go.Histogram(\n", + "    y=collision_correct_diffs.tolist(),\n", + "    opacity=0.75,\n", + "    name='Correctly predicted',\n", + "    marker=dict(color='#67a9cf', ),\n", + "    ybins=dict(size=0.1),\n", + ")\n", + "trace2 = go.Histogram(\n", + "    y=collision_underpredicted_diffs.tolist(),\n", + "    opacity=0.75,\n", + "    name='Underpredicted',\n", + "    
marker=dict(color='#ef8a62', ),\n", + "    ybins=dict(size=0.1),\n", + ")\n", "\n", "layout = go.Layout(\n", "    title='R_high - D_low<br>
Collision Regime',\n", "    barmode='overlay',\n", - "    yaxis=dict(\n", - "        title='z (m AHD)'\n", - "    ),\n", - "    xaxis=dict(\n", - "        title='Count'\n", - "    ),\n", + "    yaxis=dict(title='z (m AHD)'),\n", + "    xaxis=dict(title='Count'),\n", "    bargap=0.2,\n", "    bargroupgap=0.1,\n", - "    legend=dict(x=.6, y=1)\n", - ")\n", + "    legend=dict(x=.6, y=1))\n", "\n", "g_plot_collision = go.FigureWidget(data=[trace2, trace1], layout=layout)\n", "\n", "# To output to file\n", - "img_bytes = pio.write_image(g_plot_collision, 'g_plot_collision.png',format='png', width=600, height=400, scale=5)\n", + "img_bytes = pio.write_image(\n", + "    g_plot_collision,\n", + "    '02_R_high_D_low_collision.png',\n", + "    format='png',\n", + "    width=600,\n", + "    height=400,\n", + "    scale=5)\n", "\n", "g_plot_collision" ] }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "We can see a trend similar to the swash regime, except flipped. A majority of the correctly forecasted collision regimes occur when $R_{high}$ - $D_{low}$ is $>0$, by definition. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "### TODO Does dune toe lower?\n", + "Are there any patterns that dictate whether the dune toe rises or lowers when subjected to collision? Is it just based on the peak $R_{high}$ level, similar to equilibrium theory?\n" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Does dune toe lower?\n" + "## Relationship between parameters\n", + "Let's further investigate the relationship between hydrodynamic and morphodynamic parameters and see if they can tell us any more about how the storm regime may be correctly or incorrectly predicted." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add functions for adding parameters to the dataframe\n", + "We need some additional functions which will add parameters to our dataframe so we can plot them."
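Each helper defined in the next cell follows the same merge-by-index pattern: derive a one-column frame indexed by `site_id`, then merge it onto the working dataframe. A minimal sketch of that pattern, with the `df_twl`/`beta` names assumed from the cells below:

```python
# Sketch only: the merge-by-index pattern shared by the add_* helpers below.
def add_column(df, source_series, col_name):
    new_col = source_series.rename(col_name).to_frame()  # one column, indexed by site_id
    return df.merge(new_col, left_index=True, right_index=True)

# e.g. prestorm mean slope, one value per site taken from the TWL forecasts:
# df = add_column(df, df_twl.groupby('site_id').first().beta, 'mean_slope')
```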
] }, { "cell_type": "code", + "execution_count": 25, + "metadata": { + "ExecuteTime": { + "end_time": "2018-12-10T04:55:43.229433Z", + "start_time": "2018-12-10T04:55:43.218402Z" + } + }, + "outputs": [], + "source": [ + "def add_berm_width(df, df_profiles, df_profile_features):\n", + "    \"\"\"\n", + "    Adds a new column to the dataframe, with the prestorm berm width\n", + "    \"\"\"\n", + "    # Get x coordinates of dune toe and limit of survey (z=0)\n", + "    df_profile_end_x = df_profiles.query('profile_type==\"prestorm\"').dropna(\n", + "        subset=['z']).groupby('site_id').tail(1).reset_index(\n", + "            ['profile_type', 'x']).x.rename('x_end').to_frame()\n", + "    df_profile_dune_toe_x = df_profile_features.query(\n", + "        'profile_type==\"prestorm\"').dune_toe_x.to_frame()\n", + "\n", + "    # Merge and take the difference to calculate berm width\n", + "    df_merged = df_profile_end_x.merge(\n", + "        df_profile_dune_toe_x, left_index=True, right_index=True)\n", + "    berm_width = (df_merged['x_end'] -\n", + "                  df_merged['dune_toe_x']).rename('berm_width').to_frame()\n", + "\n", + "    # Return the dataframe with the berm_width col merged\n", + "    return df.merge(berm_width, left_index=True, right_index=True)\n", + "\n", + "\n", + "def add_observed_regime(df, df_observed, new_col='observed_storm_regime'):\n", + "    \"\"\"\n", + "    Adds a new column to the dataframe, with the observed storm regime\n", + "    \"\"\"\n", + "    return df.merge(\n", + "        df_observed.storm_regime.rename(new_col).to_frame(),\n", + "        left_index=True,\n", + "        right_index=True)\n", + "\n", + "\n", + "def add_mean_slope(df, df_twl):\n", + "    \"\"\"\n", + "    Adds a new column to the dataframe with prestorm mean slope\n", + "    \"\"\"\n", + "    df_mean_slope = df_twl.groupby('site_id').first().beta.rename(\n", + "        'mean_slope').to_frame()\n", + "    return df.merge(df_mean_slope, left_index=True, right_index=True)\n", + "\n", + "\n", + "def add_prestorm_berm_vol(df, df_impacts_observed):\n", + "    \"\"\"\n", + "    Adds a new column to the dataframe with prestorm berm volume\n", + "    \"\"\"\n", + "    return df.merge(\n", + "        df_impacts_observed.prestorm_swash_vol.rename('prestorm_berm_vol').\n", + "        to_frame(),\n", + "        left_index=True,\n", + "        right_index=True)\n", + "\n", + "\n", + "def add_prediction_class(df_impacts, prediction_classes):\n", + "    \"\"\"\n", + "    Adds a column which groups site_ids into the predicted and observed storm regime combination\n", + "    \"\"\"\n", + "    for prediction_class in prediction_classes:\n", + "        df_impacts.loc[df_impacts.index.isin(prediction_class['site_ids']),\n", + "                       'prediction_class'] = prediction_class['class']\n", + "    return df_impacts" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create the dataframe\n", "We need to combine and add data into a single dataframe for comparison purposes."
+ ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "ExecuteTime": { + "end_time": "2018-12-10T04:55:45.293320Z", + "start_time": "2018-12-10T04:55:44.802910Z" + } + }, "outputs": [], - "source": [] + "source": [ + "# Start with the forecasted impact dataframe\n", + "df = impacts['forecasted']['mean_slope_sto06']\n", + "\n", + "# Add a column which groups site_ids into the predicted and observed storm regime combination\n", + "prediction_classes = [\n", + " {\n", + " 'site_ids': collision_underpredicted_site_ids,\n", + " 'class': 'collision_underpredicted'\n", + " },\n", + " {\n", + " 'site_ids': collision_correct_site_ids,\n", + " 'class': 'collision_correct'\n", + " },\n", + " {\n", + " 'site_ids': swash_overpredicted_site_ids,\n", + " 'class': 'swash_overpredicted'\n", + " },\n", + " {\n", + " 'site_ids': swash_correct_site_ids,\n", + " 'class': 'swash_correct'\n", + " },\n", + "]\n", + "df = add_prediction_class(df, prediction_classes)\n", + "\n", + "# Drop site_ids where we do not have a prediction class (caused by NaNs)\n", + "df = df.dropna(subset=['prediction_class'])\n", + "\n", + "# Add additional parameters\n", + "df = add_observed_regime(df, impacts['observed'])\n", + "df = add_berm_width(df, df_profiles, df_profile_features)\n", + "df = add_mean_slope(df, df_twl=twls['forecasted']['mean_slope_sto06'])\n", + "df = add_prestorm_berm_vol(df, df_impacts_observed=impacts['observed'])\n", + "df['R_high_dune_toe_diff'] = df['R_high'] - df['dune_toe_z']\n", + "df['R_high_dune_toe_ratio'] = df['R_high'] / df['dune_toe_z']" + ] }, { "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create scatter plot matrix of parameter interactions\n", + "Plot each hydrodynamic and morphodynamic parameter against each other and see if we can identify any patterns." 
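Before the next cell, one presentation detail: its hard-coded `pl_colorscale` is a piecewise-constant plotly colorscale, which keeps marker colours flat within each prediction class rather than interpolating between classes. A sketch of how such a scale can be generated (it reproduces the list used below):

```python
# Sketch only: build a piecewise-constant plotly colorscale for N classes.
colors = ['#d7191c', '#fdae61', '#2c7bb6', '#abd9e9']  # one colour per prediction class
n = len(colors)
pl_colorscale = []
for i, colour in enumerate(colors):
    # Repeating each breakpoint pins the colour across the whole interval.
    pl_colorscale += [[i / n, colour], [(i + 1) / n, colour]]
# -> [[0.0, '#d7191c'], [0.25, '#d7191c'], [0.25, '#fdae61'], ..., [1.0, '#abd9e9']]
```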
+ ] + }, + { + "cell_type": "code", + "execution_count": 46, "metadata": { - "heading_collapsed": true + "ExecuteTime": { + "end_time": "2018-12-10T05:52:23.384061Z", + "start_time": "2018-12-10T05:52:21.345652Z" + } }, + "outputs": [], + "source": [ + "# Setup colors for different classes\n", + "text = df['prediction_class'].tolist()\n", + "class_code = {x['class']: n for n, x in enumerate(prediction_classes)}\n", + "color_vals = [class_code[cl] for cl in df['prediction_class']]\n", + "\n", + "# Each prediction class will have its own color\n", + "pl_colorscale = [[0.0, '#d7191c'], [0.25, '#d7191c'], [0.25, '#fdae61'],\n", + " [0.5, '#fdae61'], [0.5, '#2c7bb6'], [0.75, '#2c7bb6'],\n", + " [0.75, '#abd9e9'], [1, '#abd9e9']]\n", + "\n", + "# Setup plotly scatterplot matrix\n", + "trace1 = go.Splom(\n", + " dimensions=[\n", + " dict(label='dune_toe_z', values=df['dune_toe_z']),\n", + " dict(label='R_high', values=df['R_high']),\n", + " dict(label='berm_width', values=df['berm_width']),\n", + " dict(\n", + " label='twl_dune_toe_z_exceedance_hrs',\n", + " values=df['twl_dune_toe_z_exceedance_hrs']),\n", + " dict(label='R_high_dune_toe_diff', values=df['R_high_dune_toe_diff']),\n", + " dict(\n", + " label='R_high_dune_toe_ratio', values=df['R_high_dune_toe_ratio']),\n", + " dict(label='mean_slope', values=df['mean_slope']),\n", + " dict(label='prestorm_berm_vol', values=df['prestorm_berm_vol']),\n", + " ],\n", + " text=text,\n", + " diagonal=dict(visible=False),\n", + " showupperhalf=False,\n", + " marker=dict(\n", + " color=color_vals,\n", + " size=2,\n", + " colorscale=pl_colorscale,\n", + " showscale=False,\n", + " line=dict(width=0.1, color='rgb(230,230,230)')))\n", + "\n", + "axis = dict(showline=True, zeroline=False, gridcolor='#fff', ticklen=4)\n", + "\n", + "layout = go.Layout(\n", + " title='Storm Impact Scatter Plot Matrix',\n", + " dragmode='select',\n", + " width=800,\n", + " height=800,\n", + " autosize=False,\n", + " hovermode='closest',\n", + " plot_bgcolor='rgba(240,240,240, 0.95)',\n", + " xaxis1=dict(axis),\n", + " xaxis2=dict(axis),\n", + " xaxis3=dict(axis),\n", + " xaxis4=dict(axis),\n", + " xaxis5=dict(axis),\n", + " xaxis6=dict(axis),\n", + " xaxis7=dict(axis),\n", + " xaxis8=dict(axis),\n", + " yaxis1=dict(axis),\n", + " yaxis2=dict(axis),\n", + " yaxis3=dict(axis),\n", + " yaxis4=dict(axis),\n", + " yaxis5=dict(axis),\n", + " yaxis6=dict(axis),\n", + " yaxis7=dict(axis),\n", + " yaxis8=dict(axis),\n", + ")\n", + "\n", + "# Change font of axis labels\n", + "for ax in layout:\n", + " if 'xaxis' in ax or 'yaxis' in ax:\n", + " layout[ax]['titlefont'] = {'size': 12}\n", + "\n", + "fig_scatter = go.FigureWidget(data=[trace1], layout=layout)\n", + "\n", + "# To output to file\n", + "img_bytes = pio.write_image(\n", + " fig_scatter,\n", + " '02_scatter_plot.png',\n", + " format='png',\n", + " width=1500,\n", + " height=1500,\n", + " scale=5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, "source": [ - "### What do over predicted and underpredicted profiles look like?" + "Jupyter get's a little bit slow when trying to display this plot interactively, so let's output it as an image to view.\n", + "![02_scatter_plot.png](02_scatter_plot.png)" ] }, { "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create parameter confusion matrix\n", + "It's a bit hard to see the difference between the different categories, lets do a confusion matrix but with plots showing two variables on each axis. 
First, list all the different parameters we have available to plot against:" ] }, { "cell_type": "code", + "execution_count": 33, + "metadata": { + "ExecuteTime": { + "end_time": "2018-12-10T05:27:34.225427Z", + "start_time": "2018-12-10T05:27:34.218427Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['datetime', 'R_high', 'R_low', 'dune_toe_z', 'dune_crest_z',\n", + "       'storm_regime', 'twl_dune_toe_z_exceedance_hrs', 'prediction_class',\n", + "       'observed_storm_regime', 'berm_width', 'mean_slope',\n", + "       'prestorm_berm_vol', 'R_high_dune_toe_diff', 'R_high_dune_toe_ratio'],\n", + "      dtype='object')" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "ExecuteTime": { + "end_time": "2018-12-10T05:29:42.927017Z", + "start_time": "2018-12-10T05:29:38.603905Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "This is the format of your plot grid:\n", + "[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]\n", + "[ (2,1) x3,y3 ]  [ (2,2) x4,y4 ]\n", + "\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "397f170343fe45b4acf9aded46595ac8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FigureWidget({\n", + "    'data': [{'marker': {'color': 'rgb(200,200,200)', 'size': 4},\n", + "              'mode': 'marker…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Define which columns we want to plot\n", + "x_col = 'prestorm_berm_vol'\n", + "y_col = 'R_high_dune_toe_diff'\n", + "marker_size = 4\n", + "\n", + "# Create 2x2 subplot figure confusion matrix\n", + "fig = tools.make_subplots(\n", + "    rows=2,\n", + "    cols=2,\n", + "    vertical_spacing=0.09,\n", + "    subplot_titles=(\n", + "        'Predicted Swash',\n", + "        'Predicted Collision',\n", + "        '',\n", + "        '',\n", + "    ))\n", + "\n", + "# Get data for all traces\n", + "x_all = df.loc[:, x_col]\n", + "y_all = df.loc[:, y_col]\n", + "\n", + "# Create underlying grey traces of all data, so we can compare each subplot with all the data.\n", + "trace5 = go.Scatter(\n", + "    mode='markers',\n", + "    x=x_all,\n", + "    y=y_all,\n", + "    showlegend=False,\n", + "    marker=dict(\n", + "        color='rgb(200,200,200)',\n", + "        size=marker_size,\n", + "    ))\n", + "fig.append_trace(trace5, 1, 1)\n", + "\n", + "trace6 = copy(trace5)\n", + "trace6.xaxis = 'x2'\n", + "trace6.yaxis = 'y'\n", + "fig.append_trace(trace6, 1, 2)\n", + "\n", + "trace7 = copy(trace5)\n", + "trace7.xaxis = 'x'\n", + "trace7.yaxis = 'y2'\n", + "fig.append_trace(trace7, 2, 1)\n", + "\n", + "trace8 = copy(trace5)\n", + "trace8.xaxis = 'x2'\n", + "trace8.yaxis = 'y2'\n", + "fig.append_trace(trace8, 2, 2)\n", + "\n", + "# Add actual data for each subplot\n", + "\n", + "# Predicted swash, observed collision\n", + "trace1 = go.Scatter(\n", + "    mode='markers',\n", + "    x=df.loc[df.index.isin(collision_underpredicted_site_ids), x_col],\n", + "    y=df.loc[df.index.isin(collision_underpredicted_site_ids), y_col],\n", + "    marker=dict(\n", + "        color='#fc8d59',\n", + "        size=marker_size,\n", + "        line=dict(color='rgb(231,231,231)', width=0.5)))\n", + "fig.append_trace(trace1, 2, 1)\n", + "\n", + "# Predicted collision, observed collision\n", + "trace2 = go.Scatter(\n", + "    mode='markers',\n", + "    x=df.loc[df.index.isin(collision_correct_site_ids), 
x_col],\n", + " y=df.loc[df.index.isin(collision_correct_site_ids), y_col],\n", + " marker=dict(\n", + " color='#fc8d59',\n", + " size=marker_size,\n", + " line=dict(color='rgb(231,231,231)', width=0.5)))\n", + "fig.append_trace(trace2, 2, 2)\n", + "\n", + "# Predicted swash, observed swash\n", + "trace3 = go.Scatter(\n", + " mode='markers',\n", + " x=df.loc[df.index.isin(swash_correct_site_ids), x_col],\n", + " y=df.loc[df.index.isin(swash_correct_site_ids), y_col],\n", + " marker=dict(\n", + " color='#3182bd',\n", + " size=marker_size,\n", + " line=dict(color='rgb(231,231,231)', width=0.5)))\n", + "fig.append_trace(trace3, 1, 1)\n", + "\n", + "# Predicted collision, observed swash\n", + "trace4 = go.Scatter(\n", + " mode='markers',\n", + " x=df.loc[df.index.isin(swash_overpredicted_site_ids), x_col],\n", + " y=df.loc[df.index.isin(swash_overpredicted_site_ids), y_col],\n", + " marker=dict(\n", + " color='#3182bd',\n", + " size=marker_size,\n", + " line=dict(color='rgb(231,231,231)', width=0.5)))\n", + "fig.append_trace(trace4, 1, 2)\n", + "\n", + "# Update formatting, titles, sizes etc.\n", + "fig['layout']['yaxis1'].update(\n", + " title='{}
<br>{}'.format('Observed swash', y_col))\n", + "fig['layout']['yaxis3'].update(\n", + "    title='{}<br>
<br>{}'.format('Observed collision', y_col))\n", + "fig['layout']['xaxis3'].update(title=x_col)\n", + "fig['layout']['xaxis4'].update(title=x_col)\n", + "fig['layout'].update(\n", + "    height=700, width=900, title='Storm Regime Confusion Matrix')\n", + "fig['layout']['showlegend'] = False\n", + "fig_to_plot = go.FigureWidget(data=fig.data, layout=fig.layout)\n", + "\n", + "# To output to file\n", + "img_bytes = pio.write_image(\n", + "    fig_to_plot,\n", + "    '02_storm_regime_confusion.png',\n", + "    format='png',\n", + "    width=600,\n", + "    height=600,\n", + "    scale=5)\n", + "\n", + "fig_to_plot" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2018-12-09T00:30:34.940581Z", + "start_time": "2018-12-09T00:30:34.825247Z" + } + }, + "source": [ + "Plotting `prestorm_berm_vol` vs `R_high_dune_toe_diff` shows there is a difference between observed swash and collision. It appears that when the `prestorm_berm_vol` is smaller than about 80 m³/m, we get collision regardless of whether `R_high_dune_toe_diff` is greater than 0 m. Let's confirm whether a vertical line can differentiate between these two regimes." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create regime predictor which includes berm volume\n", + "\n", + "The technique for finding the boundary between two clusters is taken from [StackExchange](https://stackoverflow.com/a/22356267). For a linear SVM the fitted boundary is the line $w_0 x + w_1 y + b = 0$, which the cell below rearranges to express $x$ as a function of $y$." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "ExecuteTime": { + "end_time": "2018-12-10T05:37:35.785659Z", + "start_time": "2018-12-10T05:37:34.355843Z" + } + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "09471a2157774aa2b17f5b88c2bfaab4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FigureWidget({\n", + "    'data': [{'marker': {'color': '#3182bd', 'line': {'color': 'rgb(231,231,231)', 'width': 0.5…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Get data\n", + "df = df.dropna(subset=['prestorm_berm_vol', 'R_high_dune_toe_diff'])\n", + "\n", + "swash_x = df.query('observed_storm_regime==\"swash\"').prestorm_berm_vol\n", + "swash_y = df.query('observed_storm_regime==\"swash\"').R_high_dune_toe_diff\n", + "swash_samples = np.array([[x, y] for x, y in zip(swash_x, swash_y)])\n", + "\n", + "collision_x = df.query('observed_storm_regime==\"collision\"').prestorm_berm_vol\n", + "collision_y = df.query(\n", + "    'observed_storm_regime==\"collision\"').R_high_dune_toe_diff\n", + "collision_samples = np.array(\n", + "    [[x, y] for x, y in zip(collision_x, collision_y)])\n", + "\n", + "# Fit SVM\n", + "X = np.concatenate((swash_samples, collision_samples), axis=0)\n", + "Y = np.array([0] * swash_x.shape[0] + [1] * collision_x.shape[0])\n", + "\n", + "C = 1.0  # SVM regularization parameter\n", + "clf = svm.SVC(kernel='linear', gamma=0.7, C=C)\n", + "clf.fit(X, Y)\n", + "\n", + "w = clf.coef_[0]\n", + "a = -w[0] / w[1]\n", + "y_vals = swash_y.tolist() + collision_y.tolist()\n", + "yy = np.linspace(min(y_vals), max(y_vals))\n", + "xx = (yy + (clf.intercept_[0]) / w[1]) / a\n", + "\n", + "# Prepare plot\n", + "trace_swash = go.Scatter(\n", + "    x=swash_x,\n", + "    y=swash_y,\n", + "    name='Swash',\n", + "    mode='markers',\n", + "    marker=dict(\n", + "        color='#3182bd',\n", + "        size=marker_size,\n", + "        line=dict(color='rgb(231,231,231)', width=0.5)))\n", + "\n", + "trace_collision = go.Scatter(\n", + "    x=collision_x,\n", + "    y=collision_y,\n", + "    name='Collision',\n", 
+ " mode='markers',\n", + " marker=dict(\n", + " color='#fc8d59',\n", + " size=marker_size,\n", + " line=dict(color='rgb(231,231,231)', width=0.5)))\n", + "\n", + "trace_split = go.Scatter(\n", + " x=xx,\n", + " y=yy,\n", + " name='Split (y={:.1f}x-{:.1f})'.format(a, (clf.intercept_[0]) / w[1]),\n", + ")\n", + "\n", + "layout = dict(\n", + " title='Observed Swash/Collision Regime Split',\n", + " xaxis=dict(title='Prestorm berm volume', ),\n", + " yaxis=dict(title='R_high - D_low'),\n", + " legend=dict(x=.6, y=1.))\n", + "\n", + "fig_to_plot = go.FigureWidget(\n", + " data=[trace_swash, trace_collision, trace_split], layout=layout)\n", + "\n", + "# To output to file\n", + "img_bytes = pio.write_image(\n", + " fig_to_plot,\n", + " '02_storm_regime_split.png',\n", + " format='png',\n", + " width=600,\n", + " height=600,\n", + " scale=5)\n", + "\n", + "fig_to_plot" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Looking at the plot above, it appears when the `prestorm_berm_vol` is less than 80 m, then we should classify it as collision, even if wave runup does not reach the toe of the dune." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test new berm vol predictor\n", + "\n", + "Now lets go back to our predicted forecasts and see if our confusion matrix improves if we adopt this new criteria for differentiating between swash and collision.\n", + "\n", + "First define a custom function to get colormap for our confusion matrix." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-12-10T02:18:08.658221Z", + "start_time": "2018-12-10T02:18:08.653186Z" + } + }, + "outputs": [], + "source": [ + "def matplotlib_to_plotly(cmap_name='RdYlGn', pl_entries=255):\n", + " \"\"\"\n", + " Function to convert matplotlib colorscale to plotly\n", + " \"\"\"\n", + " cmap = matplotlib.cm.get_cmap(cmap_name)\n", + " h = 1.0 / (pl_entries - 1)\n", + " pl_colorscale = []\n", + "\n", + " for k in range(pl_entries):\n", + " C = list(map(np.uint8, np.array(cmap(k * h)[:3]) * 255))\n", + " pl_colorscale.append([k * h, 'rgb' + str((C[0], C[1], C[2]))])\n", + "\n", + " return pl_colorscale" ] }, { "cell_type": "code", - "execution_count": 156, + "execution_count": 40, "metadata": { "ExecuteTime": { - "end_time": "2018-12-04T23:11:08.853877Z", - "start_time": "2018-12-04T23:11:08.846876Z" + "end_time": "2018-12-10T05:44:27.306253Z", + "start_time": "2018-12-10T05:44:25.791799Z" }, - "hidden": true + "code_folding": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "97d8283101e748dd8e2fd7811c97973b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FigureWidget({\n", + " 'data': [{'colorscale': [[0.0, 'rgb(165, 0, 38)'], [0.003937007874015748,\n", + " …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "\n", + "# Create colorscale\n", + "rdylgr = matplotlib_to_plotly()\n", + "\n", + "# Add new column with our new prediction technique.\n", + "df.loc[df['prestorm_berm_vol'] * 2.7 - 221.1 <= df['R_high_dune_toe_diff'],\n", + " 'new_predicted_storm_regime'] = 'collision'\n", + "df.loc[df['prestorm_berm_vol'] * 2.7 - 221.1 > df['R_high_dune_toe_diff'],\n", + " 'new_predicted_storm_regime'] = 'swash'\n", + "\n", + "# Get observed and forecasted regimes, and merge\n", + "observed_regimes = df.observed_storm_regime.rename(\n", + " 
'observed_regime').to_frame()\n", + "forecasted_regimes = df.new_predicted_storm_regime.rename(\n", + "    'forecasted_regime').to_frame()\n", + "df_compare = pd.concat([observed_regimes, forecasted_regimes],\n", + "                       axis='columns',\n", + "                       names=['a', 'b'],\n", + "                       sort=True)\n", + "df_compare.dropna(axis='index', inplace=True)\n", + "\n", + "# Create a confusion matrix based on the observed/forecasted regimes.\n", + "# Need to do some flipping and reversing to get it in the correct\n", + "# order for the plotly heatmap.\n", + "z = confusion_matrix(\n", + "    df_compare.observed_regime.tolist(),\n", + "    df_compare.forecasted_regime.tolist(),\n", + "    labels=['swash', 'collision', 'overwash', 'inundation'])\n", + "z = np.flip(z, axis=0)\n", + "z_list = list(reversed(z.tolist()))\n", + "\n", + "# Make incorrect values negative, so they get assigned a different color.\n", + "# Better for visualization\n", + "z_neg_incorrect = np.flip(np.identity(4), axis=0)\n", + "z_neg_incorrect[z_neg_incorrect == 0] = -1\n", + "z_neg_incorrect = (z * z_neg_incorrect).tolist()\n", + "\n", + "# Change the text on the heatmap so it also displays percentages.\n", + "z_with_pct = []\n", + "for row in z:\n", + "    new_row = []\n", + "    for val in row:\n", + "        new_row.append('{}<br>
({}%)'.format(\n", + " val, np.around(val / np.sum(z) * 100, 1)))\n", + " z_with_pct.append(new_row)\n", + "\n", + "# Create the heatmap figure\n", + "x = ['swash', 'collision', 'overwash', 'inundation']\n", + "y = list(reversed(x))\n", + "fig = ff.create_annotated_heatmap(\n", + " z_neg_incorrect, x=x, y=y, annotation_text=z_with_pct, colorscale=rdylgr)\n", + "heatmap = go.FigureWidget(data=fig.data, layout=fig.layout)\n", + "\n", + "# Update axis titles\n", + "heatmap.layout.xaxis.update(title='Predicted')\n", + "heatmap.layout.yaxis.update(title='Observed')\n", + "\n", + "# Write to file\n", + "img_bytes = pio.write_image(\n", + " heatmap,\n", + " '02_confusion_matrix_berm_vol_predictor.png',\n", + " format='png',\n", + " width=600,\n", + " height=600,\n", + " scale=5)\n", + "\n", + "heatmap" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## TODO Compare predicted and underpredicted profiles" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define a function for getting the average beach profile for a number of given site_ids:" + ] + }, + { + "cell_type": "code", + "execution_count": 224, + "metadata": { + "ExecuteTime": { + "end_time": "2018-12-10T03:08:49.904157Z", + "start_time": "2018-12-10T03:08:49.896136Z" + } }, "outputs": [], "source": [ @@ -422,41 +1208,46 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Now, let's look at whether there is a difference between the average beach profile of correctly forecasted site_ids and incorrectly forecasted site_ids. First, looking at sites where we observed swash regime." ] }, { "cell_type": "code", - "execution_count": 161, + "execution_count": 225, "metadata": { "ExecuteTime": { - "end_time": "2018-12-05T02:00:36.853374Z", - "start_time": "2018-12-05T01:58:21.839366Z" + "end_time": "2018-12-10T03:11:20.818875Z", + "start_time": "2018-12-10T03:08:49.906163Z" }, - "code_folding": [], - "hidden": true + "code_folding": [] }, "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "03f2e99d20a347f3922a0e6a36f99ccd", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FigureWidget({\n", - " 'data': [{'line': {'color': 'rgb(205, 0, 0)', 'width': 2},\n", - " 'mode': 'lines',\n", - " …" - ] - }, - "metadata": {}, - "output_type": "display_data" + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\z5189959\\Desktop\\nsw-2016-storm-impact\\.venv\\lib\\site-packages\\pandas\\core\\groupby\\groupby.py:1062: RuntimeWarning:\n", + "\n", + "Mean of empty slice\n", + "\n", + "C:\\Users\\z5189959\\Desktop\\nsw-2016-storm-impact\\.venv\\lib\\site-packages\\numpy\\lib\\nanfunctions.py:1545: RuntimeWarning:\n", + "\n", + "Degrees of freedom <= 0 for slice.\n", + "\n" + ] + }, + { + "ename": "NameError", + "evalue": "name 'avg_correct_x' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 41\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 42\u001b[0m trace_correct_mean = go.Scatter(\n\u001b[1;32m---> 43\u001b[1;33m \u001b[0mx\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mavg_correct_x\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 44\u001b[0m 
\u001b[0my\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mavg_correct_z\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 45\u001b[0m \u001b[0mopacity\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mNameError\u001b[0m: name 'avg_correct_x' is not defined" + ] } ], "source": [ @@ -563,42 +1354,21 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "We can see that the difference is pretty minimal. For cases where we predicted collision, but observed swash (overprediction), we see that overpredicted profiles are slightly more concave than correctly predicted sites." ] }, { "cell_type": "code", - "execution_count": 162, + "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2018-12-05T02:03:38.394415Z", - "start_time": "2018-12-05T02:00:37.335377Z" - }, - "hidden": true - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "1255bccc024e4690b4b8ff4ccc8e9e35", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FigureWidget({\n", - " 'data': [{'line': {'color': 'rgb(205, 0, 0)', 'width': 2},\n", - " 'mode': 'lines',\n", - " …" - ] - }, - "metadata": {}, - "output_type": "display_data" + "end_time": "2018-12-10T03:11:20.824874Z", + "start_time": "2018-12-10T03:08:27.623Z" } - ], + }, + "outputs": [], "source": [ "underpredicted = get_avg_profile(collision_underpredicted_site_ids)\n", "correct = get_avg_profile(collision_correct_site_ids)\n", @@ -703,15 +1473,14 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "This plot is a bit more interesting. It shows that we are correctly forecasting collision when the profile is more accreted/convex, but when the profile is more eroded/concave, the water level is underpredicted. Why is this? 
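One possibility is that the mean slope used for the forecast, taken end-to-end between the dune toe and the waterline, does not reflect how concave an eroded profile has become. A quick, hypothetical way to quantify concavity (`x` and `z` are one profile's chainage and elevation arrays; none of this is in the pipeline):\n",
    "\n",
    "```python\n",
    "import numpy as np\n",
    "\n",
    "def profile_concavity(x, z):\n",
    "    # straight line (chord) joining the profile's end points\n",
    "    chord = np.interp(x, [x[0], x[-1]], [z[0], z[-1]])\n",
    "    # positive mean deviation => profile below the chord (concave, eroded);\n",
    "    # negative => profile above the chord (convex, accreted)\n",
    "    return np.mean(chord - z)\n",
    "```\n",
    "\n",
    "Comparing this statistic between the correctly predicted and underpredicted site_ids would be one way to test the idea.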
" ] } ], "metadata": { + "hide_input": false, "kernelspec": { "display_name": "Python 3", "language": "python", diff --git a/src/analysis/forecast_twl.py b/src/analysis/forecast_twl.py index 82efc65..0fab400 100644 --- a/src/analysis/forecast_twl.py +++ b/src/analysis/forecast_twl.py @@ -23,7 +23,7 @@ def forecast_twl( runup_function, n_processes=MULTIPROCESS_THREADS, slope="foreshore", - profile_type='prestorm' + profile_type="prestorm", ): # Use df_waves as a base df_twl = df_waves.copy() @@ -46,19 +46,20 @@ def forecast_twl( df_twl["beta"] = pd.concat(results) elif slope == "mean": - df_temp = df_twl.join(df_profile_features.query("profile_type=='{}'".format(profile_type)).reset_index( - level='profile_type') - , how="inner") + df_temp = df_twl.join( + df_profile_features.query("profile_type=='{}'".format(profile_type)).reset_index(level="profile_type"), + how="inner", + ) df_temp["mhw"] = 0.5 with Pool(processes=n_processes) as pool: results = pool.starmap( - mean_slope_for_site_id, [(site_id, df_temp, df_profiles, "dune_toe_z", "dune_toe_x", "mhw") for - site_id in site_ids] + mean_slope_for_site_id, + [(site_id, df_temp, df_profiles, "dune_toe_z", "dune_toe_x", "mhw") for site_id in site_ids], ) df_twl["beta"] = pd.concat(results) # Estimate runup - R2, setup, S_total, S_inc, S_ig = runup_function(df_twl, Hs0_col="Hs0", Tp_col="Tp", beta_col="beta") + R2, setup, S_total, S_inc, S_ig = runup_function(Hs0=df_twl['Hs0'].tolist(), Tp=df_twl["Tp"].tolist(), beta=df_twl["beta"].tolist()) df_twl["R2"] = R2 df_twl["setup"] = setup @@ -69,13 +70,14 @@ def forecast_twl( df_twl["R_low"] = df_twl["tide"] + 1.1 * df_twl["setup"] - 1.1 / 2 * df_twl["S_total"] # Drop unneeded columns - df_twl.drop(columns=["E", "Exs", "P", "Pxs", "dir"], inplace=True, errors="ignore") + # df_twl.drop(columns=["E", "Exs", "P", "Pxs", "dir"], inplace=True, errors="ignore") return df_twl -def mean_slope_for_site_id(site_id, df_twl, df_profiles, top_elevation_col, top_x_col, btm_elevation_col, - profile_type='prestorm'): +def mean_slope_for_site_id( + site_id, df_twl, df_profiles, top_elevation_col, top_x_col, btm_elevation_col, profile_type="prestorm" +): """ Calculates the foreshore slope values a given site_id. Returns a series (with same indicies as df_twl) of foreshore slopes. 
This function is used to parallelize getting foreshore slopes as it is computationally @@ -100,7 +102,7 @@ def mean_slope_for_site_id(site_id, df_twl, df_profiles, top_elevation_col, top_ top_elevation=row[top_elevation_col], btm_elevation=row[btm_elevation_col], method="end_points", - top_x= row[top_x_col] + top_x=row[top_x_col], ), axis=1, ) @@ -130,7 +132,7 @@ def foreshore_slope_for_site_id(site_id, df_twl, df_profiles): profile_x=profile_x, profile_z=profile_z, tide=row.tide, - runup_function=runup_models.sto06_individual, + runup_function=runup_models.sto06, Hs0=row.Hs0, Tp=row.Tp, ), @@ -216,16 +218,14 @@ def slope_from_profile(profile_x, profile_z, top_elevation, btm_elevation, metho end_points = {"top": {"z": top_elevation}, "btm": {"z": btm_elevation}} - - for end_type in end_points.keys(): # Add x coordinates if they are specified - if top_x and end_type == 'top': - end_points['top']['x'] = top_x + if top_x and end_type == "top": + end_points["top"]["x"] = top_x continue - if btm_x and end_type == 'top': - end_points['btm']['x'] = btm_x + if btm_x and end_type == "top": + end_points["btm"]["x"] = btm_x continue elevation = end_points[end_type]["z"] @@ -306,14 +306,15 @@ def crossings(profile_x, profile_z, constant_z): @click.option("--slope", required=True, help="", type=click.Choice(["foreshore", "mean"])) @click.option("--profile-type", required=True, help="", type=click.Choice(["prestorm", "poststorm"])) @click.option("--output-file", required=True, help="") -def create_twl_forecast(waves_csv, tides_csv, profiles_csv, profile_features_csv, runup_function, slope, - profile_type,output_file): +def create_twl_forecast( + waves_csv, tides_csv, profiles_csv, profile_features_csv, runup_function, slope, profile_type, output_file +): logger.info("Creating forecast of total water levels") logger.info("Importing data") df_waves = pd.read_csv(waves_csv, index_col=[0, 1]) df_tides = pd.read_csv(tides_csv, index_col=[0, 1]) df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2]) - df_profile_features = pd.read_csv(profile_features_csv, index_col=[0,1]) + df_profile_features = pd.read_csv(profile_features_csv, index_col=[0, 1]) logger.info("Forecasting TWL") df_twl = forecast_twl( @@ -323,7 +324,7 @@ def create_twl_forecast(waves_csv, tides_csv, profiles_csv, profile_features_csv df_profile_features, runup_function=getattr(runup_models, runup_function), slope=slope, - profile_type=profile_type + profile_type=profile_type, ) df_twl.to_csv(output_file) diff --git a/src/analysis/forecasted_storm_impacts.py b/src/analysis/forecasted_storm_impacts.py index 0ea92ec..0a04ea1 100644 --- a/src/analysis/forecasted_storm_impacts.py +++ b/src/analysis/forecasted_storm_impacts.py @@ -20,7 +20,7 @@ def forecasted_impacts(df_profile_features, df_forecasted_twl): """ logger.info("Getting forecasted storm impacts") - df_forecasted_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values('site_id').unique()) + df_forecasted_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique()) # For each site, find the maximum R_high value and the corresponding R_low value. 
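    # idxmax() gives, for each site_id, the (site_id, datetime) index label of
    # the hour with the largest R_high; selecting those labels then extracts the
    # TWL values at the peak of the storm.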
idx = df_forecasted_twl.groupby(level=["site_id"])["R_high"].idxmax().dropna() @@ -29,7 +29,12 @@ def forecasted_impacts(df_profile_features, df_forecasted_twl): # Join with df_profile features to find dune toe and crest elevations df_forecasted_impacts = df_forecasted_impacts.merge( - df_profile_features.query("profile_type=='prestorm'")[["dune_toe_z", "dune_crest_z"]], how="left", left_index=True, right_index=True + df_profile_features.query("profile_type=='prestorm'").reset_index("profile_type")[ + ["dune_toe_z", "dune_crest_z"] + ], + how="left", + left_on=["site_id"], + right_on=["site_id"], ) # Compare R_high and R_low wirth dune crest and toe elevations @@ -66,6 +71,36 @@ def storm_regime(df_forecasted_impacts): return df_forecasted_impacts +def twl_exceedence_time(df_profile_features, df_forecasted_twl, z_twl_col="R_high", z_exceedence_col="dune_toe_z"): + """ + Returns a dataframe of number of hours the twl exceeded a certain z elevation. + May need to use this https://stackoverflow.com/a/53656968 if datetimes are not consistent. + :param df_profile_features: + :param df_forecasted_twl: + :param z_twl_col: + :param z_exceedence_col: + :return: + """ + logger.info("Getting twl exceedence time") + + # Get a dataframe of prestorm dune toes organised by site_id + df_dune_toes = ( + df_profile_features.query('profile_type=="prestorm"').reset_index("profile_type")[z_exceedence_col].to_frame() + ) + + # Merge dune toes into site_id + df_merged = df_forecasted_twl.merge(df_dune_toes, left_on=["site_id"], right_on=["site_id"]) + + # Return the sum of hours that twl exceeded the level + return ( + (df_merged[z_twl_col] >= df_merged[z_exceedence_col]) + .groupby("site_id") + .sum() + .rename("twl_{}_exceedance_hrs".format(z_exceedence_col)) + .to_frame() + ) + + @click.command() @click.option("--profile-features-csv", required=True, help="") @click.option("--forecasted-twl-csv", required=True, help="") @@ -73,10 +108,15 @@ def storm_regime(df_forecasted_impacts): def create_forecasted_impacts(profile_features_csv, forecasted_twl_csv, output_file): logger.info("Creating observed wave impacts") logger.info("Importing existing data") - df_profile_features = pd.read_csv(profile_features_csv, index_col=[0,1]) + df_profile_features = pd.read_csv(profile_features_csv, index_col=[0, 1]) df_forecasted_twl = pd.read_csv(forecasted_twl_csv, index_col=[0, 1]) df_forecasted_impacts = forecasted_impacts(df_profile_features, df_forecasted_twl) + + df_forecasted_impacts = df_forecasted_impacts.merge( + twl_exceedence_time(df_profile_features, df_forecasted_twl), left_on=["site_id"], right_on=["site_id"] + ) + df_forecasted_impacts.to_csv(output_file) logger.info("Saved to %s", output_file) logger.info("Done!") diff --git a/src/analysis/observed_storm_impacts.py b/src/analysis/observed_storm_impacts.py index 6c76e36..6c4c0e1 100644 --- a/src/analysis/observed_storm_impacts.py +++ b/src/analysis/observed_storm_impacts.py @@ -30,13 +30,13 @@ def volume_change(df_profiles, df_profile_features, zone): """ logger.info("Calculating change in beach volume in {} zone".format(zone)) - df_vol_changes = pd.DataFrame(index=df_profile_features.index.get_level_values('site_id').unique()) + df_vol_changes = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique()) df_profiles = df_profiles.sort_index() sites = df_profiles.groupby(level=["site_id"]) for site_id, df_site in sites: logger.debug("Calculating change in beach volume at {} in {} zone".format(site_id, zone)) - query 
="site_id=='{}'&profile_type=='prestorm'".format(site_id) + query = "site_id=='{}'&profile_type=='prestorm'".format(site_id) prestorm_dune_toe_x = df_profile_features.query(query).dune_toe_x.tolist() prestorm_dune_crest_x = df_profile_features.query(query).dune_crest_x.tolist() @@ -71,11 +71,11 @@ def volume_change(df_profiles, df_profile_features, zone): # Where we want to measure pre and post storm volume is dependant on the zone selected if zone == "swash": - x_min = max(prestorm_dune_toe_x,x_first_obs) + x_min = max(prestorm_dune_toe_x, x_first_obs) x_max = x_last_obs elif zone == "dune_face": x_min = max(prestorm_dune_crest_x, x_first_obs) - x_max = min(prestorm_dune_toe_x,x_last_obs) + x_max = min(prestorm_dune_toe_x, x_last_obs) else: logger.warning("Zone argument not properly specified. Please check") x_min = None @@ -98,16 +98,11 @@ def volume_change(df_profiles, df_profile_features, zone): # Volume change needs to be calculated including a tolerance for LIDAR accuracy. If difference between # profiles is less than 20 cm, consider them as zero difference. - prestorm_z = df_zone.query("profile_type=='prestorm'").reset_index('profile_type').z - poststorm_z = df_zone.query("profile_type=='poststorm'").reset_index('profile_type').z + prestorm_z = df_zone.query("profile_type=='prestorm'").reset_index("profile_type").z + poststorm_z = df_zone.query("profile_type=='poststorm'").reset_index("profile_type").z diff_z = prestorm_z - poststorm_z diff_z[abs(diff_z) < 0.2] = 0 - diff_vol = beach_volume( - x=diff_z.index.get_level_values("x"), - z=diff_z, - x_min=x_min, - x_max=x_max, - ) + diff_vol = beach_volume(x=diff_z.index.get_level_values("x"), z=diff_z, x_min=x_min, x_max=x_max) df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = prestorm_vol df_vol_changes.loc[site_id, "poststorm_{}_vol".format(zone)] = poststorm_vol @@ -146,14 +141,13 @@ def storm_regime(df_observed_impacts): logger.info("Getting observed storm regimes") swash = (df_observed_impacts.dune_face_pct_change <= 2) & (df_observed_impacts.dune_face_vol_change <= 3) - collision = (df_observed_impacts.dune_face_pct_change >= 2) |(df_observed_impacts.dune_face_vol_change > 3) + collision = (df_observed_impacts.dune_face_pct_change >= 2) | (df_observed_impacts.dune_face_vol_change > 3) df_observed_impacts.loc[swash, "storm_regime"] = "swash" df_observed_impacts.loc[collision, "storm_regime"] = "collision" return df_observed_impacts - @click.command() @click.option("--profiles-csv", required=True, help="") @click.option("--profile-features-csv", required=True, help="") @@ -163,10 +157,10 @@ def create_observed_impacts(profiles_csv, profile_features_csv, output_file): logger.info("Creating observed wave impacts") logger.info("Importing data") df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2]) - df_profile_features = pd.read_csv(profile_features_csv, index_col=[0,1]) + df_profile_features = pd.read_csv(profile_features_csv, index_col=[0, 1]) logger.info("Creating new dataframe for observed impacts") - df_observed_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values('site_id').unique()) + df_observed_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique()) logger.info("Getting pre/post storm volumes") df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash") @@ -177,7 +171,7 @@ def create_observed_impacts(profiles_csv, profile_features_csv, output_file): df_observed_impacts = storm_regime(df_observed_impacts) # Save dataframe to csv 
- df_observed_impacts.to_csv(output_file, float_format='%.4f') + df_observed_impacts.to_csv(output_file, float_format="%.4f") logger.info("Saved to %s", output_file) logger.info("Done!") diff --git a/src/analysis/runup_models.py b/src/analysis/runup_models.py index e088aeb..9176adb 100644 --- a/src/analysis/runup_models.py +++ b/src/analysis/runup_models.py @@ -2,52 +2,51 @@ import numpy as np import pandas as pd -def sto06_individual(Hs0, Tp, beta): +def sto06(Hs0, Tp, beta): + """ + :param Hs0: List or float of offshore significant wave height values + :param Tp: List or float of peak wave period + :param beta: List of float of beach slope + :return: Float or list of R2, setup, S_total, S_inc and S_ig values + """ - Lp = 9.8 * Tp ** 2 / 2 / np.pi + df = pd.DataFrame({"Hs0": Hs0, "Tp": Tp, "beta": beta}, index=[x for x in range(0, np.size(Hs0))]) - S_ig = 0.06 * np.sqrt(Hs0 * Lp) - S_inc = 0.75 * beta * np.sqrt(Hs0 * Lp) + df["Lp"] = 9.8 * df['Tp'] ** 2 / 2 / np.pi + + # General equation + df["S_ig"] = pd.to_numeric(0.06 * np.sqrt(df["Hs0"] * df["Lp"]), errors="coerce") + df["S_inc"] = pd.to_numeric(0.75 * df["beta"] * np.sqrt(df["Hs0"] * df["Lp"]), errors="coerce") + df["setup"] = pd.to_numeric(0.35 * df["beta"] * np.sqrt(df["Hs0"] * df["Lp"]), errors="coerce") + df["S_total"] = np.sqrt(df["S_inc"] ** 2 + df["S_ig"] ** 2) + df["R2"] = 1.1 * (df["setup"] + df["S_total"] / 2) # Dissipative conditions - if beta / (Hs0 / Lp) ** (0.5) <= 0.3: - setup = 0.016 * (Hs0 * Lp) ** 0.5 - S_total = 0.046 * (Hs0 * Lp) ** 0.5 - R2 = 0.043 * (Hs0 * Lp) ** 0.5 - else: - setup = 0.35 * beta * (Hs0 * Lp) ** 0.5 - S_total = np.sqrt(S_inc ** 2 + S_ig ** 2) - R2 = 1.1 * (setup + S_total / 2) + dissipative = df["beta"] / (df["Hs0"] / df["Lp"]) ** (0.5) <= 0.3 + + df.loc[dissipative, "setup"] = 0.016 * (df["Hs0"] * df["Lp"]) ** (0.5) # eqn 16 + df.loc[dissipative, "S_total"] = 0.046 * (df["Hs0"] * df["Lp"]) ** (0.5) # eqn 17 + df.loc[dissipative, "R2"] = 0.043 * (df["Hs0"] * df["Lp"]) ** (0.5) # eqn 18 - return R2, setup, S_total, S_inc, S_ig + return ( + float_or_list(df["R2"].tolist()), + float_or_list(df["setup"].tolist()), + float_or_list(df["S_total"].tolist()), + float_or_list(df["S_inc"].tolist()), + float_or_list(df["S_ig"].tolist()), + ) -def sto06(df, Hs0_col, Tp_col, beta_col): +def float_or_list(a): """ - Vectorized version of Stockdon06 which can be used with dataframes - :param df: - :param Hs0_col: - :param Tp_col: - :param beta_col: + If only one value in the array, return the float, else return a list + :param a: :return: """ - - Lp = 9.8 * df[Tp_col] ** 2 / 2 / np.pi - - # General equation - S_ig = pd.to_numeric(0.06 * np.sqrt(df[Hs0_col] * Lp), errors="coerce") - S_inc = pd.to_numeric(0.75 * df[beta_col] * np.sqrt(df[Hs0_col] * Lp), errors="coerce") - setup = pd.to_numeric(0.35 * df[beta_col] * np.sqrt(df[Hs0_col] * Lp), errors="coerce") - S_total = np.sqrt(S_inc ** 2 + S_ig ** 2) - R2 = 1.1 * (setup + S_total / 2) - - # Dissipative conditions - dissipative = df[beta_col] / (df[Hs0_col] / Lp) ** (0.5) <= 0.3 - setup.loc[dissipative, :] = 0.016 * (df[Hs0_col] * Lp) ** (0.5) # eqn 16 - S_total.loc[dissipative, :] = 0.046 * (df[Hs0_col] * Lp) ** (0.5) # eqn 17 - R2.loc[dissipative, :] = 0.043 * (df[Hs0_col] * Lp) ** (0.5) # eqn 18 - - return R2, setup, S_total, S_inc, S_ig + if len(a) == 1: + return a[0] + else: + return list(a) if __name__ == "__main__": diff --git a/src/cli.py b/src/cli.py index 10b79e4..9b1d3e5 100644 --- a/src/cli.py +++ b/src/cli.py @@ -13,7 +13,9 @@ import 
analysis.observed_storm_impacts as observed_storm_impacts # Disable numpy warnings import warnings -warnings.simplefilter(action='ignore', category=FutureWarning) + +warnings.simplefilter(action="ignore", category=FutureWarning) + @click.group() def cli(): diff --git a/src/data/parse_mat.py b/src/data/parse_mat.py index e7afd84..b2e73c6 100644 --- a/src/data/parse_mat.py +++ b/src/data/parse_mat.py @@ -58,35 +58,41 @@ def parse_dune_crest_toes(df_sites, crest_mat, toe_mat): crest_data = loadmat(crest_mat) toe_data = loadmat(toe_mat) - for n, _ in enumerate(crest_data['xc1']): - rows.extend([{ - 'dune_crest_x': crest_data['xc1'][n], - 'dune_crest_z': crest_data['zc1'][n], - 'dune_toe_x': toe_data['xt1'][n], - 'dune_toe_z': toe_data['zt1'][n], - 'profile_type': 'prestorm', - 'site_no': n+1 - },{ - 'dune_crest_x': crest_data['xc2'][n], - 'dune_crest_z': crest_data['zc2'][n], - 'dune_toe_x': toe_data['xt2'][n], - 'dune_toe_z': toe_data['zt2'][n], - 'profile_type': 'poststorm', - 'site_no': n + 1 - }]) + for n, _ in enumerate(crest_data["xc1"]): + rows.extend( + [ + { + "dune_crest_x": crest_data["xc1"][n], + "dune_crest_z": crest_data["zc1"][n], + "dune_toe_x": toe_data["xt1"][n], + "dune_toe_z": toe_data["zt1"][n], + "profile_type": "prestorm", + "site_no": n + 1, + }, + { + "dune_crest_x": crest_data["xc2"][n], + "dune_crest_z": crest_data["zc2"][n], + "dune_toe_x": toe_data["xt2"][n], + "dune_toe_z": toe_data["zt2"][n], + "profile_type": "poststorm", + "site_no": n + 1, + }, + ] + ) df_profile_features = pd.DataFrame(rows) # Want the site_id instead of the site_no, so merge in df_sites df_sites.reset_index(inplace=True) - df_profile_features = df_sites[['site_no','site_id']].merge(df_profile_features, how='outer', on=['site_no']) - df_profile_features.drop(columns=['site_no'],inplace=True) - df_profile_features.set_index(['site_id','profile_type'], inplace=True) + df_profile_features = df_sites[["site_no", "site_id"]].merge(df_profile_features, how="outer", on=["site_no"]) + df_profile_features.drop(columns=["site_no"], inplace=True) + df_profile_features.set_index(["site_id", "profile_type"], inplace=True) df_profile_features.sort_index(inplace=True) df_profile_features = df_profile_features.round(3) return df_profile_features + def combine_sites_and_orientaions(df_sites, df_orientations): """ Replaces beach/lat/lon columns with the unique site_id. @@ -193,7 +199,7 @@ def parse_profiles_and_sites(profiles_mat): site_counter = 0 for i, site in enumerate(mat_data["site"]): - logger.debug('Processing site {} of {}'.format(i+1, len(mat_data['site']))) + logger.debug("Processing site {} of {}".format(i + 1, len(mat_data["site"]))) # Give each site a unique id if len(site_rows) == 0 or site_rows[-1]["beach"] != site: @@ -248,7 +254,6 @@ def parse_profiles_and_sites(profiles_mat): profile_rows.append( { "site_id": site_id, - "lon": lon[0], "lat": lat[0], "profile_type": profile_type, @@ -387,6 +392,7 @@ def create_profile_features(crest_mat, toe_mat, sites_csv, output_file): df_profile_features.to_csv(output_file) logger.info("Created %s", output_file) + @click.command(short_help="create profiles.csv") @click.option("--profiles-mat", required=True, help=".mat file containing beach profiles") @click.option("--profiles-output-file", required=True, help="where to save profiles.csv")
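# Example usage of the refactored runup interface (a sketch with illustrative
# values, not results from the dataset). With list inputs each return value is
# a list; with scalar inputs, float_or_list() unwraps each result to a float:
#
#     import analysis.runup_models as runup_models
#     R2, setup, S_total, S_inc, S_ig = runup_models.sto06(
#         Hs0=[2.0, 3.0], Tp=[10.0, 12.0], beta=[0.10, 0.08])
#     R2_scalar, *_ = runup_models.sto06(Hs0=2.0, Tp=10.0, beta=0.10)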