From 46589da736405282cc985619c4d8df4faa98485d Mon Sep 17 00:00:00 2001
From: Chris Leaman
Date: Thu, 28 Feb 2019 11:27:33 +1100
Subject: [PATCH] Update notebooks

---
 notebooks/07_evaluate_model_performance.ipynb | 492 ++++++++++++++++++
 notebooks/07_longshore_plotting.ipynb         | 348 -------------
 ...ynb => 09_superseded_run_comparison.ipynb} |   0
 notebooks/10_profile_clustering.ipynb         | 186 +++++++
 4 files changed, 678 insertions(+), 348 deletions(-)
 create mode 100644 notebooks/07_evaluate_model_performance.ipynb
 delete mode 100644 notebooks/07_longshore_plotting.ipynb
 rename notebooks/{09_run_comparison.ipynb => 09_superseded_run_comparison.ipynb} (100%)
 create mode 100644 notebooks/10_profile_clustering.ipynb

diff --git a/notebooks/07_evaluate_model_performance.ipynb b/notebooks/07_evaluate_model_performance.ipynb
new file mode 100644
index 0000000..efa049b
--- /dev/null
+++ b/notebooks/07_evaluate_model_performance.ipynb
@@ -0,0 +1,492 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Evaluate prediction metrics \n",
+    "- This notebook is used to check how each of our storm impact prediction models performed in comparison to our observed storm impacts."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "heading_collapsed": true
+   },
+   "source": [
+    "## Setup notebook\n",
+    "Import our required packages and set default plotting options."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "hidden": true
+   },
+   "outputs": [],
+   "source": [
+    "# Enable autoreloading of our modules. \n",
+    "# Most of the code will be located in the /src/ folder, \n",
+    "# and then called from the notebook.\n",
+    "%matplotlib inline\n",
+    "%reload_ext autoreload\n",
+    "%autoreload"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "hidden": true
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.debugger import set_trace\n",
+    "\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import decimal\n",
+    "import plotly\n",
+    "import plotly.graph_objs as go\n",
+    "import plotly.plotly as py\n",
+    "import plotly.tools as tls\n",
+    "import plotly.figure_factory as ff\n",
+    "from plotly import tools\n",
+    "import plotly.io as pio\n",
+    "from scipy import stats\n",
+    "import math\n",
+    "import matplotlib\n",
+    "from matplotlib import cm\n",
+    "import colorlover as cl\n",
+    "from tqdm import tqdm_notebook\n",
+    "from ipywidgets import widgets, Output\n",
+    "from IPython.display import display, clear_output, Image, HTML\n",
+    "from scipy import stats\n",
+    "from sklearn.metrics import confusion_matrix\n",
+    "import matplotlib.pyplot as plt\n",
+    "from matplotlib.lines import Line2D\n",
+    "from cycler import cycler\n",
+    "from scipy.interpolate import interp1d\n",
+    "from pandas.api.types import CategoricalDtype"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "hidden": true
+   },
+   "outputs": [],
+   "source": [
+    "# Matplotlib default settings\n",
+    "plt.rcParams[\"figure.figsize\"] = (10,6)\n",
+    "plt.rcParams['axes.grid']=True\n",
+    "plt.rcParams['grid.alpha'] = 0.5\n",
+    "plt.rcParams['grid.color'] = \"grey\"\n",
+    "plt.rcParams['grid.linestyle'] = \"--\"\n",
+    "plt.rcParams['axes.grid']=True\n",
+    "\n",
+    "# https://stackoverflow.com/a/20709149\n",
+    "matplotlib.rcParams['text.usetex'] = True\n",
+    "\n",
+    "matplotlib.rcParams['text.latex.preamble'] = [\n",
+    "    r'\\usepackage{siunitx}',   # I need upright \\micro symbols, but you need...\n",
+    "    
r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n", + " r'\\usepackage{helvet}', # set the normal font here\n", + " r'\\usepackage{amsmath}',\n", + " r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n", + " r'\\sansmath', # <- tricky! -- gotta actually tell tex to use!\n", + "] " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import data\n", + "Import our data from the `./data/interim/` folder and load it into pandas dataframes. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def df_from_csv(csv, index_col, data_folder='../data/interim'):\n", + " print('Importing {}'.format(csv))\n", + " return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n", + "\n", + "df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n", + "df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n", + "df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n", + "df_sites = df_from_csv('sites.csv', index_col=[0])\n", + "df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])\n", + "\n", + "# Note that the forecasted data sets should be in the same order for impacts and twls\n", + "impacts = {\n", + " 'forecasted': {\n", + " 'foreshore_slope_sto06': df_from_csv('impacts_forecasted_foreshore_slope_sto06.csv',index_col=[0]),\n", + " 'mean_slope_hol86': df_from_csv('impacts_forecasted_mean_slope_hol86.csv',index_col=[0]),\n", + " 'mean_slope_nie91': df_from_csv('impacts_forecasted_mean_slope_nie91.csv',index_col=[0]),\n", + " 'mean_slope_pow18': df_from_csv('impacts_forecasted_mean_slope_pow18.csv',index_col=[0]),\n", + " 'mean_slope_sto06': df_from_csv('impacts_forecasted_mean_slope_sto06.csv',index_col=[0]),\n", + " },\n", + " 'observed': df_from_csv('impacts_observed.csv', index_col=[0])\n", + " }\n", + "\n", + "\n", + "twls = {\n", + " 'forecasted': {\n", + " 'foreshore_slope_sto06': df_from_csv('twl_foreshore_slope_sto06.csv',index_col=[0,1]),\n", + " 'mean_slope_hol86': df_from_csv('twl_mean_slope_hol86.csv',index_col=[0,1]),\n", + " 'mean_slope_nie91': df_from_csv('twl_mean_slope_nie91.csv',index_col=[0,1]),\n", + " 'mean_slope_pow18': df_from_csv('twl_mean_slope_pow18.csv',index_col=[0,1]),\n", + " 'mean_slope_sto06': df_from_csv('twl_mean_slope_sto06.csv',index_col=[0,1]),\n", + " }\n", + "}\n", + "print('Done!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "## Generate longshore plots for each beach" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "beach = 'NARRA'\n", + "\n", + "df_obs_impacts = impacts['observed'].loc[impacts['observed'].index.str.\n", + " contains(beach)]\n", + "\n", + "# Get index for each site on the beach\n", + "n = [x for x in range(len(df_obs_impacts))][::-1]\n", + "n_sites = [x for x in df_obs_impacts.index][::-1]\n", + "\n", + "# Convert storm regimes to categorical datatype\n", + "cat_type = CategoricalDtype(\n", + " categories=['swash', 'collision', 'overwash', 'inundation'], ordered=True)\n", + "df_obs_impacts.storm_regime = df_obs_impacts.storm_regime.astype(cat_type)\n", + "\n", + "# Create figure\n", + "f, (ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8) = plt.subplots(\n", + " 1,\n", + " 8,\n", + " sharey=True,\n", + " figsize=(18, 8),\n", + " gridspec_kw={'width_ratios': [4, 4, 2, 2, 2, 2, 2, 2]})\n", + "\n", + "# ax1: 
Impacts\n", + "\n", + "# Define colors for storm regime\n", + "cmap = {'swash': '#1a9850', 'collision': '#fee08b', 'overwash': '#d73027'}\n", + "\n", + "# Common marker style\n", + "marker_style = {\n", + " 's': 60,\n", + " 'linewidths': 0.7,\n", + " 'alpha': 1,\n", + " 'edgecolors': 'k',\n", + " 'marker': 'o',\n", + "}\n", + "\n", + "# Plot observed impacts\n", + "colors = [cmap.get(x) for x in df_obs_impacts.storm_regime]\n", + "colors = ['#d73027' if c is None else c for c in colors]\n", + "ax1.scatter([0 for x in n], n, color=colors, **marker_style)\n", + "\n", + "# Plot model impacts\n", + "for i, model in enumerate(impacts['forecasted']):\n", + "\n", + " # Only get model results for this beach\n", + " df_model = impacts['forecasted'][model].loc[impacts['forecasted'][model].\n", + " index.str.contains(beach)]\n", + "\n", + " # Recast storm regimes as categorical data\n", + " df_model.storm_regime = df_model.storm_regime.astype(cat_type)\n", + "\n", + " # Assign colors\n", + " colors = [cmap.get(x) for x in df_model.storm_regime]\n", + " colors = ['#aaaaaa' if c is None else c for c in colors]\n", + " ax1.scatter([i + 1 for x in n], n, color=colors, **marker_style)\n", + "\n", + "# Add model names to each impact on x axis\n", + "ax1.set_xticks(range(len(impacts['forecasted']) + 1))\n", + "ax1.set_xticklabels(['observed'] +\n", + " [x.replace('_', '\\_') for x in impacts['forecasted']])\n", + "ax1.xaxis.set_tick_params(rotation=90)\n", + "\n", + "# Add title\n", + "ax1.set_title('Storm regime')\n", + "\n", + "# Create custom legend\n", + "legend_elements = [\n", + " Line2D([0], [0],\n", + " marker='o',\n", + " color='w',\n", + " label='Swash',\n", + " markerfacecolor='#1a9850',\n", + " markersize=8,\n", + " markeredgewidth=1.0,\n", + " markeredgecolor='k'),\n", + " Line2D([0], [0],\n", + " marker='o',\n", + " color='w',\n", + " label='Collision',\n", + " markerfacecolor='#fee08b',\n", + " markersize=8,\n", + " markeredgewidth=1.0,\n", + " markeredgecolor='k'),\n", + " Line2D([0], [0],\n", + " marker='o',\n", + " color='w',\n", + " label='Overwash',\n", + " markerfacecolor='#d73027',\n", + " markersize=8,\n", + " markeredgewidth=1.0,\n", + " markeredgecolor='k'),\n", + "]\n", + "ax1.legend(\n", + " handles=legend_elements, loc='lower center', bbox_to_anchor=(0.5, 1.1))\n", + "\n", + "# Replace yticks with site_ids\n", + "yticks = ax1.get_yticks().tolist()\n", + "yticks = [n_sites[int(y)] if 0 <= y <= len(n_sites) else y for y in yticks]\n", + "ax1.set_yticklabels(yticks)\n", + "\n", + "# ax2: elevations\n", + "\n", + "# Dune elevations\n", + "df_feats = df_profile_features_crest_toes.xs(['prestorm'],\n", + " level=['profile_type'])\n", + "df_feats = df_feats.loc[df_feats.index.str.contains(beach)]\n", + "\n", + "ax2.plot(df_feats.dune_crest_z, n, color='#fdae61')\n", + "ax2.plot(df_feats.dune_toe_z, n, color='#fdae61')\n", + "ax2.fill_betweenx(\n", + " n,\n", + " df_feats.dune_toe_z,\n", + " df_feats.dune_crest_z,\n", + " alpha=0.2,\n", + " color='#fdae61',\n", + " label='$D_{low}$ to $D_{high}$')\n", + "\n", + "model_colors = [\n", + " '#1f78b4',\n", + " '#33a02c',\n", + " '#e31a1c',\n", + " '#6a3d9a',\n", + " '#a6cee3',\n", + " '#b2df8a',\n", + " '#fb9a99',\n", + " '#cab2d6',\n", + " '#ffff99',\n", + " ]\n", + "\n", + "# Define colors to cycle through for our R_high\n", + "ax2.set_prop_cycle(cycler('color', model_colors))\n", + "\n", + "# Plot R_high values\n", + "for model in impacts['forecasted']:\n", + "\n", + " # Only get model results for this beach\n", + " df_model = 
impacts['forecasted'][model].\n",
+    "                                  index.str.contains(beach)]\n",
+    "\n",
+    "    # Plot this model's R_high values\n",
+    "    ax2.plot(df_model.R_high, n, label=model.replace('_', '\\_'))\n",
+    "\n",
+    "# Set title, legend and labels\n",
+    "ax2.set_title('TWL \\& dune\\nelevations')\n",
+    "ax2.legend(loc='lower center', bbox_to_anchor=(0.5, 1.1))\n",
+    "ax2.set_xlabel('Elevation (m AHD)')\n",
+    "\n",
+    "\n",
+    "# ax3: Plot R_high - D_low\n",
+    "\n",
+    "# Define colors to cycle through for our R_high\n",
+    "ax3.set_prop_cycle(cycler('color', model_colors))\n",
+    "\n",
+    "# Plot R_high values\n",
+    "for model in impacts['forecasted']:\n",
+    "    \n",
+    "    df_model = impacts['forecasted'][model].loc[impacts['forecasted'][model].\n",
+    "                                                index.str.contains(beach)]\n",
+    "    # R_high - D_low\n",
+    "    ax3.plot(df_model.R_high - df_feats.dune_toe_z, n, label=model.replace('_', '\\_'))\n",
+    "\n",
+    "ax3.axvline(x=0,color='black',linestyle=':')\n",
+    "ax3.set_title('$R_{high}$ - $D_{low}$')\n",
+    "ax3.set_xlabel('Height (m)')\n",
+    "ax3.set_xlim([-2,2])\n",
+    "\n",
+    "\n",
+    "\n",
+    "# Define colors to cycle through for our R2\n",
+    "ax4.set_prop_cycle(cycler('color', model_colors))\n",
+    "\n",
+    "# ax4: Plot R2 for each model\n",
+    "for model in impacts['forecasted']:\n",
+    "    df_R2 = impacts['forecasted'][model].merge(twls['forecasted'][model],on=['site_id','datetime'])\n",
+    "    df_R2 = df_R2.loc[df_R2.index.str.contains(beach)]\n",
+    "    ax4.plot(df_R2.R2, n, label=model.replace('_', '\\_'))\n",
+    "\n",
+    "ax4.set_title(r'$R_{2\\%}$')\n",
+    "ax4.set_xlabel('Height (m)')\n",
+    "\n",
+    "# Need to choose a model to extract environmental parameters at maximum R_high time\n",
+    "model = 'mean_slope_sto06'\n",
+    "df_beach = impacts['forecasted'][model].merge(twls['forecasted'][model], on=['site_id','datetime'])\n",
+    "df_beach = df_beach.loc[df_beach.index.str.contains(beach)]\n",
+    "\n",
+    "# Wave height, wave period, beach slope\n",
+    "ax5.plot(df_beach.beta, n,color='#4daf4a')\n",
+    "ax5.set_title(r'$\\beta$')\n",
+    "ax5.set_xlabel('Mean prestorm\\nbeach slope')\n",
+    "ax5.set_xlim([0,0.15])\n",
+    "\n",
+    "ax6.plot(df_beach.Hs0, n,color='#999999')\n",
+    "ax6.set_title('$H_{s0}$')\n",
+    "ax6.set_xlabel('Sig. wave height (m)')\n",
+    "ax6.set_xlim([3,5])\n",
+    "\n",
+    "ax7.plot(df_beach.Tp, n,color='#999999')\n",
+    "ax7.set_title('$T_{p}$')\n",
+    "ax7.set_xlabel('Peak wave period (s)')\n",
+    "ax7.set_xlim([8,14])\n",
+    "\n",
+    "ax8.plot(df_beach.tide, n,color='#999999')\n",
+    "ax8.set_title('Tide \\& surge')\n",
+    "ax8.set_xlabel('Elevation (m AHD)')\n",
+    "ax8.set_xlim([0,2])\n",
+    "\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "f.subplots_adjust(top=0.88)\n",
+    "f.suptitle(beach)\n",
+    "\n",
+    "# Print to figure\n",
+    "plt.savefig('07_{}.png'.format(beach), dpi=600, bbox_inches='tight')\n",
+    "\n",
+    "plt.show()\n",
+    "plt.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Generate classification reports for each model\n",
+    "Use sklearn metrics to generate classification reports for each forecasting model."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sklearn.metrics\n", + "\n", + "# Get observed impacts\n", + "df_obs = impacts['observed']\n", + "\n", + "# Convert storm regimes to categorical datatype\n", + "cat_type = CategoricalDtype(\n", + " categories=['swash', 'collision', 'overwash', 'inundation'], ordered=True)\n", + "df_obs.storm_regime = df_obs_impacts.storm_regime.astype(cat_type)\n", + "\n", + "for model in impacts['forecasted']:\n", + " df_for = impacts['forecasted'][model]\n", + " df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n", + "\n", + " m = sklearn.metrics.classification_report(\n", + " df_obs.storm_regime.astype(cat_type).cat.codes.values,\n", + " df_for.storm_regime.astype(cat_type).cat.codes.values,\n", + " labels=[0, 1, 2, 3],\n", + " target_names=['swash', 'collision', 'overwash', 'inundation'])\n", + " print(model)\n", + " print(m)\n", + " print()\n", + " " + ] + } + ], + "metadata": { + "hide_input": false, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/07_longshore_plotting.ipynb b/notebooks/07_longshore_plotting.ipynb deleted file mode 100644 index 1f5a954..0000000 --- a/notebooks/07_longshore_plotting.ipynb +++ /dev/null @@ -1,348 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Longshore plots of each beach\n", - "- Need to create a longshore plot of each beach to see how the variables change alongshore." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup notebook" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Enable autoreloading of our modules. 
\n", - "# Most of the code will be located in the /src/ folder, \n", - "# and then called from the notebook.\n", - "%matplotlib inline\n", - "%reload_ext autoreload\n", - "%autoreload" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.core.debugger import set_trace\n", - "\n", - "import pandas as pd\n", - "import numpy as np\n", - "import os\n", - "import decimal\n", - "import plotly\n", - "import plotly.graph_objs as go\n", - "import plotly.plotly as py\n", - "import plotly.tools as tls\n", - "import plotly.figure_factory as ff\n", - "from plotly import tools\n", - "import plotly.io as pio\n", - "from scipy import stats\n", - "import math\n", - "import matplotlib\n", - "from matplotlib import cm\n", - "import colorlover as cl\n", - "from tqdm import tqdm_notebook\n", - "from ipywidgets import widgets, Output\n", - "from IPython.display import display, clear_output, Image, HTML\n", - "from scipy import stats\n", - "from sklearn.metrics import confusion_matrix\n", - "import matplotlib.pyplot as plt\n", - "from scipy.interpolate import interp1d\n", - "from pandas.api.types import CategoricalDtype" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Matplot lib default settings\n", - "plt.rcParams[\"figure.figsize\"] = (10,6)\n", - "plt.rcParams['axes.grid']=True\n", - "plt.rcParams['grid.alpha'] = 0.5\n", - "plt.rcParams['grid.color'] = \"grey\"\n", - "plt.rcParams['grid.linestyle'] = \"--\"\n", - "plt.rcParams['axes.grid']=True\n", - "\n", - "# https://stackoverflow.com/a/20709149\n", - "matplotlib.rcParams['text.usetex'] = True\n", - "\n", - "matplotlib.rcParams['text.latex.preamble'] = [\n", - " r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n", - " r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n", - " r'\\usepackage{helvet}', # set the normal font here\n", - " r'\\usepackage{amsmath}',\n", - " r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n", - " r'\\sansmath', # <- tricky! 
-- gotta actually tell tex to use!\n", - "] " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def df_from_csv(csv, index_col, data_folder='../data/interim'):\n", - " print('Importing {}'.format(csv))\n", - " return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n", - "\n", - "df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n", - "df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n", - "df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n", - "df_sites = df_from_csv('sites.csv', index_col=[0])\n", - "df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])\n", - "\n", - "# Note that the forecasted data sets should be in the same order for impacts and twls\n", - "impacts = {\n", - " 'forecasted': {\n", - " 'foreshore_slope_sto06': df_from_csv('impacts_forecasted_foreshore_slope_sto06.csv', index_col=[0]),\n", - " 'mean_slope_sto06': df_from_csv('impacts_forecasted_mean_slope_sto06.csv', index_col=[0]),\n", - " },\n", - " 'observed': df_from_csv('impacts_observed.csv', index_col=[0])\n", - " }\n", - "\n", - "\n", - "twls = {\n", - " 'forecasted': {\n", - " 'foreshore_slope_sto06': df_from_csv('twl_foreshore_slope_sto06.csv', index_col=[0, 1]),\n", - " 'mean_slope_sto06':df_from_csv('twl_mean_slope_sto06.csv', index_col=[0, 1]),\n", - " }\n", - "}\n", - "print('Done!')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Generate plot for each beach" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "beach = 'NARRA'\n", - "\n", - "# Get the dataframe\n", - "df = impacts['forecasted']['mean_slope_sto06']\n", - "df = df.rename(columns={'storm_regime': 'forecasted_regime'})\n", - "\n", - "df_beach = df.loc[df.index.str.contains(beach)]\n", - "\n", - "# Add information about hydrodynamics at max(R_high) time\n", - "df_beach = df_beach.merge(\n", - " twls['forecasted']['mean_slope_sto06'].drop(columns=['R_high', 'R_low']),\n", - " left_on=['site_id', 'datetime'],\n", - " right_on=['site_id', 'datetime'])\n", - "\n", - "# Add information about observed impacts\n", - "obs_impacts = impacts['observed'].rename(columns={\n", - " 'storm_regime': 'observed_regime'\n", - "}).observed_regime.to_frame()\n", - "df_beach = df_beach.merge(obs_impacts, left_on='site_id', right_on='site_id')\n", - "\n", - "# Convert storm regimes to categorical datatype\n", - "cat_type = CategoricalDtype(\n", - " categories=['swash', 'collision', 'overwash', 'inundation'], ordered=True)\n", - "df_beach.forecasted_regime = df_beach.forecasted_regime.astype(cat_type)\n", - "df_beach.observed_regime = df_beach.observed_regime.astype(cat_type)\n", - "\n", - "# Get index\n", - "n = [x for x in range(len(df_beach))][::-1]\n", - "n_sites = [x for x in df_beach.index][::-1]\n", - "\n", - "f, (ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8) = plt.subplots(\n", - " 1,\n", - " 8,\n", - " sharey=True,\n", - " figsize=(14, 8),\n", - " gridspec_kw={'width_ratios': [4, 4, 2, 2, 2, 2,2,2]})\n", - "\n", - "# Specify colors for storm regimes\n", - "cmap = {\n", - " 'swash': '#1a9850',\n", - " 'collision': '#fee08b',\n", - " 'overwash': '#d73027'\n", - "}\n", - "\n", - "colors = [cmap.get(x) for x in df_beach.observed_regime]\n", - "colors = ['#d73027' if c is None else c for c in colors]\n", - "\n", - "# Plot forecasted and observed 
storm regime\n", - "ax1.scatter(\n", - " df_beach.observed_regime.cat.codes.replace(-1,np.NaN),\n", - " n,\n", - " color=colors,\n", - " marker='o',\n", - " label='Observed regime')\n", - "\n", - "ax1.scatter(\n", - " df_beach.forecasted_regime.cat.codes.replace(-1,np.NaN),\n", - " n,\n", - " color='b',\n", - " marker='o',\n", - " edgecolors='black',\n", - " facecolors='none',\n", - " label='Forecasted regime')\n", - "\n", - "ax1.set_title('Storm\\nregime')\n", - "ax1.set_xticks([0,1,2,3])\n", - "ax1.set_xticklabels(['swash','collision','overwash','inundation'])\n", - "ax1.tick_params(axis='x', rotation=45)\n", - "ax1.legend(loc='center', bbox_to_anchor=(0.5, -0.15))\n", - "\n", - "# Replace yticks with site_ids\n", - "yticks = ax1.get_yticks().tolist()\n", - "yticks = [n_sites[int(y)] if 0 <= y <= len(n_sites) else y for y in yticks ]\n", - "ax1.set_yticklabels(yticks)\n", - "\n", - "# Water levels\n", - "ax2.plot(df_beach.R_high, n, color='#2c7bb6')\n", - "ax2.plot(df_beach.R_low, n, color='#2c7bb6')\n", - "ax2.fill_betweenx(\n", - " n, df_beach.R_low, df_beach.R_high, alpha=0.2, color='#2c7bb6', label='$R_{low}$ to $R_{high}$')\n", - "\n", - "# Dune elevations\n", - "ax2.plot(df_beach.dune_crest_z, n, color='#fdae61')\n", - "ax2.plot(df_beach.dune_toe_z, n, color='#fdae61')\n", - "ax2.fill_betweenx(\n", - " n, df_beach.dune_toe_z, df_beach.dune_crest_z, alpha=0.2, color='#fdae61', label='$D_{low}$ to $D_{high}$')\n", - "\n", - "ax2.set_title('TWL \\& Dune\\nElevations')\n", - "ax2.legend(loc='center',bbox_to_anchor=(0.5,-0.15))\n", - "ax2.set_xlabel('Elevation (m AHD)')\n", - "\n", - "# Plot R_high - D_low\n", - "ax3.plot(df_beach.R_high - df_beach.dune_toe_z,n,color='#999999')\n", - "ax3.axvline(x=0,color='black',linestyle=':')\n", - "ax3.set_title('$R_{high}$ - $D_{low}$')\n", - "ax3.set_xlabel('Height (m)')\n", - "ax3.set_xlim([-2,2])\n", - "\n", - "# Wave height, wave period, beach slope\n", - "ax4.plot(df_beach.Hs0, n,color='#377eb8')\n", - "ax4.set_title('$H_{s0}$')\n", - "ax4.set_xlabel('Sig. 
wave height (m)')\n", - "ax4.set_xlim([3,5])\n", - "\n", - "ax5.plot(df_beach.Tp, n,color='#e41a1c')\n", - "ax5.set_title('$T_{p}$')\n", - "ax5.set_xlabel('Peak wave period (s)')\n", - "ax5.set_xlim([8,14])\n", - "\n", - "ax6.plot(df_beach.tide, n,color='#a6cee3')\n", - "ax6.set_title('Tide')\n", - "ax6.set_xlabel('Elevation (m AHD)')\n", - "ax6.set_xlim([0,2])\n", - "\n", - "ax7.plot(df_beach.beta, n,color='#4daf4a')\n", - "ax7.set_title(r'$\\beta$')\n", - "ax7.set_xlabel('Mean prestorm\\nbeach slope')\n", - "ax7.set_xlim([0,0.15])\n", - "\n", - "ax8.plot(df_beach.R2, n,color='#6a3d9a')\n", - "ax8.set_title(r'$R_{2\\%}$')\n", - "ax8.set_xlabel('Height (m)')\n", - "\n", - "plt.tight_layout()\n", - "f.subplots_adjust(top=0.88)\n", - "f.suptitle(beach)\n", - "\n", - "\n", - "# Print to figure\n", - "plt.savefig('07-{}.png'.format(beach), dpi=600, bbox_inches='tight') \n", - "\n", - "plt.show()\n", - "plt.close()" - ] - } - ], - "metadata": { - "hide_input": false, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - "library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/09_run_comparison.ipynb b/notebooks/09_superseded_run_comparison.ipynb similarity index 100% rename from notebooks/09_run_comparison.ipynb rename to notebooks/09_superseded_run_comparison.ipynb diff --git a/notebooks/10_profile_clustering.ipynb b/notebooks/10_profile_clustering.ipynb new file mode 100644 index 0000000..14e486f --- /dev/null +++ b/notebooks/10_profile_clustering.ipynb @@ -0,0 +1,186 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "\n", + "from dtaidistance import dtw\n", + "from dtaidistance import dtw_visualisation as dtwvis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import data\n", + "def df_from_csv(csv, index_col, data_folder='../data/interim'):\n", + " print('Importing {}'.format(csv))\n", + " return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n", + "\n", + "df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n", + "\n", + "print('Done!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use dtaidistance package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "p1 = df_profiles.dropna(subset=['z']).xs(['AVOCAn0003','prestorm'],level=['site_id','profile_type']).z.values\n", + "p2 = df_profiles.dropna(subset=['z']).xs(['AVOCAn0004','prestorm'],level=['site_id','profile_type']).z.values\n", + "path = dtw.warping_path(p1,p2)\n", + "dtwvis.plot_warping(p1,p2,path)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use kshape package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "profiles = df_profiles.dropna(subset=['z'])\\\n", + " .xs(['prestorm'],level=['profile_type'])\\\n", + " .groupby('site_id').z\\\n", + " .apply(list).tolist()\n", + "\n", + "\n", + "# profiles = [x[-50:] for x in profiles]\n", + "# print(min(len(x) for x in profiles))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from kshape.core import kshape, zscore\n", + "\n", + "time_series = [[1,2,3,4], [0,1,2,3], [0,1,2,3], [1,2,2,3]]\n", + "cluster_num = 4\n", + "clusters = kshape(zscore(profiles, axis=1), cluster_num)\n", + "# print(clusters)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cluster_no = 0\n", + "\n", + "# Plot shape of all clusters\n", + "plt.figure(0)\n", + "for n,cluster in enumerate(clusters):\n", + " plt.plot(cluster[0],label=n)\n", + "plt.legend()\n", + "\n", + "plt.figure(1)\n", + "# Plot all profiles in partiuclar cluster\n", + "for profile_no in clusters[cluster_no][1]:\n", + " plt.plot(profiles[profile_no])\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "a = [1,2,3,4,5,6]\n", + "a[-1:]" + ] + } + ], + "metadata": { + "hide_input": false, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}