diff --git a/notebooks/07_evaluate_model_performance.ipynb b/notebooks/07_evaluate_model_performance.ipynb index d88c6d8..649abcb 100644 --- a/notebooks/07_evaluate_model_performance.ipynb +++ b/notebooks/07_evaluate_model_performance.ipynb @@ -58,7 +58,7 @@ "from ipywidgets import widgets, Output\n", "from IPython.display import display, clear_output, Image, HTML\n", "from scipy import stats\n", - "from sklearn.metrics import confusion_matrix\n", + "from sklearn.metrics import confusion_matrix, matthews_corrcoef\n", "import matplotlib.pyplot as plt\n", "from matplotlib.ticker import MultipleLocator\n", "from matplotlib.lines import Line2D\n", @@ -116,6 +116,7 @@ "df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n", "df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n", "df_sites = df_from_csv('sites.csv', index_col=[0])\n", + "df_sites_waves = df_from_csv('sites_waves.csv', index_col=[0])\n", "df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])\n", "\n", "# Note that the forecasted data sets should be in the same order for impacts and twls\n", @@ -204,12 +205,12 @@ " \n", " # Determine the height of the figure, based on the number of sites.\n", " fig_height = max(6, 0.18 * len(n_sites))\n", - " f, (ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8) = plt.subplots(\n", + " f, (ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8, ax9) = plt.subplots(\n", " 1,\n", - " 8,\n", + " 9,\n", " sharey=True,\n", " figsize=(18, fig_height),\n", - " gridspec_kw={'width_ratios': [4, 4, 2, 2, 2, 2, 2, 2]})\n", + " gridspec_kw={'width_ratios': [4, 4, 2, 2, 2, 2, 2, 2,2]})\n", "\n", " # ax1: Impacts\n", "\n", @@ -442,7 +443,7 @@ " ax6.plot(df_beach.Hs0, n, color='#999999')\n", " ax6.set_title('$H_{s0}$')\n", " ax6.set_xlabel('Sig. wave height (m)')\n", - " ax6.set_xlim([3, 5])\n", + " ax6.set_xlim([2, 6])\n", "\n", " ax7.plot(df_beach.Tp, n, color='#999999')\n", " ax7.set_title('$T_{p}$')\n", @@ -452,8 +453,12 @@ " ax8.plot(df_beach.tide, n, color='#999999')\n", " ax8.set_title('Tide \\& surge')\n", " ax8.set_xlabel('Elevation (m AHD)')\n", - " ax8.set_xlim([0, 2])\n", + " ax8.set_xlim([1, 3])\n", "\n", + " \n", + " # TODO Cumulative wave energy\n", + " # df_sites_waves\n", + " \n", " plt.tight_layout()\n", " f.subplots_adjust(top=0.88)\n", " f.suptitle(beach.replace('_', '\\_'))\n", @@ -472,12 +477,28 @@ " # # Print to figure\n", " plt.savefig('07_{}.png'.format(beach), dpi=600, bbox_inches='tight')\n", "\n", - "# plt.show()\n", + " plt.show()\n", " plt.close()\n", " print('Done: {}'.format(beach))\n", + " \n", + " break\n", "print('Done!')" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -523,6 +544,44 @@ " print()\n", " " ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check matthews coefficient\n", + "# https://scikit-learn.org/stable/modules/generated/sklearn.metrics.matthews_corrcoef.html\n", + "\n", + "for model in impacts['forecasted']:\n", + " df_for = impacts['forecasted'][model]\n", + " df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n", + "\n", + " m = matthews_corrcoef(\n", + " df_obs.storm_regime.astype(cat_type).cat.codes.values,\n", + " df_for.storm_regime.astype(cat_type).cat.codes.values)\n", + " print('{}: {:.2f}'.format(model,m))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check accuracy\n", + "# https://scikit-learn.org/stable/modules/generated/sklearn.metrics.matthews_corrcoef.html\n", + "\n", + "for model in impacts['forecasted']:\n", + " df_for = impacts['forecasted'][model]\n", + " df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n", + "\n", + " m = sklearn.metrics.accuracy_score(\n", + " df_obs.storm_regime.astype(cat_type).cat.codes.values,\n", + " df_for.storm_regime.astype(cat_type).cat.codes.values)\n", + " print('{}: {:.2f}'.format(model,m))" + ] } ], "metadata": { diff --git a/notebooks/11_investigate.ipynb b/notebooks/11_investigate.ipynb index 70b620f..56c6ac9 100644 --- a/notebooks/11_investigate.ipynb +++ b/notebooks/11_investigate.ipynb @@ -63,7 +63,9 @@ "from matplotlib.lines import Line2D\n", "from cycler import cycler\n", "from scipy.interpolate import interp1d\n", - "from pandas.api.types import CategoricalDtype" + "from pandas.api.types import CategoricalDtype\n", + "import seaborn as sns\n", + "sns.set(style=\"white\")" ] }, { @@ -144,7 +146,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Sec1" + "# Gather data into one dataframe\n", + "For plotting, gather all our data into one dataframe." ] }, { @@ -153,9 +156,43 @@ "metadata": {}, "outputs": [], "source": [ + "# Which forecasted impacts dataframe should we use to assess prediction performance?\n", + "df_selected_forecast = impacts['forecasted']['postintertidal_slope_sto06']\n", + "\n", "# Create df with all our data\n", - "df = impacts['observed'].merge(df_sites_waves,left_index=True,right_index=True)\n", - "df.columns" + "df = impacts['observed'].merge(\n", + " df_sites_waves, left_index=True, right_index=True)\n", + "\n", + "# Join observed/forecasted regimes\n", + "df_forecasted = df_selected_forecast.rename(\n", + " {'storm_regime': 'forecasted_regime'\n", + " }, axis='columns').forecasted_regime\n", + "df = pd.concat([df, df_forecasted], axis=1)\n", + "\n", + "# Create new accuracy column which categorises each prediction\n", + "df.loc[(df.storm_regime == 'swash') & (df.forecasted_regime == 'swash'), 'accuracy'] = 'correct swash'\n", + "df.loc[(df.storm_regime == 'collision') & (df.forecasted_regime == 'collision'), 'accuracy'] = 'correct collision'\n", + "df.loc[(df.storm_regime == 'swash') & (df.forecasted_regime == 'collision'), 'accuracy'] = 'overpredicted swash'\n", + "df.loc[(df.storm_regime == 'collision') & (df.forecasted_regime == 'swash'), 'accuracy'] = 'underpredicted collision'\n", + "\n", + "print('df columns:\\n===')\n", + "for col in sorted(df.columns):\n", + " print(col)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create plots" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Variable pairplot, by observed storm impact\n", + "Create pairplot of selected variables and look for relationships between each. Colors represent the different observed storm impact regimes." ] }, { @@ -164,8 +201,6 @@ "metadata": {}, "outputs": [], "source": [ - "import seaborn as sns\n", - "sns.set(style=\"white\")\n", "g = sns.pairplot(\n", " data=df,\n", " hue='storm_regime',\n", @@ -175,13 +210,23 @@ " 'collision': 'orange',\n", " 'overwash': 'red'\n", " },\n", + " plot_kws=dict(s=20, edgecolor=\"white\", linewidth=0.1, alpha=0.1),\n", " vars=['beta_prestorm_mean',\n", " 'beta_poststorm_mean',\n", " 'beta_diff_mean',\n", " 'swash_pct_change',\n", - " 'df_width_msl_change_m',\n", - " 'df_width_msl_change_pct',\n", - " 'Exscum'])" + " 'width_msl_change_m',\n", + " 'width_msl_change_pct',\n", + " 'Exscum'])\n", + "g.savefig('11_pairplot_observed_impacts.png')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Variable pairplot, by observed/prediction class\n", + "Create pairplot of selected variables and look for relationships between each. Colors represent the different observed/prediction classes." ] }, { @@ -190,8 +235,142 @@ "metadata": {}, "outputs": [], "source": [ - "g.savefig('11_pairplot.png')" + "g = sns.pairplot(\n", + " data=df,\n", + " hue='accuracy',\n", + " dropna=True,\n", + " palette={\n", + " 'correct swash': 'blue',\n", + " 'correct collision': 'green',\n", + " 'overpredicted swash': 'orange',\n", + " 'underpredicted collision': 'red',\n", + " },\n", + " plot_kws=dict(s=20, edgecolor=\"white\", linewidth=0.1, alpha=0.1),\n", + " vars=['beta_prestorm_mean',\n", + " 'beta_poststorm_mean',\n", + " 'beta_diff_mean',\n", + " 'swash_pct_change',\n", + " 'width_msl_change_m',\n", + " 'width_msl_change_pct',\n", + " 'Exscum'])\n", + "g.savefig('11_pairplot_accuracy_classes.png')\n" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pre/post storm slope by observed/predicted class" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# First create a melted dataframe since our coulmn's aren't exactly as they should be for plotting\n", + "df_temp = df.copy()\n", + "df_temp = df_temp.reset_index()\n", + "\n", + "df_melt = pd.melt(\n", + " df_temp,\n", + " id_vars=['site_id', 'accuracy'],\n", + " value_vars=['beta_prestorm_mean', 'beta_poststorm_mean'],\n", + " var_name='profile_type',\n", + " value_name='beta_mean')\n", + "\n", + "df_melt.loc[df_melt.profile_type == 'beta_prestorm_mean','profile_type'] = 'prestorm'\n", + "df_melt.loc[df_melt.profile_type == 'beta_poststorm_mean','profile_type'] = 'poststorm'\n", + "df_melt.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "f, ax = plt.subplots(figsize=(6,5))\n", + "\n", + "cats = ['correct swash', 'overpredicted swash','underpredicted collision','correct collision']\n", + "\n", + "# Plot the orbital period with horizontal boxes\n", + "sns.boxplot(\n", + " data=df_melt,\n", + " x=\"accuracy\",\n", + " y=\"beta_mean\",\n", + " hue=\"profile_type\",\n", + " order=cats\n", + ")\n", + "\n", + "group_labels = [x.replace(' ','\\n') for x in cats]\n", + "ax.set_xticklabels(group_labels)\n", + "\n", + "# Setup ticks and grid\n", + "ax.xaxis.grid(True)\n", + "major_ticks = np.arange(-1, 1, 0.05)\n", + "minor_ticks = np.arange(-1, 1, 0.01)\n", + "ax.set_yticks(major_ticks)\n", + "ax.set_yticks(minor_ticks, minor=True)\n", + "ax.grid(which='both')\n", + "ax.grid(which='minor', alpha=0.3,linestyle='--')\n", + "ax.grid(which='major', alpha=0.8,linestyle='-')\n", + "\n", + "ax.set_ylim([-0.02,0.3])\n", + "\n", + "f.savefig('11_prepost_slopes_accuracy_classes.png',dpi=600)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Change in slope by observed/predicted class" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "f, ax = plt.subplots(figsize=(6,5))\n", + "\n", + "cats = ['correct swash', 'overpredicted swash','underpredicted collision','correct collision']\n", + "\n", + "# Plot the orbital period with horizontal boxes\n", + "sns.boxplot(\n", + " data=df,\n", + " x=\"accuracy\",\n", + " y=\"beta_diff_mean\",\n", + " order=cats\n", + ")\n", + "\n", + "group_labels = [x.replace(' ','\\n') for x in cats]\n", + "ax.set_xticklabels(group_labels)\n", + "\n", + "# Setup ticks and grid\n", + "ax.xaxis.grid(True)\n", + "major_ticks = np.arange(-1, 1, 0.05)\n", + "minor_ticks = np.arange(-1, 1, 0.01)\n", + "ax.set_yticks(major_ticks)\n", + "ax.set_yticks(minor_ticks, minor=True)\n", + "ax.grid(which='both')\n", + "ax.grid(which='minor', alpha=0.3,linestyle='--')\n", + "ax.grid(which='major', alpha=0.8,linestyle='-')\n", + "\n", + "ax.set_ylim([-0.2,0.2])\n", + "\n", + "f.savefig('11_change_in_slopes_accuracy_classes.png',dpi=600)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -222,9 +401,14 @@ "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, - "toc_position": {}, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "223.594px" + }, "toc_section_display": true, - "toc_window_display": false + "toc_window_display": true }, "varInspector": { "cols": { diff --git a/notebooks/qgis.qgz b/notebooks/qgis.qgz index 529bb77..c8cb214 100644 Binary files a/notebooks/qgis.qgz and b/notebooks/qgis.qgz differ