Update QGIS and notebooks

develop
Chris Leaman 6 years ago
parent ecd1d66dd2
commit 708606391f

@ -58,7 +58,7 @@
"from ipywidgets import widgets, Output\n", "from ipywidgets import widgets, Output\n",
"from IPython.display import display, clear_output, Image, HTML\n", "from IPython.display import display, clear_output, Image, HTML\n",
"from scipy import stats\n", "from scipy import stats\n",
"from sklearn.metrics import confusion_matrix\n", "from sklearn.metrics import confusion_matrix, matthews_corrcoef\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"from matplotlib.ticker import MultipleLocator\n", "from matplotlib.ticker import MultipleLocator\n",
"from matplotlib.lines import Line2D\n", "from matplotlib.lines import Line2D\n",
@ -116,6 +116,7 @@
"df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n", "df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n",
"df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n", "df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n",
"df_sites = df_from_csv('sites.csv', index_col=[0])\n", "df_sites = df_from_csv('sites.csv', index_col=[0])\n",
"df_sites_waves = df_from_csv('sites_waves.csv', index_col=[0])\n",
"df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])\n", "df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])\n",
"\n", "\n",
"# Note that the forecasted data sets should be in the same order for impacts and twls\n", "# Note that the forecasted data sets should be in the same order for impacts and twls\n",
@ -204,12 +205,12 @@
" \n", " \n",
" # Determine the height of the figure, based on the number of sites.\n", " # Determine the height of the figure, based on the number of sites.\n",
" fig_height = max(6, 0.18 * len(n_sites))\n", " fig_height = max(6, 0.18 * len(n_sites))\n",
" f, (ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8) = plt.subplots(\n", " f, (ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8, ax9) = plt.subplots(\n",
" 1,\n", " 1,\n",
" 8,\n", " 9,\n",
" sharey=True,\n", " sharey=True,\n",
" figsize=(18, fig_height),\n", " figsize=(18, fig_height),\n",
" gridspec_kw={'width_ratios': [4, 4, 2, 2, 2, 2, 2, 2]})\n", " gridspec_kw={'width_ratios': [4, 4, 2, 2, 2, 2, 2, 2,2]})\n",
"\n", "\n",
" # ax1: Impacts\n", " # ax1: Impacts\n",
"\n", "\n",
@ -442,7 +443,7 @@
" ax6.plot(df_beach.Hs0, n, color='#999999')\n", " ax6.plot(df_beach.Hs0, n, color='#999999')\n",
" ax6.set_title('$H_{s0}$')\n", " ax6.set_title('$H_{s0}$')\n",
" ax6.set_xlabel('Sig. wave height (m)')\n", " ax6.set_xlabel('Sig. wave height (m)')\n",
" ax6.set_xlim([3, 5])\n", " ax6.set_xlim([2, 6])\n",
"\n", "\n",
" ax7.plot(df_beach.Tp, n, color='#999999')\n", " ax7.plot(df_beach.Tp, n, color='#999999')\n",
" ax7.set_title('$T_{p}$')\n", " ax7.set_title('$T_{p}$')\n",
@ -452,7 +453,11 @@
" ax8.plot(df_beach.tide, n, color='#999999')\n", " ax8.plot(df_beach.tide, n, color='#999999')\n",
" ax8.set_title('Tide \\& surge')\n", " ax8.set_title('Tide \\& surge')\n",
" ax8.set_xlabel('Elevation (m AHD)')\n", " ax8.set_xlabel('Elevation (m AHD)')\n",
" ax8.set_xlim([0, 2])\n", " ax8.set_xlim([1, 3])\n",
"\n",
" \n",
" # TODO Cumulative wave energy\n",
" # df_sites_waves\n",
" \n", " \n",
" plt.tight_layout()\n", " plt.tight_layout()\n",
" f.subplots_adjust(top=0.88)\n", " f.subplots_adjust(top=0.88)\n",
@ -472,12 +477,28 @@
" # # Print to figure\n", " # # Print to figure\n",
" plt.savefig('07_{}.png'.format(beach), dpi=600, bbox_inches='tight')\n", " plt.savefig('07_{}.png'.format(beach), dpi=600, bbox_inches='tight')\n",
"\n", "\n",
"# plt.show()\n", " plt.show()\n",
" plt.close()\n", " plt.close()\n",
" print('Done: {}'.format(beach))\n", " print('Done: {}'.format(beach))\n",
" \n",
" break\n",
"print('Done!')" "print('Done!')"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -523,6 +544,44 @@
" print()\n", " print()\n",
" " " "
] ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check matthews coefficient\n",
"# https://scikit-learn.org/stable/modules/generated/sklearn.metrics.matthews_corrcoef.html\n",
"\n",
"for model in impacts['forecasted']:\n",
" df_for = impacts['forecasted'][model]\n",
" df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n",
"\n",
" m = matthews_corrcoef(\n",
" df_obs.storm_regime.astype(cat_type).cat.codes.values,\n",
" df_for.storm_regime.astype(cat_type).cat.codes.values)\n",
" print('{}: {:.2f}'.format(model,m))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check accuracy\n",
"# https://scikit-learn.org/stable/modules/generated/sklearn.metrics.matthews_corrcoef.html\n",
"\n",
"for model in impacts['forecasted']:\n",
" df_for = impacts['forecasted'][model]\n",
" df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n",
"\n",
" m = sklearn.metrics.accuracy_score(\n",
" df_obs.storm_regime.astype(cat_type).cat.codes.values,\n",
" df_for.storm_regime.astype(cat_type).cat.codes.values)\n",
" print('{}: {:.2f}'.format(model,m))"
]
} }
], ],
"metadata": { "metadata": {

@ -63,7 +63,9 @@
"from matplotlib.lines import Line2D\n", "from matplotlib.lines import Line2D\n",
"from cycler import cycler\n", "from cycler import cycler\n",
"from scipy.interpolate import interp1d\n", "from scipy.interpolate import interp1d\n",
"from pandas.api.types import CategoricalDtype" "from pandas.api.types import CategoricalDtype\n",
"import seaborn as sns\n",
"sns.set(style=\"white\")"
] ]
}, },
{ {
@ -144,7 +146,8 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Sec1" "# Gather data into one dataframe\n",
"For plotting, gather all our data into one dataframe."
] ]
}, },
{ {
@ -153,9 +156,43 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Which forecasted impacts dataframe should we use to assess prediction performance?\n",
"df_selected_forecast = impacts['forecasted']['postintertidal_slope_sto06']\n",
"\n",
"# Create df with all our data\n", "# Create df with all our data\n",
"df = impacts['observed'].merge(df_sites_waves,left_index=True,right_index=True)\n", "df = impacts['observed'].merge(\n",
"df.columns" " df_sites_waves, left_index=True, right_index=True)\n",
"\n",
"# Join observed/forecasted regimes\n",
"df_forecasted = df_selected_forecast.rename(\n",
" {'storm_regime': 'forecasted_regime'\n",
" }, axis='columns').forecasted_regime\n",
"df = pd.concat([df, df_forecasted], axis=1)\n",
"\n",
"# Create new accuracy column which categorises each prediction\n",
"df.loc[(df.storm_regime == 'swash') & (df.forecasted_regime == 'swash'), 'accuracy'] = 'correct swash'\n",
"df.loc[(df.storm_regime == 'collision') & (df.forecasted_regime == 'collision'), 'accuracy'] = 'correct collision'\n",
"df.loc[(df.storm_regime == 'swash') & (df.forecasted_regime == 'collision'), 'accuracy'] = 'overpredicted swash'\n",
"df.loc[(df.storm_regime == 'collision') & (df.forecasted_regime == 'swash'), 'accuracy'] = 'underpredicted collision'\n",
"\n",
"print('df columns:\\n===')\n",
"for col in sorted(df.columns):\n",
" print(col)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create plots"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Variable pairplot, by observed storm impact\n",
"Create pairplot of selected variables and look for relationships between each. Colors represent the different observed storm impact regimes."
] ]
}, },
{ {
@ -164,8 +201,6 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import seaborn as sns\n",
"sns.set(style=\"white\")\n",
"g = sns.pairplot(\n", "g = sns.pairplot(\n",
" data=df,\n", " data=df,\n",
" hue='storm_regime',\n", " hue='storm_regime',\n",
@ -175,13 +210,23 @@
" 'collision': 'orange',\n", " 'collision': 'orange',\n",
" 'overwash': 'red'\n", " 'overwash': 'red'\n",
" },\n", " },\n",
" plot_kws=dict(s=20, edgecolor=\"white\", linewidth=0.1, alpha=0.1),\n",
" vars=['beta_prestorm_mean',\n", " vars=['beta_prestorm_mean',\n",
" 'beta_poststorm_mean',\n", " 'beta_poststorm_mean',\n",
" 'beta_diff_mean',\n", " 'beta_diff_mean',\n",
" 'swash_pct_change',\n", " 'swash_pct_change',\n",
" 'df_width_msl_change_m',\n", " 'width_msl_change_m',\n",
" 'df_width_msl_change_pct',\n", " 'width_msl_change_pct',\n",
" 'Exscum'])" " 'Exscum'])\n",
"g.savefig('11_pairplot_observed_impacts.png')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Variable pairplot, by observed/prediction class\n",
"Create pairplot of selected variables and look for relationships between each. Colors represent the different observed/prediction classes."
] ]
}, },
{ {
@ -190,8 +235,142 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"g.savefig('11_pairplot.png')" "g = sns.pairplot(\n",
" data=df,\n",
" hue='accuracy',\n",
" dropna=True,\n",
" palette={\n",
" 'correct swash': 'blue',\n",
" 'correct collision': 'green',\n",
" 'overpredicted swash': 'orange',\n",
" 'underpredicted collision': 'red',\n",
" },\n",
" plot_kws=dict(s=20, edgecolor=\"white\", linewidth=0.1, alpha=0.1),\n",
" vars=['beta_prestorm_mean',\n",
" 'beta_poststorm_mean',\n",
" 'beta_diff_mean',\n",
" 'swash_pct_change',\n",
" 'width_msl_change_m',\n",
" 'width_msl_change_pct',\n",
" 'Exscum'])\n",
"g.savefig('11_pairplot_accuracy_classes.png')\n"
] ]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Pre/post storm slope by observed/predicted class"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# First create a melted dataframe since our coulmn's aren't exactly as they should be for plotting\n",
"df_temp = df.copy()\n",
"df_temp = df_temp.reset_index()\n",
"\n",
"df_melt = pd.melt(\n",
" df_temp,\n",
" id_vars=['site_id', 'accuracy'],\n",
" value_vars=['beta_prestorm_mean', 'beta_poststorm_mean'],\n",
" var_name='profile_type',\n",
" value_name='beta_mean')\n",
"\n",
"df_melt.loc[df_melt.profile_type == 'beta_prestorm_mean','profile_type'] = 'prestorm'\n",
"df_melt.loc[df_melt.profile_type == 'beta_poststorm_mean','profile_type'] = 'poststorm'\n",
"df_melt.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"f, ax = plt.subplots(figsize=(6,5))\n",
"\n",
"cats = ['correct swash', 'overpredicted swash','underpredicted collision','correct collision']\n",
"\n",
"# Plot the orbital period with horizontal boxes\n",
"sns.boxplot(\n",
" data=df_melt,\n",
" x=\"accuracy\",\n",
" y=\"beta_mean\",\n",
" hue=\"profile_type\",\n",
" order=cats\n",
")\n",
"\n",
"group_labels = [x.replace(' ','\\n') for x in cats]\n",
"ax.set_xticklabels(group_labels)\n",
"\n",
"# Setup ticks and grid\n",
"ax.xaxis.grid(True)\n",
"major_ticks = np.arange(-1, 1, 0.05)\n",
"minor_ticks = np.arange(-1, 1, 0.01)\n",
"ax.set_yticks(major_ticks)\n",
"ax.set_yticks(minor_ticks, minor=True)\n",
"ax.grid(which='both')\n",
"ax.grid(which='minor', alpha=0.3,linestyle='--')\n",
"ax.grid(which='major', alpha=0.8,linestyle='-')\n",
"\n",
"ax.set_ylim([-0.02,0.3])\n",
"\n",
"f.savefig('11_prepost_slopes_accuracy_classes.png',dpi=600)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Change in slope by observed/predicted class"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"f, ax = plt.subplots(figsize=(6,5))\n",
"\n",
"cats = ['correct swash', 'overpredicted swash','underpredicted collision','correct collision']\n",
"\n",
"# Plot the orbital period with horizontal boxes\n",
"sns.boxplot(\n",
" data=df,\n",
" x=\"accuracy\",\n",
" y=\"beta_diff_mean\",\n",
" order=cats\n",
")\n",
"\n",
"group_labels = [x.replace(' ','\\n') for x in cats]\n",
"ax.set_xticklabels(group_labels)\n",
"\n",
"# Setup ticks and grid\n",
"ax.xaxis.grid(True)\n",
"major_ticks = np.arange(-1, 1, 0.05)\n",
"minor_ticks = np.arange(-1, 1, 0.01)\n",
"ax.set_yticks(major_ticks)\n",
"ax.set_yticks(minor_ticks, minor=True)\n",
"ax.grid(which='both')\n",
"ax.grid(which='minor', alpha=0.3,linestyle='--')\n",
"ax.grid(which='major', alpha=0.8,linestyle='-')\n",
"\n",
"ax.set_ylim([-0.2,0.2])\n",
"\n",
"f.savefig('11_change_in_slopes_accuracy_classes.png',dpi=600)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {
@ -222,9 +401,14 @@
"title_cell": "Table of Contents", "title_cell": "Table of Contents",
"title_sidebar": "Contents", "title_sidebar": "Contents",
"toc_cell": false, "toc_cell": false,
"toc_position": {}, "toc_position": {
"height": "calc(100% - 180px)",
"left": "10px",
"top": "150px",
"width": "223.594px"
},
"toc_section_display": true, "toc_section_display": true,
"toc_window_display": false "toc_window_display": true
}, },
"varInspector": { "varInspector": {
"cols": { "cols": {

Binary file not shown.
Loading…
Cancel
Save