You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
446 lines
14 KiB
Plaintext
446 lines
14 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Investigate variables by observed storm impact and prediction accuracy"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Setup notebook\n",
|
|
"Import our required packages and set default plotting options."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Enable autoreloading of our modules. \n",
|
|
"# Most of the code will be located in the /src/ folder, \n",
|
|
"# and then called from the notebook.\n",
|
|
"%matplotlib inline\n",
|
|
"%reload_ext autoreload\n",
|
|
"%autoreload 2"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from IPython.core.debugger import set_trace\n",
|
|
"\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import os\n",
|
|
"import decimal\n",
|
|
"import plotly\n",
|
|
"import plotly.graph_objs as go\n",
|
|
"import plotly.plotly as py\n",
|
|
"import plotly.tools as tls\n",
|
|
"import plotly.figure_factory as ff\n",
|
|
"from plotly import tools\n",
|
|
"import plotly.io as pio\n",
|
|
"from scipy import stats\n",
|
|
"import math\n",
|
|
"import matplotlib\n",
|
|
"from matplotlib import cm\n",
|
|
"import colorlover as cl\n",
|
|
"from tqdm import tqdm_notebook\n",
|
|
"from ipywidgets import widgets, Output\n",
|
|
"from IPython.display import display, clear_output, Image, HTML\n",
|
|
"from scipy import stats\n",
|
|
"from sklearn.metrics import confusion_matrix\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from matplotlib.ticker import MultipleLocator\n",
|
|
"from matplotlib.lines import Line2D\n",
|
|
"from cycler import cycler\n",
|
|
"from scipy.interpolate import interp1d\n",
|
|
"from pandas.api.types import CategoricalDtype\n",
|
|
"import seaborn as sns\n",
|
|
"sns.set(style=\"white\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Matplotlib default settings\n",
|
|
"plt.rcParams[\"figure.figsize\"] = (10,6)\n",
|
|
"plt.rcParams['axes.grid']=True\n",
|
|
"plt.rcParams['grid.alpha'] = 0.5\n",
|
|
"plt.rcParams['grid.color'] = \"grey\"\n",
|
|
"plt.rcParams['grid.linestyle'] = \"--\"\n",
|
|
"plt.rcParams['axes.grid']=True\n",
|
|
"\n",
|
|
"# https://stackoverflow.com/a/20709149\n",
|
|
"# matplotlib.rcParams['text.usetex'] = True\n",
|
|
"\n",
|
|
"matplotlib.rcParams['text.latex.preamble'] = [\n",
|
|
" r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n",
|
|
" r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n",
|
|
" r'\\usepackage{helvet}', # set the normal font here\n",
|
|
" r'\\usepackage{amsmath}',\n",
|
|
" r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n",
|
|
" r'\\sansmath', # <- tricky! -- gotta actually tell tex to use!\n",
|
|
"] "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Import data\n",
|
|
"Import our data from the `../data/interim/` folder and load it into pandas dataframes."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
|
|
" print('Importing {}'.format(csv))\n",
|
|
" return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n",
|
|
"\n",
|
|
"df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n",
|
|
"df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n",
|
|
"df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n",
|
|
"df_sites = df_from_csv('sites.csv', index_col=[0])\n",
|
|
"df_sites_waves = df_from_csv('sites_waves.csv', index_col=[0])\n",
|
|
"df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])\n",
|
|
"\n",
|
|
"# Note that the forecasted data sets should be in the same order for impacts and twls\n",
|
|
"impacts = {\n",
|
|
" 'forecasted': {\n",
|
|
" 'postintertidal_slope_sto06': df_from_csv('impacts_forecasted_postintertidal_slope_sto06.csv', index_col=[0]),\n",
|
|
" 'postmean_slope_sto06': df_from_csv('impacts_forecasted_postmean_slope_sto06.csv', index_col=[0]),\n",
|
|
" 'preintertidal_slope_sto06': df_from_csv('impacts_forecasted_preintertidal_slope_sto06.csv', index_col=[0]),\n",
|
|
" 'premean_slope_sto06': df_from_csv('impacts_forecasted_premean_slope_sto06.csv', index_col=[0]),\n",
|
|
" },\n",
|
|
" 'observed': df_from_csv('impacts_observed.csv', index_col=[0])\n",
|
|
" }\n",
|
|
"\n",
|
|
"twls = {\n",
|
|
" 'forecasted': {\n",
|
|
" 'postintertidal_slope_sto06': df_from_csv('twl_postintertidal_slope_sto06.csv', index_col=[0,1]),\n",
|
|
" 'postmean_slope_sto06': df_from_csv('twl_postmean_slope_sto06.csv', index_col=[0,1]),\n",
|
|
" 'preintertidal_slope_sto06': df_from_csv('twl_preintertidal_slope_sto06.csv', index_col=[0,1]),\n",
|
|
" 'premean_slope_sto06': df_from_csv('twl_premean_slope_sto06.csv', index_col=[0,1]),\n",
|
|
" }\n",
|
|
"}\n",
|
|
"print('Done!')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Gather data into one dataframe\n",
|
|
"For plotting, gather all our data into one dataframe."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Which forecasted impacts dataframe should we use to assess prediction performance?\n",
|
|
"df_selected_forecast = impacts['forecasted']['postintertidal_slope_sto06']\n",
|
|
"\n",
|
|
"# Create df with all our data\n",
|
|
"df = impacts['observed'].merge(\n",
|
|
" df_sites_waves, left_index=True, right_index=True)\n",
|
|
"\n",
|
|
"# Join observed/forecasted regimes\n",
|
|
"df_forecasted = df_selected_forecast.rename(\n",
|
|
" {'storm_regime': 'forecasted_regime'\n",
|
|
" }, axis='columns').forecasted_regime\n",
|
|
"df = pd.concat([df, df_forecasted], axis=1)\n",
|
|
"\n",
|
|
"# Create new accuracy column which categorises each prediction\n",
|
|
"df.loc[(df.storm_regime == 'swash') & (df.forecasted_regime == 'swash'), 'accuracy'] = 'correct swash'\n",
|
|
"df.loc[(df.storm_regime == 'collision') & (df.forecasted_regime == 'collision'), 'accuracy'] = 'correct collision'\n",
|
|
"df.loc[(df.storm_regime == 'swash') & (df.forecasted_regime == 'collision'), 'accuracy'] = 'overpredicted swash'\n",
|
|
"df.loc[(df.storm_regime == 'collision') & (df.forecasted_regime == 'swash'), 'accuracy'] = 'underpredicted collision'\n",
|
|
"\n",
|
|
"print('df columns:\\n===')\n",
|
|
"for col in sorted(df.columns):\n",
|
|
" print(col)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Create plots"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Variable pairplot, by observed storm impact\n",
|
|
"Create pairplot of selected variables and look for relationships between each. Colors represent the different observed storm impact regimes."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"g = sns.pairplot(\n",
|
|
" data=df,\n",
|
|
" hue='storm_regime',\n",
|
|
" dropna=True,\n",
|
|
" palette={\n",
|
|
" 'swash': 'blue',\n",
|
|
" 'collision': 'orange',\n",
|
|
" 'overwash': 'red'\n",
|
|
" },\n",
|
|
" plot_kws=dict(s=20, edgecolor=\"white\", linewidth=0.1, alpha=0.1),\n",
|
|
" vars=['beta_prestorm_mean',\n",
|
|
" 'beta_poststorm_mean',\n",
|
|
" 'beta_diff_mean',\n",
|
|
" 'swash_pct_change',\n",
|
|
" 'width_msl_change_m',\n",
|
|
" 'width_msl_change_pct',\n",
|
|
" 'Exscum'])\n",
|
|
"g.savefig('11_pairplot_observed_impacts.png')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Variable pairplot, by observed/prediction class\n",
|
|
"Create pairplot of selected variables and look for relationships between each. Colors represent the different observed/prediction classes."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"g = sns.pairplot(\n",
|
|
" data=df,\n",
|
|
" hue='accuracy',\n",
|
|
" dropna=True,\n",
|
|
" palette={\n",
|
|
" 'correct swash': 'blue',\n",
|
|
" 'correct collision': 'green',\n",
|
|
" 'overpredicted swash': 'orange',\n",
|
|
" 'underpredicted collision': 'red',\n",
|
|
" },\n",
|
|
" plot_kws=dict(s=20, edgecolor=\"white\", linewidth=0.1, alpha=0.1),\n",
|
|
" vars=['beta_prestorm_mean',\n",
|
|
" 'beta_poststorm_mean',\n",
|
|
" 'beta_diff_mean',\n",
|
|
" 'swash_pct_change',\n",
|
|
" 'width_msl_change_m',\n",
|
|
" 'width_msl_change_pct',\n",
|
|
" 'Exscum'])\n",
|
|
"g.savefig('11_pairplot_accuracy_classes.png')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Pre/post storm slope by observed/predicted class"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# First create a melted dataframe since our columns aren't exactly as they should be for plotting\n",
|
|
"df_temp = df.copy()\n",
|
|
"df_temp = df_temp.reset_index()\n",
|
|
"\n",
|
|
"df_melt = pd.melt(\n",
|
|
" df_temp,\n",
|
|
" id_vars=['site_id', 'accuracy'],\n",
|
|
" value_vars=['beta_prestorm_mean', 'beta_poststorm_mean'],\n",
|
|
" var_name='profile_type',\n",
|
|
" value_name='beta_mean')\n",
|
|
"\n",
|
|
"df_melt.loc[df_melt.profile_type == 'beta_prestorm_mean','profile_type'] = 'prestorm'\n",
|
|
"df_melt.loc[df_melt.profile_type == 'beta_poststorm_mean','profile_type'] = 'poststorm'\n",
|
|
"df_melt.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"f, ax = plt.subplots(figsize=(6,5))\n",
|
|
"\n",
|
|
"cats = ['correct swash', 'overpredicted swash','underpredicted collision','correct collision']\n",
|
|
"\n",
|
|
"# Plot pre/post storm mean slope for each accuracy class as grouped boxplots\n",
|
|
"sns.boxplot(\n",
|
|
" data=df_melt,\n",
|
|
" x=\"accuracy\",\n",
|
|
" y=\"beta_mean\",\n",
|
|
" hue=\"profile_type\",\n",
|
|
" order=cats\n",
|
|
")\n",
|
|
"\n",
|
|
"group_labels = [x.replace(' ','\\n') for x in cats]\n",
|
|
"ax.set_xticklabels(group_labels)\n",
|
|
"\n",
|
|
"# Setup ticks and grid\n",
|
|
"ax.xaxis.grid(True)\n",
|
|
"major_ticks = np.arange(-1, 1, 0.05)\n",
|
|
"minor_ticks = np.arange(-1, 1, 0.01)\n",
|
|
"ax.set_yticks(major_ticks)\n",
|
|
"ax.set_yticks(minor_ticks, minor=True)\n",
|
|
"ax.grid(which='both')\n",
|
|
"ax.grid(which='minor', alpha=0.3,linestyle='--')\n",
|
|
"ax.grid(which='major', alpha=0.8,linestyle='-')\n",
|
|
"\n",
|
|
"ax.set_ylim([-0.02,0.3])\n",
|
|
"\n",
|
|
"f.savefig('11_prepost_slopes_accuracy_classes.png',dpi=600)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Change in slope by observed/predicted class"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"f, ax = plt.subplots(figsize=(6,5))\n",
|
|
"\n",
|
|
"cats = ['correct swash', 'overpredicted swash','underpredicted collision','correct collision']\n",
|
|
"\n",
|
|
"# Plot the change in mean slope for each accuracy class as boxplots\n",
|
|
"sns.boxplot(\n",
|
|
" data=df,\n",
|
|
" x=\"accuracy\",\n",
|
|
" y=\"beta_diff_mean\",\n",
|
|
" order=cats\n",
|
|
")\n",
|
|
"\n",
|
|
"group_labels = [x.replace(' ','\\n') for x in cats]\n",
|
|
"ax.set_xticklabels(group_labels)\n",
|
|
"\n",
|
|
"# Setup ticks and grid\n",
|
|
"ax.xaxis.grid(True)\n",
|
|
"major_ticks = np.arange(-1, 1, 0.05)\n",
|
|
"minor_ticks = np.arange(-1, 1, 0.01)\n",
|
|
"ax.set_yticks(major_ticks)\n",
|
|
"ax.set_yticks(minor_ticks, minor=True)\n",
|
|
"ax.grid(which='both')\n",
|
|
"ax.grid(which='minor', alpha=0.3,linestyle='--')\n",
|
|
"ax.grid(which='major', alpha=0.8,linestyle='-')\n",
|
|
"\n",
|
|
"ax.set_ylim([-0.2,0.2])\n",
|
|
"\n",
|
|
"f.savefig('11_change_in_slopes_accuracy_classes.png',dpi=600)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"hide_input": false,
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.6"
|
|
},
|
|
"toc": {
|
|
"base_numbering": 1,
|
|
"nav_menu": {},
|
|
"number_sections": true,
|
|
"sideBar": true,
|
|
"skip_h1_title": false,
|
|
"title_cell": "Table of Contents",
|
|
"title_sidebar": "Contents",
|
|
"toc_cell": false,
|
|
"toc_position": {
|
|
"height": "calc(100% - 180px)",
|
|
"left": "10px",
|
|
"top": "150px",
|
|
"width": "223.594px"
|
|
},
|
|
"toc_section_display": true,
|
|
"toc_window_display": true
|
|
},
|
|
"varInspector": {
|
|
"cols": {
|
|
"lenName": 16,
|
|
"lenType": 16,
|
|
"lenVar": 40
|
|
},
|
|
"kernels_config": {
|
|
"python": {
|
|
"delete_cmd_postfix": "",
|
|
"delete_cmd_prefix": "del ",
|
|
"library": "var_list.py",
|
|
"varRefreshCmd": "print(var_dic_list())"
|
|
},
|
|
"r": {
|
|
"delete_cmd_postfix": ") ",
|
|
"delete_cmd_prefix": "rm(",
|
|
"library": "var_list.r",
|
|
"varRefreshCmd": "cat(var_dic_list()) "
|
|
}
|
|
},
|
|
"types_to_exclude": [
|
|
"module",
|
|
"function",
|
|
"builtin_function_or_method",
|
|
"instance",
|
|
"_Feature"
|
|
],
|
|
"window_display": false
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|