You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
920 lines
28 KiB
Plaintext
920 lines
28 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Investigate "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Setup notebook\n",
|
|
"Import our required packages and set default plotting options."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Enable autoreloading of our modules. \n",
|
|
"# Most of the code will be located in the /src/ folder, \n",
|
|
"# and then called from the notebook.\n",
|
|
"%matplotlib inline\n",
|
|
"%reload_ext autoreload\n",
|
|
"%autoreload"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from IPython.core.debugger import set_trace\n",
|
|
"\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import os\n",
|
|
"import decimal\n",
|
|
"import plotly\n",
|
|
"import plotly.graph_objs as go\n",
|
|
"import plotly.plotly as py\n",
|
|
"import plotly.tools as tls\n",
|
|
"import plotly.figure_factory as ff\n",
|
|
"from plotly import tools\n",
|
|
"import plotly.io as pio\n",
|
|
"from scipy import stats\n",
|
|
"import math\n",
|
|
"import matplotlib\n",
|
|
"from matplotlib import cm\n",
|
|
"import colorlover as cl\n",
|
|
"from tqdm import tqdm_notebook\n",
|
|
"from ipywidgets import widgets, Output\n",
|
|
"from IPython.display import display, clear_output, Image, HTML\n",
|
|
"from scipy import stats\n",
|
|
"from sklearn.metrics import confusion_matrix\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from matplotlib.ticker import MultipleLocator\n",
|
|
"from matplotlib.lines import Line2D\n",
|
|
"from cycler import cycler\n",
|
|
"from scipy.interpolate import interp1d\n",
|
|
"from pandas.api.types import CategoricalDtype\n",
|
|
"import seaborn as sns\n",
|
|
"sns.set(style=\"white\")\n",
|
|
"from scipy import interpolate\n",
|
|
"from tqdm import tqdm"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Matplot lib default settings\n",
|
|
"plt.rcParams[\"figure.figsize\"] = (10,6)\n",
|
|
"plt.rcParams['axes.grid']=True\n",
|
|
"plt.rcParams['grid.alpha'] = 0.5\n",
|
|
"plt.rcParams['grid.color'] = \"grey\"\n",
|
|
"plt.rcParams['grid.linestyle'] = \"--\"\n",
|
|
"plt.rcParams['axes.grid']=True\n",
|
|
"\n",
|
|
"# https://stackoverflow.com/a/20709149\n",
|
|
"# matplotlib.rcParams['text.usetex'] = True\n",
|
|
"\n",
|
|
"matplotlib.rcParams['text.latex.preamble'] = [\n",
|
|
" r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n",
|
|
" r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n",
|
|
" r'\\usepackage{helvet}', # set the normal font here\n",
|
|
" r'\\usepackage{amsmath}',\n",
|
|
" r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n",
|
|
" r'\\sansmath', # <- tricky! -- gotta actually tell tex to use!\n",
|
|
"] "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Import data\n",
|
|
"Import our data from the `./data/interim/` folder and load it into pandas dataframes. "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
|
|
" print('Importing {}'.format(csv))\n",
|
|
" return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n",
|
|
"\n",
|
|
"df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n",
|
|
"df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n",
|
|
"df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n",
|
|
"df_sites = df_from_csv('sites.csv', index_col=[0])\n",
|
|
"df_sites_waves = df_from_csv('sites_waves.csv', index_col=[0])\n",
|
|
"df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])\n",
|
|
"\n",
|
|
"# Note that the forecasted data sets should be in the same order for impacts and twls\n",
|
|
"impacts = {\n",
|
|
" 'forecasted': {\n",
|
|
" 'postintertidal_slope_sto06': df_from_csv('impacts_forecasted_postintertidal_slope_sto06.csv', index_col=[0]),\n",
|
|
" 'postmean_slope_sto06': df_from_csv('impacts_forecasted_postmean_slope_sto06.csv', index_col=[0]),\n",
|
|
" 'preintertidal_slope_sto06': df_from_csv('impacts_forecasted_preintertidal_slope_sto06.csv', index_col=[0]),\n",
|
|
" 'premean_slope_sto06': df_from_csv('impacts_forecasted_premean_slope_sto06.csv', index_col=[0]),\n",
|
|
" },\n",
|
|
" 'observed': df_from_csv('impacts_observed.csv', index_col=[0])\n",
|
|
" }\n",
|
|
"\n",
|
|
"twls = {\n",
|
|
" 'forecasted': {\n",
|
|
" 'postintertidal_slope_sto06': df_from_csv('twl_postintertidal_slope_sto06.csv', index_col=[0,1]),\n",
|
|
" 'postmean_slope_sto06': df_from_csv('twl_postmean_slope_sto06.csv', index_col=[0,1]),\n",
|
|
" 'preintertidal_slope_sto06': df_from_csv('twl_preintertidal_slope_sto06.csv', index_col=[0,1]),\n",
|
|
" 'premean_slope_sto06': df_from_csv('twl_premean_slope_sto06.csv', index_col=[0,1]),\n",
|
|
" }\n",
|
|
"}\n",
|
|
"print('Done!')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Gather data into one dataframe\n",
|
|
"For plotting, gather all our data into one dataframe."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Which forecasted impacts dataframe should we use to assess prediction performance?\n",
|
|
"df_selected_forecast = impacts['forecasted']['postintertidal_slope_sto06']\n",
|
|
"\n",
|
|
"# Create df with all our data\n",
|
|
"df = impacts['observed'].merge(\n",
|
|
" df_sites_waves, left_index=True, right_index=True)\n",
|
|
"\n",
|
|
"# Join observed/forecasted regimes\n",
|
|
"df_forecasted = df_selected_forecast.rename(\n",
|
|
" {'storm_regime': 'forecasted_regime'\n",
|
|
" }, axis='columns').forecasted_regime\n",
|
|
"df = pd.concat([df, df_forecasted], axis=1)\n",
|
|
"\n",
|
|
"# Create new accuracy column which categorises each prediction\n",
|
|
"df.loc[(df.storm_regime == 'swash') & (df.forecasted_regime == 'swash'), 'accuracy'] = 'correct swash'\n",
|
|
"df.loc[(df.storm_regime == 'collision') & (df.forecasted_regime == 'collision'), 'accuracy'] = 'correct collision'\n",
|
|
"df.loc[(df.storm_regime == 'swash') & (df.forecasted_regime == 'collision'), 'accuracy'] = 'overpredicted swash'\n",
|
|
"df.loc[(df.storm_regime == 'collision') & (df.forecasted_regime == 'swash'), 'accuracy'] = 'underpredicted collision'\n",
|
|
"\n",
|
|
"print('df columns:\\n===')\n",
|
|
"for col in sorted(df.columns):\n",
|
|
" print(col)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Create plots"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Variable pairplot, by observed storm impact\n",
|
|
"Create pairplot of selected variables and look for relationships between each. Colors represent the different observed storm impact regimes."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"g = sns.pairplot(\n",
|
|
" data=df,\n",
|
|
" hue='storm_regime',\n",
|
|
" dropna=True,\n",
|
|
" palette={\n",
|
|
" 'swash': 'blue',\n",
|
|
" 'collision': 'orange',\n",
|
|
" 'overwash': 'red'\n",
|
|
" },\n",
|
|
" plot_kws=dict(s=20, edgecolor=\"white\", linewidth=0.1, alpha=0.1),\n",
|
|
" vars=['beta_prestorm_mean',\n",
|
|
" 'beta_poststorm_mean',\n",
|
|
" 'beta_diff_mean',\n",
|
|
" 'swash_pct_change',\n",
|
|
" 'width_msl_change_m',\n",
|
|
" 'width_msl_change_pct',\n",
|
|
" 'Exscum'])\n",
|
|
"g.savefig('11_pairplot_observed_impacts.png')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Variable pairplot, by observed/prediction class\n",
|
|
"Create pairplot of selected variables and look for relationships between each. Colors represent the different observed/prediction classes."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"g = sns.pairplot(\n",
|
|
" data=df,\n",
|
|
" hue='accuracy',\n",
|
|
" dropna=True,\n",
|
|
" palette={\n",
|
|
" 'correct swash': 'blue',\n",
|
|
" 'correct collision': 'green',\n",
|
|
" 'overpredicted swash': 'orange',\n",
|
|
" 'underpredicted collision': 'red',\n",
|
|
" },\n",
|
|
" plot_kws=dict(s=20, edgecolor=\"white\", linewidth=0.1, alpha=0.1),\n",
|
|
" vars=['beta_prestorm_mean',\n",
|
|
" 'beta_poststorm_mean',\n",
|
|
" 'beta_diff_mean',\n",
|
|
" 'swash_pct_change',\n",
|
|
" 'width_msl_change_m',\n",
|
|
" 'width_msl_change_pct',\n",
|
|
" 'Exscum'])\n",
|
|
"g.savefig('11_pairplot_accuracy_classes.png')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Pre/post storm slope by observed/predicted class"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# First create a melted dataframe since our coulmn's aren't exactly as they should be for plotting\n",
|
|
"df_temp = df.copy()\n",
|
|
"df_temp = df_temp.reset_index()\n",
|
|
"\n",
|
|
"df_melt = pd.melt(\n",
|
|
" df_temp,\n",
|
|
" id_vars=['site_id', 'accuracy'],\n",
|
|
" value_vars=['beta_prestorm_mean', 'beta_poststorm_mean'],\n",
|
|
" var_name='profile_type',\n",
|
|
" value_name='beta_mean')\n",
|
|
"\n",
|
|
"df_melt.loc[df_melt.profile_type == 'beta_prestorm_mean','profile_type'] = 'prestorm'\n",
|
|
"df_melt.loc[df_melt.profile_type == 'beta_poststorm_mean','profile_type'] = 'poststorm'\n",
|
|
"df_melt.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"f, ax = plt.subplots(figsize=(6,5))\n",
|
|
"\n",
|
|
"cats = ['correct swash', 'overpredicted swash','underpredicted collision','correct collision']\n",
|
|
"\n",
|
|
"# Plot the orbital period with horizontal boxes\n",
|
|
"sns.boxplot(\n",
|
|
" data=df_melt,\n",
|
|
" x=\"accuracy\",\n",
|
|
" y=\"beta_mean\",\n",
|
|
" hue=\"profile_type\",\n",
|
|
" order=cats\n",
|
|
")\n",
|
|
"\n",
|
|
"group_labels = [x.replace(' ','\\n') for x in cats]\n",
|
|
"ax.set_xticklabels(group_labels)\n",
|
|
"\n",
|
|
"# Setup ticks and grid\n",
|
|
"ax.xaxis.grid(True)\n",
|
|
"major_ticks = np.arange(-1, 1, 0.05)\n",
|
|
"minor_ticks = np.arange(-1, 1, 0.01)\n",
|
|
"ax.set_yticks(major_ticks)\n",
|
|
"ax.set_yticks(minor_ticks, minor=True)\n",
|
|
"ax.grid(which='both')\n",
|
|
"ax.grid(which='minor', alpha=0.3,linestyle='--')\n",
|
|
"ax.grid(which='major', alpha=0.8,linestyle='-')\n",
|
|
"\n",
|
|
"ax.set_ylim([-0.02,0.3])\n",
|
|
"\n",
|
|
"f.savefig('11_prepost_slopes_accuracy_classes.png',dpi=600)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Change in slope by observed/predicted class"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"f, ax = plt.subplots(figsize=(6,5))\n",
|
|
"\n",
|
|
"cats = ['correct swash', 'overpredicted swash','underpredicted collision','correct collision']\n",
|
|
"\n",
|
|
"# Plot the orbital period with horizontal boxes\n",
|
|
"sns.boxplot(\n",
|
|
" data=df,\n",
|
|
" x=\"accuracy\",\n",
|
|
" y=\"beta_diff_mean\",\n",
|
|
" order=cats\n",
|
|
")\n",
|
|
"\n",
|
|
"group_labels = [x.replace(' ','\\n') for x in cats]\n",
|
|
"ax.set_xticklabels(group_labels)\n",
|
|
"\n",
|
|
"# Setup ticks and grid\n",
|
|
"ax.xaxis.grid(True)\n",
|
|
"major_ticks = np.arange(-1, 1, 0.05)\n",
|
|
"minor_ticks = np.arange(-1, 1, 0.01)\n",
|
|
"ax.set_yticks(major_ticks)\n",
|
|
"ax.set_yticks(minor_ticks, minor=True)\n",
|
|
"ax.grid(which='both')\n",
|
|
"ax.grid(which='minor', alpha=0.3,linestyle='--')\n",
|
|
"ax.grid(which='major', alpha=0.8,linestyle='-')\n",
|
|
"\n",
|
|
"ax.set_ylim([-0.2,0.2])\n",
|
|
"\n",
|
|
"f.savefig('11_change_in_slopes_accuracy_classes.png',dpi=600)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Swash zone volume change histogram"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"How much does the beach width change variation can we expect in the swash regime?"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"f, ax = plt.subplots(figsize=(5,4))\n",
|
|
"\n",
|
|
"sns.distplot(df.loc[df.storm_regime=='swash'].width_msl_change_pct.dropna(), \n",
|
|
" kde=False);\n",
|
|
"\n",
|
|
"ax.set_title('Distribution of beach width change for swash regime')\n",
|
|
"ax.set_xlabel('$\\Delta$ beach width (%)')\n",
|
|
"ax.set_ylabel('Count')\n",
|
|
"\n",
|
|
"f.savefig('11_change_in_beach_width.png',dpi=600)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Check prestorm and post storm width"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ax.get_xaxis().get_major_ticks()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"x_col = \"width_msl_prestorm\"\n",
|
|
"y_col = \"width_msl_poststorm\"\n",
|
|
"\n",
|
|
"with sns.axes_style(\"white\"):\n",
|
|
" g = sns.jointplot(x=x_col,\n",
|
|
" y=y_col,\n",
|
|
" data=df.dropna(subset=[x_col, y_col]),\n",
|
|
" kind=\"hex\",\n",
|
|
" ylim=(0, 150),\n",
|
|
" xlim=(0, 150))\n",
|
|
"\n",
|
|
" x0, x1 = g.ax_joint.get_xlim()\n",
|
|
" y0, y1 = g.ax_joint.get_ylim()\n",
|
|
" lims = [max(x0, y0), min(x1, y1)]\n",
|
|
" g.ax_joint.plot(lims, lims, ':k') \n",
|
|
" "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Find correlations between variables"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Create correlogram"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"code_folding": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from matplotlib.patches import Ellipse\n",
|
|
"def corrplot(data, pvalues, labels):\n",
|
|
" \"\"\"Creates a correlation plot of the passed data.\n",
|
|
" The function returns the plot which can then be shown with\n",
|
|
" plot.show(), saved to a file with plot.savefig(), or manipulated\n",
|
|
" in any other standard matplotlib way.\n",
|
|
" data is the correlation matrix, a 2-D numpy array containing\n",
|
|
" the pairwise correlations between variables;\n",
|
|
" pvalues is a matrix containing the pvalue for each corresponding\n",
|
|
" correlation value; if none it is assumed to be the zero matrix\n",
|
|
" labels is an array containing the variable names\n",
|
|
" https://github.com/louridas/corrplot/blob/master/corrplot.py\n",
|
|
" \"\"\"\n",
|
|
"\n",
|
|
" plt.figure(1)\n",
|
|
"\n",
|
|
" column_labels = labels\n",
|
|
" row_labels = labels\n",
|
|
" \n",
|
|
" f = plt.figure(figsize=(8,8))\n",
|
|
" ax = plt.subplot(1, 1, 1, aspect='equal')\n",
|
|
"\n",
|
|
" width, height = data.shape\n",
|
|
" num_cols, num_rows = width, height\n",
|
|
"\n",
|
|
" if pvalues is None:\n",
|
|
" pvalues = np.zeros([num_rows, num_cols])\n",
|
|
" \n",
|
|
" shrink = 0.9\n",
|
|
"\n",
|
|
" poscm = cm.get_cmap('Blues')\n",
|
|
" negcm = cm.get_cmap('Oranges')\n",
|
|
"\n",
|
|
" for x in range(width):\n",
|
|
" for y in range(height):\n",
|
|
" d = data[x, y]\n",
|
|
" c = pvalues[x, y]\n",
|
|
" rotate = -45 if d > 0 else +45\n",
|
|
" clrmap = poscm if d >= 0 else negcm\n",
|
|
" d_abs = np.abs(d)\n",
|
|
" ellipse = Ellipse((x, y),\n",
|
|
" width=1 * shrink,\n",
|
|
" height=(shrink - d_abs*shrink),\n",
|
|
" angle=rotate)\n",
|
|
" ellipse.set_edgecolor('black')\n",
|
|
" ellipse.set_facecolor(clrmap(d_abs))\n",
|
|
" if c > 0.05:\n",
|
|
" ellipse.set_linestyle('dotted')\n",
|
|
" ellipse.set_alpha(0.5)\n",
|
|
" ax.add_artist(ellipse)\n",
|
|
"\n",
|
|
" ax.set_xlim(-1, num_cols)\n",
|
|
" ax.set_ylim(-1, num_rows)\n",
|
|
" \n",
|
|
" ax.xaxis.tick_top()\n",
|
|
" xtickslocs = np.arange(len(row_labels))\n",
|
|
" ax.set_xticks(xtickslocs)\n",
|
|
" ax.set_xticklabels(row_labels, rotation=30, fontsize='small', ha='left')\n",
|
|
"\n",
|
|
" ax.invert_yaxis()\n",
|
|
" ytickslocs = np.arange(len(row_labels))\n",
|
|
" ax.set_yticks(ytickslocs)\n",
|
|
" ax.set_yticklabels(column_labels, fontsize='small')\n",
|
|
"\n",
|
|
" return plt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Calculate correlation coefficient and p-values\n",
|
|
"# https://stackoverflow.com/a/24469099\n",
|
|
"corr = df.corr(method ='pearson') \n",
|
|
"n=len(corr.columns)\n",
|
|
"t=corr*np.sqrt((n-2)/(1-corr*corr))\n",
|
|
"pvals = stats.t.cdf(t, n-2)\n",
|
|
"\n",
|
|
"plot = corrplot(corr.values, pvals, corr.columns.tolist())\n",
|
|
"plot.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Create regression plot between two variables"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from scipy import stats\n",
|
|
"\n",
|
|
"# x_col = 'beta_prestorm_intertidal'\n",
|
|
"# y_col = \"beta_diff_intertidal\"\n",
|
|
"# data = df.loc[df.storm_regime=='swash']\n",
|
|
"\n",
|
|
"# y_col = 'total_vol_change'\n",
|
|
"# x_col = \"Pxscum\"\n",
|
|
"# data = df\n",
|
|
"\n",
|
|
"y_col = 'prestorm_cum_exposed_vol'\n",
|
|
"x_col = \"Exscum\"\n",
|
|
"c_col = 'total_vol_change'\n",
|
|
"data = df\n",
|
|
"\n",
|
|
"slope, intercept, r_value, p_value, std_err = stats.linregress(\n",
|
|
" data.dropna()[x_col].values,\n",
|
|
" data.dropna()[y_col].values)\n",
|
|
"\n",
|
|
"fig = plt.figure(\n",
|
|
" figsize=(6, 4), dpi=150, facecolor='w', edgecolor='k')\n",
|
|
"ax = fig.add_subplot(111)\n",
|
|
"\n",
|
|
"scatter = ax.scatter(\n",
|
|
" x=data.dropna()[x_col].values,\n",
|
|
" y=data.dropna()[y_col].values,\n",
|
|
" c=data.dropna()[c_col].values,\n",
|
|
" s=1, \n",
|
|
" vmin=-150, vmax=0,\n",
|
|
")\n",
|
|
"\n",
|
|
"ax.set_xlabel(x_col)\n",
|
|
"ax.set_ylabel(y_col)\n",
|
|
"ax.set_ylim(0,20000)\n",
|
|
"\n",
|
|
"cbar = plt.colorbar(scatter)\n",
|
|
"cbar.set_label(c_col)\n",
|
|
"\n",
|
|
"ax.grid(True, linestyle=\"--\", alpha=0.2, color='grey', linewidth=1)\n",
|
|
"\n",
|
|
"plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Calculate berm shape index"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_profiles\n",
|
|
"df_profile_features_crest_toes\n",
|
|
"\n",
|
|
"berm_shape = []\n",
|
|
"grouped = df_profiles.dropna(subset=['z']).xs('prestorm',level='profile_type').groupby('site_id')\n",
|
|
"for site_id, df_site in tqdm(grouped):\n",
|
|
" features = df_profile_features_crest_toes.loc[(site_id,'prestorm')]\n",
|
|
" \n",
|
|
" # Get x-coordinate at z=0\n",
|
|
" x_last = df_site.iloc[-1].name[1]\n",
|
|
" z_last = 0\n",
|
|
" \n",
|
|
" # Get coordinates of dune toe\n",
|
|
" x_first = features.dune_toe_x\n",
|
|
" z_first = features.dune_toe_z\n",
|
|
" \n",
|
|
" # If there is no dune toe, get dune crest\n",
|
|
" if np.isnan(x_first):\n",
|
|
" x_first = features.dune_crest_x\n",
|
|
" z_first = features.dune_crest_z\n",
|
|
" \n",
|
|
" # If no dune crest, use nan\n",
|
|
" if np.isnan(x_first):\n",
|
|
" berm_shape.append({'site_id': site_id,\n",
|
|
" 'prestorm_berm_curvature': np.nan})\n",
|
|
" continue\n",
|
|
"\n",
|
|
" # Fit straight line between start and end points\n",
|
|
" segment = (df_site.loc[(df_site.index.get_level_values('x')>=x_first)&\n",
|
|
" (df_site.index.get_level_values('x')<=x_last)])\n",
|
|
" x_segment = segment.index.get_level_values('x')\n",
|
|
" z_segment = segment.z\n",
|
|
" f = interpolate.interp1d([x_first,x_last],[z_first,z_last])\n",
|
|
" z_straight = f(x_segment)\n",
|
|
"\n",
|
|
" area = np.trapz(y=z_straight-z_segment, x=x_segment)\n",
|
|
" length = x_last-x_first\n",
|
|
" \n",
|
|
" normalized_curvature = area\n",
|
|
"# normalized_curvature = area / length\n",
|
|
" berm_shape.append({'site_id': site_id,\n",
|
|
" 'prestorm_berm_curvature': normalized_curvature})\n",
|
|
"\n",
|
|
"# Convert to dataframe \n",
|
|
"df_berm_shape = pd.DataFrame(berm_shape)\n",
|
|
"df_berm_shape = df_berm_shape.set_index('site_id')\n",
|
|
"\n",
|
|
"# Join onto our big dataframe\n",
|
|
"df = df.drop(columns=['prestorm_berm_curvature'], errors='ignore')\n",
|
|
"df = pd.concat([df, df_berm_shape], axis=1)\n",
|
|
"\n",
|
|
"df_berm_shape.head()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Check wave timeseries\n",
|
|
"How much does wave height vary alongshore between sites?"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from dateutil.parser import parse\n",
|
|
"sites = ['NARRA0001', 'NARRA0012', 'NARRA0024']\n",
|
|
"\n",
|
|
"fig = plt.figure(\n",
|
|
" figsize=(6, 4), dpi=150, facecolor='w', edgecolor='k')\n",
|
|
"ax = fig.add_subplot(111)\n",
|
|
"\n",
|
|
"for site_id in sites:\n",
|
|
" print(site_id)\n",
|
|
" x = [parse(t) for t in df_waves.xs(site_id,level='site_id').index]\n",
|
|
" y = df_waves.xs(site_id,level='site_id').Hs\n",
|
|
" ax.plot(x,y)\n",
|
|
" \n",
|
|
"plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Cumulative sum of available prestorm volume?"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# At each site, determine relationship between height and available volume\n",
|
|
"data = []\n",
|
|
"site_ids = df_sites.index.values\n",
|
|
"for site_id in site_ids:\n",
|
|
" df_profile = df_profiles.xs([site_id, 'prestorm'],\n",
|
|
" level=['site_id',\n",
|
|
" 'profile_type']).dropna(subset=['z'])\n",
|
|
" x_profile = df_profile.index.get_level_values('x').values\n",
|
|
" z_profile = df_profile.z.values\n",
|
|
" \n",
|
|
" z_vals = np.arange(min(df_profile.z),max(df_profile.z),0.01)\n",
|
|
" \n",
|
|
" for z in z_vals:\n",
|
|
" i_start = np.where((z_profile > z))[0][-1]\n",
|
|
" x_start = x_profile[i_start]\n",
|
|
" x_end = x_profile[-1]\n",
|
|
" mask = (x_start <= x_profile) & (x_profile <= x_end)\n",
|
|
" vol = np.trapz(z_profile[mask], x=x_profile[mask])\n",
|
|
" data.append({'site_id': site_id,'z':z,'prestorm_vol':vol})\n",
|
|
" \n",
|
|
"df_prestorm_vols_by_z = pd.DataFrame(data)\n",
|
|
"df_prestorm_vols_by_z = df_prestorm_vols_by_z.set_index(['site_id','z'])\n",
|
|
"df_prestorm_vols_by_z.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_twl = twls['forecasted']['preintertidal_slope_sto06']\n",
|
|
"df_twl['z'] = df_twl.R_high.round(2)\n",
|
|
"\n",
|
|
"df_twl = df_twl.join(df_prestorm_vols_by_z, on=['site_id','z'])\n",
|
|
"df_twl = df_twl.drop(columns=['z'])\n",
|
|
"\n",
|
|
"df_site_cum_exposed_vols = df_twl.groupby('site_id').prestorm_vol.sum().to_frame()\n",
|
|
"df_site_cum_exposed_vols = df_site_cum_exposed_vols.rename({'prestorm_vol':'prestorm_cum_exposed_vol'},axis=1)\n",
|
|
"\n",
|
|
"# # Join onto main dataframe\n",
|
|
"df = df.drop(columns=['prestorm_cum_exposed_vol'], errors='ignore')\n",
|
|
"df = pd.concat([df, df_site_cum_exposed_vols], axis=1)\n",
|
|
"\n",
|
|
"df_site_cum_exposed_vols.head()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# PCA?"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"X[0]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn import decomposition\n",
|
|
"from sklearn.preprocessing import StandardScaler\n",
|
|
"\n",
|
|
"target_col = 'swash_pct_change'\n",
|
|
"training_cols = ['beta_prestorm_mean','beta_prestorm_intertidal','prestorm_dune_vol','prestorm_swash_vol','width_msl_prestorm','Pxscum','prestorm_berm_curvature','prestorm_cum_exposed_vol']\n",
|
|
"\n",
|
|
"df_pca = df[training_cols+[target_col]].dropna()\n",
|
|
"df_pca_data_only = df_pca.drop(target_col,axis=1)\n",
|
|
"\n",
|
|
"# input data\n",
|
|
"X = df_pca_data_only.values\n",
|
|
"X = StandardScaler().fit_transform(X)\n",
|
|
"\n",
|
|
"# target\n",
|
|
"y = df_pca[target_col]\n",
|
|
"\n",
|
|
"# pca\n",
|
|
"pca = decomposition.PCA(n_components=2)\n",
|
|
"pca.fit(X)\n",
|
|
"\n",
|
|
"X = pca.transform(X)\n",
|
|
"\n",
|
|
"\n",
|
|
"fig = plt.figure(\n",
|
|
" figsize=(6, 4), dpi=150, facecolor='w', edgecolor='k')\n",
|
|
"ax = fig.add_subplot(111)\n",
|
|
"\n",
|
|
"scatter = ax.scatter(\n",
|
|
" x=X[:,0],\n",
|
|
" y=X[:,1],\n",
|
|
" c=y,\n",
|
|
" s=0.5, \n",
|
|
" vmin=-1, vmax=0,\n",
|
|
")\n",
|
|
"\n",
|
|
"# ax.set_xlabel(x_col)\n",
|
|
"# ax.set_ylabel(y_col)\n",
|
|
"# ax.set_ylim(0,20000)\n",
|
|
"\n",
|
|
"cbar = plt.colorbar(scatter)\n",
|
|
"# cbar.set_label(c_col)\n",
|
|
"\n",
|
|
"# ax.grid(True, linestyle=\"--\", alpha=0.2, color='grey', linewidth=1)\n",
|
|
"\n",
|
|
"plt.show()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_pca_dims = pd.DataFrame(pca.components_, columns=list(df_pca_data_only.columns))\n",
|
|
"\n",
|
|
"df_pca_dims.iloc[0]\n",
|
|
"# pca.explained_variance_ratio_"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"hide_input": false,
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.7"
|
|
},
|
|
"toc": {
|
|
"base_numbering": 1,
|
|
"nav_menu": {},
|
|
"number_sections": true,
|
|
"sideBar": true,
|
|
"skip_h1_title": false,
|
|
"title_cell": "Table of Contents",
|
|
"title_sidebar": "Contents",
|
|
"toc_cell": false,
|
|
"toc_position": {
|
|
"height": "calc(100% - 180px)",
|
|
"left": "10px",
|
|
"top": "150px",
|
|
"width": "223.594px"
|
|
},
|
|
"toc_section_display": true,
|
|
"toc_window_display": true
|
|
},
|
|
"varInspector": {
|
|
"cols": {
|
|
"lenName": 16,
|
|
"lenType": 16,
|
|
"lenVar": 40
|
|
},
|
|
"kernels_config": {
|
|
"python": {
|
|
"delete_cmd_postfix": "",
|
|
"delete_cmd_prefix": "del ",
|
|
"library": "var_list.py",
|
|
"varRefreshCmd": "print(var_dic_list())"
|
|
},
|
|
"r": {
|
|
"delete_cmd_postfix": ") ",
|
|
"delete_cmd_prefix": "rm(",
|
|
"library": "var_list.r",
|
|
"varRefreshCmd": "cat(var_dic_list()) "
|
|
}
|
|
},
|
|
"types_to_exclude": [
|
|
"module",
|
|
"function",
|
|
"builtin_function_or_method",
|
|
"instance",
|
|
"_Feature"
|
|
],
|
|
"window_display": false
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|