{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TWL Exceedance"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup notebook"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Enable autoreloading of our modules. \n",
"# Most of the code will be located in the /src/ folder, \n",
"# and then called from the notebook.\n",
"%matplotlib inline\n",
"%reload_ext autoreload\n",
"%autoreload"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.core.debugger import set_trace\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import decimal\n",
"import plotly\n",
"import plotly.graph_objs as go\n",
"import plotly.plotly as py\n",
"import plotly.tools as tls\n",
"import plotly.figure_factory as ff\n",
"from plotly import tools\n",
"import plotly.io as pio\n",
"from scipy import stats\n",
"import math\n",
"import matplotlib\n",
"from matplotlib import cm\n",
"import colorlover as cl\n",
"from tqdm import tqdm_notebook\n",
"from ipywidgets import widgets, Output\n",
"from IPython.display import display, clear_output, Image, HTML\n",
"from sklearn.metrics import confusion_matrix\n",
"import matplotlib.pyplot as plt\n",
"from scipy.interpolate import interp1d"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Matplot lib default settings\n",
"plt.rcParams[\"figure.figsize\"] = (10,6)\n",
"plt.rcParams['axes.grid']=True\n",
"plt.rcParams['grid.alpha'] = 0.5\n",
"plt.rcParams['grid.color'] = \"grey\"\n",
"plt.rcParams['grid.linestyle'] = \"--\"\n",
"\n",
"# https://stackoverflow.com/a/20709149\n",
"matplotlib.rcParams['text.usetex'] = True\n",
"matplotlib.rcParams['font.family'] = 'sans-serif'\n",
"matplotlib.rcParams['font.sans-serif'] = 'Helvetica'\n",
"\n",
"matplotlib.rcParams['text.latex.preamble'] = [\n",
" r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n",
" r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n",
" r'\\usepackage{helvet}', # set the normal font here\n",
" r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n",
" r'\\sansmath' # <- tricky! -- gotta actually tell tex to use!\n",
"] "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
" print('Importing {}'.format(csv))\n",
" return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n",
"\n",
"df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n",
"df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n",
"df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n",
"df_sites = df_from_csv('sites.csv', index_col=[0])\n",
"df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])\n",
"\n",
"# Note that the forecasted data sets should be in the same order for impacts and twls\n",
"impacts = {\n",
" 'forecasted': {\n",
" 'foreshore_slope_sto06': df_from_csv('impacts_forecasted_foreshore_slope_sto06.csv', index_col=[0]),\n",
" 'mean_slope_sto06': df_from_csv('impacts_forecasted_mean_slope_sto06.csv', index_col=[0]),\n",
" },\n",
" 'observed': df_from_csv('impacts_observed.csv', index_col=[0])\n",
" }\n",
"\n",
"\n",
"twls = {\n",
" 'forecasted': {\n",
" 'foreshore_slope_sto06': df_from_csv('twl_foreshore_slope_sto06.csv', index_col=[0, 1]),\n",
" 'mean_slope_sto06':df_from_csv('twl_mean_slope_sto06.csv', index_col=[0, 1]),\n",
" }\n",
"}\n",
"print('Done!')"
]
},
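{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sketch of how these MultiIndexed frames are accessed throughout the rest of the notebook (a sanity check only; the site_id here is just the first entry of df_sites rather than a hand-picked site):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sanity-check sketch: twl frames are indexed by (site_id, datetime) and\n",
"# impacts frames by site_id, so .loc[site_id] pulls one site's records.\n",
"site_id = df_sites.index[0]\n",
"print(twls['forecasted']['mean_slope_sto06'].loc[site_id].head())\n",
"print(impacts['observed'].loc[site_id])"
]
},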
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Calculate impact hours\n",
"- For each site_id, determine the R2 elevation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create figure to plot results\n",
"fig = tools.make_subplots(\n",
" rows=2,\n",
" cols=2,\n",
" specs=[[{}, {}], [{}, {}]],\n",
" subplot_titles=('Swash/Swash', 'Swash/Collision', 'Collision/Swash',\n",
" 'Collision/Collision'),\n",
" shared_xaxes=True,\n",
" shared_yaxes=True,\n",
" horizontal_spacing=0.05,\n",
" vertical_spacing=0.1,\n",
" print_grid=False)\n",
"\n",
"# Iterate through each site\n",
"print('Calculating cumulate frequency of R_high for each site:')\n",
"site_ids = twls['forecasted']['mean_slope_sto06'].index.get_level_values(\n",
" 'site_id').unique().values\n",
"for site_id in tqdm_notebook(site_ids):\n",
"\n",
" # Put data into a temporary dataframe, shorter name is easier to work with\n",
" df_impacts = impacts['forecasted']['mean_slope_sto06'].loc[site_id]\n",
" df_twls = twls['forecasted']['mean_slope_sto06'].loc[site_id]\n",
"\n",
" D_low = df_impacts.dune_toe_z\n",
" if np.isnan(D_low):\n",
" continue\n",
"\n",
" # Get R_high elevations minus dune toe\n",
" R_high_ts = df_twls.R_high.dropna().values\n",
" R_high_D_low_ts = R_high_ts - D_low\n",
"\n",
" # Get SWL minus dune toe\n",
" SWL_D_low_ts = df_twls['tide'].dropna().values - D_low\n",
" DSWL_D_low_ts = (df_twls['tide'] + df_twls['setup']).dropna().values - D_low\n",
"\n",
" # Get cumulative freq\n",
" cumfreq = stats.cumfreq(R_high_D_low_ts, numbins=100)\n",
"# cumfreq = stats.cumfreq(DSWL_D_low_ts, numbins=100)\n",
"\n",
" # Calculate space of values for x\n",
" bin_vals = cumfreq.lowerlimit + np.linspace(\n",
" 0, cumfreq.binsize * cumfreq.cumcount.size, cumfreq.cumcount.size)\n",
"\n",
" # Check which subplot we should put this site on\n",
" forecasted_regime = impacts['forecasted']['mean_slope_sto06'].loc[\n",
" site_id].storm_regime\n",
" observed_regime = impacts['observed'].loc[site_id].storm_regime\n",
"\n",
" if forecasted_regime == 'swash' and observed_regime == 'swash':\n",
" x_col = 1\n",
" y_col = 1\n",
" elif forecasted_regime == 'collision' and observed_regime == 'collision':\n",
" x_col = 2\n",
" y_col = 2\n",
" elif forecasted_regime == 'swash' and observed_regime == 'collision':\n",
" x_col = 2\n",
" y_col = 1\n",
" elif forecasted_regime == 'collision' and observed_regime == 'swash':\n",
" x_col = 1\n",
" y_col = 2\n",
" else:\n",
" continue\n",
"\n",
" fig.append_trace(\n",
" go.Scattergl(\n",
" x=bin_vals,\n",
" y=[max(cumfreq.cumcount) - x for x in cumfreq.cumcount],\n",
" name=site_id,\n",
" line=dict(\n",
" color=('rgba(22, 22, 22, 0.2)'),\n",
" width=0.5,\n",
" )), x_col, y_col)\n",
"\n",
"print('Finalizing plot:')\n",
"# Change some formatting for the plot\n",
"layout = go.Layout(\n",
" xaxis=dict(domain=[0, 0.45]),\n",
" yaxis=dict(\n",
" domain=[0, 0.45],\n",
" type='log',\n",
" ),\n",
" xaxis2=dict(domain=[0.55, 1]),\n",
" xaxis4=dict(domain=[0.55, 1], anchor='y4'),\n",
" yaxis3=dict(\n",
" domain=[0.55, 1],\n",
" type='log',\n",
" ),\n",
" yaxis4=dict(\n",
" domain=[0.55, 1],\n",
" anchor='x4',\n",
" type='log',\n",
" ))\n",
"\n",
"fig['layout'].update(\n",
" showlegend=False,\n",
" title='Impact hours',\n",
" height=800,\n",
")\n",
"\n",
"for ax in ['yaxis', 'yaxis2']:\n",
"# fig['layout'][ax]['range'] = [0, 400]\n",
" fig['layout'][ax]['range'] = [0, 15]\n",
"\n",
"for ax in ['xaxis', 'xaxis2']:\n",
"# fig['layout'][ax]['range'] = [-2.5, 2.5]\n",
" fig['layout'][ax]['range'] = [-1, 1]\n",
"\n",
"fig['layout']['xaxis'].update(title='R_high - D_low')\n",
"fig['layout']['xaxis2'].update(title='R_high - D_low')\n",
"fig['layout']['yaxis'].update(title='No. of Hours')\n",
"fig['layout']['yaxis2'].update(title='No. of Hours')\n",
"\n",
"# pio.write_image(fig, 'fig2.png')\n",
"\n",
"go.FigureWidget(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This gives an overview of the distribution of impact hours. Try to calculate the confidence interval bounds for each prediction/observed combination.\n",
"\n",
"The following cell looks at combining all the observations from each CDF into one large CDF and calculating a confidence interval from it, but I'm not sure if this is a valid method."
]
},
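{
"cell_type": "markdown",
"metadata": {},
"source": [
"As far as I can tell, statsmodels' `_conf_set` gives a Dvoretzky-Kiefer-Wolfowitz (DKW) band around the ECDF. A minimal standalone sketch of that band on synthetic data (not our TWL values), just to sanity-check what the helper returns:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch only: DKW confidence band for an ECDF, assuming this is what\n",
"# statsmodels' _conf_set implements. Synthetic sample, not project data.\n",
"rng = np.random.RandomState(42)\n",
"sample = np.sort(rng.normal(size=200))\n",
"n = len(sample)\n",
"ecdf_y = np.arange(1, n + 1) / n\n",
"\n",
"# DKW inequality: P(sup|F_n - F| > eps) <= 2 * exp(-2 * n * eps^2)\n",
"alpha = 0.05\n",
"eps = np.sqrt(np.log(2.0 / alpha) / (2 * n))\n",
"lower = np.clip(ecdf_y - eps, 0, 1)\n",
"upper = np.clip(ecdf_y + eps, 0, 1)\n",
"\n",
"plt.step(sample, ecdf_y, label='ECDF')\n",
"plt.fill_between(sample, lower, upper, step='pre', alpha=0.2, label=r'95\\% DKW band')\n",
"plt.legend(loc='best')\n",
"plt.show()"
]
},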
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [
3
]
},
"outputs": [],
"source": [
"from statsmodels.distributions.empirical_distribution import ECDF\n",
"from statsmodels.distributions.empirical_distribution import _conf_set\n",
"\n",
"df_twls = twls['forecasted']['mean_slope_sto06']\n",
"df_forecasted_impacts = impacts['forecasted']['mean_slope_sto06']\n",
"df_observed_impacts = impacts['observed']\n",
"\n",
"plt.figure(figsize=(6,8))\n",
"\n",
"# Do some data rearranging and joining to make it easier\n",
"df_joined = df_twls.reset_index()\n",
"df_joined = df_joined.set_index('site_id')\n",
"df_joined = df_joined.merge(\n",
" df_observed_impacts[['storm_regime']],\n",
" left_on='site_id',\n",
" right_on='site_id').rename({\n",
" 'storm_regime': 'observed_regime'\n",
" },\n",
" axis='columns')\n",
"df_joined = df_joined.merge(\n",
" df_forecasted_impacts[['storm_regime', 'dune_toe_z']],\n",
" left_on='site_id',\n",
" right_on='site_id').rename({\n",
" 'storm_regime': 'forecasted_regime'\n",
" },\n",
" axis='columns')\n",
"\n",
"regime_combinations = [\n",
" ('swash', 'swash', '#2b83ba'),\n",
" ('collision', 'swash', '#abdda4'),\n",
" ('swash', 'collision', '#fdae61'),\n",
" ('collision', 'collision', '#d7191c'),\n",
"]\n",
"\n",
"for comb in regime_combinations:\n",
"\n",
" observed_regime = comb[0]\n",
" forecasted_regime = comb[1]\n",
" color = comb[2]\n",
"\n",
" # Get values of elevation difference to plot\n",
" query = '(observed_regime==\"{}\") & (forecasted_regime==\"{}\")'.format(\n",
" observed_regime, forecasted_regime)\n",
" df = df_joined.query(query)\n",
" R_high_D_low = (df.R_high - df.dune_toe_z).values\n",
" R_high_D_low = R_high_D_low[~np.isnan(R_high_D_low)]\n",
"\n",
" ecdf = ECDF(R_high_D_low)\n",
"\n",
" y = ecdf.y\n",
" lower, upper = _conf_set(y, alpha=0.05)\n",
" x = ecdf.x\n",
"\n",
" avg_hrs = df.groupby('site_id').count().R_high.mean()\n",
" y = [avg_hrs - v * avg_hrs for v in y]\n",
" lower = [avg_hrs - v * avg_hrs for v in lower]\n",
" upper = [avg_hrs - v * avg_hrs for v in upper]\n",
"\n",
" plt.step(\n",
" x,\n",
" y,\n",
" color=color,\n",
" label='Pred={}, Obs={}'.format(forecasted_regime, observed_regime))\n",
" plt.fill_between(\n",
" x, y, upper, color='grey', alpha=0.2, interpolate=False, step='pre')\n",
" plt.fill_between(\n",
" x, y, lower, color='grey', alpha=0.2, interpolate=False, step='pre')\n",
"\n",
"# # Plot for checking\n",
"\n",
"plt.title('Empirical CDF with 95\\% confidence intervals')\n",
"plt.xlabel('$R_{high} - D_{low} (m)$')\n",
"plt.ylabel('Hours of Elevation Exceedence')\n",
"plt.xlim([-1, 1])\n",
"plt.ylim([0, 25])\n",
"plt.legend(loc='best')\n",
"\n",
"# Print to figure\n",
"plt.savefig('05-empirical-cdf.png', dpi=600, bbox_inches='tight') \n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The plot above shows:\n",
"- collision if R_high - D_low > 0.25 m for 6 hours\n",
"- swash if R_high - D_low < -0.8m for 7 hours\n",
"\n",
"additionaly:\n",
"- collision if R_high > D_low for more than 10 hours\n",
" \n",
"Let's test how these new critera would perform."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate elevation exceedence for each hours we are interested in\n",
"ele_exceedence_6hr = twls['forecasted']['mean_slope_sto06'].sort_values(['R_high'],ascending=False).groupby('site_id').R_high.nth(6-1).rename('ele_exceedence_6hr')\n",
"\n",
"ele_exceedence_7hr = twls['forecasted']['mean_slope_sto06'].sort_values(['R_high'],ascending=False).groupby('site_id').R_high.nth(7-1).rename('ele_exceedence_7hr')\n",
"\n",
"\n",
"ele_exceedence_2hr = twls['forecasted']['mean_slope_sto06'].sort_values(['R_high'],ascending=False).groupby('site_id').R_high.nth(2-1).rename('ele_exceedence_2hr')\n",
"\n",
"ele_exceedence_1hr = twls['forecasted']['mean_slope_sto06'].sort_values(['R_high'],ascending=False).groupby('site_id').R_high.nth(0).rename('ele_exceedence_1hr')\n",
"\n",
"\n",
"# Get our dune toes\n",
"dune_toes = df_profile_features_crest_toes.xs('prestorm',level='profile_type')['dune_toe_z']\n",
"\n",
"# Get our observed regimes\n",
"observed_regime = impacts['observed'].storm_regime.rename('observed_regime')\n",
"\n",
"# Concat into one data frame\n",
"df = pd.concat([dune_toes, ele_exceedence_6hr, ele_exceedence_7hr, ele_exceedence_1hr, ele_exceedence_2hr, observed_regime],axis=1)\n",
"\n",
"# Get predicted regime based on old criteria\n",
"df.loc[df.ele_exceedence_1hr < df.dune_toe_z, 'forecasted_regime'] = 'swash'\n",
"df.loc[df.ele_exceedence_1hr > df.dune_toe_z , 'forecasted_regime'] = 'collision'\n",
"\n",
"\n",
"regime_combinations = [\n",
" ('swash','swash'),\n",
" ('collision','swash'),\n",
" ('swash','collision'),\n",
" ('collision','collision'),\n",
"]\n",
"\n",
"print('Original')\n",
"for comb in regime_combinations:\n",
" query = 'forecasted_regime==\"{}\" & observed_regime==\"{}\"'.format(comb[0], comb[1])\n",
" print('Forecasted: {}, Observed: {}, Count: {}'.format(comb[0], comb[1], len(df.query(query))))\n",
"\n",
"\n",
"# Get predicted regime based on our new criteria\n",
"\n",
"adjust_swash_criteria = (df.forecasted_regime == 'swash') & (df.ele_exceedence_7hr - df.dune_toe_z > -0.8)\n",
"adjust_collision_criteria = (df.forecasted_regime == 'collision') & (df.ele_exceedence_6hr - df.dune_toe_z < 0.25)\n",
"df.loc[adjust_swash_criteria, 'forecasted_regime'] = 'collision'\n",
"df.loc[adjust_collision_criteria, 'forecasted_regime'] = 'swash'\n",
"\n",
"# df.loc[(df.ele_exceedence_1hr - df.dune_toe_z <= -0.15 ),'forecasted_regime'] = 'swash'\n",
"# df.loc[(df.ele_exceedence_1hr - df.dune_toe_z > -0.15 ),'forecasted_regime'] = 'collision'\n",
"\n",
"\n",
"print('\\nAfter adjustment')\n",
"for comb in regime_combinations:\n",
" query = 'forecasted_regime==\"{}\" & observed_regime==\"{}\"'.format(comb[0], comb[1])\n",
" print('Forecasted: {}, Observed: {}, Count: {}'.format(comb[0], comb[1], len(df.query(query))))\n"
]
},
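{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a cross-check, the same counts can be tabulated with sklearn's `confusion_matrix` (imported at the top but otherwise unused here). A sketch, assuming `df` from the cell above is still in scope:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: tabulate observed vs forecasted regimes with sklearn's\n",
"# confusion_matrix (already imported above). Uses df from the previous cell.\n",
"labels = ['swash', 'collision']\n",
"mask = df.observed_regime.isin(labels) & df.forecasted_regime.isin(labels)\n",
"cm = confusion_matrix(df.loc[mask, 'observed_regime'],\n",
"                      df.loc[mask, 'forecasted_regime'],\n",
"                      labels=labels)\n",
"print(pd.DataFrame(cm,\n",
"                   index=['obs_{}'.format(x) for x in labels],\n",
"                   columns=['for_{}'.format(x) for x in labels]))"
]
},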
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Looking at the adjusted values, we can see these criteria actually make it worse. There must be something wrong with the technique - maybe the way of calculating the confidence intervals is wrong? Let's try calculate confidence intervals for each regime combination.\n",
"\n",
"*This cell I don't think is used...*\n"
6 years ago
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def mean_confidence_interval(data, confidence=0.95):\n",
" a = 1.0 * np.array(data)\n",
" n = len(a)\n",
" m, se = np.mean(a), stats.sem(a)\n",
" h = se * stats.t.ppf((1 + confidence) / 2., n-1)\n",
" return m, m-h, m+h\n",
"\n",
"# Add columns indicating how many n hrs was this the largest record\n",
"df = twls['forecasted']['mean_slope_sto06'].sort_values(['R_high'],ascending=False)\n",
"df['n_hrs_largest']= df.groupby('site_id').cumcount()+1\n",
"\n",
"# Join observed and forecast impacts and dune toe elevation\n",
"observed_regime = impacts['observed'].storm_regime.rename('observed_regime').to_frame()\n",
"forecasted_regime = impacts['forecasted']['mean_slope_sto06'].storm_regime.rename('forecasted_regime').to_frame()\n",
"dune_info = df_profile_features_crest_toes.xs('prestorm', level='profile_type')\n",
"\n",
"df['datetime'] = df.index.get_level_values('datetime')\n",
"df = df.merge(observed_regime,left_on=['site_id'],right_on='site_id')\n",
"df = df.merge(forecasted_regime,left_on=['site_id'],right_on='site_id')\n",
"df = df.merge(dune_info,left_on=['site_id'],right_on='site_id')\n",
"\n",
"# Make new column for R_high minus D_low\n",
"df['R_high_D_low_diff'] = df.R_high - df.dune_toe_z\n",
"\n",
"\n",
"regime_combinations = [\n",
" ('swash','swash'),\n",
" ('swash','collision'),\n",
" ('collision','swash'),\n",
" ('collision','collision'),\n",
"]\n",
"\n",
"print('Calculating hr exceedence elevations for each combination:')\n",
"exceedence_data = []\n",
"for hr in tqdm_notebook([x for x in range(1,101)]):\n",
" \n",
" for comb in regime_combinations:\n",
" \n",
" vals = df.loc[(df.n_hrs_largest==hr) & (df.observed_regime==comb[0]) & (df.forecasted_regime==comb[1])].R_high_D_low_diff.dropna().values\n",
" \n",
" ci = mean_confidence_interval(vals)\n",
"\n",
" exceedence_data.append({\n",
" 'observed_regime': comb[0],\n",
" 'forecasted_regime': comb[1],\n",
" 'exceedence_hr': hr,\n",
" 'ci_mean': ci[0],\n",
" 'ci_lower': ci[1],\n",
" 'ci_upper': ci[2],\n",
" })\n",
" \n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's try a different apporach and try split the observed swash and collision regimes at each impact duration hour. "
6 years ago
]
},
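{
"cell_type": "markdown",
"metadata": {},
"source": [
"The split criterion used in the loop below, pulled out as a standalone sketch for clarity: for a candidate elevation e, score = (fraction of swash values below e) + (fraction of collision values above e), and the best split maximises this (a score of 2.0 would be perfect separation)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch of the best-split objective used in the next cell (the loop there\n",
"# is unchanged): score(e) = P(swash < e) + P(collision > e), maximised over e.\n",
"def best_split_elevation(swash_vals, collision_vals, lo=-2, hi=2, n=100):\n",
"    eles = np.linspace(lo, hi, n)\n",
"    scores = [np.mean(swash_vals < e) + np.mean(collision_vals > e)\n",
"              for e in eles]\n",
"    return eles[int(np.argmax(scores))]"
]
},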
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from scipy.stats import norm\n",
"\n",
"best_split = []\n",
"exceedence_hrs = []\n",
"swash_mean = []\n",
"swash_95_lower = []\n",
"swash_95_upper = []\n",
"collision_mean = []\n",
"collision_95_lower = []\n",
"collision_95_upper = []\n",
"swash_median = []\n",
"swash_q1 = []\n",
"swash_q3 = []\n",
"collision_median = []\n",
"collision_q1 = []\n",
"collision_q3 = []\n",
"\n",
"for hr in tqdm_notebook([x for x in range(1,101)]):\n",
" \n",
" dists = []\n",
" plt.figure(figsize=(10,2))\n",
" for observed_regime in ['swash','collision']:\n",
" \n",
" vals = df.loc[(df.n_hrs_largest==hr) &\n",
" (df.observed_regime==observed_regime)].R_high_D_low_diff.dropna().values\n",
" \n",
" if observed_regime =='collision':\n",
" color = 'red'\n",
" label='collision'\n",
" else:\n",
" color = 'blue'\n",
" label='swash'\n",
" \n",
" plt.hist(vals, bins='auto',color=color, alpha=0.5,label=label) \n",
" plt.title(\"{} hour exceedence TWL\".format(hr))\n",
" plt.xlim([-2.5,2.5])\n",
" \n",
" dists.append(norm.fit(vals))\n",
" \n",
" # Find which elevation best splits swash and collision\n",
"# eles = [x for x in np.linspace(-2,2,1000)]\n",
"# total_cdfs = []\n",
"# for ele in eles:\n",
"# swash_cdf = norm.cdf(ele,*dists[0])\n",
"# collision_cdf = 1 - norm.cdf(ele,*dists[1])\n",
"# total_cdfs.append(swash_cdf + collision_cdf)\n",
"\n",
"# i_max = np.argmax(total_cdfs)\n",
"# best_ele = eles[i_max]\n",
"\n",
"# exceedence_hrs.append(hr)\n",
"# best_split.append(best_ele)\n",
"\n",
" # Find which elevation best splits swash and collision\n",
" eles = [x for x in np.linspace(-2,2,100)]\n",
" total_cdfs = []\n",
" swash_vals = df.loc[(df.n_hrs_largest==hr) &\n",
" (df.observed_regime=='swash')].R_high_D_low_diff.dropna().values\n",
" collision_vals = df.loc[(df.n_hrs_largest==hr) &\n",
" (df.observed_regime=='collision')].R_high_D_low_diff.dropna().values\n",
" for ele in eles:\n",
" swash_samples = np.sum( swash_vals < ele) / len(swash_vals)\n",
" collision_samples = np.sum( collision_vals > ele) / len(collision_vals) \n",
" total_cdfs.append(swash_samples + collision_samples)\n",
" \n",
" i_max = np.argmax(total_cdfs)\n",
" best_ele = eles[i_max]\n",
"\n",
" exceedence_hrs.append(hr)\n",
" best_split.append(best_ele) \n",
" \n",
" \n",
" # Store stastistics\n",
" swash_mean.append(dists[0][0])\n",
" swash_95_lower.append(norm.interval(0.5, *dists[0])[0])\n",
" swash_95_upper.append(norm.interval(0.5, *dists[0])[1])\n",
" collision_mean.append(dists[1][0])\n",
" collision_95_lower.append(norm.interval(0.5, *dists[1])[0])\n",
" collision_95_upper.append(norm.interval(0.5, *dists[1])[1])\n",
" \n",
" swash_median.append(np.percentile(swash_vals, 50))\n",
" swash_q1.append(np.percentile(swash_vals, 25))\n",
" swash_q3.append(np.percentile(swash_vals, 75))\n",
" collision_median.append(np.percentile(collision_vals, 50))\n",
" collision_q1.append(np.percentile(collision_vals, 25))\n",
" collision_q3.append(np.percentile(collision_vals, 75)) \n",
" \n",
" plt.axvline(best_ele, label='Best split (x={:.2f} m)'.format(best_ele))\n",
" plt.legend(loc='upper right', prop={'size': 10} )\n",
" plt.xlabel('$R_{high} - D_{low}$')\n",
" plt.ylabel('No. of sites')\n",
" plt.xlim([-2,2])\n",
" if hr == 80 or hr < 5 or hr==90:\n",
" plt.show()\n",
" \n",
" plt.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, let's plot our distributions for swash/collision and the best seperation between them."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(5,5))\n",
"plt.plot(best_split,exceedence_hrs, label='Best split', color='#000000', linestyle='--')\n",
"\n",
"# plt.plot(swash_mean, exceedence_hrs, label='Swash', color='#2b83ba')\n",
"# plt.fill_betweenx(exceedence_hrs,swash_95_lower,swash_95_upper, color='#2b83ba', alpha=0.2, interpolate=False)\n",
"\n",
"# plt.plot(collision_mean, exceedence_hrs, label='Collision', color='#d7191c')\n",
"# plt.fill_betweenx(exceedence_hrs,collision_95_lower,collision_95_upper, color='#d7191c', alpha=0.2, interpolate=False,label='plus 50')\n",
"\n",
"\n",
"plt.plot(swash_median, exceedence_hrs, label='Swash', color='#2b83ba')\n",
"plt.fill_betweenx(exceedence_hrs,swash_q1,swash_q3, color='#2b83ba', alpha=0.2, interpolate=False,label='Swash IQR')\n",
"\n",
"plt.plot(collision_median, exceedence_hrs, label='Collision', color='#d7191c')\n",
"plt.fill_betweenx(exceedence_hrs,collision_q1,collision_q3, color='#d7191c', alpha=0.2, interpolate=False,label='Collision IQR')\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"#===\n",
"# # Let's plot one site as well, just to check\n",
"# import random\n",
"# site_ids = list(impacts['observed'].index.unique().values)\n",
"# site_id = random.choice(site_ids)\n",
"\n",
"\n",
"# site_id = 'TREACH0011'\n",
"# site_predicted_regime = impacts['forecasted']['mean_slope_sto06'].loc[site_id].storm_regime\n",
"# site_observed_regime = impacts['observed'].loc[site_id].storm_regime\n",
"# df_site = df.loc[site_id]\n",
"# plt.plot(df_site.R_high_D_low_diff, df_site.n_hrs_largest,label='site_id={}\\n(pred={},obs={})'.format(site_id,site_predicted_regime, site_observed_regime),color='#ffffff', linestyle='--')\n",
"\n",
"\n",
"plt.title('Observed Swash/Collision - Best Split')\n",
"plt.xlabel('$R_{high} - D_{low}$ (m)')\n",
"plt.ylabel('Exceedance hours')\n",
"plt.ylim([0,100])\n",
"plt.xlim([-2,2])\n",
"plt.legend()\n",
"\n",
"# Print to figure\n",
"plt.savefig('05-best-split.png', dpi=600, bbox_inches='tight') \n",
"\n",
"plt.show()\n",
"plt.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot above shows that if Rhigh = Dlow plus/minus 0.25m, we should say the storm regime is uncertain, rather than trying to make an incorrect prediction."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_for = impacts['forecasted']['mean_slope_sto06']\n",
"df_obs = impacts['observed']\n",
"\n",
"# Join forecasted and observed impacts into same dataframe\n",
"df_for = df_for.rename(columns={'storm_regime': 'forecasted_regime'})\n",
"df_obs = df_obs.rename(columns={'storm_regime': 'observed_regime'})\n",
"df_for = df_for.merge(\n",
" df_obs.observed_regime.to_frame(), left_index=True, right_index=True)\n",
"\n",
"# Get wrong forecasts\n",
"incorrect_for = df_for.forecasted_regime != df_for.observed_regime\n",
"\n",
"# How many wrong/correct forecasts\n",
"print('There were {} correct forecasts'.format(len(df_for[~incorrect_for])))\n",
"print('There were {} incorrect forecasts'.format(len(df_for[incorrect_for])))\n",
"print('')\n",
"\n",
"# How many of these forecasts were where R_high was near D_low?\n",
"close_eles = ((df.R_high > df.dune_toe_z - 0.25) &\n",
" (df.R_high < df.dune_toe_z + 0.25))\n",
"\n",
"s = 'R_high and D_low elevations were close at {} correctly forecasted sites'\n",
"print(s.format(len(df_for[~incorrect_for & close_eles])))\n",
"\n",
"s = 'R_high and D_low elevations were close at {} wrongly forecasted sites'\n",
"print(s.format(len(df_for[incorrect_for & close_eles])))\n",
"\n",
"# df[(df.R_high>df.dune_toe_z-0.25)&(df.R_high<df.dune_toe_z+0.25)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"So we can see more than half the number of incorrect predictions by saying they're unknown, but a quarter of correct predictions will say they're unknown."
]
},
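{
"cell_type": "markdown",
"metadata": {},
"source": [
"If we did adopt the uncertain band, the rule would look something like this (hypothetical helper using the ~0.25 m band from above; not used elsewhere in this notebook):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch of a three-way regime rule with an 'uncertain' band (hypothetical\n",
"# helper implementing the +/-0.25 m idea above).\n",
"def classify_regime(R_high, D_low, band=0.25):\n",
"    if np.isnan(R_high) or np.isnan(D_low):\n",
"        return np.nan\n",
"    if R_high < D_low - band:\n",
"        return 'swash'\n",
"    if R_high > D_low + band:\n",
"        return 'collision'\n",
"    return 'uncertain'\n",
"\n",
"# Example: peak runup 10 cm above the dune toe falls inside the band\n",
"print(classify_regime(2.1, 2.0))"
]
},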
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# df_exceedence = pd.DataFrame(exceedence_data)\n",
"# df_exceedence = df_exceedence.set_index(['observed_regime','forecasted_regime','exceedence_hr'])\n",
"\n",
"# import random\n",
"# site_ids = list(impacts['observed'].index.unique().values)\n",
"# for site_id in random.sample(site_ids, 5):\n",
"\n",
"# # Plot mean ele exceedence hours for each combination\n",
"# plt.figure(figsize=(10,4))\n",
"# regime_combinations = [\n",
"# ('swash','swash'),\n",
"# ('swash','collision'),\n",
"# ('collision','swash'),\n",
"# ('collision','collision'),\n",
"# ]\n",
"\n",
"# for comb in regime_combinations:\n",
"# df_plot = df_exceedence.xs((comb[0], comb[1]), level=['observed_regime','forecasted_regime'])\n",
"# plt.plot(df_plot.ci_mean, df_plot.index.values,label='obs={}, pred={}'.format(comb[0],comb[1]))\n",
"# plt.fill_betweenx(df_plot.index.values, df_plot.ci_lower, df_plot.ci_upper, color='grey', alpha=0.2, interpolate=False)\n",
"\n",
"# plt.xlim([-2,1])\n",
"# plt.ylim([0,100])\n",
"\n",
"# # Let's plot one site as well, just to check\n",
"# site_predicted_regime = impacts['forecasted']['mean_slope_sto06'].loc[site_id].storm_regime\n",
"# site_observed_regime = impacts['observed'].loc[site_id].storm_regime\n",
"# df_site = df.loc[site_id]\n",
"# plt.plot(df_site.R_high_D_low_diff, df_site.n_hrs_largest,label='site_id={} (pred={},obs={})'.format(site_id,site_predicted_regime, site_observed_regime))\n",
"\n",
"# plt.legend(loc='upper right', prop={'size': 8})\n",
"# plt.show()\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true
},
"source": [
"# Other stuff which hasn't been tidied up"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Check the relationship between SWL-Dtoe, DSWL-Dtoe, R_high-Dtoe\n",
"Use 3D scatter plot to check the relationship between SWL-Dtoe, DSWL-Dtoe, R_high-Dtoe.\n",
"\n",
"This is moving away from time dependence..."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"[x[1] for x in df.query('forecasted_regime==\"swash\" & observed_regime==\"swash\"').iterrows()][0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"data = []\n",
"\n",
"# Iterate through each site\n",
"print('Calculating cumulate frequency of R_high for each site:')\n",
"site_ids = twls['forecasted']['mean_slope_sto06'].index.get_level_values(\n",
" 'site_id').unique().values\n",
"for site_id in tqdm_notebook(site_ids):\n",
"\n",
" # Put data into a temporary dataframe, shorter name is easier to work with\n",
" df_impacts = impacts['forecasted']['mean_slope_sto06'].loc[site_id]\n",
" df_twls = twls['forecasted']['mean_slope_sto06'].loc[site_id]\n",
"\n",
" D_low = df_impacts.dune_toe_z\n",
" if np.isnan(D_low):\n",
" continue\n",
"\n",
" # Find time where R_max is the highest\n",
" t = df_twls.R_high.idxmax()\n",
"\n",
" # Get R_high, tide and setup at that time\n",
" R_high = df_twls.loc[t].R_high\n",
" tide = df_twls.loc[t].tide\n",
" setup = df_twls.loc[t].setup\n",
"\n",
" # Calculate differences in elevation\n",
" R_high_D_low = R_high - D_low\n",
" SWL_D_low = tide - D_low\n",
" DSWL_D_low = tide + setup - D_low\n",
"\n",
" # Check which subplot we should put this site on\n",
" forecasted_regime = impacts['forecasted']['mean_slope_sto06'].loc[\n",
" site_id].storm_regime\n",
" observed_regime = impacts['observed'].loc[site_id].storm_regime\n",
"\n",
" data.append({\n",
" 'R_high_D_low': R_high_D_low,\n",
" 'SWL_D_low': SWL_D_low,\n",
" 'DSWL_D_low': DSWL_D_low,\n",
" 'forecasted_regime': forecasted_regime,\n",
" 'observed_regime': observed_regime\n",
" })\n",
"\n",
"# Turn data into a dataframe and plot\n",
"df = pd.DataFrame(data)\n",
"\n",
"# Plot swash/swash\n",
"query='forecasted_regime==\"swash\" & observed_regime==\"swash\"'\n",
"trace1 = go.Scatter3d(\n",
" x=[x[1].R_high_D_low for x in df.query(query).iterrows()],\n",
" y=[x[1].SWL_D_low for x in df.query(query).iterrows()],\n",
" z=[x[1].DSWL_D_low for x in df.query(query).iterrows()],\n",
" name='Swash/Swash',\n",
" mode='markers',\n",
" marker=dict(\n",
" size=6,\n",
" color='rgb(26,150,65)',\n",
" opacity=0.8))\n",
"\n",
"query='forecasted_regime==\"swash\" & observed_regime==\"collision\"'\n",
"trace2 = go.Scatter3d(\n",
" x=[x[1].R_high_D_low for x in df.query(query).iterrows()],\n",
" y=[x[1].SWL_D_low for x in df.query(query).iterrows()],\n",
" z=[x[1].DSWL_D_low for x in df.query(query).iterrows()],\n",
" name='Swash/Collision',\n",
" mode='markers',\n",
" marker=dict(\n",
" size=6,\n",
" color='rgb(253,174,97)',\n",
" opacity=0.8))\n",
"\n",
"query='forecasted_regime==\"collision\" & observed_regime==\"swash\"'\n",
"trace3 = go.Scatter3d(\n",
" x=[x[1].R_high_D_low for x in df.query(query).iterrows()],\n",
" y=[x[1].SWL_D_low for x in df.query(query).iterrows()],\n",
" z=[x[1].DSWL_D_low for x in df.query(query).iterrows()],\n",
" name='Collision/Swash',\n",
" mode='markers',\n",
" marker=dict(\n",
" size=6,\n",
" color='rgb(166,217,106)',\n",
" opacity=0.8))\n",
"\n",
"query='forecasted_regime==\"collision\" & observed_regime==\"collision\"'\n",
"trace4 = go.Scatter3d(\n",
" x=[x[1].R_high_D_low for x in df.query(query).iterrows()],\n",
" y=[x[1].SWL_D_low for x in df.query(query).iterrows()],\n",
" z=[x[1].DSWL_D_low for x in df.query(query).iterrows()],\n",
" name='Collsion/Collision',\n",
" mode='markers',\n",
" marker=dict(\n",
" size=6,\n",
" color='rgb(215,25,28)',\n",
" opacity=0.8))\n",
"\n",
"layout = go.Layout(\n",
" autosize=False,\n",
" width=1000,\n",
" height=700,\n",
" margin=go.layout.Margin(\n",
" l=50,\n",
" r=50,\n",
" b=100,\n",
" t=100,\n",
" pad=4\n",
" ),\n",
")\n",
"\n",
"fig = go.Figure(data=[trace1,trace2,trace3,trace4], layout=layout)\n",
"go.FigureWidget(fig)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"hidden": true
},
"source": [
"## Calculate vertical distribution of wave count SS\n",
"For each site, calculate how many waves reached a certain elevation (store as a binned histogram)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"# Helper functions\n",
"def find_nearest(array, value):\n",
" array = np.asarray(array)\n",
" idx = np.nanargmin((np.abs(array - value)))\n",
" return array[idx], idx"
]
},
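{
"cell_type": "markdown",
"metadata": {
"hidden": true
},
"source": [
"A compact sketch of the per-hour sampling used in the slow loop below: individual wave TWLs are assumed normally distributed about the mean water level (tide + setup) with sigma = S_total/4, and one TWL is drawn per wave at equally spaced quantiles. The hourly values here are hypothetical, for illustration only:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"# Sketch of the wave-by-wave TWL sampling in the next cell. The hourly\n",
"# values here are hypothetical, not taken from the dataset.\n",
"tide, setup, S_total, Tp = 0.6, 0.4, 1.2, 10.0\n",
"\n",
"distribution = stats.norm(loc=tide + setup, scale=S_total / 4)\n",
"n_waves = int(3600 / Tp)  # number of waves in one hour\n",
"quantiles = 1 - np.arange(1, n_waves + 1) / n_waves\n",
"wave_twls = distribution.ppf(quantiles)\n",
"\n",
"# The last quantile is 0, giving -inf; the real loop filters with isfinite\n",
"wave_twls = wave_twls[np.isfinite(wave_twls)]\n",
"print('{} finite wave TWLs, max = {:.2f} m'.format(len(wave_twls), wave_twls.max()))"
]
},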
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"data = []\n",
"for site_id, df_site_twl in twls['forecasted']['mean_slope_sto06'].groupby('site_id'):\n",
" \n",
" twl_eles_per_wave = []\n",
" \n",
" # Iterate through each timestamp and calculate the number of waves at each interavl.\n",
" # THIS LOOP IS SLOW\n",
" for row in df_site_twl.itertuples():\n",
" \n",
" distribution = stats.norm(loc=row.tide+row.setup, scale=row.S_total/4) # CHECK\n",
"\n",
" # Total number of waves we expect in this period\n",
" n_waves = int(3600 / row.Tp) # Check that we have 1 hour\n",
" \n",
" # Get z elevation of each wave twl in this hour and append to list\n",
" twl_eles_per_wave.extend([distribution.ppf(1-x/n_waves) for x in range(1,n_waves+1)])\n",
" \n",
" # Remove nans and infs # CHECK WHY INF\n",
" twl_eles_per_wave = list(np.asarray(twl_eles_per_wave)[np.isfinite(twl_eles_per_wave)])\n",
" \n",
" # Sort wave twl z elevations in descending list\n",
" twl_eles_per_wave.sort(reverse=True) \n",
" \n",
" # Get index of closest value of dune toe. This is the number of waves that exceeded the the dune toe\n",
" try:\n",
" _, idx = find_nearest(twl_eles_per_wave, dune_toe_z)\n",
" except:\n",
" continue\n",
" \n",
" # Get forecasted and observed impacts\n",
" forecasted_regime = impacts['forecasted']['mean_slope_sto06'].loc[site_id,'storm_regime']\n",
" observed_regime = impacts['observed'].loc[site_id,'storm_regime']\n",
" \n",
" counts, bin_edges = np.histogram(twl_eles_per_wave, bins=100) \n",
" \n",
" data.append({\n",
" 'site_id': site_id,\n",
" 'forecasted_regime': forecasted_regime,\n",
" 'observed_regime': observed_regime,\n",
" 'n_waves_exceeding_dune_toe': idx,\n",
" 'n_waves': [x for x in range(0,500,1)],\n",
" 'truncated_twl_levels': [twl_eles_per_wave[x] for x in range(0,500,1)],\n",
" 'truncated_dune_toe_z': df_profile_features_crest_toes.loc[(site_id,'prestorm'),'dune_toe_z'],\n",
" 'full_counts': counts,\n",
" 'full_bin_edges': bin_edges,\n",
" })\n",
" \n",
" print('Done {}'.format(site_id))\n",
"\n",
"data_twl = data\n",
"# df = pd.DataFrame(data)\n",
"# df = df.set_index('site_id')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"fig = tools.make_subplots(\n",
" rows=2,\n",
" cols=2,\n",
" specs=[[{}, {}], [{}, {}]],\n",
" subplot_titles=('Swash/Swash', 'Swash/Collision', \n",
" 'Collision/Swash', 'Collision/Collision'),\n",
" shared_xaxes=True, shared_yaxes=True,)\n",
"\n",
"data = []\n",
"for site in data_twl:\n",
" if site['forecasted_regime'] == 'swash' and site[\n",
" 'observed_regime'] == 'swash':\n",
" x_col = 1\n",
" y_col = 1\n",
" elif site['forecasted_regime'] == 'collision' and site[\n",
" 'observed_regime'] == 'collision':\n",
" x_col = 2\n",
" y_col = 2\n",
" elif site['forecasted_regime'] == 'swash' and site[\n",
" 'observed_regime'] == 'collision':\n",
" x_col = 2\n",
" y_col = 1\n",
" elif site['forecasted_regime'] == 'collision' and site[\n",
" 'observed_regime'] == 'swash':\n",
" x_col = 1\n",
" y_col = 2\n",
" else:\n",
" continue\n",
"\n",
" fig.append_trace(\n",
" go.Scattergl(\n",
" x=[x - site['dune_toe_z'] for x in site['twl_levels']],\n",
" y=site['n_waves'],\n",
" name=site['site_id'],\n",
" line = dict(\n",
" color = ('rgba(22, 22, 22, 0.2)'),\n",
" width = 0.5,)),\n",
" x_col,\n",
" y_col)\n",
"\n",
"# layout = go.Layout(\n",
"# xaxis=dict(domain=[0, 0.45]),\n",
"# yaxis=dict(\n",
"# domain=[0, 0.45],\n",
"# type='log',\n",
"# ),\n",
"# xaxis2=dict(domain=[0.55, 1]),\n",
"# xaxis4=dict(domain=[0.55, 1], anchor='y4'),\n",
"# yaxis3=dict(\n",
"# domain=[0.55, 1],\n",
"# type='log',\n",
"# ),\n",
"# yaxis4=dict(\n",
"# domain=[0.55, 1],\n",
"# anchor='x4',\n",
"# type='log',\n",
"# ))\n",
"\n",
"fig['layout'].update(showlegend=False, title='Specs with Subplot Title',height=800,)\n",
"\n",
"for ax in ['yaxis','yaxis2']:\n",
"# fig['layout'][ax]['type']='log'\n",
" fig['layout'][ax]['range']= [0,100]\n",
"\n",
"for ax in ['xaxis', 'xaxis2']:\n",
" fig['layout'][ax]['range']= [-1,1]\n",
"\n",
"go.FigureWidget(fig)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"fig['layout']['yaxis']"
]
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}