Compare commits

...

6 Commits

Author SHA1 Message Date
Chris Leaman 3a1d87eaeb Update notebooks 6 years ago
Chris Leaman f616f56fd3 Optimize volume change calculations 6 years ago
Chris Leaman d06a5ab576 Removes TWL forecast using foreshore slope from Makefile
Predicting TWL using time-varying foreshore slope is pretty slow, so let's remove it from our data analysis pipeline. The code is still there if we want to call it, but for the time being, let's leave this model out when we're looking at comparisons.
6 years ago
Chris Leaman a293d5d691 Fix incorrect calculation of pre/post change point
If pre and post storm profiles returned to similar elevations within the sampled profile length, the change point detection would select the most seaward point and incorrectly calculate the swash zone volumes. This change looks at the slope of the difference between the two profiles and assumes that this difference should be increasing at the location of the change point.
6 years ago
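The rule this commit describes can be sketched as follows — a minimal, hypothetical reconstruction on synthetic profiles (the repository's actual implementation uses its own `crossings()` helper inside `volume_change()`; see the diff below). It finds zero-crossings of the pre/post elevation difference and keeps only those where the difference is increasing:

```python
# Hypothetical sketch of the change-point rule from this commit message,
# using synthetic data; names here are illustrative, not the repo's code.
import numpy as np

def change_point(x, z_pre, z_post):
    """Most seaward zero-crossing of (z_pre - z_post) at which the
    difference is increasing, i.e. where storm change begins."""
    z_diff = z_pre - z_post
    slope = np.gradient(z_diff, x)
    idx = np.where(np.diff(np.sign(z_diff)) != 0)[0]  # all zero-crossings
    idx = [i for i in idx if slope[i] > 0]            # keep increasing ones
    return x[idx[-1]] if idx else np.nan

x = np.linspace(0, 100, 501)
z_pre = 5 - 0.04 * x                                  # planar beach profile
# storm erodes the profile between x=20 and x=70, deposits a little seaward
z_post = z_pre - 0.6 * np.sin(np.clip((x - 20) / 50, 0, 1) * np.pi)
z_post = z_post + 0.2 * np.clip((x - 70) / 30, 0, 1)
print(change_point(x, z_pre, z_post))  # ~20 m: where erosion starts
```

Without the slope filter, the most seaward crossing (here at ~70 m, where the eroded profile rejoins the prestorm one) would be picked instead, which is the bug being fixed.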
Chris Leaman c17051788c Fix formatting 6 years ago
Chris Leaman 34ada7c12f Add checking of inputs early in slope_from_profile
This allows us to exit early and suppresses the annoying warnings.
6 years ago
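As a standalone illustration of the guard-clause pattern this commit describes (a minimal sketch with hypothetical names — the actual change to `slope_from_profile` appears in the diff further below): validate inputs up front and return `None`, instead of letting `scipy.stats.linregress` warn or fail part-way through.

```python
# Minimal sketch of early input checking before a linear regression;
# slope_or_none() is a hypothetical name, not the repository's function.
import numpy as np
from scipy import stats

def slope_or_none(x, z):
    x, z = np.asarray(x, dtype=float), np.asarray(z, dtype=float)
    mask = ~np.isnan(x) & ~np.isnan(z)
    if mask.sum() < 2:  # linregress needs at least two valid points
        return None     # exit early, no warnings raised
    slope, _, _, _, _ = stats.linregress(x[mask], z[mask])
    return slope

print(slope_or_none([0, 10, 20], [3.0, 2.0, 1.2]))  # -0.09
print(slope_or_none([0, 10], [np.nan, np.nan]))     # None
```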

@@ -53,9 +53,6 @@ pull-data: ##@data Copies data from data backup directory to ./data/
 # Command for activating our virtual environment and calling the CLI entry point
 PYTHON_CLI = activate ./.venv && python ./src/cli.py
 
-impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/interim/impacts_forecasted_premean_slope_sto06.csv ./data/interim/impacts_observed.csv ##@products makes obsered and forecasted impacts
 
 ### Parses raw matfiles
 ./data/raw/processed_shorelines/orientations.mat: ./data/raw/processed_shorelines/profiles.mat
@@ -94,7 +91,6 @@ impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/inte
 ### TWLs
-twls: ./data/interim/twl_foreshore_slope_sto06.csv
 twls: ./data/interim/twl_premean_slope_sto06.csv
 twls: ./data/interim/twl_premean_slope_hol86.csv
 twls: ./data/interim/twl_premean_slope_nie91.csv
@@ -320,7 +316,6 @@ twls: ./data/interim/twl_postintertidal_slope_pow18.csv
 ### IMPACTS
 impacts: ./data/interim/impacts_observed.csv
-impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv
 impacts: ./data/interim/impacts_forecasted_premean_slope_sto06.csv
 impacts: ./data/interim/impacts_forecasted_premean_slope_hol86.csv
 impacts: ./data/interim/impacts_forecasted_premean_slope_nie91.csv

@@ -485,20 +485,6 @@
 "print('Done!')"
 ]
 },
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
-},
 {
 "cell_type": "markdown",
 "metadata": {},
@@ -507,13 +493,6 @@
 "Use sklearn metrics to generate classification reports for each forecasting model."
 ]
 },
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
-},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -582,6 +561,355 @@
 " df_for.storm_regime.astype(cat_type).cat.codes.values)\n",
 " print('{}: {:.2f}'.format(model,m))"
 ]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": []
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from sklearn.metrics import confusion_matrix\n",
+"# Check confusion matrix\n",
+"for model in impacts['forecasted']:\n",
+" df_for = impacts['forecasted'][model]\n",
+" df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n",
+"\n",
+" m = sklearn.metrics.confusion_matrix(\n",
+" df_obs.storm_regime.astype(cat_type).cat.codes.values,\n",
+" df_for.storm_regime.astype(cat_type).cat.codes.values,\n",
+" labels=[0,1,2,3])\n",
+" print('{}\\n{}'.format(model,m))"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Create confusion matrix figure\n",
+"From https://github.com/wcipriano/pretty-print-confusion-matrix/blob/master/confusion_matrix_pretty_print.py"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# -*- coding: utf-8 -*-\n",
+"\"\"\"\n",
+"plot a pretty confusion matrix with seaborn\n",
+"Created on Mon Jun 25 14:17:37 2018\n",
+"@author: Wagner Cipriano - wagnerbhbr - gmail - CEFETMG / MMC\n",
+"REFerences:\n",
+" https://www.mathworks.com/help/nnet/ref/plotconfusion.html\n",
+" https://stackoverflow.com/questions/28200786/how-to-plot-scikit-learn-classification-report\n",
+" https://stackoverflow.com/questions/5821125/how-to-plot-confusion-matrix-with-string-axis-rather-than-integer-in-python\n",
+" https://www.programcreek.com/python/example/96197/seaborn.heatmap\n",
+" https://stackoverflow.com/questions/19233771/sklearn-plot-confusion-matrix-with-labels/31720054\n",
+" http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py\n",
+"\"\"\"\n",
+"\n",
+"#imports\n",
+"from pandas import DataFrame\n",
+"import numpy as np\n",
+"import matplotlib.pyplot as plt\n",
+"import matplotlib.font_manager as fm\n",
+"from matplotlib.collections import QuadMesh\n",
+"import seaborn as sn\n",
+"\n",
+"\n",
+"def get_new_fig(fn, figsize=[9,9]):\n",
+" \"\"\" Init graphics \"\"\"\n",
+" fig1 = plt.figure(fn, figsize)\n",
+" ax1 = fig1.gca() #Get Current Axis\n",
+" ax1.cla() # clear existing plot\n",
+" return fig1, ax1\n",
+"#\n",
+"\n",
+"def configcell_text_and_colors(array_df, lin, col, oText, facecolors, posi, fz, fmt, show_null_values=0):\n",
+" \"\"\"\n",
+" config cell text and colors\n",
+" and return text elements to add and to dell\n",
+" @TODO: use fmt\n",
+" \"\"\"\n",
+" text_add = []; text_del = [];\n",
+" cell_val = array_df[lin][col]\n",
+" tot_all = array_df[-1][-1]\n",
+" per = (float(cell_val) / tot_all) * 100\n",
+" curr_column = array_df[:,col]\n",
+" ccl = len(curr_column)\n",
+"\n",
+" #last line and/or last column\n",
+" if(col == (ccl - 1)) or (lin == (ccl - 1)):\n",
+" #tots and percents\n",
+" if(cell_val != 0):\n",
+" if(col == ccl - 1) and (lin == ccl - 1):\n",
+" tot_rig = 0\n",
+" for i in range(array_df.shape[0] - 1):\n",
+" tot_rig += array_df[i][i]\n",
+" per_ok = (float(tot_rig) / cell_val) * 100\n",
+" elif(col == ccl - 1):\n",
+" tot_rig = array_df[lin][lin]\n",
+" per_ok = (float(tot_rig) / cell_val) * 100\n",
+" elif(lin == ccl - 1):\n",
+" tot_rig = array_df[col][col]\n",
+" per_ok = (float(tot_rig) / cell_val) * 100\n",
+" per_err = 100 - per_ok\n",
+" else:\n",
+" per_ok = per_err = 0\n",
+"\n",
+" per_ok_s = ['%.1f%%'%(per_ok), '100%'] [per_ok == 100]\n",
+"\n",
+" #text to DEL\n",
+" text_del.append(oText)\n",
+"\n",
+" #text to ADD\n",
+" font_prop = fm.FontProperties(weight='bold', size=fz)\n",
+" text_kwargs = dict(color='w', ha=\"center\", va=\"center\", gid='sum', fontproperties=font_prop)\n",
+" lis_txt = ['%d'%(cell_val), per_ok_s, '%.1f%%'%(per_err)]\n",
+" lis_kwa = [text_kwargs]\n",
+" dic = text_kwargs.copy(); dic['color'] = 'g'; lis_kwa.append(dic);\n",
+" dic = text_kwargs.copy(); dic['color'] = 'r'; lis_kwa.append(dic);\n",
+" lis_pos = [(oText._x, oText._y-0.3), (oText._x, oText._y), (oText._x, oText._y+0.3)]\n",
+" for i in range(len(lis_txt)):\n",
+" newText = dict(x=lis_pos[i][0], y=lis_pos[i][1], text=lis_txt[i], kw=lis_kwa[i])\n",
+" #print 'lin: %s, col: %s, newText: %s' %(lin, col, newText)\n",
+" text_add.append(newText)\n",
+" #print '\\n'\n",
+"\n",
+" #set background color for sum cells (last line and last column)\n",
+" carr = [0.27, 0.30, 0.27, 1.0]\n",
+" if(col == ccl - 1) and (lin == ccl - 1):\n",
+" carr = [0.17, 0.20, 0.17, 1.0]\n",
+" facecolors[posi] = carr\n",
+"\n",
+" else:\n",
+" if(per > 0):\n",
+" txt = '%s\\n%.1f%%' %(cell_val, per)\n",
+" else:\n",
+" if(show_null_values == 0):\n",
+" txt = ''\n",
+" elif(show_null_values == 1):\n",
+" txt = '0'\n",
+" else:\n",
+" txt = '0\\n0.0%'\n",
+" oText.set_text(txt)\n",
+"\n",
+" #main diagonal\n",
+" if(col == lin):\n",
+" #set color of the textin the diagonal to white\n",
+" oText.set_color('w')\n",
+" # set background color in the diagonal to blue\n",
+" facecolors[posi] = [0.35, 0.8, 0.55, 1.0]\n",
+" else:\n",
+" oText.set_color('r')\n",
+"\n",
+" return text_add, text_del\n",
+"#\n",
+"\n",
+"def insert_totals(df_cm):\n",
+" \"\"\" insert total column and line (the last ones) \"\"\"\n",
+" sum_col = []\n",
+" for c in df_cm.columns:\n",
+" sum_col.append( df_cm[c].sum() )\n",
+" sum_lin = []\n",
+" for item_line in df_cm.iterrows():\n",
+" sum_lin.append( item_line[1].sum() )\n",
+" df_cm['sum_lin'] = sum_lin\n",
+" sum_col.append(np.sum(sum_lin))\n",
+" df_cm.loc['sum_col'] = sum_col\n",
+" #print ('\\ndf_cm:\\n', df_cm, '\\n\\b\\n')\n",
+"#\n",
+"\n",
+"def pretty_plot_confusion_matrix(df_cm, annot=True, cmap=\"Oranges\", fmt='.2f', fz=11,\n",
+" lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='y'):\n",
+" \"\"\"\n",
+" print conf matrix with default layout (like matlab)\n",
+" params:\n",
+" df_cm dataframe (pandas) without totals\n",
+" annot print text in each cell\n",
+" cmap Oranges,Oranges_r,YlGnBu,Blues,RdBu, ... see:\n",
+" fz fontsize\n",
+" lw linewidth\n",
+" pred_val_axis where to show the prediction values (x or y axis)\n",
+" 'col' or 'x': show predicted values in columns (x axis) instead lines\n",
+" 'lin' or 'y': show predicted values in lines (y axis)\n",
+" \"\"\"\n",
+" if(pred_val_axis in ('col', 'x')):\n",
+" xlbl = 'Predicted'\n",
+" ylbl = 'Actual'\n",
+" else:\n",
+" xlbl = 'Actual'\n",
+" ylbl = 'Predicted'\n",
+" df_cm = df_cm.T\n",
+"\n",
+" # create \"Total\" column\n",
+" insert_totals(df_cm)\n",
+"\n",
+" #this is for print allways in the same window\n",
+" fig, ax1 = get_new_fig('Conf matrix default', figsize)\n",
+"\n",
+" #thanks for seaborn\n",
+" ax = sn.heatmap(df_cm, annot=annot, annot_kws={\"size\": fz}, linewidths=lw, ax=ax1,\n",
+" cbar=cbar, cmap=cmap, linecolor='w', fmt=fmt)\n",
+"\n",
+" #set ticklabels rotation\n",
+" ax.set_xticklabels(ax.get_xticklabels(), rotation = 45, fontsize = 10)\n",
+" ax.set_yticklabels(ax.get_yticklabels(), rotation = 25, fontsize = 10)\n",
+"\n",
+" # Turn off all the ticks\n",
+" for t in ax.xaxis.get_major_ticks():\n",
+" t.tick1On = False\n",
+" t.tick2On = False\n",
+" for t in ax.yaxis.get_major_ticks():\n",
+" t.tick1On = False\n",
+" t.tick2On = False\n",
+"\n",
+" #face colors list\n",
+" quadmesh = ax.findobj(QuadMesh)[0]\n",
+" facecolors = quadmesh.get_facecolors()\n",
+"\n",
+" #iter in text elements\n",
+" array_df = np.array( df_cm.to_records(index=False).tolist() )\n",
+" text_add = []; text_del = [];\n",
+" posi = -1 #from left to right, bottom to top.\n",
+" for t in ax.collections[0].axes.texts: #ax.texts:\n",
+" pos = np.array( t.get_position()) - [0.5,0.5]\n",
+" lin = int(pos[1]); col = int(pos[0]);\n",
+" posi += 1\n",
+" #print ('>>> pos: %s, posi: %s, val: %s, txt: %s' %(pos, posi, array_df[lin][col], t.get_text()))\n",
+"\n",
+" #set text\n",
+" txt_res = configcell_text_and_colors(array_df, lin, col, t, facecolors, posi, fz, fmt, show_null_values)\n",
+"\n",
+" text_add.extend(txt_res[0])\n",
+" text_del.extend(txt_res[1])\n",
+"\n",
+" #remove the old ones\n",
+" for item in text_del:\n",
+" item.remove()\n",
+" #append the new ones\n",
+" for item in text_add:\n",
+" ax.text(item['x'], item['y'], item['text'], **item['kw'])\n",
+"\n",
+" #titles and legends\n",
+" ax.set_title('Confusion matrix')\n",
+" ax.set_xlabel(xlbl)\n",
+" ax.set_ylabel(ylbl)\n",
+" plt.tight_layout() #set layout slim\n",
+" plt.show()\n",
+" return fig\n",
+"#\n",
+"\n",
+"def plot_confusion_matrix_from_data(y_test, predictions, columns=None, annot=True, cmap=\"Oranges\",\n",
+" fmt='.2f', fz=11, lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='lin'):\n",
+" \"\"\"\n",
+" plot confusion matrix function with y_test (actual values) and predictions (predic),\n",
+" whitout a confusion matrix yet\n",
+" \"\"\"\n",
+" from sklearn.metrics import confusion_matrix\n",
+" from pandas import DataFrame\n",
+"\n",
+" #data\n",
+" if(not columns):\n",
+" #labels axis integer:\n",
+" ##columns = range(1, len(np.unique(y_test))+1)\n",
+" #labels axis string:\n",
+" from string import ascii_uppercase\n",
+" columns = ['class %s' %(i) for i in list(ascii_uppercase)[0:len(np.unique(y_test))]]\n",
+"\n",
+" confm = confusion_matrix(y_test, predictions)\n",
+" cmap = 'Oranges';\n",
+" fz = 11;\n",
+" figsize=[9,9];\n",
+" show_null_values = 2\n",
+" df_cm = DataFrame(confm, index=columns, columns=columns)\n",
+" pretty_plot_confusion_matrix(df_cm, fz=fz, cmap=cmap, figsize=figsize, show_null_values=show_null_values, pred_val_axis=pred_val_axis)\n",
+"#\n",
+"\n",
+"\n",
+"\n",
+"#\n",
+"#TEST functions\n",
+"#\n",
+"def _test_cm():\n",
+" #test function with confusion matrix done\n",
+" array = np.array( [[13, 0, 1, 0, 2, 0],\n",
+" [ 0, 50, 2, 0, 10, 0],\n",
+" [ 0, 13, 16, 0, 0, 3],\n",
+" [ 0, 0, 0, 13, 1, 0],\n",
+" [ 0, 40, 0, 1, 15, 0],\n",
+" [ 0, 0, 0, 0, 0, 20]])\n",
+" #get pandas dataframe\n",
+" df_cm = DataFrame(array, index=range(1,7), columns=range(1,7))\n",
+" #colormap: see this and choose your more dear\n",
+" cmap = 'PuRd'\n",
+" pretty_plot_confusion_matrix(df_cm, cmap=cmap)\n",
+"#\n",
+"\n",
+"def _test_data_class():\n",
+" \"\"\" test function with y_test (actual values) and predictions (predic) \"\"\"\n",
+" #data\n",
+" y_test = np.array([1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5])\n",
+" predic = np.array([1,2,4,3,5, 1,2,4,3,5, 1,2,3,4,4, 1,4,3,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,3,3,5, 1,2,3,3,5, 1,2,3,4,4, 1,2,3,4,1, 1,2,3,4,1, 1,2,3,4,1, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5])\n",
+" \"\"\"\n",
+" Examples to validate output (confusion matrix plot)\n",
+" actual: 5 and prediction 1 >> 3\n",
+" actual: 2 and prediction 4 >> 1\n",
+" actual: 3 and prediction 4 >> 10\n",
+" \"\"\"\n",
+" columns = []\n",
+" annot = True;\n",
+" cmap = 'Oranges';\n",
+" fmt = '.2f'\n",
+" lw = 0.5\n",
+" cbar = False\n",
+" show_null_values = 2\n",
+" pred_val_axis = 'y'\n",
+" #size::\n",
+" fz = 12;\n",
+" figsize = [9,9];\n",
+" if(len(y_test) > 10):\n",
+" fz=9; figsize=[14,14];\n",
+" plot_confusion_matrix_from_data(y_test, predic, columns,\n",
+" annot, cmap, fmt, fz, lw, cbar, figsize, show_null_values, pred_val_axis)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# plot_confusion_matrix_from_data(y_test, predictions, columns=None, annot=True, cmap=\"Oranges\",\n",
+"# fmt='.2f', fz=11, lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='lin'):\n",
+"\n",
+"matplotlib.rcParams['text.usetex'] = False\n",
+"\n",
+"forecast_model = 'postintertidal_slope_sto06'\n",
+"\n",
+"df_for = impacts['forecasted'][forecast_model]\n",
+"df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n",
+"observed_regimes = df_obs.storm_regime.astype(cat_type).cat.codes.values\n",
+"forecasted_regimes = df_for.storm_regime.astype(cat_type).cat.codes.values\n",
+"\n",
+"\n",
+"confm = confusion_matrix(observed_regimes, forecasted_regimes,labels=[0,1,2,3])\n",
+"labels=['swash','collision','overwash','inundation']\n",
+"df_cm = DataFrame(confm, index=labels, columns=labels)\n",
+"\n",
+"fig = pretty_plot_confusion_matrix(df_cm, annot=True, cmap=\"Oranges\", fmt='.1f', fz=13,\n",
+" lw=0.1, cbar=False, figsize=[8,5], show_null_values=1, pred_val_axis='y')\n",
+"\n",
+"fig.savefig('11_confusion_matrix',dpi=600)"
]
 }
 ],
 "metadata": {
@@ -612,9 +940,14 @@
 "title_cell": "Table of Contents",
 "title_sidebar": "Contents",
 "toc_cell": false,
-"toc_position": {},
+"toc_position": {
+"height": "calc(100% - 180px)",
+"left": "10px",
+"top": "150px",
+"width": "286.391px"
+},
 "toc_section_display": true,
-"toc_window_display": false
+"toc_window_display": true
 },
 "varInspector": {
 "cols": {

@@ -365,12 +365,244 @@
 "f.savefig('11_change_in_slopes_accuracy_classes.png',dpi=600)"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Swash zone volume change histogram"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"How much variation in beach width change can we expect in the swash regime?"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"f, ax = plt.subplots(figsize=(5,4))\n",
+"\n",
+"sns.distplot(df.loc[df.storm_regime=='swash'].width_msl_change_pct.dropna(), \n",
+" kde=False);\n",
+"\n",
+"ax.set_title('Distribution of beach width change for swash regime')\n",
+"ax.set_xlabel('$\\Delta$ beach width (%)')\n",
+"ax.set_ylabel('Count')\n",
+"\n",
+"f.savefig('11_change_in_beach_width.png',dpi=600)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"# Split swash regime"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
-"source": []
+"source": [
+"df.columns"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"df['beta_Pxscum'] = df.beta_prestorm_mean * df.Ecum\n",
+"df['neg_swash_vol_change'] = -df.swash_vol_change\n",
+"\n",
+"ax = sns.scatterplot(data=df.loc[df.storm_regime=='swash'],\n",
+" x=\"beta_Pxscum\", \n",
+" y='neg_swash_vol_change')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"corr = df.corr(method ='pearson') \n",
+"sns.heatmap(corr, \n",
+" xticklabels=corr.columns,\n",
+" yticklabels=corr.columns,\n",
+" cmap='RdBu_r')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"len(corr.columns)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"import scipy.stats as ss\n",
+"#https://stackoverflow.com/a/24469099\n",
+"\n",
+"corr = df.corr(method ='pearson') \n",
+"\n",
+"n=len(corr.columns)\n",
+"t=corr*np.sqrt((n-2)/(1-corr*corr))\n",
+"\n",
+"\n",
+"pvals = ss.t.cdf(t, n-2)\n",
+"\n",
+"corr.values"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"code_folding": []
+},
+"outputs": [],
+"source": [
+"from matplotlib.patches import Ellipse\n",
+"def corrplot(data, pvalues, labels):\n",
+" \"\"\"Creates a correlation plot of the passed data.\n",
+" The function returns the plot which can then be shown with\n",
+" plot.show(), saved to a file with plot.savefig(), or manipulated\n",
+" in any other standard matplotlib way.\n",
+" data is the correlation matrix, a 2-D numpy array containing\n",
+" the pairwise correlations between variables;\n",
+" pvalues is a matrix containing the pvalue for each corresponding\n",
+" correlation value; if none it is assumed to be the zero matrix\n",
+" labels is an array containing the variable names\n",
+" https://github.com/louridas/corrplot/blob/master/corrplot.py\n",
+" \"\"\"\n",
+"\n",
+" plt.figure(1)\n",
+"\n",
+" column_labels = labels\n",
+" row_labels = labels\n",
+" \n",
+" f = plt.figure(figsize=(8,8))\n",
+" ax = plt.subplot(1, 1, 1, aspect='equal')\n",
+"\n",
+" width, height = data.shape\n",
+" num_cols, num_rows = width, height\n",
+"\n",
+" if pvalues is None:\n",
+" pvalues = np.zeros([num_rows, num_cols])\n",
+" \n",
+" shrink = 0.9\n",
+"\n",
+" poscm = cm.get_cmap('Blues')\n",
+" negcm = cm.get_cmap('Oranges')\n",
+"\n",
+" for x in range(width):\n",
+" for y in range(height):\n",
+" d = data[x, y]\n",
+" c = pvalues[x, y]\n",
+" rotate = -45 if d > 0 else +45\n",
+" clrmap = poscm if d >= 0 else negcm\n",
+" d_abs = np.abs(d)\n",
+" ellipse = Ellipse((x, y),\n",
+" width=1 * shrink,\n",
+" height=(shrink - d_abs*shrink),\n",
+" angle=rotate)\n",
+" ellipse.set_edgecolor('black')\n",
+" ellipse.set_facecolor(clrmap(d_abs))\n",
+" if c > 0.05:\n",
+" ellipse.set_linestyle('dotted')\n",
+" ellipse.set_alpha(0.5)\n",
+" ax.add_artist(ellipse)\n",
+"\n",
+" ax.set_xlim(-1, num_cols)\n",
+" ax.set_ylim(-1, num_rows)\n",
+" \n",
+" ax.xaxis.tick_top()\n",
+" xtickslocs = np.arange(len(row_labels))\n",
+" ax.set_xticks(xtickslocs)\n",
+" ax.set_xticklabels(row_labels, rotation=30, fontsize='small', ha='left')\n",
+"\n",
+" ax.invert_yaxis()\n",
+" ytickslocs = np.arange(len(row_labels))\n",
+" ax.set_yticks(ytickslocs)\n",
+" ax.set_yticklabels(column_labels, fontsize='small')\n",
+"\n",
+" return plt\n",
+"\n",
+"import string\n",
+"num_rows = 20\n",
+"num_cols = num_rows\n",
+"\n",
+"min_length = 10\n",
+"max_length = 20\n",
+"\n",
+"alnums = list(string.ascii_uppercase + string.digits)\n",
+"labels = [''.join(np.random.choice(alnums,\n",
+" np.random.randint(min_length,\n",
+" max_length)))\n",
+" for y in np.arange(num_rows)]\n",
+"\n",
+"\n",
+"data = np.random.random([num_rows, num_cols])\n",
+"\n",
+"# data[np.random.choice(num_rows, num_rows / 2), :] *= -1\n",
+"\n",
+"np.fill_diagonal(data, 1)\n",
+"\n",
+"data_symm = (data + data.T) / 2\n",
+"\n",
+"# plot = corrplot(data_symm, None, labels)\n",
+"plot = corrplot(corr.values, pvals, corr.columns.tolist())\n",
+"plot.show()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from scipy import stats\n",
+"\n",
+"x_col = 'beta_prestorm_intertidal'\n",
+"y_col = \"beta_diff_intertidal\"\n",
+"# y_col = 'swash_vol_change'\n",
+"# x_col = \"Pxscum\"\n",
+"data = df.loc[df.storm_regime=='swash']\n",
+"\n",
+"slope, intercept, r_value, p_value, std_err = stats.linregress(data[x_col],data[y_col])\n",
+"\n",
+"ax = sns.regplot(data=data,\n",
+" y=y_col, \n",
+" x=x_col, marker=\"+\",fit_reg=True, scatter_kws={'linewidth':1},\n",
+" line_kws={'label':\"y={0:.2f}x+{1:.2f}\\n(r2={2:.2f})\".format(slope,intercept,r_value**2)})\n",
+"ax.legend()\n",
+"\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# df.loc[(df.storm_regime=='swash')&(df.swash_vol_change<10)].sort_values(by=['Pxscum'],ascending=False)\n",
+"df.loc[(df.swash_vol_change>200)].sort_values(by=['Pxscum'],ascending=False)"
+]
 }
 ],
 "metadata": {

@@ -316,8 +316,15 @@ def slope_from_profile(
     """
 
     # Need all data to get the slope
-    if any([x is None for x in [profile_x, profile_z, top_elevation, btm_elevation]]):
-        return None
+    # Check validity of profile arrays and return None if data is not good
+    for profile in [profile_x, profile_z]:
+        if all(np.isnan(profile)) or all(x is None for x in profile):
+            return None
+
+    # Check validity of elevation values
+    for ele in [top_elevation, btm_elevation]:
+        if np.isnan(ele) or ele is None:
+            return None
 
     end_points = {"top": {"z": top_elevation}, "btm": {"z": btm_elevation}}
@@ -377,6 +384,11 @@ def slope_from_profile(
         True if end_points["top"]["x"] < pts < end_points["btm"]["x"] else False
         for pts in profile_x
     ]
+
+    # Need at least two points to do linear regression
+    if sum(profile_mask) < 2:
+        return None
+
     slope_x = np.array(profile_x)[profile_mask].tolist()
     slope_z = np.array(profile_z)[profile_mask].tolist()
     slope, _, _, _, _ = stats.linregress(slope_x, slope_z)

@@ -2,7 +2,11 @@ import click
 import numpy as np
 import pandas as pd
 from scipy.integrate import simps
+from scipy.signal import savgol_filter
+from scipy.interpolate import interp1d
+from numpy import ma as ma
+from tqdm import tqdm
 
 from logs import setup_logging
 from utils import crossings, get_i_or_default
 from analysis.forecast_twl import get_mean_slope, get_intertidal_slope
@@ -22,149 +26,101 @@ def return_first_or_nan(l):
     return l[0]
 
 
-def volume_change(df_profiles, df_profile_features, zone):
+def round_up_to_odd(f):
+    """
+    https://stackoverflow.com/a/31648815
+    :param f:
+    :return:
+    """
+    return int(np.ceil(f) // 2 * 2 + 1)
+
+
+def volume_change(df_profiles, df_profile_features):
     """
     Calculates how much volume change there is between prestorm and poststorm profiles.
     :param df_profiles:
     :param df_profile_features:
-    :param zone: Either 'swash' or 'dune_face'
     :return:
     """
-    logger.info("Calculating change in beach volume in {} zone".format(zone))
+    logger.info("Calculating change in beach volume")
 
     df_vol_changes = pd.DataFrame(
         index=df_profile_features.index.get_level_values("site_id").unique()
    )
+    df_profiles = df_profiles.dropna(subset=["z"])
     df_profiles = df_profiles.sort_index()
     sites = df_profiles.groupby(level=["site_id"])
 
-    for site_id, df_site in sites:
-        logger.debug(
-            "Calculating change in beach volume at {} in {} zone".format(site_id, zone)
-        )
-
-        prestorm_row = df_profile_features.loc[(site_id, "prestorm")]
-        prestorm_dune_toe_x = prestorm_row.dune_toe_x
-        prestorm_dune_crest_x = prestorm_row.dune_crest_x
-
-        # If no dune toe has been defined, Dlow = Dhigh. Refer to Sallenger (2000).
-        if np.isnan(prestorm_dune_toe_x):
-            prestorm_dune_toe_x = prestorm_dune_crest_x
-
-        # If no prestorm and poststorm profiles, skip site and continue
-        profile_lengths = [
-            len(df_site.xs(x, level="profile_type")) for x in ["prestorm", "poststorm"]
-        ]
-        if any([length == 0 for length in profile_lengths]):
-            continue
-
-        # Find last x coordinate where we have both prestorm and poststorm measurements. If we don't do this,
-        # the prestorm and poststorm values are going to be calculated over different lengths.
-        df_zone = df_site.dropna(subset=["z"])
-        x_last_obs = min(
-            [
-                max(
-                    df_zone.query(
-                        "profile_type == '{}'".format(profile_type)
-                    ).index.get_level_values("x")
-                )
-                for profile_type in ["prestorm", "poststorm"]
-            ]
-        )
-        x_first_obs = max(
-            [
-                min(
-                    df_zone.query(
-                        "profile_type == '{}'".format(profile_type)
-                    ).index.get_level_values("x")
-                )
-                for profile_type in ["prestorm", "poststorm"]
-            ]
-        )
-
-        # Where we want to measure pre and post storm volume is dependent on the zone selected
-        if zone == "swash":
-            x_min = max(prestorm_dune_toe_x, x_first_obs)
-            x_max = x_last_obs
-        elif zone == "dune_face":
-            x_min = max(prestorm_dune_crest_x, x_first_obs)
-            x_max = min(prestorm_dune_toe_x, x_last_obs)
-        else:
-            logger.warning("Zone argument not properly specified. Please check")
-            x_min = None
-            x_max = None
-
-        # Now, compute the volume of sand between the x-coordinates prestorm_dune_toe_x and x_swash_last for both prestorm
-        # and post storm profiles.
-        prestorm_vol = beach_volume(
-            x=df_zone.query("profile_type=='prestorm'").index.get_level_values("x"),
-            z=df_zone.query("profile_type=='prestorm'").z,
-            x_min=x_min,
-            x_max=x_max,
-        )
-        poststorm_vol = beach_volume(
-            x=df_zone.query("profile_type=='poststorm'").index.get_level_values("x"),
-            z=df_zone.query("profile_type=='poststorm'").z,
-            x_min=x_min,
-            x_max=x_max,
-        )
-
-        # Identify the x location where our pre and post storm profiles start to differ. This is so changes not due to
-        # the storm are not included when calculating volume.
-        df_prestorm = (
-            df_site.xs("prestorm", level="profile_type").z.rename("z_pre").to_frame()
-        )
-        df_poststorm = (
-            df_site.xs("poststorm", level="profile_type").z.rename("z_post").to_frame()
-        )
-        df_diff = df_prestorm.merge(df_poststorm, on=["site_id", "x"])
-        df_diff["z_diff"] = df_diff.z_pre - df_diff.z_post
-
-        # Find all locations where the difference in pre and post storm is zero. Take the most seaward location as the
-        # x location where our profiles are the same.
-        x_crossings = crossings(df_diff.index.get_level_values("x"), df_diff.z_diff, 0)
-        if len(x_crossings) != 0:
-            x_change_point = x_crossings[-1]
-        else:
-            x_change_point = np.nan
-
-        # # For debugging
-        # import matplotlib.pyplot as plt
-        # f,(ax1,ax2) = plt.subplots(2,1,sharex=True)
-        # ax1.plot(df_prestorm.index.get_level_values('x'), df_prestorm.z_pre,label='prestorm')
-        # ax1.plot(df_poststorm.index.get_level_values('x'), df_poststorm.z_post,label='poststorm')
-        # ax1.axvline(x_crossings[-1], color='red', linestyle='--', linewidth=0.5, label='Change point')
-        # ax1.legend()
-        # ax1.set_title(site_id)
-        # ax1.set_ylabel('elevation (m AHD)')
-        # ax2.plot(df_diff.index.get_level_values('x'), df_diff.z_diff)
-        # ax2.set_xlabel('x coordinate (m)')
-        # ax2.set_ylabel('elevation diff (m)')
-        # ax2.axvline(x_crossings[-1],color='red',linestyle='--',linewidth=0.5)
-        # plt.show()
-
-        diff_vol = beach_volume(
-            x=df_diff.index.get_level_values("x"),
-            z=df_diff.z_diff,
-            x_min=np.nanmax([x_min, x_change_point]),
-            x_max=np.nanmax([x_max, x_change_point]),
-        )
-
-        # Here, if cannot calculate the difference volume, assume no volume change
-        if np.isnan(diff_vol):
-            diff_vol = 0
-
-        # Base pct change on diff volume
-        if diff_vol == 0:
-            pct_change = 0
-        else:
-            pct_change = diff_vol / prestorm_vol * 100
-
-        df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = prestorm_vol
-        df_vol_changes.loc[site_id, "poststorm_{}_vol".format(zone)] = poststorm_vol
-        df_vol_changes.loc[site_id, "{}_vol_change".format(zone)] = diff_vol
-        df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = pct_change
+    for site_id, df_site in tqdm(sites):
+
+        params = {}
+
+        for zone in ["dune", "swash"]:
+
+            params[zone] = {}
+
+            for profile_type in ["prestorm", "poststorm"]:
+
+                params[zone][profile_type] = {}
+
+                # Define the edges of the swash and dunes where we want to calculate subaerial volume.
+                if zone == "swash":
+                    params[zone][profile_type]["x_min"] = df_profile_features.loc[
+                        (site_id, profile_type)
+                    ].dune_toe_x
+                    params[zone][profile_type]["x_max"] = max(
+                        df_profiles.loc[(site_id, profile_type)].index.get_level_values(
+                            "x"
+                        )
+                    )
+
+                    # For profiles with no Dlow value, we take Dhigh as the minimum value to calculate swash
+                    if np.isnan(params[zone][profile_type]["x_min"]):
+                        params[zone][profile_type]["x_min"] = df_profile_features.loc[
+                            (site_id, profile_type)
+                        ].dune_crest_x
+
+                elif zone == "dune":
+                    params[zone][profile_type]["x_min"] = df_profile_features.loc[
+                        (site_id, profile_type)
+                    ].dune_crest_x
+                    params[zone][profile_type]["x_max"] = df_profile_features.loc[
+                        (site_id, profile_type)
+                    ].dune_toe_x
+
+                    # For profiles with no Dlow value, the dune is undefined and we cannot calculate a dune volume.
+
+                # Calculate subaerial volume based on our x min and maxes
+                params[zone][profile_type]["subaerial_vol"] = beach_volume(
+                    x=df_profiles.loc[(site_id, profile_type)].index.get_level_values(
+                        "x"
+                    ),
+                    z=df_profiles.loc[(site_id, profile_type)].z.values,
+                    x_min=params[zone][profile_type]["x_min"],
+                    x_max=params[zone][profile_type]["x_max"],
+                )
+
+            # TODO Landward limit still needs to be incorporated
+
+            params[zone]["vol_change"] = (
+                params[zone]["poststorm"]["subaerial_vol"]
+                - params[zone]["prestorm"]["subaerial_vol"]
+            )
+            params[zone]["pct_change"] = (
+                params[zone]["vol_change"] / params[zone]["prestorm"]["subaerial_vol"]
+            )
+
+            df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = params[zone][
+                "prestorm"
+            ]["subaerial_vol"]
+            df_vol_changes.loc[site_id, "poststorm_{}_vol".format(zone)] = params[zone][
+                "poststorm"
+            ]["subaerial_vol"]
+            df_vol_changes.loc[site_id, "{}_vol_change".format(zone)] = params[zone][
+                "vol_change"
+            ]
+            df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = params[zone][
+                "pct_change"
+            ]
 
     return df_vol_changes
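`beach_volume`, called throughout the rewritten `volume_change` above, is not part of this diff. As a rough sketch only — assuming it integrates elevation over the given x window with `scipy.integrate.simps` (imported at the top of this file); the masking and NaN handling here are assumptions, not the repository's code:

```python
# Assumed shape of a subaerial beach volume helper; not the repo's code.
import numpy as np
from scipy.integrate import simps

def beach_volume_sketch(x, z, x_min, x_max):
    """Integrate elevation z(x) between x_min and x_max (m3 per m of beach)."""
    if np.isnan(x_min) or np.isnan(x_max):
        return np.nan
    x, z = np.asarray(x, dtype=float), np.asarray(z, dtype=float)
    mask = (x_min <= x) & (x <= x_max) & ~np.isnan(z)
    if mask.sum() < 2:  # not enough points to integrate
        return np.nan
    return simps(z[mask], x[mask])

# Example: planar 1:10 slope from z=5 at x=0 down to z=0 at x=50
x = np.linspace(0, 50, 101)
z = 5 - 0.1 * x
print(beach_volume_sketch(x, z, 0, 50))  # 125.0
```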
@@ -197,11 +153,11 @@ def storm_regime(df_observed_impacts):
     """
     logger.info("Getting observed storm regimes")
 
-    swash = (df_observed_impacts.dune_face_pct_change <= 2) & (
-        df_observed_impacts.dune_face_vol_change <= 3
+    swash = (df_observed_impacts.dune_pct_change <= 2) & (
+        df_observed_impacts.dune_vol_change <= 3
     )
-    collision = (df_observed_impacts.dune_face_pct_change >= 2) | (
-        df_observed_impacts.dune_face_vol_change > 3
+    collision = (df_observed_impacts.dune_pct_change >= 2) | (
+        df_observed_impacts.dune_vol_change > 3
     )
 
     df_observed_impacts.loc[swash, "storm_regime"] = "swash"
@@ -258,13 +214,9 @@ def create_observed_impacts(
     # TODO Review volume change with changing dune toe/crests
     logger.info("Getting pre/post storm volumes")
-    df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash")
-    df_dune_face_vol_changes = volume_change(
-        df_profiles, df_profile_features, zone="dune_face"
-    )
-    df_observed_impacts = df_observed_impacts.join(
-        [df_swash_vol_changes, df_dune_face_vol_changes]
-    )
+    df_vol_changes = volume_change(df_profiles, df_profile_features)
+    df_observed_impacts = df_observed_impacts.join(df_vol_changes)
 
     # Classify regime based on volume changes
     df_observed_impacts = storm_regime(df_observed_impacts)
@@ -343,8 +295,12 @@ def create_observed_impacts(
         ele=0,
         col_name="width_msl_poststorm",
     )
-    df_width_msl_change_m = (df_width_msl_poststorm - df_width_msl_prestorm).rename('width_msl_change_m')
-    df_width_msl_change_pct = (df_width_msl_change_m / df_width_msl_prestorm * 100).rename('width_msl_change_pct')
+    df_width_msl_change_m = (df_width_msl_poststorm - df_width_msl_prestorm).rename(
+        "width_msl_change_m"
+    )
+    df_width_msl_change_pct = (
+        df_width_msl_change_m / df_width_msl_prestorm * 100
+    ).rename("width_msl_change_pct")
 
     # Join beach width change onto observed impacts dataframe
     df_observed_impacts = pd.concat(
