Compare commits


No commits in common. '3a1d87eaebb867a9579ad8ced347c59af3a55691' and '708606391f272c8cf2b2832611e0485c49cb121c' have entirely different histories.

@@ -53,6 +53,9 @@ pull-data: ##@data Copies data from data backup directory to ./data/
 # Command for activating our virtual environment and calling the CLI entry point
 PYTHON_CLI = activate ./.venv && python ./src/cli.py
 
+impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/interim/impacts_forecasted_premean_slope_sto06.csv ./data/interim/impacts_observed.csv ##@products makes observed and forecasted impacts
+
 ### Parses raw matfiles
 ./data/raw/processed_shorelines/orientations.mat: ./data/raw/processed_shorelines/profiles.mat
@@ -91,6 +94,7 @@ PYTHON_CLI = activate ./.venv && python ./src/cli.py
 ### TWLs
+twls: ./data/interim/twl_foreshore_slope_sto06.csv
 twls: ./data/interim/twl_premean_slope_sto06.csv
 twls: ./data/interim/twl_premean_slope_hol86.csv
 twls: ./data/interim/twl_premean_slope_nie91.csv
@@ -316,6 +320,7 @@ twls: ./data/interim/twl_postintertidal_slope_pow18.csv
 ### IMPACTS
 impacts: ./data/interim/impacts_observed.csv
+impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv
 impacts: ./data/interim/impacts_forecasted_premean_slope_sto06.csv
 impacts: ./data/interim/impacts_forecasted_premean_slope_hol86.csv
 impacts: ./data/interim/impacts_forecasted_premean_slope_nie91.csv

@@ -485,6 +485,20 @@
     "print('Done!')"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -493,6 +507,13 @@
     "Use sklearn metrics to generate classification reports for each forecasting model."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -561,355 +582,6 @@
     "    df_for.storm_regime.astype(cat_type).cat.codes.values)\n",
     "    print('{}: {:.2f}'.format(model,m))"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from sklearn.metrics import confusion_matrix\n",
-    "# Check confusion matrix\n",
-    "for model in impacts['forecasted']:\n",
-    "    df_for = impacts['forecasted'][model]\n",
-    "    df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n",
-    "\n",
-    "    m = sklearn.metrics.confusion_matrix(\n",
-    "        df_obs.storm_regime.astype(cat_type).cat.codes.values,\n",
-    "        df_for.storm_regime.astype(cat_type).cat.codes.values,\n",
-    "        labels=[0,1,2,3])\n",
-    "    print('{}\\n{}'.format(model,m))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Create confusion matrix figure\n",
-    "From https://github.com/wcipriano/pretty-print-confusion-matrix/blob/master/confusion_matrix_pretty_print.py"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# -*- coding: utf-8 -*-\n",
-    "\"\"\"\n",
-    "plot a pretty confusion matrix with seaborn\n",
-    "Created on Mon Jun 25 14:17:37 2018\n",
-    "@author: Wagner Cipriano - wagnerbhbr - gmail - CEFETMG / MMC\n",
-    "REFerences:\n",
-    "  https://www.mathworks.com/help/nnet/ref/plotconfusion.html\n",
-    "  https://stackoverflow.com/questions/28200786/how-to-plot-scikit-learn-classification-report\n",
-    "  https://stackoverflow.com/questions/5821125/how-to-plot-confusion-matrix-with-string-axis-rather-than-integer-in-python\n",
-    "  https://www.programcreek.com/python/example/96197/seaborn.heatmap\n",
-    "  https://stackoverflow.com/questions/19233771/sklearn-plot-confusion-matrix-with-labels/31720054\n",
-    "  http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py\n",
-    "\"\"\"\n",
-    "\n",
-    "#imports\n",
-    "from pandas import DataFrame\n",
-    "import numpy as np\n",
-    "import matplotlib.pyplot as plt\n",
-    "import matplotlib.font_manager as fm\n",
-    "from matplotlib.collections import QuadMesh\n",
-    "import seaborn as sn\n",
-    "\n",
-    "\n",
-    "def get_new_fig(fn, figsize=[9,9]):\n",
-    "    \"\"\" Init graphics \"\"\"\n",
-    "    fig1 = plt.figure(fn, figsize)\n",
-    "    ax1 = fig1.gca() #Get Current Axis\n",
-    "    ax1.cla() # clear existing plot\n",
-    "    return fig1, ax1\n",
-    "#\n",
-    "\n",
-    "def configcell_text_and_colors(array_df, lin, col, oText, facecolors, posi, fz, fmt, show_null_values=0):\n",
-    "    \"\"\"\n",
-    "    config cell text and colors\n",
-    "    and return text elements to add and to dell\n",
-    "    @TODO: use fmt\n",
-    "    \"\"\"\n",
-    "    text_add = []; text_del = [];\n",
-    "    cell_val = array_df[lin][col]\n",
-    "    tot_all = array_df[-1][-1]\n",
-    "    per = (float(cell_val) / tot_all) * 100\n",
-    "    curr_column = array_df[:,col]\n",
-    "    ccl = len(curr_column)\n",
-    "\n",
-    "    #last line and/or last column\n",
-    "    if(col == (ccl - 1)) or (lin == (ccl - 1)):\n",
-    "        #tots and percents\n",
-    "        if(cell_val != 0):\n",
-    "            if(col == ccl - 1) and (lin == ccl - 1):\n",
-    "                tot_rig = 0\n",
-    "                for i in range(array_df.shape[0] - 1):\n",
-    "                    tot_rig += array_df[i][i]\n",
-    "                per_ok = (float(tot_rig) / cell_val) * 100\n",
-    "            elif(col == ccl - 1):\n",
-    "                tot_rig = array_df[lin][lin]\n",
-    "                per_ok = (float(tot_rig) / cell_val) * 100\n",
-    "            elif(lin == ccl - 1):\n",
-    "                tot_rig = array_df[col][col]\n",
-    "                per_ok = (float(tot_rig) / cell_val) * 100\n",
-    "            per_err = 100 - per_ok\n",
-    "        else:\n",
-    "            per_ok = per_err = 0\n",
-    "\n",
-    "        per_ok_s = ['%.1f%%'%(per_ok), '100%'] [per_ok == 100]\n",
-    "\n",
-    "        #text to DEL\n",
-    "        text_del.append(oText)\n",
-    "\n",
-    "        #text to ADD\n",
-    "        font_prop = fm.FontProperties(weight='bold', size=fz)\n",
-    "        text_kwargs = dict(color='w', ha=\"center\", va=\"center\", gid='sum', fontproperties=font_prop)\n",
-    "        lis_txt = ['%d'%(cell_val), per_ok_s, '%.1f%%'%(per_err)]\n",
-    "        lis_kwa = [text_kwargs]\n",
-    "        dic = text_kwargs.copy(); dic['color'] = 'g'; lis_kwa.append(dic);\n",
-    "        dic = text_kwargs.copy(); dic['color'] = 'r'; lis_kwa.append(dic);\n",
-    "        lis_pos = [(oText._x, oText._y-0.3), (oText._x, oText._y), (oText._x, oText._y+0.3)]\n",
-    "        for i in range(len(lis_txt)):\n",
-    "            newText = dict(x=lis_pos[i][0], y=lis_pos[i][1], text=lis_txt[i], kw=lis_kwa[i])\n",
-    "            #print 'lin: %s, col: %s, newText: %s' %(lin, col, newText)\n",
-    "            text_add.append(newText)\n",
-    "        #print '\\n'\n",
-    "\n",
-    "        #set background color for sum cells (last line and last column)\n",
-    "        carr = [0.27, 0.30, 0.27, 1.0]\n",
-    "        if(col == ccl - 1) and (lin == ccl - 1):\n",
-    "            carr = [0.17, 0.20, 0.17, 1.0]\n",
-    "        facecolors[posi] = carr\n",
-    "\n",
-    "    else:\n",
-    "        if(per > 0):\n",
-    "            txt = '%s\\n%.1f%%' %(cell_val, per)\n",
-    "        else:\n",
-    "            if(show_null_values == 0):\n",
-    "                txt = ''\n",
-    "            elif(show_null_values == 1):\n",
-    "                txt = '0'\n",
-    "            else:\n",
-    "                txt = '0\\n0.0%'\n",
-    "        oText.set_text(txt)\n",
-    "\n",
-    "        #main diagonal\n",
-    "        if(col == lin):\n",
-    "            #set color of the textin the diagonal to white\n",
-    "            oText.set_color('w')\n",
-    "            # set background color in the diagonal to blue\n",
-    "            facecolors[posi] = [0.35, 0.8, 0.55, 1.0]\n",
-    "        else:\n",
-    "            oText.set_color('r')\n",
-    "\n",
-    "    return text_add, text_del\n",
-    "#\n",
-    "\n",
-    "def insert_totals(df_cm):\n",
-    "    \"\"\" insert total column and line (the last ones) \"\"\"\n",
-    "    sum_col = []\n",
-    "    for c in df_cm.columns:\n",
-    "        sum_col.append( df_cm[c].sum() )\n",
-    "    sum_lin = []\n",
-    "    for item_line in df_cm.iterrows():\n",
-    "        sum_lin.append( item_line[1].sum() )\n",
-    "    df_cm['sum_lin'] = sum_lin\n",
-    "    sum_col.append(np.sum(sum_lin))\n",
-    "    df_cm.loc['sum_col'] = sum_col\n",
-    "    #print ('\\ndf_cm:\\n', df_cm, '\\n\\b\\n')\n",
-    "#\n",
-    "\n",
-    "def pretty_plot_confusion_matrix(df_cm, annot=True, cmap=\"Oranges\", fmt='.2f', fz=11,\n",
-    "      lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='y'):\n",
-    "    \"\"\"\n",
-    "      print conf matrix with default layout (like matlab)\n",
-    "      params:\n",
-    "        df_cm          dataframe (pandas) without totals\n",
-    "        annot          print text in each cell\n",
-    "        cmap           Oranges,Oranges_r,YlGnBu,Blues,RdBu, ... see:\n",
-    "        fz             fontsize\n",
-    "        lw             linewidth\n",
-    "        pred_val_axis  where to show the prediction values (x or y axis)\n",
-    "                       'col' or 'x': show predicted values in columns (x axis) instead lines\n",
-    "                       'lin' or 'y': show predicted values in lines (y axis)\n",
-    "    \"\"\"\n",
-    "    if(pred_val_axis in ('col', 'x')):\n",
-    "        xlbl = 'Predicted'\n",
-    "        ylbl = 'Actual'\n",
-    "    else:\n",
-    "        xlbl = 'Actual'\n",
-    "        ylbl = 'Predicted'\n",
-    "        df_cm = df_cm.T\n",
-    "\n",
-    "    # create \"Total\" column\n",
-    "    insert_totals(df_cm)\n",
-    "\n",
-    "    #this is for print allways in the same window\n",
-    "    fig, ax1 = get_new_fig('Conf matrix default', figsize)\n",
-    "\n",
-    "    #thanks for seaborn\n",
-    "    ax = sn.heatmap(df_cm, annot=annot, annot_kws={\"size\": fz}, linewidths=lw, ax=ax1,\n",
-    "                    cbar=cbar, cmap=cmap, linecolor='w', fmt=fmt)\n",
-    "\n",
-    "    #set ticklabels rotation\n",
-    "    ax.set_xticklabels(ax.get_xticklabels(), rotation = 45, fontsize = 10)\n",
-    "    ax.set_yticklabels(ax.get_yticklabels(), rotation = 25, fontsize = 10)\n",
-    "\n",
-    "    # Turn off all the ticks\n",
-    "    for t in ax.xaxis.get_major_ticks():\n",
-    "        t.tick1On = False\n",
-    "        t.tick2On = False\n",
-    "    for t in ax.yaxis.get_major_ticks():\n",
-    "        t.tick1On = False\n",
-    "        t.tick2On = False\n",
-    "\n",
-    "    #face colors list\n",
-    "    quadmesh = ax.findobj(QuadMesh)[0]\n",
-    "    facecolors = quadmesh.get_facecolors()\n",
-    "\n",
-    "    #iter in text elements\n",
-    "    array_df = np.array( df_cm.to_records(index=False).tolist() )\n",
-    "    text_add = []; text_del = [];\n",
-    "    posi = -1 #from left to right, bottom to top.\n",
-    "    for t in ax.collections[0].axes.texts: #ax.texts:\n",
-    "        pos = np.array( t.get_position()) - [0.5,0.5]\n",
-    "        lin = int(pos[1]); col = int(pos[0]);\n",
-    "        posi += 1\n",
-    "        #print ('>>> pos: %s, posi: %s, val: %s, txt: %s' %(pos, posi, array_df[lin][col], t.get_text()))\n",
-    "\n",
-    "        #set text\n",
-    "        txt_res = configcell_text_and_colors(array_df, lin, col, t, facecolors, posi, fz, fmt, show_null_values)\n",
-    "\n",
-    "        text_add.extend(txt_res[0])\n",
-    "        text_del.extend(txt_res[1])\n",
-    "\n",
-    "    #remove the old ones\n",
-    "    for item in text_del:\n",
-    "        item.remove()\n",
-    "    #append the new ones\n",
-    "    for item in text_add:\n",
-    "        ax.text(item['x'], item['y'], item['text'], **item['kw'])\n",
-    "\n",
-    "    #titles and legends\n",
-    "    ax.set_title('Confusion matrix')\n",
-    "    ax.set_xlabel(xlbl)\n",
-    "    ax.set_ylabel(ylbl)\n",
-    "    plt.tight_layout() #set layout slim\n",
-    "    plt.show()\n",
-    "    return fig\n",
-    "#\n",
-    "\n",
-    "def plot_confusion_matrix_from_data(y_test, predictions, columns=None, annot=True, cmap=\"Oranges\",\n",
-    "      fmt='.2f', fz=11, lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='lin'):\n",
-    "    \"\"\"\n",
-    "    plot confusion matrix function with y_test (actual values) and predictions (predic),\n",
-    "    whitout a confusion matrix yet\n",
-    "    \"\"\"\n",
-    "    from sklearn.metrics import confusion_matrix\n",
-    "    from pandas import DataFrame\n",
-    "\n",
-    "    #data\n",
-    "    if(not columns):\n",
-    "        #labels axis integer:\n",
-    "        ##columns = range(1, len(np.unique(y_test))+1)\n",
-    "        #labels axis string:\n",
-    "        from string import ascii_uppercase\n",
-    "        columns = ['class %s' %(i) for i in list(ascii_uppercase)[0:len(np.unique(y_test))]]\n",
-    "\n",
-    "    confm = confusion_matrix(y_test, predictions)\n",
-    "    cmap = 'Oranges';\n",
-    "    fz = 11;\n",
-    "    figsize=[9,9];\n",
-    "    show_null_values = 2\n",
-    "    df_cm = DataFrame(confm, index=columns, columns=columns)\n",
-    "    pretty_plot_confusion_matrix(df_cm, fz=fz, cmap=cmap, figsize=figsize, show_null_values=show_null_values, pred_val_axis=pred_val_axis)\n",
-    "#\n",
-    "\n",
-    "\n",
-    "\n",
-    "#\n",
-    "#TEST functions\n",
-    "#\n",
-    "def _test_cm():\n",
-    "    #test function with confusion matrix done\n",
-    "    array = np.array( [[13, 0, 1, 0, 2, 0],\n",
-    "                       [ 0, 50, 2, 0, 10, 0],\n",
-    "                       [ 0, 13, 16, 0, 0, 3],\n",
-    "                       [ 0, 0, 0, 13, 1, 0],\n",
-    "                       [ 0, 40, 0, 1, 15, 0],\n",
-    "                       [ 0, 0, 0, 0, 0, 20]])\n",
-    "    #get pandas dataframe\n",
-    "    df_cm = DataFrame(array, index=range(1,7), columns=range(1,7))\n",
-    "    #colormap: see this and choose your more dear\n",
-    "    cmap = 'PuRd'\n",
-    "    pretty_plot_confusion_matrix(df_cm, cmap=cmap)\n",
-    "#\n",
-    "\n",
-    "def _test_data_class():\n",
-    "    \"\"\" test function with y_test (actual values) and predictions (predic) \"\"\"\n",
-    "    #data\n",
-    "    y_test = np.array([1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5])\n",
-    "    predic = np.array([1,2,4,3,5, 1,2,4,3,5, 1,2,3,4,4, 1,4,3,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,3,3,5, 1,2,3,3,5, 1,2,3,4,4, 1,2,3,4,1, 1,2,3,4,1, 1,2,3,4,1, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5])\n",
-    "    \"\"\"\n",
-    "    Examples to validate output (confusion matrix plot)\n",
-    "        actual: 5 and prediction 1 >> 3\n",
-    "        actual: 2 and prediction 4 >> 1\n",
-    "        actual: 3 and prediction 4 >> 10\n",
-    "    \"\"\"\n",
-    "    columns = []\n",
-    "    annot = True;\n",
-    "    cmap = 'Oranges';\n",
-    "    fmt = '.2f'\n",
-    "    lw = 0.5\n",
-    "    cbar = False\n",
-    "    show_null_values = 2\n",
-    "    pred_val_axis = 'y'\n",
-    "    #size::\n",
-    "    fz = 12;\n",
-    "    figsize = [9,9];\n",
-    "    if(len(y_test) > 10):\n",
-    "        fz=9; figsize=[14,14];\n",
-    "    plot_confusion_matrix_from_data(y_test, predic, columns,\n",
-    "      annot, cmap, fmt, fz, lw, cbar, figsize, show_null_values, pred_val_axis)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# plot_confusion_matrix_from_data(y_test, predictions, columns=None, annot=True, cmap=\"Oranges\",\n",
-    "#      fmt='.2f', fz=11, lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='lin'):\n",
-    "\n",
-    "matplotlib.rcParams['text.usetex'] = False\n",
-    "\n",
-    "forecast_model = 'postintertidal_slope_sto06'\n",
-    "\n",
-    "df_for = impacts['forecasted'][forecast_model]\n",
-    "df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n",
-    "observed_regimes = df_obs.storm_regime.astype(cat_type).cat.codes.values\n",
-    "forecasted_regimes = df_for.storm_regime.astype(cat_type).cat.codes.values\n",
-    "\n",
-    "\n",
-    "confm = confusion_matrix(observed_regimes, forecasted_regimes,labels=[0,1,2,3])\n",
-    "labels=['swash','collision','overwash','inundation']\n",
-    "df_cm = DataFrame(confm, index=labels, columns=labels)\n",
-    "\n",
-    "fig = pretty_plot_confusion_matrix(df_cm, annot=True, cmap=\"Oranges\", fmt='.1f', fz=13,\n",
-    "      lw=0.1, cbar=False, figsize=[8,5], show_null_values=1, pred_val_axis='y')\n",
-    "\n",
-    "fig.savefig('11_confusion_matrix',dpi=600)"
-   ]
   }
  ],
  "metadata": {
@@ -940,14 +612,9 @@
    "title_cell": "Table of Contents",
    "title_sidebar": "Contents",
    "toc_cell": false,
-   "toc_position": {
-    "height": "calc(100% - 180px)",
-    "left": "10px",
-    "top": "150px",
-    "width": "286.391px"
-   },
+   "toc_position": {},
    "toc_section_display": true,
-   "toc_window_display": true
+   "toc_window_display": false
  },
  "varInspector": {
   "cols": {

@@ -365,244 +365,12 @@
     "f.savefig('11_change_in_slopes_accuracy_classes.png',dpi=600)"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Swash zone volume change histogram"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "How much beach width change variation can we expect in the swash regime?"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "f, ax = plt.subplots(figsize=(5,4))\n",
-    "\n",
-    "sns.distplot(df.loc[df.storm_regime=='swash'].width_msl_change_pct.dropna(), \n",
-    "             kde=False);\n",
-    "\n",
-    "ax.set_title('Distribution of beach width change for swash regime')\n",
-    "ax.set_xlabel('$\\Delta$ beach width (%)')\n",
-    "ax.set_ylabel('Count')\n",
-    "\n",
-    "f.savefig('11_change_in_beach_width.png',dpi=600)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Split swash regime"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df.columns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df['beta_Pxscum'] = df.beta_prestorm_mean * df.Ecum\n",
-    "df['neg_swash_vol_change'] = -df.swash_vol_change\n",
-    "\n",
-    "ax = sns.scatterplot(data=df.loc[df.storm_regime=='swash'],\n",
-    "                     x=\"beta_Pxscum\", \n",
-    "                     y='neg_swash_vol_change')"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "corr = df.corr(method ='pearson') \n",
-    "sns.heatmap(corr, \n",
-    "            xticklabels=corr.columns,\n",
-    "            yticklabels=corr.columns,\n",
-    "            cmap='RdBu_r')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(corr.columns)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import scipy.stats as ss\n",
-    "#https://stackoverflow.com/a/24469099\n",
-    "\n",
-    "corr = df.corr(method ='pearson') \n",
-    "\n",
-    "n=len(corr.columns)\n",
-    "t=corr*np.sqrt((n-2)/(1-corr*corr))\n",
-    "\n",
-    "\n",
-    "pvals = ss.t.cdf(t, n-2)\n",
-    "\n",
-    "corr.values"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "code_folding": []
-   },
-   "outputs": [],
-   "source": [
-    "from matplotlib.patches import Ellipse\n",
-    "def corrplot(data, pvalues, labels):\n",
-    "    \"\"\"Creates a correlation plot of the passed data.\n",
-    "    The function returns the plot which can then be shown with\n",
-    "    plot.show(), saved to a file with plot.savefig(), or manipulated\n",
-    "    in any other standard matplotlib way.\n",
-    "    data is the correlation matrix, a 2-D numpy array containing\n",
-    "    the pairwise correlations between variables;\n",
-    "    pvalues is a matrix containing the pvalue for each corresponding\n",
-    "    correlation value; if none it is assumed to be the zero matrix\n",
-    "    labels is an array containing the variable names\n",
-    "    https://github.com/louridas/corrplot/blob/master/corrplot.py\n",
-    "    \"\"\"\n",
-    "\n",
-    "    plt.figure(1)\n",
-    "\n",
-    "    column_labels = labels\n",
-    "    row_labels = labels\n",
-    "    \n",
-    "    f = plt.figure(figsize=(8,8))\n",
-    "    ax = plt.subplot(1, 1, 1, aspect='equal')\n",
-    "\n",
-    "    width, height = data.shape\n",
-    "    num_cols, num_rows = width, height\n",
-    "\n",
-    "    if pvalues is None:\n",
-    "        pvalues = np.zeros([num_rows, num_cols])\n",
-    "    \n",
-    "    shrink = 0.9\n",
-    "\n",
-    "    poscm = cm.get_cmap('Blues')\n",
-    "    negcm = cm.get_cmap('Oranges')\n",
-    "\n",
-    "    for x in range(width):\n",
-    "        for y in range(height):\n",
-    "            d = data[x, y]\n",
-    "            c = pvalues[x, y]\n",
-    "            rotate = -45 if d > 0 else +45\n",
-    "            clrmap = poscm if d >= 0 else negcm\n",
-    "            d_abs = np.abs(d)\n",
-    "            ellipse = Ellipse((x, y),\n",
-    "                              width=1 * shrink,\n",
-    "                              height=(shrink - d_abs*shrink),\n",
-    "                              angle=rotate)\n",
-    "            ellipse.set_edgecolor('black')\n",
-    "            ellipse.set_facecolor(clrmap(d_abs))\n",
-    "            if c > 0.05:\n",
-    "                ellipse.set_linestyle('dotted')\n",
-    "                ellipse.set_alpha(0.5)\n",
-    "            ax.add_artist(ellipse)\n",
-    "\n",
-    "    ax.set_xlim(-1, num_cols)\n",
-    "    ax.set_ylim(-1, num_rows)\n",
-    "    \n",
-    "    ax.xaxis.tick_top()\n",
-    "    xtickslocs = np.arange(len(row_labels))\n",
-    "    ax.set_xticks(xtickslocs)\n",
-    "    ax.set_xticklabels(row_labels, rotation=30, fontsize='small', ha='left')\n",
-    "\n",
-    "    ax.invert_yaxis()\n",
-    "    ytickslocs = np.arange(len(row_labels))\n",
-    "    ax.set_yticks(ytickslocs)\n",
-    "    ax.set_yticklabels(column_labels, fontsize='small')\n",
-    "\n",
-    "    return plt\n",
-    "\n",
-    "import string\n",
-    "num_rows = 20\n",
-    "num_cols = num_rows\n",
-    "\n",
-    "min_length = 10\n",
-    "max_length = 20\n",
-    "\n",
-    "alnums = list(string.ascii_uppercase + string.digits)\n",
-    "labels = [''.join(np.random.choice(alnums,\n",
-    "                                   np.random.randint(min_length,\n",
-    "                                                     max_length)))\n",
-    "          for y in np.arange(num_rows)]\n",
-    "\n",
-    "\n",
-    "data = np.random.random([num_rows, num_cols])\n",
-    "\n",
-    "# data[np.random.choice(num_rows, num_rows / 2), :] *= -1\n",
-    "\n",
-    "np.fill_diagonal(data, 1)\n",
-    "\n",
-    "data_symm = (data + data.T) / 2\n",
-    "\n",
-    "# plot = corrplot(data_symm, None, labels)\n",
-    "plot = corrplot(corr.values, pvals, corr.columns.tolist())\n",
-    "plot.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from scipy import stats\n",
-    "\n",
-    "x_col = 'beta_prestorm_intertidal'\n",
-    "y_col = \"beta_diff_intertidal\"\n",
-    "# y_col = 'swash_vol_change'\n",
-    "# x_col = \"Pxscum\"\n",
-    "data = df.loc[df.storm_regime=='swash']\n",
-    "\n",
-    "slope, intercept, r_value, p_value, std_err = stats.linregress(data[x_col],data[y_col])\n",
-    "\n",
-    "ax = sns.regplot(data=data,\n",
-    "                 y=y_col, \n",
-    "                 x=x_col, marker=\"+\",fit_reg=True, scatter_kws={'linewidth':1},\n",
-    "                 line_kws={'label':\"y={0:.2f}x+{1:.2f}\\n(r2={2:.2f})\".format(slope,intercept,r_value**2)})\n",
-    "ax.legend()\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# df.loc[(df.storm_regime=='swash')&(df.swash_vol_change<10)].sort_values(by=['Pxscum'],ascending=False)\n",
-    "df.loc[(df.swash_vol_change>200)].sort_values(by=['Pxscum'],ascending=False)"
-   ]
+   "source": []
   }
  ],
 "metadata": {

@@ -316,14 +316,7 @@ def slope_from_profile(
     """
     # Need all data to get the slope
-    # Check validity of profile arrays and return None if data is not good
-    for profile in [profile_x, profile_z]:
-        if all(np.isnan(profile)) or all(x is None for x in profile):
-            return None
-
-    # Check validity of elevation values
-    for ele in [top_elevation, btm_elevation]:
-        if np.isnan(ele) or ele is None:
-            return None
+    if any([x is None for x in [profile_x, profile_z, top_elevation, btm_elevation]]):
+        return None
 
     end_points = {"top": {"z": top_elevation}, "btm": {"z": btm_elevation}}
@@ -384,11 +377,6 @@ def slope_from_profile(
         True if end_points["top"]["x"] < pts < end_points["btm"]["x"] else False
         for pts in profile_x
     ]
-
-    # Need at least two points to do linear regression
-    if sum(profile_mask) < 2:
-        return None
-
     slope_x = np.array(profile_x)[profile_mask].tolist()
     slope_z = np.array(profile_z)[profile_mask].tolist()
     slope, _, _, _, _ = stats.linregress(slope_x, slope_z)
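Condensed, the simplified function now reads roughly as below. The name and the mask here are simplified stand-ins for slope_from_profile, not the repository's exact code; note that stats.linregress fails if fewer than two points survive the mask, which is what the deleted two-point guard used to prevent.

import numpy as np
from scipy import stats

def slope_between(profile_x, profile_z, top_x, btm_x):
    # Single consolidated validity check, as in the new version
    if any(v is None for v in [profile_x, profile_z, top_x, btm_x]):
        return None
    mask = [top_x < x < btm_x for x in profile_x]
    slope_x = np.array(profile_x)[mask]
    slope_z = np.array(profile_z)[mask]
    # Fails if the mask keeps fewer than two points (the removed guard returned None instead)
    slope, _, _, _, _ = stats.linregress(slope_x, slope_z)
    return slope

print(slope_between([0, 5, 10, 15], [4.0, 3.0, 2.0, 1.5], 0, 15))  # -0.2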

@@ -2,11 +2,7 @@ import click
 import numpy as np
 import pandas as pd
 from scipy.integrate import simps
-from scipy.signal import savgol_filter
-from scipy.interpolate import interp1d
-from numpy import ma as ma
-from tqdm import tqdm
 from logs import setup_logging
 from utils import crossings, get_i_or_default
 from analysis.forecast_twl import get_mean_slope, get_intertidal_slope
@@ -26,101 +22,149 @@ def return_first_or_nan(l):
     return l[0]
 
 
-def round_up_to_odd(f):
-    """
-    https://stackoverflow.com/a/31648815
-    :param f:
-    :return:
-    """
-    return int(np.ceil(f) // 2 * 2 + 1)
-
-
-def volume_change(df_profiles, df_profile_features):
+def volume_change(df_profiles, df_profile_features, zone):
     """
     Calculates how much volume change there is between prestorm and poststorm profiles.
     :param df_profiles:
     :param df_profile_features:
+    :param zone: Either 'swash' or 'dune_face'
     :return:
     """
-    logger.info("Calculating change in beach volume")
+    logger.info("Calculating change in beach volume in {} zone".format(zone))
     df_vol_changes = pd.DataFrame(
         index=df_profile_features.index.get_level_values("site_id").unique()
     )
-    df_profiles = df_profiles.dropna(subset=["z"])
     df_profiles = df_profiles.sort_index()
     sites = df_profiles.groupby(level=["site_id"])
 
-    for site_id, df_site in tqdm(sites):
-
-        params = {}
-
-        for zone in ["dune", "swash"]:
-            params[zone] = {}
-
-            for profile_type in ["prestorm", "poststorm"]:
-                params[zone][profile_type] = {}
-
-                # Define the edges of the swash and dunes where we want to calculate subaerial volume.
-                if zone == "swash":
-                    params[zone][profile_type]["x_min"] = df_profile_features.loc[
-                        (site_id, profile_type)
-                    ].dune_toe_x
-                    params[zone][profile_type]["x_max"] = max(
-                        df_profiles.loc[(site_id, profile_type)].index.get_level_values(
-                            "x"
-                        )
-                    )
-
-                    # For profiles with no Dlow value, we take Dhigh as the minimum value to calculate swash
-                    if np.isnan(params[zone][profile_type]["x_min"]):
-                        params[zone][profile_type]["x_min"] = df_profile_features.loc[
-                            (site_id, profile_type)
-                        ].dune_crest_x
-
-                elif zone == "dune":
-                    params[zone][profile_type]["x_min"] = df_profile_features.loc[
-                        (site_id, profile_type)
-                    ].dune_crest_x
-                    params[zone][profile_type]["x_max"] = df_profile_features.loc[
-                        (site_id, profile_type)
-                    ].dune_toe_x
-
-                    # For profiles with no Dlow value, the dune is undefined and we cannot calculate a dune volume.
-
-                # Calculate subaerial volume based on our x min and maxes
-                params[zone][profile_type]["subaerial_vol"] = beach_volume(
-                    x=df_profiles.loc[(site_id, profile_type)].index.get_level_values(
-                        "x"
-                    ),
-                    z=df_profiles.loc[(site_id, profile_type)].z.values,
-                    x_min=params[zone][profile_type]["x_min"],
-                    x_max=params[zone][profile_type]["x_max"],
-                )
-
-            # TODO Landward limit still needs to be incorporated
-
-            params[zone]["vol_change"] = (
-                params[zone]["poststorm"]["subaerial_vol"]
-                - params[zone]["prestorm"]["subaerial_vol"]
-            )
-            params[zone]["pct_change"] = (
-                params[zone]["vol_change"] / params[zone]["prestorm"]["subaerial_vol"]
-            )
-
-            df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = params[zone][
-                "prestorm"
-            ]["subaerial_vol"]
-            df_vol_changes.loc[site_id, "poststorm_{}_vol".format(zone)] = params[zone][
-                "poststorm"
-            ]["subaerial_vol"]
-            df_vol_changes.loc[site_id, "{}_vol_change".format(zone)] = params[zone][
-                "vol_change"
-            ]
-            df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = params[zone][
-                "pct_change"
-            ]
+    for site_id, df_site in sites:
+        logger.debug(
+            "Calculating change in beach volume at {} in {} zone".format(site_id, zone)
+        )
+
+        prestorm_row = df_profile_features.loc[(site_id, "prestorm")]
+        prestorm_dune_toe_x = prestorm_row.dune_toe_x
+        prestorm_dune_crest_x = prestorm_row.dune_crest_x
+
+        # If no dune toe has been defined, Dlow = Dhigh. Refer to Sallenger (2000).
+        if np.isnan(prestorm_dune_toe_x):
+            prestorm_dune_toe_x = prestorm_dune_crest_x
+
+        # If no prestorm and poststorm profiles, skip site and continue
+        profile_lengths = [
+            len(df_site.xs(x, level="profile_type")) for x in ["prestorm", "poststorm"]
+        ]
+        if any([length == 0 for length in profile_lengths]):
+            continue
+
+        # Find the last x coordinate where we have both prestorm and poststorm measurements. If we don't do this,
+        # the prestorm and poststorm values are going to be calculated over different lengths.
+        df_zone = df_site.dropna(subset=["z"])
+        x_last_obs = min(
+            [
+                max(
+                    df_zone.query(
+                        "profile_type == '{}'".format(profile_type)
+                    ).index.get_level_values("x")
+                )
+                for profile_type in ["prestorm", "poststorm"]
+            ]
+        )
+        x_first_obs = max(
+            [
+                min(
+                    df_zone.query(
+                        "profile_type == '{}'".format(profile_type)
+                    ).index.get_level_values("x")
+                )
+                for profile_type in ["prestorm", "poststorm"]
+            ]
+        )
+
+        # Where we want to measure pre and post storm volume is dependent on the zone selected
+        if zone == "swash":
+            x_min = max(prestorm_dune_toe_x, x_first_obs)
+            x_max = x_last_obs
+        elif zone == "dune_face":
+            x_min = max(prestorm_dune_crest_x, x_first_obs)
+            x_max = min(prestorm_dune_toe_x, x_last_obs)
+        else:
+            logger.warning("Zone argument not properly specified. Please check")
+            x_min = None
+            x_max = None
+
+        # Now, compute the volume of sand between the x-coordinates x_min and x_max for both prestorm
+        # and poststorm profiles.
+        prestorm_vol = beach_volume(
+            x=df_zone.query("profile_type=='prestorm'").index.get_level_values("x"),
+            z=df_zone.query("profile_type=='prestorm'").z,
+            x_min=x_min,
+            x_max=x_max,
+        )
+        poststorm_vol = beach_volume(
+            x=df_zone.query("profile_type=='poststorm'").index.get_level_values("x"),
+            z=df_zone.query("profile_type=='poststorm'").z,
+            x_min=x_min,
+            x_max=x_max,
+        )
+
+        # Identify the x location where our pre and post storm profiles start to differ. This is so changes not
+        # due to the storm are not included when calculating volume.
+        df_prestorm = (
+            df_site.xs("prestorm", level="profile_type").z.rename("z_pre").to_frame()
+        )
+        df_poststorm = (
+            df_site.xs("poststorm", level="profile_type").z.rename("z_post").to_frame()
+        )
+        df_diff = df_prestorm.merge(df_poststorm, on=["site_id", "x"])
+        df_diff["z_diff"] = df_diff.z_pre - df_diff.z_post
+
+        # Find all locations where the difference in pre and post storm is zero. Take the most seaward location
+        # as the x location where our profiles are the same.
+        x_crossings = crossings(df_diff.index.get_level_values("x"), df_diff.z_diff, 0)
+        if len(x_crossings) != 0:
+            x_change_point = x_crossings[-1]
+        else:
+            x_change_point = np.nan
+
+        # # For debugging
+        # import matplotlib.pyplot as plt
+        # f,(ax1,ax2) = plt.subplots(2,1,sharex=True)
+        # ax1.plot(df_prestorm.index.get_level_values('x'), df_prestorm.z_pre,label='prestorm')
+        # ax1.plot(df_poststorm.index.get_level_values('x'), df_poststorm.z_post,label='poststorm')
+        # ax1.axvline(x_crossings[-1], color='red', linestyle='--', linewidth=0.5, label='Change point')
+        # ax1.legend()
+        # ax1.set_title(site_id)
+        # ax1.set_ylabel('elevation (m AHD)')
+        # ax2.plot(df_diff.index.get_level_values('x'), df_diff.z_diff)
+        # ax2.set_xlabel('x coordinate (m)')
+        # ax2.set_ylabel('elevation diff (m)')
+        # ax2.axvline(x_crossings[-1],color='red',linestyle='--',linewidth=0.5)
+        # plt.show()
+
+        diff_vol = beach_volume(
+            x=df_diff.index.get_level_values("x"),
+            z=df_diff.z_diff,
+            x_min=np.nanmax([x_min, x_change_point]),
+            x_max=np.nanmax([x_max, x_change_point]),
+        )
+
+        # Here, if we cannot calculate the difference volume, assume no volume change
+        if np.isnan(diff_vol):
+            diff_vol = 0
+
+        # Base pct change on diff volume
+        if diff_vol == 0:
+            pct_change = 0
+        else:
+            pct_change = diff_vol / prestorm_vol * 100
+
+        df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = prestorm_vol
+        df_vol_changes.loc[site_id, "poststorm_{}_vol".format(zone)] = poststorm_vol
+        df_vol_changes.loc[site_id, "{}_vol_change".format(zone)] = diff_vol
+        df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = pct_change
 
     return df_vol_changes
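beach_volume and crossings are called throughout this hunk but defined elsewhere in the repository (crossings is imported from utils). These are plausible minimal versions consistent with the call sites, for orientation only, not the repository's actual implementations. Note that if x_min or x_max is nan, the comparisons below leave the mask empty and beach_volume returns nan, which matches the "assume no volume change" fallback above.

import numpy as np
from scipy.integrate import simps

def beach_volume(x, z, x_min, x_max):
    # Integrate elevation between x_min and x_max: volume per metre of beach
    x = np.asarray(x, dtype=float)
    z = np.asarray(z, dtype=float)
    mask = (x >= x_min) & (x <= x_max)
    if mask.sum() < 2:
        return np.nan
    return simps(z[mask], x[mask])

def crossings(x, z, z0):
    # x locations where the series z crosses the level z0, linearly interpolated
    x = np.asarray(x, dtype=float)
    z = np.asarray(z, dtype=float) - z0
    idx = np.where(np.diff(np.sign(z)) != 0)[0]
    return [x[i] - z[i] * (x[i + 1] - x[i]) / (z[i + 1] - z[i]) for i in idx]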
@@ -153,11 +197,11 @@ def storm_regime(df_observed_impacts):
     """
     logger.info("Getting observed storm regimes")
 
-    swash = (df_observed_impacts.dune_pct_change <= 2) & (
-        df_observed_impacts.dune_vol_change <= 3
+    swash = (df_observed_impacts.dune_face_pct_change <= 2) & (
+        df_observed_impacts.dune_face_vol_change <= 3
     )
-    collision = (df_observed_impacts.dune_pct_change >= 2) | (
-        df_observed_impacts.dune_vol_change > 3
+    collision = (df_observed_impacts.dune_face_pct_change >= 2) | (
+        df_observed_impacts.dune_face_vol_change > 3
     )
 
     df_observed_impacts.loc[swash, "storm_regime"] = "swash"
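The renamed columns make the thresholds read directly: at most a 2% dune-face change and at most 3 m^3/m of volume change counts as swash, anything larger as collision. A toy illustration with made-up numbers (since both conditions compare against 2, a site exactly on the boundary matches both masks and ends up labelled collision, because that label is assigned last):

import pandas as pd

df = pd.DataFrame(
    {
        "dune_face_pct_change": [0.5, 8.0],
        "dune_face_vol_change": [1.0, 12.0],
    },
    index=["site_a", "site_b"],
)
swash = (df.dune_face_pct_change <= 2) & (df.dune_face_vol_change <= 3)
collision = (df.dune_face_pct_change >= 2) | (df.dune_face_vol_change > 3)
df.loc[swash, "storm_regime"] = "swash"
df.loc[collision, "storm_regime"] = "collision"
print(df)  # site_a -> swash, site_b -> collision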
@@ -214,9 +258,13 @@ def create_observed_impacts(
     # TODO Review volume change with changing dune toe/crests
     logger.info("Getting pre/post storm volumes")
-    df_vol_changes = volume_change(df_profiles, df_profile_features)
-    df_observed_impacts = df_observed_impacts.join(df_vol_changes)
+    df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash")
+    df_dune_face_vol_changes = volume_change(
+        df_profiles, df_profile_features, zone="dune_face"
+    )
+    df_observed_impacts = df_observed_impacts.join(
+        [df_swash_vol_changes, df_dune_face_vol_changes]
+    )
 
     # Classify regime based on volume changes
     df_observed_impacts = storm_regime(df_observed_impacts)
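DataFrame.join accepts a list of frames when their indices align, which is why both zone results can be attached in one call; a tiny stand-in example:

import pandas as pd

base = pd.DataFrame(index=["site_a", "site_b"])
swash = pd.DataFrame({"swash_vol_change": [1.2, 3.4]}, index=base.index)
dune = pd.DataFrame({"dune_face_vol_change": [0.0, 5.6]}, index=base.index)
print(base.join([swash, dune]))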
@@ -295,12 +343,8 @@ def create_observed_impacts(
         ele=0,
         col_name="width_msl_poststorm",
     )
-    df_width_msl_change_m = (df_width_msl_poststorm - df_width_msl_prestorm).rename(
-        "width_msl_change_m"
-    )
-    df_width_msl_change_pct = (
-        df_width_msl_change_m / df_width_msl_prestorm * 100
-    ).rename("width_msl_change_pct")
+    df_width_msl_change_m = (df_width_msl_poststorm - df_width_msl_prestorm).rename('width_msl_change_m')
+    df_width_msl_change_pct = (df_width_msl_change_m / df_width_msl_prestorm * 100).rename('width_msl_change_pct')
 
     # Join beach width change onto observed impacts dataframe
     df_observed_impacts = pd.concat(
