From 3a1d87eaebb867a9579ad8ced347c59af3a55691 Mon Sep 17 00:00:00 2001 From: Chris Leaman Date: Tue, 26 Mar 2019 15:47:27 +1100 Subject: [PATCH] Update notebooks --- notebooks/07_evaluate_model_performance.ipynb | 379 ++++++++++++++++-- notebooks/11_investigate.ipynb | 234 ++++++++++- 2 files changed, 589 insertions(+), 24 deletions(-) diff --git a/notebooks/07_evaluate_model_performance.ipynb b/notebooks/07_evaluate_model_performance.ipynb index 649abcb..84f7c58 100644 --- a/notebooks/07_evaluate_model_performance.ipynb +++ b/notebooks/07_evaluate_model_performance.ipynb @@ -485,20 +485,6 @@ "print('Done!')" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -507,13 +493,6 @@ "Use sklearn metrics to generate classification reports for each forecasting model." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, @@ -582,6 +561,355 @@ " df_for.storm_regime.astype(cat_type).cat.codes.values)\n", " print('{}: {:.2f}'.format(model,m))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "# Check confusion matrix\n", + "for model in impacts['forecasted']:\n", + " df_for = impacts['forecasted'][model]\n", + " df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n", + "\n", + " m = sklearn.metrics.confusion_matrix(\n", + " df_obs.storm_regime.astype(cat_type).cat.codes.values,\n", + " df_for.storm_regime.astype(cat_type).cat.codes.values,\n", + " labels=[0,1,2,3])\n", + " print('{}\\n{}'.format(model,m))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create confusion matrix figure\n", + "From https://github.com/wcipriano/pretty-print-confusion-matrix/blob/master/confusion_matrix_pretty_print.py" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# -*- coding: utf-8 -*-\n", + "\"\"\"\n", + "plot a pretty confusion matrix with seaborn\n", + "Created on Mon Jun 25 14:17:37 2018\n", + "@author: Wagner Cipriano - wagnerbhbr - gmail - CEFETMG / MMC\n", + "REFerences:\n", + " https://www.mathworks.com/help/nnet/ref/plotconfusion.html\n", + " https://stackoverflow.com/questions/28200786/how-to-plot-scikit-learn-classification-report\n", + " https://stackoverflow.com/questions/5821125/how-to-plot-confusion-matrix-with-string-axis-rather-than-integer-in-python\n", + " https://www.programcreek.com/python/example/96197/seaborn.heatmap\n", + " https://stackoverflow.com/questions/19233771/sklearn-plot-confusion-matrix-with-labels/31720054\n", + " http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py\n", + "\"\"\"\n", + "\n", + "#imports\n", + "from pandas import DataFrame\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.font_manager as fm\n", + "from matplotlib.collections import QuadMesh\n", + "import seaborn as sn\n", + "\n", + "\n", + "def get_new_fig(fn, figsize=[9,9]):\n", + " \"\"\" Init graphics \"\"\"\n", + " fig1 = plt.figure(fn, figsize)\n", + " ax1 = fig1.gca() #Get Current Axis\n", + " ax1.cla() # clear existing plot\n", + " return fig1, ax1\n", + "#\n", + "\n", + "def configcell_text_and_colors(array_df, lin, col, oText, facecolors, posi, fz, fmt, show_null_values=0):\n", + " \"\"\"\n", + " config cell text and colors\n", + " and return text elements to add and to dell\n", + " @TODO: use fmt\n", + " \"\"\"\n", + " text_add = []; text_del = [];\n", + " cell_val = array_df[lin][col]\n", + " tot_all = array_df[-1][-1]\n", + " per = (float(cell_val) / tot_all) * 100\n", + " curr_column = array_df[:,col]\n", + " ccl = len(curr_column)\n", + "\n", + " #last line and/or last column\n", + " if(col == (ccl - 1)) or (lin == (ccl - 1)):\n", + " #tots and percents\n", + " if(cell_val != 0):\n", + " if(col == ccl - 1) and (lin == ccl - 1):\n", + " tot_rig = 0\n", + " for i in range(array_df.shape[0] - 1):\n", + " tot_rig += array_df[i][i]\n", + " per_ok = (float(tot_rig) / cell_val) * 100\n", + " elif(col == ccl - 1):\n", + " tot_rig = array_df[lin][lin]\n", + " per_ok = (float(tot_rig) / cell_val) * 100\n", + " elif(lin == ccl - 1):\n", + " tot_rig = array_df[col][col]\n", + " per_ok = (float(tot_rig) / cell_val) * 100\n", + " per_err = 100 - per_ok\n", + " else:\n", + " per_ok = per_err = 0\n", + "\n", + " per_ok_s = ['%.1f%%'%(per_ok), '100%'] [per_ok == 100]\n", + "\n", + " #text to DEL\n", + " text_del.append(oText)\n", + "\n", + " #text to ADD\n", + " font_prop = fm.FontProperties(weight='bold', size=fz)\n", + " text_kwargs = dict(color='w', ha=\"center\", va=\"center\", gid='sum', fontproperties=font_prop)\n", + " lis_txt = ['%d'%(cell_val), per_ok_s, '%.1f%%'%(per_err)]\n", + " lis_kwa = [text_kwargs]\n", + " dic = text_kwargs.copy(); dic['color'] = 'g'; lis_kwa.append(dic);\n", + " dic = text_kwargs.copy(); dic['color'] = 'r'; lis_kwa.append(dic);\n", + " lis_pos = [(oText._x, oText._y-0.3), (oText._x, oText._y), (oText._x, oText._y+0.3)]\n", + " for i in range(len(lis_txt)):\n", + " newText = dict(x=lis_pos[i][0], y=lis_pos[i][1], text=lis_txt[i], kw=lis_kwa[i])\n", + " #print 'lin: %s, col: %s, newText: %s' %(lin, col, newText)\n", + " text_add.append(newText)\n", + " #print '\\n'\n", + "\n", + " #set background color for sum cells (last line and last column)\n", + " carr = [0.27, 0.30, 0.27, 1.0]\n", + " if(col == ccl - 1) and (lin == ccl - 1):\n", + " carr = [0.17, 0.20, 0.17, 1.0]\n", + " facecolors[posi] = carr\n", + "\n", + " else:\n", + " if(per > 0):\n", + " txt = '%s\\n%.1f%%' %(cell_val, per)\n", + " else:\n", + " if(show_null_values == 0):\n", + " txt = ''\n", + " elif(show_null_values == 1):\n", + " txt = '0'\n", + " else:\n", + " txt = '0\\n0.0%'\n", + " oText.set_text(txt)\n", + "\n", + " #main diagonal\n", + " if(col == lin):\n", + " #set color of the textin the diagonal to white\n", + " oText.set_color('w')\n", + " # set background color in the diagonal to blue\n", + " facecolors[posi] = [0.35, 0.8, 0.55, 1.0]\n", + " else:\n", + " oText.set_color('r')\n", + "\n", + " return text_add, text_del\n", + "#\n", + "\n", + "def insert_totals(df_cm):\n", + " \"\"\" insert total column and line (the last ones) \"\"\"\n", + " sum_col = []\n", + " for c in df_cm.columns:\n", + " sum_col.append( df_cm[c].sum() )\n", + " sum_lin = []\n", + " for item_line in df_cm.iterrows():\n", + " sum_lin.append( item_line[1].sum() )\n", + " df_cm['sum_lin'] = sum_lin\n", + " sum_col.append(np.sum(sum_lin))\n", + " df_cm.loc['sum_col'] = sum_col\n", + " #print ('\\ndf_cm:\\n', df_cm, '\\n\\b\\n')\n", + "#\n", + "\n", + "def pretty_plot_confusion_matrix(df_cm, annot=True, cmap=\"Oranges\", fmt='.2f', fz=11,\n", + " lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='y'):\n", + " \"\"\"\n", + " print conf matrix with default layout (like matlab)\n", + " params:\n", + " df_cm dataframe (pandas) without totals\n", + " annot print text in each cell\n", + " cmap Oranges,Oranges_r,YlGnBu,Blues,RdBu, ... see:\n", + " fz fontsize\n", + " lw linewidth\n", + " pred_val_axis where to show the prediction values (x or y axis)\n", + " 'col' or 'x': show predicted values in columns (x axis) instead lines\n", + " 'lin' or 'y': show predicted values in lines (y axis)\n", + " \"\"\"\n", + " if(pred_val_axis in ('col', 'x')):\n", + " xlbl = 'Predicted'\n", + " ylbl = 'Actual'\n", + " else:\n", + " xlbl = 'Actual'\n", + " ylbl = 'Predicted'\n", + " df_cm = df_cm.T\n", + "\n", + " # create \"Total\" column\n", + " insert_totals(df_cm)\n", + "\n", + " #this is for print allways in the same window\n", + " fig, ax1 = get_new_fig('Conf matrix default', figsize)\n", + "\n", + " #thanks for seaborn\n", + " ax = sn.heatmap(df_cm, annot=annot, annot_kws={\"size\": fz}, linewidths=lw, ax=ax1,\n", + " cbar=cbar, cmap=cmap, linecolor='w', fmt=fmt)\n", + "\n", + " #set ticklabels rotation\n", + " ax.set_xticklabels(ax.get_xticklabels(), rotation = 45, fontsize = 10)\n", + " ax.set_yticklabels(ax.get_yticklabels(), rotation = 25, fontsize = 10)\n", + "\n", + " # Turn off all the ticks\n", + " for t in ax.xaxis.get_major_ticks():\n", + " t.tick1On = False\n", + " t.tick2On = False\n", + " for t in ax.yaxis.get_major_ticks():\n", + " t.tick1On = False\n", + " t.tick2On = False\n", + "\n", + " #face colors list\n", + " quadmesh = ax.findobj(QuadMesh)[0]\n", + " facecolors = quadmesh.get_facecolors()\n", + "\n", + " #iter in text elements\n", + " array_df = np.array( df_cm.to_records(index=False).tolist() )\n", + " text_add = []; text_del = [];\n", + " posi = -1 #from left to right, bottom to top.\n", + " for t in ax.collections[0].axes.texts: #ax.texts:\n", + " pos = np.array( t.get_position()) - [0.5,0.5]\n", + " lin = int(pos[1]); col = int(pos[0]);\n", + " posi += 1\n", + " #print ('>>> pos: %s, posi: %s, val: %s, txt: %s' %(pos, posi, array_df[lin][col], t.get_text()))\n", + "\n", + " #set text\n", + " txt_res = configcell_text_and_colors(array_df, lin, col, t, facecolors, posi, fz, fmt, show_null_values)\n", + "\n", + " text_add.extend(txt_res[0])\n", + " text_del.extend(txt_res[1])\n", + "\n", + " #remove the old ones\n", + " for item in text_del:\n", + " item.remove()\n", + " #append the new ones\n", + " for item in text_add:\n", + " ax.text(item['x'], item['y'], item['text'], **item['kw'])\n", + "\n", + " #titles and legends\n", + " ax.set_title('Confusion matrix')\n", + " ax.set_xlabel(xlbl)\n", + " ax.set_ylabel(ylbl)\n", + " plt.tight_layout() #set layout slim\n", + " plt.show()\n", + " return fig\n", + "#\n", + "\n", + "def plot_confusion_matrix_from_data(y_test, predictions, columns=None, annot=True, cmap=\"Oranges\",\n", + " fmt='.2f', fz=11, lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='lin'):\n", + " \"\"\"\n", + " plot confusion matrix function with y_test (actual values) and predictions (predic),\n", + " whitout a confusion matrix yet\n", + " \"\"\"\n", + " from sklearn.metrics import confusion_matrix\n", + " from pandas import DataFrame\n", + "\n", + " #data\n", + " if(not columns):\n", + " #labels axis integer:\n", + " ##columns = range(1, len(np.unique(y_test))+1)\n", + " #labels axis string:\n", + " from string import ascii_uppercase\n", + " columns = ['class %s' %(i) for i in list(ascii_uppercase)[0:len(np.unique(y_test))]]\n", + "\n", + " confm = confusion_matrix(y_test, predictions)\n", + " cmap = 'Oranges';\n", + " fz = 11;\n", + " figsize=[9,9];\n", + " show_null_values = 2\n", + " df_cm = DataFrame(confm, index=columns, columns=columns)\n", + " pretty_plot_confusion_matrix(df_cm, fz=fz, cmap=cmap, figsize=figsize, show_null_values=show_null_values, pred_val_axis=pred_val_axis)\n", + "#\n", + "\n", + "\n", + "\n", + "#\n", + "#TEST functions\n", + "#\n", + "def _test_cm():\n", + " #test function with confusion matrix done\n", + " array = np.array( [[13, 0, 1, 0, 2, 0],\n", + " [ 0, 50, 2, 0, 10, 0],\n", + " [ 0, 13, 16, 0, 0, 3],\n", + " [ 0, 0, 0, 13, 1, 0],\n", + " [ 0, 40, 0, 1, 15, 0],\n", + " [ 0, 0, 0, 0, 0, 20]])\n", + " #get pandas dataframe\n", + " df_cm = DataFrame(array, index=range(1,7), columns=range(1,7))\n", + " #colormap: see this and choose your more dear\n", + " cmap = 'PuRd'\n", + " pretty_plot_confusion_matrix(df_cm, cmap=cmap)\n", + "#\n", + "\n", + "def _test_data_class():\n", + " \"\"\" test function with y_test (actual values) and predictions (predic) \"\"\"\n", + " #data\n", + " y_test = np.array([1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5])\n", + " predic = np.array([1,2,4,3,5, 1,2,4,3,5, 1,2,3,4,4, 1,4,3,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,3,3,5, 1,2,3,3,5, 1,2,3,4,4, 1,2,3,4,1, 1,2,3,4,1, 1,2,3,4,1, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5])\n", + " \"\"\"\n", + " Examples to validate output (confusion matrix plot)\n", + " actual: 5 and prediction 1 >> 3\n", + " actual: 2 and prediction 4 >> 1\n", + " actual: 3 and prediction 4 >> 10\n", + " \"\"\"\n", + " columns = []\n", + " annot = True;\n", + " cmap = 'Oranges';\n", + " fmt = '.2f'\n", + " lw = 0.5\n", + " cbar = False\n", + " show_null_values = 2\n", + " pred_val_axis = 'y'\n", + " #size::\n", + " fz = 12;\n", + " figsize = [9,9];\n", + " if(len(y_test) > 10):\n", + " fz=9; figsize=[14,14];\n", + " plot_confusion_matrix_from_data(y_test, predic, columns,\n", + " annot, cmap, fmt, fz, lw, cbar, figsize, show_null_values, pred_val_axis)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# plot_confusion_matrix_from_data(y_test, predictions, columns=None, annot=True, cmap=\"Oranges\",\n", + "# fmt='.2f', fz=11, lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='lin'):\n", + "\n", + "matplotlib.rcParams['text.usetex'] = False\n", + "\n", + "forecast_model = 'postintertidal_slope_sto06'\n", + "\n", + "df_for = impacts['forecasted'][forecast_model]\n", + "df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n", + "observed_regimes = df_obs.storm_regime.astype(cat_type).cat.codes.values\n", + "forecasted_regimes = df_for.storm_regime.astype(cat_type).cat.codes.values\n", + "\n", + "\n", + "confm = confusion_matrix(observed_regimes, forecasted_regimes,labels=[0,1,2,3])\n", + "labels=['swash','collision','overwash','inundation']\n", + "df_cm = DataFrame(confm, index=labels, columns=labels)\n", + "\n", + "fig = pretty_plot_confusion_matrix(df_cm, annot=True, cmap=\"Oranges\", fmt='.1f', fz=13,\n", + " lw=0.1, cbar=False, figsize=[8,5], show_null_values=1, pred_val_axis='y')\n", + "\n", + "fig.savefig('11_confusion_matrix',dpi=600)" + ] } ], "metadata": { @@ -612,9 +940,14 @@ "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, - "toc_position": {}, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "286.391px" + }, "toc_section_display": true, - "toc_window_display": false + "toc_window_display": true }, "varInspector": { "cols": { diff --git a/notebooks/11_investigate.ipynb b/notebooks/11_investigate.ipynb index 56c6ac9..61374b1 100644 --- a/notebooks/11_investigate.ipynb +++ b/notebooks/11_investigate.ipynb @@ -365,12 +365,244 @@ "f.savefig('11_change_in_slopes_accuracy_classes.png',dpi=600)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Swash zone volume change histogram" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How much does the beach width change variation can we expect in the swash regime?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "f, ax = plt.subplots(figsize=(5,4))\n", + "\n", + "sns.distplot(df.loc[df.storm_regime=='swash'].width_msl_change_pct.dropna(), \n", + " kde=False);\n", + "\n", + "ax.set_title('Distribution of beach width change for swash regime')\n", + "ax.set_xlabel('$\\Delta$ beach width (%)')\n", + "ax.set_ylabel('Count')\n", + "\n", + "f.savefig('11_change_in_beach_width.png',dpi=600)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Split swash regime" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df['beta_Pxscum'] = df.beta_prestorm_mean * df.Ecum\n", + "df['neg_swash_vol_change'] = -df.swash_vol_change\n", + "\n", + "ax = sns.scatterplot(data=df.loc[df.storm_regime=='swash'],\n", + " x=\"beta_Pxscum\", \n", + " y='neg_swash_vol_change')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "corr = df.corr(method ='pearson') \n", + "sns.heatmap(corr, \n", + " xticklabels=corr.columns,\n", + " yticklabels=corr.columns,\n", + " cmap='RdBu_r')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(corr.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import scipy.stats as ss\n", + "#https://stackoverflow.com/a/24469099\n", + "\n", + "corr = df.corr(method ='pearson') \n", + "\n", + "n=len(corr.columns)\n", + "t=corr*np.sqrt((n-2)/(1-corr*corr))\n", + "\n", + "\n", + "pvals = ss.t.cdf(t, n-2)\n", + "\n", + "corr.values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "code_folding": [] + }, + "outputs": [], + "source": [ + "from matplotlib.patches import Ellipse\n", + "def corrplot(data, pvalues, labels):\n", + " \"\"\"Creates a correlation plot of the passed data.\n", + " The function returns the plot which can then be shown with\n", + " plot.show(), saved to a file with plot.savefig(), or manipulated\n", + " in any other standard matplotlib way.\n", + " data is the correlation matrix, a 2-D numpy array containing\n", + " the pairwise correlations between variables;\n", + " pvalues is a matrix containing the pvalue for each corresponding\n", + " correlation value; if none it is assumed to be the zero matrix\n", + " labels is an array containing the variable names\n", + " https://github.com/louridas/corrplot/blob/master/corrplot.py\n", + " \"\"\"\n", + "\n", + " plt.figure(1)\n", + "\n", + " column_labels = labels\n", + " row_labels = labels\n", + " \n", + " f = plt.figure(figsize=(8,8))\n", + " ax = plt.subplot(1, 1, 1, aspect='equal')\n", + "\n", + " width, height = data.shape\n", + " num_cols, num_rows = width, height\n", + "\n", + " if pvalues is None:\n", + " pvalues = np.zeros([num_rows, num_cols])\n", + " \n", + " shrink = 0.9\n", + "\n", + " poscm = cm.get_cmap('Blues')\n", + " negcm = cm.get_cmap('Oranges')\n", + "\n", + " for x in range(width):\n", + " for y in range(height):\n", + " d = data[x, y]\n", + " c = pvalues[x, y]\n", + " rotate = -45 if d > 0 else +45\n", + " clrmap = poscm if d >= 0 else negcm\n", + " d_abs = np.abs(d)\n", + " ellipse = Ellipse((x, y),\n", + " width=1 * shrink,\n", + " height=(shrink - d_abs*shrink),\n", + " angle=rotate)\n", + " ellipse.set_edgecolor('black')\n", + " ellipse.set_facecolor(clrmap(d_abs))\n", + " if c > 0.05:\n", + " ellipse.set_linestyle('dotted')\n", + " ellipse.set_alpha(0.5)\n", + " ax.add_artist(ellipse)\n", + "\n", + " ax.set_xlim(-1, num_cols)\n", + " ax.set_ylim(-1, num_rows)\n", + " \n", + " ax.xaxis.tick_top()\n", + " xtickslocs = np.arange(len(row_labels))\n", + " ax.set_xticks(xtickslocs)\n", + " ax.set_xticklabels(row_labels, rotation=30, fontsize='small', ha='left')\n", + "\n", + " ax.invert_yaxis()\n", + " ytickslocs = np.arange(len(row_labels))\n", + " ax.set_yticks(ytickslocs)\n", + " ax.set_yticklabels(column_labels, fontsize='small')\n", + "\n", + " return plt\n", + "\n", + "import string\n", + "num_rows = 20\n", + "num_cols = num_rows\n", + "\n", + "min_length = 10\n", + "max_length = 20\n", + "\n", + "alnums = list(string.ascii_uppercase + string.digits)\n", + "labels = [''.join(np.random.choice(alnums,\n", + " np.random.randint(min_length,\n", + " max_length)))\n", + " for y in np.arange(num_rows)]\n", + "\n", + "\n", + "data = np.random.random([num_rows, num_cols])\n", + "\n", + "# data[np.random.choice(num_rows, num_rows / 2), :] *= -1\n", + "\n", + "np.fill_diagonal(data, 1)\n", + "\n", + "data_symm = (data + data.T) / 2\n", + "\n", + "# plot = corrplot(data_symm, None, labels)\n", + "plot = corrplot(corr.values, pvals, corr.columns.tolist())\n", + "plot.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy import stats\n", + "\n", + "x_col = 'beta_prestorm_intertidal'\n", + "y_col = \"beta_diff_intertidal\"\n", + "# y_col = 'swash_vol_change'\n", + "# x_col = \"Pxscum\"\n", + "data = df.loc[df.storm_regime=='swash']\n", + "\n", + "slope, intercept, r_value, p_value, std_err = stats.linregress(data[x_col],data[y_col])\n", + "\n", + "ax = sns.regplot(data=data,\n", + " y=y_col, \n", + " x=x_col, marker=\"+\",fit_reg=True, scatter_kws={'linewidth':1},\n", + " line_kws={'label':\"y={0:.2f}x+{1:.2f}\\n(r2={2:.2f})\".format(slope,intercept,r_value**2)})\n", + "ax.legend()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# df.loc[(df.storm_regime=='swash')&(df.swash_vol_change<10)].sort_values(by=['Pxscum'],ascending=False)\n", + "df.loc[(df.swash_vol_change>200)].sort_values(by=['Pxscum'],ascending=False)" + ] } ], "metadata": {