@ -485,20 +485,6 @@
"print('Done!')"
"print('Done!')"
]
]
},
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
{
"cell_type": "markdown",
"cell_type": "markdown",
"metadata": {},
"metadata": {},
@ -507,13 +493,6 @@
"Use sklearn metrics to generate classification reports for each forecasting model."
"Use sklearn metrics to generate classification reports for each forecasting model."
]
]
},
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": null,
"execution_count": null,
@ -582,6 +561,355 @@
" df_for.storm_regime.astype(cat_type).cat.codes.values)\n",
" df_for.storm_regime.astype(cat_type).cat.codes.values)\n",
" print('{}: {:.2f}'.format(model,m))"
" print('{}: {:.2f}'.format(model,m))"
]
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"\n",
"# Print one confusion matrix per forecasting model: observed storm regimes are\n",
"# passed as the true labels and forecasted regimes as the predictions.\n",
"for model in impacts['forecasted']:\n",
"    df_for = impacts['forecasted'][model]\n",
"    # Stores the categorical cast back on the forecast frame held in `impacts`.\n",
"    df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n",
"\n",
"    # Use the name imported at the top of this cell, so the cell does not rely on\n",
"    # a bare `sklearn` name having been imported by some other cell.\n",
"    m = confusion_matrix(\n",
"        df_obs.storm_regime.astype(cat_type).cat.codes.values,\n",
"        df_for.storm_regime.astype(cat_type).cat.codes.values,\n",
"        labels=[0, 1, 2, 3])  # category codes; assumes cat_type has four categories - TODO confirm\n",
"    print('{}\\n{}'.format(model, m))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create confusion matrix figure\n",
"From https://github.com/wcipriano/pretty-print-confusion-matrix/blob/master/confusion_matrix_pretty_print.py"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"\"\"\"\n",
"plot a pretty confusion matrix with seaborn\n",
"Created on Mon Jun 25 14:17:37 2018\n",
"@author: Wagner Cipriano - wagnerbhbr - gmail - CEFETMG / MMC\n",
"REFerences:\n",
" https://www.mathworks.com/help/nnet/ref/plotconfusion.html\n",
" https://stackoverflow.com/questions/28200786/how-to-plot-scikit-learn-classification-report\n",
" https://stackoverflow.com/questions/5821125/how-to-plot-confusion-matrix-with-string-axis-rather-than-integer-in-python\n",
" https://www.programcreek.com/python/example/96197/seaborn.heatmap\n",
" https://stackoverflow.com/questions/19233771/sklearn-plot-confusion-matrix-with-labels/31720054\n",
" http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py\n",
"\"\"\"\n",
"\n",
"#imports\n",
"from pandas import DataFrame\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.font_manager as fm\n",
"from matplotlib.collections import QuadMesh\n",
"import seaborn as sn\n",
"\n",
"\n",
"def get_new_fig(fn, figsize=[9,9]):\n",
"    \"\"\"\n",
"    Fetch (or create) the figure identified by `fn` with the given size and\n",
"    return it together with its current axes, emptied of any earlier drawing.\n",
"    \"\"\"\n",
"    figure = plt.figure(fn, figsize)\n",
"    axes = figure.gca()\n",
"    # Wipe whatever was previously drawn on these axes.\n",
"    axes.cla()\n",
"    return figure, axes\n",
"#\n",
"\n",
"def configcell_text_and_colors(array_df, lin, col, oText, facecolors, posi, fz, fmt, show_null_values=0):\n",
"    \"\"\"\n",
"    Configure the annotation text and background colour of one heatmap cell.\n",
"\n",
"    params:\n",
"      array_df          2-D array of counts; its last row and last column are treated as totals\n",
"      lin, col          row / column index of the cell inside array_df\n",
"      oText             text artist currently annotating the cell\n",
"      facecolors        indexable collection of RGBA colours; entry `posi` is overwritten\n",
"                        for total cells and for cells on the main diagonal\n",
"      posi              index of this cell inside facecolors\n",
"      fz                font size of the texts created for total cells\n",
"      fmt               accepted for interface compatibility; currently unused\n",
"      show_null_values  label of an empty non-total cell: 0 gives an empty string,\n",
"                        1 gives '0', anything else gives '0' with '0.0%' on a second line\n",
"\n",
"    returns:\n",
"      (text_add, text_del). text_add is a list of dicts with keys x, y, text, kw\n",
"      describing texts to draw; text_del is a list of text artists to remove.\n",
"      Both lists are empty for non-total cells, whose artist is updated in place.\n",
"    \"\"\"\n",
"    text_add = []\n",
"    text_del = []\n",
"    cell_val = array_df[lin][col]\n",
"    tot_all = array_df[-1][-1]\n",
"    # An all-zero matrix has a zero grand total: report 0% instead of dividing by zero.\n",
"    per = (float(cell_val) / tot_all) * 100 if tot_all != 0 else 0.0\n",
"    last = len(array_df[:, col]) - 1  # index of the totals row / column\n",
"\n",
"    if col == last or lin == last:\n",
"        # Totals cell: show the count, the share that is correct and the share in error.\n",
"        if cell_val != 0:\n",
"            if col == last and lin == last:\n",
"                # Grand total: the correct predictions are the main diagonal (totals excluded).\n",
"                tot_rig = sum(array_df[i][i] for i in range(array_df.shape[0] - 1))\n",
"            elif col == last:\n",
"                tot_rig = array_df[lin][lin]\n",
"            else:\n",
"                tot_rig = array_df[col][col]\n",
"            per_ok = (float(tot_rig) / cell_val) * 100\n",
"            per_err = 100 - per_ok\n",
"        else:\n",
"            per_ok = per_err = 0\n",
"\n",
"        per_ok_s = '100%' if per_ok == 100 else '%.1f%%' % (per_ok)\n",
"\n",
"        # The existing annotation is replaced by three stacked texts.\n",
"        text_del.append(oText)\n",
"\n",
"        font_prop = fm.FontProperties(weight='bold', size=fz)\n",
"        text_kwargs = dict(color='w', ha=\"center\", va=\"center\", gid='sum', fontproperties=font_prop)\n",
"        # Public accessor instead of the private _x / _y attributes of the artist.\n",
"        x, y = oText.get_position()\n",
"        stacked = [\n",
"            ('%d' % (cell_val), 'w', y - 0.3),\n",
"            (per_ok_s, 'g', y),\n",
"            ('%.1f%%' % (per_err), 'r', y + 0.3),\n",
"        ]\n",
"        for text, color, y_pos in stacked:\n",
"            text_add.append(dict(x=x, y=y_pos, text=text, kw=dict(text_kwargs, color=color)))\n",
"\n",
"        # Dark background for total cells, darker still for the grand total.\n",
"        if col == last and lin == last:\n",
"            facecolors[posi] = [0.17, 0.20, 0.17, 1.0]\n",
"        else:\n",
"            facecolors[posi] = [0.27, 0.30, 0.27, 1.0]\n",
"\n",
"    else:\n",
"        if per > 0:\n",
"            txt = '%s\\n%.1f%%' % (cell_val, per)\n",
"        elif show_null_values == 0:\n",
"            txt = ''\n",
"        elif show_null_values == 1:\n",
"            txt = '0'\n",
"        else:\n",
"            txt = '0\\n0.0%'\n",
"        oText.set_text(txt)\n",
"\n",
"        if col == lin:\n",
"            # Main diagonal: white text on a green background.\n",
"            oText.set_color('w')\n",
"            facecolors[posi] = [0.35, 0.8, 0.55, 1.0]\n",
"        else:\n",
"            oText.set_color('r')\n",
"\n",
"    return text_add, text_del\n",
"#\n",
"\n",
"def insert_totals(df_cm):\n",
"    \"\"\"\n",
"    Add totals to `df_cm` in place: a 'sum_lin' column with the sum of every row\n",
"    and a 'sum_col' row with the sum of every column, whose last entry is the\n",
"    sum of all row totals.\n",
"    \"\"\"\n",
"    column_totals = [df_cm[name].sum() for name in df_cm.columns]\n",
"    row_totals = [row.sum() for _, row in df_cm.iterrows()]\n",
"    df_cm['sum_lin'] = row_totals\n",
"    # The new bottom row needs one more entry than before: the grand total.\n",
"    column_totals.append(np.sum(row_totals))\n",
"    df_cm.loc['sum_col'] = column_totals\n",
"#\n",
"\n",
"def pretty_plot_confusion_matrix(df_cm, annot=True, cmap=\"Oranges\", fmt='.2f', fz=11,\n",
"      lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='y'):\n",
"    \"\"\"\n",
"    print conf matrix with default layout (like matlab)\n",
"    params:\n",
"      df_cm             dataframe (pandas) without totals; the caller's frame is not modified\n",
"      annot             print text in each cell\n",
"      cmap              colormap name, e.g. Oranges, Oranges_r, YlGnBu, Blues, RdBu\n",
"      fmt               number format handed to seaborn for the cell annotations\n",
"      fz                fontsize\n",
"      lw                linewidth\n",
"      cbar              draw the colorbar\n",
"      figsize           figure size handed to get_new_fig()\n",
"      show_null_values  labelling of empty cells, see configcell_text_and_colors()\n",
"      pred_val_axis     where to show the prediction values (x or y axis)\n",
"                        'col' or 'x': show predicted values in columns (x axis) instead lines\n",
"                        any other value (e.g. 'lin' or 'y'): show predicted values in lines (y axis)\n",
"    returns:\n",
"      the figure the matrix was drawn on\n",
"    \"\"\"\n",
"    # insert_totals() below modifies its argument in place, so always work on a\n",
"    # copy; otherwise the caller's frame would gain a totals row and column.\n",
"    if pred_val_axis in ('col', 'x'):\n",
"        xlbl = 'Predicted'\n",
"        ylbl = 'Actual'\n",
"        df_cm = df_cm.copy()\n",
"    else:\n",
"        xlbl = 'Actual'\n",
"        ylbl = 'Predicted'\n",
"        df_cm = df_cm.T.copy()\n",
"\n",
"    # create \"Total\" column and line\n",
"    insert_totals(df_cm)\n",
"\n",
"    # always draw into the same named figure\n",
"    fig, ax1 = get_new_fig('Conf matrix default', figsize)\n",
"\n",
"    ax = sn.heatmap(df_cm, annot=annot, annot_kws={\"size\": fz}, linewidths=lw, ax=ax1,\n",
"                    cbar=cbar, cmap=cmap, linecolor='w', fmt=fmt)\n",
"\n",
"    # set ticklabels rotation\n",
"    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, fontsize=10)\n",
"    ax.set_yticklabels(ax.get_yticklabels(), rotation=25, fontsize=10)\n",
"\n",
"    # Hide the tick marks but keep their labels. tick_params() is used because\n",
"    # the Tick.tick1On / tick2On attributes are no longer part of matplotlib.\n",
"    ax.tick_params(axis='both', which='major', bottom=False, top=False, left=False, right=False)\n",
"\n",
"    # face colors list of the heatmap mesh\n",
"    quadmesh = ax.findobj(QuadMesh)[0]\n",
"    facecolors = quadmesh.get_facecolors()\n",
"\n",
"    # iterate over the annotation texts\n",
"    array_df = np.array(df_cm.to_records(index=False).tolist())\n",
"    text_add = []\n",
"    text_del = []\n",
"    # Snapshot the texts first: artists are removed and added after the loop.\n",
"    for posi, t in enumerate(list(ax.texts)):\n",
"        # Annotations sit at cell centres, so subtracting 0.5 gives (col, lin).\n",
"        pos = np.array(t.get_position()) - [0.5, 0.5]\n",
"        lin = int(pos[1])\n",
"        col = int(pos[0])\n",
"\n",
"        txt_res = configcell_text_and_colors(array_df, lin, col, t, facecolors, posi, fz, fmt, show_null_values)\n",
"\n",
"        text_add.extend(txt_res[0])\n",
"        text_del.extend(txt_res[1])\n",
"\n",
"    # remove the old ones\n",
"    for item in text_del:\n",
"        item.remove()\n",
"    # append the new ones\n",
"    for item in text_add:\n",
"        ax.text(item['x'], item['y'], item['text'], **item['kw'])\n",
"\n",
"    # titles and legends\n",
"    ax.set_title('Confusion matrix')\n",
"    ax.set_xlabel(xlbl)\n",
"    ax.set_ylabel(ylbl)\n",
"    plt.tight_layout()  # set layout slim\n",
"    plt.show()\n",
"    return fig\n",
"#\n",
"\n",
"def plot_confusion_matrix_from_data(y_test, predictions, columns=None, annot=True, cmap=\"Oranges\",\n",
"      fmt='.2f', fz=11, lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='lin'):\n",
"    \"\"\"\n",
"    Compute the confusion matrix of y_test (actual values) against predictions\n",
"    and draw it with pretty_plot_confusion_matrix().\n",
"\n",
"    params:\n",
"      y_test       actual class labels\n",
"      predictions  predicted class labels\n",
"      columns      class names used on both axes; when None or empty the classes\n",
"                   are named 'class A', 'class B', ...\n",
"      annot, cmap, fmt, fz, lw, cbar, figsize, show_null_values, pred_val_axis\n",
"                   display options, forwarded unchanged to pretty_plot_confusion_matrix()\n",
"\n",
"    returns:\n",
"      the value returned by pretty_plot_confusion_matrix() (the figure)\n",
"    \"\"\"\n",
"    from string import ascii_uppercase\n",
"    from sklearn.metrics import confusion_matrix\n",
"    from pandas import DataFrame\n",
"\n",
"    confm = confusion_matrix(y_test, predictions)\n",
"\n",
"    # `columns is None or len(...) == 0` also works for numpy arrays, for which\n",
"    # a plain truth test raises.\n",
"    if columns is None or len(columns) == 0:\n",
"        # One generated name per row of the matrix, so labels that occur only in\n",
"        # `predictions` get a name too. Only 26 letters are available.\n",
"        columns = ['class %s' % (i) for i in ascii_uppercase[:confm.shape[0]]]\n",
"\n",
"    df_cm = DataFrame(confm, index=columns, columns=columns)\n",
"    # Forward every display option exactly as given by the caller.\n",
"    return pretty_plot_confusion_matrix(df_cm, annot=annot, cmap=cmap, fmt=fmt, fz=fz, lw=lw,\n",
"        cbar=cbar, figsize=figsize, show_null_values=show_null_values, pred_val_axis=pred_val_axis)\n",
"#\n",
"\n",
"\n",
"\n",
"#\n",
"#TEST functions\n",
"#\n",
"def _test_cm():\n",
"    \"\"\" Demo: plot a confusion matrix that has already been computed. \"\"\"\n",
"    counts = [[13,  0,  1,  0,  2,  0],\n",
"              [ 0, 50,  2,  0, 10,  0],\n",
"              [ 0, 13, 16,  0,  0,  3],\n",
"              [ 0,  0,  0, 13,  1,  0],\n",
"              [ 0, 40,  0,  1, 15,  0],\n",
"              [ 0,  0,  0,  0,  0, 20]]\n",
"    # Wrap the counts in a dataframe whose classes are numbered 1 to 6.\n",
"    frame = DataFrame(np.array(counts), index=range(1,7), columns=range(1,7))\n",
"    # Any matplotlib colormap name can be passed here.\n",
"    pretty_plot_confusion_matrix(frame, cmap='PuRd')\n",
"#\n",
"\n",
"def _test_data_class():\n",
"    \"\"\" Demo: plot the confusion matrix computed from y_test (actual values) and predic (predictions). \"\"\"\n",
"    y_test = np.array([1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5])\n",
"    predic = np.array([1,2,4,3,5, 1,2,4,3,5, 1,2,3,4,4, 1,4,3,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,3,3,5, 1,2,3,3,5, 1,2,3,4,4, 1,2,3,4,1, 1,2,3,4,1, 1,2,3,4,1, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5])\n",
"    # Cells to look at when validating the plotted matrix:\n",
"    #   actual: 5 and prediction 1   >>  3\n",
"    #   actual: 2 and prediction 4   >>  1\n",
"    #   actual: 3 and prediction 4   >>  10\n",
"    many_samples = len(y_test) > 10\n",
"    # Smaller font on a larger canvas once there are more than ten samples.\n",
"    fz = 9 if many_samples else 12\n",
"    figsize = [14,14] if many_samples else [9,9]\n",
"    plot_confusion_matrix_from_data(y_test, predic, columns=[], annot=True, cmap='Oranges',\n",
"        fmt='.2f', fz=fz, lw=0.5, cbar=False, figsize=figsize, show_null_values=2, pred_val_axis='y')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Draw and save the confusion-matrix figure of one forecasting model with\n",
"# pretty_plot_confusion_matrix(df_cm, annot, cmap, fmt, fz, lw, cbar, figsize,\n",
"# show_null_values, pred_val_axis).\n",
"\n",
"matplotlib.rcParams['text.usetex'] = False\n",
"\n",
"forecast_model = 'postintertidal_slope_sto06'\n",
"# presumably listed in the order of the category codes 0..3 of cat_type - verify\n",
"labels = ['swash','collision','overwash','inundation']\n",
"\n",
"df_for = impacts['forecasted'][forecast_model]\n",
"df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n",
"\n",
"# Integer category codes of the observed and the forecasted regimes.\n",
"observed_regimes = df_obs.storm_regime.astype(cat_type).cat.codes.values\n",
"forecasted_regimes = df_for.storm_regime.astype(cat_type).cat.codes.values\n",
"\n",
"confm = confusion_matrix(observed_regimes, forecasted_regimes, labels=[0,1,2,3])\n",
"df_cm = DataFrame(confm, index=labels, columns=labels)\n",
"\n",
"fig = pretty_plot_confusion_matrix(\n",
"    df_cm,\n",
"    annot=True,\n",
"    cmap=\"Oranges\",\n",
"    fmt='.1f',\n",
"    fz=13,\n",
"    lw=0.1,\n",
"    cbar=False,\n",
"    figsize=[8,5],\n",
"    show_null_values=1,\n",
"    pred_val_axis='y')\n",
"\n",
"fig.savefig('11_confusion_matrix', dpi=600)"
]
}
}
],
],
"metadata": {
"metadata": {
@ -612,9 +940,14 @@
"title_cell": "Table of Contents",
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_cell": false,
"toc_position": {},
"toc_position": {
"height": "calc(100% - 180px)",
"left": "10px",
"top": "150px",
"width": "286.391px"
},
"toc_section_display": true,
"toc_section_display": true,
"toc_window_display": false
"toc_window_display": true
},
},
"varInspector": {
"varInspector": {
"cols": {
"cols": {