Compare commits

...

6 Commits

Author SHA1 Message Date
Chris Leaman 3a1d87eaeb Update notebooks 6 years ago
Chris Leaman f616f56fd3 Optimize volume change calculations 6 years ago
Chris Leaman d06a5ab576 Removes TWL forecast using foreshore slope from Makefile
Predicting TWL using time-varying foreshore slope is pretty slow, so let's remove it from our data analysis pipeline. The code is still there if we want to call it, but for the time being, let's leave this model out when we're looking at comparisons.
6 years ago
Chris Leaman a293d5d691 Fix incorrect calculation of pre/post change point
If pre and post storm profiles returned to similar elevations within the sampled profile length, the change point detection would select the most seaward point and incorrectly calculate the swash zone volumes. This change looks at the slope of the difference between the two profiles and assumes that this difference should be increasing at the location of the change point.
6 years ago
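The rule this commit describes can be sketched as follows — a minimal, hypothetical reconstruction on synthetic profiles (the repository's actual implementation uses its own `crossings()` helper inside `volume_change()`; see the diff below). It finds zero-crossings of the pre/post elevation difference and keeps only those where the difference is increasing:

```python
# Hypothetical sketch of the change-point rule from this commit message,
# using synthetic data; names here are illustrative, not the repo's code.
import numpy as np

def change_point(x, z_pre, z_post):
    """Most seaward zero-crossing of (z_pre - z_post) at which the
    difference is increasing, i.e. where storm change begins."""
    z_diff = z_pre - z_post
    slope = np.gradient(z_diff, x)
    idx = np.where(np.diff(np.sign(z_diff)) != 0)[0]  # all zero-crossings
    idx = [i for i in idx if slope[i] > 0]            # keep increasing ones
    return x[idx[-1]] if idx else np.nan

x = np.linspace(0, 100, 501)
z_pre = 5 - 0.04 * x                                  # planar beach profile
# storm erodes the profile between x=20 and x=70, deposits a little seaward
z_post = z_pre - 0.6 * np.sin(np.clip((x - 20) / 50, 0, 1) * np.pi)
z_post = z_post + 0.2 * np.clip((x - 70) / 30, 0, 1)
print(change_point(x, z_pre, z_post))  # ~20 m: where erosion starts
```

Without the slope filter, the most seaward crossing (here at ~70 m, where the eroded profile rejoins the prestorm one) would be picked instead, which is the bug being fixed.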
Chris Leaman c17051788c Fix formatting 6 years ago
Chris Leaman 34ada7c12f Add checking of inputs early in slope_from_profile
This allows us to exit early and suppresses the annoying warnings.
6 years ago
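As a standalone illustration of the guard-clause pattern this commit describes (a minimal sketch with hypothetical names — the actual change to `slope_from_profile` appears in the diff further below): validate inputs up front and return `None`, instead of letting `scipy.stats.linregress` warn or fail part-way through.

```python
# Minimal sketch of early input checking before a linear regression;
# slope_or_none() is a hypothetical name, not the repository's function.
import numpy as np
from scipy import stats

def slope_or_none(x, z):
    x, z = np.asarray(x, dtype=float), np.asarray(z, dtype=float)
    mask = ~np.isnan(x) & ~np.isnan(z)
    if mask.sum() < 2:  # linregress needs at least two valid points
        return None     # exit early, no warnings raised
    slope, _, _, _, _ = stats.linregress(x[mask], z[mask])
    return slope

print(slope_or_none([0, 10, 20], [3.0, 2.0, 1.2]))  # -0.09
print(slope_or_none([0, 10], [np.nan, np.nan]))     # None
```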

@@ -53,9 +53,6 @@ pull-data: ##@data Copies data from data backup directory to ./data/
 # Command for activating our virtual environment and calling the CLI entry point
 PYTHON_CLI = activate ./.venv && python ./src/cli.py
 
-impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/interim/impacts_forecasted_premean_slope_sto06.csv ./data/interim/impacts_observed.csv ##@products makes obsered and forecasted impacts
 
 ### Parses raw matfiles
 ./data/raw/processed_shorelines/orientations.mat: ./data/raw/processed_shorelines/profiles.mat
@@ -94,7 +91,6 @@ impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/inte
 ### TWLs
-twls: ./data/interim/twl_foreshore_slope_sto06.csv
 twls: ./data/interim/twl_premean_slope_sto06.csv
 twls: ./data/interim/twl_premean_slope_hol86.csv
 twls: ./data/interim/twl_premean_slope_nie91.csv
@@ -320,7 +316,6 @@ twls: ./data/interim/twl_postintertidal_slope_pow18.csv
 ### IMPACTS
 impacts: ./data/interim/impacts_observed.csv
-impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv
 impacts: ./data/interim/impacts_forecasted_premean_slope_sto06.csv
 impacts: ./data/interim/impacts_forecasted_premean_slope_hol86.csv
 impacts: ./data/interim/impacts_forecasted_premean_slope_nie91.csv

@@ -485,20 +485,6 @@
 "print('Done!')"
 ]
 },
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
-},
 {
 "cell_type": "markdown",
 "metadata": {},
@@ -507,13 +493,6 @@
 "Use sklearn metrics to generate classification reports for each forecasting model."
 ]
 },
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
-},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -582,6 +561,355 @@
 " df_for.storm_regime.astype(cat_type).cat.codes.values)\n",
 " print('{}: {:.2f}'.format(model,m))"
 ]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": []
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from sklearn.metrics import confusion_matrix\n",
+"# Check confusion matrix\n",
+"for model in impacts['forecasted']:\n",
+" df_for = impacts['forecasted'][model]\n",
+" df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n",
+"\n",
+" m = sklearn.metrics.confusion_matrix(\n",
+" df_obs.storm_regime.astype(cat_type).cat.codes.values,\n",
+" df_for.storm_regime.astype(cat_type).cat.codes.values,\n",
+" labels=[0,1,2,3])\n",
+" print('{}\\n{}'.format(model,m))"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Create confusion matrix figure\n",
+"From https://github.com/wcipriano/pretty-print-confusion-matrix/blob/master/confusion_matrix_pretty_print.py"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# -*- coding: utf-8 -*-\n",
+"\"\"\"\n",
+"plot a pretty confusion matrix with seaborn\n",
+"Created on Mon Jun 25 14:17:37 2018\n",
+"@author: Wagner Cipriano - wagnerbhbr - gmail - CEFETMG / MMC\n",
+"REFerences:\n",
+" https://www.mathworks.com/help/nnet/ref/plotconfusion.html\n",
+" https://stackoverflow.com/questions/28200786/how-to-plot-scikit-learn-classification-report\n",
+" https://stackoverflow.com/questions/5821125/how-to-plot-confusion-matrix-with-string-axis-rather-than-integer-in-python\n",
+" https://www.programcreek.com/python/example/96197/seaborn.heatmap\n",
+" https://stackoverflow.com/questions/19233771/sklearn-plot-confusion-matrix-with-labels/31720054\n",
+" http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py\n",
+"\"\"\"\n",
+"\n",
+"#imports\n",
+"from pandas import DataFrame\n",
+"import numpy as np\n",
+"import matplotlib.pyplot as plt\n",
+"import matplotlib.font_manager as fm\n",
+"from matplotlib.collections import QuadMesh\n",
+"import seaborn as sn\n",
+"\n",
+"\n",
+"def get_new_fig(fn, figsize=[9,9]):\n",
+" \"\"\" Init graphics \"\"\"\n",
+" fig1 = plt.figure(fn, figsize)\n",
+" ax1 = fig1.gca() #Get Current Axis\n",
+" ax1.cla() # clear existing plot\n",
+" return fig1, ax1\n",
+"#\n",
+"\n",
+"def configcell_text_and_colors(array_df, lin, col, oText, facecolors, posi, fz, fmt, show_null_values=0):\n",
+" \"\"\"\n",
+" config cell text and colors\n",
+" and return text elements to add and to dell\n",
+" @TODO: use fmt\n",
+" \"\"\"\n",
+" text_add = []; text_del = [];\n",
+" cell_val = array_df[lin][col]\n",
+" tot_all = array_df[-1][-1]\n",
+" per = (float(cell_val) / tot_all) * 100\n",
+" curr_column = array_df[:,col]\n",
+" ccl = len(curr_column)\n",
+"\n",
+" #last line and/or last column\n",
+" if(col == (ccl - 1)) or (lin == (ccl - 1)):\n",
+" #tots and percents\n",
+" if(cell_val != 0):\n",
+" if(col == ccl - 1) and (lin == ccl - 1):\n",
+" tot_rig = 0\n",
+" for i in range(array_df.shape[0] - 1):\n",
+" tot_rig += array_df[i][i]\n",
+" per_ok = (float(tot_rig) / cell_val) * 100\n",
+" elif(col == ccl - 1):\n",
+" tot_rig = array_df[lin][lin]\n",
+" per_ok = (float(tot_rig) / cell_val) * 100\n",
+" elif(lin == ccl - 1):\n",
+" tot_rig = array_df[col][col]\n",
+" per_ok = (float(tot_rig) / cell_val) * 100\n",
+" per_err = 100 - per_ok\n",
+" else:\n",
+" per_ok = per_err = 0\n",
+"\n",
+" per_ok_s = ['%.1f%%'%(per_ok), '100%'] [per_ok == 100]\n",
+"\n",
+" #text to DEL\n",
+" text_del.append(oText)\n",
+"\n",
+" #text to ADD\n",
+" font_prop = fm.FontProperties(weight='bold', size=fz)\n",
+" text_kwargs = dict(color='w', ha=\"center\", va=\"center\", gid='sum', fontproperties=font_prop)\n",
+" lis_txt = ['%d'%(cell_val), per_ok_s, '%.1f%%'%(per_err)]\n",
+" lis_kwa = [text_kwargs]\n",
+" dic = text_kwargs.copy(); dic['color'] = 'g'; lis_kwa.append(dic);\n",
+" dic = text_kwargs.copy(); dic['color'] = 'r'; lis_kwa.append(dic);\n",
+" lis_pos = [(oText._x, oText._y-0.3), (oText._x, oText._y), (oText._x, oText._y+0.3)]\n",
+" for i in range(len(lis_txt)):\n",
+" newText = dict(x=lis_pos[i][0], y=lis_pos[i][1], text=lis_txt[i], kw=lis_kwa[i])\n",
+" #print 'lin: %s, col: %s, newText: %s' %(lin, col, newText)\n",
+" text_add.append(newText)\n",
+" #print '\\n'\n",
+"\n",
+" #set background color for sum cells (last line and last column)\n",
+" carr = [0.27, 0.30, 0.27, 1.0]\n",
+" if(col == ccl - 1) and (lin == ccl - 1):\n",
+" carr = [0.17, 0.20, 0.17, 1.0]\n",
+" facecolors[posi] = carr\n",
+"\n",
+" else:\n",
+" if(per > 0):\n",
+" txt = '%s\\n%.1f%%' %(cell_val, per)\n",
+" else:\n",
+" if(show_null_values == 0):\n",
+" txt = ''\n",
+" elif(show_null_values == 1):\n",
+" txt = '0'\n",
+" else:\n",
+" txt = '0\\n0.0%'\n",
+" oText.set_text(txt)\n",
+"\n",
+" #main diagonal\n",
+" if(col == lin):\n",
+" #set color of the textin the diagonal to white\n",
+" oText.set_color('w')\n",
+" # set background color in the diagonal to blue\n",
+" facecolors[posi] = [0.35, 0.8, 0.55, 1.0]\n",
+" else:\n",
+" oText.set_color('r')\n",
+"\n",
+" return text_add, text_del\n",
+"#\n",
+"\n",
+"def insert_totals(df_cm):\n",
+" \"\"\" insert total column and line (the last ones) \"\"\"\n",
+" sum_col = []\n",
+" for c in df_cm.columns:\n",
+" sum_col.append( df_cm[c].sum() )\n",
+" sum_lin = []\n",
+" for item_line in df_cm.iterrows():\n",
+" sum_lin.append( item_line[1].sum() )\n",
+" df_cm['sum_lin'] = sum_lin\n",
+" sum_col.append(np.sum(sum_lin))\n",
+" df_cm.loc['sum_col'] = sum_col\n",
+" #print ('\\ndf_cm:\\n', df_cm, '\\n\\b\\n')\n",
+"#\n",
+"\n",
+"def pretty_plot_confusion_matrix(df_cm, annot=True, cmap=\"Oranges\", fmt='.2f', fz=11,\n",
+" lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='y'):\n",
+" \"\"\"\n",
+" print conf matrix with default layout (like matlab)\n",
+" params:\n",
+" df_cm dataframe (pandas) without totals\n",
+" annot print text in each cell\n",
+" cmap Oranges,Oranges_r,YlGnBu,Blues,RdBu, ... see:\n",
+" fz fontsize\n",
+" lw linewidth\n",
+" pred_val_axis where to show the prediction values (x or y axis)\n",
+" 'col' or 'x': show predicted values in columns (x axis) instead lines\n",
+" 'lin' or 'y': show predicted values in lines (y axis)\n",
+" \"\"\"\n",
+" if(pred_val_axis in ('col', 'x')):\n",
+" xlbl = 'Predicted'\n",
+" ylbl = 'Actual'\n",
+" else:\n",
+" xlbl = 'Actual'\n",
+" ylbl = 'Predicted'\n",
+" df_cm = df_cm.T\n",
+"\n",
+" # create \"Total\" column\n",
+" insert_totals(df_cm)\n",
+"\n",
+" #this is for print allways in the same window\n",
+" fig, ax1 = get_new_fig('Conf matrix default', figsize)\n",
+"\n",
+" #thanks for seaborn\n",
+" ax = sn.heatmap(df_cm, annot=annot, annot_kws={\"size\": fz}, linewidths=lw, ax=ax1,\n",
+" cbar=cbar, cmap=cmap, linecolor='w', fmt=fmt)\n",
+"\n",
+" #set ticklabels rotation\n",
+" ax.set_xticklabels(ax.get_xticklabels(), rotation = 45, fontsize = 10)\n",
+" ax.set_yticklabels(ax.get_yticklabels(), rotation = 25, fontsize = 10)\n",
+"\n",
+" # Turn off all the ticks\n",
+" for t in ax.xaxis.get_major_ticks():\n",
+" t.tick1On = False\n",
+" t.tick2On = False\n",
+" for t in ax.yaxis.get_major_ticks():\n",
+" t.tick1On = False\n",
+" t.tick2On = False\n",
+"\n",
+" #face colors list\n",
+" quadmesh = ax.findobj(QuadMesh)[0]\n",
+" facecolors = quadmesh.get_facecolors()\n",
+"\n",
+" #iter in text elements\n",
+" array_df = np.array( df_cm.to_records(index=False).tolist() )\n",
+" text_add = []; text_del = [];\n",
+" posi = -1 #from left to right, bottom to top.\n",
+" for t in ax.collections[0].axes.texts: #ax.texts:\n",
+" pos = np.array( t.get_position()) - [0.5,0.5]\n",
+" lin = int(pos[1]); col = int(pos[0]);\n",
+" posi += 1\n",
+" #print ('>>> pos: %s, posi: %s, val: %s, txt: %s' %(pos, posi, array_df[lin][col], t.get_text()))\n",
+"\n",
+" #set text\n",
+" txt_res = configcell_text_and_colors(array_df, lin, col, t, facecolors, posi, fz, fmt, show_null_values)\n",
+"\n",
+" text_add.extend(txt_res[0])\n",
+" text_del.extend(txt_res[1])\n",
+"\n",
+" #remove the old ones\n",
+" for item in text_del:\n",
+" item.remove()\n",
+" #append the new ones\n",
+" for item in text_add:\n",
+" ax.text(item['x'], item['y'], item['text'], **item['kw'])\n",
+"\n",
+" #titles and legends\n",
+" ax.set_title('Confusion matrix')\n",
+" ax.set_xlabel(xlbl)\n",
+" ax.set_ylabel(ylbl)\n",
+" plt.tight_layout() #set layout slim\n",
+" plt.show()\n",
+" return fig\n",
+"#\n",
+"\n",
+"def plot_confusion_matrix_from_data(y_test, predictions, columns=None, annot=True, cmap=\"Oranges\",\n",
+" fmt='.2f', fz=11, lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='lin'):\n",
+" \"\"\"\n",
+" plot confusion matrix function with y_test (actual values) and predictions (predic),\n",
+" whitout a confusion matrix yet\n",
+" \"\"\"\n",
+" from sklearn.metrics import confusion_matrix\n",
+" from pandas import DataFrame\n",
+"\n",
+" #data\n",
+" if(not columns):\n",
+" #labels axis integer:\n",
+" ##columns = range(1, len(np.unique(y_test))+1)\n",
+" #labels axis string:\n",
+" from string import ascii_uppercase\n",
+" columns = ['class %s' %(i) for i in list(ascii_uppercase)[0:len(np.unique(y_test))]]\n",
+"\n",
+" confm = confusion_matrix(y_test, predictions)\n",
+" cmap = 'Oranges';\n",
+" fz = 11;\n",
+" figsize=[9,9];\n",
+" show_null_values = 2\n",
+" df_cm = DataFrame(confm, index=columns, columns=columns)\n",
+" pretty_plot_confusion_matrix(df_cm, fz=fz, cmap=cmap, figsize=figsize, show_null_values=show_null_values, pred_val_axis=pred_val_axis)\n",
+"#\n",
+"\n",
+"\n",
+"\n",
+"#\n",
+"#TEST functions\n",
+"#\n",
+"def _test_cm():\n",
+" #test function with confusion matrix done\n",
+" array = np.array( [[13, 0, 1, 0, 2, 0],\n",
+" [ 0, 50, 2, 0, 10, 0],\n",
+" [ 0, 13, 16, 0, 0, 3],\n",
+" [ 0, 0, 0, 13, 1, 0],\n",
+" [ 0, 40, 0, 1, 15, 0],\n",
+" [ 0, 0, 0, 0, 0, 20]])\n",
+" #get pandas dataframe\n",
+" df_cm = DataFrame(array, index=range(1,7), columns=range(1,7))\n",
+" #colormap: see this and choose your more dear\n",
+" cmap = 'PuRd'\n",
+" pretty_plot_confusion_matrix(df_cm, cmap=cmap)\n",
+"#\n",
+"\n",
+"def _test_data_class():\n",
+" \"\"\" test function with y_test (actual values) and predictions (predic) \"\"\"\n",
+" #data\n",
+" y_test = np.array([1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5])\n",
+" predic = np.array([1,2,4,3,5, 1,2,4,3,5, 1,2,3,4,4, 1,4,3,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,3,3,5, 1,2,3,3,5, 1,2,3,4,4, 1,2,3,4,1, 1,2,3,4,1, 1,2,3,4,1, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5])\n",
+" \"\"\"\n",
+" Examples to validate output (confusion matrix plot)\n",
+" actual: 5 and prediction 1 >> 3\n",
+" actual: 2 and prediction 4 >> 1\n",
+" actual: 3 and prediction 4 >> 10\n",
+" \"\"\"\n",
+" columns = []\n",
+" annot = True;\n",
+" cmap = 'Oranges';\n",
+" fmt = '.2f'\n",
+" lw = 0.5\n",
+" cbar = False\n",
+" show_null_values = 2\n",
+" pred_val_axis = 'y'\n",
+" #size::\n",
+" fz = 12;\n",
+" figsize = [9,9];\n",
+" if(len(y_test) > 10):\n",
+" fz=9; figsize=[14,14];\n",
+" plot_confusion_matrix_from_data(y_test, predic, columns,\n",
+" annot, cmap, fmt, fz, lw, cbar, figsize, show_null_values, pred_val_axis)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# plot_confusion_matrix_from_data(y_test, predictions, columns=None, annot=True, cmap=\"Oranges\",\n",
+"# fmt='.2f', fz=11, lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='lin'):\n",
+"\n",
+"matplotlib.rcParams['text.usetex'] = False\n",
+"\n",
+"forecast_model = 'postintertidal_slope_sto06'\n",
+"\n",
+"df_for = impacts['forecasted'][forecast_model]\n",
+"df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n",
+"observed_regimes = df_obs.storm_regime.astype(cat_type).cat.codes.values\n",
+"forecasted_regimes = df_for.storm_regime.astype(cat_type).cat.codes.values\n",
+"\n",
+"\n",
+"confm = confusion_matrix(observed_regimes, forecasted_regimes,labels=[0,1,2,3])\n",
+"labels=['swash','collision','overwash','inundation']\n",
+"df_cm = DataFrame(confm, index=labels, columns=labels)\n",
+"\n",
+"fig = pretty_plot_confusion_matrix(df_cm, annot=True, cmap=\"Oranges\", fmt='.1f', fz=13,\n",
+" lw=0.1, cbar=False, figsize=[8,5], show_null_values=1, pred_val_axis='y')\n",
+"\n",
+"fig.savefig('11_confusion_matrix',dpi=600)"
]
 }
 ],
 "metadata": {
@@ -612,9 +940,14 @@
 "title_cell": "Table of Contents",
 "title_sidebar": "Contents",
 "toc_cell": false,
-"toc_position": {},
+"toc_position": {
+"height": "calc(100% - 180px)",
+"left": "10px",
+"top": "150px",
+"width": "286.391px"
+},
 "toc_section_display": true,
-"toc_window_display": false
+"toc_window_display": true
 },
 "varInspector": {
 "cols": {

@@ -365,12 +365,244 @@
 "f.savefig('11_change_in_slopes_accuracy_classes.png',dpi=600)"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Swash zone volume change histogram"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"How much variation in beach width change can we expect in the swash regime?"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"f, ax = plt.subplots(figsize=(5,4))\n",
+"\n",
+"sns.distplot(df.loc[df.storm_regime=='swash'].width_msl_change_pct.dropna(), \n",
+" kde=False);\n",
+"\n",
+"ax.set_title('Distribution of beach width change for swash regime')\n",
+"ax.set_xlabel('$\\Delta$ beach width (%)')\n",
+"ax.set_ylabel('Count')\n",
+"\n",
+"f.savefig('11_change_in_beach_width.png',dpi=600)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"# Split swash regime"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
-"source": []
+"source": [
+"df.columns"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"df['beta_Pxscum'] = df.beta_prestorm_mean * df.Ecum\n",
+"df['neg_swash_vol_change'] = -df.swash_vol_change\n",
+"\n",
+"ax = sns.scatterplot(data=df.loc[df.storm_regime=='swash'],\n",
+" x=\"beta_Pxscum\", \n",
+" y='neg_swash_vol_change')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"corr = df.corr(method ='pearson') \n",
+"sns.heatmap(corr, \n",
+" xticklabels=corr.columns,\n",
+" yticklabels=corr.columns,\n",
+" cmap='RdBu_r')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"len(corr.columns)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"import scipy.stats as ss\n",
+"#https://stackoverflow.com/a/24469099\n",
+"\n",
+"corr = df.corr(method ='pearson') \n",
+"\n",
+"n=len(corr.columns)\n",
+"t=corr*np.sqrt((n-2)/(1-corr*corr))\n",
+"\n",
+"\n",
+"pvals = ss.t.cdf(t, n-2)\n",
+"\n",
+"corr.values"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"code_folding": []
+},
+"outputs": [],
+"source": [
+"from matplotlib.patches import Ellipse\n",
+"def corrplot(data, pvalues, labels):\n",
+" \"\"\"Creates a correlation plot of the passed data.\n",
+" The function returns the plot which can then be shown with\n",
+" plot.show(), saved to a file with plot.savefig(), or manipulated\n",
+" in any other standard matplotlib way.\n",
+" data is the correlation matrix, a 2-D numpy array containing\n",
+" the pairwise correlations between variables;\n",
+" pvalues is a matrix containing the pvalue for each corresponding\n",
+" correlation value; if none it is assumed to be the zero matrix\n",
+" labels is an array containing the variable names\n",
+" https://github.com/louridas/corrplot/blob/master/corrplot.py\n",
+" \"\"\"\n",
+"\n",
+" plt.figure(1)\n",
+"\n",
+" column_labels = labels\n",
+" row_labels = labels\n",
+" \n",
+" f = plt.figure(figsize=(8,8))\n",
+" ax = plt.subplot(1, 1, 1, aspect='equal')\n",
+"\n",
+" width, height = data.shape\n",
+" num_cols, num_rows = width, height\n",
+"\n",
+" if pvalues is None:\n",
+" pvalues = np.zeros([num_rows, num_cols])\n",
+" \n",
+" shrink = 0.9\n",
+"\n",
+" poscm = cm.get_cmap('Blues')\n",
+" negcm = cm.get_cmap('Oranges')\n",
+"\n",
+" for x in range(width):\n",
+" for y in range(height):\n",
+" d = data[x, y]\n",
+" c = pvalues[x, y]\n",
+" rotate = -45 if d > 0 else +45\n",
+" clrmap = poscm if d >= 0 else negcm\n",
+" d_abs = np.abs(d)\n",
+" ellipse = Ellipse((x, y),\n",
+" width=1 * shrink,\n",
+" height=(shrink - d_abs*shrink),\n",
+" angle=rotate)\n",
+" ellipse.set_edgecolor('black')\n",
+" ellipse.set_facecolor(clrmap(d_abs))\n",
+" if c > 0.05:\n",
+" ellipse.set_linestyle('dotted')\n",
+" ellipse.set_alpha(0.5)\n",
+" ax.add_artist(ellipse)\n",
+"\n",
+" ax.set_xlim(-1, num_cols)\n",
+" ax.set_ylim(-1, num_rows)\n",
+" \n",
+" ax.xaxis.tick_top()\n",
+" xtickslocs = np.arange(len(row_labels))\n",
+" ax.set_xticks(xtickslocs)\n",
+" ax.set_xticklabels(row_labels, rotation=30, fontsize='small', ha='left')\n",
+"\n",
+" ax.invert_yaxis()\n",
+" ytickslocs = np.arange(len(row_labels))\n",
+" ax.set_yticks(ytickslocs)\n",
+" ax.set_yticklabels(column_labels, fontsize='small')\n",
+"\n",
+" return plt\n",
+"\n",
+"import string\n",
+"num_rows = 20\n",
+"num_cols = num_rows\n",
+"\n",
+"min_length = 10\n",
+"max_length = 20\n",
+"\n",
+"alnums = list(string.ascii_uppercase + string.digits)\n",
+"labels = [''.join(np.random.choice(alnums,\n",
+" np.random.randint(min_length,\n",
+" max_length)))\n",
+" for y in np.arange(num_rows)]\n",
+"\n",
+"\n",
+"data = np.random.random([num_rows, num_cols])\n",
+"\n",
+"# data[np.random.choice(num_rows, num_rows / 2), :] *= -1\n",
+"\n",
+"np.fill_diagonal(data, 1)\n",
+"\n",
+"data_symm = (data + data.T) / 2\n",
+"\n",
+"# plot = corrplot(data_symm, None, labels)\n",
+"plot = corrplot(corr.values, pvals, corr.columns.tolist())\n",
+"plot.show()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from scipy import stats\n",
+"\n",
+"x_col = 'beta_prestorm_intertidal'\n",
+"y_col = \"beta_diff_intertidal\"\n",
+"# y_col = 'swash_vol_change'\n",
+"# x_col = \"Pxscum\"\n",
+"data = df.loc[df.storm_regime=='swash']\n",
+"\n",
+"slope, intercept, r_value, p_value, std_err = stats.linregress(data[x_col],data[y_col])\n",
+"\n",
+"ax = sns.regplot(data=data,\n",
+" y=y_col, \n",
+" x=x_col, marker=\"+\",fit_reg=True, scatter_kws={'linewidth':1},\n",
+" line_kws={'label':\"y={0:.2f}x+{1:.2f}\\n(r2={2:.2f})\".format(slope,intercept,r_value**2)})\n",
+"ax.legend()\n",
+"\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# df.loc[(df.storm_regime=='swash')&(df.swash_vol_change<10)].sort_values(by=['Pxscum'],ascending=False)\n",
+"df.loc[(df.swash_vol_change>200)].sort_values(by=['Pxscum'],ascending=False)"
+]
 }
 ],
 "metadata": {

@@ -316,8 +316,15 @@ def slope_from_profile(
     """
 
     # Need all data to get the slope
-    if any([x is None for x in [profile_x, profile_z, top_elevation, btm_elevation]]):
-        return None
+    # Check validity of profile arrays and return None if data is not good
+    for profile in [profile_x, profile_z]:
+        if all(np.isnan(profile)) or all(x is None for x in profile):
+            return None
+
+    # Check validity of elevation values
+    for ele in [top_elevation, btm_elevation]:
+        if np.isnan(ele) or ele is None:
+            return None
 
     end_points = {"top": {"z": top_elevation}, "btm": {"z": btm_elevation}}
@@ -377,6 +384,11 @@ def slope_from_profile(
         True if end_points["top"]["x"] < pts < end_points["btm"]["x"] else False
         for pts in profile_x
     ]
+
+    # Need at least two points to do linear regression
+    if sum(profile_mask) < 2:
+        return None
+
     slope_x = np.array(profile_x)[profile_mask].tolist()
     slope_z = np.array(profile_z)[profile_mask].tolist()
     slope, _, _, _, _ = stats.linregress(slope_x, slope_z)

@@ -2,7 +2,11 @@ import click
 import numpy as np
 import pandas as pd
 from scipy.integrate import simps
+from scipy.signal import savgol_filter
+from scipy.interpolate import interp1d
+from numpy import ma as ma
+from tqdm import tqdm
 
 from logs import setup_logging
 from utils import crossings, get_i_or_default
 from analysis.forecast_twl import get_mean_slope, get_intertidal_slope
@@ -22,149 +26,101 @@ def return_first_or_nan(l):
     return l[0]
 
 
-def volume_change(df_profiles, df_profile_features, zone):
+def round_up_to_odd(f):
+    """
+    https://stackoverflow.com/a/31648815
+    :param f:
+    :return:
+    """
+    return int(np.ceil(f) // 2 * 2 + 1)
+
+
+def volume_change(df_profiles, df_profile_features):
     """
     Calculates how much volume change there is between prestorm and poststorm profiles.
     :param df_profiles:
     :param df_profile_features:
-    :param zone: Either 'swash' or 'dune_face'
     :return:
     """
-    logger.info("Calculating change in beach volume in {} zone".format(zone))
+    logger.info("Calculating change in beach volume")
 
     df_vol_changes = pd.DataFrame(
         index=df_profile_features.index.get_level_values("site_id").unique()
    )
+    df_profiles = df_profiles.dropna(subset=["z"])
     df_profiles = df_profiles.sort_index()
     sites = df_profiles.groupby(level=["site_id"])
 
-    for site_id, df_site in sites:
-        logger.debug(
-            "Calculating change in beach volume at {} in {} zone".format(site_id, zone)
-        )
-
-        prestorm_row = df_profile_features.loc[(site_id, "prestorm")]
-        prestorm_dune_toe_x = prestorm_row.dune_toe_x
-        prestorm_dune_crest_x = prestorm_row.dune_crest_x
-
-        # If no dune toe has been defined, Dlow = Dhigh. Refer to Sallenger (2000).
-        if np.isnan(prestorm_dune_toe_x):
-            prestorm_dune_toe_x = prestorm_dune_crest_x
-
-        # If no prestorm and poststorm profiles, skip site and continue
-        profile_lengths = [
-            len(df_site.xs(x, level="profile_type")) for x in ["prestorm", "poststorm"]
-        ]
-        if any([length == 0 for length in profile_lengths]):
-            continue
-
-        # Find last x coordinate where we have both prestorm and poststorm measurements. If we don't do this,
-        # the prestorm and poststorm values are going to be calculated over different lengths.
-        df_zone = df_site.dropna(subset=["z"])
-        x_last_obs = min(
-            [
-                max(
-                    df_zone.query(
-                        "profile_type == '{}'".format(profile_type)
-                    ).index.get_level_values("x")
-                )
-                for profile_type in ["prestorm", "poststorm"]
-            ]
-        )
-        x_first_obs = max(
-            [
-                min(
-                    df_zone.query(
-                        "profile_type == '{}'".format(profile_type)
-                    ).index.get_level_values("x")
-                )
-                for profile_type in ["prestorm", "poststorm"]
-            ]
-        )
-
-        # Where we want to measure pre and post storm volume is dependent on the zone selected
-        if zone == "swash":
-            x_min = max(prestorm_dune_toe_x, x_first_obs)
-            x_max = x_last_obs
-        elif zone == "dune_face":
-            x_min = max(prestorm_dune_crest_x, x_first_obs)
-            x_max = min(prestorm_dune_toe_x, x_last_obs)
-        else:
-            logger.warning("Zone argument not properly specified. Please check")
-            x_min = None
-            x_max = None
-
-        # Now, compute the volume of sand between the x-coordinates prestorm_dune_toe_x and x_swash_last for both prestorm
-        # and post storm profiles.
-        prestorm_vol = beach_volume(
-            x=df_zone.query("profile_type=='prestorm'").index.get_level_values("x"),
-            z=df_zone.query("profile_type=='prestorm'").z,
-            x_min=x_min,
-            x_max=x_max,
-        )
-        poststorm_vol = beach_volume(
-            x=df_zone.query("profile_type=='poststorm'").index.get_level_values("x"),
-            z=df_zone.query("profile_type=='poststorm'").z,
-            x_min=x_min,
-            x_max=x_max,
-        )
-
-        # Identify the x location where our pre and post storm profiles start to differ. This is so changes not due to
-        # the storm are not included when calculating volume.
-        df_prestorm = (
-            df_site.xs("prestorm", level="profile_type").z.rename("z_pre").to_frame()
-        )
-        df_poststorm = (
-            df_site.xs("poststorm", level="profile_type").z.rename("z_post").to_frame()
-        )
-        df_diff = df_prestorm.merge(df_poststorm, on=["site_id", "x"])
-        df_diff["z_diff"] = df_diff.z_pre - df_diff.z_post
-
-        # Find all locations where the difference in pre and post storm is zero. Take the most seaward location as the
-        # x location where our profiles are the same.
-        x_crossings = crossings(df_diff.index.get_level_values("x"), df_diff.z_diff, 0)
-        if len(x_crossings) != 0:
-            x_change_point = x_crossings[-1]
-        else:
-            x_change_point = np.nan
-
-        # # For debugging
-        # import matplotlib.pyplot as plt
-        # f,(ax1,ax2) = plt.subplots(2,1,sharex=True)
-        # ax1.plot(df_prestorm.index.get_level_values('x'), df_prestorm.z_pre,label='prestorm')
-        # ax1.plot(df_poststorm.index.get_level_values('x'), df_poststorm.z_post,label='poststorm')
-        # ax1.axvline(x_crossings[-1], color='red', linestyle='--', linewidth=0.5, label='Change point')
-        # ax1.legend()
-        # ax1.set_title(site_id)
-        # ax1.set_ylabel('elevation (m AHD)')
-        # ax2.plot(df_diff.index.get_level_values('x'), df_diff.z_diff)
-        # ax2.set_xlabel('x coordinate (m)')
-        # ax2.set_ylabel('elevation diff (m)')
-        # ax2.axvline(x_crossings[-1],color='red',linestyle='--',linewidth=0.5)
-        # plt.show()
-
-        diff_vol = beach_volume(
-            x=df_diff.index.get_level_values("x"),
-            z=df_diff.z_diff,
-            x_min=np.nanmax([x_min, x_change_point]),
-            x_max=np.nanmax([x_max, x_change_point]),
-        )
-
-        # Here, if cannot calculate the difference volume, assume no volume change
-        if np.isnan(diff_vol):
-            diff_vol = 0
-
-        # Base pct change on diff volume
-        if diff_vol == 0:
-            pct_change = 0
-        else:
-            pct_change = diff_vol / prestorm_vol * 100
-
-        df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = prestorm_vol
-        df_vol_changes.loc[site_id, "poststorm_{}_vol".format(zone)] = poststorm_vol
-        df_vol_changes.loc[site_id, "{}_vol_change".format(zone)] = diff_vol
-        df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = pct_change
+    for site_id, df_site in tqdm(sites):
+
+        params = {}
+
+        for zone in ["dune", "swash"]:
+
+            params[zone] = {}
+
+            for profile_type in ["prestorm", "poststorm"]:
+
+                params[zone][profile_type] = {}
+
+                # Define the edges of the swash and dunes where we want to calculate subaerial volume.
+                if zone == "swash":
+                    params[zone][profile_type]["x_min"] = df_profile_features.loc[
+                        (site_id, profile_type)
+                    ].dune_toe_x
+                    params[zone][profile_type]["x_max"] = max(
+                        df_profiles.loc[(site_id, profile_type)].index.get_level_values(
+                            "x"
+                        )
+                    )
+
+                    # For profiles with no Dlow value, we take Dhigh as the minimum value to calculate swash
+                    if np.isnan(params[zone][profile_type]["x_min"]):
+                        params[zone][profile_type]["x_min"] = df_profile_features.loc[
+                            (site_id, profile_type)
+                        ].dune_crest_x
+
+                elif zone == "dune":
+                    params[zone][profile_type]["x_min"] = df_profile_features.loc[
+                        (site_id, profile_type)
+                    ].dune_crest_x
+                    params[zone][profile_type]["x_max"] = df_profile_features.loc[
+                        (site_id, profile_type)
+                    ].dune_toe_x
+
+                    # For profiles with no Dlow value, the dune is undefined and we cannot calculate a dune volume.
+
+                # Calculate subaerial volume based on our x min and maxes
+                params[zone][profile_type]["subaerial_vol"] = beach_volume(
+                    x=df_profiles.loc[(site_id, profile_type)].index.get_level_values(
+                        "x"
+                    ),
+                    z=df_profiles.loc[(site_id, profile_type)].z.values,
+                    x_min=params[zone][profile_type]["x_min"],
+                    x_max=params[zone][profile_type]["x_max"],
+                )
+
+            # TODO Landward limit still needs to be incorporated
+
+            params[zone]["vol_change"] = (
+                params[zone]["poststorm"]["subaerial_vol"]
+                - params[zone]["prestorm"]["subaerial_vol"]
+            )
+            params[zone]["pct_change"] = (
+                params[zone]["vol_change"] / params[zone]["prestorm"]["subaerial_vol"]
+            )
+
+            df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = params[zone][
+                "prestorm"
+            ]["subaerial_vol"]
+            df_vol_changes.loc[site_id, "poststorm_{}_vol".format(zone)] = params[zone][
+                "poststorm"
+            ]["subaerial_vol"]
+            df_vol_changes.loc[site_id, "{}_vol_change".format(zone)] = params[zone][
+                "vol_change"
+            ]
+            df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = params[zone][
+                "pct_change"
+            ]
 
     return df_vol_changes
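`beach_volume`, called throughout the rewritten `volume_change` above, is not part of this diff. As a rough sketch only — assuming it integrates elevation over the given x window with `scipy.integrate.simps` (imported at the top of this file); the masking and NaN handling here are assumptions, not the repository's code:

```python
# Assumed shape of a subaerial beach volume helper; not the repo's code.
import numpy as np
from scipy.integrate import simps

def beach_volume_sketch(x, z, x_min, x_max):
    """Integrate elevation z(x) between x_min and x_max (m3 per m of beach)."""
    if np.isnan(x_min) or np.isnan(x_max):
        return np.nan
    x, z = np.asarray(x, dtype=float), np.asarray(z, dtype=float)
    mask = (x_min <= x) & (x <= x_max) & ~np.isnan(z)
    if mask.sum() < 2:  # not enough points to integrate
        return np.nan
    return simps(z[mask], x[mask])

# Example: planar 1:10 slope from z=5 at x=0 down to z=0 at x=50
x = np.linspace(0, 50, 101)
z = 5 - 0.1 * x
print(beach_volume_sketch(x, z, 0, 50))  # 125.0
```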
@@ -197,11 +153,11 @@ def storm_regime(df_observed_impacts):
     """
     logger.info("Getting observed storm regimes")
 
-    swash = (df_observed_impacts.dune_face_pct_change <= 2) & (
-        df_observed_impacts.dune_face_vol_change <= 3
+    swash = (df_observed_impacts.dune_pct_change <= 2) & (
+        df_observed_impacts.dune_vol_change <= 3
     )
-    collision = (df_observed_impacts.dune_face_pct_change >= 2) | (
-        df_observed_impacts.dune_face_vol_change > 3
+    collision = (df_observed_impacts.dune_pct_change >= 2) | (
+        df_observed_impacts.dune_vol_change > 3
     )
 
     df_observed_impacts.loc[swash, "storm_regime"] = "swash"
@@ -258,13 +214,9 @@ def create_observed_impacts(
     # TODO Review volume change with changing dune toe/crests
     logger.info("Getting pre/post storm volumes")
-    df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash")
-    df_dune_face_vol_changes = volume_change(
-        df_profiles, df_profile_features, zone="dune_face"
-    )
-    df_observed_impacts = df_observed_impacts.join(
-        [df_swash_vol_changes, df_dune_face_vol_changes]
-    )
+    df_vol_changes = volume_change(df_profiles, df_profile_features)
+    df_observed_impacts = df_observed_impacts.join(df_vol_changes)
 
     # Classify regime based on volume changes
     df_observed_impacts = storm_regime(df_observed_impacts)
@@ -343,8 +295,12 @@ def create_observed_impacts(
         ele=0,
         col_name="width_msl_poststorm",
     )
-    df_width_msl_change_m = (df_width_msl_poststorm - df_width_msl_prestorm).rename('width_msl_change_m')
-    df_width_msl_change_pct = (df_width_msl_change_m / df_width_msl_prestorm * 100).rename('width_msl_change_pct')
+    df_width_msl_change_m = (df_width_msl_poststorm - df_width_msl_prestorm).rename(
+        "width_msl_change_m"
+    )
+    df_width_msl_change_pct = (
+        df_width_msl_change_m / df_width_msl_prestorm * 100
+    ).rename("width_msl_change_pct")
 
     # Join beach width change onto observed impacts dataframe
     df_observed_impacts = pd.concat(
