Compare commits


4 Commits

Author SHA1 Message Date
Chris Leaman 5776111fdb Update notebooks 6 years ago
Chris Leaman 6925424661 Reinstall nbstripout 6 years ago
Chris Leaman 965dcf2ec0 Optimize calculation of change in volume
We now calculate where the pre-storm and post-storm profiles change a bit more robustly.
6 years ago
Chris Leaman f0f77f5505 Update packages 6 years ago
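The change-point detection referenced in 965dcf2ec0 is implemented in the impacts.py diff at the bottom of this page. As a minimal sketch of the idea with synthetic profile data (start_stop(), the 0.2 m lidar accuracy, and the run-length thresholds are copied from that diff):

import numpy as np

def start_stop(a, trigger_val, len_thresh=2):
    # [start, stop] index pairs of runs of trigger_val strictly longer than len_thresh
    mask = np.r_[False, np.equal(a, trigger_val), False]
    idx = np.flatnonzero(mask[1:] != mask[:-1])
    lens = idx[1::2] - idx[::2]
    return idx.reshape(-1, 2)[lens > len_thresh] - [0, 1]

x = np.arange(0, 100.0)             # cross-shore chainage (m), synthetic
z_pre = np.linspace(10, 0, x.size)  # prestorm profile
z_post = z_pre.copy()
z_post[40:] -= 1.5                  # storm erodes the seaward half
z_diff = z_post - z_pre

# First long stretch where the two profiles agree to within lidar accuracy (0.2 m)
good_match = start_stop(np.abs(z_diff) < 0.2, trigger_val=True, len_thresh=20)
idx_min = good_match[0][0]

# The longest run of erosion seaward of that stretch marks the change point
eroded = start_stop(z_diff < 0, trigger_val=True, len_thresh=1)
runs = sorted([r for r in eroded if r[0] > idx_min],
              key=lambda r: r[1] - r[0], reverse=True)
print(x[runs[0][0]])  # -> 40.0, the x-coordinate where the profiles diverge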

@@ -4,9 +4,11 @@ channels:
dependencies: dependencies:
- python=3.6 - python=3.6
- attrs - attrs
- pre-commit - pre_commit
- autopep8 - autopep8
- black - black
- cartopy
- colorcet
- click - click
- click-plugins - click-plugins
- colorlover - colorlover
@@ -35,6 +37,7 @@ dependencies:
- scikit-learn - scikit-learn
- scipy - scipy
- setuptools - setuptools
- seaborn
- shapely - shapely
- statsmodels - statsmodels
- tqdm - tqdm
@@ -43,3 +46,4 @@ dependencies:
- pip: - pip:
- blackcellmagic - blackcellmagic
- mat4py - mat4py
- overpass

@@ -0,0 +1,3 @@
*.ipynb filter=nbstripout
*.ipynb diff=ipynb
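These attributes hook notebooks into nbstripout: filter=nbstripout strips cell outputs when .ipynb files are staged, and diff=ipynb selects a diff driver that compares notebooks without their outputs. Both drivers are normally registered in the repository's git config by nbstripout --install, which is presumably what the "Reinstall nbstripout" commit above ran.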

@@ -94,6 +94,61 @@
"] " "] "
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Matplot lib default settings\n",
"plt.rcParams[\"figure.figsize\"] = (10, 6)\n",
"plt.rcParams['axes.grid'] = True\n",
"plt.rcParams['grid.alpha'] = 0.3\n",
"plt.rcParams['grid.color'] = \"grey\"\n",
"plt.rcParams['grid.linestyle'] = \"--\"\n",
"plt.rcParams['grid.linewidth'] = 0.5\n",
"plt.rcParams['axes.grid'] = True\n",
"\n",
"# # https://stackoverflow.com/a/20709149\n",
"matplotlib.rcParams['text.usetex'] = True\n",
"matplotlib.rcParams['font.family'] = 'sans-serif'\n",
"\n",
"# # matplotlib.rcParams['text.latex.preamble'] = [\n",
"# # r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n",
"# # r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n",
"# # r'\\usepackage{helvet}', # set the normal font here\n",
"# # r'\\usepackage{amsmath}',\n",
"# # r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n",
"# # r'\\sansmath', # <- tricky! -- gotta actually tell tex to use!\n",
"# # ]\n",
"\n",
"matplotlib.rcParams['text.latex.preamble'] = [\n",
" r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n",
" r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n",
" r'\\usepackage[default]{sourcesanspro}',\n",
" r'\\usepackage{amsmath}',\n",
" r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n",
" r'\\sansmath', # <- tricky! -- gotta actually tell tex to use!\n",
"]\n",
"\n",
"# import matplotlib as mpl\n",
"# mpl.use(\"pgf\")\n",
"# pgf_with_custom_preamble = {\n",
"# \"font.family\":\"sans-serif\", # use serif/main font for text elements\n",
"# \"text.usetex\":True, # use inline math for ticks\n",
"# \"pgf.rcfonts\":False, # don't setup fonts from rc parameters\n",
"# \"pgf.preamble\": [\n",
"# r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n",
"# r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n",
"# r'\\usepackage[default]{sourcesanspro}',\n",
"# r'\\usepackage{amsmath}',\n",
"# r'\\usepackage[mathrm=sym]{unicode-math}',\n",
"# r'\\setmathfont{Fira Math}',\n",
"# ]\n",
"# }\n",
"# mpl.rcParams.update(pgf_with_custom_preamble)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -599,7 +654,12 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"code_folding": [
23,
223
]
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# -*- coding: utf-8 -*-\n", "# -*- coding: utf-8 -*-\n",
@@ -633,7 +693,7 @@
" return fig1, ax1\n", " return fig1, ax1\n",
"#\n", "#\n",
"\n", "\n",
"def configcell_text_and_colors(array_df, lin, col, oText, facecolors, posi, fz, fmt, show_null_values=0):\n", "def configcell_text_and_colors(array_df, lin, col, oText, facecolors, posi, fz, fmt, show_null_values=0, show_pcts=True):\n",
" \"\"\"\n", " \"\"\"\n",
" config cell text and colors\n", " config cell text and colors\n",
" and return text elements to add and to dell\n", " and return text elements to add and to dell\n",
@@ -665,48 +725,61 @@
" else:\n", " else:\n",
" per_ok = per_err = 0\n", " per_ok = per_err = 0\n",
"\n", "\n",
" per_ok_s = ['%.1f%%'%(per_ok), '100%'] [per_ok == 100]\n", " per_ok_s = ['(\\\\small{{{:.1f}\\%}})'.format(per_ok), '100%'] [per_ok == 100]\n",
"\n", " \n",
" #text to DEL\n", " #text to DEL\n",
" text_del.append(oText)\n", " text_del.append(oText)\n",
"\n", "\n",
" #text to ADD\n", " #text to ADD\n",
" font_prop = fm.FontProperties(weight='bold', size=fz)\n", " font_prop = fm.FontProperties(weight='bold', size=fz)\n",
" text_kwargs = dict(color='w', ha=\"center\", va=\"center\", gid='sum', fontproperties=font_prop)\n", " text_kwargs = dict(color='k', ha=\"center\", va=\"center\", gid='sum', fontproperties=font_prop)\n",
" lis_txt = ['%d'%(cell_val), per_ok_s, '%.1f%%'%(per_err)]\n", " lis_txt = ['%d'%(cell_val), per_ok_s, '(\\\\small{{{:.1f}\\%}})'.format(per_err)]\n",
" lis_kwa = [text_kwargs]\n", " lis_kwa = [text_kwargs]\n",
" dic = text_kwargs.copy(); dic['color'] = 'g'; lis_kwa.append(dic);\n", " dic = text_kwargs.copy(); dic['color'] = 'g'; lis_kwa.append(dic);\n",
" dic = text_kwargs.copy(); dic['color'] = 'r'; lis_kwa.append(dic);\n", " dic = text_kwargs.copy(); dic['color'] = 'r'; lis_kwa.append(dic);\n",
" lis_pos = [(oText._x, oText._y-0.3), (oText._x, oText._y), (oText._x, oText._y+0.3)]\n", " \n",
" for i in range(len(lis_txt)):\n", " if show_pcts:\n",
" lis_pos = [(oText._x, oText._y-0.3), (oText._x, oText._y), (oText._x, oText._y+0.3)]\n",
" else:\n",
" lis_pos = [(oText._x, oText._y)]\n",
" \n",
" for i in range(len(lis_pos)):\n",
" newText = dict(x=lis_pos[i][0], y=lis_pos[i][1], text=lis_txt[i], kw=lis_kwa[i])\n", " newText = dict(x=lis_pos[i][0], y=lis_pos[i][1], text=lis_txt[i], kw=lis_kwa[i])\n",
" #print 'lin: %s, col: %s, newText: %s' %(lin, col, newText)\n",
" text_add.append(newText)\n", " text_add.append(newText)\n",
" #print '\\n'\n", " \n",
"\n", "\n",
" #set background color for sum cells (last line and last column)\n", " #set background color for sum cells (last line and last column)\n",
" carr = [0.27, 0.30, 0.27, 1.0]\n", " carr = [0.9, 0.9, 0.9, 1.0]\n",
" if(col == ccl - 1) and (lin == ccl - 1):\n", " if(col == ccl - 1) and (lin == ccl - 1):\n",
" carr = [0.17, 0.20, 0.17, 1.0]\n", " carr = [0.9, 0.9, 0.9, 1.0]\n",
" facecolors[posi] = carr\n", " facecolors[posi] = carr\n",
"\n", "\n",
" else:\n", " else:\n",
" if(per > 0):\n", " if(per > 0):\n",
" txt = '%s\\n%.1f%%' %(cell_val, per)\n", "# txt = '%s\\n%.1f\\%%' %(cell_val, per)\n",
" \n",
" if show_pcts:\n",
" txt = '{}\\n\\\\small{{({:.1f}\\%)}}'.format(cell_val,per)\n",
" else:\n",
" txt = '{}'.format(cell_val)\n",
" else:\n", " else:\n",
" if(show_null_values == 0):\n", " if(show_null_values == 0):\n",
" txt = ''\n", " txt = ''\n",
" elif(show_null_values == 1):\n", " elif(show_null_values == 1):\n",
" txt = '0'\n", " txt = '0'\n",
" else:\n", " else:\n",
" txt = '0\\n0.0%'\n", " if show_pcts:\n",
" txt = '0\\n0.0\\%'\n",
" else:\n",
" txt = '0'\n",
" oText.set_text(txt)\n", " oText.set_text(txt)\n",
"\n", "\n",
" #main diagonal\n", " #main diagonal\n",
" if(col == lin):\n", " if(col == lin):\n",
" #set color of the textin the diagonal to white\n", " #set color of the textin the diagonal to white\n",
" oText.set_color('w')\n", "# oText.set_color('w')\n",
" # set background color in the diagonal to blue\n", " oText.set_color('k')\n",
" # set background color in the diagonal to green\n",
" facecolors[posi] = [0.35, 0.8, 0.55, 1.0]\n", " facecolors[posi] = [0.35, 0.8, 0.55, 1.0]\n",
" else:\n", " else:\n",
" oText.set_color('r')\n", " oText.set_color('r')\n",
@@ -722,14 +795,13 @@
" sum_lin = []\n", " sum_lin = []\n",
" for item_line in df_cm.iterrows():\n", " for item_line in df_cm.iterrows():\n",
" sum_lin.append( item_line[1].sum() )\n", " sum_lin.append( item_line[1].sum() )\n",
" df_cm['sum_lin'] = sum_lin\n", " df_cm[r'$\\sum$ Row'] = sum_lin\n",
" sum_col.append(np.sum(sum_lin))\n", " sum_col.append(np.sum(sum_lin))\n",
" df_cm.loc['sum_col'] = sum_col\n", " df_cm.loc[r'$\\sum$ Col'] = sum_col\n",
" #print ('\\ndf_cm:\\n', df_cm, '\\n\\b\\n')\n", "\n",
"#\n",
"\n", "\n",
"def pretty_plot_confusion_matrix(df_cm, annot=True, cmap=\"Oranges\", fmt='.2f', fz=11,\n", "def pretty_plot_confusion_matrix(df_cm, annot=True, cmap=\"Oranges\", fmt='.2f', fz=11,\n",
" lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='y'):\n", " lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='y',title='Confusion matrix',show_pcts=True):\n",
" \"\"\"\n", " \"\"\"\n",
" print conf matrix with default layout (like matlab)\n", " print conf matrix with default layout (like matlab)\n",
" params:\n", " params:\n",
@@ -743,11 +815,11 @@
" 'lin' or 'y': show predicted values in lines (y axis)\n", " 'lin' or 'y': show predicted values in lines (y axis)\n",
" \"\"\"\n", " \"\"\"\n",
" if(pred_val_axis in ('col', 'x')):\n", " if(pred_val_axis in ('col', 'x')):\n",
" xlbl = 'Predicted'\n", " xlbl = 'Hindcast'\n",
" ylbl = 'Actual'\n", " ylbl = 'Observed'\n",
" else:\n", " else:\n",
" xlbl = 'Actual'\n", " xlbl = 'Observed'\n",
" ylbl = 'Predicted'\n", " ylbl = 'Hindcast'\n",
" df_cm = df_cm.T\n", " df_cm = df_cm.T\n",
"\n", "\n",
" # create \"Total\" column\n", " # create \"Total\" column\n",
@@ -761,8 +833,8 @@
" cbar=cbar, cmap=cmap, linecolor='w', fmt=fmt)\n", " cbar=cbar, cmap=cmap, linecolor='w', fmt=fmt)\n",
"\n", "\n",
" #set ticklabels rotation\n", " #set ticklabels rotation\n",
" ax.set_xticklabels(ax.get_xticklabels(), rotation = 45, fontsize = 10)\n", "# ax.set_xticklabels(ax.get_xticklabels(),fontsize = tick_label_fontsize)\n",
" ax.set_yticklabels(ax.get_yticklabels(), rotation = 25, fontsize = 10)\n", " ax.set_yticklabels(ax.get_yticklabels(), horizontalalignment='right',verticalalignment='center')\n",
"\n", "\n",
" # Turn off all the ticks\n", " # Turn off all the ticks\n",
" for t in ax.xaxis.get_major_ticks():\n", " for t in ax.xaxis.get_major_ticks():\n",
@@ -787,7 +859,7 @@
" #print ('>>> pos: %s, posi: %s, val: %s, txt: %s' %(pos, posi, array_df[lin][col], t.get_text()))\n", " #print ('>>> pos: %s, posi: %s, val: %s, txt: %s' %(pos, posi, array_df[lin][col], t.get_text()))\n",
"\n", "\n",
" #set text\n", " #set text\n",
" txt_res = configcell_text_and_colors(array_df, lin, col, t, facecolors, posi, fz, fmt, show_null_values)\n", " txt_res = configcell_text_and_colors(array_df, lin, col, t, facecolors, posi, fz, fmt, show_null_values, show_pcts)\n",
"\n", "\n",
" text_add.extend(txt_res[0])\n", " text_add.extend(txt_res[0])\n",
" text_del.extend(txt_res[1])\n", " text_del.extend(txt_res[1])\n",
@@ -798,88 +870,19 @@
" #append the new ones\n", " #append the new ones\n",
" for item in text_add:\n", " for item in text_add:\n",
" ax.text(item['x'], item['y'], item['text'], **item['kw'])\n", " ax.text(item['x'], item['y'], item['text'], **item['kw'])\n",
"\n", " \n",
" #titles and legends\n", " #titles and legends\n",
" ax.set_title('Confusion matrix')\n", " ax.set_title(title)\n",
" ax.set_xlabel(xlbl)\n", " ax.set_ylabel(r'{} storm regimes'.format(ylbl),fontsize=12)\n",
" ax.set_ylabel(ylbl)\n", " ax.set_xlabel(r'{} storm regimes'.format(xlbl),fontsize=12)\n",
" plt.tight_layout() #set layout slim\n",
" plt.show()\n",
" return fig\n",
"#\n",
"\n",
"def plot_confusion_matrix_from_data(y_test, predictions, columns=None, annot=True, cmap=\"Oranges\",\n",
" fmt='.2f', fz=11, lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='lin'):\n",
" \"\"\"\n",
" plot confusion matrix function with y_test (actual values) and predictions (predic),\n",
" whitout a confusion matrix yet\n",
" \"\"\"\n",
" from sklearn.metrics import confusion_matrix\n",
" from pandas import DataFrame\n",
"\n",
" #data\n",
" if(not columns):\n",
" #labels axis integer:\n",
" ##columns = range(1, len(np.unique(y_test))+1)\n",
" #labels axis string:\n",
" from string import ascii_uppercase\n",
" columns = ['class %s' %(i) for i in list(ascii_uppercase)[0:len(np.unique(y_test))]]\n",
"\n",
" confm = confusion_matrix(y_test, predictions)\n",
" cmap = 'Oranges';\n",
" fz = 11;\n",
" figsize=[9,9];\n",
" show_null_values = 2\n",
" df_cm = DataFrame(confm, index=columns, columns=columns)\n",
" pretty_plot_confusion_matrix(df_cm, fz=fz, cmap=cmap, figsize=figsize, show_null_values=show_null_values, pred_val_axis=pred_val_axis)\n",
"#\n",
"\n",
"\n", "\n",
"\n", "# ax.xaxis.set_label_coords(0.5, -0.1)\n",
"#\n", "# ax.yaxis.set_label_coords(-0.05, 0.5)\n",
"#TEST functions\n", " \n",
"#\n", "# plt.tight_layout() #set layout slim\n",
"def _test_cm():\n", " \n",
" #test function with confusion matrix done\n", " return fig\n",
" array = np.array( [[13, 0, 1, 0, 2, 0],\n", "#\n"
" [ 0, 50, 2, 0, 10, 0],\n",
" [ 0, 13, 16, 0, 0, 3],\n",
" [ 0, 0, 0, 13, 1, 0],\n",
" [ 0, 40, 0, 1, 15, 0],\n",
" [ 0, 0, 0, 0, 0, 20]])\n",
" #get pandas dataframe\n",
" df_cm = DataFrame(array, index=range(1,7), columns=range(1,7))\n",
" #colormap: see this and choose your more dear\n",
" cmap = 'PuRd'\n",
" pretty_plot_confusion_matrix(df_cm, cmap=cmap)\n",
"#\n",
"\n",
"def _test_data_class():\n",
" \"\"\" test function with y_test (actual values) and predictions (predic) \"\"\"\n",
" #data\n",
" y_test = np.array([1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5])\n",
" predic = np.array([1,2,4,3,5, 1,2,4,3,5, 1,2,3,4,4, 1,4,3,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,3,3,5, 1,2,3,3,5, 1,2,3,4,4, 1,2,3,4,1, 1,2,3,4,1, 1,2,3,4,1, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,4,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5, 1,2,3,4,5])\n",
" \"\"\"\n",
" Examples to validate output (confusion matrix plot)\n",
" actual: 5 and prediction 1 >> 3\n",
" actual: 2 and prediction 4 >> 1\n",
" actual: 3 and prediction 4 >> 10\n",
" \"\"\"\n",
" columns = []\n",
" annot = True;\n",
" cmap = 'Oranges';\n",
" fmt = '.2f'\n",
" lw = 0.5\n",
" cbar = False\n",
" show_null_values = 2\n",
" pred_val_axis = 'y'\n",
" #size::\n",
" fz = 12;\n",
" figsize = [9,9];\n",
" if(len(y_test) > 10):\n",
" fz=9; figsize=[14,14];\n",
" plot_confusion_matrix_from_data(y_test, predic, columns,\n",
" annot, cmap, fmt, fz, lw, cbar, figsize, show_null_values, pred_val_axis)"
] ]
}, },
{ {
@@ -888,12 +891,11 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# plot_confusion_matrix_from_data(y_test, predictions, columns=None, annot=True, cmap=\"Oranges\",\n", "## Plot for Coasts & Ports\n",
"# fmt='.2f', fz=11, lw=0.5, cbar=False, figsize=[8,8], show_null_values=0, pred_val_axis='lin'):\n",
"\n", "\n",
"matplotlib.rcParams['text.usetex'] = False\n", "matplotlib.rcParams['text.usetex'] = True\n",
"\n", "\n",
"forecast_model = 'postintertidal_slope_sto06'\n", "forecast_model = 'premean_slope_sto06'\n",
"\n", "\n",
"df_for = impacts['forecasted'][forecast_model]\n", "df_for = impacts['forecasted'][forecast_model]\n",
"df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n", "df_for.storm_regime = df_for.storm_regime.astype(cat_type)\n",
@@ -902,13 +904,22 @@
"\n", "\n",
"\n", "\n",
"confm = confusion_matrix(observed_regimes, forecasted_regimes,labels=[0,1,2,3])\n", "confm = confusion_matrix(observed_regimes, forecasted_regimes,labels=[0,1,2,3])\n",
"labels=['swash','collision','overwash','inundation']\n", "labels=['Swash','Collision','Overwash','Inundation']\n",
"df_cm = DataFrame(confm, index=labels, columns=labels)\n", "df_cm = DataFrame(confm, index=labels, columns=labels)\n",
"\n", "\n",
"fig = pretty_plot_confusion_matrix(df_cm, annot=True, cmap=\"Oranges\", fmt='.1f', fz=13,\n", "fig = pretty_plot_confusion_matrix(df_cm, annot=True, cmap=\"OrRd\", fmt='.1f', fz=8,\n",
" lw=0.1, cbar=False, figsize=[8,5], show_null_values=1, pred_val_axis='y')\n", " lw=1, cbar=False, figsize=[3,1.5], show_null_values=1, pred_val_axis='y', \n",
" title = r'Storm impact regime ' + r'confusion matrix',show_pcts=False)\n",
"\n",
"ax = fig.axes[0]\n",
"ax.set_ylabel(ax.get_ylabel(),fontsize=8)\n",
"ax.set_xlabel(ax.get_xlabel(),fontsize=8)\n",
"ax.set_title(ax.get_title(),fontsize=8)\n",
"ax.set_yticklabels(ax.get_yticklabels(), fontsize=8)\n",
"ax.set_xticklabels(ax.get_xticklabels(), fontsize=8)\n",
"\n", "\n",
"fig.savefig('11_confusion_matrix',dpi=600)" "fig.savefig('07_c&p_confusion_matrix',dpi=600,bbox_inches = \"tight\")\n",
"plt.show()"
] ]
} }
], ],

@@ -65,7 +65,9 @@
"from scipy.interpolate import interp1d\n", "from scipy.interpolate import interp1d\n",
"from pandas.api.types import CategoricalDtype\n", "from pandas.api.types import CategoricalDtype\n",
"import seaborn as sns\n", "import seaborn as sns\n",
"sns.set(style=\"white\")" "sns.set(style=\"white\")\n",
"from scipy import interpolate\n",
"from tqdm import tqdm"
] ]
}, },
{ {
@@ -401,72 +403,14 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Split swash regime" "# Find correlations between variables"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "markdown",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df['beta_Pxscum'] = df.beta_prestorm_mean * df.Ecum\n",
"df['neg_swash_vol_change'] = -df.swash_vol_change\n",
"\n",
"ax = sns.scatterplot(data=df.loc[df.storm_regime=='swash'],\n",
" x=\"beta_Pxscum\", \n",
" y='neg_swash_vol_change')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"corr = df.corr(method ='pearson') \n",
"sns.heatmap(corr, \n",
" xticklabels=corr.columns,\n",
" yticklabels=corr.columns,\n",
" cmap='RdBu_r')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [],
"source": [ "source": [
"len(corr.columns)" "## Create correlogram"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import scipy.stats as ss\n",
"#https://stackoverflow.com/a/24469099\n",
"\n",
"corr = df.corr(method ='pearson') \n",
"\n",
"n=len(corr.columns)\n",
"t=corr*np.sqrt((n-2)/(1-corr*corr))\n",
"\n",
"\n",
"pvals = ss.t.cdf(t, n-2)\n",
"\n",
"corr.values"
] ]
}, },
{ {
@@ -541,33 +485,162 @@
" ax.set_yticks(ytickslocs)\n", " ax.set_yticks(ytickslocs)\n",
" ax.set_yticklabels(column_labels, fontsize='small')\n", " ax.set_yticklabels(column_labels, fontsize='small')\n",
"\n", "\n",
" return plt\n", " return plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate correlation coefficient and p-values\n",
"# https://stackoverflow.com/a/24469099\n",
"corr = df.corr(method ='pearson') \n",
"n=len(corr.columns)\n",
"t=corr*np.sqrt((n-2)/(1-corr*corr))\n",
"pvals = stats.t.cdf(t, n-2)\n",
"\n", "\n",
"import string\n", "plot = corrplot(corr.values, pvals, corr.columns.tolist())\n",
"num_rows = 20\n", "plot.show()"
"num_cols = num_rows\n", ]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create regression plot between two variables"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from scipy import stats\n",
"\n", "\n",
"min_length = 10\n", "# x_col = 'beta_prestorm_intertidal'\n",
"max_length = 20\n", "# y_col = \"beta_diff_intertidal\"\n",
"# data = df.loc[df.storm_regime=='swash']\n",
"\n", "\n",
"alnums = list(string.ascii_uppercase + string.digits)\n", "# y_col = 'total_vol_change'\n",
"labels = [''.join(np.random.choice(alnums,\n", "# x_col = \"Pxscum\"\n",
" np.random.randint(min_length,\n", "# data = df\n",
" max_length)))\n", "\n",
" for y in np.arange(num_rows)]\n", "y_col = 'prestorm_cum_exposed_vol'\n",
"x_col = \"Exscum\"\n",
"c_col = 'total_vol_change'\n",
"data = df\n",
"\n",
"slope, intercept, r_value, p_value, std_err = stats.linregress(\n",
" data.dropna()[x_col].values,\n",
" data.dropna()[y_col].values)\n",
"\n",
"fig = plt.figure(\n",
" figsize=(6, 4), dpi=150, facecolor='w', edgecolor='k')\n",
"ax = fig.add_subplot(111)\n",
"\n",
"scatter = ax.scatter(\n",
" x=data.dropna()[x_col].values,\n",
" y=data.dropna()[y_col].values,\n",
" c=data.dropna()[c_col].values,\n",
" s=1, \n",
" vmin=-150, vmax=0,\n",
")\n",
"\n", "\n",
"ax.set_xlabel(x_col)\n",
"ax.set_ylabel(y_col)\n",
"ax.set_ylim(0,20000)\n",
"\n", "\n",
"data = np.random.random([num_rows, num_cols])\n", "cbar = plt.colorbar(scatter)\n",
"cbar.set_label(c_col)\n",
"\n", "\n",
"# data[np.random.choice(num_rows, num_rows / 2), :] *= -1\n", "ax.grid(True, linestyle=\"--\", alpha=0.2, color='grey', linewidth=1)\n",
"\n", "\n",
"np.fill_diagonal(data, 1)\n", "plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Calculate berm shape index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_profiles\n",
"df_profile_features_crest_toes\n",
"\n",
"berm_shape = []\n",
"grouped = df_profiles.dropna(subset=['z']).xs('prestorm',level='profile_type').groupby('site_id')\n",
"for site_id, df_site in tqdm(grouped):\n",
" features = df_profile_features_crest_toes.loc[(site_id,'prestorm')]\n",
" \n",
" # Get x-coordinate at z=0\n",
" x_last = df_site.iloc[-1].name[1]\n",
" z_last = 0\n",
" \n",
" # Get coordinates of dune toe\n",
" x_first = features.dune_toe_x\n",
" z_first = features.dune_toe_z\n",
" \n",
" # If there is no dune toe, get dune crest\n",
" if np.isnan(x_first):\n",
" x_first = features.dune_crest_x\n",
" z_first = features.dune_crest_z\n",
" \n",
" # If no dune crest, use nan\n",
" if np.isnan(x_first):\n",
" berm_shape.append({'site_id': site_id,\n",
" 'prestorm_berm_curvature': np.nan})\n",
" continue\n",
"\n",
" # Fit straight line between start and end points\n",
" segment = (df_site.loc[(df_site.index.get_level_values('x')>=x_first)&\n",
" (df_site.index.get_level_values('x')<=x_last)])\n",
" x_segment = segment.index.get_level_values('x')\n",
" z_segment = segment.z\n",
" f = interpolate.interp1d([x_first,x_last],[z_first,z_last])\n",
" z_straight = f(x_segment)\n",
"\n",
" area = np.trapz(y=z_straight-z_segment, x=x_segment)\n",
" length = x_last-x_first\n",
" \n",
" normalized_curvature = area\n",
"# normalized_curvature = area / length\n",
" berm_shape.append({'site_id': site_id,\n",
" 'prestorm_berm_curvature': normalized_curvature})\n",
"\n", "\n",
"data_symm = (data + data.T) / 2\n", "# Convert to dataframe \n",
"df_berm_shape = pd.DataFrame(berm_shape)\n",
"df_berm_shape = df_berm_shape.set_index('site_id')\n",
"\n", "\n",
"# plot = corrplot(data_symm, None, labels)\n", "# Join onto our big dataframe\n",
"plot = corrplot(corr.values, pvals, corr.columns.tolist())\n", "df = df.drop(columns=['prestorm_berm_curvature'], errors='ignore')\n",
"plot.show()" "df = pd.concat([df, df_berm_shape], axis=1)\n",
"\n",
"df_berm_shape.head()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Check wave timeseries\n",
"How much does wave height vary alongshore between sites?"
] ]
}, },
{ {
@@ -576,22 +649,149 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from scipy import stats\n", "from dateutil.parser import parse\n",
"sites = ['NARRA0001', 'NARRA0012', 'NARRA0024']\n",
"\n",
"fig = plt.figure(\n",
" figsize=(6, 4), dpi=150, facecolor='w', edgecolor='k')\n",
"ax = fig.add_subplot(111)\n",
"\n",
"for site_id in sites:\n",
" print(site_id)\n",
" x = [parse(t) for t in df_waves.xs(site_id,level='site_id').index]\n",
" y = df_waves.xs(site_id,level='site_id').Hs\n",
" ax.plot(x,y)\n",
" \n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cumulative sum of available prestorm volume?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# At each site, determine relationship between height and available volume\n",
"data = []\n",
"site_ids = df_sites.index.values\n",
"for site_id in site_ids:\n",
" df_profile = df_profiles.xs([site_id, 'prestorm'],\n",
" level=['site_id',\n",
" 'profile_type']).dropna(subset=['z'])\n",
" x_profile = df_profile.index.get_level_values('x').values\n",
" z_profile = df_profile.z.values\n",
" \n",
" z_vals = np.arange(min(df_profile.z),max(df_profile.z),0.01)\n",
" \n",
" for z in z_vals:\n",
" i_start = np.where((z_profile > z))[0][-1]\n",
" x_start = x_profile[i_start]\n",
" x_end = x_profile[-1]\n",
" mask = (x_start <= x_profile) & (x_profile <= x_end)\n",
" vol = np.trapz(z_profile[mask], x=x_profile[mask])\n",
" data.append({'site_id': site_id,'z':z,'prestorm_vol':vol})\n",
" \n",
"df_prestorm_vols_by_z = pd.DataFrame(data)\n",
"df_prestorm_vols_by_z = df_prestorm_vols_by_z.set_index(['site_id','z'])\n",
"df_prestorm_vols_by_z.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_twl = twls['forecasted']['preintertidal_slope_sto06']\n",
"df_twl['z'] = df_twl.R_high.round(2)\n",
"\n", "\n",
"x_col = 'beta_prestorm_intertidal'\n", "df_twl = df_twl.join(df_prestorm_vols_by_z, on=['site_id','z'])\n",
"y_col = \"beta_diff_intertidal\"\n", "df_twl = df_twl.drop(columns=['z'])\n",
"# y_col = 'swash_vol_change'\n", "\n",
"# x_col = \"Pxscum\"\n", "df_site_cum_exposed_vols = df_twl.groupby('site_id').prestorm_vol.sum().to_frame()\n",
"data = df.loc[df.storm_regime=='swash']\n", "df_site_cum_exposed_vols = df_site_cum_exposed_vols.rename({'prestorm_vol':'prestorm_cum_exposed_vol'},axis=1)\n",
"\n",
"# # Join onto main dataframe\n",
"df = df.drop(columns=['prestorm_cum_exposed_vol'], errors='ignore')\n",
"df = pd.concat([df, df_site_cum_exposed_vols], axis=1)\n",
"\n",
"df_site_cum_exposed_vols.head()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# PCA?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import decomposition\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"target_col = 'swash_pct_change'\n",
"training_cols = ['beta_prestorm_mean','beta_prestorm_intertidal','prestorm_dune_vol','prestorm_swash_vol','width_msl_prestorm','Pxscum','prestorm_berm_curvature','prestorm_cum_exposed_vol']\n",
"\n",
"df_pca = df[training_cols+[target_col]].dropna()\n",
"df_pca_data_only = df_pca.drop(target_col,axis=1)\n",
"\n", "\n",
"slope, intercept, r_value, p_value, std_err = stats.linregress(data[x_col],data[y_col])\n", "# input data\n",
"X = df_pca_data_only.values\n",
"X = StandardScaler().fit_transform(X)\n",
"\n", "\n",
"ax = sns.regplot(data=data,\n", "# target\n",
" y=y_col, \n", "y = df_pca[target_col]\n",
" x=x_col, marker=\"+\",fit_reg=True, scatter_kws={'linewidth':1},\n", "\n",
" line_kws={'label':\"y={0:.2f}x+{1:.2f}\\n(r2={2:.2f})\".format(slope,intercept,r_value**2)})\n", "# pca\n",
"ax.legend()\n", "pca = decomposition.PCA(n_components=2)\n",
"\n" "pca.fit(X)\n",
"\n",
"X = pca.transform(X)\n",
"\n",
"\n",
"fig = plt.figure(\n",
" figsize=(6, 4), dpi=150, facecolor='w', edgecolor='k')\n",
"ax = fig.add_subplot(111)\n",
"\n",
"scatter = ax.scatter(\n",
" x=X[:,0],\n",
" y=X[:,1],\n",
" c=y,\n",
" s=0.5, \n",
" vmin=-1, vmax=0,\n",
")\n",
"\n",
"# ax.set_xlabel(x_col)\n",
"# ax.set_ylabel(y_col)\n",
"# ax.set_ylim(0,20000)\n",
"\n",
"cbar = plt.colorbar(scatter)\n",
"# cbar.set_label(c_col)\n",
"\n",
"# ax.grid(True, linestyle=\"--\", alpha=0.2, color='grey', linewidth=1)\n",
"\n",
"plt.show()\n"
] ]
}, },
{ {
@@ -600,8 +800,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# df.loc[(df.storm_regime=='swash')&(df.swash_vol_change<10)].sort_values(by=['Pxscum'],ascending=False)\n", "df_pca_dims = pd.DataFrame(pca.components_, columns=list(df_pca_data_only.columns))\n",
"df.loc[(df.swash_vol_change>200)].sort_values(by=['Pxscum'],ascending=False)" "\n",
"df_pca_dims.iloc[0]\n",
"# pca.explained_variance_ratio_"
] ]
} }
], ],

File diff suppressed because it is too large

@@ -0,0 +1,582 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import shapely.geometry as sgeom\n",
"from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER\n",
"import cartopy.feature \n",
"import cartopy.crs as ccrs\n",
"\n",
"import matplotlib.lines as mlines"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Matplot lib default settings\n",
"plt.rcParams[\"figure.figsize\"] = (10, 6)\n",
"plt.rcParams['axes.grid'] = True\n",
"plt.rcParams['grid.alpha'] = 0.3\n",
"plt.rcParams['grid.color'] = \"grey\"\n",
"plt.rcParams['grid.linestyle'] = \"--\"\n",
"plt.rcParams['grid.linewidth'] = 0.5\n",
"plt.rcParams['axes.grid'] = True\n",
"\n",
"# # https://stackoverflow.com/a/20709149\n",
"matplotlib.rcParams['text.usetex'] = True\n",
"matplotlib.rcParams['font.family'] = 'sans-serif'\n",
"\n",
"matplotlib.rcParams['text.latex.preamble'] = [\n",
" r'\\usepackage{siunitx}', # i need upright \\micro symbols, but you need...\n",
" r'\\sisetup{detect-all}', # ...this to force siunitx to actually use your fonts\n",
" r'\\usepackage[default]{sourcesanspro}',\n",
" r'\\usepackage{amsmath}',\n",
" r'\\usepackage{sansmath}', # load up the sansmath so that math -> helvet\n",
" r'\\sansmath', # <- tricky! -- gotta actually tell tex to use!\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def blank_axes(ax):\n",
" \"\"\"\n",
" blank_axes: blank the extraneous spines and tick marks for an axes\n",
"\n",
" Input:\n",
" ax: a matplotlib Axes object\n",
"\n",
" Output: None\n",
" \"\"\"\n",
"\n",
"\n",
" ax.spines['right'].set_visible(False)\n",
" ax.spines['top'].set_visible(False)\n",
" ax.spines['bottom'].set_visible(False)\n",
" ax.spines['left'].set_visible(False)\n",
" ax.yaxis.set_ticks_position('none')\n",
" ax.xaxis.set_ticks_position('none')\n",
" ax.tick_params(labelbottom='off', labeltop='off', labelleft='off', labelright='off' ,\\\n",
" bottom='off', top='off', left='off', right='off' )\n",
"#end blank_axes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [
66,
268,
296
]
},
"outputs": [],
"source": [
"# Define figure and axes\n",
"fig, ax1 = plt.subplots(figsize=(5, 5),\n",
" subplot_kw=dict(projection=ccrs.PlateCarree()))\n",
"\n",
"# Inset axes showing overall Australia plot\n",
"ax2 = fig.add_axes([0.14, 0.58, 0.20, 0.15], projection=ccrs.PlateCarree())\n",
"\n",
"# Define extents to our axes\n",
"ax1_extent = [146, 154, -35, -30]\n",
"ax2_extent = [110, 157, -42, -7]\n",
"ax1.set_extent(ax1_extent)\n",
"ax2.set_extent(ax2_extent)\n",
"\n",
"# Add gridlines to ax1\n",
"gl = ax1.gridlines(draw_labels=True, linestyle='--', zorder=2, alpha=0.5)\n",
"gl.xlabels_top = gl.ylabels_right = False\n",
"gl.xformatter = LONGITUDE_FORMATTER\n",
"gl.yformatter = LATITUDE_FORMATTER\n",
"\n",
"# Define features we want to plot\n",
"feat_rivers = cartopy.feature.NaturalEarthFeature(\n",
" 'physical',\n",
" 'rivers_lake_centerlines',\n",
" '10m',\n",
" edgecolor=cartopy.feature.COLORS['water'],\n",
" facecolor='none')\n",
"\n",
"feat_oceans = cartopy.feature.NaturalEarthFeature(\n",
" 'physical', 'ocean', '10m', facecolor=cartopy.feature.COLORS['water'])\n",
"\n",
"feat_borders = cartopy.feature.NaturalEarthFeature(\n",
" 'cultural',\n",
" 'admin_1_states_provinces',\n",
" '10m',\n",
" edgecolor='black',\n",
" facecolor=cartopy.feature.COLORS['land'],\n",
" linewidth=0.5)\n",
"\n",
"# Add features to our plots\n",
"ax1.add_feature(feat_rivers)\n",
"ax1.add_feature(feat_oceans)\n",
"ax1.add_feature(feat_borders)\n",
"ax2.add_feature(feat_oceans)\n",
"ax2.add_feature(feat_borders)\n",
"\n",
"# Plot location box on ax2\n",
"ax1_extent_box = sgeom.box(ax1_extent[0], ax1_extent[2], ax1_extent[1],\n",
" ax1_extent[3])\n",
"ax2.add_geometries([ax1_extent_box],\n",
" ccrs.PlateCarree(),\n",
" color='none',\n",
" edgecolor='r',\n",
" linewidth=2)\n",
"\n",
"# Define marker properties\n",
"marker_edge_width = 1.3\n",
"marker_edge_color = '#ffffff'\n",
"wave_buoy_color = 'red'\n",
"wave_buoy_marker = 'o'\n",
"tide_gauge_color = 'blue'\n",
"tide_gauge_marker = 's'\n",
"beach_color = 'green'\n",
"beach_marker = '^'\n",
"\n",
"# Plot beaches\n",
"# df_sites.groupby('beach').mean()[['lat','lon']].to_dict('index')\n",
"beaches = {\n",
" 'AVOCAn': {\n",
" 'lat': -33.460695367777774,\n",
" 'lon': 151.43853769000003\n",
" },\n",
" 'AVOCAs': {\n",
" 'lat': -33.467647595,\n",
" 'lon': 151.43574445875\n",
" },\n",
" 'BILG': {\n",
" 'lat': -33.645234478,\n",
" 'lon': 151.328779182\n",
" },\n",
" 'BLUEYS': {\n",
" 'lat': -32.35377103,\n",
" 'lon': 152.53584677666666\n",
" },\n",
" 'BOAT': {\n",
" 'lat': -32.43502469599999,\n",
" 'lon': 152.530818656\n",
" },\n",
" 'BOOM': {\n",
" 'lat': -32.34039573142857,\n",
" 'lon': 152.54337415\n",
" },\n",
" 'CATHIE': {\n",
" 'lat': -31.57630510275862,\n",
" 'lon': 152.8433463127586\n",
" },\n",
" 'CRESn': {\n",
" 'lat': -31.12568202392001,\n",
" 'lon': 153.00734157120007\n",
" },\n",
" 'CRESs': {\n",
" 'lat': -31.180938470000008,\n",
" 'lon': 152.97574073\n",
" },\n",
" 'DEEWHYn': {\n",
" 'lat': -33.745759471666666,\n",
" 'lon': 151.3055993875\n",
" },\n",
" 'DEEWHYs': {\n",
" 'lat': -33.751954194999996,\n",
" 'lon': 151.29818175499997\n",
" },\n",
" 'DIAMONDn': {\n",
" 'lat': -32.026216662195125,\n",
" 'lon': 152.55036803634147\n",
" },\n",
" 'DIAMONDs': {\n",
" 'lat': -32.046040624285716,\n",
" 'lon': 152.54134085\n",
" },\n",
" 'DUNBn': {\n",
" 'lat': -31.674815349864858,\n",
" 'lon': 152.81198585391894\n",
" },\n",
" 'DUNBs': {\n",
" 'lat': -31.710181410909083,\n",
" 'lon': 152.79323301090912\n",
" },\n",
" 'ELIZA': {\n",
" 'lat': -32.3298006057143,\n",
" 'lon': 152.53714101142856\n",
" },\n",
" 'ENTRA': {\n",
" 'lat': -33.31609181329114,\n",
" 'lon': 151.5278903848101\n",
" },\n",
" 'FOST': {\n",
" 'lat': -32.17670982666667,\n",
" 'lon': 152.51195243333333\n",
" },\n",
" 'GRANTSn': {\n",
" 'lat': -31.613473751666664,\n",
" 'lon': 152.8381070795833\n",
" },\n",
" 'GRANTSs': {\n",
" 'lat': -31.63005646785714,\n",
" 'lon': 152.83392283714286\n",
" },\n",
" 'HARGn': {\n",
" 'lat': -33.25858048428571,\n",
" 'lon': 151.56334493285718\n",
" },\n",
" 'HARGs': {\n",
" 'lat': -33.26487224142857,\n",
" 'lon': 151.5624840085714\n",
" },\n",
" 'HARR': {\n",
" 'lat': -31.859077996607144,\n",
" 'lon': 152.72314068214285\n",
" },\n",
" 'LHOUSE': {\n",
" 'lat': -32.443838815384616,\n",
" 'lon': 152.52969125769232\n",
" },\n",
" 'LHOUSEn': {\n",
" 'lat': -31.506830332043016,\n",
" 'lon': 152.900197138172\n",
" },\n",
" 'LHOUSEs': {\n",
" 'lat': -31.55095255875001,\n",
" 'lon': 152.85847451375002\n",
" },\n",
" 'MACM': {\n",
" 'lat': -33.494884234375,\n",
" 'lon': 151.42840894187498\n",
" },\n",
" 'MANNING': {\n",
" 'lat': -31.922794031338576,\n",
" 'lon': 152.63626414188988\n",
" },\n",
" 'MONA': {\n",
" 'lat': -33.68342594,\n",
" 'lon': 151.31180166238096\n",
" },\n",
" 'NAMB': {\n",
" 'lat': -30.702570222054792,\n",
" 'lon': 152.99174024657532\n",
" },\n",
" 'NARRA': {\n",
" 'lat': -33.71824857833333,\n",
" 'lon': 151.30161430805555\n",
" },\n",
" 'NINEMn': {\n",
" 'lat': -32.098527227407416,\n",
" 'lon': 152.5245430024074\n",
" },\n",
" 'NINEMs': {\n",
" 'lat': -32.146616644,\n",
" 'lon': 152.50721414266667\n",
" },\n",
" 'NSHORE_n': {\n",
" 'lat': -31.35297012609755,\n",
" 'lon': 152.94414099536587\n",
" },\n",
" 'NSHORE_s': {\n",
" 'lat': -31.4042148925,\n",
" 'lon': 152.91674769522717\n",
" },\n",
" 'OLDBAR': {\n",
" 'lat': -31.981825014722215,\n",
" 'lon': 152.58157028555553\n",
" },\n",
" 'ONEMILE': {\n",
" 'lat': -32.19014868,\n",
" 'lon': 152.53698099153846\n",
" },\n",
" 'PEARLn': {\n",
" 'lat': -33.5394179,\n",
" 'lon': 151.310494964\n",
" },\n",
" 'PEARLs': {\n",
" 'lat': -33.543258066,\n",
" 'lon': 151.30794061\n",
" },\n",
" 'SCOT': {\n",
" 'lat': -30.740275808333333,\n",
" 'lon': 152.99018976333335\n",
" },\n",
" 'STOCNn': {\n",
" 'lat': -32.78820750815384,\n",
" 'lon': 152.0395944421538\n",
" },\n",
" 'STOCNs': {\n",
" 'lat': -32.833099094162684,\n",
" 'lon': 151.9039352245933\n",
" },\n",
" 'STOCS': {\n",
" 'lat': -32.8965449047826,\n",
" 'lon': 151.79411199869566\n",
" },\n",
" 'STUART': {\n",
" 'lat': -30.835545341910105,\n",
" 'lon': 153.00643798999994\n",
" },\n",
" 'SWRO': {\n",
" 'lat': -30.885526112307694,\n",
" 'lon': 153.05837861230768\n",
" },\n",
" 'TREACH': {\n",
" 'lat': -32.454167825000006,\n",
" 'lon': 152.508508009375\n",
" },\n",
" 'WAMBE': {\n",
" 'lat': -33.43660858444444,\n",
" 'lon': 151.445516972963\n",
" }\n",
"}\n",
"\n",
"for beach in beaches:\n",
" ax1.plot(beaches[beach]['lon'],\n",
" beaches[beach]['lat'],\n",
" color=beach_color,\n",
" marker=beach_marker,\n",
" markeredgewidth=marker_edge_width-0.5,\n",
" markeredgecolor='#000000',\n",
" transform=ccrs.Geodetic())\n",
"\n",
"\n",
"# Add wave buoys\n",
"wave_buoys = [\n",
" {\n",
" 'name': 'Sydney',\n",
" 'lat': -33.77166667,\n",
" 'lon': 151.40861111\n",
" },\n",
" {\n",
" 'name': 'Crowdy Head',\n",
" 'lat': -31.81388889,\n",
" 'lon': 152.85611111\n",
" },\n",
" {\n",
" 'name': 'Coffs Harbour',\n",
" 'lat': -30.36250000,\n",
" 'lon': 153.26916667\n",
" },\n",
"]\n",
"\n",
"for wave_buoy in wave_buoys:\n",
" ax1.plot(wave_buoy['lon'],\n",
" wave_buoy['lat'],\n",
" color=wave_buoy_color,\n",
" marker=wave_buoy_marker,\n",
" markeredgewidth=marker_edge_width,\n",
" markeredgecolor=marker_edge_color,\n",
" transform=ccrs.Geodetic())\n",
"\n",
"# Add tide gauges\n",
"tide_gauges = [\n",
" {\n",
" 'name': 'HMAS Penguin',\n",
" 'lat': -33.82546,\n",
" 'lon': 151.25853\n",
" },\n",
" {\n",
" 'name': 'Patonga',\n",
" 'lat': -33.55098,\n",
" 'lon': 151.27461\n",
" },\n",
" {\n",
" 'name': 'Shoal Bay',\n",
" 'lat': -32.71967,\n",
" 'lon': 152.17565\n",
" },\n",
" {\n",
" 'name': 'Forster',\n",
" 'lat': -32.17398,\n",
" 'lon': 152.50820\n",
" },\n",
" {\n",
" 'name': 'Crowdy Head',\n",
" 'lat': -31.83870,\n",
" 'lon': 152.75001\n",
" },\n",
" {\n",
" 'name': 'Port Macquarie',\n",
" 'lat': -31.42682,\n",
" 'lon': 152.91112\n",
" },\n",
" {\n",
" 'name': 'Coffs Harbour',\n",
" 'lat': -30.30286,\n",
" 'lon': 153.14614\n",
" },\n",
"]\n",
"\n",
"for tide_gauge in tide_gauges:\n",
" ax1.plot(tide_gauge['lon'],\n",
" tide_gauge['lat'],\n",
" color=tide_gauge_color,\n",
" marker=tide_gauge_marker,\n",
" markeredgewidth=marker_edge_width,\n",
" markeredgecolor=marker_edge_color,\n",
" transform=ccrs.Geodetic())\n",
"\n",
"\n",
"\n",
"# Prepare legend\n",
"legend_buoy = mlines.Line2D([], [],\n",
" color=wave_buoy_color,\n",
" marker=wave_buoy_marker,\n",
" markersize=5,\n",
" linestyle=\"None\",\n",
" markeredgewidth=marker_edge_width,\n",
" markeredgecolor=marker_edge_color,\n",
" label='Wave buoys')\n",
"\n",
"legend_gauge = mlines.Line2D([], [],\n",
" color=tide_gauge_color,\n",
" marker=tide_gauge_marker,\n",
" markersize=5,\n",
" linestyle=\"None\",\n",
" markeredgewidth=marker_edge_width,\n",
" markeredgecolor=marker_edge_color,\n",
" label='Tide gauges')\n",
"\n",
"legend_beaches = mlines.Line2D([], [],\n",
" color=beach_color,\n",
" marker=beach_marker,\n",
" markersize=5,\n",
" linestyle=\"None\",\n",
" markeredgewidth=marker_edge_width-0.5,\n",
" markeredgecolor='#000000',\n",
" label='Beaches included')\n",
"\n",
"handles = [legend_buoy, legend_gauge, legend_beaches]\n",
"names = ['Wave buoys', 'Tide gauges', 'Beaches']\n",
"\n",
"# create legend\n",
"ax1.legend(handles, names, title=r'\\underline{Legend}', loc='lower left')\n",
"\n",
"# Add landmarks\n",
"ax1.text(151.204325-0.1, -33.869810, r'\\textsc{Sydney}', transform=ccrs.Geodetic(),ha='right',zorder=4)\n",
"ax1.text(151.784937-0.1, -32.928103, r'\\textsc{Newcastle}', transform=ccrs.Geodetic(),ha='right',zorder=4)\n",
"ax1.text(152.909329-0.1, -31.440207, r'\\textsc{Port Macquarie}', transform=ccrs.Geodetic(),ha='right',zorder=4)\n",
"ax1.text(153.111704-0.1, -30.300466, r'\\textsc{Coffs Harbour}', transform=ccrs.Geodetic(),ha='right',zorder=4)\n",
"ax1.text(150.891708-0.1, -34.433129, r'\\textsc{Wollongong}', transform=ccrs.Geodetic(),ha='right',zorder=4)\n",
"\n",
"ax2.text(133.729975, -25.173095, r'\\textsc{Australia}', transform=ccrs.Geodetic(),ha='center',zorder=4,va='bottom', fontsize=6, bbox=dict(facecolor=cartopy.feature.COLORS['land'],pad=0.1,linewidth=0, alpha=0.9))\n",
"\n",
"# # Add inset for Narrabeen\n",
"# ax3 = fig.add_axes([0.7, 0.28, 0.2, 0.3], projection=ccrs.PlateCarree())\n",
"# ax3_extent = [151.296915, 151.316252, -33.739274, -33.702466]\n",
"# # ax3_extent = [151.296915, 151.32, -33.739274, -33.68]\n",
"# ax3.set_extent(ax3_extent)\n",
"# # ax3.add_feature(feat_oceans)\n",
"# # ax3.add_feature(feat_borders)\n",
"\n",
"\n",
"fig.savefig('07_c&p_locality.png',dpi=600,bbox_inches = \"tight\", pad_inches=0.01)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Try using overpass api\n",
"# Hard because coastline is given as line string, not shapefile.\n",
"\n",
"import overpass\n",
"api = overpass.API()\n",
"response = api.get('way[\"natural\"=\"coastline\"](-34, 151.0, -33, 152);out geom;')\n",
"coords = [x['geometry']['coordinates'] for x in response['features']]\n",
"\n",
"\n",
"for line in coords:\n",
" lats = [x[1] for x in line]\n",
" lons = [x[0] for x in line]\n",
" ax3.plot(lons,lats, transform=ccrs.Geodetic())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"###"
]
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@@ -5,6 +5,7 @@ from scipy.integrate import simps
from scipy.signal import savgol_filter from scipy.signal import savgol_filter
from scipy.interpolate import interp1d from scipy.interpolate import interp1d
from numpy import ma as ma from numpy import ma as ma
from itertools import groupby
from tqdm import tqdm from tqdm import tqdm
from logs import setup_logging from logs import setup_logging
@@ -56,59 +57,154 @@ def volume_change(df_profiles, df_profile_features):
params = {} params = {}
# Calculate pre and post storm volume seawards of our profile change point
# and store in dictionary and dataframe.
df_prestorm = df_profiles.loc[(site_id, "prestorm")]
df_poststorm = df_profiles.loc[(site_id, "poststorm")]
# Calculate total subaerial volume change
# Calculate difference between pre and post storm profiles
z_diff = (
df_profiles.loc[(site_id, "poststorm")].z
- df_profiles.loc[(site_id, "prestorm")].z
)
# If there are no common points between the pre and poststorm profiles,
# skip this site
if z_diff.dropna().size == 0:
continue
# # # Debug
# import matplotlib.pyplot as plt
# plt.plot(z_diff)
# plt.plot(df_profiles.loc[(site_id, "prestorm")].z)
# plt.plot(df_profiles.loc[(site_id, "poststorm")].z)
# plt.show()
# First, find locations where we have a good match between pre and post storm
# profiles.
lidar_accuracy = 0.2 # m
good_match = start_stop(
(abs(z_diff) < lidar_accuracy).values, trigger_val=True, len_thresh=20
)
# If entire profile has changed (usually at lagoon entrance), take change
# point as the first x-coord where we have both pre and poststorm profiles
if good_match.size == 0:
x_change_point = min(
set(df_prestorm.z.dropna().index.get_level_values("x"))
& set(df_poststorm.z.dropna().index.get_level_values("x"))
)
z_change_point = df_prestorm.loc[x_change_point].z
else:
# Minimum idx_change_points should be the first place where we have a good match
idx_change_point_min = good_match[0][0]
# Identify locations where z_diff is negative, i.e. profile has been
# eroded by the storm. Then group them by the number of consecutive
# values.
grouped = start_stop((z_diff < 0).values, trigger_val=True, len_thresh=1)
# Sort by streak length, then get the start index of the longest streak
# of true values. x_change_point is then the x-coordinate where our pre and
# post storm profiles start to change.
idx_change_points = sorted(
[x for x in grouped if x[0] > idx_change_point_min],
key=lambda x: x[1] - x[0],
reverse=True,
)
if len(idx_change_points) == 0:
continue
else:
idx_change_point = idx_change_points[0][0]
x_change_point = z_diff.index[idx_change_point]
z_change_point = df_prestorm.loc[x_change_point].z
# Landward of the change point, set difference in pre/post storm profiles
# equal to zero
z_diff[z_diff.index < x_change_point] = 0
params["prestorm_total_subaerial_vol"] = beach_volume(
x=df_prestorm.index.get_level_values("x"),
z=df_prestorm.z.values,
x_min=x_change_point,
)
params["poststorm_total_subaerial_vol"] = beach_volume(
x=df_poststorm.index.get_level_values("x"),
z=df_poststorm.z.values,
x_min=x_change_point,
)
params["total_vol_change"] = (
params["poststorm_total_subaerial_vol"]
- params["prestorm_total_subaerial_vol"]
)
params["x_change_point"] = x_change_point
params["z_change_point"] = z_change_point
df_vol_changes.loc[site_id, "total_vol_change"] = params["total_vol_change"]
df_vol_changes.loc[site_id, "x_change_point"] = params["x_change_point"]
df_vol_changes.loc[site_id, "z_change_point"] = params["z_change_point"]
for zone in ["dune", "swash"]: for zone in ["dune", "swash"]:
params[zone] = {} params[zone] = {}
for profile_type in ["prestorm", "poststorm"]: for profile_type in ["prestorm", "poststorm"]:
params[zone][profile_type] = {}
# Store zone/profile_type results in a dictionary, then append at the
# end to the params dictionary.
d = {}
# Get variables, this helps simplify the below code
df_profile_feature = df_profile_features.loc[(site_id, profile_type)]
df_profile = df_profiles.loc[(site_id, profile_type)]
# Define the edges of the swash and dunes where we want to calculate subaerial volume. # Define the edges of the swash and dunes where we want to calculate subaerial volume.
if zone == "swash": if zone == "swash":
params[zone][profile_type]["x_min"] = df_profile_features.loc[ d["x_min"] = df_profile_feature.dune_toe_x
(site_id, profile_type) d["x_max"] = max(df_profile.index.get_level_values("x"))
].dune_toe_x
params[zone][profile_type]["x_max"] = max(
df_profiles.loc[(site_id, profile_type)].index.get_level_values(
"x"
)
)
# For profiles with no Dlow value, we take Dhigh as the minimum value to calculate swash # For profiles with no Dlow value, we take Dhigh as the minimum value to calculate swash
if np.isnan(params[zone][profile_type]["x_min"]): if np.isnan(d["x_min"]):
params[zone][profile_type]["x_min"] = df_profile_features.loc[ d["x_min"] = df_profile_feature.dune_crest_x
(site_id, profile_type)
].dune_crest_x
elif zone == "dune": elif zone == "dune":
params[zone][profile_type]["x_min"] = df_profile_features.loc[ d["x_min"] = df_profile_feature.dune_crest_x
(site_id, profile_type) d["x_max"] = df_profile_feature.dune_toe_x
].dune_crest_x
params[zone][profile_type]["x_max"] = df_profile_features.loc[
(site_id, profile_type)
].dune_toe_x
# For profiles with no Dlow value, the dune is undefined and we cannot calculate a dune volume. # For profiles with no Dlow value, the dune is undefined and we cannot calculate a dune volume.
# Calculate subaerial volume based on our x min and maxes # Calculate subaerial volume based on our x min and maxes
params[zone][profile_type]["subaerial_vol"] = beach_volume( d["subaerial_vol"] = beach_volume(
x=df_profiles.loc[(site_id, profile_type)].index.get_level_values( x=df_profile.index.get_level_values("x"),
"x" z=df_profile.z.values,
), x_min=d["x_min"],
z=df_profiles.loc[(site_id, profile_type)].z.values, x_max=d["x_max"],
x_min=params[zone][profile_type]["x_min"],
x_max=params[zone][profile_type]["x_max"],
) )
# TODO Landward limit still needs to be incorporated params[zone][profile_type] = d
params[zone]["vol_change"] = ( # Calculate change in volumes. Use the z_diff array which has been
params[zone]["poststorm"]["subaerial_vol"] # zero'ed out landward of the x_change_point
- params[zone]["prestorm"]["subaerial_vol"] params[zone]["vol_change"] = beach_volume(
x=z_diff.index.values,
z=z_diff.values,
x_min=params[zone]["prestorm"]["x_min"],
x_max=params[zone]["prestorm"]["x_max"],
) )
params[zone]["pct_change"] = ( params[zone]["pct_change"] = (
params[zone]["vol_change"] / params[zone]["prestorm"]["subaerial_vol"] params[zone]["vol_change"] / params[zone]["prestorm"]["subaerial_vol"]
) )
# params[zone]["vol_loss"] = (params[zone]["prestorm"]["subaerial_vol"] -
# params[zone]["poststorm"]["subaerial_vol"])
# params[zone]["pct_loss"] = \
# params[zone]["vol_loss"] / params[zone]["prestorm"]["subaerial_vol"]
# Save results in our data frame
df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = params[zone][ df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = params[zone][
"prestorm" "prestorm"
]["subaerial_vol"] ]["subaerial_vol"]
@@ -121,7 +217,6 @@ def volume_change(df_profiles, df_profile_features):
df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = params[zone][ df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = params[zone][
"pct_change" "pct_change"
] ]
return df_vol_changes return df_vol_changes
@@ -153,12 +248,8 @@ def storm_regime(df_observed_impacts):
""" """
logger.info("Getting observed storm regimes") logger.info("Getting observed storm regimes")
swash = (df_observed_impacts.dune_pct_change <= 2) & ( swash = df_observed_impacts.dune_vol_change > -3
df_observed_impacts.dune_vol_change <= 3 collision = df_observed_impacts.dune_vol_change < -3
)
collision = (df_observed_impacts.dune_pct_change >= 2) | (
df_observed_impacts.dune_vol_change > 3
)
df_observed_impacts.loc[swash, "storm_regime"] = "swash" df_observed_impacts.loc[swash, "storm_regime"] = "swash"
df_observed_impacts.loc[collision, "storm_regime"] = "collision" df_observed_impacts.loc[collision, "storm_regime"] = "collision"
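With this change, collision requires a net dune volume loss of more than 3 (presumably m³ per metre of profile, given beach_volume integrates elevation over chainage), and the old dune_pct_change criterion is dropped. Note a site whose dune_vol_change is exactly -3 satisfies neither condition and is left unclassified.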
@@ -344,3 +435,32 @@ def get_beach_width(df_profile_features, df_profiles, profile_type, ele, col_name
).dune_toe_x ).dune_toe_x
df_width = (df_x_position - df_x_prestorm_dune_toe).rename(col_name) df_width = (df_x_position - df_x_prestorm_dune_toe).rename(col_name)
return df_width return df_width
def start_stop(a, trigger_val, len_thresh=2):
"""
https://stackoverflow.com/a/51259253
In [47]: myArray
Out[47]: array([1, 1, 0, 2, 0, 1, 1, 1, 1, 0, 0, 1, 2, 1, 1, 1])
In [48]: start_stop(myArray, trigger_val=1, len_thresh=2)
Out[48]:
array([[ 5, 8],
[13, 15]])
:param a: 1D array to search for runs
:param trigger_val: value whose consecutive runs are located
:param len_thresh: only runs strictly longer than this are returned
:return: array of [start, stop] index pairs (stop index inclusive)
"""
# "Enclose" mask with sentients to catch shifts later on
mask = np.r_[False, np.equal(a, trigger_val), False]
# Get the shifting indices
idx = np.flatnonzero(mask[1:] != mask[:-1])
# Get lengths
lens = idx[1::2] - idx[::2]
return idx.reshape(-1, 2)[lens > len_thresh] - [0, 1]
