nsw-2016-storm-impact/notebooks/04a_profile_picker_supersed...

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "import numpy.ma as ma\n",
    "\n",
    "import numpy\n",
    "from pyearth import Earth\n",
    "from matplotlib import pyplot\n",
    "\n",
    "np.random.seed(2017)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
    "    print('Importing {}'.format(csv))\n",
    "    return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n",
    "\n",
    "df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Try using pyearth"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "code_folding": [
     5,
     20,
     31,
     40
    ]
   },
   "outputs": [],
   "source": [
    "from scipy.signal import savgol_filter\n",
    "import re\n",
    "from scipy.stats import linregress\n",
    "import warnings\n",
    "warnings.simplefilter(action='ignore', category=FutureWarning)\n",
    "\n",
    "def get_breakpoints(model, min_distance=20):\n",
    "    # Get breakpoints\n",
    "    breakpoints = []\n",
    "    for line in model.summary().split('\\n'):\n",
    "        # Get unpruned lines\n",
    "        if 'No' in line and 'None' not in line:\n",
    "            # Get break points\n",
    "            m = re.search(\"h\\(x0-(\\d*\\.?\\d+)\\)\", line)\n",
    "            if m:\n",
    "                breakpoints.append(float(m.groups()[0]))\n",
    "            m = re.search(\"h\\((\\d*\\.?\\d+)-x0\\)\", line)\n",
    "            if m:\n",
    "                breakpoints.append(float(m.groups()[0]))\n",
    "    return sorted(list(set(breakpoints)))\n",
    "    \n",
    "def get_segments(breakpoints, x_min, x_max):\n",
    "    segments = []\n",
    "    breakpoints = [x_min] + breakpoints + [x_max]\n",
    "\n",
    "    for x1, x2 in zip(breakpoints, breakpoints[1:]):\n",
    "        segments.append({\n",
    "            'x_start': x1,\n",
    "            'x_end': x2\n",
    "        })\n",
    "    return segments            \n",
    "\n",
    "def get_segment_slopes(segments, x, z):\n",
    "    for segment in segments:\n",
    "        mask = ma.masked_where((segment['x_start'] < x) & (x < segment['x_end']),x ).mask\n",
    "        segment['z_mean'] = np.mean(z[mask])\n",
    "        segment['z_start'] = np.mean(z[mask][0])\n",
    "        segment['z_end'] = np.mean(z[mask][-1])\n",
    "        segment['slope'] = -linregress(x[mask], z[mask]).slope\n",
    "    return segments\n",
    "    \n",
    "def classify_segments(segments, x,z):\n",
    "    \n",
    "    # Most seaward slope must be foreshore\n",
    "    segments[-1]['type'] = 'foreshore'\n",
    "    \n",
    "    # Most landward slope must be land\n",
    "    segments[0]['type'] = 'land'\n",
    "    \n",
    "    # Segments with really high slopes must be structures\n",
    "    for seg in segments:\n",
    "        if seg['slope'] > 2.0:\n",
    "            seg['type'] = 'structure'\n",
    "    \n",
    "    # Segments with large change of slope and \n",
    "    # Segment with max slope should be dune face\n",
    "#     dune_face_idx = [n for n, seg in enumerate(segments) if seg['slope']==max(x['slope'] for x in segments)][0]\n",
    "#     segments[dune_face_idx]['type'] = 'dune_face'\n",
    "    \n",
    "    # Pick out berms \n",
    "    for seg in segments:\n",
    "        if (-0.03 < seg['slope'] < 0.03  # berms should be relatively flat\n",
    "            and 0 < seg['z_mean'] < 4  # berms should be located between 0-4 m AHD\n",
    "            ):  # berms should be seaward of dune face\n",
    "            seg['type'] = 'berm'\n",
    "            \n",
    "#     slope = None\n",
    "#     for seg in reversed(segments):\n",
    "#         if slope is None:\n",
    "#             continue\n",
    "#         elif slope - 0.03 < seg['slope'] < slope + 0.03:\n",
    "#             seg['type'] = 'foreshore'\n",
    "#         else:\n",
    "#             break\n",
    "    \n",
    "    return segments\n",
    "\n",
    "def get_piecewise_linear_model(x,z):\n",
    "    #Fit an Earth model\n",
    "    model = Earth(penalty=3,thresh=0.0005)\n",
    "    model.fit(x,z)\n",
    "    return model\n",
    "\n",
    "def plot_profile_classification(site_id, profile_type):\n",
    "    df_profile = df_profiles.query(\"site_id == '{}' and profile_type == '{}'\".format(site_id, profile_type))\n",
    "    x = np.array(df_profile.index.get_level_values('x').tolist())\n",
    "    z = np.array(df_profile.z.tolist())    \n",
    "    \n",
    "    nan_mask = ma.masked_invalid(z).mask\n",
    "    x = x[~nan_mask]\n",
    "    z_unfiltered = z[~nan_mask]\n",
    "    z = savgol_filter(z_unfiltered, 51, 3)\n",
    "    \n",
    "    model = get_piecewise_linear_model(x,z)\n",
    "    breakpoints = get_breakpoints(model)\n",
    "    segments = get_segments(breakpoints, x_min=x.min(), x_max=x.max())\n",
    "    segments = get_segment_slopes(segments, x=x, z=z)\n",
    "#     segments = merge_similar_segments(segments)\n",
    "    segments = classify_segments(segments, x=x, z=z)\n",
    "    \n",
    "    pyplot.figure()\n",
    "    pyplot.plot(x,z_unfiltered, color='0.5',marker='.', alpha=.2, ms=10,linestyle=\"None\")\n",
    "\n",
    "    # Plot different segments\n",
    "    foreshore_segments = [x for x in segments if x.get('type') == 'foreshore']\n",
    "    for seg in foreshore_segments:\n",
    "        pyplot.plot([seg['x_start'], seg['x_end']],\n",
    "                   [seg['z_start'], seg['z_end']],\n",
    "                   linewidth=4, \n",
    "                    color='b')\n",
    "\n",
    "    land_segments = [x for x in segments if x.get('type') == 'land']\n",
    "    for seg in land_segments:\n",
    "        pyplot.plot([seg['x_start'], seg['x_end']],\n",
    "                   [seg['z_start'], seg['z_end']],\n",
    "                   linewidth=4, \n",
    "                    color='g')\n",
    "\n",
    "    berm_segments = [x for x in segments if x.get('type') == 'berm']\n",
    "    for seg in berm_segments:\n",
    "        pyplot.plot([seg['x_start'], seg['x_end']],\n",
    "                   [seg['z_start'], seg['z_end']],\n",
    "                    linewidth=4, \n",
    "                    color='y')\n",
    "\n",
    "    dune_face_segments = [x for x in segments if x.get('type') == 'dune_face']\n",
    "    for seg in dune_face_segments:\n",
    "        pyplot.plot([seg['x_start'], seg['x_end']],\n",
    "                   [seg['z_start'], seg['z_end']],\n",
    "                    linewidth=4, \n",
    "                    color='r')\n",
    "        \n",
    "    structure_segments = [x for x in segments if x.get('type') == 'structure']\n",
    "    for seg in structure_segments:\n",
    "        pyplot.plot([seg['x_start'], seg['x_end']],\n",
    "                   [seg['z_start'], seg['z_end']],\n",
    "                    linewidth=4, \n",
    "                    color='m')\n",
    "        \n",
    "    unclassified_segments = [x for x in segments if x.get('type') is None]\n",
    "    for seg in unclassified_segments:\n",
    "        pyplot.plot([seg['x_start'], seg['x_end']],\n",
    "                   [seg['z_start'], seg['z_end']],\n",
    "                   linewidth=4, \n",
    "                    color='0.4')\n",
    "\n",
    "    pyplot.xlabel('x (m)')\n",
    "    pyplot.ylabel('z (m AHD)')\n",
    "    pyplot.title('{} profile at {}'.format(profile_type, site_id))\n",
    "    pyplot.show()\n",
    "\n",
    "    import pprint\n",
    "    pp = pprint.PrettyPrinter(indent=4)\n",
    "    pp.pprint(segments)\n",
    "\n",
    "plot_profile_classification('NARRA0018', 'prestorm')\n",
    "plot_profile_classification('NARRA0019', 'prestorm')\n",
    "plot_profile_classification('CRESn0017', 'poststorm')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## Try lmfit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "code_folding": [
     0
    ],
    "hidden": true
   },
   "outputs": [],
   "source": [
    "from lmfit import Model, Parameters\n",
    "\n",
    "def get_data():\n",
    "    site_id='NARRA0018'\n",
    "    profile_type='prestorm'\n",
    "    df_profile = df_profiles.query(\"site_id == '{}' and profile_type == '{}'\".format(site_id, profile_type))\n",
    "    x = np.array(df_profile.index.get_level_values('x').tolist())\n",
    "    z = np.array(df_profile.z.tolist())    \n",
    "\n",
    "    nan_mask = ma.masked_invalid(z).mask\n",
    "    x = x[~nan_mask]\n",
    "    z = z[~nan_mask]\n",
    "    return x,z\n",
    "\n",
    "# def piecewise_linear(x, x0, x1, b, k1, k2, k3):\n",
    "#     condlist = [x < x0, (x >= x0) & (x < x1), x >= x1]\n",
    "#     funclist = [lambda x: k1*x + b, lambda x: k1*x + b + k2*(x-x0), lambda x: k1*x + b + k2*(x-x0) + k3*(x - x1)]\n",
    "#     return np.piecewise(x, condlist, funclist)\n",
    "\n",
    "# x,z = get_data()\n",
    "\n",
    "# fmodel = Model(piecewise_linear)\n",
    "# params = Parameters()\n",
    "# params.add('x0', value=0, vary=True, min=min(x), max=max(x))\n",
    "# params.add('x1', value=0, vary=True, min=min(x), max=max(x))\n",
    "# params.add('b', value=0, vary=True)\n",
    "# params.add('k1', value=0, vary=True, min=-0.01, max=0.01)\n",
    "# params.add('k2', value=0, vary=True, min=-0.1, max=-0.5)\n",
    "# params.add('k3', value=0, vary=True, min=0.1, max=0.5)\n",
    "\n",
    "def piecewise_linear(x, x0, x1, x2, b, k1, k2, k3,k4):\n",
    "    condlist = [x < x0, (x >= x0) & (x < x1), (x >= x1) & (x < x2), x >= x2]\n",
    "    funclist = [lambda x: k1*x + b, lambda x: k1*x + b + k2*(x-x0), lambda x: k1*x + b + k2*(x-x0) + k3*(x - x1), lambda x: k1*x + b + k2*(x-x0) + k3*(x - x1) +k4*(x-x2)]\n",
    "    return np.piecewise(x, condlist, funclist)\n",
    "\n",
    "x,z = get_data()\n",
    "\n",
    "fmodel = Model(piecewise_linear)\n",
    "params = Parameters()\n",
    "params.add('x0', value=0, vary=True, min=min(x), max=max(x))\n",
    "params.add('x1', value=0, vary=True, min=min(x), max=max(x))\n",
    "params.add('x2', value=0, vary=True, min=min(x), max=max(x))\n",
    "params.add('b', value=0, vary=True)\n",
    "params.add('k1', value=0, vary=True, min=-0.5, max=0.5)\n",
    "params.add('k2', value=0, vary=True, min=-0.5, max=0.5)\n",
    "params.add('k3', value=0, vary=True, min=-0.5, max=0.5)\n",
    "params.add('k4', value=0, vary=True, min=-0.5, max=0.5)\n",
    "\n",
    "\n",
    "result = fmodel.fit(z, params, x=x,method='ampgo')\n",
    "\n",
    "\n",
    "pyplot.figure()\n",
    "pyplot.plot(x,z, color='0.5',marker='.', alpha=.2, ms=10,linestyle=\"None\")\n",
    "pyplot.plot(x,result.best_fit, color='r')\n",
    "pyplot.show()\n",
    "print(result.fit_report())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Try spline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "code_folding": [
     2
    ]
   },
   "outputs": [],
   "source": [
    "from scipy.signal import savgol_filter\n",
    "\n",
    "def get_data():\n",
    "    site_id='NARRA0018'\n",
    "    profile_type='prestorm'\n",
    "    df_profile = df_profiles.query(\"site_id == '{}' and profile_type == '{}'\".format(site_id, profile_type))\n",
    "    x = np.array(df_profile.index.get_level_values('x').tolist())\n",
    "    z = np.array(df_profile.z.tolist())    \n",
    "\n",
    "    nan_mask = ma.masked_invalid(z).mask\n",
    "    x = x[~nan_mask]\n",
    "    z = z[~nan_mask]\n",
    "    return x,z\n",
    "\n",
    "x,z = get_data()\n",
    "\n",
    "z_filtered = savgol_filter(z, 31, 3)\n",
    "\n",
    "\n",
    "pyplot.figure()\n",
    "pyplot.plot(x,z, color='0.5',marker='.', alpha=.2, ms=10,linestyle=\"None\")\n",
    "pyplot.plot(x,z_filtered, color='r')\n",
    "pyplot.show()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}