{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "import numpy.ma as ma\n", "\n", "import numpy\n", "from pyearth import Earth\n", "from matplotlib import pyplot\n", "\n", "np.random.seed(2017)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def df_from_csv(csv, index_col, data_folder='../data/interim'):\n", " print('Importing {}'.format(csv))\n", " return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n", "\n", "df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Try using pyearth" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "code_folding": [ 5, 20, 31, 40 ] }, "outputs": [], "source": [ "from scipy.signal import savgol_filter\n", "import re\n", "from scipy.stats import linregress\n", "import warnings\n", "warnings.simplefilter(action='ignore', category=FutureWarning)\n", "\n", "def get_breakpoints(model, min_distance=20):\n", " # Get breakpoints\n", " breakpoints = []\n", " for line in model.summary().split('\\n'):\n", " # Get unpruned lines\n", " if 'No' in line and 'None' not in line:\n", " # Get break points\n", " m = re.search(\"h\\(x0-(\\d*\\.?\\d+)\\)\", line)\n", " if m:\n", " breakpoints.append(float(m.groups()[0]))\n", " m = re.search(\"h\\((\\d*\\.?\\d+)-x0\\)\", line)\n", " if m:\n", " breakpoints.append(float(m.groups()[0]))\n", " return sorted(list(set(breakpoints)))\n", " \n", "def get_segments(breakpoints, x_min, x_max):\n", " segments = []\n", " breakpoints = [x_min] + breakpoints + [x_max]\n", "\n", " for x1, x2 in zip(breakpoints, breakpoints[1:]):\n", " segments.append({\n", " 'x_start': x1,\n", " 'x_end': x2\n", " })\n", " return segments \n", "\n", "def get_segment_slopes(segments, x, z):\n", " for segment in segments:\n", " mask = ma.masked_where((segment['x_start'] < x) & (x < segment['x_end']),x ).mask\n", " segment['z_mean'] = np.mean(z[mask])\n", " segment['z_start'] = np.mean(z[mask][0])\n", " segment['z_end'] = np.mean(z[mask][-1])\n", " segment['slope'] = -linregress(x[mask], z[mask]).slope\n", " return segments\n", " \n", "def classify_segments(segments, x,z):\n", " \n", " # Most seaward slope must be foreshore\n", " segments[-1]['type'] = 'foreshore'\n", " \n", " # Most landward slope must be land\n", " segments[0]['type'] = 'land'\n", " \n", " # Segments with really high slopes must be structures\n", " for seg in segments:\n", " if seg['slope'] > 2.0:\n", " seg['type'] = 'structure'\n", " \n", " # Segments with large change of slope and \n", " # Segment with max slope should be dune face\n", "# dune_face_idx = [n for n, seg in enumerate(segments) if seg['slope']==max(x['slope'] for x in segments)][0]\n", "# segments[dune_face_idx]['type'] = 'dune_face'\n", " \n", " # Pick out berms \n", " for seg in segments:\n", " if (-0.03 < seg['slope'] < 0.03 # berms should be relatively flat\n", " and 0 < seg['z_mean'] < 4 # berms should be located between 0-4 m AHD\n", " ): # berms should be seaward of dune face\n", " seg['type'] = 'berm'\n", " \n", "# slope = None\n", "# for seg in reversed(segments):\n", "# if slope is None:\n", "# continue\n", "# elif slope - 0.03 < seg['slope'] < slope + 0.03:\n", "# seg['type'] = 'foreshore'\n", "# else:\n", "# break\n", " \n", " return segments\n", "\n", "def get_piecewise_linear_model(x,z):\n", " #Fit an Earth model\n", " model = Earth(penalty=3,thresh=0.0005)\n", " model.fit(x,z)\n", " return model\n", "\n", "def plot_profile_classification(site_id, profile_type):\n", " df_profile = df_profiles.query(\"site_id == '{}' and profile_type == '{}'\".format(site_id, profile_type))\n", " x = np.array(df_profile.index.get_level_values('x').tolist())\n", " z = np.array(df_profile.z.tolist()) \n", " \n", " nan_mask = ma.masked_invalid(z).mask\n", " x = x[~nan_mask]\n", " z_unfiltered = z[~nan_mask]\n", " z = savgol_filter(z_unfiltered, 51, 3)\n", " \n", " model = get_piecewise_linear_model(x,z)\n", " breakpoints = get_breakpoints(model)\n", " segments = get_segments(breakpoints, x_min=x.min(), x_max=x.max())\n", " segments = get_segment_slopes(segments, x=x, z=z)\n", "# segments = merge_similar_segments(segments)\n", " segments = classify_segments(segments, x=x, z=z)\n", " \n", " pyplot.figure()\n", " pyplot.plot(x,z_unfiltered, color='0.5',marker='.', alpha=.2, ms=10,linestyle=\"None\")\n", "\n", " # Plot different segments\n", " foreshore_segments = [x for x in segments if x.get('type') == 'foreshore']\n", " for seg in foreshore_segments:\n", " pyplot.plot([seg['x_start'], seg['x_end']],\n", " [seg['z_start'], seg['z_end']],\n", " linewidth=4, \n", " color='b')\n", "\n", " land_segments = [x for x in segments if x.get('type') == 'land']\n", " for seg in land_segments:\n", " pyplot.plot([seg['x_start'], seg['x_end']],\n", " [seg['z_start'], seg['z_end']],\n", " linewidth=4, \n", " color='g')\n", "\n", " berm_segments = [x for x in segments if x.get('type') == 'berm']\n", " for seg in berm_segments:\n", " pyplot.plot([seg['x_start'], seg['x_end']],\n", " [seg['z_start'], seg['z_end']],\n", " linewidth=4, \n", " color='y')\n", "\n", " dune_face_segments = [x for x in segments if x.get('type') == 'dune_face']\n", " for seg in dune_face_segments:\n", " pyplot.plot([seg['x_start'], seg['x_end']],\n", " [seg['z_start'], seg['z_end']],\n", " linewidth=4, \n", " color='r')\n", " \n", " structure_segments = [x for x in segments if x.get('type') == 'structure']\n", " for seg in structure_segments:\n", " pyplot.plot([seg['x_start'], seg['x_end']],\n", " [seg['z_start'], seg['z_end']],\n", " linewidth=4, \n", " color='m')\n", " \n", " unclassified_segments = [x for x in segments if x.get('type') is None]\n", " for seg in unclassified_segments:\n", " pyplot.plot([seg['x_start'], seg['x_end']],\n", " [seg['z_start'], seg['z_end']],\n", " linewidth=4, \n", " color='0.4')\n", "\n", " pyplot.xlabel('x (m)')\n", " pyplot.ylabel('z (m AHD)')\n", " pyplot.title('{} profile at {}'.format(profile_type, site_id))\n", " pyplot.show()\n", "\n", " import pprint\n", " pp = pprint.PrettyPrinter(indent=4)\n", " pp.pprint(segments)\n", "\n", "plot_profile_classification('NARRA0018', 'prestorm')\n", "plot_profile_classification('NARRA0019', 'prestorm')\n", "plot_profile_classification('CRESn0017', 'poststorm')" ] }, { "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "## Try lmfit" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "hidden": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "code_folding": [ 0 ], "hidden": true }, "outputs": [], "source": [ "from lmfit import Model, Parameters\n", "\n", "def get_data():\n", " site_id='NARRA0018'\n", " profile_type='prestorm'\n", " df_profile = df_profiles.query(\"site_id == '{}' and profile_type == '{}'\".format(site_id, profile_type))\n", " x = np.array(df_profile.index.get_level_values('x').tolist())\n", " z = np.array(df_profile.z.tolist()) \n", "\n", " nan_mask = ma.masked_invalid(z).mask\n", " x = x[~nan_mask]\n", " z = z[~nan_mask]\n", " return x,z\n", "\n", "# def piecewise_linear(x, x0, x1, b, k1, k2, k3):\n", "# condlist = [x < x0, (x >= x0) & (x < x1), x >= x1]\n", "# funclist = [lambda x: k1*x + b, lambda x: k1*x + b + k2*(x-x0), lambda x: k1*x + b + k2*(x-x0) + k3*(x - x1)]\n", "# return np.piecewise(x, condlist, funclist)\n", "\n", "# x,z = get_data()\n", "\n", "# fmodel = Model(piecewise_linear)\n", "# params = Parameters()\n", "# params.add('x0', value=0, vary=True, min=min(x), max=max(x))\n", "# params.add('x1', value=0, vary=True, min=min(x), max=max(x))\n", "# params.add('b', value=0, vary=True)\n", "# params.add('k1', value=0, vary=True, min=-0.01, max=0.01)\n", "# params.add('k2', value=0, vary=True, min=-0.1, max=-0.5)\n", "# params.add('k3', value=0, vary=True, min=0.1, max=0.5)\n", "\n", "def piecewise_linear(x, x0, x1, x2, b, k1, k2, k3,k4):\n", " condlist = [x < x0, (x >= x0) & (x < x1), (x >= x1) & (x < x2), x >= x2]\n", " funclist = [lambda x: k1*x + b, lambda x: k1*x + b + k2*(x-x0), lambda x: k1*x + b + k2*(x-x0) + k3*(x - x1), lambda x: k1*x + b + k2*(x-x0) + k3*(x - x1) +k4*(x-x2)]\n", " return np.piecewise(x, condlist, funclist)\n", "\n", "x,z = get_data()\n", "\n", "fmodel = Model(piecewise_linear)\n", "params = Parameters()\n", "params.add('x0', value=0, vary=True, min=min(x), max=max(x))\n", "params.add('x1', value=0, vary=True, min=min(x), max=max(x))\n", "params.add('x2', value=0, vary=True, min=min(x), max=max(x))\n", "params.add('b', value=0, vary=True)\n", "params.add('k1', value=0, vary=True, min=-0.5, max=0.5)\n", "params.add('k2', value=0, vary=True, min=-0.5, max=0.5)\n", "params.add('k3', value=0, vary=True, min=-0.5, max=0.5)\n", "params.add('k4', value=0, vary=True, min=-0.5, max=0.5)\n", "\n", "\n", "result = fmodel.fit(z, params, x=x,method='ampgo')\n", "\n", "\n", "pyplot.figure()\n", "pyplot.plot(x,z, color='0.5',marker='.', alpha=.2, ms=10,linestyle=\"None\")\n", "pyplot.plot(x,result.best_fit, color='r')\n", "pyplot.show()\n", "print(result.fit_report())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Try spline" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "code_folding": [ 2 ] }, "outputs": [], "source": [ "from scipy.signal import savgol_filter\n", "\n", "def get_data():\n", " site_id='NARRA0018'\n", " profile_type='prestorm'\n", " df_profile = df_profiles.query(\"site_id == '{}' and profile_type == '{}'\".format(site_id, profile_type))\n", " x = np.array(df_profile.index.get_level_values('x').tolist())\n", " z = np.array(df_profile.z.tolist()) \n", "\n", " nan_mask = ma.masked_invalid(z).mask\n", " x = x[~nan_mask]\n", " z = z[~nan_mask]\n", " return x,z\n", "\n", "x,z = get_data()\n", "\n", "z_filtered = savgol_filter(z, 31, 3)\n", "\n", "\n", "pyplot.figure()\n", "pyplot.plot(x,z, color='0.5',marker='.', alpha=.2, ms=10,linestyle=\"None\")\n", "pyplot.plot(x,z_filtered, color='r')\n", "pyplot.show()\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.7" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }