You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
nsw-2016-storm-impact/notebooks/04a_profile_picker_supersed...

408 lines
13 KiB
Plaintext

6 years ago
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os\n",
"import numpy.ma as ma\n",
"\n",
"import numpy\n",
"from pyearth import Earth\n",
"from matplotlib import pyplot\n",
"\n",
"np.random.seed(2017)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
" print('Importing {}'.format(csv))\n",
" return pd.read_csv(os.path.join(data_folder,csv), index_col=index_col)\n",
"\n",
"df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Try using pyearth"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [
5,
20,
31,
40
]
},
"outputs": [],
"source": [
"from scipy.signal import savgol_filter\n",
"import re\n",
"from scipy.stats import linregress\n",
"import warnings\n",
"warnings.simplefilter(action='ignore', category=FutureWarning)\n",
"\n",
"def get_breakpoints(model, min_distance=20):\n",
" # Get breakpoints\n",
" breakpoints = []\n",
" for line in model.summary().split('\\n'):\n",
" # Get unpruned lines\n",
" if 'No' in line and 'None' not in line:\n",
" # Get break points\n",
" m = re.search(\"h\\(x0-(\\d*\\.?\\d+)\\)\", line)\n",
" if m:\n",
" breakpoints.append(float(m.groups()[0]))\n",
" m = re.search(\"h\\((\\d*\\.?\\d+)-x0\\)\", line)\n",
" if m:\n",
" breakpoints.append(float(m.groups()[0]))\n",
" return sorted(list(set(breakpoints)))\n",
" \n",
"def get_segments(breakpoints, x_min, x_max):\n",
" segments = []\n",
" breakpoints = [x_min] + breakpoints + [x_max]\n",
"\n",
" for x1, x2 in zip(breakpoints, breakpoints[1:]):\n",
" segments.append({\n",
" 'x_start': x1,\n",
" 'x_end': x2\n",
" })\n",
" return segments \n",
"\n",
"def get_segment_slopes(segments, x, z):\n",
" for segment in segments:\n",
" mask = ma.masked_where((segment['x_start'] < x) & (x < segment['x_end']),x ).mask\n",
" segment['z_mean'] = np.mean(z[mask])\n",
" segment['z_start'] = np.mean(z[mask][0])\n",
" segment['z_end'] = np.mean(z[mask][-1])\n",
" segment['slope'] = -linregress(x[mask], z[mask]).slope\n",
" return segments\n",
" \n",
"def classify_segments(segments, x,z):\n",
" \n",
" # Most seaward slope must be foreshore\n",
" segments[-1]['type'] = 'foreshore'\n",
" \n",
" # Most landward slope must be land\n",
" segments[0]['type'] = 'land'\n",
" \n",
" # Segments with really high slopes must be structures\n",
" for seg in segments:\n",
" if seg['slope'] > 2.0:\n",
" seg['type'] = 'structure'\n",
" \n",
" # Segments with large change of slope and \n",
" # Segment with max slope should be dune face\n",
"# dune_face_idx = [n for n, seg in enumerate(segments) if seg['slope']==max(x['slope'] for x in segments)][0]\n",
"# segments[dune_face_idx]['type'] = 'dune_face'\n",
" \n",
" # Pick out berms \n",
" for seg in segments:\n",
" if (-0.03 < seg['slope'] < 0.03 # berms should be relatively flat\n",
" and 0 < seg['z_mean'] < 4 # berms should be located between 0-4 m AHD\n",
" ): # berms should be seaward of dune face\n",
" seg['type'] = 'berm'\n",
" \n",
"# slope = None\n",
"# for seg in reversed(segments):\n",
"# if slope is None:\n",
"# continue\n",
"# elif slope - 0.03 < seg['slope'] < slope + 0.03:\n",
"# seg['type'] = 'foreshore'\n",
"# else:\n",
"# break\n",
" \n",
" return segments\n",
"\n",
"def get_piecewise_linear_model(x,z):\n",
" #Fit an Earth model\n",
" model = Earth(penalty=3,thresh=0.0005)\n",
" model.fit(x,z)\n",
" return model\n",
"\n",
"def plot_profile_classification(site_id, profile_type):\n",
" df_profile = df_profiles.query(\"site_id == '{}' and profile_type == '{}'\".format(site_id, profile_type))\n",
" x = np.array(df_profile.index.get_level_values('x').tolist())\n",
" z = np.array(df_profile.z.tolist()) \n",
" \n",
" nan_mask = ma.masked_invalid(z).mask\n",
" x = x[~nan_mask]\n",
" z_unfiltered = z[~nan_mask]\n",
" z = savgol_filter(z_unfiltered, 51, 3)\n",
" \n",
" model = get_piecewise_linear_model(x,z)\n",
" breakpoints = get_breakpoints(model)\n",
" segments = get_segments(breakpoints, x_min=x.min(), x_max=x.max())\n",
" segments = get_segment_slopes(segments, x=x, z=z)\n",
"# segments = merge_similar_segments(segments)\n",
" segments = classify_segments(segments, x=x, z=z)\n",
" \n",
" pyplot.figure()\n",
" pyplot.plot(x,z_unfiltered, color='0.5',marker='.', alpha=.2, ms=10,linestyle=\"None\")\n",
"\n",
" # Plot different segments\n",
" foreshore_segments = [x for x in segments if x.get('type') == 'foreshore']\n",
" for seg in foreshore_segments:\n",
" pyplot.plot([seg['x_start'], seg['x_end']],\n",
" [seg['z_start'], seg['z_end']],\n",
" linewidth=4, \n",
" color='b')\n",
"\n",
" land_segments = [x for x in segments if x.get('type') == 'land']\n",
" for seg in land_segments:\n",
" pyplot.plot([seg['x_start'], seg['x_end']],\n",
" [seg['z_start'], seg['z_end']],\n",
" linewidth=4, \n",
" color='g')\n",
"\n",
" berm_segments = [x for x in segments if x.get('type') == 'berm']\n",
" for seg in berm_segments:\n",
" pyplot.plot([seg['x_start'], seg['x_end']],\n",
" [seg['z_start'], seg['z_end']],\n",
" linewidth=4, \n",
" color='y')\n",
"\n",
" dune_face_segments = [x for x in segments if x.get('type') == 'dune_face']\n",
" for seg in dune_face_segments:\n",
" pyplot.plot([seg['x_start'], seg['x_end']],\n",
" [seg['z_start'], seg['z_end']],\n",
" linewidth=4, \n",
" color='r')\n",
" \n",
" structure_segments = [x for x in segments if x.get('type') == 'structure']\n",
" for seg in structure_segments:\n",
" pyplot.plot([seg['x_start'], seg['x_end']],\n",
" [seg['z_start'], seg['z_end']],\n",
" linewidth=4, \n",
" color='m')\n",
" \n",
" unclassified_segments = [x for x in segments if x.get('type') is None]\n",
" for seg in unclassified_segments:\n",
" pyplot.plot([seg['x_start'], seg['x_end']],\n",
" [seg['z_start'], seg['z_end']],\n",
" linewidth=4, \n",
" color='0.4')\n",
"\n",
" pyplot.xlabel('x (m)')\n",
" pyplot.ylabel('z (m AHD)')\n",
" pyplot.title('{} profile at {}'.format(profile_type, site_id))\n",
" pyplot.show()\n",
"\n",
" import pprint\n",
" pp = pprint.PrettyPrinter(indent=4)\n",
" pp.pprint(segments)\n",
"\n",
"plot_profile_classification('NARRA0018', 'prestorm')\n",
"plot_profile_classification('NARRA0019', 'prestorm')\n",
"plot_profile_classification('CRESn0017', 'poststorm')"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true
},
"source": [
"## Try lmfit"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"hidden": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [
0
],
"hidden": true
},
"outputs": [],
"source": [
"from lmfit import Model, Parameters\n",
"\n",
"def get_data():\n",
" site_id='NARRA0018'\n",
" profile_type='prestorm'\n",
" df_profile = df_profiles.query(\"site_id == '{}' and profile_type == '{}'\".format(site_id, profile_type))\n",
" x = np.array(df_profile.index.get_level_values('x').tolist())\n",
" z = np.array(df_profile.z.tolist()) \n",
"\n",
" nan_mask = ma.masked_invalid(z).mask\n",
" x = x[~nan_mask]\n",
" z = z[~nan_mask]\n",
" return x,z\n",
"\n",
"# def piecewise_linear(x, x0, x1, b, k1, k2, k3):\n",
"# condlist = [x < x0, (x >= x0) & (x < x1), x >= x1]\n",
"# funclist = [lambda x: k1*x + b, lambda x: k1*x + b + k2*(x-x0), lambda x: k1*x + b + k2*(x-x0) + k3*(x - x1)]\n",
"# return np.piecewise(x, condlist, funclist)\n",
"\n",
"# x,z = get_data()\n",
"\n",
"# fmodel = Model(piecewise_linear)\n",
"# params = Parameters()\n",
"# params.add('x0', value=0, vary=True, min=min(x), max=max(x))\n",
"# params.add('x1', value=0, vary=True, min=min(x), max=max(x))\n",
"# params.add('b', value=0, vary=True)\n",
"# params.add('k1', value=0, vary=True, min=-0.01, max=0.01)\n",
"# params.add('k2', value=0, vary=True, min=-0.1, max=-0.5)\n",
"# params.add('k3', value=0, vary=True, min=0.1, max=0.5)\n",
"\n",
"def piecewise_linear(x, x0, x1, x2, b, k1, k2, k3,k4):\n",
" condlist = [x < x0, (x >= x0) & (x < x1), (x >= x1) & (x < x2), x >= x2]\n",
" funclist = [lambda x: k1*x + b, lambda x: k1*x + b + k2*(x-x0), lambda x: k1*x + b + k2*(x-x0) + k3*(x - x1), lambda x: k1*x + b + k2*(x-x0) + k3*(x - x1) +k4*(x-x2)]\n",
" return np.piecewise(x, condlist, funclist)\n",
"\n",
"x,z = get_data()\n",
"\n",
"fmodel = Model(piecewise_linear)\n",
"params = Parameters()\n",
"params.add('x0', value=0, vary=True, min=min(x), max=max(x))\n",
"params.add('x1', value=0, vary=True, min=min(x), max=max(x))\n",
"params.add('x2', value=0, vary=True, min=min(x), max=max(x))\n",
"params.add('b', value=0, vary=True)\n",
"params.add('k1', value=0, vary=True, min=-0.5, max=0.5)\n",
"params.add('k2', value=0, vary=True, min=-0.5, max=0.5)\n",
"params.add('k3', value=0, vary=True, min=-0.5, max=0.5)\n",
"params.add('k4', value=0, vary=True, min=-0.5, max=0.5)\n",
"\n",
"\n",
"result = fmodel.fit(z, params, x=x,method='ampgo')\n",
"\n",
"\n",
"pyplot.figure()\n",
"pyplot.plot(x,z, color='0.5',marker='.', alpha=.2, ms=10,linestyle=\"None\")\n",
"pyplot.plot(x,result.best_fit, color='r')\n",
"pyplot.show()\n",
"print(result.fit_report())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Try spline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [
2
]
},
"outputs": [],
"source": [
"from scipy.signal import savgol_filter\n",
"\n",
"def get_data():\n",
" site_id='NARRA0018'\n",
" profile_type='prestorm'\n",
" df_profile = df_profiles.query(\"site_id == '{}' and profile_type == '{}'\".format(site_id, profile_type))\n",
" x = np.array(df_profile.index.get_level_values('x').tolist())\n",
" z = np.array(df_profile.z.tolist()) \n",
"\n",
" nan_mask = ma.masked_invalid(z).mask\n",
" x = x[~nan_mask]\n",
" z = z[~nan_mask]\n",
" return x,z\n",
"\n",
"x,z = get_data()\n",
"\n",
"z_filtered = savgol_filter(z, 31, 3)\n",
"\n",
"\n",
"pyplot.figure()\n",
"pyplot.plot(x,z, color='0.5',marker='.', alpha=.2, ms=10,linestyle=\"None\")\n",
"pyplot.plot(x,z_filtered, color='r')\n",
"pyplot.show()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}