You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

920 lines
32 KiB
Plaintext

6 years ago
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data exploration\n",
"This notebook provides an example of how the data is loaded and accessed for further analysis."
]
},
6 years ago
{
"cell_type": "markdown",
"metadata": {},
6 years ago
"source": [
"## Setup notebook"
]
},
6 years ago
{
"cell_type": "code",
6 years ago
"execution_count": null,
"metadata": {},
6 years ago
"outputs": [],
"source": [
"# Enable autoreloading of our modules.\n",
"# Most of the code will be located in the /src/ folder,\n",
"# and then called from the notebook.\n",
"%matplotlib inline\n",
"%reload_ext autoreload\n",
"# Mode 2 reloads all imported modules before every cell execution. A bare\n",
"# `%autoreload` only reloads once, at the moment this line runs.\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
6 years ago
"execution_count": null,
"metadata": {},
6 years ago
"outputs": [],
"source": [
"from IPython.core.debugger import set_trace\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"\n",
"import plotly\n",
"import plotly.graph_objs as go\n",
"import plotly.plotly as py\n",
"import plotly.tools as tls\n",
"import plotly.figure_factory as ff\n",
6 years ago
"import plotly.io as pio\n",
"\n",
6 years ago
"\n",
"import matplotlib\n",
"from matplotlib import cm\n",
6 years ago
"import colorlover as cl\n",
"\n",
"from ipywidgets import widgets, Output\n",
"from IPython.display import display, clear_output, Image, HTML\n",
"\n",
6 years ago
"from sklearn.metrics import confusion_matrix"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
6 years ago
"## Import data\n",
"Import our data into pandas DataFrames for the analysis. The data files are `.csv` files stored in the `./data/interim/` folder."
6 years ago
]
},
{
"cell_type": "code",
6 years ago
"execution_count": null,
6 years ago
"metadata": {
"pixiedust": {
"displayParams": {}
}
6 years ago
},
"outputs": [],
6 years ago
"source": [
"def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
"    \"\"\"Read a single csv file into a pandas DataFrame.\n",
"\n",
"    Args:\n",
"        csv: File name of the csv, relative to `data_folder`.\n",
"        index_col: Column position(s) handed to `pd.read_csv` as `index_col`.\n",
"        data_folder: Folder that contains the csv file.\n",
"\n",
"    Returns:\n",
"        The DataFrame produced by `pd.read_csv`.\n",
"    \"\"\"\n",
"    print('Importing {}'.format(csv))\n",
"    csv_path = os.path.join(data_folder, csv)\n",
"    return pd.read_csv(csv_path, index_col=index_col)\n",
"\n",
"# Time series, profile and site tables\n",
"df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n",
"df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n",
"df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n",
"df_sites = df_from_csv('sites.csv', index_col=[0])\n",
"df_profile_features_crest_toes = df_from_csv(\n",
"    'profile_features_crest_toes.csv', index_col=[0])\n",
"\n",
"# Note that the forecasted data sets should be in the same order for impacts and twls\n",
"impacts = dict(\n",
"    forecasted=dict(\n",
"        foreshore_slope_sto06=df_from_csv(\n",
"            'impacts_forecasted_foreshore_slope_sto06.csv', index_col=[0]),\n",
"        mean_slope_sto06=df_from_csv(\n",
"            'impacts_forecasted_mean_slope_sto06.csv', index_col=[0]),\n",
"    ),\n",
"    observed=df_from_csv('impacts_observed.csv', index_col=[0]),\n",
")\n",
"\n",
"twls = dict(\n",
"    forecasted=dict(\n",
"        foreshore_slope_sto06=df_from_csv(\n",
"            'twl_foreshore_slope_sto06.csv', index_col=[0, 1]),\n",
"        mean_slope_sto06=df_from_csv(\n",
"            'twl_mean_slope_sto06.csv', index_col=[0, 1]),\n",
"    ),\n",
")\n",
"print('Done!')"
6 years ago
]
},
6 years ago
{
6 years ago
"cell_type": "markdown",
6 years ago
"metadata": {},
6 years ago
"source": [
"## Profile/timeseries dashboard"
]
6 years ago
},
6 years ago
{
"cell_type": "markdown",
6 years ago
"metadata": {
"hide_input": true
},
6 years ago
"source": [
"The following interactive data explorer displays information on a per `site_id` basis. It can be used to examine pre/post storm cross-sections, water level time series and observed/predicted storm impacts."
6 years ago
]
},
{
"cell_type": "code",
6 years ago
"execution_count": null,
"metadata": {
"code_folding": [
6 years ago
408
],
6 years ago
"hide_input": false
6 years ago
},
6 years ago
"outputs": [],
6 years ago
"source": [
"# Create widgets for filtering by observed and forecasted impacts\n",
"filter_title = widgets.HTML(\n",
"    value='<b>Filter by observed and predicted impacts:</b>')\n",
"\n",
"\n",
"def _regime_selectbox(df_impacts):\n",
"    \"\"\"Multi-select of every storm regime in `df_impacts`, all pre-selected.\"\"\"\n",
"    regimes = df_impacts.storm_regime.dropna().unique().tolist()\n",
"    return widgets.SelectMultiple(\n",
"        options=regimes, value=regimes, disabled=False)\n",
"\n",
"\n",
"# The observed impacts come first, followed by one entry per forecast\n",
"titles = ['Observed Impacts']\n",
"selectboxes = [_regime_selectbox(impacts['observed'])]\n",
"\n",
"for forecast in impacts['forecasted']:\n",
"    selectboxes.append(_regime_selectbox(impacts['forecasted'][forecast]))\n",
"    titles.append('Forecasted: {}'.format(forecast))\n",
"\n",
"titles = [widgets.HTML(value=title) for title in titles]\n",
"\n",
"# Each select box sits underneath its own title, side by side\n",
"children = widgets.HBox(children=[\n",
"    widgets.VBox(children=[title, box])\n",
"    for title, box in zip(titles, selectboxes)\n",
"])\n",
"filter_container = widgets.VBox(children=[filter_title, children])\n",
"\n",
"# Create widgets for selecting site_id\n",
"site_id_title = widgets.HTML(value='<b>Filter by site_id:</b>')\n",
"\n",
"site_id_options = df_profiles.index.get_level_values(\n",
"    'site_id').unique().sort_values().tolist()\n",
"\n",
"# Start on NARRA0001 when the profile data contains it. Otherwise fall back to\n",
"# the first available site, because a Dropdown cannot be created with a value\n",
"# that is missing from its options.\n",
"if 'NARRA0001' in site_id_options:\n",
"    default_site_id = 'NARRA0001'\n",
"elif site_id_options:\n",
"    default_site_id = site_id_options[0]\n",
"else:\n",
"    default_site_id = None\n",
"\n",
"site_id_select = widgets.Dropdown(\n",
"    description='site_id: ',\n",
"    value=default_site_id,\n",
"    options=site_id_options,\n",
")\n",
"\n",
"# Filled in by the site_id observer with the observed/forecasted regimes\n",
"site_id_impacts = widgets.HTML(value='')\n",
"\n",
"site_id_container = widgets.HBox(children=[\n",
"    widgets.VBox(\n",
"        children=[site_id_title,\n",
"                  widgets.HBox(children=[site_id_select])]),\n",
"    site_id_impacts,\n",
"])\n",
"\n",
"# Build colors for each of our forecasts\n",
"colors = list(\n",
"    reversed(\n",
"        cl.scales[str(max(len(impacts['forecasted']), 3))]['seq']['YlGnBu']))\n",
"\n",
"\n",
"def _feature_marker_trace(name, outline_color, symbol=None):\n",
"    \"\"\"Placeholder trace drawing one hollow marker for a dune crest or toe.\n",
"\n",
"    `symbol` is only added to the marker when it is given, so traces created\n",
"    without it keep the default marker symbol.\n",
"    \"\"\"\n",
"    marker = dict(\n",
"        color='rgba(255,255,255,0)',\n",
"        size=10,\n",
"        line=dict(color=outline_color, width=2))\n",
"    if symbol is not None:\n",
"        marker['symbol'] = symbol\n",
"    return go.Scatter(\n",
"        x=[0], y=[0], name=name, mode='markers', marker=marker)\n",
"\n",
"\n",
"# Add panel for pre/post storm profiles. All traces start with placeholder\n",
"# data and are filled in by the site_id observer.\n",
"trace1 = go.Scatter(\n",
"    x=[0],\n",
"    y=[0],\n",
"    name='Pre Storm Profile',\n",
"    line=dict(color='rgb(51,160,44)', width=2))\n",
"trace2 = go.Scatter(\n",
"    x=[0],\n",
"    y=[0],\n",
"    name='Post Storm Profile',\n",
"    line=dict(color='rgb(255,127,0)', width=2))\n",
"trace3 = _feature_marker_trace('Pre-storm dune crest', 'rgba(106,61,154, 1)')\n",
"trace4 = _feature_marker_trace('Pre-storm dune toe', 'rgba(202,178,214,1)')\n",
"trace5 = _feature_marker_trace(\n",
"    'Post-storm dune crest', 'rgba(106,61,154, 1)', symbol='square')\n",
"trace6 = _feature_marker_trace(\n",
"    'Post-storm dune toe', 'rgba(202,178,214,1)', symbol='square')\n",
"\n",
"# One horizontal line per forecast, marking its peak R_high\n",
"forecast_traces = []\n",
"for forecast, color in zip(impacts['forecasted'], colors):\n",
"    forecast_traces.append(\n",
"        go.Scatter(\n",
"            x=[0],\n",
"            y=[0],\n",
"            name='Peak R_high: {}'.format(forecast),\n",
"            mode='lines',\n",
"            line=dict(color=color, width=4)))\n",
"\n",
"layout = go.Layout(\n",
"    title='Bed Profiles',\n",
"    height=300,\n",
"    legend=dict(font={'size': 10}),\n",
"    margin=dict(t=50, b=50, l=50, r=20),\n",
"    xaxis=dict(\n",
"        title='x (m)',\n",
"        autorange=True,\n",
"        showgrid=True,\n",
"        zeroline=True,\n",
"        showline=True,\n",
"        range=[0, 200]),\n",
"    yaxis=dict(\n",
"        title='z (m)',\n",
"        autorange=False,\n",
"        showgrid=True,\n",
"        zeroline=True,\n",
"        showline=True,\n",
"        range=[-1, 20]))\n",
"\n",
"# Trace order matters: the site_id observer addresses the traces by index\n",
"g_profiles = go.FigureWidget(\n",
"    data=[trace1, trace2, trace3, trace4, trace5, trace6] + forecast_traces,\n",
"    layout=layout)\n",
6 years ago
"\n",
"# Add panel for the satellite map (rendered with Mapbox)\n",
"# Prefer a token supplied through the MAPBOX_ACCESS_TOKEN environment variable\n",
"# so credentials do not have to live in the notebook. The literal below is the\n",
"# token this notebook has always used; it is kept only as a fallback so that\n",
"# existing setups keep working.\n",
"mapbox_access_token = os.environ.get(\n",
"    'MAPBOX_ACCESS_TOKEN',\n",
"    'pk.eyJ1IjoiY2hyaXNsZWFtYW4iLCJhIjoiY2pvNTY1MzZpMDc2OTN2bmw5MGsycHp5bCJ9.U2dwFg2c7RFjUNSayERUiw'\n",
")\n",
"\n",
"data = [\n",
"    # Trace 0: every site\n",
"    go.Scattermapbox(\n",
"        lat=df_sites['lat'],\n",
"        lon=df_sites['lon'],\n",
"        mode='markers',\n",
"        marker=dict(size=10),\n",
"        text=df_sites.index.get_level_values('site_id'),\n",
"    ),\n",
"    # Trace 1: highlight marker, moved to the selected site by the observer\n",
"    go.Scattermapbox(\n",
"        lat=[0],\n",
"        lon=[0],\n",
"        mode='markers',\n",
"        marker=dict(\n",
"            size=20,\n",
"            color='rgb(255, 0, 0)',\n",
"            opacity=0.5,\n",
"        ),\n",
"        text=df_sites.index.get_level_values('site_id'),\n",
"    ),\n",
"]\n",
"\n",
"layout = go.Layout(\n",
"    autosize=True,\n",
"    height=300,\n",
"    hovermode='closest',\n",
"    showlegend=False,\n",
"    margin=dict(t=50, b=50, l=20, r=20),\n",
"    mapbox=dict(\n",
"        accesstoken=mapbox_access_token,\n",
"        bearing=0,\n",
"        center=dict(lat=-33.7, lon=151.3),\n",
"        pitch=0,\n",
"        zoom=12,\n",
"        style='satellite-streets'),\n",
")\n",
"\n",
"# `fig` is not used by the dashboard; it is still defined so the notebook\n",
"# namespace stays the same as before.\n",
"fig = dict(data=data, layout=layout)\n",
"g_map = go.FigureWidget(data=data, layout=layout)\n",
"\n",
"# Three stacked panels sharing the x axis: water levels (row 1), beta (row 2)\n",
"# and waves (row 3). Trace order matters, the site_id observer relies on it.\n",
"subplot = tls.make_subplots(3, 1, print_grid=False, shared_xaxes=True)\n",
"g_timeseries = go.FigureWidget(subplot)\n",
"\n",
"# Add trace for Hs0\n",
"g_timeseries.add_trace(\n",
"    go.Scatter(x=[0, 1], y=[0, 1], name='Hs0'), row=3, col=1)\n",
"\n",
"# Add trace for Tp\n",
"g_timeseries.add_trace(\n",
"    go.Scatter(x=[0, 1], y=[0, 1], name='Tp'), row=3, col=1)\n",
"\n",
"# Add water levels\n",
"g_timeseries.add_trace(\n",
"    go.Scatter(\n",
"        x=[0, 3],\n",
"        y=[0, 3],\n",
"        name='Dune Crest',\n",
"        mode='lines',\n",
"        line=dict(color='rgb(214, 117, 14)', width=2, dash='dot')),\n",
"    row=1,\n",
"    col=1)\n",
"\n",
"g_timeseries.add_trace(\n",
"    go.Scatter(\n",
"        x=[0, 3],\n",
"        y=[0, 3],\n",
"        name='Dune Toe',\n",
"        mode='lines',\n",
"        line=dict(color='rgb(142, 77, 8)', width=2, dash='dash')),\n",
"    row=1,\n",
"    col=1)\n",
"\n",
"g_timeseries.add_trace(\n",
"    go.Scatter(\n",
"        x=[0, 3],\n",
"        y=[0, 3],\n",
"        name='Tide+Surge WL',\n",
"        line=dict(color='rgb(8,51,137)', width=2, dash='dot')),\n",
"    row=1,\n",
"    col=1)\n",
"\n",
"# Add trace for each forecasted R_high\n",
"for forecast, color in zip(twls['forecasted'], colors):\n",
"    g_timeseries.add_trace(\n",
"        go.Scatter(\n",
"            x=[0],\n",
"            y=[0],\n",
"            name='R_high: {}'.format(forecast),\n",
"            line=dict(color=color, width=3)),\n",
"        row=1,\n",
"        col=1)\n",
"\n",
"# Add trace for each forecasted beta term\n",
"for forecast, color in zip(impacts['forecasted'], colors):\n",
"    g_timeseries.add_trace(\n",
"        go.Scatter(\n",
"            x=[0, 1],\n",
"            y=[0, 1],\n",
"            name='Beta: {}'.format(forecast),\n",
"            line=dict(color=color, width=3)),\n",
"        row=2,\n",
"        col=1)\n",
"\n",
"# Create axis for Tp on same plot as Hs (trace 1 is the Tp trace)\n",
"g_timeseries['layout']['yaxis4'] = {'overlaying': 'y3', 'side': 'right'}\n",
"g_timeseries.data[1]['yaxis'] = 'y4'\n",
"\n",
"# Add labels to each axis\n",
"g_timeseries.layout['xaxis']['title'] = 'datetime'\n",
"g_timeseries.layout['yaxis1']['title'] = 'z (mAHD)'\n",
"g_timeseries.layout['yaxis2']['title'] = 'beta (-)'\n",
"g_timeseries.layout['yaxis3']['title'] = 'Hs0 (m)'\n",
"g_timeseries.layout['yaxis4']['title'] = 'Tp (s)'\n",
"\n",
"# Update figure size\n",
"g_timeseries['layout'].update(\n",
"    height=400,\n",
"    legend=dict(font={'size': 10}),\n",
"    margin=dict(t=20, l=50, r=20, b=100))\n",
6 years ago
"\n",
6 years ago
"# Add panel for some tables\n",
"titles = ['observed'] + list(impacts['forecasted'])\n",
"titles = [widgets.HTML(value='{}'.format(title)) for title in titles]\n",
"\n",
"\n",
"def get_observed_impacts_table(site_id):\n",
"    \"\"\"Display the observed impacts of `site_id` as a transposed table.\"\"\"\n",
"    site_rows = impacts['observed'].query(\"site_id=='{}'\".format(site_id))\n",
"    display(site_rows.T)\n",
"\n",
"\n",
"def get_forecasted_impacts_table(site_id, forecast):\n",
"    \"\"\"Display the `forecast` impacts of `site_id` as a transposed table.\"\"\"\n",
"    site_rows = impacts['forecasted'][forecast].query(\n",
"        \"site_id=='{}'\".format(site_id))\n",
"    display(site_rows.T)\n",
"\n",
"\n",
"impacts_table_observed = widgets.interactive_output(\n",
"    get_observed_impacts_table, {'site_id': site_id_select})\n",
"\n",
"# The HTML title widget of each forecast doubles as the `forecast` control:\n",
"# its value was set to the forecast name above.\n",
"forecasted_impacts_tables = []\n",
"for forecast, title in zip(impacts['forecasted'], titles[1:]):\n",
"    controls = {'site_id': site_id_select, 'forecast': title}\n",
"    forecasted_impacts_tables.append(\n",
"        widgets.interactive_output(get_forecasted_impacts_table, controls))\n",
"\n",
"tables = [impacts_table_observed] + forecasted_impacts_tables\n",
"\n",
"# Put every table underneath its title, then lay the pairs out side by side\n",
"title_tables = [\n",
"    widgets.VBox(children=[title, table])\n",
"    for title, table in zip(titles, tables)\n",
"]\n",
"\n",
"tables_container = widgets.HBox(children=title_tables)\n",
"\n",
"\n",
"def _first_or_nan(values):\n",
"    \"\"\"Return the first item of `values`, or NaN when `values` is empty.\"\"\"\n",
"    items = values.tolist()\n",
"    return items[0] if items else float('nan')\n",
"\n",
"\n",
"def update_profile(change):\n",
"    \"\"\"Refresh every dashboard panel for the site chosen in `site_id_select`.\n",
"\n",
"    Registered as an observer of `site_id_select`. `change` is accepted to\n",
"    match the observer signature but is not read. Sites with missing rows in\n",
"    one of the tables are shown with NaN/empty values instead of raising.\n",
"    \"\"\"\n",
"    site_id = site_id_select.value\n",
"\n",
"    if site_id is None:\n",
"        return\n",
"\n",
"    site_profile = df_profiles.query('site_id == \"{}\"'.format(site_id))\n",
"    prestorm_profile = site_profile.query('profile_type == \"prestorm\"')\n",
"    poststorm_profile = site_profile.query('profile_type == \"poststorm\"')\n",
"\n",
"    poststorm_x = poststorm_profile.index.get_level_values('x').tolist()\n",
"    poststorm_z = poststorm_profile.z.tolist()\n",
"\n",
"    prestorm_x = prestorm_profile.index.get_level_values('x').tolist()\n",
"    prestorm_z = prestorm_profile.z.tolist()\n",
"\n",
"    prestorm_site_features = df_profile_features_crest_toes.query(\n",
"        'site_id == \"{}\" and profile_type==\"prestorm\"'.format(site_id))\n",
"    poststorm_site_features = df_profile_features_crest_toes.query(\n",
"        'site_id == \"{}\" and profile_type==\"poststorm\"'.format(site_id))\n",
"\n",
"    # Update beach profile section plots\n",
"    with g_profiles.batch_update():\n",
"        g_profiles.data[0].x = prestorm_x\n",
"        g_profiles.data[0].y = prestorm_z\n",
"        g_profiles.data[1].x = poststorm_x\n",
"        g_profiles.data[1].y = poststorm_z\n",
"        g_profiles.data[2].x = prestorm_site_features.dune_crest_x\n",
"        g_profiles.data[2].y = prestorm_site_features.dune_crest_z\n",
"        g_profiles.data[3].x = prestorm_site_features.dune_toe_x\n",
"        g_profiles.data[3].y = prestorm_site_features.dune_toe_z\n",
"        g_profiles.data[4].x = poststorm_site_features.dune_crest_x\n",
"        g_profiles.data[4].y = poststorm_site_features.dune_crest_z\n",
"        g_profiles.data[5].x = poststorm_site_features.dune_toe_x\n",
"        g_profiles.data[5].y = poststorm_site_features.dune_toe_z\n",
"\n",
"        # The peak R_high traces follow the six profile/feature traces\n",
"        for n, forecast in enumerate(impacts['forecasted']):\n",
"            R_highs = impacts['forecasted'][forecast].query(\n",
"                \"site_id=='{}'\".format(site_id)).R_high.tolist()\n",
"            # max() raises on an empty sequence, so fall back to NaN\n",
"            R_high = max(R_highs) if R_highs else float('nan')\n",
"            g_profiles.data[6 + n].x = [200, 400]\n",
"            g_profiles.data[6 + n].y = [R_high, R_high]\n",
"\n",
"    # Relocate plan of satellite imagery (skipped for sites without coordinates)\n",
"    site_coords = df_sites.query('site_id == \"{}\"'.format(site_id))\n",
"    if not site_coords.empty:\n",
"        site_lat = site_coords['lat'].values[0]\n",
"        site_lon = site_coords['lon'].values[0]\n",
"        with g_map.batch_update():\n",
"            g_map.layout.mapbox['center'] = {'lat': site_lat, 'lon': site_lon}\n",
"            g_map.layout.mapbox['zoom'] = 15\n",
"            g_map.data[1].lat = [site_lat]\n",
"            g_map.data[1].lon = [site_lon]\n",
"            g_map.data[1].text = site_coords['lon'].index.get_level_values(\n",
"                'site_id').tolist()\n",
"\n",
"    # Update time series plots\n",
"    df_waves_site = df_waves.query(\"site_id=='{}'\".format(site_id))\n",
"    times = df_waves_site.index.get_level_values('datetime').tolist()\n",
"    Hs0s = df_waves_site.Hs0.tolist()\n",
"    Tps = df_waves_site.Tp.tolist()\n",
"\n",
"    # Only keep the tide records which fall inside the wave record\n",
"    df_tide_site = df_tides.query(\"site_id=='{}'\".format(site_id))\n",
"    if times:\n",
"        tide_times = df_tide_site.index.get_level_values('datetime')\n",
"        mask = (tide_times >= min(times)) & (tide_times <= max(times))\n",
"        df_tide_site = df_tide_site.loc[mask]\n",
"    else:\n",
"        df_tide_site = df_tide_site.iloc[0:0]\n",
"\n",
"    with g_timeseries.batch_update():\n",
"        g_timeseries.data[0].x = times\n",
"        g_timeseries.data[0].y = Hs0s\n",
"        g_timeseries.data[1].x = times\n",
"        g_timeseries.data[1].y = Tps\n",
"\n",
"        # The dune crest/toe lines span the datetimes of the last forecast,\n",
"        # or the wave datetimes when there are no forecasts.\n",
"        span_times = times\n",
"\n",
"        # Update beta values\n",
"        idx_betas = [\n",
"            n for n, x in enumerate(g_timeseries.data) if 'Beta' in x.name\n",
"        ]\n",
"        for i, forecast in zip(idx_betas, twls['forecasted']):\n",
"            df_twl = twls['forecasted'][forecast].query(\n",
"                \"site_id=='{}'\".format(site_id))\n",
"            span_times = df_twl.index.get_level_values('datetime').tolist()\n",
"            g_timeseries.data[i].x = span_times\n",
"            g_timeseries.data[i].y = df_twl.beta.tolist()\n",
"\n",
"        time_span = [min(span_times), max(span_times)] if span_times else []\n",
"        crest_z = _first_or_nan(prestorm_site_features.dune_crest_z)\n",
"        toe_z = _first_or_nan(prestorm_site_features.dune_toe_z)\n",
"\n",
"        g_timeseries.data[2].x = time_span\n",
"        g_timeseries.data[3].x = time_span\n",
"        g_timeseries.data[4].x = df_tide_site.index.get_level_values(\n",
"            'datetime')\n",
"        g_timeseries.data[2].y = (crest_z, crest_z)\n",
"        g_timeseries.data[3].y = (toe_z, toe_z)\n",
"        g_timeseries.data[4].y = df_tide_site.tide.tolist()\n",
"\n",
"        # Update rhigh values\n",
"        idx_rhighs = [\n",
"            n for n, x in enumerate(g_timeseries.data) if 'R_high' in x.name\n",
"        ]\n",
"        for i, forecast in zip(idx_rhighs, twls['forecasted']):\n",
"            df_twl = twls['forecasted'][forecast].query(\n",
"                \"site_id=='{}'\".format(site_id))\n",
"            g_timeseries.data[i].x = df_twl.index.get_level_values(\n",
"                'datetime').tolist()\n",
"            g_timeseries.data[i].y = df_twl.R_high.tolist()\n",
"\n",
"    # Update site id impacts; the text is assembled first so the widget value\n",
"    # is only set once\n",
"    observed_regime = _first_or_nan(impacts['observed'].query(\n",
"        \"site_id=='{}'\".format(site_id)).storm_regime)\n",
"    impacts_html = \"Observed: <b>{}</b><br>\".format(observed_regime)\n",
"\n",
"    for forecast in impacts['forecasted']:\n",
"        regime = _first_or_nan(impacts['forecasted'][forecast].query(\n",
"            \"site_id=='{}'\".format(site_id)).storm_regime)\n",
"        impacts_html += '{}: <b>{}</b><br>'.format(forecast, regime)\n",
"\n",
"    site_id_impacts.value = impacts_html\n",
"\n",
"\n",
"site_id_select.observe(update_profile, names=\"value\")\n",
"\n",
"# The observer only fires when the selection changes, so populate the panels\n",
"# for the initially selected site here.\n",
"update_profile(None)\n",
"\n",
"\n",
"def update_filter(change):\n",
"    \"\"\"Limit the site_id dropdown to the sites which pass every regime filter.\n",
"\n",
"    Registered as an observer of each select box; `change` is not read.\n",
"    \"\"\"\n",
"    # Start from every observed site and narrow the set down box by box. The\n",
"    # select boxes are in the same order as the frames: observed first, then\n",
"    # one per forecast.\n",
"    remaining = set(impacts['observed'].index.tolist())\n",
"    frames = [impacts['observed']] + list(impacts['forecasted'].values())\n",
"\n",
"    for box, df in zip(selectboxes, frames):\n",
"        passing = df[df.storm_regime.isin(box.value)].index.tolist()\n",
"        remaining = remaining.intersection(passing)\n",
"    site_id_select.options = sorted(remaining)\n",
"\n",
"    # TODO Update options in selectboxes with number of observations?\n",
"\n",
"\n",
"# Update the filter if any of the boxes changes\n",
"for box in selectboxes:\n",
"    box.observe(update_filter, names=\"value\")\n",
"\n",
"# Display our widgets!\n",
"widgets.VBox([\n",
"    filter_container,\n",
"    site_id_container,\n",
"    widgets.HBox([g_profiles, g_map]),\n",
"    g_timeseries,\n",
"    tables_container,\n",
"])"
6 years ago
]
},
6 years ago
{
"cell_type": "code",
6 years ago
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Write the site_no column of the sites table to a scratch csv file\n",
"df_sites['site_no'].to_csv('temp.csv')"
]
6 years ago
},
6 years ago
{
"cell_type": "markdown",
6 years ago
"metadata": {
6 years ago
"hide_input": true
6 years ago
},
6 years ago
"source": [
6 years ago
"## Confusion matrix\n",
"This visualization looks at how well the storm impact predictions performed. "
]
},
{
"cell_type": "code",
6 years ago
"execution_count": null,
"metadata": {
"code_folding": [],
6 years ago
"hide_input": false
},
6 years ago
"outputs": [],
"source": [
"# Create colorscale: red-yellow-green colormap from matplotlib\n",
"rdylgr_cmap = cm.get_cmap('RdYlGn')\n",
"\n",
"norm = matplotlib.colors.Normalize(vmax=255, vmin=0)\n",
"\n",
"def matplotlib_to_plotly(cmap, pl_entries):\n",
"    \"\"\"Sample a colormap into a plotly colorscale.\n",
"\n",
"    Args:\n",
"        cmap: Callable which maps a float to a sequence whose first three\n",
"            items are the r, g and b channels as floats in [0, 1], e.g. a\n",
"            matplotlib colormap.\n",
"        pl_entries: Number of evenly spaced samples to take; must be at least\n",
"            2 because the spacing is 1 / (pl_entries - 1).\n",
"\n",
"    Returns:\n",
"        A list of `[position, 'rgb(r, g, b)']` pairs with 0-255 channels.\n",
"    \"\"\"\n",
"    h = 1.0 / (pl_entries - 1)\n",
"    pl_colorscale = []\n",
"\n",
"    for k in range(pl_entries):\n",
"        channels = (np.array(cmap(k * h)[:3]) * 255).astype(np.uint8)\n",
"        # Convert to plain ints before formatting: numpy scalars do not print\n",
"        # as bare numbers inside a tuple on every numpy version (numpy 2 gives\n",
"        # `np.uint8(165)`), which would produce an invalid colour string.\n",
"        red, green, blue = (int(channel) for channel in channels)\n",
"        pl_colorscale.append(\n",
"            [k * h, 'rgb({}, {}, {})'.format(red, green, blue)])\n",
"\n",
"    return pl_colorscale\n",
"\n",
"rdylgr = matplotlib_to_plotly(rdylgr_cmap, 255)\n",
"\n",
"# Create widget for list of beaches. Every beach starts selected.\n",
"beaches = df_sites.beach.unique().tolist()\n",
"\n",
"beach_title = widgets.HTML(value='<b>Filter by beach:</b>')\n",
"beach_select = widgets.SelectMultiple(\n",
"    options=beaches, value=beaches, disabled=False)\n",
"beach_container = widgets.VBox([beach_title, beach_select])\n",
"\n",
"# Create confusion matrix for each forecasted impact data set\n",
"heatmaps = []\n",
"for forecast in impacts['forecasted']:\n",
"\n",
"    # Placeholder values; the beach filter observer writes the real counts\n",
"    z = [[.1, .3, .5, 2], [1.0, .8, .6, 1], [1.4, .28, 1.6, .21],\n",
"         [.6, .4, .2, 3]]\n",
"\n",
"    x = ['swash', 'collision', 'overwash', 'inundation']\n",
"    y = x[::-1]\n",
"\n",
"    z_text = z\n",
"\n",
"    fig = ff.create_annotated_heatmap(\n",
"        z, x=x, y=y, annotation_text=z_text, colorscale=rdylgr)\n",
"    heatmap = go.FigureWidget(data=fig.data, layout=fig.layout)\n",
"\n",
"    heatmap.layout.update(\n",
"        height=300, margin=go.layout.Margin(l=100, r=100, b=40, t=40, pad=0))\n",
"    heatmap.layout.xaxis.update(title='Predicted')\n",
"    heatmap.layout.yaxis.update(title='Observed')\n",
"\n",
"    # Each entry is a [title, figure] pair; the figure is children[1]\n",
"    heatmap_title = widgets.HTML(value='<b>{}</b>'.format(forecast))\n",
"    heatmaps.append(widgets.VBox([heatmap_title, heatmap]))\n",
"\n",
" \n",
"def update_heatmaps(change):\n",
"    \"\"\"Recompute every confusion-matrix heatmap for the selected beaches.\n",
"\n",
"    Registered as an observer of `beach_select`; `change` is not read. A\n",
"    heatmap is left untouched when there is nothing to compare for it.\n",
"    \"\"\"\n",
"    labels = ['swash', 'collision', 'overwash', 'inundation']\n",
"    # Predicted regimes run along x; observed regimes run along y in reverse\n",
"    # order, which is why the matrix rows are flipped below.\n",
"    x_labels = labels\n",
"    y_labels = list(reversed(labels))\n",
"\n",
"    selected_site_ids = df_sites[df_sites.beach.isin(\n",
"        beach_select.value)].index.tolist()\n",
"\n",
"    df_ob = impacts['observed']\n",
"    observed_regimes = df_ob[df_ob.index.isin(\n",
"        selected_site_ids)].storm_regime.dropna().rename(\"observed_regime\")\n",
"\n",
"    for forecast, heatmap in zip(impacts['forecasted'], heatmaps):\n",
"        df_fo = impacts['forecasted'][forecast]\n",
"        forecasted_regimes = df_fo[df_fo.index.isin(\n",
"            selected_site_ids)].storm_regime.dropna().rename(\n",
"                \"forecasted_regime\")\n",
"\n",
"        # Skip only this heatmap, the other forecasts may still have data\n",
"        if observed_regimes.empty or forecasted_regimes.empty:\n",
"            continue\n",
"\n",
"        # Only sites with both an observed and a forecasted regime count\n",
"        df_compare = pd.concat(\n",
"            [observed_regimes, forecasted_regimes],\n",
"            axis='columns',\n",
"            names=['a', 'b'],\n",
"            sort=True).dropna(axis='index')\n",
"        if df_compare.empty:\n",
"            continue\n",
"\n",
"        z = confusion_matrix(\n",
"            df_compare.observed_regime.tolist(),\n",
"            df_compare.forecasted_regime.tolist(),\n",
"            labels=labels)\n",
"        z = np.flip(z, axis=0)\n",
"\n",
"        # Make incorrect values negative, so they get assigned a different color.\n",
"        # Better for visualization. After the flip the correct predictions sit\n",
"        # on the anti-diagonal.\n",
"        signs = np.flip(np.identity(4), axis=0)\n",
"        signs[signs == 0] = -1\n",
"        z_neg_incorrect = (z * signs).tolist()\n",
"\n",
"        # Also want to display percentages\n",
"        total = np.sum(z)\n",
"        z_with_pct = [[\n",
"            '{}<br>({}%)'.format(val, np.around(val / total * 100, 1))\n",
"            for val in row\n",
"        ] for row in z]\n",
"\n",
"        fig = ff.create_annotated_heatmap(\n",
"            z_neg_incorrect,\n",
"            x=x_labels,\n",
"            y=y_labels,\n",
"            annotation_text=z_with_pct)\n",
"        heatmap.children[1].data[0].z = z_neg_incorrect\n",
"        heatmap.children[1].layout.annotations = fig.layout.annotations\n",
"\n",
"\n",
"# Hook changes to beach filter to update confusion heatmaps\n",
"beach_select.observe(update_heatmaps, names=\"value\")\n",
"\n",
"# The observer only fires when the selection changes, so replace the\n",
"# placeholder values once before the first render.\n",
"update_heatmaps(None)\n",
"\n",
"# Display our widgets\n",
"widgets.VBox([beach_container, widgets.VBox(heatmaps)])"
6 years ago
]
},
{
"cell_type": "code",
6 years ago
"execution_count": null,
6 years ago
"metadata": {},
6 years ago
"outputs": [],
6 years ago
"source": [
6 years ago
"# To output to file\n",
6 years ago
"# fig = heatmaps[1].children[1]\n",
"# img_bytes = pio.write_image(fig, 'fig1.png',format='png', width=600, height=400, scale=5)\n",
6 years ago
"\n",
"# fig = g_profiles\n",
"# img_bytes = pio.write_image(fig, 'fig1.png',format='png', width=600, height=200, scale=5)\n"
]
},
6 years ago
{
"cell_type": "markdown",
6 years ago
"metadata": {},
6 years ago
"source": [
6 years ago
"## Identify sites with no results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Check forecast TWL\n",
"The most probable explanation for TWLs which are NaN is that the prestorm dune toe is not defined."
6 years ago
]
},
6 years ago
{
"cell_type": "code",
6 years ago
"execution_count": null,
"metadata": {},
"outputs": [],
6 years ago
"source": [
6 years ago
"df_twls = twls['forecasted']['mean_slope_sto06']\n",
"\n",
"# A site is flagged when every one of its values in the column is null\n",
"slope_mask = df_twls.groupby('site_id').agg({'beta': lambda x: x.isnull().sum() == len(x)}).beta\n",
"print('The following sites have no slope defined in the twl csv file:')\n",
"print(slope_mask.index[slope_mask].tolist())\n",
"print()\n",
"\n",
"R_high_mask = df_twls.groupby('site_id').agg({'R_high': lambda x: x.isnull().sum() == len(x)}).R_high\n",
"print('The following sites have no R_high defined in the twl csv file:')\n",
"# Report the R_high mask here (the slope mask used to be printed a second time)\n",
"print(R_high_mask.index[R_high_mask].tolist())\n"
6 years ago
]
6 years ago
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
6 years ago
"### Check observed impacts\n",
"Find sites which have no observed impacts. If we do not identify an observed storm regime, the site cannot be included when we're trying to compare predicted and observed impacts."
6 years ago
]
},
{
"cell_type": "code",
6 years ago
"execution_count": null,
"metadata": {},
"outputs": [],
6 years ago
"source": [
"# Observed impact rows for which no storm regime is recorded\n",
"df_impacts = impacts['observed']\n",
"df_no_obs_impacts = df_impacts.loc[df_impacts['storm_regime'].isnull()]\n",
"no_obs_impacts_sites = df_no_obs_impacts.index\n",
"\n",
"df_no_obs_impacts"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It looks like the problem arises when we cannot identify the prestorm and poststorm swash and berm volume changes."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Check pre and post storm profiles\n",
"It looks like, for some reason, some sites (e.g. `STUART0011`) have no poststorm profile. Not sure if this is a processing error, or if the profile has not been included in the dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
6 years ago
"source": []
6 years ago
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Check prestorm dune crests\n",
"If there are no prestorm dune crests defined, we cannot define a mean slope and hence we won't be able to get observed impacts"
]
},
{
"cell_type": "code",
6 years ago
"execution_count": null,
"metadata": {},
"outputs": [],
6 years ago
"source": [
"# `dune_crest_x != dune_crest_x` is only true for NaN, so the query keeps the\n",
"# prestorm rows whose dune crest is missing.\n",
"df_no_crests = df_profile_features_crest_toes.query(\n",
"    'profile_type==\"prestorm\" & (dune_crest_x != dune_crest_x)')\n",
"print('{} sites have no dune crests:'.format(len(df_no_crests)))\n",
"\n",
"for site_id in df_no_crests.index:\n",
"    print(site_id)"
]
6 years ago
}
],
"metadata": {
6 years ago
"hide_input": false,
6 years ago
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
6 years ago
},
"toc": {
"base_numbering": 1,
"nav_menu": {
"height": "47px",
"width": "262px"
},
6 years ago
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
6 years ago
"toc_position": {
6 years ago
"height": "656px",
"left": "508px",
"top": "90px",
"width": "218.797px"
6 years ago
},
6 years ago
"toc_section_display": true,
6 years ago
"toc_window_display": true
6 years ago
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}