nsw-2016-storm-impact/notebooks/01_exploration.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data exploration\n",
    "This notebook provides an example of how the data is loaded and accessed for further analysis."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render figures inline and enable autoreloading of our modules.\n",
    "# Most of the code will be located in the /src/ folder and then called from\n",
    "# the notebook, so use mode 2: every module is reloaded before each cell is\n",
    "# executed and edits to those files are picked up without a kernel restart.\n",
    "# (A bare `%autoreload` only reloads once, at the moment it is run.)\n",
    "%matplotlib inline\n",
    "%reload_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.debugger import set_trace\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "\n",
    "import plotly\n",
    "import plotly.graph_objs as go\n",
    "import plotly.plotly as py\n",
    "import plotly.tools as tls\n",
    "import plotly.figure_factory as ff\n",
    "import plotly.io as pio\n",
    "\n",
    "\n",
    "import matplotlib\n",
    "from matplotlib import cm\n",
    "import colorlover as cl\n",
    "\n",
    "from ipywidgets import widgets, Output\n",
    "from IPython.display import display, clear_output, Image, HTML\n",
    "\n",
    "from sklearn.metrics import confusion_matrix"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import data\n",
    "Import our data into pandas DataFrames for the analysis. Data files are `.csv` files which are stored in the project's `data/interim/` folder (`../data/interim/` relative to this notebook)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pixiedust": {
     "displayParams": {}
    }
   },
   "outputs": [],
   "source": [
    "def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
    "    \"\"\"Load a single csv file into a DataFrame.\n",
    "\n",
    "    Args:\n",
    "        csv: File name of the csv, relative to `data_folder`.\n",
    "        index_col: Passed to `pd.read_csv` to pick the index column(s).\n",
    "        data_folder: Folder containing the csv file.\n",
    "\n",
    "    Returns:\n",
    "        The DataFrame returned by `pd.read_csv`.\n",
    "    \"\"\"\n",
    "    print('Importing {}'.format(csv))\n",
    "    csv_path = os.path.join(data_folder, csv)\n",
    "    return pd.read_csv(csv_path, index_col=index_col)\n",
    "\n",
    "df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n",
    "df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n",
    "df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n",
    "df_sites = df_from_csv('sites.csv', index_col=[0])\n",
    "df_profile_features_crest_toes = df_from_csv(\n",
    "    'profile_features_crest_toes.csv', index_col=[0])\n",
    "\n",
    "# Note that the forecasted data sets should be in the same order for impacts\n",
    "# and twls, so both dictionaries are built from the same list of names.\n",
    "forecast_names = ['foreshore_slope_sto06', 'mean_slope_sto06']\n",
    "\n",
    "impacts = {\n",
    "    'forecasted': {\n",
    "        name: df_from_csv(\n",
    "            'impacts_forecasted_{}.csv'.format(name), index_col=[0])\n",
    "        for name in forecast_names\n",
    "    },\n",
    "    'observed': df_from_csv('impacts_observed.csv', index_col=[0]),\n",
    "}\n",
    "\n",
    "twls = {\n",
    "    'forecasted': {\n",
    "        name: df_from_csv('twl_{}.csv'.format(name), index_col=[0, 1])\n",
    "        for name in forecast_names\n",
    "    },\n",
    "}\n",
    "print('Done!')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Profile/timeseries dashboard"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hide_input": true
   },
   "source": [
    "The following interactive data explorer displays information on a per `site_id` basis. It can be used to examine pre/post storm cross-sections, water level time series and observed/predicted storm impacts."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "code_folding": [
     408
    ],
    "hide_input": false
   },
   "outputs": [],
   "source": [
    "# Create widgets for filtering by observed and forecasted impacts\n",
    "filter_title = widgets.HTML(\n",
    "    value=\"<b>Filter by observed and predicted impacts:</b>\")\n",
    "\n",
    "\n",
    "def _regime_selectbox(df_regimes):\n",
    "    \"\"\"Build a multi-select of the storm regimes in `df_regimes`, all selected.\"\"\"\n",
    "    regimes = df_regimes.storm_regime.dropna().unique().tolist()\n",
    "    return widgets.SelectMultiple(\n",
    "        options=regimes, value=list(regimes), disabled=False)\n",
    "\n",
    "\n",
    "# The first box filters the observed impacts, followed by one box per forecast\n",
    "titles = ['Observed Impacts']\n",
    "selectboxes = [_regime_selectbox(impacts['observed'])]\n",
    "for forecast in impacts['forecasted']:\n",
    "    titles.append('Forecasted: {}'.format(forecast))\n",
    "    selectboxes.append(_regime_selectbox(impacts['forecasted'][forecast]))\n",
    "\n",
    "titles = [widgets.HTML(value=title) for title in titles]\n",
    "\n",
    "# Lay the boxes out side by side, each underneath its own title\n",
    "titled_boxes = [\n",
    "    widgets.VBox(children=[title, box])\n",
    "    for title, box in zip(titles, selectboxes)\n",
    "]\n",
    "children = widgets.HBox(children=titled_boxes)\n",
    "filter_container = widgets.VBox(children=[filter_title, children])\n",
    "\n",
    "# Create widgets for selecting site_id\n",
    "site_id_title = widgets.HTML(value=\"<b>Filter by site_id:</b>\")\n",
    "\n",
    "# Preselect NARRA0001 when it is available. A Dropdown raises if its value is\n",
    "# not one of its options, so fall back to the first site_id (or to no\n",
    "# selection) when the profiles do not contain that site.\n",
    "default_site_id = 'NARRA0001'\n",
    "site_ids = (df_profiles.index.get_level_values('site_id').unique()\n",
    "            .sort_values().tolist())\n",
    "if default_site_id not in site_ids:\n",
    "    default_site_id = site_ids[0] if site_ids else None\n",
    "\n",
    "site_id_select = widgets.Dropdown(\n",
    "    description='site_id:   ',\n",
    "    options=site_ids,\n",
    "    value=default_site_id,\n",
    ")\n",
    "\n",
    "# Filled by update_profile with the observed/forecasted regimes of the site\n",
    "site_id_impacts = widgets.HTML(value=\"\")\n",
    "\n",
    "site_id_picker = widgets.VBox(\n",
    "    children=[site_id_title,\n",
    "              widgets.HBox(children=[site_id_select])])\n",
    "site_id_container = widgets.HBox(children=[site_id_picker, site_id_impacts])\n",
    "\n",
    "# Build colors for each of our forecasts. At least 3 colors are requested\n",
    "# (presumably the smallest scale colorlover provides; TODO confirm).\n",
    "n_colors = max(len(impacts['forecasted']), 3)\n",
    "colors = list(reversed(cl.scales[str(n_colors)]['seq']['YlGnBu']))\n",
    "\n",
    "\n",
    "def _profile_line(name, color):\n",
    "    \"\"\"Placeholder line trace for a beach profile.\"\"\"\n",
    "    return go.Scatter(\n",
    "        x=[0], y=[0], name=name, line=dict(color=color, width=2))\n",
    "\n",
    "\n",
    "def _dune_marker(name, outline_color, symbol=None):\n",
    "    \"\"\"Placeholder hollow marker trace for a dune crest or dune toe.\"\"\"\n",
    "    marker = dict(\n",
    "        color='rgba(255,255,255,0)',\n",
    "        size=10,\n",
    "        line=dict(color=outline_color, width=2))\n",
    "    if symbol is not None:\n",
    "        marker['symbol'] = symbol\n",
    "    return go.Scatter(\n",
    "        x=[0], y=[0], name=name, mode='markers', marker=marker)\n",
    "\n",
    "\n",
    "def _profile_axis(title, autorange, axis_range):\n",
    "    \"\"\"Axis settings shared by both axes of the profile panel.\"\"\"\n",
    "    return dict(\n",
    "        title=title,\n",
    "        autorange=autorange,\n",
    "        showgrid=True,\n",
    "        zeroline=True,\n",
    "        showline=True,\n",
    "        range=axis_range)\n",
    "\n",
    "\n",
    "# Add panel for pre/post storm profiles. All traces start with placeholder\n",
    "# data and are filled in by update_profile.\n",
    "trace1 = _profile_line('Pre Storm Profile', 'rgb(51,160,44)')\n",
    "trace2 = _profile_line('Post Storm Profile', 'rgb(255,127,0)')\n",
    "trace3 = _dune_marker('Pre-storm dune crest', 'rgba(106,61,154, 1)')\n",
    "trace4 = _dune_marker('Pre-storm dune toe', 'rgba(202,178,214,1)')\n",
    "trace5 = _dune_marker(\n",
    "    'Post-storm dune crest', 'rgba(106,61,154, 1)', symbol='square')\n",
    "trace6 = _dune_marker(\n",
    "    'Post-storm dune toe', 'rgba(202,178,214,1)', symbol='square')\n",
    "\n",
    "# One line per forecast, used to mark its peak R_high\n",
    "forecast_traces = [\n",
    "    go.Scatter(\n",
    "        x=[0],\n",
    "        y=[0],\n",
    "        name='Peak R_high: {}'.format(forecast),\n",
    "        mode='lines',\n",
    "        line=dict(color=color, width=4))\n",
    "    for forecast, color in zip(impacts['forecasted'], colors)\n",
    "]\n",
    "\n",
    "layout = go.Layout(\n",
    "    title='Bed Profiles',\n",
    "    height=300,\n",
    "    legend=dict(font={'size': 10}),\n",
    "    margin=dict(t=50, b=50, l=50, r=20),\n",
    "    xaxis=_profile_axis('x (m)', True, [0, 200]),\n",
    "    yaxis=_profile_axis('z (m)', False, [-1, 20]))\n",
    "\n",
    "profile_traces = [trace1, trace2, trace3, trace4, trace5, trace6]\n",
    "g_profiles = go.FigureWidget(\n",
    "    data=profile_traces + forecast_traces, layout=layout)\n",
    "\n",
    "# Add panel for the Mapbox satellite map.\n",
    "# The access token can be overridden with the MAPBOX_ACCESS_TOKEN environment\n",
    "# variable so credentials do not have to live in the notebook. The token that\n",
    "# used to be hard-coded is kept as the fallback so existing setups still work.\n",
    "# NOTE(review): consider revoking that token and removing the fallback.\n",
    "mapbox_access_token = (\n",
    "    os.environ.get('MAPBOX_ACCESS_TOKEN')\n",
    "    or 'pk.eyJ1IjoiY2hyaXNsZWFtYW4iLCJhIjoiY2pvNTY1MzZpMDc2OTN2bmw5MGsycHp5bCJ9.U2dwFg2c7RFjUNSayERUiw'\n",
    ")\n",
    "\n",
    "data = [\n",
    "    # Marker for every site\n",
    "    go.Scattermapbox(\n",
    "        lat=df_sites['lat'],\n",
    "        lon=df_sites['lon'],\n",
    "        mode='markers',\n",
    "        marker=dict(size=10),\n",
    "        text=df_sites.index.get_level_values('site_id'),\n",
    "    ),\n",
    "    # Larger red marker which update_profile moves onto the selected site\n",
    "    go.Scattermapbox(\n",
    "        lat=[0],\n",
    "        lon=[0],\n",
    "        mode='markers',\n",
    "        marker=dict(\n",
    "            size=20,\n",
    "            color='rgb(255, 0, 0)',\n",
    "            opacity=0.5,\n",
    "        ),\n",
    "        text=df_sites.index.get_level_values('site_id'),\n",
    "    ),\n",
    "]\n",
    "\n",
    "layout = go.Layout(\n",
    "    autosize=True,\n",
    "    height=300,\n",
    "    hovermode='closest',\n",
    "    showlegend=False,\n",
    "    margin=dict(t=50, b=50, l=20, r=20),\n",
    "    mapbox=dict(\n",
    "        accesstoken=mapbox_access_token,\n",
    "        bearing=0,\n",
    "        center=dict(lat=-33.7, lon=151.3),\n",
    "        pitch=0,\n",
    "        zoom=12,\n",
    "        style='satellite-streets'),\n",
    ")\n",
    "\n",
    "fig = dict(data=data, layout=layout)\n",
    "g_map = go.FigureWidget(data=data, layout=layout)\n",
    "\n",
    "subplot = tls.make_subplots(3, 1, print_grid=False, shared_xaxes=True)\n",
    "g_timeseries = go.FigureWidget(subplot)\n",
    "\n",
    "\n",
    "def _add_timeseries_trace(row, **scatter_kwargs):\n",
    "    \"\"\"Append a go.Scatter built from `scatter_kwargs` to panel `row`.\"\"\"\n",
    "    g_timeseries.add_trace(go.Scatter(**scatter_kwargs), row=row, col=1)\n",
    "\n",
    "\n",
    "# All traces start with placeholder data. update_profile addresses the first\n",
    "# five by position, so they have to be added in this order.\n",
    "\n",
    "# Add traces for Hs0 and Tp\n",
    "_add_timeseries_trace(3, x=[0, 1], y=[0, 1], name='Hs0')\n",
    "_add_timeseries_trace(3, x=[0, 1], y=[0, 1], name='Tp')\n",
    "\n",
    "# Add water levels\n",
    "_add_timeseries_trace(\n",
    "    1,\n",
    "    x=[0, 3],\n",
    "    y=[0, 3],\n",
    "    name='Dune Crest',\n",
    "    mode='lines',\n",
    "    line=dict(color='rgb(214, 117, 14)', width=2, dash='dot'))\n",
    "_add_timeseries_trace(\n",
    "    1,\n",
    "    x=[0, 3],\n",
    "    y=[0, 3],\n",
    "    name='Dune Toe',\n",
    "    mode='lines',\n",
    "    line=dict(color='rgb(142, 77, 8)', width=2, dash='dash'))\n",
    "_add_timeseries_trace(\n",
    "    1,\n",
    "    x=[0, 3],\n",
    "    y=[0, 3],\n",
    "    name='Tide+Surge WL',\n",
    "    line=dict(color='rgb(8,51,137)', width=2, dash='dot'))\n",
    "\n",
    "# Add trace for each forecasted R_high\n",
    "for forecast, color in zip(twls['forecasted'], colors):\n",
    "    _add_timeseries_trace(\n",
    "        1,\n",
    "        x=[0],\n",
    "        y=[0],\n",
    "        name='R_high: {}'.format(forecast),\n",
    "        line=dict(color=color, width=3))\n",
    "\n",
    "# Add trace for each forecasted beta term\n",
    "for forecast, color in zip(impacts['forecasted'], colors):\n",
    "    _add_timeseries_trace(\n",
    "        2,\n",
    "        x=[0, 1],\n",
    "        y=[0, 1],\n",
    "        name='Beta: {}'.format(forecast),\n",
    "        line=dict(color=color, width=3))\n",
    "\n",
    "# Create axis for Tp on same plot as Hs\n",
    "g_timeseries.layout['yaxis4'] = dict(overlaying='y3', side='right')\n",
    "g_timeseries.data[1].yaxis = 'y4'\n",
    "\n",
    "# Add labels to each axis\n",
    "for axis_name, axis_title in (('xaxis', 'datetime'),\n",
    "                              ('yaxis1', 'z (mAHD)'),\n",
    "                              ('yaxis2', 'beta (-)'),\n",
    "                              ('yaxis3', 'Hs0 (m)'),\n",
    "                              ('yaxis4', 'Tp (s)')):\n",
    "    g_timeseries.layout[axis_name]['title'] = axis_title\n",
    "\n",
    "# Update figure size\n",
    "g_timeseries.layout.update(\n",
    "    height=400,\n",
    "    legend=dict(font={'size': 10}),\n",
    "    margin=dict(t=20, l=50, r=20, b=100))\n",
    "\n",
    "# Add panel for some tables: one for the observed impacts and one per forecast\n",
    "table_names = ['observed'] + list(impacts['forecasted'])\n",
    "titles = [widgets.HTML(value=\"{}\".format(name)) for name in table_names]\n",
    "\n",
    "\n",
    "def get_observed_impacts_table(site_id):\n",
    "    \"\"\"Display the observed impacts of `site_id`, transposed.\"\"\"\n",
    "    df_site = impacts['observed'].query(\"site_id=='{}'\".format(site_id))\n",
    "    display(df_site.T)\n",
    "\n",
    "\n",
    "def get_forecasted_impacts_table(site_id, forecast):\n",
    "    \"\"\"Display the impacts which `forecast` predicts for `site_id`, transposed.\"\"\"\n",
    "    df_forecast = impacts['forecasted'][forecast]\n",
    "    df_site = df_forecast.query(\"site_id=='{}'\".format(site_id))\n",
    "    display(df_site.T)\n",
    "\n",
    "\n",
    "impacts_table_observed = widgets.interactive_output(\n",
    "    get_observed_impacts_table, {'site_id': site_id_select})\n",
    "\n",
    "# interactive_output takes its arguments from widgets, so each forecast name\n",
    "# is supplied through its HTML title widget, whose value is that name.\n",
    "forecasted_impacts_tables = [\n",
    "    widgets.interactive_output(get_forecasted_impacts_table, {\n",
    "        'site_id': site_id_select,\n",
    "        'forecast': title\n",
    "    }) for forecast, title in zip(impacts['forecasted'], titles[1:])\n",
    "]\n",
    "\n",
    "tables = [impacts_table_observed] + forecasted_impacts_tables\n",
    "\n",
    "# Put every table underneath its title, then line them up side by side\n",
    "title_tables = []\n",
    "for title, table in zip(titles, tables):\n",
    "    title_tables.append(widgets.VBox(children=[title, table]))\n",
    "\n",
    "tables_container = widgets.HBox(children=list(title_tables))\n",
    "\n",
    "\n",
    "def update_profile(change):\n",
    "    \"\"\"Refresh every dashboard panel for the site chosen in `site_id_select`.\n",
    "\n",
    "    Registered as the observer of the site_id dropdown. `change` is not\n",
    "    inspected; the selected site is read from the widget itself.\n",
    "    \"\"\"\n",
    "    site_id = site_id_select.value\n",
    "    if site_id is None:\n",
    "        return\n",
    "\n",
    "    # Cross-sections of the selected site before and after the storm\n",
    "    site_profile = df_profiles.query('site_id == \"{}\"'.format(site_id))\n",
    "    prestorm_profile = site_profile.query('profile_type == \"prestorm\"')\n",
    "    poststorm_profile = site_profile.query('profile_type == \"poststorm\"')\n",
    "\n",
    "    # Dune crest and toe features of the selected site\n",
    "    prestorm_site_features = df_profile_features_crest_toes.query(\n",
    "        'site_id == \"{}\" and profile_type==\"prestorm\"'.format(site_id))\n",
    "    poststorm_site_features = df_profile_features_crest_toes.query(\n",
    "        'site_id == \"{}\" and profile_type==\"poststorm\"'.format(site_id))\n",
    "    prestorm_dune_crest_z = prestorm_site_features.dune_crest_z\n",
    "    prestorm_dune_toe_z = prestorm_site_features.dune_toe_z\n",
    "\n",
    "    # (x, z) data for the first six traces of g_profiles, in trace order\n",
    "    profile_xz = [\n",
    "        (prestorm_profile.index.get_level_values('x').tolist(),\n",
    "         prestorm_profile.z.tolist()),\n",
    "        (poststorm_profile.index.get_level_values('x').tolist(),\n",
    "         poststorm_profile.z.tolist()),\n",
    "        (prestorm_site_features.dune_crest_x, prestorm_dune_crest_z),\n",
    "        (prestorm_site_features.dune_toe_x, prestorm_dune_toe_z),\n",
    "        (poststorm_site_features.dune_crest_x,\n",
    "         poststorm_site_features.dune_crest_z),\n",
    "        (poststorm_site_features.dune_toe_x,\n",
    "         poststorm_site_features.dune_toe_z),\n",
    "    ]\n",
    "\n",
    "    # Update beach profile section plots\n",
    "    with g_profiles.batch_update():\n",
    "        for trace, (trace_x, trace_z) in zip(g_profiles.data, profile_xz):\n",
    "            trace.x = trace_x\n",
    "            trace.y = trace_z\n",
    "\n",
    "        # Horizontal line at the peak R_high of each forecast\n",
    "        for n, forecast in enumerate(impacts['forecasted']):\n",
    "            df_forecast_site = impacts['forecasted'][forecast].query(\n",
    "                \"site_id=='{}'\".format(site_id))\n",
    "            peak_R_high = max(df_forecast_site.R_high)\n",
    "            forecast_trace = g_profiles.data[6 + n]\n",
    "            forecast_trace.x = [200, 400]\n",
    "            forecast_trace.y = [peak_R_high, peak_R_high]\n",
    "\n",
    "    # Relocate plan of satellite imagery\n",
    "    site_coords = df_sites.query('site_id == \"{}\"'.format(site_id))\n",
    "    with g_map.batch_update():\n",
    "        site_lat = site_coords['lat'].values[0]\n",
    "        site_lon = site_coords['lon'].values[0]\n",
    "        g_map.layout.mapbox['center'] = {'lat': site_lat, 'lon': site_lon}\n",
    "        g_map.layout.mapbox['zoom'] = 15\n",
    "        highlight = g_map.data[1]\n",
    "        highlight.lat = [site_lat]\n",
    "        highlight.lon = [site_lon]\n",
    "        highlight.text = site_coords['lon'].index.get_level_values(\n",
    "            'site_id').tolist()\n",
    "\n",
    "    # Update time series plots\n",
    "    df_waves_site = df_waves.query(\"site_id=='{}'\".format(site_id))\n",
    "    wave_times = df_waves_site.index.get_level_values('datetime').tolist()\n",
    "    Hs0s = df_waves_site.Hs0.tolist()\n",
    "    Tps = df_waves_site.Tp.tolist()\n",
    "\n",
    "    # Only keep the tides which fall inside the period covered by the waves\n",
    "    df_tide_site = df_tides.query(\"site_id=='{}'\".format(site_id))\n",
    "    tide_times = df_tide_site.index.get_level_values('datetime')\n",
    "    mask = (tide_times >= min(wave_times)) & (tide_times <= max(wave_times))\n",
    "    df_tide_site = df_tide_site.loc[mask]\n",
    "\n",
    "    # Times spanned by the dune crest/toe lines: the times of the last beta\n",
    "    # series updated below, or the wave times if there is none.\n",
    "    span_times = wave_times\n",
    "\n",
    "    with g_timeseries.batch_update():\n",
    "        hs0_trace, tp_trace = g_timeseries.data[0], g_timeseries.data[1]\n",
    "        hs0_trace.x = wave_times\n",
    "        hs0_trace.y = Hs0s\n",
    "        tp_trace.x = wave_times\n",
    "        tp_trace.y = Tps\n",
    "\n",
    "        # Update beta values\n",
    "        idx_betas = [\n",
    "            n for n, trace in enumerate(g_timeseries.data)\n",
    "            if 'Beta' in trace.name\n",
    "        ]\n",
    "        for i, forecast in zip(idx_betas, twls['forecasted']):\n",
    "            df_twl = twls['forecasted'][forecast].query(\n",
    "                \"site_id=='{}'\".format(site_id))\n",
    "            span_times = df_twl.index.get_level_values('datetime').tolist()\n",
    "            beta = df_twl.beta.tolist()\n",
    "            g_timeseries.data[i].x = span_times\n",
    "            g_timeseries.data[i].y = beta\n",
    "\n",
    "        # Dune crest/toe are drawn as horizontal lines, next to the tides\n",
    "        crest_trace, toe_trace, tide_trace = g_timeseries.data[2:5]\n",
    "        crest_trace.x = [min(span_times), max(span_times)]\n",
    "        toe_trace.x = [min(span_times), max(span_times)]\n",
    "        tide_trace.x = df_tide_site.index.get_level_values('datetime')\n",
    "        crest_trace.y = (prestorm_dune_crest_z.tolist()[0],\n",
    "                         prestorm_dune_crest_z.tolist()[0])\n",
    "        toe_trace.y = (prestorm_dune_toe_z.tolist()[0],\n",
    "                       prestorm_dune_toe_z.tolist()[0])\n",
    "        tide_trace.y = df_tide_site.tide.tolist()\n",
    "\n",
    "        # Update rhigh values\n",
    "        idx_rhighs = [\n",
    "            n for n, trace in enumerate(g_timeseries.data)\n",
    "            if 'R_high' in trace.name\n",
    "        ]\n",
    "        for i, forecast in zip(idx_rhighs, twls['forecasted']):\n",
    "            df_twl = twls['forecasted'][forecast].query(\n",
    "                \"site_id=='{}'\".format(site_id))\n",
    "            twl_times = df_twl.index.get_level_values('datetime').tolist()\n",
    "            R_high = df_twl.R_high.tolist()\n",
    "            g_timeseries.data[i].x = twl_times\n",
    "            g_timeseries.data[i].y = R_high\n",
    "\n",
    "    # Update site id impacts\n",
    "    df_observed_site = impacts['observed'].query(\n",
    "        \"site_id=='{}'\".format(site_id))\n",
    "    observed_regime = df_observed_site.storm_regime.values[0]\n",
    "    site_id_impacts.value = \"Observed: <b>{}</b><br>\".format(observed_regime)\n",
    "\n",
    "    for forecast in impacts['forecasted']:\n",
    "        df_forecast_site = impacts['forecasted'][forecast].query(\n",
    "            \"site_id=='{}'\".format(site_id))\n",
    "        regime = df_forecast_site.storm_regime.values[0]\n",
    "        site_id_impacts.value += '{}: <b>{}</b><br>'.format(forecast, regime)\n",
    "\n",
    "\n",
    "# Refresh the panels whenever another site is selected\n",
    "site_id_select.observe(update_profile, names=\"value\")\n",
    "\n",
    "\n",
    "def update_filter(change):\n",
    "    \"\"\"Limit the site_id dropdown to the sites which pass every regime filter.\n",
    "\n",
    "    Registered as the observer of the select boxes; `change` is not inspected.\n",
    "    \"\"\"\n",
    "    # Data sets in the same order as their select boxes: observed first,\n",
    "    # followed by each forecast\n",
    "    dfs = [impacts['observed']] + list(impacts['forecasted'].values())\n",
    "\n",
    "    # Iterate through each box, only keeping site_ids which are not filtered out by each box\n",
    "    valid_site_ids = impacts['observed'].index.tolist()\n",
    "    for box, df in zip(selectboxes, dfs):\n",
    "        selected = df.storm_regime.isin(box.value)\n",
    "        valid_site_ids = set(valid_site_ids).intersection(\n",
    "            df[selected].index.tolist())\n",
    "    site_id_select.options = sorted(valid_site_ids)\n",
    "\n",
    "    # TODO Update options in selectboxes with number of observations?\n",
    "\n",
    "\n",
    "# Update the filter if any of the boxes changes\n",
    "for box in selectboxes:\n",
    "    box.observe(update_filter, names=\"value\")\n",
    "\n",
    "# Display our widgets!\n",
    "dashboard_rows = [\n",
    "    filter_container,\n",
    "    site_id_container,\n",
    "    widgets.HBox([g_profiles, g_map]),\n",
    "    g_timeseries,\n",
    "    tables_container,\n",
    "]\n",
    "widgets.VBox(dashboard_rows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): looks like a leftover scratch export; confirm whether\n",
    "# temp.csv is still needed.\n",
    "df_sites['site_no'].to_csv('temp.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hide_input": true
   },
   "source": [
    "## Confusion matrix\n",
    "This visualization looks at how well the storm impact predictions performed. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "code_folding": [],
    "hide_input": false
   },
   "outputs": [],
   "source": [
    "# Create colorscale from matplotlib's red-yellow-green colormap\n",
    "rdylgr_cmap = cm.get_cmap('RdYlGn')\n",
    "\n",
    "norm = matplotlib.colors.Normalize(0, 255)\n",
    "\n",
    "def matplotlib_to_plotly(cmap, pl_entries):\n",
    "    \"\"\"Convert a matplotlib colormap into a plotly colorscale.\n",
    "\n",
    "    Args:\n",
    "        cmap: Callable mapping a position in [0, 1] to a color tuple whose\n",
    "            first three entries are r, g, b as fractions of 1 (e.g. a\n",
    "            matplotlib colormap).\n",
    "        pl_entries: Number of evenly spaced entries to sample, at least 2.\n",
    "\n",
    "    Returns:\n",
    "        List of [position, 'rgb(r, g, b)'] pairs with integer 0-255 channels.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If `pl_entries` is smaller than 2.\n",
    "    \"\"\"\n",
    "    if pl_entries < 2:\n",
    "        raise ValueError(\n",
    "            'pl_entries must be at least 2, got {}'.format(pl_entries))\n",
    "\n",
    "    h = 1.0 / (pl_entries - 1)\n",
    "    pl_colorscale = []\n",
    "\n",
    "    for k in range(pl_entries):\n",
    "        # Plain ints keep numpy scalar reprs (e.g. 'np.uint8(165)') out of the\n",
    "        # color string; int() truncates like the previous np.uint8 cast did.\n",
    "        r, g, b = (int(channel * 255) for channel in cmap(k * h)[:3])\n",
    "        pl_colorscale.append([k * h, 'rgb({}, {}, {})'.format(r, g, b)])\n",
    "\n",
    "    return pl_colorscale\n",
    "\n",
    "rdylgr = matplotlib_to_plotly(rdylgr_cmap, 255)\n",
    "\n",
    "\n",
    "\n",
    "# Create widget for list of beaches, with every beach selected to begin with.\n",
    "beaches = df_sites['beach'].unique().tolist()\n",
    "\n",
    "beach_title = widgets.HTML(value=\"<b>Filter by beach:</b>\")\n",
    "\n",
    "beach_select = widgets.SelectMultiple(\n",
    "    options=beaches,\n",
    "    value=beaches,\n",
    "    disabled=False,\n",
    ")\n",
    "\n",
    "beach_container = widgets.VBox(children=[beach_title, beach_select])\n",
    "\n",
    "# Create confusion matrix for each forecasted impact data set.\n",
    "# The z values are placeholders; update_heatmaps writes the real counts.\n",
    "heatmaps = []\n",
    "for forecast in impacts['forecasted']:\n",
    "\n",
    "    x = ['swash', 'collision', 'overwash', 'inundation']\n",
    "    y = list(reversed(x))\n",
    "\n",
    "    z = [\n",
    "        [.1, .3, .5, 2],\n",
    "        [1.0, .8, .6, 1],\n",
    "        [1.4, .28, 1.6, .21],\n",
    "        [.6, .4, .2, 3],\n",
    "    ]\n",
    "    z_text = z\n",
    "\n",
    "    fig = ff.create_annotated_heatmap(\n",
    "        z, x=x, y=y, annotation_text=z_text, colorscale=rdylgr)\n",
    "    heatmap = go.FigureWidget(data=fig.data, layout=fig.layout)\n",
    "\n",
    "    heatmap.layout.update(\n",
    "        height=300, margin=go.layout.Margin(l=100, r=100, b=40, t=40, pad=0))\n",
    "    heatmap.layout.xaxis.update(title='Predicted')\n",
    "    heatmap.layout.yaxis.update(title='Observed')\n",
    "\n",
    "    # Each entry is a titled box with the figure as its second child\n",
    "    heatmap_title = widgets.HTML(value=\"<b>{}</b>\".format(forecast))\n",
    "    heatmaps.append(widgets.VBox([heatmap_title, heatmap]))\n",
    "\n",
    "    \n",
    "def update_heatmaps(change):\n",
    "    \"\"\"Recompute the confusion matrix of every forecast for the selected beaches.\n",
    "\n",
    "    Registered as the observer of `beach_select`; `change` is not inspected.\n",
    "    A forecast without any regimes to compare is skipped and keeps whatever\n",
    "    its heatmap currently shows, without stopping the remaining forecasts\n",
    "    from being updated.\n",
    "    \"\"\"\n",
    "    regimes = ['swash', 'collision', 'overwash', 'inundation']\n",
    "\n",
    "    # The selection and the observed regimes are the same for every forecast\n",
    "    selected_site_ids = df_sites[df_sites.beach.isin(\n",
    "        beach_select.value)].index.tolist()\n",
    "    df_ob = impacts['observed']\n",
    "    observed_regimes = df_ob[df_ob.index.isin(\n",
    "        selected_site_ids)].storm_regime.dropna().rename(\"observed_regime\")\n",
    "    if observed_regimes.empty:\n",
    "        return\n",
    "\n",
    "    # The rows of the matrix are flipped below, which puts the correct\n",
    "    # predictions on the anti-diagonal. Every other cell is multiplied by -1:\n",
    "    # make incorrect values negative, so they get assigned a different color.\n",
    "    signs = np.flip(np.identity(len(regimes)), axis=0)\n",
    "    signs[signs == 0] = -1\n",
    "\n",
    "    for forecast, heatmap in zip(impacts['forecasted'], heatmaps):\n",
    "        df_fo = impacts['forecasted'][forecast]\n",
    "        forecasted_regimes = df_fo[df_fo.index.isin(\n",
    "            selected_site_ids)].storm_regime.dropna().rename(\n",
    "                \"forecasted_regime\")\n",
    "        if forecasted_regimes.empty:\n",
    "            continue\n",
    "\n",
    "        # Only sites with both an observed and a forecasted regime can be compared\n",
    "        df_compare = pd.concat(\n",
    "            [observed_regimes, forecasted_regimes], axis='columns',\n",
    "            sort=True).dropna(axis='index')\n",
    "        if df_compare.empty:\n",
    "            continue\n",
    "\n",
    "        z = confusion_matrix(\n",
    "            df_compare.observed_regime.tolist(),\n",
    "            df_compare.forecasted_regime.tolist(),\n",
    "            labels=regimes)\n",
    "        z = np.flip(z, axis=0)\n",
    "        z_neg_incorrect = (z * signs).tolist()\n",
    "\n",
    "        # Also want to display percentages\n",
    "        total = np.sum(z)\n",
    "        z_with_pct = [[\n",
    "            '{}<br>({}%)'.format(val, np.around(val / total * 100, 1))\n",
    "            for val in row\n",
    "        ] for row in z]\n",
    "\n",
    "        # Build a throwaway figure to get annotations matching the new values\n",
    "        fig = ff.create_annotated_heatmap(\n",
    "            z_neg_incorrect,\n",
    "            x=regimes,\n",
    "            y=list(reversed(regimes)),\n",
    "            annotation_text=z_with_pct)\n",
    "        figure = heatmap.children[1]\n",
    "        figure.data[0].z = z_neg_incorrect\n",
    "        figure.layout.annotations = fig.layout.annotations\n",
    "\n",
    "# Hook changes to beach filter to update confusion heatmaps\n",
    "beach_select.observe(update_heatmaps, names=\"value\")\n",
    "\n",
    "# The heatmaps are created with placeholder values, so fill them in once for\n",
    "# the initial selection instead of waiting for the first change of the filter.\n",
    "update_heatmaps(None)\n",
    "\n",
    "# Display our widgets\n",
    "widgets.VBox([beach_container, widgets.VBox(heatmaps)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# To output to file\n",
    "# fig = heatmaps[1].children[1]\n",
    "# img_bytes = pio.write_image(fig, 'fig1.png',format='png', width=600, height=400, scale=5)\n",
    "\n",
    "# fig = g_profiles\n",
    "# img_bytes = pio.write_image(fig, 'fig1.png',format='png', width=600, height=200, scale=5)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Identify sites with no results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Check forecast TWL\n",
    "The most probable explanation for TWLs which are NaN is that the prestorm dune toe is not defined."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_twls = twls['forecasted']['mean_slope_sto06']\n",
    "\n",
    "# A site is reported when the value is null at every one of its time steps\n",
    "slope_mask = df_twls.beta.isnull().groupby(level='site_id').all()\n",
    "print('The following sites have no slope defined in the twl csv file:')\n",
    "print(slope_mask.index[slope_mask].tolist())\n",
    "print()\n",
    "\n",
    "R_high_mask = df_twls.R_high.isnull().groupby(level='site_id').all()\n",
    "print('The following sites have no R_high defined in the twl csv file:')\n",
    "print(R_high_mask.index[R_high_mask].tolist())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Check observed impacts\n",
    "Find sites which have no observed impacts. If we do not identify an observed storm regime, the site cannot be included when we're trying to compare predicted and observed impacts."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_impacts = impacts['observed']\n",
    "\n",
    "# Sites for which no observed storm regime is available\n",
    "has_no_regime = df_impacts['storm_regime'].isnull()\n",
    "df_no_obs_impacts = df_impacts.loc[has_no_regime]\n",
    "no_obs_impacts_sites = df_no_obs_impacts.index\n",
    "\n",
    "df_no_obs_impacts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It looks like the problem arises when we cannot identify the prestorm and poststorm swash and berm volume changes."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Check pre and post storm profiles\n",
    "It looks like, for some reason, some sites (e.g. `STUART0011`) have no post storm profile. Not sure if this is a processing error, or if the profile hasn't been included in the dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Check prestorm dune crests\n",
    "If there are no prestorm dune crests defined, we cannot define a mean slope and hence we won't be able to get observed impacts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A NaN never equals itself, so `dune_crest_x != dune_crest_x` keeps the\n",
    "# prestorm rows where no dune crest is defined.\n",
    "no_crest_query = 'profile_type==\"prestorm\" & (dune_crest_x != dune_crest_x)'\n",
    "df_no_crests = df_profile_features_crest_toes.query(no_crest_query)\n",
    "print('{} sites have no dune crests:'.format(len(df_no_crests)))\n",
    "\n",
    "for site_id in df_no_crests.index:\n",
    "    print(site_id)"
   ]
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {
    "height": "47px",
    "width": "262px"
   },
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "656px",
    "left": "508px",
    "top": "90px",
    "width": "218.797px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}