nsw-2016-storm-impact/notebooks/01_exploration.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data exploration\n",
    "This notebook provides an example of how the data is loaded and accessed for further analysis."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-03T23:02:22.927101Z",
     "start_time": "2018-12-03T23:02:22.612233Z"
    }
   },
   "outputs": [],
   "source": [
    "# Enable autoreloading of our modules.\n",
    "# Most of the code will be located in the /src/ folder,\n",
    "# and then called from the notebook.\n",
    "# Mode 2 reloads the imported modules before every cell execution; a bare\n",
    "# `%autoreload` only triggers one reload at the moment it is run.\n",
    "%matplotlib inline\n",
    "%reload_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-03T23:02:24.527369Z",
     "start_time": "2018-12-03T23:02:22.929088Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from IPython.core.debugger import set_trace\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "\n",
    "import plotly\n",
    "import plotly.graph_objs as go\n",
    "import plotly.plotly as py\n",
    "import plotly.tools as tls\n",
    "import plotly.figure_factory as ff\n",
    "import plotly.io as pio\n",
    "\n",
    "\n",
    "import matplotlib\n",
    "from matplotlib import cm\n",
    "import colorlover as cl\n",
    "\n",
    "from ipywidgets import widgets, Output\n",
    "from IPython.display import display, clear_output, Image, HTML\n",
    "\n",
    "from sklearn.metrics import confusion_matrix"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Import our data into pandas DataFrames for the analysis. Data files are `.csv` files which are stored in the `../data/interim/` folder (relative to this notebook)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-03T23:02:39.868010Z",
     "start_time": "2018-12-03T23:02:24.529339Z"
    },
    "pixiedust": {
     "displayParams": {}
    },
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Importing waves.csv\n",
      "Importing tides.csv\n",
      "Importing profiles.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\z5189959\\Desktop\\nsw-2016-storm-impact\\.venv\\lib\\site-packages\\numpy\\lib\\arraysetops.py:522: FutureWarning:\n",
      "\n",
      "elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Importing sites.csv\n",
      "Importing profile_features.csv\n",
      "Importing impacts_forecasted_foreshore_slope_sto06.csv\n",
      "Importing impacts_forecasted_mean_slope_sto06.csv\n",
      "Importing impacts_observed.csv\n",
      "Importing twl_foreshore_slope_sto06.csv\n",
      "Importing twl_mean_slope_sto06.csv\n",
      "Done!\n"
     ]
    }
   ],
   "source": [
    "def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
    "    \"\"\"Read one csv file from the data folder into a pandas DataFrame.\n",
    "\n",
    "    The file name is printed before reading so that progress is visible.\n",
    "\n",
    "    Args:\n",
    "        csv: File name of the csv file, relative to `data_folder`.\n",
    "        index_col: Column position(s) handed to `pd.read_csv` as `index_col`.\n",
    "        data_folder: Folder which contains the csv file.\n",
    "\n",
    "    Returns:\n",
    "        The DataFrame produced by `pd.read_csv`.\n",
    "    \"\"\"\n",
    "    print('Importing {}'.format(csv))\n",
    "    csv_path = os.path.join(data_folder, csv)\n",
    "    df = pd.read_csv(csv_path, index_col=index_col)\n",
    "    return df\n",
    "\n",
    "# Time series, profile and site data. `index_col` gives the leading csv\n",
    "# columns which are used as the (multi-)index of each DataFrame.\n",
    "df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n",
    "df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n",
    "df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n",
    "df_sites = df_from_csv('sites.csv', index_col=[0])\n",
    "df_profile_features = df_from_csv('profile_features.csv', index_col=[0])\n",
    "\n",
    "# Note that the forecasted data sets should be in the same order for impacts and twls\n",
    "impacts = {'forecasted': {}}\n",
    "impacts['forecasted']['foreshore_slope_sto06'] = df_from_csv(\n",
    "    'impacts_forecasted_foreshore_slope_sto06.csv', index_col=[0])\n",
    "impacts['forecasted']['mean_slope_sto06'] = df_from_csv(\n",
    "    'impacts_forecasted_mean_slope_sto06.csv', index_col=[0])\n",
    "impacts['observed'] = df_from_csv('impacts_observed.csv', index_col=[0])\n",
    "\n",
    "# Total water level forecasts, keyed by the same forecast names as the impacts\n",
    "twls = {'forecasted': {}}\n",
    "twls['forecasted']['foreshore_slope_sto06'] = df_from_csv(\n",
    "    'twl_foreshore_slope_sto06.csv', index_col=[0, 1])\n",
    "twls['forecasted']['mean_slope_sto06'] = df_from_csv(\n",
    "    'twl_mean_slope_sto06.csv', index_col=[0, 1])\n",
    "\n",
    "print('Done!')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-27T23:02:57.631306Z",
     "start_time": "2018-11-27T23:02:57.615263Z"
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hide_input": true
   },
   "source": [
    "The following interactive data explorer displays information on a per `site_id` basis. It can be used to examine pre/post storm cross-sections, water level time series and observed/predicted storm impacts."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-05T03:57:14.533063Z",
     "start_time": "2018-12-05T03:57:13.745017Z"
    },
    "code_folding": [
     408
    ],
    "hide_input": false,
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "eae0e9440a5f45599b2c9b43352d3d13",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(VBox(children=(HTML(value='<b>Filter by observed and predicted impacts:</b>'), HBox(children=(V…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create widgets for filtering by observed and forecasted impacts\n",
    "filter_title = widgets.HTML(\n",
    "    value=\"<b>Filter by observed and predicted impacts:</b>\")\n",
    "\n",
    "# The first select box filters on the observed storm regimes. Every regime\n",
    "# which occurs in the data is offered and selected to begin with.\n",
    "observed_regimes = impacts['observed'].storm_regime.dropna().unique().tolist()\n",
    "titles = ['Observed Impacts']\n",
    "selectboxes = [\n",
    "    widgets.SelectMultiple(\n",
    "        options=observed_regimes, value=observed_regimes, disabled=False)\n",
    "]\n",
    "\n",
    "# One further select box for each of our forecasted impacts\n",
    "for forecast in impacts['forecasted']:\n",
    "    forecast_regimes = impacts['forecasted'][forecast].storm_regime\n",
    "    forecast_regimes = forecast_regimes.dropna().unique().tolist()\n",
    "    titles.append('Forecasted: {}'.format(forecast))\n",
    "    selectboxes.append(\n",
    "        widgets.SelectMultiple(\n",
    "            options=forecast_regimes, value=forecast_regimes, disabled=False))\n",
    "\n",
    "# Turn the plain text titles into HTML widgets\n",
    "titles = [widgets.HTML(value=text) for text in titles]\n",
    "\n",
    "# Stack every title on top of its select box and lay the pairs out in a row\n",
    "children = widgets.HBox(children=[\n",
    "    widgets.VBox(children=list(pair)) for pair in zip(titles, selectboxes)\n",
    "])\n",
    "filter_container = widgets.VBox(children=[filter_title, children])\n",
    "\n",
    "# Create widgets for selecting site_id\n",
    "site_id_title = widgets.HTML(value=\"<b>Filter by site_id:</b>\", )\n",
    "\n",
    "site_id_options = df_profiles.index.get_level_values(\n",
    "    'site_id').unique().sort_values().tolist()\n",
    "\n",
    "# Start on NARRA0001 when it is part of the data. A Dropdown does not accept a\n",
    "# `value` which is missing from its `options`, so fall back to the first site\n",
    "# (or to no selection at all) for data sets without that site.\n",
    "if 'NARRA0001' in site_id_options:\n",
    "    default_site_id = 'NARRA0001'\n",
    "elif site_id_options:\n",
    "    default_site_id = site_id_options[0]\n",
    "else:\n",
    "    default_site_id = None\n",
    "\n",
    "site_id_select = widgets.Dropdown(\n",
    "    description='site_id:   ',\n",
    "    value=default_site_id,\n",
    "    options=site_id_options)\n",
    "\n",
    "# Filled with the observed/forecasted storm regimes of the selected site\n",
    "site_id_impacts = widgets.HTML(value=\"\", )\n",
    "\n",
    "site_id_container = widgets.HBox(children=[\n",
    "    widgets.VBox(\n",
    "        children=[site_id_title,\n",
    "                  widgets.HBox(children=[site_id_select])]), site_id_impacts\n",
    "])\n",
    "\n",
    "# Build colors for each of our forecasts\n",
    "# At least 3 colours are requested even when there are fewer forecasts\n",
    "# (presumably the smallest available colorlover scale - TODO confirm).\n",
    "n_colors = max(len(impacts['forecasted']), 3)\n",
    "colors = list(cl.scales[str(n_colors)]['seq']['YlGnBu'])\n",
    "colors.reverse()\n",
    "\n",
    "# Add panel for pre/post storm profiles\n",
    "def _hollow_marker(outline_color, symbol=None):\n",
    "    \"\"\"Build a size 10 marker with a transparent fill and a coloured outline.\"\"\"\n",
    "    marker = {\n",
    "        'color': 'rgba(255,255,255,0)',\n",
    "        'size': 10,\n",
    "        'line': {'color': outline_color, 'width': 2},\n",
    "    }\n",
    "    # Only set a symbol when one is requested, so the default is left untouched\n",
    "    if symbol is not None:\n",
    "        marker['symbol'] = symbol\n",
    "    return marker\n",
    "\n",
    "\n",
    "# Every trace starts with a single point at the origin; update_profile\n",
    "# overwrites the x/y values of the traces once a site is selected.\n",
    "trace1 = go.Scatter(\n",
    "    x=[0], y=[0],\n",
    "    name='Pre Storm Profile',\n",
    "    line={'color': 'rgb(51,160,44)', 'width': 2})\n",
    "trace2 = go.Scatter(\n",
    "    x=[0], y=[0],\n",
    "    name='Post Storm Profile',\n",
    "    line={'color': 'rgb(255,127,0)', 'width': 2})\n",
    "\n",
    "# Pre-storm dune features use the default marker symbol\n",
    "trace3 = go.Scatter(\n",
    "    x=[0], y=[0],\n",
    "    name='Pre-storm dune crest',\n",
    "    mode='markers',\n",
    "    marker=_hollow_marker('rgba(106,61,154, 1)'))\n",
    "trace4 = go.Scatter(\n",
    "    x=[0], y=[0],\n",
    "    name='Pre-storm dune toe',\n",
    "    mode='markers',\n",
    "    marker=_hollow_marker('rgba(202,178,214,1)'))\n",
    "\n",
    "# Post-storm dune features share the colours but are drawn as squares\n",
    "trace5 = go.Scatter(\n",
    "    x=[0], y=[0],\n",
    "    name='Post-storm dune crest',\n",
    "    mode='markers',\n",
    "    marker=_hollow_marker('rgba(106,61,154, 1)', symbol='square'))\n",
    "trace6 = go.Scatter(\n",
    "    x=[0], y=[0],\n",
    "    name='Post-storm dune toe',\n",
    "    mode='markers',\n",
    "    marker=_hollow_marker('rgba(202,178,214,1)', symbol='square'))\n",
    "\n",
    "\n",
    "# One line trace per forecast for the peak R_high level\n",
    "forecast_traces = []\n",
    "for forecast, color in zip(impacts['forecasted'], colors):\n",
    "    peak_line = go.Scatter(\n",
    "        x=[0], y=[0],\n",
    "        name='Peak R_high: {}'.format(forecast),\n",
    "        mode='lines',\n",
    "        line={'color': color, 'width': 4})\n",
    "    forecast_traces.append(peak_line)\n",
    "\n",
    "# Layout of the profile panel. Both axes show grid, zero line and axis line;\n",
    "# x has autorange switched on, z is fixed to the range -1 to 20.\n",
    "profile_axis_style = dict(showgrid=True, zeroline=True, showline=True)\n",
    "layout = go.Layout(\n",
    "    title='Bed Profiles',\n",
    "    height=300,\n",
    "    legend={'font': {'size': 10}},\n",
    "    margin={'t': 50, 'b': 50, 'l': 50, 'r': 20},\n",
    "    xaxis=dict(\n",
    "        title='x (m)', autorange=True, range=[0, 200], **profile_axis_style),\n",
    "    yaxis=dict(\n",
    "        title='z (m)', autorange=False, range=[-1, 20], **profile_axis_style))\n",
    "\n",
    "# The six profile/dune traces come first, followed by one trace per forecast\n",
    "profile_traces = [trace1, trace2, trace3, trace4, trace5, trace6]\n",
    "g_profiles = go.FigureWidget(\n",
    "    data=profile_traces + forecast_traces, layout=layout)\n",
    "\n",
    "# Add panel for the satellite map (Mapbox)\n",
    "# A token set in the MAPBOX_ACCESS_TOKEN environment variable takes precedence,\n",
    "# so that a personal token can be supplied without editing the notebook. The\n",
    "# token which used to be hard-coded here remains the fallback, so the notebook\n",
    "# behaves as before when the variable is unset or empty.\n",
    "mapbox_access_token = (\n",
    "    os.environ.get('MAPBOX_ACCESS_TOKEN')\n",
    "    or 'pk.eyJ1IjoiY2hyaXNsZWFtYW4iLCJhIjoiY2pvNTY1MzZpMDc2OTN2bmw5MGsycHp5bCJ9.U2dwFg2c7RFjUNSayERUiw'\n",
    ")\n",
    "\n",
    "data = [\n",
    "    # Trace 0: one marker for every site\n",
    "    go.Scattermapbox(\n",
    "        lat=df_sites['lat'],\n",
    "        lon=df_sites['lon'],\n",
    "        mode='markers',\n",
    "        marker=dict(size=10),\n",
    "        text=df_sites.index.get_level_values('site_id'),\n",
    "    ),\n",
    "    # Trace 1: larger, semi-transparent red marker which update_profile moves\n",
    "    # onto the selected site (it starts at lat/lon 0)\n",
    "    go.Scattermapbox(\n",
    "        lat=[0],\n",
    "        lon=[0],\n",
    "        mode='markers',\n",
    "        marker=dict(\n",
    "            size=20,\n",
    "            color='rgb(255, 0, 0)',\n",
    "            opacity=0.5,\n",
    "        ),\n",
    "        text=df_sites.index.get_level_values('site_id'),\n",
    "    ),\n",
    "]\n",
    "\n",
    "layout = go.Layout(\n",
    "    autosize=True,\n",
    "    height=300,\n",
    "    hovermode='closest',\n",
    "    showlegend=False,\n",
    "    margin=dict(t=50, b=50, l=20, r=20),\n",
    "    mapbox=dict(\n",
    "        accesstoken=mapbox_access_token,\n",
    "        bearing=0,\n",
    "        center=dict(lat=-33.7, lon=151.3),\n",
    "        pitch=0,\n",
    "        zoom=12,\n",
    "        style='satellite-streets'),\n",
    ")\n",
    "\n",
    "# NOTE(review): `fig` is not used by the widget below; kept in case it is\n",
    "# inspected interactively - confirm before removing.\n",
    "fig = dict(data=data, layout=layout)\n",
    "g_map = go.FigureWidget(data=data, layout=layout)\n",
    "\n",
    "# Time series panel: three stacked subplots which share the x axis.\n",
    "# Row 1 holds the water levels, row 2 the beta values and row 3 the waves.\n",
    "# The order in which traces are added matters: update_profile addresses\n",
    "# g_timeseries.data[0] to data[4] by position (Hs0, Tp, Dune Crest, Dune Toe,\n",
    "# Tide+Surge WL) and finds the R_high and Beta traces by name.\n",
    "subplot = tls.make_subplots(3, 1, print_grid=False, shared_xaxes=True)\n",
    "g_timeseries = go.FigureWidget(subplot)\n",
    "\n",
    "# Add trace for Hs0 (data[0], bottom row)\n",
    "g_timeseries.add_trace(\n",
    "    go.Scatter(\n",
    "        x=[0, 1],\n",
    "        y=[0, 1],\n",
    "        name='Hs0',\n",
    "    ),\n",
    "    row=3,\n",
    "    col=1,\n",
    ")\n",
    "\n",
    "# Add trace for Tp (data[1], bottom row; moved onto its own y axis below)\n",
    "g_timeseries.add_trace(\n",
    "    go.Scatter(\n",
    "        x=[0, 1],\n",
    "        y=[0, 1],\n",
    "        name='Tp',\n",
    "    ),\n",
    "    row=3,\n",
    "    col=1,\n",
    ")\n",
    "\n",
    "# Add water levels (top row): dune crest is data[2]\n",
    "g_timeseries.add_trace(\n",
    "    go.Scatter(\n",
    "        x=[0, 3],\n",
    "        y=[0, 3],\n",
    "        name='Dune Crest',\n",
    "        mode='lines',\n",
    "        line=dict(color=('rgb(214, 117, 14)'), width=2, dash='dot')),\n",
    "    row=1,\n",
    "    col=1)\n",
    "\n",
    "# Dune toe is data[3]\n",
    "g_timeseries.add_trace(\n",
    "    go.Scatter(\n",
    "        x=[0, 3],\n",
    "        y=[0, 3],\n",
    "        name='Dune Toe',\n",
    "        mode='lines',\n",
    "        line=dict(color=('rgb(142, 77, 8)'), width=2, dash='dash')),\n",
    "    row=1,\n",
    "    col=1)\n",
    "\n",
    "# Tide + surge water level is data[4]\n",
    "g_timeseries.add_trace(\n",
    "    go.Scatter(\n",
    "        x=[0, 3],\n",
    "        y=[0, 3],\n",
    "        name='Tide+Surge WL',\n",
    "        line=dict(color=('rgb(8,51,137)'), width=2, dash='dot')),\n",
    "    row=1,\n",
    "    col=1)\n",
    "\n",
    "# Add one R_high trace per forecasted twl data set (top row)\n",
    "for forecast, color in zip(twls['forecasted'], colors):\n",
    "    g_timeseries.add_trace(\n",
    "        go.Scatter(\n",
    "            x=[0],\n",
    "            y=[0],\n",
    "            name='R_high: {}'.format(forecast),\n",
    "            line=dict(color=color, width=3)),\n",
    "        row=1,\n",
    "        col=1)\n",
    "\n",
    "# Add trace for each forecasted beta term (middle row)\n",
    "for forecast, color in zip(impacts['forecasted'], colors):\n",
    "    g_timeseries.add_trace(\n",
    "        go.Scatter(\n",
    "            x=[0, 1],\n",
    "            y=[0, 1],\n",
    "            name='Beta: {}'.format(forecast),\n",
    "            line=dict(color=color, width=3)),\n",
    "        row=2,\n",
    "        col=1,\n",
    "    )\n",
    "\n",
    "# Create axis for Tp on same plot as Hs: a fourth y axis overlays the third\n",
    "# one on the right hand side, and the Tp trace (data[1]) is assigned to it\n",
    "g_timeseries['layout']['yaxis4'] = {'overlaying': 'y3', 'side': 'right'}\n",
    "g_timeseries.data[1]['yaxis'] = 'y4'\n",
    "\n",
    "# Add labels to each axis\n",
    "g_timeseries.layout['xaxis']['title'] = 'datetime'\n",
    "g_timeseries.layout['yaxis1']['title'] = 'z (mAHD)'\n",
    "g_timeseries.layout['yaxis2']['title'] = 'beta (-)'\n",
    "g_timeseries.layout['yaxis3']['title'] = 'Hs0 (m)'\n",
    "g_timeseries.layout['yaxis4']['title'] = 'Tp (s)'\n",
    "\n",
    "# Update figure size, legend font and margins\n",
    "g_timeseries['layout'].update(height=400, legend=dict(font={'size': 10}))\n",
    "g_timeseries['layout'].update(margin=dict(t=20, l=50, r=20, b=100))\n",
    "\n",
    "# Add panel for some tables\n",
    "# One title for the observed impacts followed by one per forecast. The titles\n",
    "# of the forecasts hold the bare forecast name, because the title widgets are\n",
    "# also used as the `forecast` control of the forecasted tables further down.\n",
    "table_names = ['observed']\n",
    "table_names.extend(impacts['forecasted'])\n",
    "titles = [widgets.HTML(value=\"{}\".format(name)) for name in table_names]\n",
    "\n",
    "\n",
    "def get_observed_impacts_table(site_id):\n",
    "    \"\"\"Display the observed impacts of one site as a transposed table.\n",
    "\n",
    "    Args:\n",
    "        site_id: Value compared against `site_id` in the observed impacts.\n",
    "    \"\"\"\n",
    "    site_query = \"site_id=='{}'\".format(site_id)\n",
    "    observed_site = impacts['observed'].query(site_query)\n",
    "    display(observed_site.T)\n",
    "\n",
    "\n",
    "def get_forecasted_impacts_table(site_id, forecast):\n",
    "    \"\"\"Display the forecasted impacts of one site as a transposed table.\n",
    "\n",
    "    Args:\n",
    "        site_id: Value compared against `site_id` in the forecasted impacts.\n",
    "        forecast: Key of the data set in `impacts['forecasted']`.\n",
    "    \"\"\"\n",
    "    site_query = \"site_id=='{}'\".format(site_id)\n",
    "    forecasted_site = impacts['forecasted'][forecast].query(site_query)\n",
    "    display(forecasted_site.T)\n",
    "\n",
    "\n",
    "# Table of observed impacts, redrawn whenever the selected site changes\n",
    "observed_controls = {'site_id': site_id_select}\n",
    "impacts_table_observed = widgets.interactive_output(\n",
    "    get_observed_impacts_table, observed_controls)\n",
    "\n",
    "# One table per forecast. The title widget of the forecast doubles as the\n",
    "# `forecast` control, so its value selects the forecasted data set.\n",
    "forecasted_impacts_tables = []\n",
    "for forecast, title in zip(impacts['forecasted'], titles[1:]):\n",
    "    forecast_controls = {'site_id': site_id_select, 'forecast': title}\n",
    "    forecasted_impacts_tables.append(\n",
    "        widgets.interactive_output(get_forecasted_impacts_table,\n",
    "                                   forecast_controls))\n",
    "\n",
    "tables = [impacts_table_observed, *forecasted_impacts_tables]\n",
    "\n",
    "# Put each title above its table, then place the titled tables side by side\n",
    "title_tables = [\n",
    "    widgets.VBox(children=list(pair)) for pair in zip(titles, tables)\n",
    "]\n",
    "\n",
    "tables_container = widgets.HBox(children=list(title_tables))\n",
    "\n",
    "\n",
    "def update_profile(change):\n",
    "    \"\"\"Refresh every panel of the explorer for the currently selected site.\n",
    "\n",
    "    Reads the site from `site_id_select` and then updates, in this order, the\n",
    "    bed profile figure (`g_profiles`), the map (`g_map`), the time series\n",
    "    figure (`g_timeseries`) and the storm regime summary (`site_id_impacts`).\n",
    "    Nothing is updated when no site is selected.\n",
    "\n",
    "    Args:\n",
    "        change: Argument passed by the widget observer; it is not used, the\n",
    "            selected site is read from `site_id_select.value` instead.\n",
    "    \"\"\"\n",
    "\n",
    "    site_id = site_id_select.value\n",
    "\n",
    "    if site_id is None:\n",
    "        return\n",
    "\n",
    "    # Split the profiles of this site into the pre and post storm surveys\n",
    "    site_profile = df_profiles.query('site_id == \"{}\"'.format(site_id))\n",
    "    prestorm_profile = site_profile.query('profile_type == \"prestorm\"')\n",
    "    poststorm_profile = site_profile.query('profile_type == \"poststorm\"')\n",
    "\n",
    "    poststorm_x = poststorm_profile.index.get_level_values('x').tolist()\n",
    "    poststorm_z = poststorm_profile.z.tolist()\n",
    "\n",
    "    prestorm_x = prestorm_profile.index.get_level_values('x').tolist()\n",
    "    prestorm_z = prestorm_profile.z.tolist()\n",
    "\n",
    "    # Dune crest and toe positions of the pre storm profile\n",
    "    prestorm_site_features = df_profile_features.query(\n",
    "        'site_id == \"{}\" and profile_type==\"prestorm\"'.format(site_id))\n",
    "    prestorm_dune_crest_x = prestorm_site_features.dune_crest_x\n",
    "    prestorm_dune_crest_z = prestorm_site_features.dune_crest_z\n",
    "    prestorm_dune_toe_x = prestorm_site_features.dune_toe_x\n",
    "    prestorm_dune_toe_z = prestorm_site_features.dune_toe_z\n",
    "\n",
    "    \n",
    "    # Dune crest and toe positions of the post storm profile\n",
    "    poststorm_site_features = df_profile_features.query(\n",
    "        'site_id == \"{}\" and profile_type==\"poststorm\"'.format(site_id))\n",
    "    poststorm_dune_crest_x = poststorm_site_features.dune_crest_x\n",
    "    poststorm_dune_crest_z = poststorm_site_features.dune_crest_z\n",
    "    poststorm_dune_toe_x = poststorm_site_features.dune_toe_x\n",
    "    poststorm_dune_toe_z = poststorm_site_features.dune_toe_z\n",
    "    \n",
    "    # Update beach profile section plots\n",
    "    # Trace positions follow the order in which g_profiles was built:\n",
    "    # 0/1 pre/post storm profile, 2/3 pre storm crest/toe, 4/5 post storm\n",
    "    # crest/toe, 6 and up one peak R_high line per forecast.\n",
    "    with g_profiles.batch_update():\n",
    "        g_profiles.data[0].x = prestorm_x\n",
    "        g_profiles.data[0].y = prestorm_z\n",
    "        g_profiles.data[1].x = poststorm_x\n",
    "        g_profiles.data[1].y = poststorm_z\n",
    "        g_profiles.data[2].x = prestorm_dune_crest_x\n",
    "        g_profiles.data[2].y = prestorm_dune_crest_z\n",
    "        g_profiles.data[3].x = prestorm_dune_toe_x\n",
    "        g_profiles.data[3].y = prestorm_dune_toe_z\n",
    "        g_profiles.data[4].x = poststorm_dune_crest_x\n",
    "        g_profiles.data[4].y = poststorm_dune_crest_z\n",
    "        g_profiles.data[5].x = poststorm_dune_toe_x\n",
    "        g_profiles.data[5].y = poststorm_dune_toe_z\n",
    "    \n",
    "        # Draw the highest forecasted R_high of the site as a horizontal line\n",
    "        # between x=200 and x=400. max() fails on an empty selection, so this\n",
    "        # assumes the site is present in every forecast.\n",
    "        for n, forecast in enumerate(impacts['forecasted']):\n",
    "            R_high = max(impacts['forecasted'][forecast].query(\n",
    "                \"site_id=='{}'\".format(site_id)).R_high)\n",
    "            g_profiles.data[6 + n].x = [200, 400]\n",
    "            g_profiles.data[6 + n].y = [R_high, R_high]\n",
    "\n",
    "    # Relocate plan of satellite imagery\n",
    "    # Centre the map on the site, zoom in and move the highlight marker\n",
    "    # (trace 1) onto it. `.values[0]` assumes the site is listed in df_sites.\n",
    "    site_coords = df_sites.query('site_id == \"{}\"'.format(site_id))\n",
    "    with g_map.batch_update():\n",
    "        g_map.layout.mapbox['center'] = {\n",
    "            'lat': site_coords['lat'].values[0],\n",
    "            'lon': site_coords['lon'].values[0]\n",
    "        }\n",
    "        g_map.layout.mapbox['zoom'] = 15\n",
    "        g_map.data[1].lat = [site_coords['lat'].values[0]]\n",
    "        g_map.data[1].lon = [site_coords['lon'].values[0]]\n",
    "        g_map.data[1].text = site_coords['lon'].index.get_level_values(\n",
    "            'site_id').tolist()\n",
    "\n",
    "    # Update time series plots\n",
    "    df_waves_site = df_waves.query(\"site_id=='{}'\".format(site_id))\n",
    "    times = df_waves_site.index.get_level_values('datetime').tolist()\n",
    "    Hs0s = df_waves_site.Hs0.tolist()\n",
    "    Tps = df_waves_site.Tp.tolist()\n",
    "\n",
    "    # Restrict the tides to the period which is covered by the wave record\n",
    "    df_tide_site = df_tides.query(\"site_id=='{}'\".format(site_id))\n",
    "    mask = (df_tide_site.index.get_level_values('datetime') >= min(times)) & (\n",
    "        df_tide_site.index.get_level_values('datetime') <= max(times))\n",
    "    df_tide_site = df_tide_site.loc[mask]\n",
    "\n",
    "    with g_timeseries.batch_update():\n",
    "        # Traces 0 and 1 are Hs0 and Tp\n",
    "        g_timeseries.data[0].x = times\n",
    "        g_timeseries.data[0].y = Hs0s\n",
    "        g_timeseries.data[1].x = times\n",
    "        g_timeseries.data[1].y = Tps\n",
    "\n",
    "        # Update beta values\n",
    "        # The beta traces are located by name and paired with the twl\n",
    "        # forecasts by position, which relies on both being in the same order.\n",
    "        idx_betas = [\n",
    "            n for n, x in enumerate(g_timeseries.data) if 'Beta' in x.name\n",
    "        ]\n",
    "        for i, forecast in zip(idx_betas, twls['forecasted']):\n",
    "            df_twl = twls['forecasted'][forecast].query(\n",
    "                \"site_id=='{}'\".format(site_id))\n",
    "            times = df_twl.index.get_level_values('datetime').tolist()\n",
    "            beta = df_twl.beta.tolist()\n",
    "            g_timeseries.data[i].x = times\n",
    "            g_timeseries.data[i].y = beta\n",
    "\n",
    "        # Traces 2, 3 and 4 are the dune crest, the dune toe and the tide.\n",
    "        # NOTE(review): `times` was rebound in the loop above, so the dune\n",
    "        # crest/toe lines span the times of the last twl forecast rather\n",
    "        # than the wave times - confirm that this is intended.\n",
    "        g_timeseries.data[2].x = [min(times), max(times)]\n",
    "        g_timeseries.data[3].x = [min(times), max(times)]\n",
    "        g_timeseries.data[4].x = df_tide_site.index.get_level_values(\n",
    "            'datetime')\n",
    "        # The trailing commas build 2-tuples which repeat the first pre storm\n",
    "        # elevation, i.e. the start and the end of a horizontal line\n",
    "        g_timeseries.data[2].y = prestorm_dune_crest_z.tolist()[\n",
    "            0], prestorm_dune_crest_z.tolist()[0],\n",
    "        g_timeseries.data[3].y = prestorm_dune_toe_z.tolist()[0], prestorm_dune_toe_z.tolist()[\n",
    "            0],\n",
    "        g_timeseries.data[4].y = df_tide_site.tide.tolist()\n",
    "\n",
    "        # Update rhigh values\n",
    "        # Despite its name, idx_betas holds the positions of the R_high\n",
    "        # traces from here on\n",
    "        idx_betas = [\n",
    "            n for n, x in enumerate(g_timeseries.data) if 'R_high' in x.name\n",
    "        ]\n",
    "        for i, forecast in zip(idx_betas, twls['forecasted']):\n",
    "            df_twl = twls['forecasted'][forecast].query(\n",
    "                \"site_id=='{}'\".format(site_id))\n",
    "            times = df_twl.index.get_level_values('datetime').tolist()\n",
    "            R_high = df_twl.R_high.tolist()\n",
    "            g_timeseries.data[i].x = times\n",
    "            g_timeseries.data[i].y = R_high\n",
    "\n",
    "    # Update site id impacts\n",
    "    # Start with the observed regime, then append one line per forecast\n",
    "    observed_regime = impacts['observed'].query(\n",
    "        \"site_id=='{}'\".format(site_id)).storm_regime.values[0]\n",
    "    site_id_impacts.value = \"Observed: <b>{}</b><br>\".format(\n",
    "        observed_regime)\n",
    "\n",
    "    for forecast in impacts['forecasted']:\n",
    "        regime = impacts['forecasted'][forecast].query(\n",
    "            \"site_id=='{}'\".format(site_id)).storm_regime.values[0]\n",
    "        site_id_impacts.value += '{}: <b>{}</b><br>'.format(\n",
    "            forecast, regime)\n",
    "\n",
    "\n",
    "# Refresh all panels whenever the value of the site_id dropdown changes\n",
    "site_id_select.observe(update_profile, names=\"value\")\n",
    "\n",
    "\n",
    "def update_filter(change):\n",
    "    \"\"\"Limit the site_id dropdown to the sites which pass every regime filter.\n",
    "\n",
    "    A site is kept when, for each select box, its storm regime in the matching\n",
    "    impacts data set (observed first, then each forecast) is among the selected\n",
    "    values. The remaining site ids are written to the dropdown in sorted order.\n",
    "\n",
    "    Args:\n",
    "        change: Argument passed by the widget observer; it is not used.\n",
    "    \"\"\"\n",
    "    # Data sets in the same order as the select boxes\n",
    "    frames = [impacts['observed']]\n",
    "    frames.extend(impacts['forecasted'].values())\n",
    "\n",
    "    # Start from all observed sites and narrow them down box by box\n",
    "    remaining = impacts['observed'].index.tolist()\n",
    "    for select_box, frame in zip(selectboxes, frames):\n",
    "        selected = frame.storm_regime.isin(select_box.value)\n",
    "        matching = frame[selected].index.tolist()\n",
    "        remaining = list(set(remaining) & set(matching))\n",
    "\n",
    "    site_id_select.options = sorted(remaining)\n",
    "\n",
    "    # TODO Update options in selectboxes with number of observations?\n",
    "\n",
    "\n",
    "# Update the filter if any of the boxes changes\n",
    "for box in selectboxes:\n",
    "    box.observe(update_filter, names=\"value\")\n",
    "\n",
    "# Display our widgets! From top to bottom: regime filters, site selection,\n",
    "# profile figure next to the map, time series and the impacts tables.\n",
    "widgets.VBox(children=[\n",
    "    filter_container,\n",
    "    site_id_container,\n",
    "    widgets.HBox(children=[g_profiles, g_map]),\n",
    "    g_timeseries,\n",
    "    tables_container,\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-27T23:06:31.686277Z",
     "start_time": "2018-11-27T23:06:31.665206Z"
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-22T22:52:36.039701Z",
     "start_time": "2018-11-22T22:52:36.035189Z"
    },
    "hide_input": true,
    "scrolled": true
   },
   "source": [
    "This visualization looks at how well the storm impact predictions performed. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-03T23:02:42.021445Z",
     "start_time": "2018-12-03T23:02:41.468637Z"
    },
    "code_folding": [],
    "hide_input": false,
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a2b52c5ad861454db1756a427f13b55d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(VBox(children=(HTML(value='<b>Filter by beach:</b>'), SelectMultiple(index=(0, 1, 2, 3, 4, 5, 6…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create colorscale\n",
    "# Prefer the colormap registry, because `matplotlib.cm.get_cmap` is no longer\n",
    "# available in recent matplotlib releases. Older releases have no registry\n",
    "# and fall back to the original call.\n",
    "try:\n",
    "    rdylgr_cmap = matplotlib.colormaps['RdYlGn']\n",
    "except AttributeError:\n",
    "    rdylgr_cmap = matplotlib.cm.get_cmap('RdYlGn')\n",
    "\n",
    "# NOTE(review): `norm` is not used in the code of this cell - confirm\n",
    "# whether it is still needed.\n",
    "norm = matplotlib.colors.Normalize(vmin=0, vmax=255)\n",
    "\n",
    "def matplotlib_to_plotly(cmap, pl_entries):\n",
    "    \"\"\"Sample a matplotlib colormap into a plotly colorscale.\n",
    "\n",
    "    Args:\n",
    "        cmap: Callable which maps a float between 0 and 1 to a sequence whose\n",
    "            first three items are the r, g, b channels as floats between 0\n",
    "            and 1, e.g. a matplotlib colormap.\n",
    "        pl_entries: Number of evenly spaced samples. Has to be at least 2,\n",
    "            because the spacing is computed as 1 / (pl_entries - 1).\n",
    "\n",
    "    Returns:\n",
    "        List of [position, 'rgb(r, g, b)'] pairs, one per sample.\n",
    "    \"\"\"\n",
    "    h = 1.0/(pl_entries-1)\n",
    "    pl_colorscale = []\n",
    "\n",
    "    for k in range(pl_entries):\n",
    "        # Scale the channels to 0-255 and convert them to plain Python ints.\n",
    "        # Formatting numpy scalars through str(tuple) embeds their repr, which\n",
    "        # reads 'np.uint8(165)' on NumPy >= 2 and is not a valid colour.\n",
    "        channels = (np.array(cmap(k*h)[:3])*255).astype(np.uint8).tolist()\n",
    "        pl_colorscale.append([k*h, 'rgb({}, {}, {})'.format(*channels)])\n",
    "\n",
    "    return pl_colorscale\n",
    "\n",
    "# Sample the colormap 255 times to build the plotly colorscale\n",
    "rdylgr = matplotlib_to_plotly(rdylgr_cmap, 255)\n",
    "\n",
    "\n",
    "\n",
    "# Create widget for list of beaches.\n",
    "# All beaches are selected to begin with.\n",
    "beaches = df_sites['beach'].unique().tolist()\n",
    "\n",
    "beach_title = widgets.HTML(value=\"<b>Filter by beach:</b>\")\n",
    "beach_select = widgets.SelectMultiple(\n",
    "    options=beaches, value=beaches, disabled=False)\n",
    "\n",
    "beach_container = widgets.VBox(children=[beach_title, beach_select])\n",
    "\n",
    "# Create confusion matrix for each forecasted impact data set\n",
    "# The figures are created with made-up numbers; update_heatmaps replaces them\n",
    "# once the beach selection changes. `x` and `y` are also read by\n",
    "# update_heatmaps, so they have to stay module level names.\n",
    "heatmaps = []\n",
    "for forecast in impacts['forecasted']:\n",
    "\n",
    "    z = [\n",
    "        [.1, .3, .5, 2],\n",
    "        [1.0, .8, .6, 1],\n",
    "        [1.4, .28, 1.6, .21],\n",
    "        [.6, .4, .2, 3],\n",
    "    ]\n",
    "\n",
    "    # Predicted regimes on x, observed regimes in reversed order on y\n",
    "    x = ['swash', 'collision', 'overwash', 'inundation']\n",
    "    y = x[::-1]\n",
    "\n",
    "    z_text = z\n",
    "\n",
    "    fig = ff.create_annotated_heatmap(\n",
    "        z, x=x, y=y, annotation_text=z_text, colorscale=rdylgr)\n",
    "    heatmap = go.FigureWidget(data=fig.data, layout=fig.layout)\n",
    "\n",
    "    figure_margin = go.layout.Margin(l=100, r=100, b=40, t=40, pad=0)\n",
    "    heatmap.layout.update(height=300, margin=figure_margin)\n",
    "    heatmap.layout.xaxis.update(title='Predicted')\n",
    "    heatmap.layout.yaxis.update(title='Observed')\n",
    "\n",
    "    # Each entry is a box with the forecast name on top of its heatmap\n",
    "    heatmap_title = widgets.HTML(value=\"<b>{}</b>\".format(forecast))\n",
    "    heatmaps.append(widgets.VBox(children=[heatmap_title, heatmap]))\n",
    "\n",
    "    \n",
    "def update_heatmaps(change):\n",
    "    \"\"\"Recompute the confusion matrix heatmap of every forecast.\n",
    "\n",
    "    Only sites of the beaches selected in `beach_select` are compared. For\n",
    "    each forecast the observed and forecasted storm regimes are matched by\n",
    "    site and the figure inside the corresponding entry of `heatmaps` is\n",
    "    updated. A forecast without any regimes for the selection is skipped,\n",
    "    the remaining forecasts are still updated.\n",
    "\n",
    "    Args:\n",
    "        change: Argument passed by the widget observer; it is not used.\n",
    "    \"\"\"\n",
    "    # Predicted regimes along x; the rows (y) run in reversed order, which\n",
    "    # matches the axes the heatmaps were created with\n",
    "    regime_labels = ['swash', 'collision', 'overwash', 'inundation']\n",
    "    reversed_labels = regime_labels[::-1]\n",
    "\n",
    "    # The selection and the observed regimes are the same for every forecast\n",
    "    selected_site_ids = df_sites[df_sites.beach.isin(beach_select.value)].index.tolist()\n",
    "    df_ob = impacts['observed']\n",
    "    observed_regimes = df_ob[df_ob.index.isin(selected_site_ids)].storm_regime.dropna().rename(\"observed_regime\")\n",
    "\n",
    "    for forecast, heatmap in zip(impacts['forecasted'], heatmaps):\n",
    "        df_fo = impacts['forecasted'][forecast]\n",
    "        forecasted_regimes = df_fo[df_fo.index.isin(selected_site_ids)].storm_regime.dropna().rename(\"forecasted_regime\")\n",
    "\n",
    "        # Nothing to compare for this forecast; carry on with the next one\n",
    "        if observed_regimes.empty or forecasted_regimes.empty:\n",
    "            continue\n",
    "\n",
    "        # Keep only the sites which have both an observed and a forecasted regime\n",
    "        df_compare = pd.concat(\n",
    "            [observed_regimes, forecasted_regimes], axis='columns', sort=True)\n",
    "        df_compare = df_compare.dropna(axis='index')\n",
    "\n",
    "        z = confusion_matrix(\n",
    "            df_compare.observed_regime.tolist(),\n",
    "            df_compare.forecasted_regime.tolist(),\n",
    "            labels=regime_labels)\n",
    "        # Flip the rows so that they follow the reversed labels of the y axis\n",
    "        z = np.flip(z, axis=0)\n",
    "\n",
    "        # Make incorrect values negative, so they get assigned a different color.\n",
    "        # Better for visualization. After the flip the correct predictions sit\n",
    "        # on the anti-diagonal, which is the only part left positive.\n",
    "        z_neg_incorrect = np.flip(np.identity(len(regime_labels)), axis=0)\n",
    "        z_neg_incorrect[z_neg_incorrect == 0] = -1\n",
    "        z_neg_incorrect = (z * z_neg_incorrect).tolist()\n",
    "\n",
    "        # Also want to display percentages. Guard against a division by zero\n",
    "        # when no site has both an observed and a forecasted regime.\n",
    "        total = np.sum(z)\n",
    "        z_with_pct = [[\n",
    "            '{}<br>({}%)'.format(\n",
    "                val, np.around(val / total * 100, 1) if total else 0.0)\n",
    "            for val in row\n",
    "        ] for row in z]\n",
    "\n",
    "        # Build a throw-away figure to get the annotations, then copy the new\n",
    "        # values and annotations into the displayed figure widget\n",
    "        fig = ff.create_annotated_heatmap(\n",
    "            z_neg_incorrect, x=regime_labels, y=reversed_labels,\n",
    "            annotation_text=z_with_pct)\n",
    "        heatmap.children[1].data[0].z = z_neg_incorrect\n",
    "        heatmap.children[1].layout.annotations = fig.layout.annotations\n",
    "\n",
    "# Hook changes to beach filter to update confusion heatmaps\n",
    "beach_select.observe(update_heatmaps, names=\"value\")\n",
    "\n",
    "# Display our widgets: the beach filter above the stacked heatmaps\n",
    "widgets.VBox(children=[\n",
    "    beach_container,\n",
    "    widgets.VBox(children=heatmaps),\n",
    "])\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-03T23:03:48.457193Z",
     "start_time": "2018-12-03T23:03:48.274709Z"
    }
   },
   "outputs": [],
   "source": [
    "# To output to file\n",
    "# fig = heatmaps[1].children[1]\n",
    "# img_bytes = pio.write_image(fig, 'fig1.png',format='png', width=600, height=400, scale=5)\n",
    "\n",
    "# fig = g_profiles\n",
    "# img_bytes = pio.write_image(fig, 'fig1.png',format='png', width=600, height=200, scale=5)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-03T23:02:47.179180Z",
     "start_time": "2018-12-03T23:02:46.367273Z"
    }
   },
   "source": [
    "### Look at time dependence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-03T23:49:16.581105Z",
     "start_time": "2018-12-03T23:49:16.274275Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8a366d02e4564347a5950b0f24c86363",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>Filter by site_id:</b>'), HBox(children=(Dropdown(…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Build the site_id selector widgets and the bed-profile figure, then display\n",
    "# them stacked in a single VBox (the last expression of this cell).\n",
    "\n",
    "# Site pre-selected in the dropdown when it is present in df_profiles\n",
    "DEFAULT_SITE_ID = 'NARRA0001'\n",
    "\n",
    "\n",
    "def _line_trace(name, color):\n",
    "    \"\"\"Return a width-2 line trace called `name`, seeded with one dummy point at (0, 0).\"\"\"\n",
    "    return go.Scatter(x=[0], y=[0], name=name, line=dict(color=color, width=2))\n",
    "\n",
    "\n",
    "def _marker_trace(name, outline_color, symbol=None):\n",
    "    \"\"\"Return a markers-only trace with a transparent fill and a coloured outline.\n",
    "\n",
    "    The trace is seeded with one dummy point at (0, 0). `symbol` is only\n",
    "    set on the marker when given, so plotly's default symbol is used otherwise.\n",
    "    \"\"\"\n",
    "    marker = dict(\n",
    "        color='rgba(255,255,255,0)',\n",
    "        size=10,\n",
    "        line=dict(color=outline_color, width=2))\n",
    "    if symbol is not None:\n",
    "        marker['symbol'] = symbol\n",
    "    return go.Scatter(x=[0], y=[0], name=name, mode='markers', marker=marker)\n",
    "\n",
    "\n",
    "# Create widgets for selecting site_id\n",
    "site_id_title = widgets.HTML(value='<b>Filter by site_id:</b>')\n",
    "\n",
    "site_ids = (df_profiles.index.get_level_values('site_id').unique()\n",
    "            .sort_values().tolist())\n",
    "\n",
    "# Only force the default site when it is one of the options: passing a value\n",
    "# that is not in `options` makes the Dropdown constructor raise. Otherwise the\n",
    "# widget is left to choose its own initial selection.\n",
    "site_id_select_kwargs = dict(description='site_id:   ', options=site_ids)\n",
    "if DEFAULT_SITE_ID in site_ids:\n",
    "    site_id_select_kwargs['value'] = DEFAULT_SITE_ID\n",
    "site_id_select = widgets.Dropdown(**site_id_select_kwargs)\n",
    "\n",
    "# Empty HTML widget placed to the right of the selector\n",
    "site_id_impacts = widgets.HTML(value='')\n",
    "\n",
    "site_id_container = widgets.HBox(children=[\n",
    "    widgets.VBox(children=[\n",
    "        site_id_title,\n",
    "        widgets.HBox(children=[site_id_select]),\n",
    "    ]),\n",
    "    site_id_impacts,\n",
    "])\n",
    "\n",
    "\n",
    "# Plot profiles\n",
    "\n",
    "# Pre/post storm profile lines\n",
    "trace1 = _line_trace('Pre Storm Profile', 'rgb(51,160,44)')\n",
    "trace2 = _line_trace('Post Storm Profile', 'rgb(255,127,0)')\n",
    "\n",
    "# Dune crest/toe markers: crest and toe differ by outline colour, and the\n",
    "# post-storm markers additionally use the 'square' symbol\n",
    "trace3 = _marker_trace('Pre-storm dune crest', 'rgba(106,61,154, 1)')\n",
    "trace4 = _marker_trace('Pre-storm dune toe', 'rgba(202,178,214,1)')\n",
    "trace5 = _marker_trace('Post-storm dune crest', 'rgba(106,61,154, 1)', symbol='square')\n",
    "trace6 = _marker_trace('Post-storm dune toe', 'rgba(202,178,214,1)', symbol='square')\n",
    "\n",
    "# One width-4 line trace per key of impacts['forecasted'], paired with `colors`;\n",
    "# zip() stops at the shorter of the two\n",
    "forecast_traces = [\n",
    "    go.Scatter(\n",
    "        x=[0],\n",
    "        y=[0],\n",
    "        name='Peak R_high: {}'.format(forecast),\n",
    "        mode='lines',\n",
    "        line=dict(color=color, width=4))\n",
    "    for forecast, color in zip(impacts['forecasted'], colors)\n",
    "]\n",
    "\n",
    "# NOTE(review): the x axis sets autorange=True together with an explicit range;\n",
    "# with autorange on, range=[0, 200] is presumably ignored - confirm which one\n",
    "# is intended. The y axis pins its range by disabling autorange.\n",
    "layout = go.Layout(\n",
    "    title='Bed Profiles',\n",
    "    height=300,\n",
    "    legend=dict(font={'size': 10}),\n",
    "    margin=dict(t=50, b=50, l=50, r=20),\n",
    "    xaxis=dict(\n",
    "        title='x (m)',\n",
    "        autorange=True,\n",
    "        showgrid=True,\n",
    "        zeroline=True,\n",
    "        showline=True,\n",
    "        range=[0, 200]),\n",
    "    yaxis=dict(\n",
    "        title='z (m)',\n",
    "        autorange=False,\n",
    "        showgrid=True,\n",
    "        zeroline=True,\n",
    "        showline=True,\n",
    "        range=[-1, 20]))\n",
    "\n",
    "g_profiles = go.FigureWidget(\n",
    "    data=[trace1, trace2, trace3, trace4, trace5, trace6] + forecast_traces,\n",
    "    layout=layout)\n",
    "\n",
    "# Display the selector above the profile figure\n",
    "widgets.VBox([site_id_container, g_profiles])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {
    "height": "47px",
    "width": "262px"
   },
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}