{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data exploration\n",
    "This notebook provides an example of how the data has been loaded and accessed for further analysis.\n",
    "\n",
    "It reads the interim CSV files and builds an interactive dashboard (impact filters, bed profile, map, time series and impact tables) for inspecting one site at a time."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-22T22:48:17.973982Z",
     "start_time": "2018-11-22T22:48:17.825797Z"
    }
   },
   "outputs": [],
   "source": [
    "# Enable autoreloading of our modules.\n",
    "# Most of the code will be located in the /src/ folder,\n",
    "# and then called from the notebook.\n",
    "#\n",
    "# Mode 2 reloads all modules before every cell execution; a bare\n",
    "# `%autoreload` only performs a single reload at the moment it is run.\n",
    "\n",
    "%reload_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2018-11-22T22:48:17.826Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from IPython.core.debugger import set_trace\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "\n",
    "import plotly\n",
    "import plotly.graph_objs as go\n",
    "import plotly.plotly as py\n",
    "import plotly.tools as tls\n",
    "\n",
    "import colorlover as cl\n",
    "\n",
    "from ipywidgets import widgets, Output\n",
    "from IPython.display import display, clear_output, Image, HTML"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def df_from_csv(csv, index_col, data_folder='../data/interim'):\n",
    "    \"\"\"Read `csv` from `data_folder` into a DataFrame indexed by the columns `index_col`.\"\"\"\n",
    "    return pd.read_csv(os.path.join(data_folder, csv), index_col=index_col)\n",
    "\n",
    "\n",
    "def site_rows(df, site_id):\n",
    "    \"\"\"Return the rows of `df` whose `site_id` (column or index level) equals `site_id`.\"\"\"\n",
    "    # `@site_id` refers to the local variable, so the value never has to be\n",
    "    # spliced into the query string by hand.\n",
    "    return df.query('site_id == @site_id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2018-11-22T22:48:17.829Z"
    },
    "pixiedust": {
     "displayParams": {}
    }
   },
   "outputs": [],
   "source": [
    "df_waves = df_from_csv('waves.csv', index_col=[0, 1])\n",
    "df_tides = df_from_csv('tides.csv', index_col=[0, 1])\n",
    "df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])\n",
    "df_sites = df_from_csv('sites.csv', index_col=[0])\n",
    "df_profile_features = df_from_csv('profile_features.csv', index_col=[0])\n",
    "\n",
    "# Note that the forecasted data sets should be in the same order for impacts and twls\n",
    "impacts = {\n",
    "    'forecasted': {\n",
    "        'foreshore_slope_sto06': df_from_csv('impacts_forecasted_foreshore_slope_sto06.csv', index_col=[0]),\n",
    "        'mean_slope_sto06': df_from_csv('impacts_forecasted_mean_slope_sto06.csv', index_col=[0]),\n",
    "    },\n",
    "    'observed': df_from_csv('impacts_observed.csv', index_col=[0])\n",
    "}\n",
    "\n",
    "twls = {\n",
    "    'forecasted': {\n",
    "        'foreshore_slope_sto06': df_from_csv('twl_foreshore_slope_sto06.csv', index_col=[0, 1]),\n",
    "        'mean_slope_sto06': df_from_csv('twl_mean_slope_sto06.csv', index_col=[0, 1]),\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dashboard\n",
    "### Filter and site selection widgets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create widgets for filtering by observed and forecasted impacts\n",
    "filter_title = widgets.HTML(\n",
    "    value=\"Filter by observed and predicted impacts:\",\n",
    ")\n",
    "\n",
    "\n",
    "def storm_regime_select(df_impacts):\n",
    "    \"\"\"Return a multi-select of the non-null storm regimes in `df_impacts`, all pre-selected.\"\"\"\n",
    "    regimes = df_impacts.storm_regime.dropna().unique().tolist()\n",
    "    return widgets.SelectMultiple(options=regimes, value=regimes, disabled=False)\n",
    "\n",
    "\n",
    "# One box for the observed impacts, then one per forecast (in `impacts['forecasted']` order).\n",
    "# `update_filter` relies on this order when pairing boxes with data frames.\n",
    "filter_titles = ['Observed Impacts']\n",
    "selectboxes = [storm_regime_select(impacts['observed'])]\n",
    "for forecast in impacts['forecasted']:\n",
    "    filter_titles.append('Forecasted: {}'.format(forecast))\n",
    "    selectboxes.append(storm_regime_select(impacts['forecasted'][forecast]))\n",
    "\n",
    "filter_columns = widgets.HBox(children=[\n",
    "    widgets.VBox(children=[widgets.HTML(value=title), box])\n",
    "    for title, box in zip(filter_titles, selectboxes)])\n",
    "filter_container = widgets.VBox(children=[filter_title, filter_columns])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create widgets for selecting site_id\n",
    "site_id_title = widgets.HTML(\n",
    "    value=\"Filter by site_id:\",\n",
    ")\n",
    "\n",
    "site_id_select = widgets.Dropdown(\n",
    "    description='site_id: ',\n",
    "    value='NARRA0001',\n",
    "    options=df_profiles.index.get_level_values(\n",
    "        'site_id').unique().sort_values().tolist()\n",
    ")\n",
    "\n",
    "# Filled by `update_profile` with the observed/forecasted regimes of the selected site\n",
    "site_id_impacts = widgets.HTML(\n",
    "    value=\"\",\n",
    ")\n",
    "\n",
    "site_id_container = widgets.HBox(children=[\n",
    "    widgets.VBox(children=[site_id_title, widgets.HBox(children=[site_id_select])]),\n",
    "    site_id_impacts\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Bed profile panel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build colors for each of our forecasts\n",
    "colors = list(reversed(cl.scales[str(max(len(impacts['forecasted']), 3))]['seq']['YlGnBu']))\n",
    "\n",
    "# Add panel for pre/post storm profiles.\n",
    "# All traces start as placeholders; `update_profile` addresses them by position,\n",
    "# so the order below (4 fixed traces, then one per forecast) must be kept.\n",
    "trace_prestorm = go.Scatter(\n",
    "    x=[0],\n",
    "    y=[0],\n",
    "    name='Pre Storm Profile',\n",
    "    line=dict(color='rgb(51,160,44)', width=2)\n",
    ")\n",
    "trace_poststorm = go.Scatter(\n",
    "    x=[0],\n",
    "    y=[0],\n",
    "    name='Post Storm Profile',\n",
    "    line=dict(color='rgb(255,127,0)', width=2)\n",
    ")\n",
    "trace_dune_crest = go.Scatter(\n",
    "    x=[0],\n",
    "    y=[0],\n",
    "    name='Pre-storm dune crest',\n",
    "    mode='markers',\n",
    "    marker=dict(\n",
    "        color='rgba(255,255,255,0)',\n",
    "        size=10,\n",
    "        line=dict(color='rgba(106,61,154, 1)', width=2)\n",
    "    ),\n",
    ")\n",
    "trace_dune_toe = go.Scatter(\n",
    "    x=[0],\n",
    "    y=[0],\n",
    "    name='Pre-storm dune toe',\n",
    "    mode='markers',\n",
    "    marker=dict(\n",
    "        color='rgba(255,255,255,0)',\n",
    "        size=10,\n",
    "        line=dict(color='rgba(202,178,214,1)', width=2)\n",
    "    ),\n",
    ")\n",
    "\n",
    "forecast_traces = [\n",
    "    go.Scatter(\n",
    "        x=[0],\n",
    "        y=[0],\n",
    "        name='Peak R_high: {}'.format(forecast),\n",
    "        mode='lines',\n",
    "        line=dict(color=color, width=4)\n",
    "    )\n",
    "    for forecast, color in zip(impacts['forecasted'], colors)\n",
    "]\n",
    "\n",
    "layout_profiles = go.Layout(\n",
    "    title='Bed Profiles',\n",
    "    height=300,\n",
    "    legend=dict(font={'size': 10}),\n",
    "    margin=dict(t=50, b=50, l=50, r=20),\n",
    "    xaxis=dict(\n",
    "        title='x (m)',\n",
    "        autorange=True,\n",
    "        showgrid=True,\n",
    "        zeroline=True,\n",
    "        showline=True,\n",
    "        range=[0, 200]\n",
    "    ),\n",
    "    yaxis=dict(\n",
    "        title='z (m)',\n",
    "        autorange=False,\n",
    "        showgrid=True,\n",
    "        zeroline=True,\n",
    "        showline=True,\n",
    "        range=[-1, 20]\n",
    "    )\n",
    ")\n",
    "\n",
    "g_profiles = go.FigureWidget(\n",
    "    data=[trace_prestorm, trace_poststorm, trace_dune_crest, trace_dune_toe] + forecast_traces,\n",
    "    layout=layout_profiles)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Map panel\n",
    "The Mapbox access token is read from the `MAPBOX_ACCESS_TOKEN` environment variable; set it before starting the notebook server."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add panel for the Mapbox satellite map.\n",
    "# The access token is a credential, so it is read from the environment\n",
    "# instead of being committed with the notebook.\n",
    "mapbox_access_token = os.environ.get('MAPBOX_ACCESS_TOKEN')\n",
    "if not mapbox_access_token:\n",
    "    raise RuntimeError(\n",
    "        'Set the MAPBOX_ACCESS_TOKEN environment variable before starting the '\n",
    "        'notebook server; it is passed to Mapbox to draw the satellite basemap.')\n",
    "\n",
    "map_traces = [\n",
    "    # Trace 0: every site\n",
    "    go.Scattermapbox(\n",
    "        lat=df_sites['lat'],\n",
    "        lon=df_sites['lon'],\n",
    "        mode='markers',\n",
    "        marker=dict(size=10),\n",
    "        text=df_sites.index.get_level_values('site_id'),\n",
    "    ),\n",
    "    # Trace 1: highlight marker, moved onto the selected site by `update_profile`\n",
    "    go.Scattermapbox(\n",
    "        lat=[0],\n",
    "        lon=[0],\n",
    "        mode='markers',\n",
    "        marker=dict(\n",
    "            size=20,\n",
    "            color='rgb(255, 0, 0)',\n",
    "            opacity=0.5,\n",
    "        ),\n",
    "        text=df_sites.index.get_level_values('site_id'),\n",
    "    ),\n",
    "]\n",
    "\n",
    "layout_map = go.Layout(\n",
    "    autosize=True,\n",
    "    height=300,\n",
    "    hovermode='closest',\n",
    "    showlegend=False,\n",
    "    margin=dict(t=50, b=50, l=20, r=20),\n",
    "    mapbox=dict(\n",
    "        accesstoken=mapbox_access_token,\n",
    "        bearing=0,\n",
    "        center=dict(lat=-33.7, lon=151.3),\n",
    "        pitch=0,\n",
    "        zoom=12,\n",
    "        style='satellite-streets'\n",
    "    ),\n",
    ")\n",
    "\n",
    "g_map = go.FigureWidget(data=map_traces, layout=layout_map)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Time series panel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "subplot = tls.make_subplots(3, 1, print_grid=False, shared_xaxes=True)\n",
    "g_timeseries = go.FigureWidget(subplot)\n",
    "\n",
    "# Traces 0-4 are addressed by position in `update_profile`, so keep this order:\n",
    "# 0 Hs0, 1 Tp, 2 dune crest, 3 dune toe, 4 tide+surge.\n",
    "\n",
    "# Add trace for Hs0\n",
    "g_timeseries.add_trace(\n",
    "    go.Scatter(x=[0, 1], y=[0, 1], name='Hs0'),\n",
    "    row=3, col=1)\n",
    "\n",
    "# Add trace for Tp\n",
    "g_timeseries.add_trace(\n",
    "    go.Scatter(x=[0, 1], y=[0, 1], name='Tp'),\n",
    "    row=3, col=1)\n",
    "\n",
    "# Add water levels\n",
    "g_timeseries.add_trace(\n",
    "    go.Scatter(\n",
    "        x=[0, 3],\n",
    "        y=[0, 3],\n",
    "        name='Dune Crest',\n",
    "        mode='lines',\n",
    "        line=dict(color='rgb(214, 117, 14)', width=2, dash='dot')),\n",
    "    row=1, col=1)\n",
    "\n",
    "g_timeseries.add_trace(\n",
    "    go.Scatter(\n",
    "        x=[0, 3],\n",
    "        y=[0, 3],\n",
    "        name='Dune Toe',\n",
    "        mode='lines',\n",
    "        line=dict(color='rgb(142, 77, 8)', width=2, dash='dash')),\n",
    "    row=1, col=1)\n",
    "\n",
    "g_timeseries.add_trace(\n",
    "    go.Scatter(\n",
    "        x=[0, 3],\n",
    "        y=[0, 3],\n",
    "        name='Tide+Surge WL',\n",
    "        line=dict(color='rgb(8,51,137)', width=2, dash='dot')),\n",
    "    row=1, col=1)\n",
    "\n",
    "# Add trace for each forecasted R_high (looked up by name in `update_profile`)\n",
    "for forecast, color in zip(twls['forecasted'], colors):\n",
    "    g_timeseries.add_trace(\n",
    "        go.Scatter(\n",
    "            x=[0],\n",
    "            y=[0],\n",
    "            name='R_high: {}'.format(forecast),\n",
    "            line=dict(color=color, width=3)),\n",
    "        row=1, col=1)\n",
    "\n",
    "# Add trace for each forecasted beta term (looked up by name in `update_profile`)\n",
    "for forecast, color in zip(impacts['forecasted'], colors):\n",
    "    g_timeseries.add_trace(\n",
    "        go.Scatter(\n",
    "            x=[0, 1],\n",
    "            y=[0, 1],\n",
    "            name='Beta: {}'.format(forecast),\n",
    "            line=dict(color=color, width=3)),\n",
    "        row=2, col=1)\n",
    "\n",
    "# Create axis for Tp on same plot as Hs\n",
    "g_timeseries['layout']['yaxis4'] = {'overlaying': 'y3', 'side': 'right'}\n",
    "g_timeseries.data[1]['yaxis'] = 'y4'\n",
    "\n",
    "# Add labels to each axis\n",
    "g_timeseries.layout['xaxis']['title'] = 'datetime'\n",
    "g_timeseries.layout['yaxis1']['title'] = 'z (mAHD)'\n",
    "g_timeseries.layout['yaxis2']['title'] = 'beta (-)'\n",
    "g_timeseries.layout['yaxis3']['title'] = 'Hs0 (m)'\n",
    "g_timeseries.layout['yaxis4']['title'] = 'Tp (s)'\n",
    "\n",
    "# Update figure size\n",
    "g_timeseries['layout'].update(height=400, legend=dict(font={'size': 10}))\n",
    "g_timeseries['layout'].update(margin=dict(t=20, l=50, r=20, b=100))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Impact tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add panel for some tables: observed impacts first, then one table per forecast\n",
    "table_titles = [widgets.HTML(value=\"{}\".format(name))\n",
    "                for name in ['observed'] + list(impacts['forecasted'])]\n",
    "\n",
    "\n",
    "def get_observed_impacts_table(site_id):\n",
    "    \"\"\"Display the observed impacts of `site_id`, transposed to one row per field.\"\"\"\n",
    "    display(site_rows(impacts['observed'], site_id).T)\n",
    "\n",
    "\n",
    "def get_forecasted_impacts_table(site_id, forecast):\n",
    "    \"\"\"Display the impacts forecasted by `forecast` for `site_id`, transposed.\"\"\"\n",
    "    display(site_rows(impacts['forecasted'][forecast], site_id).T)\n",
    "\n",
    "\n",
    "impacts_table_observed = widgets.interactive_output(\n",
    "    get_observed_impacts_table, {'site_id': site_id_select})\n",
    "\n",
    "# `interactive_output` calls the function with each widget's value, so the HTML\n",
    "# title widget (whose value is the forecast name) supplies the `forecast` argument.\n",
    "forecasted_impacts_tables = [\n",
    "    widgets.interactive_output(\n",
    "        get_forecasted_impacts_table,\n",
    "        {'site_id': site_id_select, 'forecast': title})\n",
    "    for title in table_titles[1:]\n",
    "]\n",
    "\n",
    "tables = [impacts_table_observed] + forecasted_impacts_tables\n",
    "\n",
    "title_tables = [widgets.VBox(children=[title, table])\n",
    "                for title, table in zip(table_titles, tables)]\n",
    "\n",
    "tables_container = widgets.HBox(children=[*title_tables])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Callbacks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _update_profile_panel(site_id, site_features):\n",
    "    \"\"\"Redraw the bed profile panel (profiles, dune crest/toe, peak R_high) for `site_id`.\"\"\"\n",
    "    site_profile = site_rows(df_profiles, site_id)\n",
    "    prestorm_profile = site_profile.query('profile_type == \"prestorm\"')\n",
    "    poststorm_profile = site_profile.query('profile_type == \"poststorm\"')\n",
    "\n",
    "    with g_profiles.batch_update():\n",
    "        g_profiles.data[0].x = prestorm_profile.index.get_level_values('x').tolist()\n",
    "        g_profiles.data[0].y = prestorm_profile.z.tolist()\n",
    "        g_profiles.data[1].x = poststorm_profile.index.get_level_values('x').tolist()\n",
    "        g_profiles.data[1].y = poststorm_profile.z.tolist()\n",
    "        g_profiles.data[2].x = site_features.dune_crest_x\n",
    "        g_profiles.data[2].y = site_features.dune_crest_z\n",
    "        g_profiles.data[3].x = site_features.dune_toe_x\n",
    "        g_profiles.data[3].y = site_features.dune_toe_z\n",
    "\n",
    "        # Forecast traces follow the four fixed traces, in `impacts['forecasted']` order\n",
    "        for n, forecast in enumerate(impacts['forecasted']):\n",
    "            R_high = max(site_rows(impacts['forecasted'][forecast], site_id).R_high)\n",
    "            g_profiles.data[4 + n].x = [200, 400]\n",
    "            g_profiles.data[4 + n].y = [R_high, R_high]\n",
    "\n",
    "\n",
    "def _update_map_panel(site_id):\n",
    "    \"\"\"Centre the map on `site_id` and move the highlight marker onto it.\"\"\"\n",
    "    site_coords = site_rows(df_sites, site_id)\n",
    "    lat = site_coords['lat'].values[0]\n",
    "    lon = site_coords['lon'].values[0]\n",
    "\n",
    "    with g_map.batch_update():\n",
    "        g_map.layout.mapbox['center'] = {'lat': lat, 'lon': lon}\n",
    "        g_map.layout.mapbox['zoom'] = 15\n",
    "        g_map.data[1].lat = [lat]\n",
    "        g_map.data[1].lon = [lon]\n",
    "        g_map.data[1].text = site_coords.index.get_level_values('site_id').tolist()\n",
    "\n",
    "\n",
    "def _update_timeseries_panel(site_id, site_features):\n",
    "    \"\"\"Redraw the wave, beta, water level and R_high time series for `site_id`.\"\"\"\n",
    "    df_waves_site = site_rows(df_waves, site_id)\n",
    "    times = df_waves_site.index.get_level_values('datetime').tolist()\n",
    "\n",
    "    with g_timeseries.batch_update():\n",
    "        g_timeseries.data[0].x = times\n",
    "        g_timeseries.data[0].y = df_waves_site.Hs0.tolist()\n",
    "        g_timeseries.data[1].x = times\n",
    "        g_timeseries.data[1].y = df_waves_site.Tp.tolist()\n",
    "\n",
    "        # Update beta values\n",
    "        idx_betas = [n for n, trace in enumerate(g_timeseries.data)\n",
    "                     if 'Beta' in trace.name]\n",
    "        for i, forecast in zip(idx_betas, twls['forecasted']):\n",
    "            df_twl = site_rows(twls['forecasted'][forecast], site_id)\n",
    "            times = df_twl.index.get_level_values('datetime').tolist()\n",
    "            g_timeseries.data[i].x = times\n",
    "            g_timeseries.data[i].y = df_twl.beta.tolist()\n",
    "\n",
    "    # Update water levels plot.\n",
    "    # NOTE: `times` was reassigned in the loop above, so the window used here is\n",
    "    # that of the last forecast series (the wave series if there are no forecasts).\n",
    "    t_start, t_end = min(times), max(times)\n",
    "    df_tide_site = site_rows(df_tides, site_id)\n",
    "    tide_times = df_tide_site.index.get_level_values('datetime')\n",
    "    df_tide_site = df_tide_site.loc[(tide_times >= t_start) & (tide_times <= t_end)]\n",
    "\n",
    "    dune_crest_z = site_features.dune_crest_z.tolist()[0]\n",
    "    dune_toe_z = site_features.dune_toe_z.tolist()[0]\n",
    "\n",
    "    with g_timeseries.batch_update():\n",
    "        # Crest and toe are drawn as horizontal lines across the whole window\n",
    "        g_timeseries.data[2].x = [t_start, t_end]\n",
    "        g_timeseries.data[3].x = [t_start, t_end]\n",
    "        g_timeseries.data[4].x = df_tide_site.index.get_level_values('datetime')\n",
    "        g_timeseries.data[2].y = [dune_crest_z, dune_crest_z]\n",
    "        g_timeseries.data[3].y = [dune_toe_z, dune_toe_z]\n",
    "        g_timeseries.data[4].y = df_tide_site.tide.tolist()\n",
    "\n",
    "        # Update rhigh values\n",
    "        idx_rhigh = [n for n, trace in enumerate(g_timeseries.data)\n",
    "                     if 'R_high' in trace.name]\n",
    "        for i, forecast in zip(idx_rhigh, twls['forecasted']):\n",
    "            df_twl = site_rows(twls['forecasted'][forecast], site_id)\n",
    "            g_timeseries.data[i].x = df_twl.index.get_level_values('datetime').tolist()\n",
    "            g_timeseries.data[i].y = df_twl.R_high.tolist()\n",
    "\n",
    "\n",
    "def _update_site_summary(site_id):\n",
    "    \"\"\"Show the observed and forecasted storm regimes of `site_id` next to the dropdown.\"\"\"\n",
    "    observed_regime = site_rows(impacts['observed'], site_id).storm_regime.values[0]\n",
    "    lines = ['Observed: {}<br>'.format(observed_regime)]\n",
    "    for forecast in impacts['forecasted']:\n",
    "        regime = site_rows(impacts['forecasted'][forecast], site_id).storm_regime.values[0]\n",
    "        lines.append('{}: {}<br>'.format(forecast, regime))\n",
    "\n",
    "    # Assign once so the widget value is only sent to the front end a single time\n",
    "    site_id_impacts.value = ''.join(lines)\n",
    "\n",
    "\n",
    "def update_profile(change):\n",
    "    \"\"\"Refresh every dashboard panel for the site selected in `site_id_select`.\n",
    "\n",
    "    Registered as an observer of the dropdown's value; `change` itself is not read.\n",
    "    Does nothing when no site is selected (e.g. the filters left no options).\n",
    "    \"\"\"\n",
    "    site_id = site_id_select.value\n",
    "    if site_id is None:\n",
    "        return\n",
    "\n",
    "    site_features = site_rows(df_profile_features, site_id)\n",
    "    _update_profile_panel(site_id, site_features)\n",
    "    _update_map_panel(site_id)\n",
    "    _update_timeseries_panel(site_id, site_features)\n",
    "    _update_site_summary(site_id)\n",
    "\n",
    "\n",
    "def update_filter(change):\n",
    "    \"\"\"Restrict the site dropdown to sites whose regimes are selected in every filter box.\"\"\"\n",
    "    # Iterate through each box, only keeping site_ids which are not filtered out by each box\n",
    "    dfs = [impacts['observed']] + [impacts['forecasted'][key]\n",
    "                                   for key in impacts['forecasted']]\n",
    "    valid_site_ids = set(impacts['observed'].index.tolist())\n",
    "    for box, df in zip(selectboxes, dfs):\n",
    "        valid_site_ids &= set(df[df.storm_regime.isin(box.value)].index.tolist())\n",
    "    site_id_select.options = sorted(valid_site_ids)\n",
    "\n",
    "    # TODO Update options in selectboxes with number of observations?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "site_id_select.observe(update_profile, names=\"value\")\n",
    "\n",
    "# Update the filter if any of the boxes changes\n",
    "for box in selectboxes:\n",
    "    box.observe(update_filter, names=\"value\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Display our widgets!\n",
    "widgets.VBox([filter_container, site_id_container,\n",
    "              widgets.HBox([g_profiles, g_map]), g_timeseries, tables_container])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inspect the impacts of a single site"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2018-11-22T22:48:17.840Z"
    }
   },
   "outputs": [],
   "source": [
    "# impacts['observed'].query(\"site_id=='{}'\".format(\"NARRA0018\")).T\n",
    "impacts['forecasted']['foreshore_slope_sto06'].query(\"site_id=='{}'\".format(\"NARRA0018\")).T"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {
    "height": "47px",
    "width": "262px"
   },
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}