# Data exploration
This notebook shows an example of how the data is loaded and accessed for further analysis.

In [1]:
# Enable autoreloading of our modules. 
# Most of the code will be located in the /src/ folder, 
# and then called from the notebook.

%reload_ext autoreload
%autoreload

In [2]:
from IPython.core.debugger import set_trace

import pandas as pd
import numpy as np
import os

import plotly
import plotly.graph_objs as go
import plotly.plotly as py

from ipywidgets import widgets
from IPython.display import display, clear_output, Image

In [9]:
# Folder holding the interim CSV files, relative to this notebook.
data_folder = '../data/interim'


def _interim_csv(filename, index_cols):
    """Read one CSV from ``data_folder``, indexing on the given column positions."""
    csv_path = os.path.join(data_folder, filename)
    return pd.read_csv(csv_path, index_col=index_cols)


# Tables indexed on their first two columns.
df_waves = _interim_csv('waves.csv', [0, 1])
df_tides = _interim_csv('tides.csv', [0, 1])
df_twl = _interim_csv('twl_mean_slope_sto06.csv', [0, 1])

# Table indexed on its first three columns.
df_profiles = _interim_csv('profiles.csv', [0, 1, 2])

# Tables indexed on their first column only.
df_sites = _interim_csv('sites.csv', [0])
df_profile_features = _interim_csv('profile_features.csv', [0])
df_impacts_compared = _interim_csv('impacts_observed_vs_forecasted_mean_slope_sto06.csv', [0])


elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison



In [60]:
# List every Hs0 value that df_twl holds for site NARRA0018.
narra0018_twl = df_twl.query("site_id=='NARRA0018'")
narra0018_twl['Hs0'].tolist()

[1.2632991506198927,
 1.393768803168096,
 1.4898137015209056,
 1.4536075884721669,
 1.4108238472203196,
 1.3456902382958191,
 1.3190526770579034,
 1.291134623539095,
 1.2716049325008096,
 1.234754724771738,
 1.1825435278076464,
 1.2252064606390358,
 1.2640989420800277,
 1.2757496999030895,
 1.2958669929936903,
 1.2951053747668917,
 1.2997067012745651,
 1.2927882315939971,
 1.3309337732401414,
 1.3193355176891717,
 1.2844322857877195,
 1.2628576180893467,
 1.2097343104491254,
 1.2013077303201378,
 1.1993481704538602,
 1.2198569961855183,
 1.2984481427280574,
 1.3698938539307974,
 1.4269234117912923,
 1.4452966027439913,
 1.4230609854421774,
 1.3842558204110529,
 1.3450040465222477,
 1.29724974139755,
 1.251148426943638,
 1.2203239982067688,
 1.1863161021683857,
 1.1941366924930163,
 1.1469348307275298,
 1.1407884178283354,
 1.1054402354387345,
 1.0888031069851187,
 1.052178951137407,
 1.042391452505114,
 1.0022582557622686,
 0.9574583082522315,
 0.9273904554517179,
 0.913868938605881,
 

In [184]:
# --- Widgets for filtering by observed and forecasted impacts ---

# Distinct, non-null regimes present in the comparison table; every regime
# starts out selected.
observed_regimes = df_impacts_compared.storm_regime_observed.dropna().unique()
forecasted_regimes = df_impacts_compared.storm_regime_forecasted.dropna().unique()

filter_title = widgets.HTML(value="<b>Filter by observed and predicted impacts:</b>")

observed_impact_select = widgets.SelectMultiple(
    options=observed_regimes,
    value=observed_regimes.tolist(),
    description='Observed Impacts',
    disabled=False,
)

forecasted_impact_select = widgets.SelectMultiple(
    options=forecasted_regimes,
    value=forecasted_regimes.tolist(),
    description='Forecasted Impacts',
    disabled=False,
)

# Title on top, the two multi-selects side by side underneath.
impact_selectors_row = widgets.HBox(children=[forecasted_impact_select, observed_impact_select])
filter_container = widgets.VBox(children=[filter_title, impact_selectors_row])


# --- Widgets for selecting site_id ---

site_id_title = widgets.HTML(value="<b>Filter by site_id:</b>")

# Sorted list of the distinct site ids found in the profiles index.
all_site_ids = df_profiles.index.get_level_values('site_id').unique().sort_values().tolist()

site_id_select = widgets.Dropdown(
    description='site_id:   ',
    value='NARRA0001',
    options=all_site_ids,
)

site_id_row = widgets.HBox(children=[site_id_select])
site_id_container = widgets.VBox(children=[site_id_title, site_id_row])



# Add panel for pre/post storm profiles
def _profile_placeholder(name, **scatter_kwargs):
    """Build a single-point Scatter at the origin as a stand-in trace.

    update_profile overwrites the x/y data of these traces on selection change.
    """
    return go.Scatter(x=[0], y=[0], name=name, **scatter_kwargs)


trace1 = _profile_placeholder('Pre Storm Profile')
trace2 = _profile_placeholder('Post Storm Profile')
trace3 = _profile_placeholder(
    'Pre-storm dune crest',
    mode='markers',
    marker=dict(color='rgb(17, 157, 255)', size=20),
)
trace4 = _profile_placeholder(
    'Pre-storm dune toe',
    mode='markers',
    marker=dict(color='rgb(231, 99, 250)', size=20),
)

# Grid/zero-line/axis-line settings shared by both axes.
_profile_axis_style = dict(showgrid=True, zeroline=True, showline=True)

layout = go.Layout(
    title='Bed Profiles',
    height=300,
    legend=dict(x=0.5, y=1),
    margin=dict(t=50, b=50, l=20, r=20),
    # NOTE(review): autorange=True is combined with an explicit range here;
    # presumably the range is ignored while autorange is on - confirm intent.
    xaxis=dict(title='x (m)', autorange=True, range=[0, 200], **_profile_axis_style),
    # The vertical axis is pinned to a fixed range.
    yaxis=dict(title='z (m)', autorange=False, range=[-1, 20], **_profile_axis_style),
)

g1 = go.FigureWidget(data=[trace1, trace2, trace3, trace4], layout=layout)


# Add panel for the satellite map (Mapbox tiles rendered through plotly's
# Scattermapbox traces).
#
# The access token is taken from the MAPBOX_ACCESS_TOKEN environment variable
# when it is set, so a personal token never has to be written into the notebook.
# The previously hard-coded token is kept only as a fallback so existing setups
# keep working.
# TODO(review): drop the fallback (and rotate that token) once every user of
# this notebook provides the environment variable.
mapbox_access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoiY2hyaXNsZWFtYW4iLCJhIjoiY2pvNTY1MzZpMDc2OTN2bmw5MGsycHp5bCJ9.U2dwFg2c7RFjUNSayERUiw',
)

data = [
    # Trace 0: one small marker per site, labelled with its site_id.
    go.Scattermapbox(
        lat=df_sites['lat'],
        lon=df_sites['lon'],
        mode='markers',
        marker=dict(
            size=10
        ),
        text=df_sites.index.get_level_values('site_id'),
    ),
    # Trace 1: large translucent red marker used to highlight the selected
    # site; it starts at (0, 0) and is moved by update_profile.
    go.Scattermapbox(
        lat=[0],
        lon=[0],
        mode='markers',
        marker=dict(
            size=20,
            color='rgb(255, 0, 0)',
            opacity=0.5,
        ),
        text=df_sites.index.get_level_values('site_id'),
    ),
]

layout = go.Layout(
    autosize=True,
    height=300,
    hovermode='closest',
    showlegend=False,
    margin=dict(t=50, b=50, l=20, r=20),
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        # Initial view; update_profile re-centres on the selected site.
        center=dict(
            lat=-33.7,
            lon=151.3
        ),
        pitch=0,
        zoom=12,
        style='satellite-streets'
    ),
)

# Plain-dict form of the figure; not used by the widget below but kept in
# case other cells reference it.
fig = dict(data=data, layout=layout)
g2 = go.FigureWidget(data=data, layout=layout)


# Add panel for time series
#
# Three placeholder traces sharing one x-axis, each on its own y-axis.
# update_profile replaces their x/y data when a site is selected.

trace_Hs0 = go.Scatter(
    x = [0,1],
    y = [0,1],
    name='Hs0'
)
trace_Tp = go.Scatter(
    x = [0,2],
    y = [0,2],
    name='Tp',
    yaxis='y2'
)
trace_beta = go.Scatter(
    x = [0,3],
    y = [0,3],
    name='beta',
    yaxis='y3'
)
data=[trace_Hs0, trace_Tp, trace_beta]

layout = go.Layout(
    title = 'Hydro/Morpho Parameters',
    height=200,
    margin=dict(t=50,b=50,l=50,r=50),
    xaxis=dict(
        title='time',
        # Leave the right-hand 10% of the figure free for the extra y-axes.
        domain=[0.0, 0.9],
        zeroline=False,
    ),
    yaxis=dict(
        title = 'Hs0 (m)',
    ),
    yaxis2=dict(
        title='Tp (s)',
        overlaying='y',
        side='right'
    ),
    yaxis3=dict(
        title='beta (-)',
        # `position` only takes effect when the axis is anchored 'free';
        # without it the beta axis is drawn on top of the Tp axis.
        anchor='free',
        overlaying='y',
        side='right',
        position=0.97
    )
)

g3 = go.FigureWidget(data=data, layout=layout)


# Add panel for water level
def _water_level_placeholder(name, x_end, color, dash=None):
    """Build a two-point placeholder line from (0, 0) to (x_end, x_end).

    All lines are 2px wide; ``dash`` is only set on the line when provided.
    update_profile later replaces the x/y data of these traces.
    """
    line_style = dict(color=color, width=2)
    if dash is not None:
        line_style['dash'] = dash
    return go.Scatter(x=[0, x_end], y=[0, x_end], name=name, line=line_style)


trace_R_high = _water_level_placeholder('R High', 1, 'rgb(91,220,229)')
trace_R_low = _water_level_placeholder('R Low', 2, 'rgb(13,174,186)')
trace_dune_crest = _water_level_placeholder('Dune Crest', 3, 'rgb(214, 117, 14)', dash='dot')
trace_dune_toe = _water_level_placeholder('Dune Toe', 3, 'rgb(142, 77, 8)', dash='dash')
trace_tide = _water_level_placeholder('Tide+Surge WL', 4, 'rgb(8,51,137)', dash='dot')

# The trace order matters: update_profile addresses these by index.
data = [trace_R_high, trace_R_low, trace_dune_crest, trace_dune_toe, trace_tide]

layout = go.Layout(
    title='Water Level & Dune Toe/Crest',
    height=200,
    margin=dict(t=50, b=50, l=50, r=50),
    xaxis=dict(title='time', domain=[0.0, 0.95], zeroline=False),
    yaxis=dict(title='Water Level (m)'),
)

g4 = go.FigureWidget(data=data, layout=layout)


def update_profile(change):
    """Refresh the four figure panels for the site chosen in ``site_id_select``.

    Registered as an observer on the dropdown's ``value`` trait. The selection
    is read from the widget itself, so ``change`` is accepted but not used.

    Panels updated:
      * g1 - pre/post storm profiles plus dune crest and dune toe markers
      * g2 - map re-centred on the site, with the highlight marker moved there
      * g3 - Hs0, Tp and beta time series
      * g4 - R_high, R_low, tide, and horizontal dune crest/toe levels

    The function does nothing when no site is selected. Tables that contain no
    rows for the site yield empty traces instead of raising.

    Parameters
    ----------
    change : object
        Change notification passed by the widget; ignored.
    """
    site_id = site_id_select.value
    if site_id is None:
        # No option selected (e.g. the filter left nothing to choose from).
        return

    site_filter = 'site_id == "{}"'.format(site_id)

    # Cross-shore profiles, split by survey type.
    site_profile = df_profiles.query(site_filter)
    prestorm_profile = site_profile.query('profile_type == "prestorm"')
    poststorm_profile = site_profile.query('profile_type == "poststorm"')

    prestorm_x = prestorm_profile.index.get_level_values('x').tolist()
    prestorm_z = prestorm_profile.z.tolist()
    poststorm_x = poststorm_profile.index.get_level_values('x').tolist()
    poststorm_z = poststorm_profile.z.tolist()

    # Dune crest / toe coordinates as plain lists (empty if the site is absent).
    site_features = df_profile_features.query(site_filter)
    dune_crest_x = site_features.dune_crest_x.tolist()
    dune_crest_z = site_features.dune_crest_z.tolist()
    dune_toe_x = site_features.dune_toe_x.tolist()
    dune_toe_z = site_features.dune_toe_z.tolist()

    # Update beach profile section plots
    with g1.batch_update():
        g1.data[0].x = prestorm_x
        g1.data[0].y = prestorm_z
        g1.data[1].x = poststorm_x
        g1.data[1].y = poststorm_z
        g1.data[2].x = dune_crest_x
        g1.data[2].y = dune_crest_z
        g1.data[3].x = dune_toe_x
        g1.data[3].y = dune_toe_z

    # Relocate plan of satellite imagery (skipped if the site has no coordinates)
    site_coords = df_sites.query(site_filter)
    if not site_coords.empty:
        site_lat = site_coords['lat'].values[0]
        site_lon = site_coords['lon'].values[0]
        with g2.batch_update():
            g2.layout.mapbox['center'] = {'lat': site_lat, 'lon': site_lon}
            g2.layout.mapbox['zoom'] = 15
            g2.data[1].lat = [site_lat]
            g2.data[1].lon = [site_lon]
            g2.data[1].text = site_coords.index.get_level_values('site_id').tolist()

    # Time series for this site; queried once and shared by g3 and g4.
    df_timeseries = df_twl.query(site_filter)
    times = df_timeseries.index.get_level_values('datetime').tolist()

    # Update time series plots
    with g3.batch_update():
        g3.data[0].x = times
        g3.data[1].x = times
        g3.data[2].x = times
        g3.data[0].y = df_timeseries['Hs0'].tolist()
        g3.data[1].y = df_timeseries['Tp'].tolist()
        g3.data[2].y = df_timeseries['beta'].tolist()

    # Dune crest/toe are drawn as horizontal lines spanning the full time
    # range, using the first feature row for the site.
    time_span = [min(times), max(times)] if times else []
    crest_level = [dune_crest_z[0]] * len(time_span) if dune_crest_z else []
    toe_level = [dune_toe_z[0]] * len(time_span) if dune_toe_z else []

    # Update water levels plot
    with g4.batch_update():
        g4.data[0].x = times
        g4.data[1].x = times
        g4.data[2].x = time_span
        g4.data[3].x = time_span
        g4.data[4].x = times
        g4.data[0].y = df_timeseries['R_high'].tolist()
        g4.data[1].y = df_timeseries['R_low'].tolist()
        g4.data[2].y = crest_level
        g4.data[3].y = toe_level
        g4.data[4].y = df_timeseries['tide'].tolist()
    
        
def update_filter(change):
    """Restrict the site dropdown to sites matching the selected impact regimes.

    A site is kept when its forecasted regime is among the values selected in
    ``forecasted_impact_select`` AND its observed regime is among the values
    selected in ``observed_impact_select``. The ``change`` argument supplied
    by the widget is not used.
    """
    matches_forecast = df_impacts_compared.storm_regime_forecasted.isin(
        forecasted_impact_select.value
    )
    matches_observed = df_impacts_compared.storm_regime_observed.isin(
        observed_impact_select.value
    )

    # The index of the comparison table supplies the dropdown options.
    matching_sites = df_impacts_compared[matches_forecast & matches_observed]
    site_id_select.options = matching_sites.index.tolist()
    
    
# Redraw the panels whenever a different site is picked.
site_id_select.observe(update_profile, names="value")

# Either impact filter changing re-computes the list of selectable sites.
for impact_select in (observed_impact_select, forecasted_impact_select):
    impact_select.observe(update_filter, names="value")

# Dashboard layout: filters, site picker, profile + map side by side,
# then the two time-series panels stacked underneath.
profile_and_map_row = widgets.HBox([g1, g2])
widgets.VBox([filter_container, site_id_container, profile_and_map_row, g3, g4])

In [178]:
# Inspect the y-values currently stored on trace index 2 of g4, which is the
# 'Dune Crest' line in the water-level panel.
g4.data[2].y

([3.111490610630103, 3.111490610630103],)