# Data exploration
This notebook shows an example of how the data is loaded and accessed for further analysis.

In [1]:
# Enable autoreloading of our modules. 
# Most of the code will be located in the /src/ folder, 
# and then called from the notebook.

%reload_ext autoreload
%autoreload

In [None]:
from IPython.core.debugger import set_trace

import pandas as pd
import numpy as np
import os

import plotly
import plotly.graph_objs as go
import plotly.plotly as py
import plotly.tools as tls

import colorlover as cl

from ipywidgets import widgets, Output
from IPython.display import display, clear_output, Image, HTML

In [None]:
def df_from_csv(csv, index_col, data_folder='../data/interim'):
    """Read one CSV file from ``data_folder`` into a DataFrame.

    Args:
        csv: File name of the CSV, relative to ``data_folder``.
        index_col: Column position(s) handed to ``pd.read_csv`` as the index.
        data_folder: Folder that contains the CSV file.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    csv_path = os.path.join(data_folder, csv)
    return pd.read_csv(csv_path, index_col=index_col)

# Interim data sets; each is indexed by the leading column(s) of its CSV.
df_waves, df_tides = (
    df_from_csv(csv, index_col=[0, 1]) for csv in ('waves.csv', 'tides.csv')
)
df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])
df_sites, df_profile_features = (
    df_from_csv(csv, index_col=[0]) for csv in ('sites.csv', 'profile_features.csv')
)

# Note that the forecasted data sets should be in the same order for impacts and twls
impacts = {
    'forecasted': {
        key: df_from_csv(csv, index_col=[0])
        for key, csv in [
            ('foreshore_slope_sto06', 'impacts_forecasted_foreshore_slope_sto06.csv'),
            ('mean_slope_sto06', 'impacts_forecasted_mean_slope_sto06.csv'),
        ]
    },
    'observed': df_from_csv('impacts_observed.csv', index_col=[0]),
}

twls = {
    'forecasted': {
        key: df_from_csv(csv, index_col=[0, 1])
        for key, csv in [
            ('foreshore_slope_sto06', 'twl_foreshore_slope_sto06.csv'),
            ('mean_slope_sto06', 'twl_mean_slope_sto06.csv'),
        ]
    },
}

In [None]:
# One Output widget for the observed impacts plus one per forecasted data set.
# (`tables` is assigned again further down, when the tables panel is built.)
tables = [Output() for _ in range(1 + len(impacts['forecasted']))]
tables

In [None]:
# Create widgets for filtering by observed and forecasted impacts
filter_title = widgets.HTML(value="<b>Filter by observed and predicted impacts:</b>")


def _regime_selectbox(df_impacts):
    """Return a multi-select listing the non-null storm regimes of ``df_impacts``.

    Every regime is selected initially, so nothing is filtered out by default.
    """
    regimes = df_impacts.storm_regime.dropna().unique().tolist()
    return widgets.SelectMultiple(options=regimes, value=regimes, disabled=False)


# The observed impacts come first, followed by one select box per forecast.
titles = ['Observed Impacts']
selectboxes = [_regime_selectbox(impacts['observed'])]

for forecast in impacts['forecasted']:
    selectboxes.append(_regime_selectbox(impacts['forecasted'][forecast]))
    titles.append('Forecasted: {}'.format(forecast))

titles = [widgets.HTML(value=title) for title in titles]

# Each title sits above its select box; the pairs are laid out side by side.
children = widgets.HBox(children=[
    widgets.VBox(children=[title, box])
    for title, box in zip(titles, selectboxes)
])
filter_container = widgets.VBox(children=[filter_title, children])


# Create widgets for selecting site_id
site_id_title = widgets.HTML(value="<b>Filter by site_id:</b>")

# Every site that has a profile, in sorted order.
site_id_options = df_profiles.index.get_level_values(
    'site_id').unique().sort_values().tolist()

# Start on NARRA0001 when it exists. The Dropdown rejects a value that is not
# one of its options, so fall back to the first available site (or to no
# selection when there are no sites at all) instead of failing on other data.
if 'NARRA0001' in site_id_options:
    default_site_id = 'NARRA0001'
elif site_id_options:
    default_site_id = site_id_options[0]
else:
    default_site_id = None

site_id_select = widgets.Dropdown(
    description='site_id:   ',
    value=default_site_id,
    options=site_id_options,
)

# Filled in with the observed/forecasted storm regimes by update_profile.
site_id_impacts = widgets.HTML(value="")

site_id_container = widgets.HBox(children=[
    widgets.VBox(children=[site_id_title, widgets.HBox(children=[site_id_select])]),
    site_id_impacts
])




# Build colors for each of our forecasts: the palette is looked up by size
# (never fewer than 3 entries) and then reversed.
n_palette = str(max(len(impacts['forecasted']), 3))
colors = list(reversed(cl.scales[n_palette]['seq']['YlGnBu']))

# Add panel for pre/post storm profiles.
# All traces start on a placeholder point; update_profile overwrites their data.
trace1 = go.Scatter(
    x=[0],
    y=[0],
    name='Pre Storm Profile',
    line={'color': 'rgb(51,160,44)', 'width': 2},
)
trace2 = go.Scatter(
    x=[0],
    y=[0],
    name='Post Storm Profile',
    line={'color': 'rgb(255,127,0)', 'width': 2},
)
# Dune crest/toe are drawn as hollow markers: transparent fill, coloured outline.
trace3 = go.Scatter(
    x=[0],
    y=[0],
    name='Pre-storm dune crest',
    mode='markers',
    marker={
        'color': 'rgba(255,255,255,0)',
        'size': 10,
        'line': {'color': 'rgba(106,61,154, 1)', 'width': 2},
    },
)
trace4 = go.Scatter(
    x=[0],
    y=[0],
    name='Pre-storm dune toe',
    mode='markers',
    marker={
        'color': 'rgba(255,255,255,0)',
        'size': 10,
        'line': {'color': 'rgba(202,178,214,1)', 'width': 2},
    },
)

# One line per forecasted data set, each with its own palette colour.
forecast_traces = [
    go.Scatter(
        x=[0],
        y=[0],
        name='Peak R_high: {}'.format(forecast_name),
        mode='lines',
        line={'color': forecast_color, 'width': 4},
    )
    for forecast_name, forecast_color in zip(impacts['forecasted'], colors)
]

layout = go.Layout(
    title='Bed Profiles',
    height=300,
    legend={'font': {'size': 10}},
    margin={'t': 50, 'b': 50, 'l': 50, 'r': 20},
    xaxis={
        'title': 'x (m)',
        'autorange': True,
        'showgrid': True,
        'zeroline': True,
        'showline': True,
        'range': [0, 200],
    },
    yaxis={
        'title': 'z (m)',
        'autorange': False,
        'showgrid': True,
        'zeroline': True,
        'showline': True,
        'range': [-1, 20],
    },
)

# Trace order matters: update_profile addresses these traces by position
# (0-3 are the fixed traces above, 4 onwards are the forecast lines).
g_profiles = go.FigureWidget(
    data=[trace1, trace2, trace3, trace4] + forecast_traces,
    layout=layout,
)


# Add panel for the satellite map (rendered through Mapbox).
# A token supplied via the MAPBOX_ACCESS_TOKEN environment variable takes
# precedence; the embedded token is only a fallback so the notebook keeps
# working unchanged when the variable is unset or empty.
# NOTE(review): access tokens should not be committed with the notebook --
# move this one out of source control and rotate it.
mapbox_access_token = os.environ.get('MAPBOX_ACCESS_TOKEN') or (
    'pk.eyJ1IjoiY2hyaXNsZWFtYW4iLCJhIjoiY2pvNTY1MzZpMDc2OTN2bmw5MGsycHp5bCJ9.U2dwFg2c7RFjUNSayERUiw'
)

data = [
    # Trace 0: one marker per site, labelled with its site_id.
    go.Scattermapbox(
        lat=df_sites['lat'],
        lon=df_sites['lon'],
        mode='markers',
        marker=dict(
            size=10
        ),
        text=df_sites.index.get_level_values('site_id'),
    ),
    # Trace 1: larger, semi-transparent red marker. It starts at (0, 0) and is
    # moved onto the selected site by update_profile.
    go.Scattermapbox(
        lat=[0],
        lon=[0],
        mode='markers',
        marker=dict(
            size=20,
            color='rgb(255, 0, 0)',
            opacity=0.5,
        ),
        text=df_sites.index.get_level_values('site_id'),
    ),
]

layout = go.Layout(
    autosize=True,
    height=300,
    hovermode='closest',
    showlegend=False,
    margin=dict(t=50, b=50, l=20, r=20),
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        # Initial view; update_profile re-centres on the selected site.
        center=dict(
            lat=-33.7,
            lon=151.3
        ),
        pitch=0,
        zoom=12,
        style='satellite-streets'
    ),
)

# Plain-dict version of the figure; not referenced by the widgets in this
# cell, kept so that any existing use of `fig` continues to work.
fig = dict(data=data, layout=layout)
g_map = go.FigureWidget(data=data, layout=layout)


# Three stacked panels sharing one x axis:
#   row 1 - water levels, row 2 - beta, row 3 - waves (Hs0 and Tp).
subplot = tls.make_subplots(3, 1, print_grid=False, shared_xaxes=True)
g_timeseries = go.FigureWidget(subplot)

# The traces are addressed by position later on, so the order below matters:
#   0 = Hs0, 1 = Tp, 2 = dune crest, 3 = dune toe, 4 = tide + surge,
#   followed by the R_high traces and then the beta traces.
# All of them start with placeholder data.

# Add trace for Hs0
g_timeseries.add_trace(go.Scatter(x=[0, 1], y=[0, 1], name='Hs0'), row=3, col=1)

# Add trace for Tp
g_timeseries.add_trace(go.Scatter(x=[0, 1], y=[0, 1], name='Tp'), row=3, col=1)

# Add water levels
g_timeseries.add_trace(
    go.Scatter(
        x=[0, 3],
        y=[0, 3],
        name='Dune Crest',
        mode='lines',
        line={'color': 'rgb(214, 117, 14)', 'width': 2, 'dash': 'dot'},
    ),
    row=1,
    col=1,
)

g_timeseries.add_trace(
    go.Scatter(
        x=[0, 3],
        y=[0, 3],
        name='Dune Toe',
        mode='lines',
        line={'color': 'rgb(142, 77, 8)', 'width': 2, 'dash': 'dash'},
    ),
    row=1,
    col=1,
)

g_timeseries.add_trace(
    go.Scatter(
        x=[0, 3],
        y=[0, 3],
        name='Tide+Surge WL',
        line={'color': 'rgb(8,51,137)', 'width': 2, 'dash': 'dot'},
    ),
    row=1,
    col=1,
)

# Add trace for each forecasted R_high
for forecast, color in zip(twls['forecasted'], colors):
    g_timeseries.add_trace(
        go.Scatter(
            x=[0],
            y=[0],
            name='R_high: {}'.format(forecast),
            line={'color': color, 'width': 3},
        ),
        row=1,
        col=1,
    )

# Add trace for each forecasted beta term
for forecast, color in zip(impacts['forecasted'], colors):
    g_timeseries.add_trace(
        go.Scatter(
            x=[0, 1],
            y=[0, 1],
            name='Beta: {}'.format(forecast),
            line={'color': color, 'width': 3},
        ),
        row=2,
        col=1,
    )

# Create axis for Tp on same plot as Hs
g_timeseries.layout['yaxis4'] = {'overlaying': 'y3', 'side': 'right'}
g_timeseries.data[1]['yaxis'] = 'y4'

# Add labels to each axis
for axis_name, axis_title in [
        ('xaxis', 'datetime'),
        ('yaxis1', 'z (mAHD)'),
        ('yaxis2', 'beta (-)'),
        ('yaxis3', 'Hs0 (m)'),
        ('yaxis4', 'Tp (s)'),
]:
    g_timeseries.layout[axis_name]['title'] = axis_title

# Update figure size, legend font and margins
g_timeseries.layout.update(
    height=400,
    legend={'font': {'size': 10}},
    margin={'t': 20, 'l': 50, 'r': 20, 'b': 100},
)



# Add panel for some tables: the observed impacts first, then one per forecast.
titles = [
    widgets.HTML(value="{}".format(name))
    for name in ['observed', *impacts['forecasted']]
]


def get_observed_impacts_table(site_id):
    """Display the observed impacts of ``site_id``, transposed."""
    site_rows = impacts['observed'].query("site_id=='{}'".format(site_id))
    display(site_rows.T)


def get_forecasted_impacts_table(site_id, forecast):
    """Display the impacts of ``site_id`` from the given forecast, transposed."""
    site_rows = impacts['forecasted'][forecast].query(
        "site_id=='{}'".format(site_id))
    display(site_rows.T)


impacts_table_observed = widgets.interactive_output(
    get_observed_impacts_table,
    {'site_id': site_id_select},
)

# Each forecast's title widget doubles as the control that supplies the
# `forecast` argument: its value is the name of the forecast.
forecasted_impacts_tables = []
for forecast, title in zip(impacts['forecasted'], titles[1:]):
    table_controls = {'site_id': site_id_select, 'forecast': title}
    forecasted_impacts_tables.append(
        widgets.interactive_output(get_forecasted_impacts_table, table_controls))

tables = [impacts_table_observed, *forecasted_impacts_tables]

# Stack every title on top of its table and put the stacks side by side.
title_tables = [
    widgets.VBox(children=[heading, table])
    for heading, table in zip(titles, tables)
]

tables_container = widgets.HBox(children=list(title_tables))





def update_profile(change):
    """Refresh the profile, map, timeseries and summary panels for the selected site.

    Registered as an observer on ``site_id_select``. The site is read from the
    widget itself, so ``change`` (the notification payload) is not used.
    Returns without doing anything when no site is selected.

    Raises:
        IndexError / ValueError: when one of the data sets has no rows for the
            selected site (lookups such as ``.values[0]`` and ``max``/``min``
            are applied to the filtered rows without a guard).
    """
    site_id = site_id_select.value

    if site_id is None:
        return

    # Every table is filtered on the same site, so build the query once.
    site_query = "site_id=='{}'".format(site_id)

    site_profile = df_profiles.query(site_query)
    prestorm_profile = site_profile.query('profile_type == "prestorm"')
    poststorm_profile = site_profile.query('profile_type == "poststorm"')

    site_features = df_profile_features.query(site_query)
    dune_crest_z = site_features.dune_crest_z
    dune_toe_z = site_features.dune_toe_z

    # Update beach profile section plots
    with g_profiles.batch_update():
        g_profiles.data[0].x = prestorm_profile.index.get_level_values('x').tolist()
        g_profiles.data[0].y = prestorm_profile.z.tolist()
        g_profiles.data[1].x = poststorm_profile.index.get_level_values('x').tolist()
        g_profiles.data[1].y = poststorm_profile.z.tolist()
        g_profiles.data[2].x = site_features.dune_crest_x
        g_profiles.data[2].y = dune_crest_z
        g_profiles.data[3].x = site_features.dune_toe_x
        g_profiles.data[3].y = dune_toe_z

        # Traces 4 onwards are the forecast lines, in the order of
        # impacts['forecasted']; each is drawn as a horizontal line at the
        # site's largest R_high between x=200 and x=400.
        for n, df_impacts in enumerate(impacts['forecasted'].values()):
            R_high = max(df_impacts.query(site_query).R_high)
            g_profiles.data[4 + n].x = [200, 400]
            g_profiles.data[4 + n].y = [R_high, R_high]

    # Relocate plan of satellite imagery
    site_coords = df_sites.query(site_query)
    site_lat = site_coords['lat'].values[0]
    site_lon = site_coords['lon'].values[0]
    with g_map.batch_update():
        g_map.layout.mapbox['center'] = {'lat': site_lat, 'lon': site_lon}
        g_map.layout.mapbox['zoom'] = 15
        # Move the highlight marker (trace 1) onto the selected site.
        g_map.data[1].lat = [site_lat]
        g_map.data[1].lon = [site_lon]
        g_map.data[1].text = site_coords.index.get_level_values('site_id').tolist()

    # Update time series plots
    df_waves_site = df_waves.query(site_query)
    wave_times = df_waves_site.index.get_level_values('datetime').tolist()

    # Time span used for the tide record and the dune crest/toe lines below.
    # It starts as the wave record and is replaced by the timestamps of the
    # last forecast processed in the beta loop.
    # NOTE(review): this keeps the existing behaviour, where the variable
    # holding the wave times was re-assigned inside the loop -- confirm that
    # the forecast span (rather than the wave span) is what is wanted here.
    window_times = wave_times

    with g_timeseries.batch_update():
        g_timeseries.data[0].x = wave_times
        g_timeseries.data[0].y = df_waves_site.Hs0.tolist()
        g_timeseries.data[1].x = wave_times
        g_timeseries.data[1].y = df_waves_site.Tp.tolist()

        # Update beta values. The beta traces are located by name and paired
        # with the forecasts by position, so both must be in the same order.
        idx_betas = [n for n, x in enumerate(g_timeseries.data)
                     if 'Beta' in x.name]
        for i, forecast in zip(idx_betas, twls['forecasted']):
            df_twl = twls['forecasted'][forecast].query(site_query)
            twl_times = df_twl.index.get_level_values('datetime').tolist()
            g_timeseries.data[i].x = twl_times
            g_timeseries.data[i].y = df_twl.beta.tolist()
            window_times = twl_times

    # Update water levels plot
    t_start, t_end = min(window_times), max(window_times)
    df_tide_site = df_tides.query(site_query)
    tide_times = df_tide_site.index.get_level_values('datetime')
    df_tide_site = df_tide_site.loc[(tide_times >= t_start) & (tide_times <= t_end)]

    crest_z = dune_crest_z.tolist()[0]
    toe_z = dune_toe_z.tolist()[0]

    with g_timeseries.batch_update():
        # Dune crest and toe are horizontal lines across the whole time span.
        g_timeseries.data[2].x = [t_start, t_end]
        g_timeseries.data[2].y = [crest_z, crest_z]
        g_timeseries.data[3].x = [t_start, t_end]
        g_timeseries.data[3].y = [toe_z, toe_z]
        g_timeseries.data[4].x = df_tide_site.index.get_level_values('datetime')
        g_timeseries.data[4].y = df_tide_site.tide.tolist()

        # Update rhigh values (traces located by name, paired by position)
        idx_rhighs = [n for n, x in enumerate(g_timeseries.data)
                      if 'R_high' in x.name]
        for i, forecast in zip(idx_rhighs, twls['forecasted']):
            df_twl = twls['forecasted'][forecast].query(site_query)
            g_timeseries.data[i].x = df_twl.index.get_level_values('datetime').tolist()
            g_timeseries.data[i].y = df_twl.R_high.tolist()

    # Update site id impacts. The HTML is assembled first and assigned once,
    # so the widget is updated a single time instead of once per forecast.
    observed_regime = impacts['observed'].query(site_query).storm_regime.values[0]
    summary_lines = ["Observed: <b>{}</b><br>".format(observed_regime)]
    for forecast in impacts['forecasted']:
        regime = impacts['forecasted'][forecast].query(site_query).storm_regime.values[0]
        summary_lines.append('{}: <b>{}</b><br>'.format(forecast, regime))
    site_id_impacts.value = ''.join(summary_lines)

    # The impacts tables need no work here: they are interactive outputs bound
    # to site_id_select and refresh themselves.


site_id_select.observe(update_profile, names="value")


def update_filter(change):
    """Limit the site dropdown to the sites that pass every impact filter.

    A site passes when, in the observed impacts and in each forecasted data
    set, its storm regime is among the values selected in the matching box.
    ``change`` is the notification payload of the observer and is not used.
    """
    # Data sets in the same order as ``selectboxes``: observed, then forecasts.
    frames = [impacts['observed'], *impacts['forecasted'].values()]

    # Start from every observed site and narrow the set down box by box.
    remaining = impacts['observed'].index.tolist()
    for select, frame in zip(selectboxes, frames):
        passing = frame.loc[frame.storm_regime.isin(select.value)].index
        remaining = list(set(remaining) & set(passing.tolist()))

    site_id_select.options = sorted(remaining)

    # TODO Update options in selectboxes with number of observations?


# Update the filter if any of the boxes changes
for box in selectboxes:
    box.observe(update_filter, names="value")

# Display our widgets!
display(widgets.VBox(children=[
    filter_container,
    site_id_container,
    widgets.HBox(children=[g_profiles, g_map]),
    g_timeseries,
    tables_container,
]))

# The observer on site_id_select only runs when the selection changes, so
# without this call the profile, map and timeseries panels would keep their
# placeholder data until a different site is picked. Populate them for the
# initial selection; this runs after display() so the dashboard is still
# shown if the data for that site turns out to be incomplete.
update_profile(None)



In [None]:
# Scratch cell: looks up (without calling) the `observe` method of the first
# entry in `titles`, so the cell only echoes the bound method and registers nothing.
titles[0].observe

In [None]:
# Look up one site in a forecasted impacts table; the result is transposed so
# that every column of the table is shown as a row.
# impacts['observed'].query("site_id=='{}'".format("NARRA0018")).T
site_filter = "site_id=='{}'".format("NARRA0018")
impacts['forecasted']['foreshore_slope_sto06'].query(site_filter).T

