# Data exploration
This notebook provides an example of how the data is loaded and accessed for further analysis.

In [6]:
# Enable autoreloading of our modules. 
# Most of the code will be located in the /src/ folder, 
# and then called from the notebook.

%reload_ext autoreload
%autoreload

In [7]:
from IPython.core.debugger import set_trace

import pandas as pd
import numpy as np
import os

import plotly
import plotly.graph_objs as go
import plotly.plotly as py

from ipywidgets import widgets, Output
from IPython.display import display, clear_output, Image

In [22]:
def df_from_csv(csv, index_col, data_folder='../data/interim'):
    """
    Load one CSV file from the data folder into a DataFrame.

    :param csv: File name of the CSV, relative to `data_folder`
    :param index_col: Column position(s) handed to `pd.read_csv` as `index_col`
    :param data_folder: Folder which contains the CSV file
    :return: DataFrame read from the file
    """
    csv_path = os.path.join(data_folder, csv)
    frame = pd.read_csv(csv_path, index_col=index_col)
    return frame

# Tables indexed by their first column only
df_sites = df_from_csv('sites.csv', index_col=[0])
df_profile_features = df_from_csv('profile_features.csv', index_col=[0])

# Tables indexed by their first two (waves, tides) or three (profiles) columns
df_waves = df_from_csv('waves.csv', index_col=[0, 1])
df_tides = df_from_csv('tides.csv', index_col=[0, 1])
df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])

# Note that the forecasted data sets should be in the same order for impacts and twls
impacts = {
    'forecasted': {
        model: df_from_csv(csv_name, index_col=[0])
        for model, csv_name in [
            ('foreshore_slope_sto06', 'impacts_forecasted_foreshore_slope_sto06.csv'),
            ('mean_slope_sto06', 'impacts_forecasted_mean_slope_sto06.csv'),
        ]
    },
    'observed': df_from_csv('impacts_observed.csv', index_col=[0]),
}

twls = {
    'forecasted': {
        model: df_from_csv(csv_name, index_col=[0, 1])
        for model, csv_name in [
            ('foreshore_slope_sto06', 'twl_foreshore_slope_sto06.csv'),
            ('mean_slope_sto06', 'twl_mean_slope_sto06.csv'),
        ]
    },
}


elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison



In [60]:
# Widgets for filtering the sites by observed and forecasted storm regimes
filter_title = widgets.HTML(value="<b>Filter by observed and predicted impacts:</b>")

# The observed impacts come first, followed by one select box per forecast.
# Each box lists the storm regimes found in its data set, all selected to begin with.
titles = ['Observed Impacts']
observed_regimes = impacts['observed'].storm_regime.dropna().unique().tolist()
selectboxes = [
    widgets.SelectMultiple(options=observed_regimes, value=observed_regimes, disabled=False)
]

for forecast, df_forecast in impacts['forecasted'].items():
    forecast_regimes = df_forecast.storm_regime.dropna().unique().tolist()
    selectboxes.append(
        widgets.SelectMultiple(options=forecast_regimes, value=forecast_regimes, disabled=False))
    titles.append('Forecasted: {}'.format(forecast))

# Swap the plain text titles for HTML widgets so they can be stacked above the boxes
titles = [widgets.HTML(value=title) for title in titles]

# One column (title above its select box) per data set, laid out side by side
columns = [widgets.VBox(children=[title, box]) for title, box in zip(titles, selectboxes)]
children = widgets.HBox(children=columns)
filter_container = widgets.VBox(children=[filter_title, children])



# Widgets for choosing which site is displayed
site_id_title = widgets.HTML(value="<b>Filter by site_id:</b>")

# Offer every site_id present in the profile data, in sorted order
site_id_options = df_profiles.index.get_level_values('site_id').unique().sort_values().tolist()
site_id_select = widgets.Dropdown(
    description='site_id:   ',
    value='NARRA0001',
    options=site_id_options,
)

site_id_row = widgets.HBox(children=[site_id_select])
site_id_container = widgets.VBox(children=[site_id_title, site_id_row])



# Panel with the pre/post storm profiles.
# Every trace starts out as a single placeholder point at the origin.
trace1 = go.Scatter(name='Pre Storm Profile', x=[0], y=[0])
trace2 = go.Scatter(name='Post Storm Profile', x=[0], y=[0])

# Dune crest and toe are drawn as large markers rather than lines
trace3 = go.Scatter(
    name='Pre-storm dune crest',
    x=[0],
    y=[0],
    mode='markers',
    marker={'color': 'rgb(17, 157, 255)', 'size': 20},
)
trace4 = go.Scatter(
    name='Pre-storm dune toe',
    x=[0],
    y=[0],
    mode='markers',
    marker={'color': 'rgb(231, 99, 250)', 'size': 20},
)

# One placeholder trace per forecast, in the order of impacts['forecasted']
forecast_traces = [
    go.Scatter(x=[0], y=[0], name='Peak R_high: {}'.format(forecast))
    for forecast in impacts['forecasted']
]

layout = go.Layout(
    title='Bed Profiles',
    height=300,
    legend={'font': {'size': 10}},
    margin={'t': 50, 'b': 50, 'l': 50, 'r': 20},
    xaxis={
        'title': 'x (m)',
        'autorange': True,
        'showgrid': True,
        'zeroline': True,
        'showline': True,
        'range': [0, 200],
    },
    yaxis={
        'title': 'z (m)',
        'autorange': False,
        'showgrid': True,
        'zeroline': True,
        'showline': True,
        'range': [-1, 20],
    },
)

# The four fixed traces come first, so the forecast traces start at index 4
g_profiles = go.FigureWidget(
    data=[trace1, trace2, trace3, trace4] + forecast_traces,
    layout=layout,
)


# Add panel for the satellite map (rendered through Mapbox)
# NOTE(review): this access token is committed with the notebook. Set the
# MAPBOX_ACCESS_TOKEN environment variable to use a different token, and consider
# removing the hard-coded fallback from version control.
mapbox_access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoiY2hyaXNsZWFtYW4iLCJhIjoiY2pvNTY1MzZpMDc2OTN2bmw5MGsycHp5bCJ9.U2dwFg2c7RFjUNSayERUiw',
)

data = [
    # Trace 0: one marker per site, labelled with its site_id
    go.Scattermapbox(
        lat=df_sites['lat'],
        lon=df_sites['lon'],
        mode='markers',
        marker=dict(
            size=10
        ),
        text=df_sites.index.get_level_values('site_id'),
    ),
    # Trace 1: larger, semi-transparent red marker. It starts at a placeholder
    # location (0, 0).
    go.Scattermapbox(
        lat=[0],
        lon=[0],
        mode='markers',
        marker=dict(
            size=20,
            color='rgb(255, 0, 0)',
            opacity = 0.5,
        ),
        text=df_sites.index.get_level_values('site_id'),
    ),
]

layout = go.Layout(
    autosize=True,
    height=300,
    hovermode='closest',
    showlegend=False,
    margin=dict(t=50,b=50,l=20,r=20),
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        # Initial view of the map
        center=dict(
            lat=-33.7,
            lon=151.3
        ),
        pitch=0,
        zoom=12,
        style='satellite-streets'
    ),
)

fig = dict(data=data, layout=layout)
g_map = go.FigureWidget(data=data,layout=layout)


# Add panel for time series of the hydrodynamic/morphological parameters.
# Hs0 is plotted on the left axis (y), Tp on a right axis (y2) and the beta of each
# forecast on a second right axis (y3). All traces start with placeholder data.

trace_Hs0 = go.Scatter(
    x = [0,1],
    y = [0,1],
    name='Hs0'
)
trace_Tp = go.Scatter(
    x = [0,2],
    y = [0,2],
    name='Tp',
    yaxis='y2'
)

# One beta trace per forecast, in the order of impacts['forecasted']
forecast_traces = []
for forecast in impacts['forecasted']:
    forecast_traces.append(go.Scatter(
        x = [0],
        y = [0],
        name = 'Beta: {}'.format(forecast),
        yaxis='y3'
    ))


data=[trace_Hs0, trace_Tp] + forecast_traces

layout = go.Layout(
    title = 'Hydro/Morpho Parameters',
    height=200,
    margin=dict(t=50,b=50,l=50,r=50),
    xaxis=dict(
        title='time',
        # Leave the right 10% of the figure free for the additional y axes
        domain=[0.0, 0.9],
        zeroline=False,
    ),
    yaxis=dict(
        title = 'Hs0 (m)',
    ),
    yaxis2=dict(
        title='Tp (s)',
        overlaying='y',
        side='right'
    ),
    yaxis3=dict(
        title='beta (-)',
        # `position` only takes effect on a free axis. Without this the beta axis
        # is anchored to the x axis and drawn on top of the Tp axis.
        anchor='free',
        overlaying='y',
        side='right',
        position=0.97
    )
)

g_params = go.FigureWidget(data=data, layout=layout)


# Panel with the water levels and the dune toe/crest elevations.
# All traces are created with placeholder data.
trace_dune_crest = go.Scatter(
    name='Dune Crest',
    x=[0, 3],
    y=[0, 3],
    line={'color': 'rgb(214, 117, 14)', 'width': 2, 'dash': 'dot'},
)
trace_dune_toe = go.Scatter(
    name='Dune Toe',
    x=[0, 3],
    y=[0, 3],
    line={'color': 'rgb(142, 77, 8)', 'width': 2, 'dash': 'dash'},
)
trace_tide = go.Scatter(
    name='Tide+Surge WL',
    x=[0, 4],
    y=[0, 4],
    line={'color': 'rgb(8,51,137)', 'width': 2, 'dash': 'dot'},
)

# One R_high trace per forecasted water level data set, in the order of twls['forecasted']
forecast_traces = [
    go.Scatter(x=[0], y=[0], name='R_high: {}'.format(forecast))
    for forecast in twls['forecasted']
]

# The three fixed traces come first, so the forecast traces start at index 3
data = [trace_dune_crest, trace_dune_toe, trace_tide] + forecast_traces

layout = go.Layout(
    title='Water Level & Dune Toe/Crest',
    height=200,
    margin={'t': 50, 'b': 50, 'l': 50, 'r': 50},
    xaxis={
        'title': 'time',
        'domain': [0.0, 0.95],
        'zeroline': False,
    },
    yaxis={
        'title': 'Water Level (m)',
    },
)

g_twls = go.FigureWidget(data=data, layout=layout)



def update_profile(change):
    """
    Refresh the profile, map, parameter and water level panels for the selected site.

    Registered as an observer of `site_id_select`, so it runs whenever the value of the
    dropdown changes.

    :param change: Change notification supplied by the widget. It is not used, the site is
        read from `site_id_select.value` instead.
    """

    site_id = site_id_select.value

    # Nothing to draw when the dropdown has no selection
    if site_id is None:
        return

    site_profile = df_profiles.query('site_id == "{}"'.format(site_id))
    prestorm_profile = site_profile.query('profile_type == "prestorm"')
    poststorm_profile = site_profile.query('profile_type == "poststorm"')

    poststorm_x = poststorm_profile.index.get_level_values('x').tolist()
    poststorm_z = poststorm_profile.z.tolist()

    prestorm_x = prestorm_profile.index.get_level_values('x').tolist()
    prestorm_z = prestorm_profile.z.tolist()

    site_features = df_profile_features.query('site_id == "{}"'.format(site_id))
    dune_crest_x = site_features.dune_crest_x
    dune_crest_z = site_features.dune_crest_z
    dune_toe_x = site_features.dune_toe_x
    dune_toe_z = site_features.dune_toe_z

    # Update beach profile section plots
    with g_profiles.batch_update():
        g_profiles.data[0].x = prestorm_x
        g_profiles.data[0].y = prestorm_z
        g_profiles.data[1].x = poststorm_x
        g_profiles.data[1].y = poststorm_z
        g_profiles.data[2].x = dune_crest_x
        g_profiles.data[2].y = dune_crest_z
        g_profiles.data[3].x = dune_toe_x
        g_profiles.data[3].y = dune_toe_z

        # Traces 4 onwards hold one horizontal line per forecast at its peak R_high
        for n, forecast in enumerate(impacts['forecasted']):
            R_high = max(impacts['forecasted'][forecast].query("site_id=='{}'".format(site_id)).R_high)
            g_profiles.data[4+n].x = [200, 400]
            g_profiles.data[4+n].y = [R_high, R_high]

    # Relocate plan of satellite imagery
    site_coords = df_sites.query('site_id == "{}"'.format(site_id))
    with g_map.batch_update():
        g_map.layout.mapbox['center'] = {
            'lat': site_coords['lat'].values[0],
            'lon': site_coords['lon'].values[0]
        }
        g_map.layout.mapbox['zoom'] = 15
        # Move the highlight marker (trace 1) onto the selected site
        g_map.data[1].lat = [site_coords['lat'].values[0]]
        g_map.data[1].lon = [site_coords['lon'].values[0]]
        g_map.data[1].text = site_coords['lon'].index.get_level_values('site_id').tolist()

    # Update time series plots
    df_waves_site = df_waves.query("site_id=='{}'".format(site_id))
    times = df_waves_site.index.get_level_values('datetime').tolist()
    Hs0s = df_waves_site.Hs0.tolist()
    Tps = df_waves_site.Tp.tolist()
    with g_params.batch_update():
        g_params.data[0].x = times
        g_params.data[0].y = Hs0s
        g_params.data[1].x = times
        g_params.data[1].y = Tps

        for n, forecast in enumerate(twls['forecasted']):
            df_twl = twls['forecasted'][forecast].query("site_id=='{}'".format(site_id))
            # Keep the forecast times in their own variable. `times` has to remain the
            # wave times, which define the time window of the water level panel below.
            twl_times = df_twl.index.get_level_values('datetime').tolist()
            beta = df_twl.beta.tolist()
            g_params.data[2+n].x = twl_times
            g_params.data[2+n].y = beta

    # Update water levels plot, restricted to the period covered by the wave data
    time_start, time_end = min(times), max(times)
    df_tide_site = df_tides.query("site_id=='{}'".format(site_id))
    tide_times = df_tide_site.index.get_level_values('datetime')
    df_tide_site = df_tide_site.loc[(tide_times >= time_start) & (tide_times <= time_end)]

    # The dune crest and toe are drawn as horizontal lines at the first listed elevation
    crest_z = dune_crest_z.tolist()[0]
    toe_z = dune_toe_z.tolist()[0]

    with g_twls.batch_update():
        g_twls.data[0].x = [time_start, time_end]
        g_twls.data[1].x = [time_start, time_end]
        g_twls.data[2].x = df_tide_site.index.get_level_values('datetime')
        g_twls.data[0].y = [crest_z, crest_z]
        g_twls.data[1].y = [toe_z, toe_z]
        g_twls.data[2].y = df_tide_site.tide.tolist()

        # Traces 3 onwards hold the R_high time series of each forecast
        for n, forecast in enumerate(twls['forecasted']):
            df_twl = twls['forecasted'][forecast].query("site_id=='{}'".format(site_id))
            twl_times = df_twl.index.get_level_values('datetime').tolist()
            R_high = df_twl.R_high.tolist()
            g_twls.data[3+n].x = twl_times
            g_twls.data[3+n].y = R_high


# Redraw the panels whenever a different site is selected
site_id_select.observe(update_profile, names="value")
        
        
        
def update_filter(change):
    """
    Limit the site dropdown to the sites which pass every storm regime select box.

    :param change: Change notification supplied by the observed select box. Not used.
    """

    # Observed impacts first, then each forecast: the same order as `selectboxes`
    dfs = [impacts['observed']]
    dfs.extend(impacts['forecasted'].values())

    # Start with every observed site and narrow the set down one box at a time,
    # keeping only the sites whose storm regime is selected in that box
    valid_site_ids = set(impacts['observed'].index.tolist())
    for box, df in zip(selectboxes, dfs):
        is_selected = df.storm_regime.isin(box.value)
        valid_site_ids &= set(df[is_selected].index.tolist())

    site_id_select.options = sorted(valid_site_ids)

    # TODO Update options in selectboxes with number of observations?

# Re-run the filter whenever the selection of any box changes
for box in selectboxes:
    box.observe(update_filter, names="value")

# Display our widgets!
widgets.VBox([
    filter_container,
    site_id_container,
    widgets.HBox([g_profiles, g_map]),
    g_twls,
    g_params,
])



# For table
# out = Output()
# with out:
#     display(df_waves.head(3))

# widgets.VBox([filter_container,site_id_container, out])


In [55]:
# print(max(impacts['forecasted']['foreshore_slope_sto06'].query("site_id=='{}'".format('NARRA0018')).R_high))
# print(max(impacts['forecasted']['mean_slope_sto06'].query("site_id=='{}'".format('NARRA0018')).R_high))

# df_twl = twls['forecasted']['foreshore_slope_sto06'].query("site_id=='{}'".format('NARRA0018'))

# Scratch check: select the tide records which fall within the period covered by the
# wave data of one site.
df_waves_site = df_waves.query("site_id=='{}'".format("NARRA0016"))
times = df_waves_site.index.get_level_values('datetime').tolist()

# Build the mask from the tide index only, so that it has one entry per row of df_tides.
# Interpolating the unquoted datetimes into a query string does not give a valid
# expression, so the rows are selected with the mask instead.
tide_times = df_tides.index.get_level_values('datetime')
mask = (tide_times >= min(times)) & (tide_times <= max(times))
df_tides_site = df_tides.loc[mask]