## Investigate how dune toe compares to R_high

In [1]:
# Notebook setup: inline matplotlib rendering plus the autoreload extension.
%matplotlib inline
%reload_ext autoreload
# NOTE(review): a bare `%autoreload` presumably reloads modules only once, at this point;
# `%autoreload 2` is the usual setting for reloading before every cell -- confirm intent.
%autoreload

In [2]:
from IPython.core.debugger import set_trace

import pandas as pd
import numpy as np
import os

import plotly
import plotly.graph_objs as go
import plotly.plotly as py
import plotly.tools as tls
import plotly.figure_factory as ff
import plotly.io as pio

### Load data
Load data from the `../data/interim/` folder and parse into `pandas` dataframes.

In [3]:
def df_from_csv(csv, index_col, data_folder='../data/interim'):
    """
    Read a single CSV file from ``data_folder`` into a pandas dataframe.

    :param csv: file name of the CSV, relative to ``data_folder``
    :param index_col: passed straight through to ``pd.read_csv`` as ``index_col``
    :param data_folder: folder that contains the CSV file
    :return: the dataframe produced by ``pd.read_csv``
    """
    # Announce each file so slow imports are visible in the cell output
    print('Importing {}'.format(csv))
    csv_path = os.path.join(data_folder, csv)
    df = pd.read_csv(csv_path, index_col=index_col)
    return df

# profiles.csv is indexed by its first three columns, profile_features.csv by its first
df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])
df_profile_features = df_from_csv('profile_features.csv', index_col=[0])

# Impact tables: one forecast per slope method, plus the observed impacts
impacts = dict(
    forecasted=dict(
        foreshore_slope_sto06=df_from_csv(
            'impacts_forecasted_foreshore_slope_sto06.csv', index_col=[0]),
        mean_slope_sto06=df_from_csv(
            'impacts_forecasted_mean_slope_sto06.csv', index_col=[0]),
    ),
    observed=df_from_csv('impacts_observed.csv', index_col=[0]),
)

# Forecasted water level tables, indexed by their first two columns
twls = dict(
    forecasted=dict(
        foreshore_slope_sto06=df_from_csv(
            'twl_foreshore_slope_sto06.csv', index_col=[0, 1]),
        mean_slope_sto06=df_from_csv(
            'twl_mean_slope_sto06.csv', index_col=[0, 1]),
    ),
)

print('Done!')

Importing profiles.csv



elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison



Importing profile_features.csv
Importing impacts_forecasted_foreshore_slope_sto06.csv
Importing impacts_forecasted_mean_slope_sto06.csv
Importing impacts_observed.csv
Importing twl_foreshore_slope_sto06.csv
Importing twl_mean_slope_sto06.csv
Done!


### Compare predicted R_high with D_low
Let's see what the distribution of R_high is compared with D_low. How far off are the predicted water levels compared with the dune toes?

In [29]:
def get_site_ids(df_forecasted, df_observed, forecasted_regime, observed_regime):
    """
    Find the sites whose forecasted and observed storm regimes both match the
    requested values.

    :param df_forecasted: dataframe with a ``storm_regime`` column and a ``site_id`` index level
    :param df_observed: dataframe with a ``storm_regime`` column and a ``site_id`` index level
    :param forecasted_regime: regime to select in ``df_forecasted``
    :param observed_regime: regime to select in ``df_observed``
    :return: sorted list of site_ids present in both selections
    """
    def _sites_in_regime(df, regime):
        # Unique site_ids of the rows whose storm_regime equals `regime`
        matching = df.query("storm_regime == '{}'".format(regime))
        return set(matching.index.get_level_values('site_id'))

    forecasted_sites = _sites_in_regime(df_forecasted, forecasted_regime)
    observed_sites = _sites_in_regime(df_observed, observed_regime)
    return sorted(forecasted_sites & observed_sites)


def get_R_high_D_low_diff(site_ids, df_profile_features, df_twls):
    """
    Compute, per site, the maximum forecasted R_high minus the prestorm dune toe
    elevation (D_low).

    Positive values indicate R_high is larger than D_low.

    :param site_ids: list of site_ids to include
    :param df_profile_features: dataframe indexed by site_id with ``profile_type``
        and ``dune_toe_z`` columns
    :param df_twls: dataframe whose index has a ``site_id`` level, with an ``R_high`` column
    :return: pandas Series named ``diff``
    """
    # Prestorm dune toe elevation at each requested site
    prestorm_features = df_profile_features.loc[site_ids].query(
        'profile_type=="prestorm"')
    dune_toes = prestorm_features['dune_toe_z']

    # Largest forecasted R_high at each requested site
    max_r_highs = df_twls.loc[site_ids].groupby('site_id')['R_high'].max()

    # Align both series on their index, then subtract
    combined = pd.concat([dune_toes, max_r_highs], axis=1, sort=True)
    return (combined['R_high'] - combined['dune_toe_z']).rename('diff')


In [53]:
# Sites forecasted (mean slope, sto06) as collision but observed as swash
swash_overpredicted_site_ids = get_site_ids(
    impacts['forecasted']['mean_slope_sto06'],
    impacts['observed'],
    'collision',
    'swash',
)
swash_overpredicted_diffs = get_R_high_D_low_diff(
    swash_overpredicted_site_ids,
    df_profile_features,
    twls['forecasted']['mean_slope_sto06'],
)

# Sites both forecasted and observed as swash
swash_correct_site_ids = get_site_ids(
    impacts['forecasted']['mean_slope_sto06'],
    impacts['observed'],
    'swash',
    'swash',
)
swash_correct_diffs = get_R_high_D_low_diff(
    swash_correct_site_ids,
    df_profile_features,
    twls['forecasted']['mean_slope_sto06'],
)

# Horizontal histograms (values on the y axis) using 0.1-wide bins
trace1 = go.Histogram(
    y=swash_correct_diffs.tolist(),
    name='Correctly predicted',
    opacity=0.75,
    marker={'color': '#67a9cf'},
    ybins={'size': 0.1},
)
trace2 = go.Histogram(
    y=swash_overpredicted_diffs.tolist(),
    name='Overpredicted',
    opacity=0.75,
    marker={'color': '#ef8a62'},
    ybins={'size': 0.1},
)

layout = go.Layout(
    title='R_high - D_low<br>Swash Regime',
    barmode='overlay',
    yaxis={'title': 'z (m AHD)'},
    xaxis={'title': 'Count'},
    bargap=0.2,
    bargroupgap=0.1,
    legend={'x': .6, 'y': 1},
)

g_plot_swash = go.FigureWidget(data=[trace2, trace1], layout=layout)

# To output to file
# NOTE(review): pio.write_image writes the file to disk and presumably returns None,
# so `img_bytes` does not hold image bytes -- confirm (pio.to_image returns bytes).
img_bytes = pio.write_image(
    g_plot_swash, 'g_plot_swash.png', format='png', width=600, height=400, scale=5)

g_plot_swash



FigureWidget({
    'data': [{'marker': {'color': '#ef8a62'},
              'name': 'Overpredicted',
          …

In [54]:
# Sites forecasted (mean slope, sto06) as swash but observed as collision
collision_underpredicted_site_ids = get_site_ids(
    impacts['forecasted']['mean_slope_sto06'],
    impacts['observed'],
    'swash',
    'collision',
)
collision_underpredicted_diffs = get_R_high_D_low_diff(
    collision_underpredicted_site_ids,
    df_profile_features,
    twls['forecasted']['mean_slope_sto06'],
)

# Sites both forecasted and observed as collision
collision_correct_site_ids = get_site_ids(
    impacts['forecasted']['mean_slope_sto06'],
    impacts['observed'],
    'collision',
    'collision',
)
collision_correct_diffs = get_R_high_D_low_diff(
    collision_correct_site_ids,
    df_profile_features,
    twls['forecasted']['mean_slope_sto06'],
)

# Horizontal histograms (values on the y axis) using 0.1-wide bins
trace1 = go.Histogram(
    y=collision_correct_diffs.tolist(),
    name='Correctly predicted',
    opacity=0.75,
    marker={'color': '#67a9cf'},
    ybins={'size': 0.1},
)
trace2 = go.Histogram(
    y=collision_underpredicted_diffs.tolist(),
    name='Underpredicted',
    opacity=0.75,
    marker={'color': '#ef8a62'},
    ybins={'size': 0.1},
)

layout = go.Layout(
    title='R_high - D_low<br>Collision Regime',
    barmode='overlay',
    yaxis={'title': 'z (m AHD)'},
    xaxis={'title': 'Count'},
    bargap=0.2,
    bargroupgap=0.1,
    legend={'x': .6, 'y': 1},
)

g_plot_collision = go.FigureWidget(data=[trace2, trace1], layout=layout)

# To output to file
# NOTE(review): pio.write_image writes the file to disk and presumably returns None,
# so `img_bytes` does not hold image bytes -- confirm (pio.to_image returns bytes).
img_bytes = pio.write_image(
    g_plot_collision, 'g_plot_collision.png', format='png', width=600, height=400, scale=5)

g_plot_collision

FigureWidget({
    'data': [{'marker': {'color': '#ef8a62'},
              'name': 'Underpredicted',
         …

### Does dune toe lower?


### What do overpredicted and underpredicted profiles look like?

Define a function for getting the average beach profile for a number of given site_ids:

In [156]:
def get_avg_profile(site_ids, debug=False, profiles=None):
    """
    Average the prestorm beach profiles of the given sites.

    Each profile's x coordinates are shifted so that its last non-NaN z sample
    sits at x = 0; the shifted profiles are then pooled and z is averaged at
    each x.

    :param site_ids: iterable of site_ids whose prestorm profiles are averaged
    :param debug: if True, stop after the first five site_ids (quick test runs)
    :param profiles: dataframe with ``site_id``, ``profile_type`` and ``x`` index
        levels and a ``z`` column. Defaults to the notebook-level ``df_profiles``.
    :return: dict with keys ``x`` (shifted x values), ``z`` (mean z at each x),
        ``std`` (spread of z at each x) and ``n`` (number of profiles used).
        All lists are empty and ``n`` is 0 when no usable profile is found.
    """
    if profiles is None:
        profiles = df_profiles

    shifted_xs = []
    zs = []
    n_profiles = 0
    for n, site_id in enumerate(site_ids):
        profile = profiles.query(
            "site_id == '{}' and profile_type == 'prestorm'".format(site_id))
        profile_z = np.asarray(profile['z'].values)
        profile_x = np.asarray(profile.index.get_level_values('x'))

        # Re-base x on the location of the last valid (non-NaN) z sample.
        # Sites with no valid samples cannot be aligned, so they are skipped.
        valid_idx = np.flatnonzero(~np.isnan(profile_z))
        if valid_idx.size > 0:
            shifted_xs.append(profile_x - profile_x[valid_idx[-1]])
            zs.append(profile_z)
            n_profiles += 1

        # Return early for debugging
        if debug and n > 3:
            break

    # Nothing to average (no site_ids given, or none had valid data)
    if n_profiles == 0:
        return {'x': [], 'z': [], 'std': [], 'n': 0}

    # Pool every sample and aggregate z at each shifted x location
    df = pd.DataFrame({'x': np.concatenate(shifted_xs),
                       'z': np.concatenate(zs)})
    # NOTE(review): depending on the pandas version these numpy callables may be
    # dispatched to the groupby's own mean/std (different ddof) -- confirm if the
    # exact definition of `std` matters.
    stats = df.groupby('x')['z'].agg([np.nanmean, np.nanstd])

    return {
        'x': stats.index.tolist(),
        'z': stats['nanmean'].tolist(),
        'std': stats['nanstd'].tolist(),
        'n': n_profiles  # number of profiles
    }

Now, let's look at whether there is a difference between the average beach profile of correctly forecasted site_ids and incorrectly forecasted site_ids. First, looking at sites where we observed swash regime.

In [161]:
# Average prestorm profiles for sites observed in the swash regime, split by
# whether the forecast overpredicted (collision) or was correct (swash)
overpredicted = get_avg_profile(swash_overpredicted_site_ids)
correct = get_avg_profile(swash_correct_site_ids)

# Overpredicted sites: mean profile (solid) with +/- one std envelope (dashed)
trace_overpredicted_mean = go.Scatter(
    x=overpredicted['x'],
    y=overpredicted['z'],
    opacity=1,
    mode='lines',
    name='Mean overpredicted profile (n={})'.format(overpredicted['n']),
    line=dict(
        color=('rgb(205, 0, 0)'),
        width=2)
)

trace_overpredited_std_top = go.Scatter(
    x=overpredicted['x'],
    y=np.add(overpredicted['z'], overpredicted['std']),
    opacity=1,
    hoverinfo='none',
    showlegend=False,
    mode='lines',
    line=dict(
        color=('rgb(205, 0, 0)'),
        width=0.5,
        dash='dash')
)

trace_overpredited_std_btm = go.Scatter(
    x=overpredicted['x'],
    y=np.subtract(overpredicted['z'], overpredicted['std']),
    opacity=1,
    hoverinfo='none',
    mode='lines',
    showlegend=False,
    line=dict(
        color=('rgb(205, 0, 0)'),
        width=0.5,
        dash='dash')
)

# Correctly predicted sites: read everything from the `correct` dict computed in
# this cell so the plot does not depend on variables left over in the kernel
trace_correct_mean = go.Scatter(
    x=correct['x'],
    y=correct['z'],
    opacity=1,
    mode='lines',
    name='Mean correct profile (n={})'.format(correct['n']),
    line=dict(
        color=('rgb(0, 205, 0)'),
        width=2)
)

trace_correct_std_top = go.Scatter(
    x=correct['x'],
    y=np.add(correct['z'], correct['std']),
    opacity=1,
    hoverinfo='none',
    showlegend=False,
    mode='lines',
    line=dict(
        color=('rgb(0, 205, 0)'),
        width=0.5,
        dash='dash')
)

trace_correct_std_btm = go.Scatter(
    x=correct['x'],
    y=np.subtract(correct['z'], correct['std']),
    opacity=1,
    hoverinfo='none',
    mode='lines',
    showlegend=False,
    line=dict(
        color=('rgb(0, 205, 0)'),
        width=0.5,
        dash='dash')
)

layout = dict(showlegend=True,
              title='Observed Swash Impact Regime',
              legend=dict(x=.6, y=1),
              xaxis=dict(
                  range=[-150, 0]),
              yaxis=dict(
                  range=[0, 10]))

fig = go.FigureWidget(data=[trace_overpredicted_mean,
                            trace_overpredited_std_top,
                            trace_overpredited_std_btm,
                            trace_correct_mean,
                            trace_correct_std_top,
                            trace_correct_std_btm],
                      layout=layout)

# To output to file
img_bytes = pio.write_image(
    fig, 'mean_profiles_swash.png', format='png', width=600, height=600, scale=5)

fig

FigureWidget({
    'data': [{'line': {'color': 'rgb(205, 0, 0)', 'width': 2},
              'mode': 'lines',
 …

We can see that the difference is pretty minimal. For cases where we predicted collision, but observed swash (overprediction), we see that overpredicted profiles are slightly more concave than correctly predicted sites.

In [162]:
# Average prestorm profiles for sites observed in the collision regime, split by
# whether the forecast underpredicted (swash) or was correct (collision)
underpredicted = get_avg_profile(collision_underpredicted_site_ids)
correct = get_avg_profile(collision_correct_site_ids)

# Underpredicted sites: mean profile (solid) with +/- one std envelope (dashed)
trace_underpredicted_mean = go.Scatter(
    x=underpredicted['x'],
    y=underpredicted['z'],
    opacity=1,
    mode='lines',
    name='Mean underpredicted profile (n={})'.format(underpredicted['n']),
    line=dict(
        color=('rgb(205, 0, 0)'),
        width=2)
)

trace_underpredicted_std_top = go.Scatter(
    x=underpredicted['x'],
    y=np.add(underpredicted['z'], underpredicted['std']),
    opacity=1,
    hoverinfo='none',
    showlegend=False,
    mode='lines',
    line=dict(
        color=('rgb(205, 0, 0)'),
        width=0.5,
        dash='dash')
)

trace_underpredicted_std_btm = go.Scatter(
    x=underpredicted['x'],
    y=np.subtract(underpredicted['z'], underpredicted['std']),
    opacity=1,
    hoverinfo='none',
    mode='lines',
    showlegend=False,
    line=dict(
        color=('rgb(205, 0, 0)'),
        width=0.5,
        dash='dash')
)

# Correctly predicted sites: read everything from the `correct` dict computed in
# this cell so the plot shows the collision sites rather than whatever values
# happen to be left over in the kernel
trace_correct_mean = go.Scatter(
    x=correct['x'],
    y=correct['z'],
    opacity=1,
    mode='lines',
    name='Mean correct profile (n={})'.format(correct['n']),
    line=dict(
        color=('rgb(0, 205, 0)'),
        width=2)
)

trace_correct_std_top = go.Scatter(
    x=correct['x'],
    y=np.add(correct['z'], correct['std']),
    opacity=1,
    hoverinfo='none',
    showlegend=False,
    mode='lines',
    line=dict(
        color=('rgb(0, 205, 0)'),
        width=0.5,
        dash='dash')
)

trace_correct_std_btm = go.Scatter(
    x=correct['x'],
    y=np.subtract(correct['z'], correct['std']),
    opacity=1,
    hoverinfo='none',
    mode='lines',
    showlegend=False,
    line=dict(
        color=('rgb(0, 205, 0)'),
        width=0.5,
        dash='dash')
)

layout = dict(showlegend=True,
              title='Observed Collision Impact Regime',
              legend=dict(x=.6, y=1),
              xaxis=dict(
                  range=[-150, 0]),
              yaxis=dict(
                  range=[0, 10]))

fig = go.FigureWidget(data=[trace_underpredicted_mean,
                            trace_underpredicted_std_top,
                            trace_underpredicted_std_btm,
                            trace_correct_mean,
                            trace_correct_std_top,
                            trace_correct_std_btm],
                      layout=layout)

# To output to file
img_bytes = pio.write_image(
    fig, 'mean_profiles_collision.png', format='png', width=600, height=600, scale=5)

fig



FigureWidget({
    'data': [{'line': {'color': 'rgb(205, 0, 0)', 'width': 2},
              'mode': 'lines',
 …

This plot is a bit more interesting. It shows that we are correctly forecasting collision when the profile is more accreted/convex, but when the profile is more eroded/concave, the water level is underpredicted. Why is this? 