# Investigate 

## Setup notebook
Import our required packages and set default plotting options.

In [None]:
# Enable autoreloading of our modules. 
# Most of the code will be located in the /src/ folder, 
# and then called from the notebook.
%matplotlib inline
%reload_ext autoreload
%autoreload

In [None]:
from IPython.core.debugger import set_trace

import pandas as pd
import numpy as np
import os
import decimal
import plotly
import plotly.graph_objs as go
import plotly.plotly as py
import plotly.tools as tls
import plotly.figure_factory as ff
from plotly import tools
import plotly.io as pio
from scipy import stats
import math
import matplotlib
from matplotlib import cm
import colorlover as cl
from tqdm import tqdm_notebook
from ipywidgets import widgets, Output
from IPython.display import display, clear_output, Image, HTML
from scipy import stats
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from matplotlib.lines import Line2D
from cycler import cycler
from scipy.interpolate import interp1d
from pandas.api.types import CategoricalDtype
import seaborn as sns
sns.set(style="white")

In [None]:
# Matplotlib default settings: figure size and a dashed, semi-transparent grey grid
plt.rcParams["figure.figsize"] = (10, 6)
plt.rcParams['axes.grid'] = True
plt.rcParams['grid.alpha'] = 0.5
plt.rcParams['grid.color'] = "grey"
plt.rcParams['grid.linestyle'] = "--"

# https://stackoverflow.com/a/20709149
# matplotlib.rcParams['text.usetex'] = True

# LaTeX preamble (sans-serif text and maths via helvet/sansmath).
# NOTE: usetex is left commented out above, so this presumably has no effect
# until it is enabled.
# The preamble is supplied as one newline-joined string because Matplotlib 3.3
# deprecated list values for this rcParam.
matplotlib.rcParams['text.latex.preamble'] = '\n'.join([
    r'\usepackage{siunitx}',   # i need upright \micro symbols, but you need...
    r'\sisetup{detect-all}',   # ...this to force siunitx to actually use your fonts
    r'\usepackage{helvet}',    # set the normal font here
    r'\usepackage{amsmath}',
    r'\usepackage{sansmath}',  # load up the sansmath so that math -> helvet
    r'\sansmath',              # <- tricky! -- gotta actually tell tex to use!
])

## Import data
Import our data from the `../data/interim/` folder and load it into pandas dataframes. 

In [None]:
def df_from_csv(csv, index_col, data_folder='../data/interim'):
    """Read one csv file from the data folder into a pandas dataframe.

    Prints a short progress message naming the file before reading it.

    Args:
        csv: File name of the csv, relative to ``data_folder``.
        index_col: Column position(s) to use as the dataframe index; passed
            straight through to ``pd.read_csv``.
        data_folder: Folder containing the csv file.

    Returns:
        The dataframe produced by ``pd.read_csv``.
    """
    print('Importing {}'.format(csv))
    csv_path = os.path.join(data_folder, csv)
    loaded = pd.read_csv(csv_path, index_col=index_col)
    return loaded

# Base datasets; index_col gives the leading column(s) used as the index.
df_waves = df_from_csv('waves.csv', index_col=[0, 1])
df_tides = df_from_csv('tides.csv', index_col=[0, 1])
df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])
df_sites = df_from_csv('sites.csv', index_col=[0])
df_sites_waves = df_from_csv('sites_waves.csv', index_col=[0])
df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0,1])

# The forecasted data sets must be in the same order for impacts and twls, so
# the variant names are listed once and both dictionaries are built from them.
forecast_names = [
    'postintertidal_slope_sto06',
    'postmean_slope_sto06',
    'preintertidal_slope_sto06',
    'premean_slope_sto06',
]

# Forecasted impacts (one dataframe per variant) plus the observed impacts
impacts = {
    'forecasted': {
        name: df_from_csv('impacts_forecasted_{}.csv'.format(name), index_col=[0])
        for name in forecast_names
    },
    'observed': df_from_csv('impacts_observed.csv', index_col=[0])
}

# Forecasted TWL dataframes, keyed by the same variant names as the impacts
twls = {
    'forecasted': {
        name: df_from_csv('twl_{}.csv'.format(name), index_col=[0, 1])
        for name in forecast_names
    }
}
print('Done!')

# Gather data into one dataframe
For plotting, gather all our data into one dataframe.

In [None]:
# Forecast variant whose regime predictions are compared with the observations
df_selected_forecast = impacts['forecasted']['postintertidal_slope_sto06']

# Start from the observed impacts and attach the per-site wave data by index
df = impacts['observed'].merge(df_sites_waves, left_index=True, right_index=True)

# Add the forecasted regime as its own column, renamed so that it does not
# clash with the observed `storm_regime` column
df_forecasted = df_selected_forecast.rename(
    columns={'storm_regime': 'forecasted_regime'})['forecasted_regime']
df = pd.concat([df, df_forecasted], axis=1)

# Categorise each prediction: (observed regime, forecasted regime) -> label.
# Rows matching none of these pairs are left without an accuracy value.
accuracy_labels = {
    ('swash', 'swash'): 'correct swash',
    ('collision', 'collision'): 'correct collision',
    ('swash', 'collision'): 'overpredicted swash',
    ('collision', 'swash'): 'underpredicted collision',
}
for (observed, forecasted), label in accuracy_labels.items():
    is_match = (df.storm_regime == observed) & (df.forecasted_regime == forecasted)
    df.loc[is_match, 'accuracy'] = label

# List the available columns alphabetically for reference
print('df columns:\n===')
for column_name in sorted(df.columns):
    print(column_name)

# Create plots

## Variable pairplot, by observed storm impact
Create a pairplot of selected variables and look for relationships between them. Colors represent the different observed storm impact regimes.

In [None]:
# Variables plotted against one another in the pairplot grid
pairplot_vars = [
    'beta_prestorm_mean',
    'beta_poststorm_mean',
    'beta_diff_mean',
    'swash_pct_change',
    'width_msl_change_m',
    'width_msl_change_pct',
    'Exscum',
]

# One colour per observed storm regime
regime_palette = {'swash': 'blue', 'collision': 'orange', 'overwash': 'red'}

# Small, mostly transparent markers with a thin white edge
scatter_kws = dict(s=20, edgecolor="white", linewidth=0.1, alpha=0.1)

g = sns.pairplot(
    data=df,
    vars=pairplot_vars,
    hue='storm_regime',
    palette=regime_palette,
    dropna=True,
    plot_kws=scatter_kws)
g.savefig('11_pairplot_observed_impacts.png')

## Variable pairplot, by observed/prediction class
Create a pairplot of selected variables and look for relationships between them. Colors represent the different observed/prediction classes.

In [None]:
# Variables plotted against one another in the pairplot grid
pairplot_vars = [
    'beta_prestorm_mean',
    'beta_poststorm_mean',
    'beta_diff_mean',
    'swash_pct_change',
    'width_msl_change_m',
    'width_msl_change_pct',
    'Exscum',
]

# One colour per observed/predicted accuracy class
accuracy_palette = {
    'correct swash': 'blue',
    'correct collision': 'green',
    'overpredicted swash': 'orange',
    'underpredicted collision': 'red',
}

# Small, mostly transparent markers with a thin white edge
scatter_kws = dict(s=20, edgecolor="white", linewidth=0.1, alpha=0.1)

g = sns.pairplot(
    data=df,
    vars=pairplot_vars,
    hue='accuracy',
    palette=accuracy_palette,
    dropna=True,
    plot_kws=scatter_kws)
g.savefig('11_pairplot_accuracy_classes.png')


## Pre/post storm slope by observed/predicted class

In [None]:
# First create a melted dataframe since our coulmn's aren't exactly as they should be for plotting
df_temp = df.copy()
df_temp = df_temp.reset_index()

df_melt = pd.melt(
    df_temp,
    id_vars=['site_id', 'accuracy'],
    value_vars=['beta_prestorm_mean', 'beta_poststorm_mean'],
    var_name='profile_type',
    value_name='beta_mean')

df_melt.loc[df_melt.profile_type == 'beta_prestorm_mean','profile_type'] = 'prestorm'
df_melt.loc[df_melt.profile_type == 'beta_poststorm_mean','profile_type'] = 'poststorm'
df_melt.head()

In [None]:
f, ax = plt.subplots(figsize=(6,5))

# Accuracy classes in the left-to-right order they are drawn on the x-axis
cats = ['correct swash', 'overpredicted swash','underpredicted collision','correct collision']

# Box plot of mean slope for each accuracy class, split by profile type.
# Drawn onto `ax` explicitly rather than relying on pyplot's current axes.
sns.boxplot(
    data=df_melt,
    x="accuracy",
    y="beta_mean",
    hue="profile_type",
    order=cats,
    ax=ax
)

# Break each class name over several lines so the tick labels do not overlap
group_labels = [x.replace(' ','\n') for x in cats]
ax.set_xticklabels(group_labels)

# Setup ticks and grid: major y ticks every 0.05, minor every 0.01
ax.xaxis.grid(True)
major_ticks = np.arange(-1, 1, 0.05)
minor_ticks = np.arange(-1, 1, 0.01)
ax.set_yticks(major_ticks)
ax.set_yticks(minor_ticks, minor=True)
ax.grid(which='both')
ax.grid(which='minor', alpha=0.3,linestyle='--')
ax.grid(which='major', alpha=0.8,linestyle='-')

# Restrict the view to part of the tick range set above
ax.set_ylim([-0.02,0.3])

f.savefig('11_prepost_slopes_accuracy_classes.png',dpi=600)

## Change in slope by observed/predicted class

In [None]:
f, ax = plt.subplots(figsize=(6,5))

# Accuracy classes in the left-to-right order they are drawn on the x-axis
cats = ['correct swash', 'overpredicted swash','underpredicted collision','correct collision']

# Box plot of the `beta_diff_mean` column for each accuracy class.
# Drawn onto `ax` explicitly rather than relying on pyplot's current axes.
sns.boxplot(
    data=df,
    x="accuracy",
    y="beta_diff_mean",
    order=cats,
    ax=ax
)

# Break each class name over several lines so the tick labels do not overlap
group_labels = [x.replace(' ','\n') for x in cats]
ax.set_xticklabels(group_labels)

# Setup ticks and grid: major y ticks every 0.05, minor every 0.01
ax.xaxis.grid(True)
major_ticks = np.arange(-1, 1, 0.05)
minor_ticks = np.arange(-1, 1, 0.01)
ax.set_yticks(major_ticks)
ax.set_yticks(minor_ticks, minor=True)
ax.grid(which='both')
ax.grid(which='minor', alpha=0.3,linestyle='--')
ax.grid(which='major', alpha=0.8,linestyle='-')

# Restrict the view to part of the tick range set above
ax.set_ylim([-0.2,0.2])

f.savefig('11_change_in_slopes_accuracy_classes.png',dpi=600)