# Run comparison
Create a comparison between different runs by looking at the different R_high values and storm regimes.

## Setup notebook

In [None]:
# Enable autoreloading of our modules. 
# Most of the code will be located in the /src/ folder, 
# and then called from the notebook.
%matplotlib inline
%reload_ext autoreload
%autoreload

In [None]:
from IPython.core.debugger import set_trace

import pandas as pd
import numpy as np
import os
import decimal
import plotly
import plotly.graph_objs as go
import plotly.plotly as py
import plotly.tools as tls
import plotly.figure_factory as ff
from plotly import tools
import plotly.io as pio
from scipy import stats
import math
import matplotlib
from matplotlib import cm
import colorlover as cl
from tqdm import tqdm_notebook
from ipywidgets import widgets, Output
from IPython.display import display, clear_output, Image, HTML
from scipy import stats
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
from pandas.api.types import CategoricalDtype
from scipy.interpolate import UnivariateSpline
from shapely.geometry import Point, LineString

In [None]:
# Matplotlib default settings
plt.rcParams.update({
    'figure.figsize': (10, 6),
    'axes.grid': True,
    'grid.alpha': 0.5,
    'grid.color': 'grey',
    'grid.linestyle': '--',
})

# Render all figure text through LaTeX, using Helvetica for text and maths.
# https://stackoverflow.com/a/20709149
matplotlib.rcParams['text.usetex'] = True

# The preamble is assigned as a single newline-joined string rather than a
# list: newer matplotlib releases only accept a string for this setting, and
# older releases build the same preamble from it.
matplotlib.rcParams['text.latex.preamble'] = '\n'.join([
    r'\usepackage{siunitx}',  # i need upright \micro symbols, but you need...
    r'\sisetup{detect-all}',  # ...this to force siunitx to actually use your fonts
    r'\usepackage{helvet}',  # set the normal font here
    r'\usepackage{amsmath}',
    r'\usepackage{sansmath}',  # load up the sansmath so that math -> helvet
    r'\sansmath',  # <- tricky! -- gotta actually tell tex to use!
])

## Import data

In [None]:
def df_from_csv(csv, index_col, data_folder='../data/interim'):
    """Load one CSV file from the data folder into a DataFrame.

    Args:
        csv: File name of the CSV, relative to ``data_folder``.
        index_col: Column position(s) handed to ``pandas.read_csv`` to use
            as the index.
        data_folder: Directory containing the CSV file.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    # Report progress first, since the larger files can take a while to parse
    print('Importing {}'.format(csv))
    csv_path = os.path.join(data_folder, csv)
    frame = pd.read_csv(csv_path, index_col=index_col)
    return frame

# Interim data sets shared by all forecast runs
df_waves = df_from_csv('waves.csv', index_col=[0, 1])
df_tides = df_from_csv('tides.csv', index_col=[0, 1])
df_profiles = df_from_csv('profiles.csv', index_col=[0, 1, 2])
df_sites = df_from_csv('sites.csv', index_col=[0])
df_profile_features_crest_toes = df_from_csv('profile_features_crest_toes.csv', index_col=[0])

# The forecasted data sets must be in the same order for impacts and twls.
# Building both dictionaries from this one list guarantees that, instead of
# relying on two hand-written key lists staying in sync.
forecast_names = [
    'foreshore_slope_sto06',
    'mean_slope_sto06',
    'mean_slope_nie91',
    'mean_slope_hol86',
]

# Forecasted impacts per model, plus the single observed impacts table
impacts = {
    'forecasted': {
        name: df_from_csv('impacts_forecasted_{}.csv'.format(name), index_col=[0])
        for name in forecast_names
    },
    'observed': df_from_csv('impacts_observed.csv', index_col=[0]),
}

# Forecasted total water levels per model, indexed by the first two columns
twls = {
    'forecasted': {
        name: df_from_csv('twl_{}.csv'.format(name), index_col=[0, 1])
        for name in forecast_names
    }
}
print('Done!')

## Get prediction accuracy
Use [scikit-learn](https://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics) model evaluation metrics

In [None]:
import pprint
# Shared pretty-printer (2-space indent) used to dump nested result dictionaries
pp = pprint.PrettyPrinter(indent=2)

In [None]:
import sklearn.metrics

# Encode the storm regime values as ordered categorical integers so the
# observed and forecasted regimes can be compared.
# NOTE(review): pandas assigns code -1 to any value that is not one of these
# four categories (including missing values) - confirm whether such rows
# should be dropped before scoring.
cat_type = CategoricalDtype(
    categories=["swash", "collision", "overwash", "inundation"], ordered=True)


def _regime_codes(df_impacts):
    """Return the `storm_regime` column of `df_impacts` as integer category codes."""
    return df_impacts.storm_regime.astype(cat_type).cat.codes.values


# NOTE(review): the scores below compare rows by position, so this assumes the
# observed and forecasted impact tables list the same sites in the same
# order - confirm against the files.
correct_regime = _regime_codes(impacts['observed'])

# Define our forecast model names
models = list(impacts['forecasted'])

# Define the metrics we want to report for each forecast model
metrics = [
    'accuracy_score', 'balanced_accuracy_score', 'confusion_matrix',
    'classification_report', 'f1_score', 'fbeta_score', 'precision_score', 'recall_score'
]

# Scoring callable for each metric, taking (observed codes, predicted codes).
# 'classification_report' deliberately has no entry: it is listed in `metrics`
# but not computed, so its slot in `performance` stays an empty dictionary.
metric_functions = {
    'accuracy_score':
        sklearn.metrics.accuracy_score,
    'balanced_accuracy_score':
        sklearn.metrics.balanced_accuracy_score,
    'confusion_matrix':
        lambda y_true, y_pred: sklearn.metrics.confusion_matrix(
            y_true, y_pred, labels=[0, 1, 2, 3]),
    'f1_score':
        lambda y_true, y_pred: sklearn.metrics.f1_score(
            y_true, y_pred, average='weighted'),
    'fbeta_score':
        lambda y_true, y_pred: sklearn.metrics.fbeta_score(
            y_true, y_pred, average='weighted', beta=1),
    'precision_score':
        lambda y_true, y_pred: sklearn.metrics.precision_score(
            y_true, y_pred, average='weighted'),
    'recall_score':
        lambda y_true, y_pred: sklearn.metrics.recall_score(
            y_true, y_pred, average='weighted'),
}

# Store results in a nested dictionary: performance[metric][model]
performance = {metric: {} for metric in metrics}

for model in models:

    # Get predicted storm regimes once per model; every metric reuses them
    df_pred = impacts['forecasted'][model]
    predicted_regime = _regime_codes(df_pred)

    for metric in metrics:
        score = metric_functions.get(metric)
        if score is None:
            # No scoring function registered for this metric, so nothing is stored
            continue

        # Store metric in results dictionary
        performance[metric][model] = score(correct_regime, predicted_regime)

pp.pprint(performance)

In [None]:
# Display the regime codes left over from the last forecast model scored above
predicted_regime

## Scatter plot matrix
 - Use [Altair](https://altair-viz.github.io/getting_started/installation.html) for interactivity?
 - Or maybe [Holoviews](https://towardsdatascience.com/pyviz-simplifying-the-data-visualisation-process-in-python-1b6d2cb728f1)?