From ff8106ce8a8f0153d9a205e94db8e6f51acc441c Mon Sep 17 00:00:00 2001
From: Dan Howe
Date: Mon, 28 Mar 2022 12:26:36 +0200
Subject: [PATCH] Add 'probabilistic_assessment.py'

---
 .../probabilistic_assessment.py | 620 ++++++++++++++++++
 1 file changed, 620 insertions(+)
 create mode 100644 probabilistic-analysis/probabilistic_assessment.py

diff --git a/probabilistic-analysis/probabilistic_assessment.py b/probabilistic-analysis/probabilistic_assessment.py
new file mode 100644
index 0000000..80be570
--- /dev/null
+++ b/probabilistic-analysis/probabilistic_assessment.py
@@ -0,0 +1,620 @@
+"""Perform probabilistic assessment on *.yaml files, and save output
+shoreline chainages to csv."""
+
+import os
+import yaml
+import argparse
+import numpy as np
+import pandas as pd
+import matplotlib
+import matplotlib.patheffects
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+from scipy.optimize import curve_fit
+from scipy import interpolate
+from tqdm import tqdm
+
+# Suppress pandas SettingWithCopy warnings
+# pd.options.mode.chained_assignment = None
+
+import __main__
+if not hasattr(__main__, '__file__'):
+    # Running interactively, e.g. in a notebook
+    get_ipython().run_line_magic('matplotlib', 'inline')
+    get_ipython().run_line_magic('config', 'InlineBackend.figure_format = "svg"')
+
+
+def choose(options):
+    """
+    Get the user to select one (or all) items from a list of options.
+
+    Args:
+        options (list): list of options
+    Returns:
+        the selected option(s) in a list
+    """
+    options.append('All')
+    for i, option in enumerate(options):
+        print('{:3d}) {:s}'.format(i + 1, option))
+    while True:
+        try:
+            val = input("Choose> ")
+            if int(val) == len(options):
+                selected = options[:-1]
+            else:
+                selected = [options[int(val) - 1]]
+            return selected
+        except (ValueError, IndexError):
+            print("Invalid input, please try again")
+
+
+def parse_args():
+    """
+    Check if a parameter file was provided at the command prompt.
+
+    Returns:
+        the parsed input arguments as an argparse.Namespace
+    """
+    parser = argparse.ArgumentParser(usage=__doc__)
+    parser.add_argument(
+        '-f', '--file', help='name of parameter file', default=None)
+    parser.add_argument(
+        '-a',
+        '--all',
+        help='process all *.yaml files in folder',
+        action='store_true')
+
+    return parser.parse_args()
+
+
+def read_parameter_file(fname):
+    """
+    Read yaml file.
+
+    Args:
+        fname (str): name of yaml file
+    Returns:
+        the file contents in a dict
+    """
+    with open(fname, 'r') as stream:
+        try:
+            params = yaml.safe_load(stream)
+        except yaml.YAMLError as exc:
+            print(exc, "\nCheck the formatting of the yaml file.")
+            raise
+
+    return params
+
+
+def ari_to_aep(x):
+    """
+    Convert annual recurrence interval to annual exceedance probability.
+
+    Args:
+        x (array_like): input array
+    Returns:
+        the calculated AEP
+    """
+    return 1 - np.exp(-1 / x)
+
+
+def aep_to_ari(x):
+    """
+    Convert annual exceedance probability to annual recurrence interval.
+
+    Args:
+        x (array_like): input array
+    Returns:
+        the calculated ARI
+    """
+    return -1 / np.log(1 - x)
+
+
+def get_value_from_ep(x, p):
+    """
+    Calculate percentile value from array based on exceedance probability.
+
+    Args:
+        x (array_like): input array
+        p (float): probability (e.g. p=0.05 for 5% exceedance)
+    Returns:
+        the calculated percentile value
+    """
+    return np.percentile(x, (1 - p) * 100)
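+
+# A quick orientation for the helpers above (illustrative, doctest-style;
+# a 100 year ARI corresponds to an AEP of about 0.01):
+#
+#     >>> aep = ari_to_aep(np.array([1., 10., 100.]))
+#     >>> aep
+#     array([0.63212056, 0.09516258, 0.00995017])
+#     >>> np.round(aep_to_ari(aep))
+#     array([  1.,  10., 100.])
+#     >>> float(get_value_from_ep(np.arange(101.), p=0.05))  # 5% exceedance
+#     95.0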
+
+
+def get_cumulative_distribution(x, min_ep=1e-5, num=1000):
+    """
+    Calculate cumulative distribution function of a random variable.
+
+    Args:
+        x (array_like): input array
+        min_ep (float): minimum exceedance probability
+        num (int): number of points to calculate distribution over
+    Returns:
+        the cumulative distribution for plotting, e.g.
+        plt.plot(exceedance_probability, value)
+    """
+    exceedance_probability = np.logspace(np.log10(min_ep), 0, num)
+    value = get_value_from_ep(x, exceedance_probability)
+
+    return exceedance_probability, value
+
+
+def constrained_log_fit(x, p1, p2):
+    """
+    Fit log curve based on AEP values. Gordon (1987) provides two cases:
+    1. Low demand, open beaches:
+           p1 = 5
+           p2 = 30
+    2. High demand, rip heads:
+           p1 = 40
+           p2 = 40
+
+    Args:
+        x (array_like): input array of AEP values
+        p1 (float): parameter 1
+        p2 (float): parameter 2
+    Returns:
+        the fitted values for the given AEPs
+    """
+    ari = aep_to_ari(x)
+
+    return p1 + p2 * np.log(ari)
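+
+# For orientation: with Gordon's open-beach parameters (p1=5, p2=30), a
+# 1% AEP event (ARI of roughly 100 years) gives a storm demand of about
+# 5 + 30 * log(100), i.e. ~143 m3/m:
+#
+#     >>> constrained_log_fit(ari_to_aep(np.array([100.])), 5, 30)
+#     array([143.15510558])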
+
+
+def get_ongoing_recession(n_runs, start_year, end_year, sea_level_rise,
+                          bruun_factor, underlying_recession):
+    """
+    Use Monte Carlo simulation to calculate ongoing shoreline recession.
+
+    Args:
+        n_runs (int): number of runs
+        start_year (int): first year of model
+        end_year (int): last year of model
+        sea_level_rise (dict):
+            'year' (array_like): years
+            'min' (array_like): minimum value
+            'mode' (array_like): most likely value
+            'max' (array_like): maximum value
+        bruun_factor (dict):
+            'min' (float): minimum value
+            'mode' (float): most likely value
+            'max' (float): maximum value
+        underlying_recession (dict):
+            'min' (float): minimum value
+            'mode' (float): most likely value
+            'max' (float): maximum value
+
+    Returns:
+        the simulated recession distance (m), along with the sea level
+        rise (m), Bruun factor (-) and underlying recession (m) arrays
+    """
+
+    # Get time interval from input file
+    years = np.arange(start_year, end_year + 1)
+    n_years = len(years)
+
+    # Interpolate sea level rise projections (m)
+    slr_mode = np.interp(
+        years, xp=sea_level_rise['year'],
+        fp=sea_level_rise['mode'])[:, np.newaxis]
+
+    try:
+        slr_min = np.interp(
+            years, xp=sea_level_rise['year'],
+            fp=sea_level_rise['min'])[:, np.newaxis]
+    except ValueError:
+        # Use mode for deterministic beaches
+        slr_min = slr_mode
+
+    try:
+        slr_max = np.interp(
+            years, xp=sea_level_rise['year'],
+            fp=sea_level_rise['max'])[:, np.newaxis]
+    except ValueError:
+        # Use mode for deterministic beaches
+        slr_max = slr_mode
+
+    # Initialise sea level rise array
+    slr = np.zeros([n_years, n_runs])
+
+    for i in range(n_years):
+        # Use triangular distribution for SLR in each year (m)
+        try:
+            slr[i, :] = np.random.triangular(
+                left=slr_min[i],
+                mode=slr_mode[i],
+                right=slr_max[i],
+                size=n_runs)
+        except ValueError:
+            # Use constant value if slr_min == slr_max
+            slr[i, :] = np.ones([1, n_runs]) * slr_mode[i]
+
+    # Sort each row, so SLR follows a smooth trajectory for each model run
+    # (ndarray.sort modifies the array in place)
+    slr.sort(axis=1)
+
+    # Shuffle columns, so the order of model runs is randomised
+    # (np.random.shuffle also works in place, on the transposed view)
+    np.random.shuffle(slr.T)
+
+    # Shift sea level so it is zero in the start year
+    slr -= slr[0, :].mean(axis=0)
+
+    # Simulate probabilistic Bruun factors (-)
+    if bruun_factor['min'] < bruun_factor['mode'] < bruun_factor['max']:
+        # Use probabilistic method if min and max are provided
+        bruun_factor = np.random.triangular(
+            left=bruun_factor['min'],
+            mode=bruun_factor['mode'],
+            right=bruun_factor['max'],
+            size=n_runs)
+    else:
+        # Ensure values were not given in reverse order
+        if bruun_factor['min'] > bruun_factor['mode']:
+            raise ValueError(
+                "bruun_factor['min'] must not exceed bruun_factor['mode']")
+        if bruun_factor['mode'] > bruun_factor['max']:
+            raise ValueError(
+                "bruun_factor['mode'] must not exceed bruun_factor['max']")
+
+        # Use deterministic method if only mode is provided
+        bruun_factor = np.ones([1, n_runs]) * bruun_factor['mode']
+
+    # Simulate probabilistic underlying recession rate (m/y)
+    if (underlying_recession['min'] < underlying_recession['mode'] <
+            underlying_recession['max']):
+        # Use probabilistic method if min and max are provided
+        underlying_recession_rate = np.random.triangular(
+            left=underlying_recession['min'],
+            mode=underlying_recession['mode'],
+            right=underlying_recession['max'],
+            size=n_runs)
+    else:
+        # Ensure values were not given in reverse order
+        if underlying_recession['min'] > underlying_recession['mode']:
+            raise ValueError(("underlying_recession['min'] must not "
+                              "exceed underlying_recession['mode']"))
+        if underlying_recession['mode'] > underlying_recession['max']:
+            raise ValueError(("underlying_recession['mode'] must not "
+                              "exceed underlying_recession['max']"))
+
+        # Use deterministic method if only mode is provided
+        underlying_recession_rate = (
+            np.ones([1, n_runs]) * underlying_recession['mode'])
+
+    # Repeat Bruun factors for each year
+    bruun_factor = np.tile(bruun_factor, [n_years, 1])
+
+    # Calculate total underlying recession
+    year_factor = np.arange(1, n_years + 1)[:, np.newaxis]
+    underlying_recession = underlying_recession_rate * year_factor
+
+    # Remove probabilistic component from start year
+    slr[0, :] = slr[0, :].mean()
+    underlying_recession[0, :] = underlying_recession[0, :].mean()
+    bruun_factor[0, :] = bruun_factor[0, :].mean()
+
+    # Calculate total ongoing recession (m)
+    ongoing_recession = slr * bruun_factor + underlying_recession
+
+    return ongoing_recession, slr, bruun_factor, underlying_recession
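+
+# The sort/shuffle trick used above, in miniature (a sketch with made-up
+# bounds): after the row-wise sort, each column tracks a single quantile
+# through time (a smooth trajectory per run), and shuffling the
+# transposed view then randomises which run gets which quantile.
+#
+#     demo = np.random.triangular(0.0, 0.5, 1.0, size=(20, 1000))
+#     demo.sort(axis=1)          # in place, sorts within each year
+#     np.random.shuffle(demo.T)  # in place, permutes the run order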
+
+
+def get_storm_demand_volume(ref_aep, ref_vol, n, mode='fit'):
+    """
+    Calculate storm demand volume, either by fitting a log curve to the
+    supplied values (mode='fit'), or by simulating storm events with a
+    Poisson distribution (mode='simulate').
+
+    Note: 'simulate' mode tends to overestimate storm demand for high
+    ARIs to account for storm clustering.
+
+    Args:
+        ref_aep (array_like): input AEP values
+        ref_vol (array_like): input volumes (m3/m)
+        n (int): number of model runs
+        mode (str): 'fit' (default) or 'simulate'
+    Returns:
+        the storm demand volume (m3/m)
+    """
+    if mode == 'fit':
+        # Fit curve based on reference storm demand volumes
+        p, _ = curve_fit(constrained_log_fit, ref_aep, ref_vol, p0=(5., 30.))
+        rand_aep = np.random.rand(n)
+        volume = constrained_log_fit(rand_aep, *p)
+        volume[volume < 0] = 0
+
+    elif mode == 'simulate':
+        # Simulate number of storms of each ARI magnitude
+        lam = np.tile(ref_aep[:, np.newaxis], [1, n])
+        n_storms = np.random.poisson(lam)
+
+        # Calculate storm demand volume (m3/m)
+        vol_storms = ref_vol[:, np.newaxis] * n_storms
+        volume = vol_storms.sum(axis=0)
+
+    else:
+        raise ValueError("mode must be 'fit' or 'simulate'")
+
+    return volume
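+
+# Sketch of the two modes (the reference AEPs and volumes below are
+# hypothetical):
+#
+#     ref_aep = ari_to_aep(np.array([1., 10., 100.]))
+#     ref_vol = np.array([10., 75., 145.])
+#     fitted = get_storm_demand_volume(ref_aep, ref_vol, n=10000)
+#     simulated = get_storm_demand_volume(ref_aep, ref_vol, n=10000,
+#                                         mode='simulate')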
+
+
+def process(beach_name, beach_scenario, n_runs, start_year, end_year,
+            output_years, output_ep, zsa_profile_file, zfc_profile_file,
+            output_folder, figure_folder, sea_level_rise, bruun_factor,
+            underlying_recession, storm_demand, plot_stats, omit_from_shp,
+            min_chainage, segment_gaps, insert_points, append_points):
+    """
+    Calculate probabilistic shorelines using Monte Carlo simulation.
+
+    Args:
+        beach_name (str): name of beach in photogrammetry database
+        beach_scenario (str): name for saved csv profile chainage file
+        n_runs (int): number of runs
+        start_year (int): first year of model
+        end_year (int): last year of model
+        output_years (list): years to save profiles
+        output_ep (list): EP values for saved profiles
+        zsa_profile_file (str): path to storm demand vs chainage data (ZSA)
+        zfc_profile_file (str): path to storm demand vs chainage data (ZFC)
+        output_folder (str): where to save profiles
+        figure_folder (str): where to save figures
+        sea_level_rise (dict):
+            'year' (list): years
+            'min' (list): minimum value
+            'mode' (list): most likely value
+            'max' (list): maximum value
+        bruun_factor (dict):
+            'min' (float): minimum value
+            'mode' (float): most likely value
+            'max' (float): maximum value
+        underlying_recession (dict):
+            'min' (float): minimum value
+            'mode' (float): most likely value
+            'max' (float): maximum value
+        storm_demand (dict):
+            'ari' (list): annual recurrence intervals of reference storms
+            'vol' (list): storm demand volumes of reference storms (m3/m)
+        plot_stats (dict):
+            'block' (list): block id for plotting stats
+            'profile' (list): profile id for plotting stats
+        omit_from_shp (dict):
+            'block' (list): block id to omit from shapefiles
+            'profile' (list): profile id to omit from shapefiles
+        min_chainage (dict):
+            'block' (list): block id for trimming chainage
+            'profile' (list): profile id for trimming chainage
+            'chainage' (list): minimum chainage, at non-erodible barrier
+        segment_gaps (dict):
+            'block' (list): block id for breaking segments
+            'profile' (list): profile id for breaking segments
+        insert_points (dict): points to add to (south/west) end of
+                              hazard lines
+            'block' (list): block ids
+            'profile' (list): profile ids
+            'x' (list): eastings
+            'y' (list): northings
+        append_points (dict): points to add to (north/east) end of
+                              hazard lines, with the same keys as
+                              insert_points
+    """
+
+    # Get reference storm demand (m3/m)
+    ref_ari = np.asarray(storm_demand['ari'])
+    ref_aep = ari_to_aep(ref_ari)
+    ref_vol = storm_demand['vol']
+
+    # Get year index
+    years = np.arange(start_year, end_year + 1)
+
+    # A single run means the beach is deterministic
+    probabilistic = n_runs > 1
+
+    # Simulate ongoing shoreline recession
+    r, slr, bf, ur = get_ongoing_recession(n_runs, start_year, end_year,
+                                           sea_level_rise, bruun_factor,
+                                           underlying_recession)
+    ongoing_recession = r.copy()
+
+    # Pre-allocate storm demand volume for each year (m3/m)
+    storm_demand_volume = np.zeros([len(years), n_runs])
+
+    if probabilistic:
+        for i in range(len(years)):
+            # Generate synthetic storm demands for each year
+            storm_demand_volume[i, :] = get_storm_demand_volume(
+                ref_aep, ref_vol, n=n_runs, mode='fit')
+    else:
+        # Get storm demand for the given EP event (e.g. 1% AEP)
+        sort_idx = np.argsort(ref_aep)
+        storm_demand_volume[:] = np.interp(output_ep,
+                                           np.array(ref_aep)[sort_idx],
+                                           np.array(ref_vol)[sort_idx])
+
+    # Load profile data for current beach
+    pbar_profiles = tqdm(['ZSA', 'ZFC'])
+    for profile_type in pbar_profiles:
+        pbar_profiles.set_description('{}'.format(profile_type))
+
+        if profile_type == 'ZSA':
+            df_in = pd.read_csv(zsa_profile_file)
+        elif profile_type == 'ZFC':
+            df_in = pd.read_csv(zfc_profile_file)
+
+        col_names = [c for c in df_in.columns if c.isdigit()]
+
+        # Exit if no profiles found
+        if (df_in['beach'] == beach_name).sum() == 0:
+            raise KeyError('"{}" not found in {}'.format(
+                beach_name, zsa_profile_file))
+
+        # Loop through profiles
+        pbar_profile = tqdm(
+            df_in[df_in['beach'] == beach_name].iterrows(),
+            total=df_in[df_in['beach'] == beach_name].shape[0])
+        for i, prof in pbar_profile:
+
+            pbar_profile.set_description(('Block: {}, profile: {}'.format(
+                prof['block'], prof['profile'])))
+
+            # Convert storm demand volume to a profile chainage (m)
+            profile_volume = np.array([int(c) for c in col_names])
+            profile_chainage = np.array(prof[col_names], dtype=float)
+            valid_idx = np.isfinite(profile_chainage)
+            storm_demand_chainage = np.interp(
+                storm_demand_volume,
+                xp=profile_volume[valid_idx],
+                fp=profile_chainage[valid_idx])
+
+            # Loop through years
+            pbar_year = tqdm(output_years)
+            for year in pbar_year:
+
+                pbar_year.set_description('Year: {}'.format(year))
+
+                # Create output dataframe
+                # (copy, so assignments below do not modify df_in)
+                df_out = df_in[['beach', 'block', 'profile']].copy()
+
+                # Add info on non-erodible sections
+                df_out = df_out.assign(min_chainage=np.nan)
+                for b, p, ch in zip(min_chainage['block'],
+                                    min_chainage['profile'],
+                                    min_chainage['chainage']):
+                    idx = (df_out['block'] == str(b)) & (
+                        df_out['profile'] == p)
+                    df_out.loc[idx, 'min_chainage'] = ch
+
+                # Specify which segments to break
+                df_out = df_out.assign(segment_gaps=False)
+                for b, p, in zip(segment_gaps['block'],
+                                 segment_gaps['profile']):
+                    idx = (df_out['block'] == str(b)) & (
+                        df_out['profile'] == p)
+                    df_out.loc[idx, 'segment_gaps'] = True
+
+                # Specify which profiles to plot
+                df_out = df_out.assign(plot_stats=False)
+                for b, p in zip(plot_stats['block'], plot_stats['profile']):
+                    idx = (df_out['block'] == str(b)) & (
+                        df_out['profile'] == p)
+                    df_out.loc[idx, 'plot_stats'] = True
+
+                # Specify which profiles to omit from shapefiles
+                df_out = df_out.assign(omit_from_shp=False)
+                for b, p in zip(omit_from_shp['block'],
+                                omit_from_shp['profile']):
+                    idx = (df_out['block'] == str(b)) & (
+                        df_out['profile'] == p)
+                    df_out.loc[idx, 'omit_from_shp'] = True
+
+                # Specify additional points to be included in shapefiles
+                df_out = df_out.assign(insert_points='').astype('object')
+                for b, p, x, y in zip(insert_points['block'],
+                                      insert_points['profile'],
+                                      insert_points['x'], insert_points['y']):
+                    idx = np.where((df_out['block'] == str(b)) & (
+                        df_out['profile'] == p) & (
+                            df_out['beach'] == beach_name))[0][0]
+                    if not df_out.loc[idx, 'insert_points']:
+                        df_out.loc[idx, 'insert_points'] = []
+
+                    df_out.loc[idx, 'insert_points'].append((x, y))
+
+                df_out = df_out.assign(append_points='').astype('object')
+                for b, p, x, y in zip(append_points['block'],
+                                      append_points['profile'],
+                                      append_points['x'], append_points['y']):
+                    idx = np.where((df_out['block'] == str(b)) & (
+                        df_out['profile'] == p) & (
+                            df_out['beach'] == beach_name))[0][0]
+                    if not df_out.loc[idx, 'append_points']:
+                        df_out.loc[idx, 'append_points'] = []
+
+                    df_out.loc[idx, 'append_points'].append((x, y))
+
+                # Loop through EP values
+                ep_cols = []
+                for ep in output_ep:
+                    # Add column for current EP
+                    ep_col_name = 'ep_{}'.format(ep)
+                    df_out.loc[:, ep_col_name] = 0
+                    ep_cols.append(ep_col_name)
+
+                # Combine recession and storm demand chainage
+                # Subtract because chainages decrease landward
+                chainage_with_recession = (
+                    storm_demand_chainage[year >= years, :] -
+                    ongoing_recession[year >= years, :])
+
+                # Calculate maximum recession that has occurred so far
+                most_receded_chainage = chainage_with_recession.min(axis=0)
+
+                # Apply recession to current chainage (m)
+                # Use the complement of the exceedance probability,
+                # because chainages increase seaward
+                ch_out = get_value_from_ep(most_receded_chainage,
+                                           1 - np.array(output_ep))
+                df_out.loc[i, ep_cols] = ch_out
+
+                # Check if profile is non-erodible
+                ch_min = df_out.loc[i, 'min_chainage']
+                if np.isfinite(ch_min):
+                    # Stop erosion past maximum limit
+                    ch_out[ch_out < ch_min] = ch_min
+                    df_out.loc[i, ep_cols] = ch_out
+
+                # Calculate chainage for zero storm demand
+                zero_chainage = interpolate.interp1d(
+                    profile_volume[valid_idx],
+                    profile_chainage[valid_idx],
+                    fill_value='extrapolate')(0)
+
+                storm_demand_dist = zero_chainage - storm_demand_chainage
+
+                # Plot statistics once for each profile
+                year_idx = year == years
+
+                # Include additional points for shapefile
+                df_csv = df_out[df_out['beach'] == beach_name]
+
+                # Save values for current beach
+                csv_name = os.path.join(output_folder, '{} {} {}.csv'.format(
+                    beach_scenario, year, profile_type))
+
+                # Write header for csv file on first iteration
+                if df_out[df_out['beach'] == beach_name].index[0] == i:
+                    df_csv.loc[[], :].to_csv(csv_name, index=False)
+
+                # Append data for current profile
+                df_csv[df_csv.index == i].to_csv(
+                    csv_name,
+                    mode='a',
+                    index=False,
+                    header=False,
+                    float_format='%g')
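+
+# For reference, main() below unpacks each parameter file straight into
+# process(), so a yaml file needs one top-level entry per process()
+# argument. A minimal sketch (all names and values hypothetical):
+#
+#     beach_name: 'Example Beach'
+#     beach_scenario: 'example scenario'
+#     n_runs: 10000
+#     start_year: 2020
+#     end_year: 2120
+#     output_years: [2070, 2120]
+#     output_ep: [0.01]
+#     storm_demand:
+#         ari: [1, 10, 100]
+#         vol: [10, 75, 145]
+#
+# ...plus the remaining keys (file paths, sea_level_rise, bruun_factor,
+# underlying_recession, plot_stats, omit_from_shp, min_chainage,
+# segment_gaps, insert_points, append_points).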
+
+
+def main():
+
+    # Choose yaml file, if not already specified
+    args = parse_args()
+    if args.file is not None:
+        param_files = [args.file]
+    else:
+        yaml_files = [f for f in os.listdir('.') if f.endswith('.yaml')]
+
+        # Choose file, unless 'all' option specified
+        if not args.all:
+            param_files = choose(yaml_files)
+        else:
+            param_files = yaml_files
+
+    # Loop through beaches
+    pbar_beach = tqdm(param_files)
+    for p in pbar_beach:
+
+        params = read_parameter_file(p)
+        pbar_beach.set_description('Beach: {}'.format(params['beach_name']))
+
+        process(**params)
+
+        print('\n')
+
+
+if __name__ == '__main__':
+    main()