# Performs probabilstic assessment on *.yaml files, # and saves output shoreline chainages to csv import os import yaml import argparse import numpy as np import pandas as pd import matplotlib import matplotlib.patheffects import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec from scipy.optimize import curve_fit from scipy import interpolate from tqdm import tqdm def choose(options): """ Get the user to select one (or all) items from a list of options. Args: x (list): list of options Returns: the selected option(s) in a list """ options.append('All') for i, option in enumerate(options): print('{:3d}) {:s}'.format(i + 1, option)) while True: try: val = input("Choose> ") if int(val) == len(options): selected = options[:-1] else: selected = [options[int(val) - 1]] return selected break except (ValueError, IndexError): print("Invalid input, please try again") def parse_args(): """ Check if a parameter file was provided at the command prompt Returns: the parsed input arguments in a dict """ parser = argparse.ArgumentParser(usage=__doc__) parser.add_argument('-f', '--file', help='name of parameter file', default=None) parser.add_argument('-a', '--all', help='process all *.yaml files in folder', action='store_true') return parser.parse_args() def read_parameter_file(fname): """ Read yaml file Args: fname (str): name of yaml file Returns: the file contents in a dict """ with open(fname, 'r') as stream: try: params = yaml.safe_load(stream) except yaml.YAMLError as exc: print(exc, "\nCheck the formatting of the yaml file.") return params def ari_to_aep(x): """ Convert annual recurrance interval to annual exceedance probability. Args: x (array_like): input array Returns: the calculated AEP """ return 1 - np.exp(-1 / x) def aep_to_ari(x): """ Convert annual exceedance probability to annual recurrance interval. Args: x (array_like): input array Returns: the calculated ARI """ return -1 / np.log(1 - x) def get_value_from_ep(x, p): """ Calculate percentile value from array based on exceedance probability. Args: x (array_like): input array p (float): probability (e.g. p=0.05 for 5% exceedance) Returns: the calculated percentile value """ return np.percentile(x, (1 - p) * 100) def get_cumulative_distribution(x, min_ep=1e-5, num=1000): """ Calculate cumulative distrubution function of a random variable. Args: x (array_like): input array min_ep (float): minimum exceedance probability num (int): number of points to calculate distribution over Returns: the cumulative distribution for plotting, e.g. plt.plot(exceedance_probability, value) """ exceedance_probability = np.logspace(np.log10(min_ep), 0, num) value = get_value_from_ep(x, exceedance_probability) return exceedance_probability, value def constrained_log_fit(x, p1, p2): """ Fit log curve based on AEP values. Gordon (1987) provides two cases: 1. Low demand, open beaches: p1 = 5 p2 = 30 2. High demand, rip heads p1 = 40 p2 = 40 Args: x (array_like): input array of AEP values p1 (float): parameter 1 p2 (float): parameter 2 Returns: the fitted values for the given AEPs """ ari = aep_to_ari(x) return p1 + p2 * np.log(ari) def get_ongoing_recession(n_runs, start_year, end_year, sea_level_rise, bruun_factor, underlying_recession): """ Use Monte Carlo simulation to calculate ongoing shoreline recession. Args: n_runs (int): number of runs start_year (int): first year of model end_year (int): last year of model sea_level_rise (dict): 'year' (array_like): years 'min' (array_like): minimum value 'mode' (array_like): most likely value 'max' (array_like): maximum value bruun_factor (dict): 'min' (float): minimum value 'mode' (float): most likely value 'max' (float): maximum value underlying_recession (dict): 'min' (float): minimum value 'mode' (float): most likely value 'max' (float): maximum value Returns: the simulated recession distance (m) """ # Get time interval from input file years = np.arange(start_year, end_year + 1) n_years = len(years) # Interpolate sea level rise projections (m) slr_mode = np.interp(years, xp=sea_level_rise['year'], fp=sea_level_rise['mode'])[:, np.newaxis] try: slr_min = np.interp(years, xp=sea_level_rise['year'], fp=sea_level_rise['min'])[:, np.newaxis] except ValueError: # Use mode for deterministic beaches slr_min = slr_mode try: slr_max = np.interp(years, xp=sea_level_rise['year'], fp=sea_level_rise['max'])[:, np.newaxis] except ValueError: # Use mode for deterministic beaches slr_max = slr_mode # Initialise sea level rise array slr = np.zeros([n_years, n_runs]) for i in range(n_years): # Use triangular distribution for SLR in each year (m) try: slr[i, :] = np.random.triangular(left=slr_min[i], mode=slr_mode[i], right=slr_max[i], size=n_runs) except ValueError: # Use constant value if slr_min == slr_max slr[i, :] = np.ones([1, n_runs]) * slr_mode[i] # Sort each row, so SLR follows a smooth trajectory for each model run slr.sort(axis=1) # Shuffle columns, so the order of model runs is randomised np.random.shuffle(slr.T) # Shift sea level so it is zero in the start year slr -= slr[0, :].mean(axis=0) # Simulate probabilistic Bruun factors (-) if (bruun_factor['min'] < bruun_factor['mode'] < bruun_factor['max']): # Use probabilistic method if min and max are provided bruun_factor = np.random.triangular(left=bruun_factor['min'], mode=bruun_factor['mode'], right=bruun_factor['max'], size=n_runs) else: # Ensure values were not given in reverse order if bruun_factor['min'] > bruun_factor['mode']: raise ValueError( "bruun_factor['min'] must be less than bruun_factor['mode']") if bruun_factor['mode'] > bruun_factor['max']: raise ValueError( "bruun_factor['mode'] must be less than bruun_factor['max']") # Use deterministic method if only mode is provided bruun_factor = np.ones([1, n_runs]) * bruun_factor['mode'] # Simulate probabilistic underlying recession rate (m/y) if (underlying_recession['min'] < underlying_recession['mode'] < underlying_recession['max']): # Use probabilistic method if min and max are provided underlying_recession_rate = np.random.triangular( left=underlying_recession['min'], mode=underlying_recession['mode'], right=underlying_recession['max'], size=n_runs) else: # Ensure values were not given in reverse order if underlying_recession['min'] > underlying_recession['mode']: raise ValueError(("underlying_recession['min'] must be " "less than underlying_recession['mode']")) if underlying_recession['mode'] > underlying_recession['max']: raise ValueError(("underlying_recession['mode'] must be " "less than underlying_recession['max']")) # Use deterministic method if only mode is provided underlying_recession_rate = np.ones([1, n_runs ]) * underlying_recession['mode'] # Repeat Bruun factors for each year bruun_factor = np.tile(bruun_factor, [n_years, 1]) # Calculate total underlying recession year_factor = np.arange(1, n_years + 1)[:, np.newaxis] underlying_recession = underlying_recession_rate * year_factor underlying_recession_rate = np.tile(underlying_recession_rate, [n_years, 1]) # Remove probabilistic component from start year slr[0, :] = slr[0, :].mean() underlying_recession[0, :] = underlying_recession[0, :].mean() bruun_factor[0, :] = bruun_factor[0, :].mean() underlying_recession_rate[0, :] = underlying_recession_rate[0, :].mean() # Calculate total ongoing recession (m) ongoing_recession = slr * bruun_factor + underlying_recession return (ongoing_recession, slr, bruun_factor, underlying_recession, underlying_recession_rate) def get_storm_demand_volume(ref_aep, ref_vol, n, mode='fit'): """ Calculate storm demand volume, either by fitting a log curve to the supplied values (mode='fit'), or by simulating storm events with a poisson distribution (mode='simulate'). Note: 'simulate' mode tends to overestimate storm demand for high ARIs to account for storm clustering. Args: ref_aep (array_like): input AEP values ref_vol (array_like): input volumes (m3) n (int): number of model runs mode (str): 'fit' (default) or 'simulate' Returns: the storm demand volume (m3/m) """ if mode == 'fit': # Fit curve based on reference storm demand volumes p, _ = curve_fit(constrained_log_fit, ref_aep, ref_vol, p0=(5., 30.)) rand_aep = np.random.rand(n) volume = constrained_log_fit(rand_aep, *p) volume[volume < 0] = 0 elif mode == 'simulate': # Simulate number of storms of each ARI magnitude lam = np.tile(ref_aep[:, np.newaxis], [1, n]) n_storms = np.random.poisson(lam) # Calculate storm demand volume (m3/m) vol_storms = ref_vol[:, np.newaxis] * n_storms volume = vol_storms.sum(axis=0) else: raise ValueError("mode must be 'fit' or 'simulate'") return volume def process(beach_name, beach_scenario, n_runs, start_year, end_year, output_years, output_ep, zsa_profile_file, zrfc_profile_file, output_folder, figure_folder, sea_level_rise, bruun_factor, underlying_recession, storm_demand, diagnostics, omit, min_chainage, segment_gaps, insert_points, append_points): """ Calculate probabilistic shorelines using Monte Carlo simulation Args: beach_name (str): name of beach in photogrammetry database beach_scenario (str): name for saved csv profile chainage file n_runs (int): number of runs start_year (int): first year of model end_year (int): last year of model output_years (list): years to save profiles output_ep (list): EP values for saved profiles zsa_profile_file (str): path to storm demand vs chainge data (ZSA) zrfc_profile_file (str): path to storm demand vs chainge data (ZRFC) output_folder (str): where to save profiles sea_level_rise (dict): 'year' (list): years 'min' (list): minimum value 'mode' (list): most likely value 'max' (list): maximum value bruun_factor (dict): 'min' (float): minimum value 'mode' (float): most likely value 'max' (float): maximum value underlying_recession (dict): 'min' (float): minimum value 'mode' (float): most likely value 'max' (float): maximum value diagnostics (dict): 'block' (list): block IDs for outputting diagnostics 'profile' (list): profile IDs for outputting diagnostics omit (dict): 'block' (list): block IDs to omit from analysis 'profile' (list): profile IDs to omit from analysis min_chainage (dict): 'block' (list): block IDs for trimming chainage 'profile' (list): profile IDs for trimming chainage 'chainage' (list): minimum chainage, at non-erodable barrier segment_gaps (dict): 'block' (list): block IDs for breaking segment 'profile' (list): profile IDs for breaking segments insert_points (dict): 'before' (dict): points to add to (south/west) end of hazard lines 'x' (list): eastings 'y' (list): northings append_points (dict): points to add to (north/east) end of lines 'x' (list): eastings 'y' (list): nortings """ # Get reference storm demand (m3/m) ref_ari = np.asarray(storm_demand['ari']) ref_aep = ari_to_aep(ref_ari) ref_vol = storm_demand['vol'] # Get year index years = np.arange(start_year, end_year + 1) # Check if beach is probabilistic if n_runs == 1: probabilistic = False else: probabilistic = True # Simulate ongoing shoreline recession r, slr, bf, ur, ur_rate = get_ongoing_recession(n_runs, start_year, end_year, sea_level_rise, bruun_factor, underlying_recession) ongoing_recession = r.copy() # Pre-allocate storm demand volume for each year (m3/m) storm_demand_volume = np.zeros([len(years), n_runs]) if probabilistic: for i in range(len(years)): # Generate synthetic storm demands for each year storm_demand_volume[i, :] = get_storm_demand_volume(ref_aep, ref_vol, n=n_runs, mode='fit') else: # Get storm demand for 1% AEP event sort_idx = np.argsort(ref_aep) storm_demand_volume[:] = np.interp(output_ep, np.array(ref_aep)[sort_idx], np.array(ref_vol)[sort_idx]) # Load profile data for current beach pbar_profiles = tqdm(['ZSA', 'ZRFC'], leave=False) for profile_type in pbar_profiles: pbar_profiles.set_description('{}'.format(profile_type)) if profile_type == 'ZSA': df_in = pd.read_csv(zsa_profile_file) if profile_type == 'ZRFC': df_in = pd.read_csv(zrfc_profile_file) col_names = [c for c in df_in.columns if c.isdigit()] # Loop through profiles dff = df_in[df_in['beach'] == beach_name] # Remove omitted profiles dff = pd.merge( pd.DataFrame(omit), dff, how='outer', indicator='source').query('source!="both"').drop(columns='source') pbar_profile = tqdm(dff.iterrows(), total=dff.shape[0], leave=False) for i, prof in pbar_profile: pbar_profile.set_description( ('Block: {}, profile: {}'.format(prof['block'], prof['profile']))) # Convert storm demand volume to a profile chainage (m) profile_volume = np.array([int(c) for c in col_names]) profile_chainage = np.array(prof[col_names], dtype=float) valid_idx = np.isfinite(profile_chainage) storm_demand_chainage = np.interp(storm_demand_volume, xp=profile_volume[valid_idx], fp=profile_chainage[valid_idx]) fig, ax = plt.subplots(9, len(output_years), figsize=(16, 24), sharey='row') # Check whether to save probabilistic diagnostics for _, bp in pd.DataFrame(diagnostics).iterrows(): if ((str(prof['block']) == str(bp['block'])) and (prof['profile'] == bp['profile'])): output_diagnostics = True # Loop through years pbar_year = tqdm(output_years, leave=False) for j, year in enumerate(pbar_year): pbar_year.set_description('Year: {}'.format(year)) # Create output dataframe df_out = df_in[['beach', 'block', 'profile']].copy() df_out['block'] = df_out['block'].astype(str) # Add info on non-erodable sections df_out = df_out.assign(min_chainage=np.nan) for b, p, ch in zip(min_chainage['block'], min_chainage['profile'], min_chainage['chainage']): idx = (df_out['block'] == str(b)) & (df_out['profile'] == p) df_out.loc[idx, 'min_chainage'] = ch # Specify which segments to break df_out = df_out.assign(segment_gaps=False) for b, p, in zip(segment_gaps['block'], segment_gaps['profile']): idx = (df_out['block'] == str(b)) & (df_out['profile'] == p) df_out.loc[idx, 'segment_gaps'] = True # Specify which profiles to plot df_out = df_out.assign(diagnostics=False) for b, p in zip(diagnostics['block'], diagnostics['profile']): idx = (df_out['block'] == str(b)) & (df_out['profile'] == p) df_out.loc[idx, 'diagnostics'] = True # Specify which profiles to omit from shapefiles df_out = df_out.assign(omit=False) for b, p in zip(omit['block'], omit['profile']): idx = (df_out['block'] == b) & (df_out['profile'] == p) df_out.loc[idx, 'omit'] = True # Specify additional points to be included in shapefiles df_out = df_out.assign(insert_points='').astype('object') for b, p, x, y in zip(insert_points['block'], insert_points['profile'], insert_points['x'], insert_points['y']): idx = np.where((df_out['block'] == str(b)) & (df_out['profile'] == p) & (df_out['beach'] == beach_name))[0][0] if not df_out.loc[idx, 'insert_points']: df_out.loc[idx, 'insert_points'] = [] df_out.loc[idx, 'insert_points'].append((x, y)) df_out = df_out.assign(append_points='').astype('object') for b, p, x, y in zip(append_points['block'], append_points['profile'], append_points['x'], append_points['y']): idx = np.where((df_out['block'] == str(b)) & (df_out['profile'] == p) & (df_out['beach'] == beach_name))[0][0] if not df_out.loc[idx, 'append_points']: df_out.loc[idx, 'append_points'] = [] df_out.loc[idx, 'append_points'].append((x, y)) # Exit if no profiles found if (df_in['beach'] == beach_name).sum() == 0: raise KeyError('"{}" not found in {}'.format( beach_name, zsa_profile_file)) # Loop through EP values ep_cols = [] for ep in output_ep: # Add column for current EP ep_col_name = 'ep_{}'.format(ep) df_out.loc[:, ep_col_name] = 0 ep_cols.append(ep_col_name) # Combine recession and storm demand chainage # Subtract because chainages decrease landward chainage_with_recession = ( storm_demand_chainage[year >= years, :] - ongoing_recession[year >= years, :]) # Calculate maximum recession that has occured so far most_receeded_chainage = chainage_with_recession.min(axis=0) # Apply recession to current chainage (m) # Use negative exceedance probability, # because chainages increase seaward ch_out = get_value_from_ep(most_receeded_chainage, 1 - np.array(output_ep)) df_out.loc[i, ep_cols] = ch_out # Check if profile is non-erodable ch_min = df_out.loc[i, 'min_chainage'] if np.isfinite(ch_min): ch_out[ch_out < ch_min] = ch_min # Stop erosion past maximum limit df_out.loc[i, ep_cols] = ch_out # Calculate chainage for zero storm demand zero_chainage = interpolate.interp1d( profile_volume[valid_idx], profile_chainage[valid_idx], fill_value='extrapolate')(0) storm_demand_dist = zero_chainage - storm_demand_chainage # Include additional points for shapefile df_csv = df_out[df_out['beach'] == beach_name] # Save values for current beach csv_name = os.path.join( output_folder, '{} {} {}.csv'.format(beach_scenario, year, profile_type)) # Write header for csv file on first iteration if df_out[df_out['beach'] == beach_name].index[0] == i: df_csv.loc[[], :].to_csv(csv_name, index=False) # Only save points if they are going into shapefile df_csv = df_csv[~df_csv['omit'].astype(bool)] # Append data for current profile df_csv[df_csv.index == i].to_csv(csv_name, mode='a', index=False, header=False, float_format='%g') if output_diagnostics: # Save probabilistic diagnostics year_idx = year == years # Find index where most extreme event occurred event_year_idx = chainage_with_recession.argmin(axis=0) # define dummy index ix = np.arange(n_runs) dump_data = { 'Sea level rise (m)': slr[event_year_idx, ix].ravel(), 'Bruun factor (-)': bf[event_year_idx, ix].ravel(), 'Bruun factor x SLR (m)': slr[event_year_idx, ix].ravel() * bf[event_year_idx, ix].ravel(), 'Underlying trend rate (m/yr)': ur_rate[year_idx, :].ravel(), 'Underlying trend (m)': ur[event_year_idx, ix].ravel(), 'Underlying + SLR (m)': r[event_year_idx, ix].ravel(), 'Total movement (m)': (storm_demand_dist + r)[event_year_idx, ix].ravel(), 'Storm demand distance (m)': storm_demand_dist[event_year_idx, ix].ravel(), 'Storm demand volume (m3/m)': storm_demand_volume[event_year_idx, ix].ravel(), } dump_df = pd.DataFrame(dump_data) dump_df['Run ID'] = np.arange(len(event_year_idx)) + 1 dump_df['Event year'] = years[event_year_idx] dump_df['Years elapsed'] = event_year_idx + 1 # Reorder columns dump_df = dump_df[[ 'Run ID', 'Event year', 'Years elapsed', 'Sea level rise (m)', 'Bruun factor (-)', 'Bruun factor x SLR (m)', 'Underlying trend rate (m/yr)', 'Underlying trend (m)', 'Underlying + SLR (m)', 'Total movement (m)', 'Storm demand distance (m)', 'Storm demand volume (m3/m)', ]] # Sort based on maximum movement dump_df = dump_df.sort_values('Total movement (m)', ascending=False) # Add encounter probabilities dump_df['Encounter probability (%)'] = np.linspace( 0, 100, num=n_runs + 2)[1:-1] dump_df = dump_df.set_index('Encounter probability (%)') csv_name = os.path.join( 'diagnostics', '{} {} {}.csv'.format(beach_scenario, year, profile_type)) dump_df.to_csv(csv_name, float_format='%g') for i, c in enumerate(dump_df.columns[3:]): ax[i, j].plot(dump_df.index, dump_df[c], '.', color='#666666', markersize=2) ax[i, j].spines['right'].set_visible(False) ax[i, j].spines['top'].set_visible(False) if j == 0: ax[i, 0].yaxis.set_label_coords(-0.4, 0.5) label = c.replace('(', '\n(') ax[i, 0].set_ylabel(label, va='top', linespacing=1.5) ax[i, j].set_xlabel('Encounter probability (%)', labelpad=10) ax[0, j].set_title(year) fig.suptitle('{}, block {}, profile {}'.format( beach_scenario, prof['block'], prof['profile']), y=0.92) if output_diagnostics: figname = os.path.join( 'diagnostics', f'{beach_scenario} {profile_type} scatter.png') plt.savefig(figname, bbox_inches='tight', dpi=300) plt.close(fig) def main(): # Choose yaml file, if not already specified args = parse_args() if args.file is not None: param_files = [args.file] else: yaml_files = [f for f in os.listdir('.') if f.endswith('.yaml')] # Choose file, unless 'all' option specified if not args.all: param_files = choose(yaml_files) else: param_files = yaml_files # Loop through beaches pbar_beach = tqdm(param_files) for p in pbar_beach: params = read_parameter_file(p) pbar_beach.set_description('Beach: {}'.format(params['beach_name'])) process(**params) print('\n') if __name__ == '__main__': main()