You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
roches-probabilistic-hazard.../probabilistic-analysis/probabilistic_assessment.py

731 lines
28 KiB
Python

# Performs probabilstic assessment on *.yaml files,
# and saves output shoreline chainages to csv
import os
import yaml
import argparse
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.patheffects
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from scipy.optimize import curve_fit
from scipy import interpolate
from tqdm import tqdm
def choose(options):
"""
Get the user to select one (or all) items from a list of options.
Args:
x (list): list of options
Returns:
the selected option(s) in a list
"""
options.append('All')
for i, option in enumerate(options):
print('{:3d}) {:s}'.format(i + 1, option))
while True:
try:
val = input("Choose> ")
if int(val) == len(options):
selected = options[:-1]
else:
selected = [options[int(val) - 1]]
return selected
break
except (ValueError, IndexError):
print("Invalid input, please try again")
def parse_args():
"""
Check if a parameter file was provided at the command prompt
Returns:
the parsed input arguments in a dict
"""
parser = argparse.ArgumentParser(usage=__doc__)
parser.add_argument('-f',
'--file',
help='name of parameter file',
default=None)
parser.add_argument('-a',
'--all',
help='process all *.yaml files in folder',
action='store_true')
return parser.parse_args()
def read_parameter_file(fname):
"""
Read yaml file
Args:
fname (str): name of yaml file
Returns:
the file contents in a dict
"""
with open(fname, 'r') as stream:
try:
params = yaml.safe_load(stream)
except yaml.YAMLError as exc:
print(exc, "\nCheck the formatting of the yaml file.")
return params
def ari_to_aep(x):
"""
Convert annual recurrance interval to annual exceedance probability.
Args:
x (array_like): input array
Returns:
the calculated AEP
"""
return 1 - np.exp(-1 / x)
def aep_to_ari(x):
"""
Convert annual exceedance probability to annual recurrance interval.
Args:
x (array_like): input array
Returns:
the calculated ARI
"""
return -1 / np.log(1 - x)
def get_value_from_ep(x, p):
"""
Calculate percentile value from array based on exceedance probability.
Args:
x (array_like): input array
p (float): probability (e.g. p=0.05 for 5% exceedance)
Returns:
the calculated percentile value
"""
return np.percentile(x, (1 - p) * 100)
def get_cumulative_distribution(x, min_ep=1e-5, num=1000):
"""
Calculate cumulative distrubution function of a random variable.
Args:
x (array_like): input array
min_ep (float): minimum exceedance probability
num (int): number of points to calculate distribution over
Returns:
the cumulative distribution for plotting, e.g.
plt.plot(exceedance_probability, value)
"""
exceedance_probability = np.logspace(np.log10(min_ep), 0, num)
value = get_value_from_ep(x, exceedance_probability)
return exceedance_probability, value
def constrained_log_fit(x, p1, p2):
"""
Fit log curve based on AEP values. Gordon (1987) provides two cases:
1. Low demand, open beaches:
p1 = 5
p2 = 30
2. High demand, rip heads
p1 = 40
p2 = 40
Args:
x (array_like): input array of AEP values
p1 (float): parameter 1
p2 (float): parameter 2
Returns:
the fitted values for the given AEPs
"""
ari = aep_to_ari(x)
return p1 + p2 * np.log(ari)
def get_ongoing_recession(n_runs, start_year, end_year, sea_level_rise,
bruun_factor, underlying_recession):
"""
Use Monte Carlo simulation to calculate ongoing shoreline recession.
Args:
n_runs (int): number of runs
start_year (int): first year of model
end_year (int): last year of model
sea_level_rise (dict):
'year' (array_like): years
'min' (array_like): minimum value
'mode' (array_like): most likely value
'max' (array_like): maximum value
bruun_factor (dict):
'min' (float): minimum value
'mode' (float): most likely value
'max' (float): maximum value
underlying_recession (dict):
'min' (float): minimum value
'mode' (float): most likely value
'max' (float): maximum value
Returns:
the simulated recession distance (m)
"""
# Get time interval from input file
years = np.arange(start_year, end_year + 1)
n_years = len(years)
# Interpolate sea level rise projections (m)
slr_mode = np.interp(years,
xp=sea_level_rise['year'],
fp=sea_level_rise['mode'])[:, np.newaxis]
try:
slr_min = np.interp(years,
xp=sea_level_rise['year'],
fp=sea_level_rise['min'])[:, np.newaxis]
except ValueError:
# Use mode for deterministic beaches
slr_min = slr_mode
try:
slr_max = np.interp(years,
xp=sea_level_rise['year'],
fp=sea_level_rise['max'])[:, np.newaxis]
except ValueError:
# Use mode for deterministic beaches
slr_max = slr_mode
# Initialise sea level rise array
slr = np.zeros([n_years, n_runs])
for i in range(n_years):
# Use triangular distribution for SLR in each year (m)
try:
slr[i, :] = np.random.triangular(left=slr_min[i],
mode=slr_mode[i],
right=slr_max[i],
size=n_runs)
except ValueError:
# Use constant value if slr_min == slr_max
slr[i, :] = np.ones([1, n_runs]) * slr_mode[i]
# Sort each row, so SLR follows a smooth trajectory for each model run
slr.sort(axis=1)
# Shuffle columns, so the order of model runs is randomised
np.random.shuffle(slr.T)
# Shift sea level so it is zero in the start year
slr -= slr[0, :].mean(axis=0)
# Simulate probabilistic Bruun factors (-)
if (bruun_factor['min'] < bruun_factor['mode'] < bruun_factor['max']):
# Use probabilistic method if min and max are provided
bruun_factor = np.random.triangular(left=bruun_factor['min'],
mode=bruun_factor['mode'],
right=bruun_factor['max'],
size=n_runs)
else:
# Ensure values were not given in reverse order
if bruun_factor['min'] > bruun_factor['mode']:
raise ValueError(
"bruun_factor['min'] must be less than bruun_factor['mode']")
if bruun_factor['mode'] > bruun_factor['max']:
raise ValueError(
"bruun_factor['mode'] must be less than bruun_factor['max']")
# Use deterministic method if only mode is provided
bruun_factor = np.ones([1, n_runs]) * bruun_factor['mode']
# Simulate probabilistic underlying recession rate (m/y)
if (underlying_recession['min'] < underlying_recession['mode'] <
underlying_recession['max']):
# Use probabilistic method if min and max are provided
underlying_recession_rate = np.random.triangular(
left=underlying_recession['min'],
mode=underlying_recession['mode'],
right=underlying_recession['max'],
size=n_runs)
else:
# Ensure values were not given in reverse order
if underlying_recession['min'] > underlying_recession['mode']:
raise ValueError(("underlying_recession['min'] must be "
"less than underlying_recession['mode']"))
if underlying_recession['mode'] > underlying_recession['max']:
raise ValueError(("underlying_recession['mode'] must be "
"less than underlying_recession['max']"))
# Use deterministic method if only mode is provided
underlying_recession_rate = np.ones([1, n_runs
]) * underlying_recession['mode']
# Repeat Bruun factors for each year
bruun_factor = np.tile(bruun_factor, [n_years, 1])
# Calculate total underlying recession
year_factor = np.arange(1, n_years + 1)[:, np.newaxis]
underlying_recession = underlying_recession_rate * year_factor
underlying_recession_rate = np.tile(underlying_recession_rate,
[n_years, 1])
# Remove probabilistic component from start year
slr[0, :] = slr[0, :].mean()
underlying_recession[0, :] = underlying_recession[0, :].mean()
bruun_factor[0, :] = bruun_factor[0, :].mean()
underlying_recession_rate[0, :] = underlying_recession_rate[0, :].mean()
# Calculate total ongoing recession (m)
ongoing_recession = slr * bruun_factor + underlying_recession
return (ongoing_recession, slr, bruun_factor, underlying_recession,
underlying_recession_rate)
def get_storm_demand_volume(ref_aep, ref_vol, n, mode='fit'):
"""
Calculate storm demand volume, either by fitting a log curve to the
supplied values (mode='fit'), or by simulating storm events with a
poisson distribution (mode='simulate').
Note: 'simulate' mode tends to overestimate storm demand for high ARIs
to account for storm clustering.
Args:
ref_aep (array_like): input AEP values
ref_vol (array_like): input volumes (m3)
n (int): number of model runs
mode (str): 'fit' (default) or 'simulate'
Returns:
the storm demand volume (m3/m)
"""
if mode == 'fit':
# Fit curve based on reference storm demand volumes
p, _ = curve_fit(constrained_log_fit, ref_aep, ref_vol, p0=(5., 30.))
rand_aep = np.random.rand(n)
volume = constrained_log_fit(rand_aep, *p)
volume[volume < 0] = 0
elif mode == 'simulate':
# Simulate number of storms of each ARI magnitude
lam = np.tile(ref_aep[:, np.newaxis], [1, n])
n_storms = np.random.poisson(lam)
# Calculate storm demand volume (m3/m)
vol_storms = ref_vol[:, np.newaxis] * n_storms
volume = vol_storms.sum(axis=0)
else:
raise ValueError("mode must be 'fit' or 'simulate'")
return volume
def process(beach_name, beach_scenario, n_runs, start_year, end_year,
output_years, output_ep, zsa_profile_file, zrfc_profile_file,
output_folder, figure_folder, sea_level_rise, bruun_factor,
underlying_recession, storm_demand, diagnostics, omit,
min_chainage, segment_gaps, insert_points, append_points):
"""
Calculate probabilistic shorelines using Monte Carlo simulation
Args:
beach_name (str): name of beach in photogrammetry database
beach_scenario (str): name for saved csv profile chainage file
n_runs (int): number of runs
start_year (int): first year of model
end_year (int): last year of model
output_years (list): years to save profiles
output_ep (list): EP values for saved profiles
zsa_profile_file (str): path to storm demand vs chainge data (ZSA)
zrfc_profile_file (str): path to storm demand vs chainge data (ZRFC)
output_folder (str): where to save profiles
sea_level_rise (dict):
'year' (list): years
'min' (list): minimum value
'mode' (list): most likely value
'max' (list): maximum value
bruun_factor (dict):
'min' (float): minimum value
'mode' (float): most likely value
'max' (float): maximum value
underlying_recession (dict):
'min' (float): minimum value
'mode' (float): most likely value
'max' (float): maximum value
diagnostics (dict):
'block' (list): block IDs for outputting diagnostics
'profile' (list): profile IDs for outputting diagnostics
omit (dict):
'block' (list): block IDs to omit from analysis
'profile' (list): profile IDs to omit from analysis
min_chainage (dict):
'block' (list): block IDs for trimming chainage
'profile' (list): profile IDs for trimming chainage
'chainage' (list): minimum chainage, at non-erodable barrier
segment_gaps (dict):
'block' (list): block IDs for breaking segment
'profile' (list): profile IDs for breaking segments
insert_points (dict):
'before' (dict): points to add to (south/west) end of hazard lines
'x' (list): eastings
'y' (list): northings
append_points (dict): points to add to (north/east) end of lines
'x' (list): eastings
'y' (list): nortings
"""
# Get reference storm demand (m3/m)
ref_ari = np.asarray(storm_demand['ari'])
ref_aep = ari_to_aep(ref_ari)
ref_vol = storm_demand['vol']
# Get year index
years = np.arange(start_year, end_year + 1)
# Check if beach is probabilistic
if n_runs == 1:
probabilistic = False
else:
probabilistic = True
# Simulate ongoing shoreline recession
r, slr, bf, ur, ur_rate = get_ongoing_recession(n_runs, start_year,
end_year, sea_level_rise,
bruun_factor,
underlying_recession)
ongoing_recession = r.copy()
# Pre-allocate storm demand volume for each year (m3/m)
storm_demand_volume = np.zeros([len(years), n_runs])
if probabilistic:
for i in range(len(years)):
# Generate synthetic storm demands for each year
storm_demand_volume[i, :] = get_storm_demand_volume(ref_aep,
ref_vol,
n=n_runs,
mode='fit')
else:
# Get storm demand for 1% AEP event
sort_idx = np.argsort(ref_aep)
storm_demand_volume[:] = np.interp(output_ep,
np.array(ref_aep)[sort_idx],
np.array(ref_vol)[sort_idx])
# Load profile data for current beach
pbar_profiles = tqdm(['ZSA', 'ZRFC'], leave=False)
for profile_type in pbar_profiles:
pbar_profiles.set_description('{}'.format(profile_type))
if profile_type == 'ZSA':
df_in = pd.read_csv(zsa_profile_file)
if profile_type == 'ZRFC':
df_in = pd.read_csv(zrfc_profile_file)
col_names = [c for c in df_in.columns if c.isdigit()]
# Loop through profiles
dff = df_in[df_in['beach'] == beach_name]
# Remove omitted profiles
dff = pd.merge(
pd.DataFrame(omit), dff, how='outer',
indicator='source').query('source!="both"').drop(columns='source')
pbar_profile = tqdm(dff.iterrows(), total=dff.shape[0], leave=False)
for i, prof in pbar_profile:
pbar_profile.set_description(
('Block: {}, profile: {}'.format(prof['block'],
prof['profile'])))
# Convert storm demand volume to a profile chainage (m)
profile_volume = np.array([int(c) for c in col_names])
profile_chainage = np.array(prof[col_names], dtype=float)
valid_idx = np.isfinite(profile_chainage)
storm_demand_chainage = np.interp(storm_demand_volume,
xp=profile_volume[valid_idx],
fp=profile_chainage[valid_idx])
fig, ax = plt.subplots(9,
len(output_years),
figsize=(16, 24),
sharey='row')
# Check whether to save probabilistic diagnostics
for _, bp in pd.DataFrame(diagnostics).iterrows():
if ((str(prof['block']) == str(bp['block']))
and (prof['profile'] == bp['profile'])):
output_diagnostics = True
# Loop through years
pbar_year = tqdm(output_years, leave=False)
for j, year in enumerate(pbar_year):
pbar_year.set_description('Year: {}'.format(year))
# Create output dataframe
df_out = df_in[['beach', 'block', 'profile']].copy()
df_out['block'] = df_out['block'].astype(str)
# Add info on non-erodable sections
df_out = df_out.assign(min_chainage=np.nan)
for b, p, ch in zip(min_chainage['block'],
min_chainage['profile'],
min_chainage['chainage']):
idx = (df_out['block'] == str(b)) & (df_out['profile']
== p)
df_out.loc[idx, 'min_chainage'] = ch
# Specify which segments to break
df_out = df_out.assign(segment_gaps=False)
for b, p, in zip(segment_gaps['block'],
segment_gaps['profile']):
idx = (df_out['block'] == str(b)) & (df_out['profile']
== p)
df_out.loc[idx, 'segment_gaps'] = True
# Specify which profiles to plot
df_out = df_out.assign(diagnostics=False)
for b, p in zip(diagnostics['block'], diagnostics['profile']):
idx = (df_out['block'] == str(b)) & (df_out['profile']
== p)
df_out.loc[idx, 'diagnostics'] = True
# Specify which profiles to omit from shapefiles
df_out = df_out.assign(omit=False)
for b, p in zip(omit['block'], omit['profile']):
idx = (df_out['block'] == b) & (df_out['profile'] == p)
df_out.loc[idx, 'omit'] = True
# Specify additional points to be included in shapefiles
df_out = df_out.assign(insert_points='').astype('object')
for b, p, x, y in zip(insert_points['block'],
insert_points['profile'],
insert_points['x'], insert_points['y']):
idx = np.where((df_out['block'] == str(b))
& (df_out['profile'] == p)
& (df_out['beach'] == beach_name))[0][0]
if not df_out.loc[idx, 'insert_points']:
df_out.loc[idx, 'insert_points'] = []
df_out.loc[idx, 'insert_points'].append((x, y))
df_out = df_out.assign(append_points='').astype('object')
for b, p, x, y in zip(append_points['block'],
append_points['profile'],
append_points['x'], append_points['y']):
idx = np.where((df_out['block'] == str(b))
& (df_out['profile'] == p)
& (df_out['beach'] == beach_name))[0][0]
if not df_out.loc[idx, 'append_points']:
df_out.loc[idx, 'append_points'] = []
df_out.loc[idx, 'append_points'].append((x, y))
# Exit if no profiles found
if (df_in['beach'] == beach_name).sum() == 0:
raise KeyError('"{}" not found in {}'.format(
beach_name, zsa_profile_file))
# Loop through EP values
ep_cols = []
for ep in output_ep:
# Add column for current EP
ep_col_name = 'ep_{}'.format(ep)
df_out.loc[:, ep_col_name] = 0
ep_cols.append(ep_col_name)
# Combine recession and storm demand chainage
# Subtract because chainages decrease landward
chainage_with_recession = (
storm_demand_chainage[year >= years, :] -
ongoing_recession[year >= years, :])
# Calculate maximum recession that has occured so far
most_receeded_chainage = chainage_with_recession.min(axis=0)
# Apply recession to current chainage (m)
# Use negative exceedance probability,
# because chainages increase seaward
ch_out = get_value_from_ep(most_receeded_chainage,
1 - np.array(output_ep))
df_out.loc[i, ep_cols] = ch_out
# Check if profile is non-erodable
ch_min = df_out.loc[i, 'min_chainage']
if np.isfinite(ch_min):
ch_out[ch_out < ch_min] = ch_min
# Stop erosion past maximum limit
df_out.loc[i, ep_cols] = ch_out
# Calculate chainage for zero storm demand
zero_chainage = interpolate.interp1d(
profile_volume[valid_idx],
profile_chainage[valid_idx],
fill_value='extrapolate')(0)
storm_demand_dist = zero_chainage - storm_demand_chainage
# Include additional points for shapefile
df_csv = df_out[df_out['beach'] == beach_name]
# Save values for current beach
csv_name = os.path.join(
output_folder,
'{} {} {}.csv'.format(beach_scenario, year, profile_type))
# Write header for csv file on first iteration
if df_out[df_out['beach'] == beach_name].index[0] == i:
df_csv.loc[[], :].to_csv(csv_name, index=False)
# Only save points if they are going into shapefile
df_csv = df_csv[~df_csv['omit'].astype(bool)]
# Append data for current profile
df_csv[df_csv.index == i].to_csv(csv_name,
mode='a',
index=False,
header=False,
float_format='%g')
if output_diagnostics:
# Save probabilistic diagnostics
year_idx = year == years
# Find index where most extreme event occurred
event_year_idx = chainage_with_recession.argmin(axis=0)
# define dummy index
ix = np.arange(n_runs)
dump_data = {
'Sea level rise (m)':
slr[event_year_idx, ix].ravel(),
'Bruun factor (-)':
bf[event_year_idx, ix].ravel(),
'Bruun factor x SLR (m)':
slr[event_year_idx, ix].ravel() *
bf[event_year_idx, ix].ravel(),
'Underlying trend rate (m/yr)':
ur_rate[year_idx, :].ravel(),
'Underlying trend (m)':
ur[event_year_idx, ix].ravel(),
'Underlying + SLR (m)':
r[event_year_idx, ix].ravel(),
'Total movement (m)':
(storm_demand_dist + r)[event_year_idx, ix].ravel(),
'Storm demand distance (m)':
storm_demand_dist[event_year_idx, ix].ravel(),
'Storm demand volume (m3/m)':
storm_demand_volume[event_year_idx, ix].ravel(),
}
dump_df = pd.DataFrame(dump_data)
dump_df['Run ID'] = np.arange(len(event_year_idx)) + 1
dump_df['Event year'] = years[event_year_idx]
dump_df['Years elapsed'] = event_year_idx + 1
# Reorder columns
dump_df = dump_df[[
'Run ID',
'Event year',
'Years elapsed',
'Sea level rise (m)',
'Bruun factor (-)',
'Bruun factor x SLR (m)',
'Underlying trend rate (m/yr)',
'Underlying trend (m)',
'Underlying + SLR (m)',
'Total movement (m)',
'Storm demand distance (m)',
'Storm demand volume (m3/m)',
]]
# Sort based on maximum movement
dump_df = dump_df.sort_values('Total movement (m)',
ascending=False)
# Add encounter probabilities
dump_df['Encounter probability (%)'] = np.linspace(
0, 100, num=n_runs + 2)[1:-1]
dump_df = dump_df.set_index('Encounter probability (%)')
csv_name = os.path.join(
'diagnostics',
'{} {} {}.csv'.format(beach_scenario, year,
profile_type))
dump_df.to_csv(csv_name, float_format='%g')
for i, c in enumerate(dump_df.columns[3:]):
ax[i, j].plot(dump_df.index,
dump_df[c],
'.',
color='#666666',
markersize=2)
ax[i, j].spines['right'].set_visible(False)
ax[i, j].spines['top'].set_visible(False)
if j == 0:
ax[i, 0].yaxis.set_label_coords(-0.4, 0.5)
label = c.replace('(', '\n(')
ax[i, 0].set_ylabel(label,
va='top',
linespacing=1.5)
ax[i, j].set_xlabel('Encounter probability (%)',
labelpad=10)
ax[0, j].set_title(year)
fig.suptitle('{}, block {}, profile {}'.format(
beach_scenario, prof['block'], prof['profile']),
y=0.92)
if output_diagnostics:
figname = os.path.join(
'diagnostics',
f'{beach_scenario} {profile_type} scatter.png')
plt.savefig(figname, bbox_inches='tight', dpi=300)
plt.close(fig)
def main():
# Choose yaml file, if not already specified
args = parse_args()
if args.file is not None:
param_files = [args.file]
else:
yaml_files = [f for f in os.listdir('.') if f.endswith('.yaml')]
# Choose file, unless 'all' option specified
if not args.all:
param_files = choose(yaml_files)
else:
param_files = yaml_files
# Loop through beaches
pbar_beach = tqdm(param_files)
for p in pbar_beach:
params = read_parameter_file(p)
pbar_beach.set_description('Beach: {}'.format(params['beach_name']))
process(**params)
print('\n')
if __name__ == '__main__':
main()