"""Generate probabilistic sea level rise (SLR) trajectories from IPCC AR6 data."""

import numpy as np
import pandas as pd
from scipy import stats, optimize

WORKBOOK = '../inputs/20220505_Probabilistic_Erosion_Parameters_5th_JTC_REVIEWED_IPCC_SLR_OFFSET.xlsx' # noqa
SHEET = 'IPCC AR6 WRL FINAL'
VERSION = 'WRL V5 corrected to 2020'  # First row of data


def get_cdf(x, loc, scale):
    """Calculate cumulative distribution function, using Cauchy distribution."""
    return stats.cauchy(loc=loc, scale=scale).cdf(x)


def cauchy(n_runs, start_year, end_year):
    """
    Use Monte Carlo simulation to generate sea level rise trajectories by
    fitting IPCC data to a Cauchy distribution.

    Args:
        n_runs (int): number of runs
        start_year (int): first year of model
        end_year (int): last year of model

    Returns:
        numpy.ndarray: the simulated sea level rise (m), with shape
        (number of years, n_runs)

    Raises:
        RuntimeError: if enough in-range samples cannot be drawn for a year
    """

    # Load IPCC SLR data
    df = pd.read_excel(WORKBOOK, sheet_name=SHEET, index_col='psmsl_id')
    idx = df.index.get_loc(VERSION)  # First row containing values we want
    df = df.iloc[idx:idx + 5].set_index('quantile')
    df = df.drop(columns=['process', 'confidence', 'scenario']).T
    df.index.name = 'year'
    percentiles = df.columns.values / 100

    # Fit a Cauchy distribution to each year's quantiles. Collect the fit
    # results first and attach them as columns afterwards, so the dataframe
    # is never mutated while being iterated.
    fits = {}
    for year, row in df.iterrows():
        values = row.values

        # Set valid range of probability distribution function by
        # reflecting the 5th/95th percentiles about the median
        x_min = row[5] + (row[5] - row[50])
        x_max = row[95] + (row[95] - row[50])

        # Fit Cauchy distribution
        loc, scale = optimize.curve_fit(get_cdf, values, percentiles)[0]

        if x_min == x_max:
            # Hardcode values for start year (when everything is zero);
            # curve_fit cannot recover parameters from constant data
            loc = row[50]
            scale = 0.001
            x_min = -0.001
            x_max = 0.001

        fits[year] = (loc, scale, x_min, x_max)

    for j, col in enumerate(['loc', 'scale', 'min', 'max']):
        df[col] = [fits[year][j] for year in df.index]

    # Interpolate intermediate values
    index = np.arange(df.index.min(), df.index.max() + 1)
    df = df.reindex(index).interpolate(method='linear')
    df = df.loc[start_year:end_year]  # Trim dataframe to given range

    # Prepare array for SLR values
    slr = np.zeros([len(df), n_runs], dtype=float)

    for i, (year, row) in enumerate(df.iterrows()):
        # Get probability distribution for current year
        dist = stats.cauchy(loc=row['loc'], scale=row['scale'])

        # Generate random samples, oversampling because out-of-range draws
        # are rejected; grow the factor until enough samples survive
        for factor in range(2, 1000):
            s_raw = dist.rvs(n_runs * factor)

            # Take first samples within valid range
            s = s_raw[(s_raw > row['min']) & (s_raw < row['max'])]

            if len(s) >= n_runs:
                break  # Success
        else:
            # Loop exhausted without enough in-range samples; fail loudly
            # rather than broadcasting a short array into slr[i]
            raise RuntimeError(
                'Could not draw %d in-range samples for year %s'
                % (n_runs, year))

        # Add the required number of samples
        slr[i] = s[:n_runs]

    # Sort each row to make SLR trajectories smooth
    slr = np.sort(slr, axis=1)

    # Randomise run order (column-wise)
    slr = np.random.permutation(slr.T).T

    return slr