diff --git a/inputs/distributions.py b/inputs/distributions.py new file mode 100644 index 0000000..03df1ab --- /dev/null +++ b/inputs/distributions.py @@ -0,0 +1,128 @@ +"""Calculate probability distributions for IPCC sea level rise forecasts. + +This will calculate the values required to generate triangular distributions, +i.e. 'min', 'mode', and 'max' in the `numpy.random.triang()` function. + +The values are written to 'triang-values.csv' + +D. Howe +d.howe@wrl.unsw.edu.au +2022-05-05 +""" +import os +import re +import numpy as np +import pandas as pd +from scipy import stats, optimize +import matplotlib.pyplot as plt + +PLOT = False + + +def norm_cdf(x, loc, scale): + """Calculate cumulative density function, using normal distribution.""" + return stats.norm(loc=loc, scale=scale).cdf(x) + + +def triang_cdf(x, loc, scale, c): + """Calculate cumulative density function, using triangular distribution.""" + return stats.triang(loc=loc, scale=scale, c=c).cdf(x) + + +# Read data +xlsx_path = '20220502_Probabilistic_Erosion_Parameters_1st_DRAFT_FOR_DISCUSSION.xlsx' + +df = pd.read_excel(xlsx_path, sheet_name='IPCC AR6', index_col=[0, 1, 2, 3, 4]) +df = df.sort_index() +dff = df.loc[838, 'total', 'medium', 'ssp585'].T +dff.index.name = 'year' +percentiles = dff.columns.to_numpy() / 100 + +# Make SLR relative to 2020 level (at the 50th percentile) +dff -= dff.loc[2020, 50] + +for i, row in dff.iterrows(): + values = row.to_numpy() + + # Fit normal distribution + loc, scale = optimize.curve_fit(norm_cdf, values, percentiles)[0] + p_norm = {'loc': loc, 'scale': scale} + + # Fit triangular distribution + loc, scale, c = optimize.curve_fit(triang_cdf, + values, + percentiles, + p0=[values[0] - 0.1, 0.5, 0.5])[0] + p_triang = {'loc': loc, 'scale': scale, 'c': c} + + # Get triangular distribution parameters + left = p_triang['loc'] + centre = p_triang['loc'] + p_triang['scale'] * p_triang['c'] + right = p_triang['loc'] + p_triang['scale'] + + dff.loc[i, 'min'] = left + dff.loc[i, 'mode'] = centre + dff.loc[i, 'max'] = right + + if PLOT: + fig, ax = plt.subplots(1, 2, figsize=(10, 3)) + + x_min = stats.triang.ppf(0.01, **p_triang) - 0.2 + x_max = stats.triang.ppf(0.99, **p_triang) + 0.2 + x = np.linspace(x_min, x_max, num=1000) + + ax[0].plot(x, 100 * stats.norm.cdf(x, **p_norm)) + ax[0].plot(x, 100 * stats.triang.cdf(x, **p_triang)) + ax[0].plot(values, 100 * percentiles, '.', c='#444444') + + ax[1].plot(x, stats.norm.pdf(x, **p_norm), label='Normal') + ax[1].plot(x, stats.triang.pdf(x, **p_triang), label='Triangular') + ax[1].plot([], [], '.', c='#444444', label='IPCC data') + ax[1].legend() + + ax[1].axvline(x=left, c='C3') + ax[1].axvline(x=centre, c='C3') + ax[1].axvline(x=right, c='C3') + + ax[0].set_ylabel('Percentile', labelpad=10) + ax[0].set_title('Cumulative distribution') + ax[1].set_title('Probability density') + + ax[0].annotate(i, (-0.3, 1), + xycoords='axes fraction', + clip_on=False, + size=14) + for a in ax: + + a.set_xlabel('SLR (m)', labelpad=10) + a.spines['top'].set_visible(False) + a.spines['right'].set_visible(False) + + plt.show() + +# Save distribution parameters +dff[['min', 'mode', 'max']].to_csv('triang-values.csv', float_format='%0.3f') + +if PLOT: + # Plot all triangular distributions + fig, ax = plt.subplots(1, 1, figsize=(8, 4)) + + cmap = plt.cm.get_cmap('RdBu_r', len(dff)) + c = list(cmap(range(cmap.N))) + + j = -1 + for i, row in dff.iterrows(): + j += 1 + ax.plot(row[['min', 'mode', 'max']], [0, 1, 0], c=c[j]) + if j % 2 == 0: + ax.annotate(f' {i}', (row['mode'], 1), + ha='center', + va='bottom', + rotation=90) + + ax.set_xlabel('SLR (m)', labelpad=10) + ax.set_ylabel('Probability density (-)', labelpad=10) + + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + plt.show()