diff --git a/slr/get_slr_distributions.py b/slr/get_slr_distributions.py index eeeb94d..25f8fdc 100644 --- a/slr/get_slr_distributions.py +++ b/slr/get_slr_distributions.py @@ -1,17 +1,17 @@ """Calculate probability distributions for IPCC sea level rise forecasts. -This will calculate the values required to generate triangular distributions, -i.e. 'min', 'mode', and 'max' in the `numpy.random.triang()` function. +This will calculate the values required to Cauchy Distributions for +different SLR projections. Reads: 'IPCC AR6.xlsx' Writes: - 'triang-values.csv' + 'cauchy-values.csv' D. Howe d.howe@wrl.unsw.edu.au -2022-05-05 +2022-05-12 """ import os import re @@ -20,93 +20,77 @@ import pandas as pd from scipy import stats, optimize import matplotlib.pyplot as plt -PLOT = False +PLOT = True -def norm_cdf(x, loc, scale): - """Calculate cumulative density function, using normal distribution.""" - return stats.norm(loc=loc, scale=scale).cdf(x) - - -def triang_cdf(x, loc, scale, c): - """Calculate cumulative density function, using triangular distribution.""" - return stats.triang(loc=loc, scale=scale, c=c).cdf(x) +def cauchy_cdf(x, loc, scale): + """Calculate cumulative density function, using Cauchy distribution.""" + return stats.cauchy(loc=loc, scale=scale).cdf(x) # Read data df = pd.read_excel('IPCC AR6.xlsx', index_col=[0, 1, 2, 3, 4]) df = df.sort_index() -dff = df.loc[838, 'total', 'medium', 'ssp585'].T + +# Use all 'medium' confidence scenarios for intermediate quantiles +scenarios = ['ssp119', 'ssp126', 'ssp245', 'ssp370', 'ssp585'] +dff = df.loc[838, 'total', 'medium', scenarios].groupby('quantile').mean() + +# Use ssp119/ssp585 for 5th and 95th quantiles +dff.loc[5] = df.loc[838, 'total', 'medium', 'ssp119', 5] +dff.loc[95] = df.loc[838, 'total', 'medium', 'ssp585', 95] +dff = dff.T + dff.index.name = 'year' percentiles = dff.columns.values / 100 for i, row in dff.iterrows(): values = row.values - # Fit normal distribution - loc, scale = optimize.curve_fit(norm_cdf, values, percentiles)[0] - p_norm = {'loc': loc, 'scale': scale} + x_min = values.min() - 0.2 + x_max = values.max() + 0.2 + x = np.linspace(x_min, x_max, num=1000) - # Fit triangular distribution - loc, scale, c = optimize.curve_fit(triang_cdf, - values, - percentiles, - p0=[values[0] - 0.1, 0.5, 0.5])[0] - p_triang = {'loc': loc, 'scale': scale, 'c': c} + # Fit distribution + loc, scale = optimize.curve_fit(cauchy_cdf, values, percentiles)[0] + p = {'loc': loc, 'scale': scale} - # Get triangular distribution parameters - left = p_triang['loc'] - centre = p_triang['loc'] + p_triang['scale'] * p_triang['c'] - right = p_triang['loc'] + p_triang['scale'] + dff.loc[i, 'loc'] = loc + dff.loc[i, 'scale'] = scale - dff.loc[i, 'min'] = left - dff.loc[i, 'mode'] = centre - dff.loc[i, 'max'] = right + if not PLOT: + continue - if PLOT: - fig, ax = plt.subplots(1, 2, figsize=(10, 3)) + fig, ax = plt.subplots(1, 2, figsize=(10, 3)) - x_min = stats.triang.ppf(0.01, **p_triang) - 0.2 - x_max = stats.triang.ppf(0.99, **p_triang) + 0.2 - x = np.linspace(x_min, x_max, num=1000) + ax[0].plot(x, 100 * stats.cauchy.cdf(x, **p)) + ax[0].plot(values, 100 * percentiles, '.', c='#444444') - ax[0].plot(x, 100 * stats.norm.cdf(x, **p_norm)) - ax[0].plot(x, 100 * stats.triang.cdf(x, **p_triang)) - ax[0].plot(values, 100 * percentiles, '.', c='#444444') + ax[1].plot(x, stats.cauchy.pdf(x, **p), label='Cauchy') + ax[1].plot([], [], '.', c='#444444', label='IPCC data') + ax[1].legend() - ax[1].plot(x, stats.norm.pdf(x, **p_norm), label='Normal') - ax[1].plot(x, stats.triang.pdf(x, **p_triang), label='Triangular') - ax[1].plot([], [], '.', c='#444444', label='IPCC data') - ax[1].legend() + ax[0].set_ylabel('Percentile', labelpad=10) + ax[0].set_title('Cumulative distribution') + ax[1].set_title('Probability density') - ax[1].axvline(x=left, c='C3') - ax[1].axvline(x=centre, c='C3') - ax[1].axvline(x=right, c='C3') + ax[0].annotate(i, (-0.3, 1), + xycoords='axes fraction', + clip_on=False, + size=14) + for a in ax: - ax[0].set_ylabel('Percentile', labelpad=10) - ax[0].set_title('Cumulative distribution') - ax[1].set_title('Probability density') + a.set_xlabel('SLR (m)', labelpad=10) + a.spines['top'].set_visible(False) + a.spines['right'].set_visible(False) - ax[0].annotate(i, (-0.3, 1), - xycoords='axes fraction', - clip_on=False, - size=14) - for a in ax: - - a.set_xlabel('SLR (m)', labelpad=10) - a.spines['top'].set_visible(False) - a.spines['right'].set_visible(False) - - plt.show() - -# Make SLR relative to 2020 level (at the 50th percentile) -dff -= dff.loc[2020, 'mode'] + plt.show() # Save distribution parameters -dff[['min', 'mode', 'max']].to_csv('triang-values.csv', float_format='%0.3f') +dff[['loc', 'scale']].to_csv('cauchy-values.csv', float_format='%0.3f') if PLOT: - # Plot all triangular distributions + # Plot all distributions fig, ax = plt.subplots(1, 1, figsize=(8, 4)) cmap = plt.cm.get_cmap('RdBu_r', len(dff)) @@ -115,9 +99,11 @@ if PLOT: j = -1 for i, row in dff.iterrows(): j += 1 - ax.plot(row[['min', 'mode', 'max']], [0, 1, 0], c=c[j]) + + y = stats.cauchy(loc=row['loc'], scale=row['scale']).pdf(x) + ax.plot(x, y * row['scale'], c=c[j]) if j % 2 == 0: - ax.annotate(f' {i}', (row['mode'], 1), + ax.annotate(f' {i}', (x[y.argmax()], 1), ha='center', va='bottom', rotation=90)