"""Calculate probability distributions for IPCC sea level rise forecasts. This will calculate the values required to Cauchy Distributions for different SLR projections. Reads: 'IPCC AR6.xlsx' Writes: 'cauchy-values.csv' D. Howe d.howe@wrl.unsw.edu.au 2022-05-12 """ import os import re import numpy as np import pandas as pd from scipy import stats, optimize import matplotlib.pyplot as plt PLOT = True def cauchy_cdf(x, loc, scale): """Calculate cumulative density function, using Cauchy distribution.""" return stats.cauchy(loc=loc, scale=scale).cdf(x) # Read data df = pd.read_excel('IPCC AR6.xlsx', index_col=[0, 1, 2, 3, 4]) df = df.sort_index() # Use all 'medium' confidence scenarios for intermediate quantiles scenarios = ['ssp119', 'ssp126', 'ssp245', 'ssp370', 'ssp585'] dff = df.loc[838, 'total', 'medium', scenarios].groupby('quantile').mean() # Use ssp119/ssp585 for 5th and 95th quantiles dff.loc[5] = df.loc[838, 'total', 'medium', 'ssp119', 5] dff.loc[95] = df.loc[838, 'total', 'medium', 'ssp585', 95] dff = dff.T dff.index.name = 'year' percentiles = dff.columns.values / 100 for i, row in dff.iterrows(): values = row.values x_min = row[5] + (row[5] - row[50]) x_max = row[95] + (row[95] - row[50]) x = np.linspace(x_min, x_max, num=1000) # Fit distribution loc, scale = optimize.curve_fit(cauchy_cdf, values, percentiles)[0] p = {'loc': loc, 'scale': scale} dff.loc[i, 'loc'] = loc dff.loc[i, 'scale'] = scale if not PLOT: continue fig, ax = plt.subplots(1, 2, figsize=(10, 3)) ax[0].plot(x, 100 * stats.cauchy.cdf(x, **p)) ax[0].plot(values, 100 * percentiles, '.', c='#444444') ax[1].plot(x, stats.cauchy.pdf(x, **p), label='Cauchy') ax[1].plot([], [], '.', c='#444444', label='IPCC data') ax[1].legend() ax[0].set_ylabel('Percentile', labelpad=10) ax[0].set_title('Cumulative distribution') ax[1].set_title('Probability density') ax[0].annotate(i, (-0.3, 1), xycoords='axes fraction', clip_on=False, size=14) for a in ax: a.set_xlabel('SLR (m)', labelpad=10) a.spines['top'].set_visible(False) a.spines['right'].set_visible(False) plt.show() # Save distribution parameters dff[['loc', 'scale']].to_csv('cauchy-values.csv', float_format='%0.3f') if PLOT: # Plot all distributions fig, ax = plt.subplots(1, 1, figsize=(8, 4)) cmap = plt.cm.get_cmap('RdBu_r', len(dff)) c = list(cmap(range(cmap.N))) j = -1 for i, row in dff.iterrows(): j += 1 x_min = row[5] + (row[5] - row[50]) x_max = row[95] + (row[95] - row[50]) x = np.linspace(x_min, x_max, num=1000) y = stats.cauchy(loc=row['loc'], scale=row['scale']).pdf(x) ax.plot(x, y * row['scale'], c=c[j]) if j % 2 == 0: ax.annotate(f' {i}', (x[y.argmax()], 0.32), ha='center', va='bottom', clip_on=False, rotation=90) ax.set_ylim(bottom=0, top=0.35) ax.set_yticks([]) ax.set_xlabel('SLR (m)', labelpad=10) ax.set_ylabel('Normalised probability density (-)', labelpad=10) ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) plt.show() # Show histgrams with values clipped to our specific range if PLOT: for i, row in dff[::4].iterrows(): fig, ax = plt.subplots(1, 1, figsize=(10, 2)) # Generate random samples in Cauchy distribution r = stats.cauchy(loc=row['loc'], scale=row['scale']).rvs(1000000) # Clip to our range x_min = row[5] + (row[5] - row[50]) x_max = row[95] + (row[95] - row[50]) rc = np.clip(r, a_min=x_min, a_max=x_max) f = (r != rc).sum() / len(r) # Fraction outside range ax.hist(rc, 100) ym = ax.get_ylim()[1] # Maximum y value ax.axvline(x=x_min, c='C3') ax.axvline(x=row[5], c='C3') ax.axvline(x=row[50], c='C3') ax.axvline(x=row[95], c='C3') 
ax.axvline(x=x_max, c='C3') ax.annotate(' P0', (x_min, ym)) ax.annotate(' P5', (row[5], ym)) ax.annotate(' P50', (row[50], ym)) ax.annotate(' P95', (row[95], ym)) ax.annotate(' P100', (x_max, ym)) ax.annotate(f' Samples clipped = {100 * f:0.1f}%', (x_max, ym / 2)) ax.set_title(row.name, x=0) ax.set_yticks([]) ax.set_xlabel('SLR (m)', labelpad=10) ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) plt.show()
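
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the workflow above, so left commented out):
# one way downstream code might consume 'cauchy-values.csv' to draw random
# SLR samples. Picking the final projection year via `params.index[-1]` is an
# assumption made purely for demonstration.
#
#     params = pd.read_csv('cauchy-values.csv', index_col='year')
#     year = params.index[-1]
#     dist = stats.cauchy(loc=params.loc[year, 'loc'],
#                         scale=params.loc[year, 'scale'])
#     slr_samples = dist.rvs(size=10000)  # SLR values (m) for Monte Carlo use
# ---------------------------------------------------------------------------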