You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

129 lines
3.9 KiB
Python

"""Calculate probability distributions for IPCC sea level rise forecasts.
This will calculate the values required to generate triangular distributions,
i.e. 'min', 'mode', and 'max' in the `numpy.random.triang()` function.
The values are written to 'triang-values.csv'
D. Howe
d.howe@wrl.unsw.edu.au
2022-05-05
"""
import os
import re
import numpy as np
import pandas as pd
from scipy import stats, optimize
import matplotlib.pyplot as plt
PLOT = False
def norm_cdf(x, loc, scale):
"""Calculate cumulative density function, using normal distribution."""
return stats.norm(loc=loc, scale=scale).cdf(x)
def triang_cdf(x, loc, scale, c):
"""Calculate cumulative density function, using triangular distribution."""
return stats.triang(loc=loc, scale=scale, c=c).cdf(x)
# Read data
xlsx_path = '20220502_Probabilistic_Erosion_Parameters_1st_DRAFT_FOR_DISCUSSION.xlsx'
df = pd.read_excel(xlsx_path, sheet_name='IPCC AR6', index_col=[0, 1, 2, 3, 4])
df = df.sort_index()
dff = df.loc[838, 'total', 'medium', 'ssp585'].T
dff.index.name = 'year'
percentiles = dff.columns.to_numpy() / 100
# Make SLR relative to 2020 level (at the 50th percentile)
dff -= dff.loc[2020, 50]
for i, row in dff.iterrows():
values = row.to_numpy()
# Fit normal distribution
loc, scale = optimize.curve_fit(norm_cdf, values, percentiles)[0]
p_norm = {'loc': loc, 'scale': scale}
# Fit triangular distribution
loc, scale, c = optimize.curve_fit(triang_cdf,
values,
percentiles,
p0=[values[0] - 0.1, 0.5, 0.5])[0]
p_triang = {'loc': loc, 'scale': scale, 'c': c}
# Get triangular distribution parameters
left = p_triang['loc']
centre = p_triang['loc'] + p_triang['scale'] * p_triang['c']
right = p_triang['loc'] + p_triang['scale']
dff.loc[i, 'min'] = left
dff.loc[i, 'mode'] = centre
dff.loc[i, 'max'] = right
if PLOT:
fig, ax = plt.subplots(1, 2, figsize=(10, 3))
x_min = stats.triang.ppf(0.01, **p_triang) - 0.2
x_max = stats.triang.ppf(0.99, **p_triang) + 0.2
x = np.linspace(x_min, x_max, num=1000)
ax[0].plot(x, 100 * stats.norm.cdf(x, **p_norm))
ax[0].plot(x, 100 * stats.triang.cdf(x, **p_triang))
ax[0].plot(values, 100 * percentiles, '.', c='#444444')
ax[1].plot(x, stats.norm.pdf(x, **p_norm), label='Normal')
ax[1].plot(x, stats.triang.pdf(x, **p_triang), label='Triangular')
ax[1].plot([], [], '.', c='#444444', label='IPCC data')
ax[1].legend()
ax[1].axvline(x=left, c='C3')
ax[1].axvline(x=centre, c='C3')
ax[1].axvline(x=right, c='C3')
ax[0].set_ylabel('Percentile', labelpad=10)
ax[0].set_title('Cumulative distribution')
ax[1].set_title('Probability density')
ax[0].annotate(i, (-0.3, 1),
xycoords='axes fraction',
clip_on=False,
size=14)
for a in ax:
a.set_xlabel('SLR (m)', labelpad=10)
a.spines['top'].set_visible(False)
a.spines['right'].set_visible(False)
plt.show()
# Save distribution parameters
dff[['min', 'mode', 'max']].to_csv('triang-values.csv', float_format='%0.3f')
if PLOT:
# Plot all triangular distributions
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
cmap = plt.cm.get_cmap('RdBu_r', len(dff))
c = list(cmap(range(cmap.N)))
j = -1
for i, row in dff.iterrows():
j += 1
ax.plot(row[['min', 'mode', 'max']], [0, 1, 0], c=c[j])
if j % 2 == 0:
ax.annotate(f' {i}', (row['mode'], 1),
ha='center',
va='bottom',
rotation=90)
ax.set_xlabel('SLR (m)', labelpad=10)
ax.set_ylabel('Probability density (-)', labelpad=10)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.show()