You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
roches-probabilistic-hazard.../slr/distributions.py

131 lines
3.9 KiB
Python

"""Calculate probability distributions for IPCC sea level rise forecasts.
This will calculate the values required to generate triangular distributions,
i.e. 'min', 'mode', and 'max' in the `numpy.random.triang()` function.
3 years ago
Reads:
'IPCC AR6.xlsx'
Writes:
'triang-values.csv'
D. Howe
d.howe@wrl.unsw.edu.au
2022-05-05
"""
import os
import re
import numpy as np
import pandas as pd
from scipy import stats, optimize
import matplotlib.pyplot as plt
PLOT = False
def norm_cdf(x, loc, scale):
"""Calculate cumulative density function, using normal distribution."""
return stats.norm(loc=loc, scale=scale).cdf(x)
def triang_cdf(x, loc, scale, c):
"""Calculate cumulative density function, using triangular distribution."""
return stats.triang(loc=loc, scale=scale, c=c).cdf(x)
# Read data
df = pd.read_excel('IPCC AR6.xlsx', index_col=[0, 1, 2, 3, 4])
df = df.sort_index()
dff = df.loc[838, 'total', 'medium', 'ssp585'].T
dff.index.name = 'year'
percentiles = dff.columns.values / 100
for i, row in dff.iterrows():
values = row.values
# Fit normal distribution
loc, scale = optimize.curve_fit(norm_cdf, values, percentiles)[0]
p_norm = {'loc': loc, 'scale': scale}
# Fit triangular distribution
loc, scale, c = optimize.curve_fit(triang_cdf,
values,
percentiles,
p0=[values[0] - 0.1, 0.5, 0.5])[0]
p_triang = {'loc': loc, 'scale': scale, 'c': c}
# Get triangular distribution parameters
left = p_triang['loc']
centre = p_triang['loc'] + p_triang['scale'] * p_triang['c']
right = p_triang['loc'] + p_triang['scale']
dff.loc[i, 'min'] = left
dff.loc[i, 'mode'] = centre
dff.loc[i, 'max'] = right
if PLOT:
fig, ax = plt.subplots(1, 2, figsize=(10, 3))
x_min = stats.triang.ppf(0.01, **p_triang) - 0.2
x_max = stats.triang.ppf(0.99, **p_triang) + 0.2
x = np.linspace(x_min, x_max, num=1000)
ax[0].plot(x, 100 * stats.norm.cdf(x, **p_norm))
ax[0].plot(x, 100 * stats.triang.cdf(x, **p_triang))
ax[0].plot(values, 100 * percentiles, '.', c='#444444')
ax[1].plot(x, stats.norm.pdf(x, **p_norm), label='Normal')
ax[1].plot(x, stats.triang.pdf(x, **p_triang), label='Triangular')
ax[1].plot([], [], '.', c='#444444', label='IPCC data')
ax[1].legend()
ax[1].axvline(x=left, c='C3')
ax[1].axvline(x=centre, c='C3')
ax[1].axvline(x=right, c='C3')
ax[0].set_ylabel('Percentile', labelpad=10)
ax[0].set_title('Cumulative distribution')
ax[1].set_title('Probability density')
ax[0].annotate(i, (-0.3, 1),
xycoords='axes fraction',
clip_on=False,
size=14)
for a in ax:
a.set_xlabel('SLR (m)', labelpad=10)
a.spines['top'].set_visible(False)
a.spines['right'].set_visible(False)
plt.show()
# Make SLR relative to 2020 level (at the 50th percentile)
dff -= dff.loc[2020, 'mode']
# Save distribution parameters
dff[['min', 'mode', 'max']].to_csv('triang-values.csv', float_format='%0.3f')
if PLOT:
# Plot all triangular distributions
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
cmap = plt.cm.get_cmap('RdBu_r', len(dff))
c = list(cmap(range(cmap.N)))
j = -1
for i, row in dff.iterrows():
j += 1
ax.plot(row[['min', 'mode', 'max']], [0, 1, 0], c=c[j])
if j % 2 == 0:
ax.annotate(f' {i}', (row['mode'], 1),
ha='center',
va='bottom',
rotation=90)
ax.set_xlabel('SLR (m)', labelpad=10)
ax.set_ylabel('Probability density (-)', labelpad=10)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.show()