Use cauchy distribution instead of triangular

master
Dan Howe 3 years ago
parent 3619504f36
commit 674688ad86

@ -1,17 +1,17 @@
"""Calculate probability distributions for IPCC sea level rise forecasts. """Calculate probability distributions for IPCC sea level rise forecasts.
This will calculate the values required to generate triangular distributions, This will calculate the values required to Cauchy Distributions for
i.e. 'min', 'mode', and 'max' in the `numpy.random.triang()` function. different SLR projections.
Reads: Reads:
'IPCC AR6.xlsx' 'IPCC AR6.xlsx'
Writes: Writes:
'triang-values.csv' 'cauchy-values.csv'
D. Howe D. Howe
d.howe@wrl.unsw.edu.au d.howe@wrl.unsw.edu.au
2022-05-05 2022-05-12
""" """
import os import os
import re import re
@ -20,93 +20,77 @@ import pandas as pd
from scipy import stats, optimize from scipy import stats, optimize
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
PLOT = False PLOT = True
def norm_cdf(x, loc, scale): def cauchy_cdf(x, loc, scale):
"""Calculate cumulative density function, using normal distribution.""" """Calculate cumulative density function, using Cauchy distribution."""
return stats.norm(loc=loc, scale=scale).cdf(x) return stats.cauchy(loc=loc, scale=scale).cdf(x)
def triang_cdf(x, loc, scale, c):
"""Calculate cumulative density function, using triangular distribution."""
return stats.triang(loc=loc, scale=scale, c=c).cdf(x)
# Read data # Read data
df = pd.read_excel('IPCC AR6.xlsx', index_col=[0, 1, 2, 3, 4]) df = pd.read_excel('IPCC AR6.xlsx', index_col=[0, 1, 2, 3, 4])
df = df.sort_index() df = df.sort_index()
dff = df.loc[838, 'total', 'medium', 'ssp585'].T
# Use all 'medium' confidence scenarios for intermediate quantiles
scenarios = ['ssp119', 'ssp126', 'ssp245', 'ssp370', 'ssp585']
dff = df.loc[838, 'total', 'medium', scenarios].groupby('quantile').mean()
# Use ssp119/ssp585 for 5th and 95th quantiles
dff.loc[5] = df.loc[838, 'total', 'medium', 'ssp119', 5]
dff.loc[95] = df.loc[838, 'total', 'medium', 'ssp585', 95]
dff = dff.T
dff.index.name = 'year' dff.index.name = 'year'
percentiles = dff.columns.values / 100 percentiles = dff.columns.values / 100
for i, row in dff.iterrows(): for i, row in dff.iterrows():
values = row.values values = row.values
# Fit normal distribution x_min = values.min() - 0.2
loc, scale = optimize.curve_fit(norm_cdf, values, percentiles)[0] x_max = values.max() + 0.2
p_norm = {'loc': loc, 'scale': scale} x = np.linspace(x_min, x_max, num=1000)
# Fit triangular distribution # Fit distribution
loc, scale, c = optimize.curve_fit(triang_cdf, loc, scale = optimize.curve_fit(cauchy_cdf, values, percentiles)[0]
values, p = {'loc': loc, 'scale': scale}
percentiles,
p0=[values[0] - 0.1, 0.5, 0.5])[0]
p_triang = {'loc': loc, 'scale': scale, 'c': c}
# Get triangular distribution parameters dff.loc[i, 'loc'] = loc
left = p_triang['loc'] dff.loc[i, 'scale'] = scale
centre = p_triang['loc'] + p_triang['scale'] * p_triang['c']
right = p_triang['loc'] + p_triang['scale']
dff.loc[i, 'min'] = left if not PLOT:
dff.loc[i, 'mode'] = centre continue
dff.loc[i, 'max'] = right
if PLOT: fig, ax = plt.subplots(1, 2, figsize=(10, 3))
fig, ax = plt.subplots(1, 2, figsize=(10, 3))
x_min = stats.triang.ppf(0.01, **p_triang) - 0.2 ax[0].plot(x, 100 * stats.cauchy.cdf(x, **p))
x_max = stats.triang.ppf(0.99, **p_triang) + 0.2 ax[0].plot(values, 100 * percentiles, '.', c='#444444')
x = np.linspace(x_min, x_max, num=1000)
ax[0].plot(x, 100 * stats.norm.cdf(x, **p_norm)) ax[1].plot(x, stats.cauchy.pdf(x, **p), label='Cauchy')
ax[0].plot(x, 100 * stats.triang.cdf(x, **p_triang)) ax[1].plot([], [], '.', c='#444444', label='IPCC data')
ax[0].plot(values, 100 * percentiles, '.', c='#444444') ax[1].legend()
ax[1].plot(x, stats.norm.pdf(x, **p_norm), label='Normal') ax[0].set_ylabel('Percentile', labelpad=10)
ax[1].plot(x, stats.triang.pdf(x, **p_triang), label='Triangular') ax[0].set_title('Cumulative distribution')
ax[1].plot([], [], '.', c='#444444', label='IPCC data') ax[1].set_title('Probability density')
ax[1].legend()
ax[1].axvline(x=left, c='C3') ax[0].annotate(i, (-0.3, 1),
ax[1].axvline(x=centre, c='C3') xycoords='axes fraction',
ax[1].axvline(x=right, c='C3') clip_on=False,
size=14)
for a in ax:
ax[0].set_ylabel('Percentile', labelpad=10) a.set_xlabel('SLR (m)', labelpad=10)
ax[0].set_title('Cumulative distribution') a.spines['top'].set_visible(False)
ax[1].set_title('Probability density') a.spines['right'].set_visible(False)
ax[0].annotate(i, (-0.3, 1), plt.show()
xycoords='axes fraction',
clip_on=False,
size=14)
for a in ax:
a.set_xlabel('SLR (m)', labelpad=10)
a.spines['top'].set_visible(False)
a.spines['right'].set_visible(False)
plt.show()
# Make SLR relative to 2020 level (at the 50th percentile)
dff -= dff.loc[2020, 'mode']
# Save distribution parameters # Save distribution parameters
dff[['min', 'mode', 'max']].to_csv('triang-values.csv', float_format='%0.3f') dff[['loc', 'scale']].to_csv('cauchy-values.csv', float_format='%0.3f')
if PLOT: if PLOT:
# Plot all triangular distributions # Plot all distributions
fig, ax = plt.subplots(1, 1, figsize=(8, 4)) fig, ax = plt.subplots(1, 1, figsize=(8, 4))
cmap = plt.cm.get_cmap('RdBu_r', len(dff)) cmap = plt.cm.get_cmap('RdBu_r', len(dff))
@ -115,9 +99,11 @@ if PLOT:
j = -1 j = -1
for i, row in dff.iterrows(): for i, row in dff.iterrows():
j += 1 j += 1
ax.plot(row[['min', 'mode', 'max']], [0, 1, 0], c=c[j])
y = stats.cauchy(loc=row['loc'], scale=row['scale']).pdf(x)
ax.plot(x, y * row['scale'], c=c[j])
if j % 2 == 0: if j % 2 == 0:
ax.annotate(f' {i}', (row['mode'], 1), ax.annotate(f' {i}', (x[y.argmax()], 1),
ha='center', ha='center',
va='bottom', va='bottom',
rotation=90) rotation=90)

Loading…
Cancel
Save