Use cauchy distribution instead of triangular

3 years ago · 674688ad86
parent 3619504f36
commit 674688ad86
1 changed files with 52 additions and 66 deletions
--- a/slr/get_slr_distributions.py
+++ b/slr/get_slr_distributions.py
@ -1,17 +1,17 @@
 """Calculate probability distributions for IPCC sea level rise forecasts.
-This will calculate the values required to generate triangular distributions,
+This will calculate the values required to Cauchy Distributions for
-i.e. 'min', 'mode', and 'max' in the `numpy.random.triang()` function.
+different SLR projections.
 Reads:
    'IPCC AR6.xlsx'
 Writes:
-    'triang-values.csv'
+    'cauchy-values.csv'
 D. Howe
 d.howe@wrl.unsw.edu.au
-2022-05-05
+2022-05-12
 """
 import os
 import re
@ -20,93 +20,77 @@ import pandas as pd
 from scipy import stats, optimize
 import matplotlib.pyplot as plt
-PLOT = False
+PLOT = True
-def norm_cdf(x, loc, scale):
+def cauchy_cdf(x, loc, scale):
-    """Calculate cumulative density function, using normal distribution."""
+    """Calculate cumulative density function, using Cauchy distribution."""
-    return stats.norm(loc=loc, scale=scale).cdf(x)
+    return stats.cauchy(loc=loc, scale=scale).cdf(x)
 def triang_cdf(x, loc, scale, c):
    """Calculate cumulative density function, using triangular distribution."""
    return stats.triang(loc=loc, scale=scale, c=c).cdf(x)
 # Read data
 df = pd.read_excel('IPCC AR6.xlsx', index_col=[0, 1, 2, 3, 4])
 df = df.sort_index()
-dff = df.loc[838, 'total', 'medium', 'ssp585'].T
+
 # Use all 'medium' confidence scenarios for intermediate quantiles
 scenarios = ['ssp119', 'ssp126', 'ssp245', 'ssp370', 'ssp585']
 dff = df.loc[838, 'total', 'medium', scenarios].groupby('quantile').mean()
 # Use ssp119/ssp585 for 5th and 95th quantiles
 dff.loc[5] = df.loc[838, 'total', 'medium', 'ssp119', 5]
 dff.loc[95] = df.loc[838, 'total', 'medium', 'ssp585', 95]
 dff = dff.T
 dff.index.name = 'year'
 percentiles = dff.columns.values / 100
 for i, row in dff.iterrows():
    values = row.values
-    # Fit normal distribution
+    x_min = values.min() - 0.2
-    loc, scale = optimize.curve_fit(norm_cdf, values, percentiles)[0]
+    x_max = values.max() + 0.2
-    p_norm = {'loc': loc, 'scale': scale}
+    x = np.linspace(x_min, x_max, num=1000)
-    # Fit triangular distribution
+    # Fit distribution
-    loc, scale, c = optimize.curve_fit(triang_cdf,
+    loc, scale = optimize.curve_fit(cauchy_cdf, values, percentiles)[0]
-                                       values,
+    p = {'loc': loc, 'scale': scale}
                                       percentiles,
                                       p0=[values[0] - 0.1, 0.5, 0.5])[0]
    p_triang = {'loc': loc, 'scale': scale, 'c': c}
-    # Get triangular distribution parameters
+    dff.loc[i, 'loc'] = loc
-    left = p_triang['loc']
+    dff.loc[i, 'scale'] = scale
    centre = p_triang['loc'] + p_triang['scale'] * p_triang['c']
    right = p_triang['loc'] + p_triang['scale']
-    dff.loc[i, 'min'] = left
+    if not PLOT:
-    dff.loc[i, 'mode'] = centre
+        continue
    dff.loc[i, 'max'] = right
-    if PLOT:
+    fig, ax = plt.subplots(1, 2, figsize=(10, 3))
        fig, ax = plt.subplots(1, 2, figsize=(10, 3))
-        x_min = stats.triang.ppf(0.01, **p_triang) - 0.2
+    ax[0].plot(x, 100 * stats.cauchy.cdf(x, **p))
-        x_max = stats.triang.ppf(0.99, **p_triang) + 0.2
+    ax[0].plot(values, 100 * percentiles, '.', c='#444444')
        x = np.linspace(x_min, x_max, num=1000)
-        ax[0].plot(x, 100 * stats.norm.cdf(x, **p_norm))
+    ax[1].plot(x, stats.cauchy.pdf(x, **p), label='Cauchy')
-        ax[0].plot(x, 100 * stats.triang.cdf(x, **p_triang))
+    ax[1].plot([], [], '.', c='#444444', label='IPCC data')
-        ax[0].plot(values, 100 * percentiles, '.', c='#444444')
+    ax[1].legend()
-        ax[1].plot(x, stats.norm.pdf(x, **p_norm), label='Normal')
+    ax[0].set_ylabel('Percentile', labelpad=10)
-        ax[1].plot(x, stats.triang.pdf(x, **p_triang), label='Triangular')
+    ax[0].set_title('Cumulative distribution')
-        ax[1].plot([], [], '.', c='#444444', label='IPCC data')
+    ax[1].set_title('Probability density')
        ax[1].legend()
-        ax[1].axvline(x=left, c='C3')
+    ax[0].annotate(i, (-0.3, 1),
-        ax[1].axvline(x=centre, c='C3')
+                   xycoords='axes fraction',
-        ax[1].axvline(x=right, c='C3')
+                   clip_on=False,
                   size=14)
    for a in ax:
-        ax[0].set_ylabel('Percentile', labelpad=10)
+        a.set_xlabel('SLR (m)', labelpad=10)
-        ax[0].set_title('Cumulative distribution')
+        a.spines['top'].set_visible(False)
-        ax[1].set_title('Probability density')
+        a.spines['right'].set_visible(False)
-        ax[0].annotate(i, (-0.3, 1),
+    plt.show()
                       xycoords='axes fraction',
                       clip_on=False,
                       size=14)
        for a in ax:
            a.set_xlabel('SLR (m)', labelpad=10)
            a.spines['top'].set_visible(False)
            a.spines['right'].set_visible(False)
        plt.show()
 # Make SLR relative to 2020 level (at the 50th percentile)
 dff -= dff.loc[2020, 'mode']
 # Save distribution parameters
-dff[['min', 'mode', 'max']].to_csv('triang-values.csv', float_format='%0.3f')
+dff[['loc', 'scale']].to_csv('cauchy-values.csv', float_format='%0.3f')
 if PLOT:
-    # Plot all triangular distributions
+    # Plot all distributions
    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
    cmap = plt.cm.get_cmap('RdBu_r', len(dff))
@ -115,9 +99,11 @@ if PLOT:
    j = -1
    for i, row in dff.iterrows():
        j += 1
-        ax.plot(row[['min', 'mode', 'max']], [0, 1, 0], c=c[j])
+
        y = stats.cauchy(loc=row['loc'], scale=row['scale']).pdf(x)
        ax.plot(x, y * row['scale'], c=c[j])
        if j % 2 == 0:
-            ax.annotate(f' {i}', (row['mode'], 1),
+            ax.annotate(f' {i}', (x[y.argmax()], 1),
                        ha='center',
                        va='bottom',
                        rotation=90)