"""Calculate probability distributions for IPCC sea level rise forecasts.

This will calculate the values required to Cauchy Distributions for
different SLR projections.

Reads:
    'IPCC AR6.xlsx'

Writes:
    'cauchy-values.csv'

D. Howe
d.howe@wrl.unsw.edu.au
2022-05-12
"""
import os
import re
import numpy as np
import pandas as pd
from scipy import stats, optimize
import matplotlib.pyplot as plt

PLOT = True


def cauchy_cdf(x, loc, scale):
    """Calculate cumulative density function, using Cauchy distribution."""
    return stats.cauchy(loc=loc, scale=scale).cdf(x)


# Read data
df = pd.read_excel('IPCC AR6.xlsx', index_col=[0, 1, 2, 3, 4])
df = df.sort_index()

# Use all 'medium' confidence scenarios for intermediate quantiles
scenarios = ['ssp119', 'ssp126', 'ssp245', 'ssp370', 'ssp585']
dff = df.loc[838, 'total', 'medium', scenarios].groupby('quantile').mean()

# Use ssp119/ssp585 for 5th and 95th quantiles
dff.loc[5] = df.loc[838, 'total', 'medium', 'ssp119', 5]
dff.loc[95] = df.loc[838, 'total', 'medium', 'ssp585', 95]
dff = dff.T

dff.index.name = 'year'
percentiles = dff.columns.values / 100

for i, row in dff.iterrows():
    values = row.values

    x_min = row[5] + (row[5] - row[50])
    x_max = row[95] + (row[95] - row[50])
    x = np.linspace(x_min, x_max, num=1000)

    # Fit distribution
    loc, scale = optimize.curve_fit(cauchy_cdf, values, percentiles)[0]
    p = {'loc': loc, 'scale': scale}

    dff.loc[i, 'loc'] = loc
    dff.loc[i, 'scale'] = scale
    dff.loc[i, 'min'] = x_min
    dff.loc[i, 'max'] = x_max

    if not PLOT:
        continue

    fig, ax = plt.subplots(1, 2, figsize=(10, 3))

    ax[0].plot(x, 100 * stats.cauchy.cdf(x, **p))
    ax[0].plot(values, 100 * percentiles, '.', c='#444444')

    ax[1].plot(x, stats.cauchy.pdf(x, **p), label='Cauchy')
    ax[1].plot([], [], '.', c='#444444', label='IPCC data')
    ax[1].legend()

    ax[0].set_ylabel('Percentile', labelpad=10)
    ax[0].set_title('Cumulative distribution')
    ax[1].set_title('Probability density')

    ax[0].annotate(i, (-0.3, 1),
                   xycoords='axes fraction',
                   clip_on=False,
                   size=14)
    for a in ax:

        a.set_xlabel('SLR (m)', labelpad=10)
        a.spines['top'].set_visible(False)
        a.spines['right'].set_visible(False)

    plt.show()

# Save distribution parameters
dff[['loc', 'scale', 'min', 'max']].to_csv('cauchy-values.csv',
                                           float_format='%g')

if PLOT:
    # Plot all distributions
    fig, ax = plt.subplots(1, 1, figsize=(8, 4))

    cmap = plt.cm.get_cmap('RdBu_r', len(dff))
    c = list(cmap(range(cmap.N)))

    j = -1
    for i, row in dff.iterrows():
        j += 1

        x = np.linspace(row['min'], row['max'], num=1000)

        y = stats.cauchy(loc=row['loc'], scale=row['scale']).pdf(x)
        ax.plot(x, y * row['scale'], c=c[j])
        if j % 2 == 0:
            ax.annotate(f' {i}', (x[y.argmax()], 0.32),
                        ha='center',
                        va='bottom',
                        clip_on=False,
                        rotation=90)

    ax.set_ylim(bottom=0, top=0.35)
    ax.set_yticks([])
    ax.set_xlabel('SLR (m)', labelpad=10)
    ax.set_ylabel('Normalised probability density (-)', labelpad=10)

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.show()

# Show histgrams with values clipped to our specific range
if PLOT:
    for i, row in dff[::4].iterrows():
        fig, ax = plt.subplots(1, 1, figsize=(10, 2))

        # Generate random samples in Cauchy distribution
        r = stats.cauchy(loc=row['loc'], scale=row['scale']).rvs(1000000)

        # Clip to our range
        rc = np.clip(r, a_min=row['min'], a_max=row['max'])
        f = (r != rc).sum() / len(r)  # Fraction outside range

        ax.hist(rc, 100)

        ym = ax.get_ylim()[1]  # Maximum y value

        ax.axvline(x=row['min'], c='C3')
        ax.axvline(x=row[5], c='C3')
        ax.axvline(x=row[50], c='C3')
        ax.axvline(x=row[95], c='C3')
        ax.axvline(x=row['max'], c='C3')

        ax.annotate(' P_min', (row['min'], ym))
        ax.annotate(' P5', (row[5], ym))
        ax.annotate(' P50', (row[50], ym))
        ax.annotate(' P95', (row[95], ym))
        ax.annotate(' P_max', (row['max'], ym))

        ax.annotate(f' Samples clipped = {100 * f:0.1f}%', (x_max, ym / 2))

        ax.set_title(row.name, x=0)
        ax.set_yticks([])
        ax.set_xlabel('SLR (m)', labelpad=10)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

        plt.show()