You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
95 lines
2.9 KiB
Python
95 lines
2.9 KiB
Python
3 years ago
|
import os
|
||
|
import re
|
||
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
from scipy import stats, optimize
|
||
|
|
||
|
# Excel workbook holding the probabilistic erosion parameters, including the
# IPCC sea level rise projections used by cauchy() below.
WORKBOOK = '../inputs/20220505_Probabilistic_Erosion_Parameters_5th_JTC_REVIEWED_IPCC_SLR_OFFSET.xlsx' # noqa

# Worksheet within WORKBOOK containing the IPCC AR6 SLR quantile data.
SHEET = 'IPCC AR6 WRL FINAL'

# Row label (psmsl_id index) marking the first of the five quantile rows
# to read from SHEET.
VERSION = 'WRL V5 corrected to 2020' # First row of data
|
||
|
|
||
|
|
||
|
def get_cdf(x, loc, scale):
    """Evaluate the Cauchy cumulative distribution function.

    Args:
        x: point(s) at which to evaluate the CDF
        loc: location parameter of the Cauchy distribution
        scale: scale parameter of the Cauchy distribution

    Returns:
        CDF value(s) at ``x``
    """
    dist = stats.cauchy(loc=loc, scale=scale)
    return dist.cdf(x)
|
||
|
|
||
|
|
||
|
def cauchy(n_runs, start_year, end_year):
    """
    Use Monte Carlo simulation to generate sea level rise trajectories by
    fitting IPCC data to a Cauchy distribution.

    Args:
        n_runs (int): number of runs
        start_year (int): first year of model
        end_year (int): last year of model

    Returns:
        the simulated sea level rise (m) as a (n_years, n_runs) array

    Raises:
        RuntimeError: if enough in-range samples cannot be drawn for a year
    """
    # Load IPCC SLR data
    df = pd.read_excel(WORKBOOK, sheet_name=SHEET, index_col='psmsl_id')
    idx = df.index.get_loc(VERSION)  # First row containing values we want
    df = df.iloc[idx:idx + 5].set_index('quantile')
    df = df.drop(columns=['process', 'confidence', 'scenario']).T
    df.index.name = 'year'
    percentiles = df.columns.values / 100

    for i, row in df.iterrows():
        values = row.values

        # Set valid range of probability distribution function by mirroring
        # the 5th/95th percentiles about the median (quantile 50)
        x_min = row[5] + (row[5] - row[50])
        x_max = row[95] + (row[95] - row[50])

        # Fit Cauchy distribution to the quantile values
        loc, scale = optimize.curve_fit(get_cdf, values, percentiles)[0]

        if x_min == x_max:
            # Hardcode values for start year (when everything is zero)
            scale = 0.001
            x_min = -0.001
            x_max = 0.001

        df.loc[i, 'loc'] = loc
        df.loc[i, 'scale'] = scale
        df.loc[i, 'min'] = x_min
        df.loc[i, 'max'] = x_max

    # Interpolate intermediate values
    index = np.arange(df.index.min(), df.index.max() + 1)
    df = df.reindex(index).interpolate(method='linear')
    df = df.loc[start_year:end_year]  # Trim dataframe to given range

    # Prepare array for SLR values
    slr = np.zeros([len(df), n_runs], dtype=float)

    for i, (year, row) in enumerate(df.iterrows()):
        # Get probability distribution for current year
        dist = stats.cauchy(loc=row['loc'], scale=row['scale'])

        # Generate random samples, oversampling by an increasing factor
        # until at least n_runs of them fall inside the valid range
        for factor in range(2, 1000):
            s_raw = dist.rvs(n_runs * factor)

            # Keep samples within valid range
            s = s_raw[(s_raw > row['min']) & (s_raw < row['max'])]

            # BUG FIX: was 'len(s) > n_runs', which needlessly rejected
            # draws that produced exactly n_runs valid samples
            if len(s) >= n_runs:
                break  # Success
        else:
            # Previously fell through to a shape-mismatch error on
            # assignment; fail explicitly instead
            raise RuntimeError(
                f'Could not draw {n_runs} in-range samples for year {year}')

        # Add the required number of samples
        slr[i] = s[:n_runs]

    # Sort each row to make SLR trajectories smooth
    slr = np.sort(slr, axis=1)

    # Randomise run order (column-wise)
    slr = np.random.permutation(slr.T).T

    return slr
|