Simplified things.

master
Per A Brodtkorb 8 years ago
parent 76bdf630a4
commit e6ed2f4d3f

@ -3,10 +3,10 @@ from scipy.stats._distn_infrastructure import * # @UnusedWildImport
from scipy.stats._distn_infrastructure import (_skew, # @UnusedImport
_kurtosis, _ncx2_log_pdf, # @IgnorePep8 @UnusedImport
_ncx2_pdf, _ncx2_cdf) # @UnusedImport @IgnorePep8
from .estimation import FitDistribution
from .estimation import FitDistribution, rv_frozen # @Reimport
from ._constants import _XMAX, _XMIN
from wafo.misc import lazyselect as _lazyselect
from wafo.misc import lazywhere as _lazywhere
from wafo.misc import lazyselect as _lazyselect # @UnusedImport
from wafo.misc import lazywhere as _lazywhere # @UnusedImport
_doc_default_example = """\
Examples
@ -70,137 +70,6 @@ Accurate confidence interval with profile loglikelihood
"""
# Frozen RV class
class rv_frozen(object):
''' Frozen continous or discrete 1D Random Variable object (RV)
Methods
-------
rvs(size=1)
Random variates.
pdf(x)
Probability density function.
cdf(x)
Cumulative density function.
sf(x)
Survival function (1-cdf --- sometimes more accurate).
ppf(q)
Percent point function (inverse of cdf --- percentiles).
isf(q)
Inverse survival function (inverse of sf).
stats(moments='mv')
Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
moment(n)
n-th order non-central moment of distribution.
entropy()
(Differential) entropy of the RV.
interval(alpha)
Confidence interval with equal areas around the median.
expect(func, lb, ub, conditional=False)
Calculate expected value of a function with respect to the
distribution.
'''
def __init__(self, dist, *args, **kwds):
# create a new instance
self.dist = dist # .__class__(**dist._ctor_param)
shapes, loc, scale = self.dist._parse_args(*args, **kwds)
if isinstance(dist, rv_continuous):
self.par = shapes + (loc, scale)
else: # rv_discrete
self.par = shapes + (loc,)
self.a = self.dist.a
self.b = self.dist.b
self.shapes = self.dist.shapes
# @property
# def shapes(self):
# return self.dist.shapes
@property
def random_state(self):
return self.dist._random_state
@random_state.setter
def random_state(self, seed):
self.dist._random_state = check_random_state(seed)
def pdf(self, x):
''' Probability density function at x of the given RV.'''
return self.dist.pdf(x, *self.par)
def logpdf(self, x):
return self.dist.logpdf(x, *self.par)
def cdf(self, x):
'''Cumulative distribution function at x of the given RV.'''
return self.dist.cdf(x, *self.par)
def logcdf(self, x):
return self.dist.logcdf(x, *self.par)
def ppf(self, q):
'''Percent point function (inverse of cdf) at q of the given RV.'''
return self.dist.ppf(q, *self.par)
def isf(self, q):
'''Inverse survival function at q of the given RV.'''
return self.dist.isf(q, *self.par)
def rvs(self, size=None, random_state=None):
kwds = {'size': size, 'random_state': random_state}
return self.dist.rvs(*self.par, **kwds)
def sf(self, x):
'''Survival function (1-cdf) at x of the given RV.'''
return self.dist.sf(x, *self.par)
def logsf(self, x):
return self.dist.logsf(x, *self.par)
def stats(self, moments='mv'):
''' Some statistics of the given RV'''
kwds = dict(moments=moments)
return self.dist.stats(*self.par, **kwds)
def median(self):
return self.dist.median(*self.par)
def mean(self):
return self.dist.mean(*self.par)
def var(self):
return self.dist.var(*self.par)
def std(self):
return self.dist.std(*self.par)
def moment(self, n):
return self.dist.moment(n, *self.par)
def entropy(self):
return self.dist.entropy(*self.par)
def pmf(self, k):
'''Probability mass function at k of the given RV'''
return self.dist.pmf(k, *self.par)
def logpmf(self, k):
return self.dist.logpmf(k, *self.par)
def interval(self, alpha):
return self.dist.interval(alpha, *self.par)
def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds):
if isinstance(self.dist, rv_continuous):
a, loc, scale = self.par[:-2], self.par[:-2], self.par[-1]
return self.dist.expect(func, a, loc, scale, lb, ub, conditional,
**kwds)
a, loc = self.par[:-1], self.par[-1]
if kwds:
raise ValueError("Discrete expect does not accept **kwds.")
return self.dist.expect(func, a, loc, lb, ub, conditional)
def freeze(self, *args, **kwds):
"""Freeze the distribution for the given arguments.
@ -219,41 +88,6 @@ def freeze(self, *args, **kwds):
return rv_frozen(self, *args, **kwds)
# def link(self, x, logSF, theta, i):
# '''
# Return theta[i] as function of quantile, survival probability and
# theta[j] for j!=i.
#
# Parameters
# ----------
# x : quantile
# logSF : logarithm of the survival probability
# theta : list
# all distribution parameters including location and scale.
#
# Returns
# -------
# theta[i] : real scalar
# fixed distribution parameter theta[i] as function of x, logSF and
# theta[j] where j != i.
#
# LINK is a function connecting the fixed distribution parameter theta[i]
# with the quantile (x) and the survival probability (SF) and the
# remaining free distribution parameters theta[j] for j!=i, i.e.:
# theta[i] = link(x, logSF, theta, i),
# where logSF = log(Prob(X>x; theta)).
#
# See also
# estimation.Profile
# '''
# return self._link(x, logSF, theta, i)
#
#
# def _link(self, x, logSF, theta, i):
# msg = 'Link function not implemented for the {} distribution'
# raise NotImplementedError(msg.format(self.name))
def _resolve_ties(self, log_dprb, x, args, scale):
dx = np.diff(x, axis=0)
tie = dx == 0
@ -293,7 +127,6 @@ def _nlogps_and_penalty(self, x, scale, args):
if n_bad > 0:
penalty = 100.0 * np.log(_XMAX) * n_bad
return -np.sum(log_dprb[finite_log_dprb], axis=0) + penalty
return -np.sum(log_dprb, axis=0)
@ -353,7 +186,7 @@ def _nnlf_and_penalty(self, x, args):
return -np.sum(logpdf, axis=0)
def _penalized_nnlf(self, theta, x, penalty=None):
def _penalized_nnlf(self, theta, x):
''' Return negative loglikelihood function,
i.e., - sum (log pdf(x, theta), axis=0)
where theta are the parameters (including loc and scale)
@ -617,8 +450,7 @@ def _open_support_mask(self, x):
rv_generic.freeze = freeze
rv_discrete.freeze = freeze
rv_continuous.freeze = freeze
#rv_continuous.link = link
#rv_continuous._link = _link
rv_continuous._penalized_nlogps = _penalized_nlogps
rv_continuous._penalized_nnlf = _penalized_nnlf
rv_continuous._reduce_func = _reduce_func

@ -9,11 +9,10 @@ Author: Per A. Brodtkorb 2008
from __future__ import division, absolute_import
import warnings
from scipy.stats import rv_continuous
from wafo.plotbackend import plotbackend as plt
from wafo.misc import ecross, findcross, argsreduce
from wafo.stats._constants import _EPS, _XMAX, _XMIN
from wafo.stats._distn_infrastructure import rv_frozen, rv_continuous
from wafo.misc import ecross, findcross
from wafo.stats._constants import _EPS # , _XMAX, _XMIN
from scipy._lib.six import string_types
import numdifftools as nd # @UnresolvedImport
from scipy import special
@ -36,6 +35,157 @@ arr = asarray
# all = alltrue # @ReservedAssignment
def _assert_warn(cond, msg):
if not cond:
warnings.warn(msg)
def _assert(cond, msg):
if not cond:
raise ValueError(msg)
def _assert_index(cond, msg):
if not cond:
raise IndexError(msg)
def _assert_not_implemented(cond, msg):
if not cond:
raise NotImplementedError(msg)
# Frozen RV class
class rv_frozen(object):
''' Frozen continous or discrete 1D Random Variable object (RV)
Methods
-------
rvs(size=1)
Random variates.
pdf(x)
Probability density function.
cdf(x)
Cumulative density function.
sf(x)
Survival function (1-cdf --- sometimes more accurate).
ppf(q)
Percent point function (inverse of cdf --- percentiles).
isf(q)
Inverse survival function (inverse of sf).
stats(moments='mv')
Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
moment(n)
n-th order non-central moment of distribution.
entropy()
(Differential) entropy of the RV.
interval(alpha)
Confidence interval with equal areas around the median.
expect(func, lb, ub, conditional=False)
Calculate expected value of a function with respect to the
distribution.
'''
def __init__(self, dist, *args, **kwds):
# create a new instance
self.dist = dist # .__class__(**dist._ctor_param)
shapes, loc, scale = self.dist._parse_args(*args, **kwds)
if isinstance(dist, rv_continuous):
self.par = shapes + (loc, scale)
else: # rv_discrete
self.par = shapes + (loc,)
self.a = self.dist.a
self.b = self.dist.b
self.shapes = self.dist.shapes
# @property
# def shapes(self):
# return self.dist.shapes
@property
def random_state(self):
return self.dist._random_state
@random_state.setter
def random_state(self, seed):
self.dist._random_state = check_random_state(seed)
def pdf(self, x):
''' Probability density function at x of the given RV.'''
return self.dist.pdf(x, *self.par)
def logpdf(self, x):
return self.dist.logpdf(x, *self.par)
def cdf(self, x):
'''Cumulative distribution function at x of the given RV.'''
return self.dist.cdf(x, *self.par)
def logcdf(self, x):
return self.dist.logcdf(x, *self.par)
def ppf(self, q):
'''Percent point function (inverse of cdf) at q of the given RV.'''
return self.dist.ppf(q, *self.par)
def isf(self, q):
'''Inverse survival function at q of the given RV.'''
return self.dist.isf(q, *self.par)
def rvs(self, size=None, random_state=None):
kwds = {'size': size, 'random_state': random_state}
return self.dist.rvs(*self.par, **kwds)
def sf(self, x):
'''Survival function (1-cdf) at x of the given RV.'''
return self.dist.sf(x, *self.par)
def logsf(self, x):
return self.dist.logsf(x, *self.par)
def stats(self, moments='mv'):
''' Some statistics of the given RV'''
kwds = dict(moments=moments)
return self.dist.stats(*self.par, **kwds)
def median(self):
return self.dist.median(*self.par)
def mean(self):
return self.dist.mean(*self.par)
def var(self):
return self.dist.var(*self.par)
def std(self):
return self.dist.std(*self.par)
def moment(self, n):
return self.dist.moment(n, *self.par)
def entropy(self):
return self.dist.entropy(*self.par)
def pmf(self, k):
'''Probability mass function at k of the given RV'''
return self.dist.pmf(k, *self.par)
def logpmf(self, k):
return self.dist.logpmf(k, *self.par)
def interval(self, alpha):
return self.dist.interval(alpha, *self.par)
def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds):
if isinstance(self.dist, rv_continuous):
a, loc, scale = self.par[:-2], self.par[:-2], self.par[-1]
return self.dist.expect(func, a, loc, scale, lb, ub, conditional,
**kwds)
a, loc = self.par[:-1], self.par[-1]
if kwds:
raise ValueError("Discrete expect does not accept **kwds.")
return self.dist.expect(func, a, loc, lb, ub, conditional)
def _burr_link(x, logsf, phat, ix):
c, d, loc, scale = phat
logp = log(-expm1(logsf))
@ -90,43 +240,43 @@ def _genpareto_link(x, logsf, phat, ix):
# Stuart Coles (2004)
# "An introduction to statistical modelling of extreme values".
# Springer series in statistics
_assert_not_implemented(ix != 0, 'link(x,logsf,phat,i) where i=0 is '
'not implemented!')
c, loc, scale = phat
if c == 0:
return _expon_link(x, logsf, phat[1:], ix-1)
if ix == 2:
# Reorganizing w.r.t.scale, Eq. 4.13 and 4.14, pp 81 in
# Coles (2004) gives
# link = -(x-loc)*c/expm1(-c*logsf)
if c != 0.0:
phati = (x - loc) * c / expm1(-c * logsf)
else:
phati = -(x - loc) / logsf
elif ix == 1:
if c != 0:
phati = x + scale * expm1(c * logsf) / c
else:
phati = x + scale * logsf
elif ix == 0:
raise NotImplementedError(
'link(x,logsf,phat,i) where i=0 is not implemented!')
else:
raise IndexError('Index to the fixed parameter is out of bounds')
return phati
return (x - loc) * c / expm1(-c * logsf)
if ix == 1:
return x + scale * expm1(c * logsf) / c
raise IndexError('Index to the fixed parameter is out of bounds')
def _gumbel_r_link(x, logsf, phat, ix):
loc, scale = phat
loglogP = log(-log(-expm1(logsf)))
if ix == 1:
return -(x - loc) / loglogP
if ix == 1:
return x + scale * loglogP
raise IndexError('Index to the fixed parameter is out of bounds')
def _genextreme_link(x, logsf, phat, ix):
_assert_not_implemented(ix != 0, 'link(x,logsf,phat,i) where i=0 is not '
'implemented!')
c, loc, scale = phat
if c == 0:
return _gumbel_r_link(x, logsf, phat[1:], ix-1)
loglogP = log(-log(-expm1(logsf)))
if ix == 2:
# link = -(x-loc)*c/expm1(c*log(-logP))
if c != 0.0:
return -(x - loc) * c / expm1(c * loglogP)
return -(x - loc) / loglogP
return -(x - loc) * c / expm1(c * loglogP)
if ix == 1:
if c != 0:
return x + scale * expm1(c * loglogP) / c
return x + scale * loglogP
if ix == 0:
raise NotImplementedError(
'link(x,logsf,phat,i) where i=0 is not implemented!')
return x + scale * expm1(c * loglogP) / c
raise IndexError('Index to the fixed parameter is out of bounds')
@ -168,6 +318,7 @@ LINKS = dict(expon=_expon_link,
frechet_r=_weibull_min_link,
genpareto=_genpareto_link,
genexpon=_genexpon_link,
gumbel_r=_gumbel_r_link,
rayleigh=_rayleigh_link,
trunclayleigh=_trunclayleigh_link,
genextreme=_genextreme_link,
@ -414,9 +565,9 @@ class Profile(object):
p_low, p_up = self._approx_p_min_max(p_opt)
pmin, pmax = self.pmin, self.pmax
if pmin is None:
pmin = self._search_pmin(phatfree0, p_low, p_opt)
pmin = self._search_pminmax(phatfree0, p_low, p_opt, 'min')
if pmax is None:
pmax = self._search_pmax(phatfree0, p_up, p_opt)
pmax = self._search_pminmax(phatfree0, p_up, p_opt, 'max')
return pmin, pmax
def _adaptive_pvec(self, p_opt, pmin, pmax):
@ -438,58 +589,32 @@ class Profile(object):
return self._adaptive_pvec(p_opt, pmin, pmax)
return np.linspace(self.pmin, self.pmax, self.n)
def _search_pmin(self, phatfree0, p_min0, p_opt):
def _search_pminmax(self, phatfree0, p_minmax0, p_opt, direction):
phatfree = phatfree0.copy()
dp = np.maximum((p_opt - p_min0)/40, 0.01)*10
sign = -1 if direction == 'min' else 1
dp = np.maximum(sign*(p_minmax0 - p_opt) / 40, 0.01) * 10
Lmax, phatfree = self._profile_optimum(phatfree, p_opt)
p_min_opt = p_min0
p_minmax_opt = p_minmax0
for j in range(51):
p_min = p_opt - dp
Lmax, phatfree = self._profile_optimum(phatfree, p_min)
# print((dp, p_min, p_min_opt, Lmax))
p_minmax = p_opt + sign * dp
Lmax, phatfree = self._profile_optimum(phatfree, p_minmax)
# print((dp, p_minmax, p_minmax_opt, Lmax))
if np.isnan(Lmax):
dp *= 0.33
elif Lmax < self.alpha_cross_level - self.alpha_Lrange*5*(j+1):
p_min_opt = p_min
p_minmax_opt = p_minmax
dp *= 0.33
elif Lmax < self.alpha_cross_level:
p_min_opt = p_min
p_minmax_opt = p_minmax
break
else:
dp *= 1.67
else:
msg = 'Exceeded max iterations. (p_min0={}, p_min={}, p={})'
warnings.warn(msg.format(p_min0, p_min_opt, p_opt))
msg = 'Exceeded max iterations. (p_{0}0={1}, p_{0}={2}, p={3})'
warnings.warn(msg.format(direction, p_minmax0, p_minmax_opt,
p_opt))
# print('search_pmin iterations={}'.format(j))
return p_min_opt
def _search_pmax(self, phatfree0, p_max0, p_opt):
phatfree = phatfree0.copy()
dp = (p_max0 - p_opt)/40
if dp < 1e-2:
dp = 0.1
Lmax, phatfree = self._profile_optimum(phatfree, p_opt)
p_max_opt = p_opt
for j in range(51):
p_max = p_opt + dp
Lmax, phatfree = self._profile_optimum(phatfree, p_max)
if np.isnan(Lmax):
dp *= 0.33
elif Lmax < self.alpha_cross_level - self.alpha_Lrange*5*(j+1):
p_max_opt = p_max
dp *= 0.33
elif Lmax < self.alpha_cross_level:
p_max_opt = p_max
break
else:
dp *= 1.67
else:
msg = 'Exceeded max iterations. (p={}, p_max={}, p_max0 = {})'
warnings.warn(msg.format(p_opt, p_max_opt, p_max0))
# print('search_pmax iterations={}'.format(j))
return p_max_opt
return p_minmax_opt
def _profile_fun(self, free_par, fix_par):
''' Return negative of loglike or logps function
@ -504,37 +629,40 @@ class Profile(object):
par[self.i_fixed] = self._local_link(fix_par, par)
return self.fit_dist.fitfun(par)
def get_bounds(self, alpha=0.05):
'''Return confidence interval for profiled parameter
'''
if alpha < self.alpha:
msg = 'Might not be able to return bounds with alpha less than {}'
warnings.warn(msg.format(self.alpha))
cross_level = self.Lmax - 0.5 * chi2isf(alpha, 1)
ind = findcross(self.data, cross_level)
n = len(ind)
def _check_bounds(self, cross_level, ind, n):
if n == 0:
warnings.warn('''Number of crossings is zero, i.e.,
upper and lower bound is not found!''')
bounds = (self.pmin, self.pmax)
warnings.warn('Number of crossings is zero, i.e., upper and lower '
'bound is not found!')
bounds = self.pmin, self.pmax
elif n == 1:
x0 = ecross(self.args, self.data, ind, cross_level)
is_upcrossing = self.data[ind] < self.data[ind + 1]
if is_upcrossing:
bounds = (x0, self.pmax)
bounds = x0, self.pmax
warnings.warn('Upper bound is larger')
else:
bounds = (self.pmin, x0)
bounds = self.pmin, x0
warnings.warn('Lower bound is smaller')
elif n == 2:
bounds = ecross(self.args, self.data, ind, cross_level)
else:
warnings.warn('Number of crossings too large! Something is wrong!')
bounds = ecross(self.args, self.data, ind[[0, -1]], cross_level)
return bounds
def get_bounds(self, alpha=0.05):
'''Return confidence interval for profiled parameter
'''
_assert_warn(self.alpha <= alpha, 'Might not be able to return bounds '
'with alpha less than {}'.format(self.alpha))
cross_level = self.Lmax - 0.5 * chi2isf(alpha, 1)
ind = findcross(self.data, cross_level)
n = len(ind)
if n == 2:
bounds = ecross(self.args, self.data, ind, cross_level)
else:
bounds = self._check_bounds(cross_level, ind, n)
return bounds
def plot(self, axis=None):
'''
Plot profile function for p_opt with 100(1-alpha)% confidence interval.
@ -1121,7 +1249,6 @@ class FitDistribution(rv_frozen):
def _nnlf(self, theta, x):
return self.dist._penalized_nnlf(theta, x)
def _nlogps(self, theta, x):
""" Moran's negative log Product Spacings statistic
@ -1323,6 +1450,29 @@ class FitDistribution(rv_frozen):
ci.append((np.nan, np.nan))
return np.array(ci)
def _fit_summary_text(self):
fixstr = ''
if self.par_fix is not None:
numfix = len(self.i_fixed)
if numfix > 0:
format0 = ', '.join(['%d'] * numfix)
format1 = ', '.join(['%g'] * numfix)
phatistr = format0 % tuple(self.i_fixed)
phatvstr = format1 % tuple(self.par[self.i_fixed])
fixstr = 'Fixed: phat[{0:s}] = {1:s} '.format(phatistr,
phatvstr)
subtxt = ('Fit method: {0:s}, Fit p-value: {1:2.2f} {2:s}, ' +
'phat=[{3:s}], {4:s}')
par_txt = '{:1.2g}, ' * len(self.par)[:-2].format(*self.par)
try:
LL_txt = 'Lps_max={:2.2g}, Ll_max={:2.2g}'.format(self.LPSmax,
self.LLmax)
except Exception:
LL_txt = 'Lps_max={}, Ll_max={}'.format(self.LPSmax, self.LLmax)
txt = subtxt.format(self.method.upper(), self.pvalue, fixstr, par_txt,
LL_txt)
return txt
def plotfitsummary(self, axes=None, fig=None):
''' Plot various diagnostic plots to asses the quality of the fit.
@ -1346,29 +1496,9 @@ class FitDistribution(rv_frozen):
if fig is None:
fig = plt.gcf()
fixstr = ''
if self.par_fix is not None:
numfix = len(self.i_fixed)
if numfix > 0:
format0 = ', '.join(['%d'] * numfix)
format1 = ', '.join(['%g'] * numfix)
phatistr = format0 % tuple(self.i_fixed)
phatvstr = format1 % tuple(self.par[self.i_fixed])
fixstr = 'Fixed: phat[{0:s}] = {1:s} '.format(phatistr,
phatvstr)
subtxt = ('Fit method: {0:s}, Fit p-value: {1:2.2f} {2:s}, ' +
'phat=[{3:s}], {4:s}')
par_txt = ('{:1.2g}, '*len(self.par))[:-2].format(*self.par)
try:
LL_txt = 'Lps_max={:2.2g}, Ll_max={:2.2g}'.format(self.LPSmax,
self.LLmax)
except Exception:
LL_txt = 'Lps_max={}, Ll_max={}'.format(self.LPSmax, self.LLmax)
try:
fig.text(0.05, 0.01, subtxt.format(self.method.upper(),
self.pvalue, fixstr, par_txt,
LL_txt))
txt = self._fit_summary_text()
fig.text(0.05, 0.01, txt)
except AttributeError:
pass

Loading…
Cancel
Save