Updated stats

master
Per A Brodtkorb 9 years ago
parent 8337b60531
commit eca5368f75

@ -19,6 +19,8 @@ from time import strftime, gmtime
from numdifftools.extrapolation import dea3 # @UnusedImport from numdifftools.extrapolation import dea3 # @UnusedImport
from wafo.plotbackend import plotbackend from wafo.plotbackend import plotbackend
from collections import Callable from collections import Callable
import numbers
try: try:
from wafo import c_library as clib # @UnresolvedImport from wafo import c_library as clib # @UnresolvedImport
except ImportError: except ImportError:
@ -37,8 +39,28 @@ __all__ = ['now', 'spaceline', 'narg_smallest', 'args_flat', 'is_numlike',
'betaloge', 'gravity', 'nextpow2', 'discretize', 'polar2cart', 'betaloge', 'gravity', 'nextpow2', 'discretize', 'polar2cart',
'cart2polar', 'meshgrid', 'ndgrid', 'trangood', 'tranproc', 'cart2polar', 'meshgrid', 'ndgrid', 'trangood', 'tranproc',
'plot_histgrm', 'num2pistr', 'test_docstrings', 'lazywhere', 'plot_histgrm', 'num2pistr', 'test_docstrings', 'lazywhere',
'lazyselect'
'piecewise', 'piecewise',
'valarray'] 'valarray', 'check_random_state']
def check_random_state(seed):
"""Turn seed into a np.random.RandomState instance
If seed is None (or np.random), return the RandomState singleton used
by np.random.
If seed is an int, return a new RandomState instance seeded with seed.
If seed is already a RandomState instance, return it.
Otherwise raise ValueError.
"""
if seed is None or seed is np.random:
return np.random.mtrand._rand
if isinstance(seed, (numbers.Integral, np.integer)):
return np.random.RandomState(seed)
if isinstance(seed, np.random.RandomState):
return seed
raise ValueError('%r cannot be used to seed a numpy.random.RandomState'
' instance' % seed)
def valarray(shape, value=np.NaN, typecode=None): def valarray(shape, value=np.NaN, typecode=None):
@ -217,6 +239,46 @@ def lazywhere(cond, arrays, f, fillvalue=None, f2=None):
return out return out
def lazyselect(condlist, choicelist, arrays, default=0):
"""
Mimic `np.select(condlist, choicelist)`.
Notice it assumes that all `arrays` are of the same shape, or can be
broadcasted together.
All functions in `choicelist` must accept array arguments in the order
given in `arrays` and must return an array of the same shape as broadcasted
`arrays`.
Examples
--------
>>> x = np.arange(6)
>>> np.select([x <3, x > 3], [x**2, x**3], default=0)
array([ 0, 1, 4, 0, 64, 125])
>>> lazyselect([x < 3, x > 3], [lambda x: x**2, lambda x: x**3], (x,))
array([ 0., 1., 4., 0., 64., 125.])
>>> a = -np.ones_like(x)
>>> lazyselect([x < 3, x > 3],
... [lambda x, a: x**2, lambda x, a: a * x**3],
... (x, a))
array([ 0., 1., 4., 0., -64., -125.])
"""
arrays = np.broadcast_arrays(*arrays)
tcode = np.mintypecode([a.dtype.char for a in arrays])
out = valarray(np.shape(arrays[0]), value=default, typecode=tcode)
for index in range(len(condlist)):
func, cond = choicelist[index], condlist[index]
if np.all(cond is False):
continue
cond, _ = np.broadcast_arrays(cond, arrays[0])
temp = tuple(np.extract(cond, arr) for arr in arrays)
np.place(out, cond, func(*temp))
return out
def rotation_matrix(heading, pitch, roll): def rotation_matrix(heading, pitch, roll):
''' '''

@ -15,12 +15,13 @@ from scipy.special import (gammaln as gamln, gamma as gam, boxcox, boxcox1p,
inv_boxcox, inv_boxcox1p, erfc, chndtr, chndtrix, inv_boxcox, inv_boxcox1p, erfc, chndtr, chndtrix,
log1p, expm1, log1p, expm1,
i0, i1, ndtr as _norm_cdf, log_ndtr as _norm_logcdf) i0, i1, ndtr as _norm_cdf, log_ndtr as _norm_logcdf)
# from scipy._lib._numpy_compat import broadcast_to
from numpy import (where, arange, putmask, ravel, shape, from numpy import (where, arange, putmask, ravel, shape,
log, sqrt, exp, arctanh, tan, sin, arcsin, arctan, log, sqrt, exp, arctanh, tan, sin, arcsin, arctan,
tanh, cos, cosh, sinh) tanh, cos, cosh, sinh)
from numpy import polyval, place, extract, asarray, nan, inf, pi from numpy import polyval, place, extract, asarray, nan, inf, pi, broadcast_to
import numpy as np import numpy as np
from scipy.stats.mstats_basic import mode from scipy.stats.mstats_basic import mode
@ -35,9 +36,10 @@ from scipy.stats._tukeylambda_stats import (tukeylambda_variance as _tlvar,
from wafo.stats._distn_infrastructure import ( from wafo.stats._distn_infrastructure import (
rv_continuous, valarray, _skew, _kurtosis, _lazywhere, rv_continuous, valarray, _skew, _kurtosis, _lazywhere,
_ncx2_log_pdf, _ncx2_pdf, _ncx2_cdf, get_distribution_names, _ncx2_log_pdf, _ncx2_pdf, _ncx2_cdf, get_distribution_names,
_lazyselect
) )
from wafo.stats._constants import _XMIN, _EULER, _ZETA3, _XMAX, _LOGXMAX, _EPS from wafo.stats._constants import _XMIN, _EULER, _ZETA3, _XMAX, _LOGXMAX
## Kolmogorov-Smirnov one-sided and two-sided test statistics ## Kolmogorov-Smirnov one-sided and two-sided test statistics
@ -215,6 +217,8 @@ class alpha_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _pdf(self, x, a): def _pdf(self, x, a):
return 1.0/(x**2)/_norm_cdf(a)*_norm_pdf(a-1.0/x) return 1.0/(x**2)/_norm_cdf(a)*_norm_pdf(a-1.0/x)
@ -285,6 +289,8 @@ class arcsine_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _pdf(self, x): def _pdf(self, x):
return 1.0/pi/sqrt(x*(1-x)) return 1.0/pi/sqrt(x*(1-x))
@ -535,6 +541,8 @@ class betaprime_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _rvs(self, a, b): def _rvs(self, a, b):
sz, rndm = self._size, self._random_state sz, rndm = self._size, self._random_state
u1 = gamma.rvs(a, size=sz, random_state=rndm) u1 = gamma.rvs(a, size=sz, random_state=rndm)
@ -604,10 +612,10 @@ class bradford_gen(rv_continuous):
return c * exp(-log1p(c * x)) / log1p(c) return c * exp(-log1p(c * x)) / log1p(c)
def _cdf(self, x, c): def _cdf(self, x, c):
return log1p(c * x) / log1p(c) return special.log1p(c * x) / special.log1p(c)
def _ppf(self, q, c): def _ppf(self, q, c):
return expm1(q * log1p(c))/c # ((1.0+c)**q-1)/c return special.expm1(q * special.log1p(c)) / c
def _stats(self, c, moments='mv'): def _stats(self, c, moments='mv'):
k = log1p(c) k = log1p(c)
@ -672,6 +680,8 @@ class burr_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _pdf(self, x, c, d): def _pdf(self, x, c, d):
return c * d * (x**(-c - 1.0)) * ((1 + x**(-c))**(-d - 1.0)) return c * d * (x**(-c - 1.0)) * ((1 + x**(-c))**(-d - 1.0))
@ -725,6 +735,8 @@ class burr12_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _pdf(self, x, c, d): def _pdf(self, x, c, d):
return np.exp(self._logpdf(x, c, d)) return np.exp(self._logpdf(x, c, d))
@ -732,7 +744,7 @@ class burr12_gen(rv_continuous):
return log(c) + log(d) + special.xlogy(c-1, x) + special.xlog1py(-d-1, x**c) return log(c) + log(d) + special.xlogy(c-1, x) + special.xlog1py(-d-1, x**c)
def _cdf(self, x, c, d): def _cdf(self, x, c, d):
return 1 - self._sf(x, c, d) return -special.expm1(self._logsf(x, c, d))
def _logcdf(self, x, c, d): def _logcdf(self, x, c, d):
return special.log1p(-(1 + x**c)**(-d)) return special.log1p(-(1 + x**c)**(-d))
@ -869,6 +881,7 @@ class chi_gen(rv_continuous):
%(example)s %(example)s
""" """
def _rvs(self, df): def _rvs(self, df):
sz, rndm = self._size, self._random_state sz, rndm = self._size, self._random_state
return sqrt(chi2.rvs(df, size=sz, random_state=rndm)) return sqrt(chi2.rvs(df, size=sz, random_state=rndm))
@ -1338,6 +1351,8 @@ class fatiguelife_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _rvs(self, c): def _rvs(self, c):
z = self._random_state.standard_normal(self._size) z = self._random_state.standard_normal(self._size)
x = 0.5*c*z x = 0.5*c*z
@ -1584,6 +1599,7 @@ class frechet_r_gen(rv_continuous):
%(example)s %(example)s
""" """
def _pdf(self, x, c): def _pdf(self, x, c):
return c*pow(x, c-1)*exp(-pow(x, c)) return c*pow(x, c-1)*exp(-pow(x, c))
@ -1853,7 +1869,7 @@ class genexpon_gen(rv_continuous):
""" """
def _pdf(self, x, a, b, c): def _pdf(self, x, a, b, c):
return (a + b*(-special.expm1(-c*x))) * exp((-a-b)*x + return (a + b*(-special.expm1(-c*x)))*exp((-a-b)*x +
b*(-special.expm1(-c*x))/c) b*(-special.expm1(-c*x))/c)
def _cdf(self, x, a, b, c): def _cdf(self, x, a, b, c):
@ -1900,11 +1916,15 @@ class genextreme_gen(rv_continuous):
self.a = where(c < 0, 1.0 / min(c, -_XMIN), -inf) self.a = where(c < 0, 1.0 / min(c, -_XMIN), -inf)
return where(abs(c) == inf, 0, 1) return where(abs(c) == inf, 0, 1)
def _loglogcdf(self, x, c):
return _lazywhere((x == x) & (c != 0), (x, c),
lambda x, c: special.log1p(-c*x)/c, -x)
def _pdf(self, x, c): def _pdf(self, x, c):
return exp(self._logpdf(x, c)) return exp(self._logpdf(x, c))
def _logpdf(self, x, c): def _logpdf(self, x, c):
cx = _lazywhere((c != 0)*(x == x), (x, c), lambda x, c: c*x, 0.0) cx = _lazywhere((x == x) & (c != 0), (x, c), lambda x, c: c*x, 0.0)
logex2 = special.log1p(-cx) logex2 = special.log1p(-cx)
logpex2 = self._loglogcdf(x, c) logpex2 = self._loglogcdf(x, c)
pex2 = exp(logpex2) pex2 = exp(logpex2)
@ -1914,18 +1934,14 @@ class genextreme_gen(rv_continuous):
putmask(logpdf, (c == 1) & (x == 1), 0.0) putmask(logpdf, (c == 1) & (x == 1), 0.0)
return logpdf return logpdf
def _cdf(self, x, c):
return exp(self._logcdf(x, c))
def _loglogcdf(self, x, c):
return _lazywhere((c != 0) & (x == x), (x, c),
lambda x, c: special.log1p(-c * x)/c, -x)
def _logcdf(self, x, c): def _logcdf(self, x, c):
return -exp(self._loglogcdf(x, c)) return -exp(self._loglogcdf(x, c))
def _cdf(self, x, c):
return exp(self._logcdf(x, c))
def _sf(self, x, c): def _sf(self, x, c):
return -expm1(self._logcdf(x, c)) return - expm1(self._logcdf(x, c))
def _ppf(self, q, c): def _ppf(self, q, c):
x = -log(-log(q)) x = -log(-log(q))
@ -2233,7 +2249,7 @@ class gengamma_gen(rv_continuous):
gengamma.pdf(x, a, c) = abs(c) * x**(c*a-1) * exp(-x**c) / gamma(a) gengamma.pdf(x, a, c) = abs(c) * x**(c*a-1) * exp(-x**c) / gamma(a)
for ``x > 0``, ``a > 0``, and ``c != 0``. for ``x >= 0``, ``a > 0``, and ``c != 0``.
`gengamma` takes ``a`` and ``c`` as shape parameters. `gengamma` takes ``a`` and ``c`` as shape parameters.
@ -2443,17 +2459,20 @@ class gumbel_l_gen(rv_continuous):
def _logpdf(self, x): def _logpdf(self, x):
return x - exp(x) return x - exp(x)
def _cdf(self, x):
return -special.expm1(-exp(x))
def _ppf(self, q):
return log(-special.log1p(-q))
def _logsf(self, x): def _logsf(self, x):
return -exp(x) return -exp(x)
def _sf(self, x): def _sf(self, x):
return exp(-exp(x)) return exp(-exp(x))
def _cdf(self, x): def _isf(self, x):
return -expm1(-exp(x)) return log(-log(x))
def _ppf(self, q):
return log(-log1p(-q))
def _stats(self): def _stats(self):
return -_EULER, pi*pi/6.0, \ return -_EULER, pi*pi/6.0, \
@ -2684,6 +2703,8 @@ class invgamma_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _pdf(self, x, a): def _pdf(self, x, a):
return exp(self._logpdf(x, a)) return exp(self._logpdf(x, a))
@ -2691,10 +2712,16 @@ class invgamma_gen(rv_continuous):
return (-(a+1) * log(x) - gamln(a) - 1.0/x) return (-(a+1) * log(x) - gamln(a) - 1.0/x)
def _cdf(self, x, a): def _cdf(self, x, a):
return 1.0 - special.gammainc(a, 1.0/x) return special.gammaincc(a, 1.0 / x)
def _ppf(self, q, a): def _ppf(self, q, a):
return 1.0 / special.gammaincinv(a, 1.-q) return 1.0 / special.gammainccinv(a, q)
def _sf(self, x, a):
return special.gammainc(a, 1.0 / x)
def _isf(self, q, a):
return 1.0 / special.gammaincinv(a, q)
def _stats(self, a, moments='mvsk'): def _stats(self, a, moments='mvsk'):
m1 = _lazywhere(a > 1, (a,), lambda x: 1. / (x - 1.), np.inf) m1 = _lazywhere(a > 1, (a,), lambda x: 1. / (x - 1.), np.inf)
@ -2742,6 +2769,8 @@ class invgauss_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _rvs(self, mu): def _rvs(self, mu):
return self._random_state.wald(mu, 1.0, size=self._size) return self._random_state.wald(mu, 1.0, size=self._size)
@ -2788,6 +2817,8 @@ class invweibull_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _pdf(self, x, c): def _pdf(self, x, c):
xc1 = np.power(x, -c - 1.0) xc1 = np.power(x, -c - 1.0)
xc2 = np.power(x, -c) xc2 = np.power(x, -c)
@ -2833,6 +2864,8 @@ class johnsonsb_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _argcheck(self, a, b): def _argcheck(self, a, b):
return (b > 0) & (a == a) return (b > 0) & (a == a)
@ -2915,7 +2948,10 @@ class laplace_gen(rv_continuous):
return where(x > 0, 1.0-0.5*exp(-x), 0.5*exp(x)) return where(x > 0, 1.0-0.5*exp(-x), 0.5*exp(x))
def _ppf(self, q): def _ppf(self, q):
return where(q > 0.5, -log(2) - log1p(-q), log(2 * q)) return where(q > 0.5, -log(2*(1-q)), log(2*q))
def _isf(self, q):
return where(q < 0.5, -log(2*q), log(2*(1-q)))
def _stats(self): def _stats(self):
return 0, 2, 0, 3 return 0, 2, 0, 3
@ -2949,6 +2985,8 @@ class levy_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _pdf(self, x): def _pdf(self, x):
return 1 / sqrt(2*pi*x) / x * exp(-1/(2*x)) return 1 / sqrt(2*pi*x) / x * exp(-1/(2*x))
@ -2990,6 +3028,8 @@ class levy_l_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _pdf(self, x): def _pdf(self, x):
ax = abs(x) ax = abs(x)
return 1/sqrt(2*pi*ax)/ax*exp(-1/(2*ax)) return 1/sqrt(2*pi*ax)/ax*exp(-1/(2*ax))
@ -3026,29 +3066,47 @@ class levy_stable_gen(rv_continuous):
%(example)s %(example)s
""" """
def _rvs(self, alpha, beta): def _rvs(self, alpha, beta):
sz = self._size
TH = uniform.rvs(loc=-pi/2.0, scale=pi, size=sz)
W = expon.rvs(size=sz)
if alpha == 1:
return 2/pi*(pi/2+beta*TH)*tan(TH)-beta*log((pi/2*W*cos(TH))/(pi/2+beta*TH))
ialpha = 1.0/alpha def alpha1func(alpha, beta, TH, aTH, bTH, cosTH, tanTH, W):
aTH = alpha*TH return (2/pi*(pi/2 + bTH)*tanTH -
if beta == 0: beta*log((pi/2*W*cosTH)/(pi/2 + bTH)))
return W/(cos(TH)/tan(aTH)+sin(TH))*((cos(aTH)+sin(aTH)*tan(TH))/W)**ialpha
def beta0func(alpha, beta, TH, aTH, bTH, cosTH, tanTH, W):
return (W/(cosTH/tan(aTH) + sin(TH)) *
((cos(aTH) + sin(aTH)*tanTH)/W)**(1.0/alpha))
def otherwise(alpha, beta, TH, aTH, bTH, cosTH, tanTH, W):
# alpha is not 1 and beta is not 0
val0 = beta*tan(pi*alpha/2) val0 = beta*tan(pi*alpha/2)
th0 = arctan(val0)/alpha th0 = arctan(val0)/alpha
val3 = W/(cos(TH)/tan(alpha*(th0+TH))+sin(TH)) val3 = W/(cosTH/tan(alpha*(th0 + TH)) + sin(TH))
res3 = val3*((cos(aTH)+sin(aTH)*tan(TH)-val0*(sin(aTH)-cos(aTH)*tan(TH)))/W)**ialpha res3 = val3*((cos(aTH) + sin(aTH)*tanTH - val0*(sin(aTH) -
cos(aTH)*tanTH))/W)**(1.0/alpha)
return res3 return res3
def alphanot1func(alpha, beta, TH, aTH, bTH, cosTH, tanTH, W):
res = _lazywhere(beta == 0,
(alpha, beta, TH, aTH, bTH, cosTH, tanTH, W),
beta0func, f2=otherwise)
return res
sz = self._size
alpha = broadcast_to(alpha, sz)
beta = broadcast_to(beta, sz)
TH = uniform.rvs(loc=-pi/2.0, scale=pi, size=sz,
random_state=self._random_state)
W = expon.rvs(size=sz, random_state=self._random_state)
aTH = alpha*TH
bTH = beta*TH
cosTH = cos(TH)
tanTH = tan(TH)
res = _lazywhere(alpha == 1, (alpha, beta, TH, aTH, bTH, cosTH, tanTH, W),
alpha1func, f2=alphanot1func)
return res
def _argcheck(self, alpha, beta): def _argcheck(self, alpha, beta):
if beta == -1:
self.b = 0.0
elif beta == 1:
self.a = 0.0
return (alpha > 0) & (alpha <= 2) & (beta <= 1) & (beta >= -1) return (alpha > 0) & (alpha <= 2) & (beta <= 1) & (beta >= -1)
def _pdf(self, x, alpha, beta): def _pdf(self, x, alpha, beta):
@ -3087,10 +3145,13 @@ class logistic_gen(rv_continuous):
return special.expit(x) return special.expit(x)
def _ppf(self, q): def _ppf(self, q):
return -log1p(-q) + log(q) return special.logit(q)
def _sf(self, x):
return special.expit(-x)
def _isf(self, q): def _isf(self, q):
return log1p(-q) - log(q) return -special.logit(q)
def _stats(self): def _stats(self):
return 0, pi*pi/3.0, 0, 6.0/5.0 return 0, pi*pi/3.0, 0, 6.0/5.0
@ -3222,6 +3283,8 @@ class lognorm_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _rvs(self, s): def _rvs(self, s):
return exp(s * self._random_state.standard_normal(self._size)) return exp(s * self._random_state.standard_normal(self._size))
@ -3285,6 +3348,8 @@ class gilbrat_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _rvs(self): def _rvs(self):
return exp(self._random_state.standard_normal(self._size)) return exp(self._random_state.standard_normal(self._size))
@ -3397,6 +3462,306 @@ class mielke_gen(rv_continuous):
mielke = mielke_gen(a=0.0, name='mielke') mielke = mielke_gen(a=0.0, name='mielke')
class kappa4_gen(rv_continuous):
"""Kappa 4 parameter distribution.
%(before_notes)s
Notes
-----
The probability density function for kappa4 is::
kappa4.pdf(x, h, k) = (1.0 - k*x)**(1.0/k - 1.0)*
(1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h-1)
if ``h`` and ``k`` are not equal to 0.
If ``h`` or ``k`` are zero then the pdf can be simplified:
h = 0 and k != 0::
kappa4.pdf(x, h, k) = (1.0 - k*x)**(1.0/k - 1.0)*
exp(-(1.0 - k*x)**(1.0/k))
h != 0 and k = 0::
kappa4.pdf(x, h, k) = exp(-x)*(1.0 - h*exp(-x))**(1.0/h - 1.0)
h = 0 and k = 0::
kappa4.pdf(x, h, k) = exp(-x)*exp(-exp(-x))
kappa4 takes ``h`` and ``k`` as shape parameters.
The kappa4 distribution returns other distributions when certain
``h`` and ``k`` values are used.
+------+-------------+----------------+------------------+
| h | k=0.0 | k=1.0 | -inf<=k<=inf |
+======+=============+================+==================+
| -1.0 | Logistic | | Generalized |
| | | | Logistic(1) |
| | | | |
| | logistic(x) | | |
+------+-------------+----------------+------------------+
| 0.0 | Gumbel | Reverse | Generalized |
| | | Exponential(2) | Extreme Value |
| | | | |
| | gumbel_r(x) | | genextreme(x, k) |
+------+-------------+----------------+------------------+
| 1.0 | Exponential | Uniform | Generalized |
| | | | Pareto |
| | | | |
| | expon(x) | uniform(x) | genpareto(x, -k) |
+------+-------------+----------------+------------------+
(1) There are at least five generalized logistic distributions.
Four are described here:
https://en.wikipedia.org/wiki/Generalized_logistic_distribution
The "fifth" one is the one kappa4 should match which currently
isn't implemented in scipy:
https://en.wikipedia.org/wiki/Talk:Generalized_logistic_distribution
http://www.mathwave.com/help/easyfit/html/analyses/distributions/gen_logistic.html
(2) This distribution is currently not in scipy.
References
----------
J.C. Finney, "Optimization of a Skewed Logistic Distribution With Respect
to the Kolmogorov-Smirnov Test", A Dissertation Submitted to the Graduate
Faculty of the Louisiana State University and Agricultural and Mechanical
College, (August, 2004),
http://etd.lsu.edu/docs/available/etd-05182004-144851/unrestricted/Finney_dis.pdf
J.R.M. Hosking, "The four-parameter kappa distribution". IBM J. Res.
Develop. 38 (3), 25 1-258 (1994).
B. Kumphon, A. Kaew-Man, P. Seenoi, "A Rainfall Distribution for the Lampao
Site in the Chi River Basin, Thailand", Journal of Water Resource and
Protection, vol. 4, 866-869, (2012).
http://file.scirp.org/pdf/JWARP20121000009_14676002.pdf
C. Winchester, "On Estimation of the Four-Parameter Kappa Distribution", A
Thesis Submitted to Dalhousie University, Halifax, Nova Scotia, (March
2000).
http://www.nlc-bnc.ca/obj/s4/f2/dsk2/ftp01/MQ57336.pdf
%(after_notes)s
%(example)s
"""
def _argcheck(self, h, k):
condlist = [np.logical_and(h > 0, k > 0),
np.logical_and(h > 0, k == 0),
np.logical_and(h > 0, k < 0),
np.logical_and(h <= 0, k > 0),
np.logical_and(h <= 0, k == 0),
np.logical_and(h <= 0, k < 0)]
def f0(h, k):
return (1.0 - h**(-k))/k
def f1(h, k):
return log(h)
def f3(h, k):
a = np.empty(shape(h))
a[:] = -inf
return a
def f5(h, k):
return 1.0/k
self.a = _lazyselect(condlist,
[f0, f1, f0, f3, f3, f5],
[h, k],
default=nan)
def f00(h, k):
return 1.0/k
def f11(h, k):
a = np.empty(shape(h))
a[:] = inf
return a
self.b = _lazyselect(condlist,
[f00, f11, f11, f00, f11, f11],
[h, k],
default=nan)
return (h == h)
def _pdf(self, x, h, k):
return exp(self._logpdf(x, h, k))
def _logpdf(self, x, h, k):
condlist = [np.logical_and(h != 0, k != 0),
np.logical_and(h == 0, k != 0),
np.logical_and(h != 0, k == 0),
np.logical_and(h == 0, k == 0)]
def f0(x, h, k):
'''pdf = (1.0 - k*x)**(1.0/k - 1.0)*(
1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h-1.0)
logpdf = ...
'''
return special.xlog1py(1.0/k-1.0,
-k*x
) + special.xlog1py(1.0/h-1.0,
-h*(1.0-k*x)**(1.0/k))
def f1(x, h, k):
'''pdf = (1.0 - k*x)**(1.0/k - 1.0)*exp(-(
1.0 - k*x)**(1.0/k))
logpdf = ...
'''
return special.xlog1py(1.0/k-1.0,-k*x)-(1.0-k*x)**(1.0/k)
def f2(x, h, k):
'''pdf = exp(-x)*(1.0 - h*exp(-x))**(1.0/h - 1.0)
logpdf = ...
'''
return -x + special.xlog1py(1.0/h-1.0, -h*exp(-x))
def f3(x, h, k):
'''pdf = exp(-x-exp(-x))
logpdf = ...
'''
return -x-exp(-x)
return _lazyselect(condlist,
[f0, f1, f2, f3],
[x, h, k],
default=nan)
def _cdf(self, x, h, k):
return exp(self._logcdf(x, h, k))
def _logcdf(self, x, h, k):
condlist = [np.logical_and(h != 0, k != 0),
np.logical_and(h == 0, k != 0),
np.logical_and(h != 0, k == 0),
np.logical_and(h == 0, k == 0)]
def f0(x, h, k):
'''cdf = (1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h)
logcdf = ...
'''
return (1.0/h)*special.log1p(-h*(1.0-k*x)**(1.0/k))
def f1(x, h, k):
'''cdf = exp(-(1.0 - k*x)**(1.0/k))
logcdf = ...
'''
return -(1.0 - k*x)**(1.0/k)
def f2(x, h, k):
'''cdf = (1.0 - h*exp(-x))**(1.0/h)
logcdf = ...
'''
return (1.0/h)*special.log1p(-h*exp(-x))
def f3(x, h, k):
'''cdf = exp(-exp(-x))
logcdf = ...
'''
return -exp(-x)
return _lazyselect(condlist,
[f0, f1, f2, f3],
[x, h, k],
default=nan)
def _ppf(self, q, h, k):
condlist = [np.logical_and(h != 0, k != 0),
np.logical_and(h == 0, k != 0),
np.logical_and(h != 0, k == 0),
np.logical_and(h == 0, k == 0)]
def f0(q, h, k):
return 1.0/k*(1.0 - ((1.0 - (q**h))/h)**k)
def f1(q, h, k):
return 1.0/k*(1.0 - (-log(q))**k)
def f2(q, h, k):
'''ppf = -log((1.0 - (q**h))/h)
'''
return -special.log1p(-(q**h)) + log(h)
def f3(q, h, k):
return -log(-log(q))
return _lazyselect(condlist,
[f0, f1, f2, f3],
[q, h, k],
default=nan)
def _stats(self, h, k):
if h >= 0 and k >= 0:
maxr = 5
elif h < 0 and k >= 0:
maxr = int(-1.0/h*k)
elif k < 0:
maxr = int(-1.0/k)
else:
maxr = 5
outputs = [None if r < maxr else nan for r in range(1, 5)]
return outputs[:]
kappa4 = kappa4_gen(name='kappa4')
class kappa3_gen(rv_continuous):
"""Kappa 3 parameter distribution.
%(before_notes)s
Notes
-----
The probability density function for `kappa` is::
kappa3.pdf(x, a) =
a*[a + x**a]**(-(a + 1)/a), for ``x > 0``
0.0, for ``x <= 0``
`kappa3` takes ``a`` as a shape parameter and ``a > 0``.
References
----------
P.W. Mielke and E.S. Johnson, "Three-Parameter Kappa Distribution Maximum
Likelihood and Likelihood Ratio Tests", Methods in Weather Research,
701-707, (September, 1973),
http://docs.lib.noaa.gov/rescue/mwr/101/mwr-101-09-0701.pdf
B. Kumphon, "Maximum Entropy and Maximum Likelihood Estimation for the
Three-Parameter Kappa Distribution", Open Journal of Statistics, vol 2,
415-419 (2012)
http://file.scirp.org/pdf/OJS20120400011_95789012.pdf
%(after_notes)s
%(example)s
"""
def _argcheck(self, a):
return a > 0
def _pdf(self, x, a):
return a*(a + x**a)**(-1.0/a-1)
def _cdf(self, x, a):
return x*(a + x**a)**(-1.0/a)
def _ppf(self, q, a):
return (a/(q**-a - 1.0))**(1.0/a)
def _stats(self, a):
outputs = [None if i < a else nan for i in range(1, 5)]
return outputs[:]
kappa3 = kappa3_gen(a=0.0, name='kappa3')
class nakagami_gen(rv_continuous): class nakagami_gen(rv_continuous):
"""A Nakagami continuous random variable. """A Nakagami continuous random variable.
@ -3881,6 +4246,7 @@ class pearson3_gen(rv_continuous):
ans, x, skew = np.broadcast_arrays([1.0], x, skew) ans, x, skew = np.broadcast_arrays([1.0], x, skew)
ans = ans.copy() ans = ans.copy()
# mask is True where skew is small enough to use the normal approx.
mask = np.absolute(skew) < norm2pearson_transition mask = np.absolute(skew) < norm2pearson_transition
invmask = ~mask invmask = ~mask
@ -3940,13 +4306,18 @@ class pearson3_gen(rv_continuous):
return ans return ans
def _rvs(self, skew): def _rvs(self, skew):
skew = broadcast_to(skew, self._size)
ans, x, transx, skew, mask, invmask, beta, alpha, zeta = ( ans, x, transx, skew, mask, invmask, beta, alpha, zeta = (
self._preprocess([0], skew)) self._preprocess([0], skew))
if mask[0]:
return self._random_state.standard_normal(self._size) nsmall = mask.sum()
ans = self._random_state.standard_gamma(alpha, self._size)/beta + zeta nbig = mask.size - nsmall
if ans.size == 1: ans[mask] = self._random_state.standard_normal(nsmall)
return ans[0] ans[invmask] = (self._random_state.standard_gamma(alpha, nbig)/beta +
zeta)
if self._size == ():
ans = ans[0]
return ans return ans
def _ppf(self, q, skew): def _ppf(self, q, skew):
@ -4028,6 +4399,8 @@ class powerlognorm_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _pdf(self, x, c, s): def _pdf(self, x, c, s):
return (c/(x*s) * _norm_pdf(log(x)/s) * return (c/(x*s) * _norm_pdf(log(x)/s) *
pow(_norm_cdf(-log(x)/s), c*1.0-1.0)) pow(_norm_cdf(-log(x)/s), c*1.0-1.0))
@ -4134,6 +4507,8 @@ class rayleigh_gen(rv_continuous):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _rvs(self): def _rvs(self):
return chi.rvs(2, size=self._size, random_state=self._random_state) return chi.rvs(2, size=self._size, random_state=self._random_state)
@ -4141,9 +4516,7 @@ class rayleigh_gen(rv_continuous):
return exp(self._logpdf(r)) return exp(self._logpdf(r))
def _logpdf(self, r): def _logpdf(self, r):
rr2 = r * r / 2.0 return log(r) - 0.5 * r * r
return _lazywhere(rr2 != inf, (r, rr2), lambda r, rr2: log(r) - rr2,
-rr2)
def _cdf(self, r): def _cdf(self, r):
return -special.expm1(-0.5 * r**2) return -special.expm1(-0.5 * r**2)
@ -4152,10 +4525,10 @@ class rayleigh_gen(rv_continuous):
return sqrt(-2 * special.log1p(-q)) return sqrt(-2 * special.log1p(-q))
def _sf(self, r): def _sf(self, r):
return exp(-0.5 * r**2) return exp(self._logsf(r))
def _logsf(self, r): def _logsf(self, r):
return -0.5 * r**2 return -0.5 * r * r
def _isf(self, q): def _isf(self, q):
return sqrt(-2 * log(q)) return sqrt(-2 * log(q))
@ -4290,6 +4663,7 @@ class reciprocal_gen(rv_continuous):
return 0.5*log(a*b)+log(log(b/a)) return 0.5*log(a*b)+log(log(b/a))
reciprocal = reciprocal_gen(name="reciprocal") reciprocal = reciprocal_gen(name="reciprocal")
class rice_gen(rv_continuous): class rice_gen(rv_continuous):
"""A Rice continuous random variable. """A Rice continuous random variable.
@ -4321,8 +4695,8 @@ class rice_gen(rv_continuous):
def _rvs(self, b): def _rvs(self, b):
# http://en.wikipedia.org/wiki/Rice_distribution # http://en.wikipedia.org/wiki/Rice_distribution
sz = self._size if self._size else 1 t = b/np.sqrt(2) + self._random_state.standard_normal(size=(2,) +
t = b/np.sqrt(2) + self._random_state.standard_normal(size=(2, sz)) self._size)
return np.sqrt((t*t).sum(axis=0)) return np.sqrt((t*t).sum(axis=0))
def _cdf(self, x, b): def _cdf(self, x, b):
@ -4653,10 +5027,9 @@ class truncnorm_gen(rv_continuous):
self._na = _norm_cdf(a) self._na = _norm_cdf(a)
self._sb = _norm_sf(b) self._sb = _norm_sf(b)
self._sa = _norm_sf(a) self._sa = _norm_sf(a)
if self.a > 0: self._delta = np.where(self.a > 0,
self._delta = -(self._sb - self._sa) -(self._sb - self._sa),
else: self._nb - self._na)
self._delta = self._nb - self._na
self._logdelta = log(self._delta) self._logdelta = log(self._delta)
return (a != b) return (a != b)
@ -4670,10 +5043,11 @@ class truncnorm_gen(rv_continuous):
return (_norm_cdf(x) - self._na) / self._delta return (_norm_cdf(x) - self._na) / self._delta
def _ppf(self, q, a, b): def _ppf(self, q, a, b):
if self.a > 0: # XXX Use _lazywhere...
return _norm_isf(q*self._sb + self._sa*(1.0-q)) ppf = np.where(self.a > 0,
else: _norm_isf(q*self._sb + self._sa*(1.0-q)),
return _norm_ppf(q*self._nb + self._na*(1.0-q)) _norm_ppf(q*self._nb + self._na*(1.0-q)))
return ppf
def _stats(self, a, b): def _stats(self, a, b):
nA, nB = self._na, self._nb nA, nB = self._na, self._nb
@ -4830,6 +5204,8 @@ class wald_gen(invgauss_gen):
%(example)s %(example)s
""" """
_support_mask = rv_continuous._open_support_mask
def _rvs(self): def _rvs(self):
return self._random_state.wald(1.0, 1.0, size=self._size) return self._random_state.wald(1.0, 1.0, size=self._size)

@ -1,11 +1,12 @@
from __future__ import absolute_import from __future__ import absolute_import
from scipy.stats._distn_infrastructure import * # @UnusedWildImport from scipy.stats._distn_infrastructure import * # @UnusedWildImport
from scipy.stats._distn_infrastructure import (_skew, # @UnusedImport from scipy.stats._distn_infrastructure import (_skew, # @UnusedImport
_kurtosis, _lazywhere, _ncx2_log_pdf, # @IgnorePep8 @UnusedImport _kurtosis, _ncx2_log_pdf, # @IgnorePep8 @UnusedImport
_ncx2_pdf, _ncx2_cdf) # @UnusedImport @IgnorePep8 _ncx2_pdf, _ncx2_cdf) # @UnusedImport @IgnorePep8
from .estimation import FitDistribution from .estimation import FitDistribution
from ._constants import _XMAX from ._constants import _XMAX
from wafo.misc import lazyselect as _lazyselect
from wafo.misc import lazywhere as _lazywhere
_doc_default_example = """\ _doc_default_example = """\
Examples Examples
@ -328,7 +329,7 @@ def nlogps(self, theta, x):
return T return T
def _penalized_nnlf(self, theta, x): def _penalized_nnlf(self, theta, x, penalty=None):
''' Return negative loglikelihood function, ''' Return negative loglikelihood function,
i.e., - sum (log pdf(x, theta), axis=0) i.e., - sum (log pdf(x, theta), axis=0)
where theta are the parameters (including loc and scale) where theta are the parameters (including loc and scale)
@ -352,6 +353,7 @@ def _penalized_nnlf(self, theta, x):
Nbad += np.sum(~finite_logpdf, axis=0) Nbad += np.sum(~finite_logpdf, axis=0)
logscale = N * log(scale) logscale = N * log(scale)
if Nbad > 0: if Nbad > 0:
if penalty is None:
penalty = Nbad * log(_XMAX) * 100 penalty = Nbad * log(_XMAX) * 100
return -np.sum(logpdf[finite_logpdf], axis=0) + penalty + logscale return -np.sum(logpdf[finite_logpdf], axis=0) + penalty + logscale
return -np.sum(logpdf, axis=0) + logscale return -np.sum(logpdf, axis=0) + logscale
@ -597,6 +599,14 @@ def fit2(self, data, *args, **kwds):
return FitDistribution(self, data, args, **kwds) return FitDistribution(self, data, args, **kwds)
def _support_mask(self, x):
return (self.a <= x) & (x <= self.b)
def _open_support_mask(self, x):
return (self.a < x) & (x < self.b)
rv_generic.freeze = freeze rv_generic.freeze = freeze
rv_discrete.freeze = freeze rv_discrete.freeze = freeze
rv_continuous.freeze = freeze rv_continuous.freeze = freeze
@ -607,3 +617,5 @@ rv_continuous._penalized_nnlf = _penalized_nnlf
rv_continuous._reduce_func = _reduce_func rv_continuous._reduce_func = _reduce_func
rv_continuous.fit = fit rv_continuous.fit = fit
rv_continuous.fit2 = fit2 rv_continuous.fit2 = fit2
rv_continuous._support_mask = _support_mask
rv_continuous._open_support_mask = _open_support_mask

@ -188,8 +188,355 @@ def norm_ppf(q):
return special.ndtri(q) return special.ndtri(q)
# internal class to profile parameters of a given distribution
class Profile(object): class Profile(object):
''' Profile Log- likelihood or Product Spacing-function.
which can be used for constructing confidence interval for
phat[i].
Parameters
----------
fit_dist : FitDistribution object
with ML or MPS estimated distribution parameters.
i : scalar integer
defining which distribution parameter to keep fixed in the
profiling process (default first non-fixed parameter)
pmin, pmax : real scalars
Interval for either the parameter, phat[i] used in the
optimization of the profile function (default is based on the
100*(1-alpha)% confidence interval computed with the delta method.)
n : scalar integer
Max number of points used in Lp (default 100)
alpha : real scalar
confidence coefficent (default 0.05)
Returns
-------
Lp : Profile log-likelihood function with parameters phat given
the data and phat[i], i.e.,
Lp = max(log(f(phat|data,phat[i]))),
Member methods
-------------
plot() : Plot profile function with 100(1-alpha)% confidence interval
get_bounds() : Return 100(1-alpha)% confidence interval
Member variables
----------------
fit_dist : FitDistribution data object.
data : profile function values
args : profile function arguments
alpha : confidence coefficient
Lmax : Maximum value of profile function
alpha_cross_level :
PROFILE is a utility function for making inferences either on a particular
component of the vector phat or the quantile, x, or the probability, SF.
This is usually more accurate than using the delta method assuming
asymptotic normality of the ML estimator or the MPS estimator.
Examples
--------
# MLE
>>> import wafo.stats as ws
>>> R = ws.weibull_min.rvs(1,size=100);
>>> phat = FitDistribution(ws.weibull_min, R, 1, scale=1, floc=0.0)
# Better CI for phat.par[i=0]
>>> Lp = Profile(phat, i=0)
>>> Lp.plot()
>>> phat_ci = Lp.get_bounds(alpha=0.1)
'''
def __init__(self, fit_dist, i=None, pmin=None, pmax=None, n=100,
alpha=0.05):
method = fit_dist.method.lower()
self.fit_dist = fit_dist
self._set_indexes(fit_dist, i)
self.pmin = pmin
self.pmax = pmax
self.n = n
self.alpha = alpha
self.data = None
self.args = None
self.xlabel = 'phat[{}]'.format(np.ravel(self.i_fixed)[0])
like_txt = 'likelihood' if method == 'ml' else 'product spacing'
self.ylabel = 'Profile log' + like_txt
self.title = '{:g}% CI for {:s} params'.format(100*(1.0 - self.alpha),
fit_dist.dist.name)
Lmax = self._loglike_max(fit_dist, method)
self.Lmax = Lmax
self.alpha_Lrange = 0.5 * chi2isf(self.alpha, 1)
self.alpha_cross_level = Lmax - self.alpha_Lrange
phatv = fit_dist.par.copy()
self._set_profile(phatv)
def _loglike_max(self, fit_dist, method):
if method.startswith('ml'):
Lmax = fit_dist.LLmax
elif method.startswith('mps'):
Lmax = fit_dist.LPSmax
else:
raise ValueError(
"PROFILE is only valid for ML- or MPS- estimators")
return Lmax
def _set_indexes(self, fit_dist, i):
if i is None:
i = self._default_i_fixed(fit_dist)
self.i_fixed = np.atleast_1d(i)
isnotfixed = self._get_not_fixed_mask(fit_dist)
self.i_notfixed = nonzero(isnotfixed)
self._check_i_fixed()
isfree = isnotfixed
isfree[self.i_fixed] = False
self.i_free = nonzero(isfree)
def _default_i_fixed(self, fit_dist):
try:
i0 = 1 - np.isfinite(fit_dist.par_fix).argmax()
except:
i0 = 0
return i0
def _get_not_fixed_mask(self, fit_dist):
if fit_dist.par_fix is None:
isnotfixed = np.ones(fit_dist.par.shape, dtype=bool)
else:
isnotfixed = 1 - np.isfinite(fit_dist.par_fix)
return isnotfixed
def _check_i_fixed(self):
if self.i_fixed not in self.i_notfixed:
raise IndexError("Index i must be equal to an index to one of " +
"the free parameters.")
def _local_link(self, fix_par, par):
return fix_par
def _correct_Lmax(self, Lmax, free_par, fix_par):
if Lmax > self.Lmax: # foundNewphat = True
dL = self.Lmax - Lmax
self.alpha_cross_level -= dL
self.Lmax = Lmax
par = self._par.copy()
par[self.i_free] = free_par
par[self.i_fixed] = fix_par
self.best_par = par
self._par = par
warnings.warn(
'The fitted parameters does not provide the optimum fit. ' +
'Something wrong with fit (par = {})'.format(str(par)))
def _profile_optimum(self, phatfree0, p_opt):
phatfree = optimize.fmin(
self._profile_fun, phatfree0, args=(p_opt,), disp=0)
Lmax = -self._profile_fun(phatfree, p_opt)
self._correct_Lmax(Lmax, phatfree, p_opt)
return Lmax, phatfree
def _set_profile(self, phat):
self._par = phat.copy()
# Set up variable to profile and _local_link function
p_opt = self._par[self.i_fixed]
phatfree = self._par[self.i_free].copy()
pvec = self._get_pvec(phatfree, p_opt)
self.data = np.ones_like(pvec) * nan
k1 = (pvec >= p_opt).argmax()
for size, step in ((-1, -1), (pvec.size, 1)):
phatfree = self._par[self.i_free].copy()
for ix in range(k1, size, step):
Lmax, phatfree = self._profile_optimum(phatfree, pvec[ix])
self.data[ix] = Lmax
if ix != k1 and self.data[ix] < self.alpha_cross_level:
break
np.putmask(pvec, np.isnan(self.data), nan)
self.args = pvec
self._prettify_profile()
def _prettify_profile(self):
pvec = self.args
ix = nonzero(np.isfinite(pvec))
self.data = self.data[ix]
self.args = pvec[ix]
cond = self.data == -np.inf
if any(cond):
ind, = cond.nonzero()
self.data.put(ind, floatinfo.min / 2.0)
ind1 = np.where(ind == 0, ind, ind - 1)
cl = self.alpha_cross_level - self.alpha_Lrange / 2.0
t0 = ecross(self.args, self.data, ind1, cl)
self.data.put(ind, cl)
self.args.put(ind, t0)
def _get_variance(self):
return self.fit_dist.par_cov[self.i_fixed, :][:, self.i_fixed]
def _approx_p_min_max(self, p_opt):
pvar = self._get_variance()
if pvar <= 1e-5 or np.isnan(pvar):
pvar = max(abs(p_opt) * 0.5, 0.2)
pvar = max(pvar, 0.1)
p_crit = -norm_ppf(self.alpha / 2.0) * sqrt(np.ravel(pvar)) * 1.5
return p_opt - p_crit * 5, p_opt + p_crit * 5
def _p_min_max(self, phatfree0, p_opt):
p_low, p_up = self._approx_p_min_max(p_opt)
pmin, pmax = self.pmin, self.pmax
if pmin is None:
pmin = self._search_pmin(phatfree0, p_low, p_opt)
if pmax is None:
pmax = self._search_pmax(phatfree0, p_up, p_opt)
return pmin, pmax
def _adaptive_pvec(self, p_opt, pmin, pmax):
p_crit_low = (p_opt - pmin) / 5
p_crit_up = (pmax - p_opt) / 5
n4 = np.floor(self.n / 4.0)
a, b = p_opt - p_crit_low, p_opt + p_crit_up
pvec1 = np.linspace(pmin, a, n4 + 1)
pvec2 = np.linspace(a, b, self.n - 2 * n4)
pvec3 = np.linspace(b, pmax, n4 + 1)
pvec = np.unique(np.hstack((pvec1, p_opt, pvec2, pvec3)))
return pvec
def _get_pvec(self, phatfree0, p_opt):
''' return proper interval for the variable to profile
'''
if self.pmin is None or self.pmax is None:
pmin, pmax = self._p_min_max(phatfree0, p_opt)
return self._adaptive_pvec(p_opt, pmin, pmax)
return np.linspace(self.pmin, self.pmax, self.n)
def _search_pmin(self, phatfree0, p_min0, p_opt):
phatfree = phatfree0.copy()
dp = (p_opt - p_min0)/40
if dp < 1e-2:
dp = 0.1
p_min_opt = p_opt - dp
Lmax, phatfree = self._profile_optimum(phatfree, p_opt)
for _j in range(50):
p_min = p_opt - dp
Lmax, phatfree = self._profile_optimum(phatfree, p_min)
if np.isnan(Lmax):
dp *= 0.33
elif Lmax < self.alpha_cross_level - self.alpha_Lrange * 5:
p_min_opt = p_min
dp *= 0.33
elif Lmax < self.alpha_cross_level:
p_min_opt = p_min
break
else:
dp *= 1.67
return p_min_opt
def _search_pmax(self, phatfree0, p_max0, p_opt):
phatfree = phatfree0.copy()
dp = (p_max0 - p_opt)/40
if dp < 1e-2:
dp = 0.1
p_max_opt = p_opt + dp
Lmax, phatfree = self._profile_optimum(phatfree, p_opt)
for _j in range(50):
p_max = p_opt + dp
Lmax, phatfree = self._profile_optimum(phatfree, p_max)
if np.isnan(Lmax):
dp *= 0.33
elif Lmax < self.alpha_cross_level - self.alpha_Lrange * 2:
p_max_opt = p_max
dp *= 0.33
elif Lmax < self.alpha_cross_level:
p_max_opt = p_max
break
else:
dp *= 1.67
return p_max_opt
def _profile_fun(self, free_par, fix_par):
''' Return negative of loglike or logps function
free_par - vector of free parameters
fix_par - fixed parameter, i.e., either quantile (return level),
probability (return period) or distribution parameter
'''
par = self._par.copy()
par[self.i_free] = free_par
# _local_link: connects fixed quantile or probability with fixed
# distribution parameter
par[self.i_fixed] = self._local_link(fix_par, par)
return self.fit_dist.fitfun(par)
def get_bounds(self, alpha=0.05):
'''Return confidence interval for profiled parameter
'''
if alpha < self.alpha:
warnings.warn(
'Might not be able to return CI with alpha less than %g' %
self.alpha)
cross_level = self.Lmax - 0.5 * chi2isf(alpha, 1)
ind = findcross(self.data, cross_level)
N = len(ind)
if N == 0:
warnings.warn('''Number of crossings is zero, i.e.,
upper and lower bound is not found!''')
CI = (self.pmin, self.pmax)
elif N == 1:
x0 = ecross(self.args, self.data, ind, cross_level)
isUpcrossing = self.data[ind] > self.data[ind + 1]
if isUpcrossing:
CI = (x0, self.pmax)
warnings.warn('Upper bound is larger')
else:
CI = (self.pmin, x0)
warnings.warn('Lower bound is smaller')
elif N == 2:
CI = ecross(self.args, self.data, ind, cross_level)
else:
warnings.warn('Number of crossings too large! Something is wrong!')
CI = ecross(self.args, self.data, ind[[0, -1]], cross_level)
return CI
def plot(self, axis=None):
''' Plot profile function with 100(1-alpha)% CI
'''
if axis is None:
axis = plotbackend.gca()
p_ci = self.get_bounds(self.alpha)
axis.plot(
self.args, self.data,
p_ci, [self.Lmax, ] * 2, 'r--',
p_ci, [self.alpha_cross_level, ] * 2, 'r--')
ymax = self.Lmax + self.alpha_Lrange/10
ymin = self.alpha_cross_level - self.alpha_Lrange/10
axis.vlines(p_ci, ymin=ymin, ymax=self.Lmax,
color='r', linestyles='--')
if self.i_fixed is not None:
axis.vlines(self.fit_dist.par[self.i_fixed],
ymin=ymin, ymax=self.Lmax,
color='g', linestyles='--')
axis.set_title(self.title)
axis.set_ylabel(self.ylabel)
axis.set_xlabel(self.xlabel)
axis.set_ylim(ymin=ymin, ymax=ymax)
class ProfileOld(object):
''' Profile Log- likelihood or Product Spacing-function. ''' Profile Log- likelihood or Product Spacing-function.
which can be used for constructing confidence interval for which can be used for constructing confidence interval for
@ -276,6 +623,18 @@ class Profile(object):
>>> sf_ci = Lsf.get_bounds(alpha=0.2) >>> sf_ci = Lsf.get_bounds(alpha=0.2)
''' '''
def _get_not_fixed_mask(self, fit_dist):
if fit_dist.par_fix is None:
isnotfixed = np.ones(fit_dist.par.shape, dtype=bool)
else:
isnotfixed = 1 - np.isfinite(fit_dist.par_fix)
return isnotfixed
def _check_i_fixed(self):
if self.i_fixed not in self.i_notfixed:
raise IndexError("Index i must be equal to an index to one of " +
"the free parameters.")
def __init__(self, fit_dist, method=None, **kwds): def __init__(self, fit_dist, method=None, **kwds):
if method is None: if method is None:
method = fit_dist.method.lower() method = fit_dist.method.lower()
@ -307,19 +666,11 @@ class Profile(object):
raise ValueError( raise ValueError(
"PROFILE is only valid for ML- or MPS- estimators") "PROFILE is only valid for ML- or MPS- estimators")
if fit_dist.par_fix is None: isnotfixed = self._get_not_fixed_mask(fit_dist)
isnotfixed = np.ones(fit_dist.par.shape, dtype=bool)
else:
isnotfixed = 1 - np.isfinite(fit_dist.par_fix)
self.i_notfixed = nonzero(isnotfixed) self.i_notfixed = nonzero(isnotfixed)
self.i_fixed = atleast_1d(self.i_fixed) self.i_fixed = atleast_1d(self.i_fixed)
self._check_i_fixed()
if 1 - isnotfixed[self.i_fixed]:
raise IndexError(
"Index i must be equal to an index to one of the free " +
"parameters.")
isfree = isnotfixed isfree = isnotfixed
isfree[self.i_fixed] = False isfree[self.i_fixed] = False
@ -339,7 +690,7 @@ class Profile(object):
self.profile_par = not (self.profile_x or self.profile_logSF) self.profile_par = not (self.profile_x or self.profile_logSF)
if self.link is None: if self.link is None:
self.link = LINKS[self.fit_dist.dist.name] self.link = LINKS.get(self.fit_dist.dist.name)
if self.profile_par: if self.profile_par:
self._local_link = self._par_link self._local_link = self._par_link
self.xlabel = 'phat(%d)' % self.i_fixed self.xlabel = 'phat(%d)' % self.i_fixed
@ -373,20 +724,26 @@ class Profile(object):
def _logSF_link(self, fix_par, par): def _logSF_link(self, fix_par, par):
return self.link(self.x, fix_par, par, self.i_fixed) return self.link(self.x, fix_par, par, self.i_fixed)
def _correct_Lmax(self, Lmax): def _correct_Lmax(self, Lmax, free_par, fix_par):
if Lmax > self.Lmax: # foundNewphat = True if Lmax > self.Lmax: # foundNewphat = True
warnings.warn(
'The fitted parameters does not provide the optimum fit. ' +
'Something wrong with fit')
dL = self.Lmax - Lmax dL = self.Lmax - Lmax
self.alpha_cross_level -= dL self.alpha_cross_level -= dL
self.Lmax = Lmax self.Lmax = Lmax
par = self._par.copy()
par[self.i_free] = free_par
par[self.i_fixed] = fix_par
self.best_par = par
warnings.warn(
'The fitted parameters does not provide the optimum fit. ' +
'Something wrong with fit (par = {})'.format(str(par)))
def _profile_optimum(self, phatfree0, p_opt): def _profile_optimum(self, phatfree0, p_opt):
phatfree = optimize.fmin( phatfree = optimize.fmin(
self._profile_fun, phatfree0, args=(p_opt,), disp=0) self._profile_fun, phatfree0, args=(p_opt,), disp=0)
Lmax = -self._profile_fun(phatfree, p_opt) Lmax = -self._profile_fun(phatfree, p_opt)
self._correct_Lmax(Lmax) self._correct_Lmax(Lmax, phatfree, p_opt)
return Lmax, phatfree return Lmax, phatfree
def _set_profile(self, phatfree0, p_opt): def _set_profile(self, phatfree0, p_opt):
@ -400,7 +757,7 @@ class Profile(object):
for ix in range(k1, size, step): for ix in range(k1, size, step):
Lmax, phatfree = self._profile_optimum(phatfree, pvec[ix]) Lmax, phatfree = self._profile_optimum(phatfree, pvec[ix])
self.data[ix] = Lmax self.data[ix] = Lmax
if self.data[ix] < self.alpha_cross_level: if ix != k1 and self.data[ix] < self.alpha_cross_level:
break break
np.putmask(pvec, np.isnan(self.data), nan) np.putmask(pvec, np.isnan(self.data), nan)
self.args = pvec self.args = pvec
@ -443,13 +800,14 @@ class Profile(object):
''' return proper interval for the variable to profile ''' return proper interval for the variable to profile
''' '''
linspace = np.linspace
if self.pmin is None or self.pmax is None: if self.pmin is None or self.pmax is None:
pvar = self._get_variance() pvar = self._get_variance()
if pvar <= 1e-5 or np.isnan(pvar): if pvar <= 1e-5 or np.isnan(pvar):
pvar = max(abs(p_opt) * 0.5, 0.5) pvar = max(abs(p_opt) * 0.5, 0.2)
else:
pvar = max(pvar, 0.1)
p_crit = (-norm_ppf(self.alpha / 2.0) * p_crit = (-norm_ppf(self.alpha / 2.0) *
sqrt(np.ravel(pvar)) * 1.5) sqrt(np.ravel(pvar)) * 1.5)
@ -465,14 +823,14 @@ class Profile(object):
N4 = np.floor(self.N / 4.0) N4 = np.floor(self.N / 4.0)
pvec1 = linspace(self.pmin, p_opt - p_crit_low, N4 + 1) pvec1 = np.linspace(self.pmin, p_opt - p_crit_low, N4 + 1)
pvec2 = linspace( pvec2 = np.linspace(p_opt - p_crit_low, p_opt + p_crit_up,
p_opt - p_crit_low, p_opt + p_crit_up, self.N - 2 * N4) self.N - 2 * N4)
pvec3 = linspace(p_opt + p_crit_up, self.pmax, N4 + 1) pvec3 = np.linspace(p_opt + p_crit_up, self.pmax, N4 + 1)
pvec = np.unique(np.hstack((pvec1, p_opt, pvec2, pvec3))) pvec = np.unique(np.hstack((pvec1, p_opt, pvec2, pvec3)))
else: else:
pvec = linspace(self.pmin, self.pmax, self.N) pvec = np.linspace(self.pmin, self.pmax, self.N)
return pvec return pvec
def _search_pmin(self, phatfree0, p_min0, p_opt): def _search_pmin(self, phatfree0, p_min0, p_opt):
@ -488,7 +846,7 @@ class Profile(object):
Lmax, phatfree = self._profile_optimum(phatfree, p_min) Lmax, phatfree = self._profile_optimum(phatfree, p_min)
if np.isnan(Lmax): if np.isnan(Lmax):
dp *= 0.33 dp *= 0.33
elif Lmax < self.alpha_cross_level - self.alpha_Lrange * 2: elif Lmax < self.alpha_cross_level - self.alpha_Lrange * 5:
p_min_opt = p_min p_min_opt = p_min
dp *= 0.33 dp *= 0.33
elif Lmax < self.alpha_cross_level: elif Lmax < self.alpha_cross_level:
@ -588,14 +946,20 @@ class Profile(object):
p_ci = self.get_bounds(self.alpha) p_ci = self.get_bounds(self.alpha)
axis.plot( axis.plot(
self.args, self.data, self.args, self.data,
self.args[[0, -1]], [self.Lmax, ] * 2, 'r--', p_ci, [self.Lmax, ] * 2, 'r--',
self.args[[0, -1]], [self.alpha_cross_level, ] * 2, 'r--') p_ci, [self.alpha_cross_level, ] * 2, 'r--')
axis.vlines(p_ci, ymin=axis.get_ylim()[0], ymax = self.Lmax + self.alpha_Lrange/10
ymax=self.Lmax, # self.alpha_cross_level, ymin = self.alpha_cross_level - self.alpha_Lrange/10
axis.vlines(p_ci, ymin=ymin, ymax=self.Lmax,
color='r', linestyles='--') color='r', linestyles='--')
if self.i_fixed is not None:
axis.vlines(self.fit_dist.par[self.i_fixed],
ymin=ymin, ymax=self.Lmax,
color='g', linestyles='--')
axis.set_title(self.title) axis.set_title(self.title)
axis.set_ylabel(self.ylabel) axis.set_ylabel(self.ylabel)
axis.set_xlabel(self.xlabel) axis.set_xlabel(self.xlabel)
axis.set_ylim(ymin=ymin, ymax=ymax)
class FitDistribution(rv_frozen): class FitDistribution(rv_frozen):
@ -857,7 +1221,7 @@ class FitDistribution(rv_frozen):
(including loc and scale) (including loc and scale)
''' '''
if eps is None: if eps is None:
eps = (_EPS) ** 0.4 eps = (_EPS) ** 0.25
num_par = len(theta) num_par = len(theta)
# pab 07.01.2001: Always choose the stepsize h so that # pab 07.01.2001: Always choose the stepsize h so that
# it is an exactly representable number. # it is an exactly representable number.
@ -901,7 +1265,7 @@ class FitDistribution(rv_frozen):
return -H return -H
def _nnlf(self, theta, x): def _nnlf(self, theta, x):
return self.dist._penalized_nnlf(theta, x) return self.dist._penalized_nnlf(theta, x, self._penalty)
def _nlogps(self, theta, x): def _nlogps(self, theta, x):
""" Moran's negative log Product Spacings statistic """ Moran's negative log Product Spacings statistic
@ -974,27 +1338,28 @@ class FitDistribution(rv_frozen):
nonfiniteD = 1 - finiteD nonfiniteD = 1 - finiteD
Nbad += np.sum(nonfiniteD, axis=0) Nbad += np.sum(nonfiniteD, axis=0)
if Nbad > 0: if Nbad > 0:
T = -np.sum(logD[finiteD], axis=0) + 100.0 * log(_XMAX) * Nbad if self._penalty is None:
penalty = 100.0 * log(_XMAX) * Nbad
else: else:
T = -np.sum(logD, axis=0) penalty = 0.0
return T return -np.sum(logD[finiteD], axis=0) + penalty
return -np.sum(logD, axis=0)
def _fit(self, *args, **kwds): def _fit(self, *args, **kwds):
self._penalty = None
dist = self.dist dist = self.dist
data = self.data data = self.data
Narg = len(args) Narg = len(args)
if Narg > dist.numargs: if Narg > dist.numargs + 2:
raise ValueError("Too many input arguments.") raise ValueError("Too many input arguments.")
start = [None] * 2 if (Narg < dist.numargs + 2) or not ('loc' in kwds and 'scale' in kwds):
if (Narg < dist.numargs) or not ('loc' in kwds and 'scale' in kwds):
# get distribution specific starting locations # get distribution specific starting locations
start = dist._fitstart(data) start = dist._fitstart(data)
args += start[Narg:-2] args += start[Narg:]
loc = kwds.pop('loc', start[-2]) loc = kwds.pop('loc', args[-2])
scale = kwds.pop('scale', start[-1]) scale = kwds.pop('scale', args[-1])
args += (loc, scale) args = args[:-2] + (loc, scale)
x0, func, restore, args, fixedn = self._reduce_func(args, kwds) x0, func, restore, args, fixedn = self._reduce_func(args, kwds)
if self.search: if self.search:
optimizer = kwds.pop('optimizer', optimize.fmin) optimizer = kwds.pop('optimizer', optimize.fmin)
@ -1011,8 +1376,24 @@ class FitDistribution(rv_frozen):
# by now kwds must be empty, since everybody took what they needed # by now kwds must be empty, since everybody took what they needed
if kwds: if kwds:
raise TypeError("Unknown arguments: %s." % kwds) raise TypeError("Unknown arguments: %s." % kwds)
vals = optimizer(func, x0, args=(np.ravel(data),), disp=0) output = optimizer(func, x0, args=(data,), full_output=True)
vals = tuple(vals)
# output = optimize.fmin_bfgs(func, vals, args=(data,), full_output=True)
# dfunc = nd.Gradient(func)(vals, data)
# nd.directionaldiff(f, x0, vec)
warnflag = output[-1]
if warnflag == 1:
output = optimizer(func, output[0], args=(data,), full_output=True)
warnflag = output[-1]
vals = tuple(output[0])
if warnflag == 1:
warnings.warn("The maximum number of iterations was exceeded.")
elif warnflag == 2:
warnings.warn("Did not converge")
else:
vals = tuple(output[0])
else: else:
vals = tuple(x0) vals = tuple(x0)
if restore is not None: if restore is not None:
@ -1022,8 +1403,12 @@ class FitDistribution(rv_frozen):
def _compute_cov(self): def _compute_cov(self):
'''Compute covariance '''Compute covariance
''' '''
# self._penalty = 0
somefixed = (self.par_fix is not None) and any(isfinite(self.par_fix)) somefixed = (self.par_fix is not None) and any(isfinite(self.par_fix))
H = np.asmatrix(self._hessian(self._fitfun, self.par, self.data)) H = np.asmatrix(self._hessian(self._fitfun, self.par, self.data))
# H = -nd.Hessian(lambda par: self._fitfun(par, self.data),
# method='forward')(self.par)
self.H = H self.H = H
try: try:
if somefixed: if somefixed:
@ -1153,6 +1538,7 @@ class FitDistribution(rv_frozen):
plotbackend.subplot(2, 2, 4) plotbackend.subplot(2, 2, 4)
self.plotresprb() self.plotresprb()
plotbackend.subplots_adjust(hspace=0.4, wspace=0.4)
fixstr = '' fixstr = ''
if self.par_fix is not None: if self.par_fix is not None:
numfix = len(self.i_fixed) numfix = len(self.i_fixed)
@ -1161,12 +1547,16 @@ class FitDistribution(rv_frozen):
format1 = ', '.join(['%g'] * numfix) format1 = ', '.join(['%g'] * numfix)
phatistr = format0 % tuple(self.i_fixed) phatistr = format0 % tuple(self.i_fixed)
phatvstr = format1 % tuple(self.par[self.i_fixed]) phatvstr = format1 % tuple(self.par[self.i_fixed])
fixstr = 'Fixed: phat[%s] = %s ' % (phatistr, phatvstr) fixstr = 'Fixed: phat[{0:s}] = {1:s} '.format(phatistr,
phatvstr)
infostr = 'Fit method: %s, Fit p-value: %2.2f %s' % ( infostr = 'Fit method: {0:s}, Fit p-value: {1:2.2f} {2:s}, phat=[{3:s}]'
self.method.upper(), self.pvalue, fixstr) par_txt = ('{:1.2g}, '*len(self.par))[:-2].format(*self.par)
try: try:
plotbackend.figtext(0.05, 0.01, infostr) plotbackend.figtext(0.05, 0.01, infostr.format(self.method.upper(),
self.pvalue,
fixstr,
par_txt))
except: except:
pass pass
@ -1339,14 +1729,68 @@ def test1():
import wafo.stats as ws import wafo.stats as ws
dist = ws.weibull_min dist = ws.weibull_min
# dist = ws.bradford # dist = ws.bradford
R = dist.rvs(0.3, size=1000) dist = ws.gengamma
R = dist.rvs(2,.5, size=500)
phat = FitDistribution(dist, R, method='ml') phat = FitDistribution(dist, R, method='ml')
phats = FitDistribution(dist, R, method='mps')
import matplotlib.pyplot as plt
# Better CI for phat.par[i=0] # Better CI for phat.par[i=0]
Lp1 = Profile(phat, i=0) # @UnusedVariable plt.figure(0)
N = dist.numargs + 2
for i in range(N-1, -1, -1):
plt.subplot(N, 1, i+1)
pmax = None
if i == 0:
pmax = None
Lp1 = Profile(phats, i=i)
j = 0
while hasattr(Lp1, 'best_par') and j < 7:
phats = FitDistribution(dist, R, args=Lp1.best_par,
method='mps', search=True)
Lp1 = Profile(phats, i=i, pmax=pmax)
j += 1
Lp1.plot() Lp1.plot()
import matplotlib.pyplot as plt if i != 0:
plt.show() plt.title('')
if i != N//2:
plt.ylabel('')
plt.subplots_adjust(hspace=0.5)
par_txt = ('{:1.2g}, '*len(phats.par))[:-2].format(*phats.par)
plt.suptitle('phat = [{}]'.format(par_txt))
plt.figure(1)
phats.plotfitsummary()
plt.figure(2)
for i in range(N-1, -1, -1):
plt.subplot(N, 1, i+1)
pmax = None
if i == 0:
pmax = None
Lp1 = Profile(phat, i=i, pmax=pmax) # @UnusedVariable
j = 0
while hasattr(Lp1, 'best_par') and j < 7:
phats = FitDistribution(dist, R, args=Lp1.best_par,
method='ml', search=True)
Lp1 = Profile(phat, i=i) # @UnusedVariable
j += 1
Lp1.plot()
if i != 0:
plt.title('')
if i != N//2:
plt.ylabel('')
par_txt = ('{:1.2g}, '*len(phat.par))[:-2].format(*phat.par)
plt.suptitle('phat = [{}]'.format(par_txt))
plt.subplots_adjust(hspace=0.5)
plt.figure(3)
phat.plotfitsummary()
plt.show('hold')
# Lp2 = Profile(phat, i=2) # Lp2 = Profile(phat, i=2)
# SF = 1./990 # SF = 1./990
# x = phat.isf(SF) # x = phat.isf(SF)

Loading…
Cancel
Save