From aeed49139b8ad652ef34eeed4b2b2e36b3fd8801 Mon Sep 17 00:00:00 2001 From: Per A Brodtkorb Date: Mon, 6 Jun 2016 19:35:52 +0200 Subject: [PATCH 1/4] Upated wafo.stats --- wafo/stats/_continuous_distns.py | 143 +++++++++++++++++++++++-------- 1 file changed, 105 insertions(+), 38 deletions(-) diff --git a/wafo/stats/_continuous_distns.py b/wafo/stats/_continuous_distns.py index 504ce8e..8e6687b 100644 --- a/wafo/stats/_continuous_distns.py +++ b/wafo/stats/_continuous_distns.py @@ -13,7 +13,8 @@ from scipy import optimize from scipy import integrate from scipy.special import (gammaln as gamln, gamma as gam, boxcox, boxcox1p, inv_boxcox, inv_boxcox1p, erfc, chndtr, chndtrix, - log1p, expm1) + log1p, expm1, + i0, i1, ndtr as _norm_cdf, log_ndtr as _norm_logcdf) from numpy import (where, arange, putmask, ravel, shape, log, sqrt, exp, arctanh, tan, sin, arcsin, arctan, @@ -32,9 +33,8 @@ from scipy.stats._tukeylambda_stats import (tukeylambda_variance as _tlvar, tukeylambda_kurtosis as _tlkurt) from ._distn_infrastructure import ( - rv_continuous, valarray, _skew, _kurtosis, # @UnresolvedImport - _lazywhere, _ncx2_log_pdf, _ncx2_pdf, _ncx2_cdf, # @UnresolvedImport - get_distribution_names, # @UnresolvedImport + rv_continuous, valarray, _skew, _kurtosis, _lazywhere, + _ncx2_log_pdf, _ncx2_pdf, _ncx2_cdf, get_distribution_names, ) from ._constants import _XMIN, _EULER, _ZETA3, _XMAX, _LOGXMAX, _EPS @@ -89,24 +89,16 @@ def _norm_logpdf(x): return -x**2 / 2.0 - _norm_pdf_logC -def _norm_cdf(x): - return special.ndtr(x) - - -def _norm_logcdf(x): - return special.log_ndtr(x) - - def _norm_ppf(q): return special.ndtri(q) def _norm_sf(x): - return special.ndtr(-x) + return _norm_cdf(-x) def _norm_logsf(x): - return special.log_ndtr(-x) + return _norm_logcdf(-x) def _norm_isf(q): @@ -127,6 +119,10 @@ class norm_gen(rv_continuous): norm.pdf(x) = exp(-x**2/2)/sqrt(2*pi) + The survival function, ``norm.sf``, is also referred to as the + Q-function in some contexts (see, e.g., + `Wikipedia's `_ definition). + %(after_notes)s %(example)s @@ -220,13 +216,13 @@ class alpha_gen(rv_continuous): """ def _pdf(self, x, a): - return 1.0/(x**2)/special.ndtr(a)*_norm_pdf(a-1.0/x) + return 1.0/(x**2)/_norm_cdf(a)*_norm_pdf(a-1.0/x) def _logpdf(self, x, a): - return -2*log(x) + _norm_logpdf(a-1.0/x) - log(special.ndtr(a)) + return -2*log(x) + _norm_logpdf(a-1.0/x) - log(_norm_cdf(a)) def _cdf(self, x, a): - return special.ndtr(a-1.0/x) / special.ndtr(a) + return _norm_cdf(a-1.0/x) / _norm_cdf(a) def _ppf(self, q, a): return 1.0/asarray(a-special.ndtri(q*special.ndtr(a))) @@ -1221,12 +1217,12 @@ class exponnorm_gen(rv_continuous): def _cdf(self, x, K): invK = 1.0 / K expval = invK * (0.5 * invK - x) - return special.ndtr(x) - exp(expval) * special.ndtr(x - invK) + return _norm_cdf(x) - exp(expval) * _norm_cdf(x - invK) def _sf(self, x, K): invK = 1.0 / K expval = invK * (0.5 * invK - x) - return special.ndtr(-x) + exp(expval) * special.ndtr(x - invK) + return _norm_cdf(-x) + exp(expval) * _norm_cdf(x - invK) def _stats(self, K): K2 = K * K @@ -1366,7 +1362,7 @@ class fatiguelife_gen(rv_continuous): 0.5*(log(2*pi) + 3*log(x))) def _cdf(self, x, c): - return special.ndtr(1.0 / c * (sqrt(x) - 1.0/sqrt(x))) + return _norm_cdf(1.0 / c * (sqrt(x) - 1.0/sqrt(x))) def _ppf(self, q, c): tmp = c*special.ndtri(q) @@ -1545,7 +1541,7 @@ class foldnorm_gen(rv_continuous): return _norm_pdf(x + c) + _norm_pdf(x-c) def _cdf(self, x, c): - return special.ndtr(x-c) + special.ndtr(x+c) - 1.0 + return _norm_cdf(x-c) + _norm_cdf(x+c) - 1.0 def _stats(self, c): # Regina C. Elandt, Technometrics 3, 551 (1961) @@ -2061,6 +2057,13 @@ class genextreme_gen(rv_continuous): return _lazywhere((x == x) & (c != 0), (x, c), lambda x, c: -expm1(-c * x) / c, x) + def _isf(self, q, c): + x = -log(-special.log1p(-q)) + result = _lazywhere((c == 0)*(x == x), (x, c), + f=lambda x, c: x, + f2=lambda x, c: -special.expm1(-c*x)/c) + return result + def _stats(self, c): g = lambda n: gam(n*c+1) g1 = g(1) @@ -2087,16 +2090,6 @@ class genextreme_gen(rv_continuous): ku = where(abs(c) <= (eps)**0.23, 12.0/5.0, ku1-3.0) return m, v, sk, ku - def _munp(self, n, c): - k = arange(0, n+1) - vals = 1.0/c**n * np.sum( - comb(n, k) * (-1)**k * special.gamma(c*k + 1), - axis=0) - return where(c*n > -1, vals, inf) - - def _entropy(self, c): - return _EULER*(1 - c) + 1 - def _fitstart(self, data): d = asarray(data) # Probability weighted moments @@ -2114,6 +2107,17 @@ class genextreme_gen(rv_continuous): (exp(gamln(1 + shape)) * (1 - 2 ** (-shape))) loc = b0 + scale * (expm1(gamln(1 + shape))) / shape return shape, loc, scale + + def _munp(self, n, c): + k = arange(0, n+1) + vals = 1.0/c**n * np.sum( + comb(n, k) * (-1)**k * special.gamma(c*k + 1), + axis=0) + return where(c*n > -1, vals, inf) + + def _entropy(self, c): + return _EULER*(1 - c) + 1 + genextreme = genextreme_gen(name='genextreme') @@ -2695,7 +2699,7 @@ class halfnorm_gen(rv_continuous): return 0.5 * np.log(2.0/pi) - x*x/2.0 def _cdf(self, x): - return special.ndtr(x)*2-1.0 + return _norm_cdf(x)*2-1.0 def _ppf(self, q): return special.ndtri((1+q)/2.0) @@ -2852,7 +2856,7 @@ class invgauss_gen(rv_continuous): %(after_notes)s - When `mu` is too small, evaluating the cumulative density function will be + When `mu` is too small, evaluating the cumulative distribution function will be inaccurate due to ``cdf(mu -> 0) = inf * 0``. NaNs are returned for ``mu <= 0.0028``. @@ -3348,9 +3352,18 @@ class lognorm_gen(rv_continuous): def _cdf(self, x, s): return _norm_cdf(log(x) / s) + def _logcdf(self, x, s): + return _norm_logcdf(log(x) / s) + def _ppf(self, q, s): return exp(s * _norm_ppf(q)) + def _sf(self, x, s): + return _norm_sf(log(x) / s) + + def _logsf(self, x, s): + return _norm_logsf(log(x) / s) + def _stats(self, s): p = exp(s*s) mu = sqrt(p) @@ -4546,13 +4559,13 @@ class skew_norm_gen(rv_continuous): Notes ----- - The pdf is + The pdf is:: - skewnorm.pdf(x, a) = 2*norm.pdf(x)*norm.cdf(ax) + skewnorm.pdf(x, a) = 2*norm.pdf(x)*norm.cdf(ax) `skewnorm` takes ``a`` as a skewness parameter When a=0 the distribution is identical to a normal distribution. - rvs implements the method of [1]. + rvs implements the method of [1]_. %(after_notes)s @@ -4562,7 +4575,7 @@ class skew_norm_gen(rv_continuous): References ---------- - [1] A. Azzalini and A. Capitanio (1999). Statistical applications of the + .. [1] A. Azzalini and A. Capitanio (1999). Statistical applications of the multivariate skew-normal distribution. J. Roy. Statist. Soc., B 61, 579-602. http://azzalini.stat.unipd.it/SN/faq-r.html """ @@ -4597,6 +4610,56 @@ class skew_norm_gen(rv_continuous): skewnorm = skew_norm_gen(name='skewnorm') + +class trapz_gen(rv_continuous): + """A trapezoidal continuous random variable. + + %(before_notes)s + + Notes + ----- + The trapezoidal distribution can be represented with an up-sloping line + from ``loc`` to ``(loc + c*scale)``, then constant to ``(loc + d*scale)`` + and then downsloping from ``(loc + d*scale)`` to ``(loc+scale)``. + + `trapz` takes ``c`` and ``d`` as shape parameters. + + %(after_notes)s + + The standard form is in the range [0, 1] with c the mode. + The location parameter shifts the start to `loc`. + The scale parameter changes the width from 1 to `scale`. + + %(example)s + + """ + def _argcheck(self, c, d): + return (c >= 0) & (c <= 1) & (d >= 0) & (d <= 1) & (d >= c) + + def _pdf(self, x, c, d): + u = 2 / (d - c + 1) + + condlist = [x < c, x <= d, x > d] + choicelist = [u * x / c, u, u * (1 - x) / (1 - d)] + return np.select(condlist, choicelist) + + def _cdf(self, x, c, d): + condlist = [x < c, x <= d, x > d] + choicelist = [x**2 / c / (d - c + 1), + (c + 2 * (x - c)) / (d - c + 1), + 1 - ((1 - x)**2 / (d - c + 1) / (1 - d))] + return np.select(condlist, choicelist) + + def _ppf(self, q, c, d): + qc, qd = self._cdf(c, c, d), self._cdf(d, c, d) + condlist = [q < qc, q <= qd, q > qd] + choicelist = [np.sqrt(q * c * (1 + d - c)), + 0.5 * q * (1 + d - c) + 0.5 * c, + 1 - sqrt((1 - q) * (d - c + 1) * (1 - d))] + return np.select(condlist, choicelist) +trapz = trapz_gen(a=0.0, b=1.0, name="trapz") + + class triang_gen(rv_continuous): """A triangular continuous random variable. @@ -4868,13 +4931,17 @@ class vonmises_gen(rv_continuous): return self._random_state.vonmises(0.0, kappa, size=self._size) def _pdf(self, x, kappa): - return exp(kappa * cos(x)) / (2*pi*special.i0(kappa)) + return exp(kappa * cos(x)) / (2*pi*i0(kappa)) def _cdf(self, x, kappa): return vonmises_cython.von_mises_cdf(kappa, x) def _stats_skip(self, kappa): return 0, None, 0, None + + def _entropy(self, kappa): + return (-kappa * i1(kappa) / i0(kappa) + + np.log(2 * np.pi * i0(kappa))) vonmises = vonmises_gen(name='vonmises') vonmises_line = vonmises_gen(a=-np.pi, b=np.pi, name='vonmises_line') From 0750bdd94b6ceceb351833caba3766a60d23b335 Mon Sep 17 00:00:00 2001 From: Per A Brodtkorb Date: Tue, 7 Jun 2016 16:06:09 +0200 Subject: [PATCH 2/4] Removed runtime warnings --- wafo/stats/_continuous_distns.py | 39 +++++++++++++------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/wafo/stats/_continuous_distns.py b/wafo/stats/_continuous_distns.py index 8e6687b..93ea6de 100644 --- a/wafo/stats/_continuous_distns.py +++ b/wafo/stats/_continuous_distns.py @@ -601,13 +601,13 @@ class bradford_gen(rv_continuous): """ def _pdf(self, x, c): - return c / (c * x + 1.0) / log1p(c) + return c * exp(-log1p(c * x)) / log1p(c) def _cdf(self, x, c): return log1p(c * x) / log1p(c) def _ppf(self, q, c): - return ((1.0+c)**q-1)/c + return expm1(q * log1p(c))/c # ((1.0+c)**q-1)/c def _stats(self, c, moments='mv'): k = log1p(c) @@ -1732,14 +1732,6 @@ class genlogistic_gen(rv_continuous): genlogistic = genlogistic_gen(name='genlogistic') -def log1pxdx(x): - '''Computes Log(1+x)/x - ''' - xd = where((x == 0) | (x == inf), 1.0, x) # avoid 0/0 or inf/inf - y = where(x == 0, 1.0, log1p(x) / xd) - return where(x == inf, 0.0, y) - - class genpareto_gen(rv_continuous): """A generalized Pareto continuous random variable. @@ -2026,28 +2018,26 @@ class genextreme_gen(rv_continuous): return exp(self._logpdf(x, c)) def _logpdf(self, x, c): - x1 = where((c == 0) & (x == inf), 0.0, x) - cx = c * x1 - cond1 = (c == 0) * (x == x) - logex2 = where(cond1, 0.0, log1p(-cx)) - logpex2 = -x * log1pxdx(-cx) - # logpex2 = where(cond1,-x, logex2/c) + cx = c*x + logex2 = where((c == 0)*(x == x), 0.0, special.log1p(-cx)) + logpex2 = self._loglogcdf(x, c) + # logpex2 = where((c == 0)*(x == x), -x, logex2/c) pex2 = exp(logpex2) # Handle special cases - logpdf = where( - (cx == 1) | (cx == -inf), -inf, -pex2 + logpex2 - logex2) + logpdf = where((cx == 1) | (cx == -inf), -inf, -pex2+logpex2-logex2) putmask(logpdf, (c == 1) & (x == 1), 0.0) return logpdf def _cdf(self, x, c): return exp(self._logcdf(x, c)) + def _loglogcdf(self, x, c): + return _lazywhere((c == 0)*(x == x), (x, c), + f=lambda x, c: -x, + f2=lambda x, c: special.log1p(-c*x)/c) + def _logcdf(self, x, c): - x1 = where((c == 0) & (x == inf), 0.0, x) - cx = c * x1 - loglogcdf = -x * log1pxdx(-cx) - # loglogcdf = where((c==0)*(x==x),-x,log1p(-cx)/c) - return -exp(loglogcdf) + return -exp(self._loglogcdf(x, c)) def _sf(self, x, c): return -expm1(self._logcdf(x, c)) @@ -2570,6 +2560,9 @@ class gumbel_l_gen(rv_continuous): def _logpdf(self, x): return x - exp(x) + def _sf(self, x): + return exp(-exp(x)) + def _cdf(self, x): return -expm1(-exp(x)) From 59186edfd6fc3a71b999a9f87113379331ebaaf0 Mon Sep 17 00:00:00 2001 From: Per A Brodtkorb Date: Wed, 8 Jun 2016 16:00:55 +0200 Subject: [PATCH 3/4] Improved accuracy and avoids some runtime warnings. --- wafo/stats/_continuous_distns.py | 39 +++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/wafo/stats/_continuous_distns.py b/wafo/stats/_continuous_distns.py index 93ea6de..cdce229 100644 --- a/wafo/stats/_continuous_distns.py +++ b/wafo/stats/_continuous_distns.py @@ -32,12 +32,12 @@ except: from scipy.stats._tukeylambda_stats import (tukeylambda_variance as _tlvar, tukeylambda_kurtosis as _tlkurt) -from ._distn_infrastructure import ( +from wafo.stats._distn_infrastructure import ( rv_continuous, valarray, _skew, _kurtosis, _lazywhere, _ncx2_log_pdf, _ncx2_pdf, _ncx2_cdf, get_distribution_names, ) -from ._constants import _XMIN, _EULER, _ZETA3, _XMAX, _LOGXMAX, _EPS +from wafo.stats._constants import _XMIN, _EULER, _ZETA3, _XMAX, _LOGXMAX, _EPS ## Kolmogorov-Smirnov one-sided and two-sided test statistics @@ -2018,12 +2018,12 @@ class genextreme_gen(rv_continuous): return exp(self._logpdf(x, c)) def _logpdf(self, x, c): - cx = c*x - logex2 = where((c == 0)*(x == x), 0.0, special.log1p(-cx)) + cx = _lazywhere((c != 0)*(x == x), (x, c), lambda x, c: c*x, 0.0) + logex2 = special.log1p(-cx) logpex2 = self._loglogcdf(x, c) - # logpex2 = where((c == 0)*(x == x), -x, logex2/c) pex2 = exp(logpex2) # Handle special cases + putmask(logpex2, (c == 0) & (x == -inf), 0.0) logpdf = where((cx == 1) | (cx == -inf), -inf, -pex2+logpex2-logex2) putmask(logpdf, (c == 1) & (x == 1), 0.0) return logpdf @@ -2032,9 +2032,8 @@ class genextreme_gen(rv_continuous): return exp(self._logcdf(x, c)) def _loglogcdf(self, x, c): - return _lazywhere((c == 0)*(x == x), (x, c), - f=lambda x, c: -x, - f2=lambda x, c: special.log1p(-c*x)/c) + return _lazywhere((c != 0) & (x == x), (x, c), + lambda x, c: special.log1p(-c * x)/c, -x) def _logcdf(self, x, c): return -exp(self._loglogcdf(x, c)) @@ -2049,10 +2048,8 @@ class genextreme_gen(rv_continuous): def _isf(self, q, c): x = -log(-special.log1p(-q)) - result = _lazywhere((c == 0)*(x == x), (x, c), - f=lambda x, c: x, - f2=lambda x, c: -special.expm1(-c*x)/c) - return result + return _lazywhere((x == x) & (c != 0), (x, c), + lambda x, c: -expm1(-c * x) / c, x) def _stats(self, c): g = lambda n: gam(n*c+1) @@ -2560,6 +2557,9 @@ class gumbel_l_gen(rv_continuous): def _logpdf(self, x): return x - exp(x) + def _logsf(self, x): + return -exp(x) + def _sf(self, x): return exp(-exp(x)) @@ -3203,6 +3203,9 @@ class logistic_gen(rv_continuous): def _ppf(self, q): return -log1p(-q) + log(q) + def _isf(self, q): + return log1p(-q) - log(q) + def _stats(self): return 0, pi*pi/3.0, 0, 6.0/5.0 @@ -4262,7 +4265,8 @@ class rayleigh_gen(rv_continuous): def _logpdf(self, r): rr2 = r * r / 2.0 - return where(rr2 == inf, - rr2, log(r) - rr2) + return _lazywhere(rr2 != inf, (r, rr2), lambda r, rr2: log(r) - rr2, + -rr2) def _cdf(self, r): return -special.expm1(-0.5 * r**2) @@ -4273,6 +4277,9 @@ class rayleigh_gen(rv_continuous): def _sf(self, r): return exp(-0.5 * r**2) + def _logsf(self, r): + return -0.5 * r**2 + def _isf(self, q): return sqrt(-2 * log(q)) @@ -5158,3 +5165,9 @@ pairs = list(globals().items()) _distn_names, _distn_gen_names = get_distribution_names(pairs, rv_continuous) __all__ = _distn_names + _distn_gen_names + + +if __name__=='__main__': + v = genextreme.logpdf(np.inf, 0) + v2 = genextreme.logpdf(-np.inf, 0) + v2 = genextreme.logpdf(-100, 0) From f8d1afb93b2ea1d0c9418b735585782a86a945e2 Mon Sep 17 00:00:00 2001 From: Per A Brodtkorb Date: Thu, 9 Jun 2016 15:25:54 +0200 Subject: [PATCH 4/4] Moved all links to estimation.py --- wafo/stats/_continuous_distns.py | 141 +------------------------------ wafo/stats/estimation.py | 106 +++++++++++++++++++++-- 2 files changed, 104 insertions(+), 143 deletions(-) diff --git a/wafo/stats/_continuous_distns.py b/wafo/stats/_continuous_distns.py index cdce229..dc7980d 100644 --- a/wafo/stats/_continuous_distns.py +++ b/wafo/stats/_continuous_distns.py @@ -1120,15 +1120,6 @@ class expon_gen(rv_continuous): %(example)s """ - - def _link(self, x, logSF, phat, ix): - if ix == 1: - return - (x - phat[0]) / logSF - elif ix == 0: - return x + phat[1] * logSF - else: - raise IndexError('Index to the fixed parameter is out of bounds') - def _rvs(self): return self._random_state.standard_exponential(self._size) @@ -1593,17 +1584,6 @@ class frechet_r_gen(rv_continuous): %(example)s """ - def _link(self, x, logSF, phat, ix): - if ix == 0: - phati = log(-logSF) / log((x - phat[1]) / phat[2]) - elif ix == 1: - phati = x - phat[2] * (-logSF) ** (1. / phat[0]) - elif ix == 2: - phati = (x - phat[1]) / (-logSF) ** (1. / phat[0]) - else: - raise IndexError('Index to the fixed parameter is out of bounds') - return phati - def _pdf(self, x, c): return c*pow(x, c-1)*exp(-pow(x, c)) @@ -1762,33 +1742,6 @@ class genpareto_gen(rv_continuous): %(example)s """ - - def _link(self, x, logSF, phat, ix): - # Reference - # Stuart Coles (2004) - # "An introduction to statistical modelling of extreme values". - # Springer series in statistics - c, loc, scale = phat - if ix == 2: - # Reorganizing w.r.t.scale, Eq. 4.13 and 4.14, pp 81 in - # Coles (2004) gives - # link = -(x-loc)*c/expm1(-c*logSF) - if c != 0.0: - phati = (x - loc) * c / expm1(-c * logSF) - else: - phati = -(x - loc) / logSF - elif ix == 1: - if c != 0: - phati = x + scale * expm1(c * logSF) / c - else: - phati = x + scale * logSF - elif ix == 0: - raise NotImplementedError( - 'link(x,logSF,phat,i) where i=0 is not implemented!') - else: - raise IndexError('Index to the fixed parameter is out of bounds') - return phati - def _argcheck(self, c): c = asarray(c) self.b = _lazywhere(c < 0, (c,), @@ -1832,57 +1785,6 @@ class genpareto_gen(rv_continuous): scale = m * ((m / s) ** 2 + 1) / 2 return shape, loc, scale - def hessian_nnlf(self, theta, x, eps=None): - try: - loc = theta[-2] - scale = theta[-1] - args = tuple(theta[:-2]) - except IndexError: - raise ValueError("Not enough input arguments.") - if not self._argcheck(*args) or scale <= 0: - return inf - x = asarray((x - loc) / scale) - cond0 = (x <= self.a) | (x >= self.b) - if any(cond0): - np = self.numargs + 2 - return valarray((np, np), value=nan) - eps = _EPS - c = args[0] - n = len(x) - if abs(c) > eps: - cx = c * x - sumlog1pcx = sum(log1p(cx)) - # LL = n*log(scale) + (1-1/k)*sumlog1mkxn - r = x / (1.0 + cx) - sumix = sum(1.0 / (1.0 + cx) ** 2.0) - - sumr = sum(r) - sumr2 = sum(r ** 2.0) - H11 = -2 * sumlog1pcx / c ** 3 + 2 * \ - sumr / c ** 2 + (1.0 + 1.0 / c) * sumr2 - H22 = c * (c + 1) * sumix / scale ** 2.0 - H33 = (n - 2 * (c + 1) * sumr + - c * (c + 1) * sumr2) / scale ** 2.0 - H12 = -sum((1 - x) / ((1 + cx) ** 2.0)) / scale - H23 = -(c + 1) * sumix / scale ** 2.0 - H13 = -(sumr - (c + 1) * sumr2) / scale - - else: # c == 0 - sumx = sum(x) - # LL = n*log(scale) + sumx; - - sumx2 = sum(x ** 2.0) - H11 = -(2 / 3) * sum(x ** 3.0) + sumx2 - H22 = 0.0 - H12 = -(n - sum(x)) / scale - H23 = -n * 1.0 / scale ** 2.0 - H33 = (n - 2 * sumx) / scale ** 2.0 - H13 = -(sumx - sumx2) / scale - - # Hessian matrix - H = [[H11, H12, H13], [H12, H22, H23], [H13, H23, H33]] - return asarray(H) - def __stats(self, c): # return None,None,None,None k = -c @@ -1950,25 +1852,9 @@ class genexpon_gen(rv_continuous): %(example)s """ - - def _link(self, x, logSF, phat, ix): - _a, b, c, loc, scale = phat - xn = (x - loc) / scale - fact1 = (xn + expm1(-c * xn) / c) - if ix == 0: - phati = b * fact1 + logSF - elif ix == 1: - phati = (phat[0] - logSF) / fact1 - elif ix in [2, 3, 4]: - raise NotImplementedError('Only implemented for index in [0,1]!') - else: - raise IndexError('Index to the fixed parameter is out of bounds') - - return phati - def _pdf(self, x, a, b, c): - return (a + b*(-special.expm1(-c*x)))*exp((-a-b)*x + - b*(-special.expm1(-c*x))/c) + return (a + b*(-special.expm1(-c*x))) * exp((-a-b)*x + + b*(-special.expm1(-c*x))/c) def _cdf(self, x, a, b, c): return -special.expm1((-a-b)*x + b*(-special.expm1(-c*x))/c) @@ -4248,15 +4134,6 @@ class rayleigh_gen(rv_continuous): %(example)s """ - - def _link(self, x, logSF, phat, ix): - if ix == 1: - return x - phat[0] / sqrt(-2.0 * logSF) - elif ix == 0: - return x - phat[1] * sqrt(-2.0 * logSF) - else: - raise IndexError('Index to the fixed parameter is out of bounds') - def _rvs(self): return chi.rvs(2, size=self._size, random_state=self._random_state) @@ -4314,18 +4191,6 @@ class truncrayleigh_gen(rv_continuous): def _argcheck(self, c): return (c >= 0) - def _link(self, x, logSF, phat, ix): - c, loc, scale = phat - if ix == 2: - return x - loc / (sqrt(c * c - 2 * logSF) - c) - elif ix == 1: - return x - scale * (sqrt(c * c - 2 * logSF) - c) - elif ix == 0: - xn = (x - loc) / scale - return - 2 * logSF / xn - xn / 2.0 - else: - raise IndexError('Index to the fixed parameter is out of bounds') - def _fitstart(self, data, args=None): if args is None: args = (0.0,) * self.numargs @@ -5167,7 +5032,7 @@ _distn_names, _distn_gen_names = get_distribution_names(pairs, rv_continuous) __all__ = _distn_names + _distn_gen_names -if __name__=='__main__': +if __name__ == '__main__': v = genextreme.logpdf(np.inf, 0) v2 = genextreme.logpdf(-np.inf, 0) v2 = genextreme.logpdf(-100, 0) diff --git a/wafo/stats/estimation.py b/wafo/stats/estimation.py index 6fbd769..fb220ef 100644 --- a/wafo/stats/estimation.py +++ b/wafo/stats/estimation.py @@ -21,19 +21,115 @@ from scipy.linalg import pinv2 from scipy import optimize import numpy as np -from numpy import (alltrue, arange, zeros, log, sqrt, exp, +from numpy import (alltrue, arange, zeros, log, sqrt, exp, expm1, atleast_1d, any, asarray, nan, pi, isfinite) from numpy import flatnonzero as nonzero __all__ = ['Profile', 'FitDistribution'] + floatinfo = np.finfo(float) arr = asarray all = alltrue # @ReservedAssignment +def _expon_link(x, logSF, phat, ix): + if ix == 1: + return - (x - phat[0]) / logSF + elif ix == 0: + return x + phat[1] * logSF + else: + raise IndexError('Index to the fixed parameter is out of bounds') + + +def _weibull_min_link(x, logSF, phat, ix): + if ix == 0: + phati = log(-logSF) / log((x - phat[1]) / phat[2]) + elif ix == 1: + phati = x - phat[2] * (-logSF) ** (1. / phat[0]) + elif ix == 2: + phati = (x - phat[1]) / (-logSF) ** (1. / phat[0]) + else: + raise IndexError('Index to the fixed parameter is out of bounds') + return phati + + +def _genpareto_link(x, logSF, phat, ix): + # Reference + # Stuart Coles (2004) + # "An introduction to statistical modelling of extreme values". + # Springer series in statistics + c, loc, scale = phat + if ix == 2: + # Reorganizing w.r.t.scale, Eq. 4.13 and 4.14, pp 81 in + # Coles (2004) gives + # link = -(x-loc)*c/expm1(-c*logSF) + if c != 0.0: + phati = (x - loc) * c / expm1(-c * logSF) + else: + phati = -(x - loc) / logSF + elif ix == 1: + if c != 0: + phati = x + scale * expm1(c * logSF) / c + else: + phati = x + scale * logSF + elif ix == 0: + raise NotImplementedError( + 'link(x,logSF,phat,i) where i=0 is not implemented!') + else: + raise IndexError('Index to the fixed parameter is out of bounds') + return phati + + +def _genexpon_link(x, logSF, phat, ix): + _a, b, c, loc, scale = phat + xn = (x - loc) / scale + fact1 = (xn + expm1(-c * xn) / c) + if ix == 0: + phati = b * fact1 + logSF + elif ix == 1: + phati = (phat[0] - logSF) / fact1 + elif ix in [2, 3, 4]: + raise NotImplementedError('Only implemented for index in [0,1]!') + else: + raise IndexError('Index to the fixed parameter is out of bounds') + + return phati + + +def _rayleigh_link(x, logSF, phat, ix): + if ix == 1: + return x - phat[0] / sqrt(-2.0 * logSF) + elif ix == 0: + return x - phat[1] * sqrt(-2.0 * logSF) + else: + raise IndexError('Index to the fixed parameter is out of bounds') + + +def _trunclayleigh_link(x, logSF, phat, ix): + c, loc, scale = phat + if ix == 2: + return x - loc / (sqrt(c * c - 2 * logSF) - c) + elif ix == 1: + return x - scale * (sqrt(c * c - 2 * logSF) - c) + elif ix == 0: + xn = (x - loc) / scale + return - 2 * logSF / xn - xn / 2.0 + else: + raise IndexError('Index to the fixed parameter is out of bounds') + + +LINKS = dict(expon=_expon_link, + weibull_min=_weibull_min_link, + frechet_r=_weibull_min_link, + genpareto=_genpareto_link, + genexpon=_genexpon_link, + rayleigh=_rayleigh_link, + trunclayleigh=_trunclayleigh_link) + + def chi2isf(p, df): return special.chdtri(df, p) @@ -195,7 +291,7 @@ class Profile(object): self.profile_par = not (self.profile_x or self.profile_logSF) if self.link is None: - self.link = self.fit_dist.dist.link + self.link = LINKS[self.fit_dist.dist.name] if self.profile_par: self._local_link = self._par_link self.xlabel = 'phat(%d)' % self.i_fixed @@ -604,13 +700,13 @@ class FitDistribution(rv_frozen): self.dist = dist numargs = dist.numargs - self.method = method + self.method = method.lower() self.alpha = alpha self.par_fix = par_fix self.search = search self.copydata = copydata - if self.method.lower()[:].startswith('mps'): + if self.method.startswith('mps'): self._fitfun = self._nlogps else: self._fitfun = self._nnlf @@ -797,7 +893,7 @@ class FitDistribution(rv_frozen): return np.inf dist = self.dist x = asarray((x - loc) / scale) - cond0 = (x <= dist.a) | (dist.b <= x) + cond0 = (x < dist.a) | (dist.b < x) Nbad = np.sum(cond0) if Nbad > 0: x = argsreduce(~cond0, x)[0]