From eca5368f754c0665b3212fbec44b1a7c4b56509e Mon Sep 17 00:00:00 2001 From: Per A Brodtkorb Date: Sun, 26 Jun 2016 07:36:58 +0200 Subject: [PATCH] Updated stats --- wafo/misc.py | 64 +++- wafo/stats/_continuous_distns.py | 506 +++++++++++++++++++++---- wafo/stats/_distn_infrastructure.py | 20 +- wafo/stats/estimation.py | 556 +++++++++++++++++++++++++--- 4 files changed, 1020 insertions(+), 126 deletions(-) diff --git a/wafo/misc.py b/wafo/misc.py index 6d0e44c..01ba6c5 100644 --- a/wafo/misc.py +++ b/wafo/misc.py @@ -19,6 +19,8 @@ from time import strftime, gmtime from numdifftools.extrapolation import dea3 # @UnusedImport from wafo.plotbackend import plotbackend from collections import Callable +import numbers + try: from wafo import c_library as clib # @UnresolvedImport except ImportError: @@ -37,8 +39,28 @@ __all__ = ['now', 'spaceline', 'narg_smallest', 'args_flat', 'is_numlike', 'betaloge', 'gravity', 'nextpow2', 'discretize', 'polar2cart', 'cart2polar', 'meshgrid', 'ndgrid', 'trangood', 'tranproc', 'plot_histgrm', 'num2pistr', 'test_docstrings', 'lazywhere', + 'lazyselect' 'piecewise', - 'valarray'] + 'valarray', 'check_random_state'] + + +def check_random_state(seed): + """Turn seed into a np.random.RandomState instance + + If seed is None (or np.random), return the RandomState singleton used + by np.random. + If seed is an int, return a new RandomState instance seeded with seed. + If seed is already a RandomState instance, return it. + Otherwise raise ValueError. + """ + if seed is None or seed is np.random: + return np.random.mtrand._rand + if isinstance(seed, (numbers.Integral, np.integer)): + return np.random.RandomState(seed) + if isinstance(seed, np.random.RandomState): + return seed + raise ValueError('%r cannot be used to seed a numpy.random.RandomState' + ' instance' % seed) def valarray(shape, value=np.NaN, typecode=None): @@ -217,6 +239,46 @@ def lazywhere(cond, arrays, f, fillvalue=None, f2=None): return out +def lazyselect(condlist, choicelist, arrays, default=0): + """ + Mimic `np.select(condlist, choicelist)`. + + Notice it assumes that all `arrays` are of the same shape, or can be + broadcasted together. + + All functions in `choicelist` must accept array arguments in the order + given in `arrays` and must return an array of the same shape as broadcasted + `arrays`. + + Examples + -------- + >>> x = np.arange(6) + >>> np.select([x <3, x > 3], [x**2, x**3], default=0) + array([ 0, 1, 4, 0, 64, 125]) + + >>> lazyselect([x < 3, x > 3], [lambda x: x**2, lambda x: x**3], (x,)) + array([ 0., 1., 4., 0., 64., 125.]) + + >>> a = -np.ones_like(x) + >>> lazyselect([x < 3, x > 3], + ... [lambda x, a: x**2, lambda x, a: a * x**3], + ... (x, a)) + array([ 0., 1., 4., 0., -64., -125.]) + + """ + arrays = np.broadcast_arrays(*arrays) + tcode = np.mintypecode([a.dtype.char for a in arrays]) + out = valarray(np.shape(arrays[0]), value=default, typecode=tcode) + for index in range(len(condlist)): + func, cond = choicelist[index], condlist[index] + if np.all(cond is False): + continue + cond, _ = np.broadcast_arrays(cond, arrays[0]) + temp = tuple(np.extract(cond, arr) for arr in arrays) + np.place(out, cond, func(*temp)) + return out + + def rotation_matrix(heading, pitch, roll): ''' diff --git a/wafo/stats/_continuous_distns.py b/wafo/stats/_continuous_distns.py index ef3c21f..1b0a95f 100644 --- a/wafo/stats/_continuous_distns.py +++ b/wafo/stats/_continuous_distns.py @@ -15,12 +15,13 @@ from scipy.special import (gammaln as gamln, gamma as gam, boxcox, boxcox1p, inv_boxcox, inv_boxcox1p, erfc, chndtr, chndtrix, log1p, expm1, i0, i1, ndtr as _norm_cdf, log_ndtr as _norm_logcdf) +# from scipy._lib._numpy_compat import broadcast_to from numpy import (where, arange, putmask, ravel, shape, log, sqrt, exp, arctanh, tan, sin, arcsin, arctan, tanh, cos, cosh, sinh) -from numpy import polyval, place, extract, asarray, nan, inf, pi +from numpy import polyval, place, extract, asarray, nan, inf, pi, broadcast_to import numpy as np from scipy.stats.mstats_basic import mode @@ -35,9 +36,10 @@ from scipy.stats._tukeylambda_stats import (tukeylambda_variance as _tlvar, from wafo.stats._distn_infrastructure import ( rv_continuous, valarray, _skew, _kurtosis, _lazywhere, _ncx2_log_pdf, _ncx2_pdf, _ncx2_cdf, get_distribution_names, + _lazyselect ) -from wafo.stats._constants import _XMIN, _EULER, _ZETA3, _XMAX, _LOGXMAX, _EPS +from wafo.stats._constants import _XMIN, _EULER, _ZETA3, _XMAX, _LOGXMAX ## Kolmogorov-Smirnov one-sided and two-sided test statistics @@ -215,6 +217,8 @@ class alpha_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _pdf(self, x, a): return 1.0/(x**2)/_norm_cdf(a)*_norm_pdf(a-1.0/x) @@ -285,6 +289,8 @@ class arcsine_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _pdf(self, x): return 1.0/pi/sqrt(x*(1-x)) @@ -535,6 +541,8 @@ class betaprime_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _rvs(self, a, b): sz, rndm = self._size, self._random_state u1 = gamma.rvs(a, size=sz, random_state=rndm) @@ -604,10 +612,10 @@ class bradford_gen(rv_continuous): return c * exp(-log1p(c * x)) / log1p(c) def _cdf(self, x, c): - return log1p(c * x) / log1p(c) + return special.log1p(c * x) / special.log1p(c) def _ppf(self, q, c): - return expm1(q * log1p(c))/c # ((1.0+c)**q-1)/c + return special.expm1(q * special.log1p(c)) / c def _stats(self, c, moments='mv'): k = log1p(c) @@ -672,6 +680,8 @@ class burr_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _pdf(self, x, c, d): return c * d * (x**(-c - 1.0)) * ((1 + x**(-c))**(-d - 1.0)) @@ -725,6 +735,8 @@ class burr12_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _pdf(self, x, c, d): return np.exp(self._logpdf(x, c, d)) @@ -732,7 +744,7 @@ class burr12_gen(rv_continuous): return log(c) + log(d) + special.xlogy(c-1, x) + special.xlog1py(-d-1, x**c) def _cdf(self, x, c, d): - return 1 - self._sf(x, c, d) + return -special.expm1(self._logsf(x, c, d)) def _logcdf(self, x, c, d): return special.log1p(-(1 + x**c)**(-d)) @@ -869,6 +881,7 @@ class chi_gen(rv_continuous): %(example)s """ + def _rvs(self, df): sz, rndm = self._size, self._random_state return sqrt(chi2.rvs(df, size=sz, random_state=rndm)) @@ -1338,6 +1351,8 @@ class fatiguelife_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _rvs(self, c): z = self._random_state.standard_normal(self._size) x = 0.5*c*z @@ -1584,6 +1599,7 @@ class frechet_r_gen(rv_continuous): %(example)s """ + def _pdf(self, x, c): return c*pow(x, c-1)*exp(-pow(x, c)) @@ -1853,8 +1869,8 @@ class genexpon_gen(rv_continuous): """ def _pdf(self, x, a, b, c): - return (a + b*(-special.expm1(-c*x))) * exp((-a-b)*x + - b*(-special.expm1(-c*x))/c) + return (a + b*(-special.expm1(-c*x)))*exp((-a-b)*x + + b*(-special.expm1(-c*x))/c) def _cdf(self, x, a, b, c): return -special.expm1((-a-b)*x + b*(-special.expm1(-c*x))/c) @@ -1900,11 +1916,15 @@ class genextreme_gen(rv_continuous): self.a = where(c < 0, 1.0 / min(c, -_XMIN), -inf) return where(abs(c) == inf, 0, 1) + def _loglogcdf(self, x, c): + return _lazywhere((x == x) & (c != 0), (x, c), + lambda x, c: special.log1p(-c*x)/c, -x) + def _pdf(self, x, c): return exp(self._logpdf(x, c)) def _logpdf(self, x, c): - cx = _lazywhere((c != 0)*(x == x), (x, c), lambda x, c: c*x, 0.0) + cx = _lazywhere((x == x) & (c != 0), (x, c), lambda x, c: c*x, 0.0) logex2 = special.log1p(-cx) logpex2 = self._loglogcdf(x, c) pex2 = exp(logpex2) @@ -1914,18 +1934,14 @@ class genextreme_gen(rv_continuous): putmask(logpdf, (c == 1) & (x == 1), 0.0) return logpdf - def _cdf(self, x, c): - return exp(self._logcdf(x, c)) - - def _loglogcdf(self, x, c): - return _lazywhere((c != 0) & (x == x), (x, c), - lambda x, c: special.log1p(-c * x)/c, -x) - def _logcdf(self, x, c): return -exp(self._loglogcdf(x, c)) + def _cdf(self, x, c): + return exp(self._logcdf(x, c)) + def _sf(self, x, c): - return -expm1(self._logcdf(x, c)) + return - expm1(self._logcdf(x, c)) def _ppf(self, q, c): x = -log(-log(q)) @@ -2233,7 +2249,7 @@ class gengamma_gen(rv_continuous): gengamma.pdf(x, a, c) = abs(c) * x**(c*a-1) * exp(-x**c) / gamma(a) - for ``x > 0``, ``a > 0``, and ``c != 0``. + for ``x >= 0``, ``a > 0``, and ``c != 0``. `gengamma` takes ``a`` and ``c`` as shape parameters. @@ -2443,17 +2459,20 @@ class gumbel_l_gen(rv_continuous): def _logpdf(self, x): return x - exp(x) + def _cdf(self, x): + return -special.expm1(-exp(x)) + + def _ppf(self, q): + return log(-special.log1p(-q)) + def _logsf(self, x): return -exp(x) def _sf(self, x): return exp(-exp(x)) - def _cdf(self, x): - return -expm1(-exp(x)) - - def _ppf(self, q): - return log(-log1p(-q)) + def _isf(self, x): + return log(-log(x)) def _stats(self): return -_EULER, pi*pi/6.0, \ @@ -2684,6 +2703,8 @@ class invgamma_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _pdf(self, x, a): return exp(self._logpdf(x, a)) @@ -2691,10 +2712,16 @@ class invgamma_gen(rv_continuous): return (-(a+1) * log(x) - gamln(a) - 1.0/x) def _cdf(self, x, a): - return 1.0 - special.gammainc(a, 1.0/x) + return special.gammaincc(a, 1.0 / x) def _ppf(self, q, a): - return 1.0 / special.gammaincinv(a, 1.-q) + return 1.0 / special.gammainccinv(a, q) + + def _sf(self, x, a): + return special.gammainc(a, 1.0 / x) + + def _isf(self, q, a): + return 1.0 / special.gammaincinv(a, q) def _stats(self, a, moments='mvsk'): m1 = _lazywhere(a > 1, (a,), lambda x: 1. / (x - 1.), np.inf) @@ -2742,6 +2769,8 @@ class invgauss_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _rvs(self, mu): return self._random_state.wald(mu, 1.0, size=self._size) @@ -2788,6 +2817,8 @@ class invweibull_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _pdf(self, x, c): xc1 = np.power(x, -c - 1.0) xc2 = np.power(x, -c) @@ -2833,6 +2864,8 @@ class johnsonsb_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _argcheck(self, a, b): return (b > 0) & (a == a) @@ -2915,7 +2948,10 @@ class laplace_gen(rv_continuous): return where(x > 0, 1.0-0.5*exp(-x), 0.5*exp(x)) def _ppf(self, q): - return where(q > 0.5, -log(2) - log1p(-q), log(2 * q)) + return where(q > 0.5, -log(2*(1-q)), log(2*q)) + + def _isf(self, q): + return where(q < 0.5, -log(2*q), log(2*(1-q))) def _stats(self): return 0, 2, 0, 3 @@ -2949,6 +2985,8 @@ class levy_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _pdf(self, x): return 1 / sqrt(2*pi*x) / x * exp(-1/(2*x)) @@ -2990,6 +3028,8 @@ class levy_l_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _pdf(self, x): ax = abs(x) return 1/sqrt(2*pi*ax)/ax*exp(-1/(2*ax)) @@ -3026,29 +3066,47 @@ class levy_stable_gen(rv_continuous): %(example)s """ + def _rvs(self, alpha, beta): - sz = self._size - TH = uniform.rvs(loc=-pi/2.0, scale=pi, size=sz) - W = expon.rvs(size=sz) - if alpha == 1: - return 2/pi*(pi/2+beta*TH)*tan(TH)-beta*log((pi/2*W*cos(TH))/(pi/2+beta*TH)) - ialpha = 1.0/alpha - aTH = alpha*TH - if beta == 0: - return W/(cos(TH)/tan(aTH)+sin(TH))*((cos(aTH)+sin(aTH)*tan(TH))/W)**ialpha + def alpha1func(alpha, beta, TH, aTH, bTH, cosTH, tanTH, W): + return (2/pi*(pi/2 + bTH)*tanTH - + beta*log((pi/2*W*cosTH)/(pi/2 + bTH))) + + def beta0func(alpha, beta, TH, aTH, bTH, cosTH, tanTH, W): + return (W/(cosTH/tan(aTH) + sin(TH)) * + ((cos(aTH) + sin(aTH)*tanTH)/W)**(1.0/alpha)) + + def otherwise(alpha, beta, TH, aTH, bTH, cosTH, tanTH, W): + # alpha is not 1 and beta is not 0 + val0 = beta*tan(pi*alpha/2) + th0 = arctan(val0)/alpha + val3 = W/(cosTH/tan(alpha*(th0 + TH)) + sin(TH)) + res3 = val3*((cos(aTH) + sin(aTH)*tanTH - val0*(sin(aTH) - + cos(aTH)*tanTH))/W)**(1.0/alpha) + return res3 + + def alphanot1func(alpha, beta, TH, aTH, bTH, cosTH, tanTH, W): + res = _lazywhere(beta == 0, + (alpha, beta, TH, aTH, bTH, cosTH, tanTH, W), + beta0func, f2=otherwise) + return res - val0 = beta*tan(pi*alpha/2) - th0 = arctan(val0)/alpha - val3 = W/(cos(TH)/tan(alpha*(th0+TH))+sin(TH)) - res3 = val3*((cos(aTH)+sin(aTH)*tan(TH)-val0*(sin(aTH)-cos(aTH)*tan(TH)))/W)**ialpha - return res3 + sz = self._size + alpha = broadcast_to(alpha, sz) + beta = broadcast_to(beta, sz) + TH = uniform.rvs(loc=-pi/2.0, scale=pi, size=sz, + random_state=self._random_state) + W = expon.rvs(size=sz, random_state=self._random_state) + aTH = alpha*TH + bTH = beta*TH + cosTH = cos(TH) + tanTH = tan(TH) + res = _lazywhere(alpha == 1, (alpha, beta, TH, aTH, bTH, cosTH, tanTH, W), + alpha1func, f2=alphanot1func) + return res def _argcheck(self, alpha, beta): - if beta == -1: - self.b = 0.0 - elif beta == 1: - self.a = 0.0 return (alpha > 0) & (alpha <= 2) & (beta <= 1) & (beta >= -1) def _pdf(self, x, alpha, beta): @@ -3087,10 +3145,13 @@ class logistic_gen(rv_continuous): return special.expit(x) def _ppf(self, q): - return -log1p(-q) + log(q) + return special.logit(q) + + def _sf(self, x): + return special.expit(-x) def _isf(self, q): - return log1p(-q) - log(q) + return -special.logit(q) def _stats(self): return 0, pi*pi/3.0, 0, 6.0/5.0 @@ -3222,6 +3283,8 @@ class lognorm_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _rvs(self, s): return exp(s * self._random_state.standard_normal(self._size)) @@ -3285,6 +3348,8 @@ class gilbrat_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _rvs(self): return exp(self._random_state.standard_normal(self._size)) @@ -3397,6 +3462,306 @@ class mielke_gen(rv_continuous): mielke = mielke_gen(a=0.0, name='mielke') +class kappa4_gen(rv_continuous): + """Kappa 4 parameter distribution. + + %(before_notes)s + + Notes + ----- + The probability density function for kappa4 is:: + + kappa4.pdf(x, h, k) = (1.0 - k*x)**(1.0/k - 1.0)* + (1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h-1) + + if ``h`` and ``k`` are not equal to 0. + + If ``h`` or ``k`` are zero then the pdf can be simplified: + + h = 0 and k != 0:: + + kappa4.pdf(x, h, k) = (1.0 - k*x)**(1.0/k - 1.0)* + exp(-(1.0 - k*x)**(1.0/k)) + + h != 0 and k = 0:: + + kappa4.pdf(x, h, k) = exp(-x)*(1.0 - h*exp(-x))**(1.0/h - 1.0) + + h = 0 and k = 0:: + + kappa4.pdf(x, h, k) = exp(-x)*exp(-exp(-x)) + + kappa4 takes ``h`` and ``k`` as shape parameters. + + The kappa4 distribution returns other distributions when certain + ``h`` and ``k`` values are used. + + +------+-------------+----------------+------------------+ + | h | k=0.0 | k=1.0 | -inf<=k<=inf | + +======+=============+================+==================+ + | -1.0 | Logistic | | Generalized | + | | | | Logistic(1) | + | | | | | + | | logistic(x) | | | + +------+-------------+----------------+------------------+ + | 0.0 | Gumbel | Reverse | Generalized | + | | | Exponential(2) | Extreme Value | + | | | | | + | | gumbel_r(x) | | genextreme(x, k) | + +------+-------------+----------------+------------------+ + | 1.0 | Exponential | Uniform | Generalized | + | | | | Pareto | + | | | | | + | | expon(x) | uniform(x) | genpareto(x, -k) | + +------+-------------+----------------+------------------+ + + (1) There are at least five generalized logistic distributions. + Four are described here: + https://en.wikipedia.org/wiki/Generalized_logistic_distribution + The "fifth" one is the one kappa4 should match which currently + isn't implemented in scipy: + https://en.wikipedia.org/wiki/Talk:Generalized_logistic_distribution + http://www.mathwave.com/help/easyfit/html/analyses/distributions/gen_logistic.html + (2) This distribution is currently not in scipy. + + References + ---------- + J.C. Finney, "Optimization of a Skewed Logistic Distribution With Respect + to the Kolmogorov-Smirnov Test", A Dissertation Submitted to the Graduate + Faculty of the Louisiana State University and Agricultural and Mechanical + College, (August, 2004), + http://etd.lsu.edu/docs/available/etd-05182004-144851/unrestricted/Finney_dis.pdf + + J.R.M. Hosking, "The four-parameter kappa distribution". IBM J. Res. + Develop. 38 (3), 25 1-258 (1994). + + B. Kumphon, A. Kaew-Man, P. Seenoi, "A Rainfall Distribution for the Lampao + Site in the Chi River Basin, Thailand", Journal of Water Resource and + Protection, vol. 4, 866-869, (2012). + http://file.scirp.org/pdf/JWARP20121000009_14676002.pdf + + C. Winchester, "On Estimation of the Four-Parameter Kappa Distribution", A + Thesis Submitted to Dalhousie University, Halifax, Nova Scotia, (March + 2000). + http://www.nlc-bnc.ca/obj/s4/f2/dsk2/ftp01/MQ57336.pdf + + %(after_notes)s + + %(example)s + + """ + def _argcheck(self, h, k): + condlist = [np.logical_and(h > 0, k > 0), + np.logical_and(h > 0, k == 0), + np.logical_and(h > 0, k < 0), + np.logical_and(h <= 0, k > 0), + np.logical_and(h <= 0, k == 0), + np.logical_and(h <= 0, k < 0)] + + def f0(h, k): + return (1.0 - h**(-k))/k + + def f1(h, k): + return log(h) + + def f3(h, k): + a = np.empty(shape(h)) + a[:] = -inf + return a + + def f5(h, k): + return 1.0/k + + self.a = _lazyselect(condlist, + [f0, f1, f0, f3, f3, f5], + [h, k], + default=nan) + + def f00(h, k): + return 1.0/k + + def f11(h, k): + a = np.empty(shape(h)) + a[:] = inf + return a + + self.b = _lazyselect(condlist, + [f00, f11, f11, f00, f11, f11], + [h, k], + default=nan) + return (h == h) + + def _pdf(self, x, h, k): + return exp(self._logpdf(x, h, k)) + + def _logpdf(self, x, h, k): + condlist = [np.logical_and(h != 0, k != 0), + np.logical_and(h == 0, k != 0), + np.logical_and(h != 0, k == 0), + np.logical_and(h == 0, k == 0)] + + def f0(x, h, k): + '''pdf = (1.0 - k*x)**(1.0/k - 1.0)*( + 1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h-1.0) + logpdf = ... + ''' + return special.xlog1py(1.0/k-1.0, + -k*x + ) + special.xlog1py(1.0/h-1.0, + -h*(1.0-k*x)**(1.0/k)) + + def f1(x, h, k): + '''pdf = (1.0 - k*x)**(1.0/k - 1.0)*exp(-( + 1.0 - k*x)**(1.0/k)) + logpdf = ... + ''' + return special.xlog1py(1.0/k-1.0,-k*x)-(1.0-k*x)**(1.0/k) + + def f2(x, h, k): + '''pdf = exp(-x)*(1.0 - h*exp(-x))**(1.0/h - 1.0) + logpdf = ... + ''' + return -x + special.xlog1py(1.0/h-1.0, -h*exp(-x)) + + def f3(x, h, k): + '''pdf = exp(-x-exp(-x)) + logpdf = ... + ''' + return -x-exp(-x) + + return _lazyselect(condlist, + [f0, f1, f2, f3], + [x, h, k], + default=nan) + + def _cdf(self, x, h, k): + return exp(self._logcdf(x, h, k)) + + def _logcdf(self, x, h, k): + condlist = [np.logical_and(h != 0, k != 0), + np.logical_and(h == 0, k != 0), + np.logical_and(h != 0, k == 0), + np.logical_and(h == 0, k == 0)] + + def f0(x, h, k): + '''cdf = (1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h) + logcdf = ... + ''' + return (1.0/h)*special.log1p(-h*(1.0-k*x)**(1.0/k)) + + def f1(x, h, k): + '''cdf = exp(-(1.0 - k*x)**(1.0/k)) + logcdf = ... + ''' + return -(1.0 - k*x)**(1.0/k) + + def f2(x, h, k): + '''cdf = (1.0 - h*exp(-x))**(1.0/h) + logcdf = ... + ''' + return (1.0/h)*special.log1p(-h*exp(-x)) + + def f3(x, h, k): + '''cdf = exp(-exp(-x)) + logcdf = ... + ''' + return -exp(-x) + + return _lazyselect(condlist, + [f0, f1, f2, f3], + [x, h, k], + default=nan) + + def _ppf(self, q, h, k): + condlist = [np.logical_and(h != 0, k != 0), + np.logical_and(h == 0, k != 0), + np.logical_and(h != 0, k == 0), + np.logical_and(h == 0, k == 0)] + + def f0(q, h, k): + return 1.0/k*(1.0 - ((1.0 - (q**h))/h)**k) + + def f1(q, h, k): + return 1.0/k*(1.0 - (-log(q))**k) + + def f2(q, h, k): + '''ppf = -log((1.0 - (q**h))/h) + ''' + return -special.log1p(-(q**h)) + log(h) + + def f3(q, h, k): + return -log(-log(q)) + + return _lazyselect(condlist, + [f0, f1, f2, f3], + [q, h, k], + default=nan) + + def _stats(self, h, k): + if h >= 0 and k >= 0: + maxr = 5 + elif h < 0 and k >= 0: + maxr = int(-1.0/h*k) + elif k < 0: + maxr = int(-1.0/k) + else: + maxr = 5 + + outputs = [None if r < maxr else nan for r in range(1, 5)] + return outputs[:] +kappa4 = kappa4_gen(name='kappa4') + + +class kappa3_gen(rv_continuous): + """Kappa 3 parameter distribution. + + %(before_notes)s + + Notes + ----- + The probability density function for `kappa` is:: + + kappa3.pdf(x, a) = + a*[a + x**a]**(-(a + 1)/a), for ``x > 0`` + 0.0, for ``x <= 0`` + + `kappa3` takes ``a`` as a shape parameter and ``a > 0``. + + References + ---------- + P.W. Mielke and E.S. Johnson, "Three-Parameter Kappa Distribution Maximum + Likelihood and Likelihood Ratio Tests", Methods in Weather Research, + 701-707, (September, 1973), + http://docs.lib.noaa.gov/rescue/mwr/101/mwr-101-09-0701.pdf + + B. Kumphon, "Maximum Entropy and Maximum Likelihood Estimation for the + Three-Parameter Kappa Distribution", Open Journal of Statistics, vol 2, + 415-419 (2012) + http://file.scirp.org/pdf/OJS20120400011_95789012.pdf + + %(after_notes)s + + %(example)s + + """ + def _argcheck(self, a): + return a > 0 + + def _pdf(self, x, a): + return a*(a + x**a)**(-1.0/a-1) + + def _cdf(self, x, a): + return x*(a + x**a)**(-1.0/a) + + def _ppf(self, q, a): + return (a/(q**-a - 1.0))**(1.0/a) + + def _stats(self, a): + outputs = [None if i < a else nan for i in range(1, 5)] + return outputs[:] +kappa3 = kappa3_gen(a=0.0, name='kappa3') + + class nakagami_gen(rv_continuous): """A Nakagami continuous random variable. @@ -3881,6 +4246,7 @@ class pearson3_gen(rv_continuous): ans, x, skew = np.broadcast_arrays([1.0], x, skew) ans = ans.copy() + # mask is True where skew is small enough to use the normal approx. mask = np.absolute(skew) < norm2pearson_transition invmask = ~mask @@ -3940,13 +4306,18 @@ class pearson3_gen(rv_continuous): return ans def _rvs(self, skew): + skew = broadcast_to(skew, self._size) ans, x, transx, skew, mask, invmask, beta, alpha, zeta = ( self._preprocess([0], skew)) - if mask[0]: - return self._random_state.standard_normal(self._size) - ans = self._random_state.standard_gamma(alpha, self._size)/beta + zeta - if ans.size == 1: - return ans[0] + + nsmall = mask.sum() + nbig = mask.size - nsmall + ans[mask] = self._random_state.standard_normal(nsmall) + ans[invmask] = (self._random_state.standard_gamma(alpha, nbig)/beta + + zeta) + + if self._size == (): + ans = ans[0] return ans def _ppf(self, q, skew): @@ -4028,6 +4399,8 @@ class powerlognorm_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _pdf(self, x, c, s): return (c/(x*s) * _norm_pdf(log(x)/s) * pow(_norm_cdf(-log(x)/s), c*1.0-1.0)) @@ -4134,6 +4507,8 @@ class rayleigh_gen(rv_continuous): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _rvs(self): return chi.rvs(2, size=self._size, random_state=self._random_state) @@ -4141,9 +4516,7 @@ class rayleigh_gen(rv_continuous): return exp(self._logpdf(r)) def _logpdf(self, r): - rr2 = r * r / 2.0 - return _lazywhere(rr2 != inf, (r, rr2), lambda r, rr2: log(r) - rr2, - -rr2) + return log(r) - 0.5 * r * r def _cdf(self, r): return -special.expm1(-0.5 * r**2) @@ -4152,10 +4525,10 @@ class rayleigh_gen(rv_continuous): return sqrt(-2 * special.log1p(-q)) def _sf(self, r): - return exp(-0.5 * r**2) + return exp(self._logsf(r)) def _logsf(self, r): - return -0.5 * r**2 + return -0.5 * r * r def _isf(self, q): return sqrt(-2 * log(q)) @@ -4290,6 +4663,7 @@ class reciprocal_gen(rv_continuous): return 0.5*log(a*b)+log(log(b/a)) reciprocal = reciprocal_gen(name="reciprocal") + class rice_gen(rv_continuous): """A Rice continuous random variable. @@ -4321,8 +4695,8 @@ class rice_gen(rv_continuous): def _rvs(self, b): # http://en.wikipedia.org/wiki/Rice_distribution - sz = self._size if self._size else 1 - t = b/np.sqrt(2) + self._random_state.standard_normal(size=(2, sz)) + t = b/np.sqrt(2) + self._random_state.standard_normal(size=(2,) + + self._size) return np.sqrt((t*t).sum(axis=0)) def _cdf(self, x, b): @@ -4653,10 +5027,9 @@ class truncnorm_gen(rv_continuous): self._na = _norm_cdf(a) self._sb = _norm_sf(b) self._sa = _norm_sf(a) - if self.a > 0: - self._delta = -(self._sb - self._sa) - else: - self._delta = self._nb - self._na + self._delta = np.where(self.a > 0, + -(self._sb - self._sa), + self._nb - self._na) self._logdelta = log(self._delta) return (a != b) @@ -4670,10 +5043,11 @@ class truncnorm_gen(rv_continuous): return (_norm_cdf(x) - self._na) / self._delta def _ppf(self, q, a, b): - if self.a > 0: - return _norm_isf(q*self._sb + self._sa*(1.0-q)) - else: - return _norm_ppf(q*self._nb + self._na*(1.0-q)) + # XXX Use _lazywhere... + ppf = np.where(self.a > 0, + _norm_isf(q*self._sb + self._sa*(1.0-q)), + _norm_ppf(q*self._nb + self._na*(1.0-q))) + return ppf def _stats(self, a, b): nA, nB = self._na, self._nb @@ -4830,6 +5204,8 @@ class wald_gen(invgauss_gen): %(example)s """ + _support_mask = rv_continuous._open_support_mask + def _rvs(self): return self._random_state.wald(1.0, 1.0, size=self._size) diff --git a/wafo/stats/_distn_infrastructure.py b/wafo/stats/_distn_infrastructure.py index c9520bb..bc41dbe 100644 --- a/wafo/stats/_distn_infrastructure.py +++ b/wafo/stats/_distn_infrastructure.py @@ -1,11 +1,12 @@ from __future__ import absolute_import from scipy.stats._distn_infrastructure import * # @UnusedWildImport from scipy.stats._distn_infrastructure import (_skew, # @UnusedImport - _kurtosis, _lazywhere, _ncx2_log_pdf, # @IgnorePep8 @UnusedImport + _kurtosis, _ncx2_log_pdf, # @IgnorePep8 @UnusedImport _ncx2_pdf, _ncx2_cdf) # @UnusedImport @IgnorePep8 from .estimation import FitDistribution from ._constants import _XMAX - +from wafo.misc import lazyselect as _lazyselect +from wafo.misc import lazywhere as _lazywhere _doc_default_example = """\ Examples @@ -328,7 +329,7 @@ def nlogps(self, theta, x): return T -def _penalized_nnlf(self, theta, x): +def _penalized_nnlf(self, theta, x, penalty=None): ''' Return negative loglikelihood function, i.e., - sum (log pdf(x, theta), axis=0) where theta are the parameters (including loc and scale) @@ -352,7 +353,8 @@ def _penalized_nnlf(self, theta, x): Nbad += np.sum(~finite_logpdf, axis=0) logscale = N * log(scale) if Nbad > 0: - penalty = Nbad * log(_XMAX) * 100 + if penalty is None: + penalty = Nbad * log(_XMAX) * 100 return -np.sum(logpdf[finite_logpdf], axis=0) + penalty + logscale return -np.sum(logpdf, axis=0) + logscale @@ -597,6 +599,14 @@ def fit2(self, data, *args, **kwds): return FitDistribution(self, data, args, **kwds) +def _support_mask(self, x): + return (self.a <= x) & (x <= self.b) + + +def _open_support_mask(self, x): + return (self.a < x) & (x < self.b) + + rv_generic.freeze = freeze rv_discrete.freeze = freeze rv_continuous.freeze = freeze @@ -607,3 +617,5 @@ rv_continuous._penalized_nnlf = _penalized_nnlf rv_continuous._reduce_func = _reduce_func rv_continuous.fit = fit rv_continuous.fit2 = fit2 +rv_continuous._support_mask = _support_mask +rv_continuous._open_support_mask = _open_support_mask diff --git a/wafo/stats/estimation.py b/wafo/stats/estimation.py index 03c90c8..7808b6e 100644 --- a/wafo/stats/estimation.py +++ b/wafo/stats/estimation.py @@ -188,8 +188,355 @@ def norm_ppf(q): return special.ndtri(q) -# internal class to profile parameters of a given distribution class Profile(object): + ''' Profile Log- likelihood or Product Spacing-function. + which can be used for constructing confidence interval for + phat[i]. + + Parameters + ---------- + fit_dist : FitDistribution object + with ML or MPS estimated distribution parameters. + i : scalar integer + defining which distribution parameter to keep fixed in the + profiling process (default first non-fixed parameter) + pmin, pmax : real scalars + Interval for either the parameter, phat[i] used in the + optimization of the profile function (default is based on the + 100*(1-alpha)% confidence interval computed with the delta method.) + n : scalar integer + Max number of points used in Lp (default 100) + alpha : real scalar + confidence coefficent (default 0.05) + Returns + ------- + Lp : Profile log-likelihood function with parameters phat given + the data and phat[i], i.e., + Lp = max(log(f(phat|data,phat[i]))), + + Member methods + ------------- + plot() : Plot profile function with 100(1-alpha)% confidence interval + get_bounds() : Return 100(1-alpha)% confidence interval + + Member variables + ---------------- + fit_dist : FitDistribution data object. + data : profile function values + args : profile function arguments + alpha : confidence coefficient + Lmax : Maximum value of profile function + alpha_cross_level : + + PROFILE is a utility function for making inferences either on a particular + component of the vector phat or the quantile, x, or the probability, SF. + This is usually more accurate than using the delta method assuming + asymptotic normality of the ML estimator or the MPS estimator. + + Examples + -------- + # MLE + >>> import wafo.stats as ws + >>> R = ws.weibull_min.rvs(1,size=100); + >>> phat = FitDistribution(ws.weibull_min, R, 1, scale=1, floc=0.0) + + # Better CI for phat.par[i=0] + >>> Lp = Profile(phat, i=0) + >>> Lp.plot() + >>> phat_ci = Lp.get_bounds(alpha=0.1) + + ''' + + def __init__(self, fit_dist, i=None, pmin=None, pmax=None, n=100, + alpha=0.05): + + method = fit_dist.method.lower() + self.fit_dist = fit_dist + self._set_indexes(fit_dist, i) + + self.pmin = pmin + self.pmax = pmax + self.n = n + self.alpha = alpha + + self.data = None + self.args = None + self.xlabel = 'phat[{}]'.format(np.ravel(self.i_fixed)[0]) + like_txt = 'likelihood' if method == 'ml' else 'product spacing' + self.ylabel = 'Profile log' + like_txt + self.title = '{:g}% CI for {:s} params'.format(100*(1.0 - self.alpha), + fit_dist.dist.name) + + Lmax = self._loglike_max(fit_dist, method) + self.Lmax = Lmax + self.alpha_Lrange = 0.5 * chi2isf(self.alpha, 1) + self.alpha_cross_level = Lmax - self.alpha_Lrange + + phatv = fit_dist.par.copy() + self._set_profile(phatv) + + def _loglike_max(self, fit_dist, method): + if method.startswith('ml'): + Lmax = fit_dist.LLmax + elif method.startswith('mps'): + Lmax = fit_dist.LPSmax + else: + raise ValueError( + "PROFILE is only valid for ML- or MPS- estimators") + return Lmax + + def _set_indexes(self, fit_dist, i): + if i is None: + i = self._default_i_fixed(fit_dist) + self.i_fixed = np.atleast_1d(i) + isnotfixed = self._get_not_fixed_mask(fit_dist) + self.i_notfixed = nonzero(isnotfixed) + self._check_i_fixed() + isfree = isnotfixed + isfree[self.i_fixed] = False + self.i_free = nonzero(isfree) + + def _default_i_fixed(self, fit_dist): + try: + i0 = 1 - np.isfinite(fit_dist.par_fix).argmax() + except: + i0 = 0 + return i0 + + def _get_not_fixed_mask(self, fit_dist): + if fit_dist.par_fix is None: + isnotfixed = np.ones(fit_dist.par.shape, dtype=bool) + else: + isnotfixed = 1 - np.isfinite(fit_dist.par_fix) + return isnotfixed + + def _check_i_fixed(self): + if self.i_fixed not in self.i_notfixed: + raise IndexError("Index i must be equal to an index to one of " + + "the free parameters.") + + def _local_link(self, fix_par, par): + return fix_par + + def _correct_Lmax(self, Lmax, free_par, fix_par): + + if Lmax > self.Lmax: # foundNewphat = True + + dL = self.Lmax - Lmax + self.alpha_cross_level -= dL + self.Lmax = Lmax + par = self._par.copy() + par[self.i_free] = free_par + par[self.i_fixed] = fix_par + self.best_par = par + self._par = par + warnings.warn( + 'The fitted parameters does not provide the optimum fit. ' + + 'Something wrong with fit (par = {})'.format(str(par))) + + def _profile_optimum(self, phatfree0, p_opt): + phatfree = optimize.fmin( + self._profile_fun, phatfree0, args=(p_opt,), disp=0) + Lmax = -self._profile_fun(phatfree, p_opt) + self._correct_Lmax(Lmax, phatfree, p_opt) + return Lmax, phatfree + + def _set_profile(self, phat): + self._par = phat.copy() + + # Set up variable to profile and _local_link function + p_opt = self._par[self.i_fixed] + phatfree = self._par[self.i_free].copy() + + pvec = self._get_pvec(phatfree, p_opt) + + self.data = np.ones_like(pvec) * nan + k1 = (pvec >= p_opt).argmax() + + for size, step in ((-1, -1), (pvec.size, 1)): + phatfree = self._par[self.i_free].copy() + for ix in range(k1, size, step): + Lmax, phatfree = self._profile_optimum(phatfree, pvec[ix]) + self.data[ix] = Lmax + if ix != k1 and self.data[ix] < self.alpha_cross_level: + break + np.putmask(pvec, np.isnan(self.data), nan) + self.args = pvec + + self._prettify_profile() + + def _prettify_profile(self): + pvec = self.args + ix = nonzero(np.isfinite(pvec)) + self.data = self.data[ix] + self.args = pvec[ix] + cond = self.data == -np.inf + if any(cond): + ind, = cond.nonzero() + self.data.put(ind, floatinfo.min / 2.0) + ind1 = np.where(ind == 0, ind, ind - 1) + cl = self.alpha_cross_level - self.alpha_Lrange / 2.0 + t0 = ecross(self.args, self.data, ind1, cl) + self.data.put(ind, cl) + self.args.put(ind, t0) + + def _get_variance(self): + return self.fit_dist.par_cov[self.i_fixed, :][:, self.i_fixed] + + def _approx_p_min_max(self, p_opt): + pvar = self._get_variance() + if pvar <= 1e-5 or np.isnan(pvar): + pvar = max(abs(p_opt) * 0.5, 0.2) + pvar = max(pvar, 0.1) + p_crit = -norm_ppf(self.alpha / 2.0) * sqrt(np.ravel(pvar)) * 1.5 + return p_opt - p_crit * 5, p_opt + p_crit * 5 + + def _p_min_max(self, phatfree0, p_opt): + p_low, p_up = self._approx_p_min_max(p_opt) + pmin, pmax = self.pmin, self.pmax + if pmin is None: + pmin = self._search_pmin(phatfree0, p_low, p_opt) + if pmax is None: + pmax = self._search_pmax(phatfree0, p_up, p_opt) + return pmin, pmax + + def _adaptive_pvec(self, p_opt, pmin, pmax): + p_crit_low = (p_opt - pmin) / 5 + p_crit_up = (pmax - p_opt) / 5 + n4 = np.floor(self.n / 4.0) + a, b = p_opt - p_crit_low, p_opt + p_crit_up + pvec1 = np.linspace(pmin, a, n4 + 1) + pvec2 = np.linspace(a, b, self.n - 2 * n4) + pvec3 = np.linspace(b, pmax, n4 + 1) + pvec = np.unique(np.hstack((pvec1, p_opt, pvec2, pvec3))) + return pvec + + def _get_pvec(self, phatfree0, p_opt): + ''' return proper interval for the variable to profile + ''' + if self.pmin is None or self.pmax is None: + pmin, pmax = self._p_min_max(phatfree0, p_opt) + return self._adaptive_pvec(p_opt, pmin, pmax) + return np.linspace(self.pmin, self.pmax, self.n) + + def _search_pmin(self, phatfree0, p_min0, p_opt): + phatfree = phatfree0.copy() + + dp = (p_opt - p_min0)/40 + if dp < 1e-2: + dp = 0.1 + p_min_opt = p_opt - dp + Lmax, phatfree = self._profile_optimum(phatfree, p_opt) + for _j in range(50): + p_min = p_opt - dp + Lmax, phatfree = self._profile_optimum(phatfree, p_min) + if np.isnan(Lmax): + dp *= 0.33 + elif Lmax < self.alpha_cross_level - self.alpha_Lrange * 5: + p_min_opt = p_min + dp *= 0.33 + elif Lmax < self.alpha_cross_level: + p_min_opt = p_min + break + else: + dp *= 1.67 + return p_min_opt + + def _search_pmax(self, phatfree0, p_max0, p_opt): + phatfree = phatfree0.copy() + + dp = (p_max0 - p_opt)/40 + if dp < 1e-2: + dp = 0.1 + p_max_opt = p_opt + dp + Lmax, phatfree = self._profile_optimum(phatfree, p_opt) + for _j in range(50): + p_max = p_opt + dp + Lmax, phatfree = self._profile_optimum(phatfree, p_max) + if np.isnan(Lmax): + dp *= 0.33 + elif Lmax < self.alpha_cross_level - self.alpha_Lrange * 2: + p_max_opt = p_max + dp *= 0.33 + elif Lmax < self.alpha_cross_level: + p_max_opt = p_max + break + else: + dp *= 1.67 + return p_max_opt + + def _profile_fun(self, free_par, fix_par): + ''' Return negative of loglike or logps function + + free_par - vector of free parameters + fix_par - fixed parameter, i.e., either quantile (return level), + probability (return period) or distribution parameter + ''' + par = self._par.copy() + par[self.i_free] = free_par + # _local_link: connects fixed quantile or probability with fixed + # distribution parameter + par[self.i_fixed] = self._local_link(fix_par, par) + return self.fit_dist.fitfun(par) + + def get_bounds(self, alpha=0.05): + '''Return confidence interval for profiled parameter + ''' + if alpha < self.alpha: + warnings.warn( + 'Might not be able to return CI with alpha less than %g' % + self.alpha) + cross_level = self.Lmax - 0.5 * chi2isf(alpha, 1) + ind = findcross(self.data, cross_level) + N = len(ind) + if N == 0: + warnings.warn('''Number of crossings is zero, i.e., + upper and lower bound is not found!''') + CI = (self.pmin, self.pmax) + + elif N == 1: + x0 = ecross(self.args, self.data, ind, cross_level) + isUpcrossing = self.data[ind] > self.data[ind + 1] + if isUpcrossing: + CI = (x0, self.pmax) + warnings.warn('Upper bound is larger') + else: + CI = (self.pmin, x0) + warnings.warn('Lower bound is smaller') + + elif N == 2: + CI = ecross(self.args, self.data, ind, cross_level) + else: + warnings.warn('Number of crossings too large! Something is wrong!') + CI = ecross(self.args, self.data, ind[[0, -1]], cross_level) + return CI + + def plot(self, axis=None): + ''' Plot profile function with 100(1-alpha)% CI + ''' + if axis is None: + axis = plotbackend.gca() + + p_ci = self.get_bounds(self.alpha) + axis.plot( + self.args, self.data, + p_ci, [self.Lmax, ] * 2, 'r--', + p_ci, [self.alpha_cross_level, ] * 2, 'r--') + ymax = self.Lmax + self.alpha_Lrange/10 + ymin = self.alpha_cross_level - self.alpha_Lrange/10 + axis.vlines(p_ci, ymin=ymin, ymax=self.Lmax, + color='r', linestyles='--') + if self.i_fixed is not None: + axis.vlines(self.fit_dist.par[self.i_fixed], + ymin=ymin, ymax=self.Lmax, + color='g', linestyles='--') + axis.set_title(self.title) + axis.set_ylabel(self.ylabel) + axis.set_xlabel(self.xlabel) + axis.set_ylim(ymin=ymin, ymax=ymax) + + +class ProfileOld(object): ''' Profile Log- likelihood or Product Spacing-function. which can be used for constructing confidence interval for @@ -276,6 +623,18 @@ class Profile(object): >>> sf_ci = Lsf.get_bounds(alpha=0.2) ''' + def _get_not_fixed_mask(self, fit_dist): + if fit_dist.par_fix is None: + isnotfixed = np.ones(fit_dist.par.shape, dtype=bool) + else: + isnotfixed = 1 - np.isfinite(fit_dist.par_fix) + return isnotfixed + + def _check_i_fixed(self): + if self.i_fixed not in self.i_notfixed: + raise IndexError("Index i must be equal to an index to one of " + + "the free parameters.") + def __init__(self, fit_dist, method=None, **kwds): if method is None: method = fit_dist.method.lower() @@ -307,19 +666,11 @@ class Profile(object): raise ValueError( "PROFILE is only valid for ML- or MPS- estimators") - if fit_dist.par_fix is None: - isnotfixed = np.ones(fit_dist.par.shape, dtype=bool) - else: - isnotfixed = 1 - np.isfinite(fit_dist.par_fix) - + isnotfixed = self._get_not_fixed_mask(fit_dist) self.i_notfixed = nonzero(isnotfixed) self.i_fixed = atleast_1d(self.i_fixed) - - if 1 - isnotfixed[self.i_fixed]: - raise IndexError( - "Index i must be equal to an index to one of the free " + - "parameters.") + self._check_i_fixed() isfree = isnotfixed isfree[self.i_fixed] = False @@ -339,7 +690,7 @@ class Profile(object): self.profile_par = not (self.profile_x or self.profile_logSF) if self.link is None: - self.link = LINKS[self.fit_dist.dist.name] + self.link = LINKS.get(self.fit_dist.dist.name) if self.profile_par: self._local_link = self._par_link self.xlabel = 'phat(%d)' % self.i_fixed @@ -373,20 +724,26 @@ class Profile(object): def _logSF_link(self, fix_par, par): return self.link(self.x, fix_par, par, self.i_fixed) - def _correct_Lmax(self, Lmax): + def _correct_Lmax(self, Lmax, free_par, fix_par): + if Lmax > self.Lmax: # foundNewphat = True - warnings.warn( - 'The fitted parameters does not provide the optimum fit. ' + - 'Something wrong with fit') + dL = self.Lmax - Lmax self.alpha_cross_level -= dL self.Lmax = Lmax + par = self._par.copy() + par[self.i_free] = free_par + par[self.i_fixed] = fix_par + self.best_par = par + warnings.warn( + 'The fitted parameters does not provide the optimum fit. ' + + 'Something wrong with fit (par = {})'.format(str(par))) def _profile_optimum(self, phatfree0, p_opt): phatfree = optimize.fmin( self._profile_fun, phatfree0, args=(p_opt,), disp=0) Lmax = -self._profile_fun(phatfree, p_opt) - self._correct_Lmax(Lmax) + self._correct_Lmax(Lmax, phatfree, p_opt) return Lmax, phatfree def _set_profile(self, phatfree0, p_opt): @@ -400,7 +757,7 @@ class Profile(object): for ix in range(k1, size, step): Lmax, phatfree = self._profile_optimum(phatfree, pvec[ix]) self.data[ix] = Lmax - if self.data[ix] < self.alpha_cross_level: + if ix != k1 and self.data[ix] < self.alpha_cross_level: break np.putmask(pvec, np.isnan(self.data), nan) self.args = pvec @@ -443,13 +800,14 @@ class Profile(object): ''' return proper interval for the variable to profile ''' - linspace = np.linspace if self.pmin is None or self.pmax is None: pvar = self._get_variance() if pvar <= 1e-5 or np.isnan(pvar): - pvar = max(abs(p_opt) * 0.5, 0.5) + pvar = max(abs(p_opt) * 0.5, 0.2) + else: + pvar = max(pvar, 0.1) p_crit = (-norm_ppf(self.alpha / 2.0) * sqrt(np.ravel(pvar)) * 1.5) @@ -465,14 +823,14 @@ class Profile(object): N4 = np.floor(self.N / 4.0) - pvec1 = linspace(self.pmin, p_opt - p_crit_low, N4 + 1) - pvec2 = linspace( - p_opt - p_crit_low, p_opt + p_crit_up, self.N - 2 * N4) - pvec3 = linspace(p_opt + p_crit_up, self.pmax, N4 + 1) + pvec1 = np.linspace(self.pmin, p_opt - p_crit_low, N4 + 1) + pvec2 = np.linspace(p_opt - p_crit_low, p_opt + p_crit_up, + self.N - 2 * N4) + pvec3 = np.linspace(p_opt + p_crit_up, self.pmax, N4 + 1) pvec = np.unique(np.hstack((pvec1, p_opt, pvec2, pvec3))) else: - pvec = linspace(self.pmin, self.pmax, self.N) + pvec = np.linspace(self.pmin, self.pmax, self.N) return pvec def _search_pmin(self, phatfree0, p_min0, p_opt): @@ -488,7 +846,7 @@ class Profile(object): Lmax, phatfree = self._profile_optimum(phatfree, p_min) if np.isnan(Lmax): dp *= 0.33 - elif Lmax < self.alpha_cross_level - self.alpha_Lrange * 2: + elif Lmax < self.alpha_cross_level - self.alpha_Lrange * 5: p_min_opt = p_min dp *= 0.33 elif Lmax < self.alpha_cross_level: @@ -588,14 +946,20 @@ class Profile(object): p_ci = self.get_bounds(self.alpha) axis.plot( self.args, self.data, - self.args[[0, -1]], [self.Lmax, ] * 2, 'r--', - self.args[[0, -1]], [self.alpha_cross_level, ] * 2, 'r--') - axis.vlines(p_ci, ymin=axis.get_ylim()[0], - ymax=self.Lmax, # self.alpha_cross_level, + p_ci, [self.Lmax, ] * 2, 'r--', + p_ci, [self.alpha_cross_level, ] * 2, 'r--') + ymax = self.Lmax + self.alpha_Lrange/10 + ymin = self.alpha_cross_level - self.alpha_Lrange/10 + axis.vlines(p_ci, ymin=ymin, ymax=self.Lmax, color='r', linestyles='--') + if self.i_fixed is not None: + axis.vlines(self.fit_dist.par[self.i_fixed], + ymin=ymin, ymax=self.Lmax, + color='g', linestyles='--') axis.set_title(self.title) axis.set_ylabel(self.ylabel) axis.set_xlabel(self.xlabel) + axis.set_ylim(ymin=ymin, ymax=ymax) class FitDistribution(rv_frozen): @@ -857,7 +1221,7 @@ class FitDistribution(rv_frozen): (including loc and scale) ''' if eps is None: - eps = (_EPS) ** 0.4 + eps = (_EPS) ** 0.25 num_par = len(theta) # pab 07.01.2001: Always choose the stepsize h so that # it is an exactly representable number. @@ -901,7 +1265,7 @@ class FitDistribution(rv_frozen): return -H def _nnlf(self, theta, x): - return self.dist._penalized_nnlf(theta, x) + return self.dist._penalized_nnlf(theta, x, self._penalty) def _nlogps(self, theta, x): """ Moran's negative log Product Spacings statistic @@ -974,27 +1338,28 @@ class FitDistribution(rv_frozen): nonfiniteD = 1 - finiteD Nbad += np.sum(nonfiniteD, axis=0) if Nbad > 0: - T = -np.sum(logD[finiteD], axis=0) + 100.0 * log(_XMAX) * Nbad - else: - T = -np.sum(logD, axis=0) - return T + if self._penalty is None: + penalty = 100.0 * log(_XMAX) * Nbad + else: + penalty = 0.0 + return -np.sum(logD[finiteD], axis=0) + penalty + return -np.sum(logD, axis=0) def _fit(self, *args, **kwds): - + self._penalty = None dist = self.dist data = self.data Narg = len(args) - if Narg > dist.numargs: + if Narg > dist.numargs + 2: raise ValueError("Too many input arguments.") - start = [None] * 2 - if (Narg < dist.numargs) or not ('loc' in kwds and 'scale' in kwds): + if (Narg < dist.numargs + 2) or not ('loc' in kwds and 'scale' in kwds): # get distribution specific starting locations start = dist._fitstart(data) - args += start[Narg:-2] - loc = kwds.pop('loc', start[-2]) - scale = kwds.pop('scale', start[-1]) - args += (loc, scale) + args += start[Narg:] + loc = kwds.pop('loc', args[-2]) + scale = kwds.pop('scale', args[-1]) + args = args[:-2] + (loc, scale) x0, func, restore, args, fixedn = self._reduce_func(args, kwds) if self.search: optimizer = kwds.pop('optimizer', optimize.fmin) @@ -1011,8 +1376,24 @@ class FitDistribution(rv_frozen): # by now kwds must be empty, since everybody took what they needed if kwds: raise TypeError("Unknown arguments: %s." % kwds) - vals = optimizer(func, x0, args=(np.ravel(data),), disp=0) - vals = tuple(vals) + output = optimizer(func, x0, args=(data,), full_output=True) + + # output = optimize.fmin_bfgs(func, vals, args=(data,), full_output=True) + # dfunc = nd.Gradient(func)(vals, data) + # nd.directionaldiff(f, x0, vec) + warnflag = output[-1] + if warnflag == 1: + output = optimizer(func, output[0], args=(data,), full_output=True) + warnflag = output[-1] + vals = tuple(output[0]) + if warnflag == 1: + warnings.warn("The maximum number of iterations was exceeded.") + + elif warnflag == 2: + warnings.warn("Did not converge") + else: + vals = tuple(output[0]) + else: vals = tuple(x0) if restore is not None: @@ -1022,8 +1403,12 @@ class FitDistribution(rv_frozen): def _compute_cov(self): '''Compute covariance ''' + # self._penalty = 0 somefixed = (self.par_fix is not None) and any(isfinite(self.par_fix)) + H = np.asmatrix(self._hessian(self._fitfun, self.par, self.data)) + # H = -nd.Hessian(lambda par: self._fitfun(par, self.data), + # method='forward')(self.par) self.H = H try: if somefixed: @@ -1153,6 +1538,7 @@ class FitDistribution(rv_frozen): plotbackend.subplot(2, 2, 4) self.plotresprb() + plotbackend.subplots_adjust(hspace=0.4, wspace=0.4) fixstr = '' if self.par_fix is not None: numfix = len(self.i_fixed) @@ -1161,12 +1547,16 @@ class FitDistribution(rv_frozen): format1 = ', '.join(['%g'] * numfix) phatistr = format0 % tuple(self.i_fixed) phatvstr = format1 % tuple(self.par[self.i_fixed]) - fixstr = 'Fixed: phat[%s] = %s ' % (phatistr, phatvstr) + fixstr = 'Fixed: phat[{0:s}] = {1:s} '.format(phatistr, + phatvstr) - infostr = 'Fit method: %s, Fit p-value: %2.2f %s' % ( - self.method.upper(), self.pvalue, fixstr) + infostr = 'Fit method: {0:s}, Fit p-value: {1:2.2f} {2:s}, phat=[{3:s}]' + par_txt = ('{:1.2g}, '*len(self.par))[:-2].format(*self.par) try: - plotbackend.figtext(0.05, 0.01, infostr) + plotbackend.figtext(0.05, 0.01, infostr.format(self.method.upper(), + self.pvalue, + fixstr, + par_txt)) except: pass @@ -1339,14 +1729,68 @@ def test1(): import wafo.stats as ws dist = ws.weibull_min # dist = ws.bradford - R = dist.rvs(0.3, size=1000) - phat = FitDistribution(dist, R, method='ml') + dist = ws.gengamma + R = dist.rvs(2,.5, size=500) + phat = FitDistribution(dist, R, method='ml') + phats = FitDistribution(dist, R, method='mps') + import matplotlib.pyplot as plt # Better CI for phat.par[i=0] - Lp1 = Profile(phat, i=0) # @UnusedVariable - Lp1.plot() - import matplotlib.pyplot as plt - plt.show() + plt.figure(0) + N = dist.numargs + 2 + for i in range(N-1, -1, -1): + plt.subplot(N, 1, i+1) + + pmax = None + if i == 0: + pmax = None + Lp1 = Profile(phats, i=i) + j = 0 + while hasattr(Lp1, 'best_par') and j < 7: + phats = FitDistribution(dist, R, args=Lp1.best_par, + method='mps', search=True) + Lp1 = Profile(phats, i=i, pmax=pmax) + + j += 1 + Lp1.plot() + if i != 0: + plt.title('') + if i != N//2: + plt.ylabel('') + plt.subplots_adjust(hspace=0.5) + par_txt = ('{:1.2g}, '*len(phats.par))[:-2].format(*phats.par) + plt.suptitle('phat = [{}]'.format(par_txt)) + + plt.figure(1) + phats.plotfitsummary() + + plt.figure(2) + + for i in range(N-1, -1, -1): + plt.subplot(N, 1, i+1) + pmax = None + if i == 0: + pmax = None + Lp1 = Profile(phat, i=i, pmax=pmax) # @UnusedVariable + j = 0 + while hasattr(Lp1, 'best_par') and j < 7: + phats = FitDistribution(dist, R, args=Lp1.best_par, + method='ml', search=True) + Lp1 = Profile(phat, i=i) # @UnusedVariable + j += 1 + Lp1.plot() + if i != 0: + plt.title('') + if i != N//2: + plt.ylabel('') + par_txt = ('{:1.2g}, '*len(phat.par))[:-2].format(*phat.par) + plt.suptitle('phat = [{}]'.format(par_txt)) + + plt.subplots_adjust(hspace=0.5) + plt.figure(3) + phat.plotfitsummary() + plt.show('hold') + # Lp2 = Profile(phat, i=2) # SF = 1./990 # x = phat.isf(SF)