From 79fa7ab1904bddc51584bb3a98fb9f3d033ad688 Mon Sep 17 00:00:00 2001 From: "Per.Andreas.Brodtkorb" Date: Mon, 21 Feb 2011 18:26:13 +0000 Subject: [PATCH] Updated distributions.py and test_distributions.py according to the latest scipy.stats --- pywafo/src/wafo/stats/distributions.py | 368 +++++++++++++----- .../wafo/stats/tests/test_distributions.py | 141 ++++++- 2 files changed, 410 insertions(+), 99 deletions(-) diff --git a/pywafo/src/wafo/stats/distributions.py b/pywafo/src/wafo/stats/distributions.py index abb57ab..c96db91 100644 --- a/pywafo/src/wafo/stats/distributions.py +++ b/pywafo/src/wafo/stats/distributions.py @@ -1,8 +1,8 @@ # Functions to implement several important functions for # various Continous and Discrete Probability Distributions # -# Author: Travis Oliphant 2002-2010 with contributions from -# SciPy Developers 2004-2010 +# Author: Travis Oliphant 2002-2011 with contributions from +# SciPy Developers 2004-2011 # from __future__ import division @@ -45,6 +45,38 @@ def _moment(data, n, mu=None): mu = data.mean() return ((data - mu)**n).mean() +def _moment_from_stats(n, mu, mu2, g1, g2, moment_func, args): + if (n==0): + return 1.0 + elif (n==1): + if mu is None: + val = moment_func(1,*args) + else: + val = mu + elif (n==2): + if mu2 is None or mu is None: + val = moment_func(2,*args) + else: + val = mu2 + mu*mu + elif (n==3): + if g1 is None or mu2 is None or mu is None: + val = moment_func(3,*args) + else: + mu3 = g1*(mu2**1.5) # 3rd central moment + val = mu3+3*mu*mu2+mu**3 # 3rd non-central moment + elif (n==4): + if g1 is None or g2 is None or mu2 is None or mu is None: + val = moment_func(4,*args) + else: + mu4 = (g2+3.0)*(mu2**2.0) # 4th central moment + mu3 = g1*(mu2**1.5) # 3rd central moment + val = mu4+4*mu*mu3+6*mu*mu*mu2+mu**4 + else: + val = moment_func(n, *args) + + return val + + def _skew(data): data = np.ravel(data) mu = data.mean() @@ -121,18 +153,34 @@ _doc_pdf = \ """pdf(x, %(shapes)s, loc=0, scale=1) Probability density function. """ +_doc_logpdf = \ +"""logpdf(x, %(shapes)s, loc=0, scale=1) + Log of the probability density function. +""" _doc_pmf = \ """pmf(x, %(shapes)s, loc=0, scale=1) Probability mass function. """ +_doc_logpmf = \ +"""logpmf(x, %(shapes)s, loc=0, scale=1) + Log of the probability mass function. +""" _doc_cdf = \ """cdf(x, %(shapes)s, loc=0, scale=1) Cumulative density function. """ +_doc_logcdf = \ +"""logcdf(x, %(shapes)s, loc=0, scale=1) + Log of the cumulative density function. +""" _doc_sf = \ """sf(x, %(shapes)s, loc=0, scale=1) Survival function (1-cdf --- sometimes more accurate). """ +_doc_logsf = \ +"""logsf(x, %(shapes)s, loc=0, scale=1) + Log of the survival function. +""" _doc_ppf = \ """ppf(q, %(shapes)s, loc=0, scale=1) Percent point function (inverse of cdf --- percentiles). @@ -141,6 +189,10 @@ _doc_isf = \ """isf(q, %(shapes)s, loc=0, scale=1) Inverse survival function (inverse of sf). """ +_doc_moment = \ +"""moment(n, %(shapes)s, loc=0, scale=1) + Non-central moment of order n +""" _doc_stats = \ """stats(%(shapes)s, loc=0, scale=1, moments='mv') Mean('m'), variance('v'), skew('s'), and/or kurtosis('k'). @@ -153,9 +205,40 @@ _doc_fit = \ """fit(data, %(shapes)s, loc=0, scale=1) Parameter estimates for generic data. """ +_doc_expect = \ +"""expect(func, %(shapes)s, loc=0, scale=1, lb=None, ub=None, conditional=False, **kwds) + Expected value of a function (of one argument) with respect to the distribution. +""" +_doc_expect_discrete = \ +"""expect(func, %(shapes)s, loc=0, lb=None, ub=None, conditional=False) + Expected value of a function (of one argument) with respect to the distribution. +""" +_doc_median = \ +"""median(%(shapes)s, loc=0, scale=1) + Median of the distribution. +""" +_doc_mean = \ +"""mean(%(shapes)s, loc=0, scale=1) + Mean of the distribution. +""" +_doc_var = \ +"""var(%(shapes)s, loc=0, scale=1) + Variance of the distribution. +""" +_doc_std = \ +"""std(%(shapes)s, loc=0, scale=1) + Standard deviation of the distribution. +""" +_doc_interval = \ +"""interval(alpha, %(shapes)s, loc=0, scale=1) + Endpoints of the range that contains alpha percent of the distribution +""" _doc_allmethods = ''.join([docheaders['methods'], _doc_rvs, _doc_pdf, - _doc_cdf, _doc_sf, _doc_ppf, _doc_isf, - _doc_stats, _doc_entropy, _doc_fit]) + _doc_logpdf, _doc_cdf, _doc_logcdf, _doc_sf, + _doc_logsf, _doc_ppf, _doc_isf, _doc_moment, + _doc_stats, _doc_entropy, _doc_fit, + _doc_expect, _doc_median, + _doc_mean, _doc_var, _doc_std, _doc_interval]) # Note that the two lines for %(shapes) are searched for and replaced in # rv_continuous and rv_discrete - update there if the exact string changes @@ -199,6 +282,7 @@ _doc_default_example = \ """Examples -------- >>> import matplotlib.pyplot as plt +>>> from scipy.stats import %(name)s >>> numargs = %(name)s.numargs >>> [ %(shapes)s ] = [0.9,] * numargs >>> rv = %(name)s(%(shapes)s) @@ -247,13 +331,23 @@ _doc_default_before_notes = ''.join([_doc_default_longsummary, docdict = {'rvs':_doc_rvs, 'pdf':_doc_pdf, + 'logpdf':_doc_logpdf, 'cdf':_doc_cdf, + 'logcdf':_doc_logcdf, 'sf':_doc_sf, + 'logsf':_doc_logsf, 'ppf':_doc_ppf, 'isf':_doc_isf, 'stats':_doc_stats, 'entropy':_doc_entropy, 'fit':_doc_fit, + 'moment':_doc_moment, + 'expect':_doc_expect, + 'interval':_doc_interval, + 'mean':_doc_mean, + 'std':_doc_std, + 'var':_doc_var, + 'median':_doc_median, 'allmethods':_doc_allmethods, 'callparams':_doc_default_callparams, 'longsummary':_doc_default_longsummary, @@ -267,11 +361,15 @@ docdict = {'rvs':_doc_rvs, docdict_discrete = docdict.copy() docdict_discrete['pmf'] = _doc_pmf -_doc_disc_methods = ['rvs', 'pmf', 'cdf', 'sf', 'ppf', 'isf', 'stats', - 'entropy', 'fit'] +docdict_discrete['logpmf'] = _doc_logpmf +docdict_discrete['expect'] = _doc_expect_discrete +_doc_disc_methods = ['rvs', 'pmf', 'logpmf', 'cdf', 'logcdf', 'sf', 'logsf', + 'ppf', 'isf', 'stats', 'entropy', 'fit', 'expect', 'median', + 'mean', 'var', 'std', 'interval'] for obj in _doc_disc_methods: docdict_discrete[obj] = docdict_discrete[obj].replace(', scale=1', '') docdict_discrete.pop('pdf') +docdict_discrete.pop('logpdf') _doc_allmethods = ''.join([docdict_discrete[obj] for obj in _doc_disc_methods]) @@ -362,9 +460,15 @@ class rv_frozen_old(object): def pdf(self, x): #raises AttributeError in frozen discrete distribution return self.dist.pdf(x, *self.args, **self.kwds) + def logpdf(self, x): + return self.dist.logpdf(x, *self.args, **self.kwds) + def cdf(self, x): return self.dist.cdf(x, *self.args, **self.kwds) + def logcdf(self, x): + return self.dist.logcdf(x, *self.args, **self.kwds) + def ppf(self, q): return self.dist.ppf(q, *self.args, **self.kwds) @@ -379,6 +483,9 @@ class rv_frozen_old(object): def sf(self, x): return self.dist.sf(x, *self.args, **self.kwds) + def logsf(self, x): + return self.dist.logsf(x, *self.args, **self.kwds) + def stats(self, moments='mv'): kwds = self.kwds.copy() kwds.update({'moments':moments}) @@ -402,6 +509,9 @@ class rv_frozen_old(object): def pmf(self,k): return self.dist.pmf(k, *self.args, **self.kwds) + def logpmf(self,k): + return self.dist.logpmf(k, *self.args, **self.kwds) + def interval(self, alpha): return self.dist.interval(alpha, *self.args, **self.kwds) @@ -463,9 +573,13 @@ class rv_frozen(object): def pdf(self, x): ''' Probability density function at x of the given RV.''' return self.dist.pdf(x, *self.par) + def logpdf(self, x): + return self.dist.logpdf(x, *self.par) def cdf(self, x): '''Cumulative distribution function at x of the given RV.''' return self.dist.cdf(x, *self.par) + def logcdf(self, x): + return self.dist.logcdf(x, *self.par) def ppf(self, q): '''Percent point function (inverse of cdf) at q of the given RV.''' return self.dist.ppf(q, *self.par) @@ -479,6 +593,8 @@ class rv_frozen(object): def sf(self, x): '''Survival function (1-cdf) at x of the given RV.''' return self.dist.sf(x, *self.par) + def logsf(self, x): + return self.dist.logsf(x, *self.par) def stats(self, moments='mv'): ''' Some statistics of the given RV''' kwds = dict(moments=moments) @@ -492,13 +608,14 @@ class rv_frozen(object): def std(self): return self.dist.std(*self.par) def moment(self, n): - par1 = self.par[:self.dist.numargs] - return self.dist.moment(n, *par1) + return self.dist.moment(n, *self.par) def entropy(self): return self.dist.entropy(*self.par) def pmf(self, k): '''Probability mass function at k of the given RV''' return self.dist.pmf(k, *self.par) + def logpmf(self,k): + return self.dist.logpmf(k, *self.par) def interval(self, alpha): return self.dist.interval(alpha, *self.par) @@ -1072,7 +1189,7 @@ class rv_continuous(rv_generic): Parameters ---------- momtype : int, optional - The type of generic moment calculation to use (check this). + The type of generic moment calculation to use: 0 for pdf, 1 (default) for ppf. a : float, optional Lower bound of the support of the distribution, default is minus infinity. @@ -1099,7 +1216,7 @@ class rv_continuous(rv_generic): The shape of the distribution. For example ``"m, n"`` for a distribution that takes two integers as the two shape arguments for all its methods. - extradoc : str, optional + extradoc : str, optional, deprecated This string is used as the last part of the docstring returned when a subclass has no docstring of its own. Note: `extradoc` exists for backwards compatibility, do not use for new subclasses. @@ -1112,20 +1229,29 @@ class rv_continuous(rv_generic): pdf(x, , loc=0, scale=1) probability density function + logpdf(x, , loc=0, scale=1) + log of the probability density function + cdf(x, , loc=0, scale=1) cumulative density function + logcdf(x, , loc=0, scale=1) + log of the cumulative density function + sf(x, , loc=0, scale=1) survival function (1-cdf --- sometimes more accurate) + logsf(x, , loc=0, scale=1) + log of the survival function + ppf(q, , loc=0, scale=1) percent point function (inverse of cdf --- quantiles) isf(q, , loc=0, scale=1) inverse survival function (inverse of sf) - moments(n, ) - non-central n-th moment of the standard distribution (oc=0, scale=1) + moment(n, , loc=0, scale=1) + non-central n-th moment of the distribution. May not work for array arguments. stats(, loc=0, scale=1, moments='mv') mean('m'), variance('v'), skew('s'), and/or kurtosis('k') @@ -1136,10 +1262,31 @@ class rv_continuous(rv_generic): fit(data, , loc=0, scale=1) Parameter estimates for generic data + expect(func=None, args=(), loc=0, scale=1, lb=None, ub=None, + conditional=False, **kwds) + Expected value of a function with respect to the distribution. + Additional kwd arguments passed to integrate.quad + + median(, loc=0, scale=1) + Median of the distribution. + + mean(, loc=0, scale=1) + Mean of the distribution. + + std(, loc=0, scale=1) + Standard deviation of the distribution. + + var(, loc=0, scale=1) + Variance of the distribution. + + interval(alpha, , loc=0, scale=1) + Interval that with `alpha` percent probability contains a random + realization of this distribution. + __call__(, loc=0, scale=1) - calling a distribution instance creates a frozen RV object with the + Calling a distribution instance creates a frozen RV object with the same methods but holding the given shape, location, and scale fixed. - see Notes section + See Notes section. **Parameters for Methods** @@ -1199,8 +1346,9 @@ class rv_continuous(rv_generic): New random variables can be defined by subclassing rv_continuous class and re-defining at least the - _pdf or the cdf method which will be given clean arguments (in between a - and b) and passing the argument check method + _pdf or the _cdf method (normalized to location 0 and scale 1) + which will be given clean arguments (in between a and b) and + passing the argument check method If postive argument checking is not correct for your RV then you will also need to re-define :: @@ -1210,9 +1358,9 @@ class rv_continuous(rv_generic): Correct, but potentially slow defaults exist for the remaining methods but for speed and/or accuracy you can over-ride :: - _cdf, _ppf, _rvs, _isf, _sf + _logpdf, _cdf, _logcdf, _ppf, _rvs, _isf, _sf, _logsf - Rarely would you override _isf and _sf but you could. + Rarely would you override _isf, _sf, and _logsf but you could. Statistics are computed using numerical integration by default. For speed you can redefine this using @@ -1498,7 +1646,7 @@ class rv_continuous(rv_generic): """ Log of the probability density function at x of the given RV. - This uses more numerically accurate calculation if available. + This uses a more numerically accurate calculation if available. Parameters ---------- @@ -1804,7 +1952,7 @@ class rv_continuous(rv_generic): proxy_value = self.a * scale + loc if product(shape(proxy_value)) != 1: - proxy_value = extract(cond2, proxy_value * cond2) + proxy_value = extract(cond2, proxy_value * cond2) place(output, cond2, proxy_value) @@ -1943,7 +2091,7 @@ class rv_continuous(rv_generic): else: return tuple(output) - def moment(self, n, *args): + def moment(self, n, *args, **kwds): """ n'th order non-central moment of distribution @@ -1952,17 +2100,24 @@ class rv_continuous(rv_generic): n: int, n>=1 order of moment - arg1, arg2, arg3,... : array-like + arg1, arg2, arg3,... : float The shape parameter(s) for the distribution (see docstring of the instance object for more information) + loc : float, optional + location parameter (default=0) + scale : float, optional + scale parameter (default=1) + """ - if not self._argcheck(*args): + loc, scale = map(kwds.get,['loc','scale']) + args, loc, scale = self.fix_loc_scale(args, loc, scale) + if not (self._argcheck(*args) and (scale > 0)): return nan if (floor(n) != n): raise ValueError("Moment must be an integer.") if (n < 0): raise ValueError("Moment must be positive.") - if (n == 0): return 1.0 + mu, mu2, g1, g2 = None, None, None, None if (n > 0) and (n < 5): signature = inspect.getargspec(self._stats.im_func) if (signature[2] is not None) or ('moments' in signature[0]): @@ -1970,28 +2125,20 @@ class rv_continuous(rv_generic): else: mdict = {} mu, mu2, g1, g2 = self._stats(*args,**mdict) - if (n==1): - if mu is None: return self._munp(1,*args) - else: return mu - elif (n==2): - if mu2 is None or mu is None: - return self._munp(2,*args) - else: return mu2 + mu*mu - elif (n==3): - if g1 is None or mu2 is None: - return self._munp(3,*args) - else: - mu3 = g1*(mu2**1.5) # 3rd central moment - return mu3+3*mu*mu2+mu**3 # 3rd non-central moment - else: # (n==4) - if g2 is None or mu2 is None: - return self._munp(4,*args) - else: - mu4 = (g2+3.0)*(mu2**2.0) # 4th central moment - mu3 = g1*(mu2**1.5) # 3rd central moment - return mu4+4*mu*mu3+6*mu*mu*mu2+mu**4 + val = _moment_from_stats(n, mu, mu2, g1, g2, self._munp, args) + + # Convert to transformed X = L + S*Y + # so E[X^n] = E[(L+S*Y)^n] = L^n sum(comb(n,k)*(S/L)^k E[Y^k],k=0...n) + if loc == 0: + return scale**n * val else: - return self._munp(n,*args) + result = 0 + fac = float(scale) / float(loc) + for k in range(n): + valk = _moment_from_stats(k, mu, mu2, g1, g2, self._munp, args) + result += comb(n,k,exact=True)*(fac**k) * valk + result += fac**n * val + return result * loc**n def nlogps(self, theta, x): """ Moran's negative log Product Spacings statistic @@ -2106,7 +2253,7 @@ class rv_continuous(rv_generic): return inf else: N = len(x) - return self._nnlf(x, *args) + N * log(scale) + return self._nnlf(x, *args) + N*log(scale) def hessian_nlogps(self, theta, data, eps=None): ''' approximate hessian of nlogps where theta are the parameters (including loc and scale) ''' @@ -2230,6 +2377,8 @@ class rv_continuous(rv_generic): args = (1.0,)*self.numargs return args + self.fit_loc_scale(data, *args) + # Return the (possibly reduced) function to optimize in order to find MLE + # estimates for the .fit method def _reduce_func(self, args, kwds): args = list(args) Nargs = len(args) @@ -2267,7 +2416,7 @@ class rv_continuous(rv_generic): def func(theta, x): newtheta = restore(args[:], theta) - return fitfun(newtheta, x) + return self.nnlf(newtheta, x) return x0, func, restore, args @@ -2281,9 +2430,10 @@ class rv_continuous(rv_generic): with starting estimates, ``self._fitstart(data)`` is called to generate such. - One can hold some parameters fixed to specific values by passing in - keyword arguments f0..fn for shape paramters and floc, fscale for - location and scale parameters, respectively. + One can hold some parameters fixed to specific values by passing in + keyword arguments ``f0``, ``f1``, ..., ``fn`` (for shape parameters) + and ``floc`` and ``fscale`` (for location and scale parameters, + respectively). Parameters ---------- @@ -2298,7 +2448,7 @@ class rv_continuous(rv_generic): Special keyword arguments are recognized as holding certain parameters fixed: - f0..fn : hold respective shape parameters fixed. + f0...fn : hold respective shape parameters fixed. floc : hold location parameter fixed to specified value. @@ -2497,6 +2647,9 @@ class rv_continuous(rv_generic): of the integration interval. The return value is the expectation of the function, conditional on being in the given interval. + Additional keyword arguments are passed to the integration routine. + + Returns ------- expected value : float @@ -2506,19 +2659,21 @@ class rv_continuous(rv_generic): This function has not been checked for it's behavior when the integral is not finite. The integration behavior is inherited from integrate.quad. """ + lockwds = {'loc': loc, + 'scale':scale} if func is None: def fun(x, *args): - return x*self.pdf(x, *args, **{'loc':loc, 'scale':scale}) + return x*self.pdf(x, *args, **lockwds) else: def fun(x, *args): - return func(x)*self.pdf(x, *args, **{'loc':loc, 'scale':scale}) + return func(x)*self.pdf(x, *args, **lockwds) if lb is None: lb = loc + self.a * scale if ub is None: ub = loc + self.b * scale if conditional: - invfac = (self.sf(lb, *args, **{'loc':loc, 'scale':scale}) - - self.sf(ub, *args, **{'loc':loc, 'scale':scale})) + invfac = (self.sf(lb, *args, **lockwds) + - self.sf(ub, *args, **lockwds)) else: invfac = 1.0 kwds['args'] = args @@ -3968,7 +4123,7 @@ class gumbel_l_gen(rv_continuous): def _logpdf(self, x): return x - exp(x) def _cdf(self, x): - return expm1(-exp(x)) + return -expm1(-exp(x)) def _ppf(self, q): return log(-log1p(-q)) def _stats(self): @@ -4990,8 +5145,6 @@ for -1 <= x <= 1, c > 0. # scale is the mode. class rayleigh_gen(rv_continuous): - #rayleigh_gen.link.__doc__ = rv_continuous.link.__doc__ - def link(self, x, logSF, phat, ix): rv_continuous.link.__doc__ if ix == 1: @@ -5181,16 +5334,16 @@ class truncexpon_gen(rv_continuous): #wrong answer with formula, same as in continuous.pdf #return gam(n+1)-special.gammainc(1+n,b) if n == 1: - return (1 - (b + 1) * exp(-b)) / (-expm1(-b)) + return (1-(b+1)*exp(-b))/(-expm1(-b)) elif n == 2: - return 2 * (1 - 0.5 * (b * b + 2 * b + 2) * exp(-b)) / (-expm1(-b)) + return 2*(1-0.5*(b*b+2*b+2)*exp(-b))/(-expm1(-b)) else: #return generic for higher moments #return rv_continuous._mom1_sc(self,n, b) return self._mom1_sc(n, b) def _entropy(self, b): eB = exp(b) - return log(eB - 1) + (1 + eB * (b - 1.0)) / (1.0 - eB) + return log(eB-1)+(1+eB*(b-1.0))/(1.0-eB) truncexpon = truncexpon_gen(a=0.0, name='truncexpon', longname="A truncated exponential", shapes="b", extradoc=""" @@ -5222,13 +5375,13 @@ class truncnorm_gen(rv_continuous): def _cdf(self, x, a, b): return (_norm_cdf(x) - self._na) / self._delta def _ppf(self, q, a, b): - return norm._ppf(q * self._nb + self._na * (1.0 - q)) + return norm._ppf(q*self._nb + self._na*(1.0-q)) def _stats(self, a, b): nA, nB = self._na, self._nb d = nB - nA pA, pB = _norm_pdf(a), _norm_pdf(b) mu = (pA - pB) / d #correction sign - mu2 = 1 + (a * pA - b * pB) / d - mu * mu + mu2 = 1 + (a*pA - b*pB) / d - mu*mu return mu, mu2, None, None truncnorm = truncnorm_gen(name='truncnorm', longname="A truncated normal", shapes="a, b", extradoc=""" @@ -5251,11 +5404,14 @@ Truncated Normal distribution. # FIXME: RVS does not work. class tukeylambda_gen(rv_continuous): + def _argcheck(self, lam): + # lam in RR. + return np.ones(np.shape(lam), dtype=bool) def _pdf(self, x, lam): Fx = arr(special.tklmbda(x,lam)) Px = Fx**(lam-1.0) + (arr(1-Fx))**(lam-1.0) Px = 1.0/arr(Px) - return where((lam > 0) & (abs(x) < 1.0/lam), Px, 0.0) + return where((lam <= 0) | (abs(x) < 1.0/arr(lam)), Px, 0.0) def _cdf(self, x, lam): return special.tklmbda(x, lam) def _ppf(self, q, lam): @@ -5638,24 +5794,59 @@ class rv_discrete(rv_generic): generic.pmf(x, , loc=0) probability mass function + logpmf(x, , loc=0) + log of the probability density function + generic.cdf(x, , loc=0) cumulative density function + generic.logcdf(x, , loc=0) + log of the cumulative density function + generic.sf(x, , loc=0) survival function (1-cdf --- sometimes more accurate) + generic.logsf(x, , loc=0, scale=1) + log of the survival function + generic.ppf(q, , loc=0) percent point function (inverse of cdf --- percentiles) generic.isf(q, , loc=0) inverse survival function (inverse of sf) + generic.moment(n, , loc=0) + non-central n-th moment of the distribution. May not work for array arguments. + generic.stats(, loc=0, moments='mv') mean('m', axis=0), variance('v'), skew('s'), and/or kurtosis('k') generic.entropy(, loc=0) entropy of the RV + generic.fit(data, , loc=0) + Parameter estimates for generic data + + generic.expect(func=None, args=(), loc=0, lb=None, ub=None, conditional=False) + Expected value of a function with respect to the distribution. + Additional kwd arguments passed to integrate.quad + + generic.median(, loc=0) + Median of the distribution. + + generic.mean(, loc=0) + Mean of the distribution. + + generic.std(, loc=0) + Standard deviation of the distribution. + + generic.var(, loc=0) + Variance of the distribution. + + generic.interval(alpha, , loc=0) + Interval that with `alpha` percent probability contains a random + realization of this distribution. + generic(, loc=0) calling a distribution instance returns a frozen distribution @@ -6355,17 +6546,24 @@ class rv_discrete(rv_generic): ---------- n: int, n>=1 order of moment - arg1, arg2, arg3,...: array-like + arg1, arg2, arg3,...: float The shape parameter(s) for the distribution (see docstring of the instance object for more information) + loc : float, optional + location parameter (default=0) + scale : float, optional + scale parameter (default=1) + """ - if not self._argcheck(*args): + loc = kwds.get('loc', 0) + scale = kwds.get('scale', 1) + if not (self._argcheck(*args) and (scale > 0)): return nan if (floor(n) != n): raise ValueError("Moment must be an integer.") if (n < 0): raise ValueError("Moment must be positive.") - if (n == 0): return 1.0 + mu, mu2, g1, g2 = None, None, None, None if (n > 0) and (n < 5): signature = inspect.getargspec(self._stats.im_func) if (signature[2] is not None) or ('moments' in signature[0]): @@ -6373,27 +6571,21 @@ class rv_discrete(rv_generic): else: dict = {} mu, mu2, g1, g2 = self._stats(*args,**dict) - if (n==1): - if mu is None: return self._munp(1,*args) - else: return mu - elif (n==2): - if mu2 is None or mu is None: return self._munp(2,*args) - else: return mu2 + mu*mu - elif (n==3): - if (g1 is None) or (mu2 is None) or (mu is None): - return self._munp(3,*args) - else: - mu3 = g1*(mu2**1.5) # 3rd central moment - return mu3+3*mu*mu2+mu**3 # 3rd non-central moment - else: # (n==4) - if (g2 is None) or (g1 is None) or (mu is None) or (mu2 is None): - return self._munp(4,*args) - else: - mu4 = (g2+3.0)*(mu2**2.0) # 4th central moment - mu3 = g1*(mu2**1.5) # 3rd central moment - return mu4+4*mu*mu3+6*mu*mu*mu2+mu**4 + val = _moment_from_stats(n, mu, mu2, g1, g2, self._munp, args) + + # Convert to transformed X = L + S*Y + # so E[X^n] = E[(L+S*Y)^n] = L^n sum(comb(n,k)*(S/L)^k E[Y^k],k=0...n) + if loc == 0: + return scale**n * val else: - return self._munp(n,*args) + result = 0 + fac = float(scale) / float(loc) + for k in range(n): + valk = _moment_from_stats(k, mu, mu2, g1, g2, self._munp, args) + result += comb(n,k,exact=True)*(fac**k) * valk + result += fac**n * val + return result * loc**n + def freeze(self, *args, **kwds): return rv_frozen(self, *args, **kwds) @@ -6440,7 +6632,7 @@ class rv_discrete(rv_generic): Parameters ---------- fn : function (default: identity mapping) - Function for which integral is calculated. Takes only one argument. + Function for which sum is calculated. Takes only one argument. args : tuple argument (parameters) of the distribution optional keyword parameters diff --git a/pywafo/src/wafo/stats/tests/test_distributions.py b/pywafo/src/wafo/stats/tests/test_distributions.py index 1df98ef..13ef9b6 100644 --- a/pywafo/src/wafo/stats/tests/test_distributions.py +++ b/pywafo/src/wafo/stats/tests/test_distributions.py @@ -14,7 +14,7 @@ import wafo.stats as stats from wafo.stats.distributions import argsreduce def kolmogorov_check(diststr, args=(), N=20, significance=0.01): - qtest = stats.ksoneisf(significance, N) + qtest = stats.ksone.isf(significance, N) cdf = eval('stats.'+diststr+'.cdf') dist = eval('stats.'+diststr) # Get random numbers @@ -242,6 +242,10 @@ class TestDLaplace(TestCase): assert_(isinstance(val, numpy.ndarray)) assert_(val.dtype.char in typecodes['AllInteger']) +def test_rvgeneric_std(): + """Regression test for #1191""" + assert_array_almost_equal(stats.t.std([5, 6]), [1.29099445, 1.22474487]) + class TestRvDiscrete(TestCase): def test_rvs(self): states = [-1,0,1,2,3,4] @@ -260,7 +264,7 @@ class TestRvDiscrete(TestCase): class TestExpon(TestCase): def test_zero(self): assert_equal(stats.expon.pdf(0),1) - + def test_tail(self): # Regression test for ticket 807 assert_equal(stats.expon.cdf(1e-18), 1e-18) assert_equal(stats.expon.isf(stats.expon.sf(40)), 40) @@ -277,7 +281,7 @@ class TestGenExpon(TestCase): # CDF should always be positive cdf = stats.genexpon.cdf(numpy.arange(0, 10, 0.01), 0.5, 0.5, 2.0) assert_(numpy.all((0 <= cdf) & (cdf <= 1))) - + class TestExponpow(TestCase): def test_tail(self): assert_almost_equal(stats.exponpow.cdf(1e-10, 2.), 1e-20) @@ -303,9 +307,9 @@ class TestSkellam(TestCase): 4.3268692953013159e-002, 3.0248159818374226e-002, 1.9991434305603021e-002, 1.2516877303301180e-002, 7.4389876226229707e-003]) - + assert_almost_equal(stats.skellam.pmf(k, mu1, mu2), skpmfR, decimal=15) - + def test_cdf(self): #comparison to R, only 5 decimals k = numpy.arange(-10, 15) @@ -328,7 +332,7 @@ class TestSkellam(TestCase): assert_almost_equal(stats.skellam.cdf(k, mu1, mu2), skcdfR, decimal=5) -class TestHypergeom(TestCase): +class TestHypergeom2(TestCase): def test_precision(self): # comparison number from mpmath M = 2500 @@ -407,7 +411,7 @@ class TestFitMethod(TestCase): else: assert_(len(vals) == 2+len(args)) assert_(len(vals2)==2+len(args)) - + @dec.slow def test_fix_fit(self): for func, dist, args, alpha in test_all_distributions(): @@ -425,15 +429,15 @@ class TestFitMethod(TestCase): assert_(len(vals2) == 2+len(args)) if len(args) > 0: vals3 = distfunc.fit(res, f0=args[0]) - assert_(len(vals3) == 2+len(args)) + assert_(len(vals3) == 2+len(args)) assert_(vals3[0] == args[0]) if len(args) > 1: vals4 = distfunc.fit(res, f1=args[1]) - assert_(len(vals4) == 2+len(args)) + assert_(len(vals4) == 2+len(args)) assert_(vals4[1] == args[1]) if len(args) > 2: vals5 = distfunc.fit(res, f2=args[2]) - assert_(len(vals5) == 2+len(args)) + assert_(len(vals5) == 2+len(args)) assert_(vals5[2] == args[2]) class TestFrozen(TestCase): @@ -487,7 +491,7 @@ class TestFrozen(TestCase): assert_equal(result_f, result) result_f = frozen.moment(2) - result = dist.moment(2) + result = dist.moment(2,loc=10.0, scale=3.0) assert_equal(result_f, result) def test_gamma(self): @@ -555,6 +559,100 @@ class TestFrozen(TestCase): # the focus of this test. assert_equal(m1, m2) +class TestExpect(TestCase): + """Test for expect method. + + Uses normal distribution and beta distribution for finite bounds, and + hypergeom for discrete distribution with finite support + + """ + def test_norm(self): + v = stats.norm.expect(lambda x: (x-5)*(x-5), loc=5, scale=2) + assert_almost_equal(v, 4, decimal=14) + + m = stats.norm.expect(lambda x: (x), loc=5, scale=2) + assert_almost_equal(m, 5, decimal=14) + + lb = stats.norm.ppf(0.05, loc=5, scale=2) + ub = stats.norm.ppf(0.95, loc=5, scale=2) + prob90 = stats.norm.expect(lambda x: 1, loc=5, scale=2, lb=lb, ub=ub) + assert_almost_equal(prob90, 0.9, decimal=14) + + prob90c = stats.norm.expect(lambda x: 1, loc=5, scale=2, lb=lb, ub=ub, + conditional=True) + assert_almost_equal(prob90c, 1., decimal=14) + + def test_beta(self): + #case with finite support interval +## >>> mtrue, vtrue = stats.beta.stats(10,5, loc=5., scale=2.) +## >>> mtrue, vtrue +## (array(6.333333333333333), array(0.055555555555555552)) + v = stats.beta.expect(lambda x: (x-19/3.)*(x-19/3.), args=(10,5), + loc=5, scale=2) + assert_almost_equal(v, 1./18., decimal=14) + + m = stats.beta.expect(lambda x: x, args=(10,5), loc=5., scale=2.) + assert_almost_equal(m, 19/3., decimal=14) + + ub = stats.beta.ppf(0.95, 10, 10, loc=5, scale=2) + lb = stats.beta.ppf(0.05, 10, 10, loc=5, scale=2) + prob90 = stats.beta.expect(lambda x: 1., args=(10,10), loc=5., + scale=2.,lb=lb, ub=ub, conditional=False) + assert_almost_equal(prob90, 0.9, decimal=14) + + prob90c = stats.beta.expect(lambda x: 1, args=(10,10), loc=5, + scale=2, lb=lb, ub=ub, conditional=True) + assert_almost_equal(prob90c, 1., decimal=14) + + + def test_hypergeom(self): + #test case with finite bounds + + #without specifying bounds + m_true, v_true = stats.hypergeom.stats(20, 10, 8, loc=5.) + m = stats.hypergeom.expect(lambda x: x, args=(20, 10, 8), loc=5.) + assert_almost_equal(m, m_true, decimal=13) + + v = stats.hypergeom.expect(lambda x: (x-9.)**2, args=(20, 10, 8), + loc=5.) + assert_almost_equal(v, v_true, decimal=14) + + #with bounds, bounds equal to shifted support + v_bounds = stats.hypergeom.expect(lambda x: (x-9.)**2, args=(20, 10, 8), + loc=5., lb=5, ub=13) + assert_almost_equal(v_bounds, v_true, decimal=14) + + #drop boundary points + prob_true = 1-stats.hypergeom.pmf([5, 13], 20, 10, 8, loc=5).sum() + prob_bounds = stats.hypergeom.expect(lambda x: 1, args=(20, 10, 8), + loc=5., lb=6, ub=12) + assert_almost_equal(prob_bounds, prob_true, decimal=13) + + #conditional + prob_bc = stats.hypergeom.expect(lambda x: 1, args=(20, 10, 8), loc=5., + lb=6, ub=12, conditional=True) + assert_almost_equal(prob_bc, 1, decimal=14) + + #check simple integral + prob_b = stats.hypergeom.expect(lambda x: 1, args=(20, 10, 8), + lb=0, ub=8) + assert_almost_equal(prob_b, 1, decimal=13) + + def test_poisson(self): + #poisson, use lower bound only + prob_bounds = stats.poisson.expect(lambda x: 1, args=(2,), lb=3, + conditional=False) + prob_b_true = 1-stats.poisson.cdf(2,2) + assert_almost_equal(prob_bounds, prob_b_true, decimal=14) + + + prob_lb = stats.poisson.expect(lambda x: 1, args=(2,), lb=2, + conditional=True) + assert_almost_equal(prob_lb, 1, decimal=14) + + + + def test_regression_ticket_1316(): """Regression test for ticket #1316.""" @@ -563,5 +661,26 @@ def test_regression_ticket_1316(): g = stats.distributions.gamma_gen(name='gamma') +def test_regression_ticket_1326(): + """Regression test for ticket #1326.""" + #adjust to avoid nan with 0*log(0) + assert_almost_equal(stats.chi2.pdf(0.0, 2), 0.5, 14) + +def test_regression_tukey_lambda(): + """ Make sure that Tukey-Lambda distribution correctly handles non-positive lambdas. + """ + x = np.linspace(-5.0, 5.0, 101) + for lam in [0.0, -1.0, -2.0, np.array([[-1.0], [0.0], [-2.0]])]: + p = stats.tukeylambda.pdf(x, lam) + assert_((p != 0.0).all()) + assert_(~np.isnan(p).all()) + lam = np.array([[-1.0], [0.0], [2.0]]) + p = stats.tukeylambda.pdf(x, lam) + assert_(~np.isnan(p).all()) + assert_((p[0] != 0.0).all()) + assert_((p[1] != 0.0).all()) + assert_((p[2] != 0.0).any()) + assert_((p[2] == 0.0).any()) + if __name__ == "__main__": run_module_suite()