Updated distributions.py and test_distributions.py according to the latest scipy.stats

Branch: master
Author: Per.Andreas.Brodtkorb, 14 years ago
Commit: 79fa7ab190 (parent: 8a0004ba59)

distributions.py
@ -1,8 +1,8 @@
# Functions to implement several important functions for # Functions to implement several important functions for
# various Continous and Discrete Probability Distributions # various Continous and Discrete Probability Distributions
# #
# Author: Travis Oliphant 2002-2010 with contributions from # Author: Travis Oliphant 2002-2011 with contributions from
# SciPy Developers 2004-2010 # SciPy Developers 2004-2011
# #
from __future__ import division from __future__ import division
@ -45,6 +45,38 @@ def _moment(data, n, mu=None):
mu = data.mean() mu = data.mean()
return ((data - mu)**n).mean() return ((data - mu)**n).mean()
def _moment_from_stats(n, mu, mu2, g1, g2, moment_func, args):
if (n==0):
return 1.0
elif (n==1):
if mu is None:
val = moment_func(1,*args)
else:
val = mu
elif (n==2):
if mu2 is None or mu is None:
val = moment_func(2,*args)
else:
val = mu2 + mu*mu
elif (n==3):
if g1 is None or mu2 is None or mu is None:
val = moment_func(3,*args)
else:
mu3 = g1*(mu2**1.5) # 3rd central moment
val = mu3+3*mu*mu2+mu**3 # 3rd non-central moment
elif (n==4):
if g1 is None or g2 is None or mu2 is None or mu is None:
val = moment_func(4,*args)
else:
mu4 = (g2+3.0)*(mu2**2.0) # 4th central moment
mu3 = g1*(mu2**1.5) # 3rd central moment
val = mu4+4*mu*mu3+6*mu*mu*mu2+mu**4
else:
val = moment_func(n, *args)
return val
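
As a quick sanity check (not part of the commit), the central-to-non-central identities that _moment_from_stats relies on can be verified numerically for a normal distribution with plain scipy:

# Sanity check: the identities behind _moment_from_stats for N(mu=2, sigma=3),
# compared with direct numerical integration.
import numpy as np
from scipy import integrate, stats

m, s = 2.0, 3.0
mu, mu2, g1, g2 = m, s**2, 0.0, 0.0            # mean, variance, skew, excess kurtosis
mu3 = g1*(mu2**1.5)                            # 3rd central moment
mu4 = (g2+3.0)*(mu2**2.0)                      # 4th central moment
e3 = mu3 + 3*mu*mu2 + mu**3                    # 3rd non-central moment
e4 = mu4 + 4*mu*mu3 + 6*mu*mu*mu2 + mu**4      # 4th non-central moment
pdf = stats.norm(m, s).pdf
e3_num = integrate.quad(lambda x: x**3 * pdf(x), -np.inf, np.inf)[0]
e4_num = integrate.quad(lambda x: x**4 * pdf(x), -np.inf, np.inf)[0]
print(np.allclose([e3, e4], [e3_num, e4_num]))  # True
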
def _skew(data): def _skew(data):
data = np.ravel(data) data = np.ravel(data)
mu = data.mean() mu = data.mean()
@ -121,18 +153,34 @@ _doc_pdf = \
"""pdf(x, %(shapes)s, loc=0, scale=1) """pdf(x, %(shapes)s, loc=0, scale=1)
Probability density function. Probability density function.
""" """
_doc_logpdf = \
"""logpdf(x, %(shapes)s, loc=0, scale=1)
Log of the probability density function.
"""
_doc_pmf = \ _doc_pmf = \
"""pmf(x, %(shapes)s, loc=0, scale=1) """pmf(x, %(shapes)s, loc=0, scale=1)
Probability mass function. Probability mass function.
""" """
_doc_logpmf = \
"""logpmf(x, %(shapes)s, loc=0, scale=1)
Log of the probability mass function.
"""
_doc_cdf = \ _doc_cdf = \
"""cdf(x, %(shapes)s, loc=0, scale=1) """cdf(x, %(shapes)s, loc=0, scale=1)
Cumulative density function. Cumulative density function.
""" """
_doc_logcdf = \
"""logcdf(x, %(shapes)s, loc=0, scale=1)
Log of the cumulative density function.
"""
_doc_sf = \ _doc_sf = \
"""sf(x, %(shapes)s, loc=0, scale=1) """sf(x, %(shapes)s, loc=0, scale=1)
Survival function (1-cdf --- sometimes more accurate). Survival function (1-cdf --- sometimes more accurate).
""" """
_doc_logsf = \
"""logsf(x, %(shapes)s, loc=0, scale=1)
Log of the survival function.
"""
_doc_ppf = \ _doc_ppf = \
"""ppf(q, %(shapes)s, loc=0, scale=1) """ppf(q, %(shapes)s, loc=0, scale=1)
Percent point function (inverse of cdf --- percentiles). Percent point function (inverse of cdf --- percentiles).
@ -141,6 +189,10 @@ _doc_isf = \
"""isf(q, %(shapes)s, loc=0, scale=1) """isf(q, %(shapes)s, loc=0, scale=1)
Inverse survival function (inverse of sf). Inverse survival function (inverse of sf).
""" """
_doc_moment = \
"""moment(n, %(shapes)s, loc=0, scale=1)
Non-central moment of order n
"""
_doc_stats = \ _doc_stats = \
"""stats(%(shapes)s, loc=0, scale=1, moments='mv') """stats(%(shapes)s, loc=0, scale=1, moments='mv')
Mean('m'), variance('v'), skew('s'), and/or kurtosis('k'). Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
@ -153,9 +205,40 @@ _doc_fit = \
"""fit(data, %(shapes)s, loc=0, scale=1) """fit(data, %(shapes)s, loc=0, scale=1)
Parameter estimates for generic data. Parameter estimates for generic data.
""" """
_doc_expect = \
"""expect(func, %(shapes)s, loc=0, scale=1, lb=None, ub=None, conditional=False, **kwds)
Expected value of a function (of one argument) with respect to the distribution.
"""
_doc_expect_discrete = \
"""expect(func, %(shapes)s, loc=0, lb=None, ub=None, conditional=False)
Expected value of a function (of one argument) with respect to the distribution.
"""
_doc_median = \
"""median(%(shapes)s, loc=0, scale=1)
Median of the distribution.
"""
_doc_mean = \
"""mean(%(shapes)s, loc=0, scale=1)
Mean of the distribution.
"""
_doc_var = \
"""var(%(shapes)s, loc=0, scale=1)
Variance of the distribution.
"""
_doc_std = \
"""std(%(shapes)s, loc=0, scale=1)
Standard deviation of the distribution.
"""
_doc_interval = \
"""interval(alpha, %(shapes)s, loc=0, scale=1)
Endpoints of the range that contains alpha percent of the distribution
"""
_doc_allmethods = ''.join([docheaders['methods'], _doc_rvs, _doc_pdf, _doc_allmethods = ''.join([docheaders['methods'], _doc_rvs, _doc_pdf,
_doc_cdf, _doc_sf, _doc_ppf, _doc_isf, _doc_logpdf, _doc_cdf, _doc_logcdf, _doc_sf,
_doc_stats, _doc_entropy, _doc_fit]) _doc_logsf, _doc_ppf, _doc_isf, _doc_moment,
_doc_stats, _doc_entropy, _doc_fit,
_doc_expect, _doc_median,
_doc_mean, _doc_var, _doc_std, _doc_interval])
# Note that the two lines for %(shapes) are searched for and replaced in # Note that the two lines for %(shapes) are searched for and replaced in
# rv_continuous and rv_discrete - update there if the exact string changes # rv_continuous and rv_discrete - update there if the exact string changes
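
For readers unfamiliar with the templating: these fragments are %-style templates, and the shape names are filled in later. A minimal illustration with a hypothetical 'a, b' shape string (not the actual substitution code):

# Illustration only: how a %(shapes)s template expands.
template = """pdf(x, %(shapes)s, loc=0, scale=1)
    Probability density function.
"""
print(template % {'shapes': 'a, b'})
# pdf(x, a, b, loc=0, scale=1)
#     Probability density function.
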
@ -199,6 +282,7 @@ _doc_default_example = \
"""Examples """Examples
-------- --------
>>> import matplotlib.pyplot as plt >>> import matplotlib.pyplot as plt
>>> from scipy.stats import %(name)s
>>> numargs = %(name)s.numargs >>> numargs = %(name)s.numargs
>>> [ %(shapes)s ] = [0.9,] * numargs >>> [ %(shapes)s ] = [0.9,] * numargs
>>> rv = %(name)s(%(shapes)s) >>> rv = %(name)s(%(shapes)s)
@ -247,13 +331,23 @@ _doc_default_before_notes = ''.join([_doc_default_longsummary,
docdict = {'rvs':_doc_rvs, docdict = {'rvs':_doc_rvs,
'pdf':_doc_pdf, 'pdf':_doc_pdf,
'logpdf':_doc_logpdf,
'cdf':_doc_cdf, 'cdf':_doc_cdf,
'logcdf':_doc_logcdf,
'sf':_doc_sf, 'sf':_doc_sf,
'logsf':_doc_logsf,
'ppf':_doc_ppf, 'ppf':_doc_ppf,
'isf':_doc_isf, 'isf':_doc_isf,
'stats':_doc_stats, 'stats':_doc_stats,
'entropy':_doc_entropy, 'entropy':_doc_entropy,
'fit':_doc_fit, 'fit':_doc_fit,
'moment':_doc_moment,
'expect':_doc_expect,
'interval':_doc_interval,
'mean':_doc_mean,
'std':_doc_std,
'var':_doc_var,
'median':_doc_median,
'allmethods':_doc_allmethods, 'allmethods':_doc_allmethods,
'callparams':_doc_default_callparams, 'callparams':_doc_default_callparams,
'longsummary':_doc_default_longsummary, 'longsummary':_doc_default_longsummary,
@ -267,11 +361,15 @@ docdict = {'rvs':_doc_rvs,
docdict_discrete = docdict.copy() docdict_discrete = docdict.copy()
docdict_discrete['pmf'] = _doc_pmf docdict_discrete['pmf'] = _doc_pmf
_doc_disc_methods = ['rvs', 'pmf', 'cdf', 'sf', 'ppf', 'isf', 'stats', docdict_discrete['logpmf'] = _doc_logpmf
'entropy', 'fit'] docdict_discrete['expect'] = _doc_expect_discrete
_doc_disc_methods = ['rvs', 'pmf', 'logpmf', 'cdf', 'logcdf', 'sf', 'logsf',
'ppf', 'isf', 'stats', 'entropy', 'fit', 'expect', 'median',
'mean', 'var', 'std', 'interval']
for obj in _doc_disc_methods: for obj in _doc_disc_methods:
docdict_discrete[obj] = docdict_discrete[obj].replace(', scale=1', '') docdict_discrete[obj] = docdict_discrete[obj].replace(', scale=1', '')
docdict_discrete.pop('pdf') docdict_discrete.pop('pdf')
docdict_discrete.pop('logpdf')
_doc_allmethods = ''.join([docdict_discrete[obj] for obj in _doc_allmethods = ''.join([docdict_discrete[obj] for obj in
_doc_disc_methods]) _doc_disc_methods])
@ -362,9 +460,15 @@ class rv_frozen_old(object):
def pdf(self, x): #raises AttributeError in frozen discrete distribution def pdf(self, x): #raises AttributeError in frozen discrete distribution
return self.dist.pdf(x, *self.args, **self.kwds) return self.dist.pdf(x, *self.args, **self.kwds)
def logpdf(self, x):
return self.dist.logpdf(x, *self.args, **self.kwds)
def cdf(self, x): def cdf(self, x):
return self.dist.cdf(x, *self.args, **self.kwds) return self.dist.cdf(x, *self.args, **self.kwds)
def logcdf(self, x):
return self.dist.logcdf(x, *self.args, **self.kwds)
def ppf(self, q): def ppf(self, q):
return self.dist.ppf(q, *self.args, **self.kwds) return self.dist.ppf(q, *self.args, **self.kwds)
@ -379,6 +483,9 @@ class rv_frozen_old(object):
def sf(self, x): def sf(self, x):
return self.dist.sf(x, *self.args, **self.kwds) return self.dist.sf(x, *self.args, **self.kwds)
def logsf(self, x):
return self.dist.logsf(x, *self.args, **self.kwds)
def stats(self, moments='mv'): def stats(self, moments='mv'):
kwds = self.kwds.copy() kwds = self.kwds.copy()
kwds.update({'moments':moments}) kwds.update({'moments':moments})
@ -402,6 +509,9 @@ class rv_frozen_old(object):
def pmf(self,k): def pmf(self,k):
return self.dist.pmf(k, *self.args, **self.kwds) return self.dist.pmf(k, *self.args, **self.kwds)
def logpmf(self,k):
return self.dist.logpmf(k, *self.args, **self.kwds)
def interval(self, alpha): def interval(self, alpha):
return self.dist.interval(alpha, *self.args, **self.kwds) return self.dist.interval(alpha, *self.args, **self.kwds)
@ -463,9 +573,13 @@ class rv_frozen(object):
def pdf(self, x): def pdf(self, x):
''' Probability density function at x of the given RV.''' ''' Probability density function at x of the given RV.'''
return self.dist.pdf(x, *self.par) return self.dist.pdf(x, *self.par)
def logpdf(self, x):
return self.dist.logpdf(x, *self.par)
def cdf(self, x): def cdf(self, x):
'''Cumulative distribution function at x of the given RV.''' '''Cumulative distribution function at x of the given RV.'''
return self.dist.cdf(x, *self.par) return self.dist.cdf(x, *self.par)
def logcdf(self, x):
return self.dist.logcdf(x, *self.par)
def ppf(self, q): def ppf(self, q):
'''Percent point function (inverse of cdf) at q of the given RV.''' '''Percent point function (inverse of cdf) at q of the given RV.'''
return self.dist.ppf(q, *self.par) return self.dist.ppf(q, *self.par)
@ -479,6 +593,8 @@ class rv_frozen(object):
def sf(self, x): def sf(self, x):
'''Survival function (1-cdf) at x of the given RV.''' '''Survival function (1-cdf) at x of the given RV.'''
return self.dist.sf(x, *self.par) return self.dist.sf(x, *self.par)
def logsf(self, x):
return self.dist.logsf(x, *self.par)
def stats(self, moments='mv'): def stats(self, moments='mv'):
''' Some statistics of the given RV''' ''' Some statistics of the given RV'''
kwds = dict(moments=moments) kwds = dict(moments=moments)
@ -492,13 +608,14 @@ class rv_frozen(object):
def std(self): def std(self):
return self.dist.std(*self.par) return self.dist.std(*self.par)
def moment(self, n): def moment(self, n):
par1 = self.par[:self.dist.numargs] return self.dist.moment(n, *self.par)
return self.dist.moment(n, *par1)
def entropy(self): def entropy(self):
return self.dist.entropy(*self.par) return self.dist.entropy(*self.par)
def pmf(self, k): def pmf(self, k):
'''Probability mass function at k of the given RV''' '''Probability mass function at k of the given RV'''
return self.dist.pmf(k, *self.par) return self.dist.pmf(k, *self.par)
def logpmf(self,k):
return self.dist.logpmf(k, *self.par)
def interval(self, alpha): def interval(self, alpha):
return self.dist.interval(alpha, *self.par) return self.dist.interval(alpha, *self.par)
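
Each of the new frozen log-methods just forwards to the parent distribution with the stored parameters. A short usage sketch, assuming a wafo.stats build that provides the gamma distribution:

# Usage sketch: the frozen object forwards the new log-methods to the parent
# distribution with its stored shape/loc/scale.
import wafo.stats as stats

rv = stats.gamma(2.5, loc=0., scale=2.)   # frozen gamma RV
print(rv.logpdf(3.0))    # same as stats.gamma.logpdf(3.0, 2.5, scale=2.)
print(rv.logsf(3.0))     # same as stats.gamma.logsf(3.0, 2.5, scale=2.)
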
@ -1072,7 +1189,7 @@ class rv_continuous(rv_generic):
Parameters Parameters
---------- ----------
momtype : int, optional momtype : int, optional
The type of generic moment calculation to use (check this). The type of generic moment calculation to use: 0 for pdf, 1 (default) for ppf.
a : float, optional a : float, optional
Lower bound of the support of the distribution, default is minus Lower bound of the support of the distribution, default is minus
infinity. infinity.
@ -1099,7 +1216,7 @@ class rv_continuous(rv_generic):
The shape of the distribution. For example ``"m, n"`` for a The shape of the distribution. For example ``"m, n"`` for a
distribution that takes two integers as the two shape arguments for all distribution that takes two integers as the two shape arguments for all
its methods. its methods.
extradoc : str, optional extradoc : str, optional, deprecated
This string is used as the last part of the docstring returned when a This string is used as the last part of the docstring returned when a
subclass has no docstring of its own. Note: `extradoc` exists for subclass has no docstring of its own. Note: `extradoc` exists for
backwards compatibility, do not use for new subclasses. backwards compatibility, do not use for new subclasses.
@ -1112,20 +1229,29 @@ class rv_continuous(rv_generic):
pdf(x, <shape(s)>, loc=0, scale=1) pdf(x, <shape(s)>, loc=0, scale=1)
probability density function probability density function
logpdf(x, <shape(s)>, loc=0, scale=1)
log of the probability density function
cdf(x, <shape(s)>, loc=0, scale=1) cdf(x, <shape(s)>, loc=0, scale=1)
cumulative density function cumulative density function
logcdf(x, <shape(s)>, loc=0, scale=1)
log of the cumulative density function
sf(x, <shape(s)>, loc=0, scale=1) sf(x, <shape(s)>, loc=0, scale=1)
survival function (1-cdf --- sometimes more accurate) survival function (1-cdf --- sometimes more accurate)
logsf(x, <shape(s)>, loc=0, scale=1)
log of the survival function
ppf(q, <shape(s)>, loc=0, scale=1) ppf(q, <shape(s)>, loc=0, scale=1)
percent point function (inverse of cdf --- quantiles) percent point function (inverse of cdf --- quantiles)
isf(q, <shape(s)>, loc=0, scale=1) isf(q, <shape(s)>, loc=0, scale=1)
inverse survival function (inverse of sf) inverse survival function (inverse of sf)
moments(n, <shape(s)>) moment(n, <shape(s)>, loc=0, scale=1)
non-central n-th moment of the standard distribution (oc=0, scale=1) non-central n-th moment of the distribution. May not work for array arguments.
stats(<shape(s)>, loc=0, scale=1, moments='mv') stats(<shape(s)>, loc=0, scale=1, moments='mv')
mean('m'), variance('v'), skew('s'), and/or kurtosis('k') mean('m'), variance('v'), skew('s'), and/or kurtosis('k')
@ -1136,10 +1262,31 @@ class rv_continuous(rv_generic):
fit(data, <shape(s)>, loc=0, scale=1) fit(data, <shape(s)>, loc=0, scale=1)
Parameter estimates for generic data Parameter estimates for generic data
expect(func=None, args=(), loc=0, scale=1, lb=None, ub=None,
conditional=False, **kwds)
Expected value of a function with respect to the distribution.
Additional kwd arguments passed to integrate.quad
median(<shape(s)>, loc=0, scale=1)
Median of the distribution.
mean(<shape(s)>, loc=0, scale=1)
Mean of the distribution.
std(<shape(s)>, loc=0, scale=1)
Standard deviation of the distribution.
var(<shape(s)>, loc=0, scale=1)
Variance of the distribution.
interval(alpha, <shape(s)>, loc=0, scale=1)
Interval that with `alpha` percent probability contains a random
realization of this distribution.
__call__(<shape(s)>, loc=0, scale=1) __call__(<shape(s)>, loc=0, scale=1)
calling a distribution instance creates a frozen RV object with the Calling a distribution instance creates a frozen RV object with the
same methods but holding the given shape, location, and scale fixed. same methods but holding the given shape, location, and scale fixed.
see Notes section See Notes section.
**Parameters for Methods** **Parameters for Methods**
@ -1199,8 +1346,9 @@ class rv_continuous(rv_generic):
New random variables can be defined by subclassing rv_continuous class New random variables can be defined by subclassing rv_continuous class
and re-defining at least the and re-defining at least the
_pdf or the cdf method which will be given clean arguments (in between a _pdf or the _cdf method (normalized to location 0 and scale 1)
and b) and passing the argument check method which will be given clean arguments (in between a and b) and
passing the argument check method
If postive argument checking is not correct for your RV If postive argument checking is not correct for your RV
then you will also need to re-define :: then you will also need to re-define ::
@ -1210,9 +1358,9 @@ class rv_continuous(rv_generic):
Correct, but potentially slow defaults exist for the remaining Correct, but potentially slow defaults exist for the remaining
methods but for speed and/or accuracy you can over-ride :: methods but for speed and/or accuracy you can over-ride ::
_cdf, _ppf, _rvs, _isf, _sf _logpdf, _cdf, _logcdf, _ppf, _rvs, _isf, _sf, _logsf
Rarely would you override _isf and _sf but you could. Rarely would you override _isf, _sf, and _logsf but you could.
Statistics are computed using numerical integration by default. Statistics are computed using numerical integration by default.
For speed you can redefine this using For speed you can redefine this using
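
For reference, a minimal subclass along the lines described above (illustrative only; the semicircle density and the names used here are not part of the commit):

# Minimal subclassing sketch: a Wigner semicircle density defined only
# through _pdf; cdf, ppf and stats fall back to the generic numerical code.
import numpy as np
from wafo.stats.distributions import rv_continuous   # or scipy.stats.rv_continuous

class semicircle_gen(rv_continuous):
    def _pdf(self, x):
        return 2.0/np.pi * np.sqrt(1.0 - x*x)

semicircle = semicircle_gen(a=-1.0, b=1.0, name='semicircle')
print(semicircle.cdf(0.0))                 # 0.5, via numerical integration of _pdf
print(semicircle.stats(moments='mv'))      # ~0.0 and 0.25
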
@ -1498,7 +1646,7 @@ class rv_continuous(rv_generic):
""" """
Log of the probability density function at x of the given RV. Log of the probability density function at x of the given RV.
This uses more numerically accurate calculation if available. This uses a more numerically accurate calculation if available.
Parameters Parameters
---------- ----------
@ -1804,7 +1952,7 @@ class rv_continuous(rv_generic):
proxy_value = self.a * scale + loc proxy_value = self.a * scale + loc
if product(shape(proxy_value)) != 1: if product(shape(proxy_value)) != 1:
proxy_value = extract(cond2, proxy_value * cond2) proxy_value = extract(cond2, proxy_value * cond2)
place(output, cond2, proxy_value) place(output, cond2, proxy_value)
@ -1943,7 +2091,7 @@ class rv_continuous(rv_generic):
else: else:
return tuple(output) return tuple(output)
def moment(self, n, *args): def moment(self, n, *args, **kwds):
""" """
n'th order non-central moment of distribution n'th order non-central moment of distribution
@ -1952,17 +2100,24 @@ class rv_continuous(rv_generic):
n: int, n>=1 n: int, n>=1
order of moment order of moment
arg1, arg2, arg3,... : array-like arg1, arg2, arg3,... : float
The shape parameter(s) for the distribution (see docstring of the The shape parameter(s) for the distribution (see docstring of the
instance object for more information) instance object for more information)
loc : float, optional
location parameter (default=0)
scale : float, optional
scale parameter (default=1)
""" """
if not self._argcheck(*args): loc, scale = map(kwds.get,['loc','scale'])
args, loc, scale = self.fix_loc_scale(args, loc, scale)
if not (self._argcheck(*args) and (scale > 0)):
return nan return nan
if (floor(n) != n): if (floor(n) != n):
raise ValueError("Moment must be an integer.") raise ValueError("Moment must be an integer.")
if (n < 0): raise ValueError("Moment must be positive.") if (n < 0): raise ValueError("Moment must be positive.")
if (n == 0): return 1.0 mu, mu2, g1, g2 = None, None, None, None
if (n > 0) and (n < 5): if (n > 0) and (n < 5):
signature = inspect.getargspec(self._stats.im_func) signature = inspect.getargspec(self._stats.im_func)
if (signature[2] is not None) or ('moments' in signature[0]): if (signature[2] is not None) or ('moments' in signature[0]):
@ -1970,28 +2125,20 @@ class rv_continuous(rv_generic):
else: else:
mdict = {} mdict = {}
mu, mu2, g1, g2 = self._stats(*args,**mdict) mu, mu2, g1, g2 = self._stats(*args,**mdict)
if (n==1): val = _moment_from_stats(n, mu, mu2, g1, g2, self._munp, args)
if mu is None: return self._munp(1,*args)
else: return mu # Convert to transformed X = L + S*Y
elif (n==2): # so E[X^n] = E[(L+S*Y)^n] = L^n sum(comb(n,k)*(S/L)^k E[Y^k],k=0...n)
if mu2 is None or mu is None: if loc == 0:
return self._munp(2,*args) return scale**n * val
else: return mu2 + mu*mu
elif (n==3):
if g1 is None or mu2 is None:
return self._munp(3,*args)
else:
mu3 = g1*(mu2**1.5) # 3rd central moment
return mu3+3*mu*mu2+mu**3 # 3rd non-central moment
else: # (n==4)
if g2 is None or mu2 is None:
return self._munp(4,*args)
else:
mu4 = (g2+3.0)*(mu2**2.0) # 4th central moment
mu3 = g1*(mu2**1.5) # 3rd central moment
return mu4+4*mu*mu3+6*mu*mu*mu2+mu**4
else: else:
return self._munp(n,*args) result = 0
fac = float(scale) / float(loc)
for k in range(n):
valk = _moment_from_stats(k, mu, mu2, g1, g2, self._munp, args)
result += comb(n,k,exact=True)*(fac**k) * valk
result += fac**n * val
return result * loc**n
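
A quick check of the new loc/scale handling, assuming this patched build or a recent scipy.stats: for X = loc + scale*Y with Y standard normal, E[X**3] = loc**3 + 3*loc*scale**2.

import wafo.stats as stats

loc, scale = 2.0, 3.0
print(stats.norm.moment(3, loc=loc, scale=scale))   # 62.0
print(loc**3 + 3*loc*scale**2)                      # 62.0
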
def nlogps(self, theta, x): def nlogps(self, theta, x):
""" Moran's negative log Product Spacings statistic """ Moran's negative log Product Spacings statistic
@ -2106,7 +2253,7 @@ class rv_continuous(rv_generic):
return inf return inf
else: else:
N = len(x) N = len(x)
return self._nnlf(x, *args) + N * log(scale) return self._nnlf(x, *args) + N*log(scale)
def hessian_nlogps(self, theta, data, eps=None): def hessian_nlogps(self, theta, data, eps=None):
''' approximate hessian of nlogps where theta are the parameters (including loc and scale) ''' approximate hessian of nlogps where theta are the parameters (including loc and scale)
''' '''
@ -2230,6 +2377,8 @@ class rv_continuous(rv_generic):
args = (1.0,)*self.numargs args = (1.0,)*self.numargs
return args + self.fit_loc_scale(data, *args) return args + self.fit_loc_scale(data, *args)
# Return the (possibly reduced) function to optimize in order to find MLE
# estimates for the .fit method
def _reduce_func(self, args, kwds): def _reduce_func(self, args, kwds):
args = list(args) args = list(args)
Nargs = len(args) Nargs = len(args)
@ -2267,7 +2416,7 @@ class rv_continuous(rv_generic):
def func(theta, x): def func(theta, x):
newtheta = restore(args[:], theta) newtheta = restore(args[:], theta)
return fitfun(newtheta, x) return self.nnlf(newtheta, x)
return x0, func, restore, args return x0, func, restore, args
@ -2282,8 +2431,9 @@ class rv_continuous(rv_generic):
such. such.
One can hold some parameters fixed to specific values by passing in One can hold some parameters fixed to specific values by passing in
keyword arguments f0..fn for shape paramters and floc, fscale for keyword arguments ``f0``, ``f1``, ..., ``fn`` (for shape parameters)
location and scale parameters, respectively. and ``floc`` and ``fscale`` (for location and scale parameters,
respectively).
Parameters Parameters
---------- ----------
@ -2298,7 +2448,7 @@ class rv_continuous(rv_generic):
Special keyword arguments are recognized as holding certain Special keyword arguments are recognized as holding certain
parameters fixed: parameters fixed:
f0..fn : hold respective shape parameters fixed. f0...fn : hold respective shape parameters fixed.
floc : hold location parameter fixed to specified value. floc : hold location parameter fixed to specified value.
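
A short sketch of the fixed-parameter keywords in use, assuming the gamma distribution and its rvs/fit methods are available:

# floc pins the location; f0 would pin the first shape parameter.
import numpy as np
import wafo.stats as stats

np.random.seed(0)
data = stats.gamma.rvs(2.5, loc=0., scale=2., size=1000)
params = stats.gamma.fit(data, floc=0)   # location held fixed at 0
print(params)                            # (shape, loc=0.0, scale) estimates
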
@ -2497,6 +2647,9 @@ class rv_continuous(rv_generic):
of the integration interval. The return value is the expectation of the integration interval. The return value is the expectation
of the function, conditional on being in the given interval. of the function, conditional on being in the given interval.
Additional keyword arguments are passed to the integration routine.
Returns Returns
------- -------
expected value : float expected value : float
@ -2506,19 +2659,21 @@ class rv_continuous(rv_generic):
This function has not been checked for it's behavior when the integral is This function has not been checked for it's behavior when the integral is
not finite. The integration behavior is inherited from integrate.quad. not finite. The integration behavior is inherited from integrate.quad.
""" """
lockwds = {'loc': loc,
'scale':scale}
if func is None: if func is None:
def fun(x, *args): def fun(x, *args):
return x*self.pdf(x, *args, **{'loc':loc, 'scale':scale}) return x*self.pdf(x, *args, **lockwds)
else: else:
def fun(x, *args): def fun(x, *args):
return func(x)*self.pdf(x, *args, **{'loc':loc, 'scale':scale}) return func(x)*self.pdf(x, *args, **lockwds)
if lb is None: if lb is None:
lb = loc + self.a * scale lb = loc + self.a * scale
if ub is None: if ub is None:
ub = loc + self.b * scale ub = loc + self.b * scale
if conditional: if conditional:
invfac = (self.sf(lb, *args, **{'loc':loc, 'scale':scale}) invfac = (self.sf(lb, *args, **lockwds)
- self.sf(ub, *args, **{'loc':loc, 'scale':scale})) - self.sf(ub, *args, **lockwds))
else: else:
invfac = 1.0 invfac = 1.0
kwds['args'] = args kwds['args'] = args
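
expect() integrates func(x) against pdf(x, loc=loc, scale=scale) over [lb, ub] via integrate.quad. A minimal check that mirrors the TestExpect cases added further down:

import wafo.stats as stats

v = stats.norm.expect(lambda x: (x - 5)**2, loc=5, scale=2)
print(v)   # 4.0 up to quad tolerance
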
@ -3968,7 +4123,7 @@ class gumbel_l_gen(rv_continuous):
def _logpdf(self, x): def _logpdf(self, x):
return x - exp(x) return x - exp(x)
def _cdf(self, x): def _cdf(self, x):
return expm1(-exp(x)) return -expm1(-exp(x))
def _ppf(self, q): def _ppf(self, q):
return log(-log1p(-q)) return log(-log1p(-q))
def _stats(self): def _stats(self):
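
The sign fix makes the gumbel_l CDF equal to 1 - exp(-exp(x)) = -expm1(-exp(x)). An illustrative round-trip check against the unchanged _ppf:

import numpy as np
import wafo.stats as stats

x = np.linspace(-3., 2., 7)
q = stats.gumbel_l.cdf(x)
print((q >= 0).all() and (q <= 1).all())        # True with the sign fix
print(np.allclose(stats.gumbel_l.ppf(q), x))    # True
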
@ -4990,8 +5145,6 @@ for -1 <= x <= 1, c > 0.
# scale is the mode. # scale is the mode.
class rayleigh_gen(rv_continuous): class rayleigh_gen(rv_continuous):
#rayleigh_gen.link.__doc__ = rv_continuous.link.__doc__
def link(self, x, logSF, phat, ix): def link(self, x, logSF, phat, ix):
rv_continuous.link.__doc__ rv_continuous.link.__doc__
if ix == 1: if ix == 1:
@ -5181,16 +5334,16 @@ class truncexpon_gen(rv_continuous):
#wrong answer with formula, same as in continuous.pdf #wrong answer with formula, same as in continuous.pdf
#return gam(n+1)-special.gammainc(1+n,b) #return gam(n+1)-special.gammainc(1+n,b)
if n == 1: if n == 1:
return (1 - (b + 1) * exp(-b)) / (-expm1(-b)) return (1-(b+1)*exp(-b))/(-expm1(-b))
elif n == 2: elif n == 2:
return 2 * (1 - 0.5 * (b * b + 2 * b + 2) * exp(-b)) / (-expm1(-b)) return 2*(1-0.5*(b*b+2*b+2)*exp(-b))/(-expm1(-b))
else: else:
#return generic for higher moments #return generic for higher moments
#return rv_continuous._mom1_sc(self,n, b) #return rv_continuous._mom1_sc(self,n, b)
return self._mom1_sc(n, b) return self._mom1_sc(n, b)
def _entropy(self, b): def _entropy(self, b):
eB = exp(b) eB = exp(b)
return log(eB - 1) + (1 + eB * (b - 1.0)) / (1.0 - eB) return log(eB-1)+(1+eB*(b-1.0))/(1.0-eB)
truncexpon = truncexpon_gen(a=0.0, name='truncexpon', truncexpon = truncexpon_gen(a=0.0, name='truncexpon',
longname="A truncated exponential", longname="A truncated exponential",
shapes="b", extradoc=""" shapes="b", extradoc="""
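
The closed-form low-order moments above can be cross-checked against the generic expect() machinery (illustration only, assuming this build):

import wafo.stats as stats

b = 1.5
print(stats.truncexpon.moment(1, b))                    # (1-(b+1)*exp(-b))/(1-exp(-b))
print(stats.truncexpon.expect(lambda x: x, args=(b,)))  # same, by numerical integration
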
@ -5222,13 +5375,13 @@ class truncnorm_gen(rv_continuous):
def _cdf(self, x, a, b): def _cdf(self, x, a, b):
return (_norm_cdf(x) - self._na) / self._delta return (_norm_cdf(x) - self._na) / self._delta
def _ppf(self, q, a, b): def _ppf(self, q, a, b):
return norm._ppf(q * self._nb + self._na * (1.0 - q)) return norm._ppf(q*self._nb + self._na*(1.0-q))
def _stats(self, a, b): def _stats(self, a, b):
nA, nB = self._na, self._nb nA, nB = self._na, self._nb
d = nB - nA d = nB - nA
pA, pB = _norm_pdf(a), _norm_pdf(b) pA, pB = _norm_pdf(a), _norm_pdf(b)
mu = (pA - pB) / d #correction sign mu = (pA - pB) / d #correction sign
mu2 = 1 + (a * pA - b * pB) / d - mu * mu mu2 = 1 + (a*pA - b*pB) / d - mu*mu
return mu, mu2, None, None return mu, mu2, None, None
truncnorm = truncnorm_gen(name='truncnorm', longname="A truncated normal", truncnorm = truncnorm_gen(name='truncnorm', longname="A truncated normal",
shapes="a, b", extradoc=""" shapes="a, b", extradoc="""
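
The closed-form mean and variance returned by _stats can be cross-checked against numerical integration via expect() (illustration only):

import wafo.stats as stats

a, b = -1.0, 2.0
m, v = stats.truncnorm.stats(a, b, moments='mv')
m_num = stats.truncnorm.expect(lambda x: x, args=(a, b))
v_num = stats.truncnorm.expect(lambda x: (x - m_num)**2, args=(a, b))
print(m, m_num)    # agree to quad precision
print(v, v_num)
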
@ -5251,11 +5404,14 @@ Truncated Normal distribution.
# FIXME: RVS does not work. # FIXME: RVS does not work.
class tukeylambda_gen(rv_continuous): class tukeylambda_gen(rv_continuous):
def _argcheck(self, lam):
# lam in RR.
return np.ones(np.shape(lam), dtype=bool)
def _pdf(self, x, lam): def _pdf(self, x, lam):
Fx = arr(special.tklmbda(x,lam)) Fx = arr(special.tklmbda(x,lam))
Px = Fx**(lam-1.0) + (arr(1-Fx))**(lam-1.0) Px = Fx**(lam-1.0) + (arr(1-Fx))**(lam-1.0)
Px = 1.0/arr(Px) Px = 1.0/arr(Px)
return where((lam > 0) & (abs(x) < 1.0/lam), Px, 0.0) return where((lam <= 0) | (abs(x) < 1.0/arr(lam)), Px, 0.0)
def _cdf(self, x, lam): def _cdf(self, x, lam):
return special.tklmbda(x, lam) return special.tklmbda(x, lam)
def _ppf(self, q, lam): def _ppf(self, q, lam):
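
With the relaxed _argcheck and the corrected support test in _pdf, non-positive lambdas now work; lam = 0 reduces to the logistic distribution (assuming the standard logistic distribution is available; compare the regression test added below):

import numpy as np
import wafo.stats as stats

x = np.linspace(-5., 5., 11)
print((stats.tukeylambda.pdf(x, -1.0) > 0).all())                          # True
print(np.allclose(stats.tukeylambda.pdf(x, 0.0), stats.logistic.pdf(x)))   # True
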
@ -5638,24 +5794,59 @@ class rv_discrete(rv_generic):
generic.pmf(x, <shape(s)>, loc=0) generic.pmf(x, <shape(s)>, loc=0)
probability mass function probability mass function
logpmf(x, <shape(s)>, loc=0)
log of the probability density function
generic.cdf(x, <shape(s)>, loc=0) generic.cdf(x, <shape(s)>, loc=0)
cumulative density function cumulative density function
generic.logcdf(x, <shape(s)>, loc=0)
log of the cumulative density function
generic.sf(x, <shape(s)>, loc=0) generic.sf(x, <shape(s)>, loc=0)
survival function (1-cdf --- sometimes more accurate) survival function (1-cdf --- sometimes more accurate)
generic.logsf(x, <shape(s)>, loc=0, scale=1)
log of the survival function
generic.ppf(q, <shape(s)>, loc=0) generic.ppf(q, <shape(s)>, loc=0)
percent point function (inverse of cdf --- percentiles) percent point function (inverse of cdf --- percentiles)
generic.isf(q, <shape(s)>, loc=0) generic.isf(q, <shape(s)>, loc=0)
inverse survival function (inverse of sf) inverse survival function (inverse of sf)
generic.moment(n, <shape(s)>, loc=0)
non-central n-th moment of the distribution. May not work for array arguments.
generic.stats(<shape(s)>, loc=0, moments='mv') generic.stats(<shape(s)>, loc=0, moments='mv')
mean('m', axis=0), variance('v'), skew('s'), and/or kurtosis('k') mean('m', axis=0), variance('v'), skew('s'), and/or kurtosis('k')
generic.entropy(<shape(s)>, loc=0) generic.entropy(<shape(s)>, loc=0)
entropy of the RV entropy of the RV
generic.fit(data, <shape(s)>, loc=0)
Parameter estimates for generic data
generic.expect(func=None, args=(), loc=0, lb=None, ub=None, conditional=False)
Expected value of a function with respect to the distribution.
Additional kwd arguments passed to integrate.quad
generic.median(<shape(s)>, loc=0)
Median of the distribution.
generic.mean(<shape(s)>, loc=0)
Mean of the distribution.
generic.std(<shape(s)>, loc=0)
Standard deviation of the distribution.
generic.var(<shape(s)>, loc=0)
Variance of the distribution.
generic.interval(alpha, <shape(s)>, loc=0)
Interval that with `alpha` percent probability contains a random
realization of this distribution.
generic(<shape(s)>, loc=0) generic(<shape(s)>, loc=0)
calling a distribution instance returns a frozen distribution calling a distribution instance returns a frozen distribution
@ -6355,17 +6546,24 @@ class rv_discrete(rv_generic):
---------- ----------
n: int, n>=1 n: int, n>=1
order of moment order of moment
arg1, arg2, arg3,...: array-like arg1, arg2, arg3,...: float
The shape parameter(s) for the distribution (see docstring of the The shape parameter(s) for the distribution (see docstring of the
instance object for more information) instance object for more information)
loc : float, optional
location parameter (default=0)
scale : float, optional
scale parameter (default=1)
""" """
if not self._argcheck(*args): loc = kwds.get('loc', 0)
scale = kwds.get('scale', 1)
if not (self._argcheck(*args) and (scale > 0)):
return nan return nan
if (floor(n) != n): if (floor(n) != n):
raise ValueError("Moment must be an integer.") raise ValueError("Moment must be an integer.")
if (n < 0): raise ValueError("Moment must be positive.") if (n < 0): raise ValueError("Moment must be positive.")
if (n == 0): return 1.0 mu, mu2, g1, g2 = None, None, None, None
if (n > 0) and (n < 5): if (n > 0) and (n < 5):
signature = inspect.getargspec(self._stats.im_func) signature = inspect.getargspec(self._stats.im_func)
if (signature[2] is not None) or ('moments' in signature[0]): if (signature[2] is not None) or ('moments' in signature[0]):
@ -6373,27 +6571,21 @@ class rv_discrete(rv_generic):
else: else:
dict = {} dict = {}
mu, mu2, g1, g2 = self._stats(*args,**dict) mu, mu2, g1, g2 = self._stats(*args,**dict)
if (n==1): val = _moment_from_stats(n, mu, mu2, g1, g2, self._munp, args)
if mu is None: return self._munp(1,*args)
else: return mu # Convert to transformed X = L + S*Y
elif (n==2): # so E[X^n] = E[(L+S*Y)^n] = L^n sum(comb(n,k)*(S/L)^k E[Y^k],k=0...n)
if mu2 is None or mu is None: return self._munp(2,*args) if loc == 0:
else: return mu2 + mu*mu return scale**n * val
elif (n==3):
if (g1 is None) or (mu2 is None) or (mu is None):
return self._munp(3,*args)
else:
mu3 = g1*(mu2**1.5) # 3rd central moment
return mu3+3*mu*mu2+mu**3 # 3rd non-central moment
else: # (n==4)
if (g2 is None) or (g1 is None) or (mu is None) or (mu2 is None):
return self._munp(4,*args)
else:
mu4 = (g2+3.0)*(mu2**2.0) # 4th central moment
mu3 = g1*(mu2**1.5) # 3rd central moment
return mu4+4*mu*mu3+6*mu*mu*mu2+mu**4
else: else:
return self._munp(n,*args) result = 0
fac = float(scale) / float(loc)
for k in range(n):
valk = _moment_from_stats(k, mu, mu2, g1, g2, self._munp, args)
result += comb(n,k,exact=True)*(fac**k) * valk
result += fac**n * val
return result * loc**n
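
The discrete moment() uses the same binomial-expansion bookkeeping as the continuous case. A quick check with a shifted Poisson, assuming this patched build or a recent scipy.stats:

# For X = loc + Y with Y ~ Poisson(mu): E[X**2] = Var(Y) + (loc + mu)**2.
import wafo.stats as stats

mu, loc = 2.0, 3.0
print(stats.poisson.moment(2, mu, loc=loc))   # 27.0
print(mu + (loc + mu)**2)                     # 27.0
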
def freeze(self, *args, **kwds): def freeze(self, *args, **kwds):
return rv_frozen(self, *args, **kwds) return rv_frozen(self, *args, **kwds)
@ -6440,7 +6632,7 @@ class rv_discrete(rv_generic):
Parameters Parameters
---------- ----------
fn : function (default: identity mapping) fn : function (default: identity mapping)
Function for which integral is calculated. Takes only one argument. Function for which sum is calculated. Takes only one argument.
args : tuple args : tuple
argument (parameters) of the distribution argument (parameters) of the distribution
optional keyword parameters optional keyword parameters

test_distributions.py
@ -14,7 +14,7 @@ import wafo.stats as stats
from wafo.stats.distributions import argsreduce from wafo.stats.distributions import argsreduce
def kolmogorov_check(diststr, args=(), N=20, significance=0.01): def kolmogorov_check(diststr, args=(), N=20, significance=0.01):
qtest = stats.ksoneisf(significance, N) qtest = stats.ksone.isf(significance, N)
cdf = eval('stats.'+diststr+'.cdf') cdf = eval('stats.'+diststr+'.cdf')
dist = eval('stats.'+diststr) dist = eval('stats.'+diststr)
# Get random numbers # Get random numbers
@ -242,6 +242,10 @@ class TestDLaplace(TestCase):
assert_(isinstance(val, numpy.ndarray)) assert_(isinstance(val, numpy.ndarray))
assert_(val.dtype.char in typecodes['AllInteger']) assert_(val.dtype.char in typecodes['AllInteger'])
def test_rvgeneric_std():
"""Regression test for #1191"""
assert_array_almost_equal(stats.t.std([5, 6]), [1.29099445, 1.22474487])
class TestRvDiscrete(TestCase): class TestRvDiscrete(TestCase):
def test_rvs(self): def test_rvs(self):
states = [-1,0,1,2,3,4] states = [-1,0,1,2,3,4]
@ -328,7 +332,7 @@ class TestSkellam(TestCase):
assert_almost_equal(stats.skellam.cdf(k, mu1, mu2), skcdfR, decimal=5) assert_almost_equal(stats.skellam.cdf(k, mu1, mu2), skcdfR, decimal=5)
class TestHypergeom(TestCase): class TestHypergeom2(TestCase):
def test_precision(self): def test_precision(self):
# comparison number from mpmath # comparison number from mpmath
M = 2500 M = 2500
@ -487,7 +491,7 @@ class TestFrozen(TestCase):
assert_equal(result_f, result) assert_equal(result_f, result)
result_f = frozen.moment(2) result_f = frozen.moment(2)
result = dist.moment(2) result = dist.moment(2,loc=10.0, scale=3.0)
assert_equal(result_f, result) assert_equal(result_f, result)
def test_gamma(self): def test_gamma(self):
@ -555,6 +559,100 @@ class TestFrozen(TestCase):
# the focus of this test. # the focus of this test.
assert_equal(m1, m2) assert_equal(m1, m2)
class TestExpect(TestCase):
"""Test for expect method.
Uses normal distribution and beta distribution for finite bounds, and
hypergeom for discrete distribution with finite support
"""
def test_norm(self):
v = stats.norm.expect(lambda x: (x-5)*(x-5), loc=5, scale=2)
assert_almost_equal(v, 4, decimal=14)
m = stats.norm.expect(lambda x: (x), loc=5, scale=2)
assert_almost_equal(m, 5, decimal=14)
lb = stats.norm.ppf(0.05, loc=5, scale=2)
ub = stats.norm.ppf(0.95, loc=5, scale=2)
prob90 = stats.norm.expect(lambda x: 1, loc=5, scale=2, lb=lb, ub=ub)
assert_almost_equal(prob90, 0.9, decimal=14)
prob90c = stats.norm.expect(lambda x: 1, loc=5, scale=2, lb=lb, ub=ub,
conditional=True)
assert_almost_equal(prob90c, 1., decimal=14)
def test_beta(self):
#case with finite support interval
## >>> mtrue, vtrue = stats.beta.stats(10,5, loc=5., scale=2.)
## >>> mtrue, vtrue
## (array(6.333333333333333), array(0.055555555555555552))
v = stats.beta.expect(lambda x: (x-19/3.)*(x-19/3.), args=(10,5),
loc=5, scale=2)
assert_almost_equal(v, 1./18., decimal=14)
m = stats.beta.expect(lambda x: x, args=(10,5), loc=5., scale=2.)
assert_almost_equal(m, 19/3., decimal=14)
ub = stats.beta.ppf(0.95, 10, 10, loc=5, scale=2)
lb = stats.beta.ppf(0.05, 10, 10, loc=5, scale=2)
prob90 = stats.beta.expect(lambda x: 1., args=(10,10), loc=5.,
scale=2.,lb=lb, ub=ub, conditional=False)
assert_almost_equal(prob90, 0.9, decimal=14)
prob90c = stats.beta.expect(lambda x: 1, args=(10,10), loc=5,
scale=2, lb=lb, ub=ub, conditional=True)
assert_almost_equal(prob90c, 1., decimal=14)
def test_hypergeom(self):
#test case with finite bounds
#without specifying bounds
m_true, v_true = stats.hypergeom.stats(20, 10, 8, loc=5.)
m = stats.hypergeom.expect(lambda x: x, args=(20, 10, 8), loc=5.)
assert_almost_equal(m, m_true, decimal=13)
v = stats.hypergeom.expect(lambda x: (x-9.)**2, args=(20, 10, 8),
loc=5.)
assert_almost_equal(v, v_true, decimal=14)
#with bounds, bounds equal to shifted support
v_bounds = stats.hypergeom.expect(lambda x: (x-9.)**2, args=(20, 10, 8),
loc=5., lb=5, ub=13)
assert_almost_equal(v_bounds, v_true, decimal=14)
#drop boundary points
prob_true = 1-stats.hypergeom.pmf([5, 13], 20, 10, 8, loc=5).sum()
prob_bounds = stats.hypergeom.expect(lambda x: 1, args=(20, 10, 8),
loc=5., lb=6, ub=12)
assert_almost_equal(prob_bounds, prob_true, decimal=13)
#conditional
prob_bc = stats.hypergeom.expect(lambda x: 1, args=(20, 10, 8), loc=5.,
lb=6, ub=12, conditional=True)
assert_almost_equal(prob_bc, 1, decimal=14)
#check simple integral
prob_b = stats.hypergeom.expect(lambda x: 1, args=(20, 10, 8),
lb=0, ub=8)
assert_almost_equal(prob_b, 1, decimal=13)
def test_poisson(self):
#poisson, use lower bound only
prob_bounds = stats.poisson.expect(lambda x: 1, args=(2,), lb=3,
conditional=False)
prob_b_true = 1-stats.poisson.cdf(2,2)
assert_almost_equal(prob_bounds, prob_b_true, decimal=14)
prob_lb = stats.poisson.expect(lambda x: 1, args=(2,), lb=2,
conditional=True)
assert_almost_equal(prob_lb, 1, decimal=14)
def test_regression_ticket_1316(): def test_regression_ticket_1316():
"""Regression test for ticket #1316.""" """Regression test for ticket #1316."""
@ -563,5 +661,26 @@ def test_regression_ticket_1316():
g = stats.distributions.gamma_gen(name='gamma') g = stats.distributions.gamma_gen(name='gamma')
def test_regression_ticket_1326():
"""Regression test for ticket #1326."""
#adjust to avoid nan with 0*log(0)
assert_almost_equal(stats.chi2.pdf(0.0, 2), 0.5, 14)
def test_regression_tukey_lambda():
""" Make sure that Tukey-Lambda distribution correctly handles non-positive lambdas.
"""
x = np.linspace(-5.0, 5.0, 101)
for lam in [0.0, -1.0, -2.0, np.array([[-1.0], [0.0], [-2.0]])]:
p = stats.tukeylambda.pdf(x, lam)
assert_((p != 0.0).all())
assert_(~np.isnan(p).all())
lam = np.array([[-1.0], [0.0], [2.0]])
p = stats.tukeylambda.pdf(x, lam)
assert_(~np.isnan(p).all())
assert_((p[0] != 0.0).all())
assert_((p[1] != 0.0).all())
assert_((p[2] != 0.0).any())
assert_((p[2] == 0.0).any())
if __name__ == "__main__": if __name__ == "__main__":
run_module_suite() run_module_suite()