Updated distributions.py

master
Per.Andreas.Brodtkorb 12 years ago
parent cf2b303821
commit d540b8d078

@ -1136,7 +1136,7 @@ def richardson(Q, k):
R = Q[k] + (Q[k] - Q[k - 1]) / c R = Q[k] + (Q[k] - Q[k - 1]) / c
return R return R
def quadgr(fun, a, b, abseps=1e-5, maxiter=17): def quadgr(fun, a, b, abseps=1e-5, max_iter=17):
''' '''
Gauss-Legendre quadrature with Richardson extrapolation. Gauss-Legendre quadrature with Richardson extrapolation.
@ -1446,35 +1446,35 @@ def qdemo(f, a, b):
def main(): def main():
val, err = clencurt(np.exp, 0, 2) # val, err = clencurt(np.exp, 0, 2)
valt = np.exp(2) - np.exp(0) # valt = np.exp(2) - np.exp(0)
[Q, err] = quadgr(lambda x: x ** 2, 1, 4, 1e-9) # [Q, err] = quadgr(lambda x: x ** 2, 1, 4, 1e-9)
[Q, err] = quadgr(humps, 1, 4, 1e-9) # [Q, err] = quadgr(humps, 1, 4, 1e-9)
#
[x, w] = h_roots(11, 'newton') # [x, w] = h_roots(11, 'newton')
sum(w) # sum(w)
[x2, w2] = la_roots(11, 1, 't') # [x2, w2] = la_roots(11, 1, 't')
#
from scitools import numpyutils as npu #@UnresolvedImport # from scitools import numpyutils as npu #@UnresolvedImport
fun = npu.wrap2callable('x**2') # fun = npu.wrap2callable('x**2')
p0 = fun(0) # p0 = fun(0)
A = [0, 1, 1]; B = [2, 4, 3] # A = [0, 1, 1]; B = [2, 4, 3]
area, err = gaussq(fun, A, B) # area, err = gaussq(fun, A, B)
#
fun = npu.wrap2callable('x**2') # fun = npu.wrap2callable('x**2')
[val1, err1] = gaussq(fun, A, B) # [val1, err1] = gaussq(fun, A, B)
#
#
#Integration of x^2*exp(-x) from zero to infinity: # #Integration of x^2*exp(-x) from zero to infinity:
fun2 = npu.wrap2callable('1') # fun2 = npu.wrap2callable('1')
[val2, err2] = gaussq(fun2, 0, np.inf, wfun=3, alpha=2) # [val2, err2] = gaussq(fun2, 0, np.inf, wfun=3, alpha=2)
[val2, err2] = gaussq(lambda x: x ** 2, 0, np.inf, wfun=3, alpha=0) # [val2, err2] = gaussq(lambda x: x ** 2, 0, np.inf, wfun=3, alpha=0)
#
#Integrate humps from 0 to 2 and from 1 to 4 # #Integrate humps from 0 to 2 and from 1 to 4
[val3, err3] = gaussq(humps, A, B) # [val3, err3] = gaussq(humps, A, B)
#
[x, w] = p_roots(11, 'newton', 1, 3) # [x, w] = p_roots(11, 'newton', 1, 3)
y = np.sum(x ** 2 * w) # y = np.sum(x ** 2 * w)
x = np.linspace(0, np.pi / 2) x = np.linspace(0, np.pi / 2)
q0 = np.trapz(humps(x), x) q0 = np.trapz(humps(x), x)

@ -58,10 +58,9 @@ __all__ = [
'rdist', 'rayleigh', 'truncrayleigh','reciprocal', 'rice', 'recipinvgauss', 'rdist', 'rayleigh', 'truncrayleigh','reciprocal', 'rice', 'recipinvgauss',
'semicircular', 'triang', 'truncexpon', 'truncnorm', 'semicircular', 'triang', 'truncexpon', 'truncnorm',
'tukeylambda', 'uniform', 'vonmises', 'wald', 'wrapcauchy', 'tukeylambda', 'uniform', 'vonmises', 'wald', 'wrapcauchy',
'entropy', 'rv_discrete', 'entropy', 'rv_discrete', 'binom', 'bernoulli', 'nbinom', 'geom',
'binom', 'bernoulli', 'nbinom', 'geom', 'hypergeom', 'logser', 'hypergeom', 'logser', 'poisson', 'planck', 'boltzmann', 'randint',
'poisson', 'planck', 'boltzmann', 'randint', 'zipf', 'dlaplace', 'zipf', 'dlaplace', 'skellam'
'skellam'
] ]
floatinfo = numpy.finfo(float) floatinfo = numpy.finfo(float)
@ -241,6 +240,8 @@ Display frozen pdf
>>> x = np.linspace(0, np.minimum(rv.dist.b, 3)) >>> x = np.linspace(0, np.minimum(rv.dist.b, 3))
>>> h = plt.plot(x, rv.pdf(x)) >>> h = plt.plot(x, rv.pdf(x))
Here, ``rv.dist.b`` is the right endpoint of the support of ``rv.dist``.
Check accuracy of cdf and ppf Check accuracy of cdf and ppf
>>> prb = %(name)s.cdf(x, %(shapes)s) >>> prb = %(name)s.cdf(x, %(shapes)s)
@ -1165,8 +1166,8 @@ class rv_generic(object):
arg1, arg2, ... : array_like arg1, arg2, ... : array_like
The shape parameter(s) for the distribution (see docstring of the instance The shape parameter(s) for the distribution (see docstring of the instance
object for more information) object for more information)
loc: array_like, optioal loc: array_like, optional
location parameter (deafult = 0) location parameter (default = 0)
scale : array_like, optional scale : array_like, optional
scale paramter (default = 1) scale paramter (default = 1)
@ -1204,9 +1205,9 @@ class rv_continuous(rv_generic):
Upper bound of the support of the distribution, default is plus Upper bound of the support of the distribution, default is plus
infinity. infinity.
xa : float, optional xa : float, optional
Lower bound for fixed point calculation for generic ppf. DEPRECATED
xb : float, optional xb : float, optional
Upper bound for fixed point calculation for generic ppf. DEPRECATED
xtol : float, optional xtol : float, optional
The tolerance for fixed point calculation for generic ppf. The tolerance for fixed point calculation for generic ppf.
badvalue : object, optional badvalue : object, optional
@ -1438,7 +1439,7 @@ class rv_continuous(rv_generic):
""" """
def __init__(self, momtype=1, a=None, b=None, xa=-10.0, xb=10.0, def __init__(self, momtype=1, a=None, b=None, xa=None, xb=None,
xtol=1e-14, badvalue=None, name=None, longname=None, xtol=1e-14, badvalue=None, name=None, longname=None,
shapes=None, extradoc=None): shapes=None, extradoc=None):
@ -1457,6 +1458,12 @@ class rv_continuous(rv_generic):
self.a = -inf self.a = -inf
if b is None: if b is None:
self.b = inf self.b = inf
if xa is not None:
warnings.warn("The `xa` parameter is deprecated and will be "
"removed in scipy 0.12", DeprecationWarning)
if xb is not None:
warnings.warn("The `xb` parameter is deprecated and will be "
"removed in scipy 0.12", DeprecationWarning)
self.xa = xa self.xa = xa
self.xb = xb self.xb = xb
self.xtol = xtol self.xtol = xtol
@ -1539,7 +1546,28 @@ class rv_continuous(rv_generic):
return apply(self.cdf, (x, )+args)-q return apply(self.cdf, (x, )+args)-q
def _ppf_single_call(self, q, *args): def _ppf_single_call(self, q, *args):
return optimize.brentq(self._ppf_to_solve, self.xa, self.xb, args=(q,)+args, xtol=self.xtol) left = right = None
if self.a > -np.inf:
left = self.a
if self.b < np.inf:
right = self.b
factor = 10.
if not left: # i.e. self.a = -inf
left = -1.*factor
while self._ppf_to_solve(left, q,*args) > 0.:
right = left
left *= factor
# left is now such that cdf(left) < q
if not right: # i.e. self.b = inf
right = factor
while self._ppf_to_solve(right, q,*args) < 0.:
left = right
right *= factor
# right is now such that cdf(right) > q
return optimize.brentq(self._ppf_to_solve, \
left, right, args=(q,)+args, xtol=self.xtol)
# moment from definition # moment from definition
def _mom_integ0(self, x,m,*args): def _mom_integ0(self, x,m,*args):
@ -1727,7 +1755,6 @@ class rv_continuous(rv_generic):
cond2 = (x >= self.b) & cond0 cond2 = (x >= self.b) & cond0
cond = cond0 & cond1 cond = cond0 & cond1
output = zeros(shape(cond),'d') output = zeros(shape(cond),'d')
#place(output,(1-cond0)*(cond1==cond1),self.badvalue)
place(output,(1-cond0)+np.isnan(x),self.badvalue) place(output,(1-cond0)+np.isnan(x),self.badvalue)
place(output,cond2,1.0) place(output,cond2,1.0)
if any(cond): #call only if at least 1 entry if any(cond): #call only if at least 1 entry
@ -2396,14 +2423,15 @@ class rv_continuous(rv_generic):
args = list(args) args = list(args)
Nargs = len(args) Nargs = len(args)
fixedn = [] fixedn = []
index = range(Nargs) index = range(Nargs)
names = ['f%d' % n for n in range(Nargs - 2)] + ['floc', 'fscale'] names = ['f%d' % n for n in range(Nargs - 2)] + ['floc', 'fscale']
x0 = args[:] x0 = []
for n, key in zip(index[::-1], names[::-1]): for n, key in zip(index, names):
if kwds.has_key(key): if kwds.has_key(key):
fixedn.append(n) fixedn.append(n)
args[n] = kwds[key] args[n] = kwds[key]
del x0[n] else:
x0.append(args[n])
method = kwds.get('method', 'ml').lower() method = kwds.get('method', 'ml').lower()
if method.startswith('mps'): if method.startswith('mps'):
fitfun = self.nlogps fitfun = self.nlogps
@ -2737,12 +2765,7 @@ def _norm_logpdf(x):
def _norm_cdf(x): def _norm_cdf(x):
return special.ndtr(x) return special.ndtr(x)
def _norm_logcdf(x): def _norm_logcdf(x):
logcdf = log(special.ndtr(x)) return special.log_ndtr(x)
large_x = (5<x)
if np.any(large_x):
logcdf[large_x] = log1p(-special.ndtr(-x[large_x]))
return logcdf
#return log(special.ndtr(x))
def _norm_ppf(q): def _norm_ppf(q):
return special.ndtri(q) return special.ndtri(q)
class norm_gen(rv_continuous): class norm_gen(rv_continuous):
@ -3362,35 +3385,39 @@ class erlang_gen(rv_continuous):
%(before_notes)s %(before_notes)s
See Also
--------
gamma
Notes Notes
----- -----
The Erlang distribution is a special case of the Gamma distribution, with The Erlang distribution is a special case of the Gamma
the shape parameter an integer. distribution, with the shape parameter ``a`` an integer. Refer to
the ``gamma`` distribution for further examples.
%(example)s %(example)s
""" """
def _rvs(self, n): def _rvs(self, a):
return gamma.rvs(n,size=self._size) return gamma.rvs(a, size=self._size)
def _arg_check(self, n): def _arg_check(self, a):
return (n > 0) & (floor(n)==n) return (a > 0) & (floor(a)==a)
def _pdf(self, x, n): def _pdf(self, x, a):
Px = (x)**(n-1.0)*exp(-x)/special.gamma(n) Px = (x)**(a-1.0)*exp(-x)/special.gamma(a)
return Px return Px
def _logpdf(self, x, n): def _logpdf(self, x, a):
return (n-1.0)*log(x) - x - gamln(n) return (a-1.0)*log(x) - x - gamln(a)
def _cdf(self, x, n): def _cdf(self, x, a):
return special.gdtr(1.0,n,x) return special.gdtr(1.0,a,x)
def _sf(self, x, n): def _sf(self, x, a):
return special.gdtrc(1.0,n,x) return special.gdtrc(1.0,a,x)
def _ppf(self, q, n): def _ppf(self, q, a):
return special.gdtrix(1.0, n, q) return special.gdtrix(1.0, a, q)
def _stats(self, n): def _stats(self, a):
n = n*1.0 a = a*1.0
return n, n, 2/sqrt(n), 6/n return a, a, 2/sqrt(a), 6/a
def _entropy(self, n): def _entropy(self, a):
return special.psi(n)*(1-n) + 1 + gamln(n) return special.psi(a)*(1-a) + 1 + gamln(a)
erlang = erlang_gen(a=0.0, name='erlang', shapes='n') erlang = erlang_gen(a=0.0, name='erlang', shapes='a')
## Exponential (gamma distributed with a=1.0, loc=loc and scale=scale) ## Exponential (gamma distributed with a=1.0, loc=loc and scale=scale)
@ -3405,12 +3432,14 @@ class expon_gen(rv_continuous):
----- -----
The probability density function for `expon` is:: The probability density function for `expon` is::
expon.pdf(x) = exp(-x) expon.pdf(x) = lambda * exp(- lambda*x)
for ``x >= 0``. for ``x >= 0``.
The scale parameter is equal to ``scale = 1.0 / lambda``. The scale parameter is equal to ``scale = 1.0 / lambda``.
`expon` does not have shape parameters.
%(example)s %(example)s
""" """
@ -3601,8 +3630,7 @@ class foldcauchy_gen(rv_continuous):
return 1.0/pi*(arctan(x-c) + arctan(x+c)) return 1.0/pi*(arctan(x-c) + arctan(x+c))
def _stats(self, c): def _stats(self, c):
return inf, inf, nan, nan return inf, inf, nan, nan
# setting xb=1000 allows to calculate ppf for up to q=0.9993 foldcauchy = foldcauchy_gen(a=0.0, name='foldcauchy', shapes='c')
foldcauchy = foldcauchy_gen(a=0.0, name='foldcauchy', xb=1000, shapes='c')
## F ## F
@ -4218,14 +4246,25 @@ class gamma_gen(rv_continuous):
Notes Notes
----- -----
When ``a`` is an integer, this is the Erlang distribution, and for ``a=1``
it is the exponential distribution.
The probability density function for `gamma` is:: The probability density function for `gamma` is::
gamma.pdf(x, a) = x**(a-1) * exp(-x) / gamma(a) gamma.pdf(x, a) = (lambda*x)**(a-1) * exp(-lambda*x) / gamma(a)
for ``x >= 0``, ``a > 0``. Here ``gamma(a)`` refers to the gamma function.
The scale parameter is equal to ``scale = 1.0 / lambda``.
`gamma` has a shape parameter `a` which needs to be set explicitly. For instance:
>>> from scipy.stats import gamma
>>> rv = gamma(3., loc = 0., scale = 2.)
produces a frozen form of `gamma` with shape ``a = 3.``, ``loc =
0.`` and ``lambda = 1./scale = 1./2.``.
for ``x >= 0``, ``a > 0``. When ``a`` is an integer, `gamma` reduces to the Erlang
distribution, and when ``a=1`` to the exponential distribution.
%(example)s %(example)s
@ -5590,9 +5629,10 @@ class powerlaw_gen(rv_continuous):
def _ppf(self, q, a): def _ppf(self, q, a):
return pow(q, 1.0/a) return pow(q, 1.0/a)
def _stats(self, a): def _stats(self, a):
return a/(a+1.0), a*(a+2.0)/(a+1.0)**2, \ return (a / (a + 1.0),
2*(1.0-a)*sqrt((a+2.0)/(a*(a+3.0))), \ a / (a + 2.0) / (a + 1.0) ** 2,
6*polyval([1,-1,-6,2],a)/(a*(a+3.0)*(a+4)) -2.0 * ((a - 1.0) / (a + 3.0)) * sqrt((a + 2.0) / a),
6 * polyval([1, -1, -6, 2], a) / (a * (a + 3.0) * (a + 4)))
def _entropy(self, a): def _entropy(self, a):
return 1 - 1.0/a - log(a) return 1 - 1.0/a - log(a)
powerlaw = powerlaw_gen(a=0.0, b=1.0, name="powerlaw", shapes="a") powerlaw = powerlaw_gen(a=0.0, b=1.0, name="powerlaw", shapes="a")
@ -5894,9 +5934,8 @@ class recipinvgauss_gen(rv_continuous):
trm2 = 1.0/mu + x trm2 = 1.0/mu + x
isqx = 1.0/sqrt(x) isqx = 1.0/sqrt(x)
return 1.0-_norm_cdf(isqx*trm1)-exp(2.0/mu)*_norm_cdf(-isqx*trm2) return 1.0-_norm_cdf(isqx*trm1)-exp(2.0/mu)*_norm_cdf(-isqx*trm2)
# xb=50 or something large is necessary for stats to converge without exception recipinvgauss = recipinvgauss_gen(a=0.0, name='recipinvgauss', shapes="mu")
recipinvgauss = recipinvgauss_gen(a=0.0, xb=50, name='recipinvgauss',
shapes="mu")
# Semicircular # Semicircular
@ -6096,16 +6135,8 @@ class tukeylambda_gen(rv_continuous):
vals2 = log(q/(1-q)) vals2 = log(q/(1-q))
return where((lam == 0)&(q==q), vals2, vals1) return where((lam == 0)&(q==q), vals2, vals1)
def _stats(self, lam): def _stats(self, lam):
mu2 = 2*gam(lam+1.5)-lam*pow(4,-lam)*sqrt(pi)*gam(lam)*(1-2*lam) return 0, _tlvar(lam), 0, _tlkurt(lam)
mu2 /= lam*lam*(1+2*lam)*gam(1+1.5)
mu4 = 3*gam(lam)*gam(lam+0.5)*pow(2,-2*lam) / lam**3 / gam(2*lam+1.5)
mu4 += 2.0/lam**4 / (1+4*lam)
mu4 -= 2*sqrt(3)*gam(lam)*pow(2,-6*lam)*pow(3,3*lam) * \
gam(lam+1.0/3)*gam(lam+2.0/3) / (lam**3.0 * gam(2*lam+1.5) * \
gam(lam+0.5))
g2 = mu4 / mu2 / mu2 - 3.0
return 0, mu2, 0, g2
def _entropy(self, lam): def _entropy(self, lam):
def integ(p): def integ(p):
return log(pow(p,lam-1)+pow(1-p,lam-1)) return log(pow(p,lam-1)+pow(1-p,lam-1))
@ -6118,7 +6149,7 @@ tukeylambda = tukeylambda_gen(name='tukeylambda', shapes="lam")
class uniform_gen(rv_continuous): class uniform_gen(rv_continuous):
"""A uniform continuous random variable. """A uniform continuous random variable.
This distribution is constant between `loc` and ``loc = scale``. This distribution is constant between `loc` and ``loc + scale``.
%(before_notes)s %(before_notes)s
@ -6263,32 +6294,52 @@ wrapcauchy = wrapcauchy_gen(a=0.0, b=2*pi, name='wrapcauchy', shapes="c")
### DISCRETE DISTRIBUTIONS ### DISCRETE DISTRIBUTIONS
### ###
def entropy(pk,qk=None): def entropy(pk, qk=None, base=None):
"""S = entropy(pk,qk=None) """ Calculate the entropy of a distribution for given probability values.
If only probabilities `pk` are given, the entropy is calculated as
``S = -sum(pk * log(pk), axis=0)``.
calculate the entropy of a distribution given the p_k values If `qk` is not None, then compute a relative entropy
S = -sum(pk * log(pk), axis=0) ``S = sum(pk * log(pk / qk), axis=0)``.
If qk is not None, then compute a relative entropy This routine will normalize `pk` and `qk` if they don't sum to 1.
S = sum(pk * log(pk / qk), axis=0)
Parameters
----------
pk : sequence
Defines the (discrete) distribution. ``pk[i]`` is the (possibly
unnormalized) probability of event ``i``.
qk : sequence, optional
Sequence against which the relative entropy is computed. Should be in
the same format as `pk`.
base : float, optional
The logarithmic base to use, defaults to ``e`` (natural logarithm).
Returns
-------
S : float
The calculated entropy.
Routine will normalize pk and qk if they don't sum to 1
""" """
pk = arr(pk) pk = arr(pk)
pk = 1.0* pk / sum(pk,axis=0) pk = 1.0* pk / sum(pk, axis=0)
if qk is None: if qk is None:
vec = where(pk == 0, 0.0, pk*log(pk)) vec = where(pk == 0, 0.0, pk*log(pk))
else: else:
qk = arr(qk) qk = arr(qk)
if len(qk) != len(pk): if len(qk) != len(pk):
raise ValueError("qk and pk must have same length.") raise ValueError("qk and pk must have same length.")
qk = 1.0*qk / sum(qk,axis=0) qk = 1.0*qk / sum(qk, axis=0)
# If qk is zero anywhere, then unless pk is zero at those places # If qk is zero anywhere, then unless pk is zero at those places
# too, the relative entropy is infinite. # too, the relative entropy is infinite.
if any(take(pk,nonzero(qk==0.0),axis=0)!=0.0, 0): if any(take(pk, nonzero(qk == 0.0), axis=0) != 0.0, 0):
return inf return inf
vec = where (pk == 0, 0.0, -pk*log(pk / qk)) vec = where (pk == 0, 0.0, -pk*log(pk / qk))
return -sum(vec,axis=0) S = -sum(vec, axis=0)
if base is not None:
S /= log(base)
return S
## Handlers for generic case where xk and pk are given ## Handlers for generic case where xk and pk are given
@ -6568,6 +6619,8 @@ class rv_discrete(rv_generic):
>>> x = np.arange(0, np.min(rv.dist.b, 3)+1) >>> x = np.arange(0, np.min(rv.dist.b, 3)+1)
>>> h = plt.plot(x, rv.pmf(x)) >>> h = plt.plot(x, rv.pmf(x))
Here, ``rv.dist.b`` is the right endpoint of the support of ``rv.dist``.
Check accuracy of cdf and ppf: Check accuracy of cdf and ppf:
>>> prb = generic.cdf(x, <shape(s)>) >>> prb = generic.cdf(x, <shape(s)>)
@ -6588,8 +6641,8 @@ class rv_discrete(rv_generic):
moment_tol=1e-8,values=None,inc=1,longname=None, moment_tol=1e-8,values=None,inc=1,longname=None,
shapes=None, extradoc=None): shapes=None, extradoc=None):
super(rv_generic, self).__init__() super(rv_generic,self).__init__()
self.fix_loc = self._fix_loc self.fix_loc = self._fix_loc
if badvalue is None: if badvalue is None:
@ -7517,87 +7570,96 @@ class nbinom_gen(rv_discrete):
Notes Notes
----- -----
Probability mass function, given by The probability mass function for `nbinom` is::
``np.choose(k+n-1, n-1) * p**n * (1-p)**k`` for ``k >= 0``.
nbinom.pmf(k) = choose(k+n-1, n-1) * p**n * (1-p)**k
for ``k >= 0``.
`nbinom` takes ``n`` and ``p`` as shape parameters.
%(example)s %(example)s
""" """
def _rvs(self, n, pr): def _rvs(self, n, p):
return mtrand.negative_binomial(n, pr, self._size) return mtrand.negative_binomial(n, p, self._size)
def _argcheck(self, n, pr): def _argcheck(self, n, p):
return (n >= 0) & (pr >= 0) & (pr <= 1) return (n >= 0) & (p >= 0) & (p <= 1)
def _pmf(self, x, n, pr): def _pmf(self, x, n, p):
coeff = exp(gamln(n+x) - gamln(x+1) - gamln(n)) coeff = exp(gamln(n+x) - gamln(x+1) - gamln(n))
return coeff * power(pr,n) * power(1-pr,x) return coeff * power(p,n) * power(1-p,x)
def _logpmf(self, x, n, pr): def _logpmf(self, x, n, p):
coeff = gamln(n+x) - gamln(x+1) - gamln(n) coeff = gamln(n+x) - gamln(x+1) - gamln(n)
return coeff + n*log(pr) + x*log1p(-pr) return coeff + n*log(p) + x*log1p(-p)
def _cdf(self, x, n, pr): def _cdf(self, x, n, p):
k = floor(x) k = floor(x)
return special.betainc(n, k+1, pr) return special.betainc(n, k+1, p)
def _sf_skip(self, x, n, pr): def _sf_skip(self, x, n, p):
#skip because special.nbdtrc doesn't work for 0<n<1 #skip because special.nbdtrc doesn't work for 0<n<1
k = floor(x) k = floor(x)
return special.nbdtrc(k,n,pr) return special.nbdtrc(k,n,p)
def _ppf(self, q, n, pr): def _ppf(self, q, n, p):
vals = ceil(special.nbdtrik(q,n,pr)) vals = ceil(special.nbdtrik(q,n,p))
vals1 = (vals-1).clip(0.0, np.inf) vals1 = (vals-1).clip(0.0, np.inf)
temp = self._cdf(vals1,n,pr) temp = self._cdf(vals1,n,p)
return where(temp >= q, vals1, vals) return where(temp >= q, vals1, vals)
def _stats(self, n, pr): def _stats(self, n, p):
Q = 1.0 / pr Q = 1.0 / p
P = Q - 1.0 P = Q - 1.0
mu = n*P mu = n*P
var = n*P*Q var = n*P*Q
g1 = (Q+P)/sqrt(n*P*Q) g1 = (Q+P)/sqrt(n*P*Q)
g2 = (1.0 + 6*P*Q) / (n*P*Q) g2 = (1.0 + 6*P*Q) / (n*P*Q)
return mu, var, g1, g2 return mu, var, g1, g2
nbinom = nbinom_gen(name='nbinom', shapes="n, pr", extradoc=""" nbinom = nbinom_gen(name='nbinom', shapes="n, p")
Negative binomial distribution
nbinom.pmf(k,n,p) = choose(k+n-1,n-1) * p**n * (1-p)**k
for k >= 0.
"""
)
## Geometric distribution ## Geometric distribution
class geom_gen(rv_discrete): class geom_gen(rv_discrete):
def _rvs(self, pr): """A geometric discrete random variable.
return mtrand.geometric(pr,size=self._size)
def _argcheck(self, pr): %(before_notes)s
return (pr<=1) & (pr >= 0)
def _pmf(self, k, pr): Notes
return (1-pr)**(k-1) * pr -----
def _logpmf(self, k, pr): The probability mass function for `geom` is::
return (k-1)*log1p(-pr) + pr
def _cdf(self, x, pr): geom.pmf(k) = (1-p)**(k-1)*p
for ``k >= 1``.
`geom` takes ``p`` as shape parameter.
%(example)s
"""
def _rvs(self, p):
return mtrand.geometric(p,size=self._size)
def _argcheck(self, p):
return (p<=1) & (p >= 0)
def _pmf(self, k, p):
return (1-p)**(k-1) * p
def _logpmf(self, k, p):
return (k-1)*log1p(-p) + p
def _cdf(self, x, p):
k = floor(x) k = floor(x)
return (1.0-(1.0-pr)**k) return (1.0-(1.0-p)**k)
def _sf(self, x, pr): def _sf(self, x, p):
k = floor(x) k = floor(x)
return (1.0-pr)**k return (1.0-p)**k
def _ppf(self, q, pr): def _ppf(self, q, p):
vals = ceil(log1p(-q)/log1p(-pr)) vals = ceil(log1p(-q)/log1p(-p))
temp = 1.0-(1.0-pr)**(vals-1) temp = 1.0-(1.0-p)**(vals-1)
return where((temp >= q) & (vals > 0), vals-1, vals) return where((temp >= q) & (vals > 0), vals-1, vals)
def _stats(self, pr): def _stats(self, p):
mu = 1.0/pr mu = 1.0/p
qr = 1.0-pr qr = 1.0-p
var = qr / pr / pr var = qr / p / p
g1 = (2.0-pr) / sqrt(qr) g1 = (2.0-p) / sqrt(qr)
g2 = numpy.polyval([1,-6,6],pr)/(1.0-pr) g2 = numpy.polyval([1,-6,6],p)/(1.0-p)
return mu, var, g1, g2 return mu, var, g1, g2
geom = geom_gen(a=1,name='geom', longname="A geometric", geom = geom_gen(a=1,name='geom', longname="A geometric",
shapes="pr", extradoc=""" shapes="p")
Geometric distribution
geom.pmf(k,p) = (1-p)**(k-1)*p
for k >= 1
"""
)
## Hypergeometric distribution ## Hypergeometric distribution
@ -7618,7 +7680,37 @@ class hypergeom_gen(rv_discrete):
pmf(k, M, n, N) = choose(n, k) * choose(M - n, N - k) / choose(M, N), pmf(k, M, n, N) = choose(n, k) * choose(M - n, N - k) / choose(M, N),
for N - (M-n) <= k <= min(m,N) for N - (M-n) <= k <= min(m,N)
%(example)s Examples
--------
>>> from scipy.stats import hypergeom
Suppose we have a collection of 20 animals, of which 7 are dogs. Then if
we want to know the probability of finding a given number of dogs if we
choose at random 12 of the 20 animals, we can initialize a frozen
distribution and plot the probability mass function:
>>> [M, n, N] = [20, 7, 12]
>>> rv = hypergeom(M, n, N)
>>> x = np.arange(0, n+1)
>>> pmf_dogs = rv.pmf(x)
>>> fig = plt.figure()
>>> ax = fig.add_subplot(111)
>>> ax.plot(x, pmf_dogs, 'bo')
>>> ax.vlines(x, 0, pmf_dogs, lw=2)
>>> ax.set_xlabel('# of dogs in our group of chosen animals')
>>> ax.set_ylabel('hypergeom PMF')
>>> plt.show()
Instead of using a frozen distribution we can also use `hypergeom`
methods directly. To for example obtain the cumulative distribution
function, use:
>>> prb = hypergeom.cdf(x, M, n, N)
And to generate random numbers:
>>> R = hypergeom.rvs(M, n, N, size=10)
""" """
def _rvs(self, M, n, N): def _rvs(self, M, n, N):
@ -7681,6 +7773,23 @@ hypergeom = hypergeom_gen(name='hypergeom', shapes="M, n, N")
## Logarithmic (Log-Series), (Series) distribution ## Logarithmic (Log-Series), (Series) distribution
# FIXME: Fails _cdfvec # FIXME: Fails _cdfvec
class logser_gen(rv_discrete): class logser_gen(rv_discrete):
"""A Logarithmic (Log-Series, Series) discrete random variable.
%(before_notes)s
Notes
-----
The probability mass function for `logser` is::
logser.pmf(k) = - p**k / (k*log(1-p))
for ``k >= 1``.
`logser` takes ``p`` as shape parameter.
%(example)s
"""
def _rvs(self, pr): def _rvs(self, pr):
# looks wrong for pr>0.5, too few k=1 # looks wrong for pr>0.5, too few k=1
# trying to use generic is worse, no k=1 at all # trying to use generic is worse, no k=1 at all
@ -7704,23 +7813,35 @@ class logser_gen(rv_discrete):
g2 = mu4 / var**2 - 3.0 g2 = mu4 / var**2 - 3.0
return mu, var, g1, g2 return mu, var, g1, g2
logser = logser_gen(a=1,name='logser', longname='A logarithmic', logser = logser_gen(a=1,name='logser', longname='A logarithmic',
shapes='pr', extradoc=""" shapes='p')
Logarithmic (Log-Series, Series) distribution
logser.pmf(k,p) = - p**k / (k*log(1-p))
for k >= 1
"""
)
## Poisson distribution ## Poisson distribution
class poisson_gen(rv_discrete): class poisson_gen(rv_discrete):
"""A Poisson discrete random variable.
%(before_notes)s
Notes
-----
The probability mass function for `poisson` is::
poisson.pmf(k) = exp(-mu) * mu**k / k!
for ``k >= 0``.
`poisson` takes ``mu`` as shape parameter.
%(example)s
"""
def _rvs(self, mu): def _rvs(self, mu):
return mtrand.poisson(mu, self._size) return mtrand.poisson(mu, self._size)
def _pmf(self, k, mu): def _logpmf(self, k, mu):
Pk = k*log(mu)-gamln(k+1) - mu Pk = k*log(mu)-gamln(k+1) - mu
return exp(Pk) return Pk
def _pmf(self, k, mu):
return exp(self._logpmf(k, mu))
def _cdf(self, x, mu): def _cdf(self, x, mu):
k = floor(x) k = floor(x)
return special.pdtr(k,mu) return special.pdtr(k,mu)
@ -7737,19 +7858,27 @@ class poisson_gen(rv_discrete):
g1 = 1.0/arr(sqrt(mu)) g1 = 1.0/arr(sqrt(mu))
g2 = 1.0 / arr(mu) g2 = 1.0 / arr(mu)
return mu, var, g1, g2 return mu, var, g1, g2
poisson = poisson_gen(name="poisson", longname='A Poisson', poisson = poisson_gen(name="poisson", longname='A Poisson', shapes="mu")
shapes="mu", extradoc="""
Poisson distribution ## (Planck) Discrete Exponential
class planck_gen(rv_discrete):
"""A Planck discrete exponential random variable.
poisson.pmf(k, mu) = exp(-mu) * mu**k / k! %(before_notes)s
for k >= 0
"""
)
## (Planck) Discrete Exponential Notes
-----
The probability mass function for `planck` is::
class planck_gen(rv_discrete): planck.pmf(k) = (1-exp(-lambda))*exp(-lambda*k)
for ``k*lambda >= 0``.
`planck` takes ``lambda`` as shape parameter.
%(example)s
"""
def _argcheck(self, lambda_): def _argcheck(self, lambda_):
if (lambda_ > 0): if (lambda_ > 0):
self.a = 0 self.a = 0
@ -7781,18 +7910,28 @@ class planck_gen(rv_discrete):
l = lambda_ l = lambda_
C = -expm1(-l) C = -expm1(-l)
return l * exp(-l) / C - log(C) return l * exp(-l) / C - log(C)
planck = planck_gen(name='planck', longname='A discrete exponential ', planck = planck_gen(name='planck',longname='A discrete exponential ',
shapes="lamda", shapes="lamda")
extradoc="""
Planck (Discrete Exponential)
planck.pmf(k,b) = (1-exp(-b))*exp(-b*k)
for k*b >= 0
"""
)
class boltzmann_gen(rv_discrete): class boltzmann_gen(rv_discrete):
"""A Boltzmann (Truncated Discrete Exponential) random variable.
%(before_notes)s
Notes
-----
The probability mass function for `boltzmann` is::
boltzmann.pmf(k) = (1-exp(-lambda)*exp(-lambda*k)/(1-exp(-lambda*N))
for ``k = 0,...,N-1``.
`boltzmann` takes ``lambda`` and ``N`` as shape parameters.
%(example)s
"""
def _pmf(self, k, lambda_, N): def _pmf(self, k, lambda_, N):
fact = (expm1(-lambda_))/(expm1(-lambda_*N)) fact = (expm1(-lambda_))/(expm1(-lambda_*N))
return fact*exp(-lambda_*k) return fact*exp(-lambda_*k)
@ -7819,22 +7958,28 @@ class boltzmann_gen(rv_discrete):
return mu, var, g1, g2 return mu, var, g1, g2
boltzmann = boltzmann_gen(name='boltzmann',longname='A truncated discrete exponential ', boltzmann = boltzmann_gen(name='boltzmann',longname='A truncated discrete exponential ',
shapes="lamda, N", shapes="lamda, N")
extradoc="""
Boltzmann (Truncated Discrete Exponential) ## Discrete Uniform
boltzmann.pmf(k,b,N) = (1-exp(-b))*exp(-b*k)/(1-exp(-b*N)) class randint_gen(rv_discrete):
for k=0,..,N-1 """A uniform discrete random variable.
"""
) %(before_notes)s
Notes
-----
The probability mass function for `randint` is::
randint.pmf(k) = 1./(max- min)
for ``k = min,...,max``.
`randint` takes ``min`` and ``max`` as shape parameters.
## Discrete Uniform %(example)s
class randint_gen(rv_discrete): """
def _argcheck(self, min, max): def _argcheck(self, min, max):
self.a = min self.a = min
self.b = max-1 self.b = max-1
@ -7868,23 +8013,30 @@ class randint_gen(rv_discrete):
def _entropy(self, min, max): def _entropy(self, min, max):
return log(max-min) return log(max-min)
randint = randint_gen(name='randint',longname='A discrete uniform '\ randint = randint_gen(name='randint',longname='A discrete uniform '\
'(random integer)', shapes="min, max", '(random integer)', shapes="min, max")
extradoc="""
Discrete Uniform
Random integers >=min and <max.
randint.pmf(k,min, max) = 1/(max-min)
for min <= k < max.
"""
)
# Zipf distribution # Zipf distribution
# FIXME: problems sampling. # FIXME: problems sampling.
class zipf_gen(rv_discrete): class zipf_gen(rv_discrete):
"""A Zipf discrete random variable.
%(before_notes)s
Notes
-----
The probability mass function for `zipf` is::
zipf.pmf(k) = 1/(zeta(a)*k**a)
for ``k >= 1``.
`zipf` takes ``a`` as shape parameter.
%(example)s
"""
def _rvs(self, a): def _rvs(self, a):
return mtrand.zipf(a, size=self._size) return mtrand.zipf(a, size=self._size)
def _argcheck(self, a): def _argcheck(self, a):
@ -7910,19 +8062,28 @@ class zipf_gen(rv_discrete):
g2 = mu4 / arr(var**2) - 3.0 g2 = mu4 / arr(var**2) - 3.0
return mu, var, g1, g2 return mu, var, g1, g2
zipf = zipf_gen(a=1,name='zipf', longname='A Zipf', zipf = zipf_gen(a=1,name='zipf', longname='A Zipf',
shapes="a", extradoc=""" shapes="a")
Zipf distribution
zipf.pmf(k,a) = 1/(zeta(a)*k**a) # Discrete Laplacian
for k >= 1 class dlaplace_gen(rv_discrete):
""" """A Laplacian discrete random variable.
)
%(before_notes)s
# Discrete Laplacian Notes
-----
The probability mass function for `dlaplace` is::
class dlaplace_gen(rv_discrete): dlaplace.pmf(k) = tanh(a/2) * exp(-a*abs(k))
for ``a >0``.
`dlaplace` takes ``a`` as shape parameter.
%(example)s
"""
def _pmf(self, k, a): def _pmf(self, k, a):
return tanh(a/2.0)*exp(-a*abs(k)) return tanh(a/2.0)*exp(-a*abs(k))
def _cdf(self, x, a): def _cdf(self, x, a):
@ -7955,17 +8116,35 @@ class dlaplace_gen(rv_discrete):
return a / sinh(a) - log(tanh(a/2.0)) return a / sinh(a) - log(tanh(a/2.0))
dlaplace = dlaplace_gen(a=-inf, dlaplace = dlaplace_gen(a=-inf,
name='dlaplace', longname='A discrete Laplacian', name='dlaplace', longname='A discrete Laplacian',
shapes="a", extradoc=""" shapes="a")
Discrete Laplacian distribution.
dlaplace.pmf(k,a) = tanh(a/2) * exp(-a*abs(k)) class skellam_gen(rv_discrete):
for a > 0. """A Skellam discrete random variable.
"""
)
%(before_notes)s
class skellam_gen(rv_discrete): Notes
-----
Probability distribution of the difference of two correlated or
uncorrelated Poisson random variables.
Let k1 and k2 be two Poisson-distributed r.v. with expected values
lam1 and lam2. Then, ``k1 - k2`` follows a Skellam distribution with
parameters ``mu1 = lam1 - rho*sqrt(lam1*lam2)`` and
``mu2 = lam2 - rho*sqrt(lam1*lam2)``, where rho is the correlation
coefficient between k1 and k2. If the two Poisson-distributed r.v.
are independent then ``rho = 0``.
Parameters mu1 and mu2 must be strictly positive.
For details see: http://en.wikipedia.org/wiki/Skellam_distribution
`skellam` takes ``mu1`` and ``mu2`` as shape parameters.
%(example)s
"""
def _rvs(self, mu1, mu2): def _rvs(self, mu1, mu2):
n = self._size n = self._size
return np.random.poisson(mu1, n)-np.random.poisson(mu2, n) return np.random.poisson(mu1, n)-np.random.poisson(mu2, n)
@ -7993,26 +8172,9 @@ class skellam_gen(rv_discrete):
g2 = 1 / var g2 = 1 / var
return mean, var, g1, g2 return mean, var, g1, g2
skellam = skellam_gen(a=-np.inf, name="skellam", longname='A Skellam', skellam = skellam_gen(a=-np.inf, name="skellam", longname='A Skellam',
shapes="mu1,mu2", extradoc=""" shapes="mu1,mu2")
Skellam distribution
Probability distribution of the difference of two correlated or
uncorrelated Poisson random variables.
Let k1 and k2 be two Poisson-distributed r.v. with expected values
lam1 and lam2. Then, k1-k2 follows a Skellam distribution with
parameters mu1 = lam1 - rho*sqrt(lam1*lam2) and
mu2 = lam2 - rho*sqrt(lam1*lam2), where rho is the correlation
coefficient between k1 and k2. If the two Poisson-distributed r.v.
are independent then rho = 0.
Parameters mu1 and mu2 must be strictly positive.
For details see: http://en.wikipedia.org/wiki/Skellam_distribution
"""
)
def test_lognorm(): def test_lognorm():

Loading…
Cancel
Save