updated distributions.py according to newest updates from scipy.

14 years ago · 79bdce50fd
parent b93fa2bd68
commit 79bdce50fd
1 changed files with 1274 additions and 1272 deletions
--- a/pywafo/src/wafo/stats/distributions.py
+++ b/pywafo/src/wafo/stats/distributions.py
@ -8,6 +8,7 @@
 from __future__ import division

 import math
+import warnings
 from copy import copy

 from scipy.misc import comb, derivative  #@UnresolvedImport
@ -39,7 +40,6 @@ try:
 except:
    vonmises_cython = None

-
 def _moment(data, n, mu=None):
    if mu is None:
        mu = data.mean()
@ -398,6 +398,7 @@ class rv_frozen_old(object):

    def entropy(self):
        return self.dist.entropy(*self.args, **self.kwds)
+
    def pmf(self,k):
        return self.dist.pmf(k, *self.args, **self.kwds)

@ -1485,6 +1486,7 @@ class rv_continuous(rv_generic):
        cond = cond0 & cond1
        output = zeros(shape(cond),'d')
        putmask(output,(1-cond0)*array(cond1,bool),self.badvalue)
+        if any(cond):
            goodargs = argsreduce(cond, *((x,)+args+(scale,)))
            scale, goodargs = goodargs[-1], goodargs[:-1]
            place(output,cond,self._pdf(*goodargs) / scale)
@ -1517,7 +1519,7 @@ class rv_continuous(rv_generic):

        """
        loc,scale=map(kwds.get,['loc','scale'])
-        args, loc, scale = self._fix_loc_scale(args, loc, scale)
+        args, loc, scale = self.fix_loc_scale(args, loc, scale)
        x, loc, scale = map(arr, (x, loc, scale))
        args = tuple(map(arr, args))
        x = arr((x - loc) * 1.0 / scale)
@ -1527,6 +1529,7 @@ class rv_continuous(rv_generic):
        output = empty(shape(cond),'d')
        output.fill(NINF)
        putmask(output,(1-cond0)*array(cond1,bool),self.badvalue)
+        if any(cond):
            goodargs = argsreduce(cond, *((x,)+args+(scale,)))
            scale, goodargs = goodargs[-1], goodargs[:-1]
            place(output,cond,self._logpdf(*goodargs) - log(scale))
@ -1600,7 +1603,7 @@ class rv_continuous(rv_generic):

        """
        loc,scale=map(kwds.get,['loc','scale'])
-        args, loc, scale = self._fix_loc_scale(args, loc, scale)
+        args, loc, scale = self.fix_loc_scale(args, loc, scale)
        x,loc,scale = map(arr,(x,loc,scale))
        args = tuple(map(arr,args))
        x = (x-loc)*1.0/scale
@ -1642,7 +1645,7 @@ class rv_continuous(rv_generic):

        """
        loc,scale=map(kwds.get,['loc','scale'])
-        args, loc, scale = self._fix_loc_scale(args, loc, scale)
+        args, loc, scale = self.fix_loc_scale(args, loc, scale)
        x,loc,scale = map(arr,(x,loc,scale))
        args = tuple(map(arr,args))
        x = (x-loc)*1.0/scale
@ -1653,6 +1656,7 @@ class rv_continuous(rv_generic):
        output = zeros(shape(cond),'d')
        place(output,(1-cond0)*(cond1==cond1),self.badvalue)
        place(output,cond2,1.0)
+        if any(cond):
            goodargs = argsreduce(cond, *((x,)+args))
            place(output,cond,self._sf(*goodargs))
        if output.ndim == 0:
@ -1681,7 +1685,7 @@ class rv_continuous(rv_generic):
            Log of the survival function evaluated at x
        """
        loc,scale=map(kwds.get,['loc','scale'])
-        args, loc, scale = self._fix_loc_scale(args, loc, scale)
+        args, loc, scale = self.fix_loc_scale(args, loc, scale)
        x,loc,scale = map(arr,(x,loc,scale))
        args = tuple(map(arr,args))
        x = (x-loc)*1.0/scale
@ -1693,6 +1697,7 @@ class rv_continuous(rv_generic):
        output.fill(NINF)
        place(output,(1-cond0)*(cond1==cond1),self.badvalue)
        place(output,cond2,0.0)
+        if any(cond):
            goodargs = argsreduce(cond, *((x,)+args))
            place(output,cond,self._logsf(*goodargs))
        if output.ndim == 0:
@ -1729,21 +1734,7 @@ class rv_continuous(rv_generic):
        cond1 = (q > 0) & (q < 1)
        cond2 = (q==1) & cond0
        cond = cond0 & cond1
-  
-        # begin CAT patch 1: 17 MARCH 2010
-        # If expr in value is an array of shape=(N,1), valarray's call to repeat()
-        #   replicates it N times, so output gets shape=(N,N,1). When .reshape(N,1)
-        #   is invoked --> exception. The following may not be robust:
        output = valarray(shape(cond),value=self.a*scale + loc)
- 
-        #initial_value = self.a * scale + loc
-        #if product(shape(initial_value)) > 1:   # side effects?
-        #    output = initial_value
-        #else:
-        #    output = valarray(shape(cond),value=initial_value)
-        # end CAT patch 1: 17 MARCH 2010
- 
-        # This line is good
        place(output,(1-cond0)+(1-cond1)*(q!=0.0), self.badvalue)
        
        # begin CAT patch 2: 17 MARCH 2010
@ -1798,22 +1789,8 @@ class rv_continuous(rv_generic):
        cond2 = (q==1) & cond0
        cond = cond0 & cond1
   
-        # begin CAT patch 3: 18 MARCH 2010
-        # If expr in value is an array of shape=(N,1), valarray's call to the
-        #   repeat() method will replicate it N times, so output becomes NxNx1.
-        #   The subsequent call to .reshape(N,1) --> exception. 
-        # This statement also has the unfortunate side effect of being wrong
-        #   when the support of the distribution is bounded above.
-        #WASoutput = valarray(shape(cond),value=self.b)
        initial_value = self.b * scale + loc
-        
-        #if product(shape(initial_value)) > 1:
-        #    output = initial_value
-        #else:
        output = valarray(shape(cond), value=initial_value)
-
-        #end CAT patch 3: 18 MARCH 2010
-        
        #place(output,(1-cond0)*(cond1==cond1), self.badvalue)
        place(output,(1-cond0)*(cond1==cond1)+(1-cond1)*(q!=0.0), self.badvalue)

@ -1830,7 +1807,7 @@ class rv_continuous(rv_generic):
 			proxy_value = extract(cond2, proxy_value * cond2)
        place(output, cond2, proxy_value)
        
-        #end CAT patch 4: 18 MARCH 2010
+
        if any(cond):  #call only if at least 1 entry
            goodargs = argsreduce(cond, *((q,)+args+(scale,loc)))  #PB replace 1-q by q
            scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
@ -1891,8 +1868,7 @@ class rv_continuous(rv_generic):

        signature = inspect.getargspec(self._stats.im_func)
        if (signature[2] is not None) or ('moments' in signature[0]):
-            #this did not fetch mv, adjust to also get mv
-            mu, mu2, g1, g2 = self._stats(*args, **{'moments':moments + 'mv'})
+            mu, mu2, g1, g2 = self._stats(*args,**{'moments':moments})
        else:
            mu, mu2, g1, g2 = self._stats(*args)
        if g1 is None:
@ -1903,6 +1879,7 @@ class rv_continuous(rv_generic):
        output = []

        # Use only entries that are valid in calculation
+        if any(cond):
            goodargs = argsreduce(cond, *(args+(scale,loc)))
            scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
            if 'm' in moments:
@ -1955,6 +1932,11 @@ class rv_continuous(rv_generic):
                out0 = default.copy()
                place(out0,cond,g2)
                output.append(out0)
+        else: #no valid args
+            output = []
+            for _ in moments:
+                out0 = default.copy()
+                output.append(out0)

        if len(output) == 1:
            return output[0]
@ -1975,6 +1957,8 @@ class rv_continuous(rv_generic):
            instance object for more information)

        """
+        if not self._argcheck(*args):
+            return nan
        if (floor(n) != n):
            raise ValueError("Moment must be an integer.")
        if (n < 0): raise ValueError("Moment must be positive.")
@ -2460,7 +2444,7 @@ class rv_continuous(rv_generic):

        """
        loc,scale=map(kwds.get,['loc','scale'])
-        args, loc, scale = self._fix_loc_scale(args, loc, scale)
+        args, loc, scale = self.fix_loc_scale(args, loc, scale)
        args = tuple(map(arr,args))
        cond0 = self._argcheck(*args) & (scale > 0) & (loc==loc)
        output = zeros(shape(cond0),'d')
@ -2510,11 +2494,12 @@ class rv_continuous(rv_generic):
            def fun(x, *args):
                return func(x)*self.pdf(x, *args, **{'loc':loc, 'scale':scale})
        if lb is None:
-            lb = (self.a - loc) / (1.0 * scale)
+            lb = loc + self.a * scale
        if ub is None:
-            ub = (self.b - loc) / (1.0 * scale)
+            ub = loc + self.b * scale
        if conditional:
-            invfac = self.sf(lb, *args) - self.sf(ub, *args)
+            invfac = (self.sf(lb, *args, **{'loc':loc, 'scale':scale})
+                      - self.sf(ub, *args, **{'loc':loc, 'scale':scale}))
        else:
            invfac = 1.0
        kwds['args'] = args
@ -2683,7 +2668,7 @@ class beta_gen(rv_continuous):
        Px /= special.beta(a,b)
        return Px
    def _logpdf(self, x, a, b):
-        lPx = (b - 1.0) * log(1.0 - x) + (a - 1.0) * log(x)
+        lPx = (b-1.0)*log1p(-x) + (a-1.0)*log(x)
        lPx -= log(special.beta(a,b))
        return lPx
    def _cdf(self, x, a, b):
@ -2702,7 +2687,7 @@ class beta_gen(rv_continuous):
        g2 = _kurtosis(data)
        def func(x):
            a, b = x
-            sk = 2 * (b - a) * math.sqrt(a + b + 1) / (a + b + 2) / math.sqrt(a * b)
+            sk = 2*(b-a)*sqrt(a + b + 1) / (a + b + 2) / sqrt(a*b)
            ku = a**3 - a**2*(2*b-1) + b**2*(b+1) - 2*a*b*(b+2)
            ku /= a*b*(a+b+2)*(a+b+3)
            ku *= 6
@ -2740,7 +2725,7 @@ class betaprime_gen(rv_continuous):
    def _pdf(self, x, a, b):
        return 1.0/special.beta(a,b)*x**(a-1.0)/(1+x)**(a+b)
    def _logpdf(self, x, a, b):
-        return (a - 1.0) * log(x) - (a + b) * log(1 + x) - log(special.beta(a, b))
+        return (a-1.0)*log(x) - (a+b)*log1p(x) - log(special.beta(a,b))
    def _cdf_skip(self, x, a, b):
        # remove for now: special.hyp2f1 is incorrect for large a
        x = where(x==1.0, 1.0-1e-6,x)
@ -2774,13 +2759,13 @@ for x > 0, a, b > 0.

 class bradford_gen(rv_continuous):
    def _pdf(self, x, c):
-        return  c / (c * x + 1.0) / log(1.0 + c)
+        return  c / (c*x + 1.0) / log1p(c)
    def _cdf(self, x, c):
-        return log(1.0 + c * x) / log(c + 1.0)
+        return log1p(c*x) / log1p(c)
    def _ppf(self, q, c):
        return ((1.0+c)**q-1)/c
    def _stats(self, c, moments='mv'):
-        k = log(1.0 + c)
+        k = log1p(c)
        mu = (c-k)/(c*k)
        mu2 = ((c+2.0)*k-2.0*c)/(2*c*k*k)
        g1 = None
@ -2794,7 +2779,7 @@ class bradford_gen(rv_continuous):
            g2 /= 3*c*(c*(k-2)+2*k)**2
        return mu, mu2, g1, g2
    def _entropy(self, c):
-        k = log(1 + c)
+        k = log1p(c)
        return k / 2.0 - log(c / k)
 bradford = bradford_gen(a=0.0, b=1.0, name='bradford', longname="A Bradford",
                        shapes='c', extradoc="""
@ -2940,7 +2925,10 @@ class chi2_gen(rv_continuous):
    def _pdf(self, x, df):
        return exp(self._logpdf(x, df))
    def _logpdf(self, x, df):
-        return (df / 2. - 1) * log(x) - x / 2. - gamln(df / 2.) - (log(2) * df) / 2.
+        #term1 = (df/2.-1)*log(x)
+        #term1[(df==2)*(x==0)] = 0
+        #avoid 0*log(0)==nan
+        return (df/2.-1)*log(x+1e-300) - x/2. - gamln(df/2.) - (log(2)*df)/2.
 ##        Px = x**(df/2.0-1)*exp(-x/2.0)
 ##        Px /= special.gamma(df/2.0)* 2**(df/2.0)
 ##        return log(Px)
@ -3152,7 +3140,7 @@ class exponweib_gen(rv_continuous):
        return a*c*(1-exc)**arr(a-1) * exc * x**(c-1)
    def _logpdf(self, x, a, c):
        exc = exp(-x**c)
-        return log(a) + log(c) + (a - 1.) * log(1 - exc) - x ** c + (c - 1.0) * log(x)
+        return log(a) + log(c) + (a-1.)*log1p(-exc) - x**c + (c-1.0)*log(x)
    def _cdf(self, x, a, c):
        exm1c = -expm1(-x**c)
        return arr((exm1c)**a)
@ -3911,7 +3899,7 @@ class gompertz_gen(rv_continuous):
    def _cdf(self, x, c):
        return 1.0-exp(-c*(exp(x)-1))
    def _ppf(self, q, c):
-        return log(1 - 1.0 / c * log(1 - q))
+        return log1p(-1.0/c*log1p(-q))
    def _entropy(self, c):
        return 1.0 - log(c) - exp(c)*special.expn(1,c)
 gompertz = gompertz_gen(a=0.0, name='gompertz',
@ -3963,7 +3951,7 @@ class gumbel_l_gen(rv_continuous):
    def _cdf(self, x):
        return expm1(-exp(x))
    def _ppf(self, q):
-        return log(-log(1 - q))
+        return log(-log1p(-q))
    def _stats(self):
        return -_EULER, pi*pi/6.0, \
               -12*sqrt(6)/pi**3 * _ZETA3, 12.0/5
@ -4713,7 +4701,7 @@ class t_gen(rv_continuous):
    def _logpdf(self, x, df):
        r = df*1.0
        lPx = gamln((r+1)/2)-gamln(r/2)
-        lPx -= 0.5 * log(r * pi) + (r + 1) / 2 * log(1 + (x ** 2) / r)
+        lPx -= 0.5*log(r*pi) + (r+1)/2*log1p((x**2)/r)
        return lPx
    def _cdf(self, x, df):
        return special.stdtr(df, x)
@ -4858,13 +4846,13 @@ class lomax_gen(rv_continuous):
    def _pdf(self, x, c):
        return c*1.0/(1.0+x)**(c+1.0)
    def _logpdf(self, x, c):
-        return log(c) - (c + 1) * log(1 + x)
+        return log(c) - (c+1)*log1p(x)
    def _cdf(self, x, c):
        return 1.0-1.0/(1.0+x)**c
    def _sf(self, x, c):
        return 1.0/(1.0+x)**c
    def _logsf(self, x, c):
-        return - c * log(1 + x)
+        return -c*log1p(x)
    def _ppf(self, q, c):
        return pow(1.0-q,-1.0/c)-1
    def _stats(self, c):
@ -4999,7 +4987,7 @@ class rayleigh_gen(rv_continuous):
    def _cdf(self, r):
        return - expm1(-r * r / 2.0)
    def _ppf(self, q):
-        return sqrt(-2 * log(1 - q))
+        return sqrt(-2 * log1p(-q))
    def _stats(self):
        val = 4-pi
        return np.sqrt(pi/2), val/2, 2*(pi-3)*sqrt(pi)/val**1.5, \
@ -5930,6 +5918,7 @@ class rv_discrete(rv_generic):
        cond = cond0 & cond1
        output = zeros(shape(cond),'d')
        place(output,(1-cond0)*(cond1==cond1),self.badvalue)
+        if any(cond):
            goodargs = argsreduce(cond, *((k,)+args))
            place(output,cond,self._pmf(*goodargs))
        if output.ndim == 0:
@ -5968,6 +5957,7 @@ class rv_discrete(rv_generic):
        output = empty(shape(cond),'d')
        output.fill(NINF)
        place(output,(1-cond0)*(cond1==cond1),self.badvalue)
+        if any(cond):
            goodargs = argsreduce(cond, *((k,)+args))
            place(output,cond,self._logpmf(*goodargs))
        if output.ndim == 0:
@ -6087,6 +6077,7 @@ class rv_discrete(rv_generic):
        output = zeros(shape(cond),'d')
        place(output,(1-cond0)*(cond1==cond1),self.badvalue)
        place(output,cond2,1.0)
+        if any(cond):
            goodargs = argsreduce(cond, *((k,)+args))
            place(output,cond,self._sf(*goodargs))
        if output.ndim == 0:
@ -6126,6 +6117,7 @@ class rv_discrete(rv_generic):
        output.fill(NINF)
        place(output,(1-cond0)*(cond1==cond1),self.badvalue)
        place(output,cond2,0.0)
+        if any(cond):
            goodargs = argsreduce(cond, *((k,)+args))
            place(output,cond,self._logsf(*goodargs))
        if output.ndim == 0:
@ -6349,6 +6341,8 @@ class rv_discrete(rv_generic):
            instance object for more information)

        """
+        if not self._argcheck(*args):
+            return nan
        if (floor(n) != n):
            raise ValueError("Moment must be an integer.")
        if (n < 0): raise ValueError("Moment must be positive.")
@ -6483,10 +6477,18 @@ class rv_discrete(rv_generic):
        self._argcheck(*args) # (re)generate scalar self.a and self.b
        if lb is None:
            lb = (self.a)
+        else:
+            lb = lb - loc   #convert bound for standardized distribution
        if ub is None:
            ub = (self.b)
+        else:
+            ub = ub - loc   #convert bound for standardized distribution
        if conditional:
-            invfac = self.sf(lb, *args) - self.sf(ub + 1, *args)
+            if np.isposinf(ub)[()]:
+                #work around bug: stats.poisson.sf(stats.poisson.b, 2) is nan
+                invfac = 1 - self.cdf(lb-1,*args)
+            else:
+                invfac = 1 - self.cdf(lb-1,*args) - self.sf(ub,*args)
        else:
            invfac = 1.0

@ -6605,7 +6607,7 @@ class bernoulli_gen(binom_gen):
    def _stats(self, pr):
        return binom._stats(1, pr)
    def _entropy(self, pr):
-        return - pr * log(pr) - (1 - pr) * log(1 - pr)
+        return -pr*log(pr)-(1-pr)*log1p(-pr)
 bernoulli = bernoulli_gen(b=1,name='bernoulli',shapes="pr",extradoc="""

 Bernoulli distribution
@ -6641,7 +6643,7 @@ class nbinom_gen(rv_discrete):
        return coeff * power(pr,n) * power(1-pr,x)
    def _logpmf(self, x, n, pr):
        coeff = gamln(n+x) - gamln(x+1) - gamln(n)
-        return coeff + n * log(pr) + x * log(1 - pr)
+        return coeff + n*log(pr) + x*log1p(-pr)
    def _cdf(self, x, n, pr):
        k = floor(x)
        return special.betainc(n, k+1, pr)
@ -6681,7 +6683,7 @@ class geom_gen(rv_discrete):
    def _pmf(self, k, pr):
        return (1-pr)**(k-1) * pr
    def _logpmf(self, k, pr):
-        return (k - 1) * log(1 - pr) + pr
+        return (k-1)*log1p(-pr) + pr
    def _cdf(self, x, pr):
        k = floor(x)
        return (1.0-(1.0-pr)**k)
@ -6689,7 +6691,7 @@ class geom_gen(rv_discrete):
        k = floor(x)
        return (1.0-pr)**k
    def _ppf(self, q, pr):
-        vals = ceil(log(1.0 - q) / log(1 - pr))
+        vals = ceil(log1p(-q)/log1p(-pr))
        temp = 1.0-(1.0-pr)**(vals-1)
        return where((temp >= q) & (vals > 0), vals-1, vals)
    def _stats(self, pr):
@ -6778,9 +6780,9 @@ class logser_gen(rv_discrete):
    def _argcheck(self, pr):
        return (pr > 0) & (pr < 1)
    def _pmf(self, k, pr):
-        return - pr ** k * 1.0 / k / log(1 - pr)
+        return -pr**k * 1.0 / k / log1p(-pr)
    def _stats(self, pr):
-        r = log(1 - pr)
+        r = log1p(-pr)
        mu = pr / (pr - 1.0) / r
        mu2p = -pr / r / (pr-1.0)**2
        var = mu2p - mu*mu
@ -6798,7 +6800,7 @@ logser = logser_gen(a=1, name='logser', longname='A logarithmic',

 Logarithmic (Log-Series, Series) distribution

-logser.pmf(k,p) = - p**k / (k*log(1-p))
+logser.pmf(k,p) = - p**k / (k*log1p(-p))
 for k >= 1
 """
                    )
@ -6891,7 +6893,7 @@ class boltzmann_gen(rv_discrete):
        return (1-exp(-lambda_*(k+1)))/(1-exp(-lambda_*N))
    def _ppf(self, q, lambda_, N):
        qnew = q*(1-exp(-lambda_*N))
-        vals = ceil(-1.0 / lambda_ * log(1 - qnew) - 1)
+        vals = ceil(-1.0/lambda_ * log1p(-qnew)-1)
        vals1 = (vals-1).clip(0.0, np.inf)
        temp = self._cdf(vals1, lambda_, N)
        return where(temp >= q, vals1, vals)