Updated distributions.py so it is in accordance with scipy.stats.distributions.py

15 years ago · 24360c33f8
parent e77fc13a66
commit 24360c33f8
2 changed files with 2650 additions and 1510 deletions
--- a/pywafo/src/wafo/stats/distributions.py
+++ b/pywafo/src/wafo/stats/distributions.py
--- a/pywafo/src/wafo/stats/estimation.py
+++ b/pywafo/src/wafo/stats/estimation.py
@ -55,7 +55,7 @@ def valarray(shape, value=nan, typecode=None):
    if typecode is not None:
        out = out.astype(typecode)
    if not isinstance(out, ndarray):
-        out = asarray(out)
+        out = arr(out)
    return out 
 # Frozen RV class
@ -99,7 +99,6 @@ class rv_frozen(object):
            args, loc0 = dist.fix_loc(args, loc0)
            self.par = args + (loc0,)
    def pdf(self, x):
        '''Probability density function at x of the given RV.

        The call is forwarded to ``self.dist.pdf`` with the frozen
        parameters stored in ``self.par`` appended after ``x``.
        '''
        density = self.dist.pdf
        return density(x, *self.par)
@ -123,6 +122,14 @@ class rv_frozen(object):
        ''' Some statistics of the given RV'''
        kwds = dict(moments=moments)
        return self.dist.stats(*self.par, **kwds)
    def median(self):
        '''Median of the given RV.

        Forwards ``self.par`` positionally and ``self.kwds`` as keywords
        to ``self.dist.median``.
        '''
        median_of = self.dist.median
        return median_of(*self.par, **self.kwds)
    def mean(self):
        '''Mean of the given RV.

        Forwards ``self.par`` positionally and ``self.kwds`` as keywords
        to ``self.dist.mean``.
        '''
        mean_of = self.dist.mean
        return mean_of(*self.par, **self.kwds)
    def var(self):
        '''Variance of the given RV.

        Forwards ``self.par`` positionally and ``self.kwds`` as keywords
        to ``self.dist.var``.
        '''
        variance_of = self.dist.var
        return variance_of(*self.par, **self.kwds)
    def std(self):
        '''Standard deviation of the given RV.

        Forwards ``self.par`` positionally and ``self.kwds`` as keywords
        to ``self.dist.std``.
        '''
        std_of = self.dist.std
        return std_of(*self.par, **self.kwds)
    def moment(self, n):
        '''n-th moment of the given RV, as computed by ``self.dist.moment``.

        Only the first ``self.dist.numargs`` entries of ``self.par`` are
        passed on after ``n``.
        '''
        frozen = self.par
        leading = frozen[:self.dist.numargs]
        return self.dist.moment(n, *leading)
@ -131,6 +138,8 @@ class rv_frozen(object):
    def pmf(self, k):
        '''Probability mass function at k of the given RV.

        The call is forwarded to ``self.dist.pmf`` with the frozen
        parameters stored in ``self.par`` appended after ``k``.
        '''
        mass = self.dist.pmf
        return mass(k, *self.par)
    def interval(self, alpha):
        '''Interval of the given RV for the level ``alpha``.

        Forwards ``alpha``, then ``self.par`` positionally and
        ``self.kwds`` as keywords, to ``self.dist.interval``.
        '''
        bounds_of = self.dist.interval
        return bounds_of(alpha, *self.par, **self.kwds)
@ -525,8 +534,8 @@ class FitDistribution(rv_frozen):
        self.par_cov = zeros((np, np))
        self.LLmax = -dist.nnlf(self.par, self.data)
        self.LPSmax = -dist.nlogps(self.par, self.data)
-        self.pvalue = dist.pvalue(self.par, self.data, unknown_numpar=numpar)
+        self.pvalue = self._pvalue(self.par, self.data, unknown_numpar=numpar)
-        H = numpy.asmatrix(dist.hessian_nnlf(self.par, self.data))
+        H = numpy.asmatrix(self._hessian_nnlf(self.par, self.data))
        self.H = H
        try:
            if allfixed:
@ -772,7 +781,7 @@ class FitDistribution(rv_frozen):
-    def pvalue(self, theta, x, unknown_numpar=None):
+    def _pvalue(self, theta, x, unknown_numpar=None):
        ''' Return the P-value for the fit using Moran's negative log Product Spacings statistic
            where theta are the parameters (including loc and scale)
@ -784,7 +793,7 @@ class FitDistribution(rv_frozen):
        if any(tie):
            print('P-value is on the conservative side (i.e. too large) due to ties in the data!')
-        T = self.nlogps(theta, x)
+        T = self.dist.nlogps(theta, x)
        n = len(x)
        np1 = n + 1
@ -802,115 +811,8 @@ class FitDistribution(rv_frozen):
        pvalue = chi2sf(Tn, n) #_WAFODIST.chi2.sf(Tn, n)
        return pvalue
    def nlogps(self, theta, x):
        """ Moran's negative log Product Spacings statistic

        Parameters
        ----------
        theta : sequence
            the parameters (including loc and scale); the last two entries
            are read as loc and scale, the preceding ones are passed on to
            the distribution as shape arguments.
        x : array-like
            the data. Note the data in x must be sorted.

        Returns
        -------
        T : float
            inf when the shape arguments fail ``self.dist._argcheck``, when
            scale <= 0, or when any standardized observation lies on or
            outside the support limits ``self.dist.a`` / ``self.dist.b``;
            otherwise minus the sum of the log spacings, where each
            non-finite log spacing is replaced by a large penalty.

        Raises
        ------
        ValueError
            if theta holds fewer than two entries.

        References
        -----------
        R. C. H. Cheng; N. A. K. Amin (1983)
        "Estimating Parameters in Continuous Univariate Distributions with a
        Shifted Origin.",
        Journal of the Royal Statistical Society. Series B (Methodological),
        Vol. 45, No. 3. (1983), pp. 394-403.
        R. C. H. Cheng; M. A. Stephens (1989)
        "A Goodness-Of-Fit Test Using Moran's Statistic with Estimated
        Parameters", Biometrika, 76, 2, pp 385-392
        Wong, T.S.T. and Li, W.K. (2006)
        "A note on the estimation of extreme value distributions using maximum
        product of spacings.",
        IMS Lecture Notes Monograph Series 2006, Vol. 52, pp. 272-283
        """
        try:
            loc = theta[-2]
            scale = theta[-1]
            args = tuple(theta[:-2])
        except IndexError:
            # Call syntax is valid on both Python 2 and Python 3.
            raise ValueError("Not enough input arguments.")
        if not self.dist._argcheck(*args) or scale <= 0:
            return inf
        x = arr((x - loc) / scale)
        cond0 = (x <= self.dist.a) | (x >= self.dist.b)
        if any(cond0):
            return inf

        # Spacings of the cdf values, padded with 0 and 1 at the two ends.
        prb = numpy.hstack((0.0, self.dist.cdf(x, *args), 1.0))
        logD = log(numpy.diff(prb))

        dx = numpy.diff(x, axis=0)
        tie = (dx == 0)
        if any(tie):
            # TODO % implement this method for treating ties in data:
            # Assume measuring error is delta. Then compute
            # yL = F(xi-delta,theta)
            # yU = F(xi+delta,theta)
            # and replace
            # logDj = log((yU-yL)/(r-1)) for j = i+1,i+2,...i+r-1
            # The following is OK when only minimization of T is wanted
            i_tie = nonzero(tie)
            tiedata = x[i_tie]
            logD[(i_tie[0] + 1,)] = log(self.dist._pdf(tiedata, *args)) + log(scale)

        finiteD = numpy.isfinite(logD)
        nonfiniteD = 1 - finiteD
        if any(nonfiniteD):
            # Penalize every non-finite log spacing instead of returning
            # inf/nan; realmax is only needed on this path.
            realmax = floatinfo.machar.xmax
            return (-sum(logD[finiteD], axis=0)
                    + 100.0 * log(realmax) * sum(nonfiniteD, axis=0))
        # Moran's negative log product spacing statistic
        return -sum(logD, axis=0)
    def _nnlf(self, x, *args):
        return - sum(log(self._pdf(x, *args)), axis=0)
    def nnlf(self, theta, x):
        ''' Return negative loglikelihood function, i.e., - sum (log pdf(x, theta),axis=0)

        Parameters
        ----------
        theta : sequence
            the parameters (including loc and scale); the last two entries
            are read as loc and scale, the preceding ones are used as shape
            arguments.
        x : array-like
            the data.

        Returns
        -------
        inf when the shape arguments fail ``self._argcheck``, when
        scale <= 0, or when any standardized observation lies on or outside
        the limits ``self.a`` / ``self.b``; otherwise
        ``self._nnlf(x, *args) + N * log(scale)`` with N = len(x).

        Raises
        ------
        ValueError
            if theta holds fewer than two entries.
        '''
        try:
            loc = theta[-2]
            scale = theta[-1]
            args = tuple(theta[:-2])
        except IndexError:
            # Call syntax is valid on both Python 2 and Python 3.
            raise ValueError("Not enough input arguments.")
        if not self._argcheck(*args) or scale <= 0:
            return inf
        x = arr((x - loc) / scale)
        cond0 = (x <= self.a) | (self.b <= x)
        if any(cond0):
            return inf
        # _nnlf is evaluated on the standardized data, so add log(scale)
        # once per observation.
        N = len(x)
        return self._nnlf(x, *args) + N * log(scale)
-    def hessian_nnlf(self, theta, data, eps=None):
+    def _hessian_nnlf(self, theta, data, eps=None):
        ''' approximate hessian of nnlf where theta are the parameters (including loc and scale)
        '''
        #Nd = len(x)
@ -922,7 +824,7 @@ class FitDistribution(rv_frozen):
        if eps == None:
            eps = (floatinfo.machar.eps) ** 0.4
-        xmin = floatinfo.machar.xmin
+        #xmin = floatinfo.machar.xmin
        #myfun = lambda y: max(y,100.0*log(xmin)) #% trick to avoid log of zero
        delta = (eps + 2.0) - 2.0
        delta2 = delta ** 2.0
@ -930,36 +832,37 @@ class FitDistribution(rv_frozen):
        #    %             1/(d^2 L(theta|x)/dtheta^2)
        #    %  using central differences
-        LL = self.nnlf(theta, data)
+        dist = self.dist
        LL = dist.nnlf(theta, data)
        H = zeros((np, np))   #%% Hessian matrix
        theta = tuple(theta)
        for ix in xrange(np):
            sparam = list(theta)
            sparam[ix] = theta[ix] + delta
-            fp = self.nnlf(sparam, data)
+            fp = dist.nnlf(sparam, data)
            #fp = sum(myfun(x))
            sparam[ix] = theta[ix] - delta
-            fm = self.nnlf(sparam, data)
+            fm = dist.nnlf(sparam, data)
            #fm = sum(myfun(x))
            H[ix, ix] = (fp - 2 * LL + fm) / delta2
            for iy in range(ix + 1, np):
                sparam[ix] = theta[ix] + delta
                sparam[iy] = theta[iy] + delta
-                fpp = self.nnlf(sparam, data)
+                fpp = dist.nnlf(sparam, data)
                #fpp = sum(myfun(x))
                sparam[iy] = theta[iy] - delta
-                fpm = self.nnlf(sparam, data)
+                fpm = dist.nnlf(sparam, data)
                #fpm = sum(myfun(x))
                sparam[ix] = theta[ix] - delta
-                fmm = self.nnlf(sparam, data)
+                fmm = dist.nnlf(sparam, data)
                #fmm = sum(myfun(x));
                sparam[iy] = theta[iy] + delta
-                fmp = self.nnlf(sparam, data)
+                fmp = dist.nnlf(sparam, data)
                #fmp = sum(myfun(x))
                H[ix, iy] = ((fpp + fmm) - (fmp + fpm)) / (4. * delta2)
                H[iy, ix] = H[ix, iy]