Mainly updated the documentation

15 years ago · 9d10be02be
parent 51de2e3530
commit 9d10be02be
2 changed files with 196 additions and 75 deletions
--- a/pywafo/src/wafo/stats/distributions.py
+++ b/pywafo/src/wafo/stats/distributions.py
@ -96,7 +96,14 @@ from scipy.misc import doccer
 all = alltrue
 sgf = vectorize
-import new
+
 try:
    from new import instancemethod
 except ImportError:
    # Python 3
    def instancemethod(func, obj, cls):
        return types.MethodType(func, obj)
 # These are the docstring parts used for substitution in specific
 # distribution docstrings.
@ -280,7 +287,13 @@ docdict_discrete['default'] = _doc_default_disc
 # clean up all the separate docstring elements, we do not need them anymore
 for obj in [s for s in dir() if s.startswith('_doc_')]:
    exec('del ' + obj)
-del s, obj
+del obj
 try:
    del s
 except NameError:
    # in Python 3, loop variables are not visible after the loop
    pass
 def _build_random_array(fun, args, size=None):
@ -854,7 +867,7 @@ class rv_generic(object):
        # self._size is total size of all output values
        self._size = product(size, axis=0)
-        if self._size > 1:
+        if self._size is not None and self._size > 1:
            size = numpy.array(size, ndmin=1)
        if np.all(scale == 0):
@ -4202,7 +4215,7 @@ class loggamma_gen(rv_continuous):
    def _munp(self,n,*args):
        # use generic moment calculation using ppf
        return self._mom0_sc(n,*args)
-loggamma = loggamma_gen(name='loggamma', longname="A log gamma",
+loggamma = loggamma_gen(name='loggamma', longname="A log gamma", shapes='c',
                        extradoc="""
 Log gamma distribution
@ -5489,7 +5502,7 @@ class rv_discrete(rv_generic):
                 shapes=None, extradoc=None):
        super(rv_generic,self).__init__()
-        self.fix_loc = self._fix_loc
+
        if badvalue is None:
            badvalue = nan
        self.badvalue = badvalue
@ -5518,16 +5531,16 @@ class rv_discrete(rv_generic):
            self.qvals = numpy.cumsum(self.pk,axis=0)
            self.F = make_dict(self.xk, self.qvals)
            self.Finv = reverse_dict(self.F)
-            self._ppf = new.instancemethod(sgf(_drv_ppf,otypes='d'),
+            self._ppf = instancemethod(sgf(_drv_ppf,otypes='d'),
                                       self, rv_discrete)
-            self._pmf = new.instancemethod(sgf(_drv_pmf,otypes='d'),
+            self._pmf = instancemethod(sgf(_drv_pmf,otypes='d'),
                                       self, rv_discrete)
-            self._cdf = new.instancemethod(sgf(_drv_cdf,otypes='d'),
+            self._cdf = instancemethod(sgf(_drv_cdf,otypes='d'),
                                       self, rv_discrete)
-            self._nonzero = new.instancemethod(_drv_nonzero, self, rv_discrete)
+            self._nonzero = instancemethod(_drv_nonzero, self, rv_discrete)
-            self.generic_moment = new.instancemethod(_drv_moment,
+            self.generic_moment = instancemethod(_drv_moment,
                                                 self, rv_discrete)
-            self.moment_gen = new.instancemethod(_drv_moment_gen,
+            self.moment_gen = instancemethod(_drv_moment_gen,
                                             self, rv_discrete)
            self.numargs=0
        else:
@ -5541,13 +5554,13 @@ class rv_discrete(rv_generic):
            #correct nin for generic moment vectorization
            self.vec_generic_moment = sgf(_drv2_moment, otypes='d')
            self.vec_generic_moment.nin = self.numargs + 2
-            self.generic_moment = new.instancemethod(self.vec_generic_moment,
+            self.generic_moment = instancemethod(self.vec_generic_moment,
                                                 self, rv_discrete)
            #correct nin for ppf vectorization
            _vppf = sgf(_drv2_ppfsingle,otypes='d')
            _vppf.nin = self.numargs + 2 # +1 is for self
-            self._vecppf = new.instancemethod(_vppf,
+            self._vecppf = instancemethod(_vppf,
                                          self, rv_discrete)
@ -5908,6 +5921,8 @@ class rv_discrete(rv_generic):
            instance object for more information)
        loc : array-like, optional
            location parameter (default=0)
        scale: array-like, optional
            scale parameter (default=1)
        Returns
        -------
--- a/pywafo/src/wafo/stats/estimation.py
+++ b/pywafo/src/wafo/stats/estimation.py
@ -57,32 +57,24 @@ def valarray(shape, value=nan, typecode=None):
 class rv_frozen(object):
    ''' Frozen continous or discrete 1D Random Variable object (RV)
-    RV.rvs(size=1)
+    Methods
-        - random variates
+    -------
-
+    rvs(size=1)
-    RV.pdf(x)
+        Random variates.
-        - probability density function (continous case)
+    pdf(x)
-
+        Probability density function.
-    RV.pmf(x)
+    cdf(x)
-        - probability mass function (discrete case)
+        Cumulative density function.
-
+    sf(x)
-    RV.cdf(x)
+        Survival function (1-cdf --- sometimes more accurate).
-        - cumulative density function
+    ppf(q)
-
+        Percent point function (inverse of cdf --- percentiles).
-    RV.sf(x)
+    isf(q)
-        - survival function (1-cdf --- sometimes more accurate)
+        Inverse survival function (inverse of sf).
-
+    stats(moments='mv')
-    RV.ppf(q)
+        Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
-        - percent point function (inverse of cdf --- percentiles)
+    entropy()
-
+        (Differential) entropy of the RV.
    RV.isf(q)
        - inverse survival function (inverse of sf)
    RV.stats(moments='mv')
        - mean('m',axis=0), variance('v'), skew('s'), and/or kurtosis('k')
    RV.entropy()
        - (differential) entropy of the RV.
    '''
    def __init__(self, dist, *args, **kwds):
        self.dist = dist
@ -200,8 +192,10 @@ class Profile(object):
    #MLE and better CI for phat.par[0]
    >>> import numpy as np
    >>> R = weibull_min.rvs(1,size=100);
-    >>> phat = weibull_min.fit(R,1,1,par_fix=[np.nan,0.,np.nan])
+    >>> phat = FitDistribution(ws.weibull_min, R,1,scale=1, floc=0.0)
-    >>> Lp = Profile(phat,i=0)
+    
    >>> Lp = Profile(phat, i=0)
    >>> Lp.plot()
    >>> Lp.get_CI(alpha=0.1)
    >>> SF = 1./990
@ -434,38 +428,150 @@ class Profile(object):
        plotbackend.ylabel(self.ylabel)
        plotbackend.xlabel(self.xlabel)
-# internal class to fit given distribution to data
+# class to fit given distribution to data
 class FitDistribution(rv_frozen):
-    def __init__(self, dist, data, *args, **kwds):
+    '''
-        extradoc = '''
+    Return estimators to shape, location, and scale from data
-        RV.plotfitsumry() - Plot various diagnostic plots to asses quality of fit.
+    Starting points for the fit are given by input arguments.  For any
-        RV.plotecdf()     - Plot Empirical and fitted Cumulative Distribution Function
+    arguments not given starting points, dist._fitstart(data) is called
-        RV.plotesf()      - Plot Empirical and fitted Survival Function
+    to get the starting estimates.
        RV.plotepdf()     - Plot Empirical and fitted Probability Distribution Function
        RV.plotresq()     - Displays a residual quantile plot.
        RV.plotresprb()   - Displays a residual probability plot.
-        RV.profile()      - Return Profile Log- likelihood or Product Spacing-function.
+    You can hold some parameters fixed to specific values by passing in
    keyword arguments f0..fn for shape paramters and floc, fscale for
    location and scale parameters.
-        Member variables
+    Parameters
-        ----------------
+    ----------
-        data - data used in fitting
+    dist : scipy distribution object
-        alpha - confidence coefficient
+        distribution to fit to data
-        method - method used
+    data : array-like
-        LLmax  - loglikelihood function evaluated using par
+        Data to use in calculating the ML or MPS estimators
-        LPSmax - log product spacing function evaluated using par
+    args : optional
-        pvalue - p-value for the fit
+        Starting values for any shape arguments (those not specified
-        search - True if search for distribution parameters (default)
+        will be determined by _fitstart(data))
-        copydata - True if copy input data (default)
+    kwds : loc, scale
-
+        Starting values for the location and scale parameters
-        par     - parameters (fixed and fitted)
+        Special keyword arguments are recognized as holding certain
-        par_cov - covariance of parameters
+        parameters fixed:
-        par_fix - fixed parameters
+            f0..fn : hold respective shape paramters fixed
-        par_lower - lower (1-alpha)% confidence bound for the parameters
+            floc : hold location parameter fixed to specified value
-        par_upper - upper (1-alpha)% confidence bound for the parameters
+            fscale : hold scale parameter fixed to specified value
        method : of estimation. Options are
            'ml' : Maximum Likelihood method (default)
            'mps': Maximum Product Spacing method
        alpha : scalar, optional
            Confidence coefficent  (default=0.05)
        search : bool
            If true search for best estimator (default),
            otherwise return object with initial distribution parameters
        copydata : bool
            If true copydata (default)
        optimizer : The optimizer to use.  The optimizer must take func,
                     and starting position as the first two arguments,
                     plus args (for extra arguments to pass to the
                     function to be optimized) and disp=0 to suppress
                     output as keyword arguments.
    Return
    ------
    phat : FitDistribution object
        Fitted distribution object with following member variables:
        LLmax  : loglikelihood function evaluated using par
        LPSmax : log product spacing function evaluated using par
        pvalue : p-value for the fit
        par : distribution parameters (fixed and fitted)
        par_cov : covariance of distribution parameters
        par_fix : fixed distribution parameters
        par_lower : lower (1-alpha)% confidence bound for the parameters
        par_upper : upper (1-alpha)% confidence bound for the parameters
    Note
    ----
    `data` is sorted using this function, so if `copydata`==False the data
    in your namespace will be sorted as well.
    Examples
    --------
    Estimate distribution parameters for weibull_min distribution.
    >>> import wafo.stats as ws
    >>> import numpy as np
    >>> R = ws.weibull_min.rvs(1,size=100);
    >>> phat = FitDistribution(ws.weibull_min, R, 1, scale=1, floc=0.0)
    #Plot various diagnostic plots to asses quality of fit.
    >>> phat.plotfitsumry() 
    #phat.par holds the estimated parameters
    #phat.par_upper upper CI for parameters
    #phat.par_lower lower CI for parameters
    #Better CI for phat.par[0]
    >>> Lp = Profile(phat,i=0)
    >>> Lp.plot()
    >>> Lp.get_CI(alpha=0.1)
    >>> SF = 1./990
    >>> x = phat.isf(SF)
    # CI for x
    >>> Lx = phat.profile(i=0,x=x,link=phat.dist.link)
    >>> Lx.plot()
    >>> Lx.get_CI(alpha=0.2)
    # CI for logSF=log(SF)
    >>> Lpr = phat.profile(i=0,logSF=log(SF),link = phat.dist.link) 
    '''
    def __init__(self, dist, data, *args, **kwds):
        extradoc = '''
    plotfitsumry()
         Plot various diagnostic plots to asses quality of fit.
    plotecdf()
        Plot Empirical and fitted Cumulative Distribution Function
    plotesf()
        Plot Empirical and fitted Survival Function
    plotepdf()
        Plot Empirical and fitted Probability Distribution Function
    plotresq()
        Displays a residual quantile plot.
    plotresprb()
        Displays a residual probability plot.
    profile()
        Return Profile Log- likelihood or Product Spacing-function.
    Parameters
    ----------
    x : array-like
        quantiles
    q : array-like
        lower or upper tail probability
    size : int or tuple of ints, optional
        shape of random variates (default computed from input arguments )
    moments : str, optional
        composed of letters ['mvsk'] specifying which moments to compute where
        'm' = mean, 'v' = variance, 's' = (Fisher's) skew and
        'k' = (Fisher's) kurtosis. (default='mv')
       '''
 #    Member variables
 #    ----------------
 #    data - data used in fitting
 #    alpha - confidence coefficient
 #    method - method used
 #    LLmax  - loglikelihood function evaluated using par
 #    LPSmax - log product spacing function evaluated using par
 #    pvalue - p-value for the fit
 #    search - True if search for distribution parameters (default)
 #    copydata - True if copy input data (default)
 #
 #    par     - parameters (fixed and fitted)
 #    par_cov - covariance of parameters
 #    par_fix - fixed parameters
 #    par_lower - lower (1-alpha)% confidence bound for the parameters
 #    par_upper - upper (1-alpha)% confidence bound for the parameters
 #
 #        '''
        self.__doc__ = rv_frozen.__doc__ + extradoc
        self.dist = dist
        numargs = dist.numargs