From 9d10be02bec63914a193d8b55d8810a8878a0aec Mon Sep 17 00:00:00 2001 From: "Per.Andreas.Brodtkorb" Date: Tue, 5 Oct 2010 09:11:11 +0000 Subject: [PATCH] Mainly updated the documentation --- pywafo/src/wafo/stats/distributions.py | 53 +++--- pywafo/src/wafo/stats/estimation.py | 218 ++++++++++++++++++------- 2 files changed, 196 insertions(+), 75 deletions(-) diff --git a/pywafo/src/wafo/stats/distributions.py b/pywafo/src/wafo/stats/distributions.py index d9d5c36..17ba980 100644 --- a/pywafo/src/wafo/stats/distributions.py +++ b/pywafo/src/wafo/stats/distributions.py @@ -96,7 +96,14 @@ from scipy.misc import doccer all = alltrue sgf = vectorize -import new + +try: + from new import instancemethod +except ImportError: + # Python 3 + def instancemethod(func, obj, cls): + return types.MethodType(func, obj) + # These are the docstring parts used for substitution in specific # distribution docstrings. @@ -280,7 +287,13 @@ docdict_discrete['default'] = _doc_default_disc # clean up all the separate docstring elements, we do not need them anymore for obj in [s for s in dir() if s.startswith('_doc_')]: exec('del ' + obj) -del s, obj +del obj +try: + del s +except NameError: + # in Python 3, loop variables are not visible after the loop + pass + def _build_random_array(fun, args, size=None): @@ -854,7 +867,7 @@ class rv_generic(object): # self._size is total size of all output values self._size = product(size, axis=0) - if self._size > 1: + if self._size is not None and self._size > 1: size = numpy.array(size, ndmin=1) if np.all(scale == 0): @@ -4202,7 +4215,7 @@ class loggamma_gen(rv_continuous): def _munp(self,n,*args): # use generic moment calculation using ppf return self._mom0_sc(n,*args) -loggamma = loggamma_gen(name='loggamma', longname="A log gamma", +loggamma = loggamma_gen(name='loggamma', longname="A log gamma", shapes='c', extradoc=""" Log gamma distribution @@ -5489,7 +5502,7 @@ class rv_discrete(rv_generic): shapes=None, extradoc=None): super(rv_generic,self).__init__() - self.fix_loc = self._fix_loc + if badvalue is None: badvalue = nan self.badvalue = badvalue @@ -5518,17 +5531,17 @@ class rv_discrete(rv_generic): self.qvals = numpy.cumsum(self.pk,axis=0) self.F = make_dict(self.xk, self.qvals) self.Finv = reverse_dict(self.F) - self._ppf = new.instancemethod(sgf(_drv_ppf,otypes='d'), - self, rv_discrete) - self._pmf = new.instancemethod(sgf(_drv_pmf,otypes='d'), - self, rv_discrete) - self._cdf = new.instancemethod(sgf(_drv_cdf,otypes='d'), - self, rv_discrete) - self._nonzero = new.instancemethod(_drv_nonzero, self, rv_discrete) - self.generic_moment = new.instancemethod(_drv_moment, - self, rv_discrete) - self.moment_gen = new.instancemethod(_drv_moment_gen, + self._ppf = instancemethod(sgf(_drv_ppf,otypes='d'), + self, rv_discrete) + self._pmf = instancemethod(sgf(_drv_pmf,otypes='d'), + self, rv_discrete) + self._cdf = instancemethod(sgf(_drv_cdf,otypes='d'), + self, rv_discrete) + self._nonzero = instancemethod(_drv_nonzero, self, rv_discrete) + self.generic_moment = instancemethod(_drv_moment, self, rv_discrete) + self.moment_gen = instancemethod(_drv_moment_gen, + self, rv_discrete) self.numargs=0 else: cdf_signature = inspect.getargspec(self._cdf.im_func) @@ -5541,14 +5554,14 @@ class rv_discrete(rv_generic): #correct nin for generic moment vectorization self.vec_generic_moment = sgf(_drv2_moment, otypes='d') self.vec_generic_moment.nin = self.numargs + 2 - self.generic_moment = new.instancemethod(self.vec_generic_moment, - self, rv_discrete) + self.generic_moment = instancemethod(self.vec_generic_moment, + self, rv_discrete) #correct nin for ppf vectorization _vppf = sgf(_drv2_ppfsingle,otypes='d') _vppf.nin = self.numargs + 2 # +1 is for self - self._vecppf = new.instancemethod(_vppf, - self, rv_discrete) + self._vecppf = instancemethod(_vppf, + self, rv_discrete) @@ -5908,6 +5921,8 @@ class rv_discrete(rv_generic): instance object for more information) loc : array-like, optional location parameter (default=0) + scale: array-like, optional + scale parameter (default=1) Returns ------- diff --git a/pywafo/src/wafo/stats/estimation.py b/pywafo/src/wafo/stats/estimation.py index 98f373f..4e724ce 100644 --- a/pywafo/src/wafo/stats/estimation.py +++ b/pywafo/src/wafo/stats/estimation.py @@ -56,33 +56,25 @@ def valarray(shape, value=nan, typecode=None): # Frozen RV class class rv_frozen(object): ''' Frozen continous or discrete 1D Random Variable object (RV) - - RV.rvs(size=1) - - random variates - - RV.pdf(x) - - probability density function (continous case) - - RV.pmf(x) - - probability mass function (discrete case) - - RV.cdf(x) - - cumulative density function - - RV.sf(x) - - survival function (1-cdf --- sometimes more accurate) - - RV.ppf(q) - - percent point function (inverse of cdf --- percentiles) - - RV.isf(q) - - inverse survival function (inverse of sf) - - RV.stats(moments='mv') - - mean('m',axis=0), variance('v'), skew('s'), and/or kurtosis('k') - - RV.entropy() - - (differential) entropy of the RV. + + Methods + ------- + rvs(size=1) + Random variates. + pdf(x) + Probability density function. + cdf(x) + Cumulative density function. + sf(x) + Survival function (1-cdf --- sometimes more accurate). + ppf(q) + Percent point function (inverse of cdf --- percentiles). + isf(q) + Inverse survival function (inverse of sf). + stats(moments='mv') + Mean('m'), variance('v'), skew('s'), and/or kurtosis('k'). + entropy() + (Differential) entropy of the RV. ''' def __init__(self, dist, *args, **kwds): self.dist = dist @@ -200,8 +192,10 @@ class Profile(object): #MLE and better CI for phat.par[0] >>> import numpy as np >>> R = weibull_min.rvs(1,size=100); - >>> phat = weibull_min.fit(R,1,1,par_fix=[np.nan,0.,np.nan]) - >>> Lp = Profile(phat,i=0) + >>> phat = FitDistribution(ws.weibull_min, R,1,scale=1, floc=0.0) + + + >>> Lp = Profile(phat, i=0) >>> Lp.plot() >>> Lp.get_CI(alpha=0.1) >>> SF = 1./990 @@ -434,38 +428,150 @@ class Profile(object): plotbackend.ylabel(self.ylabel) plotbackend.xlabel(self.xlabel) -# internal class to fit given distribution to data +# class to fit given distribution to data class FitDistribution(rv_frozen): - def __init__(self, dist, data, *args, **kwds): - extradoc = ''' + ''' + Return estimators to shape, location, and scale from data - RV.plotfitsumry() - Plot various diagnostic plots to asses quality of fit. - RV.plotecdf() - Plot Empirical and fitted Cumulative Distribution Function - RV.plotesf() - Plot Empirical and fitted Survival Function - RV.plotepdf() - Plot Empirical and fitted Probability Distribution Function - RV.plotresq() - Displays a residual quantile plot. - RV.plotresprb() - Displays a residual probability plot. + Starting points for the fit are given by input arguments. For any + arguments not given starting points, dist._fitstart(data) is called + to get the starting estimates. + + You can hold some parameters fixed to specific values by passing in + keyword arguments f0..fn for shape paramters and floc, fscale for + location and scale parameters. + + Parameters + ---------- + dist : scipy distribution object + distribution to fit to data + data : array-like + Data to use in calculating the ML or MPS estimators + args : optional + Starting values for any shape arguments (those not specified + will be determined by _fitstart(data)) + kwds : loc, scale + Starting values for the location and scale parameters + Special keyword arguments are recognized as holding certain + parameters fixed: + f0..fn : hold respective shape paramters fixed + floc : hold location parameter fixed to specified value + fscale : hold scale parameter fixed to specified value + method : of estimation. Options are + 'ml' : Maximum Likelihood method (default) + 'mps': Maximum Product Spacing method + alpha : scalar, optional + Confidence coefficent (default=0.05) + search : bool + If true search for best estimator (default), + otherwise return object with initial distribution parameters + copydata : bool + If true copydata (default) + optimizer : The optimizer to use. The optimizer must take func, + and starting position as the first two arguments, + plus args (for extra arguments to pass to the + function to be optimized) and disp=0 to suppress + output as keyword arguments. + + Return + ------ + phat : FitDistribution object + Fitted distribution object with following member variables: + LLmax : loglikelihood function evaluated using par + LPSmax : log product spacing function evaluated using par + pvalue : p-value for the fit + par : distribution parameters (fixed and fitted) + par_cov : covariance of distribution parameters + par_fix : fixed distribution parameters + par_lower : lower (1-alpha)% confidence bound for the parameters + par_upper : upper (1-alpha)% confidence bound for the parameters + + + Note + ---- + `data` is sorted using this function, so if `copydata`==False the data + in your namespace will be sorted as well. + + Examples + -------- + Estimate distribution parameters for weibull_min distribution. + >>> import wafo.stats as ws + >>> import numpy as np + >>> R = ws.weibull_min.rvs(1,size=100); + >>> phat = FitDistribution(ws.weibull_min, R, 1, scale=1, floc=0.0) + + #Plot various diagnostic plots to asses quality of fit. + >>> phat.plotfitsumry() - RV.profile() - Return Profile Log- likelihood or Product Spacing-function. + #phat.par holds the estimated parameters + #phat.par_upper upper CI for parameters + #phat.par_lower lower CI for parameters - Member variables - ---------------- - data - data used in fitting - alpha - confidence coefficient - method - method used - LLmax - loglikelihood function evaluated using par - LPSmax - log product spacing function evaluated using par - pvalue - p-value for the fit - search - True if search for distribution parameters (default) - copydata - True if copy input data (default) - - par - parameters (fixed and fitted) - par_cov - covariance of parameters - par_fix - fixed parameters - par_lower - lower (1-alpha)% confidence bound for the parameters - par_upper - upper (1-alpha)% confidence bound for the parameters + #Better CI for phat.par[0] + >>> Lp = Profile(phat,i=0) + >>> Lp.plot() + >>> Lp.get_CI(alpha=0.1) + + >>> SF = 1./990 + >>> x = phat.isf(SF) - ''' + # CI for x + >>> Lx = phat.profile(i=0,x=x,link=phat.dist.link) + >>> Lx.plot() + >>> Lx.get_CI(alpha=0.2) + + # CI for logSF=log(SF) + >>> Lpr = phat.profile(i=0,logSF=log(SF),link = phat.dist.link) + ''' + def __init__(self, dist, data, *args, **kwds): + extradoc = ''' + plotfitsumry() + Plot various diagnostic plots to asses quality of fit. + plotecdf() + Plot Empirical and fitted Cumulative Distribution Function + plotesf() + Plot Empirical and fitted Survival Function + plotepdf() + Plot Empirical and fitted Probability Distribution Function + plotresq() + Displays a residual quantile plot. + plotresprb() + Displays a residual probability plot. + + profile() + Return Profile Log- likelihood or Product Spacing-function. + + Parameters + ---------- + x : array-like + quantiles + q : array-like + lower or upper tail probability + size : int or tuple of ints, optional + shape of random variates (default computed from input arguments ) + moments : str, optional + composed of letters ['mvsk'] specifying which moments to compute where + 'm' = mean, 'v' = variance, 's' = (Fisher's) skew and + 'k' = (Fisher's) kurtosis. (default='mv') + ''' +# Member variables +# ---------------- +# data - data used in fitting +# alpha - confidence coefficient +# method - method used +# LLmax - loglikelihood function evaluated using par +# LPSmax - log product spacing function evaluated using par +# pvalue - p-value for the fit +# search - True if search for distribution parameters (default) +# copydata - True if copy input data (default) +# +# par - parameters (fixed and fitted) +# par_cov - covariance of parameters +# par_fix - fixed parameters +# par_lower - lower (1-alpha)% confidence bound for the parameters +# par_upper - upper (1-alpha)% confidence bound for the parameters +# +# ''' self.__doc__ = rv_frozen.__doc__ + extradoc self.dist = dist numargs = dist.numargs