Added functions to stats.core.py

-reslife -extremal_idx -++ Improved estimation.py
15 years ago · df05bec641
parent a490e23e2f
commit df05bec641
5 changed files with 828 additions and 234 deletions
--- a/pywafo/src/wafo/stats/init.py
+++ b/pywafo/src/wafo/stats/init.py
@ -7,7 +7,3 @@ Statistics package in WAFO Toolbox.
 from scipy.stats import *
 from wafo.stats.core import *
 from wafo.stats.distributions import *
 #from wafo.spectrum.core import SpecData1D
 #import wafo.spectrum.models
 #import wafo.spectrum.dispersion_relation
 #from wafo.data_structures import SpecData1D
--- a/pywafo/src/wafo/stats/core.py
+++ b/pywafo/src/wafo/stats/core.py
@ -1,10 +1,34 @@
 from __future__ import division
 import warnings
 from wafo.wafodata import WafoData
 from wafo.misc import findextrema
 from scipy import special
 import numpy as np
 from numpy import inf
-from numpy import atleast_1d
+from numpy import atleast_1d, nan, ndarray, sqrt, vstack, ones, where, zeros
-from numpy import arange, floor
+from numpy import arange, floor, linspace, asarray, reshape, repeat, product
-__all__ = ['edf']
+
 __all__ = ['edf', 'edfcnd']
 arr = asarray
 def valarray(shape, value=nan, typecode=None):
    """Return an array of all value.
    """
    out = reshape(repeat([value], product(shape, axis=0), axis=0), shape)
    if typecode is not None:
        out = out.astype(typecode)
    if not isinstance(out, ndarray):
        out = arr(out)
    return out 
 def _invt(q, df):
    return special.stdtrit(df, q)
 def _invchi2(q, df):
    return special.chdtri(df, q)
 def edf(x, method=2):
    ''' 
@ -25,7 +49,7 @@ def edf(x, method=2):
    >>> x = np.linspace(0,6,200)
    >>> R = ws.rayleigh.rvs(scale=2,size=100)
    >>> F = ws.edf(R)
-    >>> F.plot()
+    >>> h = F.plot()
     See also edf, pdfplot, cumtrapz
    '''
@ -64,10 +88,10 @@ def edfcnd(x, c=None, method=2):
    >>> import wafo.stats as ws
    >>> x = np.linspace(0,6,200)
    >>> R = ws.rayleigh.rvs(scale=2,size=100)
-    >>> Fc = ws.edfcd(R, 1)
+    >>> Fc = ws.edfcnd(R, 1)
-    >>> Fc.plot()
+    >>> hc = Fc.plot()
    >>> F = ws.edf(R)
-    >>> F.plot()
+    >>> h = F.plot()
     See also edf, pdfplot, cumtrapz
    '''
@ -84,3 +108,586 @@ def edfcnd(x, c=None, method=2):
        F.labels.ylab = 'F(x| X>=%g)' % c
    return F
 def reslife(data, u=None, umin=None, umax=None, nu=None, nmin=3, alpha=0.05, plotflag=False):
    ''' 
    Return Mean Residual Life, i.e., mean excesses vs thresholds
    Parameters
    ---------
    data : array_like
        vector of data of length N. 
    u :  array-like
        threshold values (default linspace(umin, umax, nu))
    umin, umax : real scalars
        Minimum and maximum threshold, respectively (default min(data), max(data)).
    nu : scalar integer
        number of threshold values (default min(N-nmin,100))
    nmin : scalar integer
        Minimum number of extremes to include. (Default 3).
    alpha : real scalar
        Confidence coefficient (default 0.05)        
    plotflag: bool
    Returns
    -------
    mrl : WafoData object
        Mean residual life values, i.e., mean excesses over thresholds, u.
    Notes
    -----
    RESLIFE estimate mean excesses over thresholds. The purpose of MRL is
    to determine the threshold where the upper tail of the data can be 
    approximated with the generalized Pareto distribution (GPD). The GPD is
    appropriate for the tail, if the MRL is a linear function of the 
    threshold, u. Theoretically in the GPD model 
        E(X-u0|X>u0) = s0/(1+k)
        E(X-u |X>u)  = s/(1+k) = (s0 -k*u)/(1+k)   for u>u0
    where k,s is the shape and scale parameter, respectively.
    s0 = scale parameter for threshold u0<u.
    Example
    -------
    >>> import wafo
    >>> R = wafo.stats.genpareto.rvs(0.1,2,2,size=100)
    >>> mrl = reslife(R,nu=20)
    >>> h = mrl.plot() 
    See also
    ---------
    genpareto
    fitgenparrange, disprsnidx
    '''
    if u is None:
        sd = np.sort(data)
        n = len(data)
        nmin = max(nmin, 0)
        if 2 * nmin > n:
            warnings.warn('nmin possibly too large!')
        sdmax, sdmin = sd[-nmin], sd[0]
        umax = sdmax if umax is None else min(umax, sdmax)
        umin = sdmin if umin is None else max(umin, sdmin)
        if nu is None:
            nu = min(n - nmin, 100)
        u = linspace(umin, umax, nu)
    nu = len(u)
    #mrl1 = valarray(nu) 
    #srl = valarray(nu)
    #num = valarray(nu)
    mean_and_std = lambda data1 : (data1.mean(), data1.std(), data1.size)
    dat = arr(data)
    tmp = arr([mean_and_std(dat[dat > tresh] - tresh) for tresh in u.tolist()])
    mrl, srl, num = tmp.T
    p = 1 - alpha
    alpha2 = alpha / 2
    # Approximate P% confidence interval
    #%Za = -invnorm(alpha2);   % known mean
    Za = -_invt(alpha2, num - 1) # unknown mean
    mrlu = mrl + Za * srl / sqrt(num)
    mrll = mrl - Za * srl / sqrt(num)
    #options.CI = [mrll,mrlu];
    #options.numdata = num;
    titleTxt = 'Mean residual life with %d%s CI' % (100 * p, '%')
    res = WafoData(mrl, u, xlab='Threshold', ylab='Mean Excess', title=titleTxt)
    res.workspace = dict(numdata=num, umin=umin, umax=umax, nu=nu, nmin=nmin, alpha=alpha)
    res.children = [WafoData(vstack([mrll, mrlu]).T, u, xlab='Threshold', title=titleTxt)]
    res.children_args = [':r']
    if plotflag:
        res.plot()
    return res
 def dispersion_idx(data, t=None, u=None, umin=None, umax=None, nu=None, nmin=10, tb=1,
               alpha=0.05, plotflag=False):
    '''Return Dispersion Index vs threshold
    Parameters
    ----------
    data, ti : array_like
        data values and sampled times, respectively.
    u :  array-like
        threshold values (default linspace(umin, umax, nu))
    umin, umax : real scalars
        Minimum and maximum threshold, respectively (default min(data), max(data)).
    nu : scalar integer
        number of threshold values (default min(N-nmin,100))
    nmin : scalar integer
        Minimum number of extremes to include. (Default 10).
    tb : Real scalar
        Block period (same unit as the sampled times)  (default 1)
    alpha : real scalar
        Confidence coefficient (default 0.05)        
    plotflag: bool
    Returns
    -------
    DI : WafoData object
        Dispersion index
    b_u : real scalar
        threshold where the number of exceedances in a fixed period (Tb) is
        consistent with a Poisson process.
    ok_u : array-like
        all thresholds where the number of exceedances in a fixed period (Tb) is
        consistent with a Poisson process.
    Notes
    ------
    DISPRSNIDX estimate the Dispersion Index (DI) as function of threshold. 
    DI measures the homogenity of data and the purpose of DI is to determine 
    the threshold where the number of exceedances in a fixed period (Tb) is
    consistent with a Poisson process. For a Poisson process the DI is one. 
    Thus the threshold should be so high that DI is not significantly
    different from 1.
    The Poisson hypothesis is not rejected if the estimated DI is between:
    chi2(alpha/2, M-1)/(M-1)< DI < chi^2(1 - alpha/2, M-1 }/(M - 1)
    where M is the total number of fixed periods/blocks -generally
    the total number of years in the sample.
    Example
    -------
    >>> import wafo.data
    >>> xn = wafo.data.sea()
    >>> t, data = xn.T
    >>> Ie = findpot(data,t,0,5);
    >>> di, u, ok_u = dispersion_idx(data[Ie],t[Ie],tb=100)
    >>> h = di.plot() # a threshold around 1 seems appropriate.
    vline(u)
    See also
    --------
    reslife, 
    fitgenparrange, 
    extremal_idx
    References
    ----------
    Ribatet, M. A.,(2006),
    A User's Guide to the POT Package (Version 1.0)
    month = {August},
    url = {http://cran.r-project.org/}
    Cunnane, C. (1979) Note on the poisson assumption in
    partial duration series model. Water Resource Research, 15\bold{(2)}
         :489--494.}
    '''
 # This program is free software; you can redistribute it and/or modify it under the terms of the GNU
 # General Public License as published by the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 # This program is distributed in the hope that it will be useful, but without any warranty; without even
 # the implied warranty of merchantability or fitness for a particular purpose. See the GNU General Public
 # License for moredetails.
 # The GNU General Public License can be obtained from http://www.gnu.org/copyleft/gpl.html. You
 # can also obtain it by writing to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 #  MA 02111-1307, USA.
    n = len(data)
    if t is None:
        ti = arange(n)
    else:
        ti = arr(t) - min(t)
    t1 = np.empty(ti.shape,dtype=int)
    t1[:] = np.floor(ti / tb)
    if u is None:
        sd = np.sort(data)
        nmin = max(nmin, 0)
        if 2 * nmin > n:
            warnings.warn('nmin possibly too large!')
        sdmax, sdmin = sd[-nmin], sd[0]
        umax = sdmax if umax is None else min(umax, sdmax)
        umin = sdmin if umin is None else max(umin, sdmin)
        if nu is None:
            nu = min(n - nmin, 100)
        u = linspace(umin, umax, nu)
    nu = len(u)
    di = np.zeros(nu)
    d = arr(data)
    mint = int(min(t1)) #; % mint should be 0.
    maxt = int(max(t1))
    M = maxt - mint + 1;
    occ = np.zeros(M);
    for ix, tresh in enumerate(u.tolist()):
        excess = (d > tresh)
        lambda_ = excess.sum() / M
        for block in  range(M):
            occ[block] = sum(excess[t1 == block])
        di[ix] = occ.var() / lambda_
    p = 1 - alpha
    diUp = _invchi2(1 - alpha / 2, M - 1) / (M - 1)
    diLo = _invchi2(alpha / 2, M - 1) / (M - 1)
    # Find appropriate threshold
    k1, = np.where((diLo < di) & (di < diUp))
    if len(k1) > 0:
        ok_u = u[k1]
        b_di = (di[k1].mean() < di[k1])
        k = b_di.argmax()
        b_u = ok_u[k]
    else:
        b_u = ok_u = None
    CItxt = '%d%s CI' % (100 * p, '%')
    titleTxt = 'Dispersion Index plot';
    res = WafoData(di, u, title=titleTxt, labx='Threshold', laby='Dispersion Index')
        #'caption',CItxt);
    res.workspace = dict(umin=umin, umax=umax, nu=nu, nmin=nmin, alpha=alpha)
    res.children = [WafoData(vstack([diLo * ones(nu), diUp * ones(nu)]).T, u, xlab='Threshold', title=CItxt)]
    res.children_args = ['--r']
    if plotflag:
        res.plot(di)
    return res, b_u, ok_u
 def decluster(data, t=None, thresh=None, tmin=1):   
    '''
    Return declustered peaks over threshold values
    Parameters
    ----------
    data, t : array-like     
        data-values and sampling-times, respectively.
    thresh : real scalar
        minimum threshold for levels in data.
    tmin : real scalar
        minimum distance to another peak [same unit as t] (default 1)
    Returns
    -------
    ev, te : ndarray
        extreme values and its corresponding sampling times, respectively, i.e., 
        all data > thresh which are at least tmin distance apart.
    Example
    -------
    >>> import pylab
    >>> import wafo.data
    >>> from wafo.misc import findtc
    >>> x  = wafo.data.sea()
    >>> t, data = x[:400,:].T
    >>> itc, iv = findtc(data,0,'dw')
    >>> ytc, ttc = data[itc], t[itc]
    >>> ymin = 2*data.std()
    >>> tmin = 10 # sec 
    >>> [ye, te] = decluster(ytc,ttc, ymin,tmin);
    >>> h = pylab.plot(t,data,ttc,ytc,'ro',t,zeros(len(t)),':',te,ye,'k.')
    See also
    --------
    fitgenpar, findpot, extremalidx
    '''
    if t is None:
        t = np.arange(len(data))
    i = findpot(data, t, thresh, tmin)
    return data[i], t[i]
 def findpot(data, t=None, thresh=None, tmin=1):
    '''
    Retrun indices to Peaks over threshold values
    Parameters
    ----------
    data, t : array-like     
        data-values and sampling-times, respectively.
    thresh : real scalar
        minimum threshold for levels in data.
    tmin : real scalar
        minimum distance to another peak [same unit as t] (default 1)
    Returns
    -------
    Ie : ndarray
        indices to extreme values, i.e., all data > tresh which are at least 
        tmin distance apart.
    Example
    -------
    >>> import pylab
    >>> import wafo.data
    >>> from wafo.misc import findtc
    >>> x  = wafo.data.sea()
    >>> t, data = x.T
    >>> itc, iv = findtc(data,0,'dw')
    >>> ytc, ttc = data[itc], t[itc]
    >>> ymin = 2*data.std()
    >>> tmin = 10 # sec
    >>> I = findpot(data, t, ymin, tmin)
    >>> yp, tp = data[I], t[I]
    >>> Ie = findpot(yp, tp, ymin,tmin)
    >>> ye, te = yp[Ie], tp[Ie]
    >>> h = pylab.plot(t,data,ttc,ytc,'ro',t,zeros(len(t)),':',te, ye,'k.',tp,yp,'+')
    See also
    --------
    fitgenpar, decluster, extremalidx
    '''
    Data = arr(data)
    if t is None:
        ti = np.arange(len(Data))
    else:
        ti = arr(t)
    Ie, = where(Data > thresh);
    Ye = Data[Ie]
    Te = ti[Ie]
    if len(Ye) <= 1:
        return Ie
    dT = np.diff(Te)
    notSorted = np.any(dT < 0);
    if notSorted:
        I = np.argsort(Te)
        Te = Te[I] 
        Ie = Ie[I]
        Ye = Ye[I]
        dT = np.diff(Te)
    isTooSmall = (dT <= tmin)
    if np.any(isTooSmall):
        isTooClose = np.hstack((isTooSmall[0], isTooSmall[:-1] | isTooSmall[1:], isTooSmall[-1]))
        #Find opening (NO) and closing (NC) index for data beeing to close:
        iy = findextrema(np.hstack([0, 0, isTooSmall, 0]))
        NO = iy[::2] - 1 
        NC = iy[1::2] 
        for no, nc in zip(NO, NC):
            iz = slice(no, nc)
            iOK = _find_ok_peaks(Ye[iz], Te[iz], tmin)
            if len(iOK):
                isTooClose[no + iOK] = 0
        # Remove data which is too close to other data.        
        if isTooClose.any():
            #len(tooClose)>0:
            iOK, = where(1 - isTooClose)
            Ie = Ie[iOK]
    return Ie  
 def _find_ok_peaks(Ye, Te, Tmin):
    '''
    Return indices to the largest maxima that are at least Tmin
    distance apart.
    '''
    Ny = len(Ye)
    I = np.argsort(-Ye) #  sort in descending order
    Te1 = Te[I]
    oOrder = zeros(Ny, dtype=int)
    oOrder[I] = range(Ny) #indices to the variables original location
    isTooClose = zeros(Ny, dtype=bool)
    pool = zeros((Ny, 2))
    T_range = np.hstack([-Tmin, Tmin])
    K = 0
    for i, ti in enumerate(Te1):
        isTooClose[i] = np.any((pool[:K, 0] <= ti) & (ti <= pool[:K, 1]))
        if not isTooClose[i]:
            pool[K] = ti + T_range
            K += 1
    iOK, = where(1 - isTooClose[oOrder])
    return iOK
 def declustering_time(t):
    '''
    Returns minimum distance between clusters.
    Parameters
    ----------
    t : array-like     
        sampling times for data.
    Returns
    -------
    tc : real scalar
        minimum distance between clusters.
    Example
    -------
    >>> import wafo.data
    >>> x  = wafo.data.sea()
    >>> t, data = x[:400,:].T
    >>> Ie = findpot(data,t,0,5);
    >>> tc = declustering_time(Ie) 
    >>> tc
    21
    '''   
    t0 = arr(t)
    nt = len(t0)
    if nt<2:
        return arr([])
    ti = interexceedance_times(t0)
    ei = extremal_idx(ti)
    if ei==1:
        tc = ti.min()
    else:
        i = int(np.floor(nt*ei))
        sti = -np.sort(-ti)
        tc = sti[min(i, nt-2)] #% declustering time
    return tc
 def interexceedance_times(t):
    '''
    Returns interexceedance times of data
    Parameters
    ----------
    t : array-like     
        sampling times for data.
    Returns
    -------
    ti : ndarray
        interexceedance times
    Example
    -------
    >>> t = [1,2,5,10]
    >>> interexceedance_times(t)
    array([1, 3, 5])
    '''  
    return np.diff(np.sort(t))  
 def extremal_idx(ti):
    '''
    Returns Extremal Index measuring the dependence of data
    Parameters
    ----------
    ti : array-like     
        interexceedance times for data.
    Returns
    -------
    ei : real scalar
        Extremal index.
    Notes
    ----- 
    The Extremal Index (EI) is  one if the data are independent and less than 
    one if there are some dependence. The extremal index can also be intepreted 
    as the reciprocal of the mean cluster size.
    Example
    -------
    >>> import wafo.data
    >>> x  = wafo.data.sea()
    >>> t, data = x[:400,:].T
    >>> Ie = findpot(data,t,0,5);
    >>> ti = interexceedance_times(Ie)
    >>> ei = extremal_idx(ti) 
    >>> ei
    1
    See also 
    --------
    reslife, fitgenparrange, disprsnidx, findpot, decluster
    Reference
    ---------
    Christopher A. T. Ferro, Johan Segers (2003)
    Inference for clusters of extreme values
    Journal of the Royal Statistical society: Series B (Statistical Methodology) 54 (2), 545-556
    doi:10.1111/1467-9868.00401
    '''
    t = arr(ti)
    tmax = t.max() 
    if tmax<=1:
        ei = 0
    elif tmax<=2:
        ei = min(1, 2*t.mean()**2/((t**2).mean()))
    else:
        ei = min(1, 2*np.mean(t-1)**2/np.mean((t-1)*(t-2)))
    return ei
 def _test_dispersion_idx():
    import wafo.data
    xn = wafo.data.sea()
    t, data = xn.T
    Ie = findpot(data,t,0,5);
    di, u, ok_u = dispersion_idx(data[Ie],t[Ie],tb=100)
    di.plot() # a threshold around 1 seems appropriate.
    di.show()
    pass
 def _test_findpot():
    import pylab
    import wafo.data
    from wafo.misc import findtc
    x = wafo.data.sea()
    t, data = x[:, :].T
    itc, iv = findtc(data, 0, 'dw')
    ytc, ttc = data[itc], t[itc]
    ymin = 2 * data.std()
    tmin = 10 # sec
    I = findpot(data, t, ymin, tmin)
    yp, tp = data[I], t[I]
    Ie = findpot(yp, tp, ymin, tmin)
    ye, te = yp[Ie], tp[Ie]
    h = pylab.plot(t, data, ttc,ytc,'ro', t, zeros(len(t)), ':', te, ye, 'kx', tp, yp, '+')
    pylab.show() #
    pass
 def _test_reslife():
    import wafo
    R = wafo.stats.genpareto.rvs(0.1, 2, 2, size=100)
    mrl = reslife(R, nu=20)
    mrl.plot()
 def main():
    #_test_dispersion_idx() 
    import doctest
    doctest.testmod()
 if __name__ == '__main__':
    main()
--- a/pywafo/src/wafo/stats/distributions.py
+++ b/pywafo/src/wafo/stats/distributions.py
@ -400,15 +400,15 @@ class rv_frozen(object):
    def stats(self, moments='mv'):
        ''' Some statistics of the given RV'''
        kwds = dict(moments=moments)
-        return self.dist.stats(*self.par, **kwds)
+        return self.dist.stats(*self.par)
    def median(self):
-        return self.dist.median(*self.par, **self.kwds)
+        return self.dist.median(*self.par)
    def mean(self):
-        return self.dist.mean(*self.par,**self.kwds)
+        return self.dist.mean(*self.par)
    def var(self):
-        return self.dist.var(*self.par, **self.kwds)
+        return self.dist.var(*self.par)
    def std(self):
-        return self.dist.std(*self.par, **self.kwds)
+        return self.dist.std(*self.par)
    def moment(self, n):
        par1 = self.par[:self.dist.numargs]
        return self.dist.moment(n, *par1)
@ -418,7 +418,7 @@ class rv_frozen(object):
        '''Probability mass function at k of the given RV'''
        return self.dist.pmf(k, *self.par)
    def interval(self,alpha):
-        return self.dist.interval(alpha, *self.par, **self.kwds)
+        return self.dist.interval(alpha, *self.par)
 # Frozen RV class
@ -1982,10 +1982,8 @@ class rv_continuous(rv_generic):
    def link(self, x, logSF, theta, i):
        ''' Return dist. par. no. i as function of quantile (x) and log survival probability (sf)
-
+            where
-            Assumptions:
+            theta is the list containing all parameters including location and scale.
            ------------
            theta is list containing all parameters including location and scale.
        '''
        raise ValueError('Link function not implemented for the %s distribution' % self.name)
        return None
@ -2077,7 +2075,7 @@ class rv_continuous(rv_generic):
        # invert the Hessian matrix (i.e. invert the observed information number)
        #pcov = -pinv(H);
-        return - H
+        return -H
    # return starting point for fit (shape arguments + loc + scale)
    def _fitstart(self, data, args=None):
@ -2232,25 +2230,6 @@ class rv_continuous(rv_generic):
        '''
        return FitDistribution(self, data, *args, **kwds)
 #        loc0, scale0, method = map(kwds.get, ['loc', 'scale','method'],[none, none,'ml'])
 #        args, loc0, scale0 = self.fix_loc_scale(args, loc0, scale0)
 #        Narg = len(args)
 #        if Narg != self.numargs:
 #            if Narg > self.numargs:
 #                raise ValueError, "Too many input arguments."
 #            else:
 #                args += (1.0,)*(self.numargs-Narg)
 #        # location and scale are at the end
 #        x0 = args + (loc0, scale0)
 #        if method.lower()[:].startswith('mps'):
 #            data.sort()
 #            fitfun = self.nlogps
 #        else:
 #            fitfun = self.nnlf
 #
 #        return optimize.fmin(fitfun,x0,args=(ravel(data),),disp=0)
    def fit_loc_scale(self, data, *args):
        """
        Estimate loc and scale parameters from data using 1st and 2nd moments
@ -3387,6 +3366,20 @@ class genpareto_gen(rv_continuous):
        #vals = 1.0/c * (pow(1-q, -c)-1)
        #return vals
    def _fitstart(self, data):
        d = arr(data)
        loc = d.min()-0.01*d.std()
        #moments estimator
        d1 = d-loc
        m = d1.mean() 
        s = d1.std()
        shape = ((m/s)**2 - 1)/2
        scale = m*((m/s)**2+1)/2
        return shape, loc, scale 
    def hessian_nnlf(self, theta, x, eps=None):
        try:
            loc = theta[-2]
@ -6876,6 +6869,10 @@ Skellam distribution
 def main():
    import matplotlib
    matplotlib.interactive(True)
    R = norm.rvs(size=100)
    phat = norm.fit(R)
    phat = genpareto.fit(R[R>0.7],f0=0.1, floc=0.7)
    #nbinom(10, 0.75).rvs(3)
    t = bernoulli(0.75).rvs(3)
    x = np.r_[5, 10]
--- a/pywafo/src/wafo/stats/estimation.py
+++ b/pywafo/src/wafo/stats/estimation.py
@ -10,21 +10,18 @@ from __future__ import division
 from wafo.plotbackend import plotbackend
 from wafo.misc import ecross, findcross
-from scipy.misc.ppimport import ppimport
+#from scipy.misc.ppimport import ppimport
 import numdifftools 
 from scipy import special
 from scipy.linalg import pinv2
 from scipy import optimize
-from numpy import alltrue, arange, \
+
     ravel, ones, sum, \
     zeros, log, sqrt, exp
 from numpy import atleast_1d, any, asarray, nan, inf, pi, reshape, repeat, product, ndarray
 import numpy
 import numpy as np
-
+from numpy import alltrue, arange, ravel, ones, sum, zeros, log, sqrt, exp
 from numpy import (atleast_1d, any, asarray, nan, pi, reshape, repeat, 
                   product, ndarray, isfinite)
 from numpy import flatnonzero as nonzero
@ -33,11 +30,8 @@ __all__ = [
          ]
 floatinfo = np.finfo(float)
 #arr = atleast_1d
 arr = asarray
 all = alltrue
 def chi2isf(p, df):
@ -123,13 +117,13 @@ class rv_frozen(object):
        kwds = dict(moments=moments)
        return self.dist.stats(*self.par, **kwds)
    def median(self):
-        return self.dist.median(*self.par, **self.kwds)
+        return self.dist.median(*self.par)
    def mean(self):
-        return self.dist.mean(*self.par,**self.kwds)
+        return self.dist.mean(*self.par)
    def var(self):
-        return self.dist.var(*self.par, **self.kwds)
+        return self.dist.var(*self.par)
    def std(self):
-        return self.dist.std(*self.par, **self.kwds)
+        return self.dist.std(*self.par)
    def moment(self, n):
        par1 = self.par[:self.dist.numargs]
        return self.dist.moment(n, *par1)
@ -139,9 +133,7 @@ class rv_frozen(object):
        '''Probability mass function at k of the given RV'''
        return self.dist.pmf(k, *self.par)
    def interval(self,alpha):
-        return self.dist.interval(alpha, *self.par, **self.kwds)
+        return self.dist.interval(alpha, *self.par)
 # internal class to profile parameters of a given distribution
@ -261,9 +253,9 @@ class Profile(object):
        self.i_free = nonzero(isfree)
        self.Lmax = Lmax
-        self.alpha_Lrange = 0.5 * chi2isf(self.alpha, 1) #_WAFODIST.chi2.isf(self.alpha, 1)
+        self.alpha_Lrange = 0.5 * chi2isf(self.alpha, 1) 
        self.alpha_cross_level = Lmax - self.alpha_Lrange
-        lowLevel = self.alpha_cross_level - self.alpha_Lrange / 7.0
+        #lowLevel = self.alpha_cross_level - self.alpha_Lrange / 7.0
        ## Check that par are actually at the optimum
        phatv = fit_dist.par.copy()
@ -327,7 +319,7 @@ class Profile(object):
        cond = self.data == -numpy.inf
        if any(cond):
            ind, = cond.nonzero()
-            self.data.put(ind, numpy.finfo(float).min / 2.0)
+            self.data.put(ind, floatinfo.min / 2.0)
            ind1 = numpy.where(ind == 0, ind, ind - 1)
            cl = self.alpha_cross_level - self.alpha_Lrange / 2.0
            t0 = ecross(self.args, self.data, ind1, cl)
@ -446,16 +438,17 @@ class FitDistribution(rv_frozen):
    def __init__(self, dist, data, *args, **kwds):
        extradoc = '''
-    RV.plotfitsumry() - Plot various diagnostic plots to asses quality of fit.
+        RV.plotfitsumry() - Plot various diagnostic plots to asses quality of fit.
-    RV.plotecdf()     - Plot Empirical and fitted Cumulative Distribution Function
+        RV.plotecdf()     - Plot Empirical and fitted Cumulative Distribution Function
-    RV.plotesf()      - Plot Empirical and fitted Survival Function
+        RV.plotesf()      - Plot Empirical and fitted Survival Function
-    RV.plotepdf()     - Plot Empirical and fitted Probability Distribution Function
+        RV.plotepdf()     - Plot Empirical and fitted Probability Distribution Function
-    RV.plotresq()     - Displays a residual quantile plot.
+        RV.plotresq()     - Displays a residual quantile plot.
-    RV.plotresprb()   - Displays a residual probability plot.
+        RV.plotresprb()   - Displays a residual probability plot.
-    RV.profile()      - Return Profile Log- likelihood or Product Spacing-function.
+        RV.profile()      - Return Profile Log- likelihood or Product Spacing-function.
-    Member variables:
+        Member variables
        ----------------
        data - data used in fitting
        alpha - confidence coefficient
        method - method used
@ -476,85 +469,143 @@ class FitDistribution(rv_frozen):
        self.dist = dist
        numargs = dist.numargs
-        self.method, self.alpha, self.par_fix, self.search, self.copydata = map(kwds.get, ['method', 'alpha', 'par_fix', 'search', 'copydata'], ['ml', 0.05, None, True, True])
+        self.method=self.alpha=self.par_fix=self.search=self.copydata=None
-        self.data = ravel(data)
+        m_variables = ['method', 'alpha', 'par_fix', 'search', 'copydata']
-        if self.copydata:
+        m_defaults = ['ml', 0.05, None, True, True]
-            self.data = self.data.copy()
+        for (name, val) in zip(m_variables,m_defaults):
-        self.data.sort()
+            setattr(self, name, kwds.get(name,val))
        #self.method, self.alpha, self.par_fix, self.search, self.copydata = map(kwds.get, m_variables, m_defaults)
        if self.method.lower()[:].startswith('mps'):
            self._fitfun = dist.nlogps
        else:
            self._fitfun = dist.nnlf
-        allfixed = False
+        self.data = ravel(data)
-        isfinite = numpy.isfinite
+        if self.copydata:
-        somefixed = (self.par_fix != None) and any(isfinite(self.par_fix))
+            self.data = self.data.copy()
        self.data.sort()
        par, fixedn = self._fit(*args, **kwds)
        self.par = arr(par)
        somefixed = len(fixedn)>0 
        if somefixed:
-            fitfun = self._fxfitfun
+            self.par_fix = [nan,]*len(self.par)
-            self.par_fix = tuple(self.par_fix)
+            for i in fixedn:
-            allfixed = all(isfinite(self.par_fix))
+                self.par_fix[i] = self.par[i]
            self.par = atleast_1d(self.par_fix)
            self.i_notfixed = nonzero(1 - isfinite(self.par))
            self.i_fixed = nonzero(isfinite(self.par))
            if len(self.par) != numargs + 2:
                raise ValueError, "Wrong number of input arguments."
            if len(args) != len(self.i_notfixed):
                raise ValueError("Length of args must equal number of non-fixed parameters given in par_fix! (%d) " % len(self.i_notfixed))
            x0 = atleast_1d(args)
        else:
            fitfun = self.fitfun
            loc0, scale0 = map(kwds.get, ['loc', 'scale'])
            args, loc0, scale0 = dist.fix_loc_scale(args, loc0, scale0)
            Narg = len(args)
            if Narg != numargs:
                if Narg > numargs:
                    raise ValueError, "Too many input arguments."
                else:
                    args += (1.0,)*(numargs - Narg)
            # location and scale are at the end
            x0 = args + (loc0, scale0)
            x0 = atleast_1d(x0)
        numpar = len(x0)
        if self.search and not allfixed:
            #args=(self.data,),
            par = optimize.fmin(fitfun, x0, disp=0)
            if not somefixed:
                self.par = par
        elif  (not allfixed) and somefixed:
            self.par[self.i_notfixed] = x0
        else:
            self.par = x0
-        np = numargs + 2
+            self.i_notfixed = nonzero(1 - isfinite(self.par_fix))
            self.i_fixed = nonzero(isfinite(self.par_fix))
        numpar = numargs + 2
        self.par_cov = zeros((numpar, numpar))
        self._compute_cov()
        # Set confidence interval for parameters
        pvar = numpy.diag(self.par_cov)
        zcrit = -norm_ppf(self.alpha / 2.0)
        self.par_lower = self.par - zcrit * sqrt(pvar)
        self.par_upper = self.par + zcrit * sqrt(pvar)
        self.par_upper = None
        self.par_lower = None
        self.par_cov = zeros((np, np))
        self.LLmax = -dist.nnlf(self.par, self.data)
        self.LPSmax = -dist.nlogps(self.par, self.data)
        self.pvalue = self._pvalue(self.par, self.data, unknown_numpar=numpar)
-        H = numpy.asmatrix(self._hessian_nnlf(self.par, self.data))
+    
    def _reduce_func(self, args, kwds):
        args = list(args)
        Nargs = len(args) - 2
        fixedn = []
        index = range(Nargs) + [-2, -1]
        names = ['f%d' % n for n in range(Nargs)] + ['floc', 'fscale']
        x0 = args[:]
        for n, key in zip(index, names):
            if kwds.has_key(key):
                fixedn.append(n)
                args[n] = kwds[key]
                del x0[n]
        fitfun = self._fitfun
        if len(fixedn) == 0:
            func = fitfun
            restore = None
        else:
            if len(fixedn) == len(index):
                raise ValueError, "All parameters fixed. There is nothing to optimize."
            def restore(args, theta):
                # Replace with theta for all numbers not in fixedn
                # This allows the non-fixed values to vary, but
                #  we still call self.nnlf with all parameters.
                i = 0
                for n in range(Nargs):
                    if n not in fixedn:
                        args[n] = theta[i]
                        i += 1
                return args
            def func(theta, x):
                newtheta = restore(args[:], theta)
                return fitfun(newtheta, x)
        return x0, func, restore, args, fixedn
    def _fit(self, *args, **kwds):
        dist = self.dist
        data = self.data
        Narg = len(args)
        if Narg > dist.numargs:
                raise ValueError, "Too many input arguments."
        start = [None]*2
        if (Narg < dist.numargs) or not (kwds.has_key('loc') and
                                         kwds.has_key('scale')):
            start = dist._fitstart(data)  # get distribution specific starting locations
            args += start[Narg:-2]
        loc = kwds.get('loc', start[-2])
        scale = kwds.get('scale', start[-1])
        args += (loc, scale)
        x0, func, restore, args, fixedn = self._reduce_func(args, kwds)
        if self.search:
            optimizer = kwds.get('optimizer', optimize.fmin)
            # convert string to function in scipy.optimize
            if not callable(optimizer) and isinstance(optimizer, (str, unicode)):
                if not optimizer.startswith('fmin_'):
                    optimizer = "fmin_"+optimizer
                if optimizer == 'fmin_': 
                    optimizer = 'fmin'
                try:
                    optimizer = getattr(optimize, optimizer)
                except AttributeError:
                    raise ValueError, "%s is not a valid optimizer" % optimizer
            vals = optimizer(func,x0,args=(ravel(data),),disp=0)
            vals = tuple(vals)
        else:
            vals = tuple(x0)
        if restore is not None:
            vals = restore(args, vals)
        return vals, fixedn
    def _compute_cov(self):
        '''Compute covariance
        '''
        somefixed = (self.par_fix != None) and any(isfinite(self.par_fix))
        H = numpy.asmatrix(self.dist.hessian_nnlf(self.par, self.data))
        self.H = H
        try:
-            if allfixed:
+            if somefixed:
-                pass
+                allfixed = all(isfinite(self.par_fix))
-            elif somefixed:
+                if allfixed:
-                pcov = -pinv2(H[self.i_notfixed, :][..., self.i_notfixed])
+                    self.par_cov[:,:]=0
-                for row, ix in enumerate(list(self.i_notfixed)):
+                else:
-                    self.par_cov[ix, self.i_notfixed] = pcov[row, :]
+                    pcov = -pinv2(H[self.i_notfixed, :][..., self.i_notfixed])
-
+                    for row, ix in enumerate(list(self.i_notfixed)):
                        self.par_cov[ix, self.i_notfixed] = pcov[row, :]
            else:
                self.par_cov = -pinv2(H)
        except:
            self.par_cov[:, :] = nan
        pvar = numpy.diag(self.par_cov)
        zcrit = -norm_ppf(self.alpha / 2.0)#_WAFODIST.norm.ppf(self.alpha / 2.0)
        self.par_lower = self.par - zcrit * sqrt(pvar)
        self.par_upper = self.par + zcrit * sqrt(pvar)
    def fitfun(self, phat):
        return self._fitfun(phat, self.data)
@ -562,7 +613,6 @@ class FitDistribution(rv_frozen):
        self.par[self.i_notfixed] = phat10
        return self._fitfun(self.par, self.data)
    def profile(self, **kwds):
        ''' Profile Log- likelihood or Log Product Spacing- function,
            which can be used for constructing confidence interval for
@ -812,65 +862,7 @@ class FitDistribution(rv_frozen):
        return pvalue
    def _hessian_nnlf(self, theta, data, eps=None):
        ''' approximate hessian of nnlf where theta are the parameters (including loc and scale)
        '''
        #Nd = len(x)
        np = len(theta)
        # pab 07.01.2001: Always choose the stepsize h so that
        # it is an exactly representable number.
        # This is important when calculating numerical derivatives and is
        #  accomplished by the following.
        if eps == None:
            eps = (floatinfo.machar.eps) ** 0.4
        #xmin = floatinfo.machar.xmin
        #myfun = lambda y: max(y,100.0*log(xmin)) #% trick to avoid log of zero
        delta = (eps + 2.0) - 2.0
        delta2 = delta ** 2.0
        #    % Approximate 1/(nE( (d L(x|theta)/dtheta)^2)) with
        #    %             1/(d^2 L(theta|x)/dtheta^2)
        #    %  using central differences
        dist = self.dist
        LL = dist.nnlf(theta, data)
        H = zeros((np, np))   #%% Hessian matrix
        theta = tuple(theta)
        for ix in xrange(np):
            sparam = list(theta)
            sparam[ix] = theta[ix] + delta
            fp = dist.nnlf(sparam, data)
            #fp = sum(myfun(x))
            sparam[ix] = theta[ix] - delta
            fm = dist.nnlf(sparam, data)
            #fm = sum(myfun(x))
            H[ix, ix] = (fp - 2 * LL + fm) / delta2
            for iy in range(ix + 1, np):
                sparam[ix] = theta[ix] + delta
                sparam[iy] = theta[iy] + delta
                fpp = dist.nnlf(sparam, data)
                #fpp = sum(myfun(x))
                sparam[iy] = theta[iy] - delta
                fpm = dist.nnlf(sparam, data)
                #fpm = sum(myfun(x))
                sparam[ix] = theta[ix] - delta
                fmm = dist.nnlf(sparam, data)
                #fmm = sum(myfun(x));
                sparam[iy] = theta[iy] + delta
                fmp = dist.nnlf(sparam, data)
                #fmp = sum(myfun(x))
                H[ix, iy] = ((fpp + fmm) - (fmp + fpm)) / (4. * delta2)
                H[iy, ix] = H[ix, iy]
                sparam[iy] = theta[iy];
        # invert the Hessian matrix (i.e. invert the observed information number)
        #pcov = -pinv(H);
        return - H
 def main():
    _WAFODIST = ppimport('wafo.stats.distributions')
--- a/pywafo/src/wafo/wafodata.py
+++ b/pywafo/src/wafo/wafodata.py
@ -52,39 +52,41 @@ class WafoData(object):
    >>> h = d2.plot()
    Plot with confidence interval
-    d3 = wdata(sin(x),x)
+    >>> d3 = WafoData(np.sin(x),x)
-    d3 = set(d3,'dataCI',[sin(x(:))*0.9 sin(x(:))*1.2])
+    >>> d3.children = [WafoData(np.vstack([np.sin(x)*0.9, np.sin(x)*1.2]).T,x)]
-    plot(d3)
+    >>> d3.children_args=[':r']
    >>> h = d3.plot()
    See also
    --------
    wdata/plot,
    specdata,
    covdata
    '''
-    def __init__(self, data=None, args=None,*args2,**kwds):
+    def __init__(self, data=None, args=None, *args2, **kwds):
        self.data = data
        self.args = args
        self.date = now()
        self.plotter = None
        self.children = None
        self.children_args = []
        self.labels = AxisLabels(**kwds)
        self.setplotter()
-    def plot(self,*args,**kwds):
+    def plot(self, *args, **kwds):
        tmp = None
-        if self.children!=None:
+        if self.children != None:
            plotbackend.hold('on')
            tmp = []
            child_args = args + tuple(self.children_args)
            for child in self.children:
-                tmp1 = child.plot(*args, **kwds)
+                tmp1 = child.plot(*child_args, **kwds)
-                if tmp1 !=None:
+                if tmp1 != None:
                    tmp.append(tmp1)
-            if len(tmp)==0:
+            if len(tmp) == 0:
                tmp = None
-        tmp2 =  self.plotter.plot(self,*args,**kwds)
+        tmp2 = self.plotter.plot(self, *args, **kwds)
-        return tmp2,tmp
+        return tmp2, tmp
    def show(self):
        self.plotter.show()
@ -95,20 +97,20 @@ class WafoData(object):
        return newcopy
-    def setplotter(self,plotmethod=None):
+    def setplotter(self, plotmethod=None):
        '''
            Set plotter based on the data type data_1d, data_2d, data_3d or data_nd
        '''
-        if isinstance(self.args,(list,tuple)): # Multidimensional data
+        if isinstance(self.args, (list, tuple)): # Multidimensional data
            ndim = len(self.args)
-            if ndim<2:
+            if ndim < 2:
                warnings.warn('Unable to determine plotter-type, because len(self.args)<2.')
                print('If the data is 1D, then self.args should be a vector!')
                print('If the data is 2D, then length(self.args) should be 2.')
                print('If the data is 3D, then length(self.args) should be 3.')
                print('Unless you fix this, the plot methods will not work!')
-            elif ndim==2:
+            elif ndim == 2:
                self.plotter = Plotter_2d(plotmethod)
            else:
                warnings.warn('Plotter method not implemented for ndim>2')
@ -118,7 +120,7 @@ class WafoData(object):
 class AxisLabels:
-    def __init__(self,title='',xlab='',ylab='',zlab='',**kwds):
+    def __init__(self, title='', xlab='', ylab='', zlab='', **kwds):
        self.title = title
        self.xlab = xlab
        self.ylab = ylab
@ -137,7 +139,7 @@ class AxisLabels:
            h2 = plotbackend.xlabel(self.xlab)
            h3 = plotbackend.ylabel(self.ylab)
            #h4 = plotbackend.zlabel(self.zlab)
-            return h1,h2,h3
+            return h1, h2, h3
        except:
            pass
@ -159,7 +161,7 @@ class Plotter_1d(object):
        scatter : scatter plot
    """
-    def __init__(self,plotmethod='plot'):
+    def __init__(self, plotmethod='plot'):
        self.plotfun = None
        if plotmethod is None:
            plotmethod = 'plot'
@ -172,14 +174,14 @@ class Plotter_1d(object):
    def show(self):
        plotbackend.show()
-    def plot(self,wdata,*args,**kwds):
+    def plot(self, wdata, *args, **kwds):
-        if isinstance(wdata.args,(list,tuple)):
+        if isinstance(wdata.args, (list, tuple)):
-            args1 = tuple((wdata.args))+(wdata.data,)+args
+            args1 = tuple((wdata.args)) + (wdata.data,) + args
        else:
-            args1 = tuple((wdata.args,))+(wdata.data,)+args
+            args1 = tuple((wdata.args,)) + (wdata.data,) + args
-        h1 = self.plotfun(*args1,**kwds)
+        h1 = self.plotfun(*args1, **kwds)
        h2 = wdata.labels.labelfig()
-        return h1,h2
+        return h1, h2
 class Plotter_2d(Plotter_1d):
    """
@ -192,10 +194,10 @@ class Plotter_2d(Plotter_1d):
        surf
    """
-    def __init__(self,plotmethod='contour'):
+    def __init__(self, plotmethod='contour'):
        if plotmethod is None:
            plotmethod = 'contour'
-        super(Plotter_2d,self).__init__(plotmethod)
+        super(Plotter_2d, self).__init__(plotmethod)
        #self.plotfun = plotbackend.__dict__[plotmethod]