@@ -13,11 +13,21 @@ from __future__ import division
 import numpy as np
 from numpy import pi, sqrt, atleast_2d, exp, newaxis #@UnresolvedImport
 import scipy
 from scipy.linalg import sqrtm
 from scipy import linalg
 from scipy.special import gamma
 from misc import tranproc, trangood
 from itertools import product
+
+_stats_epan = (1. / 5, 3. / 5, np.inf)
+_stats_biwe = (1. / 7, 5. / 7, 45. / 2)
+_stats_triw = (1. / 9, 350. / 429, np.inf)
+_stats_rect = (1. / 3, 1. / 2, np.inf)
+_stats_tria = (1. / 6, 2. / 3, np.inf)
+_stats_lapl = (2, 1. / 4, np.inf)
+_stats_logi = (pi ** 2 / 3, 1. / 6, 1 / 42)
+_stats_gaus = (1, 1. / (2 * sqrt(pi)), 3. / (8 * sqrt(pi)))
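Each `_stats_*` tuple stores the 1-D kernel constants (mu2, R, Rdd), i.e. int(x^2*K(x))dx, int(K(x)^2)dx and int(K''(x)^2)dx, which the bandwidth rules further down consume via `Kernel.stats()`. A minimal numerical check of the Gaussian entry, as a sketch (scipy.integrate.quad is not used by the module itself):

    import numpy as np
    from scipy.integrate import quad

    K = lambda x: np.exp(-0.5 * x ** 2) / np.sqrt(2 * np.pi)   # Gaussian kernel
    Kdd = lambda x: (x ** 2 - 1.0) * K(x)                      # its second derivative

    print(quad(lambda x: x ** 2 * K(x), -8, 8)[0])   # mu2 -> 1
    print(quad(lambda x: K(x) ** 2, -8, 8)[0])       # R   -> 1/(2*sqrt(pi)) ~ 0.2821
    print(quad(lambda x: Kdd(x) ** 2, -8, 8)[0])     # Rdd -> 3/(8*sqrt(pi)) ~ 0.2116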
+
 def sphere_volume(d, r=1.0):
     """
@@ -47,7 +57,7 @@ def sphere_volume(d, r=1.0):
-class kde(object):
+class KDE(object):
     """ Representation of a kernel-density estimate using Gaussian kernels.
 
     Parameters
@@ -84,23 +94,53 @@ class kde(object):
     obtain the kernel covariance matrix. Set this method to
     kde.scotts_factor or kde.silverman_factor (or subclass to provide your
     own). The default is scotts_factor.
 
     Example
     -------
     """
 
-    def __init__(self, dataset, **kwds):
-        self.kernel = 'gauss'
-        self.hs = None
-        self.hsmethod = None
-        self.L2 = None
-        self.alpha = 0
-        self.__dict__.update(kwds)
+    def __init__(self, dataset, hs=None, kernel=None, L2=None, alpha=0.0):
+        self.kernel = kernel if kernel else Kernel('gauss')
+        self.hs = hs
+        self.L2 = L2
+        self.alpha = alpha
 
         self.dataset = atleast_2d(dataset)
         self.d, self.n = self.dataset.shape
 
-        self._compute_covariance()
+        self._compute_smoothing()
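A sketch of the reworked constructor in use, assuming the module is importable as wafo.kdetools (as in the gridcount doctest below) and relying on the default Gaussian kernel:

    import numpy as np
    import wafo.kdetools as wk

    data = np.random.randn(1, 100)   # rows are dimensions: d=1, n=100
    f = wk.KDE(data)                 # hs=None -> bandwidth via kernel.get_smoothing
    print(f(np.linspace(-3, 3, 7)))  # __call__ is bound to evaluate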
 
+    def _compute_smoothing(self):
+        """Computes the smoothing matrix
+        """
+        get_smoothing = self.kernel.get_smoothing
+        h = self.hs
+        if h is None:
+            h = get_smoothing(self.dataset)
+        hsiz = h.shape
+
+        if (min(hsiz) == 1) or (self.d == 1):
+            if max(hsiz) == 1:
+                h = h * np.ones(self.d)
+            else:
+                h.shape = (self.d,)  # make sure it has the correct dimension
+
+            # If h is non-positive, calculate automatic values
+            ind, = np.where(h <= 0)
+            for i in ind.tolist():
+                h[i] = get_smoothing(self.dataset[i])
+            deth = h.prod()
+            self.inv_hs = np.diag(1.0 / h)
+        else:  # fully general smoothing matrix
+            deth = linalg.det(h)
+            if deth <= 0:
+                raise ValueError('bandwidth matrix h must be positive definite!')
+            self.inv_hs = linalg.inv(h)
+        self.hs = h
+        self._norm_factor = deth * self.n
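The branch above accepts a scalar, a length-d vector, or a full d-by-d matrix for hs. A standalone sketch of that promotion logic (smoothing_inverse is a hypothetical helper name, not part of the module):

    import numpy as np
    from scipy import linalg

    def smoothing_inverse(h, d):
        h = np.atleast_1d(np.asarray(h, dtype=float))
        if h.size == 1:                       # scalar -> same bandwidth per dimension
            h = h * np.ones(d)
        if h.ndim == 1:                       # vector -> diagonal smoothing matrix
            return np.diag(1.0 / h), h.prod()
        deth = linalg.det(h)                  # full matrix -> general smoothing
        if deth <= 0:
            raise ValueError('bandwidth matrix h must be positive definite!')
        return linalg.inv(h), deth

    inv_hs, deth = smoothing_inverse(0.5, 2)  # diag([2., 2.]) and det 0.25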
 
     def evaluate(self, points):
         """Evaluate the estimated pdf on a set of points.
 
@@ -140,23 +180,23 @@ class kde(object):
             # there are more points than data, so loop over data
             for i in range(self.n):
                 diff = self.dataset[:, i, np.newaxis] - points
-                tdiff = np.dot(self.inv_cov, diff)
-                energy = np.sum(diff * tdiff, axis=0) / 2.0
-                result += np.exp(-energy)
+                tdiff = np.dot(self.inv_hs, diff)
+                result += self.kernel(tdiff)
         else:
             # loop over points
             for i in range(m):
                 diff = self.dataset - points[:, i, np.newaxis]
-                tdiff = np.dot(self.inv_cov, diff)
-                energy = sum(diff * tdiff, axis=0) / 2.0
-                result[i] = np.sum(np.exp(-energy), axis=0)
+                tdiff = np.dot(self.inv_hs, diff)
+                tmp = self.kernel(tdiff)
+                result[i] = tmp.sum(axis=-1)
 
-        result /= self._norm_factor
+        result /= (self._norm_factor * self.kernel.norm_factor(d, self.n))
 
         return result
 
     __call__ = evaluate
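Since `_norm_factor` is det(h)*n and the kernel supplies its own normalizing constant, the estimate should integrate to one. A quick consistency check, again assuming the wafo.kdetools import:

    import numpy as np
    import wafo.kdetools as wk

    data = np.random.randn(1, 200)
    f = wk.KDE(data)
    x = np.linspace(-5, 5, 201)
    print(np.trapz(f(x), x))   # expect a value close to 1.0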
 
 #function [f, hs,lambda]= kdefun(A,options,varargin)
 #%KDEFUN Kernel Density Estimator.
 #%
@@ -433,71 +473,106 @@ class kde(object):
 # hs=h;
 #end
 
-def mkernel_multi(X, p=None):
+class _Kernel(object):
+    def __init__(self, r=1.0, stats=None):
+        self.r = r  # radius of kernel
+        self.stats = stats
+    def norm_factor(self, d=1, n=None):
+        return 1.0
+    def norm_kernel(self, x):
+        X = np.atleast_2d(x)
+        return self._kernel(X) / self.norm_factor(*X.shape)
+    def kernel(self, x):
+        return self._kernel(np.atleast_2d(x))
+    __call__ = kernel
+
+class _KernelMulti(_Kernel):
     # p=0; %Sphere = rect for 1D
     # p=1; %Multivariate Epanechnikov kernel.
     # p=2; %Multivariate Bi-weight Kernel
     # p=3; %Multivariate Tri-weight Kernel
     # p=4; %Multivariate Four-weight Kernel
-    d, n = X.shape
-    r = 1 # radius of the kernel
-    c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(d, r) / np.prod(np.r_[(d + 2):(2 * p + d + 1):2]) # normalizing constant
-    # c = beta(r+1,r+1)*vsph(d,b)*(2^(2*r)); % Wand and Jones pp 31
-    # the commented c above does not yield the right scaling for d>1
-    x2 = X ** 2
-    return ((1.0 - x2.sum(axis=0) / r ** 2).clip(min=0.0)) ** p / c
+    def __init__(self, r=1.0, p=1, stats=None):
+        self.r = r
+        self.p = p
+        self.stats = stats
+    def norm_factor(self, d=1, n=None):
+        r = self.r
+        p = self.p
+        c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(d, r) / np.prod(np.r_[(d + 2):(2 * p + d + 1):2])  # normalizing constant
+        return c
+    def _kernel(self, x):
+        r = self.r
+        p = self.p
+        x2 = x ** 2
+        return ((1.0 - x2.sum(axis=0) / r ** 2).clip(min=0.0)) ** p
 
-def mkernel_epanechnikov(X):
-    return mkernel_multi(X, p=1)
-def mkernel_biweight(X):
-    return mkernel_multi(X, p=2)
-def mkernel_triweight(X):
-    return mkernel_multi(X, p=3)
-
-def mkernel_product(X, p):
+mkernel_epanechnikov = _KernelMulti(p=1, stats=_stats_epan)
+mkernel_biweight = _KernelMulti(p=2, stats=_stats_biwe)
+mkernel_triweight = _KernelMulti(p=3, stats=_stats_triw)
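The split between `_kernel` (unnormalized shape) and `norm_factor` (the constant making it a density) can be checked on the 1-D Epanechnikov instance just defined; a sketch assuming the definitions above are in scope:

    import numpy as np

    x = np.linspace(-1, 1, 2001)[np.newaxis, :]   # shape (d=1, m)
    pdf = mkernel_epanechnikov.norm_kernel(x)     # (1 - x**2) / c with c = 4/3
    print(np.trapz(pdf, x.ravel()))               # expect approximately 1.0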
 
+class _KernelProduct(_KernelMulti):
     # p=0; %rectangular
     # p=1; %1D product Epanechnikov kernel.
     # p=2; %1D product Bi-weight Kernel
     # p=3; %1D product Tri-weight Kernel
     # p=4; %1D product Four-weight Kernel
-    d, n = X.shape
-    r = 1.0 # radius
-    pdf = (1 - (X / r) ** 2).clip(min=0.0)
-    c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(1, r) / np.prod(np.r_[(1 + 2):(2 * p + 2):2]) # normalizing constant
-    return np.prod(pdf, axis=0) / c ** d
+    def norm_factor(self, d=1, n=None):
+        r = self.r
+        p = self.p
+        c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(1, r) / np.prod(np.r_[(1 + 2):(2 * p + 2):2])  # normalizing constant
+        return c ** d
+    def _kernel(self, x):
+        r = self.r  # radius
+        pdf = (1 - (x / r) ** 2).clip(min=0.0)
+        return pdf.prod(axis=0)
 
-def mkernel_p1epanechnikov(X):
-    return mkernel_product(X, p=1)
-def mkernel_p1biweight(X):
-    return mkernel_product(X, p=2)
-def mkernel_p1triweight(X):
-    return mkernel_product(X, p=3)
+mkernel_p1epanechnikov = _KernelProduct(p=1, stats=_stats_epan)
+mkernel_p1biweight = _KernelProduct(p=2, stats=_stats_biwe)
+mkernel_p1triweight = _KernelProduct(p=3, stats=_stats_triw)
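The product kernels are separable, which is why `norm_factor` returns the 1-D constant raised to the d-th power (`c ** d` above). A 2-D check, assuming the definitions above are in scope:

    import numpy as np

    x = np.linspace(-1, 1, 401)
    X = np.array(np.meshgrid(x, x)).reshape(2, -1)   # shape (d=2, m)
    pdf = mkernel_p1epanechnikov.norm_kernel(X)
    print(pdf.sum() * (x[1] - x[0]) ** 2)            # expect approximately 1.0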
 
-def mkernel_rectangular(X):
-    d, n = X.shape
-    return np.where(np.all(np.abs(X) <= 1, axis=0), 0.5 ** d, 0.0)
-
-def mkernel_triangular(X):
-    pdf = (1 - np.abs(X)).clip(min=0.0)
-    return np.prod(pdf, axis=0)
-
-def mkernel_gaussian(X):
-    x2 = X ** 2
-    d, n = X.shape
-    return (2 * pi) ** (-d / 2) * exp(-0.5 * x2.sum(axis=0))
+class _KernelRectangular(_Kernel):
+    def _kernel(self, x):
+        return np.where(np.all(np.abs(x) <= self.r, axis=0), 1, 0.0)
+    def norm_factor(self, d=1, n=None):
+        r = self.r
+        return (2 * r) ** d
+mkernel_rectangular = _KernelRectangular(stats=_stats_rect)
+
+class _KernelTriangular(_Kernel):
+    def _kernel(self, x):
+        pdf = (1 - np.abs(x)).clip(min=0.0)
+        return pdf.prod(axis=0)
+mkernel_triangular = _KernelTriangular(stats=_stats_tria)
 
-def mkernel_laplace(X):
-    ''' Return multivariate Laplace kernel'''
-    d, n = X.shape
-    absX = np.abs(X)
-    return 0.5 ** d * exp(-absX.sum(axis=0))
+class _KernelGaussian(_Kernel):
+    def _kernel(self, x):
+        x2 = x ** 2
+        return exp(-0.5 * x2.sum(axis=0))
+    def norm_factor(self, d=1, n=None):
+        return (2 * pi) ** (d / 2.0)
+mkernel_gaussian = _KernelGaussian(stats=_stats_gaus)
 
-def mkernel_logistic(X):
-    ''' Return multivariate logistic kernel'''
-    s = exp(X)
-    return np.prod(s / (s + 1) ** 2, axis=0)
+#def mkernel_gaussian(X):
+#    x2 = X ** 2
+#    d = X.shape[0]
+#    return (2 * pi) ** (-d / 2) * exp(-0.5 * x2.sum(axis=0))
+
+class _KernelLaplace(_Kernel):
+    def _kernel(self, x):
+        absX = np.abs(x)
+        return exp(-absX.sum(axis=0))
+    def norm_factor(self, d=1, n=None):
+        return 2 ** d
+mkernel_laplace = _KernelLaplace(stats=_stats_lapl)
+
+class _KernelLogistic(_Kernel):
+    def _kernel(self, x):
+        s = exp(x)
+        return np.prod(s / (s + 1) ** 2, axis=0)
+mkernel_logistic = _KernelLogistic(stats=_stats_logi)
 
 _MKERNEL_DICT = dict(
     epan=mkernel_epanechnikov,
@@ -513,18 +588,7 @@ _MKERNEL_DICT = dict(
     gaus=mkernel_gaussian
     )
 
 _KERNEL_EXPONENT_DICT = dict(re=0, sp=0, ep=1, bi=2, tr=3, fo=4, fi=5, si=6, se=7)
-_KERNEL_STATS_DICT = dict(epan=(1. / 5, 3. / 5, np.inf),
-                          biwe=(1. / 7, 5. / 7, 45. / 2),
-                          triw=(1. / 9, 350. / 429, np.inf),
-                          rect=(1. / 3, 1. / 2, np.inf),
-                          tria=(1. / 6, 2. / 3, np.inf),
-                          lapl=(2, 1. / 4, np.inf),
-                          logi=(pi ** 2 / 3, 1. / 6, 1 / 42),
-                          gaus=(1, 1. / (2 * sqrt(pi)), 3. / (8 * sqrt(pi)))
-                          )
 
 class Kernel(object):
     '''
     Multivariate kernel
@@ -574,9 +638,13 @@ class Kernel(object):
     'Density estimation for statistics and data analysis'
     Chapman and Hall, pp 31, 103, 175
     '''
-    def __init__(self, name):
+    def __init__(self, name, fun='hns'):
+        self.name = name  # kept explicitly: hmns() below still dispatches on self.name
         self.kernel = _MKERNEL_DICT[name[:4]]
-        self.name = self.kernel.__name__.replace('mkernel_','').title()
+        #self.name = self.kernel.__name__.replace('mkernel_', '').title()
+        try:
+            self.get_smoothing = getattr(self, fun)
+        except:
+            self.get_smoothing = self.hns
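The new fun argument resolves a bandwidth rule by method name through getattr, falling back to hns. A short sketch, assuming the definitions above are in scope:

    gk = Kernel('gaussian', fun='hos')   # gk.get_smoothing now dispatches to Kernel.hos
    # gk.get_smoothing(data) returns the oversmoothed bandwidth hns(data)/0.93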
 
     def stats(self):
         ''' Return some 1D statistics of the kernel.
@@ -596,8 +664,9 @@ class Kernel(object):
         'Kernel smoothing'
         Chapman and Hall, pp 176.
         '''
-        name = self.name[2:6] if self.name[:2].lower()=='p1' else self.name[:4]
-        return _KERNEL_STATS_DICT[name.lower()]
+        return self.kernel.stats
+        #name = self.name[2:6] if self.name[:2].lower() == 'p1' else self.name[:4]
+        #return _KERNEL_STATS_DICT[name.lower()]
 
     def hns(self, data):
         '''
@@ -637,18 +706,19 @@ class Kernel(object):
         '''
 
         A = np.atleast_2d(data)
-        d,n = A.shape
+        n = A.shape[1]
 
         # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x))
         mu2, R, Rdd = self.stats()
-        AMISEconstant = (8*sqrt(pi)*R/(3*mu2**2*n))**(1./5)
-        iqr = np.abs(np.percentile(A, 75, axis=1)-np.percentile(A, 25, axis=1))# interquartile range
+        AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5)
+        iqr = np.abs(np.percentile(A, 75, axis=1) - np.percentile(A, 25, axis=1))  # interquartile range
         stdA = np.std(A, axis=1)
         # Use of the interquartile range guards against outliers and is
         # preferable if the distribution is skewed or has heavy tails.
         # This lessens the chance of oversmoothing.
-        return np.where(iqr>0, np.minimum(stdA, iqr/1.349), stdA)*AMISEconstant
+        return np.where(iqr > 0, np.minimum(stdA, iqr / 1.349), stdA) * AMISEconstant
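For the Gaussian kernel (mu2 = 1, R = 1/(2*sqrt(pi))) the AMISE constant collapses to the classical normal reference rule (4/(3*n))**(1/5), roughly 1.06*n**(-1/5). A quick check:

    from numpy import sqrt, pi

    n = 100
    R, mu2 = 1. / (2 * sqrt(pi)), 1.0
    print((8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5))   # ~ 0.42
    print((4.0 / (3.0 * n)) ** (1. / 5))                         # same value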
 
     def hos(self, data):
         ''' Return Oversmoothing Parameter.
 
@@ -685,7 +755,7 @@ class Kernel(object):
         'Kernel smoothing'
         Chapman and Hall, pp 60--63
         '''
-        return self.hns(data)/0.93
+        return self.hns(data) / 0.93
 
     def hmns(self, data):
         '''
         Returns Multivariate Normal Scale Estimate of Smoothing Parameter.
@@ -694,7 +764,7 @@ class Kernel(object):
         h      = M dimensional optimal value for smoothing parameter
                  given the data and kernel. size D x D
-        data   = data matrix, size N x D (D = # dimensions )
+        data   = data matrix, size D x N (D = # dimensions )
         kernel = 'epanechnikov' - Epanechnikov kernel.
                  'biweight'     - Bi-weight kernel.
                  'triweight'    - Tri-weight kernel.
@@ -713,8 +783,13 @@ class Kernel(object):
         data = rndnorm(0, 1,20,2)
         h = hmns(data,'epan');
 
-        See also hns, hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt
-        Reference:
+        See also
+        --------
+        hns, hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt
+
+        Reference
+        ----------
         B. W. Silverman (1986)
         'Density estimation for statistics and data analysis'
         Chapman and Hall, pp 43-48, 87
@@ -723,37 +798,35 @@ class Kernel(object):
         'Kernel smoothing'
         Chapman and Hall, pp 60--63, 86--88
         '''
-        # TODO implement more kernels
+        # TODO: implement more kernels
 
         A = np.atleast_2d(data)
-        d,n = A.shape
+        d, n = A.shape
 
-        if d==1:
+        if d == 1:
             return self.hns(data)
         name = self.name[:4].lower()
-        if name=='epan':        # Epanechnikov kernel
-            a=(8*(d+4)*(2*sqrt(pi))**d/sphere_volume(d))**(1./(4+d))
-        elif name=='biwe': # Bi-weight kernel
-            a=2.7779;
-            if d>2:
+        if name == 'epan':  # Epanechnikov kernel
+            a = (8.0 * (d + 4.0) * (2 * sqrt(pi)) ** d / sphere_volume(d)) ** (1. / (4.0 + d))
+        elif name == 'biwe':  # Bi-weight kernel
+            a = 2.7779
+            if d > 2:
                 raise ValueError('not implemented for d>2')
-        elif name=='triw': # Triweight
-            a=3.12;
-            if d>2:
+        elif name == 'triw':  # Tri-weight kernel
+            a = 3.12
+            if d > 2:
                 raise ValueError('not implemented for d>2')
-        elif name=='gaus': # Gaussian kernel
-            a = (4/(d+2))**(1./(d+4))
+        elif name == 'gaus':  # Gaussian kernel
+            a = (4.0 / (d + 2.0)) ** (1. / (d + 4.0))
         else:
             raise ValueError('Unknown kernel.')
 
         covA = scipy.cov(A)
 
-        return a*sqrtm(covA)*n*(-1./(d+4))
+        return a * linalg.sqrtm(covA) * n ** (-1. / (d + 4))
+
+    def norm_factor(self, d=1, n=None):
+        return self.kernel.norm_factor(d, n)
     def evaluate(self, X):
         return self.kernel(np.atleast_2d(X))
     __call__ = evaluate
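A sketch of hmns in use on two-dimensional data, assuming the n ** (-1. / (d + 4)) scaling written above and the class definitions in scope; the result is a full d-by-d matrix suitable as the hs argument of KDE:

    import numpy as np

    gk = Kernel('gaussian')
    data = np.random.randn(2, 50)   # d=2, n=50
    H = gk.hmns(data)
    print(H.shape)                  # (2, 2); usable as KDE(data, hs=H)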
 
@@ -908,7 +981,8 @@ def accum(accmap, a, func=None, size=None, fill_value=0, dtype=None):
             out[s] = func(vals[s])
 
     return out
 
-def gridcount(data,X):
+def gridcount(data, X):
    '''
    GRIDCOUNT D-dimensional histogram using linear binning.
 
@@ -939,16 +1013,21 @@
    Example
    -------
-    N = 500;
-    data = rndray(1,N,1);
-    x = linspace(0,max(data)+1,50).';
-    dx = x(2)-x(1);
-    c = gridcount(data,x);
-    plot(x,c,'.')   1D histogram
-    plot(x,c/dx/N)  1D probability density plot
-    trapz(x,c/dx/N)
+    >>> import numpy as np
+    >>> import wafo.kdetools as wk
+    >>> import pylab as plb
+    >>> N = 500
+    >>> data = np.random.rayleigh(1, N)
+    >>> x = np.linspace(0, max(data) + 1, 50)
+    >>> dx = x[1] - x[0]
+    >>> c = wk.gridcount(data, x)
+    >>> h = plb.plot(x, c, '.')       # 1D histogram
+    >>> h1 = plb.plot(x, c / dx / N)  # 1D probability density plot
+    >>> np.trapz(c / dx / N, x)
 
-    See also bincount, kdebin
+    See also
+    --------
+    bincount, accum, kdebin
 
    Reference
    ----------
@@ -958,84 +1037,114 @@
    '''
    dat = np.atleast_2d(data)
    x = np.atleast_2d(X)
-    d,n = dat.shape
+    d, n = dat.shape
    d1, inc = x.shape
 
-    if d!=d1:
+    if d != d1:
        raise ValueError('Dimension 0 of data and X do not match.')
 
-    dx = np.diff(x[:,:2],axis=1)
-    xlo = x[:,0]
-    xup = x[:,-1]
+    dx = np.diff(x[:, :2], axis=1)
+    xlo = x[:, 0]
+    xup = x[:, -1]
 
    datlo = dat.min(axis=1)
    datup = dat.max(axis=1)
-    if ((datlo<xlo) or (xup<datup)).any():
+    if ((datlo < xlo) | (xup < datup)).any():
        raise ValueError('X does not include whole range of the data!')
 
    csiz = np.repeat(inc, d)
 
-    binx = np.assarray(np.floor((dat-xlo[:, newaxis])/dx[:, newaxis]),dtype=int)
-    w = dx.prod()
-    if d==1:
-        c = (accum(binx,(x[binx+1]-data),size=[inc,]) +
-             accum(binx,(data-x[binx]),size=[inc,]))/w
-    elif d==2:
+    binx = np.asarray(np.floor((dat - xlo[:, newaxis]) / dx), dtype=int)
+    w = dx.prod()
+    abs = np.abs
+    if d == 1:
+        x.shape = (-1,)
+        c = (accum(binx, (x[binx + 1] - data), size=[inc, ]) +
+             accum(binx, (data - x[binx]), size=[inc, ])) / w
+    elif d == 2:
        b2 = binx[1]
        b1 = binx[0]
-        c = (accum([b1,b2]  ,abs(prod(([X(b1+1,1) X(b2+1,2)]-data),2)),size=[inc,inc])+...
-             accum([b1+1,b2] ,abs(prod(([X(b1,1) X(b2+1,2)]-data),2)),size=[inc,inc])+...
-             accum([b1 ,b2+1],abs(prod(([X(b1+1,1) X(b2,2)]-data),2)),size=[inc,inc])+...
-             accum([b1+1,b2+1],abs(prod(([X(b1,1) X(b2,2)]-data),2)),size=[inc,inc]))/w;
+        c_ = np.c_
+        stk = np.vstack
+        c = (accum(c_[b1, b2], abs(np.prod(stk([X[0, b1 + 1], X[1, b2 + 1]]) - data, axis=0)), size=[inc, inc]) +
+             accum(c_[b1 + 1, b2], abs(np.prod(stk([X[0, b1], X[1, b2 + 1]]) - data, axis=0)), size=[inc, inc]) +
+             accum(c_[b1, b2 + 1], abs(np.prod(stk([X[0, b1 + 1], X[1, b2]]) - data, axis=0)), size=[inc, inc]) +
+             accum(c_[b1 + 1, b2 + 1], abs(np.prod(stk([X[0, b1], X[1, b2]]) - data, axis=0)), size=[inc, inc])) / w
    else:  # d > 2
-        useSparse = 0;
-        Nc = prod(csiz);
-        c = zeros(Nc,1);
-        fact2 = [0 inc*(1:d-1)];
-        fact1 = [1 cumprod(csiz(1:d-1))];
-        fact1 = fact1(ones(n,1),:);
-        for ir=0:2^(d-1)-1,
-            bt0(:,:,1) = bitget(ir,1:d);
-            bt0(:,:,2) = ~bt0(:,:,1);
-            for ix = 0:1
-                one = mod(ix,2)+1;
-                two = mod(ix+1,2)+1;
-                % Convert to linear index (faster than sub2ind)
-                b1 = sum((binx + bt0(ones(n,1),:,one)-1).*fact1,2)+1; %linear index to c
-                bt2 = bt0(:,:,two) + fact2;
-                b2 = binx + bt2(ones(n,1),:); % linear index to X
-                try
-                    if useSparse
-                        % Fast gridding using sparse
-                        c = c + sparse(b1,1,abs(prod(X(b2)-data,2)),Nc,1);
-                    else
-                        c = c + accum(b1,abs(prod(X(b2)-data,2)),[Nc,1]);
-                        %c = c + accum([b1,ones(n,1)],abs(prod(X(b2)-data,2)),[Nc,1]);
-                        %[len,bin,val] = bincount(b1,abs(prod(X(b2)-data,2)));
-                        %c(bin) = c(bin)+val;
-                    end
-                catch
-                    c = c + sparse(b1,1,abs(prod(X(b2)-data,2)),Nc,1);
-                end
-            end
-        end
-        c = reshape(c/w,csiz);
-    end
-    switch d % make sure c is stored in the same way as meshgrid
-        case 2,  c = c.';
-        case 3,  c = permute(c,[2 1 3]);
-    end
-    return
+        raise ValueError('Not implemented for d>2')
+        Nc = csiz.prod()
+        c = np.zeros((Nc, 1))
+
+        fact2 = inc * np.arange(d)
+        fact1 = csiz.cumprod() / inc
+        #fact1 = fact1(ones(n,1),:);
+        # for ir in xrange(2**(d-1)):
+        #     bt0[:,:,1] = bitget(ir,1:d)
+        #     bt0[:,:,2] = 1-bt0[:,:,1]
+        #     for ix in range(2):
+        #         one = mod(ix,2)+1;
+        #         two = mod(ix+1,2)+1;
+        #         # Convert to linear index (faster than sub2ind)
+        #         b1 = sum((binx + bt0(ones(n,1),:,one)-1).*fact1,2)+1;  #%linear index to c
+        #         bt2 = bt0(:,:,two) + fact2;
+        #         b2 = binx + bt2(ones(n,1),:);  #% linear index to X
+        #
+        #         c = c + accum(b1,abs(prod(X(b2)-data,2)),[Nc,1]);
+        #         #c = c + accum([b1,ones(n,1)],abs(prod(X(b2)-data,2)),[Nc,1]);
+        #         #[len,bin,val] = bincount(b1,abs(prod(X(b2)-data,2)));
+        #         #c(bin) = c(bin)+val;
+        #     #end
+        # #end
+        # c = reshape(c/w,csiz);
+    #end
+
+    if d == 2:  # make sure c is stored in the same way as meshgrid
+        c = c.T
+    elif d == 3:
+        c = c.transpose(1, 0, 2)
+
+    return c
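The d == 1 branch above is linear binning: a sample x falling between grid nodes g[i] and g[i+1] donates the weight (g[i+1] - x)/dx to node i and (x - g[i])/dx to node i+1, so the counts sum to n exactly. A standalone sketch of the same idea, with np.add.at playing the role of accum (interior samples assumed; linbin is a hypothetical helper name):

    import numpy as np

    def linbin(data, g):
        dx = g[1] - g[0]
        i = np.floor((data - g[0]) / dx).astype(int)
        c = np.zeros(g.size)
        np.add.at(c, i, (g[i + 1] - data) / dx)    # weight to the left node
        np.add.at(c, i + 1, (data - g[i]) / dx)    # weight to the right node
        return c

    g = np.linspace(0, 5, 6)
    print(linbin(np.array([0.25, 2.5, 4.9]), g))   # counts sum to 3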
 
+def test_kde():
+    import numpy as np
+    import wafo.kdetools as wk
+    import pylab as plb
+    N = 500
+    data = np.random.rayleigh(1, size=(1, N))
+    kde = wk.KDE(data)
+    x = np.linspace(0, max(data.ravel()) + 1, 10)
+    #X, Y = np.meshgrid(x, x)
+    f = kde(x)
+
+    #plb.hist(data.ravel())
+    plb.plot(x, f)
+    plb.show()
+
 def test_gridcount():
     import numpy as np
     import wafo.kdetools as wk
     import pylab as plb
     N = 500
     data = np.random.rayleigh(1, size=(2, N))
     x = np.linspace(0, max(data.ravel()) + 1, 10)
     X = np.vstack((x, x))
     dx = x[1] - x[0]
     c = wk.gridcount(data, X)
     h = plb.contourf(x, x, c)
     plb.show()
     h = plb.plot(x, c, '.')        # 1D histogram
     h1 = plb.plot(x, c / dx / N)   # 1D probability density plot
     t = np.trapz(c / dx / N, x)
     print(t)
 
 def main():
     import doctest
     doctest.testmod()
 
 if __name__ == '__main__':
-    main()
+    #main()
+    #test_gridcount()
+    test_kde()