From 3257d675d8bbdb148390d15e5567b226e4901643 Mon Sep 17 00:00:00 2001
From: "Per.Andreas.Brodtkorb"
 <Per.Andreas.Brodtkorb@96a810ac-173e-11df-b9d6-e102a254d9bb>
Date: Fri, 18 Nov 2011 11:52:13 +0000
Subject: [PATCH] Made interpolation more general and faster in
 TKDE._eval_grid_fast

---
 pywafo/src/wafo/kdetools.py           | 163 +++++++++-----------------
 pywafo/src/wafo/test/test_kdetools.py |   7 +-
 2 files changed, 57 insertions(+), 113 deletions(-)

diff --git a/pywafo/src/wafo/kdetools.py b/pywafo/src/wafo/kdetools.py
index dacae61..93aa9c8 100644
--- a/pywafo/src/wafo/kdetools.py
+++ b/pywafo/src/wafo/kdetools.py
@@ -142,6 +142,9 @@ class _KDE(object):
         arg_0,arg_1,... arg_d-1 : vectors
             Alternatively, if no vectors is passed in then
              arg_i = linspace(self.xmin[i], self.xmax[i], self.inc)
+        output : string optional
+            'value' if value output
+            'data' if object output
 
         Returns
         -------
@@ -149,6 +152,11 @@ class _KDE(object):
             The values evaluated at meshgrid(*args).
 
         """
+        if len(args) == 0:
+            args = []
+            for i in range(self.d):
+                args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc))
+        self.args = args
         return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds)
        
     def _eval_grid_fast(self, *args):
@@ -162,9 +170,9 @@ class _KDE(object):
         arg_0,arg_1,... arg_d-1 : vectors
             Alternatively, if no vectors is passed in then
              arg_i = linspace(self.xmin[i], self.xmax[i], self.inc)
-        output : string
+        output : string optional
             'value' if value output
-            'wafodata' if object output
+            'data' if object output
         
         Returns
         -------
@@ -172,15 +180,15 @@ class _KDE(object):
             The values evaluated at meshgrid(*args).
 
         """
-        return self._eval_grid_fun(self._eval_grid, *args, **kwds)
-    def _eval_grid(self, *args):
-        pass
-    def _eval_grid_fun(self, eval_grd, *args, **kwds):
         if len(args) == 0:
             args = []
             for i in range(self.d):
                 args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc))
         self.args = args
+        return self._eval_grid_fun(self._eval_grid, *args, **kwds)
+    def _eval_grid(self, *args):
+        pass
+    def _eval_grid_fun(self, eval_grd, *args, **kwds):
         f = eval_grd(*args)
         if kwds.get('output', 'value') == 'value':
             return f
@@ -189,6 +197,7 @@ class _KDE(object):
             kwds2 = dict(title=titlestr)
             kwds2['plot_kwds'] = dict(plotflag=1)
             kwds2.update(**kwds)
+            args = self.args
             if self.d == 1:
                 args = args[0]
             wdata = WafoData(f, args, **kwds2)
@@ -390,24 +399,28 @@ class TKDE(_KDE):
         arg_0,arg_1,... arg_d-1 : vectors
            Alternatively, if no vectors is passed in then
             arg_i = gauss2dat(linspace(dat2gauss(self.xmin[i]), dat2gauss(self.xmax[i]), self.inc))
-        
+        output : string optional
+            'value' if value output
+            'data' if object output
+            
         Returns
         -------
         values : array-like
            The values evaluated at meshgrid(*args).
         
         """
-        f = self._eval_grid_fast(*args)
-        if kwds.get('output', 'value') == 'value':
-            return f
-        else:
-            args = self.args
-            titlestr = 'Kernel density estimate (%s)' % self.kernel.name
-            kwds2 = dict(title=titlestr)
-            kwds2.update(**kwds)
-            if self.d == 1:
-                args = args[0]        
-            return WafoData(f, args, **kwds2)
+        return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds)
+#        f = self._eval_grid_fast(*args)
+#        if kwds.get('output', 'value') == 'value':
+#            return f
+#        else:
+#            args = self.args
+#            titlestr = 'Kernel density estimate (%s)' % self.kernel.name
+#            kwds2 = dict(title=titlestr)
+#            kwds2.update(**kwds)
+#            if self.d == 1:
+#                args = args[0]        
+#            return WafoData(f, args, **kwds2)
     
     def _eval_grid_fast(self, *args): 
         if self.L2 is None:
@@ -420,14 +433,18 @@ class TKDE(_KDE):
         points = meshgrid(*self.args) if self.d > 1 else self.args
         f = self._scale_pdf(tf, points)
         if len(args):
-            if self.d == 1:
-                pdf = interpolate.interp1d(points[0], f, bounds_error=False, fill_value=0.0)
-            elif self.d == 2:
-                pdf = interpolate.interp2d(points[0], points[1], f, bounds_error=False, fill_value=0.0)
-            #ipoints = meshgrid(*args) if self.d>1 else args
-            fi = pdf(*args)
+            ipoints = meshgrid(*args) if self.d>1 else args
+            #shape0 = points[0].shape
+            #shape0i = ipoints[0].shape
+            for i in range(self.d):
+                points[i].shape = (-1,)
+                #ipoints[i].shape = (-1,)
+            points = np.asarray(points).T
+            #ipoints = np.asarray(ipoints).T
+            fi = interpolate.griddata(points, f.ravel(), tuple(ipoints), method='linear', 
+                                      fill_value=0.0)
+            #fi.shape = shape0i
             self.args = args
-            #fi.shape = ipoints[0].shape
             return fi*(fi>0)
         return f
     def _eval_grid(self, *args):
@@ -1636,91 +1653,18 @@ def mkernel(X, kernel):
     fun = _MKERNEL_DICT[kernel[:4]]
     return fun(np.atleast_2d(X))
 
-def accumsum(accmap, a, size=None, dtype=None):
-    """
-    A sum accumulation function
-
-    Parameters
-    ----------
-    accmap : ndarray
-        This is the "accumulation map".  It maps input (i.e. indices into
-        `a`) to their destination in the output array.  The first `a.ndim`
-        dimensions of `accmap` must be the same as `a.shape`.  That is,
-        `accmap.shape[:a.ndim]` must equal `a.shape`.  For example, if `a`
-        has shape (15,4), then `accmap.shape[:2]` must equal (15,4).  In this
-        case `accmap[i,j]` gives the index into the output array where
-        element (i,j) of `a` is to be accumulated.  If the output is, say,
-        a 2D, then `accmap` must have shape (15,4,2).  The value in the
-        last dimension give indices into the output array. If the output is
-        1D, then the shape of `accmap` can be either (15,4) or (15,4,1) 
-    a : ndarray
-        The input data to be accumulated.
-    size : ndarray or None
-        The size of the output array.  If None, the size will be determined
-        from `accmap`.
-    dtype : numpy data type, or None
-        The data type of the output array.  If None, the data type of
-        `a` is used.
-
-    Returns
-    -------
-    out : ndarray
-        The accumulated results.
-
-        The shape of `out` is `size` if `size` is given.  Otherwise the
-        shape is determined by the (lexicographically) largest indices of
-        the output found in `accmap`.
-
-
-    Examples
-    --------
-    >>> from numpy import array, prod
-    >>> a = array([[1,2,3],[4,-1,6],[-1,8,9]])
-    >>> a
-    array([[ 1,  2,  3],
-           [ 4, -1,  6],
-           [-1,  8,  9]])
-    >>> # Sum the diagonals.
-    >>> accmap = array([[0,1,2],[2,0,1],[1,2,0]])
-    >>> s = accum(accmap, a)
-    >>> s
-    array([ 9,  7, 15])
-    >>> # A 2D output, from sub-arrays with shapes and positions like this:
-    >>> # [ (2,2) (2,1)]
-    >>> # [ (1,2) (1,1)]
-    >>> accmap = array([
-    ...        [[0,0],[0,0],[0,1]],
-    ...        [[0,0],[0,0],[0,1]],
-    ...        [[1,0],[1,0],[1,1]]])
-    >>> # Accumulate using a product.
-    >>> accum(accmap, a, func=prod, dtype=float)
-    array([[ -8.,  18.],
-           [ -8.,   9.]])
-    >>> # Same accmap, but create an array of lists of values.
-    >>> accum(accmap, a, func=lambda x: x, dtype='O')
-    array([[[1, 2, 4, -1], [3, 6]],
-           [[-1, 8], [9]]], dtype=object)
-    """
-
-    # Check for bad arguments and handle the defaults.
-    if accmap.shape[:a.ndim] != a.shape:
-        raise ValueError("The initial dimensions of accmap must be the same as a.shape")
+def accumsum(accmap, a, size, dtype=None):
     if dtype is None:
         dtype = a.dtype
-    
-    adims = tuple(range(a.ndim))
-    if size is None:
-        size = 1 + np.squeeze(np.apply_over_axes(np.max, accmap, axes=adims))
     size = np.atleast_1d(size)
     if len(size)>1:
         binx = accmap[:,0]
         biny = accmap[:,1]
-        out = np.asarray(sparse.coo_matrix((a.ravel(), (binx, biny)),shape=size, dtype=dtype).todense()).reshape(size)
+        out = sparse.coo_matrix((a.ravel(), (binx, biny)),shape=size, dtype=dtype).tocsr()
     else:
         binx = accmap.ravel()
         zero = np.zeros(len(binx))
-        out = np.asarray(sparse.coo_matrix((a.ravel(), (binx, zero)),shape=(size,1), dtype=dtype).todense()).reshape(size)
-   
+        out = sparse.coo_matrix((a.ravel(), (binx, zero)),shape=(size,1), dtype=dtype).tocsr()   
     return out
 
 def accumsum2(accmap, a, size):
@@ -2335,8 +2279,8 @@ def gridcount(data, X, use_sparse=False):
     abs = np.abs
     if  d == 1:
         x.shape = (-1,)
-        c = (acfun(binx, (x[binx + 1] - dat), size=[inc, ]) + 
-             acfun(binx+1, (dat - x[binx]), size=[inc, ])) / w
+        c = np.asarray((acfun(binx, (x[binx + 1] - dat), size=(inc, )) + 
+             acfun(binx+1, (dat - x[binx]), size=(inc, ))) / w).ravel()
 #    elif d == 2:
 #        b2 = binx[1]
 #        b1 = binx[0]
@@ -2350,6 +2294,9 @@ def gridcount(data, X, use_sparse=False):
     else: # % d>2
        
         Nc = csiz.prod()
+#        if use_sparse:
+#            c = sparse.csr_matrix((Nc,1))
+#        else:
         c = np.zeros((Nc,))
      
         fact2 = np.asarray(np.reshape(inc * np.arange(d), (d, -1)), dtype=int)
@@ -2368,12 +2315,12 @@ def gridcount(data, X, use_sparse=False):
                 bt2 = bt0[two] + fact2
                 b2 = binx + bt2                     # linear index to X
                 c += acfun(b1, abs(np.prod(X1[b2] - dat, axis=0)), size=(Nc,))
-                
-        c = np.reshape(c / w, csiz, order='F')
+#        if use_sparse:
+#            c = c.toarray().ravel()
+        c = np.reshape(c/w , csiz, order='F')
         
         T = range(d)
         T[1], T[0] = T[0], T[1]
-        #T[-2], T[-1] = T[-1], T[-2]
         c = c.transpose(*T) # make sure c is stored in the same way as meshgrid    
     return c
 
@@ -2477,5 +2424,5 @@ def test_docstrings():
     doctest.testmod()
     
 if __name__ == '__main__':
-    test_docstrings()
-    
\ No newline at end of file
+    #test_docstrings()
+    kde_demo3()
\ No newline at end of file
diff --git a/pywafo/src/wafo/test/test_kdetools.py b/pywafo/src/wafo/test/test_kdetools.py
index 324a035..4a603d1 100644
--- a/pywafo/src/wafo/test/test_kdetools.py
+++ b/pywafo/src/wafo/test/test_kdetools.py
@@ -224,11 +224,8 @@ def test_gridcount_1D():
     
     >>> x = np.linspace(0, max(data.ravel()) + 1, 10)  
     >>> dx = x[1] - x[0]  
-    >>> c = wk.gridcount(data, x, use_sparse=False)
+    >>> c = wk.gridcount(data, x)
     >>> c 
-    array([ 0.78762626,  1.77520717,  7.99190087,  4.04054449,  1.67156643,
-            2.38228499,  1.05933195,  0.29153785,  0.        ,  0.        ])
-    >>> wk.gridcount(data, x, use_sparse=True)
     array([ 0.78762626,  1.77520717,  7.99190087,  4.04054449,  1.67156643,
             2.38228499,  1.05933195,  0.29153785,  0.        ,  0.        ])
             
@@ -289,7 +286,7 @@ def test_gridcount_3D():
     >>> x = np.linspace(0, max(data.ravel()) + 1, 3)  
     >>> dx = x[1] - x[0]
     >>> X = np.vstack((x, x, x))   
-    >>> c = wk.gridcount(data, X, use_sparse=True)
+    >>> c = wk.gridcount(data, X)
     >>> c 
     array([[[  8.74229894e-01,   1.27910940e+00,   1.42033973e-01],
             [  1.94778915e+00,   2.59536282e+00,   3.28213680e-01],