added more tests and simplified things..

9 years ago · 500aff71c6
parent 9e6a79d24d
commit 500aff71c6
3 changed files with 252 additions and 343 deletions
--- a/wafo/kdetools/gridding.py
+++ b/wafo/kdetools/gridding.py
@ -29,7 +29,7 @@ def bitget(int_type, offset):
    return np.bitwise_and(int_type, 1 << offset) >> offset


-def accumsum_sparse(accmap, a, shape, dtype=None):
+def accumsum_sparse(accmap, a, shape=None, dtype=None):
    """
    Example
    -------
@ -42,12 +42,21 @@ def accumsum_sparse(accmap, a, shape, dtype=None):
    >>> # Sum the diagonals.
    >>> accmap = array([[0,1,2],[2,0,1],[1,2,0]])
    >>> s = accumsum_sparse(accmap, a, (3,))
-    >>> np.allclose(s.toarray().T, [ 9,  7, 15])
+    >>> np.allclose(s.toarray(), [ 9,  7, 15])
    True

+     # group vals by idx and sum
+    >>> vals = array([12.0, 3.2, -15, 88, 12.9])
+    >>> idx = array([1,    0,    1,  4,   1  ])
+    >>> np.allclose(accumsum_sparse(idx, vals).toarray(),
+    ...   [3.2, 9.9, 0, 0, 88.])
+    True
    """
    if dtype is None:
        dtype = a.dtype
+    if shape is None:
+        shape = 1 + np.squeeze(np.apply_over_axes(np.max, accmap,
+                                                  axes=tuple(range(a.ndim))))
    shape = np.atleast_1d(shape)
    if len(shape) > 1:
        binx = accmap[:, 0]
@ -58,12 +67,13 @@ def accumsum_sparse(accmap, a, shape, dtype=None):
        binx = accmap.ravel()
        zero = np.zeros(len(binx))
        out = sparse.coo_matrix(
-            (a.ravel(), (binx, zero)), shape=(shape, 1), dtype=dtype).tocsr()
+            (a.ravel(), (binx, zero)), shape=(shape, 1), dtype=dtype).tocsr().T
    return out


-def accumsum(accmap, a, shape):
+def accumsum(accmap, a, shape=None):
    """
+
    Example
    -------
    >>> from numpy import array
@ -74,11 +84,21 @@ def accumsum(accmap, a, shape):
           [-1,  8,  9]])

    >>> accmap = array([[0,1,2],[2,0,1],[1,2,0]])  # Sum the diagonals.
-    >>> s = accumsum(accmap, a, (3,))
-    >>> np.allclose(s, [ 9,  7, 15])
+    >>> np.allclose(accumsum(accmap, a, (3,)), [ 9,  7, 15])
+    True
+    >>> accmap = array([[0,1,2],[0,1,2],[0,1,2]])  # Sum the columns.
+    >>> np.allclose(accumsum(accmap, a, (3,)), [4, 9, 18])
    True

+    # group vals by idx and sum
+    >>> vals = array([12.0, 3.2, -15, 88, 12.9])
+    >>> idx = array([1,    0,    1,  4,   1  ])
+    >>> np.allclose(accumsum(idx, vals), [3.2, 9.9, 0, 0, 88.])
+    True
    """
+    if shape is None:
+        shape = 1 + np.squeeze(np.apply_over_axes(np.max, accmap,
+                                                  axes=tuple(range(a.ndim))))
    return np.bincount(accmap.ravel(), a.ravel(), np.array(shape).max())


@ -148,6 +168,9 @@ def accum(accmap, a, func=None, shape=None, fill_value=0, dtype=None):
    >>> accum(accmap, a, func=prod, dtype=float)
    array([[ -8.,  18.],
           [ -8.,   9.]])
+    >>> accum(accmap, a, dtype=float)
+    array([[ 6.,  9.],
+           [ 7.,  9.]])

    # Same accmap, but create an array of lists of values.
    >>> accum(accmap, a, func=lambda x: x, dtype='O')
@ -210,7 +233,7 @@ def _gridcount_nd(acfun, data, x, y, w, binx):
    fact2 = np.asarray(np.reshape(inc * np.arange(d), (d, -1)), dtype=int)
    fact1 = np.asarray(np.reshape(c_shape.cumprod() / inc, (d, -1)), dtype=int)
    bt0 = [0, 0]
-    X1 = x.ravel()
+    x1 = x.ravel()
    for ir in range(2 ** (d - 1)):
        bt0[0] = np.reshape(bitget(ir, np.arange(d)), (d, -1))
        bt0[1] = 1 - bt0[0]
@ -222,7 +245,7 @@ def _gridcount_nd(acfun, data, x, y, w, binx):
            b1 = np.sum((binx + bt0[one]) * fact1, axis=0)
            bt2 = bt0[two] + fact2
            b2 = binx + bt2   # linear index to X
-            c += acfun(b1, np.abs(np.prod(X1[b2] - data, axis=0)) * y,
+            c += acfun(b1, np.abs(np.prod(x1[b2] - data, axis=0)) * y,
                       shape=(nc, ))

    c = np.reshape(c / w, c_shape, order='F')
@ -247,13 +270,18 @@ def gridcount(data, X, y=1):

    Parameters
    ----------
-    data = column vectors with D-dimensional data, shape D x Nd
-    X    = row vectors defining discretization, shape D x N
-            Must include the range of the data.
+    data : array-like
+        column vectors with D-dimensional data, shape D x Nd
+    X : array-like
+        row vectors defining discretization in each dimension, shape D x N.
+        The discretization must include the range of the data.
+    y : array-like
+        response data. Scalar or vector of size Nd.

    Returns
    -------
-    c    = gridcount,  shape N x N x ... x N
+    c : ndarray
+        gridcount,  shape N x N x ... x N

    GRIDCOUNT obtains the grid counts using linear binning.
    There are 2 strategies: simple- or linear- binning.
@ -298,7 +326,7 @@ def gridcount(data, X, y=1):

    See also
    --------
-    bincount, accum, kdebin
+    bincount, accum, kde

    Reference
    ----------
@ -306,28 +334,27 @@ def gridcount(data, X, y=1):
    'Kernel smoothing'
    Chapman and Hall, pp 182-192
    '''
-    data2, x = np.atleast_2d(data, X)
+    dataset, x = np.atleast_2d(data, X)
    y = np.atleast_1d(y).ravel()
    d, inc = x.shape

-    _assert(d == data2.shape[0], 'Dimension 0 of data and X do not match.')
+    _assert(d == dataset.shape[0], 'Dimension 0 of data and X do not match.')

-    dx = np.diff(x[:, :2], axis=1)
    xlo, xup = x[:, 0], x[:, -1]

-    datlo, datup = data2.min(axis=1), data2.max(axis=1)
+    datlo, datup = dataset.min(axis=1), dataset.max(axis=1)
    _assert(not ((datlo < xlo) | (xup < datup)).any(),
            'X does not include whole range of the data!')

    #  acfun = accumsum_sparse  # faster than accum
    acfun = accumsum  # faster than accumsum_sparse

-    binx = np.asarray(np.floor((data2 - xlo[:, np.newaxis]) / dx), dtype=int)
+    dx = np.diff(x[:, :2], axis=1)
+    binx = np.asarray(np.floor((dataset - xlo[:, np.newaxis]) / dx), dtype=int)
    w = dx.prod()
    if d == 1:
-        return _gridcount_1d(acfun, data2, x, y, w, binx, inc)
-    # else:  # d>1
-    return _gridcount_nd(acfun, data2, x, y, w, binx)
+        return _gridcount_1d(acfun, dataset, x, y, w, binx, inc)
+    return _gridcount_nd(acfun, dataset, x, y, w, binx)


 if __name__ == '__main__':
--- a/wafo/kdetools/kdetools.py
+++ b/wafo/kdetools/kdetools.py
@ -247,7 +247,7 @@ class _KDE(object):
        if len(args) == 0:
            args = self.get_args()
        self.args = args
-        return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds)
+        return self.eval_grid_fun(self._eval_grid_fast, *args, **kwds)

    def _eval_grid_fast(self, *args, **kwds):
        pass
@ -273,7 +273,7 @@ class _KDE(object):
        if len(args) == 0:
            args = self.get_args()
        self.args = args
-        return self._eval_grid_fun(self._eval_grid, *args, **kwds)
+        return self.eval_grid_fun(self._eval_grid, *args, **kwds)

    def _eval_grid(self, *args, **kwds):
        pass
@ -301,7 +301,7 @@ class _KDE(object):
            self._add_contour_levels(wdata)
        return wdata

-    def _eval_grid_fun(self, eval_grd, *args, **kwds):
+    def eval_grid_fun(self, eval_grd, *args, **kwds):
        output = kwds.pop('output', 'value')
        f = eval_grd(*args, **kwds)
        if output == 'value':
@ -434,9 +434,11 @@ class TKDE(_KDE):
    t = np.trapz(f, x)
    """

-    def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None,
-                 xmax=None, inc=512, L2=None):
+    def __init__(self, data, hs=None, kernel=None, alpha=0.0,
+                 xmin=None, xmax=None, inc=512, L2=None):
        self.L2 = L2
+#         self.dataset = data
+#         self.tkde =
        super(TKDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc)

 #     @property
@ -445,6 +447,8 @@ class TKDE(_KDE):
 #
 #     @dataset.setter
 #     def dataset(self, data):
+#         self._dataset = atleast_2d(data)
+#         self._tdataset = self._dat2gaus(self._dataset)

    def _initialize(self):
        self._check_xmin()
@ -455,7 +459,8 @@ class TKDE(_KDE):
        xmax = self.xmax
        if xmax is not None:
            xmax = self._dat2gaus(np.reshape(xmax, (-1, 1)))
-        self.tkde = KDE(tdataset, self.hs, self.kernel, self.alpha, xmin, xmax,
+        self.tkde = KDE(tdataset, self.hs, self.kernel, self.alpha,
+                        np.ravel(xmin), np.ravel(xmax),
                        self.inc)
        if self.inc is None:
            self.inc = self.tkde.inc
@ -467,7 +472,7 @@ class TKDE(_KDE):
            L2 = np.atleast_1d(self.L2) * np.ones(self.d)
            self.xmin = np.where(L2 != 1,
                                 np.maximum(self.xmin, amin / 100.0),
-                                 self.xmin).reshape((-1, 1))
+                                 self.xmin)

    def _dat2gaus(self, points):
        if self.L2 is None:
@ -527,7 +532,7 @@ class TKDE(_KDE):
           The values evaluated at meshgrid(*args).

        """
-        return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds)
+        return self.eval_grid_fun(self._eval_grid_fast, *args, **kwds)

    def _interpolate(self, points, f, *args, **kwds):
        ipoints = meshgrid(*args)  # if self.d > 1 else args
@ -754,7 +759,7 @@ class KDE(_KDE):

        y = kwds.get('y', 1.0)
        if self.alpha > 0:
-            y = y / self._lambda**d
+            warnings.warn('alpha parameter is not used for binned!')

        # Find the binned kernel weights, c.
        c = gridcount(self.dataset, X, y=y)
@ -922,16 +927,16 @@ class KRegression(object):   # _KDE):

        self.tkde = TKDE(data, hs=hs, kernel=kernel,
                         alpha=alpha, xmin=xmin, xmax=xmax, inc=inc, L2=L2)
-        self.y = y
+        self.y = np.atleast_1d(y)
        self.p = p

    def eval_grid_fast(self, *args, **kwds):
        self._grdfun = self.tkde.eval_grid_fast
-        return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds)
+        return self.tkde.eval_grid_fun(self._eval_gridfun, *args, **kwds)

    def eval_grid(self, *args, **kwds):
        self._grdfun = self.tkde.eval_grid
-        return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds)
+        return self.tkde.eval_grid_fun(self._eval_gridfun, *args, **kwds)

    def _eval_gridfun(self, *args, **kwds):
        grdfun = self._grdfun
@ -1017,7 +1022,30 @@ class BKRegression(object):
        xi = np.linspace(xmin - sml, xmax + sml, ni)
        return xi

-    def prb_ci(self, n, p, alpha=0.05, **kwds):
+    def _wilson_score(self, n, p, alpha):
+        # Wilson score
+        z0 = -_invnorm(alpha / 2)
+        den = 1 + (z0 ** 2. / n)
+        xc = (p + (z0 ** 2) / (2 * n)) / den
+        halfwidth = (z0 * sqrt((p * (1 - p) / n) +
+                               (z0 ** 2 / (4 * (n ** 2))))) / den
+        plo = xc - halfwidth.clip(min=0)  # wilson score
+        pup = xc + halfwidth.clip(max=1.0)  # wilson score
+        return plo, pup
+
+    def _credible_interval(self, n, p, alpha):
+        # Jeffreys intervall a=b=0.5
+        # st.beta.isf(alpha/2, y+a, n-y+b) y = n*p, n-y = n*(1-p)
+        a = self.a
+        b = self.b
+        st = scipy.stats
+        pup = np.where(p == 1, 1,
+                       st.beta.isf(alpha / 2, n * p + a, n * (1 - p) + b))
+        plo = np.where(p == 0, 0,
+                       st.beta.isf(1 - alpha / 2, n * p + a, n * (1 - p) + b))
+        return plo, pup
+
+    def prb_ci(self, n, p, alpha=0.05):
        """Return Confidence Interval for the binomial probability p.

        Parameters
@ -1040,25 +1068,9 @@ class BKRegression(object):

        """
        if self.method.startswith('w'):
-            # Wilson score
-            z0 = -_invnorm(alpha / 2)
-            den = 1 + (z0 ** 2. / n)
-            xc = (p + (z0 ** 2) / (2 * n)) / den
-            halfwidth = (z0 * sqrt((p * (1 - p) / n) +
-                                   (z0 ** 2 / (4 * (n ** 2))))) / den
-            plo = (xc - halfwidth).clip(min=0)  # wilson score
-            pup = (xc + halfwidth).clip(max=1.0)  # wilson score
+            plo, pup = self._wilson_score(n, p, alpha)
        else:
-            # Jeffreys intervall a=b=0.5
-            # st.beta.isf(alpha/2, y+a, n-y+b) y = n*p, n-y = n*(1-p)
-            a = self.a
-            b = self.b
-            st = scipy.stats
-            pup = np.where(p == 1, 1,
-                           st.beta.isf(alpha / 2, n * p + a, n * (1 - p) + b))
-            plo = np.where(p == 0, 0,
-                           st.beta.isf(1 - alpha / 2,
-                                       n * p + a, n * (1 - p) + b))
+            plo, pup = self._credible_interval(n, p, alpha)
        return plo, pup

    def prb_empirical(self, xi=None, hs_e=None, alpha=0.05, color='r', **kwds):
@ -1088,12 +1100,12 @@ class BKRegression(object):
        y = self.y

        c = gridcount(x, xi)  # + self.a + self.b # count data
-        if (y == 1).any():
+        if np.any(y == 1):
            c0 = gridcount(x[y == 1], xi)  # + self.a # count success
        else:
            c0 = np.zeros(np.shape(xi))
        prb = np.where(c == 0, 0, c0 / (c + _TINY))  # assume prb==0 for c==0
-        CI = np.vstack(self.prb_ci(c, prb, alpha, **kwds))
+        CI = np.vstack(self.prb_ci(c, prb, alpha))

        prb_e = PlotData(prb, xi, plotmethod='plot', plot_args=['.'],
                         plot_kwds=dict(markersize=6, color=color, picker=5))
@ -1213,7 +1225,7 @@ def kde_demo1():
    for ix, h in enumerate(hVec):
        plt.figure(ix)
        kde = KDE(data, hs=h, kernel=kernel)
-        f2 = kde(x, output='plot', title='h_s = {0:2.2f}'.format(h),
+        f2 = kde(x, output='plot', title='h_s = {0:2.2f}'.format(float(h)),
                 ylab='Density')
        f2.plot('k-')

@ -1278,8 +1290,8 @@ def kde_demo3():
    plt.plot(data[0], data[1], '.')

    # plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable
-
-    tkde = TKDE(data, L2=0.5)
+    hs = Kernel('gauss').get_smoothing(data**0.5)
+    tkde = TKDE(data, hs=hs, L2=0.5)
    ft = tkde.eval_grid_fast(
        output='plot', title='Transformation KDE', plotflag=1)

@ -1312,12 +1324,12 @@ def kde_demo4(N=50):
    f1 = kde1(output='plot', label='Ordinary KDE', plotflag=1)

    plt.figure(0)
-    f.plot('r', label='hns={0:g}'.format(kde.hs))
+    f.plot('r', label='hns={0}'.format(kde.hs))
    # plt.figure(2)
-    f1.plot('b', label='hisj={0:g}'.format(kde1.hs))
-    x = np.linspace(-4, 4)
-    for loc in [-5, 5]:
-        plt.plot(x + loc, st.norm.pdf(x, 0, scale=1) / 2, 'k:',
+    f1.plot('b', label='hisj={0}'.format(kde1.hs))
+    x = np.linspace(-9, 9)
+    plt.plot(x, (st.norm.pdf(x, loc=-5, scale=1) +
+                 st.norm.pdf(x, loc=5, scale=1)) / 2, 'k:',
             label='True density')
    plt.legend()

@ -1335,11 +1347,11 @@ def kde_demo5(N=500):
                      st.norm.rvs(loc=-5, scale=1, size=(2, N,))))
    kde = KDE(data, kernel=Kernel('gauss', 'hns'))
    f = kde(output='plot', plotflag=1,
-            title='Ordinary KDE (hns={0:s}'.format(str(kde.hs.tolist())))
+            title='Ordinary KDE, hns={0:s}'.format(str(list(kde.hs))))

    kde1 = KDE(data, kernel=Kernel('gauss', 'hisj'))
    f1 = kde1(output='plot', plotflag=1,
-              title='Ordinary KDE (hisj={0:s})'.format(str(kde1.hs.tolist())))
+              title='Ordinary KDE, hisj={0:s}'.format(str(list(kde1.hs))))

    plt.figure(0)
    plt.clf()
@ -1766,9 +1778,9 @@ def check_bkregression():
    plt.ion()
    k = 0
    for _i, n in enumerate([50, 100, 300, 600]):
-        x, y, fun1 = _get_data(
-            n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75)
-        bkreg = BKRegression(x, y)
+        x, y, fun1 = _get_data(n, symmetric=True, loc1=0.1,
+                               scale1=0.6, scale2=0.75)
+        bkreg = BKRegression(x, y, a=0.05, b=0.05)
        fbest = bkreg.prb_search_best(
            hsfun='hste', alpha=0.05, color='g', label='Transit_D')

@ -1951,8 +1963,8 @@ if __name__ == '__main__':
    if False:
        test_docstrings(__file__)
    else:
-        kde_demo2()
-        # check_bkregression()
+        # kde_demo5()
+        check_bkregression()
        # check_regression_bin()
        # check_kreg_demo3()
        # check_kreg_demo4()
--- a/wafo/kdetools/tests/test_kdetools.py
+++ b/wafo/kdetools/tests/test_kdetools.py
@ -7,17 +7,18 @@ from __future__ import division
 import unittest
 import numpy as np
 from numpy.testing import assert_allclose
-from numpy import array, inf
+import wafo.objects as wo
 import wafo.kdetools as wk
+import scipy.stats as st


-class TestKdeTools(unittest.TestCase):
+class TestKde(unittest.TestCase):

    def setUp(self):

        # N = 20
        # data = np.random.rayleigh(1, size=(N,))
-        self.data = array([0.75355792, 0.72779194, 0.94149169, 0.07841119,
+        self.data = np.array([0.75355792, 0.72779194, 0.94149169, 0.07841119,
                              2.32291887, 1.10419995, 0.77055114, 0.60288273,
                              1.36883635, 1.74754326, 1.09547561, 1.01671133,
                              0.73211143, 0.61891719, 0.75903487, 1.8919469,
@ -26,7 +27,6 @@ class TestKdeTools(unittest.TestCase):

    def test0_KDE1D(self):
        data, x = self.data, self.x
-        # kde = wk.KDE(data, hs=0.5, alpha=0.5)

        kde0 = wk.KDE(data, hs=0.5, alpha=0.0, inc=16)

@ -36,7 +36,6 @@ class TestKdeTools(unittest.TestCase):
                             0.08270612,  0.02991145,  0.00720821])

        fx = kde0.eval_grid(x, r=1)
-
        assert_allclose(-fx, [0.11911419724002906, 0.13440000694772541,
                              0.044400116190638696, -0.0677695267531197,
                              -0.09555596523854318, -0.07498819087690148,
@ -125,6 +124,13 @@ class TestKdeTools(unittest.TestCase):
                            0.16555235,  0.0541248])

        assert_allclose(np.trapz(f, x), 0.97323338046725172)
+        f0 = kde(output='plot')
+        self.assertIsInstance(f0, wo.PlotData)
+        assert_allclose(np.trapz(f0.data, f0.args), 0.9319800260106625)
+
+        f0 = kde.eval_grid_fast(output='plot')
+        self.assertIsInstance(f0, wo.PlotData)
+        assert_allclose(np.trapz(f0.data, f0.args), 0.9319799696210691)

    def test1a_KDE1D(self):
        data, x = self.data, self.x
@ -136,7 +142,15 @@ class TestKdeTools(unittest.TestCase):

        assert_allclose(np.trapz(f, x), 0.92938023659047952)

-    def test2a_KDE1D(self):
+        f0 = kde(output='plot')
+        self.assertIsInstance(f0, wo.PlotData)
+        assert_allclose(np.trapz(f0.data, f0.args), 0.9871189376720593)
+
+        f0 = kde.eval_grid_fast(output='plot')
+        self.assertIsInstance(f0, wo.PlotData)
+        assert_allclose(np.trapz(f0.data, f0.args), 0.9962507385131669)
+
+    def test2a_KDE_1D_hs_5_alpha_5(self):
        # data, x = self.data, self.x
        data = np.asarray([1, 2])
        x = np.linspace(0, max(np.ravel(data)) + 1, 10)
@ -151,7 +165,7 @@ class TestKdeTools(unittest.TestCase):
    def test_KDE2D(self):
        # N = 20
        # data = np.random.rayleigh(1, size=(2, N))
-        data = array([
+        data = np.array([
            [0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399,
             0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145,
             0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821,
@ -176,269 +190,125 @@ class TestKdeTools(unittest.TestCase):
             ]
        assert_allclose(kde0.eval_grid_fast(x, x), t)

-    def test_gridcount_1D(self):
-        data, x = self.data, self.x
-        dx = x[1] - x[0]
-        c = wk.gridcount(data, x)
-        assert_allclose(c, [0.78762626, 1.77520717, 7.99190087, 4.04054449,
-                            1.67156643, 2.38228499, 1.05933195, 0.29153785, 0.,
-                            0.])
-        t = np.trapz(c / dx / len(data), x)
-        assert_allclose(t, 0.9803093435140049)
-
-    def test_gridcount_2D(self):
-        N = 20
-        # data = np.random.rayleigh(1, size=(2, N))
-        data = array([
-            [0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399,
-             0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145,
-             0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821,
-             0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964],
-            [1.44080694, 0.39973751, 1.331243, 2.48895822, 1.18894158,
-             1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689,
-             0.870329, 1.25106862, 0.5346619, 0.47541236, 1.51930093,
-             0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]])
-
-        x = np.linspace(0, max(np.ravel(data)) + 1, 5)
-        dx = x[1] - x[0]
-        X = np.vstack((x, x))
-        c = wk.gridcount(data, X)
-        assert_allclose(c,
-                        [[0.38922806, 0.8987982,  0.34676493, 0.21042807,  0.],
-                         [1.15012203, 5.16513541, 3.19250588, 0.55420752,  0.],
-                         [0.74293418, 3.42517219, 1.97923195, 0.76076621,  0.],
-                         [0.02063536, 0.31054405, 0.71865964, 0.13486633,  0.],
-                         [0.,  0.,  0.,  0.,  0.]], 1e-5)
-
-        t = np.trapz(np.trapz(c / (dx**2 * N), x), x)
-        assert_allclose(t, 0.9011618785736376)
-
-    def test_gridcount_3D(self):
-        N = 20
-        # data = np.random.rayleigh(1, size=(3, N))
-        data = np.array([
-            [0.932896, 0.89522635, 0.80636346, 1.32283371, 0.27125435,
-             1.91666304, 2.30736635, 1.13662384, 1.73071287, 1.06061127,
-             0.99598512, 2.16396591, 1.23458213, 1.12406686, 1.16930431,
-             0.73700592, 1.21135139, 0.46671506, 1.3530304, 0.91419104],
-            [0.62759088, 0.23988169, 2.04909823, 0.93766571, 1.19343762,
-             1.94954931, 0.84687514, 0.49284897, 1.05066204, 1.89088505,
-             0.840738, 1.02901457, 1.0758625, 1.76357967, 0.45792897,
-             1.54488066, 0.17644313, 1.6798871, 0.72583514, 2.22087245],
-            [1.69496432, 0.81791905, 0.82534709, 0.71642389, 0.89294732,
-             1.66888649, 0.69036947, 0.99961448, 0.30657267, 0.98798713,
-             0.83298728, 1.83334948, 1.90144186, 1.25781913, 0.07122458,
-             2.42340852, 2.41342037, 0.87233305, 1.17537114, 1.69505988]])

-        x = np.linspace(0, max(np.ravel(data)) + 1, 3)
-        dx = x[1] - x[0]
-        X = np.vstack((x, x, x))
-        c = wk.gridcount(data, X)
-        assert_allclose(c,
-                        [[[8.74229894e-01, 1.27910940e+00, 1.42033973e-01],
-                          [1.94778915e+00, 2.59536282e+00, 3.28213680e-01],
-                          [1.08429416e-01, 1.69571495e-01, 7.48896775e-03]],
-                         [[1.44969128e+00, 2.58396370e+00, 2.45459949e-01],
-                          [2.28951650e+00, 4.49653348e+00, 2.73167915e-01],
-                          [1.10905565e-01, 3.18733817e-01, 1.12880816e-02]],
-                         [[7.49265424e-02, 2.18142488e-01, 0.0],
-                          [8.53886762e-02, 3.73415131e-01, 0.0],
-                          [4.16196568e-04, 1.62218824e-02, 0.0]]])
-
-        t = np.trapz(np.trapz(np.trapz(c / dx**3 / N, x), x), x)
-        assert_allclose(t, 0.5164999727560187)
-
-    def test_gridcount_4D(self):
-
-        N = 20
-        # data = np.random.rayleigh(1, size=(2, N))
-        data = array([
-            [0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399,
-             0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145],
-            [0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821,
-             0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964],
-            [1.44080694, 0.39973751, 1.331243, 2.48895822, 1.18894158,
-                1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689],
-            [0.870329, 1.25106862, 0.5346619, 0.47541236, 1.51930093,
-                0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]])
-
-        x = np.linspace(0, max(np.ravel(data)) + 1, 3)
-        dx = x[1] - x[0]
-        X = np.vstack((x, x, x, x))
-        c = wk.gridcount(data, X)
-        assert_allclose(c,
-                        [[[[1.77163904e-01, 1.87720108e-01, 0.0],
-                           [5.72573585e-01, 6.09557834e-01, 0.0],
-                            [3.48549923e-03, 4.05931870e-02, 0.0]],
-                            [[1.83770124e-01, 2.56357594e-01, 0.0],
-                             [4.35845892e-01, 6.14958970e-01, 0.0],
-                             [3.07662204e-03, 3.58312786e-02, 0.0]],
-                            [[0.0, 0.0, 0.0],
-                             [0.0, 0.0, 0.0],
-                             [0.0, 0.0, 0.0]]],
-                            [[[3.41883175e-01, 5.97977973e-01, 0.0],
-                              [5.72071865e-01, 8.58566538e-01, 0.0],
-                                [3.46939323e-03, 4.04056116e-02, 0.0]],
-                             [[3.58861043e-01, 6.28962785e-01, 0.0],
-                              [8.80697705e-01, 1.47373158e+00, 0.0],
-                                [2.22868504e-01, 1.18008528e-01, 0.0]],
-                             [[2.91835067e-03, 2.60268355e-02, 0.0],
-                              [3.63686503e-02, 1.07959459e-01, 0.0],
-                                [1.88555613e-02, 7.06358976e-03, 0.0]]],
-                            [[[3.13810608e-03, 2.11731327e-02, 0.0],
-                              [6.71606255e-03, 4.53139824e-02, 0.0],
-                                [0.0, 0.0, 0.0]],
-                             [[7.05946179e-03, 5.44614852e-02, 0.0],
-                              [1.09099593e-01, 1.95935584e-01, 0.0],
-                                [6.61257395e-02, 2.47717418e-02, 0.0]],
-                             [[6.38695629e-04, 5.69610302e-03, 0.0],
-                              [1.00358265e-02, 2.44053065e-02, 0.0],
-                                [5.67244468e-03, 2.12498697e-03, 0.0]]]])
-
-        t = np.trapz(np.trapz(np.trapz(np.trapz(c / dx**4 / N, x), x), x), x)
-        assert_allclose(t, 0.21183518274521254)
-
-
-class TestKernels(unittest.TestCase):
-    def setUp(self):
-        self.names = ['epanechnikov', 'biweight', 'triweight', 'logistic',
-                      'p1epanechnikov', 'p1biweight', 'p1triweight',
-                      'triangular', 'gaussian', 'rectangular', 'laplace']
-
-    def test_stats(self):
-        truth = {
-            'biweight': (0.14285714285714285, 0.7142857142857143, 22.5),
-            'logistic': (3.289868133696453, 1./6, 0.023809523809523808),
-            'p1biweight': (0.14285714285714285, 0.7142857142857143, 22.5),
-            'triangular': (0.16666666666666666, 0.6666666666666666, inf),
-            'gaussian': (1, 0.28209479177387814, 0.21157109383040862),
-            'epanechnikov': (0.2, 0.6, inf),
-            'triweight': (0.1111111111111111, 0.8158508158508159, inf),
-            'p1triweight': (0.1111111111111111, 0.8158508158508159, inf),
-            'p1epanechnikov': (0.2, 0.6, inf),
-            'rectangular': (0.3333333333333333, 0.5, inf),
-            'laplace': (2, 0.25, inf)}
-        for name in self.names:
-            kernel = wk.Kernel(name)
-            assert_allclose(kernel.stats(), truth[name])
-            # truth[name] = kernel.stats()
-        # print(truth)
-
-    def test_norm_factors_1d(self):
-        truth = {
-            'biweight': 1.0666666666666667, 'logistic': 1.0,
-            'p1biweight': 1.0666666666666667, 'triangular': 1.0,
-            'gaussian': 2.5066282746310002, 'epanechnikov': 1.3333333333333333,
-            'triweight': 0.91428571428571426, 'laplace': 2,
-            'p1triweight': 0.91428571428571426,
-            'p1epanechnikov': 1.3333333333333333, 'rectangular': 2.0}
-        for name in self.names:
-            kernel = wk.Kernel(name)
-            assert_allclose(kernel.norm_factor(d=1, n=20), truth[name])
-            # truth[name] = kernel.norm_factor(d=1, n=20)
-
-    def test_effective_support(self):
-        truth = {'biweight': (-1.0, 1.0), 'logistic': (-7.0, 7.0),
-                 'p1biweight': (-1.0, 1.0), 'triangular': (-1.0, 1.0),
-                 'gaussian': (-4.0, 4.0), 'epanechnikov': (-1.0, 1.0),
-                 'triweight': (-1.0, 1.0), 'p1triweight': (-1.0, 1.0),
-                 'p1epanechnikov': (-1.0, 1.0), 'rectangular': (-1.0, 1.0),
-                 'laplace': (-7.0, 7.0)}
-        for name in self.names:
-            kernel = wk.Kernel(name)
-            assert_allclose(kernel.effective_support(), truth[name])
-            # truth[name] = kernel.effective_support()
-        # print(truth)
-        # self.assertTrue(False)
-
-    def test_that_kernel_is_a_pdf(self):
-
-        for name in self.names:
-            kernel = wk.Kernel(name)
-            xmin, xmax = kernel.effective_support()
-            x = np.linspace(xmin, xmax, 4*1024+1)
-            m0 = kernel.norm_factor(d=1, n=1)
-            pdf = kernel(x)/m0
-            #             print(name)
-            #             print(pdf[0], pdf[-1])
-            #             print(np.trapz(pdf, x) - 1)
-            assert_allclose(np.trapz(pdf, x), 1, 1e-2)
-        # self.assertTrue(False)
-
-
-class TestSmoothing(unittest.TestCase):
-    def setUp(self):
-        self.data = np.array([
-            [0.932896, 0.89522635, 0.80636346, 1.32283371, 0.27125435,
-             1.91666304, 2.30736635, 1.13662384, 1.73071287, 1.06061127,
-             0.99598512, 2.16396591, 1.23458213, 1.12406686, 1.16930431,
-             0.73700592, 1.21135139, 0.46671506, 1.3530304, 0.91419104],
-            [0.62759088, 0.23988169, 2.04909823, 0.93766571, 1.19343762,
-             1.94954931, 0.84687514, 0.49284897, 1.05066204, 1.89088505,
-             0.840738, 1.02901457, 1.0758625, 1.76357967, 0.45792897,
-             1.54488066, 0.17644313, 1.6798871, 0.72583514, 2.22087245],
-            [1.69496432, 0.81791905, 0.82534709, 0.71642389, 0.89294732,
-             1.66888649, 0.69036947, 0.99961448, 0.30657267, 0.98798713,
-             0.83298728, 1.83334948, 1.90144186, 1.25781913, 0.07122458,
-             2.42340852, 2.41342037, 0.87233305, 1.17537114, 1.69505988]])
-        self.gauss = wk.Kernel('gaussian')
-
-    def test_hns(self):
-        hs = self.gauss.hns(self.data)
-        assert_allclose(hs, [0.18154437, 0.36207987, 0.37396219])
-
-    def test_hos(self):
-        hs = self.gauss.hos(self.data)
-        assert_allclose(hs, [0.195209, 0.3893332, 0.40210988])
-
-    def test_hms(self):
-        hs = self.gauss.hmns(self.data)
-        assert_allclose(hs, [[3.25196193e-01, -2.68892467e-02, 3.18932448e-04],
-                             [-2.68892467e-02, 3.91283306e-01, 2.38654678e-02],
-                             [3.18932448e-04, 2.38654678e-02, 4.05123874e-01]])
-        hs = self.gauss.hmns(self.data[0])
-        assert_allclose(hs, self.gauss.hns(self.data[0]))
-
-        hs = wk.Kernel('epan').hmns(self.data)
-        assert_allclose(hs,
-                        [[8.363847e-01, -6.915749e-02, 8.202747e-04],
-                         [-6.915749e-02, 1.006357e+00, 6.138052e-02],
-                         [8.202747e-04, 6.138052e-02, 1.041954e+00]],
-                        rtol=1e-5)
-        hs = wk.Kernel('biwe').hmns(self.data[:2])
-        assert_allclose(hs, [[0.868428, -0.071705],
-                             [-0.071705, 1.04685]], rtol=1e-5)
-        hs = wk.Kernel('triwe').hmns(self.data[:2])
-        assert_allclose(hs, [[0.975375, -0.080535],
-                             [-0.080535, 1.17577]], rtol=1e-5)
-        self.assertRaises(NotImplementedError,
-                          wk.Kernel('biwe').hmns, self.data)
-        self.assertRaises(NotImplementedError,
-                          wk.Kernel('triwe').hmns, self.data)
-        self.assertRaises(NotImplementedError,
-                          wk.Kernel('triangular').hmns, self.data)
-
-    def test_hscv(self):
-        hs = self.gauss.hscv(self.data)
-        assert_allclose(hs, [0.1656318800590673, 0.3273938258112911,
-                             0.31072126996412214])
-
-    def test_hstt(self):
-        hs = self.gauss.hstt(self.data)
-        assert_allclose(hs, [0.18099075, 0.50409881, 0.11018912])
-
-    def test_hste(self):
-        hs = self.gauss.hste(self.data)
-        assert_allclose(hs, [0.17035204677390572, 0.29851960273788863,
-                             0.186685349741972])
-
-    def test_hldpi(self):
-        hs = self.gauss.hldpi(self.data)
-        assert_allclose(hs, [0.1732289, 0.33159097, 0.3107633])
-
-    def test_hisj(self):
-        hs = self.gauss.hisj(self.data)
-        assert_allclose(hs, [0.29542502, 0.74277133, 0.51899114])
+class TestRegression(unittest.TestCase):
+    def test_KRegression(self):
+
+        N = 51
+        x = np.linspace(0, 1, N)
+        # ei = np.random.normal(loc=0, scale=0.075, size=(N,))
+        ei = [0.0514233500271586, 0.00165101982431131, 0.042827107319028994,
+              -0.084351702283385, 0.05978024392552100, -0.07121894535738457,
+              0.0855578119920183, -0.0061865198365448, 0.060986773136137415,
+              0.0467717713275598, -0.0852368434029634, 0.09790798995780517,
+              -0.174003547831554, 0.1100349974247687, 0.12934695904976257,
+              -0.036688944487546, -0.0279545148054110, 0.09660222791922815,
+              -0.108463847524115, -0.0635162550551463, 0.017192887741329627,
+              -0.031520480101878, 0.03939880367791403, -0.06343921941793985,
+              0.0574763321274059, -0.1186005160931940, 0.023007133904660495,
+              0.0572646924609536, -0.0334012844057809, -0.03444460758658313,
+              0.0325434547422866, 0.06063111859444784, 0.0010264474321885913,
+              -0.162288671571205, 0.01334616853351956, -0.020490428895193084,
+              0.0446047497979159, 0.02924587567502737, 0.021177586536616458,
+              0.0634083218094540, -0.1506377646036794, -0.03214553797245153,
+              0.1850745187671265, -0.0151240946088902, -0.10599562843454335,
+              0.0317357805015679, -0.0736187558312158, 0.04791463883941161,
+              0.0660021138871709, -0.1049359954387588, 0.0034961490852392463]
+        # print(ei.tolist())
+
+        y = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2)) + ei
+        kreg = wk.KRegression(x, y)
+        f = kreg(output='plotobj', title='Kernel regression', plotflag=1)
+
+        assert_allclose(f.data[::5],
+                        [3.14313544673463, 3.14582567119112, 3.149199078830904,
+                         3.153335095194225, 3.15813722171621, 3.16302709623568,
+                         3.16631430398602, 3.164138775969285, 3.14947062082316,
+                         3.11341295908516, 3.05213808272656, 2.976097561057097,
+                         2.908020176929025, 2.867826513276857, 2.8615179445705,
+                         2.88155232529645, 2.91307482047679, 2.942469210090470,
+                         2.96350144269953, 2.976399025328952, 2.9836554385038,
+                         2.987516554300354, 2.9894470264681, 2.990311688080114,
+                         2.9906144224522406, 2.9906534916935743])
+
+    def test_BKRegression(self):
+        from wafo.kdetools.kdetools import _get_data
+        n = 51
+        loc1 = 0.1
+        scale1 = 0.6
+        scale2 = 0.75
+        # x, y, fun1 = _get_data(n, symmetric=True, loc1=loc1,
+        #                       scale1=scale1, scale2=scale2)
+        # print(x.tolist())
+        # print(y.tolist())
+#        dist = st.norm
+#         norm1 = scale2 * (dist.pdf(-loc1, loc=-loc1, scale=scale1) +
+#                           dist.pdf(-loc1, loc=loc1, scale=scale1))
+#         def fun1(x):
+#             return ((dist.pdf(x, loc=-loc1, scale=scale1) +
+#                      dist.pdf(x, loc=loc1, scale=scale1)) / norm1).clip(max=1.0)
+        x = [-2.9784022156693037, -2.923269270862857, -2.640625797489305,
+             -2.592465150170373, -2.5777471766751514, -2.5597898266706323,
+             -2.5411937415815604, -2.501753472506631, -2.4939048380402378,
+             -2.4747969073957368, -2.3324036659351286, -2.3228634370815,
+             -2.230871371173083, -2.21411949373986, -2.2035967461005335,
+             -2.1927287694263082, -2.1095391808427064, -2.0942500415622503,
+             -2.0774862883018708, -2.0700940505412, -2.054918428555726,
+             -1.979624045501378, -1.815804869116454, -1.780636214263252,
+             -1.7494324035239686, -1.723149182957688, -1.7180532497996817,
+             -1.7016701153705522, -1.6120633534061788, -1.5862592143187193,
+             -1.517561220921166, -1.5017798665502253, -1.4895432407186429,
+             -1.4470094450898578, -1.4302454657287063, -1.3243060491576388,
+             -1.293989140781724, -1.2570066577415648, -1.2332757902347795,
+             -1.2306697417054666, -1.0495284321772482, -0.9923351727665026,
+             -0.9047559818364217, -0.4092063139968012, -0.3845725606766721,
+             -0.30700232234899083, -0.2565844426798063, -0.25415109620097187,
+             -0.20223029999069952, -0.10388696244007978, -0.07822191388462896,
+             0.07822191388462896, 0.10388696244007978, 0.20223029999069952,
+             0.25415109620097187, 0.2565844426798063, 0.30700232234899083,
+             0.3845725606766721, 0.4092063139968012, 0.9047559818364217,
+             0.9923351727665026, 1.0495284321772482, 1.2306697417054666,
+             1.2332757902347795, 1.2570066577415648, 1.293989140781724,
+             1.3243060491576388, 1.4302454657287063, 1.4470094450898578,
+             1.4895432407186429, 1.5017798665502253, 1.517561220921166,
+             1.5862592143187193, 1.6120633534061788, 1.7016701153705522,
+             1.7180532497996817, 1.723149182957688, 1.7494324035239686,
+             1.780636214263252, 1.815804869116454, 1.979624045501378,
+             2.054918428555726, 2.0700940505412, 2.0774862883018708,
+             2.0942500415622503, 2.1095391808427064, 2.1927287694263082,
+             2.2035967461005335, 2.21411949373986, 2.230871371173083,
+             2.3228634370815, 2.3324036659351286, 2.4747969073957368,
+             2.4939048380402378, 2.501753472506631, 2.5411937415815604,
+             2.5597898266706323, 2.5777471766751514, 2.592465150170373,
+             2.640625797489305, 2.923269270862857, 2.9784022156693037]
+        y = [False, False, False, False, False, False, False, False, False,
+             False, False, False, False, False, False, False, False, False,
+             False, False, False, False, False, False, False, False, False,
+             False, False, False, False, False, False, False, False, False,
+             False, False, False, False, False, False, True, True, True, True,
+             True, True, True, True, True, True, True, True, True, True, True,
+             True, True, True, False, False, False, False, False, False, False,
+             False, False, False, False, False, False, False, False, False,
+             False, False, False, False, False, False, False, False, False,
+             False, False, False, False, False, False, False, False, False,
+             False, False, False, False, False, False, False, False]
+
+        bkreg = wk.BKRegression(x, y, a=0.05, b=0.05)
+        fbest = bkreg.prb_search_best(hsfun='hste', alpha=0.05, color='g')
+        # print(fbest.data[::10])
+        assert_allclose(fbest.data[::10],
+                        [1.80899736e-15, 0,  6.48351162e-16,  6.61404311e-15,
+                         1.10010120e-12, 1.36709203e-10,  1.11994766e-08,
+                         5.73040143e-07, 1.68974054e-05,  2.68633448e-04,
+                         2.49075176e-03,  1.48687767e-02,  5.98536245e-02,
+                         1.74083352e-01,  4.33339557e-01,  8.26039018e-01,
+                         9.78387628e-01,  9.98137653e-01,  9.99876002e-01,
+                         9.99876002e-01,   9.98137653e-01,  9.78387628e-01,
+                         8.26039018e-01,  4.33339557e-01,  1.74083352e-01,
+                         5.98536245e-02,  1.48687767e-02,  2.49075176e-03,
+                         2.68633448e-04,  1.68974054e-05,  5.73040143e-07,
+                         1.11994760e-08,  1.36708818e-10,  1.09965904e-12,
+                         5.43806309e-15, 0.0, 0, 0])

 if __name__ == "__main__":
    # import sys;sys.argv = ['', 'Test.testName']