From 67e15d7c8c5d2e9fde4ac71a28f2704b245fbcaa Mon Sep 17 00:00:00 2001 From: Per A Brodtkorb Date: Tue, 20 Dec 2016 11:45:29 +0100 Subject: [PATCH] Refactored kdetools.py into a subpackage. Simplified and removed obsolete code. --- wafo/kdetools.py | 4071 ----------------------------------- wafo/kdetools/__init__.py | 3 + wafo/kdetools/gridding.py | 324 +++ wafo/kdetools/kdetools.py | 2139 ++++++++++++++++++ wafo/kdetools/kernels.py | 1382 ++++++++++++ wafo/testing.py | 18 + wafo/tests/test_kdetools.py | 865 ++++---- 7 files changed, 4336 insertions(+), 4466 deletions(-) delete mode 100644 wafo/kdetools.py create mode 100644 wafo/kdetools/__init__.py create mode 100644 wafo/kdetools/gridding.py create mode 100644 wafo/kdetools/kdetools.py create mode 100644 wafo/kdetools/kernels.py create mode 100644 wafo/testing.py diff --git a/wafo/kdetools.py b/wafo/kdetools.py deleted file mode 100644 index c63ba99..0000000 --- a/wafo/kdetools.py +++ /dev/null @@ -1,4071 +0,0 @@ -#!/usr/bin/env python -# ------------------------------------------------------------------------- -# Name: kdetools -# Purpose: -# -# Author: pab -# -# Created: 01.11.2008 -# Copyright: (c) pab 2008 -# Licence: LGPL -# ------------------------------------------------------------------------- - -from __future__ import absolute_import, division -import copy -import numpy as np -import scipy -import warnings -from itertools import product -from scipy import interpolate, linalg, optimize, sparse, special, stats -from scipy.special import gamma -from numpy import pi, sqrt, atleast_2d, exp, newaxis # @UnresolvedImport - -from wafo.misc import meshgrid, nextpow2, tranproc # , trangood -from wafo.containers import PlotData -from wafo.dctpack import dct, dctn, idctn -from wafo.plotbackend import plotbackend as plt -try: - from wafo import fig -except ImportError: - warnings.warn('fig import only supported on Windows') - - -def _invnorm(q): - return special.ndtri(q) - -_stats_epan = (1. / 5, 3. / 5, np.inf) -_stats_biwe = (1. / 7, 5. / 7, 45. / 2) -_stats_triw = (1. / 9, 350. / 429, np.inf) -_stats_rect = (1. / 3, 1. / 2, np.inf) -_stats_tria = (1. / 6, 2. / 3, np.inf) -_stats_lapl = (2, 1. / 4, np.inf) -_stats_logi = (pi ** 2 / 3, 1. / 6, 1 / 42) -_stats_gaus = (1, 1. / (2 * sqrt(pi)), 3. / (8 * sqrt(pi))) - -__all__ = ['sphere_volume', 'TKDE', 'KDE', 'Kernel', 'accum', 'qlevels', - 'iqrange', 'gridcount', 'kde_demo1', 'kde_demo2', 'test_docstrings'] - - -def sphere_volume(d, r=1.0): - """ - Returns volume of d-dimensional sphere with radius r - - Parameters - ---------- - d : scalar or array_like - dimension of sphere - r : scalar or array_like - radius of sphere (default 1) - - Example - ------- - >>> sphere_volume(2., r=2.) - 12.566370614359172 - >>> sphere_volume(2., r=1.) - 3.1415926535897931 - - Reference - --------- - Wand,M.P. and Jones, M.C. (1995) - 'Kernel smoothing' - Chapman and Hall, pp 105 - """ - return (r ** d) * 2.0 * pi ** (d / 2.0) / (d * gamma(d / 2.0)) - - -class KDEgauss(object): - - """ Kernel-Density Estimator base class. - - Parameters - ---------- - data : (# of dims, # of data)-array - datapoints to estimate from - hs : array-like (optional) - smooting parameter vector/matrix. 
- (default compute from data using kernel.get_smoothing function) - alpha : real scalar (optional) - sensitivity parameter (default 0 regular KDE) - A good choice might be alpha = 0.5 ( or 1/D) - alpha = 0 Regular KDE (hs is constant) - 0 < alpha <= 1 Adaptive KDE (Make hs change) - - - Members - ------- - d : int - number of dimensions - n : int - number of datapoints - - Methods - ------- - kde.eval_grid_fast(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde(x0, x1,..., xd) : array - same as kde.eval_grid_fast(x0, x1,..., xd) - """ - - def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, - xmax=None, inc=512): - self.dataset = atleast_2d(data) - self.hs = hs - self.kernel = kernel if kernel else Kernel('gauss') - self.alpha = alpha - self.xmin = xmin - self.xmax = xmax - self.inc = inc - self.initialize() - - def initialize(self): - self.d, self.n = self.dataset.shape - self._set_xlimits() - self._initialize() - - def _initialize(self): - self._compute_smoothing() - - def _compute_smoothing(self): - """Computes the smoothing matrix.""" - get_smoothing = self.kernel.get_smoothing - h = self.hs - if h is None: - h = get_smoothing(self.dataset) - h = np.atleast_1d(h) - hsiz = h.shape - - if (len(hsiz) == 1) or (self.d == 1): - if max(hsiz) == 1: - h = h * np.ones(self.d) - else: - h.shape = (self.d,) # make sure it has the correct dimension - - # If h negative calculate automatic values - ind, = np.where(h <= 0) - for i in ind.tolist(): - h[i] = get_smoothing(self.dataset[i]) - deth = h.prod() - self.inv_hs = np.diag(1.0 / h) - else: # fully general smoothing matrix - deth = linalg.det(h) - if deth <= 0: - raise ValueError( - 'bandwidth matrix h must be positive definit!') - self.inv_hs = linalg.inv(h) - self.hs = h - self._norm_factor = deth * self.n - - def _set_xlimits(self): - amin = self.dataset.min(axis=-1) - amax = self.dataset.max(axis=-1) - iqr = iqrange(self.dataset, axis=-1) - sigma = np.minimum(np.std(self.dataset, axis=-1, ddof=1), iqr / 1.34) - # xyzrange = amax - amin - # offset = xyzrange / 4.0 - offset = 2 * sigma - if self.xmin is None: - self.xmin = amin - offset - else: - self.xmin = self.xmin * np.ones((self.d, 1)) - if self.xmax is None: - self.xmax = amax + offset - else: - self.xmax = self.xmax * np.ones((self.d, 1)) - - def eval_grid_fast(self, *args, **kwds): - """Evaluate the estimated pdf on a grid. - - Parameters - ---------- - arg_0,arg_1,... arg_d-1 : vectors - Alternatively, if no vectors is passed in then - arg_i = linspace(self.xmin[i], self.xmax[i], self.inc) - output : string optional - 'value' if value output - 'data' if object output - - Returns - ------- - values : array-like - The values evaluated at meshgrid(*args). - - """ - if len(args) == 0: - args = [] - for i in range(self.d): - args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc)) - self.args = args - return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds) - - def _eval_grid_fast(self, *args, **kwds): - X = np.vstack(args) - d, inc = X.shape - # dx = X[:, 1] - X[:, 0] - R = X.max(axis=-1) - X.min(axis=-1) - - t_star = (self.hs / R) ** 2 - I = (np.asfarray(np.arange(0, inc)) * pi) ** 2 - In = [] - - for i in range(d): - In.append(I * t_star[i] * 0.5) - - Inc = meshgrid(*In) if d > 1 else In - - kw = np.zeros((inc,) * d) - for i in range(d): - kw += exp(-Inc[i]) - y = kwds.get('y', 1.0) - d, n = self.dataset.shape - # Find the binned kernel weights, c. - c = gridcount(self.dataset, X, y=y) / n - # Perform the convolution. 
- at = dctn(c) * kw - z = idctn(at) * at.size / np.prod(R) - return z * (z > 0.0) - - def _eval_grid_fun(self, eval_grd, *args, **kwds): - output = kwds.pop('output', 'value') - f = eval_grd(*args, **kwds) - if output == 'value': - return f - else: - titlestr = 'Kernel density estimate (%s)' % self.kernel.name - kwds2 = dict(title=titlestr) - kwds2['plot_kwds'] = dict(plotflag=1) - kwds2.update(**kwds) - args = self.args - if self.d == 1: - args = args[0] - wdata = PlotData(f, args, **kwds2) - if self.d > 1: - PL = np.r_[10:90:20, 95, 99, 99.9] - try: - ql = qlevels(f, p=PL) - wdata.clevels = ql - wdata.plevels = PL - except: - pass - return wdata - - def _check_shape(self, points): - points = atleast_2d(points) - d, m = points.shape - if d != self.d: - if d == 1 and m == self.d: - # points was passed in as a row vector - points = np.reshape(points, (self.d, 1)) - else: - msg = "points have dimension %s, dataset has dimension %s" - raise ValueError(msg % (d, self.d)) - return points - - def eval_points(self, points, **kwds): - """Evaluate the estimated pdf on a set of points. - - Parameters - ---------- - points : (# of dimensions, # of points)-array - Alternatively, a (# of dimensions,) vector can be passed in and - treated as a single point. - - Returns - ------- - values : (# of points,)-array - The values at each point. - - Raises - ------ - ValueError if the dimensionality of the input points is different than - the dimensionality of the KDE. - - """ - - points = self._check_shape(points) - return self._eval_points(points, **kwds) - - def _eval_points(self, points, **kwds): - pass - - __call__ = eval_grid_fast - - -class _KDE(object): - - """ Kernel-Density Estimator base class. - - Parameters - ---------- - data : (# of dims, # of data)-array - datapoints to estimate from - hs : array-like (optional) - smooting parameter vector/matrix. - (default compute from data using kernel.get_smoothing function) - kernel : kernel function object. 
- kernel must have get_smoothing method - alpha : real scalar (optional) - sensitivity parameter (default 0 regular KDE) - A good choice might be alpha = 0.5 ( or 1/D) - alpha = 0 Regular KDE (hs is constant) - 0 < alpha <= 1 Adaptive KDE (Make hs change) - - Members - ------- - d : int - number of dimensions - n : int - number of datapoints - - Methods - ------- - kde.eval_grid_fast(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_grid(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_points(points) : array - evaluate the estimated pdf on a provided set of points - kde(x0, x1,..., xd) : array - same as kde.eval_grid(x0, x1,..., xd) - """ - - def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, - xmax=None, inc=512): - self.dataset = atleast_2d(data) - self.hs = hs - self.kernel = kernel if kernel else Kernel('gauss') - self.alpha = alpha - self.xmin = xmin - self.xmax = xmax - self.inc = inc - self.initialize() - - def initialize(self): - self.d, self.n = self.dataset.shape - if self.n > 1: - self._set_xlimits() - self._initialize() - - def _initialize(self): - pass - - def _set_xlimits(self): - amin = self.dataset.min(axis=-1) - amax = self.dataset.max(axis=-1) - iqr = iqrange(self.dataset, axis=-1) - self._sigma = np.minimum( - np.std(self.dataset, axis=-1, ddof=1), iqr / 1.34) - # xyzrange = amax - amin - # offset = xyzrange / 4.0 - offset = self._sigma - if self.xmin is None: - self.xmin = amin - offset - else: - self.xmin = self.xmin * np.ones((self.d, 1)) - if self.xmax is None: - self.xmax = amax + offset - else: - self.xmax = self.xmax * np.ones((self.d, 1)) - - def get_args(self, xmin=None, xmax=None): - if xmin is None: - xmin = self.xmin - else: - xmin = [min(i, j) for i, j in zip(xmin, self.xmin)] - if xmax is None: - xmax = self.xmax - else: - xmax = [max(i, j) for i, j in zip(xmax, self.xmax)] - args = [] - for i in range(self.d): - args.append(np.linspace(xmin[i], xmax[i], self.inc)) - return args - - def eval_grid_fast(self, *args, **kwds): - """Evaluate the estimated pdf on a grid. - - Parameters - ---------- - arg_0,arg_1,... arg_d-1 : vectors - Alternatively, if no vectors is passed in then - arg_i = linspace(self.xmin[i], self.xmax[i], self.inc) - output : string optional - 'value' if value output - 'data' if object output - - Returns - ------- - values : array-like - The values evaluated at meshgrid(*args). - - """ - if len(args) == 0: - args = self.get_args() - self.args = args - return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds) - - def _eval_grid_fast(self, *args, **kwds): - pass - - def eval_grid(self, *args, **kwds): - """Evaluate the estimated pdf on a grid. - - Parameters - ---------- - arg_0,arg_1,... arg_d-1 : vectors - Alternatively, if no vectors is passed in then - arg_i = linspace(self.xmin[i], self.xmax[i], self.inc) - output : string optional - 'value' if value output - 'data' if object output - - Returns - ------- - values : array-like - The values evaluated at meshgrid(*args). 
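A hypothetical usage sketch (illustrative only, not part of the original docstring), assuming `np` is NumPy and `kde` is an already-fitted 2-D estimator:

    x0 = np.linspace(-3, 3, 128)
    x1 = np.linspace(-2, 2, 128)
    f = kde.eval_grid(x0, x1)   # evaluated on meshgrid(x0, x1); f.shape == (128, 128)
    f0 = kde.eval_grid()        # default grid: linspace(xmin[i], xmax[i], inc) per axis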
- - """ - if len(args) == 0: - args = [] - for i in range(self.d): - args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc)) - self.args = args - return self._eval_grid_fun(self._eval_grid, *args, **kwds) - - def _eval_grid(self, *args): - pass - - def _eval_grid_fun(self, eval_grd, *args, **kwds): - output = kwds.pop('output', 'value') - f = eval_grd(*args, **kwds) - if output == 'value': - return f - else: - titlestr = 'Kernel density estimate (%s)' % self.kernel.name - kwds2 = dict(title=titlestr) - - kwds2['plot_kwds'] = kwds.pop('plot_kwds', dict(plotflag=1)) - kwds2.update(**kwds) - args = self.args - if self.d == 1: - args = args[0] - wdata = PlotData(f, args, **kwds2) - if self.d > 1: - PL = np.r_[10:90:20, 95, 99, 99.9] - try: - ql = qlevels(f, p=PL) - wdata.clevels = ql - wdata.plevels = PL - except: - pass - return wdata - - def _check_shape(self, points): - points = atleast_2d(points) - d, m = points.shape - if d != self.d: - if d == 1 and m == self.d: - # points was passed in as a row vector - points = np.reshape(points, (self.d, 1)) - else: - msg = "points have dimension %s, dataset has dimension %s" - raise ValueError(msg % (d, self.d)) - return points - - def eval_points(self, points, **kwds): - """Evaluate the estimated pdf on a set of points. - - Parameters - ---------- - points : (# of dimensions, # of points)-array - Alternatively, a (# of dimensions,) vector can be passed in and - treated as a single point. - - Returns - ------- - values : (# of points,)-array - The values at each point. - - Raises - ------ - ValueError if the dimensionality of the input points is different than - the dimensionality of the KDE. - - """ - - points = self._check_shape(points) - return self._eval_points(points, **kwds) - - def _eval_points(self, points, **kwds): - pass - - __call__ = eval_grid - - -class TKDE(_KDE): - - """ Transformation Kernel-Density Estimator. - - Parameters - ---------- - dataset : (# of dims, # of data)-array - datapoints to estimate from - hs : array-like (optional) - smooting parameter vector/matrix. - (default compute from data using kernel.get_smoothing function) - kernel : kernel function object. - kernel must have get_smoothing method - alpha : real scalar (optional) - sensitivity parameter (default 0 regular KDE) - A good choice might be alpha = 0.5 ( or 1/D) - alpha = 0 Regular KDE (hs is constant) - 0 < alpha <= 1 Adaptive KDE (Make hs change) - xmin, xmax : vectors - specifying the default argument range for the kde.eval_grid methods. - For the kde.eval_grid_fast methods the values must cover the range of - the data. (default min(data)-range(data)/4, max(data)-range(data)/4) - If a single value of xmin or xmax is given then the boundary is the is - the same for all dimensions. - inc : scalar integer - defining the default dimension of the output from kde.eval_grid methods - (default 512) - (For kde.eval_grid_fast: A value below 50 is very fast to compute but - may give some inaccuracies. Values between 100 and 500 give very - accurate results) - L2 : array-like - vector of transformation parameters (default 1 no transformation) - t(xi;L2) = xi^L2*sign(L2) for L2(i) ~= 0 - t(xi;L2) = log(xi) for L2(i) == 0 - If single value of L2 is given then the transformation is the same in - all directions. 
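For illustration, a minimal sketch of the forward transform and of the Jacobian factor that TKDE uses to map the transformed-scale pdf back to the original scale, assuming a scalar L2 and positive 1-D data (the helper names below are illustrative, not part of the original class):

    import numpy as np

    def l2_transform(x, L2):
        """t(x; L2): power transform, or log(x) when L2 == 0 (sketch)."""
        x = np.asarray(x, dtype=float)
        return np.log(x) if L2 == 0 else x ** L2

    def l2_jacobian(x, L2):
        """|dt/dx|: 1/x when L2 == 0, else |L2| * x**(L2 - 1) (sketch)."""
        x = np.asarray(x, dtype=float)
        return 1.0 / x if L2 == 0 else abs(L2) * x ** (L2 - 1)

    # pdf on the original scale: f_X(x) ~= f_T(l2_transform(x, L2)) * l2_jacobian(x, L2)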
- - Members - ------- - d : int - number of dimensions - n : int - number of datapoints - - Methods - ------- - kde.eval_grid_fast(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_grid(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_points(points) : array - evaluate the estimated pdf on a provided set of points - kde(x0, x1,..., xd) : array - same as kde.eval_grid(x0, x1,..., xd) - - Example - ------- - N = 20 - data = np.random.rayleigh(1, size=(N,)) - >>> data = np.array([ - ... 0.75355792, 0.72779194, 0.94149169, 0.07841119,2.32291887, - ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, - ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, - ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) - - >>> import wafo.kdetools as wk - >>> x = np.linspace(0.01, max(data.ravel()) + 1, 10) - >>> kde = wk.TKDE(data, hs=0.5, L2=0.5) - >>> f = kde(x) - >>> f - array([ 1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318, - 0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882]) - - >>> kde.eval_grid(x) - array([ 1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318, - 0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882]) - - >>> kde.eval_grid_fast(x) - array([ 1.04018924, 0.45838973, 0.39514689, 0.32860532, 0.26433301, - 0.20717976, 0.15907697, 0.1201077 , 0.08941129, 0.06574899]) - - import pylab as plb - h1 = plb.plot(x, f) # 1D probability density plot - t = np.trapz(f, x) - """ - - def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, - xmax=None, inc=512, L2=None): - self.L2 = L2 - super(TKDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc) - - def _initialize(self): - self._check_xmin() - tdataset = self._dat2gaus(self.dataset) - xmin = self.xmin - if xmin is not None: - xmin = self._dat2gaus(np.reshape(xmin, (-1, 1))) - xmax = self.xmax - if xmax is not None: - xmax = self._dat2gaus(np.reshape(xmax, (-1, 1))) - self.tkde = KDE(tdataset, self.hs, self.kernel, self.alpha, xmin, xmax, - self.inc) - if self.inc is None: - self.inc = self.tkde.inc - - def _check_xmin(self): - if self.L2 is not None: - amin = self.dataset.min(axis=-1) - # default no transformation - L2 = np.atleast_1d(self.L2) * np.ones(self.d) - self.xmin = np.where(L2 != 1, np.maximum( - self.xmin, amin / 100.0), self.xmin).reshape((-1, 1)) - - def _dat2gaus(self, points): - if self.L2 is None: - return points # default no transformation - - # default no transformation - L2 = np.atleast_1d(self.L2) * np.ones(self.d) - - tpoints = copy.copy(points) - for i, v2 in enumerate(L2.tolist()): - tpoints[i] = np.log(points[i]) if v2 == 0 else points[i] ** v2 - return tpoints - - def _gaus2dat(self, tpoints): - if self.L2 is None: - return tpoints # default no transformation - - # default no transformation - L2 = np.atleast_1d(self.L2) * np.ones(self.d) - - points = copy.copy(tpoints) - for i, v2 in enumerate(L2.tolist()): - points[i] = np.exp( - tpoints[i]) if v2 == 0 else tpoints[i] ** (1.0 / v2) - return points - - def _scale_pdf(self, pdf, points): - if self.L2 is None: - return pdf - # default no transformation - L2 = np.atleast_1d(self.L2) * np.ones(self.d) - for i, v2 in enumerate(L2.tolist()): - factor = v2 * np.sign(v2) if v2 else 1 - pdf *= np.where(v2 == 1, 1, points[i] ** (v2 - 1) * factor) - if (np.abs(np.diff(pdf)).max() > 10).any(): - msg = ''' Numerical problems may have occured due to the power - transformation. 
Check the KDE for spurious spikes''' - warnings.warn(msg) - return pdf - - def eval_grid_fast2(self, *args, **kwds): - """Evaluate the estimated pdf on a grid. - - Parameters - ---------- - arg_0,arg_1,... arg_d-1 : vectors - Alternatively, if no vectors is passed in then - arg_i = gauss2dat(linspace(dat2gauss(self.xmin[i]), - dat2gauss(self.xmax[i]), self.inc)) - output : string optional - 'value' if value output - 'data' if object output - - Returns - ------- - values : array-like - The values evaluated at meshgrid(*args). - - """ - return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds) - - def _eval_grid_fast(self, *args, **kwds): - if self.L2 is None: - f = self.tkde.eval_grid_fast(*args, **kwds) - self.args = self.tkde.args - return f - targs = [] - if len(args): - targs0 = self._dat2gaus(list(args)) - xmin = [min(t) for t in targs0] - xmax = [max(t) for t in targs0] - targs = self.tkde.get_args(xmin, xmax) - tf = self.tkde.eval_grid_fast(*targs) - self.args = self._gaus2dat(list(self.tkde.args)) - points = meshgrid(*self.args) if self.d > 1 else self.args - f = self._scale_pdf(tf, points) - if len(args): - ipoints = meshgrid(*args) if self.d > 1 else args - # shape0 = points[0].shape - # shape0i = ipoints[0].shape - for i in range(self.d): - points[i].shape = (-1,) - # ipoints[i].shape = (-1,) - points = np.asarray(points).T - # ipoints = np.asarray(ipoints).T - fi = interpolate.griddata(points, f.ravel(), tuple(ipoints), - method='linear', - fill_value=0.0) - # fi.shape = shape0i - self.args = args - r = kwds.get('r', 0) - if r == 0: - return fi * (fi > 0) - else: - return fi - return f - - def _eval_grid(self, *args, **kwds): - if self.L2 is None: - return self.tkde.eval_grid(*args, **kwds) - targs = self._dat2gaus(list(args)) - tf = self.tkde.eval_grid(*targs, **kwds) - points = meshgrid(*args) if self.d > 1 else self.args - f = self._scale_pdf(tf, points) - return f - - def _eval_points(self, points): - """Evaluate the estimated pdf on a set of points. - - Parameters - ---------- - points : (# of dimensions, # of points)-array - Alternatively, a (# of dimensions,) vector can be passed in and - treated as a single point. - - Returns - ------- - values : (# of points,)-array - The values at each point. - - Raises - ------ - ValueError if the dimensionality of the input points is different than - the dimensionality of the KDE. - - """ - if self.L2 is None: - return self.tkde.eval_points(points) - - tpoints = self._dat2gaus(points) - tf = self.tkde.eval_points(tpoints) - f = self._scale_pdf(tf, points) - return f - - -class KDE(_KDE): - - """ Kernel-Density Estimator. - - Parameters - ---------- - data : (# of dims, # of data)-array - datapoints to estimate from - hs : array-like (optional) - smooting parameter vector/matrix. - (default compute from data using kernel.get_smoothing function) - kernel : kernel function object. - kernel must have get_smoothing method - alpha : real scalar (optional) - sensitivity parameter (default 0 regular KDE) - A good choice might be alpha = 0.5 ( or 1/D) - alpha = 0 Regular KDE (hs is constant) - 0 < alpha <= 1 Adaptive KDE (Make hs change) - xmin, xmax : vectors - specifying the default argument range for the kde.eval_grid methods. - For the kde.eval_grid_fast methods the values must cover the range of - the data. - (default min(data)-range(data)/4, max(data)-range(data)/4) - If a single value of xmin or xmax is given then the boundary is the is - the same for all dimensions. 
- inc : scalar integer (default 512) - defining the default dimension of the output from kde.eval_grid methods - (For kde.eval_grid_fast: A value below 50 is very fast to compute but - may give some inaccuracies. Values between 100 and 500 give very - accurate results) - - Members - ------- - d : int - number of dimensions - n : int - number of datapoints - - Methods - ------- - kde.eval_grid_fast(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_grid(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_points(points) : array - evaluate the estimated pdf on a provided set of points - kde(x0, x1,..., xd) : array - same as kde.eval_grid(x0, x1,..., xd) - - Example - ------- - N = 20 - data = np.random.rayleigh(1, size=(N,)) - >>> data = np.array([ - ... 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887, - ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, - ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, - ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) - - >>> x = np.linspace(0, max(data.ravel()) + 1, 10) - >>> import wafo.kdetools as wk - >>> kde = wk.KDE(data, hs=0.5, alpha=0.5) - >>> f = kde(x) - >>> f - array([ 0.17252055, 0.41014271, 0.61349072, 0.57023834, 0.37198073, - 0.21409279, 0.12738463, 0.07460326, 0.03956191, 0.01887164]) - - >>> kde.eval_grid(x) - array([ 0.17252055, 0.41014271, 0.61349072, 0.57023834, 0.37198073, - 0.21409279, 0.12738463, 0.07460326, 0.03956191, 0.01887164]) - - >>> kde0 = wk.KDE(data, hs=0.5, alpha=0.0) - >>> kde0.eval_points(x) - array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , - 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) - - >>> kde0.eval_grid(x) - array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , - 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) - >>> f = kde0.eval_grid(x, output='plotobj') - >>> f.data - array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , - 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) - - >>> f = kde0.eval_grid_fast() - >>> np.allclose(np.interp(x, kde0.args[0], f), - ... [ 0.20397743, 0.40252228, 0.54594119, 0.52219025, 0.39062189, - ... 0.2638171 , 0.16407487, 0.08270755, 0.04784434, 0.04784434]) - True - >>> f1 = kde0.eval_grid_fast(output='plot') - >>> np.allclose(np.interp(x, f1.args, f1.data), - ... [ 0.20397743, 0.40252228, 0.54594119, 0.52219025, 0.39062189, - ... 
0.2638171 , 0.16407487, 0.08270755, 0.04784434, 0.04784434]) - True - - h = f1.plot() - import pylab as plb - h1 = plb.plot(x, f) # 1D probability density plot - t = np.trapz(f, x) - """ - - def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, - xmax=None, inc=512): - super(KDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc) - - def _initialize(self): - self._compute_smoothing() - self._lambda = np.ones(self.n) - if self.alpha > 0: - # pilt = KDE(self.dataset, hs=self.hs, kernel=self.kernel, alpha=0) - # f = pilt.eval_points(self.dataset) # get a pilot estimate by - # regular KDE (alpha=0) - f = self.eval_points(self.dataset) # pilot estimate - g = np.exp(np.mean(np.log(f))) - self._lambda = (f / g) ** (-self.alpha) - - if self.inc is None: - unused_tau, tau = self.kernel.effective_support() - xyzrange = 8 * self._sigma - L1 = 10 - self.inc = 2 ** nextpow2( - max(48, (L1 * xyzrange / (tau * self.hs)).max())) - pass - - def _compute_smoothing(self): - """Computes the smoothing matrix.""" - get_smoothing = self.kernel.get_smoothing - h = self.hs - if h is None: - h = get_smoothing(self.dataset) - h = np.atleast_1d(h) - hsiz = h.shape - - if (len(hsiz) == 1) or (self.d == 1): - if max(hsiz) == 1: - h = h * np.ones(self.d) - else: - h.shape = (self.d,) # make sure it has the correct dimension - - # If h negative calculate automatic values - ind, = np.where(h <= 0) - for i in ind.tolist(): - h[i] = get_smoothing(self.dataset[i]) - deth = h.prod() - self.inv_hs = np.diag(1.0 / h) - else: # fully general smoothing matrix - deth = linalg.det(h) - if deth <= 0: - raise ValueError( - 'bandwidth matrix h must be positive definit!') - self.inv_hs = linalg.inv(h) - self.hs = h - self._norm_factor = deth * self.n - - def _eval_grid_fast(self, *args, **kwds): - X = np.vstack(args) - d, inc = X.shape - dx = X[:, 1] - X[:, 0] - - Xn = [] - nfft0 = 2 * inc - nfft = (nfft0,) * d - x0 = np.linspace(-inc, inc, nfft0 + 1) - for i in range(d): - Xn.append(x0[:-1] * dx[i]) - - Xnc = meshgrid(*Xn) if d > 1 else Xn - - shape0 = Xnc[0].shape - for i in range(d): - Xnc[i].shape = (-1,) - - Xn = np.dot(self.inv_hs, np.vstack(Xnc)) - - # Obtain the kernel weights. - kw = self.kernel(Xn) - - # plt.plot(kw) - # plt.draw() - # plt.show() - norm_fact0 = (kw.sum() * dx.prod() * self.n) - norm_fact = (self._norm_factor * self.kernel.norm_factor(d, self.n)) - if np.abs(norm_fact0 - norm_fact) > 0.05 * norm_fact: - warnings.warn( - 'Numerical inaccuracy due to too low discretization. ' + - 'Increase the discretization of the evaluation grid ' + - '(inc=%d)!' % inc) - norm_fact = norm_fact0 - - kw = kw / norm_fact - r = kwds.get('r', 0) - if r != 0: - kw *= np.vstack(Xnc) ** r if d > 1 else Xnc[0] - kw.shape = shape0 - kw = np.fft.ifftshift(kw) - fftn = np.fft.fftn - ifftn = np.fft.ifftn - - y = kwds.get('y', 1.0) - # if self.alpha>0: - # y = y / self._lambda**d - - # Find the binned kernel weights, c. - c = gridcount(self.dataset, X, y=y) - # Perform the convolution. - z = np.real(ifftn(fftn(c, s=nfft) * fftn(kw))) - - ix = (slice(0, inc),) * d - if r == 0: - return z[ix] * (z[ix] > 0.0) - else: - return z[ix] - - def _eval_grid(self, *args, **kwds): - - grd = meshgrid(*args) if len(args) > 1 else list(args) - shape0 = grd[0].shape - d = len(grd) - for i in range(d): - grd[i] = grd[i].ravel() - f = self.eval_points(np.vstack(grd), **kwds) - return f.reshape(shape0) - - def _eval_points(self, points, **kwds): - """Evaluate the estimated pdf on a set of points. 
- - Parameters - ---------- - points : (# of dimensions, # of points)-array - Alternatively, a (# of dimensions,) vector can be passed in and - treated as a single point. - - Returns - ------- - values : (# of points,)-array - The values at each point. - - Raises - ------ - ValueError if the dimensionality of the input points is different than - the dimensionality of the KDE. - - """ - d, m = points.shape - - result = np.zeros((m,)) - - r = kwds.get('r', 0) - if r == 0: - def fun(xi): - return 1 - else: - def fun(xi): - return (xi ** r).sum(axis=0) - - if m >= self.n: - y = kwds.get('y', np.ones(self.n)) - # there are more points than data, so loop over data - for i in range(self.n): - diff = self.dataset[:, i, np.newaxis] - points - tdiff = np.dot(self.inv_hs / self._lambda[i], diff) - result += y[i] * \ - fun(diff) * self.kernel(tdiff) / self._lambda[i] ** d - else: - y = kwds.get('y', 1) - # loop over points - for i in range(m): - diff = self.dataset - points[:, i, np.newaxis] - tdiff = np.dot(self.inv_hs, diff / self._lambda[np.newaxis, :]) - tmp = y * fun(diff) * self.kernel(tdiff) / self._lambda ** d - result[i] = tmp.sum(axis=-1) - - result /= (self._norm_factor * self.kernel.norm_factor(d, self.n)) - - return result - - -class KRegression(_KDE): - - """ Kernel-Regression - - Parameters - ---------- - data : (# of dims, # of data)-array - datapoints to estimate from - y : # of data - array - response variable - p : scalar integer (0 or 1) - Nadaraya-Watson estimator if p=0, - local linear estimator if p=1. - hs : array-like (optional) - smooting parameter vector/matrix. - (default compute from data using kernel.get_smoothing function) - kernel : kernel function object. - kernel must have get_smoothing method - alpha : real scalar (optional) - sensitivity parameter (default 0 regular KDE) - A good choice might be alpha = 0.5 ( or 1/D) - alpha = 0 Regular KDE (hs is constant) - 0 < alpha <= 1 Adaptive KDE (Make hs change) - xmin, xmax : vectors - specifying the default argument range for the kde.eval_grid methods. - For the kde.eval_grid_fast methods the values must cover the range of - the data. (default min(data)-range(data)/4, max(data)-range(data)/4) - If a single value of xmin or xmax is given then the boundary is the is - the same for all dimensions. - inc : scalar integer (default 128) - defining the default dimension of the output from kde.eval_grid methods - (For kde.eval_grid_fast: A value below 50 is very fast to compute but - may give some inaccuracies. Values between 100 and 500 give very - accurate results) - - Members - ------- - d : int - number of dimensions - n : int - number of datapoints - - Methods - ------- - kde.eval_grid_fast(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_grid(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_points(points) : array - evaluate the estimated pdf on a provided set of points - kde(x0, x1,..., xd) : array - same as kde.eval_grid(x0, x1,..., xd) - - - Example - ------- - >>> import wafo.kdetools as wk - >>> N = 100 - >>> x = np.linspace(0, 1, N) - >>> ei = np.random.normal(loc=0, scale=0.075, size=(N,)) - >>> ei = np.sqrt(0.075) * np.sin(100*x) - - >>> y = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2)) + ei - >>> kreg = wk.KRegression(x, y) - >>> f = kreg(output='plotobj', title='Kernel regression', plotflag=1) - >>> np.allclose(f.data[:5], - ... 
[ 3.18381052, 3.18362269, 3.18343648, 3.1832536 , 3.1830757 ]) - True - - h = f.plot(label='p=0') - """ - - def __init__(self, data, y, p=0, hs=None, kernel=None, alpha=0.0, - xmin=None, xmax=None, inc=128, L2=None): - - self.tkde = TKDE(data, hs=hs, kernel=kernel, - alpha=alpha, xmin=xmin, xmax=xmax, inc=inc, L2=L2) - self.y = y - self.p = p - - def eval_grid_fast(self, *args, **kwds): - self._grdfun = self.tkde.eval_grid_fast - return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds) - - def eval_grid(self, *args, **kwds): - self._grdfun = self.tkde.eval_grid - return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds) - - def _eval_gridfun(self, *args, **kwds): - grdfun = self._grdfun - s0 = grdfun(*args, r=0) - t0 = grdfun(*args, r=0, y=self.y) - if self.p == 0: - return (t0 / (s0 + _TINY)).clip(min=-_REALMAX, max=_REALMAX) - elif self.p == 1: - s1 = grdfun(*args, r=1) - s2 = grdfun(*args, r=2) - t1 = grdfun(*args, r=1, y=self.y) - return ((s2 * t0 - s1 * t1) / - (s2 * s0 - s1 ** 2)).clip(min=-_REALMAX, max=_REALMAX) - __call__ = eval_grid_fast - - -class BKRegression(object): - - ''' - Kernel-Regression on binomial data - - method : {'beta', 'wilson'} - method is one of the following - 'beta', return Bayesian Credible interval using beta-distribution. - 'wilson', return Wilson score interval - a, b : scalars - parameters of the beta distribution defining the apriori distribution - of p, i.e., the Bayes estimator for p: p = (y+a)/(n+a+b). - Setting a=b=0.5 gives Jeffreys interval. - ''' - - def __init__(self, *args, **kwds): - self.method = kwds.pop('method', 'beta') - self.a = max(kwds.pop('a', 0.5), _TINY) - self.b = max(kwds.pop('b', 0.5), _TINY) - self.kreg = KRegression(*args, **kwds) - # defines bin width (i.e. smoothing) in empirical estimate - self.hs_e = None -# self.x = self.kreg.tkde.dataset -# self.y = self.kreg.y - - def _set_smoothing(self, hs): - self.kreg.tkde.hs = hs - self.kreg.tkde.initialize() - - x = property(fget=lambda cls: cls.kreg.tkde.dataset.squeeze()) - y = property(fget=lambda cls: cls.kreg.y) - kernel = property(fget=lambda cls: cls.kreg.tkde.kernel) - hs = property(fset=_set_smoothing, fget=lambda cls: cls.kreg.tkde.hs) - - def _get_max_smoothing(self, fun=None): - """Return maximum value for smoothing parameter.""" - x = self.x - y = self.y - if fun is None: - get_smoothing = self.kernel.get_smoothing - else: - get_smoothing = getattr(self.kernel, fun) - - hs1 = get_smoothing(x) - # hx = np.median(np.abs(x-np.median(x)))/0.6745*(4.0/(3*n))**0.2 - if (y == 1).any(): - hs2 = get_smoothing(x[y == 1]) - # hy = np.median(np.abs(y-np.mean(y)))/0.6745*(4.0/(3*n))**0.2 - else: - hs2 = 4 * hs1 - # hy = 4*hx - - hopt = sqrt(hs1 * hs2) - return hopt, hs1, hs2 - - def get_grid(self, hs_e=None): - if hs_e is None: - if self.hs_e is None: - hs1 = self._get_max_smoothing('hste')[0] - hs2 = self._get_max_smoothing('hos')[0] - self.hs_e = sqrt(hs1 * hs2) - hs_e = self.hs_e - x = self.x - xmin, xmax = x.min(), x.max() - ni = max(2 * int((xmax - xmin) / hs_e) + 3, 5) - sml = hs_e # *0.1 - xi = np.linspace(xmin - sml, xmax + sml, ni) - return xi - - def prb_ci(self, n, p, alpha=0.05, **kwds): - """Return Confidence Interval for the binomial probability p. 
- - Parameters - ---------- - n : array-like - number of Bernoulli trials - p : array-like - estimated probability of success in each trial - alpha : scalar - confidence level - method : {'beta', 'wilson'} - method is one of the following - 'beta', return Bayesian Credible interval using beta-distribution. - 'wilson', return Wilson score interval - a, b : scalars - parameters of the beta distribution defining the apriori - distribution of p, i.e., - the Bayes estimator for p: p = (y+a)/(n+a+b). - Setting a=b=0.5 gives Jeffreys interval. - - """ - if self.method.startswith('w'): - # Wilson score - z0 = -_invnorm(alpha / 2) - den = 1 + (z0 ** 2. / n) - xc = (p + (z0 ** 2) / (2 * n)) / den - halfwidth = (z0 * sqrt((p * (1 - p) / n) + - (z0 ** 2 / (4 * (n ** 2))))) / den - plo = (xc - halfwidth).clip(min=0) # wilson score - pup = (xc + halfwidth).clip(max=1.0) # wilson score - else: - # Jeffreys intervall a=b=0.5 - # st.beta.isf(alpha/2, y+a, n-y+b) y = n*p, n-y = n*(1-p) - a = self.a - b = self.b - st = stats - pup = np.where( - p == 1, 1, st.beta.isf(alpha / 2, n * p + a, n * (1 - p) + b)) - plo = np.where(p == 0, 0, - st.beta.isf(1 - alpha / 2, - n * p + a, n * (1 - p) + b)) - return plo, pup - - def prb_empirical(self, xi=None, hs_e=None, alpha=0.05, color='r', **kwds): - """Returns empirical binomial probabiltity. - - Parameters - ---------- - x : ndarray - position vector - y : ndarray - binomial response variable (zeros and ones) - alpha : scalar - confidence level - color: - used in plot - - Returns - ------- - P(x) : PlotData object - empirical probability - - """ - if xi is None: - xi = self.get_grid(hs_e) - - x = self.x - y = self.y - - c = gridcount(x, xi) # + self.a + self.b # count data - if (y == 1).any(): - c0 = gridcount(x[y == 1], xi) # + self.a # count success - else: - c0 = np.zeros(xi.shape) - prb = np.where(c == 0, 0, c0 / (c + _TINY)) # assume prb==0 for c==0 - CI = np.vstack(self.prb_ci(c, prb, alpha, **kwds)) - - prb_e = PlotData(prb, xi, plotmethod='plot', plot_args=['.'], - plot_kwds=dict(markersize=6, color=color, picker=5)) - prb_e.dataCI = CI.T - prb_e.count = c - return prb_e - - def prb_smoothed(self, prb_e, hs, alpha=0.05, color='r', label=''): - """Return smoothed binomial probability. 
- - Parameters - ---------- - prb_e : PlotData object with empirical binomial probabilites - hs : smoothing parameter - alpha : confidence level - color : color of plot object - label : label for plot object - - """ - - x_e = prb_e.args - n_e = len(x_e) - dx_e = x_e[1] - x_e[0] - n = self.x.size - - x_s = np.linspace(x_e[0], x_e[-1], 10 * n_e + 1) - self.hs = hs - - prb_s = self.kreg(x_s, output='plotobj', title='', plot_kwds=dict( - color=color, linewidth=2)) # dict(plotflag=7)) - m_nan = np.isnan(prb_s.data) - if m_nan.any(): # assume 0/0 division - prb_s.data[m_nan] = 0.0 - - # prb_s.data[np.isnan(prb_s.data)] = 0 - # expected number of data in each bin - c_s = self.kreg.tkde.eval_grid_fast(x_s) * dx_e * n - plo, pup = self.prb_ci(c_s, prb_s.data, alpha) - - prb_s.dataCI = np.vstack((plo, pup)).T - prb_s.prediction_error_avg = np.trapz( - pup - plo, x_s) / (x_s[-1] - x_s[0]) - - if label: - prb_s.plot_kwds['label'] = label - prb_s.children = [PlotData([plo, pup], x_s, - plotmethod='fill_between', - plot_kwds=dict(alpha=0.2, color=color)), - prb_e] - - # empirical oversmooths the data -# p_s = prb_s.eval_points(self.x) -# dp_s = np.diff(prb_s.data) -# k = (dp_s[:-1]*dp_s[1:]<0).sum() # numpeaks -# p_e = self.y -# n_s = interpolate.interp1d(x_s, c_s)(self.x) -# plo, pup = self.prb_ci(n_s, p_s, alpha) -# sigmai = (pup-plo) -# aicc = (((p_e-p_s)/sigmai)**2).sum()+ 2*k*(k+1)/np.maximum(n-k+1,1) - - p_e = prb_e.eval_points(x_s) - p_s = prb_s.data - dp_s = np.sign(np.diff(p_s)) - k = (dp_s[:-1] != dp_s[1:]).sum() # numpeaks - - # sigmai = (pup-plo)+_EPS - # aicc = (((p_e-p_s)/sigmai)**2).sum()+ 2*k*(k+1)/np.maximum(n_e-k+1,1) - # + np.abs((p_e-pup).clip(min=0)-(p_e-plo).clip(max=0)).sum() - sigmai = _logit(pup) - _logit(plo) + _EPS - aicc = ((((_logit(p_e) - _logit(p_s)) / sigmai) ** 2).sum() + - 2 * k * (k + 1) / np.maximum(n_e - k + 1, 1) + - np.abs((p_e - pup).clip(min=0) - - (p_e - plo).clip(max=0)).sum()) - - prb_s.aicc = aicc - # prb_s.labels.title = '' - # prb_s.labels.title='perr=%1.3f,aicc=%1.3f, n=%d, hs=%1.3f' % - # (prb_s.prediction_error_avg,aicc,n,hs) - - return prb_s - - def prb_search_best(self, prb_e=None, hsvec=None, hsfun='hste', - alpha=0.05, color='r', label=''): - """Return best smoothed binomial probability. 
- - Parameters - ---------- - prb_e : PlotData object with empirical binomial probabilites - hsvec : arraylike (default np.linspace(hsmax*0.1,hsmax,55)) - vector smoothing parameters - hsfun : - method for calculating hsmax - - """ - if prb_e is None: - prb_e = self.prb_empirical( - hs_e=self.hs_e, alpha=alpha, color=color) - if hsvec is None: - hsmax = self._get_max_smoothing(hsfun)[0] # @UnusedVariable - hsmax = max(hsmax, self.hs_e) - hsvec = np.linspace(hsmax * 0.2, hsmax, 55) - - hs_best = hsvec[-1] + 0.1 - prb_best = self.prb_smoothed(prb_e, hs_best, alpha, color, label) - aicc = np.zeros(np.size(hsvec)) - for i, hi in enumerate(hsvec): - f = self.prb_smoothed(prb_e, hi, alpha, color, label) - aicc[i] = f.aicc - if f.aicc <= prb_best.aicc: - prb_best = f - hs_best = hi - prb_best.score = PlotData(aicc, hsvec) - prb_best.hs = hs_best - self._set_smoothing(hs_best) - return prb_best - - -class _Kernel(object): - - def __init__(self, r=1.0, stats=None): - self.r = r # radius of kernel - self.stats = stats - - def norm_factor(self, d=1, n=None): - return 1.0 - - def norm_kernel(self, x): - X = np.atleast_2d(x) - return self._kernel(X) / self.norm_factor(*X.shape) - - def kernel(self, x): - return self._kernel(np.atleast_2d(x)) - - def deriv4_6_8_10(self, t, numout=4): - raise Exception('Method not implemented for this kernel!') - - def effective_support(self): - """Return the effective support of kernel. - - The kernel must be symmetric and compactly supported on [-tau tau] - if the kernel has infinite support then the kernel must have the - effective support in [-tau tau], i.e., be negligible outside the range - - """ - return self._effective_support() - - def _effective_support(self): - return - self.r, self.r - __call__ = kernel - - -class _KernelMulti(_Kernel): - # p=0; %Sphere = rect for 1D - # p=1; %Multivariate Epanechnikov kernel. - # p=2; %Multivariate Bi-weight Kernel - # p=3; %Multi variate Tri-weight Kernel - # p=4; %Multi variate Four-weight Kernel - - def __init__(self, r=1.0, p=1, stats=None): - self.r = r - self.p = p - self.stats = stats - - def norm_factor(self, d=1, n=None): - r = self.r - p = self.p - c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(d, r) / np.prod( - np.r_[(d + 2):(2 * p + d + 1):2]) # normalizing constant - return c - - def _kernel(self, x): - r = self.r - p = self.p - x2 = x ** 2 - return ((1.0 - x2.sum(axis=0) / r ** 2).clip(min=0.0)) ** p - -mkernel_epanechnikov = _KernelMulti(p=1, stats=_stats_epan) -mkernel_biweight = _KernelMulti(p=2, stats=_stats_biwe) -mkernel_triweight = _KernelMulti(p=3, stats=_stats_triw) - - -class _KernelProduct(_KernelMulti): - # p=0; %rectangular - # p=1; %1D product Epanechnikov kernel. 
- # p=2; %1D product Bi-weight Kernel - # p=3; %1D product Tri-weight Kernel - # p=4; %1D product Four-weight Kernel - - def norm_factor(self, d=1, n=None): - r = self.r - p = self.p - c = (2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(1, r) / - np.prod(np.r_[(1 + 2):(2 * p + 2):2])) - return c ** d - - def _kernel(self, x): - r = self.r # radius - pdf = (1 - (x / r) ** 2).clip(min=0.0) - return pdf.prod(axis=0) - -mkernel_p1epanechnikov = _KernelProduct(p=1, stats=_stats_epan) -mkernel_p1biweight = _KernelProduct(p=2, stats=_stats_biwe) -mkernel_p1triweight = _KernelProduct(p=3, stats=_stats_triw) - - -class _KernelRectangular(_Kernel): - - def _kernel(self, x): - return np.where(np.all(np.abs(x) <= self.r, axis=0), 1, 0.0) - - def norm_factor(self, d=1, n=None): - r = self.r - return (2 * r) ** d -mkernel_rectangular = _KernelRectangular(stats=_stats_rect) - - -class _KernelTriangular(_Kernel): - - def _kernel(self, x): - pdf = (1 - np.abs(x)).clip(min=0.0) - return pdf.prod(axis=0) -mkernel_triangular = _KernelTriangular(stats=_stats_tria) - - -class _KernelGaussian(_Kernel): - - def _kernel(self, x): - sigma = self.r / 4.0 - x2 = (x / sigma) ** 2 - return exp(-0.5 * x2.sum(axis=0)) - - def norm_factor(self, d=1, n=None): - sigma = self.r / 4.0 - return (2 * pi * sigma) ** (d / 2.0) - - def deriv4_6_8_10(self, t, numout=4): - """Returns 4th, 6th, 8th and 10th derivatives of the kernel - function.""" - phi0 = exp(-0.5 * t ** 2) / sqrt(2 * pi) - p4 = [1, 0, -6, 0, +3] - p4val = np.polyval(p4, t) * phi0 - if numout == 1: - return p4val - out = [p4val] - pn = p4 - for unusedix in range(numout - 1): - pnp1 = np.polyadd(-np.r_[pn, 0], np.polyder(pn)) - pnp2 = np.polyadd(-np.r_[pnp1, 0], np.polyder(pnp1)) - out.append(np.polyval(pnp2, t) * phi0) - pn = pnp2 - return out - -mkernel_gaussian = _KernelGaussian(r=4.0, stats=_stats_gaus) - -# def mkernel_gaussian(X): -# x2 = X ** 2 -# d = X.shape[0] -# return (2 * pi) ** (-d / 2) * exp(-0.5 * x2.sum(axis=0)) - - -class _KernelLaplace(_Kernel): - - def _kernel(self, x): - absX = np.abs(x) - return exp(-absX.sum(axis=0)) - - def norm_factor(self, d=1, n=None): - return 2 ** d -mkernel_laplace = _KernelLaplace(r=7.0, stats=_stats_lapl) - - -class _KernelLogistic(_Kernel): - - def _kernel(self, x): - s = exp(-x) - return np.prod(1.0 / (s + 1) ** 2, axis=0) -mkernel_logistic = _KernelLogistic(r=7.0, stats=_stats_logi) - -_MKERNEL_DICT = dict( - epan=mkernel_epanechnikov, - biwe=mkernel_biweight, - triw=mkernel_triweight, - p1ep=mkernel_p1epanechnikov, - p1bi=mkernel_p1biweight, - p1tr=mkernel_p1triweight, - rect=mkernel_rectangular, - tria=mkernel_triangular, - lapl=mkernel_laplace, - logi=mkernel_logistic, - gaus=mkernel_gaussian -) -_KERNEL_EXPONENT_DICT = dict( - re=0, sp=0, ep=1, bi=2, tr=3, fo=4, fi=5, si=6, se=7) - - -class Kernel(object): - - """Multivariate kernel. - - Parameters - ---------- - name : string - defining the kernel. Valid options are: - 'epanechnikov' - Epanechnikov kernel. - 'biweight' - Bi-weight kernel. - 'triweight' - Tri-weight kernel. - 'p1epanechnikov' - product of 1D Epanechnikov kernel. - 'p1biweight' - product of 1D Bi-weight kernel. - 'p1triweight' - product of 1D Tri-weight kernel. - 'triangular' - Triangular kernel. - 'gaussian' - Gaussian kernel - 'rectangular' - Rectangular kernel. - 'laplace' - Laplace kernel. - 'logistic' - Logistic kernel. - Note that only the first 4 letters of the kernel name is needed. - - Examples - -------- - N = 20 - data = np.random.rayleigh(1, size=(N,)) - >>> data = np.array([ - ... 
0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887, - ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, - ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, - ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) - - >>> import wafo.kdetools as wk - >>> gauss = wk.Kernel('gaussian') - >>> gauss.stats() - (1, 0.28209479177387814, 0.21157109383040862) - >>> np.allclose(gauss.hscv(data), 0.21779575) - True - >>> np.allclose(gauss.hstt(data), 0.16341135) - True - >>> np.allclose(gauss.hste(data), 0.19179399) - True - >>> np.allclose(gauss.hldpi(data), 0.22502733) - True - >>> wk.Kernel('laplace').stats() - (2, 0.25, inf) - - >>> triweight = wk.Kernel('triweight') - >>> np.allclose(triweight.stats(), - ... (0.1111111111111111, 0.81585081585081587, np.inf)) - True - >>> np.allclose(triweight(np.linspace(-1,1,11)), - ... [ 0., 0.046656, 0.262144, 0.592704, 0.884736, 1., - ... 0.884736, 0.592704, 0.262144, 0.046656, 0.]) - True - >>> np.allclose(triweight.hns(data), 0.82, rtol=1e-2) - True - >>> np.allclose(triweight.hos(data), 0.88, rtol=1e-2) - True - >>> np.allclose(triweight.hste(data), 0.57, rtol=1e-2) - True - >>> np.allclose(triweight.hscv(data), 0.648, rtol=1e-2) - True - - See also - -------- - mkernel - - References - ---------- - B. W. Silverman (1986) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp. 43, 76 - - Wand, M. P. and Jones, M. C. (1995) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp 31, 103, 175 - - """ - - def __init__(self, name, fun='hste'): # 'hns'): - self.kernel = _MKERNEL_DICT[name[:4]] - # self.name = self.kernel.__name__.replace('mkernel_', '').title() - try: - self.get_smoothing = getattr(self, fun) - except: - self.get_smoothing = self.hste - - def _get_name(self): - return self.kernel.__class__.__name__.replace('_Kernel', '').title() - name = property(_get_name) - - def get_smoothing(self, *args, **kwds): - pass - - def stats(self): - """Return some 1D statistics of the kernel. - - Returns - ------- - mu2 : real scalar - 2'nd order moment, i.e.,int(x^2*kernel(x)) - R : real scalar - integral of squared kernel, i.e., int(kernel(x)^2) - Rdd : real scalar - integral of squared double derivative of kernel, - i.e., int( (kernel''(x))^2 ). - - Reference - --------- - Wand,M.P. and Jones, M.C. (1995) - 'Kernel smoothing' - Chapman and Hall, pp 176. - - """ - return self.kernel.stats - - def deriv4_6_8_10(self, t, numout=4): - return self.kernel.deriv4_6_8_10(t, numout) - - def effective_support(self): - return self.kernel.effective_support() - - def hns(self, data): - """Returns Normal Scale Estimate of Smoothing Parameter. - - Parameter - --------- - data : 2D array - shape d x n (d = # dimensions ) - - Returns - ------- - h : array-like - one dimensional optimal value for smoothing parameter - given the data and kernel. size D - - HNS only gives an optimal value with respect to mean integrated - square error, when the true underlying distribution - is Gaussian. This works reasonably well if the data resembles a - Gaussian distribution. However if the distribution is asymmetric, - multimodal or have long tails then HNS may return a to large - smoothing parameter, i.e., the KDE may be oversmoothed and mask - important features of the data. (=> large bias). - One way to remedy this is to reduce H by multiplying with a constant - factor, e.g., 0.85. Another is to try different values for H and make a - visual check by eye. 
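As a concrete point of reference (a sketch, not taken from the original file): for the Gaussian kernel the AMISE constant used in hns reduces to (4/(3*n))**(1/5), so the robust normal scale rule can be written as below; the 0.85 shrinkage mentioned above is simply an extra factor applied to the result.

    import numpy as np

    def hns_gauss(x):
        # Normal scale (Silverman-type) bandwidth for 1-D data and a Gaussian kernel
        x = np.asarray(x, dtype=float)
        n = x.size
        iqr = np.subtract(*np.percentile(x, [75, 25]))
        sigma = min(np.std(x, ddof=1), iqr / 1.349)
        return sigma * (4.0 / (3.0 * n)) ** 0.2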
- - Example: - data = rndnorm(0, 1,20,1) - h = hns(data,'epan') - - See also: - --------- - hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt, kde - - Reference: - --------- - B. W. Silverman (1986) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp 43-48 - Wand,M.P. and Jones, M.C. (1995) - 'Kernel smoothing' - Chapman and Hall, pp 60--63 - - """ - - A = np.atleast_2d(data) - n = A.shape[1] - - # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) - mu2, R, unusedRdd = self.stats() - AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) - iqr = iqrange(A, axis=1) # interquartile range - stdA = np.std(A, axis=1, ddof=1) - # use of interquartile range guards against outliers. - # the use of interquartile range is better if - # the distribution is skew or have heavy tails - # This lessen the chance of oversmoothing. - return np.where(iqr > 0, - np.minimum(stdA, iqr / 1.349), stdA) * AMISEconstant - - def hos(self, data): - """Returns Oversmoothing Parameter. - - Parameter - --------- - data = data matrix, size N x D (D = # dimensions ) - - Returns - ------- - h : vector size 1 x D - one dimensional maximum smoothing value for smoothing parameter - given the data and kernel. - - The oversmoothing or maximal smoothing principle relies on the fact - that there is a simple upper bound for the AMISE-optimal bandwidth for - estimation of densities with a fixed value of a particular scale - measure. While HOS will give too large bandwidth for optimal estimation - of a general density it provides an excellent starting point for - subjective choice of bandwidth. A sensible strategy is to plot an - estimate with bandwidth HOS and then sucessively look at plots based on - convenient fractions of HOS to see what features are present in the - data for various amount of smoothing. The relation to HNS is given by: - - HOS = HNS/0.93 - - Example: - -------- - data = rndnorm(0, 1,20,1) - h = hos(data,'epan'); - - See also hste, hbcv, hboot, hldpi, hlscv, hscv, hstt, kde, kdefun - - Reference - --------- - B. W. Silverman (1986) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp 43-48 - - Wand,M.P. and Jones, M.C. (1986) - 'Kernel smoothing' - Chapman and Hall, pp 60--63 - - """ - return self.hns(data) / 0.93 - - def hmns(self, data): - """Returns Multivariate Normal Scale Estimate of Smoothing Parameter. - - CALL: h = hmns(data,kernel) - - h = M dimensional optimal value for smoothing parameter - given the data and kernel. size D x D - data = data matrix, size D x N (D = # dimensions ) - kernel = 'epanechnikov' - Epanechnikov kernel. - 'biweight' - Bi-weight kernel. - 'triweight' - Tri-weight kernel. - 'gaussian' - Gaussian kernel - - Note that only the first 4 letters of the kernel name is needed. - - HMNS only gives a optimal value with respect to mean integrated - square error, when the true underlying distribution is Multivariate - Gaussian. This works reasonably well if the data resembles a - Multivariate Gaussian distribution. However if the distribution is - asymmetric, multimodal or have long tails then HNS is maybe more - appropriate. - - Example: - data = rndnorm(0, 1,20,2) - h = hmns(data,'epan') - - See also - -------- - - hns, hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt - - Reference - ---------- - B. W. Silverman (1986) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp 43-48, 87 - - Wand,M.P. and Jones, M.C. 
(1995) - 'Kernel smoothing' - Chapman and Hall, pp 60--63, 86--88 - - """ - # TODO: implement more kernels - - A = np.atleast_2d(data) - d, n = A.shape - - if d == 1: - return self.hns(data) - name = self.name[:4].lower() - if name == 'epan': # Epanechnikov kernel - a = (8.0 * (d + 4.0) * (2 * sqrt(pi)) ** d / - sphere_volume(d)) ** (1. / (4.0 + d)) - elif name == 'biwe': # Bi-weight kernel - a = 2.7779 - if d > 2: - raise ValueError('not implemented for d>2') - elif name == 'triw': # Triweight - a = 3.12 - if d > 2: - raise ValueError('not implemented for d>2') - elif name == 'gaus': # Gaussian kernel - a = (4.0 / (d + 2.0)) ** (1. / (d + 4.0)) - else: - raise ValueError('Unknown kernel.') - - covA = scipy.cov(A) - - return a * linalg.sqrtm(covA).real * n ** (-1. / (d + 4)) - - def hste(self, data, h0=None, inc=128, maxit=100, releps=0.01, abseps=0.0): - '''HSTE 2-Stage Solve the Equation estimate of smoothing parameter. - - CALL: hs = hste(data,kernel,h0) - - hs = one dimensional value for smoothing parameter - given the data and kernel. size 1 x D - data = data matrix, size N x D (D = # dimensions ) - kernel = 'gaussian' - Gaussian kernel (default) - ( currently the only supported kernel) - h0 = initial starting guess for hs (default h0=hns(A,kernel)) - - Example: - x = rndnorm(0,1,50,1); - hs = hste(x,'gauss'); - - See also hbcv, hboot, hos, hldpi, hlscv, hscv, hstt, kde, kdefun - - Reference - --------- - B. W. Silverman (1986) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp 57--61 - - Wand,M.P. and Jones, M.C. (1986) - 'Kernel smoothing' - Chapman and Hall, pp 74--75 - ''' - # TODO: NB: this routine can be made faster: - # TODO: replace the iteration in the end with a Newton Raphson scheme - - A = np.atleast_2d(data) - d, n = A.shape - - # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) - mu2, R, unusedRdd = self.stats() - - AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) - STEconstant = R / (mu2 ** (2) * n) - - sigmaA = self.hns(A) / AMISEconstant - if h0 is None: - h0 = sigmaA * AMISEconstant - - h = np.asarray(h0, dtype=float) - - nfft = inc * 2 - amin = A.min(axis=1) # Find the minimum value of A. - amax = A.max(axis=1) # Find the maximum value of A. - arange = amax - amin # Find the range of A. - - # xa holds the x 'axis' vector, defining a grid of x values where - # the k.d. function will be evaluated. - - ax1 = amin - arange / 8.0 - bx1 = amax + arange / 8.0 - - kernel2 = Kernel('gauss') - mu2, R, unusedRdd = kernel2.stats() - STEconstant2 = R / (mu2 ** (2) * n) - fft = np.fft.fft - ifft = np.fft.ifft - - for dim in range(d): - s = sigmaA[dim] - ax = ax1[dim] - bx = bx1[dim] - - xa = np.linspace(ax, bx, inc) - xn = np.linspace(0, bx - ax, inc) - - c = gridcount(A[dim], xa) - - # Step 1 - psi6NS = -15 / (16 * sqrt(pi) * s ** 7) - psi8NS = 105 / (32 * sqrt(pi) * s ** 9) - - # Step 2 - k40, k60 = kernel2.deriv4_6_8_10(0, numout=2) - g1 = (-2 * k40 / (mu2 * psi6NS * n)) ** (1.0 / 7) - g2 = (-2 * k60 / (mu2 * psi8NS * n)) ** (1.0 / 9) - - # Estimate psi6 given g2. - # kernel weights. - kw4, kw6 = kernel2.deriv4_6_8_10(xn / g2, numout=2) - # Apply fftshift to kw. - kw = np.r_[kw6, 0, kw6[-1:0:-1]] - z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. - psi6 = np.sum(c * z[:inc]) / (n * (n - 1) * g2 ** 7) - - # Estimate psi4 given g1. - kw4 = kernel2.deriv4_6_8_10(xn / g1, numout=1) # kernel weights. - kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply 'fftshift' to kw. - z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. 
- psi4 = np.sum(c * z[:inc]) / (n * (n - 1) * g1 ** 5) - - h1 = h[dim] - h_old = 0 - count = 0 - - while ((abs(h_old - h1) > max(releps * h1, abseps)) and - (count < maxit)): - count += 1 - h_old = h1 - - # Step 3 - gamma = ((2 * k40 * mu2 * psi4 * h1 ** 5) / - (-psi6 * R)) ** (1.0 / 7) - - # Now estimate psi4 given gamma. - # kernel weights. - kw4 = kernel2.deriv4_6_8_10(xn / gamma, numout=1) - kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply 'fftshift' to kw. - z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. - - psi4Gamma = np.sum(c * z[:inc]) / (n * (n - 1) * gamma ** 5) - - # Step 4 - h1 = (STEconstant2 / psi4Gamma) ** (1.0 / 5) - - # Kernel other than Gaussian scale bandwidth - h1 = h1 * (STEconstant / STEconstant2) ** (1.0 / 5) - - if count >= maxit: - warnings.warn('The obtained value did not converge.') - - h[dim] = h1 - # end for dim loop - return h - - def hisj(self, data, inc=512, L=7): - ''' - HISJ Improved Sheather-Jones estimate of smoothing parameter. - - Unlike many other implementations, this one is immune to problems - caused by multimodal densities with widely separated modes. The - estimation does not deteriorate for multimodal densities, because - it do not assume a parametric model for the data. - - Parameters - ---------- - data - a vector of data from which the density estimate is constructed - inc - the number of mesh points used in the uniform discretization - - Returns - ------- - bandwidth - the optimal bandwidth - - Reference - --------- - Kernel density estimation via diffusion - Z. I. Botev, J. F. Grotowski, and D. P. Kroese (2010) - Annals of Statistics, Volume 38, Number 5, pages 2916-2957. - ''' - A = np.atleast_2d(data) - d, n = A.shape - - # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) - mu2, R, unusedRdd = self.stats() - STEconstant = R / (n * mu2 ** 2) - - amin = A.min(axis=1) # Find the minimum value of A. - amax = A.max(axis=1) # Find the maximum value of A. - arange = amax - amin # Find the range of A. - - # xa holds the x 'axis' vector, defining a grid of x values where - # the k.d. function will be evaluated. - - ax1 = amin - arange / 8.0 - bx1 = amax + arange / 8.0 - - kernel2 = Kernel('gauss') - mu2, R, unusedRdd = kernel2.stats() - STEconstant2 = R / (mu2 ** (2) * n) - - def fixed_point(t, N, I, a2): - ''' this implements the function t-zeta*gamma^[L](t)''' - - prod = np.prod - # L = 7 - logI = np.log(I) - f = 2 * pi ** (2 * L) * \ - (a2 * exp(L * logI - I * pi ** 2 * t)).sum() - for s in range(L - 1, 1, -1): - K0 = prod(np.r_[1:2 * s:2]) / sqrt(2 * pi) - const = (1 + (1. / 2) ** (s + 1. / 2)) / 3 - time = (2 * const * K0 / N / f) ** (2. / (3 + 2 * s)) - f = 2 * pi ** (2 * s) * \ - (a2 * exp(s * logI - I * pi ** 2 * time)).sum() - return t - (2 * N * sqrt(pi) * f) ** (-2. 
/ 5) - - h = np.empty(d) - for dim in range(d): - ax = ax1[dim] - bx = bx1[dim] - xa = np.linspace(ax, bx, inc) - R = bx - ax - - c = gridcount(A[dim], xa) - N = len(set(A[dim])) - # a = dct(c/c.sum(), norm=None) - a = dct(c / len(A[dim]), norm=None) - - # now compute the optimal bandwidth^2 using the referenced method - I = np.asfarray(np.arange(1, inc)) ** 2 - a2 = (a[1:] / 2) ** 2 - - def fun(t): - return fixed_point(t, N, I, a2) - x = np.linspace(0, 0.1, 150) - ai = x[0] - f0 = fun(ai) - for bi in x[1:]: - f1 = fun(bi) - if f1 * f0 <= 0: - # print('ai = %g, bi = %g' % (ai,bi)) - break - else: - ai = bi - # y = np.asarray([fun(j) for j in x]) - # plt.figure(1) - # plt.plot(x,y) - # plt.show() - - # use fzero to solve the equation t=zeta*gamma^[5](t) - try: - t_star = optimize.brentq(fun, a=ai, b=bi) - except: - t_star = 0.28 * N ** (-2. / 5) - warnings.warn('Failure in obtaining smoothing parameter') - - # smooth the discrete cosine transform of initial data using t_star - # a_t = a*exp(-np.arange(inc)**2*pi**2*t_star/2) - # now apply the inverse discrete cosine transform - # density = idct(a_t)/R; - - # take the rescaling of the data into account - bandwidth = sqrt(t_star) * R - - # Kernel other than Gaussian scale bandwidth - h[dim] = bandwidth * (STEconstant / STEconstant2) ** (1.0 / 5) - # end for dim loop - return h - - def hstt(self, data, h0=None, inc=128, maxit=100, releps=0.01, abseps=0.0): - '''HSTT Scott-Tapia-Thompson estimate of smoothing parameter. - - CALL: hs = hstt(data,kernel) - - hs = one dimensional value for smoothing parameter - given the data and kernel. size 1 x D - data = data matrix, size N x D (D = # dimensions ) - kernel = 'epanechnikov' - Epanechnikov kernel. (default) - 'biweight' - Bi-weight kernel. - 'triweight' - Tri-weight kernel. - 'triangular' - Triangular kernel. - 'gaussian' - Gaussian kernel - 'rectangular' - Rectangular kernel. - 'laplace' - Laplace kernel. - 'logistic' - Logistic kernel. - - HSTT returns Scott-Tapia-Thompson (STT) estimate of smoothing - parameter. This is a Solve-The-Equation rule (STE). - Simulation studies shows that the STT estimate of HS - is a good choice under a variety of models. A comparison with - likelihood cross-validation (LCV) indicates that LCV performs slightly - better for short tailed densities. - However, STT method in contrast to LCV is insensitive to outliers. - - Example - ------- - x = rndnorm(0,1,50,1); - hs = hstt(x,'gauss'); - - See also - -------- - hste, hbcv, hboot, hos, hldpi, hlscv, hscv, kde, kdebin - - Reference - --------- - B. W. Silverman (1986) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp 57--61 - ''' - A = np.atleast_2d(data) - d, n = A.shape - - # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) - mu2, R, unusedRdd = self.stats() - - AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) - STEconstant = R / (mu2 ** (2) * n) - - sigmaA = self.hns(A) / AMISEconstant - if h0 is None: - h0 = sigmaA * AMISEconstant - - h = np.asarray(h0, dtype=float) - - nfft = inc * 2 - amin = A.min(axis=1) # Find the minimum value of A. - amax = A.max(axis=1) # Find the maximum value of A. - arange = amax - amin # Find the range of A. - - # xa holds the x 'axis' vector, defining a grid of x values where - # the k.d. function will be evaluated. 
- - ax1 = amin - arange / 8.0 - bx1 = amax + arange / 8.0 - - fft = np.fft.fft - ifft = np.fft.ifft - for dim in range(d): - s = sigmaA[dim] - datan = A[dim] / s - ax = ax1[dim] / s - bx = bx1[dim] / s - - xa = np.linspace(ax, bx, inc) - xn = np.linspace(0, bx - ax, inc) - - c = gridcount(datan, xa) - - count = 1 - h_old = 0 - h1 = h[dim] / s - delta = (bx - ax) / (inc - 1) - while ((abs(h_old - h1) > max(releps * h1, abseps)) and - (count < maxit)): - count += 1 - h_old = h1 - - kw4 = self.kernel(xn / h1) / (n * h1 * self.norm_factor(d=1)) - kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply 'fftshift' to kw. - f = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. - - # Estimate psi4=R(f'') using simple finite differences and - # quadrature. - ix = np.arange(1, inc - 1) - z = ((f[ix + 1] - 2 * f[ix] + f[ix - 1]) / delta ** 2) ** 2 - psi4 = delta * z.sum() - h1 = (STEconstant / psi4) ** (1. / 5) - - if count >= maxit: - warnings.warn('The obtained value did not converge.') - - h[dim] = h1 * s - # end % for dim loop - return h - - def hscv(self, data, hvec=None, inc=128, maxit=100, fulloutput=False): - ''' - HSCV Smoothed cross-validation estimate of smoothing parameter. - - CALL: [hs,hvec,score] = hscv(data,kernel,hvec) - - hs = smoothing parameter - hvec = vector defining possible values of hs - (default linspace(0.25*h0,h0,100), h0=0.62) - score = score vector - data = data vector - kernel = 'gaussian' - Gaussian kernel the only supported - - Note that only the first 4 letters of the kernel name is needed. - - Example: - data = rndnorm(0,1,20,1) - [hs hvec score] = hscv(data,'epan'); - plot(hvec,score) - See also hste, hbcv, hboot, hos, hldpi, hlscv, hstt, kde, kdefun - - Wand,M.P. and Jones, M.C. (1986) - 'Kernel smoothing' - Chapman and Hall, pp 75--79 - ''' - # TODO: Add support for other kernels than Gaussian - A = np.atleast_2d(data) - d, n = A.shape - - # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) - mu2, R, unusedRdd = self.stats() - - AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) - STEconstant = R / (mu2 ** (2) * n) - - sigmaA = self.hns(A) / AMISEconstant - if hvec is None: - H = AMISEconstant / 0.93 - hvec = np.linspace(0.25 * H, H, maxit) - hvec = np.asarray(hvec, dtype=float) - - steps = len(hvec) - score = np.zeros(steps) - - nfft = inc * 2 - amin = A.min(axis=1) # Find the minimum value of A. - amax = A.max(axis=1) # Find the maximum value of A. - arange = amax - amin # Find the range of A. - - # xa holds the x 'axis' vector, defining a grid of x values where - # the k.d. function will be evaluated. - - ax1 = amin - arange / 8.0 - bx1 = amax + arange / 8.0 - - kernel2 = Kernel('gauss') - mu2, R, unusedRdd = kernel2.stats() - STEconstant2 = R / (mu2 ** (2) * n) - fft = np.fft.fft - ifft = np.fft.ifft - - h = np.zeros(d) - hvec = hvec * (STEconstant2 / STEconstant) ** (1. / 5.) - - k40, k60, k80, k100 = kernel2.deriv4_6_8_10(0, numout=4) - psi8 = 105 / (32 * sqrt(pi)) - psi12 = 3465. / (512 * sqrt(pi)) - g1 = (-2. * k60 / (mu2 * psi8 * n)) ** (1. / 9.) - g2 = (-2. * k100 / (mu2 * psi12 * n)) ** (1. / 13.) 
- - for dim in range(d): - s = sigmaA[dim] - ax = ax1[dim] / s - bx = bx1[dim] / s - datan = A[dim] / s - - xa = np.linspace(ax, bx, inc) - xn = np.linspace(0, bx - ax, inc) - - c = gridcount(datan, xa) - - kw4, kw6 = kernel2.deriv4_6_8_10(xn / g1, numout=2) - kw = np.r_[kw6, 0, kw6[-1:0:-1]] - z = np.real(ifft(fft(c, nfft) * fft(kw))) - psi6 = np.sum(c * z[:inc]) / (n ** 2 * g1 ** 7) - - kw4, kw6, kw8, kw10 = kernel2.deriv4_6_8_10(xn / g2, numout=4) - kw = np.r_[kw10, 0, kw10[-1:0:-1]] - z = np.real(ifft(fft(c, nfft) * fft(kw))) - psi10 = np.sum(c * z[:inc]) / (n ** 2 * g2 ** 11) - - g3 = (-2. * k40 / (mu2 * psi6 * n)) ** (1. / 7.) - g4 = (-2. * k80 / (mu2 * psi10 * n)) ** (1. / 11.) - - kw4 = kernel2.deriv4_6_8_10(xn / g3, numout=1) - kw = np.r_[kw4, 0, kw4[-1:0:-1]] - z = np.real(ifft(fft(c, nfft) * fft(kw))) - psi4 = np.sum(c * z[:inc]) / (n ** 2 * g3 ** 5) - - kw4, kw6, kw8 = kernel2.deriv4_6_8_10(xn / g3, numout=3) - kw = np.r_[kw8, 0, kw8[-1:0:-1]] - z = np.real(ifft(fft(c, nfft) * fft(kw))) - psi8 = np.sum(c * z[:inc]) / (n ** 2 * g4 ** 9) - - const = (441. / (64 * pi)) ** (1. / 18.) * \ - (4 * pi) ** (-1. / 5.) * \ - psi4 ** (-2. / 5.) * psi8 ** (-1. / 9.) - - M = np.atleast_2d(datan) - - Y = (M - M.T).ravel() - - for i in range(steps): - g = const * n ** (-23. / 45) * hvec[i] ** (-2) - sig1 = sqrt(2 * hvec[i] ** 2 + 2 * g ** 2) - sig2 = sqrt(hvec[i] ** 2 + 2 * g ** 2) - sig3 = sqrt(2 * g ** 2) - term2 = np.sum(kernel2(Y / sig1) / sig1 - 2 * kernel2( - Y / sig2) / sig2 + kernel2(Y / sig3) / sig3) - - score[i] = 1. / (n * hvec[i] * 2. * sqrt(pi)) + term2 / n ** 2 - - idx = score.argmin() - # Kernel other than Gaussian scale bandwidth - h[dim] = hvec[idx] * (STEconstant / STEconstant2) ** (1 / 5) - if idx == 0: - warnings.warn( - 'Optimum is probably lower than hs=%g for dim=%d' % - (h[dim] * s, dim)) - elif idx == maxit - 1: - warnings.warn( - 'Optimum is probably higher than hs=%g for dim=%d' % - (h[dim] * s, dim)) - - hvec = hvec * (STEconstant / STEconstant2) ** (1 / 5) - if fulloutput: - return h * sigmaA, score, hvec, sigmaA - else: - return h * sigmaA - - def hldpi(self, data, L=2, inc=128): - '''HLDPI L-stage Direct Plug-In estimate of smoothing parameter. - - CALL: hs = hldpi(data,kernel,L) - - hs = one dimensional value for smoothing parameter - given the data and kernel. size 1 x D - data = data matrix, size N x D (D = # dimensions ) - kernel = 'epanechnikov' - Epanechnikov kernel. - 'biweight' - Bi-weight kernel. - 'triweight' - Tri-weight kernel. - 'triangluar' - Triangular kernel. - 'gaussian' - Gaussian kernel - 'rectangular' - Rectanguler kernel. - 'laplace' - Laplace kernel. - 'logistic' - Logistic kernel. - L = 0,1,2,3,... (default 2) - - Note that only the first 4 letters of the kernel name is needed. - - Example: - x = rndnorm(0,1,50,1); - hs = hldpi(x,'gauss',1); - - See also hste, hbcv, hboot, hos, hlscv, hscv, hstt, kde, kdefun - - Wand,M.P. and Jones, M.C. (1995) - 'Kernel smoothing' - Chapman and Hall, pp 67--74 - ''' - A = np.atleast_2d(data) - d, n = A.shape - - # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) - mu2, R, unusedRdd = self.stats() - - AMISEconstant = (8 * sqrt(pi) * R / (3 * n * mu2 ** 2)) ** (1. / 5) - STEconstant = R / (n * mu2 ** 2) - - sigmaA = self.hns(A) / AMISEconstant - - nfft = inc * 2 - amin = A.min(axis=1) # Find the minimum value of A. - amax = A.max(axis=1) # Find the maximum value of A. - arange = amax - amin # Find the range of A. - - # xa holds the x 'axis' vector, defining a grid of x values where - # the k.d. 
function will be evaluated. - - ax1 = amin - arange / 8.0 - bx1 = amax + arange / 8.0 - - kernel2 = Kernel('gauss') - mu2, unusedR, unusedRdd = kernel2.stats() - - fft = np.fft.fft - ifft = np.fft.ifft - - h = np.zeros(d) - for dim in range(d): - s = sigmaA[dim] - datan = A[dim] # / s - ax = ax1[dim] # / s - bx = bx1[dim] # / s - - xa = np.linspace(ax, bx, inc) - xn = np.linspace(0, bx - ax, inc) - - c = gridcount(datan, xa) - - r = 2 * L + 4 - rd2 = L + 2 - - # Eq. 3.7 in Wand and Jones (1995) - PSI_r = (-1) ** (rd2) * np.prod( - np.r_[rd2 + 1:r + 1]) / (sqrt(pi) * (2 * s) ** (r + 1)) - PSI = PSI_r - if L > 0: - # High order derivatives of the Gaussian kernel - Kd = kernel2.deriv4_6_8_10(0, numout=L) - - # L-stage iterations to estimate PSI_4 - for ix in range(L, 0, -1): - gi = (-2 * Kd[ix - 1] / - (mu2 * PSI * n)) ** (1. / (2 * ix + 5)) - - # Obtain the kernel weights. - KW0 = kernel2.deriv4_6_8_10(xn / gi, numout=ix) - if ix > 1: - KW0 = KW0[-1] - # Apply 'fftshift' to kw. - kw = np.r_[KW0, 0, KW0[inc - 1:0:-1]] - - # Perform the convolution. - z = np.real(ifft(fft(c, nfft) * fft(kw))) - - PSI = np.sum(c * z[:inc]) / (n ** 2 * gi ** (2 * ix + 3)) - # end - # end - h[dim] = (STEconstant / PSI) ** (1. / 5) - return h - - def norm_factor(self, d=1, n=None): - return self.kernel.norm_factor(d, n) - - def eval_points(self, points): - return self.kernel(np.atleast_2d(points)) - __call__ = eval_points - - -def mkernel(X, kernel): - """MKERNEL Multivariate Kernel Function. - - Paramaters - ---------- - X : array-like - matrix size d x n (d = # dimensions, n = # evaluation points) - kernel : string - defining kernel - 'epanechnikov' - Epanechnikov kernel. - 'biweight' - Bi-weight kernel. - 'triweight' - Tri-weight kernel. - 'p1epanechnikov' - product of 1D Epanechnikov kernel. - 'p1biweight' - product of 1D Bi-weight kernel. - 'p1triweight' - product of 1D Tri-weight kernel. - 'triangular' - Triangular kernel. - 'gaussian' - Gaussian kernel - 'rectangular' - Rectangular kernel. - 'laplace' - Laplace kernel. - 'logistic' - Logistic kernel. - Note that only the first 4 letters of the kernel name is needed. - - Returns - ------- - z : ndarray - kernel function values evaluated at X - - See also - -------- - kde, kdefun, kdebin - - References - ---------- - B. W. Silverman (1986) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp. 43, 76 - - Wand, M. P. and Jones, M. C. (1995) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp 31, 103, 175 - - """ - fun = _MKERNEL_DICT[kernel[:4]] - return fun(np.atleast_2d(X)) - - -def accumsum(accmap, a, size, dtype=None): - if dtype is None: - dtype = a.dtype - size = np.atleast_1d(size) - if len(size) > 1: - binx = accmap[:, 0] - biny = accmap[:, 1] - out = sparse.coo_matrix( - (a.ravel(), (binx, biny)), shape=size, dtype=dtype).tocsr() - else: - binx = accmap.ravel() - zero = np.zeros(len(binx)) - out = sparse.coo_matrix( - (a.ravel(), (binx, zero)), shape=(size, 1), dtype=dtype).tocsr() - return out - - -def accumsum2(accmap, a, size): - return np.bincount(accmap.ravel(), a.ravel(), np.array(size).max()) - - -def accum(accmap, a, func=None, size=None, fill_value=0, dtype=None): - """An accumulation function similar to Matlab's `accumarray` function. - - Parameters - ---------- - accmap : ndarray - This is the "accumulation map". It maps input (i.e. indices into - `a`) to their destination in the output array. The first `a.ndim` - dimensions of `accmap` must be the same as `a.shape`. 
That is, - `accmap.shape[:a.ndim]` must equal `a.shape`. For example, if `a` - has shape (15,4), then `accmap.shape[:2]` must equal (15,4). In this - case `accmap[i,j]` gives the index into the output array where - element (i,j) of `a` is to be accumulated. If the output is, say, - a 2D, then `accmap` must have shape (15,4,2). The value in the - last dimension give indices into the output array. If the output is - 1D, then the shape of `accmap` can be either (15,4) or (15,4,1) - a : ndarray - The input data to be accumulated. - func : callable or None - The accumulation function. The function will be passed a list - of values from `a` to be accumulated. - If None, numpy.sum is assumed. - size : ndarray or None - The size of the output array. If None, the size will be determined - from `accmap`. - fill_value : scalar - The default value for elements of the output array. - dtype : numpy data type, or None - The data type of the output array. If None, the data type of - `a` is used. - - Returns - ------- - out : ndarray - The accumulated results. - - The shape of `out` is `size` if `size` is given. Otherwise the - shape is determined by the (lexicographically) largest indices of - the output found in `accmap`. - - - Examples - -------- - >>> from numpy import array, prod - >>> a = array([[1,2,3],[4,-1,6],[-1,8,9]]) - >>> a - array([[ 1, 2, 3], - [ 4, -1, 6], - [-1, 8, 9]]) - >>> # Sum the diagonals. - >>> accmap = array([[0,1,2],[2,0,1],[1,2,0]]) - >>> s = accum(accmap, a) - >>> s - array([ 9, 7, 15]) - >>> # A 2D output, from sub-arrays with shapes and positions like this: - >>> # [ (2,2) (2,1)] - >>> # [ (1,2) (1,1)] - >>> accmap = array([ - ... [[0,0],[0,0],[0,1]], - ... [[0,0],[0,0],[0,1]], - ... [[1,0],[1,0],[1,1]]]) - >>> # Accumulate using a product. - >>> accum(accmap, a, func=prod, dtype=float) - array([[ -8., 18.], - [ -8., 9.]]) - >>> # Same accmap, but create an array of lists of values. - >>> accum(accmap, a, func=lambda x: x, dtype='O') - array([[[1, 2, 4, -1], [3, 6]], - [[-1, 8], [9]]], dtype=object) - - """ - - def create_array_of_python_lists(accmap, a, size): - vals = np.empty(size, dtype='O') - for s in product(*[range(k) for k in size]): - vals[s] = [] - - for s in product(*[range(k) for k in a.shape]): - indx = tuple(accmap[s]) - val = a[s] - vals[indx].append(val) - - return vals - - # Check for bad arguments and handle the defaults. - if accmap.shape[:a.ndim] != a.shape: - raise ValueError( - "The initial dimensions of accmap must be the same as a.shape") - if func is None: - func = np.sum - if dtype is None: - dtype = a.dtype - if accmap.shape == a.shape: - accmap = np.expand_dims(accmap, -1) - adims = tuple(range(a.ndim)) - if size is None: - size = 1 + np.squeeze(np.apply_over_axes(np.max, accmap, axes=adims)) - size = np.atleast_1d(size) - - # Create an array of python lists of values. - vals = create_array_of_python_lists(accmap, a, size) - - # Create the output array. - out = np.empty(size, dtype=dtype) - for s in product(*[range(k) for k in size]): - if vals[s] == []: - out[s] = fill_value - else: - out[s] = func(vals[s]) - return out - - -def qlevels(pdf, p=(10, 30, 50, 70, 90, 95, 99, 99.9), x1=None, x2=None): - """QLEVELS Calculates quantile levels which encloses P% of PDF. - - CALL: [ql PL] = qlevels(pdf,PL,x1,x2); - - ql = the discrete quantile levels. 
- pdf = joint point density function matrix or vector - PL = percent level (default [10:20:90 95 99 99.9]) - x1,x2 = vectors of the spacing of the variables - (Default unit spacing) - - QLEVELS numerically integrates PDF by decreasing height and find the - quantile levels which encloses P% of the distribution. If X1 and - (or) X2 is unspecified it is assumed that dX1 and dX2 is constant. - NB! QLEVELS normalizes the integral of PDF to N/(N+0.001) before - calculating QL in order to reflect the sampling of PDF is finite. - Currently only able to handle 1D and 2D PDF's if dXi is not constant - (i=1,2). - - Example - ------- - >>> import wafo.stats as ws - >>> x = np.linspace(-8,8,2001); - >>> PL = np.r_[10:90:20, 90, 95, 99, 99.9] - >>> qlevels(ws.norm.pdf(x),p=PL, x1=x); - array([ 0.39591707, 0.37058719, 0.31830968, 0.23402133, 0.10362052, - 0.05862129, 0.01449505, 0.00178806]) - - # compared with the exact values - >>> ws.norm.pdf(ws.norm.ppf((100-PL)/200)) - array([ 0.39580488, 0.370399 , 0.31777657, 0.23315878, 0.10313564, - 0.05844507, 0.01445974, 0.00177719]) - - See also - -------- - qlevels2, tranproc - - """ - - norm = 1 # normalize cdf to unity - pdf = np.atleast_1d(pdf) - if any(pdf.ravel() < 0): - raise ValueError( - 'This is not a pdf since one or more values of pdf is negative') - - fsiz = pdf.shape - fsizmin = min(fsiz) - if fsizmin == 0: - return [] - - N = np.prod(fsiz) - d = len(fsiz) - if x1 is None or ((x2 is None) and d > 2): - fdfi = pdf.ravel() - else: - if d == 1: # pdf in one dimension - dx22 = np.ones(1) - else: # % pdf in two dimensions - dx2 = np.diff(x2.ravel()) * 0.5 - dx22 = np.r_[0, dx2] + np.r_[dx2, 0] - - dx1 = np.diff(x1.ravel()) * 0.5 - dx11 = np.r_[0, dx1] + np.r_[dx1, 0] - dx1x2 = dx22[:, None] * dx11 - fdfi = (pdf * dx1x2).ravel() - - p = np.atleast_1d(p) - - if np.any((p < 0) | (100 < p)): - raise ValueError('PL must satisfy 0 <= PL <= 100') - - p2 = p / 100.0 - ind = np.argsort(pdf.ravel()) # sort by height of pdf - ind = ind[::-1] - fi = pdf.flat[ind] - - # integration in the order of decreasing height of pdf - Fi = np.cumsum(fdfi[ind]) - - if norm: # %normalize Fi to make sure int pdf dx1 dx2 approx 1 - Fi = Fi / Fi[-1] * N / (N + 1.5e-8) - - maxFi = np.max(Fi) - if maxFi > 1: - warnings.warn('this is not a pdf since cdf>1! normalizing') - - Fi = Fi / Fi[-1] * N / (N + 1.5e-8) - - elif maxFi < .95: - msg = '''The given pdf is too sparsely sampled since cdf<.95. - Thus QL is questionable''' - warnings.warn(msg) - - # make sure Fi is strictly increasing by not considering duplicate values - ind, = np.where(np.diff(np.r_[Fi, 1]) > 0) - # calculating the inverse of Fi to find the index - ui = tranproc(Fi[ind], fi[ind], p2) - # to the desired quantile level - # ui=smooth(Fi(ind),fi(ind),1,p2(:),1) % alternative - # res=ui-ui2 - - if np.any(ui >= max(pdf.ravel())): - warnings.warn('The lowest percent level is too close to 0%') - - if np.any(ui <= min(pdf.ravel())): - msg = '''The given pdf is too sparsely sampled or - the highest percent level is too close to 100%''' - warnings.warn(msg) - ui[ui < 0] = 0.0 - - return ui - - -def qlevels2(data, p=(10, 30, 50, 70, 90, 95, 99, 99.9), method=1): - """QLEVELS2 Calculates quantile levels which encloses P% of data. 
- - CALL: [ql PL] = qlevels2(data,PL,method); - - ql = the discrete quantile levels, size D X Np - Parameters - ---------- - data : data matrix, size D x N (D = # of dimensions) - p : percent level vector, length Np (default [10:20:90 95 99 99.9]) - method : integer - 1 Interpolation so that F(X_(k)) == (k-0.5)/n. (default) - 2 Interpolation so that F(X_(k)) == k/(n+1). - 3 Based on the empirical distribution. - - Returns - ------- - - QLEVELS2 sort the columns of data in ascending order and find the - quantile levels for each column which encloses P% of the data. - - Examples : Finding quantile levels enclosing P% of data: - -------- - >>> import wafo.stats as ws - >>> PL = np.r_[10:90:20, 90, 95, 99, 99.9] - >>> xs = ws.norm.rvs(size=2500000) - >>> np.allclose(qlevels2(ws.norm.pdf(xs), p=PL), - ... [0.3958, 0.3704, 0.3179, 0.2331, 0.1031, 0.05841, 0.01451, 0.001751], - ... rtol=1e-1) - True - - # compared with the exact values - >>> ws.norm.pdf(ws.norm.ppf((100-PL)/200)) - array([ 0.39580488, 0.370399 , 0.31777657, 0.23315878, 0.10313564, - 0.05844507, 0.01445974, 0.00177719]) - - # Finding the median of xs: - >>> '%2.2f' % np.abs(qlevels2(xs,50)[0]) - '0.00' - - See also - -------- - qlevels - - """ - q = 100 - np.atleast_1d(p) - return percentile(data, q, axis=-1, method=method) - - -_PKDICT = {1: lambda k, w, n: (k - w) / (n - 1), - 2: lambda k, w, n: (k - w / 2) / n, - 3: lambda k, w, n: k / n, - 4: lambda k, w, n: k / (n + 1), - 5: lambda k, w, n: (k - w / 3) / (n + 1 / 3), - 6: lambda k, w, n: (k - w * 3 / 8) / (n + 1 / 4)} - - -def _compute_qth_weighted_percentile(a, q, axis, out, method, weights, - overwrite_input): - # normalise weight vector such that sum of the weight vector equals to n - q = np.atleast_1d(q) / 100.0 - if (q < 0).any() or (q > 1).any(): - raise ValueError("percentile must be in the range [0,100]") - - shape0 = a.shape - if axis is None: - sorted_ = a.ravel() - else: - taxes = [i for i in range(a.ndim)] - taxes[-1], taxes[axis] = taxes[axis], taxes[-1] - sorted_ = np.transpose(a, taxes).reshape(-1, shape0[axis]) - - ind = sorted_.argsort(axis=-1) - if overwrite_input: - sorted_.sort(axis=-1) - else: - sorted_ = np.sort(sorted_, axis=-1) - - w = np.atleast_1d(weights) - n = len(w) - w = w * n / w.sum() - - # Work on each column separately because of weight vector - m = sorted_.shape[0] - nq = len(q) - y = np.zeros((m, nq)) - pk_fun = _PKDICT.get(method, 1) - for i in range(m): - sortedW = w[ind[i]] # rearrange the weight according to ind - k = sortedW.cumsum() # cumulative weight - # different algorithm to compute percentile - pk = pk_fun(k, sortedW, n) - # Interpolation between pk and sorted_ for given value of q - y[i] = np.interp(q, pk, sorted_[i]) - if axis is None: - return np.squeeze(y) - else: - shape1 = list(shape0) - shape1[axis], shape1[-1] = shape1[-1], nq - return np.squeeze(np.transpose(y.reshape(shape1), taxes)) - -# method=1: p(k) = k/(n-1) -# method=2: p(k) = (k+0.5)/n. -# method=3: p(k) = (k+1)/n -# method=4: p(k) = (k+1)/(n+1) -# method=5: p(k) = (k+2/3)/(n+1/3) -# method=6: p(k) = (k+5/8)/(n+1/4) - -_KDICT = {1: lambda p, n: p * (n - 1), - 2: lambda p, n: p * n - 0.5, - 3: lambda p, n: p * n - 1, - 4: lambda p, n: p * (n + 1) - 1, - 5: lambda p, n: p * (n + 1. / 3) - 2. / 3, - 6: lambda p, n: p * (n + 1. / 4) - 5. 
/ 8} - - -def _compute_qth_percentile(sorted_, q, axis, out, method): - if not np.isscalar(q): - p = [_compute_qth_percentile(sorted_, qi, axis, None, method) - for qi in q] - if out is not None: - out.flat = p - return p - - q = q / 100.0 - if (q < 0) or (q > 1): - raise ValueError("percentile must be in the range [0,100]") - - indexer = [slice(None)] * sorted_.ndim - Nx = sorted_.shape[axis] - k_fun = _KDICT.get(method, 1) - index = np.clip(k_fun(q, Nx), 0, Nx - 1) - i = int(index) - if i == index: - indexer[axis] = slice(i, i + 1) - weights1 = np.array(1) - sumval = 1.0 - else: - indexer[axis] = slice(i, i + 2) - j = i + 1 - weights1 = np.array([(j - index), (index - i)], float) - wshape = [1] * sorted_.ndim - wshape[axis] = 2 - weights1.shape = wshape - sumval = weights1.sum() - - # Use add.reduce in both cases to coerce data type as well as - # check and use out array. - return np.add.reduce(sorted_[indexer] * weights1, - axis=axis, out=out) / sumval - - -def percentile(a, q, axis=None, out=None, overwrite_input=False, method=1, - weights=None): - """Compute the qth percentile of the data along the specified axis. - - Returns the qth percentile of the array elements. - - Parameters - ---------- - a : array_like - Input array or object that can be converted to an array. - q : float in range of [0,100] (or sequence of floats) - percentile to compute which must be between 0 and 100 inclusive - axis : {None, int}, optional - Axis along which the percentiles are computed. The default (axis=None) - is to compute the median along a flattened version of the array. - out : ndarray, optional - Alternative output array in which to place the result. It must - have the same shape and buffer length as the expected output, - but the type (of the output) will be cast if necessary. - overwrite_input : {False, True}, optional - If True, then allow use of memory of input array (a) for - calculations. The input array will be modified by the call to - median. This will save memory when you do not need to preserve - the contents of the input array. Treat the input as undefined, - but it will probably be fully or partially sorted. Default is - False. Note that, if `overwrite_input` is True and the input - is not already an ndarray, an error will be raised. - method : scalar integer - defining the interpolation method. Valid options are - 1 : p[k] = k/(n-1). In this case, p[k] = mode[F(x[k])]. - This is used by S. (default) - 2 : p[k] = (k+0.5)/n. That is a piecewise linear function where - the knots are the values midway through the steps of the - empirical cdf. This is popular amongst hydrologists. - Matlab also uses this formula. - 3 : p[k] = (k+1)/n. That is, linear interpolation of the empirical cdf. - 4 : p[k] = (k+1)/(n+1). Thus p[k] = E[F(x[k])]. - This is used by Minitab and by SPSS. - 5 : p[k] = (k+2/3)/(n+1/3). Then p[k] =~ median[F(x[k])]. - The resulting quantile estimates are approximately - median-unbiased regardless of the distribution of x. - 6 : p[k] = (k+5/8)/(n+1/4). The resulting quantile estimates are - approximately unbiased for the expected order statistics - if x is normally distributed. - - Returns - ------- - pcntile : ndarray - A new array holding the result (unless `out` is specified, in - which case that array is returned instead). If the input contains - integers, or floats of smaller precision than 64, then the output - data-type is float64. Otherwise, the output data-type is the same - as that of the input. 
- - See Also - -------- - mean, median - - Notes - ----- - Given a vector V of length N, the qth percentile of V is the qth ranked - value in a sorted copy of V. A weighted average of the two nearest - neighbors is used if the normalized ranking does not match q exactly. - The same as the median if q is 0.5; the same as the min if q is 0; - and the same as the max if q is 1 - - Examples - -------- - >>> import wafo.kdetools as wk - >>> a = np.array([[10, 7, 4], [3, 2, 1]]) - >>> a - array([[10, 7, 4], - [ 3, 2, 1]]) - >>> wk.percentile(a, 50) - 3.5 - >>> wk.percentile(a, 50, axis=0) - array([ 6.5, 4.5, 2.5]) - >>> wk.percentile(a, 50, axis=0, weights=np.ones(2)) - array([ 6.5, 4.5, 2.5]) - >>> wk.percentile(a, 50, axis=1) - array([ 7., 2.]) - >>> wk.percentile(a, 50, axis=1, weights=np.ones(3)) - array([ 7., 2.]) - >>> m = wk.percentile(a, 50, axis=0) - >>> out = np.zeros_like(m) - >>> wk.percentile(a, 50, axis=0, out=m) - array([ 6.5, 4.5, 2.5]) - >>> m - array([ 6.5, 4.5, 2.5]) - >>> b = a.copy() - >>> wk.percentile(b, 50, axis=1, overwrite_input=True) - array([ 7., 2.]) - >>> assert not np.all(a==b) - >>> b = a.copy() - >>> wk.percentile(b, 50, axis=None, overwrite_input=True) - 3.5 - >>> np.all(a==b) - True - - """ - a = np.asarray(a) - try: - if q == 0: - return a.min(axis=axis, out=out) - elif q == 100: - return a.max(axis=axis, out=out) - except: - pass - if weights is not None: - return _compute_qth_weighted_percentile(a, q, axis, out, method, - weights, overwrite_input) - elif overwrite_input: - if axis is None: - sorted_ = np.sort(a, axis=axis) - else: - a.sort(axis=axis) - sorted_ = a - else: - sorted_ = np.sort(a, axis=axis) - if axis is None: - axis = 0 - - return _compute_qth_percentile(sorted_, q, axis, out, method) - - -def iqrange(data, axis=None): - """Returns the Inter Quartile Range of data. - - Parameters - ---------- - data : array-like - Input array or object that can be converted to an array. - axis : {None, int}, optional - Axis along which the percentiles are computed. The default (axis=None) - is to compute the median along a flattened version of the array. - - Returns - ------- - r : array-like - abs(np.percentile(data, 75, axis)-np.percentile(data, 25, axis)) - - Notes - ----- - IQRANGE is a robust measure of spread. The use of interquartile range - guards against outliers if the distribution have heavy tails. - - Example - ------- - >>> a = np.arange(101) - >>> iqrange(a) - 50.0 - - See also - -------- - np.std - - """ - return np.abs(np.percentile(data, 75, axis=axis) - - np.percentile(data, 25, axis=axis)) - - -def bitget(int_type, offset): - """Returns the value of the bit at the offset position in int_type. - - Example - ------- - >>> bitget(5, np.r_[0:4]) - array([1, 0, 1, 0]) - - """ - return np.bitwise_and(int_type, 1 << offset) >> offset - - -def gridcount(data, X, y=1): - ''' - Returns D-dimensional histogram using linear binning. - - Parameters - ---------- - data = column vectors with D-dimensional data, shape D x Nd - X = row vectors defining discretization, shape D x N - Must include the range of the data. - - Returns - ------- - c = gridcount, shape N x N x ... x N - - GRIDCOUNT obtains the grid counts using linear binning. - There are 2 strategies: simple- or linear- binning. - Suppose that an observation occurs at x and that the nearest point - below and above is y and z, respectively. Then simple binning strategy - assigns a unit weight to either y or z, whichever is closer. 
Linear - binning, on the other hand, assigns the grid point at y with the weight - of (z-x)/(z-y) and the gridpoint at z a weight of (y-x)/(z-y). - - In terms of approximation error of using gridcounts as pdf-estimate, - linear binning is significantly more accurate than simple binning. - - NOTE: The interval [min(X);max(X)] must include the range of the data. - The order of C is permuted in the same order as - meshgrid for D==2 or D==3. - - Example - ------- - >>> import numpy as np - >>> import wafo.kdetools as wk - >>> import pylab as plb - >>> N = 20 - >>> data = np.random.rayleigh(1,N) - >>> data = np.array( - ... [ 1.07855907, 1.51199717, 1.54382893, 1.54774808, 1.51913566, - ... 1.11386486, 1.49146216, 1.51127214, 2.61287913, 0.94793051, - ... 2.08532731, 1.35510641, 0.56759888, 1.55766981, 0.77883602, - ... 0.9135759 , 0.81177855, 1.02111483, 1.76334202, 0.07571454]) - >>> x = np.linspace(0,max(data)+1,50) - >>> dx = x[1]-x[0] - - >>> c = wk.gridcount(data, x) - >>> np.allclose(c[:5], [ 0., 0.9731147, 0.0268853, 0., 0.]) - True - - >>> pdf = c/dx/N - >>> np.allclose(np.trapz(pdf, x), 1) - True - - h = plb.plot(x,c,'.') # 1D histogram - h1 = plb.plot(x, pdf) # 1D probability density plot - - See also - -------- - bincount, accum, kdebin - - Reference - ---------- - Wand,M.P. and Jones, M.C. (1995) - 'Kernel smoothing' - Chapman and Hall, pp 182-192 - ''' - dat = np.atleast_2d(data) - x = np.atleast_2d(X) - y = np.atleast_1d(y).ravel() - d = dat.shape[0] - d1, inc = x.shape - - if d != d1: - raise ValueError('Dimension 0 of data and X do not match.') - - dx = np.diff(x[:, :2], axis=1) - xlo = x[:, 0] - xup = x[:, -1] - - datlo = dat.min(axis=1) - datup = dat.max(axis=1) - if ((datlo < xlo) | (xup < datup)).any(): - raise ValueError('X does not include whole range of the data!') - - csiz = np.repeat(inc, d) - use_sparse = False - if use_sparse: - acfun = accumsum # faster than accum - else: - acfun = accumsum2 # accum - - binx = np.asarray(np.floor((dat - xlo[:, newaxis]) / dx), dtype=int) - w = dx.prod() - abs = np.abs # @ReservedAssignment - if d == 1: - x.shape = (-1,) - c = np.asarray((acfun(binx, (x[binx + 1] - dat) * y, size=(inc, )) + - acfun(binx + 1, (dat - x[binx]) * y, size=(inc, ))) / - w).ravel() - else: # d>2 - - Nc = csiz.prod() - c = np.zeros((Nc,)) - - fact2 = np.asarray(np.reshape(inc * np.arange(d), (d, -1)), dtype=int) - fact1 = np.asarray( - np.reshape(csiz.cumprod() / inc, (d, -1)), dtype=int) - # fact1 = fact1(ones(n,1),:); - bt0 = [0, 0] - X1 = X.ravel() - for ir in range(2 ** (d - 1)): - bt0[0] = np.reshape(bitget(ir, np.arange(d)), (d, -1)) - bt0[1] = 1 - bt0[0] - for ix in range(2): - one = np.mod(ix, 2) - two = np.mod(ix + 1, 2) - # Convert to linear index - # linear index to c - b1 = np.sum((binx + bt0[one]) * fact1, axis=0) - bt2 = bt0[two] + fact2 - b2 = binx + bt2 # linear index to X - c += acfun( - b1, abs(np.prod(X1[b2] - dat, axis=0)) * y, size=(Nc,)) - - c = np.reshape(c / w, csiz, order='F') - - T = [i for i in range(d)] - T[1], T[0] = T[0], T[1] - # make sure c is stored in the same way as meshgrid - c = c.transpose(*T) - return c - - -def kde_demo1(): - """KDEDEMO1 Demonstrate the smoothing parameter impact on KDE. - - KDEDEMO1 shows the true density (dotted) compared to KDE based on 7 - observations (solid) and their individual kernels (dashed) for 3 - different values of the smoothing parameter, hs. 
- - """ - - import scipy.stats as st - x = np.linspace(-4, 4, 101) - x0 = x / 2.0 - data = np.random.normal(loc=0, scale=1.0, size=7) - kernel = Kernel('gauss') - hs = kernel.hns(data) - hVec = [hs / 2, hs, 2 * hs] - - for ix, h in enumerate(hVec): - plt.figure(ix) - kde = KDE(data, hs=h, kernel=kernel) - f2 = kde(x, output='plot', title='h_s = %2.2f' % h, ylab='Density') - f2.plot('k-') - - plt.plot(x, st.norm.pdf(x, 0, 1), 'k:') - n = len(data) - plt.plot(data, np.zeros(data.shape), 'bx') - y = kernel(x0) / (n * h * kernel.norm_factor(d=1, n=n)) - for i in range(n): - plt.plot(data[i] + x0 * h, y, 'b--') - plt.plot([data[i], data[i]], [0, np.max(y)], 'b') - - plt.axis([x.min(), x.max(), 0, 0.5]) - - -def kde_demo2(): - '''Demonstrate the difference between transformation- and ordinary-KDE. - - KDEDEMO2 shows that the transformation KDE is a better estimate for - Rayleigh distributed data around 0 than the ordinary KDE. - ''' - import scipy.stats as st - data = st.rayleigh.rvs(scale=1, size=300) - - x = np.linspace(1.5e-2, 5, 55) - - kde = KDE(data) - f = kde(output='plot', title='Ordinary KDE (hs=%g)' % kde.hs) - plt.figure(0) - f.plot() - - plt.plot(x, st.rayleigh.pdf(x, scale=1), ':') - - # plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable - - tkde = TKDE(data, L2=0.5) - ft = tkde(x, output='plot', title='Transformation KDE (hs=%g)' % - tkde.tkde.hs) - plt.figure(1) - ft.plot() - - plt.plot(x, st.rayleigh.pdf(x, scale=1), ':') - - plt.figure(0) - - -def kde_demo3(): - '''Demonstrate the difference between transformation and ordinary-KDE in 2D - - KDEDEMO3 shows that the transformation KDE is a better estimate for - Rayleigh distributed data around 0 than the ordinary KDE. - ''' - import scipy.stats as st - data = st.rayleigh.rvs(scale=1, size=(2, 300)) - - # x = np.linspace(1.5e-3, 5, 55) - - kde = KDE(data) - f = kde(output='plot', title='Ordinary KDE', plotflag=1) - plt.figure(0) - f.plot() - - plt.plot(data[0], data[1], '.') - - # plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable - - tkde = TKDE(data, L2=0.5) - ft = tkde.eval_grid_fast( - output='plot', title='Transformation KDE', plotflag=1) - - plt.figure(1) - ft.plot() - - plt.plot(data[0], data[1], '.') - - plt.figure(0) - - -def kde_demo4(N=50): - '''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior - for 1D multimodal distributions - - KDEDEMO4 shows that the improved Sheather-Jones plug-in smoothing is a - better compared to normal reference rules (in this case the hns) - ''' - import scipy.stats as st - - data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(N,)), - st.norm.rvs(loc=-5, scale=1, size=(N,)))) - - # x = np.linspace(1.5e-3, 5, 55) - - kde = KDE(data, kernel=Kernel('gauss', 'hns')) - f = kde(output='plot', title='Ordinary KDE', plotflag=1) - - kde1 = KDE(data, kernel=Kernel('gauss', 'hisj')) - f1 = kde1(output='plot', label='Ordinary KDE', plotflag=1) - - plt.figure(0) - f.plot('r', label='hns=%g' % kde.hs) - # plt.figure(2) - f1.plot('b', label='hisj=%g' % kde1.hs) - x = np.linspace(-4, 4) - for loc in [-5, 5]: - plt.plot(x + loc, st.norm.pdf(x, 0, scale=1) / 2, 'k:', - label='True density') - plt.legend() - - -def kde_demo5(N=500): - '''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior - for 2D multimodal distributions - - KDEDEMO5 shows that the improved Sheather-Jones plug-in smoothing is better - compared to normal reference rules (in this case the hns) - ''' - import scipy.stats as st - - data = np.hstack((st.norm.rvs(loc=5, 
scale=1, size=(2, N,)), - st.norm.rvs(loc=-5, scale=1, size=(2, N,)))) - kde = KDE(data, kernel=Kernel('gauss', 'hns')) - f = kde(output='plot', title='Ordinary KDE (hns=%g %g)' % - tuple(kde.hs.tolist()), plotflag=1) - - kde1 = KDE(data, kernel=Kernel('gauss', 'hisj')) - f1 = kde1(output='plot', title='Ordinary KDE (hisj=%g %g)' % - tuple(kde1.hs.tolist()), plotflag=1) - - plt.figure(0) - plt.clf() - f.plot() - plt.plot(data[0], data[1], '.') - plt.figure(1) - plt.clf() - f1.plot() - plt.plot(data[0], data[1], '.') - - -def kreg_demo1(hs=None, fast=False, fun='hisj'): - """""" - N = 100 - # ei = np.random.normal(loc=0, scale=0.075, size=(N,)) - ei = np.array([ - -0.08508516, 0.10462496, 0.07694448, -0.03080661, 0.05777525, - 0.06096313, -0.16572389, 0.01838912, -0.06251845, -0.09186784, - -0.04304887, -0.13365788, -0.0185279, -0.07289167, 0.02319097, - 0.06887854, -0.08938374, -0.15181813, 0.03307712, 0.08523183, - -0.0378058, -0.06312874, 0.01485772, 0.06307944, -0.0632959, - 0.18963205, 0.0369126, -0.01485447, 0.04037722, 0.0085057, - -0.06912903, 0.02073998, 0.1174351, 0.17599277, -0.06842139, - 0.12587608, 0.07698113, -0.0032394, -0.12045792, -0.03132877, - 0.05047314, 0.02013453, 0.04080741, 0.00158392, 0.10237899, - -0.09069682, 0.09242174, -0.15445323, 0.09190278, 0.07138498, - 0.03002497, 0.02495252, 0.01286942, 0.06449978, 0.03031802, - 0.11754861, -0.02322272, 0.00455867, -0.02132251, 0.09119446, - -0.03210086, -0.06509545, 0.07306443, 0.04330647, 0.078111, - -0.04146907, 0.05705476, 0.02492201, -0.03200572, -0.02859788, - -0.05893749, 0.00089538, 0.0432551, 0.04001474, 0.04888828, - -0.17708392, 0.16478644, 0.1171006, 0.11664846, 0.01410477, - -0.12458953, -0.11692081, 0.0413047, -0.09292439, -0.07042327, - 0.14119701, -0.05114335, 0.04994696, -0.09520663, 0.04829406, - -0.01603065, -0.1933216, 0.19352763, 0.11819496, 0.04567619, - -0.08348306, 0.00812816, -0.00908206, 0.14528945, 0.02901065]) - x = np.linspace(0, 1, N) - - y0 = 2 * np.exp(-x ** 2 / (2 * 0.3 ** 2)) + \ - 3 * np.exp(-(x - 1) ** 2 / (2 * 0.7 ** 2)) - y = y0 + ei - kernel = Kernel('gauss', fun=fun) - hopt = kernel.hisj(x) - kreg = KRegression( - x, y, p=0, hs=hs, kernel=kernel, xmin=-2 * hopt, xmax=1 + 2 * hopt) - if fast: - kreg.__call__ = kreg.eval_grid_fast - - f = kreg(output='plot', title='Kernel regression', plotflag=1) - plt.figure(0) - f.plot(label='p=0') - - kreg.p = 1 - f1 = kreg(output='plot', title='Kernel regression', plotflag=1) - f1.plot(label='p=1') - # print(f1.data) - plt.plot(x, y, '.', label='data') - plt.plot(x, y0, 'k', label='True model') - plt.legend() - - plt.show() - - print(kreg.tkde.tkde.inv_hs) - print(kreg.tkde.tkde.hs) - -_TINY = np.finfo(float).machar.tiny -_REALMIN = np.finfo(float).machar.xmin -_REALMAX = np.finfo(float).machar.xmax -_EPS = np.finfo(float).eps - - -def _logit(p): - pc = p.clip(min=0, max=1) - return (np.log(pc) - np.log1p(-pc)).clip(min=-40, max=40) - - -def _logitinv(x): - return 1.0 / (np.exp(-x) + 1) - - -def _get_data(n=100, symmetric=False, loc1=1.1, scale1=0.6, scale2=1.0): - import scipy.stats as st - # from sg_filter import SavitzkyGolay - dist = st.norm - - norm1 = scale2 * (dist.pdf(-loc1, loc=-loc1, scale=scale1) + - dist.pdf(-loc1, loc=loc1, scale=scale1)) - - def fun1(x): - return ((dist.pdf(x, loc=-loc1, scale=scale1) + - dist.pdf(x, loc=loc1, scale=scale1)) / norm1).clip(max=1.0) - - x = np.sort(6 * np.random.rand(n, 1) - 3, axis=0) - - y = (fun1(x) > np.random.rand(n, 1)).ravel() - # y = (np.cos(x)>2*np.random.rand(n, 1)-1).ravel() - x = x.ravel() 
- - if symmetric: - xi = np.hstack((x.ravel(), -x.ravel())) - yi = np.hstack((y, y)) - i = np.argsort(xi) - x = xi[i] - y = yi[i] - return x, y, fun1 - - -def kreg_demo2(n=100, hs=None, symmetric=False, fun='hisj', plotlog=False): - x, y, fun1 = _get_data(n, symmetric) - kreg_demo3(x, y, fun1, hs=None, fun='hisj', plotlog=False) - - -def kreg_demo3(x, y, fun1, hs=None, fun='hisj', plotlog=False): - st = stats - - alpha = 0.1 - z0 = -_invnorm(alpha / 2) - - n = x.size - hopt, hs1, hs2 = _get_regression_smooting(x, y, fun='hos') - if hs is None: - hs = hopt - - forward = _logit - reverse = _logitinv - # forward = np.log - # reverse = np.exp - - xmin, xmax = x.min(), x.max() - ni = max(2 * int((xmax - xmin) / hopt) + 3, 5) - print(ni) - print(xmin, xmax) - sml = hopt * 0.1 - xi = np.linspace(xmin - sml, xmax + sml, ni) - xiii = np.linspace(xmin - sml, xmax + sml, 4 * ni + 1) - - c = gridcount(x, xi) - if (y == 1).any(): - c0 = gridcount(x[y == 1], xi) - else: - c0 = np.zeros(xi.shape) - yi = np.where(c == 0, 0, c0 / c) - - kreg = KRegression(x, y, hs=hs, p=0) - fiii = kreg(xiii) - yiii = interpolate.interp1d(xi, yi)(xiii) - fit = fun1(xiii).clip(max=1.0) - df = np.diff(fiii) - eerr = np.abs((yiii - fiii)).std() + 0.5 * (df[:-1] * df[1:] < 0).sum() / n - err = (fiii - fit).std() - f = kreg( - xiii, output='plotobj', - title='%s err=%1.3f,eerr=%1.3f, n=%d, hs=%1.3f, hs1=%1.3f, hs2=%1.3f' % - (fun, err, eerr, n, hs, hs1, hs2), plotflag=1) - - # yi[yi==0] = 1.0/(c[c!=0].min()+4) - # yi[yi==1] = 1-1.0/(c[c!=0].min()+4) - # yi[yi==0] = fi[yi==0] - # yi[yi==0] = np.exp(stineman_interp(xi[yi==0], xi[yi>0],np.log(yi[yi>0]))) - # yi[yi==0] = fun1(xi[yi==0]) - try: - yi[yi == 0] = yi[yi > 0].min() / sqrt(n) - except: - yi[yi == 0] = 1. / n - yi[yi == 1] = 1 - (1 - yi[yi < 1].max()) / sqrt(n) - - logity = forward(yi) - - gkreg = KRegression(xi, logity, hs=hs, xmin=xmin - hopt, xmax=xmax + hopt) - fg = gkreg.eval_grid( - xi, output='plotobj', title='Kernel regression', plotflag=1) - sa = (fg.data - logity).std() - sa2 = iqrange(fg.data - logity) / 1.349 - # print('sa=%g %g' % (sa, sa2)) - sa = min(sa, sa2) - -# plt.figure(1) -# plt.plot(xi, slogity-logity,'r.') -# plt.plot(xi, logity-,'b.') -# plt.plot(xi, fg.data-logity, 'b.') -# plt.show() -# return - - fg = gkreg.eval_grid( - xiii, output='plotobj', title='Kernel regression', plotflag=1) - pi = reverse(fg.data) - - dx = xi[1] - xi[0] - ckreg = KDE(x, hs=hs) - # ci = ckreg.eval_grid_fast(xi)*n*dx - ciii = ckreg.eval_grid_fast(xiii) * dx * x.size # n*(1+symmetric) - -# sa1 = np.sqrt(1./(ciii*pi*(1-pi))) -# plo3 = reverse(fg.data-z0*sa) -# pup3 = reverse(fg.data+z0*sa) - fg.data = pi - pi = f.data - - # ref Casella and Berger (1990) "Statistical inference" pp444 -# a = 2*pi + z0**2/(ciii+1e-16) -# b = 2*(1+z0**2/(ciii+1e-16)) -# plo2 = ((a-sqrt(a**2-2*pi**2*b))/b).clip(min=0,max=1) -# pup2 = ((a+sqrt(a**2-2*pi**2*b))/b).clip(min=0,max=1) - # Jeffreys intervall a=b=0.5 - # st.beta.isf(alpha/2, x+a, n-x+b) - ab = 0.07 # 0.055 - pi1 = pi # fun1(xiii) - pup2 = np.where(pi == 1, - 1, - st.beta.isf(alpha / 2, - ciii * pi1 + ab, - ciii * (1 - pi1) + ab)) - plo2 = np.where(pi == 0, - 0, - st.beta.isf(1 - alpha / 2, - ciii * pi1 + ab, - ciii * (1 - pi1) + ab)) - - averr = np.trapz(pup2 - plo2, xiii) / \ - (xiii[-1] - xiii[0]) + 0.5 * (df[:-1] * df[1:] < 0).sum() - - # f2 = kreg_demo4(x, y, hs, hopt) - # Wilson score - den = 1 + (z0 ** 2. 
/ ciii) - xc = (pi1 + (z0 ** 2) / (2 * ciii)) / den - halfwidth = (z0 * sqrt((pi1 * (1 - pi1) / ciii) + - (z0 ** 2 / (4 * (ciii ** 2))))) / den - plo = (xc - halfwidth).clip(min=0) # wilson score - pup = (xc + halfwidth).clip(max=1.0) # wilson score - # pup = (pi + z0*np.sqrt(pi*(1-pi)/ciii)).clip(min=0,max=1) # dont use - # plo = (pi - z0*np.sqrt(pi*(1-pi)/ciii)).clip(min=0,max=1) - - # mi = kreg.eval_grid(x) - # sigma = (stineman_interp(x, xiii, pup)-stineman_interp(x, xiii, plo))/4 - # aic = np.abs((y-mi)/sigma).std()+ 0.5*(df[:-1]*df[1:]<0).sum()/n - # aic = np.abs((yiii-fiii)/(pup-plo)).std() + \ - # 0.5*(df[:-1]*df[1:]<0).sum() + \ - # ((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() - - k = (df[:-1] * df[1:] < 0).sum() # numpeaks - sigmai = (pup - plo) - aic = (((yiii - fiii) / sigmai) ** 2).sum() + \ - 2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \ - np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum() - - # aic = (((yiii-fiii)/sigmai)**2).sum()+ 2*k*(k+1)/(ni-k+1) + \ - # np.abs((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() - - # aic = averr + ((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() - - fg.plot(label='KReg grid aic=%2.3f' % (aic)) - f.plot(label='KReg averr=%2.3f ' % (averr)) - labtxt = '%d CI' % (int(100 * (1 - alpha))) - plt.fill_between(xiii, pup, plo, alpha=0.20, - color='r', linestyle='--', label=labtxt) - plt.fill_between(xiii, pup2, plo2, alpha=0.20, color='b', - linestyle=':', label='%d CI2' % (int(100 * (1 - alpha)))) - plt.plot(xiii, fun1(xiii), 'r', label='True model') - plt.scatter(xi, yi, label='data') - print('maxp = %g' % (np.nanmax(f.data))) - print('hs = %g' % (kreg.tkde.tkde.hs)) - plt.legend() - h = plt.gca() - if plotlog: - plt.setp(h, yscale='log') - # plt.show() - return hs1, hs2 - - -def kreg_demo4(x, y, hs, hopt, alpha=0.05): - st = stats - - n = x.size - xmin, xmax = x.min(), x.max() - ni = max(2 * int((xmax - xmin) / hopt) + 3, 5) - - sml = hopt * 0.1 - xi = np.linspace(xmin - sml, xmax + sml, ni) - xiii = np.linspace(xmin - sml, xmax + sml, 4 * ni + 1) - - kreg = KRegression(x, y, hs=hs, p=0) - - dx = xi[1] - xi[0] - ciii = kreg.tkde.eval_grid_fast(xiii) * dx * x.size -# ckreg = KDE(x,hs=hs) -# ciiii = ckreg.eval_grid_fast(xiii)*dx* x.size #n*(1+symmetric) - - f = kreg(xiii, output='plotobj') # , plot_kwds=dict(plotflag=7)) - pi = f.data - - # Jeffreys intervall a=b=0.5 - # st.beta.isf(alpha/2, x+a, n-x+b) - ab = 0.07 # 0.5 - pi1 = pi - pup = np.where(pi1 == 1, 1, st.beta.isf( - alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab)) - plo = np.where(pi1 == 0, 0, st.beta.isf( - 1 - alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab)) - - # Wilson score - # z0 = -_invnorm(alpha/2) -# den = 1+(z0**2./ciii); -# xc=(pi1+(z0**2)/(2*ciii))/den; -# halfwidth=(z0*sqrt((pi1*(1-pi1)/ciii)+(z0**2/(4*(ciii**2)))))/den -# plo2 = (xc-halfwidth).clip(min=0) # wilson score -# pup2 = (xc+halfwidth).clip(max=1.0) # wilson score - # f.dataCI = np.vstack((plo,pup)).T - f.prediction_error_avg = np.trapz(pup - plo, xiii) / (xiii[-1] - xiii[0]) - fiii = f.data - - c = gridcount(x, xi) - if (y == 1).any(): - c0 = gridcount(x[y == 1], xi) - else: - c0 = np.zeros(xi.shape) - yi = np.where(c == 0, 0, c0 / c) - - f.children = [PlotData([plo, pup], xiii, plotmethod='fill_between', - plot_kwds=dict(alpha=0.2, color='r')), - PlotData(yi, xi, plotmethod='scatter', - plot_kwds=dict(color='r', s=5))] - - yiii = interpolate.interp1d(xi, yi)(xiii) - df = np.diff(fiii) - k = (df[:-1] * df[1:] < 0).sum() # numpeaks - sigmai = (pup - plo) - aicc = (((yiii - 
fiii) / sigmai) ** 2).sum() + \ - 2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \ - np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum() - - f.aicc = aicc - f.labels.title = 'perr=%1.3f,aicc=%1.3f, n=%d, hs=%1.3f' % ( - f.prediction_error_avg, aicc, n, hs) - - return f - - -def check_kreg_demo3(): - - plt.ion() - k = 0 - for n in [50, 100, 300, 600, 4000]: - x, y, fun1 = _get_data( - n, symmetric=True, loc1=1.0, scale1=0.6, scale2=1.25) - k0 = k - - for fun in ['hste', ]: - hsmax, _hs1, _hs2 = _get_regression_smooting(x, y, fun=fun) - for hi in np.linspace(hsmax * 0.25, hsmax, 9): - plt.figure(k) - k += 1 - unused = kreg_demo3(x, y, fun1, hs=hi, fun=fun, plotlog=False) - - # kreg_demo2(n=n,symmetric=True,fun='hste', plotlog=False) - fig.tile(range(k0, k)) - plt.ioff() - plt.show() - - -def check_kreg_demo4(): - plt.ion() - # test_docstrings() - # kde_demo2() - # kreg_demo1(fast=True) - # kde_gauss_demo() - # kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True) - k = 0 - for _i, n in enumerate([100, 300, 600, 4000]): - x, y, fun1 = _get_data( - n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75) - # k0 = k - hopt1, _h1, _h2 = _get_regression_smooting(x, y, fun='hos') - hopt2, _h1, _h2 = _get_regression_smooting(x, y, fun='hste') - hopt = sqrt(hopt1 * hopt2) - # hopt = _get_regression_smooting(x,y,fun='hos')[0] - for _j, fun in enumerate(['hste']): # , 'hisj', 'hns', 'hstt' - hsmax, _hs1, _hs2 = _get_regression_smooting(x, y, fun=fun) - - fmax = kreg_demo4(x, y, hsmax + 0.1, hopt) - for hi in np.linspace(hsmax * 0.1, hsmax, 55): - f = kreg_demo4(x, y, hi, hopt) - if f.aicc <= fmax.aicc: - fmax = f - plt.figure(k) - k += 1 - fmax.plot() - plt.plot(x, fun1(x), 'r') - - # kreg_demo2(n=n,symmetric=True,fun='hste', plotlog=False) - fig.tile(range(0, k)) - plt.ioff() - plt.show() - - -def check_regression_bin(): - plt.ion() - # test_docstrings() - # kde_demo2() - # kreg_demo1(fast=True) - # kde_gauss_demo() - # kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True) - k = 0 - for _i, n in enumerate([100, 300, 600, 4000]): - x, y, fun1 = _get_data( - n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75) - fbest = regressionbin(x, y, alpha=0.05, color='g', label='Transit_D') - - figk = plt.figure(k) - ax = figk.gca() - k += 1 - fbest.labels.title = 'N = %d' % n - fbest.plot(axis=ax) - ax.plot(x, fun1(x), 'r') - ax.legend(frameon=False, markerscale=4) - # ax = plt.gca() - ax.set_yticklabels(ax.get_yticks() * 100.0) - ax.grid(True) - - fig.tile(range(0, k)) - plt.ioff() - plt.show() - - -def check_bkregression(): - plt.ion() - k = 0 - for _i, n in enumerate([50, 100, 300, 600]): - x, y, fun1 = _get_data( - n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75) - bkreg = BKRegression(x, y) - fbest = bkreg.prb_search_best( - hsfun='hste', alpha=0.05, color='g', label='Transit_D') - - figk = plt.figure(k) - ax = figk.gca() - k += 1 -# fbest.score.plot(axis=ax) -# axsize = ax.axis() -# ax.vlines(fbest.hs,axsize[2]+1,axsize[3]) -# ax.set(yscale='log') - fbest.labels.title = 'N = %d' % n - fbest.plot(axis=ax) - ax.plot(x, fun1(x), 'r') - ax.legend(frameon=False, markerscale=4) - # ax = plt.gca() - ax.set_yticklabels(ax.get_yticks() * 100.0) - ax.grid(True) - - fig.tile(range(0, k)) - plt.ioff() - plt.show() - - -def _get_regression_smooting(x, y, fun='hste'): - hs1 = Kernel('gauss', fun=fun).get_smoothing(x) - # hx = np.median(np.abs(x-np.median(x)))/0.6745*(4.0/(3*n))**0.2 - if (y == 1).any(): - hs2 = Kernel('gauss', fun=fun).get_smoothing(x[y == 1]) - # hy = 
np.median(np.abs(y-np.mean(y)))/0.6745*(4.0/(3*n))**0.2 - else: - hs2 = 4 * hs1 - # hy = 4*hx - - # hy2 = Kernel('gauss', fun=fun).get_smoothing(y) - # kernel = Kernel('gauss',fun=fun) - # hopt = (hs1+2*hs2)/3 - # hopt = (hs1+4*hs2)/5 #kernel.get_smoothing(x) - # hopt = hs2 - hopt = sqrt(hs1 * hs2) - return hopt, hs1, hs2 - - -def empirical_bin_prb(x, y, hopt, color='r'): - """Returns empirical binomial probabiltity. - - Parameters - ---------- - x : ndarray - position ve - y : ndarray - binomial response variable (zeros and ones) - - Returns - ------- - P(x) : PlotData object - empirical probability - - """ - xmin, xmax = x.min(), x.max() - ni = max(2 * int((xmax - xmin) / hopt) + 3, 5) - - sml = hopt # *0.1 - xi = np.linspace(xmin - sml, xmax + sml, ni) - - c = gridcount(x, xi) - if (y == 1).any(): - c0 = gridcount(x[y == 1], xi) - else: - c0 = np.zeros(xi.shape) - yi = np.where(c == 0, 0, c0 / c) - return PlotData(yi, xi, plotmethod='scatter', - plot_kwds=dict(color=color, s=5)) - - -def smoothed_bin_prb(x, y, hs, hopt, alpha=0.05, color='r', label='', - bin_prb=None): - ''' - Parameters - ---------- - x,y - hs : smoothing parameter - hopt : spacing in empirical_bin_prb - alpha : confidence level - color : color of plot object - bin_prb : PlotData object with empirical bin prb - ''' - if bin_prb is None: - bin_prb = empirical_bin_prb(x, y, hopt, color) - - xi = bin_prb.args - yi = bin_prb.data - ni = len(xi) - dxi = xi[1] - xi[0] - - n = x.size - - xiii = np.linspace(xi[0], xi[-1], 10 * ni + 1) - - kreg = KRegression(x, y, hs=hs, p=0) - # expected number of data in each bin - ciii = kreg.tkde.eval_grid_fast(xiii) * dxi * n - - f = kreg(xiii, output='plotobj') # , plot_kwds=dict(plotflag=7)) - pi = f.data - - st = stats - # Jeffreys intervall a=b=0.5 - # st.beta.isf(alpha/2, x+a, n-x+b) - ab = 0.07 # 0.5 - pi1 = pi - pup = np.where(pi1 == 1, 1, st.beta.isf( - alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab)) - plo = np.where(pi1 == 0, 0, st.beta.isf( - 1 - alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab)) - - # Wilson score - # z0 = -_invnorm(alpha/2) -# den = 1+(z0**2./ciii); -# xc=(pi1+(z0**2)/(2*ciii))/den; -# halfwidth=(z0*sqrt((pi1*(1-pi1)/ciii)+(z0**2/(4*(ciii**2)))))/den -# plo2 = (xc-halfwidth).clip(min=0) # wilson score -# pup2 = (xc+halfwidth).clip(max=1.0) # wilson score - # f.dataCI = np.vstack((plo,pup)).T - f.prediction_error_avg = np.trapz(pup - plo, xiii) / (xiii[-1] - xiii[0]) - fiii = f.data - - f.plot_kwds['color'] = color - f.plot_kwds['linewidth'] = 2 - if label: - f.plot_kwds['label'] = label - f.children = [PlotData([plo, pup], xiii, plotmethod='fill_between', - plot_kwds=dict(alpha=0.2, color=color)), - bin_prb] - - yiii = interpolate.interp1d(xi, yi)(xiii) - df = np.diff(fiii) - k = (df[:-1] * df[1:] < 0).sum() # numpeaks - sigmai = (pup - plo) - aicc = (((yiii - fiii) / sigmai) ** 2).sum() + \ - 2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \ - np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum() - - f.aicc = aicc - f.fun = kreg - f.labels.title = 'perr=%1.3f,aicc=%1.3f, n=%d, hs=%1.3f' % ( - f.prediction_error_avg, aicc, n, hs) - - return f - - -def regressionbin(x, y, alpha=0.05, color='r', label=''): - """Return kernel regression estimate for binomial data. 
- - Parameters - ---------- - x : arraylike - positions - y : arraylike - of 0 and 1 - - """ - - hopt1, _h1, _h2 = _get_regression_smooting(x, y, fun='hos') - hopt2, _h1, _h2 = _get_regression_smooting(x, y, fun='hste') - hopt = sqrt(hopt1 * hopt2) - - fbest = smoothed_bin_prb(x, y, hopt2 + 0.1, hopt, alpha, color, label) - bin_prb = fbest.children[-1] - for fun in ['hste']: # , 'hisj', 'hns', 'hstt' - hsmax, _hs1, _hs2 = _get_regression_smooting(x, y, fun=fun) - for hi in np.linspace(hsmax * 0.1, hsmax, 55): - f = smoothed_bin_prb(x, y, hi, hopt, alpha, color, label, bin_prb) - if f.aicc <= fbest.aicc: - fbest = f - # hbest = hi - return fbest - - -def kde_gauss_demo(n=50): - """KDEDEMO Demonstrate the KDEgauss. - - KDEDEMO1 shows the true density (dotted) compared to KDE based on 7 - observations (solid) and their individual kernels (dashed) for 3 - different values of the smoothing parameter, hs. - - """ - - st = stats - # x = np.linspace(-4, 4, 101) - # data = np.random.normal(loc=0, scale=1.0, size=n) - # data = np.random.exponential(scale=1.0, size=n) -# n1 = 128 -# I = (np.arange(n1)*pi)**2 *0.01*0.5 -# kw = exp(-I) -# plt.plot(idctn(kw)) -# return - # dist = st.norm - dist = st.expon - data = dist.rvs(loc=0, scale=1.0, size=n) - d, _N = np.atleast_2d(data).shape - - if d == 1: - plot_options = [dict(color='red'), dict( - color='green'), dict(color='black')] - else: - plot_options = [dict(colors='red'), dict(colors='green'), - dict(colors='black')] - - plt.figure(1) - kde0 = KDE(data, kernel=Kernel('gauss', 'hste')) - f0 = kde0.eval_grid_fast(output='plot', ylab='Density') - f0.plot(**plot_options[0]) - - kde1 = TKDE(data, kernel=Kernel('gauss', 'hisj'), L2=.5) - f1 = kde1.eval_grid_fast(output='plot', ylab='Density') - f1.plot(**plot_options[1]) - - kde2 = KDEgauss(data) - f2 = kde2(output='plot', ylab='Density') - x = f2.args - f2.plot(**plot_options[2]) - - fmax = dist.pdf(x, 0, 1).max() - if d == 1: - plt.plot(x, dist.pdf(x, 0, 1), 'k:') - plt.axis([x.min(), x.max(), 0, fmax]) - plt.show() - print(fmax / f2.data.max()) - format_ = ''.join(('%g, ') * d) - format_ = 'hs0=%s hs1=%s hs2=%s' % (format_, format_, format_) - print(format_ % tuple(kde0.hs.tolist() + - kde1.tkde.hs.tolist() + kde2.hs.tolist())) - print('inc0 = %d, inc1 = %d, inc2 = %d' % (kde0.inc, kde1.inc, kde2.inc)) - - -def test_kde(): - data = np.array([ - 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887, - 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, - 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, - 1.8919469, 0.72433808, 1.92973094, 0.44749838, 1.36508452]) - - x = np.linspace(0.01, max(data.ravel()) + 1, 10) - kde = TKDE(data, hs=0.5, L2=0.5) - _f = kde(x) - # f = array([1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318, - # 0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882]) - - _f1 = kde.eval_grid(x) - # array([ 1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318, - # 0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882]) - - _f2 = kde.eval_grid_fast(x) - # array([ 1.06437223, 0.46203314, 0.39593137, 0.32781899, 0.26276433, - # 0.20532206, 0.15723498, 0.11843998, 0.08797755, 0. 
])
-
-
-def test_docstrings():
-    import doctest
-    print('Testing docstrings in %s' % __file__)
-    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
-
-
-if __name__ == '__main__':
-    test_docstrings()
-    # test_kde()
-    # check_bkregression()
-    # check_regression_bin()
-    # check_kreg_demo3()
-    # check_kreg_demo4()
-
-    # test_smoothn_1d()
-    # test_smoothn_2d()
-
-    # kde_demo2()
-    # kreg_demo1(fast=True)
-    # kde_gauss_demo()
-    # kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True)
-    # plt.show('hold')
diff --git a/wafo/kdetools/__init__.py b/wafo/kdetools/__init__.py
new file mode 100644
index 0000000..f113919
--- /dev/null
+++ b/wafo/kdetools/__init__.py
@@ -0,0 +1,3 @@
+from .kdetools import * #@PydevCodeAnalysisIgnore
+from .gridding import * #@PydevCodeAnalysisIgnore
+from .kernels import * #@PydevCodeAnalysisIgnore
diff --git a/wafo/kdetools/gridding.py b/wafo/kdetools/gridding.py
new file mode 100644
index 0000000..8afc712
--- /dev/null
+++ b/wafo/kdetools/gridding.py
@@ -0,0 +1,324 @@
+'''
+Created on 15. Dec. 2016
+
+@author: pab
+'''
+from __future__ import division
+from scipy import sparse
+import numpy as np
+from wafo.testing import test_docstrings
+from itertools import product
+
+__all__ = ['accum', 'gridcount']
+
+
+def bitget(int_type, offset):
+    """Returns the value of the bit at the offset position in int_type.
+
+    Example
+    -------
+    >>> bitget(5, np.r_[0:4])
+    array([1, 0, 1, 0])
+
+    """
+    return np.bitwise_and(int_type, 1 << offset) >> offset
+
+
+def accumsum(accmap, a, shape, dtype=None):
+    """
+    Example
+    -------
+    >>> from numpy import array
+    >>> a = array([[1,2,3],[4,-1,6],[-1,8,9]])
+    >>> a
+    array([[ 1,  2,  3],
+           [ 4, -1,  6],
+           [-1,  8,  9]])
+    >>> # Sum the diagonals.
+    >>> accmap = array([[0,1,2],[2,0,1],[1,2,0]])
+    >>> s = accumsum(accmap, a, (3,))
+    >>> np.allclose(s.toarray().ravel(), [9, 7, 15])
+    True
+
+    """
+    if dtype is None:
+        dtype = a.dtype
+    shape = np.atleast_1d(shape)
+    if len(shape) > 1:
+        binx = accmap[:, 0]
+        biny = accmap[:, 1]
+        out = sparse.coo_matrix(
+            (a.ravel(), (binx, biny)), shape=shape, dtype=dtype).tocsr()
+    else:
+        binx = accmap.ravel()
+        zero = np.zeros(len(binx))
+        out = sparse.coo_matrix(
+            (a.ravel(), (binx, zero)), shape=(shape, 1), dtype=dtype).tocsr()
+    return out
+
+
+def accumsum2(accmap, a, shape):
+    """
+    Example
+    -------
+    >>> from numpy import array
+    >>> a = array([[1,2,3],[4,-1,6],[-1,8,9]])
+    >>> a
+    array([[ 1,  2,  3],
+           [ 4, -1,  6],
+           [-1,  8,  9]])
+    >>> # Sum the diagonals.
+    >>> accmap = array([[0,1,2],[2,0,1],[1,2,0]])
+    >>> s = accumsum2(accmap, a, (3,))
+    >>> np.allclose(s, [9, 7, 15])
+    True
+
+    """
+    return np.bincount(accmap.ravel(), a.ravel(), np.array(shape).max())
+
+
+def accum(accmap, a, func=None, size=None, fill_value=0, dtype=None):
+    """An accumulation function similar to Matlab's `accumarray` function.
+
+    Parameters
+    ----------
+    accmap : ndarray
+        This is the "accumulation map". It maps input (i.e. indices into
+        `a`) to their destination in the output array. The first `a.ndim`
+        dimensions of `accmap` must be the same as `a.shape`. That is,
+        `accmap.shape[:a.ndim]` must equal `a.shape`. For example, if `a`
+        has shape (15,4), then `accmap.shape[:2]` must equal (15,4). In this
+        case `accmap[i,j]` gives the index into the output array where
+        element (i,j) of `a` is to be accumulated. If the output is, say,
+        2D, then `accmap` must have shape (15,4,2). The values in the
+        last dimension give indices into the output array.
+        If the output is 1D, then the shape of `accmap` can be either
+        (15,4) or (15,4,1).
+    a : ndarray
+        The input data to be accumulated.
+    func : callable or None
+        The accumulation function. The function will be passed a list
+        of values from `a` to be accumulated.
+        If None, numpy.sum is assumed.
+    size : ndarray or None
+        The size of the output array. If None, the size will be determined
+        from `accmap`.
+    fill_value : scalar
+        The default value for elements of the output array.
+    dtype : numpy data type, or None
+        The data type of the output array. If None, the data type of
+        `a` is used.
+
+    Returns
+    -------
+    out : ndarray
+        The accumulated results.
+
+        The shape of `out` is `size` if `size` is given. Otherwise the
+        shape is determined by the (lexicographically) largest indices of
+        the output found in `accmap`.
+
+    Examples
+    --------
+    >>> from numpy import array, prod
+    >>> a = array([[1,2,3],[4,-1,6],[-1,8,9]])
+    >>> a
+    array([[ 1,  2,  3],
+           [ 4, -1,  6],
+           [-1,  8,  9]])
+    >>> # Sum the diagonals.
+    >>> accmap = array([[0,1,2],[2,0,1],[1,2,0]])
+    >>> s = accum(accmap, a)
+    >>> s
+    array([ 9,  7, 15])
+    >>> # A 2D output, from sub-arrays with shapes and positions like this:
+    >>> # [ (2,2) (2,1)]
+    >>> # [ (1,2) (1,1)]
+    >>> accmap = array([
+    ...     [[0,0],[0,0],[0,1]],
+    ...     [[0,0],[0,0],[0,1]],
+    ...     [[1,0],[1,0],[1,1]]])
+    >>> # Accumulate using a product.
+    >>> accum(accmap, a, func=prod, dtype=float)
+    array([[ -8.,  18.],
+           [ -8.,   9.]])
+    >>> # Same accmap, but create an array of lists of values.
+    >>> accum(accmap, a, func=lambda x: x, dtype='O')
+    array([[[1, 2, 4, -1], [3, 6]],
+           [[-1, 8], [9]]], dtype=object)
+
+    """
+
+    def create_array_of_python_lists(accmap, a, size):
+        vals = np.empty(size, dtype='O')
+        for s in product(*[range(k) for k in size]):
+            vals[s] = []
+
+        for s in product(*[range(k) for k in a.shape]):
+            indx = tuple(accmap[s])
+            val = a[s]
+            vals[indx].append(val)
+
+        return vals
+
+    # Check for bad arguments and handle the defaults.
+    if accmap.shape[:a.ndim] != a.shape:
+        raise ValueError(
+            "The initial dimensions of accmap must be the same as a.shape")
+    if func is None:
+        func = np.sum
+    if dtype is None:
+        dtype = a.dtype
+    if accmap.shape == a.shape:
+        accmap = np.expand_dims(accmap, -1)
+    adims = tuple(range(a.ndim))
+    if size is None:
+        size = 1 + np.squeeze(np.apply_over_axes(np.max, accmap, axes=adims))
+    size = np.atleast_1d(size)
+
+    # Create an array of python lists of values.
+    vals = create_array_of_python_lists(accmap, a, size)
+
+    # Create the output array.
+    out = np.empty(size, dtype=dtype)
+    for s in product(*[range(k) for k in size]):
+        if vals[s] == []:
+            out[s] = fill_value
+        else:
+            out[s] = func(vals[s])
+    return out
+
+
+def gridcount(data, X, y=1):
+    '''
+    Returns D-dimensional histogram using linear binning.
+
+    Parameters
+    ----------
+    data = column vectors with D-dimensional data, shape D x Nd
+    X = row vectors defining discretization, shape D x N
+        Must include the range of the data.
+
+    Returns
+    -------
+    c = gridcount, shape N x N x ... x N
+
+    GRIDCOUNT obtains the grid counts using linear binning.
+    There are 2 strategies: simple- or linear- binning.
+    Suppose that an observation occurs at x and that the nearest points
+    below and above are y and z, respectively. Then the simple binning
+    strategy assigns a unit weight to either y or z, whichever is closer.
+    Linear binning, on the other hand, assigns the grid point at y a weight
+    of (z-x)/(z-y) and the grid point at z a weight of (x-y)/(z-y), so that
+    the two weights are non-negative and sum to one.
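+    For example, with neighbouring grid points y = 2 and z = 3, an
+    observation at x = 2.3 contributes a weight of 0.7 to y and 0.3 to z.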
+ + In terms of approximation error of using gridcounts as pdf-estimate, + linear binning is significantly more accurate than simple binning. + + NOTE: The interval [min(X);max(X)] must include the range of the data. + The order of C is permuted in the same order as + meshgrid for D==2 or D==3. + + Example + ------- + >>> import numpy as np + >>> import wafo.kdetools as wk + >>> import pylab as plb + >>> N = 20 + >>> data = np.random.rayleigh(1,N) + >>> data = np.array( + ... [ 1.07855907, 1.51199717, 1.54382893, 1.54774808, 1.51913566, + ... 1.11386486, 1.49146216, 1.51127214, 2.61287913, 0.94793051, + ... 2.08532731, 1.35510641, 0.56759888, 1.55766981, 0.77883602, + ... 0.9135759 , 0.81177855, 1.02111483, 1.76334202, 0.07571454]) + >>> x = np.linspace(0,max(data)+1,50) + >>> dx = x[1]-x[0] + + >>> c = wk.gridcount(data, x) + >>> np.allclose(c[:5], [ 0., 0.9731147, 0.0268853, 0., 0.]) + True + + >>> pdf = c/dx/N + >>> np.allclose(np.trapz(pdf, x), 1) + True + + h = plb.plot(x,c,'.') # 1D histogram + h1 = plb.plot(x, pdf) # 1D probability density plot + + See also + -------- + bincount, accum, kdebin + + Reference + ---------- + Wand,M.P. and Jones, M.C. (1995) + 'Kernel smoothing' + Chapman and Hall, pp 182-192 + ''' + dat = np.atleast_2d(data) + x = np.atleast_2d(X) + y = np.atleast_1d(y).ravel() + d = dat.shape[0] + d1, inc = x.shape + + if d != d1: + raise ValueError('Dimension 0 of data and X do not match.') + + dx = np.diff(x[:, :2], axis=1) + xlo = x[:, 0] + xup = x[:, -1] + + datlo = dat.min(axis=1) + datup = dat.max(axis=1) + if ((datlo < xlo) | (xup < datup)).any(): + raise ValueError('X does not include whole range of the data!') + + csiz = np.repeat(inc, d) + use_sparse = False + if use_sparse: + acfun = accumsum # faster than accum + else: + acfun = accumsum2 # accum + + binx = np.asarray(np.floor((dat - xlo[:, np.newaxis]) / dx), dtype=int) + w = dx.prod() + if d == 1: + x.shape = (-1,) + c = np.asarray((acfun(binx, (x[binx + 1] - dat) * y, shape=(inc, )) + + acfun(binx + 1, (dat - x[binx]) * y, shape=(inc, ))) / + w).ravel() + else: # d>2 + + Nc = csiz.prod() + c = np.zeros((Nc,)) + + fact2 = np.asarray(np.reshape(inc * np.arange(d), (d, -1)), dtype=int) + fact1 = np.asarray( + np.reshape(csiz.cumprod() / inc, (d, -1)), dtype=int) + # fact1 = fact1(ones(n,1),:); + bt0 = [0, 0] + X1 = X.ravel() + for ir in range(2 ** (d - 1)): + bt0[0] = np.reshape(bitget(ir, np.arange(d)), (d, -1)) + bt0[1] = 1 - bt0[0] + for ix in range(2): + one = np.mod(ix, 2) + two = np.mod(ix + 1, 2) + # Convert to linear index + # linear index to c + b1 = np.sum((binx + bt0[one]) * fact1, axis=0) + bt2 = bt0[two] + fact2 + b2 = binx + bt2 # linear index to X + c += acfun(b1, np.abs(np.prod(X1[b2] - dat, axis=0)) * y, + shape=(Nc,)) + + c = np.reshape(c / w, csiz, order='F') + + T = [i for i in range(d)] + T[1], T[0] = T[0], T[1] + # make sure c is stored in the same way as meshgrid + c = c.transpose(*T) + return c + + +if __name__ == '__main__': + test_docstrings(__file__) diff --git a/wafo/kdetools/kdetools.py b/wafo/kdetools/kdetools.py new file mode 100644 index 0000000..4129341 --- /dev/null +++ b/wafo/kdetools/kdetools.py @@ -0,0 +1,2139 @@ +#!/usr/bin/env python +# ------------------------------------------------------------------------- +# Name: kdetools +# Purpose: +# +# Author: pab +# +# Created: 01.11.2008 +# Copyright: (c) pab 2008 +# Licence: LGPL +# ------------------------------------------------------------------------- + +from __future__ import absolute_import, division +# from abc 
import ABCMeta, abstractmethod +import copy +import warnings +import numpy as np +import scipy.stats +from scipy import interpolate, linalg, special +from numpy import pi, sqrt, atleast_2d, exp, meshgrid +from wafo.misc import nextpow2 +from wafo.containers import PlotData +from wafo.dctpack import dctn, idctn # , dstn, idstn +from wafo.plotbackend import plotbackend as plt +from wafo.testing import test_docstrings +from wafo.kdetools.kernels import iqrange, qlevels, Kernel +from wafo.kdetools.gridding import gridcount +import time + +try: + from wafo import fig +except ImportError: + warnings.warn('fig import only supported on Windows') + +__all__ = ['TKDE', 'KDE', 'kde_demo1', 'kde_demo2', 'test_docstrings', + 'KRegression', 'KDEgauss'] + + +def _assert(cond, msg): + if not cond: + raise ValueError(msg) + + +def _invnorm(q): + return special.ndtri(q) + + +class _KDE(object): + + """ Kernel-Density Estimator base class. + + Parameters + ---------- + data : (# of dims, # of data)-array + datapoints to estimate from + hs : array-like (optional) + smooting parameter vector/matrix. + (default compute from data using kernel.get_smoothing function) + kernel : kernel function object. + kernel must have get_smoothing method + alpha : real scalar (optional) + sensitivity parameter (default 0 regular KDE) + A good choice might be alpha = 0.5 ( or 1/D) + alpha = 0 Regular KDE (hs is constant) + 0 < alpha <= 1 Adaptive KDE (Make hs change) + + Members + ------- + d : int + number of dimensions + n : int + number of datapoints + + Methods + ------- + kde.eval_grid_fast(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_grid(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_points(points) : array + evaluate the estimated pdf on a provided set of points + kde(x0, x1,..., xd) : array + same as kde.eval_grid(x0, x1,..., xd) + """ + + def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, + xmax=None, inc=512): + self.dataset = atleast_2d(data) + self.kernel = kernel if kernel else Kernel('gauss') + self.xmin = xmin + self.xmax = xmax + self.hs = hs + self.inc = inc + self.alpha = alpha + self.initialize() + + @property + def n(self): + return self.dataset.shape[1] + + @property + def d(self): + return self.dataset.shape[0] + + @property + def sigma(self): + """minimum(stdev, 0.75 * interquartile-range)""" + iqr = iqrange(self.dataset, axis=-1) + sigma = np.minimum(np.std(self.dataset, axis=-1, ddof=1), iqr / 1.34) + return sigma + + @property + def xmin(self): + return self._xmin + + @xmin.setter + def xmin(self, xmin): + if xmin is None: + self._xmin = self.dataset.min(axis=-1) - 2 * self.sigma + else: + self._xmin = xmin * np.ones(self.d) + + @property + def xmax(self): + return self._xmax + + @xmax.setter + def xmax(self, xmax): + if xmax is None: + self._xmax = self.dataset.max(axis=-1) + 2 * self.sigma + else: + self._xmax = xmax * np.ones(self.d) + + def _replace_negatives_with_default_hs(self, h): + get_default_hs = self.kernel.get_smoothing + ind, = np.where(h <= 0) + for i in ind.tolist(): + h[i] = get_default_hs(self.dataset[i]) + + def _check_hs(self, h): + """make sure it has the correct dimension and replace negative vals""" + h = np.atleast_1d(h) + if (len(h.shape) == 1) or (self.d == 1): + h = h * np.ones(self.d) if max(h.shape) == 1 else h.reshape(self.d) + self._replace_negatives_with_default_hs(h) + return h + + def _invert_hs(self, h): + if (len(h.shape) == 1) or (self.d == 1): + determinant 
= h.prod() + inv_hs = np.diag(1.0 / h) + else: # fully general smoothing matrix + determinant = linalg.det(h) + _assert(0 < determinant, + 'bandwidth matrix h must be positive definit!') + inv_hs = linalg.inv(h) + return inv_hs, determinant + + @property + def hs(self): + return self._hs + + @hs.setter + def hs(self, h): + if h is None: + h = self.kernel.get_smoothing(self.dataset) + h = self._check_hs(h) + inv_hs, deth = self._invert_hs(h) + + self._norm_factor = deth * self.n + self._inv_hs = inv_hs + self._hs = h + + @property + def inc(self): + return self._inc + + @inc.setter + def inc(self, inc): + if inc is None: + _tau, tau = self.kernel.effective_support() + xyzrange = 8 * self.sigma + L1 = 10 + inc = max(48, (L1 * xyzrange / (tau * self.hs)).max()) + inc = 2 ** nextpow2(inc) + self._inc = inc + + @property + def alpha(self): + return self._alpha + + @alpha.setter + def alpha(self, alpha): + self._alpha = alpha + self._lambda = np.ones(self.n) + if alpha > 0: + f = self.eval_points(self.dataset) # pilot estimate + g = np.exp(np.mean(np.log(f))) + self._lambda = (f / g) ** (-alpha) + + def initialize(self): + if self.n > 1: + self._initialize() + + def _initialize(self): + pass + + def get_args(self, xmin=None, xmax=None): + if xmin is None: + xmin = self.xmin + else: + xmin = [min(i, j) for i, j in zip(xmin, self.xmin)] + if xmax is None: + xmax = self.xmax + else: + xmax = [max(i, j) for i, j in zip(xmax, self.xmax)] + args = [] + inc = self.inc + for i in range(self.d): + args.append(np.linspace(xmin[i], xmax[i], inc)) + return args + + def eval_grid_fast(self, *args, **kwds): + """Evaluate the estimated pdf on a grid. + + Parameters + ---------- + arg_0,arg_1,... arg_d-1 : vectors + Alternatively, if no vectors is passed in then + arg_i = linspace(self.xmin[i], self.xmax[i], self.inc) + output : string optional + 'value' if value output + 'data' if object output + + Returns + ------- + values : array-like + The values evaluated at meshgrid(*args). + + """ + if len(args) == 0: + args = self.get_args() + self.args = args + return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds) + + def _eval_grid_fast(self, *args, **kwds): + pass + + def eval_grid(self, *args, **kwds): + """Evaluate the estimated pdf on a grid. + + Parameters + ---------- + arg_0,arg_1,... arg_d-1 : vectors + Alternatively, if no vectors is passed in then + arg_i = linspace(self.xmin[i], self.xmax[i], self.inc) + output : string optional + 'value' if value output + 'data' if object output + + Returns + ------- + values : array-like + The values evaluated at meshgrid(*args). + + """ + if len(args) == 0: + args = self.get_args() + self.args = args + return self._eval_grid_fun(self._eval_grid, *args, **kwds) + + def _eval_grid(self, *args, **kwds): + pass + + def _add_contour_levels(self, wdata): + p_levels = np.r_[10:90:20, 95, 99, 99.9] + try: + c_levels = qlevels(wdata.data, p=p_levels) + wdata.clevels = c_levels + wdata.plevels = p_levels + except Exception as e: + msg = "Could not calculate contour levels!. 
({})".format(str(e)) + warnings.warn(msg) + + def _make_object(self, f, **kwds): + titlestr = 'Kernel density estimate ({})'.format(self.kernel.name) + kwds2 = dict(title=titlestr) + kwds2['plot_kwds'] = dict(plotflag=1) + kwds2.update(**kwds) + args = self.args + if self.d == 1: + args = args[0] + wdata = PlotData(f, args, **kwds2) + if self.d > 1: + self._add_contour_levels(wdata) + return wdata + + def _eval_grid_fun(self, eval_grd, *args, **kwds): + output = kwds.pop('output', 'value') + f = eval_grd(*args, **kwds) + if output == 'value': + return f + return self._make_object(f, **kwds) + + def _check_shape(self, points): + points = atleast_2d(points) + d, m = points.shape + if d != self.d: + _assert(d == 1 and m == self.d, "points have dimension {}, " + "dataset has dimension {}".format(d, self.d)) + # points was passed in as a row vector + points = np.reshape(points, (self.d, 1)) + return points + + def eval_points(self, points, **kwds): + """Evaluate the estimated pdf on a set of points. + + Parameters + ---------- + points : (# of dimensions, # of points)-array + Alternatively, a (# of dimensions,) vector can be passed in and + treated as a single point. + + Returns + ------- + values : (# of points,)-array + The values at each point. + + Raises + ------ + ValueError if the dimensionality of the input points is different than + the dimensionality of the KDE. + + """ + + points = self._check_shape(points) + return self._eval_points(points, **kwds) + + def _eval_points(self, points, **kwds): + pass + + __call__ = eval_grid + + +class TKDE(_KDE): + + """ Transformation Kernel-Density Estimator. + + Parameters + ---------- + dataset : (# of dims, # of data)-array + datapoints to estimate from + hs : array-like (optional) + smooting parameter vector/matrix. + (default compute from data using kernel.get_smoothing function) + kernel : kernel function object. + kernel must have get_smoothing method + alpha : real scalar (optional) + sensitivity parameter (default 0 regular KDE) + A good choice might be alpha = 0.5 ( or 1/D) + alpha = 0 Regular KDE (hs is constant) + 0 < alpha <= 1 Adaptive KDE (Make hs change) + xmin, xmax : vectors + specifying the default argument range for the kde.eval_grid methods. + For the kde.eval_grid_fast methods the values must cover the range of + the data. (default min(data)-range(data)/4, max(data)-range(data)/4) + If a single value of xmin or xmax is given then the boundary is the is + the same for all dimensions. + inc : scalar integer + defining the default dimension of the output from kde.eval_grid methods + (default 512) + (For kde.eval_grid_fast: A value below 50 is very fast to compute but + may give some inaccuracies. Values between 100 and 500 give very + accurate results) + L2 : array-like + vector of transformation parameters (default 1 no transformation) + t(xi;L2) = xi^L2*sign(L2) for L2(i) ~= 0 + t(xi;L2) = log(xi) for L2(i) == 0 + If single value of L2 is given then the transformation is the same in + all directions. 
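+        For example, L2=0.5 gives a square-root transformation, which is
+        often a reasonable choice for non-negative data such as Rayleigh
+        distributed samples (see kde_demo2).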
+ + Members + ------- + d : int + number of dimensions + n : int + number of datapoints + + Methods + ------- + kde.eval_grid_fast(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_grid(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_points(points) : array + evaluate the estimated pdf on a provided set of points + kde(x0, x1,..., xd) : array + same as kde.eval_grid(x0, x1,..., xd) + + Example + ------- + N = 20 + data = np.random.rayleigh(1, size=(N,)) + >>> data = np.array([ + ... 0.75355792, 0.72779194, 0.94149169, 0.07841119,2.32291887, + ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, + ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, + ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) + + >>> import wafo.kdetools as wk + >>> x = np.linspace(0.01, max(data.ravel()) + 1, 10) + >>> kde = wk.TKDE(data, hs=0.5, L2=0.5) + >>> f = kde(x) + >>> f + array([ 1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318, + 0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882]) + + >>> kde.eval_grid(x) + array([ 1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318, + 0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882]) + + >>> kde.eval_grid_fast(x) + array([ 1.04018924, 0.45838973, 0.39514689, 0.32860532, 0.26433301, + 0.20717976, 0.15907697, 0.1201077 , 0.08941129, 0.06574899]) + + import pylab as plb + h1 = plb.plot(x, f) # 1D probability density plot + t = np.trapz(f, x) + """ + + def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, + xmax=None, inc=512, L2=None): + self.L2 = L2 + super(TKDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc) + + def _initialize(self): + self._check_xmin() + tdataset = self._dat2gaus(self.dataset) + xmin = self.xmin + if xmin is not None: + xmin = self._dat2gaus(np.reshape(xmin, (-1, 1))) + xmax = self.xmax + if xmax is not None: + xmax = self._dat2gaus(np.reshape(xmax, (-1, 1))) + self.tkde = KDE(tdataset, self.hs, self.kernel, self.alpha, xmin, xmax, + self.inc) + if self.inc is None: + self.inc = self.tkde.inc + + def _check_xmin(self): + if self.L2 is not None: + amin = self.dataset.min(axis=-1) + # default no transformation + L2 = np.atleast_1d(self.L2) * np.ones(self.d) + self.xmin = np.where(L2 != 1, + np.maximum(self.xmin, amin / 100.0), + self.xmin).reshape((-1, 1)) + + def _dat2gaus(self, points): + if self.L2 is None: + return points # default no transformation + + # default no transformation + L2 = np.atleast_1d(self.L2) * np.ones(self.d) + + tpoints = copy.copy(points) + for i, v2 in enumerate(L2.tolist()): + tpoints[i] = np.log(points[i]) if v2 == 0 else points[i] ** v2 + return tpoints + + def _gaus2dat(self, tpoints): + if self.L2 is None: + return tpoints # default no transformation + + # default no transformation + L2 = np.atleast_1d(self.L2) * np.ones(self.d) + + points = copy.copy(tpoints) + for i, v2 in enumerate(L2.tolist()): + points[i] = np.exp( + tpoints[i]) if v2 == 0 else tpoints[i] ** (1.0 / v2) + return points + + def _scale_pdf(self, pdf, points): + if self.L2 is None: + return pdf + # default no transformation + L2 = np.atleast_1d(self.L2) * np.ones(self.d) + for i, v2 in enumerate(L2.tolist()): + factor = v2 * np.sign(v2) if v2 else 1 + pdf *= np.where(v2 == 1, 1, points[i] ** (v2 - 1) * factor) + if (np.abs(np.diff(pdf)).max() > 10).any(): + msg = ''' Numerical problems may have occured due to the power + transformation. 
Check the KDE for spurious spikes''' + warnings.warn(msg) + return pdf + + def eval_grid_fast2(self, *args, **kwds): + """Evaluate the estimated pdf on a grid. + + Parameters + ---------- + arg_0,arg_1,... arg_d-1 : vectors + Alternatively, if no vectors is passed in then + arg_i = gauss2dat(linspace(dat2gauss(self.xmin[i]), + dat2gauss(self.xmax[i]), self.inc)) + output : string optional + 'value' if value output + 'data' if object output + + Returns + ------- + values : array-like + The values evaluated at meshgrid(*args). + + """ + return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds) + + def _interpolate(self, points, f, *args, **kwds): + ipoints = meshgrid(*args) if self.d > 1 else args + for i in range(self.d): + points[i].shape = -1, + points = np.asarray(points).T + + fi = interpolate.griddata(points, np.ravel(f), tuple(ipoints), + method='linear', fill_value=0.0) + self.args = args + r = kwds.get('r', 0) + if r == 0: + return fi * (fi > 0) + return fi + + def _eval_grid_fast(self, *args, **kwds): + if self.L2 is None: + f = self.tkde.eval_grid_fast(*args, **kwds) + self.args = self.tkde.args + return f + targs = [] + if len(args): + targs0 = self._dat2gaus(list(args)) + xmin = [min(t) for t in targs0] + xmax = [max(t) for t in targs0] + targs = self.tkde.get_args(xmin, xmax) + tf = self.tkde.eval_grid_fast(*targs) + self.args = self._gaus2dat(list(self.tkde.args)) + points = meshgrid(*self.args) if self.d > 1 else self.args + f = self._scale_pdf(tf, points) + if len(args): + return self._interpolate(points, f, *args, **kwds) + return f + + def _eval_grid(self, *args, **kwds): + if self.L2 is None: + return self.tkde.eval_grid(*args, **kwds) + targs = self._dat2gaus(list(args)) + tf = self.tkde.eval_grid(*targs, **kwds) + points = meshgrid(*args) if self.d > 1 else list(args) + f = self._scale_pdf(tf, points) + return f + + def _eval_points(self, points): + """Evaluate the estimated pdf on a set of points. + + Parameters + ---------- + points : (# of dimensions, # of points)-array + Alternatively, a (# of dimensions,) vector can be passed in and + treated as a single point. + + Returns + ------- + values : (# of points,)-array + The values at each point. + + Raises + ------ + ValueError if the dimensionality of the input points is different than + the dimensionality of the KDE. + + """ + if self.L2 is None: + return self.tkde.eval_points(points) + + tpoints = self._dat2gaus(points) + tf = self.tkde.eval_points(tpoints) + f = self._scale_pdf(tf, points) + return f + + +class KDE(_KDE): + + """ Kernel-Density Estimator. + + Parameters + ---------- + data : (# of dims, # of data)-array + datapoints to estimate from + hs : array-like (optional) + smooting parameter vector/matrix. + (default compute from data using kernel.get_smoothing function) + kernel : kernel function object. + kernel must have get_smoothing method + alpha : real scalar (optional) + sensitivity parameter (default 0 regular KDE) + A good choice might be alpha = 0.5 ( or 1/D) + alpha = 0 Regular KDE (hs is constant) + 0 < alpha <= 1 Adaptive KDE (Make hs change) + xmin, xmax : vectors + specifying the default argument range for the kde.eval_grid methods. + For the kde.eval_grid_fast methods the values must cover the range of + the data. + (default min(data)-range(data)/4, max(data)-range(data)/4) + If a single value of xmin or xmax is given then the boundary is the is + the same for all dimensions. 
+ inc : scalar integer (default 512) + defining the default dimension of the output from kde.eval_grid methods + (For kde.eval_grid_fast: A value below 50 is very fast to compute but + may give some inaccuracies. Values between 100 and 500 give very + accurate results) + + Members + ------- + d : int + number of dimensions + n : int + number of datapoints + + Methods + ------- + kde.eval_grid_fast(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_grid(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_points(points) : array + evaluate the estimated pdf on a provided set of points + kde(x0, x1,..., xd) : array + same as kde.eval_grid(x0, x1,..., xd) + + Example + ------- + N = 20 + data = np.random.rayleigh(1, size=(N,)) + >>> data = np.array([ + ... 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887, + ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, + ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, + ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) + + >>> x = np.linspace(0, max(data.ravel()) + 1, 10) + >>> import wafo.kdetools as wk + >>> kde = wk.KDE(data, hs=0.5, alpha=0.5) + >>> f = kde(x) + >>> f + array([ 0.17252055, 0.41014271, 0.61349072, 0.57023834, 0.37198073, + 0.21409279, 0.12738463, 0.07460326, 0.03956191, 0.01887164]) + + >>> kde.eval_grid(x) + array([ 0.17252055, 0.41014271, 0.61349072, 0.57023834, 0.37198073, + 0.21409279, 0.12738463, 0.07460326, 0.03956191, 0.01887164]) + >>> kde.eval_grid_fast(x) + array([ 0.21720891, 0.43308789, 0.59017626, 0.55847998, 0.39681482, + 0.23987473, 0.13113066, 0.06062029, 0.02160104, 0.00559028]) + + >>> kde0 = wk.KDE(data, hs=0.5, alpha=0.0) + >>> kde0.eval_points(x) + array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , + 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) + + >>> kde0.eval_grid(x) + array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , + 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) + >>> f = kde0.eval_grid(x, output='plotobj') + >>> f.data + array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , + 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) + + >>> f = kde0.eval_grid_fast() + >>> np.allclose(np.interp(x, kde0.args[0], f), + ... [ 0.20398034, 0.40252166, 0.54593292, 0.52218993, 0.39062245, + ... 0.26381651, 0.16407487, 0.08270847, 0.02991439, 0.00882095]) + True + >>> f1 = kde0.eval_grid_fast(output='plot') + >>> np.allclose(np.interp(x, f1.args, f1.data), + ... [ 0.20398034, 0.40252166, 0.54593292, 0.52218993, 0.39062245, + ... 0.26381651, 0.16407487, 0.08270847, 0.02991439, 0.00882095]) + True + + h = f1.plot() + import pylab as plb + h1 = plb.plot(x, f) # 1D probability density plot + t = np.trapz(f, x) + """ + + def _eval_grid_fast(self, *args, **kwds): + X = np.vstack(args) + d, inc = X.shape + dx = X[:, 1] - X[:, 0] + + Xn = [] + nfft0 = 2 * inc + nfft = (nfft0,) * d + x0 = np.linspace(-inc, inc, nfft0 + 1) + for i in range(d): + Xn.append(x0[:-1] * dx[i]) + + Xnc = meshgrid(*Xn) # if d > 1 else Xn + + shape0 = Xnc[0].shape + for i in range(d): + Xnc[i].shape = (-1,) + + Xn = np.dot(self._inv_hs, np.vstack(Xnc)) + + # Obtain the kernel weights. 
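+        # The kernel is evaluated on an extended grid of 2 * inc points per
+        # dimension (twice the output grid), so that the circular FFT
+        # convolution with the binned data below does not wrap kernel mass
+        # around the grid boundaries; only the first inc points of the
+        # result are kept.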
+ kw = self.kernel(Xn) + norm_fact0 = (kw.sum() * dx.prod() * self.n) + norm_fact = (self._norm_factor * self.kernel.norm_factor(d, self.n)) + if np.abs(norm_fact0 - norm_fact) > 0.05 * norm_fact: + warnings.warn( + 'Numerical inaccuracy due to too low discretization. ' + + 'Increase the discretization of the evaluation grid ' + + '(inc={})!'.format(inc)) + norm_fact = norm_fact0 + + kw = kw / norm_fact + r = kwds.get('r', 0) + if r != 0: + kw *= np.vstack(Xnc) ** r if d > 1 else Xnc[0] ** r + kw.shape = shape0 + kw = np.fft.ifftshift(kw) + fftn = np.fft.fftn + ifftn = np.fft.ifftn + + y = kwds.get('y', 1.0) + if self.alpha > 0: + y = y / self._lambda**d + + # Find the binned kernel weights, c. + c = gridcount(self.dataset, X, y=y) + # Perform the convolution. + z = np.real(ifftn(fftn(c, s=nfft) * fftn(kw))) +# opt = dict(type=1, norm=None) +# z = idctn(dctn(c, shape=(inc,)*d, **opt) * dctn(kw[:inc], **opt), +# **opt)/(inc-1)/2 +# # if r is odd +# op2 = dict(type=3, norm=None) +# z3 = idstn(dctn(c, shape=(inc,)*d, **op2) * dstn(kw[1:inc+1], **op2), +# **op2)/(inc-1)/2 + + ix = (slice(0, inc),) * d + if r == 0: + return z[ix] * (z[ix] > 0.0) + return z[ix] + + def _eval_grid(self, *args, **kwds): + + grd = meshgrid(*args) if len(args) > 1 else list(args) + shape0 = grd[0].shape + d = len(grd) + for i in range(d): + grd[i] = grd[i].ravel() + f = self.eval_points(np.vstack(grd), **kwds) + return f.reshape(shape0) + + def _moment_fun(self, r): + if r == 0: + return lambda x: 1 + return lambda x: (x ** r).sum(axis=0) + + @property + def norm_factor(self): + return self._norm_factor * self.kernel.norm_factor(self.d, self.n) + + def _loop_over_data(self, data, points, y, r): + fun = self._moment_fun(r) + d, m = points.shape + inv_hs, lambda_ = self._inv_hs, self._lambda + kernel = self.kernel + + y_d_lambda = y / lambda_ ** d + result = np.zeros((m,)) + for i in range(self.n): + dxi = points - data[:, i, np.newaxis] + tdiff = np.dot(inv_hs / lambda_[i], dxi) + result += fun(dxi) * kernel(tdiff) * y_d_lambda[i] + return result / self.norm_factor + + def _loop_over_points(self, data, points, y, r): + fun = self._moment_fun(r) + d, m = points.shape + inv_hs, lambda_ = self._inv_hs, self._lambda + kernel = self.kernel + + y_d_lambda = y / lambda_ ** d + result = np.zeros((m,)) + for i in range(m): + dxi = points[:, i, np.newaxis] - data + tdiff = np.dot(inv_hs, dxi / lambda_[np.newaxis, :]) + result[i] = np.sum(fun(dxi) * kernel(tdiff) * y_d_lambda, axis=-1) + return result / self.norm_factor + + def _eval_points(self, points, **kwds): + """Evaluate the estimated pdf on a set of points. + + Parameters + ---------- + points : (# of dimensions, # of points)-array + Alternatively, a (# of dimensions,) vector can be passed in and + treated as a single point. + + Returns + ------- + values : (# of points,)-array + The values at each point. + + Raises + ------ + ValueError if the dimensionality of the input points is different than + the dimensionality of the KDE. + + """ + d, m = points.shape + _assert(d == self.d, "d={} expected, got {}".format(self.d, d)) + + y = kwds.get('y', 1) + r = kwds.get('r', 0) + + more_points_than_data = m >= self.n + if more_points_than_data: + return self._loop_over_data(self.dataset, points, y, r) + return self._loop_over_points(self.dataset, points, y, r) + + +class KDEgauss(KDE): + + """ Kernel-Density Estimator base class. + + data : (# of dims, # of data)-array + datapoints to estimate from + hs : array-like (optional) + smooting parameter vector/matrix. 
+ (default compute from data using kernel.get_smoothing function) + kernel : kernel function object. + kernel must have get_smoothing method + alpha : real scalar (optional) + sensitivity parameter (default 0 regular KDE) + A good choice might be alpha = 0.5 ( or 1/D) + alpha = 0 Regular KDE (hs is constant) + 0 < alpha <= 1 Adaptive KDE (Make hs change) + xmin, xmax : vectors + specifying the default argument range for the kde.eval_grid methods. + For the kde.eval_grid_fast methods the values must cover the range of + the data. + (default min(data)-range(data)/4, max(data)-range(data)/4) + If a single value of xmin or xmax is given, then the boundary is the + the same for all dimensions. + inc : scalar integer (default 512) + defining the default dimension of the output from kde.eval_grid methods + (For kde.eval_grid_fast: A value below 50 is very fast to compute but + may give some inaccuracies. Values between 100 and 500 give very + accurate results) + + Members + ------- + d : int + number of dimensions + n : int + number of datapoints + + Methods + ------- + kde.eval_grid_fast(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde(x0, x1,..., xd) : array + same as kde.eval_grid_fast(x0, x1,..., xd) + """ + def _eval_grid_fast(self, *args, **kwds): + X = np.vstack(args) + d, inc = X.shape + # dx = X[:, 1] - X[:, 0] + R = X.max(axis=-1) - X.min(axis=-1) + + t_star = (self.hs / R) ** 2 + I = (np.asfarray(np.arange(0, inc)) * pi) ** 2 + In = [] + for i in range(d): + In.append(I * t_star[i] * 0.5) + + r = kwds.get('r', 0) + fun = self._moment_fun(r) + + Inc = meshgrid(*In) if d > 1 else In + kw = np.zeros((inc,) * d) + for i in range(d): + kw += exp(-Inc[i]) * fun(Inc[i]) + + y = kwds.get('y', 1.0) + d, n = self.dataset.shape + # Find the binned kernel weights, c. + c = gridcount(self.dataset, X, y=y) + # Perform the convolution. + at = dctn(c) * kw / n + z = idctn(at) * (at.size-1) / np.prod(R) + return z * (z > 0.0) + + __call__ = _KDE.eval_grid_fast + + +class KRegression(_KDE): + + """ Kernel-Regression + + Parameters + ---------- + data : (# of dims, # of data)-array + datapoints to estimate from + y : # of data - array + response variable + p : scalar integer (0 or 1) + Nadaraya-Watson estimator if p=0, + local linear estimator if p=1. + hs : array-like (optional) + smooting parameter vector/matrix. + (default compute from data using kernel.get_smoothing function) + kernel : kernel function object. + kernel must have get_smoothing method + alpha : real scalar (optional) + sensitivity parameter (default 0 regular KDE) + A good choice might be alpha = 0.5 ( or 1/D) + alpha = 0 Regular KDE (hs is constant) + 0 < alpha <= 1 Adaptive KDE (Make hs change) + xmin, xmax : vectors + specifying the default argument range for the kde.eval_grid methods. + For the kde.eval_grid_fast methods the values must cover the range of + the data. (default min(data)-range(data)/4, max(data)-range(data)/4) + If a single value of xmin or xmax is given then the boundary is the is + the same for all dimensions. + inc : scalar integer (default 128) + defining the default dimension of the output from kde.eval_grid methods + (For kde.eval_grid_fast: A value below 50 is very fast to compute but + may give some inaccuracies. 
Values between 100 and 500 give very + accurate results) + + Members + ------- + d : int + number of dimensions + n : int + number of datapoints + + Methods + ------- + kde.eval_grid_fast(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_grid(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_points(points) : array + evaluate the estimated pdf on a provided set of points + kde(x0, x1,..., xd) : array + same as kde.eval_grid(x0, x1,..., xd) + + + Example + ------- + >>> import wafo.kdetools as wk + >>> N = 100 + >>> x = np.linspace(0, 1, N) + >>> ei = np.random.normal(loc=0, scale=0.075, size=(N,)) + >>> ei = np.sqrt(0.075) * np.sin(100*x) + + >>> y = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2)) + ei + >>> kreg = wk.KRegression(x, y) + >>> f = kreg(output='plotobj', title='Kernel regression', plotflag=1) + >>> np.allclose(f.data[:5], + ... [ 3.18670593, 3.18678088, 3.18682196, 3.18682932, 3.18680337]) + True + + h = f.plot(label='p=0') + """ + + def __init__(self, data, y, p=0, hs=None, kernel=None, alpha=0.0, + xmin=None, xmax=None, inc=128, L2=None): + + self.tkde = TKDE(data, hs=hs, kernel=kernel, + alpha=alpha, xmin=xmin, xmax=xmax, inc=inc, L2=L2) + self.y = y + self.p = p + + def eval_grid_fast(self, *args, **kwds): + self._grdfun = self.tkde.eval_grid_fast + return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds) + + def eval_grid(self, *args, **kwds): + self._grdfun = self.tkde.eval_grid + return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds) + + def _eval_gridfun(self, *args, **kwds): + grdfun = self._grdfun + s0 = grdfun(*args, r=0) + t0 = grdfun(*args, r=0, y=self.y) + if self.p == 0: + return (t0 / (s0 + _TINY)).clip(min=-_REALMAX, max=_REALMAX) + elif self.p == 1: + s1 = grdfun(*args, r=1) + s2 = grdfun(*args, r=2) + t1 = grdfun(*args, r=1, y=self.y) + return ((s2 * t0 - s1 * t1) / + (s2 * s0 - s1 ** 2)).clip(min=-_REALMAX, max=_REALMAX) + __call__ = eval_grid_fast + + +class BKRegression(object): + + ''' + Kernel-Regression on binomial data + + method : {'beta', 'wilson'} + method is one of the following + 'beta', return Bayesian Credible interval using beta-distribution. + 'wilson', return Wilson score interval + a, b : scalars + parameters of the beta distribution defining the apriori distribution + of p, i.e., the Bayes estimator for p: p = (y+a)/(n+a+b). + Setting a=b=0.5 gives Jeffreys interval. + ''' + + def __init__(self, *args, **kwds): + self.method = kwds.pop('method', 'beta') + self.a = max(kwds.pop('a', 0.5), _TINY) + self.b = max(kwds.pop('b', 0.5), _TINY) + self.kreg = KRegression(*args, **kwds) + # defines bin width (i.e. 
smoothing) in empirical estimate + self.hs_e = None +# self.x = self.kreg.tkde.dataset +# self.y = self.kreg.y + + def _set_smoothing(self, hs): + self.kreg.tkde.hs = hs + self.kreg.tkde.initialize() + + x = property(fget=lambda cls: cls.kreg.tkde.dataset.squeeze()) + y = property(fget=lambda cls: cls.kreg.y) + kernel = property(fget=lambda cls: cls.kreg.tkde.kernel) + hs = property(fset=_set_smoothing, fget=lambda cls: cls.kreg.tkde.hs) + + def _get_max_smoothing(self, fun=None): + """Return maximum value for smoothing parameter.""" + x = self.x + y = self.y + if fun is None: + get_smoothing = self.kernel.get_smoothing + else: + get_smoothing = getattr(self.kernel, fun) + + hs1 = get_smoothing(x) + # hx = np.median(np.abs(x-np.median(x)))/0.6745*(4.0/(3*n))**0.2 + if (y == 1).any(): + hs2 = get_smoothing(x[y == 1]) + # hy = np.median(np.abs(y-np.mean(y)))/0.6745*(4.0/(3*n))**0.2 + else: + hs2 = 4 * hs1 + # hy = 4*hx + + hopt = sqrt(hs1 * hs2) + return hopt, hs1, hs2 + + def get_grid(self, hs_e=None): + if hs_e is None: + if self.hs_e is None: + hs1 = self._get_max_smoothing('hste')[0] + hs2 = self._get_max_smoothing('hos')[0] + self.hs_e = sqrt(hs1 * hs2) + hs_e = self.hs_e + x = self.x + xmin, xmax = x.min(), x.max() + ni = max(2 * int((xmax - xmin) / hs_e) + 3, 5) + sml = hs_e # *0.1 + xi = np.linspace(xmin - sml, xmax + sml, ni) + return xi + + def prb_ci(self, n, p, alpha=0.05, **kwds): + """Return Confidence Interval for the binomial probability p. + + Parameters + ---------- + n : array-like + number of Bernoulli trials + p : array-like + estimated probability of success in each trial + alpha : scalar + confidence level + method : {'beta', 'wilson'} + method is one of the following + 'beta', return Bayesian Credible interval using beta-distribution. + 'wilson', return Wilson score interval + a, b : scalars + parameters of the beta distribution defining the apriori + distribution of p, i.e., + the Bayes estimator for p: p = (y+a)/(n+a+b). + Setting a=b=0.5 gives Jeffreys interval. + + """ + if self.method.startswith('w'): + # Wilson score + z0 = -_invnorm(alpha / 2) + den = 1 + (z0 ** 2. / n) + xc = (p + (z0 ** 2) / (2 * n)) / den + halfwidth = (z0 * sqrt((p * (1 - p) / n) + + (z0 ** 2 / (4 * (n ** 2))))) / den + plo = (xc - halfwidth).clip(min=0) # wilson score + pup = (xc + halfwidth).clip(max=1.0) # wilson score + else: + # Jeffreys intervall a=b=0.5 + # st.beta.isf(alpha/2, y+a, n-y+b) y = n*p, n-y = n*(1-p) + a = self.a + b = self.b + st = scipy.stats + pup = np.where(p == 1, 1, + st.beta.isf(alpha / 2, n * p + a, n * (1 - p) + b)) + plo = np.where(p == 0, 0, + st.beta.isf(1 - alpha / 2, + n * p + a, n * (1 - p) + b)) + return plo, pup + + def prb_empirical(self, xi=None, hs_e=None, alpha=0.05, color='r', **kwds): + """Returns empirical binomial probabiltity. 
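+
+        The probability in each bin is the ratio of linearly binned
+        successes to trials (both obtained with gridcount), and prb_ci
+        supplies the corresponding confidence interval.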
+ + Parameters + ---------- + x : ndarray + position vector + y : ndarray + binomial response variable (zeros and ones) + alpha : scalar + confidence level + color: + used in plot + + Returns + ------- + P(x) : PlotData object + empirical probability + + """ + if xi is None: + xi = self.get_grid(hs_e) + + x = self.x + y = self.y + + c = gridcount(x, xi) # + self.a + self.b # count data + if (y == 1).any(): + c0 = gridcount(x[y == 1], xi) # + self.a # count success + else: + c0 = np.zeros(np.shape(xi)) + prb = np.where(c == 0, 0, c0 / (c + _TINY)) # assume prb==0 for c==0 + CI = np.vstack(self.prb_ci(c, prb, alpha, **kwds)) + + prb_e = PlotData(prb, xi, plotmethod='plot', plot_args=['.'], + plot_kwds=dict(markersize=6, color=color, picker=5)) + prb_e.dataCI = CI.T + prb_e.count = c + return prb_e + + def prb_smoothed(self, prb_e, hs, alpha=0.05, color='r', label=''): + """Return smoothed binomial probability. + + Parameters + ---------- + prb_e : PlotData object with empirical binomial probabilites + hs : smoothing parameter + alpha : confidence level + color : color of plot object + label : label for plot object + + """ + + x_e = prb_e.args + n_e = len(x_e) + dx_e = x_e[1] - x_e[0] + n = self.x.size + + x_s = np.linspace(x_e[0], x_e[-1], 10 * n_e + 1) + self.hs = hs + + prb_s = self.kreg(x_s, output='plotobj', title='', plot_kwds=dict( + color=color, linewidth=2)) # dict(plotflag=7)) + m_nan = np.isnan(prb_s.data) + if m_nan.any(): # assume 0/0 division + prb_s.data[m_nan] = 0.0 + + # prb_s.data[np.isnan(prb_s.data)] = 0 + # expected number of data in each bin + c_s = self.kreg.tkde.eval_grid_fast(x_s) * dx_e * n + plo, pup = self.prb_ci(c_s, prb_s.data, alpha) + + prb_s.dataCI = np.vstack((plo, pup)).T + prb_s.prediction_error_avg = np.trapz( + pup - plo, x_s) / (x_s[-1] - x_s[0]) + + if label: + prb_s.plot_kwds['label'] = label + prb_s.children = [PlotData([plo, pup], x_s, + plotmethod='fill_between', + plot_kwds=dict(alpha=0.2, color=color)), + prb_e] + + # empirical oversmooths the data +# p_s = prb_s.eval_points(self.x) +# dp_s = np.diff(prb_s.data) +# k = (dp_s[:-1]*dp_s[1:]<0).sum() # numpeaks +# p_e = self.y +# n_s = interpolate.interp1d(x_s, c_s)(self.x) +# plo, pup = self.prb_ci(n_s, p_s, alpha) +# sigmai = (pup-plo) +# aicc = (((p_e-p_s)/sigmai)**2).sum()+ 2*k*(k+1)/np.maximum(n-k+1,1) + + p_e = prb_e.eval_points(x_s) + p_s = prb_s.data + dp_s = np.sign(np.diff(p_s)) + k = (dp_s[:-1] != dp_s[1:]).sum() # numpeaks + + # sigmai = (pup-plo)+_EPS + # aicc = (((p_e-p_s)/sigmai)**2).sum()+ 2*k*(k+1)/np.maximum(n_e-k+1,1) + # + np.abs((p_e-pup).clip(min=0)-(p_e-plo).clip(max=0)).sum() + sigmai = _logit(pup) - _logit(plo) + _EPS + aicc = ((((_logit(p_e) - _logit(p_s)) / sigmai) ** 2).sum() + + 2 * k * (k + 1) / np.maximum(n_e - k + 1, 1) + + np.abs((p_e - pup).clip(min=0) - + (p_e - plo).clip(max=0)).sum()) + + prb_s.aicc = aicc + # prb_s.labels.title = '' + # prb_s.labels.title='perr=%1.3f,aicc=%1.3f, n=%d, hs=%1.3f' % + # (prb_s.prediction_error_avg,aicc,n,hs) + + return prb_s + + def prb_search_best(self, prb_e=None, hsvec=None, hsfun='hste', + alpha=0.05, color='r', label=''): + """Return best smoothed binomial probability. 
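+
+        The candidate smoothing parameters in hsvec are scored with the
+        aicc criterion computed by prb_smoothed, and the fit with the
+        lowest score is returned.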
+ + Parameters + ---------- + prb_e : PlotData object with empirical binomial probabilites + hsvec : arraylike (default np.linspace(hsmax*0.1,hsmax,55)) + vector smoothing parameters + hsfun : + method for calculating hsmax + + """ + if prb_e is None: + prb_e = self.prb_empirical( + hs_e=self.hs_e, alpha=alpha, color=color) + if hsvec is None: + hsmax = self._get_max_smoothing(hsfun)[0] # @UnusedVariable + hsmax = max(hsmax, self.hs_e) + hsvec = np.linspace(hsmax * 0.2, hsmax, 55) + + hs_best = hsvec[-1] + 0.1 + prb_best = self.prb_smoothed(prb_e, hs_best, alpha, color, label) + aicc = np.zeros(np.size(hsvec)) + for i, hi in enumerate(hsvec): + f = self.prb_smoothed(prb_e, hi, alpha, color, label) + aicc[i] = f.aicc + if f.aicc <= prb_best.aicc: + prb_best = f + hs_best = hi + prb_best.score = PlotData(aicc, hsvec) + prb_best.hs = hs_best + self._set_smoothing(hs_best) + return prb_best + + +def kde_demo1(): + """KDEDEMO1 Demonstrate the smoothing parameter impact on KDE. + + KDEDEMO1 shows the true density (dotted) compared to KDE based on 7 + observations (solid) and their individual kernels (dashed) for 3 + different values of the smoothing parameter, hs. + + """ + st = scipy.stats + x = np.linspace(-4, 4, 101) + x0 = x / 2.0 + data = np.random.normal(loc=0, scale=1.0, size=7) + kernel = Kernel('gauss') + hs = kernel.hns(data) + hVec = [hs / 2, hs, 2 * hs] + + for ix, h in enumerate(hVec): + plt.figure(ix) + kde = KDE(data, hs=h, kernel=kernel) + f2 = kde(x, output='plot', title='h_s = {0:2.2f}'.format(h), + ylab='Density') + f2.plot('k-') + + plt.plot(x, st.norm.pdf(x, 0, 1), 'k:') + n = len(data) + plt.plot(data, np.zeros(data.shape), 'bx') + y = kernel(x0) / (n * h * kernel.norm_factor(d=1, n=n)) + for i in range(n): + plt.plot(data[i] + x0 * h, y, 'b--') + plt.plot([data[i], data[i]], [0, np.max(y)], 'b') + + plt.axis([min(x), max(x), 0, 0.5]) + + +def kde_demo2(): + '''Demonstrate the difference between transformation- and ordinary-KDE. + + KDEDEMO2 shows that the transformation KDE is a better estimate for + Rayleigh distributed data around 0 than the ordinary KDE. + ''' + st = scipy.stats + data = st.rayleigh.rvs(scale=1, size=300) + + x = np.linspace(1.5e-2, 5, 55) + + kde = KDE(data) + f = kde(output='plot', title='Ordinary KDE (hs={0:g})'.format(kde.hs)) + plt.figure(0) + f.plot() + + plt.plot(x, st.rayleigh.pdf(x, scale=1), ':') + + # plotnorm((data).^(L2)) # gives a straight line => L2 = 0.5 reasonable + + tkde = TKDE(data, L2=0.5) + ft = tkde(x, output='plot', + title='Transformation KDE (hs={0:g})'.format(tkde.tkde.hs)) + plt.figure(1) + ft.plot() + + plt.plot(x, st.rayleigh.pdf(x, scale=1), ':') + + plt.figure(0) + + +def kde_demo3(): + '''Demonstrate the difference between transformation and ordinary-KDE in 2D + + KDEDEMO3 shows that the transformation KDE is a better estimate for + Rayleigh distributed data around 0 than the ordinary KDE. 
+ ''' + st = scipy.stats + data = st.rayleigh.rvs(scale=1, size=(2, 300)) + + # x = np.linspace(1.5e-3, 5, 55) + + kde = KDE(data) + f = kde(output='plot', title='Ordinary KDE', plotflag=1) + plt.figure(0) + f.plot() + + plt.plot(data[0], data[1], '.') + + # plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable + + tkde = TKDE(data, L2=0.5) + ft = tkde.eval_grid_fast( + output='plot', title='Transformation KDE', plotflag=1) + + plt.figure(1) + ft.plot() + + plt.plot(data[0], data[1], '.') + + plt.figure(0) + + +def kde_demo4(N=50): + '''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior + for 1D multimodal distributions + + KDEDEMO4 shows that the improved Sheather-Jones plug-in smoothing is a + better compared to normal reference rules (in this case the hns) + ''' + st = scipy.stats + + data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(N,)), + st.norm.rvs(loc=-5, scale=1, size=(N,)))) + + # x = np.linspace(1.5e-3, 5, 55) + + kde = KDE(data, kernel=Kernel('gauss', 'hns')) + f = kde(output='plot', title='Ordinary KDE', plotflag=1) + + kde1 = KDE(data, kernel=Kernel('gauss', 'hisj')) + f1 = kde1(output='plot', label='Ordinary KDE', plotflag=1) + + plt.figure(0) + f.plot('r', label='hns={0:g}'.format(kde.hs)) + # plt.figure(2) + f1.plot('b', label='hisj={0:g}'.format(kde1.hs)) + x = np.linspace(-4, 4) + for loc in [-5, 5]: + plt.plot(x + loc, st.norm.pdf(x, 0, scale=1) / 2, 'k:', + label='True density') + plt.legend() + + +def kde_demo5(N=500): + '''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior + for 2D multimodal distributions + + KDEDEMO5 shows that the improved Sheather-Jones plug-in smoothing is better + compared to normal reference rules (in this case the hns) + ''' + st = scipy.stats + + data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(2, N,)), + st.norm.rvs(loc=-5, scale=1, size=(2, N,)))) + kde = KDE(data, kernel=Kernel('gauss', 'hns')) + f = kde(output='plot', plotflag=1, + title='Ordinary KDE (hns={0:s}'.format(str(kde.hs.tolist()))) + + kde1 = KDE(data, kernel=Kernel('gauss', 'hisj')) + f1 = kde1(output='plot', plotflag=1, + title='Ordinary KDE (hisj={0:s})'.format(str(kde1.hs.tolist()))) + + plt.figure(0) + plt.clf() + f.plot() + plt.plot(data[0], data[1], '.') + plt.figure(1) + plt.clf() + f1.plot() + plt.plot(data[0], data[1], '.') + + +def kreg_demo1(hs=None, fast=False, fun='hisj'): + """""" + N = 100 + # ei = np.random.normal(loc=0, scale=0.075, size=(N,)) + ei = np.array([ + -0.08508516, 0.10462496, 0.07694448, -0.03080661, 0.05777525, + 0.06096313, -0.16572389, 0.01838912, -0.06251845, -0.09186784, + -0.04304887, -0.13365788, -0.0185279, -0.07289167, 0.02319097, + 0.06887854, -0.08938374, -0.15181813, 0.03307712, 0.08523183, + -0.0378058, -0.06312874, 0.01485772, 0.06307944, -0.0632959, + 0.18963205, 0.0369126, -0.01485447, 0.04037722, 0.0085057, + -0.06912903, 0.02073998, 0.1174351, 0.17599277, -0.06842139, + 0.12587608, 0.07698113, -0.0032394, -0.12045792, -0.03132877, + 0.05047314, 0.02013453, 0.04080741, 0.00158392, 0.10237899, + -0.09069682, 0.09242174, -0.15445323, 0.09190278, 0.07138498, + 0.03002497, 0.02495252, 0.01286942, 0.06449978, 0.03031802, + 0.11754861, -0.02322272, 0.00455867, -0.02132251, 0.09119446, + -0.03210086, -0.06509545, 0.07306443, 0.04330647, 0.078111, + -0.04146907, 0.05705476, 0.02492201, -0.03200572, -0.02859788, + -0.05893749, 0.00089538, 0.0432551, 0.04001474, 0.04888828, + -0.17708392, 0.16478644, 0.1171006, 0.11664846, 0.01410477, + -0.12458953, -0.11692081, 0.0413047, 
-0.09292439, -0.07042327, + 0.14119701, -0.05114335, 0.04994696, -0.09520663, 0.04829406, + -0.01603065, -0.1933216, 0.19352763, 0.11819496, 0.04567619, + -0.08348306, 0.00812816, -0.00908206, 0.14528945, 0.02901065]) + x = np.linspace(0, 1, N) + + y0 = 2 * np.exp(-x ** 2 / (2 * 0.3 ** 2)) + \ + 3 * np.exp(-(x - 1) ** 2 / (2 * 0.7 ** 2)) + y = y0 + ei + kernel = Kernel('gauss', fun=fun) + hopt = kernel.hisj(x) + kreg = KRegression( + x, y, p=0, hs=hs, kernel=kernel, xmin=-2 * hopt, xmax=1 + 2 * hopt) + if fast: + kreg.__call__ = kreg.eval_grid_fast + + f = kreg(output='plot', title='Kernel regression', plotflag=1) + plt.figure(0) + f.plot(label='p=0') + + kreg.p = 1 + f1 = kreg(output='plot', title='Kernel regression', plotflag=1) + f1.plot(label='p=1') + # print(f1.data) + plt.plot(x, y, '.', label='data') + plt.plot(x, y0, 'k', label='True model') + plt.legend() + + plt.show() + + print(kreg.tkde.tkde._inv_hs) + print(kreg.tkde.tkde.hs) + +_TINY = np.finfo(float).machar.tiny +_REALMIN = np.finfo(float).machar.xmin +_REALMAX = np.finfo(float).machar.xmax +_EPS = np.finfo(float).eps + + +def _logit(p): + pc = p.clip(min=0, max=1) + return (np.log(pc) - np.log1p(-pc)).clip(min=-40, max=40) + + +def _logitinv(x): + return 1.0 / (np.exp(-x) + 1) + + +def _get_data(n=100, symmetric=False, loc1=1.1, scale1=0.6, scale2=1.0): + st = scipy.stats + # from sg_filter import SavitzkyGolay + dist = st.norm + + norm1 = scale2 * (dist.pdf(-loc1, loc=-loc1, scale=scale1) + + dist.pdf(-loc1, loc=loc1, scale=scale1)) + + def fun1(x): + return ((dist.pdf(x, loc=-loc1, scale=scale1) + + dist.pdf(x, loc=loc1, scale=scale1)) / norm1).clip(max=1.0) + + x = np.sort(6 * np.random.rand(n, 1) - 3, axis=0) + + y = (fun1(x) > np.random.rand(n, 1)).ravel() + # y = (np.cos(x)>2*np.random.rand(n, 1)-1).ravel() + x = x.ravel() + + if symmetric: + xi = np.hstack((x.ravel(), -x.ravel())) + yi = np.hstack((y, y)) + i = np.argsort(xi) + x = xi[i] + y = yi[i] + return x, y, fun1 + + +def kreg_demo2(n=100, hs=None, symmetric=False, fun='hisj', plotlog=False): + x, y, fun1 = _get_data(n, symmetric) + kreg_demo3(x, y, fun1, hs=None, fun='hisj', plotlog=False) + + +def kreg_demo3(x, y, fun1, hs=None, fun='hisj', plotlog=False): + st = scipy.stats + + alpha = 0.1 + z0 = -_invnorm(alpha / 2) + + n = x.size + hopt, hs1, hs2 = _get_regression_smooting(x, y, fun='hos') + if hs is None: + hs = hopt + + forward = _logit + reverse = _logitinv + # forward = np.log + # reverse = np.exp + + xmin, xmax = x.min(), x.max() + ni = max(2 * int((xmax - xmin) / hopt) + 3, 5) + print(ni) + print(xmin, xmax) + sml = hopt * 0.1 + xi = np.linspace(xmin - sml, xmax + sml, ni) + xiii = np.linspace(xmin - sml, xmax + sml, 4 * ni + 1) + + c = gridcount(x, xi) + if (y == 1).any(): + c0 = gridcount(x[y == 1], xi) + else: + c0 = np.zeros(np.shape(xi)) + yi = np.where(c == 0, 0, c0 / c) + + kreg = KRegression(x, y, hs=hs, p=0) + fiii = kreg(xiii) + yiii = interpolate.interp1d(xi, yi)(xiii) + fit = fun1(xiii).clip(max=1.0) + df = np.diff(fiii) + eerr = np.abs((yiii - fiii)).std() + 0.5 * (df[:-1] * df[1:] < 0).sum() / n + err = (fiii - fit).std() + msg = '{} err={1:1.3f},eerr={2:1.3f}, n={:d}, hs={:1.3f}, hs1={:1.3f}, '\ + 'hs2={:1.3f}' + title = (msg.format(fun, err, eerr, n, hs, hs1, hs2)) + f = kreg(xiii, output='plotobj', title=title, plotflag=1) + + # yi[yi==0] = 1.0/(c[c!=0].min()+4) + # yi[yi==1] = 1-1.0/(c[c!=0].min()+4) + # yi[yi==0] = fi[yi==0] + # yi[yi==0] = np.exp(stineman_interp(xi[yi==0], xi[yi>0],np.log(yi[yi>0]))) + # yi[yi==0] = 
fun1(xi[yi==0]) + try: + yi[yi == 0] = yi[yi > 0].min() / sqrt(n) + except: + yi[yi == 0] = 1. / n + yi[yi == 1] = 1 - (1 - yi[yi < 1].max()) / sqrt(n) + + logity = forward(yi) + + gkreg = KRegression(xi, logity, hs=hs, xmin=xmin - hopt, xmax=xmax + hopt) + fg = gkreg.eval_grid( + xi, output='plotobj', title='Kernel regression', plotflag=1) + sa = (fg.data - logity).std() + sa2 = iqrange(fg.data - logity) / 1.349 + # print('sa=%g %g' % (sa, sa2)) + sa = min(sa, sa2) + +# plt.figure(1) +# plt.plot(xi, slogity-logity,'r.') +# plt.plot(xi, logity-,'b.') +# plt.plot(xi, fg.data-logity, 'b.') +# plt.show() +# return + + fg = gkreg.eval_grid( + xiii, output='plotobj', title='Kernel regression', plotflag=1) + pi = reverse(fg.data) + + dx = xi[1] - xi[0] + ckreg = KDE(x, hs=hs) + # ci = ckreg.eval_grid_fast(xi)*n*dx + ciii = ckreg.eval_grid_fast(xiii) * dx * x.size # n*(1+symmetric) + +# sa1 = np.sqrt(1./(ciii*pi*(1-pi))) +# plo3 = reverse(fg.data-z0*sa) +# pup3 = reverse(fg.data+z0*sa) + fg.data = pi + pi = f.data + + # ref Casella and Berger (1990) "Statistical inference" pp444 +# a = 2*pi + z0**2/(ciii+1e-16) +# b = 2*(1+z0**2/(ciii+1e-16)) +# plo2 = ((a-sqrt(a**2-2*pi**2*b))/b).clip(min=0,max=1) +# pup2 = ((a+sqrt(a**2-2*pi**2*b))/b).clip(min=0,max=1) + # Jeffreys intervall a=b=0.5 + # st.beta.isf(alpha/2, x+a, n-x+b) + ab = 0.07 # 0.055 + pi1 = pi # fun1(xiii) + pup2 = np.where(pi == 1, + 1, + st.beta.isf(alpha / 2, + ciii * pi1 + ab, + ciii * (1 - pi1) + ab)) + plo2 = np.where(pi == 0, + 0, + st.beta.isf(1 - alpha / 2, + ciii * pi1 + ab, + ciii * (1 - pi1) + ab)) + + averr = np.trapz(pup2 - plo2, xiii) / \ + (xiii[-1] - xiii[0]) + 0.5 * (df[:-1] * df[1:] < 0).sum() + + # f2 = kreg_demo4(x, y, hs, hopt) + # Wilson score + den = 1 + (z0 ** 2. / ciii) + xc = (pi1 + (z0 ** 2) / (2 * ciii)) / den + halfwidth = (z0 * sqrt((pi1 * (1 - pi1) / ciii) + + (z0 ** 2 / (4 * (ciii ** 2))))) / den + plo = (xc - halfwidth).clip(min=0) # wilson score + pup = (xc + halfwidth).clip(max=1.0) # wilson score + # pup = (pi + z0*np.sqrt(pi*(1-pi)/ciii)).clip(min=0,max=1) # dont use + # plo = (pi - z0*np.sqrt(pi*(1-pi)/ciii)).clip(min=0,max=1) + + # mi = kreg.eval_grid(x) + # sigma = (stineman_interp(x, xiii, pup)-stineman_interp(x, xiii, plo))/4 + # aic = np.abs((y-mi)/sigma).std()+ 0.5*(df[:-1]*df[1:]<0).sum()/n + # aic = np.abs((yiii-fiii)/(pup-plo)).std() + \ + # 0.5*(df[:-1]*df[1:]<0).sum() + \ + # ((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() + + k = (df[:-1] * df[1:] < 0).sum() # numpeaks + sigmai = (pup - plo) + aic = (((yiii - fiii) / sigmai) ** 2).sum() + \ + 2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \ + np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum() + + # aic = (((yiii-fiii)/sigmai)**2).sum()+ 2*k*(k+1)/(ni-k+1) + \ + # np.abs((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() + + # aic = averr + ((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() + + fg.plot(label='KReg grid aic={:2.3f}'.format(aic)) + f.plot(label='KReg averr={:2.3f} '.format(averr)) + labtxt = '%d CI' % (int(100 * (1 - alpha))) + plt.fill_between(xiii, pup, plo, alpha=0.20, + color='r', linestyle='--', label=labtxt) + plt.fill_between(xiii, pup2, plo2, alpha=0.20, color='b', linestyle=':', + label='{:d} CI2'.format(int(100 * (1 - alpha)))) + plt.plot(xiii, fun1(xiii), 'r', label='True model') + plt.scatter(xi, yi, label='data') + print('maxp = {:g}'.format(np.nanmax(f.data))) + print('hs = {:g}'.format(kreg.tkde.tkde.hs)) + plt.legend() + h = plt.gca() + if plotlog: + plt.setp(h, yscale='log') + # 
plt.show() + return hs1, hs2 + + +def kreg_demo4(x, y, hs, hopt, alpha=0.05): + st = scipy.stats + + n = x.size + xmin, xmax = x.min(), x.max() + ni = max(2 * int((xmax - xmin) / hopt) + 3, 5) + + sml = hopt * 0.1 + xi = np.linspace(xmin - sml, xmax + sml, ni) + xiii = np.linspace(xmin - sml, xmax + sml, 4 * ni + 1) + + kreg = KRegression(x, y, hs=hs, p=0) + + dx = xi[1] - xi[0] + ciii = kreg.tkde.eval_grid_fast(xiii) * dx * x.size +# ckreg = KDE(x,hs=hs) +# ciiii = ckreg.eval_grid_fast(xiii)*dx* x.size #n*(1+symmetric) + + f = kreg(xiii, output='plotobj') # , plot_kwds=dict(plotflag=7)) + pi = f.data + + # Jeffreys intervall a=b=0.5 + # st.beta.isf(alpha/2, x+a, n-x+b) + ab = 0.07 # 0.5 + pi1 = pi + pup = np.where(pi1 == 1, 1, st.beta.isf( + alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab)) + plo = np.where(pi1 == 0, 0, st.beta.isf( + 1 - alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab)) + + # Wilson score + # z0 = -_invnorm(alpha/2) +# den = 1+(z0**2./ciii); +# xc=(pi1+(z0**2)/(2*ciii))/den; +# halfwidth=(z0*sqrt((pi1*(1-pi1)/ciii)+(z0**2/(4*(ciii**2)))))/den +# plo2 = (xc-halfwidth).clip(min=0) # wilson score +# pup2 = (xc+halfwidth).clip(max=1.0) # wilson score + # f.dataCI = np.vstack((plo,pup)).T + f.prediction_error_avg = np.trapz(pup - plo, xiii) / (xiii[-1] - xiii[0]) + fiii = f.data + + c = gridcount(x, xi) + if (y == 1).any(): + c0 = gridcount(x[y == 1], xi) + else: + c0 = np.zeros(np.shape(xi)) + yi = np.where(c == 0, 0, c0 / c) + + f.children = [PlotData([plo, pup], xiii, plotmethod='fill_between', + plot_kwds=dict(alpha=0.2, color='r')), + PlotData(yi, xi, plotmethod='scatter', + plot_kwds=dict(color='r', s=5))] + + yiii = interpolate.interp1d(xi, yi)(xiii) + df = np.diff(fiii) + k = (df[:-1] * df[1:] < 0).sum() # numpeaks + sigmai = (pup - plo) + aicc = (((yiii - fiii) / sigmai) ** 2).sum() + \ + 2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \ + np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum() + + f.aicc = aicc + f.labels.title = ('perr={:1.3f},aicc={:1.3f}, n={:d}, ' + 'hs={:1.3f}'.format(f.prediction_error_avg, aicc, n, hs)) + + return f + + +def check_kreg_demo3(): + + plt.ion() + k = 0 + for n in [50, 100, 300, 600, 4000]: + x, y, fun1 = _get_data( + n, symmetric=True, loc1=1.0, scale1=0.6, scale2=1.25) + k0 = k + + for fun in ['hste', ]: + hsmax, _hs1, _hs2 = _get_regression_smooting(x, y, fun=fun) + for hi in np.linspace(hsmax * 0.25, hsmax, 9): + plt.figure(k) + k += 1 + unused = kreg_demo3(x, y, fun1, hs=hi, fun=fun, plotlog=False) + + # kreg_demo2(n=n,symmetric=True,fun='hste', plotlog=False) + fig.tile(range(k0, k)) + plt.ioff() + plt.show() + + +def check_kreg_demo4(): + plt.ion() + # test_docstrings() + # kde_demo2() + # kreg_demo1(fast=True) + # kde_gauss_demo() + # kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True) + k = 0 + for _i, n in enumerate([100, 300, 600, 4000]): + x, y, fun1 = _get_data( + n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75) + # k0 = k + hopt1, _h1, _h2 = _get_regression_smooting(x, y, fun='hos') + hopt2, _h1, _h2 = _get_regression_smooting(x, y, fun='hste') + hopt = sqrt(hopt1 * hopt2) + # hopt = _get_regression_smooting(x,y,fun='hos')[0] + for _j, fun in enumerate(['hste']): # , 'hisj', 'hns', 'hstt' + hsmax, _hs1, _hs2 = _get_regression_smooting(x, y, fun=fun) + + fmax = kreg_demo4(x, y, hsmax + 0.1, hopt) + for hi in np.linspace(hsmax * 0.1, hsmax, 55): + f = kreg_demo4(x, y, hi, hopt) + if f.aicc <= fmax.aicc: + fmax = f + plt.figure(k) + k += 1 + fmax.plot() + plt.plot(x, fun1(x), 'r') + + # 
kreg_demo2(n=n,symmetric=True,fun='hste', plotlog=False) + fig.tile(range(0, k)) + plt.ioff() + plt.show() + + +def check_regression_bin(): + plt.ion() + # test_docstrings() + # kde_demo2() + # kreg_demo1(fast=True) + # kde_gauss_demo() + # kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True) + k = 0 + for _i, n in enumerate([100, 300, 600, 4000]): + x, y, fun1 = _get_data( + n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75) + fbest = regressionbin(x, y, alpha=0.05, color='g', label='Transit_D') + + figk = plt.figure(k) + ax = figk.gca() + k += 1 + fbest.labels.title = 'N = {:d}'.format(n) + fbest.plot(axis=ax) + ax.plot(x, fun1(x), 'r') + ax.legend(frameon=False, markerscale=4) + # ax = plt.gca() + ax.set_yticklabels(ax.get_yticks() * 100.0) + ax.grid(True) + + fig.tile(range(0, k)) + plt.ioff() + plt.show() + + +def check_bkregression(): + plt.ion() + k = 0 + for _i, n in enumerate([50, 100, 300, 600]): + x, y, fun1 = _get_data( + n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75) + bkreg = BKRegression(x, y) + fbest = bkreg.prb_search_best( + hsfun='hste', alpha=0.05, color='g', label='Transit_D') + + figk = plt.figure(k) + ax = figk.gca() + k += 1 +# fbest.score.plot(axis=ax) +# axsize = ax.axis() +# ax.vlines(fbest.hs,axsize[2]+1,axsize[3]) +# ax.set(yscale='log') + fbest.labels.title = 'N = {:d}'.format(n) + fbest.plot(axis=ax) + ax.plot(x, fun1(x), 'r') + ax.legend(frameon=False, markerscale=4) + # ax = plt.gca() + ax.set_yticklabels(ax.get_yticks() * 100.0) + ax.grid(True) + + fig.tile(range(0, k)) + plt.ioff() + plt.show() + + +def _get_regression_smooting(x, y, fun='hste'): + hs1 = Kernel('gauss', fun=fun).get_smoothing(x) + # hx = np.median(np.abs(x-np.median(x)))/0.6745*(4.0/(3*n))**0.2 + if (y == 1).any(): + hs2 = Kernel('gauss', fun=fun).get_smoothing(x[y == 1]) + # hy = np.median(np.abs(y-np.mean(y)))/0.6745*(4.0/(3*n))**0.2 + else: + hs2 = 4 * hs1 + # hy = 4*hx + + # hy2 = Kernel('gauss', fun=fun).get_smoothing(y) + # kernel = Kernel('gauss',fun=fun) + # hopt = (hs1+2*hs2)/3 + # hopt = (hs1+4*hs2)/5 #kernel.get_smoothing(x) + # hopt = hs2 + hopt = sqrt(hs1 * hs2) + return hopt, hs1, hs2 + + +def empirical_bin_prb(x, y, hopt, color='r'): + """Returns empirical binomial probabiltity. 
+ + Parameters + ---------- + x : ndarray + position ve + y : ndarray + binomial response variable (zeros and ones) + + Returns + ------- + P(x) : PlotData object + empirical probability + + """ + xmin, xmax = x.min(), x.max() + ni = max(2 * int((xmax - xmin) / hopt) + 3, 5) + + sml = hopt # *0.1 + xi = np.linspace(xmin - sml, xmax + sml, ni) + + c = gridcount(x, xi) + if (y == 1).any(): + c0 = gridcount(x[y == 1], xi) + else: + c0 = np.zeros(np.shape(xi)) + yi = np.where(c == 0, 0, c0 / c) + return PlotData(yi, xi, plotmethod='scatter', + plot_kwds=dict(color=color, s=5)) + + +def smoothed_bin_prb(x, y, hs, hopt, alpha=0.05, color='r', label='', + bin_prb=None): + ''' + Parameters + ---------- + x,y + hs : smoothing parameter + hopt : spacing in empirical_bin_prb + alpha : confidence level + color : color of plot object + bin_prb : PlotData object with empirical bin prb + ''' + if bin_prb is None: + bin_prb = empirical_bin_prb(x, y, hopt, color) + + xi = bin_prb.args + yi = bin_prb.data + ni = len(xi) + dxi = xi[1] - xi[0] + + n = x.size + + xiii = np.linspace(xi[0], xi[-1], 10 * ni + 1) + + kreg = KRegression(x, y, hs=hs, p=0) + # expected number of data in each bin + ciii = kreg.tkde.eval_grid_fast(xiii) * dxi * n + + f = kreg(xiii, output='plotobj') # , plot_kwds=dict(plotflag=7)) + pi = f.data + + st = scipy.stats + # Jeffreys intervall a=b=0.5 + # st.beta.isf(alpha/2, x+a, n-x+b) + ab = 0.07 # 0.5 + pi1 = pi + pup = np.where(pi1 == 1, 1, st.beta.isf( + alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab)) + plo = np.where(pi1 == 0, 0, st.beta.isf( + 1 - alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab)) + + # Wilson score + # z0 = -_invnorm(alpha/2) +# den = 1+(z0**2./ciii); +# xc=(pi1+(z0**2)/(2*ciii))/den; +# halfwidth=(z0*sqrt((pi1*(1-pi1)/ciii)+(z0**2/(4*(ciii**2)))))/den +# plo2 = (xc-halfwidth).clip(min=0) # wilson score +# pup2 = (xc+halfwidth).clip(max=1.0) # wilson score + # f.dataCI = np.vstack((plo,pup)).T + f.prediction_error_avg = np.trapz(pup - plo, xiii) / (xiii[-1] - xiii[0]) + fiii = f.data + + f.plot_kwds['color'] = color + f.plot_kwds['linewidth'] = 2 + if label: + f.plot_kwds['label'] = label + f.children = [PlotData([plo, pup], xiii, plotmethod='fill_between', + plot_kwds=dict(alpha=0.2, color=color)), + bin_prb] + + yiii = interpolate.interp1d(xi, yi)(xiii) + df = np.diff(fiii) + k = (df[:-1] * df[1:] < 0).sum() # numpeaks + sigmai = (pup - plo) + aicc = (((yiii - fiii) / sigmai) ** 2).sum() + \ + 2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \ + np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum() + + f.aicc = aicc + f.fun = kreg + f.labels.title = ('perr={:1.3f},aicc={:1.3f}, n={:d}, ' + 'hs={:1.3f}'.format(f.prediction_error_avg, aicc, n, hs)) + + return f + + +def regressionbin(x, y, alpha=0.05, color='r', label=''): + """Return kernel regression estimate for binomial data. 
+ + Parameters + ---------- + x : arraylike + positions + y : arraylike + of 0 and 1 + + """ + + hopt1, _h1, _h2 = _get_regression_smooting(x, y, fun='hos') + hopt2, _h1, _h2 = _get_regression_smooting(x, y, fun='hste') + hopt = sqrt(hopt1 * hopt2) + + fbest = smoothed_bin_prb(x, y, hopt2 + 0.1, hopt, alpha, color, label) + bin_prb = fbest.children[-1] + for fun in ['hste']: # , 'hisj', 'hns', 'hstt' + hsmax, _hs1, _hs2 = _get_regression_smooting(x, y, fun=fun) + for hi in np.linspace(hsmax * 0.1, hsmax, 55): + f = smoothed_bin_prb(x, y, hi, hopt, alpha, color, label, bin_prb) + if f.aicc <= fbest.aicc: + fbest = f + # hbest = hi + return fbest + + +def kde_gauss_demo(n=50): + """KDEDEMO Demonstrate the KDEgauss. + + KDEDEMO1 shows the true density (dotted) compared to KDE based on 7 + observations (solid) and their individual kernels (dashed) for 3 + different values of the smoothing parameter, hs. + + """ + + st = scipy.stats + # x = np.linspace(-4, 4, 101) + # data = np.random.normal(loc=0, scale=1.0, size=n) + # data = np.random.exponential(scale=1.0, size=n) +# n1 = 128 +# I = (np.arange(n1)*pi)**2 *0.01*0.5 +# kw = exp(-I) +# plt.plot(idctn(kw)) +# return + dist = st.norm + # dist = st.expon + data = dist.rvs(loc=0, scale=1.0, size=n) + d, _N = np.atleast_2d(data).shape + + if d == 1: + plot_options = [dict(color='red', label='KDE hste'), + dict(color='green', label='TKDE hisj'), + dict(color='black', label='KDEgauss hste')] + else: + plot_options = [dict(colors='red'), dict(colors='green'), + dict(colors='black')] + + plt.figure(1) + t0 = time.time() + kde0 = KDE(data, kernel=Kernel('gauss', 'hste')) + f0 = kde0.eval_grid_fast(output='plot', ylab='Density', r=0) + t1 = time.time() + total1 = t1-t0 + + f0.plot('.', **plot_options[0]) + if dist.name != 'norm': + kde1 = TKDE(data, kernel=Kernel('gauss', 'hisj'), L2=.5) + f1 = kde1.eval_grid_fast(output='plot', ylab='Density', r=0) + f1.plot(**plot_options[1]) + else: + kde1 = kde0 + f1 = f0 + t1 = time.time() + kde2 = KDEgauss(data) + f2 = kde2(output='plot', ylab='Density', r=0) + t2 = time.time() + total2 = t2-t1 + + x = f2.args + f2.plot(**plot_options[2]) + + fmax = dist.pdf(x, 0, 1).max() + if d == 1: + plt.plot(x, dist.pdf(x, 0, 1), 'k:', label='True pdf') + plt.axis([x.min(), x.max(), 0, fmax]) + plt.legend() + plt.show() + print(fmax / f2.data.max()) + try: + print('hs0={:s} hs1={:s} hs2={:s}'.format(str(kde0.hs.tolist()), + str(kde1.tkde.hs.tolist()), + str(kde2.hs.tolist()))) + except: + pass + print('inc0 = {:d}, inc1 = {:d}, inc2 = {:d}'.format(kde0.inc, kde1.inc, + kde2.inc)) + print(np.trapz(f0.data, f0.args), np.trapz(f2.data, f2.args)) + print(total1, total2) + + +def test_kde(): + data = np.array([ + 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887, + 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, + 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, + 1.8919469, 0.72433808, 1.92973094, 0.44749838, 1.36508452]) + + x = np.linspace(0.01, max(data + 1), 10) + kde = TKDE(data, hs=0.5, L2=0.5) + _f = kde(x) + # f = array([1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318, + # 0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882]) + + _f1 = kde.eval_grid(x) + # array([ 1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318, + # 0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882]) + + _f2 = kde.eval_grid_fast(x) + # array([ 1.06437223, 0.46203314, 0.39593137, 0.32781899, 0.26276433, + # 0.20532206, 0.15723498, 0.11843998, 0.08797755, 0. 
]) + + +if __name__ == '__main__': + if True: + test_docstrings(__file__) + else: + # test_kde() + # check_bkregression() + # check_regression_bin() + # check_kreg_demo3() + # check_kreg_demo4() + + # kde_demo2() + # kreg_demo1(fast=True) + kde_gauss_demo(n=50) + # kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True) + plt.show('hold') diff --git a/wafo/kdetools/kernels.py b/wafo/kdetools/kernels.py new file mode 100644 index 0000000..94e013c --- /dev/null +++ b/wafo/kdetools/kernels.py @@ -0,0 +1,1382 @@ +''' +Created on 15. des. 2016 + +@author: pab +''' +from __future__ import division +from abc import ABCMeta, abstractmethod +import warnings +import numpy as np +from numpy import pi, sqrt, exp, percentile +from scipy import optimize, linalg +from scipy.special import gamma +from wafo.misc import tranproc # , trangood +from wafo.kdetools.gridding import gridcount +from wafo.dctpack import dct +from wafo.testing import test_docstrings + +__all__ = ['Kernel', 'sphere_volume', 'qlevels', 'iqrange', 'percentile'] + + +def _assert(cond, msg): + if not cond: + raise ValueError(msg) + +# stats = (mu2, R, Rdd) where +# mu2 : 2'nd order moment, i.e.,int(x^2*kernel(x)) +# R : integral of squared kernel, i.e., int(kernel(x)^2) +# Rdd : int( (kernel''(x))^2 ). +_stats_epan = (1. / 5, 3. / 5, np.inf) +_stats_biwe = (1. / 7, 5. / 7, 45. / 2) +_stats_triw = (1. / 9, 350. / 429, np.inf) +_stats_rect = (1. / 3, 1. / 2, np.inf) +_stats_tria = (1. / 6, 2. / 3, np.inf) +_stats_lapl = (2, 1. / 4, np.inf) +_stats_logi = (pi ** 2 / 3, 1. / 6, 1 / 42) +_stats_gaus = (1, 1. / (2 * sqrt(pi)), 3. / (8 * sqrt(pi))) + + +def qlevels(pdf, p=(10, 30, 50, 70, 90, 95, 99, 99.9), x1=None, x2=None): + """QLEVELS Calculates quantile levels which encloses P% of PDF. + + CALL: [ql PL] = qlevels(pdf,PL,x1,x2); + + ql = the discrete quantile levels. + pdf = joint point density function matrix or vector + PL = percent level (default [10:20:90 95 99 99.9]) + x1,x2 = vectors of the spacing of the variables + (Default unit spacing) + + QLEVELS numerically integrates PDF by decreasing height and find the + quantile levels which encloses P% of the distribution. If X1 and + (or) X2 is unspecified it is assumed that dX1 and dX2 is constant. + NB! QLEVELS normalizes the integral of PDF to N/(N+0.001) before + calculating QL in order to reflect the sampling of PDF is finite. + Currently only able to handle 1D and 2D PDF's if dXi is not constant + (i=1,2). 
+ + Example + ------- + >>> import wafo.stats as ws + >>> x = np.linspace(-8,8,2001); + >>> PL = np.r_[10:90:20, 90, 95, 99, 99.9] + >>> qlevels(ws.norm.pdf(x),p=PL, x1=x); + array([ 0.39591707, 0.37058719, 0.31830968, 0.23402133, 0.10362052, + 0.05862129, 0.01449505, 0.00178806]) + + # compared with the exact values + >>> ws.norm.pdf(ws.norm.ppf((100-PL)/200)) + array([ 0.39580488, 0.370399 , 0.31777657, 0.23315878, 0.10313564, + 0.05844507, 0.01445974, 0.00177719]) + + See also + -------- + qlevels2, tranproc + + """ + + norm = 1 # normalize cdf to unity + pdf = np.atleast_1d(pdf) + _assert(not any(pdf.ravel() < 0), 'This is not a pdf since one or more ' + 'values of pdf is negative') + + fsiz = pdf.shape + fsizmin = min(fsiz) + if fsizmin == 0: + return [] + + N = np.prod(fsiz) + d = len(fsiz) + if x1 is None or ((x2 is None) and d > 2): + fdfi = pdf.ravel() + else: + if d == 1: # pdf in one dimension + dx22 = np.ones(1) + else: # % pdf in two dimensions + dx2 = np.diff(x2.ravel()) * 0.5 + dx22 = np.r_[0, dx2] + np.r_[dx2, 0] + + dx1 = np.diff(x1.ravel()) * 0.5 + dx11 = np.r_[0, dx1] + np.r_[dx1, 0] + dx1x2 = dx22[:, None] * dx11 + fdfi = (pdf * dx1x2).ravel() + + p = np.atleast_1d(p) + _assert(not np.any((p < 0) | (100 < p)), 'PL must satisfy 0 <= PL <= 100') + + p2 = p / 100.0 + ind = np.argsort(pdf.ravel()) # sort by height of pdf + ind = ind[::-1] + fi = pdf.flat[ind] + + # integration in the order of decreasing height of pdf + Fi = np.cumsum(fdfi[ind]) + + if norm: # normalize Fi to make sure int pdf dx1 dx2 approx 1 + Fi = Fi / Fi[-1] * N / (N + 1.5e-8) + + maxFi = np.max(Fi) + if maxFi > 1: + warnings.warn('this is not a pdf since cdf>1! normalizing') + + Fi = Fi / Fi[-1] * N / (N + 1.5e-8) + + elif maxFi < .95: + msg = '''The given pdf is too sparsely sampled since cdf<.95. + Thus QL is questionable''' + warnings.warn(msg) + + # make sure Fi is strictly increasing by not considering duplicate values + ind, = np.where(np.diff(np.r_[Fi, 1]) > 0) + # calculating the inverse of Fi to find the index + ui = tranproc(Fi[ind], fi[ind], p2) + + if np.any(ui >= max(pdf.ravel())): + warnings.warn('The lowest percent level is too close to 0%') + + if np.any(ui <= min(pdf.ravel())): + msg = '''The given pdf is too sparsely sampled or + the highest percent level is too close to 100%''' + warnings.warn(msg) + ui[ui < 0] = 0.0 + + return ui + + +def qlevels2(data, p=(10, 30, 50, 70, 90, 95, 99, 99.9), method=1): + """QLEVELS2 Calculates quantile levels which encloses P% of data. + + CALL: [ql PL] = qlevels2(data,PL,method); + + ql = the discrete quantile levels, size D X Np + Parameters + ---------- + data : data matrix, size D x N (D = # of dimensions) + p : percent level vector, length Np (default [10:20:90 95 99 99.9]) + method : integer + 1 Interpolation so that F(X_[k]) == k/(n-1). (linear default) + 2 Interpolation so that F(X_[k]) == (k+0.5)/n. (midpoint) + 3 Interpolation so that F(X_[k]) == (k+1)/n. (lower) + 4 Interpolation so that F(X_[k]) == k/n. (higher) + + Returns + ------- + + QLEVELS2 sort the columns of data in ascending order and find the + quantile levels for each column which encloses P% of the data. + + Examples : Finding quantile levels enclosing P% of data: + -------- + >>> import wafo.stats as ws + >>> PL = np.r_[10:90:20, 90, 95, 99, 99.9] + >>> xs = ws.norm.rvs(size=2500000) + >>> np.allclose(qlevels2(ws.norm.pdf(xs), p=PL), + ... [0.3958, 0.3704, 0.3179, 0.2331, 0.1031, 0.05841, 0.01451, 0.001751], + ... 
rtol=1e-1) + True + + # compared with the exact values + >>> ws.norm.pdf(ws.norm.ppf((100-PL)/200)) + array([ 0.39580488, 0.370399 , 0.31777657, 0.23315878, 0.10313564, + 0.05844507, 0.01445974, 0.00177719]) + + # Finding the median of xs: + >>> '%2.2f' % np.abs(qlevels2(xs,50)[0]) + '0.00' + + See also + -------- + qlevels + + """ + _assert(0 < method < 5, + 'Method must be between 1 to 4. Got method={}.'.format(method)) + interpolation = ['', 'linear', 'midpoint', 'lower', 'higher'][method] + q = 100 - np.atleast_1d(p) + return percentile(data, q, axis=-1, interpolation=interpolation) + + +def iqrange(data, axis=None): + """Returns the Inter Quartile Range of data. + + Parameters + ---------- + data : array-like + Input array or object that can be converted to an array. + axis : {None, int}, optional + Axis along which the percentiles are computed. The default (axis=None) + is to compute the median along a flattened version of the array. + + Returns + ------- + r : array-like + abs(np.percentile(data, 75, axis)-np.percentile(data, 25, axis)) + + Notes + ----- + IQRANGE is a robust measure of spread. The use of interquartile range + guards against outliers if the distribution have heavy tails. + + Example + ------- + >>> a = np.arange(101) + >>> iqrange(a) + 50.0 + + See also + -------- + np.std + + """ + return np.abs(np.percentile(data, 75, axis=axis) - + np.percentile(data, 25, axis=axis)) + + +def sphere_volume(d, r=1.0): + """ + Returns volume of d-dimensional sphere with radius r + + Parameters + ---------- + d : scalar or array_like + dimension of sphere + r : scalar or array_like + radius of sphere (default 1) + + Example + ------- + >>> sphere_volume(2., r=2.) + 12.566370614359172 + >>> sphere_volume(2., r=1.) + 3.1415926535897931 + + Reference + --------- + Wand,M.P. and Jones, M.C. (1995) + 'Kernel smoothing' + Chapman and Hall, pp 105 + """ + return (r ** d) * 2.0 * pi ** (d / 2.0) / (d * gamma(d / 2.0)) + + +class _Kernel(object): + __metaclass__ = ABCMeta + + def __init__(self, r=1.0, stats=None): + self.r = r # radius of kernel + self.stats = stats + + def norm_factor(self, d=1, n=None): + _assert(0 < d, "D") + _assert(0 < n, "Number of samples too few (n={})".format(n)) + return 1.0 + + @abstractmethod + def _kernel(self, x): + pass + + def norm_kernel(self, x): + X = np.atleast_2d(x) + return self._kernel(X) / self.norm_factor(*X.shape) + + def kernel(self, x): + return self._kernel(np.atleast_2d(x)) + + def deriv4_6_8_10(self, t, numout=4): + raise NotImplementedError('Method not implemented for this kernel!') + + def effective_support(self): + """Return the effective support of kernel. + + The kernel must be symmetric and compactly supported on [-tau tau] + if the kernel has infinite support then the kernel must have the + effective support in [-tau tau], i.e., be negligible outside the range + + """ + return self._effective_support() + + def _effective_support(self): + return -self.r, self.r + __call__ = kernel + + +class _KernelMulti(_Kernel): + """ + p=0; Sphere = rect for 1D + p=1; Multivariate Epanechnikov kernel. 
+ p=2; Multivariate Bi-weight Kernel + p=3; Multi variate Tri-weight Kernel + p=4; Multi variate Four-weight Kernel + """ + def __init__(self, r=1.0, p=1, stats=None): + self.p = p + super(_KernelMulti, self).__init__(r, stats) + + def norm_factor(self, d=1, n=None): + r = self.r + p = self.p + c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(d, r) / np.prod( + np.r_[(d + 2):(2 * p + d + 1):2]) # normalizing constant + return c + + def _kernel(self, x): + r = self.r + p = self.p + x2 = x ** 2 + return ((1.0 - x2.sum(axis=0) / r ** 2).clip(min=0.0)) ** p + +mkernel_epanechnikov = _KernelMulti(p=1, stats=_stats_epan) +mkernel_biweight = _KernelMulti(p=2, stats=_stats_biwe) +mkernel_triweight = _KernelMulti(p=3, stats=_stats_triw) + + +class _KernelProduct(_KernelMulti): + """ + p=0; rectangular + p=1; 1D product Epanechnikov kernel. + p=2; 1D product Bi-weight Kernel + p=3; 1D product Tri-weight Kernel + p=4; 1D product Four-weight Kernel + """ + def norm_factor(self, d=1, n=None): + r = self.r + p = self.p + c = (2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(1, r) / + np.prod(np.r_[(1 + 2):(2 * p + 2):2])) + return c ** d + + def _kernel(self, x): + r = self.r # radius + pdf = (1 - (x / r) ** 2).clip(min=0.0) ** self.p + return pdf.prod(axis=0) + +mkernel_p1epanechnikov = _KernelProduct(p=1, stats=_stats_epan) +mkernel_p1biweight = _KernelProduct(p=2, stats=_stats_biwe) +mkernel_p1triweight = _KernelProduct(p=3, stats=_stats_triw) + + +class _KernelRectangular(_Kernel): + + def _kernel(self, x): + return np.where(np.all(np.abs(x) <= self.r, axis=0), 1, 0.0) + + def norm_factor(self, d=1, n=None): + r = self.r + return (2 * r) ** d +mkernel_rectangular = _KernelRectangular(stats=_stats_rect) + + +class _KernelTriangular(_Kernel): + + def _kernel(self, x): + pdf = (1 - np.abs(x)).clip(min=0.0) + return pdf.prod(axis=0) +mkernel_triangular = _KernelTriangular(stats=_stats_tria) + + +class _KernelGaussian(_Kernel): + + def _kernel(self, x): + sigma = self.r / 4.0 + x2 = (x / sigma) ** 2 + return exp(-0.5 * x2.sum(axis=0)) + + def norm_factor(self, d=1, n=None): + sigma = self.r / 4.0 + return (2 * pi * sigma) ** (d / 2.0) + + def deriv4_6_8_10(self, t, numout=4): + """Returns 4th, 6th, 8th and 10th derivatives of the kernel + function.""" + phi0 = exp(-0.5 * t ** 2) / sqrt(2 * pi) + p4 = [1, 0, -6, 0, +3] + p4val = np.polyval(p4, t) * phi0 + if numout == 1: + return p4val + out = [p4val] + pn = p4 + for _i in range(numout - 1): + pnp1 = np.polyadd(-np.r_[pn, 0], np.polyder(pn)) + pnp2 = np.polyadd(-np.r_[pnp1, 0], np.polyder(pnp1)) + out.append(np.polyval(pnp2, t) * phi0) + pn = pnp2 + return out + +mkernel_gaussian = _KernelGaussian(r=4.0, stats=_stats_gaus) + +# def mkernel_gaussian(X): +# x2 = X ** 2 +# d = X.shape[0] +# return (2 * pi) ** (-d / 2) * exp(-0.5 * x2.sum(axis=0)) + + +class _KernelLaplace(_Kernel): + + def _kernel(self, x): + absX = np.abs(x) + return exp(-absX.sum(axis=0)) + + def norm_factor(self, d=1, n=None): + return 2 ** d +mkernel_laplace = _KernelLaplace(r=7.0, stats=_stats_lapl) + + +class _KernelLogistic(_Kernel): + + def _kernel(self, x): + s = exp(x) + return np.prod(s / (s + 1) ** 2, axis=0) +mkernel_logistic = _KernelLogistic(r=7.0, stats=_stats_logi) + +_MKERNEL_DICT = dict( + epan=mkernel_epanechnikov, + biwe=mkernel_biweight, + triw=mkernel_triweight, + p1ep=mkernel_p1epanechnikov, + p1bi=mkernel_p1biweight, + p1tr=mkernel_p1triweight, + rect=mkernel_rectangular, + tria=mkernel_triangular, + lapl=mkernel_laplace, + logi=mkernel_logistic, + 
gaus=mkernel_gaussian +) +_KERNEL_EXPONENT_DICT = dict( + re=0, sp=0, ep=1, bi=2, tr=3, fo=4, fi=5, si=6, se=7) + + +class Kernel(object): + + """Multivariate kernel. + + Parameters + ---------- + name : string + defining the kernel. Valid options are: + 'epanechnikov' - Epanechnikov kernel. + 'biweight' - Bi-weight kernel. + 'triweight' - Tri-weight kernel. + 'p1epanechnikov' - product of 1D Epanechnikov kernel. + 'p1biweight' - product of 1D Bi-weight kernel. + 'p1triweight' - product of 1D Tri-weight kernel. + 'triangular' - Triangular kernel. + 'gaussian' - Gaussian kernel + 'rectangular' - Rectangular kernel. + 'laplace' - Laplace kernel. + 'logistic' - Logistic kernel. + Note that only the first 4 letters of the kernel name is needed. + + Examples + -------- + N = 20 + data = np.random.rayleigh(1, size=(N,)) + >>> data = np.array([ + ... 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887, + ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, + ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, + ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) + + >>> import wafo.kdetools as wk + >>> gauss = wk.Kernel('gaussian') + >>> gauss.stats() + (1, 0.28209479177387814, 0.21157109383040862) + >>> np.allclose(gauss.hscv(data), 0.21779575) + True + >>> np.allclose(gauss.hstt(data), 0.16341135) + True + >>> np.allclose(gauss.hste(data), 0.19179399) + True + >>> np.allclose(gauss.hldpi(data), 0.22502733) + True + >>> wk.Kernel('laplace').stats() + (2, 0.25, inf) + + >>> triweight = wk.Kernel('triweight') + >>> np.allclose(triweight.stats(), + ... (0.1111111111111111, 0.81585081585081587, np.inf)) + True + >>> np.allclose(triweight(np.linspace(-1,1,11)), + ... [ 0., 0.046656, 0.262144, 0.592704, 0.884736, 1., + ... 0.884736, 0.592704, 0.262144, 0.046656, 0.]) + True + >>> np.allclose(triweight.hns(data), 0.82, rtol=1e-2) + True + >>> np.allclose(triweight.hos(data), 0.88, rtol=1e-2) + True + >>> np.allclose(triweight.hste(data), 0.57, rtol=1e-2) + True + >>> np.allclose(triweight.hscv(data), 0.648, rtol=1e-2) + True + + See also + -------- + mkernel + + References + ---------- + B. W. Silverman (1986) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp. 43, 76 + + Wand, M. P. and Jones, M. C. (1995) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp 31, 103, 175 + + """ + + def __init__(self, name, fun='hste'): # 'hns'): + self.kernel = _MKERNEL_DICT[name[:4]] + self.get_smoothing = getattr(self, fun) + + @property + def name(self): + return self.kernel.__class__.__name__.replace('_Kernel', '').title() + + def stats(self): + """Return some 1D statistics of the kernel. + + Returns + ------- + mu2 : real scalar + 2'nd order moment, i.e.,int(x^2*kernel(x)) + R : real scalar + integral of squared kernel, i.e., int(kernel(x)^2) + Rdd : real scalar + integral of squared double derivative of kernel, + i.e., int( (kernel''(x))^2 ). + + Reference + --------- + Wand,M.P. and Jones, M.C. (1995) + 'Kernel smoothing' + Chapman and Hall, pp 176. + + """ + return self.kernel.stats + + def deriv4_6_8_10(self, t, numout=4): + return self.kernel.deriv4_6_8_10(t, numout) + + def effective_support(self): + return self.kernel.effective_support() + + def hns(self, data): + """Returns Normal Scale Estimate of Smoothing Parameter. 
+ + Parameter + --------- + data : 2D array + shape d x n (d = # dimensions ) + + Returns + ------- + h : array-like + one dimensional optimal value for smoothing parameter + given the data and kernel. size D + + HNS only gives an optimal value with respect to mean integrated + square error, when the true underlying distribution + is Gaussian. This works reasonably well if the data resembles a + Gaussian distribution. However if the distribution is asymmetric, + multimodal or have long tails then HNS may return a to large + smoothing parameter, i.e., the KDE may be oversmoothed and mask + important features of the data. (=> large bias). + One way to remedy this is to reduce H by multiplying with a constant + factor, e.g., 0.85. Another is to try different values for H and make a + visual check by eye. + + Example: + data = rndnorm(0, 1,20,1) + h = hns(data,'epan') + + See also: + --------- + hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt, kde + + Reference: + --------- + B. W. Silverman (1986) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp 43-48 + Wand,M.P. and Jones, M.C. (1995) + 'Kernel smoothing' + Chapman and Hall, pp 60--63 + + """ + + a = np.atleast_2d(data) + n = a.shape[1] + + # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) + mu2, R, _Rdd = self.stats() + amise_constant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) + iqr = iqrange(a, axis=1) # interquartile range + stdA = np.std(a, axis=1, ddof=1) + # use of interquartile range guards against outliers. + # the use of interquartile range is better if + # the distribution is skew or have heavy tails + # This lessen the chance of oversmoothing. + return np.where(iqr > 0, + np.minimum(stdA, iqr / 1.349), stdA) * amise_constant + + def hos(self, data): + """Returns Oversmoothing Parameter. + + Parameter + --------- + data = data matrix, size N x D (D = # dimensions ) + + Returns + ------- + h : vector size 1 x D + one dimensional maximum smoothing value for smoothing parameter + given the data and kernel. + + The oversmoothing or maximal smoothing principle relies on the fact + that there is a simple upper bound for the AMISE-optimal bandwidth for + estimation of densities with a fixed value of a particular scale + measure. While HOS will give too large bandwidth for optimal estimation + of a general density it provides an excellent starting point for + subjective choice of bandwidth. A sensible strategy is to plot an + estimate with bandwidth HOS and then sucessively look at plots based on + convenient fractions of HOS to see what features are present in the + data for various amount of smoothing. The relation to HNS is given by: + + HOS = HNS/0.93 + + Example: + -------- + data = rndnorm(0, 1,20,1) + h = hos(data,'epan'); + + See also hste, hbcv, hboot, hldpi, hlscv, hscv, hstt, kde, kdefun + + Reference + --------- + B. W. Silverman (1986) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp 43-48 + + Wand,M.P. and Jones, M.C. (1986) + 'Kernel smoothing' + Chapman and Hall, pp 60--63 + + """ + return self.hns(data) / 0.93 + + def _hmns_scale(self, d): + name = self.name[:4].lower() + if name == 'epan': # Epanechnikov kernel + a = (8.0 * (d + 4.0) * (2 * sqrt(pi)) ** d / + sphere_volume(d)) ** (1. 
/ (4.0 + d)) + elif name == 'biwe': # Bi-weight kernel + a = 2.7779 + if d > 2: + raise NotImplementedError('Not implemented for d>2') + elif name == 'triw': # Triweight + a = 3.12 + if d > 2: + raise NotImplementedError('not implemented for d>2') + elif name == 'gaus': # Gaussian kernel + a = (4.0 / (d + 2.0)) ** (1. / (d + 4.0)) + else: + raise ValueError('Unknown kernel.') + return a + + def hmns(self, data): + """Returns Multivariate Normal Scale Estimate of Smoothing Parameter. + + CALL: h = hmns(data,kernel) + + h = M dimensional optimal value for smoothing parameter + given the data and kernel. size D x D + data = data matrix, size D x N (D = # dimensions ) + kernel = 'epanechnikov' - Epanechnikov kernel. + 'biweight' - Bi-weight kernel. + 'triweight' - Tri-weight kernel. + 'gaussian' - Gaussian kernel + + Note that only the first 4 letters of the kernel name is needed. + + HMNS only gives a optimal value with respect to mean integrated + square error, when the true underlying distribution is Multivariate + Gaussian. This works reasonably well if the data resembles a + Multivariate Gaussian distribution. However if the distribution is + asymmetric, multimodal or have long tails then HNS is maybe more + appropriate. + + Example: + data = rndnorm(0, 1,20,2) + h = hmns(data,'epan') + + See also + -------- + + hns, hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt + + Reference + ---------- + B. W. Silverman (1986) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp 43-48, 87 + + Wand,M.P. and Jones, M.C. (1995) + 'Kernel smoothing' + Chapman and Hall, pp 60--63, 86--88 + + """ + # TODO: implement more kernels + + a = np.atleast_2d(data) + d, n = a.shape + if d == 1: + return self.hns(data) + scale = self._hmns_scale(d) + cov_a = np.cov(a) + return scale * linalg.sqrtm(cov_a).real * n ** (-1. / (d + 4)) + + def hste(self, data, h0=None, inc=128, maxit=100, releps=0.01, abseps=0.0): + '''HSTE 2-Stage Solve the Equation estimate of smoothing parameter. + + CALL: hs = hste(data,kernel,h0) + + hs = one dimensional value for smoothing parameter + given the data and kernel. size 1 x D + data = data matrix, size N x D (D = # dimensions ) + kernel = 'gaussian' - Gaussian kernel (default) + ( currently the only supported kernel) + h0 = initial starting guess for hs (default h0=hns(A,kernel)) + + Example: + x = rndnorm(0,1,50,1); + hs = hste(x,'gauss'); + + See also hbcv, hboot, hos, hldpi, hlscv, hscv, hstt, kde, kdefun + + Reference + --------- + B. W. Silverman (1986) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp 57--61 + + Wand,M.P. and Jones, M.C. (1986) + 'Kernel smoothing' + Chapman and Hall, pp 74--75 + ''' + # TODO: NB: this routine can be made faster: + # TODO: replace the iteration in the end with a Newton Raphson scheme + + A = np.atleast_2d(data) + d, n = A.shape + + # R = int(mkernel(x)^2), mu2 = int(x^2*mkernel(x)) + mu2, R, _Rdd = self.stats() + + amise_constant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) + ste_constant = R / (mu2 ** (2) * n) + + sigmaA = self.hns(A) / amise_constant + if h0 is None: + h0 = sigmaA * amise_constant + + h = np.asarray(h0, dtype=float) + + nfft = inc * 2 + amin = A.min(axis=1) # Find the minimum value of A. + amax = A.max(axis=1) # Find the maximum value of A. + arange = amax - amin # Find the range of A. + + # xa holds the x 'axis' vector, defining a grid of x values where + # the k.d. function will be evaluated. 
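+        # Descriptive note on the code below (hste): the grid is padded by
+        # arange/8 on each side so boundary kernels are not truncated.  For
+        # each dimension the two-stage solve-the-equation procedure then
+        #   1) takes normal-reference values for psi6 and psi8,
+        #   2) turns them into pilot bandwidths g1 and g2 and re-estimates
+        #      psi4 and psi6 by FFT-convolving the binned data (gridcount)
+        #      with the 4th/6th Gaussian kernel derivatives,
+        #   3) iterates the step h <- (ste_constant2 / psi4(gamma(h)))**(1/5)
+        #      until successive values agree within max(releps*h, abseps) or
+        #      maxit is reached, rescaling when the kernel is not Gaussian.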
+ + ax1 = amin - arange / 8.0 + bx1 = amax + arange / 8.0 + + kernel2 = Kernel('gauss') + mu2, R, _Rdd = kernel2.stats() + ste_constant2 = R / (mu2 ** (2) * n) + fft = np.fft.fft + ifft = np.fft.ifft + + for dim in range(d): + s = sigmaA[dim] + ax = ax1[dim] + bx = bx1[dim] + + xa = np.linspace(ax, bx, inc) + xn = np.linspace(0, bx - ax, inc) + + c = gridcount(A[dim], xa) + + # Step 1 + psi6NS = -15 / (16 * sqrt(pi) * s ** 7) + psi8NS = 105 / (32 * sqrt(pi) * s ** 9) + + # Step 2 + k40, k60 = kernel2.deriv4_6_8_10(0, numout=2) + g1 = (-2 * k40 / (mu2 * psi6NS * n)) ** (1.0 / 7) + g2 = (-2 * k60 / (mu2 * psi8NS * n)) ** (1.0 / 9) + + # Estimate psi6 given g2. + # kernel weights. + kw4, kw6 = kernel2.deriv4_6_8_10(xn / g2, numout=2) + # Apply fftshift to kw. + kw = np.r_[kw6, 0, kw6[-1:0:-1]] + z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. + psi6 = np.sum(c * z[:inc]) / (n * (n - 1) * g2 ** 7) + + # Estimate psi4 given g1. + kw4 = kernel2.deriv4_6_8_10(xn / g1, numout=1) # kernel weights. + kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply 'fftshift' to kw. + z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. + psi4 = np.sum(c * z[:inc]) / (n * (n - 1) * g1 ** 5) + + h1 = h[dim] + h_old = 0 + count = 0 + + while ((abs(h_old - h1) > max(releps * h1, abseps)) and + (count < maxit)): + count += 1 + h_old = h1 + + # Step 3 + gamma_ = ((2 * k40 * mu2 * psi4 * h1 ** 5) / + (-psi6 * R)) ** (1.0 / 7) + + # Now estimate psi4 given gamma_. + # kernel weights. + kw4 = kernel2.deriv4_6_8_10(xn / gamma_, numout=1) + kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply 'fftshift' to kw. + z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. + + psi4Gamma = np.sum(c * z[:inc]) / (n * (n - 1) * gamma_ ** 5) + + # Step 4 + h1 = (ste_constant2 / psi4Gamma) ** (1.0 / 5) + + # Kernel other than Gaussian scale bandwidth + h1 = h1 * (ste_constant / ste_constant2) ** (1.0 / 5) + + if count >= maxit: + warnings.warn('The obtained value did not converge.') + + h[dim] = h1 + # end for dim loop + return h + + def hisj(self, data, inc=512, L=7): + ''' + HISJ Improved Sheather-Jones estimate of smoothing parameter. + + Unlike many other implementations, this one is immune to problems + caused by multimodal densities with widely separated modes. The + estimation does not deteriorate for multimodal densities, because + it do not assume a parametric model for the data. + + Parameters + ---------- + data - a vector of data from which the density estimate is constructed + inc - the number of mesh points used in the uniform discretization + + Returns + ------- + bandwidth - the optimal bandwidth + + Reference + --------- + Kernel density estimation via diffusion + Z. I. Botev, J. F. Grotowski, and D. P. Kroese (2010) + Annals of Statistics, Volume 38, Number 5, pages 2916-2957. + ''' + A = np.atleast_2d(data) + d, n = A.shape + + # R = int(mkernel(x)^2), mu2 = int(x^2*mkernel(x)) + mu2, R, _Rdd = self.stats() + ste_constant = R / (n * mu2 ** 2) + + amin = A.min(axis=1) # Find the minimum value of A. + amax = A.max(axis=1) # Find the maximum value of A. + arange = amax - amin # Find the range of A. + + # xa holds the x 'axis' vector, defining a grid of x values where + # the k.d. function will be evaluated. 
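+        # Descriptive note on the code below (hisj, following Botev,
+        # Grotowski and Kroese, 2010): the data in each dimension are binned
+        # on a uniform grid of 'inc' points (gridcount), the discrete cosine
+        # transform of the bin fractions gives the coefficients a, and the
+        # optimal squared (rescaled) bandwidth t_star is the root of the
+        # fixed-point equation t = zeta*gamma^[L](t), located with
+        # scipy.optimize.brentq after bracketing a sign change.  The returned
+        # bandwidth is sqrt(t_star) scaled back to the padded data range and
+        # adjusted when the kernel is not Gaussian.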
+ + ax1 = amin - arange / 8.0 + bx1 = amax + arange / 8.0 + + kernel2 = Kernel('gauss') + mu2, R, _Rdd = kernel2.stats() + ste_constant2 = R / (mu2 ** (2) * n) + + def fixed_point(t, N, I, a2): + ''' this implements the function t-zeta*gamma^[L](t)''' + + prod = np.prod + # L = 7 + logI = np.log(I) + f = 2 * pi ** (2 * L) * \ + (a2 * exp(L * logI - I * pi ** 2 * t)).sum() + for s in range(L - 1, 1, -1): + K0 = prod(np.r_[1:2 * s:2]) / sqrt(2 * pi) + const = (1 + (1. / 2) ** (s + 1. / 2)) / 3 + time = (2 * const * K0 / N / f) ** (2. / (3 + 2 * s)) + f = 2 * pi ** (2 * s) * \ + (a2 * exp(s * logI - I * pi ** 2 * time)).sum() + return t - (2 * N * sqrt(pi) * f) ** (-2. / 5) + + h = np.empty(d) + for dim in range(d): + ax = ax1[dim] + bx = bx1[dim] + xa = np.linspace(ax, bx, inc) + R = bx - ax + + c = gridcount(A[dim], xa) + N = len(set(A[dim])) + a = dct(c / len(A[dim]), norm=None) + + # now compute the optimal bandwidth^2 using the referenced method + I = np.asfarray(np.arange(1, inc)) ** 2 + a2 = (a[1:] / 2) ** 2 + + def fun(t): + return fixed_point(t, N, I, a2) + x = np.linspace(0, 0.1, 150) + ai = x[0] + f0 = fun(ai) + for bi in x[1:]: + f1 = fun(bi) + if f1 * f0 <= 0: + # print('ai = %g, bi = %g' % (ai,bi)) + break + else: + ai = bi + # y = np.asarray([fun(j) for j in x]) + # plt.figure(1) + # plt.plot(x,y) + # plt.show() + + # use fzero to solve the equation t=zeta*gamma^[5](t) + try: + t_star = optimize.brentq(fun, a=ai, b=bi) + except: + t_star = 0.28 * N ** (-2. / 5) + warnings.warn('Failure in obtaining smoothing parameter') + + # smooth the discrete cosine transform of initial data using t_star + # a_t = a*exp(-np.arange(inc)**2*pi**2*t_star/2) + # now apply the inverse discrete cosine transform + # density = idct(a_t)/R; + + # take the rescaling of the data into account + bandwidth = sqrt(t_star) * R + + # Kernel other than Gaussian scale bandwidth + h[dim] = bandwidth * (ste_constant / ste_constant2) ** (1.0 / 5) + # end for dim loop + return h + + def hstt(self, data, h0=None, inc=128, maxit=100, releps=0.01, abseps=0.0): + '''HSTT Scott-Tapia-Thompson estimate of smoothing parameter. + + CALL: hs = hstt(data,kernel) + + hs = one dimensional value for smoothing parameter + given the data and kernel. size 1 x D + data = data matrix, size N x D (D = # dimensions ) + kernel = 'epanechnikov' - Epanechnikov kernel. (default) + 'biweight' - Bi-weight kernel. + 'triweight' - Tri-weight kernel. + 'triangular' - Triangular kernel. + 'gaussian' - Gaussian kernel + 'rectangular' - Rectangular kernel. + 'laplace' - Laplace kernel. + 'logistic' - Logistic kernel. + + HSTT returns Scott-Tapia-Thompson (STT) estimate of smoothing + parameter. This is a Solve-The-Equation rule (STE). + Simulation studies shows that the STT estimate of HS + is a good choice under a variety of models. A comparison with + likelihood cross-validation (LCV) indicates that LCV performs slightly + better for short tailed densities. + However, STT method in contrast to LCV is insensitive to outliers. + + Example + ------- + x = rndnorm(0,1,50,1); + hs = hstt(x,'gauss'); + + See also + -------- + hste, hbcv, hboot, hos, hldpi, hlscv, hscv, kde, kdebin + + Reference + --------- + B. W. Silverman (1986) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp 57--61 + ''' + A = np.atleast_2d(data) + d, n = A.shape + + # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) + mu2, R, _Rdd = self.stats() + + amise_constant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. 
/ 5) + ste_constant = R / (mu2 ** (2) * n) + + sigmaA = self.hns(A) / amise_constant + if h0 is None: + h0 = sigmaA * amise_constant + + h = np.asarray(h0, dtype=float) + + nfft = inc * 2 + amin = A.min(axis=1) # Find the minimum value of A. + amax = A.max(axis=1) # Find the maximum value of A. + arange = amax - amin # Find the range of A. + + # xa holds the x 'axis' vector, defining a grid of x values where + # the k.d. function will be evaluated. + + ax1 = amin - arange / 8.0 + bx1 = amax + arange / 8.0 + + fft = np.fft.fft + ifft = np.fft.ifft + for dim in range(d): + s = sigmaA[dim] + datan = A[dim] / s + ax = ax1[dim] / s + bx = bx1[dim] / s + + xa = np.linspace(ax, bx, inc) + xn = np.linspace(0, bx - ax, inc) + + c = gridcount(datan, xa) + + count = 1 + h_old = 0 + h1 = h[dim] / s + delta = (bx - ax) / (inc - 1) + while ((abs(h_old - h1) > max(releps * h1, abseps)) and + (count < maxit)): + count += 1 + h_old = h1 + + kw4 = self.kernel(xn / h1) / (n * h1 * self.norm_factor(d=1)) + kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply 'fftshift' to kw. + f = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. + + # Estimate psi4=R(f'') using simple finite differences and + # quadrature. + ix = np.arange(1, inc - 1) + z = ((f[ix + 1] - 2 * f[ix] + f[ix - 1]) / delta ** 2) ** 2 + psi4 = delta * z.sum() + h1 = (ste_constant / psi4) ** (1. / 5) + + if count >= maxit: + warnings.warn('The obtained value did not converge.') + + h[dim] = h1 * s + # end % for dim loop + return h + + def hscv(self, data, hvec=None, inc=128, maxit=100, fulloutput=False): + ''' + HSCV Smoothed cross-validation estimate of smoothing parameter. + + CALL: [hs,hvec,score] = hscv(data,kernel,hvec) + + hs = smoothing parameter + hvec = vector defining possible values of hs + (default linspace(0.25*h0,h0,100), h0=0.62) + score = score vector + data = data vector + kernel = 'gaussian' - Gaussian kernel the only supported + + Note that only the first 4 letters of the kernel name is needed. + + Example: + data = rndnorm(0,1,20,1) + [hs hvec score] = hscv(data,'epan'); + plot(hvec,score) + See also hste, hbcv, hboot, hos, hldpi, hlscv, hstt, kde, kdefun + + Wand,M.P. and Jones, M.C. (1986) + 'Kernel smoothing' + Chapman and Hall, pp 75--79 + ''' + # TODO: Add support for other kernels than Gaussian + A = np.atleast_2d(data) + d, n = A.shape + + # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) + mu2, R, _Rdd = self.stats() + + amise_constant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) + ste_constant = R / (mu2 ** (2) * n) + + sigmaA = self.hns(A) / amise_constant + if hvec is None: + H = amise_constant / 0.93 + hvec = np.linspace(0.25 * H, H, maxit) + hvec = np.asarray(hvec, dtype=float) + + steps = len(hvec) + score = np.zeros(steps) + + nfft = inc * 2 + amin = A.min(axis=1) # Find the minimum value of A. + amax = A.max(axis=1) # Find the maximum value of A. + arange = amax - amin # Find the range of A. + + # xa holds the x 'axis' vector, defining a grid of x values where + # the k.d. function will be evaluated. + + ax1 = amin - arange / 8.0 + bx1 = amax + arange / 8.0 + + kernel2 = Kernel('gauss') + mu2, R, _Rdd = kernel2.stats() + ste_constant2 = R / (mu2 ** (2) * n) + fft = np.fft.fft + ifft = np.fft.ifft + + h = np.zeros(d) + hvec = hvec * (ste_constant2 / ste_constant) ** (1. / 5.) + + k40, k60, k80, k100 = kernel2.deriv4_6_8_10(0, numout=4) + psi8 = 105 / (32 * sqrt(pi)) + psi12 = 3465. / (512 * sqrt(pi)) + g1 = (-2. * k60 / (mu2 * psi8 * n)) ** (1. / 9.) + g2 = (-2. * k100 / (mu2 * psi12 * n)) ** (1. / 13.) 
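+        # Descriptive note on the code below (hscv): for each dimension the
+        # loop estimates the density functionals psi4, psi6, psi8 and psi10
+        # from the binned data using the pilot bandwidths g1..g4, and then
+        # evaluates the smoothed cross-validation score for every candidate
+        # bandwidth in hvec: a diagonal term 1/(2*n*h*sqrt(pi)) plus a double
+        # sum of Gaussian kernel terms over all pairwise differences of the
+        # scaled data at the three scales sig1, sig2 and sig3.  The candidate
+        # minimising the score is returned, rescaled for non-Gaussian kernels.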
+ + for dim in range(d): + s = sigmaA[dim] + ax = ax1[dim] / s + bx = bx1[dim] / s + datan = A[dim] / s + + xa = np.linspace(ax, bx, inc) + xn = np.linspace(0, bx - ax, inc) + + c = gridcount(datan, xa) + + kw4, kw6 = kernel2.deriv4_6_8_10(xn / g1, numout=2) + kw = np.r_[kw6, 0, kw6[-1:0:-1]] + z = np.real(ifft(fft(c, nfft) * fft(kw))) + psi6 = np.sum(c * z[:inc]) / (n ** 2 * g1 ** 7) + + kw4, kw6, kw8, kw10 = kernel2.deriv4_6_8_10(xn / g2, numout=4) + kw = np.r_[kw10, 0, kw10[-1:0:-1]] + z = np.real(ifft(fft(c, nfft) * fft(kw))) + psi10 = np.sum(c * z[:inc]) / (n ** 2 * g2 ** 11) + + g3 = (-2. * k40 / (mu2 * psi6 * n)) ** (1. / 7.) + g4 = (-2. * k80 / (mu2 * psi10 * n)) ** (1. / 11.) + + kw4 = kernel2.deriv4_6_8_10(xn / g3, numout=1) + kw = np.r_[kw4, 0, kw4[-1:0:-1]] + z = np.real(ifft(fft(c, nfft) * fft(kw))) + psi4 = np.sum(c * z[:inc]) / (n ** 2 * g3 ** 5) + + kw4, kw6, kw8 = kernel2.deriv4_6_8_10(xn / g3, numout=3) + kw = np.r_[kw8, 0, kw8[-1:0:-1]] + z = np.real(ifft(fft(c, nfft) * fft(kw))) + psi8 = np.sum(c * z[:inc]) / (n ** 2 * g4 ** 9) + + const = (441. / (64 * pi)) ** (1. / 18.) * \ + (4 * pi) ** (-1. / 5.) * \ + psi4 ** (-2. / 5.) * psi8 ** (-1. / 9.) + + M = np.atleast_2d(datan) + + Y = (M - M.T).ravel() + + for i in range(steps): + g = const * n ** (-23. / 45) * hvec[i] ** (-2) + sig1 = sqrt(2 * hvec[i] ** 2 + 2 * g ** 2) + sig2 = sqrt(hvec[i] ** 2 + 2 * g ** 2) + sig3 = sqrt(2 * g ** 2) + term2 = np.sum(kernel2(Y / sig1) / sig1 - 2 * kernel2( + Y / sig2) / sig2 + kernel2(Y / sig3) / sig3) + + score[i] = 1. / (n * hvec[i] * 2. * sqrt(pi)) + term2 / n ** 2 + + idx = score.argmin() + # Kernel other than Gaussian scale bandwidth + h[dim] = hvec[idx] * (ste_constant / ste_constant2) ** (1 / 5) + if idx == 0: + warnings.warn("Optimum is probably lower than " + "hs={0:g} for dim={1:d}".format(h[dim] * s, dim)) + elif idx == maxit - 1: + msg = "Optimum is probably higher than hs={0:g] for dim={1:d}" + warnings.warn(msg.format(h[dim] * s, dim)) + + hvec = hvec * (ste_constant / ste_constant2) ** (1 / 5) + if fulloutput: + return h * sigmaA, score, hvec, sigmaA + else: + return h * sigmaA + + def hldpi(self, data, L=2, inc=128): + '''HLDPI L-stage Direct Plug-In estimate of smoothing parameter. + + CALL: hs = hldpi(data,kernel,L) + + hs = one dimensional value for smoothing parameter + given the data and kernel. size 1 x D + data = data matrix, size N x D (D = # dimensions ) + kernel = 'epanechnikov' - Epanechnikov kernel. + 'biweight' - Bi-weight kernel. + 'triweight' - Tri-weight kernel. + 'triangluar' - Triangular kernel. + 'gaussian' - Gaussian kernel + 'rectangular' - Rectanguler kernel. + 'laplace' - Laplace kernel. + 'logistic' - Logistic kernel. + L = 0,1,2,3,... (default 2) + + Note that only the first 4 letters of the kernel name is needed. + + Example: + x = rndnorm(0,1,50,1); + hs = hldpi(x,'gauss',1); + + See also hste, hbcv, hboot, hos, hlscv, hscv, hstt, kde, kdefun + + Wand,M.P. and Jones, M.C. (1995) + 'Kernel smoothing' + Chapman and Hall, pp 67--74 + ''' + A = np.atleast_2d(data) + d, n = A.shape + + # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) + mu2, R, _Rdd = self.stats() + + amise_constant = (8 * sqrt(pi) * R / (3 * n * mu2 ** 2)) ** (1. / 5) + ste_constant = R / (n * mu2 ** 2) + + sigmaA = self.hns(A) / amise_constant + + nfft = inc * 2 + amin = A.min(axis=1) # Find the minimum value of A. + amax = A.max(axis=1) # Find the maximum value of A. + arange = amax - amin # Find the range of A. 
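+        # Descriptive note on the code below (hldpi, Wand and Jones, 1995,
+        # pp 67--74): the loop starts from the normal-reference value of the
+        # functional psi_r with r = 2*L + 4 (Eq. 3.7) and steps down from
+        # stage L to 1.  At each stage a pilot bandwidth gi is computed from
+        # the current psi estimate and the next lower-order functional is
+        # re-estimated by FFT-convolving the binned data with the matching
+        # Gaussian kernel derivative.  The final psi4 estimate gives
+        # h = (ste_constant / psi4)**(1/5).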
+ + # xa holds the x 'axis' vector, defining a grid of x values where + # the k.d. function will be evaluated. + + ax1 = amin - arange / 8.0 + bx1 = amax + arange / 8.0 + + kernel2 = Kernel('gauss') + mu2, _R, _Rdd = kernel2.stats() + + fft = np.fft.fft + ifft = np.fft.ifft + + h = np.zeros(d) + for dim in range(d): + s = sigmaA[dim] + datan = A[dim] # / s + ax = ax1[dim] # / s + bx = bx1[dim] # / s + + xa = np.linspace(ax, bx, inc) + xn = np.linspace(0, bx - ax, inc) + + c = gridcount(datan, xa) + + r = 2 * L + 4 + rd2 = L + 2 + + # Eq. 3.7 in Wand and Jones (1995) + psi_r = (-1) ** (rd2) * np.prod( + np.r_[rd2 + 1:r + 1]) / (sqrt(pi) * (2 * s) ** (r + 1)) + psi = psi_r + if L > 0: + # High order derivatives of the Gaussian kernel + Kd = kernel2.deriv4_6_8_10(0, numout=L) + + # L-stage iterations to estimate PSI_4 + for ix in range(L, 0, -1): + gi = (-2 * Kd[ix - 1] / + (mu2 * psi * n)) ** (1. / (2 * ix + 5)) + + # Obtain the kernel weights. + kw0 = kernel2.deriv4_6_8_10(xn / gi, numout=ix) + if ix > 1: + kw0 = kw0[-1] + # Apply 'fftshift' to kw. + kw = np.r_[kw0, 0, kw0[inc - 1:0:-1]] + + # Perform the convolution. + z = np.real(ifft(fft(c, nfft) * fft(kw))) + + psi = np.sum(c * z[:inc]) / (n ** 2 * gi ** (2 * ix + 3)) + # end + # end + h[dim] = (ste_constant / psi) ** (1. / 5) + return h + + def norm_factor(self, d=1, n=None): + return self.kernel.norm_factor(d, n) + + def eval_points(self, points): + return self.kernel(np.atleast_2d(points)) + __call__ = eval_points + + +def mkernel(X, kernel): + """MKERNEL Multivariate Kernel Function. + + Paramaters + ---------- + X : array-like + matrix size d x n (d = # dimensions, n = # evaluation points) + kernel : string + defining kernel + 'epanechnikov' - Epanechnikov kernel. + 'biweight' - Bi-weight kernel. + 'triweight' - Tri-weight kernel. + 'p1epanechnikov' - product of 1D Epanechnikov kernel. + 'p1biweight' - product of 1D Bi-weight kernel. + 'p1triweight' - product of 1D Tri-weight kernel. + 'triangular' - Triangular kernel. + 'gaussian' - Gaussian kernel + 'rectangular' - Rectangular kernel. + 'laplace' - Laplace kernel. + 'logistic' - Logistic kernel. + Note that only the first 4 letters of the kernel name is needed. + + Returns + ------- + z : ndarray + kernel function values evaluated at X + + See also + -------- + kde, kdefun, kdebin + + References + ---------- + B. W. Silverman (1986) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp. 43, 76 + + Wand, M. P. and Jones, M. C. (1995) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp 31, 103, 175 + + """ + fun = _MKERNEL_DICT[kernel[:4]] + return fun(np.atleast_2d(X)) + + +if __name__ == '__main__': + test_docstrings(__file__) diff --git a/wafo/testing.py b/wafo/testing.py new file mode 100644 index 0000000..6d5329b --- /dev/null +++ b/wafo/testing.py @@ -0,0 +1,18 @@ +''' +Created on 15. des. 2016 + +@author: pab +''' +import inspect + + +def test_docstrings(name=''): + import doctest + if not name: + name = inspect.stack()[1][1] + print('Testing docstrings in {}'.format(name)) + doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE | + doctest.ELLIPSIS) + +if __name__ == '__main__': + pass diff --git a/wafo/tests/test_kdetools.py b/wafo/tests/test_kdetools.py index 9db6d6a..bcbc0a7 100644 --- a/wafo/tests/test_kdetools.py +++ b/wafo/tests/test_kdetools.py @@ -3,398 +3,473 @@ Created on 20. nov. 
2010 @author: pab ''' - -import numpy as np # @UnusedImport -from numpy import array # @UnusedImport -import wafo.kdetools as wk # @UnusedImport -# import pylab as plb - - -def test0_KDE1D(): - ''' - >>> data = array([0.75355792, 0.72779194, 0.94149169, 0.07841119, - ... 2.32291887, 1.10419995, 0.77055114, 0.60288273, - ... 1.36883635, 1.74754326, 1.09547561, 1.01671133, - ... 0.73211143, 0.61891719, 0.75903487, 1.8919469, - ... 0.72433808, 1.92973094, 0.44749838, 1.36508452]) - - >>> x = np.linspace(0, max(data.ravel()) + 1, 10) - >>> import wafo.kdetools as wk - >>> kde = wk.KDE(data, hs=0.5, alpha=0.5) - - >>> kde0 = wk.KDE(data, hs=0.5, alpha=0.0, inc=16) - - >>> kde0.eval_grid(x) - array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , - 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) - >>> kde0.eval_grid_fast(x) - array([ 0.20729484, 0.39865044, 0.53716945, 0.5169322 , 0.39060223, - 0.26441126, 0.16388801, 0.08388527, 0.03227164, 0.00883579]) - - >>> f = kde0.eval_grid_fast(); f - array([ 0.06807544, 0.12949095, 0.21985421, 0.33178031, 0.44334874, - 0.52429234, 0.55140336, 0.52221323, 0.45500674, 0.3752208 , - 0.30046799, 0.235667 , 0.17854402, 0.12721305, 0.08301993, - 0.04862324]) - >>> np.allclose(np.trapz(f,kde0.args), array([ 0.96716261])) - True - ''' - - -def test1_TKDE1D(): - ''' - N = 20 - data = np.random.rayleigh(1, size=(N,)) - >>> data = array([0.75355792, 0.72779194, 0.94149169, 0.07841119, - ... 2.32291887, 1.10419995, 0.77055114, 0.60288273, - ... 1.36883635, 1.74754326, 1.09547561, 1.01671133, - ... 0.73211143, 0.61891719, 0.75903487, 1.8919469, - ... 0.72433808, 1.92973094, 0.44749838, 1.36508452]) - - >>> x = np.linspace(0.01, max(data.ravel()) + 1, 10) - >>> kde = wk.TKDE(data, hs=0.5, L2=0.5) - >>> f = kde(x) - >>> f - array([ 1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318, - 0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882]) - - >>> np.allclose(np.trapz(f, x), 0.94787730659349068) - True - - h1 = plb.plot(x, f) # 1D probability density plot - ''' - - -def test1_KDE1D(): - ''' - N = 20 - data = np.random.rayleigh(1, size=(N,)) - >>> data = array([0.75355792, 0.72779194, 0.94149169, 0.07841119, - ... 2.32291887, 1.10419995, 0.77055114, 0.60288273, - ... 1.36883635, 1.74754326, 1.09547561, 1.01671133, - ... 0.73211143, 0.61891719, 0.75903487, 1.8919469, - ... 0.72433808, 1.92973094, 0.44749838, 1.36508452]) - - >>> x = np.linspace(0, max(data.ravel()) + 1, 10) - >>> kde = wk.KDE(data, hs=0.5) - >>> f = kde(x) - >>> np.allclose(f, [ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, - ... 0.3906213, 0.26381501, 0.16407362, 0.08270612, 0.02991145, - ... 0.00720821]) - True - >>> np.allclose(np.trapz(f, x), 0.92576174424281876) - True - - h1 = plb.plot(x, f) # 1D probability density plot - ''' - - -def test2_KDE1D(): - ''' - N = 20 - data = np.random.rayleigh(1, size=(N,)) - >>> data = array([ 0.75355792, 0.72779194, 0.94149169, 0.07841119, - ... 2.32291887, 1.10419995, 0.77055114, 0.60288273, 1.36883635, - ... 1.74754326, 1.09547561, 1.01671133, 0.73211143, 0.61891719, - ... 0.75903487, 1.8919469, 0.72433808, 1.92973094, 0.44749838, - ... 1.36508452]) - - >>> data = np.asarray([1,2]) - >>> x = np.linspace(0, max(data.ravel()) + 1, 10) - >>> kde = wk.KDE(data, hs=0.5) - >>> f = kde(x) - >>> np.allclose(f, - ... [ 0.0541248 , 0.16555235, 0.33084399, 0.45293325, 0.48345808, - ... 
0.48345808, 0.45293325, 0.33084399, 0.16555235, 0.0541248 ]) - True - >>> np.allclose(np.trapz(f, x), 0.97323338046725172) - True - - h1 = plb.plot(x, f) # 1D probability density plot - ''' - - -def test1a_KDE1D(): - ''' - N = 20 - data = np.random.rayleigh(1, size=(N,)) - >>> data = array([ - ... 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887, - ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, - ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, - ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) - - >>> x = np.linspace(0, max(data.ravel()) + 1, 10) - >>> kde = wk.KDE(data, hs=0.5, alpha=0.5) - >>> f = kde(x) - >>> np.allclose(f, - ... [ 0.17252055, 0.41014271, 0.61349072, 0.57023834, 0.37198073, - ... 0.21409279, 0.12738463, 0.07460326, 0.03956191, 0.01887164]) - True - >>> np.allclose(np.trapz(f, x), 0.92938023659047952) - True - - h1 = plb.plot(x, f) # 1D probability density plot - ''' - - -def test2a_KDE1D(): - ''' - N = 20 - data = np.random.rayleigh(1, size=(N,)) - >>> data = array([ - ... 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887, - ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, - ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, - ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) - - >>> data = np.asarray([1,2]) - >>> x = np.linspace(0, max(data.ravel()) + 1, 10) - >>> kde = wk.KDE(data, hs=0.5, alpha=0.5) - >>> f = kde(x) - >>> np.allclose(f, - ... [ 0.0541248 , 0.16555235, 0.33084399, 0.45293325, 0.48345808, - ... 0.48345808, 0.45293325, 0.33084399, 0.16555235, 0.0541248 ]) - True - >>> np.allclose(np.trapz(f, x), 0.97323338046725172) - True - - h1 = plb.plot(x, f) # 1D probability density plot - ''' - - -def test_KDE2D(): - ''' - N = 20 - data = np.random.rayleigh(1, size=(2, N)) - >>> data = array([[ - ... 0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399, - ... 0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145, - ... 0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821, - ... 0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964], - ... [1.44080694, 0.39973751, 1.331243 , 2.48895822, 1.18894158, - ... 1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689, - ... 0.870329 , 1.25106862, 0.5346619 , 0.47541236, 1.51930093, - ... 0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]]) - - >>> x = np.linspace(0, max(data.ravel()) + 1, 3) - - >>> kde = wk.KDE(data, hs=0.5, alpha=0.5) - - >>> kde0 = wk.KDE(data, hs=0.5, alpha=0.0, inc=16) - - >>> np.allclose(kde0.eval_grid(x, x), - ... [[ 3.27260963e-02, 4.21654678e-02, 5.85338634e-04], - ... [ 6.78845466e-02, 1.42195839e-01, 1.41676003e-03], - ... [ 1.39466746e-04, 4.26983850e-03, 2.52736185e-05]]) - True - >>> np.allclose(kde0.eval_grid_fast(x, x), - ... [[ 0.04435061, 0.06433531, 0.00413538], - ... [ 0.07218297, 0.12358196, 0.00928889], - ... [ 0.00161333, 0.00794858, 0.00058748]]) - True - ''' - - -def test_smooth_params(): - ''' - >>> data = np.array([[ - ... 0.932896 , 0.89522635, 0.80636346, 1.32283371, 0.27125435, - ... 1.91666304, 2.30736635, 1.13662384, 1.73071287, 1.06061127, - ... 0.99598512, 2.16396591, 1.23458213, 1.12406686, 1.16930431, - ... 0.73700592, 1.21135139, 0.46671506, 1.3530304 , 0.91419104], - ... [ 0.62759088, 0.23988169, 2.04909823, 0.93766571, 1.19343762, - ... 1.94954931, 0.84687514, 0.49284897, 1.05066204, 1.89088505, - ... 0.840738 , 1.02901457, 1.0758625 , 1.76357967, 0.45792897, - ... 
1.54488066, 0.17644313, 1.6798871 , 0.72583514, 2.22087245], - ... [ 1.69496432, 0.81791905, 0.82534709, 0.71642389, 0.89294732, - ... 1.66888649, 0.69036947, 0.99961448, 0.30657267, 0.98798713, - ... 0.83298728, 1.83334948, 1.90144186, 1.25781913, 0.07122458, - ... 2.42340852, 2.41342037, 0.87233305, 1.17537114, 1.69505988]]) - - >>> gauss = wk.Kernel('gaussian') - >>> gauss.hns(data) - array([ 0.18154437, 0.36207987, 0.37396219]) - >>> gauss.hos(data) - array([ 0.195209 , 0.3893332 , 0.40210988]) - >>> gauss.hmns(data) - array([[ 3.25196193e-01, -2.68892467e-02, 3.18932448e-04], - [ -2.68892467e-02, 3.91283306e-01, 2.38654678e-02], - [ 3.18932448e-04, 2.38654678e-02, 4.05123874e-01]]) - >>> gauss.hscv(data) - array([ 0.16858959, 0.32739383, 0.3046287 ]) - - >>> gauss.hstt(data) - array([ 0.18099075, 0.50409881, 0.11018912]) - - >>> gauss.hste(data) - array([ 0.16750009, 0.29059113, 0.17994255]) - - >>> gauss.hldpi(data) - array([ 0.1732289 , 0.33159097, 0.3107633 ]) - - >>> np.allclose(gauss.hisj(data), - ... array([ 0.29542502, 0.74277133, 0.51899114])) - True - ''' - - -def test_gridcount_1D(): - ''' - N = 20 - data = np.random.rayleigh(1, size=(N,)) - >>> data = array([ - ... 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887, - ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, - ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, - ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) - - >>> x = np.linspace(0, max(data.ravel()) + 1, 10) - >>> dx = x[1] - x[0] - >>> c = wk.gridcount(data, x) - >>> np.allclose(c, - ... [ 0.78762626, 1.77520717, 7.99190087, 4.04054449, 1.67156643, - ... 2.38228499, 1.05933195, 0.29153785, 0. , 0. ]) - True - - h = plb.plot(x, c, '.') # 1D histogram - - h1 = plb.plot(x, c / dx / N) # 1D probability density plot - t = np.trapz(c / dx / N, x) - print(t) - ''' - - -def test_gridcount_2D(): - ''' - N = 20 - data = np.random.rayleigh(1, size=(2, N)) - >>> data = array([[ - ... 0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399, - ... 0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145, - ... 0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821, - ... 0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964], - ... [ 1.44080694, 0.39973751, 1.331243 , 2.48895822, 1.18894158, - ... 1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689, - ... 0.870329 , 1.25106862, 0.5346619 , 0.47541236, 1.51930093, - ... 0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]]) - - >>> x = np.linspace(0, max(data.ravel()) + 1, 5) - >>> dx = x[1] - x[0] - >>> X = np.vstack((x, x)) - >>> c = wk.gridcount(data, X) - >>> np.allclose(c, - ... [[ 0.38922806, 0.8987982 , 0.34676493, 0.21042807, 0. ], - ... [ 1.15012203, 5.16513541, 3.19250588, 0.55420752, 0. ], - ... [ 0.74293418, 3.42517219, 1.97923195, 0.76076621, 0. ], - ... [ 0.02063536, 0.31054405, 0.71865964, 0.13486633, 0. ], - ... [ 0. , 0. , 0. , 0. , 0. ]]) - True - - h = plb.plot(x, c, '.') # 1D histogram - - h1 = plb.plot(x, c / dx / N) # 1D probability density plot - t = np.trapz(c / dx / N, x) - print(t) - ''' - - -def test_gridcount_3D(): - ''' - N = 20 - data = np.random.rayleigh(1, size=(3, N)) - >>> data = np.array([[ - ... 0.932896 , 0.89522635, 0.80636346, 1.32283371, 0.27125435, - ... 1.91666304, 2.30736635, 1.13662384, 1.73071287, 1.06061127, - ... 0.99598512, 2.16396591, 1.23458213, 1.12406686, 1.16930431, - ... 0.73700592, 1.21135139, 0.46671506, 1.3530304 , 0.91419104], - ... 
[ 0.62759088, 0.23988169, 2.04909823, 0.93766571, 1.19343762, - ... 1.94954931, 0.84687514, 0.49284897, 1.05066204, 1.89088505, - ... 0.840738 , 1.02901457, 1.0758625 , 1.76357967, 0.45792897, - ... 1.54488066, 0.17644313, 1.6798871 , 0.72583514, 2.22087245], - ... [ 1.69496432, 0.81791905, 0.82534709, 0.71642389, 0.89294732, - ... 1.66888649, 0.69036947, 0.99961448, 0.30657267, 0.98798713, - ... 0.83298728, 1.83334948, 1.90144186, 1.25781913, 0.07122458, - ... 2.42340852, 2.41342037, 0.87233305, 1.17537114, 1.69505988]]) - - >>> x = np.linspace(0, max(data.ravel()) + 1, 3) - >>> dx = x[1] - x[0] - >>> X = np.vstack((x, x, x)) - >>> c = wk.gridcount(data, X) - >>> np.allclose(c, - ... [[[ 8.74229894e-01, 1.27910940e+00, 1.42033973e-01], - ... [ 1.94778915e+00, 2.59536282e+00, 3.28213680e-01], - ... [ 1.08429416e-01, 1.69571495e-01, 7.48896775e-03]], - ... [[ 1.44969128e+00, 2.58396370e+00, 2.45459949e-01], - ... [ 2.28951650e+00, 4.49653348e+00, 2.73167915e-01], - ... [ 1.10905565e-01, 3.18733817e-01, 1.12880816e-02]], - ... [[ 7.49265424e-02, 2.18142488e-01, 0.00000000e+00], - ... [ 8.53886762e-02, 3.73415131e-01, 0.00000000e+00], - ... [ 4.16196568e-04, 1.62218824e-02, 0.00000000e+00]]]) - True - ''' - - -def test_gridcount_4D(): - ''' - N = 20 - data = np.random.rayleigh(1, size=(2, N)) - >>> data = array([[ - ... 0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399, - ... 0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145], - ... [ 0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821, - ... 0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964], - ... [ 1.44080694, 0.39973751, 1.331243 , 2.48895822, 1.18894158, - ... 1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689], - ... [ 0.870329 , 1.25106862, 0.5346619 , 0.47541236, 1.51930093, - ... 0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]]) - - >>> x = np.linspace(0, max(data.ravel()) + 1, 3) - >>> dx = x[1] - x[0] - >>> X = np.vstack((x, x, x, x)) - >>> c = wk.gridcount(data, X) - >>> np.allclose(c, - ... [[[[ 1.77163904e-01, 1.87720108e-01, 0.00000000e+00], - ... [ 5.72573585e-01, 6.09557834e-01, 0.00000000e+00], - ... [ 3.48549923e-03, 4.05931870e-02, 0.00000000e+00]], - ... [[ 1.83770124e-01, 2.56357594e-01, 0.00000000e+00], - ... [ 4.35845892e-01, 6.14958970e-01, 0.00000000e+00], - ... [ 3.07662204e-03, 3.58312786e-02, 0.00000000e+00]], - ... [[ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00], - ... [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00], - ... [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00]]], - ... [[[ 3.41883175e-01, 5.97977973e-01, 0.00000000e+00], - ... [ 5.72071865e-01, 8.58566538e-01, 0.00000000e+00], - ... [ 3.46939323e-03, 4.04056116e-02, 0.00000000e+00]], - ... [[ 3.58861043e-01, 6.28962785e-01, 0.00000000e+00], - ... [ 8.80697705e-01, 1.47373158e+00, 0.00000000e+00], - ... [ 2.22868504e-01, 1.18008528e-01, 0.00000000e+00]], - ... [[ 2.91835067e-03, 2.60268355e-02, 0.00000000e+00], - ... [ 3.63686503e-02, 1.07959459e-01, 0.00000000e+00], - ... [ 1.88555613e-02, 7.06358976e-03, 0.00000000e+00]]], - ... [[[ 3.13810608e-03, 2.11731327e-02, 0.00000000e+00], - ... [ 6.71606255e-03, 4.53139824e-02, 0.00000000e+00], - ... [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00]], - ... [[ 7.05946179e-03, 5.44614852e-02, 0.00000000e+00], - ... [ 1.09099593e-01, 1.95935584e-01, 0.00000000e+00], - ... [ 6.61257395e-02, 2.47717418e-02, 0.00000000e+00]], - ... [[ 6.38695629e-04, 5.69610302e-03, 0.00000000e+00], - ... [ 1.00358265e-02, 2.44053065e-02, 0.00000000e+00], - ... 
[ 5.67244468e-03, 2.12498697e-03, 0.00000000e+00]]]]) - True - - h = plb.plot(x, c, '.') # 1D histogram - - h1 = plb.plot(x, c / dx / N) # 1D probability density plot - t = np.trapz(x, c / dx / N) - print(t) - ''' - - -def test_docstrings(): - import doctest - print('Testing docstrings in %s' % __file__) - doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE) - -if __name__ == '__main__': - test_docstrings() +from __future__ import division +import unittest +import numpy as np +from numpy.testing import assert_allclose +from numpy import array, inf +import wafo.kdetools as wk + + +class TestKdeTools(unittest.TestCase): + + def setUp(self): + + # N = 20 + # data = np.random.rayleigh(1, size=(N,)) + self.data = array([0.75355792, 0.72779194, 0.94149169, 0.07841119, + 2.32291887, 1.10419995, 0.77055114, 0.60288273, + 1.36883635, 1.74754326, 1.09547561, 1.01671133, + 0.73211143, 0.61891719, 0.75903487, 1.8919469, + 0.72433808, 1.92973094, 0.44749838, 1.36508452]) + self.x = np.linspace(0, max(self.data) + 1, 10) + + def test0_KDE1D(self): + data, x = self.data, self.x + # kde = wk.KDE(data, hs=0.5, alpha=0.5) + + kde0 = wk.KDE(data, hs=0.5, alpha=0.0, inc=16) + + fx = kde0.eval_grid(x) + assert_allclose(fx, [0.2039735, 0.40252503, 0.54595078, + 0.52219649, 0.3906213, 0.26381501, 0.16407362, + 0.08270612, 0.02991145, 0.00720821]) + + fx = kde0.eval_grid(x, r=1) + + assert_allclose(-fx, [0.11911419724002906, 0.13440000694772541, + 0.044400116190638696, -0.0677695267531197, + -0.09555596523854318, -0.07498819087690148, + -0.06167607128369182, -0.04678588231996062, + -0.024515979196411814, -0.008022010381009501]) + + fx = kde0.eval_grid(x, r=2) + assert_allclose(fx, [0.08728138131197069, 0.07558648034784508, + 0.05093715852686607, 0.07908624791267539, + 0.10495675573359599, 0.07916167222333347, + 0.048168330179460386, 0.03438361415806721, + 0.02197927811015286, 0.009222988165160621]) + + ffx = kde0.eval_grid_fast(x) + assert_allclose(ffx, [0.20729484, 0.39865044, 0.53716945, 0.5169322, + 0.39060223, 0.26441126, 0.16388801, 0.08388527, + 0.03227164, 0.00883579], 1e-6) + + fx = kde0.eval_grid_fast(x, r=1) + assert_allclose(fx, [-0.11582450668441863, -0.12901768780183628, + -0.04402464127812092, 0.0636190549560749, + 0.09345144501310157, 0.07573621607126926, + 0.06149475587201987, 0.04550210608639078, + 0.024427027615689087, 0.00885576504750473]) + + fx = kde0.eval_grid_fast(x, r=2) + assert_allclose(fx, [0.08499284131672676, 0.07572564161758065, + 0.05329987919556978, 0.07849796347259348, + 0.10232741197885842, 0.07869015379158453, + 0.049431823916945394, 0.034527256372343613, + 0.021517998409663567, 0.009527401063843402]) + + f = kde0.eval_grid_fast() + assert_allclose(np.trapz(f, kde0.args), 0.995001) + assert_allclose(f, [0.011494108953097538, 0.0348546729842836, + 0.08799292403553607, 0.18568717590587996, + 0.32473136104523725, 0.46543163412700084, + 0.5453201564089711, 0.5300582814373698, + 0.44447650672207173, 0.3411961246641896, + 0.25103852230993573, 0.17549519961525845, + 0.11072988772879173, 0.05992730870218242, + 0.02687783924833738, 0.00974982785617795]) + + def skiptest0_KDEgauss_1D(self): + data, x = self.data, self.x + # kde = wk.KDE(data, hs=0.5, alpha=0.5) + + kde0 = wk.KDEgauss(data, hs=0.5, alpha=0.0, inc=16) + + fx = kde0.eval_grid(x) + assert_allclose(fx, [0.2039735, 0.40252503, 0.54595078, + 0.52219649, 0.3906213, 0.26381501, 0.16407362, + 0.08270612, 0.02991145, 0.00720821]) + + fx = kde0.eval_grid(x, r=1) + + assert_allclose(-fx, [0.11911419724002906, 0.13440000694772541, 
+ 0.044400116190638696, -0.0677695267531197, + -0.09555596523854318, -0.07498819087690148, + -0.06167607128369182, -0.04678588231996062, + -0.024515979196411814, -0.008022010381009501]) + + fx = kde0.eval_grid(x, r=2) + assert_allclose(fx, [0.08728138131197069, 0.07558648034784508, + 0.05093715852686607, 0.07908624791267539, + 0.10495675573359599, 0.07916167222333347, + 0.048168330179460386, 0.03438361415806721, + 0.02197927811015286, 0.009222988165160621]) + + ffx = kde0.eval_grid_fast(x) + # print(ffx.tolist()) + assert_allclose(ffx, [0.20729484, 0.39865044, 0.53716945, 0.5169322, + 0.39060223, 0.26441126, 0.16388801, 0.08388527, + 0.03227164, 0.00883579], 1e-6) + + fx = kde0.eval_grid_fast(x, r=1) + assert_allclose(fx, [-0.11582450668441863, -0.12901768780183628, + -0.04402464127812092, 0.0636190549560749, + 0.09345144501310157, 0.07573621607126926, + 0.06149475587201987, 0.04550210608639078, + 0.024427027615689087, 0.00885576504750473]) + + fx = kde0.eval_grid_fast(x, r=2) + assert_allclose(fx, [0.08499284131672676, 0.07572564161758065, + 0.05329987919556978, 0.07849796347259348, + 0.10232741197885842, 0.07869015379158453, + 0.049431823916945394, 0.034527256372343613, + 0.021517998409663567, 0.009527401063843402]) + + f = kde0.eval_grid_fast() + assert_allclose(f, [0.06807544, 0.12949095, 0.21985421, 0.33178031, + 0.44334874, 0.52429234, 0.55140336, 0.52221323, + 0.45500674, 0.3752208, 0.30046799, 0.235667, + 0.17854402, 0.12721305, 0.08301993, 0.04862324]) + assert_allclose(np.trapz(f, kde0.args), 0.96716261) + + def test1_TKDE1D(self): + data = self.data + x = np.linspace(0.01, max(data) + 1, 10) + kde = wk.TKDE(data, hs=0.5, L2=0.5) + f = kde(x) + assert_allclose(f, [1.03982714, 0.45839018, 0.39514782, 0.32860602, + 0.26433318, 0.20717946, 0.15907684, 0.1201074, + 0.08941027, 0.06574882]) + assert_allclose(np.trapz(f, x), 0.94787730659349068) + f = kde.eval_grid_fast(x) + assert_allclose(f, [1.0401892415290148, 0.45838973393693677, + 0.39514689240671547, 0.32860531818532457, + 0.2643330110605783, 0.20717975528556506, + 0.15907696844388747, 0.12010770443337843, + 0.08941129458260941, 0.06574899139165799]) + f = kde.eval_grid_fast2(x) + assert_allclose(f, [1.0401892415290148, 0.45838973393693677, + 0.39514689240671547, 0.32860531818532457, + 0.2643330110605783, 0.20717975528556506, + 0.15907696844388747, 0.12010770443337843, + 0.08941129458260941, 0.06574899139165799]) + assert_allclose(np.trapz(f, x), 0.9479438058416647) + + def test1_KDE1D(self): + data, x = self.data, self.x + kde = wk.KDE(data, hs=0.5) + f = kde(x) + assert_allclose(f, [0.2039735, 0.40252503, 0.54595078, 0.52219649, + 0.3906213, 0.26381501, 0.16407362, 0.08270612, + 0.02991145, 0.00720821]) + + assert_allclose(np.trapz(f, x), 0.92576174424281876) + + def test2_KDE1D(self): + # data, x = self.data, self.x + + data = np.asarray([1, 2]) + x = np.linspace(0, max(np.ravel(data)) + 1, 10) + kde = wk.KDE(data, hs=0.5) + f = kde(x) + assert_allclose(f, [0.0541248, 0.16555235, 0.33084399, 0.45293325, + 0.48345808, 0.48345808, 0.45293325, 0.33084399, + 0.16555235, 0.0541248]) + + assert_allclose(np.trapz(f, x), 0.97323338046725172) + + def test1a_KDE1D(self): + data, x = self.data, self.x + kde = wk.KDE(data, hs=0.5, alpha=0.5) + f = kde(x) + assert_allclose(f, [0.17252055, 0.41014271, 0.61349072, 0.57023834, + 0.37198073, 0.21409279, 0.12738463, 0.07460326, + 0.03956191, 0.01887164]) + + assert_allclose(np.trapz(f, x), 0.92938023659047952) + + def test2a_KDE1D(self): + # data, x = self.data, self.x + data = np.asarray([1, 
2]) + x = np.linspace(0, max(np.ravel(data)) + 1, 10) + kde = wk.KDE(data, hs=0.5, alpha=0.5) + f = kde(x) + assert_allclose(f, [0.0541248, 0.16555235, 0.33084399, 0.45293325, + 0.48345808, 0.48345808, 0.45293325, 0.33084399, + 0.16555235, 0.0541248]) + + assert_allclose(np.trapz(f, x), 0.97323338046725172) + + def test_KDE2D(self): + # N = 20 + # data = np.random.rayleigh(1, size=(2, N)) + data = array([ + [0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399, + 0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145, + 0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821, + 0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964], + [1.44080694, 0.39973751, 1.331243, 2.48895822, 1.18894158, + 1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689, + 0.870329, 1.25106862, 0.5346619, 0.47541236, 1.51930093, + 0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]]) + + x = np.linspace(0, max(np.ravel(data)) + 1, 3) + + kde0 = wk.KDE(data, hs=0.5, alpha=0.0, inc=512) + + assert_allclose(kde0.eval_grid(x, x), + [[3.27260963e-02, 4.21654678e-02, 5.85338634e-04], + [6.78845466e-02, 1.42195839e-01, 1.41676003e-03], + [1.39466746e-04, 4.26983850e-03, 2.52736185e-05]]) + + t = [[0.0443506097653615, 0.06433530873456418, 0.0041353838654317856], + [0.07218297149063724, 0.1235819591878892, 0.009288890372002473], + [0.001613328022214066, 0.00794857884864038, 0.0005874786787715641] + ] + assert_allclose(kde0.eval_grid_fast(x, x), t) + + def test_gridcount_1D(self): + data, x = self.data, self.x + dx = x[1] - x[0] + c = wk.gridcount(data, x) + assert_allclose(c, [0.78762626, 1.77520717, 7.99190087, 4.04054449, + 1.67156643, 2.38228499, 1.05933195, 0.29153785, 0., + 0.]) + t = np.trapz(c / dx / len(data), x) + assert_allclose(t, 0.9803093435140049) + + def test_gridcount_2D(self): + N = 20 + # data = np.random.rayleigh(1, size=(2, N)) + data = array([ + [0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399, + 0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145, + 0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821, + 0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964], + [1.44080694, 0.39973751, 1.331243, 2.48895822, 1.18894158, + 1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689, + 0.870329, 1.25106862, 0.5346619, 0.47541236, 1.51930093, + 0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]]) + + x = np.linspace(0, max(np.ravel(data)) + 1, 5) + dx = x[1] - x[0] + X = np.vstack((x, x)) + c = wk.gridcount(data, X) + assert_allclose(c, + [[0.38922806, 0.8987982, 0.34676493, 0.21042807, 0.], + [1.15012203, 5.16513541, 3.19250588, 0.55420752, 0.], + [0.74293418, 3.42517219, 1.97923195, 0.76076621, 0.], + [0.02063536, 0.31054405, 0.71865964, 0.13486633, 0.], + [0., 0., 0., 0., 0.]], 1e-5) + + t = np.trapz(np.trapz(c / (dx**2 * N), x), x) + assert_allclose(t, 0.9011618785736376) + + def test_gridcount_3D(self): + N = 20 + # data = np.random.rayleigh(1, size=(3, N)) + data = np.array([ + [0.932896, 0.89522635, 0.80636346, 1.32283371, 0.27125435, + 1.91666304, 2.30736635, 1.13662384, 1.73071287, 1.06061127, + 0.99598512, 2.16396591, 1.23458213, 1.12406686, 1.16930431, + 0.73700592, 1.21135139, 0.46671506, 1.3530304, 0.91419104], + [0.62759088, 0.23988169, 2.04909823, 0.93766571, 1.19343762, + 1.94954931, 0.84687514, 0.49284897, 1.05066204, 1.89088505, + 0.840738, 1.02901457, 1.0758625, 1.76357967, 0.45792897, + 1.54488066, 0.17644313, 1.6798871, 0.72583514, 2.22087245], + [1.69496432, 0.81791905, 0.82534709, 0.71642389, 0.89294732, + 
1.66888649, 0.69036947, 0.99961448, 0.30657267, 0.98798713, + 0.83298728, 1.83334948, 1.90144186, 1.25781913, 0.07122458, + 2.42340852, 2.41342037, 0.87233305, 1.17537114, 1.69505988]]) + + x = np.linspace(0, max(np.ravel(data)) + 1, 3) + dx = x[1] - x[0] + X = np.vstack((x, x, x)) + c = wk.gridcount(data, X) + assert_allclose(c, + [[[8.74229894e-01, 1.27910940e+00, 1.42033973e-01], + [1.94778915e+00, 2.59536282e+00, 3.28213680e-01], + [1.08429416e-01, 1.69571495e-01, 7.48896775e-03]], + [[1.44969128e+00, 2.58396370e+00, 2.45459949e-01], + [2.28951650e+00, 4.49653348e+00, 2.73167915e-01], + [1.10905565e-01, 3.18733817e-01, 1.12880816e-02]], + [[7.49265424e-02, 2.18142488e-01, 0.0], + [8.53886762e-02, 3.73415131e-01, 0.0], + [4.16196568e-04, 1.62218824e-02, 0.0]]]) + + t = np.trapz(np.trapz(np.trapz(c / dx**3 / N, x), x), x) + assert_allclose(t, 0.5164999727560187) + + def test_gridcount_4D(self): + + N = 20 + # data = np.random.rayleigh(1, size=(2, N)) + data = array([ + [0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399, + 0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145], + [0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821, + 0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964], + [1.44080694, 0.39973751, 1.331243, 2.48895822, 1.18894158, + 1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689], + [0.870329, 1.25106862, 0.5346619, 0.47541236, 1.51930093, + 0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]]) + + x = np.linspace(0, max(np.ravel(data)) + 1, 3) + dx = x[1] - x[0] + X = np.vstack((x, x, x, x)) + c = wk.gridcount(data, X) + assert_allclose(c, + [[[[1.77163904e-01, 1.87720108e-01, 0.0], + [5.72573585e-01, 6.09557834e-01, 0.0], + [3.48549923e-03, 4.05931870e-02, 0.0]], + [[1.83770124e-01, 2.56357594e-01, 0.0], + [4.35845892e-01, 6.14958970e-01, 0.0], + [3.07662204e-03, 3.58312786e-02, 0.0]], + [[0.0, 0.0, 0.0], + [0.0, 0.0, 0.0], + [0.0, 0.0, 0.0]]], + [[[3.41883175e-01, 5.97977973e-01, 0.0], + [5.72071865e-01, 8.58566538e-01, 0.0], + [3.46939323e-03, 4.04056116e-02, 0.0]], + [[3.58861043e-01, 6.28962785e-01, 0.0], + [8.80697705e-01, 1.47373158e+00, 0.0], + [2.22868504e-01, 1.18008528e-01, 0.0]], + [[2.91835067e-03, 2.60268355e-02, 0.0], + [3.63686503e-02, 1.07959459e-01, 0.0], + [1.88555613e-02, 7.06358976e-03, 0.0]]], + [[[3.13810608e-03, 2.11731327e-02, 0.0], + [6.71606255e-03, 4.53139824e-02, 0.0], + [0.0, 0.0, 0.0]], + [[7.05946179e-03, 5.44614852e-02, 0.0], + [1.09099593e-01, 1.95935584e-01, 0.0], + [6.61257395e-02, 2.47717418e-02, 0.0]], + [[6.38695629e-04, 5.69610302e-03, 0.0], + [1.00358265e-02, 2.44053065e-02, 0.0], + [5.67244468e-03, 2.12498697e-03, 0.0]]]]) + + t = np.trapz(np.trapz(np.trapz(np.trapz(c / dx**4 / N, x), x), x), x) + assert_allclose(t, 0.21183518274521254) + + +class TestKernels(unittest.TestCase): + def setUp(self): + self.names = ['epanechnikov', 'biweight', 'triweight', 'logistic', + 'p1epanechnikov', 'p1biweight', 'p1triweight', + 'triangular', 'gaussian', 'rectangular', 'laplace'] + + def test_stats(self): + truth = { + 'biweight': (0.14285714285714285, 0.7142857142857143, 22.5), + 'logistic': (3.289868133696453, 1./6, 0.023809523809523808), + 'p1biweight': (0.14285714285714285, 0.7142857142857143, 22.5), + 'triangular': (0.16666666666666666, 0.6666666666666666, inf), + 'gaussian': (1, 0.28209479177387814, 0.21157109383040862), + 'epanechnikov': (0.2, 0.6, inf), + 'triweight': (0.1111111111111111, 0.8158508158508159, inf), + 'p1triweight': (0.1111111111111111, 0.8158508158508159, inf), + 
'p1epanechnikov': (0.2, 0.6, inf), + 'rectangular': (0.3333333333333333, 0.5, inf), + 'laplace': (2, 0.25, inf)} + for name in self.names: + kernel = wk.Kernel(name) + assert_allclose(kernel.stats(), truth[name]) + # truth[name] = kernel.stats() + # print(truth) + + def test_norm_factors_1d(self): + truth = { + 'biweight': 1.0666666666666667, 'logistic': 1.0, + 'p1biweight': 1.0666666666666667, 'triangular': 1.0, + 'gaussian': 2.5066282746310002, 'epanechnikov': 1.3333333333333333, + 'triweight': 0.91428571428571426, 'laplace': 2, + 'p1triweight': 0.91428571428571426, + 'p1epanechnikov': 1.3333333333333333, 'rectangular': 2.0} + for name in self.names: + kernel = wk.Kernel(name) + assert_allclose(kernel.norm_factor(d=1, n=20), truth[name]) + # truth[name] = kernel.norm_factor(d=1, n=20) + + def test_effective_support(self): + truth = {'biweight': (-1.0, 1.0), 'logistic': (-7.0, 7.0), + 'p1biweight': (-1.0, 1.0), 'triangular': (-1.0, 1.0), + 'gaussian': (-4.0, 4.0), 'epanechnikov': (-1.0, 1.0), + 'triweight': (-1.0, 1.0), 'p1triweight': (-1.0, 1.0), + 'p1epanechnikov': (-1.0, 1.0), 'rectangular': (-1.0, 1.0), + 'laplace': (-7.0, 7.0)} + for name in self.names: + kernel = wk.Kernel(name) + assert_allclose(kernel.effective_support(), truth[name]) + # truth[name] = kernel.effective_support() + # print(truth) + # self.assertTrue(False) + + def test_that_kernel_is_a_pdf(self): + + for name in self.names: + kernel = wk.Kernel(name) + xmin, xmax = kernel.effective_support() + x = np.linspace(xmin, xmax, 4*1024+1) + m0 = kernel.norm_factor(d=1, n=1) + pdf = kernel(x)/m0 + # print(name) + # print(pdf[0], pdf[-1]) + # print(np.trapz(pdf, x) - 1) + assert_allclose(np.trapz(pdf, x), 1, 1e-2) + # self.assertTrue(False) + + +class TestSmoothing(unittest.TestCase): + def setUp(self): + self.data = np.array([ + [0.932896, 0.89522635, 0.80636346, 1.32283371, 0.27125435, + 1.91666304, 2.30736635, 1.13662384, 1.73071287, 1.06061127, + 0.99598512, 2.16396591, 1.23458213, 1.12406686, 1.16930431, + 0.73700592, 1.21135139, 0.46671506, 1.3530304, 0.91419104], + [0.62759088, 0.23988169, 2.04909823, 0.93766571, 1.19343762, + 1.94954931, 0.84687514, 0.49284897, 1.05066204, 1.89088505, + 0.840738, 1.02901457, 1.0758625, 1.76357967, 0.45792897, + 1.54488066, 0.17644313, 1.6798871, 0.72583514, 2.22087245], + [1.69496432, 0.81791905, 0.82534709, 0.71642389, 0.89294732, + 1.66888649, 0.69036947, 0.99961448, 0.30657267, 0.98798713, + 0.83298728, 1.83334948, 1.90144186, 1.25781913, 0.07122458, + 2.42340852, 2.41342037, 0.87233305, 1.17537114, 1.69505988]]) + self.gauss = wk.Kernel('gaussian') + + def test_hns(self): + hs = self.gauss.hns(self.data) + assert_allclose(hs, [0.18154437, 0.36207987, 0.37396219]) + + def test_hos(self): + hs = self.gauss.hos(self.data) + assert_allclose(hs, [0.195209, 0.3893332, 0.40210988]) + + def test_hms(self): + hs = self.gauss.hmns(self.data) + assert_allclose(hs, [[3.25196193e-01, -2.68892467e-02, 3.18932448e-04], + [-2.68892467e-02, 3.91283306e-01, 2.38654678e-02], + [3.18932448e-04, 2.38654678e-02, 4.05123874e-01]]) + + def test_hscv(self): + hs = self.gauss.hscv(self.data) + assert_allclose(hs, [0.16858959, 0.32739383, 0.3046287]) + + def test_hstt(self): + hs = self.gauss.hstt(self.data) + assert_allclose(hs, [0.18099075, 0.50409881, 0.11018912]) + + def test_hste(self): + hs = self.gauss.hste(self.data) + assert_allclose(hs, [0.16750009, 0.29059113, 0.17994255]) + + def test_hldpi(self): + hs = self.gauss.hldpi(self.data) + assert_allclose(hs, [0.1732289, 0.33159097, 0.3107633]) + + 
def test_hisj(self):
+        hs = self.gauss.hisj(self.data)
+        assert_allclose(hs, [0.29542502, 0.74277133, 0.51899114])
+
+if __name__ == "__main__":
+    # import sys;sys.argv = ['', 'Test.testName']
+    unittest.main()
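
For reviewers who want to exercise the refactored API outside the test runner, the short sketch below mirrors the pattern the new TestKdeTools cases use: build a KDE with a fixed bandwidth, evaluate it both directly and with the fast (binned) evaluator, and integrate the result with np.trapz. It is illustrative only and not part of the patch; the sample is a fresh Rayleigh draw rather than the recorded data in setUp(), so the printed values will not match the hard-coded expectations above, though the integral should still come out close to one.

    # Illustrative sketch (not part of the patch); uses only calls that the
    # tests above exercise: wk.KDE, eval_grid, eval_grid_fast, kde.args.
    import numpy as np
    import wafo.kdetools as wk

    np.random.seed(0)
    data = np.random.rayleigh(1, size=20)          # same kind of sample as setUp()
    x = np.linspace(0, data.max() + 1, 10)

    kde = wk.KDE(data, hs=0.5, alpha=0.0, inc=16)  # fixed bandwidth, regular (non-adaptive) KDE
    f_exact = kde.eval_grid(x)       # direct evaluation on the points in x
    f_fast = kde.eval_grid_fast(x)   # binned approximation; should track f_exact closely

    # Integrating the density over the KDE's own grid (kde.args) should give a
    # value near 1, which is what the np.trapz checks in the tests assert.
    area = np.trapz(kde.eval_grid_fast(), kde.args)
    print(f_exact)
    print(f_fast)
    print(area)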