pywafo/wafo/kdetools/kdetools.py

#!/usr/bin/env python
# -------------------------------------------------------------------------
# Name:        kdetools
# Purpose:
#
# Author:      pab
#
# Created:     01.11.2008
# Copyright:   (c) pab 2008
# Licence:     LGPL
# -------------------------------------------------------------------------

from __future__ import absolute_import, division
# from abc import ABCMeta, abstractmethod
import copy
import warnings
import numpy as np
import scipy.stats
from scipy import interpolate, linalg, special
from numpy import pi, sqrt, atleast_2d, exp, meshgrid
from numpy.fft import fftn, ifftn
from wafo.misc import nextpow2
from wafo.containers import PlotData
from wafo.dctpack import dctn, idctn  # , dstn, idstn
from wafo.plotbackend import plotbackend as plt
from wafo.testing import test_docstrings
from wafo.kdetools.kernels import iqrange, qlevels, Kernel
from wafo.kdetools.gridding import gridcount
import time

try:
    from wafo import fig
except ImportError:
    warnings.warn('fig import only supported on Windows')

__all__ = ['TKDE', 'KDE', 'kde_demo1', 'kde_demo2', 'test_docstrings',
           'KRegression', 'BKRegression']


def _assert(cond, msg):
    if not cond:
        raise ValueError(msg)


def _assert_warn(cond, msg):
    if not cond:
        warnings.warn(msg)


def _invnorm(q):
    return special.ndtri(q)


class _KDE(object):

    """ Kernel-Density Estimator base class.

    Parameters
    ----------
    data : (# of dims, # of data)-array
        datapoints to estimate from
    hs : array-like (optional)
        smooting parameter vector/matrix.
        (default compute from data using kernel.get_smoothing function)
    kernel :  kernel function object.
        kernel must have get_smoothing method
    alpha : real scalar (optional)
        sensitivity parameter               (default 0 regular KDE)
        A good choice might be alpha = 0.5 ( or 1/D)
        alpha = 0      Regular  KDE (hs is constant)
        0 < alpha <= 1 Adaptive KDE (Make hs change)

    Members
    -------
    d : int
        number of dimensions
    n : int
        number of datapoints

    Methods
    -------
    kde.eval_grid_fast(x0, x1,..., xd) : array
        evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
    kde.eval_grid(x0, x1,..., xd) : array
        evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
    kde.eval_points(points) : array
        evaluate the estimated pdf on a provided set of points
    kde(x0, x1,..., xd) : array
        same as kde.eval_grid(x0, x1,..., xd)
    """

    def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None,
                 xmax=None, inc=512):
        self.dataset = atleast_2d(data)
        self.kernel = kernel if kernel else Kernel('gauss')
        self.xmin = xmin
        self.xmax = xmax
        self.hs = hs
        self.inc = inc
        self.alpha = alpha
        self.initialize()

    @property
    def n(self):
        return self.dataset.shape[1]

    @property
    def d(self):
        return self.dataset.shape[0]

    @property
    def sigma(self):
        """minimum(stdev, 0.75 * interquartile-range)"""
        iqr = iqrange(self.dataset, axis=-1)
        sigma = np.minimum(np.std(self.dataset, axis=-1, ddof=1), iqr / 1.34)
        return sigma

    @property
    def xmin(self):
        return self._xmin

    @xmin.setter
    def xmin(self, xmin):
        if xmin is None:
            self._xmin = self.dataset.min(axis=-1) - 2 * self.sigma
        else:
            self._xmin = xmin * np.ones(self.d)

    @property
    def xmax(self):
        return self._xmax

    @xmax.setter
    def xmax(self, xmax):
        if xmax is None:
            self._xmax = self.dataset.max(axis=-1) + 2 * self.sigma
        else:
            self._xmax = xmax * np.ones(self.d)

    def _replace_negatives_with_default_hs(self, h):
        get_default_hs = self.kernel.get_smoothing
        ind, = np.where(h <= 0)
        for i in ind.tolist():
            h[i] = get_default_hs(self.dataset[i])

    def _check_hs(self, h):
        """make sure it has the correct dimension and replace negative vals"""
        h = np.atleast_1d(h)
        if (len(h.shape) == 1) or (self.d == 1):
            h = h * np.ones(self.d) if max(h.shape) == 1 else h.reshape(self.d)
            self._replace_negatives_with_default_hs(h)
        return h

    def _invert_hs(self, h):
        if (len(h.shape) == 1) or (self.d == 1):
            determinant = h.prod()
            inv_hs = np.diag(1.0 / h)
        else:  # fully general smoothing matrix
            determinant = linalg.det(h)
            _assert(0 < determinant,
                    'bandwidth matrix h must be positive definit!')
            inv_hs = linalg.inv(h)
        return inv_hs, determinant

    @property
    def hs(self):
        return self._hs

    @hs.setter
    def hs(self, h):
        if h is None:
            h = self.kernel.get_smoothing(self.dataset)
        h = self._check_hs(h)
        inv_hs, deth = self._invert_hs(h)

        self._norm_factor = deth * self.n
        self._inv_hs = inv_hs
        self._hs = h

    @property
    def inc(self):
        return self._inc

    @inc.setter
    def inc(self, inc):
        if inc is None:
            _tau, tau = self.kernel.effective_support()
            xyzrange = 8 * self.sigma
            L1 = 10
            inc = max(48, (L1 * xyzrange / (tau * self.hs)).max())
            inc = 2 ** nextpow2(inc)
        self._inc = inc

    @property
    def alpha(self):
        return self._alpha

    @alpha.setter
    def alpha(self, alpha):
        self._alpha = alpha
        self._lambda = np.ones(self.n)
        if alpha > 0:
            f = self.eval_points(self.dataset)  # pilot estimate
            g = np.exp(np.mean(np.log(f)))
            self._lambda = (f / g) ** (-alpha)

    def initialize(self):
        if self.n > 1:
            self._initialize()

    def _initialize(self):
        pass

    def get_args(self, xmin=None, xmax=None):
        sxmin = self.xmin
        if xmin is not None:
            sxmin = np.minimum(xmin, sxmin)

        sxmax = self.xmax
        if xmax is not None:
            sxmax = np.maximum(xmax, sxmax)

        args = []
        inc = self.inc
        for i in range(self.d):
            args.append(np.linspace(sxmin[i], sxmax[i], inc))
        return args

    def eval_grid_fast(self, *args, **kwds):
        """Evaluate the estimated pdf on a grid.

        Parameters
        ----------
        arg_0,arg_1,... arg_d-1 : vectors
            Alternatively, if no vectors is passed in then
             arg_i = linspace(self.xmin[i], self.xmax[i], self.inc)
        output : string optional
            'value' if value output
            'data' if object output

        Returns
        -------
        values : array-like
            The values evaluated at meshgrid(*args).

        """
        if len(args) == 0:
            args = self.get_args()
        self.args = args
        return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds)

    def _eval_grid_fast(self, *args, **kwds):
        pass

    def eval_grid(self, *args, **kwds):
        """Evaluate the estimated pdf on a grid.

        Parameters
        ----------
        arg_0,arg_1,... arg_d-1 : vectors
            Alternatively, if no vectors is passed in then
             arg_i = linspace(self.xmin[i], self.xmax[i], self.inc)
        output : string optional
            'value' if value output
            'data' if object output

        Returns
        -------
        values : array-like
            The values evaluated at meshgrid(*args).

        """
        if len(args) == 0:
            args = self.get_args()
        self.args = args
        return self._eval_grid_fun(self._eval_grid, *args, **kwds)

    def _eval_grid(self, *args, **kwds):
        pass

    def _add_contour_levels(self, wdata):
        p_levels = np.r_[10:90:20, 95, 99, 99.9]
        try:
            c_levels = qlevels(wdata.data, p=p_levels)
            wdata.clevels = c_levels
            wdata.plevels = p_levels
        except Exception as e:
            msg = "Could not calculate contour levels!. ({})".format(str(e))
            warnings.warn(msg)

    def _make_object(self, f, **kwds):
        titlestr = 'Kernel density estimate ({})'.format(self.kernel.name)
        kwds2 = dict(title=titlestr)
        kwds2['plot_kwds'] = dict(plotflag=1)
        kwds2.update(**kwds)
        args = self.args
        if self.d == 1:
            args = args[0]
        wdata = PlotData(f, args, **kwds2)
        if self.d > 1:
            self._add_contour_levels(wdata)
        return wdata

    def _eval_grid_fun(self, eval_grd, *args, **kwds):
        output = kwds.pop('output', 'value')
        f = eval_grd(*args, **kwds)
        if output == 'value':
            return f
        return self._make_object(f, **kwds)

    def _check_shape(self, points):
        points = atleast_2d(points)
        d, m = points.shape
        if d != self.d:
            _assert(d == 1 and m == self.d, "points have dimension {}, "
                    "dataset has dimension {}".format(d, self.d))
            # points was passed in as a row vector
            points = np.reshape(points, (self.d, 1))
        return points

    def eval_points(self, points, **kwds):
        """Evaluate the estimated pdf on a set of points.

        Parameters
        ----------
        points : (# of dimensions, # of points)-array
            Alternatively, a (# of dimensions,) vector can be passed in and
            treated as a single point.

        Returns
        -------
        values : (# of points,)-array
            The values at each point.

        Raises
        ------
        ValueError if the dimensionality of the input points is different than
        the dimensionality of the KDE.

        """

        points = self._check_shape(points)
        return self._eval_points(points, **kwds)

    def _eval_points(self, points, **kwds):
        pass

    __call__ = eval_grid


class TKDE(_KDE):

    """ Transformation Kernel-Density Estimator.

    Parameters
    ----------
    dataset : (# of dims, # of data)-array
        datapoints to estimate from
    hs : array-like (optional)
        smooting parameter vector/matrix.
        (default compute from data using kernel.get_smoothing function)
    kernel :  kernel function object.
        kernel must have get_smoothing method
    alpha : real scalar (optional)
        sensitivity parameter               (default 0 regular KDE)
        A good choice might be alpha = 0.5 ( or 1/D)
        alpha = 0      Regular  KDE (hs is constant)
        0 < alpha <= 1 Adaptive KDE (Make hs change)
    xmin, xmax  : vectors
        specifying the default argument range for the kde.eval_grid methods.
        For the kde.eval_grid_fast methods the values must cover the range of
        the data. (default min(data)-range(data)/4, max(data)-range(data)/4)
        If a single value of xmin or xmax is given then the boundary is the is
        the same for all dimensions.
    inc :  scalar integer
        defining the default dimension of the output from kde.eval_grid methods
        (default 512)
        (For kde.eval_grid_fast: A value below 50 is very fast to compute but
        may give some inaccuracies. Values between 100 and 500 give very
        accurate results)
    L2 : array-like
        vector of transformation parameters (default 1 no transformation)
        t(xi;L2) = xi^L2*sign(L2)   for L2(i) ~= 0
        t(xi;L2) = log(xi)          for L2(i) == 0
        If single value of L2 is given then the transformation is the same in
        all directions.

    Members
    -------
    d : int
        number of dimensions
    n : int
        number of datapoints

    Methods
    -------
    kde.eval_grid_fast(x0, x1,..., xd) : array
        evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
    kde.eval_grid(x0, x1,..., xd) : array
        evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
    kde.eval_points(points) : array
        evaluate the estimated pdf on a provided set of points
    kde(x0, x1,..., xd) : array
        same as kde.eval_grid(x0, x1,..., xd)

    Example
    -------
    N = 20
    data = np.random.rayleigh(1, size=(N,))
    >>> data = np.array([
    ...        0.75355792,  0.72779194,  0.94149169,  0.07841119,2.32291887,
    ...        1.10419995,  0.77055114,  0.60288273,  1.36883635,  1.74754326,
    ...        1.09547561,  1.01671133,  0.73211143,  0.61891719,  0.75903487,
    ...        1.8919469 ,  0.72433808,  1.92973094,  0.44749838,  1.36508452])

    >>> import wafo.kdetools as wk
    >>> x = np.linspace(0.01, max(data.ravel()) + 1, 10)
    >>> kde = wk.TKDE(data, hs=0.5, L2=0.5)
    >>> f = kde(x)
    >>> f
    array([ 1.03982714,  0.45839018,  0.39514782,  0.32860602,  0.26433318,
            0.20717946,  0.15907684,  0.1201074 ,  0.08941027,  0.06574882])

    >>> kde.eval_grid(x)
    array([ 1.03982714,  0.45839018,  0.39514782,  0.32860602,  0.26433318,
            0.20717946,  0.15907684,  0.1201074 ,  0.08941027,  0.06574882])

    >>> kde.eval_grid_fast(x)
    array([ 1.04018924,  0.45838973,  0.39514689,  0.32860532,  0.26433301,
            0.20717976,  0.15907697,  0.1201077 ,  0.08941129,  0.06574899])

    import pylab as plb
    h1 = plb.plot(x, f) #  1D probability density plot
    t = np.trapz(f, x)
    """

    def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None,
                 xmax=None, inc=512, L2=None):
        self.L2 = L2
        super(TKDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc)

#     @property
#     def dataset(self):
#         return self._dataset
#
#     @dataset.setter
#     def dataset(self, data):

    def _initialize(self):
        self._check_xmin()
        tdataset = self._dat2gaus(self.dataset)
        xmin = self.xmin
        if xmin is not None:
            xmin = self._dat2gaus(np.reshape(xmin, (-1, 1)))
        xmax = self.xmax
        if xmax is not None:
            xmax = self._dat2gaus(np.reshape(xmax, (-1, 1)))
        self.tkde = KDE(tdataset, self.hs, self.kernel, self.alpha, xmin, xmax,
                        self.inc)
        if self.inc is None:
            self.inc = self.tkde.inc

    def _check_xmin(self):
        if self.L2 is not None:
            amin = self.dataset.min(axis=-1)
            # default no transformation
            L2 = np.atleast_1d(self.L2) * np.ones(self.d)
            self.xmin = np.where(L2 != 1,
                                 np.maximum(self.xmin, amin / 100.0),
                                 self.xmin).reshape((-1, 1))

    def _dat2gaus(self, points):
        if self.L2 is None:
            return points  # default no transformation

        # default no transformation
        L2 = np.atleast_1d(self.L2) * np.ones(self.d)

        tpoints = copy.copy(points)
        for i, v2 in enumerate(L2.tolist()):
            tpoints[i] = np.log(points[i]) if v2 == 0 else points[i] ** v2
        return tpoints

    def _gaus2dat(self, tpoints):
        if self.L2 is None:
            return tpoints  # default no transformation

        # default no transformation
        L2 = np.atleast_1d(self.L2) * np.ones(self.d)

        points = copy.copy(tpoints)
        for i, v2 in enumerate(L2.tolist()):
            points[i] = np.exp(
                tpoints[i]) if v2 == 0 else tpoints[i] ** (1.0 / v2)
        return points

    def _scale_pdf(self, pdf, points):
        if self.L2 is None:
            return pdf
        # default no transformation
        L2 = np.atleast_1d(self.L2) * np.ones(self.d)
        for i, v2 in enumerate(L2.tolist()):
            factor = v2 * np.sign(v2) if v2 else 1
            pdf *= np.where(v2 == 1, 1, points[i] ** (v2 - 1) * factor)
        if (np.abs(np.diff(pdf)).max() > 10).any():
            msg = ''' Numerical problems may have occured due to the power
                    transformation. Check the KDE for spurious spikes'''
            warnings.warn(msg)
        return pdf

    def eval_grid_fast2(self, *args, **kwds):
        """Evaluate the estimated pdf on a grid.

        Parameters
        ----------
        arg_0,arg_1,... arg_d-1 : vectors
           Alternatively, if no vectors is passed in then
            arg_i = gauss2dat(linspace(dat2gauss(self.xmin[i]),
                                       dat2gauss(self.xmax[i]), self.inc))
        output : string optional
            'value' if value output
            'data' if object output

        Returns
        -------
        values : array-like
           The values evaluated at meshgrid(*args).

        """
        return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds)

    def _interpolate(self, points, f, *args, **kwds):
        ipoints = meshgrid(*args)  # if self.d > 1 else args
        for i in range(self.d):
            points[i].shape = -1,
        points = np.asarray(points).T

        fi = interpolate.griddata(points, np.ravel(f), tuple(ipoints),
                                  method='linear', fill_value=0.0)
        self.args = args
        r = kwds.get('r', 0)
        if r == 0:
            return fi * (fi > 0)
        return fi

    def _get_targs(self, args):
        targs = []
        if len(args):
            targs0 = self._dat2gaus(list(args))
            xmin = [min(t) for t in targs0]
            xmax = [max(t) for t in targs0]
            targs = self.tkde.get_args(xmin, xmax)
        return targs

    def _eval_grid_fast(self, *args, **kwds):
        if self.L2 is None:
            f = self.tkde.eval_grid_fast(*args, **kwds)
            self.args = self.tkde.args
            return f
        targs = self._get_targs(args)
        tf = self.tkde.eval_grid_fast(*targs)

        self.args = self._gaus2dat(list(self.tkde.args))
        points = meshgrid(*self.args)
        f = self._scale_pdf(tf, points)
        if len(args):
            return self._interpolate(points, f, *args, **kwds)
        return f

    def _eval_grid(self, *args, **kwds):
        if self.L2 is None:
            return self.tkde.eval_grid(*args, **kwds)
        targs = self._dat2gaus(list(args))
        tf = self.tkde.eval_grid(*targs, **kwds)
        points = meshgrid(*args)
        f = self._scale_pdf(tf, points)
        return f

    def _eval_points(self, points):
        """Evaluate the estimated pdf on a set of points.

        Parameters
        ----------
        points : (# of dimensions, # of points)-array
            Alternatively, a (# of dimensions,) vector can be passed in and
            treated as a single point.

        Returns
        -------
        values : (# of points,)-array
            The values at each point.

        Raises
        ------
        ValueError if the dimensionality of the input points is different than
        the dimensionality of the KDE.

        """
        if self.L2 is None:
            return self.tkde.eval_points(points)

        tpoints = self._dat2gaus(points)
        tf = self.tkde.eval_points(tpoints)
        f = self._scale_pdf(tf, points)
        return f


class KDE(_KDE):

    """ Kernel-Density Estimator.

    Parameters
    ----------
    data : (# of dims, # of data)-array
        datapoints to estimate from
    hs : array-like (optional)
        smooting parameter vector/matrix.
        (default compute from data using kernel.get_smoothing function)
    kernel :  kernel function object.
        kernel must have get_smoothing method
    alpha : real scalar (optional)
        sensitivity parameter               (default 0 regular KDE)
        A good choice might be alpha = 0.5 ( or 1/D)
        alpha = 0      Regular  KDE (hs is constant)
        0 < alpha <= 1 Adaptive KDE (Make hs change)
    xmin, xmax  : vectors
        specifying the default argument range for the kde.eval_grid methods.
        For the kde.eval_grid_fast methods the values must cover the range of
        the data.
        (default min(data)-range(data)/4, max(data)-range(data)/4)
        If a single value of xmin or xmax is given then the boundary is the is
        the same for all dimensions.
    inc :  scalar integer (default 512)
        defining the default dimension of the output from kde.eval_grid methods
        (For kde.eval_grid_fast: A value below 50 is very fast to compute but
        may give some inaccuracies. Values between 100 and 500 give very
        accurate results)

    Members
    -------
    d : int
        number of dimensions
    n : int
        number of datapoints

    Methods
    -------
    kde.eval_grid_fast(x0, x1,..., xd) : array
        evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
    kde.eval_grid(x0, x1,..., xd) : array
        evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
    kde.eval_points(points) : array
        evaluate the estimated pdf on a provided set of points
    kde(x0, x1,..., xd) : array
        same as kde.eval_grid(x0, x1,..., xd)

    Example
    -------
    N = 20
    data = np.random.rayleigh(1, size=(N,))
    >>> data = np.array([
    ...        0.75355792,  0.72779194,  0.94149169,  0.07841119,  2.32291887,
    ...        1.10419995,  0.77055114,  0.60288273,  1.36883635,  1.74754326,
    ...        1.09547561,  1.01671133,  0.73211143,  0.61891719,  0.75903487,
    ...        1.8919469 ,  0.72433808,  1.92973094,  0.44749838,  1.36508452])

    >>> x = np.linspace(0, max(data.ravel()) + 1, 10)
    >>> import wafo.kdetools as wk
    >>> kde = wk.KDE(data, hs=0.5, alpha=0.5)
    >>> f = kde(x)
    >>> f
    array([ 0.17252055,  0.41014271,  0.61349072,  0.57023834,  0.37198073,
            0.21409279,  0.12738463,  0.07460326,  0.03956191,  0.01887164])

    >>> kde.eval_grid(x)
    array([ 0.17252055,  0.41014271,  0.61349072,  0.57023834,  0.37198073,
            0.21409279,  0.12738463,  0.07460326,  0.03956191,  0.01887164])
    >>> kde.eval_grid_fast(x)
    array([ 0.21720891,  0.43308789,  0.59017626,  0.55847998,  0.39681482,
            0.23987473,  0.13113066,  0.06062029,  0.02160104,  0.00559028])

    >>> kde0 = wk.KDE(data, hs=0.5, alpha=0.0)
    >>> kde0.eval_points(x)
    array([ 0.2039735 ,  0.40252503,  0.54595078,  0.52219649,  0.3906213 ,
            0.26381501,  0.16407362,  0.08270612,  0.02991145,  0.00720821])

    >>> kde0.eval_grid(x)
    array([ 0.2039735 ,  0.40252503,  0.54595078,  0.52219649,  0.3906213 ,
            0.26381501,  0.16407362,  0.08270612,  0.02991145,  0.00720821])
    >>> f = kde0.eval_grid(x, output='plotobj')
    >>> f.data
    array([ 0.2039735 ,  0.40252503,  0.54595078,  0.52219649,  0.3906213 ,
            0.26381501,  0.16407362,  0.08270612,  0.02991145,  0.00720821])

    >>> f = kde0.eval_grid_fast()
    >>> np.allclose(np.interp(x, kde0.args[0], f),
    ...    [ 0.20398034,  0.40252166,  0.54593292,  0.52218993,  0.39062245,
    ...     0.26381651,  0.16407487,  0.08270847,  0.02991439,  0.00882095])
    True
    >>> f1 = kde0.eval_grid_fast(output='plot')
    >>> np.allclose(np.interp(x, f1.args, f1.data),
    ...   [ 0.20398034,  0.40252166,  0.54593292,  0.52218993,  0.39062245,
    ...     0.26381651,  0.16407487,  0.08270847,  0.02991439,  0.00882095])
    True

    h = f1.plot()
    import pylab as plb
    h1 = plb.plot(x, f) #  1D probability density plot
    t = np.trapz(f, x)
    """
    @staticmethod
    def _make_flat_grid(dx, d, inc):
        Xn = []
        x0 = np.linspace(-inc, inc, 2 * inc + 1)
        for i in range(d):
            Xn.append(x0[:-1] * dx[i])

        Xnc = meshgrid(*Xn)

        for i in range(d):
            Xnc[i].shape = (-1,)
        return np.vstack(Xnc)

    def _kernel_weights(self, Xn, dx, d, inc):
        kw = self.kernel(Xn)
        norm_fact0 = (kw.sum() * dx.prod() * self.n)
        norm_fact = (self._norm_factor * self.kernel.norm_factor(d, self.n))
        if np.abs(norm_fact0 - norm_fact) > 0.05 * norm_fact:
            warnings.warn(
                'Numerical inaccuracy due to too low discretization. ' +
                'Increase the discretization of the evaluation grid ' +
                '(inc={})!'.format(inc))
            norm_fact = norm_fact0

        kw = kw / norm_fact
        return kw

    def _eval_grid_fast(self, *args, **kwds):
        X = np.vstack(args)
        d, inc = X.shape
        dx = X[:, 1] - X[:, 0]

        Xnc = self._make_flat_grid(dx, d, inc)

        Xn = np.dot(self._inv_hs, Xnc)
        kw = self._kernel_weights(Xn, dx, d, inc)

        r = kwds.get('r', 0)
        if r != 0:
            fun = self._moment_fun(r)
            kw *= fun(np.vstack(Xnc))
        kw.shape = (2 * inc, ) * d
        kw = np.fft.ifftshift(kw)

        y = kwds.get('y', 1.0)
        if self.alpha > 0:
            y = y / self._lambda**d

        # Find the binned kernel weights, c.
        c = gridcount(self.dataset, X, y=y)
        # Perform the convolution.
        z = np.real(ifftn(fftn(c, s=kw.shape) * fftn(kw)))

        ix = (slice(0, inc),) * d
        if r == 0:
            return z[ix] * (z[ix] > 0.0)
        return z[ix]

    def _eval_grid(self, *args, **kwds):

        grd = meshgrid(*args)
        shape0 = grd[0].shape
        d = len(grd)
        for i in range(d):
            grd[i] = grd[i].ravel()
        f = self.eval_points(np.vstack(grd), **kwds)
        return f.reshape(shape0)

    def _moment_fun(self, r):
        if r == 0:
            return lambda x: 1
        return lambda x: (x ** r).sum(axis=0)

    @property
    def norm_factor(self):
        return self._norm_factor * self.kernel.norm_factor(self.d, self.n)

    def _loop_over_data(self, data, points, y, r):
        fun = self._moment_fun(r)
        d, m = points.shape
        inv_hs, lambda_ = self._inv_hs, self._lambda
        kernel = self.kernel

        y_d_lambda = y / lambda_ ** d
        result = np.zeros((m,))
        for i in range(self.n):
            dxi = points - data[:, i, np.newaxis]
            tdiff = np.dot(inv_hs / lambda_[i], dxi)
            result += fun(dxi) * kernel(tdiff) * y_d_lambda[i]
        return result / self.norm_factor

    def _loop_over_points(self, data, points, y, r):
        fun = self._moment_fun(r)
        d, m = points.shape
        inv_hs, lambda_ = self._inv_hs, self._lambda
        kernel = self.kernel

        y_d_lambda = y / lambda_ ** d
        result = np.zeros((m,))
        for i in range(m):
            dxi = points[:, i, np.newaxis] - data
            tdiff = np.dot(inv_hs, dxi / lambda_[np.newaxis, :])
            result[i] = np.sum(fun(dxi) * kernel(tdiff) * y_d_lambda, axis=-1)
        return result / self.norm_factor

    def _eval_points(self, points, **kwds):
        """Evaluate the estimated pdf on a set of points.

        Parameters
        ----------
        points : (# of dimensions, # of points)-array
            Alternatively, a (# of dimensions,) vector can be passed in and
            treated as a single point.

        Returns
        -------
        values : (# of points,)-array
            The values at each point.

        Raises
        ------
        ValueError if the dimensionality of the input points is different than
        the dimensionality of the KDE.

        """
        d, m = points.shape
        _assert(d == self.d, "d={} expected, got {}".format(self.d, d))

        y = kwds.get('y', 1)
        r = kwds.get('r', 0)

        more_points_than_data = m >= self.n
        if more_points_than_data:
            return self._loop_over_data(self.dataset, points, y, r)
        return self._loop_over_points(self.dataset, points, y, r)


class KRegression(object):   # _KDE):

    """ Kernel-Regression

    Parameters
    ----------
    data : (# of dims, # of data)-array
        datapoints to estimate from
    y : # of data - array
        response variable
    p : scalar integer (0 or 1)
        Nadaraya-Watson estimator if p=0,
        local linear estimator if p=1.
    hs : array-like (optional)
        smooting parameter vector/matrix.
        (default compute from data using kernel.get_smoothing function)
    kernel :  kernel function object.
        kernel must have get_smoothing method
    alpha : real scalar (optional)
        sensitivity parameter               (default 0 regular KDE)
        A good choice might be alpha = 0.5 ( or 1/D)
        alpha = 0      Regular  KDE (hs is constant)
        0 < alpha <= 1 Adaptive KDE (Make hs change)
    xmin, xmax  : vectors
        specifying the default argument range for the kde.eval_grid methods.
        For the kde.eval_grid_fast methods the values must cover the range of
        the data. (default min(data)-range(data)/4, max(data)-range(data)/4)
        If a single value of xmin or xmax is given then the boundary is the is
        the same for all dimensions.
    inc :  scalar integer   (default 128)
        defining the default dimension of the output from kde.eval_grid methods
        (For kde.eval_grid_fast: A value below 50 is very fast to compute but
        may give some inaccuracies. Values between 100 and 500 give very
        accurate results)

    Members
    -------
    d : int
        number of dimensions
    n : int
        number of datapoints

    Methods
    -------
    kde.eval_grid_fast(x0, x1,..., xd) : array
        evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
    kde.eval_grid(x0, x1,..., xd) : array
        evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
    kde.eval_points(points) : array
        evaluate the estimated pdf on a provided set of points
    kde(x0, x1,..., xd) : array
        same as kde.eval_grid(x0, x1,..., xd)


    Example
    -------
    >>> import wafo.kdetools as wk
    >>> N = 100
    >>> x = np.linspace(0, 1, N)
    >>> ei = np.random.normal(loc=0, scale=0.075, size=(N,))
    >>> ei = np.sqrt(0.075) * np.sin(100*x)

    >>> y = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2)) + ei
    >>> kreg = wk.KRegression(x, y)
    >>> f = kreg(output='plotobj', title='Kernel regression', plotflag=1)
    >>> np.allclose(f.data[:5],
    ...     [ 3.18670593,  3.18678088,  3.18682196,  3.18682932,  3.18680337])
    True

    h = f.plot(label='p=0')
    """

    def __init__(self, data, y, p=0, hs=None, kernel=None, alpha=0.0,
                 xmin=None, xmax=None, inc=128, L2=None):

        self.tkde = TKDE(data, hs=hs, kernel=kernel,
                         alpha=alpha, xmin=xmin, xmax=xmax, inc=inc, L2=L2)
        self.y = y
        self.p = p

    def eval_grid_fast(self, *args, **kwds):
        self._grdfun = self.tkde.eval_grid_fast
        return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds)

    def eval_grid(self, *args, **kwds):
        self._grdfun = self.tkde.eval_grid
        return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds)

    def _eval_gridfun(self, *args, **kwds):
        grdfun = self._grdfun
        s0 = grdfun(*args, r=0)
        t0 = grdfun(*args, r=0, y=self.y)
        if self.p == 0:
            return (t0 / (s0 + _TINY)).clip(min=-_REALMAX, max=_REALMAX)
        elif self.p == 1:
            s1 = grdfun(*args, r=1)
            s2 = grdfun(*args, r=2)
            t1 = grdfun(*args, r=1, y=self.y)
            return ((s2 * t0 - s1 * t1) /
                    (s2 * s0 - s1 ** 2)).clip(min=-_REALMAX, max=_REALMAX)
    __call__ = eval_grid_fast


class BKRegression(object):

    '''
    Kernel-Regression on binomial data

    method : {'beta', 'wilson'}
        method is one of the following
        'beta', return Bayesian Credible interval using beta-distribution.
        'wilson', return Wilson score interval
    a, b : scalars
        parameters of the beta distribution defining the apriori distribution
        of p, i.e., the Bayes estimator for p: p = (y+a)/(n+a+b).
        Setting a=b=0.5 gives Jeffreys interval.
    '''

    def __init__(self, *args, **kwds):
        self.method = kwds.pop('method', 'beta')
        self.a = max(kwds.pop('a', 0.5), _TINY)
        self.b = max(kwds.pop('b', 0.5), _TINY)
        self.kreg = KRegression(*args, **kwds)
        # defines bin width (i.e. smoothing) in empirical estimate
        self.hs_e = None
#        self.x = self.kreg.tkde.dataset
#        self.y = self.kreg.y

    def _set_smoothing(self, hs):
        self.kreg.tkde.hs = hs
        self.kreg.tkde.initialize()

    x = property(fget=lambda cls: cls.kreg.tkde.dataset.squeeze())
    y = property(fget=lambda cls: cls.kreg.y)
    kernel = property(fget=lambda cls: cls.kreg.tkde.kernel)
    hs = property(fset=_set_smoothing, fget=lambda cls: cls.kreg.tkde.hs)

    def _get_max_smoothing(self, fun=None):
        """Return maximum value for smoothing parameter."""
        x = self.x
        y = self.y
        if fun is None:
            get_smoothing = self.kernel.get_smoothing
        else:
            get_smoothing = getattr(self.kernel, fun)

        hs1 = get_smoothing(x)
        # hx = np.median(np.abs(x-np.median(x)))/0.6745*(4.0/(3*n))**0.2
        if (y == 1).any():
            hs2 = get_smoothing(x[y == 1])
            # hy = np.median(np.abs(y-np.mean(y)))/0.6745*(4.0/(3*n))**0.2
        else:
            hs2 = 4 * hs1
            # hy = 4*hx

        hopt = sqrt(hs1 * hs2)
        return hopt, hs1, hs2

    def get_grid(self, hs_e=None):
        if hs_e is None:
            if self.hs_e is None:
                hs1 = self._get_max_smoothing('hste')[0]
                hs2 = self._get_max_smoothing('hos')[0]
                self.hs_e = sqrt(hs1 * hs2)
            hs_e = self.hs_e
        x = self.x
        xmin, xmax = x.min(), x.max()
        ni = max(2 * int((xmax - xmin) / hs_e) + 3, 5)
        sml = hs_e  # *0.1
        xi = np.linspace(xmin - sml, xmax + sml, ni)
        return xi

    def prb_ci(self, n, p, alpha=0.05, **kwds):
        """Return Confidence Interval for the binomial probability p.

        Parameters
        ----------
        n : array-like
            number of Bernoulli trials
        p : array-like
            estimated probability of success in each trial
        alpha : scalar
            confidence level
        method : {'beta', 'wilson'}
            method is one of the following
            'beta', return Bayesian Credible interval using beta-distribution.
            'wilson', return Wilson score interval
        a, b : scalars
            parameters of the beta distribution defining the apriori
            distribution of p, i.e.,
            the Bayes estimator for p: p = (y+a)/(n+a+b).
            Setting a=b=0.5 gives Jeffreys interval.

        """
        if self.method.startswith('w'):
            # Wilson score
            z0 = -_invnorm(alpha / 2)
            den = 1 + (z0 ** 2. / n)
            xc = (p + (z0 ** 2) / (2 * n)) / den
            halfwidth = (z0 * sqrt((p * (1 - p) / n) +
                                   (z0 ** 2 / (4 * (n ** 2))))) / den
            plo = (xc - halfwidth).clip(min=0)  # wilson score
            pup = (xc + halfwidth).clip(max=1.0)  # wilson score
        else:
            # Jeffreys intervall a=b=0.5
            # st.beta.isf(alpha/2, y+a, n-y+b) y = n*p, n-y = n*(1-p)
            a = self.a
            b = self.b
            st = scipy.stats
            pup = np.where(p == 1, 1,
                           st.beta.isf(alpha / 2, n * p + a, n * (1 - p) + b))
            plo = np.where(p == 0, 0,
                           st.beta.isf(1 - alpha / 2,
                                       n * p + a, n * (1 - p) + b))
        return plo, pup

    def prb_empirical(self, xi=None, hs_e=None, alpha=0.05, color='r', **kwds):
        """Returns empirical binomial probabiltity.

        Parameters
        ----------
        x : ndarray
            position vector
        y : ndarray
            binomial response variable (zeros and ones)
        alpha : scalar
            confidence level
        color:
            used in plot

        Returns
        -------
        P(x) : PlotData object
            empirical probability

        """
        if xi is None:
            xi = self.get_grid(hs_e)

        x = self.x
        y = self.y

        c = gridcount(x, xi)  # + self.a + self.b # count data
        if (y == 1).any():
            c0 = gridcount(x[y == 1], xi)  # + self.a # count success
        else:
            c0 = np.zeros(np.shape(xi))
        prb = np.where(c == 0, 0, c0 / (c + _TINY))  # assume prb==0 for c==0
        CI = np.vstack(self.prb_ci(c, prb, alpha, **kwds))

        prb_e = PlotData(prb, xi, plotmethod='plot', plot_args=['.'],
                         plot_kwds=dict(markersize=6, color=color, picker=5))
        prb_e.dataCI = CI.T
        prb_e.count = c
        return prb_e

    def prb_smoothed(self, prb_e, hs, alpha=0.05, color='r', label=''):
        """Return smoothed binomial probability.

        Parameters
        ----------
        prb_e : PlotData object with empirical binomial probabilites
        hs : smoothing parameter
        alpha : confidence level
        color : color of plot object
        label : label for plot object

        """

        x_e = prb_e.args
        n_e = len(x_e)
        dx_e = x_e[1] - x_e[0]
        n = self.x.size

        x_s = np.linspace(x_e[0], x_e[-1], 10 * n_e + 1)
        self.hs = hs

        prb_s = self.kreg(x_s, output='plotobj', title='', plot_kwds=dict(
            color=color, linewidth=2))  # dict(plotflag=7))
        m_nan = np.isnan(prb_s.data)
        if m_nan.any():  # assume 0/0 division
            prb_s.data[m_nan] = 0.0

        # prb_s.data[np.isnan(prb_s.data)] = 0
        # expected number of data in each bin
        c_s = self.kreg.tkde.eval_grid_fast(x_s) * dx_e * n
        plo, pup = self.prb_ci(c_s, prb_s.data, alpha)

        prb_s.dataCI = np.vstack((plo, pup)).T
        prb_s.prediction_error_avg = np.trapz(
            pup - plo, x_s) / (x_s[-1] - x_s[0])

        if label:
            prb_s.plot_kwds['label'] = label
        prb_s.children = [PlotData([plo, pup], x_s,
                                   plotmethod='fill_between',
                                   plot_kwds=dict(alpha=0.2, color=color)),
                          prb_e]

        p_e = prb_e.eval_points(x_s)
        p_s = prb_s.data
        dp_s = np.sign(np.diff(p_s))
        k = (dp_s[:-1] != dp_s[1:]).sum()  # numpeaks

        sigmai = _logit(pup) - _logit(plo) + _EPS
        aicc = ((((_logit(p_e) - _logit(p_s)) / sigmai) ** 2).sum() +
                2 * k * (k + 1) / np.maximum(n_e - k + 1, 1) +
                np.abs((p_e - pup).clip(min=0) -
                       (p_e - plo).clip(max=0)).sum())

        prb_s.aicc = aicc
        return prb_s

    def prb_search_best(self, prb_e=None, hsvec=None, hsfun='hste',
                        alpha=0.05, color='r', label=''):
        """Return best smoothed binomial probability.

        Parameters
        ----------
        prb_e : PlotData object with empirical binomial probabilites
        hsvec : arraylike  (default np.linspace(hsmax*0.1,hsmax,55))
            vector smoothing parameters
        hsfun :
            method for calculating hsmax

        """
        if prb_e is None:
            prb_e = self.prb_empirical(
                hs_e=self.hs_e, alpha=alpha, color=color)
        if hsvec is None:
            hsmax = self._get_max_smoothing(hsfun)[0]
            hsmax = max(hsmax, self.hs_e)
            hsvec = np.linspace(hsmax * 0.2, hsmax, 55)

        hs_best = hsvec[-1] + 0.1
        prb_best = self.prb_smoothed(prb_e, hs_best, alpha, color, label)
        aicc = np.zeros(np.size(hsvec))
        for i, hi in enumerate(hsvec):
            f = self.prb_smoothed(prb_e, hi, alpha, color, label)
            aicc[i] = f.aicc
            if f.aicc <= prb_best.aicc:
                prb_best = f
                hs_best = hi
        prb_best.score = PlotData(aicc, hsvec)
        prb_best.hs = hs_best
        self._set_smoothing(hs_best)
        return prb_best


def kde_demo1():
    """KDEDEMO1 Demonstrate the smoothing parameter impact on KDE.

    KDEDEMO1 shows the true density (dotted) compared to KDE based on 7
    observations (solid) and their individual kernels (dashed) for 3
    different values of the smoothing parameter, hs.

    """
    st = scipy.stats
    x = np.linspace(-4, 4, 101)
    x0 = x / 2.0
    data = np.random.normal(loc=0, scale=1.0, size=7)
    kernel = Kernel('gauss')
    hs = kernel.hns(data)
    hVec = [hs / 2, hs, 2 * hs]

    for ix, h in enumerate(hVec):
        plt.figure(ix)
        kde = KDE(data, hs=h, kernel=kernel)
        f2 = kde(x, output='plot', title='h_s = {0:2.2f}'.format(h),
                 ylab='Density')
        f2.plot('k-')

        plt.plot(x, st.norm.pdf(x, 0, 1), 'k:')
        n = len(data)
        plt.plot(data, np.zeros(data.shape), 'bx')
        y = kernel(x0) / (n * h * kernel.norm_factor(d=1, n=n))
        for i in range(n):
            plt.plot(data[i] + x0 * h, y, 'b--')
            plt.plot([data[i], data[i]], [0, np.max(y)], 'b')

        plt.axis([min(x), max(x), 0, 0.5])


def kde_demo2():
    '''Demonstrate the difference between transformation- and ordinary-KDE.

    KDEDEMO2 shows that the transformation KDE is a better estimate for
    Rayleigh distributed data around 0 than the ordinary KDE.
    '''
    st = scipy.stats
    data = st.rayleigh.rvs(scale=1, size=300)

    x = np.linspace(1.5e-2, 5, 55)

    kde = KDE(data)
    f = kde(output='plot', title='Ordinary KDE (hs={0:})'.format(kde.hs))
    plt.figure(0)
    f.plot()

    plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')

    # plotnorm((data).^(L2))  # gives a straight line => L2 = 0.5 reasonable
    hs = Kernel('gauss').get_smoothing(data**0.5)
    tkde = TKDE(data, hs=hs, L2=0.5)
    ft = tkde(x, output='plot',
              title='Transformation KDE (hs={0:})'.format(tkde.tkde.hs))
    plt.figure(1)
    ft.plot()

    plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')

    plt.figure(0)


def kde_demo3():
    '''Demonstrate the difference between transformation and ordinary-KDE in 2D

    KDEDEMO3 shows that the transformation KDE is a better estimate for
    Rayleigh distributed data around 0 than the ordinary KDE.
    '''
    st = scipy.stats
    data = st.rayleigh.rvs(scale=1, size=(2, 300))

    # x = np.linspace(1.5e-3, 5, 55)

    kde = KDE(data)
    f = kde(output='plot', title='Ordinary KDE', plotflag=1)
    plt.figure(0)
    f.plot()

    plt.plot(data[0], data[1], '.')

    # plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable

    tkde = TKDE(data, L2=0.5)
    ft = tkde.eval_grid_fast(
        output='plot', title='Transformation KDE', plotflag=1)

    plt.figure(1)
    ft.plot()

    plt.plot(data[0], data[1], '.')

    plt.figure(0)


def kde_demo4(N=50):
    '''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior
       for 1D multimodal distributions

    KDEDEMO4 shows that the improved Sheather-Jones plug-in smoothing is a
    better compared to normal reference rules (in this case the hns)
    '''
    st = scipy.stats

    data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(N,)),
                      st.norm.rvs(loc=-5, scale=1, size=(N,))))

    # x = np.linspace(1.5e-3, 5, 55)

    kde = KDE(data, kernel=Kernel('gauss', 'hns'))
    f = kde(output='plot', title='Ordinary KDE', plotflag=1)

    kde1 = KDE(data, kernel=Kernel('gauss', 'hisj'))
    f1 = kde1(output='plot', label='Ordinary KDE', plotflag=1)

    plt.figure(0)
    f.plot('r', label='hns={0:g}'.format(kde.hs))
    # plt.figure(2)
    f1.plot('b', label='hisj={0:g}'.format(kde1.hs))
    x = np.linspace(-4, 4)
    for loc in [-5, 5]:
        plt.plot(x + loc, st.norm.pdf(x, 0, scale=1) / 2, 'k:',
                 label='True density')
    plt.legend()


def kde_demo5(N=500):
    '''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior
       for 2D multimodal distributions

    KDEDEMO5 shows that the improved Sheather-Jones plug-in smoothing is better
    compared to normal reference rules (in this case the hns)
    '''
    st = scipy.stats

    data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(2, N,)),
                      st.norm.rvs(loc=-5, scale=1, size=(2, N,))))
    kde = KDE(data, kernel=Kernel('gauss', 'hns'))
    f = kde(output='plot', plotflag=1,
            title='Ordinary KDE (hns={0:s}'.format(str(kde.hs.tolist())))

    kde1 = KDE(data, kernel=Kernel('gauss', 'hisj'))
    f1 = kde1(output='plot', plotflag=1,
              title='Ordinary KDE (hisj={0:s})'.format(str(kde1.hs.tolist())))

    plt.figure(0)
    plt.clf()
    f.plot()
    plt.plot(data[0], data[1], '.')
    plt.figure(1)
    plt.clf()
    f1.plot()
    plt.plot(data[0], data[1], '.')


def kreg_demo1(hs=None, fast=False, fun='hisj'):
    """"""
    N = 100
    # ei = np.random.normal(loc=0, scale=0.075, size=(N,))
    ei = np.array([
        -0.08508516, 0.10462496, 0.07694448, -0.03080661, 0.05777525,
        0.06096313, -0.16572389, 0.01838912, -0.06251845, -0.09186784,
        -0.04304887, -0.13365788, -0.0185279, -0.07289167, 0.02319097,
        0.06887854, -0.08938374, -0.15181813, 0.03307712, 0.08523183,
        -0.0378058, -0.06312874, 0.01485772, 0.06307944, -0.0632959,
        0.18963205, 0.0369126, -0.01485447, 0.04037722, 0.0085057,
        -0.06912903, 0.02073998, 0.1174351, 0.17599277, -0.06842139,
        0.12587608, 0.07698113, -0.0032394, -0.12045792, -0.03132877,
        0.05047314, 0.02013453, 0.04080741, 0.00158392, 0.10237899,
        -0.09069682, 0.09242174, -0.15445323, 0.09190278, 0.07138498,
        0.03002497, 0.02495252, 0.01286942, 0.06449978, 0.03031802,
        0.11754861, -0.02322272, 0.00455867, -0.02132251, 0.09119446,
        -0.03210086, -0.06509545, 0.07306443, 0.04330647, 0.078111,
        -0.04146907, 0.05705476, 0.02492201, -0.03200572, -0.02859788,
        -0.05893749, 0.00089538, 0.0432551, 0.04001474, 0.04888828,
        -0.17708392, 0.16478644, 0.1171006, 0.11664846, 0.01410477,
        -0.12458953, -0.11692081, 0.0413047, -0.09292439, -0.07042327,
        0.14119701, -0.05114335, 0.04994696, -0.09520663, 0.04829406,
        -0.01603065, -0.1933216, 0.19352763, 0.11819496, 0.04567619,
        -0.08348306, 0.00812816, -0.00908206, 0.14528945, 0.02901065])
    x = np.linspace(0, 1, N)

    y0 = 2 * np.exp(-x ** 2 / (2 * 0.3 ** 2)) + \
        3 * np.exp(-(x - 1) ** 2 / (2 * 0.7 ** 2))
    y = y0 + ei
    kernel = Kernel('gauss', fun=fun)
    hopt = kernel.hisj(x)
    kreg = KRegression(
        x, y, p=0, hs=hs, kernel=kernel, xmin=-2 * hopt, xmax=1 + 2 * hopt)
    if fast:
        kreg.__call__ = kreg.eval_grid_fast

    f = kreg(output='plot', title='Kernel regression', plotflag=1)
    plt.figure(0)
    f.plot(label='p=0')

    kreg.p = 1
    f1 = kreg(output='plot', title='Kernel regression', plotflag=1)
    f1.plot(label='p=1')
    # print(f1.data)
    plt.plot(x, y, '.', label='data')
    plt.plot(x, y0, 'k', label='True model')
    plt.legend()

    plt.show()

    print(kreg.tkde.tkde._inv_hs)
    print(kreg.tkde.tkde.hs)

_TINY = np.finfo(float).machar.tiny
_REALMIN = np.finfo(float).machar.xmin
_REALMAX = np.finfo(float).machar.xmax
_EPS = np.finfo(float).eps


def _logit(p):
    pc = p.clip(min=0, max=1)
    return (np.log(pc) - np.log1p(-pc)).clip(min=-40, max=40)


def _logitinv(x):
    return 1.0 / (np.exp(-x) + 1)


def _get_data(n=100, symmetric=False, loc1=1.1, scale1=0.6, scale2=1.0):
    st = scipy.stats
    dist = st.norm

    norm1 = scale2 * (dist.pdf(-loc1, loc=-loc1, scale=scale1) +
                      dist.pdf(-loc1, loc=loc1, scale=scale1))

    def fun1(x):
        return ((dist.pdf(x, loc=-loc1, scale=scale1) +
                 dist.pdf(x, loc=loc1, scale=scale1)) / norm1).clip(max=1.0)

    x = np.sort(6 * np.random.rand(n, 1) - 3, axis=0)

    y = (fun1(x) > np.random.rand(n, 1)).ravel()
    # y = (np.cos(x)>2*np.random.rand(n, 1)-1).ravel()
    x = x.ravel()

    if symmetric:
        xi = np.hstack((x.ravel(), -x.ravel()))
        yi = np.hstack((y, y))
        i = np.argsort(xi)
        x = xi[i]
        y = yi[i]
    return x, y, fun1


def kreg_demo2(n=100, hs=None, symmetric=False, fun='hisj', plotlog=False):
    x, y, fun1 = _get_data(n, symmetric)
    kreg_demo3(x, y, fun1, hs=None, fun='hisj', plotlog=False)


def kreg_demo3(x, y, fun1, hs=None, fun='hisj', plotlog=False):
    st = scipy.stats

    alpha = 0.1
    z0 = -_invnorm(alpha / 2)

    n = x.size
    hopt, hs1, hs2 = _get_regression_smooting(x, y, fun='hos')
    if hs is None:
        hs = hopt

    forward = _logit
    reverse = _logitinv
    # forward = np.log
    # reverse = np.exp

    xmin, xmax = x.min(), x.max()
    ni = max(2 * int((xmax - xmin) / hopt) + 3, 5)
    print(ni)
    print(xmin, xmax)
    sml = hopt * 0.1
    xi = np.linspace(xmin - sml, xmax + sml, ni)
    xiii = np.linspace(xmin - sml, xmax + sml, 4 * ni + 1)

    c = gridcount(x, xi)
    if (y == 1).any():
        c0 = gridcount(x[y == 1], xi)
    else:
        c0 = np.zeros(np.shape(xi))
    yi = np.where(c == 0, 0, c0 / c)

    kreg = KRegression(x, y, hs=hs, p=0)
    fiii = kreg(xiii)
    yiii = interpolate.interp1d(xi, yi)(xiii)
    fit = fun1(xiii).clip(max=1.0)
    df = np.diff(fiii)
    eerr = np.abs((yiii - fiii)).std() + 0.5 * (df[:-1] * df[1:] < 0).sum() / n
    err = (fiii - fit).std()
    msg = '{0} err={1:1.3f},eerr={2:1.3f}, n={3:d}, hs={4:}, hs1={5:}, '\
        'hs2={6:}'
    title = (msg.format(fun, err, eerr, n, hs, hs1, hs2))
    f = kreg(xiii, output='plotobj', title=title, plotflag=1)

    # yi[yi==0] = 1.0/(c[c!=0].min()+4)
    # yi[yi==1] = 1-1.0/(c[c!=0].min()+4)
    # yi[yi==0] = fi[yi==0]
    # yi[yi==0] = np.exp(stineman_interp(xi[yi==0], xi[yi>0],np.log(yi[yi>0])))
    # yi[yi==0] = fun1(xi[yi==0])
    try:
        yi[yi == 0] = yi[yi > 0].min() / sqrt(n)
    except:
        yi[yi == 0] = 1. / n
    yi[yi == 1] = 1 - (1 - yi[yi < 1].max()) / sqrt(n)

    logity = forward(yi)

    gkreg = KRegression(xi, logity, hs=hs, xmin=xmin - hopt, xmax=xmax + hopt)
    fg = gkreg.eval_grid(
        xi, output='plotobj', title='Kernel regression', plotflag=1)
    sa = (fg.data - logity).std()
    sa2 = iqrange(fg.data - logity) / 1.349
    # print('sa=%g %g' % (sa, sa2))
    sa = min(sa, sa2)

#    plt.figure(1)
#    plt.plot(xi, slogity-logity,'r.')
# plt.plot(xi, logity-,'b.')
#    plt.plot(xi, fg.data-logity, 'b.')
#    plt.show()
#    return

    fg = gkreg.eval_grid(
        xiii, output='plotobj', title='Kernel regression', plotflag=1)
    pi = reverse(fg.data)

    dx = xi[1] - xi[0]
    ckreg = KDE(x, hs=hs)
    # ci = ckreg.eval_grid_fast(xi)*n*dx
    ciii = ckreg.eval_grid_fast(xiii) * dx * x.size  # n*(1+symmetric)

#    sa1 = np.sqrt(1./(ciii*pi*(1-pi)))
#    plo3 = reverse(fg.data-z0*sa)
#    pup3 = reverse(fg.data+z0*sa)
    fg.data = pi
    pi = f.data

    # ref Casella and Berger (1990) "Statistical inference" pp444
#    a = 2*pi + z0**2/(ciii+1e-16)
#    b = 2*(1+z0**2/(ciii+1e-16))
#    plo2 = ((a-sqrt(a**2-2*pi**2*b))/b).clip(min=0,max=1)
#    pup2 = ((a+sqrt(a**2-2*pi**2*b))/b).clip(min=0,max=1)
    # Jeffreys intervall a=b=0.5
    # st.beta.isf(alpha/2, x+a, n-x+b)
    ab = 0.07  # 0.055
    pi1 = pi  # fun1(xiii)
    pup2 = np.where(pi == 1,
                    1,
                    st.beta.isf(alpha / 2,
                                ciii * pi1 + ab,
                                ciii * (1 - pi1) + ab))
    plo2 = np.where(pi == 0,
                    0,
                    st.beta.isf(1 - alpha / 2,
                                ciii * pi1 + ab,
                                ciii * (1 - pi1) + ab))

    averr = np.trapz(pup2 - plo2, xiii) / \
        (xiii[-1] - xiii[0]) + 0.5 * (df[:-1] * df[1:] < 0).sum()

    # f2 = kreg_demo4(x, y, hs, hopt)
    # Wilson score
    den = 1 + (z0 ** 2. / ciii)
    xc = (pi1 + (z0 ** 2) / (2 * ciii)) / den
    halfwidth = (z0 * sqrt((pi1 * (1 - pi1) / ciii) +
                           (z0 ** 2 / (4 * (ciii ** 2))))) / den
    plo = (xc - halfwidth).clip(min=0)  # wilson score
    pup = (xc + halfwidth).clip(max=1.0)  # wilson score
    # pup = (pi + z0*np.sqrt(pi*(1-pi)/ciii)).clip(min=0,max=1) # dont use
    # plo = (pi - z0*np.sqrt(pi*(1-pi)/ciii)).clip(min=0,max=1)

    # mi = kreg.eval_grid(x)
    # sigma = (stineman_interp(x, xiii, pup)-stineman_interp(x, xiii, plo))/4
    # aic = np.abs((y-mi)/sigma).std()+ 0.5*(df[:-1]*df[1:]<0).sum()/n
    # aic = np.abs((yiii-fiii)/(pup-plo)).std() + \
    #                0.5*(df[:-1]*df[1:]<0).sum() + \
    #            ((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum()

    k = (df[:-1] * df[1:] < 0).sum()  # numpeaks
    sigmai = (pup - plo)
    aic = (((yiii - fiii) / sigmai) ** 2).sum() + \
        2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \
        np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum()

    # aic = (((yiii-fiii)/sigmai)**2).sum()+ 2*k*(k+1)/(ni-k+1) + \
    #        np.abs((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum()

    # aic = averr + ((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum()

    fg.plot(label='KReg grid aic={:2.3f}'.format(aic))
    f.plot(label='KReg averr={:2.3f} '.format(averr))
    labtxt = '%d CI' % (int(100 * (1 - alpha)))
    plt.fill_between(xiii, pup, plo, alpha=0.20,
                     color='r', linestyle='--', label=labtxt)
    plt.fill_between(xiii, pup2, plo2, alpha=0.20, color='b', linestyle=':',
                     label='{:d} CI2'.format(int(100 * (1 - alpha))))
    plt.plot(xiii, fun1(xiii), 'r', label='True model')
    plt.scatter(xi, yi, label='data')
    print('maxp = {}'.format(np.nanmax(f.data)))
    print('hs = {}'.format(kreg.tkde.tkde.hs))
    plt.legend()
    h = plt.gca()
    if plotlog:
        plt.setp(h, yscale='log')
    # plt.show()
    return hs1, hs2


def kreg_demo4(x, y, hs, hopt, alpha=0.05):
    st = scipy.stats

    n = x.size
    xmin, xmax = x.min(), x.max()
    ni = max(2 * int((xmax - xmin) / hopt) + 3, 5)

    sml = hopt * 0.1
    xi = np.linspace(xmin - sml, xmax + sml, ni)
    xiii = np.linspace(xmin - sml, xmax + sml, 4 * ni + 1)

    kreg = KRegression(x, y, hs=hs, p=0)

    dx = xi[1] - xi[0]
    ciii = kreg.tkde.eval_grid_fast(xiii) * dx * x.size
#    ckreg = KDE(x,hs=hs)
# ciiii = ckreg.eval_grid_fast(xiii)*dx* x.size #n*(1+symmetric)

    f = kreg(xiii, output='plotobj')  # , plot_kwds=dict(plotflag=7))
    pi = f.data

    # Jeffreys intervall a=b=0.5
    # st.beta.isf(alpha/2, x+a, n-x+b)
    ab = 0.07  # 0.5
    pi1 = pi
    pup = np.where(pi1 == 1, 1, st.beta.isf(
        alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab))
    plo = np.where(pi1 == 0, 0, st.beta.isf(
        1 - alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab))

    # Wilson score
    # z0 = -_invnorm(alpha/2)
#    den = 1+(z0**2./ciii);
#    xc=(pi1+(z0**2)/(2*ciii))/den;
#    halfwidth=(z0*sqrt((pi1*(1-pi1)/ciii)+(z0**2/(4*(ciii**2)))))/den
# plo2 = (xc-halfwidth).clip(min=0) # wilson score
# pup2 = (xc+halfwidth).clip(max=1.0) # wilson score
    # f.dataCI = np.vstack((plo,pup)).T
    f.prediction_error_avg = np.trapz(pup - plo, xiii) / (xiii[-1] - xiii[0])
    fiii = f.data

    c = gridcount(x, xi)
    if (y == 1).any():
        c0 = gridcount(x[y == 1], xi)
    else:
        c0 = np.zeros(np.shape(xi))
    yi = np.where(c == 0, 0, c0 / c)

    f.children = [PlotData([plo, pup], xiii, plotmethod='fill_between',
                           plot_kwds=dict(alpha=0.2, color='r')),
                  PlotData(yi, xi, plotmethod='scatter',
                           plot_kwds=dict(color='r', s=5))]

    yiii = interpolate.interp1d(xi, yi)(xiii)
    df = np.diff(fiii)
    k = (df[:-1] * df[1:] < 0).sum()  # numpeaks
    sigmai = (pup - plo)
    aicc = (((yiii - fiii) / sigmai) ** 2).sum() + \
        2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \
        np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum()

    f.aicc = aicc
    f.labels.title = ('perr={:1.3f},aicc={:1.3f}, n={:d}, '
                      'hs={:1.3f}'.format(f.prediction_error_avg, aicc, n, hs))

    return f


def check_kreg_demo3():

    plt.ion()
    k = 0
    for n in [50, 100, 300, 600, 4000]:
        x, y, fun1 = _get_data(
            n, symmetric=True, loc1=1.0, scale1=0.6, scale2=1.25)
        k0 = k

        for fun in ['hste', ]:
            hsmax, _hs1, _hs2 = _get_regression_smooting(x, y, fun=fun)
            for hi in np.linspace(hsmax * 0.25, hsmax, 9):
                plt.figure(k)
                k += 1
                unused = kreg_demo3(x, y, fun1, hs=hi, fun=fun, plotlog=False)

            # kreg_demo2(n=n,symmetric=True,fun='hste', plotlog=False)
        fig.tile(range(k0, k))
    plt.ioff()
    plt.show()


def check_kreg_demo4():
    plt.ion()
    # test_docstrings()
    # kde_demo2()
    # kreg_demo1(fast=True)
    # kde_gauss_demo()
    # kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True)
    k = 0
    for _i, n in enumerate([100, 300, 600, 4000]):
        x, y, fun1 = _get_data(
            n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75)
        # k0 = k
        hopt1, _h1, _h2 = _get_regression_smooting(x, y, fun='hos')
        hopt2, _h1, _h2 = _get_regression_smooting(x, y, fun='hste')
        hopt = sqrt(hopt1 * hopt2)
        # hopt = _get_regression_smooting(x,y,fun='hos')[0]
        for _j, fun in enumerate(['hste']):  # , 'hisj', 'hns', 'hstt'
            hsmax, _hs1, _hs2 = _get_regression_smooting(x, y, fun=fun)

            fmax = kreg_demo4(x, y, hsmax + 0.1, hopt)
            for hi in np.linspace(hsmax * 0.1, hsmax, 55):
                f = kreg_demo4(x, y, hi, hopt)
                if f.aicc <= fmax.aicc:
                    fmax = f
            plt.figure(k)
            k += 1
            fmax.plot()
            plt.plot(x, fun1(x), 'r')

            # kreg_demo2(n=n,symmetric=True,fun='hste', plotlog=False)
    fig.tile(range(0, k))
    plt.ioff()
    plt.show()


def check_regression_bin():
    plt.ion()
    # test_docstrings()
    # kde_demo2()
    # kreg_demo1(fast=True)
    # kde_gauss_demo()
    # kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True)
    k = 0
    for _i, n in enumerate([100, 300, 600, 4000]):
        x, y, fun1 = _get_data(
            n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75)
        fbest = regressionbin(x, y, alpha=0.05, color='g', label='Transit_D')

        figk = plt.figure(k)
        ax = figk.gca()
        k += 1
        fbest.labels.title = 'N = {:d}'.format(n)
        fbest.plot(axis=ax)
        ax.plot(x, fun1(x), 'r')
        ax.legend(frameon=False, markerscale=4)
        # ax = plt.gca()
        ax.set_yticklabels(ax.get_yticks() * 100.0)
        ax.grid(True)

    fig.tile(range(0, k))
    plt.ioff()
    plt.show()


def check_bkregression():
    plt.ion()
    k = 0
    for _i, n in enumerate([50, 100, 300, 600]):
        x, y, fun1 = _get_data(
            n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75)
        bkreg = BKRegression(x, y)
        fbest = bkreg.prb_search_best(
            hsfun='hste', alpha=0.05, color='g', label='Transit_D')

        figk = plt.figure(k)
        ax = figk.gca()
        k += 1
#        fbest.score.plot(axis=ax)
#        axsize = ax.axis()
#        ax.vlines(fbest.hs,axsize[2]+1,axsize[3])
#        ax.set(yscale='log')
        fbest.labels.title = 'N = {:d}'.format(n)
        fbest.plot(axis=ax)
        ax.plot(x, fun1(x), 'r')
        ax.legend(frameon=False, markerscale=4)
        # ax = plt.gca()
        ax.set_yticklabels(ax.get_yticks() * 100.0)
        ax.grid(True)

    fig.tile(range(0, k))
    plt.ioff()
    plt.show('hold')


def _get_regression_smooting(x, y, fun='hste'):
    hs1 = Kernel('gauss', fun=fun).get_smoothing(x)
    # hx = np.median(np.abs(x-np.median(x)))/0.6745*(4.0/(3*n))**0.2
    if (y == 1).any():
        hs2 = Kernel('gauss', fun=fun).get_smoothing(x[y == 1])
        # hy = np.median(np.abs(y-np.mean(y)))/0.6745*(4.0/(3*n))**0.2
    else:
        hs2 = 4 * hs1
        # hy = 4*hx

    # hy2 = Kernel('gauss', fun=fun).get_smoothing(y)
    # kernel = Kernel('gauss',fun=fun)
    # hopt = (hs1+2*hs2)/3
    # hopt = (hs1+4*hs2)/5 #kernel.get_smoothing(x)
    # hopt = hs2
    hopt = sqrt(hs1 * hs2)
    return hopt, hs1, hs2


def empirical_bin_prb(x, y, hopt, color='r'):
    """Returns empirical binomial probabiltity.

    Parameters
    ----------
    x : ndarray
        position ve
    y : ndarray
        binomial response variable (zeros and ones)

    Returns
    -------
    P(x) : PlotData object
        empirical probability

    """
    xmin, xmax = x.min(), x.max()
    ni = max(2 * int((xmax - xmin) / hopt) + 3, 5)

    sml = hopt  # *0.1
    xi = np.linspace(xmin - sml, xmax + sml, ni)

    c = gridcount(x, xi)
    if (y == 1).any():
        c0 = gridcount(x[y == 1], xi)
    else:
        c0 = np.zeros(np.shape(xi))
    yi = np.where(c == 0, 0, c0 / c)
    return PlotData(yi, xi, plotmethod='scatter',
                    plot_kwds=dict(color=color, s=5))


def smoothed_bin_prb(x, y, hs, hopt, alpha=0.05, color='r', label='',
                     bin_prb=None):
    '''
    Parameters
    ----------
    x,y
    hs : smoothing parameter
    hopt : spacing in empirical_bin_prb
    alpha : confidence level
    color : color of plot object
    bin_prb : PlotData object with empirical bin prb
    '''
    if bin_prb is None:
        bin_prb = empirical_bin_prb(x, y, hopt, color)

    xi = bin_prb.args
    yi = bin_prb.data
    ni = len(xi)
    dxi = xi[1] - xi[0]

    n = x.size

    xiii = np.linspace(xi[0], xi[-1], 10 * ni + 1)

    kreg = KRegression(x, y, hs=hs, p=0)
    # expected number of data in each bin
    ciii = kreg.tkde.eval_grid_fast(xiii) * dxi * n

    f = kreg(xiii, output='plotobj')  # , plot_kwds=dict(plotflag=7))
    pi = f.data

    st = scipy.stats
    # Jeffreys intervall a=b=0.5
    # st.beta.isf(alpha/2, x+a, n-x+b)
    ab = 0.07  # 0.5
    pi1 = pi
    pup = np.where(pi1 == 1, 1, st.beta.isf(
        alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab))
    plo = np.where(pi1 == 0, 0, st.beta.isf(
        1 - alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab))

    # Wilson score
    # z0 = -_invnorm(alpha/2)
#    den = 1+(z0**2./ciii);
#    xc=(pi1+(z0**2)/(2*ciii))/den;
#    halfwidth=(z0*sqrt((pi1*(1-pi1)/ciii)+(z0**2/(4*(ciii**2)))))/den
# plo2 = (xc-halfwidth).clip(min=0) # wilson score
# pup2 = (xc+halfwidth).clip(max=1.0) # wilson score
    # f.dataCI = np.vstack((plo,pup)).T
    f.prediction_error_avg = np.trapz(pup - plo, xiii) / (xiii[-1] - xiii[0])
    fiii = f.data

    f.plot_kwds['color'] = color
    f.plot_kwds['linewidth'] = 2
    if label:
        f.plot_kwds['label'] = label
    f.children = [PlotData([plo, pup], xiii, plotmethod='fill_between',
                           plot_kwds=dict(alpha=0.2, color=color)),
                  bin_prb]

    yiii = interpolate.interp1d(xi, yi)(xiii)
    df = np.diff(fiii)
    k = (df[:-1] * df[1:] < 0).sum()  # numpeaks
    sigmai = (pup - plo)
    aicc = (((yiii - fiii) / sigmai) ** 2).sum() + \
        2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \
        np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum()

    f.aicc = aicc
    f.fun = kreg
    ttl = "perr={0:1.3f}, aicc={1:1.3f}, n={2:d}, hs={3}"
    f.labels.title = ttl.format(f.prediction_error_avg, aicc, n, hs)

    return f


def regressionbin(x, y, alpha=0.05, color='r', label=''):
    """Return kernel regression estimate for binomial data.

    Parameters
    ----------
    x : arraylike
        positions
    y : arraylike
        of 0 and 1

    """

    hopt1, _h1, _h2 = _get_regression_smooting(x, y, fun='hos')
    hopt2, _h1, _h2 = _get_regression_smooting(x, y, fun='hste')
    hopt = sqrt(hopt1 * hopt2)

    fbest = smoothed_bin_prb(x, y, hopt2 + 0.1, hopt, alpha, color, label)
    bin_prb = fbest.children[-1]
    for fun in ['hste']:  # , 'hisj', 'hns', 'hstt'
        hsmax, _hs1, _hs2 = _get_regression_smooting(x, y, fun=fun)
        for hi in np.linspace(hsmax * 0.1, hsmax, 55):
            f = smoothed_bin_prb(x, y, hi, hopt, alpha, color, label, bin_prb)
            if f.aicc <= fbest.aicc:
                fbest = f
                # hbest = hi
    return fbest


if __name__ == '__main__':
    if False:
        test_docstrings(__file__)
    else:
        kde_demo2()
        # check_bkregression()
        # check_regression_bin()
        # check_kreg_demo3()
        # check_kreg_demo4()

        # kreg_demo1(fast=True)

        # kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True)
        plt.show('hold')