From 669fadc9a5f8697fb4685a765b57ef16b9e3a35a Mon Sep 17 00:00:00 2001
From: Per A Brodtkorb <per.andreas.brodtkorb@gmail.com>
Date: Tue, 20 Dec 2016 18:52:29 +0100
Subject: [PATCH] fixed a bug in Kernel.name and simplified glevels and made it
 general.

---
 wafo/kdetools/kernels.py    | 280 +++++++++++++++++++-----------------
 wafo/spectrum/core.py       |   2 +-
 wafo/tests/test_kdetools.py |  21 +++
 3 files changed, 174 insertions(+), 129 deletions(-)

diff --git a/wafo/kdetools/kernels.py b/wafo/kdetools/kernels.py
index 94e013c..928d067 100644
--- a/wafo/kdetools/kernels.py
+++ b/wafo/kdetools/kernels.py
@@ -22,6 +22,12 @@ def _assert(cond, msg):
     if not cond:
         raise ValueError(msg)
 
+
+def _assert_warn(cond, msg):
+    if not cond:
+        warnings.warn(msg)
+
+
 # stats = (mu2, R, Rdd) where
 #     mu2 : 2'nd order moment, i.e.,int(x^2*kernel(x))
 #     R :  integral of squared kernel, i.e., int(kernel(x)^2)
@@ -36,31 +42,39 @@ _stats_logi = (pi ** 2 / 3, 1. / 6, 1 / 42)
 _stats_gaus = (1, 1. / (2 * sqrt(pi)), 3. / (8 * sqrt(pi)))
 
 
-def qlevels(pdf, p=(10, 30, 50, 70, 90, 95, 99, 99.9), x1=None, x2=None):
-    """QLEVELS Calculates quantile levels which encloses P% of PDF.
+def qlevels(pdf, p=(10, 30, 50, 70, 90, 95, 99, 99.9), xi=(), indexing='xy'):
+    """QLEVELS Calculates quantile levels which encloses P% of pdf.
 
-      CALL: [ql PL] = qlevels(pdf,PL,x1,x2);
+    Parameters
+    ----------
+    pdf: array-like
+        joint point density function given as array or vector
+    p : float in range of [0,100] (or sequence of floats)
+        Percentage to compute which must be between 0 and 100 inclusive.
+    xi : tuple
+        input arguments to the pdf, i.e., (x0, x1,...., xn)
+    indexing : {'xy', 'ij'}, optional
+        Cartesian ('xy', default) or matrix ('ij') indexing of pdf.
+        See numpy.meshgrid for more details.
 
-            ql    = the discrete quantile levels.
-            pdf   = joint point density function matrix or vector
-            PL    = percent level (default [10:20:90 95 99 99.9])
-            x1,x2 = vectors of the spacing of the variables
-                   (Default unit spacing)
+    Returns
+    ------
+    levels: array-like
+        discrete levels which encloses P% of pdf
 
     QLEVELS numerically integrates PDF by decreasing height and find the
-    quantile levels which  encloses P% of the distribution. If X1 and
-    (or) X2 is unspecified it is assumed that dX1 and dX2 is constant.
-    NB! QLEVELS normalizes the integral of PDF to N/(N+0.001) before
-    calculating QL in order to reflect the sampling of PDF is finite.
-    Currently only able to handle 1D and 2D PDF's if dXi is not constant
-    (i=1,2).
+    quantile levels which  encloses P% of the distribution.
+
+    If Xi is unspecified it is assumed that dX0, dX1,..., and dXn is constant.
+    NB! QLEVELS normalizes the integral of PDF to n/(n+0.001) before
+    calculating 'levels' in order to reflect the sampling of PDF is finite.
 
     Example
     -------
     >>> import wafo.stats as ws
     >>> x = np.linspace(-8,8,2001);
     >>> PL = np.r_[10:90:20, 90, 95, 99, 99.9]
-    >>> qlevels(ws.norm.pdf(x),p=PL, x1=x);
+    >>> qlevels(ws.norm.pdf(x),p=PL, xi=(x,));
     array([ 0.39591707,  0.37058719,  0.31830968,  0.23402133,  0.10362052,
             0.05862129,  0.01449505,  0.00178806])
 
@@ -74,73 +88,58 @@ def qlevels(pdf, p=(10, 30, 50, 70, 90, 95, 99, 99.9), x1=None, x2=None):
     qlevels2, tranproc
 
     """
+    def _dx(x):
+        dx = np.diff(x.ravel()) * 0.5
+        return np.r_[0, dx] + np.r_[dx, 0]
+
+    def _init(pdf, xi, indexing):
+        if not xi:
+            return pdf.ravel()
+        if not isinstance(xi, tuple):
+            xi = (xi,)
+        dx = np.meshgrid(*[_dx(x) for x in xi], sparse=True, indexing=indexing)
+        dxij = np.ones((1))
+        for dxi in dx:
+            dxij = dxij * dxi
+        _assert(dxij.shape == pdf.shape,
+                'Shape of pdf does not match the arguments')
+        return (pdf * dxij).ravel()
+
+    def _check_levels(levels, pdf):
+        _assert_warn(not np.any(levels >= max(pdf.ravel())),
+                     'The lowest percent level is too close to 0%')
+        _assert_warn(not np.any(levels <= min(pdf.ravel())),
+                     'The given pdf is too sparsely sampled or the highest '
+                     'percent level is too close to 100%')
+
+    pdf, p = np.atleast_1d(pdf, p)
+    _assert(not any(pdf.ravel() < 0),
+            'This is not a pdf since one or more values of pdf is negative')
+    _assert(not np.any((p < 0) | (100 < p)), 'PL must satisfy 0 <= PL <= 100')
 
-    norm = 1  # normalize cdf to unity
-    pdf = np.atleast_1d(pdf)
-    _assert(not any(pdf.ravel() < 0), 'This is not a pdf since one or more '
-            'values of pdf is negative')
-
-    fsiz = pdf.shape
-    fsizmin = min(fsiz)
-    if fsizmin == 0:
+    if min(pdf.shape) == 0:
         return []
 
-    N = np.prod(fsiz)
-    d = len(fsiz)
-    if x1 is None or ((x2 is None) and d > 2):
-        fdfi = pdf.ravel()
-    else:
-        if d == 1:  # pdf in one dimension
-            dx22 = np.ones(1)
-        else:  # % pdf in two dimensions
-            dx2 = np.diff(x2.ravel()) * 0.5
-            dx22 = np.r_[0, dx2] + np.r_[dx2, 0]
-
-        dx1 = np.diff(x1.ravel()) * 0.5
-        dx11 = np.r_[0, dx1] + np.r_[dx1, 0]
-        dx1x2 = dx22[:, None] * dx11
-        fdfi = (pdf * dx1x2).ravel()
-
-    p = np.atleast_1d(p)
-    _assert(not np.any((p < 0) | (100 < p)), 'PL must satisfy 0 <= PL <= 100')
-
-    p2 = p / 100.0
     ind = np.argsort(pdf.ravel())  # sort by height of pdf
     ind = ind[::-1]
-    fi = pdf.flat[ind]
+    sorted_pdf = pdf.flat[ind]
 
+    pdf_dx = _init(pdf, xi, indexing=indexing)
     # integration in the order of decreasing height of pdf
-    Fi = np.cumsum(fdfi[ind])
-
-    if norm:  # normalize Fi to make sure int pdf dx1 dx2 approx 1
-        Fi = Fi / Fi[-1] * N / (N + 1.5e-8)
-
-    maxFi = np.max(Fi)
-    if maxFi > 1:
-        warnings.warn('this is not a pdf since cdf>1! normalizing')
-
-        Fi = Fi / Fi[-1] * N / (N + 1.5e-8)
+    cdf = np.cumsum(pdf_dx[ind])
+    n = pdf_dx.size
+    # normalize cdf to make sure int pdf dx1 dx2 approx 1
+    cdf = cdf / cdf[-1] * n / (n + 1.5e-8)
 
-    elif maxFi < .95:
-        msg = '''The given pdf is too sparsely sampled since cdf<.95.
-        Thus QL is questionable'''
-        warnings.warn(msg)
-
-    # make sure Fi is strictly increasing by not considering duplicate values
-    ind, = np.where(np.diff(np.r_[Fi, 1]) > 0)
-    # calculating the inverse of Fi to find the index
-    ui = tranproc(Fi[ind], fi[ind], p2)
+    # make sure cdf is strictly increasing by not considering duplicate values
+    ind, = np.where(np.diff(np.r_[cdf, 1]) > 0)
 
-    if np.any(ui >= max(pdf.ravel())):
-        warnings.warn('The lowest percent level is too close to 0%')
+    # calculating the inverse of cdf to find the levels
+    levels = tranproc(cdf[ind], sorted_pdf[ind], p / 100.0)
 
-    if np.any(ui <= min(pdf.ravel())):
-        msg = '''The given pdf is too sparsely sampled or
-       the highest percent level is too close to 100%'''
-        warnings.warn(msg)
-        ui[ui < 0] = 0.0
-
-    return ui
+    _check_levels(levels, pdf)
+    levels[levels < 0] = 0.0
+    return levels
 
 
 def qlevels2(data, p=(10, 30, 50, 70, 90, 95, 99, 99.9), method=1):
@@ -262,9 +261,16 @@ def sphere_volume(d, r=1.0):
 class _Kernel(object):
     __metaclass__ = ABCMeta
 
-    def __init__(self, r=1.0, stats=None):
-        self.r = r  # radius of kernel
+    def __init__(self, r=1.0, stats=None, name=''):
+        self.r = r  # radius of effective support of kernel
         self.stats = stats
+        if not name:
+            name = self.__class__.__name__.replace('_Kernel', '')
+        self._name = name
+
+    @property
+    def name(self):
+        return self._name
 
     def norm_factor(self, d=1, n=None):
         _assert(0 < d, "D")
@@ -305,12 +311,12 @@ class _KernelMulti(_Kernel):
     p=0;  Sphere = rect for 1D
     p=1;  Multivariate Epanechnikov kernel.
     p=2;  Multivariate Bi-weight Kernel
-    p=3;  Multi variate Tri-weight Kernel
-    p=4;  Multi variate Four-weight Kernel
+    p=3;  Multivariate Tri-weight Kernel
+    p=4;  Multivariate Four-weight Kernel
     """
-    def __init__(self, r=1.0, p=1, stats=None):
+    def __init__(self, r=1.0, p=1, stats=None, name=''):
         self.p = p
-        super(_KernelMulti, self).__init__(r, stats)
+        super(_KernelMulti, self).__init__(r, stats, name)
 
     def norm_factor(self, d=1, n=None):
         r = self.r
@@ -325,9 +331,10 @@ class _KernelMulti(_Kernel):
         x2 = x ** 2
         return ((1.0 - x2.sum(axis=0) / r ** 2).clip(min=0.0)) ** p
 
-mkernel_epanechnikov = _KernelMulti(p=1, stats=_stats_epan)
-mkernel_biweight = _KernelMulti(p=2, stats=_stats_biwe)
-mkernel_triweight = _KernelMulti(p=3, stats=_stats_triw)
+mkernel_epanechnikov = _KernelMulti(p=1, stats=_stats_epan,
+                                    name='epanechnikov')
+mkernel_biweight = _KernelMulti(p=2, stats=_stats_biwe, name='biweight')
+mkernel_triweight = _KernelMulti(p=3, stats=_stats_triw, name='triweight')
 
 
 class _KernelProduct(_KernelMulti):
@@ -350,9 +357,11 @@ class _KernelProduct(_KernelMulti):
         pdf = (1 - (x / r) ** 2).clip(min=0.0) ** self.p
         return pdf.prod(axis=0)
 
-mkernel_p1epanechnikov = _KernelProduct(p=1, stats=_stats_epan)
-mkernel_p1biweight = _KernelProduct(p=2, stats=_stats_biwe)
-mkernel_p1triweight = _KernelProduct(p=3, stats=_stats_triw)
+mkernel_p1epanechnikov = _KernelProduct(p=1, stats=_stats_epan,
+                                        name='p1epanechnikov')
+mkernel_p1biweight = _KernelProduct(p=2, stats=_stats_biwe, name='p1biweight')
+mkernel_p1triweight = _KernelProduct(p=3, stats=_stats_triw,
+                                     name='p1triweight')
 
 
 class _KernelRectangular(_Kernel):
@@ -404,11 +413,6 @@ class _KernelGaussian(_Kernel):
 
 mkernel_gaussian = _KernelGaussian(r=4.0, stats=_stats_gaus)
 
-# def mkernel_gaussian(X):
-#    x2 = X ** 2
-#    d = X.shape[0]
-#    return (2 * pi) ** (-d / 2) * exp(-0.5 * x2.sum(axis=0))
-
 
 class _KernelLaplace(_Kernel):
 
@@ -439,8 +443,8 @@ _MKERNEL_DICT = dict(
     tria=mkernel_triangular,
     lapl=mkernel_laplace,
     logi=mkernel_logistic,
-    gaus=mkernel_gaussian
-)
+    gaus=mkernel_gaussian)
+
 _KERNEL_EXPONENT_DICT = dict(
     re=0, sp=0, ep=1, bi=2, tr=3, fo=4, fi=5, si=6, se=7)
 
@@ -530,7 +534,7 @@ class Kernel(object):
 
     @property
     def name(self):
-        return self.kernel.__class__.__name__.replace('_Kernel', '').title()
+        return self.kernel.name
 
     def stats(self):
         """Return some 1D statistics of the kernel.
@@ -586,8 +590,13 @@ class Kernel(object):
         visual check by eye.
 
         Example:
-          data = rndnorm(0, 1,20,1)
-          h = hns(data,'epan')
+        -------
+        >>> import numpy as np
+        >>> import wafo.kdetools as wk
+        >>> import wafo.stats as ws
+        >>> kernel = wk.Kernel('epan')
+        >>> data = ws.norm.rvs(0, 1, size=(1,20))
+        >>> h = kernel.hns(data)
 
         See also:
         ---------
@@ -601,7 +610,6 @@ class Kernel(object):
         Wand,M.P. and Jones, M.C. (1995)
         'Kernel smoothing'
         Chapman and Hall, pp 60--63
-
         """
 
         a = np.atleast_2d(data)
@@ -611,13 +619,13 @@ class Kernel(object):
         mu2, R, _Rdd = self.stats()
         amise_constant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5)
         iqr = iqrange(a, axis=1)  # interquartile range
-        stdA = np.std(a, axis=1, ddof=1)
+        std_a = np.std(a, axis=1, ddof=1)
         # use of interquartile range guards against outliers.
         # the use of interquartile range is better if
         # the distribution is skew or have heavy tails
         # This lessen the chance of oversmoothing.
         return np.where(iqr > 0,
-                        np.minimum(stdA, iqr / 1.349), stdA) * amise_constant
+                        np.minimum(std_a, iqr / 1.349), std_a) * amise_constant
 
     def hos(self, data):
         """Returns Oversmoothing Parameter.
@@ -680,7 +688,8 @@ class Kernel(object):
         elif name == 'gaus':  # Gaussian kernel
             a = (4.0 / (d + 2.0)) ** (1. / (d + 4.0))
         else:
-            raise ValueError('Unknown kernel.')
+            raise NotImplementedError('Hmns bandwidth not implemented for '
+                                      'kernel {}.'.format(name))
         return a
 
     def hmns(self, data):
@@ -696,7 +705,6 @@ class Kernel(object):
                     'triweight'     - Tri-weight kernel.
                     'gaussian'      - Gaussian kernel
 
-          Note that only the first 4 letters of the kernel name is needed.
 
          HMNS  only gives  a optimal value with respect to mean integrated
          square error, when the true underlying distribution is Multivariate
@@ -725,7 +733,6 @@ class Kernel(object):
           Chapman and Hall, pp 60--63, 86--88
 
         """
-        # TODO: implement more kernels
 
         a = np.atleast_2d(data)
         d, n = a.shape
@@ -1081,35 +1088,53 @@ class Kernel(object):
                 warnings.warn('The obtained value did not converge.')
 
             h[dim] = h1 * s
-        # end % for dim loop
+        # end # for dim loop
         return h
 
     def hscv(self, data, hvec=None, inc=128, maxit=100, fulloutput=False):
         '''
         HSCV Smoothed cross-validation estimate of smoothing parameter.
 
-         CALL: [hs,hvec,score] = hscv(data,kernel,hvec)
 
-           hs     = smoothing parameter
-           hvec   = vector defining possible values of hs
+        Parameters
+        ----------
+        data   = data vector
+        hvec   = vector defining possible values of hs
                      (default linspace(0.25*h0,h0,100), h0=0.62)
-           score  = score vector
-           data   = data vector
-           kernel = 'gaussian'      - Gaussian kernel the only supported
-
-          Note that only the first 4 letters of the kernel name is needed.
+        inc = length of estimated kerneldensity estimate
+        maxit = maximum number of iterations
+        fulloutput = True if fulloutput is wanted
 
-          Example:
-            data = rndnorm(0,1,20,1)
-             [hs hvec score] = hscv(data,'epan');
-             plot(hvec,score)
-         See also  hste, hbcv, hboot, hos, hldpi, hlscv, hstt, kde, kdefun
+        Returns
+        -------
+        hs     = smoothing parameter
+        hvec   = vector defining possible values of hs
+        score  = score vector
+
+          Example
+          ------
+          >>> import wafo.kdetools as wk
+          >>> import wafo.stats as ws
+          >>> data = ws.norm.rvs(0,1, size=(1,20))
+          >>> kernel = wk.Kernel('epan')
+          >>> hs0 = kernel.hscv(data, fulloutput=False)
+          >>> hs, hvec, score = kernel.hscv(data, fulloutput=True)
+          >>> np.allclose(hs, hs0)
+          True
+
+         import matplotlib.pyplot as plt
+         plt.plot(hvec,score)
+
+         See also:
+         hste, hbcv, hboot, hos, hldpi, hlscv, hstt, kde, kdefun
 
-         Wand,M.P. and Jones, M.C. (1986)
-         'Kernel smoothing'
-          Chapman and Hall, pp 75--79
+        Reference
+        ---------
+        Wand,M.P. and Jones, M.C. (1986)
+        'Kernel smoothing'
+        Chapman and Hall, pp 75--79
         '''
-        # TODO: Add support for other kernels than Gaussian
+
         A = np.atleast_2d(data)
         d, n = A.shape
 
@@ -1209,18 +1234,17 @@ class Kernel(object):
             idx = score.argmin()
             # Kernel other than Gaussian scale bandwidth
             h[dim] = hvec[idx] * (ste_constant / ste_constant2) ** (1 / 5)
-            if idx == 0:
-                warnings.warn("Optimum is probably lower than "
-                              "hs={0:g} for dim={1:d}".format(h[dim] * s, dim))
-            elif idx == maxit - 1:
-                msg = "Optimum is probably higher than hs={0:g] for dim={1:d}"
-                warnings.warn(msg.format(h[dim] * s, dim))
+            _assert_warn(0 < idx,
+                         "Optimum is probably lower than "
+                         "hs={0:g} for dim={1:d}".format(h[dim] * s, dim))
+            _assert_warn(idx < maxit - 1,
+                         "Optimum is probably higher than "
+                         "hs={0:g} for dim={1:d}".format(h[dim] * s, dim))
 
         hvec = hvec * (ste_constant / ste_constant2) ** (1 / 5)
         if fulloutput:
-            return h * sigmaA, score, hvec, sigmaA
-        else:
-            return h * sigmaA
+            return h * sigmaA, score, hvec
+        return h * sigmaA
 
     def hldpi(self, data, L=2, inc=128):
         '''HLDPI L-stage Direct Plug-In estimate of smoothing parameter.
@@ -1361,7 +1385,7 @@ def mkernel(X, kernel):
 
     See also
     --------
-    kde, kdefun, kdebin
+    KDE
 
     References
     ----------
diff --git a/wafo/spectrum/core.py b/wafo/spectrum/core.py
index fabaefb..d4909cf 100644
--- a/wafo/spectrum/core.py
+++ b/wafo/spectrum/core.py
@@ -1131,7 +1131,7 @@ class SpecData1D(PlotData):
                          title='Joint density of maximum and minimum')
         try:
             pl = [10, 30, 50, 70, 90, 95, 99, 99.9]
-            mmpdf.cl = qlevels(uvdens, pl, h, h)
+            mmpdf.cl = qlevels(uvdens, pl, xi=(h, h))
             mmpdf.pl = pl
         except:
             pass
diff --git a/wafo/tests/test_kdetools.py b/wafo/tests/test_kdetools.py
index bcbc0a7..9e936b9 100644
--- a/wafo/tests/test_kdetools.py
+++ b/wafo/tests/test_kdetools.py
@@ -449,6 +449,27 @@ class TestSmoothing(unittest.TestCase):
         assert_allclose(hs, [[3.25196193e-01, -2.68892467e-02, 3.18932448e-04],
                              [-2.68892467e-02, 3.91283306e-01, 2.38654678e-02],
                              [3.18932448e-04, 2.38654678e-02, 4.05123874e-01]])
+        hs = self.gauss.hmns(self.data[0])
+        assert_allclose(hs, self.gauss.hns(self.data[0]))
+
+        hs = wk.Kernel('epan').hmns(self.data)
+        assert_allclose(hs,
+                        [[8.363847e-01, -6.915749e-02, 8.202747e-04],
+                         [-6.915749e-02, 1.006357e+00, 6.138052e-02],
+                         [8.202747e-04, 6.138052e-02, 1.041954e+00]],
+                        rtol=1e-5)
+        hs = wk.Kernel('biwe').hmns(self.data[:2])
+        assert_allclose(hs, [[0.868428, -0.071705],
+                             [-0.071705, 1.04685]], rtol=1e-5)
+        hs = wk.Kernel('triwe').hmns(self.data[:2])
+        assert_allclose(hs, [[0.975375, -0.080535],
+                             [-0.080535, 1.17577]], rtol=1e-5)
+        self.assertRaises(NotImplementedError,
+                          wk.Kernel('biwe').hmns, self.data)
+        self.assertRaises(NotImplementedError,
+                          wk.Kernel('triwe').hmns, self.data)
+        self.assertRaises(NotImplementedError,
+                          wk.Kernel('triangular').hmns, self.data)
 
     def test_hscv(self):
         hs = self.gauss.hscv(self.data)