Small updates

14 years ago · 485ecfcd2f
parent 5d8c259ce6
commit 485ecfcd2f
3 changed files with 337 additions and 69 deletions
--- a/pywafo/src/wafo/fig.py
+++ b/pywafo/src/wafo/fig.py
@ -747,8 +747,6 @@ def cycle(*figs,**kwds):
        _show_windows(wnds, win32con.SW_SHOWNORMAL)
 if __name__ == '__main__':
    import doctest
    doctest.testmod()
--- a/pywafo/src/wafo/kdetools.py
+++ b/pywafo/src/wafo/kdetools.py
@ -19,7 +19,7 @@ import scipy.optimize as optimize
 from wafo.misc import meshgrid
 from wafo.wafodata import WafoData
-from dctpack import dct
+from dctpack import dct, dctn, idctn
 import copy
 import numpy as np
@ -63,7 +63,220 @@ def sphere_volume(d, r=1.0):
    Chapman and Hall, pp 105
    """
    return (r ** d) * 2.0 * pi ** (d / 2.0) / (d * gamma(d / 2.0))
 class KDEgauss(object):
    """ Kernel-Density Estimator base class.
    Parameters
    ----------
    data : (# of dims, # of data)-array
        datapoints to estimate from
    hs : array-like (optional) 
        smooting parameter vector/matrix.
        (default compute from data using kernel.get_smoothing function)
    alpha : real scalar (optional)
        sensitivity parameter               (default 0 regular KDE)
        A good choice might be alpha = 0.5 ( or 1/D)
        alpha = 0      Regular  KDE (hs is constant)
        0 < alpha <= 1 Adaptive KDE (Make hs change)  
    Members
    -------
    d : int
        number of dimensions
    n : int
        number of datapoints
    Methods
    -------
    kde.eval_grid_fast(x0, x1,..., xd) : array
        evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
    kde(x0, x1,..., xd) : array
        same as kde.eval_grid_fast(x0, x1,..., xd)
    """
    def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, xmax=None, inc=128):
        self.dataset = atleast_2d(data)
        self.hs = hs
        self.kernel = kernel if kernel else Kernel('gauss')
        self.alpha = alpha
        self.xmin = xmin
        self.xmax = xmax
        self.inc = inc
        self.initialize()
    def initialize(self):
        self.d, self.n = self.dataset.shape
        self._set_xlimits()
        self._initialize()
    def _initialize(self):
        self._compute_smoothing()
    def _compute_smoothing(self):
        """Computes the smoothing matrix
        """
        get_smoothing = self.kernel.get_smoothing
        h = self.hs
        if h is None:
            h = get_smoothing(self.dataset)
        h = np.atleast_1d(h)
        hsiz = h.shape
        if (len(hsiz) == 1) or (self.d == 1):
            if max(hsiz) == 1:
                h = h * np.ones(self.d)
            else:
                h.shape = (self.d,) # make sure it has the correct dimension
            # If h negative calculate automatic values
            ind, = np.where(h <= 0)
            for i in ind.tolist(): # 
                h[i] = get_smoothing(self.dataset[i])
            deth = h.prod()
            self.inv_hs = linalg.diag(1.0 / h)
        else: #fully general smoothing matrix
            deth = linalg.det(h)
            if deth <= 0:
                raise ValueError('bandwidth matrix h must be positive definit!')
            self.inv_hs = linalg.inv(h)
        self.hs = h
        self._norm_factor = deth * self.n
    def _set_xlimits(self):
        amin = self.dataset.min(axis= -1)
        amax = self.dataset.max(axis= -1)
        iqr = iqrange(self.dataset, axis= -1)
        sigma = np.minimum(np.std(self.dataset, axis= -1, ddof=1), iqr / 1.34)
        #xyzrange = amax - amin
        #offset = xyzrange / 4.0
        offset = 2 * sigma
        if self.xmin is None:
            self.xmin = amin - offset
        else:
            self.xmin = self.xmin * np.ones((self.d,1))
        if self.xmax is None:
            self.xmax = amax + offset
        else:
            self.xmax = self.xmax * np.ones((self.d,1))
    def eval_grid_fast(self, *args, **kwds):
        """Evaluate the estimated pdf on a grid.
        Parameters
        ----------
        arg_0,arg_1,... arg_d-1 : vectors
            Alternatively, if no vectors is passed in then
             arg_i = linspace(self.xmin[i], self.xmax[i], self.inc)
        output : string optional
            'value' if value output
            'data' if object output
        Returns
        -------
        values : array-like
            The values evaluated at meshgrid(*args).
        """
        if len(args) == 0:
            args = []
            for i in range(self.d):
                args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc))
        self.args = args
        return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds)
    def _eval_grid_fast(self, *args, **kwds):
        X = np.vstack(args)
        d, inc = X.shape
        dx = X[:, 1] - X[:, 0]
        R = X.max(axis=-1)- X.min(axis=-1)
        t_star = (self.hs/R)**2
        I = (np.asfarray(np.arange(0, inc))*pi)**2 
        In = []
        for i in range(d):
            In.append(I * t_star[i] * 0.5)
        Inc = meshgrid(*In) if d > 1 else In
        kw = np.zeros((inc,)*d)
        for i in range(d):
            kw += exp(-Inc[i])
        y = kwds.get('y', 1.0)
        d, n = self.dataset.shape
        # Find the binned kernel weights, c.
        c = gridcount(self.dataset, X, y=y)/n
        # Perform the convolution.
        at = dctn(c) * kw
        z = idctn(at)*at.size/np.prod(R)
        return z*(z>0.0)
        #ix = (slice(0, inc),)*d
        #return z[ix] * (z[ix] > 0.0)
    def _eval_grid_fun(self, eval_grd, *args, **kwds):
        output = kwds.pop('output', 'value')
        f = eval_grd(*args, **kwds)
        if output == 'value':
            return f
        else:
            titlestr = 'Kernel density estimate (%s)' % self.kernel.name
            kwds2 = dict(title=titlestr)
            kwds2['plot_kwds'] = dict(plotflag=1)
            kwds2.update(**kwds)
            args = self.args
            if self.d == 1:
                args = args[0]
            wdata = WafoData(f, args, **kwds2)
            if self.d > 1:
                PL = np.r_[10:90:20, 95, 99, 99.9]
                try:
                    ql = qlevels(f, p=PL)
                    wdata.clevels = ql
                    wdata.plevels = PL
                except:
                    pass
            return wdata 
    def _check_shape(self, points):
        points = atleast_2d(points)
        d, m = points.shape
        if d != self.d:
            if d == 1 and m == self.d:
                # points was passed in as a row vector
                points = np.reshape(points, (self.d, 1))
            else:
                msg = "points have dimension %s, dataset has dimension %s" % (d,
                    self.d)
                raise ValueError(msg)
        return points   
    def eval_points(self, points, **kwds):
        """Evaluate the estimated pdf on a set of points.
        Parameters
        ----------
        points : (# of dimensions, # of points)-array
            Alternatively, a (# of dimensions,) vector can be passed in and
            treated as a single point.
        Returns
        -------
        values : (# of points,)-array
            The values at each point.
        Raises
        ------
        ValueError if the dimensionality of the input points is different than
        the dimensionality of the KDE.
        """
        points = self._check_shape(points)
        return self._eval_points(points, **kwds)
    def _eval_points(self, points, **kwds):
        pass
    __call__ = eval_grid_fast
 class _KDE(object):
    """ Kernel-Density Estimator base class.
@ -341,7 +554,7 @@ class TKDE(_KDE):
    def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None,
                 xmax=None, inc=128, L2=None):
        self.L2 = L2
-        _KDE.__init__(self, data, hs, kernel, alpha, xmin, xmax, inc)
+        super(TKDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc)
    def _initialize(self):
        self._check_xmin()
@ -574,7 +787,7 @@ class KDE(_KDE):
    """
    def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, xmax=None, inc=128):
-        _KDE.__init__(self, data, hs, kernel, alpha, xmin, xmax, inc)
+        super(KDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc)
    def _initialize(self):
        self._compute_smoothing()
@ -638,11 +851,14 @@ class KDE(_KDE):
        Xn = np.dot(self.inv_hs, np.vstack(Xnc))
        # Obtain the kernel weights.        
        kw = self.kernel(Xn) 
        norm_fact = (kw.sum()*dx.prod()*self.n)
        norm_fact2 = (self._norm_factor * self.kernel.norm_factor(d, self.n))
        kw = kw/norm_fact
        r = kwds.get('r', 0)
-        if r==0:
+        if r!=0:
-            kw = self.kernel(Xn) / (self._norm_factor * self.kernel.norm_factor(d, self.n))
+            kw *= np.vstack(Xnc) ** r 
        else:
            kw = np.vstack(Xnc) ** r * self.kernel(Xn) / (self._norm_factor * self.kernel.norm_factor(d, self.n))
        kw.shape = shape0
        kw = np.fft.ifftshift(kw)
        fftn = np.fft.fftn
@ -1434,7 +1650,8 @@ class Kernel(object):
            c = gridcount(A[dim], xa)
            N = len(set(A[dim]))
-            a = dct(c/c.sum(), norm=None)
+            #a = dct(c/c.sum(), norm=None)
            a = dct(c/len(A[dim]), norm=None)
            #% now compute the optimal bandwidth^2 using the referenced method
            I = np.asfarray(np.arange(1, inc))**2 
@ -2551,8 +2768,8 @@ def kde_demo1():
    import scipy.stats as st
    x = np.linspace(-4, 4, 101)
    x0 = x / 2.0
-    data = np.random.normal(loc=0, scale=1.0, size=7) #rndnorm(0,1,7,1);
+    data = np.random.normal(loc=0, scale=1.0, size=7) 
-    kernel = Kernel('gaus')
+    kernel = Kernel('gauss')
    hs = kernel.hns(data)
    hVec = [hs / 2, hs, 2 * hs]
@ -2743,6 +2960,58 @@ def kreg_demo1(hs=None, fast=False, fun='hisj'):
    print(kreg.tkde.tkde.inv_hs)
    print(kreg.tkde.tkde.hs) 
 def kde_gauss_demo(n=50000):
    '''
    KDEDEMO Demonstrate the KDEgauss
    KDEDEMO1 shows the true density (dotted) compared to KDE based on 7
    observations (solid) and their individual kernels (dashed) for 3
    different values of the smoothing parameter, hs.
    '''
    import scipy.stats as st
    #x = np.linspace(-4, 4, 101)
    #data = np.random.normal(loc=0, scale=1.0, size=n) 
    #data = np.random.exponential(scale=1.0, size=n)
 #    n1 = 128
 #    I = (np.arange(n1)*pi)**2 *0.01*0.5
 #    kw = exp(-I)
 #    pylab.plot(idctn(kw))
 #    return
    dist = st.norm
    dist = st.expon
    data = dist.rvs(loc=0, scale=1.0, size=n)
    d, N = np.atleast_2d(data).shape
    if d==1:
        plot_options = [dict(color='red'), dict(color='green'), dict(color='black')]
    else:
        plot_options = [dict(colors='red'), dict(colors='green'), dict(colors='black')]
    pylab.figure(1)
    kde0 = KDE(data, kernel=Kernel('gauss', 'hisj'))
    f0 = kde0.eval_grid_fast(output='plot', ylab='Density')
    f0.plot(**plot_options[0])
    kde1 = TKDE(data, kernel=Kernel('gauss', 'hisj'), L2=0)
    f1 = kde1.eval_grid_fast(output='plot', ylab='Density')
    f1.plot(**plot_options[1])
    kde2 = KDEgauss(data)
    f2 = kde2(output='plot', ylab='Density')
    x = f2.args
    f2.plot(**plot_options[2])
    fmax = dist.pdf(x, 0, 1).max() 
    if d==1:
        pylab.plot(x, dist.pdf(x, 0, 1), 'k:')
        pylab.axis([x.min(), x.max(), 0, fmax])
    pylab.show()
    print(fmax/f2.data.max())
    format_ = ''.join(('%g, ')*d)
    format_ = 'hs0=%s hs2=%s' % (format_,format_)
    print(format_ % tuple(kde0.hs.tolist()+kde2.hs.tolist()))
 def test_docstrings():
    import doctest
    doctest.testmod()
@ -2750,4 +3019,5 @@ def test_docstrings():
 if __name__ == '__main__':
    #test_docstrings()
    #kde_demo2()
-    kreg_demo1()
+    #kreg_demo1(fast=True)
    kde_gauss_demo()
--- a/pywafo/src/wafo/wafodata.py
+++ b/pywafo/src/wafo/wafodata.py
@ -190,7 +190,7 @@ class Plotter_1d(object):
    def plot(self, wdata, *args, **kwds):
        plotflag = kwds.pop('plotflag', False)
        if plotflag:
-            h1 = self._plot(plotflag, wdata, **kwds)
+            h1 = self._plot(plotflag, wdata, *args, **kwds)
        else:
            if isinstance(wdata.args, (list, tuple)):
                args1 = tuple((wdata.args)) + (wdata.data,) + args