From 31f61eb3192c06f3c49768d2c581d19de09322ac Mon Sep 17 00:00:00 2001
From: "Per.Andreas.Brodtkorb"
Date: Tue, 22 Nov 2011 14:57:18 +0000
Subject: [PATCH] Added kernel regression

---
 pywafo/src/wafo/kdetools.py | 204 ++++++++++++++++++++++++++++++------
 1 file changed, 171 insertions(+), 33 deletions(-)

diff --git a/pywafo/src/wafo/kdetools.py b/pywafo/src/wafo/kdetools.py
index 68c0c35..2c2a1e4 100644
--- a/pywafo/src/wafo/kdetools.py
+++ b/pywafo/src/wafo/kdetools.py
@@ -189,8 +189,9 @@ class _KDE(object):
     def _eval_grid(self, *args):
         pass
     def _eval_grid_fun(self, eval_grd, *args, **kwds):
+        output = kwds.pop('output', 'value')
         f = eval_grd(*args, **kwds)
-        if kwds.get('output', 'value') == 'value':
+        if output == 'value':
             return f
         else:
             titlestr = 'Kernel density estimate (%s)' % self.kernel.name
@@ -223,7 +224,7 @@ class _KDE(object):
                 self.d)
             raise ValueError(msg)
         return points
-    def eval_points(self, points, r=0):
+    def eval_points(self, points, **kwds):
         """Evaluate the estimated pdf on a set of points.
 
         Parameters
@@ -244,9 +245,9 @@ class _KDE(object):
         """
 
         points = self._check_shape(points)
-        return self._eval_points(points, r=r)
+        return self._eval_points(points, **kwds)
 
-    def _eval_points(self, points, r=0):
+    def _eval_points(self, points, **kwds):
         pass
 
     __call__ = eval_grid
@@ -410,21 +411,10 @@ class TKDE(_KDE):
         """
         return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds)
-#        f = self._eval_grid_fast(*args)
-#        if kwds.get('output', 'value') == 'value':
-#            return f
-#        else:
-#            args = self.args
-#            titlestr = 'Kernel density estimate (%s)' % self.kernel.name
-#            kwds2 = dict(title=titlestr)
-#            kwds2.update(**kwds)
-#            if self.d == 1:
-#                args = args[0]
-#            return WafoData(f, args, **kwds2)
-
-    def _eval_grid_fast(self, *args):
+
+    def _eval_grid_fast(self, *args, **kwds):
         if self.L2 is None:
-            f = self.tkde.eval_grid_fast(*args)
+            f = self.tkde.eval_grid_fast(*args, **kwds)
             self.args = self.tkde.args
             return f
         #targs = self._dat2gaus(list(args)) if len(args) else args
@@ -447,11 +437,11 @@ class TKDE(_KDE):
             self.args = args
             return fi*(fi>0)
         return f
-    def _eval_grid(self, *args):
+    def _eval_grid(self, *args, **kwds):
         if self.L2 is None:
-            return self.tkde.eval_grid(*args)
+            return self.tkde.eval_grid(*args, **kwds)
         targs = self._dat2gaus(list(args))
-        tf = self.tkde.eval_grid(*targs)
+        tf = self.tkde.eval_grid(*targs, **kwds)
         points = meshgrid(*args) if self.d > 1 else self.args
         f = self._scale_pdf(tf, points)
         return f
@@ -632,7 +622,7 @@ class KDE(_KDE):
         Xn = []
         nfft0 = 2 * inc
         nfft = (nfft0,)*d
-        x0 = np.linspace(-inc, inc, nfft0 + 1)
+        x0 = np.linspace(-inc, inc, nfft0+1)
         for i in range(d):
             Xn.append(x0[:-1] * dx[i])
 
@@ -671,12 +661,11 @@ class KDE(_KDE):
         d = len(grd)
         for i in range(d):
             grd[i] = grd[i].ravel()
-        r = kwds.get('r', 0)
-        f = self.eval_points(np.vstack(grd), r=r)
+        f = self.eval_points(np.vstack(grd), **kwds)
         return f.reshape(shape0)
 
-    def _eval_points(self, points, r=0):
+    def _eval_points(self, points, **kwds):
         """Evaluate the estimated pdf on a set of points.
 
         Parameters
@@ -698,29 +687,124 @@ class KDE(_KDE):
         d, m = points.shape
         result = np.zeros((m,))
+
+        r = kwds.get('r', 0)
         if r==0:
             fun = lambda xi : 1
         else:
-            fun = lambda xi : xi ** r
+            fun = lambda xi : (xi ** r).sum(axis=0)
 
         if m >= self.n:
+            y = kwds.get('y', np.ones(self.n))
             # there are more points than data, so loop over data
             for i in range(self.n):
                 diff = self.dataset[:, i, np.newaxis] - points
                 tdiff = np.dot(self.inv_hs / self._lambda[i], diff)
-                result += fun(diff) * self.kernel(tdiff) / self._lambda[i] ** d
+                result += y[i] * fun(diff) * self.kernel(tdiff) / self._lambda[i] ** d
         else:
+            y = kwds.get('y', 1)
             # loop over points
             for i in range(m):
                 diff = self.dataset - points[:, i, np.newaxis]
                 tdiff = np.dot(self.inv_hs, diff / self._lambda[np.newaxis, :])
-                tmp = fun(diff) * self.kernel(tdiff) / self._lambda ** d
+                tmp = y * fun(diff) * self.kernel(tdiff) / self._lambda ** d
                 result[i] = tmp.sum(axis= -1)
 
         result /= (self._norm_factor * self.kernel.norm_factor(d, self.n))
 
         return result
 
+class KRegression(_KDE):
+    """ Kernel-Regression
+
+    Parameters
+    ----------
+    data : (# of dims, # of data)-array
+        datapoints to estimate from
+    y : # of data - array
+        response variable
+    p : scalar integer (0 or 1)
+        Nadaraya-Watson estimator if p=0,
+        local linear estimator if p=1.
+    hs : array-like (optional)
+        smoothing parameter vector/matrix.
+        (default compute from data using kernel.get_smoothing function)
+    kernel : kernel function object.
+        kernel must have get_smoothing method
+    alpha : real scalar (optional)
+        sensitivity parameter (default 0 regular KDE)
+        A good choice might be alpha = 0.5 (or 1/D)
+        alpha = 0      Regular KDE (hs is constant)
+        0 < alpha <= 1 Adaptive KDE (Make hs change)
+    xmin, xmax : vectors
+        specifying the default argument range for the kde.eval_grid methods.
+        For the kde.eval_grid_fast methods the values must cover the range of the data.
+        (default min(data)-range(data)/4, max(data)+range(data)/4)
+        If a single value of xmin or xmax is given then the boundary is the
+        same for all dimensions.
+    inc : scalar integer
+        defining the default dimension of the output from kde.eval_grid methods (default 128)
+        (For kde.eval_grid_fast: A value below 50 is very fast to compute but
+        may give some inaccuracies. Values between 100 and 500 give very
+        accurate results)
+
+    Members
+    -------
+    d : int
+        number of dimensions
+    n : int
+        number of datapoints
+
+    Methods
+    -------
+    kde.eval_grid_fast(x0, x1,..., xd) : array
+        evaluate the estimated regression on meshgrid(x0, x1,..., xd)
+    kde.eval_grid(x0, x1,..., xd) : array
+        evaluate the estimated regression on meshgrid(x0, x1,..., xd)
+    kde.eval_points(points) : array
+        evaluate the estimated regression on a provided set of points
+    kde(x0, x1,..., xd) : array
+        same as kde.eval_grid(x0, x1,..., xd)
+
+
+    Example
+    -------
+    >>> N = 100
+    >>> ei = np.random.normal(loc=0, scale=0.075, size=(N,))
+
+    >>> x = np.linspace(0, 1, N)
+    >>> import wafo.kdetools as wk
+
+    >>> y = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2)) + ei
+    >>> kreg = wk.KRegression(x, y)
+
+    """
+
+    def __init__(self, data, y, p=0, hs=None, kernel=None, alpha=0.0, xmin=None, xmax=None, inc=128, L2=None):
+        self.tkde = TKDE(data, hs, kernel, alpha, xmin, xmax, inc, L2)
+        self.y = y
+        self.p = p
+
+    def eval_grid_fast(self, *args, **kwds):
+        self._grdfun = self.tkde.eval_grid_fast
+        return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds)
+
+    def eval_grid(self, *args, **kwds):
+        self._grdfun = self.tkde.eval_grid
+        return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds)
+
+    def _eval_gridfun(self, *args, **kwds):
+        grdfun = self._grdfun
+        s0 = grdfun(*args, r=0)
+        t0 = grdfun(*args, r=0, y=self.y)
+        if self.p==0:
+            return t0 / s0
+        elif self.p==1:
+            s1 = grdfun(*args, r=1)
+            s2 = grdfun(*args, r=2)
+            t1 = grdfun(*args, r=1, y=self.y)
+            return (s2 * t0 - s1 * t1) / (s2 * s0 - s1**2)
+    __call__ = eval_grid_fast
 
 class _Kernel(object):
     def __init__(self, r=1.0, stats=None):
@@ -956,6 +1040,8 @@ class Kernel(object):
     def _get_name(self):
         return self.kernel.__class__.__name__.replace('_Kernel', '').title()
     name = property(_get_name)
+    def get_smoothing(self, *args, **kwds):
+        pass
     def stats(self):
         ''' Return some 1D statistics of the kernel.
 
@@ -1298,16 +1384,20 @@ class Kernel(object):
         better for short tailed densities. However, STT method in contrast to
         LCV is insensitive to outliers.
 
-        Example:
+        Example
+        -------
         x = rndnorm(0,1,50,1);
         hs = hstt(x,'gauss');
 
-        See also hste, hbcv, hboot, hos, hldpi, hlscv, hscv, kde, kdebin
+        See also
+        --------
+        hste, hbcv, hboot, hos, hldpi, hlscv, hscv, kde, kdebin
 
-        Reference:
-        B. W. Silverman (1986)
+        Reference
+        ---------
+        B. W. Silverman (1986)
         'Density estimation for statistics and data analysis'
         Chapman and Hall, pp 57--61
         '''
@@ -2426,6 +2516,54 @@ def kde_demo3():
     pylab.figure(0)
 
+def kde_demo4(hs=None, fast=False):
+    N = 100
+    #ei = np.random.normal(loc=0, scale=0.075, size=(N,))
+    ei = np.array([-0.08508516, 0.10462496, 0.07694448, -0.03080661, 0.05777525,
+        0.06096313, -0.16572389, 0.01838912, -0.06251845, -0.09186784,
+       -0.04304887, -0.13365788, -0.0185279 , -0.07289167, 0.02319097,
+        0.06887854, -0.08938374, -0.15181813, 0.03307712, 0.08523183,
+       -0.0378058 , -0.06312874, 0.01485772, 0.06307944, -0.0632959 ,
+        0.18963205, 0.0369126 , -0.01485447, 0.04037722, 0.0085057 ,
+       -0.06912903, 0.02073998, 0.1174351 , 0.17599277, -0.06842139,
+        0.12587608, 0.07698113, -0.0032394 , -0.12045792, -0.03132877,
+        0.05047314, 0.02013453, 0.04080741, 0.00158392, 0.10237899,
+       -0.09069682, 0.09242174, -0.15445323, 0.09190278, 0.07138498,
+        0.03002497, 0.02495252, 0.01286942, 0.06449978, 0.03031802,
+        0.11754861, -0.02322272, 0.00455867, -0.02132251, 0.09119446,
+       -0.03210086, -0.06509545, 0.07306443, 0.04330647, 0.078111  ,
+       -0.04146907, 0.05705476, 0.02492201, -0.03200572, -0.02859788,
+       -0.05893749, 0.00089538, 0.0432551 , 0.04001474, 0.04888828,
+       -0.17708392, 0.16478644, 0.1171006 , 0.11664846, 0.01410477,
+       -0.12458953, -0.11692081, 0.0413047 , -0.09292439, -0.07042327,
+        0.14119701, -0.05114335, 0.04994696, -0.09520663, 0.04829406,
+       -0.01603065, -0.1933216 , 0.19352763, 0.11819496, 0.04567619,
+       -0.08348306, 0.00812816, -0.00908206, 0.14528945, 0.02901065])
+    x = np.linspace(0, 1, N)
+
+
+    y0 = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2))
+    y = y0 + ei
+    kreg = KRegression(x, y, p=0, hs=hs)
+    kreg.tkde.kernel.get_smoothing = kreg.tkde.kernel.hste
+    # Assigning __call__ on the instance is ignored by Python, so pick the
+    # evaluation method explicitly.
+    kreg_eval = kreg.eval_grid_fast if fast else kreg.eval_grid
+
+    f = kreg_eval(output='plot', title='Kernel regression', plotflag=1)
+    pylab.figure(0)
+    f.plot(label='p=0')
+
+
+    kreg.p = 1
+    f1 = kreg_eval(output='plot', title='Kernel regression', plotflag=1)
+    f1.plot(label='p=1')
+    pylab.plot(x, y, '.', x, y0, 'k')
+    pylab.legend()
+
+    pylab.show()
+
+    print(kreg.tkde.tkde.inv_hs)
+    print(kreg.tkde.tkde.hs)
 
 def test_docstrings():
     import doctest
@@ -2433,4 +2571,4 @@ def test_docstrings():
 
 if __name__ == '__main__':
     #test_docstrings()
-    kde_demo3()
\ No newline at end of file
+    kde_demo4()
\ No newline at end of file
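
Usage sketch (reviewer note, not part of the patch). A minimal example of the
KRegression class added above, assuming the patched wafo.kdetools is
importable; the toy data and the smoothing value hs=0.05 are illustrative only
and not taken from the patch.

    import numpy as np
    import wafo.kdetools as wk

    N = 100
    x = np.linspace(0, 1, N)
    ei = np.random.normal(loc=0, scale=0.075, size=(N,))  # noise
    y0 = 2 * np.exp(-x**2 / (2 * 0.3**2)) + 3 * np.exp(-(x - 1)**2 / (2 * 0.7**2))
    y = y0 + ei  # noisy response

    # p=0 gives the Nadaraya-Watson estimate t0/s0; p=1 gives the local linear
    # estimate (s2*t0 - s1*t1)/(s2*s0 - s1**2), where s_r and t_r are the
    # kernel-weighted moments computed through the new r and y keywords of
    # _eval_points.
    kreg = wk.KRegression(x, y, p=0, hs=0.05)
    m0 = kreg.eval_grid(x)   # Nadaraya-Watson estimate on the grid x

    kreg.p = 1
    m1 = kreg.eval_grid(x)   # local linear estimate on the same grid

Calling the object directly, kreg(...), dispatches to eval_grid_fast, which is
the path exercised by kde_demo4 above.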