Added kernel regression

master
Per.Andreas.Brodtkorb 13 years ago
parent a7a00851cc
commit 31f61eb319

@@ -189,8 +189,9 @@ class _KDE(object):
     def _eval_grid(self, *args):
         pass
     def _eval_grid_fun(self, eval_grd, *args, **kwds):
+        output = kwds.pop('output', 'value')
         f = eval_grd(*args, **kwds)
-        if kwds.get('output', 'value') == 'value':
+        if output == 'value':
             return f
         else:
             titlestr = 'Kernel density estimate (%s)' % self.kernel.name
@@ -223,7 +224,7 @@ class _KDE(object):
                 self.d)
             raise ValueError(msg)
         return points
-    def eval_points(self, points, r=0):
+    def eval_points(self, points, **kwds):
         """Evaluate the estimated pdf on a set of points.
         Parameters
@@ -244,9 +245,9 @@ class _KDE(object):
         """
         points = self._check_shape(points)
-        return self._eval_points(points, r=r)
+        return self._eval_points(points, **kwds)
-    def _eval_points(self, points, r=0):
+    def _eval_points(self, points, **kwds):
         pass
     __call__ = eval_grid
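Editorial note (not part of the commit): after this change the public eval_points/eval_grid entry points simply forward their keyword arguments down to _eval_points, so the same calls can return plain density values, moment-weighted kernel sums (via r and y), or a plottable object (via output). A minimal usage sketch, assuming wafo is installed; the variable names (data, x, fobj) are illustrative only:

import numpy as np
import wafo.kdetools as wk

data = np.random.normal(size=200)          # 1-D sample
kde = wk.KDE(data)
x = np.linspace(-4, 4, 128)

f = kde.eval_grid(x)                       # density values, as before
s1 = kde.eval_grid(x, r=1)                 # first-moment kernel sum, as used by KRegression
fobj = kde.eval_grid_fast(output='plot')   # WafoData object with a title, ready to plot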
@@ -410,21 +411,10 @@ class TKDE(_KDE):
         """
         return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds)
-#        f = self._eval_grid_fast(*args)
-#        if kwds.get('output', 'value') == 'value':
-#            return f
-#        else:
-#            args = self.args
-#            titlestr = 'Kernel density estimate (%s)' % self.kernel.name
-#            kwds2 = dict(title=titlestr)
-#            kwds2.update(**kwds)
-#            if self.d == 1:
-#                args = args[0]
-#            return WafoData(f, args, **kwds2)
-    def _eval_grid_fast(self, *args):
+    def _eval_grid_fast(self, *args, **kwds):
         if self.L2 is None:
-            f = self.tkde.eval_grid_fast(*args)
+            f = self.tkde.eval_grid_fast(*args, **kwds)
             self.args = self.tkde.args
             return f
         #targs = self._dat2gaus(list(args)) if len(args) else args
@@ -447,11 +437,11 @@ class TKDE(_KDE):
             self.args = args
             return fi*(fi>0)
         return f
-    def _eval_grid(self, *args):
+    def _eval_grid(self, *args, **kwds):
         if self.L2 is None:
-            return self.tkde.eval_grid(*args)
+            return self.tkde.eval_grid(*args, **kwds)
         targs = self._dat2gaus(list(args))
-        tf = self.tkde.eval_grid(*targs)
+        tf = self.tkde.eval_grid(*targs, **kwds)
         points = meshgrid(*args) if self.d > 1 else self.args
         f = self._scale_pdf(tf, points)
         return f
@@ -632,7 +622,7 @@ class KDE(_KDE):
         Xn = []
         nfft0 = 2 * inc
         nfft = (nfft0,)*d
-        x0 = np.linspace(-inc, inc, nfft0 + 1)
+        x0 = np.linspace(-inc, inc, nfft0+1)
         for i in range(d):
             Xn.append(x0[:-1] * dx[i])
@@ -671,12 +661,11 @@ class KDE(_KDE):
         d = len(grd)
         for i in range(d):
             grd[i] = grd[i].ravel()
-        r = kwds.get('r', 0)
-        f = self.eval_points(np.vstack(grd), r=r)
+        f = self.eval_points(np.vstack(grd), **kwds)
         return f.reshape(shape0)
-    def _eval_points(self, points, r=0):
+    def _eval_points(self, points, **kwds):
         """Evaluate the estimated pdf on a set of points.
         Parameters
@@ -698,29 +687,124 @@ class KDE(_KDE):
         d, m = points.shape
         result = np.zeros((m,))
+        r = kwds.get('r', 0)
         if r==0:
             fun = lambda xi : 1
         else:
-            fun = lambda xi : xi ** r
+            fun = lambda xi : (xi ** r).sum(axis=0)
         if m >= self.n:
+            y = kwds.get('y', np.ones(self.n))
             # there are more points than data, so loop over data
             for i in range(self.n):
                 diff = self.dataset[:, i, np.newaxis] - points
                 tdiff = np.dot(self.inv_hs / self._lambda[i], diff)
-                result += fun(diff) * self.kernel(tdiff) / self._lambda[i] ** d
+                result += y[i] * fun(diff) * self.kernel(tdiff) / self._lambda[i] ** d
         else:
+            y = kwds.get('y', 1)
             # loop over points
             for i in range(m):
                 diff = self.dataset - points[:, i, np.newaxis]
                 tdiff = np.dot(self.inv_hs, diff / self._lambda[np.newaxis, :])
-                tmp = fun(diff) * self.kernel(tdiff) / self._lambda ** d
+                tmp = y * fun(diff) * self.kernel(tdiff) / self._lambda ** d
                 result[i] = tmp.sum(axis= -1)
         result /= (self._norm_factor * self.kernel.norm_factor(d, self.n))
         return result
+class KRegression(_KDE):
+    """ Kernel-Regression
+
+    Parameters
+    ----------
+    data : (# of dims, # of data)-array
+        datapoints to estimate from
+    y : (# of data)-array
+        response variable
+    p : scalar integer (0 or 1)
+        Nadaraya-Watson estimator if p=0,
+        local linear estimator if p=1.
+    hs : array-like (optional)
+        smoothing parameter vector/matrix.
+        (default computed from data using kernel.get_smoothing function)
+    kernel : kernel function object.
+        kernel must have get_smoothing method
+    alpha : real scalar (optional)
+        sensitivity parameter (default 0, regular KDE)
+        A good choice might be alpha = 0.5 (or 1/D)
+        alpha = 0      Regular KDE (hs is constant)
+        0 < alpha <= 1 Adaptive KDE (makes hs change)
+    xmin, xmax : vectors
+        specifying the default argument range for the kreg.eval_grid methods.
+        For the kreg.eval_grid_fast methods the values must cover the range of the data.
+        (default min(data) - range(data)/4, max(data) + range(data)/4)
+        If a single value of xmin or xmax is given, then the boundary is
+        the same for all dimensions.
+    inc : scalar integer
+        defining the default dimension of the output from kreg.eval_grid methods (default 128)
+        (For kreg.eval_grid_fast: a value below 50 is very fast to compute but
+        may give some inaccuracies. Values between 100 and 500 give very
+        accurate results)
+
+    Members
+    -------
+    d : int
+        number of dimensions
+    n : int
+        number of datapoints
+
+    Methods
+    -------
+    kreg.eval_grid_fast(x0, x1,..., xd) : array
+        evaluate the kernel regression estimate on meshgrid(x0, x1,..., xd)
+    kreg.eval_grid(x0, x1,..., xd) : array
+        evaluate the kernel regression estimate on meshgrid(x0, x1,..., xd)
+    kreg.eval_points(points) : array
+        evaluate the kernel regression estimate on a provided set of points
+    kreg(x0, x1,..., xd) : array
+        same as kreg.eval_grid_fast(x0, x1,..., xd)
+
+    Example
+    -------
+    >>> N = 100
+    >>> ei = np.random.normal(loc=0, scale=0.075, size=(N,))
+    >>> x = np.linspace(0, 1, N)
+    >>> import wafo.kdetools as wk
+    >>> y = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2)) + ei
+    >>> kreg = wk.KRegression(x, y)
+    """
+    def __init__(self, data, y, p=0, hs=None, kernel=None, alpha=0.0, xmin=None, xmax=None, inc=128, L2=None):
+        self.tkde = TKDE(data, hs, kernel, alpha, xmin, xmax, inc, L2)
+        self.y = y
+        self.p = p
+
+    def eval_grid_fast(self, *args, **kwds):
+        self._grdfun = self.tkde.eval_grid_fast
+        return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds)
+
+    def eval_grid(self, *args, **kwds):
+        self._grdfun = self.tkde.eval_grid
+        return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds)
+
+    def _eval_gridfun(self, *args, **kwds):
+        grdfun = self._grdfun
+        s0 = grdfun(*args, r=0)
+        t0 = grdfun(*args, r=0, y=self.y)
+        if self.p==0:
+            return t0 / s0
+        elif self.p==1:
+            s1 = grdfun(*args, r=1)
+            s2 = grdfun(*args, r=2)
+            t1 = grdfun(*args, r=1, y=self.y)
+            return (s2 * t0 - s1 * t1) / (s2 * s0 - s1**2)
+
+    __call__ = eval_grid_fast

 class _Kernel(object):
     def __init__(self, r=1.0, stats=None):
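For orientation (an editorial illustration, not part of the commit): with the r and y keywords, _eval_points returns weighted kernel sums s_r(x) = sum_i y_i (x_i - x)^r K_h(x_i - x), and KRegression._eval_gridfun combines them into the Nadaraya-Watson estimate t0/s0 (p=0) or the local linear estimate (s2*t0 - s1*t1)/(s2*s0 - s1**2) (p=1). A self-contained NumPy sketch of the same arithmetic for 1-D data, using a plain Gaussian kernel and hypothetical names (kreg_sketch, h) that do not appear in wafo:

import numpy as np

def kreg_sketch(xi, yi, x, h, p=0):
    # xi, yi: data points and responses; x: evaluation points; h: bandwidth
    d = xi[np.newaxis, :] - x[:, np.newaxis]          # x_i - x for every evaluation point
    K = np.exp(-0.5 * (d / h) ** 2)                   # unnormalized Gaussian kernel weights
    s0, s1, s2 = K.sum(1), (d * K).sum(1), (d ** 2 * K).sum(1)
    t0, t1 = (yi * K).sum(1), (yi * d * K).sum(1)
    if p == 0:
        return t0 / s0                                # Nadaraya-Watson
    return (s2 * t0 - s1 * t1) / (s2 * s0 - s1 ** 2)  # local linear

The kernel normalization cancels in both ratios, which is why the sketch can omit it; the wafo classes keep the proper normalization and bandwidth selection.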
@@ -956,6 +1040,8 @@ class Kernel(object):
     def _get_name(self):
         return self.kernel.__class__.__name__.replace('_Kernel', '').title()
     name = property(_get_name)
+    def get_smoothing(self, *args, **kwds):
+        pass
     def stats(self):
         ''' Return some 1D statistics of the kernel.
@ -1298,15 +1384,19 @@ class Kernel(object):
better for short tailed densities. better for short tailed densities.
However, STT method in contrast to LCV is insensitive to outliers. However, STT method in contrast to LCV is insensitive to outliers.
Example: Example
-------
x = rndnorm(0,1,50,1); x = rndnorm(0,1,50,1);
hs = hstt(x,'gauss'); hs = hstt(x,'gauss');
See also hste, hbcv, hboot, hos, hldpi, hlscv, hscv, kde, kdebin See also
--------
hste, hbcv, hboot, hos, hldpi, hlscv, hscv, kde, kdebin
Reference: Reference
---------
B. W. Silverman (1986) B. W. Silverman (1986)
'Density estimation for statistics and data analysis' 'Density estimation for statistics and data analysis'
Chapman and Hall, pp 57--61 Chapman and Hall, pp 57--61
@@ -2426,6 +2516,54 @@ def kde_demo3():
     pylab.figure(0)
+def kde_demo4(hs=None, fast=False):
+    N = 100
+    #ei = np.random.normal(loc=0, scale=0.075, size=(N,))
+    ei = np.array([-0.08508516, 0.10462496, 0.07694448, -0.03080661, 0.05777525,
+        0.06096313, -0.16572389, 0.01838912, -0.06251845, -0.09186784,
+        -0.04304887, -0.13365788, -0.0185279 , -0.07289167, 0.02319097,
+        0.06887854, -0.08938374, -0.15181813, 0.03307712, 0.08523183,
+        -0.0378058 , -0.06312874, 0.01485772, 0.06307944, -0.0632959 ,
+        0.18963205, 0.0369126 , -0.01485447, 0.04037722, 0.0085057 ,
+        -0.06912903, 0.02073998, 0.1174351 , 0.17599277, -0.06842139,
+        0.12587608, 0.07698113, -0.0032394 , -0.12045792, -0.03132877,
+        0.05047314, 0.02013453, 0.04080741, 0.00158392, 0.10237899,
+        -0.09069682, 0.09242174, -0.15445323, 0.09190278, 0.07138498,
+        0.03002497, 0.02495252, 0.01286942, 0.06449978, 0.03031802,
+        0.11754861, -0.02322272, 0.00455867, -0.02132251, 0.09119446,
+        -0.03210086, -0.06509545, 0.07306443, 0.04330647, 0.078111 ,
+        -0.04146907, 0.05705476, 0.02492201, -0.03200572, -0.02859788,
+        -0.05893749, 0.00089538, 0.0432551 , 0.04001474, 0.04888828,
+        -0.17708392, 0.16478644, 0.1171006 , 0.11664846, 0.01410477,
+        -0.12458953, -0.11692081, 0.0413047 , -0.09292439, -0.07042327,
+        0.14119701, -0.05114335, 0.04994696, -0.09520663, 0.04829406,
+        -0.01603065, -0.1933216 , 0.19352763, 0.11819496, 0.04567619,
+        -0.08348306, 0.00812816, -0.00908206, 0.14528945, 0.02901065])
+    x = np.linspace(0, 1, N)
+    y0 = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2))
+    y = y0 + ei
+    kreg = KRegression(x, y, p=0, hs=hs)
+    kreg.tkde.kernel.get_smoothing = kreg.tkde.kernel.hste
+    if fast:
+        kreg.__call__ = kreg.eval_grid_fast
+    f = kreg(output='plot', title='Kernel regression', plotflag=1)
+    pylab.figure(0)
+    f.plot(label='p=0')
+    kreg.p = 1
+    f1 = kreg(output='plot', title='Kernel regression', plotflag=1)
+    f1.plot(label='p=1')
+    pylab.plot(x, y, '.', x, y0, 'k')
+    pylab.legend()
+    pylab.show()
+    print(kreg.tkde.tkde.inv_hs)
+    print(kreg.tkde.tkde.hs)
 def test_docstrings():
     import doctest
@@ -2433,4 +2571,4 @@ def test_docstrings():
 if __name__ == '__main__':
     #test_docstrings()
-    kde_demo3()
+    kde_demo4()