Made a baseclass _KDE for KDE and TKDE + updated tests

master
Per.Andreas.Brodtkorb 14 years ago
parent f8c543b94e
commit 171a0cd0c6

@ -57,9 +57,172 @@ def sphere_volume(d, r=1.0):
'Kernel smoothing' 'Kernel smoothing'
Chapman and Hall, pp 105 Chapman and Hall, pp 105
""" """
return (r ** d) * 2. * pi ** (d / 2.) / (d * gamma(d / 2.)) return (r ** d) * 2.0 * pi ** (d / 2.0) / (d * gamma(d / 2.0))
class TKDE(object): class _KDE(object):
""" Kernel-Density Estimator base class.
Parameters
----------
data : (# of dims, # of data)-array
datapoints to estimate from
hs : array-like (optional)
smooting parameter vector/matrix.
(default compute from data using kernel.get_smoothing function)
kernel : kernel function object.
kernel must have get_smoothing method
alpha : real scalar (optional)
sensitivity parameter (default 0 regular KDE)
A good choice might be alpha = 0.5 ( or 1/D)
alpha = 0 Regular KDE (hs is constant)
0 < alpha <= 1 Adaptive KDE (Make hs change)
Members
-------
d : int
number of dimensions
n : int
number of datapoints
Methods
-------
kde.eval_grid_fast(x0, x1,..., xd) : array
evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
kde.eval_grid(x0, x1,..., xd) : array
evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
kde.eval_points(points) : array
evaluate the estimated pdf on a provided set of points
kde(x0, x1,..., xd) : array
same as kde.eval_grid(x0, x1,..., xd)
"""
def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, xmax=None, inc=128):
self.dataset = atleast_2d(data)
self.hs = hs
self.kernel = kernel if kernel else Kernel('gauss')
self.alpha = alpha
self.xmin = xmin
self.xmax = xmax
self.inc = inc
self.initialize()
def initialize(self):
self.d, self.n = self.dataset.shape
self._set_xlimits()
self._initialize()
def _initialize(self):
pass
def _set_xlimits(self):
amin = self.dataset.min(axis= -1)
amax = self.dataset.max(axis= -1)
iqr = iqrange(self.dataset, axis=-1)
sigma = np.minimum(np.std(self.dataset, axis=-1, ddof=1),iqr/1.34)
#xyzrange = amax - amin
#offset = xyzrange / 4.0
offset = 2*sigma
if self.xmin is None:
self.xmin = amin - offset
else:
self.xmin = self.xmin * np.ones(self.d)
if self.xmax is None:
self.xmax = amax + offset
else:
self.xmax = self.xmax * np.ones(self.d)
def eval_grid_fast(self, *args):
"""Evaluate the estimated pdf on a grid.
Parameters
----------
arg_0,arg_1,... arg_d-1 : vectors
Alternatively, if no vectors is passed in then
arg_i = linspace(self.xmin[i], self.xmax[i], self.inc)
Returns
-------
values : array-like
The values evaluated at meshgrid(*args).
"""
if len(args) == 0:
args = []
for i in range(self.d):
args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc))
self.args = args
return self._eval_grid_fast(*args)
def _eval_grid_fast(self, *args):
pass
def eval_grid(self, *args):
"""Evaluate the estimated pdf on a grid.
Parameters
----------
arg_0,arg_1,... arg_d-1 : vectors
Alternatively, if no vectors is passed in then
arg_i = linspace(self.xmin[i], self.xmax[i], self.inc)
Returns
-------
values : array-like
The values evaluated at meshgrid(*args).
"""
if len(args) == 0:
args = []
for i in range(self.d):
args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc))
self.args = args
return self._eval_grid(*args)
def _eval_grid(self, *args):
pass
def _check_shape(self, points):
points = atleast_2d(points)
d, m = points.shape
if d != self.d:
if d == 1 and m == self.d:
# points was passed in as a row vector
points = np.reshape(points, (self.d, 1))
else:
msg = "points have dimension %s, dataset has dimension %s" % (d,
self.d)
raise ValueError(msg)
return points
def eval_points(self, points):
"""Evaluate the estimated pdf on a set of points.
Parameters
----------
points : (# of dimensions, # of points)-array
Alternatively, a (# of dimensions,) vector can be passed in and
treated as a single point.
Returns
-------
values : (# of points,)-array
The values at each point.
Raises
------
ValueError if the dimensionality of the input points is different than
the dimensionality of the KDE.
"""
points = self._check_shape(points)
return self._eval_points(points)
def _eval_points(self, points):
pass
__call__ = eval_grid
class TKDE(_KDE):
""" Transformation Kernel-Density Estimator. """ Transformation Kernel-Density Estimator.
Parameters Parameters
@ -76,6 +239,17 @@ class TKDE(object):
A good choice might be alpha = 0.5 ( or 1/D) A good choice might be alpha = 0.5 ( or 1/D)
alpha = 0 Regular KDE (hs is constant) alpha = 0 Regular KDE (hs is constant)
0 < alpha <= 1 Adaptive KDE (Make hs change) 0 < alpha <= 1 Adaptive KDE (Make hs change)
xmin, xmax : vectors
specifying the default argument range for the kde.eval_grid methods.
For the kde.eval_grid_fast methods the values must cover the range of the data.
(default min(data)-range(data)/4, max(data)-range(data)/4)
If a single value of xmin or xmax is given then the boundary is the is
the same for all dimensions.
inc : scalar integer
defining the default dimension of the output from kde.eval_grid methods (default 128)
(For kde.eval_grid_fast: A value below 50 is very fast to compute but
may give some inaccuracies. Values between 100 and 500 give very
accurate results)
L2 : array-like L2 : array-like
vector of transformation parameters (default 1 no transformation) vector of transformation parameters (default 1 no transformation)
t(xi;L2) = xi^L2*sign(L2) for L2(i) ~= 0 t(xi;L2) = xi^L2*sign(L2) for L2(i) ~= 0
@ -91,10 +265,14 @@ class TKDE(object):
Methods Methods
------- -------
kde.evaluate(points) : array kde.eval_grid_fast(x0, x1,..., xd) : array
evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
kde.eval_grid(x0, x1,..., xd) : array
evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
kde.eval_points(points) : array
evaluate the estimated pdf on a provided set of points evaluate the estimated pdf on a provided set of points
kde(points) : array kde(x0, x1,..., xd) : array
same as kde.evaluate(points) same as kde.eval_grid(x0, x1,..., xd)
Example Example
@ -119,8 +297,8 @@ class TKDE(object):
0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882]) 0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882])
>>> kde.eval_grid_fast(x) >>> kde.eval_grid_fast(x)
array([ 0. , 0.4614821 , 0.39554839, 0.32764086, 0.26275681, array([ 1.06437223, 0.46203314, 0.39593137, 0.32781899, 0.26276433,
0.20543731, 0.15741056, 0.11863464, 0. , 0. ]) 0.20532206, 0.15723498, 0.11843998, 0.08797755, 0. ])
import pylab as plb import pylab as plb
h1 = plb.plot(x, f) # 1D probability density plot h1 = plb.plot(x, f) # 1D probability density plot
@ -129,19 +307,11 @@ class TKDE(object):
def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None,
xmax=None, inc=128, L2=None): xmax=None, inc=128, L2=None):
self.dataset = atleast_2d(data)
self.hs = hs
self.kernel = kernel if kernel else Kernel('gauss')
self.alpha = alpha
self.xmin = xmin
self.xmax = xmax
self.inc = inc
self.L2 = L2 self.L2 = L2
self.d, self.n = self.dataset.shape _KDE.__init__(self, data, hs, kernel, alpha, xmin, xmax, inc)
self.initialize()
def initialize(self): def _initialize(self):
self._set_xlimits() self._check_xmin()
tdataset = self._dat2gaus(self.dataset) tdataset = self._dat2gaus(self.dataset)
xmin = self.xmin xmin = self.xmin
if xmin is not None: if xmin is not None:
@ -151,38 +321,11 @@ class TKDE(object):
xmax = self._dat2gaus(xmax) xmax = self._dat2gaus(xmax)
self.tkde = KDE(tdataset, self.hs, self.kernel, self.alpha, xmin, xmax, self.tkde = KDE(tdataset, self.hs, self.kernel, self.alpha, xmin, xmax,
self.inc) self.inc)
def _set_xlimits(self): def _check_xmin(self):
amin = self.dataset.min(axis=-1)
amax = self.dataset.max(axis=-1)
xyzrange = amax-amin
offset = xyzrange/4.0
if self.xmin is None:
self.xmin = amin - offset
else:
self.xmin = self.xmin * np.ones(self.d)
if self.xmax is None:
self.xmax = amax + offset
else:
self.xmax = self.xmax * np.ones(self.d)
if self.L2 is not None: if self.L2 is not None:
amin = self.dataset.min(axis= -1)
L2 = np.atleast_1d(self.L2) * np.ones(self.d) # default no transformation L2 = np.atleast_1d(self.L2) * np.ones(self.d) # default no transformation
self.xmin = np.where(L2!=1, np.maximum(self.xmin, amin/2.0), self.xmin) self.xmin = np.where(L2 != 1, np.maximum(self.xmin, amin / 100.0), self.xmin)
def _check_shape(self, points):
points = atleast_2d(points)
d, m = points.shape
if d != self.d:
if d == 1 and m == self.d:
# points was passed in as a row vector
points = np.reshape(points, (self.d, 1))
m = 1
else:
msg = "points have dimension %s, dataset has dimension %s" % (d,
self.d)
raise ValueError(msg)
return points
def _dat2gaus(self, points): def _dat2gaus(self, points):
if self.L2 is None: if self.L2 is None:
@ -203,7 +346,7 @@ class TKDE(object):
points = copy.copy(tpoints) points = copy.copy(tpoints)
for i, v2 in enumerate(L2.tolist()): for i, v2 in enumerate(L2.tolist()):
points[i] = np.exp(tpoints[i]) if v2 == 0 else tpoints[i] ** (1.0/v2) points[i] = np.exp(tpoints[i]) if v2 == 0 else tpoints[i] ** (1.0 / v2)
return points return points
def _scale_pdf(self, pdf, points): def _scale_pdf(self, pdf, points):
@ -211,29 +354,32 @@ class TKDE(object):
return pdf return pdf
L2 = np.atleast_1d(self.L2) * np.ones(self.d) # default no transformation L2 = np.atleast_1d(self.L2) * np.ones(self.d) # default no transformation
for i, v2 in enumerate(L2.tolist()): for i, v2 in enumerate(L2.tolist()):
factor = v2 * np.sign(v2) if v2 else 1 factor = v2 * np.sign(v2) if v2 else 1
pdf *= np.where(v2 == 1, 1, points[i] ** (v2 - 1) * factor) pdf *= np.where(v2 == 1, 1, points[i] ** (v2 - 1) * factor)
if (np.abs(np.diff(pdf)).max() > 10).any(): if (np.abs(np.diff(pdf)).max() > 10).any():
msg = ''' Numerical problems may have occured due to the power msg = ''' Numerical problems may have occured due to the power
transformation. Check the KDE for spurious spikes''' transformation. Check the KDE for spurious spikes'''
warnings.warn(msg) warnings.warn(msg)
return pdf return pdf
def eval_grid_fast(self, *args):
def eval_grid_fast2(self, *args):
"""Evaluate the estimated pdf on a grid. """Evaluate the estimated pdf on a grid.
Parameters Parameters
---------- ----------
arg_0,arg_1,... arg_d-1 : vectors arg_0,arg_1,... arg_d-1 : vectors
Alternatively, if no vectors is passed in then Alternatively, if no vectors is passed in then
arg_i = linspace(self.xmin[i], self.xmax[i], self.inc) arg_i = gauss2dat(linspace(dat2gauss(self.xmin[i]), dat2gauss(self.xmax[i]), self.inc))
Returns Returns
------- -------
values : array-like values : array-like
The values evaluated at meshgrid(*args). The values evaluated at meshgrid(*args).
""" """
return self._eval_grid_fast(*args)
def _eval_grid_fast(self, *args):
if self.L2 is None: if self.L2 is None:
f = self.tkde.eval_grid_fast(*args) f = self.tkde.eval_grid_fast(*args)
self.args = self.tkde.args self.args = self.tkde.args
@ -241,48 +387,28 @@ class TKDE(object):
#targs = self._dat2gaus(list(args)) if len(args) else args #targs = self._dat2gaus(list(args)) if len(args) else args
tf = self.tkde.eval_grid_fast() tf = self.tkde.eval_grid_fast()
self.args = self._gaus2dat(list(self.tkde.args)) self.args = self._gaus2dat(list(self.tkde.args))
points = meshgrid(*self.args) if self.d>1 else self.args points = meshgrid(*self.args) if self.d > 1 else self.args
f = self._scale_pdf(tf, points) f = self._scale_pdf(tf, points)
if len(args): if len(args):
if self.d==1: if self.d == 1:
pdf = interpolate.interp1d(points[0],f, bounds_error=False, fill_value=0.0) pdf = interpolate.interp1d(points[0], f, bounds_error=False, fill_value=0.0)
elif self.d==2: elif self.d == 2:
pdf = interpolate.interp2d(points[0],points[1], f, bounds_error=False, fill_value=0.0) pdf = interpolate.interp2d(points[0], points[1], f, bounds_error=False, fill_value=0.0)
#ipoints = meshgrid(*args) if self.d>1 else args #ipoints = meshgrid(*args) if self.d>1 else args
fi = pdf(*args) fi = pdf(*args)
#fi.shape = ipoints[0].shape #fi.shape = ipoints[0].shape
return fi return fi
return f return f
def eval_grid(self, *args): def _eval_grid(self, *args):
"""Evaluate the estimated pdf on a grid.
Parameters
----------
arg_0,arg_1,... arg_d-1 : vectors
Alternatively, if no vectors is passed in then
arg_i = linspace(self.xmin[i], self.xmax[i], self.inc)
Returns
-------
values : array-like
The values evaluated at meshgrid(*args).
"""
if len(args)==0:
args = []
for i in range(self.d):
args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc))
self.args = args
if self.L2 is None: if self.L2 is None:
return self.tkde.eval_grid(*args) return self.tkde.eval_grid(*args)
targs = self._dat2gaus(list(args)) targs = self._dat2gaus(list(args))
tf = self.tkde.eval_grid(*targs) tf = self.tkde.eval_grid(*targs)
points = meshgrid(*args) if self.d>1 else self.args points = meshgrid(*args) if self.d > 1 else self.args
f = self._scale_pdf(tf, points) f = self._scale_pdf(tf, points)
return f return f
return self.tkde.eval_grid(*args) def _eval_points(self, points):
def evaluate(self, points):
"""Evaluate the estimated pdf on a set of points. """Evaluate the estimated pdf on a set of points.
Parameters Parameters
@ -302,16 +428,14 @@ class TKDE(object):
the dimensionality of the KDE. the dimensionality of the KDE.
""" """
if self.L2 is None: if self.L2 is None:
return self.tkde(points) return self.tkde.eval_points(points)
points = self._check_shape(points)
tpoints = self._dat2gaus(points) tpoints = self._dat2gaus(points)
tf = self.tkde(tpoints) tf = self.tkde.eval_points(tpoints)
f = self._scale_pdf(tf, points) f = self._scale_pdf(tf, points)
return f return f
__call__ = evaluate class KDE(_KDE):
class KDE(object):
""" Kernel-Density Estimator. """ Kernel-Density Estimator.
Parameters Parameters
@ -328,7 +452,17 @@ class KDE(object):
A good choice might be alpha = 0.5 ( or 1/D) A good choice might be alpha = 0.5 ( or 1/D)
alpha = 0 Regular KDE (hs is constant) alpha = 0 Regular KDE (hs is constant)
0 < alpha <= 1 Adaptive KDE (Make hs change) 0 < alpha <= 1 Adaptive KDE (Make hs change)
xmin, xmax : vectors
specifying the default argument range for the kde.eval_grid methods.
For the kde.eval_grid_fast methods the values must cover the range of the data.
(default min(data)-range(data)/4, max(data)-range(data)/4)
If a single value of xmin or xmax is given then the boundary is the is
the same for all dimensions.
inc : scalar integer
defining the default dimension of the output from kde.eval_grid methods (default 128)
(For kde.eval_grid_fast: A value below 50 is very fast to compute but
may give some inaccuracies. Values between 100 and 500 give very
accurate results)
Members Members
------- -------
@ -339,10 +473,14 @@ class KDE(object):
Methods Methods
------- -------
kde.evaluate(points) : array kde.eval_grid_fast(x0, x1,..., xd) : array
evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
kde.eval_grid(x0, x1,..., xd) : array
evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
kde.eval_points(points) : array
evaluate the estimated pdf on a provided set of points evaluate the estimated pdf on a provided set of points
kde(points) : array kde(x0, x1,..., xd) : array
same as kde.evaluate(points) same as kde.eval_grid(x0, x1,..., xd)
Example Example
@ -367,7 +505,7 @@ class KDE(object):
0.21409279, 0.12738463, 0.07460326, 0.03956191, 0.01887164]) 0.21409279, 0.12738463, 0.07460326, 0.03956191, 0.01887164])
>>> kde0 = wk.KDE(data, hs=0.5, alpha=0.0) >>> kde0 = wk.KDE(data, hs=0.5, alpha=0.0)
>>> kde0.evaluate(x) >>> kde0.eval_points(x)
array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 ,
0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821])
@ -377,8 +515,8 @@ class KDE(object):
>>> f = kde0.eval_grid_fast() >>> f = kde0.eval_grid_fast()
>>> np.interp(x, kde0.args[0], f) >>> np.interp(x, kde0.args[0], f)
array([ 0.21165996, 0.41218257, 0.54961961, 0.51713209, 0.38292245, array([ 0.21227584, 0.41256459, 0.5495661 , 0.5176579 , 0.38431616,
0.25864661, 0.16113184, 0.08055992, 0.03576856, 0.03576856]) 0.2591162 , 0.15978948, 0.07889179, 0.02769818, 0.00791829])
import pylab as plb import pylab as plb
h1 = plb.plot(x, f) # 1D probability density plot h1 = plb.plot(x, f) # 1D probability density plot
@ -386,41 +524,18 @@ class KDE(object):
""" """
def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, xmax=None, inc=128): def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, xmax=None, inc=128):
self.kernel = kernel if kernel else Kernel('gauss') _KDE.__init__(self, data, hs, kernel, alpha, xmin, xmax, inc)
self.hs = hs
self.alpha = alpha
self.dataset = atleast_2d(data) def _initialize(self):
self.d, self.n = self.dataset.shape
self.xmin = xmin
self.xmax = xmax
self.inc = inc
self.initialize()
def initialize(self):
self._set_xlimits()
self._compute_smoothing() self._compute_smoothing()
if self.alpha > 0: if self.alpha > 0:
pilot = KDE(self.dataset, hs=self.hs, kernel=self.kernel, alpha=0) pilot = KDE(self.dataset, hs=self.hs, kernel=self.kernel, alpha=0)
f = pilot(self.dataset) # get a pilot estimate by regular KDE (alpha=0) f = pilot.eval_points(self.dataset) # get a pilot estimate by regular KDE (alpha=0)
g = np.exp(np.mean(np.log(f))) g = np.exp(np.mean(np.log(f)))
self._lambda = (f / g) ** (-self.alpha) self._lambda = (f / g) ** (-self.alpha)
else: else:
self._lambda = np.ones(self.n) self._lambda = np.ones(self.n)
def _set_xlimits(self):
amin = self.dataset.min(axis=-1)
amax = self.dataset.max(axis=-1)
xyzrange = amax-amin
if self.xmin is None:
self.xmin = amin-xyzrange/4.0
else:
self.xmin = self.xmin * np.ones(self.d)
if self.xmax is None:
self.xmax = amax + xyzrange/4.0
else:
self.xmax = self.xmax * np.ones(self.d)
def _compute_smoothing(self): def _compute_smoothing(self):
"""Computes the smoothing matrix """Computes the smoothing matrix
""" """
@ -451,41 +566,21 @@ class KDE(object):
self.hs = h self.hs = h
self._norm_factor = deth * self.n self._norm_factor = deth * self.n
def eval_grid_fast(self, *args):
"""Evaluate the estimated pdf on a grid.
Parameters
----------
arg_0,arg_1,... arg_d-1 : vectors
Alternatively, if no vectors is passed in then
arg_i = linspace(self.xmin[i], self.xmax[i], self.inc)
Returns
-------
values : array-like
The values evaluated at meshgrid(*args).
"""
if len(args)==0:
args = []
for i in range(self.d):
args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc))
self.args = args
return self._eval_grid_fast(*args)
def _eval_grid_fast(self, *args): def _eval_grid_fast(self, *args):
# TODO: This does not work correctly yet! Check it. # TODO: This does not work correctly yet! Check it.
X = np.vstack(args) X = np.vstack(args)
d, inc = X.shape d, inc = X.shape
dx = X[:,1]-X[:,0] dx = X[:, 1] - X[:, 0]
Xn = [] Xn = []
nfft0 = 2*inc nfft0 = 2 * inc
nfft = (nfft0,)*d nfft = (nfft0,)*d
x0 = np.linspace(-inc, inc, nfft0+1) x0 = np.linspace(-inc, inc, nfft0 + 1)
for i in range(d): for i in range(d):
Xn.append(x0[:-1]*dx[i]) Xn.append(x0[:-1] * dx[i])
Xnc = meshgrid(*Xn) if d>1 else Xn Xnc = meshgrid(*Xn) if d > 1 else Xn
shape0 = Xnc[0].shape shape0 = Xnc[0].shape
for i in range(d): for i in range(d):
@ -494,7 +589,7 @@ class KDE(object):
Xn = np.dot(self.inv_hs, np.vstack(Xnc)) Xn = np.dot(self.inv_hs, np.vstack(Xnc))
# Obtain the kernel weights. # Obtain the kernel weights.
kw = self.kernel(Xn)/(self._norm_factor * self.kernel.norm_factor(d, self.n)) kw = self.kernel(Xn) / (self._norm_factor * self.kernel.norm_factor(d, self.n))
kw.shape = shape0 kw.shape = shape0
kw = np.fft.ifftshift(kw) kw = np.fft.ifftshift(kw)
fftn = np.fft.fftn fftn = np.fft.fftn
@ -504,58 +599,23 @@ class KDE(object):
c = gridcount(self.dataset, X) c = gridcount(self.dataset, X)
# Perform the convolution. # Perform the convolution.
z = np.real(ifftn(fftn(c,s=nfft)*fftn(kw))) z = np.real(ifftn(fftn(c, s=nfft) * fftn(kw)))
ix = (slice(0,inc),)*d ix = (slice(0, inc),)*d
return z[ix]*(z[ix]>0.0) return z[ix] * (z[ix] > 0.0)
def eval_grid(self, *args):
"""Evaluate the estimated pdf on a grid.
Parameters
----------
arg_0,arg_1,... arg_d-1 : vectors
Alternatively, if no vectors is passed in then
arg_i = linspace(self.xmin[i], self.xmax[i], self.inc)
Returns
-------
values : array-like
The values evaluated at meshgrid(*args).
"""
if len(args)==0:
args = []
for i in range(self.d):
args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc))
self.args = args
return self._eval_grid(*args)
def _eval_grid(self, *args): def _eval_grid(self, *args):
grd = meshgrid(*args) if len(args)>1 else list(args) grd = meshgrid(*args) if len(args) > 1 else list(args)
shape0 = grd[0].shape shape0 = grd[0].shape
d = len(grd) d = len(grd)
for i in range(d): for i in range(d):
grd[i] = grd[i].ravel() grd[i] = grd[i].ravel()
f = self.evaluate(np.vstack(grd)) f = self.eval_points(np.vstack(grd))
return f.reshape(shape0) return f.reshape(shape0)
def _check_shape(self, points):
points = atleast_2d(points) def _eval_points(self, points):
d, m = points.shape
if d != self.d:
if d == 1 and m == self.d:
# points was passed in as a row vector
points = np.reshape(points, (self.d, 1))
m = 1
else:
msg = "points have dimension %s, dataset has dimension %s" % (d,
self.d)
raise ValueError(msg)
return points
def evaluate(self, points):
"""Evaluate the estimated pdf on a set of points. """Evaluate the estimated pdf on a set of points.
Parameters Parameters
@ -574,8 +634,6 @@ class KDE(object):
ValueError if the dimensionality of the input points is different than ValueError if the dimensionality of the input points is different than
the dimensionality of the KDE. the dimensionality of the KDE.
""" """
points = self._check_shape(points)
d, m = points.shape d, m = points.shape
result = np.zeros((m,)) result = np.zeros((m,))
@ -598,8 +656,6 @@ class KDE(object):
return result return result
__call__ = evaluate
class _Kernel(object): class _Kernel(object):
def __init__(self, r=1.0, stats=None): def __init__(self, r=1.0, stats=None):
@ -624,7 +680,7 @@ class _Kernel(object):
''' '''
return self._effective_support() return self._effective_support()
def _effective_support(self): def _effective_support(self):
return -self.r, self.r return - self.r, self.r
__call__ = kernel __call__ = kernel
class _KernelMulti(_Kernel): class _KernelMulti(_Kernel):
@ -690,27 +746,27 @@ mkernel_triangular = _KernelTriangular(stats=_stats_tria)
class _KernelGaussian(_Kernel): class _KernelGaussian(_Kernel):
def _kernel(self, x): def _kernel(self, x):
sigma = self.r/4.0 sigma = self.r / 4.0
x2 = (x/sigma) ** 2 x2 = (x / sigma) ** 2
return exp(-0.5 * x2.sum(axis=0)) return exp(-0.5 * x2.sum(axis=0))
def norm_factor(self, d=1, n=None): def norm_factor(self, d=1, n=None):
sigma = self.r/4.0 sigma = self.r / 4.0
return (2 * pi * sigma) ** (d / 2.0) return (2 * pi * sigma) ** (d / 2.0)
def deriv4_6_8_10(self, t, numout=4): def deriv4_6_8_10(self, t, numout=4):
''' '''
Returns 4th, 6th, 8th and 10th derivatives of the kernel function. Returns 4th, 6th, 8th and 10th derivatives of the kernel function.
''' '''
phi0 = exp(-0.5*t**2)/sqrt(2*pi) phi0 = exp(-0.5 * t ** 2) / sqrt(2 * pi)
p4 = [1, 0, -6, 0, +3] p4 = [1, 0, -6, 0, +3]
p4val = np.polyval(p4,t)*phi0 p4val = np.polyval(p4, t) * phi0
if numout==1: if numout == 1:
return p4val return p4val
out = [p4val] out = [p4val]
pn = p4 pn = p4
for ix in range(numout-1): for ix in range(numout - 1):
pnp1 = np.polyadd(-np.r_[pn, 0], np.polyder(pn)) pnp1 = np.polyadd(-np.r_[pn, 0], np.polyder(pn))
pnp2 = np.polyadd(-np.r_[pnp1, 0], np.polyder(pnp1)) pnp2 = np.polyadd(-np.r_[pnp1, 0], np.polyder(pnp1))
out.append(np.polyval(pnp2, t)*phi0) out.append(np.polyval(pnp2, t) * phi0)
pn = pnp2 pn = pnp2
return out return out
@ -898,7 +954,7 @@ class Kernel(object):
# R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x))
mu2, R, Rdd = self.stats() mu2, R, Rdd = self.stats()
AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5)
iqr = np.abs(np.percentile(A, 75, axis=1) - np.percentile(A, 25, axis=1))# interquartile range iqr = iqrange(A, axis=1) # interquartile range
stdA = np.std(A, axis=1, ddof=1) stdA = np.std(A, axis=1, ddof=1)
# % use of interquartile range guards against outliers. # % use of interquartile range guards against outliers.
# % the use of interquartile range is better if # % the use of interquartile range is better if
@ -1043,34 +1099,34 @@ class Kernel(object):
# TODO: replace the iteration in the end with a Newton Raphson scheme # TODO: replace the iteration in the end with a Newton Raphson scheme
A = np.atleast_2d(data) A = np.atleast_2d(data)
d, n= A.shape d, n = A.shape
# R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x))
mu2, R, Rdd = self.stats() mu2, R, Rdd = self.stats()
AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5)
STEconstant = R /(mu2**(2)*n) STEconstant = R / (mu2 ** (2) * n)
sigmaA = self.hns(A)/AMISEconstant sigmaA = self.hns(A) / AMISEconstant
if h0 is None: if h0 is None:
h0 = sigmaA*AMISEconstant h0 = sigmaA * AMISEconstant
h = np.asarray(h0, dtype=float) h = np.asarray(h0, dtype=float)
nfft = inc*2 nfft = inc * 2
amin = A.min(axis=1) # Find the minimum value of A. amin = A.min(axis=1) # Find the minimum value of A.
amax = A.max(axis=1) #Find the maximum value of A. amax = A.max(axis=1) #Find the maximum value of A.
arange = amax-amin # Find the range of A. arange = amax - amin # Find the range of A.
#% xa holds the x 'axis' vector, defining a grid of x values where #% xa holds the x 'axis' vector, defining a grid of x values where
#% the k.d. function will be evaluated. #% the k.d. function will be evaluated.
ax1 = amin-arange/8.0 ax1 = amin - arange / 8.0
bx1 = amax+arange/8.0 bx1 = amax + arange / 8.0
kernel2 = Kernel('gaus') kernel2 = Kernel('gauss')
mu2,R,Rdd = kernel2.stats() mu2, R, Rdd = kernel2.stats()
STEconstant2 = R /(mu2**(2)*n) STEconstant2 = R / (mu2 ** (2) * n)
fft = np.fft.fft fft = np.fft.fft
ifft = np.fft.ifft ifft = np.fft.ifft
@ -1079,61 +1135,61 @@ class Kernel(object):
ax = ax1[dim] ax = ax1[dim]
bx = bx1[dim] bx = bx1[dim]
xa = np.linspace(ax,bx,inc) xa = np.linspace(ax, bx, inc)
xn = np.linspace(0,bx-ax,inc) xn = np.linspace(0, bx - ax, inc)
c = gridcount(A[dim],xa) c = gridcount(A[dim], xa)
# Step 1 # Step 1
psi6NS = -15/(16*sqrt(pi)*s**7) psi6NS = -15 / (16 * sqrt(pi) * s ** 7)
psi8NS = 105/(32*sqrt(pi)*s**9) psi8NS = 105 / (32 * sqrt(pi) * s ** 9)
# Step 2 # Step 2
k40, k60 = kernel2.deriv4_6_8_10(0, numout=2) k40, k60 = kernel2.deriv4_6_8_10(0, numout=2)
g1 = (-2*k40/(mu2*psi6NS*n))**(1.0/7) g1 = (-2 * k40 / (mu2 * psi6NS * n)) ** (1.0 / 7)
g2 = (-2*k60/(mu2*psi8NS*n))**(1.0/9) g2 = (-2 * k60 / (mu2 * psi8NS * n)) ** (1.0 / 9)
# Estimate psi6 given g2. # Estimate psi6 given g2.
kw4, kw6 = kernel2.deriv4_6_8_10(xn/g2, numout=2) # kernel weights. kw4, kw6 = kernel2.deriv4_6_8_10(xn / g2, numout=2) # kernel weights.
kw = np.r_[kw6,0,kw6[-1:0:-1]] # Apply fftshift to kw. kw = np.r_[kw6, 0, kw6[-1:0:-1]] # Apply fftshift to kw.
z = np.real(ifft(fft(c,nfft)*fft(kw))) # convolution. z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution.
psi6 = np.sum(c*z[:inc])/(n*(n-1)*g2**7) psi6 = np.sum(c * z[:inc]) / (n * (n - 1) * g2 ** 7)
# Estimate psi4 given g1. # Estimate psi4 given g1.
kw4 = kernel2.deriv4_6_8_10(xn/g1, numout=1) # kernel weights. kw4 = kernel2.deriv4_6_8_10(xn / g1, numout=1) # kernel weights.
kw = np.r_[kw4,0,kw4[-1:0:-1]] #Apply 'fftshift' to kw. kw = np.r_[kw4, 0, kw4[-1:0:-1]] #Apply 'fftshift' to kw.
z = np.real(ifft(fft(c,nfft)*fft(kw))) # convolution. z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution.
psi4 = np.sum(c*z[:inc])/(n*(n-1)*g1**5) psi4 = np.sum(c * z[:inc]) / (n * (n - 1) * g1 ** 5)
h1 = h[dim] h1 = h[dim]
h_old = 0 h_old = 0
count = 0 count = 0
while ((abs(h_old-h1)>max(releps*h1,abseps)) and (count < maxit)): while ((abs(h_old - h1) > max(releps * h1, abseps)) and (count < maxit)):
count += 1 count += 1
h_old = h1 h_old = h1
# Step 3 # Step 3
gamma=((2*k40*mu2*psi4*h1**5)/(-psi6*R))**(1.0/7) gamma = ((2 * k40 * mu2 * psi4 * h1 ** 5) / (-psi6 * R)) ** (1.0 / 7)
# Now estimate psi4 given gamma. # Now estimate psi4 given gamma.
kw4 = kernel2.deriv4_6_8_10(xn/gamma, numout=1) #kernel weights. kw4 = kernel2.deriv4_6_8_10(xn / gamma, numout=1) #kernel weights.
kw = np.r_[kw4,0,kw4[-1:0:-1]] # Apply 'fftshift' to kw. kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply 'fftshift' to kw.
z = np.real(ifft(fft(c,nfft)*fft(kw))) # convolution. z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution.
psi4Gamma = np.sum(c*z[:inc])/(n*(n-1)*gamma**5) psi4Gamma = np.sum(c * z[:inc]) / (n * (n - 1) * gamma ** 5)
# Step 4 # Step 4
h1 = (STEconstant2/psi4Gamma)**(1.0/5) h1 = (STEconstant2 / psi4Gamma) ** (1.0 / 5)
# Kernel other than Gaussian scale bandwidth # Kernel other than Gaussian scale bandwidth
h1 = h1*(STEconstant/STEconstant2)**(1.0/5) h1 = h1 * (STEconstant / STEconstant2) ** (1.0 / 5)
if count>= maxit: if count >= maxit:
warnings.warn('The obtained value did not converge.') warnings.warn('The obtained value did not converge.')
h[dim] = h1 h[dim] = h1
@ -1142,9 +1198,9 @@ class Kernel(object):
def norm_factor(self, d=1, n=None): def norm_factor(self, d=1, n=None):
return self.kernel.norm_factor(d, n) return self.kernel.norm_factor(d, n)
def evaluate(self, X): def eval_points(self, points):
return self.kernel(np.atleast_2d(X)) return self.kernel(np.atleast_2d(points))
__call__ = evaluate __call__ = eval_points
def mkernel(X, kernel): def mkernel(X, kernel):
''' '''
@ -1297,6 +1353,39 @@ def accum(accmap, a, func=None, size=None, fill_value=0, dtype=None):
return out return out
def iqrange(data, axis=None):
'''
Returns the Inter Quartile Range of data
Parameters
----------
data : array-like
Input array or object that can be converted to an array.
axis : {None, int}, optional
Axis along which the percentiles are computed. The default (axis=None)
is to compute the median along a flattened version of the array.
Returns
-------
r : array-like
abs(np.percentile(data, 75, axis)-np.percentile(data, 25, axis))
Notes
-----
IQRANGE is a robust measure of spread. The use of interquartile range
guards against outliers if the distribution have heavy tails.
Example
-------
>>> a = np.arange(101)
>>> iqrange(a)
50.0
See also
--------
np.std
'''
return np.abs(np.percentile(data, 75, axis=axis)-np.percentile(data, 25, axis=axis))
def bitget(int_type, offset): def bitget(int_type, offset):
''' '''
Returns the value of the bit at the offset position in int_type. Returns the value of the bit at the offset position in int_type.

@ -25,14 +25,16 @@ def test0_KDE1D():
>>> kde0.eval_grid(x) >>> kde0.eval_grid(x)
array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 ,
0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821])
>>> kde0.eval_grid_fast(x)
array([ 0.32343789, 0.51366167, 0.55643329, 0.43688805, 0.28972471,
0.19445277, 0.12473331, 0.06195215, 0.02087712, 0.00449567])
>>> f = kde0.eval_grid_fast(); f >>> f = kde0.eval_grid_fast(); f
array([ 0.07264948, 0.14135253, 0.24141397, 0.36045498, 0.46962192, array([ 0.02076721, 0.0612371 , 0.14515308, 0.27604202, 0.42001793,
0.53604004, 0.5427015 , 0.49767387, 0.42419428, 0.34349993, 0.51464781, 0.52131018, 0.45976136, 0.37621768, 0.29589521,
0.26650289, 0.19666903, 0.13569857, 0.0857818 , 0.04868357, 0.21985316, 0.1473364 , 0.08502256, 0.04063749, 0.0155788 ,
0.02432961]) 0.00466938])
>>> np.trapz(f,kde0.args) >>> np.trapz(f,kde0.args)
array([ 0.97384215]) array([ 0.99416766])
''' '''
def test1_TKDE1D(): def test1_TKDE1D():
''' '''

Loading…
Cancel
Save