Small updates

master
Per.Andreas.Brodtkorb 13 years ago
parent 5d8c259ce6
commit 485ecfcd2f

@ -747,8 +747,6 @@ def cycle(*figs,**kwds):
_show_windows(wnds, win32con.SW_SHOWNORMAL) _show_windows(wnds, win32con.SW_SHOWNORMAL)
if __name__ == '__main__': if __name__ == '__main__':
import doctest import doctest
doctest.testmod() doctest.testmod()

@ -19,7 +19,7 @@ import scipy.optimize as optimize
from wafo.misc import meshgrid from wafo.misc import meshgrid
from wafo.wafodata import WafoData from wafo.wafodata import WafoData
from dctpack import dct from dctpack import dct, dctn, idctn
import copy import copy
import numpy as np import numpy as np
@ -63,7 +63,220 @@ def sphere_volume(d, r=1.0):
Chapman and Hall, pp 105 Chapman and Hall, pp 105
""" """
return (r ** d) * 2.0 * pi ** (d / 2.0) / (d * gamma(d / 2.0)) return (r ** d) * 2.0 * pi ** (d / 2.0) / (d * gamma(d / 2.0))
class KDEgauss(object):
""" Kernel-Density Estimator base class.
Parameters
----------
data : (# of dims, # of data)-array
datapoints to estimate from
hs : array-like (optional)
smooting parameter vector/matrix.
(default compute from data using kernel.get_smoothing function)
alpha : real scalar (optional)
sensitivity parameter (default 0 regular KDE)
A good choice might be alpha = 0.5 ( or 1/D)
alpha = 0 Regular KDE (hs is constant)
0 < alpha <= 1 Adaptive KDE (Make hs change)
Members
-------
d : int
number of dimensions
n : int
number of datapoints
Methods
-------
kde.eval_grid_fast(x0, x1,..., xd) : array
evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
kde(x0, x1,..., xd) : array
same as kde.eval_grid_fast(x0, x1,..., xd)
"""
def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, xmax=None, inc=128):
self.dataset = atleast_2d(data)
self.hs = hs
self.kernel = kernel if kernel else Kernel('gauss')
self.alpha = alpha
self.xmin = xmin
self.xmax = xmax
self.inc = inc
self.initialize()
def initialize(self):
self.d, self.n = self.dataset.shape
self._set_xlimits()
self._initialize()
def _initialize(self):
self._compute_smoothing()
def _compute_smoothing(self):
"""Computes the smoothing matrix
"""
get_smoothing = self.kernel.get_smoothing
h = self.hs
if h is None:
h = get_smoothing(self.dataset)
h = np.atleast_1d(h)
hsiz = h.shape
if (len(hsiz) == 1) or (self.d == 1):
if max(hsiz) == 1:
h = h * np.ones(self.d)
else:
h.shape = (self.d,) # make sure it has the correct dimension
# If h negative calculate automatic values
ind, = np.where(h <= 0)
for i in ind.tolist(): #
h[i] = get_smoothing(self.dataset[i])
deth = h.prod()
self.inv_hs = linalg.diag(1.0 / h)
else: #fully general smoothing matrix
deth = linalg.det(h)
if deth <= 0:
raise ValueError('bandwidth matrix h must be positive definit!')
self.inv_hs = linalg.inv(h)
self.hs = h
self._norm_factor = deth * self.n
def _set_xlimits(self):
amin = self.dataset.min(axis= -1)
amax = self.dataset.max(axis= -1)
iqr = iqrange(self.dataset, axis= -1)
sigma = np.minimum(np.std(self.dataset, axis= -1, ddof=1), iqr / 1.34)
#xyzrange = amax - amin
#offset = xyzrange / 4.0
offset = 2 * sigma
if self.xmin is None:
self.xmin = amin - offset
else:
self.xmin = self.xmin * np.ones((self.d,1))
if self.xmax is None:
self.xmax = amax + offset
else:
self.xmax = self.xmax * np.ones((self.d,1))
def eval_grid_fast(self, *args, **kwds):
"""Evaluate the estimated pdf on a grid.
Parameters
----------
arg_0,arg_1,... arg_d-1 : vectors
Alternatively, if no vectors is passed in then
arg_i = linspace(self.xmin[i], self.xmax[i], self.inc)
output : string optional
'value' if value output
'data' if object output
Returns
-------
values : array-like
The values evaluated at meshgrid(*args).
"""
if len(args) == 0:
args = []
for i in range(self.d):
args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc))
self.args = args
return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds)
def _eval_grid_fast(self, *args, **kwds):
X = np.vstack(args)
d, inc = X.shape
dx = X[:, 1] - X[:, 0]
R = X.max(axis=-1)- X.min(axis=-1)
t_star = (self.hs/R)**2
I = (np.asfarray(np.arange(0, inc))*pi)**2
In = []
for i in range(d):
In.append(I * t_star[i] * 0.5)
Inc = meshgrid(*In) if d > 1 else In
kw = np.zeros((inc,)*d)
for i in range(d):
kw += exp(-Inc[i])
y = kwds.get('y', 1.0)
d, n = self.dataset.shape
# Find the binned kernel weights, c.
c = gridcount(self.dataset, X, y=y)/n
# Perform the convolution.
at = dctn(c) * kw
z = idctn(at)*at.size/np.prod(R)
return z*(z>0.0)
#ix = (slice(0, inc),)*d
#return z[ix] * (z[ix] > 0.0)
def _eval_grid_fun(self, eval_grd, *args, **kwds):
output = kwds.pop('output', 'value')
f = eval_grd(*args, **kwds)
if output == 'value':
return f
else:
titlestr = 'Kernel density estimate (%s)' % self.kernel.name
kwds2 = dict(title=titlestr)
kwds2['plot_kwds'] = dict(plotflag=1)
kwds2.update(**kwds)
args = self.args
if self.d == 1:
args = args[0]
wdata = WafoData(f, args, **kwds2)
if self.d > 1:
PL = np.r_[10:90:20, 95, 99, 99.9]
try:
ql = qlevels(f, p=PL)
wdata.clevels = ql
wdata.plevels = PL
except:
pass
return wdata
def _check_shape(self, points):
points = atleast_2d(points)
d, m = points.shape
if d != self.d:
if d == 1 and m == self.d:
# points was passed in as a row vector
points = np.reshape(points, (self.d, 1))
else:
msg = "points have dimension %s, dataset has dimension %s" % (d,
self.d)
raise ValueError(msg)
return points
def eval_points(self, points, **kwds):
"""Evaluate the estimated pdf on a set of points.
Parameters
----------
points : (# of dimensions, # of points)-array
Alternatively, a (# of dimensions,) vector can be passed in and
treated as a single point.
Returns
-------
values : (# of points,)-array
The values at each point.
Raises
------
ValueError if the dimensionality of the input points is different than
the dimensionality of the KDE.
"""
points = self._check_shape(points)
return self._eval_points(points, **kwds)
def _eval_points(self, points, **kwds):
pass
__call__ = eval_grid_fast
class _KDE(object): class _KDE(object):
""" Kernel-Density Estimator base class. """ Kernel-Density Estimator base class.
@ -341,7 +554,7 @@ class TKDE(_KDE):
def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None,
xmax=None, inc=128, L2=None): xmax=None, inc=128, L2=None):
self.L2 = L2 self.L2 = L2
_KDE.__init__(self, data, hs, kernel, alpha, xmin, xmax, inc) super(TKDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc)
def _initialize(self): def _initialize(self):
self._check_xmin() self._check_xmin()
@ -574,7 +787,7 @@ class KDE(_KDE):
""" """
def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, xmax=None, inc=128): def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, xmax=None, inc=128):
_KDE.__init__(self, data, hs, kernel, alpha, xmin, xmax, inc) super(KDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc)
def _initialize(self): def _initialize(self):
self._compute_smoothing() self._compute_smoothing()
@ -638,11 +851,14 @@ class KDE(_KDE):
Xn = np.dot(self.inv_hs, np.vstack(Xnc)) Xn = np.dot(self.inv_hs, np.vstack(Xnc))
# Obtain the kernel weights. # Obtain the kernel weights.
kw = self.kernel(Xn)
norm_fact = (kw.sum()*dx.prod()*self.n)
norm_fact2 = (self._norm_factor * self.kernel.norm_factor(d, self.n))
kw = kw/norm_fact
r = kwds.get('r', 0) r = kwds.get('r', 0)
if r==0: if r!=0:
kw = self.kernel(Xn) / (self._norm_factor * self.kernel.norm_factor(d, self.n)) kw *= np.vstack(Xnc) ** r
else:
kw = np.vstack(Xnc) ** r * self.kernel(Xn) / (self._norm_factor * self.kernel.norm_factor(d, self.n))
kw.shape = shape0 kw.shape = shape0
kw = np.fft.ifftshift(kw) kw = np.fft.ifftshift(kw)
fftn = np.fft.fftn fftn = np.fft.fftn
@ -1434,7 +1650,8 @@ class Kernel(object):
c = gridcount(A[dim], xa) c = gridcount(A[dim], xa)
N = len(set(A[dim])) N = len(set(A[dim]))
a = dct(c/c.sum(), norm=None) #a = dct(c/c.sum(), norm=None)
a = dct(c/len(A[dim]), norm=None)
#% now compute the optimal bandwidth^2 using the referenced method #% now compute the optimal bandwidth^2 using the referenced method
I = np.asfarray(np.arange(1, inc))**2 I = np.asfarray(np.arange(1, inc))**2
@ -2551,8 +2768,8 @@ def kde_demo1():
import scipy.stats as st import scipy.stats as st
x = np.linspace(-4, 4, 101) x = np.linspace(-4, 4, 101)
x0 = x / 2.0 x0 = x / 2.0
data = np.random.normal(loc=0, scale=1.0, size=7) #rndnorm(0,1,7,1); data = np.random.normal(loc=0, scale=1.0, size=7)
kernel = Kernel('gaus') kernel = Kernel('gauss')
hs = kernel.hns(data) hs = kernel.hns(data)
hVec = [hs / 2, hs, 2 * hs] hVec = [hs / 2, hs, 2 * hs]
@ -2743,6 +2960,58 @@ def kreg_demo1(hs=None, fast=False, fun='hisj'):
print(kreg.tkde.tkde.inv_hs) print(kreg.tkde.tkde.inv_hs)
print(kreg.tkde.tkde.hs) print(kreg.tkde.tkde.hs)
def kde_gauss_demo(n=50000):
'''
KDEDEMO Demonstrate the KDEgauss
KDEDEMO1 shows the true density (dotted) compared to KDE based on 7
observations (solid) and their individual kernels (dashed) for 3
different values of the smoothing parameter, hs.
'''
import scipy.stats as st
#x = np.linspace(-4, 4, 101)
#data = np.random.normal(loc=0, scale=1.0, size=n)
#data = np.random.exponential(scale=1.0, size=n)
# n1 = 128
# I = (np.arange(n1)*pi)**2 *0.01*0.5
# kw = exp(-I)
# pylab.plot(idctn(kw))
# return
dist = st.norm
dist = st.expon
data = dist.rvs(loc=0, scale=1.0, size=n)
d, N = np.atleast_2d(data).shape
if d==1:
plot_options = [dict(color='red'), dict(color='green'), dict(color='black')]
else:
plot_options = [dict(colors='red'), dict(colors='green'), dict(colors='black')]
pylab.figure(1)
kde0 = KDE(data, kernel=Kernel('gauss', 'hisj'))
f0 = kde0.eval_grid_fast(output='plot', ylab='Density')
f0.plot(**plot_options[0])
kde1 = TKDE(data, kernel=Kernel('gauss', 'hisj'), L2=0)
f1 = kde1.eval_grid_fast(output='plot', ylab='Density')
f1.plot(**plot_options[1])
kde2 = KDEgauss(data)
f2 = kde2(output='plot', ylab='Density')
x = f2.args
f2.plot(**plot_options[2])
fmax = dist.pdf(x, 0, 1).max()
if d==1:
pylab.plot(x, dist.pdf(x, 0, 1), 'k:')
pylab.axis([x.min(), x.max(), 0, fmax])
pylab.show()
print(fmax/f2.data.max())
format_ = ''.join(('%g, ')*d)
format_ = 'hs0=%s hs2=%s' % (format_,format_)
print(format_ % tuple(kde0.hs.tolist()+kde2.hs.tolist()))
def test_docstrings(): def test_docstrings():
import doctest import doctest
doctest.testmod() doctest.testmod()
@ -2750,4 +3019,5 @@ def test_docstrings():
if __name__ == '__main__': if __name__ == '__main__':
#test_docstrings() #test_docstrings()
#kde_demo2() #kde_demo2()
kreg_demo1() #kreg_demo1(fast=True)
kde_gauss_demo()

@ -190,7 +190,7 @@ class Plotter_1d(object):
def plot(self, wdata, *args, **kwds): def plot(self, wdata, *args, **kwds):
plotflag = kwds.pop('plotflag', False) plotflag = kwds.pop('plotflag', False)
if plotflag: if plotflag:
h1 = self._plot(plotflag, wdata, **kwds) h1 = self._plot(plotflag, wdata, *args, **kwds)
else: else:
if isinstance(wdata.args, (list, tuple)): if isinstance(wdata.args, (list, tuple)):
args1 = tuple((wdata.args)) + (wdata.data,) + args args1 = tuple((wdata.args)) + (wdata.data,) + args

Loading…
Cancel
Save