Small updates

master
per.andreas.brodtkorb 14 years ago
parent 111d6ce808
commit 39023b0ef8

@@ -18,6 +18,7 @@ from scipy import linalg
from scipy.special import gamma
from misc import tranproc, trangood
from itertools import product
from wafo.misc import meshgrid

_stats_epan = (1. / 5, 3. / 5, np.inf)
_stats_biwe = (1. / 7, 5. / 7, 45. / 2)
@@ -300,7 +301,16 @@ class KDE(object):
        self.inv_hs = linalg.inv(h)
        self.hs = h
        self._norm_factor = deth * self.n
    def eval_grid(self, *args):
        '''Evaluate the estimated density on a grid spanned by the given axis vectors.'''
        grd = meshgrid(*args)
        shape0 = grd[0].shape
        d = len(grd)
        for i in range(d):
            grd[i] = grd[i].ravel()
        f = self.evaluate(np.vstack(grd))
        return f.reshape(shape0)
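For context, a minimal usage sketch of the new eval_grid helper (the sample data, grid, and shapes below are illustrative assumptions, not part of the commit):

    import numpy as np
    from wafo.kde import KDE

    # Hypothetical 2-D sample of shape (d, N).
    data = np.random.randn(2, 50)
    kde = KDE(data)

    # eval_grid meshes the axis vectors, flattens the mesh, evaluates
    # the density at every grid point, and reshapes to the grid shape.
    x = np.linspace(-3, 3, 64)
    f = kde.eval_grid(x, x)   # f.shape == (64, 64)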
    def _check_shape(self, points):
        points = atleast_2d(points)
        d, m = points.shape
@@ -359,7 +369,12 @@ class KDE(object):
    __call__ = evaluate
class KDEBIN(KDE):
    def __init__(self, dataset, hs=None, kernel=None, alpha=0.0, inc=128):
        KDE.__init__(self, dataset, hs, kernel, alpha)
        self.inc = inc
    def evaluate(self, *args):
        pass  # placeholder: binned evaluation not implemented in this commit
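For orientation only, one way a binned 1-D evaluation could look, assuming a Gaussian kernel and the gridcount helper defined later in this file; a sketch under those assumptions, not the author's planned implementation:

    import numpy as np

    def binned_kde_1d(data, hs, inc=128):
        # Sketch: bin the sample onto an equidistant grid with linear
        # binning (the gridcount helper defined later in this module),
        # then form the Gaussian KDE from bin counts instead of points.
        x = np.linspace(data.min(), data.max(), inc)
        c = gridcount(data.ravel(), x)      # linear-binned counts, sums to n
        u = (x[:, None] - x[None, :]) / hs  # pairwise scaled distances
        f = np.dot(np.exp(-0.5 * u ** 2), c) / (data.size * hs * np.sqrt(2 * np.pi))
        return x, f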
class _Kernel(object):
    def __init__(self, r=1.0, stats=None):
        self.r = r # radius of kernel
@@ -371,6 +386,8 @@ class _Kernel(object):
        return self._kernel(X) / self.norm_factor(*X.shape)
    def kernel(self, x):
        return self._kernel(np.atleast_2d(x))
    def deriv4_6_8_10(self, t, numout=4):
        raise Exception('Method not implemented for this kernel!')
    __call__ = kernel

class _KernelMulti(_Kernel):
@@ -440,6 +457,24 @@ class _KernelGaussian(_Kernel):
        return exp(-0.5 * x2.sum(axis=0))
    def norm_factor(self, d=1, n=None):
        return (2 * pi) ** (d / 2.0)
    def deriv4_6_8_10(self, t, numout=4):
        '''
        Returns the 4th, 6th, 8th and 10th derivatives of the kernel function.
        '''
        phi0 = exp(-0.5 * t ** 2) / sqrt(2 * pi)
        p4 = [1, 0, -6, 0, +3]
        p4val = np.polyval(p4, t) * phi0
        if numout == 1:
            return p4val
        out = [p4val]
        pn = p4
        for ix in range(numout - 1):
            # Each step applies d/dt twice, using (p*phi)' = (p' - t*p)*phi.
            pnp1 = np.polyadd(-np.r_[pn, 0], np.polyder(pn))
            pnp2 = np.polyadd(-np.r_[pnp1, 0], np.polyder(pnp1))
            out.append(np.polyval(pnp2, t) * phi0)
            pn = pnp2
        return out
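A quick sanity check of the recursion (illustrative only, using the mkernel_gaussian instance created just below): at t = 0 the polynomial factors reduce to their constant terms, p4(0) = 3 and p6(0) = -15, so the 4th and 6th derivatives of the standard normal density are 3/sqrt(2*pi) and -15/sqrt(2*pi):

    k40, k60 = mkernel_gaussian.deriv4_6_8_10(0.0, numout=2)
    # k40 = 3/sqrt(2*pi)  ~  1.196827
    # k60 = -15/sqrt(2*pi) ~ -5.984134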
mkernel_gaussian = _KernelGaussian(stats=_stats_gaus)

#def mkernel_gaussian(X):
@@ -499,6 +534,13 @@ class Kernel(object):
    Examples
    --------
    N = 20
    data = np.random.rayleigh(1, size=(N,))
    >>> data = array([ 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887,
    ...     1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326,
    ...     1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487,
    ...     1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452])
    >>> Kernel('gaussian').stats()
    (1, 0.28209479177387814, 0.21157109383040862)
    >>> Kernel('laplace').stats()
@@ -510,11 +552,17 @@ class Kernel(object):
    >>> triweight(np.linspace(-1,1,11))
    array([ 0.      ,  0.046656,  0.262144,  0.592704,  0.884736,  1.      ,
            0.884736,  0.592704,  0.262144,  0.046656,  0.      ])
    >>> triweight.hns(data)
    array([ 0.82087056])
    >>> triweight.hos(data)
    array([ 0.88265652])
    >>> triweight.hste(data)
    array([ 0.56570278])
    See also
    --------
    mkernel

    References
    ----------
    B. W. Silverman (1986)
@@ -554,6 +602,8 @@ class Kernel(object):
        return self.kernel.stats
        #name = self.name[2:6] if self.name[:2].lower() == 'p1' else self.name[:4]
        #return _KERNEL_STATS_DICT[name.lower()]
    def deriv4_6_8_10(self, t, numout=4):
        return self.kernel.deriv4_6_8_10(t, numout)
    def hns(self, data):
        '''
@@ -719,7 +769,134 @@ class Kernel(object):
        covA = scipy.cov(A)
        return a * linalg.sqrtm(covA) * n ** (-1. / (d + 4))
    def hste(self, data, h0=None, inc=128, maxit=100, releps=0.01, abseps=0.0):
        '''HSTE 2-Stage Solve-the-Equation estimate of smoothing parameter.

        CALL: hs = hste(data, h0)

        hs     = one-dimensional value for smoothing parameter
                 given the data and kernel, size 1 x D
        data   = data matrix, size D x N (D = # dimensions)
        kernel = 'gaussian' - Gaussian kernel (default)
                 (currently the only supported kernel)
        h0     = initial starting guess for hs (default h0 = hns(A, kernel))

        Example:
          x = np.random.normal(0, 1, size=50)
          hs = Kernel('gaussian').hste(x)

        See also  hbcv, hboot, hos, hldpi, hlscv, hscv, hstt, kde, kdefun

        Reference:
        B. W. Silverman (1986)
        'Density estimation for statistics and data analysis'
        Chapman and Hall, pp 57--61

        Wand, M.P. and Jones, M.C. (1995)
        'Kernel smoothing'
        Chapman and Hall, pp 74--75
        '''
        # TODO: NB: this routine can be made faster:
        # TODO: replace the iteration at the end with a Newton-Raphson scheme
        A = np.atleast_2d(data)
        d, n = A.shape

        # R = int(mkernel(x)^2), mu2 = int(x^2*mkernel(x))
        mu2, R, Rdd = self.stats()

        AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5)
        STEconstant = R / (mu2 ** 2 * n)

        sigmaA = self.hns(A) / AMISEconstant
        if h0 is None:
            h0 = sigmaA * AMISEconstant

        h = np.atleast_1d(np.asarray(h0, dtype=float))  # allow a scalar h0

        nfft = inc * 2
        amin = A.min(axis=1) # Find the minimum value of A.
        amax = A.max(axis=1) # Find the maximum value of A.
        arange = amax - amin # Find the range of A.

        # xa holds the x 'axis' vector, defining a grid of x values where
        # the kernel density estimate will be evaluated.
        ax1 = amin - arange / 8.0
        bx1 = amax + arange / 8.0

        kernel2 = Kernel('gaus')
        mu2, R, Rdd = kernel2.stats()
        STEconstant2 = R / (mu2 ** 2 * n)

        fft = np.fft.fft
        ifft = np.fft.ifft

        for dim in range(d):
            s = sigmaA[dim]
            ax = ax1[dim]
            bx = bx1[dim]

            xa = np.linspace(ax, bx, inc)
            xn = np.linspace(0, bx - ax, inc)

            c = gridcount(A[dim], xa)

            # Step 1
            psi6NS = -15 / (16 * sqrt(pi) * s ** 7)
            psi8NS = 105 / (32 * sqrt(pi) * s ** 9)

            # Step 2
            k40, k60 = kernel2.deriv4_6_8_10(0, numout=2)
            g1 = (-2 * k40 / (mu2 * psi6NS * n)) ** (1.0 / 7)
            g2 = (-2 * k60 / (mu2 * psi8NS * n)) ** (1.0 / 9)

            # Estimate psi6 given g2.
            kw4, kw6 = kernel2.deriv4_6_8_10(xn / g2, numout=2) # kernel weights
            kw = np.r_[kw6, 0, kw6[-1:0:-1]] # Apply fftshift to kw.
            z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution
            psi6 = np.sum(c * z[:inc]) / (n * (n - 1) * g2 ** 7)

            # Estimate psi4 given g1.
            kw4 = kernel2.deriv4_6_8_10(xn / g1, numout=1) # kernel weights
            kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply fftshift to kw.
            z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution
            psi4 = np.sum(c * z[:inc]) / (n * (n - 1) * g1 ** 5)

            h1 = h[dim]
            h_old = 0
            count = 0

            while ((abs(h_old - h1) > max(releps * h1, abseps)) and (count < maxit)):
                count += 1
                h_old = h1

                # Step 3
                gamma = ((2 * k40 * mu2 * psi4 * h1 ** 5) / (-psi6 * R)) ** (1.0 / 7)

                # Now estimate psi4 given gamma.
                kw4 = kernel2.deriv4_6_8_10(xn / gamma, numout=1) # kernel weights
                kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply fftshift to kw.
                z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution
                psi4Gamma = np.sum(c * z[:inc]) / (n * (n - 1) * gamma ** 5)

                # Step 4
                h1 = (STEconstant2 / psi4Gamma) ** (1.0 / 5)

            # Kernel other than Gaussian: scale bandwidth
            h1 = h1 * (STEconstant / STEconstant2) ** (1.0 / 5)

            if count >= maxit:
                warnings.warn('The obtained value did not converge.')

            h[dim] = h1
        # end of dim loop
        return h
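For reference, a hedged usage sketch of the new estimator (the data is random, so the resulting bandwidth varies; assumes the module is importable as wafo.kde):

    import numpy as np
    from wafo.kde import Kernel

    data = np.random.rayleigh(1, size=(1, 200))  # one dimension, 200 points
    hs = Kernel('gaussian').hste(data)           # 2-stage solve-the-equation bandwidth
    # hs is an array with one entry per dimension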
    def norm_factor(self, d=1, n=None):
        return self.kernel.norm_factor(d, n)
    def evaluate(self, X):
@@ -891,7 +1068,7 @@ def bitget(int_type, offset):
def gridcount(data, X):
    '''
    Returns D-dimensional histogram using linear binning.

    Parameters
    ----------
