Small updates

master
per.andreas.brodtkorb 14 years ago
parent 111d6ce808
commit 39023b0ef8

@ -18,6 +18,7 @@ from scipy import linalg
from scipy.special import gamma
from misc import tranproc, trangood
from itertools import product
from wafo.misc import meshgrid
_stats_epan = (1. / 5, 3. / 5, np.inf)
_stats_biwe = (1. / 7, 5. / 7, 45. / 2)
@ -301,6 +302,15 @@ class KDE(object):
self.hs = h
self._norm_factor = deth * self.n
def eval_grid(self, *args):
    '''Evaluate the density on the regular grid spanned by *args*.

    Each positional argument is one grid axis; the return value has the
    same shape as the meshgrid built from those axes.
    '''
    axes = meshgrid(*args)
    grid_shape = axes[0].shape
    # Flatten every axis so the points can be stacked into a d x m matrix.
    flat_axes = [ax.ravel() for ax in axes]
    density = self.evaluate(np.vstack(flat_axes))
    return density.reshape(grid_shape)
def _check_shape(self, points):
points = atleast_2d(points)
d, m = points.shape
@ -359,7 +369,12 @@ class KDE(object):
__call__ = evaluate
class KDEBIN(KDE):
    '''Binned kernel density estimator.

    Same interface as :class:`KDE`, plus ``inc`` — the number of grid
    points used for the binning.
    '''
    def __init__(self, dataset, hs=None, kernel=None, alpha=0.0, inc=128):
        # All common setup lives in the plain KDE base class.
        super(KDEBIN, self).__init__(dataset, hs, kernel, alpha)
        self.inc = inc

    def evaluate(self, *args):
        # Placeholder: the binned evaluation is not implemented yet,
        # so this deliberately returns None.
        pass
class _Kernel(object):
def __init__(self, r=1.0, stats=None):
self.r = r # radius of kernel
@ -371,6 +386,8 @@ class _Kernel(object):
return self._kernel(X) / self.norm_factor(*X.shape)
def kernel(self, x):
return self._kernel(np.atleast_2d(x))
def deriv4_6_8_10(self, t, numout=4):
    '''Placeholder for the 4th, 6th, 8th and 10th kernel derivatives.

    Subclasses that support plug-in bandwidth estimation (e.g. the
    Gaussian kernel) override this method.

    Raises
    ------
    NotImplementedError
        Always; this base kernel has no derivative formulas.
    '''
    # NotImplementedError is the idiomatic exception for an
    # unimplemented method; it is a subclass of Exception, so existing
    # callers catching Exception still work.
    raise NotImplementedError('Method not implemented for this kernel!')
__call__ = kernel
class _KernelMulti(_Kernel):
@ -440,6 +457,24 @@ class _KernelGaussian(_Kernel):
return exp(-0.5 * x2.sum(axis=0))
def norm_factor(self, d=1, n=None):
    '''Return the Gaussian normalization constant (2*pi)**(d/2).

    ``n`` is accepted for interface compatibility but unused here.
    '''
    half_dim = d / 2.0
    return (2 * pi) ** half_dim
def deriv4_6_8_10(self, t, numout=4):
    '''Return even-order derivatives (4th, 6th, ...) of the Gaussian kernel at t.

    If ``numout == 1`` only the 4th derivative is returned directly;
    otherwise a list of ``numout`` arrays is returned, holding the
    4th, 6th, ..., (2*numout+2)th derivatives.
    '''
    gauss = exp(-0.5 * t ** 2) / sqrt(2 * pi)
    # Polynomial factor of the 4th derivative: t**4 - 6*t**2 + 3.
    poly = [1, 0, -6, 0, 3]
    deriv4 = np.polyval(poly, t) * gauss
    if numout == 1:
        return deriv4
    derivatives = [deriv4]
    for _ in range(numout - 1):
        # d/dt (p(t)*gauss(t)) = (p'(t) - t*p(t)) * gauss(t);
        # apply twice to advance from the (2k)th to the (2k+2)th derivative.
        for _half_step in range(2):
            poly = np.polyadd(-np.r_[poly, 0], np.polyder(poly))
        derivatives.append(np.polyval(poly, t) * gauss)
    return derivatives
mkernel_gaussian = _KernelGaussian(stats=_stats_gaus)
#def mkernel_gaussian(X):
@ -499,6 +534,13 @@ class Kernel(object):
Examples
--------
N = 20
data = np.random.rayleigh(1, size=(N,))
>>> data = array([ 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887,
... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326,
... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487,
... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452])
>>> Kernel('gaussian').stats()
(1, 0.28209479177387814, 0.21157109383040862)
>>> Kernel('laplace').stats()
@ -510,11 +552,17 @@ class Kernel(object):
>>> triweight(np.linspace(-1,1,11))
array([ 0. , 0.046656, 0.262144, 0.592704, 0.884736, 1. ,
0.884736, 0.592704, 0.262144, 0.046656, 0. ])
>>> triweight.hns(np.random.normal(size=100))
>>> triweight.hns(data)
array([ 0.82087056])
>>> triweight.hos(data)
array([ 0.88265652])
>>> triweight.hste(data)
array([ 0.56570278])
See also
--------
mkernel
References
----------
B. W. Silverman (1986)
@ -554,6 +602,8 @@ class Kernel(object):
return self.kernel.stats
#name = self.name[2:6] if self.name[:2].lower() == 'p1' else self.name[:4]
#return _KERNEL_STATS_DICT[name.lower()]
def deriv4_6_8_10(self, t, numout=4):
return self.kernel.deriv4_6_8_10(t, numout)
def hns(self, data):
'''
@ -719,6 +769,133 @@ class Kernel(object):
covA = scipy.cov(A)
return a * linalg.sqrtm(covA) * n * (-1. / (d + 4))
def hste(self, data, h0=None, inc=128, maxit=100, releps=0.01, abseps=0.0):
    '''HSTE 2-Stage Solve the Equation estimate of smoothing parameter.

    CALL:  hs = hste(data,kernel,h0)

           hs = one dimensional value for smoothing parameter
                given the data and kernel.  size 1 x D
         data = data matrix, size N x D (D = # dimensions )
       kernel = 'gaussian'  - Gaussian kernel (default)
               ( currently the only supported kernel)
           h0 = initial starting guess for hs (default h0=hns(A,kernel))

    Parameters
    ----------
    data : array_like
        data matrix, size N x D (D = # dimensions).
    h0 : scalar or array_like, optional
        initial starting guess for hs (default self.hns(data)).
    inc : int
        number of grid points used for the binned estimates (the FFT
        length is 2*inc).
    maxit : int
        maximum number of fixed-point iterations per dimension.
    releps, abseps : float
        relative/absolute convergence tolerances for the iteration.

    Example:
      x = rndnorm(0,1,50,1);
      hs = hste(x,'gauss');

    See also  hbcv, hboot, hos, hldpi, hlscv, hscv, hstt, kde, kdefun

    Reference:
      B. W. Silverman (1986)
     'Density estimation for statistics and data analysis'
      Chapman and Hall, pp 57--61

      Wand,M.P. and Jones, M.C. (1986)
     'Kernel smoothing'
      Chapman and Hall, pp 74--75
    '''
    # TODO: NB: this routine can be made faster:
    # TODO: replace the iteration in the end with a Newton Raphson scheme
    A = np.atleast_2d(data)
    d, n= A.shape

    # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x))
    mu2, R, Rdd = self.stats()
    # Normal-scale AMISE constant and the solve-the-equation constant
    # for THIS kernel (used to rescale the Gaussian result at the end).
    AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5)
    STEconstant = R /(mu2**(2)*n)
    # sigmaA: per-dimension scale estimate backed out of the
    # normal-scale bandwidth.
    sigmaA = self.hns(A)/AMISEconstant
    if h0 is None:
        h0 = sigmaA*AMISEconstant
    # NOTE(review): a scalar h0 yields a 0-d array here, which would
    # break the h[dim] assignment below — presumably callers pass
    # array-valued h0 or rely on the default; confirm.
    h = np.asarray(h0, dtype=float)
    nfft = inc*2
    amin = A.min(axis=1) # Find the minimum value of A.
    amax = A.max(axis=1) #Find the maximum value of A.
    arange = amax-amin # Find the range of A.
    #% xa holds the x 'axis' vector, defining a grid of x values where
    #% the k.d. function will be evaluated.
    ax1 = amin-arange/8.0
    bx1 = amax+arange/8.0
    # The plug-in stages always use Gaussian-kernel derivative formulas,
    # regardless of self's kernel; the final bandwidth is rescaled.
    kernel2 = Kernel('gaus')
    mu2,R,Rdd = kernel2.stats()
    STEconstant2 = R /(mu2**(2)*n)
    fft = np.fft.fft
    ifft = np.fft.ifft
    # Each dimension is handled independently (product-kernel style).
    for dim in range(d):
        s = sigmaA[dim]
        ax = ax1[dim]
        bx = bx1[dim]
        xa = np.linspace(ax,bx,inc)
        xn = np.linspace(0,bx-ax,inc)
        # c: linear-binned counts of the data on the grid xa.
        c = gridcount(A[dim],xa)
        # Step 1: normal-scale estimates of psi6 and psi8.
        psi6NS = -15/(16*sqrt(pi)*s**7)
        psi8NS = 105/(32*sqrt(pi)*s**9)
        # Step 2: pilot bandwidths g1, g2 from the kernel derivatives
        # at zero.
        k40, k60 = kernel2.deriv4_6_8_10(0, numout=2)
        g1 = (-2*k40/(mu2*psi6NS*n))**(1.0/7)
        g2 = (-2*k60/(mu2*psi8NS*n))**(1.0/9)
        # Estimate psi6 given g2 via an FFT-based discrete convolution
        # of the binned counts with the 6th-derivative kernel weights.
        kw4, kw6 = kernel2.deriv4_6_8_10(xn/g2, numout=2) # kernel weights.
        kw = np.r_[kw6,0,kw6[-1:0:-1]] # Apply fftshift to kw.
        z = np.real(ifft(fft(c,nfft)*fft(kw))) # convolution.
        psi6 = np.sum(c*z[:inc])/(n*(n-1)*g2**7)
        # Estimate psi4 given g1 (same convolution trick, 4th derivative).
        kw4 = kernel2.deriv4_6_8_10(xn/g1, numout=1) # kernel weights.
        kw = np.r_[kw4,0,kw4[-1:0:-1]] #Apply 'fftshift' to kw.
        z = np.real(ifft(fft(c,nfft)*fft(kw))) # convolution.
        psi4 = np.sum(c*z[:inc])/(n*(n-1)*g1**5)
        h1 = h[dim]
        h_old = 0
        count = 0
        # Fixed-point iteration on the bandwidth until the relative
        # (or absolute) change is within tolerance.
        while ((abs(h_old-h1)>max(releps*h1,abseps)) and (count < maxit)):
            count += 1
            h_old = h1
            # Step 3: bandwidth gamma for the psi4 re-estimation.
            gamma=((2*k40*mu2*psi4*h1**5)/(-psi6*R))**(1.0/7)
            # Now estimate psi4 given gamma.
            kw4 = kernel2.deriv4_6_8_10(xn/gamma, numout=1) #kernel weights.
            kw = np.r_[kw4,0,kw4[-1:0:-1]] # Apply 'fftshift' to kw.
            z = np.real(ifft(fft(c,nfft)*fft(kw))) # convolution.
            psi4Gamma = np.sum(c*z[:inc])/(n*(n-1)*gamma**5)
            # Step 4: updated solve-the-equation bandwidth.
            h1 = (STEconstant2/psi4Gamma)**(1.0/5)
        # Kernel other than Gaussian scale bandwidth
        h1 = h1*(STEconstant/STEconstant2)**(1.0/5)
        if count>= maxit:
            warnings.warn('The obtained value did not converge.')
        h[dim] = h1
    #end % for dim loop
    return h
def norm_factor(self, d=1, n=None):
return self.kernel.norm_factor(d, n)
@ -891,7 +1068,7 @@ def bitget(int_type, offset):
def gridcount(data, X):
'''
GRIDCOUNT D-dimensional histogram using linear binning.
Returns D-dimensional histogram using linear binning.
Parameters
----------

Loading…
Cancel
Save