Updated test examples in kdetools.py

+ cosmetic fixes to the rest
master
per.andreas.brodtkorb 14 years ago
parent a61bd48a71
commit 343f29a615

@ -3,7 +3,7 @@
<pydev_project> <pydev_project>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH"> <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/google_pywafo/src</path> <path>/pywafo/src</path>
</pydev_pathproperty> </pydev_pathproperty>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property> <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property> <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>

@ -85,6 +85,7 @@ def _set_seed(iseed):
# See also # See also
# -------- # --------
# chol, svd, sqrtm, genchol # chol, svd, sqrtm, genchol
# np.random.multivariate_normal
# ''' # '''
# sa = np.atleast_2d(cov) # sa = np.atleast_2d(cov)
# mu = np.atleast_1d(mean).ravel() # mu = np.atleast_1d(mean).ravel()
@ -537,7 +538,7 @@ class CovData1D(WafoData):
reconstructed data" reconstructed data"
in Proceedings of 9th ISOPE Conference, Vol III, pp 66-73 in Proceedings of 9th ISOPE Conference, Vol III, pp 66-73
""" """
# TODO does not work yet. # TODO: does not work yet.
# secret methods: # secret methods:
# 'dec1-3': different decomposing algorithm's # 'dec1-3': different decomposing algorithm's
@ -716,7 +717,7 @@ class CovData1D(WafoData):
Nsig = 2 * n; Nsig = 2 * n;
Sigma = sptoeplitz(hstack((ACF, zeros(Nsig - n)))) Sigma = sptoeplitz(hstack((acf, zeros(Nsig - n))))
n2 = floor(Nsig / 4) n2 = floor(Nsig / 4)
idx = r_[0:Nsig] + max(0, inds[0] - n2) # indices to the points used idx = r_[0:Nsig] + max(0, inds[0] - n2) # indices to the points used
tmpinds = zeros(N, dtype=bool) tmpinds = zeros(N, dtype=bool)

@ -11,7 +11,7 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import division from __future__ import division
from itertools import product from itertools import product
#from misc import tranproc, trangood from misc import tranproc #, trangood
from numpy import pi, sqrt, atleast_2d, exp, newaxis, array #@UnresolvedImport from numpy import pi, sqrt, atleast_2d, exp, newaxis, array #@UnresolvedImport
from scipy import interpolate, linalg from scipy import interpolate, linalg
from scipy.special import gamma from scipy.special import gamma
@ -119,11 +119,11 @@ class _KDE(object):
def _set_xlimits(self): def _set_xlimits(self):
amin = self.dataset.min(axis= -1) amin = self.dataset.min(axis= -1)
amax = self.dataset.max(axis= -1) amax = self.dataset.max(axis= -1)
iqr = iqrange(self.dataset, axis=-1) iqr = iqrange(self.dataset, axis= -1)
sigma = np.minimum(np.std(self.dataset, axis=-1, ddof=1),iqr/1.34) sigma = np.minimum(np.std(self.dataset, axis= -1, ddof=1), iqr / 1.34)
#xyzrange = amax - amin #xyzrange = amax - amin
#offset = xyzrange / 4.0 #offset = xyzrange / 4.0
offset = 2*sigma offset = 2 * sigma
if self.xmin is None: if self.xmin is None:
self.xmin = amin - offset self.xmin = amin - offset
else: else:
@ -181,15 +181,19 @@ class _KDE(object):
args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc)) args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc))
self.args = args self.args = args
f = eval_grd(*args) f = eval_grd(*args)
if kwds.get('output', 'value')=='value': if kwds.get('output', 'value') == 'value':
return f return f
else: else:
titlestr = 'Kernel density estimate (%s)' % self.kernel.name titlestr = 'Kernel density estimate (%s)' % self.kernel.name
kwds2 = dict(title=titlestr) kwds2 = dict(title=titlestr)
kwds2.update(**kwds) kwds2.update(**kwds)
if self.d==1: if self.d == 1:
args = args[0] args = args[0]
return WafoData(f,args, **kwds2) elif self.d > 1:
PL = np.r_[10:90:20, 95, 99, 99.9]
ql = qlevels(f,p=PL)
kwds2.setdefault('levels', ql)
return WafoData(f, args, **kwds2)
def _check_shape(self, points): def _check_shape(self, points):
points = atleast_2d(points) points = atleast_2d(points)
@ -387,16 +391,16 @@ class TKDE(_KDE):
""" """
f = self._eval_grid_fast(*args) f = self._eval_grid_fast(*args)
if kwds.get('output', 'value')=='value': if kwds.get('output', 'value') == 'value':
return f return f
else: else:
args = self.args args = self.args
titlestr = 'Kernel density estimate (%s)' % self.kernel.name titlestr = 'Kernel density estimate (%s)' % self.kernel.name
kwds2 = dict(title=titlestr) kwds2 = dict(title=titlestr)
kwds2.update(**kwds) kwds2.update(**kwds)
if self.d==1: if self.d == 1:
args = args[0] args = args[0]
return WafoData(f,args, **kwds2) return WafoData(f, args, **kwds2)
def _eval_grid_fast(self, *args): def _eval_grid_fast(self, *args):
if self.L2 is None: if self.L2 is None:
@ -541,6 +545,11 @@ class KDE(_KDE):
>>> np.interp(x, kde0.args[0], f) >>> np.interp(x, kde0.args[0], f)
array([ 0.21227584, 0.41256459, 0.5495661 , 0.5176579 , 0.38431616, array([ 0.21227584, 0.41256459, 0.5495661 , 0.5176579 , 0.38431616,
0.2591162 , 0.15978948, 0.07889179, 0.02769818, 0.00791829]) 0.2591162 , 0.15978948, 0.07889179, 0.02769818, 0.00791829])
>>> f1 = kde0.eval_grid_fast(output='plot')
>>> np.interp(x, f1.args, f1.data)
array([ 0.21227584, 0.41256459, 0.5495661 , 0.5176579 , 0.38431616,
0.2591162 , 0.15978948, 0.07889179, 0.02769818, 0.00791829])
>>> h = f1.plot()
import pylab as plb import pylab as plb
h1 = plb.plot(x, f) # 1D probability density plot h1 = plb.plot(x, f) # 1D probability density plot
@ -787,14 +796,13 @@ class _KernelGaussian(_Kernel):
return p4val return p4val
out = [p4val] out = [p4val]
pn = p4 pn = p4
for ix in range(numout - 1): for unusedix in range(numout - 1):
pnp1 = np.polyadd(-np.r_[pn, 0], np.polyder(pn)) pnp1 = np.polyadd(-np.r_[pn, 0], np.polyder(pn))
pnp2 = np.polyadd(-np.r_[pnp1, 0], np.polyder(pnp1)) pnp2 = np.polyadd(-np.r_[pnp1, 0], np.polyder(pnp1))
out.append(np.polyval(pnp2, t) * phi0) out.append(np.polyval(pnp2, t) * phi0)
pn = pnp2 pn = pnp2
return out return out
mkernel_gaussian = _KernelGaussian(r=4.0, stats=_stats_gaus) mkernel_gaussian = _KernelGaussian(r=4.0, stats=_stats_gaus)
#def mkernel_gaussian(X): #def mkernel_gaussian(X):
@ -861,8 +869,18 @@ class Kernel(object):
... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487,
... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452])
>>> Kernel('gaussian').stats() >>> gauss = Kernel('gaussian')
>>> gauss.stats()
(1, 0.28209479177387814, 0.21157109383040862) (1, 0.28209479177387814, 0.21157109383040862)
>>> gauss.hscv(data)
array([ 0.21555043])
>>> gauss.hstt(data)
array([ 0.15165387])
>>> gauss.hste(data)
array([ 0.18942238])
>>> gauss.hldpi(data)
array([ 0.1718688])
>>> Kernel('laplace').stats() >>> Kernel('laplace').stats()
(2, 0.25, inf) (2, 0.25, inf)
@ -878,6 +896,8 @@ class Kernel(object):
array([ 0.88265652]) array([ 0.88265652])
>>> triweight.hste(data) >>> triweight.hste(data)
array([ 0.56570278]) array([ 0.56570278])
>>> triweight.hscv(data)
array([ 0.64193201])
See also See also
-------- --------
@ -978,14 +998,14 @@ class Kernel(object):
n = A.shape[1] n = A.shape[1]
# R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x))
mu2, R, Rdd = self.stats() mu2, R, unusedRdd = self.stats()
AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5)
iqr = iqrange(A, axis=1) # interquartile range iqr = iqrange(A, axis=1) # interquartile range
stdA = np.std(A, axis=1, ddof=1) stdA = np.std(A, axis=1, ddof=1)
# % use of interquartile range guards against outliers. # use of interquartile range guards against outliers.
# % the use of interquartile range is better if # the use of interquartile range is better if
# % the distribution is skew or have heavy tails # the distribution is skew or have heavy tails
# % This lessen the chance of oversmoothing. # This lessen the chance of oversmoothing.
return np.where(iqr > 0, np.minimum(stdA, iqr / 1.349), stdA) * AMISEconstant return np.where(iqr > 0, np.minimum(stdA, iqr / 1.349), stdA) * AMISEconstant
def hos(self, data): def hos(self, data):
@ -1128,7 +1148,7 @@ class Kernel(object):
d, n = A.shape d, n = A.shape
# R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x))
mu2, R, Rdd = self.stats() mu2, R, unusedRdd = self.stats()
AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5)
STEconstant = R / (mu2 ** (2) * n) STEconstant = R / (mu2 ** (2) * n)
@ -1151,7 +1171,7 @@ class Kernel(object):
bx1 = amax + arange / 8.0 bx1 = amax + arange / 8.0
kernel2 = Kernel('gauss') kernel2 = Kernel('gauss')
mu2, R, Rdd = kernel2.stats() mu2, R, unusedRdd = kernel2.stats()
STEconstant2 = R / (mu2 ** (2) * n) STEconstant2 = R / (mu2 ** (2) * n)
fft = np.fft.fft fft = np.fft.fft
ifft = np.fft.ifft ifft = np.fft.ifft
@ -1166,7 +1186,6 @@ class Kernel(object):
c = gridcount(A[dim], xa) c = gridcount(A[dim], xa)
# Step 1 # Step 1
psi6NS = -15 / (16 * sqrt(pi) * s ** 7) psi6NS = -15 / (16 * sqrt(pi) * s ** 7)
psi8NS = 105 / (32 * sqrt(pi) * s ** 9) psi8NS = 105 / (32 * sqrt(pi) * s ** 9)
@ -1221,6 +1240,342 @@ class Kernel(object):
h[dim] = h1 h[dim] = h1
#end % for dim loop #end % for dim loop
return h return h
def hstt(self, data, h0=None, inc=128, maxit=100, releps=0.01, abseps=0.0):
'''HSTT Scott-Tapia-Thompson estimate of smoothing parameter.
CALL: hs = hstt(data,kernel)
hs = one dimensional value for smoothing parameter
given the data and kernel. size 1 x D
data = data matrix, size N x D (D = # dimensions )
kernel = 'epanechnikov' - Epanechnikov kernel. (default)
'biweight' - Bi-weight kernel.
'triweight' - Tri-weight kernel.
'triangular' - Triangular kernel.
'gaussian' - Gaussian kernel
'rectangular' - Rectangular kernel.
'laplace' - Laplace kernel.
'logistic' - Logistic kernel.
HSTT returns Scott-Tapia-Thompson (STT) estimate of smoothing
parameter. This is a Solve-The-Equation rule (STE).
Simulation studies shows that the STT estimate of HS
is a good choice under a variety of models. A comparison with
likelihood cross-validation (LCV) indicates that LCV performs slightly
better for short tailed densities.
However, STT method in contrast to LCV is insensitive to outliers.
Example:
x = rndnorm(0,1,50,1);
hs = hstt(x,'gauss');
See also hste, hbcv, hboot, hos, hldpi, hlscv, hscv, kde, kdebin
Reference:
B. W. Silverman (1986)
'Density estimation for statistics and data analysis'
Chapman and Hall, pp 57--61
'''
A = np.atleast_2d(data)
d, n = A.shape
# R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x))
mu2, R, unusedRdd = self.stats()
AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5)
STEconstant = R / (mu2 ** (2) * n)
sigmaA = self.hns(A) / AMISEconstant
if h0 is None:
h0 = sigmaA * AMISEconstant
h = np.asarray(h0, dtype=float)
nfft = inc * 2
amin = A.min(axis=1) # Find the minimum value of A.
amax = A.max(axis=1) #Find the maximum value of A.
arange = amax - amin # Find the range of A.
#% xa holds the x 'axis' vector, defining a grid of x values where
#% the k.d. function will be evaluated.
ax1 = amin - arange / 8.0
bx1 = amax + arange / 8.0
fft = np.fft.fft
ifft = np.fft.ifft
for dim in range(d):
s = sigmaA[dim]
datan = A[dim] / s
ax = ax1[dim] / s
bx = bx1[dim] / s
xa = np.linspace(ax, bx, inc)
xn = np.linspace(0, bx - ax, inc)
c = gridcount(datan, xa)
count = 1
h_old = 0
h1 = h[dim] / s
delta = (bx - ax) / (inc - 1)
while ((abs(h_old - h1) > max(releps * h1, abseps)) and (count < maxit)):
count += 1
h_old = h1
kw4 = self.kernel(xn / h1) / (n * h1 * self.norm_factor(d=1))
kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply 'fftshift' to kw.
f = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution.
# Estimate psi4=R(f'') using simple finite differences and quadrature.
ix = np.arange(1, inc - 1)
z = ((f[ix + 1] - 2 * f[ix] + f[ix - 1]) / delta ** 2) ** 2
psi4 = delta * z.sum()
h1 = (STEconstant / psi4) ** (1 / 5);
if count >= maxit:
warnings.warn('The obtained value did not converge.')
h[dim] = h1 * s
#end % for dim loop
return h
def hscv(self, data, hvec=None, inc=128, maxit=100):
'''
HSCV Smoothed cross-validation estimate of smoothing parameter.
CALL: [hs,hvec,score] = hscv(data,kernel,hvec);
hs = smoothing parameter
hvec = vector defining possible values of hs
(default linspace(0.25*h0,h0,100), h0=0.62)
score = score vector
data = data vector
kernel = 'gaussian' - Gaussian kernel the only supported
Note that only the first 4 letters of the kernel name is needed.
Example:
data = rndnorm(0,1,20,1)
[hs hvec score] = hscv(data,'epan');
plot(hvec,score)
See also hste, hbcv, hboot, hos, hldpi, hlscv, hstt, kde, kdefun
Wand,M.P. and Jones, M.C. (1986)
'Kernel smoothing'
Chapman and Hall, pp 75--79
'''
# TODO: Add support for other kernels than Gaussian
A = np.atleast_2d(data)
d, n = A.shape
# R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x))
mu2, R, unusedRdd = self.stats()
AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5)
STEconstant = R / (mu2 ** (2) * n)
sigmaA = self.hns(A) / AMISEconstant
if hvec is None:
H = AMISEconstant / 0.93
hvec = np.linspace(0.25 * H, H, maxit)
hvec = np.asarray(hvec, dtype=float)
steps = len(hvec)
score = np.zeros(steps)
nfft = inc * 2
amin = A.min(axis=1) # Find the minimum value of A.
amax = A.max(axis=1) #Find the maximum value of A.
arange = amax - amin # Find the range of A.
#% xa holds the x 'axis' vector, defining a grid of x values where
#% the k.d. function will be evaluated.
ax1 = amin - arange / 8.0
bx1 = amax + arange / 8.0
kernel2 = Kernel('gauss')
mu2, R, unusedRdd = kernel2.stats()
STEconstant2 = R / (mu2 ** (2) * n)
fft = np.fft.fft
ifft = np.fft.ifft
h = np.zeros(d)
hvec = hvec * (STEconstant2 / STEconstant) ** (1. / 5.)
k40, k60, k80, k100 = kernel2.deriv4_6_8_10(0, numout=4)
psi8 = 105 / (32 * sqrt(pi));
psi12 = 3465. / (512 * sqrt(pi))
g1 = (-2. * k60 / (mu2 * psi8 * n)) ** (1. / 9.)
g2 = (-2. * k100 / (mu2 * psi12 * n)) ** (1. / 13.)
for dim in range(d):
s = sigmaA[dim]
ax = ax1[dim] / s
bx = bx1[dim] / s
datan = A[dim] / s
xa = np.linspace(ax, bx, inc)
xn = np.linspace(0, bx - ax, inc)
c = gridcount(datan, xa)
kw4, kw6 = kernel2.deriv4_6_8_10(xn / g1, numout=2)
kw = np.r_[kw6, 0, kw6[-1:0:-1]]
z = np.real(ifft(fft(c, nfft) * fft(kw)))
psi6 = np.sum(c * z[:inc]) / (n ** 2 * g1 ** 7)
kw4, kw6, kw8, kw10 = kernel2.deriv4_6_8_10(xn / g2, numout=4)
kw = np.r_[kw10, 0, kw10[-1:0:-1]]
z = np.real(ifft(fft(c, nfft) * fft(kw)))
psi10 = np.sum(c * z[:inc]) / (n ** 2 * g2 ** 11)
g3 = (-2. * k40 / (mu2 * psi6 * n)) ** (1. / 7.)
g4 = (-2. * k80 / (mu2 * psi10 * n)) ** (1. / 11.)
kw4 = kernel2.deriv4_6_8_10(xn / g3, numout=1)
kw = np.r_[kw4, 0, kw4[-1:0:-1]]
z = np.real(ifft(fft(c, nfft) * fft(kw)))
psi4 = np.sum(c * z[:inc]) / (n ** 2 * g3 ** 5)
kw4, kw6, kw8 = kernel2.deriv4_6_8_10(xn / g3, numout=3)
kw = np.r_[kw8, 0, kw8[-1:0:-1]]
z = np.real(ifft(fft(c, nfft) * fft(kw)))
psi8 = np.sum(c * z[:inc]) / (n ** 2 * g4 ** 9)
const = (441. / (64 * pi)) ** (1. / 18.) * (4 * pi) ** (-1. / 5.) * psi4 ** (-2. / 5.) * psi8 ** (-1. / 9.)
M = np.atleast_2d(datan)
Y = (M - M.T).ravel()
for i in range(steps):
g = const * n ** (-23. / 45) * hvec[i] ** (-2)
sig1 = sqrt(2 * hvec[i] ** 2 + 2 * g ** 2)
sig2 = sqrt(hvec[i] ** 2 + 2 * g ** 2)
sig3 = sqrt(2 * g ** 2)
term2 = np.sum(kernel2(Y / sig1) / sig1 - 2 * kernel2(Y / sig2) / sig2 + kernel2(Y / sig3) / sig3)
score[i] = 1. / (n * hvec[i] * 2. * sqrt(pi)) + term2 / n ** 2
idx = score.argmin()
# Kernel other than Gaussian scale bandwidth
h[dim] = hvec[idx] * (STEconstant / STEconstant2) ** (1 / 5)
if idx == 0:
warnings.warn('Optimum is probably lower than hs=%g for dim=%d' % (h[dim] * s, dim))
elif idx == maxit - 1:
warnings.warn('Optimum is probably higher than hs=%g for dim=%d' % (h[dim] * s, dim))
hvec = hvec * (STEconstant / STEconstant2) ** (1 / 5)
return h * sigmaA
def hldpi(self, data, L=2, inc=128):
'''HLDPI L-stage Direct Plug-In estimate of smoothing parameter.
CALL: hs = hldpi(data,kernel,L)
hs = one dimensional value for smoothing parameter
given the data and kernel. size 1 x D
data = data matrix, size N x D (D = # dimensions )
kernel = 'epanechnikov' - Epanechnikov kernel.
'biweight' - Bi-weight kernel.
'triweight' - Tri-weight kernel.
'triangluar' - Triangular kernel.
'gaussian' - Gaussian kernel
'rectangular' - Rectanguler kernel.
'laplace' - Laplace kernel.
'logistic' - Logistic kernel.
L = 0,1,2,3,... (default 2)
Note that only the first 4 letters of the kernel name is needed.
Example:
x = rndnorm(0,1,50,1);
hs = hldpi(x,'gauss',1);
See also hste, hbcv, hboot, hos, hlscv, hscv, hstt, kde, kdefun
Wand,M.P. and Jones, M.C. (1995)
'Kernel smoothing'
Chapman and Hall, pp 67--74
'''
A = np.atleast_2d(data)
d, n = A.shape
# R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x))
mu2, R, unusedRdd = self.stats()
AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5)
STEconstant = R / (mu2 ** (2) * n)
sigmaA = self.hns(A) / AMISEconstant
nfft = inc * 2
amin = A.min(axis=1) # Find the minimum value of A.
amax = A.max(axis=1) #Find the maximum value of A.
arange = amax - amin # Find the range of A.
#% xa holds the x 'axis' vector, defining a grid of x values where
#% the k.d. function will be evaluated.
ax1 = amin - arange / 8.0
bx1 = amax + arange / 8.0
kernel2 = Kernel('gauss')
mu2, unusedR, unusedRdd = kernel2.stats()
fft = np.fft.fft
ifft = np.fft.ifft
h = np.zeros(d)
for dim in range(d):
s = sigmaA[dim]
datan = A[dim] / s
ax = ax1[dim] / s
bx = bx1[dim] / s
xa = np.linspace(ax, bx, inc)
xn = np.linspace(0, bx - ax, inc)
c = gridcount(datan, xa)
r = 2 * L + 4
rd2 = L + 2
# Eq. 3.7 in Wand and Jones (1995)
PSI_r = (-1) ** (rd2) * np.prod(np.r_[rd2 + 1:r]) / (sqrt(pi) * (2 * s) ** (r + 1));
PSI = PSI_r
if L > 0:
# High order derivatives of the Gaussian kernel
Kd = kernel2.deriv4_6_8_10(0, numout=L)
# L-stage iterations to estimate PSI_4
for ix in range(L - 1, -1, -1):
gi = (-2 * Kd[ix] / (mu2 * PSI * n)) ** (1. / (2 * ix + 5))
# Obtain the kernel weights.
KW0 = kernel2.deriv4_6_8_10(xn / gi, numout=ix + 1)
if ix > 0:
KW0 = KW0[-1]
kw = np.r_[KW0, 0, KW0[inc - 1:0:-1]] # Apply 'fftshift' to kw.
# Perform the convolution.
z = np.real(ifft(fft(c, nfft) * fft(kw)))
PSI = np.sum(c * z[:inc]) / (n ** 2 * gi ** (2 * ix + 3))
#end
#end
h[dim] = s * (STEconstant / PSI) ** (1. / 5)
return h
def norm_factor(self, d=1, n=None): def norm_factor(self, d=1, n=None):
return self.kernel.norm_factor(d, n) return self.kernel.norm_factor(d, n)
@ -1379,6 +1734,121 @@ def accum(accmap, a, func=None, size=None, fill_value=0, dtype=None):
return out return out
def qlevels(pdf, p=(10, 30, 50, 70, 90, 95, 99, 99.9), x1=None, x2=None):
'''QLEVELS Calculates quantile levels which encloses P% of PDF
CALL: [ql PL] = qlevels(pdf,PL,x1,x2);
ql = the discrete quantile levels.
pdf = joint point density function matrix or vector
PL = percent level (default [10:20:90 95 99 99.9])
x1,x2 = vectors of the spacing of the variables
(Default unit spacing)
QLEVELS numerically integrates PDF by decreasing height and find the
quantile levels which encloses P% of the distribution. If X1 and
(or) X2 is unspecified it is assumed that dX1 and dX2 is constant.
NB! QLEVELS normalizes the integral of PDF to N/(N+0.001) before
calculating QL in order to reflect the sampling of PDF is finite.
Currently only able to handle 1D and 2D PDF's if dXi is not constant (i=1,2).
Example
-------
>>> import wafo.stats as ws
>>> x = np.linspace(-8,8,2001);
>>> PL = np.r_[10:90:20, 90, 95, 99, 99.9]
>>> qlevels(ws.norm.pdf(x),p=PL, x1=x);
array([ 0.39591707, 0.37058719, 0.31830968, 0.23402133, 0.10362052,
0.05862129, 0.01449505, 0.00178806])
# compared with the exact values
>>> ws.norm.pdf(ws.norm.ppf((100-PL)/200))
array([ 0.39580488, 0.370399 , 0.31777657, 0.23315878, 0.10313564,
0.05844507, 0.01445974, 0.00177719])
See also
--------
qlevels2, tranproc
'''
norm = 1 # normalize cdf to unity
pdf = np.atleast_1d(pdf)
if any(pdf.ravel() < 0):
raise ValueError('This is not a pdf since one or more values of pdf is negative')
fsiz = pdf.shape
fsizmin = min(fsiz)
if fsizmin == 0:
return []
N = np.prod(fsiz);
d = len(fsiz)
if x1 is None or ((x2 is None) and d > 2):
fdfi = pdf.ravel()
else:
if d == 1: # pdf in one dimension
dx22 = np.ones(1)
else: # % pdf in two dimensions
dx2 = np.diff(x2.ravel())*0.5;
dx22 = np.r_[0, dx2] + np.r_[dx2, 0];
dx1 = np.diff(x1.ravel())*0.5
dx11 = np.r_[0 , dx1] + np.r_[dx1, 0]
dx1x2 = dx22[:, None] * dx11
fdfi = (pdf * dx1x2).ravel();
p = np.atleast_1d(p)
if np.any((p < 0) | (100 < p)):
raise ValueError('PL must satisfy 0 <= PL <= 100')
p2 = p / 100.0
ind = np.argsort(pdf.ravel()) # sort by height of pdf
ind = ind[::-1]
fi = pdf[ind]
Fi = np.cumsum(fdfi[ind]) # integration in the order of decreasing height of pdf
if norm: # %normalize Fi to make sure int pdf dx1 dx2 approx 1
Fi = Fi / Fi[-1] * N / (N + 1.5e-8)
maxFi = np.max(Fi)
if maxFi > 1:
warnings.warn('this is not a pdf since cdf>1! normalizing')
Fi = Fi / Fi[-1] * N / (N + 1.5e-8)
elif maxFi < .95:
msg = '''The given pdf is too sparsely sampled since cdf<.95.
Thus QL is questionable'''
warnings.warn(msg)
ind, = np.where(np.diff(np.r_[Fi, 1]) > 0) # make sure Fi is strictly increasing by not considering duplicate values
ui = tranproc(Fi[ind], fi[ind], p2) # calculating the inverse of Fi to find the index
# to the desired quantile level
#ui=smooth(Fi(ind),fi(ind),1,p2(:),1) % alternative
#res=ui-ui2
if np.any(ui >= max(pdf.ravel())):
warnings.warn('The lowest percent level is too close to 0%')
if np.any(ui <= min(pdf.ravel())):
msg = '''The given pdf is too sparsely sampled or
the highest percent level is too close to 100%'''
warnings.warn(msg)
ui[ui < 0] = 0.0
return ui
def iqrange(data, axis=None): def iqrange(data, axis=None):
''' '''
Returns the Inter Quartile Range of data Returns the Inter Quartile Range of data
@ -1410,7 +1880,7 @@ def iqrange(data, axis=None):
-------- --------
np.std np.std
''' '''
return np.abs(np.percentile(data, 75, axis=axis)-np.percentile(data, 25, axis=axis)) return np.abs(np.percentile(data, 75, axis=axis) - np.percentile(data, 25, axis=axis))
def bitget(int_type, offset): def bitget(int_type, offset):
''' '''
@ -1484,7 +1954,7 @@ def gridcount(data, X):
''' '''
dat = np.atleast_2d(data) dat = np.atleast_2d(data)
x = np.atleast_2d(X) x = np.atleast_2d(X)
d, n = dat.shape d = dat.shape[0]
d1, inc = x.shape d1, inc = x.shape
if d != d1: if d != d1:
@ -1564,28 +2034,28 @@ def kde_demo1():
''' '''
import scipy.stats as st import scipy.stats as st
x = np.linspace(-4,4) x = np.linspace(-4, 4)
x0 = x/2.0 x0 = x / 2.0
data = np.random.normal(loc=0, scale=1.0, size=7) #rndnorm(0,1,7,1); data = np.random.normal(loc=0, scale=1.0, size=7) #rndnorm(0,1,7,1);
kernel = Kernel('gaus') kernel = Kernel('gaus')
hs = kernel.hns(data) hs = kernel.hns(data)
hVec = [hs/2, hs, 2*hs] hVec = [hs / 2, hs, 2 * hs]
for ix, h in enumerate(hVec): for ix, h in enumerate(hVec):
pylab.figure(ix) pylab.figure(ix)
kde = KDE(data,hs=h, kernel=kernel) kde = KDE(data, hs=h, kernel=kernel)
f2 = kde(x, output='plot', title='h_s = %2.2f' % h, ylab='Density') f2 = kde(x, output='plot', title='h_s = %2.2f' % h, ylab='Density')
f2.plot('k-') f2.plot('k-')
pylab.plot(x, st.norm.pdf(x,0,1),'k:') pylab.plot(x, st.norm.pdf(x, 0, 1), 'k:')
n = len(data) n = len(data)
pylab.plot(data,np.zeros(data.shape),'bx') pylab.plot(data, np.zeros(data.shape), 'bx')
y = kernel(x0)/(n*h*kernel.norm_factor(d=1, n=n)) y = kernel(x0) / (n * h * kernel.norm_factor(d=1, n=n))
for i in range(n): for i in range(n):
pylab.plot(data[i]+x0*h, y,'b--') pylab.plot(data[i] + x0 * h, y, 'b--')
pylab.plot([data[i], data[i]], [0, np.max(y)],'b') pylab.plot([data[i], data[i]], [0, np.max(y)], 'b')
pylab.axis([x.min(),x.max(), 0, 0.5]) pylab.axis([x.min(), x.max(), 0, 0.5])
def kde_demo2(): def kde_demo2():
'''Demonstrate the difference between transformation- and ordinary-KDE '''Demonstrate the difference between transformation- and ordinary-KDE
@ -1596,7 +2066,7 @@ def kde_demo2():
import scipy.stats as st import scipy.stats as st
data = st.rayleigh.rvs(scale=1, size=300) data = st.rayleigh.rvs(scale=1, size=300)
x = np.linspace(1.5e-3,5, 55); x = np.linspace(1.5e-3, 5, 55);
kde = KDE(data) kde = KDE(data)
f = kde(output='plot', title='Ordinary KDE') f = kde(output='plot', title='Ordinary KDE')
@ -1604,7 +2074,7 @@ def kde_demo2():
f.plot() f.plot()
pylab.plot(x,st.rayleigh.pdf(x,scale=1),':') pylab.plot(x, st.rayleigh.pdf(x, scale=1), ':')
#plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable #plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable
@ -1613,7 +2083,7 @@ def kde_demo2():
pylab.figure(1) pylab.figure(1)
ft.plot() ft.plot()
pylab.plot(x,st.rayleigh.pdf(x,scale=1),':') pylab.plot(x, st.rayleigh.pdf(x, scale=1), ':')
pylab.figure(0) pylab.figure(0)

@ -1269,7 +1269,7 @@ def betaloge(z, w):
Example Example
>>> betaloge(3,2) >>> betaloge(3,2)
array([-2.48490665])
See also See also
-------- --------

@ -125,8 +125,8 @@ def test_prbnormnd():
0 0
>>> np.abs(val-Et)< err0+terr0 >>> np.abs(val-Et)< err0+terr0
array([ True], dtype=bool) array([ True], dtype=bool)
>>> 'val = %2.6f' % val >>> 'val = %2.5f' % val
'val = 0.001945' 'val = 0.00195'
''' '''
def test_cdfnorm2d(): def test_cdfnorm2d():
''' '''

@ -69,7 +69,9 @@ class WafoData(object):
self.plotter = None self.plotter = None
self.children = None self.children = None
self.plot_args_children = kwds.get('plot_args_children',[]) self.plot_args_children = kwds.get('plot_args_children',[])
self.plot_kwds_children = kwds.get('plot_kwds_children',{})
self.plot_args = kwds.get('plot_args',[]) self.plot_args = kwds.get('plot_args',[])
self.plot_kwds = kwds.get('plot_kwds',{})
self.labels = AxisLabels(**kwds) self.labels = AxisLabels(**kwds)
self.setplotter() self.setplotter()
@ -80,6 +82,9 @@ class WafoData(object):
plotbackend.hold('on') plotbackend.hold('on')
tmp = [] tmp = []
child_args = args + tuple(self.plot_args_children) child_args = args + tuple(self.plot_args_children)
child_kwds = dict()
child_kwds.update(self.plot_kwds_children)
child_kwds.update(**kwds)
for child in self.children: for child in self.children:
tmp1 = child.plot(*child_args, **kwds) tmp1 = child.plot(*child_args, **kwds)
if tmp1 != None: if tmp1 != None:
@ -87,7 +92,10 @@ class WafoData(object):
if len(tmp) == 0: if len(tmp) == 0:
tmp = None tmp = None
main_args = args + tuple(self.plot_args) main_args = args + tuple(self.plot_args)
tmp2 = self.plotter.plot(self, *main_args, **kwds) main_kwds = dict()
main_kwds.update(self.plot_kwds)
main_kwds.update(kwds)
tmp2 = self.plotter.plot(self, *main_args, **main_kwds)
return tmp2, tmp return tmp2, tmp
def show(self): def show(self):
@ -107,11 +115,12 @@ class WafoData(object):
if isinstance(self.args, (list, tuple)): # Multidimensional data if isinstance(self.args, (list, tuple)): # Multidimensional data
ndim = len(self.args) ndim = len(self.args)
if ndim < 2: if ndim < 2:
warnings.warn('Unable to determine plotter-type, because len(self.args)<2.') msg = '''Unable to determine plotter-type, because len(self.args)<2.
print('If the data is 1D, then self.args should be a vector!') If the data is 1D, then self.args should be a vector!
print('If the data is 2D, then length(self.args) should be 2.') If the data is 2D, then length(self.args) should be 2.
print('If the data is 3D, then length(self.args) should be 3.') If the data is 3D, then length(self.args) should be 3.
print('Unless you fix this, the plot methods will not work!') Unless you fix this, the plot methods will not work!'''
warnings.warn(msg)
elif ndim == 2: elif ndim == 2:
self.plotter = Plotter_2d(plotmethod) self.plotter = Plotter_2d(plotmethod)
else: else:

Loading…
Cancel
Save