Fixed bugs in KDE

Added TKDE + tests
master
per.andreas.brodtkorb 14 years ago
parent 51387dcb5d
commit b5835a7549

@@ -10,6 +10,7 @@
#-------------------------------------------------------------------------------
#!/usr/bin/env python
from __future__ import division
import warnings
import numpy as np
from numpy import pi, sqrt, atleast_2d, exp, newaxis #@UnresolvedImport
import scipy
@@ -55,15 +56,163 @@ def sphere_volume(d, r=1.0):
""" """
return (r ** d) * 2. * pi ** (d / 2.) / (d * gamma(d / 2.)) return (r ** d) * 2. * pi ** (d / 2.) / (d * gamma(d / 2.))
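
# Sanity check of the formula above: sphere_volume(2) == pi * r ** 2, and
# sphere_volume(3) == 2 * pi ** 1.5 / (3 * gamma(1.5)) == 4 * pi / 3,
# the volume of the unit 3-ball.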

class TKDE(object):
    """ Transformation Kernel-Density Estimator.

    Parameters
    ----------
    dataset : (# of dims, # of data)-array
        datapoints to estimate from
    hs : array-like (optional)
        smoothing parameter vector/matrix.
        (default compute from data using kernel.get_smoothing function)
    kernel : kernel function object.
        kernel must have get_smoothing method
    alpha : real scalar (optional)
        sensitivity parameter (default 0 regular KDE)
        A good choice might be alpha = 0.5 ( or 1/D)
        alpha = 0      Regular KDE (hs is constant)
        0 < alpha <= 1 Adaptive KDE (Make hs change)
    L2 : array-like
        vector of transformation parameters (default 1 no transformation)
        t(xi;L2) = xi^L2 * sign(L2)   for L2(i) != 0
        t(xi;L2) = log(xi)            for L2(i) == 0
        If a single value of L2 is given, the same transformation is used in
        all directions.

    Members
    -------
    d : int
        number of dimensions
    n : int
        number of datapoints

    Methods
    -------
    kde.evaluate(points) : array
        evaluate the estimated pdf on a provided set of points
    kde(points) : array
        same as kde.evaluate(points)

    Example
    -------
    N = 20
    data = np.random.rayleigh(1, size=(N,))
    >>> data = array([ 0.75355792,  0.72779194,  0.94149169,  0.07841119,  2.32291887,
    ...        1.10419995,  0.77055114,  0.60288273,  1.36883635,  1.74754326,
    ...        1.09547561,  1.01671133,  0.73211143,  0.61891719,  0.75903487,
    ...        1.8919469 ,  0.72433808,  1.92973094,  0.44749838,  1.36508452])
    >>> x = np.linspace(0.01, max(data.ravel()) + 1, 10)
    >>> kde = wk.TKDE(data, hs=0.5, L2=0.5)
    >>> f = kde(x)
    >>> f
    array([ 1.03982714,  0.45839018,  0.39514782,  0.32860602,  0.26433318,
            0.20717946,  0.15907684,  0.1201074 ,  0.08941027,  0.06574882])

    import pylab as plb
    h1 = plb.plot(x, f)  # 1D probability density plot
    t = np.trapz(f, x)
    """
    def __init__(self, dataset, hs=None, kernel=None, alpha=0.0, L2=None):
        self.dataset = atleast_2d(dataset)
        self.hs = hs
        self.kernel = kernel
        self.alpha = alpha
        self.L2 = L2
        self.d, self.n = self.dataset.shape
        self.initialize()

    def initialize(self):
        tdataset = self._dat2gaus(self.dataset)
        self.kde = KDE(tdataset, self.hs, self.kernel, self.alpha)

    def _check_shape(self, points):
        points = atleast_2d(points)
        d, m = points.shape
        if d != self.d:
            if d == 1 and m == self.d:
                # points was passed in as a row vector
                points = np.reshape(points, (self.d, 1))
                m = 1
            else:
                msg = "points have dimension %s, dataset has dimension %s" % (
                    d, self.d)
                raise ValueError(msg)
        return points

    def _dat2gaus(self, points):
        if self.L2 is None:
            return points  # default no transformation
        L2 = np.atleast_1d(self.L2) * np.ones(self.d)
        tpoints = points.copy()
        for i, v2 in enumerate(L2.tolist()):
            # v2 is a scalar, so branch explicitly instead of using np.where,
            # which would evaluate np.log(points[i]) even when v2 != 0.
            tpoints[i] = np.log(points[i]) if v2 == 0 else points[i] ** v2
        return tpoints
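
    # Change of variables: if g is the density estimated from the transformed
    # data y = t(x; L2), then f(x) = g(t(x)) * |dt/dx|. _scale_pdf below
    # applies this Jacobian factor: |L2| * x ** (L2 - 1) for the power
    # transform, and 1 / x for the log transform (the v2 == 0 case, since
    # factor == 1 and x ** (0 - 1) == 1 / x).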
    def _scale_pdf(self, pdf, points):
        if self.L2 is None:
            return pdf
        L2 = np.atleast_1d(self.L2) * np.ones(self.d)
        for i, v2 in enumerate(L2.tolist()):
            factor = v2 * np.sign(v2) if v2 else 1
            if v2 != 1:
                pdf *= points[i] ** (v2 - 1) * factor
        if np.abs(np.diff(pdf)).max() > 10:
            msg = ('Numerical problems may have occurred due to the power '
                   'transformation. Check the KDE for spurious spikes.')
            warnings.warn(msg)
        return pdf

    def evaluate(self, points):
        """Evaluate the estimated pdf on a set of points.

        Parameters
        ----------
        points : (# of dimensions, # of points)-array
            Alternatively, a (# of dimensions,) vector can be passed in and
            treated as a single point.

        Returns
        -------
        values : (# of points,)-array
            The values at each point.

        Raises
        ------
        ValueError if the dimensionality of the input points is different
        than the dimensionality of the KDE.
        """
        if self.L2 is None:
            return self.kde(points)
        points = self._check_shape(points)
        tpoints = self._dat2gaus(points)
        tf = self.kde(tpoints)
        f = self._scale_pdf(tf, points)
        return f

    __call__ = evaluate
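
# A minimal end-to-end check of TKDE (a sketch, not part of the module): the
# back-transformed density should still integrate to roughly one on the
# original scale.
#
#     data = np.random.rayleigh(1, size=(500,))
#     x = np.linspace(0.01, 5, 200)   # avoid x = 0: x ** (L2 - 1) blows up
#     f = TKDE(data, L2=0.5)(x)       # KDE on the sqrt-transformed data
#     np.trapz(f, x)                  # expected to be close to 1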

class KDE(object):
-    """ Representation of a kernel-density estimate using Gaussian kernels.
+    """ Kernel-Density Estimator.

    Parameters
    ----------
    dataset : (# of dims, # of data)-array
        datapoints to estimate from
    hs : array-like (optional)
        smoothing parameter vector/matrix.
        (default compute from data using kernel.get_smoothing function)
    kernel : kernel function object.
        kernel must have get_smoothing method
    alpha : real scalar (optional)
        sensitivity parameter (default 0 regular KDE)
        A good choice might be alpha = 0.5 ( or 1/D)
        alpha = 0      Regular KDE (hs is constant)
        0 < alpha <= 1 Adaptive KDE (Make hs change)

    Members
    -------
@@ -78,39 +227,47 @@ class KDE(object):
        evaluate the estimated pdf on a provided set of points
    kde(points) : array
        same as kde.evaluate(points)
    kde.integrate_gaussian(mean, cov) : float
        multiply pdf with a specified Gaussian and integrate over the whole domain
    kde.integrate_box_1d(low, high) : float
        integrate pdf (1D only) between two bounds
    kde.integrate_box(low_bounds, high_bounds) : float
        integrate pdf over a rectangular space between low_bounds and high_bounds
    kde.integrate_kde(other_kde) : float
        integrate two kernel density estimates multiplied together

    Internal Methods
    ----------------
    kde.covariance_factor() : float
        computes the coefficient that multiplies the data covariance matrix to
        obtain the kernel covariance matrix. Set this method to
        kde.scotts_factor or kde.silverman_factor (or subclass to provide your
        own). The default is scotts_factor.

    Example
    -------
    N = 20
    data = np.random.rayleigh(1, size=(N,))
    >>> data = array([ 0.75355792,  0.72779194,  0.94149169,  0.07841119,  2.32291887,
    ...        1.10419995,  0.77055114,  0.60288273,  1.36883635,  1.74754326,
    ...        1.09547561,  1.01671133,  0.73211143,  0.61891719,  0.75903487,
    ...        1.8919469 ,  0.72433808,  1.92973094,  0.44749838,  1.36508452])
    >>> x = np.linspace(0, max(data.ravel()) + 1, 10)
    >>> kde = wk.KDE(data, hs=0.5, alpha=0.5)
    >>> f = kde(x)
    >>> f
    array([ 0.17252055,  0.41014271,  0.61349072,  0.57023834,  0.37198073,
            0.21409279,  0.12738463,  0.07460326,  0.03956191,  0.01887164])

    import pylab as plb
    h1 = plb.plot(x, f)  # 1D probability density plot
    t = np.trapz(f, x)
    """
-    def __init__(self, dataset, hs=None, kernel=None, L2=None, alpha=0.0):
+    def __init__(self, dataset, hs=None, kernel=None, alpha=0.0):
        self.kernel = kernel if kernel else Kernel('gauss')
        self.hs = hs
-        self.L2 = L2
        self.alpha = alpha
        self.dataset = atleast_2d(dataset)
        self.d, self.n = self.dataset.shape
        self.initialize()

    def initialize(self):
        self._compute_smoothing()
        if self.alpha > 0:
            pilot = KDE(self.dataset, hs=self.hs, kernel=self.kernel, alpha=0)
            f = pilot(self.dataset)  # get a pilot estimate by regular KDE (alpha=0)
            g = np.exp(np.mean(np.log(f)))
            self._lambda = (f / g) ** (-self.alpha)
        else:
            self._lambda = np.ones(self.n)
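        # Abramson-style adaptive bandwidths: g is the geometric mean of the
        # pilot density over the data, so lambda_i = (f(X_i) / g) ** (-alpha)
        # widens the kernel where the pilot density is low and narrows it
        # where it is high.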

    def _compute_smoothing(self):
        """Computes the smoothing matrix
@@ -119,6 +276,7 @@ class KDE(object):
        h = self.hs
        if h is None:
            h = get_smoothing(self.dataset)
        h = np.atleast_1d(h)
        hsiz = h.shape
        if (min(hsiz) == 1) or (self.d == 1):
@@ -141,6 +299,19 @@ class KDE(object):
        self.hs = h
        self._norm_factor = deth * self.n
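        # deth * n gives the 1 / (n * det(H)) normalization that evaluate()
        # divides by, together with the kernel's own norm_factor.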

    def _check_shape(self, points):
        points = atleast_2d(points)
        d, m = points.shape
        if d != self.d:
            if d == 1 and m == self.d:
                # points was passed in as a row vector
                points = np.reshape(points, (self.d, 1))
                m = 1
            else:
                msg = "points have dimension %s, dataset has dimension %s" % (
                    d, self.d)
                raise ValueError(msg)
        return points

    def evaluate(self, points):
        """Evaluate the estimated pdf on a set of points.
@@ -161,33 +332,24 @@ class KDE(object):
        the dimensionality of the KDE.
        """
-        points = atleast_2d(points).astype(self.dataset.dtype)
+        points = self._check_shape(points)
        d, m = points.shape
-        if d != self.d:
-            if d == 1 and m == self.d:
-                # points was passed in as a row vector
-                points = np.reshape(points, (self.d, 1))
-                m = 1
-            else:
-                msg = "points have dimension %s, dataset has dimension %s" % (d,
-                    self.d)
-                raise ValueError(msg)
-        result = np.zeros((m,), points.dtype)
+        result = np.zeros((m,))
        if m >= self.n:
            # there are more points than data, so loop over data
            for i in range(self.n):
                diff = self.dataset[:, i, np.newaxis] - points
                tdiff = np.dot(self.inv_hs / self._lambda[i], diff)
                result += self.kernel(tdiff) / self._lambda[i] ** d
        else:
            # loop over points
            for i in range(m):
                diff = self.dataset - points[:, i, np.newaxis]
                tdiff = np.dot(self.inv_hs, diff / self._lambda[np.newaxis, :])
                tmp = self.kernel(tdiff) / self._lambda ** d
                result[i] = tmp.sum(axis=-1)
        result /= (self._norm_factor * self.kernel.norm_factor(d, self.n))
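
        # Both branches compute the adaptive kernel estimate
        #   f(x) = sum_i lambda_i ** (-d) * K(inv_hs * (x - X_i) / lambda_i)
        #          / (n * det(hs) * c_K),
        # where c_K is the kernel's normalization constant and lambda_i are
        # the local bandwidth factors computed in initialize().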
@@ -197,282 +359,6 @@ class KDE(object):
    __call__ = evaluate
#function [f, hs,lambda]= kdefun(A,options,varargin)
#%KDEFUN Kernel Density Estimator.
#%
#% CALL: [f, hs] = kdefun(data,options,x1,x2,...,xd)
#%
#% f = kernel density estimate evaluated at x1,x2,...,xd.
#% data = data matrix, size N x D (D = # dimensions)
#% options = kdeoptions-structure or cellvector of named parameters with
#% corresponding values, see kdeoptset for details.
#% x1,x2..= vectors/matrices defining the points to evaluate the density
#%
#% KDEFUN gives a slow, but exact kernel density estimate evaluated at x1,x2,...,xd.
#% Notice that densities close to normality appear to be the easiest for the kernel
#% estimator to estimate and that the degree of estimation difficulty increases with
#% skewness, kurtosis and multimodality.
#%
#% If D > 1 KDE calculates quantile levels by integration. An
#% alternative is to calculate them by ranking the kernel density
#% estimate obtained at the points DATA i.e. use the commands
#%
#% f = kde(data);
#% r = kdefun(data,[],num2cell(data,1));
#% f.cl = qlevels2(r,f.PL);
#%
#% The first is probably best when estimating the pdf and the latter is the
#% easiest and most robust for multidimensional data when only a visualization
#% of the data is needed.
#%
#% For faster estimates try kdebin.
#%
#% Examples:
#% data = rndray(1,500,1);
#% x = linspace(sqrt(eps),5,55);
#% plotnorm((data).^(.5)) % gives a straight line => L2 = 0.5 reasonable
#% f = kdefun(data,{'L2',.5},x);
#% plot(x,f,x,pdfray(x,1),'r')
#%
#% See also kde, mkernel, kdebin
#
#% Reference:
#% B. W. Silverman (1986)
#% 'Density estimation for statistics and data analysis'
#% Chapman and Hall , pp 100-110
#%
#% Wand, M.P. and Jones, M.C. (1995)
#% 'Kernel smoothing'
#% Chapman and Hall, pp 43--45
#
#
#
#
#%Tested on: matlab 5.2
#% History:
#% revised pab Feb2004
#% -options moved into a structure
#% revised pab Dec2003
#% -removed some code
#% revised pab 27.04.2001
#% - changed call from mkernel to mkernel2 (increased speed by 10%)
#% revised pab 01.01.2001
#% - added the possibility that L2 is a cellarray of parametric
#% or non-parametric transformations (secret option)
#% revised pab 14.12.1999
#% - fixed a small error in example in help header
#% revised pab 28.10.1999
#% - added L2
#% revised pab 21.10.99
#% - added alpha to input arguments
#% - made it fully general for d dimensions
#% - HS may be a smoothing matrix
#% revised pab 21.09.99
#% - adapted from kdetools by Christian Beardah
#
# defaultoptions = kdeoptset;
#% If just 'defaults' passed in, return the default options in g
#if ((nargin==1) && (nargout <= 1) && isequal(A,'defaults')),
# f = defaultoptions;
# return
#end
#error(nargchk(1,inf, nargin))
#
#[n, d]=size(A); % Find dimensions of A,
# % n=number of data points,
# % d=dimension of the data.
#if (nargin<2 || isempty(options))
# options = defaultoptions;
#else
# switch lower(class(options))
# case {'char','struct'},
# options = kdeoptset(defaultoptions,options);
# case {'cell'}
#
# options = kdeoptset(defaultoptions,options{:});
# otherwise
# error('Invalid options')
# end
#end
#kernel = options.kernel;
#h = options.hs;
#alpha = options.alpha;
#L2 = options.L2;
#hsMethod = options.hsMethod;
#
#if isempty(h)
# h=zeros(1,d);
#end
#
#L22 = cell(1,d);
#k3=[];
#if isempty(L2)
# L2=ones(1,d); % default no transformation
#elseif iscell(L2) % cellarray of non-parametric and parametric transformations
# Nl2 = length(L2);
# if ~(Nl2==1||Nl2==d), error('Wrong size of L2'), end
# [L22{1:d}] = deal(L2{1:min(Nl2,d)});
# L2 = ones(1,d); % default no transformation
# for ix=1:d,
# if length(L22{ix})>1,
# k3=[k3 ix]; % Non-parametric transformation
# else
# L2(ix) = L22{ix}; % Parameter to the Box-Cox transformation
# end
# end
#elseif length(L2)==1
# L2=L2(:,ones(1,d));
#end
#
#amin=min(A);
#if any((amin(L2~=1)<=0)) ,
# error('DATA cannot be negative or zero when L2~=1')
#end
#
#
#nv=length(varargin);
#if nv<d,
# error('some or all of the evaluation points x1,x2,...,xd is missing')
#end
#
#xsiz = size(varargin{1}); % remember size of input
#Nx = prod(xsiz);
#X = zeros(Nx,d);
#for ix=1:min(nv,d),
# if (any(varargin{ix}(:)<=0) && (L2(ix)~=1)),
# error('xi cannot be negative or zero when L2~=1')
# end
# X(:,ix)=varargin{ix}(:); % make sure it is a column vector
#end
#
#
#%new call
#lX = X; %zeros(Nx,d);
#lA = A; %zeros(size(A));
#
#k1 = find(L2==0); % logarithmic transformation
#if any(k1)
# lA(:,k1)=log(A(:,k1));
# lX(:,k1)=log(X(:,k1));
#end
#k2=find(L2~=0 & L2~=1); % power transformation
#if any(k2)
# lA(:,k2)=sign(L2(ones(n,1),k2)).*A(:,k2).^L2(ones(n,1),k2);
# lX(:,k2)=sign(L2(ones(Nx,1),k2)).*X(:,k2).^L2(ones(Nx,1),k2);
#end
#% Non-parametric transformation
#for ix = k3,
# lA(:,ix) = tranproc(A(:,ix),L22{ix});
# lX(:,ix) = tranproc(X(:,ix),L22{ix});
#end
#
#
#hsiz=size(h);
#if (min(hsiz)==1)||(d==1)
# if max(hsiz)==1,
# h=h*ones(1,d);
# else
# h=reshape(h,[1,d]); % make sure it has the correct dimension
# end;
# ind=find(h<=0);
# if any(ind) % If no value of h has been specified by the user then
# h(ind)=feval(hsMethod,lA(:,ind),kernel); % calculate automatic values.
# end
# deth = prod(h);
#else % fully general smoothing matrix
# deth = det(h);
# if deth<=0
# error('bandwidth matrix h must be positive definite')
# end
#end
#
#if alpha>0
# Xn = num2cell(lA,1);
# opt1 = kdeoptset('kernel',kernel,'hs',h,'alpha',0,'L2',1);
# f2 = kdefun(lA,opt1,Xn{:}); % get a pilot estimate by regular KDE (alpha=0)
# g = exp(sum(log(f2))/n);
#
# lambda=(f2(:)/g).^(-alpha);
#else
# lambda=ones(n,1);
#end
#
#
#
#
#
#f=zeros(Nx,1);
#if (min(hsiz)==1)||(d==1)
# for ix=1:n, % Sum over all data points
# Avec=lA(ix,:);
# Xnn=(lX-Avec(ones(Nx,1),:))./(h(ones(Nx,1),:) *lambda(ix));
# f = f + mkernel2(Xnn,kernel)/lambda(ix)^d;
# end
#else % fully general
# h1=inv(h);
# for ix=1:n, % Sum over all data points
# Avec=lA(ix,:);
# Xnn=(lX-Avec(ones(Nx,1),:))*(h1/lambda(ix));
# f = f + mkernel2(Xnn,kernel)/lambda(ix)^d;
# end
#end
#f=f/(n*deth);
#
#% transforming back
#if any(k1), % L2=0 i.e. logarithmic transformation
# for ix=k1
# f=f./X(:,ix);
# end
# if any(max(abs(diff(f)))>10)
# disp('Warning: Numerical problems may have occurred due to the logarithmic')
# disp('transformation. Check the KDE for spurious spikes')
# end
#end
#if any(k2) % L2~=0 i.e. power transformation
# for ix=k2
# f=f.*(X(:,ix).^(L2(ix)-1))*L2(ix)*sign(L2(ix));
# end
# if any(max(abs(diff(f)))>10)
# disp('Warning: Numerical problems may have occurred due to the power')
# disp('transformation. Check the KDE for spurious spikes')
# end
#end
#if any(k3), % non-parametric transformation
# oneC = ones(Nx,1);
# for ix=k3
# gn = L22{ix};
# %Gn = fliplr(L22{ix});
# %x0 = tranproc(lX(:,ix),Gn);
# if any(isnan(X(:,ix))),
# error('The transformation does not have a strictly positive derivative.')
# end
# hg1 = tranproc([X(:,ix) oneC],gn);
# der1 = abs(hg1(:,2)); % dg(X)/dX = 1/(dG(Y)/dY)
# % alternative 2
# %pp = smooth(Gn(:,1),Gn(:,2),1,[],1);
# %dpp = diffpp(pp);
# %der1 = 1./abs(ppval(dpp,f.x{ix}));
# % Alternative 3
# %pp = smooth(gn(:,1),gn(:,2),1,[],1);
# %dpp = diffpp(pp);
# %%plot(hg1(:,1),der1-abs(ppval(dpp,x0)))
# %der1 = abs(ppval(dpp,x0));
# if any(der1<=0),
# error('The transformation must have a strictly positive derivative')
# end
# f = f.*der1;
# end
# if any(max(abs(diff(f)))>10)
# disp('Warning: Numerical problems may have occurred due to the power')
# disp('transformation. Check the KDE for spurious spikes')
# end
#end
#
#f=reshape(f,xsiz); % restore original shape
#if nargout>1
# hs=h;
#end
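
# The example in the removed MATLAB header maps onto the new TKDE class
# roughly as follows (a sketch; plotnorm and pdfray have no counterpart here):
#
#     data = np.random.rayleigh(1, size=(500,))             # rndray(1,500,1)
#     x = np.linspace(np.sqrt(np.finfo(float).eps), 5, 55)
#     f = TKDE(data, L2=0.5)(x)                             # kdefun(data,{'L2',.5},x)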

class _Kernel(object):
    def __init__(self, r=1.0, stats=None):
        self.r = r  # radius of kernel
@@ -712,7 +598,7 @@ class Kernel(object):
        mu2, R, Rdd = self.stats()
        AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5)
        iqr = np.abs(np.percentile(A, 75, axis=1) - np.percentile(A, 25, axis=1))  # interquartile range
-        stdA = np.std(A, axis=1)
+        stdA = np.std(A, axis=1, ddof=1)
        # % use of interquartile range guards against outliers.
        # % the use of interquartile range is better if
        # % the distribution is skewed or has heavy tails
@@ -746,7 +632,8 @@ class Kernel(object):
        See also hste, hbcv, hboot, hldpi, hlscv, hscv, hstt, kde, kdefun

-        Reference:
+        Reference
+        ---------
        B. W. Silverman (1986)
        'Density estimation for statistics and data analysis'
        Chapman and Hall, pp 43-48
@@ -826,7 +713,7 @@ class Kernel(object):
        # Scott's rule for a matrix bandwidth scales as n ** (-1 / (d + 4)),
        # so the exponent, not a multiplication, is intended here.
        return a * linalg.sqrtm(covA) * n ** (-1. / (d + 4))

    def norm_factor(self, d=1, n=None):
-        return self.kernel.norm_factor(n, d)
+        return self.kernel.norm_factor(d, n)

    def evaluate(self, X):
        return self.kernel(np.atleast_2d(X))
    __call__ = evaluate
@@ -981,6 +868,7 @@ def accum(accmap, a, func=None, size=None, fill_value=0, dtype=None):
        out[s] = func(vals[s])
    return out


def bitget(int_type, offset):
    '''
    Returns the value of the bit at the offset position in int_type.
@@ -1106,9 +994,9 @@ def gridcount(data, X):
            b2 = binx + bt2  # linear index to X
            c += accum(b1, abs(np.prod(X1[b2] - dat, axis=0)), size=(Nc,))

-    c = np.reshape(c / w, csiz)
+    c = np.reshape(c / w, csiz, order='C')
    # TODO: check that the flipping of axis is correct
-    T = range(d); T[-2],T[-1] = T[-1], T[-2]
+    T = range(d); T[-2], T[-1] = T[-1], T[-2]
    c = c.transpose(*T)
    if d == 2:  # make sure c is stored in the same way as meshgrid

@@ -0,0 +1,240 @@
'''
Created on 20. nov. 2010
@author: pab
'''
import numpy as np
from numpy import array
import wafo.kdetools as wk
import pylab as plb

def test1_TKDE1D():
    '''
    N = 20
    data = np.random.rayleigh(1, size=(N,))
    >>> data = array([ 0.75355792,  0.72779194,  0.94149169,  0.07841119,  2.32291887,
    ...        1.10419995,  0.77055114,  0.60288273,  1.36883635,  1.74754326,
    ...        1.09547561,  1.01671133,  0.73211143,  0.61891719,  0.75903487,
    ...        1.8919469 ,  0.72433808,  1.92973094,  0.44749838,  1.36508452])
    >>> x = np.linspace(0.01, max(data.ravel()) + 1, 10)
    >>> kde = wk.TKDE(data, hs=0.5, L2=0.5)
    >>> f = kde(x)
    >>> f
    array([ 1.03982714,  0.45839018,  0.39514782,  0.32860602,  0.26433318,
            0.20717946,  0.15907684,  0.1201074 ,  0.08941027,  0.06574882])
    >>> np.trapz(f, x)
    0.94787730659349068

    h1 = plb.plot(x, f)  # 1D probability density plot
    '''

def test1_KDE1D():
    '''
    N = 20
    data = np.random.rayleigh(1, size=(N,))
    >>> data = array([ 0.75355792,  0.72779194,  0.94149169,  0.07841119,  2.32291887,
    ...        1.10419995,  0.77055114,  0.60288273,  1.36883635,  1.74754326,
    ...        1.09547561,  1.01671133,  0.73211143,  0.61891719,  0.75903487,
    ...        1.8919469 ,  0.72433808,  1.92973094,  0.44749838,  1.36508452])
    >>> x = np.linspace(0, max(data.ravel()) + 1, 10)
    >>> kde = wk.KDE(data, hs=0.5)
    >>> f = kde(x)
    >>> f
    array([ 0.2039735 ,  0.40252503,  0.54595078,  0.52219649,  0.3906213 ,
            0.26381501,  0.16407362,  0.08270612,  0.02991145,  0.00720821])
    >>> np.trapz(f, x)
    0.92576174424281876

    h1 = plb.plot(x, f)  # 1D probability density plot
    '''

def test2_KDE1D():
    '''
    N = 20
    data = np.random.rayleigh(1, size=(N,))
    >>> data = array([ 0.75355792,  0.72779194,  0.94149169,  0.07841119,  2.32291887,
    ...        1.10419995,  0.77055114,  0.60288273,  1.36883635,  1.74754326,
    ...        1.09547561,  1.01671133,  0.73211143,  0.61891719,  0.75903487,
    ...        1.8919469 ,  0.72433808,  1.92973094,  0.44749838,  1.36508452])
    >>> data = np.asarray([1, 2])
    >>> x = np.linspace(0, max(data.ravel()) + 1, 10)
    >>> kde = wk.KDE(data, hs=0.5)
    >>> f = kde(x)
    >>> f
    array([ 0.0541248 ,  0.16555235,  0.33084399,  0.45293325,  0.48345808,
            0.48345808,  0.45293325,  0.33084399,  0.16555235,  0.0541248 ])
    >>> np.trapz(f, x)
    0.97323338046725172

    h1 = plb.plot(x, f)  # 1D probability density plot
    '''

def test1a_KDE1D():
    '''
    N = 20
    data = np.random.rayleigh(1, size=(N,))
    >>> data = array([ 0.75355792,  0.72779194,  0.94149169,  0.07841119,  2.32291887,
    ...        1.10419995,  0.77055114,  0.60288273,  1.36883635,  1.74754326,
    ...        1.09547561,  1.01671133,  0.73211143,  0.61891719,  0.75903487,
    ...        1.8919469 ,  0.72433808,  1.92973094,  0.44749838,  1.36508452])
    >>> x = np.linspace(0, max(data.ravel()) + 1, 10)
    >>> kde = wk.KDE(data, hs=0.5, alpha=0.5)
    >>> f = kde(x)
    >>> f
    array([ 0.17252055,  0.41014271,  0.61349072,  0.57023834,  0.37198073,
            0.21409279,  0.12738463,  0.07460326,  0.03956191,  0.01887164])
    >>> np.trapz(f, x)
    0.92938023659047952

    h1 = plb.plot(x, f)  # 1D probability density plot
    '''

def test2a_KDE1D():
    '''
    N = 20
    data = np.random.rayleigh(1, size=(N,))
    >>> data = array([ 0.75355792,  0.72779194,  0.94149169,  0.07841119,  2.32291887,
    ...        1.10419995,  0.77055114,  0.60288273,  1.36883635,  1.74754326,
    ...        1.09547561,  1.01671133,  0.73211143,  0.61891719,  0.75903487,
    ...        1.8919469 ,  0.72433808,  1.92973094,  0.44749838,  1.36508452])
    >>> data = np.asarray([1, 2])
    >>> x = np.linspace(0, max(data.ravel()) + 1, 10)
    >>> kde = wk.KDE(data, hs=0.5, alpha=0.5)
    >>> f = kde(x)
    >>> f
    array([ 0.0541248 ,  0.16555235,  0.33084399,  0.45293325,  0.48345808,
            0.48345808,  0.45293325,  0.33084399,  0.16555235,  0.0541248 ])
    >>> np.trapz(f, x)
    0.97323338046725172

    h1 = plb.plot(x, f)  # 1D probability density plot
    '''

def test_gridcount_1D():
    '''
    N = 20
    data = np.random.rayleigh(1, size=(N,))
    >>> data = array([ 0.75355792,  0.72779194,  0.94149169,  0.07841119,  2.32291887,
    ...        1.10419995,  0.77055114,  0.60288273,  1.36883635,  1.74754326,
    ...        1.09547561,  1.01671133,  0.73211143,  0.61891719,  0.75903487,
    ...        1.8919469 ,  0.72433808,  1.92973094,  0.44749838,  1.36508452])
    >>> x = np.linspace(0, max(data.ravel()) + 1, 10)
    >>> dx = x[1] - x[0]
    >>> c = wk.gridcount(data, x)
    >>> c
    array([ 1.,  6.,  7.,  2.,  1.,  2.,  1.,  0.,  0.,  0.])

    h = plb.plot(x, c, '.')  # 1D histogram
    h1 = plb.plot(x, c / dx / N)  # 1D probability density plot
    t = np.trapz(c / dx / N, x)
    print(t)
    '''
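

# Sketch of the 1-D linear binning that gridcount performs for d == 1 (an
# illustration only, not part of wafo.kdetools): each data point splits a
# unit of mass between its two neighbouring grid nodes in proportion to
# proximity, so the weights sum to len(data).
def linbin_1d(data, x):
    data, x = np.asarray(data), np.asarray(x)
    dx = x[1] - x[0]
    c = np.zeros(x.size)
    binx = np.clip(((data - x[0]) / dx).astype(int), 0, x.size - 2)
    w = (data - x[binx]) / dx  # fractional position within the bin
    for b, wi in zip(binx, w):
        c[b] += 1 - wi   # weight assigned to the left node
        c[b + 1] += wi   # weight assigned to the right node
    return c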

def test_gridcount_2D():
    '''
    N = 20
    data = np.random.rayleigh(1, size=(2, N))
    >>> data = array([[ 0.38103275,  0.35083136,  0.90024207,  1.88230239,  0.96815399,
    ...        0.57392873,  1.63367908,  1.20944125,  2.03887811,  0.81789145,
    ...        0.69302049,  1.40856592,  0.92156032,  2.14791432,  2.04373821,
    ...        0.69800708,  0.58428735,  1.59128776,  2.05771405,  0.87021964],
    ...       [ 1.44080694,  0.39973751,  1.331243  ,  2.48895822,  1.18894158,
    ...        1.40526085,  1.01967897,  0.81196474,  1.37978932,  2.03334689,
    ...        0.870329  ,  1.25106862,  0.5346619 ,  0.47541236,  1.51930093,
    ...        0.58861519,  1.19780448,  0.81548296,  1.56859488,  1.60653533]])
    >>> x = np.linspace(0, max(data.ravel()) + 1, 5)
    >>> dx = x[1] - x[0]
    >>> X = np.vstack((x, x))
    >>> c = wk.gridcount(data, X)
    >>> c
    array([[ 0.38922806,  0.8987982 ,  0.34676493,  0.21042807,  0.        ],
           [ 1.15012203,  5.16513541,  3.19250588,  0.55420752,  0.        ],
           [ 0.74293418,  3.42517219,  1.97923195,  0.76076621,  0.        ],
           [ 0.02063536,  0.31054405,  0.71865964,  0.13486633,  0.        ],
           [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ]])

    h = plb.plot(x, c, '.')  # 1D histogram
    h1 = plb.plot(x, c / dx / N)  # 1D probability density plot
    t = np.trapz(c / dx / N, x)
    print(t)
    '''

def test_gridcount_4D():
    '''
    N = 20
    data = np.random.rayleigh(1, size=(2, N))
    >>> data = array([[ 0.38103275,  0.35083136,  0.90024207,  1.88230239,  0.96815399,
    ...        0.57392873,  1.63367908,  1.20944125,  2.03887811,  0.81789145],
    ...       [ 0.69302049,  1.40856592,  0.92156032,  2.14791432,  2.04373821,
    ...        0.69800708,  0.58428735,  1.59128776,  2.05771405,  0.87021964],
    ...       [ 1.44080694,  0.39973751,  1.331243  ,  2.48895822,  1.18894158,
    ...        1.40526085,  1.01967897,  0.81196474,  1.37978932,  2.03334689],
    ...       [ 0.870329  ,  1.25106862,  0.5346619 ,  0.47541236,  1.51930093,
    ...        0.58861519,  1.19780448,  0.81548296,  1.56859488,  1.60653533]])
    >>> x = np.linspace(0, max(data.ravel()) + 1, 3)
    >>> dx = x[1] - x[0]
    >>> X = np.vstack((x, x, x, x))
    >>> c = wk.gridcount(data, X)
    >>> c
    array([[[[  1.77163904e-01,   3.41883175e-01,   3.13810608e-03],
             [  1.83770124e-01,   3.58861043e-01,   7.05946179e-03],
             [  0.00000000e+00,   2.91835067e-03,   6.38695629e-04]],
    <BLANKLINE>
            [[  5.72573585e-01,   5.72071865e-01,   6.71606255e-03],
             [  4.35845892e-01,   8.80697705e-01,   1.09099593e-01],
             [  0.00000000e+00,   3.63686503e-02,   1.00358265e-02]],
    <BLANKLINE>
            [[  3.48549923e-03,   3.46939323e-03,   0.00000000e+00],
             [  3.07662204e-03,   2.22868504e-01,   6.61257395e-02],
             [  0.00000000e+00,   1.88555613e-02,   5.67244468e-03]]],
    <BLANKLINE>
    <BLANKLINE>
           [[[  1.87720108e-01,   5.97977973e-01,   2.11731327e-02],
             [  2.56357594e-01,   6.28962785e-01,   5.44614852e-02],
             [  0.00000000e+00,   2.60268355e-02,   5.69610302e-03]],
    <BLANKLINE>
            [[  6.09557834e-01,   8.58566538e-01,   4.53139824e-02],
             [  6.14958970e-01,   1.47373158e+00,   1.95935584e-01],
             [  0.00000000e+00,   1.07959459e-01,   2.44053065e-02]],
    <BLANKLINE>
            [[  4.05931870e-02,   4.04056116e-02,   0.00000000e+00],
             [  3.58312786e-02,   1.18008528e-01,   2.47717418e-02],
             [  0.00000000e+00,   7.06358976e-03,   2.12498697e-03]]],
    <BLANKLINE>
    <BLANKLINE>
           [[[  0.00000000e+00,   0.00000000e+00,   0.00000000e+00],
             [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00],
             [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00]],
    <BLANKLINE>
            [[  0.00000000e+00,   0.00000000e+00,   0.00000000e+00],
             [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00],
             [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00]],
    <BLANKLINE>
            [[  0.00000000e+00,   0.00000000e+00,   0.00000000e+00],
             [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00],
             [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00]]]])

    h = plb.plot(x, c, '.')  # 1D histogram
    h1 = plb.plot(x, c / dx / N)  # 1D probability density plot
    t = np.trapz(c / dx / N, x)
    print(t)
    '''

if __name__ == '__main__':
    import doctest
    doctest.testmod()