Updated kdetools.py (but still not working correctly)

Small cosmetic fixes to other files
master
per.andreas.brodtkorb 14 years ago
parent da9c0695d0
commit b581ec4051

@ -7,6 +7,18 @@ turning_points :
wave_amplitudes : wave_amplitudes :
wave_periods : wave_periods :
waves : waves :
Examples
--------
In order to view the documentation do the following in an ipython window:
>>> import wafo.definitions as wd
>>> wd.crossings()
or
>>> wd.crossings?
""" """
def wave_amplitudes(): def wave_amplitudes():
r""" r"""

@ -13,11 +13,21 @@ from __future__ import division
import numpy as np import numpy as np
from numpy import pi, sqrt, atleast_2d, exp, newaxis #@UnresolvedImport from numpy import pi, sqrt, atleast_2d, exp, newaxis #@UnresolvedImport
import scipy import scipy
from scipy.linalg import sqrtm from scipy import linalg
from scipy.special import gamma from scipy.special import gamma
from misc import tranproc, trangood from misc import tranproc, trangood
from itertools import product from itertools import product
_stats_epan=(1. / 5, 3. / 5, np.inf)
_stats_biwe=(1. / 7, 5. / 7, 45. / 2),
_stats_triw=(1. / 9, 350. / 429, np.inf),
_stats_rect=(1. / 3, 1. / 2, np.inf),
_stats_tria=(1. / 6, 2. / 3, np.inf),
_stats_lapl=(2, 1. / 4, np.inf),
_stats_logi=(pi ** 2 / 3, 1. / 6, 1 / 42),
_stats_gaus=(1, 1. / (2 * sqrt(pi)), 3. / (8 * sqrt(pi)))
def sphere_volume(d, r=1.0): def sphere_volume(d, r=1.0):
""" """
@ -47,7 +57,7 @@ def sphere_volume(d, r=1.0):
class kde(object): class KDE(object):
""" Representation of a kernel-density estimate using Gaussian kernels. """ Representation of a kernel-density estimate using Gaussian kernels.
Parameters Parameters
@ -84,22 +94,52 @@ class kde(object):
obtain the kernel covariance matrix. Set this method to obtain the kernel covariance matrix. Set this method to
kde.scotts_factor or kde.silverman_factor (or subclass to provide your kde.scotts_factor or kde.silverman_factor (or subclass to provide your
own). The default is scotts_factor. own). The default is scotts_factor.
Example
-------
""" """
def __init__(self, dataset, **kwds): def __init__(self, dataset, hs=None, kernel=None,L2=None,alpha=0.0):
self.kernel = 'gauss' self.kernel = kernel if kernel else Kernel('gauss')
self.hs = None self.hs = hs
self.hsmethod = None self.L2 = L2
self.L2 = None self.alpha = alpha
self.alpha = 0
self.__dict__.update(kwds)
self.dataset = atleast_2d(dataset) self.dataset = atleast_2d(dataset)
self.d, self.n = self.dataset.shape self.d, self.n = self.dataset.shape
self._compute_covariance() self._compute_smoothing()
def _compute_smoothing(self):
"""Computes the smoothing matrix
"""
get_smoothing = self.kernel.get_smoothing
h = self.hs
if h is None:
h = get_smoothing(self.dataset)
hsiz = h.shape
if (min(hsiz)==1) or (self.d==1):
if max(hsiz)==1:
h = h*np.ones(self.d)
else:
h.shape = (self.d,) # make sure it has the correct dimension
# If h negative calculate automatic values
ind, = np.where(h<=0)
for i in ind.tolist(): #
h[i] = get_smoothing(self.dataset[i])
deth = h.prod()
self.inv_hs = linalg.diag(1.0/h)
else: #fully general smoothing matrix
deth = linalg.det(h)
if deth<=0:
raise ValueError('bandwidth matrix h must be positive definit!')
self.inv_hs = linalg.inv(h)
self.hs = h
self._norm_factor = deth * self.n
def evaluate(self, points): def evaluate(self, points):
"""Evaluate the estimated pdf on a set of points. """Evaluate the estimated pdf on a set of points.
@ -140,23 +180,23 @@ class kde(object):
# there are more points than data, so loop over data # there are more points than data, so loop over data
for i in range(self.n): for i in range(self.n):
diff = self.dataset[:, i, np.newaxis] - points diff = self.dataset[:, i, np.newaxis] - points
tdiff = np.dot(self.inv_cov, diff) tdiff = np.dot(self.inv_hs, diff)
energy = np.sum(diff * tdiff, axis=0) / 2.0 result += self.kernel(tdiff)
result += np.exp(-energy)
else: else:
# loop over points # loop over points
for i in range(m): for i in range(m):
diff = self.dataset - points[:, i, np.newaxis] diff = self.dataset - points[:, i, np.newaxis]
tdiff = np.dot(self.inv_cov, diff) tdiff = np.dot(self.inv_hs, diff)
energy = sum(diff * tdiff, axis=0) / 2.0 tmp = self.kernel(tdiff)
result[i] = np.sum(np.exp(-energy), axis=0) result[i] = tmp.sum(axis=-1)
result /= self._norm_factor result /= (self._norm_factor*self.kernel.norm_factor(d,self.n))
return result return result
__call__ = evaluate __call__ = evaluate
#function [f, hs,lambda]= kdefun(A,options,varargin) #function [f, hs,lambda]= kdefun(A,options,varargin)
#%KDEFUN Kernel Density Estimator. #%KDEFUN Kernel Density Estimator.
#% #%
@ -433,71 +473,106 @@ class kde(object):
# hs=h; # hs=h;
#end #end
class _Kernel(object):
def mkernel_multi(X, p=None): def __init__(self, r=1.0, stats=None):
self.r = r # radius of kernel
self.stats = stats
def norm_factor(self, d=1, n=None):
return 1.0
def norm_kernel(self, x):
X = np.atleast_2d(x)
return self._kernel(X)/self.norm_factor(*X.shape)
def kernel(self, x):
return self._kernel(np.atleast_2d(x))
__call__ = kernel
class _KernelMulti(_Kernel):
# p=0; %Sphere = rect for 1D # p=0; %Sphere = rect for 1D
# p=1; %Multivariate Epanechnikov kernel. # p=1; %Multivariate Epanechnikov kernel.
# p=2; %Multivariate Bi-weight Kernel # p=2; %Multivariate Bi-weight Kernel
# p=3; %Multi variate Tri-weight Kernel # p=3; %Multi variate Tri-weight Kernel
# p=4; %Multi variate Four-weight Kernel # p=4; %Multi variate Four-weight Kernel
d, n = X.shape def __init__(self, r=1.0, p=1, stats=None):
r = 1 # radius of the kernel self.r = r
self.p = p
self.stats = stats
def norm_factor(self, d=1, n=None):
r = self.r
p = self.p
c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(d, r) / np.prod(np.r_[(d + 2):(2 * p + d + 1):2])# normalizing constant c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(d, r) / np.prod(np.r_[(d + 2):(2 * p + d + 1):2])# normalizing constant
# c = beta(r+1,r+1)*vsph(d,b)*(2^(2*r)); % Wand and Jones pp 31 return c
# the commented c above does not yield the right scaling for d>1 def _kernel(self, x):
x2 = X ** 2 r = self.r
return ((1.0 - x2.sum(axis=0) / r ** 2).clip(min=0.0)) ** p / c p = self.p
x2 = x ** 2
def mkernel_epanechnikov(X): return ((1.0 - x2.sum(axis=0) / r ** 2).clip(min=0.0)) ** p
return mkernel_multi(X, p=1)
def mkernel_biweight(X): mkernel_epanechnikov = _KernelMulti(p=1, stats=_stats_epan)
return mkernel_multi(X, p=2) mkernel_biweight = _KernelMulti(p=2, stats=_stats_biwe)
def mkernel_triweight(X): mkernel_triweight = _KernelMulti(p=3, stats=_stats_triw)
return mkernel_multi(X, p=3)
class _KernelProduct(_KernelMulti):
def mkernel_product(X, p):
# p=0; %rectangular # p=0; %rectangular
# p=1; %1D product Epanechnikov kernel. # p=1; %1D product Epanechnikov kernel.
# p=2; %1D product Bi-weight Kernel # p=2; %1D product Bi-weight Kernel
# p=3; %1D product Tri-weight Kernel # p=3; %1D product Tri-weight Kernel
# p=4; %1D product Four-weight Kernel # p=4; %1D product Four-weight Kernel
d, n = X.shape
r = 1.0 # radius def norm_factor(self, d=1, n=None):
pdf = (1 - (X / r) ** 2).clip(min=0.0) r = self.r
p = self.p
c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(1, r) / np.prod(np.r_[(1 + 2):(2 * p + 2):2])# normalizing constant c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(1, r) / np.prod(np.r_[(1 + 2):(2 * p + 2):2])# normalizing constant
return np.prod(pdf, axis=0) / c ** d return c ** d
def _kernel(self, x):
def mkernel_p1epanechnikov(X): r = self.r # radius
return mkernel_product(X, p=1) pdf = (1 - (x / r) ** 2).clip(min=0.0)
def mkernel_p1biweight(X): return pdf.prod(axis=0)
return mkernel_product(X, p=2)
def mkernel_p1triweight(X): mkernel_p1epanechnikov = _KernelProduct(p=1, stats=_stats_epan)
return mkernel_product(X, p=3) mkernel_p1biweight = _KernelProduct(p=2, stats=_stats_biwe)
mkernel_p1triweight = _KernelProduct(p=3, stats=_stats_triw)
def mkernel_rectangular(X):
d, n = X.shape class _KernelRectangular(_Kernel):
return np.where(np.all(np.abs(X) <= 1, axis=0), 0.5 ** d, 0.0) def _kernel(self, x):
return np.where(np.all(np.abs(x) <= self.r, axis=0), 1, 0.0)
def mkernel_triangular(X): def norm_factor(self, d=1, n=None):
pdf = (1 - np.abs(X)).clip(min=0.0) r = self.r
return np.prod(pdf, axis=0) return (2*r) ** d
mkernel_rectangular = _KernelRectangular(stats=_stats_rect)
def mkernel_gaussian(X):
x2 = X ** 2 class _KernelTriangular(_Kernel):
d, n = X.shape def _kernel(self, x):
return (2 * pi) ** (-d / 2) * exp(-0.5 * x2.sum(axis=0)) pdf = (1 - np.abs(x)).clip(min=0.0)
return pdf.prod(axis=0)
def mkernel_laplace(X): mkernel_triangular = _KernelTriangular(stats=_stats_tria)
''' Return multivariate laplace kernel'''
d, n = X.shape class _KernelGaussian(_Kernel):
absX = np.abs(X) def _kernel(self, x):
return 0.5 ** d * exp(-absX.sum(axis=0)) x2 = x ** 2
return exp(-0.5 * x2.sum(axis=0))
def mkernel_logistic(X): def norm_factor(self, d=1, n=None):
''' Return multivariate logistic kernel''' return (2 * pi) ** (d / 2.0)
s = exp(X) mkernel_gaussian = _KernelGaussian(stats=_stats_gaus)
#def mkernel_gaussian(X):
# x2 = X ** 2
# d = X.shape[0]
# return (2 * pi) ** (-d / 2) * exp(-0.5 * x2.sum(axis=0))
class _KernelLaplace(_Kernel):
def _kernel(self, x):
absX = np.abs(x)
return exp(-absX.sum(axis=0))
def norm_factor(self, d=1, n=None):
return 2**d
mkernel_laplace = _KernelLaplace(stats=_stats_lapl)
class _KernelLogistic(_Kernel):
def _kernel(self, x):
s = exp(x)
return np.prod(s / (s + 1) ** 2, axis=0) return np.prod(s / (s + 1) ** 2, axis=0)
mkernel_logistic = _KernelLogistic(stats=_stats_logi)
_MKERNEL_DICT = dict( _MKERNEL_DICT = dict(
epan=mkernel_epanechnikov, epan=mkernel_epanechnikov,
@ -513,17 +588,6 @@ _MKERNEL_DICT = dict(
gaus=mkernel_gaussian gaus=mkernel_gaussian
) )
_KERNEL_EXPONENT_DICT = dict(re=0, sp=0, ep=1, bi=2, tr=3, fo=4, fi=5, si=6, se=7) _KERNEL_EXPONENT_DICT = dict(re=0, sp=0, ep=1, bi=2, tr=3, fo=4, fi=5, si=6, se=7)
_KERNEL_STATS_DICT = dict(epan=(1. / 5, 3. / 5, np.inf),
biwe=(1. / 7, 5. / 7, 45. / 2),
triw=(1. / 9, 350. / 429, np.inf),
rect=(1. / 3, 1. / 2, np.inf),
tria=(1. / 6, 2. / 3, np.inf),
lapl=(2, 1. / 4, np.inf),
logi=(pi ** 2 / 3, 1. / 6, 1 / 42),
gaus=(1, 1. / (2 * sqrt(pi)), 3. / (8 * sqrt(pi)))
)
class Kernel(object): class Kernel(object):
''' '''
@ -574,9 +638,13 @@ class Kernel(object):
'Density estimation for statistics and data analysis' 'Density estimation for statistics and data analysis'
Chapman and Hall, pp 31, 103, 175 Chapman and Hall, pp 31, 103, 175
''' '''
def __init__(self, name): def __init__(self, name, fun='hns'):
self.kernel = _MKERNEL_DICT[name[:4]] self.kernel = _MKERNEL_DICT[name[:4]]
self.name = self.kernel.__name__.replace('mkernel_','').title() #self.name = self.kernel.__name__.replace('mkernel_', '').title()
try:
self.get_smoothing = getattr(self, fun)
except:
self.get_smoothing = self.hns
def stats(self): def stats(self):
''' Return some 1D statistics of the kernel. ''' Return some 1D statistics of the kernel.
@ -596,8 +664,9 @@ class Kernel(object):
'Kernel smoothing' 'Kernel smoothing'
Chapman and Hall, pp 176. Chapman and Hall, pp 176.
''' '''
name = self.name[2:6] if self.name[:2].lower()=='p1' else self.name[:4] return self.kernel.stats
return _KERNEL_STATS_DICT[name.lower()] #name = self.name[2:6] if self.name[:2].lower() == 'p1' else self.name[:4]
#return _KERNEL_STATS_DICT[name.lower()]
def hns(self, data): def hns(self, data):
''' '''
@ -637,7 +706,7 @@ class Kernel(object):
''' '''
A = np.atleast_2d(data) A = np.atleast_2d(data)
d,n = A.shape n = A.shape[1]
# R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x))
mu2, R, Rdd = self.stats() mu2, R, Rdd = self.stats()
@ -649,6 +718,7 @@ class Kernel(object):
# % the distribution is skewed or has heavy tails # % the distribution is skewed or has heavy tails
# % This lessens the chance of oversmoothing. # % This lessens the chance of oversmoothing.
return np.where(iqr > 0, np.minimum(stdA, iqr / 1.349), stdA) * AMISEconstant return np.where(iqr > 0, np.minimum(stdA, iqr / 1.349), stdA) * AMISEconstant
def hos(self, data): def hos(self, data):
''' Return Oversmoothing Parameter. ''' Return Oversmoothing Parameter.
@ -694,7 +764,7 @@ class Kernel(object):
h = M dimensional optimal value for smoothing parameter h = M dimensional optimal value for smoothing parameter
given the data and kernel. size D x D given the data and kernel. size D x D
data = data matrix, size N x D (D = # dimensions ) data = data matrix, size D x N (D = # dimensions )
kernel = 'epanechnikov' - Epanechnikov kernel. kernel = 'epanechnikov' - Epanechnikov kernel.
'biweight' - Bi-weight kernel. 'biweight' - Bi-weight kernel.
'triweight' - Tri-weight kernel. 'triweight' - Tri-weight kernel.
@ -713,8 +783,13 @@ class Kernel(object):
data = rndnorm(0, 1,20,2) data = rndnorm(0, 1,20,2)
h = hmns(data,'epan'); h = hmns(data,'epan');
See also hns, hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt See also
Reference: --------
hns, hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt
Reference
----------
B. W. Silverman (1986) B. W. Silverman (1986)
'Density estimation for statistics and data analysis' 'Density estimation for statistics and data analysis'
Chapman and Hall, pp 43-48, 87 Chapman and Hall, pp 43-48, 87
@ -723,7 +798,7 @@ class Kernel(object):
'Kernel smoothing' 'Kernel smoothing'
Chapman and Hall, pp 60--63, 86--88 Chapman and Hall, pp 60--63, 86--88
''' '''
# TODO implement more kernels # TODO: implement more kernels
A = np.atleast_2d(data) A = np.atleast_2d(data)
d, n = A.shape d, n = A.shape
@ -732,7 +807,7 @@ class Kernel(object):
return self.hns(data) return self.hns(data)
name = self.name[:4].lower() name = self.name[:4].lower()
if name == 'epan': # Epanechnikov kernel if name == 'epan': # Epanechnikov kernel
a=(8*(d+4)*(2*sqrt(pi))**d/sphere_volume(d))**(1./(4+d)) a = (8.0 * (d + 4.0) * (2 * sqrt(pi)) ** d / sphere_volume(d)) ** (1. / (4.0 + d))
elif name == 'biwe': # Bi-weight kernel elif name == 'biwe': # Bi-weight kernel
a = 2.7779; a = 2.7779;
if d > 2: if d > 2:
@ -742,18 +817,16 @@ class Kernel(object):
if d > 2: if d > 2:
raise ValueError('not implemented for d>2') raise ValueError('not implemented for d>2')
elif name == 'gaus': # Gaussian kernel elif name == 'gaus': # Gaussian kernel
a = (4/(d+2))**(1./(d+4)) a = (4.0 / (d + 2.0)) ** (1. / (d + 4.0))
else: else:
raise ValueError('Unknown kernel.') raise ValueError('Unknown kernel.')
covA = scipy.cov(A) covA = scipy.cov(A)
return a*sqrtm(covA)*n*(-1./(d+4)) return a * linalg.sqrtm(covA) * n * (-1. / (d + 4))
def norm_factor(self, d=1,n=None):
return self.kernel.norm_factor(n,d)
def evaluate(self, X): def evaluate(self, X):
return self.kernel(np.atleast_2d(X)) return self.kernel(np.atleast_2d(X))
__call__ = evaluate __call__ = evaluate
@ -908,6 +981,7 @@ def accum(accmap, a, func=None, size=None, fill_value=0, dtype=None):
out[s] = func(vals[s]) out[s] = func(vals[s])
return out return out
def gridcount(data, X): def gridcount(data, X):
''' '''
GRIDCOUNT D-dimensional histogram using linear binning. GRIDCOUNT D-dimensional histogram using linear binning.
@ -939,16 +1013,21 @@ def gridcount(data,X):
Example Example
------- -------
N = 500; >>> import numpy as np
data = rndray(1,N,1); >>> import wafo.kdetools as wk
x = linspace(0,max(data)+1,50).'; >>> import pylab as plb
dx = x(2)-x(1); >>> N = 500;
c = gridcount(data,x); >>> data = np.random.rayleigh(1,N)
plot(x,c,'.') 1D histogram >>> x = np.linspace(0,max(data)+1,50)
plot(x,c/dx/N) 1D probability density plot >>> dx = x[1]-x[0]
trapz(x,c/dx/N) >>> c = wk.gridcount(data,x)
>>> h = plb.plot(x,c,'.') # 1D histogram
>>> h1 = plb.plot(x,c/dx/N) # 1D probability density plot
>>> np.trapz(x,c/dx/N)
See also bincount, kdebin See also
--------
bincount, accum, kdebin
Reference Reference
---------- ----------
@ -971,71 +1050,101 @@ def gridcount(data,X):
datlo = dat.min(axis=1) datlo = dat.min(axis=1)
datup = dat.max(axis=1) datup = dat.max(axis=1)
if ((datlo<xlo) or (xup<datup)).any(): if ((datlo < xlo) | (xup < datup)).any():
raise ValueError('X does not include whole range of the data!') raise ValueError('X does not include whole range of the data!')
csiz = np.repeat(inc, d) csiz = np.repeat(inc, d)
binx = np.assarray(np.floor((dat-xlo[:, newaxis])/dx[:, newaxis]),dtype=int) binx = np.asarray(np.floor((dat - xlo[:, newaxis]) / dx), dtype=int)
w = dx.prod() w = dx.prod()
abs = np.abs
if d == 1: if d == 1:
x.shape = (-1,)
c = (accum(binx, (x[binx + 1] - data), size=[inc, ]) + c = (accum(binx, (x[binx + 1] - data), size=[inc, ]) +
accum(binx, (data - x[binx]), size=[inc, ])) / w accum(binx, (data - x[binx]), size=[inc, ])) / w
elif d == 2: elif d == 2:
b2 = binx[1] b2 = binx[1]
b1 = binx[0] b1 = binx[0]
c = (accum([b1,b2] ,abs(prod(([X(b1+1,1) X(b2+1,2)]-data),2)),size=[inc,inc])+... c_ = np.c_
accum([b1+1,b2] ,abs(prod(([X(b1,1) X(b2+1,2)]-data),2)),size=[inc,inc])+... stk = np.vstack
accum([b1 ,b2+1],abs(prod(([X(b1+1,1) X(b2,2)]-data),2)),size=[inc,inc])+... c = (accum(c_[b1, b2] , abs(np.prod(stk([X[0, b1 + 1], X[1, b2 + 1]]) - data, axis=0)), size=[inc, inc]) +
accum([b1+1,b2+1],abs(prod(([X(b1,1) X(b2,2)]-data),2)),size=[inc,inc]))/w; accum(c_[b1 + 1, b2] , abs(np.prod(stk([X[0, b1], X[1, b2 + 1]]) - data, axis=0)), size=[inc, inc]) +
accum(c_[b1 , b2 + 1], abs(np.prod(stk([X[0, b1 + 1], X[1, b2]]) - data, axis=0)), size=[inc, inc]) +
accum(c_[b1 + 1, b2 + 1], abs(np.prod(stk([X[0, b1], X[1, b2]]) - data, axis=0)), size=[inc, inc])) / w
else: # % d>2 else: # % d>2
useSparse = 0; raise ValueError('Not implemented for d>2')
Nc = prod(csiz); Nc = csiz.prod()
c = zeros(Nc,1); c = np.zeros((Nc, 1))
fact2 = [0 inc*(1:d-1)]; fact2 = inc * np.arange(d)
fact1 = [1 cumprod(csiz(1:d-1))]; fact1 = csiz.cumprod() / inc
fact1 = fact1(ones(n,1),:); #fact1 = fact1(ones(n,1),:);
for ir=0:2^(d-1)-1, # for ir in xrange(2**(d-1)):
bt0(:,:,1) = bitget(ir,1:d); # bt0[:,:,1] = bitget(ir,1:d)
bt0(:,:,2) = ~bt0(:,:,1); # bt0[:,:,2] = 1-bt0[:,:,1]
# for ix in range(2):
for ix = 0:1 # one = mod(ix,2)+1;
one = mod(ix,2)+1; # two = mod(ix+1,2)+1;
two = mod(ix+1,2)+1; # # Convert to linear index (faster than sub2ind)
% Convert to linear index (faster than sub2ind) # b1 = sum((binx + bt0(ones(n,1),:,one)-1).*fact1,2)+1; #%linear index to c
b1 = sum((binx + bt0(ones(n,1),:,one)-1).*fact1,2)+1; %linear index to c # bt2 = bt0(:,:,two) + fact2;
bt2 = bt0(:,:,two) + fact2; # b2 = binx + bt2(ones(n,1),:); #% linear index to X
b2 = binx + bt2(ones(n,1),:); % linear index to X #
try # c = c + accum(b1,abs(prod(X(b2)-data,2)),[Nc,1]);
if useSparse # #c = c + accum([b1,ones(n,1)],abs(prod(X(b2)-data,2)),[Nc,1]);
% Fast gridding using sparse # #[len,bin,val] = bincount(b1,abs(prod(X(b2)-data,2)));
c = c + sparse(b1,1,abs(prod(X(b2)-data,2)),Nc,1); # #c(bin) = c(bin)+val;
else #
c = c + accum(b1,abs(prod(X(b2)-data,2)),[Nc,1]); # #end
%c = c + accum([b1,ones(n,1)],abs(prod(X(b2)-data,2)),[Nc,1]); # #end
%[len,bin,val] = bincount(b1,abs(prod(X(b2)-data,2))); # c = reshape(c/w,csiz);
%c(bin) = c(bin)+val; #end
end if d == 2: #% make sure c is stored in the same way as meshgrid
catch c = c.T
c = c + sparse(b1,1,abs(prod(X(b2)-data,2)),Nc,1); elif d == 3:
end c = c.transpose(1, 0, 2)
end
end return c
c = reshape(c/w,csiz); def test_kde():
end import numpy as np
switch d % make sure c is stored in the same way as meshgrid import wafo.kdetools as wk
case 2, c = c.'; import pylab as plb
case 3, c = permute(c,[2 1 3]); N = 500;
end data = np.random.rayleigh(1, size=(1, N))
return kde = wk.KDE(data)
x = np.linspace(0, max(data.ravel()) + 1, 10)
#X,Y = np.meshgrid(x, x)
f = kde(x)
#plb.hist(data.ravel())
plb.plot(x,f)
plb.show()
def test_gridcount():
import numpy as np
import wafo.kdetools as wk
import pylab as plb
N = 500;
data = np.random.rayleigh(1, size=(2, N))
x = np.linspace(0, max(data.ravel()) + 1, 10)
X = np.vstack((x, x))
dx = x[1] - x[0]
c = wk.gridcount(data, X)
h = plb.contourf(x, x, c)
plb.show()
h = plb.plot(x, c, '.') # 1D histogram
h1 = plb.plot(x, c / dx / N) # 1D probability density plot
t = np.trapz(x, c / dx / N)
print(t)
def main(): def main():
import doctest import doctest
doctest.testmod() doctest.testmod()
if __name__ == '__main__': if __name__ == '__main__':
main() #main()
#test_gridcount()
test_kde()

@ -637,15 +637,16 @@ def findtp(x, h=0.0, kind=None):
Example: Example:
-------- --------
>>> import wafo.data >>> import wafo.data
>>> import pylab >>> import pylab as plb
>>> import wafo.misc as wm
>>> x = wafo.data.sea() >>> x = wafo.data.sea()
>>> x1 = x[0:200,:] >>> x1 = x[0:200,:]
>>> itp = findtp(x1[:,1],0,'Mw') >>> itp = wm.findtp(x1[:,1],0,'Mw')
>>> itph = findtp(x1[:,1],0.3,'Mw') >>> itph = wm.findtp(x1[:,1],0.3,'Mw')
>>> tp = x1[itp,:] >>> tp = x1[itp,:]
>>> tph = x1[itph,:] >>> tph = x1[itph,:]
>>> a = pylab.plot(x1[:,0],x1[:,1],tp[:,0],tp[:,1],'ro',tph[:,1],tph[:,1],'k.') >>> a = plb.plot(x1[:,0],x1[:,1],tp[:,0],tp[:,1],'ro',tph[:,1],tph[:,1],'k.')
>>> pylab.close('all') >>> plb.close('all')
>>> itp >>> itp
array([ 11, 21, 22, 24, 26, 28, 31, 39, 43, 45, 47, 51, 56, array([ 11, 21, 22, 24, 26, 28, 31, 39, 43, 45, 47, 51, 56,
64, 70, 78, 82, 84, 89, 94, 101, 108, 119, 131, 141, 148, 64, 70, 78, 82, 84, 89, 94, 101, 108, 119, 131, 141, 148,
@ -731,13 +732,14 @@ def findtc(x_in, v=None, kind=None):
Example: Example:
-------- --------
>>> import wafo.data >>> import wafo.data
>>> import pylab >>> import pylab as plb
>>> import wafo.misc as wm
>>> x = wafo.data.sea() >>> x = wafo.data.sea()
>>> x1 = x[0:200,:] >>> x1 = x[0:200,:]
>>> itc, iv = findtc(x1[:,1],0,'dw') >>> itc, iv = wm.findtc(x1[:,1],0,'dw')
>>> tc = x1[itc,:] >>> tc = x1[itc,:]
>>> a = pylab.plot(x1[:,0],x1[:,1],tc[:,0],tc[:,1],'ro') >>> a = plb.plot(x1[:,0],x1[:,1],tc[:,0],tc[:,1],'ro')
>>> pylab.close('all') >>> plb.close('all')
See also See also
-------- --------
@ -833,12 +835,13 @@ def findoutliers(x, zcrit=0.0, dcrit=None, ddcrit=None, verbose=False):
-------- --------
>>> import numpy as np >>> import numpy as np
>>> import wafo >>> import wafo
>>> import wafo.misc as wm
>>> xx = wafo.data.sea() >>> xx = wafo.data.sea()
>>> dt = np.diff(xx[:2,0]) >>> dt = np.diff(xx[:2,0])
>>> dcrit = 5*dt >>> dcrit = 5*dt
>>> ddcrit = 9.81/2*dt*dt >>> ddcrit = 9.81/2*dt*dt
>>> zcrit = 0 >>> zcrit = 0
>>> [inds, indg] = findoutliers(xx[:,1],zcrit,dcrit,ddcrit,verbose=True) >>> [inds, indg] = wm.findoutliers(xx[:,1],zcrit,dcrit,ddcrit,verbose=True)
Found 0 spurious positive jumps of Dx Found 0 spurious positive jumps of Dx
Found 0 spurious negative jumps of Dx Found 0 spurious negative jumps of Dx
Found 37 spurious positive jumps of D^2x Found 37 spurious positive jumps of D^2x
@ -1179,7 +1182,8 @@ def getshipchar(value, property="max_deadweight"):
Example Example
--------- ---------
>>> getshipchar(10,'service_speed') >>> import wafo.misc as wm
>>> wm.getshipchar(10,'service_speed')
{'beam': 29.0, {'beam': 29.0,
'beamSTD': 2.9000000000000004, 'beamSTD': 2.9000000000000004,
'draught': 9.5999999999999996, 'draught': 9.5999999999999996,

@ -17,7 +17,7 @@ from wafo.transform.core import TrData
from wafo.transform.models import TrHermite, TrOchi, TrLinear from wafo.transform.models import TrHermite, TrOchi, TrLinear
from wafo.interpolate import SmoothSpline from wafo.interpolate import SmoothSpline
from scipy.interpolate.interpolate import interp1d from scipy.interpolate.interpolate import interp1d
from scipy.integrate.quadrature import cumtrapz from scipy.integrate.quadrature import cumtrapz #@UnresolvedImport
import warnings import warnings
import numpy as np import numpy as np
@ -39,6 +39,7 @@ from wafo.misc import (nextpow2, findtp, findtc, findcross, sub_dict_select,
from wafodata import WafoData from wafodata import WafoData
from plotbackend import plotbackend from plotbackend import plotbackend
import matplotlib import matplotlib
from scipy.stats.stats import skew, kurtosis
matplotlib.interactive(True) matplotlib.interactive(True)
_wafocov = JITImport('wafo.covariance') _wafocov = JITImport('wafo.covariance')
_wafospec = JITImport('wafo.spectrum') _wafospec = JITImport('wafo.spectrum')
@ -47,8 +48,6 @@ __all__ = ['TimeSeries', 'LevelCrossings', 'CyclePairs', 'TurningPoints',
'sensortypeid', 'sensortype'] 'sensortypeid', 'sensortype']
class LevelCrossings(WafoData): class LevelCrossings(WafoData):
''' '''
Container class for Level crossing data objects in WAFO Container class for Level crossing data objects in WAFO
@ -59,7 +58,18 @@ class LevelCrossings(WafoData):
number of upcrossings number of upcrossings
args : array-like args : array-like
crossing levels crossing levels
Examples
--------
>>> import wafo.data
>>> import wafo.objects as wo
>>> x = wafo.data.sea()
>>> ts = wo.mat2timeseries(x)
>>> tp = ts.turning_points()
>>> mm = tp.cycle_pairs()
>>> lc = mm.level_crossings()
>>> h2 = lc.plot()
''' '''
def __init__(self, *args, **kwds): def __init__(self, *args, **kwds):
options = dict(title='Level crossing spectrum', options = dict(title='Level crossing spectrum',
@ -448,7 +458,16 @@ class CyclePairs(WafoData):
data : array_like data : array_like
args : vector for 1D args : vector for 1D
Examples
--------
>>> import wafo.data
>>> import wafo.objects as wo
>>> x = wafo.data.sea()
>>> ts = wo.mat2timeseries(x)
>>> tp = ts.turning_points()
>>> mm = tp.cycle_pairs()
>>> h1 = mm.plot(marker='x')
''' '''
def __init__(self, *args, **kwds): def __init__(self, *args, **kwds):
self.type_ = kwds.get('type_', 'max2min') self.type_ = kwds.get('type_', 'max2min')
@ -492,7 +511,7 @@ class CyclePairs(WafoData):
>>> ts = wafo.objects.mat2timeseries(wafo.data.sea()) >>> ts = wafo.objects.mat2timeseries(wafo.data.sea())
>>> tp = ts.turning_points() >>> tp = ts.turning_points()
>>> mm = tp.cycle_pairs() >>> mm = tp.cycle_pairs()
>>> h = mm.plot('.') >>> h = mm.plot(marker='.')
>>> bv = range(3,9) >>> bv = range(3,9)
>>> D = mm.damage(beta=bv) >>> D = mm.damage(beta=bv)
>>> D >>> D
@ -533,7 +552,7 @@ class CyclePairs(WafoData):
>>> ts = wafo.objects.mat2timeseries(wafo.data.sea()) >>> ts = wafo.objects.mat2timeseries(wafo.data.sea())
>>> tp = ts.turning_points() >>> tp = ts.turning_points()
>>> mm = tp.cycle_pairs() >>> mm = tp.cycle_pairs()
>>> h = mm.plot('.') >>> h = mm.plot(marker='.')
>>> lc = mm.level_crossings() >>> lc = mm.level_crossings()
>>> h2 = lc.plot() >>> h2 = lc.plot()
@ -608,6 +627,16 @@ class TurningPoints(WafoData):
---------------- ----------------
data : array_like data : array_like
args : vector for 1D args : vector for 1D
Examples
--------
>>> import wafo.data
>>> import wafo.objects as wo
>>> x = wafo.data.sea()
>>> ts = wo.mat2timeseries(x)
>>> tp = ts.turning_points()
>>> h1 = tp.plot(marker='x')
''' '''
def __init__(self, *args, **kwds): def __init__(self, *args, **kwds):
super(TurningPoints, self).__init__(*args, **kwds) super(TurningPoints, self).__init__(*args, **kwds)
@ -643,7 +672,7 @@ class TurningPoints(WafoData):
>>> ts = wafo.objects.mat2timeseries(x) >>> ts = wafo.objects.mat2timeseries(x)
>>> tp = ts.turning_points() >>> tp = ts.turning_points()
>>> mM = tp.cycle_pairs() >>> mM = tp.cycle_pairs()
>>> h = mM.plot('x') >>> h = mM.plot(marker='x')
See also See also
@ -700,6 +729,13 @@ class TimeSeries(WafoData):
>>> rf = ts.tocovdata(lag=150) >>> rf = ts.tocovdata(lag=150)
>>> h = rf.plot() >>> h = rf.plot()
>>> tp = ts.turning_points()
>>> mm = tp.cycle_pairs()
>>> h1 = mm.plot(marker='x')
>>> lc = mm.level_crossings()
>>> h2 = lc.plot()
''' '''
def __init__(self, *args, **kwds): def __init__(self, *args, **kwds):
super(TimeSeries, self).__init__(*args, **kwds) super(TimeSeries, self).__init__(*args, **kwds)
@ -970,6 +1006,7 @@ class TimeSeries(WafoData):
See also See also
-------- --------
troptset, lc2tr, cdf2tr, trplot troptset, lc2tr, cdf2tr, trplot
References References
---------- ----------
Rychlik, I. , Johannesson, P and Leadbetter, M. R. (1997) Rychlik, I. , Johannesson, P and Leadbetter, M. R. (1997)
@ -983,57 +1020,12 @@ class TimeSeries(WafoData):
reconstructed data" reconstructed data"
in Proceedings of 9th ISOPE Conference, Vol III, pp 66-73 in Proceedings of 9th ISOPE Conference, Vol III, pp 66-73
''' '''
# Tested on: Matlab 5.3, 5.2, 5.1
# History:
# revised pab Dec2004
# -Fixed bug: string comparison for def at fault.
# revised pab Nov2004
# -Fixed bug: linextrap was not accounted for
# revised pab july 2004
# revised pab 3 april 2004
# -fixed a bug in hermite estimation: excess changed to kurtosis
# revised pab 29.12.2000
# - added example, hermite and ochi options
# - replaced optional arguments with a options struct
# - default param is now [-5 5 513] -> better to have the discretization
# represented with exact numbers, especially when calculating
# derivatives of the transformation numerically.
# revised pab 19.12.2000
# - updated call edf(X,-inf,[],monitor) to edf(X,[],monitor)
# due to new calling syntax for edf
# modified pab 24.09.2000
# - changed call from norminv to wnorminv
# - also removed the 7 lowest and 7 highest points from
# the estimation using def='mnonlinear'
# (This is similar to what lc2tr does. lc2tr removes
# the 9 highest and 9 lowest TP from the estimation)
# modified pab 09.06.2000
# - made all the *empirical options secret.
# - Added 'mnonlinear' and 'mempirical'
# - Fixed the problem of multip==1 and def=='empirical' by interpolating
# with spline to ensure that the length of g is fixed
# - Replaced the test statistic for def=='empirical' with the one
# obtained when csm1=csm2=1. (Previously only the smoothed test
# statistic was returned)
# modified pab 12.10.1999
# fixed a bug
# added secret output of empirical estimate g2
# modified by svi 29.09.1999
# changed input def by adding new options.
# revised by pab 11.08.99
# changed name from dat2tran to dat2tr
# modified by Per A. Brodtkorb 12.05.1999,15.08.98
# added secret option: to accept multiple data, to monitor the steps
# of estimation of the transformation
# also removed some code and replaced it with a call to lc2tr (cross2tr)
# making the maintenance easier
#
#opt = troptset('plotflag','off','csm',.95,'gsm',.05,.... #opt = troptset('plotflag','off','csm',.95,'gsm',.05,....
# 'param',[-5 5 513],'delay',2,'linextrap','on','ne',7,... # 'param',[-5 5 513],'delay',2,'linextrap','on','ne',7,...
# 'cvar',1,'gvar',1,'multip',0); # 'cvar',1,'gvar',1,'multip',0);
opt = DotDict(chkder=True, plotflag=True, csm=.95, gsm=.05, opt = DotDict(chkder=True, plotflag=True, csm=.95, gsm=.05,
param=[-5, 5, 513], delay=2, ntr=inf, linextrap=True, ne=7, cvar=1, gvar=1, param=[-5, 5, 513], delay=2, ntr=inf, linextrap=True, ne=7, cvar=1, gvar=1,
multip=False, crossdef='uM') multip=False, crossdef='uM')
@ -1053,14 +1045,14 @@ class TimeSeries(WafoData):
elif method[0] == 'm': elif method[0] == 'm':
return cdftr() return cdftr()
elif method[0] == 'h': elif method[0] == 'h':
ga1 = np.skew(self.data) ga1 = skew(self.data)
ga2 = np.kurtosis(self.data, fisher=True) #kurt(xx(n+1:end))-3; ga2 = kurtosis(self.data, fisher=True) #kurt(xx(n+1:end))-3;
up = min(4 * (4 * ga1 / 3) ** 2, 13) up = min(4 * (4 * ga1 / 3) ** 2, 13)
lo = (ga1 ** 2) * 3 / 2; lo = (ga1 ** 2) * 3 / 2;
kurt1 = min(up, max(ga2, lo)) + 3 kurt1 = min(up, max(ga2, lo)) + 3
return TrHermite(mean=ma, var=sa ** 2, skew=ga1, kurt=kurt1) return TrHermite(mean=ma, var=sa ** 2, skew=ga1, kurt=kurt1)
elif method[0] == 'o': elif method[0] == 'o':
ga1 = np.skew(self.data) ga1 = skew(self.data)
return TrOchi(mean=ma, var=sa ** 2, skew=ga1) return TrOchi(mean=ma, var=sa ** 2, skew=ga1)
def turning_points(self, h=0.0, wavetype=None): def turning_points(self, h=0.0, wavetype=None):
@ -1197,13 +1189,13 @@ class TimeSeries(WafoData):
Example: Example:
-------- --------
Histogram of crest2crest waveperiods
>>> import wafo >>> import wafo
>>> import pylab as plb
>>> x = wafo.data.sea() >>> x = wafo.data.sea()
>>> ts = wafo.objects.mat2timeseries(x[0:400,:]) >>> ts = wafo.objects.mat2timeseries(x[0:400,:])
>>> T = ts.wave_periods(vh=0.0,pdef='c2c') >>> T, ix = ts.wave_periods(vh=0.0,pdef='c2c')
>>> h = plb.hist(T)
T = dat2wa(x1,0,'c2c') #% Returns crest2crest waveperiods
subplot(121), waveplot(x1,'-',1,1),subplot(122),histgrm(T)
See also: See also:
-------- --------
@ -1310,20 +1302,10 @@ class TimeSeries(WafoData):
t1 = x[index[(start + dist):nn:step]] t1 = x[index[(start + dist):nn:step]]
T = t1 - t0 T = t1 - t0
## if False: #% Secret option: indices to the actual crossings used.
## index=index.ravel()
## ind = [index(start:(nn-dist):step) index((start+dist):nn:step)].'
## ind = ind(:)
return T, index return T, index
#% Old call: kept just in case
#%T = x(index((start+dist):step:nn),1)-x(index(start:step:(nn-dist)),1)
def reconstruct(self): def reconstruct(self):
# TODO: finish reconstruct
pass pass
def plot_wave(self, sym1='k.', ts=None, sym2='k+', nfig=None, nsub=None, def plot_wave(self, sym1='k.', ts=None, sym2='k+', nfig=None, nsub=None,
stdev=None, vfact=3): stdev=None, vfact=3):
@ -1362,7 +1344,7 @@ class TimeSeries(WafoData):
-------- --------
findtc, plot findtc, plot
''' '''
# TODO: finish reconstruct
nw = 20 nw = 20
tn = self.args tn = self.args
xn = self.data.ravel() xn = self.data.ravel()

@ -10,7 +10,7 @@ from __future__ import division
import math import math
from copy import copy from copy import copy
from scipy.misc import comb, derivative from scipy.misc import comb, derivative #@UnresolvedImport
from scipy import special from scipy import special
from scipy import optimize from scipy import optimize
from scipy import integrate from scipy import integrate
@ -3544,6 +3544,8 @@ for c != 0, and for x >= 0 for all c, and x < 1/abs(c) for c < 0.
class genexpon_gen(rv_continuous): class genexpon_gen(rv_continuous):
def link(self, x, logSF, phat, ix): def link(self, x, logSF, phat, ix):
xn = (x - phat[3]) / phat[4] xn = (x - phat[3]) / phat[4]
b = phat[1]
c = phat[2]
fact1 = (xn + expm1(-c * xn) / c) fact1 = (xn + expm1(-c * xn) / c)
if ix == 0: if ix == 0:
phati = b * fact1 + logSF phati = b * fact1 + logSF

@ -12,7 +12,7 @@ from wafo.plotbackend import plotbackend
from wafo.misc import ecross, findcross from wafo.misc import ecross, findcross
import numdifftools import numdifftools #@UnresolvedImport
from scipy import special from scipy import special
from scipy.linalg import pinv2 from scipy.linalg import pinv2
from scipy import optimize from scipy import optimize

@ -10,88 +10,3 @@ def valarray(shape,value=nan,typecode=None):
out = asarray(out) out = asarray(out)
return out return out
class rv_frozen(object):
    """Frozen continuous or discrete 1D random variable object (RV).

    A frozen RV stores a distribution object together with a fixed set of
    parameters, so that every method below can be called without repeating
    the parameters.

    Methods
    -------
    RV.rvs(size=None)
        - random variates
    RV.pdf(x)
        - probability density function (continuous case)
    RV.pmf(k)
        - probability mass function (discrete case)
    RV.cdf(x)
        - cumulative distribution function
    RV.sf(x)
        - survival function (1-cdf --- sometimes more accurate)
    RV.ppf(q)
        - percent point function (inverse of cdf --- percentiles)
    RV.isf(q)
        - inverse survival function (inverse of sf)
    RV.stats(moments='mv')
        - mean('m'), variance('v'), skew('s'), and/or kurtosis('k')
    RV.moment(n)
        - moment of order n, as computed by ``dist.moment``
    RV.entropy()
        - (differential) entropy of the RV.

    Parameters
    ----------
    x : array-like
        quantiles
    q : array-like
        lower or upper tail probability
    size : int or tuple of ints, optional, keyword
        shape of random variates
    moments : string, optional, keyword
        one or more of 'm' mean, 'v' variance, 's' skewness, 'k' kurtosis
    """

    def __init__(self, dist, *args, **kwds):
        """Freeze ``dist`` with the given parameters.

        Parameters
        ----------
        dist : distribution object
            An ``rv_continuous`` instance, or any other object that provides
            ``fix_loc`` (treated as a discrete distribution).
        *args :
            Positional parameters handed to ``dist.fix_loc_scale`` /
            ``dist.fix_loc``.
        **kwds :
            Only the keywords 'loc' and 'scale' are read; a missing keyword
            is passed on as None.
        """
        self.dist = dist
        loc0 = kwds.get('loc')
        scale0 = kwds.get('scale')
        if isinstance(dist, rv_continuous):
            args, loc0, scale0 = dist.fix_loc_scale(args, loc0, scale0)
            # tuple() guards against fix_loc_scale returning a list.
            self.par = tuple(args) + (loc0, scale0)
        else:  # rv_discrete: location only, no scale parameter
            args, loc0 = dist.fix_loc(args, loc0)
            self.par = tuple(args) + (loc0,)

    def pdf(self, x):
        """Probability density function at x of the given RV."""
        return self.dist.pdf(x, *self.par)

    def cdf(self, x):
        """Cumulative distribution function at x of the given RV."""
        return self.dist.cdf(x, *self.par)

    def ppf(self, q):
        """Percent point function (inverse of cdf) at q of the given RV."""
        return self.dist.ppf(q, *self.par)

    def isf(self, q):
        """Inverse survival function at q of the given RV."""
        return self.dist.isf(q, *self.par)

    def rvs(self, size=None):
        """Random variates of given type."""
        return self.dist.rvs(*self.par, size=size)

    def sf(self, x):
        """Survival function (1-cdf) at x of the given RV."""
        return self.dist.sf(x, *self.par)

    def stats(self, moments='mv'):
        """Some statistics of the given RV."""
        return self.dist.stats(*self.par, moments=moments)

    def moment(self, n):
        """Moment of order n of the given RV.

        Only the first ``dist.numargs`` frozen parameters are passed on;
        the trailing loc (and scale) entries of ``self.par`` are dropped.
        """
        shape_par = self.par[:self.dist.numargs]
        return self.dist.moment(n, *shape_par)

    def entropy(self):
        """Entropy of the given RV, as computed by ``dist.entropy``."""
        return self.dist.entropy(*self.par)

    def pmf(self, k):
        """Probability mass function at k of the given RV."""
        return self.dist.pmf(k, *self.par)
Loading…
Cancel
Save