Updated kdetools.py (but still not working correctly)

Small cosmetic fixes to other files
master
per.andreas.brodtkorb 14 years ago
parent da9c0695d0
commit b581ec4051

@ -1,12 +1,24 @@
"""
WAFO defintions and numenclature
crossings :
cycle_pairs :
turning_points :
wave_amplitudes :
wave_periods :
waves :
crossings :
cycle_pairs :
turning_points :
wave_amplitudes :
wave_periods :
waves :
Examples
--------
In order to view the documentation do the following in an ipython window:
>>> import wafo.definitions as wd
>>> wd.crossings()
or
>>> wd.crossings?
"""
def wave_amplitudes():
r"""

@ -708,7 +708,7 @@ def prbnormnd(correl, a, b, abseps=1e-4, releps=1e-3, maxpts=None, method=0):
# exTime = etime(clock,t0);
# '
#% gauss legendre points and weights, n = 6
#% gauss legendre points and weights, n = 6
_W6 = [ 0.1713244923791705e+00, 0.3607615730481384e+00, 0.4679139345726904e+00]
_X6 = [-0.9324695142031522e+00, -0.6612093864662647e+00, -0.2386191860831970e+00]
#% gauss legendre points and weights, n = 12

@ -13,11 +13,21 @@ from __future__ import division
import numpy as np
from numpy import pi, sqrt, atleast_2d, exp, newaxis #@UnresolvedImport
import scipy
from scipy.linalg import sqrtm
from scipy import linalg
from scipy.special import gamma
from misc import tranproc, trangood
from itertools import product
_stats_epan=(1. / 5, 3. / 5, np.inf)
_stats_biwe=(1. / 7, 5. / 7, 45. / 2),
_stats_triw=(1. / 9, 350. / 429, np.inf),
_stats_rect=(1. / 3, 1. / 2, np.inf),
_stats_tria=(1. / 6, 2. / 3, np.inf),
_stats_lapl=(2, 1. / 4, np.inf),
_stats_logi=(pi ** 2 / 3, 1. / 6, 1 / 42),
_stats_gaus=(1, 1. / (2 * sqrt(pi)), 3. / (8 * sqrt(pi)))
def sphere_volume(d, r=1.0):
"""
@ -47,7 +57,7 @@ def sphere_volume(d, r=1.0):
class kde(object):
class KDE(object):
""" Representation of a kernel-density estimate using Gaussian kernels.
Parameters
@ -84,22 +94,52 @@ class kde(object):
obtain the kernel covariance matrix. Set this method to
kde.scotts_factor or kde.silverman_factor (or subclass to provide your
own). The default is scotts_factor.
Example
-------
"""
def __init__(self, dataset, **kwds):
self.kernel = 'gauss'
self.hs = None
self.hsmethod = None
self.L2 = None
self.alpha = 0
self.__dict__.update(kwds)
def __init__(self, dataset, hs=None, kernel=None,L2=None,alpha=0.0):
self.kernel = kernel if kernel else Kernel('gauss')
self.hs = hs
self.L2 = L2
self.alpha = alpha
self.dataset = atleast_2d(dataset)
self.d, self.n = self.dataset.shape
self._compute_covariance()
self._compute_smoothing()
def _compute_smoothing(self):
"""Computes the smoothing matrix
"""
get_smoothing = self.kernel.get_smoothing
h = self.hs
if h is None:
h = get_smoothing(self.dataset)
hsiz = h.shape
if (min(hsiz)==1) or (self.d==1):
if max(hsiz)==1:
h = h*np.ones(self.d)
else:
h.shape = (self.d,) # make sure it has the correct dimension
# If h negative calculate automatic values
ind, = np.where(h<=0)
for i in ind.tolist(): #
h[i] = get_smoothing(self.dataset[i])
deth = h.prod()
self.inv_hs = linalg.diag(1.0/h)
else: #fully general smoothing matrix
deth = linalg.det(h)
if deth<=0:
raise ValueError('bandwidth matrix h must be positive definit!')
self.inv_hs = linalg.inv(h)
self.hs = h
self._norm_factor = deth * self.n
def evaluate(self, points):
"""Evaluate the estimated pdf on a set of points.
@ -140,23 +180,23 @@ class kde(object):
# there are more points than data, so loop over data
for i in range(self.n):
diff = self.dataset[:, i, np.newaxis] - points
tdiff = np.dot(self.inv_cov, diff)
energy = np.sum(diff * tdiff, axis=0) / 2.0
result += np.exp(-energy)
tdiff = np.dot(self.inv_hs, diff)
result += self.kernel(tdiff)
else:
# loop over points
for i in range(m):
diff = self.dataset - points[:, i, np.newaxis]
tdiff = np.dot(self.inv_cov, diff)
energy = sum(diff * tdiff, axis=0) / 2.0
result[i] = np.sum(np.exp(-energy), axis=0)
tdiff = np.dot(self.inv_hs, diff)
tmp = self.kernel(tdiff)
result[i] = tmp.sum(axis=-1)
result /= self._norm_factor
result /= (self._norm_factor*self.kernel.norm_factor(d,self.n))
return result
__call__ = evaluate
#function [f, hs,lambda]= kdefun(A,options,varargin)
#%KDEFUN Kernel Density Estimator.
#%
@ -433,71 +473,106 @@ class kde(object):
# hs=h;
#end
def mkernel_multi(X, p=None):
class _Kernel(object):
def __init__(self, r=1.0, stats=None):
self.r = r # radius of kernel
self.stats = stats
def norm_factor(self, d=1, n=None):
return 1.0
def norm_kernel(self, x):
X = np.atleast_2d(x)
return self._kernel(X)/self.norm_factor(*X.shape)
def kernel(self, x):
return self._kernel(np.atleast_2d(x))
__call__ = kernel
class _KernelMulti(_Kernel):
# p=0; %Sphere = rect for 1D
# p=1; %Multivariate Epanechnikov kernel.
# p=2; %Multivariate Bi-weight Kernel
# p=3; %Multi variate Tri-weight Kernel
# p=4; %Multi variate Four-weight Kernel
d, n = X.shape
r = 1 # radius of the kernel
c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(d, r) / np.prod(np.r_[(d + 2):(2 * p + d + 1):2])# normalizing constant
# c = beta(r+1,r+1)*vsph(d,b)*(2^(2*r)); % Wand and Jones pp 31
# the commented c above does note yield the right scaling for d>1
x2 = X ** 2
return ((1.0 - x2.sum(axis=0) / r ** 2).clip(min=0.0)) ** p / c
def mkernel_epanechnikov(X):
return mkernel_multi(X, p=1)
def mkernel_biweight(X):
return mkernel_multi(X, p=2)
def mkernel_triweight(X):
return mkernel_multi(X, p=3)
def mkernel_product(X, p):
def __init__(self, r=1.0, p=1, stats=None):
self.r = r
self.p = p
self.stats = stats
def norm_factor(self, d=1, n=None):
r = self.r
p = self.p
c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(d, r) / np.prod(np.r_[(d + 2):(2 * p + d + 1):2])# normalizing constant
return c
def _kernel(self, x):
r = self.r
p = self.p
x2 = x ** 2
return ((1.0 - x2.sum(axis=0) / r ** 2).clip(min=0.0)) ** p
mkernel_epanechnikov = _KernelMulti(p=1, stats=_stats_epan)
mkernel_biweight = _KernelMulti(p=2, stats=_stats_biwe)
mkernel_triweight = _KernelMulti(p=3, stats=_stats_triw)
class _KernelProduct(_KernelMulti):
# p=0; %rectangular
# p=1; %1D product Epanechnikov kernel.
# p=2; %1D product Bi-weight Kernel
# p=3; %1D product Tri-weight Kernel
# p=4; %1D product Four-weight Kernel
d, n = X.shape
r = 1.0 # radius
pdf = (1 - (X / r) ** 2).clip(min=0.0)
c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(1, r) / np.prod(np.r_[(1 + 2):(2 * p + 2):2])# normalizing constant
return np.prod(pdf, axis=0) / c ** d
def mkernel_p1epanechnikov(X):
return mkernel_product(X, p=1)
def mkernel_p1biweight(X):
return mkernel_product(X, p=2)
def mkernel_p1triweight(X):
return mkernel_product(X, p=3)
def mkernel_rectangular(X):
d, n = X.shape
return np.where(np.all(np.abs(X) <= 1, axis=0), 0.5 ** d, 0.0)
def mkernel_triangular(X):
pdf = (1 - np.abs(X)).clip(min=0.0)
return np.prod(pdf, axis=0)
def mkernel_gaussian(X):
x2 = X ** 2
d, n = X.shape
return (2 * pi) ** (-d / 2) * exp(-0.5 * x2.sum(axis=0))
def mkernel_laplace(X):
''' Return multivariate laplace kernel'''
d, n = X.shape
absX = np.abs(X)
return 0.5 ** d * exp(-absX.sum(axis=0))
def mkernel_logistic(X):
''' Return multivariate logistic kernel'''
s = exp(X)
return np.prod(s / (s + 1) ** 2, axis=0)
def norm_factor(self, d=1, n=None):
r = self.r
p = self.p
c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(1, r) / np.prod(np.r_[(1 + 2):(2 * p + 2):2])# normalizing constant
return c ** d
def _kernel(self, x):
r = self.r # radius
pdf = (1 - (x / r) ** 2).clip(min=0.0)
return pdf.prod(axis=0)
mkernel_p1epanechnikov = _KernelProduct(p=1, stats=_stats_epan)
mkernel_p1biweight = _KernelProduct(p=2, stats=_stats_biwe)
mkernel_p1triweight = _KernelProduct(p=3, stats=_stats_triw)
class _KernelRectangular(_Kernel):
def _kernel(self, x):
return np.where(np.all(np.abs(x) <= self.r, axis=0), 1, 0.0)
def norm_factor(self, d=1, n=None):
r = self.r
return (2*r) ** d
mkernel_rectangular = _KernelRectangular(stats=_stats_rect)
class _KernelTriangular(_Kernel):
def _kernel(self, x):
pdf = (1 - np.abs(x)).clip(min=0.0)
return pdf.prod(axis=0)
mkernel_triangular = _KernelTriangular(stats=_stats_tria)
class _KernelGaussian(_Kernel):
def _kernel(self, x):
x2 = x ** 2
return exp(-0.5 * x2.sum(axis=0))
def norm_factor(self, d=1, n=None):
return (2 * pi) ** (d / 2.0)
mkernel_gaussian = _KernelGaussian(stats=_stats_gaus)
#def mkernel_gaussian(X):
# x2 = X ** 2
# d = X.shape[0]
# return (2 * pi) ** (-d / 2) * exp(-0.5 * x2.sum(axis=0))
class _KernelLaplace(_Kernel):
def _kernel(self, x):
absX = np.abs(x)
return exp(-absX.sum(axis=0))
def norm_factor(self, d=1, n=None):
return 2**d
mkernel_laplace = _KernelLaplace(stats=_stats_lapl)
class _KernelLogistic(_Kernel):
def _kernel(self, x):
s = exp(x)
return np.prod(s / (s + 1) ** 2, axis=0)
mkernel_logistic = _KernelLogistic(stats=_stats_logi)
_MKERNEL_DICT = dict(
epan=mkernel_epanechnikov,
@ -513,17 +588,6 @@ _MKERNEL_DICT = dict(
gaus=mkernel_gaussian
)
_KERNEL_EXPONENT_DICT = dict(re=0, sp=0, ep=1, bi=2, tr=3, fo=4, fi=5, si=6, se=7)
_KERNEL_STATS_DICT = dict(epan=(1. / 5, 3. / 5, np.inf),
biwe=(1. / 7, 5. / 7, 45. / 2),
triw=(1. / 9, 350. / 429, np.inf),
rect=(1. / 3, 1. / 2, np.inf),
tria=(1. / 6, 2. / 3, np.inf),
lapl=(2, 1. / 4, np.inf),
logi=(pi ** 2 / 3, 1. / 6, 1 / 42),
gaus=(1, 1. / (2 * sqrt(pi)), 3. / (8 * sqrt(pi)))
)
class Kernel(object):
'''
@ -574,9 +638,13 @@ class Kernel(object):
'Density estimation for statistics and data analysis'
Chapman and Hall, pp 31, 103, 175
'''
def __init__(self, name):
def __init__(self, name, fun='hns'):
self.kernel = _MKERNEL_DICT[name[:4]]
self.name = self.kernel.__name__.replace('mkernel_','').title()
#self.name = self.kernel.__name__.replace('mkernel_', '').title()
try:
self.get_smoothing = getattr(self, fun)
except:
self.get_smoothing = self.hns
def stats(self):
''' Return some 1D statistics of the kernel.
@ -596,8 +664,9 @@ class Kernel(object):
'Kernel smoothing'
Chapman and Hall, pp 176.
'''
name = self.name[2:6] if self.name[:2].lower()=='p1' else self.name[:4]
return _KERNEL_STATS_DICT[name.lower()]
return self.kernel.stats
#name = self.name[2:6] if self.name[:2].lower() == 'p1' else self.name[:4]
#return _KERNEL_STATS_DICT[name.lower()]
def hns(self, data):
'''
@ -637,18 +706,19 @@ class Kernel(object):
'''
A = np.atleast_2d(data)
d,n = A.shape
n = A.shape[1]
# R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x))
mu2, R, Rdd = self.stats()
AMISEconstant = (8*sqrt(pi)*R/(3*mu2**2*n))**(1./5)
iqr = np.abs(np.percentile(A, 75, axis=1)-np.percentile(A, 25, axis=1))# interquartile range
AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5)
iqr = np.abs(np.percentile(A, 75, axis=1) - np.percentile(A, 25, axis=1))# interquartile range
stdA = np.std(A, axis=1)
# % use of interquartile range guards against outliers.
# % the use of interquartile range is better if
# % the distribution is skew or have heavy tails
# % This lessen the chance of oversmoothing.
return np.where(iqr>0, np.minimum(stdA, iqr/1.349), stdA)*AMISEconstant
return np.where(iqr > 0, np.minimum(stdA, iqr / 1.349), stdA) * AMISEconstant
def hos(self, data):
''' Return Oversmoothing Parameter.
@ -685,7 +755,7 @@ class Kernel(object):
'Kernel smoothing'
Chapman and Hall, pp 60--63
'''
return self.hns(data)/0.93
return self.hns(data) / 0.93
def hmns(self, data):
'''
Returns Multivariate Normal Scale Estimate of Smoothing Parameter.
@ -694,7 +764,7 @@ class Kernel(object):
h = M dimensional optimal value for smoothing parameter
given the data and kernel. size D x D
data = data matrix, size N x D (D = # dimensions )
data = data matrix, size D x N (D = # dimensions )
kernel = 'epanechnikov' - Epanechnikov kernel.
'biweight' - Bi-weight kernel.
'triweight' - Tri-weight kernel.
@ -713,8 +783,13 @@ class Kernel(object):
data = rndnorm(0, 1,20,2)
h = hmns(data,'epan');
See also hns, hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt
Reference:
See also
--------
hns, hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt
Reference
----------
B. W. Silverman (1986)
'Density estimation for statistics and data analysis'
Chapman and Hall, pp 43-48, 87
@ -723,37 +798,35 @@ class Kernel(object):
'Kernel smoothing'
Chapman and Hall, pp 60--63, 86--88
'''
# TODO implement more kernels
# TODO: implement more kernels
A = np.atleast_2d(data)
d,n = A.shape
d, n = A.shape
if d==1:
if d == 1:
return self.hns(data)
name = self.name[:4].lower()
if name=='epan': # Epanechnikov kernel
a=(8*(d+4)*(2*sqrt(pi))**d/sphere_volume(d))**(1./(4+d))
elif name=='biwe': # Bi-weight kernel
a=2.7779;
if d>2:
if name == 'epan': # Epanechnikov kernel
a = (8.0 * (d + 4.0) * (2 * sqrt(pi)) ** d / sphere_volume(d)) ** (1. / (4.0 + d))
elif name == 'biwe': # Bi-weight kernel
a = 2.7779;
if d > 2:
raise ValueError('not implemented for d>2')
elif name=='triw': # Triweight
a=3.12;
if d>2:
elif name == 'triw': # Triweight
a = 3.12;
if d > 2:
raise ValueError('not implemented for d>2')
elif name=='gaus': # Gaussian kernel
a = (4/(d+2))**(1./(d+4))
elif name == 'gaus': # Gaussian kernel
a = (4.0 / (d + 2.0)) ** (1. / (d + 4.0))
else:
raise ValueError('Unknown kernel.')
covA = scipy.cov(A)
return a*sqrtm(covA)*n*(-1./(d+4))
return a * linalg.sqrtm(covA) * n * (-1. / (d + 4))
def norm_factor(self, d=1,n=None):
return self.kernel.norm_factor(n,d)
def evaluate(self, X):
return self.kernel(np.atleast_2d(X))
__call__ = evaluate
@ -908,7 +981,8 @@ def accum(accmap, a, func=None, size=None, fill_value=0, dtype=None):
out[s] = func(vals[s])
return out
def gridcount(data,X):
def gridcount(data, X):
'''
GRIDCOUNT D-dimensional histogram using linear binning.
@ -939,16 +1013,21 @@ def gridcount(data,X):
Example
-------
N = 500;
data = rndray(1,N,1);
x = linspace(0,max(data)+1,50).';
dx = x(2)-x(1);
c = gridcount(data,x);
plot(x,c,'.') 1D histogram
plot(x,c/dx/N) 1D probability density plot
trapz(x,c/dx/N)
>>> import numpy as np
>>> import wafo.kdetools as wk
>>> import pylab as plb
>>> N = 500;
>>> data = np.random.rayleigh(1,N)
>>> x = np.linspace(0,max(data)+1,50)
>>> dx = x[1]-x[0]
>>> c = wk.gridcount(data,x)
>>> h = plb.plot(x,c,'.') # 1D histogram
>>> h1 = plb.plot(x,c/dx/N) # 1D probability density plot
>>> np.trapz(x,c/dx/N)
See also bincount, kdebin
See also
--------
bincount, accum, kdebin
Reference
----------
@ -958,84 +1037,114 @@ def gridcount(data,X):
'''
dat = np.atleast_2d(data)
x = np.atleast_2d(X)
d,n = dat.shape
d, n = dat.shape
d1, inc = x.shape
if d!=d1:
if d != d1:
raise ValueError('Dimension 0 of data and X do not match.')
dx = np.diff(x[:,:2],axis=1)
xlo = x[:,0]
xup = x[:,-1]
dx = np.diff(x[:, :2], axis=1)
xlo = x[:, 0]
xup = x[:, -1]
datlo = dat.min(axis=1)
datup = dat.max(axis=1)
if ((datlo<xlo) or (xup<datup)).any():
if ((datlo < xlo) | (xup < datup)).any():
raise ValueError('X does not include whole range of the data!')
csiz = np.repeat(inc, d)
binx = np.assarray(np.floor((dat-xlo[:, newaxis])/dx[:, newaxis]),dtype=int)
w = dx.prod()
if d==1:
c = (accum(binx,(x[binx+1]-data),size=[inc,]) +
accum(binx,(data-x[binx]),size=[inc,]))/w
elif d==2:
binx = np.asarray(np.floor((dat - xlo[:, newaxis]) / dx), dtype=int)
w = dx.prod()
abs = np.abs
if d == 1:
x.shape = (-1,)
c = (accum(binx, (x[binx + 1] - data), size=[inc, ]) +
accum(binx, (data - x[binx]), size=[inc, ])) / w
elif d == 2:
b2 = binx[1]
b1 = binx[0]
c = (accum([b1,b2] ,abs(prod(([X(b1+1,1) X(b2+1,2)]-data),2)),size=[inc,inc])+...
accum([b1+1,b2] ,abs(prod(([X(b1,1) X(b2+1,2)]-data),2)),size=[inc,inc])+...
accum([b1 ,b2+1],abs(prod(([X(b1+1,1) X(b2,2)]-data),2)),size=[inc,inc])+...
accum([b1+1,b2+1],abs(prod(([X(b1,1) X(b2,2)]-data),2)),size=[inc,inc]))/w;
c_ = np.c_
stk = np.vstack
c = (accum(c_[b1, b2] , abs(np.prod(stk([X[0, b1 + 1], X[1, b2 + 1]]) - data, axis=0)), size=[inc, inc]) +
accum(c_[b1 + 1, b2] , abs(np.prod(stk([X[0, b1], X[1, b2 + 1]]) - data, axis=0)), size=[inc, inc]) +
accum(c_[b1 , b2 + 1], abs(np.prod(stk([X[0, b1 + 1], X[1, b2]]) - data, axis=0)), size=[inc, inc]) +
accum(c_[b1 + 1, b2 + 1], abs(np.prod(stk([X[0, b1], X[1, b2]]) - data, axis=0)), size=[inc, inc])) / w
else: # % d>2
useSparse = 0;
Nc = prod(csiz);
c = zeros(Nc,1);
fact2 = [0 inc*(1:d-1)];
fact1 = [1 cumprod(csiz(1:d-1))];
fact1 = fact1(ones(n,1),:);
for ir=0:2^(d-1)-1,
bt0(:,:,1) = bitget(ir,1:d);
bt0(:,:,2) = ~bt0(:,:,1);
for ix = 0:1
one = mod(ix,2)+1;
two = mod(ix+1,2)+1;
% Convert to linear index (faster than sub2ind)
b1 = sum((binx + bt0(ones(n,1),:,one)-1).*fact1,2)+1; %linear index to c
bt2 = bt0(:,:,two) + fact2;
b2 = binx + bt2(ones(n,1),:); % linear index to X
try
if useSparse
% Fast gridding using sparse
c = c + sparse(b1,1,abs(prod(X(b2)-data,2)),Nc,1);
else
c = c + accum(b1,abs(prod(X(b2)-data,2)),[Nc,1]);
%c = c + accum([b1,ones(n,1)],abs(prod(X(b2)-data,2)),[Nc,1]);
%[len,bin,val] = bincount(b1,abs(prod(X(b2)-data,2)));
%c(bin) = c(bin)+val;
end
catch
c = c + sparse(b1,1,abs(prod(X(b2)-data,2)),Nc,1);
end
end
end
c = reshape(c/w,csiz);
end
switch d % make sure c is stored in the same way as meshgrid
case 2, c = c.';
case 3, c = permute(c,[2 1 3]);
end
return
raise ValueError('Not implemented for d>2')
Nc = csiz.prod()
c = np.zeros((Nc, 1))
fact2 = inc * np.arange(d)
fact1 = csiz.cumprod() / inc
#fact1 = fact1(ones(n,1),:);
# for ir in xrange(2**(d-1)):
# bt0[:,:,1] = bitget(ir,1:d)
# bt0[:,:,2] = 1-bt0[:,:,1]
# for ix in range(2):
# one = mod(ix,2)+1;
# two = mod(ix+1,2)+1;
# # Convert to linear index (faster than sub2ind)
# b1 = sum((binx + bt0(ones(n,1),:,one)-1).*fact1,2)+1; #%linear index to c
# bt2 = bt0(:,:,two) + fact2;
# b2 = binx + bt2(ones(n,1),:); #% linear index to X
#
# c = c + accum(b1,abs(prod(X(b2)-data,2)),[Nc,1]);
# #c = c + accum([b1,ones(n,1)],abs(prod(X(b2)-data,2)),[Nc,1]);
# #[len,bin,val] = bincount(b1,abs(prod(X(b2)-data,2)));
# #c(bin) = c(bin)+val;
#
# #end
# #end
# c = reshape(c/w,csiz);
#end
if d == 2: #% make sure c is stored in the same way as meshgrid
c = c.T
elif d == 3:
c = c.transpose(1, 0, 2)
return c
def test_kde():
import numpy as np
import wafo.kdetools as wk
import pylab as plb
N = 500;
data = np.random.rayleigh(1, size=(1, N))
kde = wk.KDE(data)
x = np.linspace(0, max(data.ravel()) + 1, 10)
#X,Y = np.meshgrid(x, x)
f = kde(x)
#plb.hist(data.ravel())
plb.plot(x,f)
plb.show()
def test_gridcount():
import numpy as np
import wafo.kdetools as wk
import pylab as plb
N = 500;
data = np.random.rayleigh(1, size=(2, N))
x = np.linspace(0, max(data.ravel()) + 1, 10)
X = np.vstack((x, x))
dx = x[1] - x[0]
c = wk.gridcount(data, X)
h = plb.contourf(x, x, c)
plb.show()
h = plb.plot(x, c, '.') # 1D histogram
h1 = plb.plot(x, c / dx / N) # 1D probability density plot
t = np.trapz(x, c / dx / N)
print(t)
def main():
import doctest
doctest.testmod()
if __name__ == '__main__':
main()
#main()
#test_gridcount()
test_kde()

@ -637,15 +637,16 @@ def findtp(x, h=0.0, kind=None):
Example:
--------
>>> import wafo.data
>>> import pylab
>>> import pylab as plb
>>> import wafo.misc as wm
>>> x = wafo.data.sea()
>>> x1 = x[0:200,:]
>>> itp = findtp(x1[:,1],0,'Mw')
>>> itph = findtp(x1[:,1],0.3,'Mw')
>>> itp = wm.findtp(x1[:,1],0,'Mw')
>>> itph = wm.findtp(x1[:,1],0.3,'Mw')
>>> tp = x1[itp,:]
>>> tph = x1[itph,:]
>>> a = pylab.plot(x1[:,0],x1[:,1],tp[:,0],tp[:,1],'ro',tph[:,1],tph[:,1],'k.')
>>> pylab.close('all')
>>> a = plb.plot(x1[:,0],x1[:,1],tp[:,0],tp[:,1],'ro',tph[:,1],tph[:,1],'k.')
>>> plb.close('all')
>>> itp
array([ 11, 21, 22, 24, 26, 28, 31, 39, 43, 45, 47, 51, 56,
64, 70, 78, 82, 84, 89, 94, 101, 108, 119, 131, 141, 148,
@ -731,13 +732,14 @@ def findtc(x_in, v=None, kind=None):
Example:
--------
>>> import wafo.data
>>> import pylab
>>> import pylab as plb
>>> import wafo.misc as wm
>>> x = wafo.data.sea()
>>> x1 = x[0:200,:]
>>> itc, iv = findtc(x1[:,1],0,'dw')
>>> itc, iv = wm.findtc(x1[:,1],0,'dw')
>>> tc = x1[itc,:]
>>> a = pylab.plot(x1[:,0],x1[:,1],tc[:,0],tc[:,1],'ro')
>>> pylab.close('all')
>>> a = plb.plot(x1[:,0],x1[:,1],tc[:,0],tc[:,1],'ro')
>>> plb.close('all')
See also
--------
@ -833,12 +835,13 @@ def findoutliers(x, zcrit=0.0, dcrit=None, ddcrit=None, verbose=False):
--------
>>> import numpy as np
>>> import wafo
>>> import wafo.misc as wm
>>> xx = wafo.data.sea()
>>> dt = np.diff(xx[:2,0])
>>> dcrit = 5*dt
>>> ddcrit = 9.81/2*dt*dt
>>> zcrit = 0
>>> [inds, indg] = findoutliers(xx[:,1],zcrit,dcrit,ddcrit,verbose=True)
>>> [inds, indg] = wm.findoutliers(xx[:,1],zcrit,dcrit,ddcrit,verbose=True)
Found 0 spurious positive jumps of Dx
Found 0 spurious negative jumps of Dx
Found 37 spurious positive jumps of D^2x
@ -1179,7 +1182,8 @@ def getshipchar(value, property="max_deadweight"):
Example
---------
>>> getshipchar(10,'service_speed')
>>> import wafo.misc as wm
>>> wm.getshipchar(10,'service_speed')
{'beam': 29.0,
'beamSTD': 2.9000000000000004,
'draught': 9.5999999999999996,

@ -17,7 +17,7 @@ from wafo.transform.core import TrData
from wafo.transform.models import TrHermite, TrOchi, TrLinear
from wafo.interpolate import SmoothSpline
from scipy.interpolate.interpolate import interp1d
from scipy.integrate.quadrature import cumtrapz
from scipy.integrate.quadrature import cumtrapz #@UnresolvedImport
import warnings
import numpy as np
@ -39,6 +39,7 @@ from wafo.misc import (nextpow2, findtp, findtc, findcross, sub_dict_select,
from wafodata import WafoData
from plotbackend import plotbackend
import matplotlib
from scipy.stats.stats import skew, kurtosis
matplotlib.interactive(True)
_wafocov = JITImport('wafo.covariance')
_wafospec = JITImport('wafo.spectrum')
@ -47,8 +48,6 @@ __all__ = ['TimeSeries', 'LevelCrossings', 'CyclePairs', 'TurningPoints',
'sensortypeid', 'sensortype']
class LevelCrossings(WafoData):
'''
Container class for Level crossing data objects in WAFO
@ -59,7 +58,18 @@ class LevelCrossings(WafoData):
number of upcrossings
args : array-like
crossing levels
Examples
--------
>>> import wafo.data
>>> import wafo.objects as wo
>>> x = wafo.data.sea()
>>> ts = wo.mat2timeseries(x)
>>> tp = ts.turning_points()
>>> mm = tp.cycle_pairs()
>>> lc = mm.level_crossings()
>>> h2 = lc.plot()
'''
def __init__(self, *args, **kwds):
options = dict(title='Level crossing spectrum',
@ -448,7 +458,16 @@ class CyclePairs(WafoData):
data : array_like
args : vector for 1D
Examples
--------
>>> import wafo.data
>>> import wafo.objects as wo
>>> x = wafo.data.sea()
>>> ts = wo.mat2timeseries(x)
>>> tp = ts.turning_points()
>>> mm = tp.cycle_pairs()
>>> h1 = mm.plot(marker='x')
'''
def __init__(self, *args, **kwds):
self.type_ = kwds.get('type_', 'max2min')
@ -492,7 +511,7 @@ class CyclePairs(WafoData):
>>> ts = wafo.objects.mat2timeseries(wafo.data.sea())
>>> tp = ts.turning_points()
>>> mm = tp.cycle_pairs()
>>> h = mm.plot('.')
>>> h = mm.plot(marker='.')
>>> bv = range(3,9)
>>> D = mm.damage(beta=bv)
>>> D
@ -533,7 +552,7 @@ class CyclePairs(WafoData):
>>> ts = wafo.objects.mat2timeseries(wafo.data.sea())
>>> tp = ts.turning_points()
>>> mm = tp.cycle_pairs()
>>> h = mm.plot('.')
>>> h = mm.plot(marker='.')
>>> lc = mm.level_crossings()
>>> h2 = lc.plot()
@ -608,6 +627,16 @@ class TurningPoints(WafoData):
----------------
data : array_like
args : vector for 1D
Examples
--------
>>> import wafo.data
>>> import wafo.objects as wo
>>> x = wafo.data.sea()
>>> ts = wo.mat2timeseries(x)
>>> tp = ts.turning_points()
>>> h1 = tp.plot(marker='x')
'''
def __init__(self, *args, **kwds):
super(TurningPoints, self).__init__(*args, **kwds)
@ -643,7 +672,7 @@ class TurningPoints(WafoData):
>>> ts = wafo.objects.mat2timeseries(x)
>>> tp = ts.turning_points()
>>> mM = tp.cycle_pairs()
>>> h = mM.plot('x')
>>> h = mM.plot(marker='x')
See also
@ -700,6 +729,13 @@ class TimeSeries(WafoData):
>>> rf = ts.tocovdata(lag=150)
>>> h = rf.plot()
>>> tp = ts.turning_points()
>>> mm = tp.cycle_pairs()
>>> h1 = mm.plot(marker='x')
>>> lc = mm.level_crossings()
>>> h2 = lc.plot()
'''
def __init__(self, *args, **kwds):
super(TimeSeries, self).__init__(*args, **kwds)
@ -969,7 +1005,8 @@ class TimeSeries(WafoData):
See also
--------
troptset, lc2tr, cdf2tr, trplot
troptset, lc2tr, cdf2tr, trplot
References
----------
Rychlik, I. , Johannesson, P and Leadbetter, M. R. (1997)
@ -983,57 +1020,12 @@ class TimeSeries(WafoData):
reconstructed data"
in Proceedings of 9th ISOPE Conference, Vol III, pp 66-73
'''
# Tested on: Matlab 5.3, 5.2, 5.1
# History:
# revised pab Dec2004
# -Fixed bug: string comparison for def at fault.
# revised pab Nov2004
# -Fixed bug: linextrap was not accounted for
# revised pab july 2004
# revised pab 3 april 2004
# -fixed a bug in hermite estimation: excess changed to kurtosis
# revised pab 29.12.2000
# - added example, hermite and ochi options
# - replaced optional arguments with a options struct
# - default param is now [-5 5 513] -> better to have the discretization
# represented with exact numbers, especially when calculating
# derivatives of the transformation numerically.
# revised pab 19.12.2000
# - updated call edf(X,-inf,[],monitor) to edf(X,[],monitor)
# due to new calling syntax for edf
# modifed pab 24.09.2000
# - changed call from norminv to wnorminv
# - also removed the 7 lowest and 7 highest points from
# the estimation using def='mnonlinear'
# (This is similar to what lc2tr does. lc2tr removes
# the 9 highest and 9 lowest TP from the estimation)
# modified pab 09.06.2000
# - made all the *empirical options secret.
# - Added 'mnonlinear' and 'mempirical'
# - Fixed the problem of multip==1 and def=='empirical' by interpolating
# with spline to ensure that the length of g is fixed
# - Replaced the test statistic for def=='empirical' with the one
# obtained when csm1=csm2=1. (Previously only the smoothed test
# statistic where returned)
# modified pab 12.10.1999
# fixed a bug
# added secret output of empirical estimate g2
# modified by svi 29.09.1999
# changed input def by adding new options.
# revised by pab 11.08.99
# changed name from dat2tran to dat2tr
# modified by Per A. Brodtkorb 12.05.1999,15.08.98
# added secret option: to accept multiple data, to monitor the steps
# of estimation of the transformation
# also removed some code and replaced it with a call to lc2tr (cross2tr)
# making the maintainance easier
#
#opt = troptset('plotflag','off','csm',.95,'gsm',.05,....
# 'param',[-5 5 513],'delay',2,'linextrap','on','ne',7,...
# 'cvar',1,'gvar',1,'multip',0);
opt = DotDict(chkder=True, plotflag=True, csm=.95, gsm=.05,
param=[-5, 5, 513], delay=2, ntr=inf, linextrap=True, ne=7, cvar=1, gvar=1,
multip=False, crossdef='uM')
@ -1053,14 +1045,14 @@ class TimeSeries(WafoData):
elif method[0] == 'm':
return cdftr()
elif method[0] == 'h':
ga1 = np.skew(self.data)
ga2 = np.kurtosis(self.data, fisher=True) #kurt(xx(n+1:end))-3;
ga1 = skew(self.data)
ga2 = kurtosis(self.data, fisher=True) #kurt(xx(n+1:end))-3;
up = min(4 * (4 * ga1 / 3) ** 2, 13)
lo = (ga1 ** 2) * 3 / 2;
kurt1 = min(up, max(ga2, lo)) + 3
return TrHermite(mean=ma, var=sa ** 2, skew=ga1, kurt=kurt1)
elif method[0] == 'o':
ga1 = np.skew(self.data)
ga1 = skew(self.data)
return TrOchi(mean=ma, var=sa ** 2, skew=ga1)
def turning_points(self, h=0.0, wavetype=None):
@ -1197,13 +1189,13 @@ class TimeSeries(WafoData):
Example:
--------
Histogram of crest2crest waveperiods
>>> import wafo
>>> import pylab as plb
>>> x = wafo.data.sea()
>>> ts = wafo.objects.mat2timeseries(x[0:400,:])
>>> T = ts.wave_periods(vh=0.0,pdef='c2c')
T = dat2wa(x1,0,'c2c') #% Returns crest2crest waveperiods
subplot(121), waveplot(x1,'-',1,1),subplot(122),histgrm(T)
>>> T, ix = ts.wave_periods(vh=0.0,pdef='c2c')
>>> h = plb.hist(T)
See also:
--------
@ -1310,20 +1302,10 @@ class TimeSeries(WafoData):
t1 = x[index[(start + dist):nn:step]]
T = t1 - t0
## if False: #% Secret option: indices to the actual crossings used.
## index=index.ravel()
## ind = [index(start:(nn-dist):step) index((start+dist):nn:step)].'
## ind = ind(:)
return T, index
#% Old call: kept just in case
#%T = x(index((start+dist):step:nn),1)-x(index(start:step:(nn-dist)),1)
def reconstruct(self):
# TODO: finish reconstruct
pass
def plot_wave(self, sym1='k.', ts=None, sym2='k+', nfig=None, nsub=None,
stdev=None, vfact=3):
@ -1362,7 +1344,7 @@ class TimeSeries(WafoData):
--------
findtc, plot
'''
# TODO: finish reconstruct
nw = 20
tn = self.args
xn = self.data.ravel()

@ -10,7 +10,7 @@ from __future__ import division
import math
from copy import copy
from scipy.misc import comb, derivative
from scipy.misc import comb, derivative #@UnresolvedImport
from scipy import special
from scipy import optimize
from scipy import integrate
@ -3544,6 +3544,8 @@ for c != 0, and for x >= 0 for all c, and x < 1/abs(c) for c < 0.
class genexpon_gen(rv_continuous):
def link(self, x, logSF, phat, ix):
xn = (x - phat[3]) / phat[4]
b = phat[1]
c = phat[2]
fact1 = (xn + expm1(-c * xn) / c)
if ix == 0:
phati = b * fact1 + logSF
@ -4507,8 +4509,8 @@ class ncf_gen(rv_continuous):
Px *= (n2+n1*x)**(-(n1+n2)/2)
Px *= special.assoc_laguerre(-nc*n1*x/(2.0*(n2+n1*x)),n2/2,n1/2-1)
Px /= special.beta(n1/2,n2/2)
#this function does not have a return
# drop it for now, the generic function seems to work ok
#this function does not have a return
# drop it for now, the generic function seems to work ok
def _cdf(self, x, dfn, dfd, nc):
return special.ncfdtr(dfn,dfd,nc,x)
def _ppf(self, q, dfn, dfd, nc):

@ -12,7 +12,7 @@ from wafo.plotbackend import plotbackend
from wafo.misc import ecross, findcross
import numdifftools
import numdifftools #@UnresolvedImport
from scipy import special
from scipy.linalg import pinv2
from scipy import optimize

@ -10,88 +10,3 @@ def valarray(shape,value=nan,typecode=None):
out = asarray(out)
return out
class rv_frozen(object):
''' Frozen continous or discrete 1D Random Variable object (RV)
Methods
-------
RV.rvs(size=1)
- random variates
RV.pdf(x)
- probability density function (continous case)
RV.pmf(x)
- probability mass function (discrete case)
RV.cdf(x)
- cumulative density function
RV.sf(x)
- survival function (1-cdf --- sometimes more accurate)
RV.ppf(q)
- percent point function (inverse of cdf --- percentiles)
RV.isf(q)
- inverse survival function (inverse of sf)
RV.stats(moments='mv')
- mean('m'), variance('v'), skew('s'), and/or kurtosis('k')
RV.entropy()
- (differential) entropy of the RV.
Parameters
----------
x : array-like
quantiles
q : array-like
lower or upper tail probability
size : int or tuple of ints, optional, keyword
shape of random variates
moments : string, optional, keyword
one or more of 'm' mean, 'v' variance, 's' skewness, 'k' kurtosis
'''
def __init__(self, dist, *args, **kwds):
self.dist = dist
loc0, scale0 = map(kwds.get, ['loc', 'scale'])
if isinstance(dist,rv_continuous):
args, loc0, scale0 = dist.fix_loc_scale(args, loc0, scale0)
self.par = args + (loc0, scale0)
else: # rv_discrete
args, loc0 = dist.fix_loc(args, loc0)
self.par = args + (loc0,)
def pdf(self,x):
''' Probability density function at x of the given RV.'''
return self.dist.pdf(x,*self.par)
def cdf(self,x):
'''Cumulative distribution function at x of the given RV.'''
return self.dist.cdf(x,*self.par)
def ppf(self,q):
'''Percent point function (inverse of cdf) at q of the given RV.'''
return self.dist.ppf(q,*self.par)
def isf(self,q):
'''Inverse survival function at q of the given RV.'''
return self.dist.isf(q,*self.par)
def rvs(self, size=None):
'''Random variates of given type.'''
kwds = dict(size=size)
return self.dist.rvs(*self.par,**kwds)
def sf(self,x):
'''Survival function (1-cdf) at x of the given RV.'''
return self.dist.sf(x,*self.par)
def stats(self,moments='mv'):
''' Some statistics of the given RV'''
kwds = dict(moments=moments)
return self.dist.stats(*self.par,**kwds)
def moment(self,n):
par1 = self.par[:self.dist.numargs]
return self.dist.moment(n,*par1)
def entropy(self):
return self.dist.entropy(*self.par)
def pmf(self,k):
'''Probability mass function at k of the given RV'''
return self.dist.pmf(k,*self.par)
Loading…
Cancel
Save