Small modifications

branch: master
author: per.andreas.brodtkorb, 14 years ago
parent: 1484f9128e
commit: 85792549d1

@@ -5,14 +5,14 @@
 # Author: pab
 #
 # Created: 01.11.2008
-# Copyright: (c) pab2 2008
+# Copyright: (c) pab 2008
 # Licence: LGPL
 #-------------------------------------------------------------------------------
 #!/usr/bin/env python
 from __future__ import division
 import warnings
 import numpy as np
-from numpy import pi, sqrt, atleast_2d, exp, newaxis #@UnresolvedImport
+from numpy import pi, sqrt, atleast_2d, exp, newaxis, array #@UnresolvedImport
 import scipy
 from scipy import linalg
 from scipy.special import gamma
@@ -20,12 +20,12 @@ from misc import tranproc, trangood
 from itertools import product
 _stats_epan = (1. / 5, 3. / 5, np.inf)
-_stats_biwe = (1. / 7, 5. / 7, 45. / 2),
-_stats_triw = (1. / 9, 350. / 429, np.inf),
-_stats_rect = (1. / 3, 1. / 2, np.inf),
-_stats_tria = (1. / 6, 2. / 3, np.inf),
-_stats_lapl = (2, 1. / 4, np.inf),
-_stats_logi = (pi ** 2 / 3, 1. / 6, 1 / 42),
+_stats_biwe = (1. / 7, 5. / 7, 45. / 2)
+_stats_triw = (1. / 9, 350. / 429, np.inf)
+_stats_rect = (1. / 3, 1. / 2, np.inf)
+_stats_tria = (1. / 6, 2. / 3, np.inf)
+_stats_lapl = (2, 1. / 4, np.inf)
+_stats_logi = (pi ** 2 / 3, 1. / 6, 1 / 42)
 _stats_gaus = (1, 1. / (2 * sqrt(pi)), 3. / (8 * sqrt(pi)))
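Note: the stray trailing commas removed in this hunk were silently wrapping each kernel-stats tuple in an outer 1-tuple, so any code indexing e.g. _stats_biwe[0] got the whole inner tuple rather than the first moment. A minimal illustration of the pitfall:

# A trailing comma after a parenthesized expression creates a 1-tuple:
broken = (1. / 7, 5. / 7, 45. / 2),   # tuple-in-a-tuple
fixed = (1. / 7, 5. / 7, 45. / 2)     # plain 3-tuple

print(len(broken))   # 1
print(len(fixed))    # 3
print(broken[0])     # (0.14285714285714285, 0.7142857142857143, 22.5)
print(fixed[0])      # 0.14285714285714285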
@@ -103,6 +103,7 @@ class TKDE(object):
 ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487,
 ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452])
+>>> import wafo.kdetools as wk
 >>> x = np.linspace(0.01, max(data.ravel()) + 1, 10)
 >>> kde = wk.TKDE(data, hs=0.5, L2=0.5)
 >>> f = kde(x)
@@ -238,12 +239,13 @@ class KDE(object):
 ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487,
 ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452])
 >>> x = np.linspace(0, max(data.ravel()) + 1, 10)
+>>> import wafo.kdetools as wk
 >>> kde = wk.KDE(data, hs=0.5, alpha=0.5)
 >>> f = kde(x)
 >>> f
-array([ 0.0541248 ,  0.16555235,  0.33084399,  0.45293325,  0.48345808,
-        0.48345808,  0.45293325,  0.33084399,  0.16555235,  0.0541248 ])
+array([ 0.17252055,  0.41014271,  0.61349072,  0.57023834,  0.37198073,
+        0.21409279,  0.12738463,  0.07460326,  0.03956191,  0.01887164])

 import pylab as plb
 h1 = plb.plot(x, f) # 1D probability density plot
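Note: the updated expected output looks right for this example: the old array was perfectly symmetric, which cannot match a right-skewed, Rayleigh-like sample, while the new values decay in the right tail. As a reference for what a kernel density estimate computes, here is a minimal fixed-bandwidth 1D Gaussian KDE; this is an illustrative sketch, not wafo.kdetools.KDE, which additionally supports adaptive smoothing (alpha > 0) and, via TKDE, transformation:

import numpy as np

def gaussian_kde_1d(data, x, hs):
    """Evaluate a fixed-bandwidth Gaussian KDE at the points x."""
    data = np.ravel(data)
    x = np.asarray(x)
    u = (x[:, None] - data[None, :]) / hs           # standardized distances
    k = np.exp(-0.5 * u ** 2) / np.sqrt(2 * np.pi)  # Gaussian kernel values
    return k.sum(axis=1) / (len(data) * hs)         # average of scaled kernels

With alpha=0 (no adaptivity) the docstring's kde(x) should behave like gaussian_kde_1d(data, x, hs=0.5).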
@@ -340,7 +342,6 @@ class KDE(object):
         if m >= self.n:
             # there are more points than data, so loop over data
             for i in range(self.n):
-
                 diff = self.dataset[:, i, np.newaxis] - points
                 tdiff = np.dot(self.inv_hs / self._lambda[i], diff)
                 result += self.kernel(tdiff) / self._lambda[i] ** d
@@ -507,8 +508,8 @@ class Kernel(object):
 (0.1111111111111111, 0.81585081585081587, inf)
 >>> triweight(np.linspace(-1,1,11))
-array([ 0.     ,  0.05103 ,  0.28672 ,  0.64827 ,  0.96768 ,  1.09375 ,
-        0.96768 ,  0.64827 ,  0.28672 ,  0.05103 ,  0.     ])
+array([ 0.      ,  0.046656,  0.262144,  0.592704,  0.884736,  1.      ,
+        0.884736,  0.592704,  0.262144,  0.046656,  0.      ])
 >>> triweight.hns(np.random.normal(size=100))

 See also
@ -556,39 +557,46 @@ class Kernel(object):
def hns(self, data): def hns(self, data):
''' '''
HNS Normal Scale Estimate of Smoothing Parameter. Returns Normal Scale Estimate of Smoothing Parameter.
CALL: h = hns(data,kernel) Parameter
---------
data : 2D array
shape d x n (d = # dimensions )
h = one dimensional optimal value for smoothing parameter Returns
given the data and kernel. size 1 x D -------
data = data matrix, size N x D (D = # dimensions ) h : array-like
one dimensional optimal value for smoothing parameter
given the data and kernel. size D
HNS only gives an optimal value with respect to mean integrated HNS only gives an optimal value with respect to mean integrated
square error, when the true underlying distribution square error, when the true underlying distribution
is Gaussian. This works reasonably well if the data resembles a is Gaussian. This works reasonably well if the data resembles a
Gaussian distribution. However if the distribution is asymmetric, Gaussian distribution. However if the distribution is asymmetric,
multimodal or have long tails then HNS may return a to large multimodal or have long tails then HNS may return a to large
smoothing parameter, i.e., the KDE may be oversmoothed and mask smoothing parameter, i.e., the KDE may be oversmoothed and mask
important features of the data. (=> large bias). important features of the data. (=> large bias).
One way to remedy this is to reduce H by multiplying with a constant One way to remedy this is to reduce H by multiplying with a constant
factor, e.g., 0.85. Another is to try different values for H and make a factor, e.g., 0.85. Another is to try different values for H and make a
visual check by eye. visual check by eye.
Example: Example:
data = rndnorm(0, 1,20,1) data = rndnorm(0, 1,20,1)
h = hns(data,'epan'); h = hns(data,'epan');
See also hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt, kde
Reference: See also:
--------- ---------
B. W. Silverman (1986) hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt, kde
'Density estimation for statistics and data analysis'
Chapman and Hall, pp 43-48 Reference:
Wand,M.P. and Jones, M.C. (1995) ---------
'Kernel smoothing' B. W. Silverman (1986)
Chapman and Hall, pp 60--63 'Density estimation for statistics and data analysis'
Chapman and Hall, pp 43-48
Wand,M.P. and Jones, M.C. (1995)
'Kernel smoothing'
Chapman and Hall, pp 60--63
''' '''
A = np.atleast_2d(data) A = np.atleast_2d(data)
@@ -606,7 +614,7 @@ class Kernel(object):
         return np.where(iqr > 0, np.minimum(stdA, iqr / 1.349), stdA) * AMISEconstant

     def hos(self, data):
-        ''' Return Oversmoothing Parameter.
+        ''' Returns Oversmoothing Parameter.
@@ -817,15 +825,15 @@ def accum(accmap, a, func=None, size=None, fill_value=0, dtype=None):
 >>> # Sum the diagonals.
 >>> accmap = array([[0,1,2],[2,0,1],[1,2,0]])
 >>> s = accum(accmap, a)
-array([9, 7, 15])
+>>> s
+array([ 9,  7, 15])

 >>> # A 2D output, from sub-arrays with shapes and positions like this:
 >>> # [ (2,2) (2,1)]
 >>> # [ (1,2) (1,1)]
 >>> accmap = array([
-[[0,0],[0,0],[0,1]],
-[[0,0],[0,0],[0,1]],
-[[1,0],[1,0],[1,1]],
-])
+... [[0,0],[0,0],[0,1]],
+... [[0,0],[0,0],[0,1]],
+... [[1,0],[1,0],[1,1]]])

 >>> # Accumulate using a product.
 >>> accum(accmap, a, func=prod, dtype=float)
 array([[ -8.,  18.],
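Note: these changes only repair the doctests (echoing s before its expected value, and adding the ... continuation markers doctest requires for multi-line input); accum itself is unchanged. For the sum-accumulation case, the same grouping can be sketched with plain NumPy, using the a consistent with the docstring's expected outputs:

import numpy as np

a = np.array([[ 1,  2, 3],
              [ 4, -1, 6],
              [-1,  8, 9]])
accmap = np.array([[0, 1, 2],
                   [2, 0, 1],
                   [1, 2, 0]])

# Sum the entries of `a` that share the same label in `accmap`:
s = np.bincount(accmap.ravel(), weights=a.ravel())
print(s)   # [ 9.  7. 15.]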
@@ -880,6 +888,7 @@ def bitget(int_type, offset):
     '''
     mask = (1 << offset)
     return (int_type & mask) != 0
+
 def gridcount(data, X):
     '''
     GRIDCOUNT D-dimensional histogram using linear binning.
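Note: bitget, shown in context above, just tests whether the bit at offset is set; shifting the value right instead of shifting the mask left gives the same answer. A quick check:

# 5 is binary 101: bit 0 set, bit 1 clear, bit 2 set.
for offset in range(3):
    assert ((5 & (1 << offset)) != 0) == (((5 >> offset) & 1) == 1)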
@@ -922,8 +931,10 @@ def gridcount(data, X):
 >>> c = wk.gridcount(data,x)

 >>> h = plb.plot(x,c,'.') # 1D histogram
->>> h1 = plb.plot(x,c/dx/N) # 1D probability density plot
->>> np.trapz(x,c/dx/N)
+>>> pdf = c/dx/N
+>>> h1 = plb.plot(x, pdf) # 1D probability density plot
+>>> np.trapz(pdf, x)
+0.99999999999999956

 See also
 --------
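Note: besides tidying the example, this hunk fixes a real bug in the old doctest: np.trapz takes the ordinates first and the sample points second, trapz(y, x), so np.trapz(x, c/dx/N) integrated the wrong function. With the arguments in the right order the estimated density integrates to ~1. A small demonstration of the argument order:

import numpy as np

x = np.linspace(0.0, np.pi, 100)
y = np.sin(x)

print(np.trapz(y, x))   # ~2.0, the integral of sin over [0, pi]
print(np.trapz(x, y))   # integrates x against y -- not what was intended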
@@ -1005,45 +1016,12 @@ def gridcount(data, X):
         c = c.transpose(1, 0, 2)
     return c

-def test_kde():
-    import numpy as np
-    import wafo.kdetools as wk
-    import pylab as plb
-    N = 500;
-    data = np.random.rayleigh(1, size=(1, N))
-    kde = wk.KDE(data)
-    x = np.linspace(0, max(data.ravel()) + 1, 10)
-    #X,Y = np.meshgrid(x, x)
-    f = kde(x)
-    #plb.hist(data.ravel())
-    plb.plot(x, f)
-    plb.show()
-
-def test_gridcount():
-    import numpy as np
-    import wafo.kdetools as wk
-    import pylab as plb
-    N = 500;
-    data = np.random.rayleigh(1, size=(2, N))
-    x = np.linspace(0, max(data.ravel()) + 1, 10)
-    X = np.vstack((x, x))
-    dx = x[1] - x[0]
-    c = wk.gridcount(data, X)
-    h = plb.contourf(x, x, c)
-    plb.show()
-    h = plb.plot(x, c, '.') # 1D histogram
-    h1 = plb.plot(x, c / dx / N) # 1D probability density plot
-    t = np.trapz(x, c / dx / N)
-    print(t)
-
 def main():
     import doctest
     doctest.testmod()

 if __name__ == '__main__':
-    #main()
-    #test_gridcount()
-    test_kde()
+    main()
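Note: with the ad-hoc test_kde/test_gridcount scripts removed, the docstring examples become the module's test suite: __main__ now calls main(), and doctest.testmod() runs every >>> example in the module and compares the printed output character for character. That is why the doctest repairs above (echoing s, the ... markers, the corrected expected arrays) matter. A minimal illustration of the mechanism:

def double(x):
    """Return 2*x.

    >>> double(21)
    42
    """
    return 2 * x

if __name__ == '__main__':
    import doctest
    doctest.testmod()   # silent when every example's output matches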
