You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

446 lines
21 KiB
Python

'''
Created on 20. nov. 2010
@author: pab
'''
from __future__ import division
import unittest
import numpy as np
from numpy.testing import assert_allclose
from numpy import array, inf
import wafo.kdetools as wk
class TestKdeTools(unittest.TestCase):
def setUp(self):
# N = 20
# data = np.random.rayleigh(1, size=(N,))
self.data = array([0.75355792, 0.72779194, 0.94149169, 0.07841119,
2.32291887, 1.10419995, 0.77055114, 0.60288273,
1.36883635, 1.74754326, 1.09547561, 1.01671133,
0.73211143, 0.61891719, 0.75903487, 1.8919469,
0.72433808, 1.92973094, 0.44749838, 1.36508452])
self.x = np.linspace(0, max(self.data) + 1, 10)
def test0_KDE1D(self):
data, x = self.data, self.x
# kde = wk.KDE(data, hs=0.5, alpha=0.5)
kde0 = wk.KDE(data, hs=0.5, alpha=0.0, inc=16)
fx = kde0.eval_grid(x)
assert_allclose(fx, [0.2039735, 0.40252503, 0.54595078,
0.52219649, 0.3906213, 0.26381501, 0.16407362,
0.08270612, 0.02991145, 0.00720821])
fx = kde0.eval_grid(x, r=1)
assert_allclose(-fx, [0.11911419724002906, 0.13440000694772541,
0.044400116190638696, -0.0677695267531197,
-0.09555596523854318, -0.07498819087690148,
-0.06167607128369182, -0.04678588231996062,
-0.024515979196411814, -0.008022010381009501])
fx = kde0.eval_grid(x, r=2)
assert_allclose(fx, [0.08728138131197069, 0.07558648034784508,
0.05093715852686607, 0.07908624791267539,
0.10495675573359599, 0.07916167222333347,
0.048168330179460386, 0.03438361415806721,
0.02197927811015286, 0.009222988165160621])
ffx = kde0.eval_grid_fast(x)
assert_allclose(ffx, [0.20729484, 0.39865044, 0.53716945, 0.5169322,
0.39060223, 0.26441126, 0.16388801, 0.08388527,
0.03227164, 0.00883579], 1e-6)
fx = kde0.eval_grid_fast(x, r=1)
assert_allclose(fx, [-0.11582450668441863, -0.12901768780183628,
-0.04402464127812092, 0.0636190549560749,
0.09345144501310157, 0.07573621607126926,
0.06149475587201987, 0.04550210608639078,
0.024427027615689087, 0.00885576504750473])
fx = kde0.eval_grid_fast(x, r=2)
assert_allclose(fx, [0.08499284131672676, 0.07572564161758065,
0.05329987919556978, 0.07849796347259348,
0.10232741197885842, 0.07869015379158453,
0.049431823916945394, 0.034527256372343613,
0.021517998409663567, 0.009527401063843402])
f = kde0.eval_grid_fast()
assert_allclose(np.trapz(f, kde0.args), 0.995001)
assert_allclose(f, [0.011494108953097538, 0.0348546729842836,
0.08799292403553607, 0.18568717590587996,
0.32473136104523725, 0.46543163412700084,
0.5453201564089711, 0.5300582814373698,
0.44447650672207173, 0.3411961246641896,
0.25103852230993573, 0.17549519961525845,
0.11072988772879173, 0.05992730870218242,
0.02687783924833738, 0.00974982785617795])
def test1_TKDE1D(self):
data = self.data
x = np.linspace(0.01, max(data) + 1, 10)
kde = wk.TKDE(data, hs=0.5, L2=0.5)
f = kde(x)
assert_allclose(f, [1.03982714, 0.45839018, 0.39514782, 0.32860602,
0.26433318, 0.20717946, 0.15907684, 0.1201074,
0.08941027, 0.06574882])
assert_allclose(np.trapz(f, x), 0.94787730659349068)
f = kde.eval_grid_fast(x)
assert_allclose(f, [1.0401892415290148, 0.45838973393693677,
0.39514689240671547, 0.32860531818532457,
0.2643330110605783, 0.20717975528556506,
0.15907696844388747, 0.12010770443337843,
0.08941129458260941, 0.06574899139165799])
f = kde.eval_grid_fast2(x)
assert_allclose(f, [1.0401892415290148, 0.45838973393693677,
0.39514689240671547, 0.32860531818532457,
0.2643330110605783, 0.20717975528556506,
0.15907696844388747, 0.12010770443337843,
0.08941129458260941, 0.06574899139165799])
assert_allclose(np.trapz(f, x), 0.9479438058416647)
def test1_KDE1D(self):
data, x = self.data, self.x
kde = wk.KDE(data, hs=0.5)
f = kde(x)
assert_allclose(f, [0.2039735, 0.40252503, 0.54595078, 0.52219649,
0.3906213, 0.26381501, 0.16407362, 0.08270612,
0.02991145, 0.00720821])
assert_allclose(np.trapz(f, x), 0.92576174424281876)
def test2_KDE1D(self):
# data, x = self.data, self.x
data = np.asarray([1, 2])
x = np.linspace(0, max(np.ravel(data)) + 1, 10)
kde = wk.KDE(data, hs=0.5)
f = kde(x)
assert_allclose(f, [0.0541248, 0.16555235, 0.33084399, 0.45293325,
0.48345808, 0.48345808, 0.45293325, 0.33084399,
0.16555235, 0.0541248])
assert_allclose(np.trapz(f, x), 0.97323338046725172)
def test1a_KDE1D(self):
data, x = self.data, self.x
kde = wk.KDE(data, hs=0.5, alpha=0.5)
f = kde(x)
assert_allclose(f, [0.17252055, 0.41014271, 0.61349072, 0.57023834,
0.37198073, 0.21409279, 0.12738463, 0.07460326,
0.03956191, 0.01887164])
assert_allclose(np.trapz(f, x), 0.92938023659047952)
def test2a_KDE1D(self):
# data, x = self.data, self.x
data = np.asarray([1, 2])
x = np.linspace(0, max(np.ravel(data)) + 1, 10)
kde = wk.KDE(data, hs=0.5, alpha=0.5)
f = kde(x)
assert_allclose(f, [0.0541248, 0.16555235, 0.33084399, 0.45293325,
0.48345808, 0.48345808, 0.45293325, 0.33084399,
0.16555235, 0.0541248])
assert_allclose(np.trapz(f, x), 0.97323338046725172)
def test_KDE2D(self):
# N = 20
# data = np.random.rayleigh(1, size=(2, N))
data = array([
[0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399,
0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145,
0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821,
0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964],
[1.44080694, 0.39973751, 1.331243, 2.48895822, 1.18894158,
1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689,
0.870329, 1.25106862, 0.5346619, 0.47541236, 1.51930093,
0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]])
x = np.linspace(0, max(np.ravel(data)) + 1, 3)
kde0 = wk.KDE(data, hs=0.5, alpha=0.0, inc=512)
assert_allclose(kde0.eval_grid(x, x),
[[3.27260963e-02, 4.21654678e-02, 5.85338634e-04],
[6.78845466e-02, 1.42195839e-01, 1.41676003e-03],
[1.39466746e-04, 4.26983850e-03, 2.52736185e-05]])
t = [[0.0443506097653615, 0.06433530873456418, 0.0041353838654317856],
[0.07218297149063724, 0.1235819591878892, 0.009288890372002473],
[0.001613328022214066, 0.00794857884864038, 0.0005874786787715641]
]
assert_allclose(kde0.eval_grid_fast(x, x), t)
def test_gridcount_1D(self):
data, x = self.data, self.x
dx = x[1] - x[0]
c = wk.gridcount(data, x)
assert_allclose(c, [0.78762626, 1.77520717, 7.99190087, 4.04054449,
1.67156643, 2.38228499, 1.05933195, 0.29153785, 0.,
0.])
t = np.trapz(c / dx / len(data), x)
assert_allclose(t, 0.9803093435140049)
def test_gridcount_2D(self):
N = 20
# data = np.random.rayleigh(1, size=(2, N))
data = array([
[0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399,
0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145,
0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821,
0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964],
[1.44080694, 0.39973751, 1.331243, 2.48895822, 1.18894158,
1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689,
0.870329, 1.25106862, 0.5346619, 0.47541236, 1.51930093,
0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]])
x = np.linspace(0, max(np.ravel(data)) + 1, 5)
dx = x[1] - x[0]
X = np.vstack((x, x))
c = wk.gridcount(data, X)
assert_allclose(c,
[[0.38922806, 0.8987982, 0.34676493, 0.21042807, 0.],
[1.15012203, 5.16513541, 3.19250588, 0.55420752, 0.],
[0.74293418, 3.42517219, 1.97923195, 0.76076621, 0.],
[0.02063536, 0.31054405, 0.71865964, 0.13486633, 0.],
[0., 0., 0., 0., 0.]], 1e-5)
t = np.trapz(np.trapz(c / (dx**2 * N), x), x)
assert_allclose(t, 0.9011618785736376)
def test_gridcount_3D(self):
N = 20
# data = np.random.rayleigh(1, size=(3, N))
data = np.array([
[0.932896, 0.89522635, 0.80636346, 1.32283371, 0.27125435,
1.91666304, 2.30736635, 1.13662384, 1.73071287, 1.06061127,
0.99598512, 2.16396591, 1.23458213, 1.12406686, 1.16930431,
0.73700592, 1.21135139, 0.46671506, 1.3530304, 0.91419104],
[0.62759088, 0.23988169, 2.04909823, 0.93766571, 1.19343762,
1.94954931, 0.84687514, 0.49284897, 1.05066204, 1.89088505,
0.840738, 1.02901457, 1.0758625, 1.76357967, 0.45792897,
1.54488066, 0.17644313, 1.6798871, 0.72583514, 2.22087245],
[1.69496432, 0.81791905, 0.82534709, 0.71642389, 0.89294732,
1.66888649, 0.69036947, 0.99961448, 0.30657267, 0.98798713,
0.83298728, 1.83334948, 1.90144186, 1.25781913, 0.07122458,
2.42340852, 2.41342037, 0.87233305, 1.17537114, 1.69505988]])
x = np.linspace(0, max(np.ravel(data)) + 1, 3)
dx = x[1] - x[0]
X = np.vstack((x, x, x))
c = wk.gridcount(data, X)
assert_allclose(c,
[[[8.74229894e-01, 1.27910940e+00, 1.42033973e-01],
[1.94778915e+00, 2.59536282e+00, 3.28213680e-01],
[1.08429416e-01, 1.69571495e-01, 7.48896775e-03]],
[[1.44969128e+00, 2.58396370e+00, 2.45459949e-01],
[2.28951650e+00, 4.49653348e+00, 2.73167915e-01],
[1.10905565e-01, 3.18733817e-01, 1.12880816e-02]],
[[7.49265424e-02, 2.18142488e-01, 0.0],
[8.53886762e-02, 3.73415131e-01, 0.0],
[4.16196568e-04, 1.62218824e-02, 0.0]]])
t = np.trapz(np.trapz(np.trapz(c / dx**3 / N, x), x), x)
assert_allclose(t, 0.5164999727560187)
def test_gridcount_4D(self):
N = 20
# data = np.random.rayleigh(1, size=(2, N))
data = array([
[0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399,
0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145],
[0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821,
0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964],
[1.44080694, 0.39973751, 1.331243, 2.48895822, 1.18894158,
1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689],
[0.870329, 1.25106862, 0.5346619, 0.47541236, 1.51930093,
0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]])
x = np.linspace(0, max(np.ravel(data)) + 1, 3)
dx = x[1] - x[0]
X = np.vstack((x, x, x, x))
c = wk.gridcount(data, X)
assert_allclose(c,
[[[[1.77163904e-01, 1.87720108e-01, 0.0],
[5.72573585e-01, 6.09557834e-01, 0.0],
[3.48549923e-03, 4.05931870e-02, 0.0]],
[[1.83770124e-01, 2.56357594e-01, 0.0],
[4.35845892e-01, 6.14958970e-01, 0.0],
[3.07662204e-03, 3.58312786e-02, 0.0]],
[[0.0, 0.0, 0.0],
[0.0, 0.0, 0.0],
[0.0, 0.0, 0.0]]],
[[[3.41883175e-01, 5.97977973e-01, 0.0],
[5.72071865e-01, 8.58566538e-01, 0.0],
[3.46939323e-03, 4.04056116e-02, 0.0]],
[[3.58861043e-01, 6.28962785e-01, 0.0],
[8.80697705e-01, 1.47373158e+00, 0.0],
[2.22868504e-01, 1.18008528e-01, 0.0]],
[[2.91835067e-03, 2.60268355e-02, 0.0],
[3.63686503e-02, 1.07959459e-01, 0.0],
[1.88555613e-02, 7.06358976e-03, 0.0]]],
[[[3.13810608e-03, 2.11731327e-02, 0.0],
[6.71606255e-03, 4.53139824e-02, 0.0],
[0.0, 0.0, 0.0]],
[[7.05946179e-03, 5.44614852e-02, 0.0],
[1.09099593e-01, 1.95935584e-01, 0.0],
[6.61257395e-02, 2.47717418e-02, 0.0]],
[[6.38695629e-04, 5.69610302e-03, 0.0],
[1.00358265e-02, 2.44053065e-02, 0.0],
[5.67244468e-03, 2.12498697e-03, 0.0]]]])
t = np.trapz(np.trapz(np.trapz(np.trapz(c / dx**4 / N, x), x), x), x)
assert_allclose(t, 0.21183518274521254)
class TestKernels(unittest.TestCase):
def setUp(self):
self.names = ['epanechnikov', 'biweight', 'triweight', 'logistic',
'p1epanechnikov', 'p1biweight', 'p1triweight',
'triangular', 'gaussian', 'rectangular', 'laplace']
def test_stats(self):
truth = {
'biweight': (0.14285714285714285, 0.7142857142857143, 22.5),
'logistic': (3.289868133696453, 1./6, 0.023809523809523808),
'p1biweight': (0.14285714285714285, 0.7142857142857143, 22.5),
'triangular': (0.16666666666666666, 0.6666666666666666, inf),
'gaussian': (1, 0.28209479177387814, 0.21157109383040862),
'epanechnikov': (0.2, 0.6, inf),
'triweight': (0.1111111111111111, 0.8158508158508159, inf),
'p1triweight': (0.1111111111111111, 0.8158508158508159, inf),
'p1epanechnikov': (0.2, 0.6, inf),
'rectangular': (0.3333333333333333, 0.5, inf),
'laplace': (2, 0.25, inf)}
for name in self.names:
kernel = wk.Kernel(name)
assert_allclose(kernel.stats(), truth[name])
# truth[name] = kernel.stats()
# print(truth)
def test_norm_factors_1d(self):
truth = {
'biweight': 1.0666666666666667, 'logistic': 1.0,
'p1biweight': 1.0666666666666667, 'triangular': 1.0,
'gaussian': 2.5066282746310002, 'epanechnikov': 1.3333333333333333,
'triweight': 0.91428571428571426, 'laplace': 2,
'p1triweight': 0.91428571428571426,
'p1epanechnikov': 1.3333333333333333, 'rectangular': 2.0}
for name in self.names:
kernel = wk.Kernel(name)
assert_allclose(kernel.norm_factor(d=1, n=20), truth[name])
# truth[name] = kernel.norm_factor(d=1, n=20)
def test_effective_support(self):
truth = {'biweight': (-1.0, 1.0), 'logistic': (-7.0, 7.0),
'p1biweight': (-1.0, 1.0), 'triangular': (-1.0, 1.0),
'gaussian': (-4.0, 4.0), 'epanechnikov': (-1.0, 1.0),
'triweight': (-1.0, 1.0), 'p1triweight': (-1.0, 1.0),
'p1epanechnikov': (-1.0, 1.0), 'rectangular': (-1.0, 1.0),
'laplace': (-7.0, 7.0)}
for name in self.names:
kernel = wk.Kernel(name)
assert_allclose(kernel.effective_support(), truth[name])
# truth[name] = kernel.effective_support()
# print(truth)
# self.assertTrue(False)
def test_that_kernel_is_a_pdf(self):
for name in self.names:
kernel = wk.Kernel(name)
xmin, xmax = kernel.effective_support()
x = np.linspace(xmin, xmax, 4*1024+1)
m0 = kernel.norm_factor(d=1, n=1)
pdf = kernel(x)/m0
# print(name)
# print(pdf[0], pdf[-1])
# print(np.trapz(pdf, x) - 1)
assert_allclose(np.trapz(pdf, x), 1, 1e-2)
# self.assertTrue(False)
class TestSmoothing(unittest.TestCase):
def setUp(self):
self.data = np.array([
[0.932896, 0.89522635, 0.80636346, 1.32283371, 0.27125435,
1.91666304, 2.30736635, 1.13662384, 1.73071287, 1.06061127,
0.99598512, 2.16396591, 1.23458213, 1.12406686, 1.16930431,
0.73700592, 1.21135139, 0.46671506, 1.3530304, 0.91419104],
[0.62759088, 0.23988169, 2.04909823, 0.93766571, 1.19343762,
1.94954931, 0.84687514, 0.49284897, 1.05066204, 1.89088505,
0.840738, 1.02901457, 1.0758625, 1.76357967, 0.45792897,
1.54488066, 0.17644313, 1.6798871, 0.72583514, 2.22087245],
[1.69496432, 0.81791905, 0.82534709, 0.71642389, 0.89294732,
1.66888649, 0.69036947, 0.99961448, 0.30657267, 0.98798713,
0.83298728, 1.83334948, 1.90144186, 1.25781913, 0.07122458,
2.42340852, 2.41342037, 0.87233305, 1.17537114, 1.69505988]])
self.gauss = wk.Kernel('gaussian')
def test_hns(self):
hs = self.gauss.hns(self.data)
assert_allclose(hs, [0.18154437, 0.36207987, 0.37396219])
def test_hos(self):
hs = self.gauss.hos(self.data)
assert_allclose(hs, [0.195209, 0.3893332, 0.40210988])
def test_hms(self):
hs = self.gauss.hmns(self.data)
assert_allclose(hs, [[3.25196193e-01, -2.68892467e-02, 3.18932448e-04],
[-2.68892467e-02, 3.91283306e-01, 2.38654678e-02],
[3.18932448e-04, 2.38654678e-02, 4.05123874e-01]])
hs = self.gauss.hmns(self.data[0])
assert_allclose(hs, self.gauss.hns(self.data[0]))
hs = wk.Kernel('epan').hmns(self.data)
assert_allclose(hs,
[[8.363847e-01, -6.915749e-02, 8.202747e-04],
[-6.915749e-02, 1.006357e+00, 6.138052e-02],
[8.202747e-04, 6.138052e-02, 1.041954e+00]],
rtol=1e-5)
hs = wk.Kernel('biwe').hmns(self.data[:2])
assert_allclose(hs, [[0.868428, -0.071705],
[-0.071705, 1.04685]], rtol=1e-5)
hs = wk.Kernel('triwe').hmns(self.data[:2])
assert_allclose(hs, [[0.975375, -0.080535],
[-0.080535, 1.17577]], rtol=1e-5)
self.assertRaises(NotImplementedError,
wk.Kernel('biwe').hmns, self.data)
self.assertRaises(NotImplementedError,
wk.Kernel('triwe').hmns, self.data)
self.assertRaises(NotImplementedError,
wk.Kernel('triangular').hmns, self.data)
def test_hscv(self):
hs = self.gauss.hscv(self.data)
assert_allclose(hs, [0.1656318800590673, 0.3273938258112911,
0.31072126996412214])
def test_hstt(self):
hs = self.gauss.hstt(self.data)
assert_allclose(hs, [0.18099075, 0.50409881, 0.11018912])
def test_hste(self):
hs = self.gauss.hste(self.data)
assert_allclose(hs, [0.17035204677390572, 0.29851960273788863,
0.186685349741972])
def test_hldpi(self):
hs = self.gauss.hldpi(self.data)
assert_allclose(hs, [0.1732289, 0.33159097, 0.3107633])
def test_hisj(self):
hs = self.gauss.hisj(self.data)
assert_allclose(hs, [0.29542502, 0.74277133, 0.51899114])
if __name__ == "__main__":
# import sys;sys.argv = ['', 'Test.testName']
unittest.main()