added more tests and simplified things..

master
Per A Brodtkorb 8 years ago
parent 9e6a79d24d
commit 500aff71c6

@ -29,7 +29,7 @@ def bitget(int_type, offset):
return np.bitwise_and(int_type, 1 << offset) >> offset
def accumsum_sparse(accmap, a, shape, dtype=None):
def accumsum_sparse(accmap, a, shape=None, dtype=None):
"""
Example
-------
@ -42,12 +42,21 @@ def accumsum_sparse(accmap, a, shape, dtype=None):
>>> # Sum the diagonals.
>>> accmap = array([[0,1,2],[2,0,1],[1,2,0]])
>>> s = accumsum_sparse(accmap, a, (3,))
>>> np.allclose(s.toarray().T, [ 9, 7, 15])
>>> np.allclose(s.toarray(), [ 9, 7, 15])
True
# group vals by idx and sum
>>> vals = array([12.0, 3.2, -15, 88, 12.9])
>>> idx = array([1, 0, 1, 4, 1 ])
>>> np.allclose(accumsum_sparse(idx, vals).toarray(),
... [3.2, 9.9, 0, 0, 88.])
True
"""
if dtype is None:
dtype = a.dtype
if shape is None:
shape = 1 + np.squeeze(np.apply_over_axes(np.max, accmap,
axes=tuple(range(a.ndim))))
shape = np.atleast_1d(shape)
if len(shape) > 1:
binx = accmap[:, 0]
@ -58,12 +67,13 @@ def accumsum_sparse(accmap, a, shape, dtype=None):
binx = accmap.ravel()
zero = np.zeros(len(binx))
out = sparse.coo_matrix(
(a.ravel(), (binx, zero)), shape=(shape, 1), dtype=dtype).tocsr()
(a.ravel(), (binx, zero)), shape=(shape, 1), dtype=dtype).tocsr().T
return out
def accumsum(accmap, a, shape):
def accumsum(accmap, a, shape=None):
"""
Example
-------
>>> from numpy import array
@ -74,11 +84,21 @@ def accumsum(accmap, a, shape):
[-1, 8, 9]])
>>> accmap = array([[0,1,2],[2,0,1],[1,2,0]]) # Sum the diagonals.
>>> s = accumsum(accmap, a, (3,))
>>> np.allclose(s, [ 9, 7, 15])
>>> np.allclose(accumsum(accmap, a, (3,)), [ 9, 7, 15])
True
>>> accmap = array([[0,1,2],[0,1,2],[0,1,2]]) # Sum the columns.
>>> np.allclose(accumsum(accmap, a, (3,)), [4, 9, 18])
True
# group vals by idx and sum
>>> vals = array([12.0, 3.2, -15, 88, 12.9])
>>> idx = array([1, 0, 1, 4, 1 ])
>>> np.allclose(accumsum(idx, vals), [3.2, 9.9, 0, 0, 88.])
True
"""
if shape is None:
shape = 1 + np.squeeze(np.apply_over_axes(np.max, accmap,
axes=tuple(range(a.ndim))))
return np.bincount(accmap.ravel(), a.ravel(), np.array(shape).max())
@ -148,6 +168,9 @@ def accum(accmap, a, func=None, shape=None, fill_value=0, dtype=None):
>>> accum(accmap, a, func=prod, dtype=float)
array([[ -8., 18.],
[ -8., 9.]])
>>> accum(accmap, a, dtype=float)
array([[ 6., 9.],
[ 7., 9.]])
# Same accmap, but create an array of lists of values.
>>> accum(accmap, a, func=lambda x: x, dtype='O')
@ -210,7 +233,7 @@ def _gridcount_nd(acfun, data, x, y, w, binx):
fact2 = np.asarray(np.reshape(inc * np.arange(d), (d, -1)), dtype=int)
fact1 = np.asarray(np.reshape(c_shape.cumprod() / inc, (d, -1)), dtype=int)
bt0 = [0, 0]
X1 = x.ravel()
x1 = x.ravel()
for ir in range(2 ** (d - 1)):
bt0[0] = np.reshape(bitget(ir, np.arange(d)), (d, -1))
bt0[1] = 1 - bt0[0]
@ -222,7 +245,7 @@ def _gridcount_nd(acfun, data, x, y, w, binx):
b1 = np.sum((binx + bt0[one]) * fact1, axis=0)
bt2 = bt0[two] + fact2
b2 = binx + bt2 # linear index to X
c += acfun(b1, np.abs(np.prod(X1[b2] - data, axis=0)) * y,
c += acfun(b1, np.abs(np.prod(x1[b2] - data, axis=0)) * y,
shape=(nc, ))
c = np.reshape(c / w, c_shape, order='F')
@ -247,13 +270,18 @@ def gridcount(data, X, y=1):
Parameters
----------
data = column vectors with D-dimensional data, shape D x Nd
X = row vectors defining discretization, shape D x N
Must include the range of the data.
data : array-like
column vectors with D-dimensional data, shape D x Nd
X : array-like
row vectors defining discretization in each dimension, shape D x N.
The discretization must include the range of the data.
y : array-like
response data. Scalar or vector of size Nd.
Returns
-------
c = gridcount, shape N x N x ... x N
c : ndarray
gridcount, shape N x N x ... x N
GRIDCOUNT obtains the grid counts using linear binning.
There are 2 strategies: simple- or linear- binning.
@ -298,7 +326,7 @@ def gridcount(data, X, y=1):
See also
--------
bincount, accum, kdebin
bincount, accum, kde
Reference
----------
@ -306,28 +334,27 @@ def gridcount(data, X, y=1):
'Kernel smoothing'
Chapman and Hall, pp 182-192
'''
data2, x = np.atleast_2d(data, X)
dataset, x = np.atleast_2d(data, X)
y = np.atleast_1d(y).ravel()
d, inc = x.shape
_assert(d == data2.shape[0], 'Dimension 0 of data and X do not match.')
_assert(d == dataset.shape[0], 'Dimension 0 of data and X do not match.')
dx = np.diff(x[:, :2], axis=1)
xlo, xup = x[:, 0], x[:, -1]
datlo, datup = data2.min(axis=1), data2.max(axis=1)
datlo, datup = dataset.min(axis=1), dataset.max(axis=1)
_assert(not ((datlo < xlo) | (xup < datup)).any(),
'X does not include whole range of the data!')
# acfun = accumsum_sparse # faster than accum
acfun = accumsum # faster than accumsum_sparse
binx = np.asarray(np.floor((data2 - xlo[:, np.newaxis]) / dx), dtype=int)
dx = np.diff(x[:, :2], axis=1)
binx = np.asarray(np.floor((dataset - xlo[:, np.newaxis]) / dx), dtype=int)
w = dx.prod()
if d == 1:
return _gridcount_1d(acfun, data2, x, y, w, binx, inc)
# else: # d>1
return _gridcount_nd(acfun, data2, x, y, w, binx)
return _gridcount_1d(acfun, dataset, x, y, w, binx, inc)
return _gridcount_nd(acfun, dataset, x, y, w, binx)
if __name__ == '__main__':

@ -247,7 +247,7 @@ class _KDE(object):
if len(args) == 0:
args = self.get_args()
self.args = args
return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds)
return self.eval_grid_fun(self._eval_grid_fast, *args, **kwds)
def _eval_grid_fast(self, *args, **kwds):
pass
@ -273,7 +273,7 @@ class _KDE(object):
if len(args) == 0:
args = self.get_args()
self.args = args
return self._eval_grid_fun(self._eval_grid, *args, **kwds)
return self.eval_grid_fun(self._eval_grid, *args, **kwds)
def _eval_grid(self, *args, **kwds):
pass
@ -301,7 +301,7 @@ class _KDE(object):
self._add_contour_levels(wdata)
return wdata
def _eval_grid_fun(self, eval_grd, *args, **kwds):
def eval_grid_fun(self, eval_grd, *args, **kwds):
output = kwds.pop('output', 'value')
f = eval_grd(*args, **kwds)
if output == 'value':
@ -434,9 +434,11 @@ class TKDE(_KDE):
t = np.trapz(f, x)
"""
def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None,
xmax=None, inc=512, L2=None):
def __init__(self, data, hs=None, kernel=None, alpha=0.0,
xmin=None, xmax=None, inc=512, L2=None):
self.L2 = L2
# self.dataset = data
# self.tkde =
super(TKDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc)
# @property
@ -445,6 +447,8 @@ class TKDE(_KDE):
#
# @dataset.setter
# def dataset(self, data):
# self._dataset = atleast_2d(data)
# self._tdataset = self._dat2gaus(self._dataset)
def _initialize(self):
self._check_xmin()
@ -455,7 +459,8 @@ class TKDE(_KDE):
xmax = self.xmax
if xmax is not None:
xmax = self._dat2gaus(np.reshape(xmax, (-1, 1)))
self.tkde = KDE(tdataset, self.hs, self.kernel, self.alpha, xmin, xmax,
self.tkde = KDE(tdataset, self.hs, self.kernel, self.alpha,
np.ravel(xmin), np.ravel(xmax),
self.inc)
if self.inc is None:
self.inc = self.tkde.inc
@ -467,7 +472,7 @@ class TKDE(_KDE):
L2 = np.atleast_1d(self.L2) * np.ones(self.d)
self.xmin = np.where(L2 != 1,
np.maximum(self.xmin, amin / 100.0),
self.xmin).reshape((-1, 1))
self.xmin)
def _dat2gaus(self, points):
if self.L2 is None:
@ -527,7 +532,7 @@ class TKDE(_KDE):
The values evaluated at meshgrid(*args).
"""
return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds)
return self.eval_grid_fun(self._eval_grid_fast, *args, **kwds)
def _interpolate(self, points, f, *args, **kwds):
ipoints = meshgrid(*args) # if self.d > 1 else args
@ -754,7 +759,7 @@ class KDE(_KDE):
y = kwds.get('y', 1.0)
if self.alpha > 0:
y = y / self._lambda**d
warnings.warn('alpha parameter is not used for binned!')
# Find the binned kernel weights, c.
c = gridcount(self.dataset, X, y=y)
@ -922,16 +927,16 @@ class KRegression(object): # _KDE):
self.tkde = TKDE(data, hs=hs, kernel=kernel,
alpha=alpha, xmin=xmin, xmax=xmax, inc=inc, L2=L2)
self.y = y
self.y = np.atleast_1d(y)
self.p = p
def eval_grid_fast(self, *args, **kwds):
self._grdfun = self.tkde.eval_grid_fast
return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds)
return self.tkde.eval_grid_fun(self._eval_gridfun, *args, **kwds)
def eval_grid(self, *args, **kwds):
self._grdfun = self.tkde.eval_grid
return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds)
return self.tkde.eval_grid_fun(self._eval_gridfun, *args, **kwds)
def _eval_gridfun(self, *args, **kwds):
grdfun = self._grdfun
@ -1017,7 +1022,30 @@ class BKRegression(object):
xi = np.linspace(xmin - sml, xmax + sml, ni)
return xi
def prb_ci(self, n, p, alpha=0.05, **kwds):
def _wilson_score(self, n, p, alpha):
    """Return the Wilson score interval for a binomial probability.

    Parameters
    ----------
    n : array-like
        Number of observations behind each probability estimate.
    p : array-like
        Estimated success probability.
    alpha : float
        Significance level of the interval.

    Returns
    -------
    plo, pup : ndarray
        Lower and upper interval limits, limited to the range [0, 1].
    """
    # NOTE(review): `_invnorm` is defined elsewhere in this module and is
    # presumably the inverse standard-normal CDF, which makes z0 positive
    # for alpha < 1 -- confirm.
    z0 = -_invnorm(alpha / 2)
    z0_sq = z0 ** 2
    den = 1 + z0_sq / n
    xc = (p + z0_sq / (2 * n)) / den
    halfwidth = z0 * sqrt(p * (1 - p) / n + z0_sq / (4 * n ** 2)) / den
    # Limit the interval bounds themselves, not the half-width: clipping the
    # half-width is a no-op and lets rounding errors push the bounds
    # slightly below 0 or above 1.
    plo = np.maximum(xc - halfwidth, 0)
    pup = np.minimum(xc + halfwidth, 1.0)
    return plo, pup
def _credible_interval(self, n, p, alpha):
    """Return a Beta-based credible interval for a binomial probability.

    The limits are inverse survival function values of a Beta distribution
    with shape parameters ``n * p + self.a`` and ``n * (1 - p) + self.b``
    (with y = n * p successes this is Beta(y + a, n - y + b)).  Per the
    original note, a = b = 0.5 gives the Jeffreys interval.

    Parameters
    ----------
    n : array-like
        Number of observations behind each probability estimate.
    p : array-like
        Estimated success probability.
    alpha : float
        Significance level of the interval.

    Returns
    -------
    plo, pup : ndarray
        Lower and upper interval limits.
    """
    beta_isf = scipy.stats.beta.isf
    shape_a = n * p + self.a
    shape_b = n * (1 - p) + self.b
    # The limits are pinned to exactly 0 / 1 where the estimate itself
    # sits on the boundary.
    lower = np.where(p == 0, 0, beta_isf(1 - alpha / 2, shape_a, shape_b))
    upper = np.where(p == 1, 1, beta_isf(alpha / 2, shape_a, shape_b))
    return lower, upper
def prb_ci(self, n, p, alpha=0.05):
"""Return Confidence Interval for the binomial probability p.
Parameters
@ -1040,25 +1068,9 @@ class BKRegression(object):
"""
if self.method.startswith('w'):
# Wilson score
z0 = -_invnorm(alpha / 2)
den = 1 + (z0 ** 2. / n)
xc = (p + (z0 ** 2) / (2 * n)) / den
halfwidth = (z0 * sqrt((p * (1 - p) / n) +
(z0 ** 2 / (4 * (n ** 2))))) / den
plo = (xc - halfwidth).clip(min=0) # wilson score
pup = (xc + halfwidth).clip(max=1.0) # wilson score
plo, pup = self._wilson_score(n, p, alpha)
else:
# Jeffreys intervall a=b=0.5
# st.beta.isf(alpha/2, y+a, n-y+b) y = n*p, n-y = n*(1-p)
a = self.a
b = self.b
st = scipy.stats
pup = np.where(p == 1, 1,
st.beta.isf(alpha / 2, n * p + a, n * (1 - p) + b))
plo = np.where(p == 0, 0,
st.beta.isf(1 - alpha / 2,
n * p + a, n * (1 - p) + b))
plo, pup = self._credible_interval(n, p, alpha)
return plo, pup
def prb_empirical(self, xi=None, hs_e=None, alpha=0.05, color='r', **kwds):
@ -1088,12 +1100,12 @@ class BKRegression(object):
y = self.y
c = gridcount(x, xi) # + self.a + self.b # count data
if (y == 1).any():
if np.any(y == 1):
c0 = gridcount(x[y == 1], xi) # + self.a # count success
else:
c0 = np.zeros(np.shape(xi))
prb = np.where(c == 0, 0, c0 / (c + _TINY)) # assume prb==0 for c==0
CI = np.vstack(self.prb_ci(c, prb, alpha, **kwds))
CI = np.vstack(self.prb_ci(c, prb, alpha))
prb_e = PlotData(prb, xi, plotmethod='plot', plot_args=['.'],
plot_kwds=dict(markersize=6, color=color, picker=5))
@ -1213,7 +1225,7 @@ def kde_demo1():
for ix, h in enumerate(hVec):
plt.figure(ix)
kde = KDE(data, hs=h, kernel=kernel)
f2 = kde(x, output='plot', title='h_s = {0:2.2f}'.format(h),
f2 = kde(x, output='plot', title='h_s = {0:2.2f}'.format(float(h)),
ylab='Density')
f2.plot('k-')
@ -1278,8 +1290,8 @@ def kde_demo3():
plt.plot(data[0], data[1], '.')
# plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable
tkde = TKDE(data, L2=0.5)
hs = Kernel('gauss').get_smoothing(data**0.5)
tkde = TKDE(data, hs=hs, L2=0.5)
ft = tkde.eval_grid_fast(
output='plot', title='Transformation KDE', plotflag=1)
@ -1312,13 +1324,13 @@ def kde_demo4(N=50):
f1 = kde1(output='plot', label='Ordinary KDE', plotflag=1)
plt.figure(0)
f.plot('r', label='hns={0:g}'.format(kde.hs))
f.plot('r', label='hns={0}'.format(kde.hs))
# plt.figure(2)
f1.plot('b', label='hisj={0:g}'.format(kde1.hs))
x = np.linspace(-4, 4)
for loc in [-5, 5]:
plt.plot(x + loc, st.norm.pdf(x, 0, scale=1) / 2, 'k:',
label='True density')
f1.plot('b', label='hisj={0}'.format(kde1.hs))
x = np.linspace(-9, 9)
plt.plot(x, (st.norm.pdf(x, loc=-5, scale=1) +
st.norm.pdf(x, loc=5, scale=1)) / 2, 'k:',
label='True density')
plt.legend()
@ -1335,11 +1347,11 @@ def kde_demo5(N=500):
st.norm.rvs(loc=-5, scale=1, size=(2, N,))))
kde = KDE(data, kernel=Kernel('gauss', 'hns'))
f = kde(output='plot', plotflag=1,
title='Ordinary KDE (hns={0:s}'.format(str(kde.hs.tolist())))
title='Ordinary KDE, hns={0:s}'.format(str(list(kde.hs))))
kde1 = KDE(data, kernel=Kernel('gauss', 'hisj'))
f1 = kde1(output='plot', plotflag=1,
title='Ordinary KDE (hisj={0:s})'.format(str(kde1.hs.tolist())))
title='Ordinary KDE, hisj={0:s}'.format(str(list(kde1.hs))))
plt.figure(0)
plt.clf()
@ -1766,9 +1778,9 @@ def check_bkregression():
plt.ion()
k = 0
for _i, n in enumerate([50, 100, 300, 600]):
x, y, fun1 = _get_data(
n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75)
bkreg = BKRegression(x, y)
x, y, fun1 = _get_data(n, symmetric=True, loc1=0.1,
scale1=0.6, scale2=0.75)
bkreg = BKRegression(x, y, a=0.05, b=0.05)
fbest = bkreg.prb_search_best(
hsfun='hste', alpha=0.05, color='g', label='Transit_D')
@ -1951,8 +1963,8 @@ if __name__ == '__main__':
if False:
test_docstrings(__file__)
else:
kde_demo2()
# check_bkregression()
# kde_demo5()
check_bkregression()
# check_regression_bin()
# check_kreg_demo3()
# check_kreg_demo4()

@ -7,26 +7,26 @@ from __future__ import division
import unittest
import numpy as np
from numpy.testing import assert_allclose
from numpy import array, inf
import wafo.objects as wo
import wafo.kdetools as wk
import scipy.stats as st
class TestKdeTools(unittest.TestCase):
class TestKde(unittest.TestCase):
def setUp(self):
# N = 20
# data = np.random.rayleigh(1, size=(N,))
self.data = array([0.75355792, 0.72779194, 0.94149169, 0.07841119,
2.32291887, 1.10419995, 0.77055114, 0.60288273,
1.36883635, 1.74754326, 1.09547561, 1.01671133,
0.73211143, 0.61891719, 0.75903487, 1.8919469,
0.72433808, 1.92973094, 0.44749838, 1.36508452])
self.data = np.array([0.75355792, 0.72779194, 0.94149169, 0.07841119,
2.32291887, 1.10419995, 0.77055114, 0.60288273,
1.36883635, 1.74754326, 1.09547561, 1.01671133,
0.73211143, 0.61891719, 0.75903487, 1.8919469,
0.72433808, 1.92973094, 0.44749838, 1.36508452])
self.x = np.linspace(0, max(self.data) + 1, 10)
def test0_KDE1D(self):
data, x = self.data, self.x
# kde = wk.KDE(data, hs=0.5, alpha=0.5)
kde0 = wk.KDE(data, hs=0.5, alpha=0.0, inc=16)
@ -36,7 +36,6 @@ class TestKdeTools(unittest.TestCase):
0.08270612, 0.02991145, 0.00720821])
fx = kde0.eval_grid(x, r=1)
assert_allclose(-fx, [0.11911419724002906, 0.13440000694772541,
0.044400116190638696, -0.0677695267531197,
-0.09555596523854318, -0.07498819087690148,
@ -125,6 +124,13 @@ class TestKdeTools(unittest.TestCase):
0.16555235, 0.0541248])
assert_allclose(np.trapz(f, x), 0.97323338046725172)
f0 = kde(output='plot')
self.assertIsInstance(f0, wo.PlotData)
assert_allclose(np.trapz(f0.data, f0.args), 0.9319800260106625)
f0 = kde.eval_grid_fast(output='plot')
self.assertIsInstance(f0, wo.PlotData)
assert_allclose(np.trapz(f0.data, f0.args), 0.9319799696210691)
def test1a_KDE1D(self):
data, x = self.data, self.x
@ -136,7 +142,15 @@ class TestKdeTools(unittest.TestCase):
assert_allclose(np.trapz(f, x), 0.92938023659047952)
def test2a_KDE1D(self):
f0 = kde(output='plot')
self.assertIsInstance(f0, wo.PlotData)
assert_allclose(np.trapz(f0.data, f0.args), 0.9871189376720593)
f0 = kde.eval_grid_fast(output='plot')
self.assertIsInstance(f0, wo.PlotData)
assert_allclose(np.trapz(f0.data, f0.args), 0.9962507385131669)
def test2a_KDE_1D_hs_5_alpha_5(self):
# data, x = self.data, self.x
data = np.asarray([1, 2])
x = np.linspace(0, max(np.ravel(data)) + 1, 10)
@ -151,7 +165,7 @@ class TestKdeTools(unittest.TestCase):
def test_KDE2D(self):
# N = 20
# data = np.random.rayleigh(1, size=(2, N))
data = array([
data = np.array([
[0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399,
0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145,
0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821,
@ -176,269 +190,125 @@ class TestKdeTools(unittest.TestCase):
]
assert_allclose(kde0.eval_grid_fast(x, x), t)
def test_gridcount_1D(self):
    # 1-D grid counts of the fixture sample compared with stored values.
    samples, grid = self.data, self.x
    step = grid[1] - grid[0]
    counts = wk.gridcount(samples, grid)
    expected = [0.78762626, 1.77520717, 7.99190087, 4.04054449,
                1.67156643, 2.38228499, 1.05933195, 0.29153785, 0.,
                0.]
    assert_allclose(counts, expected)

    # Scale the counts by grid spacing and sample size before integrating.
    scaled = counts / step / len(samples)
    assert_allclose(np.trapz(scaled, grid), 0.9803093435140049)
def test_gridcount_2D(self):
    # 2-D grid counts of a fixed sample compared with stored values.
    N = 20
    # Fixed sample; presumably drawn once with
    # np.random.rayleigh(1, size=(2, N)).
    sample = np.array([
        [0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399,
         0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145,
         0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821,
         0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964],
        [1.44080694, 0.39973751, 1.331243, 2.48895822, 1.18894158,
         1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689,
         0.870329, 1.25106862, 0.5346619, 0.47541236, 1.51930093,
         0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]])
    axis = np.linspace(0, max(np.ravel(sample)) + 1, 5)
    step = axis[1] - axis[0]
    # The same axis is used for both dimensions.
    counts = wk.gridcount(sample, np.vstack((axis, axis)))
    expected = [[0.38922806, 0.8987982, 0.34676493, 0.21042807, 0.],
                [1.15012203, 5.16513541, 3.19250588, 0.55420752, 0.],
                [0.74293418, 3.42517219, 1.97923195, 0.76076621, 0.],
                [0.02063536, 0.31054405, 0.71865964, 0.13486633, 0.],
                [0., 0., 0., 0., 0.]]
    assert_allclose(counts, expected, rtol=1e-5)

    # Integrate the scaled counts over each axis in turn.
    integral = counts / (step**2 * N)
    for _ in range(2):
        integral = np.trapz(integral, axis)
    assert_allclose(integral, 0.9011618785736376)
def test_gridcount_3D(self):
    # 3-D grid counts of a fixed sample compared with stored values.
    N = 20
    # Fixed sample; presumably drawn once with
    # np.random.rayleigh(1, size=(3, N)).
    sample = np.array([
        [0.932896, 0.89522635, 0.80636346, 1.32283371, 0.27125435,
         1.91666304, 2.30736635, 1.13662384, 1.73071287, 1.06061127,
         0.99598512, 2.16396591, 1.23458213, 1.12406686, 1.16930431,
         0.73700592, 1.21135139, 0.46671506, 1.3530304, 0.91419104],
        [0.62759088, 0.23988169, 2.04909823, 0.93766571, 1.19343762,
         1.94954931, 0.84687514, 0.49284897, 1.05066204, 1.89088505,
         0.840738, 1.02901457, 1.0758625, 1.76357967, 0.45792897,
         1.54488066, 0.17644313, 1.6798871, 0.72583514, 2.22087245],
        [1.69496432, 0.81791905, 0.82534709, 0.71642389, 0.89294732,
         1.66888649, 0.69036947, 0.99961448, 0.30657267, 0.98798713,
         0.83298728, 1.83334948, 1.90144186, 1.25781913, 0.07122458,
         2.42340852, 2.41342037, 0.87233305, 1.17537114, 1.69505988]])
    axis = np.linspace(0, max(np.ravel(sample)) + 1, 3)
    step = axis[1] - axis[0]
    # The same axis is used for all three dimensions.
    counts = wk.gridcount(sample, np.vstack((axis, axis, axis)))
    expected = [[[8.74229894e-01, 1.27910940e+00, 1.42033973e-01],
                 [1.94778915e+00, 2.59536282e+00, 3.28213680e-01],
                 [1.08429416e-01, 1.69571495e-01, 7.48896775e-03]],
                [[1.44969128e+00, 2.58396370e+00, 2.45459949e-01],
                 [2.28951650e+00, 4.49653348e+00, 2.73167915e-01],
                 [1.10905565e-01, 3.18733817e-01, 1.12880816e-02]],
                [[7.49265424e-02, 2.18142488e-01, 0.0],
                 [8.53886762e-02, 3.73415131e-01, 0.0],
                 [4.16196568e-04, 1.62218824e-02, 0.0]]]
    assert_allclose(counts, expected)

    # Integrate the scaled counts over each axis in turn.
    integral = counts / step**3 / N
    for _ in range(3):
        integral = np.trapz(integral, axis)
    assert_allclose(integral, 0.5164999727560187)
def test_gridcount_4D(self):
    # 4-D grid counts of a fixed sample compared with stored values.
    # NOTE(review): N is 20 although each row below holds 10 values; the
    # reference integral was evidently produced with this N, so it is kept.
    N = 20
    sample = np.array([
        [0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399,
         0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145],
        [0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821,
         0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964],
        [1.44080694, 0.39973751, 1.331243, 2.48895822, 1.18894158,
         1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689],
        [0.870329, 1.25106862, 0.5346619, 0.47541236, 1.51930093,
         0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]])
    axis = np.linspace(0, max(np.ravel(sample)) + 1, 3)
    step = axis[1] - axis[0]
    # The same axis is used for all four dimensions.
    counts = wk.gridcount(sample, np.vstack((axis, axis, axis, axis)))
    expected = [[[[1.77163904e-01, 1.87720108e-01, 0.0],
                  [5.72573585e-01, 6.09557834e-01, 0.0],
                  [3.48549923e-03, 4.05931870e-02, 0.0]],
                 [[1.83770124e-01, 2.56357594e-01, 0.0],
                  [4.35845892e-01, 6.14958970e-01, 0.0],
                  [3.07662204e-03, 3.58312786e-02, 0.0]],
                 [[0.0, 0.0, 0.0],
                  [0.0, 0.0, 0.0],
                  [0.0, 0.0, 0.0]]],
                [[[3.41883175e-01, 5.97977973e-01, 0.0],
                  [5.72071865e-01, 8.58566538e-01, 0.0],
                  [3.46939323e-03, 4.04056116e-02, 0.0]],
                 [[3.58861043e-01, 6.28962785e-01, 0.0],
                  [8.80697705e-01, 1.47373158e+00, 0.0],
                  [2.22868504e-01, 1.18008528e-01, 0.0]],
                 [[2.91835067e-03, 2.60268355e-02, 0.0],
                  [3.63686503e-02, 1.07959459e-01, 0.0],
                  [1.88555613e-02, 7.06358976e-03, 0.0]]],
                [[[3.13810608e-03, 2.11731327e-02, 0.0],
                  [6.71606255e-03, 4.53139824e-02, 0.0],
                  [0.0, 0.0, 0.0]],
                 [[7.05946179e-03, 5.44614852e-02, 0.0],
                  [1.09099593e-01, 1.95935584e-01, 0.0],
                  [6.61257395e-02, 2.47717418e-02, 0.0]],
                 [[6.38695629e-04, 5.69610302e-03, 0.0],
                  [1.00358265e-02, 2.44053065e-02, 0.0],
                  [5.67244468e-03, 2.12498697e-03, 0.0]]]]
    assert_allclose(counts, expected)

    # Integrate the scaled counts over each axis in turn.
    integral = counts / step**4 / N
    for _ in range(4):
        integral = np.trapz(integral, axis)
    assert_allclose(integral, 0.21183518274521254)
class TestKernels(unittest.TestCase):
    """Regression tests for ``wk.Kernel`` over a fixed list of kernel names."""

    def setUp(self):
        self.names = ['epanechnikov', 'biweight', 'triweight', 'logistic',
                      'p1epanechnikov', 'p1biweight', 'p1triweight',
                      'triangular', 'gaussian', 'rectangular', 'laplace']

    def test_stats(self):
        # Stored result of Kernel.stats() for every kernel name.
        expected = {
            'biweight': (0.14285714285714285, 0.7142857142857143, 22.5),
            'logistic': (3.289868133696453, 1./6, 0.023809523809523808),
            'p1biweight': (0.14285714285714285, 0.7142857142857143, 22.5),
            'triangular': (0.16666666666666666, 0.6666666666666666, np.inf),
            'gaussian': (1, 0.28209479177387814, 0.21157109383040862),
            'epanechnikov': (0.2, 0.6, np.inf),
            'triweight': (0.1111111111111111, 0.8158508158508159, np.inf),
            'p1triweight': (0.1111111111111111, 0.8158508158508159, np.inf),
            'p1epanechnikov': (0.2, 0.6, np.inf),
            'rectangular': (0.3333333333333333, 0.5, np.inf),
            'laplace': (2, 0.25, np.inf),
        }
        for name in self.names:
            assert_allclose(wk.Kernel(name).stats(), expected[name])

    def test_norm_factors_1d(self):
        # Stored result of Kernel.norm_factor(d=1, n=20) for every name.
        expected = {
            'biweight': 1.0666666666666667,
            'logistic': 1.0,
            'p1biweight': 1.0666666666666667,
            'triangular': 1.0,
            'gaussian': 2.5066282746310002,
            'epanechnikov': 1.3333333333333333,
            'triweight': 0.91428571428571426,
            'laplace': 2,
            'p1triweight': 0.91428571428571426,
            'p1epanechnikov': 1.3333333333333333,
            'rectangular': 2.0,
        }
        for name in self.names:
            factor = wk.Kernel(name).norm_factor(d=1, n=20)
            assert_allclose(factor, expected[name])

    def test_effective_support(self):
        # Every kernel is expected to report (-1, 1) except the three
        # overridden below.
        expected = dict.fromkeys(self.names, (-1.0, 1.0))
        expected.update(logistic=(-7.0, 7.0),
                        laplace=(-7.0, 7.0),
                        gaussian=(-4.0, 4.0))
        for name in self.names:
            support = wk.Kernel(name).effective_support()
            assert_allclose(support, expected[name])

    def test_that_kernel_is_a_pdf(self):
        # The kernel divided by its norm factor should integrate to 1
        # (within rtol=1e-2) over its effective support.
        for name in self.names:
            kernel = wk.Kernel(name)
            lower, upper = kernel.effective_support()
            grid = np.linspace(lower, upper, 4*1024+1)
            norm = kernel.norm_factor(d=1, n=1)
            density = kernel(grid)/norm
            assert_allclose(np.trapz(density, grid), 1, rtol=1e-2)
class TestSmoothing(unittest.TestCase):
    """Regression tests for the smoothing-parameter methods of ``wk.Kernel``.

    Every test feeds the same fixed 3 x 20 sample to one method and compares
    the result with stored reference values.
    """

    def setUp(self):
        self.data = np.array([
            [0.932896, 0.89522635, 0.80636346, 1.32283371, 0.27125435,
             1.91666304, 2.30736635, 1.13662384, 1.73071287, 1.06061127,
             0.99598512, 2.16396591, 1.23458213, 1.12406686, 1.16930431,
             0.73700592, 1.21135139, 0.46671506, 1.3530304, 0.91419104],
            [0.62759088, 0.23988169, 2.04909823, 0.93766571, 1.19343762,
             1.94954931, 0.84687514, 0.49284897, 1.05066204, 1.89088505,
             0.840738, 1.02901457, 1.0758625, 1.76357967, 0.45792897,
             1.54488066, 0.17644313, 1.6798871, 0.72583514, 2.22087245],
            [1.69496432, 0.81791905, 0.82534709, 0.71642389, 0.89294732,
             1.66888649, 0.69036947, 0.99961448, 0.30657267, 0.98798713,
             0.83298728, 1.83334948, 1.90144186, 1.25781913, 0.07122458,
             2.42340852, 2.41342037, 0.87233305, 1.17537114, 1.69505988]])
        self.gauss = wk.Kernel('gaussian')

    def test_hns(self):
        expected = [0.18154437, 0.36207987, 0.37396219]
        assert_allclose(self.gauss.hns(self.data), expected)

    def test_hos(self):
        expected = [0.195209, 0.3893332, 0.40210988]
        assert_allclose(self.gauss.hos(self.data), expected)

    def test_hms(self):
        # Gaussian kernel, full 3-D sample.
        full = self.gauss.hmns(self.data)
        assert_allclose(
            full,
            [[3.25196193e-01, -2.68892467e-02, 3.18932448e-04],
             [-2.68892467e-02, 3.91283306e-01, 2.38654678e-02],
             [3.18932448e-04, 2.38654678e-02, 4.05123874e-01]])

        # For a single row hmns is expected to agree with hns.
        first_row = self.data[0]
        assert_allclose(self.gauss.hmns(first_row),
                        self.gauss.hns(first_row))

        epan = wk.Kernel('epan').hmns(self.data)
        assert_allclose(epan,
                        [[8.363847e-01, -6.915749e-02, 8.202747e-04],
                         [-6.915749e-02, 1.006357e+00, 6.138052e-02],
                         [8.202747e-04, 6.138052e-02, 1.041954e+00]],
                        rtol=1e-5)

        # Two-row input for the biweight and triweight kernels.
        two_rows = self.data[:2]
        biwe = wk.Kernel('biwe').hmns(two_rows)
        assert_allclose(biwe, [[0.868428, -0.071705],
                               [-0.071705, 1.04685]], rtol=1e-5)
        triwe = wk.Kernel('triwe').hmns(two_rows)
        assert_allclose(triwe, [[0.975375, -0.080535],
                                [-0.080535, 1.17577]], rtol=1e-5)

        # These kernels are expected to reject the full three-row sample.
        for name in ('biwe', 'triwe', 'triangular'):
            self.assertRaises(NotImplementedError,
                              wk.Kernel(name).hmns, self.data)

    def test_hscv(self):
        expected = [0.1656318800590673, 0.3273938258112911,
                    0.31072126996412214]
        assert_allclose(self.gauss.hscv(self.data), expected)

    def test_hstt(self):
        expected = [0.18099075, 0.50409881, 0.11018912]
        assert_allclose(self.gauss.hstt(self.data), expected)

    def test_hste(self):
        expected = [0.17035204677390572, 0.29851960273788863,
                    0.186685349741972]
        assert_allclose(self.gauss.hste(self.data), expected)

    def test_hldpi(self):
        expected = [0.1732289, 0.33159097, 0.3107633]
        assert_allclose(self.gauss.hldpi(self.data), expected)

    def test_hisj(self):
        expected = [0.29542502, 0.74277133, 0.51899114]
        assert_allclose(self.gauss.hisj(self.data), expected)
class TestRegression(unittest.TestCase):
    """Regression tests for ``wk.KRegression`` and ``wk.BKRegression``."""

    def test_KRegression(self):
        N = 51
        x = np.linspace(0, 1, N)
        # Fixed noise terms; presumably drawn once with
        # np.random.normal(loc=0, scale=0.075, size=(N,)).
        noise = [
            0.0514233500271586, 0.00165101982431131, 0.042827107319028994,
            -0.084351702283385, 0.05978024392552100, -0.07121894535738457,
            0.0855578119920183, -0.0061865198365448, 0.060986773136137415,
            0.0467717713275598, -0.0852368434029634, 0.09790798995780517,
            -0.174003547831554, 0.1100349974247687, 0.12934695904976257,
            -0.036688944487546, -0.0279545148054110, 0.09660222791922815,
            -0.108463847524115, -0.0635162550551463, 0.017192887741329627,
            -0.031520480101878, 0.03939880367791403, -0.06343921941793985,
            0.0574763321274059, -0.1186005160931940, 0.023007133904660495,
            0.0572646924609536, -0.0334012844057809, -0.03444460758658313,
            0.0325434547422866, 0.06063111859444784, 0.0010264474321885913,
            -0.162288671571205, 0.01334616853351956, -0.020490428895193084,
            0.0446047497979159, 0.02924587567502737, 0.021177586536616458,
            0.0634083218094540, -0.1506377646036794, -0.03214553797245153,
            0.1850745187671265, -0.0151240946088902, -0.10599562843454335,
            0.0317357805015679, -0.0736187558312158, 0.04791463883941161,
            0.0660021138871709, -0.1049359954387588, 0.0034961490852392463]
        # Response: two Gaussian-shaped bumps plus the fixed noise.
        y = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2)) + noise

        kreg = wk.KRegression(x, y)
        fit = kreg(output='plotobj', title='Kernel regression', plotflag=1)
        # Only every fifth point of the result is compared.
        expected = [
            3.14313544673463, 3.14582567119112, 3.149199078830904,
            3.153335095194225, 3.15813722171621, 3.16302709623568,
            3.16631430398602, 3.164138775969285, 3.14947062082316,
            3.11341295908516, 3.05213808272656, 2.976097561057097,
            2.908020176929025, 2.867826513276857, 2.8615179445705,
            2.88155232529645, 2.91307482047679, 2.942469210090470,
            2.96350144269953, 2.976399025328952, 2.9836554385038,
            2.987516554300354, 2.9894470264681, 2.990311688080114,
            2.9906144224522406, 2.9906534916935743]
        assert_allclose(fit.data[::5], expected)

    def test_BKRegression(self):
        # The import is not used below but is kept from the original test.
        from wafo.kdetools.kdetools import _get_data
        # Fixed x/y data; presumably generated once with
        # _get_data(51, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75).
        x = [-2.9784022156693037, -2.923269270862857, -2.640625797489305,
             -2.592465150170373, -2.5777471766751514, -2.5597898266706323,
             -2.5411937415815604, -2.501753472506631, -2.4939048380402378,
             -2.4747969073957368, -2.3324036659351286, -2.3228634370815,
             -2.230871371173083, -2.21411949373986, -2.2035967461005335,
             -2.1927287694263082, -2.1095391808427064, -2.0942500415622503,
             -2.0774862883018708, -2.0700940505412, -2.054918428555726,
             -1.979624045501378, -1.815804869116454, -1.780636214263252,
             -1.7494324035239686, -1.723149182957688, -1.7180532497996817,
             -1.7016701153705522, -1.6120633534061788, -1.5862592143187193,
             -1.517561220921166, -1.5017798665502253, -1.4895432407186429,
             -1.4470094450898578, -1.4302454657287063, -1.3243060491576388,
             -1.293989140781724, -1.2570066577415648, -1.2332757902347795,
             -1.2306697417054666, -1.0495284321772482, -0.9923351727665026,
             -0.9047559818364217, -0.4092063139968012, -0.3845725606766721,
             -0.30700232234899083, -0.2565844426798063, -0.25415109620097187,
             -0.20223029999069952, -0.10388696244007978, -0.07822191388462896,
             0.07822191388462896, 0.10388696244007978, 0.20223029999069952,
             0.25415109620097187, 0.2565844426798063, 0.30700232234899083,
             0.3845725606766721, 0.4092063139968012, 0.9047559818364217,
             0.9923351727665026, 1.0495284321772482, 1.2306697417054666,
             1.2332757902347795, 1.2570066577415648, 1.293989140781724,
             1.3243060491576388, 1.4302454657287063, 1.4470094450898578,
             1.4895432407186429, 1.5017798665502253, 1.517561220921166,
             1.5862592143187193, 1.6120633534061788, 1.7016701153705522,
             1.7180532497996817, 1.723149182957688, 1.7494324035239686,
             1.780636214263252, 1.815804869116454, 1.979624045501378,
             2.054918428555726, 2.0700940505412, 2.0774862883018708,
             2.0942500415622503, 2.1095391808427064, 2.1927287694263082,
             2.2035967461005335, 2.21411949373986, 2.230871371173083,
             2.3228634370815, 2.3324036659351286, 2.4747969073957368,
             2.4939048380402378, 2.501753472506631, 2.5411937415815604,
             2.5597898266706323, 2.5777471766751514, 2.592465150170373,
             2.640625797489305, 2.923269270862857, 2.9784022156693037]
        # 102 flags: True only for the 18 central x values (indices 42-59).
        y = [False] * 42 + [True] * 18 + [False] * 42

        bkreg = wk.BKRegression(x, y, a=0.05, b=0.05)
        fbest = bkreg.prb_search_best(hsfun='hste', alpha=0.05, color='g')
        # Only every tenth point of the result is compared.
        expected = [1.80899736e-15, 0, 6.48351162e-16, 6.61404311e-15,
                    1.10010120e-12, 1.36709203e-10, 1.11994766e-08,
                    5.73040143e-07, 1.68974054e-05, 2.68633448e-04,
                    2.49075176e-03, 1.48687767e-02, 5.98536245e-02,
                    1.74083352e-01, 4.33339557e-01, 8.26039018e-01,
                    9.78387628e-01, 9.98137653e-01, 9.99876002e-01,
                    9.99876002e-01, 9.98137653e-01, 9.78387628e-01,
                    8.26039018e-01, 4.33339557e-01, 1.74083352e-01,
                    5.98536245e-02, 1.48687767e-02, 2.49075176e-03,
                    2.68633448e-04, 1.68974054e-05, 5.73040143e-07,
                    1.11994760e-08, 1.36708818e-10, 1.09965904e-12,
                    5.43806309e-15, 0.0, 0, 0]
        assert_allclose(fbest.data[::10], expected)
if __name__ == "__main__":
# import sys;sys.argv = ['', 'Test.testName']

Loading…
Cancel
Save