From 500aff71c685585f1b9ef72d317f121246f424ec Mon Sep 17 00:00:00 2001 From: Per A Brodtkorb Date: Fri, 23 Dec 2016 15:22:07 +0100 Subject: [PATCH] added more tests and simplified things.. --- wafo/kdetools/gridding.py | 69 +++-- wafo/kdetools/kdetools.py | 110 +++---- wafo/kdetools/tests/test_kdetools.py | 416 +++++++++------------------ 3 files changed, 252 insertions(+), 343 deletions(-) diff --git a/wafo/kdetools/gridding.py b/wafo/kdetools/gridding.py index 16e8684..6c95554 100644 --- a/wafo/kdetools/gridding.py +++ b/wafo/kdetools/gridding.py @@ -29,7 +29,7 @@ def bitget(int_type, offset): return np.bitwise_and(int_type, 1 << offset) >> offset -def accumsum_sparse(accmap, a, shape, dtype=None): +def accumsum_sparse(accmap, a, shape=None, dtype=None): """ Example ------- @@ -42,12 +42,21 @@ def accumsum_sparse(accmap, a, shape, dtype=None): >>> # Sum the diagonals. >>> accmap = array([[0,1,2],[2,0,1],[1,2,0]]) >>> s = accumsum_sparse(accmap, a, (3,)) - >>> np.allclose(s.toarray().T, [ 9, 7, 15]) + >>> np.allclose(s.toarray(), [ 9, 7, 15]) True + # group vals by idx and sum + >>> vals = array([12.0, 3.2, -15, 88, 12.9]) + >>> idx = array([1, 0, 1, 4, 1 ]) + >>> np.allclose(accumsum_sparse(idx, vals).toarray(), + ... [3.2, 9.9, 0, 0, 88.]) + True """ if dtype is None: dtype = a.dtype + if shape is None: + shape = 1 + np.squeeze(np.apply_over_axes(np.max, accmap, + axes=tuple(range(a.ndim)))) shape = np.atleast_1d(shape) if len(shape) > 1: binx = accmap[:, 0] @@ -58,12 +67,13 @@ def accumsum_sparse(accmap, a, shape, dtype=None): binx = accmap.ravel() zero = np.zeros(len(binx)) out = sparse.coo_matrix( - (a.ravel(), (binx, zero)), shape=(shape, 1), dtype=dtype).tocsr() + (a.ravel(), (binx, zero)), shape=(shape, 1), dtype=dtype).tocsr().T return out -def accumsum(accmap, a, shape): +def accumsum(accmap, a, shape=None): """ + Example ------- >>> from numpy import array @@ -74,11 +84,21 @@ def accumsum(accmap, a, shape): [-1, 8, 9]]) >>> accmap = array([[0,1,2],[2,0,1],[1,2,0]]) # Sum the diagonals. - >>> s = accumsum(accmap, a, (3,)) - >>> np.allclose(s, [ 9, 7, 15]) + >>> np.allclose(accumsum(accmap, a, (3,)), [ 9, 7, 15]) + True + >>> accmap = array([[0,1,2],[0,1,2],[0,1,2]]) # Sum the columns. + >>> np.allclose(accumsum(accmap, a, (3,)), [4, 9, 18]) True + # group vals by idx and sum + >>> vals = array([12.0, 3.2, -15, 88, 12.9]) + >>> idx = array([1, 0, 1, 4, 1 ]) + >>> np.allclose(accumsum(idx, vals), [3.2, 9.9, 0, 0, 88.]) + True """ + if shape is None: + shape = 1 + np.squeeze(np.apply_over_axes(np.max, accmap, + axes=tuple(range(a.ndim)))) return np.bincount(accmap.ravel(), a.ravel(), np.array(shape).max()) @@ -148,6 +168,9 @@ def accum(accmap, a, func=None, shape=None, fill_value=0, dtype=None): >>> accum(accmap, a, func=prod, dtype=float) array([[ -8., 18.], [ -8., 9.]]) + >>> accum(accmap, a, dtype=float) + array([[ 6., 9.], + [ 7., 9.]]) # Same accmap, but create an array of lists of values. >>> accum(accmap, a, func=lambda x: x, dtype='O') @@ -210,7 +233,7 @@ def _gridcount_nd(acfun, data, x, y, w, binx): fact2 = np.asarray(np.reshape(inc * np.arange(d), (d, -1)), dtype=int) fact1 = np.asarray(np.reshape(c_shape.cumprod() / inc, (d, -1)), dtype=int) bt0 = [0, 0] - X1 = x.ravel() + x1 = x.ravel() for ir in range(2 ** (d - 1)): bt0[0] = np.reshape(bitget(ir, np.arange(d)), (d, -1)) bt0[1] = 1 - bt0[0] @@ -222,7 +245,7 @@ def _gridcount_nd(acfun, data, x, y, w, binx): b1 = np.sum((binx + bt0[one]) * fact1, axis=0) bt2 = bt0[two] + fact2 b2 = binx + bt2 # linear index to X - c += acfun(b1, np.abs(np.prod(X1[b2] - data, axis=0)) * y, + c += acfun(b1, np.abs(np.prod(x1[b2] - data, axis=0)) * y, shape=(nc, )) c = np.reshape(c / w, c_shape, order='F') @@ -247,13 +270,18 @@ def gridcount(data, X, y=1): Parameters ---------- - data = column vectors with D-dimensional data, shape D x Nd - X = row vectors defining discretization, shape D x N - Must include the range of the data. + data : array-like + column vectors with D-dimensional data, shape D x Nd + X : array-like + row vectors defining discretization in each dimension, shape D x N. + The discretization must include the range of the data. + y : array-like + response data. Scalar or vector of size Nd. Returns ------- - c = gridcount, shape N x N x ... x N + c : ndarray + gridcount, shape N x N x ... x N GRIDCOUNT obtains the grid counts using linear binning. There are 2 strategies: simple- or linear- binning. @@ -298,7 +326,7 @@ def gridcount(data, X, y=1): See also -------- - bincount, accum, kdebin + bincount, accum, kde Reference ---------- @@ -306,28 +334,27 @@ def gridcount(data, X, y=1): 'Kernel smoothing' Chapman and Hall, pp 182-192 ''' - data2, x = np.atleast_2d(data, X) + dataset, x = np.atleast_2d(data, X) y = np.atleast_1d(y).ravel() d, inc = x.shape - _assert(d == data2.shape[0], 'Dimension 0 of data and X do not match.') + _assert(d == dataset.shape[0], 'Dimension 0 of data and X do not match.') - dx = np.diff(x[:, :2], axis=1) xlo, xup = x[:, 0], x[:, -1] - datlo, datup = data2.min(axis=1), data2.max(axis=1) + datlo, datup = dataset.min(axis=1), dataset.max(axis=1) _assert(not ((datlo < xlo) | (xup < datup)).any(), 'X does not include whole range of the data!') # acfun = accumsum_sparse # faster than accum acfun = accumsum # faster than accumsum_sparse - binx = np.asarray(np.floor((data2 - xlo[:, np.newaxis]) / dx), dtype=int) + dx = np.diff(x[:, :2], axis=1) + binx = np.asarray(np.floor((dataset - xlo[:, np.newaxis]) / dx), dtype=int) w = dx.prod() if d == 1: - return _gridcount_1d(acfun, data2, x, y, w, binx, inc) - # else: # d>1 - return _gridcount_nd(acfun, data2, x, y, w, binx) + return _gridcount_1d(acfun, dataset, x, y, w, binx, inc) + return _gridcount_nd(acfun, dataset, x, y, w, binx) if __name__ == '__main__': diff --git a/wafo/kdetools/kdetools.py b/wafo/kdetools/kdetools.py index 80d6454..ff781b7 100644 --- a/wafo/kdetools/kdetools.py +++ b/wafo/kdetools/kdetools.py @@ -247,7 +247,7 @@ class _KDE(object): if len(args) == 0: args = self.get_args() self.args = args - return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds) + return self.eval_grid_fun(self._eval_grid_fast, *args, **kwds) def _eval_grid_fast(self, *args, **kwds): pass @@ -273,7 +273,7 @@ class _KDE(object): if len(args) == 0: args = self.get_args() self.args = args - return self._eval_grid_fun(self._eval_grid, *args, **kwds) + return self.eval_grid_fun(self._eval_grid, *args, **kwds) def _eval_grid(self, *args, **kwds): pass @@ -301,7 +301,7 @@ class _KDE(object): self._add_contour_levels(wdata) return wdata - def _eval_grid_fun(self, eval_grd, *args, **kwds): + def eval_grid_fun(self, eval_grd, *args, **kwds): output = kwds.pop('output', 'value') f = eval_grd(*args, **kwds) if output == 'value': @@ -434,9 +434,11 @@ class TKDE(_KDE): t = np.trapz(f, x) """ - def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, - xmax=None, inc=512, L2=None): + def __init__(self, data, hs=None, kernel=None, alpha=0.0, + xmin=None, xmax=None, inc=512, L2=None): self.L2 = L2 +# self.dataset = data +# self.tkde = super(TKDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc) # @property @@ -445,6 +447,8 @@ class TKDE(_KDE): # # @dataset.setter # def dataset(self, data): +# self._dataset = atleast_2d(data) +# self._tdataset = self._dat2gaus(self._dataset) def _initialize(self): self._check_xmin() @@ -455,7 +459,8 @@ class TKDE(_KDE): xmax = self.xmax if xmax is not None: xmax = self._dat2gaus(np.reshape(xmax, (-1, 1))) - self.tkde = KDE(tdataset, self.hs, self.kernel, self.alpha, xmin, xmax, + self.tkde = KDE(tdataset, self.hs, self.kernel, self.alpha, + np.ravel(xmin), np.ravel(xmax), self.inc) if self.inc is None: self.inc = self.tkde.inc @@ -467,7 +472,7 @@ class TKDE(_KDE): L2 = np.atleast_1d(self.L2) * np.ones(self.d) self.xmin = np.where(L2 != 1, np.maximum(self.xmin, amin / 100.0), - self.xmin).reshape((-1, 1)) + self.xmin) def _dat2gaus(self, points): if self.L2 is None: @@ -527,7 +532,7 @@ class TKDE(_KDE): The values evaluated at meshgrid(*args). """ - return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds) + return self.eval_grid_fun(self._eval_grid_fast, *args, **kwds) def _interpolate(self, points, f, *args, **kwds): ipoints = meshgrid(*args) # if self.d > 1 else args @@ -754,7 +759,7 @@ class KDE(_KDE): y = kwds.get('y', 1.0) if self.alpha > 0: - y = y / self._lambda**d + warnings.warn('alpha parameter is not used for binned!') # Find the binned kernel weights, c. c = gridcount(self.dataset, X, y=y) @@ -922,16 +927,16 @@ class KRegression(object): # _KDE): self.tkde = TKDE(data, hs=hs, kernel=kernel, alpha=alpha, xmin=xmin, xmax=xmax, inc=inc, L2=L2) - self.y = y + self.y = np.atleast_1d(y) self.p = p def eval_grid_fast(self, *args, **kwds): self._grdfun = self.tkde.eval_grid_fast - return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds) + return self.tkde.eval_grid_fun(self._eval_gridfun, *args, **kwds) def eval_grid(self, *args, **kwds): self._grdfun = self.tkde.eval_grid - return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds) + return self.tkde.eval_grid_fun(self._eval_gridfun, *args, **kwds) def _eval_gridfun(self, *args, **kwds): grdfun = self._grdfun @@ -1017,7 +1022,30 @@ class BKRegression(object): xi = np.linspace(xmin - sml, xmax + sml, ni) return xi - def prb_ci(self, n, p, alpha=0.05, **kwds): + def _wilson_score(self, n, p, alpha): + # Wilson score + z0 = -_invnorm(alpha / 2) + den = 1 + (z0 ** 2. / n) + xc = (p + (z0 ** 2) / (2 * n)) / den + halfwidth = (z0 * sqrt((p * (1 - p) / n) + + (z0 ** 2 / (4 * (n ** 2))))) / den + plo = xc - halfwidth.clip(min=0) # wilson score + pup = xc + halfwidth.clip(max=1.0) # wilson score + return plo, pup + + def _credible_interval(self, n, p, alpha): + # Jeffreys intervall a=b=0.5 + # st.beta.isf(alpha/2, y+a, n-y+b) y = n*p, n-y = n*(1-p) + a = self.a + b = self.b + st = scipy.stats + pup = np.where(p == 1, 1, + st.beta.isf(alpha / 2, n * p + a, n * (1 - p) + b)) + plo = np.where(p == 0, 0, + st.beta.isf(1 - alpha / 2, n * p + a, n * (1 - p) + b)) + return plo, pup + + def prb_ci(self, n, p, alpha=0.05): """Return Confidence Interval for the binomial probability p. Parameters @@ -1040,25 +1068,9 @@ class BKRegression(object): """ if self.method.startswith('w'): - # Wilson score - z0 = -_invnorm(alpha / 2) - den = 1 + (z0 ** 2. / n) - xc = (p + (z0 ** 2) / (2 * n)) / den - halfwidth = (z0 * sqrt((p * (1 - p) / n) + - (z0 ** 2 / (4 * (n ** 2))))) / den - plo = (xc - halfwidth).clip(min=0) # wilson score - pup = (xc + halfwidth).clip(max=1.0) # wilson score + plo, pup = self._wilson_score(n, p, alpha) else: - # Jeffreys intervall a=b=0.5 - # st.beta.isf(alpha/2, y+a, n-y+b) y = n*p, n-y = n*(1-p) - a = self.a - b = self.b - st = scipy.stats - pup = np.where(p == 1, 1, - st.beta.isf(alpha / 2, n * p + a, n * (1 - p) + b)) - plo = np.where(p == 0, 0, - st.beta.isf(1 - alpha / 2, - n * p + a, n * (1 - p) + b)) + plo, pup = self._credible_interval(n, p, alpha) return plo, pup def prb_empirical(self, xi=None, hs_e=None, alpha=0.05, color='r', **kwds): @@ -1088,12 +1100,12 @@ class BKRegression(object): y = self.y c = gridcount(x, xi) # + self.a + self.b # count data - if (y == 1).any(): + if np.any(y == 1): c0 = gridcount(x[y == 1], xi) # + self.a # count success else: c0 = np.zeros(np.shape(xi)) prb = np.where(c == 0, 0, c0 / (c + _TINY)) # assume prb==0 for c==0 - CI = np.vstack(self.prb_ci(c, prb, alpha, **kwds)) + CI = np.vstack(self.prb_ci(c, prb, alpha)) prb_e = PlotData(prb, xi, plotmethod='plot', plot_args=['.'], plot_kwds=dict(markersize=6, color=color, picker=5)) @@ -1213,7 +1225,7 @@ def kde_demo1(): for ix, h in enumerate(hVec): plt.figure(ix) kde = KDE(data, hs=h, kernel=kernel) - f2 = kde(x, output='plot', title='h_s = {0:2.2f}'.format(h), + f2 = kde(x, output='plot', title='h_s = {0:2.2f}'.format(float(h)), ylab='Density') f2.plot('k-') @@ -1278,8 +1290,8 @@ def kde_demo3(): plt.plot(data[0], data[1], '.') # plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable - - tkde = TKDE(data, L2=0.5) + hs = Kernel('gauss').get_smoothing(data**0.5) + tkde = TKDE(data, hs=hs, L2=0.5) ft = tkde.eval_grid_fast( output='plot', title='Transformation KDE', plotflag=1) @@ -1312,13 +1324,13 @@ def kde_demo4(N=50): f1 = kde1(output='plot', label='Ordinary KDE', plotflag=1) plt.figure(0) - f.plot('r', label='hns={0:g}'.format(kde.hs)) + f.plot('r', label='hns={0}'.format(kde.hs)) # plt.figure(2) - f1.plot('b', label='hisj={0:g}'.format(kde1.hs)) - x = np.linspace(-4, 4) - for loc in [-5, 5]: - plt.plot(x + loc, st.norm.pdf(x, 0, scale=1) / 2, 'k:', - label='True density') + f1.plot('b', label='hisj={0}'.format(kde1.hs)) + x = np.linspace(-9, 9) + plt.plot(x, (st.norm.pdf(x, loc=-5, scale=1) + + st.norm.pdf(x, loc=5, scale=1)) / 2, 'k:', + label='True density') plt.legend() @@ -1335,11 +1347,11 @@ def kde_demo5(N=500): st.norm.rvs(loc=-5, scale=1, size=(2, N,)))) kde = KDE(data, kernel=Kernel('gauss', 'hns')) f = kde(output='plot', plotflag=1, - title='Ordinary KDE (hns={0:s}'.format(str(kde.hs.tolist()))) + title='Ordinary KDE, hns={0:s}'.format(str(list(kde.hs)))) kde1 = KDE(data, kernel=Kernel('gauss', 'hisj')) f1 = kde1(output='plot', plotflag=1, - title='Ordinary KDE (hisj={0:s})'.format(str(kde1.hs.tolist()))) + title='Ordinary KDE, hisj={0:s}'.format(str(list(kde1.hs)))) plt.figure(0) plt.clf() @@ -1766,9 +1778,9 @@ def check_bkregression(): plt.ion() k = 0 for _i, n in enumerate([50, 100, 300, 600]): - x, y, fun1 = _get_data( - n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75) - bkreg = BKRegression(x, y) + x, y, fun1 = _get_data(n, symmetric=True, loc1=0.1, + scale1=0.6, scale2=0.75) + bkreg = BKRegression(x, y, a=0.05, b=0.05) fbest = bkreg.prb_search_best( hsfun='hste', alpha=0.05, color='g', label='Transit_D') @@ -1951,8 +1963,8 @@ if __name__ == '__main__': if False: test_docstrings(__file__) else: - kde_demo2() - # check_bkregression() + # kde_demo5() + check_bkregression() # check_regression_bin() # check_kreg_demo3() # check_kreg_demo4() diff --git a/wafo/kdetools/tests/test_kdetools.py b/wafo/kdetools/tests/test_kdetools.py index d44f769..9c9e08c 100644 --- a/wafo/kdetools/tests/test_kdetools.py +++ b/wafo/kdetools/tests/test_kdetools.py @@ -7,26 +7,26 @@ from __future__ import division import unittest import numpy as np from numpy.testing import assert_allclose -from numpy import array, inf +import wafo.objects as wo import wafo.kdetools as wk +import scipy.stats as st -class TestKdeTools(unittest.TestCase): +class TestKde(unittest.TestCase): def setUp(self): # N = 20 # data = np.random.rayleigh(1, size=(N,)) - self.data = array([0.75355792, 0.72779194, 0.94149169, 0.07841119, - 2.32291887, 1.10419995, 0.77055114, 0.60288273, - 1.36883635, 1.74754326, 1.09547561, 1.01671133, - 0.73211143, 0.61891719, 0.75903487, 1.8919469, - 0.72433808, 1.92973094, 0.44749838, 1.36508452]) + self.data = np.array([0.75355792, 0.72779194, 0.94149169, 0.07841119, + 2.32291887, 1.10419995, 0.77055114, 0.60288273, + 1.36883635, 1.74754326, 1.09547561, 1.01671133, + 0.73211143, 0.61891719, 0.75903487, 1.8919469, + 0.72433808, 1.92973094, 0.44749838, 1.36508452]) self.x = np.linspace(0, max(self.data) + 1, 10) def test0_KDE1D(self): data, x = self.data, self.x - # kde = wk.KDE(data, hs=0.5, alpha=0.5) kde0 = wk.KDE(data, hs=0.5, alpha=0.0, inc=16) @@ -36,7 +36,6 @@ class TestKdeTools(unittest.TestCase): 0.08270612, 0.02991145, 0.00720821]) fx = kde0.eval_grid(x, r=1) - assert_allclose(-fx, [0.11911419724002906, 0.13440000694772541, 0.044400116190638696, -0.0677695267531197, -0.09555596523854318, -0.07498819087690148, @@ -125,6 +124,13 @@ class TestKdeTools(unittest.TestCase): 0.16555235, 0.0541248]) assert_allclose(np.trapz(f, x), 0.97323338046725172) + f0 = kde(output='plot') + self.assertIsInstance(f0, wo.PlotData) + assert_allclose(np.trapz(f0.data, f0.args), 0.9319800260106625) + + f0 = kde.eval_grid_fast(output='plot') + self.assertIsInstance(f0, wo.PlotData) + assert_allclose(np.trapz(f0.data, f0.args), 0.9319799696210691) def test1a_KDE1D(self): data, x = self.data, self.x @@ -136,7 +142,15 @@ class TestKdeTools(unittest.TestCase): assert_allclose(np.trapz(f, x), 0.92938023659047952) - def test2a_KDE1D(self): + f0 = kde(output='plot') + self.assertIsInstance(f0, wo.PlotData) + assert_allclose(np.trapz(f0.data, f0.args), 0.9871189376720593) + + f0 = kde.eval_grid_fast(output='plot') + self.assertIsInstance(f0, wo.PlotData) + assert_allclose(np.trapz(f0.data, f0.args), 0.9962507385131669) + + def test2a_KDE_1D_hs_5_alpha_5(self): # data, x = self.data, self.x data = np.asarray([1, 2]) x = np.linspace(0, max(np.ravel(data)) + 1, 10) @@ -151,7 +165,7 @@ class TestKdeTools(unittest.TestCase): def test_KDE2D(self): # N = 20 # data = np.random.rayleigh(1, size=(2, N)) - data = array([ + data = np.array([ [0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399, 0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145, 0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821, @@ -176,269 +190,125 @@ class TestKdeTools(unittest.TestCase): ] assert_allclose(kde0.eval_grid_fast(x, x), t) - def test_gridcount_1D(self): - data, x = self.data, self.x - dx = x[1] - x[0] - c = wk.gridcount(data, x) - assert_allclose(c, [0.78762626, 1.77520717, 7.99190087, 4.04054449, - 1.67156643, 2.38228499, 1.05933195, 0.29153785, 0., - 0.]) - t = np.trapz(c / dx / len(data), x) - assert_allclose(t, 0.9803093435140049) - - def test_gridcount_2D(self): - N = 20 - # data = np.random.rayleigh(1, size=(2, N)) - data = array([ - [0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399, - 0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145, - 0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821, - 0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964], - [1.44080694, 0.39973751, 1.331243, 2.48895822, 1.18894158, - 1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689, - 0.870329, 1.25106862, 0.5346619, 0.47541236, 1.51930093, - 0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]]) - - x = np.linspace(0, max(np.ravel(data)) + 1, 5) - dx = x[1] - x[0] - X = np.vstack((x, x)) - c = wk.gridcount(data, X) - assert_allclose(c, - [[0.38922806, 0.8987982, 0.34676493, 0.21042807, 0.], - [1.15012203, 5.16513541, 3.19250588, 0.55420752, 0.], - [0.74293418, 3.42517219, 1.97923195, 0.76076621, 0.], - [0.02063536, 0.31054405, 0.71865964, 0.13486633, 0.], - [0., 0., 0., 0., 0.]], 1e-5) - - t = np.trapz(np.trapz(c / (dx**2 * N), x), x) - assert_allclose(t, 0.9011618785736376) - - def test_gridcount_3D(self): - N = 20 - # data = np.random.rayleigh(1, size=(3, N)) - data = np.array([ - [0.932896, 0.89522635, 0.80636346, 1.32283371, 0.27125435, - 1.91666304, 2.30736635, 1.13662384, 1.73071287, 1.06061127, - 0.99598512, 2.16396591, 1.23458213, 1.12406686, 1.16930431, - 0.73700592, 1.21135139, 0.46671506, 1.3530304, 0.91419104], - [0.62759088, 0.23988169, 2.04909823, 0.93766571, 1.19343762, - 1.94954931, 0.84687514, 0.49284897, 1.05066204, 1.89088505, - 0.840738, 1.02901457, 1.0758625, 1.76357967, 0.45792897, - 1.54488066, 0.17644313, 1.6798871, 0.72583514, 2.22087245], - [1.69496432, 0.81791905, 0.82534709, 0.71642389, 0.89294732, - 1.66888649, 0.69036947, 0.99961448, 0.30657267, 0.98798713, - 0.83298728, 1.83334948, 1.90144186, 1.25781913, 0.07122458, - 2.42340852, 2.41342037, 0.87233305, 1.17537114, 1.69505988]]) - x = np.linspace(0, max(np.ravel(data)) + 1, 3) - dx = x[1] - x[0] - X = np.vstack((x, x, x)) - c = wk.gridcount(data, X) - assert_allclose(c, - [[[8.74229894e-01, 1.27910940e+00, 1.42033973e-01], - [1.94778915e+00, 2.59536282e+00, 3.28213680e-01], - [1.08429416e-01, 1.69571495e-01, 7.48896775e-03]], - [[1.44969128e+00, 2.58396370e+00, 2.45459949e-01], - [2.28951650e+00, 4.49653348e+00, 2.73167915e-01], - [1.10905565e-01, 3.18733817e-01, 1.12880816e-02]], - [[7.49265424e-02, 2.18142488e-01, 0.0], - [8.53886762e-02, 3.73415131e-01, 0.0], - [4.16196568e-04, 1.62218824e-02, 0.0]]]) - - t = np.trapz(np.trapz(np.trapz(c / dx**3 / N, x), x), x) - assert_allclose(t, 0.5164999727560187) - - def test_gridcount_4D(self): - - N = 20 - # data = np.random.rayleigh(1, size=(2, N)) - data = array([ - [0.38103275, 0.35083136, 0.90024207, 1.88230239, 0.96815399, - 0.57392873, 1.63367908, 1.20944125, 2.03887811, 0.81789145], - [0.69302049, 1.40856592, 0.92156032, 2.14791432, 2.04373821, - 0.69800708, 0.58428735, 1.59128776, 2.05771405, 0.87021964], - [1.44080694, 0.39973751, 1.331243, 2.48895822, 1.18894158, - 1.40526085, 1.01967897, 0.81196474, 1.37978932, 2.03334689], - [0.870329, 1.25106862, 0.5346619, 0.47541236, 1.51930093, - 0.58861519, 1.19780448, 0.81548296, 1.56859488, 1.60653533]]) - - x = np.linspace(0, max(np.ravel(data)) + 1, 3) - dx = x[1] - x[0] - X = np.vstack((x, x, x, x)) - c = wk.gridcount(data, X) - assert_allclose(c, - [[[[1.77163904e-01, 1.87720108e-01, 0.0], - [5.72573585e-01, 6.09557834e-01, 0.0], - [3.48549923e-03, 4.05931870e-02, 0.0]], - [[1.83770124e-01, 2.56357594e-01, 0.0], - [4.35845892e-01, 6.14958970e-01, 0.0], - [3.07662204e-03, 3.58312786e-02, 0.0]], - [[0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0]]], - [[[3.41883175e-01, 5.97977973e-01, 0.0], - [5.72071865e-01, 8.58566538e-01, 0.0], - [3.46939323e-03, 4.04056116e-02, 0.0]], - [[3.58861043e-01, 6.28962785e-01, 0.0], - [8.80697705e-01, 1.47373158e+00, 0.0], - [2.22868504e-01, 1.18008528e-01, 0.0]], - [[2.91835067e-03, 2.60268355e-02, 0.0], - [3.63686503e-02, 1.07959459e-01, 0.0], - [1.88555613e-02, 7.06358976e-03, 0.0]]], - [[[3.13810608e-03, 2.11731327e-02, 0.0], - [6.71606255e-03, 4.53139824e-02, 0.0], - [0.0, 0.0, 0.0]], - [[7.05946179e-03, 5.44614852e-02, 0.0], - [1.09099593e-01, 1.95935584e-01, 0.0], - [6.61257395e-02, 2.47717418e-02, 0.0]], - [[6.38695629e-04, 5.69610302e-03, 0.0], - [1.00358265e-02, 2.44053065e-02, 0.0], - [5.67244468e-03, 2.12498697e-03, 0.0]]]]) - - t = np.trapz(np.trapz(np.trapz(np.trapz(c / dx**4 / N, x), x), x), x) - assert_allclose(t, 0.21183518274521254) - - -class TestKernels(unittest.TestCase): - def setUp(self): - self.names = ['epanechnikov', 'biweight', 'triweight', 'logistic', - 'p1epanechnikov', 'p1biweight', 'p1triweight', - 'triangular', 'gaussian', 'rectangular', 'laplace'] - - def test_stats(self): - truth = { - 'biweight': (0.14285714285714285, 0.7142857142857143, 22.5), - 'logistic': (3.289868133696453, 1./6, 0.023809523809523808), - 'p1biweight': (0.14285714285714285, 0.7142857142857143, 22.5), - 'triangular': (0.16666666666666666, 0.6666666666666666, inf), - 'gaussian': (1, 0.28209479177387814, 0.21157109383040862), - 'epanechnikov': (0.2, 0.6, inf), - 'triweight': (0.1111111111111111, 0.8158508158508159, inf), - 'p1triweight': (0.1111111111111111, 0.8158508158508159, inf), - 'p1epanechnikov': (0.2, 0.6, inf), - 'rectangular': (0.3333333333333333, 0.5, inf), - 'laplace': (2, 0.25, inf)} - for name in self.names: - kernel = wk.Kernel(name) - assert_allclose(kernel.stats(), truth[name]) - # truth[name] = kernel.stats() - # print(truth) - - def test_norm_factors_1d(self): - truth = { - 'biweight': 1.0666666666666667, 'logistic': 1.0, - 'p1biweight': 1.0666666666666667, 'triangular': 1.0, - 'gaussian': 2.5066282746310002, 'epanechnikov': 1.3333333333333333, - 'triweight': 0.91428571428571426, 'laplace': 2, - 'p1triweight': 0.91428571428571426, - 'p1epanechnikov': 1.3333333333333333, 'rectangular': 2.0} - for name in self.names: - kernel = wk.Kernel(name) - assert_allclose(kernel.norm_factor(d=1, n=20), truth[name]) - # truth[name] = kernel.norm_factor(d=1, n=20) - - def test_effective_support(self): - truth = {'biweight': (-1.0, 1.0), 'logistic': (-7.0, 7.0), - 'p1biweight': (-1.0, 1.0), 'triangular': (-1.0, 1.0), - 'gaussian': (-4.0, 4.0), 'epanechnikov': (-1.0, 1.0), - 'triweight': (-1.0, 1.0), 'p1triweight': (-1.0, 1.0), - 'p1epanechnikov': (-1.0, 1.0), 'rectangular': (-1.0, 1.0), - 'laplace': (-7.0, 7.0)} - for name in self.names: - kernel = wk.Kernel(name) - assert_allclose(kernel.effective_support(), truth[name]) - # truth[name] = kernel.effective_support() - # print(truth) - # self.assertTrue(False) - - def test_that_kernel_is_a_pdf(self): - - for name in self.names: - kernel = wk.Kernel(name) - xmin, xmax = kernel.effective_support() - x = np.linspace(xmin, xmax, 4*1024+1) - m0 = kernel.norm_factor(d=1, n=1) - pdf = kernel(x)/m0 - # print(name) - # print(pdf[0], pdf[-1]) - # print(np.trapz(pdf, x) - 1) - assert_allclose(np.trapz(pdf, x), 1, 1e-2) - # self.assertTrue(False) - - -class TestSmoothing(unittest.TestCase): - def setUp(self): - self.data = np.array([ - [0.932896, 0.89522635, 0.80636346, 1.32283371, 0.27125435, - 1.91666304, 2.30736635, 1.13662384, 1.73071287, 1.06061127, - 0.99598512, 2.16396591, 1.23458213, 1.12406686, 1.16930431, - 0.73700592, 1.21135139, 0.46671506, 1.3530304, 0.91419104], - [0.62759088, 0.23988169, 2.04909823, 0.93766571, 1.19343762, - 1.94954931, 0.84687514, 0.49284897, 1.05066204, 1.89088505, - 0.840738, 1.02901457, 1.0758625, 1.76357967, 0.45792897, - 1.54488066, 0.17644313, 1.6798871, 0.72583514, 2.22087245], - [1.69496432, 0.81791905, 0.82534709, 0.71642389, 0.89294732, - 1.66888649, 0.69036947, 0.99961448, 0.30657267, 0.98798713, - 0.83298728, 1.83334948, 1.90144186, 1.25781913, 0.07122458, - 2.42340852, 2.41342037, 0.87233305, 1.17537114, 1.69505988]]) - self.gauss = wk.Kernel('gaussian') - - def test_hns(self): - hs = self.gauss.hns(self.data) - assert_allclose(hs, [0.18154437, 0.36207987, 0.37396219]) - - def test_hos(self): - hs = self.gauss.hos(self.data) - assert_allclose(hs, [0.195209, 0.3893332, 0.40210988]) - - def test_hms(self): - hs = self.gauss.hmns(self.data) - assert_allclose(hs, [[3.25196193e-01, -2.68892467e-02, 3.18932448e-04], - [-2.68892467e-02, 3.91283306e-01, 2.38654678e-02], - [3.18932448e-04, 2.38654678e-02, 4.05123874e-01]]) - hs = self.gauss.hmns(self.data[0]) - assert_allclose(hs, self.gauss.hns(self.data[0])) - - hs = wk.Kernel('epan').hmns(self.data) - assert_allclose(hs, - [[8.363847e-01, -6.915749e-02, 8.202747e-04], - [-6.915749e-02, 1.006357e+00, 6.138052e-02], - [8.202747e-04, 6.138052e-02, 1.041954e+00]], - rtol=1e-5) - hs = wk.Kernel('biwe').hmns(self.data[:2]) - assert_allclose(hs, [[0.868428, -0.071705], - [-0.071705, 1.04685]], rtol=1e-5) - hs = wk.Kernel('triwe').hmns(self.data[:2]) - assert_allclose(hs, [[0.975375, -0.080535], - [-0.080535, 1.17577]], rtol=1e-5) - self.assertRaises(NotImplementedError, - wk.Kernel('biwe').hmns, self.data) - self.assertRaises(NotImplementedError, - wk.Kernel('triwe').hmns, self.data) - self.assertRaises(NotImplementedError, - wk.Kernel('triangular').hmns, self.data) - - def test_hscv(self): - hs = self.gauss.hscv(self.data) - assert_allclose(hs, [0.1656318800590673, 0.3273938258112911, - 0.31072126996412214]) - - def test_hstt(self): - hs = self.gauss.hstt(self.data) - assert_allclose(hs, [0.18099075, 0.50409881, 0.11018912]) - - def test_hste(self): - hs = self.gauss.hste(self.data) - assert_allclose(hs, [0.17035204677390572, 0.29851960273788863, - 0.186685349741972]) - - def test_hldpi(self): - hs = self.gauss.hldpi(self.data) - assert_allclose(hs, [0.1732289, 0.33159097, 0.3107633]) - - def test_hisj(self): - hs = self.gauss.hisj(self.data) - assert_allclose(hs, [0.29542502, 0.74277133, 0.51899114]) +class TestRegression(unittest.TestCase): + def test_KRegression(self): + + N = 51 + x = np.linspace(0, 1, N) + # ei = np.random.normal(loc=0, scale=0.075, size=(N,)) + ei = [0.0514233500271586, 0.00165101982431131, 0.042827107319028994, + -0.084351702283385, 0.05978024392552100, -0.07121894535738457, + 0.0855578119920183, -0.0061865198365448, 0.060986773136137415, + 0.0467717713275598, -0.0852368434029634, 0.09790798995780517, + -0.174003547831554, 0.1100349974247687, 0.12934695904976257, + -0.036688944487546, -0.0279545148054110, 0.09660222791922815, + -0.108463847524115, -0.0635162550551463, 0.017192887741329627, + -0.031520480101878, 0.03939880367791403, -0.06343921941793985, + 0.0574763321274059, -0.1186005160931940, 0.023007133904660495, + 0.0572646924609536, -0.0334012844057809, -0.03444460758658313, + 0.0325434547422866, 0.06063111859444784, 0.0010264474321885913, + -0.162288671571205, 0.01334616853351956, -0.020490428895193084, + 0.0446047497979159, 0.02924587567502737, 0.021177586536616458, + 0.0634083218094540, -0.1506377646036794, -0.03214553797245153, + 0.1850745187671265, -0.0151240946088902, -0.10599562843454335, + 0.0317357805015679, -0.0736187558312158, 0.04791463883941161, + 0.0660021138871709, -0.1049359954387588, 0.0034961490852392463] + # print(ei.tolist()) + + y = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2)) + ei + kreg = wk.KRegression(x, y) + f = kreg(output='plotobj', title='Kernel regression', plotflag=1) + + assert_allclose(f.data[::5], + [3.14313544673463, 3.14582567119112, 3.149199078830904, + 3.153335095194225, 3.15813722171621, 3.16302709623568, + 3.16631430398602, 3.164138775969285, 3.14947062082316, + 3.11341295908516, 3.05213808272656, 2.976097561057097, + 2.908020176929025, 2.867826513276857, 2.8615179445705, + 2.88155232529645, 2.91307482047679, 2.942469210090470, + 2.96350144269953, 2.976399025328952, 2.9836554385038, + 2.987516554300354, 2.9894470264681, 2.990311688080114, + 2.9906144224522406, 2.9906534916935743]) + + def test_BKRegression(self): + from wafo.kdetools.kdetools import _get_data + n = 51 + loc1 = 0.1 + scale1 = 0.6 + scale2 = 0.75 + # x, y, fun1 = _get_data(n, symmetric=True, loc1=loc1, + # scale1=scale1, scale2=scale2) + # print(x.tolist()) + # print(y.tolist()) +# dist = st.norm +# norm1 = scale2 * (dist.pdf(-loc1, loc=-loc1, scale=scale1) + +# dist.pdf(-loc1, loc=loc1, scale=scale1)) +# def fun1(x): +# return ((dist.pdf(x, loc=-loc1, scale=scale1) + +# dist.pdf(x, loc=loc1, scale=scale1)) / norm1).clip(max=1.0) + x = [-2.9784022156693037, -2.923269270862857, -2.640625797489305, + -2.592465150170373, -2.5777471766751514, -2.5597898266706323, + -2.5411937415815604, -2.501753472506631, -2.4939048380402378, + -2.4747969073957368, -2.3324036659351286, -2.3228634370815, + -2.230871371173083, -2.21411949373986, -2.2035967461005335, + -2.1927287694263082, -2.1095391808427064, -2.0942500415622503, + -2.0774862883018708, -2.0700940505412, -2.054918428555726, + -1.979624045501378, -1.815804869116454, -1.780636214263252, + -1.7494324035239686, -1.723149182957688, -1.7180532497996817, + -1.7016701153705522, -1.6120633534061788, -1.5862592143187193, + -1.517561220921166, -1.5017798665502253, -1.4895432407186429, + -1.4470094450898578, -1.4302454657287063, -1.3243060491576388, + -1.293989140781724, -1.2570066577415648, -1.2332757902347795, + -1.2306697417054666, -1.0495284321772482, -0.9923351727665026, + -0.9047559818364217, -0.4092063139968012, -0.3845725606766721, + -0.30700232234899083, -0.2565844426798063, -0.25415109620097187, + -0.20223029999069952, -0.10388696244007978, -0.07822191388462896, + 0.07822191388462896, 0.10388696244007978, 0.20223029999069952, + 0.25415109620097187, 0.2565844426798063, 0.30700232234899083, + 0.3845725606766721, 0.4092063139968012, 0.9047559818364217, + 0.9923351727665026, 1.0495284321772482, 1.2306697417054666, + 1.2332757902347795, 1.2570066577415648, 1.293989140781724, + 1.3243060491576388, 1.4302454657287063, 1.4470094450898578, + 1.4895432407186429, 1.5017798665502253, 1.517561220921166, + 1.5862592143187193, 1.6120633534061788, 1.7016701153705522, + 1.7180532497996817, 1.723149182957688, 1.7494324035239686, + 1.780636214263252, 1.815804869116454, 1.979624045501378, + 2.054918428555726, 2.0700940505412, 2.0774862883018708, + 2.0942500415622503, 2.1095391808427064, 2.1927287694263082, + 2.2035967461005335, 2.21411949373986, 2.230871371173083, + 2.3228634370815, 2.3324036659351286, 2.4747969073957368, + 2.4939048380402378, 2.501753472506631, 2.5411937415815604, + 2.5597898266706323, 2.5777471766751514, 2.592465150170373, + 2.640625797489305, 2.923269270862857, 2.9784022156693037] + y = [False, False, False, False, False, False, False, False, False, + False, False, False, False, False, False, False, False, False, + False, False, False, False, False, False, False, False, False, + False, False, False, False, False, False, False, False, False, + False, False, False, False, False, False, True, True, True, True, + True, True, True, True, True, True, True, True, True, True, True, + True, True, True, False, False, False, False, False, False, False, + False, False, False, False, False, False, False, False, False, + False, False, False, False, False, False, False, False, False, + False, False, False, False, False, False, False, False, False, + False, False, False, False, False, False, False, False] + + bkreg = wk.BKRegression(x, y, a=0.05, b=0.05) + fbest = bkreg.prb_search_best(hsfun='hste', alpha=0.05, color='g') + # print(fbest.data[::10]) + assert_allclose(fbest.data[::10], + [1.80899736e-15, 0, 6.48351162e-16, 6.61404311e-15, + 1.10010120e-12, 1.36709203e-10, 1.11994766e-08, + 5.73040143e-07, 1.68974054e-05, 2.68633448e-04, + 2.49075176e-03, 1.48687767e-02, 5.98536245e-02, + 1.74083352e-01, 4.33339557e-01, 8.26039018e-01, + 9.78387628e-01, 9.98137653e-01, 9.99876002e-01, + 9.99876002e-01, 9.98137653e-01, 9.78387628e-01, + 8.26039018e-01, 4.33339557e-01, 1.74083352e-01, + 5.98536245e-02, 1.48687767e-02, 2.49075176e-03, + 2.68633448e-04, 1.68974054e-05, 5.73040143e-07, + 1.11994760e-08, 1.36708818e-10, 1.09965904e-12, + 5.43806309e-15, 0.0, 0, 0]) if __name__ == "__main__": # import sys;sys.argv = ['', 'Test.testName']