diff --git a/wafo/kdetools/kdetools.py b/wafo/kdetools/kdetools.py
index f6981ed..a943dba 100644
--- a/wafo/kdetools/kdetools.py
+++ b/wafo/kdetools/kdetools.py
@@ -34,7 +34,7 @@ except ImportError:
     warnings.warn('fig import only supported on Windows')

 __all__ = ['TKDE', 'KDE', 'kde_demo1', 'kde_demo2', 'test_docstrings',
-           'KRegression', 'KDEgauss']
+           'KRegression', 'BKRegression']


 def _assert(cond, msg):
@@ -439,6 +439,13 @@ class TKDE(_KDE):
         self.L2 = L2
         super(TKDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc)

+#     @property
+#     def dataset(self):
+#         return self._dataset
+#
+#     @dataset.setter
+#     def dataset(self, data):
+
     def _initialize(self):
         self._check_xmin()
         tdataset = self._dat2gaus(self.dataset)
@@ -761,7 +768,7 @@ class KDE(_KDE):


     def _eval_grid(self, *args, **kwds):
-        grd = meshgrid(*args) if len(args) > 1 else list(args)
+        grd = meshgrid(*args)
         shape0 = grd[0].shape
         d = len(grd)
         for i in range(d):
@@ -838,82 +845,7 @@ class KDE(_KDE):
         return self._loop_over_points(self.dataset, points, y, r)


-class KDEgauss(KDE):
-
-    """ Kernel-Density Estimator base class.
-
-    data : (# of dims, # of data)-array
-        datapoints to estimate from
-    hs : array-like (optional)
-        smooting parameter vector/matrix.
-        (default compute from data using kernel.get_smoothing function)
-    kernel : kernel function object.
-        kernel must have get_smoothing method
-    alpha : real scalar (optional)
-        sensitivity parameter (default 0 regular KDE)
-        A good choice might be alpha = 0.5 ( or 1/D)
-        alpha = 0      Regular KDE (hs is constant)
-        0 < alpha <= 1 Adaptive KDE (Make hs change)
-    xmin, xmax : vectors
-        specifying the default argument range for the kde.eval_grid methods.
-        For the kde.eval_grid_fast methods the values must cover the range of
-        the data.
-        (default min(data)-range(data)/4, max(data)-range(data)/4)
-        If a single value of xmin or xmax is given, then the boundary is the
-        the same for all dimensions.
-    inc : scalar integer (default 512)
-        defining the default dimension of the output from kde.eval_grid methods
-        (For kde.eval_grid_fast: A value below 50 is very fast to compute but
-        may give some inaccuracies. Values between 100 and 500 give very
-        accurate results)
-
-    Members
-    -------
-    d : int
-        number of dimensions
-    n : int
-        number of datapoints
-
-    Methods
-    -------
-    kde.eval_grid_fast(x0, x1,..., xd) : array
-        evaluate the estimated pdf on meshgrid(x0, x1,..., xd)
-    kde(x0, x1,..., xd) : array
-        same as kde.eval_grid_fast(x0, x1,..., xd)
-    """
-    def _eval_grid_fast(self, *args, **kwds):
-        X = np.vstack(args)
-        d, inc = X.shape
-        # dx = X[:, 1] - X[:, 0]
-        R = X.max(axis=-1) - X.min(axis=-1)
-
-        t_star = (self.hs / R) ** 2
-        I = (np.asfarray(np.arange(0, inc)) * pi) ** 2
-        In = []
-        for i in range(d):
-            In.append(I * t_star[i] * 0.5)
-
-        r = kwds.get('r', 0)
-        fun = self._moment_fun(r)
-
-        Inc = meshgrid(*In) if d > 1 else In
-        kw = np.zeros((inc,) * d)
-        for i in range(d):
-            kw += exp(-Inc[i]) * fun(Inc[i])
-
-        y = kwds.get('y', 1.0)
-        d, n = self.dataset.shape
-        # Find the binned kernel weights, c.
-        c = gridcount(self.dataset, X, y=y)
-        # Perform the convolution.
-        at = dctn(c) * kw / n
-        z = idctn(at) * (at.size-1) / np.prod(R)
-        return z * (z > 0.0)
-
-    __call__ = _KDE.eval_grid_fast
-
-
-class KRegression(_KDE):
+class KRegression(object):  # _KDE):

     """ Kernel-Regression

@@ -1325,17 +1257,17 @@ def kde_demo2():
     x = np.linspace(1.5e-2, 5, 55)

     kde = KDE(data)
-    f = kde(output='plot', title='Ordinary KDE (hs={0:g})'.format(kde.hs))
+    f = kde(output='plot', title='Ordinary KDE (hs={0:})'.format(kde.hs))
     plt.figure(0)
     f.plot()

     plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')

     # plotnorm((data).^(L2))  # gives a straight line => L2 = 0.5 reasonable
-
-    tkde = TKDE(data, L2=0.5)
+    hs = Kernel('gauss').get_smoothing(data**0.5)
+    tkde = TKDE(data, hs=hs, L2=0.5)
     ft = tkde(x, output='plot',
-              title='Transformation KDE (hs={0:g})'.format(tkde.tkde.hs))
+              title='Transformation KDE (hs={0:})'.format(tkde.tkde.hs))

     plt.figure(1)
     ft.plot()
@@ -1507,7 +1439,6 @@ def _logitinv(x):

 def _get_data(n=100, symmetric=False, loc1=1.1, scale1=0.6, scale2=1.0):
     st = scipy.stats
-    # from sg_filter import SavitzkyGolay
     dist = st.norm
     norm1 = scale2 * (dist.pdf(-loc1, loc=-loc1, scale=scale1) +
@@ -1575,8 +1506,8 @@ def kreg_demo3(x, y, fun1, hs=None, fun='hisj', plotlog=False):
     df = np.diff(fiii)
     eerr = np.abs((yiii - fiii)).std() + 0.5 * (df[:-1] * df[1:] < 0).sum() / n
     err = (fiii - fit).std()
-    msg = '{} err={1:1.3f},eerr={2:1.3f}, n={:d}, hs={:1.3f}, hs1={:1.3f}, '\
-        'hs2={:1.3f}'
+    msg = '{0} err={1:1.3f},eerr={2:1.3f}, n={3:d}, hs={4:}, hs1={5:}, '\
+        'hs2={6:}'
     title = (msg.format(fun, err, eerr, n, hs, hs1, hs2))
     f = kreg(xiii, output='plotobj', title=title, plotflag=1)

@@ -1684,8 +1615,8 @@ def kreg_demo3(x, y, fun1, hs=None, fun='hisj', plotlog=False):
              label='{:d} CI2'.format(int(100 * (1 - alpha))))
     plt.plot(xiii, fun1(xiii), 'r', label='True model')
     plt.scatter(xi, yi, label='data')
-    print('maxp = {:g}'.format(np.nanmax(f.data)))
-    print('hs = {:g}'.format(kreg.tkde.tkde.hs))
+    print('maxp = {}'.format(np.nanmax(f.data)))
+    print('hs = {}'.format(kreg.tkde.tkde.hs))
     plt.legend()
     h = plt.gca()
     if plotlog:
@@ -1875,7 +1806,7 @@ def check_bkregression():

     fig.tile(range(0, k))
     plt.ioff()
-    plt.show()
+    plt.show('hold')


 def _get_regression_smooting(x, y, fun='hste'):
@@ -1999,8 +1930,8 @@ def smoothed_bin_prb(x, y, hs, hopt, alpha=0.05, color='r', label='',

     f.aicc = aicc
     f.fun = kreg
-    f.labels.title = ('perr={:1.3f},aicc={:1.3f}, n={:d}, '
-                      'hs={:1.3f}'.format(f.prediction_error_avg, aicc, n, hs))
+    ttl = "perr={0:1.3f}, aicc={1:1.3f}, n={2:d}, hs={3}"
+    f.labels.title = ttl.format(f.prediction_error_avg, aicc, n, hs)
     return f


@@ -2033,114 +1964,17 @@ def regressionbin(x, y, alpha=0.05, color='r', label=''):
     return fbest


-def kde_gauss_demo(n=50):
-    """KDEDEMO Demonstrate the KDEgauss.
-
-    KDEDEMO1 shows the true density (dotted) compared to KDE based on 7
-    observations (solid) and their individual kernels (dashed) for 3
-    different values of the smoothing parameter, hs.
-
-    """
-
-    st = scipy.stats
-    # x = np.linspace(-4, 4, 101)
-    # data = np.random.normal(loc=0, scale=1.0, size=n)
-    # data = np.random.exponential(scale=1.0, size=n)
-#    n1 = 128
-#    I = (np.arange(n1)*pi)**2 *0.01*0.5
-#    kw = exp(-I)
-#    plt.plot(idctn(kw))
-#    return
-    dist = st.norm
-    # dist = st.expon
-    data = dist.rvs(loc=0, scale=1.0, size=n)
-    d, _N = np.atleast_2d(data).shape
-
-    if d == 1:
-        plot_options = [dict(color='red', label='KDE hste'),
-                        dict(color='green', label='TKDE hisj'),
-                        dict(color='black', label='KDEgauss hste')]
-    else:
-        plot_options = [dict(colors='red'), dict(colors='green'),
-                        dict(colors='black')]
-
-    plt.figure(1)
-    t0 = time.time()
-    kde0 = KDE(data, kernel=Kernel('gauss', 'hste'))
-    f0 = kde0.eval_grid_fast(output='plot', ylab='Density', r=0)
-    t1 = time.time()
-    total1 = t1-t0
-
-    f0.plot('.', **plot_options[0])
-    if dist.name != 'norm':
-        kde1 = TKDE(data, kernel=Kernel('gauss', 'hisj'), L2=.5)
-        f1 = kde1.eval_grid_fast(output='plot', ylab='Density', r=0)
-        f1.plot(**plot_options[1])
-    else:
-        kde1 = kde0
-        f1 = f0
-    t1 = time.time()
-    kde2 = KDEgauss(data)
-    f2 = kde2(output='plot', ylab='Density', r=0)
-    t2 = time.time()
-    total2 = t2-t1
-
-    x = f2.args
-    f2.plot(**plot_options[2])
-
-    fmax = dist.pdf(x, 0, 1).max()
-    if d == 1:
-        plt.plot(x, dist.pdf(x, 0, 1), 'k:', label='True pdf')
-        plt.axis([x.min(), x.max(), 0, fmax])
-    plt.legend()
-    plt.show()
-    print(fmax / f2.data.max())
-    try:
-        print('hs0={:s} hs1={:s} hs2={:s}'.format(str(kde0.hs.tolist()),
-                                                  str(kde1.tkde.hs.tolist()),
-                                                  str(kde2.hs.tolist())))
-    except:
-        pass
-    print('inc0 = {:d}, inc1 = {:d}, inc2 = {:d}'.format(kde0.inc, kde1.inc,
-                                                         kde2.inc))
-    print(np.trapz(f0.data, f0.args), np.trapz(f2.data, f2.args))
-    print(total1, total2)
-
-
-def test_kde():
-    data = np.array([
-        0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887,
-        1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326,
-        1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487,
-        1.8919469, 0.72433808, 1.92973094, 0.44749838, 1.36508452])
-
-    x = np.linspace(0.01, max(data + 1), 10)
-    kde = TKDE(data, hs=0.5, L2=0.5)
-    _f = kde(x)
-    # f = array([1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318,
-    #            0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882])
-
-    _f1 = kde.eval_grid(x)
-    # array([ 1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318,
-    #         0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882])
-
-    _f2 = kde.eval_grid_fast(x)
-    # array([ 1.06437223, 0.46203314, 0.39593137, 0.32781899, 0.26276433,
-    #         0.20532206, 0.15723498, 0.11843998, 0.08797755, 0.        ])
-
-
 if __name__ == '__main__':
-    if True:
+    if False:
         test_docstrings(__file__)
     else:
-        # test_kde()
+        kde_demo2()
         # check_bkregression()
         # check_regression_bin()
         # check_kreg_demo3()
         # check_kreg_demo4()
-        # kde_demo2()

         # kreg_demo1(fast=True)
-        kde_gauss_demo(n=50)
+        # kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True)
         plt.show('hold')
diff --git a/wafo/tests/test_kdetools.py b/wafo/tests/test_kdetools.py
index bce854e..d44f769 100644
--- a/wafo/tests/test_kdetools.py
+++ b/wafo/tests/test_kdetools.py
@@ -80,59 +80,6 @@ class TestKdeTools(unittest.TestCase):
                              0.11072988772879173, 0.05992730870218242,
                              0.02687783924833738, 0.00974982785617795])

-    def skiptest0_KDEgauss_1D(self):
-        data, x = self.data, self.x
-        # kde = wk.KDE(data, hs=0.5, alpha=0.5)
-
-        kde0 = wk.KDEgauss(data, hs=0.5, alpha=0.0, inc=16)
-
-        fx = kde0.eval_grid(x)
-        assert_allclose(fx, [0.2039735, 0.40252503, 0.54595078,
-                             0.52219649, 0.3906213, 0.26381501, 0.16407362,
-                             0.08270612, 0.02991145, 0.00720821])
-
-        fx = kde0.eval_grid(x, r=1)
-
-        assert_allclose(-fx, [0.11911419724002906, 0.13440000694772541,
-                              0.044400116190638696, -0.0677695267531197,
-                              -0.09555596523854318, -0.07498819087690148,
-                              -0.06167607128369182, -0.04678588231996062,
-                              -0.024515979196411814, -0.008022010381009501])
-
-        fx = kde0.eval_grid(x, r=2)
-        assert_allclose(fx, [0.08728138131197069, 0.07558648034784508,
-                             0.05093715852686607, 0.07908624791267539,
-                             0.10495675573359599, 0.07916167222333347,
-                             0.048168330179460386, 0.03438361415806721,
-                             0.02197927811015286, 0.009222988165160621])
-
-        ffx = kde0.eval_grid_fast(x)
-        # print(ffx.tolist())
-        assert_allclose(ffx, [0.20729484, 0.39865044, 0.53716945, 0.5169322,
-                              0.39060223, 0.26441126, 0.16388801, 0.08388527,
-                              0.03227164, 0.00883579], 1e-6)
-
-        fx = kde0.eval_grid_fast(x, r=1)
-        assert_allclose(fx, [-0.11582450668441863, -0.12901768780183628,
-                             -0.04402464127812092, 0.0636190549560749,
-                             0.09345144501310157, 0.07573621607126926,
-                             0.06149475587201987, 0.04550210608639078,
-                             0.024427027615689087, 0.00885576504750473])
-
-        fx = kde0.eval_grid_fast(x, r=2)
-        assert_allclose(fx, [0.08499284131672676, 0.07572564161758065,
-                             0.05329987919556978, 0.07849796347259348,
-                             0.10232741197885842, 0.07869015379158453,
-                             0.049431823916945394, 0.034527256372343613,
-                             0.021517998409663567, 0.009527401063843402])
-
-        f = kde0.eval_grid_fast()
-        assert_allclose(f, [0.06807544, 0.12949095, 0.21985421, 0.33178031,
-                            0.44334874, 0.52429234, 0.55140336, 0.52221323,
-                            0.45500674, 0.3752208, 0.30046799, 0.235667,
-                            0.17854402, 0.12721305, 0.08301993, 0.04862324])
-        assert_allclose(np.trapz(f, kde0.args), 0.96716261)
-
     def test1_TKDE1D(self):
         data = self.data
         x = np.linspace(0.01, max(data) + 1, 10)