From 663636eecee89e9dba458c46df1e1a640b41a2cc Mon Sep 17 00:00:00 2001
From: "Per.Andreas.Brodtkorb"
Date: Tue, 6 Dec 2011 16:22:19 +0000
Subject: [PATCH] Fixed a bug in KRegression

---
 pywafo/src/wafo/kdetools.py   |  40 +++++++++----
 pywafo/src/wafo/stats/core.py | 106 ++++++++++++++++++++++++++++++----
 2 files changed, 126 insertions(+), 20 deletions(-)

diff --git a/pywafo/src/wafo/kdetools.py b/pywafo/src/wafo/kdetools.py
index 103e738..630e4fb 100644
--- a/pywafo/src/wafo/kdetools.py
+++ b/pywafo/src/wafo/kdetools.py
@@ -211,8 +211,6 @@ class KDEgauss(object):
         at = dctn(c) * kw
         z = idctn(at)*at.size/np.prod(R)
         return z*(z>0.0)
-        #ix = (slice(0, inc),)*d
-        #return z[ix] * (z[ix] > 0.0)
 
     def _eval_grid_fun(self, eval_grd, *args, **kwds):
         output = kwds.pop('output', 'value')
@@ -655,7 +653,11 @@ class TKDE(_KDE):
                                  fill_value=0.0)
                 #fi.shape = shape0i
                 self.args = args
-                return fi*(fi>0)
+                r = kwds.get('r', 0)
+                if r==0:
+                    return fi*(fi>0)
+                else:
+                    return fi
         return f
     def _eval_grid(self, *args, **kwds):
         if self.L2 is None:
@@ -869,13 +871,13 @@ class KDE(_KDE):
         norm_fact0 = (kw.sum()*dx.prod()*self.n)
         norm_fact = (self._norm_factor * self.kernel.norm_factor(d, self.n))
         if np.abs(norm_fact0-norm_fact)>0.05*norm_fact:
-            warnings.warn('Numerical inaccuracy due to too low discretization. Increase the discretization (inc=%d)!' % self.inc)
+            warnings.warn('Numerical inaccuracy due to too low discretization. Increase the discretization of the evaluation grid (inc=%d)!' % inc)
             norm_fact = norm_fact0
 
         kw = kw/norm_fact
         r = kwds.get('r', 0)
         if r!=0:
-            kw *= np.vstack(Xnc) ** r
+            kw *= np.vstack(Xnc) ** r if d>1 else Xnc[0]
         kw.shape = shape0
         kw = np.fft.ifftshift(kw)
         fftn = np.fft.fftn
@@ -891,8 +893,10 @@ class KDE(_KDE):
 
         z = np.real(ifftn(fftn(c, s=nfft) * fftn(kw)))
         ix = (slice(0, inc),)*d
-        return z[ix] * (z[ix] > 0.0)
-
+        if r==0:
+            return z[ix] * (z[ix] > 0.0)
+        else:
+            return z[ix]
     def _eval_grid(self, *args, **kwds):
         grd = meshgrid(*args) if len(args) > 1 else list(args)
 
@@ -2957,7 +2961,8 @@ def kreg_demo1(hs=None, fast=False, fun='hisj'):
     y0 = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2))
     y = y0 + ei
     kernel = Kernel('gauss',fun=fun)
-    kreg = KRegression(x, y, p=0, hs=hs, kernel=kernel)
+    hopt = kernel.hisj(x)
+    kreg = KRegression(x, y, p=0, hs=hs, kernel=kernel, xmin=-2*hopt, xmax=1+2*hopt)
     if fast:
         kreg.__call__ = kreg.eval_grid_fast
 
@@ -2969,14 +2974,26 @@ def kreg_demo1(hs=None, fast=False, fun='hisj'):
     kreg.p=1
     f1 = kreg(output='plot', title='Kernel regression', plotflag=1)
     f1.plot(label='p=1')
-    plt.plot(x,y,'.', x,y0, 'k')
+    print(f1.data)
+    plt.plot(x, y, '.', label='data')
+    plt.plot(x, y0, 'k', label='True model')
     plt.legend()
     plt.show()
 
     print(kreg.tkde.tkde.inv_hs)
     print(kreg.tkde.tkde.hs)
 
+def kreg_demo2(n=100):
+
+
+    x = np.sort(5*np.random.rand(n,1)-2.5, axis=0)
+    y = (np.cos(x)>2*np.random.rand(n, 1)-1).ravel()
+    kreg = KRegression(x.ravel(),y)
+    f = kreg(output='plotobj', title='Kernel regression', plotflag=1)
+    f.plot()
+    plt.show()
+
 def kde_gauss_demo(n=50):
     '''
     KDEDEMO Demonstrate the KDEgauss
@@ -3029,6 +3046,8 @@ def kde_gauss_demo(n=50):
     format_ = 'hs0=%s hs1=%s hs2=%s' % (format_, format_, format_)
     print(format_ % tuple(kde0.hs.tolist()+kde1.tkde.hs.tolist()+kde2.hs.tolist()))
     print('inc0 = %d, inc1 = %d, inc2 = %d' % (kde0.inc, kde1.inc,kde2.inc))
+
+
 def test_docstrings():
     import doctest
     doctest.testmod()
@@ -3038,4 +3057,5 @@ if __name__ == '__main__':
     #test_docstrings()
     #kde_demo2()
     #kreg_demo1(fast=True)
-    kde_gauss_demo()
\ No newline at end of file
+    #kde_gauss_demo()
+    kreg_demo1()
\ No newline at end of file
diff --git a/pywafo/src/wafo/stats/core.py b/pywafo/src/wafo/stats/core.py
index b9a1440..53b9a9e 100644
--- a/pywafo/src/wafo/stats/core.py
+++ b/pywafo/src/wafo/stats/core.py
@@ -873,13 +871,10 @@ class RegLogit(object):
             nulldev = self.loglike (tb0, y, X, z, z1)[0]
         else:
             nulldev = dev
-
-
+
         # maximize likelihood using Levenberg modified Newton's method
-        iter = 0;
-        stop = False
-        while not stop:
-            iter += 1
+        for i in range(self.maxiter+1):
+
             tbold = tb;
             devold = dev;
             tb = tbold - np.linalg.lstsq(d2l, dl)[0]
@@ -907,7 +904,9 @@ class RegLogit(object):
                     print(np.linalg.eig(d2l)[0].T);
                 #end
             #end
-            stop = np.abs(np.dot(dl, np.linalg.lstsq(d2l, dl)[0]) / len(dl)) <= tol or iter>self.maxiter
+            stop = np.abs(np.dot(dl, np.linalg.lstsq(d2l, dl)[0]) / len(dl)) <= tol
+            if stop:
+                break
         #end %while
 
         #% tidy up output
@@ -975,8 +974,8 @@ class RegLogit(object):
         self.dispersion = 1;
         self.R2 = R2;
         self.R2adj = R2adj;
-        self.numiter = iter;
-        self.converged = iter<self.maxiter
+        self.numiter = i
+        self.converged = i<self.maxiter
+
+def test_reglogit2(n=40):
+    x = np.sort(5*np.random.rand(n, 1)-2.5, axis=0)
+    y = (np.cos(x)>2*np.random.rand(n,1)-1)
+    b = RegLogit()
+    b.fit(y,x)
+    #b.display() #% members and methods
+    b.summary()
+    [mu,plo,pup] = b.predict(fulloutput=True);
+    import matplotlib.pyplot as pl
+    pl.plot(x,mu,'g',x,plo,'r:',x,pup,'r:')
+    pl.show()
+
+def test_sklearn0():
+    from sklearn.linear_model import LogisticRegression
+    from sklearn import datasets
+
+    # FIXME: the iris dataset has only 4 features!
+    iris = datasets.load_iris()
+    X = iris.data
+    y = iris.target
+
+    X = np.sort(5*np.random.rand(40, 1)-2.5, axis=0)
+    y = (2*(np.cos(X)>2*np.random.rand(40, 1)-1)-1).ravel()
+
+    score = []
+    # Set regularization parameter
+    cvals = np.logspace(-1,1,5)
+    for C in cvals:
+        clf_LR = LogisticRegression(C=C, penalty='l2')
+        clf_LR.fit(X, y)
+        score.append(clf_LR.score(X,y))
+
+    plot(cvals, score)
+
+def test_sklearn():
+    X = np.sort(5*np.random.rand(40, 1)-2.5, axis=0)
+    y = (2*(np.cos(X)>2*np.random.rand(40, 1)-1)-1).ravel()
+    from sklearn.svm import SVR
+
+
+
+    ###############################################################################
+    # look at the results
+    import pylab as pl
+    pl.scatter(X, .5*np.cos(X)+0.5, c='k', label='True model')
+    pl.hold('on')
+    cvals= np.logspace(-1,3,20)
+    score = []
+    for c in cvals:
+        svr_rbf = SVR(kernel='rbf', C=c, gamma=0.1, probability=True)
+        svrf = svr_rbf.fit(X, y)
+        y_rbf = svrf.predict(X)
+        score.append(svrf.score(X,y))
+        pl.plot(X, y_rbf, label='RBF model c=%g' % c)
+    pl.xlabel('data')
+    pl.ylabel('target')
+    pl.title('Support Vector Regression')
+    pl.legend()
+    pl.show()
+
+def test_sklearn1():
+    X = np.sort(5*np.random.rand(40, 1)-2.5, axis=0)
+    y = (2*(np.cos(X)>2*np.random.rand(40, 1)-1)-1).ravel()
+    from sklearn.svm import SVR
+
+    cvals= np.logspace(-1,4,10)
+    svr_rbf = SVR(kernel='rbf', C=1e4, gamma=0.1, probability=True)
+    svr_lin = SVR(kernel='linear', C=1e4, probability=True)
+    svr_poly = SVR(kernel='poly', C=1e4, degree=2, probability=True)
+    y_rbf = svr_rbf.fit(X, y).predict(X)
+    y_lin = svr_lin.fit(X, y).predict(X)
+    y_poly = svr_poly.fit(X, y).predict(X)
+
+    ###############################################################################
+    # look at the results
+    import pylab as pl
+    pl.scatter(X, .5*np.cos(X)+0.5, c='k', label='True model')
+    pl.hold('on')
+    pl.plot(X, y_rbf, c='g', label='RBF model')
+    pl.plot(X, y_lin, c='r', label='Linear model')
+    pl.plot(X, y_poly, c='b', label='Polynomial model')
+    pl.xlabel('data')
+    pl.ylabel('target')
+    pl.title('Support Vector Regression')
+    pl.legend()
+    pl.show()
 
 def test_doctstrings():
     #_test_dispersion_idx()
 
@@ -1278,6 +1364,6 @@ def test_doctstrings():
 
 
 if __name__ == '__main__':
-    test_reglogit()
+    test_reglogit2()
     #test_doctstrings()
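
For illustration only, and not part of the patch: a minimal sketch of how the corrected 1-D kernel-regression path can be exercised once this change is applied. It mirrors the kreg_demo2 helper added to kdetools.py above; the random test data are placeholders invented for the example.

    # Standalone sketch, assuming a pywafo checkout that includes this fix.
    import numpy as np
    import matplotlib.pyplot as plt
    from wafo.kdetools import KRegression

    n = 100
    # 1-D predictor on [-2.5, 2.5], sorted so the fitted curve plots cleanly
    x = np.sort(5 * np.random.rand(n, 1) - 2.5, axis=0)
    # binary response whose success probability follows (cos(x) + 1) / 2
    y = (np.cos(x) > 2 * np.random.rand(n, 1) - 1).ravel()

    # kernel estimate of E[y | x], evaluated on the default grid (as in kreg_demo2)
    kreg = KRegression(x.ravel(), y)
    f = kreg(output='plotobj', title='Kernel regression', plotflag=1)
    f.plot()
    plt.plot(x, y, '.', label='data')
    plt.legend()
    plt.show()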