From 663636eecee89e9dba458c46df1e1a640b41a2cc Mon Sep 17 00:00:00 2001
From: "Per.Andreas.Brodtkorb"
Date: Tue, 6 Dec 2011 16:22:19 +0000
Subject: [PATCH] Fixed a bug in KRegression

---
 pywafo/src/wafo/kdetools.py   |  40 +++++++++----
 pywafo/src/wafo/stats/core.py | 106 ++++++++++++++++++++++++++++++----
 2 files changed, 126 insertions(+), 20 deletions(-)

diff --git a/pywafo/src/wafo/kdetools.py b/pywafo/src/wafo/kdetools.py
index 103e738..630e4fb 100644
--- a/pywafo/src/wafo/kdetools.py
+++ b/pywafo/src/wafo/kdetools.py
@@ -211,8 +211,6 @@ class KDEgauss(object):
         at = dctn(c) * kw
         z = idctn(at)*at.size/np.prod(R)
         return z*(z>0.0)
-        #ix = (slice(0, inc),)*d
-        #return z[ix] * (z[ix] > 0.0)
 
     def _eval_grid_fun(self, eval_grd, *args, **kwds):
         output = kwds.pop('output', 'value')
@@ -655,7 +653,11 @@ class TKDE(_KDE):
                                  fill_value=0.0)
                 #fi.shape = shape0i
                 self.args = args
-                return fi*(fi>0)
+                r = kwds.get('r', 0)
+                if r==0:
+                    return fi*(fi>0)
+                else:
+                    return fi
         return f
     def _eval_grid(self, *args, **kwds):
         if self.L2 is None:
@@ -869,13 +871,13 @@ class KDE(_KDE):
         norm_fact0 = (kw.sum()*dx.prod()*self.n)
         norm_fact = (self._norm_factor * self.kernel.norm_factor(d, self.n))
         if np.abs(norm_fact0-norm_fact)>0.05*norm_fact:
-            warnings.warn('Numerical inaccuracy due to too low discretization. Increase the discretization (inc=%d)!' % self.inc)
+            warnings.warn('Numerical inaccuracy due to too low discretization. Increase the discretization of the evaluation grid (inc=%d)!' % inc)
             norm_fact = norm_fact0
 
         kw = kw/norm_fact
         r = kwds.get('r', 0)
         if r!=0:
-            kw *= np.vstack(Xnc) ** r
+            kw *= np.vstack(Xnc) ** r if d>1 else Xnc[0]
         kw.shape = shape0
         kw = np.fft.ifftshift(kw)
         fftn = np.fft.fftn
@@ -891,8 +893,10 @@ class KDE(_KDE):
 
         z = np.real(ifftn(fftn(c, s=nfft) * fftn(kw)))
         ix = (slice(0, inc),)*d
-        return z[ix] * (z[ix] > 0.0)
-
+        if r==0:
+            return z[ix] * (z[ix] > 0.0)
+        else:
+            return z[ix]
     def _eval_grid(self, *args, **kwds):
         grd = meshgrid(*args) if len(args) > 1 else list(args)
 
@@ -2957,7 +2961,8 @@ def kreg_demo1(hs=None, fast=False, fun='hisj'):
     y0 = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2))
     y = y0 + ei
     kernel = Kernel('gauss',fun=fun)
-    kreg = KRegression(x, y, p=0, hs=hs, kernel=kernel)
+    hopt = kernel.hisj(x)
+    kreg = KRegression(x, y, p=0, hs=hs, kernel=kernel, xmin=-2*hopt, xmax=1+2*hopt)
     if fast:
         kreg.__call__ = kreg.eval_grid_fast
 
@@ -2969,14 +2974,26 @@ def kreg_demo1(hs=None, fast=False, fun='hisj'):
     kreg.p=1
     f1 = kreg(output='plot', title='Kernel regression', plotflag=1)
     f1.plot(label='p=1')
-    plt.plot(x,y,'.', x,y0, 'k')
+    print(f1.data)
+    plt.plot(x, y, '.', label='data')
+    plt.plot(x, y0, 'k', label='True model')
     plt.legend()
     plt.show()
 
     print(kreg.tkde.tkde.inv_hs)
     print(kreg.tkde.tkde.hs)
 
+def kreg_demo2(n=100):
+
+
+    x = np.sort(5*np.random.rand(n,1)-2.5, axis=0)
+    y = (np.cos(x)>2*np.random.rand(n, 1)-1).ravel()
+    kreg = KRegression(x.ravel(),y)
+    f = kreg(output='plotobj', title='Kernel regression', plotflag=1)
+    f.plot()
+    plt.show()
+
 def kde_gauss_demo(n=50):
     '''
     KDEDEMO Demonstrate the KDEgauss
@@ -3029,6 +3046,8 @@ def kde_gauss_demo(n=50):
     format_ = 'hs0=%s hs1=%s hs2=%s' % (format_, format_, format_)
     print(format_ % tuple(kde0.hs.tolist()+kde1.tkde.hs.tolist()+kde2.hs.tolist()))
     print('inc0 = %d, inc1 = %d, inc2 = %d' % (kde0.inc, kde1.inc,kde2.inc))
+
+
 def test_docstrings():
     import doctest
     doctest.testmod()
@@ -3038,4 +3057,5 @@ if __name__ == '__main__':
     #test_docstrings()
     #kde_demo2()
     #kreg_demo1(fast=True)
-    kde_gauss_demo()
\ No newline at end of file
+    #kde_gauss_demo()
+    kreg_demo1()
\ No newline at end of file
diff --git a/pywafo/src/wafo/stats/core.py b/pywafo/src/wafo/stats/core.py
index b9a1440..53b9a9e 100644
--- a/pywafo/src/wafo/stats/core.py
+++ b/pywafo/src/wafo/stats/core.py
@@ -873,13 +871,10 @@ class RegLogit(object):
             nulldev = self.loglike (tb0, y, X, z, z1)[0]
         else:
             nulldev = dev
-
-
+
         # maximize likelihood using Levenberg modified Newton's method
-        iter = 0;
-        stop = False
-        while not stop:
-            iter += 1
+        for i in range(self.maxiter+1):
+
             tbold = tb;
             devold = dev;
             tb = tbold - np.linalg.lstsq(d2l, dl)[0]
@@ -907,7 +904,9 @@ class RegLogit(object):
                     print(np.linalg.eig(d2l)[0].T);
                 #end
             #end
-            stop = np.abs(np.dot(dl, np.linalg.lstsq(d2l, dl)[0]) / len(dl)) <= tol or iter>self.maxiter
+            stop = np.abs(np.dot(dl, np.linalg.lstsq(d2l, dl)[0]) / len(dl)) <= tol
+            if stop:
+                break
         #end %while
 
         #% tidy up output
@@ -975,8 +974,8 @@ class RegLogit(object):
         self.dispersion = 1;
         self.R2 = R2;
         self.R2adj = R2adj;
-        self.numiter = iter;
-        self.converged = iter<self.maxiter
+        self.numiter = i
+        self.converged = i<self.maxiter
+
+def test_reglogit2(n=40):
+    x = np.sort(5*np.random.rand(n, 1)-2.5, axis=0)
+    y = (np.cos(x)>2*np.random.rand(n,1)-1)
+    b = RegLogit()
+    b.fit(y,x)
+    #b.display() #% members and methods
+    b.summary()
+    [mu,plo,pup] = b.predict(fulloutput=True);
+    import matplotlib.pyplot as pl
+    pl.plot(x,mu,'g',x,plo,'r:',x,pup,'r:')
+    pl.show()
+
+def test_sklearn0():
+    from sklearn.linear_model import LogisticRegression
+    from sklearn import datasets
+
+    # FIXME: the iris dataset has only 4 features!
+    iris = datasets.load_iris()
+    X = iris.data
+    y = iris.target
+
+    X = np.sort(5*np.random.rand(40, 1)-2.5, axis=0)
+    y = (2*(np.cos(X)>2*np.random.rand(40, 1)-1)-1).ravel()
+
+    score = []
+    # Set regularization parameter
+    cvals = np.logspace(-1,1,5)
+    for C in cvals:
+        clf_LR = LogisticRegression(C=C, penalty='l2')
+        clf_LR.fit(X, y)
+        score.append(clf_LR.score(X,y))
+
+    plot(cvals, score)
+
+def test_sklearn():
+    X = np.sort(5*np.random.rand(40, 1)-2.5, axis=0)
+    y = (2*(np.cos(X)>2*np.random.rand(40, 1)-1)-1).ravel()
+    from sklearn.svm import SVR
+
+
+
+    ###############################################################################
+    # look at the results
+    import pylab as pl
+    pl.scatter(X, .5*np.cos(X)+0.5, c='k', label='True model')
+    pl.hold('on')
+    cvals= np.logspace(-1,3,20)
+    score = []
+    for c in cvals:
+        svr_rbf = SVR(kernel='rbf', C=c, gamma=0.1, probability=True)
+        svrf = svr_rbf.fit(X, y)
+        y_rbf = svrf.predict(X)
+        score.append(svrf.score(X,y))
+        pl.plot(X, y_rbf, label='RBF model c=%g' % c)
+    pl.xlabel('data')
+    pl.ylabel('target')
+    pl.title('Support Vector Regression')
+    pl.legend()
+    pl.show()
+
+def test_sklearn1():
+    X = np.sort(5*np.random.rand(40, 1)-2.5, axis=0)
+    y = (2*(np.cos(X)>2*np.random.rand(40, 1)-1)-1).ravel()
+    from sklearn.svm import SVR
+
+    cvals= np.logspace(-1,4,10)
+    svr_rbf = SVR(kernel='rbf', C=1e4, gamma=0.1, probability=True)
+    svr_lin = SVR(kernel='linear', C=1e4, probability=True)
+    svr_poly = SVR(kernel='poly', C=1e4, degree=2, probability=True)
+    y_rbf = svr_rbf.fit(X, y).predict(X)
+    y_lin = svr_lin.fit(X, y).predict(X)
+    y_poly = svr_poly.fit(X, y).predict(X)
+
+    ###############################################################################
+    # look at the results
+    import pylab as pl
+    pl.scatter(X, .5*np.cos(X)+0.5, c='k', label='True model')
+    pl.hold('on')
+    pl.plot(X, y_rbf, c='g', label='RBF model')
+    pl.plot(X, y_lin, c='r', label='Linear model')
+    pl.plot(X, y_poly, c='b', label='Polynomial model')
+    pl.xlabel('data')
+    pl.ylabel('target')
+    pl.title('Support Vector Regression')
+    pl.legend()
+    pl.show()
 
 def test_doctstrings():
     #_test_dispersion_idx()
 
@@ -1278,6 +1364,6 @@ def test_doctstrings():
 
 
 if __name__ == '__main__':
-    test_reglogit()
+    test_reglogit2()
     #test_doctstrings()
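
For illustration only, and not part of the patch: a minimal sketch of how the corrected 1-D kernel-regression path can be exercised once this change is applied. It mirrors the kreg_demo2 helper added to kdetools.py above; the random test data are placeholders invented for the example.

    # Standalone sketch, assuming a pywafo checkout that includes this fix.
    import numpy as np
    import matplotlib.pyplot as plt
    from wafo.kdetools import KRegression

    n = 100
    # 1-D predictor on [-2.5, 2.5], sorted so the fitted curve plots cleanly
    x = np.sort(5 * np.random.rand(n, 1) - 2.5, axis=0)
    # binary response whose success probability follows (cos(x) + 1) / 2
    y = (np.cos(x) > 2 * np.random.rand(n, 1) - 1).ravel()

    # kernel estimate of E[y | x], evaluated on the default grid (as in kreg_demo2)
    kreg = KRegression(x.ravel(), y)
    f = kreg(output='plotobj', title='Kernel regression', plotflag=1)
    f.plot()
    plt.plot(x, y, '.', label='data')
    plt.legend()
    plt.show()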