Fixed some bugs in RegLogit (still bugs left)

master
Per.Andreas.Brodtkorb 13 years ago
parent 8c5754e30c
commit dd324acedf

@@ -853,8 +853,8 @@ class RegLogit(object):
         [my, ny] = y.shape
         g = (z.sum(axis=0).cumsum() / my).reshape(-1,1)
-        theta00 = np.log(g / (1 - g))
-        beta00 = np.zeros((nx, 1))
+        theta00 = np.log(g / (1 - g)).ravel()
+        beta00 = np.zeros((nx,))
         # starting values
         if theta0 is None:
             theta0 = theta00
@@ -862,7 +862,7 @@ class RegLogit(object):
         if beta0 is None:
             beta0 = beta00
-        tb = np.vstack((theta0, beta0))
+        tb = np.hstack((theta0, beta0))
         # likelihood and derivatives at starting values
         [dev, dl, d2l] = self.loglike(tb, y, X, z, z1)
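The two hunks above switch the starting parameters from 2-D column vectors to flat 1-D arrays, so the combined parameter vector is built with np.hstack instead of np.vstack. A minimal sketch of the shape difference (values hypothetical):

import numpy as np

g = np.array([0.25, 0.55])              # hypothetical cumulative proportions
theta00 = np.log(g / (1 - g)).ravel()   # 1-D, shape (2,)
beta00 = np.zeros((3,))                 # 1-D, shape (3,)

tb = np.hstack((theta00, beta00))       # flat parameter vector, shape (5,)
assert tb.shape == (5,)

# The old 2-D column layout needed vstack and produced shape (5, 1):
tb_col = np.vstack((theta00.reshape(-1, 1), beta00.reshape(-1, 1)))
assert tb_col.shape == (5, 1)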
@@ -882,18 +882,18 @@ class RegLogit(object):
             iter += 1
             tbold = tb;
             devold = dev;
-            tb = tbold - np.linalg.lstsq(d2l, dl)
-            [dev,p,dl,d2l] = self.loglike(tb, y, X, z, z1)
-            if ((dev - devold) / (dl.T * (tb - tbold)) < 0):
+            tb = tbold - np.linalg.lstsq(d2l, dl)[0]
+            [dev, dl, d2l] = self.loglike(tb, y, X, z, z1)
+            if ((dev - devold) / np.dot(dl, tb - tbold) < 0):
                 epsilon = epsilon / decr
             else:
-                while ((dev - devold) / (dl.T * (tb - tbold)) > 0):
+                while ((dev - devold) / np.dot(dl, tb - tbold) > 0):
                     epsilon = epsilon * incr;
                     if (epsilon > 1e+15):
                         raise ValueError('epsilon too large');
-                    tb = tbold - np.linalg.lstsq(d2l - epsilon * np.eye(d2l.shape), dl);
-                    [dev,p,dl,d2l] = self.loglike(tb, y, X, z, z1);
+                    tb = tbold - np.linalg.lstsq(d2l - epsilon * np.eye(d2l.shape), dl)
+                    [dev, dl, d2l] = self.loglike(tb, y, X, z, z1);
                     print('epsilon %g' % epsilon)
                 #end %while
             #end else
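This hunk takes the solution component of np.linalg.lstsq (which returns a (solution, residuals, rank, singular values) tuple) and drops p from the three-output loglike call. Note that np.eye(d2l.shape) in the damped step still passes a tuple where np.eye expects an integer, so np.eye(d2l.shape[0]) is presumably among the bugs still left. A hedged sketch of the damped step (names hypothetical):

import numpy as np

def damped_newton_step(tb_old, dl, d2l, epsilon=0.0):
    # Levenberg-style damping: subtract epsilon*I from the Hessian before
    # solving, as in the while-loop above. Sketch only, not the module's API.
    h = d2l - epsilon * np.eye(d2l.shape[0])      # shape[0], not shape
    step = np.linalg.lstsq(h, dl, rcond=None)[0]  # [0] picks the solution
    return tb_old - step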
@@ -902,31 +902,31 @@ class RegLogit(object):
                 print('Iter: %d, Deviance: %8.6f',iter,dev)
                 print('First derivative');
-                print(dl.T);
+                print(dl);
                 print('Eigenvalues of second derivative');
                 print(np.linalg.eig(d2l)[0].T);
             #end
             #end
-            stop = abs (dl.T * np.linalg.lstsq(d2l, dl) / len(dl)) <= tol or iter>self.maxiter
+            stop = np.abs(np.dot(dl, np.linalg.lstsq(d2l, dl)[0]) / len(dl)) <= tol or iter>self.maxiter
         #end %while
         #% tidy up output
-        theta = tb[:nz, 0]
-        beta = tb[nz:(nz + nx), 1]
+        theta = tb[:nz,]
+        beta = tb[nz:(nz + nx)]
         pcov = np.linalg.pinv(-d2l)
-        se = sqrt(np.diag (pcov))
+        se = sqrt(np.diag(pcov))
         if (nx > 0):
-            eta = ((X * beta) * ones (1, nz)) + ((y * 0 + 1) * theta.T);
+            eta = ((X * beta) * ones((1, nz))) + ((y * 0 + 1) * theta)
         else:
-            eta = (y * 0 + 1) * theta.T;
+            eta = (y * 0 + 1) * theta;
         #end
-        gammai = np.diff(np.hstack(((y * 0), self.logitinv(eta), (y * 0 + 1))),n=1,axis=1)
+        gammai = np.diff(np.hstack(((y * 0), _logitinv(eta), (y * 0 + 1))),n=1,axis=1)
         k0 = min(y)
-        mu = (k0-1)+np.dot(gammai,np.arange(1,nz+2).T); #% E(Y|X)
-        r = np.corrcoef(np.hstack((y,mu)))
+        mu = (k0-1)+np.dot(gammai,np.arange(1, nz+2)).reshape(-1,1)
+        r = np.corrcoef(np.hstack((y,mu)).T)
         R2 = r[0,1]**2; #coefficient of determination
         R2adj = max(1 - (1-R2)* (my-1)/(my-nx-nz-1),0); # adjusted coefficient of determination
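For reference, the adjusted coefficient of determination computed above follows the usual formula; a hypothetical standalone helper:

def r2_adjusted(R2, n_obs, n_params):
    # R2adj = 1 - (1 - R2) * (n - 1) / (n - p - 1), floored at zero,
    # with n_obs = my and n_params = nx + nz in the code above.
    return max(1.0 - (1.0 - R2) * (n_obs - 1) / (n_obs - n_params - 1), 0.0)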
@@ -939,40 +939,40 @@ class RegLogit(object):
         self.link = 'logit';
-        self.numvar = nx+nz;
-        self.numobs = my;
-        self.numk = nz+1;
-        self.df = max(my-nx-nz,0);
+        self.numvar = nx+nz
+        self.numobs = my
+        self.numk = nz+1
+        self.df = max(my-nx-nz,0)
         self.df_null = my-nz; #nulldf; nulldf = n - nz;
-        self.params = tb[:(nz + nx),0].T;
-        self.params_ci = 1;
-        self.params_std = se.T;
+        self.params = tb[:(nz + nx)]
+        self.params_ci = 1
+        self.params_std = se
         self.params_cov = pcov
-        self.params_tstat = (self.params/self.params_std);
+        self.params_tstat = (self.params/self.params_std)
         if False: # % options.estdispersn %dispersion_parameter=='mean_deviance'
-            self.params_pvalue=2.*_cdft(-abs(self.params_tstat),self.df);
-            bcrit = -se.T*_invt(self.alpha/2,self.df);
+            self.params_pvalue=2.*_cdft(-abs(self.params_tstat),self.df)
+            bcrit = -se*_invt(self.alpha/2,self.df);
         else:
-            self.params_pvalue=2.*_cdfnorm(-abs(self.params_tstat));
-            bcrit = -se.T*_invnorm(self.alpha/2);
+            self.params_pvalue=2.*_cdfnorm(-abs(self.params_tstat))
+            bcrit = -se*_invnorm(self.alpha/2);
         #end
         self.params_ci = np.vstack((self.params+bcrit,self.params-bcrit))
         self.mu = gammai;
-        self.eta = self.logit(gammai);
+        self.eta = _logit(gammai);
         self.X = X;
+        [dev, dl, d2l, p] = self.loglike(tb, y, X, z, z1,numout=4)
         self.theta = theta;
         self.beta = beta;
         self.gamma = gammai;
         self.residual = res.T;
-        self.residualD = np.sign(self.residual)*sqrt(-2*np.log(p)).T;
+        self.residualD = np.sign(self.residual)*sqrt(-2*np.log(p));
         self.deviance = dev;
         self.deviance_null = nulldev;
         self.d2L = d2l;
         self.dL = dl.T;
-        self.dispersnfit=1;
-        self.dispersn = 1;
+        self.dispersionfit=1;
+        self.dispersion = 1;
         self.R2 = R2;
         self.R2adj = R2adj;
         self.numiter = iter;
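The params_ci block builds Wald-type intervals: with se now 1-D, bcrit = -se*_invnorm(self.alpha/2) is a positive half-width that broadcasts elementwise. A sketch assuming scipy's norm.ppf behaves like the module's _invnorm helper:

import numpy as np
from scipy.stats import norm

def wald_ci(params, se, alpha=0.05):
    # norm.ppf(alpha/2) is negative, so bcrit is a positive half-width.
    bcrit = -se * norm.ppf(alpha / 2)
    return np.vstack((params + bcrit, params - bcrit))  # upper row, lower row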
@@ -1054,11 +1054,11 @@ class RegLogit(object):
         print(' ')
         print(' Analysis of Deviance')
         if False: # %options.estdispersn
-            localstat = abs(self.deviance_null-self.deviance)/self.dispersnfit/(self.numvar-1);
+            localstat = abs(self.deviance_null-self.deviance)/self.dispersionfit/(self.numvar-1);
             localpvalue = 1-_cdff(localstat,self.numvar-1,self.df);
             print('Model DF Residual deviance F-stat Pr(>F)')
         else:
-            localstat = abs(self.deviance_null-self.deviance)/self.dispersnfit;
+            localstat = abs(self.deviance_null-self.deviance)/self.dispersionfit;
             localpvalue = 1-_cdfchi2(localstat,self.numvar-1);
             print('Model DF Residual deviance Chi2-stat Pr(>Chi2)')
         #end
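The else-branch performs a likelihood-ratio (deviance) chi-square test of the null model against the fit, with the dispersion fixed at 1. A sketch assuming _cdfchi2 is the chi-square CDF (here via scipy):

from scipy.stats import chi2

def deviance_test(deviance_null, deviance, df):
    # Likelihood-ratio (deviance) test; df = self.numvar - 1 above.
    stat = abs(deviance_null - deviance)
    return stat, 1 - chi2.cdf(stat, df)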
@@ -1139,11 +1139,11 @@ class RegLogit(object):
         nz = self.numk-1;
         one = ones((n,1))
         if (nx > 0):
-            eta = np.dot(Xnew * self.beta) + self.theta.T;
+            eta = np.dot(Xnew, self.beta).reshape(-1,1) + self.theta
         else:
-            eta = one * self.theta.T
+            eta = one * self.theta
         #end
-        y = np.diff(np.hstack((zeros((n,1)), self.logitinv(eta), one)),1,2)
+        y = np.diff(np.hstack((zeros((n,1)), _logitinv(eta), one)),n=1, axis=1)
         if fulloutput:
             eps = np.finfo(float).eps
             pcov = self.params_cov;
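predict() converts the cumulative logits into per-category probabilities by differencing the inverse-logit values padded with 0 and 1, which is what the corrected np.diff(..., n=1, axis=1) call does. A standalone sketch:

import numpy as np

def category_probabilities(eta):
    # eta: cumulative logits, shape (n, n_categories - 1).
    n = eta.shape[0]
    cdf = 1.0 / (1.0 + np.exp(-eta))                      # inverse logit
    padded = np.hstack((np.zeros((n, 1)), cdf, np.ones((n, 1))))
    return np.diff(padded, n=1, axis=1)                   # rows sum to 1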
@@ -1182,28 +1182,24 @@ class RegLogit(object):
             return y,ylo,yup
         return y
-    def loglike(self, beta, y, x, z, z1):
+    def loglike(self, beta, y, x, z, z1, numout=3):
         '''
-        [dev, p g, g1] = loglike( y ,x,beta,z,z1)
+        [dev, dl, d2l, p] = loglike( y ,x,beta,z,z1)
         Calculates likelihood for the ordinal logistic regression model.
         '''
         # Author: Gordon K. Smyth <gks@maths.uq.oz.au>
         zx = np.hstack((z, x))
         z1x = np.hstack((z1, x))
-        g = _logitinv(np.dot(zx, beta))
-        g1 = _logitinv(np.dot(z1x, beta))
+        g = _logitinv(np.dot(zx, beta)).reshape((-1,1))
+        g1 = _logitinv(np.dot(z1x, beta)).reshape((-1,1))
         g = np.maximum(y == y.max(), g)
         g1 = np.minimum(y > y.min(), g1)
         p = g - g1
-        dev = -2 * sum (np.log(p));
-        #return dev, p, g, g1
-        ##end %function
-        #
-        #
-        # def logistic_regression_derivatives(self, x, z, z1, g, g1, p):
-        '''% [dl, d2l] = logistic_regression_derivatives(x, z, z1, g, g1, p)
+        dev = -2 * np.log(p).sum()
+        '''[dl, d2l] = derivatives of loglike(beta, y, x, z, z1)
         % Called by logistic_regression. Calculates derivates of the
         % log-likelihood for ordinal logistic regression model.
         '''
@@ -1215,14 +1211,17 @@ class RegLogit(object):
         v = g * (1 - g) / p;
         v1 = g1 * (1 - g1) / p;
         dlogp = np.hstack((((v*z) - (v1*z1)), ((v - v1)*x)))
-        dl = np.sum(dlogp, axis=0).T
+        dl = np.sum(dlogp, axis=0)
         # second derivative
         w = v * (1 - 2 * g)
         w1 = v1 * (1 - 2 * g1)
         d2l = np.dot(zx.T, (w*zx)) - np.dot(z1x.T, (w1*z1x)) - np.dot(dlogp.T, dlogp)
-        return dev, p, dl, d2l
+        if numout==4:
+            return dev, dl, d2l, p
+        else:
+            return dev, dl, d2l
         #end %function
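Taken together, the loglike changes compute the deviance, score and Hessian in one pass and dispatch on the new numout argument. A compact self-contained sketch of the same computation (boundary clamping of g toward 1 for the top category and of g1 toward 0 for the bottom is omitted for brevity):

import numpy as np

def _logitinv_sketch(t):
    return 1.0 / (1.0 + np.exp(-t))

def ordinal_loglike(beta, z, z1, x, numout=3):
    # g, g1: upper and lower cumulative probabilities per observation;
    # p = g - g1 is the probability of the observed category.
    zx = np.hstack((z, x))
    z1x = np.hstack((z1, x))
    g = _logitinv_sketch(zx @ beta).reshape(-1, 1)
    g1 = _logitinv_sketch(z1x @ beta).reshape(-1, 1)
    p = g - g1
    dev = -2 * np.log(p).sum()
    v, v1 = g * (1 - g) / p, g1 * (1 - g1) / p
    dlogp = np.hstack((v * z - v1 * z1, (v - v1) * x))
    dl = dlogp.sum(axis=0)                       # score vector
    w, w1 = v * (1 - 2 * g), v1 * (1 - 2 * g1)
    d2l = zx.T @ (w * zx) - z1x.T @ (w1 * z1x) - dlogp.T @ dlogp  # Hessian
    return (dev, dl, d2l, p) if numout == 4 else (dev, dl, d2l)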
@@ -1282,14 +1281,14 @@ def _test_reslife():
 def test_reglogit():
     y=np.array([1, 1, 2, 1, 3, 2, 3, 2, 3, 3]).reshape(-1,1)
-    x = np.arange(10).reshape(-1,1)
+    x = np.arange(1,11).reshape(-1,1)
     b = RegLogit()
     b.fit(y,x)
     #b.display() #% members and methods
     b.summary()
-    [mu,plo,pup] = b.predict();
-    plot(x,mu,'g',x,plo,'r:',x,pup,'r:')
+    [mu,plo,pup] = b.predict(fulloutput=True);
+    #plot(x,mu,'g',x,plo,'r:',x,pup,'r:')

 def test_doctstrings():
     #_test_dispersion_idx()