Small updates

master
per.andreas.brodtkorb 13 years ago
parent 14e65da072
commit 3ef99faa56

@ -31,13 +31,20 @@ def valarray(shape, value=nan, typecode=None):
if not isinstance(out, ndarray): if not isinstance(out, ndarray):
out = arr(out) out = arr(out)
return out return out
def _cdff(self, x, dfn, dfd):
return special.fdtr(dfn, dfd, x)
def _cdft(x,df):
return special.stdtr(df, x)
def _invt(q, df): def _invt(q, df):
return special.stdtrit(df, q) return special.stdtrit(df, q)
def _cdfchi2(x, df):
return special.chdtr(df, x)
def _invchi2(q, df): def _invchi2(q, df):
return special.chdtri(df, q) return special.chdtri(df, q)
def _cdfnorm(x):
return special.ndtr(x)
def _invnorm(q):
return special.ndtri(q)
def edf(x, method=2): def edf(x, method=2):
''' '''
@ -653,6 +660,11 @@ def extremal_idx(ti):
ei = min(1, 2*np.mean(t-1)**2/np.mean((t-1)*(t-2))) ei = min(1, 2*np.mean(t-1)**2/np.mean((t-1)*(t-2)))
return ei return ei
def _logit(p):
return np.log(p)-np.log1p(-p)
def _logitinv(x):
return 1.0/(np.exp(-x)+1)
class RegLogit(object): class RegLogit(object):
''' '''
REGLOGIT Fit ordinal logistic regression model. REGLOGIT Fit ordinal logistic regression model.
@ -798,22 +810,16 @@ class RegLogit(object):
self.note = '' self.note = ''
self.date = now() self.date = now()
def _logit(self, p):
return np.log(p)-np.log1p(-p)
def _logitinv(self, x):
return 1.0/(np.exp(-x)+1)
def check_xy(self, y, X): def check_xy(self, y, X):
y = np.round(np.atleast_2d(y)) y = np.round(np.atleast_2d(y))
my, ny = y.shape my = y.shape[0]
if X is None: if X is None:
X = np.zeros((my, 0)) X = np.zeros((my, 0))
elif self.deletecolinear: elif self.deletecolinear:
X = np.atleast_2d(X) X = np.atleast_2d(X)
# Make sure X is full rank # Make sure X is full rank
s = np.linalg.svd(X)[1] s = np.linalg.svd(X)[1]
tol = max(size(X)) * np.finfo(s.max()).eps tol = max(X.shape) * np.finfo(s.max()).eps
ix = np.flatnonzero(s>tol) ix = np.flatnonzero(s>tol)
iy = np.flatnonzero(s<=tol) iy = np.flatnonzero(s<=tol)
if len(ix): if len(ix):
@ -821,7 +827,7 @@ class RegLogit(object):
txt = [' %d,' % i for i in iy] txt = [' %d,' % i for i in iy]
txt[-1] = ' %d' % iy[-1] txt[-1] = ' %d' % iy[-1]
warnings.warn('Covariate matrix is singular. Removing column(s):%s',txt) warnings.warn('Covariate matrix is singular. Removing column(s):%s',txt)
[mx, nx] = X.shape mx = X.shape[0]
if (mx != my): if (mx != my):
raise ValueError('x and y must have the same number of observations'); raise ValueError('x and y must have the same number of observations');
return y, X return y, X
@ -879,12 +885,12 @@ class RegLogit(object):
tb = np.vstack((theta0, beta0)) tb = np.vstack((theta0, beta0))
# likelihood and derivatives at starting values # likelihood and derivatives at starting values
[dev,p,g, g1] = logistic_regression_likelihood (y, X, tb, z, z1); [dev, dl, d2l] = self.loglike(tb, y, X, z, z1)
[dl, d2l] = logistic_regression_derivatives (X, z, z1, g, g1, p);
epsilon = std (d2l) / 1000; epsilon = np.std(d2l) / 1000;
if np.any(beta) or np.any(theta!=theta0): if np.any(beta0) or np.any(theta00!=theta0):
tb0 = np.vstack((theta00,beta00)) tb0 = np.vstack((theta00,beta00))
nulldev = logistic_regression_likelihood (y, X, tb0, z, z1); nulldev = self.loglike (tb0, y, X, z, z1)[0]
else: else:
nulldev = dev nulldev = dev
@ -897,7 +903,7 @@ class RegLogit(object):
tbold = tb; tbold = tb;
devold = dev; devold = dev;
tb = tbold - np.linalg.lstsq(d2l, dl) tb = tbold - np.linalg.lstsq(d2l, dl)
[dev,p,g, g1] = logistic_regression_likelihood (y, X, tb, z, z1); [dev,p,dl,d2l] = self.loglike(tb, y, X, z, z1)
if ((dev - devold) / (dl.T * (tb - tbold)) < 0): if ((dev - devold) / (dl.T * (tb - tbold)) < 0):
epsilon = epsilon / decr epsilon = epsilon / decr
else: else:
@ -907,22 +913,21 @@ class RegLogit(object):
raise ValueError('epsilon too large'); raise ValueError('epsilon too large');
tb = tbold - np.linalg.lstsq(d2l - epsilon * np.eye(d2l.shape), dl); tb = tbold - np.linalg.lstsq(d2l - epsilon * np.eye(d2l.shape), dl);
[dev,p,g, g1] = logistic_regression_likelihood (y, X, tb, z, z1); [dev,p,dl,d2l] = self.loglike(tb, y, X, z, z1);
print('epsilon %g' % epsilon) print('epsilon %g' % epsilon)
#end %while #end %while
#end else #end else
[dl, d2l] = logistic_regression_derivatives (X, z, z1, g, g1, p); #[dl, d2l] = logistic_regression_derivatives (X, z, z1, g, g1, p);
if (self.verbose>1): if (self.verbose>1):
print('Iter: %d, Deviance: %8.6f',iter,dev) print('Iter: %d, Deviance: %8.6f',iter,dev)
print('First derivative'); print('First derivative');
print(dl.T); print(dl.T);
print('Eigenvalues of second derivative'); print('Eigenvalues of second derivative');
print(np.linalg.eig(d2l)[0].T); print(np.linalg.eig(d2l)[0].T);
#end #end
#end #end
stop = abs (dl.T * np.linalg.lstq(d2l, dl) / length (dl)) <= tol or iter>self.maxiter stop = abs (dl.T * np.linalg.lstsq(d2l, dl) / len(dl)) <= tol or iter>self.maxiter
#end %while #end %while
#% tidy up output #% tidy up output
@ -933,13 +938,11 @@ class RegLogit(object):
se = sqrt(np.diag (pcov)) se = sqrt(np.diag (pcov))
if (nx > 0): if (nx > 0):
eta = ((X * beta) * ones (1, nz)) + ((y * 0 + 1) * theta.T); eta = ((X * beta) * ones (1, nz)) + ((y * 0 + 1) * theta.T);
else: else:
eta = (y * 0 + 1) * theta.T; eta = (y * 0 + 1) * theta.T;
#end #end
gammai = np.diff(np.hstack(((y * 0), self.logitinv(eta), (y * 0 + 1))),1,2) gammai = np.diff(np.hstack(((y * 0), self.logitinv(eta), (y * 0 + 1))),1,2)
k0 = min(y) k0 = min(y)
mu = (k0-1)+np.dot(gammai,np.arange(1,nz+2).T); #% E(Y|X) mu = (k0-1)+np.dot(gammai,np.arange(1,nz+2).T); #% E(Y|X)
@ -947,7 +950,7 @@ class RegLogit(object):
R2 = r[0,1]**2; #coefficient of determination R2 = r[0,1]**2; #coefficient of determination
R2adj = max(1 - (1-R2)* (my-1)/(my-nx-nz-1),0); # adjusted coefficient of determination R2adj = max(1 - (1-R2)* (my-1)/(my-nx-nz-1),0); # adjusted coefficient of determination
res = y-mu; res = y-mu
if nz==1: if nz==1:
self.family = 'binomial'; self.family = 'binomial';
@ -967,11 +970,11 @@ class RegLogit(object):
self.params_cov = pcov self.params_cov = pcov
self.params_tstat = (self.params/self.params_std); self.params_tstat = (self.params/self.params_std);
if False: # % options.estdispersn %dispersion_parameter=='mean_deviance' if False: # % options.estdispersn %dispersion_parameter=='mean_deviance'
self.params_pvalue=2.*cdft(-abs(self.params_tstat),self.df); self.params_pvalue=2.*_cdft(-abs(self.params_tstat),self.df);
bcrit = -se.T*invt(self.alpha/2,self.df); bcrit = -se.T*_invt(self.alpha/2,self.df);
else: else:
self.params_pvalue=2.*cdfnorm(-abs(self.params_tstat)); self.params_pvalue=2.*_cdfnorm(-abs(self.params_tstat));
bcrit = -se.T*invnorm(self.alpha/2); bcrit = -se.T*_invnorm(self.alpha/2);
#end #end
self.params_ci = np.vstack((self.params+bcrit,self.params-bcrit)) self.params_ci = np.vstack((self.params+bcrit,self.params-bcrit))
@ -983,7 +986,7 @@ class RegLogit(object):
self.beta = beta; self.beta = beta;
self.gamma = gammai; self.gamma = gammai;
self.residual = res.T; self.residual = res.T;
self.residualD = sign(self.residual)*sqrt(-2*log(p)).T; self.residualD = np.sign(self.residual)*sqrt(-2*np.log(p)).T;
self.deviance = dev; self.deviance = dev;
self.deviance_null = nulldev; self.deviance_null = nulldev;
self.d2L = d2l; self.d2L = d2l;
@ -1052,11 +1055,11 @@ class RegLogit(object):
print(' Analysis of Deviance') print(' Analysis of Deviance')
if False: # %options.estdispersn if False: # %options.estdispersn
localstat = abs(devL-devs)/disprsn/pmq; localstat = abs(devL-devs)/disprsn/pmq;
localpvalue = 1-cdff(localstat, pmq, dfL) # localpvalue = 1-cdff(localstat, pmq, dfL)
print('Model DF Residual deviance F-stat Pr(>F)') # print('Model DF Residual deviance F-stat Pr(>F)')
else: else:
localstat = abs(devL-devs)/disprsn; localstat = abs(devL-devs)/disprsn;
localpvalue = 1-cdfchi2(localstat,pmq) localpvalue = 1-_cdfchi2(localstat,pmq)
print('Model DF Residual deviance Chi2-stat Pr(>Chi2)') print('Model DF Residual deviance Chi2-stat Pr(>Chi2)')
#end #end
@ -1072,11 +1075,11 @@ class RegLogit(object):
print(' Analysis of Deviance') print(' Analysis of Deviance')
if False: # %options.estdispersn if False: # %options.estdispersn
localstat = abs(self.deviance_null-self.deviance)/self.dispersnfit/(self.numvar-1); localstat = abs(self.deviance_null-self.deviance)/self.dispersnfit/(self.numvar-1);
localpvalue = 1-cdff(localstat,self.numvar-1,self.df); localpvalue = 1-_cdff(localstat,self.numvar-1,self.df);
print('Model DF Residual deviance F-stat Pr(>F)') print('Model DF Residual deviance F-stat Pr(>F)')
else: else:
localstat = abs(self.deviance_null-self.deviance)/self.dispersnfit; localstat = abs(self.deviance_null-self.deviance)/self.dispersnfit;
localpvalue = 1-cdfchi2(localstat,self.numvar-1); localpvalue = 1-_cdfchi2(localstat,self.numvar-1);
print('Model DF Residual deviance Chi2-stat Pr(>Chi2)') print('Model DF Residual deviance Chi2-stat Pr(>Chi2)')
#end #end
@ -1116,7 +1119,7 @@ class RegLogit(object):
print('(Dispersion parameter for %s family taken to be %2.2f)' % (self.family,self.dispersionfit)) print('(Dispersion parameter for %s family taken to be %2.2f)' % (self.family,self.dispersionfit))
print(' ') print(' ')
if True: #%options.constant if True: #%options.constant
printf(' Null deviance: %2.4f on %d degrees of freedom' % (self.deviance_null,self.df_null)) print(' Null deviance: %2.4f on %d degrees of freedom' % (self.deviance_null,self.df_null))
#end #end
print('Residual deviance: %2.4f on %d degrees of freedom' % (self.deviance,self.df)) print('Residual deviance: %2.4f on %d degrees of freedom' % (self.deviance,self.df))
@ -1125,194 +1128,141 @@ class RegLogit(object):
#end % summary #end % summary
def predict(self, Xnew=None,alpha=0.05, fulloutput=False): def predict(self, Xnew=None,alpha=0.05, fulloutput=False):
'''LOGIT/PREDICT Predict from a fitted LOGIT object '''LOGIT/PREDICT Predict from a fitted LOGIT object
CALL [y,ylo,yup] = predict(Xnew,options) CALL [y,ylo,yup] = predict(Xnew,options)
y = predicted value y = predicted value
ylo,yup = 100(1-alpha)% confidence interval for y ylo,yup = 100(1-alpha)% confidence interval for y
Xnew = new covariate Xnew = new covariate
options = options struct defining the calculation options = options struct defining the calculation
.alpha : confidence coefficient (default 0.05) .alpha : confidence coefficient (default 0.05)
.size : size if binomial family (default 1). .size : size if binomial family (default 1).
''' '''
[mx, nx] = self.X.shape [mx, nx] = self.X.shape
if Xnew is None: if Xnew is None:
Xnew = self.X; Xnew = self.X;
else: else:
Xnew = np.atleast_2d(Xnew) Xnew = np.atleast_2d(Xnew)
notnans = np.flatnonzero(1-(1-np.isfinite(Xnew)).any(axis=1)) notnans = np.flatnonzero(1-(1-np.isfinite(Xnew)).any(axis=1))
Xnew = Xnew[notnans,:] Xnew = Xnew[notnans,:]
[n,p] = Xnew.shape [n,p] = Xnew.shape
if p != nx if p != nx:
raise ValueError('Number of covariates must match the number of regression coefficients') raise ValueError('Number of covariates must match the number of regression coefficients')
nz = self.numk-1; nz = self.numk-1;
one = ones((n,1)) one = ones((n,1))
if (nx > 0): if (nx > 0):
eta = np.dot(Xnew * self.beta) + self.theta.T; eta = np.dot(Xnew * self.beta) + self.theta.T;
else: else:
eta = one * self.theta.T eta = one * self.theta.T
#end #end
y = np.diff(np.hstack((zeros((n,1)), self.logitinv(eta), one),1,2); y = np.diff(np.hstack((zeros((n,1)), self.logitinv(eta), one)),1,2)
if fulloutput: if fulloutput:
pcov = self.params_cov; eps = np.finfo(float).eps
if (nx > 0) pcov = self.params_cov;
np = size(pcov,1); if (nx > 0):
np1 = pcov.shape[0]
[U S V]= np.linalg.svd(pcov,0);
R=(U*sqrt(S)*V.T); #%squareroot of pcov [U, S, V]= np.linalg.svd(pcov,0);
ixb = nz+1:np; R = np.dot(U,np.dot(sqrt(S),V.T)); #%squareroot of pcov
ib = np.r_[0,nz:np1]
% Var(eta_i) = var(theta_i+Xnew*b)
vareta = zeros(n,nz); #% Var(eta_i) = var(theta_i+Xnew*b)
for ix1 = 1:nz vareta = zeros((n,nz));
vareta(:,ix1) = max(sum(([one Xnew]*R([ix1,ixb],[ix1,ixb])).^2,2),eps); u = np.hstack((one,Xnew))
end for i in range(nz):
else ib[0] = i
vareta = diag(pcov); vareta[:,i] = np.maximum(sum((np.dot(u,R[ib,ib]))**2,axis=1),eps)
end #end
crit = -invnorm(alpha/2); else:
vareta = np.diag(pcov)
#end
ecrit = crit * sqrt(vareta); crit = -_invnorm(alpha/2);
mulo = logitinv(eta-ecrit);
muup = logitinv(eta+ecrit);
ylo1 = diff ([zeros(n,1), mulo , one],1,2);
yup1 = diff ([zeros(n,1), muup , one],1,2);
ylo = min(ylo1,yup1);
yup = max(ylo1,yup1);
for ix1 = 2:self.numk-1
yup(:,ix1) = max( [yup(:,ix1),muup(:,ix1)-mulo(:,ix1-1)],[],2);
end
return y,ylo,yup
end
def logistic_regression_derivatives (x, z, z1, g, g1, p)
% [dl, d2l] = logistic_regression_derivatives (@var{x}, @var{z}, @var{z1}, @var{g}, @var{g1}, @var{p})
% Called by logistic_regression. Calculates derivates of the
% log-likelihood for ordinal logistic regression model.
% Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2005, 2007
% Kurt Hornik
%
%
% Reglogit is free software; you can redistribute it and/or modify it
% under the terms of the GNU General Public License as published by
% the Free Software Foundation; either version 3 of the License, or (at
% your option) any later version.
%
% Reglogit is distributed in the hope that it will be useful, but
% WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
% General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Reglogit; see the file COPYING. If not, see
% <http://www.gnu.org/licenses/>.
% Author: Gordon K. Smyth <gks@maths.uq.oz.au>
% Adapted-By: KH <Kurt.Hornik@wu-wien.ac.at>
% Description: Derivates of log-likelihood in logistic regression
% first derivative
v = g .* (1 - g) ./ p;
v1 = g1 .* (1 - g1) ./ p;
dlogp = [(dmult (v, z) - dmult (v1, z1)), (dmult (v - v1, x))];
dl = sum (dlogp)';
% second derivative
w = v .* (1 - 2 * g); w1 = v1 .* (1 - 2 * g1);
d2l = [z, x]' * dmult (w, [z, x]) - [z1, x]' * dmult (w1, [z1, x]) ...
- dlogp' * dlogp;
return [dl, d2l]
end %function
def logistic_regression_likelihood (y, x, beta, z, z1)
% [g, g1, p, dev] = logistic_regression_likelihood (y ,x,beta,z,z1)
% Calculates likelihood for the ordinal logistic regression model.
% Called by logistic_regression.
% Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2005, 2007
% Kurt Hornik
%
%
% Reglogit is free software; you can redistribute it and/or modify it
% under the terms of the GNU General Public License as published by
% the Free Software Foundation; either version 3 of the License, or (at
% your option) any later version.
%
% Reglogit is distributed in the hope that it will be useful, but
% WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
% General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Reglogit; see the file COPYING. If not, see
% <http://www.gnu.org/licenses/>.
% Author: Gordon K. Smyth <gks@maths.uq.oz.au>
% Adapted-By: KH <Kurt.Hornik@wu-wien.ac.at>
% Description: Likelihood in logistic regression
e = exp ([z, x] * beta);
e1 = exp ([z1, x] * beta);
g = e ./ (1 + e);
g1 = e1 ./ (1 + e1);
g = max (y == max (y), g);
g1 = min (y > min(y), g1);
p = g - g1;
dev = -2 * sum (log (p));
return dev,p,g, g1
end %function
function M = dmult(A,B)
% PURPOSE: computes the product of diag(A) and B
% -----------------------------------------------------
% USAGE: m = dmult(a,b)
% where: a = a matrix
% b = a matrix
% -----------------------------------------------------
% RETURNS: m = diag(A) times B
% -----------------------------------------------------
% NOTE: a Gauss compatability function
% -----------------------------------------------------
% written by:
% Gordon K Smyth, U of Queensland, Australia, gks@maths.uq.oz.au
% Nov 19, 1990. Last revision Aug 29, 1995.
% documentation modifications made by:
% James P. LeSage, Dept of Economics
% University of Toledo
% 2801 W. Bancroft St,
% Toledo, OH 43606
% jpl@jpl.econ.utoledo.edu
[mb,nb] = size(B);
M=A(:,ones(1,nb)).*B;
end
ecrit = crit * sqrt(vareta);
mulo = _logitinv(eta-ecrit);
muup = _logitinv(eta+ecrit);
ylo1 = np.diff(np.hstack((zeros((n,1)), mulo , one)),1,2);
yup1 = np.diff(np.hstack((zeros((n,1)), muup , one)),1,2);
ylo = np.minimum(ylo1,yup1);
yup = np.maximum(ylo1,yup1);
for i in range(1, nz): #= 2:self.numk-1
yup[:,i] = np.hstack((yup[:,i],muup[:,i]-mulo[:,i-1])).max(axis=1)
#end
return y,ylo,yup
return y
def loglike(self, beta, y, x, z, z1):
'''
[dev, p g, g1] = loglike( y ,x,beta,z,z1)
Calculates likelihood for the ordinal logistic regression model.
'''
# Author: Gordon K. Smyth <gks@maths.uq.oz.au>
zx = np.hstack((z,x))
z1x = np.hstack((z1,x))
g = _logitinv(np.dot(zx,beta))
g1 = _logitinv(np.dot(z1x,beta))
g = np.maximum(y == y.max(), g)
g1 = np.minimum(y > y.min(), g1)
p = g - g1
dev = -2 * sum (np.log(p));
#return dev, p, g, g1
##end %function
#
#
# def logistic_regression_derivatives(self, x, z, z1, g, g1, p):
'''% [dl, d2l] = logistic_regression_derivatives(x, z, z1, g, g1, p)
% Called by logistic_regression. Calculates derivates of the
% log-likelihood for ordinal logistic regression model.
'''
# Author: Gordon K. Smyth <gks@maths.uq.oz.au>
# Description: Derivates of log-likelihood in logistic regression
# first derivative
v = g * (1 - g) / p;
v1 = g1 * (1 - g1) / p;
dlogp = np.hstack(((dmult(v, z) - dmult(v1, z1)), (dmult(v - v1, x))))
dl = np.sum(dlogp, axis=0).T
# second derivative
w = v * (1 - 2 * g)
w1 = v1 * (1 - 2 * g1)
d2l = zx.T * dmult (w, zx) - z1x.T * dmult(w1, z1x) - dlogp.T * dlogp;
return dev, p, dl, d2l
#end %function
def dmult(A,B):
''' Return the product of diag(A) and B
USAGE: m = dmult(a,b)
where: a = a matrix
b = a matrix
-----------------------------------------------------
RETURNS: m = diag(A) times B
-----------------------------------------------------
NOTE: a Gauss compatability function
-----------------------------------------------------
'''
#% written by:
#% Gordon K Smyth, U of Queensland, Australia, gks@maths.uq.oz.au
#% Nov 19, 1990. Last revision Aug 29, 1995.
return A[:,None]*B;

@ -2237,7 +2237,8 @@ class rv_continuous(rv_generic):
return None return None
def _nnlf(self, x, *args): def _nnlf(self, x, *args):
return -sum(self._logpdf(x, *args),axis=0) xmin = floatinfo.machar.xmin
return -sum(np.maximum(self._logpdf(x, *args),100*log(xmin)),axis=0)
def nnlf(self, theta, x): def nnlf(self, theta, x):
''' Return negative loglikelihood function, i.e., - sum (log pdf(x, theta),axis=0) ''' Return negative loglikelihood function, i.e., - sum (log pdf(x, theta),axis=0)
@ -2253,7 +2254,7 @@ class rv_continuous(rv_generic):
return inf return inf
x = arr((x-loc) / scale) x = arr((x-loc) / scale)
cond0 = (x <= self.a) | (self.b <= x) cond0 = (x <= self.a) | (self.b <= x)
newCall = False newCall = True
if newCall: if newCall:
goodargs = argsreduce(1 - cond0, *((x,))) goodargs = argsreduce(1 - cond0, *((x,)))
goodargs = tuple(goodargs + list(args)) goodargs = tuple(goodargs + list(args))

Loading…
Cancel
Save