From 3ef99faa562081000ae3c7dda93d6ee3e538108c Mon Sep 17 00:00:00 2001
From: "per.andreas.brodtkorb"
Date: Thu, 3 Nov 2011 07:29:45 +0000
Subject: [PATCH] Small updates

---
 pywafo/src/wafo/stats/core.py          | 394 +++++++++++--------------
 pywafo/src/wafo/stats/distributions.py |   5 +-
 2 files changed, 175 insertions(+), 224 deletions(-)

diff --git a/pywafo/src/wafo/stats/core.py b/pywafo/src/wafo/stats/core.py
index e78b9ef..c80b980 100644
--- a/pywafo/src/wafo/stats/core.py
+++ b/pywafo/src/wafo/stats/core.py
@@ -31,13 +31,20 @@ def valarray(shape, value=nan, typecode=None):
     if not isinstance(out, ndarray):
         out = arr(out)
     return out
-
+def _cdff(x, dfn, dfd):
+    return special.fdtr(dfn, dfd, x)
+def _cdft(x, df):
+    return special.stdtr(df, x)
 def _invt(q, df):
     return special.stdtrit(df, q)
-
+def _cdfchi2(x, df):
+    return special.chdtr(df, x)
 def _invchi2(q, df):
     return special.chdtri(df, q)
-
+def _cdfnorm(x):
+    return special.ndtr(x)
+def _invnorm(q):
+    return special.ndtri(q)
 
 def edf(x, method=2):
     '''
@@ -653,6 +660,11 @@ def extremal_idx(ti):
         ei = min(1, 2*np.mean(t-1)**2/np.mean((t-1)*(t-2)))
     return ei
 
+def _logit(p):
+    return np.log(p) - np.log1p(-p)
+def _logitinv(x):
+    return 1.0/(np.exp(-x)+1)
+
 class RegLogit(object):
     '''
     REGLOGIT Fit ordinal logistic regression model.
@@ -797,23 +809,17 @@ class RegLogit(object):
         self.converged = None
         self.note = ''
         self.date = now()
-
-
-    def _logit(self, p):
-        return np.log(p)-np.log1p(-p)
-    def _logitinv(self, x):
-        return 1.0/(np.exp(-x)+1)
 
     def check_xy(self, y, X):
         y = np.round(np.atleast_2d(y))
-        my, ny = y.shape
+        my = y.shape[0]
         if X is None:
             X = np.zeros((my, 0))
         elif self.deletecolinear:
             X = np.atleast_2d(X)
             # Make sure X is full rank
             s = np.linalg.svd(X)[1]
             tol = max(X.shape) * np.finfo(s.max()).eps
             ix = np.flatnonzero(s > tol)
             iy = np.flatnonzero(s <= tol)
             if len(ix):
                 X = X[:, ix]
                 txt = [' %d,' % i for i in iy]
                 txt[-1] = ' %d' % iy[-1]
                 warnings.warn('Covariate matrix is singular. Removing column(s):%s' % ''.join(txt))
-        [mx, nx] = X.shape
+        mx = X.shape[0]
         if (mx != my):
             raise ValueError('x and y must have the same number of observations')
         return y, X
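The first hunk adds thin private wrappers around scipy.special so the rest of core.py can spell distribution functions uniformly: each _cdfX pairs with an _invX quantile function. A minimal sketch of the intended round-trip property, assuming only that scipy is installed (the sample value is illustrative, not from the patch):

    import scipy.special as special

    def _cdfnorm(x):
        return special.ndtr(x)    # standard normal CDF

    def _invnorm(q):
        return special.ndtri(q)   # standard normal quantile (inverse CDF)

    x = 1.5
    assert abs(_invnorm(_cdfnorm(x)) - x) < 1e-12
    # The t and chi-square pairs follow the same pattern:
    # special.stdtr/special.stdtrit and special.chdtr/special.chdtri.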
@@ -879,12 +885,12 @@ class RegLogit(object):
         tb = np.vstack((theta0, beta0))
 
         # likelihood and derivatives at starting values
-        [dev,p,g, g1] = logistic_regression_likelihood (y, X, tb, z, z1);
-        [dl, d2l] = logistic_regression_derivatives (X, z, z1, g, g1, p);
-        epsilon = std (d2l) / 1000;
-        if np.any(beta) or np.any(theta!=theta0):
+        [dev, p, dl, d2l] = self.loglike(tb, y, X, z, z1)
+
+        epsilon = np.std(d2l) / 1000
+        if np.any(beta0) or np.any(theta00 != theta0):
             tb0 = np.vstack((theta00, beta00))
-            nulldev = logistic_regression_likelihood (y, X, tb0, z, z1);
+            nulldev = self.loglike(tb0, y, X, z, z1)[0]
         else:
             nulldev = dev
 
@@ -897,7 +903,7 @@ class RegLogit(object):
             tbold = tb
             devold = dev
             tb = tbold - np.linalg.lstsq(d2l, dl)[0]
-            [dev,p,g, g1] = logistic_regression_likelihood (y, X, tb, z, z1);
+            [dev, p, dl, d2l] = self.loglike(tb, y, X, z, z1)
             if ((dev - devold) / (dl.T * (tb - tbold)) < 0):
                 epsilon = epsilon / decr
             else:
@@ -907,22 +913,21 @@ class RegLogit(object):
                         raise ValueError('epsilon too large')
                     tb = tbold - np.linalg.lstsq(d2l - epsilon * np.eye(d2l.shape[0]), dl)[0]
-                    [dev,p,g, g1] = logistic_regression_likelihood (y, X, tb, z, z1);
+                    [dev, p, dl, d2l] = self.loglike(tb, y, X, z, z1)
                     print('epsilon %g' % epsilon)
                 #end %while
             #end
-        [dl, d2l] = logistic_regression_derivatives (X, z, z1, g, g1, p);
+        #[dl, d2l] = logistic_regression_derivatives (X, z, z1, g, g1, p);
         if (self.verbose > 1):
             print('Iter: %d,  Deviance: %8.6f' % (iter, dev))
-            print('First derivative');
             print(dl.T)
             print('Eigenvalues of second derivative')
             print(np.linalg.eig(d2l)[0].T)
         #end
     #end
-    stop = abs (dl.T * np.linalg.lstq(d2l, dl) / length (dl)) <= tol or iter>self.maxiter
+    stop = abs(dl.T * np.linalg.lstsq(d2l, dl)[0] / len(dl)) <= tol or iter > self.maxiter
 #end %while
 
 #% tidy up output
@@ -933,13 +938,11 @@ class RegLogit(object):
 
         se = sqrt(np.diag(pcov))
 
-
-
         if (nx > 0):
             eta = ((X * beta) * ones (1, nz)) + ((y * 0 + 1) * theta.T);
         else:
             eta = (y * 0 + 1) * theta.T;
-            #end
+        #end
         gammai = np.diff(np.hstack(((y * 0), _logitinv(eta), (y * 0 + 1))), n=1, axis=1)
         k0 = min(y)
         mu = (k0 - 1) + np.dot(gammai, np.arange(1, nz + 2).T)  #% E(Y|X)
@@ -947,7 +950,7 @@ class RegLogit(object):
         r = np.corrcoef(np.hstack((y, mu)))
         R2 = r[0, 1] ** 2   # coefficient of determination
         R2adj = max(1 - (1 - R2) * (my - 1) / (my - nx - nz - 1), 0)  # adjusted coefficient of determination
 
-        res = y-mu;
+        res = y - mu
 
         if nz == 1:
             self.family = 'binomial'
@@ -967,11 +970,11 @@ class RegLogit(object):
         self.params_cov = pcov
         self.params_tstat = (self.params / self.params_std)
         if False:  # % options.estdispersn  %dispersion_parameter=='mean_deviance'
-            self.params_pvalue=2.*cdft(-abs(self.params_tstat),self.df);
-            bcrit = -se.T*invt(self.alpha/2,self.df);
+            self.params_pvalue = 2. * _cdft(-abs(self.params_tstat), self.df)
+            bcrit = -se.T * _invt(self.alpha / 2, self.df)
         else:
-            self.params_pvalue=2.*cdfnorm(-abs(self.params_tstat));
-            bcrit = -se.T*invnorm(self.alpha/2);
+            self.params_pvalue = 2. * _cdfnorm(-abs(self.params_tstat))
+            bcrit = -se.T * _invnorm(self.alpha / 2)
         #end
         self.params_ci = np.vstack((self.params + bcrit, self.params - bcrit))
 
@@ -983,7 +986,7 @@ class RegLogit(object):
         self.beta = beta
         self.gamma = gammai
         self.residual = res.T
-        self.residualD = sign(self.residual)*sqrt(-2*log(p)).T;
+        self.residualD = np.sign(self.residual) * sqrt(-2 * np.log(p)).T
         self.deviance = dev
         self.deviance_null = nulldev
         self.d2L = d2l
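The fit hunks above route both the likelihood and its derivatives through the new RegLogit.loglike and keep the existing safeguard: when a Newton step increases the deviance, the Hessian is ridged by a growing epsilon before re-solving. A self-contained sketch of that safeguard on a toy one-dimensional minimisation; the function names and constants here are illustrative only, not pywafo's API:

    import numpy as np

    def damped_newton(f, grad, hess, x0, tol=1e-8, maxiter=50):
        # Plain Newton step; if it fails to decrease f, retry with an
        # increasingly damped (ridged) curvature, as in RegLogit.fit.
        x = float(x0)
        for _ in range(maxiter):
            g, h = grad(x), hess(x)
            epsilon = 0.0
            while True:
                xnew = x - g / (h + epsilon)
                if f(xnew) <= f(x) or epsilon > 1e10:
                    break
                epsilon = 10.0 * epsilon + 1e-4   # grow the ridge term
            if abs(xnew - x) <= tol:
                return xnew
            x = xnew
        return x

    # toy usage: minimise (x - 2)**4 starting far from the optimum
    xhat = damped_newton(lambda x: (x - 2)**4,
                         lambda x: 4 * (x - 2)**3,
                         lambda x: 12 * (x - 2)**2,
                         x0=-3.0)
    assert abs(xhat - 2) < 1e-2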
@@ -1052,11 +1055,11 @@ class RegLogit(object):
             print('   Analysis of Deviance')
             if False:  # %options.estdispersn
                 localstat = abs(devL - devs) / disprsn / pmq
-                localpvalue = 1-cdff(localstat, pmq, dfL)
-                print('Model    DF  Residual deviance    F-stat        Pr(>F)')
+#                localpvalue = 1-cdff(localstat, pmq, dfL)
+#                print('Model    DF  Residual deviance    F-stat        Pr(>F)')
             else:
                 localstat = abs(devL - devs) / disprsn
-                localpvalue = 1-cdfchi2(localstat,pmq)
+                localpvalue = 1 - _cdfchi2(localstat, pmq)
                 print('Model    DF  Residual deviance    Chi2-stat     Pr(>Chi2)')
             #end
 
@@ -1072,11 +1075,11 @@ class RegLogit(object):
             print('   Analysis of Deviance')
             if False:  # %options.estdispersn
                 localstat = abs(self.deviance_null - self.deviance) / self.dispersnfit / (self.numvar - 1)
-                localpvalue = 1-cdff(localstat,self.numvar-1,self.df);
+                localpvalue = 1 - _cdff(localstat, self.numvar - 1, self.df)
                 print('Model    DF  Residual deviance    F-stat        Pr(>F)')
             else:
                 localstat = abs(self.deviance_null - self.deviance) / self.dispersnfit
-                localpvalue = 1-cdfchi2(localstat,self.numvar-1);
+                localpvalue = 1 - _cdfchi2(localstat, self.numvar - 1)
                 print('Model    DF  Residual deviance    Chi2-stat     Pr(>Chi2)')
             #end
 
@@ -1116,7 +1119,7 @@ class RegLogit(object):
         print('(Dispersion parameter for %s family taken to be %2.2f)' % (self.family, self.dispersionfit))
         print(' ')
         if True:  #%options.constant
-            printf('    Null deviance: %2.4f  on %d  degrees of freedom' % (self.deviance_null,self.df_null))
+            print('    Null deviance: %2.4f  on %d  degrees of freedom' % (self.deviance_null, self.df_null))
         #end
         print('Residual deviance: %2.4f  on %d  degrees of freedom' % (self.deviance, self.df))
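Both analysis-of-deviance hunks above refer the scaled change in deviance between two nested fits to a chi-square distribution through the new _cdfchi2 wrapper. A sketch of that computation in isolation; the deviances and degrees of freedom are made up, and scipy.special.chdtrc (the complemented chi-square CDF) is shown because it avoids the cancellation in 1 - _cdfchi2(...) for very small p-values:

    import scipy.special as special

    dev_small, dev_large = 470.3, 462.1   # deviances of nested models (made up)
    df_diff = 3                           # difference in number of parameters
    stat = abs(dev_small - dev_large)     # ~ chi2(df_diff) under the null
    pvalue = special.chdtrc(df_diff, stat)
    assert abs(pvalue - (1 - special.chdtr(df_diff, stat))) < 1e-12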
@@ -1125,194 +1128,141 @@ class RegLogit(object):
     #end % summary
 
     def predict(self, Xnew=None, alpha=0.05, fulloutput=False):
-'''LOGIT/PREDICT Predict from a fitted LOGIT object
-
- CALL [y,ylo,yup] = predict(Xnew,options)
-
- y       = predicted value
- ylo,yup = 100(1-alpha)% confidence interval for y
-
- Xnew    = new covariate
- options = options struct defining the calculation
-        .alpha : confidence coefficient (default 0.05)
-        .size  : size if binomial family (default 1).
-
-'''
-
-[mx, nx] = self.X.shape
-if Xnew is None:
-    Xnew = self.X;
-else:
-    Xnew = np.atleast_2d(Xnew)
-    notnans = np.flatnonzero(1-(1-np.isfinite(Xnew)).any(axis=1))
-    Xnew = Xnew[notnans,:]
-
-[n,p] = Xnew.shape
-
-
-if p != nx
-    raise ValueError('Number of covariates must match the number of regression coefficients')
-
-
-nz = self.numk-1;
-one = ones((n,1))
-    if (nx > 0):
-        eta = np.dot(Xnew * self.beta) + self.theta.T;
-    else:
-        eta = one * self.theta.T
-    #end
-    y = np.diff(np.hstack((zeros((n,1)), self.logitinv(eta), one),1,2);
-if fulloutput:
-    pcov = self.params_cov;
-    if (nx > 0)
-        np = size(pcov,1);
-
-        [U S V]= np.linalg.svd(pcov,0);
-        R=(U*sqrt(S)*V.T); #%squareroot of pcov
-        ixb = nz+1:np;
-
-        % Var(eta_i) = var(theta_i+Xnew*b)
-        vareta = zeros(n,nz);
-        for ix1 = 1:nz
-            vareta(:,ix1) = max(sum(([one Xnew]*R([ix1,ixb],[ix1,ixb])).^2,2),eps);
-        end
-    else
-        vareta = diag(pcov);
-    end
-    crit = -invnorm(alpha/2);
-
-
-    ecrit = crit * sqrt(vareta);
-    mulo = logitinv(eta-ecrit);
-    muup = logitinv(eta+ecrit);
-    ylo1 = diff ([zeros(n,1), mulo , one],1,2);
-    yup1 = diff ([zeros(n,1), muup , one],1,2);
-
-    ylo = min(ylo1,yup1);
-    yup = max(ylo1,yup1);
-
-    for ix1 = 2:self.numk-1
-        yup(:,ix1) = max( [yup(:,ix1),muup(:,ix1)-mulo(:,ix1-1)],[],2);
-    end
-    return y,ylo,yup
-end
-
-
-
-
-
-def logistic_regression_derivatives (x, z, z1, g, g1, p)
-% [dl, d2l] = logistic_regression_derivatives (@var{x}, @var{z}, @var{z1}, @var{g}, @var{g1}, @var{p})
-% Called by logistic_regression.  Calculates derivates of the
-% log-likelihood for ordinal logistic regression model.
-
-
-% Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2005, 2007
-%               Kurt Hornik
-%
-%
-% Reglogit is free software; you can redistribute it and/or modify it
-% under the terms of the GNU General Public License as published by
-% the Free Software Foundation; either version 3 of the License, or (at
-% your option) any later version.
-%
-% Reglogit is distributed in the hope that it will be useful, but
-% WITHOUT ANY WARRANTY; without even the implied warranty of
-% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-% General Public License for more details.
-%
-% You should have received a copy of the GNU General Public License
-% along with Reglogit; see the file COPYING.  If not, see
-% <http://www.gnu.org/licenses/>.
-
-% Author: Gordon K. Smyth
-% Adapted-By: KH
-% Description: Derivates of log-likelihood in logistic regression
-
-  % first derivative
-  v = g .* (1 - g) ./ p;
-  v1 = g1 .* (1 - g1) ./ p;
-  dlogp = [(dmult (v, z) - dmult (v1, z1)), (dmult (v - v1, x))];
-  dl = sum (dlogp)';
-
-  % second derivative
-  w = v .* (1 - 2 * g); w1 = v1 .* (1 - 2 * g1);
-  d2l = [z, x]' * dmult (w, [z, x]) - [z1, x]' * dmult (w1, [z1, x]) ...
-      - dlogp' * dlogp;
-
-return [dl, d2l]
-end %function
-
-def logistic_regression_likelihood (y, x, beta, z, z1)
-% [g, g1, p, dev] = logistic_regression_likelihood (y, x, beta, z, z1)
-% Calculates likelihood for the ordinal logistic regression model.
-% Called by logistic_regression.
-
-
-% Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2005, 2007
-%               Kurt Hornik
-%
-%
-% Reglogit is free software; you can redistribute it and/or modify it
-% under the terms of the GNU General Public License as published by
-% the Free Software Foundation; either version 3 of the License, or (at
-% your option) any later version.
-%
-% Reglogit is distributed in the hope that it will be useful, but
-% WITHOUT ANY WARRANTY; without even the implied warranty of
-% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-% General Public License for more details.
-%
-% You should have received a copy of the GNU General Public License
-% along with Reglogit; see the file COPYING.  If not, see
-% <http://www.gnu.org/licenses/>.
-
-% Author: Gordon K. Smyth
-% Adapted-By: KH
-% Description: Likelihood in logistic regression
-
-  e = exp ([z, x] * beta);
-  e1 = exp ([z1, x] * beta);
-  g = e ./ (1 + e);
-  g1 = e1 ./ (1 + e1);
-  g = max (y == max (y), g);
-  g1 = min (y > min(y), g1);
-
-  p = g - g1;
-  dev = -2 * sum (log (p));
-return dev,p,g, g1
-end %function
-
-function M = dmult(A,B)
-% PURPOSE: computes the product of diag(A) and B
-% -----------------------------------------------------
-% USAGE: m = dmult(a,b)
-% where: a = a matrix
-%        b = a matrix
-% -----------------------------------------------------
-% RETURNS: m = diag(A) times B
-% -----------------------------------------------------
-% NOTE: a Gauss compatability function
-% -----------------------------------------------------
-
-% written by:
-% Gordon K Smyth, U of Queensland, Australia, gks@maths.uq.oz.au
-% Nov 19, 1990.  Last revision Aug 29, 1995.
-
-% documentation modifications made by:
-% James P. LeSage, Dept of Economics
-% University of Toledo
-% 2801 W. Bancroft St,
-% Toledo, OH 43606
-% jpl@jpl.econ.utoledo.edu
-
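Everything deleted above is Octave/Matlab-flavoured source that the hunk re-adds below as plain numpy code. The one non-obvious translation is dmult: left-multiplying by diag(A) is just broadcasting in numpy. A quick check of that identity (the function name is from the patch, the data is made up):

    import numpy as np

    def dmult(A, B):
        # product of diag(A) and B via broadcasting
        return A[:, None] * B

    a = np.array([1.0, 2.0, 3.0])
    B = np.arange(6.0).reshape(3, 2)
    assert np.allclose(dmult(a, B), np.diag(a).dot(B))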
+        '''LOGIT/PREDICT Predict from a fitted LOGIT object
+
+        CALL [y, ylo, yup] = predict(Xnew, alpha, fulloutput)
+
+        y        = predicted value
+        ylo, yup = 100(1-alpha)% confidence interval for y
+
+        Xnew       = new covariate
+        alpha      = confidence coefficient (default 0.05)
+        fulloutput = if True, return ylo and yup in addition to y
+        '''
+        [mx, nx] = self.X.shape
+        if Xnew is None:
+            Xnew = self.X
+        else:
+            Xnew = np.atleast_2d(Xnew)
+            notnans = np.flatnonzero(np.isfinite(Xnew).all(axis=1))
+            Xnew = Xnew[notnans, :]
+        [n, p] = Xnew.shape
+
+        if p != nx:
+            raise ValueError('Number of covariates must match the number of regression coefficients')
+
+        nz = self.numk - 1
+        one = ones((n, 1))
+        if (nx > 0):
+            eta = np.dot(Xnew, self.beta) + self.theta.T
+        else:
+            eta = one * self.theta.T
+        #end
+        y = np.diff(np.hstack((zeros((n, 1)), _logitinv(eta), one)), n=1, axis=1)
+        if fulloutput:
+            eps = np.finfo(float).eps
+            pcov = self.params_cov
+            if (nx > 0):
+                np1 = pcov.shape[0]
+                [U, S, V] = np.linalg.svd(pcov, 0)
+                R = np.dot(U * sqrt(S), V)  #% square root of pcov (numpy returns V already transposed)
+                ib = np.r_[0, nz:np1]
+
+                #% Var(eta_i) = var(theta_i + Xnew * b)
+                vareta = zeros((n, nz))
+                u = np.hstack((one, Xnew))
+                for i in range(nz):
+                    ib[0] = i
+                    vareta[:, i] = np.maximum(np.sum(np.dot(u, R[np.ix_(ib, ib)]) ** 2, axis=1), eps)
+                #end
+            else:
+                vareta = np.diag(pcov)
+            #end
+            crit = -_invnorm(alpha / 2)
+
+            ecrit = crit * sqrt(vareta)
+            mulo = _logitinv(eta - ecrit)
+            muup = _logitinv(eta + ecrit)
+            ylo1 = np.diff(np.hstack((zeros((n, 1)), mulo, one)), n=1, axis=1)
+            yup1 = np.diff(np.hstack((zeros((n, 1)), muup, one)), n=1, axis=1)
+
+            ylo = np.minimum(ylo1, yup1)
+            yup = np.maximum(ylo1, yup1)
+
+            for i in range(1, nz):  #% = 2:self.numk-1
+                yup[:, i] = np.maximum(yup[:, i], muup[:, i] - mulo[:, i - 1])
+            #end
+            return y, ylo, yup
+        return y
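The rewritten predict above builds its confidence bands by the delta method on the linear predictor: var(eta_i) = u'Cu with u the covariate row and C the relevant block of the parameter covariance, after which eta +/- z*se is mapped through the inverse logit. Monotonicity of the inverse logit is what lets the bounds be transformed directly. A sketch on a single observation, with illustrative numbers only:

    import numpy as np
    import scipy.special as special

    def _logitinv(x):
        return 1.0 / (np.exp(-x) + 1)

    u = np.array([1.0, 0.3, -1.2])     # intercept plus covariates (made up)
    C = np.diag([0.04, 0.01, 0.02])    # parameter covariance block (made up)
    beta = np.array([0.5, 1.1, -0.7])

    eta = u.dot(beta)
    se_eta = np.sqrt(u.dot(C).dot(u))          # delta-method standard error
    crit = -special.ndtri(0.05 / 2)            # same as -_invnorm(alpha/2)
    lo = _logitinv(eta - crit * se_eta)
    hi = _logitinv(eta + crit * se_eta)
    assert lo <= _logitinv(eta) <= hi          # monotone transform keeps ordering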
-[mb,nb] = size(B);
-M=A(:,ones(1,nb)).*B;
-end
+    def loglike(self, beta, y, x, z, z1):
+        '''
+        [dev, p, dl, d2l] = loglike(beta, y, x, z, z1)
+        Calculates the likelihood and its first and second derivatives
+        for the ordinal logistic regression model.
+        '''
+        # Author: Gordon K. Smyth
+        zx = np.hstack((z, x))
+        z1x = np.hstack((z1, x))
+        g = _logitinv(np.dot(zx, beta))
+        g1 = _logitinv(np.dot(z1x, beta))
+        g = np.maximum(y == y.max(), g)
+        g1 = np.minimum(y > y.min(), g1)
+
+        p = g - g1
+        dev = -2 * np.sum(np.log(p))
+
+        # first derivative of the log-likelihood
+        # (formerly logistic_regression_derivatives)
+        v = g * (1 - g) / p
+        v1 = g1 * (1 - g1) / p
+        dlogp = np.hstack(((dmult(v, z) - dmult(v1, z1)), (dmult(v - v1, x))))
+        dl = np.sum(dlogp, axis=0).T
+
+        # second derivative
+        w = v * (1 - 2 * g)
+        w1 = v1 * (1 - 2 * g1)
+        d2l = (np.dot(zx.T, dmult(w, zx)) - np.dot(z1x.T, dmult(w1, z1x))
+               - np.dot(dlogp.T, dlogp))
+
+        return dev, p, dl, d2l
+    #end %function
+
+def dmult(A, B):
+    ''' Return the product of diag(A) and B
+
+    USAGE:  m = dmult(a, b)
+    where:  a = a vector of diagonal elements
+            b = a matrix
+    -----------------------------------------------------
+    RETURNS: m = diag(a) times b
+    -----------------------------------------------------
+    NOTE: a Gauss compatibility function
+    -----------------------------------------------------
+    '''
+    #% written by:
+    #% Gordon K Smyth, U of Queensland, Australia, gks@maths.uq.oz.au
+    #% Nov 19, 1990.  Last revision Aug 29, 1995.
+    return A[:, None] * B
diff --git a/pywafo/src/wafo/stats/distributions.py b/pywafo/src/wafo/stats/distributions.py
index 4d2c400..8e6904a 100644
--- a/pywafo/src/wafo/stats/distributions.py
+++ b/pywafo/src/wafo/stats/distributions.py
@@ -2237,7 +2237,8 @@ class rv_continuous(rv_generic):
         return None
 
     def _nnlf(self, x, *args):
-        return -sum(self._logpdf(x, *args),axis=0)
+        xmin = floatinfo.machar.xmin
+        return -sum(np.maximum(self._logpdf(x, *args), 100 * log(xmin)), axis=0)
 
     def nnlf(self, theta, x):
         ''' Return negative loglikelihood function, i.e., - sum (log pdf(x, theta), axis=0)
@@ -2253,7 +2254,7 @@ class rv_continuous(rv_generic):
             return inf
         x = arr((x - loc) / scale)
         cond0 = (x <= self.a) | (self.b <= x)
-        newCall = False
+        newCall = True
        if newCall:
            goodargs = argsreduce(1 - cond0, *((x,)))
            goodargs = tuple(goodargs + list(args))
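The distributions.py hunk bounds each log-density at 100*log(xmin) before summing, so a single observation in a zero-density region produces a huge but finite penalty instead of an infinite negative log-likelihood that would stall a fit. A standalone sketch of the same idea, using a generic log-pdf rather than wafo's internals, with np.finfo(float).tiny standing in for floatinfo.machar.xmin:

    import numpy as np

    def clipped_nnlf(logpdf, x):
        xmin = np.finfo(float).tiny        # smallest positive normal float
        lowerlim = 100 * np.log(xmin)      # about -70843, the floor used in the patch
        return -np.sum(np.maximum(logpdf(x), lowerlim), axis=0)

    logpdf = lambda x: np.where(x > 0, -x, -np.inf)   # unit exponential support
    x = np.array([1.0, 2.0, -0.5])         # last point lies outside the support
    assert np.isfinite(clipped_nnlf(logpdf, x))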