Updated distributions.py so it is in accordance with scipy.stats.distributions.py

master
Per.Andreas.Brodtkorb 15 years ago
parent e77fc13a66
commit 24360c33f8

File diff suppressed because it is too large

@@ -55,7 +55,7 @@ def valarray(shape, value=nan, typecode=None):
     if typecode is not None:
         out = out.astype(typecode)
     if not isinstance(out, ndarray):
-        out = asarray(out)
+        out = arr(out)
     return out
 
 # Frozen RV class
@@ -99,7 +99,6 @@ class rv_frozen(object):
         args, loc0 = dist.fix_loc(args, loc0)
         self.par = args + (loc0,)
-
     def pdf(self, x):
         ''' Probability density function at x of the given RV.'''
         return self.dist.pdf(x, *self.par)
@@ -123,6 +122,14 @@ class rv_frozen(object):
         ''' Some statistics of the given RV'''
         kwds = dict(moments=moments)
         return self.dist.stats(*self.par, **kwds)
+    def median(self):
+        return self.dist.median(*self.par, **self.kwds)
+    def mean(self):
+        return self.dist.mean(*self.par, **self.kwds)
+    def var(self):
+        return self.dist.var(*self.par, **self.kwds)
+    def std(self):
+        return self.dist.std(*self.par, **self.kwds)
     def moment(self, n):
         par1 = self.par[:self.dist.numargs]
         return self.dist.moment(n, *par1)
@@ -131,6 +138,8 @@ class rv_frozen(object):
     def pmf(self, k):
         '''Probability mass function at k of the given RV'''
         return self.dist.pmf(k, *self.par)
+    def interval(self, alpha):
+        return self.dist.interval(alpha, *self.par, **self.kwds)
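
Not part of the diff, for context: the median/mean/var/std methods added above, together with interval(), mirror the frozen-distribution API of scipy.stats, which is the point of this commit. A minimal usage sketch against a scipy.stats frozen normal (values are illustrative only):

    from scipy import stats

    frozen = stats.norm(loc=5.0, scale=2.0)      # loc/scale frozen into the object
    print(frozen.median())                       # 5.0 for a symmetric distribution
    print(frozen.mean(), frozen.var(), frozen.std())
    print(frozen.interval(0.95))                 # central 95% interval (lower, upper)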
@@ -525,8 +534,8 @@ class FitDistribution(rv_frozen):
         self.par_cov = zeros((np, np))
         self.LLmax = -dist.nnlf(self.par, self.data)
         self.LPSmax = -dist.nlogps(self.par, self.data)
-        self.pvalue = dist.pvalue(self.par, self.data, unknown_numpar=numpar)
-        H = numpy.asmatrix(dist.hessian_nnlf(self.par, self.data))
+        self.pvalue = self._pvalue(self.par, self.data, unknown_numpar=numpar)
+        H = numpy.asmatrix(self._hessian_nnlf(self.par, self.data))
         self.H = H
         try:
             if allfixed:
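
Not part of the diff: the Hessian H assembled here is the observed information at the fitted parameters, and its inverse is the usual large-sample estimate of the parameter covariance that par_cov holds. A hedged sketch of that relationship with made-up numbers, assuming H is well conditioned:

    import numpy as np

    H = np.array([[25.0, 3.0],                   # illustrative observed information
                  [3.0, 10.0]])
    par_cov = np.linalg.inv(H)                   # approximate covariance of the estimates
    std_err = np.sqrt(np.diag(par_cov))          # approximate standard errors
    print(par_cov, std_err)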
@@ -772,7 +781,7 @@ class FitDistribution(rv_frozen):
-    def pvalue(self, theta, x, unknown_numpar=None):
+    def _pvalue(self, theta, x, unknown_numpar=None):
         ''' Return the P-value for the fit using Moran's negative log Product Spacings statistic
            where theta are the parameters (including loc and scale)
@@ -784,7 +793,7 @@ class FitDistribution(rv_frozen):
         if any(tie):
             print('P-value is on the conservative side (i.e. too large) due to ties in the data!')
-        T = self.nlogps(theta, x)
+        T = self.dist.nlogps(theta, x)
         n = len(x)
         np1 = n + 1
@@ -802,115 +811,8 @@ class FitDistribution(rv_frozen):
         pvalue = chi2sf(Tn, n) #_WAFODIST.chi2.sf(Tn, n)
         return pvalue
 
-    def nlogps(self, theta, x):
-        """ Moran's negative log Product Spacings statistic
-            where theta are the parameters (including loc and scale)
-            Note the data in x must be sorted
-
-        References
-        -----------
-        R. C. H. Cheng; N. A. K. Amin (1983)
-        "Estimating Parameters in Continuous Univariate Distributions with a
-        Shifted Origin.",
-        Journal of the Royal Statistical Society. Series B (Methodological),
-        Vol. 45, No. 3. (1983), pp. 394-403.
-
-        R. C. H. Cheng; M. A. Stephens (1989)
-        "A Goodness-Of-Fit Test Using Moran's Statistic with Estimated
-        Parameters", Biometrika, 76, 2, pp 385-392
-
-        Wong, T.S.T. and Li, W.K. (2006)
-        "A note on the estimation of extreme value distributions using maximum
-        product of spacings.",
-        IMS Lecture Notes Monograph Series 2006, Vol. 52, pp. 272-283
-        """
-        try:
-            loc = theta[-2]
-            scale = theta[-1]
-            args = tuple(theta[:-2])
-        except IndexError:
-            raise ValueError, "Not enough input arguments."
-        if not self.dist._argcheck(*args) or scale <= 0:
-            return inf
-        x = arr((x - loc) / scale)
-        cond0 = (x <= self.dist.a) | (x >= self.dist.b)
-        if (any(cond0)):
-            return inf
-        else:
-            #linfo = numpy.finfo(float)
-            realmax = floatinfo.machar.xmax
-            lowertail = True
-            if lowertail:
-                prb = numpy.hstack((0.0, self.dist.cdf(x, *args), 1.0))
-                dprb = numpy.diff(prb)
-            else:
-                prb = numpy.hstack((1.0, self.dist.sf(x, *args), 0.0))
-                dprb = -numpy.diff(prb)
-            logD = log(dprb)
-            dx = numpy.diff(x, axis=0)
-            tie = (dx == 0)
-            if any(tie):
-                # TODO % implement this method for treating ties in data:
-                # Assume measuring error is delta. Then compute
-                # yL = F(xi-delta,theta)
-                # yU = F(xi+delta,theta)
-                # and replace
-                # logDj = log((yU-yL)/(r-1)) for j = i+1,i+2,...i+r-1
-                # The following is OK when only minimization of T is wanted
-                i_tie = nonzero(tie)
-                tiedata = x[i_tie]
-                logD[(i_tie[0] + 1,)] = log(self.dist._pdf(tiedata, *args)) + log(scale)
-            finiteD = numpy.isfinite(logD)
-            nonfiniteD = 1 - finiteD
-            if any(nonfiniteD):
-                T = -sum(logD[finiteD], axis=0) + 100.0 * log(realmax) * sum(nonfiniteD, axis=0);
-            else:
-                T = -sum(logD, axis=0) #%Moran's negative log product spacing statistic
-        return T
-
-    def _nnlf(self, x, *args):
-        return - sum(log(self._pdf(x, *args)), axis=0)
-
-    def nnlf(self, theta, x):
-        ''' Return negative loglikelihood function, i.e., - sum (log pdf(x, theta),axis=0)
-        where theta are the parameters (including loc and scale)
-        '''
-        try:
-            loc = theta[-2]
-            scale = theta[-1]
-            args = tuple(theta[:-2])
-        except IndexError:
-            raise ValueError, "Not enough input arguments."
-        if not self._argcheck(*args) or scale <= 0:
-            return inf
-        x = arr((x - loc) / scale)
-        cond0 = (x <= self.a) | (self.b <= x)
-#        newCall = False
-#        if newCall:
-#            goodargs = argsreduce(1-cond0, *((x,)))
-#            goodargs = tuple(goodargs + list(args))
-#            N = len(x)
-#            Nbad = sum(cond0)
-#            xmin = floatinfo.machar.xmin
-#            return self._nnlf(*goodargs) + N*log(scale) + Nbad*100.0*log(xmin)
-#        el
-        if (any(cond0)):
-            return inf
-        else:
-            N = len(x)
-            return self._nnlf(x, *args) + N * log(scale)
-
-    def hessian_nnlf(self, theta, data, eps=None):
+    def _hessian_nnlf(self, theta, data, eps=None):
         ''' approximate hessian of nnlf where theta are the parameters (including loc and scale)
         '''
         #Nd = len(x)
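
Not part of the diff: the nlogps code removed above (FitDistribution now delegates to self.dist.nlogps) evaluates Moran's negative log product-spacings statistic, T(theta) = -sum(log(F(x_(i); theta) - F(x_(i-1); theta))) over the sorted sample, with F pinned to 0 and 1 at the two ends. A minimal standalone sketch using scipy.stats.norm as a stand-in CDF (function and variable names are illustrative only; ties, which produce zero spacings, are not handled here):

    import numpy as np
    from scipy import stats

    def moran_nlogps(x, cdf):
        x = np.sort(np.asarray(x))
        prb = np.hstack((0.0, cdf(x), 1.0))      # F(x_0) = 0, F(x_{n+1}) = 1
        dprb = np.diff(prb)                      # spacings D_i = F(x_i) - F(x_{i-1})
        return -np.sum(np.log(dprb))             # T = -sum(log D_i)

    data = stats.norm.rvs(size=50, random_state=0)
    print(moran_nlogps(data, stats.norm(loc=0.0, scale=1.0).cdf))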
@@ -922,7 +824,7 @@ class FitDistribution(rv_frozen):
         if eps == None:
             eps = (floatinfo.machar.eps) ** 0.4
-        xmin = floatinfo.machar.xmin
+        #xmin = floatinfo.machar.xmin
         #myfun = lambda y: max(y,100.0*log(xmin)) #% trick to avoid log of zero
         delta = (eps + 2.0) - 2.0
         delta2 = delta ** 2.0
@@ -930,36 +832,37 @@ class FitDistribution(rv_frozen):
         # % 1/(d^2 L(theta|x)/dtheta^2)
         #  % using central differences
-        LL = self.nnlf(theta, data)
+        dist = self.dist
+        LL = dist.nnlf(theta, data)
         H = zeros((np, np)) #%% Hessian matrix
         theta = tuple(theta)
         for ix in xrange(np):
             sparam = list(theta)
             sparam[ix] = theta[ix] + delta
-            fp = self.nnlf(sparam, data)
+            fp = dist.nnlf(sparam, data)
             #fp = sum(myfun(x))
             sparam[ix] = theta[ix] - delta
-            fm = self.nnlf(sparam, data)
+            fm = dist.nnlf(sparam, data)
             #fm = sum(myfun(x))
             H[ix, ix] = (fp - 2 * LL + fm) / delta2
             for iy in range(ix + 1, np):
                 sparam[ix] = theta[ix] + delta
                 sparam[iy] = theta[iy] + delta
-                fpp = self.nnlf(sparam, data)
+                fpp = dist.nnlf(sparam, data)
                 #fpp = sum(myfun(x))
                 sparam[iy] = theta[iy] - delta
-                fpm = self.nnlf(sparam, data)
+                fpm = dist.nnlf(sparam, data)
                 #fpm = sum(myfun(x))
                 sparam[ix] = theta[ix] - delta
-                fmm = self.nnlf(sparam, data)
+                fmm = dist.nnlf(sparam, data)
                 #fmm = sum(myfun(x));
                 sparam[iy] = theta[iy] + delta
-                fmp = self.nnlf(sparam, data)
+                fmp = dist.nnlf(sparam, data)
                 #fmp = sum(myfun(x))
                 H[ix, iy] = ((fpp + fmm) - (fmp + fpm)) / (4. * delta2)
                 H[iy, ix] = H[ix, iy]
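
Not part of the diff: _hessian_nnlf approximates the Hessian of the negative log-likelihood by central differences, H[i, i] ~ (f(theta + d*e_i) - 2*f(theta) + f(theta - d*e_i)) / d^2 and H[i, j] ~ (f(theta + d*e_i + d*e_j) + f(theta - d*e_i - d*e_j) - f(theta + d*e_i - d*e_j) - f(theta - d*e_i + d*e_j)) / (4*d^2). A generic sketch of the same scheme for an arbitrary scalar objective (names are illustrative, not from the patched file):

    import numpy as np

    def num_hessian(objective, theta, delta=1e-4):
        theta = np.asarray(theta, dtype=float)
        n = theta.size
        H = np.zeros((n, n))
        f0 = objective(theta)
        for i in range(n):
            ei = np.zeros(n); ei[i] = delta
            # diagonal: second derivative along coordinate i
            H[i, i] = (objective(theta + ei) - 2.0 * f0 + objective(theta - ei)) / delta ** 2
            for j in range(i + 1, n):
                ej = np.zeros(n); ej[j] = delta
                # off-diagonal: mixed partial via the four corner evaluations
                H[i, j] = (objective(theta + ei + ej) + objective(theta - ei - ej)
                           - objective(theta + ei - ej) - objective(theta - ei + ej)) / (4.0 * delta ** 2)
                H[j, i] = H[i, j]
        return H

    # f(a, b) = a**2 + 3*a*b + 2*b**2 has Hessian [[2, 3], [3, 4]].
    print(num_hessian(lambda t: t[0] ** 2 + 3 * t[0] * t[1] + 2 * t[1] ** 2, [1.0, 1.0]))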
