Ongoing work to simplify estimation

master
pbrod 8 years ago
parent 91c0a2fd78
commit 649cd0fba2

@@ -219,42 +219,85 @@ def freeze(self, *args, **kwds):
return rv_frozen(self, *args, **kwds)
def link(self, x, logSF, theta, i):
'''
Return theta[i] as function of quantile, survival probability and
theta[j] for j!=i.
# def link(self, x, logSF, theta, i):
# '''
# Return theta[i] as function of quantile, survival probability and
# theta[j] for j!=i.
#
# Parameters
# ----------
# x : quantile
# logSF : logarithm of the survival probability
# theta : list
# all distribution parameters including location and scale.
#
# Returns
# -------
# theta[i] : real scalar
# fixed distribution parameter theta[i] as function of x, logSF and
# theta[j] where j != i.
#
# LINK is a function connecting the fixed distribution parameter theta[i]
# with the quantile (x) and the survival probability (SF) and the
# remaining free distribution parameters theta[j] for j!=i, i.e.:
# theta[i] = link(x, logSF, theta, i),
# where logSF = log(Prob(X>x; theta)).
#
# See also
# estimation.Profile
# '''
# return self._link(x, logSF, theta, i)
#
#
# def _link(self, x, logSF, theta, i):
# msg = 'Link function not implemented for the {} distribution'
# raise NotImplementedError(msg.format(self.name))
def _resolve_ties(self, log_dprb, x, args, scale):
dx = np.diff(x, axis=0)
tie = dx == 0
if any(tie): # TODO : implement this method for treating ties in data:
# Assume measuring error is delta. Then compute
# yL = F(xi-delta,theta)
# yU = F(xi+delta,theta)
# and replace
# logDj = log((yU-yL)/(r-1)) for j = i+1,i+2,...i+r-1
# The following is OK when only minimization of T is wanted
i_tie, = np.nonzero(tie)
log_dprb[i_tie + 1] = log(self._pdf(x[i_tie], *args)) - log(scale)
return log_dprb
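# Sketch (not part of this commit) of why _resolve_ties is needed: tied
# observations give a zero spacing, so the plain log-spacing is -inf and the
# tied entry is replaced by the log-density, exactly as above.  Assumes a
# standard normal (loc=0, scale=1) from scipy.stats for illustration.
import numpy as np
from scipy.stats import norm
x = np.sort(np.array([0.1, 0.5, 0.5, 1.2]))      # one tied pair at 0.5
dprb = np.diff(np.hstack((0.0, norm.cdf(x), 1.0)))
log_dprb = np.log(dprb + np.finfo(float).tiny)   # ~ -708 where the tie sits
i_tie, = np.nonzero(np.diff(x) == 0)
log_dprb[i_tie + 1] = norm.logpdf(x[i_tie])      # log(pdf) - log(scale), with scale = 1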
Parameters
----------
x : quantile
logSF : logarithm of the survival probability
theta : list
all distribution parameters including location and scale.
Returns
-------
theta[i] : real scalar
fixed distribution parameter theta[i] as function of x, logSF and
theta[j] where j != i.
LINK is a function connecting the fixed distribution parameter theta[i]
with the quantile (x) and the survival probability (SF) and the
remaining free distribution parameters theta[j] for j!=i, i.e.:
theta[i] = link(x, logSF, theta, i),
where logSF = log(Prob(X>x; theta)).
See also
estimation.Profile
'''
return self._link(x, logSF, theta, i)
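# Illustrative sketch (not in this diff) of what a link function computes,
# using the exponential distribution in scipy's parameterization, where
# SF(x) = exp(-(x - loc)/scale) and hence logSF = -(x - loc)/scale.  The
# function name expon_link and the theta layout (loc, scale) are assumptions
# made only for this example.
def expon_link(x, logSF, theta, i):
    loc, scale = theta
    if i == 1:                      # express the fixed scale via x, logSF and loc
        return -(x - loc) / logSF
    return x + scale * logSF        # express the fixed loc via x, logSF and scale
# x = 3, loc = 0, scale = 2  ->  logSF = -1.5, and the link recovers the scale:
print(expon_link(3.0, -1.5, [0.0, None], i=1))   # 2.0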
def _log_dprb(self, x, args, scale, lowertail=True):
if lowertail:
prb = np.hstack((0.0, self.cdf(x, *args), 1.0))
dprb = np.diff(prb)
else:
prb = np.hstack((1.0, self.sf(x, *args), 0.0))
dprb = -np.diff(prb)
log_dprb = log(dprb + _XMIN)
log_dprb = _resolve_ties(self, log_dprb, x, args, scale)
return log_dprb
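# Numerical note (sketch, assuming scipy.stats.expon): the lowertail and
# uppertail branches above give the same spacings in exact arithmetic, but the
# survival-function form keeps precision far out in the upper tail, where
# cdf(x) rounds to 1.0 in double precision (around x ~ 37 for the exponential).
import numpy as np
from scipy.stats import expon
x = np.array([1.0, 5.0, 30.0, 40.0])
lower = np.diff(np.hstack((0.0, expon.cdf(x), 1.0)))
upper = -np.diff(np.hstack((1.0, expon.sf(x), 0.0)))
print(lower[-1], upper[-1])   # 0.0 versus ~4.2e-18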
def _link(self, x, logSF, theta, i):
msg = 'Link function not implemented for the {} distribution'
raise NotImplementedError(msg.format(self.name))
def _nlogps_and_penalty(self, x, scale, args):
cond0 = ~self._support_mask(x)
n_bad = np.sum(cond0)
if n_bad > 0:
x = argsreduce(~cond0, x)[0]
log_dprb = _log_dprb(self, x, args, scale)
finite_log_dprb = np.isfinite(log_dprb)
n_bad += np.sum(~finite_log_dprb, axis=0)
if n_bad > 0:
penalty = 100.0 * np.log(_XMAX) * n_bad
return -np.sum(log_dprb[finite_log_dprb], axis=0) + penalty
return -np.sum(log_dprb, axis=0)
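# Magnitude check (sketch, assuming _XMAX is the largest finite double as in
# numpy): each observation outside the support, or giving a non-finite
# log-spacing, adds about 7.1e4 to the objective, which dominates any
# realistic spacing sum and pushes the optimizer back into the feasible
# region without returning inf.
import numpy as np
print(100.0 * np.log(np.finfo(float).max))   # ~70978 per bad observation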
def nlogps(self, theta, x):
def _penalized_nlogps(self, theta, x):
""" Moran's negative log Product Spacings statistic
where theta are the parameters (including loc and scale)
@@ -279,53 +322,35 @@ def nlogps(self, theta, x):
product of spacings.",
IMS Lecture Notes Monograph Series 2006, Vol. 52, pp. 272-283
"""
loc, scale, args = _unpack_loc_scale(theta)
if not self._argcheck(*args) or scale <= 0:
return inf
x = asarray((x - loc) / scale)
return _nlogps_and_penalty(self, x, scale, args)
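# Hand computation of the statistic for a tiny sample (sketch; the method
# above additionally handles ties, support violations and the penalty term).
# Uses a standard normal from scipy.stats purely for illustration.
import numpy as np
from scipy.stats import norm
x = np.sort(np.array([-0.3, 0.4, 1.1]))
loc, scale = 0.0, 1.0
z = (x - loc) / scale
spacings = np.diff(np.hstack((0.0, norm.cdf(z), 1.0)))
T = -np.sum(np.log(spacings))    # Moran's negative log product-of-spacings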
def _unpack_loc_scale(theta):
try:
loc = theta[-2]
scale = theta[-1]
args = tuple(theta[:-2])
except IndexError:
raise ValueError("Not enough input arguments.")
if not self._argcheck(*args) or scale <= 0:
return inf
x = asarray((x - loc) / scale)
cond0 = (x < self.a) | (self.b < x)
Nbad = np.sum(cond0)
if Nbad > 0:
x = argsreduce(~cond0, x)[0]
return loc, scale, args
lowertail = True
if lowertail:
prb = np.hstack((0.0, self.cdf(x, *args), 1.0))
dprb = np.diff(prb)
else:
prb = np.hstack((1.0, self.sf(x, *args), 0.0))
dprb = -np.diff(prb)
logD = log(dprb + _XMIN)
dx = np.diff(x, axis=0)
tie = (dx == 0)
if any(tie):
# TODO : implement this method for treating ties in data:
# Assume measuring error is delta. Then compute
# yL = F(xi-delta,theta)
# yU = F(xi+delta,theta)
# and replace
# logDj = log((yU-yL)/(r-1)) for j = i+1,i+2,...i+r-1
# The following is OK when only minimization of T is wanted
i_tie, = np.nonzero(tie)
tiedata = x[i_tie]
logD[i_tie + 1] = log(self._pdf(tiedata, *args)) - log(scale)
finiteD = np.isfinite(logD)
nonfiniteD = 1 - finiteD
Nbad += np.sum(nonfiniteD, axis=0)
if Nbad > 0:
T = -np.sum(logD[finiteD], axis=0) + 100.0 * np.log(_XMAX) * Nbad
else:
T = -np.sum(logD, axis=0)
return T
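# Parameter layout assumed by _unpack_loc_scale above (matching scipy's fit
# output): theta packs the shape parameters first and (loc, scale) last.
# Illustration using the helper defined above, e.g. for a gamma fit:
loc, scale, args = _unpack_loc_scale((2.5, 0.0, 3.0))   # loc=0.0, scale=3.0, args=(2.5,)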
def _nnlf_and_penalty(self, x, args):
cond0 = ~self._support_mask(x)
n_bad = sum(cond0)
if n_bad > 0:
x = argsreduce(~cond0, x)[0]
logpdf = self._logpdf(x, *args)
finite_logpdf = np.isfinite(logpdf)
n_bad += np.sum(~finite_logpdf, axis=0)
if n_bad > 0:
penalty = n_bad * log(_XMAX) * 100
return -np.sum(logpdf[finite_logpdf], axis=0) + penalty
return -np.sum(logpdf, axis=0)
def _penalized_nnlf(self, theta, x, penalty=None):
@@ -333,29 +358,12 @@ def _penalized_nnlf(self, theta, x, penalty=None):
i.e., - sum (log pdf(x, theta), axis=0)
where theta are the parameters (including loc and scale)
'''
try:
loc = theta[-2]
scale = theta[-1]
args = tuple(theta[:-2])
except IndexError:
raise ValueError("Not enough input arguments.")
loc, scale, args = _unpack_loc_scale(theta)
if not self._argcheck(*args) or scale <= 0:
return inf
x = asarray((x-loc) / scale)
N = len(x)
cond0 = (x < self.a) | (self.b < x)
Nbad = sum(cond0)
if Nbad > 0:
x = argsreduce(~cond0, x)[0]
logpdf = self._logpdf(x, *args)
finite_logpdf = np.isfinite(logpdf)
Nbad += np.sum(~finite_logpdf, axis=0)
logscale = N * log(scale)
if Nbad > 0:
if penalty is None:
penalty = Nbad * log(_XMAX) * 100
return -np.sum(logpdf[finite_logpdf], axis=0) + penalty + logscale
return -np.sum(logpdf, axis=0) + logscale
n_log_scale = len(x) * log(scale)
return _nnlf_and_penalty(self, x, args) + n_log_scale
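# Check (sketch, using scipy.stats.norm) of the identity used above: the full
# negative log-likelihood of a location-scale family equals the nnlf of the
# standardized data plus n*log(scale).
import numpy as np
from scipy.stats import norm
x = np.array([1.0, 2.0, 4.0])
loc, scale = 1.5, 2.0
z = (x - loc) / scale
nll = -np.sum(norm.logpdf(z)) + len(x) * np.log(scale)
assert np.isclose(nll, -np.sum(norm.logpdf(x, loc, scale)))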
def _reduce_func(self, args, options):
@@ -387,7 +395,7 @@ def _reduce_func(self, args, options):
x0.append(args[n])
method = kwds.pop('method', 'ml').lower()
if method.startswith('mps'):
fitfun = self.nlogps
fitfun = self._penalized_nlogps
else:
fitfun = self._penalized_nnlf
@@ -609,9 +617,9 @@ def _open_support_mask(self, x):
rv_generic.freeze = freeze
rv_discrete.freeze = freeze
rv_continuous.freeze = freeze
rv_continuous.link = link
rv_continuous._link = _link
rv_continuous.nlogps = nlogps
# rv_continuous.link = link
# rv_continuous._link = _link
rv_continuous._penalized_nlogps = _penalized_nlogps
rv_continuous._penalized_nnlf = _penalized_nnlf
rv_continuous._reduce_func = _reduce_func
rv_continuous.fit = fit
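# Usage sketch (not part of this diff): once the patches above have replaced
# rv_continuous.fit, the method switch in _reduce_func makes 'mps' available
# alongside the default penalized maximum likelihood.  The import below and
# the choice of genextreme are assumptions for illustration only; this will
# not work with an unpatched scipy.
import scipy.stats as ss
data = ss.genextreme.rvs(-0.2, size=200, random_state=0)
phat_ml = ss.genextreme.fit(data)                  # default 'ml': penalized nnlf
phat_mps = ss.genextreme.fit(data, method='mps')   # Moran's product spacings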

@@ -1119,43 +1119,8 @@ class FitDistribution(rv_frozen):
return -H
def _nnlf(self, theta, x):
return self.dist._penalized_nnlf(theta, x, self._penalty)
return self.dist._penalized_nnlf(theta, x)
def _log_dprb(self, x, args, scale, dist):
lowertail = True
if lowertail:
prb = np.hstack((0.0, dist.cdf(x, *args), 1.0))
dprb = np.diff(prb)
else:
prb = np.hstack((1.0, dist.sf(x, *args), 0.0))
dprb = -np.diff(prb)
log_dprb = log(dprb + _XMIN)
dx = np.diff(x, axis=0)
tie = dx == 0
if np.any(tie):
# TODO : implement this method for treating ties in data:
# Assume measuring error is delta. Then compute
# yL = F(xi - delta, theta)
# yU = F(xi + delta, theta)
# and replace
# logDj = log((yU-yL)/(r-1)) for j = i+1,i+2,...i+r-1
# The following is OK when only minimization of T is wanted
i_tie, = np.nonzero(tie)
tiedata = x[i_tie]
log_dprb[i_tie + 1] = log(dist._pdf(tiedata, *args)) - log(scale)
return log_dprb
def _compute_penalty(self, Nbad, finiteD):
nonfiniteD = 1 - finiteD
Nbad += np.sum(nonfiniteD, axis=0)
if Nbad > 0:
if self._penalty is None:
penalty = 100.0 * log(_XMAX) * Nbad
else:
penalty = 0.0
return penalty
def _nlogps(self, theta, x):
""" Moran's negative log Product Spacings statistic
@@ -1182,33 +1147,7 @@ class FitDistribution(rv_frozen):
product of spacings.",
IMS Lecture Notes Monograph Series 2006, Vol. 52, pp. 272-283
"""
def parse_theta(theta, dist):
n = 2 if isinstance(dist, rv_continuous) else 1
try:
loc = theta[-n]
scale = theta[-1]
args = tuple(theta[:-n])
except IndexError:
raise ValueError("Not enough input arguments.")
if not isinstance(dist, rv_continuous):
scale = 1
return args, loc, scale
dist = self.dist
args, loc, scale = parse_theta(theta, dist)
if not dist._argcheck(*args) or scale <= 0:
return np.inf
x = asarray((x - loc) / scale)
cond0 = (x < dist.a) | (dist.b < x)
Nbad = np.sum(cond0)
if Nbad > 0:
x = argsreduce(~cond0, x)[0]
log_dprb = self._log_dprb(x, args, scale, dist)
finiteD = np.isfinite(log_dprb)
penalty = self._compute_penalty(Nbad, finiteD)
return -np.sum(log_dprb[finiteD], axis=0) + penalty
return self.dist._penalized_nlogps(theta, x)
@staticmethod
def _get_optimizer(kwds):
@@ -1246,7 +1185,6 @@ class FitDistribution(rv_frozen):
warnings.warn("Did not converge")
def _fit(self, *args, **kwds):
self._penalty = None
data = self.data
args = self._fit_start(data, args, kwds)
@@ -1276,7 +1214,7 @@
'''
numpar = self.dist.numargs + 2
par_cov = zeros((numpar, numpar))
# self._penalty = 0
somefixed = ((self.par_fix is not None) and
np.any(isfinite(self.par_fix)))
