From 91c0a2fd78ae913bcca9a0ac7758e5a2dc029355 Mon Sep 17 00:00:00 2001
From: pbrod
Date: Fri, 30 Dec 2016 10:54:00 +0100
Subject: [PATCH] Simplified _nlogps

---
 wafo/stats/estimation.py | 99 ++++++++++++++++++++++------------------
 1 file changed, 55 insertions(+), 44 deletions(-)

diff --git a/wafo/stats/estimation.py b/wafo/stats/estimation.py
index dde85ea..1f592cc 100644
--- a/wafo/stats/estimation.py
+++ b/wafo/stats/estimation.py
@@ -1121,6 +1121,42 @@ class FitDistribution(rv_frozen):
     def _nnlf(self, theta, x):
         return self.dist._penalized_nnlf(theta, x, self._penalty)
 
+    def _log_dprb(self, x, args, scale, dist):
+        lowertail = True
+        if lowertail:
+            prb = np.hstack((0.0, dist.cdf(x, *args), 1.0))
+            dprb = np.diff(prb)
+        else:
+            prb = np.hstack((1.0, dist.sf(x, *args), 0.0))
+            dprb = -np.diff(prb)
+
+        log_dprb = log(dprb + _XMIN)
+
+        dx = np.diff(x, axis=0)
+        tie = dx == 0
+        if np.any(tie):
+            # TODO : implement this method for treating ties in data:
+            # Assume measuring error is delta. Then compute
+            # yL = F(xi - delta, theta)
+            # yU = F(xi + delta, theta)
+            # and replace
+            # logDj = log((yU-yL)/(r-1)) for j = i+1,i+2,...i+r-1
+            # The following is OK when only minimization of T is wanted
+            i_tie, = np.nonzero(tie)
+            tiedata = x[i_tie]
+            log_dprb[i_tie + 1] = log(dist._pdf(tiedata, *args)) - log(scale)
+        return log_dprb
+
+    def _compute_penalty(self, Nbad, finiteD):
+        nonfiniteD = 1 - finiteD
+        Nbad += np.sum(nonfiniteD, axis=0)
+        if Nbad > 0:
+            if self._penalty is None:
+                penalty = 100.0 * log(_XMAX) * Nbad
+            else:
+                penalty = 0.0
+        return penalty
+
     def _nlogps(self, theta, x):
         """
         Moran's negative log Product Spacings statistic
@@ -1146,58 +1182,33 @@ class FitDistribution(rv_frozen):
         product of spacings.",
         IMS Lecture Notes Monograph Series 2006, Vol. 52, pp. 272-283
         """
-        n = 2 if isinstance(self.dist, rv_continuous) else 1
-        try:
-            loc = theta[-n]
-            scale = theta[-1]
-            args = tuple(theta[:-n])
-        except IndexError:
-            raise ValueError("Not enough input arguments.")
-        if not isinstance(self.dist, rv_continuous):
-            scale = 1
-        if not self.dist._argcheck(*args) or scale <= 0:
-            return np.inf
+        def parse_theta(theta, dist):
+            n = 2 if isinstance(dist, rv_continuous) else 1
+            try:
+                loc = theta[-n]
+                scale = theta[-1]
+                args = tuple(theta[:-n])
+            except IndexError:
+                raise ValueError("Not enough input arguments.")
+            if not isinstance(dist, rv_continuous):
+                scale = 1
+            return args, loc, scale
+        dist = self.dist
+        args, loc, scale = parse_theta(theta, dist)
+        if not dist._argcheck(*args) or scale <= 0:
+            return np.inf
 
         x = asarray((x - loc) / scale)
         cond0 = (x < dist.a) | (dist.b < x)
         Nbad = np.sum(cond0)
         if Nbad > 0:
             x = argsreduce(~cond0, x)[0]
 
-        lowertail = True
-        if lowertail:
-            prb = np.hstack((0.0, dist.cdf(x, *args), 1.0))
-            dprb = np.diff(prb)
-        else:
-            prb = np.hstack((1.0, dist.sf(x, *args), 0.0))
-            dprb = -np.diff(prb)
+        log_dprb = self._log_dprb(x, args, scale, dist)
 
-        logD = log(dprb + _XMIN)
-        dx = np.diff(x, axis=0)
-        tie = (dx == 0)
-        if np.any(tie):
-            # TODO : implement this method for treating ties in data:
-            # Assume measuring error is delta. Then compute
-            # yL = F(xi - delta, theta)
-            # yU = F(xi + delta, theta)
-            # and replace
-            # logDj = log((yU-yL)/(r-1)) for j = i+1,i+2,...i+r-1
-
-            # The following is OK when only minimization of T is wanted
-            i_tie, = np.nonzero(tie)
-            tiedata = x[i_tie]
-            logD[i_tie + 1] = log(dist._pdf(tiedata, *args)) - log(scale)
-
-        finiteD = np.isfinite(logD)
-        nonfiniteD = 1 - finiteD
-        Nbad += np.sum(nonfiniteD, axis=0)
-        if Nbad > 0:
-            if self._penalty is None:
-                penalty = 100.0 * log(_XMAX) * Nbad
-            else:
-                penalty = 0.0
-            return -np.sum(logD[finiteD], axis=0) + penalty
-        return -np.sum(logD, axis=0)
+        finiteD = np.isfinite(log_dprb)
+        penalty = self._compute_penalty(Nbad, finiteD)
+        return -np.sum(log_dprb[finiteD], axis=0) + penalty
 
     @staticmethod
     def _get_optimizer(kwds):
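
For orientation, the sketch below is a minimal, self-contained illustration of the quantity that _nlogps minimises: Moran's negative log product of spacings (see the Cheng & Amin 1983 and Cheng & Stephens 1989 references in the docstring above). It is not part of the patch and not wafo's API; the function name nlogps_sketch, the choice of scipy.stats.norm as the fitted model, and the local _XMIN/_XMAX constants are assumptions standing in for wafo's internals, and the tie handling done by _log_dprb via the density is omitted for brevity.

import numpy as np
from scipy.stats import norm

_XMIN = np.finfo(float).tiny   # guards against log(0)
_XMAX = np.finfo(float).max    # scales the penalty for bad observations


def nlogps_sketch(x, loc, scale):
    # Moran's negative log product of spacings for a fitted normal model:
    # the spacings are differences of the fitted CDF at the sorted sample,
    # padded with 0 and 1 so that they sum to one.
    x = np.sort(np.asarray(x))
    prb = np.hstack((0.0, norm.cdf(x, loc=loc, scale=scale), 1.0))
    dprb = np.diff(prb)
    log_dprb = np.log(dprb + _XMIN)
    # Non-finite log-spacings are dropped from the sum but penalised,
    # mirroring the role of _compute_penalty in the patch.
    finite = np.isfinite(log_dprb)
    n_bad = int(np.sum(~finite))
    penalty = 100.0 * np.log(_XMAX) * n_bad if n_bad > 0 else 0.0
    return -np.sum(log_dprb[finite]) + penalty


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    data = rng.normal(loc=1.0, scale=2.0, size=100)
    # The statistic is smaller near the true parameters, which is why
    # FitDistribution can minimise it to estimate loc and scale.
    print(nlogps_sketch(data, loc=1.0, scale=2.0))
    print(nlogps_sketch(data, loc=0.0, scale=0.5))

Padding the CDF values with 0 and 1 turns the spacings into a probability partition, so minimising the statistic spreads probability mass as evenly as possible over the observed sample.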