pywafo/wafo/stats/_distn_infrastructure.py

from __future__ import absolute_import
from scipy.stats._distn_infrastructure import *  # @UnusedWildImport
from scipy.stats._distn_infrastructure import (_skew,  # @UnusedImport
    _kurtosis,  _ncx2_log_pdf,  # @IgnorePep8 @UnusedImport
    _ncx2_pdf,  _ncx2_cdf)  # @UnusedImport @IgnorePep8
from .estimation import FitDistribution, rv_frozen  # @Reimport
from ._constants import _XMAX, _XMIN
from wafo.misc import lazyselect as _lazyselect  # @UnusedImport
from wafo.misc import lazywhere as _lazywhere  # @UnusedImport

_doc_default_example = """\
Examples
--------
>>> from wafo.stats import %(name)s
>>> import matplotlib.pyplot as plt
>>> fig, ax = plt.subplots(1, 1)

Calculate a few first moments:

%(set_vals_stmt)s
>>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')

Display the probability density function (``pdf``):

>>> x = np.linspace(%(name)s.ppf(0.01, %(shapes)s),
...                 %(name)s.ppf(0.99, %(shapes)s), 100)
>>> ax.plot(x, %(name)s.pdf(x, %(shapes)s),
...        'r-', lw=5, alpha=0.6, label='%(name)s pdf')

Alternatively, the distribution object can be called (as a function)
to fix the shape, location and scale parameters. This returns a "frozen"
RV object holding the given parameters fixed.

Freeze the distribution and display the frozen ``pdf``:

>>> rv = %(name)s(%(shapes)s)
>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

Check accuracy of ``cdf`` and ``ppf``:

>>> vals = %(name)s.ppf([0.001, 0.5, 0.999], %(shapes)s)
>>> np.allclose([0.001, 0.5, 0.999], %(name)s.cdf(vals, %(shapes)s))
True

Generate random numbers:

>>> r = %(name)s.rvs(%(shapes)s, size=1000)

And compare the histogram:

>>> ax.hist(r, normed=True, histtype='stepfilled', alpha=0.2)
>>> ax.legend(loc='best', frameon=False)
>>> plt.show()

Compare ML and MPS method
>>> phat = %(name)s.fit2(R, method='ml');>>> phat.plotfitsummary()
>>> plt.figure(plt.gcf().number+1)
>>> phat2 = %(name)s.fit2(R, method='mps')
>>> phat2.plotfitsummary(); plt.figure(plt.gcf().number+1)

Fix loc=0 and estimate shapes and scale
>>> phat3 = %(name)s.fit2(R, scale=1, floc=0, method='mps')
>>> phat3.plotfitsummary(); plt.figure(plt.gcf().number+1)

Accurate confidence interval with profile loglikelihood
>>> lp = phat3.profile()
>>> lp.plot()
>>> pci = lp.get_bounds()

"""


def freeze(self, *args, **kwds):
    """Freeze the distribution for the given arguments.

    Parameters
    ----------
    arg1, arg2, arg3,... : array_like
        The shape parameter(s) for the distribution.  Should include all
        the non-optional arguments, may include ``loc`` and ``scale``.

    Returns
    -------
    rv_frozen : rv_frozen instance
        The frozen distribution.

    """
    return rv_frozen(self, *args, **kwds)


def _resolve_ties(self, log_dprb, x, args, scale):
    dx = np.diff(x, axis=0)
    tie = dx == 0
    if any(tie):  # TODO : implement this method for treating ties in data:
        # Assume measuring error is delta. Then compute
        # yL = F(xi-delta,theta)
        # yU = F(xi+delta,theta)
        # and replace
        # logDj = log((yU-yL)/(r-1)) for j = i+1,i+2,...i+r-1
        # The following is OK when only minimization of T is wanted
        i_tie, = np.nonzero(tie)
        log_dprb[i_tie + 1] = log(self._pdf(x[i_tie], *args)) - log(scale)
    return log_dprb


def _log_dprb(self, x, args, scale, lowertail=True):

    if lowertail:
        prb = np.hstack((0.0, self.cdf(x, *args), 1.0))
        dprb = np.diff(prb)
    else:
        prb = np.hstack((1.0, self.sf(x, *args), 0.0))
        dprb = -np.diff(prb)
    log_dprb = log(dprb + _XMIN)
    log_dprb = _resolve_ties(self, log_dprb, x, args, scale)
    return log_dprb


def _nlogps_and_penalty(self, x, scale, args):
    cond0 = ~self._support_mask(x)
    n_bad = np.sum(cond0)
    if n_bad > 0:
        x = argsreduce(~cond0, x)[0]
    log_dprb = _log_dprb(self, x, args, scale)
    finite_log_dprb = np.isfinite(log_dprb)
    n_bad += np.sum(~finite_log_dprb, axis=0)
    if n_bad > 0:
        penalty = 100.0 * np.log(_XMAX) * n_bad
        return -np.sum(log_dprb[finite_log_dprb], axis=0) + penalty
    return -np.sum(log_dprb, axis=0)


def _penalized_nlogps(self, theta, x):
    """ Moran's negative log Product Spacings statistic

        where theta are the parameters (including loc and scale)

        Note the data in x must be sorted

    References
    -----------

    R. C. H. Cheng; N. A. K. Amin (1983)
    "Estimating Parameters in Continuous Univariate Distributions with a
    Shifted Origin.",
    Journal of the Royal Statistical Society. Series B (Methodological),
    Vol. 45, No. 3. (1983), pp. 394-403.

    R. C. H. Cheng; M. A. Stephens (1989)
    "A Goodness-Of-Fit Test Using Moran's Statistic with Estimated
    Parameters", Biometrika, 76, 2, pp 385-392

    Wong, T.S.T. and Li, W.K. (2006)
    "A note on the estimation of extreme value distributions using maximum
    product of spacings.",
    IMS Lecture Notes Monograph Series 2006, Vol. 52, pp. 272-283
    """
    loc, scale, args = _unpack_loc_scale(theta)
    if not self._argcheck(*args) or scale <= 0:
        return inf
    x = asarray((x - loc) / scale)
    return _nlogps_and_penalty(self, x, scale, args)


def _unpack_loc_scale(theta):
    try:
        loc = theta[-2]
        scale = theta[-1]
        args = tuple(theta[:-2])
    except IndexError:
        raise ValueError("Not enough input arguments.")
    return loc, scale, args


def _nnlf_and_penalty(self, x, args):
    cond0 = ~self._support_mask(x)
    n_bad = sum(cond0)
    if n_bad > 0:
        x = argsreduce(~cond0, x)[0]
    logpdf = self._logpdf(x, *args)
    finite_logpdf = np.isfinite(logpdf)
    n_bad += np.sum(~finite_logpdf, axis=0)
    if n_bad > 0:
        penalty = n_bad * log(_XMAX) * 100
        return -np.sum(logpdf[finite_logpdf], axis=0) + penalty
    return -np.sum(logpdf, axis=0)


def _penalized_nnlf(self, theta, x):
    ''' Return negative loglikelihood function,
    i.e., - sum (log pdf(x, theta), axis=0)
       where theta are the parameters (including loc and scale)
    '''
    loc, scale, args = _unpack_loc_scale(theta)
    if not self._argcheck(*args) or scale <= 0:
        return inf
    x = asarray((x-loc) / scale)
    n_log_scale = len(x) * log(scale)
    return _nnlf_and_penalty(self, x, args) + n_log_scale


def _convert_fshapes2num(self, kwds):
    # First of all, convert fshapes params to fnum: eg for stats.beta,
    # shapes='a, b'. To fix `a`, can specify either `f1` or `fa`.
    # Convert the latter into the former.
    if self.shapes:
        shapes = self.shapes.replace(',', ' ').split()
        for j, s in enumerate(shapes):
            val = kwds.pop('f' + s, None) or kwds.pop('fix_' + s, None)
            if val is not None:
                key = 'f{0:d}'.format(j)
                if key in kwds:
                    raise ValueError("Duplicate entry for {0:s}.".format(key))
                else:
                    kwds[key] = val
    return kwds


def _unpack_args_kwds(self, args, kwds):
    kwds = _convert_fshapes2num(self, kwds)
    args = list(args)
    fixedn = []
    names = ['f%d' % n for n in range(len(args) - 2)] + ['floc', 'fscale']
    x0 = []
    for n, key in enumerate(names):
        if key in kwds:
            fixedn.append(n)
            args[n] = kwds.pop(key)
        else:
            x0.append(args[n])
    return x0, args, fixedn


def _reduce_func(self, args, kwds):
    method = kwds.pop('method', 'ml').lower()
    if method.startswith('mps'):
        fitfun = self._penalized_nlogps
    else:
        fitfun = self._penalized_nnlf

    x0, args, fixedn = _unpack_args_kwds(self, args, kwds)

    nargs = len(args)

    if len(fixedn) == 0:
        func = fitfun
        restore = None
    else:
        if len(fixedn) == nargs:
            raise ValueError(
                "All parameters fixed. There is nothing to optimize.")

        def restore(args, theta):
            # Replace with theta for all numbers not in fixedn
            # This allows the non-fixed values to vary, but
            #  we still call self.nnlf with all parameters.
            i = 0
            for n in range(nargs):
                if n not in fixedn:
                    args[n] = theta[i]
                    i += 1
            return args

        def func(theta, x):
            newtheta = restore(args[:], theta)
            return fitfun(newtheta, x)

    return x0, func, restore, args, fixedn


def _get_optimizer(kwds):
    optimizer = kwds.pop('optimizer', optimize.fmin)
# convert string to function in scipy.optimize
    if not callable(optimizer) and isinstance(optimizer, string_types):
        if not optimizer.startswith('fmin_'):
                optimizer = '_'.join(("fmin", optimizer))
        try:
            optimizer = getattr(optimize, optimizer)
        except AttributeError:
            raise ValueError("{} is not a valid optimizer".format(optimizer))
    return optimizer


def _warn_if_no_success(warnflag):
    if warnflag == 1:
        warnings.warn("The maximum number of iterations was exceeded.")
    elif warnflag == 2:
        warnings.warn("Did not converge")


def _fitstart(self, data, args, kwds):
    narg = len(args)
    if narg > self.numargs:
        raise TypeError("Too many input arguments.")
    start = [None] * 2
    if (narg < self.numargs) or not ('loc' in kwds and 'scale' in kwds):
        # get distribution specific starting locations
        start = self._fitstart(data)
        args += start[narg:-2]
    loc = kwds.pop('loc', start[-2])
    scale = kwds.pop('scale', start[-1])
    args += loc, scale
    return args, kwds


def fit(self, data, *args, **kwargs):
    """
    Return ML/MPS estimate for shape, location, and scale parameters from data.

    ML and MPS stands for Maximum Likelihood and Maximum Product Spacing,
    respectively.  Starting estimates for
    the fit are given by input arguments; for any arguments not provided
    with starting estimates, ``self._fitstart(data)`` is called to generate
    such.

    One can hold some parameters fixed to specific values by passing in
    keyword arguments ``f0``, ``f1``, ..., ``fn`` (for shape parameters)
    and ``floc`` and ``fscale`` (for location and scale parameters,
    respectively).

    Parameters
    ----------
    data : array_like
        Data to use in calculating the MLEs.
    args : floats, optional
        Starting value(s) for any shape-characterizing arguments (those not
        provided will be determined by a call to ``_fitstart(data)``).
        No default value.
    kwds : floats, optional
        Starting values for the location and scale parameters; no default.
        Special keyword arguments are recognized as holding certain
        parameters fixed:

        - f0...fn : hold respective shape parameters fixed.
          Alternatively, shape parameters to fix can be specified by name.
          For example, if ``self.shapes == "a, b"``, ``fa``and ``fix_a``
          are equivalent to ``f0``, and ``fb`` and ``fix_b`` are
          equivalent to ``f1``.

        - floc : hold location parameter fixed to specified value.

        - fscale : hold scale parameter fixed to specified value.

        - method : of estimation. Options are
            'ml' : Maximum Likelihood method (default)
            'mps': Maximum Product Spacing method

        - alpha :  Confidence coefficent  (default=0.05)

        - search : bool
            If true search for best estimator (default),
            otherwise return object with initial distribution parameters

        - optimizer : The optimizer to use.  The optimizer must take ``func``,
            and starting position as the first two arguments,
            plus ``args`` (for extra arguments to pass to the
            function to be optimized) and ``disp=0`` to suppress
            output as keyword arguments.

    Returns
    -------
    shape, loc, scale : tuple of floats
        MLEs for any shape statistics, followed by those for location and
        scale.

    Notes
    -----
    This fit is computed by maximizing a log-likelihood function, with
    penalty applied for samples outside of range of the distribution. The
    returned answer is not guaranteed to be the globally optimal MLE, it
    may only be locally optimal, or the optimization may fail altogether.


    Examples
    --------

    Generate some data to fit: draw random variates from the `beta`
    distribution

    >>> from wafo.stats import beta
    >>> a, b = 1., 2.
    >>> x = beta.rvs(a, b, size=1000)

    Now we can fit all four parameters (``a``, ``b``, ``loc`` and ``scale``):

    >>> a1, b1, loc1, scale1 = beta.fit(x)

    We can also use some prior knowledge about the dataset: let's keep
    ``loc`` and ``scale`` fixed:

    >>> a1, b1, loc1, scale1 = beta.fit(x, floc=0, fscale=1)
    >>> loc1, scale1
    (0, 1)

    We can also keep shape parameters fixed by using ``f``-keywords. To
    keep the zero-th shape parameter ``a`` equal 1, use ``f0=1`` or,
    equivalently, ``fa=1``:

    >>> a1, b1, loc1, scale1 = beta.fit(x, fa=1, floc=0, fscale=1)
    >>> a1
    1

    """
    vals, _ = self._fit(data, *args, **kwargs)
    return vals


def _fit(self, data, *args, **kwargs):
    args, kwds = _fitstart(self, data, args, kwargs.copy())
    x0, func, restore, args, fixedn = self._reduce_func(args, kwds)
    if kwds.pop('search', True):
        optimizer = _get_optimizer(kwds)

        # by now kwds must be empty, since everybody took what they needed
        if kwds:
            raise TypeError("Unknown arguments: {}.".format(kwds))

        output = optimizer(func, x0, args=(ravel(data),), full_output=True,
                           disp=0)
        if output[-1] != 0:
            output = optimizer(func, output[0], args=(ravel(data),),
                               full_output=True)

        _warn_if_no_success(output[-1])
        vals = tuple(output[0])
    else:
        vals = tuple(x0)

    if restore is not None:
        vals = restore(args, vals)
    vals = tuple(vals)
    return vals, fixedn


def fit2(self, data, *args, **kwds):
    ''' Return Maximum Likelihood or Maximum Product Spacing estimator object

    Parameters
    ----------
    data : array-like
        Data to use in calculating the ML or MPS estimators
    args : optional
        Starting values for any shape arguments (those not specified
        will be determined by dist._fitstart(data))
    kwds : loc, scale
        Starting values for the location and scale parameters
        Special keyword arguments are recognized as holding certain
        parameters fixed:
            f0..fn : hold respective shape paramters fixed
            floc : hold location parameter fixed to specified value
            fscale : hold scale parameter fixed to specified value
        method : of estimation. Options are
            'ml' : Maximum Likelihood method (default)
            'mps': Maximum Product Spacing method
        alpha : scalar, optional
            Confidence coefficent  (default=0.05)
        search : bool
            If true search for best estimator (default),
            otherwise return object with initial distribution parameters
        copydata : bool
            If true copydata (default)
        optimizer : The optimizer to use.  The optimizer must take func,
                     and starting position as the first two arguments,
                     plus args (for extra arguments to pass to the
                     function to be optimized) and disp=0 to suppress
                     output as keyword arguments.

    Return
    ------
    phat : FitDistribution object
        Fitted distribution object with following member variables:
        LLmax  : loglikelihood function evaluated using par
        LPSmax : log product spacing function evaluated using par
        pvalue : p-value for the fit
        par : distribution parameters (fixed and fitted)
        par_cov : covariance of distribution parameters
        par_fix : fixed distribution parameters
        par_lower : lower (1-alpha)% confidence bound for the parameters
        par_upper : upper (1-alpha)% confidence bound for the parameters

    Note
    ----
    `data` is sorted using this function, so if `copydata`==False the data
    in your namespace will be sorted as well.
    '''
    return FitDistribution(self, data, args, **kwds)


def _support_mask(self, x):
    return (self.a <= x) & (x <= self.b)


def _open_support_mask(self, x):
    return (self.a < x) & (x < self.b)


rv_generic.freeze = freeze
rv_discrete.freeze = freeze
rv_continuous.freeze = freeze

rv_continuous._penalized_nlogps = _penalized_nlogps
rv_continuous._penalized_nnlf = _penalized_nnlf
rv_continuous._reduce_func = _reduce_func
rv_continuous.fit = fit
rv_continuous._fit = _fit
rv_continuous.fit2 = fit2
rv_continuous._support_mask = _support_mask
rv_continuous._open_support_mask = _open_support_mask
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`from __future__ import absolute_import`
			`from scipy.stats._distn_infrastructure import * # @UnusedWildImport`
			`from scipy.stats._distn_infrastructure import (_skew, # @UnusedImport`
Updated stats 9 years ago			`_kurtosis, _ncx2_log_pdf, # @IgnorePep8 @UnusedImport`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`_ncx2_pdf, _ncx2_cdf) # @UnusedImport @IgnorePep8`
Simplified things. 8 years ago			`from .estimation import FitDistribution, rv_frozen # @Reimport`
Added numba_misc.py 8 years ago			`from ._constants import _XMAX, _XMIN`
Simplified things. 8 years ago			`from wafo.misc import lazyselect as _lazyselect # @UnusedImport`
			`from wafo.misc import lazywhere as _lazywhere # @UnusedImport`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago
			`_doc_default_example = """\`
			`Examples`
			`--------`
			`>>> from wafo.stats import %(name)s`
			`>>> import matplotlib.pyplot as plt`
			`>>> fig, ax = plt.subplots(1, 1)`

			`Calculate a few first moments:`

			`%(set_vals_stmt)s`
			`>>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')`

			Display the probability density function (``pdf``):

			`>>> x = np.linspace(%(name)s.ppf(0.01, %(shapes)s),`
			`... %(name)s.ppf(0.99, %(shapes)s), 100)`
			`>>> ax.plot(x, %(name)s.pdf(x, %(shapes)s),`
			`... 'r-', lw=5, alpha=0.6, label='%(name)s pdf')`

			`Alternatively, the distribution object can be called (as a function)`
			`to fix the shape, location and scale parameters. This returns a "frozen"`
			`RV object holding the given parameters fixed.`

			Freeze the distribution and display the frozen ``pdf``:

			`>>> rv = %(name)s(%(shapes)s)`
			`>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')`

			Check accuracy of ``cdf`` and ``ppf``:

			`>>> vals = %(name)s.ppf([0.001, 0.5, 0.999], %(shapes)s)`
			`>>> np.allclose([0.001, 0.5, 0.999], %(name)s.cdf(vals, %(shapes)s))`
			`True`

			`Generate random numbers:`

			`>>> r = %(name)s.rvs(%(shapes)s, size=1000)`

			`And compare the histogram:`

			`>>> ax.hist(r, normed=True, histtype='stepfilled', alpha=0.2)`
			`>>> ax.legend(loc='best', frameon=False)`
			`>>> plt.show()`

			`Compare ML and MPS method`
			`>>> phat = %(name)s.fit2(R, method='ml');>>> phat.plotfitsummary()`
			`>>> plt.figure(plt.gcf().number+1)`
			`>>> phat2 = %(name)s.fit2(R, method='mps')`
			`>>> phat2.plotfitsummary(); plt.figure(plt.gcf().number+1)`

			`Fix loc=0 and estimate shapes and scale`
			`>>> phat3 = %(name)s.fit2(R, scale=1, floc=0, method='mps')`
			`>>> phat3.plotfitsummary(); plt.figure(plt.gcf().number+1)`

			`Accurate confidence interval with profile loglikelihood`
			`>>> lp = phat3.profile()`
			`>>> lp.plot()`
			`>>> pci = lp.get_bounds()`

			`"""`


			`def freeze(self, args, *kwds):`
			`"""Freeze the distribution for the given arguments.`

			`Parameters`
			`----------`
			`arg1, arg2, arg3,... : array_like`
			`The shape parameter(s) for the distribution. Should include all`
			the non-optional arguments, may include ``loc`` and ``scale``.

			`Returns`
			`-------`
			`rv_frozen : rv_frozen instance`
			`The frozen distribution.`

			`"""`
			`return rv_frozen(self, args, *kwds)`


Ongoing work to simplify estimation 8 years ago			`def _resolve_ties(self, log_dprb, x, args, scale):`
			`dx = np.diff(x, axis=0)`
			`tie = dx == 0`
			`if any(tie): # TODO : implement this method for treating ties in data:`
			`# Assume measuring error is delta. Then compute`
			`# yL = F(xi-delta,theta)`
			`# yU = F(xi+delta,theta)`
			`# and replace`
			`# logDj = log((yU-yL)/(r-1)) for j = i+1,i+2,...i+r-1`
			`# The following is OK when only minimization of T is wanted`
			`i_tie, = np.nonzero(tie)`
			`log_dprb[i_tie + 1] = log(self._pdf(x[i_tie], *args)) - log(scale)`
			`return log_dprb`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago

Ongoing work to simplify estimation 8 years ago			`def _log_dprb(self, x, args, scale, lowertail=True):`

			`if lowertail:`
			`prb = np.hstack((0.0, self.cdf(x, *args), 1.0))`
			`dprb = np.diff(prb)`
			`else:`
			`prb = np.hstack((1.0, self.sf(x, *args), 0.0))`
			`dprb = -np.diff(prb)`
			`log_dprb = log(dprb + _XMIN)`
			`log_dprb = _resolve_ties(self, log_dprb, x, args, scale)`
			`return log_dprb`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago

Ongoing work to simplify estimation 8 years ago			`def _nlogps_and_penalty(self, x, scale, args):`
			`cond0 = ~self._support_mask(x)`
			`n_bad = np.sum(cond0)`
			`if n_bad > 0:`
			`x = argsreduce(~cond0, x)[0]`
Fixed a bug 8 years ago			`log_dprb = _log_dprb(self, x, args, scale)`
Ongoing work to simplify estimation 8 years ago			`finite_log_dprb = np.isfinite(log_dprb)`
			`n_bad += np.sum(~finite_log_dprb, axis=0)`
			`if n_bad > 0:`
			`penalty = 100.0 * np.log(_XMAX) * n_bad`
			`return -np.sum(log_dprb[finite_log_dprb], axis=0) + penalty`
			`return -np.sum(log_dprb, axis=0)`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago

Ongoing work to simplify estimation 8 years ago			`def _penalized_nlogps(self, theta, x):`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`""" Moran's negative log Product Spacings statistic`

			`where theta are the parameters (including loc and scale)`

			`Note the data in x must be sorted`

			`References`
			`-----------`

			`R. C. H. Cheng; N. A. K. Amin (1983)`
			`"Estimating Parameters in Continuous Univariate Distributions with a`
			`Shifted Origin.",`
			`Journal of the Royal Statistical Society. Series B (Methodological),`
			`Vol. 45, No. 3. (1983), pp. 394-403.`

			`R. C. H. Cheng; M. A. Stephens (1989)`
			`"A Goodness-Of-Fit Test Using Moran's Statistic with Estimated`
			`Parameters", Biometrika, 76, 2, pp 385-392`

			`Wong, T.S.T. and Li, W.K. (2006)`
			`"A note on the estimation of extreme value distributions using maximum`
			`product of spacings.",`
			`IMS Lecture Notes Monograph Series 2006, Vol. 52, pp. 272-283`
			`"""`
Ongoing work to simplify estimation 8 years ago			`loc, scale, args = _unpack_loc_scale(theta)`
			`if not self._argcheck(*args) or scale <= 0:`
			`return inf`
			`x = asarray((x - loc) / scale)`
			`return _nlogps_and_penalty(self, x, scale, args)`

Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago
Ongoing work to simplify estimation 8 years ago			`def _unpack_loc_scale(theta):`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`try:`
			`loc = theta[-2]`
			`scale = theta[-1]`
			`args = tuple(theta[:-2])`
			`except IndexError:`
			`raise ValueError("Not enough input arguments.")`
Ongoing work to simplify estimation 8 years ago			`return loc, scale, args`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago

Ongoing work to simplify estimation 8 years ago			`def _nnlf_and_penalty(self, x, args):`
			`cond0 = ~self._support_mask(x)`
			`n_bad = sum(cond0)`
			`if n_bad > 0:`
			`x = argsreduce(~cond0, x)[0]`
			`logpdf = self._logpdf(x, *args)`
			`finite_logpdf = np.isfinite(logpdf)`
			`n_bad += np.sum(~finite_logpdf, axis=0)`
			`if n_bad > 0:`
			`penalty = n_bad * log(_XMAX) * 100`
			`return -np.sum(logpdf[finite_logpdf], axis=0) + penalty`
			`return -np.sum(logpdf, axis=0)`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago

Simplified things. 8 years ago			`def _penalized_nnlf(self, theta, x):`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`''' Return negative loglikelihood function,`
			`i.e., - sum (log pdf(x, theta), axis=0)`
			`where theta are the parameters (including loc and scale)`
			`'''`
Ongoing work to simplify estimation 8 years ago			`loc, scale, args = _unpack_loc_scale(theta)`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`if not self._argcheck(*args) or scale <= 0:`
			`return inf`
			`x = asarray((x-loc) / scale)`
Ongoing work to simplify estimation 8 years ago			`n_log_scale = len(x) * log(scale)`
			`return _nnlf_and_penalty(self, x, args) + n_log_scale`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago

Removed duplicated code. 8 years ago			`def _convert_fshapes2num(self, kwds):`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`# First of all, convert fshapes params to fnum: eg for stats.beta,`
			# shapes='a, b'. To fix `a`, can specify either `f1` or `fa`.
			`# Convert the latter into the former.`
			`if self.shapes:`
			`shapes = self.shapes.replace(',', ' ').split()`
			`for j, s in enumerate(shapes):`
			`val = kwds.pop('f' + s, None) or kwds.pop('fix_' + s, None)`
			`if val is not None:`
Replaced string interpolation operator with 'format()' 8 years ago			`key = 'f{0:d}'.format(j)`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`if key in kwds:`
Replaced string interpolation operator with 'format()' 8 years ago			`raise ValueError("Duplicate entry for {0:s}.".format(key))`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`else:`
			`kwds[key] = val`
Removed duplicated code. 8 years ago			`return kwds`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago
Removed duplicated code. 8 years ago
			`def _unpack_args_kwds(self, args, kwds):`
			`kwds = _convert_fshapes2num(self, kwds)`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`args = list(args)`
			`fixedn = []`
Removed duplicated code. 8 years ago			`names = ['f%d' % n for n in range(len(args) - 2)] + ['floc', 'fscale']`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`x0 = []`
			`for n, key in enumerate(names):`
			`if key in kwds:`
			`fixedn.append(n)`
			`args[n] = kwds.pop(key)`
			`else:`
			`x0.append(args[n])`
Removed duplicated code. 8 years ago			`return x0, args, fixedn`


			`def _reduce_func(self, args, kwds):`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`method = kwds.pop('method', 'ml').lower()`
			`if method.startswith('mps'):`
Ongoing work to simplify estimation 8 years ago			`fitfun = self._penalized_nlogps`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`else:`
			`fitfun = self._penalized_nnlf`

Removed duplicated code. 8 years ago			`x0, args, fixedn = _unpack_args_kwds(self, args, kwds)`

			`nargs = len(args)`

Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`if len(fixedn) == 0:`
			`func = fitfun`
			`restore = None`
			`else:`
Removed duplicated code. 8 years ago			`if len(fixedn) == nargs:`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`raise ValueError(`
			`"All parameters fixed. There is nothing to optimize.")`

			`def restore(args, theta):`
			`# Replace with theta for all numbers not in fixedn`
			`# This allows the non-fixed values to vary, but`
			`# we still call self.nnlf with all parameters.`
			`i = 0`
Removed duplicated code. 8 years ago			`for n in range(nargs):`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`if n not in fixedn:`
			`args[n] = theta[i]`
			`i += 1`
			`return args`

			`def func(theta, x):`
			`newtheta = restore(args[:], theta)`
			`return fitfun(newtheta, x)`

Removed duplicated code. 8 years ago			`return x0, func, restore, args, fixedn`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago

Simplified code 8 years ago			`def _get_optimizer(kwds):`
			`optimizer = kwds.pop('optimizer', optimize.fmin)`
			`# convert string to function in scipy.optimize`
			`if not callable(optimizer) and isinstance(optimizer, string_types):`
			`if not optimizer.startswith('fmin_'):`
Removed duplicated code. 8 years ago			`optimizer = '_'.join(("fmin", optimizer))`
Simplified code 8 years ago			`try:`
			`optimizer = getattr(optimize, optimizer)`
			`except AttributeError:`
Removed duplicated code. 8 years ago			`raise ValueError("{} is not a valid optimizer".format(optimizer))`
Simplified code 8 years ago			`return optimizer`


Removed duplicated code. 8 years ago			`def _warn_if_no_success(warnflag):`
			`if warnflag == 1:`
			`warnings.warn("The maximum number of iterations was exceeded.")`
			`elif warnflag == 2:`
			`warnings.warn("Did not converge")`


			`def _fitstart(self, data, args, kwds):`
			`narg = len(args)`
			`if narg > self.numargs:`
			`raise TypeError("Too many input arguments.")`
			`start = [None] * 2`
			`if (narg < self.numargs) or not ('loc' in kwds and 'scale' in kwds):`
			`# get distribution specific starting locations`
			`start = self._fitstart(data)`
			`args += start[narg:-2]`
			`loc = kwds.pop('loc', start[-2])`
			`scale = kwds.pop('scale', start[-1])`
			`args += loc, scale`
			`return args, kwds`


Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`def fit(self, data, args, *kwargs):`
			`"""`
			`Return ML/MPS estimate for shape, location, and scale parameters from data.`

			`ML and MPS stands for Maximum Likelihood and Maximum Product Spacing,`
			`respectively. Starting estimates for`
			`the fit are given by input arguments; for any arguments not provided`
			with starting estimates, ``self._fitstart(data)`` is called to generate
			`such.`

			`One can hold some parameters fixed to specific values by passing in`
			keyword arguments ``f0``, ``f1``, ..., ``fn`` (for shape parameters)
			and ``floc`` and ``fscale`` (for location and scale parameters,
			`respectively).`

			`Parameters`
			`----------`
			`data : array_like`
			`Data to use in calculating the MLEs.`
			`args : floats, optional`
			`Starting value(s) for any shape-characterizing arguments (those not`
			provided will be determined by a call to ``_fitstart(data)``).
			`No default value.`
			`kwds : floats, optional`
			`Starting values for the location and scale parameters; no default.`
			`Special keyword arguments are recognized as holding certain`
			`parameters fixed:`

			`- f0...fn : hold respective shape parameters fixed.`
			`Alternatively, shape parameters to fix can be specified by name.`
			For example, if ``self.shapes == "a, b"``, ``fa``and ``fix_a``
			are equivalent to ``f0``, and ``fb`` and ``fix_b`` are
			equivalent to ``f1``.

			`- floc : hold location parameter fixed to specified value.`

			`- fscale : hold scale parameter fixed to specified value.`

Removed duplicated code. 8 years ago			`- method : of estimation. Options are`
			`'ml' : Maximum Likelihood method (default)`
			`'mps': Maximum Product Spacing method`

			`- alpha : Confidence coefficent (default=0.05)`

			`- search : bool`
			`If true search for best estimator (default),`
			`otherwise return object with initial distribution parameters`

Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			- optimizer : The optimizer to use. The optimizer must take ``func``,
Removed duplicated code. 8 years ago			`and starting position as the first two arguments,`
			plus ``args`` (for extra arguments to pass to the
			function to be optimized) and ``disp=0`` to suppress
			`output as keyword arguments.`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago
			`Returns`
			`-------`
			`shape, loc, scale : tuple of floats`
			`MLEs for any shape statistics, followed by those for location and`
			`scale.`

			`Notes`
			`-----`
			`This fit is computed by maximizing a log-likelihood function, with`
			`penalty applied for samples outside of range of the distribution. The`
			`returned answer is not guaranteed to be the globally optimal MLE, it`
			`may only be locally optimal, or the optimization may fail altogether.`


			`Examples`
			`--------`

			Generate some data to fit: draw random variates from the `beta`
			`distribution`

			`>>> from wafo.stats import beta`
			`>>> a, b = 1., 2.`
			`>>> x = beta.rvs(a, b, size=1000)`

			Now we can fit all four parameters (``a``, ``b``, ``loc`` and ``scale``):

			`>>> a1, b1, loc1, scale1 = beta.fit(x)`

			`We can also use some prior knowledge about the dataset: let's keep`
			``loc`` and ``scale`` fixed:

			`>>> a1, b1, loc1, scale1 = beta.fit(x, floc=0, fscale=1)`
			`>>> loc1, scale1`
			`(0, 1)`

			We can also keep shape parameters fixed by using ``f``-keywords. To
			keep the zero-th shape parameter ``a`` equal 1, use ``f0=1`` or,
			equivalently, ``fa=1``:

			`>>> a1, b1, loc1, scale1 = beta.fit(x, fa=1, floc=0, fscale=1)`
			`>>> a1`
			`1`

			`"""`
Removed duplicated code. 8 years ago			`vals, _ = self._fit(data, args, *kwargs)`
			`return vals`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago

Removed duplicated code. 8 years ago			`def _fit(self, data, args, *kwargs):`
			`args, kwds = _fitstart(self, data, args, kwargs.copy())`
			`x0, func, restore, args, fixedn = self._reduce_func(args, kwds)`
			`if kwds.pop('search', True):`
			`optimizer = _get_optimizer(kwds)`

			`# by now kwds must be empty, since everybody took what they needed`
			`if kwds:`
Replaced string interpolation operator with 'format()' 8 years ago			`raise TypeError("Unknown arguments: {}.".format(kwds))`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago
Removed duplicated code. 8 years ago			`output = optimizer(func, x0, args=(ravel(data),), full_output=True,`
			`disp=0)`
			`if output[-1] != 0:`
			`output = optimizer(func, output[0], args=(ravel(data),),`
			`full_output=True)`

			`_warn_if_no_success(output[-1])`
			`vals = tuple(output[0])`
			`else:`
			`vals = tuple(x0)`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago
			`if restore is not None:`
			`vals = restore(args, vals)`
			`vals = tuple(vals)`
Removed duplicated code. 8 years ago			`return vals, fixedn`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago

			`def fit2(self, data, args, *kwds):`
			`''' Return Maximum Likelihood or Maximum Product Spacing estimator object`

			`Parameters`
			`----------`
			`data : array-like`
			`Data to use in calculating the ML or MPS estimators`
			`args : optional`
			`Starting values for any shape arguments (those not specified`
			`will be determined by dist._fitstart(data))`
			`kwds : loc, scale`
			`Starting values for the location and scale parameters`
			`Special keyword arguments are recognized as holding certain`
			`parameters fixed:`
			`f0..fn : hold respective shape paramters fixed`
			`floc : hold location parameter fixed to specified value`
			`fscale : hold scale parameter fixed to specified value`
			`method : of estimation. Options are`
			`'ml' : Maximum Likelihood method (default)`
			`'mps': Maximum Product Spacing method`
			`alpha : scalar, optional`
			`Confidence coefficent (default=0.05)`
			`search : bool`
			`If true search for best estimator (default),`
			`otherwise return object with initial distribution parameters`
			`copydata : bool`
			`If true copydata (default)`
			`optimizer : The optimizer to use. The optimizer must take func,`
			`and starting position as the first two arguments,`
			`plus args (for extra arguments to pass to the`
			`function to be optimized) and disp=0 to suppress`
			`output as keyword arguments.`

			`Return`
			`------`
			`phat : FitDistribution object`
			`Fitted distribution object with following member variables:`
			`LLmax : loglikelihood function evaluated using par`
			`LPSmax : log product spacing function evaluated using par`
			`pvalue : p-value for the fit`
			`par : distribution parameters (fixed and fitted)`
			`par_cov : covariance of distribution parameters`
			`par_fix : fixed distribution parameters`
			`par_lower : lower (1-alpha)% confidence bound for the parameters`
			`par_upper : upper (1-alpha)% confidence bound for the parameters`

			`Note`
			`----`
			`data` is sorted using this function, so if `copydata`==False the data
			`in your namespace will be sorted as well.`
			`'''`
			`return FitDistribution(self, data, args, **kwds)`


Updated stats 9 years ago			`def _support_mask(self, x):`
			`return (self.a <= x) & (x <= self.b)`


			`def _open_support_mask(self, x):`
			`return (self.a < x) & (x < self.b)`


Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`rv_generic.freeze = freeze`
			`rv_discrete.freeze = freeze`
			`rv_continuous.freeze = freeze`
Simplified things. 8 years ago
Ongoing work to simplify estimation 8 years ago			`rv_continuous._penalized_nlogps = _penalized_nlogps`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`rv_continuous._penalized_nnlf = _penalized_nnlf`
			`rv_continuous._reduce_func = _reduce_func`
			`rv_continuous.fit = fit`
Removed duplicated code. 8 years ago			`rv_continuous._fit = _fit`
Fixed a failing doctest. Reorganized _penalized_nnlf 9 years ago			`rv_continuous.fit2 = fit2`
Updated stats 9 years ago			`rv_continuous._support_mask = _support_mask`
			`rv_continuous._open_support_mask = _open_support_mask`