Simplified wafo.stats:
- Deleted obsolete files.
- Requires scipy v0.16.
- _distn_infrastructure.py monkeypatches scipy.stats._distn_infrastructure.py.

parent 7403d821df (master), commit e73624161c

@@ -1,408 +0,0 @@
from __future__ import division, print_function, absolute_import

import warnings

import numpy as np
from scipy._lib.six import callable


def binned_statistic(x, values, statistic='mean',
                     bins=10, range=None):
    """
    Compute a binned statistic for a set of data.

    This is a generalization of a histogram function.  A histogram divides
    the space into bins, and returns the count of the number of points in
    each bin.  This function allows the computation of the sum, mean, median,
    or other statistic of the values within each bin.

    Parameters
    ----------
    x : array_like
        A sequence of values to be binned.
    values : array_like
        The values on which the statistic will be computed.  This must be
        the same shape as `x`.
    statistic : string or callable, optional
        The statistic to compute (default is 'mean').
        The following statistics are available:

          * 'mean' : compute the mean of values for points within each bin.
            Empty bins will be represented by NaN.
          * 'median' : compute the median of values for points within each
            bin. Empty bins will be represented by NaN.
          * 'count' : compute the count of points within each bin.  This is
            identical to an unweighted histogram.  `values` array is not
            referenced.
          * 'sum' : compute the sum of values for points within each bin.
            This is identical to a weighted histogram.
          * function : a user-defined function which takes a 1D array of
            values, and outputs a single numerical statistic. This function
            will be called on the values in each bin.  Empty bins will be
            represented by function([]), or NaN if this returns an error.

    bins : int or sequence of scalars, optional
        If `bins` is an int, it defines the number of equal-width
        bins in the given range (10, by default). If `bins` is a sequence,
        it defines the bin edges, including the rightmost edge, allowing
        for non-uniform bin widths.
    range : (float, float) or [(float, float)], optional
        The lower and upper range of the bins.  If not provided, range
        is simply ``(x.min(), x.max())``.  Values outside the range are
        ignored.

    Returns
    -------
    statistic : array
        The values of the selected statistic in each bin.
    bin_edges : array of dtype float
        Return the bin edges ``(length(statistic)+1)``.
    binnumber : 1-D ndarray of ints
        This assigns to each observation an integer that represents the bin
        in which this observation falls. Array has the same length as
        `values`.

    See Also
    --------
    numpy.histogram, binned_statistic_2d, binned_statistic_dd

    Notes
    -----
    All but the last (righthand-most) bin is half-open.  In other words, if
    `bins` is::

      [1, 2, 3, 4]

    then the first bin is ``[1, 2)`` (including 1, but excluding 2) and the
    second ``[2, 3)``.  The last bin, however, is ``[3, 4]``, which *includes*
    4.

    .. versionadded:: 0.11.0

    Examples
    --------
    >>> stats.binned_statistic([1, 2, 1, 2, 4], np.arange(5), statistic='mean',
    ... bins=3)
    (array([ 1.,  2.,  4.]), array([ 1.,  2.,  3.,  4.]), array([1, 2, 1, 2, 3]))

    """
    try:
        N = len(bins)
    except TypeError:
        N = 1

    if N != 1:
        bins = [np.asarray(bins, float)]

    if range is not None:
        if len(range) == 2:
            range = [range]

    medians, edges, xy = binned_statistic_dd([x], values, statistic,
                                             bins, range)

    return medians, edges[0], xy
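A minimal usage sketch (an illustrative addition, not part of the deleted file): binning five observations into three equal-width bins reproduces the docstring output, since the two points at x = 1 carry the values 0 and 2, the two at x = 2 carry 1 and 3, and the single point at x = 4 carries 4.

    import numpy as np

    stat, edges, binnumber = binned_statistic([1, 2, 1, 2, 4], np.arange(5),
                                              statistic='mean', bins=3)
    # stat      -> array([ 1.,  2.,  4.])
    # edges     -> array([ 1.,  2.,  3.,  4.])
    # binnumber -> array([1, 2, 1, 2, 3])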


def binned_statistic_2d(x, y, values, statistic='mean',
                        bins=10, range=None):
    """
    Compute a bidimensional binned statistic for a set of data.

    This is a generalization of a histogram2d function.  A histogram divides
    the space into bins, and returns the count of the number of points in
    each bin.  This function allows the computation of the sum, mean, median,
    or other statistic of the values within each bin.

    Parameters
    ----------
    x : (N,) array_like
        A sequence of values to be binned along the first dimension.
    y : (M,) array_like
        A sequence of values to be binned along the second dimension.
    values : (N,) array_like
        The values on which the statistic will be computed.  This must be
        the same shape as `x`.
    statistic : string or callable, optional
        The statistic to compute (default is 'mean').
        The following statistics are available:

          * 'mean' : compute the mean of values for points within each bin.
            Empty bins will be represented by NaN.
          * 'median' : compute the median of values for points within each
            bin. Empty bins will be represented by NaN.
          * 'count' : compute the count of points within each bin.  This is
            identical to an unweighted histogram.  `values` array is not
            referenced.
          * 'sum' : compute the sum of values for points within each bin.
            This is identical to a weighted histogram.
          * function : a user-defined function which takes a 1D array of
            values, and outputs a single numerical statistic. This function
            will be called on the values in each bin.  Empty bins will be
            represented by function([]), or NaN if this returns an error.

    bins : int or [int, int] or array_like or [array, array], optional
        The bin specification:

          * the number of bins for the two dimensions (nx = ny = bins),
          * the number of bins in each dimension (nx, ny = bins),
          * the bin edges for the two dimensions (x_edges = y_edges = bins),
          * the bin edges in each dimension (x_edges, y_edges = bins).

    range : (2,2) array_like, optional
        The leftmost and rightmost edges of the bins along each dimension
        (if not specified explicitly in the `bins` parameters):
        [[xmin, xmax], [ymin, ymax]]. All values outside of this range will be
        considered outliers and not tallied in the histogram.

    Returns
    -------
    statistic : (nx, ny) ndarray
        The values of the selected statistic in each two-dimensional bin.
    xedges : (nx + 1) ndarray
        The bin edges along the first dimension.
    yedges : (ny + 1) ndarray
        The bin edges along the second dimension.
    binnumber : 1-D ndarray of ints
        This assigns to each observation an integer that represents the bin
        in which this observation falls. Array has the same length as
        `values`.

    See Also
    --------
    numpy.histogram2d, binned_statistic, binned_statistic_dd

    Notes
    -----
    .. versionadded:: 0.11.0

    """

    # This code is based on np.histogram2d
    try:
        N = len(bins)
    except TypeError:
        N = 1

    if N != 1 and N != 2:
        xedges = yedges = np.asarray(bins, float)
        bins = [xedges, yedges]

    medians, edges, xy = binned_statistic_dd([x, y], values, statistic,
                                             bins, range)

    return medians, edges[0], edges[1], xy
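A short sketch of the 2-D variant (illustrative addition): with statistic='count' the `values` argument is not referenced, and the result matches an unweighted np.histogram2d over the same edges.

    import numpy as np

    rng = np.random.RandomState(0)
    x, y = rng.rand(2, 100)
    counts, xedges, yedges, binnumber = binned_statistic_2d(
        x, y, x, statistic='count', bins=[4, 4])
    # counts.shape -> (4, 4); counts.sum() -> 100.0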


def binned_statistic_dd(sample, values, statistic='mean',
                        bins=10, range=None):
    """
    Compute a multidimensional binned statistic for a set of data.

    This is a generalization of a histogramdd function.  A histogram divides
    the space into bins, and returns the count of the number of points in
    each bin.  This function allows the computation of the sum, mean, median,
    or other statistic of the values within each bin.

    Parameters
    ----------
    sample : array_like
        Data to histogram passed as a sequence of D arrays of length N, or
        as an (N,D) array.
    values : array_like
        The values on which the statistic will be computed.  This must be
        the same shape as `sample`.
    statistic : string or callable, optional
        The statistic to compute (default is 'mean').
        The following statistics are available:

          * 'mean' : compute the mean of values for points within each bin.
            Empty bins will be represented by NaN.
          * 'median' : compute the median of values for points within each
            bin. Empty bins will be represented by NaN.
          * 'count' : compute the count of points within each bin.  This is
            identical to an unweighted histogram.  `values` array is not
            referenced.
          * 'sum' : compute the sum of values for points within each bin.
            This is identical to a weighted histogram.
          * function : a user-defined function which takes a 1D array of
            values, and outputs a single numerical statistic. This function
            will be called on the values in each bin.  Empty bins will be
            represented by function([]), or NaN if this returns an error.

    bins : sequence or int, optional
        The bin specification:

          * A sequence of arrays describing the bin edges along each
            dimension.
          * The number of bins for each dimension (nx, ny, ... = bins).
          * The number of bins for all dimensions (nx = ny = ... = bins).

    range : sequence, optional
        A sequence of lower and upper bin edges to be used if the edges are
        not given explicitly in `bins`. Defaults to the minimum and maximum
        values along each dimension.

    Returns
    -------
    statistic : ndarray, shape(nx1, nx2, nx3,...)
        The values of the selected statistic in each bin.
    edges : list of ndarrays
        A list of D arrays describing the (nxi + 1) bin edges for each
        dimension.
    binnumber : 1-D ndarray of ints
        This assigns to each observation an integer that represents the bin
        in which this observation falls. Array has the same length as
        `values`.

    See Also
    --------
    numpy.histogramdd, binned_statistic, binned_statistic_2d

    Notes
    -----
    .. versionadded:: 0.11.0

    """
    if type(statistic) == str:
        if statistic not in ['mean', 'median', 'count', 'sum', 'std']:
            raise ValueError('unrecognized statistic "%s"' % statistic)
    elif callable(statistic):
        pass
    else:
        raise ValueError("statistic not understood")

    # The 'std', 'median' and callable branches below need ndarray semantics.
    values = np.asarray(values)

    # This code is based on np.histogramdd
    try:
        # Sample is an ND-array.
        N, D = sample.shape
    except (AttributeError, ValueError):
        # Sample is a sequence of 1D arrays.
        sample = np.atleast_2d(sample).T
        N, D = sample.shape

    nbin = np.empty(D, int)
    edges = D * [None]
    dedges = D * [None]

    try:
        M = len(bins)
        if M != D:
            raise AttributeError('The dimension of bins must be equal '
                                 'to the dimension of the sample x.')
    except TypeError:
        bins = D * [bins]

    # Select range for each dimension
    # Used only if number of bins is given.
    if range is None:
        smin = np.atleast_1d(np.array(sample.min(0), float))
        smax = np.atleast_1d(np.array(sample.max(0), float))
    else:
        smin = np.zeros(D)
        smax = np.zeros(D)
        for i in np.arange(D):
            smin[i], smax[i] = range[i]

    # Make sure the bins have a finite width.
    for i in np.arange(len(smin)):
        if smin[i] == smax[i]:
            smin[i] = smin[i] - .5
            smax[i] = smax[i] + .5

    # Create edge arrays
    for i in np.arange(D):
        if np.isscalar(bins[i]):
            nbin[i] = bins[i] + 2  # +2 for outlier bins
            edges[i] = np.linspace(smin[i], smax[i], nbin[i] - 1)
        else:
            edges[i] = np.asarray(bins[i], float)
            nbin[i] = len(edges[i]) + 1  # +1 for outlier bins
        dedges[i] = np.diff(edges[i])

    nbin = np.asarray(nbin)

    # Compute the bin number each sample falls into.
    Ncount = {}
    for i in np.arange(D):
        Ncount[i] = np.digitize(sample[:, i], edges[i])

    # Using digitize, values that fall on an edge are put in the right bin.
    # For the rightmost bin, we want values equal to the right
    # edge to be counted in the last bin, and not as an outlier.
    for i in np.arange(D):
        # Rounding precision
        decimal = int(-np.log10(dedges[i].min())) + 6
        # Find which points are on the rightmost edge.
        on_edge = np.where(np.around(sample[:, i], decimal)
                           == np.around(edges[i][-1], decimal))[0]
        # Shift these points one bin to the left.
        Ncount[i][on_edge] -= 1

    # Compute the sample indices in the flattened statistic matrix.
    ni = nbin.argsort()
    xy = np.zeros(N, int)
    for i in np.arange(0, D - 1):
        xy += Ncount[ni[i]] * nbin[ni[i + 1:]].prod()
    xy += Ncount[ni[-1]]

    result = np.empty(nbin.prod(), float)

    if statistic == 'mean':
        result.fill(np.nan)
        flatcount = np.bincount(xy, None)
        flatsum = np.bincount(xy, values)
        a = flatcount.nonzero()
        result[a] = flatsum[a] / flatcount[a]
    elif statistic == 'std':
        result.fill(0)
        flatcount = np.bincount(xy, None)
        flatsum = np.bincount(xy, values)
        flatsum2 = np.bincount(xy, values ** 2)
        a = flatcount.nonzero()
        result[a] = np.sqrt(flatsum2[a] / flatcount[a]
                            - (flatsum[a] / flatcount[a]) ** 2)
    elif statistic == 'count':
        result.fill(0)
        flatcount = np.bincount(xy, None)
        a = np.arange(len(flatcount))
        result[a] = flatcount
    elif statistic == 'sum':
        result.fill(0)
        flatsum = np.bincount(xy, values)
        a = np.arange(len(flatsum))
        result[a] = flatsum
    elif statistic == 'median':
        result.fill(np.nan)
        for i in np.unique(xy):
            result[i] = np.median(values[xy == i])
    elif callable(statistic):
        with warnings.catch_warnings():
            # Numpy generates a warning for mean/std/... on an empty list.
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            old = np.seterr(invalid='ignore')
            try:
                null = statistic([])
            except Exception:
                null = np.nan
            np.seterr(**old)
        result.fill(null)
        for i in np.unique(xy):
            result[i] = statistic(values[xy == i])

    # Shape into a proper matrix
    result = result.reshape(np.sort(nbin))
    for i in np.arange(nbin.size):
        j = ni.argsort()[i]
        result = result.swapaxes(i, j)
        ni[i], ni[j] = ni[j], ni[i]

    # Remove outliers (indices 0 and -1 for each dimension).
    # Index with a tuple; indexing with a plain list of slices is
    # deprecated in numpy.
    core = D * [slice(1, -1)]
    result = result[tuple(core)]

    if (result.shape != nbin - 2).any():
        raise RuntimeError('Internal Shape Error')

    return result, edges, xy
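One point worth illustrating (editorial addition, not part of the deleted file): the returned `binnumber` indexes the internal flattened matrix that still contains the outlier bins, so it is not a direct index into `statistic`; use the `edges` list to locate cells.

    import numpy as np

    rng = np.random.RandomState(1)
    sample = rng.rand(500, 3)                     # 500 points in 3-D
    stat, edges, binnumber = binned_statistic_dd(sample, sample.sum(axis=1),
                                                 statistic='median',
                                                 bins=[5, 5, 5])
    # stat.shape -> (5, 5, 5); edges -> list of three length-6 edge arrays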
[File diff suppressed because it is too large]
[File diff suppressed because it is too large]

@@ -1,884 +0,0 @@
#
# Author: Joris Vankerschaver 2013
#
from __future__ import division, print_function, absolute_import

import numpy as np
import scipy.linalg
from scipy.misc import doccer
from scipy.special import gammaln, psi  # psi is used by dirichlet.entropy


__all__ = ['multivariate_normal', 'dirichlet']

_LOG_2PI = np.log(2 * np.pi)


def _process_parameters(dim, mean, cov):
    """
    Infer dimensionality from mean or covariance matrix, ensure that
    mean and covariance are full vector resp. matrix.

    """

    # Try to infer dimensionality
    if dim is None:
        if mean is None:
            if cov is None:
                dim = 1
            else:
                cov = np.asarray(cov, dtype=float)
                if cov.ndim < 2:
                    dim = 1
                else:
                    dim = cov.shape[0]
        else:
            mean = np.asarray(mean, dtype=float)
            dim = mean.size
    else:
        if not np.isscalar(dim):
            raise ValueError("Dimension of random variable must be a scalar.")

    # Check input sizes and return full arrays for mean and cov if necessary
    if mean is None:
        mean = np.zeros(dim)
    mean = np.asarray(mean, dtype=float)

    if cov is None:
        cov = 1.0
    cov = np.asarray(cov, dtype=float)

    if dim == 1:
        mean.shape = (1,)
        cov.shape = (1, 1)

    if mean.ndim != 1 or mean.shape[0] != dim:
        raise ValueError("Array 'mean' must be a vector of length %d." % dim)
    if cov.ndim == 0:
        cov = cov * np.eye(dim)
    elif cov.ndim == 1:
        cov = np.diag(cov)
    elif cov.ndim == 2 and cov.shape != (dim, dim):
        rows, cols = cov.shape
        if rows != cols:
            msg = ("Array 'cov' must be square if it is two dimensional,"
                   " but cov.shape = %s." % str(cov.shape))
        else:
            msg = ("Dimension mismatch: array 'cov' is of shape %s,"
                   " but 'mean' is a vector of length %d.")
            msg = msg % (str(cov.shape), len(mean))
        raise ValueError(msg)
    elif cov.ndim > 2:
        raise ValueError("Array 'cov' must be at most two-dimensional,"
                         " but cov.ndim = %d" % cov.ndim)

    return dim, mean, cov
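For illustration (not part of the deleted file), the helper broadcasts scalar and 1-D covariances: a scalar becomes that value times the identity, and a vector becomes a diagonal matrix.

    dim, mean, cov = _process_parameters(None, [0., 0.], 1.0)
    # dim -> 2, mean -> array([ 0.,  0.]), cov -> 2x2 identity
    dim, mean, cov = _process_parameters(2, None, [1.0, 4.0])
    # mean -> zeros(2), cov -> diag([ 1.,  4.])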


def _process_quantiles(x, dim):
    """
    Adjust quantiles array so that last axis labels the components of
    each data point.

    """
    x = np.asarray(x, dtype=float)

    if x.ndim == 0:
        x = x[np.newaxis]
    elif x.ndim == 1:
        if dim == 1:
            x = x[:, np.newaxis]
        else:
            x = x[np.newaxis, :]

    return x


def _squeeze_output(out):
    """
    Remove single-dimensional entries from array and convert to scalar,
    if necessary.

    """
    out = out.squeeze()
    if out.ndim == 0:
        out = out[()]
    return out


def _eigvalsh_to_eps(spectrum, cond=None, rcond=None):
    """
    Determine which eigenvalues are "small" given the spectrum.

    This is for compatibility across various linear algebra functions
    that should agree about whether or not a Hermitian matrix is numerically
    singular and what is its numerical matrix rank.
    This is designed to be compatible with scipy.linalg.pinvh.

    Parameters
    ----------
    spectrum : 1d ndarray
        Array of eigenvalues of a Hermitian matrix.
    cond, rcond : float, optional
        Cutoff for small eigenvalues.
        Singular values smaller than rcond * largest_eigenvalue are
        considered zero.
        If None or -1, suitable machine precision is used.

    Returns
    -------
    eps : float
        Magnitude cutoff for numerical negligibility.

    """
    if rcond is not None:
        cond = rcond
    if cond in [None, -1]:
        t = spectrum.dtype.char.lower()
        factor = {'f': 1E3, 'd': 1E6}
        cond = factor[t] * np.finfo(t).eps
    eps = cond * np.max(abs(spectrum))
    return eps


def _pinv_1d(v, eps=1e-5):
    """
    A helper function for computing the pseudoinverse.

    Parameters
    ----------
    v : iterable of numbers
        This may be thought of as a vector of eigenvalues or singular values.
    eps : float
        Values with magnitude no greater than eps are considered negligible.

    Returns
    -------
    v_pinv : 1d float ndarray
        A vector of pseudo-inverted numbers.

    """
    return np.array([0 if abs(x) <= eps else 1/x for x in v], dtype=float)
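Illustrative check (an addition, not part of the deleted file): entries with magnitude at or below `eps` are zeroed rather than inverted, which is what keeps the downstream pseudo-inverse finite for singular spectra.

    _pinv_1d([2.0, 1e-9, 0.5], eps=1e-5)
    # -> array([ 0.5,  0. ,  2. ])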


class _PSD(object):
    """
    Compute coordinated functions of a symmetric positive semidefinite matrix.

    This class addresses two issues.  Firstly it allows the pseudoinverse,
    the logarithm of the pseudo-determinant, and the rank of the matrix
    to be computed using one call to eigh instead of three.
    Secondly it allows these functions to be computed in a way
    that gives mutually compatible results.
    All of the functions are computed with a common understanding as to
    which of the eigenvalues are to be considered negligibly small.
    The functions are designed to coordinate with scipy.linalg.pinvh()
    but not necessarily with np.linalg.det() or with np.linalg.matrix_rank().

    Parameters
    ----------
    M : 2d array-like
        Symmetric positive semidefinite matrix.
    cond, rcond : float, optional
        Cutoff for small eigenvalues.
        Singular values smaller than rcond * largest_eigenvalue are
        considered zero.
        If None or -1, suitable machine precision is used.
    lower : bool, optional
        Whether the pertinent array data is taken from the lower
        or upper triangle of M. (Default: lower)
    check_finite : bool, optional
        Whether to check that the input matrices contain only finite
        numbers. Disabling may give a performance gain, but may result
        in problems (crashes, non-termination) if the inputs do contain
        infinities or NaNs.
    allow_singular : bool, optional
        Whether to allow a singular matrix.  (Default: True)

    Notes
    -----
    The arguments are similar to those of scipy.linalg.pinvh().

    """

    def __init__(self, M, cond=None, rcond=None, lower=True,
                 check_finite=True, allow_singular=True):
        # Compute the symmetric eigendecomposition.
        # Note that eigh takes care of array conversion, chkfinite,
        # and assertion that the matrix is square.
        s, u = scipy.linalg.eigh(M, lower=lower, check_finite=check_finite)

        eps = _eigvalsh_to_eps(s, cond, rcond)
        if np.min(s) < -eps:
            raise ValueError('the input matrix must be positive semidefinite')
        d = s[s > eps]
        if len(d) < len(s) and not allow_singular:
            raise np.linalg.LinAlgError('singular matrix')
        s_pinv = _pinv_1d(s, eps)
        U = np.multiply(u, np.sqrt(s_pinv))

        # Initialize the eagerly precomputed attributes.
        self.rank = len(d)
        self.U = U
        self.log_pdet = np.sum(np.log(d))

        # Initialize an attribute to be lazily computed.
        self._pinv = None

    @property
    def pinv(self):
        if self._pinv is None:
            self._pinv = np.dot(self.U, self.U.T)
        return self._pinv
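A small sketch of how the class is consumed (illustrative addition): a rank-deficient covariance is accepted by default, and the lazily computed `pinv` equals U U^T.

    import numpy as np

    psd = _PSD(np.diag([2.0, 0.0]))
    # psd.rank -> 1, psd.log_pdet -> log(2)
    # psd.pinv -> array([[ 0.5,  0. ],
    #                    [ 0. ,  0. ]])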


_doc_default_callparams = """\
mean : array_like, optional
    Mean of the distribution (default zero)
cov : array_like, optional
    Covariance matrix of the distribution (default one)
allow_singular : bool, optional
    Whether to allow a singular covariance matrix.  (Default: False)
"""

_doc_callparams_note = \
    """Setting the parameter `mean` to `None` is equivalent to having `mean`
    be the zero-vector. The parameter `cov` can be a scalar, in which case
    the covariance matrix is the identity times that value, a vector of
    diagonal entries for the covariance matrix, or a two-dimensional
    array_like.
    """

_doc_frozen_callparams = ""

_doc_frozen_callparams_note = \
    """See class definition for a detailed description of parameters."""

docdict_params = {
    '_doc_default_callparams': _doc_default_callparams,
    '_doc_callparams_note': _doc_callparams_note
}

docdict_noparams = {
    '_doc_default_callparams': _doc_frozen_callparams,
    '_doc_callparams_note': _doc_frozen_callparams_note
}


class multivariate_normal_gen(object):
    r"""
    A multivariate normal random variable.

    The `mean` keyword specifies the mean. The `cov` keyword specifies the
    covariance matrix.

    Methods
    -------
    pdf(x, mean=None, cov=1, allow_singular=False)
        Probability density function.
    logpdf(x, mean=None, cov=1, allow_singular=False)
        Log of the probability density function.
    rvs(mean=None, cov=1, allow_singular=False, size=1)
        Draw random samples from a multivariate normal distribution.
    entropy()
        Compute the differential entropy of the multivariate normal.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
    %(_doc_default_callparams)s

    Alternatively, the object may be called (as a function) to fix the mean
    and covariance parameters, returning a "frozen" multivariate normal
    random variable:

    rv = multivariate_normal(mean=None, cov=1, allow_singular=False)
        - Frozen object with the same methods but holding the given
          mean and covariance fixed.

    Notes
    -----
    %(_doc_callparams_note)s

    The covariance matrix `cov` must be a (symmetric) positive
    semi-definite matrix. The determinant and inverse of `cov` are computed
    as the pseudo-determinant and pseudo-inverse, respectively, so
    that `cov` does not need to have full rank.

    The probability density function for `multivariate_normal` is

    .. math::

        f(x) = \frac{1}{\sqrt{(2 \pi)^k \det \Sigma}} \exp\left( -\frac{1}{2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right),

    where :math:`\mu` is the mean, :math:`\Sigma` the covariance matrix,
    and :math:`k` is the dimension of the space where :math:`x` takes values.

    .. versionadded:: 0.14.0

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from scipy.stats import multivariate_normal

    >>> x = np.linspace(0, 5, 10, endpoint=False)
    >>> y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y
    array([ 0.00108914,  0.01033349,  0.05946514,  0.20755375,  0.43939129,
            0.56418958,  0.43939129,  0.20755375,  0.05946514,  0.01033349])
    >>> plt.plot(x, y)

    The input quantiles can be any shape of array, as long as the last
    axis labels the components.  This allows us for instance to
    display the frozen pdf for a non-isotropic random variable in 2D as
    follows:

    >>> x, y = np.mgrid[-1:1:.01, -1:1:.01]
    >>> pos = np.empty(x.shape + (2,))
    >>> pos[:, :, 0] = x; pos[:, :, 1] = y
    >>> rv = multivariate_normal([0.5, -0.2], [[2.0, 0.3], [0.3, 0.5]])
    >>> plt.contourf(x, y, rv.pdf(pos))

    """

    def __init__(self):
        self.__doc__ = doccer.docformat(self.__doc__, docdict_params)

    def __call__(self, mean=None, cov=1, allow_singular=False):
        """
        Create a frozen multivariate normal distribution.

        See `multivariate_normal_frozen` for more information.

        """
        return multivariate_normal_frozen(mean, cov,
                                          allow_singular=allow_singular)

    def _logpdf(self, x, mean, prec_U, log_det_cov, rank):
        """
        Parameters
        ----------
        x : ndarray
            Points at which to evaluate the log of the probability
            density function
        mean : ndarray
            Mean of the distribution
        prec_U : ndarray
            A decomposition such that np.dot(prec_U, prec_U.T)
            is the precision matrix, i.e. inverse of the covariance matrix.
        log_det_cov : float
            Logarithm of the determinant of the covariance matrix
        rank : int
            Rank of the covariance matrix.

        Notes
        -----
        As this function does no argument checking, it should not be
        called directly; use 'logpdf' instead.

        """
        dev = x - mean
        maha = np.sum(np.square(np.dot(dev, prec_U)), axis=-1)
        return -0.5 * (rank * _LOG_2PI + log_det_cov + maha)

    def logpdf(self, x, mean, cov, allow_singular=False):
        """
        Log of the multivariate normal probability density function.

        Parameters
        ----------
        x : array_like
            Quantiles, with the last axis of `x` denoting the components.
        %(_doc_default_callparams)s

        Notes
        -----
        %(_doc_callparams_note)s

        Returns
        -------
        pdf : ndarray
            Log of the probability density function evaluated at `x`

        """
        dim, mean, cov = _process_parameters(None, mean, cov)
        x = _process_quantiles(x, dim)
        psd = _PSD(cov, allow_singular=allow_singular)
        out = self._logpdf(x, mean, psd.U, psd.log_pdet, psd.rank)
        return _squeeze_output(out)

    def pdf(self, x, mean, cov, allow_singular=False):
        """
        Multivariate normal probability density function.

        Parameters
        ----------
        x : array_like
            Quantiles, with the last axis of `x` denoting the components.
        %(_doc_default_callparams)s

        Notes
        -----
        %(_doc_callparams_note)s

        Returns
        -------
        pdf : ndarray
            Probability density function evaluated at `x`

        """
        dim, mean, cov = _process_parameters(None, mean, cov)
        x = _process_quantiles(x, dim)
        psd = _PSD(cov, allow_singular=allow_singular)
        out = np.exp(self._logpdf(x, mean, psd.U, psd.log_pdet, psd.rank))
        return _squeeze_output(out)

    def rvs(self, mean=None, cov=1, size=1):
        """
        Draw random samples from a multivariate normal distribution.

        Parameters
        ----------
        %(_doc_default_callparams)s
        size : integer, optional
            Number of samples to draw (default 1).

        Notes
        -----
        %(_doc_callparams_note)s

        Returns
        -------
        rvs : ndarray or scalar
            Random variates of size (`size`, `N`), where `N` is the
            dimension of the random variable.

        """
        dim, mean, cov = _process_parameters(None, mean, cov)
        out = np.random.multivariate_normal(mean, cov, size)
        return _squeeze_output(out)

    def entropy(self, mean=None, cov=1):
        """
        Compute the differential entropy of the multivariate normal.

        Parameters
        ----------
        %(_doc_default_callparams)s

        Notes
        -----
        %(_doc_callparams_note)s

        Returns
        -------
        h : scalar
            Entropy of the multivariate normal distribution

        """
        dim, mean, cov = _process_parameters(None, mean, cov)
        return 0.5 * np.log(np.linalg.det(2 * np.pi * np.e * cov))


multivariate_normal = multivariate_normal_gen()
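Quick sanity check of the instance just created (illustrative addition): at the mean of a standard bivariate normal the density is 1/(2*pi).

    import numpy as np

    p = multivariate_normal.pdf(np.zeros(2), mean=np.zeros(2), cov=np.eye(2))
    # p -> 1 / (2 * np.pi) ~ 0.15915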


class multivariate_normal_frozen(object):
    def __init__(self, mean=None, cov=1, allow_singular=False):
        """
        Create a frozen multivariate normal distribution.

        Parameters
        ----------
        mean : array_like, optional
            Mean of the distribution (default zero)
        cov : array_like, optional
            Covariance matrix of the distribution (default one)
        allow_singular : bool, optional
            If this flag is True then tolerate a singular
            covariance matrix (default False).

        Examples
        --------
        When called with the default parameters, this will create a 1D random
        variable with mean 0 and covariance 1:

        >>> from scipy.stats import multivariate_normal
        >>> r = multivariate_normal()
        >>> r.mean
        array([ 0.])
        >>> r.cov
        array([[1.]])

        """
        self.dim, self.mean, self.cov = _process_parameters(None, mean, cov)
        self.cov_info = _PSD(self.cov, allow_singular=allow_singular)
        self._mnorm = multivariate_normal_gen()

    def logpdf(self, x):
        x = _process_quantiles(x, self.dim)
        out = self._mnorm._logpdf(x, self.mean, self.cov_info.U,
                                  self.cov_info.log_pdet, self.cov_info.rank)
        return _squeeze_output(out)

    def pdf(self, x):
        return np.exp(self.logpdf(x))

    def rvs(self, size=1):
        return self._mnorm.rvs(self.mean, self.cov, size)

    def entropy(self):
        """
        Computes the differential entropy of the multivariate normal.

        Returns
        -------
        h : scalar
            Entropy of the multivariate normal distribution

        """
        log_pdet = self.cov_info.log_pdet
        rank = self.cov_info.rank
        return 0.5 * (rank * (_LOG_2PI + 1) + log_pdet)
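The frozen entropy uses the closed form h = (1/2) * [rank * (1 + log(2*pi)) + log pseudo-det(Sigma)], so for a full-rank covariance it agrees with the generator's determinant-based formula. A quick check (illustrative addition):

    import numpy as np

    rv = multivariate_normal_frozen(mean=np.zeros(2), cov=np.eye(2))
    # rv.entropy() -> 0.5 * 2 * (1 + np.log(2 * np.pi)) ~ 2.8379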


# Set frozen generator docstrings from corresponding docstrings in
# multivariate_normal_gen and fill in default strings in class docstrings
for name in ['logpdf', 'pdf', 'rvs']:
    method = multivariate_normal_gen.__dict__[name]
    method_frozen = multivariate_normal_frozen.__dict__[name]
    method_frozen.__doc__ = doccer.docformat(method.__doc__, docdict_noparams)
    method.__doc__ = doccer.docformat(method.__doc__, docdict_params)

_dirichlet_doc_default_callparams = """\
alpha : array_like
    The concentration parameters. The number of entries determines the
    dimensionality of the distribution.
"""
_dirichlet_doc_frozen_callparams = ""

_dirichlet_doc_frozen_callparams_note = \
    """See class definition for a detailed description of parameters."""

dirichlet_docdict_params = {
    '_dirichlet_doc_default_callparams': _dirichlet_doc_default_callparams,
}

dirichlet_docdict_noparams = {
    '_dirichlet_doc_default_callparams': _dirichlet_doc_frozen_callparams,
}


def _dirichlet_check_parameters(alpha):
    alpha = np.asarray(alpha)
    if np.min(alpha) <= 0:
        raise ValueError("All parameters must be greater than 0")
    elif alpha.ndim != 1:
        raise ValueError("Parameter vector 'a' must be one dimensional, " +
                         "but a.shape = %s." % str(alpha.shape))
    return alpha


def _dirichlet_check_input(alpha, x):
    x = np.asarray(x)

    if x.shape[0] + 1 != alpha.shape[0] and x.shape[0] != alpha.shape[0]:
        raise ValueError("Vector 'x' must have one entry less than the" +
                         " parameter vector 'a', but alpha.shape = " +
                         "%s and " % alpha.shape +
                         "x.shape = %s." % x.shape)

    if x.shape[0] != alpha.shape[0]:
        xk = np.array([1 - np.sum(x, 0)])
        if xk.ndim == 1:
            x = np.append(x, xk)
        elif xk.ndim == 2:
            x = np.vstack((x, xk))
        else:
            raise ValueError("The input must be one dimensional or a two "
                             "dimensional matrix containing the entries.")

    if np.min(x) < 0:
        raise ValueError("Each entry in 'x' must be greater than or equal "
                         "to zero.")

    if np.max(x) > 1:
        raise ValueError("Each entry in 'x' must be smaller than or equal "
                         "to one.")

    if (np.abs(np.sum(x, 0) - 1.0) > 10e-10).any():
        raise ValueError("The input vector 'x' must lie within the normal "
                         "simplex, but sum(x) = %f." % np.sum(x, 0))

    return x


def _lnB(alpha):
    r"""
    Internal helper function to compute the log of the useful quotient

    .. math::
        \mathrm{B}(\alpha) = \frac{\prod_{i=1}^{K}\Gamma(\alpha_i)}{\Gamma\left(\sum_{i=1}^{K}\alpha_i\right)}

    Parameters
    ----------
    %(_dirichlet_doc_default_callparams)s

    Returns
    -------
    B : scalar
        Helper quotient, internal use only

    """
    return np.sum(gammaln(alpha)) - gammaln(np.sum(alpha))
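For example (illustrative addition): with alpha = (1, 1, 1) the quotient is B(alpha) = Gamma(1)^3 / Gamma(3) = 1/2, so the helper returns -log(2).

    import numpy as np

    _lnB(np.array([1., 1., 1.]))
    # -> 3 * gammaln(1) - gammaln(3) = -log(2) ~ -0.6931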
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class dirichlet_gen(object):
 | 
			
		||||
    r"""
 | 
			
		||||
    A Dirichlet random variable.
 | 
			
		||||
 | 
			
		||||
    The `alpha` keyword specifies the concentration parameters of the
 | 
			
		||||
    distribution.
 | 
			
		||||
 | 
			
		||||
    .. versionadded:: 0.15.0
 | 
			
		||||
 | 
			
		||||
    Methods
 | 
			
		||||
    -------
 | 
			
		||||
    pdf(x, alpha)
 | 
			
		||||
        Probability density function.
 | 
			
		||||
    logpdf(x, alpha)
 | 
			
		||||
        Log of the probability density function.
 | 
			
		||||
    rvs(alpha, size=1)
 | 
			
		||||
        Draw random samples from a Dirichlet distribution.
 | 
			
		||||
    mean(alpha)
 | 
			
		||||
        The mean of the Dirichlet distribution
 | 
			
		||||
    var(alpha)
 | 
			
		||||
        The variance of the Dirichlet distribution
 | 
			
		||||
    entropy(alpha)
 | 
			
		||||
        Compute the differential entropy of the multivariate normal.
 | 
			
		||||
 | 
			
		||||
    Parameters
 | 
			
		||||
    ----------
 | 
			
		||||
    x : array_like
 | 
			
		||||
        Quantiles, with the last axis of `x` denoting the components.
 | 
			
		||||
    %(_dirichlet_doc_default_callparams)s
 | 
			
		||||
 | 
			
		||||
    Alternatively, the object may be called (as a function) to fix
 | 
			
		||||
    concentration parameters, returning a "frozen" Dirichlet
 | 
			
		||||
    random variable:
 | 
			
		||||
 | 
			
		||||
    rv = dirichlet(alpha)
 | 
			
		||||
        - Frozen object with the same methods but holding the given
 | 
			
		||||
          concentration parameters fixed.
 | 
			
		||||
 | 
			
		||||
    Notes
 | 
			
		||||
    -----
 | 
			
		||||
    Each :math:`\alpha` entry must be positive. The distribution has only
 | 
			
		||||
    support on the simplex defined by
 | 
			
		||||
 | 
			
		||||
    .. math::
 | 
			
		||||
        \sum_{i=1}^{K} x_i \le 1
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    The probability density function for `dirichlet` is
 | 
			
		||||
 | 
			
		||||
    .. math::
 | 
			
		||||
 | 
			
		||||
        f(x) = \frac{1}{\mathrm{B}(\boldsymbol\alpha)} \prod_{i=1}^K x_i^{\alpha_i - 1}
 | 
			
		||||
 | 
			
		||||
    where
 | 
			
		||||
 | 
			
		||||
    .. math::
 | 
			
		||||
        \mathrm{B}(\boldsymbol\alpha) = \frac{\prod_{i=1}^K \Gamma(\alpha_i)}{\Gamma\bigl(\sum_{i=1}^K \alpha_i\bigr)}
 | 
			
		||||
 | 
			
		||||
    and :math:`\boldsymbol\alpha=(\alpha_1,\ldots,\alpha_K)`, the
 | 
			
		||||
    concentration parameters and :math:`K` is the dimension of the space
 | 
			
		||||
    where :math:`x` takes values.
 | 
			
		||||
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
        self.__doc__ = doccer.docformat(self.__doc__, dirichlet_docdict_params)
 | 
			
		||||
 | 
			
		||||
    def __call__(self, alpha):
 | 
			
		||||
        return dirichlet_frozen(alpha)
 | 
			
		||||
 | 
			
		||||
    def _logpdf(self, x, alpha):
 | 
			
		||||
        """
 | 
			
		||||
        Parameters
 | 
			
		||||
        ----------
 | 
			
		||||
        x : ndarray
 | 
			
		||||
            Points at which to evaluate the log of the probability
 | 
			
		||||
            density function
 | 
			
		||||
        %(_dirichlet_doc_default_callparams)s
 | 
			
		||||
 | 
			
		||||
        Notes
 | 
			
		||||
        -----
 | 
			
		||||
        As this function does no argument checking, it should not be
 | 
			
		||||
        called directly; use 'logpdf' instead.
 | 
			
		||||
 | 
			
		||||
        """
 | 
			
		||||
        lnB = _lnB(alpha)
 | 
			
		||||
        return - lnB + np.sum((np.log(x.T) * (alpha - 1)).T, 0)
 | 
			
		||||
 | 
			
		||||
    def logpdf(self, x, alpha):
 | 
			
		||||
        """
 | 
			
		||||
        Log of the Dirichlet probability density function.
 | 
			
		||||
 | 
			
		||||
        Parameters
 | 
			
		||||
        ----------
 | 
			
		||||
        x : array_like
 | 
			
		||||
            Quantiles, with the last axis of `x` denoting the components.
 | 
			
		||||
        %(_dirichlet_doc_default_callparams)s
 | 
			
		||||
 | 
			
		||||
        Returns
 | 
			
		||||
        -------
 | 
			
		||||
        pdf : ndarray
 | 
			
		||||
            Log of the probability density function evaluated at `x`
 | 
			
		||||
        """
 | 
			
		||||
        alpha = _dirichlet_check_parameters(alpha)
 | 
			
		||||
        x = _dirichlet_check_input(alpha, x)
 | 
			
		||||
 | 
			
		||||
        out = self._logpdf(x, alpha)
 | 
			
		||||
        return _squeeze_output(out)
 | 
			
		||||
 | 
			
		||||
    def pdf(self, x, alpha):
 | 
			
		||||
        """
 | 
			
		||||
        The Dirichlet probability density function.
 | 
			
		||||
 | 
			
		||||
        Parameters
 | 
			
		||||
        ----------
 | 
			
		||||
        x : array_like
 | 
			
		||||
            Quantiles, with the last axis of `x` denoting the components.
 | 
			
		||||
        %(_dirichlet_doc_default_callparams)s
 | 
			
		||||
 | 
			
		||||
        Returns
 | 
			
		||||
        -------
 | 
			
		||||
        pdf : ndarray
 | 
			
		||||
            The probability density function evaluated at `x`
 | 
			
		||||
        """
 | 
			
		||||
        alpha = _dirichlet_check_parameters(alpha)
 | 
			
		||||
        x = _dirichlet_check_input(alpha, x)
 | 
			
		||||
 | 
			
		||||
        out = np.exp(self._logpdf(x, alpha))
 | 
			
		||||
        return _squeeze_output(out)
 | 
			
		||||
 | 
			
		||||
    def mean(self, alpha):
 | 
			
		||||
        """
 | 
			
		||||
        Compute the mean of the dirichlet distribution.
 | 
			
		||||
 | 
			
		||||
        Parameters
 | 
			
		||||
        ----------
 | 
			
		||||
        %(_dirichlet_doc_default_callparams)s
 | 
			
		||||
 | 
			
		||||
        Returns
 | 
			
		||||
        -------
 | 
			
		||||
        mu : ndarray
            Mean of the Dirichlet distribution

        """
        alpha = _dirichlet_check_parameters(alpha)

        out = alpha / (np.sum(alpha))
        return _squeeze_output(out)

    def var(self, alpha):
        """
        Compute the variance of the dirichlet distribution.

        Parameters
        ----------
        %(_dirichlet_doc_default_callparams)s

        Returns
        -------
        v : ndarray
            Variance of the Dirichlet distribution

        """

        alpha = _dirichlet_check_parameters(alpha)

        alpha0 = np.sum(alpha)
        out = (alpha * (alpha0 - alpha)) / ((alpha0 * alpha0) * (alpha0 + 1))
        return out

    def entropy(self, alpha):
        """
        Compute the differential entropy of the dirichlet distribution.

        Parameters
        ----------
        %(_dirichlet_doc_default_callparams)s

        Returns
        -------
        h : scalar
            Entropy of the Dirichlet distribution

        """

        alpha = _dirichlet_check_parameters(alpha)

        alpha0 = np.sum(alpha)
        lnB = _lnB(alpha)
        K = alpha.shape[0]

        out = lnB + (alpha0 - K) * scipy.special.psi(alpha0) - np.sum(
            (alpha - 1) * scipy.special.psi(alpha))
        return _squeeze_output(out)

    def rvs(self, alpha, size=1):
        """
        Draw random samples from a Dirichlet distribution.

        Parameters
        ----------
        %(_dirichlet_doc_default_callparams)s
        size : integer, optional
            Number of samples to draw (default 1).

        Returns
        -------
        rvs : ndarray or scalar
            Random variates of size (`size`, `N`), where `N` is the
            dimension of the random variable.

        """
        alpha = _dirichlet_check_parameters(alpha)
        return np.random.dirichlet(alpha, size=size)

dirichlet = dirichlet_gen()


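# Hedged usage sketch (not part of the original file); values are omitted and
# the calls mirror the frozen-distribution pattern used across scipy.stats:
#
# >>> alpha = np.array([0.5, 1.0, 1.5])
# >>> x = np.array([0.2, 0.3, 0.5])       # a point on the simplex
# >>> dirichlet.pdf(x, alpha)             # generator form
# >>> rv = dirichlet(alpha)               # frozen form (dirichlet_frozen)
# >>> rv.pdf(x), rv.mean(), rv.var()      # parameters fixed once
# >>> rv.rvs(size=5)                      # array of shape (5, 3)

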
class dirichlet_frozen(object):
    def __init__(self, alpha):
        self.alpha = _dirichlet_check_parameters(alpha)
        self._dirichlet = dirichlet_gen()

    def logpdf(self, x):
        return self._dirichlet.logpdf(x, self.alpha)

    def pdf(self, x):
        return self._dirichlet.pdf(x, self.alpha)

    def mean(self):
        return self._dirichlet.mean(self.alpha)

    def var(self):
        return self._dirichlet.var(self.alpha)

    def entropy(self):
        return self._dirichlet.entropy(self.alpha)

    def rvs(self, size=1):
        return self._dirichlet.rvs(self.alpha, size)


# Set frozen generator docstrings from corresponding docstrings in
# dirichlet_gen and fill in default strings in class docstrings
for name in ['logpdf', 'pdf', 'rvs', 'mean', 'var', 'entropy']:
    method = dirichlet_gen.__dict__[name]
    method_frozen = dirichlet_frozen.__dict__[name]
    method_frozen.__doc__ = doccer.docformat(
        method.__doc__, dirichlet_docdict_noparams)
    method.__doc__ = doccer.docformat(method.__doc__, dirichlet_docdict_params)
@ -1,201 +0,0 @@
from __future__ import division, print_function, absolute_import

import numpy as np
from numpy import poly1d
from scipy.special import beta


# The following code was used to generate the Pade coefficients for the
# Tukey Lambda variance function.  Version 0.17 of mpmath was used.
#---------------------------------------------------------------------------
# import mpmath as mp
#
# mp.mp.dps = 60
#
# one   = mp.mpf(1)
# two   = mp.mpf(2)
#
# def mpvar(lam):
#     if lam == 0:
#         v = mp.pi**2 / mp.mpf(3)
#     else:
#         v = (two / lam**2) * (one / (one + two*lam) -
#                               mp.beta(lam + one, lam + one))
#     return v
#
# t = mp.taylor(mpvar, 0, 8)
# p, q = mp.pade(t, 4, 4)
# print "p =", [mp.fp.mpf(c) for c in p]
# print "q =", [mp.fp.mpf(c) for c in q]
#---------------------------------------------------------------------------

# Pade coefficients for the Tukey Lambda variance function.
_tukeylambda_var_pc = [3.289868133696453, 0.7306125098871127,
                       -0.5370742306855439, 0.17292046290190008,
                       -0.02371146284628187]
_tukeylambda_var_qc = [1.0, 3.683605511659861, 4.184152498888124,
                       1.7660926747377275, 0.2643989311168465]

# numpy.poly1d instances for the numerator and denominator of the
# Pade approximation to the Tukey Lambda variance.
_tukeylambda_var_p = poly1d(_tukeylambda_var_pc[::-1])
_tukeylambda_var_q = poly1d(_tukeylambda_var_qc[::-1])


def tukeylambda_variance(lam):
    """Variance of the Tukey Lambda distribution.

    Parameters
    ----------
    lam : array_like
        The lambda values at which to compute the variance.

    Returns
    -------
    v : ndarray
        The variance.  For lam < -0.5, the variance is not defined, so
        np.nan is returned.  For lam = -0.5, np.inf is returned.

    Notes
    -----
    In an interval around lambda=0, this function uses the [4,4] Pade
    approximation to compute the variance.  Otherwise it uses the standard
    formula (http://en.wikipedia.org/wiki/Tukey_lambda_distribution).  The
    Pade approximation is used because the standard formula has a removable
    discontinuity at lambda = 0, and does not produce accurate numerical
    results near lambda = 0.
    """
    lam = np.asarray(lam)
    shp = lam.shape
    lam = np.atleast_1d(lam).astype(np.float64)

    # For absolute values of lam less than threshold, use the Pade
    # approximation.
    threshold = 0.075

    # Play games with masks to implement the conditional evaluation of
    # the distribution.
    # lambda < -0.5:  var = nan
    low_mask = lam < -0.5
    # lambda == -0.5: var = inf
    neghalf_mask = lam == -0.5
    # abs(lambda) < threshold:  use Pade approximation
    small_mask = np.abs(lam) < threshold
    # else the "regular" case:  use the explicit formula.
    reg_mask = ~(low_mask | neghalf_mask | small_mask)

    # Get the 'lam' values for the cases where they are needed.
    small = lam[small_mask]
    reg = lam[reg_mask]

    # Compute the function for each case.
    v = np.empty_like(lam)
    v[low_mask] = np.nan
    v[neghalf_mask] = np.inf
    if small.size > 0:
        # Use the Pade approximation near lambda = 0.
        v[small_mask] = _tukeylambda_var_p(small) / _tukeylambda_var_q(small)
    if reg.size > 0:
        v[reg_mask] = (2.0 / reg**2) * (1.0 / (1.0 + 2 * reg) -
                                        beta(reg + 1, reg + 1))
    v.shape = shp
    return v


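# Hedged usage sketch (not part of the original file): the function is
# vectorized over lam, and the Pade branch reproduces the exact
# lambda = 0 limit pi**2 / 3 up to rounding:
#
# >>> tukeylambda_variance([-1.0, -0.5, 0.0])
# array([       nan,        inf, 3.28986813])
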
# The following code was used to generate the Pade coefficients for the
# Tukey Lambda kurtosis function.  Version 0.17 of mpmath was used.
#---------------------------------------------------------------------------
# import mpmath as mp
#
# mp.mp.dps = 60
#
# one   = mp.mpf(1)
# two   = mp.mpf(2)
# three = mp.mpf(3)
# four  = mp.mpf(4)
#
# def mpkurt(lam):
#     if lam == 0:
#         k = mp.mpf(6)/5
#     else:
#         numer = (one/(four*lam+one) - four*mp.beta(three*lam+one, lam+one) +
#                  three*mp.beta(two*lam+one, two*lam+one))
#         denom = two*(one/(two*lam+one) - mp.beta(lam+one,lam+one))**2
#         k = numer / denom - three
#     return k
#
# # There is a bug in mpmath 0.17: when we use the 'method' keyword of the
# # taylor function and we request a degree 9 Taylor polynomial, we actually
# # get degree 8.
# t = mp.taylor(mpkurt, 0, 9, method='quad', radius=0.01)
# t = [mp.chop(c, tol=1e-15) for c in t]
# p, q = mp.pade(t, 4, 4)
# print "p =", [mp.fp.mpf(c) for c in p]
# print "q =", [mp.fp.mpf(c) for c in q]
#---------------------------------------------------------------------------

# Pade coefficients for the Tukey Lambda kurtosis function.
_tukeylambda_kurt_pc = [1.2, -5.853465139719495, -22.653447381131077,
                        0.20601184383406815, 4.59796302262789]
_tukeylambda_kurt_qc = [1.0, 7.171149192233599, 12.96663094361842,
                        0.43075235247853005, -2.789746758009912]

# numpy.poly1d instances for the numerator and denominator of the
# Pade approximation to the Tukey Lambda kurtosis.
_tukeylambda_kurt_p = poly1d(_tukeylambda_kurt_pc[::-1])
_tukeylambda_kurt_q = poly1d(_tukeylambda_kurt_qc[::-1])


def tukeylambda_kurtosis(lam):
    """Kurtosis of the Tukey Lambda distribution.

    Parameters
    ----------
    lam : array_like
        The lambda values at which to compute the kurtosis.

    Returns
    -------
    k : ndarray
        The kurtosis.  For lam < -0.25, the kurtosis is not defined, so
        np.nan is returned.  For lam = -0.25, np.inf is returned.

    """
    lam = np.asarray(lam)
    shp = lam.shape
    lam = np.atleast_1d(lam).astype(np.float64)

    # For absolute values of lam less than threshold, use the Pade
    # approximation.
    threshold = 0.055

    # Use masks to implement the conditional evaluation of the kurtosis.
    # lambda < -0.25:  kurtosis = nan
    low_mask = lam < -0.25
    # lambda == -0.25: kurtosis = inf
    negqrtr_mask = lam == -0.25
    # lambda near 0:  use Pade approximation
    small_mask = np.abs(lam) < threshold
    # else the "regular" case:  use the explicit formula.
    reg_mask = ~(low_mask | negqrtr_mask | small_mask)

    # Get the 'lam' values for the cases where they are needed.
    small = lam[small_mask]
    reg = lam[reg_mask]

    # Compute the function for each case.
    k = np.empty_like(lam)
    k[low_mask] = np.nan
    k[negqrtr_mask] = np.inf
    if small.size > 0:
        k[small_mask] = _tukeylambda_kurt_p(small) / _tukeylambda_kurt_q(small)
    if reg.size > 0:
        numer = (1.0 / (4 * reg + 1) - 4 * beta(3 * reg + 1, reg + 1) +
                 3 * beta(2 * reg + 1, 2 * reg + 1))
        denom = 2 * (1.0/(2 * reg + 1) - beta(reg + 1, reg + 1))**2
        k[reg_mask] = numer / denom - 3

    # The return value will be a numpy array; resetting the shape ensures that
    # if `lam` was a scalar, the return value is a 0-d array.
    k.shape = shp
    return k
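
# Hedged usage sketch (not part of the original file): lambda = 0 falls in
# the Pade branch and reproduces the exact limit 6/5:
#
# >>> tukeylambda_kurtosis([-1.0, -0.25, 0.0])
# array([ nan,  inf,  1.2])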
@ -1,271 +0,0 @@
"""Some functions for working with contingency tables (i.e. cross tabulations).
"""


from __future__ import division, print_function, absolute_import

from functools import reduce
import numpy as np
from .stats import power_divergence


__all__ = ['margins', 'expected_freq', 'chi2_contingency']


def margins(a):
    """Return a list of the marginal sums of the array `a`.

    Parameters
    ----------
    a : ndarray
        The array for which to compute the marginal sums.

    Returns
    -------
    margsums : list of ndarrays
        A list of length `a.ndim`.  `margsums[k]` is the result
        of summing `a` over all axes except `k`; it has the same
        number of dimensions as `a`, but the length of each axis
        except axis `k` will be 1.

    Examples
    --------
    >>> a = np.arange(12).reshape(2, 6)
    >>> a
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11]])
    >>> m0, m1 = margins(a)
    >>> m0
    array([[15],
           [51]])
    >>> m1
    array([[ 6,  8, 10, 12, 14, 16]])

    >>> b = np.arange(24).reshape(2,3,4)
    >>> m0, m1, m2 = margins(b)
    >>> m0
    array([[[ 66]],
           [[210]]])
    >>> m1
    array([[[ 60],
            [ 92],
            [124]]])
    >>> m2
    array([[[60, 66, 72, 78]]])
    """
    margsums = []
    ranged = list(range(a.ndim))
    for k in ranged:
        marg = np.apply_over_axes(np.sum, a, [j for j in ranged if j != k])
        margsums.append(marg)
    return margsums


def expected_freq(observed):
    """
    Compute the expected frequencies from a contingency table.

    Given an n-dimensional contingency table of observed frequencies,
    compute the expected frequencies for the table based on the marginal
    sums under the assumption that the groups associated with each
    dimension are independent.

    Parameters
    ----------
    observed : array_like
        The table of observed frequencies.  (While this function can handle
        a 1-D array, that case is trivial.  Generally `observed` is at
        least 2-D.)

    Returns
    -------
    expected : ndarray of float64
        The expected frequencies, based on the marginal sums of the table.
        Same shape as `observed`.

    Examples
    --------
    >>> observed = np.array([[10, 10, 20],[20, 20, 20]])
    >>> expected_freq(observed)
    array([[ 12.,  12.,  16.],
           [ 18.,  18.,  24.]])

    """
    # Typically `observed` is an integer array. If `observed` has a large
    # number of dimensions or holds large values, some of the following
    # computations may overflow, so we first switch to floating point.
    observed = np.asarray(observed, dtype=np.float64)

    # Create a list of the marginal sums.
    margsums = margins(observed)

    # Create the array of expected frequencies.  The shapes of the
    # marginal sums returned by apply_over_axes() are just what we
    # need for broadcasting in the following product.
    d = observed.ndim
    expected = reduce(np.multiply, margsums) / observed.sum() ** (d - 1)
    return expected


def chi2_contingency(observed, correction=True, lambda_=None):
    """Chi-square test of independence of variables in a contingency table.

    This function computes the chi-square statistic and p-value for the
    hypothesis test of independence of the observed frequencies in the
    contingency table [1]_ `observed`.  The expected frequencies are computed
    based on the marginal sums under the assumption of independence; see
    `scipy.stats.contingency.expected_freq`.  The number of degrees of
    freedom is (expressed using numpy functions and attributes)::

        dof = observed.size - sum(observed.shape) + observed.ndim - 1

    Parameters
    ----------
    observed : array_like
        The contingency table. The table contains the observed frequencies
        (i.e. number of occurrences) in each category.  In the two-dimensional
        case, the table is often described as an "R x C table".
    correction : bool, optional
        If True, *and* the degrees of freedom is 1, apply Yates' correction
        for continuity.  The effect of the correction is to adjust each
        observed value by 0.5 towards the corresponding expected value.
    lambda_ : float or str, optional
        By default, the statistic computed in this test is Pearson's
        chi-squared statistic [2]_.  `lambda_` allows a statistic from the
        Cressie-Read power divergence family [3]_ to be used instead.  See
        `power_divergence` for details.

    Returns
    -------
    chi2 : float
        The test statistic.
    p : float
        The p-value of the test
    dof : int
        Degrees of freedom
    expected : ndarray, same shape as `observed`
        The expected frequencies, based on the marginal sums of the table.

    See Also
    --------
    contingency.expected_freq
    fisher_exact
    chisquare
    power_divergence

    Notes
    -----
    An often quoted guideline for the validity of this calculation is that
    the test should be used only if the observed and expected frequency in
    each cell is at least 5.

    This is a test for the independence of different categories of a
    population. The test is only meaningful when the dimension of
    `observed` is two or more.  Applying the test to a one-dimensional
    table will always result in `expected` equal to `observed` and a
    chi-square statistic equal to 0.

    This function does not handle masked arrays, because the calculation
    does not make sense with missing values.

    Like stats.chisquare, this function computes a chi-square statistic;
    the convenience this function provides is to figure out the expected
    frequencies and degrees of freedom from the given contingency table.
    If these were already known, and if the Yates' correction was not
    required, one could use stats.chisquare.  That is, if one calls::

        chi2, p, dof, ex = chi2_contingency(obs, correction=False)

    then the following is true::

        (chi2, p) == stats.chisquare(obs.ravel(), f_exp=ex.ravel(),
                                     ddof=obs.size - 1 - dof)

    The `lambda_` argument was added in version 0.13.0 of scipy.

    References
    ----------
    .. [1] "Contingency table", http://en.wikipedia.org/wiki/Contingency_table
    .. [2] "Pearson's chi-squared test",
           http://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test
    .. [3] Cressie, N. and Read, T. R. C., "Multinomial Goodness-of-Fit
           Tests", J. Royal Stat. Soc. Series B, Vol. 46, No. 3 (1984),
           pp. 440-464.

    Examples
    --------
    A two-way example (2 x 3):

    >>> obs = np.array([[10, 10, 20], [20, 20, 20]])
    >>> chi2_contingency(obs)
    (2.7777777777777777,
     0.24935220877729619,
     2,
     array([[ 12.,  12.,  16.],
            [ 18.,  18.,  24.]]))

    Perform the test using the log-likelihood ratio (i.e. the "G-test")
    instead of Pearson's chi-squared statistic.

    >>> g, p, dof, expctd = chi2_contingency(obs, lambda_="log-likelihood")
    >>> g, p
    (2.7688587616781319, 0.25046668010954165)

    A four-way example (2 x 2 x 2 x 2):

    >>> obs = np.array(
    ...     [[[[12, 17],
    ...        [11, 16]],
    ...       [[11, 12],
    ...        [15, 16]]],
    ...      [[[23, 15],
    ...        [30, 22]],
    ...       [[14, 17],
    ...        [15, 16]]]])
    >>> chi2_contingency(obs)
    (8.7584514426741897,
     0.64417725029295503,
     11,
     array([[[[ 14.15462386,  14.15462386],
              [ 16.49423111,  16.49423111]],
             [[ 11.2461395 ,  11.2461395 ],
              [ 13.10500554,  13.10500554]]],
            [[[ 19.5591166 ,  19.5591166 ],
              [ 22.79202844,  22.79202844]],
             [[ 15.54012004,  15.54012004],
              [ 18.10873492,  18.10873492]]]]))
    """
    observed = np.asarray(observed)
    if np.any(observed < 0):
        raise ValueError("All values in `observed` must be nonnegative.")
    if observed.size == 0:
        raise ValueError("No data; `observed` has size 0.")

    expected = expected_freq(observed)
    if np.any(expected == 0):
        # Include one of the positions where expected is zero in
        # the exception message.
        zeropos = list(zip(*np.where(expected == 0)))[0]
        raise ValueError("The internally computed table of expected "
                         "frequencies has a zero element at %s." % (zeropos,))

    # The degrees of freedom
    dof = expected.size - sum(expected.shape) + expected.ndim - 1

    if dof == 0:
        # Degenerate case; this occurs when `observed` is 1D (or, more
        # generally, when it has only one nontrivial dimension).  In this
        # case, we also have observed == expected, so chi2 is 0.
        chi2 = 0.0
        p = 1.0
    else:
        if dof == 1 and correction:
            # Adjust `observed` according to Yates' correction for continuity.
            observed = observed + 0.5 * np.sign(expected - observed)

        chi2, p = power_divergence(observed, expected,
                                   ddof=observed.size - 1 - dof, axis=None,
                                   lambda_=lambda_)

    return chi2, p, dof, expected
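
# Hedged usage sketch (not part of the original file), checking the
# stats.chisquare equivalence stated in the Notes via power_divergence
# (Pearson's chi-squared is its default statistic):
#
# >>> obs = np.array([[10, 10, 20], [20, 20, 20]])
# >>> chi2, p, dof, ex = chi2_contingency(obs, correction=False)
# >>> chi2b, pb = power_divergence(obs.ravel(), f_exp=ex.ravel(),
# ...                              ddof=obs.size - 1 - dof, axis=None)
# >>> np.allclose([chi2, p], [chi2b, pb])
# True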
@ -1,541 +0,0 @@
#-------------------------------------------------------------------------------
#
#  Define classes for (uni/multi)-variate kernel density estimation.
#
#  Currently, only Gaussian kernels are implemented.
#
#  Written by: Robert Kern
#
#  Date: 2004-08-09
#
#  Modified: 2005-02-10 by Robert Kern.
#              Contributed to Scipy
#            2005-10-07 by Robert Kern.
#              Some fixes to match the new scipy_core
#
#  Copyright 2004-2005 by Enthought, Inc.
#
#-------------------------------------------------------------------------------

from __future__ import division, print_function, absolute_import

# Standard library imports.
import warnings

# Scipy imports.
from scipy._lib.six import callable, string_types
from scipy import linalg, special

from numpy import atleast_2d, reshape, zeros, newaxis, dot, exp, pi, sqrt, \
     ravel, power, atleast_1d, squeeze, sum, transpose
import numpy as np
from numpy.random import randint, multivariate_normal

# Local imports.
from . import mvn


__all__ = ['gaussian_kde']


class gaussian_kde(object):
    """Representation of a kernel-density estimate using Gaussian kernels.

    Kernel density estimation is a way to estimate the probability density
    function (PDF) of a random variable in a non-parametric way.
    `gaussian_kde` works for both uni-variate and multi-variate data.  It
    includes automatic bandwidth determination.  The estimation works best for
    a unimodal distribution; bimodal or multi-modal distributions tend to be
    oversmoothed.

    Parameters
    ----------
    dataset : array_like
        Datapoints to estimate from. In case of univariate data this is a 1-D
        array, otherwise a 2-D array with shape (# of dims, # of data).
    bw_method : str, scalar or callable, optional
        The method used to calculate the estimator bandwidth.  This can be
        'scott', 'silverman', a scalar constant or a callable.  If a scalar,
        this will be used directly as `kde.factor`.  If a callable, it should
        take a `gaussian_kde` instance as only parameter and return a scalar.
        If None (default), 'scott' is used.  See Notes for more details.

    Attributes
    ----------
    dataset : ndarray
        The dataset with which `gaussian_kde` was initialized.
    d : int
        Number of dimensions.
    n : int
        Number of datapoints.
    factor : float
        The bandwidth factor, obtained from `kde.covariance_factor`, with which
        the covariance matrix is multiplied.
    covariance : ndarray
        The covariance matrix of `dataset`, scaled by the calculated bandwidth
        (`kde.factor`).
    inv_cov : ndarray
        The inverse of `covariance`.

    Methods
    -------
    kde.evaluate(points) : ndarray
        Evaluate the estimated pdf on a provided set of points.
    kde(points) : ndarray
        Same as kde.evaluate(points)
    kde.integrate_gaussian(mean, cov) : float
        Multiply pdf with a specified Gaussian and integrate over the whole
        domain.
    kde.integrate_box_1d(low, high) : float
        Integrate pdf (1D only) between two bounds.
    kde.integrate_box(low_bounds, high_bounds) : float
        Integrate pdf over a rectangular space between low_bounds and
        high_bounds.
    kde.integrate_kde(other_kde) : float
        Integrate two kernel density estimates multiplied together.
    kde.pdf(points) : ndarray
        Alias for ``kde.evaluate(points)``.
    kde.logpdf(points) : ndarray
        Equivalent to ``np.log(kde.evaluate(points))``.
    kde.resample(size=None) : ndarray
        Randomly sample a dataset from the estimated pdf.
    kde.set_bandwidth(bw_method='scott') : None
        Computes the bandwidth, i.e. the coefficient that multiplies the data
        covariance matrix to obtain the kernel covariance matrix.
        .. versionadded:: 0.11.0
    kde.covariance_factor : float
        Computes the coefficient (`kde.factor`) that multiplies the data
        covariance matrix to obtain the kernel covariance matrix.
        The default is `scotts_factor`.  A subclass can overwrite this method
        to provide a different method, or set it through a call to
        `kde.set_bandwidth`.

    Notes
    -----
    Bandwidth selection strongly influences the estimate obtained from the KDE
    (much more so than the actual shape of the kernel).  Bandwidth selection
    can be done by a "rule of thumb", by cross-validation, by "plug-in
    methods" or by other means; see [3]_, [4]_ for reviews.  `gaussian_kde`
    uses a rule of thumb, the default is Scott's Rule.

    Scott's Rule [1]_, implemented as `scotts_factor`, is::

        n**(-1./(d+4)),

    with ``n`` the number of data points and ``d`` the number of dimensions.
    Silverman's Rule [2]_, implemented as `silverman_factor`, is::

        (n * (d + 2) / 4.)**(-1. / (d + 4)).

    Good general descriptions of kernel density estimation can be found in [1]_
    and [2]_, the mathematics for this multi-dimensional implementation can be
    found in [1]_.

    References
    ----------
    .. [1] D.W. Scott, "Multivariate Density Estimation: Theory, Practice, and
           Visualization", John Wiley & Sons, New York, Chicester, 1992.
    .. [2] B.W. Silverman, "Density Estimation for Statistics and Data
           Analysis", Vol. 26, Monographs on Statistics and Applied Probability,
           Chapman and Hall, London, 1986.
    .. [3] B.A. Turlach, "Bandwidth Selection in Kernel Density Estimation: A
           Review", CORE and Institut de Statistique, Vol. 19, pp. 1-33, 1993.
    .. [4] D.M. Bashtannyk and R.J. Hyndman, "Bandwidth selection for kernel
           conditional density estimation", Computational Statistics & Data
           Analysis, Vol. 36, pp. 279-298, 2001.

    Examples
    --------
    Generate some random two-dimensional data:

    >>> from scipy import stats
    >>> def measure(n):
    >>>     "Measurement model, return two coupled measurements."
 | 
			
		||||
    >>>     m1 = np.random.normal(size=n)
 | 
			
		||||
    >>>     m2 = np.random.normal(scale=0.5, size=n)
 | 
			
		||||
    >>>     return m1+m2, m1-m2
 | 
			
		||||

    >>> m1, m2 = measure(2000)
    >>> xmin = m1.min()
    >>> xmax = m1.max()
    >>> ymin = m2.min()
    >>> ymax = m2.max()

    Perform a kernel density estimate on the data:

    >>> X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
    >>> positions = np.vstack([X.ravel(), Y.ravel()])
    >>> values = np.vstack([m1, m2])
    >>> kernel = stats.gaussian_kde(values)
    >>> Z = np.reshape(kernel(positions).T, X.shape)

    Plot the results:

    >>> import matplotlib.pyplot as plt
    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111)
    >>> ax.imshow(np.rot90(Z), cmap=plt.cm.gist_earth_r,
    ...           extent=[xmin, xmax, ymin, ymax])
    >>> ax.plot(m1, m2, 'k.', markersize=2)
    >>> ax.set_xlim([xmin, xmax])
    >>> ax.set_ylim([ymin, ymax])
    >>> plt.show()

    """
    def __init__(self, dataset, bw_method=None):
        self.dataset = atleast_2d(dataset)
        if not self.dataset.size > 1:
            raise ValueError("`dataset` input should have multiple elements.")

        self.d, self.n = self.dataset.shape
        self.set_bandwidth(bw_method=bw_method)

    def evaluate(self, points):
        """Evaluate the estimated pdf on a set of points.

        Parameters
        ----------
        points : (# of dimensions, # of points)-array
            Alternatively, a (# of dimensions,) vector can be passed in and
            treated as a single point.

        Returns
        -------
        values : (# of points,)-array
            The values at each point.

        Raises
        ------
        ValueError : if the dimensionality of the input points is different
                     from the dimensionality of the KDE.

        """
        points = atleast_2d(points)

        d, m = points.shape
        if d != self.d:
            if d == 1 and m == self.d:
                # points was passed in as a row vector
                points = reshape(points, (self.d, 1))
                m = 1
            else:
                msg = "points have dimension %s, dataset has dimension %s" % (d,
                    self.d)
                raise ValueError(msg)

        result = zeros((m,), dtype=np.float)

        if m >= self.n:
            # there are more points than data, so loop over data
            for i in range(self.n):
                diff = self.dataset[:, i, newaxis] - points
                tdiff = dot(self.inv_cov, diff)
                energy = sum(diff * tdiff, axis=0) / 2.0
                result = result + exp(-energy)
        else:
            # loop over points
            for i in range(m):
                diff = self.dataset - points[:, i, newaxis]
                tdiff = dot(self.inv_cov, diff)
                energy = sum(diff * tdiff, axis=0) / 2.0
                result[i] = sum(exp(-energy), axis=0)

        result = result / self._norm_factor

        return result

    __call__ = evaluate
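
    # Hedged usage sketch (not part of the original file): a (d,) vector is
    # treated as a single point, so the call below returns a (1,)-array.
    #
    # >>> kde = gaussian_kde(np.random.normal(size=(2, 50)))
    # >>> kde(np.array([0.0, 0.0])).shape
    # (1,)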

    def integrate_gaussian(self, mean, cov):
        """
        Multiply estimated density by a multivariate Gaussian and integrate
        over the whole space.

        Parameters
        ----------
        mean : array_like
            A 1-D array, specifying the mean of the Gaussian.
        cov : array_like
            A 2-D array, specifying the covariance matrix of the Gaussian.

        Returns
        -------
        result : scalar
            The value of the integral.

        Raises
        ------
        ValueError :
            If the mean or covariance of the input Gaussian differs from
            the KDE's dimensionality.

        """
        mean = atleast_1d(squeeze(mean))
        cov = atleast_2d(cov)

        if mean.shape != (self.d,):
            raise ValueError("mean does not have dimension %s" % self.d)
        if cov.shape != (self.d, self.d):
            raise ValueError("covariance does not have dimension %s" % self.d)

        # make mean a column vector
        mean = mean[:, newaxis]

        sum_cov = self.covariance + cov

        diff = self.dataset - mean
        tdiff = dot(linalg.inv(sum_cov), diff)

        energies = sum(diff * tdiff, axis=0) / 2.0
        result = sum(exp(-energies), axis=0) / sqrt(linalg.det(2 * pi *
                                                        sum_cov)) / self.n

        return result

    def integrate_box_1d(self, low, high):
        """
        Computes the integral of a 1D pdf between two bounds.

        Parameters
        ----------
        low : scalar
            Lower bound of integration.
        high : scalar
            Upper bound of integration.

        Returns
        -------
        value : scalar
            The result of the integral.

        Raises
        ------
        ValueError
            If the KDE is over more than one dimension.

        """
        if self.d != 1:
            raise ValueError("integrate_box_1d() only handles 1D pdfs")

        stdev = ravel(sqrt(self.covariance))[0]

        normalized_low = ravel((low - self.dataset) / stdev)
        normalized_high = ravel((high - self.dataset) / stdev)

        value = np.mean(special.ndtr(normalized_high) -
                        special.ndtr(normalized_low))
        return value
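
    # Hedged usage sketch (not part of the original file): ndtr(+inf) = 1 and
    # ndtr(-inf) = 0, so the integral over the whole real line is exactly 1.
    #
    # >>> kde = gaussian_kde(np.random.normal(size=200))
    # >>> kde.integrate_box_1d(-np.inf, np.inf)
    # 1.0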

    def integrate_box(self, low_bounds, high_bounds, maxpts=None):
        """Computes the integral of a pdf over a rectangular interval.

        Parameters
        ----------
        low_bounds : array_like
            A 1-D array containing the lower bounds of integration.
        high_bounds : array_like
            A 1-D array containing the upper bounds of integration.
        maxpts : int, optional
            The maximum number of points to use for integration.

        Returns
        -------
        value : scalar
            The result of the integral.

        """
        if maxpts is not None:
            extra_kwds = {'maxpts': maxpts}
        else:
            extra_kwds = {}

        value, inform = mvn.mvnun(low_bounds, high_bounds, self.dataset,
                                  self.covariance, **extra_kwds)
        if inform:
            msg = ('An integral in mvn.mvnun requires more points than %s' %
                   (self.d * 1000))
            warnings.warn(msg)

        return value

    def integrate_kde(self, other):
        """
        Computes the integral of the product of this kernel density estimate
        with another.

        Parameters
        ----------
        other : gaussian_kde instance
            The other kde.

        Returns
        -------
        value : scalar
            The result of the integral.

        Raises
        ------
        ValueError
            If the KDEs have different dimensionality.

        """
        if other.d != self.d:
            raise ValueError("KDEs are not the same dimensionality")

        # we want to iterate over the smallest number of points
        if other.n < self.n:
            small = other
            large = self
        else:
            small = self
            large = other

        sum_cov = small.covariance + large.covariance
        sum_cov_chol = linalg.cho_factor(sum_cov)
        result = 0.0
        for i in range(small.n):
            mean = small.dataset[:, i, newaxis]
            diff = large.dataset - mean
            tdiff = linalg.cho_solve(sum_cov_chol, diff)

            energies = sum(diff * tdiff, axis=0) / 2.0
            result += sum(exp(-energies), axis=0)

        result /= sqrt(linalg.det(2 * pi * sum_cov)) * large.n * small.n

        return result

    def resample(self, size=None):
        """
        Randomly sample a dataset from the estimated pdf.

        Parameters
        ----------
        size : int, optional
            The number of samples to draw.  If not provided, then the size is
            the same as the underlying dataset.

        Returns
        -------
        resample : (self.d, `size`) ndarray
            The sampled dataset.

        """
        if size is None:
            size = self.n

        norm = transpose(multivariate_normal(zeros((self.d,), float),
                         self.covariance, size=size))
        indices = randint(0, self.n, size=size)
        means = self.dataset[:, indices]

        return means + norm
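
    # Hedged usage sketch (not part of the original file): one column per
    # draw, so the result has shape (d, size).
    #
    # >>> kde = gaussian_kde(np.random.normal(size=(2, 50)))
    # >>> kde.resample(size=7).shape
    # (2, 7)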

    def scotts_factor(self):
        return power(self.n, -1./(self.d+4))

    def silverman_factor(self):
        return power(self.n*(self.d+2.0)/4.0, -1./(self.d+4))
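
    # Hedged worked example (not part of the original file): for n = 100 and
    # d = 2 both rules reduce to n**(-1/6), since n * (d + 2) / 4. == n:
    #   scotts_factor()    = 100**(-1./6)             ~ 0.4642
    #   silverman_factor() = (100 * 4 / 4.)**(-1./6)  ~ 0.4642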

    #  Default method to calculate bandwidth, can be overwritten by subclass
    covariance_factor = scotts_factor

    def set_bandwidth(self, bw_method=None):
        """Compute the estimator bandwidth with given method.

        The new bandwidth calculated after a call to `set_bandwidth` is used
        for subsequent evaluations of the estimated density.

        Parameters
        ----------
        bw_method : str, scalar or callable, optional
            The method used to calculate the estimator bandwidth.  This can be
            'scott', 'silverman', a scalar constant or a callable.  If a
            scalar, this will be used directly as `kde.factor`.  If a callable,
            it should take a `gaussian_kde` instance as only parameter and
            return a scalar.  If None (default), nothing happens; the current
            `kde.covariance_factor` method is kept.

        Notes
        -----
        .. versionadded:: 0.11

        Examples
        --------
        >>> x1 = np.array([-7, -5, 1, 4, 5.])
        >>> kde = stats.gaussian_kde(x1)
        >>> xs = np.linspace(-10, 10, num=50)
        >>> y1 = kde(xs)
        >>> kde.set_bandwidth(bw_method='silverman')
        >>> y2 = kde(xs)
        >>> kde.set_bandwidth(bw_method=kde.factor / 3.)
        >>> y3 = kde(xs)

        >>> fig = plt.figure()
        >>> ax = fig.add_subplot(111)
        >>> ax.plot(x1, np.ones(x1.shape) / (4. * x1.size), 'bo',
        ...         label='Data points (rescaled)')
        >>> ax.plot(xs, y1, label='Scott (default)')
        >>> ax.plot(xs, y2, label='Silverman')
        >>> ax.plot(xs, y3, label='Const (1/3 * Silverman)')
        >>> ax.legend()
        >>> plt.show()

        """
        if bw_method is None:
            pass
        elif bw_method == 'scott':
            self.covariance_factor = self.scotts_factor
        elif bw_method == 'silverman':
            self.covariance_factor = self.silverman_factor
        elif np.isscalar(bw_method) and not isinstance(bw_method, string_types):
            self._bw_method = 'use constant'
            self.covariance_factor = lambda: bw_method
        elif callable(bw_method):
            self._bw_method = bw_method
            self.covariance_factor = lambda: self._bw_method(self)
        else:
            msg = "`bw_method` should be 'scott', 'silverman', a scalar " \
                  "or a callable."
            raise ValueError(msg)

        self._compute_covariance()

    def _compute_covariance(self):
        """Computes the covariance matrix for each Gaussian kernel using
        covariance_factor().
        """
        self.factor = self.covariance_factor()
        # Cache covariance and inverse covariance of the data
        if not hasattr(self, '_data_inv_cov'):
            self._data_covariance = atleast_2d(np.cov(self.dataset, rowvar=1,
                                               bias=False))
            self._data_inv_cov = linalg.inv(self._data_covariance)

        self.covariance = self._data_covariance * self.factor**2
        self.inv_cov = self._data_inv_cov / self.factor**2
        self._norm_factor = sqrt(linalg.det(2*pi*self.covariance)) * self.n

    def pdf(self, x):
        """
        Evaluate the estimated pdf on a provided set of points.

        Notes
        -----
        This is an alias for `gaussian_kde.evaluate`.  See the ``evaluate``
        docstring for more details.

        """
        return self.evaluate(x)

    def logpdf(self, x):
        """
        Evaluate the log of the estimated pdf on a provided set of points.

        Notes
        -----
        See `gaussian_kde.evaluate` for more details; this method simply
        returns ``np.log(gaussian_kde.evaluate(x))``.

        """
        return np.log(self.evaluate(x))
@ -1,15 +0,0 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 06 16:02:47 2011

@author: pab
"""
import numpy as np
import wafo.kdetools as wk
n = 100
x = np.sort(5 * np.random.rand(1, n) - 2.5, axis=-1).ravel()
# One Bernoulli response per x: rand(n), not rand(n, 1), so that y has the
# same length as x instead of broadcasting to an (n, n) array.
y = (np.cos(x) > 2 * np.random.rand(n) - 1).ravel()

kreg = wk.KRegression(x, y)
f = kreg(output='plotobj', title='Kernel regression', plotflag=1)
f.plot()
@ -1,13 +0,0 @@
from numpy import asarray, ndarray, ones, nan  # , reshape, repeat, product

def valarray(shape, value=nan, typecode=None):
    """Return an array of the given shape, filled with `value`.
    """
    # out = reshape(repeat([value], product(shape, axis=0), axis=0), shape)
    out = ones(shape, dtype=bool) * value
    if typecode is not None:
        out = out.astype(typecode)
    if not isinstance(out, ndarray):
        out = asarray(out)
    return out
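
# Hedged usage sketch (not part of the original file):
#
# >>> valarray((2, 3), value=0.5).shape
# (2, 3)
# >>> valarray(2, value=1, typecode=int)
# array([1, 1])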
File diff suppressed because it is too large
								@ -1,79 +0,0 @@
"""
===================================================================
Statistical functions for masked arrays (:mod:`scipy.stats.mstats`)
===================================================================

.. currentmodule:: scipy.stats.mstats

This module contains a large number of statistical functions that can
be used with masked arrays.

Most of these functions are similar to those in scipy.stats but might
have small differences in the API or in the algorithm used. Since this
is a relatively new package, some API changes are still possible.

.. autosummary::
   :toctree: generated/

   argstoarray
   betai
   chisquare
   count_tied_groups
   describe
   f_oneway
   f_value_wilks_lambda
   find_repeats
   friedmanchisquare
   kendalltau
   kendalltau_seasonal
   kruskalwallis
   ks_twosamp
   kurtosis
   kurtosistest
   linregress
   mannwhitneyu
   mode
   moment
   mquantiles
   msign
   normaltest
   obrientransform
   pearsonr
   plotting_positions
   pointbiserialr
   rankdata
   scoreatpercentile
   sem
   signaltonoise
   skew
   skewtest
   spearmanr
   theilslopes
   threshold
   tmax
   tmean
   tmin
   trim
   trima
   trimboth
   trimmed_stde
   trimr
   trimtail
   tsem
   ttest_ind
   ttest_onesamp
   ttest_rel
   tvar
   variation
   winsorize
   zmap
   zscore

"""
from __future__ import division, print_function, absolute_import

from .mstats_basic import *
from .mstats_extras import *
from scipy.stats import gmean, hmean
 | 
			
		||||
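Since the deleted package only re-exported the upstream masked-array statistics, the equivalent usage now goes through scipy directly (a minimal sketch):

import numpy as np
import numpy.ma as ma
from scipy.stats import mstats

x = ma.masked_invalid([1.0, 2.0, np.nan, 4.0, 5.0])
print(mstats.mquantiles(x, prob=[0.25, 0.5, 0.75]))   # the NaN entry is masked out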
File diff suppressed because it is too large
@ -1,451 +0,0 @@
"""
Additional statistics functions with support for masked arrays.

"""

# Original author (2007): Pierre GF Gerard-Marchant


from __future__ import division, print_function, absolute_import


__all__ = ['compare_medians_ms',
           'hdquantiles', 'hdmedian', 'hdquantiles_sd',
           'idealfourths',
           'median_cihs', 'mjci', 'mquantiles_cimj',
           'rsh',
           'trimmed_mean_ci',]


import numpy as np
from numpy import float_, int_, ndarray

import numpy.ma as ma
from numpy.ma import MaskedArray

from . import mstats_basic as mstats

from scipy.stats.distributions import norm, beta, t, binom


def hdquantiles(data, prob=list([.25,.5,.75]), axis=None, var=False,):
    """
    Computes quantile estimates with the Harrell-Davis method.

    The quantile estimates are calculated as a weighted linear combination
    of order statistics.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : boolean
        Whether to return the variance of the estimate.

    Returns
    -------
    hdquantiles : MaskedArray
        A (p,) array of quantiles (if `var` is False), or a (2,p) array of
        quantiles and variances (if `var` is True), where ``p`` is the
        number of quantiles.

    """
    def _hd_1D(data, prob, var):
        "Computes the HD quantiles for a 1D array. Returns nan for invalid data."
        xsorted = np.squeeze(np.sort(data.compressed().view(ndarray)))
        # Don't use length here, in case we have a numpy scalar
        n = xsorted.size

        hd = np.empty((2, len(prob)), float_)
        if n < 2:
            hd.flat = np.nan
            if var:
                return hd
            return hd[0]

        v = np.arange(n+1) / float(n)
        betacdf = beta.cdf
        for (i, p) in enumerate(prob):
            _w = betacdf(v, (n+1)*p, (n+1)*(1-p))
            w = _w[1:] - _w[:-1]
            hd_mean = np.dot(w, xsorted)
            hd[0, i] = hd_mean
            hd[1, i] = np.dot(w, (xsorted-hd_mean)**2)
        hd[0, prob == 0] = xsorted[0]
        hd[0, prob == 1] = xsorted[-1]
        if var:
            hd[1, prob == 0] = hd[1, prob == 1] = np.nan
            return hd
        return hd[0]
    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float_)
    p = np.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None) or (data.ndim == 1):
        result = _hd_1D(data, p, var)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hd_1D, axis, data, p, var)

    return ma.fix_invalid(result, copy=False)


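A minimal sketch of calling the estimator above through its upstream home, scipy.stats.mstats:

import numpy as np
from scipy.stats.mstats import hdquantiles

data = np.random.randn(200)
q = hdquantiles(data, prob=[0.25, 0.5, 0.75])    # three point estimates
q_var = hdquantiles(data, prob=[0.5], var=True)  # shape (2, 1): estimate and variance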
def hdmedian(data, axis=-1, var=False):
    """
    Returns the Harrell-Davis estimate of the median along the given axis.

    Parameters
    ----------
    data : ndarray
        Data array.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : boolean
        Whether to return the variance of the estimate.

    """
    result = hdquantiles(data, [0.5], axis=axis, var=var)
    return result.squeeze()


def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
    """
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    """
    def _hdsd_1D(data, prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)
        hdsd = np.empty(len(prob), float_)
        if n < 2:
            hdsd.flat = np.nan
            return hdsd  # not enough data for the jackknife

        vv = np.arange(n) / float(n-1)
        betacdf = beta.cdf

        for (i, p) in enumerate(prob):
            _w = betacdf(vv, (n+1)*p, (n+1)*(1-p))
            w = _w[1:] - _w[:-1]
            mx_ = np.fromiter([np.dot(w, xsorted[np.r_[list(range(0,k)),
                                                       list(range(k+1,n))].astype(int_)])
                                  for k in range(n)], dtype=float_)
            mx_var = np.array(mx_.var(), copy=False, ndmin=1) * n / float(n-1)
            hdsd[i] = float(n-1) * np.sqrt(np.diag(mx_var).diagonal() / float(n))
        return hdsd
    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float_)
    p = np.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hdsd_1D(data, p)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)

    return ma.fix_invalid(result, copy=False).ravel()


def trimmed_mean_ci(data, limits=(0.2,0.2), inclusive=(True,True),
                    alpha=0.05, axis=None):
    """
    Selected confidence interval of the trimmed mean along the given axis.

    Parameters
    ----------
    data : array_like
        Input data.
    limits : {None, tuple}, optional
        None or a two item tuple.
        Tuple of the percentages to cut on each side of the array, with respect
        to the number of unmasked data, as floats between 0. and 1. If ``n``
        is the number of unmasked data before trimming, then
        (``n * limits[0]``)th smallest data and (``n * limits[1]``)th
        largest data are masked.  The total number of unmasked data after
        trimming is ``n * (1. - sum(limits))``.
        The value of one limit can be set to None to indicate an open interval.

        Defaults to (0.2, 0.2).
    inclusive : (2,) tuple of boolean, optional
        If relative==False, tuple indicating whether values exactly equal to
        the absolute limits are allowed.
        If relative==True, tuple indicating whether the number of data being
        masked on each side should be rounded (True) or truncated (False).

        Defaults to (True, True).
    alpha : float, optional
        Confidence level of the intervals.

        Defaults to 0.05.
    axis : int, optional
        Axis along which to cut. If None, uses a flattened version of `data`.

        Defaults to None.

    Returns
    -------
    trimmed_mean_ci : (2,) ndarray
        The lower and upper confidence intervals of the trimmed data.

    """
    data = ma.array(data, copy=False)
    trimmed = mstats.trimr(data, limits=limits, inclusive=inclusive, axis=axis)
    tmean = trimmed.mean(axis)
    tstde = mstats.trimmed_stde(data, limits=limits, inclusive=inclusive, axis=axis)
    df = trimmed.count(axis) - 1
    tppf = t.ppf(1-alpha/2., df)
    return np.array((tmean - tppf*tstde, tmean+tppf*tstde))


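Sketch of the function above on a heavy-tailed sample (upstream import path assumed):

import numpy as np
from scipy.stats.mstats import trimmed_mean_ci

data = np.random.standard_t(df=3, size=100)   # heavy tails favour trimming
lo, hi = trimmed_mean_ci(data, limits=(0.2, 0.2), alpha=0.05)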
def mjci(data, prob=[0.25,0.5,0.75], axis=None):
    """
    Returns the Maritz-Jarrett estimators of the standard error of selected
    experimental quantiles of the data.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    """
    def _mjci_1D(data, p):
        data = np.sort(data.compressed())
        n = data.size
        prob = (np.array(p) * n + 0.5).astype(int_)
        betacdf = beta.cdf

        mj = np.empty(len(prob), float_)
        x = np.arange(1, n+1, dtype=float_) / n
        y = x - 1./n
        for (i, m) in enumerate(prob):
            (m1, m2) = (m-1, n-m)
            W = betacdf(x, m1, m2) - betacdf(y, m1, m2)
            C1 = np.dot(W, data)
            C2 = np.dot(W, data**2)
            mj[i] = np.sqrt(C2 - C1**2)
        return mj

    data = ma.array(data, copy=False)
    if data.ndim > 2:
        raise ValueError("Array 'data' must be at most two dimensional, "
                         "but got data.ndim = %d" % data.ndim)

    p = np.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        return _mjci_1D(data, p)
    else:
        return ma.apply_along_axis(_mjci_1D, axis, data, p)


def mquantiles_cimj(data, prob=[0.25,0.50,0.75], alpha=0.05, axis=None):
    """
    Computes the alpha confidence interval for the selected quantiles of the
    data, with Maritz-Jarrett estimators.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence
        Sequence of quantiles to compute.
    alpha : float
        Confidence level of the intervals.
    axis : integer
        Axis along which to compute the quantiles.
        If None, use a flattened array.

    """
    alpha = min(alpha, 1-alpha)
    z = norm.ppf(1-alpha/2.)
    xq = mstats.mquantiles(data, prob, alphap=0, betap=0, axis=axis)
    smj = mjci(data, prob, axis=axis)
    return (xq - z * smj, xq + z * smj)


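A minimal sketch of the Maritz-Jarrett confidence intervals in use (upstream import path assumed):

import numpy as np
from scipy.stats.mstats import mquantiles_cimj

data = np.random.randn(100)
ci_low, ci_high = mquantiles_cimj(data, prob=[0.25, 0.5, 0.75], alpha=0.05)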
def median_cihs(data, alpha=0.05, axis=None):
    """
    Computes the alpha-level confidence interval for the median of the data.

    Uses the Hettmansperger-Sheather method.

    Parameters
    ----------
    data : array_like
        Input data. Masked values are discarded. The input should be 1D only,
        or `axis` should be set to None.
    alpha : float
        Confidence level of the intervals.
    axis : integer
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    median_cihs :
        Alpha level confidence interval.

    """
    def _cihs_1D(data, alpha):
        data = np.sort(data.compressed())
        n = len(data)
        alpha = min(alpha, 1-alpha)
        k = int(binom._ppf(alpha/2., n, 0.5))
        gk = binom.cdf(n-k, n, 0.5) - binom.cdf(k-1, n, 0.5)
        if gk < 1-alpha:
            k -= 1
            gk = binom.cdf(n-k, n, 0.5) - binom.cdf(k-1, n, 0.5)
        gkk = binom.cdf(n-k-1, n, 0.5) - binom.cdf(k, n, 0.5)
        I = (gk - 1 + alpha)/(gk - gkk)
        lambd = (n-k) * I / float(k + (n-2*k)*I)
        lims = (lambd*data[k] + (1-lambd)*data[k-1],
                lambd*data[n-k-1] + (1-lambd)*data[n-k])
        return lims
    data = ma.array(data, copy=False)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _cihs_1D(data.compressed(), alpha)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_cihs_1D, axis, data, alpha)

    return result


def compare_medians_ms(group_1, group_2, axis=None):
    """
    Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : array_like
        First dataset.
    group_2 : array_like
        Second dataset.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.

    """
    (med_1, med_2) = (ma.median(group_1, axis=axis), ma.median(group_2, axis=axis))
    (std_1, std_2) = (mstats.stde_median(group_1, axis=axis),
                      mstats.stde_median(group_2, axis=axis))
    W = np.abs(med_1 - med_2) / ma.sqrt(std_1**2 + std_2**2)
    return 1 - norm.cdf(W)


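Sketch of the two-group comparison above (upstream import path assumed):

import numpy as np
from scipy.stats.mstats import compare_medians_ms

g1 = np.random.randn(50)
g2 = np.random.randn(50) + 0.5
p = compare_medians_ms(g1, g2)   # a small p suggests different medians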
def idealfourths(data, axis=None):
    """
    Returns an estimate of the lower and upper quartiles.

    Uses the ideal fourths algorithm.

    Parameters
    ----------
    data : array_like
        Input array.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the arrays are
        flattened.

    Returns
    -------
    idealfourths : {list of floats, masked array}
        Returns the two internal values that divide `data` into four parts
        using the ideal fourths algorithm either along the flattened array
        (if `axis` is None) or along `axis` of `data`.

    """
    def _idf(data):
        x = data.compressed()
        n = len(x)
        if n < 3:
            return [np.nan, np.nan]
        (j, h) = divmod(n/4. + 5/12., 1)
        j = int(j)
        qlo = (1-h)*x[j-1] + h*x[j]
        k = n - j
        qup = (1-h)*x[k] + h*x[k-1]
        return [qlo, qup]
    data = ma.sort(data, axis=axis).view(MaskedArray)
    if (axis is None):
        return _idf(data)
    else:
        return ma.apply_along_axis(_idf, axis, data)


def rsh(data, points=None):
    """
    Evaluates Rosenblatt's shifted histogram estimators for each point
    on the dataset 'data'.

    Parameters
    ----------
    data : sequence
        Input data. Masked values are ignored.
    points : sequence
        Sequence of points where to evaluate Rosenblatt shifted histogram.
        If None, use the data.

    """
    data = ma.array(data, copy=False)
    if points is None:
        points = data
    else:
        points = np.array(points, copy=False, ndmin=1)

    if data.ndim != 1:
        raise AttributeError("The input array should be 1D only!")

    n = data.count()
    r = idealfourths(data, axis=None)
    h = 1.2 * (r[-1]-r[0]) / n**(1./5)
    nhi = (data[:,None] <= points[None,:] + h).sum(0)
    nlo = (data[:,None] < points[None,:] - h).sum(0)
    return (nhi-nlo) / (2.*n*h)
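Sketch of the shifted-histogram density estimator above, evaluated on a grid (upstream import path assumed):

import numpy as np
from scipy.stats.mstats import rsh

data = np.random.randn(200)
dens = rsh(data, points=np.linspace(-3, 3, 61))   # density estimate on a grid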
@ -1,76 +0,0 @@
from __future__ import division, print_function, absolute_import

from numpy import vectorize, deprecate
from numpy.random import random_sample

__all__ = ['randwppf', 'randwcdf']

# XXX: Are these needed anymore?

#####################################
# General purpose continuous
#####################################


@deprecate(message="Deprecated in scipy 0.14.0, use "
                   "distribution-specific rvs() method instead")
def randwppf(ppf, args=(), size=None):
    """
    Returns an array of randomly distributed numbers from a distribution
    whose percent point function (inverse of the CDF or quantile function)
    is given.

    args is a tuple of extra arguments to the ppf function (i.e. shape,
    location, scale), and size is the size of the output.  Note the ppf
    function must accept an array of q values to compute over.

    """
    U = random_sample(size=size)
    return ppf(*(U,)+args)


@deprecate(message="Deprecated in scipy 0.14.0, use "
                   "distribution-specific rvs() method instead")
def randwcdf(cdf, mean=1.0, args=(), size=None):
    """
    Returns an array of randomly distributed numbers given a CDF.

    Given a cumulative distribution function (CDF) returns an array of
    randomly distributed numbers that would satisfy the CDF.

    Parameters
    ----------
    cdf : function
        CDF function that accepts a single value and `args`, and returns
        a single value.
    mean : float, optional
        The mean of the distribution which helps the solver.  Defaults
        to 1.0.
    args : tuple, optional
        Extra arguments to the cdf function (i.e. shape, location, scale)
    size : {int, None}, optional
        Is the size of the output.  If None, only 1 value will be returned.

    Returns
    -------
    randwcdf : ndarray
        Array of random numbers.

    Notes
    -----
    Can use the ``scipy.stats.distributions.*.cdf`` functions for the
    `cdf` parameter.

    """
    import scipy.optimize as optimize

    def _ppfopt(x, q, *nargs):
        newargs = (x,)+nargs
        return cdf(*newargs) - q

    def _ppf(q, *nargs):
        return optimize.fsolve(_ppfopt, mean, args=(q,)+nargs)

    _vppf = vectorize(_ppf)
    U = random_sample(size=size)
    return _vppf(*(U,)+args)
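Both helpers were already deprecated upstream; a sketch of the old inverse-transform call and its recommended replacement:

from scipy.stats import norm

# old style (deprecated): draw by passing an explicit ppf
# samples = randwppf(norm.ppf, size=5)
# preferred replacement, the distribution-specific sampler:
samples = norm.rvs(size=5)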
@ -1,389 +0,0 @@
"""Utilities for writing code that runs on Python 2 and 3"""

# Copyright (c) 2010-2012 Benjamin Peterson
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import operator
import sys
import types

__author__ = "Benjamin Peterson <benjamin@python.org>"
__version__ = "1.2.0"


# True if we are running on Python 3.
PY3 = sys.version_info[0] == 3

if PY3:
    string_types = str,
    integer_types = int,
    class_types = type,
    text_type = str
    binary_type = bytes

    MAXSIZE = sys.maxsize
else:
    string_types = basestring,
    integer_types = (int, long)
    class_types = (type, types.ClassType)
    text_type = unicode
    binary_type = str

    if sys.platform.startswith("java"):
        # Jython always uses 32 bits.
        MAXSIZE = int((1 << 31) - 1)
    else:
        # It's possible to have sizeof(long) != sizeof(Py_ssize_t).
        class X(object):
            def __len__(self):
                return 1 << 31
        try:
            len(X())
        except OverflowError:
            # 32-bit
            MAXSIZE = int((1 << 31) - 1)
        else:
            # 64-bit
            MAXSIZE = int((1 << 63) - 1)
            del X


def _add_doc(func, doc):
    """Add documentation to a function."""
    func.__doc__ = doc


def _import_module(name):
    """Import module, returning the module after the last dot."""
    __import__(name)
    return sys.modules[name]


class _LazyDescr(object):

    def __init__(self, name):
        self.name = name

    def __get__(self, obj, tp):
        result = self._resolve()
        setattr(obj, self.name, result)
        # This is a bit ugly, but it avoids running this again.
        delattr(tp, self.name)
        return result


class MovedModule(_LazyDescr):

    def __init__(self, name, old, new=None):
        super(MovedModule, self).__init__(name)
        if PY3:
            if new is None:
                new = name
            self.mod = new
        else:
            self.mod = old

    def _resolve(self):
        return _import_module(self.mod)


class MovedAttribute(_LazyDescr):

    def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None):
        super(MovedAttribute, self).__init__(name)
        if PY3:
            if new_mod is None:
                new_mod = name
            self.mod = new_mod
            if new_attr is None:
                if old_attr is None:
                    new_attr = name
                else:
                    new_attr = old_attr
            self.attr = new_attr
        else:
            self.mod = old_mod
            if old_attr is None:
                old_attr = name
            self.attr = old_attr

    def _resolve(self):
        module = _import_module(self.mod)
        return getattr(module, self.attr)


class _MovedItems(types.ModuleType):
    """Lazy loading of moved objects"""


_moved_attributes = [
    MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"),
    MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"),
    MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"),
    MovedAttribute("map", "itertools", "builtins", "imap", "map"),
    MovedAttribute("reload_module", "__builtin__", "imp", "reload"),
    MovedAttribute("reduce", "__builtin__", "functools"),
    MovedAttribute("StringIO", "StringIO", "io"),
    MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"),
    MovedAttribute("zip", "itertools", "builtins", "izip", "zip"),

    MovedModule("builtins", "__builtin__"),
    MovedModule("configparser", "ConfigParser"),
    MovedModule("copyreg", "copy_reg"),
    MovedModule("http_cookiejar", "cookielib", "http.cookiejar"),
    MovedModule("http_cookies", "Cookie", "http.cookies"),
    MovedModule("html_entities", "htmlentitydefs", "html.entities"),
    MovedModule("html_parser", "HTMLParser", "html.parser"),
    MovedModule("http_client", "httplib", "http.client"),
    MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"),
    MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"),
    MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"),
    MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"),
    MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"),
    MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"),
    MovedModule("cPickle", "cPickle", "pickle"),
    MovedModule("queue", "Queue"),
    MovedModule("reprlib", "repr"),
    MovedModule("socketserver", "SocketServer"),
    MovedModule("tkinter", "Tkinter"),
    MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"),
    MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"),
    MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"),
    MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"),
    MovedModule("tkinter_tix", "Tix", "tkinter.tix"),
    MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"),
    MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"),
    MovedModule("tkinter_colorchooser", "tkColorChooser",
                "tkinter.colorchooser"),
    MovedModule("tkinter_commondialog", "tkCommonDialog",
                "tkinter.commondialog"),
    MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"),
    MovedModule("tkinter_font", "tkFont", "tkinter.font"),
    MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"),
    MovedModule("tkinter_tksimpledialog", "tkSimpleDialog",
                "tkinter.simpledialog"),
    MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"),
    MovedModule("winreg", "_winreg"),
]
for attr in _moved_attributes:
    setattr(_MovedItems, attr.name, attr)
del attr

moves = sys.modules[__name__ + ".moves"] = _MovedItems("moves")


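With the registry above in place, imports go through version-neutral names; a sketch using the standalone six package, which this file vendored:

from six import moves

# Resolves to __builtin__.xrange on Python 2 and builtins.range on Python 3.
for i in moves.xrange(3):
    print(i)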
def add_move(move):
    """Add an item to six.moves."""
    setattr(_MovedItems, move.name, move)


def remove_move(name):
    """Remove item from six.moves."""
    try:
        delattr(_MovedItems, name)
    except AttributeError:
        try:
            del moves.__dict__[name]
        except KeyError:
            raise AttributeError("no such move, %r" % (name,))


if PY3:
    _meth_func = "__func__"
    _meth_self = "__self__"

    _func_code = "__code__"
    _func_defaults = "__defaults__"

    _iterkeys = "keys"
    _itervalues = "values"
    _iteritems = "items"
else:
    _meth_func = "im_func"
    _meth_self = "im_self"

    _func_code = "func_code"
    _func_defaults = "func_defaults"

    _iterkeys = "iterkeys"
    _itervalues = "itervalues"
    _iteritems = "iteritems"


try:
    advance_iterator = next
except NameError:
    def advance_iterator(it):
        return it.next()
next = advance_iterator


if PY3:
    def get_unbound_function(unbound):
        return unbound

    Iterator = object

    def callable(obj):
        return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
else:
    def get_unbound_function(unbound):
        return unbound.im_func

    class Iterator(object):

        def next(self):
            return type(self).__next__(self)

    callable = callable
_add_doc(get_unbound_function,
         """Get the function out of a possibly unbound function""")


get_method_function = operator.attrgetter(_meth_func)
get_method_self = operator.attrgetter(_meth_self)
get_function_code = operator.attrgetter(_func_code)
get_function_defaults = operator.attrgetter(_func_defaults)


def iterkeys(d):
    """Return an iterator over the keys of a dictionary."""
    return iter(getattr(d, _iterkeys)())


def itervalues(d):
    """Return an iterator over the values of a dictionary."""
    return iter(getattr(d, _itervalues)())


def iteritems(d):
    """Return an iterator over the (key, value) pairs of a dictionary."""
    return iter(getattr(d, _iteritems)())


if PY3:
    def b(s):
        return s.encode("latin-1")

    def u(s):
        return s

    if sys.version_info[1] <= 1:
        def int2byte(i):
            return bytes((i,))
    else:
        # This is about 2x faster than the implementation above on 3.2+
        int2byte = operator.methodcaller("to_bytes", 1, "big")
    import io
    StringIO = io.StringIO
    BytesIO = io.BytesIO
else:
    def b(s):
        return s

    def u(s):
        return unicode(s, "unicode_escape")
    int2byte = chr
    import StringIO
    StringIO = BytesIO = StringIO.StringIO
_add_doc(b, """Byte literal""")
_add_doc(u, """Text literal""")


if PY3:
    import builtins  # @UnresolvedImport
    exec_ = getattr(builtins, "exec")

    def reraise(tp, value, tb=None):
        if value.__traceback__ is not tb:
            raise value.with_traceback(tb)
        raise value

    print_ = getattr(builtins, "print")
    del builtins

else:
    def exec_(code, globs=None, locs=None):
        """Execute code in a namespace."""
        if globs is None:
            frame = sys._getframe(1)
            globs = frame.f_globals
            if locs is None:
                locs = frame.f_locals
            del frame
        elif locs is None:
            locs = globs
        exec("""exec code in globs, locs""")

    exec_("""def reraise(tp, value, tb=None):
    raise tp, value, tb
""")

    def print_(*args, **kwargs):
        """The new-style print function."""
        fp = kwargs.pop("file", sys.stdout)
        if fp is None:
            return

        def write(data):
            if not isinstance(data, basestring):
                data = str(data)
            fp.write(data)
        want_unicode = False
        sep = kwargs.pop("sep", None)
        if sep is not None:
            if isinstance(sep, unicode):
                want_unicode = True
            elif not isinstance(sep, str):
                raise TypeError("sep must be None or a string")
        end = kwargs.pop("end", None)
        if end is not None:
            if isinstance(end, unicode):
                want_unicode = True
            elif not isinstance(end, str):
                raise TypeError("end must be None or a string")
        if kwargs:
            raise TypeError("invalid keyword arguments to print()")
        if not want_unicode:
            for arg in args:
                if isinstance(arg, unicode):
                    want_unicode = True
                    break
        if want_unicode:
            newline = unicode("\n")
            space = unicode(" ")
        else:
            newline = "\n"
            space = " "
        if sep is None:
            sep = space
        if end is None:
            end = newline
        for i, arg in enumerate(args):
            if i:
                write(sep)
            write(arg)
        write(end)

_add_doc(reraise, """Reraise an exception.""")


def with_metaclass(meta, base=object):
    """Create a base class with a metaclass."""
    return meta("NewBase", (base,), {})
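A minimal sketch of `with_metaclass` in use: the temporary "NewBase" class is created by `meta`, so the real subclass inherits the metaclass on both Python 2 and 3 without version-specific syntax.

class Meta(type):
    def __new__(mcls, name, bases, ns):
        ns.setdefault('tag', name.lower())   # hypothetical behaviour for illustration
        return super(Meta, mcls).__new__(mcls, name, bases, ns)

class Tagged(with_metaclass(Meta)):   # works identically on Python 2 and 3
    pass

print(Tagged.tag)   # -> 'tagged'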
File diff suppressed because it is too large
@ -1,238 +0,0 @@
from __future__ import division, print_function, absolute_import

import numpy as np
from numpy.testing import assert_array_almost_equal, run_module_suite
from scipy.stats import \
    binned_statistic, binned_statistic_2d, binned_statistic_dd


class TestBinnedStatistic(object):

    @classmethod
    def setup_class(cls):
        np.random.seed(9865)
        cls.x = np.random.random(100)
        cls.y = np.random.random(100)
        cls.v = np.random.random(100)
        cls.X = np.random.random((100, 3))

    def test_1d_count(self):
        x = self.x
        v = self.v

        count1, edges1, bc = binned_statistic(x, v, 'count', bins=10)
        count2, edges2 = np.histogram(x, bins=10)

        assert_array_almost_equal(count1, count2)
        assert_array_almost_equal(edges1, edges2)

    def test_1d_sum(self):
        x = self.x
        v = self.v

        sum1, edges1, bc = binned_statistic(x, v, 'sum', bins=10)
        sum2, edges2 = np.histogram(x, bins=10, weights=v)

        assert_array_almost_equal(sum1, sum2)
        assert_array_almost_equal(edges1, edges2)

    def test_1d_mean(self):
        x = self.x
        v = self.v

        stat1, edges1, bc = binned_statistic(x, v, 'mean', bins=10)
        stat2, edges2, bc = binned_statistic(x, v, np.mean, bins=10)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(edges1, edges2)

    def test_1d_std(self):
        x = self.x
        v = self.v

        stat1, edges1, bc = binned_statistic(x, v, 'std', bins=10)
        stat2, edges2, bc = binned_statistic(x, v, np.std, bins=10)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(edges1, edges2)

    def test_1d_median(self):
        x = self.x
        v = self.v

        stat1, edges1, bc = binned_statistic(x, v, 'median', bins=10)
        stat2, edges2, bc = binned_statistic(x, v, np.median, bins=10)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(edges1, edges2)

    def test_1d_bincode(self):
        x = self.x[:20]
        v = self.v[:20]

        count1, edges1, bc = binned_statistic(x, v, 'count', bins=3)
        bc2 = np.array([3, 2, 1, 3, 2, 3, 3, 3, 3, 1, 1, 3, 3, 1, 2, 3, 1,
                        1, 2, 1])

        bcount = [(bc == i).sum() for i in np.unique(bc)]

        assert_array_almost_equal(bc, bc2)
        assert_array_almost_equal(bcount, count1)

    def test_1d_range_keyword(self):
        # Regression test for gh-3063, range can be (min, max) or [(min, max)]
        np.random.seed(9865)
        x = np.arange(30)
        data = np.random.random(30)

        mean, bins, _ = binned_statistic(x[:15], data[:15])
        mean_range, bins_range, _ = binned_statistic(x, data, range=[(0, 14)])
        mean_range2, bins_range2, _ = binned_statistic(x, data, range=(0, 14))

        assert_array_almost_equal(mean, mean_range)
        assert_array_almost_equal(bins, bins_range)
        assert_array_almost_equal(mean, mean_range2)
        assert_array_almost_equal(bins, bins_range2)

    def test_2d_count(self):
        x = self.x
        y = self.y
        v = self.v

        count1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'count', bins=5)
        count2, binx2, biny2 = np.histogram2d(x, y, bins=5)

        assert_array_almost_equal(count1, count2)
        assert_array_almost_equal(binx1, binx2)
        assert_array_almost_equal(biny1, biny2)

    def test_2d_sum(self):
        x = self.x
        y = self.y
        v = self.v

        sum1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'sum', bins=5)
        sum2, binx2, biny2 = np.histogram2d(x, y, bins=5, weights=v)

        assert_array_almost_equal(sum1, sum2)
        assert_array_almost_equal(binx1, binx2)
        assert_array_almost_equal(biny1, biny2)

    def test_2d_mean(self):
        x = self.x
        y = self.y
        v = self.v

        stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'mean', bins=5)
        stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(binx1, binx2)
        assert_array_almost_equal(biny1, biny2)

    def test_2d_std(self):
        x = self.x
        y = self.y
        v = self.v

        stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'std', bins=5)
        stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.std, bins=5)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(binx1, binx2)
        assert_array_almost_equal(biny1, biny2)

    def test_2d_median(self):
        x = self.x
        y = self.y
        v = self.v

        stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'median', bins=5)
        stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.median, bins=5)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(binx1, binx2)
        assert_array_almost_equal(biny1, biny2)

    def test_2d_bincode(self):
        x = self.x[:20]
        y = self.y[:20]
        v = self.v[:20]

        count1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'count', bins=3)
        bc2 = np.array([17, 11, 6, 16, 11, 17, 18, 17, 17, 7, 6, 18, 16,
                        6, 11, 16, 6, 6, 11, 8])

        bcount = [(bc == i).sum() for i in np.unique(bc)]

        assert_array_almost_equal(bc, bc2)
        count1adj = count1[count1.nonzero()]
        assert_array_almost_equal(bcount, count1adj)

    def test_dd_count(self):
        X = self.X
        v = self.v

        count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
        count2, edges2 = np.histogramdd(X, bins=3)

        assert_array_almost_equal(count1, count2)
        assert_array_almost_equal(edges1, edges2)

    def test_dd_sum(self):
        X = self.X
        v = self.v

        sum1, edges1, bc = binned_statistic_dd(X, v, 'sum', bins=3)
        sum2, edges2 = np.histogramdd(X, bins=3, weights=v)

        assert_array_almost_equal(sum1, sum2)
        assert_array_almost_equal(edges1, edges2)

    def test_dd_mean(self):
        X = self.X
        v = self.v

        stat1, edges1, bc = binned_statistic_dd(X, v, 'mean', bins=3)
        stat2, edges2, bc = binned_statistic_dd(X, v, np.mean, bins=3)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(edges1, edges2)

    def test_dd_std(self):
        X = self.X
        v = self.v

        stat1, edges1, bc = binned_statistic_dd(X, v, 'std', bins=3)
        stat2, edges2, bc = binned_statistic_dd(X, v, np.std, bins=3)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(edges1, edges2)

    def test_dd_median(self):
        X = self.X
        v = self.v

        stat1, edges1, bc = binned_statistic_dd(X, v, 'median', bins=3)
        stat2, edges2, bc = binned_statistic_dd(X, v, np.median, bins=3)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(edges1, edges2)

    def test_dd_bincode(self):
        X = self.X[:20]
        v = self.v[:20]

        count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
        bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92,
                        32, 36, 91, 43, 87, 81, 81])

        bcount = [(bc == i).sum() for i in np.unique(bc)]

        assert_array_almost_equal(bc, bc2)
        count1adj = count1[count1.nonzero()]
        assert_array_almost_equal(bcount, count1adj)


if __name__ == "__main__":
    run_module_suite()
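The tests above pin the 'count' and 'sum' statistics to numpy's histogram functions; a compact sketch of the equivalence being asserted:

import numpy as np
from scipy.stats import binned_statistic

x = np.random.random(100)
v = np.random.random(100)
counts, edges, _ = binned_statistic(x, v, 'count', bins=10)
ref_counts, ref_edges = np.histogram(x, bins=10)
assert np.allclose(counts, ref_counts) and np.allclose(edges, ref_edges)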
@ -1,202 +0,0 @@
from __future__ import division, print_function, absolute_import

import numpy as np
from numpy.testing import (run_module_suite, assert_equal, assert_array_equal,
         assert_array_almost_equal, assert_approx_equal, assert_raises,
         assert_allclose)
from scipy.special import xlogy
from scipy.stats.contingency import margins, expected_freq, chi2_contingency


def test_margins():
    a = np.array([1])
    m = margins(a)
    assert_equal(len(m), 1)
    m0 = m[0]
    assert_array_equal(m0, np.array([1]))

    a = np.array([[1]])
    m0, m1 = margins(a)
    expected0 = np.array([[1]])
    expected1 = np.array([[1]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)

    a = np.arange(12).reshape(2, 6)
    m0, m1 = margins(a)
    expected0 = np.array([[15], [51]])
    expected1 = np.array([[6, 8, 10, 12, 14, 16]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)

    a = np.arange(24).reshape(2, 3, 4)
    m0, m1, m2 = margins(a)
    expected0 = np.array([[[66]], [[210]]])
    expected1 = np.array([[[60], [92], [124]]])
    expected2 = np.array([[[60, 66, 72, 78]]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)
    assert_array_equal(m2, expected2)


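A one-liner sketch of what `margins` computes, on the 2-D case from the test above:

import numpy as np
from scipy.stats.contingency import margins

a = np.arange(12).reshape(2, 6)
m0, m1 = margins(a)   # row sums, shape (2, 1), and column sums, shape (1, 6)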
def test_expected_freq():
    assert_array_equal(expected_freq([1]), np.array([1.0]))

    observed = np.array([[[2, 0], [0, 2]], [[0, 2], [2, 0]], [[1, 1], [1, 1]]])
    e = expected_freq(observed)
    assert_array_equal(e, np.ones_like(observed))

    observed = np.array([[10, 10, 20], [20, 20, 20]])
    e = expected_freq(observed)
    correct = np.array([[12., 12., 16.], [18., 18., 24.]])
    assert_array_almost_equal(e, correct)


def test_chi2_contingency_trivial():
    # Some very simple tests for chi2_contingency.

    # A trivial case
    obs = np.array([[1, 2], [1, 2]])
    chi2, p, dof, expected = chi2_contingency(obs, correction=False)
    assert_equal(chi2, 0.0)
    assert_equal(p, 1.0)
    assert_equal(dof, 1)
    assert_array_equal(obs, expected)

    # A *really* trivial case: 1-D data.
    obs = np.array([1, 2, 3])
    chi2, p, dof, expected = chi2_contingency(obs, correction=False)
    assert_equal(chi2, 0.0)
    assert_equal(p, 1.0)
    assert_equal(dof, 0)
    assert_array_equal(obs, expected)


def test_chi2_contingency_R():
    # Some test cases that were computed independently, using R.

    Rcode = \
    """
    # Data vector.
    data <- c(
      12, 34, 23,     4,  47,  11,
      35, 31, 11,    34,  10,  18,
      12, 32,  9,    18,  13,  19,
      12, 12, 14,     9,  33,  25
      )

    # Create factor tags:r=rows, c=columns, t=tiers
    r <- factor(gl(4, 2*3, 2*3*4, labels=c("r1", "r2", "r3", "r4")))
    c <- factor(gl(3, 1,   2*3*4, labels=c("c1", "c2", "c3")))
    t <- factor(gl(2, 3,   2*3*4, labels=c("t1", "t2")))

    # 3-way Chi squared test of independence
    s = summary(xtabs(data~r+c+t))
    print(s)
    """
    Routput = \
    """
    Call: xtabs(formula = data ~ r + c + t)
    Number of cases in table: 478
    Number of factors: 3
    Test for independence of all factors:
            Chisq = 102.17, df = 17, p-value = 3.514e-14
    """
    obs = np.array(
        [[[12, 34, 23],
          [35, 31, 11],
          [12, 32, 9],
          [12, 12, 14]],
         [[4, 47, 11],
          [34, 10, 18],
          [18, 13, 19],
          [9, 33, 25]]])
    chi2, p, dof, expected = chi2_contingency(obs)
    assert_approx_equal(chi2, 102.17, significant=5)
    assert_approx_equal(p, 3.514e-14, significant=4)
    assert_equal(dof, 17)

    Rcode = \
    """
    # Data vector.
    data <- c(
        #
        12, 17,
        11, 16,
        #
        11, 12,
        15, 16,
        #
        23, 15,
        30, 22,
        #
        14, 17,
        15, 16
        )

    # Create factor tags:r=rows, c=columns, d=depths(?), t=tiers
 | 
			
		||||
    r <- factor(gl(2, 2,  2*2*2*2, labels=c("r1", "r2")))
 | 
			
		||||
    c <- factor(gl(2, 1,  2*2*2*2, labels=c("c1", "c2")))
 | 
			
		||||
    d <- factor(gl(2, 4,  2*2*2*2, labels=c("d1", "d2")))
 | 
			
		||||
    t <- factor(gl(2, 8,  2*2*2*2, labels=c("t1", "t2")))
 | 
			
		||||
 | 
			
		||||
    # 4-way Chi squared test of independence
 | 
			
		||||
    s = summary(xtabs(data~r+c+d+t))
 | 
			
		||||
    print(s)
 | 
			
		||||
    """
 | 
			
		||||
    Routput = \
 | 
			
		||||
    """
 | 
			
		||||
    Call: xtabs(formula = data ~ r + c + d + t)
 | 
			
		||||
    Number of cases in table: 262
 | 
			
		||||
    Number of factors: 4
 | 
			
		||||
    Test for independence of all factors:
 | 
			
		||||
            Chisq = 8.758, df = 11, p-value = 0.6442
 | 
			
		||||
    """
 | 
			
		||||
    obs = np.array(
 | 
			
		||||
        [[[[12, 17],
 | 
			
		||||
           [11, 16]],
 | 
			
		||||
          [[11, 12],
 | 
			
		||||
           [15, 16]]],
 | 
			
		||||
         [[[23, 15],
 | 
			
		||||
           [30, 22]],
 | 
			
		||||
          [[14, 17],
 | 
			
		||||
           [15, 16]]]])
 | 
			
		||||
    chi2, p, dof, expected = chi2_contingency(obs)
 | 
			
		||||
    assert_approx_equal(chi2, 8.758, significant=4)
 | 
			
		||||
    assert_approx_equal(p, 0.6442, significant=4)
 | 
			
		||||
    assert_equal(dof, 11)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_chi2_contingency_g():
 | 
			
		||||
    c = np.array([[15, 60], [15, 90]])
 | 
			
		||||
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', correction=False)
 | 
			
		||||
    assert_allclose(g, 2*xlogy(c, c/e).sum())
 | 
			
		||||
 | 
			
		||||
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', correction=True)
 | 
			
		||||
    c_corr = c + np.array([[-0.5, 0.5], [0.5, -0.5]])
 | 
			
		||||
    assert_allclose(g, 2*xlogy(c_corr, c_corr/e).sum())
 | 
			
		||||
 | 
			
		||||
    c = np.array([[10, 12, 10], [12, 10, 10]])
 | 
			
		||||
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood')
 | 
			
		||||
    assert_allclose(g, 2*xlogy(c, c/e).sum())
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_chi2_contingency_bad_args():
 | 
			
		||||
    # Test that "bad" inputs raise a ValueError.
 | 
			
		||||
 | 
			
		||||
    # Negative value in the array of observed frequencies.
 | 
			
		||||
    obs = np.array([[-1, 10], [1, 2]])
 | 
			
		||||
    assert_raises(ValueError, chi2_contingency, obs)
 | 
			
		||||
 | 
			
		||||
    # The zeros in this will result in zeros in the array
 | 
			
		||||
    # of expected frequencies.
 | 
			
		||||
    obs = np.array([[0, 1], [0, 1]])
 | 
			
		||||
    assert_raises(ValueError, chi2_contingency, obs)
 | 
			
		||||
 | 
			
		||||
    # A degenerate case: `observed` has size 0.
 | 
			
		||||
    obs = np.empty((0, 8))
 | 
			
		||||
    assert_raises(ValueError, chi2_contingency, obs)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
    run_module_suite()
 | 
			
		||||
											
												
File diff suppressed because it is too large
@ -1,202 +0,0 @@
from __future__ import division, print_function, absolute_import

from wafo import stats
import numpy as np
from numpy.testing import assert_almost_equal, assert_, assert_raises, \
    assert_array_almost_equal, assert_array_almost_equal_nulp, run_module_suite


def test_kde_1d():
    # some basic tests comparing to normal distribution
    np.random.seed(8765678)
    n_basesample = 500
    xn = np.random.randn(n_basesample)
    xnmean = xn.mean()
    xnstd = xn.std(ddof=1)

    # get kde for original sample
    gkde = stats.gaussian_kde(xn)

    # evaluate the density function for the kde for some points
    xs = np.linspace(-7, 7, 501)
    kdepdf = gkde.evaluate(xs)
    normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
    intervall = xs[1] - xs[0]

    assert_(np.sum((kdepdf - normpdf)**2)*intervall < 0.01)
    prob1 = gkde.integrate_box_1d(xnmean, np.inf)
    prob2 = gkde.integrate_box_1d(-np.inf, xnmean)
    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13)
    assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13)

    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum()*intervall, decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
                        (kdepdf*normpdf).sum()*intervall, decimal=2)


def test_kde_bandwidth_method():
    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.n, -1./(kde_obj.d+4))

    np.random.seed(8765678)
    n_basesample = 50
    xn = np.random.randn(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)
    # Supply a callable
    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor)
    # Supply a scalar
    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor)

    xs = np.linspace(-7, 7, 51)
    kdepdf = gkde.evaluate(xs)
    kdepdf2 = gkde2.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf2)
    kdepdf3 = gkde3.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf3)

    assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring')


# Subclasses that should stay working (extracted from various sources).
# Unfortunately the earlier design of gaussian_kde made it necessary for users
# to create these kinds of subclasses, or call _compute_covariance() directly.

class _kde_subclass1(stats.gaussian_kde):
    def __init__(self, dataset):
        self.dataset = np.atleast_2d(dataset)
        self.d, self.n = self.dataset.shape
        self.covariance_factor = self.scotts_factor
        self._compute_covariance()


class _kde_subclass2(stats.gaussian_kde):
    def __init__(self, dataset):
        self.covariance_factor = self.scotts_factor
        super(_kde_subclass2, self).__init__(dataset)


class _kde_subclass3(stats.gaussian_kde):
    def __init__(self, dataset, covariance):
        self.covariance = covariance
        stats.gaussian_kde.__init__(self, dataset)

    def _compute_covariance(self):
        self.inv_cov = np.linalg.inv(self.covariance)
        self._norm_factor = np.sqrt(np.linalg.det(2*np.pi * self.covariance)) \
                                   * self.n


class _kde_subclass4(stats.gaussian_kde):
    def covariance_factor(self):
        return 0.5 * self.silverman_factor()


def test_gaussian_kde_subclassing():
    x1 = np.array([-7, -5, 1, 4, 5], dtype=np.float)
    xs = np.linspace(-10, 10, num=50)

    # gaussian_kde itself
    kde = stats.gaussian_kde(x1)
    ys = kde(xs)

    # subclass 1
    kde1 = _kde_subclass1(x1)
    y1 = kde1(xs)
    assert_array_almost_equal_nulp(ys, y1, nulp=10)

    # subclass 2
    kde2 = _kde_subclass2(x1)
    y2 = kde2(xs)
    assert_array_almost_equal_nulp(ys, y2, nulp=10)

    # subclass 3
    kde3 = _kde_subclass3(x1, kde.covariance)
    y3 = kde3(xs)
    assert_array_almost_equal_nulp(ys, y3, nulp=10)

    # subclass 4
    kde4 = _kde_subclass4(x1)
    y4 = kde4(x1)
    y_expected = [0.06292987, 0.06346938, 0.05860291, 0.08657652, 0.07904017]

    assert_array_almost_equal(y_expected, y4, decimal=6)

    # Not a subclass, but check for use of _compute_covariance()
    kde5 = kde
    kde5.covariance_factor = lambda: kde.factor
    kde5._compute_covariance()
    y5 = kde5(xs)
    assert_array_almost_equal_nulp(ys, y5, nulp=10)


def test_gaussian_kde_covariance_caching():
    x1 = np.array([-7, -5, 1, 4, 5], dtype=np.float)
    xs = np.linspace(-10, 10, num=5)
    # These expected values are from scipy 0.10, before some changes to
    # gaussian_kde.  They were not compared with any external reference.
    y_expected = [0.02463386, 0.04689208, 0.05395444, 0.05337754, 0.01664475]

    # Set the bandwidth, then reset it to the default.
    kde = stats.gaussian_kde(x1)
    kde.set_bandwidth(bw_method=0.5)
    kde.set_bandwidth(bw_method='scott')
    y2 = kde(xs)

    assert_array_almost_equal(y_expected, y2, decimal=7)


def test_gaussian_kde_monkeypatch():
    """Ugly, but people may rely on this.  See scipy pull request 123,
    specifically the linked ML thread "Width of the Gaussian in stats.kde".
    If it is necessary to break this later on, that is to be discussed on ML.
    """
    x1 = np.array([-7, -5, 1, 4, 5], dtype=np.float)
    xs = np.linspace(-10, 10, num=50)

    # The old monkeypatched version to get at Silverman's Rule.
    kde = stats.gaussian_kde(x1)
    kde.covariance_factor = kde.silverman_factor
    kde._compute_covariance()
    y1 = kde(xs)

    # The new saner version.
    kde2 = stats.gaussian_kde(x1, bw_method='silverman')
    y2 = kde2(xs)

    assert_array_almost_equal_nulp(y1, y2, nulp=10)


def test_kde_integer_input():
    """Regression test for #1181."""
    x1 = np.arange(5)
    kde = stats.gaussian_kde(x1)
    y_expected = [0.13480721, 0.18222869, 0.19514935, 0.18222869, 0.13480721]
    assert_array_almost_equal(kde(x1), y_expected, decimal=6)


def test_pdf_logpdf():
    np.random.seed(1)
    n_basesample = 50
    xn = np.random.randn(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)

    xs = np.linspace(-15, 12, 25)
    pdf = gkde.evaluate(xs)
    pdf2 = gkde.pdf(xs)
    assert_almost_equal(pdf, pdf2, decimal=12)

    logpdf = np.log(pdf)
    logpdf2 = gkde.logpdf(xs)
    assert_almost_equal(logpdf, logpdf2, decimal=12)


if __name__ == "__main__":
    run_module_suite()
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -1,107 +0,0 @@
# pylint: disable-msg=W0611, W0612, W0511, R0201
"""Test suite for maskedArray statistics.

:author: Pierre Gerard-Marchant
:contact: pierregm_at_uga_dot_edu
"""
from __future__ import division, print_function, absolute_import

__author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)"

import numpy as np

import numpy.ma as ma

import wafo.stats.mstats as ms
# import wafo.stats.mmorestats as mms

from numpy.testing import TestCase, run_module_suite, assert_equal, \
    assert_almost_equal, assert_


class TestMisc(TestCase):

    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)

    def test_mjci(self):
        "Tests the Maritz-Jarrett estimator"
        data = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                         296, 299, 306, 376, 428, 515, 666, 1310, 2611])
        assert_almost_equal(ms.mjci(data), [55.76819, 45.84028, 198.87875], 5)

    def test_trimmedmeanci(self):
        "Tests the confidence intervals of the trimmed mean."
        data = ma.array([545, 555, 558, 572, 575, 576, 578, 580,
                         594, 605, 635, 651, 653, 661, 666])
        assert_almost_equal(ms.trimmed_mean(data, 0.2), 596.2, 1)
        assert_equal(np.round(ms.trimmed_mean_ci(data, (0.2, 0.2)), 1),
                     [561.8, 630.6])

    def test_idealfourths(self):
        "Tests ideal-fourths"
        test = np.arange(100)
        assert_almost_equal(np.asarray(ms.idealfourths(test)),
                            [24.416667, 74.583333], 6)
        test_2D = test.repeat(3).reshape(-1, 3)
        assert_almost_equal(ms.idealfourths(test_2D, axis=0),
                            [[24.416667, 24.416667, 24.416667],
                             [74.583333, 74.583333, 74.583333]], 6)
        assert_almost_equal(ms.idealfourths(test_2D, axis=1),
                            test.repeat(2).reshape(-1, 2))
        test = [0, 0]
        _result = ms.idealfourths(test)
        assert_(np.isnan(_result).all())

#..............................................................................


class TestQuantiles(TestCase):

    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)

    def test_hdquantiles(self):
        data = [0.706560797,0.727229578,0.990399276,0.927065621,0.158953014,
            0.887764025,0.239407086,0.349638551,0.972791145,0.149789972,
            0.936947700,0.132359948,0.046041972,0.641675031,0.945530547,
            0.224218684,0.771450991,0.820257774,0.336458052,0.589113496,
            0.509736129,0.696838829,0.491323573,0.622767425,0.775189248,
            0.641461450,0.118455200,0.773029450,0.319280007,0.752229111,
            0.047841438,0.466295911,0.583850781,0.840581845,0.550086491,
            0.466470062,0.504765074,0.226855960,0.362641207,0.891620942,
            0.127898691,0.490094097,0.044882048,0.041441695,0.317976349,
            0.504135618,0.567353033,0.434617473,0.636243375,0.231803616,
            0.230154113,0.160011327,0.819464108,0.854706985,0.438809221,
            0.487427267,0.786907310,0.408367937,0.405534192,0.250444460,
            0.995309248,0.144389588,0.739947527,0.953543606,0.680051621,
            0.388382017,0.863530727,0.006514031,0.118007779,0.924024803,
            0.384236354,0.893687694,0.626534881,0.473051932,0.750134705,
            0.241843555,0.432947602,0.689538104,0.136934797,0.150206859,
            0.474335206,0.907775349,0.525869295,0.189184225,0.854284286,
            0.831089744,0.251637345,0.587038213,0.254475554,0.237781276,
            0.827928620,0.480283781,0.594514455,0.213641488,0.024194386,
            0.536668589,0.699497811,0.892804071,0.093835427,0.731107772]
        #
        assert_almost_equal(ms.hdquantiles(data, [0., 1.]),
                            [0.006514031, 0.995309248])
        hdq = ms.hdquantiles(data, [0.25, 0.5, 0.75])
        assert_almost_equal(hdq, [0.253210762, 0.512847491, 0.762232442])
        hdq = ms.hdquantiles_sd(data, [0.25, 0.5, 0.75])
        assert_almost_equal(hdq, [0.03786954, 0.03805389, 0.03800152], 4)
        #
        data = np.array(data).reshape(10, 10)
        hdq = ms.hdquantiles(data, [0.25, 0.5, 0.75], axis=0)
        assert_almost_equal(hdq[:, 0],
                            ms.hdquantiles(data[:, 0], [0.25, 0.5, 0.75]))
        assert_almost_equal(hdq[:, -1],
                            ms.hdquantiles(data[:, -1], [0.25, 0.5, 0.75]))
        hdq = ms.hdquantiles(data, [0.25, 0.5, 0.75], axis=0, var=True)
        assert_almost_equal(hdq[..., 0],
                            ms.hdquantiles(data[:, 0], [0.25, 0.5, 0.75],
                                           var=True))
        assert_almost_equal(hdq[..., -1],
                            ms.hdquantiles(data[:, -1], [0.25, 0.5, 0.75],
                                           var=True))


###############################################################################

if __name__ == "__main__":
    run_module_suite()
@ -1,485 +0,0 @@
"""
 | 
			
		||||
Test functions for multivariate normal distributions.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
from __future__ import division, print_function, absolute_import
 | 
			
		||||
 | 
			
		||||
from numpy.testing import (
 | 
			
		||||
    assert_allclose,
 | 
			
		||||
    assert_almost_equal,
 | 
			
		||||
    assert_array_almost_equal,
 | 
			
		||||
    assert_equal,
 | 
			
		||||
    assert_raises,
 | 
			
		||||
    run_module_suite,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
import numpy
 | 
			
		||||
import numpy as np
 | 
			
		||||
 | 
			
		||||
import scipy.linalg
 | 
			
		||||
from wafo.stats._multivariate import _PSD, _lnB
 | 
			
		||||
from wafo.stats import multivariate_normal
 | 
			
		||||
from wafo.stats import dirichlet, beta
 | 
			
		||||
from wafo.stats import norm
 | 
			
		||||
 | 
			
		||||
from scipy.integrate import romb
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_input_shape():
 | 
			
		||||
    mu = np.arange(3)
 | 
			
		||||
    cov = np.identity(2)
 | 
			
		||||
    assert_raises(ValueError, multivariate_normal.pdf, (0, 1), mu, cov)
 | 
			
		||||
    assert_raises(ValueError, multivariate_normal.pdf, (0, 1, 2), mu, cov)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_scalar_values():
 | 
			
		||||
    np.random.seed(1234)
 | 
			
		||||
 | 
			
		||||
    # When evaluated on scalar data, the pdf should return a scalar
 | 
			
		||||
    x, mean, cov = 1.5, 1.7, 2.5
 | 
			
		||||
    pdf = multivariate_normal.pdf(x, mean, cov)
 | 
			
		||||
    assert_equal(pdf.ndim, 0)
 | 
			
		||||
 | 
			
		||||
    # When evaluated on a single vector, the pdf should return a scalar
 | 
			
		||||
    x = np.random.randn(5)
 | 
			
		||||
    mean = np.random.randn(5)
 | 
			
		||||
    cov = np.abs(np.random.randn(5))  # Diagonal values for cov. matrix
 | 
			
		||||
    pdf = multivariate_normal.pdf(x, mean, cov)
 | 
			
		||||
    assert_equal(pdf.ndim, 0)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_logpdf():
 | 
			
		||||
    # Check that the log of the pdf is in fact the logpdf
 | 
			
		||||
    np.random.seed(1234)
 | 
			
		||||
    x = np.random.randn(5)
 | 
			
		||||
    mean = np.random.randn(5)
 | 
			
		||||
    cov = np.abs(np.random.randn(5))
 | 
			
		||||
    d1 = multivariate_normal.logpdf(x, mean, cov)
 | 
			
		||||
    d2 = multivariate_normal.pdf(x, mean, cov)
 | 
			
		||||
    assert_allclose(d1, np.log(d2))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_rank():
 | 
			
		||||
    # Check that the rank is detected correctly.
 | 
			
		||||
    np.random.seed(1234)
 | 
			
		||||
    n = 4
 | 
			
		||||
    mean = np.random.randn(n)
 | 
			
		||||
    for expected_rank in range(1, n + 1):
 | 
			
		||||
        s = np.random.randn(n, expected_rank)
 | 
			
		||||
        cov = np.dot(s, s.T)
 | 
			
		||||
        distn = multivariate_normal(mean, cov, allow_singular=True)
 | 
			
		||||
        assert_equal(distn.cov_info.rank, expected_rank)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _sample_orthonormal_matrix(n):
 | 
			
		||||
    M = np.random.randn(n, n)
 | 
			
		||||
    u, s, v = scipy.linalg.svd(M)
 | 
			
		||||
    return u
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_degenerate_distributions():
 | 
			
		||||
    for n in range(1, 5):
 | 
			
		||||
        x = np.random.randn(n)
 | 
			
		||||
        for k in range(1, n + 1):
 | 
			
		||||
            # Sample a small covariance matrix.
 | 
			
		||||
            s = np.random.randn(k, k)
 | 
			
		||||
            cov_kk = np.dot(s, s.T)
 | 
			
		||||
 | 
			
		||||
            # Embed the small covariance matrix into a larger low rank matrix.
 | 
			
		||||
            cov_nn = np.zeros((n, n))
 | 
			
		||||
            cov_nn[:k, :k] = cov_kk
 | 
			
		||||
 | 
			
		||||
            # Define a rotation of the larger low rank matrix.
 | 
			
		||||
            u = _sample_orthonormal_matrix(n)
 | 
			
		||||
            cov_rr = np.dot(u, np.dot(cov_nn, u.T))
 | 
			
		||||
            y = np.dot(u, x)
 | 
			
		||||
 | 
			
		||||
            # Check some identities.
 | 
			
		||||
            distn_kk = multivariate_normal(np.zeros(k), cov_kk,
 | 
			
		||||
                                           allow_singular=True)
 | 
			
		||||
            distn_nn = multivariate_normal(np.zeros(n), cov_nn,
 | 
			
		||||
                                           allow_singular=True)
 | 
			
		||||
            distn_rr = multivariate_normal(np.zeros(n), cov_rr,
 | 
			
		||||
                                           allow_singular=True)
 | 
			
		||||
            assert_equal(distn_kk.cov_info.rank, k)
 | 
			
		||||
            assert_equal(distn_nn.cov_info.rank, k)
 | 
			
		||||
            assert_equal(distn_rr.cov_info.rank, k)
 | 
			
		||||
            pdf_kk = distn_kk.pdf(x[:k])
 | 
			
		||||
            pdf_nn = distn_nn.pdf(x)
 | 
			
		||||
            pdf_rr = distn_rr.pdf(y)
 | 
			
		||||
            assert_allclose(pdf_kk, pdf_nn)
 | 
			
		||||
            assert_allclose(pdf_kk, pdf_rr)
 | 
			
		||||
            logpdf_kk = distn_kk.logpdf(x[:k])
 | 
			
		||||
            logpdf_nn = distn_nn.logpdf(x)
 | 
			
		||||
            logpdf_rr = distn_rr.logpdf(y)
 | 
			
		||||
            assert_allclose(logpdf_kk, logpdf_nn)
 | 
			
		||||
            assert_allclose(logpdf_kk, logpdf_rr)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_large_pseudo_determinant():
 | 
			
		||||
    # Check that large pseudo-determinants are handled appropriately.
 | 
			
		||||
 | 
			
		||||
    # Construct a singular diagonal covariance matrix
 | 
			
		||||
    # whose pseudo determinant overflows double precision.
 | 
			
		||||
    large_total_log = 1000.0
 | 
			
		||||
    npos = 100
 | 
			
		||||
    nzero = 2
 | 
			
		||||
    large_entry = np.exp(large_total_log / npos)
 | 
			
		||||
    n = npos + nzero
 | 
			
		||||
    cov = np.zeros((n, n), dtype=float)
 | 
			
		||||
    np.fill_diagonal(cov, large_entry)
 | 
			
		||||
    cov[-nzero:, -nzero:] = 0
 | 
			
		||||
 | 
			
		||||
    # Check some determinants.
 | 
			
		||||
    assert_equal(scipy.linalg.det(cov), 0)
 | 
			
		||||
    assert_equal(scipy.linalg.det(cov[:npos, :npos]), np.inf)
 | 
			
		||||
 | 
			
		||||
    # np.linalg.slogdet is only available in numpy 1.6+
 | 
			
		||||
    # but scipy currently supports numpy 1.5.1.
 | 
			
		||||
    # assert_allclose(np.linalg.slogdet(cov[:npos, :npos]),
 | 
			
		||||
    #                 (1, large_total_log))
 | 
			
		||||
 | 
			
		||||
    # Check the pseudo-determinant.
 | 
			
		||||
    psd = _PSD(cov)
 | 
			
		||||
    assert_allclose(psd.log_pdet, large_total_log)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_broadcasting():
 | 
			
		||||
    np.random.seed(1234)
 | 
			
		||||
    n = 4
 | 
			
		||||
 | 
			
		||||
    # Construct a random covariance matrix.
 | 
			
		||||
    data = np.random.randn(n, n)
 | 
			
		||||
    cov = np.dot(data, data.T)
 | 
			
		||||
    mean = np.random.randn(n)
 | 
			
		||||
 | 
			
		||||
    # Construct an ndarray which can be interpreted as
 | 
			
		||||
    # a 2x3 array whose elements are random data vectors.
 | 
			
		||||
    X = np.random.randn(2, 3, n)
 | 
			
		||||
 | 
			
		||||
    # Check that multiple data points can be evaluated at once.
 | 
			
		||||
    for i in range(2):
 | 
			
		||||
        for j in range(3):
 | 
			
		||||
            actual = multivariate_normal.pdf(X[i, j], mean, cov)
 | 
			
		||||
            desired = multivariate_normal.pdf(X, mean, cov)[i, j]
 | 
			
		||||
            assert_allclose(actual, desired)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_normal_1D():
 | 
			
		||||
    # The probability density function for a 1D normal variable should
 | 
			
		||||
    # agree with the standard normal distribution in scipy.stats.distributions
 | 
			
		||||
    x = np.linspace(0, 2, 10)
 | 
			
		||||
    mean, cov = 1.2, 0.9
 | 
			
		||||
    scale = cov**0.5
 | 
			
		||||
    d1 = norm.pdf(x, mean, scale)
 | 
			
		||||
    d2 = multivariate_normal.pdf(x, mean, cov)
 | 
			
		||||
    assert_allclose(d1, d2)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_marginalization():
 | 
			
		||||
    # Integrating out one of the variables of a 2D Gaussian should
 | 
			
		||||
    # yield a 1D Gaussian
 | 
			
		||||
    mean = np.array([2.5, 3.5])
 | 
			
		||||
    cov = np.array([[.5, 0.2], [0.2, .6]])
 | 
			
		||||
    n = 2 ** 8 + 1  # Number of samples
 | 
			
		||||
    delta = 6 / (n - 1)  # Grid spacing
 | 
			
		||||
 | 
			
		||||
    v = np.linspace(0, 6, n)
 | 
			
		||||
    xv, yv = np.meshgrid(v, v)
 | 
			
		||||
    pos = np.empty((n, n, 2))
 | 
			
		||||
    pos[:, :, 0] = xv
 | 
			
		||||
    pos[:, :, 1] = yv
 | 
			
		||||
    pdf = multivariate_normal.pdf(pos, mean, cov)
 | 
			
		||||
 | 
			
		||||
    # Marginalize over x and y axis
 | 
			
		||||
    margin_x = romb(pdf, delta, axis=0)
 | 
			
		||||
    margin_y = romb(pdf, delta, axis=1)
 | 
			
		||||
 | 
			
		||||
    # Compare with standard normal distribution
 | 
			
		||||
    gauss_x = norm.pdf(v, loc=mean[0], scale=cov[0, 0] ** 0.5)
 | 
			
		||||
    gauss_y = norm.pdf(v, loc=mean[1], scale=cov[1, 1] ** 0.5)
 | 
			
		||||
    assert_allclose(margin_x, gauss_x, rtol=1e-2, atol=1e-2)
 | 
			
		||||
    assert_allclose(margin_y, gauss_y, rtol=1e-2, atol=1e-2)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_frozen():
 | 
			
		||||
    # The frozen distribution should agree with the regular one
 | 
			
		||||
    np.random.seed(1234)
 | 
			
		||||
    x = np.random.randn(5)
 | 
			
		||||
    mean = np.random.randn(5)
 | 
			
		||||
    cov = np.abs(np.random.randn(5))
 | 
			
		||||
    norm_frozen = multivariate_normal(mean, cov)
 | 
			
		||||
    assert_allclose(norm_frozen.pdf(x), multivariate_normal.pdf(x, mean, cov))
 | 
			
		||||
    assert_allclose(norm_frozen.logpdf(x),
 | 
			
		||||
                    multivariate_normal.logpdf(x, mean, cov))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_pseudodet_pinv():
 | 
			
		||||
    # Make sure that pseudo-inverse and pseudo-det agree on cutoff
 | 
			
		||||
 | 
			
		||||
    # Assemble random covariance matrix with large and small eigenvalues
 | 
			
		||||
    np.random.seed(1234)
 | 
			
		||||
    n = 7
 | 
			
		||||
    x = np.random.randn(n, n)
 | 
			
		||||
    cov = np.dot(x, x.T)
 | 
			
		||||
    s, u = scipy.linalg.eigh(cov)
 | 
			
		||||
    s = 0.5 * np.ones(n)
 | 
			
		||||
    s[0] = 1.0
 | 
			
		||||
    s[-1] = 1e-7
 | 
			
		||||
    cov = np.dot(u, np.dot(np.diag(s), u.T))
 | 
			
		||||
 | 
			
		||||
    # Set cond so that the lowest eigenvalue is below the cutoff
 | 
			
		||||
    cond = 1e-5
 | 
			
		||||
    psd = _PSD(cov, cond=cond)
 | 
			
		||||
    psd_pinv = _PSD(psd.pinv, cond=cond)
 | 
			
		||||
 | 
			
		||||
    # Check that the log pseudo-determinant agrees with the sum
 | 
			
		||||
    # of the logs of all but the smallest eigenvalue
 | 
			
		||||
    assert_allclose(psd.log_pdet, np.sum(np.log(s[:-1])))
 | 
			
		||||
    # Check that the pseudo-determinant of the pseudo-inverse
 | 
			
		||||
    # agrees with 1 / pseudo-determinant
 | 
			
		||||
    assert_allclose(-psd.log_pdet, psd_pinv.log_pdet)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_exception_nonsquare_cov():
 | 
			
		||||
    cov = [[1, 2, 3], [4, 5, 6]]
 | 
			
		||||
    assert_raises(ValueError, _PSD, cov)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_exception_nonfinite_cov():
 | 
			
		||||
    cov_nan = [[1, 0], [0, np.nan]]
 | 
			
		||||
    assert_raises(ValueError, _PSD, cov_nan)
 | 
			
		||||
    cov_inf = [[1, 0], [0, np.inf]]
 | 
			
		||||
    assert_raises(ValueError, _PSD, cov_inf)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_exception_non_psd_cov():
 | 
			
		||||
    cov = [[1, 0], [0, -1]]
 | 
			
		||||
    assert_raises(ValueError, _PSD, cov)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_exception_singular_cov():
 | 
			
		||||
    np.random.seed(1234)
 | 
			
		||||
    x = np.random.randn(5)
 | 
			
		||||
    mean = np.random.randn(5)
 | 
			
		||||
    cov = np.ones((5, 5))
 | 
			
		||||
    e = np.linalg.LinAlgError
 | 
			
		||||
    assert_raises(e, multivariate_normal, mean, cov)
 | 
			
		||||
    assert_raises(e, multivariate_normal.pdf, x, mean, cov)
 | 
			
		||||
    assert_raises(e, multivariate_normal.logpdf, x, mean, cov)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_R_values():
 | 
			
		||||
    # Compare the multivariate pdf with some values precomputed
 | 
			
		||||
    # in R version 3.0.1 (2013-05-16) on Mac OS X 10.6.
 | 
			
		||||
 | 
			
		||||
    # The values below were generated by the following R-script:
 | 
			
		||||
    # > library(mnormt)
 | 
			
		||||
    # > x <- seq(0, 2, length=5)
 | 
			
		||||
    # > y <- 3*x - 2
 | 
			
		||||
    # > z <- x + cos(y)
 | 
			
		||||
    # > mu <- c(1, 3, 2)
 | 
			
		||||
    # > Sigma <- matrix(c(1,2,0,2,5,0.5,0,0.5,3), 3, 3)
 | 
			
		||||
    # > r_pdf <- dmnorm(cbind(x,y,z), mu, Sigma)
 | 
			
		||||
    r_pdf = np.array([0.0002214706, 0.0013819953, 0.0049138692,
 | 
			
		||||
                      0.0103803050, 0.0140250800])
 | 
			
		||||
 | 
			
		||||
    x = np.linspace(0, 2, 5)
 | 
			
		||||
    y = 3 * x - 2
 | 
			
		||||
    z = x + np.cos(y)
 | 
			
		||||
    r = np.array([x, y, z]).T
 | 
			
		||||
 | 
			
		||||
    mean = np.array([1, 3, 2], 'd')
 | 
			
		||||
    cov = np.array([[1, 2, 0], [2, 5, .5], [0, .5, 3]], 'd')
 | 
			
		||||
 | 
			
		||||
    pdf = multivariate_normal.pdf(r, mean, cov)
 | 
			
		||||
    assert_allclose(pdf, r_pdf, atol=1e-10)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_multivariate_normal_rvs_zero_covariance():
 | 
			
		||||
    mean = np.zeros(2)
 | 
			
		||||
    covariance = np.zeros((2, 2))
 | 
			
		||||
    model = multivariate_normal(mean, covariance, allow_singular=True)
 | 
			
		||||
    sample = model.rvs()
 | 
			
		||||
    assert_equal(sample, [0, 0])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_rvs_shape():
 | 
			
		||||
    # Check that rvs parses the mean and covariance correctly, and returns
 | 
			
		||||
    # an array of the right shape
 | 
			
		||||
    N = 300
 | 
			
		||||
    d = 4
 | 
			
		||||
    sample = multivariate_normal.rvs(mean=np.zeros(d), cov=1, size=N)
 | 
			
		||||
    assert_equal(sample.shape, (N, d))
 | 
			
		||||
 | 
			
		||||
    sample = multivariate_normal.rvs(mean=None,
 | 
			
		||||
                                     cov=np.array([[2, .1], [.1, 1]]),
 | 
			
		||||
                                     size=N)
 | 
			
		||||
    assert_equal(sample.shape, (N, 2))
 | 
			
		||||
 | 
			
		||||
    u = multivariate_normal(mean=0, cov=1)
 | 
			
		||||
    sample = u.rvs(N)
 | 
			
		||||
    assert_equal(sample.shape, (N, ))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_large_sample():
 | 
			
		||||
    # Generate large sample and compare sample mean and sample covariance
 | 
			
		||||
    # with mean and covariance matrix.
 | 
			
		||||
 | 
			
		||||
    np.random.seed(2846)
 | 
			
		||||
 | 
			
		||||
    n = 3
 | 
			
		||||
    mean = np.random.randn(n)
 | 
			
		||||
    M = np.random.randn(n, n)
 | 
			
		||||
    cov = np.dot(M, M.T)
 | 
			
		||||
    size = 5000
 | 
			
		||||
 | 
			
		||||
    sample = multivariate_normal.rvs(mean, cov, size)
 | 
			
		||||
 | 
			
		||||
    assert_allclose(numpy.cov(sample.T), cov, rtol=1e-1)
 | 
			
		||||
    assert_allclose(sample.mean(0), mean, rtol=1e-1)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_entropy():
 | 
			
		||||
    np.random.seed(2846)
 | 
			
		||||
 | 
			
		||||
    n = 3
 | 
			
		||||
    mean = np.random.randn(n)
 | 
			
		||||
    M = np.random.randn(n, n)
 | 
			
		||||
    cov = np.dot(M, M.T)
 | 
			
		||||
 | 
			
		||||
    rv = multivariate_normal(mean, cov)
 | 
			
		||||
 | 
			
		||||
    # Check that frozen distribution agrees with entropy function
 | 
			
		||||
    assert_almost_equal(rv.entropy(), multivariate_normal.entropy(mean, cov))
 | 
			
		||||
    # Compare entropy with manually computed expression involving
 | 
			
		||||
    # the sum of the logs of the eigenvalues of the covariance matrix
 | 
			
		||||
    eigs = np.linalg.eig(cov)[0]
 | 
			
		||||
    desired = 1 / 2 * (n * (np.log(2 * np.pi) + 1) + np.sum(np.log(eigs)))
 | 
			
		||||
    assert_almost_equal(desired, rv.entropy())
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_lnB():
 | 
			
		||||
    alpha = np.array([1, 1, 1])
 | 
			
		||||
    desired = .5  # e^lnB = 1/2 for [1, 1, 1]
 | 
			
		||||
 | 
			
		||||
    assert_almost_equal(np.exp(_lnB(alpha)), desired)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_frozen_dirichlet():
 | 
			
		||||
    np.random.seed(2846)
 | 
			
		||||
 | 
			
		||||
    n = np.random.randint(1, 32)
 | 
			
		||||
    alpha = np.random.uniform(10e-10, 100, n)
 | 
			
		||||
 | 
			
		||||
    d = dirichlet(alpha)
 | 
			
		||||
 | 
			
		||||
    assert_equal(d.var(), dirichlet.var(alpha))
 | 
			
		||||
    assert_equal(d.mean(), dirichlet.mean(alpha))
 | 
			
		||||
    assert_equal(d.entropy(), dirichlet.entropy(alpha))
 | 
			
		||||
    num_tests = 10
 | 
			
		||||
    for i in range(num_tests):
 | 
			
		||||
        x = np.random.uniform(10e-10, 100, n)
 | 
			
		||||
        x /= np.sum(x)
 | 
			
		||||
        assert_equal(d.pdf(x[:-1]), dirichlet.pdf(x[:-1], alpha))
 | 
			
		||||
        assert_equal(d.logpdf(x[:-1]), dirichlet.logpdf(x[:-1], alpha))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_simple_values():
 | 
			
		||||
    alpha = np.array([1, 1])
 | 
			
		||||
    d = dirichlet(alpha)
 | 
			
		||||
 | 
			
		||||
    assert_almost_equal(d.mean(), 0.5)
 | 
			
		||||
    assert_almost_equal(d.var(), 1. / 12.)
 | 
			
		||||
 | 
			
		||||
    b = beta(1, 1)
 | 
			
		||||
    assert_almost_equal(d.mean(), b.mean())
 | 
			
		||||
    assert_almost_equal(d.var(), b.var())
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_K_and_K_minus_1_calls_equal():
 | 
			
		||||
    # Test that calls with K and K-1 entries yield the same results.
 | 
			
		||||
 | 
			
		||||
    np.random.seed(2846)
 | 
			
		||||
 | 
			
		||||
    n = np.random.randint(1, 32)
 | 
			
		||||
    alpha = np.random.uniform(10e-10, 100, n)
 | 
			
		||||
 | 
			
		||||
    d = dirichlet(alpha)
 | 
			
		||||
    num_tests = 10
 | 
			
		||||
    for i in range(num_tests):
 | 
			
		||||
        x = np.random.uniform(10e-10, 100, n)
 | 
			
		||||
        x /= np.sum(x)
 | 
			
		||||
        assert_almost_equal(d.pdf(x[:-1]), d.pdf(x))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_multiple_entry_calls():
 | 
			
		||||
    # Test that calls with multiple x vectors as matrix work
 | 
			
		||||
 | 
			
		||||
    np.random.seed(2846)
 | 
			
		||||
 | 
			
		||||
    n = np.random.randint(1, 32)
 | 
			
		||||
    alpha = np.random.uniform(10e-10, 100, n)
 | 
			
		||||
    d = dirichlet(alpha)
 | 
			
		||||
 | 
			
		||||
    num_tests = 10
 | 
			
		||||
    num_multiple = 5
 | 
			
		||||
    xm = None
 | 
			
		||||
    for i in range(num_tests):
 | 
			
		||||
        for m in range(num_multiple):
 | 
			
		||||
            x = np.random.uniform(10e-10, 100, n)
 | 
			
		||||
            x /= np.sum(x)
 | 
			
		||||
            if xm is not None:
 | 
			
		||||
                xm = np.vstack((xm, x))
 | 
			
		||||
            else:
 | 
			
		||||
                xm = x
 | 
			
		||||
        rm = d.pdf(xm.T)
 | 
			
		||||
        rs = None
 | 
			
		||||
        for xs in xm:
 | 
			
		||||
            r = d.pdf(xs)
 | 
			
		||||
            if rs is not None:
 | 
			
		||||
                rs = np.append(rs, r)
 | 
			
		||||
            else:
 | 
			
		||||
                rs = r
 | 
			
		||||
        assert_array_almost_equal(rm, rs)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_2D_dirichlet_is_beta():
 | 
			
		||||
    np.random.seed(2846)
 | 
			
		||||
 | 
			
		||||
    alpha = np.random.uniform(10e-10, 100, 2)
 | 
			
		||||
    d = dirichlet(alpha)
 | 
			
		||||
    b = beta(alpha[0], alpha[1])
 | 
			
		||||
 | 
			
		||||
    num_tests = 10
 | 
			
		||||
    for i in range(num_tests):
 | 
			
		||||
        x = np.random.uniform(10e-10, 100, 2)
 | 
			
		||||
        x /= np.sum(x)
 | 
			
		||||
        assert_almost_equal(b.pdf(x), d.pdf([x]))
 | 
			
		||||
 | 
			
		||||
    assert_almost_equal(b.mean(), d.mean()[0])
 | 
			
		||||
    assert_almost_equal(b.var(), d.var()[0])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_dimensions_mismatch():
 | 
			
		||||
    # Regression test for GH #3493. Check that setting up a PDF with a mean of
 | 
			
		||||
    # length M and a covariance matrix of size (N, N), where M != N, raises a
 | 
			
		||||
    # ValueError with an informative error message.
 | 
			
		||||
 | 
			
		||||
    mu = np.array([0.0, 0.0])
 | 
			
		||||
    sigma = np.array([[1.0]])
 | 
			
		||||
 | 
			
		||||
    assert_raises(ValueError, multivariate_normal, mu, sigma)
 | 
			
		||||
 | 
			
		||||
    # A simple check that the right error message was passed along. Checking
 | 
			
		||||
    # that the entire message is there, word for word, would be somewhat
 | 
			
		||||
    # fragile, so we just check for the leading part.
 | 
			
		||||
    try:
 | 
			
		||||
        multivariate_normal(mu, sigma)
 | 
			
		||||
    except ValueError as e:
 | 
			
		||||
        msg = "Dimension mismatch"
 | 
			
		||||
        assert_equal(str(e)[:len(msg)], msg)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
    run_module_suite()
 | 
			
		||||
@ -1,193 +0,0 @@
 | 
			
		||||
from __future__ import division, print_function, absolute_import

import numpy as np
from numpy.testing import TestCase, run_module_suite, assert_equal, \
    assert_array_equal

from wafo.stats import rankdata, tiecorrect


class TestTieCorrect(TestCase):

    def test_empty(self):
        """An empty array requires no correction, should return 1.0."""
        ranks = np.array([], dtype=np.float64)
        c = tiecorrect(ranks)
        assert_equal(c, 1.0)

    def test_one(self):
        """A single element requires no correction, should return 1.0."""
        ranks = np.array([1.0], dtype=np.float64)
        c = tiecorrect(ranks)
        assert_equal(c, 1.0)

    def test_no_correction(self):
        """Arrays with no ties require no correction."""
        ranks = np.arange(2.0)
        c = tiecorrect(ranks)
        assert_equal(c, 1.0)
        ranks = np.arange(3.0)
        c = tiecorrect(ranks)
        assert_equal(c, 1.0)

    def test_basic(self):
        """Check a few basic examples of the tie correction factor."""
        # One tie of two elements
        ranks = np.array([1.0, 2.5, 2.5])
        c = tiecorrect(ranks)
        T = 2.0
        N = ranks.size
        expected = 1.0 - (T**3 - T) / (N**3 - N)
        assert_equal(c, expected)

        # One tie of two elements (same as above, but tie is not at the end)
        ranks = np.array([1.5, 1.5, 3.0])
        c = tiecorrect(ranks)
        T = 2.0
        N = ranks.size
        expected = 1.0 - (T**3 - T) / (N**3 - N)
        assert_equal(c, expected)

        # One tie of three elements
        ranks = np.array([1.0, 3.0, 3.0, 3.0])
        c = tiecorrect(ranks)
        T = 3.0
        N = ranks.size
        expected = 1.0 - (T**3 - T) / (N**3 - N)
        assert_equal(c, expected)

        # Two ties, lengths 2 and 3.
        ranks = np.array([1.5, 1.5, 4.0, 4.0, 4.0])
        c = tiecorrect(ranks)
        T1 = 2.0
        T2 = 3.0
        N = ranks.size
        expected = 1.0 - ((T1**3 - T1) + (T2**3 - T2)) / (N**3 - N)
        assert_equal(c, expected)


class TestRankData(TestCase):

    def test_empty(self):
        """stats.rankdata([]) should return an empty array."""
        a = np.array([], dtype=np.int)
        r = rankdata(a)
        assert_array_equal(r, np.array([], dtype=np.float64))
        r = rankdata([])
        assert_array_equal(r, np.array([], dtype=np.float64))

    def test_one(self):
        """Check stats.rankdata with an array of length 1."""
        data = [100]
        a = np.array(data, dtype=np.int)
        r = rankdata(a)
        assert_array_equal(r, np.array([1.0], dtype=np.float64))
        r = rankdata(data)
        assert_array_equal(r, np.array([1.0], dtype=np.float64))

    def test_basic(self):
        """Basic tests of stats.rankdata."""
        data = [100, 10, 50]
        expected = np.array([3.0, 1.0, 2.0], dtype=np.float64)
        a = np.array(data, dtype=np.int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)

        data = [40, 10, 30, 10, 50]
        expected = np.array([4.0, 1.5, 3.0, 1.5, 5.0], dtype=np.float64)
        a = np.array(data, dtype=np.int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)

        data = [20, 20, 20, 10, 10, 10]
        expected = np.array([5.0, 5.0, 5.0, 2.0, 2.0, 2.0], dtype=np.float64)
        a = np.array(data, dtype=np.int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)
        # The docstring states explicitly that the argument is flattened.
        a2d = a.reshape(2, 3)
        r = rankdata(a2d)
        assert_array_equal(r, expected)

    def test_large_int(self):
        data = np.array([2**60, 2**60+1], dtype=np.uint64)
        r = rankdata(data)
        assert_array_equal(r, [1.0, 2.0])

        data = np.array([2**60, 2**60+1], dtype=np.int64)
        r = rankdata(data)
        assert_array_equal(r, [1.0, 2.0])

        data = np.array([2**60, -2**60+1], dtype=np.int64)
        r = rankdata(data)
        assert_array_equal(r, [2.0, 1.0])

    def test_big_tie(self):
        for n in [10000, 100000, 1000000]:
            data = np.ones(n, dtype=int)
            r = rankdata(data)
            expected_rank = 0.5 * (n + 1)
            assert_array_equal(r, expected_rank * data,
                               "test failed with n=%d" % n)


_cases = (
    # values, method, expected
    ([], 'average', []),
    ([], 'min', []),
    ([], 'max', []),
    ([], 'dense', []),
    ([], 'ordinal', []),
    #
    ([100], 'average', [1.0]),
    ([100], 'min', [1.0]),
    ([100], 'max', [1.0]),
    ([100], 'dense', [1.0]),
    ([100], 'ordinal', [1.0]),
    #
    ([100, 100, 100], 'average', [2.0, 2.0, 2.0]),
    ([100, 100, 100], 'min', [1.0, 1.0, 1.0]),
    ([100, 100, 100], 'max', [3.0, 3.0, 3.0]),
    ([100, 100, 100], 'dense', [1.0, 1.0, 1.0]),
    ([100, 100, 100], 'ordinal', [1.0, 2.0, 3.0]),
    #
    ([100, 300, 200], 'average', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'min', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'max', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'dense', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'ordinal', [1.0, 3.0, 2.0]),
    #
    ([100, 200, 300, 200], 'average', [1.0, 2.5, 4.0, 2.5]),
    ([100, 200, 300, 200], 'min', [1.0, 2.0, 4.0, 2.0]),
    ([100, 200, 300, 200], 'max', [1.0, 3.0, 4.0, 3.0]),
    ([100, 200, 300, 200], 'dense', [1.0, 2.0, 3.0, 2.0]),
    ([100, 200, 300, 200], 'ordinal', [1.0, 2.0, 4.0, 3.0]),
    #
    ([100, 200, 300, 200, 100], 'average', [1.5, 3.5, 5.0, 3.5, 1.5]),
    ([100, 200, 300, 200, 100], 'min', [1.0, 3.0, 5.0, 3.0, 1.0]),
    ([100, 200, 300, 200, 100], 'max', [2.0, 4.0, 5.0, 4.0, 2.0]),
    ([100, 200, 300, 200, 100], 'dense', [1.0, 2.0, 3.0, 2.0, 1.0]),
    ([100, 200, 300, 200, 100], 'ordinal', [1.0, 3.0, 5.0, 4.0, 2.0]),
    #
    ([10] * 30, 'ordinal', np.arange(1.0, 31.0)),
)


def test_cases():

    def check_case(values, method, expected):
        r = rankdata(values, method=method)
        assert_array_equal(r, expected)

    for values, method, expected in _cases:
        yield check_case, values, method, expected


if __name__ == "__main__":
    run_module_suite()
File diff suppressed because it is too large
@ -1,91 +0,0 @@
from __future__ import division, print_function, absolute_import
 | 
			
		||||
 | 
			
		||||
import numpy as np
 | 
			
		||||
from numpy.testing import assert_allclose, assert_equal, run_module_suite
 | 
			
		||||
 | 
			
		||||
from scipy.stats._tukeylambda_stats import tukeylambda_variance, \
 | 
			
		||||
                                            tukeylambda_kurtosis
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_tukeylambda_stats_known_exact():
 | 
			
		||||
    """Compare results with some known exact formulas."""
 | 
			
		||||
    # Some exact values of the Tukey Lambda variance and kurtosis:
 | 
			
		||||
    # lambda   var      kurtosis
 | 
			
		||||
    #   0     pi**2/3     6/5     (logistic distribution)
 | 
			
		||||
    #  0.5    4 - pi    (5/3 - pi/2)/(pi/4 - 1)**2 - 3
 | 
			
		||||
    #   1      1/3       -6/5     (uniform distribution on (-1,1))
 | 
			
		||||
    #   2      1/12      -6/5     (uniform distribution on (-1/2, 1/2))
 | 
			
		||||
 | 
			
		||||
    # lambda = 0
 | 
			
		||||
    var = tukeylambda_variance(0)
 | 
			
		||||
    assert_allclose(var, np.pi**2 / 3, atol=1e-12)
 | 
			
		||||
    kurt = tukeylambda_kurtosis(0)
 | 
			
		||||
    assert_allclose(kurt, 1.2, atol=1e-10)
 | 
			
		||||
 | 
			
		||||
    # lambda = 0.5
 | 
			
		||||
    var = tukeylambda_variance(0.5)
 | 
			
		||||
    assert_allclose(var, 4 - np.pi, atol=1e-12)
 | 
			
		||||
    kurt = tukeylambda_kurtosis(0.5)
 | 
			
		||||
    desired = (5./3 - np.pi/2) / (np.pi/4 - 1)**2 - 3
 | 
			
		||||
    assert_allclose(kurt, desired, atol=1e-10)
 | 
			
		||||
 | 
			
		||||
    # lambda = 1
 | 
			
		||||
    var = tukeylambda_variance(1)
 | 
			
		||||
    assert_allclose(var, 1.0 / 3, atol=1e-12)
 | 
			
		||||
    kurt = tukeylambda_kurtosis(1)
 | 
			
		||||
    assert_allclose(kurt, -1.2, atol=1e-10)
 | 
			
		||||
 | 
			
		||||
    # lambda = 2
 | 
			
		||||
    var = tukeylambda_variance(2)
 | 
			
		||||
    assert_allclose(var, 1.0 / 12, atol=1e-12)
 | 
			
		||||
    kurt = tukeylambda_kurtosis(2)
 | 
			
		||||
    assert_allclose(kurt, -1.2, atol=1e-10)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_tukeylambda_stats_mpmath():
 | 
			
		||||
    """Compare results with some values that were computed using mpmath."""
 | 
			
		||||
    a10 = dict(atol=1e-10, rtol=0)
 | 
			
		||||
    a12 = dict(atol=1e-12, rtol=0)
 | 
			
		||||
    data = [
 | 
			
		||||
        # lambda        variance              kurtosis
 | 
			
		||||
        [-0.1, 4.78050217874253547, 3.78559520346454510],
 | 
			
		||||
        [-0.0649, 4.16428023599895777, 2.52019675947435718],
 | 
			
		||||
        [-0.05, 3.93672267890775277, 2.13129793057777277],
 | 
			
		||||
        [-0.001, 3.30128380390964882, 1.21452460083542988],
 | 
			
		||||
        [0.001, 3.27850775649572176, 1.18560634779287585],
 | 
			
		||||
        [0.03125, 2.95927803254615800, 0.804487555161819980],
 | 
			
		||||
        [0.05, 2.78281053405464501, 0.611604043886644327],
 | 
			
		||||
        [0.0649, 2.65282386754100551, 0.476834119532774540],
 | 
			
		||||
        [1.2, 0.242153920578588346, -1.23428047169049726],
 | 
			
		||||
        [10.0, 0.00095237579757703597, 2.37810697355144933],
 | 
			
		||||
        [20.0, 0.00012195121951131043, 7.37654321002709531],
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    for lam, var_expected, kurt_expected in data:
 | 
			
		||||
        var = tukeylambda_variance(lam)
 | 
			
		||||
        assert_allclose(var, var_expected, **a12)
 | 
			
		||||
        kurt = tukeylambda_kurtosis(lam)
 | 
			
		||||
        assert_allclose(kurt, kurt_expected, **a10)
 | 
			
		||||
 | 
			
		||||
    # Test with vector arguments (most of the other tests are for single
 | 
			
		||||
    # values).
 | 
			
		||||
    lam, var_expected, kurt_expected = zip(*data)
 | 
			
		||||
    var = tukeylambda_variance(lam)
 | 
			
		||||
    assert_allclose(var, var_expected, **a12)
 | 
			
		||||
    kurt = tukeylambda_kurtosis(lam)
 | 
			
		||||
    assert_allclose(kurt, kurt_expected, **a10)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_tukeylambda_stats_invalid():
 | 
			
		||||
    """Test values of lambda outside the domains of the functions."""
 | 
			
		||||
    lam = [-1.0, -0.5]
 | 
			
		||||
    var = tukeylambda_variance(lam)
 | 
			
		||||
    assert_equal(var, np.array([np.nan, np.inf]))
 | 
			
		||||
 | 
			
		||||
    lam = [-1.0, -0.25]
 | 
			
		||||
    kurt = tukeylambda_kurtosis(lam)
 | 
			
		||||
    assert_equal(kurt, np.array([np.nan, np.inf]))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
    run_module_suite()
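
The exact rows in the first test above follow from the closed form of the
Tukey lambda variance. As a cross-check, a minimal sketch (not part of the
deleted file) that reproduces the lambda = 0.5, 1 and 2 entries using only
scipy.special.beta:

    import numpy as np
    from scipy.special import beta

    def tl_variance(lam):
        # Var = (2 / lam**2) * (1 / (1 + 2*lam) - B(lam + 1, lam + 1)),
        # valid for lam > -0.5 and lam != 0; the lam -> 0 limit is pi**2 / 3.
        lam = np.asarray(lam, dtype=float)
        return (2.0 / lam**2) * (1.0 / (1.0 + 2.0 * lam) -
                                 beta(lam + 1.0, lam + 1.0))

    print(np.allclose(tl_variance([0.5, 1.0, 2.0]),
                      [4.0 - np.pi, 1.0 / 3.0, 1.0 / 12.0]))  # True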
@ -1,412 +0,0 @@
"""
Commentary
----------

Most of the work is done by the scipy.stats.distributions module.

This provides a plethora of continuous distributions to play with.

Each distribution has functions to generate random deviates, pdf's,
cdf's etc. as well as a function to fit the distribution to some given
data.

The fitting uses scipy.optimize.fmin to minimise the negative
log-likelihood of the data given the distribution.

There are a couple of problems with this approach.  First, it is
sensitive to the initial guess at the parameters.  Second, it can be a
little slow.

Two key parameters are the 'loc' and 'scale' parameters.  Data is
shifted by 'loc' and scaled by 'scale' prior to fitting.  Supplying
appropriate values for these parameters is important to getting a good
fit.

See the factory() function, which picks from a handful of common
approaches for each distribution.

For some distributions (e.g. the normal) it really makes sense just to
calculate the parameters directly from the data.

The code in the ``if __name__ == '__main__'`` block should be a good
guide to how to use this.  Simply:

      get a QuickFit object
      add the distributions you want to try to fit
      call fit() with your data
      call stats() to generate some stats on the fit
      call plot() if you want to see a plot

Named after Mrs Twolumps, the minister's secretary in the silly walks
sketch, who brings in coffee with a full silly walk.

The tenuous link with curve fitting is that you generally see "two
lumps": one in your data and the other in the curve that is being
fitted.

Or, alternatively, if your data is not too silly, then you can fit a
curve to it.

License is GNU LGPL v3, see https://launchpad.net/twolumps
"""
from __future__ import print_function

import inspect

import numpy
from wafo import stats
from numpy import mean, std


def factory(name):
    """ Factory to return appropriate objects for each distro. """
    fitters = dict(

        beta=ZeroOneScipyDistribution,
        alpha=ZeroOneScipyDistribution,
        ncf=ZeroOneScipyDistribution,
        triang=ZeroOneScipyDistribution,
        uniform=ZeroOneScipyDistribution,
        powerlaw=ZeroOneScipyDistribution,

        pareto=MinLocScipyDistribution,
        expon=MinLocScipyDistribution,
        gamma=MinLocScipyDistribution,
        lognorm=MinLocScipyDistribution,
        maxwell=MinLocScipyDistribution,
        weibull_min=MinLocScipyDistribution,

        weibull_max=MaxLocScipyDistribution)

    return fitters.get(name, ScipyDistribution)(name)


def get_continuous_distros():
    """ Find all attributes of stats that are continuous distributions. """

    fitters = []
    skip = set()
    for name, item in inspect.getmembers(stats):
        if name in skip:
            continue
        if item is stats.rv_continuous:
            continue
        if isinstance(item, stats.rv_continuous):
            fitters.append([name, factory(name)])

    return fitters


class ScipyDistribution(object):

    def __init__(self, name):

        self.name = name
        self.distro = self.get_distro()
        self.fitted = None

    def __getattr__(self, attr):
        """ Try delegating to the distro object """
        return getattr(self.distro, attr)

    def get_distro(self):

        return getattr(stats, self.name)

    def set_distro(self, parms):

        self.distro = getattr(stats, self.name)(*parms)

        return self.distro

    def calculate_loc_and_scale(self, data):
        """ Calculate loc and scale parameters for fit.

        Depending on the distribution, these need to be approximately
        right to get a good fit.
        """
        return mean(data), std(data)

    def fit(self, data, *args, **kwargs):
        """ This needs some work.

        It seems the various scipy distributions do a reasonable job
        if given a good hint.

        Need to get distro-specific hints.
        """

        fits = []

        # Try with and without providing loc and scale hints; this
        # increases the chance of a fit without an exception being
        # raised.  Note that loc/scale are starting guesses for the
        # optimiser, while floc fixes the location during the fit.
        for (loc, scale) in ((0.0, 1.0),
                             self.calculate_loc_and_scale(data)):

            try:
                parms = self.get_distro().fit(data, loc=loc, scale=scale)

                self.set_distro(list(parms))
                expected = self.expected(data)
                rss = ((expected - data) ** 2).sum()
                fits.append([rss, list(parms)])

                parms = self.get_distro().fit(data, floc=loc, scale=scale)

                self.set_distro(list(parms))
                expected = self.expected(data)
                rss = ((expected - data) ** 2).sum()
                fits.append([rss, list(parms)])
            except Exception:
                pass

        # No fits means all tries raised exceptions.
        if not fits:
            raise Exception("all fit attempts failed in fit()")

        # Pick the one with the smallest rss.
        fits.sort()
        self.parms = fits[0][1]
        print(self.parms)

        return self.set_distro(list(self.parms))

    def expected(self, data):
        """ Calculate expected values at each data point """
        if self.fitted is not None:
            return self.fitted

        n = len(data)
        xx = numpy.linspace(0, 1, n + 2)[1:-1]
        self.fitted = self.ppf(xx)

        return self.fitted

    def fit_stats(self, data):
        """ Return stats on the fits.

        data assumed to be sorted.
        """
        n = len(data)

        dvar = numpy.var(data)
        expected = self.expected(data)
        evar = numpy.var(expected)

        rss = 0.0
        for expect, obs in zip(expected, data):
            rss += (obs - expect) ** 2.0

        self.rss = rss
        self.dss = dvar * n
        self.fss = evar * n

    def residuals(self, data):
        """ Return residuals """
        expected = self.expected(data)

        return numpy.array(data) - numpy.array(expected)


class MinLocScipyDistribution(ScipyDistribution):

    def calculate_loc_and_scale(self, data):
        """ Set loc to min value in the data.

        Useful for weibull_min
        """
        return min(data), std(data)


class MaxLocScipyDistribution(ScipyDistribution):

    def calculate_loc_and_scale(self, data):
        """ Set loc to max value in the data.

        Useful for weibull_max
        """
        return max(data), std(data)


class ZeroOneScipyDistribution(ScipyDistribution):

    def calculate_loc_and_scale(self, data):
        """ Set loc and scale to move to [0, 1] interval.

        Useful for beta distribution
        """
        return min(data), max(data) - min(data)


class QuickFit(object):
    """ Fit a family of distributions.

    Calculates stats on each fit.

    Option to create plots.
    """

    def __init__(self):

        self.distributions = []

    def add_distribution(self, distribution):
        """ Add a ready-prepared ScipyDistribution """
        self.distributions.append(distribution)

    def add(self, name):
        """ Add a distribution by name. """

        self.distributions.append(factory(name))

    def fit(self, data):
        """ Fit all of the distros we have """
        fitted = []
        for distro in self.distributions:
            print('fitting distro', distro.name)
            try:
                distro.fit(data)
            except Exception:
                continue
            fitted.append(distro)
        self.distributions = fitted

        print('finished fitting')

    def stats(self, data):
        """ Return stats on the fits """
        for dd in self.distributions:
            dd.fit_stats(data)

    def get_topn(self, n):
        """ Return top-n best fits. """
        data = [[x.rss, x] for x in self.distributions
                if numpy.isfinite(x.rss)]
        data.sort()

        if not n:
            n = len(data)

        return [x[1] for x in data[:n]]

    def fit_plot(self, data, topn=0, bins=20):
        """ Create a plot. """
        from matplotlib import pylab as pl

        distros = self.get_topn(topn)

        xx = numpy.linspace(data.min(), data.max(), 300)

        table = []
        nparms = max(len(x.parms) for x in distros)
        tcolours = []
        for dd in distros:
            patch = pl.plot(xx, [dd.pdf(p) for p in xx],
                            label='%10.2f%% %s' % (100.0 * dd.rss / dd.dss,
                                                   dd.name))
            row = (['', dd.name, '%10.2f%%' % (100.0 * dd.rss / dd.dss,)] +
                   ['%0.2f' % x for x in dd.parms])
            while len(row) < 3 + nparms:
                row.append('')
            table.append(row)
            tcolours.append([patch[0].get_markerfacecolor()] +
                            ['w'] * (2 + nparms))

        # Add a histogram with the data.
        pl.hist(data, bins=bins, normed=True)
        tab = pl.table(cellText=table, cellColours=tcolours,
                       colLabels=(['', 'Distribution', 'Res. SS/Data SS'] +
                                  ['P%d' % (x + 1,) for x in range(nparms)]),
                       bbox=(0.0, 1.0, 1.0, 0.3))
        tab.auto_set_font_size(False)
        tab.set_fontsize(10.)

    def residual_plot(self, data, topn=0):
        """ Create a residual plot. """
        from matplotlib import pylab as pl

        distros = self.get_topn(topn)

        n = len(data)
        xx = numpy.linspace(0, 1, n + 2)[1:-1]
        for dd in distros:
            pl.plot(xx, dd.residuals(data),
                    label='%10.2f%% %s' % (100.0 * dd.rss / dd.dss, dd.name))
        pl.grid(True)

    def plot(self, data, topn):
        """ Plot data fit and residuals """
        from matplotlib import pylab as pl
        pl.axes([0.1, 0.4, 0.8, 0.4])   # leave room above the axes for the table
        self.fit_plot(data, topn=topn)

        pl.axes([0.1, 0.05, 0.8, 0.3])
        self.residual_plot(data, topn=topn)


def read_data(infile, field):
    """ Simple utility to extract a field out of a csv file. """
    import csv

    reader = csv.reader(infile)
    header = next(reader)
    field = header.index(field)
    data = []
    for row in reader:
        data.append(float(row[field]))

    return data


if __name__ == '__main__':

    import sys
    import optparse

    from matplotlib import pylab as pl

    parser = optparse.OptionParser()
    parser.add_option('-d', '--distro', action='append', default=[])
    parser.add_option('-l', '--list', action='store_true',
                      help='List available distros')

    parser.add_option('-i', '--infile')
    parser.add_option('-f', '--field', default='P/L')

    parser.add_option('-n', '--topn', type='int', default=0)

    parser.add_option('-s', '--sample', default='normal',
                      help='generate a sample from this distro as a test')
    parser.add_option('--size', type='int', default=1000,
                      help='Size of sample to generate')

    opts, args = parser.parse_args()

    if opts.list:
        for name, distro in get_continuous_distros():
            print(name)
        sys.exit()

    if not opts.distro:
        opts.distro = [x[0] for x in get_continuous_distros()]

    quickfit = QuickFit()
    for distro in opts.distro:
        quickfit.add(distro)

    if opts.sample:
        data = getattr(numpy.random, opts.sample)(size=opts.size)
    else:
        data = numpy.array(read_data(open(opts.infile), opts.field))

    data.sort()

    quickfit.fit(data)
    print('doing stats')
    quickfit.stats(data)

    print('doing plot')
    quickfit.plot(data, topn=opts.topn)
    pl.show()
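
For reference, a minimal sketch (not part of the original module) of driving
it programmatically rather than from the command line, assuming the file
above has been saved under the hypothetical name twolumps.py:

    import numpy

    import twolumps  # hypothetical module name for the file above

    data = numpy.sort(numpy.random.normal(loc=5.0, scale=2.0, size=1000))

    qf = twolumps.QuickFit()
    qf.add('norm')
    qf.add('weibull_min')
    qf.fit(data)    # distros that raise during fitting are dropped
    qf.stats(data)  # populates rss, dss and fss on each surviving fit
    for dd in qf.get_topn(2):
        print(dd.name, dd.rss / dd.dss)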
@ -1,47 +0,0 @@
from __future__ import division, print_function, absolute_import

import numpy as np
import scipy.stats
from scipy.special import i0


def von_mises_cdf_series(k, x, p):
    # Backward recurrence for the p-term series expansion of the
    # von Mises CDF.
    x = float(x)
    s = np.sin(x)
    c = np.cos(x)
    sn = np.sin(p * x)
    cn = np.cos(p * x)
    R = 0
    V = 0
    for n in range(p - 1, 0, -1):
        sn, cn = sn * c - cn * s, cn * c + sn * s
        R = 1. / (2 * n / k + R)
        V = R * (sn / n + V)

    return 0.5 + x / (2 * np.pi) + V / np.pi


def von_mises_cdf_normalapprox(k, x, C1):
    # Normal approximation for large k.  Note that C1 is accepted but
    # unused here; the chi correction that uses it is applied only in
    # the Cython version of this routine.
    b = np.sqrt(2 / np.pi) * np.exp(k) / i0(k)
    z = b * np.sin(x / 2.)
    return scipy.stats.norm.cdf(z)


def von_mises_cdf(k, x):
    # Map x into (-pi, pi] and remember the shift so the CDF stays
    # continuous across periods.
    ix = 2 * np.pi * np.round(x / (2 * np.pi))
    x = x - ix
    k = float(k)

    # These values should give 12 decimal digits.
    CK = 50
    a = [28., 0.5, 100., 5.0]
    C1 = 50.1

    if k < CK:
        # Number of series terms needed for the requested accuracy.
        p = int(np.ceil(a[0] + a[1] * k - a[2] / (k + a[3])))

        F = np.clip(von_mises_cdf_series(k, x, p), 0, 1)
    else:
        F = von_mises_cdf_normalapprox(k, x, C1)

    return F + ix
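
As a sanity check (not part of the deleted file), the function above can be
compared against scipy.stats.vonmises, which exposes the same CDF with kappa
as its shape parameter; paste this below the module:

    import numpy as np
    import scipy.stats

    k = 2.0
    for x in np.linspace(-np.pi, np.pi, 7):
        print(x, von_mises_cdf(k, x), scipy.stats.vonmises.cdf(x, k))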
@ -1,76 +0,0 @@
import numpy as np
import scipy.stats
from scipy.special import i0
cimport numpy as np
cimport cython

cdef extern from "math.h":
    double cos(double theta)
    double sin(double theta)


cdef double von_mises_cdf_series(double k, double x, unsigned int p):
    # Backward recurrence for the p-term series expansion of the
    # von Mises CDF (same algorithm as the pure-Python version).
    cdef double s, c, sn, cn, R, V
    cdef unsigned int n
    s = sin(x)
    c = cos(x)
    sn = sin(p * x)
    cn = cos(p * x)
    R = 0
    V = 0
    for n in range(p - 1, 0, -1):
        sn, cn = sn * c - cn * s, cn * c + sn * s
        R = 1. / (2 * n / k + R)
        V = R * (sn / n + V)

    return 0.5 + x / (2 * np.pi) + V / np.pi


def von_mises_cdf_normalapprox(k, x, C1):
    # Normal approximation for large k, using the corrected statistic
    # chi; C1 enters only through this correction term (the uncorrected
    # variant would simply return norm.cdf(z)).
    b = np.sqrt(2 / np.pi) * np.exp(k) / i0(k)
    z = b * np.sin(x / 2.)
    C = 24 * k
    chi = z - z**3 / ((C - 2 * z**2 - 16) / 3. -
                      (z**4 + 7 / 4. * z**2 + 167. / 2) /
                      (C + C1 - z**2 + 3))**2
    return scipy.stats.norm.cdf(chi)


@cython.boundscheck(False)
def von_mises_cdf(k, x):
    cdef np.ndarray[double, ndim=1] temp, temp_xs, temp_ks
    cdef unsigned int i, p
    cdef double a1, a2, a3, a4, C1, CK
    k = np.asarray(k)
    x = np.asarray(x)
    zerodim = k.ndim == 0 and x.ndim == 0

    k = np.atleast_1d(k)
    x = np.atleast_1d(x)
    # Map x into (-pi, pi] and remember the shift.
    ix = np.round(x / (2 * np.pi))
    x = x - ix * 2 * np.pi

    # These values should give 12 decimal digits.
    CK = 50
    a1, a2, a3, a4 = [28., 0.5, 100., 5.0]
    C1 = 50.1

    bx, bk = np.broadcast_arrays(x, k)
    result = np.empty(bx.shape, dtype=np.float)

    c_small_k = bk < CK
    temp = result[c_small_k]
    temp_xs = bx[c_small_k].astype(np.float)
    temp_ks = bk[c_small_k].astype(np.float)
    for i in range(len(temp)):
        p = <int>(1 + a1 + a2 * temp_ks[i] - a3 / (temp_ks[i] + a4))
        temp[i] = von_mises_cdf_series(temp_ks[i], temp_xs[i], p)
        if temp[i] < 0:
            temp[i] = 0
        elif temp[i] > 1:
            temp[i] = 1
    result[c_small_k] = temp
    result[~c_small_k] = von_mises_cdf_normalapprox(bk[~c_small_k],
                                                    bx[~c_small_k], C1)

    if not zerodim:
        return result + ix
    else:
        return (result + ix)[0]
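
Being a .pyx file, the module above must be compiled before use. A minimal
sketch using pyximport, assuming the file is saved under the hypothetical
name von_mises.pyx (the numpy include dir is needed for cimport numpy):

    import numpy as np
    import pyximport

    # Compile .pyx files on the fly at import time.
    pyximport.install(setup_args={'include_dirs': np.get_include()})

    import von_mises  # builds the extension on first import
    print(von_mises.von_mises_cdf(2.0, 0.5))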