Simplified wafo.stats:
- Deleted obsolete files.
- Requires scipy v0.16.
- _distn_infrastructure.py monkeypatches scipy.stats._distn_infrastructure.py

branch: master
parent 7403d821df
commit e73624161c
@@ -1,408 +0,0 @@
from __future__ import division, print_function, absolute_import

import warnings

import numpy as np
from scipy._lib.six import callable


def binned_statistic(x, values, statistic='mean',
                     bins=10, range=None):
    """
    Compute a binned statistic for a set of data.

    This is a generalization of a histogram function.  A histogram divides
    the space into bins, and returns the count of the number of points in
    each bin.  This function allows the computation of the sum, mean, median,
    or other statistic of the values within each bin.

    Parameters
    ----------
    x : array_like
        A sequence of values to be binned.
    values : array_like
        The values on which the statistic will be computed.  This must be
        the same shape as `x`.
    statistic : string or callable, optional
        The statistic to compute (default is 'mean').
        The following statistics are available:

          * 'mean' : compute the mean of values for points within each bin.
            Empty bins will be represented by NaN.
          * 'median' : compute the median of values for points within each
            bin. Empty bins will be represented by NaN.
          * 'count' : compute the count of points within each bin.  This is
            identical to an unweighted histogram.  `values` array is not
            referenced.
          * 'sum' : compute the sum of values for points within each bin.
            This is identical to a weighted histogram.
          * function : a user-defined function which takes a 1D array of
            values, and outputs a single numerical statistic. This function
            will be called on the values in each bin.  Empty bins will be
            represented by function([]), or NaN if this returns an error.

    bins : int or sequence of scalars, optional
        If `bins` is an int, it defines the number of equal-width
        bins in the given range (10, by default). If `bins` is a sequence,
        it defines the bin edges, including the rightmost edge, allowing
        for non-uniform bin widths.
    range : (float, float) or [(float, float)], optional
        The lower and upper range of the bins.  If not provided, range
        is simply ``(x.min(), x.max())``.  Values outside the range are
        ignored.

    Returns
    -------
    statistic : array
        The values of the selected statistic in each bin.
    bin_edges : array of dtype float
        Return the bin edges ``(length(statistic)+1)``.
    binnumber : 1-D ndarray of ints
        This assigns to each observation an integer that represents the bin
        in which this observation falls. Array has the same length as values.

    See Also
    --------
    numpy.histogram, binned_statistic_2d, binned_statistic_dd

    Notes
    -----
    All but the last (righthand-most) bin is half-open.  In other words, if
    `bins` is::

      [1, 2, 3, 4]

    then the first bin is ``[1, 2)`` (including 1, but excluding 2) and the
    second ``[2, 3)``.  The last bin, however, is ``[3, 4]``, which *includes*
    4.

    .. versionadded:: 0.11.0

    Examples
    --------
    >>> stats.binned_statistic([1, 2, 1, 2, 4], np.arange(5), statistic='mean',
    ...                        bins=3)
    (array([ 1.,  2.,  4.]), array([ 1.,  2.,  3.,  4.]), array([1, 2, 1, 2, 3]))

    >>> stats.binned_statistic([1, 2, 1, 2, 4], np.arange(5), statistic='mean', bins=3)
    (array([ 1.,  2.,  4.]), array([ 1.,  2.,  3.,  4.]), array([1, 2, 1, 2, 3]))

    """
    try:
        N = len(bins)
    except TypeError:
        N = 1

    if N != 1:
        bins = [np.asarray(bins, float)]

    if range is not None:
        if len(range) == 2:
            range = [range]

    medians, edges, xy = binned_statistic_dd([x], values, statistic,
                                             bins, range)

    return medians, edges[0], xy


def binned_statistic_2d(x, y, values, statistic='mean',
                        bins=10, range=None):
    """
    Compute a bidimensional binned statistic for a set of data.

    This is a generalization of a histogram2d function.  A histogram divides
    the space into bins, and returns the count of the number of points in
    each bin.  This function allows the computation of the sum, mean, median,
    or other statistic of the values within each bin.

    Parameters
    ----------
    x : (N,) array_like
        A sequence of values to be binned along the first dimension.
    y : (M,) array_like
        A sequence of values to be binned along the second dimension.
    values : (N,) array_like
        The values on which the statistic will be computed.  This must be
        the same shape as `x`.
    statistic : string or callable, optional
        The statistic to compute (default is 'mean').
        The following statistics are available:

          * 'mean' : compute the mean of values for points within each bin.
            Empty bins will be represented by NaN.
          * 'median' : compute the median of values for points within each
            bin. Empty bins will be represented by NaN.
          * 'count' : compute the count of points within each bin.  This is
            identical to an unweighted histogram.  `values` array is not
            referenced.
          * 'sum' : compute the sum of values for points within each bin.
            This is identical to a weighted histogram.
          * function : a user-defined function which takes a 1D array of
            values, and outputs a single numerical statistic. This function
            will be called on the values in each bin.  Empty bins will be
            represented by function([]), or NaN if this returns an error.

    bins : int or [int, int] or array-like or [array, array], optional
        The bin specification:

          * the number of bins for the two dimensions (nx=ny=bins),
          * the number of bins in each dimension (nx, ny = bins),
          * the bin edges for the two dimensions (x_edges = y_edges = bins),
          * the bin edges in each dimension (x_edges, y_edges = bins).

    range : (2,2) array_like, optional
        The leftmost and rightmost edges of the bins along each dimension
        (if not specified explicitly in the `bins` parameters):
        [[xmin, xmax], [ymin, ymax]]. All values outside of this range will be
        considered outliers and not tallied in the histogram.

    Returns
    -------
    statistic : (nx, ny) ndarray
        The values of the selected statistic in each two-dimensional bin
    xedges : (nx + 1) ndarray
        The bin edges along the first dimension.
    yedges : (ny + 1) ndarray
        The bin edges along the second dimension.
    binnumber : 1-D ndarray of ints
        This assigns to each observation an integer that represents the bin
        in which this observation falls. Array has the same length as `values`.

    See Also
    --------
    numpy.histogram2d, binned_statistic, binned_statistic_dd

    Notes
    -----

    .. versionadded:: 0.11.0

    """

    # This code is based on np.histogram2d
    try:
        N = len(bins)
    except TypeError:
        N = 1

    if N != 1 and N != 2:
        xedges = yedges = np.asarray(bins, float)
        bins = [xedges, yedges]

    medians, edges, xy = binned_statistic_dd([x, y], values, statistic,
                                             bins, range)

    return medians, edges[0], edges[1], xy


def binned_statistic_dd(sample, values, statistic='mean',
                        bins=10, range=None):
    """
    Compute a multidimensional binned statistic for a set of data.

    This is a generalization of a histogramdd function.  A histogram divides
    the space into bins, and returns the count of the number of points in
    each bin.  This function allows the computation of the sum, mean, median,
    or other statistic of the values within each bin.

    Parameters
    ----------
    sample : array_like
        Data to histogram passed as a sequence of D arrays of length N, or
        as an (N,D) array.
    values : array_like
        The values on which the statistic will be computed.  This must be
        the same shape as x.
    statistic : string or callable, optional
        The statistic to compute (default is 'mean').
        The following statistics are available:

          * 'mean' : compute the mean of values for points within each bin.
            Empty bins will be represented by NaN.
          * 'median' : compute the median of values for points within each
            bin. Empty bins will be represented by NaN.
          * 'count' : compute the count of points within each bin.  This is
            identical to an unweighted histogram.  `values` array is not
            referenced.
          * 'sum' : compute the sum of values for points within each bin.
            This is identical to a weighted histogram.
          * function : a user-defined function which takes a 1D array of
            values, and outputs a single numerical statistic. This function
            will be called on the values in each bin.  Empty bins will be
            represented by function([]), or NaN if this returns an error.

    bins : sequence or int, optional
        The bin specification:

          * A sequence of arrays describing the bin edges along each dimension.
          * The number of bins for each dimension (nx, ny, ... = bins)
          * The number of bins for all dimensions (nx=ny=...=bins).

    range : sequence, optional
        A sequence of lower and upper bin edges to be used if the edges are
        not given explicitly in `bins`. Defaults to the minimum and maximum
        values along each dimension.

    Returns
    -------
    statistic : ndarray, shape(nx1, nx2, nx3,...)
        The values of the selected statistic in each two-dimensional bin
    edges : list of ndarrays
        A list of D arrays describing the (nxi + 1) bin edges for each
        dimension
    binnumber : 1-D ndarray of ints
        This assigns to each observation an integer that represents the bin
        in which this observation falls. Array has the same length as values.

    See Also
    --------
    np.histogramdd, binned_statistic, binned_statistic_2d

    Notes
    -----

    .. versionadded:: 0.11.0

    """
    if type(statistic) == str:
        if statistic not in ['mean', 'median', 'count', 'sum', 'std']:
            raise ValueError('unrecognized statistic "%s"' % statistic)
    elif callable(statistic):
        pass
    else:
        raise ValueError("statistic not understood")

    # This code is based on np.histogramdd
    try:
        # Sample is an ND-array.
        N, D = sample.shape
    except (AttributeError, ValueError):
        # Sample is a sequence of 1D arrays.
        sample = np.atleast_2d(sample).T
        N, D = sample.shape

    nbin = np.empty(D, int)
    edges = D * [None]
    dedges = D * [None]

    try:
        M = len(bins)
        if M != D:
            raise AttributeError('The dimension of bins must be equal '
                                 'to the dimension of the sample x.')
    except TypeError:
        bins = D * [bins]

    # Select range for each dimension
    # Used only if number of bins is given.
    if range is None:
        smin = np.atleast_1d(np.array(sample.min(0), float))
        smax = np.atleast_1d(np.array(sample.max(0), float))
    else:
        smin = np.zeros(D)
        smax = np.zeros(D)
        for i in np.arange(D):
            smin[i], smax[i] = range[i]

    # Make sure the bins have a finite width.
    for i in np.arange(len(smin)):
        if smin[i] == smax[i]:
            smin[i] = smin[i] - .5
            smax[i] = smax[i] + .5

    # Create edge arrays
    for i in np.arange(D):
        if np.isscalar(bins[i]):
            nbin[i] = bins[i] + 2  # +2 for outlier bins
            edges[i] = np.linspace(smin[i], smax[i], nbin[i] - 1)
        else:
            edges[i] = np.asarray(bins[i], float)
            nbin[i] = len(edges[i]) + 1  # +1 for outlier bins
        dedges[i] = np.diff(edges[i])

    nbin = np.asarray(nbin)

    # Compute the bin number each sample falls into.
    Ncount = {}
    for i in np.arange(D):
        Ncount[i] = np.digitize(sample[:, i], edges[i])

    # Using digitize, values that fall on an edge are put in the right bin.
    # For the rightmost bin, we want values equal to the right
    # edge to be counted in the last bin, and not as an outlier.
    for i in np.arange(D):
        # Rounding precision
        decimal = int(-np.log10(dedges[i].min())) + 6
        # Find which points are on the rightmost edge.
        on_edge = np.where(np.around(sample[:, i], decimal)
                           == np.around(edges[i][-1], decimal))[0]
        # Shift these points one bin to the left.
        Ncount[i][on_edge] -= 1

    # Compute the sample indices in the flattened statistic matrix.
    ni = nbin.argsort()
    xy = np.zeros(N, int)
    for i in np.arange(0, D - 1):
        xy += Ncount[ni[i]] * nbin[ni[i + 1:]].prod()
    xy += Ncount[ni[-1]]

    result = np.empty(nbin.prod(), float)

    if statistic == 'mean':
        result.fill(np.nan)
        flatcount = np.bincount(xy, None)
        flatsum = np.bincount(xy, values)
        a = flatcount.nonzero()
        result[a] = flatsum[a] / flatcount[a]
    elif statistic == 'std':
        result.fill(0)
        flatcount = np.bincount(xy, None)
        flatsum = np.bincount(xy, values)
        flatsum2 = np.bincount(xy, values ** 2)
        a = flatcount.nonzero()
        result[a] = np.sqrt(flatsum2[a] / flatcount[a]
                            - (flatsum[a] / flatcount[a]) ** 2)
    elif statistic == 'count':
        result.fill(0)
        flatcount = np.bincount(xy, None)
        a = np.arange(len(flatcount))
        result[a] = flatcount
    elif statistic == 'sum':
        result.fill(0)
        flatsum = np.bincount(xy, values)
        a = np.arange(len(flatsum))
        result[a] = flatsum
    elif statistic == 'median':
        result.fill(np.nan)
        for i in np.unique(xy):
            result[i] = np.median(values[xy == i])
    elif callable(statistic):
        with warnings.catch_warnings():
            # Numpy generates a warnings for mean/std/... with empty list
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            old = np.seterr(invalid='ignore')
            try:
                null = statistic([])
            except:
                null = np.nan
            np.seterr(**old)
        result.fill(null)
        for i in np.unique(xy):
            result[i] = statistic(values[xy == i])

    # Shape into a proper matrix
    result = result.reshape(np.sort(nbin))
    for i in np.arange(nbin.size):
        j = ni.argsort()[i]
        result = result.swapaxes(i, j)
        ni[i], ni[j] = ni[j], ni[i]

    # Remove outliers (indices 0 and -1 for each dimension).
    core = D * [slice(1, -1)]
    result = result[core]

    if (result.shape != nbin - 2).any():
        raise RuntimeError('Internal Shape Error')

    return result, edges, xy
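For orientation, here is a minimal usage sketch of the binned-statistic API deleted above. It assumes the scipy.stats equivalents this commit delegates to (scipy >= 0.16); the data and expected behaviour follow the docstring examples in the deleted file.

# Hedged usage sketch -- assumes scipy >= 0.16 provides equivalent functions.
import numpy as np
from scipy import stats

x = np.array([1, 2, 1, 2, 4])
values = np.arange(5)

# 1-D: mean of `values` within each of 3 equal-width bins over (x.min(), x.max()).
statistic, bin_edges, binnumber = stats.binned_statistic(
    x, values, statistic='mean', bins=3)

# 2-D: count the points on a 4x4 grid (equivalent to an unweighted 2-D histogram).
y = np.array([0.0, 0.5, 1.0, 1.5, 2.0])
counts, xedges, yedges, binnum = stats.binned_statistic_2d(
    x, y, values, statistic='count', bins=4)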
(Two file diffs suppressed because they are too large.)
@@ -1,884 +0,0 @@
#
# Author: Joris Vankerschaver 2013
#
from __future__ import division, print_function, absolute_import

import numpy as np
import scipy.linalg
from scipy.misc import doccer
from scipy.special import gammaln


__all__ = ['multivariate_normal', 'dirichlet']

_LOG_2PI = np.log(2 * np.pi)


def _process_parameters(dim, mean, cov):
    """
    Infer dimensionality from mean or covariance matrix, ensure that
    mean and covariance are full vector resp. matrix.

    """

    # Try to infer dimensionality
    if dim is None:
        if mean is None:
            if cov is None:
                dim = 1
            else:
                cov = np.asarray(cov, dtype=float)
                if cov.ndim < 2:
                    dim = 1
                else:
                    dim = cov.shape[0]
        else:
            mean = np.asarray(mean, dtype=float)
            dim = mean.size
    else:
        if not np.isscalar(dim):
            raise ValueError("Dimension of random variable must be a scalar.")

    # Check input sizes and return full arrays for mean and cov if necessary
    if mean is None:
        mean = np.zeros(dim)
    mean = np.asarray(mean, dtype=float)

    if cov is None:
        cov = 1.0
    cov = np.asarray(cov, dtype=float)

    if dim == 1:
        mean.shape = (1,)
        cov.shape = (1, 1)

    if mean.ndim != 1 or mean.shape[0] != dim:
        raise ValueError("Array 'mean' must be a vector of length %d." % dim)
    if cov.ndim == 0:
        cov = cov * np.eye(dim)
    elif cov.ndim == 1:
        cov = np.diag(cov)
    elif cov.ndim == 2 and cov.shape != (dim, dim):
        rows, cols = cov.shape
        if rows != cols:
            msg = ("Array 'cov' must be square if it is two dimensional,"
                   " but cov.shape = %s." % str(cov.shape))
        else:
            msg = ("Dimension mismatch: array 'cov' is of shape %s,"
                   " but 'mean' is a vector of length %d.")
            msg = msg % (str(cov.shape), len(mean))
        raise ValueError(msg)
    elif cov.ndim > 2:
        raise ValueError("Array 'cov' must be at most two-dimensional,"
                         " but cov.ndim = %d" % cov.ndim)

    return dim, mean, cov


def _process_quantiles(x, dim):
    """
    Adjust quantiles array so that last axis labels the components of
    each data point.

    """
    x = np.asarray(x, dtype=float)

    if x.ndim == 0:
        x = x[np.newaxis]
    elif x.ndim == 1:
        if dim == 1:
            x = x[:, np.newaxis]
        else:
            x = x[np.newaxis, :]

    return x


def _squeeze_output(out):
    """
    Remove single-dimensional entries from array and convert to scalar,
    if necessary.

    """
    out = out.squeeze()
    if out.ndim == 0:
        out = out[()]
    return out


def _eigvalsh_to_eps(spectrum, cond=None, rcond=None):
    """
    Determine which eigenvalues are "small" given the spectrum.

    This is for compatibility across various linear algebra functions
    that should agree about whether or not a Hermitian matrix is numerically
    singular and what is its numerical matrix rank.
    This is designed to be compatible with scipy.linalg.pinvh.

    Parameters
    ----------
    spectrum : 1d ndarray
        Array of eigenvalues of a Hermitian matrix.
    cond, rcond : float, optional
        Cutoff for small eigenvalues.
        Singular values smaller than rcond * largest_eigenvalue are
        considered zero.
        If None or -1, suitable machine precision is used.

    Returns
    -------
    eps : float
        Magnitude cutoff for numerical negligibility.

    """
    if rcond is not None:
        cond = rcond
    if cond in [None, -1]:
        t = spectrum.dtype.char.lower()
        factor = {'f': 1E3, 'd': 1E6}
        cond = factor[t] * np.finfo(t).eps
    eps = cond * np.max(abs(spectrum))
    return eps


def _pinv_1d(v, eps=1e-5):
    """
    A helper function for computing the pseudoinverse.

    Parameters
    ----------
    v : iterable of numbers
        This may be thought of as a vector of eigenvalues or singular values.
    eps : float
        Values with magnitude no greater than eps are considered negligible.

    Returns
    -------
    v_pinv : 1d float ndarray
        A vector of pseudo-inverted numbers.

    """
    return np.array([0 if abs(x) <= eps else 1/x for x in v], dtype=float)


class _PSD(object):
    """
    Compute coordinated functions of a symmetric positive semidefinite matrix.

    This class addresses two issues.  Firstly it allows the pseudoinverse,
    the logarithm of the pseudo-determinant, and the rank of the matrix
    to be computed using one call to eigh instead of three.
    Secondly it allows these functions to be computed in a way
    that gives mutually compatible results.
    All of the functions are computed with a common understanding as to
    which of the eigenvalues are to be considered negligibly small.
    The functions are designed to coordinate with scipy.linalg.pinvh()
    but not necessarily with np.linalg.det() or with np.linalg.matrix_rank().

    Parameters
    ----------
    M : 2d array-like
        Symmetric positive semidefinite matrix.
    cond, rcond : float, optional
        Cutoff for small eigenvalues.
        Singular values smaller than rcond * largest_eigenvalue are
        considered zero.
        If None or -1, suitable machine precision is used.
    lower : bool, optional
        Whether the pertinent array data is taken from the lower
        or upper triangle of M. (Default: lower)
    check_finite : bool, optional
        Whether to check that the input matrices contain only finite
        numbers. Disabling may give a performance gain, but may result
        in problems (crashes, non-termination) if the inputs do contain
        infinities or NaNs.
    allow_singular : bool, optional
        Whether to allow a singular matrix.  (Default: True)

    Notes
    -----
    The arguments are similar to those of scipy.linalg.pinvh().

    """

    def __init__(self, M, cond=None, rcond=None, lower=True,
                 check_finite=True, allow_singular=True):
        # Compute the symmetric eigendecomposition.
        # Note that eigh takes care of array conversion, chkfinite,
        # and assertion that the matrix is square.
        s, u = scipy.linalg.eigh(M, lower=lower, check_finite=check_finite)

        eps = _eigvalsh_to_eps(s, cond, rcond)
        if np.min(s) < -eps:
            raise ValueError('the input matrix must be positive semidefinite')
        d = s[s > eps]
        if len(d) < len(s) and not allow_singular:
            raise np.linalg.LinAlgError('singular matrix')
        s_pinv = _pinv_1d(s, eps)
        U = np.multiply(u, np.sqrt(s_pinv))

        # Initialize the eagerly precomputed attributes.
        self.rank = len(d)
        self.U = U
        self.log_pdet = np.sum(np.log(d))

        # Initialize an attribute to be lazily computed.
        self._pinv = None

    @property
    def pinv(self):
        if self._pinv is None:
            self._pinv = np.dot(self.U, self.U.T)
        return self._pinv


_doc_default_callparams = """\
mean : array_like, optional
    Mean of the distribution (default zero)
cov : array_like, optional
    Covariance matrix of the distribution (default one)
allow_singular : bool, optional
    Whether to allow a singular covariance matrix.  (Default: False)
"""

_doc_callparams_note = \
    """Setting the parameter `mean` to `None` is equivalent to having `mean`
    be the zero-vector. The parameter `cov` can be a scalar, in which case
    the covariance matrix is the identity times that value, a vector of
    diagonal entries for the covariance matrix, or a two-dimensional
    array_like.
    """

_doc_frozen_callparams = ""

_doc_frozen_callparams_note = \
    """See class definition for a detailed description of parameters."""

docdict_params = {
    '_doc_default_callparams': _doc_default_callparams,
    '_doc_callparams_note': _doc_callparams_note
}

docdict_noparams = {
    '_doc_default_callparams': _doc_frozen_callparams,
    '_doc_callparams_note': _doc_frozen_callparams_note
}


class multivariate_normal_gen(object):
    r"""
    A multivariate normal random variable.

    The `mean` keyword specifies the mean. The `cov` keyword specifies the
    covariance matrix.

    Methods
    -------
    pdf(x, mean=None, cov=1, allow_singular=False)
        Probability density function.
    logpdf(x, mean=None, cov=1, allow_singular=False)
        Log of the probability density function.
    rvs(mean=None, cov=1, allow_singular=False, size=1)
        Draw random samples from a multivariate normal distribution.
    entropy()
        Compute the differential entropy of the multivariate normal.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
    %(_doc_default_callparams)s

    Alternatively, the object may be called (as a function) to fix the mean
    and covariance parameters, returning a "frozen" multivariate normal
    random variable:

    rv = multivariate_normal(mean=None, cov=1, allow_singular=False)
        - Frozen object with the same methods but holding the given
          mean and covariance fixed.

    Notes
    -----
    %(_doc_callparams_note)s

    The covariance matrix `cov` must be a (symmetric) positive
    semi-definite matrix. The determinant and inverse of `cov` are computed
    as the pseudo-determinant and pseudo-inverse, respectively, so
    that `cov` does not need to have full rank.

    The probability density function for `multivariate_normal` is

    .. math::

        f(x) = \frac{1}{\sqrt{(2 \pi)^k \det \Sigma}} \exp\left( -\frac{1}{2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right),

    where :math:`\mu` is the mean, :math:`\Sigma` the covariance matrix,
    and :math:`k` is the dimension of the space where :math:`x` takes values.

    .. versionadded:: 0.14.0

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from scipy.stats import multivariate_normal
    >>> x = np.linspace(0, 5, 10, endpoint=False)
    >>> y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y
    array([ 0.00108914,  0.01033349,  0.05946514,  0.20755375,  0.43939129,
            0.56418958,  0.43939129,  0.20755375,  0.05946514,  0.01033349])
    >>> plt.plot(x, y)

    The input quantiles can be any shape of array, as long as the last
    axis labels the components.  This allows us for instance to
    display the frozen pdf for a non-isotropic random variable in 2D as
    follows:

    >>> x, y = np.mgrid[-1:1:.01, -1:1:.01]
    >>> pos = np.empty(x.shape + (2,))
    >>> pos[:, :, 0] = x; pos[:, :, 1] = y
    >>> rv = multivariate_normal([0.5, -0.2], [[2.0, 0.3], [0.3, 0.5]])
    >>> plt.contourf(x, y, rv.pdf(pos))

    """

    def __init__(self):
        self.__doc__ = doccer.docformat(self.__doc__, docdict_params)

    def __call__(self, mean=None, cov=1, allow_singular=False):
        """
        Create a frozen multivariate normal distribution.

        See `multivariate_normal_frozen` for more information.

        """
        return multivariate_normal_frozen(mean, cov,
                                          allow_singular=allow_singular)

    def _logpdf(self, x, mean, prec_U, log_det_cov, rank):
        """
        Parameters
        ----------
        x : ndarray
            Points at which to evaluate the log of the probability
            density function
        mean : ndarray
            Mean of the distribution
        prec_U : ndarray
            A decomposition such that np.dot(prec_U, prec_U.T)
            is the precision matrix, i.e. inverse of the covariance matrix.
        log_det_cov : float
            Logarithm of the determinant of the covariance matrix
        rank : int
            Rank of the covariance matrix.

        Notes
        -----
        As this function does no argument checking, it should not be
        called directly; use 'logpdf' instead.

        """
        dev = x - mean
        maha = np.sum(np.square(np.dot(dev, prec_U)), axis=-1)
        return -0.5 * (rank * _LOG_2PI + log_det_cov + maha)

    def logpdf(self, x, mean, cov, allow_singular=False):
        """
        Log of the multivariate normal probability density function.

        Parameters
        ----------
        x : array_like
            Quantiles, with the last axis of `x` denoting the components.
        %(_doc_default_callparams)s

        Notes
        -----
        %(_doc_callparams_note)s

        Returns
        -------
        pdf : ndarray
            Log of the probability density function evaluated at `x`

        """
        dim, mean, cov = _process_parameters(None, mean, cov)
        x = _process_quantiles(x, dim)
        psd = _PSD(cov, allow_singular=allow_singular)
        out = self._logpdf(x, mean, psd.U, psd.log_pdet, psd.rank)
        return _squeeze_output(out)

    def pdf(self, x, mean, cov, allow_singular=False):
        """
        Multivariate normal probability density function.

        Parameters
        ----------
        x : array_like
            Quantiles, with the last axis of `x` denoting the components.
        %(_doc_default_callparams)s

        Notes
        -----
        %(_doc_callparams_note)s

        Returns
        -------
        pdf : ndarray
            Probability density function evaluated at `x`

        """
        dim, mean, cov = _process_parameters(None, mean, cov)
        x = _process_quantiles(x, dim)
        psd = _PSD(cov, allow_singular=allow_singular)
        out = np.exp(self._logpdf(x, mean, psd.U, psd.log_pdet, psd.rank))
        return _squeeze_output(out)

    def rvs(self, mean=None, cov=1, size=1):
        """
        Draw random samples from a multivariate normal distribution.

        Parameters
        ----------
        %(_doc_default_callparams)s
        size : integer, optional
            Number of samples to draw (default 1).

        Notes
        -----
        %(_doc_callparams_note)s

        Returns
        -------
        rvs : ndarray or scalar
            Random variates of size (`size`, `N`), where `N` is the
            dimension of the random variable.

        """
        dim, mean, cov = _process_parameters(None, mean, cov)
        out = np.random.multivariate_normal(mean, cov, size)
        return _squeeze_output(out)

    def entropy(self, mean=None, cov=1):
        """
        Compute the differential entropy of the multivariate normal.

        Parameters
        ----------
        %(_doc_default_callparams)s

        Notes
        -----
        %(_doc_callparams_note)s

        Returns
        -------
        h : scalar
            Entropy of the multivariate normal distribution

        """
        dim, mean, cov = _process_parameters(None, mean, cov)
        return 0.5 * np.log(np.linalg.det(2 * np.pi * np.e * cov))


multivariate_normal = multivariate_normal_gen()


class multivariate_normal_frozen(object):
    def __init__(self, mean=None, cov=1, allow_singular=False):
        """
        Create a frozen multivariate normal distribution.

        Parameters
        ----------
        mean : array_like, optional
            Mean of the distribution (default zero)
        cov : array_like, optional
            Covariance matrix of the distribution (default one)
        allow_singular : bool, optional
            If this flag is True then tolerate a singular
            covariance matrix (default False).

        Examples
        --------
        When called with the default parameters, this will create a 1D random
        variable with mean 0 and covariance 1:

        >>> from scipy.stats import multivariate_normal
        >>> r = multivariate_normal()
        >>> r.mean
        array([ 0.])
        >>> r.cov
        array([[1.]])

        """
        self.dim, self.mean, self.cov = _process_parameters(None, mean, cov)
        self.cov_info = _PSD(self.cov, allow_singular=allow_singular)
        self._mnorm = multivariate_normal_gen()

    def logpdf(self, x):
        x = _process_quantiles(x, self.dim)
        out = self._mnorm._logpdf(x, self.mean, self.cov_info.U,
                                  self.cov_info.log_pdet, self.cov_info.rank)
        return _squeeze_output(out)

    def pdf(self, x):
        return np.exp(self.logpdf(x))

    def rvs(self, size=1):
        return self._mnorm.rvs(self.mean, self.cov, size)

    def entropy(self):
        """
        Computes the differential entropy of the multivariate normal.

        Returns
        -------
        h : scalar
            Entropy of the multivariate normal distribution

        """
        log_pdet = self.cov_info.log_pdet
        rank = self.cov_info.rank
        return 0.5 * (rank * (_LOG_2PI + 1) + log_pdet)


# Set frozen generator docstrings from corresponding docstrings in
# multivariate_normal_gen and fill in default strings in class docstrings
for name in ['logpdf', 'pdf', 'rvs']:
    method = multivariate_normal_gen.__dict__[name]
    method_frozen = multivariate_normal_frozen.__dict__[name]
    method_frozen.__doc__ = doccer.docformat(method.__doc__, docdict_noparams)
    method.__doc__ = doccer.docformat(method.__doc__, docdict_params)

_dirichlet_doc_default_callparams = """\
alpha : array_like
    The concentration parameters. The number of entries determines the
    dimensionality of the distribution.
"""
_dirichlet_doc_frozen_callparams = ""

_dirichlet_doc_frozen_callparams_note = \
    """See class definition for a detailed description of parameters."""

dirichlet_docdict_params = {
    '_dirichlet_doc_default_callparams': _dirichlet_doc_default_callparams,
}

dirichlet_docdict_noparams = {
    '_dirichlet_doc_default_callparams': _dirichlet_doc_frozen_callparams,
}


def _dirichlet_check_parameters(alpha):
    alpha = np.asarray(alpha)
    if np.min(alpha) <= 0:
        raise ValueError("All parameters must be greater than 0")
    elif alpha.ndim != 1:
        raise ValueError("Parameter vector 'a' must be one dimensional, " +
                         "but a.shape = %s." % str(alpha.shape))
    return alpha


def _dirichlet_check_input(alpha, x):
    x = np.asarray(x)

    if x.shape[0] + 1 != alpha.shape[0] and x.shape[0] != alpha.shape[0]:
        raise ValueError("Vector 'x' must have one entry less than the" +
                         " parameter vector 'a', but alpha.shape = " +
                         "%s and " % alpha.shape +
                         "x.shape = %s." % x.shape)

    if x.shape[0] != alpha.shape[0]:
        xk = np.array([1 - np.sum(x, 0)])
        if xk.ndim == 1:
            x = np.append(x, xk)
        elif xk.ndim == 2:
            x = np.vstack((x, xk))
        else:
            raise ValueError("The input must be one dimensional or a two "
                             "dimensional matrix containing the entries.")

    if np.min(x) < 0:
        raise ValueError("Each entry in 'x' must be greater than or equal to zero.")

    if np.max(x) > 1:
        raise ValueError("Each entry in 'x' must be smaller than or equal to one.")

    if (np.abs(np.sum(x, 0) - 1.0) > 10e-10).any():
        raise ValueError("The input vector 'x' must lie within the normal " +
                         "simplex, but sum(x) = %f." % np.sum(x, 0))

    return x


def _lnB(alpha):
    r"""
    Internal helper function to compute the log of the useful quotient

    .. math::
        B(\alpha) = \frac{\prod_{i=1}^{K}\Gamma(\alpha_i)}{\Gamma\left(\sum_{i=1}^{K}\alpha_i\right)}

    Parameters
    ----------
    %(_dirichlet_doc_default_callparams)s

    Returns
    -------
    B : scalar
        Helper quotient, internal use only

    """
    return np.sum(gammaln(alpha)) - gammaln(np.sum(alpha))


class dirichlet_gen(object):
    r"""
    A Dirichlet random variable.

    The `alpha` keyword specifies the concentration parameters of the
    distribution.

    .. versionadded:: 0.15.0

    Methods
    -------
    pdf(x, alpha)
        Probability density function.
    logpdf(x, alpha)
        Log of the probability density function.
    rvs(alpha, size=1)
        Draw random samples from a Dirichlet distribution.
    mean(alpha)
        The mean of the Dirichlet distribution
    var(alpha)
        The variance of the Dirichlet distribution
    entropy(alpha)
        Compute the differential entropy of the Dirichlet distribution.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
    %(_dirichlet_doc_default_callparams)s

    Alternatively, the object may be called (as a function) to fix
    concentration parameters, returning a "frozen" Dirichlet
    random variable:

    rv = dirichlet(alpha)
        - Frozen object with the same methods but holding the given
          concentration parameters fixed.

    Notes
    -----
    Each :math:`\alpha` entry must be positive. The distribution has support
    only on the simplex defined by

    .. math::
        \sum_{i=1}^{K} x_i \le 1

    The probability density function for `dirichlet` is

    .. math::

        f(x) = \frac{1}{\mathrm{B}(\boldsymbol\alpha)} \prod_{i=1}^K x_i^{\alpha_i - 1}

    where

    .. math::
        \mathrm{B}(\boldsymbol\alpha) = \frac{\prod_{i=1}^K \Gamma(\alpha_i)}{\Gamma\bigl(\sum_{i=1}^K \alpha_i\bigr)}

    and :math:`\boldsymbol\alpha=(\alpha_1,\ldots,\alpha_K)` are the
    concentration parameters and :math:`K` is the dimension of the space
    where :math:`x` takes values.

    """

    def __init__(self):
        self.__doc__ = doccer.docformat(self.__doc__, dirichlet_docdict_params)

    def __call__(self, alpha):
        return dirichlet_frozen(alpha)

    def _logpdf(self, x, alpha):
        """
        Parameters
        ----------
        x : ndarray
            Points at which to evaluate the log of the probability
            density function
        %(_dirichlet_doc_default_callparams)s

        Notes
        -----
        As this function does no argument checking, it should not be
        called directly; use 'logpdf' instead.

        """
        lnB = _lnB(alpha)
        return - lnB + np.sum((np.log(x.T) * (alpha - 1)).T, 0)

    def logpdf(self, x, alpha):
        """
        Log of the Dirichlet probability density function.

        Parameters
        ----------
        x : array_like
            Quantiles, with the last axis of `x` denoting the components.
        %(_dirichlet_doc_default_callparams)s

        Returns
        -------
        pdf : ndarray
            Log of the probability density function evaluated at `x`
        """
        alpha = _dirichlet_check_parameters(alpha)
        x = _dirichlet_check_input(alpha, x)

        out = self._logpdf(x, alpha)
        return _squeeze_output(out)

    def pdf(self, x, alpha):
        """
        The Dirichlet probability density function.

        Parameters
        ----------
        x : array_like
            Quantiles, with the last axis of `x` denoting the components.
        %(_dirichlet_doc_default_callparams)s

        Returns
        -------
        pdf : ndarray
            The probability density function evaluated at `x`
        """
        alpha = _dirichlet_check_parameters(alpha)
        x = _dirichlet_check_input(alpha, x)

        out = np.exp(self._logpdf(x, alpha))
        return _squeeze_output(out)

    def mean(self, alpha):
        """
        Compute the mean of the Dirichlet distribution.

        Parameters
        ----------
        %(_dirichlet_doc_default_callparams)s

        Returns
        -------
        mu : scalar
            Mean of the Dirichlet distribution

        """
        alpha = _dirichlet_check_parameters(alpha)

        out = alpha / (np.sum(alpha))
        return _squeeze_output(out)

    def var(self, alpha):
        """
        Compute the variance of the Dirichlet distribution.

        Parameters
        ----------
        %(_dirichlet_doc_default_callparams)s

        Returns
        -------
        v : scalar
            Variance of the Dirichlet distribution

        """

        alpha = _dirichlet_check_parameters(alpha)

        alpha0 = np.sum(alpha)
        out = (alpha * (alpha0 - alpha)) / ((alpha0 * alpha0) * (alpha0 + 1))
        return out

    def entropy(self, alpha):
        """
        Compute the differential entropy of the Dirichlet distribution.

        Parameters
        ----------
        %(_dirichlet_doc_default_callparams)s

        Returns
        -------
        h : scalar
            Entropy of the Dirichlet distribution

        """

        alpha = _dirichlet_check_parameters(alpha)

        alpha0 = np.sum(alpha)
        lnB = _lnB(alpha)
        K = alpha.shape[0]

        out = lnB + (alpha0 - K) * scipy.special.psi(alpha0) - np.sum(
            (alpha - 1) * scipy.special.psi(alpha))
        return _squeeze_output(out)

    def rvs(self, alpha, size=1):
        """
        Draw random samples from a Dirichlet distribution.

        Parameters
        ----------
        %(_dirichlet_doc_default_callparams)s
        size : integer, optional
            Number of samples to draw (default 1).

        Returns
        -------
        rvs : ndarray or scalar
            Random variates of size (`size`, `N`), where `N` is the
            dimension of the random variable.

        """
        alpha = _dirichlet_check_parameters(alpha)
        return np.random.dirichlet(alpha, size=size)


dirichlet = dirichlet_gen()


class dirichlet_frozen(object):
    def __init__(self, alpha):
        self.alpha = _dirichlet_check_parameters(alpha)
        self._dirichlet = dirichlet_gen()

    def logpdf(self, x):
        return self._dirichlet.logpdf(x, self.alpha)

    def pdf(self, x):
        return self._dirichlet.pdf(x, self.alpha)

    def mean(self):
        return self._dirichlet.mean(self.alpha)

    def var(self):
        return self._dirichlet.var(self.alpha)

    def entropy(self):
        return self._dirichlet.entropy(self.alpha)

    def rvs(self, size=1):
        return self._dirichlet.rvs(self.alpha, size)


# Set frozen generator docstrings from corresponding docstrings in
# dirichlet_gen and fill in default strings in class docstrings
for name in ['logpdf', 'pdf', 'rvs', 'mean', 'var', 'entropy']:
    method = dirichlet_gen.__dict__[name]
    method_frozen = dirichlet_frozen.__dict__[name]
    method_frozen.__doc__ = doccer.docformat(
        method.__doc__, dirichlet_docdict_noparams)
    method.__doc__ = doccer.docformat(method.__doc__, dirichlet_docdict_params)
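As a usage note for the deleted module above, here is a short sketch of the frozen-distribution pattern it implements, written against the scipy.stats equivalents (multivariate_normal since scipy 0.14, dirichlet since 0.15); the parameter values echo the docstring examples.

# Hedged sketch of the frozen-distribution pattern, using scipy.stats.
import numpy as np
from scipy.stats import multivariate_normal, dirichlet

# Frozen multivariate normal, mirroring the 2-D example in the docstring above.
rv = multivariate_normal(mean=[0.5, -0.2], cov=[[2.0, 0.3], [0.3, 0.5]])
print(rv.pdf([0.0, 0.0]))   # density at a single point
print(rv.rvs(size=3))       # three random draws, shape (3, 2)

# Frozen Dirichlet with concentration parameters alpha.
d = dirichlet(alpha=[1.0, 2.0, 3.0])
print(d.mean())                  # alpha / alpha.sum()
x = np.array([0.2, 0.3, 0.5])    # a point on the simplex (entries sum to 1)
print(d.pdf(x))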
@ -1,201 +0,0 @@
|
|||||||
from __future__ import division, print_function, absolute_import
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
from numpy import poly1d
|
|
||||||
from scipy.special import beta
|
|
||||||
|
|
||||||
|
|
||||||
# The following code was used to generate the Pade coefficients for the
|
|
||||||
# Tukey Lambda variance function. Version 0.17 of mpmath was used.
|
|
||||||
#---------------------------------------------------------------------------
|
|
||||||
# import mpmath as mp
|
|
||||||
#
|
|
||||||
# mp.mp.dps = 60
|
|
||||||
#
|
|
||||||
# one = mp.mpf(1)
|
|
||||||
# two = mp.mpf(2)
|
|
||||||
#
|
|
||||||
# def mpvar(lam):
|
|
||||||
# if lam == 0:
|
|
||||||
# v = mp.pi**2 / three
|
|
||||||
# else:
|
|
||||||
# v = (two / lam**2) * (one / (one + two*lam) -
|
|
||||||
# mp.beta(lam + one, lam + one))
|
|
||||||
# return v
|
|
||||||
#
|
|
||||||
# t = mp.taylor(mpvar, 0, 8)
|
|
||||||
# p, q = mp.pade(t, 4, 4)
|
|
||||||
# print "p =", [mp.fp.mpf(c) for c in p]
|
|
||||||
# print "q =", [mp.fp.mpf(c) for c in q]
|
|
||||||
#---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# Pade coefficients for the Tukey Lambda variance function.
|
|
||||||
_tukeylambda_var_pc = [3.289868133696453, 0.7306125098871127,
|
|
||||||
-0.5370742306855439, 0.17292046290190008,
|
|
||||||
-0.02371146284628187]
|
|
||||||
_tukeylambda_var_qc = [1.0, 3.683605511659861, 4.184152498888124,
|
|
||||||
1.7660926747377275, 0.2643989311168465]
|
|
||||||
|
|
||||||
# numpy.poly1d instances for the numerator and denominator of the
|
|
||||||
# Pade approximation to the Tukey Lambda variance.
|
|
||||||
_tukeylambda_var_p = poly1d(_tukeylambda_var_pc[::-1])
|
|
||||||
_tukeylambda_var_q = poly1d(_tukeylambda_var_qc[::-1])
|
|
||||||
|
|
||||||
|
|
||||||
def tukeylambda_variance(lam):
|
|
||||||
"""Variance of the Tukey Lambda distribution.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
lam : array_like
|
|
||||||
The lambda values at which to compute the variance.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
v : ndarray
|
|
||||||
The variance. For lam < -0.5, the variance is not defined, so
|
|
||||||
np.nan is returned. For lam = 0.5, np.inf is returned.
|
|
||||||
|
|
||||||
Notes
|
|
||||||
-----
|
|
||||||
In an interval around lambda=0, this function uses the [4,4] Pade
|
|
||||||
approximation to compute the variance. Otherwise it uses the standard
|
|
||||||
formula (http://en.wikipedia.org/wiki/Tukey_lambda_distribution). The
|
|
||||||
Pade approximation is used because the standard formula has a removable
|
|
||||||
discontinuity at lambda = 0, and does not produce accurate numerical
|
|
||||||
results near lambda = 0.
|
|
||||||
"""
|
|
||||||
lam = np.asarray(lam)
|
|
||||||
shp = lam.shape
|
|
||||||
lam = np.atleast_1d(lam).astype(np.float64)
|
|
||||||
|
|
||||||
# For absolute values of lam less than threshold, use the Pade
|
|
||||||
# approximation.
|
|
||||||
threshold = 0.075
|
|
||||||
|
|
||||||
# Play games with masks to implement the conditional evaluation of
|
|
||||||
# the distribution.
|
|
||||||
# lambda < -0.5: var = nan
|
|
||||||
low_mask = lam < -0.5
|
|
||||||
# lambda == -0.5: var = inf
|
|
||||||
neghalf_mask = lam == -0.5
|
|
||||||
# abs(lambda) < threshold: use Pade approximation
|
|
||||||
small_mask = np.abs(lam) < threshold
|
|
||||||
# else the "regular" case: use the explicit formula.
|
|
||||||
reg_mask = ~(low_mask | neghalf_mask | small_mask)
|
|
||||||
|
|
||||||
# Get the 'lam' values for the cases where they are needed.
|
|
||||||
small = lam[small_mask]
|
|
||||||
reg = lam[reg_mask]
|
|
||||||
|
|
||||||
# Compute the function for each case.
|
|
||||||
v = np.empty_like(lam)
|
|
||||||
v[low_mask] = np.nan
|
|
||||||
v[neghalf_mask] = np.inf
|
|
||||||
if small.size > 0:
|
|
||||||
# Use the Pade approximation near lambda = 0.
|
|
||||||
v[small_mask] = _tukeylambda_var_p(small) / _tukeylambda_var_q(small)
|
|
||||||
if reg.size > 0:
|
|
||||||
v[reg_mask] = (2.0 / reg**2) * (1.0 / (1.0 + 2 * reg) -
|
|
||||||
beta(reg + 1, reg + 1))
|
|
||||||
v.shape = shp
|
|
||||||
return v
|
|
||||||
|
|
||||||
|
|
||||||
# The following code was used to generate the Pade coefficients for the
|
|
||||||
# Tukey Lambda kurtosis function. Version 0.17 of mpmath was used.
|
|
||||||
#---------------------------------------------------------------------------
|
|
||||||
# import mpmath as mp
|
|
||||||
#
|
|
||||||
# mp.mp.dps = 60
|
|
||||||
#
|
|
||||||
# one = mp.mpf(1)
|
|
||||||
# two = mp.mpf(2)
|
|
||||||
# three = mp.mpf(3)
|
|
||||||
# four = mp.mpf(4)
|
|
||||||
#
|
|
||||||
# def mpkurt(lam):
|
|
||||||
# if lam == 0:
|
|
||||||
# k = mp.mpf(6)/5
|
|
||||||
# else:
|
|
||||||
# numer = (one/(four*lam+one) - four*mp.beta(three*lam+one, lam+one) +
|
|
||||||
# three*mp.beta(two*lam+one, two*lam+one))
|
|
||||||
# denom = two*(one/(two*lam+one) - mp.beta(lam+one,lam+one))**2
|
|
||||||
# k = numer / denom - three
|
|
||||||
# return k
|
|
||||||
#
|
|
||||||
# # There is a bug in mpmath 0.17: when we use the 'method' keyword of the
|
|
||||||
# # taylor function and we request a degree 9 Taylor polynomial, we actually
|
|
||||||
# # get degree 8.
|
|
||||||
# t = mp.taylor(mpkurt, 0, 9, method='quad', radius=0.01)
|
|
||||||
# t = [mp.chop(c, tol=1e-15) for c in t]
|
|
||||||
# p, q = mp.pade(t, 4, 4)
|
|
||||||
# print "p =", [mp.fp.mpf(c) for c in p]
|
|
||||||
# print "q =", [mp.fp.mpf(c) for c in q]
|
|
||||||
#---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# Pade coefficients for the Tukey Lambda kurtosis function.
|
|
||||||
_tukeylambda_kurt_pc = [1.2, -5.853465139719495, -22.653447381131077,
|
|
||||||
0.20601184383406815, 4.59796302262789]
|
|
||||||
_tukeylambda_kurt_qc = [1.0, 7.171149192233599, 12.96663094361842,
|
|
||||||
0.43075235247853005, -2.789746758009912]
|
|
||||||
|
|
||||||
# numpy.poly1d instances for the numerator and denominator of the
|
|
||||||
# Pade approximation to the Tukey Lambda kurtosis.
|
|
||||||
_tukeylambda_kurt_p = poly1d(_tukeylambda_kurt_pc[::-1])
|
|
||||||
_tukeylambda_kurt_q = poly1d(_tukeylambda_kurt_qc[::-1])
|
|
||||||
|
|
||||||
|
|
||||||
def tukeylambda_kurtosis(lam):
    """Kurtosis of the Tukey Lambda distribution.

    Parameters
    ----------
    lam : array_like
        The lambda values at which to compute the kurtosis.

    Returns
    -------
    k : ndarray
        The kurtosis.  For lam < -0.25, the kurtosis is not defined, so
        np.nan is returned.  For lam = -0.25, np.inf is returned.

    """
    lam = np.asarray(lam)
    shp = lam.shape
    lam = np.atleast_1d(lam).astype(np.float64)

    # For absolute values of lam less than threshold, use the Pade
    # approximation.
    threshold = 0.055

    # Use masks to implement the conditional evaluation of the kurtosis.
    # lambda < -0.25:  kurtosis = nan
    low_mask = lam < -0.25
    # lambda == -0.25: kurtosis = inf
    negqrtr_mask = lam == -0.25
    # lambda near 0:  use Pade approximation
    small_mask = np.abs(lam) < threshold
    # else the "regular" case:  use the explicit formula.
    reg_mask = ~(low_mask | negqrtr_mask | small_mask)

    # Get the 'lam' values for the cases where they are needed.
    small = lam[small_mask]
    reg = lam[reg_mask]

    # Compute the function for each case.
    k = np.empty_like(lam)
    k[low_mask] = np.nan
    k[negqrtr_mask] = np.inf
    if small.size > 0:
        k[small_mask] = _tukeylambda_kurt_p(small) / _tukeylambda_kurt_q(small)
    if reg.size > 0:
        numer = (1.0 / (4 * reg + 1) - 4 * beta(3 * reg + 1, reg + 1) +
                 3 * beta(2 * reg + 1, 2 * reg + 1))
        denom = 2 * (1.0 / (2 * reg + 1) - beta(reg + 1, reg + 1)) ** 2
        k[reg_mask] = numer / denom - 3

    # The return value will be a numpy array; resetting the shape ensures that
    # if `lam` was a scalar, the return value is a 0-d array.
    k.shape = shp
    return k

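# A minimal usage sketch (editorial, not part of the original file): two
# values of the excess kurtosis that can be checked by hand.  lam = 0 is the
# logistic case (Pade branch) and lam = 2 is the uniform case handled by the
# explicit formula.
#
#     >>> tukeylambda_kurtosis(0.0)    # 6/5, logistic excess kurtosis
#     array(1.2)
#     >>> tukeylambda_kurtosis(2.0)    # -6/5, uniform excess kurtosis
#     array(-1.2)
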
@ -1,271 +0,0 @@
"""Some functions for working with contingency tables (i.e. cross
tabulations).
"""


from __future__ import division, print_function, absolute_import

from functools import reduce
import numpy as np
from .stats import power_divergence


__all__ = ['margins', 'expected_freq', 'chi2_contingency']


def margins(a):
    """Return a list of the marginal sums of the array `a`.

    Parameters
    ----------
    a : ndarray
        The array for which to compute the marginal sums.

    Returns
    -------
    margsums : list of ndarrays
        A list of length `a.ndim`.  `margsums[k]` is the result
        of summing `a` over all axes except `k`; it has the same
        number of dimensions as `a`, but the length of each axis
        except axis `k` will be 1.

    Examples
    --------
    >>> a = np.arange(12).reshape(2, 6)
    >>> a
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11]])
    >>> m0, m1 = margins(a)
    >>> m0
    array([[15],
           [51]])
    >>> m1
    array([[ 6,  8, 10, 12, 14, 16]])

    >>> b = np.arange(24).reshape(2,3,4)
    >>> m0, m1, m2 = margins(b)
    >>> m0
    array([[[ 66]],
           [[210]]])
    >>> m1
    array([[[ 60],
            [ 92],
            [124]]])
    >>> m2
    array([[[60, 66, 72, 78]]])

    """
    margsums = []
    ranged = list(range(a.ndim))
    for k in ranged:
        marg = np.apply_over_axes(np.sum, a, [j for j in ranged if j != k])
        margsums.append(marg)
    return margsums

def expected_freq(observed):
    """
    Compute the expected frequencies from a contingency table.

    Given an n-dimensional contingency table of observed frequencies,
    compute the expected frequencies for the table based on the marginal
    sums under the assumption that the groups associated with each
    dimension are independent.

    Parameters
    ----------
    observed : array_like
        The table of observed frequencies.  (While this function can handle
        a 1-D array, that case is trivial.  Generally `observed` is at
        least 2-D.)

    Returns
    -------
    expected : ndarray of float64
        The expected frequencies, based on the marginal sums of the table.
        Same shape as `observed`.

    Examples
    --------
    >>> observed = np.array([[10, 10, 20], [20, 20, 20]])
    >>> expected_freq(observed)
    array([[ 12.,  12.,  16.],
           [ 18.,  18.,  24.]])

    """
    # Typically `observed` is an integer array. If `observed` has a large
    # number of dimensions or holds large values, some of the following
    # computations may overflow, so we first switch to floating point.
    observed = np.asarray(observed, dtype=np.float64)

    # Create a list of the marginal sums.
    margsums = margins(observed)

    # Create the array of expected frequencies.  The shapes of the
    # marginal sums returned by apply_over_axes() are just what we
    # need for broadcasting in the following product.
    d = observed.ndim
    expected = reduce(np.multiply, margsums) / observed.sum() ** (d - 1)
    return expected

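# Editorial worked check of the formula above, using the docstring's table:
# for [[10, 10, 20], [20, 20, 20]] the row sums are (40, 60), the column
# sums are (30, 30, 40) and the grand total is 100, so, e.g.,
# expected[0, 0] = 40 * 30 / 100 = 12.0.  The reduce(np.multiply, margsums)
# call builds exactly this outer product of the margins, and dividing by
# total**(ndim - 1) restores the scale.
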
def chi2_contingency(observed, correction=True, lambda_=None):
    """Chi-square test of independence of variables in a contingency table.

    This function computes the chi-square statistic and p-value for the
    hypothesis test of independence of the observed frequencies in the
    contingency table [1]_ `observed`.  The expected frequencies are computed
    based on the marginal sums under the assumption of independence; see
    `scipy.stats.contingency.expected_freq`.  The number of degrees of
    freedom is (expressed using numpy functions and attributes)::

        dof = observed.size - sum(observed.shape) + observed.ndim - 1


    Parameters
    ----------
    observed : array_like
        The contingency table.  The table contains the observed frequencies
        (i.e. number of occurrences) in each category.  In the
        two-dimensional case, the table is often described as an
        "R x C table".
    correction : bool, optional
        If True, *and* the degrees of freedom is 1, apply Yates' correction
        for continuity.  The effect of the correction is to adjust each
        observed value by 0.5 towards the corresponding expected value.
    lambda_ : float or str, optional
        By default, the statistic computed in this test is Pearson's
        chi-squared statistic [2]_.  `lambda_` allows a statistic from the
        Cressie-Read power divergence family [3]_ to be used instead.  See
        `power_divergence` for details.

    Returns
    -------
    chi2 : float
        The test statistic.
    p : float
        The p-value of the test.
    dof : int
        Degrees of freedom.
    expected : ndarray, same shape as `observed`
        The expected frequencies, based on the marginal sums of the table.

    See Also
    --------
    contingency.expected_freq
    fisher_exact
    chisquare
    power_divergence

    Notes
    -----
    An often quoted guideline for the validity of this calculation is that
    the test should be used only if the observed and expected frequency in
    each cell is at least 5.

    This is a test for the independence of different categories of a
    population.  The test is only meaningful when the dimension of
    `observed` is two or more.  Applying the test to a one-dimensional
    table will always result in `expected` equal to `observed` and a
    chi-square statistic equal to 0.

    This function does not handle masked arrays, because the calculation
    does not make sense with missing values.

    Like stats.chisquare, this function computes a chi-square statistic;
    the convenience this function provides is to figure out the expected
    frequencies and degrees of freedom from the given contingency table.
    If these were already known, and if the Yates' correction was not
    required, one could use stats.chisquare.  That is, if one calls::

        chi2, p, dof, ex = chi2_contingency(obs, correction=False)

    then the following is true::

        (chi2, p) == stats.chisquare(obs.ravel(), f_exp=ex.ravel(),
                                     ddof=obs.size - 1 - dof)

    The `lambda_` argument was added in version 0.13.0 of scipy.

    References
    ----------
    .. [1] "Contingency table", http://en.wikipedia.org/wiki/Contingency_table
    .. [2] "Pearson's chi-squared test",
           http://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test
    .. [3] Cressie, N. and Read, T. R. C., "Multinomial Goodness-of-Fit
           Tests", J. Royal Stat. Soc. Series B, Vol. 46, No. 3 (1984),
           pp. 440-464.

    Examples
    --------
    A two-way example (2 x 3):

    >>> obs = np.array([[10, 10, 20], [20, 20, 20]])
    >>> chi2_contingency(obs)
    (2.7777777777777777,
     0.24935220877729619,
     2,
     array([[ 12.,  12.,  16.],
            [ 18.,  18.,  24.]]))

    Perform the test using the log-likelihood ratio (i.e. the "G-test")
    instead of Pearson's chi-squared statistic.

    >>> g, p, dof, expctd = chi2_contingency(obs, lambda_="log-likelihood")
    >>> g, p
    (2.7688587616781319, 0.25046668010954165)

    A four-way example (2 x 2 x 2 x 2):

    >>> obs = np.array(
    ...     [[[[12, 17],
    ...        [11, 16]],
    ...       [[11, 12],
    ...        [15, 16]]],
    ...      [[[23, 15],
    ...        [30, 22]],
    ...       [[14, 17],
    ...        [15, 16]]]])
    >>> chi2_contingency(obs)
    (8.7584514426741897,
     0.64417725029295503,
     11,
     array([[[[ 14.15462386,  14.15462386],
              [ 16.49423111,  16.49423111]],
             [[ 11.2461395 ,  11.2461395 ],
              [ 13.10500554,  13.10500554]]],
            [[[ 19.5591166 ,  19.5591166 ],
              [ 22.79202844,  22.79202844]],
             [[ 15.54012004,  15.54012004],
              [ 18.10873492,  18.10873492]]]]))

    """
    observed = np.asarray(observed)
    if np.any(observed < 0):
        raise ValueError("All values in `observed` must be nonnegative.")
    if observed.size == 0:
        raise ValueError("No data; `observed` has size 0.")

    expected = expected_freq(observed)
    if np.any(expected == 0):
        # Include one of the positions where expected is zero in
        # the exception message.
        zeropos = list(zip(*np.where(expected == 0)))[0]
        raise ValueError("The internally computed table of expected "
                         "frequencies has a zero element at %s." % (zeropos,))

    # The degrees of freedom
    dof = expected.size - sum(expected.shape) + expected.ndim - 1

    if dof == 0:
        # Degenerate case; this occurs when `observed` is 1D (or, more
        # generally, when it has only one nontrivial dimension).  In this
        # case, we also have observed == expected, so chi2 is 0.
        chi2 = 0.0
        p = 1.0
    else:
        if dof == 1 and correction:
            # Adjust `observed` according to Yates' correction for continuity.
            observed = observed + 0.5 * np.sign(expected - observed)

        chi2, p = power_divergence(observed, expected,
                                   ddof=observed.size - 1 - dof, axis=None,
                                   lambda_=lambda_)

    return chi2, p, dof, expected

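# Editorial sanity check of the dof formula used above: for the 2 x 3
# docstring example, observed.size = 6, sum(observed.shape) = 5 and
# observed.ndim = 2, so dof = 6 - 5 + 2 - 1 = 2, which matches the familiar
# (R - 1) * (C - 1) rule for two-way tables.
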
@ -1,541 +0,0 @@
#-------------------------------------------------------------------------------
#
#  Define classes for (uni/multi)-variate kernel density estimation.
#
#  Currently, only Gaussian kernels are implemented.
#
#  Written by: Robert Kern
#
#  Date: 2004-08-09
#
#  Modified: 2005-02-10 by Robert Kern.
#              Contributed to Scipy
#            2005-10-07 by Robert Kern.
#              Some fixes to match the new scipy_core
#
#  Copyright 2004-2005 by Enthought, Inc.
#
#-------------------------------------------------------------------------------

from __future__ import division, print_function, absolute_import

# Standard library imports.
import warnings

# Scipy imports.
from scipy._lib.six import callable, string_types
from scipy import linalg, special

from numpy import atleast_2d, reshape, zeros, newaxis, dot, exp, pi, sqrt, \
    ravel, power, atleast_1d, squeeze, sum, transpose
import numpy as np
from numpy.random import randint, multivariate_normal

# Local imports.
from . import mvn


__all__ = ['gaussian_kde']


class gaussian_kde(object):
    """Representation of a kernel-density estimate using Gaussian kernels.

    Kernel density estimation is a way to estimate the probability density
    function (PDF) of a random variable in a non-parametric way.
    `gaussian_kde` works for both uni-variate and multi-variate data.  It
    includes automatic bandwidth determination.  The estimation works best
    for a unimodal distribution; bimodal or multi-modal distributions tend
    to be oversmoothed.

    Parameters
    ----------
    dataset : array_like
        Datapoints to estimate from. In case of univariate data this is a 1-D
        array, otherwise a 2-D array with shape (# of dims, # of data).
    bw_method : str, scalar or callable, optional
        The method used to calculate the estimator bandwidth.  This can be
        'scott', 'silverman', a scalar constant or a callable.  If a scalar,
        this will be used directly as `kde.factor`.  If a callable, it should
        take a `gaussian_kde` instance as only parameter and return a scalar.
        If None (default), 'scott' is used.  See Notes for more details.

    Attributes
    ----------
    dataset : ndarray
        The dataset with which `gaussian_kde` was initialized.
    d : int
        Number of dimensions.
    n : int
        Number of datapoints.
    factor : float
        The bandwidth factor, obtained from `kde.covariance_factor`, with
        which the covariance matrix is multiplied.
    covariance : ndarray
        The covariance matrix of `dataset`, scaled by the calculated bandwidth
        (`kde.factor`).
    inv_cov : ndarray
        The inverse of `covariance`.

    Methods
    -------
    kde.evaluate(points) : ndarray
        Evaluate the estimated pdf on a provided set of points.
    kde(points) : ndarray
        Same as kde.evaluate(points)
    kde.integrate_gaussian(mean, cov) : float
        Multiply pdf with a specified Gaussian and integrate over the whole
        domain.
    kde.integrate_box_1d(low, high) : float
        Integrate pdf (1D only) between two bounds.
    kde.integrate_box(low_bounds, high_bounds) : float
        Integrate pdf over a rectangular space between low_bounds and
        high_bounds.
    kde.integrate_kde(other_kde) : float
        Integrate two kernel density estimates multiplied together.
    kde.pdf(points) : ndarray
        Alias for ``kde.evaluate(points)``.
    kde.logpdf(points) : ndarray
        Equivalent to ``np.log(kde.evaluate(points))``.
    kde.resample(size=None) : ndarray
        Randomly sample a dataset from the estimated pdf.
    kde.set_bandwidth(bw_method='scott') : None
        Computes the bandwidth, i.e. the coefficient that multiplies the data
        covariance matrix to obtain the kernel covariance matrix.
        .. versionadded:: 0.11.0
    kde.covariance_factor : float
        Computes the coefficient (`kde.factor`) that multiplies the data
        covariance matrix to obtain the kernel covariance matrix.
        The default is `scotts_factor`.  A subclass can overwrite this method
        to provide a different method, or set it through a call to
        `kde.set_bandwidth`.

    Notes
    -----
    Bandwidth selection strongly influences the estimate obtained from the
    KDE (much more so than the actual shape of the kernel).  Bandwidth
    selection can be done by a "rule of thumb", by cross-validation, by
    "plug-in methods" or by other means; see [3]_, [4]_ for reviews.
    `gaussian_kde` uses a rule of thumb, the default is Scott's Rule.

    Scott's Rule [1]_, implemented as `scotts_factor`, is::

        n**(-1./(d+4)),

    with ``n`` the number of data points and ``d`` the number of dimensions.
    Silverman's Rule [2]_, implemented as `silverman_factor`, is::

        (n * (d + 2) / 4.)**(-1. / (d + 4)).

    Good general descriptions of kernel density estimation can be found in
    [1]_ and [2]_, the mathematics for this multi-dimensional implementation
    can be found in [1]_.

    References
    ----------
    .. [1] D.W. Scott, "Multivariate Density Estimation: Theory, Practice,
           and Visualization", John Wiley & Sons, New York, Chicester, 1992.
    .. [2] B.W. Silverman, "Density Estimation for Statistics and Data
           Analysis", Vol. 26, Monographs on Statistics and Applied
           Probability, Chapman and Hall, London, 1986.
    .. [3] B.A. Turlach, "Bandwidth Selection in Kernel Density Estimation: A
           Review", CORE and Institut de Statistique, Vol. 19, pp. 1-33, 1993.
    .. [4] D.M. Bashtannyk and R.J. Hyndman, "Bandwidth selection for kernel
           conditional density estimation", Computational Statistics & Data
           Analysis, Vol. 36, pp. 279-298, 2001.

    Examples
    --------
    Generate some random two-dimensional data:

    >>> from scipy import stats
    >>> def measure(n):
    ...     "Measurement model, return two coupled measurements."
    ...     m1 = np.random.normal(size=n)
    ...     m2 = np.random.normal(scale=0.5, size=n)
    ...     return m1+m2, m1-m2

    >>> m1, m2 = measure(2000)
    >>> xmin = m1.min()
    >>> xmax = m1.max()
    >>> ymin = m2.min()
    >>> ymax = m2.max()

    Perform a kernel density estimate on the data:

    >>> X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
    >>> positions = np.vstack([X.ravel(), Y.ravel()])
    >>> values = np.vstack([m1, m2])
    >>> kernel = stats.gaussian_kde(values)
    >>> Z = np.reshape(kernel(positions).T, X.shape)

    Plot the results:

    >>> import matplotlib.pyplot as plt
    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111)
    >>> ax.imshow(np.rot90(Z), cmap=plt.cm.gist_earth_r,
    ...           extent=[xmin, xmax, ymin, ymax])
    >>> ax.plot(m1, m2, 'k.', markersize=2)
    >>> ax.set_xlim([xmin, xmax])
    >>> ax.set_ylim([ymin, ymax])
    >>> plt.show()

    """
    def __init__(self, dataset, bw_method=None):
        self.dataset = atleast_2d(dataset)
        if not self.dataset.size > 1:
            raise ValueError("`dataset` input should have multiple elements.")

        self.d, self.n = self.dataset.shape
        self.set_bandwidth(bw_method=bw_method)

    def evaluate(self, points):
        """Evaluate the estimated pdf on a set of points.

        Parameters
        ----------
        points : (# of dimensions, # of points)-array
            Alternatively, a (# of dimensions,) vector can be passed in and
            treated as a single point.

        Returns
        -------
        values : (# of points,)-array
            The values at each point.

        Raises
        ------
        ValueError
            If the dimensionality of the input points is different from the
            dimensionality of the KDE.

        """
        points = atleast_2d(points)

        d, m = points.shape
        if d != self.d:
            if d == 1 and m == self.d:
                # points was passed in as a row vector
                points = reshape(points, (self.d, 1))
                m = 1
            else:
                msg = "points have dimension %s, dataset has dimension %s" % (
                    d, self.d)
                raise ValueError(msg)

        result = zeros((m,), dtype=float)

        if m >= self.n:
            # there are more points than data, so loop over data
            for i in range(self.n):
                diff = self.dataset[:, i, newaxis] - points
                tdiff = dot(self.inv_cov, diff)
                energy = sum(diff * tdiff, axis=0) / 2.0
                result = result + exp(-energy)
        else:
            # loop over points
            for i in range(m):
                diff = self.dataset - points[:, i, newaxis]
                tdiff = dot(self.inv_cov, diff)
                energy = sum(diff * tdiff, axis=0) / 2.0
                result[i] = sum(exp(-energy), axis=0)

        result = result / self._norm_factor

        return result

    __call__ = evaluate

    def integrate_gaussian(self, mean, cov):
        """
        Multiply estimated density by a multivariate Gaussian and integrate
        over the whole space.

        Parameters
        ----------
        mean : array_like
            A 1-D array, specifying the mean of the Gaussian.
        cov : array_like
            A 2-D array, specifying the covariance matrix of the Gaussian.

        Returns
        -------
        result : scalar
            The value of the integral.

        Raises
        ------
        ValueError
            If the mean or covariance of the input Gaussian differs from
            the KDE's dimensionality.

        """
        mean = atleast_1d(squeeze(mean))
        cov = atleast_2d(cov)

        if mean.shape != (self.d,):
            raise ValueError("mean does not have dimension %s" % self.d)
        if cov.shape != (self.d, self.d):
            raise ValueError("covariance does not have dimension %s" % self.d)

        # make mean a column vector
        mean = mean[:, newaxis]

        sum_cov = self.covariance + cov

        diff = self.dataset - mean
        tdiff = dot(linalg.inv(sum_cov), diff)

        energies = sum(diff * tdiff, axis=0) / 2.0
        result = sum(exp(-energies), axis=0) / \
            sqrt(linalg.det(2 * pi * sum_cov)) / self.n

        return result

    def integrate_box_1d(self, low, high):
        """
        Computes the integral of a 1D pdf between two bounds.

        Parameters
        ----------
        low : scalar
            Lower bound of integration.
        high : scalar
            Upper bound of integration.

        Returns
        -------
        value : scalar
            The result of the integral.

        Raises
        ------
        ValueError
            If the KDE is over more than one dimension.

        """
        if self.d != 1:
            raise ValueError("integrate_box_1d() only handles 1D pdfs")

        stdev = ravel(sqrt(self.covariance))[0]

        normalized_low = ravel((low - self.dataset) / stdev)
        normalized_high = ravel((high - self.dataset) / stdev)

        value = np.mean(special.ndtr(normalized_high) -
                        special.ndtr(normalized_low))
        return value

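    # Editorial note on the method above: each kernel is a normal density
    # centred on a data point with standard deviation `stdev`, so the exact
    # integral is the average of normal-CDF differences,
    #     value = mean_i(ndtr((high - x_i)/stdev) - ndtr((low - x_i)/stdev)),
    # which is precisely what the vectorized code computes.
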
    def integrate_box(self, low_bounds, high_bounds, maxpts=None):
        """Computes the integral of a pdf over a rectangular interval.

        Parameters
        ----------
        low_bounds : array_like
            A 1-D array containing the lower bounds of integration.
        high_bounds : array_like
            A 1-D array containing the upper bounds of integration.
        maxpts : int, optional
            The maximum number of points to use for integration.

        Returns
        -------
        value : scalar
            The result of the integral.

        """
        if maxpts is not None:
            extra_kwds = {'maxpts': maxpts}
        else:
            extra_kwds = {}

        value, inform = mvn.mvnun(low_bounds, high_bounds, self.dataset,
                                  self.covariance, **extra_kwds)
        if inform:
            msg = ('An integral in mvn.mvnun requires more points than %s' %
                   (self.d * 1000))
            warnings.warn(msg)

        return value

    def integrate_kde(self, other):
        """
        Computes the integral of the product of this kernel density estimate
        with another.

        Parameters
        ----------
        other : gaussian_kde instance
            The other kde.

        Returns
        -------
        value : scalar
            The result of the integral.

        Raises
        ------
        ValueError
            If the KDEs have different dimensionality.

        """
        if other.d != self.d:
            raise ValueError("KDEs are not the same dimensionality")

        # we want to iterate over the smallest number of points
        if other.n < self.n:
            small = other
            large = self
        else:
            small = self
            large = other

        sum_cov = small.covariance + large.covariance
        sum_cov_chol = linalg.cho_factor(sum_cov)
        result = 0.0
        for i in range(small.n):
            mean = small.dataset[:, i, newaxis]
            diff = large.dataset - mean
            tdiff = linalg.cho_solve(sum_cov_chol, diff)

            energies = sum(diff * tdiff, axis=0) / 2.0
            result += sum(exp(-energies), axis=0)

        result /= sqrt(linalg.det(2 * pi * sum_cov)) * large.n * small.n

        return result

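    # Editorial note on the method above: it relies on the Gaussian product
    # identity  integral N(x; a, A) N(x; b, B) dx = N(a; b, A + B),  so the
    # double sum over both datasets only needs the combined covariance
    # `sum_cov`, factored once by Cholesky and reused for every column of
    # differences.
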
    def resample(self, size=None):
        """
        Randomly sample a dataset from the estimated pdf.

        Parameters
        ----------
        size : int, optional
            The number of samples to draw.  If not provided, then the size is
            the same as the underlying dataset.

        Returns
        -------
        resample : (self.d, `size`) ndarray
            The sampled dataset.

        """
        if size is None:
            size = self.n

        norm = transpose(multivariate_normal(zeros((self.d,), float),
                                             self.covariance, size=size))
        indices = randint(0, self.n, size=size)
        means = self.dataset[:, indices]

        return means + norm

    def scotts_factor(self):
        return power(self.n, -1. / (self.d + 4))

    def silverman_factor(self):
        return power(self.n * (self.d + 2.0) / 4.0, -1. / (self.d + 4))

    #  Default method to calculate bandwidth, can be overwritten by subclass
    covariance_factor = scotts_factor

    def set_bandwidth(self, bw_method=None):
        """Compute the estimator bandwidth with given method.

        The new bandwidth calculated after a call to `set_bandwidth` is used
        for subsequent evaluations of the estimated density.

        Parameters
        ----------
        bw_method : str, scalar or callable, optional
            The method used to calculate the estimator bandwidth.  This can
            be 'scott', 'silverman', a scalar constant or a callable.  If a
            scalar, this will be used directly as `kde.factor`.  If a
            callable, it should take a `gaussian_kde` instance as only
            parameter and return a scalar.  If None (default), nothing
            happens; the current `kde.covariance_factor` method is kept.

        Notes
        -----
        .. versionadded:: 0.11

        Examples
        --------
        >>> x1 = np.array([-7, -5, 1, 4, 5.])
        >>> kde = stats.gaussian_kde(x1)
        >>> xs = np.linspace(-10, 10, num=50)
        >>> y1 = kde(xs)
        >>> kde.set_bandwidth(bw_method='silverman')
        >>> y2 = kde(xs)
        >>> kde.set_bandwidth(bw_method=kde.factor / 3.)
        >>> y3 = kde(xs)

        >>> fig = plt.figure()
        >>> ax = fig.add_subplot(111)
        >>> ax.plot(x1, np.ones(x1.shape) / (4. * x1.size), 'bo',
        ...         label='Data points (rescaled)')
        >>> ax.plot(xs, y1, label='Scott (default)')
        >>> ax.plot(xs, y2, label='Silverman')
        >>> ax.plot(xs, y3, label='Const (1/3 * Silverman)')
        >>> ax.legend()
        >>> plt.show()

        """
        if bw_method is None:
            pass
        elif bw_method == 'scott':
            self.covariance_factor = self.scotts_factor
        elif bw_method == 'silverman':
            self.covariance_factor = self.silverman_factor
        elif np.isscalar(bw_method) and not isinstance(bw_method,
                                                       string_types):
            self._bw_method = 'use constant'
            self.covariance_factor = lambda: bw_method
        elif callable(bw_method):
            self._bw_method = bw_method
            self.covariance_factor = lambda: self._bw_method(self)
        else:
            msg = "`bw_method` should be 'scott', 'silverman', a scalar " \
                  "or a callable."
            raise ValueError(msg)

        self._compute_covariance()

    def _compute_covariance(self):
        """Computes the covariance matrix for each Gaussian kernel using
        covariance_factor().
        """
        self.factor = self.covariance_factor()
        # Cache covariance and inverse covariance of the data
        if not hasattr(self, '_data_inv_cov'):
            self._data_covariance = atleast_2d(np.cov(self.dataset, rowvar=1,
                                                      bias=False))
            self._data_inv_cov = linalg.inv(self._data_covariance)

        self.covariance = self._data_covariance * self.factor**2
        self.inv_cov = self._data_inv_cov / self.factor**2
        self._norm_factor = sqrt(linalg.det(2*pi*self.covariance)) * self.n

    def pdf(self, x):
        """
        Evaluate the estimated pdf on a provided set of points.

        Notes
        -----
        This is an alias for `gaussian_kde.evaluate`.  See the ``evaluate``
        docstring for more details.

        """
        return self.evaluate(x)

    def logpdf(self, x):
        """
        Evaluate the log of the estimated pdf on a provided set of points.

        Notes
        -----
        See `gaussian_kde.evaluate` for more details; this method simply
        returns ``np.log(gaussian_kde.evaluate(x))``.

        """
        return np.log(self.evaluate(x))

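# Editorial usage sketch for the class above; assumes this module is
# importable (e.g. as wafo.stats.kde) and needs only numpy.
#
#     import numpy as np
#
#     x = np.random.normal(size=500)          # 1-D data
#     kde = gaussian_kde(x)                   # Scott's rule by default
#     grid = np.linspace(-3, 3, 101)
#     density = kde(grid)                     # same as kde.evaluate(grid)
#     mass = kde.integrate_box_1d(-1.0, 1.0)  # estimated P(-1 < X < 1)
#     new_x = kde.resample(size=200)          # (1, 200) draw from the KDE
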
@ -1,15 +0,0 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 06 16:02:47 2011

@author: pab
"""
import numpy as np
import wafo.kdetools as wk
n = 100
x = np.sort(5 * np.random.rand(1, n) - 2.5, axis=-1).ravel()
# draw one uniform per sample so the comparison broadcasts to shape (n,)
y = (np.cos(x) > 2 * np.random.rand(n) - 1).ravel()

kreg = wk.KRegression(x, y)
f = kreg(output='plotobj', title='Kernel regression', plotflag=1)
f.plot()
@ -1,13 +0,0 @@
from numpy import asarray, ndarray, ones, nan  # , reshape, repeat, product


def valarray(shape, value=nan, typecode=None):
    """Return an array of all value.
    """
    # out = reshape(repeat([value], product(shape, axis=0), axis=0), shape)
    out = ones(shape, dtype=bool) * value
    if typecode is not None:
        out = out.astype(typecode)
    if not isinstance(out, ndarray):
        out = asarray(out)
    return out

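# Editorial usage sketch: valarray fills a shape with a single value
# (nan by default).
#
#     >>> valarray((2, 3), value=0.5)
#     array([[ 0.5,  0.5,  0.5],
#            [ 0.5,  0.5,  0.5]])
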
File diff suppressed because it is too large
@ -1,79 +0,0 @@
"""
===================================================================
Statistical functions for masked arrays (:mod:`scipy.stats.mstats`)
===================================================================

.. currentmodule:: scipy.stats.mstats

This module contains a large number of statistical functions that can
be used with masked arrays.

Most of these functions are similar to those in scipy.stats but might
have small differences in the API or in the algorithm used. Since this
is a relatively new package, some API changes are still possible.

.. autosummary::
   :toctree: generated/

   argstoarray
   betai
   chisquare
   count_tied_groups
   describe
   f_oneway
   f_value_wilks_lambda
   find_repeats
   friedmanchisquare
   kendalltau
   kendalltau_seasonal
   kruskalwallis
   ks_twosamp
   kurtosis
   kurtosistest
   linregress
   mannwhitneyu
   mode
   moment
   mquantiles
   msign
   normaltest
   obrientransform
   pearsonr
   plotting_positions
   pointbiserialr
   rankdata
   scoreatpercentile
   sem
   signaltonoise
   skew
   skewtest
   spearmanr
   theilslopes
   threshold
   tmax
   tmean
   tmin
   trim
   trima
   trimboth
   trimmed_stde
   trimr
   trimtail
   tsem
   ttest_ind
   ttest_onesamp
   ttest_rel
   tvar
   variation
   winsorize
   zmap
   zscore

"""
from __future__ import division, print_function, absolute_import

from .mstats_basic import *
from .mstats_extras import *
from scipy.stats import gmean, hmean

File diff suppressed because it is too large
@ -1,451 +0,0 @@
"""
Additional statistics functions with support for masked arrays.

"""

# Original author (2007): Pierre GF Gerard-Marchant


from __future__ import division, print_function, absolute_import


__all__ = ['compare_medians_ms',
           'hdquantiles', 'hdmedian', 'hdquantiles_sd',
           'idealfourths',
           'median_cihs', 'mjci', 'mquantiles_cimj',
           'rsh',
           'trimmed_mean_ci', ]


import numpy as np
from numpy import float_, int_, ndarray

import numpy.ma as ma
from numpy.ma import MaskedArray

from . import mstats_basic as mstats

from scipy.stats.distributions import norm, beta, t, binom


def hdquantiles(data, prob=list([.25, .5, .75]), axis=None, var=False,):
    """
    Computes quantile estimates with the Harrell-Davis method.

    The quantile estimates are calculated as a weighted linear combination
    of order statistics.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : boolean
        Whether to return the variance of the estimate.

    Returns
    -------
    hdquantiles : MaskedArray
        A (p,) array of quantiles (if `var` is False), or a (2,p) array of
        quantiles and variances (if `var` is True), where ``p`` is the
        number of quantiles.

    """
    def _hd_1D(data, prob, var):
        "Computes the HD quantiles for a 1D array. Returns nan for invalid data."
        xsorted = np.squeeze(np.sort(data.compressed().view(ndarray)))
        # Don't use length here, in case we have a numpy scalar
        n = xsorted.size

        hd = np.empty((2, len(prob)), float_)
        if n < 2:
            hd.flat = np.nan
            if var:
                return hd
            return hd[0]

        v = np.arange(n + 1) / float(n)
        betacdf = beta.cdf
        for (i, p) in enumerate(prob):
            _w = betacdf(v, (n + 1) * p, (n + 1) * (1 - p))
            w = _w[1:] - _w[:-1]
            hd_mean = np.dot(w, xsorted)
            hd[0, i] = hd_mean
            #
            hd[1, i] = np.dot(w, (xsorted - hd_mean) ** 2)
        #
        hd[0, prob == 0] = xsorted[0]
        hd[0, prob == 1] = xsorted[-1]
        if var:
            hd[1, prob == 0] = hd[1, prob == 1] = np.nan
            return hd
        return hd[0]
    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float_)
    p = np.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None) or (data.ndim == 1):
        result = _hd_1D(data, p, var)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hd_1D, axis, data, p, var)

    return ma.fix_invalid(result, copy=False)

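# Editorial note on the estimator above: with a = (n+1)*p and
# b = (n+1)*(1-p), the weights are increments of the Beta(a, b) cdf,
#     w_i = I_{i/n}(a, b) - I_{(i-1)/n}(a, b),    i = 1, ..., n,
# and the Harrell-Davis estimate is sum_i w_i * x_(i), a smooth weighted
# combination of all order statistics rather than a single one.
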
def hdmedian(data, axis=-1, var=False):
    """
    Returns the Harrell-Davis estimate of the median along the given axis.

    Parameters
    ----------
    data : ndarray
        Data array.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : boolean
        Whether to return the variance of the estimate.

    """
    result = hdquantiles(data, [0.5], axis=axis, var=var)
    return result.squeeze()

def hdquantiles_sd(data, prob=list([.25, .5, .75]), axis=None):
    """
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    """
    def _hdsd_1D(data, prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)

        hdsd = np.empty(len(prob), float_)
        if n < 2:
            hdsd.flat = np.nan
            return hdsd

        vv = np.arange(n) / float(n - 1)
        betacdf = beta.cdf

        for (i, p) in enumerate(prob):
            _w = betacdf(vv, (n + 1) * p, (n + 1) * (1 - p))
            w = _w[1:] - _w[:-1]
            mx_ = np.fromiter([np.dot(w, xsorted[np.r_[list(range(0, k)),
                                                       list(range(k + 1, n))
                                                       ].astype(int_)])
                               for k in range(n)], dtype=float_)
            mx_var = np.array(mx_.var(), copy=False, ndmin=1) * n / float(n - 1)
            hdsd[i] = float(n - 1) * np.sqrt(np.diag(mx_var).diagonal() /
                                             float(n))
        return hdsd
    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float_)
    p = np.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hdsd_1D(data, p)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)

    return ma.fix_invalid(result, copy=False).ravel()

def trimmed_mean_ci(data, limits=(0.2, 0.2), inclusive=(True, True),
                    alpha=0.05, axis=None):
    """
    Selected confidence interval of the trimmed mean along the given axis.

    Parameters
    ----------
    data : array_like
        Input data.
    limits : {None, tuple}, optional
        None or a two item tuple.
        Tuple of the percentages to cut on each side of the array, with
        respect to the number of unmasked data, as floats between 0. and 1.
        If ``n`` is the number of unmasked data before trimming, then
        (``n * limits[0]``)th smallest data and (``n * limits[1]``)th
        largest data are masked.  The total number of unmasked data after
        trimming is ``n * (1. - sum(limits))``.
        The value of one limit can be set to None to indicate an open
        interval.

        Defaults to (0.2, 0.2).
    inclusive : (2,) tuple of boolean, optional
        If relative==False, tuple indicating whether values exactly equal to
        the absolute limits are allowed.
        If relative==True, tuple indicating whether the number of data being
        masked on each side should be rounded (True) or truncated (False).

        Defaults to (True, True).
    alpha : float, optional
        Confidence level of the intervals.

        Defaults to 0.05.
    axis : int, optional
        Axis along which to cut. If None, uses a flattened version of `data`.

        Defaults to None.

    Returns
    -------
    trimmed_mean_ci : (2,) ndarray
        The lower and upper confidence intervals of the trimmed data.

    """
    data = ma.array(data, copy=False)
    trimmed = mstats.trimr(data, limits=limits, inclusive=inclusive,
                           axis=axis)
    tmean = trimmed.mean(axis)
    tstde = mstats.trimmed_stde(data, limits=limits, inclusive=inclusive,
                                axis=axis)
    df = trimmed.count(axis) - 1
    tppf = t.ppf(1 - alpha / 2., df)
    return np.array((tmean - tppf * tstde, tmean + tppf * tstde))

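# Editorial note on the interval above: it is the usual Student-t interval
# applied to the trimmed sample,
#     tmean +/- t.ppf(1 - alpha/2, df) * tstde,    df = count - 1,
# with the standard error taken from the trimmed-mean estimator
# mstats.trimmed_stde rather than the raw sample standard error.
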
def mjci(data, prob=[0.25, 0.5, 0.75], axis=None):
    """
    Returns the Maritz-Jarrett estimators of the standard error of selected
    experimental quantiles of the data.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    """
    def _mjci_1D(data, p):
        data = np.sort(data.compressed())
        n = data.size
        prob = (np.array(p) * n + 0.5).astype(int_)
        betacdf = beta.cdf

        mj = np.empty(len(prob), float_)
        x = np.arange(1, n + 1, dtype=float_) / n
        y = x - 1. / n
        for (i, m) in enumerate(prob):
            (m1, m2) = (m - 1, n - m)
            W = betacdf(x, m - 1, n - m) - betacdf(y, m - 1, n - m)
            C1 = np.dot(W, data)
            C2 = np.dot(W, data ** 2)
            mj[i] = np.sqrt(C2 - C1 ** 2)
        return mj

    data = ma.array(data, copy=False)
    if data.ndim > 2:
        raise ValueError("Array 'data' must be at most two dimensional, "
                         "but got data.ndim = %d" % data.ndim)

    p = np.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        return _mjci_1D(data, p)
    else:
        return ma.apply_along_axis(_mjci_1D, axis, data, p)

def mquantiles_cimj(data, prob=[0.25, 0.50, 0.75], alpha=0.05, axis=None):
    """
    Computes the alpha confidence interval for the selected quantiles of the
    data, with Maritz-Jarrett estimators.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence
        Sequence of quantiles to compute.
    alpha : float
        Confidence level of the intervals.
    axis : integer
        Axis along which to compute the quantiles.
        If None, use a flattened array.

    """
    alpha = min(alpha, 1 - alpha)
    z = norm.ppf(1 - alpha / 2.)
    xq = mstats.mquantiles(data, prob, alphap=0, betap=0, axis=axis)
    smj = mjci(data, prob, axis=axis)
    return (xq - z * smj, xq + z * smj)

def median_cihs(data, alpha=0.05, axis=None):
    """
    Computes the alpha-level confidence interval for the median of the data.

    Uses the Hettmansperger-Sheather method.

    Parameters
    ----------
    data : array_like
        Input data. Masked values are discarded. The input should be 1D only,
        or `axis` should be set to None.
    alpha : float
        Confidence level of the intervals.
    axis : integer
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    median_cihs :
        Alpha level confidence interval.

    """
    def _cihs_1D(data, alpha):
        data = np.sort(data.compressed())
        n = len(data)
        alpha = min(alpha, 1 - alpha)
        k = int(binom._ppf(alpha / 2., n, 0.5))
        gk = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5)
        if gk < 1 - alpha:
            k -= 1
            gk = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5)
        gkk = binom.cdf(n - k - 1, n, 0.5) - binom.cdf(k, n, 0.5)
        I = (gk - 1 + alpha) / (gk - gkk)
        lambd = (n - k) * I / float(k + (n - 2 * k) * I)
        lims = (lambd * data[k] + (1 - lambd) * data[k - 1],
                lambd * data[n - k - 1] + (1 - lambd) * data[n - k])
        return lims
    data = ma.array(data, copy=False)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _cihs_1D(data.compressed(), alpha)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_cihs_1D, axis, data, alpha)

    return result

def compare_medians_ms(group_1, group_2, axis=None):
    """
    Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : array_like
        First dataset.
    group_2 : array_like
        Second dataset.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.

    """
    (med_1, med_2) = (ma.median(group_1, axis=axis),
                      ma.median(group_2, axis=axis))
    (std_1, std_2) = (mstats.stde_median(group_1, axis=axis),
                      mstats.stde_median(group_2, axis=axis))
    W = np.abs(med_1 - med_2) / ma.sqrt(std_1 ** 2 + std_2 ** 2)
    return 1 - norm.cdf(W)

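# Editorial usage sketch: the return value is the one-sided p-value
# 1 - Phi(|med_1 - med_2| / sqrt(se_1**2 + se_2**2)); a small value suggests
# the medians differ.
#
#     g1 = [17, 21, 22, 25, 29, 31, 33]
#     g2 = [14, 16, 18, 19, 20, 22, 24]
#     p = compare_medians_ms(g1, g2)   # a float in [0, 1]
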
def idealfourths(data, axis=None):
    """
    Returns an estimate of the lower and upper quartiles.

    Uses the ideal fourths algorithm.

    Parameters
    ----------
    data : array_like
        Input array.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the arrays are
        flattened.

    Returns
    -------
    idealfourths : {list of floats, masked array}
        Returns the two internal values that divide `data` into four parts
        using the ideal fourths algorithm either along the flattened array
        (if `axis` is None) or along `axis` of `data`.

    """
    def _idf(data):
        x = data.compressed()
        n = len(x)
        if n < 3:
            return [np.nan, np.nan]
        (j, h) = divmod(n / 4. + 5 / 12., 1)
        j = int(j)
        qlo = (1 - h) * x[j - 1] + h * x[j]
        k = n - j
        qup = (1 - h) * x[k] + h * x[k - 1]
        return [qlo, qup]
    data = ma.sort(data, axis=axis).view(MaskedArray)
    if (axis is None):
        return _idf(data)
    else:
        return ma.apply_along_axis(_idf, axis, data)

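# Editorial worked example of the interpolation above: for n = 20,
# n/4 + 5/12 = 5.41666..., so j = 5 and h = 0.41666..., giving
#     qlo = (1 - h) * x[4] + h * x[5],
# i.e. the lower "ideal fourth" sits 5/12 of the way between the 5th and
# 6th order statistics; qup mirrors this from the upper end.
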
def rsh(data, points=None):
    """
    Evaluates Rosenblatt's shifted histogram estimators for each point
    on the dataset 'data'.

    Parameters
    ----------
    data : sequence
        Input data. Masked values are ignored.
    points : sequence
        Sequence of points where to evaluate Rosenblatt shifted histogram.
        If None, use the data.

    """
    data = ma.array(data, copy=False)
    if points is None:
        points = data
    else:
        points = np.array(points, copy=False, ndmin=1)

    if data.ndim != 1:
        raise AttributeError("The input array should be 1D only!")

    n = data.count()
    r = idealfourths(data, axis=None)
    h = 1.2 * (r[-1] - r[0]) / n ** (1. / 5)
    nhi = (data[:, None] <= points[None, :] + h).sum(0)
    nlo = (data[:, None] < points[None, :] - h).sum(0)
    return (nhi - nlo) / (2. * n * h)

@ -1,76 +0,0 @@
from __future__ import division, print_function, absolute_import

from numpy import vectorize, deprecate
from numpy.random import random_sample

__all__ = ['randwppf', 'randwcdf']

# XXX: Are these needed anymore?

#####################################
# General purpose continuous
######################################


@deprecate(message="Deprecated in scipy 0.14.0, use "
                   "distribution-specific rvs() method instead")
def randwppf(ppf, args=(), size=None):
    """
    Returns an array of randomly distributed values from a distribution
    whose percent point function (inverse of the CDF or quantile function)
    is given.

    args is a tuple of extra arguments to the ppf function (i.e. shape,
    location, scale), and size is the size of the output.  Note the ppf
    function must accept an array of q values to compute over.

    """
    U = random_sample(size=size)
    return ppf(*(U,) + args)

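# Editorial usage sketch: inverse-transform sampling with the standard
# exponential quantile function ppf(q) = -log(1 - q).
#
#     import numpy as np
#     samples = randwppf(lambda q: -np.log(1 - q), size=1000)
#     # `samples` is then approximately Exp(1) distributed
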
@deprecate(message="Deprecated in scipy 0.14.0, use "
                   "distribution-specific rvs() method instead")
def randwcdf(cdf, mean=1.0, args=(), size=None):
    """
    Returns an array of randomly distributed values given a CDF.

    Given a cumulative distribution function (CDF) returns an array of
    randomly distributed values that would satisfy the CDF.

    Parameters
    ----------
    cdf : function
        CDF function that accepts a single value and `args`, and returns
        a single value.
    mean : float, optional
        The mean of the distribution which helps the solver.  Defaults
        to 1.0.
    args : tuple, optional
        Extra arguments to the cdf function (i.e. shape, location, scale)
    size : {int, None}, optional
        Is the size of the output.  If None, only 1 value will be returned.

    Returns
    -------
    randwcdf : ndarray
        Array of random numbers.

    Notes
    -----
    Can use the ``scipy.stats.distributions.*.cdf`` functions for the
    `cdf` parameter.

    """
    import scipy.optimize as optimize

    def _ppfopt(x, q, *nargs):
        newargs = (x,) + nargs
        return cdf(*newargs) - q

    def _ppf(q, *nargs):
        return optimize.fsolve(_ppfopt, mean, args=(q,) + nargs)

    _vppf = vectorize(_ppf)
    U = random_sample(size=size)
    return _vppf(*(U,) + args)

@@ -1,389 +0,0 @@
"""Utilities for writing code that runs on Python 2 and 3"""

# Copyright (c) 2010-2012 Benjamin Peterson
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import operator
import sys
import types

__author__ = "Benjamin Peterson <benjamin@python.org>"
__version__ = "1.2.0"


# True if we are running on Python 3.
PY3 = sys.version_info[0] == 3

if PY3:
    string_types = str,
    integer_types = int,
    class_types = type,
    text_type = str
    binary_type = bytes

    MAXSIZE = sys.maxsize
else:
    string_types = basestring,
    integer_types = (int, long)
    class_types = (type, types.ClassType)
    text_type = unicode
    binary_type = str

    if sys.platform.startswith("java"):
        # Jython always uses 32 bits.
        MAXSIZE = int((1 << 31) - 1)
    else:
        # It's possible to have sizeof(long) != sizeof(Py_ssize_t).
        class X(object):
            def __len__(self):
                return 1 << 31
        try:
            len(X())
        except OverflowError:
            # 32-bit
            MAXSIZE = int((1 << 31) - 1)
        else:
            # 64-bit
            MAXSIZE = int((1 << 63) - 1)
        del X


def _add_doc(func, doc):
    """Add documentation to a function."""
    func.__doc__ = doc


def _import_module(name):
    """Import module, returning the module after the last dot."""
    __import__(name)
    return sys.modules[name]


class _LazyDescr(object):

    def __init__(self, name):
        self.name = name

    def __get__(self, obj, tp):
        result = self._resolve()
        setattr(obj, self.name, result)
        # This is a bit ugly, but it avoids running this again.
        delattr(tp, self.name)
        return result


class MovedModule(_LazyDescr):

    def __init__(self, name, old, new=None):
        super(MovedModule, self).__init__(name)
        if PY3:
            if new is None:
                new = name
            self.mod = new
        else:
            self.mod = old

    def _resolve(self):
        return _import_module(self.mod)


class MovedAttribute(_LazyDescr):

    def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None):
        super(MovedAttribute, self).__init__(name)
        if PY3:
            if new_mod is None:
                new_mod = name
            self.mod = new_mod
            if new_attr is None:
                if old_attr is None:
                    new_attr = name
                else:
                    new_attr = old_attr
            self.attr = new_attr
        else:
            self.mod = old_mod
            if old_attr is None:
                old_attr = name
            self.attr = old_attr

    def _resolve(self):
        module = _import_module(self.mod)
        return getattr(module, self.attr)


class _MovedItems(types.ModuleType):
    """Lazy loading of moved objects"""


_moved_attributes = [
    MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"),
    MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"),
    MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"),
    MovedAttribute("map", "itertools", "builtins", "imap", "map"),
    MovedAttribute("reload_module", "__builtin__", "imp", "reload"),
    MovedAttribute("reduce", "__builtin__", "functools"),
    MovedAttribute("StringIO", "StringIO", "io"),
    MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"),
    MovedAttribute("zip", "itertools", "builtins", "izip", "zip"),

    MovedModule("builtins", "__builtin__"),
    MovedModule("configparser", "ConfigParser"),
    MovedModule("copyreg", "copy_reg"),
    MovedModule("http_cookiejar", "cookielib", "http.cookiejar"),
    MovedModule("http_cookies", "Cookie", "http.cookies"),
    MovedModule("html_entities", "htmlentitydefs", "html.entities"),
    MovedModule("html_parser", "HTMLParser", "html.parser"),
    MovedModule("http_client", "httplib", "http.client"),
    MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"),
    MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"),
    MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"),
    MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"),
    MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"),
    MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"),
    MovedModule("cPickle", "cPickle", "pickle"),
    MovedModule("queue", "Queue"),
    MovedModule("reprlib", "repr"),
    MovedModule("socketserver", "SocketServer"),
    MovedModule("tkinter", "Tkinter"),
    MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"),
    MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"),
    MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"),
    MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"),
    MovedModule("tkinter_tix", "Tix", "tkinter.tix"),
    MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"),
    MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"),
    MovedModule("tkinter_colorchooser", "tkColorChooser",
                "tkinter.colorchooser"),
    MovedModule("tkinter_commondialog", "tkCommonDialog",
                "tkinter.commondialog"),
    MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"),
    MovedModule("tkinter_font", "tkFont", "tkinter.font"),
    MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"),
    MovedModule("tkinter_tksimpledialog", "tkSimpleDialog",
                "tkinter.simpledialog"),
    MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"),
    MovedModule("winreg", "_winreg"),
]
for attr in _moved_attributes:
    setattr(_MovedItems, attr.name, attr)
del attr

moves = sys.modules[__name__ + ".moves"] = _MovedItems("moves")


def add_move(move):
    """Add an item to six.moves."""
    setattr(_MovedItems, move.name, move)


def remove_move(name):
    """Remove item from six.moves."""
    try:
        delattr(_MovedItems, name)
    except AttributeError:
        try:
            del moves.__dict__[name]
        except KeyError:
            raise AttributeError("no such move, %r" % (name,))


if PY3:
    _meth_func = "__func__"
    _meth_self = "__self__"

    _func_code = "__code__"
    _func_defaults = "__defaults__"

    _iterkeys = "keys"
    _itervalues = "values"
    _iteritems = "items"
else:
    _meth_func = "im_func"
    _meth_self = "im_self"

    _func_code = "func_code"
    _func_defaults = "func_defaults"

    _iterkeys = "iterkeys"
    _itervalues = "itervalues"
    _iteritems = "iteritems"


try:
    advance_iterator = next
except NameError:
    def advance_iterator(it):
        return it.next()
next = advance_iterator


if PY3:
    def get_unbound_function(unbound):
        return unbound

    Iterator = object

    def callable(obj):
        return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
else:
    def get_unbound_function(unbound):
        return unbound.im_func

    class Iterator(object):

        def next(self):
            return type(self).__next__(self)

    callable = callable
_add_doc(get_unbound_function,
         """Get the function out of a possibly unbound function""")


get_method_function = operator.attrgetter(_meth_func)
get_method_self = operator.attrgetter(_meth_self)
get_function_code = operator.attrgetter(_func_code)
get_function_defaults = operator.attrgetter(_func_defaults)


def iterkeys(d):
    """Return an iterator over the keys of a dictionary."""
    return iter(getattr(d, _iterkeys)())


def itervalues(d):
    """Return an iterator over the values of a dictionary."""
    return iter(getattr(d, _itervalues)())


def iteritems(d):
    """Return an iterator over the (key, value) pairs of a dictionary."""
    return iter(getattr(d, _iteritems)())


if PY3:
    def b(s):
        return s.encode("latin-1")

    def u(s):
        return s

    if sys.version_info[1] <= 1:
        def int2byte(i):
            return bytes((i,))
    else:
        # This is about 2x faster than the implementation above on 3.2+
        int2byte = operator.methodcaller("to_bytes", 1, "big")
    import io
    StringIO = io.StringIO
    BytesIO = io.BytesIO
else:
    def b(s):
        return s

    def u(s):
        return unicode(s, "unicode_escape")
    int2byte = chr
    import StringIO
    StringIO = BytesIO = StringIO.StringIO
_add_doc(b, """Byte literal""")
_add_doc(u, """Text literal""")


if PY3:
    import builtins  # @UnresolvedImport
    exec_ = getattr(builtins, "exec")

    def reraise(tp, value, tb=None):
        if value.__traceback__ is not tb:
            raise value.with_traceback(tb)
        raise value

    print_ = getattr(builtins, "print")
    del builtins

else:
    def exec_(code, globs=None, locs=None):
        """Execute code in a namespace."""
        if globs is None:
            frame = sys._getframe(1)
            globs = frame.f_globals
            if locs is None:
                locs = frame.f_locals
            del frame
        elif locs is None:
            locs = globs
        exec("""exec code in globs, locs""")

    exec_("""def reraise(tp, value, tb=None):
    raise tp, value, tb
""")

    def print_(*args, **kwargs):
        """The new-style print function."""
        fp = kwargs.pop("file", sys.stdout)
        if fp is None:
            return

        def write(data):
            if not isinstance(data, basestring):
                data = str(data)
            fp.write(data)
        want_unicode = False
        sep = kwargs.pop("sep", None)
        if sep is not None:
            if isinstance(sep, unicode):
                want_unicode = True
            elif not isinstance(sep, str):
                raise TypeError("sep must be None or a string")
        end = kwargs.pop("end", None)
        if end is not None:
            if isinstance(end, unicode):
                want_unicode = True
            elif not isinstance(end, str):
                raise TypeError("end must be None or a string")
        if kwargs:
            raise TypeError("invalid keyword arguments to print()")
        if not want_unicode:
            for arg in args:
                if isinstance(arg, unicode):
                    want_unicode = True
                    break
        if want_unicode:
            newline = unicode("\n")
            space = unicode(" ")
        else:
            newline = "\n"
            space = " "
        if sep is None:
            sep = space
        if end is None:
            end = newline
        for i, arg in enumerate(args):
            if i:
                write(sep)
            write(arg)
        write(end)

_add_doc(reraise, """Reraise an exception.""")


def with_metaclass(meta, base=object):
    """Create a base class with a metaclass."""
    return meta("NewBase", (base,), {})
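For readers tracing what this shim provided: call sites branch once on these helpers instead of on sys.version_info. A hedged sketch of typical usage (the import path is assumed; these lines are not from the deleted tree):

# Hypothetical call sites for the compatibility helpers above.
from wafo.stats.six import b, u, iteritems, with_metaclass  # assumed path

class Meta(type):
    pass

class Base(with_metaclass(Meta)):       # metaclass syntax valid on 2 and 3
    pass

for key, value in iteritems({"a": 1}):  # dict iteration without 2/3 branches
    print(key, value)

raw = b("abc")    # bytes on both majors
text = u("abc")   # text on both majors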
File diff suppressed because it is too large
@@ -1,238 +0,0 @@
from __future__ import division, print_function, absolute_import

import numpy as np
from numpy.testing import assert_array_almost_equal, run_module_suite
from scipy.stats import \
    binned_statistic, binned_statistic_2d, binned_statistic_dd


class TestBinnedStatistic(object):

    @classmethod
    def setup_class(cls):
        np.random.seed(9865)
        cls.x = np.random.random(100)
        cls.y = np.random.random(100)
        cls.v = np.random.random(100)
        cls.X = np.random.random((100, 3))

    def test_1d_count(self):
        x = self.x
        v = self.v

        count1, edges1, bc = binned_statistic(x, v, 'count', bins=10)
        count2, edges2 = np.histogram(x, bins=10)

        assert_array_almost_equal(count1, count2)
        assert_array_almost_equal(edges1, edges2)

    def test_1d_sum(self):
        x = self.x
        v = self.v

        sum1, edges1, bc = binned_statistic(x, v, 'sum', bins=10)
        sum2, edges2 = np.histogram(x, bins=10, weights=v)

        assert_array_almost_equal(sum1, sum2)
        assert_array_almost_equal(edges1, edges2)

    def test_1d_mean(self):
        x = self.x
        v = self.v

        stat1, edges1, bc = binned_statistic(x, v, 'mean', bins=10)
        stat2, edges2, bc = binned_statistic(x, v, np.mean, bins=10)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(edges1, edges2)

    def test_1d_std(self):
        x = self.x
        v = self.v

        stat1, edges1, bc = binned_statistic(x, v, 'std', bins=10)
        stat2, edges2, bc = binned_statistic(x, v, np.std, bins=10)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(edges1, edges2)

    def test_1d_median(self):
        x = self.x
        v = self.v

        stat1, edges1, bc = binned_statistic(x, v, 'median', bins=10)
        stat2, edges2, bc = binned_statistic(x, v, np.median, bins=10)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(edges1, edges2)

    def test_1d_bincode(self):
        x = self.x[:20]
        v = self.v[:20]

        count1, edges1, bc = binned_statistic(x, v, 'count', bins=3)
        bc2 = np.array([3, 2, 1, 3, 2, 3, 3, 3, 3, 1, 1, 3, 3, 1, 2, 3, 1,
                        1, 2, 1])

        bcount = [(bc == i).sum() for i in np.unique(bc)]

        assert_array_almost_equal(bc, bc2)
        assert_array_almost_equal(bcount, count1)

    def test_1d_range_keyword(self):
        # Regression test for gh-3063, range can be (min, max) or [(min, max)]
        np.random.seed(9865)
        x = np.arange(30)
        data = np.random.random(30)

        mean, bins, _ = binned_statistic(x[:15], data[:15])
        mean_range, bins_range, _ = binned_statistic(x, data, range=[(0, 14)])
        mean_range2, bins_range2, _ = binned_statistic(x, data, range=(0, 14))

        assert_array_almost_equal(mean, mean_range)
        assert_array_almost_equal(bins, bins_range)
        assert_array_almost_equal(mean, mean_range2)
        assert_array_almost_equal(bins, bins_range2)

    def test_2d_count(self):
        x = self.x
        y = self.y
        v = self.v

        count1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'count', bins=5)
        count2, binx2, biny2 = np.histogram2d(x, y, bins=5)

        assert_array_almost_equal(count1, count2)
        assert_array_almost_equal(binx1, binx2)
        assert_array_almost_equal(biny1, biny2)

    def test_2d_sum(self):
        x = self.x
        y = self.y
        v = self.v

        sum1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'sum', bins=5)
        sum2, binx2, biny2 = np.histogram2d(x, y, bins=5, weights=v)

        assert_array_almost_equal(sum1, sum2)
        assert_array_almost_equal(binx1, binx2)
        assert_array_almost_equal(biny1, biny2)

    def test_2d_mean(self):
        x = self.x
        y = self.y
        v = self.v

        stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'mean', bins=5)
        stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(binx1, binx2)
        assert_array_almost_equal(biny1, biny2)

    def test_2d_std(self):
        x = self.x
        y = self.y
        v = self.v

        stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'std', bins=5)
        stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.std, bins=5)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(binx1, binx2)
        assert_array_almost_equal(biny1, biny2)

    def test_2d_median(self):
        x = self.x
        y = self.y
        v = self.v

        stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'median', bins=5)
        stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.median, bins=5)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(binx1, binx2)
        assert_array_almost_equal(biny1, biny2)

    def test_2d_bincode(self):
        x = self.x[:20]
        y = self.y[:20]
        v = self.v[:20]

        count1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'count', bins=3)
        bc2 = np.array([17, 11, 6, 16, 11, 17, 18, 17, 17, 7, 6, 18, 16,
                        6, 11, 16, 6, 6, 11, 8])

        bcount = [(bc == i).sum() for i in np.unique(bc)]

        assert_array_almost_equal(bc, bc2)
        count1adj = count1[count1.nonzero()]
        assert_array_almost_equal(bcount, count1adj)

    def test_dd_count(self):
        X = self.X
        v = self.v

        count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
        count2, edges2 = np.histogramdd(X, bins=3)

        assert_array_almost_equal(count1, count2)
        assert_array_almost_equal(edges1, edges2)

    def test_dd_sum(self):
        X = self.X
        v = self.v

        sum1, edges1, bc = binned_statistic_dd(X, v, 'sum', bins=3)
        sum2, edges2 = np.histogramdd(X, bins=3, weights=v)

        assert_array_almost_equal(sum1, sum2)
        assert_array_almost_equal(edges1, edges2)

    def test_dd_mean(self):
        X = self.X
        v = self.v

        stat1, edges1, bc = binned_statistic_dd(X, v, 'mean', bins=3)
        stat2, edges2, bc = binned_statistic_dd(X, v, np.mean, bins=3)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(edges1, edges2)

    def test_dd_std(self):
        X = self.X
        v = self.v

        stat1, edges1, bc = binned_statistic_dd(X, v, 'std', bins=3)
        stat2, edges2, bc = binned_statistic_dd(X, v, np.std, bins=3)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(edges1, edges2)

    def test_dd_median(self):
        X = self.X
        v = self.v

        stat1, edges1, bc = binned_statistic_dd(X, v, 'median', bins=3)
        stat2, edges2, bc = binned_statistic_dd(X, v, np.median, bins=3)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(edges1, edges2)

    def test_dd_bincode(self):
        X = self.X[:20]
        v = self.v[:20]

        count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
        bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92,
                        32, 36, 91, 43, 87, 81, 81])

        bcount = [(bc == i).sum() for i in np.unique(bc)]

        assert_array_almost_equal(bc, bc2)
        count1adj = count1[count1.nonzero()]
        assert_array_almost_equal(bcount, count1adj)


if __name__ == "__main__":
    run_module_suite()
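The equivalences these tests assert can be shown standalone; a short sketch against scipy's public API (data values are illustrative):

import numpy as np
from scipy.stats import binned_statistic

rng = np.random.RandomState(9865)
x = rng.random_sample(100)
v = rng.random_sample(100)

# 'count' reproduces an unweighted histogram ...
counts, edges, binnumber = binned_statistic(x, v, statistic='count', bins=10)
assert np.allclose(counts, np.histogram(x, bins=10)[0])

# ... and 'sum' reproduces a weighted one.
sums, _, _ = binned_statistic(x, v, statistic='sum', bins=10)
assert np.allclose(sums, np.histogram(x, bins=10, weights=v)[0])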
@@ -1,202 +0,0 @@
from __future__ import division, print_function, absolute_import

import numpy as np
from numpy.testing import (run_module_suite, assert_equal, assert_array_equal,
                           assert_array_almost_equal, assert_approx_equal,
                           assert_raises, assert_allclose)
from scipy.special import xlogy
from scipy.stats.contingency import margins, expected_freq, chi2_contingency


def test_margins():
    a = np.array([1])
    m = margins(a)
    assert_equal(len(m), 1)
    m0 = m[0]
    assert_array_equal(m0, np.array([1]))

    a = np.array([[1]])
    m0, m1 = margins(a)
    expected0 = np.array([[1]])
    expected1 = np.array([[1]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)

    a = np.arange(12).reshape(2, 6)
    m0, m1 = margins(a)
    expected0 = np.array([[15], [51]])
    expected1 = np.array([[6, 8, 10, 12, 14, 16]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)

    a = np.arange(24).reshape(2, 3, 4)
    m0, m1, m2 = margins(a)
    expected0 = np.array([[[66]], [[210]]])
    expected1 = np.array([[[60], [92], [124]]])
    expected2 = np.array([[[60, 66, 72, 78]]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)
    assert_array_equal(m2, expected2)


def test_expected_freq():
    assert_array_equal(expected_freq([1]), np.array([1.0]))

    observed = np.array([[[2, 0], [0, 2]], [[0, 2], [2, 0]], [[1, 1], [1, 1]]])
    e = expected_freq(observed)
    assert_array_equal(e, np.ones_like(observed))

    observed = np.array([[10, 10, 20], [20, 20, 20]])
    e = expected_freq(observed)
    correct = np.array([[12., 12., 16.], [18., 18., 24.]])
    assert_array_almost_equal(e, correct)


def test_chi2_contingency_trivial():
    # Some very simple tests for chi2_contingency.

    # A trivial case
    obs = np.array([[1, 2], [1, 2]])
    chi2, p, dof, expected = chi2_contingency(obs, correction=False)
    assert_equal(chi2, 0.0)
    assert_equal(p, 1.0)
    assert_equal(dof, 1)
    assert_array_equal(obs, expected)

    # A *really* trivial case: 1-D data.
    obs = np.array([1, 2, 3])
    chi2, p, dof, expected = chi2_contingency(obs, correction=False)
    assert_equal(chi2, 0.0)
    assert_equal(p, 1.0)
    assert_equal(dof, 0)
    assert_array_equal(obs, expected)


def test_chi2_contingency_R():
    # Some test cases that were computed independently, using R.

    Rcode = \
    """
    # Data vector.
    data <- c(
      12, 34, 23, 4, 47, 11,
      35, 31, 11, 34, 10, 18,
      12, 32, 9, 18, 13, 19,
      12, 12, 14, 9, 33, 25
      )

    # Create factor tags:r=rows, c=columns, t=tiers
    r <- factor(gl(4, 2*3, 2*3*4, labels=c("r1", "r2", "r3", "r4")))
    c <- factor(gl(3, 1, 2*3*4, labels=c("c1", "c2", "c3")))
    t <- factor(gl(2, 3, 2*3*4, labels=c("t1", "t2")))

    # 3-way Chi squared test of independence
    s = summary(xtabs(data~r+c+t))
    print(s)
    """
    Routput = \
    """
    Call: xtabs(formula = data ~ r + c + t)
    Number of cases in table: 478
    Number of factors: 3
    Test for independence of all factors:
        Chisq = 102.17, df = 17, p-value = 3.514e-14
    """
    obs = np.array(
        [[[12, 34, 23],
          [35, 31, 11],
          [12, 32, 9],
          [12, 12, 14]],
         [[4, 47, 11],
          [34, 10, 18],
          [18, 13, 19],
          [9, 33, 25]]])
    chi2, p, dof, expected = chi2_contingency(obs)
    assert_approx_equal(chi2, 102.17, significant=5)
    assert_approx_equal(p, 3.514e-14, significant=4)
    assert_equal(dof, 17)

    Rcode = \
    """
    # Data vector.
    data <- c(
        #
        12, 17,
        11, 16,
        #
        11, 12,
        15, 16,
        #
        23, 15,
        30, 22,
        #
        14, 17,
        15, 16
        )

    # Create factor tags:r=rows, c=columns, d=depths(?), t=tiers
    r <- factor(gl(2, 2, 2*2*2*2, labels=c("r1", "r2")))
    c <- factor(gl(2, 1, 2*2*2*2, labels=c("c1", "c2")))
    d <- factor(gl(2, 4, 2*2*2*2, labels=c("d1", "d2")))
    t <- factor(gl(2, 8, 2*2*2*2, labels=c("t1", "t2")))

    # 4-way Chi squared test of independence
    s = summary(xtabs(data~r+c+d+t))
    print(s)
    """
    Routput = \
    """
    Call: xtabs(formula = data ~ r + c + d + t)
    Number of cases in table: 262
    Number of factors: 4
    Test for independence of all factors:
        Chisq = 8.758, df = 11, p-value = 0.6442
    """
    obs = np.array(
        [[[[12, 17],
           [11, 16]],
          [[11, 12],
           [15, 16]]],
         [[[23, 15],
           [30, 22]],
          [[14, 17],
           [15, 16]]]])
    chi2, p, dof, expected = chi2_contingency(obs)
    assert_approx_equal(chi2, 8.758, significant=4)
    assert_approx_equal(p, 0.6442, significant=4)
    assert_equal(dof, 11)


def test_chi2_contingency_g():
    c = np.array([[15, 60], [15, 90]])
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', correction=False)
    assert_allclose(g, 2*xlogy(c, c/e).sum())

    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', correction=True)
    c_corr = c + np.array([[-0.5, 0.5], [0.5, -0.5]])
    assert_allclose(g, 2*xlogy(c_corr, c_corr/e).sum())

    c = np.array([[10, 12, 10], [12, 10, 10]])
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood')
    assert_allclose(g, 2*xlogy(c, c/e).sum())


def test_chi2_contingency_bad_args():
    # Test that "bad" inputs raise a ValueError.

    # Negative value in the array of observed frequencies.
    obs = np.array([[-1, 10], [1, 2]])
    assert_raises(ValueError, chi2_contingency, obs)

    # The zeros in this will result in zeros in the array
    # of expected frequencies.
    obs = np.array([[0, 1], [0, 1]])
    assert_raises(ValueError, chi2_contingency, obs)

    # A degenerate case: `observed` has size 0.
    obs = np.empty((0, 8))
    assert_raises(ValueError, chi2_contingency, obs)


if __name__ == "__main__":
    run_module_suite()
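The trivial case above doubles as a usage example and can be run directly:

import numpy as np
from scipy.stats.contingency import chi2_contingency

obs = np.array([[1, 2], [1, 2]])    # rows are proportional: exact independence
chi2, p, dof, expected = chi2_contingency(obs, correction=False)
print(chi2, p, dof)                 # -> 0.0 1.0 1; `expected` equals `obs`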
File diff suppressed because it is too large
@@ -1,202 +0,0 @@
from __future__ import division, print_function, absolute_import

from wafo import stats
import numpy as np
from numpy.testing import assert_almost_equal, assert_, assert_raises, \
    assert_array_almost_equal, assert_array_almost_equal_nulp, run_module_suite


def test_kde_1d():
    # some basic tests comparing to normal distribution
    np.random.seed(8765678)
    n_basesample = 500
    xn = np.random.randn(n_basesample)
    xnmean = xn.mean()
    xnstd = xn.std(ddof=1)

    # get kde for original sample
    gkde = stats.gaussian_kde(xn)

    # evaluate the density function for the kde for some points
    xs = np.linspace(-7, 7, 501)
    kdepdf = gkde.evaluate(xs)
    normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
    intervall = xs[1] - xs[0]

    assert_(np.sum((kdepdf - normpdf)**2)*intervall < 0.01)
    prob1 = gkde.integrate_box_1d(xnmean, np.inf)
    prob2 = gkde.integrate_box_1d(-np.inf, xnmean)
    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13)
    assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13)

    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum()*intervall, decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
                        (kdepdf*normpdf).sum()*intervall, decimal=2)


def test_kde_bandwidth_method():
    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.n, -1./(kde_obj.d+4))

    np.random.seed(8765678)
    n_basesample = 50
    xn = np.random.randn(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)
    # Supply a callable
    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor)
    # Supply a scalar
    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor)

    xs = np.linspace(-7, 7, 51)
    kdepdf = gkde.evaluate(xs)
    kdepdf2 = gkde2.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf2)
    kdepdf3 = gkde3.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf3)

    assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring')


# Subclasses that should stay working (extracted from various sources).
# Unfortunately the earlier design of gaussian_kde made it necessary for users
# to create these kinds of subclasses, or call _compute_covariance() directly.

class _kde_subclass1(stats.gaussian_kde):
    def __init__(self, dataset):
        self.dataset = np.atleast_2d(dataset)
        self.d, self.n = self.dataset.shape
        self.covariance_factor = self.scotts_factor
        self._compute_covariance()


class _kde_subclass2(stats.gaussian_kde):
    def __init__(self, dataset):
        self.covariance_factor = self.scotts_factor
        super(_kde_subclass2, self).__init__(dataset)


class _kde_subclass3(stats.gaussian_kde):
    def __init__(self, dataset, covariance):
        self.covariance = covariance
        stats.gaussian_kde.__init__(self, dataset)

    def _compute_covariance(self):
        self.inv_cov = np.linalg.inv(self.covariance)
        self._norm_factor = np.sqrt(np.linalg.det(2*np.pi * self.covariance)) \
            * self.n


class _kde_subclass4(stats.gaussian_kde):
    def covariance_factor(self):
        return 0.5 * self.silverman_factor()


def test_gaussian_kde_subclassing():
    x1 = np.array([-7, -5, 1, 4, 5], dtype=np.float)
    xs = np.linspace(-10, 10, num=50)

    # gaussian_kde itself
    kde = stats.gaussian_kde(x1)
    ys = kde(xs)

    # subclass 1
    kde1 = _kde_subclass1(x1)
    y1 = kde1(xs)
    assert_array_almost_equal_nulp(ys, y1, nulp=10)

    # subclass 2
    kde2 = _kde_subclass2(x1)
    y2 = kde2(xs)
    assert_array_almost_equal_nulp(ys, y2, nulp=10)

    # subclass 3
    kde3 = _kde_subclass3(x1, kde.covariance)
    y3 = kde3(xs)
    assert_array_almost_equal_nulp(ys, y3, nulp=10)

    # subclass 4
    kde4 = _kde_subclass4(x1)
    y4 = kde4(x1)
    y_expected = [0.06292987, 0.06346938, 0.05860291, 0.08657652, 0.07904017]

    assert_array_almost_equal(y_expected, y4, decimal=6)

    # Not a subclass, but check for use of _compute_covariance()
    kde5 = kde
    kde5.covariance_factor = lambda: kde.factor
    kde5._compute_covariance()
    y5 = kde5(xs)
    assert_array_almost_equal_nulp(ys, y5, nulp=10)


def test_gaussian_kde_covariance_caching():
    x1 = np.array([-7, -5, 1, 4, 5], dtype=np.float)
    xs = np.linspace(-10, 10, num=5)
    # These expected values are from scipy 0.10, before some changes to
    # gaussian_kde. They were not compared with any external reference.
    y_expected = [0.02463386, 0.04689208, 0.05395444, 0.05337754, 0.01664475]

    # Set the bandwidth, then reset it to the default.
    kde = stats.gaussian_kde(x1)
    kde.set_bandwidth(bw_method=0.5)
    kde.set_bandwidth(bw_method='scott')
    y2 = kde(xs)

    assert_array_almost_equal(y_expected, y2, decimal=7)


def test_gaussian_kde_monkeypatch():
    """Ugly, but people may rely on this. See scipy pull request 123,
    specifically the linked ML thread "Width of the Gaussian in stats.kde".
    If it is necessary to break this later on, that is to be discussed on ML.
    """
    x1 = np.array([-7, -5, 1, 4, 5], dtype=np.float)
    xs = np.linspace(-10, 10, num=50)

    # The old monkeypatched version to get at Silverman's Rule.
    kde = stats.gaussian_kde(x1)
    kde.covariance_factor = kde.silverman_factor
    kde._compute_covariance()
    y1 = kde(xs)

    # The new saner version.
    kde2 = stats.gaussian_kde(x1, bw_method='silverman')
    y2 = kde2(xs)

    assert_array_almost_equal_nulp(y1, y2, nulp=10)


def test_kde_integer_input():
    """Regression test for #1181."""
    x1 = np.arange(5)
    kde = stats.gaussian_kde(x1)
    y_expected = [0.13480721, 0.18222869, 0.19514935, 0.18222869, 0.13480721]
    assert_array_almost_equal(kde(x1), y_expected, decimal=6)


def test_pdf_logpdf():
    np.random.seed(1)
    n_basesample = 50
    xn = np.random.randn(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)

    xs = np.linspace(-15, 12, 25)
    pdf = gkde.evaluate(xs)
    pdf2 = gkde.pdf(xs)
    assert_almost_equal(pdf, pdf2, decimal=12)

    logpdf = np.log(pdf)
    logpdf2 = gkde.logpdf(xs)
    assert_almost_equal(logpdf, logpdf2, decimal=12)


if __name__ == "__main__":
    run_module_suite()
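The bandwidth-selection behaviour exercised above, in one standalone sketch against scipy's public API (illustrative data):

import numpy as np
from scipy import stats

xn = np.random.RandomState(8765678).randn(50)

kde_default = stats.gaussian_kde(xn)                   # Scott's rule
kde_scalar = stats.gaussian_kde(xn, bw_method=kde_default.factor)
kde_silver = stats.gaussian_kde(xn, bw_method='silverman')

xs = np.linspace(-4, 4, 9)
# The default rule and the same factor passed back as a scalar agree exactly.
assert np.allclose(kde_default(xs), kde_scalar(xs))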
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,107 +0,0 @@
# pylint: disable-msg=W0611, W0612, W0511,R0201
"""Tests suite for maskedArray statistics.

:author: Pierre Gerard-Marchant
:contact: pierregm_at_uga_dot_edu
"""
from __future__ import division, print_function, absolute_import

__author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)"

import numpy as np

import numpy.ma as ma

import wafo.stats.mstats as ms
#import wafo.stats.mmorestats as mms

from numpy.testing import TestCase, run_module_suite, assert_equal, \
    assert_almost_equal, assert_


class TestMisc(TestCase):

    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)

    def test_mjci(self):
        "Tests the Maritz-Jarrett estimator"
        data = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                         296, 299, 306, 376, 428, 515, 666, 1310, 2611])
        assert_almost_equal(ms.mjci(data), [55.76819, 45.84028, 198.87875], 5)

    def test_trimmedmeanci(self):
        "Tests the confidence intervals of the trimmed mean."
        data = ma.array([545, 555, 558, 572, 575, 576, 578, 580,
                         594, 605, 635, 651, 653, 661, 666])
        assert_almost_equal(ms.trimmed_mean(data, 0.2), 596.2, 1)
        assert_equal(np.round(ms.trimmed_mean_ci(data, (0.2, 0.2)), 1),
                     [561.8, 630.6])

    def test_idealfourths(self):
        "Tests ideal-fourths"
        test = np.arange(100)
        assert_almost_equal(np.asarray(ms.idealfourths(test)),
                            [24.416667, 74.583333], 6)
        test_2D = test.repeat(3).reshape(-1, 3)
        assert_almost_equal(ms.idealfourths(test_2D, axis=0),
                            [[24.416667, 24.416667, 24.416667],
                             [74.583333, 74.583333, 74.583333]], 6)
        assert_almost_equal(ms.idealfourths(test_2D, axis=1),
                            test.repeat(2).reshape(-1, 2))
        test = [0, 0]
        _result = ms.idealfourths(test)
        assert_(np.isnan(_result).all())

#..............................................................................


class TestQuantiles(TestCase):

    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)

    def test_hdquantiles(self):
        data = [0.706560797,0.727229578,0.990399276,0.927065621,0.158953014,
                0.887764025,0.239407086,0.349638551,0.972791145,0.149789972,
                0.936947700,0.132359948,0.046041972,0.641675031,0.945530547,
                0.224218684,0.771450991,0.820257774,0.336458052,0.589113496,
                0.509736129,0.696838829,0.491323573,0.622767425,0.775189248,
                0.641461450,0.118455200,0.773029450,0.319280007,0.752229111,
                0.047841438,0.466295911,0.583850781,0.840581845,0.550086491,
                0.466470062,0.504765074,0.226855960,0.362641207,0.891620942,
                0.127898691,0.490094097,0.044882048,0.041441695,0.317976349,
                0.504135618,0.567353033,0.434617473,0.636243375,0.231803616,
                0.230154113,0.160011327,0.819464108,0.854706985,0.438809221,
                0.487427267,0.786907310,0.408367937,0.405534192,0.250444460,
                0.995309248,0.144389588,0.739947527,0.953543606,0.680051621,
                0.388382017,0.863530727,0.006514031,0.118007779,0.924024803,
                0.384236354,0.893687694,0.626534881,0.473051932,0.750134705,
                0.241843555,0.432947602,0.689538104,0.136934797,0.150206859,
                0.474335206,0.907775349,0.525869295,0.189184225,0.854284286,
                0.831089744,0.251637345,0.587038213,0.254475554,0.237781276,
                0.827928620,0.480283781,0.594514455,0.213641488,0.024194386,
                0.536668589,0.699497811,0.892804071,0.093835427,0.731107772]
        #
        assert_almost_equal(ms.hdquantiles(data, [0., 1.]),
                            [0.006514031, 0.995309248])
        hdq = ms.hdquantiles(data, [0.25, 0.5, 0.75])
        assert_almost_equal(hdq, [0.253210762, 0.512847491, 0.762232442, ])
        hdq = ms.hdquantiles_sd(data, [0.25, 0.5, 0.75])
        assert_almost_equal(hdq, [0.03786954, 0.03805389, 0.03800152, ], 4)
        #
        data = np.array(data).reshape(10, 10)
        hdq = ms.hdquantiles(data, [0.25, 0.5, 0.75], axis=0)
        assert_almost_equal(hdq[:, 0], ms.hdquantiles(data[:, 0], [0.25, 0.5, 0.75]))
        assert_almost_equal(hdq[:, -1], ms.hdquantiles(data[:, -1], [0.25, 0.5, 0.75]))
        hdq = ms.hdquantiles(data, [0.25, 0.5, 0.75], axis=0, var=True)
        assert_almost_equal(hdq[..., 0],
                            ms.hdquantiles(data[:, 0], [0.25, 0.5, 0.75], var=True))
        assert_almost_equal(hdq[..., -1],
                            ms.hdquantiles(data[:, -1], [0.25, 0.5, 0.75], var=True))


###############################################################################

if __name__ == "__main__":
    run_module_suite()
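For orientation, the scipy counterparts of the masked-statistics routines tested above can be called directly; a small sketch (data values are illustrative):

import numpy as np
import scipy.stats.mstats as ms

data = np.arange(100, dtype=float)
print(ms.hdquantiles(data, [0.25, 0.5, 0.75]))   # Harrell-Davis quantiles
print(ms.trimmed_mean(data, (0.2, 0.2)))         # 20% trimmed mean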
@ -1,485 +0,0 @@
|
|||||||
"""
|
|
||||||
Test functions for multivariate normal distributions.
|
|
||||||
|
|
||||||
"""
|
|
||||||
from __future__ import division, print_function, absolute_import
|
|
||||||
|
|
||||||
from numpy.testing import (
|
|
||||||
assert_allclose,
|
|
||||||
assert_almost_equal,
|
|
||||||
assert_array_almost_equal,
|
|
||||||
assert_equal,
|
|
||||||
assert_raises,
|
|
||||||
run_module_suite,
|
|
||||||
)
|
|
||||||
|
|
||||||
import numpy
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
import scipy.linalg
|
|
||||||
from wafo.stats._multivariate import _PSD, _lnB
|
|
||||||
from wafo.stats import multivariate_normal
|
|
||||||
from wafo.stats import dirichlet, beta
|
|
||||||
from wafo.stats import norm
|
|
||||||
|
|
||||||
from scipy.integrate import romb
|
|
||||||
|
|
||||||
|
|
||||||
def test_input_shape():
|
|
||||||
mu = np.arange(3)
|
|
||||||
cov = np.identity(2)
|
|
||||||
assert_raises(ValueError, multivariate_normal.pdf, (0, 1), mu, cov)
|
|
||||||
assert_raises(ValueError, multivariate_normal.pdf, (0, 1, 2), mu, cov)
|
|
||||||
|
|
||||||
|
|
||||||
def test_scalar_values():
|
|
||||||
np.random.seed(1234)
|
|
||||||
|
|
||||||
# When evaluated on scalar data, the pdf should return a scalar
|
|
||||||
x, mean, cov = 1.5, 1.7, 2.5
|
|
||||||
pdf = multivariate_normal.pdf(x, mean, cov)
|
|
||||||
assert_equal(pdf.ndim, 0)
|
|
||||||
|
|
||||||
# When evaluated on a single vector, the pdf should return a scalar
|
|
||||||
x = np.random.randn(5)
|
|
||||||
mean = np.random.randn(5)
|
|
||||||
cov = np.abs(np.random.randn(5)) # Diagonal values for cov. matrix
|
|
||||||
pdf = multivariate_normal.pdf(x, mean, cov)
|
|
||||||
assert_equal(pdf.ndim, 0)
|
|
||||||
|
|
||||||
|
|
||||||
def test_logpdf():
|
|
||||||
# Check that the log of the pdf is in fact the logpdf
|
|
||||||
np.random.seed(1234)
|
|
||||||
x = np.random.randn(5)
|
|
||||||
mean = np.random.randn(5)
|
|
||||||
cov = np.abs(np.random.randn(5))
|
|
||||||
d1 = multivariate_normal.logpdf(x, mean, cov)
|
|
||||||
d2 = multivariate_normal.pdf(x, mean, cov)
|
|
||||||
assert_allclose(d1, np.log(d2))
|
|
||||||
|
|
||||||
|
|
||||||
def test_rank():
|
|
||||||
# Check that the rank is detected correctly.
|
|
||||||
np.random.seed(1234)
|
|
||||||
n = 4
|
|
||||||
mean = np.random.randn(n)
|
|
||||||
for expected_rank in range(1, n + 1):
|
|
||||||
s = np.random.randn(n, expected_rank)
|
|
||||||
cov = np.dot(s, s.T)
|
|
||||||
distn = multivariate_normal(mean, cov, allow_singular=True)
|
|
||||||
assert_equal(distn.cov_info.rank, expected_rank)
|
|
||||||
|
|
||||||
|
|
||||||
def _sample_orthonormal_matrix(n):
|
|
||||||
M = np.random.randn(n, n)
|
|
||||||
u, s, v = scipy.linalg.svd(M)
|
|
||||||
return u
|
|
||||||
|
|
||||||
|
|
||||||
def test_degenerate_distributions():
|
|
||||||
for n in range(1, 5):
|
|
||||||
x = np.random.randn(n)
|
|
||||||
for k in range(1, n + 1):
|
|
||||||
# Sample a small covariance matrix.
|
|
||||||
s = np.random.randn(k, k)
|
|
||||||
cov_kk = np.dot(s, s.T)
|
|
||||||
|
|
||||||
# Embed the small covariance matrix into a larger low rank matrix.
|
|
||||||
cov_nn = np.zeros((n, n))
|
|
||||||
cov_nn[:k, :k] = cov_kk
|
|
||||||
|
|
||||||
# Define a rotation of the larger low rank matrix.
|
|
||||||
u = _sample_orthonormal_matrix(n)
|
|
||||||
cov_rr = np.dot(u, np.dot(cov_nn, u.T))
|
|
||||||
y = np.dot(u, x)
|
|
||||||
|
|
||||||
# Check some identities.
|
|
||||||
distn_kk = multivariate_normal(np.zeros(k), cov_kk,
|
|
||||||
allow_singular=True)
|
|
||||||
distn_nn = multivariate_normal(np.zeros(n), cov_nn,
|
|
||||||
allow_singular=True)
|
|
||||||
distn_rr = multivariate_normal(np.zeros(n), cov_rr,
|
|
||||||
allow_singular=True)
|
|
||||||
assert_equal(distn_kk.cov_info.rank, k)
|
|
||||||
assert_equal(distn_nn.cov_info.rank, k)
|
|
||||||
assert_equal(distn_rr.cov_info.rank, k)
|
|
||||||
pdf_kk = distn_kk.pdf(x[:k])
|
|
||||||
pdf_nn = distn_nn.pdf(x)
|
|
||||||
pdf_rr = distn_rr.pdf(y)
|
|
||||||
assert_allclose(pdf_kk, pdf_nn)
|
|
||||||
assert_allclose(pdf_kk, pdf_rr)
|
|
||||||
logpdf_kk = distn_kk.logpdf(x[:k])
|
|
||||||
logpdf_nn = distn_nn.logpdf(x)
|
|
||||||
logpdf_rr = distn_rr.logpdf(y)
|
|
||||||
assert_allclose(logpdf_kk, logpdf_nn)
|
|
||||||
assert_allclose(logpdf_kk, logpdf_rr)
|
|
||||||
|
|
||||||
|
|
||||||
def test_large_pseudo_determinant():
|
|
||||||
# Check that large pseudo-determinants are handled appropriately.
|
|
||||||
|
|
||||||
# Construct a singular diagonal covariance matrix
|
|
||||||
# whose pseudo determinant overflows double precision.
|
|
||||||
large_total_log = 1000.0
|
|
||||||
npos = 100
|
|
||||||
nzero = 2
|
|
||||||
large_entry = np.exp(large_total_log / npos)
|
|
||||||
n = npos + nzero
|
|
||||||
cov = np.zeros((n, n), dtype=float)
|
|
||||||
np.fill_diagonal(cov, large_entry)
|
|
||||||
cov[-nzero:, -nzero:] = 0
|
|
||||||
|
|
||||||
# Check some determinants.
|
|
||||||
assert_equal(scipy.linalg.det(cov), 0)
|
|
||||||
assert_equal(scipy.linalg.det(cov[:npos, :npos]), np.inf)
|
|
||||||
|
|
||||||
# np.linalg.slogdet is only available in numpy 1.6+
|
|
||||||
# but scipy currently supports numpy 1.5.1.
|
|
||||||
# assert_allclose(np.linalg.slogdet(cov[:npos, :npos]),
|
|
||||||
# (1, large_total_log))
|
|
||||||
|
|
||||||
# Check the pseudo-determinant.
|
|
||||||
psd = _PSD(cov)
|
|
||||||
assert_allclose(psd.log_pdet, large_total_log)
|
|
||||||
|
|
||||||
|
|
||||||
def test_broadcasting():
|
|
||||||
np.random.seed(1234)
|
|
||||||
n = 4
|
|
||||||
|
|
||||||
# Construct a random covariance matrix.
|
|
||||||
data = np.random.randn(n, n)
|
|
||||||
cov = np.dot(data, data.T)
|
|
||||||
mean = np.random.randn(n)
|
|
||||||
|
|
||||||
# Construct an ndarray which can be interpreted as
|
|
||||||
# a 2x3 array whose elements are random data vectors.
|
|
||||||
X = np.random.randn(2, 3, n)
|
|
||||||
|
|
||||||
# Check that multiple data points can be evaluated at once.
|
|
||||||
for i in range(2):
|
|
||||||
for j in range(3):
|
|
||||||
actual = multivariate_normal.pdf(X[i, j], mean, cov)
|
|
||||||
desired = multivariate_normal.pdf(X, mean, cov)[i, j]
|
|
||||||
assert_allclose(actual, desired)
|
|
||||||
|
|
||||||
|
|
||||||
def test_normal_1D():
|
|
||||||
# The probability density function for a 1D normal variable should
|
|
||||||
# agree with the standard normal distribution in scipy.stats.distributions
|
|
||||||
x = np.linspace(0, 2, 10)
|
|
||||||
mean, cov = 1.2, 0.9
|
|
||||||
scale = cov**0.5
|
|
||||||
d1 = norm.pdf(x, mean, scale)
|
|
||||||
d2 = multivariate_normal.pdf(x, mean, cov)
|
|
||||||
assert_allclose(d1, d2)
|
|
||||||
|
|
||||||
|
|
||||||
def test_marginalization():
|
|
||||||
# Integrating out one of the variables of a 2D Gaussian should
|
|
||||||
# yield a 1D Gaussian
|
|
||||||
mean = np.array([2.5, 3.5])
|
|
||||||
cov = np.array([[.5, 0.2], [0.2, .6]])
|
|
||||||
n = 2 ** 8 + 1 # Number of samples
|
|
||||||
delta = 6 / (n - 1) # Grid spacing
|
|
||||||
|
|
||||||
v = np.linspace(0, 6, n)
|
|
||||||
xv, yv = np.meshgrid(v, v)
|
|
||||||
pos = np.empty((n, n, 2))
|
|
||||||
pos[:, :, 0] = xv
|
|
||||||
pos[:, :, 1] = yv
|
|
||||||
pdf = multivariate_normal.pdf(pos, mean, cov)
|
|
||||||
|
|
||||||
# Marginalize over x and y axis
|
|
||||||
margin_x = romb(pdf, delta, axis=0)
|
|
||||||
margin_y = romb(pdf, delta, axis=1)
|
|
||||||
|
|
||||||
# Compare with standard normal distribution
|
|
||||||
gauss_x = norm.pdf(v, loc=mean[0], scale=cov[0, 0] ** 0.5)
|
|
||||||
gauss_y = norm.pdf(v, loc=mean[1], scale=cov[1, 1] ** 0.5)
|
|
||||||
assert_allclose(margin_x, gauss_x, rtol=1e-2, atol=1e-2)
|
|
||||||
assert_allclose(margin_y, gauss_y, rtol=1e-2, atol=1e-2)
|
|
||||||
|
|
||||||
|
|
||||||
def test_frozen():
|
|
||||||
# The frozen distribution should agree with the regular one
|
|
||||||
np.random.seed(1234)
|
|
||||||
x = np.random.randn(5)
|
|
||||||
mean = np.random.randn(5)
|
|
||||||
cov = np.abs(np.random.randn(5))
|
|
||||||
norm_frozen = multivariate_normal(mean, cov)
|
|
||||||
assert_allclose(norm_frozen.pdf(x), multivariate_normal.pdf(x, mean, cov))
|
|
||||||
assert_allclose(norm_frozen.logpdf(x),
|
|
||||||
multivariate_normal.logpdf(x, mean, cov))
|
|
||||||
|
|
||||||
|
|
||||||
def test_pseudodet_pinv():
|
|
||||||
# Make sure that pseudo-inverse and pseudo-det agree on cutoff
|
|
||||||
|
|
||||||
# Assemble random covariance matrix with large and small eigenvalues
|
|
||||||
np.random.seed(1234)
|
|
||||||
n = 7
|
|
||||||
x = np.random.randn(n, n)
|
|
||||||
cov = np.dot(x, x.T)
|
|
||||||
s, u = scipy.linalg.eigh(cov)
|
|
||||||
s = 0.5 * np.ones(n)
|
|
||||||
s[0] = 1.0
|
|
||||||
s[-1] = 1e-7
|
|
||||||
cov = np.dot(u, np.dot(np.diag(s), u.T))
|
|
||||||
|
|
||||||
# Set cond so that the lowest eigenvalue is below the cutoff
|
|
||||||
cond = 1e-5
|
|
||||||
psd = _PSD(cov, cond=cond)
|
|
||||||
psd_pinv = _PSD(psd.pinv, cond=cond)
|
|
||||||
|
|
||||||
# Check that the log pseudo-determinant agrees with the sum
|
|
||||||
# of the logs of all but the smallest eigenvalue
|
|
||||||
assert_allclose(psd.log_pdet, np.sum(np.log(s[:-1])))
|
|
||||||
# Check that the pseudo-determinant of the pseudo-inverse
|
|
||||||
# agrees with 1 / pseudo-determinant
|
|
||||||
assert_allclose(-psd.log_pdet, psd_pinv.log_pdet)
|
|
||||||
|
|
||||||
|
|
||||||
def test_exception_nonsquare_cov():
|
|
||||||
cov = [[1, 2, 3], [4, 5, 6]]
|
|
||||||
assert_raises(ValueError, _PSD, cov)
|
|
||||||
|
|
||||||
|
|
||||||
def test_exception_nonfinite_cov():
|
|
||||||
cov_nan = [[1, 0], [0, np.nan]]
|
|
||||||
assert_raises(ValueError, _PSD, cov_nan)
|
|
||||||
cov_inf = [[1, 0], [0, np.inf]]
|
|
||||||
assert_raises(ValueError, _PSD, cov_inf)
|
|
||||||
|
|
||||||
|
|
||||||
def test_exception_non_psd_cov():
|
|
||||||
cov = [[1, 0], [0, -1]]
|
|
||||||
assert_raises(ValueError, _PSD, cov)
|
|
||||||
|
|
||||||
|
|
||||||
def test_exception_singular_cov():
|
|
||||||
np.random.seed(1234)
|
|
||||||
x = np.random.randn(5)
|
|
||||||
mean = np.random.randn(5)
|
|
||||||
cov = np.ones((5, 5))
|
|
||||||
e = np.linalg.LinAlgError
|
|
||||||
assert_raises(e, multivariate_normal, mean, cov)
|
|
||||||
assert_raises(e, multivariate_normal.pdf, x, mean, cov)
|
|
||||||
assert_raises(e, multivariate_normal.logpdf, x, mean, cov)
|
|
||||||
|
|
||||||
|
|
||||||
def test_R_values():
|
|
||||||
# Compare the multivariate pdf with some values precomputed
|
|
||||||
# in R version 3.0.1 (2013-05-16) on Mac OS X 10.6.
|
|
||||||
|
|
||||||
# The values below were generated by the following R-script:
|
|
||||||
# > library(mnormt)
|
|
||||||
# > x <- seq(0, 2, length=5)
|
|
||||||
# > y <- 3*x - 2
|
|
||||||
# > z <- x + cos(y)
|
|
||||||
# > mu <- c(1, 3, 2)
|
|
||||||
# > Sigma <- matrix(c(1,2,0,2,5,0.5,0,0.5,3), 3, 3)
|
|
||||||
# > r_pdf <- dmnorm(cbind(x,y,z), mu, Sigma)
|
|
||||||
r_pdf = np.array([0.0002214706, 0.0013819953, 0.0049138692,
|
|
||||||
0.0103803050, 0.0140250800])
|
|
||||||
|
|
||||||
x = np.linspace(0, 2, 5)
|
|
||||||
y = 3 * x - 2
|
|
||||||
z = x + np.cos(y)
|
|
||||||
r = np.array([x, y, z]).T
|
|
||||||
|
|
||||||
mean = np.array([1, 3, 2], 'd')
|
|
||||||
cov = np.array([[1, 2, 0], [2, 5, .5], [0, .5, 3]], 'd')
|
|
||||||
|
|
||||||
pdf = multivariate_normal.pdf(r, mean, cov)
|
|
||||||
assert_allclose(pdf, r_pdf, atol=1e-10)
|
|
||||||
|
|
||||||
|
|
||||||
def test_multivariate_normal_rvs_zero_covariance():
|
|
||||||
mean = np.zeros(2)
|
|
||||||
covariance = np.zeros((2, 2))
|
|
||||||
model = multivariate_normal(mean, covariance, allow_singular=True)
|
|
||||||
sample = model.rvs()
|
|
||||||
assert_equal(sample, [0, 0])
|
|
||||||
|
|
||||||
|
|
||||||
def test_rvs_shape():
|
|
||||||
# Check that rvs parses the mean and covariance correctly, and returns
|
|
||||||
# an array of the right shape
|
|
||||||
N = 300
|
|
||||||
d = 4
|
|
||||||
sample = multivariate_normal.rvs(mean=np.zeros(d), cov=1, size=N)
|
|
||||||
assert_equal(sample.shape, (N, d))
|
|
||||||
|
|
||||||
sample = multivariate_normal.rvs(mean=None,
|
|
||||||
cov=np.array([[2, .1], [.1, 1]]),
|
|
||||||
size=N)
|
|
||||||
assert_equal(sample.shape, (N, 2))
|
|
||||||
|
|
||||||
u = multivariate_normal(mean=0, cov=1)
|
|
||||||
sample = u.rvs(N)
|
|
||||||
assert_equal(sample.shape, (N, ))
|
|
||||||
|
|
||||||
|
|
||||||
def test_large_sample():
|
|
||||||
# Generate large sample and compare sample mean and sample covariance
|
|
||||||
# with mean and covariance matrix.
|
|
||||||
|
|
||||||
np.random.seed(2846)
|
|
||||||
|
|
||||||
n = 3
|
|
||||||
mean = np.random.randn(n)
|
|
||||||
M = np.random.randn(n, n)
|
|
||||||
cov = np.dot(M, M.T)
|
|
||||||
size = 5000
|
|
||||||
|
|
||||||
sample = multivariate_normal.rvs(mean, cov, size)
|
|
||||||
|
|
||||||
assert_allclose(numpy.cov(sample.T), cov, rtol=1e-1)
|
|
||||||
assert_allclose(sample.mean(0), mean, rtol=1e-1)
|
|
||||||
|
|
||||||
|
|
||||||
def test_entropy():
|
|
||||||
np.random.seed(2846)
|
|
||||||
|
|
||||||
n = 3
|
|
||||||
mean = np.random.randn(n)
|
|
||||||
M = np.random.randn(n, n)
|
|
||||||
cov = np.dot(M, M.T)
|
|
||||||
|
|
||||||
rv = multivariate_normal(mean, cov)
|
|
||||||
|
|
||||||
# Check that frozen distribution agrees with entropy function
|
|
||||||
assert_almost_equal(rv.entropy(), multivariate_normal.entropy(mean, cov))
|
|
||||||
# Compare entropy with manually computed expression involving
|
|
||||||
# the sum of the logs of the eigenvalues of the covariance matrix
|
|
||||||
eigs = np.linalg.eig(cov)[0]
|
|
||||||
desired = 1 / 2 * (n * (np.log(2 * np.pi) + 1) + np.sum(np.log(eigs)))
|
|
||||||
assert_almost_equal(desired, rv.entropy())
|
|
||||||
|
|
||||||
|
|
||||||
def test_lnB():
|
|
||||||
alpha = np.array([1, 1, 1])
|
|
||||||
desired = .5 # e^lnB = 1/2 for [1, 1, 1]
|
|
||||||
|
|
||||||
assert_almost_equal(np.exp(_lnB(alpha)), desired)
|
|
||||||
|
|
||||||
|
|
||||||
def test_frozen_dirichlet():
|
|
||||||
np.random.seed(2846)
|
|
||||||
|
|
||||||
n = np.random.randint(1, 32)
|
|
||||||
alpha = np.random.uniform(10e-10, 100, n)
|
|
||||||
|
|
||||||
d = dirichlet(alpha)
|
|
||||||
|
|
||||||
assert_equal(d.var(), dirichlet.var(alpha))
|
|
||||||
assert_equal(d.mean(), dirichlet.mean(alpha))
|
|
||||||
assert_equal(d.entropy(), dirichlet.entropy(alpha))
|
|
||||||
num_tests = 10
|
|
||||||
for i in range(num_tests):
|
|
||||||
x = np.random.uniform(10e-10, 100, n)
|
|
||||||
x /= np.sum(x)
|
|
||||||
assert_equal(d.pdf(x[:-1]), dirichlet.pdf(x[:-1], alpha))
|
|
||||||
assert_equal(d.logpdf(x[:-1]), dirichlet.logpdf(x[:-1], alpha))
|
|
||||||
|
|
||||||
|
|
||||||
def test_simple_values():
|
|
||||||
alpha = np.array([1, 1])
|
|
||||||
d = dirichlet(alpha)
|
|
||||||
|
|
||||||
assert_almost_equal(d.mean(), 0.5)
|
|
||||||
assert_almost_equal(d.var(), 1. / 12.)
|
|
||||||
|
|
||||||
b = beta(1, 1)
|
|
||||||
assert_almost_equal(d.mean(), b.mean())
|
|
||||||
assert_almost_equal(d.var(), b.var())
|
|
||||||
|
|
||||||
|
|
||||||
def test_K_and_K_minus_1_calls_equal():
|
|
||||||
# Test that calls with K and K-1 entries yield the same results.
|
|
||||||
|
|
||||||
np.random.seed(2846)
|
|
||||||
|
|
||||||
n = np.random.randint(1, 32)
|
|
||||||
alpha = np.random.uniform(10e-10, 100, n)
|
|
||||||
|
|
||||||
d = dirichlet(alpha)
|
|
||||||
num_tests = 10
|
|
||||||
for i in range(num_tests):
|
|
||||||
x = np.random.uniform(10e-10, 100, n)
|
|
||||||
x /= np.sum(x)
|
|
||||||
assert_almost_equal(d.pdf(x[:-1]), d.pdf(x))
|
|
||||||
|
|
||||||
|
|


def test_multiple_entry_calls():
    # Test that calls with a matrix of multiple x vectors work.

    np.random.seed(2846)

    n = np.random.randint(1, 32)
    alpha = np.random.uniform(10e-10, 100, n)
    d = dirichlet(alpha)

    num_tests = 10
    num_multiple = 5
    xm = None
    for i in range(num_tests):
        for m in range(num_multiple):
            x = np.random.uniform(10e-10, 100, n)
            x /= np.sum(x)
            if xm is not None:
                xm = np.vstack((xm, x))
            else:
                xm = x
        rm = d.pdf(xm.T)
        rs = None
        for xs in xm:
            r = d.pdf(xs)
            if rs is not None:
                rs = np.append(rs, r)
            else:
                rs = r
        assert_array_almost_equal(rm, rs)


def test_2D_dirichlet_is_beta():
    np.random.seed(2846)

    alpha = np.random.uniform(10e-10, 100, 2)
    d = dirichlet(alpha)
    b = beta(alpha[0], alpha[1])

    num_tests = 10
    for i in range(num_tests):
        x = np.random.uniform(10e-10, 100, 2)
        x /= np.sum(x)
        assert_almost_equal(b.pdf(x), d.pdf([x]))

    assert_almost_equal(b.mean(), d.mean()[0])
    assert_almost_equal(b.var(), d.var()[0])


def test_dimensions_mismatch():
    # Regression test for GH #3493. Check that setting up a PDF with a mean of
    # length M and a covariance matrix of size (N, N), where M != N, raises a
    # ValueError with an informative error message.

    mu = np.array([0.0, 0.0])
    sigma = np.array([[1.0]])

    assert_raises(ValueError, multivariate_normal, mu, sigma)

    # A simple check that the right error message was passed along. Checking
    # that the entire message is there, word for word, would be somewhat
    # fragile, so we just check for the leading part.
    try:
        multivariate_normal(mu, sigma)
    except ValueError as e:
        msg = "Dimension mismatch"
        assert_equal(str(e)[:len(msg)], msg)


if __name__ == "__main__":
    run_module_suite()
@ -1,193 +0,0 @@
from __future__ import division, print_function, absolute_import

import numpy as np
from numpy.testing import TestCase, run_module_suite, assert_equal, \
    assert_array_equal

from wafo.stats import rankdata, tiecorrect


class TestTieCorrect(TestCase):

    def test_empty(self):
        """An empty array requires no correction, should return 1.0."""
        ranks = np.array([], dtype=np.float64)
        c = tiecorrect(ranks)
        assert_equal(c, 1.0)

    def test_one(self):
        """A single element requires no correction, should return 1.0."""
        ranks = np.array([1.0], dtype=np.float64)
        c = tiecorrect(ranks)
        assert_equal(c, 1.0)

    def test_no_correction(self):
        """Arrays with no ties require no correction."""
        ranks = np.arange(2.0)
        c = tiecorrect(ranks)
        assert_equal(c, 1.0)
        ranks = np.arange(3.0)
        c = tiecorrect(ranks)
        assert_equal(c, 1.0)

    def test_basic(self):
        """Check a few basic examples of the tie correction factor."""
        # One tie of two elements
        ranks = np.array([1.0, 2.5, 2.5])
        c = tiecorrect(ranks)
        T = 2.0
        N = ranks.size
        expected = 1.0 - (T**3 - T) / (N**3 - N)
        assert_equal(c, expected)

        # One tie of two elements (same as above, but tie is not at the end)
        ranks = np.array([1.5, 1.5, 3.0])
        c = tiecorrect(ranks)
        T = 2.0
        N = ranks.size
        expected = 1.0 - (T**3 - T) / (N**3 - N)
        assert_equal(c, expected)

        # One tie of three elements
        ranks = np.array([1.0, 3.0, 3.0, 3.0])
        c = tiecorrect(ranks)
        T = 3.0
        N = ranks.size
        expected = 1.0 - (T**3 - T) / (N**3 - N)
        assert_equal(c, expected)

        # Two ties, lengths 2 and 3.
        ranks = np.array([1.5, 1.5, 4.0, 4.0, 4.0])
        c = tiecorrect(ranks)
        T1 = 2.0
        T2 = 3.0
        N = ranks.size
        expected = 1.0 - ((T1**3 - T1) + (T2**3 - T2)) / (N**3 - N)
        assert_equal(c, expected)
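

# Reference implementation of the factor the asserts above expect (a sketch,
# not part of the original file): for tie groups of sizes t_1..t_g among N
# ranks, tiecorrect = 1 - sum(t**3 - t) / (N**3 - N); singleton groups
# contribute nothing since t**3 - t = 0 for t = 1.
def _tiecorrect_reference(ranks):
    _, counts = np.unique(ranks, return_counts=True)
    n = len(ranks)
    if n < 2:
        return 1.0
    return 1.0 - np.sum(counts**3 - counts) / float(n**3 - n)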


class TestRankData(TestCase):

    def test_empty(self):
        """stats.rankdata([]) should return an empty array."""
        a = np.array([], dtype=int)
        r = rankdata(a)
        assert_array_equal(r, np.array([], dtype=np.float64))
        r = rankdata([])
        assert_array_equal(r, np.array([], dtype=np.float64))

    def test_one(self):
        """Check stats.rankdata with an array of length 1."""
        data = [100]
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, np.array([1.0], dtype=np.float64))
        r = rankdata(data)
        assert_array_equal(r, np.array([1.0], dtype=np.float64))

    def test_basic(self):
        """Basic tests of stats.rankdata."""
        data = [100, 10, 50]
        expected = np.array([3.0, 1.0, 2.0], dtype=np.float64)
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)

        data = [40, 10, 30, 10, 50]
        expected = np.array([4.0, 1.5, 3.0, 1.5, 5.0], dtype=np.float64)
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)

        data = [20, 20, 20, 10, 10, 10]
        expected = np.array([5.0, 5.0, 5.0, 2.0, 2.0, 2.0], dtype=np.float64)
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)
        # The docstring states explicitly that the argument is flattened.
        a2d = a.reshape(2, 3)
        r = rankdata(a2d)
        assert_array_equal(r, expected)

    def test_large_int(self):
        data = np.array([2**60, 2**60+1], dtype=np.uint64)
        r = rankdata(data)
        assert_array_equal(r, [1.0, 2.0])

        data = np.array([2**60, 2**60+1], dtype=np.int64)
        r = rankdata(data)
        assert_array_equal(r, [1.0, 2.0])

        data = np.array([2**60, -2**60+1], dtype=np.int64)
        r = rankdata(data)
        assert_array_equal(r, [2.0, 1.0])

    def test_big_tie(self):
        for n in [10000, 100000, 1000000]:
            data = np.ones(n, dtype=int)
            r = rankdata(data)
            expected_rank = 0.5 * (n + 1)
            assert_array_equal(r, expected_rank * data,
                               "test failed with n=%d" % n)


_cases = (
    # values, method, expected
    ([], 'average', []),
    ([], 'min', []),
    ([], 'max', []),
    ([], 'dense', []),
    ([], 'ordinal', []),
    #
    ([100], 'average', [1.0]),
    ([100], 'min', [1.0]),
    ([100], 'max', [1.0]),
    ([100], 'dense', [1.0]),
    ([100], 'ordinal', [1.0]),
    #
    ([100, 100, 100], 'average', [2.0, 2.0, 2.0]),
    ([100, 100, 100], 'min', [1.0, 1.0, 1.0]),
    ([100, 100, 100], 'max', [3.0, 3.0, 3.0]),
    ([100, 100, 100], 'dense', [1.0, 1.0, 1.0]),
    ([100, 100, 100], 'ordinal', [1.0, 2.0, 3.0]),
    #
    ([100, 300, 200], 'average', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'min', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'max', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'dense', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'ordinal', [1.0, 3.0, 2.0]),
    #
    ([100, 200, 300, 200], 'average', [1.0, 2.5, 4.0, 2.5]),
    ([100, 200, 300, 200], 'min', [1.0, 2.0, 4.0, 2.0]),
    ([100, 200, 300, 200], 'max', [1.0, 3.0, 4.0, 3.0]),
    ([100, 200, 300, 200], 'dense', [1.0, 2.0, 3.0, 2.0]),
    ([100, 200, 300, 200], 'ordinal', [1.0, 2.0, 4.0, 3.0]),
    #
    ([100, 200, 300, 200, 100], 'average', [1.5, 3.5, 5.0, 3.5, 1.5]),
    ([100, 200, 300, 200, 100], 'min', [1.0, 3.0, 5.0, 3.0, 1.0]),
    ([100, 200, 300, 200, 100], 'max', [2.0, 4.0, 5.0, 4.0, 2.0]),
    ([100, 200, 300, 200, 100], 'dense', [1.0, 2.0, 3.0, 2.0, 1.0]),
    ([100, 200, 300, 200, 100], 'ordinal', [1.0, 3.0, 5.0, 4.0, 2.0]),
    #
    ([10] * 30, 'ordinal', np.arange(1.0, 31.0)),
)


def test_cases():

    def check_case(values, method, expected):
        r = rankdata(values, method=method)
        assert_array_equal(r, expected)

    for values, method, expected in _cases:
        yield check_case, values, method, expected


if __name__ == "__main__":
    run_module_suite()
File diff suppressed because it is too large
@ -1,91 +0,0 @@
from __future__ import division, print_function, absolute_import

import numpy as np
from numpy.testing import assert_allclose, assert_equal, run_module_suite

from scipy.stats._tukeylambda_stats import tukeylambda_variance, \
    tukeylambda_kurtosis


def test_tukeylambda_stats_known_exact():
    """Compare results with some known exact formulas."""
    # Some exact values of the Tukey Lambda variance and kurtosis:
    # lambda   var       kurtosis
    #   0     pi**2/3     6/5                  (logistic distribution)
    #  0.5    4 - pi     (5/3 - pi/2)/(pi/4 - 1)**2 - 3
    #   1      1/3       -6/5                  (uniform distribution on (-1, 1))
    #   2      1/12      -6/5                  (uniform distribution on (-1/2, 1/2))

    # lambda = 0
    var = tukeylambda_variance(0)
    assert_allclose(var, np.pi**2 / 3, atol=1e-12)
    kurt = tukeylambda_kurtosis(0)
    assert_allclose(kurt, 1.2, atol=1e-10)

    # lambda = 0.5
    var = tukeylambda_variance(0.5)
    assert_allclose(var, 4 - np.pi, atol=1e-12)
    kurt = tukeylambda_kurtosis(0.5)
    desired = (5. / 3 - np.pi / 2) / (np.pi / 4 - 1)**2 - 3
    assert_allclose(kurt, desired, atol=1e-10)

    # lambda = 1
    var = tukeylambda_variance(1)
    assert_allclose(var, 1.0 / 3, atol=1e-12)
    kurt = tukeylambda_kurtosis(1)
    assert_allclose(kurt, -1.2, atol=1e-10)

    # lambda = 2
    var = tukeylambda_variance(2)
    assert_allclose(var, 1.0 / 12, atol=1e-12)
    kurt = tukeylambda_kurtosis(2)
    assert_allclose(kurt, -1.2, atol=1e-10)
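

# Closed form behind the spot checks above (a sketch; this is the standard
# Tukey lambda variance, assumed to match the implementation under test):
# for lam > -0.5 and lam != 0,
#   var(lam) = (2/lam**2) * (1/(1 + 2*lam) - Gamma(lam+1)**2 / Gamma(2*lam+2)).
# E.g. lam = 1 gives 2 * (1/3 - 1/6) = 1/3, matching the table above.
def _tukeylambda_var_reference(lam):
    from scipy.special import gammaln
    lam = float(lam)
    return (2.0 / lam**2) * (1.0 / (1 + 2 * lam) -
                             np.exp(2 * gammaln(lam + 1) - gammaln(2 * lam + 2)))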


def test_tukeylambda_stats_mpmath():
    """Compare results with some values that were computed using mpmath."""
    a10 = dict(atol=1e-10, rtol=0)
    a12 = dict(atol=1e-12, rtol=0)
    data = [
        # lambda      variance                kurtosis
        [-0.1,        4.78050217874253547,    3.78559520346454510],
        [-0.0649,     4.16428023599895777,    2.52019675947435718],
        [-0.05,       3.93672267890775277,    2.13129793057777277],
        [-0.001,      3.30128380390964882,    1.21452460083542988],
        [0.001,       3.27850775649572176,    1.18560634779287585],
        [0.03125,     2.95927803254615800,    0.804487555161819980],
        [0.05,        2.78281053405464501,    0.611604043886644327],
        [0.0649,      2.65282386754100551,    0.476834119532774540],
        [1.2,         0.242153920578588346,  -1.23428047169049726],
        [10.0,        0.00095237579757703597, 2.37810697355144933],
        [20.0,        0.00012195121951131043, 7.37654321002709531],
    ]

    for lam, var_expected, kurt_expected in data:
        var = tukeylambda_variance(lam)
        assert_allclose(var, var_expected, **a12)
        kurt = tukeylambda_kurtosis(lam)
        assert_allclose(kurt, kurt_expected, **a10)

    # Test with vector arguments (most of the other tests are for single
    # values).
    lam, var_expected, kurt_expected = zip(*data)
    var = tukeylambda_variance(lam)
    assert_allclose(var, var_expected, **a12)
    kurt = tukeylambda_kurtosis(lam)
    assert_allclose(kurt, kurt_expected, **a10)


def test_tukeylambda_stats_invalid():
    """Test values of lambda outside the domains of the functions."""
    lam = [-1.0, -0.5]
    var = tukeylambda_variance(lam)
    assert_equal(var, np.array([np.nan, np.inf]))

    lam = [-1.0, -0.25]
    kurt = tukeylambda_kurtosis(lam)
    assert_equal(kurt, np.array([np.nan, np.inf]))


if __name__ == "__main__":
    run_module_suite()
@ -1,412 +0,0 @@
"""
Commentary
----------

Most of the work is done by the scipy.stats.distributions module.

This provides a plethora of continuous distributions to play with.

Each distribution has functions to generate random deviates, pdf's,
cdf's etc. as well as a function to fit the distribution to some given
data.

The fitting uses scipy.optimize.fmin to minimise the negative
log-likelihood of the data given the distribution.

There are a couple of problems with this approach. First, it is
sensitive to the initial guess at the parameters. Second, it can be a
little slow.

Two key parameters are the 'loc' and 'scale' parameters. Data is
shifted by 'loc' and scaled by 'scale' prior to fitting. Supplying
appropriate values for these parameters is important to getting a good
fit.

See the factory() function, which picks from a handful of common
approaches for each distribution.

For some distributions (e.g. normal) it really makes sense just to
calculate the parameters directly from the data.

The code in the __main__ block should be a good guide to how to use this.

Simply:
    get a QuickFit object
    add the distributions you want to try to fit
    call fit() with your data
    call stats() to generate some stats on the fit
    call plot() if you want to see a plot

Named after Mrs Twolumps, minister's secretary in the silly walks
sketch, who brings in coffee with a full silly walk.

Tenuous link with curve fitting is that you generally see "two lumps":
one in your data and the other in the curve that is being fitted.

Or alternatively, if your data is not too silly then you can fit a
curve to it.

License is GNU LGPL v3, see https://launchpad.net/twolumps
"""
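
# A minimal usage sketch (not from the original module; assumes `data` is a
# sorted 1-D numpy array):
#
#     qf = QuickFit()
#     qf.add('norm')
#     qf.add('weibull_min')
#     qf.fit(data)
#     qf.stats(data)
#     qf.plot(data, topn=2)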
import inspect

import numpy
from numpy import mean, std  # scipy.mean/scipy.std were deprecated aliases
from wafo import stats


def factory(name):
    """ Factory to return appropriate objects for each distro. """
    fitters = dict(
        beta=ZeroOneScipyDistribution,
        alpha=ZeroOneScipyDistribution,
        ncf=ZeroOneScipyDistribution,
        triang=ZeroOneScipyDistribution,
        uniform=ZeroOneScipyDistribution,
        powerlaw=ZeroOneScipyDistribution,

        pareto=MinLocScipyDistribution,
        expon=MinLocScipyDistribution,
        gamma=MinLocScipyDistribution,
        lognorm=MinLocScipyDistribution,
        maxwell=MinLocScipyDistribution,
        weibull_min=MinLocScipyDistribution,

        weibull_max=MaxLocScipyDistribution)

    return fitters.get(name, ScipyDistribution)(name)


def get_continuous_distros():
    """ Find all attributes of stats that are continuous distributions. """

    fitters = []
    skip = set()
    for name, item in inspect.getmembers(stats):
        if name in skip:
            continue
        if item is stats.rv_continuous:
            continue
        if isinstance(item, stats.rv_continuous):
            fitters.append([name, factory(name)])

    return fitters
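

# Example of the dispatch above (a brief note, not in the original file):
# factory('beta') returns a ZeroOneScipyDistribution, while an unlisted name
# such as 'norm' falls through to the plain ScipyDistribution.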


class ScipyDistribution(object):

    def __init__(self, name):
        self.name = name
        self.distro = self.get_distro()
        self.fitted = None

    def __getattr__(self, attr):
        """ Try delegating to the distro object """
        return getattr(self.distro, attr)

    def get_distro(self):
        return getattr(stats, self.name)

    def set_distro(self, parms):
        self.distro = getattr(stats, self.name)(*parms)

        return self.distro

    def calculate_loc_and_scale(self, data):
        """ Calculate loc and scale parameters for fit.

        Depending on the distribution, these need to be approximately
        right to get a good fit.
        """
        return mean(data), std(data)

    def fit(self, data, *args, **kwargs):
        """ This needs some work.

        Seems the various scipy distributions do a reasonable job if
        given a good hint.

        Need to get distro specific hints.
        """
        fits = []

        # Try with and without providing loc and scale hints; this
        # increases the chance of a fit without an exception being
        # generated.
        for (loc, scale) in ((0.0, 1.0),
                             self.calculate_loc_and_scale(data)):
            try:
                parms = self.get_distro().fit(data, loc=loc, scale=scale)

                self.set_distro(list(parms))
                expected = self.expected(data)
                rss = ((expected - data)**2).sum()
                fits.append([rss, list(parms)])

                parms = self.get_distro().fit(data, floc=loc, scale=scale)

                self.set_distro(list(parms))
                expected = self.expected(data)
                rss = ((expected - data)**2).sum()
                fits.append([rss, list(parms)])
            except Exception:
                pass

        # No fits means all tries raised exceptions.
        if not fits:
            raise Exception("all fit attempts failed in fit()")

        # Pick the one with the smallest rss.
        fits.sort(key=lambda f: f[0])
        self.parms = fits[0][1]
        print(self.parms)

        return self.set_distro(list(self.parms))

    def expected(self, data):
        """ Calculate expected values at each data point """
        if self.fitted is not None:
            return self.fitted

        n = len(data)
        xx = numpy.linspace(0, 1, n + 2)[1:-1]
        self.fitted = self.ppf(xx)

        return self.fitted

    def fit_stats(self, data):
        """ Return stats on the fits

        data assumed to be sorted.
        """
        n = len(data)

        dvar = numpy.var(data)
        expected = self.expected(data)
        evar = numpy.var(expected)

        rss = 0.0
        for expect, obs in zip(expected, data):
            rss += (obs - expect) ** 2.0

        self.rss = rss
        self.dss = dvar * n
        self.fss = evar * n

    def residuals(self, data):
        """ Return residuals """
        expected = self.expected(data)

        return numpy.array(data) - numpy.array(expected)


class MinLocScipyDistribution(ScipyDistribution):

    def calculate_loc_and_scale(self, data):
        """ Set loc to min value in the data.

        Useful for weibull_min
        """
        return min(data), std(data)


class MaxLocScipyDistribution(ScipyDistribution):

    def calculate_loc_and_scale(self, data):
        """ Set loc to max value in the data.

        Useful for weibull_max
        """
        return max(data), std(data)


class ZeroOneScipyDistribution(ScipyDistribution):

    def calculate_loc_and_scale(self, data):
        """ Set loc and scale to move to [0, 1] interval.

        Useful for beta distribution
        """
        return min(data), max(data) - min(data)


class QuickFit(object):
    """ Fit a family of distributions.

    Calculates stats on each fit.

    Option to create plots.
    """

    def __init__(self):
        self.distributions = []

    def add_distribution(self, distribution):
        """ Add a ready-prepared ScipyDistribution """
        self.distributions.append(distribution)

    def add(self, name):
        """ Add a distribution by name. """
        self.distributions.append(factory(name))

    def fit(self, data):
        """ Fit all of the distros we have """
        fitted = []
        for distro in self.distributions:
            print('fitting distro', distro.name)
            try:
                distro.fit(data)
            except Exception:
                continue
            fitted.append(distro)
        self.distributions = fitted

        print('finished fitting')

    def stats(self, data):
        """ Return stats on the fits """
        for dd in self.distributions:
            dd.fit_stats(data)

    def get_topn(self, n):
        """ Return top-n best fits. """
        data = [[x.rss, x] for x in self.distributions
                if numpy.isfinite(x.rss)]
        data.sort(key=lambda t: t[0])

        if not n:
            n = len(data)

        return [x[1] for x in data[:n]]

    def fit_plot(self, data, topn=0, bins=20):
        """ Create a plot. """
        from matplotlib import pylab as pl

        distros = self.get_topn(topn)

        xx = numpy.linspace(data.min(), data.max(), 300)

        table = []
        nparms = max(len(x.parms) for x in distros)
        tcolours = []
        for dd in distros:
            patch = pl.plot(xx, [dd.pdf(p) for p in xx],
                            label='%10.2f%% %s' % (100.0 * dd.rss / dd.dss,
                                                   dd.name))
            row = (['', dd.name, '%10.2f%%' % (100.0 * dd.rss / dd.dss,)] +
                   ['%0.2f' % x for x in dd.parms])
            while len(row) < 3 + nparms:
                row.append('')
            table.append(row)
            tcolours.append([patch[0].get_markerfacecolor()] +
                            ['w'] * (2 + nparms))

        # Add a histogram with the data.
        pl.hist(data, bins=bins, normed=True)
        tab = pl.table(cellText=table, cellColours=tcolours,
                       colLabels=['', 'Distribution', 'Res. SS/Data SS'] +
                       ['P%d' % (x + 1,) for x in range(nparms)],
                       bbox=(0.0, 1.0, 1.0, 0.3))
        tab.auto_set_font_size(False)
        tab.set_fontsize(10.)

    def residual_plot(self, data, topn=0):
        """ Create a residual plot. """
        from matplotlib import pylab as pl

        distros = self.get_topn(topn)

        n = len(data)
        xx = numpy.linspace(0, 1, n + 2)[1:-1]
        for dd in distros:
            pl.plot(xx, dd.residuals(data),
                    label='%10.2f%% %s' % (100.0 * dd.rss / dd.dss, dd.name))
        pl.grid(True)

    def plot(self, data, topn):
        """ Plot data fit and residuals """
        from matplotlib import pylab as pl
        pl.axes([0.1, 0.4, 0.8, 0.4])  # leave room above the axes for the table
        self.fit_plot(data, topn=topn)

        pl.axes([0.1, 0.05, 0.8, 0.3])
        self.residual_plot(data, topn=topn)


def read_data(infile, field):
    """ Simple utility to extract a field out of a csv file. """
    import csv

    reader = csv.reader(infile)
    header = next(reader)
    field = header.index(field)
    data = []
    for row in reader:
        data.append(float(row[field]))

    return data


if __name__ == '__main__':

    import sys
    import optparse

    from matplotlib import pylab as pl

    parser = optparse.OptionParser()
    parser.add_option('-d', '--distro', action='append', default=[])
    parser.add_option('-l', '--list', action='store_true',
                      help='List available distros')

    parser.add_option('-i', '--infile')
    parser.add_option('-f', '--field', default='P/L')

    parser.add_option('-n', '--topn', type='int', default=0)

    parser.add_option('-s', '--sample', default='normal',
                      help='generate a sample from this distro as a test')
    parser.add_option('--size', type='int', default=1000,
                      help='Size of sample to generate')

    opts, args = parser.parse_args()

    if opts.list:
        for name, distro in get_continuous_distros():
            print(name)
        sys.exit()
    if not opts.distro:
        opts.distro = [x[0] for x in get_continuous_distros()]

    quickfit = QuickFit()
    for distro in opts.distro:
        quickfit.add(distro)

    if opts.sample:
        data = getattr(numpy.random, opts.sample)(size=opts.size)
    else:
        data = numpy.array(read_data(open(opts.infile), opts.field))

    data.sort()

    quickfit.fit(data)
    print('doing stats')
    quickfit.stats(data)

    print('doing plot')
    quickfit.plot(data, topn=opts.topn)
    pl.show()
@ -1,47 +0,0 @@
from __future__ import division, print_function, absolute_import

import numpy as np
import scipy.stats
from scipy.special import i0


def von_mises_cdf_series(k, x, p):
    x = float(x)
    s = np.sin(x)
    c = np.cos(x)
    sn = np.sin(p * x)
    cn = np.cos(p * x)
    R = 0
    V = 0
    for n in range(p - 1, 0, -1):
        sn, cn = sn * c - cn * s, cn * c + sn * s
        R = 1. / (2 * n / k + R)
        V = R * (sn / n + V)

    return 0.5 + x / (2 * np.pi) + V / np.pi


def von_mises_cdf_normalapprox(k, x, C1):
    b = np.sqrt(2 / np.pi) * np.exp(k) / i0(k)
    z = b * np.sin(x / 2.)
    return scipy.stats.norm.cdf(z)


def von_mises_cdf(k, x):
    ix = 2 * np.pi * np.round(x / (2 * np.pi))
    x = x - ix
    k = float(k)

    # These values should give 12 decimal digits
    CK = 50
    a = [28., 0.5, 100., 5.0]
    C1 = 50.1

    if k < CK:
        p = int(np.ceil(a[0] + a[1] * k - a[2] / (k + a[3])))

        F = np.clip(von_mises_cdf_series(k, x, p), 0, 1)
    else:
        F = von_mises_cdf_normalapprox(k, x, C1)

    return F + ix
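
# What the series evaluates (a sketch of the underlying identity, not stated
# in the original file; the structure resembles Hill's classic algorithm for
# the von Mises CDF, though that attribution is an assumption): on (-pi, pi),
#   F(x) = 1/2 + x/(2*pi) + (1/pi) * sum_{n>=1} sin(n*x) * I_n(k) / (n * I_0(k)),
# where the Bessel ratios I_n/I_{n-1} are generated by the backward recurrence
# R_n = 1 / (2*n/k + R_{n+1}) used in the loop above. A quick numerical
# cross-check against direct integration of the density:


def _von_mises_cdf_check(k=2.0, x=0.7):
    from scipy.integrate import quad
    density = lambda t: np.exp(k * np.cos(t)) / (2 * np.pi * i0(k))
    numeric, _ = quad(density, -np.pi, x)
    return numeric, von_mises_cdf(k, x)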
@ -1,76 +0,0 @@
import numpy as np
import scipy.stats
from scipy.special import i0
import numpy.testing
cimport numpy as np

cdef extern from "math.h":
    double cos(double theta)
    double sin(double theta)


cdef double von_mises_cdf_series(double k, double x, unsigned int p):
    cdef double s, c, sn, cn, R, V
    cdef unsigned int n
    s = sin(x)
    c = cos(x)
    sn = sin(p * x)
    cn = cos(p * x)
    R = 0
    V = 0
    for n in range(p - 1, 0, -1):
        sn, cn = sn * c - cn * s, cn * c + sn * s
        R = 1. / (2 * n / k + R)
        V = R * (sn / n + V)

    return 0.5 + x / (2 * np.pi) + V / np.pi


def von_mises_cdf_normalapprox(k, x, C1):
    b = np.sqrt(2 / np.pi) * np.exp(k) / i0(k)
    z = b * np.sin(x / 2.)
    # Note: chi is computed but never used; the value returned is the plain
    # normal cdf of z.
    C = 24 * k
    chi = z - z**3 / ((C - 2 * z**2 - 16) / 3. -
                      (z**4 + 7 / 4. * z**2 + 167. / 2) / (C + C1 - z**2 + 3))**2
    return scipy.stats.norm.cdf(z)


cimport cython
@cython.boundscheck(False)
def von_mises_cdf(k, x):
    cdef np.ndarray[double, ndim=1] temp, temp_xs, temp_ks
    cdef unsigned int i, p
    cdef double a1, a2, a3, a4, C1, CK
    k = np.asarray(k)
    x = np.asarray(x)
    zerodim = k.ndim == 0 and x.ndim == 0

    k = np.atleast_1d(k)
    x = np.atleast_1d(x)
    ix = np.round(x / (2 * np.pi))
    x = x - ix * 2 * np.pi

    # These values should give 12 decimal digits
    CK = 50
    a1, a2, a3, a4 = [28., 0.5, 100., 5.0]
    C1 = 50.1

    bx, bk = np.broadcast_arrays(x, k)
    result = np.empty(bx.shape, dtype=np.float64)

    c_small_k = bk < CK
    temp = result[c_small_k]
    temp_xs = bx[c_small_k].astype(np.float64)
    temp_ks = bk[c_small_k].astype(np.float64)
    for i in range(len(temp)):
        p = <int>(1 + a1 + a2 * temp_ks[i] - a3 / (temp_ks[i] + a4))
        temp[i] = von_mises_cdf_series(temp_ks[i], temp_xs[i], p)
        if temp[i] < 0:
            temp[i] = 0
        elif temp[i] > 1:
            temp[i] = 1
    result[c_small_k] = temp
    result[~c_small_k] = von_mises_cdf_normalapprox(bk[~c_small_k],
                                                    bx[~c_small_k], C1)

    if not zerodim:
        return result + ix
    else:
        return (result + ix)[0]
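
# Implementation note (not in the original file): this Cython version mirrors
# the pure-Python module above -- the same backward series for k < 50 and the
# same simple normal approximation otherwise -- but it broadcasts k and x and
# runs the series in a typed C loop for speed. The `chi` correction term it
# computes is dead code: only norm.cdf(z) is returned.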