Updated from wafo.stats from scipy.stats

12 years ago · 6b88f2d4cc
parent 5c84825641
commit 6b88f2d4cc
34 changed files with 22617 additions and 12587 deletions
--- a/pywafo/src/wafo/stats/init.py
+++ b/pywafo/src/wafo/stats/init.py
@ -1,12 +1,346 @@
 """
-Statistics package in WAFO Toolbox.
+==========================================
 Statistical functions (:mod:`scipy.stats`)
 ==========================================
-    Readme               - Readme file for module STATS in WAFO Toolbox
+.. module:: scipy.stats
 This module contains a large number of probability distributions as
 well as a growing library of statistical functions.
 Each included distribution is an instance of the class rv_continuous:
 For each given name the following methods are available:
 .. autosummary::
   :toctree: generated/
   rv_continuous
   rv_continuous.pdf
   rv_continuous.logpdf
   rv_continuous.cdf
   rv_continuous.logcdf
   rv_continuous.sf
   rv_continuous.logsf
   rv_continuous.ppf
   rv_continuous.isf
   rv_continuous.moment
   rv_continuous.stats
   rv_continuous.entropy
   rv_continuous.fit
   rv_continuous.expect
 Calling the instance as a function returns a frozen pdf whose shape,
 location, and scale parameters are fixed.
 Similarly, each discrete distribution is an instance of the class
 rv_discrete:
 .. autosummary::
   :toctree: generated/
   rv_discrete
   rv_discrete.rvs
   rv_discrete.pmf
   rv_discrete.logpmf
   rv_discrete.cdf
   rv_discrete.logcdf
   rv_discrete.sf
   rv_discrete.logsf
   rv_discrete.ppf
   rv_discrete.isf
   rv_discrete.stats
   rv_discrete.moment
   rv_discrete.entropy
   rv_discrete.expect
 Continuous distributions
 ========================
 .. autosummary::
   :toctree: generated/
   alpha             -- Alpha
   anglit            -- Anglit
   arcsine           -- Arcsine
   beta              -- Beta
   betaprime         -- Beta Prime
   bradford          -- Bradford
   burr              -- Burr
   cauchy            -- Cauchy
   chi               -- Chi
   chi2              -- Chi-squared
   cosine            -- Cosine
   dgamma            -- Double Gamma
   dweibull          -- Double Weibull
   erlang            -- Erlang
   expon             -- Exponential
   exponweib         -- Exponentiated Weibull
   exponpow          -- Exponential Power
   f                 -- F (Snedecor F)
   fatiguelife       -- Fatigue Life (Birnbaum-Saunders)
   fisk              -- Fisk
   foldcauchy        -- Folded Cauchy
   foldnorm          -- Folded Normal
   frechet_r         -- Frechet Right Sided, Extreme Value Type II (Extreme LB) or weibull_min
   frechet_l         -- Frechet Left Sided, Weibull_max
   genlogistic       -- Generalized Logistic
   genpareto         -- Generalized Pareto
   genexpon          -- Generalized Exponential
   genextreme        -- Generalized Extreme Value
   gausshyper        -- Gauss Hypergeometric
   gamma             -- Gamma
   gengamma          -- Generalized gamma
   genhalflogistic   -- Generalized Half Logistic
   gilbrat           -- Gilbrat
   gompertz          -- Gompertz (Truncated Gumbel)
   gumbel_r          -- Right Sided Gumbel, Log-Weibull, Fisher-Tippett, Extreme Value Type I
   gumbel_l          -- Left Sided Gumbel, etc.
   halfcauchy        -- Half Cauchy
   halflogistic      -- Half Logistic
   halfnorm          -- Half Normal
   hypsecant         -- Hyperbolic Secant
   invgamma          -- Inverse Gamma
   invgauss          -- Inverse Gaussian
   invweibull        -- Inverse Weibull
   johnsonsb         -- Johnson SB
   johnsonsu         -- Johnson SU
   ksone             -- Kolmogorov-Smirnov one-sided (no stats)
   kstwobign         -- Kolmogorov-Smirnov two-sided test for Large N (no stats)
   laplace           -- Laplace
   logistic          -- Logistic
   loggamma          -- Log-Gamma
   loglaplace        -- Log-Laplace (Log Double Exponential)
   lognorm           -- Log-Normal
   lomax             -- Lomax (Pareto of the second kind)
   maxwell           -- Maxwell
   mielke            -- Mielke's Beta-Kappa
   nakagami          -- Nakagami
   ncx2              -- Non-central chi-squared
   ncf               -- Non-central F
   nct               -- Non-central Student's T
   norm              -- Normal (Gaussian)
   pareto            -- Pareto
   pearson3          -- Pearson type III
   powerlaw          -- Power-function
   powerlognorm      -- Power log normal
   powernorm         -- Power normal
   rdist             -- R-distribution
   reciprocal        -- Reciprocal
   rayleigh          -- Rayleigh
   rice              -- Rice
   recipinvgauss     -- Reciprocal Inverse Gaussian
   semicircular      -- Semicircular
   t                 -- Student's T
   triang            -- Triangular
   truncexpon        -- Truncated Exponential
   truncnorm         -- Truncated Normal
   tukeylambda       -- Tukey-Lambda
   uniform           -- Uniform
   vonmises          -- Von-Mises (Circular)
   wald              -- Wald
   weibull_min       -- Minimum Weibull (see Frechet)
   weibull_max       -- Maximum Weibull (see Frechet)
   wrapcauchy        -- Wrapped Cauchy
 Multivariate distributions
 ==========================
 .. autosummary::
   :toctree: generated/
   multivariate_normal   -- Multivariate normal distribution
 Discrete distributions
 ======================
 .. autosummary::
   :toctree: generated/
   bernoulli         -- Bernoulli
   binom             -- Binomial
   boltzmann         -- Boltzmann (Truncated Discrete Exponential)
   dlaplace          -- Discrete Laplacian
   geom              -- Geometric
   hypergeom         -- Hypergeometric
   logser            -- Logarithmic (Log-Series, Series)
   nbinom            -- Negative Binomial
   planck            -- Planck (Discrete Exponential)
   poisson           -- Poisson
   randint           -- Discrete Uniform
   skellam           -- Skellam
   zipf              -- Zipf
 Statistical functions
 =====================
 Several of these functions have a similar version in scipy.stats.mstats
 which work for masked arrays.
 .. autosummary::
   :toctree: generated/
   describe          -- Descriptive statistics
   gmean             -- Geometric mean
   hmean             -- Harmonic mean
   kurtosis          -- Fisher or Pearson kurtosis
   kurtosistest      --
   mode              -- Modal value
   moment            -- Central moment
   normaltest        --
   skew              -- Skewness
   skewtest          --
   tmean             -- Truncated arithmetic mean
   tvar              -- Truncated variance
   tmin              --
   tmax              --
   tstd              --
   tsem              --
   nanmean           -- Mean, ignoring NaN values
   nanstd            -- Standard deviation, ignoring NaN values
   nanmedian         -- Median, ignoring NaN values
   variation         -- Coefficient of variation
 .. autosummary::
   :toctree: generated/
   cumfreq           _
   histogram2        _
   histogram         _
   itemfreq          _
   percentileofscore _
   scoreatpercentile _
   relfreq           _
 .. autosummary::
   :toctree: generated/
   binned_statistic     -- Compute a binned statistic for a set of data.
   binned_statistic_2d  -- Compute a 2-D binned statistic for a set of data.
   binned_statistic_dd  -- Compute a d-D binned statistic for a set of data.
 .. autosummary::
   :toctree: generated/
   obrientransform
   signaltonoise
   bayes_mvs
   sem
   zmap
   zscore
 .. autosummary::
   :toctree: generated/
   threshold
   trimboth
   trim1
 .. autosummary::
   :toctree: generated/
   f_oneway
   pearsonr
   spearmanr
   pointbiserialr
   kendalltau
   linregress
 .. autosummary::
   :toctree: generated/
   ttest_1samp
   ttest_ind
   ttest_rel
   kstest
   chisquare
   power_divergence
   ks_2samp
   mannwhitneyu
   tiecorrect
   rankdata
   ranksums
   wilcoxon
   kruskal
   friedmanchisquare
 .. autosummary::
   :toctree: generated/
   ansari
   bartlett
   levene
   shapiro
   anderson
   binom_test
   fligner
   mood
 .. autosummary::
   :toctree: generated/
   boxcox
   boxcox_normmax
   boxcox_llf
 Contingency table functions
 ===========================
 .. autosummary::
   :toctree: generated/
   chi2_contingency
   contingency.expected_freq
   contingency.margins
   fisher_exact
 Plot-tests
 ==========
 .. autosummary::
   :toctree: generated/
   ppcc_max
   ppcc_plot
   probplot
   boxcox_normplot
 Masked statistics functions
 ===========================
 .. toctree::
   stats.mstats
 Univariate and multivariate kernel density estimation (:mod:`scipy.stats.kde`)
 ==============================================================================
 .. autosummary::
   :toctree: generated/
   gaussian_kde
 For many more stat related functions install the software R and the
 interface package rpy.
 """
 from __future__ import division, print_function, absolute_import
 from scipy.stats import *
-from core import *
+from .core import *
-import distributions #@Reimport
+from .stats import *
-from wafo.stats.distributions import *
+from .distributions import *
 from .rv import *
 from .morestats import *
 from ._binned_statistic import *
 from .kde import gaussian_kde
 from . import mstats
 from .contingency import chi2_contingency
 from ._multivariate import *
 from . import estimation
 # Export every public name collected by the imports above.  Names ending in
 # 'cython' (e.g. vonmises_cython) are implementation details and are left out.
 # NOTE: `estimation` is already bound by `from . import estimation` above; a
 # bare `import estimation` would be an absolute import here (see the
 # `absolute_import` future statement) and fail, so it is intentionally absent.
 __all__ = [s for s in dir() if not (s.startswith('_') or s.endswith('cython'))]
--- a/pywafo/src/wafo/stats/_discrete_distns.py
+++ b/pywafo/src/wafo/stats/_discrete_distns.py
@ -0,0 +1,762 @@
 #
 # Author:  Travis Oliphant  2002-2011 with contributions from
 #          SciPy Developers 2004-2011
 #
 from __future__ import division, print_function, absolute_import
 from scipy import special
 from scipy.special import gammaln as gamln
 from numpy import floor, ceil, log, exp, sqrt, log1p, expm1, tanh, cosh, sinh
 import numpy as np
 import numpy.random as mtrand
 from ._distn_infrastructure import (
        rv_discrete, _lazywhere, _ncx2_pdf, _ncx2_cdf)
 __all__ = [
        'binom', 'bernoulli', 'nbinom', 'geom', 'hypergeom',
        'logser', 'poisson', 'planck', 'boltzmann', 'randint',
        'zipf', 'dlaplace', 'skellam'
        ]
 class binom_gen(rv_discrete):
     """A binomial discrete random variable.

     %(before_notes)s

     Notes
     -----
     The probability mass function for `binom` is::

        binom.pmf(k) = choose(n, k) * p**k * (1-p)**(n-k)

     for ``k`` in ``{0, 1,..., n}``.

     `binom` takes ``n`` and ``p`` as shape parameters.

     %(example)s

     """
     def _rvs(self, n, p):
         # Sampling is delegated to numpy's binomial generator.
         return mtrand.binomial(n, p, self._size)

     def _argcheck(self, n, p):
         # The upper end of the support equals the number of trials.
         self.b = n
         trials_ok = n >= 0
         prob_ok = (p >= 0) & (p <= 1)
         return trials_ok & prob_ok

     def _logpmf(self, x, n, p):
         k = floor(x)
         # log(choose(n, k)) expressed through log-gamma.
         log_coeff = (gamln(n+1) - (gamln(k+1) + gamln(n-k+1)))
         log_success = special.xlogy(k, p)
         log_failure = special.xlog1py(n-k, -p)
         return log_coeff + log_success + log_failure

     def _pmf(self, x, n, p):
         return exp(self._logpmf(x, n, p))

     def _cdf(self, x, n, p):
         return special.bdtr(floor(x), n, p)

     def _sf(self, x, n, p):
         return special.bdtrc(floor(x), n, p)

     def _ppf(self, q, n, p):
         upper = ceil(special.bdtrik(q, n, p))
         lower = upper - 1
         # Step down by one whenever the smaller candidate already reaches q.
         lower_reaches_q = special.bdtr(lower, n, p) >= q
         return np.where(lower_reaches_q, lower, upper)

     def _stats(self, n, p):
         # Returns (mean, variance, skewness, excess kurtosis).
         q = 1.0-p
         mean = n * p
         variance = n * p * q
         skewness = (q-p) / sqrt(variance)
         kurtosis = (1.0-6*p*q) / variance
         return mean, variance, skewness, kurtosis

     def _entropy(self, n, p):
         # Direct summation of -pmf*log(pmf) over the whole support 0..n.
         support = np.arange(0, n + 1)
         probs = self._pmf(support, n, p)
         return -np.sum(special.xlogy(probs, probs), axis=0)
 binom = binom_gen(name='binom')
 class bernoulli_gen(binom_gen):
     """A Bernoulli discrete random variable.

     %(before_notes)s

     Notes
     -----
     The probability mass function for `bernoulli` is::

        bernoulli.pmf(k) = 1-p  if k = 0
                         = p    if k = 1

     for ``k`` in ``{0, 1}``.

     `bernoulli` takes ``p`` as shape parameter.

     %(example)s

     """
     # Every method below is the binomial counterpart evaluated with a single
     # trial (n = 1); the module-level `binom` instance does the actual work.
     def _rvs(self, p):
         return binom_gen._rvs(self, 1, p)

     def _argcheck(self, p):
         return (p >= 0) & (p <= 1)

     def _logpmf(self, x, p):
         return binom._logpmf(x, 1, p)

     def _pmf(self, x, p):
         return binom._pmf(x, 1, p)

     def _cdf(self, x, p):
         return binom._cdf(x, 1, p)

     def _sf(self, x, p):
         return binom._sf(x, 1, p)

     def _ppf(self, q, p):
         return binom._ppf(q, 1, p)

     def _stats(self, p):
         return binom._stats(1, p)

     def _entropy(self, p):
         # Closed form: -p*log(p) - (1-p)*log(1-p), with 0*log(0) taken as 0.
         success_term = special.xlogy(p, p)
         failure_term = special.xlogy(1 - p, 1 - p)
         return -success_term - failure_term
 bernoulli = bernoulli_gen(b=1, name='bernoulli')
 class nbinom_gen(rv_discrete):
     """A negative binomial discrete random variable.

     %(before_notes)s

     Notes
     -----
     The probability mass function for `nbinom` is::

          nbinom.pmf(k) = choose(k+n-1, n-1) * p**n * (1-p)**k

     for ``k >= 0``.

     `nbinom` takes ``n`` and ``p`` as shape parameters.

     %(example)s

     """
     def _rvs(self, n, p):
         # Sampling is delegated to numpy's negative binomial generator.
         return mtrand.negative_binomial(n, p, self._size)

     def _argcheck(self, n, p):
         return (n >= 0) & (p >= 0) & (p <= 1)

     def _pmf(self, x, n, p):
         return exp(self._logpmf(x, n, p))

     def _logpmf(self, x, n, p):
         # log(choose(x+n-1, n-1)) expressed through log-gamma.
         coeff = gamln(n+x) - gamln(x+1) - gamln(n)
         # xlog1py returns 0 for x == 0 even when log1p(-p) is -inf (p == 1);
         # the plain product x*log1p(-p) would give nan there, although
         # pmf(0) must be exactly 1 for p == 1.
         return coeff + n*log(p) + special.xlog1py(x, -p)

     def _cdf(self, x, n, p):
         k = floor(x)
         return special.betainc(n, k+1, p)

     def _sf_skip(self, x, n, p):
         # skip because special.nbdtrc doesn't work for 0<n<1
         # (the `_skip` suffix keeps this from overriding the generic `_sf`)
         k = floor(x)
         return special.nbdtrc(k, n, p)

     def _ppf(self, q, n, p):
         vals = ceil(special.nbdtrik(q, n, p))
         # Candidate one step below, never leaving the support (k >= 0).
         vals1 = (vals-1).clip(0.0, np.inf)
         temp = self._cdf(vals1, n, p)
         return np.where(temp >= q, vals1, vals)

     def _stats(self, n, p):
         # Returns (mean, variance, skewness, excess kurtosis), written in
         # terms of Q = 1/p and P = (1-p)/p.
         Q = 1.0 / p
         P = Q - 1.0
         mu = n*P
         var = n*P*Q
         g1 = (Q+P)/sqrt(n*P*Q)
         g2 = (1.0 + 6*P*Q) / (n*P*Q)
         return mu, var, g1, g2
 nbinom = nbinom_gen(name='nbinom')
 class geom_gen(rv_discrete):
     """A geometric discrete random variable.

     %(before_notes)s

     Notes
     -----
     The probability mass function for `geom` is::

         geom.pmf(k) = (1-p)**(k-1)*p

     for ``k >= 1``.

     `geom` takes ``p`` as shape parameter.

     %(example)s

     """
     def _rvs(self, p):
         return mtrand.geometric(p, size=self._size)

     def _argcheck(self, p):
         return (p <= 1) & (p >= 0)

     def _pmf(self, k, p):
         failures = np.power(1-p, k-1)
         return failures * p

     def _logpmf(self, k, p):
         return (k-1)*log1p(-p) + log(p)

     def _cdf(self, x, p):
         # 1 - (1-p)**k, evaluated through expm1/log1p.
         k = floor(x)
         return -expm1(log1p(-p)*k)

     def _sf(self, x, p):
         return np.exp(self._logsf(x, p))

     def _logsf(self, x, p):
         return floor(x)*log1p(-p)

     def _ppf(self, q, p):
         upper = ceil(log1p(-q)/log1p(-p))
         lower = upper - 1
         # Step down by one when the smaller candidate already reaches q,
         # but only while the upper candidate is positive.
         step_down = (self._cdf(lower, p) >= q) & (upper > 0)
         return np.where(step_down, lower, upper)

     def _stats(self, p):
         # Returns (mean, variance, skewness, excess kurtosis).
         mean = 1.0/p
         q = 1.0-p
         variance = q / p / p
         skewness = (2.0-p) / sqrt(q)
         kurtosis = np.polyval([1, -6, 6], p)/(1.0-p)
         return mean, variance, skewness, kurtosis
 geom = geom_gen(a=1, name='geom', longname="A geometric")
 class hypergeom_gen(rv_discrete):
     """A hypergeometric discrete random variable.

     The hypergeometric distribution models drawing objects from a bin.
     M is the total number of objects, n is total number of Type I objects.
     The random variate represents the number of Type I objects in N drawn
     without replacement from the total population.

     %(before_notes)s

     Notes
     -----
     The probability mass function is defined as::

         pmf(k, M, n, N) = choose(n, k) * choose(M - n, N - k) / choose(M, N),
                                        for max(0, N - (M-n)) <= k <= min(n, N)

     Examples
     --------
     >>> from scipy.stats import hypergeom
     >>> import matplotlib.pyplot as plt

     Suppose we have a collection of 20 animals, of which 7 are dogs.  Then if
     we want to know the probability of finding a given number of dogs if we
     choose at random 12 of the 20 animals, we can initialize a frozen
     distribution and plot the probability mass function:

     >>> [M, n, N] = [20, 7, 12]
     >>> rv = hypergeom(M, n, N)
     >>> x = np.arange(0, n+1)
     >>> pmf_dogs = rv.pmf(x)

     >>> fig = plt.figure()
     >>> ax = fig.add_subplot(111)
     >>> ax.plot(x, pmf_dogs, 'bo')
     >>> ax.vlines(x, 0, pmf_dogs, lw=2)
     >>> ax.set_xlabel('# of dogs in our group of chosen animals')
     >>> ax.set_ylabel('hypergeom PMF')
     >>> plt.show()

     Instead of using a frozen distribution we can also use `hypergeom`
     methods directly.  To for example obtain the cumulative distribution
     function, use:

     >>> prb = hypergeom.cdf(x, M, n, N)

     And to generate random numbers:

     >>> R = hypergeom.rvs(M, n, N, size=10)

     """
     def _rvs(self, M, n, N):
         # numpy expects (ngood, nbad, nsample).
         return mtrand.hypergeometric(n, M-n, N, size=self._size)

     def _argcheck(self, M, n, N):
         # Start from the generic check and add the population constraints.
         cond = rv_discrete._argcheck(self, M, n, N)
         cond &= (n <= M) & (N <= M)
         # The support depends on the shape parameters.
         self.a = max(N-(M-n), 0)
         self.b = min(n, N)
         return cond

     def _logpmf(self, k, M, n, N):
         tot, good = M, n
         bad = tot - good
         # log of choose(good, k) * choose(bad, N-k) / choose(tot, N), with
         # every factorial written as a log-gamma term.
         acc = gamln(good+1)
         acc = acc - gamln(good-k+1)
         acc = acc - gamln(k+1)
         acc = acc + gamln(bad+1)
         acc = acc - gamln(bad-N+k+1)
         acc = acc - gamln(N-k+1)
         acc = acc - gamln(tot+1)
         acc = acc + gamln(tot-N+1)
         acc = acc + gamln(N+1)
         return acc

     def _pmf(self, k, M, n, N):
         # same as the following but numerically more precise
         # return comb(good, k) * comb(bad, N-k) / comb(tot, N)
         return exp(self._logpmf(k, M, n, N))

     def _stats(self, M, n, N):
         # tot, good, sample_size = M, n, N
         # "wikipedia".replace('N', 'M').replace('n', 'N').replace('K', 'n')
         M, n, N = 1.*M, 1.*n, 1.*N
         m = M - n
         mean = N*(n/M)
         variance = m*n*N*(M - N)*1.0/(M*M*(M-1))
         skewness = (m - n)*(M-2*N) / (M-2.0) * sqrt((M-1.0) / (m*n*N*(M-N)))
         kurtosis = M*(M+1) - 6.*N*(M-N) - 6.*n*m
         kurtosis = kurtosis * ((M-1)*M*M)
         kurtosis = kurtosis + 6.*n*N*(M-N)*m*(5.*M-6)
         kurtosis = kurtosis / (n * N * (M-N) * m * (M-2.) * (M-3.))
         return mean, variance, skewness, kurtosis

     def _entropy(self, M, n, N):
         support = np.arange(N - (M - n), min(n, N) + 1)
         probs = self.pmf(support, M, n, N)
         return -np.sum(special.xlogy(probs, probs), axis=0)

     def _sf(self, k, M, n, N):
         """More precise calculation, 1 - cdf doesn't cut it."""
         # `k` can be an array, in which case sf() hands over M, n and N as
         # arrays of the same shape; each element is therefore treated
         # separately.
         def upper_tail(quant, tot, good, draw):
             # Manual summation of the probability mass above `quant`; more
             # accurate than integrate.quad.
             support = np.arange(quant + 1, draw + 1)
             return np.sum(self._pmf(support, tot, good, draw))

         return np.asarray([upper_tail(*row) for row in zip(k, M, n, N)])
 hypergeom = hypergeom_gen(name='hypergeom')
 # FIXME: Fails _cdfvec
 class logser_gen(rv_discrete):
     """A Logarithmic (Log-Series, Series) discrete random variable.

     %(before_notes)s

     Notes
     -----
     The probability mass function for `logser` is::

         logser.pmf(k) = - p**k / (k*log(1-p))

     for ``k >= 1``.

     `logser` takes ``p`` as shape parameter.

     %(example)s

     """
     def _rvs(self, p):
         # looks wrong for p>0.5, too few k=1
         # trying to use generic is worse, no k=1 at all
         return mtrand.logseries(p, size=self._size)

     def _argcheck(self, p):
         return (p > 0) & (p < 1)

     def _pmf(self, k, p):
         numerator = -np.power(p, k) * 1.0
         return numerator / k / log1p(- p)

     def _stats(self, p):
         # Non-central moments (m1..m4) are converted to central ones and
         # then to (mean, variance, skewness, excess kurtosis).
         log_q = log1p(-p)
         m1 = p / (p - 1.0) / log_q
         m2 = -p / log_q / (p - 1.0)**2
         variance = m2 - m1*m1
         m3 = -p / log_q * (1.0+p) / (1.0 - p)**3
         central3 = m3 - 3*m1*m2 + 2*m1**3
         skewness = central3 / np.power(variance, 1.5)
         m4 = -p / log_q * (
             1.0 / (p-1)**2 - 6*p / (p - 1)**3 + 6*p*p / (p-1)**4)
         central4 = m4 - 4*m3*m1 + 6*m2*m1*m1 - 3*m1**4
         kurtosis = central4 / variance**2 - 3.0
         return m1, variance, skewness, kurtosis
 logser = logser_gen(a=1, name='logser', longname='A logarithmic')
 class poisson_gen(rv_discrete):
     """A Poisson discrete random variable.

     %(before_notes)s

     Notes
     -----
     The probability mass function for `poisson` is::

         poisson.pmf(k) = exp(-mu) * mu**k / k!

     for ``k >= 0``.

     `poisson` takes ``mu`` as shape parameter.

     %(example)s

     """
     def _rvs(self, mu):
         return mtrand.poisson(mu, self._size)

     def _logpmf(self, k, mu):
         # k*log(mu) - log(k!) - mu, with the factorial via log-gamma.
         return k*log(mu)-gamln(k+1) - mu

     def _pmf(self, k, mu):
         return exp(self._logpmf(k, mu))

     def _cdf(self, x, mu):
         return special.pdtr(floor(x), mu)

     def _sf(self, x, mu):
         return special.pdtrc(floor(x), mu)

     def _ppf(self, q, mu):
         upper = ceil(special.pdtrik(q, mu))
         lower = upper - 1
         # Step down by one whenever the smaller candidate already reaches q.
         lower_reaches_q = special.pdtr(lower, mu) >= q
         return np.where(lower_reaches_q, lower, upper)

     def _stats(self, mu):
         # Mean and variance both equal mu; skewness is mu**-0.5 and the
         # excess kurtosis is 1/mu.
         rate = np.asarray(mu)
         skewness = sqrt(1.0 / rate)
         kurtosis = 1.0 / rate
         return mu, mu, skewness, kurtosis
 poisson = poisson_gen(name="poisson", longname='A Poisson')
 class planck_gen(rv_discrete):
     """A Planck discrete exponential random variable.

     %(before_notes)s

     Notes
     -----
     The probability mass function for `planck` is::

         planck.pmf(k) = (1-exp(-lambda_))*exp(-lambda_*k)

     for ``k*lambda_ >= 0``.

     `planck` takes ``lambda_`` as shape parameter.

     %(example)s

     """
     def _argcheck(self, lambda_):
         # The sign of lambda_ decides which half-line carries the mass;
         # lambda_ == 0 is rejected.
         if lambda_ > 0:
             self.a, self.b = 0, np.inf
             return 1
         if lambda_ < 0:
             self.a, self.b = -np.inf, 0
             return 1
         return 0

     def _pmf(self, k, lambda_):
         normalizer = -expm1(-lambda_)
         return normalizer * exp(-lambda_ * k)

     def _cdf(self, x, lambda_):
         k = floor(x)
         return - expm1(-lambda_ * (k + 1))

     def _ppf(self, q, lambda_):
         upper = ceil(-1.0/lambda_ * log1p(-q)-1)
         # Candidate one step below, clipped at the lower end of the support.
         lower = (upper-1).clip(self.a, np.inf)
         lower_reaches_q = self._cdf(lower, lambda_) >= q
         return np.where(lower_reaches_q, lower, upper)

     def _stats(self, lambda_):
         # Returns (mean, variance, skewness, excess kurtosis).
         mean = 1/(exp(lambda_)-1)
         variance = exp(-lambda_)/(expm1(-lambda_))**2
         skewness = 2*cosh(lambda_/2.0)
         kurtosis = 4+2*cosh(lambda_)
         return mean, variance, skewness, kurtosis

     def _entropy(self, lambda_):
         normalizer = -expm1(-lambda_)
         return lambda_ * exp(-lambda_) / normalizer - log(normalizer)
 planck = planck_gen(name='planck', longname='A discrete exponential ')
 class boltzmann_gen(rv_discrete):
     """A Boltzmann (Truncated Discrete Exponential) random variable.

     %(before_notes)s

     Notes
     -----
     The probability mass function for `boltzmann` is::

         boltzmann.pmf(k) = (1-exp(-lambda_))*exp(-lambda_*k)/(1-exp(-lambda_*N))

     for ``k = 0,..., N-1``.

     `boltzmann` takes ``lambda_`` and ``N`` as shape parameters.

     %(example)s

     """
     def _pmf(self, k, lambda_, N):
         normalizer = (expm1(-lambda_))/(expm1(-lambda_*N))
         return normalizer*exp(-lambda_*k)

     def _cdf(self, x, lambda_, N):
         k = floor(x)
         return (expm1(-lambda_*(k+1)))/(expm1(-lambda_*N))

     def _ppf(self, q, lambda_, N):
         # Rescale q to the untruncated distribution, then invert its cdf.
         q_scaled = -q*(expm1(-lambda_*N))
         upper = ceil(-1.0/lambda_ * log1p(-q_scaled)-1)
         # Candidate one step below, never leaving the support (k >= 0).
         lower = (upper-1).clip(0.0, np.inf)
         lower_reaches_q = self._cdf(lower, lambda_, N) >= q
         return np.where(lower_reaches_q, lower, upper)

     def _stats(self, lambda_, N):
         # Returns (mean, variance, skewness, excess kurtosis) in terms of
         # z = exp(-lambda_) and zN = exp(-lambda_*N).
         z = exp(-lambda_)
         zN = exp(-lambda_*N)
         mean = z/(1.0-z)-N*zN/(1-zN)
         variance = z/(1.0-z)**2 - N*N*zN/(1-zN)**2
         ratio = (1-zN)/(1-z)
         scale = (z*ratio**2 - N*N*zN)
         skewness = z*(1+z)*ratio**3 - N**3*zN*(1+zN)
         skewness = skewness / scale**(1.5)
         kurtosis = z*(1+4*z+z*z)*ratio**4 - N**4 * zN*(1+4*zN+zN*zN)
         kurtosis = kurtosis / scale / scale
         return mean, variance, skewness, kurtosis
 boltzmann = boltzmann_gen(name='boltzmann',
        longname='A truncated discrete exponential ')
 class randint_gen(rv_discrete):
     """A uniform discrete random variable.

     %(before_notes)s

     Notes
     -----
     The probability mass function for `randint` is::

         randint.pmf(k) = 1./(high - low)

     for ``k = low, ..., high - 1``.

     `randint` takes ``low`` and ``high`` as shape parameters.

     Note the difference to the numpy ``random_integers`` which
     returns integers on a *closed* interval ``[low, high]``.

     %(example)s

     """
     def _argcheck(self, low, high):
         # Support is the half-open integer range [low, high).
         self.a = low
         self.b = high - 1
         return (high > low)

     def _pmf(self, k, low, high):
         uniform_mass = np.ones_like(k) / (high - low)
         inside = (k >= low) & (k < high)
         return np.where(inside, uniform_mass, 0.)

     def _cdf(self, x, low, high):
         k = floor(x)
         return (k - low + 1.) / (high - low)

     def _ppf(self, q, low, high):
         upper = ceil(q * (high - low) + low) - 1
         lower = (upper - 1).clip(low, high)
         # Step down by one whenever the smaller candidate already reaches q.
         lower_reaches_q = self._cdf(lower, low, high) >= q
         return np.where(lower_reaches_q, lower, upper)

     def _stats(self, low, high):
         # Returns (mean, variance, skewness, excess kurtosis).
         hi, lo = np.asarray(high), np.asarray(low)
         mean = (hi + lo - 1.0) / 2
         width = hi - lo
         variance = (width*width - 1) / 12.0
         skewness = 0.0
         kurtosis = -6.0/5.0 * (width*width + 1.0) / (width*width - 1.0)
         return mean, variance, skewness, kurtosis

     def _rvs(self, low, high=None):
         """An array of *size* random integers >= ``low`` and < ``high``.

         If ``high`` is ``None``, then range is >=0  and < low
         """
         return mtrand.randint(low, high, self._size)

     def _entropy(self, low, high):
         return log(high - low)
 randint = randint_gen(name='randint', longname='A discrete uniform '
                      '(random integer)')
 # FIXME: problems sampling.
 class zipf_gen(rv_discrete):
     """A Zipf discrete random variable.

     %(before_notes)s

     Notes
     -----
     The probability mass function for `zipf` is::

         zipf.pmf(k, a) = 1/(zeta(a) * k**a)

     for ``k >= 1``.

     `zipf` takes ``a`` as shape parameter.

     %(example)s

     """
     def _rvs(self, a):
         return mtrand.zipf(a, size=self._size)

     def _argcheck(self, a):
         return a > 1

     def _pmf(self, k, a):
         inverse_norm = 1.0 / special.zeta(a, 1)
         return inverse_norm / k**a

     def _munp(self, n, a):
         # The n-th raw moment is zeta(a-n)/zeta(a) where a > n + 1; every
         # other entry is filled with inf.
         def zeta_ratio(a, n):
             return special.zeta(a - n, 1) / special.zeta(a, 1)

         return _lazywhere(a > n + 1, (a, n), zeta_ratio, np.inf)
 zipf = zipf_gen(a=1, name='zipf', longname='A Zipf')
 class dlaplace_gen(rv_discrete):
     """A  Laplacian discrete random variable.

     %(before_notes)s

     Notes
     -----
     The probability mass function for `dlaplace` is::

         dlaplace.pmf(k) = tanh(a/2) * exp(-a*abs(k))

     for ``a > 0``.

     `dlaplace` takes ``a`` as shape parameter.

     %(example)s

     """
     def _pmf(self, k, a):
         normalizer = tanh(a/2.0)
         return normalizer * exp(-a * abs(k))

     def _cdf(self, x, a):
         k = floor(x)

         def f(k, a):
             # branch used where k >= 0
             return 1.0 - exp(-a * k) / (exp(a) + 1)

         def f2(k, a):
             # branch used where k < 0
             return exp(a * (k+1)) / (exp(a) + 1)

         return _lazywhere(k >= 0, (k, a), f=f, f2=f2)

     def _ppf(self, q, a):
         const = 1 + exp(a)
         in_lower_branch = q < 1.0 / (1 + exp(-a))
         lower_branch = log(q*const) / a - 1
         upper_branch = -log((1-q) * const) / a
         upper = ceil(np.where(in_lower_branch, lower_branch, upper_branch))
         lower = upper - 1
         # Step down by one whenever the smaller candidate already reaches q.
         return np.where(self._cdf(lower, a) >= q, lower, upper)

     def _stats(self, a):
         # Symmetric about zero, so mean and skewness vanish; the excess
         # kurtosis follows from the second and fourth central moments.
         ea = exp(a)
         second = 2.*ea/(ea-1.)**2
         fourth = 2.*ea*(ea**2+10.*ea+1.) / (ea-1.)**4
         return 0., second, 0., fourth/second**2 - 3.

     def _entropy(self, a):
         return a / sinh(a) - log(tanh(a/2.0))
 dlaplace = dlaplace_gen(a=-np.inf,
                        name='dlaplace', longname='A discrete Laplacian')
 class skellam_gen(rv_discrete):
     """A  Skellam discrete random variable.

     %(before_notes)s

     Notes
     -----
     Probability distribution of the difference of two correlated or
     uncorrelated Poisson random variables.

     Let k1 and k2 be two Poisson-distributed r.v. with expected values
     lam1 and lam2. Then, ``k1 - k2`` follows a Skellam distribution with
     parameters ``mu1 = lam1 - rho*sqrt(lam1*lam2)`` and
     ``mu2 = lam2 - rho*sqrt(lam1*lam2)``, where rho is the correlation
     coefficient between k1 and k2. If the two Poisson-distributed r.v.
     are independent then ``rho = 0``.

     Parameters mu1 and mu2 must be strictly positive.

     For details see: http://en.wikipedia.org/wiki/Skellam_distribution

     `skellam` takes ``mu1`` and ``mu2`` as shape parameters.

     %(example)s

     """
     def _rvs(self, mu1, mu2):
         # Difference of two Poisson samples drawn with the same size.
         size = self._size
         first = mtrand.poisson(mu1, size)
         second = mtrand.poisson(mu2, size)
         return first - second

     def _pmf(self, x, mu1, mu2):
         # Both branches go through the non-central chi-squared pdf helper;
         # np.where selects per element according to the sign of x.
         negative_side = _ncx2_pdf(2*mu2, 2*(1-x), 2*mu1)*2
         other_side = _ncx2_pdf(2*mu1, 2*(1+x), 2*mu2)*2
         # ncx2.pdf() returns nan's for extremely low probabilities
         return np.where(x < 0, negative_side, other_side)

     def _cdf(self, x, mu1, mu2):
         x = floor(x)
         negative_side = _ncx2_cdf(2*mu2, -2*x, 2*mu1)
         other_side = 1-_ncx2_cdf(2*mu1, 2*(x+1), 2*mu2)
         return np.where(x < 0, negative_side, other_side)

     def _stats(self, mu1, mu2):
         # Returns (mean, variance, skewness, excess kurtosis).
         mean = mu1 - mu2
         variance = mu1 + mu2
         skewness = mean / sqrt((variance)**3)
         kurtosis = 1 / variance
         return mean, variance, skewness, kurtosis
 skellam = skellam_gen(a=-np.inf, name="skellam", longname='A Skellam')
--- a/pywafo/src/wafo/stats/_distn_infrastructure.py
+++ b/pywafo/src/wafo/stats/_distn_infrastructure.py
--- a/pywafo/src/wafo/stats/_multivariate.py
+++ b/pywafo/src/wafo/stats/_multivariate.py
@ -0,0 +1,493 @@
 #
 # Author: Joris Vankerschaver 2013
 #
 from __future__ import division, print_function, absolute_import
 from scipy.misc import doccer
 from functools import wraps
 import numpy as np
 import scipy.linalg
 __all__ = ['multivariate_normal']
 # log(2*pi), computed once at import; used in the normalisation term of the
 # multivariate normal log-density and in the frozen entropy.
 _LOG_2PI = np.log(2 * np.pi)
 def _process_parameters(dim, mean, cov):
    """
    Infer dimensionality from mean or covariance matrix, ensure that
    mean and covariance are full vector resp. matrix.
    """
    # Try to infer dimensionality
    if dim is None:
        if mean is None:
            if cov is None:
                dim = 1
            else:
                cov = np.asarray(cov, dtype=float)
                if cov.ndim < 2:
                    dim = 1
                else:
                    dim = cov.shape[0]
        else:
            mean = np.asarray(mean, dtype=float)
            dim = mean.size
    else:
        if not np.isscalar(dim):
            raise ValueError("Dimension of random variable must be a scalar.")
    # Check input sizes and return full arrays for mean and cov if necessary
    if mean is None:
        mean = np.zeros(dim)
    mean = np.asarray(mean, dtype=float)
    if cov is None:
        cov = 1.0
    cov = np.asarray(cov, dtype=float)
    if dim == 1:
        mean.shape = (1,)
        cov.shape = (1, 1)
    if mean.ndim != 1 or mean.shape[0] != dim:
        raise ValueError("Array 'mean' must be vector of length %d." % dim)
    if cov.ndim == 0:
        cov = cov * np.eye(dim)
    elif cov.ndim == 1:
        cov = np.diag(cov)
    else:
        if cov.shape != (dim, dim):
            raise ValueError("Array 'cov' must be at most two-dimensional,"
                                 " but cov.ndim = %d" % cov.ndim)
    return dim, mean, cov
 def _process_quantiles(x, dim):
    """
    Return `x` as a float array whose last axis indexes the components
    of each data point.

    A scalar becomes a length-1 vector. A 1-D array is read as many
    one-component points when ``dim == 1`` and as a single point
    otherwise. Arrays with two or more dimensions pass through as is.
    """
    points = np.asarray(x, dtype=float)
    if points.ndim == 0:
        return points[np.newaxis]
    if points.ndim == 1:
        if dim == 1:
            # n scalar observations -> shape (n, 1)
            return points[:, np.newaxis]
        # one observation with several components -> shape (1, n)
        return points[np.newaxis, :]
    return points
 def _squeeze_output(out):
    """
    Drop all length-one axes from `out`; if nothing but a single value
    remains, return it as a scalar instead of a 0-d array.
    """
    squeezed = out.squeeze()
    # Indexing a 0-d array with an empty tuple extracts the scalar.
    return squeezed[()] if squeezed.ndim == 0 else squeezed
 def _pinv_1d(v, eps=1e-5):
    """
    Element-wise pseudo-inverse of a sequence of numbers.

    Parameters
    ----------
    v : iterable of numbers
        Typically a vector of eigenvalues or singular values.
    eps : float
        Entries whose absolute value is below `eps` are treated as zero.

    Returns
    -------
    v_pinv : 1d float ndarray
        ``1/x`` for each entry that is not negligible, ``0`` otherwise.

    """
    inverted = []
    for value in v:
        if abs(value) < eps:
            inverted.append(0)
        else:
            inverted.append(1/value)
    return np.array(inverted, dtype=float)
 def _psd_pinv_decomposed_log_pdet(mat, cond=None, rcond=None,
                                   lower=True, check_finite=True):
    """
    Factor the pseudo-inverse of a symmetric positive semi-definite
    matrix and compute the logarithm of its pseudo-determinant.

    The pseudo-determinant is the product of the non-zero eigenvalues;
    for a matrix of full rank it equals the ordinary determinant.

    Parameters
    ----------
    mat : array_like
        Symmetric input matrix. It is handed to `scipy.linalg.eigh`,
        which requires it to be square.
    cond, rcond : float or None
        Relative cutoff for 'small' eigenvalues: eigenvalues smaller
        than ``rcond*largest_eigenvalue`` are considered zero. `rcond`
        takes precedence when both are given. If None or -1, a multiple
        of the machine epsilon is used.
    lower : bool, optional
        Whether the pertinent array data is taken from the lower or upper
        triangle of `mat`. (Default: lower)
    check_finite : boolean, optional
        Whether to check that the input matrix contains only finite numbers.
        Disabling may give a performance gain, but may result in problems
        (crashes, non-termination) if the inputs do contain infinities or NaNs.

    Returns
    -------
    M : array_like
        The pseudo-inverse of the input matrix is np.dot(M, M.T).
    log_pdet : float
        Logarithm of the pseudo-determinant of the matrix.

    Raises
    ------
    ValueError
        If an eigenvalue lies below minus the cutoff, i.e. the matrix is
        not positive semi-definite.

    """
    # TODO: the code to set cond/rcond is identical to that in
    # scipy.linalg.{pinvh, pinv2} and if/when this function is subsumed
    # into scipy.linalg it should probably be shared between all of
    # these routines.

    # eigh performs the array conversion, the finiteness check and the
    # squareness assertion itself.
    eigvals, eigvecs = scipy.linalg.eigh(mat, lower=lower,
                                         check_finite=check_finite)

    if rcond is not None:
        cond = rcond
    if cond in [None, -1]:
        # Default relative cutoff, scaled by the floating-point precision.
        type_char = eigvecs.dtype.char.lower()
        multiplier = {'f': 1E3, 'd': 1E6}
        cond = multiplier[type_char] * np.finfo(type_char).eps
    cutoff = cond * np.max(abs(eigvals))

    # Slightly negative eigenvalues are tolerated as round-off; anything
    # below -cutoff means the matrix is genuinely indefinite.
    if np.min(eigvals) < -cutoff:
        raise ValueError('the covariance matrix must be positive semidefinite')

    inv_eigvals = _pinv_1d(eigvals, cutoff)
    # Scale the eigenvector columns so that U * U.T is the pseudo-inverse.
    U = np.multiply(eigvecs, np.sqrt(inv_eigvals))
    significant = eigvals[eigvals > cutoff]
    log_pdet = np.sum(np.log(significant))

    return U, log_pdet
 # Docstring fragments for the multivariate normal classes. They are
 # substituted for the ``%(...)s`` placeholders in the class and method
 # docstrings through ``doccer.docformat``.

 # Parameter descriptions for methods that accept `mean` and `cov`.
 _doc_default_callparams = \
 """mean : array_like, optional
    Mean of the distribution (default zero)
 cov : array_like, optional
    Covariance matrix of the distribution (default one)
 """
 # Explanatory note on the accepted forms of `mean` and `cov`.
 _doc_callparams_note = \
 """Setting the parameter `mean` to `None` is equivalent to having `mean`
 be the zero-vector. The parameter `cov` can be a scalar, in which case
 the covariance matrix is the identity times that value, a vector of
 diagonal entries for the covariance matrix, or a two-dimensional
 array_like.
 """
 # Replacements used for the frozen distribution, whose methods take no
 # `mean`/`cov` arguments: the parameter list is left empty and the note
 # refers the reader to the class documentation.
 _doc_frozen_callparams = ""
 _doc_frozen_callparams_note = \
 """See class definition for a detailed description of parameters."""
 # Substitutions for the generator (methods with mean/cov parameters).
 docdict_params = {
    '_doc_default_callparams': _doc_default_callparams,
    '_doc_callparams_note': _doc_callparams_note
 }
 # Substitutions for the frozen distribution (same keys, frozen texts).
 docdict_noparams = {
    '_doc_default_callparams': _doc_frozen_callparams,
    '_doc_callparams_note': _doc_frozen_callparams_note
 }
 class multivariate_normal_gen(object):
    r"""
    A multivariate normal random variable.

    The `mean` keyword specifies the mean. The `cov` keyword specifies the
    covariance matrix.

    .. versionadded:: 0.14.0

    Methods
    -------
    pdf(x, mean=None, cov=1)
        Probability density function.
    logpdf(x, mean=None, cov=1)
        Log of the probability density function.
    rvs(mean=None, cov=1, size=1)
        Draw random samples from a multivariate normal distribution.
    entropy(mean=None, cov=1)
        Compute the differential entropy of the multivariate normal.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
    %(_doc_default_callparams)s

    Alternatively, the object may be called (as a function) to fix the mean
    and covariance parameters, returning a "frozen" multivariate normal
    random variable:

    rv = multivariate_normal(mean=None, cov=1)
        - Frozen  object with the same methods but holding the given
          mean and covariance fixed.

    Notes
    -----
    %(_doc_callparams_note)s

    The covariance matrix `cov` must be a (symmetric) positive
    semi-definite matrix. The determinant and inverse of `cov` are computed
    as the pseudo-determinant and pseudo-inverse, respectively, so
    that `cov` does not need to have full rank.

    The probability density function for `multivariate_normal` is

    .. math::

        f(x) = \frac{1}{\sqrt{(2 \pi)^k \det \Sigma}} \exp\left( -\frac{1}{2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right),

    where :math:`\mu` is the mean, :math:`\Sigma` the covariance matrix,
    and :math:`k` is the dimension of the space where :math:`x` takes values.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from scipy.stats import multivariate_normal
    >>> x = np.linspace(0, 5, 10, endpoint=False)
    >>> y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y
    array([ 0.00108914,  0.01033349,  0.05946514,  0.20755375,  0.43939129,
            0.56418958,  0.43939129,  0.20755375,  0.05946514,  0.01033349])
    >>> plt.plot(x, y)

    The input quantiles can be any shape of array, as long as the last
    axis labels the components.  This allows us for instance to
    display the frozen pdf for a non-isotropic random variable in 2D as
    follows:

    >>> x, y = np.mgrid[-1:1:.01, -1:1:.01]
    >>> pos = np.empty(x.shape + (2,))
    >>> pos[:, :, 0] = x; pos[:, :, 1] = y
    >>> rv = multivariate_normal([0.5, -0.2], [[2.0, 0.3], [0.3, 0.5]])
    >>> plt.contourf(x, y, rv.pdf(pos))

    """

    def __init__(self):
        # Render the placeholders of the class docstring for this instance.
        self.__doc__ = doccer.docformat(self.__doc__, docdict_params)

    def __call__(self, mean=None, cov=1):
        """
        Create a frozen multivariate normal distribution.

        See `multivariate_normal_frozen` for more information.

        """
        return multivariate_normal_frozen(mean, cov)

    def _logpdf(self, x, mean, prec_U, log_det_cov):
        """
        Parameters
        ----------
        x : ndarray
            Points at which to evaluate the log of the probability
            density function
        mean : ndarray
            Mean of the distribution
        prec_U : ndarray
            A decomposition such that np.dot(prec_U, prec_U.T)
            is the precision matrix, i.e. inverse of the covariance matrix.
        log_det_cov : float
            Logarithm of the determinant of the covariance matrix

        Notes
        -----
        As this function does no argument checking, it should not be
        called directly; use 'logpdf' instead.

        """
        dim = x.shape[-1]
        dev = x - mean
        # Squared Mahalanobis distance, computed via the factor prec_U so
        # the precision matrix itself is never formed.
        maha = np.sum(np.square(np.dot(dev, prec_U)), axis=-1)
        return -0.5 * (dim * _LOG_2PI + log_det_cov + maha)

    def logpdf(self, x, mean=None, cov=1):
        """
        Log of the multivariate normal probability density function.

        Parameters
        ----------
        x : array_like
            Quantiles, with the last axis of `x` denoting the components.
        %(_doc_default_callparams)s

        Notes
        -----
        %(_doc_callparams_note)s

        Returns
        -------
        pdf : ndarray
            Log of the probability density function evaluated at `x`

        """
        dim, mean, cov = _process_parameters(None, mean, cov)
        x = _process_quantiles(x, dim)
        prec_U, log_det_cov = _psd_pinv_decomposed_log_pdet(cov)
        out = self._logpdf(x, mean, prec_U, log_det_cov)
        return _squeeze_output(out)

    def pdf(self, x, mean=None, cov=1):
        """
        Multivariate normal probability density function.

        Parameters
        ----------
        x : array_like
            Quantiles, with the last axis of `x` denoting the components.
        %(_doc_default_callparams)s

        Notes
        -----
        %(_doc_callparams_note)s

        Returns
        -------
        pdf : ndarray
            Probability density function evaluated at `x`

        """
        dim, mean, cov = _process_parameters(None, mean, cov)
        x = _process_quantiles(x, dim)
        prec_U, log_det_cov = _psd_pinv_decomposed_log_pdet(cov)
        out = np.exp(self._logpdf(x, mean, prec_U, log_det_cov))
        return _squeeze_output(out)

    def rvs(self, mean=None, cov=1, size=1):
        """
        Draw random samples from a multivariate normal distribution.

        Parameters
        ----------
        %(_doc_default_callparams)s
        size : integer, optional
            Number of samples to draw (default 1).

        Notes
        -----
        %(_doc_callparams_note)s

        Returns
        -------
        rvs : ndarray or scalar
            Random variates of size (`size`, `N`), where `N` is the
            dimension of the random variable.

        """
        dim, mean, cov = _process_parameters(None, mean, cov)
        out = np.random.multivariate_normal(mean, cov, size)
        return _squeeze_output(out)

    def entropy(self, mean=None, cov=1):
        """
        Compute the differential entropy of the multivariate normal.

        Parameters
        ----------
        mean : array_like, optional
            Mean of the distribution (default zero). It is validated but
            does not influence the result.
        cov : array_like, optional
            Covariance matrix of the distribution (default one). May be a
            scalar, a vector of diagonal entries, or a two-dimensional
            array_like.

        Returns
        -------
        h : scalar
            Entropy of the multivariate normal distribution

        Notes
        -----
        The value is half the log-determinant of ``2*pi*e*cov``. The
        log-determinant is obtained with `numpy.linalg.slogdet` so that
        the determinant itself, which can overflow or underflow in high
        dimension, is never formed. A singular `cov` yields ``-inf``.

        """
        dim, mean, cov = _process_parameters(None, mean, cov)
        _, logdet = np.linalg.slogdet(2 * np.pi * np.e * cov)
        return 0.5 * logdet
 # Module-level generator instance exported through ``__all__``. Calling it
 # returns a ``multivariate_normal_frozen`` with mean and cov fixed.
 multivariate_normal = multivariate_normal_gen()
 class multivariate_normal_frozen(object):
    def __init__(self, mean=None, cov=1):
        """
        Create a frozen multivariate normal distribution.

        The parameters are validated and the covariance is factorised
        once here, so later evaluations reuse the stored decomposition.

        Parameters
        ----------
        mean : array_like, optional
            Mean of the distribution (default zero)
        cov : array_like, optional
            Covariance matrix of the distribution (default one)

        Examples
        --------
        When called with the default parameters, this will create a 1D random
        variable with mean 0 and covariance 1:

        >>> from scipy.stats import multivariate_normal
        >>> r = multivariate_normal()
        >>> r.mean
        array([ 0.])
        >>> r.cov
        array([[1.]])

        """
        dim, full_mean, full_cov = _process_parameters(None, mean, cov)
        self.dim = dim
        self.mean = full_mean
        self.cov = full_cov
        prec_factor, log_pdet = _psd_pinv_decomposed_log_pdet(full_cov)
        self.prec_U = prec_factor
        self._log_det_cov = log_pdet
        self._mnorm = multivariate_normal_gen()

    # The docstrings of logpdf, pdf and rvs are assigned after the class
    # definition from the corresponding generator methods.
    def logpdf(self, x):
        quantiles = _process_quantiles(x, self.dim)
        log_density = self._mnorm._logpdf(quantiles, self.mean,
                                          self.prec_U, self._log_det_cov)
        return _squeeze_output(log_density)

    def pdf(self, x):
        log_density = self.logpdf(x)
        return np.exp(log_density)

    def rvs(self, size=1):
        generator = self._mnorm
        return generator.rvs(self.mean, self.cov, size)

    def entropy(self):
        """
        Computes the differential entropy of the multivariate normal.

        Uses the log pseudo-determinant stored at construction time.

        Returns
        -------
        h : scalar
            Entropy of the multivariate normal distribution

        """
        constant_part = self.dim * (_LOG_2PI + 1)
        return 1/2 * (constant_part + self._log_det_cov)
 # Set frozen generator docstrings from corresponding docstrings in
 # multivariate_normal_gen and fill in default strings in class docstrings.
 #
 # Order matters inside the loop: the frozen docstring is rendered from the
 # generator method's still-unformatted template (with the "noparams"
 # fragments) before that template is overwritten by its own rendering with
 # the full parameter descriptions.
 # Only the three methods listed are processed; `entropy` is not included.
 for name in ['logpdf', 'pdf', 'rvs']:
    method = multivariate_normal_gen.__dict__[name]
    method_frozen = multivariate_normal_frozen.__dict__[name]
    method_frozen.__doc__ = doccer.docformat(method.__doc__, docdict_noparams)
    method.__doc__ = doccer.docformat(method.__doc__, docdict_params)
--- a/pywafo/src/wafo/stats/_tukeylambda_stats.py
+++ b/pywafo/src/wafo/stats/_tukeylambda_stats.py
@ -0,0 +1,201 @@
 from __future__ import division, print_function, absolute_import
 import numpy as np
 from numpy import poly1d
 from scipy.special import beta
 # The following code was used to generate the Pade coefficients for the
 # Tukey Lambda variance function.  Version 0.17 of mpmath was used.
 #---------------------------------------------------------------------------
 # import mpmath as mp
 #
 # mp.mp.dps = 60
 #
 # one   = mp.mpf(1)
 # two   = mp.mpf(2)
 # three = mp.mpf(3)
 #
 # def mpvar(lam):
 #     if lam == 0:
 #         v = mp.pi**2 / three
 #     else:
 #         v = (two / lam**2) * (one / (one + two*lam) -
 #                               mp.beta(lam + one, lam + one))
 #     return v
 #
 # t = mp.taylor(mpvar, 0, 8)
 # p, q = mp.pade(t, 4, 4)
 # print "p =", [mp.fp.mpf(c) for c in p]
 # print "q =", [mp.fp.mpf(c) for c in q]
 #---------------------------------------------------------------------------
 # Pade coefficients for the Tukey Lambda variance function.
 _tukeylambda_var_pc = [3.289868133696453, 0.7306125098871127,
                       -0.5370742306855439, 0.17292046290190008,
                       -0.02371146284628187]
 _tukeylambda_var_qc = [1.0, 3.683605511659861, 4.184152498888124,
                       1.7660926747377275, 0.2643989311168465]
 # numpy.poly1d instances for the numerator and denominator of the
 # Pade approximation to the Tukey Lambda variance.
 _tukeylambda_var_p = poly1d(_tukeylambda_var_pc[::-1])
 _tukeylambda_var_q = poly1d(_tukeylambda_var_qc[::-1])
 def tukeylambda_variance(lam):
    """Variance of the Tukey Lambda distribution.

    Parameters
    ----------
    lam : array_like
        The lambda values at which to compute the variance.

    Returns
    -------
    v : ndarray
        The variance, with the same shape as `lam`.  For lam < -0.5, the
        variance is not defined, so np.nan is returned.  For lam = -0.5,
        np.inf is returned.

    Notes
    -----
    In an interval around lambda=0, this function uses the [4,4] Pade
    approximation to compute the variance.  Otherwise it uses the standard
    formula (http://en.wikipedia.org/wiki/Tukey_lambda_distribution).  The
    Pade approximation is used because the standard formula has a removable
    discontinuity at lambda = 0, and does not produce accurate numerical
    results near lambda = 0.
    """
    lam = np.asarray(lam)
    original_shape = lam.shape
    lam = np.atleast_1d(lam).astype(np.float64)

    # Below this absolute value of lambda the Pade approximation is used.
    pade_cutoff = 0.075

    # The four cases are mutually exclusive, so each element of the
    # result is written exactly once.
    is_undefined = lam < -0.5            # variance = nan
    is_infinite = lam == -0.5            # variance = inf
    near_zero = np.abs(lam) < pade_cutoff
    use_formula = ~(is_undefined | is_infinite | near_zero)

    v = np.empty_like(lam)
    v[is_undefined] = np.nan
    v[is_infinite] = np.inf

    lam_near_zero = lam[near_zero]
    if lam_near_zero.size > 0:
        # Ratio of the numerator and denominator Pade polynomials.
        v[near_zero] = (_tukeylambda_var_p(lam_near_zero) /
                        _tukeylambda_var_q(lam_near_zero))

    lam_formula = lam[use_formula]
    if lam_formula.size > 0:
        # Explicit closed-form expression.
        v[use_formula] = (2.0 / lam_formula**2) * (
            1.0 / (1.0 + 2 * lam_formula) -
            beta(lam_formula + 1, lam_formula + 1))

    # Restore the input shape (0-d for a scalar argument).
    v.shape = original_shape
    return v
 # The following code was used to generate the Pade coefficients for the
 # Tukey Lambda kurtosis function.  Version 0.17 of mpmath was used.
 #---------------------------------------------------------------------------
 # import mpmath as mp
 #
 # mp.mp.dps = 60
 #
 # one   = mp.mpf(1)
 # two   = mp.mpf(2)
 # three = mp.mpf(3)
 # four  = mp.mpf(4)
 #
 # def mpkurt(lam):
 #     if lam == 0:
 #         k = mp.mpf(6)/5
 #     else:
 #         numer = (one/(four*lam+one) - four*mp.beta(three*lam+one, lam+one) +
 #                  three*mp.beta(two*lam+one, two*lam+one))
 #         denom = two*(one/(two*lam+one) - mp.beta(lam+one,lam+one))**2
 #         k = numer / denom - three
 #     return k
 #
 # # There is a bug in mpmath 0.17: when we use the 'method' keyword of the
 # # taylor function and we request a degree 9 Taylor polynomial, we actually
 # # get degree 8.
 # t = mp.taylor(mpkurt, 0, 9, method='quad', radius=0.01)
 # t = [mp.chop(c, tol=1e-15) for c in t]
 # p, q = mp.pade(t, 4, 4)
 # print "p =", [mp.fp.mpf(c) for c in p]
 # print "q =", [mp.fp.mpf(c) for c in q]
 #---------------------------------------------------------------------------
 # Pade coefficients for the Tukey Lambda kurtosis function.
 _tukeylambda_kurt_pc = [1.2, -5.853465139719495, -22.653447381131077,
                        0.20601184383406815, 4.59796302262789]
 _tukeylambda_kurt_qc = [1.0, 7.171149192233599, 12.96663094361842,
                        0.43075235247853005, -2.789746758009912]
 # numpy.poly1d instances for the numerator and denominator of the
 # Pade approximation to the Tukey Lambda kurtosis.
 _tukeylambda_kurt_p = poly1d(_tukeylambda_kurt_pc[::-1])
 _tukeylambda_kurt_q = poly1d(_tukeylambda_kurt_qc[::-1])
 def tukeylambda_kurtosis(lam):
    """Kurtosis of the Tukey Lambda distribution.

    Parameters
    ----------
    lam : array_like
        The lambda values at which to compute the kurtosis.

    Returns
    -------
    k : ndarray
        The kurtosis with 3 subtracted (excess kurtosis), with the same
        shape as `lam`.  For lam < -0.25, the kurtosis is not defined, so
        np.nan is returned.  For lam = -0.25, np.inf is returned.

    Notes
    -----
    For ``abs(lam) < 0.055`` a [4,4] Pade approximation is evaluated;
    elsewhere the explicit formula in terms of the beta function is used.
    """
    lam = np.asarray(lam)
    original_shape = lam.shape
    lam = np.atleast_1d(lam).astype(np.float64)

    # Below this absolute value of lambda the Pade approximation is used.
    pade_cutoff = 0.055

    # The four cases are mutually exclusive, so each element of the
    # result is written exactly once.
    is_undefined = lam < -0.25           # kurtosis = nan
    is_infinite = lam == -0.25           # kurtosis = inf
    near_zero = np.abs(lam) < pade_cutoff
    use_formula = ~(is_undefined | is_infinite | near_zero)

    k = np.empty_like(lam)
    k[is_undefined] = np.nan
    k[is_infinite] = np.inf

    lam_near_zero = lam[near_zero]
    if lam_near_zero.size > 0:
        # Ratio of the numerator and denominator Pade polynomials.
        k[near_zero] = (_tukeylambda_kurt_p(lam_near_zero) /
                        _tukeylambda_kurt_q(lam_near_zero))

    lam_formula = lam[use_formula]
    if lam_formula.size > 0:
        numer = (1.0 / (4 * lam_formula + 1) -
                 4 * beta(3 * lam_formula + 1, lam_formula + 1) +
                 3 * beta(2 * lam_formula + 1, 2 * lam_formula + 1))
        denom = 2 * (1.0/(2 * lam_formula + 1) -
                     beta(lam_formula + 1, lam_formula + 1))**2
        k[use_formula] = numer / denom - 3

    # The return value will be a numpy array; resetting the shape ensures that
    # if `lam` was a scalar, the return value is a 0-d array.
    k.shape = original_shape
    return k
--- a/pywafo/src/wafo/stats/contingency.py
+++ b/pywafo/src/wafo/stats/contingency.py
@ -0,0 +1,271 @@
 """Some functions for working with contingency tables (i.e. cross tabulations).
 """
 from __future__ import division, print_function, absolute_import
 from functools import reduce
 import numpy as np
 from .stats import power_divergence
 __all__ = ['margins', 'expected_freq', 'chi2_contingency']
 def margins(a):
    """Return a list of the marginal sums of the array `a`.

    Parameters
    ----------
    a : ndarray
        The array for which to compute the marginal sums.

    Returns
    -------
    margsums : list of ndarrays
        A list of length `a.ndim`.  `margsums[k]` is the result
        of summing `a` over all axes except `k`; it has the same
        number of dimensions as `a`, but the length of each axis
        except axis `k` will be 1.

    Examples
    --------
    >>> a = np.arange(12).reshape(2, 6)
    >>> a
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11]])
    >>> m0, m1 = margins(a)
    >>> m0
    array([[15],
           [51]])
    >>> m1
    array([[ 6,  8, 10, 12, 14, 16]])

    >>> b = np.arange(24).reshape(2,3,4)
    >>> m0, m1, m2 = margins(b)
    >>> m0
    array([[[ 66]],
           [[210]]])
    >>> m1
    array([[[ 60],
            [ 92],
            [124]]])
    >>> m2
    array([[[60, 66, 72, 78]]])

    """
    all_axes = list(range(a.ndim))
    # For each axis to keep, sum over every other axis; apply_over_axes
    # retains the summed axes with length 1.
    return [
        np.apply_over_axes(np.sum, a,
                           [other for other in all_axes if other != kept])
        for kept in all_axes
    ]
 def expected_freq(observed):
    """
    Compute the expected frequencies from a contingency table.

    Given an n-dimensional contingency table of observed frequencies,
    compute the expected frequencies for the table based on the marginal
    sums under the assumption that the groups associated with each
    dimension are independent.

    Parameters
    ----------
    observed : array_like
        The table of observed frequencies.  (While this function can handle
        a 1-D array, that case is trivial.  Generally `observed` is at
        least 2-D.)

    Returns
    -------
    expected : ndarray of float64
        The expected frequencies, based on the marginal sums of the table.
        Same shape as `observed`.

    Examples
    --------
    >>> observed = np.array([[10, 10, 20],[20, 20, 20]])
    >>> expected_freq(observed)
    array([[ 12.,  12.,  16.],
           [ 18.,  18.,  24.]])

    """
    # Work in floating point from the start: with integer input, the
    # product of marginal sums could overflow for large tables or counts.
    table = np.asarray(observed, dtype=np.float64)

    # Each marginal sum keeps length-1 axes, so multiplying them all
    # together broadcasts to the full table shape.
    marginal_sums = margins(table)
    product_of_margins = reduce(np.multiply, marginal_sums)

    grand_total = table.sum()
    n_dims = table.ndim
    return product_of_margins / grand_total ** (n_dims - 1)
 def chi2_contingency(observed, correction=True, lambda_=None):
    """Chi-square test of independence of variables in a contingency table.

    This function computes the chi-square statistic and p-value for the
    hypothesis test of independence of the observed frequencies in the
    contingency table [1]_ `observed`.  The expected frequencies are computed
    based on the marginal sums under the assumption of independence; see
    `scipy.stats.contingency.expected_freq`.  The number of degrees of
    freedom is (expressed using numpy functions and attributes)::

        dof = observed.size - sum(observed.shape) + observed.ndim - 1


    Parameters
    ----------
    observed : array_like
        The contingency table. The table contains the observed frequencies
        (i.e. number of occurrences) in each category.  In the two-dimensional
        case, the table is often described as an "R x C table".
    correction : bool, optional
        If True, *and* the degrees of freedom is 1, apply Yates' correction
        for continuity.  The effect of the correction is to adjust each
        observed value by 0.5 towards the corresponding expected value,
        without moving it past the expected value.
    lambda_ : float or str, optional.
        By default, the statistic computed in this test is Pearson's
        chi-squared statistic [2]_.  `lambda_` allows a statistic from the
        Cressie-Read power divergence family [3]_ to be used instead.  See
        `power_divergence` for details.

    Returns
    -------
    chi2 : float
        The test statistic.
    p : float
        The p-value of the test
    dof : int
        Degrees of freedom
    expected : ndarray, same shape as `observed`
        The expected frequencies, based on the marginal sums of the table.

    Raises
    ------
    ValueError
        If `observed` contains a negative value, is empty, or leads to a
        table of expected frequencies that contains a zero.

    See Also
    --------
    contingency.expected_freq
    fisher_exact
    chisquare
    power_divergence

    Notes
    -----
    An often quoted guideline for the validity of this calculation is that
    the test should be used only if the observed and expected frequency in
    each cell is at least 5.

    This is a test for the independence of different categories of a
    population. The test is only meaningful when the dimension of
    `observed` is two or more.  Applying the test to a one-dimensional
    table will always result in `expected` equal to `observed` and a
    chi-square statistic equal to 0.

    This function does not handle masked arrays, because the calculation
    does not make sense with missing values.

    Like stats.chisquare, this function computes a chi-square statistic;
    the convenience this function provides is to figure out the expected
    frequencies and degrees of freedom from the given contingency table.
    If these were already known, and if the Yates' correction was not
    required, one could use stats.chisquare.  That is, if one calls::

        chi2, p, dof, ex = chi2_contingency(obs, correction=False)

    then the following is true::

        (chi2, p) == stats.chisquare(obs.ravel(), f_exp=ex.ravel(),
                                     ddof=obs.size - 1 - dof)

    The `lambda_` argument was added in version 0.13.0 of scipy.

    References
    ----------
    .. [1] "Contingency table", http://en.wikipedia.org/wiki/Contingency_table
    .. [2] "Pearson's chi-squared test",
           http://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test
    .. [3] Cressie, N. and Read, T. R. C., "Multinomial Goodness-of-Fit
           Tests", J. Royal Stat. Soc. Series B, Vol. 46, No. 3 (1984),
           pp. 440-464.

    Examples
    --------
    A two-way example (2 x 3):

    >>> obs = np.array([[10, 10, 20], [20, 20, 20]])
    >>> chi2_contingency(obs)
    (2.7777777777777777,
     0.24935220877729619,
     2,
     array([[ 12.,  12.,  16.],
            [ 18.,  18.,  24.]]))

    Perform the test using the log-likelihood ratio (i.e. the "G-test")
    instead of Pearson's chi-squared statistic.

    >>> g, p, dof, expctd = chi2_contingency(obs, lambda_="log-likelihood")
    >>> g, p
    (2.7688587616781319, 0.25046668010954165)

    A four-way example (2 x 2 x 2 x 2):

    >>> obs = np.array(
    ...     [[[[12, 17],
    ...        [11, 16]],
    ...       [[11, 12],
    ...        [15, 16]]],
    ...      [[[23, 15],
    ...        [30, 22]],
    ...       [[14, 17],
    ...        [15, 16]]]])
    >>> chi2_contingency(obs)
    (8.7584514426741897,
     0.64417725029295503,
     11,
     array([[[[ 14.15462386,  14.15462386],
              [ 16.49423111,  16.49423111]],
             [[ 11.2461395 ,  11.2461395 ],
              [ 13.10500554,  13.10500554]]],
            [[[ 19.5591166 ,  19.5591166 ],
              [ 22.79202844,  22.79202844]],
             [[ 15.54012004,  15.54012004],
              [ 18.10873492,  18.10873492]]]]))
    """
    observed = np.asarray(observed)
    if np.any(observed < 0):
        raise ValueError("All values in `observed` must be nonnegative.")
    if observed.size == 0:
        raise ValueError("No data; `observed` has size 0.")

    expected = expected_freq(observed)
    if np.any(expected == 0):
        # Include one of the positions where expected is zero in
        # the exception message: the full index tuple of the first zero,
        # converted to plain ints so it prints cleanly.
        zeropos = tuple(int(i) for i in np.argwhere(expected == 0)[0])
        raise ValueError("The internally computed table of expected "
                         "frequencies has a zero element at %s." % (zeropos,))

    # The degrees of freedom
    dof = expected.size - sum(expected.shape) + expected.ndim - 1

    if dof == 0:
        # Degenerate case; this occurs when `observed` is 1D (or, more
        # generally, when it has only one nontrivial dimension).  In this
        # case, we also have observed == expected, so chi2 is 0.
        chi2 = 0.0
        p = 1.0
    else:
        if dof == 1 and correction:
            # Adjust `observed` according to Yates' correction for
            # continuity.  The step is capped at the distance to the
            # expected value so the adjusted observation never crosses it.
            diff = expected - observed
            direction = np.sign(diff)
            magnitude = np.minimum(0.5, np.abs(diff))
            observed = observed + magnitude * direction

        chi2, p = power_divergence(observed, expected,
                                   ddof=observed.size - 1 - dof, axis=None,
                                   lambda_=lambda_)

    return chi2, p, dof, expected
--- a/pywafo/src/wafo/stats/core.py
+++ b/pywafo/src/wafo/stats/core.py
@ -6,21 +6,25 @@ from scipy import special
 import numpy as np
 from numpy import inf
 from numpy import atleast_1d, nan, ndarray, sqrt, vstack, ones, where, zeros
-from numpy import arange, floor, linspace, asarray #, reshape, repeat, product
+# , reshape, repeat, product
 from numpy import arange, floor, linspace, asarray
 from time import gmtime, strftime
-__all__ = ['edf', 'edfcnd','reslife', 'dispersion_idx','decluster','findpot', 
+__all__ = [
    'edf', 'edfcnd', 'reslife', 'dispersion_idx', 'decluster', 'findpot',
           'declustering_time', 'interexceedance_times', 'extremal_idx']
 arr = asarray
 def now():
    '''
    Return the current UTC date and time formatted as a string,
    e.g. "Mon, 01 Jan 2024 12:00:00".
    '''
    utc_time = gmtime()
    return strftime("%a, %d %b %Y %H:%M:%S", utc_time)
 def valarray(shape, value=nan, typecode=None):
    """Return an array of all value.
    """
@ -31,21 +35,36 @@ def valarray(shape, value=nan, typecode=None):
    if not isinstance(out, ndarray):
        out = arr(out)
    return out
 def _cdff(self, x, dfn, dfd):
    return special.fdtr(dfn, dfd, x)
 def _cdft(x, df):
    return special.stdtr(df, x)
 def _invt(q, df):
    return special.stdtrit(df, q)
 def _cdfchi2(x, df):
    return special.chdtr(df, x)
 def _invchi2(q, df):
    return special.chdtri(df, q)
 def _cdfnorm(x):
    return special.ndtr(x)
 def _invnorm(q):
    return special.ndtri(q)
 def edf(x, method=2):
    '''
    Returns Empirical Distribution Function (EDF).
@ -84,6 +103,7 @@ def edf(x, method=2):
    F.setplotter('step')
    return F
 def edfcnd(x, c=None, method=2):
    '''
    Returns empirical Distribution Function CoNDitioned that X>=c (EDFCND).
@ -124,7 +144,8 @@ def edfcnd(x, c=None, method=2):
    return F
-def reslife(data, u=None, umin=None, umax=None, nu=None, nmin=3, alpha=0.05, plotflag=False):
+def reslife(data, u=None, umin=None, umax=None, nu=None, nmin=3, alpha=0.05,
            plotflag=False):
    '''
    Return Mean Residual Life, i.e., mean excesses vs thresholds
@ -135,7 +156,8 @@ def reslife(data, u=None, umin=None, umax=None, nu=None, nmin=3, alpha=0.05, plo
    u :  array-like
        threshold values (default linspace(umin, umax, nu))
    umin, umax : real scalars
-        Minimum and maximum threshold, respectively (default min(data), max(data)).
+        Minimum and maximum threshold, respectively
        (default min(data), max(data)).
    nu : scalar integer
        number of threshold values (default min(N-nmin,100))
    nmin : scalar integer
@ -144,7 +166,6 @@ def reslife(data, u=None, umin=None, umax=None, nu=None, nmin=3, alpha=0.05, plo
        Confidence coefficient (default 0.05)
    plotflag: bool
    Returns
    -------
    mrl : PlotData object
@ -193,7 +214,6 @@ def reslife(data, u=None, umin=None, umax=None, nu=None, nmin=3, alpha=0.05, plo
        u = linspace(umin, umax, nu)
    nu = len(u)
    #mrl1 = valarray(nu)
@ -217,15 +237,20 @@ def reslife(data, u=None, umin=None, umax=None, nu=None, nmin=3, alpha=0.05, plo
    #options.CI = [mrll,mrlu];
    #options.numdata = num;
    titleTxt = 'Mean residual life with %d%s CI' % (100 * p, '%')
-    res = PlotData(mrl, u, xlab='Threshold', ylab='Mean Excess', title=titleTxt)
+    res = PlotData(mrl, u, xlab='Threshold',
-    res.workspace = dict(numdata=num, umin=umin, umax=umax, nu=nu, nmin=nmin, alpha=alpha)
+                   ylab='Mean Excess', title=titleTxt)
-    res.children = [PlotData(vstack([mrll, mrlu]).T, u, xlab='Threshold', title=titleTxt)]
+    res.workspace = dict(
        numdata=num, umin=umin, umax=umax, nu=nu, nmin=nmin, alpha=alpha)
    res.children = [
        PlotData(vstack([mrll, mrlu]).T, u, xlab='Threshold', title=titleTxt)]
    res.plot_args_children = [':r']
    if plotflag:
        res.plot()
    return res
-def dispersion_idx(data, t=None, u=None, umin=None, umax=None, nu=None, nmin=10, tb=1,
+
 def dispersion_idx(
    data, t=None, u=None, umin=None, umax=None, nu=None, nmin=10, tb=1,
        alpha=0.05, plotflag=False):
    '''Return Dispersion Index vs threshold
@ -236,7 +261,8 @@ def dispersion_idx(data, t=None, u=None, umin=None, umax=None, nu=None, nmin=10,
    u :  array-like
        threshold values (default linspace(umin, umax, nu))
    umin, umax : real scalars
-        Minimum and maximum threshold, respectively (default min(data), max(data)).
+        Minimum and maximum threshold, respectively
        (default min(data), max(data)).
    nu : scalar integer
        number of threshold values (default min(N-nmin,100))
    nmin : scalar integer
@ -255,8 +281,9 @@ def dispersion_idx(data, t=None, u=None, umin=None, umax=None, nu=None, nmin=10,
        threshold where the number of exceedances in a fixed period (Tb) is
        consistent with a Poisson process.
    ok_u : array-like
-        all thresholds where the number of exceedances in a fixed period (Tb) is
+        all thresholds where the number of exceedances in a fixed period (Tb)
-        consistent with a Poisson process.
+        is consistent with a Poisson process.
    Notes
    ------
    DISPRSNIDX estimates the Dispersion Index (DI) as a function of threshold.
@ -292,7 +319,6 @@ def dispersion_idx(data, t=None, u=None, umin=None, umax=None, nu=None, nmin=10,
    fitgenparrange,
    extremal_idx
    References
    ----------
    Ribatet, M. A.,(2006),
@ -305,18 +331,6 @@ def dispersion_idx(data, t=None, u=None, umin=None, umax=None, nu=None, nmin=10,
         :489--494.}
    '''
 # This program is free software; you can redistribute it and/or modify it under the terms of the GNU
 # General Public License as published by the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 # This program is distributed in the hope that it will be useful, but without any warranty; without even
 # the implied warranty of merchantability or fitness for a particular purpose. See the GNU General Public
 # License for more details.
 # The GNU General Public License can be obtained from http://www.gnu.org/copyleft/gpl.html. You
 # can also obtain it by writing to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 #  MA 02111-1307, USA.
    n = len(data)
    if t is None:
        ti = arange(n)
@ -326,11 +340,9 @@ def dispersion_idx(data, t=None, u=None, umin=None, umax=None, nu=None, nmin=10,
    t1 = np.empty(ti.shape, dtype=int)
    t1[:] = np.floor(ti / tb)
    if u is None:
        sd = np.sort(data)
        nmin = max(nmin, 0)
        if 2 * nmin > n:
            warnings.warn('nmin possibly too large!')
@ -344,8 +356,6 @@ def dispersion_idx(data, t=None, u=None, umin=None, umax=None, nu=None, nmin=10,
        u = linspace(umin, umax, nu)
    nu = len(u)
    di = np.zeros(nu)
@ -354,8 +364,8 @@ def dispersion_idx(data, t=None, u=None, umin=None, umax=None, nu=None, nmin=10,
    mint = int(min(t1))  # ; % mint should be 0.
    maxt = int(max(t1))
-    M = maxt - mint + 1;
+    M = maxt - mint + 1
-    occ = np.zeros(M);
+    occ = np.zeros(M)
    for ix, tresh in enumerate(u.tolist()):
        excess = (d > tresh)
@ -381,17 +391,21 @@ def dispersion_idx(data, t=None, u=None, umin=None, umax=None, nu=None, nmin=10,
        b_u = ok_u = None
    CItxt = '%d%s CI' % (100 * p, '%')
-    titleTxt = 'Dispersion Index plot';
+    titleTxt = 'Dispersion Index plot'
-    res = PlotData(di, u, title=titleTxt, labx='Threshold', laby='Dispersion Index')
+    res = PlotData(di, u, title=titleTxt,
                   labx='Threshold', laby='Dispersion Index')
        #'caption',CItxt);
    res.workspace = dict(umin=umin, umax=umax, nu=nu, nmin=nmin, alpha=alpha)
-    res.children = [PlotData(vstack([diLo * ones(nu), diUp * ones(nu)]).T, u, xlab='Threshold', title=CItxt)]
+    res.children = [
        PlotData(vstack([diLo * ones(nu), diUp * ones(nu)]).T, u,
                 xlab='Threshold', title=CItxt)]
    res.plot_args_children = ['--r']
    if plotflag:
        res.plot(di)
    return res, b_u, ok_u
 def decluster(data, t=None, thresh=None, tmin=1):
    '''
    Return declustered peaks over threshold values
@ -408,8 +422,8 @@ def decluster(data, t=None, thresh=None, tmin=1):
    Returns
    -------
    ev, te : ndarray
-        extreme values and its corresponding sampling times, respectively, i.e., 
+        extreme values and its corresponding sampling times, respectively,
-        all data > thresh which are at least tmin distance apart.
+        i.e., all data > thresh which are at least tmin distance apart.
    Example
    -------
@ -434,6 +448,7 @@ def decluster(data, t=None, thresh=None, tmin=1):
    i = findpot(data, t, thresh, tmin)
    return data[i], t[i]
 def findpot(data, t=None, thresh=None, tmin=1):
    '''
    Return indices to Peaks over threshold values
@ -468,7 +483,9 @@ def findpot(data, t=None, thresh=None, tmin=1):
    >>> yp, tp = data[I], t[I]
    >>> Ie = findpot(yp, tp, ymin,tmin)
    >>> ye, te = yp[Ie], tp[Ie]
-    >>> h = pylab.plot(t,data,ttc,ytc,'ro',t,zeros(len(t)),':',te, ye,'k.',tp,yp,'+')
+    >>> h = pylab.plot(t,data,ttc,ytc,'ro',
    ...                t,zeros(len(t)),':',
    ...                te, ye,'k.',tp,yp,'+')
    See also
    --------
@ -480,14 +497,14 @@ def findpot(data, t=None, thresh=None, tmin=1):
    else:
        ti = arr(t)
-    Ie, = where(Data > thresh);
+    Ie, = where(Data > thresh)
    Ye = Data[Ie]
    Te = ti[Ie]
    if len(Ye) <= 1:
        return Ie
    dT = np.diff(Te)
-    notSorted = np.any(dT < 0);
+    notSorted = np.any(dT < 0)
    if notSorted:
        I = np.argsort(Te)
        Te = Te[I]
@ -498,7 +515,8 @@ def findpot(data, t=None, thresh=None, tmin=1):
    isTooSmall = (dT <= tmin)
    if np.any(isTooSmall):
-        isTooClose = np.hstack((isTooSmall[0], isTooSmall[:-1] | isTooSmall[1:], isTooSmall[-1]))
+        isTooClose = np.hstack(
            (isTooSmall[0], isTooSmall[:-1] | isTooSmall[1:], isTooSmall[-1]))
        # Find opening (NO) and closing (NC) index for data being too close:
        iy = findextrema(np.hstack([0, 0, isTooSmall, 0]))
@ -547,6 +565,7 @@ def _find_ok_peaks(Ye, Te, Tmin):
    iOK, = where(1 - isTooClose[oOrder])
    return iOK
 def declustering_time(t):
    '''
    Returns minimum distance between clusters.
@ -566,11 +585,10 @@ def declustering_time(t):
    >>> import wafo.data
    >>> x  = wafo.data.sea()
    >>> t, data = x[:400,:].T
-    >>> Ie = findpot(data,t,0,5);
+    >>> Ie = findpot(data,t,0,5)
    >>> tc = declustering_time(Ie)
    >>> tc
    21
    '''
    t0 = arr(t)
    nt = len(t0)
@ -609,6 +627,7 @@ def interexceedance_times(t):
    '''
    return np.diff(np.sort(t))
 def extremal_idx(ti):
    '''
    Returns Extremal Index measuring the dependence of data
@ -649,7 +668,8 @@ def extremal_idx(ti):
    ---------
    Christopher A. T. Ferro, Johan Segers (2003)
    Inference for clusters of extreme values
-    Journal of the Royal Statistical society: Series B (Statistical Methodology) 54 (2), 545-556
+    Journal of the Royal Statistical society: Series B
    (Statistical Methodology) 54 (2), 545-556
    doi:10.1111/1467-9868.00401
    '''
    t = arr(ti)
@ -662,12 +682,17 @@ def extremal_idx(ti):
        ei = min(1, 2 * np.mean(t - 1) ** 2 / np.mean((t - 1) * (t - 2)))
    return ei
 def _logit(p):
    return np.log(p) - np.log1p(-p)
 def _logitinv(x):
    return 1.0 / (np.exp(-x) + 1)
 class RegLogit(object):
    '''
    REGLOGIT Fit ordinal logistic regression model.
@ -691,15 +716,11 @@ class RegLogit(object):
                          otherwise no action
          .deletecolinear : If true delete colinear covariates (default)
     Methods
      .predict   : Predict from a fitted LOGIT object
      .summary   : Display summary of fitted LOGIT object.
      .compare   : Compare small LOGIT versus large one
     Suppose Y takes values in K ordered categories, and let
     gamma_i (x) be the cumulative probability that Y
     falls in one of the first i categories given the covariate
@ -750,7 +771,8 @@ class RegLogit(object):
    #% Uses the auxiliary functions logistic_regression_derivatives and
    #% logistic_regression_likelihood.
-    def __init__(self, maxiter=500,accuracy=1e-6, alpha=0.05, deletecolinear=True, verbose=False):
+    def __init__(self, maxiter=500, accuracy=1e-6, alpha=0.05,
                 deletecolinear=True, verbose=False):
        self.maxiter = maxiter
        self.accuracy = accuracy
@ -808,13 +830,15 @@ class RegLogit(object):
                X = X[:, ix]
                txt = [' %d,' % i for i in iy]
                #txt[-1] = ' %d' % iy[-1]
-                warnings.warn('Covariate matrix is singular. Removing column(s):%s' % txt)
+                warnings.warn(
                    'Covariate matrix is singular. Removing column(s):%s' %
                    txt)
        mx = X.shape[0]
        if (mx != my):
-            raise ValueError('x and y must have the same number of observations');
+            raise ValueError(
                'x and y must have the same number of observations')
        return y, X
    def fit(self, y, X=None, theta0=None, beta0=None):
        '''
        Member variables
@ -829,16 +853,16 @@ class RegLogit(object):
      .eta          : linear predictor for the model.
      .residual     : residual for the model (Y-E(Y|X)).
      .dispersnfit  : The estimated error variance
-      .deviance     : deviance for the model equal minus twice the log-likelihood.
+      .deviance     : deviance for the model equal minus twice the
                      log-likelihood.
      .d2L          : Hessian matrix (double derivative of log-likelihood)
      .dL           : First derivative of loglikelihood w.r.t. THETA and BETA.
        '''
-        self.family = 'multinomial';
+        self.family = 'multinomial'
-        self.link = 'logit';
+        self.link = 'logit'
        y, X = self.check_xy(y, X)
        # initial calculations
        tol = self.accuracy
        incr = 10
@ -847,8 +871,9 @@ class RegLogit(object):
        ymax = y.max()
        yrange = ymax - ymin
        z = (y * ones((1, yrange))) == ((y * 0 + 1) * np.arange(ymin, ymax))
-        z1 = (y * ones((1, yrange))) == ((y * 0 + 1) * np.arange(ymin + 1, ymax+1))
+        z1 = (y * ones((1, yrange))) == (
-        z  = z[:, np.flatnonzero(z.any(axis=0))];
+            (y * 0 + 1) * np.arange(ymin + 1, ymax + 1))
        z = z[:, np.flatnonzero(z.any(axis=0))]
        z1 = z1[:, np.flatnonzero(z1.any(axis=0))]
        [_mz, nz] = z.shape
        [_mx, nx] = X.shape
@ -869,7 +894,7 @@ class RegLogit(object):
        # likelihood and derivatives at starting values
        [dev, dl, d2l] = self.loglike(tb, y, X, z, z1)
-        epsilon = np.std(d2l) / 1000;
+        epsilon = np.std(d2l) / 1000
        if np.any(beta0) or np.any(theta00 != theta0):
            tb0 = np.vstack((theta00, beta00))
            nulldev = self.loglike(tb0, y, X, z, z1)[0]
@ -879,20 +904,21 @@ class RegLogit(object):
        # maximize likelihood using Levenberg modified Newton's method
        for i in range(self.maxiter + 1):
-            tbold = tb;
+            tbold = tb
-            devold = dev;
+            devold = dev
            tb = tbold - np.linalg.lstsq(d2l, dl)[0]
            [dev, dl, d2l] = self.loglike(tb, y, X, z, z1)
            if ((dev - devold) / np.dot(dl, tb - tbold) < 0):
                epsilon = epsilon / decr
            else:
                while ((dev - devold) / np.dot(dl, tb - tbold) > 0):
-                    epsilon = epsilon * incr;
+                    epsilon = epsilon * incr
                    if (epsilon > 1e+15):
-                        raise ValueError('epsilon too large');
+                        raise ValueError('epsilon too large')
-                    tb = tbold - np.linalg.lstsq(d2l - epsilon * np.eye(d2l.shape), dl)
+                    tb = tbold - \
-                    [dev, dl, d2l] = self.loglike(tb, y, X, z, z1);
+                        np.linalg.lstsq(d2l - epsilon * np.eye(d2l.shape), dl)
                    [dev, dl, d2l] = self.loglike(tb, y, X, z, z1)
                    print('epsilon %g' % epsilon)
                    # end %while
                    # end else
@ -900,13 +926,14 @@ class RegLogit(object):
            if (self.verbose > 1):
                print('Iter: %d,  Deviance: %8.6f', iter, dev)
-                print('First derivative'); 
+                print('First derivative')
-                print(dl);
+                print(dl)
-                print('Eigenvalues of second derivative'); 
+                print('Eigenvalues of second derivative')
-                print(np.linalg.eig(d2l)[0].T);
+                print(np.linalg.eig(d2l)[0].T)
                # end
                # end
-            stop = np.abs(np.dot(dl, np.linalg.lstsq(d2l, dl)[0]) / len(dl)) <= tol 
+            stop = np.abs(
                np.dot(dl, np.linalg.lstsq(d2l, dl)[0]) / len(dl)) <= tol
            if stop:
                break
            # end %while
@ -918,73 +945,76 @@ class RegLogit(object):
        pcov = np.linalg.pinv(-d2l)
        se = sqrt(np.diag(pcov))
        if (nx > 0):
            eta = ((X * beta) * ones((1, nz))) + ((y * 0 + 1) * theta)
        else:
-            eta = (y * 0 + 1) * theta;
+            eta = (y * 0 + 1) * theta
            # end
-        gammai = np.diff(np.hstack(((y * 0), _logitinv(eta), (y * 0 + 1))),n=1,axis=1)
+        gammai = np.diff(
            np.hstack(((y * 0), _logitinv(eta), (y * 0 + 1))), n=1, axis=1)
        k0 = min(y)
        mu = (k0 - 1) + np.dot(gammai, np.arange(1, nz + 2)).reshape(-1, 1)
        r = np.corrcoef(np.hstack((y, mu)).T)
-        R2 = r[0,1]**2; #coefficient of determination
+        R2 = r[0, 1] ** 2
-        R2adj = max(1 - (1-R2)* (my-1)/(my-nx-nz-1),0); # adjusted coefficient of determination
+        # coefficient of determination
        # adjusted coefficient of determination
        R2adj = max(1 - (1 - R2) * (my - 1) / (my - nx - nz - 1), 0)
        res = y - mu
        if nz == 1:
-            self.family = 'binomial';
+            self.family = 'binomial'
        else:
-            self.family = 'multinomial';
+            self.family = 'multinomial'
-        self.link   = 'logit';
+        self.link = 'logit'
        self.numvar = nx + nz
        self.numobs = my
        self.numk = nz + 1
        self.df = max(my - nx - nz, 0)
-        self.df_null = my-nz; #nulldf;  nulldf =  n - nz;
+        self.df_null = my - nz
        # nulldf;  nulldf =  n - nz
        self.params = tb[:(nz + nx)]
        self.params_ci = 1
        self.params_std = se
        self.params_cov = pcov
        self.params_tstat = (self.params / self.params_std)
-        if False: # % options.estdispersn %dispersion_parameter=='mean_deviance'
+        # % options.estdispersn %dispersion_parameter=='mean_deviance'
        if False:
            self.params_pvalue = 2. * _cdft(-abs(self.params_tstat), self.df)
-            bcrit = -se*_invt(self.alpha/2,self.df);
+            bcrit = -se * _invt(self.alpha / 2, self.df)
        else:
            self.params_pvalue = 2. * _cdfnorm(-abs(self.params_tstat))
-            bcrit = -se*_invnorm(self.alpha/2);
+            bcrit = -se * _invnorm(self.alpha / 2)
        # end
        self.params_ci = np.vstack((self.params + bcrit, self.params - bcrit))
-        self.mu = gammai;
+        self.mu = gammai
-        self.eta = _logit(gammai);
+        self.eta = _logit(gammai)
-        self.X = X;
+        self.X = X
        [dev, dl, d2l, p] = self.loglike(tb, y, X, z, z1, numout=4)
-        self.theta = theta;
+        self.theta = theta
-        self.beta  = beta;
+        self.beta = beta
-        self.gamma = gammai;
+        self.gamma = gammai
-        self.residual  = res.T; 
+        self.residual = res.T
-        self.residualD = np.sign(self.residual)*sqrt(-2*np.log(p));
+        self.residualD = np.sign(self.residual) * sqrt(-2 * np.log(p))
-        self.deviance  = dev;
+        self.deviance = dev
-        self.deviance_null = nulldev;
+        self.deviance_null = nulldev
-        self.d2L = d2l;
+        self.d2L = d2l
-        self.dL = dl.T;
+        self.dL = dl.T
-        self.dispersionfit=1;
+        self.dispersionfit = 1
-        self.dispersion = 1;
+        self.dispersion = 1
-        self.R2 = R2;
+        self.R2 = R2
-        self.R2adj = R2adj;
+        self.R2adj = R2adj
        self.numiter = i
-        self.converged = i<self.maxiter;
+        self.converged = i < self.maxiter
-        self.note = '';
+        self.note = ''
        self.date = now()
        if (self.verbose):
            self.summary()
    def compare(self, object2):
        ''' Compare  small LOGIT versus large one
@ -1000,52 +1030,51 @@ class RegLogit(object):
        try:
            if self.numvar > object2.numvar:
-                devL = self.deviance;
+                devL = self.deviance
-                nL   = self.numvar;
+                nL = self.numvar
-                dfL = self.df;
+                dfL = self.df
-                Al = self.X;
+                Al = self.X
-                disprsn = self.dispersionfit;
+                disprsn = self.dispersionfit
-                devs = object2.deviance;
+                devs = object2.deviance
-                ns   = object2.numvar;
+                ns = object2.numvar
-                dfs = object2.df;
+                dfs = object2.df
-                As = object2.X;
+                As = object2.X
            else:
-                devL = object2.deviance;
+                devL = object2.deviance
-                nL   = object2.numvar;
+                nL = object2.numvar
-                dfL = object2.df;
+                dfL = object2.df
-                Al = object2.X;
+                Al = object2.X
-                disprsn = object2.dispersionfit;
+                disprsn = object2.dispersionfit
-                devs = self.deviance;
+                devs = self.deviance
-                ns   = self.numvar;
+                ns = self.numvar
-                dfs = self.df;
+                dfs = self.df
-                As = self.X;
+                As = self.X
            # end
            if (((As - np.dot(Al * np.linalg.lstsq(Al, As))) > 500 * np.finfo(float).eps).any() or
                    object2.family != self.family or object2.link != self.link):
-                warnings.warn('Small model not included in large model, result is rubbish!')
+                warnings.warn('Small model not included in large model,' +
-            
+                    ' result is rubbish!')
        except:
            raise ValueError('Apparently not a valid regression object')
-        
+        pmq = np.abs(nL - ns)
        pmq = np.abs(nL-ns);
        print(' ')
        print('                       Analysis of Deviance')
-        if False: # %options.estdispersn   
+        if False:  # options.estdispersn
-            localstat = abs(devL-devs)/disprsn/pmq;
+            localstat = abs(devL - devs) / disprsn / pmq
 #            localpvalue = 1-cdff(localstat, pmq, dfL)
 #            print('Model    DF      Residual deviance      F-stat     Pr(>F)')
        else:
-            localstat = abs(devL-devs)/disprsn;
+            localstat = abs(devL - devs) / disprsn
            localpvalue = 1 - _cdfchi2(localstat, pmq)
-            print('Model    DF      Residual deviance      Chi2-stat        Pr(>Chi2)')
+            print('Model    DF      Residual deviance      Chi2-stat  ' +
                  '      Pr(>Chi2)')
        # end
-        
+        print('Small    %d       %12.4f       %12.4f    %12.4f' %
-        print('Small    %d       %12.4f       %12.4f    %12.4f' % (dfs,devs,localstat,localpvalue))
+              (dfs, devs, localstat, localpvalue))
        print('Full     %d       %12.4f' % (dfL, devL))
        print(' ')
@ -1055,54 +1084,72 @@ class RegLogit(object):
        print(' ')
        print('                       Analysis of Deviance')
        if False:  # %options.estdispersn
-            localstat = abs(self.deviance_null-self.deviance)/self.dispersionfit/(self.numvar-1);
+            localstat = abs(self.deviance_null - self.deviance) / \
-            localpvalue = 1-_cdff(localstat,self.numvar-1,self.df);
+                self.dispersionfit / (self.numvar - 1)
-            print('Model    DF      Residual deviance      F-stat        Pr(>F)')
+            localpvalue = 1 - _cdff(localstat, self.numvar - 1, self.df)
            print(
                'Model    DF      Residual deviance      F-stat        Pr(>F)')
        else:
-            localstat = abs(self.deviance_null-self.deviance)/self.dispersionfit;
+            localstat = abs(
-            localpvalue = 1-_cdfchi2(localstat,self.numvar-1);
+                self.deviance_null - self.deviance) / self.dispersionfit
-            print('Model    DF      Residual deviance      Chi2-stat        Pr(>Chi2)')
+            localpvalue = 1 - _cdfchi2(localstat, self.numvar - 1)
            print('Model    DF      Residual deviance      Chi2-stat' +
                  '        Pr(>Chi2)')
        # end
-       
+        print('Null     %d       %12.4f       %12.4f    %12.4f' %
-        print('Null     %d       %12.4f       %12.4f    %12.4f' % (self.df_null,self.deviance_null,localstat,localpvalue))
+              (self.df_null, self.deviance_null, localstat, localpvalue))
        print('Full     %d       %12.4f' % (self.df, self.deviance))
        print(' ')
        print(' R2 =  %2.4f,     R2adj = %2.4f' % (self.R2, self.R2adj))
        print(' ')
        return localpvalue
    def summary(self):
-        txtlink = self.link;
+        txtlink = self.link
        print('Call:')
-        print('reglogit(formula = %s(Pr(grp(y)<=i)) ~ theta_i+beta*x, family = %s)' %(txtlink,self.family))
+        print('reglogit(formula = %s(Pr(grp(y)<=i)) ~ theta_i+beta*x, family = %s)' %
            (txtlink, self.family))
        print(' ')
        print('Deviance Residuals:')
-        m,q1,me, q3,M = np.percentile(self.residualD,q=[0, 25, 50, 75, 100])
+        m, q1, me, q3, M = np.percentile(
            self.residualD, q=[0, 25, 50, 75, 100])
        print('    Min       1Q         Median       3Q        Max  ')
-        print('%2.4f     %2.4f     %2.4f     %2.4f     %2.4f' % (m, q1, me, q3, M))
+        print('%2.4f     %2.4f     %2.4f     %2.4f     %2.4f' %
              (m, q1, me, q3, M))
        print(' ')
        print(' Coefficients:')
        if False:  # %options.estdispersn
-            print('            Estimate      Std. Error     t value       Pr(>|t|)')
+            print(
            '            Estimate      Std. Error     t value       Pr(>|t|)')
        else:
-            print('            Estimate      Std. Error     z value       Pr(>|z|)')
+            print(
            '            Estimate      Std. Error     z value       Pr(>|z|)')
        # end
-        e, s, z, p = self.params, self.params_std, self.params_tstat, self.params_pvalue
+        e, s, z, p = (self.params, self.params_std, self.params_tstat,
                      self.params_pvalue)
        for i in range(self.numk):
-            print('theta_%d         %2.4f        %2.4f        %2.4f        %2.4f' % (i,e[i],s[i], z[i], p[i]))
+            print(
                'theta_%d         %2.4f        %2.4f        %2.4f        %2.4f' %
                (i, e[i], s[i], z[i], p[i]))
        for i in range(self.numk, self.numvar):
-            print(' beta_%d         %2.4f        %2.4f        %2.4f        %2.4f\n' % (i-self.numk,e[i],s[i], z[i], p[i]))
+            print(
                ' beta_%d         %2.4f        %2.4f        %2.4f        %2.4f\n' %
                (i - self.numk, e[i], s[i], z[i], p[i]))
        print(' ')
-        print('(Dispersion parameter for %s family taken to be %2.2f)' % (self.family,self.dispersionfit))
+        print('(Dispersion parameter for %s family taken to be %2.2f)' %
              (self.family, self.dispersionfit))
        print(' ')
        if True:  # %options.constant
-            print('    Null deviance: %2.4f  on %d  degrees of freedom' % (self.deviance_null,self.df_null))
+            print('    Null deviance: %2.4f  on %d  degrees of freedom' %
                  (self.deviance_null, self.df_null))
        # end
-        print('Residual deviance: %2.4f  on %d  degrees of freedom' % (self.deviance,self.df))
+        print('Residual deviance: %2.4f  on %d  degrees of freedom' %
              (self.deviance, self.df))
        self.anode()
@ -1124,7 +1171,7 @@ class RegLogit(object):
        [_mx, nx] = self.X.shape
        if Xnew is None:
-            Xnew = self.X;
+            Xnew = self.X
        else:
            Xnew = np.atleast_2d(Xnew)
            notnans = np.flatnonzero(1 - (1 - np.isfinite(Xnew)).any(axis=1))
@ -1132,45 +1179,46 @@ class RegLogit(object):
        [n, p] = Xnew.shape
        if p != nx:
-            raise ValueError('Number of covariates must match the number of regression coefficients')
+            raise ValueError('Number of covariates must match the number' +
-        
+                             ' of regression coefficients')
-        nz = self.numk-1;
+        nz = self.numk - 1
        one = ones((n, 1))
        if (nx > 0):
            eta = np.dot(Xnew, self.beta).reshape(-1, 1) + self.theta
        else:
            eta = one * self.theta
        # end
-        y = np.diff(np.hstack((zeros((n,1)), _logitinv(eta), one)),n=1, axis=1)
+        y = np.diff(
            np.hstack((zeros((n, 1)), _logitinv(eta), one)), n=1, axis=1)
        if fulloutput:
            eps = np.finfo(float).eps
-            pcov = self.params_cov;
+            pcov = self.params_cov
            if (nx > 0):
                np1 = pcov.shape[0]
-                [U, S, V]= np.linalg.svd(pcov,0);
+                [U, S, V] = np.linalg.svd(pcov, 0)
-                R = np.dot(U,np.dot(np.diag(sqrt(S)),V)) #%squareroot of pcov
+                # %squareroot of pcov
                R = np.dot(U, np.dot(np.diag(sqrt(S)), V))
                ib = np.r_[0, nz:np1]
                #% Var(eta_i) = var(theta_i+Xnew*b)
-                vareta = zeros((n,nz));
+                vareta = zeros((n, nz))
                u = np.hstack((one, Xnew))
                for i in range(nz):
                    ib[0] = i
-                    vareta[:,i] = np.maximum(((np.dot(u,R[ib][:,ib]))**2).sum(axis=1),eps)
+                    vareta[:, i] = np.maximum(
                        ((np.dot(u, R[ib][:, ib])) ** 2).sum(axis=1), eps)
                    # end
            else:
                vareta = np.diag(pcov)
                # end
-            crit = -_invnorm(alpha/2);
+            crit = -_invnorm(alpha / 2)
-            ecrit = crit * sqrt(vareta);
+            ecrit = crit * sqrt(vareta)
-            mulo = _logitinv(eta-ecrit);
+            mulo = _logitinv(eta - ecrit)
-            muup = _logitinv(eta+ecrit);
+            muup = _logitinv(eta + ecrit)
            ylo1 = np.diff(np.hstack((zeros((n, 1)), mulo, one)), n=1, axis=1)
            yup1 = np.diff(np.hstack((zeros((n, 1)), muup, one)), n=1, axis=1)
@ -1178,7 +1226,8 @@ class RegLogit(object):
            yup = np.maximum(ylo1, yup1)
            for i in range(1, nz):  # = 2:self.numk-1
-                yup[:,i]  = np.vstack((yup[:,i],muup[:,i]-mulo[:,i-1])).max(axis=0)
+                yup[:, i] = np.vstack(
                    (yup[:, i], muup[:, i] - mulo[:, i - 1])).max(axis=0)
                # end
            return y, ylo, yup
        return y
@ -1196,7 +1245,6 @@ class RegLogit(object):
        g = np.maximum(y == y.max(), g)
        g1 = np.minimum(y > y.min(), g1)
        p = g - g1
        dev = -2 * np.log(p).sum()
@ -1207,17 +1255,17 @@ class RegLogit(object):
        # Author: Gordon K. Smyth <gks@maths.uq.oz.au>
        # Description: Derivatives of log-likelihood in logistic regression
        # first derivative
-        v = g * (1 - g) / p; 
+        v = g * (1 - g) / p
-        v1 = g1 * (1 - g1) / p;
+        v1 = g1 * (1 - g1) / p
        dlogp = np.hstack((((v * z) - (v1 * z1)), ((v - v1) * x)))
        dl = np.sum(dlogp, axis=0)
        # second derivative
        w = v * (1 - 2 * g)
        w1 = v1 * (1 - 2 * g1)
-        d2l = np.dot(zx.T, (w*zx)) - np.dot(z1x.T, (w1*z1x)) - np.dot(dlogp.T, dlogp)
+        d2l = np.dot(zx.T, (w * zx)) - np.dot(
            z1x.T, (w1 * z1x)) - np.dot(dlogp.T, dlogp)
        if numout == 4:
            return dev, dl, d2l,  p
@ -1230,12 +1278,13 @@ def _test_dispersion_idx():
    import wafo.data
    xn = wafo.data.sea()
    t, data = xn.T
-    Ie = findpot(data,t,0,5);
+    Ie = findpot(data, t, 0, 5)
    di, _u, _ok_u = dispersion_idx(data[Ie], t[Ie], tb=100)
    di.plot()  # a threshold around 1 seems appropriate.
    di.show()
    pass
 def _test_findpot():
    import pylab
    import wafo.data
@ -1250,16 +1299,19 @@ def _test_findpot():
    yp, tp = data[I], t[I]
    Ie = findpot(yp, tp, ymin, tmin)
    ye, te = yp[Ie], tp[Ie]
-    pylab.plot(t, data, ttc,ytc,'ro', t, zeros(len(t)), ':', te, ye, 'kx', tp, yp, '+')
+    pylab.plot(t, data, ttc, ytc, 'ro', t,
-    pylab.show() #
+               zeros(len(t)), ':', te, ye, 'kx', tp, yp, '+')
    pylab.show()
    pass
 def _test_reslife():
    '''Demo: draw 100 genpareto variates, pass them to reslife and plot.'''
    import wafo
    sample = wafo.stats.genpareto.rvs(0.1, 2, 2, size=100)
    reslife(sample, nu=20).plot()
 def test_reglogit():
    y = np.array([1, 1, 2, 1, 3, 2, 3, 2, 3, 3]).reshape(-1, 1)
    x = np.arange(1, 11).reshape(-1, 1)
@ -1271,6 +1323,8 @@ def test_reglogit():
    [mu, plo, pup] = b.predict(fulloutput=True)  # @UnusedVariable
    pass
    # plot(x,mu,'g',x,plo,'r:',x,pup,'r:')
 def test_reglogit2():
    n = 40
    x = np.sort(5 * np.random.rand(n, 1) - 2.5, axis=0)
@ -1279,11 +1333,12 @@ def test_reglogit2():
    b.fit(y, x)
    # b.display() #% members and methods
    b.summary()
-    [mu,plo,pup] = b.predict(fulloutput=True);
+    [mu, plo, pup] = b.predict(fulloutput=True)
    import matplotlib.pyplot as pl
    pl.plot(x, mu, 'g', x, plo, 'r:', x, pup, 'r:')
    pl.show()
 def test_sklearn0():
    from sklearn.linear_model import LogisticRegression
    from sklearn import datasets  # @UnusedImport
@ -1306,14 +1361,13 @@ def test_sklearn0():
    #plot(cvals, score)
 def test_sklearn():
    X = np.sort(5 * np.random.rand(40, 1) - 2.5, axis=0)
    y = (2 * (np.cos(X) > 2 * np.random.rand(40, 1) - 1) - 1).ravel()
    from sklearn.svm import SVR
-    
+    #
    ###############################################################################
    # look at the results
    import pylab as pl
    pl.scatter(X, .5 * np.cos(X) + 0.5, c='k', label='True model')
@ -1332,6 +1386,7 @@ def test_sklearn():
    pl.legend()
    pl.show()
 def test_sklearn1():
    X = np.sort(5 * np.random.rand(40, 1) - 2.5, axis=0)
    y = (2 * (np.cos(X) > 2 * np.random.rand(40, 1) - 1) - 1).ravel()
@ -1345,7 +1400,7 @@ def test_sklearn1():
    y_lin = svr_lin.fit(X, y).predict(X)
    y_poly = svr_poly.fit(X, y).predict(X)
-    ###############################################################################
+    #
    # look at the results
    import pylab as pl
    pl.scatter(X, .5 * np.cos(X) + 0.5, c='k', label='True model')
@ -1359,6 +1414,7 @@ def test_sklearn1():
    pl.legend()
    pl.show()
 def test_doctstrings():
    #_test_dispersion_idx()
    import doctest
@ -1368,4 +1424,3 @@ def test_doctstrings():
 if __name__ == '__main__':
    # Script entry point: when the module is executed directly, call
    # test_doctstrings(); the test_reglogit2 demo below is left disabled.
    # test_reglogit2()
    test_doctstrings()
--- a/pywafo/src/wafo/stats/distributions.py
+++ b/pywafo/src/wafo/stats/distributions.py
--- a/pywafo/src/wafo/stats/estimation.py
+++ b/pywafo/src/wafo/stats/estimation.py
@ -1,7 +1,8 @@
 '''
 Contains FitDistribution and Profile class, which are
-important classes for fitting to various Continous and Discrete Probability Distributions
+important classes for fitting to various Continuous and Discrete Probability
 Distributions
 Author:  Per A. Brodtkorb 2008
 '''
@ -20,7 +21,8 @@ from scipy import optimize
 import numpy
 import numpy as np
 from numpy import alltrue, arange, ravel, sum, zeros, log, sqrt, exp
-from numpy import (atleast_1d, any, asarray, nan, pi, #reshape, #repeat, product, ndarray, 
+from numpy import (
    atleast_1d, any, asarray, nan, pi,  # reshape, #repeat, product, ndarray,
    isfinite)
 from numpy import flatnonzero as nonzero
@ -34,18 +36,22 @@ floatinfo = np.finfo(float)
 arr = asarray
 all = alltrue  # @ReservedAssignment
 def chi2isf(p, df):
    '''Return x such that the chi-square survival function with `df`
    degrees of freedom equals `p` (thin wrapper around special.chdtri).
    '''
    quantile = special.chdtri(df, p)
    return quantile
 def chi2sf(x, df):
    '''Return the chi-square survival function at `x` for `df` degrees
    of freedom (thin wrapper around special.chdtrc).
    '''
    tail_prob = special.chdtrc(df, x)
    return tail_prob
 def norm_ppf(q):
    '''Return the standard normal quantile for probability `q`
    (thin wrapper around special.ndtri).
    '''
    quantile = special.ndtri(q)
    return quantile
 # Frozen RV class
 class rv_frozen(object):
    ''' Frozen continuous or discrete 1D Random Variable object (RV)
    Methods
@ -67,61 +73,75 @@ class rv_frozen(object):
    entropy()
        (Differential) entropy of the RV.
    '''
    def __init__(self, dist, *args, **kwds):
        self.dist = dist
-        loc0, scale0 = map(kwds.get, ['loc', 'scale'])
+        args, loc, scale = dist._parse_args(*args, **kwds)
-        if hasattr(dist, 'fix_loc_scale'): #isinstance(dist, rv_continuous):
+        if len(args) == dist.numargs - 2:  # isinstance(dist, rv_continuous):
-            args, loc0, scale0 = dist.fix_loc_scale(args, loc0, scale0)
+            self.par = args + (loc, scale)
            self.par = args + (loc0, scale0)
        else:  # rv_discrete
-            args, loc0 = dist.fix_loc(args, loc0)
+            self.par = args + (loc,)
            self.par = args + (loc0,)
    def pdf(self, x):
        ''' Probability density function at x of the given RV.'''
        return self.dist.pdf(x, *self.par)
    def cdf(self, x):
        '''Cumulative distribution function at x of the given RV.'''
        return self.dist.cdf(x, *self.par)
    def ppf(self, q):
        '''Percent point function (inverse of cdf) at q of the given RV.'''
        return self.dist.ppf(q, *self.par)
    def isf(self, q):
        '''Inverse survival function at q of the given RV.'''
        return self.dist.isf(q, *self.par)
    def rvs(self, size=None):
        '''Random variates of given type.'''
        kwds = dict(size=size)
        return self.dist.rvs(*self.par, **kwds)
    def sf(self, x):
        '''Survival function (1-cdf) at x of the given RV.'''
        return self.dist.sf(x, *self.par)
    def stats(self, moments='mv'):
        ''' Some statistics of the given RV'''
        kwds = dict(moments=moments)
        return self.dist.stats(*self.par, **kwds)
    def median(self):
        return self.dist.median(*self.par)
    def mean(self):
        return self.dist.mean(*self.par)
    def var(self):
        return self.dist.var(*self.par)
    def std(self):
        return self.dist.std(*self.par)
    def moment(self, n):
        par1 = self.par[:self.dist.numargs]
        return self.dist.moment(n, *par1)
    def entropy(self):
        return self.dist.entropy(*self.par)
    def pmf(self, k):
        '''Probability mass function at k of the given RV'''
        return self.dist.pmf(k, *self.par)
    def interval(self, alpha):
        return self.dist.interval(alpha, *self.par)
 # internal class to profile parameters of a given distribution
 class Profile(object):
    ''' Profile Log- likelihood or Product Spacing-function.
            which can be used for constructing confidence interval for
            either phat[i], probability or quantile.
@ -133,24 +153,25 @@ class Profile(object):
    **kwds : named arguments with keys
        i : scalar integer
            defining which distribution parameter to profile, i.e. which
-            parameter to keep fixed (default index to first non-fixed parameter)
+            parameter to keep fixed (default first non-fixed parameter)
        pmin, pmax : real scalars
            Interval for either the parameter, phat(i), prb, or x, used in the
            optimization of the profile function (default is based on the
-            100*(1-alpha)% confidence interval computed using the delta method.)
+            100*(1-alpha)% confidence interval computed with the delta method.)
        N : scalar integer
            Max number of points used in Lp (default 100)
        x : real scalar
            Quantile (return value) (default None)
        logSF : real scalar
            log survival probability,i.e., SF = Prob(X>x;phat) (default None)
-        link : function connecting the quantile (x) and the survival probability
+        link : function connecting the x-quantile and the survival probability
            (SF) with the fixed distribution parameter, i.e.:
-            self.par[i] = link(x,logSF,self.par,i), where logSF = log(Prob(X>x;phat)).
+            self.par[i] = link(x,logSF,self.par,i), where
            logSF = log(Prob(X>x;phat)).
            This means that if:
                1) x is not None then x is profiled
                2) logSF is not None then logSF is profiled
-                3) x and logSF both are None then self.par[i] is profiled (default)
+                3) x and logSF are None then self.par[i] is profiled (default)
        alpha : real scalar
            confidence coefficient (default 0.05)
    Returns
@ -218,8 +239,11 @@ class Profile(object):
        self.title = 'Profile log'
        self.xlabel = ''
        self.ylabel = ''
-        self.i_fixed, self.N, self.alpha, self.pmin, self.pmax, self.x, self.logSF, self.link = map(kwds.get,
+        (self.i_fixed, self.N, self.alpha, self.pmin, self.pmax, self.x,
-                            ['i', 'N', 'alpha', 'pmin', 'pmax', 'x', 'logSF', 'link'],
+         self.logSF, self.link) = map(
            kwds.get,
            ['i', 'N', 'alpha', 'pmin',
                                'pmax', 'x', 'logSF', 'link'],
            [i0, 100, 0.05, None, None, None, None, None])
        self.ylabel = '%g%s CI' % (100 * (1.0 - self.alpha), '%')
@ -230,7 +254,8 @@ class Profile(object):
            self.title = self.title + ' product spacing'
            Lmax = fit_dist.LPSmax
        else:
-            raise ValueError("PROFILE is only valid for ML- or MPS- estimators")
+            raise ValueError(
                "PROFILE is only valid for ML- or MPS- estimators")
        if fit_dist.par_fix is None:
            isnotfixed = np.ones(fit_dist.par.shape, dtype=bool)
@ -242,7 +267,9 @@ class Profile(object):
        self.i_fixed = atleast_1d(self.i_fixed)
        if 1 - isnotfixed[self.i_fixed]:
-            raise ValueError("Index i must be equal to an index to one of the free parameters.")
+            raise ValueError(
                "Index i must be equal to an index to one of the free " +
                "parameters.")
        isfree = isnotfixed
        isfree[self.i_fixed] = False
@ -256,7 +283,7 @@ class Profile(object):
        phatv = fit_dist.par.copy()
        self._par = phatv.copy()
-        ## Set up variable to profile and _local_link function
+        # Set up variable to profile and _local_link function
        self.profile_x = not self.x == None
        self.profile_logSF = not (self.logSF == None or self.profile_x)
        self.profile_par = not (self.profile_x or self.profile_logSF)
@ -269,32 +296,38 @@ class Profile(object):
            p_opt = self._par[self.i_fixed]
        elif self.profile_x:
            self.logSF = log(fit_dist.sf(self.x))
-            self._local_link = lambda fix_par, par : self.link(fix_par, self.logSF, par, self.i_fixed)
+            self._local_link = lambda fix_par, par: self.link(
                fix_par, self.logSF, par, self.i_fixed)
            self.xlabel = 'x'
            p_opt = self.x
        elif self.profile_logSF:
            p_opt = self.logSF
            self.x = fit_dist.isf(exp(p_opt))
-            self._local_link = lambda fix_par, par : self.link(self.x, fix_par, par, self.i_fixed)
+            self._local_link = lambda fix_par, par: self.link(
                self.x, fix_par, par, self.i_fixed)
            self.xlabel = 'log(SF)'
        else:
-            raise ValueError("You must supply a non-empty quantile (x) or probability (logSF) in order to profile it!")
+            raise ValueError(
                "You must supply a non-empty quantile (x) or probability " +
                "(logSF) in order to profile it!")
        self.xlabel = self.xlabel + ' (' + fit_dist.dist.name + ')'
        phatfree = phatv[self.i_free].copy()
        self._set_profile(phatfree, p_opt)
    def _correct_Lmax(self, Lmax):
        if Lmax > self.Lmax:  # foundNewphat = True
-            warnings.warn('The fitted parameters does not provide the optimum fit. Something wrong with fit')
+            warnings.warn(
                'The fitted parameters does not provide the optimum fit. ' +
                'Something wrong with fit')
            dL = self.Lmax - Lmax
            self.alpha_cross_level -= dL
            self.Lmax = Lmax
    def _profile_optimum(self, phatfree0, p_opt):
-        phatfree = optimize.fmin(self._profile_fun, phatfree0, args=(p_opt, ), disp=0)
+        phatfree = optimize.fmin(
            self._profile_fun, phatfree0, args=(p_opt,), disp=0)
        Lmax = -self._profile_fun(phatfree, p_opt)
        self._correct_Lmax(Lmax)
        return Lmax, phatfree
@ -361,19 +394,23 @@ class Profile(object):
            if pvar <= 1e-5 or numpy.isnan(pvar):
                pvar = max(abs(p_opt) * 0.5, 0.5)
-            p_crit = -norm_ppf(self.alpha / 2.0) * sqrt(numpy.ravel(pvar)) * 1.5
+            p_crit = (-norm_ppf(self.alpha / 2.0) *
                      sqrt(numpy.ravel(pvar)) * 1.5)
            if self.pmin == None:
-                self.pmin = self._search_pmin(phatfree0, p_opt - 5.0 * p_crit, p_opt)
+                self.pmin = self._search_pmin(phatfree0,
                                              p_opt - 5.0 * p_crit, p_opt)
            p_crit_low = (p_opt - self.pmin) / 5
            if self.pmax == None:
-                self.pmax = self._search_pmax(phatfree0,p_opt + 5.0 * p_crit, p_opt)
+                self.pmax = self._search_pmax(phatfree0,
                                              p_opt + 5.0 * p_crit, p_opt)
            p_crit_up = (self.pmax - p_opt) / 5
            N4 = numpy.floor(self.N / 4.0)
            pvec1 = linspace(self.pmin, p_opt - p_crit_low, N4 + 1)
-            pvec2 = linspace(p_opt - p_crit_low, p_opt + p_crit_up, self.N - 2 * N4)
+            pvec2 = linspace(
                p_opt - p_crit_low, p_opt + p_crit_up, self.N - 2 * N4)
            pvec3 = linspace(p_opt + p_crit_up, self.pmax, N4 + 1)
            pvec = numpy.unique(numpy.hstack((pvec1, p_opt, pvec2, pvec3)))
@ -429,9 +466,9 @@ class Profile(object):
    def _myinvfun(self, phatnotfixed):
        mphat = self._par.copy()
-        mphat[self.i_notfixed] = phatnotfixed;
+        mphat[self.i_notfixed] = phatnotfixed
        prb = exp(self.logSF)
-        return self.fit_dist.dist.isf(prb, *mphat);
+        return self.fit_dist.dist.isf(prb, *mphat)
    def _myprbfun(self, phatnotfixed):
        mphat = self._par.copy()
@ -439,7 +476,6 @@ class Profile(object):
        logSF = self.fit_dist.dist.logsf(self.x, *mphat)
        return np.where(np.isfinite(logSF), logSF, np.nan)
    def _profile_fun(self, free_par, fix_par):
        ''' Return negative of loglike or logps function
@ -449,7 +485,8 @@ class Profile(object):
        '''
        par = self._par.copy()
        par[self.i_free] = free_par
-        # _local_link: connects fixed quantile or probability with fixed distribution parameter
+        # _local_link: connects fixed quantile or probability with fixed
        # distribution parameter
        par[self.i_fixed] = self._local_link(fix_par, par)
        return self.fit_dist.fitfun(par)
@ -457,7 +494,9 @@ class Profile(object):
        '''Return confidence interval for profiled parameter
        '''
        if alpha < self.alpha:
-            warnings.warn('Might not be able to return CI with alpha less than %g' % self.alpha)
+            warnings.warn(
                'Might not be able to return CI with alpha less than %g' %
                self.alpha)
        cross_level = self.Lmax - 0.5 * chi2isf(alpha, 1)
        ind = findcross(self.data, cross_level)
        N = len(ind)
@ -486,13 +525,15 @@ class Profile(object):
    def plot(self):
        ''' Plot profile function with 100(1-alpha)% CI
        '''
-        plotbackend.plot(self.args, self.data,
+        plotbackend.plot(
            self.args, self.data,
            self.args[[0, -1]], [self.Lmax, ] * 2, 'r',
            self.args[[0, -1]], [self.alpha_cross_level, ] * 2, 'r')
        plotbackend.title(self.title)
        plotbackend.ylabel(self.ylabel)
        plotbackend.xlabel(self.xlabel)
 def _discretize_adaptive(fun, a, b, tol=0.005, n=5):
    '''
    Automatic discretization of function, adaptive gridding.
@ -514,7 +555,8 @@ def _discretize_adaptive(fun, a, b, tol=0.005, n=5):
            I, = np.where(erri > tol)
            # double the sample rate in intervals with the most error
-            y = (np.vstack(((x[I] + x[I - 1]) / 2, (x[I + 1] + x[I]) / 2)).T).ravel()    
+            y = (np.vstack(((x[I] + x[I - 1]) / 2,
                           (x[I + 1] + x[I]) / 2)).T).ravel()
            fy = fun(y)
            fy0 = np.interp(y, x, fx)
@ -536,7 +578,10 @@ def _discretize_adaptive(fun, a, b, tol=0.005, n=5):
    return x, fx
 # class to fit given distribution to data
 class FitDistribution(rv_frozen):
    '''
    Return estimators to shape, location, and scale from data
@ -593,7 +638,6 @@ class FitDistribution(rv_frozen):
        par_lower : lower (1-alpha)% confidence bound for the parameters
        par_upper : upper (1-alpha)% confidence bound for the parameters
    Note
    ----
    `data` is sorted using this function, so if `copydata`==False the data
@ -631,6 +675,7 @@ class FitDistribution(rv_frozen):
    >>> Lsf.plot()
    >>> sf_ci = Lsf.get_bounds(alpha=0.2)
    '''
    def __init__(self, dist, data, *args, **kwds):
        extradoc = '''
    plotfitsummary()
@ -684,13 +729,13 @@ class FitDistribution(rv_frozen):
        self.dist = dist
        numargs = dist.numargs
-        self.method=self.alpha=self.par_fix=self.search=self.copydata=None
+        self.method = self.alpha = self.par_fix = self.search = None
        self.copydata = None
        m_variables = ['method', 'alpha', 'par_fix', 'search', 'copydata']
        m_defaults = ['ml', 0.05, None, True, True]
        for (name, val) in zip(m_variables, m_defaults):
            setattr(self, name, kwds.get(name, val))
        #self.method, self.alpha, self.par_fix, self.search, self.copydata = map(kwds.get, m_variables, m_defaults)
        if self.method.lower()[:].startswith('mps'):
            self._fitfun = dist.nlogps
        else:
@ -726,6 +771,14 @@ class FitDistribution(rv_frozen):
        self.LPSmax = -dist.nlogps(self.par, self.data)
        self.pvalue = self._pvalue(self.par, self.data, unknown_numpar=numpar)
    def __repr__(self):
        params = ['alpha', 'method', 'LLmax', 'LPSmax', 'pvalue',
                  'par', 'par_lower', 'par_upper', 'par_fix', 'par_cov']
        t = ['%s:\n' % self.__class__.__name__]
        for par in params:
            t.append('%s = %s\n' % (par, str(getattr(self, par))))
        return ''.join(t)
    def _reduce_func(self, args, kwds):
        args = list(args)
        Nargs = len(args)
@ -734,7 +787,7 @@ class FitDistribution(rv_frozen):
        names = ['f%d' % n for n in range(Nargs - 2)] + ['floc', 'fscale']
        x0 = args[:]
        for n, key in zip(index[::-1], names[::-1]):
-            if kwds.has_key(key):
+            if key in kwds:
                fixedn.append(n)
                args[n] = kwds[key]
                del x0[n]
@ -746,7 +799,9 @@ class FitDistribution(rv_frozen):
            restore = None
        else:
            if len(fixedn) == len(index):
-                raise ValueError, "All parameters fixed. There is nothing to optimize."
+                raise ValueError("All parameters fixed. " +
                                 "There is nothing to optimize.")
            def restore(args, theta):
                # Replace with theta for all numbers not in fixedn
                # This allows the non-fixed values to vary, but
@ -771,11 +826,11 @@ class FitDistribution(rv_frozen):
        Narg = len(args)
        if Narg > dist.numargs:
-                raise ValueError, "Too many input arguments."
+                raise ValueError("Too many input arguments.")
        start = [None] * 2
-        if (Narg < dist.numargs) or not (kwds.has_key('loc') and
+        if (Narg < dist.numargs) or not ('loc' in kwds and 'scale' in kwds):
-                                         kwds.has_key('scale')):
+            # get distribution specific starting locations
-            start = dist._fitstart(data)  # get distribution specific starting locations
+            start = dist._fitstart(data)
            args += start[Narg:-2]
        loc = kwds.get('loc', start[-2])
        scale = kwds.get('scale', start[-1])
@ -784,7 +839,8 @@ class FitDistribution(rv_frozen):
        if self.search:
            optimizer = kwds.get('optimizer', optimize.fmin)
            # convert string to function in scipy.optimize
-            if not callable(optimizer) and isinstance(optimizer, (str, unicode)):
+            if (not callable(optimizer) and
                isinstance(optimizer, (str, unicode))):
                if not optimizer.startswith('fmin_'):
                    optimizer = "fmin_" + optimizer
                if optimizer == 'fmin_':
@ -792,7 +848,7 @@ class FitDistribution(rv_frozen):
                try:
                    optimizer = getattr(optimize, optimizer)
                except AttributeError:
-                    raise ValueError, "%s is not a valid optimizer" % optimizer
+                    raise ValueError("%s is not a valid optimizer" % optimizer)
            vals = optimizer(func, x0, args=(ravel(data),), disp=0)
            vals = tuple(vals)
@ -836,30 +892,31 @@ class FitDistribution(rv_frozen):
        **kwds : named arguments with keys
        i : scalar integer
            defining which distribution parameter to profile, i.e. which
-            parameter to keep fixed (default index to first non-fixed parameter)
+            parameter to keep fixed (default first non-fixed parameter)
        pmin, pmax : real scalars
            Interval for either the parameter, phat(i), prb, or x, used in the
            optimization of the profile function (default is based on the
-            100*(1-alpha)% confidence interval computed using the delta method.)
+            100*(1-alpha)% confidence interval computed with the delta method.)
        N : scalar integer
            Max number of points used in Lp (default 100)
        x : real scalar
            Quantile (return value) (default None)
        logSF : real scalar
            log survival probability,i.e., SF = Prob(X>x;phat) (default None)
-        link : function connecting the quantile (x) and the survival probability
+        link : function connecting the x-quantile and the survival probability
            (SF) with the fixed distribution parameter, i.e.:
-            self.par[i] = link(x,logSF,self.par,i), where logSF = log(Prob(X>x;phat)).
+            self.par[i] = link(x,logSF,self.par,i), where
            logSF = log(Prob(X>x;phat)).
            This means that if:
                1) x is not None then x is profiled
                2) logSF is not None then logSF is profiled
-                3) x and logSF both are None then self.par[i] is profiled (default)
+                3) x and logSF are None then self.par[i] is profiled (default)
        alpha : real scalar
            confidence coefficient (default 0.05)
        Returns
        -------
        Lp : Profile log-likelihood function with parameters phat given
-            the data, phat(i), probability (prb) and quantile (x) (if given), i.e.,
+            the data, phat(i), probability (prb) and quantile (x), i.e.,
                Lp = max(log(f(phat|data,phat(i)))),
            or
                Lp = max(log(f(phat|data,phat(i),x,prb)))
@ -878,10 +935,11 @@ class FitDistribution(rv_frozen):
        Lmax : Maximum value of profile function
        alpha_cross_level :
-        PROFILE is a utility function for making inferences either on a particular
+        PROFILE is a utility function for making inferences either on a
-        component of the vector phat or the quantile, x, or the probability, SF.
+        particular component of the vector phat or the quantile, x, or the
-        This is usually more accurate than using the delta method assuming
+        probability, SF. This is usually more accurate than using the delta
-        asymptotic normality of the ML estimator or the MPS estimator.
+        method assuming asymptotic normality of the ML estimator or the MPS
        estimator.
        Examples
        --------
@ -917,12 +975,13 @@ class FitDistribution(rv_frozen):
    def plotfitsummary(self):
        ''' Plot various diagnostic plots to assess the quality of the fit.
-        PLOTFITSUMMARY displays probability plot, density plot, residual quantile
+        PLOTFITSUMMARY displays probability plot, density plot, residual
-        plot and residual probability plot.
+        quantile plot and residual probability plot.
        The purpose of these plots is to graphically assess whether the data
-        could come from the fitted distribution. If so the empirical- CDF and PDF
+        could come from the fitted distribution. If so the empirical- CDF and
-        should follow the model and the residual plots will be linear. Other
+        PDF should follow the model and the residual plots will be linear.
-        distribution types will introduce curvature in the residual plots.
+        Other distribution types will introduce curvature in the residual
        plots.
        '''
        plotbackend.subplot(2, 2, 1)
        # self.plotecdf()
@ -944,8 +1003,8 @@ class FitDistribution(rv_frozen):
                phatvstr = format1 % tuple(self.par[self.i_fixed])
                fixstr = 'Fixed: phat[%s] = %s ' % (phatistr, phatvstr)
-
+        infostr = 'Fit method: %s, Fit p-value: %2.2f %s' % (
-        infostr = 'Fit method: %s, Fit p-value: %2.2f %s' % (self.method, self.pvalue, fixstr)
+            self.method, self.pvalue, fixstr)
        try:
            plotbackend.figtext(0.05, 0.01, infostr)
        except:
@ -961,9 +1020,10 @@ class FitDistribution(rv_frozen):
        '''
        n = len(self.data)
        SF = (arange(n, 0, -1)) / n
-        plotbackend.semilogy(self.data, SF, symb2, self.data, self.sf(self.data), symb1)
+        plotbackend.semilogy(
            self.data, SF, symb2, self.data, self.sf(self.data), symb1)
        # plotbackend.plot(self.data,SF,'b.',self.data,self.sf(self.data),'r-')
-        plotbackend.xlabel('x');
+        plotbackend.xlabel('x')
        plotbackend.ylabel('F(x) (%s)' % self.dist.name)
        plotbackend.title('Empirical SF plot')
@ -977,59 +1037,64 @@ class FitDistribution(rv_frozen):
        '''
        n = len(self.data)
        F = (arange(1, n + 1)) / n
-        plotbackend.plot(self.data, F, symb2, self.data, self.cdf(self.data), symb1)
+        plotbackend.plot(self.data, F, symb2,
-        plotbackend.xlabel('x');
+                         self.data, self.cdf(self.data), symb1)
        plotbackend.xlabel('x')
        plotbackend.ylabel('F(x) (%s)' % self.dist.name)
        plotbackend.title('Empirical CDF plot')
-    def plotepdf(self, symb1='r-', symb2='b-'):
+    def _get_grid(self, odd=False):
        '''Plot Empirical and fitted Probability Density Function
        The purpose of the plot is to graphically assess whether
        the data could come from the fitted distribution.
        If so the histogram should resemble the model density.
        Other distribution types will introduce deviations in the plot.
        '''
        odd = False
        x = np.atleast_1d(self.data)
        n = np.ceil(4 * np.sqrt(np.sqrt(len(x))))
        mn = x.min()
        mx = x.max()
        d = (mx - mn) / n * 2
-        e = np.floor(np.log(d) / np.log(10));
+        e = np.floor(np.log(d) / np.log(10))
        m = np.floor(d / 10 ** e)
        if m > 5:
            m = 5
        elif m > 2:
            m = 2
        d = m * 10 ** e
        mn = (np.floor(mn / d) - 1) * d - odd * d / 2
        mx = (np.ceil(mx / d) + 1) * d + odd * d / 2
        limits = np.arange(mn, mx, d)
-        bin, limits = np.histogram(self.data, bins=limits, normed=True) #, new=True) @ReservedAssignment
+        return limits
-        limits.shape = (-1, 1)
+
-        xx = limits.repeat(3, axis=1)
+    def _staircase(self, x, y):
-        xx.shape = (-1,)
+        xx = x.reshape(-1, 1).repeat(3, axis=1).ravel()[1:-1]
-        xx = xx[1:-1]
+        yy = y.reshape(-1, 1).repeat(3, axis=1)
        bin.shape = (-1, 1)
        yy = bin.repeat(3, axis=1)
        # yy[0,0] = 0.0 # pdf
        yy[:, 0] = 0.0  # histogram
        yy.shape = (-1,)
        yy = numpy.hstack((yy, 0.0))
-        ymax = yy.max()
+        return xx, yy
    def _get_empirical_pdf(self):
        limits = self._get_grid()
        pdf, x = np.histogram(self.data, bins=limits, normed=True)
        return self._staircase(x, pdf)
    def plotepdf(self, symb1='r-', symb2='b-'):
        '''Plot Empirical and fitted Probability Density Function
        The purpose of the plot is to graphically assess whether
        the data could come from the fitted distribution.
        If so the histogram should resemble the model density.
        Other distribution types will introduce deviations in the plot.
        '''
        x, pdf = self._get_empirical_pdf()
        ymax = pdf.max()
        # plotbackend.hist(self.data,normed=True,fill=False)
-        plotbackend.plot(self.data, self.pdf(self.data), symb1, xx, yy, symb2)
+        plotbackend.plot(self.data, self.pdf(self.data), symb1,
                         x, pdf, symb2)
        ax = list(plotbackend.axis())
        ax[3] = min(ymax * 1.3, ax[3])
        plotbackend.axis(ax)
-        plotbackend.xlabel('x');
+        plotbackend.xlabel('x')
        plotbackend.ylabel('f(x) (%s)' % self.dist.name)
        plotbackend.title('Density plot')
    def plotresq(self, symb1='r-', symb2='b.'):
        '''PLOTRESQ displays a residual quantile plot.
@ -1045,33 +1110,34 @@ class FitDistribution(rv_frozen):
        plotbackend.plot(self.data, y, symb2, y1, y1, symb1)
        plotbackend.xlabel('Empirical')
        plotbackend.ylabel('Model (%s)' % self.dist.name)
-        plotbackend.title('Residual Quantile Plot');
+        plotbackend.title('Residual Quantile Plot')
        plotbackend.axis('tight')
        plotbackend.axis('equal')
    def plotresprb(self, symb1='r-', symb2='b.'):
        ''' PLOTRESPRB displays a residual probability plot.
        The purpose of the plot is to graphically assess whether
        the data could come from the fitted distribution. If so the
-        plot will be linear. Other distribution types will introduce curvature in the plot.
+        plot will be linear. Other distribution types will introduce curvature
        in the plot.
        '''
-        n = len(self.data);
+        n = len(self.data)
        # ecdf = (0.5:n-0.5)/n;
        ecdf = arange(1, n + 1) / (n + 1)
        mcdf = self.cdf(self.data)
        p1 = [0, 1]
-        plotbackend.plot(ecdf, mcdf, symb2, p1, p1, symb1)
+        plotbackend.plot(ecdf, mcdf, symb2,
                         p1, p1, symb1)
        plotbackend.xlabel('Empirical')
        plotbackend.ylabel('Model (%s)' % self.dist.name)
-        plotbackend.title('Residual Probability Plot');
+        plotbackend.title('Residual Probability Plot')
        plotbackend.axis('equal')
        plotbackend.axis([0, 1, 0, 1])
    def _pvalue(self, theta, x, unknown_numpar=None):
-        ''' Return the P-value for the fit using Moran's negative log Product Spacings statistic
+        ''' Return P-value for the fit using Moran's negative log Product
        Spacings statistic
            where theta are the parameters (including loc and scale)
@ -1080,7 +1146,9 @@ class FitDistribution(rv_frozen):
        dx = numpy.diff(x, axis=0)
        tie = (dx == 0)
        if any(tie):
-            warnings.warn('P-value is on the conservative side (i.e. too large) due to ties in the data!')
+            warnings.warn(
                'P-value is on the conservative side (i.e. too large) due to' +
                ' ties in the data!')
        T = self.dist.nlogps(theta, x)
@ -1096,54 +1164,51 @@ class FitDistribution(rv_frozen):
        v = (np1) * (pi ** 2. / 6.0 - 1.0) - 0.5 - 1.0 / (6. * (np1))
        C1 = m - sqrt(0.5 * n * v)
        C2 = sqrt(v / (2.0 * n))
-        Tn = (T + 0.5 * k * isParUnKnown - C1) / C2 # chi2 with n degrees of freedom
+        # chi2 with n degrees of freedom
        Tn = (T + 0.5 * k * isParUnKnown - C1) / C2
        pvalue = chi2sf(Tn, n)  # _WAFODIST.chi2.sf(Tn, n)
        return pvalue
 def test_doctstrings():
    """Run the doctests via ``doctest.testmod`` with its default arguments."""
    from doctest import testmod
    testmod()
 def test1():
    import wafo.stats as ws
    dist = ws.weibull_min
-    dist = ws.bradford
+    #dist = ws.bradford
-    R = dist.rvs(0.3,size=1000);
+    R = dist.rvs(0.3, size=1000)
    phat = FitDistribution(dist, R, method='ml')
-#    # Better CI for phat.par[i=0]
+# Better CI for phat.par[i=0]
    Lp1 = Profile(phat, i=0)  # @UnusedVariable
 #    Lp2 = Profile(phat, i=2)
 #    SF = 1./990
 #    x = phat.isf(SF)
 #
-#    # CI for x
+# CI for x
 #    Lx = Profile(phat, i=0,x=x,link=phat.dist.link)
 #    Lx.plot()
 #    x_ci = Lx.get_bounds(alpha=0.2)
 #
-#     # CI for logSF=log(SF)
+# CI for logSF=log(SF)
 #    Lsf = phat.profile(i=0, logSF=log(SF), link=phat.dist.link)
 #    Lsf.plot()
 #    sf_ci = Lsf.get_bounds(alpha=0.2)
 #    pass
 #    _WAFODIST = ppimport('wafo.stats.distributions')
-#     #nbinom(10, 0.75).rvs(3)
+# nbinom(10, 0.75).rvs(3)
 #    import matplotlib
 #    matplotlib.interactive(True)
 #    t = _WAFODIST.bernoulli(0.75).rvs(3)
 #    x = np.r_[5, 10]
 #    npr = np.r_[9, 9]
 #    t2 = _WAFODIST.bd0(x, npr)
-#    #Examples   MLE and better CI for phat.par[0]
+# Examples   MLE and better CI for phat.par[0]
 #    R = _WAFODIST.weibull_min.rvs(1, size=100);
 #    phat = _WAFODIST.weibull_min.fit(R, 1, 1, par_fix=[nan, 0, nan])
 #    Lp = phat.profile(i=0)
@ -1152,18 +1217,18 @@ def test1():
 #    R = 1. / 990
 #    x = phat.isf(R)
 #
-#    # CI for x
+# CI for x
 #    Lx = phat.profile(i=0, x=x)
 #    Lx.plot()
 #    Lx.get_bounds(alpha=0.2)
 #
-#    # CI for logSF=log(SF)
+# CI for logSF=log(SF)
 #    Lpr = phat.profile(i=0, logSF=log(R), link=phat.dist.link)
 #    Lpr.plot()
 #    Lpr.get_bounds(alpha=0.075)
 #
 #    _WAFODIST.dlaplace.stats(0.8, loc=0)
-##    pass
+# pass
 #    t = _WAFODIST.planck(0.51000000000000001)
 #    t.ppf(0.5)
 #    t = _WAFODIST.zipf(2)
@ -1172,15 +1237,14 @@ def test1():
 #    _WAFODIST.rice.rvs(1)
 #    x = plb.linspace(-5, 5)
 #    y = _WAFODIST.genpareto.cdf(x, 0)
-#    #plb.plot(x,y)
+# plb.plot(x,y)
-#    #plb.show()
+# plb.show()
 #
 #
 #    on = ones((2, 3))
 #    r = _WAFODIST.genpareto.rvs(0, size=100)
 #    pht = _WAFODIST.genpareto.fit(r, 1, par_fix=[0, 0, nan])
 #    lp = pht.profile()
 if __name__ == '__main__':
    test1()
    # test_doctstrings()
--- a/pywafo/src/wafo/stats/kde.py
+++ b/pywafo/src/wafo/stats/kde.py
@ -0,0 +1,513 @@
 #-------------------------------------------------------------------------------
 #
 #  Define classes for (uni/multi)-variate kernel density estimation.
 #
 #  Currently, only Gaussian kernels are implemented.
 #
 #  Written by: Robert Kern
 #
 #  Date: 2004-08-09
 #
 #  Modified: 2005-02-10 by Robert Kern.
 #              Contributed to Scipy
 #            2005-10-07 by Robert Kern.
 #              Some fixes to match the new scipy_core
 #
 #  Copyright 2004-2005 by Enthought, Inc.
 #
 #-------------------------------------------------------------------------------
 from __future__ import division, print_function, absolute_import
 # Standard library imports.
 import warnings
 # Scipy imports.
 from scipy.lib.six import callable, string_types
 from scipy import linalg, special
 from numpy import atleast_2d, reshape, zeros, newaxis, dot, exp, pi, sqrt, \
     ravel, power, atleast_1d, squeeze, sum, transpose
 import numpy as np
 from numpy.random import randint, multivariate_normal
 # Local imports.
 from . import mvn
 __all__ = ['gaussian_kde']
 class gaussian_kde(object):
    """Representation of a kernel-density estimate using Gaussian kernels.

    Kernel density estimation is a way to estimate the probability density
    function (PDF) of a random variable in a non-parametric way.
    `gaussian_kde` works for both uni-variate and multi-variate data.   It
    includes automatic bandwidth determination.  The estimation works best for
    a unimodal distribution; bimodal or multi-modal distributions tend to be
    oversmoothed.

    Parameters
    ----------
    dataset : array_like
        Datapoints to estimate from. In case of univariate data this is a 1-D
        array, otherwise a 2-D array with shape (# of dims, # of data).
    bw_method : str, scalar or callable, optional
        The method used to calculate the estimator bandwidth.  This can be
        'scott', 'silverman', a scalar constant or a callable.  If a scalar,
        this will be used directly as `kde.factor`.  If a callable, it should
        take a `gaussian_kde` instance as only parameter and return a scalar.
        If None (default), 'scott' is used.  See Notes for more details.

    Attributes
    ----------
    dataset : ndarray
        The dataset with which `gaussian_kde` was initialized.
    d : int
        Number of dimensions.
    n : int
        Number of datapoints.
    factor : float
        The bandwidth factor, obtained from `kde.covariance_factor`, with which
        the covariance matrix is multiplied.
    covariance : ndarray
        The covariance matrix of `dataset`, scaled by the calculated bandwidth
        (`kde.factor`).
    inv_cov : ndarray
        The inverse of `covariance`.

    Methods
    -------
    kde.evaluate(points) : ndarray
        Evaluate the estimated pdf on a provided set of points.
    kde(points) : ndarray
        Same as kde.evaluate(points)
    kde.integrate_gaussian(mean, cov) : float
        Multiply pdf with a specified Gaussian and integrate over the whole
        domain.
    kde.integrate_box_1d(low, high) : float
        Integrate pdf (1D only) between two bounds.
    kde.integrate_box(low_bounds, high_bounds) : float
        Integrate pdf over a rectangular space between low_bounds and
        high_bounds.
    kde.integrate_kde(other_kde) : float
        Integrate two kernel density estimates multiplied together.
    kde.resample(size=None) : ndarray
        Randomly sample a dataset from the estimated pdf.
    kde.set_bandwidth(bw_method='scott') : None
        Computes the bandwidth, i.e. the coefficient that multiplies the data
        covariance matrix to obtain the kernel covariance matrix.
        .. versionadded:: 0.11.0
    kde.covariance_factor : float
        Computes the coefficient (`kde.factor`) that multiplies the data
        covariance matrix to obtain the kernel covariance matrix.
        The default is `scotts_factor`.  A subclass can overwrite this method
        to provide a different method, or set it through a call to
        `kde.set_bandwidth`.

    Notes
    -----
    Bandwidth selection strongly influences the estimate obtained from the KDE
    (much more so than the actual shape of the kernel).  Bandwidth selection
    can be done by a "rule of thumb", by cross-validation, by "plug-in
    methods" or by other means; see [3]_, [4]_ for reviews.  `gaussian_kde`
    uses a rule of thumb, the default is Scott's Rule.

    Scott's Rule [1]_, implemented as `scotts_factor`, is::

        n**(-1./(d+4)),

    with ``n`` the number of data points and ``d`` the number of dimensions.
    Silverman's Rule [2]_, implemented as `silverman_factor`, is::

        (n * (d + 2) / 4.)**(-1. / (d + 4)).

    Good general descriptions of kernel density estimation can be found in [1]_
    and [2]_, the mathematics for this multi-dimensional implementation can be
    found in [1]_.

    References
    ----------
    .. [1] D.W. Scott, "Multivariate Density Estimation: Theory, Practice, and
           Visualization", John Wiley & Sons, New York, Chicester, 1992.
    .. [2] B.W. Silverman, "Density Estimation for Statistics and Data
           Analysis", Vol. 26, Monographs on Statistics and Applied Probability,
           Chapman and Hall, London, 1986.
    .. [3] B.A. Turlach, "Bandwidth Selection in Kernel Density Estimation: A
           Review", CORE and Institut de Statistique, Vol. 19, pp. 1-33, 1993.
    .. [4] D.M. Bashtannyk and R.J. Hyndman, "Bandwidth selection for kernel
           conditional density estimation", Computational Statistics & Data
           Analysis, Vol. 36, pp. 279-298, 2001.

    Examples
    --------
    Generate some random two-dimensional data:

    >>> from scipy import stats
    >>> def measure(n):
    ...     "Measurement model, return two coupled measurements."
    ...     m1 = np.random.normal(size=n)
    ...     m2 = np.random.normal(scale=0.5, size=n)
    ...     return m1+m2, m1-m2

    >>> m1, m2 = measure(2000)
    >>> xmin = m1.min()
    >>> xmax = m1.max()
    >>> ymin = m2.min()
    >>> ymax = m2.max()

    Perform a kernel density estimate on the data:

    >>> X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
    >>> positions = np.vstack([X.ravel(), Y.ravel()])
    >>> values = np.vstack([m1, m2])
    >>> kernel = stats.gaussian_kde(values)
    >>> Z = np.reshape(kernel(positions).T, X.shape)

    Plot the results:

    >>> import matplotlib.pyplot as plt
    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111)
    >>> ax.imshow(np.rot90(Z), cmap=plt.cm.gist_earth_r,
    ...           extent=[xmin, xmax, ymin, ymax])
    >>> ax.plot(m1, m2, 'k.', markersize=2)
    >>> ax.set_xlim([xmin, xmax])
    >>> ax.set_ylim([ymin, ymax])
    >>> plt.show()

    """

    def __init__(self, dataset, bw_method=None):
        self.dataset = atleast_2d(dataset)
        if not self.dataset.size > 1:
            raise ValueError("`dataset` input should have multiple elements.")

        # Rows are dimensions, columns are observations.
        self.d, self.n = self.dataset.shape
        self.set_bandwidth(bw_method=bw_method)

    def evaluate(self, points):
        """Evaluate the estimated pdf on a set of points.

        Parameters
        ----------
        points : (# of dimensions, # of points)-array
            Alternatively, a (# of dimensions,) vector can be passed in and
            treated as a single point.

        Returns
        -------
        values : (# of points,)-array
            The values at each point.

        Raises
        ------
        ValueError : if the dimensionality of the input points is different than
                     the dimensionality of the KDE.

        """
        points = atleast_2d(points)

        d, m = points.shape
        if d != self.d:
            if d == 1 and m == self.d:
                # points was passed in as a row vector
                points = reshape(points, (self.d, 1))
                m = 1
            else:
                msg = "points have dimension %s, dataset has dimension %s" % (d,
                    self.d)
                raise ValueError(msg)

        # The builtin ``float`` is used because the ``np.float`` alias no
        # longer exists in current NumPy releases.
        result = zeros((m,), dtype=float)

        # Always loop over the shorter of the two axes and vectorise over
        # the longer one.
        if m >= self.n:
            # there are more points than data, so loop over data
            for i in range(self.n):
                diff = self.dataset[:, i, newaxis] - points
                tdiff = dot(self.inv_cov, diff)
                energy = sum(diff * tdiff, axis=0) / 2.0
                result = result + exp(-energy)
        else:
            # loop over points
            for i in range(m):
                diff = self.dataset - points[:, i, newaxis]
                tdiff = dot(self.inv_cov, diff)
                energy = sum(diff * tdiff, axis=0) / 2.0
                result[i] = sum(exp(-energy), axis=0)

        result = result / self._norm_factor

        return result

    __call__ = evaluate

    def integrate_gaussian(self, mean, cov):
        """
        Multiply estimated density by a multivariate Gaussian and integrate
        over the whole space.

        Parameters
        ----------
        mean : array_like
            A 1-D array, specifying the mean of the Gaussian.
        cov : array_like
            A 2-D array, specifying the covariance matrix of the Gaussian.

        Returns
        -------
        result : scalar
            The value of the integral.

        Raises
        ------
        ValueError :
            If the mean or covariance of the input Gaussian differs from
            the KDE's dimensionality.

        """
        mean = atleast_1d(squeeze(mean))
        cov = atleast_2d(cov)

        if mean.shape != (self.d,):
            raise ValueError("mean does not have dimension %s" % self.d)
        if cov.shape != (self.d, self.d):
            raise ValueError("covariance does not have dimension %s" % self.d)

        # make mean a column vector
        mean = mean[:, newaxis]

        sum_cov = self.covariance + cov

        diff = self.dataset - mean
        tdiff = dot(linalg.inv(sum_cov), diff)

        energies = sum(diff * tdiff, axis=0) / 2.0
        result = sum(exp(-energies), axis=0) / sqrt(linalg.det(2 * pi *
                                                        sum_cov)) / self.n

        return result

    def integrate_box_1d(self, low, high):
        """
        Computes the integral of a 1D pdf between two bounds.

        Parameters
        ----------
        low : scalar
            Lower bound of integration.
        high : scalar
            Upper bound of integration.

        Returns
        -------
        value : scalar
            The result of the integral.

        Raises
        ------
        ValueError
            If the KDE is over more than one dimension.

        """
        if self.d != 1:
            raise ValueError("integrate_box_1d() only handles 1D pdfs")

        stdev = ravel(sqrt(self.covariance))[0]

        normalized_low = ravel((low - self.dataset) / stdev)
        normalized_high = ravel((high - self.dataset) / stdev)

        # Average, over the data points, of the standard normal CDF
        # difference between the two normalised bounds.
        value = np.mean(special.ndtr(normalized_high) -
                        special.ndtr(normalized_low))
        return value

    def integrate_box(self, low_bounds, high_bounds, maxpts=None):
        """Computes the integral of a pdf over a rectangular interval.

        Parameters
        ----------
        low_bounds : array_like
            A 1-D array containing the lower bounds of integration.
        high_bounds : array_like
            A 1-D array containing the upper bounds of integration.
        maxpts : int, optional
            The maximum number of points to use for integration.

        Returns
        -------
        value : scalar
            The result of the integral.

        """
        # Only forward ``maxpts`` when the caller supplied it, so that
        # ``mvn.mvnun`` otherwise uses its own default.
        if maxpts is not None:
            extra_kwds = {'maxpts': maxpts}
        else:
            extra_kwds = {}

        value, inform = mvn.mvnun(low_bounds, high_bounds, self.dataset,
                                  self.covariance, **extra_kwds)
        if inform:
            msg = ('An integral in mvn.mvnun requires more points than %s' %
                   (self.d * 1000))
            warnings.warn(msg)

        return value

    def integrate_kde(self, other):
        """
        Computes the integral of the product of this  kernel density estimate
        with another.

        Parameters
        ----------
        other : gaussian_kde instance
            The other kde.

        Returns
        -------
        value : scalar
            The result of the integral.

        Raises
        ------
        ValueError
            If the KDEs have different dimensionality.

        """
        if other.d != self.d:
            raise ValueError("KDEs are not the same dimensionality")

        # we want to iterate over the smallest number of points
        if other.n < self.n:
            small = other
            large = self
        else:
            small = self
            large = other

        sum_cov = small.covariance + large.covariance
        # The inverse does not depend on the loop variable: compute it once.
        inv_sum_cov = linalg.inv(sum_cov)
        result = 0.0
        for i in range(small.n):
            mean = small.dataset[:, i, newaxis]
            diff = large.dataset - mean
            tdiff = dot(inv_sum_cov, diff)

            energies = sum(diff * tdiff, axis=0) / 2.0
            result += sum(exp(-energies), axis=0)

        result /= sqrt(linalg.det(2 * pi * sum_cov)) * large.n * small.n

        return result

    def resample(self, size=None):
        """
        Randomly sample a dataset from the estimated pdf.

        Parameters
        ----------
        size : int, optional
            The number of samples to draw.  If not provided, then the size is
            the same as the underlying dataset.

        Returns
        -------
        resample : (self.d, `size`) ndarray
            The sampled dataset.

        """
        if size is None:
            size = self.n

        # Zero-mean Gaussian noise with the kernel covariance, added to data
        # points picked uniformly at random (with replacement).
        norm = transpose(multivariate_normal(zeros((self.d,), float),
                         self.covariance, size=size))
        indices = randint(0, self.n, size=size)
        means = self.dataset[:, indices]

        return means + norm

    def scotts_factor(self):
        """Return the bandwidth factor ``n**(-1./(d+4))``."""
        return power(self.n, -1./(self.d+4))

    def silverman_factor(self):
        """Return the bandwidth factor ``(n*(d+2)/4.)**(-1./(d+4))``."""
        return power(self.n*(self.d+2.0)/4.0, -1./(self.d+4))

    #  Default method to calculate bandwidth, can be overwritten by subclass
    covariance_factor = scotts_factor

    def set_bandwidth(self, bw_method=None):
        """Compute the estimator bandwidth with given method.

        The new bandwidth calculated after a call to `set_bandwidth` is used
        for subsequent evaluations of the estimated density.

        Parameters
        ----------
        bw_method : str, scalar or callable, optional
            The method used to calculate the estimator bandwidth.  This can be
            'scott', 'silverman', a scalar constant or a callable.  If a
            scalar, this will be used directly as `kde.factor`.  If a callable,
            it should take a `gaussian_kde` instance as only parameter and
            return a scalar.  If None (default), nothing happens; the current
            `kde.covariance_factor` method is kept.

        Raises
        ------
        ValueError
            If `bw_method` is none of the accepted kinds.

        Notes
        -----
        .. versionadded:: 0.11

        Examples
        --------
        >>> x1 = np.array([-7, -5, 1, 4, 5.])
        >>> kde = stats.gaussian_kde(x1)
        >>> xs = np.linspace(-10, 10, num=50)
        >>> y1 = kde(xs)
        >>> kde.set_bandwidth(bw_method='silverman')
        >>> y2 = kde(xs)
        >>> kde.set_bandwidth(bw_method=kde.factor / 3.)
        >>> y3 = kde(xs)

        >>> fig = plt.figure()
        >>> ax = fig.add_subplot(111)
        >>> ax.plot(x1, np.ones(x1.shape) / (4. * x1.size), 'bo',
        ...         label='Data points (rescaled)')
        >>> ax.plot(xs, y1, label='Scott (default)')
        >>> ax.plot(xs, y2, label='Silverman')
        >>> ax.plot(xs, y3, label='Const (1/3 * Silverman)')
        >>> ax.legend()
        >>> plt.show()

        """
        if bw_method is None:
            pass
        elif bw_method == 'scott':
            self.covariance_factor = self.scotts_factor
        elif bw_method == 'silverman':
            self.covariance_factor = self.silverman_factor
        elif np.isscalar(bw_method) and not isinstance(bw_method, string_types):
            self._bw_method = 'use constant'
            self.covariance_factor = lambda: bw_method
        elif callable(bw_method):
            self._bw_method = bw_method
            self.covariance_factor = lambda: self._bw_method(self)
        else:
            msg = ("`bw_method` should be 'scott', 'silverman', a scalar "
                   "or a callable.")
            raise ValueError(msg)

        self._compute_covariance()

    def _compute_covariance(self):
        """Computes the covariance matrix for each Gaussian kernel using
        covariance_factor().
        """
        self.factor = self.covariance_factor()
        # Cache covariance and inverse covariance of the data
        if not hasattr(self, '_data_inv_cov'):
            self._data_covariance = atleast_2d(np.cov(self.dataset, rowvar=1,
                                               bias=False))
            self._data_inv_cov = linalg.inv(self._data_covariance)

        self.covariance = self._data_covariance * self.factor**2
        self.inv_cov = self._data_inv_cov / self.factor**2
        self._norm_factor = sqrt(linalg.det(2*pi*self.covariance)) * self.n
--- a/pywafo/src/wafo/stats/morestats.py
+++ b/pywafo/src/wafo/stats/morestats.py
--- a/pywafo/src/wafo/stats/mstats.py
+++ b/pywafo/src/wafo/stats/mstats.py
@ -0,0 +1,82 @@
 """
 ===================================================================
 Statistical functions for masked arrays (:mod:`scipy.stats.mstats`)
 ===================================================================
 .. currentmodule:: scipy.stats.mstats
 This module contains a large number of statistical functions that can
 be used with masked arrays.
 Most of these functions are similar to those in scipy.stats but might
 have small differences in the API or in the algorithm used. Since this
 is a relatively new package, some API changes are still possible.
 .. autosummary::
   :toctree: generated/
   argstoarray
   betai
   chisquare
   count_tied_groups
   describe
   f_oneway
   f_value_wilks_lambda
   find_repeats
   friedmanchisquare
   gmean
   hmean
   kendalltau
   kendalltau_seasonal
   kruskalwallis
   kruskalwallis
   ks_twosamp
   ks_twosamp
   kurtosis
   kurtosistest
   linregress
   mannwhitneyu
   plotting_positions
   mode
   moment
   mquantiles
   msign
   normaltest
   obrientransform
   pearsonr
   plotting_positions
   pointbiserialr
   rankdata
   scoreatpercentile
   sem
   signaltonoise
   skew
   skewtest
   spearmanr
   theilslopes
   threshold
   tmax
   tmean
   tmin
   trim
   trima
   trimboth
   trimmed_stde
   trimr
   trimtail
   tsem
   ttest_onesamp
   ttest_ind
   ttest_onesamp
   ttest_rel
   tvar
   variation
   winsorize
   zmap
   zscore
 """
 from __future__ import division, print_function, absolute_import
 from .mstats_basic import *
 from .mstats_extras import *
--- a/pywafo/src/wafo/stats/mstats_basic.py
+++ b/pywafo/src/wafo/stats/mstats_basic.py
--- a/pywafo/src/wafo/stats/mstats_extras.py
+++ b/pywafo/src/wafo/stats/mstats_extras.py
@ -0,0 +1,466 @@
 """
 Additional statistics functions, with support to MA.
 :author: Pierre GF Gerard-Marchant
 :contact: pierregm_at_uga_edu
 :date: $Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $
 :version: $Id: morestats.py 3473 2007-10-29 15:18:13Z jarrod.millman $
 """
 from __future__ import division, print_function, absolute_import
 __author__ = "Pierre GF Gerard-Marchant"
 __docformat__ = "restructuredtext en"
 __all__ = ['compare_medians_ms',
           'hdquantiles', 'hdmedian', 'hdquantiles_sd',
           'idealfourths',
           'median_cihs','mjci','mquantiles_cimj',
           'rsh',
           'trimmed_mean_ci',]
 import numpy as np
 from numpy import float_, int_, ndarray
 import numpy.ma as ma
 from numpy.ma import MaskedArray
 from . import mstats_basic as mstats
 from scipy.stats.distributions import norm, beta, t, binom
 #####--------------------------------------------------------------------------
 #---- --- Quantiles ---
 #####--------------------------------------------------------------------------
 def hdquantiles(data, prob=list([.25,.5,.75]), axis=None, var=False,):
    """
    Computes quantile estimates with the Harrell-Davis method.

    The quantile estimates are calculated as a weighted linear combination
    of order statistics.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : boolean
        Whether to return the variance of the estimate.

    Returns
    -------
    hdquantiles : MaskedArray
        A (p,) array of quantiles (if `var` is False), or a (2,p) array of
        quantiles and variances (if `var` is True), where ``p`` is the
        number of quantiles.

    Raises
    ------
    ValueError
        If `axis` is given and `data` has more than two dimensions.

    """
    def _hd_1D(data, prob, var):
        "Computes the HD quantiles for a 1D array. Returns nan for invalid data."
        xsorted = np.squeeze(np.sort(data.compressed().view(ndarray)))
        # Don't use length here, in case we have a numpy scalar
        n = xsorted.size

        # Row 0 holds the estimates, row 1 their variances.
        hd = np.empty((2, len(prob)), np.float64)
        if n < 2:
            hd.flat = np.nan
            if var:
                return hd
            return hd[0]

        v = np.arange(n+1) / float(n)
        betacdf = beta.cdf
        for (i, p) in enumerate(prob):
            # Weights are increments of a Beta CDF over the grid 0, 1/n, ..., 1.
            _w = betacdf(v, (n+1)*p, (n+1)*(1-p))
            w = _w[1:] - _w[:-1]
            hd_mean = np.dot(w, xsorted)
            hd[0, i] = hd_mean
            hd[1, i] = np.dot(w, (xsorted-hd_mean)**2)

        # The Beta parameters degenerate at p == 0 and p == 1, so those
        # quantiles are set to the sample extremes directly.
        hd[0, prob == 0] = xsorted[0]
        hd[0, prob == 1] = xsorted[-1]
        if var:
            hd[1, prob == 0] = hd[1, prob == 1] = np.nan
            return hd
        return hd[0]

    # Initialization & checks ---------
    data = ma.array(data, copy=False, dtype=np.float64)
    # ``copy=False`` is deliberately not passed here: NumPy >= 2 raises when
    # a copy is unavoidable, which is always the case for a plain list.
    p = np.atleast_1d(np.asarray(prob))
    # Computes quantiles along axis (or globally)
    if (axis is None) or (data.ndim == 1):
        result = _hd_1D(data, p, var)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hd_1D, axis, data, p, var)

    return ma.fix_invalid(result, copy=False)
 #..............................................................................
 def hdmedian(data, axis=-1, var=False):
    """
    Harrell-Davis estimate of the median along the given axis.

    Thin wrapper that asks `hdquantiles` for the single quantile 0.5 and
    squeezes the result.

    Parameters
    ----------
    data : ndarray
        Data array.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : boolean
        Whether to return the variance of the estimate.

    """
    median_only = [0.5]
    estimate = hdquantiles(data, median_only, axis=axis, var=var)
    return estimate.squeeze()
 #..............................................................................
 def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
    """
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    Raises
    ------
    ValueError
        If `axis` is given and `data` has more than two dimensions.

    """
    def _hdsd_1D(data, prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)

        hdsd = np.empty(len(prob), np.float64)
        if n < 2:
            # Too few points to leave one out: report NaN right away instead
            # of falling through into divisions by ``n - 1 == 0``.
            hdsd.flat = np.nan
            return hdsd

        vv = np.arange(n) / float(n-1)
        betacdf = beta.cdf

        for (i, p) in enumerate(prob):
            # n grid points give n-1 weights, one per point of each
            # leave-one-out sample.
            _w = betacdf(vv, (n+1)*p, (n+1)*(1-p))
            w = _w[1:] - _w[:-1]
            # Estimate obtained when the k-th order statistic is left out.
            mx_ = np.array([np.dot(w, np.delete(xsorted, k))
                            for k in range(n)], dtype=np.float64)
            mx_var = mx_.var() * n / float(n-1)
            hdsd[i] = float(n-1) * np.sqrt(mx_var / float(n))
        return hdsd

    # Initialization & checks ---------
    data = ma.array(data, copy=False, dtype=np.float64)
    # ``copy=False`` is deliberately not passed here: NumPy >= 2 raises when
    # a copy is unavoidable, which is always the case for a plain list.
    p = np.atleast_1d(np.asarray(prob))
    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hdsd_1D(data, p)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)

    return ma.fix_invalid(result, copy=False).ravel()
 #####--------------------------------------------------------------------------
 #---- --- Confidence intervals ---
 #####--------------------------------------------------------------------------
 def trimmed_mean_ci(data, limits=(0.2,0.2), inclusive=(True,True),
                    alpha=0.05, axis=None):
    """
    Selected confidence interval of the trimmed mean along the given axis.

    The interval is ``mean +/- q * stde``: the mean of the data trimmed with
    ``mstats.trimr``, the standard error from ``mstats.trimmed_stde`` and the
    Student-t quantile ``q`` at ``1 - alpha/2`` with ``count - 1`` degrees of
    freedom, where ``count`` is the number of unmasked trimmed values.

    Parameters
    ----------
    data : array_like
        Input data.
    limits : {None, tuple}, optional
        None or a two item tuple.
        Tuple of the percentages to cut on each side of the array, with respect
        to the number of unmasked data, as floats between 0. and 1. If ``n``
        is the number of unmasked data before trimming, then
        (``n`` * `limits[0]`)th smallest data and (``n`` * `limits[1]`)th
        largest data are masked.  The total number of unmasked data after
        trimming is ``n`` * (1. - sum(`limits`)).
        The value of one limit can be set to None to indicate an open interval.
        Defaults to (0.2, 0.2).
    inclusive : (2,) tuple of boolean, optional
        Forwarded unchanged to ``mstats.trimr`` and ``mstats.trimmed_stde``:
        tuple indicating whether the number of data being masked on each side
        should be rounded (True) or truncated (False).
        Defaults to (True, True).
    alpha : float, optional
        Confidence level of the intervals.
        Defaults to 0.05.
    axis : int, optional
        Axis along which to cut. If None, uses a flattened version of `data`.
        Defaults to None.

    Returns
    -------
    trimmed_mean_ci : (2,) ndarray
        The lower and upper confidence intervals of the trimmed data.

    """
    data = ma.array(data, copy=False)
    # The same trimming options drive both the mean and its standard error.
    trim_opts = dict(limits=limits, inclusive=inclusive, axis=axis)
    kept = mstats.trimr(data, **trim_opts)
    center = kept.mean(axis)
    std_err = mstats.trimmed_stde(data, **trim_opts)
    dof = kept.count(axis) - 1
    half_width = t.ppf(1-alpha/2., dof) * std_err
    lower = center - half_width
    upper = center + half_width
    return np.array((lower, upper))
 #..............................................................................
 def mjci(data, prob=[0.25,0.5,0.75], axis=None):
    """
    Returns the Maritz-Jarrett estimators of the standard error of selected
    experimental quantiles of the data.

    Parameters
    ----------
    data: ndarray
        Data array.
    prob: sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    mjci : ndarray
        One standard-error estimate per entry of `prob` (computed along
        `axis` when it is given).

    Raises
    ------
    ValueError
        If `data` has more than two dimensions.

    """
    def _mjci_1D(data, p):
        data = np.sort(data.compressed())
        n = data.size
        # Integer rank associated with each requested probability.
        ranks = (np.array(p) * n + 0.5).astype(int)
        betacdf = beta.cdf

        mj = np.empty(len(ranks), np.float64)
        x = np.arange(1, n+1, dtype=np.float64) / n
        y = x - 1./n
        for (i, m) in enumerate(ranks):
            # Beta(m-1, n-m) probability mass on each interval (y, x].
            W = betacdf(x, m-1, n-m) - betacdf(y, m-1, n-m)
            C1 = np.dot(W, data)
            C2 = np.dot(W, data**2)
            mj[i] = np.sqrt(C2 - C1**2)
        return mj

    data = ma.array(data, copy=False)
    if data.ndim > 2:
        raise ValueError("Array 'data' must be at most two dimensional, but got data.ndim = %d" % data.ndim)
    # ``copy=False`` is deliberately not passed here: NumPy >= 2 raises when
    # a copy is unavoidable, which is always the case for a plain list.
    p = np.atleast_1d(np.asarray(prob))
    # Computes quantiles along axis (or globally)
    if (axis is None):
        return _mjci_1D(data, p)
    else:
        return ma.apply_along_axis(_mjci_1D, axis, data, p)
 #..............................................................................
 def mquantiles_cimj(data, prob=[0.25,0.50,0.75], alpha=0.05, axis=None):
    """
    Computes the alpha confidence interval for the selected quantiles of the
    data, with Maritz-Jarrett estimators.

    The bounds are ``xq -/+ z * smj``: ``xq`` comes from ``mstats.mquantiles``
    (called with ``alphap=0, betap=0``), ``smj`` from `mjci`, and ``z`` is the
    standard normal quantile at ``1 - a/2`` with ``a = min(alpha, 1-alpha)``.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence
        Sequence of quantiles to compute.
    alpha : float
        Confidence level of the intervals.
    axis : integer
        Axis along which to compute the quantiles.
        If None, use a flattened array.

    Returns
    -------
    (lower, upper) : tuple
        Lower and upper bounds of the interval for each requested quantile.

    """
    # Fold alpha so that e.g. 0.95 and 0.05 describe the same interval.
    tail = min(alpha, 1-alpha)
    z_crit = norm.ppf(1-tail/2.)
    quantiles = mstats.mquantiles(data, prob, alphap=0, betap=0, axis=axis)
    half_width = z_crit * mjci(data, prob, axis=axis)
    return (quantiles - half_width, quantiles + half_width)
 #.............................................................................
 def median_cihs(data, alpha=0.05, axis=None):
    """
    Computes the alpha-level confidence interval for the median of the data.

    Uses the Hettmasperger-Sheather method.

    Parameters
    ----------
    data : array_like
        Input data. Masked values are discarded. The input should be 1D only,
        or `axis` should be set to None.
    alpha : float
        Significance level of the interval. Values above 0.5 are folded to
        ``1 - alpha``.
    axis : integer
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    median_cihs :
        Alpha level confidence interval, as a (lower, upper) pair.

    Raises
    ------
    ValueError
        If `axis` is given and `data` has more than two dimensions.
    """
    def _cihs_1D(data, alpha):
        data = np.sort(data.compressed())
        n = len(data)
        alpha = min(alpha, 1-alpha)
        k = int(binom._ppf(alpha/2., n, 0.5))
        gk = binom.cdf(n-k,n,0.5) - binom.cdf(k-1,n,0.5)
        # Widen the bracket by one order statistic if coverage falls short.
        if gk < 1-alpha:
            k -= 1
            gk = binom.cdf(n-k,n,0.5) - binom.cdf(k-1,n,0.5)
        gkk = binom.cdf(n-k-1,n,0.5) - binom.cdf(k,n,0.5)
        # Interpolate between the two neighbouring order-statistic intervals.
        I = (gk - 1 + alpha)/(gk - gkk)
        lambd = (n-k) * I / float(k + (n-2*k)*I)
        lims = (lambd*data[k] + (1-lambd)*data[k-1],
                lambd*data[n-k-1] + (1-lambd)*data[n-k])
        return lims
    # Was `ma.rray`, which does not exist and raised AttributeError.
    data = ma.array(data, copy=False)
    # Computes quantiles along axis (or globally)
    if axis is None:
        # Hand over the masked array itself: the helper does the compressing,
        # and the ndarray returned by .compressed() has no such method.
        result = _cihs_1D(data, alpha)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_cihs_1D, axis, data, alpha)
    #
    return result
 #..............................................................................
 def compare_medians_ms(group_1, group_2, axis=None):
    """
    Compares the medians from two independent groups along the given axis.

    The absolute difference of the two medians is scaled by the combined
    McKean-Schrader standard errors, and the upper-tail probability of the
    standard normal distribution at that value is returned.

    Parameters
    ----------
    group_1 : array_like
        First dataset.
    group_2 : array_like
        Second dataset.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.
    """
    median_a = ma.median(group_1, axis=axis)
    median_b = ma.median(group_2, axis=axis)
    stde_a = mstats.stde_median(group_1, axis=axis)
    stde_b = mstats.stde_median(group_2, axis=axis)
    # Standard error of the difference of two independent medians.
    combined_stde = ma.sqrt(stde_a**2 + stde_b**2)
    standardized_gap = np.abs(median_a - median_b) / combined_stde
    return 1 - norm.cdf(standardized_gap)
 def idealfourths(data, axis=None):
    """
    Returns an estimate of the lower and upper quartiles.

    Uses the ideal fourths algorithm.

    Parameters
    ----------
    data : array_like
        Input array.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the arrays are
        flattened.

    Returns
    -------
    idealfourths : {list of floats, masked array}
        Returns the two internal values that divide `data` into four parts
        using the ideal fourths algorithm either along the flattened array
        (if `axis` is None) or along `axis` of `data`. Both values are NaN
        when fewer than three unmasked observations are available.
    """
    def _fourths_1d(ordered):
        values = ordered.compressed()
        count = len(values)
        if count < 3:
            return [np.nan,np.nan]
        # Integer part picks the order statistic, fraction interpolates.
        (whole, frac) = divmod(count/4. + 5/12.,1)
        low_idx = int(whole)
        high_idx = count - low_idx
        lower = (1-frac)*values[low_idx-1] + frac*values[low_idx]
        upper = (1-frac)*values[high_idx] + frac*values[high_idx-1]
        return [lower, upper]
    ordered = ma.sort(data, axis=axis).view(MaskedArray)
    if axis is not None:
        return ma.apply_along_axis(_fourths_1d, axis, ordered)
    return _fourths_1d(ordered)
 def rsh(data, points=None):
    """
    Evaluates Rosenblatt's shifted histogram estimators for each point
    on the dataset 'data'.

    Parameters
    ----------
    data : sequence
        Input data. Masked values are ignored.
    points : sequence
        Sequence of points where to evaluate Rosenblatt shifted histogram.
        If None, use the data.

    Returns
    -------
    rsh : ndarray
        ``(nhi - nlo) / (2*n*h)`` for every point, where `nhi` counts the
        observations ``<= point + h``, `nlo` those ``< point - h``, `n` is
        the number of unmasked observations and `h` the bandwidth derived
        from the ideal fourths of `data`.

    Raises
    ------
    AttributeError
        If `data` is not one dimensional.
    """
    data = ma.array(data, copy=False)
    if points is None:
        points = data
    else:
        # np.array(..., copy=False) raises on NumPy >= 2 when a copy cannot
        # be avoided (e.g. list input); asarray copies only if needed.
        points = np.atleast_1d(np.asarray(points))
    if data.ndim != 1:
        raise AttributeError("The input array should be 1D only !")
    n = data.count()
    r = idealfourths(data, axis=None)
    # Bandwidth: 1.2 * (spread between the fourths) / n**(1/5).
    h = 1.2 * (r[-1]-r[0]) / n**(1./5)
    nhi = (data[:,None] <= points[None,:] + h).sum(0)
    nlo = (data[:,None] < points[None,:] - h).sum(0)
    return (nhi-nlo) / (2.*n*h)
 ###############################################################################
--- a/pywafo/src/wafo/stats/rv.py
+++ b/pywafo/src/wafo/stats/rv.py
@ -0,0 +1,76 @@
 from __future__ import division, print_function, absolute_import
 from numpy import vectorize, deprecate
 from numpy.random import random_sample
 __all__ = ['randwppf', 'randwcdf']
 # XXX: Are these needed anymore?
 #####################################
 # General purpose continuous
 ######################################
@deprecate(message="Deprecated in scipy 0.14.0, use "
                   "distribution-specific rvs() method instead")
 def randwppf(ppf, args=(), size=None):
    """
    Returns an array of random variates of a distribution whose percent
    point function (inverse of the CDF or quantile function) is given.

    The variates are obtained by evaluating `ppf` on samples drawn with
    ``numpy.random.random_sample``.

    args is a tuple of extra arguments to the ppf function (i.e. shape,
    location, scale), and size is the size of the output.  Note the ppf
    function must accept an array of q values to compute over.
    """
    U = random_sample(size=size)
    # `args` is concatenated to a tuple, so it has to be a tuple itself.
    return ppf(*(U,)+args)
@deprecate(message="Deprecated in scipy 0.14.0, use "
                   "distribution-specific rvs() method instead")
 def randwcdf(cdf, mean=1.0, args=(), size=None):
    """
    Returns an array of random variates given a CDF.

    Given a cumulative distribution function (CDF) returns an array of
    random values obtained by numerically solving ``cdf(x) = q`` for
    samples `q` drawn with ``numpy.random.random_sample``.

    Parameters
    ----------
    cdf : function
        CDF function that accepts a single value and `args`, and returns
        an single value.
    mean : float, optional
        The mean of the distribution which helps the solver.  Defaults
        to 1.0.  It is used as the starting point of the root search.
    args : tuple, optional
        Extra arguments to the cdf function (i.e. shape, location, scale)
    size : {int, None}, optional
        Is the size of the output.  If None, only 1 value will be returned.

    Returns
    -------
    randwcdf : ndarray
        Array of random numbers.

    Notes
    -----
    Can use the ``scipy.stats.distributions.*.cdf`` functions for the
    `cdf` parameter.
    """
    import scipy.optimize as optimize
    # Residual whose root in x is the q-th quantile of the distribution.
    def _ppfopt(x, q, *nargs):
        newargs = (x,)+nargs
        return cdf(*newargs) - q
    # Numerical inverse of the CDF for one q, started from `mean`.
    def _ppf(q, *nargs):
        return optimize.fsolve(_ppfopt, mean, args=(q,)+nargs)
    _vppf = vectorize(_ppf)
    U = random_sample(size=size)
    return _vppf(*(U,)+args)
--- a/pywafo/src/wafo/stats/six.py
+++ b/pywafo/src/wafo/stats/six.py
@ -307,7 +307,7 @@ _add_doc(u, """Text literal""")
 if PY3:
-    import builtins
+    import builtins  # @UnresolvedImport
    exec_ = getattr(builtins, "exec")
    def reraise(tp, value, tb=None):
--- a/pywafo/src/wafo/stats/stats.py
+++ b/pywafo/src/wafo/stats/stats.py
--- a/pywafo/src/wafo/stats/tests/common_tests.py
+++ b/pywafo/src/wafo/stats/tests/common_tests.py
@ -0,0 +1,154 @@
 from __future__ import division, print_function, absolute_import
 import inspect
 import warnings
 import numpy as np
 import numpy.testing as npt
 #from scipy.lib._version import NumpyVersion
 from scipy import stats
 #NUMPY_BELOW_1_7 = NumpyVersion(np.__version__) < '1.7.0'
 # Compare (major, minor) numerically: as plain strings '1.10.0' < '1.7.0'
 # is True, which wrongly flagged every NumPy 1.10+ release as "below 1.7".
 NUMPY_BELOW_1_7 = tuple(int(part) for part in np.__version__.split('.')[:2]) < (1, 7)
 def check_normalization(distfn, args, distname):
    """Check that `distfn` is normalized: the zeroth moment, the expectation
    of the constant 1 and the cdf at the upper support bound must all be 1.
    """
    npt.assert_allclose(distfn.moment(0, *args), 1.0)

    # this is a temporary plug: either ncf or expect is problematic;
    # best be marked as a knownfail, but I've no clue how to do it.
    tolerances = {'atol': 1e-7, 'rtol': 1e-7}
    if distname == "ncf":
        tolerances = {'atol': 1e-5, 'rtol': 0}

    unit_expectation = distfn.expect(lambda x: 1, args=args)
    npt.assert_allclose(unit_expectation, 1.0, err_msg=distname,
                        verbose=True, **tolerances)

    cdf_at_upper_bound = distfn.cdf(distfn.b, *args)
    npt.assert_allclose(cdf_at_upper_bound, 1.0)
 def check_moment(distfn, arg, m, v, msg):
    """Compare the first two raw moments of `distfn` with the expected mean
    `m` and variance `v`; an infinite expectation requires an infinite
    moment.
    """
    first = distfn.moment(1, *arg)
    second = distfn.moment(2, *arg)

    if np.isinf(m):                     # or np.isnan(m1),
        npt.assert_(np.isinf(first),
               msg + ' - 1st moment -infinite, m1=%s' % str(first))
    else:
        npt.assert_almost_equal(first, m, decimal=10,
                                err_msg=msg + ' - 1st moment')

    if np.isinf(v):                     # or np.isnan(m2),
        npt.assert_(np.isinf(second),
               msg + ' - 2nd moment -infinite, m2=%s' % str(second))
    else:
        # Variance from raw moments: E[X^2] - E[X]^2.
        npt.assert_almost_equal(second - first * first, v, decimal=10,
                                err_msg=msg + ' - 2ndt moment')
 def check_mean_expect(distfn, arg, m, msg):
    """Check that ``expect`` of the identity reproduces the finite mean `m`;
    nothing is checked when `m` is not finite.
    """
    if not np.isfinite(m):
        return
    mean_from_expect = distfn.expect(lambda x: x, arg)
    npt.assert_almost_equal(mean_from_expect, m, decimal=5,
                            err_msg=msg + ' - 1st moment (expect)')
 def check_var_expect(distfn, arg, m, v, msg):
    """Check that ``expect`` of ``x*x`` equals ``v + m*m`` for a finite
    variance `v`; nothing is checked when `v` is not finite.
    """
    if not np.isfinite(v):
        return
    second_raw_moment = distfn.expect(lambda x: x*x, arg)
    npt.assert_almost_equal(second_raw_moment, v + m*m, decimal=5,
                            err_msg=msg + ' - 2st moment (expect)')
 def check_skew_expect(distfn, arg, m, v, s, msg):
    """Check the skewness `s` against the third central moment computed with
    ``expect``; a non-finite `s` must be NaN.
    """
    if not np.isfinite(s):
        npt.assert_(np.isnan(s))
        return
    third_central = distfn.expect(lambda x: np.power(x-m, 3), arg)
    # skew * variance**1.5 is the third central moment.
    npt.assert_almost_equal(third_central, s * np.power(v, 1.5),
                            decimal=5, err_msg=msg + ' - skew')
 def check_kurt_expect(distfn, arg, m, v, k, msg):
    """Check the excess kurtosis `k` against the fourth central moment
    computed with ``expect``; a non-finite `k` must be NaN.
    """
    if not np.isfinite(k):
        npt.assert_(np.isnan(k))
        return
    fourth_central = distfn.expect(lambda x: np.power(x-m, 4), arg)
    # (excess kurtosis + 3) * variance**2 is the fourth central moment.
    expected_fourth = (k + 3.) * np.power(v, 2)
    npt.assert_allclose(fourth_central, expected_fourth, atol=1e-5, rtol=1e-5,
                        err_msg=msg + ' - kurtosis')
 def check_entropy(distfn, arg, msg):
    """Check that the entropy of `distfn` for the shape values `arg` is not
    NaN.
    """
    entropy_value = distfn.entropy(*arg)
    entropy_is_nan = np.isnan(entropy_value)
    npt.assert_(not entropy_is_nan, msg + 'test Entropy is nan')
 def check_private_entropy(distfn, args, superclass):
    """Compare the distribution-specific ``_entropy`` of `distfn` with the
    generic ``_entropy`` implementation provided by `superclass`.
    """
    specific = distfn._entropy(*args)
    # Call the generic implementation unbound, with distfn as `self`.
    generic = superclass._entropy(distfn, *args)
    npt.assert_allclose(specific, generic)
 def check_edge_support(distfn, args):
    """Make sure x=self.a and x=self.b are handled correctly by the cdf/sf
    family, and that ppf/isf map 0 and 1 onto the support ends and return
    NaN for probabilities outside [0, 1].
    """
    lower, upper = distfn.a, distfn.b
    x = [lower, upper]
    if isinstance(distfn, stats.rv_continuous):
        expected_at_edges = (
            (distfn.cdf, [0.0, 1.0]),
            (distfn.logcdf, [-np.inf, 0.0]),
            (distfn.sf, [1.0, 0.0]),
            (distfn.logsf, [0.0, -np.inf]),
        )
        for method, expected in expected_at_edges:
            npt.assert_equal(method(x, *args), expected)

    if isinstance(distfn, stats.rv_discrete):
        x = [lower - 1, upper]

    unit_ends = [0.0, 1.0]
    npt.assert_equal(distfn.ppf(unit_ends, *args), x)
    npt.assert_equal(distfn.isf(unit_ends, *args), x[::-1])

    # out-of-bounds for isf & ppf
    outside = [-1, 2]
    npt.assert_(np.isnan(distfn.isf(outside, *args)).all())
    npt.assert_(np.isnan(distfn.ppf(outside, *args)).all())
 def check_named_args(distfn, x, shape_args, defaults, meths):
    """Check calling `distfn` methods with named shape arguments.

    Parameters
    ----------
    distfn : distribution instance
        Must provide ``_parse_args``, ``shapes``, ``numargs``, ``moment``
        and ``cdf``.
    x : float or array_like
        Point(s) at which every method in `meths` is evaluated.
    shape_args : sequence
        Shape values, in positional order.
    defaults : tuple
        Expected defaults of the trailing ``_parse_args`` parameters.
    meths : sequence of callables
        Methods called as ``meth(x, *shape_args)``; the results must be
        finite and must not change when shapes are passed by keyword.

    Raises
    ------
    AssertionError
        If any of the consistency checks fails.
    """
    ## Check calling w/ named arguments.

    # check consistency of shapes, numargs and _parse signature
    # inspect.getargspec was removed in Python 3.11; getfullargspec reports
    # the ** parameter as `varkw` instead of `keywords`.
    if hasattr(inspect, 'getfullargspec'):
        signature = inspect.getfullargspec(distfn._parse_args)
        varkw = signature.varkw
    else:
        signature = inspect.getargspec(distfn._parse_args)
        varkw = signature.keywords
    npt.assert_(signature.varargs is None)
    npt.assert_(varkw is None)
    npt.assert_(signature.defaults == defaults)

    shape_argnames = signature.args[1:-len(defaults)]  # self, a, b, loc=0, scale=1
    if distfn.shapes:
        shapes_ = distfn.shapes.replace(',', ' ').split()
    else:
        shapes_ = ''
    npt.assert_(len(shapes_) == distfn.numargs)
    npt.assert_(len(shapes_) == len(shape_argnames))

    # check calling w/ named arguments
    shape_args = list(shape_args)

    vals = [meth(x, *shape_args) for meth in meths]
    npt.assert_(np.all(np.isfinite(vals)))

    # Move the shapes from positional to keyword form one at a time,
    # starting with the last one.
    names, a, k = shape_argnames[:], shape_args[:], {}
    while names:
        k[names.pop()] = a.pop()
        v = [meth(x, *a, **k) for meth in meths]
        npt.assert_array_equal(vals, v)
        if 'n' not in k:
            # `n` is first parameter of moment(), so can't be used as named arg
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", UserWarning)
                npt.assert_equal(distfn.moment(1, *a, **k),
                                 distfn.moment(1, *shape_args))

    # unknown arguments should not go through:
    k['kaboom'] = 42
    npt.assert_raises(TypeError, distfn.cdf, x, **k)
--- a/pywafo/src/wafo/stats/tests/test_binned_statistic.py
+++ b/pywafo/src/wafo/stats/tests/test_binned_statistic.py
@ -0,0 +1,238 @@
 from __future__ import division, print_function, absolute_import
 import numpy as np
 from numpy.testing import assert_array_almost_equal, run_module_suite
 from scipy.stats import \
    binned_statistic, binned_statistic_2d, binned_statistic_dd
 class TestBinnedStatistic(object):
    """Tests for binned_statistic, binned_statistic_2d and
    binned_statistic_dd.

    'count' and 'sum' are compared with the matching NumPy histogram
    function; 'mean', 'std' and 'median' are compared with the result of
    passing the equivalent NumPy callable; the ``*_bincode`` tests pin the
    returned bin numbers for the seeded sample.
    """

    @classmethod
    def setup_class(cls):
        # Fixed seed: the *_bincode tests hard-code the resulting bins.
        np.random.seed(9865)
        cls.x = np.random.random(100)
        cls.y = np.random.random(100)
        cls.v = np.random.random(100)
        cls.X = np.random.random((100, 3))

    @staticmethod
    def _assert_pairwise_close(actual, desired):
        # Compare corresponding items of two result sequences.
        for got, want in zip(actual, desired):
            assert_array_almost_equal(got, want)

    def _check_histogram(self, binned, reference):
        # The last item of a binned result is the bin numbers; the leading
        # items line up with what the NumPy histogram function returns.
        self._assert_pairwise_close(binned[:-1], reference)

    def _check_same(self, first, second):
        # Two binned results must agree on the statistic and on the edges.
        self._assert_pairwise_close(first[:-1], second[:-1])

    def test_1d_count(self):
        self._check_histogram(
            binned_statistic(self.x, self.v, 'count', bins=10),
            np.histogram(self.x, bins=10))

    def test_1d_sum(self):
        self._check_histogram(
            binned_statistic(self.x, self.v, 'sum', bins=10),
            np.histogram(self.x, bins=10, weights=self.v))

    def test_1d_mean(self):
        self._check_same(
            binned_statistic(self.x, self.v, 'mean', bins=10),
            binned_statistic(self.x, self.v, np.mean, bins=10))

    def test_1d_std(self):
        self._check_same(
            binned_statistic(self.x, self.v, 'std', bins=10),
            binned_statistic(self.x, self.v, np.std, bins=10))

    def test_1d_median(self):
        self._check_same(
            binned_statistic(self.x, self.v, 'median', bins=10),
            binned_statistic(self.x, self.v, np.median, bins=10))

    def test_1d_bincode(self):
        sample = self.x[:20]
        values = self.v[:20]

        counts, _edges, binnumbers = binned_statistic(sample, values, 'count',
                                                      bins=3)
        expected_binnumbers = np.array([3, 2, 1, 3, 2, 3, 3, 3, 3, 1, 1, 3, 3,
                                        1, 2, 3, 1, 1, 2, 1])

        occupancy = [(binnumbers == i).sum() for i in np.unique(binnumbers)]

        assert_array_almost_equal(binnumbers, expected_binnumbers)
        assert_array_almost_equal(occupancy, counts)

    def test_1d_range_keyword(self):
        # Regression test for gh-3063, range can be (min, max) or [(min, max)]
        np.random.seed(9865)
        x = np.arange(30)
        data = np.random.random(30)

        mean, bins, _ = binned_statistic(x[:15], data[:15])
        for range_spec in ([(0, 14)], (0, 14)):
            mean_range, bins_range, _ = binned_statistic(x, data,
                                                         range=range_spec)
            assert_array_almost_equal(mean, mean_range)
            assert_array_almost_equal(bins, bins_range)

    def test_2d_count(self):
        self._check_histogram(
            binned_statistic_2d(self.x, self.y, self.v, 'count', bins=5),
            np.histogram2d(self.x, self.y, bins=5))

    def test_2d_sum(self):
        self._check_histogram(
            binned_statistic_2d(self.x, self.y, self.v, 'sum', bins=5),
            np.histogram2d(self.x, self.y, bins=5, weights=self.v))

    def test_2d_mean(self):
        self._check_same(
            binned_statistic_2d(self.x, self.y, self.v, 'mean', bins=5),
            binned_statistic_2d(self.x, self.y, self.v, np.mean, bins=5))

    def test_2d_std(self):
        self._check_same(
            binned_statistic_2d(self.x, self.y, self.v, 'std', bins=5),
            binned_statistic_2d(self.x, self.y, self.v, np.std, bins=5))

    def test_2d_median(self):
        self._check_same(
            binned_statistic_2d(self.x, self.y, self.v, 'median', bins=5),
            binned_statistic_2d(self.x, self.y, self.v, np.median, bins=5))

    def test_2d_bincode(self):
        sample_x = self.x[:20]
        sample_y = self.y[:20]
        values = self.v[:20]

        counts, _binx, _biny, binnumbers = binned_statistic_2d(
            sample_x, sample_y, values, 'count', bins=3)
        expected_binnumbers = np.array([17, 11, 6, 16, 11, 17, 18, 17, 17, 7,
                                        6, 18, 16, 6, 11, 16, 6, 6, 11, 8])

        occupancy = [(binnumbers == i).sum() for i in np.unique(binnumbers)]

        assert_array_almost_equal(binnumbers, expected_binnumbers)
        # Only non-empty bins show up among the bin numbers.
        nonempty_counts = counts[counts.nonzero()]
        assert_array_almost_equal(occupancy, nonempty_counts)

    def test_dd_count(self):
        self._check_histogram(
            binned_statistic_dd(self.X, self.v, 'count', bins=3),
            np.histogramdd(self.X, bins=3))

    def test_dd_sum(self):
        self._check_histogram(
            binned_statistic_dd(self.X, self.v, 'sum', bins=3),
            np.histogramdd(self.X, bins=3, weights=self.v))

    def test_dd_mean(self):
        self._check_same(
            binned_statistic_dd(self.X, self.v, 'mean', bins=3),
            binned_statistic_dd(self.X, self.v, np.mean, bins=3))

    def test_dd_std(self):
        self._check_same(
            binned_statistic_dd(self.X, self.v, 'std', bins=3),
            binned_statistic_dd(self.X, self.v, np.std, bins=3))

    def test_dd_median(self):
        self._check_same(
            binned_statistic_dd(self.X, self.v, 'median', bins=3),
            binned_statistic_dd(self.X, self.v, np.median, bins=3))

    def test_dd_bincode(self):
        sample = self.X[:20]
        values = self.v[:20]

        counts, _edges, binnumbers = binned_statistic_dd(sample, values,
                                                         'count', bins=3)
        expected_binnumbers = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42,
                                        41, 82, 83, 92, 32, 36, 91, 43, 87,
                                        81, 81])

        occupancy = [(binnumbers == i).sum() for i in np.unique(binnumbers)]

        assert_array_almost_equal(binnumbers, expected_binnumbers)
        # Only non-empty bins show up among the bin numbers.
        nonempty_counts = counts[counts.nonzero()]
        assert_array_almost_equal(occupancy, nonempty_counts)
 if __name__ == "__main__":
    run_module_suite()
--- a/pywafo/src/wafo/stats/tests/test_contingency.py
+++ b/pywafo/src/wafo/stats/tests/test_contingency.py
@ -0,0 +1,202 @@
 from __future__ import division, print_function, absolute_import
 import numpy as np
 from numpy.testing import (run_module_suite, assert_equal, assert_array_equal,
         assert_array_almost_equal, assert_approx_equal, assert_raises,
         assert_allclose)
 from scipy.special import xlogy
 from scipy.stats.contingency import margins, expected_freq, chi2_contingency
 def test_margins():
    # One-dimensional input: a single marginal equal to the total.
    single = margins(np.array([1]))
    assert_equal(len(single), 1)
    assert_array_equal(single[0], np.array([1]))

    # (table, expected marginals) pairs for 2-D and 3-D inputs.
    cases = [
        (np.array([[1]]),
         [np.array([[1]]), np.array([[1]])]),
        (np.arange(12).reshape(2, 6),
         [np.array([[15], [51]]),
          np.array([[6, 8, 10, 12, 14, 16]])]),
        (np.arange(24).reshape(2, 3, 4),
         [np.array([[[66]], [[210]]]),
          np.array([[[60], [92], [124]]]),
          np.array([[[60, 66, 72, 78]]])]),
    ]
    for table, expected in cases:
        computed = margins(table)
        assert_equal(len(computed), len(expected))
        for got, want in zip(computed, expected):
            assert_array_equal(got, want)
 def test_expected_freq():
    # A single cell is its own expectation.
    assert_array_equal(expected_freq([1]), np.array([1.0]))

    # 3-way table whose expected frequencies are all one.
    cube = np.array([[[2, 0], [0, 2]], [[0, 2], [2, 0]], [[1, 1], [1, 1]]])
    assert_array_equal(expected_freq(cube), np.ones_like(cube))

    # 2-way table with hand-computed expected frequencies.
    table = np.array([[10, 10, 20], [20, 20, 20]])
    by_hand = np.array([[12., 12., 16.], [18., 18., 24.]])
    assert_array_almost_equal(expected_freq(table), by_hand)
 def test_chi2_contingency_trivial():
    # Some very simple tests for chi2_contingency.
    trivial_cases = (
        # A trivial case
        (np.array([[1, 2], [1, 2]]), 1),
        # A *really* trivial case: 1-D data.
        (np.array([1, 2, 3]), 0),
    )
    for obs, expected_dof in trivial_cases:
        chi2, p, dof, expected = chi2_contingency(obs, correction=False)
        assert_equal(chi2, 0.0)
        assert_equal(p, 1.0)
        assert_equal(dof, expected_dof)
        assert_array_equal(obs, expected)
 def test_chi2_contingency_R():
    # Some test cases that were computed independently, using R.
    # Rcode and Routput are not used by the assertions below; they record
    # the R script and its printed result that the expected numbers in the
    # assertions were taken from.

    # Case 1: 3-way table (2 tiers x 4 rows x 3 columns).
    Rcode = \
    """
    # Data vector.
    data <- c(
      12, 34, 23,     4,  47,  11,
      35, 31, 11,    34,  10,  18,
      12, 32,  9,    18,  13,  19,
      12, 12, 14,     9,  33,  25
      )
    # Create factor tags:r=rows, c=columns, t=tiers
    r <- factor(gl(4, 2*3, 2*3*4, labels=c("r1", "r2", "r3", "r4")))
    c <- factor(gl(3, 1,   2*3*4, labels=c("c1", "c2", "c3")))
    t <- factor(gl(2, 3,   2*3*4, labels=c("t1", "t2")))
    # 3-way Chi squared test of independence
    s = summary(xtabs(data~r+c+t))
    print(s)
    """
    Routput = \
    """
    Call: xtabs(formula = data ~ r + c + t)
    Number of cases in table: 478
    Number of factors: 3
    Test for independence of all factors:
            Chisq = 102.17, df = 17, p-value = 3.514e-14
    """
    obs = np.array(
        [[[12, 34, 23],
          [35, 31, 11],
          [12, 32, 9],
          [12, 12, 14]],
         [[4, 47, 11],
          [34, 10, 18],
          [18, 13, 19],
          [9, 33, 25]]])
    chi2, p, dof, expected = chi2_contingency(obs)
    # Expected values are the Chisq, p-value and df printed in Routput.
    assert_approx_equal(chi2, 102.17, significant=5)
    assert_approx_equal(p, 3.514e-14, significant=4)
    assert_equal(dof, 17)

    # Case 2: 4-way table (2 x 2 x 2 x 2).
    Rcode = \
    """
    # Data vector.
    data <- c(
        #
        12, 17,
        11, 16,
        #
        11, 12,
        15, 16,
        #
        23, 15,
        30, 22,
        #
        14, 17,
        15, 16
        )
    # Create factor tags:r=rows, c=columns, d=depths(?), t=tiers
    r <- factor(gl(2, 2,  2*2*2*2, labels=c("r1", "r2")))
    c <- factor(gl(2, 1,  2*2*2*2, labels=c("c1", "c2")))
    d <- factor(gl(2, 4,  2*2*2*2, labels=c("d1", "d2")))
    t <- factor(gl(2, 8,  2*2*2*2, labels=c("t1", "t2")))
    # 4-way Chi squared test of independence
    s = summary(xtabs(data~r+c+d+t))
    print(s)
    """
    Routput = \
    """
    Call: xtabs(formula = data ~ r + c + d + t)
    Number of cases in table: 262
    Number of factors: 4
    Test for independence of all factors:
            Chisq = 8.758, df = 11, p-value = 0.6442
    """
    obs = np.array(
        [[[[12, 17],
           [11, 16]],
          [[11, 12],
           [15, 16]]],
         [[[23, 15],
           [30, 22]],
          [[14, 17],
           [15, 16]]]])
    chi2, p, dof, expected = chi2_contingency(obs)
    # Expected values are the Chisq, p-value and df printed in Routput.
    assert_approx_equal(chi2, 8.758, significant=4)
    assert_approx_equal(p, 0.6442, significant=4)
    assert_equal(dof, 11)
 def test_chi2_contingency_g():
    # With lambda_='log-likelihood' the statistic must equal
    # 2 * sum(obs * log(obs / expected)).
    table_2x2 = np.array([[15, 60], [15, 90]])
    g, p, dof, e = chi2_contingency(table_2x2, lambda_='log-likelihood',
                                    correction=False)
    assert_allclose(g, 2*xlogy(table_2x2, table_2x2/e).sum())

    # With the correction, each observed count is moved by 0.5 before the
    # statistic is computed.
    g, p, dof, e = chi2_contingency(table_2x2, lambda_='log-likelihood',
                                    correction=True)
    shifted = table_2x2 + np.array([[-0.5, 0.5], [0.5, -0.5]])
    assert_allclose(g, 2*xlogy(shifted, shifted/e).sum())

    table_2x3 = np.array([[10, 12, 10], [12, 10, 10]])
    g, p, dof, e = chi2_contingency(table_2x3, lambda_='log-likelihood')
    assert_allclose(g, 2*xlogy(table_2x3, table_2x3/e).sum())
 def test_chi2_contingency_bad_args():
    # Test that "bad" inputs raise a ValueError.
    bad_tables = (
        # Negative value in the array of observed frequencies.
        np.array([[-1, 10], [1, 2]]),
        # The zeros in this will result in zeros in the array
        # of expected frequencies.
        np.array([[0, 1], [0, 1]]),
        # A degenerate case: `observed` has size 0.
        np.empty((0, 8)),
    )
    for obs in bad_tables:
        assert_raises(ValueError, chi2_contingency, obs)
 if __name__ == "__main__":
    run_module_suite()
--- a/pywafo/src/wafo/stats/tests/test_continuous_basic.py
+++ b/pywafo/src/wafo/stats/tests/test_continuous_basic.py
@ -2,11 +2,15 @@ from __future__ import division, print_function, absolute_import
 import warnings
 import numpy.testing as npt
 import numpy as np
-import nose
+import numpy.testing as npt
-from wafo import stats
+from scipy import integrate
 from scipy import stats
 from common_tests import (check_normalization, check_moment, check_mean_expect,
        check_var_expect, check_skew_expect, check_kurt_expect,
        check_entropy, check_private_entropy, NUMPY_BELOW_1_7,
        check_edge_support, check_named_args)
 """
 Test all continuous distributions.
@ -17,34 +21,25 @@ distributions so that we can perform further testing of class methods.
 These tests currently check only/mostly for serious errors and exceptions,
 not for numerically exact results.
 TODO:
 * make functioning test for skew and kurtosis
  still known failures - skip for now
 """
 #currently not used
 DECIMAL = 5  # specify the precision of the tests  # increased from 0 to 5
 DECIMAL_kurt = 0
 distcont = [
    ['alpha', (3.5704770516650459,)],
    ['anglit', ()],
    ['arcsine', ()],
    ['beta', (2.3098496451481823, 0.62687954300963677)],
-    ['betaprime', (5, 6)],   # avoid unbound error in entropy with (100, 86)],
+    ['betaprime', (5, 6)],
    ['bradford', (0.29891359763170633,)],
-    ['burr', (10.5, 4.3)],    #incorrect mean and var for(0.94839838075366045, 4.3820284068855795)],
+    ['burr', (10.5, 4.3)],
    ['cauchy', ()],
    ['chi', (78,)],
    ['chi2', (55,)],
    ['cosine', ()],
    ['dgamma', (1.1023326088288166,)],
    ['dweibull', (2.0685080649914673,)],
-    ['erlang', (20,)],    #correction numargs = 1
+    ['erlang', (10,)],
    ['expon', ()],
    ['exponpow', (2.697119160358469,)],
    ['exponweib', (2.8923945291034436, 1.9505288745913174)],
@ -59,7 +54,7 @@ distcont = [
    ['gausshyper', (13.763771604130699, 3.1189636648681431,
                    2.5145980350183019, 5.1811649903971615)],  # veryslow
    ['genexpon', (9.1325976465418908, 16.231956600590632, 3.2819552690843983)],
-    ['genextreme', (-0.1,)],  # sample mean test fails for (3.3184017469423535,)],
+    ['genextreme', (-0.1,)],
    ['gengamma', (4.4162385429431925, 3.1193091679242761)],
    ['genhalflogistic', (0.77274727809929322,)],
    ['genlogistic', (0.41192440799679475,)],
@ -72,9 +67,9 @@ distcont = [
    ['halflogistic', ()],
    ['halfnorm', ()],
    ['hypsecant', ()],
-    ['invgamma', (2.0668996136993067,)],
+    ['invgamma', (4.0668996136993067,)],
    ['invgauss', (0.14546264555347513,)],
-    ['invweibull', (10.58,)], # sample mean test fails at(0.58847112119264788,)]
+    ['invweibull', (10.58,)],
    ['johnsonsb', (4.3172675099141058, 3.1837781130785063)],
    ['johnsonsu', (2.554395574161155, 2.2482281679651965)],
    ['ksone', (1000,)],  # replace 22 by 100 to avoid failing range, ticket 956
@ -91,8 +86,7 @@ distcont = [
    ['lognorm', (0.95368226960575331,)],
    ['lomax', (1.8771398388773268,)],
    ['maxwell', ()],
-    ['mielke', (10.4, 3.6)], # sample mean test fails for (4.6420495492121487, 0.59707419545516938)],
+    ['mielke', (10.4, 3.6)],
                             # mielke: good results if 2nd parameter >2, weird mean or var below
    ['nakagami', (4.9673794866666237,)],
    ['ncf', (27, 27, 0.41578441799226107)],
    ['nct', (14, 0.24045031331198066)],
@ -105,8 +99,6 @@ distcont = [
    ['powernorm', (4.4453652254590779,)],
    ['rayleigh', ()],
    ['rdist', (0.9,)],   # feels also slow
 #    ['rdist', (3.8266985793976525,)],  #veryslow, especially rvs
    #['rdist', (541.0,)],   # from ticket #758    #veryslow
    ['recipinvgauss', (0.63004267809369119,)],
    ['reciprocal', (0.0062309367010521255, 1.0062309367010522)],
    ['rice', (0.7749725210111873,)],
@ -115,22 +107,36 @@ distcont = [
    ['triang', (0.15785029824528218,)],
    ['truncexpon', (4.6907725456810478,)],
    ['truncnorm', (-1.0978730080013919, 2.7306754109031979)],
    ['truncnorm', (0.1, 2.)],
    ['tukeylambda', (3.1321477856738267,)],
    ['uniform', ()],
    ['vonmises', (3.9939042581071398,)],
    ['vonmises_line', (3.9939042581071398,)],
    ['wald', ()],
    ['weibull_max', (2.8687961709100187,)],
    ['weibull_min', (1.7866166930421596,)],
    ['wrapcauchy', (0.031071279018614728,)]]
 ## Last four of these fail all around. Need to be checked
 distcont_extra = [
    ['betaprime', (100, 86)],
    ['fatiguelife', (5,)],
    ['mielke', (4.6420495492121487, 0.59707419545516938)],
    ['invweibull', (0.58847112119264788,)],
    # burr: sample mean test fails still for c<1
    ['burr', (0.94839838075366045, 4.3820284068855795)],
    # genextreme: sample mean test, sf-logsf test fail
    ['genextreme', (3.3184017469423535,)],
 ]
 # for testing only specific functions
-##distcont = [
+# distcont = [
 ##    ['erlang', (20,)],    #correction numargs = 1
 ##    ['fatiguelife', (29,)],   #correction numargs = 1
 ##    ['loggamma', (0.41411931826052117,)]]
 # for testing ticket:767
-##distcont = [
+# distcont = [
 ##    ['genextreme', (3.3184017469423535,)],
 ##    ['genextreme', (0.01,)],
 ##    ['genextreme', (0.00001,)],
@ -138,12 +144,12 @@ distcont = [
 ##    ['genextreme', (-0.01,)]
 ##    ]
-##distcont = [['gumbel_l', ()],
+# distcont = [['gumbel_l', ()],
 ##            ['gumbel_r', ()],
 ##            ['norm', ()]
 ##            ]
-##distcont = [['norm', ()]]
+# distcont = [['norm', ()]]
 distmissing = ['wald', 'gausshyper', 'genexpon', 'rv_continuous',
    'loglaplace', 'rdist', 'semicircular', 'invweibull', 'ksone',
@ -154,11 +160,14 @@ distmissing = ['wald', 'gausshyper', 'genexpon', 'rv_continuous',
 distmiss = [[dist,args] for dist,args in distcont if dist in distmissing]
 distslow = ['rdist', 'gausshyper', 'recipinvgauss', 'ksone', 'genexpon',
-            'vonmises', 'rice', 'mielke', 'semicircular', 'cosine', 'invweibull',
+            'vonmises', 'vonmises_line', 'mielke', 'semicircular',
-            'powerlognorm', 'johnsonsu', 'kstwobign']
+            'cosine', 'invweibull', 'powerlognorm', 'johnsonsu', 'kstwobign']
 # distslow are sorted by speed (very slow to slow)
 # NB: not needed anymore?
 def _silence_fp_errors(func):
    # warning: don't apply to test_ functions as is, then those will be skipped
    def wrap(*a, **kw):
        olderr = np.seterr(all='ignore')
        try:
@ -168,32 +177,27 @@ def _silence_fp_errors(func):
    wrap.__name__ = func.__name__
    return wrap
-@_silence_fp_errors
+
 def test_cont_basic():
    # this test skips slow distributions
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=integrate.IntegrationWarning)
        for distname, arg in distcont[:]:
            if distname in distslow:
                continue
            distfn = getattr(stats, distname)
            np.random.seed(765456)
-        sn = 1000
+            sn = 500
            rvs = distfn.rvs(size=sn, *arg)
            sm = rvs.mean()
            sv = rvs.var()
        skurt = stats.kurtosis(rvs)
        sskew = stats.skew(rvs)
            m, v = distfn.stats(*arg)
-        yield check_sample_meanvar_, distfn, arg, m, v, sm, sv, sn, distname + \
+            yield check_sample_meanvar_, distfn, arg, m, v, sm, sv, sn, \
-              'sample mean test'
+                   distname + 'sample mean test'
        # the sample skew kurtosis test has known failures, not very good distance measure
        #yield check_sample_skew_kurt, distfn, arg, sskew, skurt, distname
        yield check_moment, distfn, arg, m, v, distname
            yield check_cdf_ppf, distfn, arg, distname
            yield check_sf_isf, distfn, arg, distname
            yield check_pdf, distfn, arg, distname
        if distname in ['wald']:
            continue
            yield check_pdf_logpdf, distfn, arg, distname
            yield check_cdf_logcdf, distfn, arg, distname
            yield check_sf_logsf, distfn, arg, distname
@ -201,25 +205,47 @@ def test_cont_basic():
                alpha = 0.01
                yield check_distribution_rvs, distname, arg, alpha, rvs
            locscale_defaults = (0, 1)
            meths = [distfn.pdf, distfn.logpdf, distfn.cdf, distfn.logcdf,
                     distfn.logsf]
            # make sure arguments are within support
            spec_x = {'frechet_l': -0.5, 'weibull_max': -0.5, 'levy_l': -0.5,
                      'pareto': 1.5, 'tukeylambda': 0.3}
            x = spec_x.get(distname, 0.5)
            yield check_named_args, distfn, x, arg, locscale_defaults, meths
            # Entropy
            skp = npt.dec.skipif
            yield check_entropy, distfn, arg, distname
            if distfn.numargs == 0:
                yield skp(NUMPY_BELOW_1_7)(check_vecentropy), distfn, arg
            if distfn.__class__._entropy != stats.rv_continuous._entropy:
                yield check_private_entropy, distfn, arg, stats.rv_continuous
            yield check_edge_support, distfn, arg
            knf = npt.dec.knownfailureif
            yield knf(distname == 'truncnorm')(check_ppf_private), distfn, \
                      arg, distname
@npt.dec.slow
 def test_cont_basic_slow():
    # same as above for slow distributions
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=integrate.IntegrationWarning)
        for distname, arg in distcont[:]:
-        if distname not in distslow: continue
+            if distname not in distslow:
                continue
            distfn = getattr(stats, distname)
            np.random.seed(765456)
-        sn = 1000
+            sn = 500
            rvs = distfn.rvs(size=sn,*arg)
            sm = rvs.mean()
            sv = rvs.var()
        skurt = stats.kurtosis(rvs)
        sskew = stats.skew(rvs)
            m, v = distfn.stats(*arg)
-        yield check_sample_meanvar_, distfn, arg, m, v, sm, sv, sn, distname + \
+            yield check_sample_meanvar_, distfn, arg, m, v, sm, sv, sn, \
-              'sample mean test'
+                  distname + 'sample mean test'
        # the sample skew kurtosis test has known failures, not very good distance measure
        #yield check_sample_skew_kurt, distfn, arg, sskew, skurt, distname
        yield check_moment, distfn, arg, m, v, distname
            yield check_cdf_ppf, distfn, arg, distname
            yield check_sf_isf, distfn, arg, distname
            yield check_pdf, distfn, arg, distname
@ -231,99 +257,103 @@ def test_cont_basic_slow():
                alpha = 0.01
                yield check_distribution_rvs, distname, arg, alpha, rvs
-@_silence_fp_errors
+            locscale_defaults = (0, 1)
-def check_moment(distfn, arg, m, v, msg):
+            meths = [distfn.pdf, distfn.logpdf, distfn.cdf, distfn.logcdf,
-    m1  = distfn.moment(1,*arg)
+                     distfn.logsf]
-    m2  = distfn.moment(2,*arg)
+            # make sure arguments are within support
-    if not np.isinf(m):
+            x = 0.5
-        npt.assert_almost_equal(m1, m, decimal=10, err_msg= msg + \
+            if distname == 'invweibull':
-                            ' - 1st moment')
+                arg = (1,)
-    else:                     # or np.isnan(m1),
+            elif distname == 'ksone':
-        npt.assert_(np.isinf(m1),
+                arg = (3,)
-               msg + ' - 1st moment -infinite, m1=%s' % str(m1))
+            yield check_named_args, distfn, x, arg, locscale_defaults, meths
-        #np.isnan(m1) temporary special treatment for loggamma
+
-    if not np.isinf(v):
+            # Entropy
-        npt.assert_almost_equal(m2-m1*m1, v, decimal=10, err_msg= msg + \
+            skp = npt.dec.skipif
-                            ' - 2ndt moment')
+            ks_cond = distname in ['ksone', 'kstwobign']
-    else:                     #or np.isnan(m2),
+            yield skp(ks_cond)(check_entropy), distfn, arg, distname
-        npt.assert_(np.isinf(m2),
+
-               msg + ' - 2nd moment -infinite, m2=%s' % str(m2))
+            if distfn.numargs == 0:
-        #np.isnan(m2) temporary special treatment for loggamma
+                yield skp(NUMPY_BELOW_1_7)(check_vecentropy), distfn, arg
-
+            if distfn.__class__._entropy != stats.rv_continuous._entropy:
-@_silence_fp_errors
+                yield check_private_entropy, distfn, arg, stats.rv_continuous
            yield check_edge_support, distfn, arg
@npt.dec.slow
 def test_moments():
    """Yield moment-related checks for every entry of ``distcont``.

    The mean, variance, skewness and kurtosis come from
    ``distfn.stats(*arg, moments='mvsk')``.  The normalization check is
    wrapped as a known failure for 'vonmises' and 'ksone'; the four
    expect-based checks are wrapped as known failures for 'vonmises',
    'ksone' and 'ncf'.  Integration warnings are ignored while the
    generator runs.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=integrate.IntegrationWarning)
        knf = npt.dec.knownfailureif
        bad_normalization = set(['vonmises', 'ksone'])
        bad_higher_moments = set(['vonmises', 'ksone', 'ncf'])
        for distname, arg in distcont[:]:
            distfn = getattr(stats, distname)
            m, v, s, k = distfn.stats(*arg, moments='mvsk')
            norm_fails = distname in bad_normalization
            higher_fails = distname in bad_higher_moments
            msg = distname + ' fails moments'
            yield (knf(norm_fails, msg)(check_normalization),
                   distfn, arg, distname)
            yield (knf(higher_fails, msg)(check_mean_expect),
                   distfn, arg, m, distname)
            yield (knf(higher_fails, msg)(check_var_expect),
                   distfn, arg, m, v, distname)
            yield (knf(higher_fails, msg)(check_skew_expect),
                   distfn, arg, m, v, s, distname)
            yield (knf(higher_fails, msg)(check_kurt_expect),
                   distfn, arg, m, v, k, distname)
            yield (check_loc_scale, distfn, arg, m, v, distname)
            yield (check_moment, distfn, arg, m, v, distname)
 def check_sample_meanvar_(distfn, arg, m, v, sm, sv, sn, msg):
    """Run the sample mean and sample variance checks for one distribution.

    m and v are the expected mean and variance, sm and sv the sample mean
    and sample variance, and sn the sample size.  A check is left out when
    its expected value is infinite.  distfn, arg and msg are accepted but
    not used here.
    """
    mean_is_infinite = np.isinf(m)
    if not mean_is_infinite:
        check_sample_mean(sm, sv, sn, m)
    var_is_infinite = np.isinf(v)
    if not var_is_infinite:
        check_sample_var(sv, sn, v)
 ##    check_sample_meanvar( sm, m, msg + 'sample mean test')
 ##    check_sample_meanvar( sv, v, msg + 'sample var test')
 def check_sample_mean(sm,v,n, popmean):
    """
 from stats.stats.ttest_1samp(a, popmean):
 Calculates the t-obtained for the independent samples T-test on ONE group
 of scores a, given a population mean.
-Returns: t-value, two-tailed prob
+def check_sample_mean(sm,v,n, popmean):
-"""
+    # from stats.stats.ttest_1samp(a, popmean):
-##    a = asarray(a)
+    # Calculates the t-obtained for the independent samples T-test on ONE group
-##    x = np.mean(a)
+    # of scores a, given a population mean.
-##    v = np.var(a, ddof=1)
+    #
-##    n = len(a)
+    # Returns: t-value, two-tailed prob
    df = n-1
    svar = ((n-1)*v) / float(df)    # looks redundant
    t = (sm-popmean) / np.sqrt(svar*(1.0/n))
    prob = stats.betai(0.5*df, 0.5, df/(df+t*t))
    # return t,prob
-    npt.assert_(prob > 0.01, 'mean fail, t,prob = %f, %f, m,sm=%f,%f' % (t,prob,popmean,sm))
+    npt.assert_(prob > 0.01, 'mean fail, t,prob = %f, %f, m, sm=%f,%f' %
            (t, prob, popmean, sm))
 def check_sample_var(sv,n, popvar):
-    '''
+    # two-sided chisquare test for sample variance equal to hypothesized variance
 two-sided chisquare test for sample variance equal to hypothesized variance
    '''
    df = n-1
    chi2 = (n-1)*popvar/float(popvar)
    pval = stats.chisqprob(chi2,df)*2
-    npt.assert_(pval > 0.01, 'var fail, t,pval = %f, %f, v,sv=%f,%f' % (chi2,pval,popvar,sv))
+    npt.assert_(pval > 0.01, 'var fail, t, pval = %f, %f, v, sv=%f, %f' %
            (chi2,pval,popvar,sv))
 def check_sample_skew_kurt(distfn, arg, ss, sk, msg):
    """Compare sample skewness ``ss`` and sample kurtosis ``sk`` with the
    values reported by ``distfn.stats(*arg, moments='sk')``.

    The kurtosis comparison runs first, then the skewness comparison; both
    are delegated to ``check_sample_meanvar``.
    """
    expected_skew, expected_kurt = distfn.stats(*arg, moments='sk')
    check_sample_meanvar(sk, expected_kurt, msg + 'sample kurtosis test')
    check_sample_meanvar(ss, expected_skew, msg + 'sample skew test')
 def check_sample_meanvar(sm,m,msg):
    """Assert that the sample moment ``sm`` matches ``m`` to ``DECIMAL`` places.

    Nothing is checked when ``m`` is infinite or nan.
    """
    if np.isinf(m) or np.isnan(m):
        return
    npt.assert_almost_equal(sm, m, decimal=DECIMAL,
                            err_msg=msg + ' - finite moment')
 ##    else:
 ##        npt.assert_(abs(sm) > 10000), msg='infinite moment, sm = ' + str(sm))
@_silence_fp_errors
 def check_cdf_ppf(distfn,arg,msg):
    values = [0.001, 0.5, 0.999]
    npt.assert_almost_equal(distfn.cdf(distfn.ppf(values, *arg), *arg),
-                            values, decimal=DECIMAL, err_msg= msg + \
+                            values, decimal=DECIMAL, err_msg=msg +
                            ' - cdf-ppf roundtrip')
-@_silence_fp_errors
+
 def check_sf_isf(distfn,arg,msg):
    npt.assert_almost_equal(distfn.sf(distfn.isf([0.1,0.5,0.9], *arg), *arg),
-                            [0.1,0.5,0.9], decimal=DECIMAL, err_msg= msg + \
+                            [0.1,0.5,0.9], decimal=DECIMAL, err_msg=msg +
                            ' - sf-isf roundtrip')
    npt.assert_almost_equal(distfn.cdf([0.1,0.9], *arg),
                            1.0-distfn.sf([0.1,0.9], *arg),
-                            decimal=DECIMAL, err_msg= msg + \
+                            decimal=DECIMAL, err_msg=msg +
                            ' - cdf-sf relationship')
-@_silence_fp_errors
+
 def check_pdf(distfn, arg, msg):
    # compares pdf at median with numerical derivative of cdf
    median = distfn.ppf(0.5, *arg)
@ -340,7 +370,7 @@ def check_pdf(distfn, arg, msg):
    npt.assert_almost_equal(pdfv, cdfdiff,
                decimal=DECIMAL, err_msg=msg + ' - cdf-pdf relationship')
-@_silence_fp_errors
+
 def check_pdf_logpdf(distfn, args, msg):
    # compares pdf at several points with the log of the pdf
    points = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
@ -351,7 +381,7 @@ def check_pdf_logpdf(distfn, args, msg):
    logpdf = logpdf[np.isfinite(logpdf)]
    npt.assert_almost_equal(np.log(pdf), logpdf, decimal=7, err_msg=msg + " - logpdf-log(pdf) relationship")
-@_silence_fp_errors
+
 def check_sf_logsf(distfn, args, msg):
    # compares sf at several points with the log of the sf
    points = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
@ -362,7 +392,7 @@ def check_sf_logsf(distfn, args, msg):
    logsf = logsf[np.isfinite(logsf)]
    npt.assert_almost_equal(np.log(sf), logsf, decimal=7, err_msg=msg + " - logsf-log(sf) relationship")
-@_silence_fp_errors
+
 def check_cdf_logcdf(distfn, args, msg):
    # compares cdf at several points with the log of the cdf
    points = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
@ -374,7 +404,6 @@ def check_cdf_logcdf(distfn, args, msg):
    npt.assert_almost_equal(np.log(cdf), logcdf, decimal=7, err_msg=msg + " - logcdf-log(cdf) relationship")
@_silence_fp_errors
 def check_distribution_rvs(dist, args, alpha, rvs):
    # test from scipy.stats.tests
    # this version reuses existing random variables
@ -385,6 +414,22 @@ def check_distribution_rvs(dist, args, alpha, rvs):
               "; alpha = " + str(alpha) + "\nargs = " + str(args))
 def check_vecentropy(distfn, args):
    """Assert that ``distfn.vecentropy`` and ``distfn._entropy`` return equal
    results for the same ``args``."""
    vectorized = distfn.vecentropy(*args)
    private = distfn._entropy(*args)
    npt.assert_equal(vectorized, private)
@npt.dec.skipif(NUMPY_BELOW_1_7)
 def check_loc_scale(distfn, arg, m, v, msg):
    """Check how ``distfn.stats`` responds to ``loc`` and ``scale``.

    With loc = scale = 10.0 the returned mean must be close to
    ``m*scale + loc`` and the returned variance close to
    ``v*scale*scale``.  ``msg`` is accepted but not used.
    """
    shift, stretch = 10.0, 10.0
    shifted_mean, shifted_var = distfn.stats(*arg, loc=shift, scale=stretch)
    expected_mean = m*stretch + shift
    expected_var = v*stretch*stretch
    npt.assert_allclose(expected_mean, shifted_mean)
    npt.assert_allclose(expected_var, shifted_var)
 def check_ppf_private(distfn, arg, msg):
    """Assert that ``distfn._ppf`` gives no nan at q = 0.1, 0.5 and 0.9.

    The original note on this check says it fails by design for truncnorm
    because ``self.nb`` is not defined.
    """
    probabilities = np.array([0.1, 0.5, 0.9])
    quantiles = distfn._ppf(probabilities, *arg)
    has_nan = np.any(np.isnan(quantiles))
    npt.assert_(not has_nan, msg + 'ppf private is nan')
 if __name__ == "__main__":
-    #nose.run(argv=['', __file__])
+    npt.run_module_suite()
    nose.runmodule(argv=[__file__,'-s'], exit=False)
--- a/pywafo/src/wafo/stats/tests/test_discrete_basic.py
+++ b/pywafo/src/wafo/stats/tests/test_discrete_basic.py
@ -1,10 +1,17 @@
 from __future__ import division, print_function, absolute_import
 import numpy.testing as npt
 import numpy as np
-import nose
+try:
-
+    from scipy.lib.six import xrange
-from wafo import stats
+except:
-
+    pass
-DECIMAL_meanvar = 0#1  # was 0
+from scipy import stats
 from .common_tests import (check_normalization, check_moment, check_mean_expect,
        check_var_expect, check_skew_expect, check_kurt_expect,
        check_entropy, check_private_entropy, check_edge_support,
        check_named_args)
 knf = npt.dec.knownfailureif
 distdiscrete = [
    ['bernoulli',(0.3,)],
@ -21,187 +28,115 @@ distdiscrete = [
    ['planck', (0.51,)],   # 4.1
    ['poisson', (0.6,)],
    ['randint', (7, 31)],
-    ['skellam',  (15, 8)]]
+    ['skellam', (15, 8)],
-#    ['zipf',     (4,)] ]   # arg=4 is ok,
+    ['zipf', (6.5,)]
-                           # Zipf broken for arg = 2, e.g. weird .stats
+]
                           # looking closer, mean, var should be inf for arg=2
 #@npt.dec.slow
 def test_discrete_basic():
    for distname, arg in distdiscrete:
        distfn = getattr(stats, distname)
        #assert stats.dlaplace.rvs(0.8) is not None
        np.random.seed(9765456)
        rvs = distfn.rvs(size=2000, *arg)
        supp = np.unique(rvs)
        m, v = distfn.stats(*arg)
-        #yield npt.assert_almost_equal(rvs.mean(), m, decimal=4,err_msg='mean')
+        yield check_cdf_ppf, distfn, arg, supp, distname + ' cdf_ppf'
-        #yield npt.assert_almost_equal, rvs.mean(), m, 2, 'mean' # does not work
+
-        yield check_sample_meanvar, rvs.mean(), m, distname + ' sample mean test'
+        yield check_pmf_cdf, distfn, arg, distname
-        yield check_sample_meanvar, rvs.var(), v, distname + ' sample var test'
+        yield check_oth, distfn, arg, supp, distname + ' oth'
-        yield check_cdf_ppf, distfn, arg, distname + ' cdf_ppf'
+        yield check_edge_support, distfn, arg
-        yield check_cdf_ppf2, distfn, arg, supp, distname + ' cdf_ppf'
+
        yield check_pmf_cdf, distfn, arg, distname + ' pmf_cdf'
        # zipf doesn't fail, but generates floating point warnings.
        # Should be checked.
        if not distname in ['zipf']:
            yield check_oth, distfn, arg, distname + ' oth'
            skurt = stats.kurtosis(rvs)
            sskew = stats.skew(rvs)
            yield check_sample_skew_kurt, distfn, arg, skurt, sskew, \
                          distname + ' skew_kurt'
        # dlaplace doesn't fail, but generates lots of floating point warnings.
        # Should be checked.
        if not distname in ['dlaplace']: #['logser']:  #known failure, fixed
        alpha = 0.01
        yield check_discrete_chisquare, distfn, arg, rvs, alpha, \
                      distname + ' chisquare'
-@npt.dec.slow
+    seen = set()
 def test_discrete_extra():
    for distname, arg in distdiscrete:
        if distname in seen:
            continue
        seen.add(distname)
        distfn = getattr(stats,distname)
-        yield check_ppf_limits, distfn, arg, distname + \
+        locscale_defaults = (0,)
-              ' ppf limit test'
+        meths = [distfn.pmf, distfn.logpmf, distfn.cdf, distfn.logcdf,
-        yield check_isf_limits, distfn, arg, distname + \
+                 distfn.logsf]
-              ' isf limit test'
+        # make sure arguments are within support
-        yield check_entropy, distfn, arg, distname + \
+        spec_k = {'randint': 11, 'hypergeom': 4, 'bernoulli': 0, }
-              ' entropy nan test'
+        k = spec_k.get(distname, 1)
-
+        yield check_named_args, distfn, k, arg, locscale_defaults, meths
-@npt.dec.skipif(True)
+        yield check_scale_docstring, distfn
-def test_discrete_private():
+
-    #testing private methods mostly for debugging
+        # Entropy
-    #   some tests might fail by design,
+        yield check_entropy, distfn, arg, distname
-    #   e.g. incorrect definition of distfn.a and distfn.b
+        if distfn.__class__._entropy != stats.rv_discrete._entropy:
            yield check_private_entropy, distfn, arg, stats.rv_discrete
 def test_moments():
    for distname, arg in distdiscrete:
        distfn = getattr(stats,distname)
-        rvs = distfn.rvs(size=10000,*arg)
+        m, v, s, k = distfn.stats(*arg, moments='mvsk')
-        m,v = distfn.stats(*arg)
+        yield check_normalization, distfn, arg, distname
        yield check_ppf_ppf, distfn, arg
        yield check_cdf_ppf_private, distfn, arg, distname
        yield check_generic_moment, distfn, arg, m, 1, 3   # last is decimal
        yield check_generic_moment, distfn, arg, v+m*m, 2, 3 # last is decimal
        yield check_moment_frozen, distfn, arg, m, 1, 3   # last is decimal
        yield check_moment_frozen, distfn, arg, v+m*m, 2, 3 # last is decimal
        # compare `stats` and `moment` methods
        yield check_moment, distfn, arg, m, v, distname
        yield check_mean_expect, distfn, arg, m, distname
        yield check_var_expect, distfn, arg, m, v, distname
        yield check_skew_expect, distfn, arg, m, v, s, distname
-def check_sample_meanvar(sm,m,msg):
+        cond = distname in ['zipf']
-    if not np.isinf(m):
+        msg = distname + ' fails kurtosis'
-        npt.assert_almost_equal(sm, m, decimal=DECIMAL_meanvar, err_msg=msg + \
+        yield knf(cond, msg)(check_kurt_expect), distfn, arg, m, v, k, distname
                                ' - finite moment')
    else:
        npt.assert_(sm > 10000, msg='infinite moment, sm = ' + str(sm))
-def check_sample_var(sm,m,msg):
+        # frozen distr moments
-    npt.assert_almost_equal(sm, m, decimal=DECIMAL_meanvar, err_msg= msg + 'var')
+        yield check_moment_frozen, distfn, arg, m, 1
        yield check_moment_frozen, distfn, arg, v+m*m, 2
 def check_cdf_ppf(distfn,arg,msg):
    """Check the ppf/cdf round trip around the median.

    With ``ppf05 = ppf(0.5)`` and ``cdf05 = cdf(ppf05)``, the ppf evaluated
    at ``cdf05 - 1e-6`` must be almost equal to ``ppf05``, and the ppf
    evaluated at ``cdf05 + 1e-4`` must be strictly greater than ``ppf05``.
    """
    median = distfn.ppf(0.5, *arg)
    mass_at_median = distfn.cdf(median, *arg)
    just_below = distfn.ppf(mass_at_median - 1e-6, *arg)
    npt.assert_almost_equal(just_below, median,
                            err_msg=msg + 'ppf-cdf-median')
    just_above = distfn.ppf(mass_at_median + 1e-4, *arg)
    npt.assert_((just_above > median), msg + 'ppf-cdf-next')
-def check_cdf_ppf2(distfn,arg,supp,msg):
+def check_cdf_ppf(distfn, arg, supp, msg):
    # cdf is a step function, and ppf(q) = min{k : cdf(k) >= q, k integer}
    npt.assert_array_equal(distfn.ppf(distfn.cdf(supp, *arg), *arg),
                           supp, msg + '-roundtrip')
    npt.assert_array_equal(distfn.ppf(distfn.cdf(supp, *arg) - 1e-8, *arg),
                           supp, msg + '-roundtrip')
    supp1 = supp[supp < distfn.b]
    npt.assert_array_equal(distfn.ppf(distfn.cdf(supp1, *arg) + 1e-8, *arg),
                     supp1 + distfn.inc, msg + 'ppf-cdf-next')
    # -1e-8 could cause an error if pmf < 1e-8
-def check_cdf_ppf_private(distfn,arg,msg):
+def check_pmf_cdf(distfn, arg, distname):
-    ppf05 = distfn._ppf(0.5,*arg)
+    startind = np.int(distfn.ppf(0.01, *arg) - 1)
-    cdf05 = distfn.cdf(ppf05,*arg)
+    index = list(range(startind, startind + 10))
-    npt.assert_almost_equal(distfn._ppf(cdf05-1e-6,*arg),ppf05,
+    cdfs, pmfs_cum = distfn.cdf(index,*arg), distfn.pmf(index, *arg).cumsum()
-                            err_msg=msg + '_ppf-cdf-median ')
+
-    npt.assert_((distfn._ppf(cdf05+1e-4,*arg)>ppf05), msg + '_ppf-cdf-next')
+    atol, rtol = 1e-10, 1e-10
-
+    if distname == 'skellam':    # ncx2 accuracy
-def check_ppf_ppf(distfn, arg):
+        atol, rtol = 1e-5, 1e-5
-    npt.assert_(distfn.ppf(0.5,*arg) < np.inf)
+    npt.assert_allclose(cdfs - cdfs[0], pmfs_cum - pmfs_cum[0],
-    ppfs = distfn.ppf([0.5,0.9],*arg)
+            atol=atol, rtol=rtol)
-    ppf_s = [distfn._ppf(0.5,*arg), distfn._ppf(0.9,*arg)]
+
-    npt.assert_(np.all(ppfs < np.inf))
+
-    npt.assert_(ppf_s[0] == distfn.ppf(0.5,*arg))
+def check_moment_frozen(distfn, arg, m, k):
-    npt.assert_(ppf_s[1] == distfn.ppf(0.9,*arg))
+    npt.assert_allclose(distfn(*arg).moment(k), m,
-    npt.assert_(ppf_s[0] == ppfs[0])
+            atol=1e-10, rtol=1e-10)
-    npt.assert_(ppf_s[1] == ppfs[1])
+
-
+
-def check_pmf_cdf(distfn, arg, msg):
+def check_oth(distfn, arg, supp, msg):
    startind = np.int(distfn._ppf(0.01,*arg)-1)
    index = range(startind,startind+10)
    cdfs = distfn.cdf(index,*arg)
    npt.assert_almost_equal(cdfs, distfn.pmf(index, *arg).cumsum() + \
                            cdfs[0] - distfn.pmf(index[0],*arg),
                            decimal=4, err_msg= msg + 'pmf-cdf')
 def check_generic_moment(distfn, arg, m, k, decim):
    """Assert that ``distfn.generic_moment(k, *arg)`` equals ``m`` to
    ``decim`` decimal places."""
    computed = distfn.generic_moment(k, *arg)
    label = str(distfn) + ' generic moment test'
    npt.assert_almost_equal(computed, m, decimal=decim, err_msg=label)
 def check_moment_frozen(distfn, arg, m, k, decim):
    """Assert that the ``k``-th moment of the frozen object ``distfn(*arg)``
    equals ``m`` to ``decim`` decimal places."""
    frozen = distfn(*arg)
    computed = frozen.moment(k)
    label = str(distfn) + ' frozen moment test'
    npt.assert_almost_equal(computed, m, decimal=decim, err_msg=label)
 def check_oth(distfn, arg, msg):
    # checking other methods of distfn
-    meanint = round(float(distfn.stats(*arg)[0])) # closest integer to mean
+    npt.assert_allclose(distfn.sf(supp, *arg), 1. - distfn.cdf(supp, *arg),
-    npt.assert_almost_equal(distfn.sf(meanint, *arg), 1 - \
+            atol=1e-10, rtol=1e-10)
-                            distfn.cdf(meanint, *arg), decimal=8)
+
-    median_sf = distfn.isf(0.5, *arg)
+    q = np.linspace(0.01, 0.99, 20)
    npt.assert_allclose(distfn.isf(q, *arg), distfn.ppf(1. - q, *arg),
            atol=1e-10, rtol=1e-10)
    median_sf = distfn.isf(0.5, *arg)
    npt.assert_(distfn.sf(median_sf - 1, *arg) > 0.5)
    npt.assert_(distfn.cdf(median_sf + 1, *arg) > 0.5)
    npt.assert_equal(distfn.isf(0.5, *arg), distfn.ppf(0.5, *arg))
 #next 3 functions copied from test_continuous_extra
 #    adjusted
 def check_ppf_limits(distfn,arg,msg):
    """Check ``distfn.ppf`` at the probabilities -1, 0, 1 and 2.

    ppf(0) is compared with ``distfn.a - 1`` and ppf(1) with ``distfn.b``
    through ``assert_equal_inf_nan``; the two out-of-range probabilities
    must give nan.
    """
    at_minus1, at_0, at_1, at_2 = distfn.ppf([-1,0,1,2], *arg)
    expected_low = distfn.a-1
    assert_equal_inf_nan(expected_low, at_0, msg + 'ppf lower bound')
    expected_upp = distfn.b
    assert_equal_inf_nan(expected_upp, at_1, msg + 'ppf upper bound')
    below_is_nan = np.isnan(at_minus1)
    npt.assert_(below_is_nan, msg + 'ppf out of bounds - below')
    above_is_nan = np.isnan(at_2)
    npt.assert_(above_is_nan, msg + 'ppf out of bounds - above')
 def check_isf_limits(distfn,arg,msg):
    """Check ``distfn.isf`` at the probabilities -1, 0, 1 and 2.

    isf(1) is compared with ``distfn.a - 1`` and isf(0) with ``distfn.b``
    through ``assert_equal_inf_nan``; the two out-of-range probabilities
    must give nan.
    """
    at_minus1, at_0, at_1, at_2 = distfn.isf([-1,0,1,2], *arg)
    expected_low = distfn.a-1
    assert_equal_inf_nan(expected_low, at_1, msg + 'isf lower bound')
    expected_upp = distfn.b
    assert_equal_inf_nan(expected_upp, at_0, msg + 'isf upper bound')
    below_is_nan = np.isnan(at_minus1)
    npt.assert_(below_is_nan, msg + 'isf out of bounds - below')
    above_is_nan = np.isnan(at_2)
    npt.assert_(above_is_nan, msg + 'isf out of bounds - above')
 def assert_equal_inf_nan(v1,v2,msg):
    """Assert that ``v2`` agrees with the reference value ``v1``.

    ``v1`` must not be nan.  When ``v1`` is finite, ``v2`` has to match it
    to 10 decimal places; when ``v1`` is infinite, ``v2`` has to be
    infinite or nan (the sign is not compared).
    """
    npt.assert_(not np.isnan(v1))
    if np.isinf(v1):
        v2_unbounded = np.isinf(v2) or np.isnan(v2)
        npt.assert_(v2_unbounded, msg + ' - infinite, v2=%s' % str(v2))
        return
    npt.assert_almost_equal(v1, v2, decimal=10, err_msg=msg + ' - finite')
 def check_sample_skew_kurt(distfn, arg, sk, ss, msg):
    """Compare sample kurtosis and skewness with the distribution's values.

    Parameters
    ----------
    distfn : distribution object providing ``stats(*arg, moments=...)``
    arg : tuple of shape parameters forwarded to ``distfn.stats``
    sk : sample kurtosis
    ss : sample skewness
    msg : prefix for the failure messages
    """
    # The keyword is ``moments``; 'sk' asks for skewness and kurtosis, the
    # same request the continuous variant of this check makes.
    s, k = distfn.stats(*arg, moments='sk')
    # These two lines used to be bare tuple expressions (nose ``yield`` syntax
    # without the ``yield``), so nothing was ever compared.  They are real
    # calls now, labelled after the quantity each one compares.
    check_sample_meanvar(sk, k, msg + 'sample kurtosis test')
    check_sample_meanvar(ss, s, msg + 'sample skew test')
 def check_entropy(distfn,arg,msg):
    """Assert that ``distfn.entropy(*arg)`` is not nan."""
    entropy_value = distfn.entropy(*arg)
    is_nan = np.isnan(entropy_value)
    npt.assert_(not is_nan, msg + 'test Entropy is nan')
 def check_discrete_chisquare(distfn, arg, rvs, alpha, msg):
-    '''perform chisquare test for random sample of a discrete distribution
+    """Perform chisquare test for random sample of a discrete distribution
    Parameters
    ----------
@ -218,20 +153,14 @@ def check_discrete_chisquare(distfn, arg, rvs, alpha, msg):
        0 if test passes, 1 if test fails
    uses global variable debug for printing results
    '''
-    # define parameters for test
+    """
 ##    n=2000
    n = len(rvs)
    nsupp = 20
    wsupp = 1.0/nsupp
 ##    distfn = getattr(stats, distname)
 ##    np.random.seed(9765456)
 ##    rvs = distfn.rvs(size=n,*arg)
    # construct intervals with minimum mass 1/nsupp
-    # intervalls are left-half-open as in a cdf difference
+    # intervals are left-half-open as in a cdf difference
    distsupport = xrange(max(distfn.a, -1000), min(distfn.b, 1000) + 1)
    last = 0
    distsupp = [max(distfn.a, -1000)]
@ -263,6 +192,11 @@ def check_discrete_chisquare(distfn, arg, rvs, alpha, msg):
           ' at arg = %s with pval = %s' % (msg,str(arg),str(pval)))
 def check_scale_docstring(distfn):
    """Assert that the word 'scale' does not occur in ``distfn.__doc__``.

    Nothing is checked when there is no docstring, which happens when the
    interpreter is run with -OO and docstrings are stripped.
    """
    doc = distfn.__doc__
    if doc is None:
        return
    npt.assert_('scale' not in doc)
 if __name__ == "__main__":
-    #nose.run(argv=['', __file__])
+    npt.run_module_suite()
    nose.runmodule(argv=[__file__,'-s'], exit=False)
--- a/pywafo/src/wafo/stats/tests/test_distributions.py
+++ b/pywafo/src/wafo/stats/tests/test_distributions.py
--- a/pywafo/src/wafo/stats/tests/test_fit.py
+++ b/pywafo/src/wafo/stats/tests/test_fit.py
@ -1,88 +1,95 @@
 # NOTE: contains only one test, _est_cont_fit, that is renamed so that
 #       nose doesn't run it
 # I put this here for the record and for the case when someone wants to
 # verify the quality of fit
 # with current parameters: relatively small sample size, default starting values
 #       Ran 84 tests in 401.797s
 #       FAILED (failures=15)
 #
 #Ran 83 tests in 238.859s
 #FAILED (failures=12)
 from __future__ import division, print_function, absolute_import
-import numpy.testing as npt
+import os
 import numpy as np
 from numpy.testing import dec
 from wafo import stats
-from test_continuous_basic import distcont
+from .test_continuous_basic import distcont
 # this is not a proper statistical test for convergence, but only
 # verifies that the estimate and true values don't differ by too much
-n_repl1 = 1000 # sample size for first run
+
-n_repl2 = 5000 # sample size for second run, if first run fails
+fit_sizes = [1000, 5000]  # sample sizes to try
 thresh_percent = 0.25  # percent of true parameters for fail cut-off
 thresh_min = 0.75  # minimum difference estimate - true to fail test
-
+failing_fits = [
-distslow = [ 'ncx2', 'rdist', 'gausshyper', 'recipinvgauss', 'ksone', 'genexpon',
+        'burr',
-            'vonmises', 'rice', 'mielke',
+        'chi',
-            'powerlognorm', 'kstwobign', 'tukeylambda','betaprime', 'gengamma', 
+        'chi2',
-             'johnsonsb', 'burr', 'truncexpon', 'pearson3', 'exponweib', 'nakagami', 
+        'gausshyper',
-             'wrapcauchy']
+        'genexpon',
-dist_rarely_fitted = ['f', 'ncf', 'nct', 'chi']
+        'gengamma',
-distskip = distslow + dist_rarely_fitted    
+        'ksone',
-
+        'mielke',
-#distcont = [['genextreme', (3.3184017469423535,)]]
+        'ncf',
-#@npt.dec.slow
+        'ncx2',
        'pearson3',
        'powerlognorm',
        'truncexpon',
        'tukeylambda',
        'vonmises',
        'wrapcauchy',
 ]
 # Don't run the fit test on these:
 skip_fit = [
    'erlang',  # Subclass of gamma, generates a warning.
 ]
@dec.slow
 def test_cont_fit():
    """Yield a ``check_cont_fit`` case for each ``distcont`` entry whose name
    is not in ``distskip``.

    The yielded check tests how close the parameters estimated by the fit
    method of continuous distributions are to the true parameters.
    """
    for distname, arg in distcont:
        if distname in distskip:
            continue
        yield check_cont_fit, distname, arg
@npt.dec.slow
 def _est_cont_fit_slow():
    # this tests the closeness of the estimated parameters to the true
    # parameters with fit method of continuous distributions
    # Note: is slow, some distributions don't converge with sample size <= 10000
    for distname, arg in distcont:
-        if distname in distslow:
+        if distname not in skip_fit:
            yield check_cont_fit, distname,arg
 def test_lognorm_fit_ticket1131():
    """Yield ``check_cont_fit`` cases for 'lognorm' with three fixed
    parameter triples (ticket 1131)."""
    lognorm_cases = [(2.1, 1.,1.), (1.,10.,1.), (1.,1.,10.)]
    for case in lognorm_cases:
        yield (check_cont_fit, 'lognorm', case)
 def check_cont_fit(distname,arg):
-    distfn = getattr(stats, distname)
+    if distname in failing_fits:
-    rvs = distfn.rvs(size=n_repl1,*arg)
+        # Skip failing fits unless overridden
-    est = distfn.fit(rvs) #, *arg) # start with default values
+        xfail = True
-    n = distfn.numargs + 2
+        try:
-    truearg = np.hstack([arg,[0.0, 1.0]])[:n]
+            xfail = not int(os.environ['SCIPY_XFAIL'])
        except:
            pass
        if xfail:
            msg = "Fitting %s doesn't work reliably yet" % distname
            msg += " [Set environment variable SCIPY_XFAIL=1 to run this test nevertheless.]"
            dec.knownfailureif(True, msg)(lambda: None)()
-    diff = est-truearg
+    distfn = getattr(stats, distname)
-    txt = ''
+    truearg = np.hstack([arg,[0.0,1.0]])
    diffthreshold = np.max(np.vstack([truearg*thresh_percent,
                                      np.ones(distfn.numargs+2)*thresh_min]),0)
    for fit_size in fit_sizes:
        # Note that if a fit succeeds, the other fit_sizes are skipped
        np.random.seed(1234)
        with np.errstate(all='ignore'):
            rvs = distfn.rvs(size=fit_size, *arg)
            est = distfn.fit(rvs)  # start with default values
        diff = est - truearg
        # threshold for location
        diffthreshold[-2] = np.max([np.abs(rvs.mean())*thresh_percent,thresh_min])
        if np.any(np.isnan(est)):
            raise AssertionError('nan returned in fit')
        else:
-        if np.any((np.abs(diff) - diffthreshold) > 0.0):
+            if np.all(np.abs(diff) <= diffthreshold):
-##            txt = 'WARNING - diff too large with small sample'
+                break
-##            print 'parameter diff =', diff - diffthreshold, txt
+    else:
            rvs = np.concatenate([rvs,distfn.rvs(size=n_repl2-n_repl1,*arg)])
            est = distfn.fit(rvs) #,*arg)
            truearg = np.hstack([arg,[0.0,1.0]])[:n]
            diff = est-truearg
            if np.any((np.abs(diff) - diffthreshold) > 0.0):
        txt = 'parameter: %s\n' % str(truearg)
        txt += 'estimated: %s\n' % str(est)
        txt += 'diff     : %s\n' % str(diff)
@ -90,9 +97,4 @@ def check_cont_fit(distname,arg):
 if __name__ == "__main__":
-    check_cont_fit('bradford', (0.29891359763170633,))
+    np.testing.run_module_suite()
 #    check_cont_fit('lognorm', (10,1,1))
 #    check_cont_fit('ncx2', (21, 1.0560465975116415))
    import nose
    #nose.run(argv=['', __file__])
    nose.runmodule(argv=[__file__,'-s'], exit=False)
--- a/pywafo/src/wafo/stats/tests/test_kdeoth.py
+++ b/pywafo/src/wafo/stats/tests/test_kdeoth.py
@ -1,9 +1,10 @@
-
+from __future__ import division, print_function, absolute_import
 from wafo import stats
 import numpy as np
-from numpy.testing import assert_almost_equal, assert_
+from numpy.testing import assert_almost_equal, assert_, assert_raises, \
    assert_array_almost_equal, assert_array_almost_equal_nulp, run_module_suite
 def test_kde_1d():
    #some basic tests comparing to normal distribution
@ -16,7 +17,7 @@ def test_kde_1d():
    # get kde for original sample
    gkde = stats.gaussian_kde(xn)
-    # evaluate the density funtion for the kde for some points
+    # evaluate the density function for the kde for some points
    xs = np.linspace(-7,7,501)
    kdepdf = gkde.evaluate(xs)
    normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
@ -34,3 +35,150 @@ def test_kde_1d():
                        (kdepdf**2).sum()*intervall, decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
                        (kdepdf*normpdf).sum()*intervall, decimal=2)
 def test_kde_bandwidth_method():
    # A user-supplied callable and a scalar factor must both reproduce the
    # density of the default estimator; an unknown rule name is rejected.
    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        exponent = -1./(kde_obj.d+4)
        return np.power(kde_obj.n, exponent)
    np.random.seed(8765678)
    sample = np.random.randn(50)
    grid = np.linspace(-7, 7, 51)
    kde_default = stats.gaussian_kde(sample)
    expected_pdf = kde_default.evaluate(grid)
    # First the callable, then the scalar taken from the default estimator.
    for bw in (scotts_factor, kde_default.factor):
        kde_alt = stats.gaussian_kde(sample, bw_method=bw)
        assert_almost_equal(expected_pdf, kde_alt.evaluate(grid))
    assert_raises(ValueError, stats.gaussian_kde, sample,
                  bw_method='wrongstring')
 # Subclasses that should stay working (extracted from various sources).
 # Unfortunately the earlier design of gaussian_kde made it necessary for users
 # to create these kinds of subclasses, or call _compute_covariance() directly.
 class _kde_subclass1(stats.gaussian_kde):
    # Legacy pattern: never calls gaussian_kde.__init__; it fills in the
    # dataset attributes by hand, selects Scott's factor and then calls
    # _compute_covariance() directly.
    def __init__(self, dataset):
        self.dataset = np.atleast_2d(dataset)
        self.d, self.n = self.dataset.shape
        self.covariance_factor = self.scotts_factor
        self._compute_covariance()
 class _kde_subclass2(stats.gaussian_kde):
    # Legacy pattern: assigns covariance_factor on the instance *before*
    # delegating to the parent constructor.
    def __init__(self, dataset):
        self.covariance_factor = self.scotts_factor
        super(_kde_subclass2, self).__init__(dataset)
 class _kde_subclass3(stats.gaussian_kde):
    # Legacy pattern: the caller supplies the covariance matrix, and
    # _compute_covariance() is overridden to derive inv_cov and _norm_factor
    # from that stored matrix.
    def __init__(self, dataset, covariance):
        # Must be stored before the parent constructor runs.
        # NOTE(review): presumably the parent constructor invokes
        # _compute_covariance(), which reads self.covariance -- confirm.
        self.covariance = covariance
        stats.gaussian_kde.__init__(self, dataset)
    def _compute_covariance(self):
        self.inv_cov = np.linalg.inv(self.covariance)
        self._norm_factor = np.sqrt(np.linalg.det(2*np.pi * self.covariance)) \
                                   * self.n
 class _kde_subclass4(stats.gaussian_kde):
    # Overrides the bandwidth rule as a method: half of Silverman's factor.
    def covariance_factor(self):
        return 0.5 * self.silverman_factor()
 def test_gaussian_kde_subclassing():
    # Legacy-style subclasses, and direct use of _compute_covariance(), must
    # keep producing the same density as a plain gaussian_kde.
    # The builtin ``float`` is used for the dtype: ``np.float`` was only an
    # alias for it and no longer exists in recent NumPy releases.
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=50)
    # gaussian_kde itself
    kde = stats.gaussian_kde(x1)
    ys = kde(xs)
    # subclass 1
    kde1 = _kde_subclass1(x1)
    y1 = kde1(xs)
    assert_array_almost_equal_nulp(ys, y1, nulp=10)
    # subclass 2
    kde2 = _kde_subclass2(x1)
    y2 = kde2(xs)
    assert_array_almost_equal_nulp(ys, y2, nulp=10)
    # subclass 3
    kde3 = _kde_subclass3(x1, kde.covariance)
    y3 = kde3(xs)
    assert_array_almost_equal_nulp(ys, y3, nulp=10)
    # subclass 4 uses a different bandwidth, so it is compared against
    # pinned values (evaluated at the data points) instead of against ys.
    kde4 = _kde_subclass4(x1)
    y4 = kde4(x1)
    y_expected = [0.06292987, 0.06346938, 0.05860291, 0.08657652, 0.07904017]
    assert_array_almost_equal(y_expected, y4, decimal=6)
    # Not a subclass, but check for use of _compute_covariance()
    kde5 = kde
    kde5.covariance_factor = lambda: kde.factor
    kde5._compute_covariance()
    y5 = kde5(xs)
    assert_array_almost_equal_nulp(ys, y5, nulp=10)
 def test_gaussian_kde_covariance_caching():
    # Changing the bandwidth and then resetting it to the default rule must
    # give the default density again.
    # The builtin ``float`` is used for the dtype: ``np.float`` was only an
    # alias for it and no longer exists in recent NumPy releases.
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=5)
    # These expected values are from scipy 0.10, before some changes to
    # gaussian_kde.  They were not compared with any external reference.
    y_expected = [0.02463386, 0.04689208, 0.05395444, 0.05337754, 0.01664475]
    # Set the bandwidth, then reset it to the default.
    kde = stats.gaussian_kde(x1)
    kde.set_bandwidth(bw_method=0.5)
    kde.set_bandwidth(bw_method='scott')
    y2 = kde(xs)
    assert_array_almost_equal(y_expected, y2, decimal=7)
 def test_gaussian_kde_monkeypatch():
    """Ugly, but people may rely on this.  See scipy pull request 123,
    specifically the linked ML thread "Width of the Gaussian in stats.kde".
    If it is necessary to break this later on, that is to be discussed on ML.
    """
    # The builtin ``float`` is used for the dtype: ``np.float`` was only an
    # alias for it and no longer exists in recent NumPy releases.
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=50)
    # The old monkeypatched version to get at Silverman's Rule.
    kde = stats.gaussian_kde(x1)
    kde.covariance_factor = kde.silverman_factor
    kde._compute_covariance()
    y1 = kde(xs)
    # The new saner version.
    kde2 = stats.gaussian_kde(x1, bw_method='silverman')
    y2 = kde2(xs)
    assert_array_almost_equal_nulp(y1, y2, nulp=10)
 def test_kde_integer_input():
    """Regression test for #1181."""
    # Integer data, evaluated at the data points themselves.
    points = np.arange(5)
    density = stats.gaussian_kde(points)(points)
    y_expected = [0.13480721, 0.18222869, 0.19514935, 0.18222869, 0.13480721]
    assert_array_almost_equal(density, y_expected, decimal=6)
 if __name__ == "__main__":
    # Executed as a script: hand over to numpy.testing's module runner.
    run_module_suite()
--- a/pywafo/src/wafo/stats/tests/test_morestats.py
+++ b/pywafo/src/wafo/stats/tests/test_morestats.py
@ -0,0 +1,789 @@
 # Author:  Travis Oliphant, 2002
 #
 # Further enhancements and tests added by numerous SciPy developers.
 #
 from __future__ import division, print_function, absolute_import
 import warnings
 import numpy as np
 from numpy.random import RandomState
 from numpy.testing import (TestCase, run_module_suite, assert_array_equal,
    assert_almost_equal, assert_array_less, assert_array_almost_equal,
    assert_raises, assert_, assert_allclose, assert_equal, dec)
 from scipy import stats
 # Matplotlib is not a scipy dependency but is optionally used in probplot, so
 # check if it's available
 try:
    import matplotlib.pyplot as plt
    have_matplotlib = True
 except Exception:
    # Stay best-effort: importing pyplot can fail for reasons other than a
    # missing package, so any ordinary exception means "not available".
    # Unlike a bare ``except:``, this does not swallow KeyboardInterrupt or
    # SystemExit.
    have_matplotlib = False
 # Shared sample data: ten groups of ten values each.  All ten are passed
 # together to the Bartlett and Levene tests below; g1..g3 are also used by
 # the Levene and Fligner `center='trimmed'` checks.
 g1 = [1.006, 0.996, 0.998, 1.000, 0.992, 0.993, 1.002, 0.999, 0.994, 1.000]
 g2 = [0.998, 1.006, 1.000, 1.002, 0.997, 0.998, 0.996, 1.000, 1.006, 0.988]
 g3 = [0.991, 0.987, 0.997, 0.999, 0.995, 0.994, 1.000, 0.999, 0.996, 0.996]
 g4 = [1.005, 1.002, 0.994, 1.000, 0.995, 0.994, 0.998, 0.996, 1.002, 0.996]
 g5 = [0.998, 0.998, 0.982, 0.990, 1.002, 0.984, 0.996, 0.993, 0.980, 0.996]
 g6 = [1.009, 1.013, 1.009, 0.997, 0.988, 1.002, 0.995, 0.998, 0.981, 0.996]
 g7 = [0.990, 1.004, 0.996, 1.001, 0.998, 1.000, 1.018, 1.010, 0.996, 1.002]
 g8 = [0.998, 1.000, 1.006, 1.000, 1.002, 0.996, 0.998, 0.996, 1.002, 1.006]
 g9 = [1.002, 0.998, 0.996, 0.995, 0.996, 1.004, 1.004, 0.998, 0.999, 0.991]
 g10 = [0.991, 0.995, 0.984, 0.994, 0.997, 0.997, 0.991, 0.998, 1.004, 0.997]
 class TestShapiro(TestCase):
    def test_basic(self):
        # Each case: (sample, expected W, expected p-value, p-value decimals)
        cases = [
            ([0.11,7.87,4.61,10.14,7.95,3.14,0.46,
              4.43,0.21,4.75,0.71,1.52,3.24,
              0.93,0.42,4.97,9.53,4.55,0.47,6.66],
             0.90047299861907959, 0.042089745402336121, 6),
            ([1.36,1.14,2.92,2.55,1.46,1.06,5.27,-1.11,
              3.48,1.10,0.88,-0.51,1.46,0.52,6.20,1.69,
              0.08,3.67,2.81,3.49],
             0.9590270, 0.52460, 3),
        ]
        for sample, w_expected, pw_expected, pw_decimal in cases:
            w, pw = stats.shapiro(sample)
            assert_almost_equal(w, w_expected, 6)
            assert_almost_equal(pw, pw_expected, pw_decimal)
    def test_bad_arg(self):
        # Length of x is less than 3.
        assert_raises(ValueError, stats.shapiro, [1])
 class TestAnderson(TestCase):
    def test_normal(self):
        rng = RandomState(1234567890)
        expon_sample = rng.standard_exponential(size=50)
        normal_sample = rng.standard_normal(size=50)
        # Exponential data tested against the (default) normal distribution:
        # the statistic exceeds all but the last critical value.
        statistic, critical, _ = stats.anderson(expon_sample)
        assert_array_less(critical[:-1], statistic)
        # Normal data: the statistic stays below the last two critical values.
        statistic, critical, _ = stats.anderson(normal_sample)
        assert_array_less(statistic, critical[-2:])
    def test_expon(self):
        rng = RandomState(1234567890)
        expon_sample = rng.standard_exponential(size=50)
        normal_sample = rng.standard_normal(size=50)
        statistic, critical, _ = stats.anderson(expon_sample, 'expon')
        assert_array_less(statistic, critical[-2:])
        # Floating-point warnings are silenced only for this one call.
        with np.errstate(all='ignore'):
            statistic, critical, _ = stats.anderson(normal_sample, 'expon')
        assert_(statistic > critical[-1])
    def test_bad_arg(self):
        assert_raises(ValueError, stats.anderson, [1], dist='plate_of_shrimp')
 class TestAnsari(TestCase):
    def _check_result(self, result, w_expected, pval_expected, pval_decimal):
        # Shared comparison: W always to 11 decimals, p-value as requested.
        W, pval = result
        assert_almost_equal(W, w_expected, 11)
        assert_almost_equal(pval, pval_expected, pval_decimal)
    def test_small(self):
        result = stats.ansari([1,2,3,3,4], [3,2,6,1,6,1,4,1])
        self._check_result(result, 23.5, 0.13499256881897437, 11)
    def test_approx(self):
        ramsay = np.array((111, 107, 100, 99, 102, 106, 109, 108, 104, 99,
                           101, 96, 97, 102, 107, 113, 116, 113, 110, 98))
        parekh = np.array((107, 108, 106, 98, 105, 103, 110, 105, 104,
                           100, 96, 108, 103, 104, 114, 114, 113, 108, 106, 99))
        # Only the expected tie warning is silenced, and only for this call.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore',
                        message="Ties preclude use of exact statistic.")
            result = stats.ansari(ramsay, parekh)
        self._check_result(result, 185.5, 0.18145819972867083, 11)
    def test_exact(self):
        result = stats.ansari([1,2,3,4],[15,5,20,8,10,12])
        self._check_result(result, 10.0, 0.533333333333333333, 7)
    def test_bad_arg(self):
        # An empty sample on either side is rejected.
        for x, y in (([], [1]), ([1], [])):
            assert_raises(ValueError, stats.ansari, x, y)
 class TestBartlett(TestCase):
    def test_data(self):
        # All ten module-level groups in one call.
        samples = (g1, g2, g3, g4, g5, g6, g7, g8, g9, g10)
        statistic, p_value = stats.bartlett(*samples)
        assert_almost_equal(statistic, 20.78587342806484, 7)
        assert_almost_equal(p_value, 0.0136358632781, 7)
    def test_bad_arg(self):
        # Too few args raises ValueError.
        single_sample = [1]
        assert_raises(ValueError, stats.bartlett, single_sample)
 class TestLevene(TestCase):
    # Tests for stats.levene and its `center` / `proportiontocut` options.
    def test_data(self):
        # Pinned result for all ten module-level groups g1..g10.
        args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
        W, pval = stats.levene(*args)
        assert_almost_equal(W,1.7059176930008939,7)
        assert_almost_equal(pval,0.0990829755522,7)
    def test_trimmed1(self):
        # Test that center='trimmed' gives the same result as center='mean'
        # when proportiontocut=0.
        W1, pval1 = stats.levene(g1, g2, g3, center='mean')
        W2, pval2 = stats.levene(g1, g2, g3, center='trimmed', proportiontocut=0.0)
        assert_almost_equal(W1, W2)
        assert_almost_equal(pval1, pval2)
    def test_trimmed2(self):
        x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0]
        y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0]
        # x2 holds the same values as x in a (seeded) shuffled order, so the
        # trimmed result is also checked on unsorted input.
        np.random.seed(1234)
        x2 = np.random.permutation(x)
        # Use center='trimmed'
        W0, pval0 = stats.levene(x, y, center='trimmed', proportiontocut=0.125)
        W1, pval1 = stats.levene(x2, y, center='trimmed', proportiontocut=0.125)
        # Trim the data here, and use center='mean'
        W2, pval2 = stats.levene(x[1:-1], y[1:-1], center='mean')
        # Result should be the same.
        assert_almost_equal(W0, W2)
        assert_almost_equal(W1, W2)
        assert_almost_equal(pval1, pval2)
    def test_equal_mean_median(self):
        # center='mean' on x and center='median' on a shuffled copy of x are
        # expected to agree for these two samples.
        x = np.linspace(-1,1,21)
        np.random.seed(1234)
        x2 = np.random.permutation(x)
        y = x**3
        W1, pval1 = stats.levene(x, y, center='mean')
        W2, pval2 = stats.levene(x2, y, center='median')
        assert_almost_equal(W1, W2)
        assert_almost_equal(pval1, pval2)
    def test_bad_keyword(self):
        # 'portiontocut' is a deliberately misspelled keyword.
        x = np.linspace(-1,1,21)
        assert_raises(TypeError, stats.levene, x, x, portiontocut=0.1)
    def test_bad_center_value(self):
        # 'trim' is not an accepted value for `center`.
        x = np.linspace(-1,1,21)
        assert_raises(ValueError, stats.levene, x, x, center='trim')
    def test_too_few_args(self):
        # A single sample is rejected.
        assert_raises(ValueError, stats.levene, [1])
 class TestBinomP(TestCase):
    def test_data(self):
        # Each case: (positional args, keyword args, expected p-value)
        cases = [
            ((100, 250), {}, 0.0018833009350757682),
            ((201, 405), {}, 0.92085205962670713),
            (([682,243],), {'p': 3.0/4}, 0.38249155957481695),
        ]
        for args, kwargs, expected in cases:
            pval = stats.binom_test(*args, **kwargs)
            assert_almost_equal(pval, expected, 11)
    def test_bad_len_x(self):
        # Length of x must be 1 or 2.
        too_long = [1,2,3]
        assert_raises(ValueError, stats.binom_test, too_long)
    def test_bad_n(self):
        # len(x) is 1, but n is invalid.
        single = [100]
        # Missing n
        assert_raises(ValueError, stats.binom_test, single)
        # n less than x[0]
        assert_raises(ValueError, stats.binom_test, single, n=50)
    def test_bad_p(self):
        # p=2.0 is passed as an invalid probability.
        assert_raises(ValueError, stats.binom_test, [50, 50], p=2.0)
 class TestFindRepeats(TestCase):
    def test_basic(self):
        repeated, counts = stats.find_repeats([1,2,3,4,1,2,3,4,1,2,5])
        assert_array_equal(repeated, [1,2,3,4])
        assert_array_equal(counts, [3,3,2,2])
    def test_empty_result(self):
        # Check that empty arrays are returned when there are no repeats.
        results = stats.find_repeats([10, 20, 50, 30, 40])
        repeated, counts = results
        for returned in (repeated, counts):
            assert_array_equal(returned, [])
 class TestFligner(TestCase):
    # Tests for stats.fligner and its `center` / `proportiontocut` options.
    def test_data(self):
        # numbers from R: fligner.test in package stats
        x1 = np.arange(5)
        assert_array_almost_equal(stats.fligner(x1,x1**2),
                           (3.2282229927203536, 0.072379187848207877), 11)
    def test_trimmed1(self):
        # Test that center='trimmed' gives the same result as center='mean'
        # when proportiontocut=0.
        Xsq1, pval1 = stats.fligner(g1, g2, g3, center='mean')
        Xsq2, pval2 = stats.fligner(g1, g2, g3, center='trimmed', proportiontocut=0.0)
        assert_almost_equal(Xsq1, Xsq2)
        assert_almost_equal(pval1, pval2)
    def test_trimmed2(self):
        x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0]
        y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0]
        # Use center='trimmed'
        Xsq1, pval1 = stats.fligner(x, y, center='trimmed', proportiontocut=0.125)
        # Trim the data here, and use center='mean'
        Xsq2, pval2 = stats.fligner(x[1:-1], y[1:-1], center='mean')
        # Result should be the same.
        assert_almost_equal(Xsq1, Xsq2)
        assert_almost_equal(pval1, pval2)
    # The following test looks reasonable at first, but fligner() uses the
    # function stats.rankdata(), and in one of the cases in this test,
    # there are ties, while in the other (because of normal rounding
    # errors) there are not.  This difference leads to differences in the
    # third significant digit of W.
    #
    #def test_equal_mean_median(self):
    #    x = np.linspace(-1,1,21)
    #    y = x**3
    #    W1, pval1 = stats.fligner(x, y, center='mean')
    #    W2, pval2 = stats.fligner(x, y, center='median')
    #    assert_almost_equal(W1, W2)
    #    assert_almost_equal(pval1, pval2)
    def test_bad_keyword(self):
        # 'portiontocut' is a deliberately misspelled keyword.
        x = np.linspace(-1,1,21)
        assert_raises(TypeError, stats.fligner, x, x, portiontocut=0.1)
    def test_bad_center_value(self):
        # 'trim' is not an accepted value for `center`.
        x = np.linspace(-1,1,21)
        assert_raises(ValueError, stats.fligner, x, x, center='trim')
    def test_bad_num_args(self):
        # Too few args raises ValueError.
        assert_raises(ValueError, stats.fligner, [1])
 class TestMood(TestCase):
    # Tests for stats.mood: reference values from R plus axis handling.
    def test_mood(self):
        # numbers from R: mood.test in package stats
        x1 = np.arange(5)
        assert_array_almost_equal(stats.mood(x1, x1**2),
                                  (-1.3830857299399906, 0.16663858066771478), 11)
    def test_mood_order_of_args(self):
        # z should change sign when the order of arguments changes, pvalue
        # should not change
        np.random.seed(1234)
        x1 = np.random.randn(10, 1)
        x2 = np.random.randn(15, 1)
        z1, p1 = stats.mood(x1, x2)
        z2, p2 = stats.mood(x2, x1)
        assert_array_almost_equal([z1, p1], [-z2, p2])
    def test_mood_with_axis_none(self):
        #Test with axis = None, compare with results from R
        x1 = [-0.626453810742332, 0.183643324222082, -0.835628612410047,
               1.59528080213779, 0.329507771815361, -0.820468384118015,
               0.487429052428485, 0.738324705129217, 0.575781351653492,
              -0.305388387156356, 1.51178116845085, 0.389843236411431,
              -0.621240580541804, -2.2146998871775, 1.12493091814311,
              -0.0449336090152309, -0.0161902630989461, 0.943836210685299,
               0.821221195098089, 0.593901321217509]
        x2 = [-0.896914546624981, 0.184849184646742, 1.58784533120882,
              -1.13037567424629, -0.0802517565509893, 0.132420284381094,
               0.707954729271733, -0.23969802417184, 1.98447393665293,
              -0.138787012119665, 0.417650750792556, 0.981752777463662,
              -0.392695355503813, -1.03966897694891, 1.78222896030858,
              -2.31106908460517, 0.878604580921265, 0.035806718015226,
               1.01282869212708, 0.432265154539617, 2.09081920524915,
              -1.19992581964387, 1.58963820029007, 1.95465164222325,
               0.00493777682814261, -2.45170638784613, 0.477237302613617,
              -0.596558168631403, 0.792203270299649, 0.289636710177348]
        x1 = np.array(x1)
        x2 = np.array(x2)
        # Reshape in place to 2-D so that axis=None is exercised on
        # multi-dimensional input.
        x1.shape = (10, 2)
        x2.shape = (15, 2)
        assert_array_almost_equal(stats.mood(x1, x2, axis=None),
                                  [-1.31716607555, 0.18778296257])
    def test_mood_2d(self):
        # Test if the results of mood test in 2-D case are consistent with the
        # R result for the same inputs.  Numbers from R mood.test().
        ny = 5
        np.random.seed(1234)
        x1 = np.random.randn(10, ny)
        x2 = np.random.randn(15, ny)
        z_vectest, pval_vectest = stats.mood(x1, x2)
        # Each column of the vectorised result must match the 1-D call.
        for j in range(ny):
            assert_array_almost_equal([z_vectest[j], pval_vectest[j]],
                                      stats.mood(x1[:, j], x2[:, j]))
        # inverse order of dimensions
        x1 = x1.transpose()
        x2 = x2.transpose()
        z_vectest, pval_vectest = stats.mood(x1, x2, axis=1)
        for i in range(ny):
            # check axis handling is self consistent
            assert_array_almost_equal([z_vectest[i], pval_vectest[i]],
                                      stats.mood(x1[i, :], x2[i, :]))
    def test_mood_3d(self):
        shape = (10, 5, 6)
        np.random.seed(1234)
        x1 = np.random.randn(*shape)
        x2 = np.random.randn(*shape)
        for axis in range(3):
            z_vectest, pval_vectest = stats.mood(x1, x2, axis=axis)
            # Tests that result for 3-D arrays is equal to that for the
            # same calculation on a set of 1-D arrays taken from the
            # 3-D array
            axes_idx = ([1, 2], [0, 2], [0, 1])  # the two axes != axis
            for i in range(shape[axes_idx[axis][0]]):
                for j in range(shape[axes_idx[axis][1]]):
                    # Pull out the 1-D slice running along `axis` at (i, j).
                    if axis == 0:
                        slice1 = x1[:, i, j]
                        slice2 = x2[:, i, j]
                    elif axis == 1:
                        slice1 = x1[i, :, j]
                        slice2 = x2[i, :, j]
                    else:
                        slice1 = x1[i, j, :]
                        slice2 = x2[i, j, :]
                    assert_array_almost_equal([z_vectest[i, j],
                                               pval_vectest[i, j]],
                                              stats.mood(slice1, slice2))
    def test_mood_bad_arg(self):
        # Raise ValueError when the sum of the lengths of the args is less than 3
        assert_raises(ValueError, stats.mood, [1], [])
 class TestProbplot(TestCase):
    # Tests for stats.probplot: quantile values, the `sparams`, `dist` and
    # `plot` keywords, and rejection of invalid distributions.
    def test_basic(self):
        np.random.seed(12345)
        x = stats.norm.rvs(size=20)
        osm, osr = stats.probplot(x, fit=False)
        osm_expected = [-1.8241636, -1.38768012, -1.11829229, -0.91222575,
                        -0.73908135, -0.5857176, -0.44506467, -0.31273668,
                        -0.18568928, -0.06158146, 0.06158146, 0.18568928,
                        0.31273668, 0.44506467, 0.5857176, 0.73908135,
                        0.91222575, 1.11829229, 1.38768012, 1.8241636]
        # The ordered responses are just the sorted sample.
        assert_allclose(osr, np.sort(x))
        assert_allclose(osm, osm_expected)
        res, res_fit = stats.probplot(x, fit=True)
        res_fit_expected = [1.05361841, 0.31297795, 0.98741609]
        assert_allclose(res_fit, res_fit_expected)
    def test_sparams_keyword(self):
        np.random.seed(123456)
        x = stats.norm.rvs(size=100)
        # Check that None, () and 0 (loc=0, for normal distribution) all work
        # and give the same results
        osm1, osr1 = stats.probplot(x, sparams=None, fit=False)
        osm2, osr2 = stats.probplot(x, sparams=0, fit=False)
        osm3, osr3 = stats.probplot(x, sparams=(), fit=False)
        assert_allclose(osm1, osm2)
        assert_allclose(osm1, osm3)
        assert_allclose(osr1, osr2)
        assert_allclose(osr1, osr3)
        # Check giving (loc, scale) params for normal distribution.  The
        # standard-normal values (0, 1) must reproduce the default result.
        osm, osr = stats.probplot(x, sparams=(0, 1), fit=False)
        assert_allclose(osm, osm1)
        assert_allclose(osr, osr1)
    def test_dist_keyword(self):
        np.random.seed(12345)
        x = stats.norm.rvs(size=20)
        # A distribution may be given by name or as a distribution object.
        osm1, osr1 = stats.probplot(x, fit=False, dist='t', sparams=(3,))
        osm2, osr2 = stats.probplot(x, fit=False, dist=stats.t, sparams=(3,))
        assert_allclose(osm1, osm2)
        assert_allclose(osr1, osr2)
        assert_raises(ValueError, stats.probplot, x, dist='wrong-dist-name')
        assert_raises(AttributeError, stats.probplot, x, dist=[])
        class custom_dist(object):
            """Some class that looks just enough like a distribution."""
            def ppf(self, q):
                return stats.norm.ppf(q, loc=2)
        # The custom object (normal ppf with loc=2) must match sparams=(2,).
        osm1, osr1 = stats.probplot(x, sparams=(2,), fit=False)
        osm2, osr2 = stats.probplot(x, dist=custom_dist(), fit=False)
        assert_allclose(osm1, osm2)
        assert_allclose(osr1, osr2)
    @dec.skipif(not have_matplotlib)
    def test_plot_kwarg(self):
        np.random.seed(7654321)
        fig = plt.figure()
        fig.add_subplot(111)
        x = stats.t.rvs(3, size=100)
        res1, fitres1 = stats.probplot(x, plot=plt)
        plt.close()
        res2, fitres2 = stats.probplot(x, plot=None)
        res3 = stats.probplot(x, fit=False, plot=plt)
        plt.close()
        res4 = stats.probplot(x, fit=False, plot=None)
        # Check that results are consistent between combinations of `fit` and
        # `plot` keywords.
        assert_(len(res1) == len(res2) == len(res3) == len(res4) == 2)
        assert_allclose(res1, res2)
        assert_allclose(res1, res3)
        assert_allclose(res1, res4)
        assert_allclose(fitres1, fitres2)
        # Check that a Matplotlib Axes object is accepted
        fig = plt.figure()
        ax = fig.add_subplot(111)
        stats.probplot(x, fit=False, plot=ax)
        plt.close()
    def test_probplot_bad_args(self):
        # Raise ValueError when given an invalid distribution.
        assert_raises(ValueError, stats.probplot, [1], dist="plate_of_shrimp")
 def test_wilcoxon_bad_arg():
    # Raise ValueError when two args of different lengths are given or
    # zero_method is unknown.
    bad_calls = (
        ([1], [1,2]),
        ([1,2], [1,2], "dummy"),
    )
    for args in bad_calls:
        assert_raises(ValueError, stats.wilcoxon, *args)
 def test_mvsdist_bad_arg():
    # Raise ValueError if fewer than two data points are given.
    assert_raises(ValueError, stats.mvsdist, [1])
 def test_kstat_bad_arg():
    # Raise ValueError if n > 4 or n < 1.
    data = [1]
    n = 10
    assert_raises(ValueError, stats.kstat, data, n=n)
 def test_kstatvar_bad_arg():
    # Raise ValueError if n is not 1 or 2.
    data = [1]
    n = 10
    assert_raises(ValueError, stats.kstatvar, data, n=n)
 def test_ppcc_max_bad_arg():
    # Raise ValueError when given an invalid distribution.
    assert_raises(ValueError, stats.ppcc_max, [1], dist="plate_of_shrimp")
 class TestBoxcox_llf(TestCase):
    def test_basic(self):
        np.random.seed(54321)
        sample = stats.norm.rvs(size=10000, loc=10)
        # Closed-form value the log-likelihood is compared against at lmbda=1.
        expected = -sample.size / 2. * np.log(np.sum(sample.std()**2))
        assert_allclose(stats.boxcox_llf(1, sample), expected)
    def test_array_like(self):
        # A plain list must give the same value as the ndarray.
        np.random.seed(54321)
        sample = stats.norm.rvs(size=100, loc=10)
        from_array = stats.boxcox_llf(1, sample)
        from_list = stats.boxcox_llf(1, list(sample))
        assert_allclose(from_array, from_list, rtol=1e-12)
    def test_2d_input(self):
        # Note: boxcox_llf() was already working with 2-D input (sort of), so
        # keep it like that.  boxcox() doesn't work with 2-D input though, due
        # to brent() returning a scalar.
        np.random.seed(54321)
        sample = stats.norm.rvs(size=100, loc=10)
        single = stats.boxcox_llf(1, sample)
        two_columns = np.vstack([sample, sample]).T
        stacked = stats.boxcox_llf(1, two_columns)
        assert_allclose([single, single], stacked, rtol=1e-12)
    def test_empty(self):
        empty_llf = stats.boxcox_llf(1, [])
        assert_(np.isnan(empty_llf))
 class TestBoxcox(TestCase):
    def test_fixed_lmbda(self):
        np.random.seed(12345)
        x = stats.loggamma.rvs(5, size=50) + 5
        xt = stats.boxcox(x, lmbda=1)
        assert_allclose(xt, x - 1)
        xt = stats.boxcox(x, lmbda=-1)
        assert_allclose(xt, 1 - 1/x)
        xt = stats.boxcox(x, lmbda=0)
        assert_allclose(xt, np.log(x))
        # Also test that array_like input works
        xt = stats.boxcox(list(x), lmbda=0)
        assert_allclose(xt, np.log(x))
    def test_lmbda_None(self):
        np.random.seed(1234567)
        # Start from normal rv's, do inverse transform to check that
        # optimization function gets close to the right answer.
        np.random.seed(1245)
        lmbda = 2.5
        x = stats.norm.rvs(loc=10, size=50000)
        x_inv = (x * lmbda + 1)**(-lmbda)
        xt, maxlog = stats.boxcox(x_inv)
        assert_almost_equal(maxlog, -1 / lmbda, decimal=2)
    def test_alpha(self):
        np.random.seed(1234)
        x = stats.loggamma.rvs(5, size=50) + 5
        # Some regular values for alpha, on a small sample size
        _, _, interval = stats.boxcox(x, alpha=0.75)
        assert_allclose(interval, [4.004485780226041, 5.138756355035744])
        _, _, interval = stats.boxcox(x, alpha=0.05)
        assert_allclose(interval, [1.2138178554857557, 8.209033272375663])
        # Try some extreme values, see we don't hit the N=500 limit
        x = stats.loggamma.rvs(7, size=500) + 15
        _, _, interval = stats.boxcox(x, alpha=0.001)
        assert_allclose(interval, [0.3988867, 11.40553131])
        _, _, interval = stats.boxcox(x, alpha=0.999)
        assert_allclose(interval, [5.83316246, 5.83735292])
    def test_boxcox_bad_arg(self):
        # Raise ValueError if any data value is negative.
        x = np.array([-1])
        assert_raises(ValueError, stats.boxcox, x)
    def test_empty(self):
        assert_(stats.boxcox([]).shape == (0,))
 class TestBoxcoxNormmax(TestCase):
    # Tests for stats.boxcox_normmax with its different `method` values.
    def setUp(self):
        # Seeded sample shared by all tests in this class.
        np.random.seed(12345)
        self.x = stats.loggamma.rvs(5, size=50) + 5
    def test_pearsonr(self):
        # No `method` argument: the default is compared to a pinned value.
        maxlog = stats.boxcox_normmax(self.x)
        assert_allclose(maxlog, 1.804465325046)
    def test_mle(self):
        maxlog = stats.boxcox_normmax(self.x, method='mle')
        assert_allclose(maxlog, 1.758101454114)
        # Check that boxcox() uses 'mle'
        _, maxlog_boxcox = stats.boxcox(self.x)
        assert_allclose(maxlog_boxcox, maxlog)
    def test_all(self):
        # method='all' returns both values pinned in the two tests above.
        maxlog_all = stats.boxcox_normmax(self.x, method='all')
        assert_allclose(maxlog_all, [1.804465325046, 1.758101454114])
 class TestBoxcoxNormplot(TestCase):
    # Tests for stats.boxcox_normplot: returned values, the `plot` keyword,
    # and invalid/empty input.
    def setUp(self):
        # Seeded sample shared by all tests in this class.
        np.random.seed(7654321)
        self.x = stats.loggamma.rvs(5, size=500) + 5
    def test_basic(self):
        N = 5
        lmbdas, ppcc = stats.boxcox_normplot(self.x, -10, 10, N=N)
        ppcc_expected = [0.57783375, 0.83610988, 0.97524311, 0.99756057,
                         0.95843297]
        assert_allclose(lmbdas, np.linspace(-10, 10, num=N))
        assert_allclose(ppcc, ppcc_expected)
    @dec.skipif(not have_matplotlib)
    def test_plot_kwarg(self):
        # Check with the matplotlib.pyplot module
        fig = plt.figure()
        fig.add_subplot(111)
        stats.boxcox_normplot(self.x, -20, 20, plot=plt)
        plt.close()
        # Check that a Matplotlib Axes object is accepted.  The previous
        # figure has been closed, so a fresh one is created for the axes
        # (same pattern as TestProbplot.test_plot_kwarg).
        fig = plt.figure()
        ax = fig.add_subplot(111)
        stats.boxcox_normplot(self.x, -20, 20, plot=ax)
        plt.close()
    def test_invalid_inputs(self):
        # `lb` has to be larger than `la`
        assert_raises(ValueError, stats.boxcox_normplot, self.x, 1, 0)
        # `x` can not contain negative values
        assert_raises(ValueError, stats.boxcox_normplot, [-1, 1] , 0, 1)
    def test_empty(self):
        # Empty input yields an empty result.
        assert_(stats.boxcox_normplot([], 0, 1).size == 0)
 class TestCircFuncs(TestCase):
    # Tests for stats.circmean, stats.circvar and stats.circstd.
    def test_circfuncs(self):
        # Pinned reference values for a sample that wraps around high=360.
        x = np.array([355,5,2,359,10,350])
        M = stats.circmean(x, high=360)
        Mval = 0.167690146
        assert_allclose(M, Mval, rtol=1e-7)
        V = stats.circvar(x, high=360)
        Vval = 42.51955609
        assert_allclose(V, Vval, rtol=1e-7)
        S = stats.circstd(x, high=360)
        Sval = 6.520702116
        assert_allclose(S, Sval, rtol=1e-7)
    def test_circfuncs_small(self):
        # For this tightly clustered sample the circular statistics are
        # compared (with loose tolerances) to the ordinary mean/var/std.
        x = np.array([20,21,22,18,19,20.5,19.2])
        M1 = x.mean()
        M2 = stats.circmean(x, high=360)
        assert_allclose(M2, M1, rtol=1e-5)
        V1 = x.var()
        V2 = stats.circvar(x, high=360)
        assert_allclose(V2, V1, rtol=1e-4)
        S1 = x.std()
        S2 = stats.circstd(x, high=360)
        assert_allclose(S2, S1, rtol=1e-4)
    def test_circmean_axis(self):
        x = np.array([[355,5,2,359,10,350],
                      [351,7,4,352,9,349],
                      [357,9,8,358,4,356]])
        # No axis given: same as the flattened array.
        M1 = stats.circmean(x, high=360)
        M2 = stats.circmean(x.ravel(), high=360)
        assert_allclose(M1, M2, rtol=1e-14)
        # axis=1: one value per row.
        M1 = stats.circmean(x, high=360, axis=1)
        M2 = [stats.circmean(x[i], high=360) for i in range(x.shape[0])]
        assert_allclose(M1, M2, rtol=1e-14)
        # axis=0: one value per column.
        M1 = stats.circmean(x, high=360, axis=0)
        M2 = [stats.circmean(x[:,i], high=360) for i in range(x.shape[1])]
        assert_allclose(M1, M2, rtol=1e-14)
    def test_circvar_axis(self):
        # Same axis checks as test_circmean_axis, for circvar.
        x = np.array([[355,5,2,359,10,350],
                      [351,7,4,352,9,349],
                  [357,9,8,358,4,356]])
        V1 = stats.circvar(x, high=360)
        V2 = stats.circvar(x.ravel(), high=360)
        assert_allclose(V1, V2, rtol=1e-11)
        V1 = stats.circvar(x, high=360, axis=1)
        V2 = [stats.circvar(x[i], high=360) for i in range(x.shape[0])]
        assert_allclose(V1, V2, rtol=1e-11)
        V1 = stats.circvar(x, high=360, axis=0)
        V2 = [stats.circvar(x[:,i], high=360) for i in range(x.shape[1])]
        assert_allclose(V1, V2, rtol=1e-11)
    def test_circstd_axis(self):
        # Same axis checks as test_circmean_axis, for circstd.
        x = np.array([[355,5,2,359,10,350],
                      [351,7,4,352,9,349],
                      [357,9,8,358,4,356]])
        S1 = stats.circstd(x, high=360)
        S2 = stats.circstd(x.ravel(), high=360)
        assert_allclose(S1, S2, rtol=1e-11)
        S1 = stats.circstd(x, high=360, axis=1)
        S2 = [stats.circstd(x[i], high=360) for i in range(x.shape[0])]
        assert_allclose(S1, S2, rtol=1e-11)
        S1 = stats.circstd(x, high=360, axis=0)
        S2 = [stats.circstd(x[:,i], high=360) for i in range(x.shape[1])]
        assert_allclose(S1, S2, rtol=1e-11)
    def test_circfuncs_array_like(self):
        # A plain list must give the values pinned in test_circfuncs.
        x = [355,5,2,359,10,350]
        assert_allclose(stats.circmean(x, high=360), 0.167690146, rtol=1e-7)
        assert_allclose(stats.circvar(x, high=360), 42.51955609, rtol=1e-7)
        assert_allclose(stats.circstd(x, high=360), 6.520702116, rtol=1e-7)
    def test_empty(self):
        # Empty input yields NaN for all three functions.
        assert_(np.isnan(stats.circmean([])))
        assert_(np.isnan(stats.circstd([])))
        assert_(np.isnan(stats.circvar([])))
 def test_accuracy_wilcoxon():
    # Build a sample in which each value u of `nums` occurs v times, with v
    # taken from `freq`, and compare it against an all-zero second sample.
    freq = [1, 4, 16, 15, 8, 4, 5, 1, 2]
    nums = range(-4, 5)
    x = np.concatenate([[u] * v for u, v in zip(nums, freq)])
    y = np.zeros(x.size)
    # Pinned results for each zero_method (passed positionally).
    T, p = stats.wilcoxon(x, y, "pratt")
    assert_allclose(T, 423)
    assert_allclose(p, 0.00197547303533107)
    T, p = stats.wilcoxon(x, y, "zsplit")
    assert_allclose(T, 441)
    assert_allclose(p, 0.0032145343172473055)
    T, p = stats.wilcoxon(x, y, "wilcox")
    assert_allclose(T, 327)
    assert_allclose(p, 0.00641346115861)
    # Test the 'correction' option, using values computed in R with:
    # > wilcox.test(x, y, paired=TRUE, exact=FALSE, correct={FALSE,TRUE})
    x = np.array([120, 114, 181, 188, 180, 146, 121, 191, 132, 113, 127, 112])
    y = np.array([133, 143, 119, 189, 112, 199, 198, 113, 115, 121, 142, 187])
    T, p = stats.wilcoxon(x, y, correction=False)
    assert_equal(T, 34)
    assert_allclose(p, 0.6948866, rtol=1e-6)
    T, p = stats.wilcoxon(x, y, correction=True)
    assert_equal(T, 34)
    assert_allclose(p, 0.7240817, rtol=1e-6)
 def test_wilcoxon_tie():
    # Regression test for gh-2391.
    # Corresponding R code is:
    #   > result = wilcox.test(rep(0.1, 10), exact=FALSE, correct=FALSE)
    #   > result$p.value
    #   [1] 0.001565402
    #   > result = wilcox.test(rep(0.1, 10), exact=FALSE, correct=TRUE)
    #   > result$p.value
    #   [1] 0.001904195
    # Single-sample call on ten identical values, without correction.
    stat, p = stats.wilcoxon([0.1] * 10)
    expected_p = 0.001565402
    assert_equal(stat, 0)
    assert_allclose(p, expected_p, rtol=1e-6)
    # Same data with correction=True.
    stat, p = stats.wilcoxon([0.1] * 10, correction=True)
    expected_p = 0.001904195
    assert_equal(stat, 0)
    assert_allclose(p, expected_p, rtol=1e-6)
 if __name__ == "__main__":
    # Executed as a script: hand over to numpy.testing's module runner.
    run_module_suite()
--- a/pywafo/src/wafo/stats/tests/test_mstats_basic.py
+++ b/pywafo/src/wafo/stats/tests/test_mstats_basic.py
@ -1,23 +1,26 @@
 """
 Tests for the stats.mstats module (support for masked arrays)
 """
 from __future__ import division, print_function, absolute_import
 import warnings
 import numpy as np
 from numpy import nan
 import numpy.ma as ma
 from numpy.ma import masked, nomask
-import scipy.stats.mstats as mstats
+import wafo.stats.mstats as mstats
 from wafo import stats
 from numpy.testing import TestCase, run_module_suite
-from numpy.ma.testutils import assert_equal, assert_almost_equal, \
+from numpy.ma.testutils import (assert_equal, assert_almost_equal,
-    assert_array_almost_equal, assert_
+    assert_array_almost_equal, assert_array_almost_equal_nulp, assert_,
    assert_allclose, assert_raises)
 class TestMquantiles(TestCase):
    """Regression tests for mstats module."""
    def test_mquantiles_limit_keyword(self):
-        """Ticket #867"""
+        # Regression test for Trac ticket #867
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
@ -36,7 +39,6 @@ class TestMquantiles(TestCase):
        assert_almost_equal(quants, desired)
 class TestGMean(TestCase):
    def test_1D(self):
        a = (1,2,3,4)
@ -47,7 +49,7 @@ class TestGMean(TestCase):
        desired1 = mstats.gmean(a,axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)
        assert_(not isinstance(desired1, ma.MaskedArray))
-        #
+
        a = ma.array((1,2,3,4),mask=(0,0,0,1))
        actual = mstats.gmean(a)
        desired = np.power(1*2*3,1./3.)
@ -55,23 +57,24 @@ class TestGMean(TestCase):
        desired1 = mstats.gmean(a,axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)
-    #
+
    def test_2D(self):
        a = ma.array(((1,2,3,4),(1,2,3,4),(1,2,3,4)),
                     mask=((0,0,0,0),(1,0,0,1),(0,1,1,0)))
        actual = mstats.gmean(a)
        desired = np.array((1,2,3,4))
        assert_array_almost_equal(actual, desired, decimal=14)
-        #
+
        desired1 = mstats.gmean(a,axis=0)
        assert_array_almost_equal(actual, desired1, decimal=14)
-        #
+
        actual = mstats.gmean(a, -1)
        desired = ma.array((np.power(1*2*3*4,1./4.),
                            np.power(2*3,1./2.),
                            np.power(1*4,1./2.)))
        assert_array_almost_equal(actual, desired, decimal=14)
 class TestHMean(TestCase):
    def test_1D(self):
        a = (1,2,3,4)
@ -80,7 +83,7 @@ class TestHMean(TestCase):
        assert_almost_equal(actual, desired, decimal=14)
        desired1 = mstats.hmean(ma.array(a),axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)
-        #
+
        a = ma.array((1,2,3,4),mask=(0,0,0,1))
        actual = mstats.hmean(a)
        desired = 3. / (1./1 + 1./2 + 1./3)
@ -94,7 +97,7 @@ class TestHMean(TestCase):
        actual = mstats.hmean(a)
        desired = ma.array((1,2,3,4))
        assert_array_almost_equal(actual, desired, decimal=14)
-        #
+
        actual1 = mstats.hmean(a,axis=-1)
        desired = (4./(1/1.+1/2.+1/3.+1/4.),
                   2./(1/2.+1/3.),
@ -104,10 +107,10 @@ class TestHMean(TestCase):
 class TestRanking(TestCase):
-    #
+
    def __init__(self, *args, **kwargs):
        # Pass-through constructor: forwards all positional and keyword
        # arguments unchanged to TestCase.__init__ and adds no state.
        TestCase.__init__(self, *args, **kwargs)
-    #
+
    def test_ranking(self):
        x = ma.array([0,1,1,1,2,3,4,5,5,6,])
        assert_almost_equal(mstats.rankdata(x),[1,3,3,3,5,6,7,8.5,8.5,10])
@ -124,44 +127,60 @@ class TestRanking(TestCase):
 class TestCorr(TestCase):
-    #
+
    def test_pearsonr(self):
-        "Tests some computations of Pearson's r"
+        # Tests some computations of Pearson's r
        x = ma.arange(10)
-        olderr = np.seterr(all='ignore')
+        with warnings.catch_warnings():
-        try:
+            # The tests in this context are edge cases, with perfect
            # correlation or anticorrelation, or totally masked data.
            # None of these should trigger a RuntimeWarning.
            warnings.simplefilter("error", RuntimeWarning)
            assert_almost_equal(mstats.pearsonr(x, x)[0], 1.0)
            assert_almost_equal(mstats.pearsonr(x, x[::-1])[0], -1.0)
            x = ma.array(x, mask=True)
            pr = mstats.pearsonr(x, x)
        finally:
            np.seterr(**olderr)
            assert_(pr[0] is masked)
            assert_(pr[1] is masked)
-    #
+
        x1 = ma.array([-1.0, 0.0, 1.0])
        y1 = ma.array([0, 0, 3])
        r, p = mstats.pearsonr(x1, y1)
        assert_almost_equal(r, np.sqrt(3)/2)
        assert_almost_equal(p, 1.0/3)
        # (x2, y2) have the same unmasked data as (x1, y1).
        mask = [False, False, False, True]
        x2 = ma.array([-1.0, 0.0, 1.0, 99.0], mask=mask)
        y2 = ma.array([0, 0, 3, -1], mask=mask)
        r, p = mstats.pearsonr(x2, y2)
        assert_almost_equal(r, np.sqrt(3)/2)
        assert_almost_equal(p, 1.0/3)
    def test_spearmanr(self):
-        "Tests some computations of Spearman's rho"
+        # Tests some computations of Spearman's rho
        (x, y) = ([5.05,6.75,3.21,2.66],[1.65,2.64,2.64,6.95])
        assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
        (x, y) = ([5.05,6.75,3.21,2.66,np.nan],[1.65,2.64,2.64,6.95,np.nan])
        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
        assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
-        #
+
        x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
              1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7]
-        y = [22.6, 08.3, 44.4, 11.9, 24.6,  0.6,  5.7, 41.6,
+        y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
              0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4]
        assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)
        x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
              1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7, np.nan]
-        y = [22.6, 08.3, 44.4, 11.9, 24.6,  0.6,  5.7, 41.6,
+        y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
              0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4, np.nan]
        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
        assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)
-    #
+
    def test_kendalltau(self):
-        "Tests some computations of Kendall's tau"
+        # Tests some computations of Kendall's tau
        x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66,np.nan])
        y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
        z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
@ -176,9 +195,9 @@ class TestCorr(TestCase):
                            25,80,80,80,80,80,80, 0,10,45, np.nan, 0])
        result = mstats.kendalltau(x,y)
        assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])
-    #
+
    def test_kendalltau_seasonal(self):
-        "Tests the seasonal Kendall tau."
+        # Tests the seasonal Kendall tau.
        x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
             [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
             [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
@ -188,9 +207,8 @@ class TestCorr(TestCase):
        assert_almost_equal(output['global p-value (indep)'], 0.008, 3)
        assert_almost_equal(output['seasonal p-value'].round(2),
                            [0.18,0.53,0.20,0.04])
-    #
+
    def test_pointbiserial(self):
        "Tests point biserial"
        x = [1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,
             0,0,0,0,1,-1]
        y = [14.8,13.8,12.4,10.1,7.1,6.1,5.8,4.6,4.3,3.5,3.3,3.2,3.0,
@ -200,9 +218,8 @@ class TestCorr(TestCase):
 class TestTrimming(TestCase):
-    #
+
    def test_trim(self):
        "Tests trimming"
        a = ma.arange(10)
        assert_equal(mstats.trim(a), [0,1,2,3,4,5,6,7,8,9])
        a = ma.arange(10)
@ -213,12 +230,12 @@ class TestTrimming(TestCase):
        a = ma.arange(10)
        assert_equal(mstats.trim(a,limits=(0.1,0.2),relative=True),
                     [None,1,2,3,4,5,6,7,None,None])
-        #
+
        a = ma.arange(12)
        a[[0,-1]] = a[5] = masked
        assert_equal(mstats.trim(a,(2,8)),
                     [None,None,2,3,4,None,6,7,8,None,None,None])
-        #
+
        x = ma.arange(100).reshape(10,10)
        trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=None)
        assert_equal(trimx._mask.ravel(),[1]*10+[0]*70+[1]*20)
@ -226,7 +243,7 @@ class TestTrimming(TestCase):
        assert_equal(trimx._mask.ravel(),[1]*10+[0]*70+[1]*20)
        trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=-1)
        assert_equal(trimx._mask.T.ravel(),[1]*10+[0]*70+[1]*20)
-        #
+
        x = ma.arange(110).reshape(11,10)
        x[1] = masked
        trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=None)
@ -235,9 +252,8 @@ class TestTrimming(TestCase):
        assert_equal(trimx._mask.ravel(),[1]*20+[0]*70+[1]*20)
        trimx = mstats.trim(x.T,(0.1,0.2),relative=True,axis=-1)
        assert_equal(trimx.T._mask.ravel(),[1]*20+[0]*70+[1]*20)
-    #
+
    def test_trim_old(self):
        "Tests trimming."
        x = ma.arange(100)
        assert_equal(mstats.trimboth(x).count(), 60)
        assert_equal(mstats.trimtail(x,tail='r').count(), 80)
@ -249,24 +265,21 @@ class TestTrimming(TestCase):
        x.shape = (10,10)
        assert_equal(mstats.trimboth(x).count(), 60)
        assert_equal(mstats.trimtail(x).count(), 80)
-    #
+
    def test_trimmedmean(self):
        "Tests the trimmed mean."
        sample = ma.array([77, 87, 88,114,151,210,219,246,253,262,
                           296,299,306,376,428,515,666,1310,2611])
        # (limits, expected mean compared to 0 decimals). The scalar limit 0.1
        # and the pair (0.1, 0.1) are expected to give the same value.
        checks = ((0.1, 343), ((0.1,0.1), 343), ((0.2,0.2), 283))
        for limits, expected in checks:
            assert_almost_equal(mstats.trimmed_mean(sample,limits), expected, 0)
-    #
+
    def test_trimmed_stde(self):
        "Tests the trimmed mean standard error."
        sample = ma.array([77, 87, 88,114,151,210,219,246,253,262,
                           296,299,306,376,428,515,666,1310,2611])
        # The pair (0.2, 0.2) and the scalar 0.2 must both yield the same
        # reference value, compared to 5 decimals.
        for limits in ((0.2,0.2), 0.2):
            assert_almost_equal(mstats.trimmed_stde(sample,limits), 56.13193, 5)
-    #
+
    def test_winsorization(self):
        "Tests the Winsorization of the data."
        data = ma.array([77, 87, 88,114,151,210,219,246,253,262,
                         296,299,306,376,428,515,666,1310,2611])
        assert_almost_equal(mstats.winsorize(data,(0.2,0.2)).var(ddof=1),
@ -277,21 +290,29 @@ class TestTrimming(TestCase):
 class TestMoments(TestCase):
-    """
+    # Comparison numbers are found using R v.1.5.1
-        Comparison numbers are found using R v.1.5.1
+    # note that length(testcase) = 4
-        note that length(testcase) = 4
+    # testmathworks comes from documentation for the
-        testmathworks comes from documentation for the
+    # Statistics Toolbox for Matlab and can be found at both
-        Statistics Toolbox for Matlab and can be found at both
+    # http://www.mathworks.com/access/helpdesk/help/toolbox/stats/kurtosis.shtml
-        http://www.mathworks.com/access/helpdesk/help/toolbox/stats/kurtosis.shtml
+    # http://www.mathworks.com/access/helpdesk/help/toolbox/stats/skewness.shtml
-        http://www.mathworks.com/access/helpdesk/help/toolbox/stats/skewness.shtml
+    # Note that both test cases came from here.
        Note that both test cases came from here.
    """
    testcase = [1,2,3,4]
    testmathworks = ma.fix_invalid([1.165, 0.6268, 0.0751, 0.3516, -0.6965,
                                    np.nan])
    testcase_2d = ma.array(
    np.array([[0.05245846, 0.50344235, 0.86589117, 0.36936353, 0.46961149],
           [0.11574073, 0.31299969, 0.45925772, 0.72618805, 0.75194407],
           [0.67696689, 0.91878127, 0.09769044, 0.04645137, 0.37615733],
           [0.05903624, 0.29908861, 0.34088298, 0.66216337, 0.83160998],
           [0.64619526, 0.94894632, 0.27855892, 0.0706151, 0.39962917]]),
    mask=np.array([[True, False, False, True, False],
           [True, True, True, False, True],
           [False, False, False, False, False],
           [True, True, True, True, True],
           [False, False, True, False, False]], dtype=np.bool))
    def test_moment(self):
        """
        mean((testcase-mean(testcase))**power,axis=0),axis=0))**power))"""
        y = mstats.moment(self.testcase,1)
        assert_almost_equal(y,0.0,10)
        y = mstats.moment(self.testcase,2)
@ -300,17 +321,12 @@ class TestMoments(TestCase):
        assert_almost_equal(y,0.0)
        y = mstats.moment(self.testcase,4)
        assert_almost_equal(y,2.5625)
    def test_variation(self):
        """Check mstats.variation on the class-level ``testcase`` sample.

        variation = samplestd/mean
        """
        # The two lines below are a disabled check that refers to a
        # ``self.shoes`` sample not defined in the visible part of this class.
 ##        y = stats.variation(self.shoes[0])
 ##        assert_almost_equal(y,21.8770668)
        y = mstats.variation(self.testcase)
        # For testcase = [1,2,3,4] the reference equals sqrt(1.25)/2.5, i.e.
        # the ddof=0 standard deviation divided by the mean; compared to 10
        # decimals.
        assert_almost_equal(y,0.44721359549996, 10)
    def test_skewness(self):
        """
            sum((testmathworks-mean(testmathworks,axis=0))**3,axis=0)/((sqrt(var(testmathworks)*4/5))**3)/5
        """
        y = mstats.skew(self.testmathworks)
        assert_almost_equal(y,-0.29322304336607,10)
        y = mstats.skew(self.testmathworks,bias=0)
@ -319,12 +335,8 @@ class TestMoments(TestCase):
        assert_almost_equal(y,0.0,10)
    def test_kurtosis(self):
-        """
+        # Set flags for axis = 0 and fisher=0 (Pearson's definition of kurtosis
-            sum((testcase-mean(testcase,axis=0))**4,axis=0)/((sqrt(var(testcase)*3/4))**4)/4
+        # for compatibility with Matlab)
            sum((test2-mean(testmathworks,axis=0))**4,axis=0)/((sqrt(var(testmathworks)*4/5))**4)/5
            Set flags for axis = 0 and
            fisher=0 (Pearson's definition of kurtosis for compatibility with Matlab)
        """
        y = mstats.kurtosis(self.testmathworks,0,fisher=0,bias=1)
        assert_almost_equal(y, 2.1658856802973,10)
        # Note that MATLAB has confusing docs for the following case
@ -335,22 +347,53 @@ class TestMoments(TestCase):
        assert_almost_equal(y, 3.663542721189047,10)
        y = mstats.kurtosis(self.testcase,0,0)
        assert_almost_equal(y,1.64)
-    #
+
        # test that kurtosis works on multidimensional masked arrays
        correct_2d = ma.array(np.array([-1.5, -3., -1.47247052385, 0.,
                                        -1.26979517952]),
                              mask=np.array([False, False, False, True,
                                             False], dtype=np.bool))
        assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1),
                                  correct_2d)
        for i, row in enumerate(self.testcase_2d):
            assert_almost_equal(mstats.kurtosis(row), correct_2d[i])
        correct_2d_bias_corrected = ma.array(
            np.array([-1.5, -3., -1.88988209538, 0., -0.5234638463918877]),
            mask=np.array([False, False, False, True, False], dtype=np.bool))
        assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1,
                                                  bias=False),
                                  correct_2d_bias_corrected)
        for i, row in enumerate(self.testcase_2d):
            assert_almost_equal(mstats.kurtosis(row, bias=False),
                                correct_2d_bias_corrected[i])
        # Check consistency between stats and mstats implementations
        assert_array_almost_equal_nulp(mstats.kurtosis(self.testcase_2d[2, :]),
                                       stats.kurtosis(self.testcase_2d[2, :]))
    def test_mode(self):
        "Tests the mode"
        # Plain inputs: a flat list, its 3x5 reshape, a short array and its
        # 3x2 reshape.
        flat = [0,0,0,1,1,1,2,3,3,3,3,4,5,6,7]
        grid = np.reshape(flat, (3,5))
        short = np.array([1,2,3,4,5,6])
        short_grid = np.reshape(short, (3,2))
        # Masked variants: entries > 2 are hidden for flat/grid, entries < 2
        # are hidden for short/short_grid.
        m_flat = ma.masked_where(ma.array(flat) > 2, flat)
        m_grid = ma.masked_where(grid > 2, grid)
        m_short = ma.masked_where(short < 2, short)
        m_short_grid = ma.masked_where(ma.array(short_grid) < 2, short_grid)
        # (input, axis, expected (mode, count)) checked in this exact order.
        cases = [
            (flat, None, (3,4)),
            (flat, 0, (3,4)),
            (m_flat, None, (0,3)),
            (grid, None, (3,4)),
            (m_grid, None, (0,3)),
            (short, None, (1,1)),
            (m_short, None, (2,1)),
            (grid, 0, ([[0,0,0,1,1]], [[1,1,1,1,1]])),
            (m_grid, 0, ([[0,0,0,1,1]], [[1,1,1,1,1]])),
            (grid, -1, ([[0],[3],[3]], [[3],[3],[1]])),
            (m_grid, -1, ([[0],[1],[0]], [[3],[1],[0]])),
            (m_short_grid, 0, ([[3,2]], [[1,1]])),
            (m_short_grid, -1, ([[2],[3],[5]], [[1],[1],[1]])),
        ]
        for data, axis, expected in cases:
            assert_equal(mstats.mode(data, axis=axis), expected)
 class TestPercentile(TestCase):
@ -381,29 +424,22 @@ class TestVariability(TestCase):
    testcase = ma.fix_invalid([1,2,3,4,np.nan])
    def test_signaltonoise(self):
-        """
+        # This is not in R, so used:
-        this is not in R, so used
+        #     mean(testcase, axis=0) / (sqrt(var(testcase)*3/4))
        mean(testcase,axis=0)/(sqrt(var(testcase)*3/4)) """
        #y = stats.signaltonoise(self.shoes[0])
        #assert_approx_equal(y,4.5709967)
        y = mstats.signaltonoise(self.testcase)
        assert_almost_equal(y,2.236067977)
    def test_sem(self):
-        """
+        # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
        this is not in R, so used
        sqrt(var(testcase)*3/4)/sqrt(3)
        """
        #y = stats.sem(self.shoes[0])
        #assert_approx_equal(y,0.775177399)
        y = mstats.sem(self.testcase)
        assert_almost_equal(y, 0.6454972244)
        n = self.testcase.count()
        assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        mstats.sem(self.testcase, ddof=2))
    def test_zmap(self):
-        """
+        # This is not in R, so tested by using:
-        not in R, so tested by using
+        #    (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4)
        (testcase[i]-mean(testcase,axis=0))/sqrt(var(testcase)*3/4)
        """
        y = mstats.zmap(self.testcase, self.testcase)
        desired_unmaskedvals = ([-1.3416407864999, -0.44721359549996,
                                 0.44721359549996, 1.3416407864999])
@ -411,46 +447,40 @@ class TestVariability(TestCase):
                                  y.data[y.mask == False], decimal=12)
    def test_zscore(self):
-        """
+        # This is not in R, so tested by using:
-        not in R, so tested by using
+        #     (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4)
        (testcase[i]-mean(testcase,axis=0))/sqrt(var(testcase)*3/4)
        """
        y = mstats.zscore(self.testcase)
        desired = ma.fix_invalid([-1.3416407864999, -0.44721359549996,
                                  0.44721359549996, 1.3416407864999, np.nan])
        assert_almost_equal(desired, y, decimal=12)
 class TestMisc(TestCase):
-    #
+
    def test_obrientransform(self):
        "Tests Obrien transform"
        # Two samples of 32 observations each, written as runs of repeated
        # values.
        samples = [[5]*5+[6]*11+[7]*9+[8]*3+[9]*2+[10]*2,
                   [6]+[7]*2+[8]*4+[9]*9+[10]*16]
        # Reference transformed values, laid out in the same run structure.
        expected = [5*[3.1828]+11*[0.5591]+9*[0.0344]+3*[1.6086]+2*[5.2817]+2*[11.0538],
                    [10.4352]+2*[4.8599]+4*[1.3836]+9*[0.0061]+16*[0.7277]]
        transformed = mstats.obrientransform(*samples)
        # The result is transposed before comparison with the reference rows.
        assert_almost_equal(np.round(transformed.T,4), expected, 4)
-    #
+
    def test_kstwosamp(self):
        "Tests the Kolmogorov-Smirnov 2 samples test"
        x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
             [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
             [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
             [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
        x = ma.fix_invalid(x).T
        (winter,spring,summer,fall) = x.T
-        #
+
        assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring),4),
                            (0.1818,0.9892))
        assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'g'),4),
                            (0.1469,0.7734))
        assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'l'),4),
                            (0.1818,0.6744))
-    #
+
    def test_friedmanchisq(self):
        "Tests the Friedman Chi-square test"
        # No missing values
        args = ([9.0,9.5,5.0,7.5,9.5,7.5,8.0,7.0,8.5,6.0],
                [7.0,6.5,7.0,7.5,5.0,8.0,6.0,6.5,7.0,7.0],
@ -470,7 +500,7 @@ class TestMisc(TestCase):
 def test_regress_simple():
-    """Regress a line with sinusoidal noise. Test for #1273."""
+    # Regress a line with sinusoidal noise. Test for #1273.
    x = np.linspace(0, 100, 100)
    y = 0.2 * np.linspace(0, 100, 100) + 10
    y += np.sin(np.linspace(0, 20, 100))
@ -481,10 +511,147 @@ def test_regress_simple():
 def test_plotting_positions():
-    """Regression test for #1256"""
+    # Regression test for #1256
    pos = mstats.plotting_positions(np.arange(3), 0, 0)
    assert_array_almost_equal(pos.data, np.array([0.25, 0.5, 0.75]))
 class TestNormalitytests():
    # Compares the mstats normality tests (normaltest, skewtest,
    # kurtosistest) with their stats counterparts.
    @staticmethod
    def _sample():
        # Squares of -2..3 tiled four times: the 24-point input shared by
        # every test in this class.
        return np.array((-2,-1,0,1,2,3)*4)**2
    def test_vs_nonmasked(self):
        sample = self._sample()
        pairs = [(stats.normaltest, mstats.normaltest),
                 (stats.skewtest, mstats.skewtest),
                 (stats.kurtosistest, mstats.kurtosistest)]
        # On unmasked data both implementations must agree.
        for plain_test, masked_test in pairs:
            assert_array_almost_equal(masked_test(sample), plain_test(sample))
        # A four-element input must make both versions raise ValueError.
        tiny = [1, 2, 3, 4]
        for plain_test, masked_test in pairs:
            assert_raises(ValueError, plain_test, tiny)
            assert_raises(ValueError, masked_test, tiny)
    def test_axis_None(self):
        # Test axis=None (equal to axis=0 for 1-D input)
        sample = self._sample()
        for masked_test in (mstats.normaltest, mstats.skewtest,
                            mstats.kurtosistest):
            assert_allclose(masked_test(sample, axis=None),
                            masked_test(sample))
    def test_maskedarray_input(self):
        # Add some masked values, test result doesn't change
        sample = self._sample()
        # One masked entry is placed at each end of the sample.
        padded = np.ma.array(np.r_[np.inf, sample, 10],
                             mask=np.r_[True, [False] * sample.size, True])
        pairs = [(stats.normaltest, mstats.normaltest),
                 (stats.skewtest, mstats.skewtest),
                 (stats.kurtosistest, mstats.kurtosistest)]
        for plain_test, masked_test in pairs:
            assert_allclose(masked_test(padded), plain_test(sample))
    def test_nd_input(self):
        sample = self._sample()
        # Two identical columns: each column must reproduce the 1-D result.
        two_columns = np.vstack([sample] * 2).T
        for masked_test in [mstats.normaltest, mstats.skewtest,
                            mstats.kurtosistest]:
            flat_result = masked_test(sample)
            stacked_result = masked_test(two_columns)
            for k in (0, 1):
                assert_allclose(stacked_result[k], [flat_result[k]] * 2)
 #TODO: for all ttest functions, add tests with masked array inputs
 class TestTtest_rel():
    # Checks mstats.ttest_rel against stats.ttest_rel on unmasked data, plus
    # its behaviour on mismatched and empty inputs.
    def test_vs_nonmasked(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]
        first, second = outcome[:, 0], outcome[:, 1]
        left, right = outcome[:, :2], outcome[:, 2:]
        # Single columns with the default axis, the same columns with
        # axis=None, then the two 2-D halves along axis=0.
        cases = [(first, second, {}),
                 (first, second, {'axis': None}),
                 (left, right, {'axis': 0})]
        for a, b, options in cases:
            plain = stats.ttest_rel(a, b, **options)
            masked = mstats.ttest_rel(a, b, **options)
            assert_allclose(plain, masked)
        # Check default is axis=0 (`masked` still holds the axis=0 result)
        assert_allclose(masked, mstats.ttest_rel(left, right))
    def test_invalid_input_size(self):
        # Samples of different length must be rejected.
        assert_raises(ValueError, mstats.ttest_rel,
                      np.arange(10), np.arange(11))
        values = np.arange(24)
        # Shapes (2, 3, 4) and (2, 4, 3) disagree along both axis 1 and 2.
        for axis in (1, 2):
            assert_raises(ValueError, mstats.ttest_rel,
                          values.reshape(2, 3, 4), values.reshape(2, 4, 3),
                          axis=axis)
    def test_empty(self):
        # Empty samples are expected to produce NaN for every returned value.
        outcome = mstats.ttest_rel([], [])
        assert_(np.all(np.isnan(outcome)))
 class TestTtest_ind():
    # Checks mstats.ttest_ind against stats.ttest_ind on unmasked data, plus
    # its behaviour on empty inputs.
    def test_vs_nonmasked(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]
        first, second = outcome[:, 0], outcome[:, 1]
        left, right = outcome[:, :2], outcome[:, 2:]
        # Single columns with the default axis, the same columns with
        # axis=None, then the two 2-D halves along axis=0.
        cases = [(first, second, {}),
                 (first, second, {'axis': None}),
                 (left, right, {'axis': 0})]
        for a, b, options in cases:
            plain = stats.ttest_ind(a, b, **options)
            masked = mstats.ttest_ind(a, b, **options)
            assert_allclose(plain, masked)
        # Check default is axis=0 (`masked` still holds the axis=0 result)
        assert_allclose(masked, mstats.ttest_ind(left, right))
    def test_empty(self):
        # Empty samples are expected to produce NaN for every returned value.
        outcome = mstats.ttest_ind([], [])
        assert_(np.all(np.isnan(outcome)))
 class TestTtest_1samp():
    # Checks mstats.ttest_1samp against stats.ttest_1samp on unmasked data,
    # plus its behaviour on empty input.
    def test_vs_nonmasked(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]
        first, second = outcome[:, 0], outcome[:, 1]
        left, right = outcome[:, :2], outcome[:, 2:]
        # A column against the scalar 1, a column against another column
        # with axis=None, then the two 2-D halves along axis=0.
        cases = [(first, 1, {}),
                 (first, second, {'axis': None}),
                 (left, right, {'axis': 0})]
        for a, popmean, options in cases:
            plain = stats.ttest_1samp(a, popmean, **options)
            masked = mstats.ttest_1samp(a, popmean, **options)
            assert_allclose(plain, masked)
        # Check default is axis=0 (`masked` still holds the axis=0 result)
        assert_allclose(masked, mstats.ttest_1samp(left, right))
    def test_empty(self):
        # An empty sample is expected to produce NaN for every returned value.
        outcome = mstats.ttest_1samp([], 1)
        assert_(np.all(np.isnan(outcome)))
 # Script entry point: when this file is executed directly (rather than
 # imported), hand control to run_module_suite().
 if __name__ == "__main__":
    run_module_suite()
--- a/pywafo/src/wafo/stats/tests/test_mstats_extras.py
+++ b/pywafo/src/wafo/stats/tests/test_mstats_extras.py
@ -4,30 +4,32 @@
 :author: Pierre Gerard-Marchant
 :contact: pierregm_at_uga_dot_edu
 """
 from __future__ import division, print_function, absolute_import
 __author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)"
 import numpy as np
 import numpy.ma as ma
-import scipy.stats.mstats as ms
+import wafo.stats.mstats as ms
-#import scipy.stats.mmorestats as mms
+#import wafo.stats.mmorestats as mms
 from numpy.testing import TestCase, run_module_suite, assert_equal, \
    assert_almost_equal, assert_
 class TestMisc(TestCase):
-    #
+
    def __init__(self, *args, **kwargs):
        # Pass-through constructor: forwards all positional and keyword
        # arguments unchanged to TestCase.__init__ and adds no state.
        TestCase.__init__(self, *args, **kwargs)
-    #
+
    def test_mjci(self):
        "Tests the Maritz-Jarrett estimator"
        # ms.mjci is called with default arguments on a 19-point sample and
        # its three returned values are compared to references at 5 decimals.
        data = ma.array([77, 87, 88,114,151,210,219,246,253,262,
                          296,299,306,376,428,515,666,1310,2611])
        assert_almost_equal(ms.mjci(data),[55.76819,45.84028,198.87875],5)
-    #
+
    def test_trimmedmeanci(self):
        "Tests the confidence intervals of the trimmed mean."
        data = ma.array([545,555,558,572,575,576,578,580,
@ -35,7 +37,7 @@ class TestMisc(TestCase):
        assert_almost_equal(ms.trimmed_mean(data,0.2), 596.2, 1)
        assert_equal(np.round(ms.trimmed_mean_ci(data,(0.2,0.2)),1),
                     [561.8, 630.6])
-    #
+
    def test_idealfourths(self):
        "Tests ideal-fourths"
        test = np.arange(100)
@ -52,11 +54,13 @@ class TestMisc(TestCase):
        assert_(np.isnan(_result).all())
 #..............................................................................
 class TestQuantiles(TestCase):
-    #
+
    def __init__(self, *args, **kwargs):
        # Pass-through constructor: forwards all positional and keyword
        # arguments unchanged to TestCase.__init__ and adds no state.
        TestCase.__init__(self, *args, **kwargs)
-    #
+
    def test_hdquantiles(self):
        data = [0.706560797,0.727229578,0.990399276,0.927065621,0.158953014,
            0.887764025,0.239407086,0.349638551,0.972791145,0.149789972,
--- a/pywafo/src/wafo/stats/tests/test_multivariate.py
+++ b/pywafo/src/wafo/stats/tests/test_multivariate.py
@ -0,0 +1,275 @@
 """
 Test functions for multivariate normal distributions.
 """
 from __future__ import division, print_function, absolute_import
 from numpy.testing import (assert_almost_equal,
        run_module_suite, assert_allclose, assert_equal, assert_raises)
 import numpy
 import numpy as np
 import scipy.linalg
 import scipy.stats._multivariate
 from scipy.stats import multivariate_normal
 from scipy.stats import norm
 from scipy.stats._multivariate import _psd_pinv_decomposed_log_pdet
 from scipy.integrate import romb
 def test_scalar_values():
    """The pdf of one scalar, or of one vector, must be a 0-d value."""
    np.random.seed(1234)
    # Case 1: scalar observation, scalar mean, scalar variance.
    scalar_density = multivariate_normal.pdf(1.5, 1.7, 2.5)
    assert_equal(scalar_density.ndim, 0)
    # Case 2: a single 5-vector.  The three draws are taken in the order
    # point, mean, variances so the seeded random stream is consumed in
    # a fixed sequence.
    point = np.random.randn(5)
    centre = np.random.randn(5)
    variances = np.abs(np.random.randn(5))  # Diagonal of the cov. matrix
    vector_density = multivariate_normal.pdf(point, centre, variances)
    assert_equal(vector_density.ndim, 0)
 def test_logpdf():
    """logpdf must equal the logarithm of pdf at a random 5-d point."""
    np.random.seed(1234)
    point = np.random.randn(5)
    centre = np.random.randn(5)
    variances = np.abs(np.random.randn(5))
    log_density = multivariate_normal.logpdf(point, centre, variances)
    density = multivariate_normal.pdf(point, centre, variances)
    assert_allclose(log_density, np.log(density))
 def test_large_pseudo_determinant():
    """A pseudo-determinant too large for a double must survive as a log."""
    # Singular diagonal covariance: npos equal positive entries whose
    # logs add up to large_total_log, followed by nzero zeros.
    large_total_log = 1000.0
    npos, nzero = 100, 2
    diagonal = np.zeros(npos + nzero, dtype=float)
    diagonal[:npos] = np.exp(large_total_log / npos)
    cov = np.diag(diagonal)
    # Plain determinants: zero for the full matrix, overflow to inf for
    # the positive block.
    assert_equal(scipy.linalg.det(cov), 0)
    assert_equal(scipy.linalg.det(cov[:npos, :npos]), np.inf)
    # np.linalg.slogdet would be a natural cross-check of the positive
    # block, but it is only available in numpy 1.6+ while numpy 1.5.1 is
    # still supported, so it is not used here.
    # The log pseudo-determinant is the second item of the returned pair.
    decomposition = scipy.stats._multivariate._psd_pinv_decomposed_log_pdet(cov)
    assert_allclose(decomposition[1], large_total_log)
 def test_broadcasting():
    """Evaluating a (2, 3, n) stack at once must match per-vector calls."""
    np.random.seed(1234)
    n = 4
    # Construct a random covariance matrix.
    data = np.random.randn(n, n)
    cov = np.dot(data, data.T)
    mean = np.random.randn(n)
    # Construct an ndarray which can be interpreted as
    # a 2x3 array whose elements are random data vectors.
    X = np.random.randn(2, 3, n)
    # The batched evaluation does not depend on the loop indices, so it
    # is computed once up front instead of once per grid cell.
    desired = multivariate_normal.pdf(X, mean, cov)
    assert_equal(desired.shape, (2, 3))
    # Check that multiple data points can be evaluated at once.
    for i in range(2):
        for j in range(3):
            actual = multivariate_normal.pdf(X[i, j], mean, cov)
            assert_allclose(actual, desired[i, j])
 def test_normal_1D():
    """In one dimension the multivariate pdf must agree with `norm.pdf`."""
    grid = np.linspace(0, 2, 10)
    mean, variance = 1.2, 0.9
    # norm is given the square root of the value that
    # multivariate_normal receives as its covariance.
    univariate = norm.pdf(grid, mean, variance**0.5)
    multivariate = multivariate_normal.pdf(grid, mean, variance)
    assert_allclose(univariate, multivariate)
 def test_marginalization():
    """Integrating one variable out of a 2D Gaussian must give a 1D one."""
    mean = np.array([2.5, 3.5])
    cov = np.array([[.5, 0.2], [0.2, .6]])
    n_samples = 2**8 + 1  # Number of samples per axis
    spacing = 6.0 / (n_samples - 1)  # Grid spacing
    axis_values = np.linspace(0, 6, n_samples)
    # Stack the two meshgrid coordinate planes into (n, n, 2) points.
    points = np.dstack(np.meshgrid(axis_values, axis_values))
    density = multivariate_normal.pdf(points, mean, cov)
    # Integrate along axis k and compare with the 1D normal built from
    # mean[k] and cov[k, k].
    for k in (0, 1):
        marginal = romb(density, spacing, axis=k)
        reference = norm.pdf(axis_values, loc=mean[k], scale=cov[k, k]**0.5)
        assert_allclose(marginal, reference, rtol=1e-2, atol=1e-2)
 def test_frozen():
    """A frozen distribution must reproduce the unfrozen pdf and logpdf."""
    np.random.seed(1234)
    point = np.random.randn(5)
    centre = np.random.randn(5)
    variances = np.abs(np.random.randn(5))
    frozen = multivariate_normal(centre, variances)
    for method in ('pdf', 'logpdf'):
        from_frozen = getattr(frozen, method)(point)
        direct = getattr(multivariate_normal, method)(point, centre, variances)
        assert_allclose(from_frozen, direct)
 def test_pseudodet_pinv():
    """Pseudo-inverse and pseudo-determinant must share the same cutoff."""
    np.random.seed(1234)
    n = 7
    # Take the eigenvectors of a random symmetric matrix ...
    seed_matrix = np.random.randn(n, n)
    eigvecs = scipy.linalg.eigh(np.dot(seed_matrix, seed_matrix.T))[1]
    # ... and pair them with a hand-picked spectrum: one large value, a
    # run of 0.5 and one tiny eigenvalue in the last position.
    spectrum = 0.5 * np.ones(n)
    spectrum[0] = 1.0
    spectrum[-1] = 1e-7
    cov = np.dot(eigvecs, np.dot(np.diag(spectrum), eigvecs.T))
    # Set cond so that the lowest eigenvalue is below the cutoff
    cond = 1e-5
    U, log_pdet = _psd_pinv_decomposed_log_pdet(cov, cond)
    pinv = np.dot(U, U.T)
    log_pdet_pinv = _psd_pinv_decomposed_log_pdet(pinv, cond)[1]
    # The log pseudo-determinant must equal the sum of the logs of all
    # but the smallest eigenvalue.
    assert_allclose(log_pdet, np.sum(np.log(spectrum[:-1])))
    # The pseudo-determinant of the pseudo-inverse must be the
    # reciprocal of the pseudo-determinant.
    assert_allclose(-log_pdet, log_pdet_pinv)
 def test_exception_nonsquare_cov():
    """A 2x3 array passed as covariance must raise ValueError."""
    rectangular = [[1, 2, 3], [4, 5, 6]]
    assert_raises(ValueError, _psd_pinv_decomposed_log_pdet, rectangular)
 def test_exception_nonfinite_cov():
    """A covariance holding NaN or inf must raise ValueError."""
    # NaN first, then inf, each placed in the last diagonal position.
    for bad_entry in (np.nan, np.inf):
        cov = [[1, 0], [0, bad_entry]]
        assert_raises(ValueError, _psd_pinv_decomposed_log_pdet, cov)
 def test_exception_non_psd_cov():
    """A covariance with a negative diagonal entry must raise ValueError."""
    indefinite = [[1, 0], [0, -1]]
    assert_raises(ValueError, _psd_pinv_decomposed_log_pdet, indefinite)
 def test_R_values():
    """Compare the multivariate pdf with values precomputed in R."""
    # Reference values come from R version 3.0.1 (2013-05-16) on
    # Mac OS X 10.6, generated by the following R-script:
    # > library(mnormt)
    # > x <- seq(0, 2, length=5)
    # > y <- 3*x - 2
    # > z <- x + cos(y)
    # > mu <- c(1, 3, 2)
    # > Sigma <- matrix(c(1,2,0,2,5,0.5,0,0.5,3), 3, 3)
    # > r_pdf <- dmnorm(cbind(x,y,z), mu, Sigma)
    r_pdf = np.array([0.0002214706, 0.0013819953, 0.0049138692,
                      0.0103803050, 0.0140250800])
    # Rebuild the same five 3-d points, one per row.
    x = np.linspace(0, 2, 5)
    y = 3 * x - 2
    z = x + np.cos(y)
    points = np.transpose([x, y, z])
    mu = np.array([1, 3, 2], 'd')
    sigma = np.array([[1, 2, 0], [2, 5, .5], [0, .5, 3]], 'd')
    assert_allclose(multivariate_normal.pdf(points, mu, sigma), r_pdf,
                    atol=1e-10)
 def test_rvs_shape():
    """rvs must parse mean and covariance and return the right shape."""
    n_draws = 300
    dim = 4
    # Vector mean with a scalar covariance.
    draws = multivariate_normal.rvs(mean=np.zeros(dim), cov=1, size=n_draws)
    assert_equal(draws.shape, (n_draws, dim))
    # No mean given; the dimension comes from the 2x2 covariance.
    cov_2d = np.array([[2, .1], [.1, 1]])
    draws = multivariate_normal.rvs(mean=None, cov=cov_2d, size=n_draws)
    assert_equal(draws.shape, (n_draws, 2))
    # Frozen scalar distribution: samples come back one-dimensional.
    frozen = multivariate_normal(mean=0, cov=1)
    draws = frozen.rvs(n_draws)
    assert_equal(draws.shape, (n_draws, ))
 def test_large_sample():
    """Sample mean and covariance of a large draw must match the inputs."""
    np.random.seed(2846)
    dim = 3
    mean = np.random.randn(dim)
    factor = np.random.randn(dim, dim)
    cov = np.dot(factor, factor.T)
    n_draws = 5000
    draws = multivariate_normal.rvs(mean, cov, n_draws)
    # Both moments are compared with a 10% relative tolerance.
    sample_cov = np.cov(draws.T)
    assert_allclose(sample_cov, cov, rtol=1e-1)
    sample_mean = draws.mean(0)
    assert_allclose(sample_mean, mean, rtol=1e-1)
 def test_entropy():
    """Entropy must agree between the frozen, unfrozen and manual forms."""
    np.random.seed(2846)
    dim = 3
    mean = np.random.randn(dim)
    factor = np.random.randn(dim, dim)
    cov = np.dot(factor, factor.T)
    frozen = multivariate_normal(mean, cov)
    # Check that frozen distribution agrees with entropy function
    assert_almost_equal(frozen.entropy(),
                        multivariate_normal.entropy(mean, cov))
    # Compare entropy with manually computed expression involving
    # the sum of the logs of the eigenvalues of the covariance matrix
    eigenvalues = np.linalg.eig(cov)[0]
    log_det = np.sum(np.log(eigenvalues))
    desired = 0.5 * (dim * (np.log(2*np.pi) + 1) + log_det)
    assert_almost_equal(desired, frozen.entropy())
 if __name__ == "__main__":
    run_module_suite()
--- a/pywafo/src/wafo/stats/tests/test_rank.py
+++ b/pywafo/src/wafo/stats/tests/test_rank.py
@ -0,0 +1,193 @@
 from __future__ import division, print_function, absolute_import
 import numpy as np
 from numpy.testing import TestCase, run_module_suite, assert_equal, \
    assert_array_equal
 from scipy.stats import rankdata, tiecorrect
 class TestTieCorrect(TestCase):
    """Checks of the tie-correction factor computed from a rank vector."""
    def test_empty(self):
        """An empty array requires no correction, should return 1.0."""
        assert_equal(tiecorrect(np.array([], dtype=np.float64)), 1.0)
    def test_one(self):
        """A single element requires no correction, should return 1.0."""
        assert_equal(tiecorrect(np.array([1.0], dtype=np.float64)), 1.0)
    def test_no_correction(self):
        """Arrays with no ties require no correction."""
        for stop in (2.0, 3.0):
            assert_equal(tiecorrect(np.arange(stop)), 1.0)
    def test_basic(self):
        """Check a few basic examples of the tie correction factor."""
        # Each row: (ranks, sizes of the groups of tied ranks).
        examples = (
            # One tie of two elements
            ([1.0, 2.5, 2.5], (2.0,)),
            # One tie of two elements (tie is not at the end)
            ([1.5, 1.5, 3.0], (2.0,)),
            # One tie of three elements
            ([1.0, 3.0, 3.0, 3.0], (3.0,)),
            # Two ties, lengths 2 and 3.
            ([1.5, 1.5, 4.0, 4.0, 4.0], (2.0, 3.0)),
        )
        for rank_values, tie_sizes in examples:
            ranks = np.array(rank_values)
            n_obs = ranks.size
            tie_term = sum(t**3 - t for t in tie_sizes)
            expected = 1.0 - tie_term / (n_obs**3 - n_obs)
            assert_equal(tiecorrect(ranks), expected)
 class TestRankData(TestCase):
    """Checks of `rankdata` with its default tie handling.

    Integer inputs are built with the builtin ``int`` dtype.  ``np.int``
    was only an alias for it; the alias was deprecated in NumPy 1.20 and
    removed in NumPy 1.24, where accessing it raises AttributeError.
    """
    def test_empty(self):
        """stats.rankdata([]) should return an empty array."""
        a = np.array([], dtype=int)
        r = rankdata(a)
        assert_array_equal(r, np.array([], dtype=np.float64))
        r = rankdata([])
        assert_array_equal(r, np.array([], dtype=np.float64))
    def test_one(self):
        """Check stats.rankdata with an array of length 1."""
        data = [100]
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, np.array([1.0], dtype=np.float64))
        r = rankdata(data)
        assert_array_equal(r, np.array([1.0], dtype=np.float64))
    def test_basic(self):
        """Basic tests of stats.rankdata."""
        # Each row: (input values, expected ranks).
        examples = (
            ([100, 10, 50], [3.0, 1.0, 2.0]),
            ([40, 10, 30, 10, 50], [4.0, 1.5, 3.0, 1.5, 5.0]),
            ([20, 20, 20, 10, 10, 10], [5.0, 5.0, 5.0, 2.0, 2.0, 2.0]),
        )
        for data, ranks in examples:
            expected = np.array(ranks, dtype=np.float64)
            # The same values as an integer array and as a plain list.
            assert_array_equal(rankdata(np.array(data, dtype=int)), expected)
            assert_array_equal(rankdata(data), expected)
        # The docstring states explicitly that the argument is flattened,
        # so a 2x3 view of the last example must give the same ranks.
        data, ranks = examples[-1]
        a2d = np.array(data, dtype=int).reshape(2, 3)
        r = rankdata(a2d)
        assert_array_equal(r, np.array(ranks, dtype=np.float64))
    def test_large_int(self):
        """Integers that differ by 1 near 2**60 must still be ordered."""
        data = np.array([2**60, 2**60+1], dtype=np.uint64)
        r = rankdata(data)
        assert_array_equal(r, [1.0, 2.0])
        data = np.array([2**60, 2**60+1], dtype=np.int64)
        r = rankdata(data)
        assert_array_equal(r, [1.0, 2.0])
        data = np.array([2**60, -2**60+1], dtype=np.int64)
        r = rankdata(data)
        assert_array_equal(r, [2.0, 1.0])
    def test_big_tie(self):
        """n equal values must all receive the average rank (n + 1) / 2."""
        for n in [10000, 100000, 1000000]:
            data = np.ones(n, dtype=int)
            r = rankdata(data)
            expected_rank = 0.5 * (n + 1)
            assert_array_equal(r, expected_rank * data,
                               "test failed with n=%d" % n)
 _cases = (
    # values, method, expected
    ([], 'average', []),
    ([], 'min', []),
    ([], 'max', []),
    ([], 'dense', []),
    ([], 'ordinal', []),
    #
    ([100], 'average', [1.0]),
    ([100], 'min', [1.0]),
    ([100], 'max', [1.0]),
    ([100], 'dense', [1.0]),
    ([100], 'ordinal', [1.0]),
    #
    ([100, 100, 100], 'average', [2.0, 2.0, 2.0]),
    ([100, 100, 100], 'min', [1.0, 1.0, 1.0]),
    ([100, 100, 100], 'max', [3.0, 3.0, 3.0]),
    ([100, 100, 100], 'dense', [1.0, 1.0, 1.0]),
    ([100, 100, 100], 'ordinal', [1.0, 2.0, 3.0]),
    #
    ([100, 300, 200], 'average', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'min', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'max', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'dense', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'ordinal', [1.0, 3.0, 2.0]),
    #
    ([100, 200, 300, 200], 'average', [1.0, 2.5, 4.0, 2.5]),
    ([100, 200, 300, 200], 'min', [1.0, 2.0, 4.0, 2.0]),
    ([100, 200, 300, 200], 'max', [1.0, 3.0, 4.0, 3.0]),
    ([100, 200, 300, 200], 'dense', [1.0, 2.0, 3.0, 2.0]),
    ([100, 200, 300, 200], 'ordinal', [1.0, 2.0, 4.0, 3.0]),
    #
    ([100, 200, 300, 200, 100], 'average', [1.5, 3.5, 5.0, 3.5, 1.5]),
    ([100, 200, 300, 200, 100], 'min', [1.0, 3.0, 5.0, 3.0, 1.0]),
    ([100, 200, 300, 200, 100], 'max', [2.0, 4.0, 5.0, 4.0, 2.0]),
    ([100, 200, 300, 200, 100], 'dense', [1.0, 2.0, 3.0, 2.0, 1.0]),
    ([100, 200, 300, 200, 100], 'ordinal', [1.0, 3.0, 5.0, 4.0, 2.0]),
    #
    ([10] * 30, 'ordinal', np.arange(1.0, 31.0)),
 )
 def test_cases():
    def check_case(values, method, expected):
        r = rankdata(values, method=method)
        assert_array_equal(r, expected)
    for values, method, expected in _cases:
        yield check_case, values, method, expected
 if __name__ == "__main__":
    run_module_suite()
--- a/pywafo/src/wafo/stats/tests/test_stats.py
+++ b/pywafo/src/wafo/stats/tests/test_stats.py
--- a/pywafo/src/wafo/stats/tests/test_tukeylambda_stats.py
+++ b/pywafo/src/wafo/stats/tests/test_tukeylambda_stats.py
@ -0,0 +1,91 @@
 from __future__ import division, print_function, absolute_import
 import numpy as np
 from numpy.testing import assert_allclose, assert_equal, run_module_suite
 from scipy.stats._tukeylambda_stats import tukeylambda_variance, \
                                            tukeylambda_kurtosis
 def test_tukeylambda_stats_known_exact():
    """Compare results with some known exact formulas."""
    # Some exact values of the Tukey Lambda variance and kurtosis:
    # lambda   var      kurtosis
    #   0     pi**2/3     6/5     (logistic distribution)
    #  0.5    4 - pi    (5/3 - pi/2)/(pi/4 - 1)**2 - 3
    #   1      1/3       -6/5     (uniform distribution on (-1,1))
    #   2      1/12      -6/5     (uniform distribution on (-1/2, 1/2))
    exact = (
        (0, np.pi**2 / 3, 1.2),
        (0.5, 4 - np.pi, (5./3 - np.pi/2) / (np.pi/4 - 1)**2 - 3),
        (1, 1.0 / 3, -1.2),
        (2, 1.0 / 12, -1.2),
    )
    for lam, var_exact, kurt_exact in exact:
        # Variance is held to a tighter absolute tolerance than kurtosis.
        assert_allclose(tukeylambda_variance(lam), var_exact, atol=1e-12)
        assert_allclose(tukeylambda_kurtosis(lam), kurt_exact, atol=1e-10)
 def test_tukeylambda_stats_mpmath():
    """Compare results with some values that were computed using mpmath."""
    # Purely absolute tolerances (rtol=0); variance is the tighter one.
    var_tol = dict(atol=1e-12, rtol=0)
    kurt_tol = dict(atol=1e-10, rtol=0)
    reference = [
        # lambda        variance              kurtosis
        [-0.1, 4.78050217874253547, 3.78559520346454510],
        [-0.0649, 4.16428023599895777, 2.52019675947435718],
        [-0.05, 3.93672267890775277, 2.13129793057777277],
        [-0.001, 3.30128380390964882, 1.21452460083542988],
        [0.001, 3.27850775649572176, 1.18560634779287585],
        [0.03125, 2.95927803254615800, 0.804487555161819980],
        [0.05, 2.78281053405464501, 0.611604043886644327],
        [0.0649, 2.65282386754100551, 0.476834119532774540],
        [1.2, 0.242153920578588346, -1.23428047169049726],
        [10.0, 0.00095237579757703597, 2.37810697355144933],
        [20.0, 0.00012195121951131043, 7.37654321002709531],
    ]
    # Scalar arguments, one lambda at a time.
    for lam, var_ref, kurt_ref in reference:
        assert_allclose(tukeylambda_variance(lam), var_ref, **var_tol)
        assert_allclose(tukeylambda_kurtosis(lam), kurt_ref, **kurt_tol)
    # Test with vector arguments (most of the other tests are for single
    # values): pass each column of the table in one call.
    lams = [row[0] for row in reference]
    var_refs = [row[1] for row in reference]
    kurt_refs = [row[2] for row in reference]
    assert_allclose(tukeylambda_variance(lams), var_refs, **var_tol)
    assert_allclose(tukeylambda_kurtosis(lams), kurt_refs, **kurt_tol)
 def test_tukeylambda_stats_invalid():
    """Test values of lambda outside the domains of the functions."""
    nan_then_inf = np.array([np.nan, np.inf])
    # In both calls the first lambda must map to nan, the second to inf.
    assert_equal(tukeylambda_variance([-1.0, -0.5]), nan_then_inf)
    assert_equal(tukeylambda_kurtosis([-1.0, -0.25]),
                 np.array([np.nan, np.inf]))
 if __name__ == "__main__":
    run_module_suite()
--- a/pywafo/src/wafo/stats/vonmises.py
+++ b/pywafo/src/wafo/stats/vonmises.py
@ -0,0 +1,47 @@
 from __future__ import division, print_function, absolute_import
 import numpy as np
 import scipy.stats
 from scipy.special import i0
 def von_mises_cdf_series(k,x,p):
    """Series evaluation of the von Mises CDF at a single angle.

    Returns 0.5 + x/(2*pi) + V/pi, where V is accumulated by a backward
    recurrence over n = p-1, ..., 1.  `x` is converted to a float, and
    `k` is used as a divisor.
    """
    x = float(x)
    sin_x, cos_x = np.sin(x), np.cos(x)
    # Start from the sine and cosine of p*x.
    sin_nx, cos_nx = np.sin(p*x), np.cos(p*x)
    ratio = 0
    total = 0
    n = p - 1
    while n > 0:
        # Angle-subtraction identities: step from (n+1)*x down to n*x.
        # Both new values are built from the old pair before either is
        # overwritten.
        stepped_sin = sin_nx*cos_x - cos_nx*sin_x
        stepped_cos = cos_nx*cos_x + sin_nx*sin_x
        sin_nx, cos_nx = stepped_sin, stepped_cos
        ratio = 1./(2*n/k + ratio)
        total = ratio*(sin_nx/n+total)
        n -= 1
    return 0.5+x/(2*np.pi) + total/np.pi
 def von_mises_cdf_normalapprox(k,x,C1):
    """Normal approximation of the von Mises CDF, used for large k.

    Parameters
    ----------
    k : concentration; von_mises_cdf only calls this with k >= 50.
    x : angle, already reduced by the caller.
    C1 : accepted for signature compatibility; not used in the
        computation.

    Returns
    -------
    The standard normal CDF evaluated at b*sin(x/2), with
    b = sqrt(2/pi)*exp(k)/i0(k).
    """
    # Local import keeps this function self-contained.
    from scipy.special import i0e
    # i0e(k) is exp(-|k|)*i0(k), so for k >= 0 the ratio exp(k)/i0(k)
    # equals 1/i0e(k).  Forming the ratio directly overflows to inf/inf
    # (nan) once exp(k) exceeds the double range, i.e. for k above
    # roughly 709; the scaled Bessel function stays finite.
    b = np.sqrt(2/np.pi)/i0e(k)
    z = b*np.sin(x/2.)
    return scipy.stats.norm.cdf(z)
 def von_mises_cdf(k,x):
    """Von Mises CDF for concentration `k` at angle `x`.

    `x` is first reduced by the nearest whole number of turns.  The
    reduced angle is evaluated with the series for k < 50 and with the
    normal approximation otherwise, and the number of removed turns is
    added back to the result, so the value changes by 1 per 2*pi.
    """
    # Whole turns closest to x.  This count, not the angle 2*pi*turns,
    # is what has to be added back to a probability; the Cython
    # implementation of this function does the same.
    turns = np.round(x/(2*np.pi))
    x = x-2*np.pi*turns
    k = float(k)
    # These values should give 12 decimal digits
    CK = 50
    a = [28., 0.5, 100., 5.0]
    C1 = 50.1
    if k < CK:
        # Number of series terms grows with k.
        p = int(np.ceil(a[0]+a[1]*k-a[2]/(k+a[3])))
        # Clip away round-off that would leave the [0, 1] range.
        F = np.clip(von_mises_cdf_series(k,x,p),0,1)
    else:
        F = von_mises_cdf_normalapprox(k,x,C1)
    return F+turns
--- a/pywafo/src/wafo/stats/vonmises_cython.pyx
+++ b/pywafo/src/wafo/stats/vonmises_cython.pyx
@ -0,0 +1,76 @@
 import numpy as np
 import scipy.stats
 from scipy.special import i0
 import numpy.testing
 cimport numpy as np
 cdef extern from "math.h":
    double cos(double theta)
    double sin(double theta)
 cdef double von_mises_cdf_series(double k,double x,unsigned int p):
    # Series evaluation used by von_mises_cdf for k below its CK
    # threshold.  Returns 0.5 + x/(2*pi) + V/pi, where V is accumulated
    # by a backward recurrence over n = p-1, ..., 1.  k is used as a
    # divisor.
    cdef double s, c, sn, cn, R, V
    cdef unsigned int n
    s = sin(x)
    c = cos(x)
    # Start from the sine and cosine of p*x.
    sn = sin(p*x)
    cn = cos(p*x)
    R = 0
    V = 0
    for n in range(p-1,0,-1):
        # Angle-subtraction identities: step sn, cn from (n+1)*x to n*x.
        sn, cn = sn*c - cn*s, cn*c + sn*s
        R = 1./(2*n/k + R)
        V = R*(sn/n+V)
    return 0.5+x/(2*np.pi) + V/np.pi
 def von_mises_cdf_normalapprox(k,x,C1):
    """Normal approximation of the von Mises CDF, used for large k.

    Returns the standard normal CDF evaluated at b*sin(x/2), with
    b = sqrt(2/pi)*exp(k)/i0(k).  `C1` is accepted for signature
    compatibility and is not used.
    """
    # NOTE(review): the previous body also computed a corrected argument
    # `chi` from 24*k and C1, but never used it -- the value returned was
    # always norm.cdf(z).  That dead computation is dropped here and the
    # returned value is left as it was; whether the correction should be
    # applied is a separate numerical question to confirm.
    from scipy.special import i0e
    # i0e(k) is exp(-|k|)*i0(k), so for k >= 0 the ratio exp(k)/i0(k)
    # equals 1/i0e(k) without overflowing to inf/inf (nan) for large k.
    b = np.sqrt(2/np.pi)/i0e(k)
    z = b*np.sin(x/2.)
    return scipy.stats.norm.cdf(z)
 cimport cython
@cython.boundscheck(False)
 def von_mises_cdf(k,x):
    """Von Mises CDF for concentration(s) `k` at angle(s) `x`.

    `k` and `x` are broadcast against each other.  Each angle is reduced
    by its nearest whole number of turns; elements with k < 50 use the
    series, the others the normal approximation, and the number of
    removed turns is added back to the result.  A scalar is returned
    when both inputs are zero-dimensional.
    """
    cdef np.ndarray[double, ndim=1] temp, temp_xs, temp_ks
    cdef unsigned int i, p
    cdef double a1, a2, a3, a4, C1, CK
    k = np.asarray(k)
    x = np.asarray(x)
    # Remember whether to unwrap the result to a scalar at the end.
    zerodim = k.ndim==0 and x.ndim==0
    k = np.atleast_1d(k)
    x = np.atleast_1d(x)
    ix = np.round(x/(2*np.pi))
    x = x-ix*2*np.pi
    # These values should give 12 decimal digits
    CK=50
    a1, a2, a3, a4 = [28., 0.5, 100., 5.0]
    C1 = 50.1
    bx, bk = np.broadcast_arrays(x,k)
    # np.float64 is spelled out: the np.float alias was removed in
    # NumPy 1.24, and the typed buffers above need double precision.
    result = np.empty(bx.shape,dtype=np.float64)
    c_small_k = bk<CK
    # Boolean indexing yields 1-D copies, which match the typed buffers.
    temp = result[c_small_k]
    temp_xs = bx[c_small_k].astype(np.float64)
    temp_ks = bk[c_small_k].astype(np.float64)
    for i in range(len(temp)):
        p = <int>(1+a1+a2*temp_ks[i]-a3/(temp_ks[i]+a4))
        temp[i] = von_mises_cdf_series(temp_ks[i],temp_xs[i],p)
        # Clamp round-off that would leave the [0, 1] range.
        if temp[i]<0:
            temp[i]=0
        elif temp[i]>1:
            temp[i]=1
    result[c_small_k] = temp
    result[~c_small_k] = von_mises_cdf_normalapprox(bk[~c_small_k],bx[~c_small_k],C1)
    if not zerodim:
        return result+ix
    else:
        return (result+ix)[0]