You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
pywafo/wafo/stats/tests/test_mstats_basic.py

1056 lines
42 KiB
Python

11 years ago
"""
Tests for the stats.mstats module (support for masked arrays)
11 years ago
"""
from __future__ import division, print_function, absolute_import
import warnings
import numpy as np
from numpy import nan
import numpy.ma as ma
from numpy.ma import masked, nomask
import wafo.stats.mstats as mstats
from wafo import stats
from numpy.testing import TestCase, run_module_suite
from numpy.testing.decorators import skipif
11 years ago
from numpy.ma.testutils import (assert_equal, assert_almost_equal,
assert_array_almost_equal, assert_array_almost_equal_nulp, assert_,
assert_allclose, assert_raises)
11 years ago
class TestMquantiles(TestCase):
def test_mquantiles_limit_keyword(self):
# Regression test for Trac ticket #867
data = np.array([[6., 7., 1.],
[47., 15., 2.],
[49., 36., 3.],
[15., 39., 4.],
[42., 40., -999.],
[41., 41., -999.],
[7., -999., -999.],
[39., -999., -999.],
[43., -999., -999.],
[40., -999., -999.],
[36., -999., -999.]])
desired = [[19.2, 14.6, 1.45],
[40.0, 37.5, 2.5],
[42.8, 40.05, 3.55]]
quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
assert_almost_equal(quants, desired)
class TestGMean(TestCase):
def test_1D(self):
a = (1,2,3,4)
11 years ago
actual = mstats.gmean(a)
desired = np.power(1*2*3*4,1./4.)
11 years ago
assert_almost_equal(actual, desired, decimal=14)
desired1 = mstats.gmean(a,axis=-1)
11 years ago
assert_almost_equal(actual, desired1, decimal=14)
assert_(not isinstance(desired1, ma.MaskedArray))
a = ma.array((1,2,3,4),mask=(0,0,0,1))
11 years ago
actual = mstats.gmean(a)
desired = np.power(1*2*3,1./3.)
assert_almost_equal(actual, desired,decimal=14)
11 years ago
desired1 = mstats.gmean(a,axis=-1)
11 years ago
assert_almost_equal(actual, desired1, decimal=14)
@skipif(not hasattr(np, 'float96'), 'cannot find float96 so skipping')
def test_1D_float96(self):
a = ma.array((1,2,3,4), mask=(0,0,0,1))
actual_dt = mstats.gmean(a, dtype=np.float96)
desired_dt = np.power(1 * 2 * 3, 1. / 3.).astype(np.float96)
assert_almost_equal(actual_dt, desired_dt, decimal=14)
assert_(actual_dt.dtype == desired_dt.dtype)
11 years ago
def test_2D(self):
a = ma.array(((1, 2, 3, 4), (1, 2, 3, 4), (1, 2, 3, 4)),
mask=((0, 0, 0, 0), (1, 0, 0, 1), (0, 1, 1, 0)))
actual = mstats.gmean(a)
desired = np.array((1,2,3,4))
11 years ago
assert_array_almost_equal(actual, desired, decimal=14)
desired1 = mstats.gmean(a,axis=0)
11 years ago
assert_array_almost_equal(actual, desired1, decimal=14)
actual = mstats.gmean(a, -1)
desired = ma.array((np.power(1*2*3*4,1./4.),
np.power(2*3,1./2.),
np.power(1*4,1./2.)))
11 years ago
assert_array_almost_equal(actual, desired, decimal=14)
class TestHMean(TestCase):
def test_1D(self):
a = (1,2,3,4)
11 years ago
actual = mstats.hmean(a)
desired = 4. / (1./1 + 1./2 + 1./3 + 1./4)
11 years ago
assert_almost_equal(actual, desired, decimal=14)
desired1 = mstats.hmean(ma.array(a),axis=-1)
11 years ago
assert_almost_equal(actual, desired1, decimal=14)
a = ma.array((1,2,3,4),mask=(0,0,0,1))
11 years ago
actual = mstats.hmean(a)
desired = 3. / (1./1 + 1./2 + 1./3)
assert_almost_equal(actual, desired,decimal=14)
desired1 = mstats.hmean(a,axis=-1)
11 years ago
assert_almost_equal(actual, desired1, decimal=14)
@skipif(not hasattr(np, 'float96'), 'cannot find float96 so skipping')
def test_1D_float96(self):
a = ma.array((1,2,3,4), mask=(0,0,0,1))
actual_dt = mstats.hmean(a, dtype=np.float96)
desired_dt = np.asarray(3. / (1./1 + 1./2 + 1./3),
dtype=np.float96)
assert_almost_equal(actual_dt, desired_dt, decimal=14)
assert_(actual_dt.dtype == desired_dt.dtype)
11 years ago
def test_2D(self):
a = ma.array(((1,2,3,4),(1,2,3,4),(1,2,3,4)),
mask=((0,0,0,0),(1,0,0,1),(0,1,1,0)))
11 years ago
actual = mstats.hmean(a)
desired = ma.array((1,2,3,4))
11 years ago
assert_array_almost_equal(actual, desired, decimal=14)
actual1 = mstats.hmean(a,axis=-1)
desired = (4./(1/1.+1/2.+1/3.+1/4.),
2./(1/2.+1/3.),
2./(1/1.+1/4.)
11 years ago
)
assert_array_almost_equal(actual1, desired, decimal=14)
class TestRanking(TestCase):
def __init__(self, *args, **kwargs):
TestCase.__init__(self, *args, **kwargs)
def test_ranking(self):
x = ma.array([0,1,1,1,2,3,4,5,5,6,])
assert_almost_equal(mstats.rankdata(x),
[1,3,3,3,5,6,7,8.5,8.5,10])
x[[3,4]] = masked
assert_almost_equal(mstats.rankdata(x),
[1,2.5,2.5,0,0,4,5,6.5,6.5,8])
11 years ago
assert_almost_equal(mstats.rankdata(x, use_missing=True),
[1,2.5,2.5,4.5,4.5,4,5,6.5,6.5,8])
x = ma.array([0,1,5,1,2,4,3,5,1,6,])
assert_almost_equal(mstats.rankdata(x),
[1,3,8.5,3,5,7,6,8.5,3,10])
x = ma.array([[0,1,1,1,2], [3,4,5,5,6,]])
assert_almost_equal(mstats.rankdata(x),
[[1,3,3,3,5], [6,7,8.5,8.5,10]])
assert_almost_equal(mstats.rankdata(x, axis=1),
[[1,3,3,3,5], [1,2,3.5,3.5,5]])
assert_almost_equal(mstats.rankdata(x,axis=0),
[[1,1,1,1,1], [2,2,2,2,2,]])
11 years ago
class TestCorr(TestCase):
def test_pearsonr(self):
# Tests some computations of Pearson's r
x = ma.arange(10)
with warnings.catch_warnings():
# The tests in this context are edge cases, with perfect
# correlation or anticorrelation, or totally masked data.
# None of these should trigger a RuntimeWarning.
warnings.simplefilter("error", RuntimeWarning)
assert_almost_equal(mstats.pearsonr(x, x)[0], 1.0)
assert_almost_equal(mstats.pearsonr(x, x[::-1])[0], -1.0)
x = ma.array(x, mask=True)
pr = mstats.pearsonr(x, x)
assert_(pr[0] is masked)
assert_(pr[1] is masked)
x1 = ma.array([-1.0, 0.0, 1.0])
y1 = ma.array([0, 0, 3])
r, p = mstats.pearsonr(x1, y1)
assert_almost_equal(r, np.sqrt(3)/2)
assert_almost_equal(p, 1.0/3)
11 years ago
# (x2, y2) have the same unmasked data as (x1, y1).
mask = [False, False, False, True]
x2 = ma.array([-1.0, 0.0, 1.0, 99.0], mask=mask)
y2 = ma.array([0, 0, 3, -1], mask=mask)
r, p = mstats.pearsonr(x2, y2)
assert_almost_equal(r, np.sqrt(3)/2)
assert_almost_equal(p, 1.0/3)
11 years ago
def test_spearmanr(self):
# Tests some computations of Spearman's rho
(x, y) = ([5.05,6.75,3.21,2.66],[1.65,2.64,2.64,6.95])
assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
(x, y) = ([5.05,6.75,3.21,2.66,np.nan],[1.65,2.64,2.64,6.95,np.nan])
11 years ago
(x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
11 years ago
x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7]
11 years ago
y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4]
assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)
11 years ago
x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7, np.nan]
11 years ago
y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4, np.nan]
11 years ago
(x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)
11 years ago
def test_kendalltau(self):
# Tests some computations of Kendall's tau
x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66,np.nan])
11 years ago
y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
assert_almost_equal(np.asarray(mstats.kendalltau(x,y)),
[+0.3333333,0.4969059])
assert_almost_equal(np.asarray(mstats.kendalltau(x,z)),
[-0.5477226,0.2785987])
11 years ago
#
x = ma.fix_invalid([0, 0, 0, 0,20,20, 0,60, 0,20,
10,10, 0,40, 0,20, 0, 0, 0, 0, 0, np.nan])
y = ma.fix_invalid([0,80,80,80,10,33,60, 0,67,27,
25,80,80,80,80,80,80, 0,10,45, np.nan, 0])
result = mstats.kendalltau(x,y)
11 years ago
assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])
def test_kendalltau_seasonal(self):
# Tests the seasonal Kendall tau.
x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
11 years ago
[4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
[3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
[nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
11 years ago
x = ma.fix_invalid(x).T
output = mstats.kendalltau_seasonal(x)
assert_almost_equal(output['global p-value (indep)'], 0.008, 3)
assert_almost_equal(output['seasonal p-value'].round(2),
[0.18,0.53,0.20,0.04])
11 years ago
def test_pointbiserial(self):
x = [1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,
0,0,0,0,1,-1]
y = [14.8,13.8,12.4,10.1,7.1,6.1,5.8,4.6,4.3,3.5,3.3,3.2,3.0,
2.8,2.8,2.5,2.4,2.3,2.1,1.7,1.7,1.5,1.3,1.3,1.2,1.2,1.1,
0.8,0.7,0.6,0.5,0.2,0.2,0.1,np.nan]
11 years ago
assert_almost_equal(mstats.pointbiserialr(x, y)[0], 0.36149, 5)
class TestTrimming(TestCase):
def test_trim(self):
a = ma.arange(10)
assert_equal(mstats.trim(a), [0,1,2,3,4,5,6,7,8,9])
11 years ago
a = ma.arange(10)
assert_equal(mstats.trim(a,(2,8)), [None,None,2,3,4,5,6,7,8,None])
11 years ago
a = ma.arange(10)
assert_equal(mstats.trim(a,limits=(2,8),inclusive=(False,False)),
[None,None,None,3,4,5,6,7,None,None])
11 years ago
a = ma.arange(10)
assert_equal(mstats.trim(a,limits=(0.1,0.2),relative=True),
[None,1,2,3,4,5,6,7,None,None])
11 years ago
a = ma.arange(12)
a[[0,-1]] = a[5] = masked
assert_equal(mstats.trim(a, (2,8)),
11 years ago
[None, None, 2, 3, 4, None, 6, 7, 8, None, None, None])
x = ma.arange(100).reshape(10, 10)
expected = [1]*10 + [0]*70 + [1]*20
trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=None)
assert_equal(trimx._mask.ravel(), expected)
trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=0)
assert_equal(trimx._mask.ravel(), expected)
trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=-1)
assert_equal(trimx._mask.T.ravel(), expected)
# same as above, but with an extra masked row inserted
11 years ago
x = ma.arange(110).reshape(11, 10)
x[1] = masked
expected = [1]*20 + [0]*70 + [1]*20
trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=None)
assert_equal(trimx._mask.ravel(), expected)
trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=0)
assert_equal(trimx._mask.ravel(), expected)
trimx = mstats.trim(x.T, (0.1,0.2), relative=True, axis=-1)
assert_equal(trimx.T._mask.ravel(), expected)
11 years ago
def test_trim_old(self):
x = ma.arange(100)
assert_equal(mstats.trimboth(x).count(), 60)
assert_equal(mstats.trimtail(x,tail='r').count(), 80)
11 years ago
x[50:70] = masked
trimx = mstats.trimboth(x)
assert_equal(trimx.count(), 48)
assert_equal(trimx._mask, [1]*16 + [0]*34 + [1]*20 + [0]*14 + [1]*16)
11 years ago
x._mask = nomask
x.shape = (10,10)
11 years ago
assert_equal(mstats.trimboth(x).count(), 60)
assert_equal(mstats.trimtail(x).count(), 80)
def test_trimmedmean(self):
data = ma.array([77, 87, 88,114,151,210,219,246,253,262,
296,299,306,376,428,515,666,1310,2611])
assert_almost_equal(mstats.trimmed_mean(data,0.1), 343, 0)
assert_almost_equal(mstats.trimmed_mean(data,(0.1,0.1)), 343, 0)
assert_almost_equal(mstats.trimmed_mean(data,(0.2,0.2)), 283, 0)
11 years ago
def test_trimmed_stde(self):
data = ma.array([77, 87, 88,114,151,210,219,246,253,262,
296,299,306,376,428,515,666,1310,2611])
assert_almost_equal(mstats.trimmed_stde(data,(0.2,0.2)), 56.13193, 5)
assert_almost_equal(mstats.trimmed_stde(data,0.2), 56.13193, 5)
11 years ago
def test_winsorization(self):
data = ma.array([77, 87, 88,114,151,210,219,246,253,262,
296,299,306,376,428,515,666,1310,2611])
assert_almost_equal(mstats.winsorize(data,(0.2,0.2)).var(ddof=1),
11 years ago
21551.4, 1)
data[5] = masked
winsorized = mstats.winsorize(data)
assert_equal(winsorized.mask, data.mask)
class TestMoments(TestCase):
# Comparison numbers are found using R v.1.5.1
# note that length(testcase) = 4
# testmathworks comes from documentation for the
# Statistics Toolbox for Matlab and can be found at both
# http://www.mathworks.com/access/helpdesk/help/toolbox/stats/kurtosis.shtml
# http://www.mathworks.com/access/helpdesk/help/toolbox/stats/skewness.shtml
# Note that both test cases came from here.
testcase = [1,2,3,4]
11 years ago
testmathworks = ma.fix_invalid([1.165, 0.6268, 0.0751, 0.3516, -0.6965,
np.nan])
testcase_2d = ma.array(
np.array([[0.05245846, 0.50344235, 0.86589117, 0.36936353, 0.46961149],
[0.11574073, 0.31299969, 0.45925772, 0.72618805, 0.75194407],
[0.67696689, 0.91878127, 0.09769044, 0.04645137, 0.37615733],
[0.05903624, 0.29908861, 0.34088298, 0.66216337, 0.83160998],
[0.64619526, 0.94894632, 0.27855892, 0.0706151, 0.39962917]]),
mask=np.array([[True, False, False, True, False],
[True, True, True, False, True],
[False, False, False, False, False],
[True, True, True, True, True],
[False, False, True, False, False]], dtype=np.bool))
11 years ago
def test_moment(self):
y = mstats.moment(self.testcase,1)
assert_almost_equal(y,0.0,10)
y = mstats.moment(self.testcase,2)
assert_almost_equal(y,1.25)
y = mstats.moment(self.testcase,3)
assert_almost_equal(y,0.0)
y = mstats.moment(self.testcase,4)
assert_almost_equal(y,2.5625)
11 years ago
def test_variation(self):
y = mstats.variation(self.testcase)
assert_almost_equal(y,0.44721359549996, 10)
11 years ago
def test_skewness(self):
y = mstats.skew(self.testmathworks)
assert_almost_equal(y,-0.29322304336607,10)
y = mstats.skew(self.testmathworks,bias=0)
assert_almost_equal(y,-0.437111105023940,10)
11 years ago
y = mstats.skew(self.testcase)
assert_almost_equal(y,0.0,10)
11 years ago
def test_kurtosis(self):
# Set flags for axis = 0 and fisher=0 (Pearson's definition of kurtosis
# for compatibility with Matlab)
y = mstats.kurtosis(self.testmathworks,0,fisher=0,bias=1)
assert_almost_equal(y, 2.1658856802973,10)
11 years ago
# Note that MATLAB has confusing docs for the following case
# kurtosis(x,0) gives an unbiased estimate of Pearson's skewness
# kurtosis(x) gives a biased estimate of Fisher's skewness (Pearson-3)
# The MATLAB docs imply that both should give Fisher's
y = mstats.kurtosis(self.testmathworks,fisher=0, bias=0)
assert_almost_equal(y, 3.663542721189047,10)
y = mstats.kurtosis(self.testcase,0,0)
assert_almost_equal(y,1.64)
11 years ago
# test that kurtosis works on multidimensional masked arrays
correct_2d = ma.array(np.array([-1.5, -3., -1.47247052385, 0.,
-1.26979517952]),
11 years ago
mask=np.array([False, False, False, True,
False], dtype=np.bool))
assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1),
correct_2d)
for i, row in enumerate(self.testcase_2d):
assert_almost_equal(mstats.kurtosis(row), correct_2d[i])
correct_2d_bias_corrected = ma.array(
np.array([-1.5, -3., -1.88988209538, 0., -0.5234638463918877]),
mask=np.array([False, False, False, True, False], dtype=np.bool))
assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1,
bias=False),
correct_2d_bias_corrected)
for i, row in enumerate(self.testcase_2d):
assert_almost_equal(mstats.kurtosis(row, bias=False),
correct_2d_bias_corrected[i])
# Check consistency between stats and mstats implementations
assert_array_almost_equal_nulp(mstats.kurtosis(self.testcase_2d[2, :]),
stats.kurtosis(self.testcase_2d[2, :]))
def test_mode(self):
a1 = [0,0,0,1,1,1,2,3,3,3,3,4,5,6,7]
a2 = np.reshape(a1, (3,5))
a3 = np.array([1,2,3,4,5,6])
a4 = np.reshape(a3, (3,2))
11 years ago
ma1 = ma.masked_where(ma.array(a1) > 2, a1)
ma2 = ma.masked_where(a2 > 2, a2)
ma3 = ma.masked_where(a3 < 2, a3)
ma4 = ma.masked_where(ma.array(a4) < 2, a4)
assert_equal(mstats.mode(a1, axis=None), (3,4))
assert_equal(mstats.mode(a1, axis=0), (3,4))
assert_equal(mstats.mode(ma1, axis=None), (0,3))
assert_equal(mstats.mode(a2, axis=None), (3,4))
assert_equal(mstats.mode(ma2, axis=None), (0,3))
assert_equal(mstats.mode(a3, axis=None), (1,1))
assert_equal(mstats.mode(ma3, axis=None), (2,1))
assert_equal(mstats.mode(a2, axis=0), ([[0,0,0,1,1]], [[1,1,1,1,1]]))
assert_equal(mstats.mode(ma2, axis=0), ([[0,0,0,1,1]], [[1,1,1,1,1]]))
assert_equal(mstats.mode(a2, axis=-1), ([[0],[3],[3]], [[3],[3],[1]]))
assert_equal(mstats.mode(ma2, axis=-1), ([[0],[1],[0]], [[3],[1],[0]]))
assert_equal(mstats.mode(ma4, axis=0), ([[3,2]], [[1,1]]))
assert_equal(mstats.mode(ma4, axis=-1), ([[2],[3],[5]], [[1],[1],[1]]))
11 years ago
class TestPercentile(TestCase):
def setUp(self):
self.a1 = [3,4,5,10,-3,-5,6]
self.a2 = [3,-6,-2,8,7,4,2,1]
self.a3 = [3.,4,5,10,-3,-5,-6,7.0]
11 years ago
def test_percentile(self):
x = np.arange(8) * 0.5
assert_equal(mstats.scoreatpercentile(x, 0), 0.)
assert_equal(mstats.scoreatpercentile(x, 100), 3.5)
assert_equal(mstats.scoreatpercentile(x, 50), 1.75)
def test_2D(self):
x = ma.array([[1, 1, 1],
[1, 1, 1],
[4, 4, 3],
[1, 1, 1],
[1, 1, 1]])
assert_equal(mstats.scoreatpercentile(x,50), [1,1,1])
11 years ago
class TestVariability(TestCase):
""" Comparison numbers are found using R v.1.5.1
note that length(testcase) = 4
"""
testcase = ma.fix_invalid([1,2,3,4,np.nan])
11 years ago
def test_signaltonoise(self):
# This is not in R, so used:
# mean(testcase, axis=0) / (sqrt(var(testcase)*3/4))
y = mstats.signaltonoise(self.testcase)
assert_almost_equal(y, 2.236067977)
def test_sem(self):
# This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
y = mstats.sem(self.testcase)
assert_almost_equal(y, 0.6454972244)
n = self.testcase.count()
assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
11 years ago
mstats.sem(self.testcase, ddof=2))
def test_zmap(self):
# This is not in R, so tested by using:
# (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4)
y = mstats.zmap(self.testcase, self.testcase)
desired_unmaskedvals = ([-1.3416407864999, -0.44721359549996,
0.44721359549996, 1.3416407864999])
assert_array_almost_equal(desired_unmaskedvals,
y.data[y.mask == False], decimal=12)
def test_zscore(self):
# This is not in R, so tested by using:
# (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4)
y = mstats.zscore(self.testcase)
desired = ma.fix_invalid([-1.3416407864999, -0.44721359549996,
0.44721359549996, 1.3416407864999, np.nan])
assert_almost_equal(desired, y, decimal=12)
class TestMisc(TestCase):
def test_obrientransform(self):
args = [[5]*5+[6]*11+[7]*9+[8]*3+[9]*2+[10]*2,
[6]+[7]*2+[8]*4+[9]*9+[10]*16]
result = [5*[3.1828]+11*[0.5591]+9*[0.0344]+3*[1.6086]+2*[5.2817]+2*[11.0538],
[10.4352]+2*[4.8599]+4*[1.3836]+9*[0.0061]+16*[0.7277]]
assert_almost_equal(np.round(mstats.obrientransform(*args).T,4),
result,4)
11 years ago
def test_kstwosamp(self):
x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
11 years ago
[4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
[3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
[nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
11 years ago
x = ma.fix_invalid(x).T
(winter,spring,summer,fall) = x.T
11 years ago
assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring),4),
(0.1818,0.9892))
assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'g'),4),
(0.1469,0.7734))
assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'l'),4),
(0.1818,0.6744))
11 years ago
def test_friedmanchisq(self):
# No missing values
args = ([9.0,9.5,5.0,7.5,9.5,7.5,8.0,7.0,8.5,6.0],
[7.0,6.5,7.0,7.5,5.0,8.0,6.0,6.5,7.0,7.0],
[6.0,8.0,4.0,6.0,7.0,6.5,6.0,4.0,6.5,3.0])
11 years ago
result = mstats.friedmanchisquare(*args)
assert_almost_equal(result[0], 10.4737, 4)
assert_almost_equal(result[1], 0.005317, 6)
# Missing values
x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
11 years ago
[4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
[3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
[nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
11 years ago
x = ma.fix_invalid(x)
result = mstats.friedmanchisquare(*x)
assert_almost_equal(result[0], 2.0156, 4)
assert_almost_equal(result[1], 0.5692, 4)
def test_regress_simple():
# Regress a line with sinusoidal noise. Test for #1273.
x = np.linspace(0, 100, 100)
y = 0.2 * np.linspace(0, 100, 100) + 10
y += np.sin(np.linspace(0, 20, 100))
slope, intercept, r_value, p_value, sterr = mstats.linregress(x, y)
assert_almost_equal(slope, 0.19644990055858422)
assert_almost_equal(intercept, 10.211269918932341)
def test_theilslopes():
# Test for basic slope and intercept.
slope, intercept, lower, upper = mstats.theilslopes([0,1,1])
assert_almost_equal(slope, 0.5)
assert_almost_equal(intercept, 0.5)
# Test for correct masking.
y = np.ma.array([0,1,100,1], mask=[False, False, True, False])
slope, intercept, lower, upper = mstats.theilslopes(y)
assert_almost_equal(slope, 1./3)
assert_almost_equal(intercept, 2./3)
# Test of confidence intervals from example in Sen (1968).
x = [1, 2, 3, 4, 10, 12, 18]
y = [9, 15, 19, 20, 45, 55, 78]
slope, intercept, lower, upper = mstats.theilslopes(y, x, 0.07)
assert_almost_equal(slope, 4)
assert_almost_equal(upper, 4.38, decimal=2)
assert_almost_equal(lower, 3.71, decimal=2)
11 years ago
def test_plotting_positions():
# Regression test for #1256
pos = mstats.plotting_positions(np.arange(3), 0, 0)
assert_array_almost_equal(pos.data, np.array([0.25, 0.5, 0.75]))
class TestNormalitytests():
def test_vs_nonmasked(self):
x = np.array((-2,-1,0,1,2,3)*4)**2
assert_array_almost_equal(mstats.normaltest(x),
stats.normaltest(x))
assert_array_almost_equal(mstats.skewtest(x),
stats.skewtest(x))
11 years ago
assert_array_almost_equal(mstats.kurtosistest(x),
stats.kurtosistest(x))
funcs = [stats.normaltest, stats.skewtest, stats.kurtosistest]
mfuncs = [mstats.normaltest, mstats.skewtest, mstats.kurtosistest]
x = [1, 2, 3, 4]
for func, mfunc in zip(funcs, mfuncs):
assert_raises(ValueError, func, x)
assert_raises(ValueError, mfunc, x)
def test_axis_None(self):
# Test axis=None (equal to axis=0 for 1-D input)
x = np.array((-2,-1,0,1,2,3)*4)**2
11 years ago
assert_allclose(mstats.normaltest(x, axis=None), mstats.normaltest(x))
assert_allclose(mstats.skewtest(x, axis=None), mstats.skewtest(x))
assert_allclose(mstats.kurtosistest(x, axis=None),
mstats.kurtosistest(x))
def test_maskedarray_input(self):
# Add some masked values, test result doesn't change
x = np.array((-2,-1,0,1,2,3)*4)**2
11 years ago
xm = np.ma.array(np.r_[np.inf, x, 10],
mask=np.r_[True, [False] * x.size, True])
assert_allclose(mstats.normaltest(xm), stats.normaltest(x))
assert_allclose(mstats.skewtest(xm), stats.skewtest(x))
assert_allclose(mstats.kurtosistest(xm), stats.kurtosistest(x))
def test_nd_input(self):
x = np.array((-2,-1,0,1,2,3)*4)**2
11 years ago
x_2d = np.vstack([x] * 2).T
for func in [mstats.normaltest, mstats.skewtest, mstats.kurtosistest]:
res_1d = func(x)
res_2d = func(x_2d)
assert_allclose(res_2d[0], [res_1d[0]] * 2)
assert_allclose(res_2d[1], [res_1d[1]] * 2)
#TODO: for all ttest functions, add tests with masked array inputs
11 years ago
class TestTtest_rel():
def test_vs_nonmasked(self):
np.random.seed(1234567)
outcome = np.random.randn(20, 4) + [0, 0, 1, 2]
# 1-D inputs
res1 = stats.ttest_rel(outcome[:, 0], outcome[:, 1])
res2 = mstats.ttest_rel(outcome[:, 0], outcome[:, 1])
assert_allclose(res1, res2)
# 2-D inputs
res1 = stats.ttest_rel(outcome[:, 0], outcome[:, 1], axis=None)
res2 = mstats.ttest_rel(outcome[:, 0], outcome[:, 1], axis=None)
assert_allclose(res1, res2)
res1 = stats.ttest_rel(outcome[:, :2], outcome[:, 2:], axis=0)
res2 = mstats.ttest_rel(outcome[:, :2], outcome[:, 2:], axis=0)
assert_allclose(res1, res2)
# Check default is axis=0
res3 = mstats.ttest_rel(outcome[:, :2], outcome[:, 2:])
assert_allclose(res2, res3)
def test_invalid_input_size(self):
assert_raises(ValueError, mstats.ttest_rel,
np.arange(10), np.arange(11))
x = np.arange(24)
assert_raises(ValueError, mstats.ttest_rel,
x.reshape(2, 3, 4), x.reshape(2, 4, 3), axis=1)
assert_raises(ValueError, mstats.ttest_rel,
x.reshape(2, 3, 4), x.reshape(2, 4, 3), axis=2)
def test_empty(self):
res1 = mstats.ttest_rel([], [])
assert_(np.all(np.isnan(res1)))
class TestTtest_ind():
def test_vs_nonmasked(self):
np.random.seed(1234567)
outcome = np.random.randn(20, 4) + [0, 0, 1, 2]
# 1-D inputs
res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1])
res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
assert_allclose(res1, res2)
# 2-D inputs
res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
assert_allclose(res1, res2)
res1 = stats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
res2 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
assert_allclose(res1, res2)
# Check default is axis=0
res3 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:])
assert_allclose(res2, res3)
def test_empty(self):
res1 = mstats.ttest_ind([], [])
assert_(np.all(np.isnan(res1)))
class TestTtest_1samp():
def test_vs_nonmasked(self):
np.random.seed(1234567)
outcome = np.random.randn(20, 4) + [0, 0, 1, 2]
# 1-D inputs
res1 = stats.ttest_1samp(outcome[:, 0], 1)
res2 = mstats.ttest_1samp(outcome[:, 0], 1)
assert_allclose(res1, res2)
# 2-D inputs
res1 = stats.ttest_1samp(outcome[:, 0], outcome[:, 1], axis=None)
res2 = mstats.ttest_1samp(outcome[:, 0], outcome[:, 1], axis=None)
assert_allclose(res1, res2)
res1 = stats.ttest_1samp(outcome[:, :2], outcome[:, 2:], axis=0)
res2 = mstats.ttest_1samp(outcome[:, :2], outcome[:, 2:], axis=0)
assert_allclose(res1, res2)
# Check default is axis=0
res3 = mstats.ttest_1samp(outcome[:, :2], outcome[:, 2:])
assert_allclose(res2, res3)
def test_empty(self):
res1 = mstats.ttest_1samp([], 1)
assert_(np.all(np.isnan(res1)))
class TestCompareWithStats(TestCase):
"""
Class to compare mstats results with stats results.
It is in general assumed that scipy.stats is at a more mature stage than
stats.mstats. If a routine in mstats results in similar results like in
scipy.stats, this is considered also as a proper validation of scipy.mstats
routine.
Different sample sizes are used for testing, as some problems between stats
and mstats are dependent on sample size.
Author: Alexander Loew
NOTE that some tests fail. This might be caused by
a) actual differences or bugs between stats and mstats
b) numerical inaccuracies
c) different definitions of routine interfaces
These failures need to be checked. Current workaround is to have disabled these tests,
but issuing reports on scipy-dev
"""
def get_n(self):
""" Returns list of sample sizes to be used for comparison. """
return [1000, 100, 10, 5]
def generate_xy_sample(self, n):
# This routine generates numpy arrays and corresponding masked arrays
# with the same data, but additional masked values
np.random.seed(1234567)
x = np.random.randn(n)
y = x + np.random.randn(n)
xm = np.ones(len(x) + 5) * 1e16
ym = np.ones(len(y) + 5) * 1e16
xm[0:len(x)] = x
ym[0:len(y)] = y
mask = xm > 9e15
xm = np.ma.array(xm, mask=mask)
ym = np.ma.array(ym, mask=mask)
return x, y, xm, ym
def generate_xy_sample2D(self, n, nx):
x = np.ones((n, nx)) * np.nan
y = np.ones((n, nx)) * np.nan
xm = np.ones((n+5, nx)) * np.nan
ym = np.ones((n+5, nx)) * np.nan
for i in range(nx):
x[:,i], y[:,i], dx, dy = self.generate_xy_sample(n)
xm[0:n, :] = x[0:n]
ym[0:n, :] = y[0:n]
xm = np.ma.array(xm, mask=np.isnan(xm))
ym = np.ma.array(ym, mask=np.isnan(ym))
return x, y, xm, ym
def test_linregress(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
res1 = stats.linregress(x, y)
res2 = stats.mstats.linregress(xm, ym)
assert_allclose(np.asarray(res1), np.asarray(res2))
def test_pearsonr(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r, p = stats.pearsonr(x, y)
rm, pm = stats.mstats.pearsonr(xm, ym)
assert_almost_equal(r, rm, decimal=14)
assert_almost_equal(p, pm, decimal=14)
def test_spearmanr(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r, p = stats.spearmanr(x, y)
rm, pm = stats.mstats.spearmanr(xm, ym)
assert_almost_equal(r, rm, 14)
assert_almost_equal(p, pm, 14)
def test_gmean(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.gmean(abs(x))
rm = stats.mstats.gmean(abs(xm))
assert_allclose(r, rm, rtol=1e-13)
r = stats.gmean(abs(y))
rm = stats.mstats.gmean(abs(ym))
assert_allclose(r, rm, rtol=1e-13)
def test_hmean(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.hmean(abs(x))
rm = stats.mstats.hmean(abs(xm))
assert_almost_equal(r, rm, 10)
r = stats.hmean(abs(y))
rm = stats.mstats.hmean(abs(ym))
assert_almost_equal(r, rm, 10)
def test_skew(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.skew(x)
rm = stats.mstats.skew(xm)
assert_almost_equal(r, rm, 10)
r = stats.skew(y)
rm = stats.mstats.skew(ym)
assert_almost_equal(r, rm, 10)
def test_moment(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.moment(x)
rm = stats.mstats.moment(xm)
assert_almost_equal(r, rm, 10)
r = stats.moment(y)
rm = stats.mstats.moment(ym)
assert_almost_equal(r, rm, 10)
def test_signaltonoise(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.signaltonoise(x)
rm = stats.mstats.signaltonoise(xm)
assert_almost_equal(r, rm, 10)
r = stats.signaltonoise(y)
rm = stats.mstats.signaltonoise(ym)
assert_almost_equal(r, rm, 10)
def test_betai(self):
np.random.seed(12345)
for i in range(10):
a = np.random.rand() * 5.
b = np.random.rand() * 200.
assert_equal(stats.betai(a, b, 0.), 0.)
assert_equal(stats.betai(a, b, 1.), 1.)
assert_equal(stats.mstats.betai(a, b, 0.), 0.)
assert_equal(stats.mstats.betai(a, b, 1.), 1.)
x = np.random.rand()
assert_almost_equal(stats.betai(a, b, x),
stats.mstats.betai(a, b, x), decimal=13)
def test_zscore(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
#reference solution
zx = (x - x.mean()) / x.std()
zy = (y - y.mean()) / y.std()
#validate stats
assert_allclose(stats.zscore(x), zx, rtol=1e-10)
assert_allclose(stats.zscore(y), zy, rtol=1e-10)
#compare stats and mstats
assert_allclose(stats.zscore(x), stats.mstats.zscore(xm[0:len(x)]),
rtol=1e-10)
assert_allclose(stats.zscore(y), stats.mstats.zscore(ym[0:len(y)]),
rtol=1e-10)
def test_kurtosis(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.kurtosis(x)
rm = stats.mstats.kurtosis(xm)
assert_almost_equal(r, rm, 10)
r = stats.kurtosis(y)
rm = stats.mstats.kurtosis(ym)
assert_almost_equal(r, rm, 10)
def test_sem(self):
# example from stats.sem doc
a = np.arange(20).reshape(5,4)
am = np.ma.array(a)
r = stats.sem(a,ddof=1)
rm = stats.mstats.sem(am, ddof=1)
assert_allclose(r, 2.82842712, atol=1e-5)
assert_allclose(rm, 2.82842712, atol=1e-5)
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=0),
stats.sem(x, axis=None, ddof=0), decimal=13)
assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=0),
stats.sem(y, axis=None, ddof=0), decimal=13)
assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=1),
stats.sem(x, axis=None, ddof=1), decimal=13)
assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=1),
stats.sem(y, axis=None, ddof=1), decimal=13)
def test_describe(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.describe(x, ddof=1)
rm = stats.mstats.describe(xm, ddof=1)
for ii in range(6):
assert_almost_equal(np.asarray(r[ii]),
np.asarray(rm[ii]),
decimal=12)
def test_rankdata(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.rankdata(x)
rm = stats.mstats.rankdata(x)
assert_allclose(r, rm)
def test_tmean(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
assert_almost_equal(stats.tmean(x),stats.mstats.tmean(xm), 14)
assert_almost_equal(stats.tmean(y),stats.mstats.tmean(ym), 14)
def test_tmax(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
assert_almost_equal(stats.tmax(x,2.),
stats.mstats.tmax(xm,2.), 10)
assert_almost_equal(stats.tmax(y,2.),
stats.mstats.tmax(ym,2.), 10)
def test_tmin(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
assert_equal(stats.tmin(x),stats.mstats.tmin(xm))
assert_equal(stats.tmin(y),stats.mstats.tmin(ym))
assert_almost_equal(stats.tmin(x,lowerlimit=-1.),
stats.mstats.tmin(xm,lowerlimit=-1.), 10)
assert_almost_equal(stats.tmin(y,lowerlimit=-1.),
stats.mstats.tmin(ym,lowerlimit=-1.), 10)
def test_zmap(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
z = stats.zmap(x,y)
zm = stats.mstats.zmap(xm,ym)
assert_allclose(z, zm[0:len(z)], atol=1e-10)
def test_variation(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
assert_almost_equal(stats.variation(x), stats.mstats.variation(xm),
decimal=12)
assert_almost_equal(stats.variation(y), stats.mstats.variation(ym),
decimal=12)
def test_tvar(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
assert_almost_equal(stats.tvar(x), stats.mstats.tvar(xm),
decimal=12)
assert_almost_equal(stats.tvar(y), stats.mstats.tvar(ym),
decimal=12)
def test_trimboth(self):
a = np.arange(20)
b = stats.trimboth(a, 0.1)
bm = stats.mstats.trimboth(a, 0.1)
assert_allclose(b, bm.data[~bm.mask])
def test_tsem(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
assert_almost_equal(stats.tsem(x),stats.mstats.tsem(xm), decimal=14)
assert_almost_equal(stats.tsem(y),stats.mstats.tsem(ym), decimal=14)
assert_almost_equal(stats.tsem(x,limits=(-2.,2.)),
stats.mstats.tsem(xm,limits=(-2.,2.)),
decimal=14)
def test_skewtest(self):
# this test is for 1D data
for n in self.get_n():
if n > 8:
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.skewtest(x)
rm = stats.mstats.skewtest(xm)
assert_equal(r[0], rm[0])
# TODO this test is not performed as it is a known issue that
# mstats returns a slightly different p-value what is a bit
# strange is that other tests like test_maskedarray_input don't
# fail!
#~ assert_almost_equal(r[1], rm[1])
def test_skewtest_2D_notmasked(self):
# a normal ndarray is passed to the masked function
x = np.random.random((20, 2)) * 20.
r = stats.skewtest(x)
rm = stats.mstats.skewtest(x)
assert_allclose(np.asarray(r), np.asarray(rm))
def test_skewtest_2D_WithMask(self):
nx = 2
for n in self.get_n():
if n > 8:
x, y, xm, ym = self.generate_xy_sample2D(n, nx)
r = stats.skewtest(x)
rm = stats.mstats.skewtest(xm)
assert_equal(r[0][0],rm[0][0])
assert_equal(r[0][1],rm[0][1])
def test_normaltest(self):
np.seterr(over='raise')
for n in self.get_n():
if n > 8:
with warnings.catch_warnings():
warnings.filterwarnings('ignore', category=UserWarning)
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.normaltest(x)
rm = stats.mstats.normaltest(xm)
assert_allclose(np.asarray(r), np.asarray(rm))
def test_find_repeats(self):
x = np.asarray([1,1,2,2,3,3,3,4,4,4,4]).astype('float')
tmp = np.asarray([1,1,2,2,3,3,3,4,4,4,4,5,5,5,5]).astype('float')
mask = (tmp == 5.)
xm = np.ma.array(tmp, mask=mask)
r = stats.find_repeats(x)
rm = stats.mstats.find_repeats(xm)
assert_equal(r,rm)
def test_kendalltau(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.kendalltau(x, y)
rm = stats.mstats.kendalltau(xm, ym)
assert_almost_equal(r[0], rm[0], decimal=10)
assert_almost_equal(r[1], rm[1], decimal=7)
def test_obrientransform(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.obrientransform(x)
rm = stats.mstats.obrientransform(xm)
assert_almost_equal(r.T, rm[0:len(x)])
11 years ago
if __name__ == "__main__":
run_module_suite()