pywafo/wafo/stats/tests/test_contingency.py

from __future__ import division, print_function, absolute_import

import numpy as np
from numpy.testing import (run_module_suite, assert_equal, assert_array_equal,
         assert_array_almost_equal, assert_approx_equal, assert_raises,
         assert_allclose)
from scipy.special import xlogy
from scipy.stats.contingency import margins, expected_freq, chi2_contingency


def test_margins():
    a = np.array([1])
    m = margins(a)
    assert_equal(len(m), 1)
    m0 = m[0]
    assert_array_equal(m0, np.array([1]))

    a = np.array([[1]])
    m0, m1 = margins(a)
    expected0 = np.array([[1]])
    expected1 = np.array([[1]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)

    a = np.arange(12).reshape(2, 6)
    m0, m1 = margins(a)
    expected0 = np.array([[15], [51]])
    expected1 = np.array([[6, 8, 10, 12, 14, 16]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)

    a = np.arange(24).reshape(2, 3, 4)
    m0, m1, m2 = margins(a)
    expected0 = np.array([[[66]], [[210]]])
    expected1 = np.array([[[60], [92], [124]]])
    expected2 = np.array([[[60, 66, 72, 78]]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)
    assert_array_equal(m2, expected2)


def test_expected_freq():
    assert_array_equal(expected_freq([1]), np.array([1.0]))

    observed = np.array([[[2, 0], [0, 2]], [[0, 2], [2, 0]], [[1, 1], [1, 1]]])
    e = expected_freq(observed)
    assert_array_equal(e, np.ones_like(observed))

    observed = np.array([[10, 10, 20], [20, 20, 20]])
    e = expected_freq(observed)
    correct = np.array([[12., 12., 16.], [18., 18., 24.]])
    assert_array_almost_equal(e, correct)


def test_chi2_contingency_trivial():
    # Some very simple tests for chi2_contingency.

    # A trivial case
    obs = np.array([[1, 2], [1, 2]])
    chi2, p, dof, expected = chi2_contingency(obs, correction=False)
    assert_equal(chi2, 0.0)
    assert_equal(p, 1.0)
    assert_equal(dof, 1)
    assert_array_equal(obs, expected)

    # A *really* trivial case: 1-D data.
    obs = np.array([1, 2, 3])
    chi2, p, dof, expected = chi2_contingency(obs, correction=False)
    assert_equal(chi2, 0.0)
    assert_equal(p, 1.0)
    assert_equal(dof, 0)
    assert_array_equal(obs, expected)


def test_chi2_contingency_R():
    # Some test cases that were computed independently, using R.

    Rcode = \
    """
    # Data vector.
    data <- c(
      12, 34, 23,     4,  47,  11,
      35, 31, 11,    34,  10,  18,
      12, 32,  9,    18,  13,  19,
      12, 12, 14,     9,  33,  25
      )

    # Create factor tags:r=rows, c=columns, t=tiers
    r <- factor(gl(4, 2*3, 2*3*4, labels=c("r1", "r2", "r3", "r4")))
    c <- factor(gl(3, 1,   2*3*4, labels=c("c1", "c2", "c3")))
    t <- factor(gl(2, 3,   2*3*4, labels=c("t1", "t2")))

    # 3-way Chi squared test of independence
    s = summary(xtabs(data~r+c+t))
    print(s)
    """
    Routput = \
    """
    Call: xtabs(formula = data ~ r + c + t)
    Number of cases in table: 478
    Number of factors: 3
    Test for independence of all factors:
            Chisq = 102.17, df = 17, p-value = 3.514e-14
    """
    obs = np.array(
        [[[12, 34, 23],
          [35, 31, 11],
          [12, 32, 9],
          [12, 12, 14]],
         [[4, 47, 11],
          [34, 10, 18],
          [18, 13, 19],
          [9, 33, 25]]])
    chi2, p, dof, expected = chi2_contingency(obs)
    assert_approx_equal(chi2, 102.17, significant=5)
    assert_approx_equal(p, 3.514e-14, significant=4)
    assert_equal(dof, 17)

    Rcode = \
    """
    # Data vector.
    data <- c(
        #
        12, 17,
        11, 16,
        #
        11, 12,
        15, 16,
        #
        23, 15,
        30, 22,
        #
        14, 17,
        15, 16
        )

    # Create factor tags:r=rows, c=columns, d=depths(?), t=tiers
    r <- factor(gl(2, 2,  2*2*2*2, labels=c("r1", "r2")))
    c <- factor(gl(2, 1,  2*2*2*2, labels=c("c1", "c2")))
    d <- factor(gl(2, 4,  2*2*2*2, labels=c("d1", "d2")))
    t <- factor(gl(2, 8,  2*2*2*2, labels=c("t1", "t2")))

    # 4-way Chi squared test of independence
    s = summary(xtabs(data~r+c+d+t))
    print(s)
    """
    Routput = \
    """
    Call: xtabs(formula = data ~ r + c + d + t)
    Number of cases in table: 262
    Number of factors: 4
    Test for independence of all factors:
            Chisq = 8.758, df = 11, p-value = 0.6442
    """
    obs = np.array(
        [[[[12, 17],
           [11, 16]],
          [[11, 12],
           [15, 16]]],
         [[[23, 15],
           [30, 22]],
          [[14, 17],
           [15, 16]]]])
    chi2, p, dof, expected = chi2_contingency(obs)
    assert_approx_equal(chi2, 8.758, significant=4)
    assert_approx_equal(p, 0.6442, significant=4)
    assert_equal(dof, 11)


def test_chi2_contingency_g():
    c = np.array([[15, 60], [15, 90]])
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', correction=False)
    assert_allclose(g, 2*xlogy(c, c/e).sum())

    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', correction=True)
    c_corr = c + np.array([[-0.5, 0.5], [0.5, -0.5]])
    assert_allclose(g, 2*xlogy(c_corr, c_corr/e).sum())

    c = np.array([[10, 12, 10], [12, 10, 10]])
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood')
    assert_allclose(g, 2*xlogy(c, c/e).sum())


def test_chi2_contingency_bad_args():
    # Test that "bad" inputs raise a ValueError.

    # Negative value in the array of observed frequencies.
    obs = np.array([[-1, 10], [1, 2]])
    assert_raises(ValueError, chi2_contingency, obs)

    # The zeros in this will result in zeros in the array
    # of expected frequencies.
    obs = np.array([[0, 1], [0, 1]])
    assert_raises(ValueError, chi2_contingency, obs)

    # A degenerate case: `observed` has size 0.
    obs = np.empty((0, 8))
    assert_raises(ValueError, chi2_contingency, obs)


if __name__ == "__main__":
    run_module_suite()
Updated from wafo.stats from scipy.stats 11 years ago			`from __future__ import division, print_function, absolute_import`

			`import numpy as np`
			`from numpy.testing import (run_module_suite, assert_equal, assert_array_equal,`
			`assert_array_almost_equal, assert_approx_equal, assert_raises,`
			`assert_allclose)`
			`from scipy.special import xlogy`
			`from scipy.stats.contingency import margins, expected_freq, chi2_contingency`


			`def test_margins():`
			`a = np.array([1])`
			`m = margins(a)`
			`assert_equal(len(m), 1)`
			`m0 = m[0]`
			`assert_array_equal(m0, np.array([1]))`

			`a = np.array([[1]])`
			`m0, m1 = margins(a)`
			`expected0 = np.array([[1]])`
			`expected1 = np.array([[1]])`
			`assert_array_equal(m0, expected0)`
			`assert_array_equal(m1, expected1)`

			`a = np.arange(12).reshape(2, 6)`
			`m0, m1 = margins(a)`
			`expected0 = np.array([[15], [51]])`
			`expected1 = np.array([[6, 8, 10, 12, 14, 16]])`
			`assert_array_equal(m0, expected0)`
			`assert_array_equal(m1, expected1)`

			`a = np.arange(24).reshape(2, 3, 4)`
			`m0, m1, m2 = margins(a)`
			`expected0 = np.array([[[66]], [[210]]])`
			`expected1 = np.array([[[60], [92], [124]]])`
			`expected2 = np.array([[[60, 66, 72, 78]]])`
			`assert_array_equal(m0, expected0)`
			`assert_array_equal(m1, expected1)`
			`assert_array_equal(m2, expected2)`


			`def test_expected_freq():`
			`assert_array_equal(expected_freq([1]), np.array([1.0]))`

			`observed = np.array([[[2, 0], [0, 2]], [[0, 2], [2, 0]], [[1, 1], [1, 1]]])`
			`e = expected_freq(observed)`
			`assert_array_equal(e, np.ones_like(observed))`

			`observed = np.array([[10, 10, 20], [20, 20, 20]])`
			`e = expected_freq(observed)`
			`correct = np.array([[12., 12., 16.], [18., 18., 24.]])`
			`assert_array_almost_equal(e, correct)`


			`def test_chi2_contingency_trivial():`
			`# Some very simple tests for chi2_contingency.`

			`# A trivial case`
			`obs = np.array([[1, 2], [1, 2]])`
			`chi2, p, dof, expected = chi2_contingency(obs, correction=False)`
			`assert_equal(chi2, 0.0)`
			`assert_equal(p, 1.0)`
			`assert_equal(dof, 1)`
			`assert_array_equal(obs, expected)`

			`# A really trivial case: 1-D data.`
			`obs = np.array([1, 2, 3])`
			`chi2, p, dof, expected = chi2_contingency(obs, correction=False)`
			`assert_equal(chi2, 0.0)`
			`assert_equal(p, 1.0)`
			`assert_equal(dof, 0)`
			`assert_array_equal(obs, expected)`


			`def test_chi2_contingency_R():`
			`# Some test cases that were computed independently, using R.`

			`Rcode = \`
			`"""`
			`# Data vector.`
			`data <- c(`
			`12, 34, 23, 4, 47, 11,`
			`35, 31, 11, 34, 10, 18,`
			`12, 32, 9, 18, 13, 19,`
			`12, 12, 14, 9, 33, 25`
			`)`

			`# Create factor tags:r=rows, c=columns, t=tiers`
			`r <- factor(gl(4, 23, 23*4, labels=c("r1", "r2", "r3", "r4")))`
			`c <- factor(gl(3, 1, 234, labels=c("c1", "c2", "c3")))`
			`t <- factor(gl(2, 3, 234, labels=c("t1", "t2")))`

			`# 3-way Chi squared test of independence`
			`s = summary(xtabs(data~r+c+t))`
			`print(s)`
			`"""`
			`Routput = \`
			`"""`
			`Call: xtabs(formula = data ~ r + c + t)`
			`Number of cases in table: 478`
			`Number of factors: 3`
			`Test for independence of all factors:`
			`Chisq = 102.17, df = 17, p-value = 3.514e-14`
			`"""`
			`obs = np.array(`
			`[[[12, 34, 23],`
			`[35, 31, 11],`
			`[12, 32, 9],`
			`[12, 12, 14]],`
			`[[4, 47, 11],`
			`[34, 10, 18],`
			`[18, 13, 19],`
			`[9, 33, 25]]])`
			`chi2, p, dof, expected = chi2_contingency(obs)`
			`assert_approx_equal(chi2, 102.17, significant=5)`
			`assert_approx_equal(p, 3.514e-14, significant=4)`
			`assert_equal(dof, 17)`

			`Rcode = \`
			`"""`
			`# Data vector.`
			`data <- c(`
			`#`
			`12, 17,`
			`11, 16,`
			`#`
			`11, 12,`
			`15, 16,`
			`#`
			`23, 15,`
			`30, 22,`
			`#`
			`14, 17,`
			`15, 16`
			`)`

			`# Create factor tags:r=rows, c=columns, d=depths(?), t=tiers`
			`r <- factor(gl(2, 2, 222*2, labels=c("r1", "r2")))`
			`c <- factor(gl(2, 1, 222*2, labels=c("c1", "c2")))`
			`d <- factor(gl(2, 4, 222*2, labels=c("d1", "d2")))`
			`t <- factor(gl(2, 8, 222*2, labels=c("t1", "t2")))`

			`# 4-way Chi squared test of independence`
			`s = summary(xtabs(data~r+c+d+t))`
			`print(s)`
			`"""`
			`Routput = \`
			`"""`
			`Call: xtabs(formula = data ~ r + c + d + t)`
			`Number of cases in table: 262`
			`Number of factors: 4`
			`Test for independence of all factors:`
			`Chisq = 8.758, df = 11, p-value = 0.6442`
			`"""`
			`obs = np.array(`
			`[[[[12, 17],`
			`[11, 16]],`
			`[[11, 12],`
			`[15, 16]]],`
			`[[[23, 15],`
			`[30, 22]],`
			`[[14, 17],`
			`[15, 16]]]])`
			`chi2, p, dof, expected = chi2_contingency(obs)`
			`assert_approx_equal(chi2, 8.758, significant=4)`
			`assert_approx_equal(p, 0.6442, significant=4)`
			`assert_equal(dof, 11)`


			`def test_chi2_contingency_g():`
			`c = np.array([[15, 60], [15, 90]])`
			`g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', correction=False)`
			`assert_allclose(g, 2*xlogy(c, c/e).sum())`

			`g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', correction=True)`
			`c_corr = c + np.array([[-0.5, 0.5], [0.5, -0.5]])`
			`assert_allclose(g, 2*xlogy(c_corr, c_corr/e).sum())`

			`c = np.array([[10, 12, 10], [12, 10, 10]])`
			`g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood')`
			`assert_allclose(g, 2*xlogy(c, c/e).sum())`


			`def test_chi2_contingency_bad_args():`
			`# Test that "bad" inputs raise a ValueError.`

			`# Negative value in the array of observed frequencies.`
			`obs = np.array([[-1, 10], [1, 2]])`
			`assert_raises(ValueError, chi2_contingency, obs)`

			`# The zeros in this will result in zeros in the array`
			`# of expected frequencies.`
			`obs = np.array([[0, 1], [0, 1]])`
			`assert_raises(ValueError, chi2_contingency, obs)`

			# A degenerate case: `observed` has size 0.
			`obs = np.empty((0, 8))`
			`assert_raises(ValueError, chi2_contingency, obs)`


			`if __name__ == "__main__":`
			`run_module_suite()`