Formatted code + added tests
parent
93ed9616b1
commit
31f80c5798
@ -1,70 +1,76 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from wafo.spectrum.models import (Bretschneider, Jonswap, OchiHubble, Tmaspec,
|
from wafo.spectrum.models import (Bretschneider, Jonswap, OchiHubble, Tmaspec,
|
||||||
Torsethaugen, McCormick, Wallop)
|
Torsethaugen, McCormick, Wallop)
|
||||||
|
|
||||||
def test_bretschneider():
|
|
||||||
S = Bretschneider(Hm0=6.5,Tp=10)
|
def test_bretschneider():
|
||||||
vals = S((0,1,2,3))
|
S = Bretschneider(Hm0=6.5, Tp=10)
|
||||||
true_vals = np.array([ 0. , 1.69350993, 0.06352698, 0.00844783])
|
vals = S((0, 1, 2, 3))
|
||||||
assert((np.abs(vals-true_vals)<1e-7).all())
|
true_vals = np.array([0., 1.69350993, 0.06352698, 0.00844783])
|
||||||
|
assert((np.abs(vals - true_vals) < 1e-7).all())
|
||||||
def test_if_jonswap_with_gamma_one_equals_bretschneider():
|
|
||||||
S = Jonswap(Hm0=7, Tp=11,gamma=1)
|
|
||||||
vals = S((0,1,2,3))
|
def test_if_jonswap_with_gamma_one_equals_bretschneider():
|
||||||
true_vals = np.array([ 0. , 1.42694133, 0.05051648, 0.00669692])
|
S = Jonswap(Hm0=7, Tp=11, gamma=1)
|
||||||
assert((np.abs(vals-true_vals)<1e-7).all())
|
vals = S((0, 1, 2, 3))
|
||||||
|
true_vals = np.array([0., 1.42694133, 0.05051648, 0.00669692])
|
||||||
w = np.linspace(0,5)
|
assert((np.abs(vals - true_vals) < 1e-7).all())
|
||||||
S2 = Bretschneider(Hm0=7, Tp=11)
|
|
||||||
#JONSWAP with gamma=1 should be equal to Bretscneider:
|
w = np.linspace(0, 5)
|
||||||
assert(np.all(np.abs(S(w)-S2(w))<1.e-7))
|
S2 = Bretschneider(Hm0=7, Tp=11)
|
||||||
|
# JONSWAP with gamma=1 should be equal to Bretscneider:
|
||||||
|
assert(np.all(np.abs(S(w) - S2(w)) < 1.e-7))
|
||||||
def test_tmaspec():
|
|
||||||
S = Tmaspec(Hm0=7, Tp=11,gamma=1,h=10)
|
|
||||||
vals = S((0,1,2,3))
|
def test_tmaspec():
|
||||||
true_vals = np.array([ 0. , 0.70106233, 0.05022433, 0.00669692])
|
S = Tmaspec(Hm0=7, Tp=11, gamma=1, h=10)
|
||||||
assert((np.abs(vals-true_vals)<1e-7).all())
|
vals = S((0, 1, 2, 3))
|
||||||
|
true_vals = np.array([0., 0.70106233, 0.05022433, 0.00669692])
|
||||||
def test_torsethaugen():
|
assert((np.abs(vals - true_vals) < 1e-7).all())
|
||||||
|
|
||||||
S = Torsethaugen(Hm0=7, Tp=11,gamma=1,h=10)
|
|
||||||
vals = S((0,1,2,3))
|
def test_torsethaugen():
|
||||||
true_vals = np.array([ 0. , 1.19989709, 0.05819794, 0.0093541 ])
|
|
||||||
assert((np.abs(vals-true_vals)<1e-7).all())
|
S = Torsethaugen(Hm0=7, Tp=11, gamma=1, h=10)
|
||||||
|
vals = S((0, 1, 2, 3))
|
||||||
vals = S.wind(range(4))
|
true_vals = np.array([0., 1.19989709, 0.05819794, 0.0093541])
|
||||||
true_vals = np.array([ 0. , 1.13560528, 0.05529849, 0.00888989])
|
assert((np.abs(vals - true_vals) < 1e-7).all())
|
||||||
assert((np.abs(vals-true_vals)<1e-7).all())
|
|
||||||
vals = S.swell(range(4))
|
vals = S.wind(range(4))
|
||||||
true_vals = np.array([ 0. , 0.0642918 , 0.00289946, 0.00046421])
|
true_vals = np.array([0., 1.13560528, 0.05529849, 0.00888989])
|
||||||
assert((np.abs(vals-true_vals)<1e-7).all())
|
assert((np.abs(vals - true_vals) < 1e-7).all())
|
||||||
|
vals = S.swell(range(4))
|
||||||
def test_ochihubble():
|
true_vals = np.array([0., 0.0642918, 0.00289946, 0.00046421])
|
||||||
|
assert((np.abs(vals - true_vals) < 1e-7).all())
|
||||||
S = OchiHubble(par=2)
|
|
||||||
vals = S(range(4))
|
|
||||||
true_vals = np.array([ 0. , 0.90155636, 0.04185445, 0.00583207])
|
def test_ochihubble():
|
||||||
assert((np.abs(vals-true_vals)<1e-7).all())
|
|
||||||
|
S = OchiHubble(par=2)
|
||||||
def test_mccormick():
|
vals = S(range(4))
|
||||||
|
true_vals = np.array([0., 0.90155636, 0.04185445, 0.00583207])
|
||||||
S = McCormick(Hm0=6.5,Tp=10)
|
assert((np.abs(vals - true_vals) < 1e-7).all())
|
||||||
vals = S(range(4))
|
|
||||||
true_vals = np.array([ 0. , 1.87865908, 0.15050447, 0.02994663])
|
|
||||||
assert((np.abs(vals-true_vals)<1e-7).all())
|
def test_mccormick():
|
||||||
|
|
||||||
def test_wallop():
|
S = McCormick(Hm0=6.5, Tp=10)
|
||||||
|
vals = S(range(4))
|
||||||
S = Wallop(Hm0=6.5, Tp=10)
|
true_vals = np.array([0., 1.87865908, 0.15050447, 0.02994663])
|
||||||
vals = S(range(4))
|
assert((np.abs(vals - true_vals) < 1e-7).all())
|
||||||
true_vals = np.array([ 0.00000000e+00, 9.36921871e-01, 2.76991078e-03,
|
|
||||||
7.72996150e-05])
|
|
||||||
assert((np.abs(vals-true_vals)<1e-7).all())
|
def test_wallop():
|
||||||
|
|
||||||
|
S = Wallop(Hm0=6.5, Tp=10)
|
||||||
if __name__ == '__main__':
|
vals = S(range(4))
|
||||||
#main()
|
true_vals = np.array([0.00000000e+00, 9.36921871e-01, 2.76991078e-03,
|
||||||
import nose
|
7.72996150e-05])
|
||||||
# nose.run()
|
assert((np.abs(vals - true_vals) < 1e-7).all())
|
||||||
test_tmaspec()
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
# main()
|
||||||
|
import nose
|
||||||
|
nose.run()
|
||||||
|
#test_tmaspec()
|
||||||
|
@ -1,203 +1,225 @@
|
|||||||
import wafo.spectrum.models as sm
|
import wafo.spectrum.models as sm
|
||||||
from wafo.spectrum import SpecData1D
|
from wafo.spectrum import SpecData1D
|
||||||
import numpy as np
|
import numpy as np
|
||||||
def slow(f):
|
import unittest
|
||||||
f.slow = True
|
|
||||||
return f
|
|
||||||
|
def slow(f):
|
||||||
@slow
|
f.slow = True
|
||||||
def test_tocovmatrix():
|
return f
|
||||||
Sj = sm.Jonswap()
|
|
||||||
S = Sj.tospecdata()
|
|
||||||
acfmat = S.tocov_matrix(nr=3, nt=256, dt=0.1)
|
class TestSpectrum(unittest.TestCase):
|
||||||
vals = acfmat[:2,:]
|
|
||||||
true_vals = np.array([[ 3.06073383, 0. , -1.67748256 , 0. ],
|
@slow
|
||||||
[ 3.05235423, -0.1674357 , -1.66811444, 0.18693242]])
|
def test_tocovmatrix(self):
|
||||||
assert((np.abs(vals-true_vals)<1e-7).all())
|
Sj = sm.Jonswap()
|
||||||
|
S = Sj.tospecdata()
|
||||||
|
acfmat = S.tocov_matrix(nr=3, nt=256, dt=0.1)
|
||||||
def test_tocovdata():
|
vals = acfmat[:2, :]
|
||||||
Sj = sm.Jonswap()
|
true_vals = np.array([[3.06073383, 0.0000000, -1.67748256, 0.],
|
||||||
S = Sj.tospecdata()
|
[3.05235423, -0.1674357, -1.66811444, 0.18693242]])
|
||||||
Nt = len(S.data)-1
|
self.assertTrue((np.abs(vals - true_vals) < 1e-7).all())
|
||||||
acf = S.tocovdata(nr=0, nt=Nt)
|
|
||||||
vals = acf.data[:5]
|
|
||||||
|
|
||||||
true_vals = np.array([3.06090339, 2.22658399, 0.45307391, -1.17495501, -2.05649042])
|
def test_tocovdata():
|
||||||
assert((np.abs(vals-true_vals)<1e-6).all())
|
Sj = sm.Jonswap()
|
||||||
|
S = Sj.tospecdata()
|
||||||
|
Nt = len(S.data) - 1
|
||||||
def test_to_t_pdf():
|
acf = S.tocovdata(nr=0, nt=Nt)
|
||||||
'''
|
vals = acf.data[:5]
|
||||||
The density of Tc is computed by:
|
|
||||||
>>> from wafo.spectrum import models as sm
|
true_vals = np.array(
|
||||||
>>> Sj = sm.Jonswap()
|
[3.06090339, 2.22658399, 0.45307391, -1.17495501, -2.05649042])
|
||||||
>>> S = Sj.tospecdata()
|
assert((np.abs(vals - true_vals) < 1e-6).all())
|
||||||
>>> f = S.to_t_pdf(pdef='Tc', paramt=(0, 10, 51), speed=7, seed=100)
|
|
||||||
>>> ['%2.3f' % val for val in f.data[:10]]
|
|
||||||
['0.000', '0.014', '0.027', '0.040', '0.050', '0.059', '0.067', '0.072', '0.077', '0.081']
|
def test_to_t_pdf():
|
||||||
|
Sj = sm.Jonswap()
|
||||||
estimated error bounds
|
S = Sj.tospecdata()
|
||||||
>>> ['%2.4f' % val for val in f.err[:10]]
|
f = S.to_t_pdf(pdef='Tc', paramt=(0, 10, 51), speed=7, seed=100)
|
||||||
['0.0000', '0.0003', '0.0003', '0.0004', '0.0006', '0.0009', '0.0016', '0.0019', '0.0020', '0.0021']
|
vals = ['%2.3f' % val for val in f.data[:10]]
|
||||||
'''
|
truevals = ['0.000', '0.014', '0.027', '0.040',
|
||||||
@slow
|
'0.050', '0.059', '0.067', '0.072', '0.077', '0.081']
|
||||||
def test_sim():
|
|
||||||
|
# estimated error bounds
|
||||||
Sj = sm.Jonswap();S = Sj.tospecdata()
|
vals = ['%2.4f' % val for val in f.err[:10]]
|
||||||
ns =100; dt = .2
|
truevals = ['0.0000', '0.0003', '0.0003', '0.0004',
|
||||||
x1 = S.sim(ns,dt=dt)
|
'0.0006', '0.0009', '0.0016', '0.0019', '0.0020', '0.0021']
|
||||||
|
|
||||||
|
|
||||||
import scipy.stats as st
|
@slow
|
||||||
x2 = S.sim(20000,20)
|
def test_sim():
|
||||||
truth1 = [0,np.sqrt(S.moment(1)[0]),0., 0.]
|
|
||||||
funs = [np.mean,np.std,st.skew,st.kurtosis]
|
Sj = sm.Jonswap()
|
||||||
for fun,trueval in zip(funs,truth1):
|
S = Sj.tospecdata()
|
||||||
res = fun(x2[:,1::], axis=0)
|
ns = 100
|
||||||
m = res.mean()
|
dt = .2
|
||||||
sa = res.std()
|
x1 = S.sim(ns, dt=dt)
|
||||||
#trueval, m, sa
|
|
||||||
assert(np.abs(m-trueval)<sa)
|
import scipy.stats as st
|
||||||
@slow
|
x2 = S.sim(20000, 20)
|
||||||
def test_sim_nl():
|
truth1 = [0, np.sqrt(S.moment(1)[0]), 0., 0.]
|
||||||
|
funs = [np.mean, np.std, st.skew, st.kurtosis]
|
||||||
Sj = sm.Jonswap();S = Sj.tospecdata()
|
for fun, trueval in zip(funs, truth1):
|
||||||
ns =100; dt = .2
|
res = fun(x2[:, 1::], axis=0)
|
||||||
x1 = S.sim_nl(ns,dt=dt)
|
m = res.mean()
|
||||||
|
sa = res.std()
|
||||||
import numpy as np
|
#trueval, m, sa
|
||||||
import scipy.stats as st
|
assert(np.abs(m - trueval) < sa)
|
||||||
x2, x1 = S.sim_nl(ns=20000,cases=40)
|
|
||||||
truth1 = [0,np.sqrt(S.moment(1)[0][0])] + S.stats_nl(moments='sk')
|
|
||||||
truth1[-1] = truth1[-1]-3
|
@slow
|
||||||
|
def test_sim_nl():
|
||||||
#truth1
|
|
||||||
#[0, 1.7495200310090633, 0.18673120577479801, 0.061988521262417606]
|
Sj = sm.Jonswap()
|
||||||
|
S = Sj.tospecdata()
|
||||||
funs = [np.mean,np.std,st.skew,st.kurtosis]
|
ns = 100
|
||||||
for fun,trueval in zip(funs,truth1):
|
dt = .2
|
||||||
res = fun(x2[:,1::], axis=0)
|
x1 = S.sim_nl(ns, dt=dt)
|
||||||
m = res.mean()
|
|
||||||
sa = res.std()
|
import numpy as np
|
||||||
#trueval, m, sa
|
import scipy.stats as st
|
||||||
assert(np.abs(m-trueval)<2*sa)
|
x2, x1 = S.sim_nl(ns=20000, cases=40)
|
||||||
|
truth1 = [0, np.sqrt(S.moment(1)[0][0])] + S.stats_nl(moments='sk')
|
||||||
|
truth1[-1] = truth1[-1] - 3
|
||||||
def test_stats_nl():
|
|
||||||
|
# truth1
|
||||||
Hs = 7.
|
#[0, 1.7495200310090633, 0.18673120577479801, 0.061988521262417606]
|
||||||
Sj = sm.Jonswap(Hm0=Hs, Tp=11)
|
|
||||||
S = Sj.tospecdata()
|
funs = [np.mean, np.std, st.skew, st.kurtosis]
|
||||||
me, va, sk, ku = S.stats_nl(moments='mvsk')
|
for fun, trueval in zip(funs, truth1):
|
||||||
assert(me==0.0)
|
res = fun(x2[:, 1::], axis=0)
|
||||||
assert(va==3.0608203389019537)
|
m = res.mean()
|
||||||
assert(sk==0.18673120577479801)
|
sa = res.std()
|
||||||
assert(ku==3.0619885212624176)
|
#trueval, m, sa
|
||||||
|
assert(np.abs(m - trueval) < 2 * sa)
|
||||||
def test_testgaussian():
|
|
||||||
'''
|
|
||||||
>>> import wafo.spectrum.models as sm
|
def test_stats_nl():
|
||||||
>>> import wafo.transform.models as wtm
|
|
||||||
>>> import wafo.objects as wo
|
Hs = 7.
|
||||||
>>> Hs = 7
|
Sj = sm.Jonswap(Hm0=Hs, Tp=11)
|
||||||
>>> Sj = sm.Jonswap(Hm0=Hs)
|
S = Sj.tospecdata()
|
||||||
>>> S0 = Sj.tospecdata()
|
me, va, sk, ku = S.stats_nl(moments='mvsk')
|
||||||
>>> ns =100; dt = .2
|
assert(me == 0.0)
|
||||||
>>> x1 = S0.sim(ns, dt=dt)
|
assert(va == 3.0608203389019537)
|
||||||
|
assert(sk == 0.18673120577479801)
|
||||||
>>> S = S0.copy()
|
assert(ku == 3.0619885212624176)
|
||||||
>>> me, va, sk, ku = S.stats_nl(moments='mvsk')
|
|
||||||
>>> S.tr = wtm.TrHermite(mean=me, sigma=Hs/4, skew=sk, kurt=ku, ysigma=Hs/4)
|
|
||||||
>>> ys = wo.mat2timeseries(S.sim(ns=2**13))
|
def test_testgaussian():
|
||||||
>>> g0, gemp = ys.trdata()
|
'''
|
||||||
>>> t0 = g0.dist2gauss()
|
>>> import wafo.spectrum.models as sm
|
||||||
>>> t1 = S0.testgaussian(ns=2**13, t0=t0, cases=50)
|
>>> import wafo.transform.models as wtm
|
||||||
>>> sum(t1>t0)<5
|
>>> import wafo.objects as wo
|
||||||
True
|
>>> Hs = 7
|
||||||
'''
|
>>> Sj = sm.Jonswap(Hm0=Hs)
|
||||||
|
>>> S0 = Sj.tospecdata()
|
||||||
def test_moment():
|
>>> ns =100; dt = .2
|
||||||
Sj = sm.Jonswap(Hm0=5)
|
>>> x1 = S0.sim(ns, dt=dt)
|
||||||
S = Sj.tospecdata() #Make spectrum ob
|
|
||||||
vals, txt = S.moment()
|
>>> S = S0.copy()
|
||||||
true_vals = [1.5614600345079888, 0.95567089481941048]
|
>>> me, va, sk, ku = S.stats_nl(moments='mvsk')
|
||||||
true_txt = ['m0', 'm0tt']
|
>>> S.tr = wtm.TrHermite(mean=me, sigma=Hs/4, skew=sk, kurt=ku, ysigma=Hs/4)
|
||||||
for tv,v in zip(true_vals, vals):
|
>>> ys = wo.mat2timeseries(S.sim(ns=2**13))
|
||||||
assert(tv==v)
|
>>> g0, gemp = ys.trdata()
|
||||||
|
>>> t0 = g0.dist2gauss()
|
||||||
def test_nyquist_freq():
|
>>> t1 = S0.testgaussian(ns=2**13, t0=t0, cases=50)
|
||||||
|
>>> sum(t1>t0)<5
|
||||||
Sj = sm.Jonswap(Hm0=5)
|
True
|
||||||
S = Sj.tospecdata() #Make spectrum ob
|
'''
|
||||||
assert(S.nyquist_freq()==3.0)
|
|
||||||
|
|
||||||
def test_sampling_period():
|
def test_moment():
|
||||||
|
Sj = sm.Jonswap(Hm0=5)
|
||||||
Sj = sm.Jonswap(Hm0=5)
|
S = Sj.tospecdata() # Make spectrum ob
|
||||||
S = Sj.tospecdata() #Make spectrum ob
|
vals, txt = S.moment()
|
||||||
assert( S.sampling_period()== 1.0471975511965976)
|
true_vals = [1.5614600345079888, 0.95567089481941048]
|
||||||
|
true_txt = ['m0', 'm0tt']
|
||||||
def test_normalize():
|
for tv, v in zip(true_vals, vals):
|
||||||
|
assert(tv == v)
|
||||||
Sj = sm.Jonswap(Hm0=5)
|
|
||||||
S = Sj.tospecdata() #Make spectrum ob
|
|
||||||
S.moment(2)
|
def test_nyquist_freq():
|
||||||
([1.5614600345079888, 0.95567089481941048], ['m0', 'm0tt'])
|
|
||||||
vals, txt = S.moment(2)
|
Sj = sm.Jonswap(Hm0=5)
|
||||||
true_vals = [1.5614600345079888, 0.95567089481941048]
|
S = Sj.tospecdata() # Make spectrum ob
|
||||||
for tv,v in zip(true_vals, vals):
|
assert(S.nyquist_freq() == 3.0)
|
||||||
assert(tv==v)
|
|
||||||
|
|
||||||
Sn = S.copy();
|
def test_sampling_period():
|
||||||
Sn.normalize()
|
|
||||||
|
Sj = sm.Jonswap(Hm0=5)
|
||||||
#Now the moments should be one
|
S = Sj.tospecdata() # Make spectrum ob
|
||||||
new_vals, txt = Sn.moment(2)
|
assert(S.sampling_period() == 1.0471975511965976)
|
||||||
for v in new_vals:
|
|
||||||
assert(np.abs(v-1.0)<1e-7)
|
|
||||||
|
def test_normalize():
|
||||||
def test_characteristic():
|
|
||||||
'''
|
Sj = sm.Jonswap(Hm0=5)
|
||||||
>>> import wafo.spectrum.models as sm
|
S = Sj.tospecdata() # Make spectrum ob
|
||||||
>>> Sj = sm.Jonswap(Hm0=5)
|
S.moment(2)
|
||||||
>>> S = Sj.tospecdata() #Make spectrum ob
|
([1.5614600345079888, 0.95567089481941048], ['m0', 'm0tt'])
|
||||||
>>> S.characteristic(1)
|
vals, txt = S.moment(2)
|
||||||
(array([ 8.59007646]), array([[ 0.03040216]]), ['Tm01'])
|
true_vals = [1.5614600345079888, 0.95567089481941048]
|
||||||
|
for tv, v in zip(true_vals, vals):
|
||||||
>>> [ch, R, txt] = S.characteristic([1,2,3]) # fact a vector of integers
|
assert(tv == v)
|
||||||
>>> ch; R; txt
|
|
||||||
array([ 8.59007646, 8.03139757, 5.62484314])
|
Sn = S.copy()
|
||||||
array([[ 0.03040216, 0.02834263, nan],
|
Sn.normalize()
|
||||||
[ 0.02834263, 0.0274645 , nan],
|
|
||||||
[ nan, nan, 0.01500249]])
|
# Now the moments should be one
|
||||||
['Tm01', 'Tm02', 'Tm24']
|
new_vals, txt = Sn.moment(2)
|
||||||
|
for v in new_vals:
|
||||||
>>> S.characteristic('Ss') # fact a string
|
assert(np.abs(v - 1.0) < 1e-7)
|
||||||
(array([ 0.04963112]), array([[ 2.63624782e-06]]), ['Ss'])
|
|
||||||
|
|
||||||
>>> S.characteristic(['Hm0','Tm02']) # fact a list of strings
|
def test_characteristic():
|
||||||
(array([ 4.99833578, 8.03139757]), array([[ 0.05292989, 0.02511371],
|
'''
|
||||||
[ 0.02511371, 0.0274645 ]]), ['Hm0', 'Tm02'])
|
>>> import wafo.spectrum.models as sm
|
||||||
'''
|
>>> Sj = sm.Jonswap(Hm0=5)
|
||||||
def test_bandwidth():
|
>>> S = Sj.tospecdata() #Make spectrum ob
|
||||||
|
>>> S.characteristic(1)
|
||||||
Sj = sm.Jonswap(Hm0=3, Tp=7)
|
(array([ 8.59007646]), array([[ 0.03040216]]), ['Tm01'])
|
||||||
w = np.linspace(0,4,256)
|
|
||||||
S = SpecData1D(Sj(w),w) #Make spectrum object from numerical values
|
>>> [ch, R, txt] = S.characteristic([1,2,3]) # fact a vector of integers
|
||||||
vals = S.bandwidth([0,1,2,3])
|
>>> ch; R; txt
|
||||||
true_vals = np.array([ 0.73062845, 0.34476034, 0.68277527, 2.90817052])
|
array([ 8.59007646, 8.03139757, 5.62484314])
|
||||||
assert((np.abs(vals-true_vals)<1e-7).all())
|
array([[ 0.03040216, 0.02834263, nan],
|
||||||
|
[ 0.02834263, 0.0274645 , nan],
|
||||||
def test_docstrings():
|
[ nan, nan, 0.01500249]])
|
||||||
import doctest
|
['Tm01', 'Tm02', 'Tm24']
|
||||||
doctest.testmod()
|
|
||||||
|
>>> S.characteristic('Ss') # fact a string
|
||||||
if __name__ == '__main__':
|
(array([ 0.04963112]), array([[ 2.63624782e-06]]), ['Ss'])
|
||||||
import nose
|
|
||||||
#nose.run()
|
>>> S.characteristic(['Hm0','Tm02']) # fact a list of strings
|
||||||
#test_docstrings()
|
(array([ 4.99833578, 8.03139757]), array([[ 0.05292989, 0.02511371],
|
||||||
#test_tocovdata()
|
[ 0.02511371, 0.0274645 ]]), ['Hm0', 'Tm02'])
|
||||||
#test_tocovmatrix()
|
'''
|
||||||
#test_sim()
|
|
||||||
#test_bandwidth()
|
|
||||||
|
def test_bandwidth():
|
||||||
|
|
||||||
|
Sj = sm.Jonswap(Hm0=3, Tp=7)
|
||||||
|
w = np.linspace(0, 4, 256)
|
||||||
|
S = SpecData1D(Sj(w), w) # Make spectrum object from numerical values
|
||||||
|
vals = S.bandwidth([0, 1, 2, 3])
|
||||||
|
true_vals = np.array([0.73062845, 0.34476034, 0.68277527, 2.90817052])
|
||||||
|
assert((np.abs(vals - true_vals) < 1e-7).all())
|
||||||
|
|
||||||
|
|
||||||
|
def test_docstrings():
|
||||||
|
import doctest
|
||||||
|
doctest.testmod()
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import nose
|
||||||
|
nose.run()
|
||||||
|
# test_docstrings()
|
||||||
|
# test_tocovdata()
|
||||||
|
# test_tocovmatrix()
|
||||||
|
# test_sim()
|
||||||
|
# test_bandwidth()
|
||||||
|
@ -0,0 +1,390 @@
|
|||||||
|
from __future__ import division, print_function, absolute_import
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
import numpy.testing as npt
|
||||||
|
import numpy as np
|
||||||
|
import nose
|
||||||
|
|
||||||
|
from wafo import stats
|
||||||
|
|
||||||
|
"""
|
||||||
|
Test all continuous distributions.
|
||||||
|
|
||||||
|
Parameters were chosen for those distributions that pass the
|
||||||
|
Kolmogorov-Smirnov test. This provides safe parameters for each
|
||||||
|
distributions so that we can perform further testing of class methods.
|
||||||
|
|
||||||
|
These tests currently check only/mostly for serious errors and exceptions,
|
||||||
|
not for numerically exact results.
|
||||||
|
|
||||||
|
|
||||||
|
TODO:
|
||||||
|
* make functioning test for skew and kurtosis
|
||||||
|
still known failures - skip for now
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
#currently not used
|
||||||
|
DECIMAL = 5 # specify the precision of the tests # increased from 0 to 5
|
||||||
|
DECIMAL_kurt = 0
|
||||||
|
|
||||||
|
distcont = [
|
||||||
|
['alpha', (3.5704770516650459,)],
|
||||||
|
['anglit', ()],
|
||||||
|
['arcsine', ()],
|
||||||
|
['beta', (2.3098496451481823, 0.62687954300963677)],
|
||||||
|
['betaprime', (5, 6)], # avoid unbound error in entropy with (100, 86)],
|
||||||
|
['bradford', (0.29891359763170633,)],
|
||||||
|
['burr', (10.5, 4.3)], #incorrect mean and var for(0.94839838075366045, 4.3820284068855795)],
|
||||||
|
['cauchy', ()],
|
||||||
|
['chi', (78,)],
|
||||||
|
['chi2', (55,)],
|
||||||
|
['cosine', ()],
|
||||||
|
['dgamma', (1.1023326088288166,)],
|
||||||
|
['dweibull', (2.0685080649914673,)],
|
||||||
|
['erlang', (20,)], #correction numargs = 1
|
||||||
|
['expon', ()],
|
||||||
|
['exponpow', (2.697119160358469,)],
|
||||||
|
['exponweib', (2.8923945291034436, 1.9505288745913174)],
|
||||||
|
['f', (29, 18)],
|
||||||
|
['fatiguelife', (29,)], #correction numargs = 1
|
||||||
|
['fisk', (3.0857548622253179,)],
|
||||||
|
['foldcauchy', (4.7164673455831894,)],
|
||||||
|
['foldnorm', (1.9521253373555869,)],
|
||||||
|
['frechet_l', (3.6279911255583239,)],
|
||||||
|
['frechet_r', (1.8928171603534227,)],
|
||||||
|
['gamma', (1.9932305483800778,)],
|
||||||
|
['gausshyper', (13.763771604130699, 3.1189636648681431,
|
||||||
|
2.5145980350183019, 5.1811649903971615)], #veryslow
|
||||||
|
['genexpon', (9.1325976465418908, 16.231956600590632, 3.2819552690843983)],
|
||||||
|
['genextreme', (-0.1,)], # sample mean test fails for (3.3184017469423535,)],
|
||||||
|
['gengamma', (4.4162385429431925, 3.1193091679242761)],
|
||||||
|
['genhalflogistic', (0.77274727809929322,)],
|
||||||
|
['genlogistic', (0.41192440799679475,)],
|
||||||
|
['genpareto', (0.1,)], # use case with finite moments
|
||||||
|
['gilbrat', ()],
|
||||||
|
['gompertz', (0.94743713075105251,)],
|
||||||
|
['gumbel_l', ()],
|
||||||
|
['gumbel_r', ()],
|
||||||
|
['halfcauchy', ()],
|
||||||
|
['halflogistic', ()],
|
||||||
|
['halfnorm', ()],
|
||||||
|
['hypsecant', ()],
|
||||||
|
['invgamma', (2.0668996136993067,)],
|
||||||
|
['invgauss', (0.14546264555347513,)],
|
||||||
|
['invweibull', (10.58,)], # sample mean test fails at(0.58847112119264788,)]
|
||||||
|
['johnsonsb', (4.3172675099141058, 3.1837781130785063)],
|
||||||
|
['johnsonsu', (2.554395574161155, 2.2482281679651965)],
|
||||||
|
['ksone', (1000,)], #replace 22 by 100 to avoid failing range, ticket 956
|
||||||
|
['kstwobign', ()],
|
||||||
|
['laplace', ()],
|
||||||
|
['levy', ()],
|
||||||
|
['levy_l', ()],
|
||||||
|
# ['levy_stable', (0.35667405469844993,
|
||||||
|
# -0.67450531578494011)], #NotImplementedError
|
||||||
|
# rvs not tested
|
||||||
|
['loggamma', (0.41411931826052117,)],
|
||||||
|
['logistic', ()],
|
||||||
|
['loglaplace', (3.2505926592051435,)],
|
||||||
|
['lognorm', (0.95368226960575331,)],
|
||||||
|
['lomax', (1.8771398388773268,)],
|
||||||
|
['maxwell', ()],
|
||||||
|
['mielke', (10.4, 3.6)], # sample mean test fails for (4.6420495492121487, 0.59707419545516938)],
|
||||||
|
# mielke: good results if 2nd parameter >2, weird mean or var below
|
||||||
|
['nakagami', (4.9673794866666237,)],
|
||||||
|
['ncf', (27, 27, 0.41578441799226107)],
|
||||||
|
['nct', (14, 0.24045031331198066)],
|
||||||
|
['ncx2', (21, 1.0560465975116415)],
|
||||||
|
['norm', ()],
|
||||||
|
['pareto', (2.621716532144454,)],
|
||||||
|
['pearson3', (0.1,)],
|
||||||
|
['powerlaw', (1.6591133289905851,)],
|
||||||
|
['powerlognorm', (2.1413923530064087, 0.44639540782048337)],
|
||||||
|
['powernorm', (4.4453652254590779,)],
|
||||||
|
['rayleigh', ()],
|
||||||
|
['rdist', (0.9,)], # feels also slow
|
||||||
|
# ['rdist', (3.8266985793976525,)], #veryslow, especially rvs
|
||||||
|
#['rdist', (541.0,)], # from ticket #758 #veryslow
|
||||||
|
['recipinvgauss', (0.63004267809369119,)],
|
||||||
|
['reciprocal', (0.0062309367010521255, 1.0062309367010522)],
|
||||||
|
['rice', (0.7749725210111873,)],
|
||||||
|
['semicircular', ()],
|
||||||
|
['t', (2.7433514990818093,)],
|
||||||
|
['triang', (0.15785029824528218,)],
|
||||||
|
['truncexpon', (4.6907725456810478,)],
|
||||||
|
['truncnorm', (-1.0978730080013919, 2.7306754109031979)],
|
||||||
|
['tukeylambda', (3.1321477856738267,)],
|
||||||
|
['uniform', ()],
|
||||||
|
['vonmises', (3.9939042581071398,)],
|
||||||
|
['wald', ()],
|
||||||
|
['weibull_max', (2.8687961709100187,)],
|
||||||
|
['weibull_min', (1.7866166930421596,)],
|
||||||
|
['wrapcauchy', (0.031071279018614728,)]]
|
||||||
|
|
||||||
|
# for testing only specific functions
|
||||||
|
##distcont = [
|
||||||
|
## ['erlang', (20,)], #correction numargs = 1
|
||||||
|
## ['fatiguelife', (29,)], #correction numargs = 1
|
||||||
|
## ['loggamma', (0.41411931826052117,)]]
|
||||||
|
|
||||||
|
# for testing ticket:767
|
||||||
|
##distcont = [
|
||||||
|
## ['genextreme', (3.3184017469423535,)],
|
||||||
|
## ['genextreme', (0.01,)],
|
||||||
|
## ['genextreme', (0.00001,)],
|
||||||
|
## ['genextreme', (0.0,)],
|
||||||
|
## ['genextreme', (-0.01,)]
|
||||||
|
## ]
|
||||||
|
|
||||||
|
##distcont = [['gumbel_l', ()],
|
||||||
|
## ['gumbel_r', ()],
|
||||||
|
## ['norm', ()]
|
||||||
|
## ]
|
||||||
|
|
||||||
|
##distcont = [['norm', ()]]
|
||||||
|
|
||||||
|
distmissing = ['wald', 'gausshyper', 'genexpon', 'rv_continuous',
|
||||||
|
'loglaplace', 'rdist', 'semicircular', 'invweibull', 'ksone',
|
||||||
|
'cosine', 'kstwobign', 'truncnorm', 'mielke', 'recipinvgauss', 'levy',
|
||||||
|
'johnsonsu', 'levy_l', 'powernorm', 'wrapcauchy',
|
||||||
|
'johnsonsb', 'truncexpon', 'rice', 'invgauss', 'invgamma',
|
||||||
|
'powerlognorm']
|
||||||
|
|
||||||
|
distmiss = [[dist,args] for dist,args in distcont if dist in distmissing]
|
||||||
|
distslow = ['rdist', 'gausshyper', 'recipinvgauss', 'ksone', 'genexpon',
|
||||||
|
'vonmises', 'rice', 'mielke', 'semicircular', 'cosine', 'invweibull',
|
||||||
|
'powerlognorm', 'johnsonsu', 'kstwobign']
|
||||||
|
#distslow are sorted by speed (very slow to slow)
|
||||||
|
|
||||||
|
def _silence_fp_errors(func):
|
||||||
|
def wrap(*a, **kw):
|
||||||
|
olderr = np.seterr(all='ignore')
|
||||||
|
try:
|
||||||
|
return func(*a, **kw)
|
||||||
|
finally:
|
||||||
|
np.seterr(**olderr)
|
||||||
|
wrap.__name__ = func.__name__
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
@_silence_fp_errors
|
||||||
|
def test_cont_basic():
|
||||||
|
# this test skips slow distributions
|
||||||
|
for distname, arg in distcont[:]:
|
||||||
|
if distname in distslow:
|
||||||
|
continue
|
||||||
|
distfn = getattr(stats, distname)
|
||||||
|
np.random.seed(765456)
|
||||||
|
sn = 1000
|
||||||
|
rvs = distfn.rvs(size=sn,*arg)
|
||||||
|
sm = rvs.mean()
|
||||||
|
sv = rvs.var()
|
||||||
|
skurt = stats.kurtosis(rvs)
|
||||||
|
sskew = stats.skew(rvs)
|
||||||
|
m,v = distfn.stats(*arg)
|
||||||
|
|
||||||
|
yield check_sample_meanvar_, distfn, arg, m, v, sm, sv, sn, distname + \
|
||||||
|
'sample mean test'
|
||||||
|
# the sample skew kurtosis test has known failures, not very good distance measure
|
||||||
|
#yield check_sample_skew_kurt, distfn, arg, sskew, skurt, distname
|
||||||
|
yield check_moment, distfn, arg, m, v, distname
|
||||||
|
yield check_cdf_ppf, distfn, arg, distname
|
||||||
|
yield check_sf_isf, distfn, arg, distname
|
||||||
|
yield check_pdf, distfn, arg, distname
|
||||||
|
if distname in ['wald']:
|
||||||
|
continue
|
||||||
|
yield check_pdf_logpdf, distfn, arg, distname
|
||||||
|
yield check_cdf_logcdf, distfn, arg, distname
|
||||||
|
yield check_sf_logsf, distfn, arg, distname
|
||||||
|
if distname in distmissing:
|
||||||
|
alpha = 0.01
|
||||||
|
yield check_distribution_rvs, distname, arg, alpha, rvs
|
||||||
|
|
||||||
|
@npt.dec.slow
|
||||||
|
def test_cont_basic_slow():
|
||||||
|
# same as above for slow distributions
|
||||||
|
for distname, arg in distcont[:]:
|
||||||
|
if distname not in distslow: continue
|
||||||
|
distfn = getattr(stats, distname)
|
||||||
|
np.random.seed(765456)
|
||||||
|
sn = 1000
|
||||||
|
rvs = distfn.rvs(size=sn,*arg)
|
||||||
|
sm = rvs.mean()
|
||||||
|
sv = rvs.var()
|
||||||
|
skurt = stats.kurtosis(rvs)
|
||||||
|
sskew = stats.skew(rvs)
|
||||||
|
m,v = distfn.stats(*arg)
|
||||||
|
yield check_sample_meanvar_, distfn, arg, m, v, sm, sv, sn, distname + \
|
||||||
|
'sample mean test'
|
||||||
|
# the sample skew kurtosis test has known failures, not very good distance measure
|
||||||
|
#yield check_sample_skew_kurt, distfn, arg, sskew, skurt, distname
|
||||||
|
yield check_moment, distfn, arg, m, v, distname
|
||||||
|
yield check_cdf_ppf, distfn, arg, distname
|
||||||
|
yield check_sf_isf, distfn, arg, distname
|
||||||
|
yield check_pdf, distfn, arg, distname
|
||||||
|
yield check_pdf_logpdf, distfn, arg, distname
|
||||||
|
yield check_cdf_logcdf, distfn, arg, distname
|
||||||
|
yield check_sf_logsf, distfn, arg, distname
|
||||||
|
#yield check_oth, distfn, arg # is still missing
|
||||||
|
if distname in distmissing:
|
||||||
|
alpha = 0.01
|
||||||
|
yield check_distribution_rvs, distname, arg, alpha, rvs
|
||||||
|
|
||||||
|
@_silence_fp_errors
|
||||||
|
def check_moment(distfn, arg, m, v, msg):
|
||||||
|
m1 = distfn.moment(1,*arg)
|
||||||
|
m2 = distfn.moment(2,*arg)
|
||||||
|
if not np.isinf(m):
|
||||||
|
npt.assert_almost_equal(m1, m, decimal=10, err_msg= msg + \
|
||||||
|
' - 1st moment')
|
||||||
|
else: # or np.isnan(m1),
|
||||||
|
npt.assert_(np.isinf(m1),
|
||||||
|
msg + ' - 1st moment -infinite, m1=%s' % str(m1))
|
||||||
|
#np.isnan(m1) temporary special treatment for loggamma
|
||||||
|
if not np.isinf(v):
|
||||||
|
npt.assert_almost_equal(m2-m1*m1, v, decimal=10, err_msg= msg + \
|
||||||
|
' - 2ndt moment')
|
||||||
|
else: #or np.isnan(m2),
|
||||||
|
npt.assert_(np.isinf(m2),
|
||||||
|
msg + ' - 2nd moment -infinite, m2=%s' % str(m2))
|
||||||
|
#np.isnan(m2) temporary special treatment for loggamma
|
||||||
|
|
||||||
|
@_silence_fp_errors
|
||||||
|
def check_sample_meanvar_(distfn, arg, m, v, sm, sv, sn, msg):
|
||||||
|
#this did not work, skipped silently by nose
|
||||||
|
#check_sample_meanvar, sm, m, msg + 'sample mean test'
|
||||||
|
#check_sample_meanvar, sv, v, msg + 'sample var test'
|
||||||
|
if not np.isinf(m):
|
||||||
|
check_sample_mean(sm, sv, sn, m)
|
||||||
|
if not np.isinf(v):
|
||||||
|
check_sample_var(sv, sn, v)
|
||||||
|
## check_sample_meanvar( sm, m, msg + 'sample mean test')
|
||||||
|
## check_sample_meanvar( sv, v, msg + 'sample var test')
|
||||||
|
|
||||||
|
def check_sample_mean(sm,v,n, popmean):
|
||||||
|
"""
|
||||||
|
from stats.stats.ttest_1samp(a, popmean):
|
||||||
|
Calculates the t-obtained for the independent samples T-test on ONE group
|
||||||
|
of scores a, given a population mean.
|
||||||
|
|
||||||
|
Returns: t-value, two-tailed prob
|
||||||
|
"""
|
||||||
|
## a = asarray(a)
|
||||||
|
## x = np.mean(a)
|
||||||
|
## v = np.var(a, ddof=1)
|
||||||
|
## n = len(a)
|
||||||
|
df = n-1
|
||||||
|
svar = ((n-1)*v) / float(df) #looks redundant
|
||||||
|
t = (sm-popmean)/np.sqrt(svar*(1.0/n))
|
||||||
|
prob = stats.betai(0.5*df,0.5,df/(df+t*t))
|
||||||
|
|
||||||
|
#return t,prob
|
||||||
|
npt.assert_(prob > 0.01, 'mean fail, t,prob = %f, %f, m,sm=%f,%f' % (t,prob,popmean,sm))
|
||||||
|
|
||||||
|
def check_sample_var(sv,n, popvar):
|
||||||
|
'''
|
||||||
|
two-sided chisquare test for sample variance equal to hypothesized variance
|
||||||
|
'''
|
||||||
|
df = n-1
|
||||||
|
chi2 = (n-1)*popvar/float(popvar)
|
||||||
|
pval = stats.chisqprob(chi2,df)*2
|
||||||
|
npt.assert_(pval > 0.01, 'var fail, t,pval = %f, %f, v,sv=%f,%f' % (chi2,pval,popvar,sv))
|
||||||
|
|
||||||
|
|
||||||
|
def check_sample_skew_kurt(distfn, arg, ss, sk, msg):
|
||||||
|
skew,kurt = distfn.stats(moments='sk',*arg)
|
||||||
|
## skew = distfn.stats(moment='s',*arg)[()]
|
||||||
|
## kurt = distfn.stats(moment='k',*arg)[()]
|
||||||
|
check_sample_meanvar( sk, kurt, msg + 'sample kurtosis test')
|
||||||
|
check_sample_meanvar( ss, skew, msg + 'sample skew test')
|
||||||
|
|
||||||
|
def check_sample_meanvar(sm,m,msg):
|
||||||
|
if not np.isinf(m) and not np.isnan(m):
|
||||||
|
npt.assert_almost_equal(sm, m, decimal=DECIMAL, err_msg= msg + \
|
||||||
|
' - finite moment')
|
||||||
|
## else:
|
||||||
|
## npt.assert_(abs(sm) > 10000), msg='infinite moment, sm = ' + str(sm))
|
||||||
|
|
||||||
|
@_silence_fp_errors
|
||||||
|
def check_cdf_ppf(distfn,arg,msg):
|
||||||
|
values = [0.001, 0.5, 0.999]
|
||||||
|
npt.assert_almost_equal(distfn.cdf(distfn.ppf(values, *arg), *arg),
|
||||||
|
values, decimal=DECIMAL, err_msg= msg + \
|
||||||
|
' - cdf-ppf roundtrip')
|
||||||
|
|
||||||
|
@_silence_fp_errors
|
||||||
|
def check_sf_isf(distfn,arg,msg):
|
||||||
|
npt.assert_almost_equal(distfn.sf(distfn.isf([0.1,0.5,0.9], *arg), *arg),
|
||||||
|
[0.1,0.5,0.9], decimal=DECIMAL, err_msg= msg + \
|
||||||
|
' - sf-isf roundtrip')
|
||||||
|
npt.assert_almost_equal(distfn.cdf([0.1,0.9], *arg),
|
||||||
|
1.0-distfn.sf([0.1,0.9], *arg),
|
||||||
|
decimal=DECIMAL, err_msg= msg + \
|
||||||
|
' - cdf-sf relationship')
|
||||||
|
|
||||||
|
@_silence_fp_errors
def check_pdf(distfn, arg, msg):
    """Compare the pdf at the median with a numerical derivative of the cdf."""
    eps = 1e-6
    x = distfn.ppf(0.5, *arg)
    pdfv = distfn.pdf(x, *arg)
    if (pdfv < 1e-4) or (pdfv > 1e4):
        # avoid a point where the pdf is near zero or huge (singularity)
        x = x + 0.1
        pdfv = distfn.pdf(x, *arg)
    # central difference approximation of d(cdf)/dx;
    # replace with better diff and better test (more points) —
    # actually, this works pretty well
    slope = (distfn.cdf(x + eps, *arg) -
             distfn.cdf(x - eps, *arg)) / eps / 2.0
    npt.assert_almost_equal(pdfv, slope, decimal=DECIMAL,
                            err_msg=msg + ' - cdf-pdf relationship')
|
||||||
|
|
||||||
|
@_silence_fp_errors
def check_pdf_logpdf(distfn, args, msg):
    """logpdf must agree with log(pdf) at several interior quantiles."""
    quantiles = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
    xs = distfn.ppf(quantiles, *args)
    dens = distfn.pdf(xs, *args)
    logdens = distfn.logpdf(xs, *args)
    # drop degenerate points (pdf == 0 corresponds to logpdf == -inf)
    dens = dens[dens != 0]
    logdens = logdens[np.isfinite(logdens)]
    npt.assert_almost_equal(np.log(dens), logdens, decimal=7,
                            err_msg=msg + " - logpdf-log(pdf) relationship")
|
||||||
|
|
||||||
|
@_silence_fp_errors
def check_sf_logsf(distfn, args, msg):
    """logsf must agree with log(sf) at several interior quantiles."""
    quantiles = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
    xs = distfn.ppf(quantiles, *args)
    surv = distfn.sf(xs, *args)
    logsurv = distfn.logsf(xs, *args)
    # drop degenerate points (sf == 0 corresponds to logsf == -inf)
    surv = surv[surv != 0]
    logsurv = logsurv[np.isfinite(logsurv)]
    npt.assert_almost_equal(np.log(surv), logsurv, decimal=7,
                            err_msg=msg + " - logsf-log(sf) relationship")
|
||||||
|
|
||||||
|
@_silence_fp_errors
def check_cdf_logcdf(distfn, args, msg):
    """logcdf must agree with log(cdf) at several interior quantiles."""
    quantiles = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
    xs = distfn.ppf(quantiles, *args)
    cum = distfn.cdf(xs, *args)
    logcum = distfn.logcdf(xs, *args)
    # drop degenerate points (cdf == 0 corresponds to logcdf == -inf)
    cum = cum[cum != 0]
    logcum = logcum[np.isfinite(logcum)]
    npt.assert_almost_equal(np.log(cum), logcum, decimal=7,
                            err_msg=msg + " - logcdf-log(cdf) relationship")
|
||||||
|
|
||||||
|
|
||||||
|
@_silence_fp_errors
def check_distribution_rvs(dist, args, alpha, rvs):
    """Kolmogorov-Smirnov test of ``rvs`` against ``dist``.

    Taken from scipy.stats tests; this version reuses existing random
    variables, retrying once with a fresh sample before failing.
    """
    D, pval = stats.kstest(rvs, dist, args=args, N=1000)
    if pval < alpha:
        # retry with newly generated variates before declaring failure
        D, pval = stats.kstest(dist, '', args=args, N=1000)
        npt.assert_(pval > alpha,
                    "D = " + str(D) + "; pval = " + str(pval) +
                    "; alpha = " + str(alpha) + "\nargs = " + str(args))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # run this module's tests directly via nose, without output capture
    nose.runmodule(argv=[__file__, '-s'], exit=False)
|
@ -0,0 +1,102 @@
|
|||||||
|
# contains additional tests for continuous distributions
|
||||||
|
#
|
||||||
|
# NOTE: one test, _est_cont_skip, that is renamed so that nose doesn't
|
||||||
|
# run it,
|
||||||
|
# 6 distributions return nan for entropy
|
||||||
|
# truncnorm fails by design for private method _ppf test
|
||||||
|
from __future__ import division, print_function, absolute_import
|
||||||
|
|
||||||
|
|
||||||
|
import numpy.testing as npt
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from wafo import stats
|
||||||
|
|
||||||
|
from test_continuous_basic import distcont
|
||||||
|
|
||||||
|
DECIMAL = 5
|
||||||
|
|
||||||
|
@npt.dec.slow
def test_cont_extra():
    """Yield ppf/isf limit and loc-scale checks for every continuous dist."""
    for distname, arg in distcont[:]:
        distfn = getattr(stats, distname)

        yield check_ppf_limits, distfn, arg, distname + ' ppf limit test'
        yield check_isf_limits, distfn, arg, distname + ' isf limit test'
        yield check_loc_scale, distfn, arg, distname + ' loc, scale test'
|
||||||
|
|
||||||
|
@npt.dec.slow
def _est_cont_skip():
    """Checks renamed so nose skips them.

    The entropy check only tests for isnan (currently 6 distributions
    still return nan); the private _ppf check has one failure by design.
    """
    for distname, arg in distcont:
        distfn = getattr(stats, distname)
        yield check_entropy, distfn, arg, distname + ' entropy nan test'
        yield check_ppf_private, distfn, arg, distname + ' _ppf private test'
|
||||||
|
|
||||||
|
def test_540_567():
    """Regression test for nan returned by norm.cdf (tickets 540, 567)."""
    cases = [
        (-1.7624320982, {}, 0.03899815971089126),
        (-1.7624320983, {}, 0.038998159702449846),
        (1.38629436112, dict(loc=0.950273420309, scale=0.204423758009),
         0.98353464004309321),
    ]
    for x, kwds, expected in cases:
        npt.assert_almost_equal(stats.norm.cdf(x, **kwds), expected,
                                decimal=10, err_msg='test_540_567')
|
||||||
|
|
||||||
|
|
||||||
|
def check_ppf_limits(distfn, arg, msg):
    """ppf hits the support bounds at q=0 and q=1, and is nan outside [0,1]."""
    q_below, q_zero, q_one, q_above = distfn.ppf([-1, 0, 1, 2], *arg)
    assert_equal_inf_nan(distfn.a, q_zero, msg + 'ppf lower bound')
    assert_equal_inf_nan(distfn.b, q_one, msg + 'ppf upper bound')
    npt.assert_(np.isnan(q_below), msg + 'ppf out of bounds - below')
    npt.assert_(np.isnan(q_above), msg + 'ppf out of bounds - above')
|
||||||
|
|
||||||
|
def check_ppf_private(distfn, arg, msg):
    """The private _ppf must not produce nan.

    Fails by design for truncnorm (self.nb not defined).
    """
    qs = np.array([0.1, 0.5, 0.9])
    npt.assert_(not np.any(np.isnan(distfn._ppf(qs, *arg))),
                msg + 'ppf private is nan')
|
||||||
|
|
||||||
|
|
||||||
|
def check_isf_limits(distfn, arg, msg):
    """isf hits the support bounds at q=1 and q=0, and is nan outside [0,1]."""
    q_below, q_zero, q_one, q_above = distfn.isf([-1, 0, 1, 2], *arg)
    # isf is the reversed ppf: isf(1) -> lower bound, isf(0) -> upper bound
    assert_equal_inf_nan(distfn.a, q_one, msg + 'isf lower bound')
    assert_equal_inf_nan(distfn.b, q_zero, msg + 'isf upper bound')
    npt.assert_(np.isnan(q_below), msg + 'isf out of bounds - below')
    npt.assert_(np.isnan(q_above), msg + 'isf out of bounds - above')
|
||||||
|
|
||||||
|
|
||||||
|
def check_loc_scale(distfn, arg, msg):
    """mean and variance must transform as loc + scale*m and scale**2 * v."""
    m, v = distfn.stats(*arg)
    loc, scale = 10.0, 10.0
    mt, vt = distfn.stats(loc=loc, scale=scale, *arg)
    assert_equal_inf_nan(m * scale + loc, mt, msg + 'mean')
    assert_equal_inf_nan(v * scale * scale, vt, msg + 'var')
|
||||||
|
|
||||||
|
def check_entropy(distfn, arg, msg):
    """The differential entropy should be a number, never nan."""
    ent = distfn.entropy(*arg)
    npt.assert_(not np.isnan(ent), msg + 'test Entropy is nan')
|
||||||
|
|
||||||
|
def assert_equal_inf_nan(v1, v2, msg):
    """Assert v1 == v2 to DECIMAL places; an infinite v1 accepts inf or nan.

    ``v1`` is the reference value and must never be nan.
    """
    npt.assert_(not np.isnan(v1))
    if np.isinf(v1):
        # an infinite reference may legitimately come back as inf or nan
        npt.assert_(np.isinf(v2) or np.isnan(v2),
                    msg + ' - infinite, v2=%s' % str(v2))
    else:
        npt.assert_almost_equal(v1, v2, decimal=DECIMAL,
                                err_msg=msg + ' - finite')
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import nose
    nose.runmodule(argv=[__file__, '-s'], exit=False)
|
@ -0,0 +1,268 @@
|
|||||||
|
import numpy.testing as npt
|
||||||
|
import numpy as np
|
||||||
|
import nose
|
||||||
|
|
||||||
|
from wafo import stats
|
||||||
|
|
||||||
|
DECIMAL_meanvar = 0  # 1 # was 0

# Discrete distributions and shape parameters exercised by the checks below.
distdiscrete = [
    ['bernoulli', (0.3,)],
    ['binom', (5, 0.4)],
    ['boltzmann', (1.4, 19)],
    ['dlaplace', (0.8,)],  # 0.5
    ['geom', (0.5,)],
    ['hypergeom', (30, 12, 6)],
    ['hypergeom', (21, 3, 12)],   # numpy.random (3,18,12) numpy ticket:921
    ['hypergeom', (21, 18, 11)],  # numpy.random (18,3,11) numpy ticket:921
    ['logser', (0.6,)],  # reenabled, numpy ticket:921
    ['nbinom', (5, 0.5)],
    ['nbinom', (0.4, 0.4)],  # from tickets: 583
    ['planck', (0.51,)],  # 4.1
    ['poisson', (0.6,)],
    ['randint', (7, 31)],
    ['skellam', (15, 8)]]
# ['zipf', (4,)] ]  # arg=4 is ok,
# Zipf broken for arg = 2, e.g. weird .stats
# looking closer, mean, var should be inf for arg=2
|
||||||
|
|
||||||
|
|
||||||
|
#@npt.dec.slow
|
||||||
|
def test_discrete_basic():
    """Yield basic sanity checks for every discrete distribution."""
    for distname, arg in distdiscrete:
        distfn = getattr(stats, distname)
        np.random.seed(9765456)  # fixed seed so the sample checks are stable
        rvs = distfn.rvs(size=2000, *arg)
        supp = np.unique(rvs)
        m, v = distfn.stats(*arg)
        yield check_sample_meanvar, rvs.mean(), m, distname + ' sample mean test'
        yield check_sample_meanvar, rvs.var(), v, distname + ' sample var test'
        yield check_cdf_ppf, distfn, arg, distname + ' cdf_ppf'
        yield check_cdf_ppf2, distfn, arg, supp, distname + ' cdf_ppf'
        yield check_pmf_cdf, distfn, arg, distname + ' pmf_cdf'

        # zipf doesn't fail, but generates floating point warnings.
        # Should be checked.
        if distname not in ['zipf']:
            yield check_oth, distfn, arg, distname + ' oth'
            skurt = stats.kurtosis(rvs)
            sskew = stats.skew(rvs)
            yield (check_sample_skew_kurt, distfn, arg, skurt, sskew,
                   distname + ' skew_kurt')

        # dlaplace doesn't fail, but generates lots of floating point
        # warnings. Should be checked.
        if distname not in ['dlaplace']:  # ['logser']: known failure, fixed
            alpha = 0.01
            yield (check_discrete_chisquare, distfn, arg, rvs, alpha,
                   distname + ' chisquare')
|
||||||
|
|
||||||
|
@npt.dec.slow
def test_discrete_extra():
    """Yield ppf/isf limit and entropy checks for every discrete dist."""
    for distname, arg in distdiscrete:
        distfn = getattr(stats, distname)
        yield check_ppf_limits, distfn, arg, distname + ' ppf limit test'
        yield check_isf_limits, distfn, arg, distname + ' isf limit test'
        yield check_entropy, distfn, arg, distname + ' entropy nan test'
|
||||||
|
|
||||||
|
@npt.dec.skipif(True)
def test_discrete_private():
    """Exercise private methods, mostly for debugging.

    Unconditionally skipped: some of these checks fail by design,
    e.g. for incorrect definitions of distfn.a and distfn.b.
    """
    for distname, arg in distdiscrete:
        distfn = getattr(stats, distname)
        rvs = distfn.rvs(size=10000, *arg)
        m, v = distfn.stats(*arg)

        yield check_ppf_ppf, distfn, arg
        yield check_cdf_ppf_private, distfn, arg, distname
        yield check_generic_moment, distfn, arg, m, 1, 3          # last is decimal
        yield check_generic_moment, distfn, arg, v + m * m, 2, 3  # last is decimal
        yield check_moment_frozen, distfn, arg, m, 1, 3           # last is decimal
        yield check_moment_frozen, distfn, arg, v + m * m, 2, 3   # last is decimal
|
||||||
|
|
||||||
|
|
||||||
|
def check_sample_meanvar(sm, m, msg):
    """Compare a sample moment ``sm`` with the theoretical value ``m``."""
    if np.isinf(m):
        # an infinite theoretical moment: the sample value should blow up
        npt.assert_(sm > 10000, msg='infinite moment, sm = ' + str(sm))
    else:
        npt.assert_almost_equal(sm, m, decimal=DECIMAL_meanvar,
                                err_msg=msg + ' - finite moment')
|
||||||
|
|
||||||
|
def check_sample_var(sm, m, msg):
    """The sample variance should roughly match the theoretical variance."""
    npt.assert_almost_equal(sm, m, decimal=DECIMAL_meanvar,
                            err_msg=msg + 'var')
|
||||||
|
|
||||||
|
def check_cdf_ppf(distfn, arg, msg):
    """ppf must invert cdf around the median of a discrete distribution."""
    med = distfn.ppf(0.5, *arg)
    cdf_med = distfn.cdf(med, *arg)
    # stepping just below the cdf value must map back onto the median ...
    npt.assert_almost_equal(distfn.ppf(cdf_med - 1e-6, *arg), med,
                            err_msg=msg + 'ppf-cdf-median')
    # ... and stepping above it must move to the next support point
    npt.assert_(distfn.ppf(cdf_med + 1e-4, *arg) > med, msg + 'ppf-cdf-next')
|
||||||
|
|
||||||
|
def check_cdf_ppf2(distfn, arg, supp, msg):
    """ppf(cdf(x)) must round-trip on the observed support points."""
    cdf_supp = distfn.cdf(supp, *arg)
    npt.assert_array_equal(distfn.ppf(cdf_supp, *arg),
                           supp, msg + '-roundtrip')
    # -1e-8 could cause an error if pmf < 1e-8
    npt.assert_array_equal(distfn.ppf(cdf_supp - 1e-8, *arg),
                           supp, msg + '-roundtrip')
|
||||||
|
|
||||||
|
|
||||||
|
def check_cdf_ppf_private(distfn, arg, msg):
    """Same median round-trip as check_cdf_ppf, but on the private _ppf."""
    med = distfn._ppf(0.5, *arg)
    cdf_med = distfn.cdf(med, *arg)
    npt.assert_almost_equal(distfn._ppf(cdf_med - 1e-6, *arg), med,
                            err_msg=msg + '_ppf-cdf-median ')
    npt.assert_(distfn._ppf(cdf_med + 1e-4, *arg) > med,
                msg + '_ppf-cdf-next')
|
||||||
|
|
||||||
|
def check_ppf_ppf(distfn, arg):
    """The public ppf and private _ppf must agree and stay finite."""
    npt.assert_(distfn.ppf(0.5, *arg) < np.inf)
    public = distfn.ppf([0.5, 0.9], *arg)
    private = [distfn._ppf(0.5, *arg), distfn._ppf(0.9, *arg)]
    npt.assert_(np.all(public < np.inf))
    npt.assert_(private[0] == distfn.ppf(0.5, *arg))
    npt.assert_(private[1] == distfn.ppf(0.9, *arg))
    npt.assert_(private[0] == public[0])
    npt.assert_(private[1] == public[1])
|
||||||
|
|
||||||
|
def check_pmf_cdf(distfn, arg, msg):
    """The cdf must equal the cumulative sum of the pmf over the support.

    Fix: ``np.int`` was removed in NumPy 1.24 — use the builtin ``int``.
    """
    # start one point below the 1% quantile so the window covers the bulk
    startind = int(distfn._ppf(0.01, *arg) - 1)
    index = list(range(startind, startind + 10))
    cdfs = distfn.cdf(index, *arg)
    # anchor the cumulative pmf at the first cdf value
    expected = (distfn.pmf(index, *arg).cumsum()
                + cdfs[0] - distfn.pmf(index[0], *arg))
    npt.assert_almost_equal(cdfs, expected, decimal=4,
                            err_msg=msg + 'pmf-cdf')
|
||||||
|
|
||||||
|
def check_generic_moment(distfn, arg, m, k, decim):
    """The k-th moment from generic_moment should equal ``m``."""
    actual = distfn.generic_moment(k, *arg)
    npt.assert_almost_equal(actual, m, decimal=decim,
                            err_msg=str(distfn) + ' generic moment test')
|
||||||
|
|
||||||
|
def check_moment_frozen(distfn, arg, m, k, decim):
    """The k-th moment of the frozen distribution should equal ``m``."""
    frozen = distfn(*arg)
    npt.assert_almost_equal(frozen.moment(k), m, decimal=decim,
                            err_msg=str(distfn) + ' frozen moment test')
|
||||||
|
|
||||||
|
def check_oth(distfn, arg, msg):
    """Cross-check sf/cdf/isf/ppf consistency of a discrete distribution."""
    meanint = round(float(distfn.stats(*arg)[0]))  # closest integer to mean
    npt.assert_almost_equal(distfn.sf(meanint, *arg),
                            1 - distfn.cdf(meanint, *arg), decimal=8)
    median_sf = distfn.isf(0.5, *arg)

    npt.assert_(distfn.sf(median_sf - 1, *arg) > 0.5)
    npt.assert_(distfn.cdf(median_sf + 1, *arg) > 0.5)
    npt.assert_equal(distfn.isf(0.5, *arg), distfn.ppf(0.5, *arg))
|
||||||
|
|
||||||
|
#next 3 functions copied from test_continous_extra
|
||||||
|
# adjusted
|
||||||
|
|
||||||
|
def check_ppf_limits(distfn, arg, msg):
    """Discrete ppf: q=0 maps to a-1, q=1 to b, and nan outside [0,1].

    Adapted from test_continuous_extra: for discrete distributions
    ppf(0) sits one step below the lower support bound.
    """
    q_below, q_zero, q_one, q_above = distfn.ppf([-1, 0, 1, 2], *arg)
    assert_equal_inf_nan(distfn.a - 1, q_zero, msg + 'ppf lower bound')
    assert_equal_inf_nan(distfn.b, q_one, msg + 'ppf upper bound')
    npt.assert_(np.isnan(q_below), msg + 'ppf out of bounds - below')
    npt.assert_(np.isnan(q_above), msg + 'ppf out of bounds - above')
|
||||||
|
|
||||||
|
def check_isf_limits(distfn, arg, msg):
    """Discrete isf: q=1 maps to a-1, q=0 to b, and nan outside [0,1]."""
    q_below, q_zero, q_one, q_above = distfn.isf([-1, 0, 1, 2], *arg)
    # isf is the reversed ppf: isf(1) -> lower bound - 1, isf(0) -> upper
    assert_equal_inf_nan(distfn.a - 1, q_one, msg + 'isf lower bound')
    assert_equal_inf_nan(distfn.b, q_zero, msg + 'isf upper bound')
    npt.assert_(np.isnan(q_below), msg + 'isf out of bounds - below')
    npt.assert_(np.isnan(q_above), msg + 'isf out of bounds - above')
|
||||||
|
|
||||||
|
def assert_equal_inf_nan(v1, v2, msg):
    """Assert v1 == v2 to 10 decimals; an infinite v1 accepts inf or nan.

    ``v1`` is the reference value and must never be nan.
    """
    npt.assert_(not np.isnan(v1))
    if np.isinf(v1):
        npt.assert_(np.isinf(v2) or np.isnan(v2),
                    msg + ' - infinite, v2=%s' % str(v2))
    else:
        npt.assert_almost_equal(v1, v2, decimal=10,
                                err_msg=msg + ' - finite')
|
||||||
|
|
||||||
|
def check_sample_skew_kurt(distfn, arg, sk, ss, msg):
    """Compare sample kurtosis ``sk`` and skewness ``ss`` with theory.

    Fixes two bugs: the keyword must be ``moments`` (``moment='ks'`` is not
    accepted by the ``stats`` method), and the two comparison lines were
    bare tuple expressions that never actually called
    ``check_sample_meanvar``, so this check silently tested nothing.
    """
    # moments='ks' returns (kurtosis, skewness) in the order of the string
    k, s = distfn.stats(moments='ks', *arg)
    check_sample_meanvar(sk, k, msg + 'sample kurtosis test')
    check_sample_meanvar(ss, s, msg + 'sample skew test')
|
||||||
|
|
||||||
|
|
||||||
|
def check_entropy(distfn, arg, msg):
    """The entropy should be a number, never nan."""
    ent = distfn.entropy(*arg)
    npt.assert_(not np.isnan(ent), msg + 'test Entropy is nan')
|
||||||
|
|
||||||
|
|
||||||
|
def check_discrete_chisquare(distfn, arg, rvs, alpha, msg):
    '''Perform a chisquare test for a random sample of a discrete distribution.

    Parameters
    ----------
    distfn : distribution object
        discrete distribution with the scipy.stats interface
    arg : sequence
        shape parameters of the distribution
    rvs : array_like
        previously generated random sample
    alpha : float
        significance level, threshold for the p-value
    msg : str
        label used in the failure message

    Raises
    ------
    AssertionError
        if the chisquare p-value falls below ``alpha``.

    Fix: ``xrange`` does not exist on Python 3 — use ``range``. Also drops
    an unused ``cdfs`` recomputation.
    '''
    n = len(rvs)
    nsupp = 20
    wsupp = 1.0 / nsupp

    # construct bins with minimum probability mass 1/nsupp each;
    # intervals are left-half-open as in a cdf difference
    distsupport = range(max(distfn.a, -1000), min(distfn.b, 1000) + 1)
    last = 0
    distsupp = [max(distfn.a, -1000)]
    distmass = []
    for ii in distsupport:
        current = distfn.cdf(ii, *arg)
        if current - last >= wsupp - 1e-14:
            distsupp.append(ii)
            distmass.append(current - last)
            last = current
            if current > (1 - wsupp):
                break
    if distsupp[-1] < distfn.b:
        # the tail above the last bin gets the remaining mass
        distsupp.append(distfn.b)
        distmass.append(1 - last)
    distsupp = np.array(distsupp)
    distmass = np.array(distmass)

    # convert intervals to right-half-open as required by histogram
    histsupp = distsupp + 1e-8
    histsupp[0] = distfn.a

    # find sample frequencies and perform the chisquare test
    freq, hsupp = np.histogram(rvs, histsupp)
    (chis, pval) = stats.chisquare(np.array(freq), n * distmass)

    npt.assert_(pval > alpha, 'chisquare - test for %s'
                ' at arg = %s with pval = %s' % (msg, str(arg), str(pval)))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # run this module's tests directly via nose, without output capture
    nose.runmodule(argv=[__file__, '-s'], exit=False)
|
@ -0,0 +1,98 @@
|
|||||||
|
# NOTE: contains only one test, _est_cont_fit, that is renamed so that
|
||||||
|
# nose doesn't run it
|
||||||
|
# I put this here for the record and for the case when someone wants to
|
||||||
|
# verify the quality of fit
|
||||||
|
# with current parameters: relatively small sample size, default starting values
|
||||||
|
# Ran 84 tests in 401.797s
|
||||||
|
# FAILED (failures=15)
|
||||||
|
#
|
||||||
|
#Ran 83 tests in 238.859s
|
||||||
|
#FAILED (failures=12)
|
||||||
|
|
||||||
|
from __future__ import division, print_function, absolute_import
|
||||||
|
|
||||||
|
import numpy.testing as npt
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from wafo import stats
|
||||||
|
|
||||||
|
from test_continuous_basic import distcont
|
||||||
|
|
||||||
|
# this is not a proper statistical test for convergence, but only
|
||||||
|
# verifies that the estimate and true values don't differ by too much
|
||||||
|
# This is not a proper statistical test for convergence; it only verifies
# that the estimate and the true values don't differ by too much.
n_repl1 = 1000  # sample size for first run
n_repl2 = 5000  # sample size for second run, if first run fails
thresh_percent = 0.25  # percent of true parameters for fail cut-off
thresh_min = 0.75  # minimum difference estimate - true to fail test

# Distributions whose fit is too slow for the default test run.
distslow = ['ncx2', 'rdist', 'gausshyper', 'recipinvgauss', 'ksone',
            'genexpon', 'vonmises', 'rice', 'mielke', 'powerlognorm',
            'kstwobign', 'tukeylambda', 'betaprime', 'gengamma',
            'johnsonsb', 'burr', 'truncexpon', 'pearson3', 'exponweib',
            'nakagami', 'wrapcauchy']
dist_rarely_fitted = ['f', 'ncf', 'nct', 'chi']
distskip = distslow + dist_rarely_fitted
|
||||||
|
|
||||||
|
#distcont = [['genextreme', (3.3184017469423535,)]]
|
||||||
|
#@npt.dec.slow
|
||||||
|
def test_cont_fit():
    """Check closeness of fitted vs. true parameters for the fast
    continuous distributions (slow/rarely-fitted ones are skipped)."""
    for distname, arg in distcont:
        if distname not in distskip:
            yield check_cont_fit, distname, arg
|
||||||
|
|
||||||
|
@npt.dec.slow
def _est_cont_fit_slow():
    """Fit test for the slow distributions (renamed so nose skips it).

    Note: some distributions don't converge with sample size <= 10000.
    """
    for distname, arg in distcont:
        if distname in distslow:
            yield check_cont_fit, distname, arg
|
||||||
|
|
||||||
|
def test_lognorm_fit_ticket1131():
    """Regression test for ticket #1131: lognorm fit at several parameters."""
    for param in [(2.1, 1., 1.), (1., 10., 1.), (1., 1., 10.)]:
        yield check_cont_fit, 'lognorm', param
|
||||||
|
|
||||||
|
def check_cont_fit(distname, arg):
    """Fit ``distname`` to its own random sample and require the estimated
    parameters to stay within a loose threshold of the true ones; on a
    first failure, retry once with a larger sample before raising."""
    distfn = getattr(stats, distname)
    rvs = distfn.rvs(size=n_repl1, *arg)
    est = distfn.fit(rvs)  # start with default values
    n = distfn.numargs + 2  # shape parameters plus loc and scale
    truearg = np.hstack([arg, [0.0, 1.0]])[:n]
    diff = est - truearg

    # per-parameter tolerance: a fraction of the true value, floored
    diffthreshold = np.max(np.vstack([truearg * thresh_percent,
                                      np.ones(distfn.numargs + 2) * thresh_min]),
                           0)
    # the location threshold scales with the sample mean instead
    diffthreshold[-2] = np.max([np.abs(rvs.mean()) * thresh_percent,
                                thresh_min])

    if np.any(np.isnan(est)):
        raise AssertionError('nan returned in fit')
    if np.any((np.abs(diff) - diffthreshold) > 0.0):
        # retry once with a larger sample before declaring failure
        rvs = np.concatenate([rvs, distfn.rvs(size=n_repl2 - n_repl1, *arg)])
        est = distfn.fit(rvs)
        truearg = np.hstack([arg, [0.0, 1.0]])[:n]
        diff = est - truearg
        if np.any((np.abs(diff) - diffthreshold) > 0.0):
            report = 'parameter: %s\n' % str(truearg)
            report += 'estimated: %s\n' % str(est)
            report += 'diff : %s\n' % str(diff)
            raise AssertionError('fit not very good in %s\n' % distfn.name
                                 + report)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    check_cont_fit('bradford', (0.29891359763170633,))
    # check_cont_fit('lognorm', (10,1,1))
    # check_cont_fit('ncx2', (21, 1.0560465975116415))
    import nose
    nose.runmodule(argv=[__file__, '-s'], exit=False)
|
@ -0,0 +1,36 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
from wafo import stats
|
||||||
|
import numpy as np
|
||||||
|
from numpy.testing import assert_almost_equal, assert_
|
||||||
|
|
||||||
|
def test_kde_1d():
    """Basic gaussian_kde checks against a normal fitted to the sample."""
    np.random.seed(8765678)
    sample_size = 500
    sample = np.random.randn(sample_size)
    smean = sample.mean()
    sstd = sample.std(ddof=1)

    # kde estimated from the original sample
    kde = stats.gaussian_kde(sample)

    # evaluate the kde density on a grid and compare with the normal pdf
    grid = np.linspace(-7, 7, 501)
    kde_vals = kde.evaluate(grid)
    norm_vals = stats.norm.pdf(grid, loc=smean, scale=sstd)
    dx = grid[1] - grid[0]

    assert_(np.sum((kde_vals - norm_vals) ** 2) * dx < 0.01)
    p_upper = kde.integrate_box_1d(smean, np.inf)
    p_lower = kde.integrate_box_1d(-np.inf, smean)
    assert_almost_equal(p_upper, 0.5, decimal=1)
    assert_almost_equal(p_lower, 0.5, decimal=1)
    assert_almost_equal(kde.integrate_box(smean, np.inf), p_upper,
                        decimal=13)
    assert_almost_equal(kde.integrate_box(-np.inf, smean), p_lower,
                        decimal=13)

    assert_almost_equal(kde.integrate_kde(kde),
                        (kde_vals ** 2).sum() * dx, decimal=2)
    assert_almost_equal(kde.integrate_gaussian(smean, sstd ** 2),
                        (kde_vals * norm_vals).sum() * dx, decimal=2)
|
@ -0,0 +1,490 @@
|
|||||||
|
"""
|
||||||
|
Tests for the stats.mstats module (support for maskd arrays)
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from numpy import nan
|
||||||
|
import numpy.ma as ma
|
||||||
|
from numpy.ma import masked, nomask
|
||||||
|
|
||||||
|
import scipy.stats.mstats as mstats
|
||||||
|
from numpy.testing import TestCase, run_module_suite
|
||||||
|
from numpy.ma.testutils import assert_equal, assert_almost_equal, \
|
||||||
|
assert_array_almost_equal, assert_
|
||||||
|
|
||||||
|
|
||||||
|
class TestMquantiles(TestCase):
    """Regression tests for mstats.mquantiles."""

    def test_mquantiles_limit_keyword(self):
        """Ticket #867"""
        # values outside limit=(0, 50) (the -999 sentinels) must be ignored
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        desired = [[19.2, 14.6, 1.45],
                   [40.0, 37.5, 2.5],
                   [42.8, 40.05, 3.55]]
        quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(quants, desired)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TestGMean(TestCase):
    """Tests for the masked-array geometric mean."""

    def test_1D(self):
        vec = (1, 2, 3, 4)
        actual = mstats.gmean(vec)
        expected = np.power(1 * 2 * 3 * 4, 1. / 4.)
        assert_almost_equal(actual, expected, decimal=14)

        along_last = mstats.gmean(vec, axis=-1)
        assert_almost_equal(actual, along_last, decimal=14)
        assert_(not isinstance(along_last, ma.MaskedArray))

        # masked entries are excluded from the product
        masked_vec = ma.array((1, 2, 3, 4), mask=(0, 0, 0, 1))
        actual = mstats.gmean(masked_vec)
        expected = np.power(1 * 2 * 3, 1. / 3.)
        assert_almost_equal(actual, expected, decimal=14)

        along_last = mstats.gmean(masked_vec, axis=-1)
        assert_almost_equal(actual, along_last, decimal=14)

    def test_2D(self):
        mat = ma.array(((1, 2, 3, 4), (1, 2, 3, 4), (1, 2, 3, 4)),
                       mask=((0, 0, 0, 0), (1, 0, 0, 1), (0, 1, 1, 0)))
        actual = mstats.gmean(mat)
        assert_array_almost_equal(actual, np.array((1, 2, 3, 4)),
                                  decimal=14)

        assert_array_almost_equal(actual, mstats.gmean(mat, axis=0),
                                  decimal=14)

        actual = mstats.gmean(mat, -1)
        expected = ma.array((np.power(1 * 2 * 3 * 4, 1. / 4.),
                             np.power(2 * 3, 1. / 2.),
                             np.power(1 * 4, 1. / 2.)))
        assert_array_almost_equal(actual, expected, decimal=14)
|
||||||
|
|
||||||
|
class TestHMean(TestCase):
    """Tests for the masked-array harmonic mean."""

    def test_1D(self):
        vec = (1, 2, 3, 4)
        actual = mstats.hmean(vec)
        expected = 4. / (1. / 1 + 1. / 2 + 1. / 3 + 1. / 4)
        assert_almost_equal(actual, expected, decimal=14)
        assert_almost_equal(actual, mstats.hmean(ma.array(vec), axis=-1),
                            decimal=14)

        # masked entries are excluded from the reciprocal sum
        masked_vec = ma.array((1, 2, 3, 4), mask=(0, 0, 0, 1))
        actual = mstats.hmean(masked_vec)
        expected = 3. / (1. / 1 + 1. / 2 + 1. / 3)
        assert_almost_equal(actual, expected, decimal=14)
        assert_almost_equal(actual, mstats.hmean(masked_vec, axis=-1),
                            decimal=14)

    def test_2D(self):
        mat = ma.array(((1, 2, 3, 4), (1, 2, 3, 4), (1, 2, 3, 4)),
                       mask=((0, 0, 0, 0), (1, 0, 0, 1), (0, 1, 1, 0)))
        actual = mstats.hmean(mat)
        assert_array_almost_equal(actual, ma.array((1, 2, 3, 4)),
                                  decimal=14)

        actual_rows = mstats.hmean(mat, axis=-1)
        expected = (4. / (1 / 1. + 1 / 2. + 1 / 3. + 1 / 4.),
                    2. / (1 / 2. + 1 / 3.),
                    2. / (1 / 1. + 1 / 4.))
        assert_array_almost_equal(actual_rows, expected, decimal=14)
|
||||||
|
|
||||||
|
|
||||||
|
class TestRanking(TestCase):
    """Tests for mstats.rankdata on masked arrays."""

    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)

    def test_ranking(self):
        x = ma.array([0, 1, 1, 1, 2, 3, 4, 5, 5, 6])
        assert_almost_equal(mstats.rankdata(x),
                            [1, 3, 3, 3, 5, 6, 7, 8.5, 8.5, 10])
        # masked values get rank 0 unless use_missing is set
        x[[3, 4]] = masked
        assert_almost_equal(mstats.rankdata(x),
                            [1, 2.5, 2.5, 0, 0, 4, 5, 6.5, 6.5, 8])
        assert_almost_equal(mstats.rankdata(x, use_missing=True),
                            [1, 2.5, 2.5, 4.5, 4.5, 4, 5, 6.5, 6.5, 8])
        x = ma.array([0, 1, 5, 1, 2, 4, 3, 5, 1, 6])
        assert_almost_equal(mstats.rankdata(x),
                            [1, 3, 8.5, 3, 5, 7, 6, 8.5, 3, 10])
        x = ma.array([[0, 1, 1, 1, 2], [3, 4, 5, 5, 6]])
        assert_almost_equal(mstats.rankdata(x),
                            [[1, 3, 3, 3, 5], [6, 7, 8.5, 8.5, 10]])
        assert_almost_equal(mstats.rankdata(x, axis=1),
                            [[1, 3, 3, 3, 5], [1, 2, 3.5, 3.5, 5]])
        assert_almost_equal(mstats.rankdata(x, axis=0),
                            [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2]])
|
||||||
|
|
||||||
|
|
||||||
|
class TestCorr(TestCase):
|
||||||
|
#
|
||||||
|
def test_pearsonr(self):
|
||||||
|
"Tests some computations of Pearson's r"
|
||||||
|
x = ma.arange(10)
|
||||||
|
olderr = np.seterr(all='ignore')
|
||||||
|
try:
|
||||||
|
assert_almost_equal(mstats.pearsonr(x,x)[0], 1.0)
|
||||||
|
assert_almost_equal(mstats.pearsonr(x,x[::-1])[0], -1.0)
|
||||||
|
|
||||||
|
x = ma.array(x, mask=True)
|
||||||
|
pr = mstats.pearsonr(x,x)
|
||||||
|
finally:
|
||||||
|
np.seterr(**olderr)
|
||||||
|
assert_(pr[0] is masked)
|
||||||
|
assert_(pr[1] is masked)
|
||||||
|
#
|
||||||
|
def test_spearmanr(self):
|
||||||
|
"Tests some computations of Spearman's rho"
|
||||||
|
(x, y) = ([5.05,6.75,3.21,2.66],[1.65,2.64,2.64,6.95])
|
||||||
|
assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
|
||||||
|
(x, y) = ([5.05,6.75,3.21,2.66,np.nan],[1.65,2.64,2.64,6.95,np.nan])
|
||||||
|
(x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
|
||||||
|
assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
|
||||||
|
#
|
||||||
|
x = [ 2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
|
||||||
|
1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7]
|
||||||
|
y = [22.6, 08.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
|
||||||
|
0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4]
|
||||||
|
assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)
|
||||||
|
x = [ 2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
|
||||||
|
1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7, np.nan]
|
||||||
|
y = [22.6, 08.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
|
||||||
|
0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4, np.nan]
|
||||||
|
(x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
|
||||||
|
assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)
|
||||||
|
#
|
||||||
|
def test_kendalltau(self):
|
||||||
|
"Tests some computations of Kendall's tau"
|
||||||
|
x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66,np.nan])
|
||||||
|
y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
|
||||||
|
z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
|
||||||
|
assert_almost_equal(np.asarray(mstats.kendalltau(x,y)),
|
||||||
|
[+0.3333333,0.4969059])
|
||||||
|
assert_almost_equal(np.asarray(mstats.kendalltau(x,z)),
|
||||||
|
[-0.5477226,0.2785987])
|
||||||
|
#
|
||||||
|
x = ma.fix_invalid([ 0, 0, 0, 0,20,20, 0,60, 0,20,
|
||||||
|
10,10, 0,40, 0,20, 0, 0, 0, 0, 0, np.nan])
|
||||||
|
y = ma.fix_invalid([ 0,80,80,80,10,33,60, 0,67,27,
|
||||||
|
25,80,80,80,80,80,80, 0,10,45, np.nan, 0])
|
||||||
|
result = mstats.kendalltau(x,y)
|
||||||
|
assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])
|
||||||
|
#
|
||||||
|
def test_kendalltau_seasonal(self):
|
||||||
|
"Tests the seasonal Kendall tau."
|
||||||
|
x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
|
||||||
|
[ 4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
|
||||||
|
[ 3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
|
||||||
|
[nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
|
||||||
|
x = ma.fix_invalid(x).T
|
||||||
|
output = mstats.kendalltau_seasonal(x)
|
||||||
|
assert_almost_equal(output['global p-value (indep)'], 0.008, 3)
|
||||||
|
assert_almost_equal(output['seasonal p-value'].round(2),
|
||||||
|
[0.18,0.53,0.20,0.04])
|
||||||
|
#
|
||||||
|
def test_pointbiserial(self):
|
||||||
|
"Tests point biserial"
|
||||||
|
x = [1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,
|
||||||
|
0,0,0,0,1,-1]
|
||||||
|
y = [14.8,13.8,12.4,10.1,7.1,6.1,5.8,4.6,4.3,3.5,3.3,3.2,3.0,
|
||||||
|
2.8,2.8,2.5,2.4,2.3,2.1,1.7,1.7,1.5,1.3,1.3,1.2,1.2,1.1,
|
||||||
|
0.8,0.7,0.6,0.5,0.2,0.2,0.1,np.nan]
|
||||||
|
assert_almost_equal(mstats.pointbiserialr(x, y)[0], 0.36149, 5)
|
||||||
|
|
||||||
|
|
||||||
|
class TestTrimming(TestCase):
|
||||||
|
#
|
||||||
|
def test_trim(self):
|
||||||
|
"Tests trimming"
|
||||||
|
a = ma.arange(10)
|
||||||
|
assert_equal(mstats.trim(a), [0,1,2,3,4,5,6,7,8,9])
|
||||||
|
a = ma.arange(10)
|
||||||
|
assert_equal(mstats.trim(a,(2,8)), [None,None,2,3,4,5,6,7,8,None])
|
||||||
|
a = ma.arange(10)
|
||||||
|
assert_equal(mstats.trim(a,limits=(2,8),inclusive=(False,False)),
|
||||||
|
[None,None,None,3,4,5,6,7,None,None])
|
||||||
|
a = ma.arange(10)
|
||||||
|
assert_equal(mstats.trim(a,limits=(0.1,0.2),relative=True),
|
||||||
|
[None,1,2,3,4,5,6,7,None,None])
|
||||||
|
#
|
||||||
|
a = ma.arange(12)
|
||||||
|
a[[0,-1]] = a[5] = masked
|
||||||
|
assert_equal(mstats.trim(a,(2,8)),
|
||||||
|
[None,None,2,3,4,None,6,7,8,None,None,None])
|
||||||
|
#
|
||||||
|
x = ma.arange(100).reshape(10,10)
|
||||||
|
trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=None)
|
||||||
|
assert_equal(trimx._mask.ravel(),[1]*10+[0]*70+[1]*20)
|
||||||
|
trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=0)
|
||||||
|
assert_equal(trimx._mask.ravel(),[1]*10+[0]*70+[1]*20)
|
||||||
|
trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=-1)
|
||||||
|
assert_equal(trimx._mask.T.ravel(),[1]*10+[0]*70+[1]*20)
|
||||||
|
#
|
||||||
|
x = ma.arange(110).reshape(11,10)
|
||||||
|
x[1] = masked
|
||||||
|
trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=None)
|
||||||
|
assert_equal(trimx._mask.ravel(),[1]*20+[0]*70+[1]*20)
|
||||||
|
trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=0)
|
||||||
|
assert_equal(trimx._mask.ravel(),[1]*20+[0]*70+[1]*20)
|
||||||
|
trimx = mstats.trim(x.T,(0.1,0.2),relative=True,axis=-1)
|
||||||
|
assert_equal(trimx.T._mask.ravel(),[1]*20+[0]*70+[1]*20)
|
||||||
|
#
|
||||||
|
def test_trim_old(self):
|
||||||
|
"Tests trimming."
|
||||||
|
x = ma.arange(100)
|
||||||
|
assert_equal(mstats.trimboth(x).count(), 60)
|
||||||
|
assert_equal(mstats.trimtail(x,tail='r').count(), 80)
|
||||||
|
x[50:70] = masked
|
||||||
|
trimx = mstats.trimboth(x)
|
||||||
|
assert_equal(trimx.count(), 48)
|
||||||
|
assert_equal(trimx._mask, [1]*16 + [0]*34 + [1]*20 + [0]*14 + [1]*16)
|
||||||
|
x._mask = nomask
|
||||||
|
x.shape = (10,10)
|
||||||
|
assert_equal(mstats.trimboth(x).count(), 60)
|
||||||
|
assert_equal(mstats.trimtail(x).count(), 80)
|
||||||
|
#
|
||||||
|
def test_trimmedmean(self):
|
||||||
|
"Tests the trimmed mean."
|
||||||
|
data = ma.array([ 77, 87, 88,114,151,210,219,246,253,262,
|
||||||
|
296,299,306,376,428,515,666,1310,2611])
|
||||||
|
assert_almost_equal(mstats.trimmed_mean(data,0.1), 343, 0)
|
||||||
|
assert_almost_equal(mstats.trimmed_mean(data,(0.1,0.1)), 343, 0)
|
||||||
|
assert_almost_equal(mstats.trimmed_mean(data,(0.2,0.2)), 283, 0)
|
||||||
|
#
|
||||||
|
def test_trimmed_stde(self):
|
||||||
|
"Tests the trimmed mean standard error."
|
||||||
|
data = ma.array([ 77, 87, 88,114,151,210,219,246,253,262,
|
||||||
|
296,299,306,376,428,515,666,1310,2611])
|
||||||
|
assert_almost_equal(mstats.trimmed_stde(data,(0.2,0.2)), 56.13193, 5)
|
||||||
|
assert_almost_equal(mstats.trimmed_stde(data,0.2), 56.13193, 5)
|
||||||
|
#
|
||||||
|
def test_winsorization(self):
|
||||||
|
"Tests the Winsorization of the data."
|
||||||
|
data = ma.array([ 77, 87, 88,114,151,210,219,246,253,262,
|
||||||
|
296,299,306,376,428,515,666,1310,2611])
|
||||||
|
assert_almost_equal(mstats.winsorize(data,(0.2,0.2)).var(ddof=1),
|
||||||
|
21551.4, 1)
|
||||||
|
data[5] = masked
|
||||||
|
winsorized = mstats.winsorize(data)
|
||||||
|
assert_equal(winsorized.mask, data.mask)
|
||||||
|
|
||||||
|
|
||||||
|
class TestMoments(TestCase):
|
||||||
|
"""
|
||||||
|
Comparison numbers are found using R v.1.5.1
|
||||||
|
note that length(testcase) = 4
|
||||||
|
testmathworks comes from documentation for the
|
||||||
|
Statistics Toolbox for Matlab and can be found at both
|
||||||
|
http://www.mathworks.com/access/helpdesk/help/toolbox/stats/kurtosis.shtml
|
||||||
|
http://www.mathworks.com/access/helpdesk/help/toolbox/stats/skewness.shtml
|
||||||
|
Note that both test cases came from here.
|
||||||
|
"""
|
||||||
|
testcase = [1,2,3,4]
|
||||||
|
testmathworks = ma.fix_invalid([1.165 , 0.6268, 0.0751, 0.3516, -0.6965,
|
||||||
|
np.nan])
|
||||||
|
def test_moment(self):
|
||||||
|
"""
|
||||||
|
mean((testcase-mean(testcase))**power,axis=0),axis=0))**power))"""
|
||||||
|
y = mstats.moment(self.testcase,1)
|
||||||
|
assert_almost_equal(y,0.0,10)
|
||||||
|
y = mstats.moment(self.testcase,2)
|
||||||
|
assert_almost_equal(y,1.25)
|
||||||
|
y = mstats.moment(self.testcase,3)
|
||||||
|
assert_almost_equal(y,0.0)
|
||||||
|
y = mstats.moment(self.testcase,4)
|
||||||
|
assert_almost_equal(y,2.5625)
|
||||||
|
def test_variation(self):
|
||||||
|
"""variation = samplestd/mean """
|
||||||
|
## y = stats.variation(self.shoes[0])
|
||||||
|
## assert_almost_equal(y,21.8770668)
|
||||||
|
y = mstats.variation(self.testcase)
|
||||||
|
assert_almost_equal(y,0.44721359549996, 10)
|
||||||
|
|
||||||
|
def test_skewness(self):
|
||||||
|
"""
|
||||||
|
sum((testmathworks-mean(testmathworks,axis=0))**3,axis=0)/((sqrt(var(testmathworks)*4/5))**3)/5
|
||||||
|
"""
|
||||||
|
y = mstats.skew(self.testmathworks)
|
||||||
|
assert_almost_equal(y,-0.29322304336607,10)
|
||||||
|
y = mstats.skew(self.testmathworks,bias=0)
|
||||||
|
assert_almost_equal(y,-0.437111105023940,10)
|
||||||
|
y = mstats.skew(self.testcase)
|
||||||
|
assert_almost_equal(y,0.0,10)
|
||||||
|
|
||||||
|
def test_kurtosis(self):
|
||||||
|
"""
|
||||||
|
sum((testcase-mean(testcase,axis=0))**4,axis=0)/((sqrt(var(testcase)*3/4))**4)/4
|
||||||
|
sum((test2-mean(testmathworks,axis=0))**4,axis=0)/((sqrt(var(testmathworks)*4/5))**4)/5
|
||||||
|
Set flags for axis = 0 and
|
||||||
|
fisher=0 (Pearson's definition of kurtosis for compatibility with Matlab)
|
||||||
|
"""
|
||||||
|
y = mstats.kurtosis(self.testmathworks,0,fisher=0,bias=1)
|
||||||
|
assert_almost_equal(y, 2.1658856802973,10)
|
||||||
|
# Note that MATLAB has confusing docs for the following case
|
||||||
|
# kurtosis(x,0) gives an unbiased estimate of Pearson's skewness
|
||||||
|
# kurtosis(x) gives a biased estimate of Fisher's skewness (Pearson-3)
|
||||||
|
# The MATLAB docs imply that both should give Fisher's
|
||||||
|
y = mstats.kurtosis(self.testmathworks,fisher=0,bias=0)
|
||||||
|
assert_almost_equal(y, 3.663542721189047,10)
|
||||||
|
y = mstats.kurtosis(self.testcase,0,0)
|
||||||
|
assert_almost_equal(y,1.64)
|
||||||
|
#
|
||||||
|
def test_mode(self):
|
||||||
|
"Tests the mode"
|
||||||
|
#
|
||||||
|
a1 = [0,0,0,1,1,1,2,3,3,3,3,4,5,6,7]
|
||||||
|
a2 = np.reshape(a1, (3,5))
|
||||||
|
ma1 = ma.masked_where(ma.array(a1)>2,a1)
|
||||||
|
ma2 = ma.masked_where(a2>2, a2)
|
||||||
|
assert_equal(mstats.mode(a1, axis=None), (3,4))
|
||||||
|
assert_equal(mstats.mode(ma1, axis=None), (0,3))
|
||||||
|
assert_equal(mstats.mode(a2, axis=None), (3,4))
|
||||||
|
assert_equal(mstats.mode(ma2, axis=None), (0,3))
|
||||||
|
assert_equal(mstats.mode(a2, axis=0), ([[0,0,0,1,1]],[[1,1,1,1,1]]))
|
||||||
|
assert_equal(mstats.mode(ma2, axis=0), ([[0,0,0,1,1]],[[1,1,1,1,1]]))
|
||||||
|
assert_equal(mstats.mode(a2, axis=-1), ([[0],[3],[3]], [[3],[3],[1]]))
|
||||||
|
assert_equal(mstats.mode(ma2, axis=-1), ([[0],[1],[0]], [[3],[1],[0]]))
|
||||||
|
|
||||||
|
|
||||||
|
class TestPercentile(TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self.a1 = [3,4,5,10,-3,-5,6]
|
||||||
|
self.a2 = [3,-6,-2,8,7,4,2,1]
|
||||||
|
self.a3 = [3.,4,5,10,-3,-5,-6,7.0]
|
||||||
|
|
||||||
|
def test_percentile(self):
|
||||||
|
x = np.arange(8) * 0.5
|
||||||
|
assert_equal(mstats.scoreatpercentile(x, 0), 0.)
|
||||||
|
assert_equal(mstats.scoreatpercentile(x, 100), 3.5)
|
||||||
|
assert_equal(mstats.scoreatpercentile(x, 50), 1.75)
|
||||||
|
|
||||||
|
def test_2D(self):
|
||||||
|
x = ma.array([[1, 1, 1],
|
||||||
|
[1, 1, 1],
|
||||||
|
[4, 4, 3],
|
||||||
|
[1, 1, 1],
|
||||||
|
[1, 1, 1]])
|
||||||
|
assert_equal(mstats.scoreatpercentile(x,50), [1,1,1])
|
||||||
|
|
||||||
|
|
||||||
|
class TestVariability(TestCase):
|
||||||
|
""" Comparison numbers are found using R v.1.5.1
|
||||||
|
note that length(testcase) = 4
|
||||||
|
"""
|
||||||
|
testcase = ma.fix_invalid([1,2,3,4,np.nan])
|
||||||
|
|
||||||
|
def test_signaltonoise(self):
|
||||||
|
"""
|
||||||
|
this is not in R, so used
|
||||||
|
mean(testcase,axis=0)/(sqrt(var(testcase)*3/4)) """
|
||||||
|
#y = stats.signaltonoise(self.shoes[0])
|
||||||
|
#assert_approx_equal(y,4.5709967)
|
||||||
|
y = mstats.signaltonoise(self.testcase)
|
||||||
|
assert_almost_equal(y,2.236067977)
|
||||||
|
|
||||||
|
def test_sem(self):
|
||||||
|
"""
|
||||||
|
this is not in R, so used
|
||||||
|
sqrt(var(testcase)*3/4)/sqrt(3)
|
||||||
|
"""
|
||||||
|
#y = stats.sem(self.shoes[0])
|
||||||
|
#assert_approx_equal(y,0.775177399)
|
||||||
|
y = mstats.sem(self.testcase)
|
||||||
|
assert_almost_equal(y,0.6454972244)
|
||||||
|
|
||||||
|
def test_zmap(self):
|
||||||
|
"""
|
||||||
|
not in R, so tested by using
|
||||||
|
(testcase[i]-mean(testcase,axis=0))/sqrt(var(testcase)*3/4)
|
||||||
|
"""
|
||||||
|
y = mstats.zmap(self.testcase, self.testcase)
|
||||||
|
desired_unmaskedvals = ([-1.3416407864999, -0.44721359549996 ,
|
||||||
|
0.44721359549996 , 1.3416407864999])
|
||||||
|
assert_array_almost_equal(desired_unmaskedvals,
|
||||||
|
y.data[y.mask==False], decimal=12)
|
||||||
|
|
||||||
|
def test_zscore(self):
|
||||||
|
"""
|
||||||
|
not in R, so tested by using
|
||||||
|
(testcase[i]-mean(testcase,axis=0))/sqrt(var(testcase)*3/4)
|
||||||
|
"""
|
||||||
|
y = mstats.zscore(self.testcase)
|
||||||
|
desired = ma.fix_invalid([-1.3416407864999, -0.44721359549996 ,
|
||||||
|
0.44721359549996 , 1.3416407864999, np.nan])
|
||||||
|
assert_almost_equal(desired, y, decimal=12)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TestMisc(TestCase):
|
||||||
|
#
|
||||||
|
def test_obrientransform(self):
|
||||||
|
"Tests Obrien transform"
|
||||||
|
args = [[5]*5+[6]*11+[7]*9+[8]*3+[9]*2+[10]*2,
|
||||||
|
[6]+[7]*2+[8]*4+[9]*9+[10]*16]
|
||||||
|
result = [5*[3.1828]+11*[0.5591]+9*[0.0344]+3*[1.6086]+2*[5.2817]+2*[11.0538],
|
||||||
|
[10.4352]+2*[4.8599]+4*[1.3836]+9*[0.0061]+16*[0.7277]]
|
||||||
|
assert_almost_equal(np.round(mstats.obrientransform(*args).T,4),
|
||||||
|
result,4)
|
||||||
|
#
|
||||||
|
def test_kstwosamp(self):
|
||||||
|
"Tests the Kolmogorov-Smirnov 2 samples test"
|
||||||
|
x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
|
||||||
|
[ 4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
|
||||||
|
[ 3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
|
||||||
|
[nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
|
||||||
|
x = ma.fix_invalid(x).T
|
||||||
|
(winter,spring,summer,fall) = x.T
|
||||||
|
#
|
||||||
|
assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring),4),
|
||||||
|
(0.1818,0.9892))
|
||||||
|
assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'g'),4),
|
||||||
|
(0.1469,0.7734))
|
||||||
|
assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'l'),4),
|
||||||
|
(0.1818,0.6744))
|
||||||
|
#
|
||||||
|
def test_friedmanchisq(self):
|
||||||
|
"Tests the Friedman Chi-square test"
|
||||||
|
# No missing values
|
||||||
|
args = ([9.0,9.5,5.0,7.5,9.5,7.5,8.0,7.0,8.5,6.0],
|
||||||
|
[7.0,6.5,7.0,7.5,5.0,8.0,6.0,6.5,7.0,7.0],
|
||||||
|
[6.0,8.0,4.0,6.0,7.0,6.5,6.0,4.0,6.5,3.0])
|
||||||
|
result = mstats.friedmanchisquare(*args)
|
||||||
|
assert_almost_equal(result[0], 10.4737, 4)
|
||||||
|
assert_almost_equal(result[1], 0.005317, 6)
|
||||||
|
# Missing values
|
||||||
|
x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
|
||||||
|
[ 4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
|
||||||
|
[ 3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
|
||||||
|
[nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
|
||||||
|
x = ma.fix_invalid(x)
|
||||||
|
result = mstats.friedmanchisquare(*x)
|
||||||
|
assert_almost_equal(result[0], 2.0156, 4)
|
||||||
|
assert_almost_equal(result[1], 0.5692, 4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_regress_simple():
|
||||||
|
"""Regress a line with sinusoidal noise. Test for #1273."""
|
||||||
|
x = np.linspace(0, 100, 100)
|
||||||
|
y = 0.2 * np.linspace(0, 100, 100) + 10
|
||||||
|
y += np.sin(np.linspace(0, 20, 100))
|
||||||
|
|
||||||
|
slope, intercept, r_value, p_value, sterr = mstats.linregress(x, y)
|
||||||
|
assert_almost_equal(slope, 0.19644990055858422)
|
||||||
|
assert_almost_equal(intercept, 10.211269918932341)
|
||||||
|
|
||||||
|
|
||||||
|
def test_plotting_positions():
|
||||||
|
"""Regression test for #1256"""
|
||||||
|
pos = mstats.plotting_positions(np.arange(3), 0, 0)
|
||||||
|
assert_array_almost_equal(pos.data, np.array([0.25, 0.5, 0.75]))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
run_module_suite()
|
@ -0,0 +1,103 @@
|
|||||||
|
# pylint: disable-msg=W0611, W0612, W0511,R0201
|
||||||
|
"""Tests suite for maskedArray statistics.
|
||||||
|
|
||||||
|
:author: Pierre Gerard-Marchant
|
||||||
|
:contact: pierregm_at_uga_dot_edu
|
||||||
|
"""
|
||||||
|
__author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)"
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
import numpy.ma as ma
|
||||||
|
|
||||||
|
import scipy.stats.mstats as ms
|
||||||
|
#import scipy.stats.mmorestats as mms
|
||||||
|
|
||||||
|
from numpy.testing import TestCase, run_module_suite, assert_equal, \
|
||||||
|
assert_almost_equal, assert_
|
||||||
|
|
||||||
|
|
||||||
|
class TestMisc(TestCase):
|
||||||
|
#
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
TestCase.__init__(self, *args, **kwargs)
|
||||||
|
#
|
||||||
|
def test_mjci(self):
|
||||||
|
"Tests the Marits-Jarrett estimator"
|
||||||
|
data = ma.array([ 77, 87, 88,114,151,210,219,246,253,262,
|
||||||
|
296,299,306,376,428,515,666,1310,2611])
|
||||||
|
assert_almost_equal(ms.mjci(data),[55.76819,45.84028,198.87875],5)
|
||||||
|
#
|
||||||
|
def test_trimmedmeanci(self):
|
||||||
|
"Tests the confidence intervals of the trimmed mean."
|
||||||
|
data = ma.array([545,555,558,572,575,576,578,580,
|
||||||
|
594,605,635,651,653,661,666])
|
||||||
|
assert_almost_equal(ms.trimmed_mean(data,0.2), 596.2, 1)
|
||||||
|
assert_equal(np.round(ms.trimmed_mean_ci(data,(0.2,0.2)),1),
|
||||||
|
[561.8, 630.6])
|
||||||
|
#
|
||||||
|
def test_idealfourths(self):
|
||||||
|
"Tests ideal-fourths"
|
||||||
|
test = np.arange(100)
|
||||||
|
assert_almost_equal(np.asarray(ms.idealfourths(test)),
|
||||||
|
[24.416667,74.583333],6)
|
||||||
|
test_2D = test.repeat(3).reshape(-1,3)
|
||||||
|
assert_almost_equal(ms.idealfourths(test_2D, axis=0),
|
||||||
|
[[24.416667,24.416667,24.416667],
|
||||||
|
[74.583333,74.583333,74.583333]],6)
|
||||||
|
assert_almost_equal(ms.idealfourths(test_2D, axis=1),
|
||||||
|
test.repeat(2).reshape(-1,2))
|
||||||
|
test = [0,0]
|
||||||
|
_result = ms.idealfourths(test)
|
||||||
|
assert_(np.isnan(_result).all())
|
||||||
|
|
||||||
|
#..............................................................................
|
||||||
|
class TestQuantiles(TestCase):
|
||||||
|
#
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
TestCase.__init__(self, *args, **kwargs)
|
||||||
|
#
|
||||||
|
def test_hdquantiles(self):
|
||||||
|
data = [0.706560797,0.727229578,0.990399276,0.927065621,0.158953014,
|
||||||
|
0.887764025,0.239407086,0.349638551,0.972791145,0.149789972,
|
||||||
|
0.936947700,0.132359948,0.046041972,0.641675031,0.945530547,
|
||||||
|
0.224218684,0.771450991,0.820257774,0.336458052,0.589113496,
|
||||||
|
0.509736129,0.696838829,0.491323573,0.622767425,0.775189248,
|
||||||
|
0.641461450,0.118455200,0.773029450,0.319280007,0.752229111,
|
||||||
|
0.047841438,0.466295911,0.583850781,0.840581845,0.550086491,
|
||||||
|
0.466470062,0.504765074,0.226855960,0.362641207,0.891620942,
|
||||||
|
0.127898691,0.490094097,0.044882048,0.041441695,0.317976349,
|
||||||
|
0.504135618,0.567353033,0.434617473,0.636243375,0.231803616,
|
||||||
|
0.230154113,0.160011327,0.819464108,0.854706985,0.438809221,
|
||||||
|
0.487427267,0.786907310,0.408367937,0.405534192,0.250444460,
|
||||||
|
0.995309248,0.144389588,0.739947527,0.953543606,0.680051621,
|
||||||
|
0.388382017,0.863530727,0.006514031,0.118007779,0.924024803,
|
||||||
|
0.384236354,0.893687694,0.626534881,0.473051932,0.750134705,
|
||||||
|
0.241843555,0.432947602,0.689538104,0.136934797,0.150206859,
|
||||||
|
0.474335206,0.907775349,0.525869295,0.189184225,0.854284286,
|
||||||
|
0.831089744,0.251637345,0.587038213,0.254475554,0.237781276,
|
||||||
|
0.827928620,0.480283781,0.594514455,0.213641488,0.024194386,
|
||||||
|
0.536668589,0.699497811,0.892804071,0.093835427,0.731107772]
|
||||||
|
#
|
||||||
|
assert_almost_equal(ms.hdquantiles(data,[0., 1.]),
|
||||||
|
[0.006514031, 0.995309248])
|
||||||
|
hdq = ms.hdquantiles(data,[0.25, 0.5, 0.75])
|
||||||
|
assert_almost_equal(hdq, [0.253210762, 0.512847491, 0.762232442,])
|
||||||
|
hdq = ms.hdquantiles_sd(data,[0.25, 0.5, 0.75])
|
||||||
|
assert_almost_equal(hdq, [0.03786954, 0.03805389, 0.03800152,], 4)
|
||||||
|
#
|
||||||
|
data = np.array(data).reshape(10,10)
|
||||||
|
hdq = ms.hdquantiles(data,[0.25,0.5,0.75],axis=0)
|
||||||
|
assert_almost_equal(hdq[:,0], ms.hdquantiles(data[:,0],[0.25,0.5,0.75]))
|
||||||
|
assert_almost_equal(hdq[:,-1], ms.hdquantiles(data[:,-1],[0.25,0.5,0.75]))
|
||||||
|
hdq = ms.hdquantiles(data,[0.25,0.5,0.75],axis=0,var=True)
|
||||||
|
assert_almost_equal(hdq[...,0],
|
||||||
|
ms.hdquantiles(data[:,0],[0.25,0.5,0.75],var=True))
|
||||||
|
assert_almost_equal(hdq[...,-1],
|
||||||
|
ms.hdquantiles(data[:,-1],[0.25,0.5,0.75], var=True))
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
run_module_suite()
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,412 @@
|
|||||||
|
"""
|
||||||
|
Commentary
|
||||||
|
----------
|
||||||
|
|
||||||
|
Most of the work is done by the scipy.stats.distributions module.
|
||||||
|
|
||||||
|
This provides a plethora of continuous distributions to play with.
|
||||||
|
|
||||||
|
Each distribution has functions to generate random deviates, pdf's,
|
||||||
|
cdf's etc. as well as a function to fit the distribution to some given
|
||||||
|
data.
|
||||||
|
|
||||||
|
The fitting uses scipy.optimize.fmin to minimise the log odds of the
|
||||||
|
data given the distribution.
|
||||||
|
|
||||||
|
There are a couple of problems with this approach. First it is
|
||||||
|
sensitive to the initial guess at the parameters. Second it can be a
|
||||||
|
little slow.
|
||||||
|
|
||||||
|
Two key parameters are the 'loc' and 'scale' parameters. Data is
|
||||||
|
shifted by 'loc' and scaled by scale prior to fitting. Supplying
|
||||||
|
appropriate values for these parameters is important to getting a good
|
||||||
|
fit.
|
||||||
|
|
||||||
|
See the factory() function which picks from a handful of common
|
||||||
|
approaches for each distribution.
|
||||||
|
|
||||||
|
For some distributions (eg normal) it really makes sense just to
|
||||||
|
calculate the parameters directly from the data.
|
||||||
|
|
||||||
|
The code in the __ifmain__ should be a good guide how to use this.
|
||||||
|
|
||||||
|
Simply:
|
||||||
|
get a QuickFit object
|
||||||
|
add the distributions you want to try to fit
|
||||||
|
call fit() with your data
|
||||||
|
call fit_stats() to generate some stats on the fit.
|
||||||
|
call plot() if you want to see a plot.
|
||||||
|
|
||||||
|
|
||||||
|
Named after Mrs Twolumps, minister's secretary in the silly walks
|
||||||
|
sketch, who brings in coffee with a full silly walk.
|
||||||
|
|
||||||
|
Tenuous link with curve fitting is that you generally see "two lumps"
|
||||||
|
one in your data and the other in the curve that is being fitted.
|
||||||
|
|
||||||
|
Or alternately, if your data is not too silly then you can fit a
|
||||||
|
curve to it.
|
||||||
|
|
||||||
|
License is GNU LGPL v3, see https://launchpad.net/twolumps
|
||||||
|
"""
|
||||||
|
import inspect
|
||||||
|
from itertools import izip
|
||||||
|
|
||||||
|
import numpy
|
||||||
|
from wafo import stats
|
||||||
|
from scipy import mean, std
|
||||||
|
|
||||||
|
def factory(name):
|
||||||
|
""" Factory to return appropriate objects for each distro. """
|
||||||
|
fitters = dict(
|
||||||
|
|
||||||
|
beta=ZeroOneScipyDistribution,
|
||||||
|
alpha=ZeroOneScipyDistribution,
|
||||||
|
ncf=ZeroOneScipyDistribution,
|
||||||
|
triang=ZeroOneScipyDistribution,
|
||||||
|
uniform=ZeroOneScipyDistribution,
|
||||||
|
powerlaw=ZeroOneScipyDistribution,
|
||||||
|
|
||||||
|
pareto=MinLocScipyDistribution,
|
||||||
|
expon=MinLocScipyDistribution,
|
||||||
|
gamma=MinLocScipyDistribution,
|
||||||
|
lognorm=MinLocScipyDistribution,
|
||||||
|
maxwell=MinLocScipyDistribution,
|
||||||
|
weibull_min=MinLocScipyDistribution,
|
||||||
|
|
||||||
|
weibull_max=MaxLocScipyDistribution)
|
||||||
|
|
||||||
|
return fitters.get(name, ScipyDistribution)(name)
|
||||||
|
|
||||||
|
|
||||||
|
def get_continuous_distros():
|
||||||
|
""" Find all attributes of stats that are continuous distributions. """
|
||||||
|
|
||||||
|
fitters = []
|
||||||
|
skip = set()
|
||||||
|
for name, item in inspect.getmembers(stats):
|
||||||
|
if name in skip: continue
|
||||||
|
if item is stats.rv_continuous: continue
|
||||||
|
if isinstance(item, stats.rv_continuous):
|
||||||
|
fitters.append([name, factory(name)])
|
||||||
|
|
||||||
|
return fitters
|
||||||
|
|
||||||
|
|
||||||
|
class ScipyDistribution(object):
|
||||||
|
|
||||||
|
def __init__(self, name):
|
||||||
|
|
||||||
|
self.name = name
|
||||||
|
self.distro = self.get_distro()
|
||||||
|
self.fitted = None
|
||||||
|
|
||||||
|
def __getattr__(self, attr):
|
||||||
|
""" Try delegating to the distro object """
|
||||||
|
return getattr(self.distro, attr)
|
||||||
|
|
||||||
|
def get_distro(self):
|
||||||
|
|
||||||
|
return getattr(stats, self.name)
|
||||||
|
|
||||||
|
def set_distro(self, parms):
|
||||||
|
|
||||||
|
self.distro = getattr(stats, self.name)(*parms)
|
||||||
|
|
||||||
|
return self.distro
|
||||||
|
|
||||||
|
def calculate_loc_and_scale(self, data):
|
||||||
|
""" Calculate loc and scale parameters for fit.
|
||||||
|
|
||||||
|
Depending on the distribution, these need to be approximately
|
||||||
|
right to get a good fit.
|
||||||
|
"""
|
||||||
|
return mean(data), std(data)
|
||||||
|
|
||||||
|
def fit(self, data, *args, **kwargs):
|
||||||
|
""" This needs some work.
|
||||||
|
|
||||||
|
Seems the various scipy distributions do a reasonable job if given a good hint.
|
||||||
|
|
||||||
|
Need to get distro specific hints.
|
||||||
|
"""
|
||||||
|
|
||||||
|
fits = []
|
||||||
|
|
||||||
|
# try with and without providing loc and scale hints
|
||||||
|
# increases chance of a fit without an exception being
|
||||||
|
# generated.
|
||||||
|
for (loc, scale) in ((0.0, 1.0),
|
||||||
|
self.calculate_loc_and_scale(data)):
|
||||||
|
|
||||||
|
try:
|
||||||
|
parms = self.get_distro().fit(data, loc=loc, scale=scale)
|
||||||
|
|
||||||
|
self.set_distro(list(parms))
|
||||||
|
expected = self.expected(data)
|
||||||
|
rss = ((expected-data)**2).sum()
|
||||||
|
fits.append([rss, list(parms)])
|
||||||
|
|
||||||
|
parms = self.get_distro().fit(data, floc=loc, scale=scale)
|
||||||
|
|
||||||
|
self.set_distro(list(parms))
|
||||||
|
expected = self.expected(data)
|
||||||
|
rss = ((expected-data)**2).sum()
|
||||||
|
fits.append([rss, list(parms)])
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# no fits means all tries raised exceptions
|
||||||
|
if not fits:
|
||||||
|
raise Exception("Exception in fit()")
|
||||||
|
|
||||||
|
# pick the one with the smallest rss
|
||||||
|
fits.sort()
|
||||||
|
self.parms = fits[0][1]
|
||||||
|
print self.parms
|
||||||
|
|
||||||
|
return self.set_distro(list(self.parms))
|
||||||
|
|
||||||
|
def expected(self, data):
|
||||||
|
""" Calculate expected values at each data point """
|
||||||
|
if self.fitted is not None:
|
||||||
|
return self.fitted
|
||||||
|
|
||||||
|
n = len(data)
|
||||||
|
xx = numpy.linspace(0, 1, n + 2)[1:-1]
|
||||||
|
self.fitted = self.ppf(xx)
|
||||||
|
#self.fitted = [self.ppf(x) for x in xx]
|
||||||
|
|
||||||
|
return self.fitted
|
||||||
|
|
||||||
|
def fit_stats(self, data):
|
||||||
|
""" Return stats on the fits
|
||||||
|
|
||||||
|
data assumed to be sorted.
|
||||||
|
"""
|
||||||
|
n = len(data)
|
||||||
|
|
||||||
|
dvar = numpy.var(data)
|
||||||
|
expected = self.expected(data)
|
||||||
|
evar = numpy.var(expected)
|
||||||
|
|
||||||
|
rss = 0.0
|
||||||
|
for expect, obs in izip(expected, data):
|
||||||
|
rss += (obs-expect) ** 2.0
|
||||||
|
|
||||||
|
self.rss = rss
|
||||||
|
self.dss = dvar * n
|
||||||
|
self.fss = evar * n
|
||||||
|
|
||||||
|
def residuals(self, data):
|
||||||
|
""" Return residuals """
|
||||||
|
expected = self.expected(data)
|
||||||
|
|
||||||
|
return numpy.array(data) - numpy.array(expected)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class MinLocScipyDistribution(ScipyDistribution):
|
||||||
|
|
||||||
|
def calculate_loc_and_scale(self, data):
|
||||||
|
""" Set loc to min value in the data.
|
||||||
|
|
||||||
|
Useful for weibull_min
|
||||||
|
"""
|
||||||
|
return min(data), std(data)
|
||||||
|
|
||||||
|
class MaxLocScipyDistribution(ScipyDistribution):
|
||||||
|
|
||||||
|
def calculate_loc_and_scale(self, data):
|
||||||
|
""" Set loc to max value in the data.
|
||||||
|
|
||||||
|
Useful for weibull_max
|
||||||
|
"""
|
||||||
|
return max(data), std(data)
|
||||||
|
|
||||||
|
class ZeroOneScipyDistribution(ScipyDistribution):
|
||||||
|
|
||||||
|
def calculate_loc_and_scale(self, data):
|
||||||
|
""" Set loc and scale to move to [0, 1] interval.
|
||||||
|
|
||||||
|
Useful for beta distribution
|
||||||
|
"""
|
||||||
|
return min(data), max(data)-min(data)
|
||||||
|
|
||||||
|
class QuickFit(object):
|
||||||
|
""" Fit a family of distributions.
|
||||||
|
|
||||||
|
Calculates stats on each fit.
|
||||||
|
|
||||||
|
Option to create plots.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
|
||||||
|
self.distributions = []
|
||||||
|
|
||||||
|
def add_distribution(self, distribution):
|
||||||
|
""" Add a ready-prepared ScipyDistribution """
|
||||||
|
self.distributions.append(distribution)
|
||||||
|
|
||||||
|
def add(self, name):
|
||||||
|
""" Add a distribution by name. """
|
||||||
|
|
||||||
|
self.distributions.append(factory(name))
|
||||||
|
|
||||||
|
def fit(self, data):
|
||||||
|
""" Fit all of the distros we have """
|
||||||
|
fitted = []
|
||||||
|
for distro in self.distributions:
|
||||||
|
print 'fitting distro', distro.name
|
||||||
|
try:
|
||||||
|
distro.fit(data)
|
||||||
|
except:
|
||||||
|
continue
|
||||||
|
fitted.append(distro)
|
||||||
|
self.distributions = fitted
|
||||||
|
|
||||||
|
print 'finished fitting'
|
||||||
|
|
||||||
|
def stats(self, data):
|
||||||
|
""" Return stats on the fits """
|
||||||
|
for dd in self.distributions:
|
||||||
|
dd.fit_stats(data)
|
||||||
|
|
||||||
|
def get_topn(self, n):
|
||||||
|
""" Return top-n best fits. """
|
||||||
|
data = [[x.rss, x] for x in self.distributions if numpy.isfinite(x.rss)]
|
||||||
|
data.sort()
|
||||||
|
|
||||||
|
if not n:
|
||||||
|
n = len(data)
|
||||||
|
|
||||||
|
return [x[1] for x in data[:n]]
|
||||||
|
|
||||||
|
def fit_plot(self, data, topn=0, bins=20):
|
||||||
|
""" Create a plot. """
|
||||||
|
from matplotlib import pylab as pl
|
||||||
|
|
||||||
|
distros = self.get_topn(topn)
|
||||||
|
|
||||||
|
xx = numpy.linspace(data.min(), data.max(), 300)
|
||||||
|
|
||||||
|
table = []
|
||||||
|
nparms = max(len(x.parms) for x in distros)
|
||||||
|
tcolours = []
|
||||||
|
for dd in distros:
|
||||||
|
patch = pl.plot(xx, [dd.pdf(p) for p in xx], label='%10.2f%% %s' % (100.0*dd.rss/dd.dss, dd.name))
|
||||||
|
row = ['', dd.name, '%10.2f%%' % (100.0*dd.rss/dd.dss,)] + ['%0.2f' % x for x in dd.parms]
|
||||||
|
while len(row) < 3 + nparms:
|
||||||
|
row.append('')
|
||||||
|
table.append(row)
|
||||||
|
tcolours.append([patch[0].get_markerfacecolor()] + ['w'] * (2+nparms))
|
||||||
|
|
||||||
|
# add a historgram with the data
|
||||||
|
pl.hist(data, bins=bins, normed=True)
|
||||||
|
tab = pl.table(cellText=table, cellColours=tcolours,
|
||||||
|
colLabels=['', 'Distribution', 'Res. SS/Data SS'] + ['P%d' % (x + 1,) for x in range(nparms)],
|
||||||
|
bbox=(0.0, 1.0, 1.0, 0.3))
|
||||||
|
#loc='top'))
|
||||||
|
#pl.legend(loc=0)
|
||||||
|
tab.auto_set_font_size(False)
|
||||||
|
tab.set_fontsize(10.)
|
||||||
|
|
||||||
|
def residual_plot(self, data, topn=0):
|
||||||
|
""" Create a residual plot. """
|
||||||
|
from matplotlib import pylab as pl
|
||||||
|
|
||||||
|
distros = self.get_topn(topn)
|
||||||
|
|
||||||
|
|
||||||
|
n = len(data)
|
||||||
|
xx = numpy.linspace(0, 1, n + 2)[1:-1]
|
||||||
|
for dd in distros:
|
||||||
|
|
||||||
|
pl.plot(xx, dd.residuals(data), label='%10.2f%% %s' % (100.0*dd.rss/dd.dss, dd.name))
|
||||||
|
pl.grid(True)
|
||||||
|
|
||||||
|
def plot(self, data, topn):
|
||||||
|
""" Plot data fit and residuals """
|
||||||
|
from matplotlib import pylab as pl
|
||||||
|
pl.axes([0.1, 0.4, 0.8, 0.4]) # leave room above the axes for the table
|
||||||
|
self.fit_plot(data, topn=topn)
|
||||||
|
|
||||||
|
pl.axes([0.1, 0.05, 0.8, 0.3])
|
||||||
|
self.residual_plot(data, topn=topn)
|
||||||
|
|
||||||
|
|
||||||
|
def read_data(infile, field):
|
||||||
|
""" Simple utility to extract a field out of a csv file. """
|
||||||
|
import csv
|
||||||
|
|
||||||
|
reader = csv.reader(infile)
|
||||||
|
header = reader.next()
|
||||||
|
field = header.index(field)
|
||||||
|
data = []
|
||||||
|
for row in reader:
|
||||||
|
data.append(float(row[field]))
|
||||||
|
|
||||||
|
return data
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import optparse
|
||||||
|
|
||||||
|
from matplotlib import pylab as pl
|
||||||
|
|
||||||
|
parser = optparse.OptionParser()
|
||||||
|
parser.add_option('-d', '--distro', action='append', default=[])
|
||||||
|
parser.add_option('-l', '--list', action='store_true',
|
||||||
|
help='List available distros')
|
||||||
|
|
||||||
|
parser.add_option('-i', '--infile')
|
||||||
|
parser.add_option('-f', '--field', default='P/L')
|
||||||
|
|
||||||
|
parser.add_option('-n', '--topn', type='int', default=0)
|
||||||
|
|
||||||
|
parser.add_option('-s', '--sample', default='normal',
|
||||||
|
help='generate a sample from this distro as a test')
|
||||||
|
parser.add_option('--size', type='int', default=1000,
|
||||||
|
help='Size of sample to generate')
|
||||||
|
|
||||||
|
|
||||||
|
opts, args = parser.parse_args()
|
||||||
|
|
||||||
|
if opts.list:
|
||||||
|
for name, distro in get_continuous_distros():
|
||||||
|
print name
|
||||||
|
sys.exit()
|
||||||
|
opts.distro = ['weibull_min', 'norm']
|
||||||
|
if not opts.distro:
|
||||||
|
opts.distro = [x[0] for x in get_continuous_distros()]
|
||||||
|
|
||||||
|
quickfit = QuickFit()
|
||||||
|
for distro in opts.distro:
|
||||||
|
quickfit.add(distro)
|
||||||
|
|
||||||
|
if opts.sample:
|
||||||
|
data = getattr(numpy.random, opts.sample)(size=opts.size)
|
||||||
|
else:
|
||||||
|
data = numpy.array(read_data(open(opts.infile), opts.field))
|
||||||
|
|
||||||
|
data.sort()
|
||||||
|
|
||||||
|
quickfit.fit(data)
|
||||||
|
print 'doing stats'
|
||||||
|
quickfit.stats(data)
|
||||||
|
|
||||||
|
print 'doing plot'
|
||||||
|
quickfit.plot(data, topn=opts.topn)
|
||||||
|
pl.show()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue