Formatted code + added tests

master
Per.Andreas.Brodtkorb 11 years ago
parent 93ed9616b1
commit 31f80c5798

@ -272,9 +272,9 @@ def romberg(fun, a, b, releps=1e-3, abseps=1e-3):
Ih2 = 0. Ih2 = 0.
Ih4 = rom[0, 0] Ih4 = rom[0, 0]
abserr = Ih4 abserr = Ih4
#%epstab = zeros(1,decdigs+7) #epstab = zeros(1,decdigs+7)
#%newflg = 1 #newflg = 1
#%[res,abserr,epstab,newflg] = dea(newflg,Ih4,abserr,epstab) #[res,abserr,epstab,newflg] = dea(newflg,Ih4,abserr,epstab)
two = 1 two = 1
one = 0 one = 0
for i in xrange(1, tableLimit): for i in xrange(1, tableLimit):
@ -298,11 +298,11 @@ def romberg(fun, a, b, releps=1e-3, abseps=1e-3):
if (2 <= i): if (2 <= i):
[res, abserr] = dea3(Ih1, Ih2, Ih4) [res, abserr] = dea3(Ih1, Ih2, Ih4)
#%Ih4 = res # Ih4 = res
if (abserr <= max(abseps, releps * abs(res))): if (abserr <= max(abseps, releps * abs(res))):
break break
#%rom(1,1:i) = rom(2,1:i) # rom(1,1:i) = rom(2,1:i)
two = one two = one
one = (one + 1) % 2 one = (one + 1) % 2
ipower *= 2 ipower *= 2

@ -1,5 +1,5 @@
GFORTRAN module version '4' created from rind71mod.f on Sat May 05 23:15:45 2012 GFORTRAN module version '4' created from rind71mod.f on Fri Apr 05 14:43:36 2013
MD5:0b1982321203177ab8efc0aefe21c275 -- If you edit this, you'll get what you deserve. MD5:7894124eb6e1c7a3f817b6596dd96b5e -- If you edit this, you'll get what you deserve.
(() () () () () () () () () () () () () () () () () () () () () () () (() () () () () () () () () () () () () () () () () () () () () () ()
() () () ()) () () () ())
@ -193,10 +193,11 @@ DIMENSION DUMMY) (REAL 8 0 0 REAL ()) 0 0 () (1 ASSUMED_SHAPE (CONSTANT
DUMMY) (REAL 8 0 0 REAL ()) 0 0 () () 0 () () () 0 0) DUMMY) (REAL 8 0 0 REAL ()) 0 0 () () 0 () () () 0 0)
35 'xma' '' 'xma' 30 ((VARIABLE IN UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 35 'xma' '' 'xma' 30 ((VARIABLE IN UNKNOWN-PROC UNKNOWN UNKNOWN 0 0
DUMMY) (REAL 8 0 0 REAL ()) 0 0 () () 0 () () () 0 0) DUMMY) (REAL 8 0 0 REAL ()) 0 0 () () 0 () () () 0 0)
48 'xma' '' 'xma' 43 ((VARIABLE IN UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 24 'n' '' 'n' 23 ((VARIABLE INOUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 DUMMY)
DUMMY) (REAL 8 0 0 REAL ()) 0 0 () () 0 () () () 0 0)
49 'n0' '' 'n0' 43 ((VARIABLE IN UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 DUMMY)
(INTEGER 4 0 0 INTEGER ()) 0 0 () () 0 () () () 0 0) (INTEGER 4 0 0 INTEGER ()) 0 0 () () 0 () () () 0 0)
25 'wfout' '' 'wfout' 23 ((VARIABLE OUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0
DIMENSION DUMMY) (REAL 8 0 0 REAL ()) 0 0 () (1 ASSUMED_SHAPE (CONSTANT
(INTEGER 4 0 0 INTEGER ()) 0 '1') ()) 0 () () () 0 0)
26 'bpout' '' 'bpout' 23 ((VARIABLE OUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 26 'bpout' '' 'bpout' 23 ((VARIABLE OUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0
DIMENSION DUMMY) (REAL 8 0 0 REAL ()) 0 0 () (1 ASSUMED_SHAPE (CONSTANT DIMENSION DUMMY) (REAL 8 0 0 REAL ()) 0 0 () (1 ASSUMED_SHAPE (CONSTANT
(INTEGER 4 0 0 INTEGER ()) 0 '1') ()) 0 () () () 0 0) (INTEGER 4 0 0 INTEGER ()) 0 '1') ()) 0 () () () 0 0)
@ -220,11 +221,6 @@ DUMMY) (REAL 8 0 0 REAL ()) 0 0 () () 0 () () () 0 0)
DUMMY) (REAL 8 0 0 REAL ()) 0 0 () () 0 () () () 0 0) DUMMY) (REAL 8 0 0 REAL ()) 0 0 () () 0 () () () 0 0)
42 'n0' '' 'n0' 36 ((VARIABLE IN UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 DUMMY) 42 'n0' '' 'n0' 36 ((VARIABLE IN UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 DUMMY)
(INTEGER 4 0 0 INTEGER ()) 0 0 () () 0 () () () 0 0) (INTEGER 4 0 0 INTEGER ()) 0 0 () () 0 () () () 0 0)
24 'n' '' 'n' 23 ((VARIABLE INOUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 DUMMY)
(INTEGER 4 0 0 INTEGER ()) 0 0 () () 0 () () () 0 0)
25 'wfout' '' 'wfout' 23 ((VARIABLE OUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0
DIMENSION DUMMY) (REAL 8 0 0 REAL ()) 0 0 () (1 ASSUMED_SHAPE (CONSTANT
(INTEGER 4 0 0 INTEGER ()) 0 '1') ()) 0 () () () 0 0)
10 'n' '' 'n' 9 ((VARIABLE INOUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 DUMMY) 10 'n' '' 'n' 9 ((VARIABLE INOUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 DUMMY)
(INTEGER 4 0 0 INTEGER ()) 0 0 () () 0 () () () 0 0) (INTEGER 4 0 0 INTEGER ()) 0 0 () () 0 () () () 0 0)
11 'wfout' '' 'wfout' 9 ((VARIABLE OUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 11 'wfout' '' 'wfout' 9 ((VARIABLE OUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0
@ -253,16 +249,20 @@ DUMMY) (REAL 8 0 0 REAL ()) 0 0 () () 0 () () () 0 0)
DUMMY) (REAL 8 0 0 REAL ()) 0 0 () () 0 () () () 0 0) DUMMY) (REAL 8 0 0 REAL ()) 0 0 () () 0 () () () 0 0)
22 'n0' '' 'n0' 16 ((VARIABLE IN UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 DUMMY) 22 'n0' '' 'n0' 16 ((VARIABLE IN UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 DUMMY)
(INTEGER 4 0 0 INTEGER ()) 0 0 () () 0 () () () 0 0) (INTEGER 4 0 0 INTEGER ()) 0 0 () () 0 () () () 0 0)
44 'n' '' 'n' 43 ((VARIABLE INOUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 DUMMY)
(INTEGER 4 0 0 INTEGER ()) 0 0 () () 0 () () () 0 0)
45 'wf' '' 'wf' 43 ((VARIABLE OUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 45 'wf' '' 'wf' 43 ((VARIABLE OUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0
DIMENSION DUMMY) (REAL 8 0 0 REAL ()) 0 0 () (1 ASSUMED_SHAPE (CONSTANT DIMENSION DUMMY) (REAL 8 0 0 REAL ()) 0 0 () (1 ASSUMED_SHAPE (CONSTANT
(INTEGER 4 0 0 INTEGER ()) 0 '1') ()) 0 () () () 0 0) (INTEGER 4 0 0 INTEGER ()) 0 '1') ()) 0 () () () 0 0)
44 'n' '' 'n' 43 ((VARIABLE INOUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 DUMMY)
(INTEGER 4 0 0 INTEGER ()) 0 0 () () 0 () () () 0 0)
46 'bp' '' 'bp' 43 ((VARIABLE OUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 46 'bp' '' 'bp' 43 ((VARIABLE OUT UNKNOWN-PROC UNKNOWN UNKNOWN 0 0
DIMENSION DUMMY) (REAL 8 0 0 REAL ()) 0 0 () (1 ASSUMED_SHAPE (CONSTANT DIMENSION DUMMY) (REAL 8 0 0 REAL ()) 0 0 () (1 ASSUMED_SHAPE (CONSTANT
(INTEGER 4 0 0 INTEGER ()) 0 '1') ()) 0 () () () 0 0) (INTEGER 4 0 0 INTEGER ()) 0 '1') ()) 0 () () () 0 0)
47 'xmi' '' 'xmi' 43 ((VARIABLE IN UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 47 'xmi' '' 'xmi' 43 ((VARIABLE IN UNKNOWN-PROC UNKNOWN UNKNOWN 0 0
DUMMY) (REAL 8 0 0 REAL ()) 0 0 () () 0 () () () 0 0) DUMMY) (REAL 8 0 0 REAL ()) 0 0 () () 0 () () () 0 0)
48 'xma' '' 'xma' 43 ((VARIABLE IN UNKNOWN-PROC UNKNOWN UNKNOWN 0 0
DUMMY) (REAL 8 0 0 REAL ()) 0 0 () () 0 () () () 0 0)
49 'n0' '' 'n0' 43 ((VARIABLE IN UNKNOWN-PROC UNKNOWN UNKNOWN 0 0 DUMMY)
(INTEGER 4 0 0 INTEGER ()) 0 0 () () 0 () () () 0 0)
) )
('__convert_r4_r8' 0 8 'gausshe0' 0 2 'gaussla0' 0 7 'gaussle0' 0 3 ('__convert_r4_r8' 0 8 'gausshe0' 0 2 'gaussla0' 0 7 'gaussle0' 0 3

@ -2,69 +2,75 @@ import numpy as np
from wafo.spectrum.models import (Bretschneider, Jonswap, OchiHubble, Tmaspec, from wafo.spectrum.models import (Bretschneider, Jonswap, OchiHubble, Tmaspec,
Torsethaugen, McCormick, Wallop) Torsethaugen, McCormick, Wallop)
def test_bretschneider(): def test_bretschneider():
S = Bretschneider(Hm0=6.5,Tp=10) S = Bretschneider(Hm0=6.5, Tp=10)
vals = S((0,1,2,3)) vals = S((0, 1, 2, 3))
true_vals = np.array([ 0. , 1.69350993, 0.06352698, 0.00844783]) true_vals = np.array([0., 1.69350993, 0.06352698, 0.00844783])
assert((np.abs(vals-true_vals)<1e-7).all()) assert((np.abs(vals - true_vals) < 1e-7).all())
def test_if_jonswap_with_gamma_one_equals_bretschneider(): def test_if_jonswap_with_gamma_one_equals_bretschneider():
S = Jonswap(Hm0=7, Tp=11,gamma=1) S = Jonswap(Hm0=7, Tp=11, gamma=1)
vals = S((0,1,2,3)) vals = S((0, 1, 2, 3))
true_vals = np.array([ 0. , 1.42694133, 0.05051648, 0.00669692]) true_vals = np.array([0., 1.42694133, 0.05051648, 0.00669692])
assert((np.abs(vals-true_vals)<1e-7).all()) assert((np.abs(vals - true_vals) < 1e-7).all())
w = np.linspace(0,5) w = np.linspace(0, 5)
S2 = Bretschneider(Hm0=7, Tp=11) S2 = Bretschneider(Hm0=7, Tp=11)
#JONSWAP with gamma=1 should be equal to Bretscneider: # JONSWAP with gamma=1 should be equal to Bretscneider:
assert(np.all(np.abs(S(w)-S2(w))<1.e-7)) assert(np.all(np.abs(S(w) - S2(w)) < 1.e-7))
def test_tmaspec(): def test_tmaspec():
S = Tmaspec(Hm0=7, Tp=11,gamma=1,h=10) S = Tmaspec(Hm0=7, Tp=11, gamma=1, h=10)
vals = S((0,1,2,3)) vals = S((0, 1, 2, 3))
true_vals = np.array([ 0. , 0.70106233, 0.05022433, 0.00669692]) true_vals = np.array([0., 0.70106233, 0.05022433, 0.00669692])
assert((np.abs(vals-true_vals)<1e-7).all()) assert((np.abs(vals - true_vals) < 1e-7).all())
def test_torsethaugen(): def test_torsethaugen():
S = Torsethaugen(Hm0=7, Tp=11,gamma=1,h=10) S = Torsethaugen(Hm0=7, Tp=11, gamma=1, h=10)
vals = S((0,1,2,3)) vals = S((0, 1, 2, 3))
true_vals = np.array([ 0. , 1.19989709, 0.05819794, 0.0093541 ]) true_vals = np.array([0., 1.19989709, 0.05819794, 0.0093541])
assert((np.abs(vals-true_vals)<1e-7).all()) assert((np.abs(vals - true_vals) < 1e-7).all())
vals = S.wind(range(4)) vals = S.wind(range(4))
true_vals = np.array([ 0. , 1.13560528, 0.05529849, 0.00888989]) true_vals = np.array([0., 1.13560528, 0.05529849, 0.00888989])
assert((np.abs(vals-true_vals)<1e-7).all()) assert((np.abs(vals - true_vals) < 1e-7).all())
vals = S.swell(range(4)) vals = S.swell(range(4))
true_vals = np.array([ 0. , 0.0642918 , 0.00289946, 0.00046421]) true_vals = np.array([0., 0.0642918, 0.00289946, 0.00046421])
assert((np.abs(vals-true_vals)<1e-7).all()) assert((np.abs(vals - true_vals) < 1e-7).all())
def test_ochihubble(): def test_ochihubble():
S = OchiHubble(par=2) S = OchiHubble(par=2)
vals = S(range(4)) vals = S(range(4))
true_vals = np.array([ 0. , 0.90155636, 0.04185445, 0.00583207]) true_vals = np.array([0., 0.90155636, 0.04185445, 0.00583207])
assert((np.abs(vals-true_vals)<1e-7).all()) assert((np.abs(vals - true_vals) < 1e-7).all())
def test_mccormick(): def test_mccormick():
S = McCormick(Hm0=6.5,Tp=10) S = McCormick(Hm0=6.5, Tp=10)
vals = S(range(4)) vals = S(range(4))
true_vals = np.array([ 0. , 1.87865908, 0.15050447, 0.02994663]) true_vals = np.array([0., 1.87865908, 0.15050447, 0.02994663])
assert((np.abs(vals-true_vals)<1e-7).all()) assert((np.abs(vals - true_vals) < 1e-7).all())
def test_wallop(): def test_wallop():
S = Wallop(Hm0=6.5, Tp=10) S = Wallop(Hm0=6.5, Tp=10)
vals = S(range(4)) vals = S(range(4))
true_vals = np.array([ 0.00000000e+00, 9.36921871e-01, 2.76991078e-03, true_vals = np.array([0.00000000e+00, 9.36921871e-01, 2.76991078e-03,
7.72996150e-05]) 7.72996150e-05])
assert((np.abs(vals-true_vals)<1e-7).all()) assert((np.abs(vals - true_vals) < 1e-7).all())
if __name__ == '__main__': if __name__ == '__main__':
#main() # main()
import nose import nose
# nose.run() nose.run()
test_tmaspec() #test_tmaspec()

@ -1,87 +1,100 @@
import wafo.spectrum.models as sm import wafo.spectrum.models as sm
from wafo.spectrum import SpecData1D from wafo.spectrum import SpecData1D
import numpy as np import numpy as np
import unittest
def slow(f): def slow(f):
f.slow = True f.slow = True
return f return f
@slow
def test_tocovmatrix(): class TestSpectrum(unittest.TestCase):
Sj = sm.Jonswap()
S = Sj.tospecdata() @slow
acfmat = S.tocov_matrix(nr=3, nt=256, dt=0.1) def test_tocovmatrix(self):
vals = acfmat[:2,:] Sj = sm.Jonswap()
true_vals = np.array([[ 3.06073383, 0. , -1.67748256 , 0. ], S = Sj.tospecdata()
[ 3.05235423, -0.1674357 , -1.66811444, 0.18693242]]) acfmat = S.tocov_matrix(nr=3, nt=256, dt=0.1)
assert((np.abs(vals-true_vals)<1e-7).all()) vals = acfmat[:2, :]
true_vals = np.array([[3.06073383, 0.0000000, -1.67748256, 0.],
[3.05235423, -0.1674357, -1.66811444, 0.18693242]])
self.assertTrue((np.abs(vals - true_vals) < 1e-7).all())
def test_tocovdata(): def test_tocovdata():
Sj = sm.Jonswap() Sj = sm.Jonswap()
S = Sj.tospecdata() S = Sj.tospecdata()
Nt = len(S.data)-1 Nt = len(S.data) - 1
acf = S.tocovdata(nr=0, nt=Nt) acf = S.tocovdata(nr=0, nt=Nt)
vals = acf.data[:5] vals = acf.data[:5]
true_vals = np.array([3.06090339, 2.22658399, 0.45307391, -1.17495501, -2.05649042]) true_vals = np.array(
assert((np.abs(vals-true_vals)<1e-6).all()) [3.06090339, 2.22658399, 0.45307391, -1.17495501, -2.05649042])
assert((np.abs(vals - true_vals) < 1e-6).all())
def test_to_t_pdf(): def test_to_t_pdf():
''' Sj = sm.Jonswap()
The density of Tc is computed by: S = Sj.tospecdata()
>>> from wafo.spectrum import models as sm f = S.to_t_pdf(pdef='Tc', paramt=(0, 10, 51), speed=7, seed=100)
>>> Sj = sm.Jonswap() vals = ['%2.3f' % val for val in f.data[:10]]
>>> S = Sj.tospecdata() truevals = ['0.000', '0.014', '0.027', '0.040',
>>> f = S.to_t_pdf(pdef='Tc', paramt=(0, 10, 51), speed=7, seed=100) '0.050', '0.059', '0.067', '0.072', '0.077', '0.081']
>>> ['%2.3f' % val for val in f.data[:10]]
['0.000', '0.014', '0.027', '0.040', '0.050', '0.059', '0.067', '0.072', '0.077', '0.081'] # estimated error bounds
vals = ['%2.4f' % val for val in f.err[:10]]
estimated error bounds truevals = ['0.0000', '0.0003', '0.0003', '0.0004',
>>> ['%2.4f' % val for val in f.err[:10]] '0.0006', '0.0009', '0.0016', '0.0019', '0.0020', '0.0021']
['0.0000', '0.0003', '0.0003', '0.0004', '0.0006', '0.0009', '0.0016', '0.0019', '0.0020', '0.0021']
'''
@slow @slow
def test_sim(): def test_sim():
Sj = sm.Jonswap();S = Sj.tospecdata() Sj = sm.Jonswap()
ns =100; dt = .2 S = Sj.tospecdata()
x1 = S.sim(ns,dt=dt) ns = 100
dt = .2
x1 = S.sim(ns, dt=dt)
import scipy.stats as st import scipy.stats as st
x2 = S.sim(20000,20) x2 = S.sim(20000, 20)
truth1 = [0,np.sqrt(S.moment(1)[0]),0., 0.] truth1 = [0, np.sqrt(S.moment(1)[0]), 0., 0.]
funs = [np.mean,np.std,st.skew,st.kurtosis] funs = [np.mean, np.std, st.skew, st.kurtosis]
for fun,trueval in zip(funs,truth1): for fun, trueval in zip(funs, truth1):
res = fun(x2[:,1::], axis=0) res = fun(x2[:, 1::], axis=0)
m = res.mean() m = res.mean()
sa = res.std() sa = res.std()
#trueval, m, sa #trueval, m, sa
assert(np.abs(m-trueval)<sa) assert(np.abs(m - trueval) < sa)
@slow @slow
def test_sim_nl(): def test_sim_nl():
Sj = sm.Jonswap();S = Sj.tospecdata() Sj = sm.Jonswap()
ns =100; dt = .2 S = Sj.tospecdata()
x1 = S.sim_nl(ns,dt=dt) ns = 100
dt = .2
x1 = S.sim_nl(ns, dt=dt)
import numpy as np import numpy as np
import scipy.stats as st import scipy.stats as st
x2, x1 = S.sim_nl(ns=20000,cases=40) x2, x1 = S.sim_nl(ns=20000, cases=40)
truth1 = [0,np.sqrt(S.moment(1)[0][0])] + S.stats_nl(moments='sk') truth1 = [0, np.sqrt(S.moment(1)[0][0])] + S.stats_nl(moments='sk')
truth1[-1] = truth1[-1]-3 truth1[-1] = truth1[-1] - 3
#truth1 # truth1
#[0, 1.7495200310090633, 0.18673120577479801, 0.061988521262417606] #[0, 1.7495200310090633, 0.18673120577479801, 0.061988521262417606]
funs = [np.mean,np.std,st.skew,st.kurtosis] funs = [np.mean, np.std, st.skew, st.kurtosis]
for fun,trueval in zip(funs,truth1): for fun, trueval in zip(funs, truth1):
res = fun(x2[:,1::], axis=0) res = fun(x2[:, 1::], axis=0)
m = res.mean() m = res.mean()
sa = res.std() sa = res.std()
#trueval, m, sa #trueval, m, sa
assert(np.abs(m-trueval)<2*sa) assert(np.abs(m - trueval) < 2 * sa)
def test_stats_nl(): def test_stats_nl():
@ -90,10 +103,11 @@ def test_stats_nl():
Sj = sm.Jonswap(Hm0=Hs, Tp=11) Sj = sm.Jonswap(Hm0=Hs, Tp=11)
S = Sj.tospecdata() S = Sj.tospecdata()
me, va, sk, ku = S.stats_nl(moments='mvsk') me, va, sk, ku = S.stats_nl(moments='mvsk')
assert(me==0.0) assert(me == 0.0)
assert(va==3.0608203389019537) assert(va == 3.0608203389019537)
assert(sk==0.18673120577479801) assert(sk == 0.18673120577479801)
assert(ku==3.0619885212624176) assert(ku == 3.0619885212624176)
def test_testgaussian(): def test_testgaussian():
''' '''
@ -117,45 +131,50 @@ def test_testgaussian():
True True
''' '''
def test_moment(): def test_moment():
Sj = sm.Jonswap(Hm0=5) Sj = sm.Jonswap(Hm0=5)
S = Sj.tospecdata() #Make spectrum ob S = Sj.tospecdata() # Make spectrum ob
vals, txt = S.moment() vals, txt = S.moment()
true_vals = [1.5614600345079888, 0.95567089481941048] true_vals = [1.5614600345079888, 0.95567089481941048]
true_txt = ['m0', 'm0tt'] true_txt = ['m0', 'm0tt']
for tv,v in zip(true_vals, vals): for tv, v in zip(true_vals, vals):
assert(tv==v) assert(tv == v)
def test_nyquist_freq(): def test_nyquist_freq():
Sj = sm.Jonswap(Hm0=5) Sj = sm.Jonswap(Hm0=5)
S = Sj.tospecdata() #Make spectrum ob S = Sj.tospecdata() # Make spectrum ob
assert(S.nyquist_freq()==3.0) assert(S.nyquist_freq() == 3.0)
def test_sampling_period(): def test_sampling_period():
Sj = sm.Jonswap(Hm0=5) Sj = sm.Jonswap(Hm0=5)
S = Sj.tospecdata() #Make spectrum ob S = Sj.tospecdata() # Make spectrum ob
assert( S.sampling_period()== 1.0471975511965976) assert(S.sampling_period() == 1.0471975511965976)
def test_normalize(): def test_normalize():
Sj = sm.Jonswap(Hm0=5) Sj = sm.Jonswap(Hm0=5)
S = Sj.tospecdata() #Make spectrum ob S = Sj.tospecdata() # Make spectrum ob
S.moment(2) S.moment(2)
([1.5614600345079888, 0.95567089481941048], ['m0', 'm0tt']) ([1.5614600345079888, 0.95567089481941048], ['m0', 'm0tt'])
vals, txt = S.moment(2) vals, txt = S.moment(2)
true_vals = [1.5614600345079888, 0.95567089481941048] true_vals = [1.5614600345079888, 0.95567089481941048]
for tv,v in zip(true_vals, vals): for tv, v in zip(true_vals, vals):
assert(tv==v) assert(tv == v)
Sn = S.copy(); Sn = S.copy()
Sn.normalize() Sn.normalize()
#Now the moments should be one # Now the moments should be one
new_vals, txt = Sn.moment(2) new_vals, txt = Sn.moment(2)
for v in new_vals: for v in new_vals:
assert(np.abs(v-1.0)<1e-7) assert(np.abs(v - 1.0) < 1e-7)
def test_characteristic(): def test_characteristic():
''' '''
@ -180,14 +199,17 @@ def test_characteristic():
(array([ 4.99833578, 8.03139757]), array([[ 0.05292989, 0.02511371], (array([ 4.99833578, 8.03139757]), array([[ 0.05292989, 0.02511371],
[ 0.02511371, 0.0274645 ]]), ['Hm0', 'Tm02']) [ 0.02511371, 0.0274645 ]]), ['Hm0', 'Tm02'])
''' '''
def test_bandwidth(): def test_bandwidth():
Sj = sm.Jonswap(Hm0=3, Tp=7) Sj = sm.Jonswap(Hm0=3, Tp=7)
w = np.linspace(0,4,256) w = np.linspace(0, 4, 256)
S = SpecData1D(Sj(w),w) #Make spectrum object from numerical values S = SpecData1D(Sj(w), w) # Make spectrum object from numerical values
vals = S.bandwidth([0,1,2,3]) vals = S.bandwidth([0, 1, 2, 3])
true_vals = np.array([ 0.73062845, 0.34476034, 0.68277527, 2.90817052]) true_vals = np.array([0.73062845, 0.34476034, 0.68277527, 2.90817052])
assert((np.abs(vals-true_vals)<1e-7).all()) assert((np.abs(vals - true_vals) < 1e-7).all())
def test_docstrings(): def test_docstrings():
import doctest import doctest
@ -195,9 +217,9 @@ def test_docstrings():
if __name__ == '__main__': if __name__ == '__main__':
import nose import nose
#nose.run() nose.run()
#test_docstrings() # test_docstrings()
#test_tocovdata() # test_tocovdata()
#test_tocovmatrix() # test_tocovmatrix()
#test_sim() # test_sim()
#test_bandwidth() # test_bandwidth()

@ -128,7 +128,6 @@ class MovedAttribute(_LazyDescr):
return getattr(module, self.attr) return getattr(module, self.attr)
class _MovedItems(types.ModuleType): class _MovedItems(types.ModuleType):
"""Lazy loading of moved objects""" """Lazy loading of moved objects"""
@ -267,10 +266,12 @@ def iterkeys(d):
"""Return an iterator over the keys of a dictionary.""" """Return an iterator over the keys of a dictionary."""
return iter(getattr(d, _iterkeys)()) return iter(getattr(d, _iterkeys)())
def itervalues(d): def itervalues(d):
"""Return an iterator over the values of a dictionary.""" """Return an iterator over the values of a dictionary."""
return iter(getattr(d, _itervalues)()) return iter(getattr(d, _itervalues)())
def iteritems(d): def iteritems(d):
"""Return an iterator over the (key, value) pairs of a dictionary.""" """Return an iterator over the (key, value) pairs of a dictionary."""
return iter(getattr(d, _iteritems)()) return iter(getattr(d, _iteritems)())
@ -279,8 +280,10 @@ def iteritems(d):
if PY3: if PY3:
def b(s): def b(s):
return s.encode("latin-1") return s.encode("latin-1")
def u(s): def u(s):
return s return s
if sys.version_info[1] <= 1: if sys.version_info[1] <= 1:
def int2byte(i): def int2byte(i):
return bytes((i,)) return bytes((i,))
@ -293,6 +296,7 @@ if PY3:
else: else:
def b(s): def b(s):
return s return s
def u(s): def u(s):
return unicode(s, "unicode_escape") return unicode(s, "unicode_escape")
int2byte = chr int2byte = chr
@ -306,13 +310,11 @@ if PY3:
import builtins import builtins
exec_ = getattr(builtins, "exec") exec_ = getattr(builtins, "exec")
def reraise(tp, value, tb=None): def reraise(tp, value, tb=None):
if value.__traceback__ is not tb: if value.__traceback__ is not tb:
raise value.with_traceback(tb) raise value.with_traceback(tb)
raise value raise value
print_ = getattr(builtins, "print") print_ = getattr(builtins, "print")
del builtins del builtins
@ -329,17 +331,16 @@ else:
locs = globs locs = globs
exec("""exec code in globs, locs""") exec("""exec code in globs, locs""")
exec_("""def reraise(tp, value, tb=None): exec_("""def reraise(tp, value, tb=None):
raise tp, value, tb raise tp, value, tb
""") """)
def print_(*args, **kwargs): def print_(*args, **kwargs):
"""The new-style print function.""" """The new-style print function."""
fp = kwargs.pop("file", sys.stdout) fp = kwargs.pop("file", sys.stdout)
if fp is None: if fp is None:
return return
def write(data): def write(data):
if not isinstance(data, basestring): if not isinstance(data, basestring):
data = str(data) data = str(data)

@ -0,0 +1,390 @@
from __future__ import division, print_function, absolute_import
import warnings
import numpy.testing as npt
import numpy as np
import nose
from wafo import stats
"""
Test all continuous distributions.
Parameters were chosen for those distributions that pass the
Kolmogorov-Smirnov test. This provides safe parameters for each
distributions so that we can perform further testing of class methods.
These tests currently check only/mostly for serious errors and exceptions,
not for numerically exact results.
TODO:
* make functioning test for skew and kurtosis
still known failures - skip for now
"""
# currently not used
DECIMAL = 5  # specify the precision of the tests  # increased from 0 to 5
DECIMAL_kurt = 0  # precision for kurtosis comparisons (currently unused here)
# Each entry is [distribution name in wafo.stats, tuple of shape parameters].
# Parameters were chosen so the distribution passes the Kolmogorov-Smirnov
# sample test (see module docstring); inline notes record known problem cases.
distcont = [
    ['alpha', (3.5704770516650459,)],
    ['anglit', ()],
    ['arcsine', ()],
    ['beta', (2.3098496451481823, 0.62687954300963677)],
    ['betaprime', (5, 6)], # avoid unbound error in entropy with (100, 86)],
    ['bradford', (0.29891359763170633,)],
    ['burr', (10.5, 4.3)], #incorrect mean and var for(0.94839838075366045, 4.3820284068855795)],
    ['cauchy', ()],
    ['chi', (78,)],
    ['chi2', (55,)],
    ['cosine', ()],
    ['dgamma', (1.1023326088288166,)],
    ['dweibull', (2.0685080649914673,)],
    ['erlang', (20,)], #correction numargs = 1
    ['expon', ()],
    ['exponpow', (2.697119160358469,)],
    ['exponweib', (2.8923945291034436, 1.9505288745913174)],
    ['f', (29, 18)],
    ['fatiguelife', (29,)], #correction numargs = 1
    ['fisk', (3.0857548622253179,)],
    ['foldcauchy', (4.7164673455831894,)],
    ['foldnorm', (1.9521253373555869,)],
    ['frechet_l', (3.6279911255583239,)],
    ['frechet_r', (1.8928171603534227,)],
    ['gamma', (1.9932305483800778,)],
    ['gausshyper', (13.763771604130699, 3.1189636648681431,
                    2.5145980350183019, 5.1811649903971615)], #veryslow
    ['genexpon', (9.1325976465418908, 16.231956600590632, 3.2819552690843983)],
    ['genextreme', (-0.1,)], # sample mean test fails for (3.3184017469423535,)],
    ['gengamma', (4.4162385429431925, 3.1193091679242761)],
    ['genhalflogistic', (0.77274727809929322,)],
    ['genlogistic', (0.41192440799679475,)],
    ['genpareto', (0.1,)], # use case with finite moments
    ['gilbrat', ()],
    ['gompertz', (0.94743713075105251,)],
    ['gumbel_l', ()],
    ['gumbel_r', ()],
    ['halfcauchy', ()],
    ['halflogistic', ()],
    ['halfnorm', ()],
    ['hypsecant', ()],
    ['invgamma', (2.0668996136993067,)],
    ['invgauss', (0.14546264555347513,)],
    ['invweibull', (10.58,)], # sample mean test fails at(0.58847112119264788,)]
    ['johnsonsb', (4.3172675099141058, 3.1837781130785063)],
    ['johnsonsu', (2.554395574161155, 2.2482281679651965)],
    ['ksone', (1000,)], #replace 22 by 100 to avoid failing range, ticket 956
    ['kstwobign', ()],
    ['laplace', ()],
    ['levy', ()],
    ['levy_l', ()],
    # ['levy_stable', (0.35667405469844993,
    #                  -0.67450531578494011)], #NotImplementedError
    #  rvs not tested
    ['loggamma', (0.41411931826052117,)],
    ['logistic', ()],
    ['loglaplace', (3.2505926592051435,)],
    ['lognorm', (0.95368226960575331,)],
    ['lomax', (1.8771398388773268,)],
    ['maxwell', ()],
    ['mielke', (10.4, 3.6)], # sample mean test fails for (4.6420495492121487, 0.59707419545516938)],
    # mielke: good results if 2nd parameter >2, weird mean or var below
    ['nakagami', (4.9673794866666237,)],
    ['ncf', (27, 27, 0.41578441799226107)],
    ['nct', (14, 0.24045031331198066)],
    ['ncx2', (21, 1.0560465975116415)],
    ['norm', ()],
    ['pareto', (2.621716532144454,)],
    ['pearson3', (0.1,)],
    ['powerlaw', (1.6591133289905851,)],
    ['powerlognorm', (2.1413923530064087, 0.44639540782048337)],
    ['powernorm', (4.4453652254590779,)],
    ['rayleigh', ()],
    ['rdist', (0.9,)], # feels also slow
    # ['rdist', (3.8266985793976525,)], #veryslow, especially rvs
    #['rdist', (541.0,)], # from ticket #758 #veryslow
    ['recipinvgauss', (0.63004267809369119,)],
    ['reciprocal', (0.0062309367010521255, 1.0062309367010522)],
    ['rice', (0.7749725210111873,)],
    ['semicircular', ()],
    ['t', (2.7433514990818093,)],
    ['triang', (0.15785029824528218,)],
    ['truncexpon', (4.6907725456810478,)],
    ['truncnorm', (-1.0978730080013919, 2.7306754109031979)],
    ['tukeylambda', (3.1321477856738267,)],
    ['uniform', ()],
    ['vonmises', (3.9939042581071398,)],
    ['wald', ()],
    ['weibull_max', (2.8687961709100187,)],
    ['weibull_min', (1.7866166930421596,)],
    ['wrapcauchy', (0.031071279018614728,)]]
# for testing only specific functions
##distcont = [
## ['erlang', (20,)], #correction numargs = 1
## ['fatiguelife', (29,)], #correction numargs = 1
## ['loggamma', (0.41411931826052117,)]]
# for testing ticket:767
##distcont = [
## ['genextreme', (3.3184017469423535,)],
## ['genextreme', (0.01,)],
## ['genextreme', (0.00001,)],
## ['genextreme', (0.0,)],
## ['genextreme', (-0.01,)]
## ]
##distcont = [['gumbel_l', ()],
## ['gumbel_r', ()],
## ['norm', ()]
## ]
##distcont = [['norm', ()]]
# Distributions for which check_distribution_rvs (KS test on the drawn
# sample) is additionally run by the basic test generators.
distmissing = ['wald', 'gausshyper', 'genexpon', 'rv_continuous',
    'loglaplace', 'rdist', 'semicircular', 'invweibull', 'ksone',
    'cosine', 'kstwobign', 'truncnorm', 'mielke', 'recipinvgauss', 'levy',
    'johnsonsu', 'levy_l', 'powernorm', 'wrapcauchy',
    'johnsonsb', 'truncexpon', 'rice', 'invgauss', 'invgamma',
    'powerlognorm']

# [name, args] pairs from distcont restricted to the names in distmissing
distmiss = [[dist,args] for dist,args in distcont if dist in distmissing]

# Names routed to test_cont_basic_slow instead of test_cont_basic.
distslow = ['rdist', 'gausshyper', 'recipinvgauss', 'ksone', 'genexpon',
    'vonmises', 'rice', 'mielke', 'semicircular', 'cosine', 'invweibull',
    'powerlognorm', 'johnsonsu', 'kstwobign']
#distslow are sorted by speed (very slow to slow)
def _silence_fp_errors(func):
def wrap(*a, **kw):
olderr = np.seterr(all='ignore')
try:
return func(*a, **kw)
finally:
np.seterr(**olderr)
wrap.__name__ = func.__name__
return wrap
@_silence_fp_errors
def test_cont_basic():
    """Nose test generator: basic sanity checks for every distribution in
    ``distcont`` that is not listed in ``distslow``.

    For each distribution, draws 1000 variates with a fixed seed and
    yields checks of moments, cdf/ppf and sf/isf round-trips, the pdf,
    the log variants, and (for names in ``distmissing``) a KS test on
    the drawn sample.
    """
    # this test skips slow distributions
    for distname, arg in distcont[:]:
        if distname in distslow:
            continue
        distfn = getattr(stats, distname)
        np.random.seed(765456)  # fixed seed -> reproducible sample statistics
        sn = 1000  # sample size
        rvs = distfn.rvs(size=sn,*arg)
        sm = rvs.mean()
        sv = rvs.var()
        skurt = stats.kurtosis(rvs)
        sskew = stats.skew(rvs)
        m,v = distfn.stats(*arg)  # theoretical mean and variance
        yield check_sample_meanvar_, distfn, arg, m, v, sm, sv, sn, distname + \
              'sample mean test'
        # the sample skew kurtosis test has known failures, not very good distance measure
        #yield check_sample_skew_kurt, distfn, arg, sskew, skurt, distname
        yield check_moment, distfn, arg, m, v, distname
        yield check_cdf_ppf, distfn, arg, distname
        yield check_sf_isf, distfn, arg, distname
        yield check_pdf, distfn, arg, distname
        if distname in ['wald']:
            continue
        # log-variant checks are skipped for 'wald' (continue above)
        yield check_pdf_logpdf, distfn, arg, distname
        yield check_cdf_logcdf, distfn, arg, distname
        yield check_sf_logsf, distfn, arg, distname
        if distname in distmissing:
            alpha = 0.01
            yield check_distribution_rvs, distname, arg, alpha, rvs
@npt.dec.slow
def test_cont_basic_slow():
    """Nose test generator: same checks as ``test_cont_basic`` but only
    for the slow distributions listed in ``distslow``."""
    # same as above for slow distributions
    for distname, arg in distcont[:]:
        if distname not in distslow: continue
        distfn = getattr(stats, distname)
        np.random.seed(765456)  # fixed seed -> reproducible sample statistics
        sn = 1000  # sample size
        rvs = distfn.rvs(size=sn,*arg)
        sm = rvs.mean()
        sv = rvs.var()
        skurt = stats.kurtosis(rvs)
        sskew = stats.skew(rvs)
        m,v = distfn.stats(*arg)  # theoretical mean and variance
        yield check_sample_meanvar_, distfn, arg, m, v, sm, sv, sn, distname + \
              'sample mean test'
        # the sample skew kurtosis test has known failures, not very good distance measure
        #yield check_sample_skew_kurt, distfn, arg, sskew, skurt, distname
        yield check_moment, distfn, arg, m, v, distname
        yield check_cdf_ppf, distfn, arg, distname
        yield check_sf_isf, distfn, arg, distname
        yield check_pdf, distfn, arg, distname
        yield check_pdf_logpdf, distfn, arg, distname
        yield check_cdf_logcdf, distfn, arg, distname
        yield check_sf_logsf, distfn, arg, distname
        #yield check_oth, distfn, arg # is still missing
        if distname in distmissing:
            alpha = 0.01
            yield check_distribution_rvs, distname, arg, alpha, rvs
@_silence_fp_errors
def check_moment(distfn, arg, m, v, msg):
    """Check the distribution's first two raw moments against the
    theoretical mean *m* and variance *v*.

    An infinite theoretical moment is only required to be reported as
    infinite by ``moment``.
    """
    first = distfn.moment(1, *arg)
    second = distfn.moment(2, *arg)

    if np.isinf(m):
        npt.assert_(np.isinf(first),
                    msg + ' - 1st moment -infinite, m1=%s' % str(first))
        #np.isnan(m1) temporary special treatment for loggamma
    else:
        npt.assert_almost_equal(first, m, decimal=10,
                                err_msg=msg + ' - 1st moment')

    if np.isinf(v):
        npt.assert_(np.isinf(second),
                    msg + ' - 2nd moment -infinite, m2=%s' % str(second))
        #np.isnan(m2) temporary special treatment for loggamma
    else:
        # variance via E[X^2] - (E[X])^2
        npt.assert_almost_equal(second - first * first, v, decimal=10,
                                err_msg=msg + ' - 2ndt moment')
@_silence_fp_errors
def check_sample_meanvar_(distfn, arg, m, v, sm, sv, sn, msg):
    """t-test the sample mean *sm* and chi-square-test the sample
    variance *sv* (sample size *sn*) against the theoretical moments
    *m* and *v*; infinite theoretical moments are skipped."""
    #this did not work, skipped silently by nose
    #check_sample_meanvar, sm, m, msg + 'sample mean test'
    #check_sample_meanvar, sv, v, msg + 'sample var test'
    mean_is_finite = not np.isinf(m)
    if mean_is_finite:
        check_sample_mean(sm, sv, sn, m)
    var_is_finite = not np.isinf(v)
    if var_is_finite:
        check_sample_var(sv, sn, v)
def check_sample_mean(sm, v, n, popmean):
    """One-sample t-test that a sample with mean *sm*, variance *v* and
    size *n* is consistent with population mean *popmean*.

    Adapted from stats.stats.ttest_1samp(a, popmean); asserts at the
    1% significance level.
    """
    df = n - 1
    svar = ((n - 1) * v) / float(df)  # looks redundant: equals v
    t_stat = (sm - popmean) / np.sqrt(svar * (1.0 / n))
    # two-tailed probability via the regularized incomplete beta function
    prob = stats.betai(0.5 * df, 0.5, df / (df + t_stat * t_stat))
    #return t,prob
    npt.assert_(prob > 0.01,
                'mean fail, t,prob = %f, %f, m,sm=%f,%f' %
                (t_stat, prob, popmean, sm))
def check_sample_var(sv, n, popvar):
    """Two-sided chi-square test that the sample variance *sv* of *n*
    observations is consistent with the hypothesized variance *popvar*.

    Parameters
    ----------
    sv : float
        Sample variance.
    n : int
        Sample size.
    popvar : float
        Hypothesized population variance.

    Asserts at the 1% significance level.
    """
    df = n - 1
    # BUG FIX: the statistic must use the *sample* variance sv. The old
    # code computed (n-1)*popvar/float(popvar) == n-1, so sv was ignored
    # and the test could never fail.
    chi2 = (n - 1) * sv / float(popvar)
    # NOTE(review): chisqprob is the upper tail only; doubling it is a
    # rough two-sided p-value (can exceed 1) — kept for compatibility.
    pval = stats.chisqprob(chi2, df) * 2
    npt.assert_(pval > 0.01,
                'var fail, t,pval = %f, %f, v,sv=%f,%f' % (chi2, pval, popvar, sv))
def check_sample_skew_kurt(distfn, arg, ss, sk, msg):
    """Compare sample skew *ss* and kurtosis *sk* with the theoretical
    values (currently disabled in the generators; known failures)."""
    theo_skew, theo_kurt = distfn.stats(moments='sk', *arg)
    ## skew = distfn.stats(moment='s',*arg)[()]
    ## kurt = distfn.stats(moment='k',*arg)[()]
    check_sample_meanvar(sk, theo_kurt, msg + 'sample kurtosis test')
    check_sample_meanvar(ss, theo_skew, msg + 'sample skew test')
def check_sample_meanvar(sm, m, msg):
    """Assert sample statistic *sm* is close to theoretical value *m*;
    infinite or nan theoretical values are left unchecked."""
    if np.isinf(m) or np.isnan(m):
        # nothing to compare against
        ##    npt.assert_(abs(sm) > 10000), msg='infinite moment, sm = ' + str(sm))
        return
    npt.assert_almost_equal(sm, m, decimal=DECIMAL,
                            err_msg=msg + ' - finite moment')
@_silence_fp_errors
def check_cdf_ppf(distfn, arg, msg):
    """cdf(ppf(q)) must give back q at a low, central and high quantile."""
    quantiles = [0.001, 0.5, 0.999]
    roundtrip = distfn.cdf(distfn.ppf(quantiles, *arg), *arg)
    npt.assert_almost_equal(roundtrip, quantiles, decimal=DECIMAL,
                            err_msg=msg + ' - cdf-ppf roundtrip')
@_silence_fp_errors
def check_sf_isf(distfn, arg, msg):
    """sf(isf(q)) must round-trip, and cdf must equal 1 - sf."""
    probs = [0.1, 0.5, 0.9]
    npt.assert_almost_equal(distfn.sf(distfn.isf(probs, *arg), *arg),
                            probs, decimal=DECIMAL,
                            err_msg=msg + ' - sf-isf roundtrip')
    pts = [0.1, 0.9]
    npt.assert_almost_equal(distfn.cdf(pts, *arg),
                            1.0 - distfn.sf(pts, *arg),
                            decimal=DECIMAL,
                            err_msg=msg + ' - cdf-sf relationship')
@_silence_fp_errors
def check_pdf(distfn, arg, msg):
    """Compare the pdf at the median with a central-difference
    numerical derivative of the cdf."""
    eps = 1e-6
    x = distfn.ppf(0.5, *arg)  # evaluate at the median
    density = distfn.pdf(x, *arg)
    if (density < 1e-4) or (density > 1e4):
        # pdf nearly zero or huge (singularity): shift away and retry
        x = x + 0.1
        density = distfn.pdf(x, *arg)
    # symmetric difference quotient of the cdf
    slope = (distfn.cdf(x + eps, *arg) -
             distfn.cdf(x - eps, *arg)) / eps / 2.0
    #replace with better diff and better test (more points),
    #actually, this works pretty well
    npt.assert_almost_equal(density, slope, decimal=DECIMAL,
                            err_msg=msg + ' - cdf-pdf relationship')
@_silence_fp_errors
def check_pdf_logpdf(distfn, args, msg):
    """log(pdf) and logpdf must agree at several quantiles."""
    probs = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
    x = distfn.ppf(probs, *args)
    pdf_vals = distfn.pdf(x, *args)
    logpdf_vals = distfn.logpdf(x, *args)
    # drop zero densities / non-finite logs before comparing
    pdf_vals = pdf_vals[pdf_vals != 0]
    logpdf_vals = logpdf_vals[np.isfinite(logpdf_vals)]
    npt.assert_almost_equal(np.log(pdf_vals), logpdf_vals, decimal=7,
                            err_msg=msg + " - logpdf-log(pdf) relationship")
@_silence_fp_errors
def check_sf_logsf(distfn, args, msg):
    """log(sf) and logsf must agree at several quantiles."""
    probs = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
    x = distfn.ppf(probs, *args)
    sf_vals = distfn.sf(x, *args)
    logsf_vals = distfn.logsf(x, *args)
    # drop zero survival values / non-finite logs before comparing
    sf_vals = sf_vals[sf_vals != 0]
    logsf_vals = logsf_vals[np.isfinite(logsf_vals)]
    npt.assert_almost_equal(np.log(sf_vals), logsf_vals, decimal=7,
                            err_msg=msg + " - logsf-log(sf) relationship")
@_silence_fp_errors
def check_cdf_logcdf(distfn, args, msg):
    """log(cdf) and logcdf must agree at several quantiles."""
    probs = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
    x = distfn.ppf(probs, *args)
    cdf_vals = distfn.cdf(x, *args)
    logcdf_vals = distfn.logcdf(x, *args)
    # drop zero probabilities / non-finite logs before comparing
    cdf_vals = cdf_vals[cdf_vals != 0]
    logcdf_vals = logcdf_vals[np.isfinite(logcdf_vals)]
    npt.assert_almost_equal(np.log(cdf_vals), logcdf_vals, decimal=7,
                            err_msg=msg + " - logcdf-log(cdf) relationship")
@_silence_fp_errors
def check_distribution_rvs(dist, args, alpha, rvs):
    """Kolmogorov-Smirnov test of the sample ``rvs`` against ``dist``.

    Adapted from scipy.stats.tests; reuses the supplied random sample and
    retries once with a fresh sample before declaring failure.
    """
    D, pval = stats.kstest(rvs, dist, args=args, N=1000)
    if pval >= alpha:
        return
    D, pval = stats.kstest(dist, '', args=args, N=1000)
    npt.assert_(pval > alpha, "D = " + str(D) + "; pval = " + str(pval) +
                "; alpha = " + str(alpha) + "\nargs = " + str(args))
# Allow running this test module directly; nose collects the checks above.
if __name__ == "__main__":
    #nose.run(argv=['', __file__])
    nose.runmodule(argv=[__file__, '-s'], exit=False)

@ -0,0 +1,102 @@
# contains additional tests for continuous distributions
#
# NOTE: one test, _est_cont_skip, that is renamed so that nose doesn't
# run it,
# 6 distributions return nan for entropy
# truncnorm fails by design for private method _ppf test
from __future__ import division, print_function, absolute_import
import numpy.testing as npt
import numpy as np
from wafo import stats
from test_continuous_basic import distcont
DECIMAL = 5
@npt.dec.slow
def test_cont_extra():
    """Yield ppf/isf-limit and loc/scale checks for every continuous dist."""
    for distname, arg in distcont[:]:
        distfn = getattr(stats, distname)
        for checker, label in ((check_ppf_limits, ' ppf limit test'),
                               (check_isf_limits, ' isf limit test'),
                               (check_loc_scale, ' loc, scale test')):
            yield checker, distfn, arg, distname + label
@npt.dec.slow
def _est_cont_skip():
    """Disabled (renamed so nose skips it): entropy-nan and private _ppf checks."""
    for distname, arg in distcont:
        distfn = getattr(stats, distname)
        # entropy test checks only for isnan; several dists still return nan
        yield check_entropy, distfn, arg, distname + ' entropy nan test'
        # the private _ppf test has one failure by design (truncnorm)
        yield check_ppf_private, distfn, arg, distname + ' _ppf private test'
def test_540_567():
    """Regression test for tickets #540 and #567 (norm.cdf returning nan)."""
    cases = (
        ((-1.7624320982,), {}, 0.03899815971089126),
        ((-1.7624320983,), {}, 0.038998159702449846),
        ((1.38629436112,), {'loc': 0.950273420309, 'scale': 0.204423758009},
         0.98353464004309321),
    )
    for args, kwds, expected in cases:
        npt.assert_almost_equal(stats.norm.cdf(*args, **kwds), expected,
                                decimal=10, err_msg='test_540_567')
def check_ppf_limits(distfn, arg, msg):
    """ppf must map 0/1 to the support bounds and out-of-range q to nan."""
    q_below, q_zero, q_one, q_above = distfn.ppf([-1, 0, 1, 2], *arg)
    assert_equal_inf_nan(distfn.a, q_zero, msg + 'ppf lower bound')
    assert_equal_inf_nan(distfn.b, q_one, msg + 'ppf upper bound')
    npt.assert_(np.isnan(q_below), msg + 'ppf out of bounds - below')
    npt.assert_(np.isnan(q_above), msg + 'ppf out of bounds - above')
def check_ppf_private(distfn, arg, msg):
    """Private _ppf must not produce nan at interior quantiles.

    Fails by design for truncnorm (self.nb not defined).
    """
    quantiles = np.array([0.1, 0.5, 0.9])
    result = distfn._ppf(quantiles, *arg)
    npt.assert_(not np.any(np.isnan(result)), msg + 'ppf private is nan')
def check_isf_limits(distfn, arg, msg):
    """isf must map 1/0 to the support bounds and out-of-range q to nan."""
    q_below, q_zero, q_one, q_above = distfn.isf([-1, 0, 1, 2], *arg)
    assert_equal_inf_nan(distfn.a, q_one, msg + 'isf lower bound')
    assert_equal_inf_nan(distfn.b, q_zero, msg + 'isf upper bound')
    npt.assert_(np.isnan(q_below), msg + 'isf out of bounds - below')
    npt.assert_(np.isnan(q_above), msg + 'isf out of bounds - above')
def check_loc_scale(distfn, arg, msg):
    """Mean and variance must transform affinely under loc/scale."""
    loc, scale = 10.0, 10.0
    m, v = distfn.stats(*arg)
    mt, vt = distfn.stats(loc=loc, scale=scale, *arg)
    assert_equal_inf_nan(m * scale + loc, mt, msg + 'mean')
    assert_equal_inf_nan(v * scale * scale, vt, msg + 'var')
def check_entropy(distfn, arg, msg):
    """The distribution's entropy must be a number (not nan)."""
    entropy_value = distfn.entropy(*arg)
    npt.assert_(not np.isnan(entropy_value), msg + 'test Entropy is nan')
def assert_equal_inf_nan(v1, v2, msg):
    """Assert v2 matches v1; an infinite v1 accepts inf or nan for v2."""
    npt.assert_(not np.isnan(v1))
    if np.isinf(v1):
        npt.assert_(np.isinf(v2) or np.isnan(v2),
                    msg + ' - infinite, v2=%s' % str(v2))
    else:
        npt.assert_almost_equal(v1, v2, decimal=DECIMAL,
                                err_msg=msg + ' - finite')
# Allow running this test module directly; nose collects the checks above.
if __name__ == "__main__":
    import nose
    #nose.run(argv=['', __file__])
    nose.runmodule(argv=[__file__, '-s'], exit=False)

@ -0,0 +1,268 @@
import numpy.testing as npt
import numpy as np
import nose
from wafo import stats
# Precision (decimal places) used by the sample mean/var comparisons below.
DECIMAL_meanvar = 0#1 # was 0

# (distribution name, shape-parameter tuple) pairs exercised by the
# discrete-distribution tests in this module.
distdiscrete = [
    ['bernoulli',(0.3,)],
    ['binom', (5, 0.4)],
    ['boltzmann',(1.4, 19)],
    ['dlaplace', (0.8,)], #0.5
    ['geom', (0.5,)],
    ['hypergeom',(30, 12, 6)],
    ['hypergeom',(21,3,12)], #numpy.random (3,18,12) numpy ticket:921
    ['hypergeom',(21,18,11)], #numpy.random (18,3,11) numpy ticket:921
    ['logser', (0.6,)], # reenabled, numpy ticket:921
    ['nbinom', (5, 0.5)],
    ['nbinom', (0.4, 0.4)], #from tickets: 583
    ['planck', (0.51,)], #4.1
    ['poisson', (0.6,)],
    ['randint', (7, 31)],
    ['skellam', (15, 8)]]
# ['zipf', (4,)] ] # arg=4 is ok,
# Zipf broken for arg = 2, e.g. weird .stats
# looking closer, mean, var should be inf for arg=2
#@npt.dec.slow
def test_discrete_basic():
    """Yield sample-moment, cdf/ppf and pmf/cdf consistency checks per dist."""
    for distname, arg in distdiscrete:
        distfn = getattr(stats, distname)
        np.random.seed(9765456)
        rvs = distfn.rvs(size=2000, *arg)
        supp = np.unique(rvs)
        m, v = distfn.stats(*arg)
        yield check_sample_meanvar, rvs.mean(), m, distname + ' sample mean test'
        yield check_sample_meanvar, rvs.var(), v, distname + ' sample var test'
        yield check_cdf_ppf, distfn, arg, distname + ' cdf_ppf'
        yield check_cdf_ppf2, distfn, arg, supp, distname + ' cdf_ppf'
        yield check_pmf_cdf, distfn, arg, distname + ' pmf_cdf'
        # zipf doesn't fail, but generates floating point warnings;
        # should be checked.
        if distname != 'zipf':
            yield check_oth, distfn, arg, distname + ' oth'
            skurt = stats.kurtosis(rvs)
            sskew = stats.skew(rvs)
            yield (check_sample_skew_kurt, distfn, arg, skurt, sskew,
                   distname + ' skew_kurt')
        # dlaplace doesn't fail, but generates lots of floating point
        # warnings; should be checked.
        if distname != 'dlaplace':  # ['logser']: known failure, fixed
            alpha = 0.01
            yield (check_discrete_chisquare, distfn, arg, rvs, alpha,
                   distname + ' chisquare')
@npt.dec.slow
def test_discrete_extra():
    """Yield ppf/isf-limit and entropy checks for every discrete dist."""
    for distname, arg in distdiscrete:
        distfn = getattr(stats, distname)
        for checker, label in ((check_ppf_limits, ' ppf limit test'),
                               (check_isf_limits, ' isf limit test'),
                               (check_entropy, ' entropy nan test')):
            yield checker, distfn, arg, distname + label
@npt.dec.skipif(True)
def test_discrete_private():
    """Debug-only checks of private methods; some may fail by design
    (e.g. incorrect definition of distfn.a and distfn.b)."""
    for distname, arg in distdiscrete:
        distfn = getattr(stats, distname)
        rvs = distfn.rvs(size=10000, *arg)
        m, v = distfn.stats(*arg)
        second_moment = v + m * m
        yield check_ppf_ppf, distfn, arg
        yield check_cdf_ppf_private, distfn, arg, distname
        # moments compared to 3 decimals
        yield check_generic_moment, distfn, arg, m, 1, 3
        yield check_generic_moment, distfn, arg, second_moment, 2, 3
        yield check_moment_frozen, distfn, arg, m, 1, 3
        yield check_moment_frozen, distfn, arg, second_moment, 2, 3
def check_sample_meanvar(sm, m, msg):
    """Compare a sample moment with the theoretical one.

    An infinite theoretical moment just requires a very large sample value.
    """
    if np.isinf(m):
        npt.assert_(sm > 10000, msg='infinite moment, sm = ' + str(sm))
    else:
        npt.assert_almost_equal(sm, m, decimal=DECIMAL_meanvar,
                                err_msg=msg + ' - finite moment')
def check_sample_var(sm, m, msg):
    """Crude check that the sample variance matches the theoretical one."""
    npt.assert_almost_equal(sm, m, decimal=DECIMAL_meanvar,
                            err_msg=msg + 'var')
def check_cdf_ppf(distfn, arg, msg):
    """ppf must invert cdf around the median of a discrete distribution."""
    median = distfn.ppf(0.5, *arg)
    prob_at_median = distfn.cdf(median, *arg)
    npt.assert_almost_equal(distfn.ppf(prob_at_median - 1e-6, *arg), median,
                            err_msg=msg + 'ppf-cdf-median')
    npt.assert_(distfn.ppf(prob_at_median + 1e-4, *arg) > median,
                msg + 'ppf-cdf-next')
def check_cdf_ppf2(distfn, arg, supp, msg):
    """ppf(cdf(x)) must round-trip on the observed support points."""
    cdf_vals = distfn.cdf(supp, *arg)
    npt.assert_array_equal(distfn.ppf(cdf_vals, *arg),
                           supp, msg + '-roundtrip')
    # nudging down by 1e-8 must not change the answer; this could fail
    # only if some pmf value were below 1e-8
    npt.assert_array_equal(distfn.ppf(cdf_vals - 1e-8, *arg),
                           supp, msg + '-roundtrip')
def check_cdf_ppf_private(distfn, arg, msg):
    """Same as check_cdf_ppf but against the private _ppf method."""
    median = distfn._ppf(0.5, *arg)
    prob_at_median = distfn.cdf(median, *arg)
    npt.assert_almost_equal(distfn._ppf(prob_at_median - 1e-6, *arg), median,
                            err_msg=msg + '_ppf-cdf-median ')
    npt.assert_(distfn._ppf(prob_at_median + 1e-4, *arg) > median,
                msg + '_ppf-cdf-next')
def check_ppf_ppf(distfn, arg):
    """Public ppf and private _ppf must agree and be finite."""
    npt.assert_(distfn.ppf(0.5, *arg) < np.inf)
    public_vals = distfn.ppf([0.5, 0.9], *arg)
    private_vals = [distfn._ppf(0.5, *arg), distfn._ppf(0.9, *arg)]
    npt.assert_(np.all(public_vals < np.inf))
    npt.assert_(private_vals[0] == distfn.ppf(0.5, *arg))
    npt.assert_(private_vals[1] == distfn.ppf(0.9, *arg))
    npt.assert_(private_vals[0] == public_vals[0])
    npt.assert_(private_vals[1] == public_vals[1])
def check_pmf_cdf(distfn, arg, msg):
    """cdf over a range of support points must equal the cumulative pmf
    (shifted by the cdf value at the first point).
    """
    # FIX: np.int is a deprecated (and since NumPy 1.24 removed) alias of
    # the builtin int; use int() directly — identical behavior.
    startind = int(distfn._ppf(0.01, *arg) - 1)
    index = range(startind, startind + 10)
    cdfs = distfn.cdf(index, *arg)
    cumulative_pmf = distfn.pmf(index, *arg).cumsum()
    npt.assert_almost_equal(cdfs,
                            cumulative_pmf + cdfs[0] - distfn.pmf(index[0], *arg),
                            decimal=4, err_msg=msg + 'pmf-cdf')
def check_generic_moment(distfn, arg, m, k, decim):
    """The summation-based generic_moment must reproduce the k-th moment."""
    computed = distfn.generic_moment(k, *arg)
    npt.assert_almost_equal(computed, m, decimal=decim,
                            err_msg=str(distfn) + ' generic moment test')
def check_moment_frozen(distfn, arg, m, k, decim):
    """The frozen distribution's moment(k) must match the expected value."""
    frozen = distfn(*arg)
    npt.assert_almost_equal(frozen.moment(k), m, decimal=decim,
                            err_msg=str(distfn) + ' frozen moment test')
def check_oth(distfn, arg, msg):
    """Consistency of sf/cdf/isf/ppf around the centre of the distribution."""
    # sf must complement cdf at the integer closest to the mean
    meanint = round(float(distfn.stats(*arg)[0]))
    npt.assert_almost_equal(distfn.sf(meanint, *arg),
                            1 - distfn.cdf(meanint, *arg), decimal=8)
    median_sf = distfn.isf(0.5, *arg)
    npt.assert_(distfn.sf(median_sf - 1, *arg) > 0.5)
    npt.assert_(distfn.cdf(median_sf + 1, *arg) > 0.5)
    npt.assert_equal(distfn.isf(0.5, *arg), distfn.ppf(0.5, *arg))
#next 3 functions copied from test_continous_extra
# adjusted
def check_ppf_limits(distfn, arg, msg):
    """Discrete ppf: q=0 maps to a-1, q=1 to b, out-of-range q to nan."""
    q_below, q_zero, q_one, q_above = distfn.ppf([-1, 0, 1, 2], *arg)
    assert_equal_inf_nan(distfn.a - 1, q_zero, msg + 'ppf lower bound')
    assert_equal_inf_nan(distfn.b, q_one, msg + 'ppf upper bound')
    npt.assert_(np.isnan(q_below), msg + 'ppf out of bounds - below')
    npt.assert_(np.isnan(q_above), msg + 'ppf out of bounds - above')
def check_isf_limits(distfn, arg, msg):
    """Discrete isf: q=1 maps to a-1, q=0 to b, out-of-range q to nan."""
    q_below, q_zero, q_one, q_above = distfn.isf([-1, 0, 1, 2], *arg)
    assert_equal_inf_nan(distfn.a - 1, q_one, msg + 'isf lower bound')
    assert_equal_inf_nan(distfn.b, q_zero, msg + 'isf upper bound')
    npt.assert_(np.isnan(q_below), msg + 'isf out of bounds - below')
    npt.assert_(np.isnan(q_above), msg + 'isf out of bounds - above')
def assert_equal_inf_nan(v1, v2, msg):
    """Assert v2 matches v1; an infinite v1 accepts inf or nan for v2."""
    npt.assert_(not np.isnan(v1))
    if np.isinf(v1):
        npt.assert_(np.isinf(v2) or np.isnan(v2),
                    msg + ' - infinite, v2=%s' % str(v2))
    else:
        npt.assert_almost_equal(v1, v2, decimal=10,
                                err_msg=msg + ' - finite')
def check_sample_skew_kurt(distfn, arg, sk, ss, msg):
    """Compare sample kurtosis ``sk`` and skew ``ss`` with theoretical values.

    BUG FIX: the original body built bare tuples
    ``check_sample_meanvar, sk, k, msg + ...`` (a leftover from a ``yield``
    pattern), so the checks were no-ops and never executed.  They are now
    actual calls.  The stats keyword is also corrected from ``moment=`` to
    ``moments=``, matching the continuous version of this helper.
    """
    k, s = distfn.stats(moments='ks', *arg)
    check_sample_meanvar(sk, k, msg + 'sample skew test')
    check_sample_meanvar(ss, s, msg + 'sample kurtosis test')
def check_entropy(distfn, arg, msg):
    """The distribution's entropy must be a number (not nan)."""
    entropy_value = distfn.entropy(*arg)
    is_nan = np.isnan(entropy_value)
    npt.assert_(not is_nan, msg + 'test Entropy is nan')
def check_discrete_chisquare(distfn, arg, rvs, alpha, msg):
    '''Chi-square goodness-of-fit test of a random sample against ``distfn``.

    Parameters
    ----------
    distfn : discrete distribution (scipy.stats-like)
        distribution to test against
    arg : sequence
        shape parameters of the distribution
    rvs : array_like
        random sample to test
    alpha : float
        significance level, threshold for the p-value
    msg : str
        label used in the failure message

    Raises an AssertionError when the chi-square p-value falls below
    ``alpha``.
    '''
    n = len(rvs)
    nsupp = 20
    wsupp = 1.0/nsupp
    # construct intervals with minimum mass 1/nsupp;
    # intervals are left-half-open as in a cdf difference
    # (support is clipped to [-1000, 1000] to bound the loop)
    distsupport = xrange(max(distfn.a, -1000), min(distfn.b, 1000) + 1)
    last = 0
    distsupp = [max(distfn.a, -1000)]
    distmass = []
    for ii in distsupport:
        current = distfn.cdf(ii,*arg)
        # close the current interval once it has accumulated enough mass
        # (1e-14 slack guards against floating point round-off)
        if current - last >= wsupp-1e-14:
            distsupp.append(ii)
            distmass.append(current - last)
            last = current
            # stop once the remaining tail holds less than one interval
            if current > (1-wsupp):
                break
    # final interval collects whatever tail mass is left
    if distsupp[-1] < distfn.b:
        distsupp.append(distfn.b)
        distmass.append(1-last)
    distsupp = np.array(distsupp)
    distmass = np.array(distmass)
    # convert intervals to right-half-open as required by histogram
    histsupp = distsupp+1e-8
    histsupp[0] = distfn.a
    # find sample frequencies and perform chisquare test
    freq,hsupp = np.histogram(rvs,histsupp)
    # NOTE(review): cdfs is computed but unused — kept for reference
    cdfs = distfn.cdf(distsupp,*arg)
    (chis,pval) = stats.chisquare(np.array(freq),n*distmass)
    npt.assert_(pval > alpha, 'chisquare - test for %s'
                ' at arg = %s with pval = %s' % (msg,str(arg),str(pval)))
# Allow running this test module directly; nose collects the checks above.
if __name__ == "__main__":
    #nose.run(argv=['', __file__])
    nose.runmodule(argv=[__file__, '-s'], exit=False)

@ -0,0 +1,98 @@
# NOTE: contains only one test, _est_cont_fit, that is renamed so that
# nose doesn't run it
# I put this here for the record and for the case when someone wants to
# verify the quality of fit
# with current parameters: relatively small sample size, default starting values
# Ran 84 tests in 401.797s
# FAILED (failures=15)
#
#Ran 83 tests in 238.859s
#FAILED (failures=12)
from __future__ import division, print_function, absolute_import
import numpy.testing as npt
import numpy as np
from wafo import stats
from test_continuous_basic import distcont
# this is not a proper statistical test for convergence, but only
# verifies that the estimate and true values don't differ by too much
# Tuning constants for check_cont_fit below: sample sizes and the
# per-parameter tolerances used to decide whether a fit "failed".
n_repl1 = 1000 # sample size for first run
n_repl2 = 5000 # sample size for second run, if first run fails
thresh_percent = 0.25 # percent of true parameters for fail cut-off
thresh_min = 0.75 # minimum difference estimate - true to fail test

# distributions that are too slow to fit in the default test run
distslow = [ 'ncx2', 'rdist', 'gausshyper', 'recipinvgauss', 'ksone', 'genexpon',
    'vonmises', 'rice', 'mielke',
    'powerlognorm', 'kstwobign', 'tukeylambda','betaprime', 'gengamma',
    'johnsonsb', 'burr', 'truncexpon', 'pearson3', 'exponweib', 'nakagami',
    'wrapcauchy']
# distributions whose fit rarely converges on these samples
dist_rarely_fitted = ['f', 'ncf', 'nct', 'chi']
distskip = distslow + dist_rarely_fitted
#distcont = [['genextreme', (3.3184017469423535,)]]
#@npt.dec.slow
def test_cont_fit():
    """Check that .fit recovers the true parameters for fast-fitting
    continuous distributions (slow/rarely-converging ones are skipped)."""
    for distname, arg in distcont:
        if distname in distskip:
            continue
        yield check_cont_fit, distname, arg
@npt.dec.slow
def _est_cont_fit_slow():
    """Disabled (renamed) fit checks for the slow distributions.

    Note: some distributions don't converge with sample size <= 10000.
    """
    for distname, arg in distcont:
        if distname in distslow:
            yield check_cont_fit, distname, arg
def test_lognorm_fit_ticket1131():
    """Regression test for ticket #1131: lognorm fit at various loc/scale."""
    for param in [(2.1, 1., 1.), (1., 10., 1.), (1., 1., 10.)]:
        yield check_cont_fit, 'lognorm', param
def check_cont_fit(distname,arg):
    # Fit `distname` to a random sample drawn with true shape parameters
    # `arg` (loc=0, scale=1) and assert the estimates lie within a loose
    # tolerance; on failure, retry once with a larger sample.
    distfn = getattr(stats, distname)
    rvs = distfn.rvs(size=n_repl1,*arg)
    est = distfn.fit(rvs) #, *arg) # start with default values
    n = distfn.numargs + 2
    # true parameter vector: shapes followed by default loc=0, scale=1
    truearg = np.hstack([arg,[0.0, 1.0]])[:n]
    diff = est-truearg
    txt = ''
    # per-parameter tolerance: relative (thresh_percent of the true value)
    # floored at thresh_min
    diffthreshold = np.max(np.vstack([truearg*thresh_percent,
                                      np.ones(distfn.numargs+2)*thresh_min]),0)
    # threshold for location is relative to the sample mean instead
    diffthreshold[-2] = np.max([np.abs(rvs.mean())*thresh_percent,thresh_min])
    if np.any(np.isnan(est)):
        raise AssertionError('nan returned in fit')
    else:
        if np.any((np.abs(diff) - diffthreshold) > 0.0):
            # first attempt failed: extend the sample to n_repl2 and refit
            rvs = np.concatenate([rvs,distfn.rvs(size=n_repl2-n_repl1,*arg)])
            est = distfn.fit(rvs) #,*arg)
            truearg = np.hstack([arg,[0.0,1.0]])[:n]
            diff = est-truearg
            if np.any((np.abs(diff) - diffthreshold) > 0.0):
                txt = 'parameter: %s\n' % str(truearg)
                txt += 'estimated: %s\n' % str(est)
                txt += 'diff : %s\n' % str(diff)
                raise AssertionError('fit not very good in %s\n' % distfn.name + txt)
# Allow running this test module directly; runs one quick fit check first.
if __name__ == "__main__":
    check_cont_fit('bradford', (0.29891359763170633,))
#    check_cont_fit('lognorm', (10,1,1))
#    check_cont_fit('ncx2', (21, 1.0560465975116415))
    import nose
    #nose.run(argv=['', __file__])
    nose.runmodule(argv=[__file__, '-s'], exit=False)

@ -0,0 +1,36 @@
from wafo import stats
import numpy as np
from numpy.testing import assert_almost_equal, assert_
def test_kde_1d():
    """Compare a 1-d gaussian_kde of normal data against the normal pdf."""
    np.random.seed(8765678)
    n_basesample = 500
    sample = np.random.randn(n_basesample)
    sample_mean = sample.mean()
    sample_std = sample.std(ddof=1)
    # kde estimated from the sample
    gkde = stats.gaussian_kde(sample)
    # evaluate the kde and the reference normal pdf on a grid
    grid = np.linspace(-7, 7, 501)
    kdepdf = gkde.evaluate(grid)
    normpdf = stats.norm.pdf(grid, loc=sample_mean, scale=sample_std)
    intervall = grid[1] - grid[0]
    # integrated squared error between kde and normal pdf must be small
    assert_(np.sum((kdepdf - normpdf) ** 2) * intervall < 0.01)
    # each half-line integrates to roughly one half
    prob1 = gkde.integrate_box_1d(sample_mean, np.inf)
    prob2 = gkde.integrate_box_1d(-np.inf, sample_mean)
    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    # integrate_box must agree with integrate_box_1d
    assert_almost_equal(gkde.integrate_box(sample_mean, np.inf), prob1,
                        decimal=13)
    assert_almost_equal(gkde.integrate_box(-np.inf, sample_mean), prob2,
                        decimal=13)
    # analytic integrals vs numerical sums on the grid
    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf ** 2).sum() * intervall, decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(sample_mean, sample_std ** 2),
                        (kdepdf * normpdf).sum() * intervall, decimal=2)

@ -0,0 +1,490 @@
"""
Tests for the stats.mstats module (support for maskd arrays)
"""
import numpy as np
from numpy import nan
import numpy.ma as ma
from numpy.ma import masked, nomask
import scipy.stats.mstats as mstats
from numpy.testing import TestCase, run_module_suite
from numpy.ma.testutils import assert_equal, assert_almost_equal, \
assert_array_almost_equal, assert_
class TestMquantiles(TestCase):
    """Regression tests for mstats.mquantiles."""

    def test_mquantiles_limit_keyword(self):
        """Ticket #867: the `limit` keyword must exclude out-of-range data."""
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        expected = [[19.2, 14.6, 1.45],
                    [40.0, 37.5, 2.5],
                    [42.8, 40.05, 3.55]]
        result = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(result, expected)
class TestGMean(TestCase):
    """Tests mstats.gmean on plain and masked arrays, 1-D and 2-D."""
    def test_1D(self):
        # unmasked: gmean equals the 4th root of the product
        a = (1,2,3,4)
        actual= mstats.gmean(a)
        desired = np.power(1*2*3*4,1./4.)
        assert_almost_equal(actual, desired,decimal=14)
        desired1 = mstats.gmean(a,axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)
        assert_(not isinstance(desired1, ma.MaskedArray))
        # masked: the masked element is excluded from the product
        a = ma.array((1,2,3,4),mask=(0,0,0,1))
        actual= mstats.gmean(a)
        desired = np.power(1*2*3,1./3.)
        assert_almost_equal(actual, desired,decimal=14)
        desired1 = mstats.gmean(a,axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)

    def test_2D(self):
        # axis=0 (default): masked entries drop out column-wise
        a = ma.array(((1,2,3,4),(1,2,3,4),(1,2,3,4)),
                     mask=((0,0,0,0),(1,0,0,1),(0,1,1,0)))
        actual= mstats.gmean(a)
        desired = np.array((1,2,3,4))
        assert_array_almost_equal(actual, desired, decimal=14)
        # explicit axis=0 must give the same result
        desired1 = mstats.gmean(a,axis=0)
        assert_array_almost_equal(actual, desired1, decimal=14)
        # axis=-1: per-row geometric means of the unmasked entries
        actual= mstats.gmean(a, -1)
        desired = ma.array((np.power(1*2*3*4,1./4.),
                            np.power(2*3,1./2.),
                            np.power(1*4,1./2.)))
        assert_array_almost_equal(actual, desired, decimal=14)
class TestHMean(TestCase):
    """Tests mstats.hmean on plain and masked arrays, 1-D and 2-D."""
    def test_1D(self):
        # unmasked: harmonic mean of 1..4
        a = (1,2,3,4)
        actual= mstats.hmean(a)
        desired = 4. / (1./1 + 1./2 + 1./3 + 1./4)
        assert_almost_equal(actual, desired, decimal=14)
        desired1 = mstats.hmean(ma.array(a),axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)
        # masked: the masked element is excluded
        a = ma.array((1,2,3,4),mask=(0,0,0,1))
        actual= mstats.hmean(a)
        desired = 3. / (1./1 + 1./2 + 1./3)
        assert_almost_equal(actual, desired,decimal=14)
        desired1 = mstats.hmean(a,axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)

    def test_2D(self):
        # axis=0 (default): masked entries drop out column-wise
        a = ma.array(((1,2,3,4),(1,2,3,4),(1,2,3,4)),
                     mask=((0,0,0,0),(1,0,0,1),(0,1,1,0)))
        actual= mstats.hmean(a)
        desired = ma.array((1,2,3,4))
        assert_array_almost_equal(actual, desired, decimal=14)
        # axis=-1: per-row harmonic means of the unmasked entries
        actual1 = mstats.hmean(a,axis=-1)
        desired = (4./(1/1.+1/2.+1/3.+1/4.),
                   2./(1/2.+1/3.),
                   2./(1/1.+1/4.)
                   )
        assert_array_almost_equal(actual1, desired, decimal=14)
class TestRanking(TestCase):
    """Tests mstats.rankdata: ties get average ranks; masked entries rank 0
    unless use_missing=True."""
    # NOTE(review): this __init__ only forwards to TestCase and is redundant
    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)

    def test_ranking(self):
        # plain array: tied values share the average of their ranks
        x = ma.array([0,1,1,1,2,3,4,5,5,6,])
        assert_almost_equal(mstats.rankdata(x),[1,3,3,3,5,6,7,8.5,8.5,10])
        # masked entries are ranked 0 by default ...
        x[[3,4]] = masked
        assert_almost_equal(mstats.rankdata(x),[1,2.5,2.5,0,0,4,5,6.5,6.5,8])
        # ... or get the mid-rank of the missing block with use_missing=True
        assert_almost_equal(mstats.rankdata(x,use_missing=True),
                            [1,2.5,2.5,4.5,4.5,4,5,6.5,6.5,8])
        x = ma.array([0,1,5,1,2,4,3,5,1,6,])
        assert_almost_equal(mstats.rankdata(x),[1,3,8.5,3,5,7,6,8.5,3,10])
        # 2-D: default ranks the flattened array; axis=1/0 rank along rows/cols
        x = ma.array([[0,1,1,1,2], [3,4,5,5,6,]])
        assert_almost_equal(mstats.rankdata(x),[[1,3,3,3,5],[6,7,8.5,8.5,10]])
        assert_almost_equal(mstats.rankdata(x,axis=1),[[1,3,3,3,5],[1,2,3.5,3.5,5]])
        assert_almost_equal(mstats.rankdata(x,axis=0),[[1,1,1,1,1],[2,2,2,2,2,]])
class TestCorr(TestCase):
    """Tests the mstats correlation functions (pearsonr, spearmanr,
    kendalltau, kendalltau_seasonal, pointbiserialr) on masked data.
    Reference values come from the original scipy test suite."""
    def test_pearsonr(self):
        "Tests some computations of Pearson's r"
        x = ma.arange(10)
        # silence divide/invalid warnings from the fully-masked case
        olderr = np.seterr(all='ignore')
        try:
            assert_almost_equal(mstats.pearsonr(x,x)[0], 1.0)
            assert_almost_equal(mstats.pearsonr(x,x[::-1])[0], -1.0)
            # a fully masked input must yield masked r and p-value
            x = ma.array(x, mask=True)
            pr = mstats.pearsonr(x,x)
        finally:
            np.seterr(**olderr)
        assert_(pr[0] is masked)
        assert_(pr[1] is masked)

    def test_spearmanr(self):
        "Tests some computations of Spearman's rho"
        (x, y) = ([5.05,6.75,3.21,2.66],[1.65,2.64,2.64,6.95])
        assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
        # appending a masked nan pair must not change the result
        (x, y) = ([5.05,6.75,3.21,2.66,np.nan],[1.65,2.64,2.64,6.95,np.nan])
        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
        assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)

        x = [ 2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
              1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7]
        y = [22.6, 08.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
             0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4]
        assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)
        # again: masked nan pair leaves rho unchanged
        x = [ 2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
              1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7, np.nan]
        y = [22.6, 08.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
             0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4, np.nan]
        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
        assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)

    def test_kendalltau(self):
        "Tests some computations of Kendall's tau"
        x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66,np.nan])
        y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
        z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
        # expected (tau, p-value) pairs
        assert_almost_equal(np.asarray(mstats.kendalltau(x,y)),
                            [+0.3333333,0.4969059])
        assert_almost_equal(np.asarray(mstats.kendalltau(x,z)),
                            [-0.5477226,0.2785987])

        x = ma.fix_invalid([ 0, 0, 0, 0,20,20, 0,60, 0,20,
                            10,10, 0,40, 0,20, 0, 0, 0, 0, 0, np.nan])
        y = ma.fix_invalid([ 0,80,80,80,10,33,60, 0,67,27,
                            25,80,80,80,80,80,80, 0,10,45, np.nan, 0])
        result = mstats.kendalltau(x,y)
        assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])

    def test_kendalltau_seasonal(self):
        "Tests the seasonal Kendall tau."
        x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
             [ 4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
             [ 3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
             [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
        x = ma.fix_invalid(x).T
        output = mstats.kendalltau_seasonal(x)
        assert_almost_equal(output['global p-value (indep)'], 0.008, 3)
        assert_almost_equal(output['seasonal p-value'].round(2),
                            [0.18,0.53,0.20,0.04])

    def test_pointbiserial(self):
        "Tests point biserial"
        x = [1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,
             0,0,0,0,1,-1]
        y = [14.8,13.8,12.4,10.1,7.1,6.1,5.8,4.6,4.3,3.5,3.3,3.2,3.0,
             2.8,2.8,2.5,2.4,2.3,2.1,1.7,1.7,1.5,1.3,1.3,1.2,1.2,1.1,
             0.8,0.7,0.6,0.5,0.2,0.2,0.1,np.nan]
        assert_almost_equal(mstats.pointbiserialr(x, y)[0], 0.36149, 5)
class TestTrimming(TestCase):
    """Tests the mstats trimming utilities (trim, trimboth, trimtail,
    trimmed_mean, trimmed_stde, winsorize) on masked arrays."""
    def test_trim(self):
        "Tests trimming"
        # no limits: nothing is trimmed
        a = ma.arange(10)
        assert_equal(mstats.trim(a), [0,1,2,3,4,5,6,7,8,9])
        # absolute limits, inclusive by default
        a = ma.arange(10)
        assert_equal(mstats.trim(a,(2,8)), [None,None,2,3,4,5,6,7,8,None])
        # exclusive limits drop the boundary values too
        a = ma.arange(10)
        assert_equal(mstats.trim(a,limits=(2,8),inclusive=(False,False)),
                     [None,None,None,3,4,5,6,7,None,None])
        # relative limits trim fractions of the data
        a = ma.arange(10)
        assert_equal(mstats.trim(a,limits=(0.1,0.2),relative=True),
                     [None,1,2,3,4,5,6,7,None,None])
        # pre-masked entries stay masked and don't count as trimmed data
        a = ma.arange(12)
        a[[0,-1]] = a[5] = masked
        assert_equal(mstats.trim(a,(2,8)),
                     [None,None,2,3,4,None,6,7,8,None,None,None])
        # 2-D relative trimming along different axes
        x = ma.arange(100).reshape(10,10)
        trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=None)
        assert_equal(trimx._mask.ravel(),[1]*10+[0]*70+[1]*20)
        trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=0)
        assert_equal(trimx._mask.ravel(),[1]*10+[0]*70+[1]*20)
        trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=-1)
        assert_equal(trimx._mask.T.ravel(),[1]*10+[0]*70+[1]*20)
        # with a fully masked row the trim fractions apply per valid data
        x = ma.arange(110).reshape(11,10)
        x[1] = masked
        trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=None)
        assert_equal(trimx._mask.ravel(),[1]*20+[0]*70+[1]*20)
        trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=0)
        assert_equal(trimx._mask.ravel(),[1]*20+[0]*70+[1]*20)
        trimx = mstats.trim(x.T,(0.1,0.2),relative=True,axis=-1)
        assert_equal(trimx.T._mask.ravel(),[1]*20+[0]*70+[1]*20)

    def test_trim_old(self):
        "Tests trimming."
        x = ma.arange(100)
        # default trimboth removes 20% from each end
        assert_equal(mstats.trimboth(x).count(), 60)
        assert_equal(mstats.trimtail(x,tail='r').count(), 80)
        # pre-masked values reduce the counts accordingly
        x[50:70] = masked
        trimx = mstats.trimboth(x)
        assert_equal(trimx.count(), 48)
        assert_equal(trimx._mask, [1]*16 + [0]*34 + [1]*20 + [0]*14 + [1]*16)
        x._mask = nomask
        x.shape = (10,10)
        assert_equal(mstats.trimboth(x).count(), 60)
        assert_equal(mstats.trimtail(x).count(), 80)

    def test_trimmedmean(self):
        "Tests the trimmed mean."
        data = ma.array([ 77, 87, 88,114,151,210,219,246,253,262,
                         296,299,306,376,428,515,666,1310,2611])
        assert_almost_equal(mstats.trimmed_mean(data,0.1), 343, 0)
        assert_almost_equal(mstats.trimmed_mean(data,(0.1,0.1)), 343, 0)
        assert_almost_equal(mstats.trimmed_mean(data,(0.2,0.2)), 283, 0)

    def test_trimmed_stde(self):
        "Tests the trimmed mean standard error."
        data = ma.array([ 77, 87, 88,114,151,210,219,246,253,262,
                         296,299,306,376,428,515,666,1310,2611])
        assert_almost_equal(mstats.trimmed_stde(data,(0.2,0.2)), 56.13193, 5)
        assert_almost_equal(mstats.trimmed_stde(data,0.2), 56.13193, 5)

    def test_winsorization(self):
        "Tests the Winsorization of the data."
        data = ma.array([ 77, 87, 88,114,151,210,219,246,253,262,
                         296,299,306,376,428,515,666,1310,2611])
        assert_almost_equal(mstats.winsorize(data,(0.2,0.2)).var(ddof=1),
                            21551.4, 1)
        # masked entries must remain masked after winsorization
        data[5] = masked
        winsorized = mstats.winsorize(data)
        assert_equal(winsorized.mask, data.mask)
class TestMoments(TestCase):
    """
    Comparison numbers are found using R v.1.5.1
    note that length(testcase) = 4
    testmathworks comes from documentation for the
    Statistics Toolbox for Matlab and can be found at both
    http://www.mathworks.com/access/helpdesk/help/toolbox/stats/kurtosis.shtml
    http://www.mathworks.com/access/helpdesk/help/toolbox/stats/skewness.shtml
    Note that both test cases came from here.
    """
    testcase = [1,2,3,4]
    # fixture with a trailing nan that fix_invalid converts to a mask
    testmathworks = ma.fix_invalid([1.165 , 0.6268, 0.0751, 0.3516, -0.6965,
                                    np.nan])
    def test_moment(self):
        """
        mean((testcase-mean(testcase))**power,axis=0),axis=0))**power))"""
        y = mstats.moment(self.testcase,1)
        assert_almost_equal(y,0.0,10)
        y = mstats.moment(self.testcase,2)
        assert_almost_equal(y,1.25)
        y = mstats.moment(self.testcase,3)
        assert_almost_equal(y,0.0)
        y = mstats.moment(self.testcase,4)
        assert_almost_equal(y,2.5625)
    def test_variation(self):
        """variation = samplestd/mean """
        y = mstats.variation(self.testcase)
        assert_almost_equal(y,0.44721359549996, 10)
    def test_skewness(self):
        """
        sum((testmathworks-mean(testmathworks,axis=0))**3,axis=0)/((sqrt(var(testmathworks)*4/5))**3)/5
        """
        y = mstats.skew(self.testmathworks)
        assert_almost_equal(y,-0.29322304336607,10)
        # bias=0 applies the small-sample correction
        y = mstats.skew(self.testmathworks,bias=0)
        assert_almost_equal(y,-0.437111105023940,10)
        y = mstats.skew(self.testcase)
        assert_almost_equal(y,0.0,10)
    def test_kurtosis(self):
        """
        sum((testcase-mean(testcase,axis=0))**4,axis=0)/((sqrt(var(testcase)*3/4))**4)/4
        sum((test2-mean(testmathworks,axis=0))**4,axis=0)/((sqrt(var(testmathworks)*4/5))**4)/5
        Set flags for axis = 0 and
        fisher=0 (Pearson's definition of kurtosis for compatibility with Matlab)
        """
        y = mstats.kurtosis(self.testmathworks,0,fisher=0,bias=1)
        assert_almost_equal(y, 2.1658856802973,10)
        # Note that MATLAB has confusing docs for the following case
        # kurtosis(x,0) gives an unbiased estimate of Pearson's skewness
        # kurtosis(x) gives a biased estimate of Fisher's skewness (Pearson-3)
        # The MATLAB docs imply that both should give Fisher's
        y = mstats.kurtosis(self.testmathworks,fisher=0,bias=0)
        assert_almost_equal(y, 3.663542721189047,10)
        y = mstats.kurtosis(self.testcase,0,0)
        assert_almost_equal(y,1.64)

    def test_mode(self):
        "Tests the mode"
        # mode returns (most frequent value, its count)
        a1 = [0,0,0,1,1,1,2,3,3,3,3,4,5,6,7]
        a2 = np.reshape(a1, (3,5))
        ma1 = ma.masked_where(ma.array(a1)>2,a1)
        ma2 = ma.masked_where(a2>2, a2)
        assert_equal(mstats.mode(a1, axis=None), (3,4))
        assert_equal(mstats.mode(ma1, axis=None), (0,3))
        assert_equal(mstats.mode(a2, axis=None), (3,4))
        assert_equal(mstats.mode(ma2, axis=None), (0,3))
        assert_equal(mstats.mode(a2, axis=0), ([[0,0,0,1,1]],[[1,1,1,1,1]]))
        assert_equal(mstats.mode(ma2, axis=0), ([[0,0,0,1,1]],[[1,1,1,1,1]]))
        assert_equal(mstats.mode(a2, axis=-1), ([[0],[3],[3]], [[3],[3],[1]]))
        assert_equal(mstats.mode(ma2, axis=-1), ([[0],[1],[0]], [[3],[1],[0]]))
class TestPercentile(TestCase):
    """Tests for mstats.scoreatpercentile."""

    def setUp(self):
        # fixtures from the original suite (a1/a3 are unused by these tests)
        self.a1 = [3, 4, 5, 10, -3, -5, 6]
        self.a2 = [3, -6, -2, 8, 7, 4, 2, 1]
        self.a3 = [3., 4, 5, 10, -3, -5, -6, 7.0]

    def test_percentile(self):
        """0th/100th/50th percentiles of an evenly spaced sequence."""
        x = np.arange(8) * 0.5
        for per, expected in ((0, 0.), (100, 3.5), (50, 1.75)):
            assert_equal(mstats.scoreatpercentile(x, per), expected)

    def test_2D(self):
        """Column-wise 50th percentile of a masked 2-D array."""
        x = ma.array([[1, 1, 1],
                      [1, 1, 1],
                      [4, 4, 3],
                      [1, 1, 1],
                      [1, 1, 1]])
        assert_equal(mstats.scoreatpercentile(x, 50), [1, 1, 1])
class TestVariability(TestCase):
    """ Comparison numbers are found using R v.1.5.1
    note that length(testcase) = 4
    """
    # NOTE(review): mstats.signaltonoise was removed in SciPy 1.0; this
    # suite targets the older scipy/wafo API — confirm before porting.
    testcase = ma.fix_invalid([1,2,3,4,np.nan])
    def test_signaltonoise(self):
        """
        this is not in R, so used
        mean(testcase,axis=0)/(sqrt(var(testcase)*3/4)) """
        y = mstats.signaltonoise(self.testcase)
        assert_almost_equal(y,2.236067977)
    def test_sem(self):
        """
        this is not in R, so used
        sqrt(var(testcase)*3/4)/sqrt(3)
        """
        y = mstats.sem(self.testcase)
        assert_almost_equal(y,0.6454972244)
    def test_zmap(self):
        """
        not in R, so tested by using
        (testcase[i]-mean(testcase,axis=0))/sqrt(var(testcase)*3/4)
        """
        y = mstats.zmap(self.testcase, self.testcase)
        # compare only the unmasked entries (the nan stays masked)
        desired_unmaskedvals = ([-1.3416407864999, -0.44721359549996 ,
                                 0.44721359549996 , 1.3416407864999])
        assert_array_almost_equal(desired_unmaskedvals,
                                  y.data[y.mask==False], decimal=12)
    def test_zscore(self):
        """
        not in R, so tested by using
        (testcase[i]-mean(testcase,axis=0))/sqrt(var(testcase)*3/4)
        """
        y = mstats.zscore(self.testcase)
        desired = ma.fix_invalid([-1.3416407864999, -0.44721359549996 ,
                                  0.44721359549996 , 1.3416407864999, np.nan])
        assert_almost_equal(desired, y, decimal=12)
class TestMisc(TestCase):
#
def test_obrientransform(self):
"Tests Obrien transform"
args = [[5]*5+[6]*11+[7]*9+[8]*3+[9]*2+[10]*2,
[6]+[7]*2+[8]*4+[9]*9+[10]*16]
result = [5*[3.1828]+11*[0.5591]+9*[0.0344]+3*[1.6086]+2*[5.2817]+2*[11.0538],
[10.4352]+2*[4.8599]+4*[1.3836]+9*[0.0061]+16*[0.7277]]
assert_almost_equal(np.round(mstats.obrientransform(*args).T,4),
result,4)
#
def test_kstwosamp(self):
"Tests the Kolmogorov-Smirnov 2 samples test"
x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
[ 4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
[ 3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
[nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
x = ma.fix_invalid(x).T
(winter,spring,summer,fall) = x.T
#
assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring),4),
(0.1818,0.9892))
assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'g'),4),
(0.1469,0.7734))
assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'l'),4),
(0.1818,0.6744))
#
def test_friedmanchisq(self):
"Tests the Friedman Chi-square test"
# No missing values
args = ([9.0,9.5,5.0,7.5,9.5,7.5,8.0,7.0,8.5,6.0],
[7.0,6.5,7.0,7.5,5.0,8.0,6.0,6.5,7.0,7.0],
[6.0,8.0,4.0,6.0,7.0,6.5,6.0,4.0,6.5,3.0])
result = mstats.friedmanchisquare(*args)
assert_almost_equal(result[0], 10.4737, 4)
assert_almost_equal(result[1], 0.005317, 6)
# Missing values
x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
[ 4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
[ 3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
[nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
x = ma.fix_invalid(x)
result = mstats.friedmanchisquare(*x)
assert_almost_equal(result[0], 2.0156, 4)
assert_almost_equal(result[1], 0.5692, 4)
def test_regress_simple():
    """Regress a line with sinusoidal noise. Test for #1273."""
    sample_x = np.linspace(0, 100, 100)
    # Deterministic "noise": a sine sweep added onto the line 0.2*x + 10.
    sample_y = 0.2 * sample_x + 10 + np.sin(np.linspace(0, 20, 100))
    slope, intercept, r_value, p_value, sterr = mstats.linregress(sample_x,
                                                                  sample_y)
    assert_almost_equal(slope, 0.19644990055858422)
    assert_almost_equal(intercept, 10.211269918932341)
def test_plotting_positions():
    """Regression test for #1256"""
    # With alpha = beta = 0 the positions are i/(n+1): 0.25, 0.5, 0.75.
    positions = mstats.plotting_positions(np.arange(3), 0, 0)
    assert_array_almost_equal(positions.data, np.array([0.25, 0.5, 0.75]))
# Allow running this test module directly (outside a test collector).
if __name__ == "__main__":
    run_module_suite()

@ -0,0 +1,103 @@
# pylint: disable-msg=W0611, W0612, W0511,R0201
"""Tests suite for maskedArray statistics.
:author: Pierre Gerard-Marchant
:contact: pierregm_at_uga_dot_edu
"""
__author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)"
import numpy as np
import numpy.ma as ma
import scipy.stats.mstats as ms
#import scipy.stats.mmorestats as mms
from numpy.testing import TestCase, run_module_suite, assert_equal, \
assert_almost_equal, assert_
class TestMisc(TestCase):
    """Tests for the extra masked-statistics helpers: mjci, trimmed-mean
    confidence intervals, and ideal fourths."""

    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)

    def test_mjci(self):
        "Tests the Maritz-Jarrett estimator"
        data = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                         296, 299, 306, 376, 428, 515, 666, 1310, 2611])
        assert_almost_equal(ms.mjci(data),
                            [55.76819, 45.84028, 198.87875], 5)

    def test_trimmedmeanci(self):
        "Tests the confidence intervals of the trimmed mean."
        data = ma.array([545, 555, 558, 572, 575, 576, 578, 580,
                         594, 605, 635, 651, 653, 661, 666])
        assert_almost_equal(ms.trimmed_mean(data, 0.2), 596.2, 1)
        interval = np.round(ms.trimmed_mean_ci(data, (0.2, 0.2)), 1)
        assert_equal(interval, [561.8, 630.6])

    def test_idealfourths(self):
        "Tests ideal-fourths"
        ramp = np.arange(100)
        assert_almost_equal(np.asarray(ms.idealfourths(ramp)),
                            [24.416667, 74.583333], 6)
        ramp_2D = ramp.repeat(3).reshape(-1, 3)
        assert_almost_equal(ms.idealfourths(ramp_2D, axis=0),
                            [[24.416667, 24.416667, 24.416667],
                             [74.583333, 74.583333, 74.583333]], 6)
        assert_almost_equal(ms.idealfourths(ramp_2D, axis=1),
                            ramp.repeat(2).reshape(-1, 2))
        # A two-point degenerate input yields NaNs.
        degenerate = [0, 0]
        _result = ms.idealfourths(degenerate)
        assert_(np.isnan(_result).all())
#..............................................................................
class TestQuantiles(TestCase):
    """Tests for the Harrell-Davis quantile estimators (hdquantiles and
    hdquantiles_sd) on 1-D and 2-D inputs."""

    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)

    def test_hdquantiles(self):
        # 100 fixed samples in (0, 1); reshaped to (10, 10) further down to
        # exercise the axis keyword.
        data = [0.706560797,0.727229578,0.990399276,0.927065621,0.158953014,
                0.887764025,0.239407086,0.349638551,0.972791145,0.149789972,
                0.936947700,0.132359948,0.046041972,0.641675031,0.945530547,
                0.224218684,0.771450991,0.820257774,0.336458052,0.589113496,
                0.509736129,0.696838829,0.491323573,0.622767425,0.775189248,
                0.641461450,0.118455200,0.773029450,0.319280007,0.752229111,
                0.047841438,0.466295911,0.583850781,0.840581845,0.550086491,
                0.466470062,0.504765074,0.226855960,0.362641207,0.891620942,
                0.127898691,0.490094097,0.044882048,0.041441695,0.317976349,
                0.504135618,0.567353033,0.434617473,0.636243375,0.231803616,
                0.230154113,0.160011327,0.819464108,0.854706985,0.438809221,
                0.487427267,0.786907310,0.408367937,0.405534192,0.250444460,
                0.995309248,0.144389588,0.739947527,0.953543606,0.680051621,
                0.388382017,0.863530727,0.006514031,0.118007779,0.924024803,
                0.384236354,0.893687694,0.626534881,0.473051932,0.750134705,
                0.241843555,0.432947602,0.689538104,0.136934797,0.150206859,
                0.474335206,0.907775349,0.525869295,0.189184225,0.854284286,
                0.831089744,0.251637345,0.587038213,0.254475554,0.237781276,
                0.827928620,0.480283781,0.594514455,0.213641488,0.024194386,
                0.536668589,0.699497811,0.892804071,0.093835427,0.731107772]
        # Probabilities 0 and 1 return the sample minimum and maximum.
        assert_almost_equal(ms.hdquantiles(data,[0., 1.]),
                            [0.006514031, 0.995309248])
        hdq = ms.hdquantiles(data,[0.25, 0.5, 0.75])
        assert_almost_equal(hdq, [0.253210762, 0.512847491, 0.762232442,])
        # Standard errors of the same three quantiles.
        hdq = ms.hdquantiles_sd(data,[0.25, 0.5, 0.75])
        assert_almost_equal(hdq, [0.03786954, 0.03805389, 0.03800152,], 4)
        # 2-D: column-wise results must match the 1-D computation per column.
        data = np.array(data).reshape(10,10)
        hdq = ms.hdquantiles(data,[0.25,0.5,0.75],axis=0)
        assert_almost_equal(hdq[:,0], ms.hdquantiles(data[:,0],[0.25,0.5,0.75]))
        assert_almost_equal(hdq[:,-1], ms.hdquantiles(data[:,-1],[0.25,0.5,0.75]))
        # var=True additionally returns the variance of each estimate.
        hdq = ms.hdquantiles(data,[0.25,0.5,0.75],axis=0,var=True)
        assert_almost_equal(hdq[...,0],
                            ms.hdquantiles(data[:,0],[0.25,0.5,0.75],var=True))
        assert_almost_equal(hdq[...,-1],
                            ms.hdquantiles(data[:,-1],[0.25,0.5,0.75], var=True))
###############################################################################
# Allow running this test module directly (outside a test collector).
if __name__ == "__main__":
    run_module_suite()

File diff suppressed because it is too large Load Diff

@ -0,0 +1,412 @@
"""
Commentary
----------
Most of the work is done by the scipy.stats.distributions module.
This provides a plethora of continuous distributions to play with.
Each distribution has functions to generate random deviates, pdf's,
cdf's etc. as well as a function to fit the distribution to some given
data.
The fitting uses scipy.optimize.fmin to minimise the log odds of the
data given the distribution.
There are a couple of problems with this approach. First it is
sensitive to the initial guess at the parameters. Second it can be a
little slow.
Two key parameters are the 'loc' and 'scale' parameters. Data is
shifted by 'loc' and scaled by scale prior to fitting. Supplying
appropriate values for these parameters is important to getting a good
fit.
See the factory() function which picks from a handful of common
approaches for each distribution.
For some distributions (eg normal) it really makes sense just to
calculate the parameters directly from the data.
The code in the __ifmain__ should be a good guide how to use this.
Simply:
get a QuickFit object
add the distributions you want to try to fit
call fit() with your data
call fit_stats() to generate some stats on the fit.
call plot() if you want to see a plot.
Named after Mrs Twolumps, minister's secretary in the silly walks
sketch, who brings in coffee with a full silly walk.
Tenuous link with curve fitting is that you generally see "two lumps"
one in your data and the other in the curve that is being fitted.
Or alternately, if your data is not too silly then you can fit a
curve to it.
License is GNU LGPL v3, see https://launchpad.net/twolumps
"""
import inspect
from itertools import izip
import numpy
from wafo import stats
from scipy import mean, std
def factory(name):
    """ Factory to return appropriate objects for each distro. """
    # Distributions whose support requires a particular loc/scale starting
    # guess are mapped to a specialised wrapper; anything else gets the
    # generic ScipyDistribution.
    special_fitters = {
        'beta': ZeroOneScipyDistribution,
        'alpha': ZeroOneScipyDistribution,
        'ncf': ZeroOneScipyDistribution,
        'triang': ZeroOneScipyDistribution,
        'uniform': ZeroOneScipyDistribution,
        'powerlaw': ZeroOneScipyDistribution,
        'pareto': MinLocScipyDistribution,
        'expon': MinLocScipyDistribution,
        'gamma': MinLocScipyDistribution,
        'lognorm': MinLocScipyDistribution,
        'maxwell': MinLocScipyDistribution,
        'weibull_min': MinLocScipyDistribution,
        'weibull_max': MaxLocScipyDistribution,
    }
    wrapper_class = special_fitters.get(name, ScipyDistribution)
    return wrapper_class(name)
def get_continuous_distros():
    """ Find all attributes of stats that are continuous distributions. """
    found = []
    excluded = set()  # names to ignore; currently empty
    for attr_name, attr in inspect.getmembers(stats):
        if attr_name in excluded or attr is stats.rv_continuous:
            continue
        # Instances (not the base class itself) are concrete distributions.
        if isinstance(attr, stats.rv_continuous):
            found.append([attr_name, factory(attr_name)])
    return found
class ScipyDistribution(object):
    """Wrapper around one scipy.stats-style continuous distribution.

    Holds the distribution by name, delegates unknown attributes to the
    underlying (possibly frozen) distro object, and caches expected values
    computed from the fitted parameters.
    """

    def __init__(self, name):
        # name: attribute name of the distribution in the stats module.
        self.name = name
        self.distro = self.get_distro()
        self.fitted = None  # cache used by expected()

    def __getattr__(self, attr):
        """ Try delegating to the distro object """
        return getattr(self.distro, attr)

    def get_distro(self):
        # Look up the (unfrozen) distribution object by name.
        return getattr(stats, self.name)

    def set_distro(self, parms):
        # Freeze the distribution with the given parameter list.
        self.distro = getattr(stats, self.name)(*parms)
        return self.distro

    def calculate_loc_and_scale(self, data):
        """ Calculate loc and scale parameters for fit.
        Depending on the distribution, these need to be approximately
        right to get a good fit.
        """
        return mean(data), std(data)

    def fit(self, data, *args, **kwargs):
        """ This needs some work.
        Seems the various scipy distributions do a reasonable job if given a good hint.
        Need to get distro specific hints.
        """
        fits = []
        # try with and without providing loc and scale hints
        # increases chance of a fit without an exception being
        # generated.
        for (loc, scale) in ((0.0, 1.0),
                             self.calculate_loc_and_scale(data)):
            try:
                # First attempt: loc/scale as starting guesses only.
                parms = self.get_distro().fit(data, loc=loc, scale=scale)
                self.set_distro(list(parms))
                expected = self.expected(data)
                rss = ((expected-data)**2).sum()
                fits.append([rss, list(parms)])
                # Second attempt: loc frozen (floc) at the hint.
                # NOTE(review): expected() caches self.fitted, so every
                # attempt after the first appears to score the *first* fit's
                # values rather than its own — confirm before relying on rss.
                parms = self.get_distro().fit(data, floc=loc, scale=scale)
                self.set_distro(list(parms))
                expected = self.expected(data)
                rss = ((expected-data)**2).sum()
                fits.append([rss, list(parms)])
            except:
                # NOTE(review): bare except deliberately discards failed
                # fits (best-effort), but it also hides real bugs.
                pass
        # no fits means all tries raised exceptions
        if not fits:
            raise Exception("Exception in fit()")
        # pick the one with the smallest rss
        fits.sort()
        self.parms = fits[0][1]
        print self.parms
        return self.set_distro(list(self.parms))

    def expected(self, data):
        """ Calculate expected values at each data point """
        # Cached after the first call; see NOTE in fit() above.
        if self.fitted is not None:
            return self.fitted
        n = len(data)
        # Inverse CDF at n evenly spaced interior probabilities.
        xx = numpy.linspace(0, 1, n + 2)[1:-1]
        self.fitted = self.ppf(xx)
        #self.fitted = [self.ppf(x) for x in xx]
        return self.fitted

    def fit_stats(self, data):
        """ Return stats on the fits
        data assumed to be sorted.
        """
        n = len(data)
        dvar = numpy.var(data)
        expected = self.expected(data)
        evar = numpy.var(expected)
        rss = 0.0
        for expect, obs in izip(expected, data):
            rss += (obs-expect) ** 2.0
        self.rss = rss       # residual sum of squares
        self.dss = dvar * n  # data sum of squares
        self.fss = evar * n  # fitted-values sum of squares

    def residuals(self, data):
        """ Return residuals """
        expected = self.expected(data)
        return numpy.array(data) - numpy.array(expected)
class MinLocScipyDistribution(ScipyDistribution):

    def calculate_loc_and_scale(self, data):
        """ Set loc to min value in the data.
        Useful for weibull_min
        """
        smallest = min(data)
        return smallest, std(data)
class MaxLocScipyDistribution(ScipyDistribution):

    def calculate_loc_and_scale(self, data):
        """ Set loc to max value in the data.
        Useful for weibull_max
        """
        largest = max(data)
        return largest, std(data)
class ZeroOneScipyDistribution(ScipyDistribution):

    def calculate_loc_and_scale(self, data):
        """ Set loc and scale to move to [0, 1] interval.
        Useful for beta distribution
        """
        low, high = min(data), max(data)
        return low, high - low
class QuickFit(object):
    """ Fit a family of distributions.
    Calculates stats on each fit.
    Option to create plots.
    """

    def __init__(self):
        # Wrapped distributions to try, in insertion order.
        self.distributions = []

    def add_distribution(self, distribution):
        """ Add a ready-prepared ScipyDistribution """
        self.distributions.append(distribution)

    def add(self, name):
        """ Add a distribution by name. """
        self.distributions.append(factory(name))

    def fit(self, data):
        """ Fit all of the distros we have """
        fitted = []
        for distro in self.distributions:
            print 'fitting distro', distro.name
            try:
                distro.fit(data)
            except:
                # NOTE(review): bare except — distros that fail to fit are
                # silently dropped from the list.
                continue
            fitted.append(distro)
        self.distributions = fitted
        print 'finished fitting'

    def stats(self, data):
        """ Return stats on the fits """
        for dd in self.distributions:
            dd.fit_stats(data)

    def get_topn(self, n):
        """ Return top-n best fits. """
        # Rank by residual sum of squares, dropping non-finite fits;
        # n of 0 (falsy) means "return all".
        data = [[x.rss, x] for x in self.distributions if numpy.isfinite(x.rss)]
        data.sort()
        if not n:
            n = len(data)
        return [x[1] for x in data[:n]]

    def fit_plot(self, data, topn=0, bins=20):
        """ Create a plot. """
        from matplotlib import pylab as pl
        distros = self.get_topn(topn)
        xx = numpy.linspace(data.min(), data.max(), 300)
        table = []
        nparms = max(len(x.parms) for x in distros)
        tcolours = []
        for dd in distros:
            # Curve label shows residual SS as a percentage of the data SS.
            patch = pl.plot(xx, [dd.pdf(p) for p in xx], label='%10.2f%% %s' % (100.0*dd.rss/dd.dss, dd.name))
            row = ['', dd.name, '%10.2f%%' % (100.0*dd.rss/dd.dss,)] + ['%0.2f' % x for x in dd.parms]
            # Pad rows so every table row has the same number of cells.
            while len(row) < 3 + nparms:
                row.append('')
            table.append(row)
            tcolours.append([patch[0].get_markerfacecolor()] + ['w'] * (2+nparms))
        # add a histogram with the data
        pl.hist(data, bins=bins, normed=True)
        tab = pl.table(cellText=table, cellColours=tcolours,
                       colLabels=['', 'Distribution', 'Res. SS/Data SS'] + ['P%d' % (x + 1,) for x in range(nparms)],
                       bbox=(0.0, 1.0, 1.0, 0.3))
        #loc='top'))
        #pl.legend(loc=0)
        tab.auto_set_font_size(False)
        tab.set_fontsize(10.)

    def residual_plot(self, data, topn=0):
        """ Create a residual plot. """
        from matplotlib import pylab as pl
        distros = self.get_topn(topn)
        n = len(data)
        # Same interior plotting positions used by ScipyDistribution.expected.
        xx = numpy.linspace(0, 1, n + 2)[1:-1]
        for dd in distros:
            pl.plot(xx, dd.residuals(data), label='%10.2f%% %s' % (100.0*dd.rss/dd.dss, dd.name))
        pl.grid(True)

    def plot(self, data, topn):
        """ Plot data fit and residuals """
        from matplotlib import pylab as pl
        pl.axes([0.1, 0.4, 0.8, 0.4])  # leave room above the axes for the table
        self.fit_plot(data, topn=topn)
        pl.axes([0.1, 0.05, 0.8, 0.3])
        self.residual_plot(data, topn=topn)
def read_data(infile, field):
    """ Simple utility to extract a field out of a csv file.

    infile: an open, readable file-like object containing CSV text with a
            header row.
    field:  name of the column (as it appears in the header) to extract.

    Returns the selected column as a list of floats. Raises ValueError if
    the field is not present in the header or a cell is not numeric.
    """
    import csv
    reader = csv.reader(infile)
    # Builtin next() works on both Python 2 and 3 csv readers, unlike the
    # Python-2-only reader.next() used previously.
    header = next(reader)
    column = header.index(field)
    return [float(row[column]) for row in reader]
if __name__ == '__main__':
    # Command-line driver: fit a set of distributions to either a generated
    # random sample or a column read from a CSV file, then plot the results.
    import sys
    import optparse

    from matplotlib import pylab as pl

    parser = optparse.OptionParser()
    parser.add_option('-d', '--distro', action='append', default=[])
    parser.add_option('-l', '--list', action='store_true',
                      help='List available distros')
    parser.add_option('-i', '--infile')
    parser.add_option('-f', '--field', default='P/L')
    parser.add_option('-n', '--topn', type='int', default=0)
    parser.add_option('-s', '--sample', default='normal',
                      help='generate a sample from this distro as a test')
    parser.add_option('--size', type='int', default=1000,
                      help='Size of sample to generate')

    opts, args = parser.parse_args()

    if opts.list:
        # -l: just list the known continuous distributions and exit.
        for name, distro in get_continuous_distros():
            print name
        sys.exit()

    # NOTE(review): this unconditionally overwrites any -d/--distro options
    # given on the command line and makes the fallback below dead code —
    # looks like leftover debug code; confirm before removing.
    opts.distro = ['weibull_min', 'norm']
    if not opts.distro:
        opts.distro = [x[0] for x in get_continuous_distros()]

    quickfit = QuickFit()
    for distro in opts.distro:
        quickfit.add(distro)

    if opts.sample:
        # Draw a synthetic sample via numpy.random.<name>(size=...).
        data = getattr(numpy.random, opts.sample)(size=opts.size)
    else:
        data = numpy.array(read_data(open(opts.infile), opts.field))
    # fit_stats() assumes sorted data.
    data.sort()

    quickfit.fit(data)
    print 'doing stats'
    quickfit.stats(data)
    print 'doing plot'
    quickfit.plot(data, topn=opts.topn)
    pl.show()

@ -1,12 +1,17 @@
import numpy as np #@UnusedImport import numpy as np # @UnusedImport
from numpy import cos, exp, linspace, pi, sin, diff, arange, ones #@UnusedImport #@UnusedImport
from numpy.random import randn #@UnusedImport from numpy import cos, exp, linspace, pi, sin, diff, arange, ones
from wafo.data import sea #@UnusedImport from numpy.random import randn # @UnusedImport
from wafo.misc import (JITImport, Bunch, detrendma, DotDict, findcross, ecross, findextrema, #@UnusedImport from wafo.data import sea # @UnusedImport
findrfc, rfcfilter, findtp, findtc, findoutliers, #@UnusedImport from wafo.misc import (JITImport, Bunch, detrendma, DotDict, findcross, ecross, findextrema, # @UnusedImport
common_shape, argsreduce, stirlerr, getshipchar, betaloge, #@UnusedImport #@UnusedImport
gravity, nextpow2, discretize, polar2cart, #@UnusedImport findrfc, rfcfilter, findtp, findtc, findoutliers,
cart2polar, meshgrid, tranproc)#@UnusedImport common_shape, argsreduce, stirlerr, getshipchar, betaloge,
#@UnusedImport
#@UnusedImport
gravity, nextpow2, discretize, polar2cart,
cart2polar, meshgrid, tranproc) # @UnusedImport
def test_JITImport(): def test_JITImport():
''' '''
@ -14,6 +19,8 @@ def test_JITImport():
>>> np.exp(0)==1.0 >>> np.exp(0)==1.0
True True
''' '''
def test_bunch(): def test_bunch():
''' '''
>>> d = Bunch(test1=1,test2=3) >>> d = Bunch(test1=1,test2=3)
@ -21,6 +28,8 @@ def test_bunch():
1 1
3 3
''' '''
def test_dotdict(): def test_dotdict():
''' '''
>>> d = DotDict(test1=1,test2=3) >>> d = DotDict(test1=1,test2=3)
@ -29,6 +38,7 @@ def test_dotdict():
3 3
''' '''
def test_detrendma(): def test_detrendma():
''' '''
>>> x = linspace(0,1,200) >>> x = linspace(0,1,200)
@ -101,7 +111,8 @@ def test_detrendma():
2.43802139e-01, 2.39414013e-01, 2.03257341e-01, 2.43802139e-01, 2.39414013e-01, 2.03257341e-01,
1.54325635e-01, 1.16564992e-01, 1.09638547e-01, 1.54325635e-01, 1.16564992e-01, 1.09638547e-01,
1.41342814e-01, 2.04600808e-01, 2.80191671e-01, 1.41342814e-01, 2.04600808e-01, 2.80191671e-01,
3.44164010e-01, 3.77073744e-01]), array([ 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 3.44164010e-01, 3.77073744e-01]), array([
1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152,
1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152,
1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152,
1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152, 1.11058152,
@ -143,6 +154,7 @@ def test_detrendma():
2.44120808, 2.44120808, 2.44120808, 2.44120808, 2.44120808])) 2.44120808, 2.44120808, 2.44120808, 2.44120808, 2.44120808]))
''' '''
def test_findcross_and_ecross(): def test_findcross_and_ecross():
''' '''
>>> findcross([0, 0, 1, -1, 1],0) >>> findcross([0, 0, 1, -1, 1],0)
@ -159,8 +171,9 @@ def test_findcross_and_ecross():
>>> t0 >>> t0
array([ 0.84910514, 2.2933879 , 7.13205663, 8.57630119, array([ 0.84910514, 2.2933879 , 7.13205663, 8.57630119,
13.41484739, 14.85909194, 19.69776067, 21.14204343]) 13.41484739, 14.85909194, 19.69776067, 21.14204343])
''' '''
def test_findextrema(): def test_findextrema():
''' '''
>>> t = linspace(0,7*pi,250) >>> t = linspace(0,7*pi,250)
@ -170,9 +183,9 @@ def test_findextrema():
array([ 18, 53, 89, 125, 160, 196, 231]) array([ 18, 53, 89, 125, 160, 196, 231])
''' '''
def test_findrfc(): def test_findrfc():
''' '''
>>> t = linspace(0,7*pi,250) >>> t = linspace(0,7*pi,250)
>>> x = sin(t)+0.1*sin(50*t) >>> x = sin(t)+0.1*sin(50*t)
>>> ind = findextrema(x) >>> ind = findextrema(x)
@ -198,6 +211,7 @@ def test_findrfc():
1.07849396, -1.0995006 , 1.08094452]) 1.07849396, -1.0995006 , 1.08094452])
''' '''
def test_rfcfilter(): def test_rfcfilter():
''' '''
# 1. Filtered signal y is the turning points of x. # 1. Filtered signal y is the turning points of x.
@ -233,6 +247,8 @@ def test_rfcfilter():
array([-0.00743352, 1.08753972, -1.07206545, 1.09550837, -1.07940458, array([-0.00743352, 1.08753972, -1.07206545, 1.09550837, -1.07940458,
1.07849396, -1.0995006 , 1.08094452, 0.11983423]) 1.07849396, -1.0995006 , 1.08094452, 0.11983423])
''' '''
def test_findtp(): def test_findtp():
''' '''
>>> import numpy as np >>> import numpy as np
@ -248,6 +264,8 @@ def test_findtp():
array([ 11, 28, 31, 39, 47, 51, 56, 64, 70, 78, 89, 94, 101, array([ 11, 28, 31, 39, 47, 51, 56, 64, 70, 78, 89, 94, 101,
108, 119, 131, 141, 148, 159, 173, 184, 190, 199]) 108, 119, 131, 141, 148, 159, 173, 184, 190, 199])
''' '''
def test_findtc(): def test_findtc():
''' '''
>>> x = sea() >>> x = sea()
@ -261,6 +279,7 @@ def test_findtc():
112, 127, 137, 143, 154, 166, 180, 185]) 112, 127, 137, 143, 154, 166, 180, 185])
''' '''
def test_findoutliers(): def test_findoutliers():
''' '''
>>> xx = sea() >>> xx = sea()
@ -280,6 +299,8 @@ def test_findoutliers():
>>> indg >>> indg
array([ 0, 1, 2, ..., 9521, 9522, 9523]) array([ 0, 1, 2, ..., 9521, 9522, 9523])
''' '''
def test_common_shape(): def test_common_shape():
''' '''
>>> import numpy as np >>> import numpy as np
@ -298,6 +319,8 @@ def test_common_shape():
>>> common_shape(A,B,C,shape=(3,4,1)) >>> common_shape(A,B,C,shape=(3,4,1))
(3, 4, 5) (3, 4, 5)
''' '''
def test_argsreduce(): def test_argsreduce():
''' '''
>>> import numpy as np >>> import numpy as np
@ -319,11 +342,15 @@ def test_argsreduce():
array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]) array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4]) array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
''' '''
def test_stirlerr(): def test_stirlerr():
''' '''
>>> stirlerr(range(5)) >>> stirlerr(range(5))
array([ inf, 0.08106147, 0.0413407 , 0.02767793, 0.02079067]) array([ inf, 0.08106147, 0.0413407 , 0.02767793, 0.02079067])
''' '''
def test_getshipchar(): def test_getshipchar():
''' '''
>>> sc = getshipchar(10,'service_speed') >>> sc = getshipchar(10,'service_speed')
@ -345,17 +372,23 @@ def test_getshipchar():
service_speed : 10 service_speed : 10
service_speedSTD : 0 service_speedSTD : 0
''' '''
def test_betaloge(): def test_betaloge():
''' '''
>>> betaloge(3, arange(4)) >>> betaloge(3, arange(4))
array([ inf, -1.09861229, -2.48490665, -3.40119738]) array([ inf, -1.09861229, -2.48490665, -3.40119738])
''' '''
def test_gravity(): def test_gravity():
''' '''
>>> phi = linspace(0,45,5) >>> phi = linspace(0,45,5)
>>> gravity(phi) >>> gravity(phi)
array([ 9.78049 , 9.78245014, 9.78803583, 9.79640552, 9.80629387]) array([ 9.78049 , 9.78245014, 9.78803583, 9.79640552, 9.80629387])
''' '''
def test_nextpow2(): def test_nextpow2():
''' '''
>>> nextpow2(10) >>> nextpow2(10)
@ -364,6 +397,7 @@ def test_nextpow2():
3 3
''' '''
def test_discretize(): def test_discretize():
''' '''
>>> x, y = discretize(np.cos,0,np.pi) >>> x, y = discretize(np.cos,0,np.pi)
@ -379,6 +413,8 @@ def test_discretize():
-7.07106781e-01, -8.31469612e-01, -9.23879533e-01, -7.07106781e-01, -8.31469612e-01, -9.23879533e-01,
-9.80785280e-01, -1.00000000e+00]) -9.80785280e-01, -1.00000000e+00])
''' '''
def test_discretize_adaptive(): def test_discretize_adaptive():
''' '''
>>> x, y = discretize(np.cos,0,np.pi, method='adaptive') >>> x, y = discretize(np.cos,0,np.pi, method='adaptive')
@ -394,6 +430,8 @@ def test_discretize_adaptive():
-7.07106781e-01, -8.31469612e-01, -9.23879533e-01, -7.07106781e-01, -8.31469612e-01, -9.23879533e-01,
-9.80785280e-01, -1.00000000e+00]) -9.80785280e-01, -1.00000000e+00])
''' '''
def test_pol2cart_n_cart2pol(): def test_pol2cart_n_cart2pol():
''' '''
>>> r = 5 >>> r = 5
@ -420,6 +458,8 @@ def test_pol2cart_n_cart2pol():
array([ 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., array([ 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
5., 5., 5., 5., 5., 5., 5.]) 5., 5., 5., 5., 5., 5., 5.])
''' '''
def test_meshgrid(): def test_meshgrid():
''' '''
>>> x = np.linspace(0,1,3) # coordinates along x axis >>> x = np.linspace(0,1,3) # coordinates along x axis
@ -465,6 +505,8 @@ def test_meshgrid():
>>> xx, yy = meshgrid(x, y, sparse=True) >>> xx, yy = meshgrid(x, y, sparse=True)
>>> z = np.sin(xx**2+yy**2)/(xx**2+yy**2) >>> z = np.sin(xx**2+yy**2)/(xx**2+yy**2)
''' '''
def test_tranproc(): def test_tranproc():
''' '''
>>> import wafo.transform.models as wtm >>> import wafo.transform.models as wtm
@ -475,8 +517,7 @@ def test_tranproc():
>>> y0;y1 >>> y0;y1
array([ 0.02659612, 1.00115284, 1.92872532, 2.81453257, 3.66292878]) array([ 0.02659612, 1.00115284, 1.92872532, 2.81453257, 3.66292878])
array([ 1.00005295, 0.9501118 , 0.90589954, 0.86643821, 0.83096482]) array([ 1.00005295, 0.9501118 , 0.90589954, 0.86643821, 0.83096482])
''' '''
if __name__=='__main__': if __name__ == '__main__':
import doctest import doctest
doctest.testmod() doctest.testmod()

Loading…
Cancel
Save