Formatted code + added tests
parent
93ed9616b1
commit
31f80c5798
@ -0,0 +1,390 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy.testing as npt
|
||||
import numpy as np
|
||||
import nose
|
||||
|
||||
from wafo import stats
|
||||
|
||||
"""
|
||||
Test all continuous distributions.
|
||||
|
||||
Parameters were chosen for those distributions that pass the
|
||||
Kolmogorov-Smirnov test. This provides safe parameters for each
|
||||
distributions so that we can perform further testing of class methods.
|
||||
|
||||
These tests currently check only/mostly for serious errors and exceptions,
|
||||
not for numerically exact results.
|
||||
|
||||
|
||||
TODO:
|
||||
* make functioning test for skew and kurtosis
|
||||
still known failures - skip for now
|
||||
|
||||
|
||||
"""
|
||||
|
||||
#currently not used
|
||||
DECIMAL = 5 # specify the precision of the tests # increased from 0 to 5
|
||||
DECIMAL_kurt = 0
|
||||
|
||||
distcont = [
|
||||
['alpha', (3.5704770516650459,)],
|
||||
['anglit', ()],
|
||||
['arcsine', ()],
|
||||
['beta', (2.3098496451481823, 0.62687954300963677)],
|
||||
['betaprime', (5, 6)], # avoid unbound error in entropy with (100, 86)],
|
||||
['bradford', (0.29891359763170633,)],
|
||||
['burr', (10.5, 4.3)], #incorrect mean and var for(0.94839838075366045, 4.3820284068855795)],
|
||||
['cauchy', ()],
|
||||
['chi', (78,)],
|
||||
['chi2', (55,)],
|
||||
['cosine', ()],
|
||||
['dgamma', (1.1023326088288166,)],
|
||||
['dweibull', (2.0685080649914673,)],
|
||||
['erlang', (20,)], #correction numargs = 1
|
||||
['expon', ()],
|
||||
['exponpow', (2.697119160358469,)],
|
||||
['exponweib', (2.8923945291034436, 1.9505288745913174)],
|
||||
['f', (29, 18)],
|
||||
['fatiguelife', (29,)], #correction numargs = 1
|
||||
['fisk', (3.0857548622253179,)],
|
||||
['foldcauchy', (4.7164673455831894,)],
|
||||
['foldnorm', (1.9521253373555869,)],
|
||||
['frechet_l', (3.6279911255583239,)],
|
||||
['frechet_r', (1.8928171603534227,)],
|
||||
['gamma', (1.9932305483800778,)],
|
||||
['gausshyper', (13.763771604130699, 3.1189636648681431,
|
||||
2.5145980350183019, 5.1811649903971615)], #veryslow
|
||||
['genexpon', (9.1325976465418908, 16.231956600590632, 3.2819552690843983)],
|
||||
['genextreme', (-0.1,)], # sample mean test fails for (3.3184017469423535,)],
|
||||
['gengamma', (4.4162385429431925, 3.1193091679242761)],
|
||||
['genhalflogistic', (0.77274727809929322,)],
|
||||
['genlogistic', (0.41192440799679475,)],
|
||||
['genpareto', (0.1,)], # use case with finite moments
|
||||
['gilbrat', ()],
|
||||
['gompertz', (0.94743713075105251,)],
|
||||
['gumbel_l', ()],
|
||||
['gumbel_r', ()],
|
||||
['halfcauchy', ()],
|
||||
['halflogistic', ()],
|
||||
['halfnorm', ()],
|
||||
['hypsecant', ()],
|
||||
['invgamma', (2.0668996136993067,)],
|
||||
['invgauss', (0.14546264555347513,)],
|
||||
['invweibull', (10.58,)], # sample mean test fails at(0.58847112119264788,)]
|
||||
['johnsonsb', (4.3172675099141058, 3.1837781130785063)],
|
||||
['johnsonsu', (2.554395574161155, 2.2482281679651965)],
|
||||
['ksone', (1000,)], #replace 22 by 100 to avoid failing range, ticket 956
|
||||
['kstwobign', ()],
|
||||
['laplace', ()],
|
||||
['levy', ()],
|
||||
['levy_l', ()],
|
||||
# ['levy_stable', (0.35667405469844993,
|
||||
# -0.67450531578494011)], #NotImplementedError
|
||||
# rvs not tested
|
||||
['loggamma', (0.41411931826052117,)],
|
||||
['logistic', ()],
|
||||
['loglaplace', (3.2505926592051435,)],
|
||||
['lognorm', (0.95368226960575331,)],
|
||||
['lomax', (1.8771398388773268,)],
|
||||
['maxwell', ()],
|
||||
['mielke', (10.4, 3.6)], # sample mean test fails for (4.6420495492121487, 0.59707419545516938)],
|
||||
# mielke: good results if 2nd parameter >2, weird mean or var below
|
||||
['nakagami', (4.9673794866666237,)],
|
||||
['ncf', (27, 27, 0.41578441799226107)],
|
||||
['nct', (14, 0.24045031331198066)],
|
||||
['ncx2', (21, 1.0560465975116415)],
|
||||
['norm', ()],
|
||||
['pareto', (2.621716532144454,)],
|
||||
['pearson3', (0.1,)],
|
||||
['powerlaw', (1.6591133289905851,)],
|
||||
['powerlognorm', (2.1413923530064087, 0.44639540782048337)],
|
||||
['powernorm', (4.4453652254590779,)],
|
||||
['rayleigh', ()],
|
||||
['rdist', (0.9,)], # feels also slow
|
||||
# ['rdist', (3.8266985793976525,)], #veryslow, especially rvs
|
||||
#['rdist', (541.0,)], # from ticket #758 #veryslow
|
||||
['recipinvgauss', (0.63004267809369119,)],
|
||||
['reciprocal', (0.0062309367010521255, 1.0062309367010522)],
|
||||
['rice', (0.7749725210111873,)],
|
||||
['semicircular', ()],
|
||||
['t', (2.7433514990818093,)],
|
||||
['triang', (0.15785029824528218,)],
|
||||
['truncexpon', (4.6907725456810478,)],
|
||||
['truncnorm', (-1.0978730080013919, 2.7306754109031979)],
|
||||
['tukeylambda', (3.1321477856738267,)],
|
||||
['uniform', ()],
|
||||
['vonmises', (3.9939042581071398,)],
|
||||
['wald', ()],
|
||||
['weibull_max', (2.8687961709100187,)],
|
||||
['weibull_min', (1.7866166930421596,)],
|
||||
['wrapcauchy', (0.031071279018614728,)]]
|
||||
|
||||
# for testing only specific functions
|
||||
##distcont = [
|
||||
## ['erlang', (20,)], #correction numargs = 1
|
||||
## ['fatiguelife', (29,)], #correction numargs = 1
|
||||
## ['loggamma', (0.41411931826052117,)]]
|
||||
|
||||
# for testing ticket:767
|
||||
##distcont = [
|
||||
## ['genextreme', (3.3184017469423535,)],
|
||||
## ['genextreme', (0.01,)],
|
||||
## ['genextreme', (0.00001,)],
|
||||
## ['genextreme', (0.0,)],
|
||||
## ['genextreme', (-0.01,)]
|
||||
## ]
|
||||
|
||||
##distcont = [['gumbel_l', ()],
|
||||
## ['gumbel_r', ()],
|
||||
## ['norm', ()]
|
||||
## ]
|
||||
|
||||
##distcont = [['norm', ()]]
|
||||
|
||||
distmissing = ['wald', 'gausshyper', 'genexpon', 'rv_continuous',
|
||||
'loglaplace', 'rdist', 'semicircular', 'invweibull', 'ksone',
|
||||
'cosine', 'kstwobign', 'truncnorm', 'mielke', 'recipinvgauss', 'levy',
|
||||
'johnsonsu', 'levy_l', 'powernorm', 'wrapcauchy',
|
||||
'johnsonsb', 'truncexpon', 'rice', 'invgauss', 'invgamma',
|
||||
'powerlognorm']
|
||||
|
||||
distmiss = [[dist,args] for dist,args in distcont if dist in distmissing]
|
||||
distslow = ['rdist', 'gausshyper', 'recipinvgauss', 'ksone', 'genexpon',
|
||||
'vonmises', 'rice', 'mielke', 'semicircular', 'cosine', 'invweibull',
|
||||
'powerlognorm', 'johnsonsu', 'kstwobign']
|
||||
#distslow are sorted by speed (very slow to slow)
|
||||
|
||||
def _silence_fp_errors(func):
|
||||
def wrap(*a, **kw):
|
||||
olderr = np.seterr(all='ignore')
|
||||
try:
|
||||
return func(*a, **kw)
|
||||
finally:
|
||||
np.seterr(**olderr)
|
||||
wrap.__name__ = func.__name__
|
||||
return wrap
|
||||
|
||||
@_silence_fp_errors
def test_cont_basic():
    """Generator test: for every fast continuous distribution in ``distcont``
    draw a fixed-seed sample and yield the battery of sanity checks
    (sample mean/var, moments, cdf/ppf and sf/isf round-trips, pdf vs. cdf
    derivative, log-method consistency, and a KS test for ``distmissing``).
    """
    # this test skips slow distributions
    for distname, arg in distcont[:]:
        if distname in distslow:
            continue
        distfn = getattr(stats, distname)
        np.random.seed(765456)  # fixed seed so failures are reproducible
        sn = 1000
        rvs = distfn.rvs(size=sn,*arg)
        sm = rvs.mean()
        sv = rvs.var()
        skurt = stats.kurtosis(rvs)
        sskew = stats.skew(rvs)
        m,v = distfn.stats(*arg)

        yield check_sample_meanvar_, distfn, arg, m, v, sm, sv, sn, distname + \
            'sample mean test'
        # the sample skew kurtosis test has known failures, not very good distance measure
        #yield check_sample_skew_kurt, distfn, arg, sskew, skurt, distname
        yield check_moment, distfn, arg, m, v, distname
        yield check_cdf_ppf, distfn, arg, distname
        yield check_sf_isf, distfn, arg, distname
        yield check_pdf, distfn, arg, distname
        # wald segfaults/misbehaves in the log-method checks; skip them
        if distname in ['wald']:
            continue
        yield check_pdf_logpdf, distfn, arg, distname
        yield check_cdf_logcdf, distfn, arg, distname
        yield check_sf_logsf, distfn, arg, distname
        if distname in distmissing:
            alpha = 0.01
            yield check_distribution_rvs, distname, arg, alpha, rvs
|
||||
|
||||
@npt.dec.slow
def test_cont_basic_slow():
    """Same battery of checks as ``test_cont_basic`` but for the
    distributions listed in ``distslow``; split out so the fast suite
    stays quick."""
    # same as above for slow distributions
    for distname, arg in distcont[:]:
        if distname not in distslow: continue
        distfn = getattr(stats, distname)
        np.random.seed(765456)  # fixed seed so failures are reproducible
        sn = 1000
        rvs = distfn.rvs(size=sn,*arg)
        sm = rvs.mean()
        sv = rvs.var()
        skurt = stats.kurtosis(rvs)
        sskew = stats.skew(rvs)
        m,v = distfn.stats(*arg)
        yield check_sample_meanvar_, distfn, arg, m, v, sm, sv, sn, distname + \
            'sample mean test'
        # the sample skew kurtosis test has known failures, not very good distance measure
        #yield check_sample_skew_kurt, distfn, arg, sskew, skurt, distname
        yield check_moment, distfn, arg, m, v, distname
        yield check_cdf_ppf, distfn, arg, distname
        yield check_sf_isf, distfn, arg, distname
        yield check_pdf, distfn, arg, distname
        yield check_pdf_logpdf, distfn, arg, distname
        yield check_cdf_logcdf, distfn, arg, distname
        yield check_sf_logsf, distfn, arg, distname
        #yield check_oth, distfn, arg # is still missing
        if distname in distmissing:
            alpha = 0.01
            yield check_distribution_rvs, distname, arg, alpha, rvs
|
||||
|
||||
@_silence_fp_errors
|
||||
def check_moment(distfn, arg, m, v, msg):
    """Check that ``distfn.moment`` agrees with the mean *m* and variance *v*
    reported by ``distfn.stats``.

    Finite moments are compared to 10 decimals; an infinite theoretical
    moment only requires the computed moment to be infinite as well.
    Fixes the '2ndt moment' typo in the failure message.
    """
    m1 = distfn.moment(1, *arg)
    m2 = distfn.moment(2, *arg)
    if not np.isinf(m):
        npt.assert_almost_equal(m1, m, decimal=10,
                                err_msg=msg + ' - 1st moment')
    else:  # or np.isnan(m1)
        npt.assert_(np.isinf(m1),
                    msg + ' - 1st moment -infinite, m1=%s' % str(m1))
        # np.isnan(m1) temporary special treatment for loggamma
    if not np.isinf(v):
        # variance is the second central moment: E[X^2] - (E[X])^2
        npt.assert_almost_equal(m2 - m1 * m1, v, decimal=10,
                                err_msg=msg + ' - 2nd moment')
    else:  # or np.isnan(m2)
        npt.assert_(np.isinf(m2),
                    msg + ' - 2nd moment -infinite, m2=%s' % str(m2))
        # np.isnan(m2) temporary special treatment for loggamma
|
||||
|
||||
@_silence_fp_errors
|
||||
def check_sample_meanvar_(distfn, arg, m, v, sm, sv, sn, msg):
    """Run the t-test on the sample mean and the chi-square test on the
    sample variance, skipping whichever theoretical moment is infinite."""
    if not np.isinf(m):
        check_sample_mean(sm, sv, sn, m)
    if not np.isinf(v):
        check_sample_var(sv, sn, v)
|
||||
|
||||
def check_sample_mean(sm, v, n, popmean):
    """One-sample t-test that the sample mean *sm* is consistent with the
    population mean *popmean*, given sample variance *v* and size *n*.

    Mirrors ``stats.ttest_1samp``; asserts that the two-tailed p-value
    stays above 1%.
    """
    df = n - 1
    svar = ((n - 1) * v) / float(df)  # equals v; kept from the reference code
    t = (sm - popmean) / np.sqrt(svar * (1.0 / n))
    # two-tailed probability via the incomplete beta function
    prob = stats.betai(0.5 * df, 0.5, df / (df + t * t))
    npt.assert_(prob > 0.01, 'mean fail, t,prob = %f, %f, m,sm=%f,%f' % (t,prob,popmean,sm))
|
||||
|
||||
def check_sample_var(sv, n, popvar):
    '''
    two-sided chisquare test for sample variance equal to hypothesized variance

    Bug fix: the statistic is (n-1)*s^2/sigma^2, but the original computed
    (n-1)*popvar/float(popvar) -- dividing the population variance by
    itself -- so the sample variance ``sv`` was never tested and chi2 was
    always n-1.
    '''
    df = n - 1
    chi2 = (n - 1) * sv / float(popvar)
    pval = stats.chisqprob(chi2, df) * 2
    npt.assert_(pval > 0.01, 'var fail, t,pval = %f, %f, v,sv=%f,%f' % (chi2,pval,popvar,sv))
|
||||
|
||||
|
||||
def check_sample_skew_kurt(distfn, arg, ss, sk, msg):
    """Compare sample skew *ss* and kurtosis *sk* against the theoretical
    values from ``distfn.stats(moments='sk')``."""
    skew, kurt = distfn.stats(moments='sk', *arg)
    check_sample_meanvar(sk, kurt, msg + 'sample kurtosis test')
    check_sample_meanvar(ss, skew, msg + 'sample skew test')
|
||||
|
||||
def check_sample_meanvar(sm, m, msg):
    """Assert a sample moment equals the theoretical one when that value is
    finite; nan or infinite theoretical moments are not checked."""
    if np.isinf(m) or np.isnan(m):
        return
    npt.assert_almost_equal(sm, m, decimal=DECIMAL,
                            err_msg=msg + ' - finite moment')
|
||||
|
||||
@_silence_fp_errors
|
||||
def check_cdf_ppf(distfn, arg, msg):
    """Round-trip check: cdf(ppf(q)) must reproduce q at a few quantiles."""
    quantiles = [0.001, 0.5, 0.999]
    roundtrip = distfn.cdf(distfn.ppf(quantiles, *arg), *arg)
    npt.assert_almost_equal(roundtrip, quantiles, decimal=DECIMAL,
                            err_msg=msg + ' - cdf-ppf roundtrip')
|
||||
|
||||
@_silence_fp_errors
|
||||
def check_sf_isf(distfn, arg, msg):
    """Check sf(isf(q)) round-trips and that cdf(x) == 1 - sf(x)."""
    probs = [0.1, 0.5, 0.9]
    npt.assert_almost_equal(distfn.sf(distfn.isf(probs, *arg), *arg),
                            probs, decimal=DECIMAL,
                            err_msg=msg + ' - sf-isf roundtrip')
    x = [0.1, 0.9]
    npt.assert_almost_equal(distfn.cdf(x, *arg),
                            1.0 - distfn.sf(x, *arg),
                            decimal=DECIMAL,
                            err_msg=msg + ' - cdf-sf relationship')
|
||||
|
||||
@_silence_fp_errors
|
||||
def check_pdf(distfn, arg, msg):
    """Compare the pdf at the median against a central finite difference of
    the cdf; if the density there is tiny or huge (near a singularity),
    shift the evaluation point by 0.1 first."""
    eps = 1e-6
    median = distfn.ppf(0.5, *arg)
    pdfv = distfn.pdf(median, *arg)
    if (pdfv < 1e-4) or (pdfv > 1e4):
        # avoid a point where the density is near zero or near a singularity
        median = median + 0.1
        pdfv = distfn.pdf(median, *arg)
    cdfdiff = (distfn.cdf(median + eps, *arg) -
               distfn.cdf(median - eps, *arg)) / eps / 2.0
    npt.assert_almost_equal(pdfv, cdfdiff,
                            decimal=DECIMAL, err_msg=msg + ' - cdf-pdf relationship')
|
||||
|
||||
@_silence_fp_errors
|
||||
def check_pdf_logpdf(distfn, args, msg):
    """logpdf must equal log(pdf) wherever pdf is nonzero and logpdf finite."""
    quantiles = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
    x = distfn.ppf(quantiles, *args)
    density = distfn.pdf(x, *args)
    logdensity = distfn.logpdf(x, *args)
    density = density[density != 0]
    logdensity = logdensity[np.isfinite(logdensity)]
    npt.assert_almost_equal(np.log(density), logdensity, decimal=7, err_msg=msg + " - logpdf-log(pdf) relationship")
|
||||
|
||||
@_silence_fp_errors
|
||||
def check_sf_logsf(distfn, args, msg):
    """logsf must equal log(sf) wherever sf is nonzero and logsf finite."""
    quantiles = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
    x = distfn.ppf(quantiles, *args)
    sf = distfn.sf(x, *args)
    logsf = distfn.logsf(x, *args)
    sf = sf[sf != 0]
    logsf = logsf[np.isfinite(logsf)]
    npt.assert_almost_equal(np.log(sf), logsf, decimal=7, err_msg=msg + " - logsf-log(sf) relationship")
|
||||
|
||||
@_silence_fp_errors
|
||||
def check_cdf_logcdf(distfn, args, msg):
    """logcdf must equal log(cdf) wherever cdf is nonzero and logcdf finite."""
    quantiles = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
    x = distfn.ppf(quantiles, *args)
    cdf = distfn.cdf(x, *args)
    logcdf = distfn.logcdf(x, *args)
    cdf = cdf[cdf != 0]
    logcdf = logcdf[np.isfinite(logcdf)]
    npt.assert_almost_equal(np.log(cdf), logcdf, decimal=7, err_msg=msg + " - logcdf-log(cdf) relationship")
|
||||
|
||||
|
||||
@_silence_fp_errors
|
||||
def check_distribution_rvs(dist, args, alpha, rvs):
    """Kolmogorov-Smirnov test of existing sample *rvs* against *dist*;
    on a failure, retry once with freshly generated variates before
    asserting (reduces false positives at significance *alpha*)."""
    D, pval = stats.kstest(rvs, dist, args=args, N=1000)
    if pval < alpha:
        D, pval = stats.kstest(dist, '', args=args, N=1000)
        npt.assert_(pval > alpha, "D = " + str(D) + "; pval = " + str(pval) +
                    "; alpha = " + str(alpha) + "\nargs = " + str(args))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
#nose.run(argv=['', __file__])
|
||||
nose.runmodule(argv=[__file__,'-s'], exit=False)
|
@ -0,0 +1,102 @@
|
||||
# contains additional tests for continuous distributions
|
||||
#
|
||||
# NOTE: one test, _est_cont_skip, that is renamed so that nose doesn't
|
||||
# run it,
|
||||
# 6 distributions return nan for entropy
|
||||
# truncnorm fails by design for private method _ppf test
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
|
||||
import numpy.testing as npt
|
||||
import numpy as np
|
||||
|
||||
from wafo import stats
|
||||
|
||||
from test_continuous_basic import distcont
|
||||
|
||||
DECIMAL = 5
|
||||
|
||||
@npt.dec.slow
def test_cont_extra():
    """Generator test: yield ppf/isf limit checks and the loc/scale affine
    transform check for every continuous distribution in ``distcont``."""
    for distname, arg in distcont[:]:
        distfn = getattr(stats, distname)

        yield check_ppf_limits, distfn, arg, distname + \
            ' ppf limit test'
        yield check_isf_limits, distfn, arg, distname + \
            ' isf limit test'
        yield check_loc_scale, distfn, arg, distname + \
            ' loc, scale test'
|
||||
|
||||
@npt.dec.slow
def _est_cont_skip():
    """Renamed from test_* so nose does not run it: 6 distributions return
    nan entropy and the private ``_ppf`` check fails by design (truncnorm)."""
    for distname, arg in distcont:
        distfn = getattr(stats, distname)
        #entropy test checks only for isnan, currently 6 isnan left
        yield check_entropy, distfn, arg, distname + \
            ' entropy nan test'
        # _ppf test has 1 failure be design
        yield check_ppf_private, distfn, arg, distname + \
            ' _ppf private test'
|
||||
|
||||
def test_540_567():
    """Regression test for tickets 540 and 567: norm.cdf must not return
    nan near these arguments and must match the reference values."""
    cases = [(-1.7624320982, 0.03899815971089126, {}),
             (-1.7624320983, 0.038998159702449846, {}),
             (1.38629436112, 0.98353464004309321,
              dict(loc=0.950273420309, scale=0.204423758009))]
    for x, expected, kwds in cases:
        npt.assert_almost_equal(stats.norm.cdf(x, **kwds), expected,
                                decimal=10, err_msg='test_540_567')
|
||||
|
||||
|
||||
def check_ppf_limits(distfn, arg, msg):
    """ppf must map 0 and 1 to the support endpoints a and b, and any
    probability outside [0, 1] to nan."""
    under, lo, hi, over = distfn.ppf([-1, 0, 1, 2], *arg)
    assert_equal_inf_nan(distfn.a, lo, msg + 'ppf lower bound')
    assert_equal_inf_nan(distfn.b, hi, msg + 'ppf upper bound')
    npt.assert_(np.isnan(under), msg + 'ppf out of bounds - below')
    npt.assert_(np.isnan(over), msg + 'ppf out of bounds - above')
|
||||
|
||||
def check_ppf_private(distfn, arg, msg):
    """The private _ppf must not produce nan at interior quantiles
    (fails by design for truncnorm, whose self.nb is undefined)."""
    quantiles = np.array([0.1, 0.5, 0.9])
    ppfs = distfn._ppf(quantiles, *arg)
    npt.assert_(not np.any(np.isnan(ppfs)), msg + 'ppf private is nan')
|
||||
|
||||
|
||||
def check_isf_limits(distfn, arg, msg):
    """isf must map 1 and 0 to the support endpoints a and b (note the
    reversed order relative to ppf), and out-of-range input to nan."""
    under, lo, hi, over = distfn.isf([-1, 0, 1, 2], *arg)
    assert_equal_inf_nan(distfn.a, hi, msg + 'isf lower bound')
    assert_equal_inf_nan(distfn.b, lo, msg + 'isf upper bound')
    npt.assert_(np.isnan(under), msg + 'isf out of bounds - below')
    npt.assert_(np.isnan(over), msg + 'isf out of bounds - above')
|
||||
|
||||
|
||||
def check_loc_scale(distfn, arg, msg):
    """Mean and variance must transform affinely under loc/scale:
    mean -> mean*scale + loc, var -> var*scale**2."""
    loc, scale = 10.0, 10.0
    m, v = distfn.stats(*arg)
    mt, vt = distfn.stats(loc=loc, scale=scale, *arg)
    assert_equal_inf_nan(m * scale + loc, mt, msg + 'mean')
    assert_equal_inf_nan(v * scale * scale, vt, msg + 'var')
|
||||
|
||||
def check_entropy(distfn, arg, msg):
    """entropy() must return a non-nan value for these parameters."""
    npt.assert_(not np.isnan(distfn.entropy(*arg)),
                msg + 'test Entropy is nan')
|
||||
|
||||
def assert_equal_inf_nan(v1, v2, msg):
    """Assert v2 matches v1 (which must not be nan): to DECIMAL digits when
    v1 is finite; an infinite v1 accepts inf or nan in v2."""
    npt.assert_(not np.isnan(v1))
    if np.isinf(v1):
        npt.assert_(np.isinf(v2) or np.isnan(v2),
                    msg + ' - infinite, v2=%s' % str(v2))
    else:
        npt.assert_almost_equal(v1, v2, decimal=DECIMAL,
                                err_msg=msg + ' - finite')
|
||||
|
||||
if __name__ == "__main__":
|
||||
import nose
|
||||
#nose.run(argv=['', __file__])
|
||||
nose.runmodule(argv=[__file__,'-s'], exit=False)
|
@ -0,0 +1,268 @@
|
||||
import numpy.testing as npt
|
||||
import numpy as np
|
||||
import nose
|
||||
|
||||
from wafo import stats
|
||||
|
||||
DECIMAL_meanvar = 0#1 # was 0
|
||||
|
||||
distdiscrete = [
|
||||
['bernoulli',(0.3,)],
|
||||
['binom', (5, 0.4)],
|
||||
['boltzmann',(1.4, 19)],
|
||||
['dlaplace', (0.8,)], #0.5
|
||||
['geom', (0.5,)],
|
||||
['hypergeom',(30, 12, 6)],
|
||||
['hypergeom',(21,3,12)], #numpy.random (3,18,12) numpy ticket:921
|
||||
['hypergeom',(21,18,11)], #numpy.random (18,3,11) numpy ticket:921
|
||||
['logser', (0.6,)], # reenabled, numpy ticket:921
|
||||
['nbinom', (5, 0.5)],
|
||||
['nbinom', (0.4, 0.4)], #from tickets: 583
|
||||
['planck', (0.51,)], #4.1
|
||||
['poisson', (0.6,)],
|
||||
['randint', (7, 31)],
|
||||
['skellam', (15, 8)]]
|
||||
# ['zipf', (4,)] ] # arg=4 is ok,
|
||||
# Zipf broken for arg = 2, e.g. weird .stats
|
||||
# looking closer, mean, var should be inf for arg=2
|
||||
|
||||
|
||||
#@npt.dec.slow
|
||||
def test_discrete_basic():
    """Generator test: for each discrete distribution in ``distdiscrete``
    draw a fixed-seed sample and yield checks of sample moments, cdf/ppf
    round-trips, pmf-cdf consistency, method cross-consistency,
    skew/kurtosis and a chisquare goodness-of-fit test."""
    for distname, arg in distdiscrete:
        distfn = getattr(stats,distname)
        #assert stats.dlaplace.rvs(0.8) is not None
        np.random.seed(9765456)  # fixed seed so failures are reproducible
        rvs = distfn.rvs(size=2000,*arg)
        supp = np.unique(rvs)
        m,v = distfn.stats(*arg)
        #yield npt.assert_almost_equal(rvs.mean(), m, decimal=4,err_msg='mean')
        #yield npt.assert_almost_equal, rvs.mean(), m, 2, 'mean' # does not work
        yield check_sample_meanvar, rvs.mean(), m, distname + ' sample mean test'
        yield check_sample_meanvar, rvs.var(), v, distname + ' sample var test'
        yield check_cdf_ppf, distfn, arg, distname + ' cdf_ppf'
        yield check_cdf_ppf2, distfn, arg, supp, distname + ' cdf_ppf'
        yield check_pmf_cdf, distfn, arg, distname + ' pmf_cdf'

        # zipf doesn't fail, but generates floating point warnings.
        # Should be checked.
        if not distname in ['zipf']:
            yield check_oth, distfn, arg, distname + ' oth'
            skurt = stats.kurtosis(rvs)
            sskew = stats.skew(rvs)
            yield check_sample_skew_kurt, distfn, arg, skurt, sskew, \
                distname + ' skew_kurt'

        # dlaplace doesn't fail, but generates lots of floating point warnings.
        # Should be checked.
        if not distname in ['dlaplace']: #['logser']: #known failure, fixed
            alpha = 0.01
            yield check_discrete_chisquare, distfn, arg, rvs, alpha, \
                distname + ' chisquare'
|
||||
|
||||
@npt.dec.slow
def test_discrete_extra():
    """Generator test: ppf/isf limit checks and the entropy-is-not-nan
    check for every discrete distribution in ``distdiscrete``."""
    for distname, arg in distdiscrete:
        distfn = getattr(stats,distname)
        yield check_ppf_limits, distfn, arg, distname + \
            ' ppf limit test'
        yield check_isf_limits, distfn, arg, distname + \
            ' isf limit test'
        yield check_entropy, distfn, arg, distname + \
            ' entropy nan test'
|
||||
|
||||
@npt.dec.skipif(True)
def test_discrete_private():
    """Disabled generator exercising private methods (_ppf, generic_moment,
    frozen moments) - mostly a debugging aid; some checks fail by design."""
    #testing private methods mostly for debugging
    # some tests might fail by design,
    # e.g. incorrect definition of distfn.a and distfn.b
    for distname, arg in distdiscrete:
        distfn = getattr(stats,distname)
        rvs = distfn.rvs(size=10000,*arg)
        m,v = distfn.stats(*arg)

        yield check_ppf_ppf, distfn, arg
        yield check_cdf_ppf_private, distfn, arg, distname
        yield check_generic_moment, distfn, arg, m, 1, 3 # last is decimal
        yield check_generic_moment, distfn, arg, v+m*m, 2, 3 # last is decimal
        yield check_moment_frozen, distfn, arg, m, 1, 3 # last is decimal
        yield check_moment_frozen, distfn, arg, v+m*m, 2, 3 # last is decimal
|
||||
|
||||
|
||||
def check_sample_meanvar(sm, m, msg):
    """Sample moment vs. theoretical moment; an infinite theoretical moment
    only requires the sample value to be very large."""
    if np.isinf(m):
        npt.assert_(sm > 10000, msg='infinite moment, sm = ' + str(sm))
    else:
        npt.assert_almost_equal(sm, m, decimal=DECIMAL_meanvar,
                                err_msg=msg + ' - finite moment')
|
||||
|
||||
def check_sample_var(sm, m, msg):
    """Sample variance must equal the theoretical variance to
    DECIMAL_meanvar digits."""
    npt.assert_almost_equal(sm, m, decimal=DECIMAL_meanvar,
                            err_msg=msg + 'var')
|
||||
|
||||
def check_cdf_ppf(distfn, arg, msg):
    """ppf must behave as the left-continuous inverse of the cdf around the
    median: a probability just below cdf(median) maps back to the median,
    one just above maps strictly past it."""
    ppf05 = distfn.ppf(0.5, *arg)
    cdf05 = distfn.cdf(ppf05, *arg)
    npt.assert_almost_equal(distfn.ppf(cdf05 - 1e-6, *arg), ppf05,
                            err_msg=msg + 'ppf-cdf-median')
    npt.assert_(distfn.ppf(cdf05 + 1e-4, *arg) > ppf05, msg + 'ppf-cdf-next')
|
||||
|
||||
def check_cdf_ppf2(distfn, arg, supp, msg):
    """ppf(cdf(x)) must round-trip over the observed support *supp*, also
    when the probability is nudged just below the cdf value."""
    cdf_supp = distfn.cdf(supp, *arg)
    npt.assert_array_equal(distfn.ppf(cdf_supp, *arg),
                           supp, msg + '-roundtrip')
    # -1e-8 could cause an error if pmf < 1e-8
    npt.assert_array_equal(distfn.ppf(cdf_supp - 1e-8, *arg),
                           supp, msg + '-roundtrip')
|
||||
|
||||
|
||||
def check_cdf_ppf_private(distfn, arg, msg):
    """Same inverse check as check_cdf_ppf but on the private _ppf
    (fails by design for truncnorm: self.nb not defined)."""
    ppf05 = distfn._ppf(0.5, *arg)
    cdf05 = distfn.cdf(ppf05, *arg)
    npt.assert_almost_equal(distfn._ppf(cdf05 - 1e-6, *arg), ppf05,
                            err_msg=msg + '_ppf-cdf-median ')
    npt.assert_(distfn._ppf(cdf05 + 1e-4, *arg) > ppf05, msg + '_ppf-cdf-next')
|
||||
|
||||
def check_ppf_ppf(distfn, arg):
    """Public ppf (scalar and vector calls) and private _ppf must agree and
    stay finite."""
    npt.assert_(distfn.ppf(0.5, *arg) < np.inf)
    ppfs = distfn.ppf([0.5, 0.9], *arg)
    ppf_s = [distfn._ppf(0.5, *arg), distfn._ppf(0.9, *arg)]
    npt.assert_(np.all(ppfs < np.inf))
    for private, q, public in zip(ppf_s, (0.5, 0.9), ppfs):
        npt.assert_(private == distfn.ppf(q, *arg))
        npt.assert_(private == public)
|
||||
|
||||
def check_pmf_cdf(distfn, arg, msg):
    """cdf over ten consecutive support points must equal the cumulative sum
    of the pmf, anchored at the first cdf value.

    ``np.int`` (a deprecated alias removed in NumPy >= 1.24) is replaced by
    the builtin ``int``; the cast truncates the fractional part exactly as
    before.
    """
    startind = int(distfn._ppf(0.01, *arg) - 1)
    index = range(startind, startind + 10)
    cdfs = distfn.cdf(index, *arg)
    npt.assert_almost_equal(cdfs, distfn.pmf(index, *arg).cumsum() +
                            cdfs[0] - distfn.pmf(index[0], *arg),
                            decimal=4, err_msg=msg + 'pmf-cdf')
|
||||
|
||||
def check_generic_moment(distfn, arg, m, k, decim):
    """generic_moment(k) must equal the expected raw moment *m* to *decim*
    decimals."""
    npt.assert_almost_equal(distfn.generic_moment(k, *arg), m,
                            decimal=decim,
                            err_msg=str(distfn) + ' generic moment test')
|
||||
|
||||
def check_moment_frozen(distfn, arg, m, k, decim):
    """moment(k) of the frozen distribution must equal *m* to *decim*
    decimals."""
    frozen = distfn(*arg)
    npt.assert_almost_equal(frozen.moment(k), m, decimal=decim,
                            err_msg=str(distfn) + ' frozen moment test')
|
||||
|
||||
def check_oth(distfn, arg, msg):
    """Cross-consistency of sf/cdf around the mean, and of isf/ppf at the
    median."""
    meanint = round(float(distfn.stats(*arg)[0]))  # closest integer to mean
    npt.assert_almost_equal(distfn.sf(meanint, *arg),
                            1 - distfn.cdf(meanint, *arg), decimal=8)
    median_sf = distfn.isf(0.5, *arg)

    npt.assert_(distfn.sf(median_sf - 1, *arg) > 0.5)
    npt.assert_(distfn.cdf(median_sf + 1, *arg) > 0.5)
    npt.assert_equal(distfn.isf(0.5, *arg), distfn.ppf(0.5, *arg))
|
||||
|
||||
#next 3 functions copied from test_continous_extra
|
||||
# adjusted
|
||||
|
||||
def check_ppf_limits(distfn, arg, msg):
    """Discrete variant: ppf(0) is a-1 (one below the support), ppf(1) is b,
    and probabilities outside [0, 1] give nan."""
    under, lo, hi, over = distfn.ppf([-1, 0, 1, 2], *arg)
    assert_equal_inf_nan(distfn.a - 1, lo, msg + 'ppf lower bound')
    assert_equal_inf_nan(distfn.b, hi, msg + 'ppf upper bound')
    npt.assert_(np.isnan(under), msg + 'ppf out of bounds - below')
    npt.assert_(np.isnan(over), msg + 'ppf out of bounds - above')
|
||||
|
||||
def check_isf_limits(distfn, arg, msg):
    """Discrete variant: isf(1) is a-1, isf(0) is b (reversed relative to
    ppf), and out-of-range probabilities give nan."""
    under, lo, hi, over = distfn.isf([-1, 0, 1, 2], *arg)
    assert_equal_inf_nan(distfn.a - 1, hi, msg + 'isf lower bound')
    assert_equal_inf_nan(distfn.b, lo, msg + 'isf upper bound')
    npt.assert_(np.isnan(under), msg + 'isf out of bounds - below')
    npt.assert_(np.isnan(over), msg + 'isf out of bounds - above')
|
||||
|
||||
def assert_equal_inf_nan(v1, v2, msg):
    """Assert v2 matches v1 (which must not be nan): to 10 decimals when v1
    is finite; an infinite v1 accepts inf or nan in v2."""
    npt.assert_(not np.isnan(v1))
    if np.isinf(v1):
        npt.assert_(np.isinf(v2) or np.isnan(v2),
                    msg + ' - infinite, v2=%s' % str(v2))
    else:
        npt.assert_almost_equal(v1, v2, decimal=10,
                                err_msg=msg + ' - finite')
|
||||
|
||||
def check_sample_skew_kurt(distfn, arg, sk, ss, msg):
    """Compare sample skew/kurtosis with the theoretical values from
    ``distfn.stats``.

    Two bugs fixed: the original lines were bare tuple expressions
    (``check_sample_meanvar, sk, k, ...``) so the checks never executed,
    and the keyword was spelled ``moment=`` instead of ``moments=``, which
    silently fell through to the default mean/variance moments.
    """
    k, s = distfn.stats(moments='ks', *arg)
    check_sample_meanvar(sk, k, msg + 'sample skew test')
    check_sample_meanvar(ss, s, msg + 'sample kurtosis test')
|
||||
|
||||
|
||||
def check_entropy(distfn, arg, msg):
    """entropy() must return a non-nan value for these parameters."""
    npt.assert_(not np.isnan(distfn.entropy(*arg)),
                msg + 'test Entropy is nan')
|
||||
|
||||
|
||||
def check_discrete_chisquare(distfn, arg, rvs, alpha, msg):
    '''perform chisquare test for random sample of a discrete distribution

    Parameters
    ----------
    distfn : distribution object
        discrete distribution under test
    arg : sequence
        parameters of distribution
    rvs : array_like
        pre-drawn random sample to test
    alpha : float
        significance level, threshold for p-value

    Raises an AssertionError when the chisquare p-value falls below alpha.
    '''

    # define parameters for test
    n = len(rvs)
    nsupp = 20
    wsupp = 1.0 / nsupp

    # construct intervals with minimum mass 1/nsupp
    # intervals are left-half-open as in a cdf difference
    # NOTE: xrange -> range so this also runs under Python 3 (the support is
    # clipped to at most 2001 points, so materializing it is cheap)
    distsupport = range(max(distfn.a, -1000), min(distfn.b, 1000) + 1)
    last = 0
    distsupp = [max(distfn.a, -1000)]
    distmass = []
    for ii in distsupport:
        current = distfn.cdf(ii, *arg)
        if current - last >= wsupp - 1e-14:
            distsupp.append(ii)
            distmass.append(current - last)
            last = current
            if current > (1 - wsupp):
                break
    if distsupp[-1] < distfn.b:
        distsupp.append(distfn.b)
        distmass.append(1 - last)
    distsupp = np.array(distsupp)
    distmass = np.array(distmass)

    # convert intervals to right-half-open as required by histogram
    histsupp = distsupp + 1e-8
    histsupp[0] = distfn.a

    # find sample frequencies and perform chisquare test
    freq, hsupp = np.histogram(rvs, histsupp)
    cdfs = distfn.cdf(distsupp, *arg)
    (chis, pval) = stats.chisquare(np.array(freq), n * distmass)

    npt.assert_(pval > alpha, 'chisquare - test for %s'
                ' at arg = %s with pval = %s' % (msg,str(arg),str(pval)))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
#nose.run(argv=['', __file__])
|
||||
nose.runmodule(argv=[__file__,'-s'], exit=False)
|
@ -0,0 +1,98 @@
|
||||
# NOTE: contains only one test, _est_cont_fit, that is renamed so that
|
||||
# nose doesn't run it
|
||||
# I put this here for the record and for the case when someone wants to
|
||||
# verify the quality of fit
|
||||
# with current parameters: relatively small sample size, default starting values
|
||||
# Ran 84 tests in 401.797s
|
||||
# FAILED (failures=15)
|
||||
#
|
||||
#Ran 83 tests in 238.859s
|
||||
#FAILED (failures=12)
|
||||
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import numpy.testing as npt
|
||||
import numpy as np
|
||||
|
||||
from wafo import stats
|
||||
|
||||
from test_continuous_basic import distcont
|
||||
|
||||
# this is not a proper statistical test for convergence, but only
|
||||
# verifies that the estimate and true values don't differ by too much
|
||||
n_repl1 = 1000 # sample size for first run
|
||||
n_repl2 = 5000 # sample size for second run, if first run fails
|
||||
thresh_percent = 0.25 # percent of true parameters for fail cut-off
|
||||
thresh_min = 0.75 # minimum difference estimate - true to fail test
|
||||
|
||||
|
||||
distslow = [ 'ncx2', 'rdist', 'gausshyper', 'recipinvgauss', 'ksone', 'genexpon',
|
||||
'vonmises', 'rice', 'mielke',
|
||||
'powerlognorm', 'kstwobign', 'tukeylambda','betaprime', 'gengamma',
|
||||
'johnsonsb', 'burr', 'truncexpon', 'pearson3', 'exponweib', 'nakagami',
|
||||
'wrapcauchy']
|
||||
dist_rarely_fitted = ['f', 'ncf', 'nct', 'chi']
|
||||
distskip = distslow + dist_rarely_fitted
|
||||
|
||||
#distcont = [['genextreme', (3.3184017469423535,)]]
|
||||
#@npt.dec.slow
|
||||
def test_cont_fit():
    # Check closeness of the estimated parameters to the true parameters
    # for every continuous distribution, skipping the slow and
    # rarely-fitted ones.
    for name, params in distcont:
        if name in distskip:
            continue
        yield check_cont_fit, name, params
|
||||
|
||||
@npt.dec.slow
def _est_cont_fit_slow():
    # Same check as test_cont_fit but for the slow distributions; renamed
    # with a leading underscore so nose does not collect it by default.
    # Note: some distributions do not converge with sample size <= 10000.
    for name, params in distcont:
        if name in distslow:
            yield check_cont_fit, name, params
|
||||
|
||||
def test_lognorm_fit_ticket1131():
    # Regression test for ticket #1131: lognorm.fit with several
    # (shape, loc, scale) combinations.
    for param in [(2.1, 1., 1.), (1., 10., 1.), (1., 1., 10.)]:
        yield check_cont_fit, 'lognorm', param
|
||||
|
||||
def check_cont_fit(distname, arg):
    """Fit ``distname`` to data generated with the true parameters ``arg``
    and assert that the estimates are within tolerance.

    If the small sample fails the check, one retry is done with a larger
    sample before raising an AssertionError.
    """
    distfn = getattr(stats, distname)
    n = distfn.numargs + 2
    truearg = np.hstack([arg, [0.0, 1.0]])[:n]

    sample = distfn.rvs(size=n_repl1, *arg)
    est = distfn.fit(sample)  # start from the default initial values

    # Tolerance per parameter: a fraction of the true value, but never
    # below thresh_min; the location entry is scaled by the sample mean.
    tol = np.max(np.vstack([truearg * thresh_percent,
                            np.ones(n) * thresh_min]), 0)
    tol[-2] = np.max([np.abs(sample.mean()) * thresh_percent, thresh_min])

    if np.any(np.isnan(est)):
        raise AssertionError('nan returned in fit')

    if np.any((np.abs(est - truearg) - tol) > 0.0):
        # First run failed: extend the sample and refit once.
        sample = np.concatenate([sample,
                                 distfn.rvs(size=n_repl2 - n_repl1, *arg)])
        est = distfn.fit(sample)
        diff = est - truearg
        if np.any((np.abs(diff) - tol) > 0.0):
            txt = 'parameter: %s\n' % str(truearg)
            txt += 'estimated: %s\n' % str(est)
            txt += 'diff : %s\n' % str(diff)
            raise AssertionError('fit not very good in %s\n' % distfn.name
                                 + txt)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run one quick check directly, then the whole module under nose.
    check_cont_fit('bradford', (0.29891359763170633,))
    # check_cont_fit('lognorm', (10, 1, 1))
    # check_cont_fit('ncx2', (21, 1.0560465975116415))
    import nose
    # nose.run(argv=['', __file__])
    nose.runmodule(argv=[__file__, '-s'], exit=False)
|
@ -0,0 +1,36 @@
|
||||
|
||||
|
||||
|
||||
from wafo import stats
|
||||
import numpy as np
|
||||
from numpy.testing import assert_almost_equal, assert_
|
||||
|
||||
def test_kde_1d():
    """Basic sanity checks of gaussian_kde against a 1-d normal sample."""
    np.random.seed(8765678)
    n_basesample = 500
    sample = np.random.randn(n_basesample)
    sample_mean = sample.mean()
    sample_std = sample.std(ddof=1)

    # build the kde from the original sample
    gkde = stats.gaussian_kde(sample)

    # evaluate the kde and the reference normal pdf on a common grid
    grid = np.linspace(-7, 7, 501)
    kdepdf = gkde.evaluate(grid)
    normpdf = stats.norm.pdf(grid, loc=sample_mean, scale=sample_std)
    step = grid[1] - grid[0]

    # the integrated squared error between kde and normal pdf is small
    assert_(np.sum((kdepdf - normpdf) ** 2) * step < 0.01)
    prob_right = gkde.integrate_box_1d(sample_mean, np.inf)
    prob_left = gkde.integrate_box_1d(-np.inf, sample_mean)
    assert_almost_equal(prob_right, 0.5, decimal=1)
    assert_almost_equal(prob_left, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_box(sample_mean, np.inf),
                        prob_right, decimal=13)
    assert_almost_equal(gkde.integrate_box(-np.inf, sample_mean),
                        prob_left, decimal=13)

    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf ** 2).sum() * step, decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(sample_mean, sample_std ** 2),
                        (kdepdf * normpdf).sum() * step, decimal=2)
|
@ -0,0 +1,490 @@
|
||||
"""
|
||||
Tests for the stats.mstats module (support for masked arrays)
|
||||
"""
|
||||
|
||||
|
||||
import numpy as np
|
||||
from numpy import nan
|
||||
import numpy.ma as ma
|
||||
from numpy.ma import masked, nomask
|
||||
|
||||
import scipy.stats.mstats as mstats
|
||||
from numpy.testing import TestCase, run_module_suite
|
||||
from numpy.ma.testutils import assert_equal, assert_almost_equal, \
|
||||
assert_array_almost_equal, assert_
|
||||
|
||||
|
||||
class TestMquantiles(TestCase):
    """Regression tests for the mquantiles function."""

    def test_mquantiles_limit_keyword(self):
        """Ticket #867: the limit keyword should drop out-of-range values."""
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        expected = [[19.2, 14.6, 1.45],
                    [40.0, 37.5, 2.5],
                    [42.8, 40.05, 3.55]]
        result = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
|
||||
class TestGMean(TestCase):
    def test_1D(self):
        # plain sequence
        data = (1, 2, 3, 4)
        actual = mstats.gmean(data)
        expected = np.power(1 * 2 * 3 * 4, 1. / 4.)
        assert_almost_equal(actual, expected, decimal=14)

        expected1 = mstats.gmean(data, axis=-1)
        assert_almost_equal(actual, expected1, decimal=14)
        assert_(not isinstance(expected1, ma.MaskedArray))

        # masked array: the masked entry is excluded from the mean
        data = ma.array((1, 2, 3, 4), mask=(0, 0, 0, 1))
        actual = mstats.gmean(data)
        expected = np.power(1 * 2 * 3, 1. / 3.)
        assert_almost_equal(actual, expected, decimal=14)

        expected1 = mstats.gmean(data, axis=-1)
        assert_almost_equal(actual, expected1, decimal=14)

    def test_2D(self):
        data = ma.array(((1, 2, 3, 4), (1, 2, 3, 4), (1, 2, 3, 4)),
                        mask=((0, 0, 0, 0), (1, 0, 0, 1), (0, 1, 1, 0)))
        actual = mstats.gmean(data)
        expected = np.array((1, 2, 3, 4))
        assert_array_almost_equal(actual, expected, decimal=14)

        expected1 = mstats.gmean(data, axis=0)
        assert_array_almost_equal(actual, expected1, decimal=14)

        # row-wise means with the masked entries excluded
        actual = mstats.gmean(data, -1)
        expected = ma.array((np.power(1 * 2 * 3 * 4, 1. / 4.),
                             np.power(2 * 3, 1. / 2.),
                             np.power(1 * 4, 1. / 2.)))
        assert_array_almost_equal(actual, expected, decimal=14)
|
||||
|
||||
class TestHMean(TestCase):
    def test_1D(self):
        data = (1, 2, 3, 4)
        actual = mstats.hmean(data)
        expected = 4. / (1. / 1 + 1. / 2 + 1. / 3 + 1. / 4)
        assert_almost_equal(actual, expected, decimal=14)
        expected1 = mstats.hmean(ma.array(data), axis=-1)
        assert_almost_equal(actual, expected1, decimal=14)

        # masked array: the masked entry is excluded from the mean
        data = ma.array((1, 2, 3, 4), mask=(0, 0, 0, 1))
        actual = mstats.hmean(data)
        expected = 3. / (1. / 1 + 1. / 2 + 1. / 3)
        assert_almost_equal(actual, expected, decimal=14)
        expected1 = mstats.hmean(data, axis=-1)
        assert_almost_equal(actual, expected1, decimal=14)

    def test_2D(self):
        data = ma.array(((1, 2, 3, 4), (1, 2, 3, 4), (1, 2, 3, 4)),
                        mask=((0, 0, 0, 0), (1, 0, 0, 1), (0, 1, 1, 0)))
        actual = mstats.hmean(data)
        assert_array_almost_equal(actual, ma.array((1, 2, 3, 4)), decimal=14)

        # row-wise means with the masked entries excluded
        actual1 = mstats.hmean(data, axis=-1)
        expected = (4. / (1 / 1. + 1 / 2. + 1 / 3. + 1 / 4.),
                    2. / (1 / 2. + 1 / 3.),
                    2. / (1 / 1. + 1 / 4.))
        assert_array_almost_equal(actual1, expected, decimal=14)
|
||||
|
||||
|
||||
class TestRanking(TestCase):

    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)

    def test_ranking(self):
        "Tests ranking of (masked) data, including ties and missing values."
        data = ma.array([0, 1, 1, 1, 2, 3, 4, 5, 5, 6])
        assert_almost_equal(mstats.rankdata(data),
                            [1, 3, 3, 3, 5, 6, 7, 8.5, 8.5, 10])
        # masked entries rank as 0 by default, or as the average missing
        # rank when use_missing=True
        data[[3, 4]] = masked
        assert_almost_equal(mstats.rankdata(data),
                            [1, 2.5, 2.5, 0, 0, 4, 5, 6.5, 6.5, 8])
        assert_almost_equal(mstats.rankdata(data, use_missing=True),
                            [1, 2.5, 2.5, 4.5, 4.5, 4, 5, 6.5, 6.5, 8])
        data = ma.array([0, 1, 5, 1, 2, 4, 3, 5, 1, 6])
        assert_almost_equal(mstats.rankdata(data),
                            [1, 3, 8.5, 3, 5, 7, 6, 8.5, 3, 10])
        data = ma.array([[0, 1, 1, 1, 2], [3, 4, 5, 5, 6]])
        assert_almost_equal(mstats.rankdata(data),
                            [[1, 3, 3, 3, 5], [6, 7, 8.5, 8.5, 10]])
        assert_almost_equal(mstats.rankdata(data, axis=1),
                            [[1, 3, 3, 3, 5], [1, 2, 3.5, 3.5, 5]])
        assert_almost_equal(mstats.rankdata(data, axis=0),
                            [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2]])
|
||||
|
||||
|
||||
class TestCorr(TestCase):

    def test_pearsonr(self):
        "Tests some computations of Pearson's r"
        x = ma.arange(10)
        olderr = np.seterr(all='ignore')
        try:
            assert_almost_equal(mstats.pearsonr(x, x)[0], 1.0)
            assert_almost_equal(mstats.pearsonr(x, x[::-1])[0], -1.0)

            # a fully masked input yields masked statistics
            x = ma.array(x, mask=True)
            pr = mstats.pearsonr(x, x)
        finally:
            np.seterr(**olderr)
        assert_(pr[0] is masked)
        assert_(pr[1] is masked)

    def test_spearmanr(self):
        "Tests some computations of Spearman's rho"
        (x, y) = ([5.05, 6.75, 3.21, 2.66], [1.65, 2.64, 2.64, 6.95])
        assert_almost_equal(mstats.spearmanr(x, y)[0], -0.6324555)
        # nans fixed into masked values give the same result
        (x, y) = ([5.05, 6.75, 3.21, 2.66, np.nan],
                  [1.65, 2.64, 2.64, 6.95, np.nan])
        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
        assert_almost_equal(mstats.spearmanr(x, y)[0], -0.6324555)

        x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
             1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7]
        y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
             0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4]
        assert_almost_equal(mstats.spearmanr(x, y)[0], 0.6887299)
        x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
             1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7, np.nan]
        y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
             0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4, np.nan]
        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
        assert_almost_equal(mstats.spearmanr(x, y)[0], 0.6887299)

    def test_kendalltau(self):
        "Tests some computations of Kendall's tau"
        x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan])
        y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
        z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
        assert_almost_equal(np.asarray(mstats.kendalltau(x, y)),
                            [+0.3333333, 0.4969059])
        assert_almost_equal(np.asarray(mstats.kendalltau(x, z)),
                            [-0.5477226, 0.2785987])

        x = ma.fix_invalid([0, 0, 0, 0, 20, 20, 0, 60, 0, 20,
                            10, 10, 0, 40, 0, 20, 0, 0, 0, 0, 0, np.nan])
        y = ma.fix_invalid([0, 80, 80, 80, 10, 33, 60, 0, 67, 27,
                            25, 80, 80, 80, 80, 80, 80, 0, 10, 45,
                            np.nan, 0])
        result = mstats.kendalltau(x, y)
        assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])

    def test_kendalltau_seasonal(self):
        "Tests the seasonal Kendall tau."
        x = [[nan, nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
             [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
             [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan],
             [nan, 6, 11, 4, 17, nan, 6, 1, 1, 2, 5, 1, 1]]
        x = ma.fix_invalid(x).T
        output = mstats.kendalltau_seasonal(x)
        assert_almost_equal(output['global p-value (indep)'], 0.008, 3)
        assert_almost_equal(output['seasonal p-value'].round(2),
                            [0.18, 0.53, 0.20, 0.04])

    def test_pointbiserial(self):
        "Tests point biserial"
        x = [1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0,
             0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, -1]
        y = [14.8, 13.8, 12.4, 10.1, 7.1, 6.1, 5.8, 4.6, 4.3, 3.5, 3.3,
             3.2, 3.0, 2.8, 2.8, 2.5, 2.4, 2.3, 2.1, 1.7, 1.7, 1.5, 1.3,
             1.3, 1.2, 1.2, 1.1, 0.8, 0.7, 0.6, 0.5, 0.2, 0.2, 0.1,
             np.nan]
        assert_almost_equal(mstats.pointbiserialr(x, y)[0], 0.36149, 5)
|
||||
|
||||
|
||||
class TestTrimming(TestCase):

    def test_trim(self):
        "Tests trimming"
        a = ma.arange(10)
        assert_equal(mstats.trim(a), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        a = ma.arange(10)
        assert_equal(mstats.trim(a, (2, 8)),
                     [None, None, 2, 3, 4, 5, 6, 7, 8, None])
        a = ma.arange(10)
        assert_equal(mstats.trim(a, limits=(2, 8), inclusive=(False, False)),
                     [None, None, None, 3, 4, 5, 6, 7, None, None])
        a = ma.arange(10)
        assert_equal(mstats.trim(a, limits=(0.1, 0.2), relative=True),
                     [None, 1, 2, 3, 4, 5, 6, 7, None, None])

        # trimming with pre-existing masked entries
        a = ma.arange(12)
        a[[0, -1]] = a[5] = masked
        assert_equal(mstats.trim(a, (2, 8)),
                     [None, None, 2, 3, 4, None, 6, 7, 8, None, None, None])

        # 2D input, relative limits, all axis variants
        x = ma.arange(100).reshape(10, 10)
        trimx = mstats.trim(x, (0.1, 0.2), relative=True, axis=None)
        assert_equal(trimx._mask.ravel(), [1] * 10 + [0] * 70 + [1] * 20)
        trimx = mstats.trim(x, (0.1, 0.2), relative=True, axis=0)
        assert_equal(trimx._mask.ravel(), [1] * 10 + [0] * 70 + [1] * 20)
        trimx = mstats.trim(x, (0.1, 0.2), relative=True, axis=-1)
        assert_equal(trimx._mask.T.ravel(), [1] * 10 + [0] * 70 + [1] * 20)

        # 2D input with a fully masked row
        x = ma.arange(110).reshape(11, 10)
        x[1] = masked
        trimx = mstats.trim(x, (0.1, 0.2), relative=True, axis=None)
        assert_equal(trimx._mask.ravel(), [1] * 20 + [0] * 70 + [1] * 20)
        trimx = mstats.trim(x, (0.1, 0.2), relative=True, axis=0)
        assert_equal(trimx._mask.ravel(), [1] * 20 + [0] * 70 + [1] * 20)
        trimx = mstats.trim(x.T, (0.1, 0.2), relative=True, axis=-1)
        assert_equal(trimx.T._mask.ravel(), [1] * 20 + [0] * 70 + [1] * 20)

    def test_trim_old(self):
        "Tests trimming."
        x = ma.arange(100)
        assert_equal(mstats.trimboth(x).count(), 60)
        assert_equal(mstats.trimtail(x, tail='r').count(), 80)
        x[50:70] = masked
        trimx = mstats.trimboth(x)
        assert_equal(trimx.count(), 48)
        assert_equal(trimx._mask,
                     [1] * 16 + [0] * 34 + [1] * 20 + [0] * 14 + [1] * 16)
        x._mask = nomask
        x.shape = (10, 10)
        assert_equal(mstats.trimboth(x).count(), 60)
        assert_equal(mstats.trimtail(x).count(), 80)

    def test_trimmedmean(self):
        "Tests the trimmed mean."
        data = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                         296, 299, 306, 376, 428, 515, 666, 1310, 2611])
        assert_almost_equal(mstats.trimmed_mean(data, 0.1), 343, 0)
        assert_almost_equal(mstats.trimmed_mean(data, (0.1, 0.1)), 343, 0)
        assert_almost_equal(mstats.trimmed_mean(data, (0.2, 0.2)), 283, 0)

    def test_trimmed_stde(self):
        "Tests the trimmed mean standard error."
        data = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                         296, 299, 306, 376, 428, 515, 666, 1310, 2611])
        assert_almost_equal(mstats.trimmed_stde(data, (0.2, 0.2)),
                            56.13193, 5)
        assert_almost_equal(mstats.trimmed_stde(data, 0.2), 56.13193, 5)

    def test_winsorization(self):
        "Tests the Winsorization of the data."
        data = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                         296, 299, 306, 376, 428, 515, 666, 1310, 2611])
        assert_almost_equal(mstats.winsorize(data, (0.2, 0.2)).var(ddof=1),
                            21551.4, 1)
        # winsorizing must preserve the mask
        data[5] = masked
        winsorized = mstats.winsorize(data)
        assert_equal(winsorized.mask, data.mask)
|
||||
|
||||
|
||||
class TestMoments(TestCase):
    """
    Comparison numbers are found using R v.1.5.1
    note that length(testcase) = 4
    testmathworks comes from the documentation for the Statistics Toolbox
    for Matlab and can be found at both
    http://www.mathworks.com/access/helpdesk/help/toolbox/stats/kurtosis.shtml
    http://www.mathworks.com/access/helpdesk/help/toolbox/stats/skewness.shtml
    Note that both test cases came from here.
    """
    testcase = [1, 2, 3, 4]
    testmathworks = ma.fix_invalid([1.165, 0.6268, 0.0751, 0.3516,
                                    -0.6965, np.nan])

    def test_moment(self):
        """mean((testcase - mean(testcase))**power, axis=0)"""
        assert_almost_equal(mstats.moment(self.testcase, 1), 0.0, 10)
        assert_almost_equal(mstats.moment(self.testcase, 2), 1.25)
        assert_almost_equal(mstats.moment(self.testcase, 3), 0.0)
        assert_almost_equal(mstats.moment(self.testcase, 4), 2.5625)

    def test_variation(self):
        """variation = samplestd / mean"""
        assert_almost_equal(mstats.variation(self.testcase),
                            0.44721359549996, 10)

    def test_skewness(self):
        """
        sum((testmathworks-mean(testmathworks,axis=0))**3,axis=0)
            / ((sqrt(var(testmathworks)*4/5))**3) / 5
        """
        assert_almost_equal(mstats.skew(self.testmathworks),
                            -0.29322304336607, 10)
        assert_almost_equal(mstats.skew(self.testmathworks, bias=0),
                            -0.437111105023940, 10)
        assert_almost_equal(mstats.skew(self.testcase), 0.0, 10)

    def test_kurtosis(self):
        """
        sum((testcase-mean(testcase,axis=0))**4,axis=0)/((sqrt(var(testcase)*3/4))**4)/4
        sum((test2-mean(testmathworks,axis=0))**4,axis=0)/((sqrt(var(testmathworks)*4/5))**4)/5
        Set flags for axis = 0 and fisher=0 (Pearson's definition of
        kurtosis for compatibility with Matlab)
        """
        y = mstats.kurtosis(self.testmathworks, 0, fisher=0, bias=1)
        assert_almost_equal(y, 2.1658856802973, 10)
        # Note that MATLAB has confusing docs for the following case:
        #   kurtosis(x,0) gives an unbiased estimate of Pearson's skewness
        #   kurtosis(x)   gives a biased estimate of Fisher's skewness
        # The MATLAB docs imply that both should give Fisher's.
        y = mstats.kurtosis(self.testmathworks, fisher=0, bias=0)
        assert_almost_equal(y, 3.663542721189047, 10)
        y = mstats.kurtosis(self.testcase, 0, 0)
        assert_almost_equal(y, 1.64)

    def test_mode(self):
        "Tests the mode"
        a1 = [0, 0, 0, 1, 1, 1, 2, 3, 3, 3, 3, 4, 5, 6, 7]
        a2 = np.reshape(a1, (3, 5))
        ma1 = ma.masked_where(ma.array(a1) > 2, a1)
        ma2 = ma.masked_where(a2 > 2, a2)
        assert_equal(mstats.mode(a1, axis=None), (3, 4))
        assert_equal(mstats.mode(ma1, axis=None), (0, 3))
        assert_equal(mstats.mode(a2, axis=None), (3, 4))
        assert_equal(mstats.mode(ma2, axis=None), (0, 3))
        assert_equal(mstats.mode(a2, axis=0),
                     ([[0, 0, 0, 1, 1]], [[1, 1, 1, 1, 1]]))
        assert_equal(mstats.mode(ma2, axis=0),
                     ([[0, 0, 0, 1, 1]], [[1, 1, 1, 1, 1]]))
        assert_equal(mstats.mode(a2, axis=-1),
                     ([[0], [3], [3]], [[3], [3], [1]]))
        assert_equal(mstats.mode(ma2, axis=-1),
                     ([[0], [1], [0]], [[3], [1], [0]]))
|
||||
|
||||
|
||||
class TestPercentile(TestCase):
    def setUp(self):
        self.a1 = [3, 4, 5, 10, -3, -5, 6]
        self.a2 = [3, -6, -2, 8, 7, 4, 2, 1]
        self.a3 = [3., 4, 5, 10, -3, -5, -6, 7.0]

    def test_percentile(self):
        x = np.arange(8) * 0.5
        for percentile, expected in [(0, 0.), (100, 3.5), (50, 1.75)]:
            assert_equal(mstats.scoreatpercentile(x, percentile), expected)

    def test_2D(self):
        x = ma.array([[1, 1, 1],
                      [1, 1, 1],
                      [4, 4, 3],
                      [1, 1, 1],
                      [1, 1, 1]])
        assert_equal(mstats.scoreatpercentile(x, 50), [1, 1, 1])
|
||||
|
||||
|
||||
class TestVariability(TestCase):
    """Comparison numbers are found using R v.1.5.1
    note that length(testcase) = 4
    """
    testcase = ma.fix_invalid([1, 2, 3, 4, np.nan])

    def test_signaltonoise(self):
        # not in R; reference value is
        #   mean(testcase, axis=0) / sqrt(var(testcase)*3/4)
        result = mstats.signaltonoise(self.testcase)
        assert_almost_equal(result, 2.236067977)

    def test_sem(self):
        # not in R; reference value is sqrt(var(testcase)*3/4)/sqrt(3)
        result = mstats.sem(self.testcase)
        assert_almost_equal(result, 0.6454972244)

    def test_zmap(self):
        # not in R; reference:
        #   (testcase[i]-mean(testcase,axis=0))/sqrt(var(testcase)*3/4)
        result = mstats.zmap(self.testcase, self.testcase)
        expected_unmasked = [-1.3416407864999, -0.44721359549996,
                             0.44721359549996, 1.3416407864999]
        assert_array_almost_equal(expected_unmasked,
                                  result.data[result.mask == False],
                                  decimal=12)

    def test_zscore(self):
        # not in R; reference:
        #   (testcase[i]-mean(testcase,axis=0))/sqrt(var(testcase)*3/4)
        result = mstats.zscore(self.testcase)
        expected = ma.fix_invalid([-1.3416407864999, -0.44721359549996,
                                   0.44721359549996, 1.3416407864999,
                                   np.nan])
        assert_almost_equal(expected, result, decimal=12)
|
||||
|
||||
|
||||
|
||||
class TestMisc(TestCase):

    def test_obrientransform(self):
        "Tests Obrien transform"
        args = [[5] * 5 + [6] * 11 + [7] * 9 + [8] * 3 + [9] * 2 + [10] * 2,
                [6] + [7] * 2 + [8] * 4 + [9] * 9 + [10] * 16]
        expected = [5 * [3.1828] + 11 * [0.5591] + 9 * [0.0344] +
                    3 * [1.6086] + 2 * [5.2817] + 2 * [11.0538],
                    [10.4352] + 2 * [4.8599] + 4 * [1.3836] +
                    9 * [0.0061] + 16 * [0.7277]]
        assert_almost_equal(np.round(mstats.obrientransform(*args).T, 4),
                            expected, 4)

    def test_kstwosamp(self):
        "Tests the Kolmogorov-Smirnov 2 samples test"
        x = [[nan, nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
             [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
             [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan],
             [nan, 6, 11, 4, 17, nan, 6, 1, 1, 2, 5, 1, 1]]
        x = ma.fix_invalid(x).T
        (winter, spring, summer, fall) = x.T

        assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring), 4),
                            (0.1818, 0.9892))
        assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring,
                                                       'g'), 4),
                            (0.1469, 0.7734))
        assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring,
                                                       'l'), 4),
                            (0.1818, 0.6744))

    def test_friedmanchisq(self):
        "Tests the Friedman Chi-square test"
        # No missing values
        args = ([9.0, 9.5, 5.0, 7.5, 9.5, 7.5, 8.0, 7.0, 8.5, 6.0],
                [7.0, 6.5, 7.0, 7.5, 5.0, 8.0, 6.0, 6.5, 7.0, 7.0],
                [6.0, 8.0, 4.0, 6.0, 7.0, 6.5, 6.0, 4.0, 6.5, 3.0])
        result = mstats.friedmanchisquare(*args)
        assert_almost_equal(result[0], 10.4737, 4)
        assert_almost_equal(result[1], 0.005317, 6)
        # Missing values
        x = [[nan, nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
             [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
             [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan],
             [nan, 6, 11, 4, 17, nan, 6, 1, 1, 2, 5, 1, 1]]
        x = ma.fix_invalid(x)
        result = mstats.friedmanchisquare(*x)
        assert_almost_equal(result[0], 2.0156, 4)
        assert_almost_equal(result[1], 0.5692, 4)
|
||||
|
||||
|
||||
def test_regress_simple():
    """Regress a line with sinusoidal noise (regression test for #1273)."""
    t = np.linspace(0, 100, 100)
    signal = 0.2 * np.linspace(0, 100, 100) + 10
    signal += np.sin(np.linspace(0, 20, 100))

    slope, intercept, _r, _p, _stderr = mstats.linregress(t, signal)
    assert_almost_equal(slope, 0.19644990055858422)
    assert_almost_equal(intercept, 10.211269918932341)
|
||||
|
||||
|
||||
def test_plotting_positions():
    """Regression test for #1256: plotting_positions with alpha=beta=0."""
    pos = mstats.plotting_positions(np.arange(3), 0, 0)
    assert_array_almost_equal(pos.data, np.array([0.25, 0.5, 0.75]))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Execute all tests in this module.
    run_module_suite()
|
@ -0,0 +1,103 @@
|
||||
# pylint: disable-msg=W0611, W0612, W0511,R0201
|
||||
"""Tests suite for maskedArray statistics.
|
||||
|
||||
:author: Pierre Gerard-Marchant
|
||||
:contact: pierregm_at_uga_dot_edu
|
||||
"""
|
||||
__author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)"
|
||||
|
||||
import numpy as np
|
||||
|
||||
import numpy.ma as ma
|
||||
|
||||
import scipy.stats.mstats as ms
|
||||
#import scipy.stats.mmorestats as mms
|
||||
|
||||
from numpy.testing import TestCase, run_module_suite, assert_equal, \
|
||||
assert_almost_equal, assert_
|
||||
|
||||
|
||||
class TestMisc(TestCase):

    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)

    def test_mjci(self):
        "Tests the Maritz-Jarrett estimator"
        data = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                         296, 299, 306, 376, 428, 515, 666, 1310, 2611])
        assert_almost_equal(ms.mjci(data),
                            [55.76819, 45.84028, 198.87875], 5)

    def test_trimmedmeanci(self):
        "Tests the confidence intervals of the trimmed mean."
        data = ma.array([545, 555, 558, 572, 575, 576, 578, 580,
                         594, 605, 635, 651, 653, 661, 666])
        assert_almost_equal(ms.trimmed_mean(data, 0.2), 596.2, 1)
        assert_equal(np.round(ms.trimmed_mean_ci(data, (0.2, 0.2)), 1),
                     [561.8, 630.6])

    def test_idealfourths(self):
        "Tests ideal-fourths"
        data = np.arange(100)
        assert_almost_equal(np.asarray(ms.idealfourths(data)),
                            [24.416667, 74.583333], 6)
        data_2D = data.repeat(3).reshape(-1, 3)
        assert_almost_equal(ms.idealfourths(data_2D, axis=0),
                            [[24.416667, 24.416667, 24.416667],
                             [74.583333, 74.583333, 74.583333]], 6)
        assert_almost_equal(ms.idealfourths(data_2D, axis=1),
                            data.repeat(2).reshape(-1, 2))
        # degenerate input yields nan
        result = ms.idealfourths([0, 0])
        assert_(np.isnan(result).all())
|
||||
|
||||
#..............................................................................
|
||||
class TestQuantiles(TestCase):

    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)

    def test_hdquantiles(self):
        # 100 recorded uniform variates so the test is deterministic
        data = [0.706560797,0.727229578,0.990399276,0.927065621,0.158953014,
            0.887764025,0.239407086,0.349638551,0.972791145,0.149789972,
            0.936947700,0.132359948,0.046041972,0.641675031,0.945530547,
            0.224218684,0.771450991,0.820257774,0.336458052,0.589113496,
            0.509736129,0.696838829,0.491323573,0.622767425,0.775189248,
            0.641461450,0.118455200,0.773029450,0.319280007,0.752229111,
            0.047841438,0.466295911,0.583850781,0.840581845,0.550086491,
            0.466470062,0.504765074,0.226855960,0.362641207,0.891620942,
            0.127898691,0.490094097,0.044882048,0.041441695,0.317976349,
            0.504135618,0.567353033,0.434617473,0.636243375,0.231803616,
            0.230154113,0.160011327,0.819464108,0.854706985,0.438809221,
            0.487427267,0.786907310,0.408367937,0.405534192,0.250444460,
            0.995309248,0.144389588,0.739947527,0.953543606,0.680051621,
            0.388382017,0.863530727,0.006514031,0.118007779,0.924024803,
            0.384236354,0.893687694,0.626534881,0.473051932,0.750134705,
            0.241843555,0.432947602,0.689538104,0.136934797,0.150206859,
            0.474335206,0.907775349,0.525869295,0.189184225,0.854284286,
            0.831089744,0.251637345,0.587038213,0.254475554,0.237781276,
            0.827928620,0.480283781,0.594514455,0.213641488,0.024194386,
            0.536668589,0.699497811,0.892804071,0.093835427,0.731107772]

        # 1D: extreme and central quantiles, plus their standard errors
        assert_almost_equal(ms.hdquantiles(data, [0., 1.]),
                            [0.006514031, 0.995309248])
        hdq = ms.hdquantiles(data, [0.25, 0.5, 0.75])
        assert_almost_equal(hdq, [0.253210762, 0.512847491, 0.762232442])
        hdq = ms.hdquantiles_sd(data, [0.25, 0.5, 0.75])
        assert_almost_equal(hdq, [0.03786954, 0.03805389, 0.03800152], 4)

        # 2D: axis=0 results match column-wise 1D results
        data = np.array(data).reshape(10, 10)
        hdq = ms.hdquantiles(data, [0.25, 0.5, 0.75], axis=0)
        assert_almost_equal(hdq[:, 0],
                            ms.hdquantiles(data[:, 0], [0.25, 0.5, 0.75]))
        assert_almost_equal(hdq[:, -1],
                            ms.hdquantiles(data[:, -1], [0.25, 0.5, 0.75]))
        hdq = ms.hdquantiles(data, [0.25, 0.5, 0.75], axis=0, var=True)
        assert_almost_equal(hdq[..., 0],
                            ms.hdquantiles(data[:, 0], [0.25, 0.5, 0.75],
                                           var=True))
        assert_almost_equal(hdq[..., -1],
                            ms.hdquantiles(data[:, -1], [0.25, 0.5, 0.75],
                                           var=True))
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
||||
if __name__ == "__main__":
    # Execute all tests in this module.
    run_module_suite()
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,412 @@
|
||||
"""
|
||||
Commentary
|
||||
----------
|
||||
|
||||
Most of the work is done by the scipy.stats.distributions module.
|
||||
|
||||
This provides a plethora of continuous distributions to play with.
|
||||
|
||||
Each distribution has functions to generate random deviates, pdf's,
|
||||
cdf's etc. as well as a function to fit the distribution to some given
|
||||
data.
|
||||
|
||||
The fitting uses scipy.optimize.fmin to minimise the negative log
likelihood of the data given the distribution.
|
||||
|
||||
There are a couple of problems with this approach. First it is
|
||||
sensitive to the initial guess at the parameters. Second it can be a
|
||||
little slow.
|
||||
|
||||
Two key parameters are the 'loc' and 'scale' parameters. Data is
|
||||
shifted by 'loc' and scaled by scale prior to fitting. Supplying
|
||||
appropriate values for these parameters is important to getting a good
|
||||
fit.
|
||||
|
||||
See the factory() function which picks from a handful of common
|
||||
approaches for each distribution.
|
||||
|
||||
For some distributions (eg normal) it really makes sense just to
|
||||
calculate the parameters directly from the data.
|
||||
|
||||
The code in the __ifmain__ should be a good guide how to use this.
|
||||
|
||||
Simply:
|
||||
get a QuickFit object
|
||||
add the distributions you want to try to fit
|
||||
call fit() with your data
|
||||
call fit_stats() to generate some stats on the fit.
|
||||
call plot() if you want to see a plot.
|
||||
|
||||
|
||||
Named after Mrs Twolumps, minister's secretary in the silly walks
|
||||
sketch, who brings in coffee with a full silly walk.
|
||||
|
||||
Tenuous link with curve fitting is that you generally see "two lumps"
|
||||
one in your data and the other in the curve that is being fitted.
|
||||
|
||||
Or alternately, if your data is not too silly then you can fit a
|
||||
curve to it.
|
||||
|
||||
License is GNU LGPL v3, see https://launchpad.net/twolumps
|
||||
"""
|
||||
import inspect
|
||||
from itertools import izip
|
||||
|
||||
import numpy
|
||||
from wafo import stats
|
||||
from scipy import mean, std
|
||||
|
||||
def factory(name):
    """Return the fitter object appropriate for the named distribution.

    Distributions with special loc/scale needs get a specialised wrapper;
    anything not listed falls back to the generic ScipyDistribution.
    """
    fitters = dict(
        # handled by the [0, 1]-support wrapper
        beta=ZeroOneScipyDistribution,
        alpha=ZeroOneScipyDistribution,
        ncf=ZeroOneScipyDistribution,
        triang=ZeroOneScipyDistribution,
        uniform=ZeroOneScipyDistribution,
        powerlaw=ZeroOneScipyDistribution,

        # handled by the loc-at-minimum wrapper
        pareto=MinLocScipyDistribution,
        expon=MinLocScipyDistribution,
        gamma=MinLocScipyDistribution,
        lognorm=MinLocScipyDistribution,
        maxwell=MinLocScipyDistribution,
        weibull_min=MinLocScipyDistribution,

        # handled by the loc-at-maximum wrapper
        weibull_max=MaxLocScipyDistribution)

    return fitters.get(name, ScipyDistribution)(name)
|
||||
|
||||
|
||||
def get_continuous_distros():
    """Return [name, fitter] pairs for every continuous distribution
    found in the stats module."""
    fitters = []
    skip = set()  # NOTE(review): never populated -- placeholder for exclusions?
    for name, item in inspect.getmembers(stats):
        if name in skip:
            continue
        # skip the base class itself, keep its instances
        if item is stats.rv_continuous:
            continue
        if isinstance(item, stats.rv_continuous):
            fitters.append([name, factory(name)])
    return fitters
|
||||
|
||||
|
||||
class ScipyDistribution(object):
|
||||
|
||||
def __init__(self, name):
    """Look up the named distribution in the stats module and wrap it."""
    self.name = name
    self.distro = self.get_distro()
    self.fitted = None  # cache for expected(); filled lazily
|
||||
|
||||
def __getattr__(self, attr):
    """Delegate unknown attribute lookups to the wrapped distro object."""
    distro = self.distro
    return getattr(distro, attr)
|
||||
|
||||
def get_distro(self):
    """Return the raw (unfrozen) distribution object from stats."""
    return getattr(stats, self.name)
|
||||
|
||||
def set_distro(self, parms):
    """Freeze the distribution with *parms* and store it on the instance.

    Also invalidates the quantile cache used by expected(): the cached
    values were computed for the previously frozen distribution, and
    keeping them made fit() compare identical rss values for every
    candidate parameter set.
    """
    self.distro = getattr(stats, self.name)(*parms)
    self.fitted = None  # bug fix: drop quantiles of the old frozen distro
    return self.distro
|
||||
|
||||
def calculate_loc_and_scale(self, data):
|
||||
""" Calculate loc and scale parameters for fit.
|
||||
|
||||
Depending on the distribution, these need to be approximately
|
||||
right to get a good fit.
|
||||
"""
|
||||
return mean(data), std(data)
|
||||
|
||||
def fit(self, data, *args, **kwargs):
|
||||
""" This needs some work.
|
||||
|
||||
Seems the various scipy distributions do a reasonable job if given a good hint.
|
||||
|
||||
Need to get distro specific hints.
|
||||
"""
|
||||
|
||||
fits = []
|
||||
|
||||
# try with and without providing loc and scale hints
|
||||
# increases chance of a fit without an exception being
|
||||
# generated.
|
||||
for (loc, scale) in ((0.0, 1.0),
|
||||
self.calculate_loc_and_scale(data)):
|
||||
|
||||
try:
|
||||
parms = self.get_distro().fit(data, loc=loc, scale=scale)
|
||||
|
||||
self.set_distro(list(parms))
|
||||
expected = self.expected(data)
|
||||
rss = ((expected-data)**2).sum()
|
||||
fits.append([rss, list(parms)])
|
||||
|
||||
parms = self.get_distro().fit(data, floc=loc, scale=scale)
|
||||
|
||||
self.set_distro(list(parms))
|
||||
expected = self.expected(data)
|
||||
rss = ((expected-data)**2).sum()
|
||||
fits.append([rss, list(parms)])
|
||||
except:
|
||||
pass
|
||||
|
||||
# no fits means all tries raised exceptions
|
||||
if not fits:
|
||||
raise Exception("Exception in fit()")
|
||||
|
||||
# pick the one with the smallest rss
|
||||
fits.sort()
|
||||
self.parms = fits[0][1]
|
||||
print self.parms
|
||||
|
||||
return self.set_distro(list(self.parms))
|
||||
|
||||
def expected(self, data):
|
||||
""" Calculate expected values at each data point """
|
||||
if self.fitted is not None:
|
||||
return self.fitted
|
||||
|
||||
n = len(data)
|
||||
xx = numpy.linspace(0, 1, n + 2)[1:-1]
|
||||
self.fitted = self.ppf(xx)
|
||||
#self.fitted = [self.ppf(x) for x in xx]
|
||||
|
||||
return self.fitted
|
||||
|
||||
def fit_stats(self, data):
|
||||
""" Return stats on the fits
|
||||
|
||||
data assumed to be sorted.
|
||||
"""
|
||||
n = len(data)
|
||||
|
||||
dvar = numpy.var(data)
|
||||
expected = self.expected(data)
|
||||
evar = numpy.var(expected)
|
||||
|
||||
rss = 0.0
|
||||
for expect, obs in izip(expected, data):
|
||||
rss += (obs-expect) ** 2.0
|
||||
|
||||
self.rss = rss
|
||||
self.dss = dvar * n
|
||||
self.fss = evar * n
|
||||
|
||||
def residuals(self, data):
|
||||
""" Return residuals """
|
||||
expected = self.expected(data)
|
||||
|
||||
return numpy.array(data) - numpy.array(expected)
|
||||
|
||||
|
||||
|
||||
class MinLocScipyDistribution(ScipyDistribution):
    """ Distribution whose loc hint is the sample minimum. """

    def calculate_loc_and_scale(self, data):
        """ Set loc to min value in the data.

        Useful for weibull_min
        """
        # numpy.std replaces the scipy.std alias, removed from scipy
        return min(data), numpy.std(data)
class MaxLocScipyDistribution(ScipyDistribution):
    """ Distribution whose loc hint is the sample maximum. """

    def calculate_loc_and_scale(self, data):
        """ Set loc to max value in the data.

        Useful for weibull_max
        """
        # numpy.std replaces the scipy.std alias, removed from scipy
        return max(data), numpy.std(data)
class ZeroOneScipyDistribution(ScipyDistribution):
    """ Distribution supported on a finite interval. """

    def calculate_loc_and_scale(self, data):
        """ Set loc and scale to move to [0, 1] interval.

        Useful for beta distribution
        """
        # shift by the sample minimum, scale by the sample range
        lowest = min(data)
        highest = max(data)
        return lowest, highest - lowest
class QuickFit(object):
    """ Fit a family of distributions.

    Calculates stats on each fit.

    Option to create plots.
    """

    def __init__(self):
        # ScipyDistribution wrappers to try fitting
        self.distributions = []

    def add_distribution(self, distribution):
        """ Add a ready-prepared ScipyDistribution """
        self.distributions.append(distribution)

    def add(self, name):
        """ Add a distribution by name. """
        self.distributions.append(factory(name))

    def fit(self, data):
        """ Fit all of the distros we have; keep only those that succeed. """
        fitted = []
        for distro in self.distributions:
            print('fitting distro', distro.name)
            try:
                distro.fit(data)
            except Exception:
                # ScipyDistribution.fit raises when no parameter set
                # works; drop this distribution from consideration
                continue
            fitted.append(distro)
        self.distributions = fitted

        print('finished fitting')

    def stats(self, data):
        """ Return stats on the fits """
        for dd in self.distributions:
            dd.fit_stats(data)

    def get_topn(self, n):
        """ Return top-n best fits (smallest residual sum of squares). """
        data = [[x.rss, x] for x in self.distributions
                if numpy.isfinite(x.rss)]
        # sort on rss only: a plain sort would compare the distro
        # objects themselves on ties, which is undefined
        data.sort(key=lambda pair: pair[0])

        if not n:
            n = len(data)

        return [x[1] for x in data[:n]]

    def fit_plot(self, data, topn=0, bins=20):
        """ Create a plot of the fitted pdfs over a data histogram. """
        from matplotlib import pylab as pl

        distros = self.get_topn(topn)

        xx = numpy.linspace(data.min(), data.max(), 300)

        table = []
        nparms = max(len(x.parms) for x in distros)
        tcolours = []
        for dd in distros:
            patch = pl.plot(
                xx, [dd.pdf(p) for p in xx],
                label='%10.2f%% %s' % (100.0 * dd.rss / dd.dss, dd.name))
            row = (['', dd.name, '%10.2f%%' % (100.0 * dd.rss / dd.dss,)] +
                   ['%0.2f' % x for x in dd.parms])
            # pad rows so all have the same number of parameter cells
            while len(row) < 3 + nparms:
                row.append('')
            table.append(row)
            tcolours.append(
                [patch[0].get_markerfacecolor()] + ['w'] * (2 + nparms))

        # add a histogram with the data; 'density' replaces the 'normed'
        # keyword removed in matplotlib >= 3.1
        pl.hist(data, bins=bins, density=True)
        tab = pl.table(cellText=table, cellColours=tcolours,
                       colLabels=(['', 'Distribution', 'Res. SS/Data SS'] +
                                  ['P%d' % (x + 1,) for x in range(nparms)]),
                       bbox=(0.0, 1.0, 1.0, 0.3))
        tab.auto_set_font_size(False)
        tab.set_fontsize(10.)

    def residual_plot(self, data, topn=0):
        """ Create a residual plot. """
        from matplotlib import pylab as pl

        distros = self.get_topn(topn)

        n = len(data)
        xx = numpy.linspace(0, 1, n + 2)[1:-1]
        for dd in distros:
            pl.plot(xx, dd.residuals(data),
                    label='%10.2f%% %s' % (100.0 * dd.rss / dd.dss, dd.name))
        pl.grid(True)

    def plot(self, data, topn):
        """ Plot data fit and residuals """
        from matplotlib import pylab as pl
        pl.axes([0.1, 0.4, 0.8, 0.4])  # leave room above the axes for the table
        self.fit_plot(data, topn=topn)

        pl.axes([0.1, 0.05, 0.8, 0.3])
        self.residual_plot(data, topn=topn)
def read_data(infile, field):
    """ Simple utility to extract a field out of a csv file.

    infile: open file-like object containing csv with a header row
    field:  column name (looked up in the header) to extract

    Returns the column as a list of floats.
    """
    import csv

    reader = csv.reader(infile)
    # next(reader) replaces the Python-2-only reader.next()
    header = next(reader)
    column = header.index(field)
    data = []
    for row in reader:
        data.append(float(row[column]))

    return data
if __name__ == '__main__':

    import sys
    import optparse

    from matplotlib import pylab as pl

    parser = optparse.OptionParser()
    parser.add_option('-d', '--distro', action='append', default=[])
    parser.add_option('-l', '--list', action='store_true',
                      help='List available distros')

    parser.add_option('-i', '--infile')
    parser.add_option('-f', '--field', default='P/L')

    parser.add_option('-n', '--topn', type='int', default=0)

    parser.add_option('-s', '--sample', default='normal',
                      help='generate a sample from this distro as a test')
    parser.add_option('--size', type='int', default=1000,
                      help='Size of sample to generate')

    opts, args = parser.parse_args()

    if opts.list:
        for name, distro in get_continuous_distros():
            print(name)
        sys.exit()

    # Default to trying every continuous distribution when none was
    # requested.  (A debug leftover that unconditionally forced
    # ['weibull_min', 'norm'] has been removed, so the -d option and
    # this fallback now take effect.)
    if not opts.distro:
        opts.distro = [x[0] for x in get_continuous_distros()]

    quickfit = QuickFit()
    for distro in opts.distro:
        quickfit.add(distro)

    # either generate sample data or read it from a csv file
    if opts.sample:
        data = getattr(numpy.random, opts.sample)(size=opts.size)
    else:
        data = numpy.array(read_data(open(opts.infile), opts.field))

    data.sort()

    quickfit.fit(data)
    print('doing stats')
    quickfit.stats(data)

    print('doing plot')
    quickfit.plot(data, topn=opts.topn)
    pl.show()
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue