pep8 + updated wafo.stats packages

master
Per.Andreas.Brodtkorb 10 years ago
parent 629ed411c9
commit d308357c5b

@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<projectDescription> <projectDescription>
<name>google_pywafo</name> <name>pywafo</name>
<comment></comment> <comment></comment>
<projects> <projects>
</projects> </projects>
@@ -10,6 +10,16 @@
<arguments> <arguments>
</arguments> </arguments>
</buildCommand> </buildCommand>
<buildCommand>
<name>org.eclipse.ui.externaltools.ExternalToolBuilder</name>
<triggers>auto,full,incremental,</triggers>
<arguments>
<dictionary>
<key>LaunchConfigHandle</key>
<value>&lt;project&gt;/.externalToolBuilders/wafo_stats_tests.launch</value>
</dictionary>
</arguments>
</buildCommand>
</buildSpec> </buildSpec>
<natures> <natures>
<nature>org.python.pydev.pythonNature</nature> <nature>org.python.pydev.pythonNature</nature>

@@ -3,8 +3,8 @@
<pydev_project> <pydev_project>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH"> <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/google_pywafo/src</path> <path>/pywafo/src</path>
</pydev_pathproperty> </pydev_pathproperty>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property> <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property> <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
</pydev_project> </pydev_project>

@@ -1,4 +1,4 @@
Metadata-Version: 1.0 Metadata-Version: 1.1
Name: wafo Name: wafo
Version: 0.1.2 Version: 0.1.2
Summary: Statistical analysis and simulation of random waves and random loads Summary: Statistical analysis and simulation of random waves and random loads

@@ -4,17 +4,19 @@ gendocwafo.py
manifest manifest
setup.py setup.py
setup_old.py setup_old.py
test_all.py
src/epydoc_wafo.prj src/epydoc_wafo.prj
src/Wafo.egg-info/PKG-INFO src/Wafo.egg-info/PKG-INFO
src/Wafo.egg-info/SOURCES.txt src/Wafo.egg-info/SOURCES.txt
src/Wafo.egg-info/dependency_links.txt src/Wafo.egg-info/dependency_links.txt
src/Wafo.egg-info/top_level.txt src/Wafo.egg-info/top_level.txt
src/wafo/MSO.py
src/wafo/MSPPT.py
src/wafo/SpecData1D.mm src/wafo/SpecData1D.mm
src/wafo/__init__.py src/wafo/__init__.py
src/wafo/bitwise.py src/wafo/bitwise.py
src/wafo/c_library.pyd src/wafo/c_library.pyd
src/wafo/c_library.so src/wafo/c_library.so
src/wafo/containers.py
src/wafo/cov2mod.pyd src/wafo/cov2mod.pyd
src/wafo/dctpack.py src/wafo/dctpack.py
src/wafo/definitions.py src/wafo/definitions.py
@@ -28,6 +30,7 @@ src/wafo/info.py
src/wafo/integrate.py src/wafo/integrate.py
src/wafo/interpolate.py src/wafo/interpolate.py
src/wafo/kdetools.py src/wafo/kdetools.py
src/wafo/magic.py
src/wafo/meshgrid.py src/wafo/meshgrid.py
src/wafo/misc.py src/wafo/misc.py
src/wafo/mvn.pyd src/wafo/mvn.pyd
@@ -39,20 +42,26 @@ src/wafo/objects.py
src/wafo/plotbackend.py src/wafo/plotbackend.py
src/wafo/polynomial.py src/wafo/polynomial.py
src/wafo/polynomial_old.py src/wafo/polynomial_old.py
src/wafo/pychip.py src/wafo/powerpoint.py
src/wafo/resize_problem.py
src/wafo/rindmod.pyd src/wafo/rindmod.pyd
src/wafo/rindmod.so src/wafo/rindmod.so
src/wafo/sg_filter.py src/wafo/sg_filter.py
src/wafo/version.py src/wafo/version.py
src/wafo/wafodata.py src/wafo/wafodata.py
src/wafo/wtraits.py
src/wafo/wtraits2.py
src/wafo/wtraits3.py
src/wafo.egg-info/SOURCES.txt src/wafo.egg-info/SOURCES.txt
src/wafo/covariance/__init__.py src/wafo/covariance/__init__.py
src/wafo/covariance/core.py src/wafo/covariance/core.py
src/wafo/data/__init__.py src/wafo/data/__init__.py
src/wafo/data/__init__.pyc
src/wafo/data/atlantic.dat src/wafo/data/atlantic.dat
src/wafo/data/gfaks89.dat src/wafo/data/gfaks89.dat
src/wafo/data/gfaksr89.dat src/wafo/data/gfaksr89.dat
src/wafo/data/info.py src/wafo/data/info.py
src/wafo/data/info.pyc
src/wafo/data/info.~py src/wafo/data/info.~py
src/wafo/data/japansea.dat src/wafo/data/japansea.dat
src/wafo/data/northsea.dat src/wafo/data/northsea.dat
@@ -276,30 +285,47 @@ src/wafo/source/test_f90/types.f90
src/wafo/source/test_f90/types.mod src/wafo/source/test_f90/types.mod
src/wafo/spectrum/__init__.py src/wafo/spectrum/__init__.py
src/wafo/spectrum/core.py src/wafo/spectrum/core.py
src/wafo/spectrum/dispersion_relation.py
src/wafo/spectrum/models.py src/wafo/spectrum/models.py
src/wafo/spectrum/test/test_dispersion_relation.py
src/wafo/spectrum/test/test_models.py src/wafo/spectrum/test/test_models.py
src/wafo/spectrum/test/test_models.pyc
src/wafo/spectrum/test/test_specdata1d.py src/wafo/spectrum/test/test_specdata1d.py
src/wafo/spectrum/test/test_specdata1d.pyc
src/wafo/stats/__init__.py src/wafo/stats/__init__.py
src/wafo/stats/core.py src/wafo/stats/core.py
src/wafo/stats/distributions.py src/wafo/stats/distributions.py
src/wafo/stats/distributions_juli2010.py
src/wafo/stats/estimation.py src/wafo/stats/estimation.py
src/wafo/stats/kde_test.py
src/wafo/stats/misc.py src/wafo/stats/misc.py
src/wafo/stats/six.py
src/wafo/stats/sklearn_test.py
src/wafo/stats/twolumps.py
src/wafo/stats/tests/test_distributions.py src/wafo/stats/tests/test_distributions.py
src/wafo/stats/tests/test_estimation.py src/wafo/stats/tests/test_estimation.py
src/wafo/test/__init__.py src/wafo/test/__init__.py
src/wafo/test/__init__.pyc
src/wafo/test/test_gaussian.py src/wafo/test/test_gaussian.py
src/wafo/test/test_gaussian.pyc
src/wafo/test/test_kdetools.py src/wafo/test/test_kdetools.py
src/wafo/test/test_kdetools.pyc
src/wafo/test/test_misc.py src/wafo/test/test_misc.py
src/wafo/test/test_misc.pyc
src/wafo/test/test_objects.py src/wafo/test/test_objects.py
src/wafo/test/test_objects.pyc
src/wafo/transform/__init__.py src/wafo/transform/__init__.py
src/wafo/transform/core.py src/wafo/transform/core.py
src/wafo/transform/models.py src/wafo/transform/models.py
src/wafo/transform/models.~py src/wafo/transform/models.~py
src/wafo/transform/test/__init__.py src/wafo/transform/test/__init__.py
src/wafo/transform/test/__init__.pyc
src/wafo/transform/test/test_models.py src/wafo/transform/test/test_models.py
src/wafo/transform/test/test_models.pyc
src/wafo/transform/test/test_trdata.py src/wafo/transform/test/test_trdata.py
src/wafo/transform/test/test_trdata.pyc
src/wafo/wave_theory/__init__.py src/wafo/wave_theory/__init__.py
src/wafo/wave_theory/core.py src/wafo/wave_theory/core.py
src/wafo/wave_theory/dispersion_relation.py src/wafo/wave_theory/dispersion_relation.py
src/wafo/wave_theory/test/__init__.py
src/wafo/wave_theory/test/__init__.pyc
src/wafo/wave_theory/test/test_dispersion_relation.py
src/wafo/wave_theory/test/test_dispersion_relation.pyc

@@ -1,21 +1,22 @@
from __future__ import division, print_function, absolute_import
from info import __doc__ from .info import __doc__
import misc from . import misc
import data from . import data
import demos from . import demos
import kdetools from . import kdetools
import objects from . import objects
import spectrum from . import spectrum
import transform from . import transform
import definitions from . import definitions
import polynomial from . import polynomial
import stats from . import stats
import interpolate from . import interpolate
import dctpack from . import dctpack
try: try:
import fig from . import fig
except ImportError: except ImportError:
print 'fig import only supported on Windows' print('fig import only supported on Windows')
try: try:
from wafo.version import version as __version__ from wafo.version import version as __version__
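The pattern applied throughout this hunk: once absolute_import is in effect, a bare "import misc" no longer resolves against the package directory, so every intra-package import becomes an explicit relative import, and print becomes a function. A minimal sketch of the same conversion for a hypothetical package (module names are illustrative, not wafo's):

# mypkg/__init__.py -- hypothetical package
# before (Python 2 only):
#     import misc              # implicit relative import
from __future__ import division, print_function, absolute_import
from . import misc             # explicit relative import, Python 2.6+/3.x
try:
    from . import fig          # optional, platform-specific module
except ImportError:
    print('fig import only supported on Windows')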

@@ -2,13 +2,13 @@
Module extending the bit-operator capabilities of numpy Module extending the bit-operator capabilities of numpy
''' '''
from numpy import (bitwise_and, bitwise_or, #@UnresolvedImport from numpy import (bitwise_and, bitwise_or,
bitwise_not, binary_repr, #@UnresolvedImport @UnusedImport bitwise_not, binary_repr, # @UnusedImport
bitwise_xor, where, arange) #@UnresolvedImport @UnusedImport bitwise_xor, where, arange) # @UnusedImport
#import numpy as np
__all__ = ['bitwise_and', 'bitwise_or', 'bitwise_not', 'binary_repr', __all__ = ['bitwise_and', 'bitwise_or', 'bitwise_not', 'binary_repr',
'bitwise_xor', 'getbit', 'setbit', 'getbits', 'setbits'] 'bitwise_xor', 'getbit', 'setbit', 'getbits', 'setbits']
def getbit(i, bit): def getbit(i, bit):
""" """
Get bit at specified position Get bit at specified position
@@ -32,12 +32,14 @@ def getbit(i, bit):
""" """
return bitwise_and(i, 1 << bit) >> bit return bitwise_and(i, 1 << bit) >> bit
def getbits(i, numbits=8): def getbits(i, numbits=8):
""" """
Returns bits of i in a list Returns bits of i in a list
""" """
return getbit(i, arange(0, numbits)) return getbit(i, arange(0, numbits))
def setbit(i, bit, value=1): def setbit(i, bit, value=1):
""" """
Set bit at specified position Set bit at specified position
@@ -63,6 +65,7 @@ def setbit(i, bit, value=1):
return where((value == 0) & (i == i) & (bit == bit), bitwise_and(i, val0), return where((value == 0) & (i == i) & (bit == bit), bitwise_and(i, val0),
bitwise_or(i, val1)) bitwise_or(i, val1))
def setbits(bitlist): def setbits(bitlist):
""" """
Set bits of val to values in bitlist Set bits of val to values in bitlist
@@ -81,9 +84,12 @@ def setbits(bitlist):
val |= j << i val |= j << i
return val return val
def test_docstrings(): def test_docstrings():
import doctest import doctest
doctest.testmod() print('Testing docstrings in %s' % __file__)
doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
if __name__ == '__main__': if __name__ == '__main__':
test_docstrings() test_docstrings()
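As a reading aid, getbit, getbits and setbits round-trip an integer through its list of bits (least significant bit first). A small sanity check written directly against the function bodies shown above (a sketch, not part of the module):

from numpy import arange, bitwise_and

i = 0b1011                                  # 11
assert bitwise_and(i, 1 << 1) >> 1 == 1     # getbit(11, 1) -> 1
assert bitwise_and(i, 1 << 2) >> 2 == 0     # getbit(11, 2) -> 0
bits = list(bitwise_and(i, 1 << arange(4)) >> arange(4))  # getbits(11, 4)
assert bits == [1, 1, 0, 1]                 # LSB first
val = 0
for pos, b in enumerate(bits):              # setbits([1, 1, 0, 1])
    val |= b << pos
assert val == i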

@@ -1,5 +1,6 @@
import warnings import warnings
from graphutil import cltext from graphutil import cltext # @UnresolvedImport
from plotbackend import plotbackend from plotbackend import plotbackend
from time import gmtime, strftime from time import gmtime, strftime
import numpy as np import numpy as np
@@ -8,27 +9,34 @@ from scipy import interpolate
from scipy import integrate from scipy import integrate
__all__ = ['PlotData', 'AxisLabels'] __all__ = ['PlotData', 'AxisLabels']
def empty_copy(obj): def empty_copy(obj):
class Empty(obj.__class__): class Empty(obj.__class__):
def __init__(self): def __init__(self):
pass pass
newcopy = Empty() newcopy = Empty()
newcopy.__class__ = obj.__class__ newcopy.__class__ = obj.__class__
return newcopy return newcopy
def _set_seed(iseed): def _set_seed(iseed):
if iseed != None: if iseed is not None:
try: try:
np.random.set_state(iseed) np.random.set_state(iseed)
except: except:
np.random.seed(iseed) np.random.seed(iseed)
def now(): def now():
''' '''
Return current date and time as a string Return current date and time as a string
''' '''
return strftime("%a, %d %b %Y %H:%M:%S", gmtime()) return strftime("%a, %d %b %Y %H:%M:%S", gmtime())
class PlotData(object): class PlotData(object):
''' '''
Container class for data with interpolation and plotting methods Container class for data with interpolation and plotting methods
@@ -66,6 +74,7 @@ class PlotData(object):
>>> h = d3.plot() >>> h = d3.plot()
''' '''
def __init__(self, data=None, args=None, *args2, **kwds): def __init__(self, data=None, args=None, *args2, **kwds):
self.data = data self.data = data
self.args = args self.args = args
@@ -118,7 +127,8 @@ class PlotData(object):
>>> x = np.arange(-2, 2, 0.4) >>> x = np.arange(-2, 2, 0.4)
>>> xi = np.arange(-2, 2, 0.1) >>> xi = np.arange(-2, 2, 0.1)
>>> d = PlotData(np.sin(x), x, xlab='x', ylab='sin', title='sinus', plot_args=['r.']) >>> d = PlotData(np.sin(x), x, xlab='x', ylab='sin', title='sinus',
... plot_args=['r.'])
>>> di = PlotData(d.eval_points(xi), xi) >>> di = PlotData(d.eval_points(xi), xi)
>>> hi = di.plot() >>> hi = di.plot()
>>> h = d.plot() >>> h = d.plot()
@@ -132,7 +142,8 @@ class PlotData(object):
if isinstance(self.args, (list, tuple)): # Multidimensional data if isinstance(self.args, (list, tuple)): # Multidimensional data
ndim = len(self.args) ndim = len(self.args)
if ndim < 2: if ndim < 2:
msg = '''Unable to determine plotter-type, because len(self.args)<2. msg = '''
Unable to determine plotter-type, because len(self.args)<2.
If the data is 1D, then self.args should be a vector! If the data is 1D, then self.args should be a vector!
If the data is 2D, then length(self.args) should be 2. If the data is 2D, then length(self.args) should be 2.
If the data is 3D, then length(self.args) should be 3. If the data is 3D, then length(self.args) should be 3.
@@ -140,9 +151,11 @@ class PlotData(object):
warnings.warn(msg) warnings.warn(msg)
else: else:
xi = np.meshgrid(*self.args) xi = np.meshgrid(*self.args)
return interpolate.griddata(xi, self.data.ravel(), points, **options) return interpolate.griddata(
xi, self.data.ravel(), points, **options)
else: # One dimensional data else: # One dimensional data
return interpolate.griddata(self.args, self.data, points, **options) return interpolate.griddata(
self.args, self.data, points, **options)
def integrate(self, a, b, **kwds): def integrate(self, a, b, **kwds):
''' '''
@@ -159,7 +172,8 @@ class PlotData(object):
raise NotImplementedError('integration for ndim>1 not implemented') raise NotImplementedError('integration for ndim>1 not implemented')
#ndim = len(self.args) #ndim = len(self.args)
# if ndim < 2: # if ndim < 2:
# msg = '''Unable to determine plotter-type, because len(self.args)<2. # msg = '''Unable to determine plotter-type, because
# len(self.args)<2.
# If the data is 1D, then self.args should be a vector! # If the data is 1D, then self.args should be a vector!
# If the data is 2D, then length(self.args) should be 2. # If the data is 2D, then length(self.args) should be 2.
# If the data is 3D, then length(self.args) should be 3. # If the data is 3D, then length(self.args) should be 3.
@@ -172,10 +186,14 @@ class PlotData(object):
x = self.args x = self.args
ix = np.flatnonzero((a < x) & (x < b)) ix = np.flatnonzero((a < x) & (x < b))
xi = np.hstack((a, x.take(ix), b)) xi = np.hstack((a, x.take(ix), b))
fi = np.hstack((self.eval_points(a),self.data.take(ix),self.eval_points(b))) fi = np.hstack(
(self.eval_points(a),
self.data.take(ix),
self.eval_points(b)))
res = fun(fi, xi, **kwds) res = fun(fi, xi, **kwds)
if return_ci: if return_ci:
return np.hstack((res, fun(self.dataCI[ix,:].T, xi[1:-1], **kwds))) return np.hstack(
(res, fun(self.dataCI[ix, :].T, xi[1:-1], **kwds)))
return res return res
def plot(self, *args, **kwds): def plot(self, *args, **kwds):
@@ -185,16 +203,19 @@ class PlotData(object):
tmp = None tmp = None
default_plotflag = self.plot_kwds.get('plotflag', None) default_plotflag = self.plot_kwds.get('plotflag', None)
plotflag = kwds.get('plotflag', default_plotflag) plotflag = kwds.get('plotflag', default_plotflag)
if not plotflag and self.children != None: if not plotflag and self.children is not None:
axis.hold('on') axis.hold('on')
tmp = [] tmp = []
child_args = kwds.pop('plot_args_children', tuple(self.plot_args_children)) child_args = kwds.pop(
    'plot_args_children', tuple(self.plot_args_children))
child_kwds = dict(self.plot_kwds_children).copy() child_kwds = dict(self.plot_kwds_children).copy()
child_kwds.update(kwds.pop('plot_kwds_children', {})) child_kwds.update(kwds.pop('plot_kwds_children', {}))
child_kwds['axis'] = axis child_kwds['axis'] = axis
for child in self.children: for child in self.children:
tmp1 = child(*child_args, **child_kwds) tmp1 = child(*child_args, **child_kwds)
if tmp1 != None: if tmp1 is not None:
tmp.append(tmp1) tmp.append(tmp1)
if len(tmp) == 0: if len(tmp) == 0:
tmp = None tmp = None
@@ -207,12 +228,14 @@ class PlotData(object):
def setplotter(self, plotmethod=None): def setplotter(self, plotmethod=None):
''' '''
Set plotter based on the data type data_1d, data_2d, data_3d or data_nd Set plotter based on the data type:
data_1d, data_2d, data_3d or data_nd
''' '''
if isinstance(self.args, (list, tuple)): # Multidimensional data if isinstance(self.args, (list, tuple)): # Multidimensional data
ndim = len(self.args) ndim = len(self.args)
if ndim < 2: if ndim < 2:
msg = '''Unable to determine plotter-type, because len(self.args)<2. msg = '''
Unable to determine plotter-type, because len(self.args)<2.
If the data is 1D, then self.args should be a vector! If the data is 1D, then self.args should be a vector!
If the data is 2D, then length(self.args) should be 2. If the data is 2D, then length(self.args) should be 2.
If the data is 3D, then length(self.args) should be 3. If the data is 3D, then length(self.args) should be 3.
@@ -226,22 +249,28 @@ class PlotData(object):
else: # One dimensional data else: # One dimensional data
self.plotter = Plotter_1d(plotmethod) self.plotter = Plotter_1d(plotmethod)
def show(self): def show(self, *args, **kwds):
self.plotter.show() self.plotter.show(*args, **kwds)
__call__ = plot __call__ = plot
interpolate = eval_points interpolate = eval_points
class AxisLabels: class AxisLabels:
def __init__(self, title='', xlab='', ylab='', zlab='', **kwds): def __init__(self, title='', xlab='', ylab='', zlab='', **kwds):
self.title = title self.title = title
self.xlab = xlab self.xlab = xlab
self.ylab = ylab self.ylab = ylab
self.zlab = zlab self.zlab = zlab
def __repr__(self): def __repr__(self):
return self.__str__() return self.__str__()
def __str__(self): def __str__(self):
return '%s\n%s\n%s\n%s\n' % (self.title, self.xlab, self.ylab, self.zlab) return '%s\n%s\n%s\n%s\n' % (
self.title, self.xlab, self.ylab, self.zlab)
def copy(self): def copy(self):
newcopy = empty_copy(self) newcopy = empty_copy(self)
newcopy.__dict__.update(self.__dict__) newcopy.__dict__.update(self.__dict__)
@@ -252,18 +281,22 @@ class AxisLabels:
axis = plotbackend.gca() axis = plotbackend.gca()
try: try:
h = [] h = []
for fun, txt in zip(('set_title', 'set_xlabel','set_ylabel', 'set_ylabel'), for fun, txt in zip(
('set_title', 'set_xlabel', 'set_ylabel', 'set_zlabel'),
(self.title, self.xlab, self.ylab, self.zlab)): (self.title, self.xlab, self.ylab, self.zlab)):
if txt: if txt:
if fun.startswith('set_title'): if fun.startswith('set_title'):
title0 = axis.get_title() title0 = axis.get_title()
if title0.lower().strip() != txt.lower().strip():
txt = title0 + '\n' + txt txt = title0 + '\n' + txt
h.append(getattr(axis, fun)(txt)) h.append(getattr(axis, fun)(txt))
return h return h
except: except:
pass pass
class Plotter_1d(object): class Plotter_1d(object):
""" """
Parameters Parameters
@@ -280,6 +313,7 @@ class Plotter_1d(object):
step : stair-step plot step : stair-step plot
scatter : scatter plot scatter : scatter plot
""" """
def __init__(self, plotmethod='plot'): def __init__(self, plotmethod='plot'):
self.plotfun = None self.plotfun = None
if plotmethod is None: if plotmethod is None:
@@ -291,8 +325,8 @@ class Plotter_1d(object):
# except: # except:
# pass # pass
def show(self): def show(self, *args, **kwds):
plotbackend.show() plotbackend.show(*args, **kwds)
def plot(self, wdata, *args, **kwds): def plot(self, wdata, *args, **kwds):
axis = kwds.pop('axis', None) axis = kwds.pop('axis', None)
@@ -323,6 +357,7 @@ class Plotter_1d(object):
return h1 return h1
__call__ = plot __call__ = plot
def plot1d(axis, args, data, dataCI, plotflag, *varargin, **kwds): def plot1d(axis, args, data, dataCI, plotflag, *varargin, **kwds):
plottype = np.mod(plotflag, 10) plottype = np.mod(plotflag, 10)
@@ -335,18 +370,32 @@ def plot1d(axis, args, data, dataCI, plotflag, *varargin, **kwds):
elif plottype == 3: elif plottype == 3:
H = axis.stem(args, data, *varargin, **kwds) H = axis.stem(args, data, *varargin, **kwds)
elif plottype == 4: elif plottype == 4:
H = axis.errorbar(args, data, yerr=[dataCI[:,0] - data, dataCI[:,1] - data], *varargin, **kwds) H = axis.errorbar(
    args, data,
    yerr=[dataCI[:, 0] - data, dataCI[:, 1] - data],
    *varargin, **kwds)
elif plottype == 5: elif plottype == 5:
H = axis.bar(args, data, *varargin, **kwds) H = axis.bar(args, data, *varargin, **kwds)
elif plottype == 6: elif plottype == 6:
level = 0 level = 0
if np.isfinite(level): if np.isfinite(level):
H = axis.fill_between(args, data, level, *varargin, **kwds); H = axis.fill_between(args, data, level, *varargin, **kwds)
else: else:
H = axis.fill_between(args, data, *varargin, **kwds); H = axis.fill_between(args, data, *varargin, **kwds)
elif plottype == 7: elif plottype == 7:
H = axis.plot(args, data, *varargin, **kwds) H = axis.plot(args, data, *varargin, **kwds)
H = axis.fill_between(args, dataCI[:,0], dataCI[:,1], alpha=0.2, color='r'); H = axis.fill_between(
    args, dataCI[:, 0], dataCI[:, 1], alpha=0.2, color='r')
scale = plotscale(plotflag) scale = plotscale(plotflag)
logXscale = 'x' in scale logXscale = 'x' in scale
@@ -369,16 +418,17 @@ def plot1d(axis, args, data, dataCI, plotflag, *varargin, **kwds):
ax[3] = 11 * np.log10(fmax1) ax[3] = 11 * np.log10(fmax1)
ax[2] = ax[3] - 40 ax[2] = ax[3] - 40
else: else:
ax[3] = 1.15 * fmax1; ax[3] = 1.15 * fmax1
ax[2] = ax[3] * 1e-4; ax[2] = ax[3] * 1e-4
axis.axis(ax) axis.axis(ax)
if np.any(dataCI) and plottype < 3: if np.any(dataCI) and plottype < 3:
axis.hold(True) axis.hold(True)
plot1d(axis, args, dataCI, (), plotflag, 'r--'); plot1d(axis, args, dataCI, (), plotflag, 'r--')
return H return H
def plotscale(plotflag): def plotscale(plotflag):
''' '''
Return plotscale from plotflag Return plotscale from plotflag
@@ -419,10 +469,19 @@ def plotscale(plotflag):
logZscaleId = (np.mod(scaleId // 100, 10) > 0) * 4 logZscaleId = (np.mod(scaleId // 100, 10) > 0) * 4
scaleId = logYscaleId + logXscaleId + logZscaleId scaleId = logYscaleId + logXscaleId + logZscaleId
scales = ['linear', 'xlog', 'ylog', 'xylog', 'zlog', 'xzlog', 'yzlog', 'xyzlog'] scales = ['linear', 'xlog', 'ylog', 'xylog',
          'zlog', 'xzlog', 'yzlog', 'xyzlog']
return scales[scaleId] return scales[scaleId]
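Reading aid for the plotflag convention shared by plot1d above and transformdata below: the ones digit selects the plot primitive, the tens digit the data transform, and higher digits (decoded by plotscale) the axis scaling. A sketch of the decomposition; the exact digit position consumed by plotscale is inferred from the visible code, so treat that part as an assumption:

plotflag = 7
plottype = plotflag % 10            # 7 -> line plot plus CI band in plot1d
transflag = (plotflag // 10) % 10   # 0 -> data plotted untransformed
# plotscale() maps the log-axis choices onto bit flags x=1, y=2, z=4 and
# indexes ['linear', 'xlog', 'ylog', 'xylog',
#          'zlog', 'xzlog', 'yzlog', 'xyzlog'],
# e.g. x-log plus y-log gives index 3 -> 'xylog'.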
def transformdata(x, f, plotflag): def transformdata(x, f, plotflag):
transFlag = np.mod(plotflag // 10, 10) transFlag = np.mod(plotflag // 10, 10)
if transFlag == 0: if transFlag == 0:
@@ -438,11 +497,14 @@ def transformdata(x, f, plotflag):
data = -np.log1p(-cumtrapz(f, x)) data = -np.log1p(-cumtrapz(f, x))
else: else:
if any(f < 0): if any(f < 0):
raise ValueError('Invalid plotflag: Data or dataCI is negative, but must be positive') raise ValueError('Invalid plotflag: Data or dataCI is ' +
'negative, but must be positive')
data = 10 * np.log10(f) data = 10 * np.log10(f)
return data return data
class Plotter_2d(Plotter_1d): class Plotter_2d(Plotter_1d):
""" """
Parameters Parameters
---------- ----------
@@ -463,6 +525,7 @@ class Plotter_2d(Plotter_1d):
h1 = plot2d(axis, wdata, plotflag, *args, **kwds) h1 = plot2d(axis, wdata, plotflag, *args, **kwds)
return h1 return h1
def plot2d(axis, wdata, plotflag, *args, **kwds): def plot2d(axis, wdata, plotflag, *args, **kwds):
f = wdata f = wdata
if isinstance(wdata.args, (list, tuple)): if isinstance(wdata.args, (list, tuple)):
@@ -471,7 +534,8 @@ def plot2d(axis, wdata, plotflag, *args, **kwds):
args1 = tuple((wdata.args,)) + (wdata.data,) + args args1 = tuple((wdata.args,)) + (wdata.data,) + args
if plotflag in (1, 6, 7, 8, 9): if plotflag in (1, 6, 7, 8, 9):
isPL = False isPL = False
if hasattr(f, 'clevels') and len(f.clevels) > 0: # check if contour levels is submitted # check if contour levels is submitted
if hasattr(f, 'clevels') and len(f.clevels) > 0:
CL = f.clevels CL = f.clevels
isPL = hasattr(f, 'plevels') and f.plevels is not None isPL = hasattr(f, 'plevels') and f.plevels is not None
if isPL: if isPL:
@@ -479,11 +543,12 @@ def plot2d(axis, wdata, plotflag, *args, **kwds):
else: else:
dmax = np.max(f.data) dmax = np.max(f.data)
dmin = np.min(f.data) dmin = np.min(f.data)
CL = dmax - (dmax - dmin) * (1 - np.r_[0.01, 0.025, 0.05, 0.1, 0.2, 0.4, 0.5, 0.75]) CL = dmax - (dmax - dmin) * \
(1 - np.r_[0.01, 0.025, 0.05, 0.1, 0.2, 0.4, 0.5, 0.75])
clvec = np.sort(CL) clvec = np.sort(CL)
if plotflag in [1, 8, 9]: if plotflag in [1, 8, 9]:
h = axis.contour(*args1, levels=CL, **kwds); h = axis.contour(*args1, levels=CL, **kwds)
# else: # else:
# [cs hcs] = contour3(f.x{:},f.f,CL,sym); # [cs hcs] = contour3(f.x{:},f.f,CL,sym);
@@ -491,10 +556,13 @@ def plot2d(axis, wdata, plotflag, *args, **kwds):
ncl = len(clvec) ncl = len(clvec)
if ncl > 12: if ncl > 12:
ncl = 12 ncl = 12
warnings.warn('Only the first 12 levels will be listed in table.') warnings.warn(
'Only the first 12 levels will be listed in table.')
clvals = PL[:ncl] if isPL else clvec[:ncl] clvals = PL[:ncl] if isPL else clvec[:ncl]
unused_axcl = cltext(clvals, percent=isPL) # print contour level text unused_axcl = cltext(
    clvals, percent=isPL)  # print contour level text
elif any(plotflag == [7, 9]): elif any(plotflag == [7, 9]):
axis.clabel(h) axis.clabel(h)
else: else:
@@ -502,7 +570,8 @@ def plot2d(axis, wdata, plotflag, *args, **kwds):
elif plotflag == 2: elif plotflag == 2:
h = axis.mesh(*args1, **kwds) h = axis.mesh(*args1, **kwds)
elif plotflag == 3: elif plotflag == 3:
h = axis.surf(*args1, **kwds) #shading interp % flat, faceted % surfc # shading interp % flat, faceted % surfc
h = axis.surf(*args1, **kwds)
elif plotflag == 4: elif plotflag == 4:
h = axis.waterfall(*args1, **kwds) h = axis.waterfall(*args1, **kwds)
elif plotflag == 5: elif plotflag == 5:
@@ -518,20 +587,25 @@ def plot2d(axis, wdata, plotflag, *args, **kwds):
# end # end
# pass # pass
def test_plotdata(): def test_plotdata():
plotbackend.ioff() plotbackend.ioff()
x = np.arange(-2, 2, 0.4) x = np.arange(-2, 2, 0.4)
xi = np.arange(-2, 2, 0.1) xi = np.arange(-2, 2, 0.1)
d = PlotData(np.sin(x), x, xlab='x', ylab='sin', title='sinus', plot_args=['r.']) d = PlotData(np.sin(x), x, xlab='x', ylab='sin', title='sinus',
plot_args=['r.'])
di = PlotData(d.eval_points(xi, method='cubic'), xi) di = PlotData(d.eval_points(xi, method='cubic'), xi)
unused_hi = di.plot() unused_hi = di.plot()
unused_h = d.plot() unused_h = d.plot()
d.show() d.show()
def test_docstrings(): def test_docstrings():
import doctest import doctest
doctest.testmod() print('Testing docstrings in %s' % __file__)
doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
def main(): def main():
pass pass

@@ -4,4 +4,4 @@ Covariance package in WAFO Toolbox.
from core import * # CovData1D from core import * # CovData1D
#import models #import models
#import dispersion_relation import estimation

@@ -16,25 +16,28 @@ date : Date and time of creation or change.
from __future__ import division from __future__ import division
import warnings import warnings
#import numpy as np import numpy as np
from numpy import (zeros, sqrt, dot, inf, where, pi, nan, #@UnresolvedImport from numpy import (zeros, ones, sqrt, inf, where, nan,
atleast_1d, hstack, vstack, r_, linspace, flatnonzero, size, #@UnresolvedImport atleast_1d, hstack, r_, linspace, flatnonzero, size,
isnan, finfo, diag, ceil, floor, random, pi) #@UnresolvedImport isnan, finfo, diag, ceil, floor, random, pi)
from numpy.fft import fft #as fft from numpy.fft import fft
from numpy.random import randn from numpy.random import randn
import scipy.interpolate as interpolate import scipy.interpolate as interpolate
from scipy.linalg import toeplitz, sqrtm, svd, cholesky, diagsvd, pinv from scipy.linalg import toeplitz, lstsq
from scipy import sparse from scipy import sparse
from pylab import stineman_interp from pylab import stineman_interp
from wafo.wafodata import PlotData from wafo.containers import PlotData
from wafo.misc import sub_dict_select, nextpow2 # , JITImport from wafo.misc import sub_dict_select, nextpow2 # , JITImport
import wafo.spectrum as _wafospec import wafo.spectrum as _wafospec
from scipy.sparse.linalg.dsolve.linsolve import spsolve
from scipy.sparse.base import issparse
from scipy.signal.windows import parzen
#_wafospec = JITImport('wafo.spectrum') #_wafospec = JITImport('wafo.spectrum')
__all__ = ['CovData1D'] __all__ = ['CovData1D']
def _set_seed(iseed): def _set_seed(iseed):
if iseed != None: if iseed != None:
try: try:
@@ -43,67 +46,48 @@ def _set_seed(iseed):
random.seed(iseed) random.seed(iseed)
-#def rndnormnd(cov, mean=0.0, cases=1, method='svd'):
-# '''
-# Random vectors from a multivariate Normal distribution
-#
-# Parameters
-# ----------
-# mean, cov : array-like
-# mean and covariance, respectively.
-# cases : scalar integer
-# number of sample vectors
-# method : string
-# defining squareroot method for covariance
-# 'svd' : Singular value decomp. (stable, quite fast) (default)
-# 'chol' : Cholesky decomposition (fast, but unstable)
-# 'sqrtm' : sqrtm (stable and slow)
-#
-# Returns
-# -------
-# r : matrix of random numbers from the multivariate normal
-# distribution with the given mean and covariance matrix.
-#
-# The covariance must be a symmetric, semi-positive definite matrix with shape
-# equal to the size of the mean. METHOD used for calculating the square root
-# of COV is either svd, cholesky or sqrtm. (cholesky is fastest but least accurate.)
-# When cholesky is chosen and S is not positive definite, the svd-method
-# is used instead.
-#
-# Example
-# -------
-# mu = [0, 5]
-# S = [[1 0.45], [0.45 0.25]]
-# r = rndnormnd(S, mu, 1)
-# plot(r(:,1),r(:,2),'.')
-#
-# d = 40; rho = 2*rand(1,d)-1;
-# mu = zeros(0,d);
-# S = (rho.'*rho-diag(rho.^2))+eye(d);
-# r = rndnormnd(S,mu,100,'genchol')';
-#
-# See also
-# --------
-# chol, svd, sqrtm, genchol
-# np.random.multivariate_normal
-# '''
-# sa = np.atleast_2d(cov)
-# mu = np.atleast_1d(mean).ravel()
-# m, n = sa.shape
-# if m != n:
-# raise ValueError('Covariance must be square')
-# def svdfun(sa):
-# u, s, vh = svd(sa, full_matrices=False)
-# sqt = diagsvd(sqrt(s))
-# return dot(u, dot(sqt, vh))
-#
-# sqrtfuns = dict(sqrtm=sqrtm, svd=svdfun, cholesky=cholesky)
-# sqrtfun = sqrtfuns[method]
-# std = sqrtfun(sa)
-# return dot(std,random.randn(n, cases)) + mu[:,newaxis]
+def rndnormnd(mean, cov, cases=1):
+    '''
+    Random vectors from a multivariate Normal distribution
+
+    Parameters
+    ----------
+    mean, cov : array-like
+        mean and covariance, respectively.
+    cases : scalar integer
+        number of sample vectors
+
+    Returns
+    -------
+    r : matrix of random numbers from the multivariate normal
+        distribution with the given mean and covariance matrix.
+
+    The covariance must be a symmetric, semi-positive definite matrix with
+    shape equal to the size of the mean.
+
+    Example
+    -------
+    >>> mu = [0, 5]
+    >>> S = [[1, 0.45], [0.45, 0.25]]
+    >>> r = rndnormnd(mu, S, 1)
+
+    plot(r(:,1),r(:,2),'.')
+
+    >>> d = 40
+    >>> rho = 2 * np.random.rand(1,d)-1
+    >>> mu = zeros(d)
+    >>> S = (np.dot(rho.T, rho)-diag(rho.ravel()**2))+np.eye(d)
+    >>> r = rndnormnd(mu, S, 100)
+
+    See also
+    --------
+    np.random.multivariate_normal
+    '''
+    return np.random.multivariate_normal(mean, cov, cases)
class CovData1D(PlotData): class CovData1D(PlotData):
""" Container class for 1D covariance data objects in WAFO """ Container class for 1D covariance data objects in WAFO
Member variables Member variables
@@ -147,6 +131,7 @@ class CovData1D(PlotData):
self.__dict__.update(sub_dict_select(kwds, somekeys)) self.__dict__.update(sub_dict_select(kwds, somekeys))
self.setlabels() self.setlabels()
def setlabels(self): def setlabels(self):
''' Set automatic title, x-,y- and z- labels ''' Set automatic title, x-,y- and z- labels
@@ -155,7 +140,8 @@ class CovData1D(PlotData):
N = len(self.type) N = len(self.type)
if N == 0: if N == 0:
raise ValueError('Object does not appear to be initialized, it is empty!') raise ValueError(
'Object does not appear to be initialized, it is empty!')
labels = ['', 'ACF', ''] labels = ['', 'ACF', '']
@@ -175,14 +161,8 @@ class CovData1D(PlotData):
self.labels.ylab = labels[1] self.labels.ylab = labels[1]
self.labels.zlab = labels[2] self.labels.zlab = labels[2]
def tospecdata(self, rate=None, method='fft', nugget=0.0, trunc=1e-5,
fast=True):
## def copy(self):
## kwds = self.__dict__.copy()
## wdata = CovData1D(**kwds)
## return wdata
def tospecdata(self, rate=None, method='fft', nugget=0.0, trunc=1e-5, fast=True):
''' '''
Computes spectral density from the auto covariance function Computes spectral density from the auto covariance function
@@ -190,14 +170,11 @@ class CovData1D(PlotData):
---------- ----------
rate = scalar, int rate = scalar, int
1,2,4,8...2^r, interpolation rate for f (default 1) 1,2,4,8...2^r, interpolation rate for f (default 1)
method : string method : string
interpolation method 'stineman', 'linear', 'cubic', 'fft' interpolation method 'stineman', 'linear', 'cubic', 'fft'
nugget : scalar, real
nugget = scalar, real
nugget effect to ensure that round off errors do not result in nugget effect to ensure that round off errors do not result in
negative spectral estimates. Good choice might be 10^-12. negative spectral estimates. Good choice might be 10^-12.
trunc : scalar, real trunc : scalar, real
truncates all spectral values where S/max(S) < trunc truncates all spectral values where S/max(S) < trunc
0 <= trunc <1 This is to ensure that high frequency 0 <= trunc <1 This is to ensure that high frequency
@@ -208,7 +185,7 @@ class CovData1D(PlotData):
Returns Returns
-------- --------
S = SpecData1D object S : SpecData1D object
spectral density spectral density
NB! This routine requires that the covariance is evenly spaced NB! This routine requires that the covariance is evenly spaced
@@ -258,13 +235,12 @@ class CovData1D(PlotData):
ftype = 'k' ftype = 'k'
if rate is None: if rate is None:
rate = 1 ##interpolation rate rate = 1 # interpolation rate
else: else:
rate = 2 ** nextpow2(rate) ##make sure rate is a power of 2 rate = 2 ** nextpow2(rate) # make sure rate is a power of 2
## add a nugget effect to ensure that round off errors # add a nugget effect to ensure that round off errors
## do not result in negative spectral estimates # do not result in negative spectral estimates
acf[0] = acf[0] + nugget acf[0] = acf[0] + nugget
n = acf.size n = acf.size
# embedding a circulant vector and Fourier transform # embedding a circulant vector and Fourier transform
@@ -274,13 +250,10 @@ class CovData1D(PlotData):
if method == 'fft': if method == 'fft':
nfft *= rate nfft *= rate
nf = nfft / 2 ## number of frequencies nf = nfft / 2 # number of frequencies
acf = r_[acf, zeros(nfft - 2 * n + 2), acf[n - 2:0:-1]] acf = r_[acf, zeros(nfft - 2 * n + 2), acf[n - 2:0:-1]]
Rper = (fft(acf, nfft).real).clip(0) ## periodogram Rper = (fft(acf, nfft).real).clip(0) # periodogram
# import pylab
# pylab.semilogy(Rper)
# pylab.show()
RperMax = Rper.max() RperMax = Rper.max()
Rper = where(Rper < trunc * RperMax, 0, Rper) Rper = where(Rper < trunc * RperMax, 0, Rper)
@@ -320,6 +293,10 @@ class CovData1D(PlotData):
warnings.warn('Data is not uniformly sampled!') warnings.warn('Data is not uniformly sampled!')
return dt return dt
def _is_valid_acf(self):
if self.data.argmax() != 0:
raise ValueError('ACF does not have a maximum at zero lag')
def sim(self, ns=None, cases=1, dt=None, iseed=None, derivative=False): def sim(self, ns=None, cases=1, dt=None, iseed=None, derivative=False):
''' '''
Simulates a Gaussian process and its derivative from ACF Simulates a Gaussian process and its derivative from ACF
@@ -351,8 +328,8 @@ class CovData1D(PlotData):
Gaussian process through circulant embedding of the covariance matrix. Gaussian process through circulant embedding of the covariance matrix.
If the ACF has a non-empty field .tr, then the transformation is If the ACF has a non-empty field .tr, then the transformation is
applied to the simulated data, the result is a simulation of a transformed applied to the simulated data, the result is a simulation of a
Gaussian process. transformed Gaussian process.
Note: The simulation may give high frequency ripple when used with a Note: The simulation may give high frequency ripple when used with a
small dt. small dt.
@@ -384,15 +361,9 @@ class CovData1D(PlotData):
nugget = 0 # 10**-12 nugget = 0 # 10**-12
_set_seed(iseed) _set_seed(iseed)
self._is_valid_acf()
acf = self.data.ravel() acf = self.data.ravel()
n = acf.size n = acf.size
I = acf.argmax()
if I != 0:
raise ValueError('ACF does not have a maximum at zero lag')
acf.shape = (n, 1) acf.shape = (n, 1)
dT = self.sampling_period() dT = self.sampling_period()
@@ -402,26 +373,26 @@ class CovData1D(PlotData):
if derivative: if derivative:
xder = x.copy() xder = x.copy()
## add a nugget effect to ensure that round off errors # add a nugget effect to ensure that round off errors
## do not result in negative spectral estimates # do not result in negative spectral estimates
acf[0] = acf[0] + nugget acf[0] = acf[0] + nugget
## Fast and exact simulation of stationary # Fast and exact simulation of stationary
## Gaussian process through circulant embedding of the # Gaussian process through circulant embedding of the
## Covariance matrix # Covariance matrix
floatinfo = finfo(float) floatinfo = finfo(float)
if (abs(acf[-1]) > floatinfo.eps): ## assuming acf(n+1)==0 if (abs(acf[-1]) > floatinfo.eps): # assuming acf(n+1)==0
m2 = 2 * n - 1 m2 = 2 * n - 1
nfft = 2 ** nextpow2(max(m2, 2 * ns)) nfft = 2 ** nextpow2(max(m2, 2 * ns))
acf = r_[acf, zeros((nfft - m2, 1)), acf[-1:0:-1, :]] acf = r_[acf, zeros((nfft - m2, 1)), acf[-1:0:-1, :]]
#warnings,warn('I am now assuming that ACF(k)=0 for k>MAXLAG.') #warnings,warn('I am now assuming that ACF(k)=0 for k>MAXLAG.')
else: # # ACF(n)==0 else: # ACF(n)==0
m2 = 2 * n - 2 m2 = 2 * n - 2
nfft = 2 ** nextpow2(max(m2, 2 * ns)) nfft = 2 ** nextpow2(max(m2, 2 * ns))
acf = r_[acf, zeros((nfft - m2, 1)), acf[n - 1:1:-1, :]] acf = r_[acf, zeros((nfft - m2, 1)), acf[n - 1:1:-1, :]]
##m2=2*n-2 # m2=2*n-2
S = fft(acf, nfft, axis=0).real ## periodogram S = fft(acf, nfft, axis=0).real # periodogram
I = S.argmax() I = S.argmax()
k = flatnonzero(S < 0) k = flatnonzero(S < 0)
@@ -438,42 +409,43 @@ class CovData1D(PlotData):
ix = flatnonzero(k > 2 * I) ix = flatnonzero(k > 2 * I)
if ix.size > 0: if ix.size > 0:
## # truncating all oscillating values above 2 times the peak # truncating all oscillating values above 2 times the peak
## # frequency to zero to ensure that # frequency to zero to ensure that
## # that high frequency noise is not added to # that high frequency noise is not added to
## # the simulated timeseries. # the simulated timeseries.
ix0 = k[ix[0]] ix0 = k[ix[0]]
S[ix0:-ix0] = 0.0 S[ix0:-ix0] = 0.0
trunc = 1e-5 trunc = 1e-5
maxS = S[I] maxS = S[I]
k = flatnonzero(S[I:-I] < maxS * trunc) k = flatnonzero(S[I:-I] < maxS * trunc)
if k.size > 0: if k.size > 0:
S[k + I] = 0. S[k + I] = 0.
## truncating small values to zero to ensure that # truncating small values to zero to ensure that
## that high frequency noise is not added to # that high frequency noise is not added to
## the simulated timeseries # the simulated timeseries
cases1 = floor(cases / 2) cases1 = int(cases / 2)
cases2 = ceil(cases / 2) cases2 = int(ceil(cases / 2))
# Generate standard normal random numbers for the simulations # Generate standard normal random numbers for the simulations
#randn = np.random.randn #randn = np.random.randn
epsi = randn(nfft, cases2) + 1j * randn(nfft, cases2) epsi = randn(nfft, cases2) + 1j * randn(nfft, cases2)
Ssqr = sqrt(S / (nfft)) # #sqrt(S(wn)*dw ) Ssqr = sqrt(S / (nfft)) # sqrt(S(wn)*dw )
ephat = epsi * Ssqr # [:,np.newaxis] ephat = epsi * Ssqr # [:,np.newaxis]
y = fft(ephat, nfft, axis=0) y = fft(ephat, nfft, axis=0)
x[:, 1:cases + 1] = hstack((y[2:ns + 2, 0:cases2].real, y[2:ns + 2, 0:cases1].imag)) x[:, 1:cases + 1] = hstack((y[2:ns + 2, 0:cases2].real,
y[2:ns + 2, 0:cases1].imag))
x[:, 0] = linspace(0, (ns - 1) * dT, ns) ##(0:dT:(dT*(np-1)))' x[:, 0] = linspace(0, (ns - 1) * dT, ns) # (0:dT:(dT*(np-1)))'
if derivative: if derivative:
Ssqr = Ssqr * r_[0:(nfft / 2 + 1), -(nfft / 2 - 1):0] * 2 * pi / nfft / dT Ssqr = Ssqr * \
r_[0:(nfft / 2 + 1), -(nfft / 2 - 1):0] * 2 * pi / nfft / dT
ephat = epsi * Ssqr # [:,newaxis] ephat = epsi * Ssqr # [:,newaxis]
y = fft(ephat, nfft, axis=0) y = fft(ephat, nfft, axis=0)
xder[:, 1:(cases + 1)] = hstack((y[2:ns + 2, 0:cases2].imag - y[2:ns + 2, 0:cases1].real)) xder[:, 1:(cases + 1)] = hstack((y[2:ns + 2, 0:cases2].imag -
y[2:ns + 2, 0:cases1].real))
xder[:, 0] = x[:, 0] xder[:, 0] = x[:, 0]
if self.tr is not None: if self.tr is not None:
@@ -493,37 +465,83 @@ class CovData1D(PlotData):
else: else:
return x return x
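The method above is the classical circulant-embedding recipe: embed the ACF in a circulant vector, take its FFT as a nonnegative spectrum, colour complex white noise with the square root of that spectrum, and read samples off the transformed noise (real and imaginary parts are independent). A stripped-down, self-contained sketch of that recipe, with no nugget effect and no truncation of oscillating values; the function name is illustrative:

import numpy as np

def sim_circulant(acf, ns, dt=1.0, seed=None):
    # acf: evenly sampled ACF with its maximum at lag zero
    rng = np.random.default_rng(seed)
    n = len(acf)
    nfft = 2 ** int(np.ceil(np.log2(max(2 * n - 2, 2 * ns))))
    # circulant embedding: r0..r_{n-1}, zero padding, r_{n-1}..r1
    c = np.r_[acf, np.zeros(nfft - 2 * n + 2), acf[n - 2:0:-1]]
    S = np.fft.fft(c).real.clip(0)           # nonnegative spectral weights
    eps = rng.standard_normal(nfft) + 1j * rng.standard_normal(nfft)
    y = np.fft.fft(eps * np.sqrt(S / nfft))  # coloured noise
    return np.arange(ns) * dt, y[:ns].real   # y[:ns].imag is a 2nd sample

# t, x = sim_circulant(np.exp(-0.1 * np.arange(100)), ns=512)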
def simcond(self, xo, cases=1, method='approx', inds=None): def _get_lag_where_acf_is_almost_zero(self):
acf = self.data.ravel()
r0 = acf[0]
n = len(acf)
sigma = sqrt(r_[0, r0 ** 2,
r0 ** 2 + 2 * np.cumsum(acf[1:n - 1] ** 2)] / n)
k = flatnonzero(np.abs(acf) > 0.1 * sigma)
if k.size > 0:
lag = min(k.max() + 3, n)
return lag
return n
def _get_acf(self, smooth=False):
self._is_valid_acf()
acf = atleast_1d(self.data).ravel()
n = self._get_lag_where_acf_is_almost_zero()
if smooth:
rwin = parzen(2 * n + 1)
return acf[:n] * rwin[n:2 * n]
else:
return acf[:n]
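When smooth=True, _get_acf tapers the truncated ACF with the right half of a Parzen window, which pulls the estimate smoothly to zero at the cutoff lag and keeps the implied spectral estimate well behaved. The windowing step in isolation (a sketch using the same parzen import as this module):

import numpy as np
from scipy.signal.windows import parzen

n = 8
acf = np.exp(-0.5 * np.arange(n))   # toy ACF with its maximum at lag zero
rwin = parzen(2 * n + 1)            # symmetric window, peak value 1 at index n
tapered = acf * rwin[n:2 * n]       # right half: ~1 at lag 0, ~0 at lag n-1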
def _split_cov(self, sigma, i_known, i_unknown):
'''
Split covariance matrix between known/unknown observations
Returns
-------
Soo covariance between known observations
S11 = covariance between unknown observations
S1o = covariance between known and unknown obs
'''
Soo, So1 = sigma[i_known][:, i_known], sigma[i_known][:, i_unknown]
S11 = sigma[i_unknown][:, i_unknown]
return Soo, So1, S11
def _update_window(self, idx, i_unknown, num_x, num_acf,
overlap, nw, num_restored):
Nsig = len(idx)
start_max = num_x - Nsig
if (nw == 0) and (num_restored < len(i_unknown)):
# move to the next missing data
start_ix = min(i_unknown[num_restored + 1] - overlap, start_max)
else:
start_ix = min(idx[0] + num_acf, start_max)
return idx + start_ix - idx[0]
def simcond(self, xo, method='approx', i_unknown=None):
""" """
Simulate values conditionally on observed known values Simulate values conditionally on observed known values
Parameters Parameters
---------- ----------
x : array-like x : vector
datavector including missing data. timeseries including missing data.
(missing data must be NaN if inds is not given) (missing data must be NaN if i_unknown is not given)
Assumption: The covariance of x is equal to self and has the Assumption: The covariance of x is equal to self and has the
same sample period. same sample period.
cases : scalar integer
number of cases, i.e., number of columns of sample (default=1)
method : string method : string
defining method used in the conditional simulation. Options are: defining method used in the conditional simulation. Options are:
'approximate': Condition only on the closest points. Pros: quite fast 'approximate': Condition only on the closest points. Quite fast
'pseudo': Use pseudo inverse to calculate conditional covariance matrix 'exact' : Exact simulation. Slow for large data sets, may not
'exact' : Exact simulation. Cons: Slow for large data sets, may not return any result due to near singularity of the covariance
return any result due to near singularity of the covariance matrix. matrix.
inds : integers i_unknown : integers
indices to spurious or missing data in x indices to spurious or missing data in x
Returns Returns
------- -------
sample : ndarray sample : ndarray
a random sample of the missing values conditioned on the observed data. a random sample of the missing values conditioned on the observed
data.
mu, sigma : ndarray mu, sigma : ndarray
mean and standard deviation, respectively, of the missing values mean and standard deviation, respectively, of the missing values
conditioned on the observed data. conditioned on the observed data.
Notes Notes
----- -----
SIMCOND generates the missing values from x conditioned on the observed SIMCOND generates the missing values from x conditioned on the observed
@@ -541,266 +559,131 @@ class CovData1D(PlotData):
Brodtkorb, P, Myrhaug, D, and Rue, H (2001) Brodtkorb, P, Myrhaug, D, and Rue, H (2001)
"Joint distribution of wave height and wave crest velocity from "Joint distribution of wave height and wave crest velocity from
reconstructed data with application to ringing" reconstructed data with application to ringing"
Int. Journal of Offshore and Polar Engineering, Vol 11, No. 1, pp 23--32 Int. Journal of Offshore and Polar Engineering, Vol 11, No. 1,
pp 23--32
Brodtkorb, P, Myrhaug, D, and Rue, H (1999) Brodtkorb, P, Myrhaug, D, and Rue, H (1999)
"Joint distribution of wave height and wave crest velocity from "Joint distribution of wave height and wave crest velocity from
reconstructed data" reconstructed data"
in Proceedings of 9th ISOPE Conference, Vol III, pp 66-73 in Proceedings of 9th ISOPE Conference, Vol III, pp 66-73
""" """
# TODO: does not work yet.
# secret methods:
# 'dec1-3': different decomposing algorithm's
# which is only correct for a variables
# having the Markov property
# Cons: 3 is not correct at all, but seems to give
# a reasonable result
# Pros: 1 is slow, 2 is quite fast and 3 is very fast
# Note: (mu1oStd is not given for method ='dec3')
compute_sigma = True
x = atleast_1d(xo).ravel() x = atleast_1d(xo).ravel()
acf = atleast_1d(self.data).ravel() acf = self._get_acf()
N = len(x) num_x = len(x)
n = len(acf) num_acf = len(acf)
i = acf.argmax() if not i_unknown is None:
if i != 0: x[i_unknown] = nan
raise ValueError('This is not a valid ACF!!') i_unknown = flatnonzero(isnan(x))
num_unknown = len(i_unknown)
if not inds is None: mu1o = zeros((num_unknown,))
x[inds] = nan mu1o_std = zeros((num_unknown,))
inds = where(isnan(x))[0] #indices to the unknown observations sample = zeros((num_unknown,))
if num_unknown == 0:
Ns = len(inds) # # missing values warnings.warn('No missing data, no point to continue.')
if Ns == 0: return sample, mu1o, mu1o_std
warnings.warn('No missing data, unable to continue.') if num_unknown == num_x:
return xo, zeros(Ns), zeros(Ns) warnings.warn('All data missing, returning sample from' +
#end ' the apriori distribution.')
if Ns == N:# simulated surface from the apriori distribution mu1o_std = ones(num_unknown) * sqrt(acf[0])
txt = '''All data missing, return self.sim(ns=num_unknown, cases=1)[:, 1], mu1o, mu1o_std
returning sample from the unconditional distribution.'''
warnings.warn(txt) i_known = flatnonzero(1 - isnan(x))
return self.sim(ns=N, cases=cases), zeros(Ns), zeros(Ns)
if method.startswith('exac'):
indg = where(1 - isnan(x))[0] #indices to the known observations # exact but slow. It also may not return any result
if num_acf > 0.3 * num_x:
#initializing variables Sigma = toeplitz(hstack((acf, zeros(num_x - num_acf))))
mu1o = zeros(Ns, 1)
mu1o_std = mu1o
sample = zeros((Ns, cases))
if method[0] == 'd':
# simulated surface from the apriori distribution
xs = self.sim(ns=N, cases=cases)
mu1os = zeros((Ns, cases))
if method.startswith('dec1'):
# only correct for variables having the Markov property
# but still seems to give a reasonable answer. Slow procedure.
Sigma = sptoeplitz(hstack((acf, zeros(N - n))))
#Soo=Sigma(~inds,~inds); # covariance between known observations
#S11=Sigma(inds,inds); # covariance between unknown observations
#S1o=Sigma(inds,~inds);# covariance between known and unknown observations
#tmp=S1o*pinv(full(Soo));
#tmp=S1o/Soo; # this is time consuming if Soo large
tmp = 2 * Sigma[inds, indg] / (Sigma[indg, indg] + Sigma[indg, indg].T)
if compute_sigma:
#standard deviation of the expected surface
#mu1o_std=sqrt(diag(S11-tmp*S1o'));
mu1o_std = sqrt(diag(Sigma[inds, inds] - tmp * Sigma[indg, inds]))
#expected surface conditioned on the known observations from x
mu1o = tmp * x[indg]
#expected surface conditioned on the known observations from xs
mu1os = tmp * (xs[indg, :])
# sampled surface conditioned on the known observations
sample = mu1o + xs[inds, :] - mu1os
elif method.startswith('dec2'):
# only correct for variables having the Markov property
# but still seems to give a reasonable answer
# approximating the expected surfaces conditioned on
# the known observations from x and xs by only using the closest points
Sigma = sptoeplitz(hstack((acf, zeros(n))))
n2 = int(floor(n / 2))
idx = r_[0:2 * n] + max(0, inds[0] - n2) # indices to the points used
tmpinds = zeros(N, dtype=bool)
tmpinds[inds] = True # temporary storage of indices to missing points
tinds = where(tmpinds[idx])[0] # indices to the points used
tindg = where(1 - tmpinds[idx])[0]
ns = len(tinds); # number of missing data in the interval
nprev = 0; # number of previously simulated points
xsinds = xs[inds, :]
while ns > 0:
tmp = 2 * Sigma[tinds, tindg] / (Sigma[tindg, tindg] + Sigma[tindg, tindg].T)
if compute_sigma:
#standard deviation of the expected surface
#mu1o_std=sqrt(diag(S11-tmp*S1o'));
ix = slice(nprev + 1, nprev + ns + 1)
mu1o_std[ix] = max(mu1o_std[ix],
sqrt(diag(Sigma[tinds, tinds] - tmp * Sigma[tindg, tinds])))
#end
#expected surface conditioned on the closest known observations
# from x and xs2
mu1o[(nprev + 1):(nprev + ns + 1)] = tmp * x[idx[tindg]]
mu1os[(nprev + 1):(nprev + ns + 1), :] = tmp * xs[idx[tindg], :]
if idx[-1] == N - 1:#
ns = 0 # no more points to simulate
else: else:
# updating by putting expected surface into x acf[0] = acf[0] * 1.00001
x[idx[tinds]] = mu1o[(nprev + 1):(nprev + ns + 1)] Sigma = sptoeplitz(hstack((acf, zeros(num_x - num_acf))))
xs[idx[tinds]] = mu1os[(nprev + 1):(nprev + ns + 1)] Soo, So1, S11 = self._split_cov(Sigma, i_known, i_unknown)
nw = sum(tmpinds[idx[-n2:]])# # data which we want to simulate once if issparse(Sigma):
tmpinds[idx[:-n2]] = False # removing indices to data .. So1 = So1.todense()
# which has been simulated S11 = S11.todense()
nprev = nprev + ns - nw # update # points simulated so far S1o_Sooinv = spsolve(Soo + Soo.T, 2 * So1).T
if (nw == 0) and (nprev < Ns):
idx = r_[0:2 * n] + (inds[nprev + 1] - n2) # move to the next missing data
else: else:
idx = idx + n Sooinv_So1, _res, _rank, _s = lstsq(Soo + Soo.T, 2 * So1,
#end cond=1e-4)
tmp = N - idx[-1] S1o_Sooinv = Sooinv_So1.T
if tmp < 0: # checking if tmp exceeds the limits mu1o = S1o_Sooinv.dot(x[i_known])
idx = idx + tmp Sigma1o = S11 - S1o_Sooinv.dot(So1)
#end if (diag(Sigma1o) < 0).any():
# find new interval with missing data raise ValueError('Failed to converge to a solution')
tinds = where(tmpinds[idx])[0]
tindg = where(1 - tmpinds[idx])[0]
ns = len(tinds);# # missing data
#end
#end
# sampled surface conditioned on the known observations
sample = mu1o + (xsinds - mu1os)
elif method.startswith('dec3'):
# this is not correct for even for variables having the
# Markov property but still seems to give a reasonable answer
# a quasi approach approximating the expected surfaces conditioned on
# the known observations from x and xs with a spline
mu1o = interp1(indg, x[indg], inds, 'spline')
mu1os = interp1(indg, xs[indg, :], inds, 'spline')
# sampled surface conditioned on the known observations
sample = mu1o + (xs[inds, :] - mu1os)
elif method.startswith('exac') or method.startswith('pseu'):
# exact but slow. It also may not return any result
Sigma = sptoeplitz(hstack((acf, zeros(N - n))))
#Soo=Sigma(~inds,~inds); # covariance between known observations
#S11=Sigma(inds,inds); # covariance between unknown observations
#S1o=Sigma(inds,~inds);# covariance between known and unknown observations
#tmp=S1o/Soo; # this is time consuming if Soo large
if method[0] == 'e': #exact
tmp = 2 * Sigma[inds, indg] / (Sigma[indg, indg] + Sigma[indg, indg].T);
else: # approximate the inverse with pseudo inverse
tmp = dot(Sigma[inds, indg], pinv(Sigma[indg, indg]))
#end
#expected surface conditioned on the known observations from x
mu1o = dot(tmp, x[indg])
# Covariance conditioned on the known observations
Sigma1o = Sigma[inds, inds] - tmp * Sigma[indg, inds]
#sample conditioned on the known observations from x
sample = random.multivariate_normal(mu1o, Sigma1o, cases)
#rndnormnd(mu1o,Sigma1o,cases )
if compute_sigma: mu1o_std = sqrt(diag(Sigma1o))
#standard deviation of the expected surface sample[:] = rndnormnd(mu1o, Sigma1o, cases=1).ravel()
mu1o_std = sqrt(diag(Sigma1o));
#end
elif method.startswith('appr'): elif method.startswith('appr'):
# approximating by only condition on # approximating by only condition on the closest points
# the closest points
# checking approximately how many lags we need in order to Nsig = min(2 * num_acf, num_x)
# ensure conditional independence
# using that the inverse of the circulant covariance matrix has Sigma = toeplitz(hstack((acf, zeros(Nsig - num_acf))))
# approximately the same bandstructure as the inverse of the overlap = int(Nsig / 4)
# covariance matrix # indices to the points used
idx = r_[0:Nsig] + max(0, min(i_unknown[0] - overlap, num_x - Nsig))
Nsig = 2 * n; mask_unknown = zeros(num_x, dtype=bool)
# temporary storage of indices to missing points
Sigma = sptoeplitz(hstack((acf, zeros(Nsig - n)))) mask_unknown[i_unknown] = True
n2 = floor(Nsig / 4) t_unknown = where(mask_unknown[idx])[0]
idx = r_[0:Nsig] + max(0, inds[0] - n2) # indices to the points used t_known = where(1 - mask_unknown[idx])[0]
tmpinds = zeros(N, dtype=bool) ns = len(t_unknown) # number of missing data in the interval
tmpinds[inds] = True # temporary storage of indices to missing points
tinds = where(tmpinds[idx])[0] # indices to the points used num_restored = 0 # number of previously simulated points
tindg = where(1 - tmpinds[idx])[0] x2 = x.copy()
ns = len(tinds) # number of missing data in the interval
nprev = 0 # number of previously simulated points
x2 = x
while ns > 0: while ns > 0:
#make sure MATLAB uses a symmetric matrix solver Soo, So1, S11 = self._split_cov(Sigma, t_known, t_unknown)
tmp = 2 * Sigma[tinds, tindg] / (Sigma[tindg, tindg] + Sigma[tindg, tindg].T) if issparse(Soo):
Sigma1o = Sigma[tinds, tinds] - tmp * Sigma[tindg, tinds] So1 = So1.todense()
if compute_sigma: S11 = S11.todense()
S1o_Sooinv = spsolve(Soo + Soo.T, 2 * So1).T
else:
Sooinv_So1, _res, _rank, _s = lstsq(Soo + Soo.T, 2 * So1,
cond=1e-4)
S1o_Sooinv = Sooinv_So1.T
Sigma1o = S11 - S1o_Sooinv.dot(So1)
if (diag(Sigma1o) < 0).any():
raise ValueError('Failed to converge to a solution')
ix = slice((num_restored), (num_restored + ns))
# standard deviation of the expected surface # standard deviation of the expected surface
#mu1o_std=sqrt(diag(S11-tmp*S1o')); mu1o_std[ix] = np.maximum(mu1o_std[ix], sqrt(diag(Sigma1o)))
mu1o_std[(nprev + 1):(nprev + ns + 1)] = max(mu1o_std[(nprev + 1):(nprev + ns)] ,
sqrt(diag(Sigma1o)))
#end
#expected surface conditioned on the closest known observations from x # expected surface conditioned on the closest known
mu1o[(nprev + 1):(nprev + ns + 1)] = tmp * x2[idx[tindg]] # observations from x
mu1o[ix] = S1o_Sooinv.dot(x2[idx[t_known]])
# sample conditioned on the known observations from x # sample conditioned on the known observations from x
sample[(nprev + 1):(nprev + ns + 1), :] = rndnormnd(tmp * x[idx[tindg]], Sigma1o, cases) mu1os = S1o_Sooinv.dot(x[idx[t_known]])
if idx[-1] == N - 1: sample[ix] = rndnormnd(mu1os, Sigma1o, cases=1)
if idx[-1] == num_x - 1:
ns = 0 # no more points to simulate ns = 0 # no more points to simulate
else: else:
# updating x2[idx[t_unknown]] = mu1o[ix] # expected surface
x2[idx[tinds]] = mu1o[(nprev + 1):(nprev + ns + 1)] #expected surface x[idx[t_unknown]] = sample[ix] # sampled surface
x[idx[tinds]] = sample[(nprev + 1):(nprev + ns + 1)]#sampled surface # removing indices to data which has been simulated
nw = sum(tmpinds[idx[-n2::]] == True)# # data we want to simulate once more mask_unknown[idx[:-overlap]] = False
tmpinds[idx[:-n2]] = False # removing indices to data .. # data we want to simulate once more
# which has been simulated nw = sum(mask_unknown[idx[-overlap:]] == True)
nprev = nprev + ns - nw # update # points simulated so far num_restored += ns - nw # update # points simulated so far
if (nw == 0) and (nprev < Ns): idx = self._update_window(idx, i_unknown, num_x, num_acf,
idx = r_[0:Nsig] + (inds[nprev + 1] - n2) # move to the next missing data overlap, nw, num_restored)
else:
idx = idx + n
#end
tmp = N - idx[-1]
if tmp < 0: # checking if tmp exceeds the limits
idx = idx + tmp
#end
# find new interval with missing data # find new interval with missing data
tinds = where(tmpinds[idx])[0] t_unknown = flatnonzero(mask_unknown[idx])
tindg = where(1 - tmpinds[idx])[0] t_known = flatnonzero(1 - mask_unknown[idx])
ns = len(tinds);# # missing data in the interval ns = len(t_unknown) # # missing data in the interval
#end return sample, mu1o, mu1o_std
#end
#end
return sample
# plot(find(~inds),x(~inds),'.')
# hold on,
# ind=find(inds);
# plot(ind,mu1o ,'*')
# plot(ind,sample,'r+')
# #mu1o_std
# plot(ind,[mu1o-2*mu1o_std mu1o+2*mu1o_std ] ,'d')
# #plot(xs),plot(ind,mu1os,'r*')
# hold off
# legend('observed values','mu1o','sampled values','2 stdev')
# #axis([770 850 -1 1])
# #axis([1300 1325 -1 1])
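Both the exact and the approximate branch apply the standard Gaussian conditioning identity: for x split into known (o) and unknown (1) parts, mu_1|o = S_1o Soo^-1 x_o and Sigma_1|o = S_11 - S_1o Soo^-1 S_o1, solved against the symmetrised Soo + Soo.T exactly as in the code above. A dense miniature of the same computation (toy values and names, not wafo API):

import numpy as np
from scipy.linalg import toeplitz, lstsq

acf = 0.9 ** np.arange(6)                 # toy ACF
Sigma = toeplitz(acf)                     # covariance of 6 consecutive points
i_known, i_unknown = [0, 1, 2, 5], [3, 4]
Soo = Sigma[np.ix_(i_known, i_known)]
So1 = Sigma[np.ix_(i_known, i_unknown)]
S11 = Sigma[np.ix_(i_unknown, i_unknown)]
x_known = np.array([1.0, 0.8, 0.5, -0.2])
# w.T = S_1o Soo^-1, from the symmetric solve (Soo + Soo.T) w = 2 So1
S1o_Sooinv = lstsq(Soo + Soo.T, 2 * So1)[0].T
mu1o = S1o_Sooinv.dot(x_known)            # conditional mean
Sigma1o = S11 - S1o_Sooinv.dot(So1)       # conditional covariance
sample = np.random.multivariate_normal(mu1o, Sigma1o)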
def sptoeplitz(x): def sptoeplitz(x):
k = where(x.ravel())[0] k = flatnonzero(x)
n = len(x) n = len(x)
if len(k) > 0.3 * n:
return toeplitz(x)
else:
spdiags = sparse.dia_matrix spdiags = sparse.dia_matrix
data = x[k].reshape(-1, 1).repeat(n, axis=-1) data = x[k].reshape(-1, 1).repeat(n, axis=-1)
offsets = k offsets = k
@@ -808,7 +691,9 @@ def sptoeplitz(x):
if k[0] == 0: if k[0] == 0:
offsets = k[1::] offsets = k[1::]
data = data[1::, :] data = data[1::, :]
return y + spdiags((data, -offsets), shape=(n, n)) t = y + spdiags((data, -offsets), shape=(n, n))
return t.tocsr()
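
To see what sptoeplitz builds, here is a standalone sketch of the same
construction (the helper name and the test vector are invented for the demo;
it assumes scipy.sparse.dia_matrix, as the code above does):

import numpy as np
from scipy import sparse

def _sptoeplitz_demo(x):
    # same idea as above: one dia_matrix per sign of the offsets
    k = np.flatnonzero(x)
    n = len(x)
    data = x[k].reshape(-1, 1).repeat(n, axis=-1)
    offsets = k
    y = sparse.dia_matrix((data, offsets), shape=(n, n))
    if k[0] == 0:                 # do not duplicate the main diagonal
        offsets = k[1:]
        data = data[1:, :]
    return (y + sparse.dia_matrix((data, -offsets), shape=(n, n))).tocsr()

acf = np.array([2.0, 1.0, 0.5, 0.0, 0.0])   # hypothetical short ACF
print(_sptoeplitz_demo(acf).toarray())      # symmetric, mostly-zero Toeplitz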
def _test_covdata():
    import wafo.data
@ -817,6 +702,7 @@ def _test_covdata():
    rf = ts.tocovdata(lag=150)
    rf.plot()


def main():
    import wafo.spectrum.models as sm
    import matplotlib
@ -824,13 +710,27 @@ def main():
    Sj = sm.Jonswap()
    S = Sj.tospecdata()  # Make spec
    S.plot()
    R = S.tocovdata(rate=3)
    R.plot()
    x = R.sim(ns=1024 * 4)
    inds = np.hstack((21 + np.arange(20),
                      1000 + np.arange(20),
                      1024 * 4 - 21 + np.arange(20)))
    sample, mu1o, mu1o_std = R.simcond(x[:, 1], method='approx', i_unknown=inds)

    import matplotlib.pyplot as plt
    #inds = np.atleast_2d(inds).reshape((-1,1))
    plt.plot(x[:, 1], 'k.', label='observed values')
    plt.plot(inds, mu1o, '*', label='mu1o')
    plt.plot(inds, sample.ravel(), 'r+', label='samples')
    plt.plot(inds, mu1o - 2 * mu1o_std, 'r',
             inds, mu1o + 2 * mu1o_std, 'r', label='2 stdev')
    plt.legend()
    plt.show('hold')
if __name__ == '__main__':
    if False:  # True: #
        import doctest
        doctest.testmod()
    else:
@ -3,6 +3,7 @@ from scipy.fftpack import dct as _dct
from scipy.fftpack import idct as _idct

__all__ = ['dct', 'idct', 'dctn', 'idctn']


def dct(x, type=2, n=None, axis=-1, norm='ortho'):  # @ReservedAssignment
    '''
    Return the Discrete Cosine Transform of arbitrary type sequence x.
@ -99,9 +100,12 @@ def dct(x, type=2, n=None, axis=-1, norm='ortho'): #@ReservedAssignment
    '''
    farr = np.asfarray
    if np.iscomplex(x).any():
        return _dct(farr(x.real), type, n, axis, norm) + \
            1j * _dct(farr(x.imag), type, n, axis, norm)
    else:
        return _dct(farr(x), type, n, axis, norm)
def idct(x, type=2, n=None, axis=-1, norm='ortho'):  # @ReservedAssignment
    '''
    Return the Inverse Discrete Cosine Transform of an arbitrary type sequence.
@ -141,9 +145,12 @@ def idct(x, type=2, n=None, axis=-1, norm='ortho'): #@ReservedAssignment
    '''
    farr = np.asarray
    if np.iscomplex(x).any():
        return _idct(farr(x.real), type, n, axis, norm) + \
            1j * _idct(farr(x.imag), type, n, axis, norm)
    else:
        return _idct(farr(x), type, n, axis, norm)
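
The real/imaginary split above is valid because the DCT is a real linear
transform. A quick round-trip check of the same trick directly against
scipy.fftpack (the test signal is made up):

import numpy as np
from scipy.fftpack import dct as _dct, idct as _idct

x = np.exp(1j * np.linspace(0.0, np.pi, 8))            # complex test signal
X = _dct(x.real, 2, norm='ortho') + 1j * _dct(x.imag, 2, norm='ortho')
xr = _idct(X.real, 2, norm='ortho') + 1j * _idct(X.imag, 2, norm='ortho')
print(np.allclose(xr, x))                              # True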
def dctn(x, type=2, axis=None, norm='ortho'):  # @ReservedAssignment
    '''
    DCTN N-D discrete cosine transform.
@ -204,7 +211,8 @@ def dctn(x, type=2, axis=None, norm='ortho'): #@ReservedAssignment
        return y
    if np.iscomplex(y).any():
        y = dctn(y.real, type, axis, norm) + 1j * \
            dctn(y.imag, type, axis, norm)
    else:
        y = np.asfarray(y)
    for dim in range(ndim):
@ -215,6 +223,7 @@ def dctn(x, type=2, axis=None, norm='ortho'): #@ReservedAssignment
        y = _dct(y, type, norm=norm)
    return y.reshape(shape0)
def idctn(x, type=2, axis=None, norm='ortho'):  # @ReservedAssignment
    y = np.atleast_1d(x)
    shape0 = y.shape
@ -236,7 +245,8 @@ def idctn(x, type=2, axis=None, norm='ortho'): #@ReservedAssignment
        return y
    if np.iscomplex(y).any():
        y = idctn(y.real, type, axis, norm) + 1j * \
            idctn(y.imag, type, axis, norm)
    else:
        y = np.asfarray(y)
    for dim in range(ndim):
@ -345,7 +355,8 @@ def idctn(x, type=2, axis=None, norm='ortho'): #@ReservedAssignment
#            y[..., ::2] = yp[..., :n / 2]
#            y[..., ::-2] = yp[..., n / 2::]
#        else:
#            yp = ifft(np.hstack((xx, np.zeros_like(xx[..., 0]),
#                                 np.conj(xx[..., :0:-1]))))
#            y = yp[..., :n]
#
#    if real_x:
@ -403,16 +414,16 @@ def idctn(x, type=2, axis=None, norm='ortho'): #@ReservedAssignment
#    if dimy==1:
#        y = np.atleast_2d(y)
#        y = y.T
#    # Some modifications are required if Y is a vector
#    if isvector(y):
#        if y.shape[0]==1:
#            if axis==0:
#                return y, None
#            elif axis==1:
#                axis=0
##                y = y.T
#        elif axis==1:
#            return y, None
#
#    if w is None:
#        w = [0,] * dimy
@ -420,17 +431,17 @@ def idctn(x, type=2, axis=None, norm='ortho'): #@ReservedAssignment
#        if axis is not None and dim!=axis:
#            continue
#        n = (dimy==1)*y.size + (dimy>1)*shape0[dim]
#        w{dim} = exp(1i*(0:n-1)'*pi/2/n);
#        w[dim] = np.exp(1j * np.arange(n) * np.pi / (2 * n))
#
#    # --- DCT algorithm ---
#    if np.iscomplex(y).any():
#        y = dctn(np.real(y),axis,w) + 1j*dctn(np.imag(y),axis,w)
#    else:
#        for dim in range(dimy):
#            y = shiftdim(y,1)
#            if axis is not None and dim!=axis:
#                y = shiftdim(y, 1)
#                continue
#            siz = y.shape
#            n = siz[-1]
@ -441,8 +452,8 @@ def idctn(x, type=2, axis=None, norm='ortho'): #@ReservedAssignment
#            y[:,0] = y[:,0]/np.sqrt(2)
#        y = y.reshape(siz)
#
#    end
#    end
#
#    return y.reshape(shape0), w
#
@ -510,16 +521,16 @@ def idctn(x, type=2, axis=None, norm='ortho'): #@ReservedAssignment
#    if dimy==1:
#        y = np.atleast_2d(y)
#        y = y.T
#    # Some modifications are required if Y is a vector
#    if isvector(y):
#        if y.shape[0]==1:
#            if axis==0:
#                return y, None
#            elif axis==1:
#                axis=0
##                y = y.T
#        elif axis==1:
#            return y, None
##
#
#
@ -529,16 +540,16 @@ def idctn(x, type=2, axis=None, norm='ortho'): #@ReservedAssignment
#        if axis is not None and dim!=axis:
#            continue
#        n = (dimy==1)*y.size + (dimy>1)*shape0[dim]
#        w{dim} = exp(1i*(0:n-1)'*pi/2/n);
#        w[dim] = np.exp(1j * np.arange(n) * np.pi / (2 * n))
#    # --- IDCT algorithm ---
#    if np.iscomplex(y).any():
#        y = np.complex(idctn(np.real(y),axis,w),idctn(np.imag(y),axis,w))
#    else:
#        for dim in range(dimy):
#            y = shiftdim(y,1)
#            if axis is not None and dim!=axis:
#                y = shiftdim(y, 1)
#                continue
#            siz = y.shape
#            n = siz[-1]
@ -560,7 +571,6 @@ def idctn(x, type=2, axis=None, norm='ortho'): #@ReservedAssignment
#    return y, w


def no_leading_ones(x):
    first = 0
    for i, xi in enumerate(x):
@ -569,6 +579,7 @@ def no_leading_ones(x):
            break
    return x[first:]


def shiftdim(x, n=None):
    '''
    Shift dimensions
@ -603,6 +614,7 @@ def shiftdim(x, n=None):
    else:
        return x.reshape((1,) * -n + x.shape)


def test_dctn():
    a = np.arange(12)  # .reshape((3,-1))
    print('a = ', a)
@ -639,10 +651,11 @@ def test_dctn():
#    print(xn1)


def test_docstrings():
    import doctest
    print('Testing docstrings in %s' % __file__)
    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)


if __name__ == '__main__':
    test_docstrings()
@ -20,6 +20,8 @@ or
"""


def wave_amplitudes():
    r"""
    Wave amplitudes and heights definitions and nomenclature
@ -55,6 +57,7 @@ def wave_amplitudes():
    """
    print(wave_amplitudes.__doc__)


def crossings():
    r"""
    Level v crossing definitions and nomenclature
@ -99,6 +102,7 @@ def crossings():
    """
    print(crossings.__doc__)


def cycle_pairs():
    r"""
    Cycle pairs definitions and nomenclature
@ -116,6 +120,7 @@ def cycle_pairs():
    """
    print(cycle_pairs.__doc__)


def wave_periods():
    r"""
    Wave periods (lengths) definitions and nomenclature
@ -203,6 +208,8 @@ def wave_periods():
    turning_points
    """
    print(wave_periods.__doc__)


def turning_points():
    r"""
    Turning points definitions and nomenclature
@ -238,6 +245,8 @@ def turning_points():
    """
    print(turning_points.__doc__)


def waves():
    r"""
    Wave definitions and nomenclature
@ -1,4 +1,5 @@
# @UnresolvedImport
from pylab import subplot, plot, title, savefig, figure, arange, sin, random
from sg_filter import calc_coeff, smooth
@ -36,8 +37,3 @@ title('smoothed derivative of signal')

# show plot
savefig("savitzky.png")
@ -4,10 +4,10 @@ Created on 20. jan. 2011

@author: pab
'''
import numpy as np
from numpy import exp, meshgrid

__all__ = ['peaks', 'humps', 'magic']


def magic(n):
    '''
    Return magic square for n of any orders > 2.
@ -55,7 +55,8 @@ def magic(n):
        p = n // 2
        M0 = magic(p)
        M = np.hstack((np.vstack((M0, M0 + 3 * p * p)),
                       np.vstack((M0 + 2 * p * p, M0 + p * p))))
        if n > 2:
            k = (n - 2) // 4
@ -68,9 +69,9 @@ def magic(n):
            i = k
            j = 0
            temp = M[i][j]
            M[i][j] = M[i + p][j]
            M[i + p][j] = temp
            j = i
            temp = M[i + p][j]
@ -79,6 +80,7 @@ def magic(n):
    return M
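
The block-and-swap corrections above are hard to eyeball; for the odd-order
branch the classical Siamese rule is enough to check the magic invariant.
A standalone sketch (not wafo's implementation), assuming only numpy:

import numpy as np

def _magic_odd(n):
    # Siamese construction for odd n: walk up-right, drop down on collision
    M = np.zeros((n, n), dtype=int)
    i, j = 0, n // 2
    for k in range(1, n * n + 1):
        M[i, j] = k
        i2, j2 = (i - 1) % n, (j + 1) % n
        if M[i2, j2]:
            i2, j2 = (i + 1) % n, j
        i, j = i2, j2
    return M

M = _magic_odd(5)
print(M.sum(axis=0), M.sum(axis=1), np.trace(M))  # all 5*(5**2+1)/2 = 65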
def peaks(x=None, y=None, n=51):
    '''
    Return the "well" known MatLab (R) peaks function
@ -105,6 +107,7 @@ def peaks(x=None, y=None, n=51):
    return x1, y1, z


def humps(x=None):
    '''
    Computes a function that has three roots, and some humps.
@ -122,11 +125,14 @@ def humps(x=None):
    else:
        y = np.asarray(x)

    return 1.0 / ((y - 0.3) ** 2 + 0.01) + 1.0 / ((y - 0.9) ** 2 + 0.04) + \
        2 * y - 5.2


def test_docstrings():
    import doctest
    print('Testing docstrings in %s' % __file__)
    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)


if __name__ == '__main__':
    test_docstrings()
@ -10,16 +10,18 @@ import warnings
import numpy as np
from wafo.plotbackend import plotbackend
from matplotlib import mlab

__all__ = ['cltext', 'tallibing', 'test_docstrings']

_TALLIBING_GID = 'TALLIBING'
_CLTEXT_GID = 'CLTEXT'


def _matchfun(x, gidtxt):
    if hasattr(x, 'get_gid'):
        return x.get_gid() == gidtxt
    return False


def delete_text_object(gidtxt, figure=None, axis=None, verbose=False):
    '''
    Delete all text objects matching the gidtxt if it exists
@ -44,16 +46,20 @@ def delete_text_object(gidtxt, figure=None, axis=None, verbose=False):
            axis.texts.remove(obj)
        except:
            if verbose:
                warnings.warn(
                    'Tried to delete a non-existing %s from axis' % gidtxt)
    objs = figure.findobj(lmatchfun)
    for obj in objs:
        try:
            figure.texts.remove(obj)
        except:
            if verbose:
                warnings.warn(
                    'Tried to delete a non-existing %s from figure' % gidtxt)
def cltext(levels, percent=False, n=4, xs=0.036, ys=0.94, zs=0, figure=None,
           axis=None):
    '''
    Places contour level text in the current window
@ -102,7 +108,8 @@ def cltext(levels, percent=False, n=4, xs=0.036, ys=0.94, zs=0, figure=None, axi
    >>> h = wg.cltext(h.levels)
    >>> plt.show()
    '''
    # TODO : Make it work like legend does (but without the box): include
    # position options etc...
    if figure is None:
        figure = plotbackend.gcf()
    if axis is None:
@ -110,7 +117,6 @@ def cltext(levels, percent=False, n=4, xs=0.036, ys=0.94, zs=0, figure=None, axi
    clevels = np.atleast_1d(levels)

    axpos = axis.get_position()
    xint = axpos.intervalx
    yint = axpos.intervaly
@ -125,20 +131,20 @@ def cltext(levels, percent=False, n=4, xs=0.036, ys=0.94, zs=0, figure=None, axi
    delta_y = charHeight

    if percent:
        titletxt = 'Level curves enclosing:'
    else:
        titletxt = 'Level curves at:'

    format_ = '%0.' + ('%d' % n) + 'g\n'
    cltxt = ''.join([format_ % level for level in clevels.tolist()])

    titleProp = dict(gid=_CLTEXT_GID, horizontalalignment='left',
                     verticalalignment='center', fontweight='bold', axes=axis)
    ha1 = figure.text(xss, yss, titletxt, **titleProp)

    yss -= delta_y
    txtProp = dict(gid=_CLTEXT_GID, horizontalalignment='left',
                   verticalalignment='top', axes=axis)
@ -146,18 +152,31 @@ def cltext(levels, percent=False, n=4, xs=0.036, ys=0.94, zs=0, figure=None, axi
    plotbackend.draw_if_interactive()
    return ha1, ha2
def tallibing(*args, **kwds):
    '''
    TALLIBING  Display numbers on field-plot

    CALL h=tallibing(x,y,n,size,color)

    Parameters
    ----------
    x, y : array
        position matrices
    n : array
        corresponding matrix of the values to be written
        (non-integers are rounded)
    mid_points : bool (default True)
        data-point-positions are in the middle of bins instead of the corners
    size : int, (default=8)
        font size (optional)
    color : str, (default='white')
        color of text (optional)

    Returns
    -------
    h : list
        handles to TEXT objects

    TALLIBING writes the numbers in a 2D array as text at the positions
    given by the x and y coordinate matrices.
@ -169,11 +188,9 @@ def tallibing(x, y, n, **kwds):
    >>> import wafo.graphutil as wg
    >>> import wafo.demos as wd
    >>> [x,y,z] = wd.peaks(n=20)
    >>> h0 = wg.pcolor(x,y,z)
    >>> h1 = wg.tallibing(x,y,z)

    See also
    --------
    text
@ -183,65 +200,29 @@ def tallibing(x, y, n, **kwds):
    if axis is None:
        axis = plotbackend.gca()

    x, y, n = _parse_data(*args, **kwds)
    if mlab.isvector(x) or mlab.isvector(y):
        x, y = np.meshgrid(x, y)

    n = np.round(n)

    # delete tallibing object if it exists
    delete_text_object(_TALLIBING_GID, axis=axis)

    txtProp = dict(gid=_TALLIBING_GID, size=8, color='w',
                   horizontalalignment='center',
                   verticalalignment='center', fontweight='demi', axes=axis)
    txtProp.update(**kwds)
    h = []
    for xi, yi, ni in zip(x.ravel(), y.ravel(), n.ravel()):
        if ni:
            h.append(axis.text(xi, yi, str(ni), **txtProp))
    plotbackend.draw_if_interactive()
    return h
def _parse_data(*args, **kwds):
    nargin = len(args)
    data = np.atleast_2d(args[-1]).copy()
    M, N = data.shape
@ -255,28 +236,32 @@ def epcolor(*args, **kwds):
        if min(y.shape) != 1:
            y = y[:, 0]
    else:
        raise ValueError(
            'Requires 3 or 1 in arguments! (x,y,data) or (data)')
    if kwds.pop('mid_point', True):
        xx = _find_mid_points(x)
        yy = _find_mid_points(y)
        return xx, yy, data
    return x, y, data

pcolor = plotbackend.pcolor
pcolormesh = plotbackend.pcolormesh
def _find_mid_points(x):
    ''' Return points half way between all values of X and outside the
    endpoints. The outer limits have same distance from X's endpoints as
    the limits just inside.
    '''
    dx = np.diff(x) * 0.5
    dx = np.hstack((dx, dx[-1]))
    return x + dx
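
A tiny worked example of _find_mid_points (values made up): the last
mid-point is extrapolated by repeating the final half-spacing.

import numpy as np

x = np.array([0.0, 1.0, 2.0, 4.0])
dx = np.diff(x) * 0.5              # half spacings: [0.5, 0.5, 1.0]
dx = np.hstack((dx, dx[-1]))       # repeat the last one for the end point
print(x + dx)                      # [ 0.5  1.5  3.   5. ]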
def test_docstrings():
    import doctest
    print('Testing docstrings in %s' % __file__)
    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)


if __name__ == '__main__':
    test_docstrings()
@ -1,9 +1,10 @@
"""
WAFO
====
WAFO is a toolbox of Python routines for statistical analysis and simulation
of random waves and random loads.
WAFO is freely redistributable software, see WAFO licence, cf. the
GNU General Public License (GPL), and contains tools for:

Fatigue Analysis
----------------
@ -22,7 +23,8 @@ Statistics
    -Kernel density estimation
    -Hidden markov models

WAFO consists of several subpackages and classes with short descriptions given
below.

Classes:
    TimeSeries - Data analysis of time series. Example: extraction of
@ -35,7 +37,6 @@ Statistics
             Ex: common spectra implemented, directional spectra,
             bandwidth measures, exact distributions for wave characteristics.
    CyclePairs - Cycle counting, discretization, and crossings, calculation of
             damage. Simulation of discrete Markov chains, switching Markov
             chains, harmonic oscillator. Ex: Rainflow cycles and matrix,
@ -1428,8 +1428,8 @@ def qdemo(f, a, b):
    formats = ['%4.0f, ', ] + ['%10.10f, ', ] * 6
    formats[-1] = formats[-1].split(',')[0]
    data = np.vstack((neval, qt, et, qs, es, qb, eb)).T
    print(' ftn Trapezoid Simpson''s Boole''s')  # @IgnorePep8
    print('evals approx error approx error approx error')  # @IgnorePep8
    for k in xrange(kmax):
        tmp = data[k].tolist()
@ -1437,8 +1437,8 @@ def qdemo(f, a, b):
    # display results
    data = np.vstack((neval, qc, ec, qc2, ec2, qg, eg)).T
    print(' ftn Clenshaw Chebychev Gauss-L')  # @IgnorePep8
    print('evals approx error approx error approx error')  # @IgnorePep8
    for k in xrange(kmax):
        tmp = data[k].tolist()
        print(''.join(fi % t for fi, t in zip(formats, tmp)))
@ -1447,7 +1447,7 @@ def qdemo(f, a, b):
    plt.xlabel('number of function evaluations')
    plt.ylabel('error')
    plt.legend(
        ('Trapezoid', 'Simpsons', 'Booles', 'Clenshaw', 'Chebychev', 'Gauss-L'))  # @IgnorePep8
    # ec3'
@ -12,9 +12,9 @@
from __future__ import division
import numpy as np
import scipy.signal
#import scipy.special as spec
import scipy.sparse.linalg  # @UnusedImport
import scipy.sparse as sparse
from numpy.ma.core import ones, zeros, prod, sin
from numpy import diff, pi, inf  # @UnresolvedImport
from numpy.lib.shape_base import vstack
@ -546,7 +546,7 @@ class SmoothSpline(PPform):
        else:
            dx1 = 1. / dx
            D = sparse.spdiags(var * ones(n), 0, n, n)  # The variance
            u, p = self._compute_u(p, D, dydx, dx, dx1, n)
            dx1.shape = (n - 1, -1)
@ -590,10 +590,10 @@ class SmoothSpline(PPform):
    def _compute_u(self, p, D, dydx, dx, dx1, n):
        if p is None or p != 0:
            data = [dx[1:n - 1], 2 * (dx[:n - 2] + dx[1:n - 1]), dx[:n - 2]]
            R = sparse.spdiags(data, [-1, 0, 1], n - 2, n - 2)

        if p is None or p < 1:
            Q = sparse.spdiags(
                [dx1[:n - 2], -(dx1[:n - 2] + dx1[1:n - 1]), dx1[1:n - 1]],
                [0, -1, -2], n, n - 2)
            QDQ = (Q.T * D * Q)
@ -612,8 +612,8 @@ class SmoothSpline(PPform):
        # Make sure it uses symmetric matrix solver
        ddydx = diff(dydx, axis=0)
        #sp.linalg.use_solver(useUmfpack=True)
        u = 2 * sparse.linalg.spsolve((QQ + QQ.T), ddydx)  # @UndefinedVariable
        return u.reshape(n - 2, -1), p
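
The `(QQ + QQ.T)` trick above hands spsolve an explicitly symmetric system.
A minimal sketch with a made-up tridiagonal QQ and right-hand side, assuming
only scipy.sparse:

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import spsolve

n = 5
QQ = sparse.spdiags([np.ones(n), 4.0 * np.ones(n), np.ones(n)],
                    [-1, 0, 1], n, n).tocsc()
ddydx = np.arange(n, dtype=float)        # stand-in right-hand side
u = 2 * spsolve((QQ + QQ.T).tocsc(), ddydx)
print(u)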
@ -923,7 +923,7 @@ class StinemanInterp(object):
    '''

    def __init__(self, x, y, yp=None, method='parabola', monotone=False):
        if yp is None:
            yp = slopes(x, y, method, monotone=monotone)
        self.x = np.asarray(x, np.float_)
        self.y = np.asarray(y, np.float_)
        self.yp = np.asarray(yp, np.float_)
@ -1058,7 +1058,8 @@ class Pchip(PiecewisePolynomial):
    >>> h=plt.xlabel("X")
    >>> h=plt.ylabel("Y")
    >>> txt = "Comparing pypchip() vs. Scipy interp1d() vs. non-monotonic CHS"
    >>> h=plt.title(txt)
    >>> legends = ["Data", "pypchip()", "interp1d","CHS", 'SI']
    >>> h=plt.legend(legends, loc="upper left")
    >>> plt.show()
@ -1210,10 +1211,10 @@ def test_func():
    _tck1, _u = interpolate.splprep([t, y], s=0)  # @UndefinedVariable
    tck2 = interpolate.splrep(t, y, s=len(t), task=0)  # @UndefinedVariable
    # interpolate.spl
    tck = interpolate.splmake(t, y, order=3, kind='smoothest', conds=None)
    self = interpolate.ppform.fromspline(*tck2)  # @UndefinedVariable
    plt.plot(t, self(t))
    plt.show('hold')
    pass
@ -1238,12 +1239,13 @@ def test_pp():

def test_docstrings():
    import doctest
    print('Testing docstrings in %s' % __file__)
    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)


if __name__ == '__main__':
    #test_func()
    # test_doctstrings()
    # test_smoothing_spline()
    #compare_methods()
    demo_monoticity()
@ -21,7 +21,7 @@ from scipy.ndimage.morphology import distance_transform_edt
from numpy import pi, sqrt, atleast_2d, exp, newaxis  # @UnresolvedImport
from wafo.misc import meshgrid, nextpow2, tranproc  # , trangood
from wafo.containers import PlotData
from wafo.dctpack import dct, dctn, idctn
from wafo.plotbackend import plotbackend as plt
try:
@ -3984,7 +3984,8 @@ def kreg_demo3(x, y, fun1, hs=None, fun='hisj', plotlog=False):
    eerr = np.abs((yiii - fiii)).std() + 0.5 * (df[:-1] * df[1:] < 0).sum() / n
    err = (fiii - fit).std()
    f = kreg(
        xiii, output='plotobj',
        title='%s err=%1.3f,eerr=%1.3f, n=%d, hs=%1.3f, hs1=%1.3f, hs2=%1.3f' %
        (fun, err, eerr, n, hs, hs1, hs2), plotflag=1)

    #yi[yi==0] = 1.0/(c[c!=0].min()+4)
@ -4051,8 +4052,8 @@ def kreg_demo3(x, y, fun1, hs=None, fun='hisj', plotlog=False):
    # Wilson score
    den = 1 + (z0 ** 2. / ciii)
    xc = (pi1 + (z0 ** 2) / (2 * ciii)) / den
    halfwidth = (z0 * sqrt((pi1 * (1 - pi1) / ciii) +
                           (z0 ** 2 / (4 * (ciii ** 2))))) / den
    plo = (xc - halfwidth).clip(min=0)  # wilson score
    pup = (xc + halfwidth).clip(max=1.0)  # wilson score
    # pup = (pi + z0*np.sqrt(pi*(1-pi)/ciii)).clip(min=0,max=1) # dont use
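
The Wilson score interval coded above, written out for a single scalar bin
(the count and proportion are made up; scipy.special.ndtri is assumed for the
normal quantile in this demo):

import numpy as np
from scipy.special import ndtri

z0 = ndtri(1 - 0.05 / 2)            # two-sided 95% quantile, ~1.96
c, p = 50.0, 0.3                    # bin count and observed proportion
den = 1 + z0 ** 2 / c
xc = (p + z0 ** 2 / (2 * c)) / den
halfwidth = z0 * np.sqrt(p * (1 - p) / c + z0 ** 2 / (4 * c ** 2)) / den
print(xc - halfwidth, xc + halfwidth)   # plo, pup before clipping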
@ -4061,14 +4062,18 @@ def kreg_demo3(x, y, fun1, hs=None, fun='hisj', plotlog=False):

    #mi = kreg.eval_grid(x)
    #sigma = (stineman_interp(x, xiii, pup)-stineman_interp(x, xiii, plo))/4
    #aic = np.abs((y-mi)/sigma).std()+ 0.5*(df[:-1]*df[1:]<0).sum()/n
    #aic = np.abs((yiii-fiii)/(pup-plo)).std() + \
    #    0.5*(df[:-1]*df[1:]<0).sum() + \
    #    ((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum()

    k = (df[:-1] * df[1:] < 0).sum()  # numpeaks
    sigmai = (pup - plo)
    aic = (((yiii - fiii) / sigmai) ** 2).sum() + \
        2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \
        np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum()

    #aic = (((yiii-fiii)/sigmai)**2).sum()+ 2*k*(k+1)/(ni-k+1) + \
    #    np.abs((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum()

    #aic = averr + ((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum()
@ -4140,14 +4145,16 @@ def kreg_demo4(x, y, hs, hopt, alpha=0.05):
    yi = np.where(c == 0, 0, c0 / c)

    f.children = [PlotData(
        [plo, pup], xiii, plotmethod='fill_between',
        plot_kwds=dict(alpha=0.2, color='r')),
        PlotData(yi, xi, plotmethod='scatter', plot_kwds=dict(color='r', s=5))]

    yiii = interpolate.interp1d(xi, yi)(xiii)
    df = np.diff(fiii)
    k = (df[:-1] * df[1:] < 0).sum()  # numpeaks
    sigmai = (pup - plo)
    aicc = (((yiii - fiii) / sigmai) ** 2).sum() + \
        2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \
        np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum()

    f.aicc = aicc
@ -4168,7 +4175,7 @@ def check_kreg_demo3():

    for fun in ['hste', ]:
        hsmax, _hs1, _hs2 = _get_regression_smooting(x, y, fun=fun)
        for hi in np.linspace(hsmax * 0.25, hsmax, 9):
            plt.figure(k)
            k += 1
@ -4197,7 +4204,7 @@ def check_kreg_demo4():
    hopt = sqrt(hopt1 * hopt2)
    #hopt = _get_regression_smooting(x,y,fun='hos')[0]
    # , 'hisj', 'hns', 'hstt' @UnusedVariable
    for _j, fun in enumerate(['hste']):
        hsmax, _hs1, _hs2 = _get_regression_smooting(x, y, fun=fun)
        fmax = kreg_demo4(x, y, hsmax + 0.1, hopt)
@ -4320,10 +4327,12 @@ def empirical_bin_prb(x, y, hopt, color='r'):
    else:
        c0 = np.zeros(xi.shape)
    yi = np.where(c == 0, 0, c0 / c)
    return PlotData(yi, xi, plotmethod='scatter',
                    plot_kwds=dict(color=color, s=5))


def smoothed_bin_prb(x, y, hs, hopt, alpha=0.05, color='r', label='',
                     bin_prb=None):
    '''
    Parameters
    ----------
@ -4379,14 +4388,16 @@ def smoothed_bin_prb(x, y, hs, hopt, alpha=0.05, color='r', label='', bin_prb=No
    if label:
        f.plot_kwds['label'] = label
    f.children = [PlotData(
        [plo, pup], xiii, plotmethod='fill_between',
        plot_kwds=dict(alpha=0.2, color=color)),
        bin_prb]

    yiii = interpolate.interp1d(xi, yi)(xiii)
    df = np.diff(fiii)
    k = (df[:-1] * df[1:] < 0).sum()  # numpeaks
    sigmai = (pup - plo)
    aicc = (((yiii - fiii) / sigmai) ** 2).sum() + \
        2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \
        np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum()

    f.aicc = aicc
@ -4408,17 +4419,15 @@ def regressionbin(x, y, alpha=0.05, color='r', label=''):
    y : arraylike
        of 0 and 1
    '''
    hopt1, _h1, _h2 = _get_regression_smooting(x, y, fun='hos')
    hopt2, _h1, _h2 = _get_regression_smooting(x, y, fun='hste')
    hopt = sqrt(hopt1 * hopt2)

    fbest = smoothed_bin_prb(x, y, hopt2 + 0.1, hopt, alpha, color, label)
    bin_prb = fbest.children[-1]
    for fun in ['hste']:  # , 'hisj', 'hns', 'hstt'
        hsmax, _hs1, _hs2 = _get_regression_smooting(x, y, fun=fun)
        for hi in np.linspace(hsmax * 0.1, hsmax, 55):
            f = smoothed_bin_prb(x, y, hi, hopt, alpha, color, label, bin_prb)
            if f.aicc <= fbest.aicc:
@ -4479,8 +4488,8 @@ def kde_gauss_demo(n=50):
    print(fmax / f2.data.max())
    format_ = ''.join(('%g, ') * d)
    format_ = 'hs0=%s hs1=%s hs2=%s' % (format_, format_, format_)
    print(format_ % tuple(kde0.hs.tolist() +
                          kde1.tkde.hs.tolist() + kde2.hs.tolist()))
    print('inc0 = %d, inc1 = %d, inc2 = %d' % (kde0.inc, kde1.inc, kde2.inc))
@ -1,136 +0,0 @@
import numpy as np
def meshgrid(*xi, **kwargs):
"""
Return coordinate matrices from one or more coordinate vectors.
Make N-D coordinate arrays for vectorized evaluations of
N-D scalar/vector fields over N-D grids, given
one-dimensional coordinate arrays x1, x2,..., xn.
Parameters
----------
x1, x2,..., xn : array_like
1-D arrays representing the coordinates of a grid.
indexing : 'xy' or 'ij' (optional)
cartesian ('xy', default) or matrix ('ij') indexing of output
sparse : True or False (default) (optional)
If True a sparse grid is returned in order to conserve memory.
copy : True (default) or False (optional)
If False a view into the original arrays are returned in order to
conserve memory. Please note that sparse=False, copy=False will likely
return non-contiguous arrays. Furthermore, more than one element of a
broadcasted array may refer to a single memory location. If you
need to write to the arrays, make copies first.
Returns
-------
X1, X2,..., XN : ndarray
For vectors `x1`, `x2`,..., 'xn' with lengths ``Ni=len(xi)`` ,
return ``(N1, N2, N3,...Nn)`` shaped arrays if indexing='ij'
or ``(N2, N1, N3,...Nn)`` shaped arrays if indexing='xy'
with the elements of `xi` repeated to fill the matrix along
the first dimension for `x1`, the second for `x2` and so on.
Notes
-----
This function supports both indexing conventions through the indexing
keyword argument. Giving the string 'ij' returns a meshgrid with matrix
indexing, while 'xy' returns a meshgrid with Cartesian indexing. The
difference is illustrated by the following code snippet:
xv, yv = meshgrid(x, y, sparse=False, indexing='ij')
for i in range(nx):
for j in range(ny):
# treat xv[i,j], yv[i,j]
xv, yv = meshgrid(x, y, sparse=False, indexing='xy')
for i in range(nx):
for j in range(ny):
# treat xv[j,i], yv[j,i]
See Also
--------
index_tricks.mgrid : Construct a multi-dimensional "meshgrid"
using indexing notation.
index_tricks.ogrid : Construct an open multi-dimensional "meshgrid"
using indexing notation.
Examples
--------
>>> nx, ny = (3, 2)
>>> x = np.linspace(0, 1, nx)
>>> y = np.linspace(0, 1, ny)
>>> xv, yv = meshgrid(x, y)
>>> xv
array([[ 0. , 0.5, 1. ],
[ 0. , 0.5, 1. ]])
>>> yv
array([[ 0., 0., 0.],
[ 1., 1., 1.]])
>>> xv, yv = meshgrid(x, y, sparse=True) # make sparse output arrays
>>> xv
array([[ 0. , 0.5, 1. ]])
>>> yv
array([[ 0.],
[ 1.]])
`meshgrid` is very useful to evaluate functions on a grid.
>>> x = np.arange(-5, 5, 0.1)
>>> y = np.arange(-5, 5, 0.1)
>>> xx, yy = meshgrid(x, y, sparse=True)
>>> z = np.sin(xx**2+yy**2)/(xx**2+yy**2)
>>> import matplotlib.pyplot as plt
>>> h = plt.contourf(x,y,z)
"""
copy_ = kwargs.get('copy', True)
args = np.atleast_1d(*xi)
ndim = len(args)
if not isinstance(args, list) or ndim < 2:
raise TypeError(
'meshgrid() takes 2 or more arguments (%d given)' % int(ndim > 0))
sparse = kwargs.get('sparse', False)
indexing = kwargs.get('indexing', 'xy')
s0 = (1,) * ndim
output = [x.reshape(s0[:i] + (-1,) + s0[i + 1::])
for i, x in enumerate(args)]
shape = [x.size for x in output]
if indexing == 'xy':
# switch first and second axis
output[0].shape = (1, -1) + (1,) * (ndim - 2)
output[1].shape = (-1, 1) + (1,) * (ndim - 2)
shape[0], shape[1] = shape[1], shape[0]
if sparse:
if copy_:
return [x.copy() for x in output]
else:
return output
else:
# Return the full N-D matrix (not only the 1-D vector)
if copy_:
mult_fact = np.ones(shape, dtype=int)
return [x * mult_fact for x in output]
else:
return np.broadcast_arrays(*output)
def ndgrid(*args, **kwargs):
"""
Same as calling meshgrid with indexing='ij' (see meshgrid for
documentation).
"""
kwargs['indexing'] = 'ij'
return meshgrid(*args, **kwargs)
if __name__ == '__main__':
import doctest
doctest.testmod()
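
Deleting this local meshgrid makes sense because numpy (>= 1.7) ships the
same `sparse`/`indexing` keywords; the drop-in replacement, as a quick check:

import numpy as np

x = np.linspace(0, 1, 3)
y = np.linspace(0, 1, 2)
xv, yv = np.meshgrid(x, y, sparse=True, indexing='xy')
print(xv.shape, yv.shape)   # (1, 3) (2, 1), as the removed docstring showed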
@ -2,6 +2,7 @@ from operator import itemgetter as _itemgetter
from keyword import iskeyword as _iskeyword
import sys as _sys


def namedtuple(typename, field_names, verbose=False):
    """Returns a new subclass of tuple with named fields.
@ -27,30 +28,39 @@ def namedtuple(typename, field_names, verbose=False):
    """

    # Parse and validate the field names. Validation serves two purposes,
    # generating informative error messages and preventing template injection
    # attacks.
    if isinstance(field_names, basestring):
        # names separated by whitespace and/or commas
        field_names = field_names.replace(',', ' ').split()
    field_names = tuple(field_names)
    for name in (typename,) + field_names:
        if not min(c.isalnum() or c == '_' for c in name):
            raise ValueError(
                'Type names and field names can only contain alphanumeric ' +
                'characters and underscores: %r' % name)
        if _iskeyword(name):
            raise ValueError(
                'Type names and field names cannot be a keyword: %r' % name)
        if name[0].isdigit():
            raise ValueError('Type names and field names cannot start ' +
                             'with a number: %r' % name)
    seen_names = set()
    for name in field_names:
        if name.startswith('_'):
            raise ValueError(
                'Field names cannot start with an underscore: %r' % name)
        if name in seen_names:
            raise ValueError('Encountered duplicate field name: %r' % name)
        seen_names.add(name)

    # Create and fill-in the class template
    numfields = len(field_names)
    # tuple repr without parens or quotes
    argtxt = repr(field_names).replace("'", "")[1:-1]
    reprtxt = ', '.join('%s=%%r' % name for name in field_names)
    dicttxt = ', '.join('%r: t[%d]' % (name, pos)
                        for pos, name in enumerate(field_names))
    template = '''class %(typename)s(tuple):
        '%(typename)s(%(argtxt)s)' \n
        __slots__ = () \n
@ -88,19 +98,15 @@ def namedtuple(typename, field_names, verbose=False):
        raise SyntaxError(e.message + ':\n' + template)
    result = namespace[typename]

    # For pickling to work, the __module__ variable needs to be set to the
    # frame where the named tuple is created. Bypass this step in environments
    # where sys._getframe is not defined (Jython for example).
    if hasattr(_sys, '_getframe'):
        result.__module__ = _sys._getframe(1).f_globals['__name__']

    return result


if __name__ == '__main__':
    # verify that instances can be pickled
    from cPickle import loads, dumps
@ -110,18 +116,24 @@ if __name__ == '__main__':

    # test and demonstrate ability to override methods
    class Point(namedtuple('Point', 'x y')):

        @property
        def hypot(self):
            return (self.x ** 2 + self.y ** 2) ** 0.5

        def __str__(self):
            return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y,
                                                           self.hypot)

    for p in Point(3, 4), Point(14, 5), Point(9. / 7, 6):
        print p

    class Point(namedtuple('Point', 'x y')):
        '''Point class with optimized _make() and _replace()
        without error-checking
        '''
        _make = classmethod(tuple.__new__)

        def _replace(self, _map=map, **kwds):
            return self._make(_map(kwds.get, ('x', 'y'), self))
@ -7,7 +7,7 @@ if False:
    try:
        from scitools import easyviz as plotbackend
        if verbose:
            print('wafo: plotbackend is set to scitools.easyviz')
    except:
        warnings.warn('wafo: Unable to load scitools.easyviz as plotbackend')
        plotbackend = None
@ -16,7 +16,7 @@ else:
    try:
        from matplotlib import pyplot as plotbackend
        plotbackend.interactive(True)
        if verbose:
            print('wafo: plotbackend is set to matplotlib.pyplot')
    except:
        warnings.warn('wafo: Unable to load matplotlib.pyplot as plotbackend')
        plotbackend = None
@ -1,12 +1,13 @@
"""
Extended functions to operate on polynomials
"""
#-------------------------------------------------------------------------
# Name: polynomial
# Purpose: Functions to operate on polynomials.
#
# Author: pab
# polyXXX functions are based on functions found in the matlab toolbox polyutil
# written by
# Author: Peter J. Acklam
# E-mail: pjacklam@online.no
# WWW URL: http://home.online.no/~pjacklam
@ -14,22 +15,23 @@
# Created: 30.12.2008
# Copyright: (c) pab 2008
# Licence: LGPL
#-------------------------------------------------------------------------
#!/usr/bin/env python
from plotbackend import plotbackend as plt
import numpy as np
from numpy.fft import fft, ifft
from numpy import (zeros, ones, zeros_like, array, asarray, newaxis, arange,
                   logical_or, any, pi, cos, round, diff, all, exp,
                   where, extract, linalg, sign, concatenate, floor, isreal,
                   conj, remainder, linspace, atleast_1d, hstack, sum)
from numpy.lib.polynomial import *  # @UnusedWildImport
from scipy.misc.common import pade  # @UnresolvedImport

__all__ = np.lib.polynomial.__all__
__all__ = __all__ + ['pade', 'padefit', 'polyreloc', 'polyrescl', 'polytrim',
                     'poly2hstr', 'poly2str', 'polyshift', 'polyishift',
                     'map_from_intervall', 'map_to_intervall', 'cheb2poly',
                     'chebextr', 'chebroot', 'chebpoly', 'chebfit', 'chebval',
                     'chebder', 'chebint', 'Cheb1d', 'dct', 'idct']
@ -86,8 +88,9 @@ def polyint(p, m=1, k=None):
    >>> np.polyder(P, 2)(0)
    0.0
    >>> P = np.polyint(p, 3, k=[6, 5, 3])
    >>> P.coefficients.tolist()
    [0.016666666666666666, 0.041666666666666664, 0.16666666666666666, 3.0,
    5.0, 3.0]

    Note that 3 = 6 / 2!, and that the constants are given in the order of
    integrations. Constant of the highest-order polynomial term comes first:
@ -102,15 +105,15 @@ def polyint(p, m=1, k=None):
    """
    m = int(m)
    if m < 0:
        raise ValueError("Order of integral must be positive (see polyder)")
    if k is None:
        k = zeros(m, float)
    k = atleast_1d(k)
    if len(k) == 1 and m > 1:
        k = k[0] * ones(m, float)
    if len(k) < m:
        raise ValueError(
            "k must be a scalar or a rank-1 array of length 1 or >m.")
    truepoly = isinstance(p, poly1d)
    p = asarray(p)
    if m == 0:
@ -132,6 +135,7 @@ def polyint(p, m=1, k=None):
        return poly1d(val)
    return val
def polyder(p, m=1):
    """
    Return the derivative of the specified order of a polynomial.
@ -186,7 +190,7 @@ def polyder(p, m=1):
    """
    m = int(m)
    if m < 0:
        raise ValueError("Order of derivative must be positive (see polyint)")
    truepoly = isinstance(p, poly1d)
    p = asarray(p)
    if m == 0:
@ -204,10 +208,13 @@ def polyder(p, m=1):
        return poly1d(val)
    return val
def polydeg(x, y):
    '''
    Return optimal degree for polynomial fitting

    N = POLYDEG(X,Y) finds the optimal degree for polynomial fitting,
    according to the Akaike's information criterion.

    Assuming that you want to find the degree N of a polynomial that fits
@ -215,8 +222,8 @@ def unfinished_polydeg(x,y):
    criterion is defined by:
        2*(N + 1) + n * (log(2 * pi * RSS / n) + 1)
    where n is the number of points and RSS is the residual sum of squares.
    The optimal degree N is defined here as that which minimizes AIC:
    http://en.wikipedia.org/wiki/Akaike_Information_Criterion

    Notes:
    -----
@ -226,40 +233,33 @@ def unfinished_polydeg(x,y):
    ORTHOFIT is more appropriate than POLYFIT for polynomial fitting with
    relatively high degrees.

    Example:
    -------
    >>> x = np.linspace(0,10,300)
    >>> y = np.sin(x ** 3 / 100) ** 2 + 0.05 * np.random.randn(x.size)
    >>> n = polydeg(x,y)
    >>> n
    21

    ys = orthofit(x, y, n)
    plt.plot(x, y, '.', x, ys, 'k')

    See also
    --------
    polyfit, orthofit
    '''
    x, y = np.atleast_1d(x, y)
    x = x.ravel()
    y = y.ravel()
    N = len(x)

    # Search the optimal degree minimizing the Akaike's information criterion
    # y(x) are fitted in a least-squares sense using a polynomial of degree n
    # developed in a series of orthogonal polynomials.
    ys = np.ones((N,)) * y.mean()
    # correction for small sample sizes
    AIC = 2 + N * \
        (np.log(2 * pi * ((ys - y) ** 2).sum() / N) + 1) + 4 / (N - 2)

    n = 1
    nit = 0
@ -269,18 +269,11 @@ def unfinished_polydeg(x,y):
# is a (likely) global minimum. # is a (likely) global minimum.
while nit < 3: while nit < 3:
if n>0: p = orthofit(x, y, n)
p[0,n] = sum(x*PL[:,n]**2)/sum(PL[:,n]**2) ys = orthoval(p, x)
p[1,n] = sum(x*PL[:,n-1]*PL[:,n])/sum(PL[:,n-1]**2)
PL[:,n] = (x-p[0,n+1])*PL[:,n]-p[1,n+1]*PL[:,n-1]
#end
tmp = sum(y*PL)/sum(PL**2)
ys = sum(PL*tmp,axis=-1)
# -- Akaike's Information Criterion # -- Akaike's Information Criterion
aic = 2*(n+1)+N*(np.log(2*pi*sum((ys-y.ravel()**2)/N)+1)) + 2*(n+1)*(n+2)/(N-n-2) aic = 2 * (n + 1) * (1 + (n + 2) / (N - n - 2.)) + \
N * (np.log(2 * pi * sum((ys - y) ** 2) / N) + 1)
if aic >= AIC: if aic >= AIC:
nit += 1 nit += 1
@ -293,29 +286,118 @@ def unfinished_polydeg(x,y):
if n >= N: if n >= N:
break break
n = n - nit - 1 n = n - nit - 1
return n return n
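# The AICc score described in the docstring above can be sketched as a
# standalone function. A minimal, hypothetical helper (not part of wafo),
# assuming numpy is imported as np and orthofit/orthoval (defined below)
# are in scope:
def _aicc_for_degree(x, y, n):
    # residual sum of squares of the degree-n orthogonal fit
    ys = orthoval(orthofit(x, y, n), x)
    N = len(x)
    rss = ((ys - y) ** 2).sum()
    # 2*(n+1) parameters plus the small-sample correction term
    return (2. * (n + 1) * (1. + (n + 2.) / (N - n - 2.)) +
            N * (np.log(2. * np.pi * rss / N) + 1.))
# Comparing these scores across candidate degrees mirrors the search
# performed inside polydeg above.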
def unfinished_orthofit(x,y,n):
def orthoval(p, x):
''' '''
ORTHOFIT Fit polynomial to data. Evaluation of orthogonal polynomial
YS = ORTHOFIT(X,Y,N) smooths/fits data Y(X) in a least-squares sense
using a polynomial of degree N and returns the smoothed data YS.
[YS,YI] = ORTHOFIT(X,Y,N,XI) also returns the values YI of the fitting Parameters
polynomial at the points of a different XI array. ----------
p : array_like
2D array of orthogonal polynomial coefficients, as returned by
orthofit (shape (3, n+1)).
x : array_like
A number or a 1D array of numbers "at" which to evaluate `p`.
YI = ORTHOFIT(X,Y,N,XI) returns only the values YI of the fitting Returns
polynomial at the points of the XI array. -------
values : ndarray
the orthogonal polynomial evaluated at the points x.
[YS,P] = ORTHOFIT(X,Y,N) returns the polynomial coefficients P for use See Also
with POLYVAL. --------
orthofit
'''
p = np.atleast_2d(p)
n = p.shape[1] - 1
xi = np.atleast_1d(x)
shape0 = xi.shape
if n == 0:
return np.ones(shape0) * p[0]
xi = xi.ravel()
xn = np.ones((n + 1, len(xi)))
xn[1] = xi - p[1, 1]
for i in range(2, n + 1):
xn[i, :] = (xi - p[1, i]) * xn[i - 1, :] - p[2, i] * xn[i - 2, :]
ys = np.dot(p[0], xn)
return ys.reshape(shape0)
def ortho2poly(p):
"""
Converts orthogonal polynomial to ordinary polynomial coefficients
Parameters
----------
p : array-like
orthogonal polynomial coefficients
Returns
-------
p : ndarray
ordinary polynomial coefficients
It is not advised to do this for p.shape[1]>10 due to numerical
cancellations.
See also
--------
orthoval
orthofit
Examples
--------
>>> import numpy as np
>>> x = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0])
>>> y = np.array([0.0, 0.8, 0.9, 0.1, -0.8, -1.0])
>>> p = orthofit(x, y, 3)
>>> p
array([[ 0. , -0.30285714, -0.16071429, 0.08703704],
[ 0. , 2.5 , 2.5 , 2.5 ],
[ 0. , 0. , 2.91666667, 2.13333333]])
>>> ortho2poly(p)
array([ 0.08703704, -0.81349206, 1.69312169, -0.03968254])
>>> np.polyfit(x, y, 3)
array([ 0.08703704, -0.81349206, 1.69312169, -0.03968254])
"""
p = np.atleast_2d(p)
n = p.shape[1] - 1
if n == 0:
return p[0]
x = [1, ] * (n + 1)
x[1] = np.array([1, - p[1, 1]])
for i in range(2, n + 1):
x[i] = polyadd(polymul([1, - p[1, i]], x[i - 1]), - p[2, i] * x[i - 2])
for i in range(n + 1):
x[i] *= p[0, i]
return reduce(polyadd, x)
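# Consistency sketch (hypothetical helper, not part of wafo): for moderate
# degrees, evaluating the orthogonal form via orthoval and evaluating the
# converted ordinary coefficients via np.polyval should agree.
def _check_ortho2poly(n=5):
    x = np.linspace(0., 5., 50)
    y = np.cos(x)
    p = orthofit(x, y, n)
    return np.allclose(orthoval(p, x), np.polyval(ortho2poly(p), x))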
def orthofit(x, y, n):
'''
Fit orthogonal polynomial to data.
Parameters
----------
x, y : arrays
data Y(X) to fit to a polynomial.
n : integer
degree of fitted polynomial.
Returns
-------
p : array
orthogonal polynomial
Notes: Notes:
----- -----
ORTHOFIT smooths/fits data using a polynomial of degree N developed in Orthofit smooths/fits data using a polynomial of degree N developed in
a sequence of orthogonal polynomials. ORTHOFIT is more appropriate than a sequence of orthogonal polynomials. ORTHOFIT is more appropriate than
POLYFIT for polynomial fitting and smoothing since this method does not polyfit for polynomial fitting and smoothing since this method does not
involve any matrix linear system but a simple recursive procedure. involve any matrix linear system but a simple recursive procedure.
Degrees much higher than 30 could be used with orthogonal polynomials, Degrees much higher than 30 could be used with orthogonal polynomials,
whereas badly conditioned matrices may appear with a classical whereas badly conditioned matrices may appear with a classical
@ -323,73 +405,49 @@ def unfinished_orthofit(x,y,n):
To avoid using unnecessarily high degrees, you may let the function To avoid using unnecessarily high degrees, you may let the function
POLYDEG choose it for you. POLYDEG finds an optimal polynomial degree POLYDEG choose it for you. POLYDEG finds an optimal polynomial degree
according to the Akaike's information criterion (available <a according to the Akaike's information criterion.
href="matlab:web('http://www.biomecardio.com/matlab/polydeg.html')">here</a>).
Example: Example:
------- -------
x = linspace(0,10,300); >>> x = np.linspace(0,10,300);
y = sin(x.^3/100).^2 + 0.05*randn(size(x)); >>> y = np.sin(x**3/100)**2 + 0.05*np.random.randn(x.size)
ys = orthofit(x,y,25); >>> p = orthofit(x, y, 25)
plot(x,y,'.',x,ys,'k') >>> ys = orthoval(p, x)
try POLYFIT for comparison...
Automatic degree determination with <a
href="matlab:web('http://www.biomecardio.com/matlab/polydeg.html')">POLYDEG</a>
n = polydeg(x,y);
ys = orthofit(x,y,n);
plot(x, y,'.',x, ys, 'k') plt.plot(x, y, '.', x, ys, 'k')
Reference: Methodes de calcul numerique 2. JP Nougier. Hermes Science See also
Publications, 2001. Section 4.7 pp 116-121 --------
polydeg, polyfit, polyval
Damien Garcia, 09/2007, revised 01/2010
See also POLYDEG, POLYFIT, POLYVAL. Reference:
---------
Methodes de calcul numerique 2. JP Nougier. Hermes Science
Publications, 2001. Section 4.7 pp 116-121
''' '''
x, y = np.atleast_1d(x, y) x, y = np.atleast_1d(x, y)
# Particular case: n=0
if n==0:
p = y.mean()
ys = np.ones(y.shape)*p
return p, ys
# Reshape
x = x.ravel() x = x.ravel()
# siz0 = y.shape
y = y.ravel() y = y.ravel()
# Particular case: n=0
if n == 0:
return y.mean()
# Coefficients of the orthogonal polynomials # p = Coefficients of the orthogonal polynomials
p = np.zeros((3, n + 1)) p = np.zeros((3, n + 1))
p[1, 1] = x.mean() p[1, 1] = x.mean()
N = len(x) N = len(x)
PL = np.ones((N,n+1)) PL = np.ones((n + 1, N))
PL[:,1] = x-p[1,1] PL[1] = x - p[1, 1]
for i in range(2, n + 1): for i in range(2, n + 1):
p[1,i] = sum(x*PL[:,i-1]**2)/sum(PL[:,i-1]**2) p[1, i] = np.dot(x, PL[i - 1] ** 2) / sum(PL[i - 1] ** 2)
p[2,i] = sum(x*PL[:,i-2]*PL[:,i-1])/sum(PL[:,i-2]**2) p[2, i] = np.dot(x, PL[i - 2] * PL[i - 1]) / sum(PL[i - 2] ** 2)
PL[:,i] = (x-p[1,i])*PL[:,i-1]-p[2,i]*PL[:,i-2] PL[i] = (x - p[1, i]) * PL[i - 1] - p[2, i] * PL[i - 2]
#end p[0, :] = np.dot(PL, y) / sum(PL ** 2, axis=1)
p[0,:] = sum(PL*y)/sum(PL**2);
# ys = smoothed y
#ys = sum(PL*p(0,:) axis=1)
#ys.shape = siz0
# Coefficients of the polynomial in its final form
yi = np.zeros((n+1,n+1))
yi[0,n] = 1
yi[1,n-1:n+1] = 1 -p[1,1]
for i in range(2, n+1):
yi[i,:] = np.hstack((yi[i-1,1:], 0))-p[1,i]*yi[i-1,:]-p[2,i]*yi[i-2,:];
p = sum(p[0,:]*yi, axis=0)
return p return p
# ys = np.dot(p[0, :], PL) # smoothed y
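# The recursion above is the classical three-term construction of a
# polynomial basis that is orthogonal over the sample points x, which is
# why the fit reduces to independent projections. A hedged sketch
# (hypothetical helper, not part of wafo) rebuilding the basis from the
# returned coefficients and checking that its Gram matrix is diagonal:
def _check_pl_orthogonality(n=4):
    x = np.linspace(0., 1., 30)
    p = orthofit(x, x, n)  # y only affects p[0]; the recurrence uses x
    PL = np.ones((n + 1, len(x)))
    PL[1] = x - p[1, 1]
    for i in range(2, n + 1):
        PL[i] = (x - p[1, i]) * PL[i - 1] - p[2, i] * PL[i - 2]
    G = np.dot(PL, PL.T)  # Gram matrix over the sample
    return np.allclose(G, np.diag(np.diag(G)), atol=1e-8 * np.abs(G).max())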
def polyreloc(p, x, y=0.0): def polyreloc(p, x, y=0.0):
""" """
@ -403,8 +461,8 @@ def polyreloc(p, x, y=0.0):
Parameters Parameters
---------- ----------
p : array-like, poly1d p : array-like, poly1d
vector or matrix of column vectors of polynomial coefficients to relocate. vector or matrix of column vectors of polynomial coefficients to
(Polynomial coefficients are in decreasing order.) relocate. (Polynomial coefficients are in decreasing order.)
x : scalar x : scalar
distance to relocate P along x-axis distance to relocate P along x-axis
y : scalar y : scalar
@ -449,6 +507,7 @@ def polyreloc(p, x, y=0.0):
r = poly1d(r) r = poly1d(r)
return r return r
def polyrescl(p, x, y=1.0): def polyrescl(p, x, y=1.0):
""" """
Rescale polynomial. Rescale polynomial.
@ -456,8 +515,8 @@ def polyrescl(p, x, y=1.0):
Parameters Parameters
---------- ----------
p : array-like, poly1d p : array-like, poly1d
vector or matrix of column vectors of polynomial coefficients to rescale. vector or matrix of column vectors of polynomial coefficients to
(Polynomial coefficients are in decreasing order.) rescale. (Polynomial coefficients are in decreasing order.)
x,y : scalars x,y : scalars
defining the factors to rescale the polynomial `p` in defining the factors to rescale the polynomial `p` in
x-direction and y-direction, respectively. x-direction and y-direction, respectively.
@ -499,6 +558,7 @@ def polyrescl(p, x, y=1.0):
q = poly1d(q) q = poly1d(q)
return q return q
def polytrim(p): def polytrim(p):
""" """
Trim polynomial by stripping off leading zeros. Trim polynomial by stripping off leading zeros.
@ -539,6 +599,7 @@ def polytrim(p):
r = r[is_not_lead_zeros, :] r = r[is_not_lead_zeros, :]
return r return r
def poly2hstr(p, variable='x'): def poly2hstr(p, variable='x'):
""" """
Return polynomial as a Horner represented string. Return polynomial as a Horner represented string.
@ -570,7 +631,7 @@ def poly2hstr(p, variable='x'):
coefs = polytrim(atleast_1d(p)) coefs = polytrim(atleast_1d(p))
order = len(coefs) - 1 # Order of polynomial. order = len(coefs) - 1 # Order of polynomial.
s = '' # Initialize output string. s = '' # Initialize output string.
ix = 1; ix = 1
for expon in range(order, -1, -1): for expon in range(order, -1, -1):
coef = coefs[order - expon] coef = coefs[order - expon]
#% There is no point in adding a zero term (except if it's the only #% There is no point in adding a zero term (except if it's the only
@ -581,14 +642,16 @@ def poly2hstr(p, variable='x'):
#% Append exponent if necessary. #% Append exponent if necessary.
if ix > 1: if ix > 1:
exponstr = '%.0f' % ix exponstr = '%.0f' % ix
s = '%s**%s' % (s, exponstr); s = '%s**%s' % (s, exponstr)
ix = 1 ix = 1
#% Is it the first term? #% Is it the first term?
isfirst = s == '' isfirst = s == ''
# We need the coefficient only if it is different from 1 or -1 or # We need the coefficient only if it is different from 1 or -1 or
# when it is the constant term. # when it is the constant term.
needcoef = ((abs(coef) != 1) | (expon == 0) & isfirst) | 1 - isfirst needcoef = ((abs(coef) != 1) | (expon == 0) & isfirst) | 1 - isfirst
# We need the variable except in the constant term. # We need the variable except in the constant term.
needvar = (expon != 0) needvar = (expon != 0)
@ -603,7 +666,6 @@ def poly2hstr(p, variable='x'):
else: else:
s = '%s + ' % s # % Binary plus (addition). s = '%s + ' % s # % Binary plus (addition).
#% Append the coefficient if it is different from one or when it is #% Append the coefficient if it is different from one or when it is
#% the constant term. #% the constant term.
if needcoef: if needcoef:
@ -624,6 +686,7 @@ def poly2hstr(p, variable='x'):
s = '0' s = '0'
return s return s
def poly2str(p, variable='x'): def poly2str(p, variable='x'):
""" """
Return polynomial as a string. Return polynomial as a string.
@ -698,6 +761,7 @@ def poly2str(p, variable='x'):
thestr = newstr thestr = newstr
return thestr return thestr
def polyshift(py, a=-1, b=1): def polyshift(py, a=-1, b=1):
""" """
Polynomial coefficient shift Polynomial coefficient shift
@ -739,6 +803,7 @@ def polyshift(py, a= -1, b=1):
L = b - a L = b - a
return polyishift(py, -(2. + b + a) / L, (2. - b - a) / L) return polyishift(py, -(2. + b + a) / L, (2. - b - a) / L)
def polyishift(px, a=-1, b=1): def polyishift(px, a=-1, b=1):
""" """
Inverse polynomial coefficient shift Inverse polynomial coefficient shift
@ -782,14 +847,17 @@ def polyishift(px, a= -1, b=1):
xloc = -float(a + b) / L xloc = -float(a + b) / L
return polyreloc(polyrescl(px, xscale), xloc) return polyreloc(polyrescl(px, xscale), xloc)
def map_from_interval(x, a, b): def map_from_interval(x, a, b):
"""F(x), where F: [a,b] -> [-1,1].""" """F(x), where F: [a,b] -> [-1,1]."""
return (x - (b + a) / 2.0) * (2.0 / (b - a)) return (x - (b + a) / 2.0) * (2.0 / (b - a))
def map_to_interval(x, a, b): def map_to_interval(x, a, b):
"""F(x), where F: [-1,1] -> [a,b].""" """F(x), where F: [-1,1] -> [a,b]."""
return (x * (b - a) + (b + a)) / 2.0 return (x * (b - a) + (b + a)) / 2.0
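# Sanity sketch (hypothetical helper, not part of wafo): the two maps are
# mutual inverses, so a round trip over [a, b] is the identity.
def _check_interval_maps(a=0., b=2.):
    xi = np.linspace(a, b, 5)
    return np.allclose(map_to_interval(map_from_interval(xi, a, b), a, b), xi)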
def poly2cheb(p, a=-1, b=1): def poly2cheb(p, a=-1, b=1):
""" """
Convert polynomial coefficients into Chebyshev coefficients Convert polynomial coefficients into Chebyshev coefficients
@ -842,6 +910,7 @@ def poly2cheb(p, a= -1, b=1):
n = len(f.coeffs) n = len(f.coeffs)
return chebfit(f, n, a, b) return chebfit(f, n, a, b)
def cheb2poly(ck, a=-1, b=1): def cheb2poly(ck, a=-1, b=1):
""" """
Converts Chebyshev coefficients to polynomial coefficients Converts Chebyshev coefficients to polynomial coefficients
@ -885,7 +954,7 @@ def cheb2poly(ck, a= -1, b=1):
b_Nmi = zeros(1) b_Nmi = zeros(1)
b_Nmip1 = zeros(1) b_Nmip1 = zeros(1)
y = r_[2 / (b - a), -(a + b) / (b - a)] y = np.r_[2 / (b - a), -(a + b) / (b - a)]
y2 = 2. * y y2 = 2. * y
# Clenshaw recurence # Clenshaw recurence
@ -903,6 +972,7 @@ def cheb2poly(ck, a= -1, b=1):
p[-nb::] = p[-nb::] - b_Nmip1 p[-nb::] = p[-nb::] - b_Nmip1
return polytrim(p) return polytrim(p)
def chebextr(n): def chebextr(n):
""" """
Return roots of derivative of Chebychev polynomial of the first kind. Return roots of derivative of Chebychev polynomial of the first kind.
@ -933,7 +1003,8 @@ def chebextr(n):
http://en.wikipedia.org/wiki/Chebyshev_nodes http://en.wikipedia.org/wiki/Chebyshev_nodes
http://en.wikipedia.org/wiki/Chebyshev_polynomials http://en.wikipedia.org/wiki/Chebyshev_polynomials
""" """
return - cos((pi * arange(n + 1)) / n); return - cos((pi * arange(n + 1)) / n)
def chebroot(n, kind=1): def chebroot(n, kind=1):
""" """
@ -972,7 +1043,7 @@ def chebroot(n, kind=1):
""" """
if kind not in (1, 2): if kind not in (1, 2):
raise ValueError('kind must be 1 or 2') raise ValueError('kind must be 1 or 2')
return - cos(pi * (arange(n) + 0.5 * kind) / (n + kind - 1)); return - cos(pi * (arange(n) + 0.5 * kind) / (n + kind - 1))
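# Sanity sketch (hypothetical helper, not part of wafo): chebroot(n) should
# be the zeros of the degree-n Chebyshev polynomial of the first kind, here
# evaluated via chebpoly (defined below).
def _check_chebroot(n=5):
    return np.allclose(chebpoly(n, chebroot(n)), 0., atol=1e-9)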
def chebpoly(n, x=None, kind=1): def chebpoly(n, x=None, kind=1):
@ -980,10 +1051,10 @@ def chebpoly(n, x=None, kind=1):
Return Chebyshev polynomial of the first or second kind. Return Chebyshev polynomial of the first or second kind.
These polynomials are orthogonal on the interval [-1,1], with These polynomials are orthogonal on the interval [-1,1], with
respect to the weight function w(x) = (1-x^2)^(-1/2+kind-1). respect to the weight function w(x) = (1-x**2)**(-1/2+kind-1).
chebpoly(n) returns the coefficients of the Chebychev polynomial of degree N. chebpoly(n) returns coefficients of the Chebychev polynomial of degree N.
chebpoly(n,x) returns the Chebychev polynomial of degree N evaluated in X. chebpoly(n,x) returns the Chebychev polynomial of degree N evaluated at X.
Parameters Parameters
---------- ----------
@ -1024,13 +1095,14 @@ def chebpoly(n, x=None, kind=1):
p = ones(1) p = ones(1)
else: else:
p = round(pow(2, n - 2 + kind) * poly(chebroot(n, kind=kind))) p = round(pow(2, n - 2 + kind) * poly(chebroot(n, kind=kind)))
p[1::2] = 0; p[1::2] = 0
return p return p
else: # Evaluate polynomial in chebychev form else: # Evaluate polynomial in chebychev form
ck = zeros(n + 1) ck = zeros(n + 1)
ck[0] = 1. ck[0] = 1.
return _chebval(atleast_1d(x), ck, kind=kind) return _chebval(atleast_1d(x), ck, kind=kind)
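# Small illustration (hypothetical helper, not part of wafo): the degree-2
# Chebyshev polynomial of the first kind is T2(x) = 2*x**2 - 1, so
# evaluating chebpoly(2, x) should match that closed form.
def _check_chebpoly2():
    x = np.array([0., 0.5, 1.])
    return np.allclose(chebpoly(2, x), 2 * x ** 2 - 1)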
def chebfit(fun, n=10, a=-1, b=1, trace=False): def chebfit(fun, n=10, a=-1, b=1, trace=False):
""" """
Computes the Chebyshev coefficients Computes the Chebyshev coefficients
@ -1092,7 +1164,7 @@ def chebfit(fun, n=10, a= -1, b=1, trace=False):
if hasattr(fun, '__call__'): if hasattr(fun, '__call__'):
x = map_to_interval(chebroot(n), a, b) x = map_to_interval(chebroot(n), a, b)
f = fun(x); f = fun(x)
if trace: if trace:
plt.plot(x, f, '+') plt.plot(x, f, '+')
else: else:
@ -1108,6 +1180,7 @@ def chebfit(fun, n=10, a= -1, b=1, trace=False):
ck[0] = ck[0] / 2. ck[0] = ck[0] / 2.
return ck[::-1] return ck[::-1]
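# Usage sketch (hypothetical helper, not part of wafo): a 16-term Chebyshev
# fit of exp on [0, 2] should reproduce the function to high accuracy at
# interior points (chebval is defined below).
def _check_chebfit(n=16):
    a, b = 0., 2.
    ck = chebfit(np.exp, n, a, b)
    x = np.linspace(0.1, 1.9, 7)
    return np.allclose(chebval(x, ck, a, b), np.exp(x), atol=1e-8)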
def dct(x, n=None): def dct(x, n=None):
""" """
Discrete Cosine Transform Discrete Cosine Transform
@ -1158,6 +1231,7 @@ def dct(x, n=None):
else: else:
return y return y
def idct(x, n=None): def idct(x, n=None):
""" """
Inverse Discrete Cosine Transform Inverse Discrete Cosine Transform
@ -1184,7 +1258,6 @@ def idct(x, n=None):
http://users.ece.utexas.edu/~bevans/courses/ee381k/lectures/ http://users.ece.utexas.edu/~bevans/courses/ee381k/lectures/
""" """
x = atleast_1d(x) x = atleast_1d(x)
if n is None: if n is None:
@ -1214,11 +1287,12 @@ def idct(x, n=None):
else: else:
return y return y
def _chebval(x, ck, kind=1): def _chebval(x, ck, kind=1):
""" """
Evaluate polynomial in Chebyshev form. Evaluate polynomial in Chebyshev form.
A polynomial of degree N in Chebyshev form is a polynomial p(x) of the form: A polynomial of degree N in Chebyshev form is a polynomial p(x):
N N
p(x) = sum ck*Tk(x) p(x) = sum ck*Tk(x)
@ -1252,7 +1326,7 @@ def chebval(x, ck, a= -1, b=1, kind=1, fill=None):
""" """
Evaluate polynomial in Chebyshev form at X Evaluate polynomial in Chebyshev form at X
A polynomial of degree N in Chebyshev form is a polynomial p(x) of the form: A polynomial of degree N in Chebyshev form is a polynomial p(x) of the form
N N
p(x) = sum ck*Tk(x) p(x) = sum ck*Tk(x)
@ -1265,7 +1339,8 @@ def chebval(x, ck, a= -1, b=1, kind=1, fill=None):
x : array-like x : array-like
points to evaluate points to evaluate
ck : array-like ck : array-like
polynomial coefficients in Chebyshev form ordered from highest degree to zero polynomial coefficients in Chebyshev form ordered from highest degree
to zero
a,b : real, scalars, optional a,b : real, scalars, optional
limits for polynomial (Default -1,1) limits for polynomial (Default -1,1)
kind: 1 or 2, optional kind: 1 or 2, optional
@ -1362,6 +1437,7 @@ def chebder(ck, a= -1, b=1):
return cder * 2. / (b - a) # Normalize to the interval b-a. return cder * 2. / (b - a) # Normalize to the interval b-a.
def chebint(ck, a=-1, b=1): def chebint(ck, a=-1, b=1):
""" """
Integrate Chebyshev polynomial Integrate Chebyshev polynomial
@ -1410,8 +1486,8 @@ def chebint(ck, a= -1, b=1):
# p(x) = sum cn*Tn(x) # p(x) = sum cn*Tn(x)
# n=0 # n=0
# int p(x) dx = sum cn * int(Tn(x)dx) = 0.5*sum cn *{Tn+1(x)/(n+1) - Tn-1(x)/(n-1)} # int p(x) dx = sum cn * int(Tn(x)dx) =
# = 0.5 sum (cn-1-cn+1)*Tn/n n>0 # 0.5*sum cn *{Tn+1(x)/(n+1) - Tn-1(x)/(n-1)} = 0.5 sum (cn-1-cn+1)*Tn/n n>0
n = len(ck) n = len(ck)
@ -1419,24 +1495,27 @@ def chebint(ck, a= -1, b=1):
con = 0.25 * (b - a) con = 0.25 * (b - a)
dif1 = diff(ck[-1::-2]) dif1 = diff(ck[-1::-2])
ix1 = r_[1:n - 1:2] ix1 = np.r_[1:n - 1:2]
cint[ix1] = -(con * dif1) / ix1 cint[ix1] = -(con * dif1) / ix1
if n > 3: if n > 3:
dif2 = diff(ck[-2::-2]) dif2 = diff(ck[-2::-2])
ix2 = r_[2:n - 1:2] ix2 = np.r_[2:n - 1:2]
cint[ix2] = -(con * dif2) / ix2 cint[ix2] = -(con * dif2) / ix2
cint = cint[::-1] cint = cint[::-1]
#% cint(n) is a special case # cint(n) is a special case
cint[-1] = (con * ck[n - 2]) / (n - 1) cint[-1] = (con * ck[n - 2]) / (n - 1)
cint[0] = 2 * np.sum((-1) ** r_[0:n - 1] * cint[-2::-1]) # Set integration constant # Set integration constant
cint[0] = 2 * np.sum((-1) ** np.r_[0:n - 1] * cint[-2::-1])
return cint return cint
class Cheb1d(object): class Cheb1d(object):
coeffs = None coeffs = None
order = None order = None
a = None a = None
b = None b = None
kind = None kind = None
def __init__(self, ck, a=-1, b=1, kind=1): def __init__(self, ck, a=-1, b=1, kind=1):
if isinstance(ck, Cheb1d): if isinstance(ck, Cheb1d):
for key in ck.__dict__.keys(): for key in ck.__dict__.keys():
@ -1444,14 +1523,13 @@ class Cheb1d(object):
return return
cki = trim_zeros(atleast_1d(ck), 'b') cki = trim_zeros(atleast_1d(ck), 'b')
if len(cki.shape) > 1: if len(cki.shape) > 1:
raise ValueError, "Polynomial must be 1d only." raise ValueError("Polynomial must be 1d only.")
self.__dict__['coeffs'] = cki self.__dict__['coeffs'] = cki
self.__dict__['order'] = len(cki) - 1 self.__dict__['order'] = len(cki) - 1
self.__dict__['a'] = a self.__dict__['a'] = a
self.__dict__['b'] = b self.__dict__['b'] = b
self.__dict__['kind'] = kind self.__dict__['kind'] = kind
def __call__(self, x): def __call__(self, x):
return chebval(x, self.coeffs, self.a, self.b, self.kind) return chebval(x, self.coeffs, self.a, self.b, self.kind)
@ -1471,6 +1549,7 @@ class Cheb1d(object):
def __str__(self): def __str__(self):
pass pass
def __neg__(self): def __neg__(self):
new = Cheb1d(self) new = Cheb1d(self)
new.coeffs = -self.coeffs new.coeffs = -self.coeffs
@ -1479,7 +1558,6 @@ class Cheb1d(object):
def __pos__(self): def __pos__(self):
return self return self
def __add__(self, other): def __add__(self, other):
other = Cheb1d(other) other = Cheb1d(other)
new = Cheb1d(self) new = Cheb1d(self)
@ -1507,10 +1585,11 @@ class Cheb1d(object):
and (self.b == other.b) and (self.kind == other.kind)) and (self.b == other.b) and (self.kind == other.kind))
def __ne__(self, other): def __ne__(self, other):
return any(self.coeffs != other.coeffs) or (self.a != other.a) or (self.b != other.b) or (self.kind != other.kind) return any(self.coeffs != other.coeffs) or (self.a != other.a) or (
self.b != other.b) or (self.kind != other.kind)
def __setattr__(self, key, val): def __setattr__(self, key, val):
raise ValueError, "Attributes cannot be changed this way." raise ValueError("Attributes cannot be changed this way.")
def __getattr__(self, key): def __getattr__(self, key):
if key in ['c', 'coef', 'coefficients']: if key in ['c', 'coef', 'coefficients']:
@ -1527,7 +1606,10 @@ class Cheb1d(object):
try: try:
return self.__dict__[key] return self.__dict__[key]
except KeyError: except KeyError:
raise AttributeError("'%s' has no attribute '%s'" % (self.__class__, key)) raise AttributeError(
"'%s' has no attribute '%s'" %
(self.__class__, key))
def __getitem__(self, val): def __getitem__(self, val):
if val > self.order: if val > self.order:
return 0 return 0
@ -1538,7 +1620,7 @@ class Cheb1d(object):
def __setitem__(self, key, val): def __setitem__(self, key, val):
#ind = self.order - key #ind = self.order - key
if key < 0: if key < 0:
raise ValueError, "Does not support negative powers." raise ValueError("Does not support negative powers.")
if key > self.order: if key > self.order:
zr = zeros(key - self.order, self.coeffs.dtype) zr = zeros(key - self.order, self.coeffs.dtype)
self.__dict__['coeffs'] = concatenate((self.coeffs, zr)) self.__dict__['coeffs'] = concatenate((self.coeffs, zr))
@ -1579,6 +1661,7 @@ class Cheb1d(object):
der.coeffs = chebder(self.coeffs, self.a, self.b) der.coeffs = chebder(self.coeffs, self.a, self.b)
return der return der
def padefit(c, m=None): def padefit(c, m=None):
""" """
Rational polynomial fitting from polynomial coefficients Rational polynomial fitting from polynomial coefficients
@ -1616,7 +1699,7 @@ def padefit(c, m=None):
Pade approximation to exp(x) Pade approximation to exp(x)
>>> import scipy.special as sp >>> import scipy.special as sp
>>> import matplotlib.pyplot as plt >>> import matplotlib.pyplot as plt
>>> c = poly1d(1./sp.gamma(np.r_[6+1:0:-1])) #polynomial coeff exponential function >>> c = poly1d(1./sp.gamma(np.r_[6+1:0:-1]))
>>> [p, q] = padefit(c) >>> [p, q] = padefit(c)
>>> p; q >>> p; q
poly1d([ 0.00277778, 0.03333333, 0.2 , 0.66666667, 1. ]) poly1d([ 0.00277778, 0.03333333, 0.2 , 0.66666667, 1. ])
@ -1636,12 +1719,14 @@ def padefit(c, m=None):
c = asarray(c) c = asarray(c)
return pade(c[::-1], m) return pade(c[::-1], m)
def test_pade(): def test_pade():
cof = array(([1.0, 1.0, 1.0 / 2, 1. / 6, 1. / 24])) cof = array(([1.0, 1.0, 1.0 / 2, 1. / 6, 1. / 24]))
p, q = pade(cof, 2) p, q = pade(cof, 2)
t = arange(0, 2, 0.1) t = arange(0, 2, 0.1)
assert(all(abs(p(t) / q(t) - exp(t)) < 0.3)) assert(all(abs(p(t) / q(t) - exp(t)) < 0.3))
def padefitlsq(fun, m, k, a=-1, b=1, trace=False, x=None, end_points=True): def padefitlsq(fun, m, k, a=-1, b=1, trace=False, x=None, end_points=True):
""" """
Rational polynomial fitting. A minimax solution by least squares. Rational polynomial fitting. A minimax solution by least squares.
@ -1708,7 +1793,8 @@ def padefitlsq(fun, m, k, a= -1, b=1, trace=False, x=None, end_points=True):
smallest_devmax = BIG smallest_devmax = BIG
ncof = m + k + 1 ncof = m + k + 1
npt = NFAC * ncof # % Number of points where function is evaluated, i.e. fineness of mesh # % Number of points where function is evaluated, i.e. fineness of mesh
npt = NFAC * ncof
if x is None: if x is None:
if end_points: if end_points:
@ -1716,18 +1802,20 @@ def padefitlsq(fun, m, k, a= -1, b=1, trace=False, x=None, end_points=True):
# the Chebychev polynomial of the first kind of degree NPT-1. # the Chebychev polynomial of the first kind of degree NPT-1.
x = map_to_interval(chebextr(npt - 1), a, b) x = map_to_interval(chebextr(npt - 1), a, b)
else: else:
# Use the roots of the Chebychev polynomial of the first kind of degree NPT. # Use the roots of the Chebychev polynomial of the first kind of
# Note this is useful if there are singularities close to the endpoints. # degree NPT. Note this is useful if there are singularities close
# to the endpoints.
x = map_to_interval(chebroot(npt, kind=1), a, b) x = map_to_interval(chebroot(npt, kind=1), a, b)
if hasattr(fun, '__call__'): if hasattr(fun, '__call__'):
fs = fun(x) fs = fun(x)
else: else:
fs = fun fs = fun
n = len(fs) n = len(fs)
if n < npt: if n < npt:
warnings.warn('Check the result! Number of function values should be at least: %d' % npt) warnings.warn(
'Check the result! ' +
'Number of function values should be at least: %d' % npt)
if trace: if trace:
plt.plot(x, fs, '+') plt.plot(x, fs, '+')
@ -1738,7 +1826,7 @@ def padefitlsq(fun, m, k, a= -1, b=1, trace=False, x=None, end_points=True):
u = zeros((npt, ncof)) u = zeros((npt, ncof))
for ix in xrange(MAXIT): for ix in xrange(MAXIT):
#% Set up design matrix for least squares fit. # Set up design matrix for least squares fit.
pow1 = wt pow1 = wt
bb = pow1 * (fs + abs(mad) * sign(ee)) bb = pow1 * (fs + abs(mad) * sign(ee))
@ -1751,19 +1839,20 @@ def padefitlsq(fun, m, k, a= -1, b=1, trace=False, x=None, end_points=True):
pow1 = pow1 * x pow1 = pow1 * x
u[:, jx] = pow1 u[:, jx] = pow1
[u1, w, v] = linalg.svd(u, full_matrices=False) [u1, w, v] = linalg.svd(u, full_matrices=False)
cof = where(w == 0, 0.0, dot(bb, u1) / w) cof = where(w == 0, 0.0, np.dot(bb, u1) / w)
cof = dot(cof, v) cof = np.dot(cof, v)
#% Tabulate the deviations and revise the weights # Tabulate the deviations and revise the weights
ee = polyval(cof[m::-1], x) / polyval(cof[ncof:m:-1].tolist() + [1, ], x) - fs ee = polyval(cof[m::-1], x) / \
polyval(cof[ncof:m:-1].tolist() + [1, ], x) - fs
wt = np.abs(ee) wt = np.abs(ee)
devmax = max(wt) devmax = max(wt)
mad = wt.mean() # % mean absolute deviation mad = wt.mean() # % mean absolute deviation
if (devmax <= smallest_devmax): #% Save only the best coefficients found # Save only the best coefficients found
if (devmax <= smallest_devmax):
smallest_devmax = devmax smallest_devmax = devmax
c1 = cof[m::-1] c1 = cof[m::-1]
c2 = cof[ncof:m:-1].tolist() + [1, ] c2 = cof[ncof:m:-1].tolist() + [1, ]
@ -1771,14 +1860,9 @@ def padefitlsq(fun, m, k, a= -1, b=1, trace=False, x=None, end_points=True):
if trace: if trace:
print('Iteration=%d, max error=%g' % (ix, devmax)) print('Iteration=%d, max error=%g' % (ix, devmax))
plt.plot(x, fs, x, ee + fs) plt.plot(x, fs, x, ee + fs)
#c1=c1(:)
#c2=c2(:)
return poly1d(c1), poly1d(c2) return poly1d(c1), poly1d(c2)
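# Usage sketch (hypothetical helper, not part of wafo): the [3/3]
# minimax-style rational fit of exp on [0, 2], as exercised in main()
# below, should track the function closely over the whole interval.
def _check_padefitlsq():
    c1, c2 = padefitlsq(np.exp, 3, 3, 0, 2)
    x = np.linspace(0., 2., 9)
    return np.max(np.abs(c1(x) / c2(x) - np.exp(x))) < 1e-4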
def main(): def main():
[c1, c2] = padefitlsq(exp, 3, 3, 0, 2) [c1, c2] = padefitlsq(exp, 3, 3, 0, 2)
@ -1794,45 +1878,72 @@ def main():
_pr = polyreloc(p, 2) _pr = polyreloc(p, 2)
_pd = polyder(p) _pd = polyder(p)
_st = poly2str(p) _st = poly2str(p)
c = poly1d(1. / sp.gamma(np.r_[6 + 1:0:-1])) #polynomial coeff exponential function c = poly1d(1. / sp.gamma(np.r_[6 + 1:0:-1]))
# polynomial coeff of the exponential function
[p, q] = padefit(c) [p, q] = padefit(c)
x = linspace(0, 4); x = linspace(0, 4)
plt.plot(x, c(x), x, p(x) / q(x), 'g-', x, exp(x), 'r.') plt.plot(x, c(x), x, p(x) / q(x), 'g-', x, exp(x), 'r.')
plt.close() plt.close()
x = arange(4) x = arange(4)
dx = dct(x) dx = dct(x)
_idx = idct(dx) _idx = idct(dx)
a = 0; a = 0
b = 2; b = 2
ck = chebfit(exp, 6, a, b); ck = chebfit(exp, 6, a, b)
_t = chebval(0, ck, a, b) _t = chebval(0, ck, a, b)
x = linspace(0, 2, 6); x = linspace(0, 2, 6)
plt.plot(x, exp(x), 'r', x, chebval(x, ck, a, b), 'g.') plt.plot(x, exp(x), 'r', x, chebval(x, ck, a, b), 'g.')
# x1 = chebroot(9).'*(b-a)/2+(b+a)/2 ; # x1 = chebroot(9).'*(b-a)/2+(b+a)/2 ;
# ck1 =chebfit([x1 exp(x1)],9,a,b); # ck1 =chebfit([x1 exp(x1)],9,a,b);
# plot(x,exp(x),'r'), hold on # plot(x,exp(x),'r'), hold on
# plot(x,chebval(x,ck1,a,b),'g'), hold off # plot(x,chebval(x,ck1,a,b),'g'), hold off
_t = poly2hstr([1, 1, 2]) _t = poly2hstr([1, 1, 2])
py = [1, 0] py = [1, 0]
px = polyshift(py, 0, 5); px = polyshift(py, 0, 5)
_t1 = polyval(px, [0, 2.5, 5]) # % This is the same as the line below _t1 = polyval(px, [0, 2.5, 5]) # % This is the same as the line below
_t2 = polyval(py, [-1, 0, 1]) _t2 = polyval(py, [-1, 0, 1])
px = [1, 0] px = [1, 0]
py = polyishift(px, 0, 5); py = polyishift(px, 0, 5)
t1 = polyval(px, [0, 2.5, 5]) # % This is the same as the line below t1 = polyval(px, [0, 2.5, 5]) # % This is the same as the line below
t2 = polyval(py, [-1, 0, 1]) t2 = polyval(py, [-1, 0, 1])
print(t1, t2) print(t1, t2)
def test_polydeg():
x = np.linspace(0, 10, 300)
y = np.sin(x ** 3 / 100) ** 2 + 0.05 * np.random.randn(x.size)
n = polydeg(x, y)
#n = 2
p = orthofit(x, y, n)
xi = linspace(x.min(), x.max())
ys0 = orthoval(p, x)
ys = orthoval(p, xi)
ys2 = orthoval(p, xi)
plt.plot(x, y, '.', x, ys0, 'k', xi, ys, 'r', xi, ys2, 'r.')
p0 = ortho2poly(p)
p1 = polyfit(x, ys0, n)
plt.plot(xi, polyval(p0, xi), 'g-.', xi, polyval(p1, xi), 'go')
plt.show('hold')
def test_docstrings(): def test_docstrings():
import doctest import doctest
doctest.testmod() print('Testing docstrings in %s' % __file__)
doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
if __name__ == '__main__': if __name__ == '__main__':
if False: # True: #
main()
else:
test_docstrings() test_docstrings()
# main() #test_polydeg()
@ -11,9 +11,11 @@ Created on 15. des. 2009
#typelib_msppt = None #typelib_msppt = None
# for typelib in EnumTlbs(): # for typelib in EnumTlbs():
# d = typelib.desc.split(' ') # d = typelib.desc.split(' ')
# if d[0] == 'Microsoft' and d[1] == 'Office' and d[3] == 'Object' and d[4] == 'Library': # if d[0] == 'Microsoft' and d[1] == 'Office' and d[3] == 'Object' \
# and d[4] == 'Library':
# typelib_mso = typelib # typelib_mso = typelib
# if d[0] == 'Microsoft' and d[1] == 'PowerPoint' and d[3] == 'Object' and d[4] == 'Library': # if d[0] == 'Microsoft' and d[1] == 'PowerPoint' and d[3] == 'Object' \
# and d[4] == 'Library':
# typelib_msppt = typelib # typelib_msppt = typelib
# if hasattr(sys, 'frozen'): # If we're an .exe file # if hasattr(sys, 'frozen'): # If we're an .exe file
# win32com.__gen_path__ = os.path.dirname(sys.executable) # win32com.__gen_path__ = os.path.dirname(sys.executable)
@ -21,9 +23,12 @@ Created on 15. des. 2009
# if win32com.client.gencache.is_readonly: # if win32com.client.gencache.is_readonly:
# win32com.client.gencache.is_readonly = False # win32com.client.gencache.is_readonly = False
# win32com.client.gencache.Rebuild() # win32com.client.gencache.Rebuild()
#MSPPT = win32com.client.gencache.EnsureModule(typelib_msppt.clsid, typelib_msppt.lcid, # MSPPT = win32com.client.gencache.EnsureModule(typelib_msppt.clsid,
# int(typelib_msppt.major), int(typelib_msppt.minor)) # typelib_msppt.lcid,
#MSO = win32com.client.gencache.EnsureModule(typelib_mso.clsid, typelib_mso.lcid, # int(typelib_msppt.major),
# int(typelib_msppt.minor))
# MSO = win32com.client.gencache.EnsureModule(typelib_mso.clsid,
# typelib_mso.lcid,
# int(typelib_mso.major), int(typelib_mso.minor)) # int(typelib_mso.major), int(typelib_mso.minor))
import os import os
import warnings import warnings
@ -38,7 +43,9 @@ for c in dir(MSO.constants):
for c in dir(MSPPT.constants): for c in dir(MSPPT.constants):
g[c] = getattr(MSPPT.constants, c) g[c] = getattr(MSPPT.constants, c)
class Powerpoint(object): class Powerpoint(object):
def __init__(self, file_name=''): def __init__(self, file_name=''):
self.application = win32com.client.Dispatch("Powerpoint.Application") self.application = win32com.client.Dispatch("Powerpoint.Application")
@ -82,32 +89,38 @@ class Powerpoint(object):
footer = shape footer = shape
break break
else: else:
footer = shapes.AddTextbox(msoTextOrientationHorizontal, Left=0, Top=510, Width=720, Height=28.875) #@UndefinedVariable footer = shapes.AddTextbox(
msoTextOrientationHorizontal, # @UndefinedVariable
Left=0, Top=510, Width=720, Height=28.875)
footer.Name = 'Footer' footer.Name = 'Footer'
footer.TextFrame.TextRange.Text = self.footer footer.TextFrame.TextRange.Text = self.footer
def add_title_slide(self, title, subtitle=''): def add_title_slide(self, title, subtitle=''):
self.num_slides += 1 self.num_slides += 1
slide = self.presentation.Slides.Add(self.num_slides, MSPPT.constants.ppLayoutTitle) slide = self.presentation.Slides.Add(
self.num_slides, MSPPT.constants.ppLayoutTitle)
unused_title_id, unused_textbox_id = 1, 2 unused_title_id, unused_textbox_id = 1, 2
for id_, title1 in enumerate([title, subtitle]): for id_, title1 in enumerate([title, subtitle]):
titlerange = slide.Shapes(id_ + 1).TextFrame.TextRange titlerange = slide.Shapes(id_ + 1).TextFrame.TextRange
titlerange.Text = title1 titlerange.Text = title1
titlerange.Font.Name = self.title_font titlerange.Font.Name = self.title_font
titlerange.Font.Size = self.title_size-id_*12 if self.title_size>22 else self.title_size titlerange.Font.Size = self.title_size - id_ * \
12 if self.title_size > 22 else self.title_size
def add_slide(self, title='', texts='', notes='', image_file='', def add_slide(self, title='', texts='', notes='', image_file='',
maxlevel=None, left=220, width=-1, height=-1): maxlevel=None, left=220, width=-1, height=-1):
self.num_slides += 1 self.num_slides += 1
slide = self.presentation.Slides.Add(self.num_slides, MSPPT.constants.ppLayoutText) slide = self.presentation.Slides.Add(
self.num_slides, MSPPT.constants.ppLayoutText)
self.add2slide(slide, title, texts, notes, image_file, maxlevel, left, width, height) self.add2slide(slide, title, texts, notes, image_file, maxlevel, left,
width, height)
return slide return slide
def add2slide(self, slide, title='', texts='', notes='', image_file='', def add2slide(self, slide, title='', texts='', notes='', image_file='',
maxlevel=None, left=220, width=-1, height=-1, keep_aspect=True): maxlevel=None, left=220, width=-1, height=-1,
keep_aspect=True):
title_id, textbox_id = 1, 2 title_id, textbox_id = 1, 2
if title: if title:
titlerange = slide.Shapes(title_id).TextFrame.TextRange titlerange = slide.Shapes(title_id).TextFrame.TextRange
@ -155,15 +168,19 @@ class Powerpoint(object):
page.Shapes(id).TextFrame.TextRange.Font.Size = self.text_size page.Shapes(id).TextFrame.TextRange.Font.Size = self.text_size
def _add_text_from_dict(self, page, id, txt_dict, level, maxlevel=None): #@ReservedAssignment def _add_text_from_dict(self, page, id, txt_dict, # @ReservedAssignment
level, maxlevel=None):
if maxlevel is None or level <= maxlevel: if maxlevel is None or level <= maxlevel:
for name, subdict in txt_dict.iteritems(): for name, subdict in txt_dict.iteritems():
tr = page.Shapes(id).TextFrame.TextRange.InsertAfter(name) tr = page.Shapes(id).TextFrame.TextRange.InsertAfter(name)
unused_temp = page.Shapes(id).TextFrame.TextRange.InsertAfter('\r') unused_temp = page.Shapes(
id).TextFrame.TextRange.InsertAfter('\r')
tr.IndentLevel = level tr.IndentLevel = level
self._add_text_from_dict(page, id, subdict, min(level+1,5), maxlevel) self._add_text_from_dict(
page, id, subdict, min(level + 1, 5), maxlevel)
def _add_text_from_list(self, page, id, txt_list, maxlevel=None): #@ReservedAssignment def _add_text_from_list(self, page, id, # @ReservedAssignment
txt_list, maxlevel=None):
for txt in txt_list: for txt in txt_list:
level = 1 level = 1
while isinstance(txt, (list, tuple)): while isinstance(txt, (list, tuple)):
@ -171,17 +188,16 @@ class Powerpoint(object):
level += 1 level += 1
if maxlevel is None or level <= maxlevel: if maxlevel is None or level <= maxlevel:
tr = page.Shapes(id).TextFrame.TextRange.InsertAfter(txt) tr = page.Shapes(id).TextFrame.TextRange.InsertAfter(txt)
unused_temp = page.Shapes(id).TextFrame.TextRange.InsertAfter('\r') unused_temp = page.Shapes(
id).TextFrame.TextRange.InsertAfter('\r')
tr.IndentLevel = level tr.IndentLevel = level
def save(self, fullfile=''): def save(self, fullfile=''):
if fullfile: if fullfile:
self.presentation.SaveAs(FileName=fullfile) self.presentation.SaveAs(FileName=fullfile)
else: else:
self.presentation.Save() self.presentation.Save()
def quit(self): # @ReservedAssignment def quit(self): # @ReservedAssignment
if self._visible: if self._visible:
self.presentation.Close() self.presentation.Close()
@ -192,6 +208,7 @@ class Powerpoint(object):
if not self._visible: if not self._visible:
self.application.Quit() self.application.Quit()
def test_powerpoint(): def test_powerpoint():
# Make powerpoint # Make powerpoint
@ -202,33 +219,33 @@ def test_powerpoint():
ppt.add_slide(title='alsfkasldk', texts='asdflaf', notes='asdfas') ppt.add_slide(title='alsfkasldk', texts='asdflaf', notes='asdfas')
ppt.set_footer() ppt.set_footer()
def make_ppt(): def make_ppt():
application = win32com.client.Dispatch("Powerpoint.Application") application = win32com.client.Dispatch("Powerpoint.Application")
application.Visible = True application.Visible = True
presentation = application.Presentations.Add() presentation = application.Presentations.Add()
slide1 = presentation.Slides.Add(1, MSPPT.constants.ppLayoutText) slide1 = presentation.Slides.Add(1, MSPPT.constants.ppLayoutText)
# title = slide1.Shapes.AddTextBox(Type=msoTextOrientationHorizontal,
# title = slide1.Shapes.AddTextBox(Type=msoTextOrientationHorizontal,Left=50, Top=10, Width=620, Height=70) # Left=50, Top=10, Width=620, Height=70)
# title.TextFrame.TextRange.Text = 'Overskrift' # title.TextFrame.TextRange.Text = 'Overskrift'
title_id, textbox_id = 1, 2 title_id, textbox_id = 1, 2
slide1.Shapes(title_id).TextFrame.TextRange.Text = 'Overskrift' slide1.Shapes(title_id).TextFrame.TextRange.Text = 'Overskrift'
#slide1.Shapes(title_id).TextFrame.Width = 190 #slide1.Shapes(title_id).TextFrame.Width = 190
slide1.Shapes(textbox_id).TextFrame.TextRange.InsertAfter('Test') slide1.Shapes(textbox_id).TextFrame.TextRange.InsertAfter('Test')
unused_tr = slide1.Shapes(textbox_id).TextFrame.TextRange.InsertAfter('\r') unused_tr = slide1.Shapes(textbox_id).TextFrame.TextRange.InsertAfter('\r')
slide1.Shapes(textbox_id).TextFrame.TextRange.IndentLevel = 1 slide1.Shapes(textbox_id).TextFrame.TextRange.IndentLevel = 1
tr = slide1.Shapes(textbox_id).TextFrame.TextRange.InsertAfter('tests') tr = slide1.Shapes(textbox_id).TextFrame.TextRange.InsertAfter('tests')
unused_tr0 = slide1.Shapes(textbox_id).TextFrame.TextRange.InsertAfter('\r') unused_tr0 = slide1.Shapes(
textbox_id).TextFrame.TextRange.InsertAfter('\r')
tr.IndentLevel = 2 tr.IndentLevel = 2
tr1 = slide1.Shapes(textbox_id).TextFrame.TextRange.InsertAfter('test3') tr1 = slide1.Shapes(textbox_id).TextFrame.TextRange.InsertAfter('test3')
tr1.IndentLevel = 3 tr1.IndentLevel = 3
#slide1.Shapes(textbox_id).TextFrame.TextRange.Text = 'Test \r test2' #slide1.Shapes(textbox_id).TextFrame.TextRange.Text = 'Test \r test2'
# textbox = slide1.Shapes.AddTextBox(Type=msoTextOrientationHorizontal,Left=30, Top=100, Width=190, Height=400) # textbox = slide1.Shapes.AddTextBox(Type=msoTextOrientationHorizontal,
# Left=30, Top=100, Width=190, Height=400)
# textbox.TextFrame.TextRange.Text = 'Test \r test2' # textbox.TextFrame.TextRange.Text = 'Test \r test2'
#picbox = slide1.Shapes(picb_id) #picbox = slide1.Shapes(picb_id)
@ -240,14 +257,12 @@ def make_ppt():
slide1.NotesPage.Shapes(2).TextFrame.TextRange.Text = 'test' slide1.NotesPage.Shapes(2).TextFrame.TextRange.Text = 'test'
# for shape in slide1.Shapes: # for shape in slide1.Shapes:
# shape.TextFrame.TextRange.Text = 'Test \r test2' # shape.TextFrame.TextRange.Text = 'Test \r test2'
# slide1.Shapes.Titles.TextFrames.TestRange.Text # slide1.Shapes.Titles.TextFrames.TestRange.Text
# shape = slide1.Shapes.AddShape(msoShapeRectangle, 300, 100, 400, 400) # shape = slide1.Shapes.AddShape(msoShapeRectangle, 300, 100, 400, 400)
# shape.TextFrame.TextRange.Text = 'Test \n test2' # shape.TextFrame.TextRange.Text = 'Test \n test2'
# shape.TextFrame.TextRange.Font.Size = 12 # shape.TextFrame.TextRange.Font.Size = 12
# #
# app = wx.PySimpleApp() # app = wx.PySimpleApp()
# dialog = wx.FileDialog(None, 'Choose image file', defaultDir=os.getcwd(), # dialog = wx.FileDialog(None, 'Choose image file', defaultDir=os.getcwd(),
@ -281,6 +296,8 @@ def rename_ppt():
ppt.save(os.path.join(root, ppt.footer)) ppt.save(os.path.join(root, ppt.footer))
except: except:
warnings.warn('Unable to load %s' % filename) warnings.warn('Unable to load %s' % filename)
def load_file_into_ppt(): def load_file_into_ppt():
root = r'C:/pab/tsm_opeval/analysis_tsmps_aco_v2008b/plots' root = r'C:/pab/tsm_opeval/analysis_tsmps_aco_v2008b/plots'
# root = r'C:/pab/tsm_opeval/analysis_tsmps_mag_v2008b/plots' # root = r'C:/pab/tsm_opeval/analysis_tsmps_mag_v2008b/plots'
@ -2,16 +2,19 @@ import numpy as np
#from math import pow #from math import pow
#from numpy import zeros,dot #from numpy import zeros,dot
from numpy import abs, size, convolve, linalg, concatenate # @UnresolvedImport from numpy import abs, size, convolve, linalg, concatenate # @UnresolvedImport
from scipy.sparse import spdiags
from scipy.sparse.linalg import spsolve, expm
from scipy.signal import medfilt
__all__ = ['calc_coeff', 'smooth', 'smooth_last'] __all__ = ['calc_coeff', 'smooth', 'smooth_last',
'SavitzkyGolay', 'Kalman', 'HodrickPrescott']
def calc_coeff(n, degree, diff_order=0): def calc_coeff(n, degree, diff_order=0):
""" calculates filter coefficients for symmetric savitzky-golay filter. """ calculates filter coefficients for symmetric savitzky-golay filter.
see: http://www.nrbook.com/a/bookcpdf/c14-8.pdf see: http://www.nrbook.com/a/bookcpdf/c14-8.pdf
n means that 2*n+1 values contribute to the n means that 2*n+1 values contribute to the smoother.
smoother.
degree is degree of fitting polynomial degree is degree of fitting polynomial
@ -29,6 +32,7 @@ def calc_coeff(n, degree, diff_order=0):
coeff = linalg.pinv(b).A[diff_order] coeff = linalg.pinv(b).A[diff_order]
return coeff return coeff
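# Property sketch (hypothetical helper, not part of wafo, assuming numpy
# as np): a Savitzky-Golay window of half-width n=3 and degree=2 fits a
# local quadratic, so applying the weights to any quadratic sampled on the
# window must return the exact centre value.
def _check_calc_coeff():
    coeff = calc_coeff(n=3, degree=2)
    k = np.arange(-3., 4.)
    y = 1.0 + 2.0 * k + 3.0 * k ** 2  # quadratic sampled on the window
    return np.allclose(np.dot(coeff, y), y[3])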
def smooth_last(signal, coeff, k=0): def smooth_last(signal, coeff, k=0):
n = size(coeff - 1) // 2 n = size(coeff - 1) // 2
y = np.squeeze(signal) y = np.squeeze(signal)
@ -41,12 +45,12 @@ def smooth_last(signal, coeff, k=0):
def smooth(signal, coeff, pad=True): def smooth(signal, coeff, pad=True):
"""applies coefficients calculated by calc_coeff() to signal."""
""" applies coefficients calculated by calc_coeff()
to signal """
n = size(coeff - 1) // 2 n = size(coeff - 1) // 2
y = np.squeeze(signal) y = np.squeeze(signal)
if n == 0:
return y
if pad: if pad:
first_vals = y[0] - abs(y[n:0:-1] - y[0]) first_vals = y[0] - abs(y[n:0:-1] - y[0])
last_vals = y[-1] + abs(y[-2:-n - 2:-1] - y[-1]) last_vals = y[-1] + abs(y[-2:-n - 2:-1] - y[-1])
@ -63,8 +67,9 @@ def smooth(signal, coeff, pad=True):
res = convolve(y, coeff)[n:-n] res = convolve(y, coeff)[n:-n]
return res return res
class SavitzkyGolay(object): class SavitzkyGolay(object):
r"""Smooth (and optionally differentiate) data with a Savitzky-Golay filter. r"""Smooth and optionally differentiate data with a Savitzky-Golay filter.
The Savitzky-Golay filter removes high frequency noise from data. The Savitzky-Golay filter removes high frequency noise from data.
It has the advantage of preserving the original shape and It has the advantage of preserving the original shape and
@ -79,10 +84,10 @@ class SavitzkyGolay(object):
the order of the polynomial used in the filtering. the order of the polynomial used in the filtering.
Must be less than `window_size` - 1, i.e, less than 2*n. Must be less than `window_size` - 1, i.e, less than 2*n.
diff_order : int diff_order : int
the order of the derivative to compute (default = 0 means only smoothing) order of the derivative to compute (default = 0 means only smoothing)
0 means that filter results in smoothing of function 0 means that filter results in smoothing of function
1 means that filter results in smoothing the first derivative of function. 1 means that filter results in smoothing the first derivative of the
and so on ... function and so on ...
Notes Notes
----- -----
@ -96,13 +101,14 @@ class SavitzkyGolay(object):
-------- --------
>>> t = np.linspace(-4, 4, 500) >>> t = np.linspace(-4, 4, 500)
>>> y = np.exp( -t**2 ) + np.random.normal(0, 0.05, t.shape) >>> y = np.exp( -t**2 ) + np.random.normal(0, 0.05, t.shape)
>>> ysg = SavitzkyGolay(n=15, degree=4).smooth(y) >>> ysg = SavitzkyGolay(n=20, degree=2).smooth(y)
>>> import matplotlib.pyplot as plt >>> import matplotlib.pyplot as plt
>>> hy = plt.plot(t, y, label='Noisy signal') >>> h = plt.plot(t, y, label='Noisy signal')
>>> h = plt.plot(t, np.exp(-t**2), 'k', lw=1.5, label='Original signal') >>> h1 = plt.plot(t, np.exp(-t**2), 'k', lw=1.5, label='Original signal')
>>> h = plt.plot(t, ysg, 'r', label='Filtered signal') >>> h2 = plt.plot(t, ysg, 'r', label='Filtered signal')
>>> h = plt.legend() >>> h3 = plt.legend()
>>> plt.show() >>> h4 = plt.title('Savitzky-Golay')
plt.show()
References References
---------- ----------
@ -113,6 +119,7 @@ class SavitzkyGolay(object):
W.H. Press, S.A. Teukolsky, W.T. Vetterling, B.P. Flannery W.H. Press, S.A. Teukolsky, W.T. Vetterling, B.P. Flannery
Cambridge University Press ISBN-13: 9780521880688 Cambridge University Press ISBN-13: 9780521880688
""" """
def __init__(self, n, degree=1, diff_order=0): def __init__(self, n, degree=1, diff_order=0):
self.n = n self.n = n
self.degree = degree self.degree = degree
@ -128,10 +135,13 @@ class SavitzkyGolay(object):
b = np.mat(k_range ** order_range) b = np.mat(k_range ** order_range)
#b =np.mat([[float(k)**i for i in order_range] for k in range(-n,n+1)]) #b =np.mat([[float(k)**i for i in order_range] for k in range(-n,n+1)])
self._coeff = linalg.pinv(b).A[self.diff_order] self._coeff = linalg.pinv(b).A[self.diff_order]
def smooth_last(self, signal, k=0): def smooth_last(self, signal, k=0):
coeff = self._coeff coeff = self._coeff
n = size(coeff - 1) // 2 n = size(coeff - 1) // 2
y = np.squeeze(signal) y = np.squeeze(signal)
if n == 0:
return y
if y.ndim > 1: if y.ndim > 1:
coeff.shape = (-1, 1) coeff.shape = (-1, 1)
first_vals = y[0] - abs(y[n:0:-1] - y[0]) first_vals = y[0] - abs(y[n:0:-1] - y[0])
@ -139,6 +149,8 @@ class SavitzkyGolay(object):
y = concatenate((first_vals, y, last_vals)) y = concatenate((first_vals, y, last_vals))
return (y[-2 * n - 1 - k:-k] * coeff).sum(axis=0) return (y[-2 * n - 1 - k:-k] * coeff).sum(axis=0)
def __call__(self, signal):
return self.smooth(signal)
def smooth(self, signal, pad=True): def smooth(self, signal, pad=True):
""" """
@ -159,6 +171,8 @@ class SavitzkyGolay(object):
coeff = self._coeff coeff = self._coeff
n = size(coeff - 1) // 2 n = size(coeff - 1) // 2
y = np.squeeze(signal) y = np.squeeze(signal)
if n == 0:
return y
if pad: if pad:
first_vals = y[0] - abs(y[n:0:-1] - y[0]) first_vals = y[0] - abs(y[n:0:-1] - y[0])
last_vals = y[-1] + abs(y[-2:-n - 2:-1] - y[-1]) last_vals = y[-1] + abs(y[-2:-n - 2:-1] - y[-1])
@ -175,7 +189,72 @@ class SavitzkyGolay(object):
res = convolve(y, coeff)[n:-n] res = convolve(y, coeff)[n:-n]
return res return res
class HodrickPrescott(object):
'''Smooth data with a Hodrick-Prescott filter.
The Hodrick-Prescott filter removes high frequency noise from data.
It has the advantage of preserving the original shape and
features of the signal better than other types of filtering
approaches, such as moving averages techniques.
Parameters
----------
w : real scalar
smoothing parameter. Larger w means more smoothing. Values usually
in the [100, 20000] interval. As w approaches infinity, H-P
approaches a line.
Examples
--------
>>> t = np.linspace(-4, 4, 500)
>>> y = np.exp( -t**2 ) + np.random.normal(0, 0.05, t.shape)
>>> ysg = HodrickPrescott(w=10000)(y)
>>> import matplotlib.pyplot as plt
>>> h = plt.plot(t, y, label='Noisy signal')
>>> h1 = plt.plot(t, np.exp(-t**2), 'k', lw=1.5, label='Original signal')
>>> h2 = plt.plot(t, ysg, 'r', label='Filtered signal')
>>> h3 = plt.legend()
>>> h4 = plt.title('Hodrick-Prescott')
>>> plt.show()
References
----------
.. [1] E. T. Whittaker, On a new method of graduation. In proceedings of
the Edinburgh Mathematical association., 1923, 78, pp 88-89.
.. [2] R. Hodrick and E. Prescott, Postwar U.S. business cycles: an
empirical investigation,
Journal of money, credit and banking, 1997, 29 (1), pp 1-16.
.. [3] Kim Hyeongwoo, Hodrick-Prescott filter,
2004, www.auburn.edu/~hzk0001/hpfilter.pdf
'''
def __init__(self, w=100):
self.w = w
def _get_matrix(self, n):
w = self.w
diag_matrix = np.repeat(
np.atleast_2d([w, -4 * w, 6 * w + 1, -4 * w, w]).T, n, axis=1)
A = spdiags(diag_matrix, np.arange(-2, 2 + 1), n, n).tocsr()
A[0, 0] = A[-1, -1] = 1 + w
A[1, 1] = A[-2, -2] = 1 + 5 * w
A[0, 1] = A[1, 0] = A[-2, -1] = A[-1, -2] = -2 * w
return A
def __call__(self, x):
x = np.atleast_1d(x).flatten()
n = len(x)
if n < 4:
return x.copy()
A = self._get_matrix(n)
return spsolve(A, x)
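# Where the matrix comes from (editorial note, consistent with refs
# [1]-[3]): minimizing ||x - s||**2 + w * ||D2 s||**2, with D2 the
# (n-2) x n second-difference operator, gives the normal equations
# (I + w * D2.T * D2) s = x; _get_matrix assembles exactly that matrix in
# sparse form. A hedged check (hypothetical helper, not part of wafo):
def _check_hp_matrix(n=8, w=100.):
    D2 = np.diff(np.eye(n), n=2, axis=0)  # second-difference operator
    A_dense = np.eye(n) + w * np.dot(D2.T, D2)
    return np.allclose(A_dense, HodrickPrescott(w)._get_matrix(n).toarray())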
class Kalman(object): class Kalman(object):
''' '''
Kalman filter object - updates a system state vector estimate based upon an Kalman filter object - updates a system state vector estimate based upon an
observation, using a discrete Kalman filter. observation, using a discrete Kalman filter.
@ -200,8 +279,8 @@ class Kalman(object):
matrix. matrix.
USAGE: USAGE:
filt = Kalman(R, x, P, A, B=0, u=0, Q, H) filt = Kalman(R, x, P, A, B=0, Q, H)
x = filt(z) x = filt(z, u=0)
filt is a "system" object containing various fields used as input filt is a "system" object containing various fields used as input
and output. The state estimate "x" and its covariance "P" are and output. The state estimate "x" and its covariance "P" are
@ -241,8 +320,8 @@ class Kalman(object):
information from the new observation). In the output struct, information from the new observation). In the output struct,
this is the "a posteriori" state estimate (after the new this is the "a posteriori" state estimate (after the new
measurement information is included). measurement information is included).
s.z = observation vector z = observation vector
s.u = input control vector, optional (defaults to zero). u = input control vector, optional (defaults to zero).
MATRIX VARIABLES: MATRIX VARIABLES:
@ -285,7 +364,7 @@ class Kalman(object):
>>> r = 0.1**2 # variance of measurement error >>> r = 0.1**2 # variance of measurement error
>>> b = 0 # no system input >>> b = 0 # no system input
>>> u = 0 # no system input >>> u = 0 # no system input
>>> filt = Kalman(R=r, A=1, Q=q, H=h, B=b, u=u) >>> filt = Kalman(R=r, A=1, Q=q, H=h, B=b)
# Generate random voltages and watch the filter operate. # Generate random voltages and watch the filter operate.
>>> n = 50 >>> n = 50
@ -294,32 +373,34 @@ class Kalman(object):
>>> x = np.zeros(n) >>> x = np.zeros(n)
>>> for i, zi in enumerate(z): >>> for i, zi in enumerate(z):
... x[i] = filt(zi) # perform a Kalman filter iteration ... x[i] = filt(zi, u) # perform a Kalman filter iteration
>>> import matplotlib.pyplot as plt >>> import matplotlib.pyplot as plt
>>> hz = plt.plot(z,'r.', label='observations') >>> hz = plt.plot(z,'r.', label='observations')
>>> hx = plt.plot(x,'b-', label='Kalman output') # a-posteriori state estimates:
# a-posteriori state estimates:
>>> hx = plt.plot(x,'b-', label='Kalman output')
>>> ht = plt.plot(truth,'g-', label='true voltage') >>> ht = plt.plot(truth,'g-', label='true voltage')
>>> h = plt.legend() >>> h = plt.legend()
>>> h = plt.title('Automobile Voltimeter Example') >>> h1 = plt.title('Automobile Voltimeter Example')
>>> plt.show()
''' '''
def __init__(self, R, x=None, P=None, A=None, B=0, u=0, Q=None, H=None): def __init__(self, R, x=None, P=None, A=None, B=0, Q=None, H=None):
self.R = R self.R = R # Estimated error in measurements.
self.x = x self.x = x # Initial state estimate.
self.P = P self.P = P # Initial covariance estimate.
self.u = u self.A = A # State transition matrix.
self.A = A self.B = B # Control matrix.
self.B = B self.Q = Q # Estimated error in process.
self.Q = Q self.H = H # Observation matrix.
self.H = H
self.reset() self.reset()
def reset(self): def reset(self):
self._filter = self._filter_first self._filter = self._filter_first
def _filter_first(self, z): def _filter_first(self, z, u):
self._filter = self._filter_main self._filter = self._filter_main
@ -329,57 +410,68 @@ class Kalman(object):
else: else:
n = np.size(self.x) n = np.size(self.x)
if self.A is None: if self.A is None:
self.A = np.eye(n, n) self.A = np.eye(n)
self.A = np.atleast_2d(self.A) self.A = np.atleast_2d(self.A)
if self.Q is None: if self.Q is None:
self.Q = np.zeros((n, n)) self.Q = np.zeros((n, n))
self.Q = np.atleast_2d(self.Q) self.Q = np.atleast_2d(self.Q)
if self.H is None: if self.H is None:
self.H = np.eye(n, n) self.H = np.eye(n)
self.H = np.atleast_2d(self.H) self.H = np.atleast_2d(self.H)
# if np.diff(np.shape(self.H)): try:
# raise ValueError('Observation matrix must be square and invertible for state autointialization.')
HI = np.linalg.inv(self.H) HI = np.linalg.inv(self.H)
except np.linalg.LinAlgError:
HI = np.eye(n)
if self.P is None: if self.P is None:
self.P = np.dot(np.dot(HI, self.R), HI.T) self.P = np.dot(np.dot(HI, self.R), HI.T)
self.P = np.atleast_2d(self.P) self.P = np.atleast_2d(self.P)
if auto_init: if auto_init:
# initialize state estimate from first observation # initialize state estimate from first observation
self.x = np.dot(HI, z) self.x = np.dot(HI, z)
return self.x return self.x
else: else:
return self._filter_main(z) return self._filter_main(z, u)
def _predict_state(self, x, u):
return np.dot(self.A, x) + np.dot(self.B, u)
def _filter_main(self, z): def _predict_covariance(self, P):
''' This is the code which implements the discrete Kalman filter:
'''
A = self.A A = self.A
return np.dot(np.dot(A, P), A.T) + self.Q
def _compute_gain(self, P):
"""Kalman gain factor."""
H = self.H H = self.H
P = self.P PHT = np.dot(P, H.T)
innovation_covariance = np.dot(H, PHT) + self.R
#return np.linalg.solve(PHT, innovation_covariance)
return np.dot(PHT, np.linalg.inv(innovation_covariance))
# Prediction for state vector and covariance: def _update_state_from_observation(self, x, z, K):
x = np.dot(A, self.x) + np.dot(self.B, self.u) innovation = z - np.dot(self.H, x)
P = np.dot(np.dot(A, P), A.T) + self.Q return x + np.dot(K, innovation)
# Compute Kalman gain factor: def _update_covariance(self, P, K):
PHT = np.dot(P, H.T) return P - np.dot(K, np.dot(self.H, P))
K = np.dot(PHT, np.linalg.inv(np.dot(H, PHT) + self.R)) # equivalent form: np.dot(np.eye(len(P)) - np.dot(K, self.H), P)
# Correction based on observation: def _filter_main(self, z, u):
self.x = x + np.dot(K, z - np.dot(H, x)) ''' This is the code which implements the discrete Kalman filter:
self.P = P - np.dot(K, np.dot(H, P)) '''
P = self._predict_covariance(self.P)
x = self._predict_state(self.x, u)
# Note that the desired result, which is an improved estimate K = self._compute_gain(P)
# of the system state vector x and its covariance P, was obtained
# in only five lines of code, once the system was defined. (That's
# how simple the discrete Kalman filter is to use.) Later,
# we'll discuss how to deal with nonlinear systems.
self.P = self._update_covariance(P, K)
self.x = self._update_state_from_observation(x, z, K)
return self.x return self.x
def __call__(self, z):
return self._filter(z) def __call__(self, z, u=0):
return self._filter(z, u)

def test_kalman():
    V0 = 12

@@ -388,7 +480,7 @@ def test_kalman():
    r = 0.05 ** 2  # variance of measurement error
    b = 0  # no system input
    u = 0  # no system input

    filt = Kalman(R=r, A=1, Q=q, H=h, B=b)

    # Generate random voltages and watch the filter operate.
    n = 50

@@ -397,32 +489,450 @@ def test_kalman():
    x = np.zeros(n)
    for i, zi in enumerate(z):
        x[i] = filt(zi, u)  # perform a Kalman filter iteration

    import matplotlib.pyplot as plt
    _hz = plt.plot(z, 'r.', label='observations')
    # a-posteriori state estimates:
    _hx = plt.plot(x, 'b-', label='Kalman output')
    _ht = plt.plot(truth, 'g-', label='true voltage')
    plt.legend()
    plt.title('Automobile Voltmeter Example')
    plt.show('hold')


def lti_disc(F, L=None, Q=None, dt=1):
    '''
    LTI_DISC  Discretize LTI ODE with Gaussian Noise.

    Syntax:
        [A, Q] = lti_disc(F, L, Qc, dt)

    In:
        F  - NxN Feedback matrix
        L  - NxL Noise effect matrix        (optional, default identity)
        Qc - LxL Diagonal Spectral Density  (optional, default zeros)
        dt - Time Step                      (optional, default 1)

    Out:
        A - Transition matrix
        Q - Discrete Process Covariance

    Description:
        Discretize an LTI ODE with Gaussian noise. The original ODE model
        is of the form

            dx/dt = F x + L w,  w ~ N(0, Qc)

        The result of the discretization is the model

            x[k] = A x[k-1] + q,  q ~ N(0, Q),

        which can be used to integrate the model exactly over time steps
        that are multiples of dt.
    '''
    n = np.shape(F)[0]
    if L is None:
        L = np.eye(n)
    if Q is None:
        Q = np.zeros((n, n))
    # Closed form integration of transition matrix
    A = expm(F * dt)
    # Closed form integration of covariance
    # by matrix fraction decomposition
    Phi = np.vstack((np.hstack((F, np.dot(np.dot(L, Q), L.T))),
                     np.hstack((np.zeros((n, n)), -F.T))))
    AB = np.dot(expm(Phi * dt), np.vstack((np.zeros((n, n)), np.eye(n))))
    # Q = AB[:n, :] / AB[n:(2 * n), :]
    Q = np.linalg.solve(AB[n:(2 * n), :].T, AB[:n, :].T)
    return A, Q
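

def _example_lti_disc():
    # A minimal check of lti_disc (illustrative only): discretize the
    # constant-velocity model dx/dt = [[0, 1], [0, 0]] x + w, where only the
    # velocity component is driven by noise with spectral density q.
    dt, q = 0.5, 0.1
    F = np.array([[0., 1.], [0., 0.]])
    A, Q = lti_disc(F, L=None, Q=np.diag([0., q]), dt=dt)
    # For this nilpotent F, expm(F * dt) is exactly [[1, dt], [0, 1]], and Q
    # is the standard white-noise-acceleration covariance.
    assert np.allclose(A, [[1., dt], [0., 1.]])
    assert np.allclose(Q, q * np.array([[dt ** 3 / 3., dt ** 2 / 2.],
                                        [dt ** 2 / 2., dt]]))
    return A, Q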


def test_kalman_sine():
    '''Kalman Filter demonstration with sine signal.'''
    sd = 1.
    dt = 0.1
    w = 1
    T = np.arange(0, 30 + dt / 2, dt)
    n = len(T)
    X = np.sin(w * T)
    Y = X + sd * np.random.randn(n)

    ''' Initialize KF to values
       x = 0
       dx/dt = 0
    with great uncertainty in the derivative
    '''
    M = np.zeros((2, 1))
    P = np.diag([0.1, 2])
    R = sd ** 2
    H = np.atleast_2d([1, 0])
    q = 0.1
    F = np.atleast_2d([[0, 1],
                       [0, 0]])
    A, Q = lti_disc(F, L=None, Q=np.diag([0, q]), dt=dt)

    # Track and animate
    m = M.shape[0]
    MM = np.zeros((m, n))
    PP = np.zeros((m, m, n))
    '''In this demonstration we estimate a stationary sine signal from noisy
    measurements by using the classical Kalman filter.
    '''
    filt = Kalman(R=R, x=M, P=P, A=A, Q=Q, H=H, B=0)

    # Generate random voltages and watch the filter operate.
    # n = 50
    # truth = np.random.randn(n) * np.sqrt(q) + V0
    # z = truth + np.random.randn(n) * np.sqrt(r)  # measurement
    truth = X
    z = Y

    x = np.zeros((n, m))
    for i, zi in enumerate(z):
        x[i] = filt(zi, u=0).ravel()

    import matplotlib.pyplot as plt
    _hz = plt.plot(z, 'r.', label='observations')
    # a-posteriori state estimates:
    _hx = plt.plot(x[:, 0], 'b-', label='Kalman output')
    _ht = plt.plot(truth, 'g-', label='true voltage')
    plt.legend()
    plt.title('Kalman filter tracking a noisy sine signal')
    plt.show()
# for k in range(m):
# [M,P] = kf_predict(M,P,A,Q);
# [M,P] = kf_update(M,P,Y(k),H,R);
#
# MM(:,k) = M;
# PP(:,:,k) = P;
#
# %
# % Animate
# %
# if rem(k,10)==1
# plot(T,X,'b--',...
# T,Y,'ro',...
# T(k),M(1),'k*',...
# T(1:k),MM(1,1:k),'k-');
# legend('Real signal','Measurements','Latest estimate','Filtered estimate')
# title('Estimating a noisy sine signal with Kalman filter.');
# drawnow;
#
# pause;
# end
# end
#
# clc;
# disp('In this demonstration we estimate a stationary sine signal from noisy measurements by using the classical Kalman filter.');
# disp(' ');
#  disp('The filtering results are now displayed sequentially for 10 time steps at a time.');
# disp(' ');
# disp('<push any key to see the filtered and smoothed results together>')
# pause;
# %
# % Apply Kalman smoother
# %
# SM = rts_smooth(MM,PP,A,Q);
# plot(T,X,'b--',...
# T,MM(1,:),'k-',...
# T,SM(1,:),'r-');
# legend('Real signal','Filtered estimate','Smoothed estimate')
# title('Filtered and smoothed estimate of the original signal');
#
# clc;
# disp('The filtered and smoothed estimates of the signal are now displayed.')
# disp(' ');
# disp('RMS errors:');
# %
# % Errors
# %
# fprintf('KF = %.3f\nRTS = %.3f\n',...
# sqrt(mean((MM(1,:)-X(1,:)).^2)),...
# sqrt(mean((SM(1,:)-X(1,:)).^2)));


class HampelFilter(object):
    '''
    Hampel Filter.

    HAMPEL(X,Y,DX,T,varargin) returns the Hampel filtered values of the
    elements in Y. It was developed to detect outliers in a time series,
    but it can also be used as an alternative to the standard median
    filter.

    X, Y are row or column vectors with an equal number of elements.
    The elements in Y should be Gaussian distributed.

    Parameters
    ----------
    dx : positive scalar (default 3 * median(diff(X)))
        Defines the half width of the filter window. DX should be
        dimensionally equivalent to the values in X.
    t : positive scalar (default 3)
        Defines the threshold value used in the equation
        |Y - Y0| > T * S0.
    adaptive : real scalar
        If greater than 0, an experimental adaptive Hampel filter is used.
        If None, a standard Hampel filter is used.
    fulloutput : bool
        If True, the vectors outliers, Y0, LB, UB and ADX are also returned.
        They correspond to the mask of replaced values, the nominal data, the
        lower and upper bounds of the Hampel filter, and the relative half
        size of the local window, respectively. outliers.sum() gives the
        number of outliers detected.

    Examples
    --------
    Hampel filter removal of outliers

    >>> import numpy as np
    >>> randint = np.random.randint
    >>> Y = 5000 + np.random.randn(1000)
    >>> outliers = randint(0,1000, size=(10,))
    >>> Y[outliers] = Y[outliers] + randint(1000, size=(10,))
    >>> YY, res = HampelFilter(fulloutput=True)(Y)
    >>> YY1, res1 = HampelFilter(dx=1, t=3, adaptive=0.1, fulloutput=True)(Y)
    >>> YY2, res2 = HampelFilter(dx=3, t=0, fulloutput=True)(Y)  # Y0 = median

    X = np.arange(len(YY))
    plt.plot(X, Y, 'b.')  # Original data
    plt.plot(X, YY, 'r')  # Hampel filtered data
    plt.plot(X, res['Y0'], 'b--')  # Nominal data
    plt.plot(X, res['LB'], 'r--')  # Lower bounds of the Hampel filter
    plt.plot(X, res['UB'], 'r--')  # Upper bounds of the Hampel filter
    i = res['outliers']
    plt.plot(X[i], Y[i], 'ks')  # Identified outliers
    plt.show('hold')

    References
    ----------
    Chapters 1.4.2, 3.2.2 and 4.3.4 in Mining Imperfect Data: Dealing with
    Contamination and Incomplete Records by Ronald K. Pearson.

    Acknowledgements
    ----------------
    I would like to thank Ronald K. Pearson for the introduction to moving
    window filters. Please visit his blog at:
    http://exploringdatablog.blogspot.com/2012/01/moving-window-filters-and
    -pracma.html
    '''
    def __init__(self, dx=None, t=3, adaptive=None, fulloutput=False):
        self.dx = dx
        self.t = t
        self.adaptive = adaptive
        self.fulloutput = fulloutput

    def __call__(self, y, x=None):
        Y = np.atleast_1d(y).ravel()
        if x is None:
            x = range(len(Y))
        X = np.atleast_1d(x).ravel()

        dx = self.dx
        if dx is None:
            dx = 3 * np.median(np.diff(X))
        if not np.isscalar(dx):
            raise ValueError('DX must be a scalar.')
        elif dx < 0:
            raise ValueError('DX must be larger than zero.')

        YY = Y
        S0 = np.nan * np.zeros(YY.shape)
        Y0 = np.nan * np.zeros(YY.shape)
        ADX = dx * np.ones(Y.shape)

        def localwindow(X, Y, DX, i):
            mask = (X[i] - DX <= X) & (X <= X[i] + DX)
            Y0 = np.median(Y[mask])
            # Calculate the local scale of natural variation
            S0 = 1.4826 * np.median(np.abs(Y[mask] - Y0))
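            # The factor 1.4826 is approximately 1/norm.ppf(3/4); it rescales
            # the median absolute deviation (MAD) into a consistent estimate
            # of the standard deviation for Gaussian data.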
            return Y0, S0

        def smgauss(X, V, DX):
            Xj = X
            Xk = np.atleast_2d(X).T
            Wjk = np.exp(-((Xj - Xk) / (2 * DX)) ** 2)
            G = np.dot(Wjk, V) / np.sum(Wjk, axis=0)
            return G

        if len(X) > 1:
            if self.adaptive is None:
                for i in range(len(Y)):
                    Y0[i], S0[i] = localwindow(X, Y, dx, i)
            else:  # 'adaptive'
                Y0Tmp = np.nan * np.zeros(YY.shape)
                S0Tmp = np.nan * np.zeros(YY.shape)
                # Integer variation of the window half size
                DXTmp = np.arange(1, len(S0) + 1) * dx

                # Calculate an initial guess of the optimal parameters
                # Y0, S0, ADX
                for i in range(len(Y)):
                    j = 0
                    S0Rel = np.inf
                    while S0Rel > self.adaptive:
                        Y0Tmp[j], S0Tmp[j] = localwindow(X, Y, DXTmp[j], i)
                        if j > 0:
                            S0Rel = abs((S0Tmp[j - 1] - S0Tmp[j]) /
                                        (S0Tmp[j - 1] + S0Tmp[j]) / 2)
                        j += 1
                    Y0[i] = Y0Tmp[j - 2]
                    S0[i] = S0Tmp[j - 2]
                    ADX[i] = DXTmp[j - 2] / dx

                # Gaussian smoothing of the relevant parameters
                DX = 2 * np.median(np.diff(X))
                ADX = smgauss(X, ADX, DX)
                S0 = smgauss(X, S0, DX)
                Y0 = smgauss(X, Y0, DX)

        T = self.t
        # Prepare output
        self.UB = Y0 + T * S0
        self.LB = Y0 - T * S0
        outliers = np.abs(Y - Y0) > T * S0  # possible outliers
        YY[outliers] = Y0[outliers]
        self.outliers = outliers
        self.num_outliers = outliers.sum()
        self.ADX = ADX
        self.Y0 = Y0
        if self.fulloutput:
            return YY, dict(outliers=outliers, Y0=Y0,
                            LB=self.LB, UB=self.UB, ADX=ADX)
        return YY


def test_hampel():
    import matplotlib.pyplot as plt
    randint = np.random.randint
    Y = 5000 + np.random.randn(1000)
    outliers = randint(0, 1000, size=(10,))
    Y[outliers] = Y[outliers] + randint(1000, size=(10,))
    YY, res = HampelFilter(dx=3, t=3, fulloutput=True)(Y)
    YY1, res1 = HampelFilter(dx=1, t=3, adaptive=0.1, fulloutput=True)(Y)
    YY2, res2 = HampelFilter(dx=3, t=0, fulloutput=True)(Y)  # median
    plt.figure(1)
    plot_hampel(Y, YY, res)
    plt.title('Standard HampelFilter')
    plt.figure(2)
    plot_hampel(Y, YY1, res1)
    plt.title('Adaptive HampelFilter')
    plt.figure(3)
    plot_hampel(Y, YY2, res2)
    plt.title('Median filter')
    plt.show('hold')


def plot_hampel(Y, YY, res):
    import matplotlib.pyplot as plt
    X = np.arange(len(YY))
    plt.plot(X, Y, 'b.')  # Original data
    plt.plot(X, YY, 'r')  # Hampel filtered data
    plt.plot(X, res['Y0'], 'b--')  # Nominal data
    plt.plot(X, res['LB'], 'r--')  # Lower bounds of the Hampel filter
    plt.plot(X, res['UB'], 'r--')  # Upper bounds of the Hampel filter
    i = res['outliers']
    plt.plot(X[i], Y[i], 'ks')  # Identified outliers
    # plt.show('hold')


def test_tide_filter():
    # import statsmodels.api as sa
    import wafo.spectrum.models as sm
    sd = 10
    Sj = sm.Jonswap(Hm0=4. * sd)
    S = Sj.tospecdata()

    q = (0.1 * sd) ** 2   # variance of process noise as the car operates
    r = (100 * sd) ** 2  # variance of measurement error
    b = 0  # no system input
    u = 0  # no system input

    from scipy.signal import butter, lfilter, filtfilt, lfilter_zi, medfilt
    freq_tide = 1. / (12 * 60 * 60)
    freq_wave = 1. / 10
    freq_filt = freq_wave / 10
    dt = 1.
    freq = 1. / dt
    fn = (freq / 2)

    P = 10 * np.diag([1, 0.01])
    R = r
    H = np.atleast_2d([1, 0])

    F = np.atleast_2d([[0, 1],
                       [0, 0]])
    A, Q = lti_disc(F, L=None, Q=np.diag([0, q]), dt=dt)

    t = np.arange(0, 60 * 12, 1. / freq)
    w = 2 * np.pi * freq  # 1 Hz
    tide = 100 * np.sin(freq_tide * w * t + 2 * np.pi / 4) + 100
    y = tide + S.sim(len(t), dt=1. / freq)[:, 1].ravel()
    # lowess = sa.nonparametric.lowess
    # y2 = lowess(y, t, frac=0.5)[:,1]

    filt = Kalman(R=R, x=np.array([[tide[0]], [0]]), P=P, A=A, Q=Q, H=H, B=b)
    filt2 = Kalman(R=R, x=np.array([[tide[0]], [0]]), P=P, A=A, Q=Q, H=H, B=b)

    # y = tide + 0.5 * np.sin(freq_wave * w * t)

    # Butterworth filter
    b, a = butter(9, (freq_filt / fn), btype='low')
    # y2 = [lowess(y[max(i-60,0):i + 1], t[max(i-60,0):i + 1], frac=.3)[-1,1]
    #       for i in range(len(y))]
    # y2 = [lfilter(b, a, y[:i + 1])[i] for i in range(len(y))]
    # y3 = (filtfilt(b, a, y[:16]).tolist() +
    #       [filtfilt(b, a, y[:i + 1])[i] for i in range(16, len(y))])
    # y0 = medfilt(y, 41)
    zi = lfilter_zi(b, a)
    # y2 = lfilter(b, a, y)  # standard filter
    y3 = filtfilt(b, a, y)  # filter with phase shift correction
    y4 = []
    y5 = []
    for i, j in enumerate(y):
        tmp = filt(j, u=u).ravel()
        tmp = filt2(tmp[0], u=u).ravel()
        # if i == 0:
        #     print(filt.x)
        #     print(filt2.x)
        y4.append(tmp[0])
        y5.append(tmp[1])
    y0 = medfilt(y4, 41)
    print(filt.P)

    # plot
    import matplotlib.pyplot as plt
    plt.plot(t, y, 'r.-', linewidth=2, label='raw data')
    # plt.plot(t, y2, 'b.-', linewidth=2, label='lowess @ %g Hz' % freq_filt)
    plt.plot(t, y3, 'g.-', linewidth=2, label='filtfilt @ %g Hz' % freq_filt)
    plt.plot(t, y4, 'k.-', linewidth=2, label='kalman')
    # plt.plot(t, y5, 'k.', linewidth=2, label='kalman2')
    plt.plot(t, tide, 'y-', linewidth=2, label='True tide')
    plt.legend(frameon=False, fontsize=14)
    plt.xlabel("Time [s]")
    plt.ylabel("Amplitude")
    plt.show('hold')


def test_smooth():
    import matplotlib.pyplot as plt
    t = np.linspace(-4, 4, 500)
    y = np.exp(-t ** 2) + np.random.normal(0, 0.05, t.shape)
    coeff = calc_coeff(n=0, degree=0, diff_order=0)
    ysg = smooth(y, coeff, pad=True)

    plt.plot(t, y, t, ysg, '--')
    plt.show()


def test_docstrings():
    import doctest
    print('Testing docstrings in %s' % __file__)
    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)


if __name__ == '__main__':
    # test_kalman_sine()
    test_tide_filter()
    # test_docstrings()
    # test_hampel()
    # test_kalman()
    # test_smooth()
File diff suppressed because it is too large

File diff suppressed because it is too large
@@ -1,76 +1,133 @@
import unittest

import numpy as np
from wafo.spectrum.models import (Bretschneider, Jonswap, OchiHubble, Tmaspec,
                                  Torsethaugen, McCormick, Wallop, Spreading)


class TestCase(unittest.TestCase):
    def assertListAlmostEqual(self, list1, list2, places=None, msg=None):
        self.assertEqual(len(list1), len(list2))
        for a, b in zip(list1, list2):
            self.assertAlmostEqual(a, b, places, msg)


class TestSpectra(TestCase):
    def test_bretschneider(self):
        S = Bretschneider(Hm0=6.5, Tp=10)
        vals = S((0, 1, 2, 3)).tolist()
        true_vals = [0., 1.69350993, 0.06352698, 0.00844783]
        self.assertListAlmostEqual(vals, true_vals)

    def test_if_jonswap_with_gamma_one_equals_bretschneider(self):
        S = Jonswap(Hm0=7, Tp=11, gamma=1)
        vals = S((0, 1, 2, 3))
        true_vals = np.array([0., 1.42694133, 0.05051648, 0.00669692])
        self.assertListAlmostEqual(vals, true_vals)

        w = np.linspace(0, 5)
        S2 = Bretschneider(Hm0=7, Tp=11)
        # JONSWAP with gamma=1 should be equal to Bretschneider:
        self.assertListAlmostEqual(S(w), S2(w))

    def test_tmaspec(self):
        S = Tmaspec(Hm0=7, Tp=11, gamma=1, h=10)
        vals = S((0, 1, 2, 3))
        true_vals = np.array([0., 0.70106233, 0.05022433, 0.00669692])
        self.assertListAlmostEqual(vals, true_vals)

    def test_torsethaugen(self):
        S = Torsethaugen(Hm0=7, Tp=11, gamma=1, h=10)
        vals = S((0, 1, 2, 3))
        true_vals = np.array([0., 1.19989709, 0.05819794, 0.0093541])
        self.assertListAlmostEqual(vals, true_vals)

        vals = S.wind(range(4))
        true_vals = np.array([0., 1.13560528, 0.05529849, 0.00888989])
        self.assertListAlmostEqual(vals, true_vals)

        vals = S.swell(range(4))
        true_vals = np.array([0., 0.0642918, 0.00289946, 0.00046421])
        self.assertListAlmostEqual(vals, true_vals)

    def test_ochihubble(self):
        S = OchiHubble(par=2)
        vals = S(range(4))
        true_vals = np.array([0., 0.90155636, 0.04185445, 0.00583207])
        self.assertListAlmostEqual(vals, true_vals)

    def test_mccormick(self):
        S = McCormick(Hm0=6.5, Tp=10)
        vals = S(range(4))
        true_vals = np.array([0., 1.87865908, 0.15050447, 0.02994663])
        self.assertListAlmostEqual(vals, true_vals)

    def test_wallop(self):
        S = Wallop(Hm0=6.5, Tp=10)
        vals = S(range(4))
        true_vals = np.array([0.00000000e+00, 9.36921871e-01, 2.76991078e-03,
                              7.72996150e-05])
        self.assertListAlmostEqual(vals, true_vals)


class TestSpreading(TestCase):
    def test_cos2s(self):
        theta = np.linspace(0, 2 * np.pi)
        d = Spreading(type='cos2s')
        dvals = [[1.10168934e+00],
[1.03576796e+00],
[8.60302298e-01],
[6.30309013e-01],
[4.06280137e-01],
[2.29514882e-01],
[1.13052757e-01],
[4.82339343e-02],
[1.76754409e-02],
[5.50490020e-03],
[1.43800617e-03],
[3.09907242e-04],
[5.39672445e-05],
[7.39553743e-06],
[7.70796579e-07],
[5.84247670e-08],
[3.03264905e-09],
[9.91950201e-11],
[1.81442131e-12],
[1.55028269e-14],
[4.63223469e-17],
[2.90526245e-20],
[1.35842977e-24],
[3.26077455e-31],
[1.65021852e-45],
[1.65021852e-45],
[3.26077455e-31],
[1.35842977e-24],
[2.90526245e-20],
[4.63223469e-17],
[1.55028269e-14],
[1.81442131e-12],
[9.91950201e-11],
[3.03264905e-09],
[5.84247670e-08],
[7.70796579e-07],
[7.39553743e-06],
[5.39672445e-05],
[3.09907242e-04],
[1.43800617e-03],
[5.50490020e-03],
[1.76754409e-02],
[4.82339343e-02],
[1.13052757e-01],
[2.29514882e-01],
[4.06280137e-01],
[6.30309013e-01],
[8.60302298e-01],
[1.03576796e+00],
[1.10168934e+00]]

        self.assertListAlmostEqual(d(theta)[0], dvals)


if __name__ == '__main__':
    unittest.main()
@@ -1,4 +1,6 @@
import wafo.spectrum.models as sm
import wafo.transform.models as wtm
import wafo.objects as wo
from wafo.spectrum import SpecData1D
import numpy as np
import unittest

@@ -18,11 +20,11 @@ class TestSpectrum(unittest.TestCase):
        acfmat = S.tocov_matrix(nr=3, nt=256, dt=0.1)
        vals = acfmat[:2, :]
        true_vals = np.array([[3.06073383, 0.0000000, -1.67748256, 0.],
                              [3.05235423, -0.1674357, -1.66811444,
                               0.18693242]])
        self.assertTrue((np.abs(vals - true_vals) < 1e-7).all())


def test_tocovdata():
    Sj = sm.Jonswap()
    S = Sj.tospecdata()

@@ -41,22 +43,25 @@ def test_to_t_pdf():
    f = S.to_t_pdf(pdef='Tc', paramt=(0, 10, 51), speed=7, seed=100)
    vals = ['%2.3f' % val for val in f.data[:10]]
    truevals = ['0.000', '0.014', '0.027', '0.040',
                '0.050', '0.059', '0.067', '0.073', '0.077', '0.082']
    for t, v in zip(truevals, vals):
        assert(t == v)

    # estimated error bounds
    vals = ['%2.4f' % val for val in f.err[:10]]
    truevals = ['0.0000', '0.0003', '0.0003', '0.0004',
                '0.0006', '0.0008', '0.0016', '0.0019', '0.0020', '0.0021']
    for t, v in zip(truevals, vals):
        assert(t == v)


@slow
def test_sim():
    Sj = sm.Jonswap()
    S = Sj.tospecdata()
    # ns = 100
    # dt = .2
    # x1 = S.sim(ns, dt=dt)

    import scipy.stats as st
    x2 = S.sim(20000, 20)

@@ -75,13 +80,11 @@ def test_sim_nl():
    Sj = sm.Jonswap()
    S = Sj.tospecdata()
    # ns = 100
    # dt = .2
    # x1 = S.sim_nl(ns, dt=dt)
    import scipy.stats as st
    x2, _x1 = S.sim_nl(ns=20000, cases=40)
    truth1 = [0, np.sqrt(S.moment(1)[0][0])] + S.stats_nl(moments='sk')
    truth1[-1] = truth1[-1] - 3

@@ -110,26 +113,22 @@ def test_stats_nl():


def test_testgaussian():
    Hs = 7
    Sj = sm.Jonswap(Hm0=Hs)
    S0 = Sj.tospecdata()
    # ns = 100; dt = .2
    # x1 = S0.sim(ns, dt=dt)

    S = S0.copy()
    me, _va, sk, ku = S.stats_nl(moments='mvsk')
    S.tr = wtm.TrHermite(
        mean=me, sigma=Hs / 4, skew=sk, kurt=ku, ysigma=Hs / 4)
    ys = wo.mat2timeseries(S.sim(ns=2 ** 13))
    g0, _gemp = ys.trdata()
    t0 = g0.dist2gauss()
    t1 = S0.testgaussian(ns=2 ** 13, t0=t0, cases=50)
    assert(sum(t1 > t0) < 5)


def test_moment():

@@ -140,29 +139,28 @@ def test_moment():
    true_txt = ['m0', 'm0tt']
    for tv, v in zip(true_vals, vals):
        assert(tv == v)
    for tv, v in zip(true_txt, txt):
        assert(tv == v)


def test_nyquist_freq():
    Sj = sm.Jonswap(Hm0=5)
    S = Sj.tospecdata()  # Make spectrum object
    assert(S.nyquist_freq() == 3.0)


def test_sampling_period():
    Sj = sm.Jonswap(Hm0=5)
    S = Sj.tospecdata()  # Make spectrum object
    assert(S.sampling_period() == 1.0471975511965976)


def test_normalize():
    Sj = sm.Jonswap(Hm0=5)
    S = Sj.tospecdata()  # Make spectrum object
    S.moment(2)
    ([1.5614600345079888, 0.95567089481941048], ['m0', 'm0tt'])
    vals, _txt = S.moment(2)
    true_vals = [1.5614600345079888, 0.95567089481941048]
    for tv, v in zip(true_vals, vals):
        assert(tv == v)

@@ -171,7 +169,7 @@ def test_normalize():
    Sn.normalize()
    # Now the moments should be one
    new_vals, _txt = Sn.moment(2)
    for v in new_vals:
        assert(np.abs(v - 1.0) < 1e-7)
@@ -8,7 +8,7 @@ Statistical functions (:mod:`scipy.stats`)
This module contains a large number of probability distributions as
well as a growing library of statistical functions.

Each included distribution is an instance of the class rv_continuous:
For each given name the following methods are available:

.. autosummary::

@@ -77,7 +77,7 @@ Continuous distributions
   exponweib        -- Exponentiated Weibull
   exponpow         -- Exponential Power
   f                -- F (Snedecor F)
   fatiguelife      -- Fatigue Life (Birnbaum-Saunders)
   fisk             -- Fisk
   foldcauchy       -- Folded Cauchy
   foldnorm         -- Folded Normal

@@ -149,6 +149,7 @@ Multivariate distributions
   :toctree: generated/

   multivariate_normal  -- Multivariate normal distribution
   dirichlet            -- Dirichlet

Discrete distributions
======================

@@ -231,6 +232,7 @@ which work for masked arrays.
.. autosummary::
   :toctree: generated/

   sigmaclip
   threshold
   trimboth
   trim1

@@ -244,6 +246,7 @@ which work for masked arrays.
   pointbiserialr
   kendalltau
   linregress
   theilslopes

.. autosummary::
   :toctree: generated/

@@ -271,8 +274,10 @@ which work for masked arrays.
   levene
   shapiro
   anderson
   anderson_ksamp
   binom_test
   fligner
   median_test
   mood

.. autosummary::

@@ -282,6 +287,8 @@ which work for masked arrays.
   boxcox_normmax
   boxcox_llf
   entropy

Contingency table functions
===========================

@@ -344,3 +351,5 @@ __all__ = [s for s in dir() if not (s.startswith('_') or s.endswith('cython'))]
#import distributions  #@Reimport
#from wafo.stats.distributions import *

from numpy.testing import Tester
test = Tester().test
@@ -16,8 +16,6 @@ def binned_statistic(x, values, statistic='mean',
    each bin.  This function allows the computation of the sum, mean, median,
    or other statistic of the values within each bin.

    Parameters
    ----------
    x : array_like

@@ -78,6 +76,8 @@ def binned_statistic(x, values, statistic='mean',
    second ``[2, 3)``.  The last bin, however, is ``[3, 4]``, which *includes*
    4.

    .. versionadded:: 0.11.0

    Examples
    --------
    >>> stats.binned_statistic([1, 2, 1, 2, 4], np.arange(5), statistic='mean',

@@ -116,8 +116,6 @@ def binned_statistic_2d(x, y, values, statistic='mean',
    each bin.  This function allows the computation of the sum, mean, median,
    or other statistic of the values within each bin.

    Parameters
    ----------
    x : (N,) array_like

@@ -175,6 +173,11 @@ def binned_statistic_2d(x, y, values, statistic='mean',
    --------
    numpy.histogram2d, binned_statistic, binned_statistic_dd

    Notes
    -----
    .. versionadded:: 0.11.0

    """

    # This code is based on np.histogram2d

@@ -203,8 +206,6 @@ def binned_statistic_dd(sample, values, statistic='mean',
    each bin.  This function allows the computation of the sum, mean, median,
    or other statistic of the values within each bin.

    Parameters
    ----------
    sample : array_like

@@ -258,6 +259,11 @@ def binned_statistic_dd(sample, values, statistic='mean',
    --------
    np.histogramdd, binned_statistic, binned_statistic_2d

    Notes
    -----
    .. versionadded:: 0.11.0

    """
    if type(statistic) == str:
        if statistic not in ['mean', 'median', 'count', 'sum', 'std']:
@@ -6,23 +6,23 @@ from __future__ import division, print_function, absolute_import

import warnings

from scipy.special import comb
from scipy.misc.doccer import inherit_docstring_from
from scipy import special
from scipy import optimize
from scipy import integrate
from scipy.special import (gammaln as gamln, gamma as gam, boxcox, boxcox1p,
                           log1p, expm1)

from numpy import (where, arange, putmask, ravel, sum, shape,
                   log, sqrt, exp, arctanh, tan, sin, arcsin, arctan,
                   tanh, cos, cosh, sinh)

from numpy import polyval, place, extract, any, asarray, nan, inf, pi

import numpy as np
import numpy.random as mtrand

try:
    from scipy.stats import vonmises_cython
except ImportError:
    vonmises_cython = None

# try:

@@ -36,13 +36,12 @@ from ._tukeylambda_stats import (tukeylambda_variance as _tlvar,
                                 tukeylambda_kurtosis as _tlkurt)

from ._distn_infrastructure import (
    rv_continuous, valarray, _skew, _kurtosis, _lazywhere,
    _ncx2_log_pdf, _ncx2_pdf, _ncx2_cdf, get_distribution_names,
    )
from ._constants import _XMIN, _EULER, _ZETA3, _EPS
from .stats import mode

# from .estimation import FitDistribution

__all__ = [

@@ -98,7 +97,7 @@ class kstwobign_gen(rv_continuous):
kstwobign = kstwobign_gen(a=0.0, name='kstwobign')


## Normal distribution

# loc = mu, scale = std
# Keep these implementations out of the class definition so they can be reused
@@ -282,8 +281,7 @@ class anglit_gen(rv_continuous):
        return (arcsin(sqrt(q))-pi/4)

    def _stats(self):
        return 0.0, pi*pi/16-0.5, 0.0, -2*(pi**4 - 96)/(pi*pi-8)**2

    def _entropy(self):
        return 1-log(2)

@@ -380,10 +378,12 @@ class beta_gen(rv_continuous):
    -----
    The probability density function for `beta` is::

                            gamma(a+b) * x**(a-1) * (1-x)**(b-1)
        beta.pdf(x, a, b) = ------------------------------------
                                     gamma(a)*gamma(b)

    for ``0 < x < 1``, ``a > 0``, ``b > 0``, where ``gamma(z)`` is the gamma
    function (`scipy.special.gamma`).

    %(example)s

@@ -409,9 +409,7 @@ class beta_gen(rv_continuous):
        mn = a*1.0 / (a + b)
        var = (a*b*1.0)/(a+b+1.0)/(a+b)**2.0
        g1 = 2.0*(b-a)*sqrt((1.0+a+b)/(a*b)) / (2+a+b)
        g2 = 6.0*(a**3 + a**2*(1-2*b) + b**2*(1+b) - 2*a*b*(2+b))
        g2 /= a*b*(a+b+2)*(a+b+3)
        return mn, var, g1, g2

@@ -422,8 +420,7 @@ class beta_gen(rv_continuous):
        def func(x):
            a, b = x
            sk = 2*(b-a)*sqrt(a + b + 1) / (a + b + 2) / sqrt(a*b)
            ku = a**3 - a**2*(2*b-1) + b**2*(b+1) - 2*a*b*(b+2)
            ku /= a*b*(a+b+2)*(a+b+3)
            ku *= 6
            return [sk-g1, ku-g2]

@@ -486,7 +483,7 @@ class beta_gen(rv_continuous):
            a = b * xbar / (1 - xbar)

            # Compute the MLE for `a` by solving _beta_mle_a.
            theta, info, ier, mesg = optimize.fsolve(
                _beta_mle_a, a,
                args=(b, len(data), np.log(data).sum()),
                full_output=True

@@ -515,7 +512,7 @@ class beta_gen(rv_continuous):
        b = (1 - xbar) * fac

        # Compute the MLE for a and b by solving _beta_mle_ab.
        theta, info, ier, mesg = optimize.fsolve(
            _beta_mle_ab, [a, b],
            args=(len(data), s1, s2),
            full_output=True

@@ -558,11 +555,23 @@ class betaprime_gen(rv_continuous):
        return (special.xlogy(a-1.0, x) - special.xlog1py(a+b, x) -
                special.betaln(a, b))

    def _cdf(self, x, a, b):
        return special.betainc(a, b, x/(1.+x))
        # remove for now: special.hyp2f1 is incorrect for large a
        # x = where(x == 1.0, 1.0-1e-6, x)
        # return pow(x, a)*special.hyp2f1(a+b, a, 1+a, -x)/a/special.beta(a, b)

    def _fitstart(self, data):
        g1 = np.mean(data)
        g2 = mode(data)[0]

        def func(x):
            a, b = x
            me = a / (b-1) if 1 < b else 1e100
            mo = (a-1)/(b+1) if 1 <= a else 0
            return [me-g1, mo-g2]
        a, b = optimize.fsolve(func, (1.0, 1.5))
        return super(betaprime_gen, self)._fitstart(data, args=(a, b))

    def _munp(self, n, a, b):
        if (n == 1.0):

@@ -570,14 +579,12 @@ class betaprime_gen(rv_continuous):
        elif (n == 2.0):
            return where(b > 2, a*(a+1.0)/((b-2.0)*(b-1.0)), inf)
        elif (n == 3.0):
            return where(b > 3, a*(a+1.0)*(a+2.0)/((b-3.0)*(b-2.0)*(b-1.0)),
                         inf)
        elif (n == 4.0):
            return where(b > 4,
                         a*(a+1.0)*(a+2.0)*(a+3.0)/((b-4.0)*(b-3.0)
                                                    * (b-2.0)*(b-1.0)), inf)
        else:
            raise NotImplementedError
betaprime = betaprime_gen(a=0.0, name='betaprime')
@@ -615,14 +622,11 @@ class bradford_gen(rv_continuous):
        g1 = None
        g2 = None
        if 's' in moments:
            g1 = sqrt(2)*(12*c*c-9*c*k*(c+2)+2*k*k*(c*(c+3)+3))
            g1 /= sqrt(c*(c*(k-2)+2*k))*(3*c*(k-2)+6*k)
        if 'k' in moments:
            g2 = (c**3*(k-3)*(k*(3*k-16)+24)+12*k*c*c*(k-4)*(k-3)
                  + 6*c*k*k*(3*k-14) + 12*k**3)
            g2 /= 3*c*(c*(k-2)+2*k)**2
        return mu, mu2, g1, g2

@@ -662,7 +666,7 @@ class burr_gen(rv_continuous):
    """
    def _pdf(self, x, c, d):
        return c*d*(x**(-c-1.0))*((1+x**(-c*1.0))**(-d-1.0))

    def _cdf(self, x, c, d):
        return (1+x**(-c*1.0))**(-d*1.0)

@@ -764,20 +768,18 @@ class chi_gen(rv_continuous):
    Special cases of `chi` are:

        - ``chi(1, loc, scale)`` is equivalent to `halfnorm`
        - ``chi(2, 0, scale)`` is equivalent to `rayleigh`
        - ``chi(3, 0, scale)`` is equivalent to `maxwell`

    %(example)s

    """
    def _rvs(self, df):
        return sqrt(chi2.rvs(df, size=self._size))

    def _pdf(self, x, df):
        return x**(df-1.)*exp(-x*x*0.5)/(2.0)**(df*0.5-1)/gam(df*0.5)

    def _cdf(self, x, df):
        return special.gammainc(df*0.5, 0.5*x*x)

@@ -802,7 +804,7 @@ class chi_gen(rv_continuous):
chi = chi_gen(a=0.0, name='chi')


## Chi-squared (gamma-distributed with loc=0 and scale=2 and shape=df/2)
class chi2_gen(rv_continuous):
    """A chi-squared continuous random variable.

@@ -824,8 +826,7 @@ class chi2_gen(rv_continuous):
        return exp(self._logpdf(x, df))

    def _logpdf(self, x, df):
        return special.xlogy(df/2.-1, x) - x/2. - gamln(df/2.) - (log(2)*df)/2.

    def _cdf(self, x, df):
        return special.chdtr(df, x)

@@ -879,8 +880,7 @@ class cosine_gen(rv_continuous):
        return 1.0/2/pi*(pi + x + sin(x))

    def _stats(self):
        return 0.0, pi*pi/3.0-2.0, 0.0, -6.0*(pi**4-90)/(5.0*(pi*pi-6)**2)

    def _entropy(self):
        return log(4*pi)-1.0

@@ -980,7 +980,7 @@ class dweibull_gen(rv_continuous):
dweibull = dweibull_gen(name='dweibull')


## Exponential (gamma distributed with a=1.0, loc=loc and scale=scale)
class expon_gen(rv_continuous):
    """An exponential continuous random variable.

@@ -1063,9 +1063,11 @@ class exponweib_gen(rv_continuous):
        return exp(self._logpdf(x, a, c))

    def _logpdf(self, x, a, c):
        negxc = -x**c
        exm1c = -special.expm1(negxc)
        logp = (log(a) + log(c) + special.xlogy(a - 1.0, exm1c) +
                negxc + special.xlogy(c - 1.0, x))
        return logp

    def _cdf(self, x, a, c):
        exm1c = -expm1(-x ** c)

@@ -1122,7 +1124,7 @@ exponpow = exponpow_gen(a=0.0, name='exponpow')


class fatiguelife_gen(rv_continuous):
    """A fatigue-life (Birnbaum-Saunders) continuous random variable.

    %(before_notes)s

@@ -1135,6 +1137,11 @@ class fatiguelife_gen(rv_continuous):
    for ``x > 0``.

    References
    ----------
    .. [1] "Birnbaum-Saunders distribution",
           http://en.wikipedia.org/wiki/Birnbaum-Saunders_distribution

    %(example)s

    """

@@ -1344,10 +1351,10 @@ class foldnorm_gen(rv_continuous):
foldnorm = foldnorm_gen(a=0.0, name='foldnorm')


## Extreme Value Type II or Frechet
## (defined in Regress+ documentation as Extreme LB) as
## a limiting value distribution.
##
class frechet_r_gen(rv_continuous):
    """A Frechet right (or Weibull minimum) continuous random variable.

@@ -1518,14 +1525,19 @@ class genpareto_gen(rv_continuous):
    -----
    The probability density function for `genpareto` is::

        genpareto.pdf(x, c) = (1 + c * x)**(-1 - 1/c)

    for ``c >= 0``, ``x >= 0``, and
    for ``c < 0``, ``0 <= x <= -1/c``.

    For ``c == 0``, `genpareto` reduces to the exponential
    distribution, `expon`::

        genpareto.pdf(x, c=0) = exp(-x)

    For ``c == -1``, `genpareto` is uniform on ``[0, 1]``::

        genpareto.cdf(x, c=-1) = x

    %(example)s

@@ -1561,7 +1573,7 @@ class genpareto_gen(rv_continuous):
        c = asarray(c)
        self.b = _lazywhere(c < 0, (c,),
                            lambda c: -1. / c, np.inf)
        return where(abs(c) == inf, False, True)

    def _pdf(self, x, c):
        return exp(self._logpdf(x, c))
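
    # The special cases documented above can be read off the pdf: as c -> 0,
    # (1 + c*x)**(-1 - 1/c) -> exp(-x) (the exponential density), and at
    # c == -1 the cdf 1 - (1 + c*x)**(-1/c) reduces to x (uniform on [0, 1]).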
@@ -1570,46 +1582,23 @@ class genpareto_gen(rv_continuous):
        return _lazywhere((x == x) & (c != 0), (x, c),
                          lambda x, c: -special.xlog1py(c+1., c*x) / c,
                          -x)

    def _cdf(self, x, c):
        return -expm1(self._logsf(x, c))

    def _sf(self, x, c):
        return exp(self._logsf(x, c))

    def _logsf(self, x, c):
        return _lazywhere((x == x) & (c != 0), (x, c),
                          lambda x, c: -log1p(c * x) / c,
                          -x)

    def _ppf(self, q, c):
        return -boxcox1p(-q, -c)

    def _isf(self, q, c):
        return -boxcox(q, -c)
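    # boxcox(q, lam) = (q**lam - 1)/lam, so -boxcox(q, -c) = (q**(-c) - 1)/c,
    # which inverts the survival function (1 + c*x)**(-1/c).  Likewise
    # -boxcox1p(-q, -c) = ((1 - q)**(-c) - 1)/c inverts the cdf without losing
    # precision for small q.  Both reduce to -log(q) and -log1p(-q) at c == 0.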

    def _fitstart(self, data):
        d = asarray(data)

@@ -1705,14 +1694,8 @@ class genpareto_gen(rv_continuous):
        munp = lambda c: __munp(n, c)
        return _lazywhere(c != 0, (c,), munp, gam(n + 1))

    def _entropy(self, c):
        return 1. + c
genpareto = genpareto_gen(a=0.0, name='genpareto')
@@ -1788,12 +1771,15 @@ class genextreme_gen(rv_continuous):
        exp(-exp(-x))*exp(-x),                      for c==0
        exp(-(1-c*x)**(1/c))*(1-c*x)**(1/c-1),      for x <= 1/c, c > 0

    Note that several sources and software packages use the opposite
    convention for the sign of the shape parameter ``c``.

    %(example)s

    """
    def _argcheck(self, c):
        min = np.minimum
        max = np.maximum
        self.b = where(c > 0, 1.0 / max(c, _XMIN), inf)
        self.a = where(c < 0, 1.0 / min(c, -_XMIN), -inf)
        return where(abs(c) == inf, 0, 1)

@@ -1813,7 +1799,7 @@ class genextreme_gen(rv_continuous):
        logpdf = where(
            (cx == 1) | (cx == -inf), -inf, -pex2 + logpex2 - logex2)
        putmask(logpdf, (c == 1) & (x == 1), 0.0)
        return logpdf

    def _cdf(self, x, c):
        return exp(self._logcdf(x, c))

@@ -1830,7 +1816,6 @@ class genextreme_gen(rv_continuous):
    def _ppf(self, q, c):
        x = -log(-log(q))
        return _lazywhere((x == x) & (c != 0), (x, c),
                          lambda x, c: -expm1(-c * x) / c, x)

@@ -1842,8 +1827,7 @@ class genextreme_gen(rv_continuous):
        g4 = g(4)
        g2mg12 = where(abs(c) < 1e-7, (c*pi)**2.0/6.0, g2-g1**2.0)
        gam2k = where(abs(c) < 1e-7, pi**2.0/6.0,
                      expm1(gamln(2.0*c+1.0)-2*gamln(c+1.0))/c**2.0)
        eps = 1e-14
        gamk = where(abs(c) < eps, -_EULER, expm1(gamln(c + 1)) / c)

@@ -1852,14 +1836,12 @@ class genextreme_gen(rv_continuous):
        # skewness
        sk1 = where(c < -1./3, nan,
                    np.sign(c)*(-g3+(g2+2*g2mg12)*g1)/((g2mg12)**(3./2.)))
        sk = where(abs(c) <= eps**0.29, 12*sqrt(6)*_ZETA3/pi**3, sk1)

        # kurtosis
        ku1 = where(c < -1./4, nan,
                    (g4+(-4*g3+3*(g2+g2mg12)*g1)*g1)/((g2mg12)**2))
        ku = where(abs(c) <= (eps)**0.23, 12.0/5.0, ku1-3.0)
        return m, v, sk, ku
@@ -1918,7 +1900,7 @@ def _digammainv(y):
    else:
        x0 = 1.0 / (-y - _em)

    value, info, ier, mesg = optimize.fsolve(func, x0, xtol=1e-11,
                                             full_output=True)
    if ier != 1:
        raise RuntimeError("_digammainv: fsolve failed, y = %r" % y)

    return value[0]

@@ -1926,11 +1908,11 @@ def _digammainv(y):
## Gamma (Use MATLAB and MATHEMATICA (b=theta=scale, a=alpha=shape) definition)

## gamma(a, loc, scale)  with a an integer is the Erlang distribution
## gamma(1, loc, scale)  is the Exponential distribution
## gamma(df/2, 0, 2) is the chi2 distribution with df degrees of freedom.

class gamma_gen(rv_continuous):
    """A gamma continuous random variable.

@@ -2171,7 +2153,6 @@ gengamma = gengamma_gen(a=0.0, name='gengamma')


class genhalflogistic_gen(rv_continuous):
    """A generalized half-logistic continuous random variable.

    %(before_notes)s

@@ -2187,7 +2168,6 @@ class genhalflogistic_gen(rv_continuous):
    %(example)s

    """
    def _argcheck(self, c):
        self.b = 1.0 / c
        return (c > 0)
@@ -2355,7 +2335,7 @@ class halfcauchy_gen(rv_continuous):
        return 2.0/pi/(1.0+x*x)

    def _logpdf(self, x):
        return np.log(2.0/pi) - special.log1p(x*x)

    def _cdf(self, x):
        return 2.0/pi*arctan(x)

@@ -2408,8 +2388,7 @@ class halflogistic_gen(rv_continuous):
            return 9*_ZETA3
        if n == 4:
            return 7*pi**4 / 15.0
        return 2*(1-pow(2.0, 1-n))*special.gamma(n+1)*special.zeta(n, 1)

    def _entropy(self):
        return 2-log(2)

@@ -2450,8 +2429,7 @@ class halfnorm_gen(rv_continuous):
        return special.ndtri((1+q)/2.0)

    def _stats(self):
        return (sqrt(2.0/pi), 1-2.0/pi, sqrt(2)*(4-pi)/(pi-2)**1.5,
                8*(pi-3)/(pi-2)**2)

    def _entropy(self):

@@ -2513,8 +2491,7 @@ class gausshyper_gen(rv_continuous):
    def _pdf(self, x, a, b, c, z):
        Cinv = gam(a)*gam(b)/gam(a+b)*special.hyp2f1(c, a, a+b, -z)
        return 1.0/Cinv * x**(a-1.0) * (1.0-x)**(b-1.0) / (1.0+z*x)**c

    def _munp(self, n, a, b, c, z):
        fac = special.beta(n+a, b) / special.beta(a, b)

@@ -2600,12 +2577,10 @@ class invgauss_gen(rv_continuous):
        return mtrand.wald(mu, 1.0, size=self._size)

    def _pdf(self, x, mu):
        return 1.0/sqrt(2*pi*x**3.0)*exp(-1.0/(2*x)*((x-mu)/mu)**2)

    def _logpdf(self, x, mu):
        return -0.5*log(2*pi) - 1.5*log(x) - ((x-mu)/mu)**2/(2*x)

    def _cdf(self, x, mu):
        fac = sqrt(1.0/x)

@@ -2693,7 +2668,7 @@ class johnsonsb_gen(rv_continuous):
    def _ppf(self, q, a, b):
        return 1.0 / (1 + exp(-1.0 / b * (_norm_ppf(q) - a)))
johnsonsb = johnsonsb_gen(a=0.0, b=1.0, name='johnsonsb')


class johnsonsu_gen(rv_continuous):
@@ -2789,15 +2764,16 @@ class levy_gen(rv_continuous):
    %(example)s

    """
    def _pdf(self, x):
        return 1 / sqrt(2*pi*x) / x * exp(-1/(2*x))

    def _cdf(self, x):
        # Equivalent to 2*norm.sf(sqrt(1/x))
        return special.erfc(sqrt(0.5 / x))

    def _ppf(self, q):
        # Equivalent to 1.0/(norm.isf(q/2)**2) or 0.5/(erfcinv(q)**2)
        val = -special.ndtri(q/2)
        return 1.0 / (val * val)

    def _stats(self):

@@ -2866,21 +2842,17 @@ class levy_stable_gen(rv_continuous):
        TH = uniform.rvs(loc=-pi/2.0, scale=pi, size=sz)
        W = expon.rvs(size=sz)
        if alpha == 1:
            return (2/pi*(pi/2 + beta*TH)*tan(TH) -
                    beta*log((pi/2*W*cos(TH))/(pi/2 + beta*TH)))

        ialpha = 1.0/alpha
        aTH = alpha*TH
        if beta == 0:
            return (W/(cos(TH)/tan(aTH) + sin(TH)) *
                    ((cos(aTH) + sin(aTH)*tan(TH))/W)**ialpha)

        val0 = beta*tan(pi*alpha/2)
        th0 = arctan(val0)/alpha
        val3 = W/(cos(TH)/tan(alpha*(th0+TH)) + sin(TH))
        res3 = (val3*((cos(aTH) + sin(aTH)*tan(TH) -
                       val0*(sin(aTH) - cos(aTH)*tan(TH)))/W)**ialpha)
        return res3

    def _argcheck(self, alpha, beta):
@@ -2924,7 +2896,7 @@ class logistic_gen(rv_continuous):
        return special.expit(x)

    def _ppf(self, q):
        return -log1p(-q) + log(q)
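        # -log1p(-q) + log(q) equals logit(q) = log(q / (1 - q)), written so
        # that the q -> 1 tail is evaluated accurately via log1p.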

    def _stats(self):
        return 0, pi*pi/3.0, 0, 6.0/5.0

@@ -3006,8 +2978,7 @@ class loglaplace_gen(rv_continuous):
        return where(x < 1, 0.5*x**c, 1-0.5*x**(-c))

    def _ppf(self, q, c):
        return where(q < 0.5, (2.0*q)**(1.0/c), (2*(1.0-q))**(-1.0/c))

    def _munp(self, n, c):
        return c**2 / (c**2 - n**2)
@@ -3018,8 +2989,7 @@ loglaplace = loglaplace_gen(a=0.0, name='loglaplace')


def _lognorm_logpdf(x, s):
    return -log(x)**2 / (2*s**2) + np.where(x == 0, 0, -log(s*x*sqrt(2*pi)))


class lognorm_gen(rv_continuous):

@@ -3164,9 +3134,7 @@ class maxwell_gen(rv_continuous):
    def _stats(self):
        val = 3*pi-8
        return (2*sqrt(2.0/pi), 3-8/pi, sqrt(2)*(32-10*pi)/val**1.5,
                (-12*pi*pi + 160*pi - 384) / val**2.0)

    def _entropy(self):

@@ -3220,8 +3188,7 @@ class nakagami_gen(rv_continuous):
    """
    def _pdf(self, x, nu):
        return 2*nu**nu/gam(nu)*(x**(2*nu-1.0))*exp(-nu*x*x)

    def _cdf(self, x, nu):
        return special.gammainc(nu, nu*x*x)

@@ -3295,12 +3262,12 @@ class ncf_gen(rv_continuous):
    -----
    The probability density function for `ncf` is::

        ncf.pdf(x, df1, df2, nc) = exp(nc/2 + nc*df1*x/(2*(df1*x+df2))) *
                    df1**(df1/2) * df2**(df2/2) * x**(df1/2-1) *
                    (df2+df1*x)**(-(df1+df2)/2) *
                    gamma(df1/2)*gamma(1+df2/2) *
                    L^{v1/2-1}^{v2/2}(-nc*v1*x/(2*(v1*x+v2))) /
                    (B(v1/2, v2/2) * gamma((v1+v2)/2))

    for ``df1, df2, nc > 0``.
@@ -3312,17 +3279,15 @@ class ncf_gen(rv_continuous):
    def _pdf_skip(self, x, dfn, dfd, nc):
        n1, n2 = dfn, dfd
        term = -nc/2 + nc*n1*x/(2*(n2+n1*x)) + gamln(n1/2.) + gamln(1+n2/2.)
        term -= gamln((n1+n2)/2.0)
        Px = exp(term)
        Px *= n1**(n1/2) * n2**(n2/2) * x**(n1/2-1)
        Px *= (n2+n1*x)**(-(n1+n2)/2)
        Px *= special.assoc_laguerre(-nc*n1*x/(2.0*(n2+n1*x)), n2/2, n1/2-1)
        Px /= special.beta(n1/2, n2/2)
        # This function does not have a return.  Drop it for now, the generic
        # function seems to work OK.

    def _cdf(self, x, dfn, dfd, nc):
        return special.ncfdtr(dfn, dfd, nc, x)

@@ -3376,7 +3341,7 @@ class t_gen(rv_continuous):
    def _logpdf(self, x, df):
        r = df*1.0
        lPx = gamln((r+1)/2) - gamln(r/2)
        lPx -= 0.5*log(r*pi) + (r+1)/2*log(1+(x**2)/r)
        return lPx

    def _cdf(self, x, df):

@@ -3431,8 +3396,7 @@ class nct_gen(rv_continuous):
        ncx2 = nc*nc*x2
        fac1 = n + x2
        trm1 = n/2.*log(n) + gamln(n+1)
        trm1 -= n*log(2) + nc*nc/2. + (n/2.)*log(fac1) + gamln(n/2.)
        Px = exp(trm1)
        valF = ncx2 / (2*fac1)
        trm1 = sqrt(2)*nc*x*special.hyp1f1(n/2+1, 1.5, valF)
@ -3448,6 +3412,20 @@ class nct_gen(rv_continuous):
def _ppf(self, q, df, nc): def _ppf(self, q, df, nc):
return special.nctdtrit(df, nc, q) return special.nctdtrit(df, nc, q)
def _fitstart(self, data):
me = np.mean(data)
# g2 = mode(data)[0]
sa = np.std(data)
def func(df):
return ((df-2)*(4 * df - 1)-(4 * df - 1)*df/(sa**2+me**2) +
me**2/(sa**2+me**2) * (df*(4 * df - 1) - 3*df))
df0 = np.maximum(2*sa/(sa-1), 1)
df = optimize.fsolve(func, df0)
mu = me*(1 - 3 / (4 * df - 1))
return super(nct_gen, self)._fitstart(data, args=(df, mu))
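These moment-based values only seed the generic fitter; a sketch of the intended use (shape values are arbitrary, not from the diff):
>>> import numpy as np
>>> from scipy.stats import nct
>>> np.random.seed(1234)
>>> data = nct.rvs(14, 0.24, size=500)
>>> df_hat, nc_hat, loc_hat, scale_hat = nct.fit(data)  # uses _fitstart seeds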
def _stats(self, df, nc, moments='mv'): def _stats(self, df, nc, moments='mv'):
# #
# See D. Hogben, R.S. Pinkham, and M.B. Wilk, # See D. Hogben, R.S. Pinkham, and M.B. Wilk,
@ -3455,7 +3433,7 @@ class nct_gen(rv_continuous):
# Biometrika 48, p. 465 (1961). # Biometrika 48, p. 465 (1961).
# e.g. http://www.jstor.org/stable/2332772 (gated) # e.g. http://www.jstor.org/stable/2332772 (gated)
# #
_mu, _mu2, g1, g2 = None, None, None, None mu, mu2, g1, g2 = None, None, None, None
gfac = gam(df/2.-0.5) / gam(df/2.) gfac = gam(df/2.-0.5) / gam(df/2.)
c11 = sqrt(df/2.) * gfac c11 = sqrt(df/2.) * gfac
@ -3679,7 +3657,7 @@ class pearson3_gen(rv_continuous):
# + (alpha - 1)*log(beta*(x - zeta)) + (a - 1)*log(x) # + (alpha - 1)*log(beta*(x - zeta)) + (a - 1)*log(x)
# - beta*(x - zeta) - x # - beta*(x - zeta) - x
# - gamln(alpha) - gamln(a) # - gamln(alpha) - gamln(a)
ans, x, transx, skew, mask, invmask, beta, alpha, _zeta = ( ans, x, transx, skew, mask, invmask, beta, alpha, zeta = (
self._preprocess(x, skew)) self._preprocess(x, skew))
ans[mask] = np.log(_norm_pdf(x[mask])) ans[mask] = np.log(_norm_pdf(x[mask]))
@ -3687,7 +3665,7 @@ class pearson3_gen(rv_continuous):
return ans return ans
def _cdf(self, x, skew): def _cdf(self, x, skew):
ans, x, transx, skew, mask, invmask, beta, alpha, _zeta = ( ans, x, transx, skew, mask, invmask, beta, alpha, zeta = (
self._preprocess(x, skew)) self._preprocess(x, skew))
ans[mask] = _norm_cdf(x[mask]) ans[mask] = _norm_cdf(x[mask])
@ -3695,7 +3673,7 @@ class pearson3_gen(rv_continuous):
return ans return ans
def _rvs(self, skew): def _rvs(self, skew):
_ans, _x, _transx, skew, mask, _invmask, beta, alpha, zeta = ( ans, x, transx, skew, mask, invmask, beta, alpha, zeta = (
self._preprocess([0], skew)) self._preprocess([0], skew))
if mask[0]: if mask[0]:
return mtrand.standard_normal(self._size) return mtrand.standard_normal(self._size)
@ -3726,7 +3704,7 @@ class powerlaw_gen(rv_continuous):
for ``0 <= x <= 1``, ``a > 0``. for ``0 <= x <= 1``, ``a > 0``.
`powerlaw` is a special case of `beta` with ``d == 1``. `powerlaw` is a special case of `beta` with ``b == 1``.
%(example)s %(example)s
@ -3835,8 +3813,7 @@ class rdist_gen(rv_continuous):
""" """
def _pdf(self, x, c): def _pdf(self, x, c):
return (np.power((1.0 - x ** 2), c / 2.0 - 1) / return np.power((1.0 - x**2), c / 2.0 - 1) / special.beta(0.5, c / 2.0)
special.beta(0.5, c / 2.0))
def _cdf(self, x, c): def _cdf(self, x, c):
term1 = x / special.beta(0.5, c / 2.0) term1 = x / special.beta(0.5, c / 2.0)
@ -4023,6 +4000,17 @@ class reciprocal_gen(rv_continuous):
def _munp(self, n, a, b): def _munp(self, n, a, b):
return 1.0/self.d / n * (pow(b*1.0, n) - pow(a*1.0, n)) return 1.0/self.d / n * (pow(b*1.0, n) - pow(a*1.0, n))
def _fitstart(self, data):
a = np.min(data)
a -= 0.01*np.abs(a)
b = np.max(data)
b += 0.01*np.abs(b)
if a <= 0:
da = np.abs(a)+0.001
a += da
b += da
return super(reciprocal_gen, self)._fitstart(data, args=(a, b))
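Widening [a, b] by 1% (and shifting positive) guarantees the data lie strictly inside the starting support, which the generic fit requires; a usage sketch (values arbitrary):
>>> import numpy as np
>>> from scipy.stats import reciprocal
>>> np.random.seed(0)
>>> data = reciprocal.rvs(0.01, 1.01, size=200)
>>> a_hat, b_hat, loc_hat, scale_hat = reciprocal.fit(data)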
def _entropy(self, a, b): def _entropy(self, a, b):
return 0.5*log(a*b)+log(log(b/a)) return 0.5*log(a*b)+log(log(b/a))
reciprocal = reciprocal_gen(name="reciprocal") reciprocal = reciprocal_gen(name="reciprocal")
@ -4087,19 +4075,16 @@ class recipinvgauss_gen(rv_continuous):
return 1.0/mtrand.wald(mu, 1.0, size=self._size) return 1.0/mtrand.wald(mu, 1.0, size=self._size)
def _pdf(self, x, mu): def _pdf(self, x, mu):
return (1.0 / sqrt(2 * pi * x) * exp(-(1 - mu * x) ** 2.0 / return 1.0/sqrt(2*pi*x)*exp(-(1-mu*x)**2.0 / (2*x*mu**2.0))
(2 * x * mu ** 2.0)))
def _logpdf(self, x, mu): def _logpdf(self, x, mu):
return (-(1 - mu * x) ** 2.0 / (2 * x * mu ** 2.0) - return -(1-mu*x)**2.0 / (2*x*mu**2.0) - 0.5*log(2*pi*x)
0.5 * log(2 * pi * x))
def _cdf(self, x, mu): def _cdf(self, x, mu):
trm1 = 1.0/mu - x trm1 = 1.0/mu - x
trm2 = 1.0/mu + x trm2 = 1.0/mu + x
isqx = 1.0/sqrt(x) isqx = 1.0/sqrt(x)
return (1.0 - _norm_cdf(isqx * trm1) - return 1.0-_norm_cdf(isqx*trm1)-exp(2.0/mu)*_norm_cdf(-isqx*trm2)
exp(2.0 / mu) * _norm_cdf(-isqx * trm2))
recipinvgauss = recipinvgauss_gen(a=0.0, name='recipinvgauss') recipinvgauss = recipinvgauss_gen(a=0.0, name='recipinvgauss')
@ -4167,11 +4152,8 @@ class triang_gen(rv_continuous):
return where(q < c, sqrt(c*q), 1-sqrt((1-c)*(1-q))) return where(q < c, sqrt(c*q), 1-sqrt((1-c)*(1-q)))
def _stats(self, c): def _stats(self, c):
return ((c + 1.0) / 3.0, return (c+1.0)/3.0, (1.0-c+c*c)/18, sqrt(2)*(2*c-1)*(c+1)*(c-2) / \
(1.0 - c + c * c) / 18, (5 * np.power((1.0-c+c*c), 1.5)), -3.0/5.0
sqrt(2) * (2 * c - 1) * (c + 1) * (c - 2) /
(5 * np.power((1.0 - c + c * c), 1.5)),
-3.0 / 5.0)
def _entropy(self, c): def _entropy(self, c):
return 0.5-log(2) return 0.5-log(2)
@ -4410,7 +4392,7 @@ class wald_gen(invgauss_gen):
----- -----
The probability density function for `wald` is:: The probability density function for `wald` is::
wald.pdf(x, a) = 1/sqrt(2*pi*x**3) * exp(-(x-1)**2/(2*x)) wald.pdf(x) = 1/sqrt(2*pi*x**3) * exp(-(x-1)**2/(2*x))
for ``x > 0``. for ``x > 0``.
@ -4486,3 +4468,10 @@ class wrapcauchy_gen(rv_continuous):
def _entropy(self, c): def _entropy(self, c):
return log(2*pi*(1-c*c)) return log(2*pi*(1-c*c))
wrapcauchy = wrapcauchy_gen(a=0.0, b=2*pi, name='wrapcauchy') wrapcauchy = wrapcauchy_gen(a=0.0, b=2*pi, name='wrapcauchy')
# Collect names of classes and objects in this module.
pairs = list(globals().items())
_distn_names, _distn_gen_names = get_distribution_names(pairs, rv_continuous)
__all__ = _distn_names + _distn_gen_names
@ -13,17 +13,10 @@ import numpy as np
import numpy.random as mtrand import numpy.random as mtrand
from ._distn_infrastructure import ( from ._distn_infrastructure import (
rv_discrete, _lazywhere, _ncx2_pdf, _ncx2_cdf) rv_discrete, _lazywhere, _ncx2_pdf, _ncx2_cdf, get_distribution_names)
__all__ = [
'binom', 'bernoulli', 'nbinom', 'geom', 'hypergeom',
'logser', 'poisson', 'planck', 'boltzmann', 'randint',
'zipf', 'dlaplace', 'skellam'
]
class binom_gen(rv_discrete): class binom_gen(rv_discrete):
"""A binomial discrete random variable. """A binomial discrete random variable.
%(before_notes)s %(before_notes)s
@ -41,7 +34,6 @@ class binom_gen(rv_discrete):
%(example)s %(example)s
""" """
def _rvs(self, n, p): def _rvs(self, n, p):
return mtrand.binomial(n, p, self._size) return mtrand.binomial(n, p, self._size)
@ -68,16 +60,19 @@ class binom_gen(rv_discrete):
def _ppf(self, q, n, p): def _ppf(self, q, n, p):
vals = ceil(special.bdtrik(q, n, p)) vals = ceil(special.bdtrik(q, n, p))
vals1 = vals - 1 vals1 = np.maximum(vals - 1, 0)
temp = special.bdtr(vals1, n, p) temp = special.bdtr(vals1, n, p)
return np.where(temp >= q, vals1, vals) return np.where(temp >= q, vals1, vals)
def _stats(self, n, p): def _stats(self, n, p, moments='mv'):
q = 1.0 - p q = 1.0 - p
mu = n * p mu = n * p
var = n * p * q var = n * p * q
g1 = (q - p) / sqrt(n * p * q) g1, g2 = None, None
g2 = (1.0 - 6 * p * q) / (n * p * q) if 's' in moments:
g1 = (q - p) / sqrt(var)
if 'k' in moments:
g2 = (1.0 - 6*p*q) / var
return mu, var, g1, g2 return mu, var, g1, g2
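With the moments argument threaded through, skew and kurtosis are only computed when asked for; a sketch of the public behaviour:
>>> from scipy.stats import binom
>>> mean, var = binom.stats(10, 0.4)  # default moments='mv'
>>> float(mean), float(var)
(4.0, 2.4)
>>> mean, var, skew, kurt = binom.stats(10, 0.4, moments='mvsk')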
def _entropy(self, n, p): def _entropy(self, n, p):
@ -89,7 +84,6 @@ binom = binom_gen(name='binom')
class bernoulli_gen(binom_gen): class bernoulli_gen(binom_gen):
"""A Bernoulli discrete random variable. """A Bernoulli discrete random variable.
%(before_notes)s %(before_notes)s
@ -108,7 +102,6 @@ class bernoulli_gen(binom_gen):
%(example)s %(example)s
""" """
def _rvs(self, p): def _rvs(self, p):
return binom_gen._rvs(self, 1, p) return binom_gen._rvs(self, 1, p)
@ -140,7 +133,6 @@ bernoulli = bernoulli_gen(b=1, name='bernoulli')
class nbinom_gen(rv_discrete): class nbinom_gen(rv_discrete):
"""A negative binomial discrete random variable. """A negative binomial discrete random variable.
%(before_notes)s %(before_notes)s
@ -158,7 +150,6 @@ class nbinom_gen(rv_discrete):
%(example)s %(example)s
""" """
def _rvs(self, n, p): def _rvs(self, n, p):
return mtrand.negative_binomial(n, p, self._size) return mtrand.negative_binomial(n, p, self._size)
@ -199,7 +190,6 @@ nbinom = nbinom_gen(name='nbinom')
class geom_gen(rv_discrete): class geom_gen(rv_discrete):
"""A geometric discrete random variable. """A geometric discrete random variable.
%(before_notes)s %(before_notes)s
@ -217,7 +207,6 @@ class geom_gen(rv_discrete):
%(example)s %(example)s
""" """
def _rvs(self, p): def _rvs(self, p):
return mtrand.geometric(p, size=self._size) return mtrand.geometric(p, size=self._size)
@ -257,7 +246,6 @@ geom = geom_gen(a=1, name='geom', longname="A geometric")
class hypergeom_gen(rv_discrete): class hypergeom_gen(rv_discrete):
"""A hypergeometric discrete random variable. """A hypergeometric discrete random variable.
The hypergeometric distribution models drawing objects from a bin. The hypergeometric distribution models drawing objects from a bin.
@ -277,6 +265,7 @@ class hypergeom_gen(rv_discrete):
Examples Examples
-------- --------
>>> from scipy.stats import hypergeom >>> from scipy.stats import hypergeom
>>> import matplotlib.pyplot as plt
Suppose we have a collection of 20 animals, of which 7 are dogs. Then if Suppose we have a collection of 20 animals, of which 7 are dogs. Then if
we want to know the probability of finding a given number of dogs if we we want to know the probability of finding a given number of dogs if we
@ -307,7 +296,6 @@ class hypergeom_gen(rv_discrete):
>>> R = hypergeom.rvs(M, n, N, size=10) >>> R = hypergeom.rvs(M, n, N, size=10)
""" """
def _rvs(self, M, n, N): def _rvs(self, M, n, N):
return mtrand.hypergeometric(n, M-n, N, size=self._size) return mtrand.hypergeometric(n, M-n, N, size=self._size)
@ -321,9 +309,9 @@ class hypergeom_gen(rv_discrete):
def _logpmf(self, k, M, n, N): def _logpmf(self, k, M, n, N):
tot, good = M, n tot, good = M, n
bad = tot - good bad = tot - good
return gamln(good + 1) - gamln(good - k + 1) - gamln(k + 1) + \ return gamln(good+1) - gamln(good-k+1) - gamln(k+1) + gamln(bad+1) \
gamln(bad + 1) - gamln(bad - N + k + 1) - gamln(N - k + 1) - \ - gamln(bad-N+k+1) - gamln(N-k+1) - gamln(tot+1) + gamln(tot-N+1) \
gamln(tot + 1) + gamln(tot - N + 1) + gamln(N + 1) + gamln(N+1)
def _pmf(self, k, M, n, N): def _pmf(self, k, M, n, N):
# same as the following but numerically more precise # same as the following but numerically more precise
@ -339,8 +327,7 @@ class hypergeom_gen(rv_discrete):
mu = N*p mu = N*p
var = m*n*N*(M - N)*1.0/(M*M*(M-1)) var = m*n*N*(M - N)*1.0/(M*M*(M-1))
g1 = (m - n) * (M - 2 * N) / (M - 2.0) * \ g1 = (m - n)*(M-2*N) / (M-2.0) * sqrt((M-1.0) / (m*n*N*(M-N)))
sqrt((M - 1.0) / (m * n * N * (M - N)))
g2 = M*(M+1) - 6.*N*(M-N) - 6.*n*m g2 = M*(M+1) - 6.*N*(M-N) - 6.*n*m
g2 *= (M-1)*M*M g2 *= (M-1)*M*M
@ -372,7 +359,6 @@ hypergeom = hypergeom_gen(name='hypergeom')
# FIXME: Fails _cdfvec # FIXME: Fails _cdfvec
class logser_gen(rv_discrete): class logser_gen(rv_discrete):
"""A Logarithmic (Log-Series, Series) discrete random variable. """A Logarithmic (Log-Series, Series) discrete random variable.
%(before_notes)s %(before_notes)s
@ -390,7 +376,6 @@ class logser_gen(rv_discrete):
%(example)s %(example)s
""" """
def _rvs(self, p): def _rvs(self, p):
# looks wrong for p>0.5, too few k=1 # looks wrong for p>0.5, too few k=1
# trying to use generic is worse, no k=1 at all # trying to use generic is worse, no k=1 at all
@ -420,7 +405,6 @@ logser = logser_gen(a=1, name='logser', longname='A logarithmic')
class poisson_gen(rv_discrete): class poisson_gen(rv_discrete):
"""A Poisson discrete random variable. """A Poisson discrete random variable.
%(before_notes)s %(before_notes)s
@ -438,7 +422,6 @@ class poisson_gen(rv_discrete):
%(example)s %(example)s
""" """
def _rvs(self, mu): def _rvs(self, mu):
return mtrand.poisson(mu, self._size) return mtrand.poisson(mu, self._size)
@ -459,9 +442,9 @@ class poisson_gen(rv_discrete):
def _ppf(self, q, mu): def _ppf(self, q, mu):
vals = ceil(special.pdtrik(q, mu)) vals = ceil(special.pdtrik(q, mu))
vals1 = vals - 1 vals1 = np.maximum(vals - 1, 0)
temp = special.pdtr(vals1, mu) temp = special.pdtr(vals1, mu)
return np.where((temp >= q), vals1, vals) return np.where(temp >= q, vals1, vals)
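Clamping ``vals - 1`` at zero keeps the search from stepping below the support in the left tail; the usual round trip still holds (a sketch):
>>> import numpy as np
>>> from scipy.stats import poisson
>>> k = np.arange(10)
>>> bool(np.all(poisson.ppf(poisson.cdf(k, 0.6), 0.6) == k))
True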
def _stats(self, mu): def _stats(self, mu):
var = mu var = mu
@ -473,7 +456,6 @@ poisson = poisson_gen(name="poisson", longname='A Poisson')
class planck_gen(rv_discrete): class planck_gen(rv_discrete):
"""A Planck discrete exponential random variable. """A Planck discrete exponential random variable.
%(before_notes)s %(before_notes)s
@ -491,7 +473,6 @@ class planck_gen(rv_discrete):
%(example)s %(example)s
""" """
def _argcheck(self, lambda_): def _argcheck(self, lambda_):
if (lambda_ > 0): if (lambda_ > 0):
self.a = 0 self.a = 0
@ -533,7 +514,6 @@ planck = planck_gen(name='planck', longname='A discrete exponential ')
class boltzmann_gen(rv_discrete): class boltzmann_gen(rv_discrete):
"""A Boltzmann (Truncated Discrete Exponential) random variable. """A Boltzmann (Truncated Discrete Exponential) random variable.
%(before_notes)s %(before_notes)s
@ -551,7 +531,6 @@ class boltzmann_gen(rv_discrete):
%(example)s %(example)s
""" """
def _pmf(self, k, lambda_, N): def _pmf(self, k, lambda_, N):
fact = (expm1(-lambda_)) / (expm1(-lambda_ * N)) fact = (expm1(-lambda_)) / (expm1(-lambda_ * N))
return fact * exp(-lambda_ * k) return fact * exp(-lambda_ * k)
@ -576,8 +555,7 @@ class boltzmann_gen(rv_discrete):
trm2 = (z*trm**2 - N*N*zN) trm2 = (z*trm**2 - N*N*zN)
g1 = z*(1+z)*trm**3 - N**3*zN*(1+zN) g1 = z*(1+z)*trm**3 - N**3*zN*(1+zN)
g1 = g1 / trm2**(1.5) g1 = g1 / trm2**(1.5)
g2 = z * (1 + 4 * z + z * z) * \ g2 = z*(1+4*z+z*z)*trm**4 - N**4 * zN*(1+4*zN+zN*zN)
trm ** 4 - N ** 4 * zN * (1 + 4 * zN + zN * zN)
g2 = g2 / trm2 / trm2 g2 = g2 / trm2 / trm2
return mu, var, g1, g2 return mu, var, g1, g2
boltzmann = boltzmann_gen(name='boltzmann', boltzmann = boltzmann_gen(name='boltzmann',
@ -585,7 +563,6 @@ boltzmann = boltzmann_gen(name='boltzmann',
class randint_gen(rv_discrete): class randint_gen(rv_discrete):
"""A uniform discrete random variable. """A uniform discrete random variable.
%(before_notes)s %(before_notes)s
@ -606,7 +583,6 @@ class randint_gen(rv_discrete):
%(example)s %(example)s
""" """
def _argcheck(self, low, high): def _argcheck(self, low, high):
self.a = low self.a = low
self.b = high - 1 self.b = high - 1
@ -648,9 +624,22 @@ randint = randint_gen(name='randint', longname='A discrete uniform '
'(random integer)') '(random integer)')
def harmonic(n, r):
    # Generalized harmonic number H(n, r) = sum(k**-r for k = 1..n).
    return (1. / n**r + (-1)**(r - 1) * special.polygamma(r - 1, n) /
            special.gamma(r) + special.zeta(r, 1))
def H(n):
"""Returns the n-th harmonic number.
http://en.wikipedia.org/wiki/Harmonic_number
"""
# Euler-Mascheroni constant
gamma = 0.57721566490153286060651209008240243104215933593992
return gamma + special.digamma(n+1)
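Since digamma(n + 1) = -gamma + H(n), this matches the direct sum; a quick check:
>>> import numpy as np
>>> from scipy import special
>>> gamma = 0.57721566490153286
>>> bool(np.isclose(gamma + special.digamma(6), sum(1.0/k for k in range(1, 6))))
True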
# FIXME: problems sampling. # FIXME: problems sampling.
class zipf_gen(rv_discrete): class zipf_gen(rv_discrete):
"""A Zipf discrete random variable. """A Zipf discrete random variable.
%(before_notes)s %(before_notes)s
@ -668,7 +657,6 @@ class zipf_gen(rv_discrete):
%(example)s %(example)s
""" """
def _rvs(self, a): def _rvs(self, a):
return mtrand.zipf(a, size=self._size) return mtrand.zipf(a, size=self._size)
@ -688,7 +676,6 @@ zipf = zipf_gen(a=1, name='zipf', longname='A Zipf')
class dlaplace_gen(rv_discrete): class dlaplace_gen(rv_discrete):
"""A Laplacian discrete random variable. """A Laplacian discrete random variable.
%(before_notes)s %(before_notes)s
@ -706,7 +693,6 @@ class dlaplace_gen(rv_discrete):
%(example)s %(example)s
""" """
def _pmf(self, k, a): def _pmf(self, k, a):
return tanh(a/2.0) * exp(-a * abs(k)) return tanh(a/2.0) * exp(-a * abs(k))
@ -736,7 +722,6 @@ dlaplace = dlaplace_gen(a=-np.inf,
class skellam_gen(rv_discrete): class skellam_gen(rv_discrete):
"""A Skellam discrete random variable. """A Skellam discrete random variable.
%(before_notes)s %(before_notes)s
@ -762,7 +747,6 @@ class skellam_gen(rv_discrete):
%(example)s %(example)s
""" """
def _rvs(self, mu1, mu2): def _rvs(self, mu1, mu2):
n = self._size n = self._size
return mtrand.poisson(mu1, n) - mtrand.poisson(mu2, n) return mtrand.poisson(mu1, n) - mtrand.poisson(mu2, n)
@ -788,3 +772,10 @@ class skellam_gen(rv_discrete):
g2 = 1 / var g2 = 1 / var
return mean, var, g1, g2 return mean, var, g1, g2
skellam = skellam_gen(a=-np.inf, name="skellam", longname='A Skellam') skellam = skellam_gen(a=-np.inf, name="skellam", longname='A Skellam')
# Collect names of classes and objects in this module.
pairs = list(globals().items())
_distn_names, _distn_gen_names = get_distribution_names(pairs, rv_discrete)
__all__ = _distn_names + _distn_gen_names
@ -12,9 +12,11 @@ import re
import inspect import inspect
import types import types
import warnings import warnings
from scipy.misc import doccer from scipy.misc import doccer
from ._distr_params import distcont, distdiscrete
from scipy.special import xlogy, chndtr, gammaln, hyp0f1 from scipy.special import xlogy, chndtr, gammaln, hyp0f1, comb
# for root finding for discrete distribution ppf, and max likelihood estimation # for root finding for discrete distribution ppf, and max likelihood estimation
from scipy import optimize from scipy import optimize
@ -23,11 +25,11 @@ from scipy import optimize
from scipy import integrate from scipy import integrate
# to approximate the pdf of a continuous distribution given its cdf # to approximate the pdf of a continuous distribution given its cdf
from scipy.misc import comb, derivative # @UnresolvedImport from scipy.misc import derivative
from numpy import (arange, putmask, ravel, take, ones, sum, shape, from numpy import (arange, putmask, ravel, take, ones, sum, shape,
product, reshape, zeros, floor, logical_and, log, sqrt, exp, product, reshape, zeros, floor, logical_and, log, sqrt, exp,
ndarray, newaxis) ndarray)
from numpy import (place, any, argsort, argmax, vectorize, from numpy import (place, any, argsort, argmax, vectorize,
asarray, nan, inf, isinf, NINF, empty) asarray, nan, inf, isinf, NINF, empty)
@ -55,91 +57,91 @@ docheaders = {'methods': """\nMethods\n-------\n""",
'examples': """\nExamples\n--------\n"""} 'examples': """\nExamples\n--------\n"""}
_doc_rvs = """\ _doc_rvs = """\
rvs(%(shapes)s, loc=0, scale=1, size=1) ``rvs(%(shapes)s, loc=0, scale=1, size=1)``
Random variates. Random variates.
""" """
_doc_pdf = """\ _doc_pdf = """\
pdf(x, %(shapes)s, loc=0, scale=1) ``pdf(x, %(shapes)s, loc=0, scale=1)``
Probability density function. Probability density function.
""" """
_doc_logpdf = """\ _doc_logpdf = """\
logpdf(x, %(shapes)s, loc=0, scale=1) ``logpdf(x, %(shapes)s, loc=0, scale=1)``
Log of the probability density function. Log of the probability density function.
""" """
_doc_pmf = """\ _doc_pmf = """\
pmf(x, %(shapes)s, loc=0, scale=1) ``pmf(x, %(shapes)s, loc=0, scale=1)``
Probability mass function. Probability mass function.
""" """
_doc_logpmf = """\ _doc_logpmf = """\
logpmf(x, %(shapes)s, loc=0, scale=1) ``logpmf(x, %(shapes)s, loc=0, scale=1)``
Log of the probability mass function. Log of the probability mass function.
""" """
_doc_cdf = """\ _doc_cdf = """\
cdf(x, %(shapes)s, loc=0, scale=1) ``cdf(x, %(shapes)s, loc=0, scale=1)``
Cumulative density function. Cumulative density function.
""" """
_doc_logcdf = """\ _doc_logcdf = """\
logcdf(x, %(shapes)s, loc=0, scale=1) ``logcdf(x, %(shapes)s, loc=0, scale=1)``
Log of the cumulative density function. Log of the cumulative density function.
""" """
_doc_sf = """\ _doc_sf = """\
sf(x, %(shapes)s, loc=0, scale=1) ``sf(x, %(shapes)s, loc=0, scale=1)``
Survival function (1-cdf --- sometimes more accurate). Survival function (1-cdf --- sometimes more accurate).
""" """
_doc_logsf = """\ _doc_logsf = """\
logsf(x, %(shapes)s, loc=0, scale=1) ``logsf(x, %(shapes)s, loc=0, scale=1)``
Log of the survival function. Log of the survival function.
""" """
_doc_ppf = """\ _doc_ppf = """\
ppf(q, %(shapes)s, loc=0, scale=1) ``ppf(q, %(shapes)s, loc=0, scale=1)``
Percent point function (inverse of cdf --- percentiles). Percent point function (inverse of cdf --- percentiles).
""" """
_doc_isf = """\ _doc_isf = """\
isf(q, %(shapes)s, loc=0, scale=1) ``isf(q, %(shapes)s, loc=0, scale=1)``
Inverse survival function (inverse of sf). Inverse survival function (inverse of sf).
""" """
_doc_moment = """\ _doc_moment = """\
moment(n, %(shapes)s, loc=0, scale=1) ``moment(n, %(shapes)s, loc=0, scale=1)``
Non-central moment of order n Non-central moment of order n
""" """
_doc_stats = """\ _doc_stats = """\
stats(%(shapes)s, loc=0, scale=1, moments='mv') ``stats(%(shapes)s, loc=0, scale=1, moments='mv')``
Mean('m'), variance('v'), skew('s'), and/or kurtosis('k'). Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
""" """
_doc_entropy = """\ _doc_entropy = """\
entropy(%(shapes)s, loc=0, scale=1) ``entropy(%(shapes)s, loc=0, scale=1)``
(Differential) entropy of the RV. (Differential) entropy of the RV.
""" """
_doc_fit = """\ _doc_fit = """\
fit(data, %(shapes)s, loc=0, scale=1) ``fit(data, %(shapes)s, loc=0, scale=1)``
Parameter estimates for generic data. Parameter estimates for generic data.
""" """
_doc_expect = """\ _doc_expect = """\
expect(func, %(shapes)s, loc=0, scale=1, lb=None, ub=None, conditional=False, **kwds) ``expect(func, %(shapes)s, loc=0, scale=1, lb=None, ub=None, conditional=False, **kwds)``
Expected value of a function (of one argument) with respect to the distribution. Expected value of a function (of one argument) with respect to the distribution.
""" """
_doc_expect_discrete = """\ _doc_expect_discrete = """\
expect(func, %(shapes)s, loc=0, lb=None, ub=None, conditional=False) ``expect(func, %(shapes)s, loc=0, lb=None, ub=None, conditional=False)``
Expected value of a function (of one argument) with respect to the distribution. Expected value of a function (of one argument) with respect to the distribution.
""" """
_doc_median = """\ _doc_median = """\
median(%(shapes)s, loc=0, scale=1) ``median(%(shapes)s, loc=0, scale=1)``
Median of the distribution. Median of the distribution.
""" """
_doc_mean = """\ _doc_mean = """\
mean(%(shapes)s, loc=0, scale=1) ``mean(%(shapes)s, loc=0, scale=1)``
Mean of the distribution. Mean of the distribution.
""" """
_doc_var = """\ _doc_var = """\
var(%(shapes)s, loc=0, scale=1) ``var(%(shapes)s, loc=0, scale=1)``
Variance of the distribution. Variance of the distribution.
""" """
_doc_std = """\ _doc_std = """\
std(%(shapes)s, loc=0, scale=1) ``std(%(shapes)s, loc=0, scale=1)``
Standard deviation of the distribution. Standard deviation of the distribution.
""" """
_doc_interval = """\ _doc_interval = """\
interval(alpha, %(shapes)s, loc=0, scale=1) ``interval(alpha, %(shapes)s, loc=0, scale=1)``
Endpoints of the range that contains alpha percent of the distribution Endpoints of the range that contains alpha percent of the distribution
""" """
_doc_allmethods = ''.join([docheaders['methods'], _doc_rvs, _doc_pdf, _doc_allmethods = ''.join([docheaders['methods'], _doc_rvs, _doc_pdf,
@ -151,7 +153,7 @@ _doc_allmethods = ''.join([docheaders['methods'], _doc_rvs, _doc_pdf,
# Note that the two lines for %(shapes) are searched for and replaced in # Note that the two lines for %(shapes) are searched for and replaced in
# rv_continuous and rv_discrete - update there if the exact string changes # rv_continuous and rv_discrete - update there if the exact string changes
_doc_default_callparams = """\ _doc_default_callparams = """
Parameters Parameters
---------- ----------
x : array_like x : array_like
@ -169,7 +171,8 @@ size : int or tuple of ints, optional
moments : str, optional moments : str, optional
composed of letters ['mvsk'] specifying which moments to compute where composed of letters ['mvsk'] specifying which moments to compute where
'm' = mean, 'v' = variance, 's' = (Fisher's) skew and 'm' = mean, 'v' = variance, 's' = (Fisher's) skew and
'k' = (Fisher's) kurtosis. (default='mv') 'k' = (Fisher's) kurtosis.
Default is 'mv'.
""" """
_doc_default_longsummary = """\ _doc_default_longsummary = """\
Continuous random variables are defined from a standard form and may Continuous random variables are defined from a standard form and may
@ -188,27 +191,42 @@ rv = %(name)s(%(shapes)s, loc=0, scale=1)
_doc_default_example = """\ _doc_default_example = """\
Examples Examples
-------- --------
>>> import matplotlib.pyplot as plt
>>> from wafo.stats import %(name)s >>> from wafo.stats import %(name)s
>>> numargs = %(name)s.numargs >>> import matplotlib.pyplot as plt
>>> [ %(shapes)s ] = [0.9,] * numargs >>> fig, ax = plt.subplots(1, 1)
>>> rv = %(name)s(%(shapes)s)
Display frozen pdf Calculate a few first moments:
>>> x = np.linspace(0, np.minimum(rv.dist.b, 3)) %(set_vals_stmt)s
>>> h = plt.plot(x, rv.pdf(x)) >>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')
Here, ``rv.dist.b`` is the right endpoint of the support of ``rv.dist``. Display the probability density function (``pdf``):
Check accuracy of cdf and ppf >>> x = np.linspace(%(name)s.ppf(0.01, %(shapes)s),
... %(name)s.ppf(0.99, %(shapes)s), 100)
>>> ax.plot(x, %(name)s.pdf(x, %(shapes)s),
... 'r-', lw=5, alpha=0.6, label='%(name)s pdf')
>>> prb = %(name)s.cdf(x, %(shapes)s) Alternatively, freeze the distribution and display the frozen pdf:
>>> h = plt.semilogy(np.abs(x - %(name)s.ppf(prb, %(shapes)s)) + 1e-20)
Random number generation >>> rv = %(name)s(%(shapes)s)
>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
>>> R = %(name)s.rvs(%(shapes)s, size=100) Check accuracy of ``cdf`` and ``ppf``:
>>> vals = %(name)s.ppf([0.001, 0.5, 0.999], %(shapes)s)
>>> np.allclose([0.001, 0.5, 0.999], %(name)s.cdf(vals, %(shapes)s))
True
Generate random numbers:
>>> r = %(name)s.rvs(%(shapes)s, size=1000)
And compare the histogram:
>>> ax.hist(r, normed=True, histtype='stepfilled', alpha=0.2)
>>> ax.legend(loc='best', frameon=False)
>>> plt.show()
Compare ML and MPS method Compare ML and MPS method
>>> phat = %(name)s.fit2(R, method='ml'); >>> phat = %(name)s.fit2(R, method='ml');
@ -301,26 +319,39 @@ docdict_discrete['frozennote'] = _doc_default_frozen_note
_doc_default_discrete_example = """\ _doc_default_discrete_example = """\
Examples Examples
-------- --------
>>> from scipy.stats import %(name)s >>> from wafo.stats import %(name)s
>>> [ %(shapes)s ] = [<Replace with reasonable values>] >>> import matplotlib.pyplot as plt
>>> rv = %(name)s(%(shapes)s) >>> fig, ax = plt.subplots(1, 1)
Calculate a few first moments:
Display frozen pmf %(set_vals_stmt)s
>>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')
>>> x = np.arange(0, np.minimum(rv.dist.b, 3)) Display the probability mass function (``pmf``):
>>> h = plt.vlines(x, 0, rv.pmf(x), lw=2)
Here, ``rv.dist.b`` is the right endpoint of the support of ``rv.dist``. >>> x = np.arange(%(name)s.ppf(0.01, %(shapes)s),
... %(name)s.ppf(0.99, %(shapes)s))
>>> ax.plot(x, %(name)s.pmf(x, %(shapes)s), 'bo', ms=8, label='%(name)s pmf')
>>> ax.vlines(x, 0, %(name)s.pmf(x, %(shapes)s), colors='b', lw=5, alpha=0.5)
Check accuracy of cdf and ppf Alternatively, freeze the distribution and display the frozen ``pmf``:
>>> prb = %(name)s.cdf(x, %(shapes)s) >>> rv = %(name)s(%(shapes)s)
>>> h = plt.semilogy(np.abs(x - %(name)s.ppf(prb, %(shapes)s)) + 1e-20) >>> ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
... label='frozen pmf')
>>> ax.legend(loc='best', frameon=False)
>>> plt.show()
Random number generation Check accuracy of ``cdf`` and ``ppf``:
>>> R = %(name)s.rvs(%(shapes)s, size=100) >>> prob = %(name)s.cdf(x, %(shapes)s)
>>> np.allclose(x, %(name)s.ppf(prob, %(shapes)s))
True
Generate random numbers:
>>> r = %(name)s.rvs(%(shapes)s, size=1000)
""" """
docdict_discrete['example'] = _doc_default_discrete_example docdict_discrete['example'] = _doc_default_discrete_example
@ -408,6 +439,82 @@ def _kurtosis(data):
return m4 / m2**2 - 3 return m4 / m2**2 - 3
# Frozen RV class
class rv_frozen_old(object):
def __init__(self, dist, *args, **kwds):
self.args = args
self.kwds = kwds
# create a new instance
self.dist = dist.__class__(**dist._ctor_param)
# a, b may be set in _argcheck, depending on *args, **kwds. Ouch.
shapes, _, _ = self.dist._parse_args(*args, **kwds)
self.dist._argcheck(*shapes)
def pdf(self, x): # raises AttributeError in frozen discrete distribution
return self.dist.pdf(x, *self.args, **self.kwds)
def logpdf(self, x):
return self.dist.logpdf(x, *self.args, **self.kwds)
def cdf(self, x):
return self.dist.cdf(x, *self.args, **self.kwds)
def logcdf(self, x):
return self.dist.logcdf(x, *self.args, **self.kwds)
def ppf(self, q):
return self.dist.ppf(q, *self.args, **self.kwds)
def isf(self, q):
return self.dist.isf(q, *self.args, **self.kwds)
def rvs(self, size=None):
kwds = self.kwds.copy()
kwds.update({'size': size})
return self.dist.rvs(*self.args, **kwds)
def sf(self, x):
return self.dist.sf(x, *self.args, **self.kwds)
def logsf(self, x):
return self.dist.logsf(x, *self.args, **self.kwds)
def stats(self, moments='mv'):
kwds = self.kwds.copy()
kwds.update({'moments': moments})
return self.dist.stats(*self.args, **kwds)
def median(self):
return self.dist.median(*self.args, **self.kwds)
def mean(self):
return self.dist.mean(*self.args, **self.kwds)
def var(self):
return self.dist.var(*self.args, **self.kwds)
def std(self):
return self.dist.std(*self.args, **self.kwds)
def moment(self, n):
return self.dist.moment(n, *self.args, **self.kwds)
def entropy(self):
return self.dist.entropy(*self.args, **self.kwds)
def pmf(self, k):
return self.dist.pmf(k, *self.args, **self.kwds)
def logpmf(self, k):
return self.dist.logpmf(k, *self.args, **self.kwds)
def interval(self, alpha):
return self.dist.interval(alpha, *self.args, **self.kwds)
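Whichever frozen class is in effect, the calling pattern is the same: freeze shape/loc/scale once, then call parameterless methods (a sketch with an arbitrary distribution):
>>> from scipy.stats import gamma as gamma_dist
>>> rv = gamma_dist(2.0, loc=0, scale=1.5)  # frozen at a=2, scale=1.5
>>> float(rv.mean())  # a * scale
3.0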
# Frozen RV class # Frozen RV class
class rv_frozen(object): class rv_frozen(object):
''' Frozen continuous or discrete 1D Random Variable object (RV) ''' Frozen continuous or discrete 1D Random Variable object (RV)
@ -528,74 +635,6 @@ class rv_frozen(object):
return self.dist.interval(alpha, *self.par) return self.dist.interval(alpha, *self.par)
# Frozen RV class
class rv_frozen_old(object):
def __init__(self, dist, *args, **kwds):
self.args = args
self.kwds = kwds
self.dist = dist
def pdf(self, x): # raises AttributeError in frozen discrete distribution
return self.dist.pdf(x, *self.args, **self.kwds)
def logpdf(self, x):
return self.dist.logpdf(x, *self.args, **self.kwds)
def cdf(self, x):
return self.dist.cdf(x, *self.args, **self.kwds)
def logcdf(self, x):
return self.dist.logcdf(x, *self.args, **self.kwds)
def ppf(self, q):
return self.dist.ppf(q, *self.args, **self.kwds)
def isf(self, q):
return self.dist.isf(q, *self.args, **self.kwds)
def rvs(self, size=None):
kwds = self.kwds.copy()
kwds.update({'size': size})
return self.dist.rvs(*self.args, **kwds)
def sf(self, x):
return self.dist.sf(x, *self.args, **self.kwds)
def logsf(self, x):
return self.dist.logsf(x, *self.args, **self.kwds)
def stats(self, moments='mv'):
kwds = self.kwds.copy()
kwds.update({'moments': moments})
return self.dist.stats(*self.args, **kwds)
def median(self):
return self.dist.median(*self.args, **self.kwds)
def mean(self):
return self.dist.mean(*self.args, **self.kwds)
def var(self):
return self.dist.var(*self.args, **self.kwds)
def std(self):
return self.dist.std(*self.args, **self.kwds)
def moment(self, n):
return self.dist.moment(n, *self.args, **self.kwds)
def entropy(self):
return self.dist.entropy(*self.args, **self.kwds)
def pmf(self, k):
return self.dist.pmf(k, *self.args, **self.kwds)
def logpmf(self, k):
return self.dist.logpmf(k, *self.args, **self.kwds)
def interval(self, alpha):
return self.dist.interval(alpha, *self.args, **self.kwds)
def valarray(shape, value=nan, typecode=None): def valarray(shape, value=nan, typecode=None):
"""Return an array of all value. """Return an array of all value.
@ -693,9 +732,11 @@ def _ncx2_log_pdf(x, df, nc):
fac = -nc/2.0 - x/2.0 + (a-1)*log(x) - a*log(2) - gammaln(a) fac = -nc/2.0 - x/2.0 + (a-1)*log(x) - a*log(2) - gammaln(a)
return fac + np.nan_to_num(log(hyp0f1(a, nc * x/4.0))) return fac + np.nan_to_num(log(hyp0f1(a, nc * x/4.0)))
def _ncx2_pdf(x, df, nc): def _ncx2_pdf(x, df, nc):
return np.exp(_ncx2_log_pdf(x, df, nc)) return np.exp(_ncx2_log_pdf(x, df, nc))
def _ncx2_cdf(x, df, nc): def _ncx2_cdf(x, df, nc):
return chndtr(x, df, nc) return chndtr(x, df, nc)
@ -713,7 +754,8 @@ class rv_generic(object):
self._stats_has_moments = ((sign[2] is not None) or self._stats_has_moments = ((sign[2] is not None) or
('moments' in sign[0])) ('moments' in sign[0]))
def _construct_argparser(self, meths_to_inspect, locscale_in, locscale_out): def _construct_argparser(
self, meths_to_inspect, locscale_in, locscale_out):
"""Construct the parser for the shape arguments. """Construct the parser for the shape arguments.
Generates the argument-parsing functions dynamically and attaches Generates the argument-parsing functions dynamically and attaches
@ -789,6 +831,36 @@ class rv_generic(object):
# allows more general subclassing with *args # allows more general subclassing with *args
self.numargs = len(shapes) self.numargs = len(shapes)
def _construct_doc(self, docdict, shapes_vals=None):
"""Construct the instance docstring with string substitutions."""
tempdict = docdict.copy()
tempdict['name'] = self.name or 'distname'
tempdict['shapes'] = self.shapes or ''
if shapes_vals is None:
shapes_vals = ()
vals = ', '.join(str(_) for _ in shapes_vals)
tempdict['vals'] = vals
if self.shapes:
tempdict['set_vals_stmt'] = '>>> %s = %s' % (self.shapes, vals)
else:
tempdict['set_vals_stmt'] = ''
if self.shapes is None:
# remove shapes from call parameters if there are none
for item in ['callparams', 'default', 'before_notes']:
tempdict[item] = tempdict[item].replace(
"\n%(shapes)s : array_like\n shape parameters", "")
for i in range(2):
if self.shapes is None:
# necessary because we use %(shapes)s in two forms (w w/o ", ")
self.__doc__ = self.__doc__.replace("%(shapes)s, ", "")
self.__doc__ = doccer.docformat(self.__doc__, tempdict)
# correct for empty shapes
self.__doc__ = self.__doc__.replace('(, ', '(').replace(', )', ')')
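The substitution itself is delegated to scipy.misc.doccer; a minimal sketch of what docformat does with the %(shapes)s fields:
>>> from scipy.misc import doccer
>>> doccer.docformat("pdf(x, %(shapes)s, loc=0, scale=1)", {'shapes': 'a, b'})
'pdf(x, a, b, loc=0, scale=1)'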
def freeze(self, *args, **kwds): def freeze(self, *args, **kwds):
"""Freeze the distribution for the given arguments. """Freeze the distribution for the given arguments.
@ -1297,68 +1369,67 @@ class rv_continuous(rv_generic):
Methods Methods
------- -------
rvs(<shape(s)>, loc=0, scale=1, size=1) ``rvs(<shape(s)>, loc=0, scale=1, size=1)``
random variates random variates
pdf(x, <shape(s)>, loc=0, scale=1) ``pdf(x, <shape(s)>, loc=0, scale=1)``
probability density function probability density function
logpdf(x, <shape(s)>, loc=0, scale=1) ``logpdf(x, <shape(s)>, loc=0, scale=1)``
log of the probability density function log of the probability density function
cdf(x, <shape(s)>, loc=0, scale=1) ``cdf(x, <shape(s)>, loc=0, scale=1)``
cumulative density function cumulative density function
logcdf(x, <shape(s)>, loc=0, scale=1) ``logcdf(x, <shape(s)>, loc=0, scale=1)``
log of the cumulative density function log of the cumulative density function
sf(x, <shape(s)>, loc=0, scale=1) ``sf(x, <shape(s)>, loc=0, scale=1)``
survival function (1-cdf --- sometimes more accurate) survival function (1-cdf --- sometimes more accurate)
logsf(x, <shape(s)>, loc=0, scale=1) ``logsf(x, <shape(s)>, loc=0, scale=1)``
log of the survival function log of the survival function
ppf(q, <shape(s)>, loc=0, scale=1) ``ppf(q, <shape(s)>, loc=0, scale=1)``
percent point function (inverse of cdf --- quantiles) percent point function (inverse of cdf --- quantiles)
isf(q, <shape(s)>, loc=0, scale=1) ``isf(q, <shape(s)>, loc=0, scale=1)``
inverse survival function (inverse of sf) inverse survival function (inverse of sf)
moment(n, <shape(s)>, loc=0, scale=1) ``moment(n, <shape(s)>, loc=0, scale=1)``
non-central n-th moment of the distribution. May not work for array non-central n-th moment of the distribution. May not work for array
arguments. arguments.
stats(<shape(s)>, loc=0, scale=1, moments='mv') ``stats(<shape(s)>, loc=0, scale=1, moments='mv')``
mean('m'), variance('v'), skew('s'), and/or kurtosis('k') mean('m'), variance('v'), skew('s'), and/or kurtosis('k')
entropy(<shape(s)>, loc=0, scale=1) ``entropy(<shape(s)>, loc=0, scale=1)``
(differential) entropy of the RV. (differential) entropy of the RV.
fit(data, <shape(s)>, loc=0, scale=1) ``fit(data, <shape(s)>, loc=0, scale=1)``
Parameter estimates for generic data Parameter estimates for generic data
expect(func=None, args=(), loc=0, scale=1, lb=None, ub=None, ``expect(func=None, args=(), loc=0, scale=1, lb=None, ub=None, conditional=False, **kwds)``
conditional=False, **kwds)
Expected value of a function with respect to the distribution. Expected value of a function with respect to the distribution.
Additional kwd arguments passed to integrate.quad Additional kwd arguments passed to integrate.quad
median(<shape(s)>, loc=0, scale=1) ``median(<shape(s)>, loc=0, scale=1)``
Median of the distribution. Median of the distribution.
mean(<shape(s)>, loc=0, scale=1) ``mean(<shape(s)>, loc=0, scale=1)``
Mean of the distribution. Mean of the distribution.
std(<shape(s)>, loc=0, scale=1) ``std(<shape(s)>, loc=0, scale=1)``
Standard deviation of the distribution. Standard deviation of the distribution.
var(<shape(s)>, loc=0, scale=1) ``var(<shape(s)>, loc=0, scale=1)``
Variance of the distribution. Variance of the distribution.
interval(alpha, <shape(s)>, loc=0, scale=1) ``interval(alpha, <shape(s)>, loc=0, scale=1)``
Interval that with `alpha` percent probability contains a random Interval that with `alpha` percent probability contains a random
realization of this distribution. realization of this distribution.
__call__(<shape(s)>, loc=0, scale=1) ``__call__(<shape(s)>, loc=0, scale=1)``
Calling a distribution instance creates a frozen RV object with the Calling a distribution instance creates a frozen RV object with the
same methods but holding the given shape, location, and scale fixed. same methods but holding the given shape, location, and scale fixed.
See Notes section. See Notes section.
@ -1469,6 +1540,12 @@ class rv_continuous(rv_generic):
super(rv_continuous, self).__init__() super(rv_continuous, self).__init__()
# save the ctor parameters, cf generic freeze
self._ctor_param = dict(
momtype=momtype, a=a, b=b, xtol=xtol,
badvalue=badvalue, name=name, longname=longname,
shapes=shapes, extradoc=extradoc)
if badvalue is None: if badvalue is None:
badvalue = nan badvalue = nan
if name is None: if name is None:
@ -1483,11 +1560,7 @@ class rv_continuous(rv_generic):
self.b = inf self.b = inf
self.xtol = xtol self.xtol = xtol
self._size = 1 self._size = 1
self.m = 0.0
self.moment_type = momtype self.moment_type = momtype
self.expandarr = 1
self.shapes = shapes self.shapes = shapes
self._construct_argparser(meths_to_inspect=[self._pdf, self._cdf], self._construct_argparser(meths_to_inspect=[self._pdf, self._cdf],
locscale_in='loc=0, scale=1', locscale_in='loc=0, scale=1',
@ -1497,13 +1570,13 @@ class rv_continuous(rv_generic):
self._ppfvec = vectorize(self._ppf_single, otypes='d') self._ppfvec = vectorize(self._ppf_single, otypes='d')
self._ppfvec.nin = self.numargs + 1 self._ppfvec.nin = self.numargs + 1
self.vecentropy = vectorize(self._entropy, otypes='d') self.vecentropy = vectorize(self._entropy, otypes='d')
self.vecentropy.nin = self.numargs + 1
self._cdfvec = vectorize(self._cdf_single, otypes='d') self._cdfvec = vectorize(self._cdf_single, otypes='d')
self._cdfvec.nin = self.numargs + 1 self._cdfvec.nin = self.numargs + 1
# backwards compatibility # backwards compat. these were removed in 0.14.0, put back but
self.vecfunc = self._ppfvec # deprecated in 0.14.1:
self.veccdf = self._cdfvec self.vecfunc = np.deprecate(self._ppfvec, "vecfunc")
self.veccdf = np.deprecate(self._cdfvec, "veccdf")
self.extradoc = extradoc self.extradoc = extradoc
if momtype == 0: if momtype == 0:
@ -1527,7 +1600,8 @@ class rv_continuous(rv_generic):
self._construct_default_doc(longname=longname, self._construct_default_doc(longname=longname,
extradoc=extradoc) extradoc=extradoc)
else: else:
self._construct_doc() dct = dict(distcont)
self._construct_doc(docdict, dct.get(self.name))
def _construct_default_doc(self, longname=None, extradoc=None): def _construct_default_doc(self, longname=None, extradoc=None):
"""Construct instance docstring from the default template.""" """Construct instance docstring from the default template."""
@ -1540,24 +1614,7 @@ class rv_continuous(rv_generic):
self.__doc__ = ''.join(['%s continuous random variable.' % longname, self.__doc__ = ''.join(['%s continuous random variable.' % longname,
'\n\n%(before_notes)s\n', docheaders['notes'], '\n\n%(before_notes)s\n', docheaders['notes'],
extradoc, '\n%(example)s']) extradoc, '\n%(example)s'])
self._construct_doc() self._construct_doc(docdict)
def _construct_doc(self):
"""Construct the instance docstring with string substitutions."""
tempdict = docdict.copy()
tempdict['name'] = self.name or 'distname'
tempdict['shapes'] = self.shapes or ''
if self.shapes is None:
# remove shapes from call parameters if there are none
for item in ['callparams', 'default', 'before_notes']:
tempdict[item] = tempdict[item].replace(
"\n%(shapes)s : array_like\n shape parameters", "")
for _i in range(2):
if self.shapes is None:
# necessary because we use %(shapes)s in two forms (w w/o ", ")
self.__doc__ = self.__doc__.replace("%(shapes)s, ", "")
self.__doc__ = doccer.docformat(self.__doc__, tempdict)
def _ppf_to_solve(self, x, q, *args): def _ppf_to_solve(self, x, q, *args):
return self.cdf(*(x, )+args)-q return self.cdf(*(x, )+args)-q
@ -2162,7 +2219,7 @@ class rv_continuous(rv_generic):
# logDj = log((yU-yL)/(r-1)) for j = i+1,i+2,...i+r-1 # logDj = log((yU-yL)/(r-1)) for j = i+1,i+2,...i+r-1
# The following is OK when only minimization of T is wanted # The following is OK when only minimization of T is wanted
i_tie = np.nonzero(tie) i_tie, = np.nonzero(tie)
tiedata = x[i_tie] tiedata = x[i_tie]
logD[i_tie + 1] = log(self._pdf(tiedata, *args)) - log(scale) logD[i_tie + 1] = log(self._pdf(tiedata, *args)) - log(scale)
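The added trailing comma is the actual fix here: np.nonzero returns a tuple of index arrays even in 1-D, so it must be unpacked before integer arithmetic such as ``i_tie + 1``:
>>> import numpy as np
>>> tie = np.array([False, True, True, False])
>>> np.nonzero(tie)  # always a tuple
(array([1, 2]),)
>>> i_tie, = np.nonzero(tie)
>>> i_tie + 1
array([2, 3])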
@ -2265,7 +2322,8 @@ class rv_continuous(rv_generic):
restore = None restore = None
else: else:
if len(fixedn) == len(index): if len(fixedn) == len(index):
raise ValueError("All parameters fixed. There is nothing to optimize.") raise ValueError(
"All parameters fixed. There is nothing to optimize.")
def restore(args, theta): def restore(args, theta):
# Replace with theta for all numbers not in fixedn # Replace with theta for all numbers not in fixedn
@ -2462,15 +2520,15 @@ class rv_continuous(rv_generic):
def _entropy(self, *args): def _entropy(self, *args):
def integ(x): def integ(x):
val = self._pdf(x, *args) val = self._pdf(x, *args)
return xlogy(val, val) return -xlogy(val, val)
# upper limit is often inf, so suppress warnings when integrating # upper limit is often inf, so suppress warnings when integrating
olderr = np.seterr(over='ignore') olderr = np.seterr(over='ignore')
entr = -integrate.quad(integ, self.a, self.b)[0] h = integrate.quad(integ, self.a, self.b)[0]
np.seterr(**olderr) np.seterr(**olderr)
if not np.isnan(entr): if not np.isnan(h):
return entr return h
else: else:
# try with different limits if integration problems # try with different limits if integration problems
low, upp = self.ppf([1e-10, 1. - 1e-10], *args) low, upp = self.ppf([1e-10, 1. - 1e-10], *args)
@ -2482,7 +2540,7 @@ class rv_continuous(rv_generic):
lower = low lower = low
else: else:
lower = self.a lower = self.a
return -integrate.quad(integ, lower, upper)[0] return integrate.quad(integ, lower, upper)[0]
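With the sign moved into ``integ``, the quad result is the entropy itself; checking against the closed form 0.5*log(2*pi*e) for the standard normal (a sketch):
>>> import numpy as np
>>> from scipy import integrate
>>> from scipy.stats import norm
>>> def integ(x):
...     p = norm.pdf(x)
...     return -p * np.log(p)
>>> h, _ = integrate.quad(integ, -10, 10)
>>> bool(np.isclose(h, 0.5 * np.log(2 * np.pi * np.e)))
True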
def entropy(self, *args, **kwds): def entropy(self, *args, **kwds):
""" """
@ -2606,12 +2664,12 @@ def _drv_nonzero(self, k, *args):
def _drv_moment(self, n, *args): def _drv_moment(self, n, *args):
n = asarray(n) n = asarray(n)
return sum(self.xk**n[newaxis, ...] * self.pk, axis=0) return sum(self.xk**n[np.newaxis, ...] * self.pk, axis=0)
def _drv_moment_gen(self, t, *args): def _drv_moment_gen(self, t, *args):
t = asarray(t) t = asarray(t)
return sum(exp(self.xk * t[newaxis, ...]) * self.pk, axis=0) return sum(exp(self.xk * t[np.newaxis, ...]) * self.pk, axis=0)
def _drv2_moment(self, n, *args): def _drv2_moment(self, n, *args):
@ -2716,8 +2774,7 @@ def entropy(pk, qk=None, base=None):
If only probabilities `pk` are given, the entropy is calculated as If only probabilities `pk` are given, the entropy is calculated as
``S = -sum(pk * log(pk), axis=0)``. ``S = -sum(pk * log(pk), axis=0)``.
If `qk` is not None, then compute a relative entropy (also known as If `qk` is not None, then compute the Kullback-Leibler divergence
Kullback-Leibler divergence or Kullback-Leibler distance)
``S = sum(pk * log(pk / qk), axis=0)``. ``S = sum(pk * log(pk / qk), axis=0)``.
This routine will normalize `pk` and `qk` if they don't sum to 1. This routine will normalize `pk` and `qk` if they don't sum to 1.
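For reference, the two modes of the public helper (natural-log base by default):
>>> from scipy.stats import entropy
>>> round(entropy([0.5, 0.5]), 6)  # Shannon entropy, log(2)
0.693147
>>> round(entropy([0.9, 0.1], [0.5, 0.5]), 6)  # KL divergence
0.368064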
@ -2809,65 +2866,64 @@ class rv_discrete(rv_generic):
Methods Methods
------- -------
generic.rvs(<shape(s)>, loc=0, size=1) ``generic.rvs(<shape(s)>, loc=0, size=1)``
random variates random variates
generic.pmf(x, <shape(s)>, loc=0) ``generic.pmf(x, <shape(s)>, loc=0)``
probability mass function probability mass function
logpmf(x, <shape(s)>, loc=0) ``logpmf(x, <shape(s)>, loc=0)``
log of the probability density function log of the probability density function
generic.cdf(x, <shape(s)>, loc=0) ``generic.cdf(x, <shape(s)>, loc=0)``
cumulative density function cumulative density function
generic.logcdf(x, <shape(s)>, loc=0) ``generic.logcdf(x, <shape(s)>, loc=0)``
log of the cumulative density function log of the cumulative density function
generic.sf(x, <shape(s)>, loc=0) ``generic.sf(x, <shape(s)>, loc=0)``
survival function (1-cdf --- sometimes more accurate) survival function (1-cdf --- sometimes more accurate)
generic.logsf(x, <shape(s)>, loc=0, scale=1) ``generic.logsf(x, <shape(s)>, loc=0, scale=1)``
log of the survival function log of the survival function
generic.ppf(q, <shape(s)>, loc=0) ``generic.ppf(q, <shape(s)>, loc=0)``
percent point function (inverse of cdf --- percentiles) percent point function (inverse of cdf --- percentiles)
generic.isf(q, <shape(s)>, loc=0) ``generic.isf(q, <shape(s)>, loc=0)``
inverse survival function (inverse of sf) inverse survival function (inverse of sf)
generic.moment(n, <shape(s)>, loc=0) ``generic.moment(n, <shape(s)>, loc=0)``
non-central n-th moment of the distribution. May not work for array non-central n-th moment of the distribution. May not work for array
arguments. arguments.
generic.stats(<shape(s)>, loc=0, moments='mv') ``generic.stats(<shape(s)>, loc=0, moments='mv')``
mean('m', axis=0), variance('v'), skew('s'), and/or kurtosis('k') mean('m', axis=0), variance('v'), skew('s'), and/or kurtosis('k')
generic.entropy(<shape(s)>, loc=0) ``generic.entropy(<shape(s)>, loc=0)``
entropy of the RV entropy of the RV
generic.expect(func=None, args=(), loc=0, lb=None, ub=None, ``generic.expect(func=None, args=(), loc=0, lb=None, ub=None, conditional=False)``
conditional=False)
Expected value of a function with respect to the distribution. Expected value of a function with respect to the distribution.
Additional kwd arguments passed to integrate.quad Additional kwd arguments passed to integrate.quad
generic.median(<shape(s)>, loc=0) ``generic.median(<shape(s)>, loc=0)``
Median of the distribution. Median of the distribution.
generic.mean(<shape(s)>, loc=0) ``generic.mean(<shape(s)>, loc=0)``
Mean of the distribution. Mean of the distribution.
generic.std(<shape(s)>, loc=0) ``generic.std(<shape(s)>, loc=0)``
Standard deviation of the distribution. Standard deviation of the distribution.
generic.var(<shape(s)>, loc=0) ``generic.var(<shape(s)>, loc=0)``
Variance of the distribution. Variance of the distribution.
generic.interval(alpha, <shape(s)>, loc=0) ``generic.interval(alpha, <shape(s)>, loc=0)``
Interval that with `alpha` percent probability contains a random Interval that with `alpha` percent probability contains a random
realization of this distribution. realization of this distribution.
generic(<shape(s)>, loc=0) ``generic(<shape(s)>, loc=0)``
calling a distribution instance returns a frozen distribution calling a distribution instance returns a frozen distribution
Notes Notes
@ -2911,32 +2967,25 @@ class rv_discrete(rv_generic):
Custom made discrete distribution: Custom made discrete distribution:
>>> import matplotlib.pyplot as plt
>>> from scipy import stats >>> from scipy import stats
>>> xk = np.arange(7) >>> xk = np.arange(7)
>>> pk = (0.1, 0.2, 0.3, 0.1, 0.1, 0.1, 0.1) >>> pk = (0.1, 0.2, 0.3, 0.1, 0.1, 0.0, 0.2)
>>> custm = stats.rv_discrete(name='custm', values=(xk, pk)) >>> custm = stats.rv_discrete(name='custm', values=(xk, pk))
>>> h = plt.plot(xk, custm.pmf(xk)) >>>
>>> import matplotlib.pyplot as plt
>>> fig, ax = plt.subplots(1, 1)
>>> ax.plot(xk, custm.pmf(xk), 'ro', ms=12, mec='r')
>>> ax.vlines(xk, 0, custm.pmf(xk), colors='r', lw=4)
>>> plt.show()
Random number generation: Random number generation:
>>> R = custm.rvs(size=100) >>> R = custm.rvs(size=100)
Display frozen pmf:
>>> numargs = generic.numargs
>>> [ <shape(s)> ] = ['Replace with reasonable value', ]*numargs
>>> rv = generic(<shape(s)>)
>>> x = np.arange(0, np.min(rv.dist.b, 3)+1)
>>> h = plt.plot(x, rv.pmf(x))
Here, ``rv.dist.b`` is the right endpoint of the support of ``rv.dist``.
Check accuracy of cdf and ppf: Check accuracy of cdf and ppf:
>>> prb = generic.cdf(x, <shape(s)>) >>> prb = custm.cdf(x, <shape(s)>)
>>> h = plt.semilogy(np.abs(x-generic.ppf(prb, <shape(s)>))+1e-20) >>> h = plt.semilogy(np.abs(x-custm.ppf(prb, <shape(s)>))+1e-20)
""" """
def __init__(self, a=0, b=inf, name=None, badvalue=None, def __init__(self, a=0, b=inf, name=None, badvalue=None,
@ -2945,6 +2994,12 @@ class rv_discrete(rv_generic):
super(rv_discrete, self).__init__() super(rv_discrete, self).__init__()
# cf generic freeze
self._ctor_param = dict(
a=a, b=b, name=name, badvalue=badvalue,
moment_tol=moment_tol, values=values, inc=inc,
longname=longname, shapes=shapes, extradoc=extradoc)
if badvalue is None: if badvalue is None:
badvalue = nan badvalue = nan
if name is None: if name is None:
@ -3001,9 +3056,11 @@ class rv_discrete(rv_generic):
_vec_generic_moment.nin = self.numargs + 2 _vec_generic_moment.nin = self.numargs + 2
self.generic_moment = instancemethod(_vec_generic_moment, self.generic_moment = instancemethod(_vec_generic_moment,
self, rv_discrete) self, rv_discrete)
# backwards compat. was removed in 0.14.0, put back but
# backwards compatibility # deprecated in 0.14.1:
self.vec_generic_moment = _vec_generic_moment self.vec_generic_moment = np.deprecate(_vec_generic_moment,
"vec_generic_moment",
"generic_moment")
# correct nin for ppf vectorization # correct nin for ppf vectorization
_vppf = vectorize(_drv2_ppfsingle, otypes='d') _vppf = vectorize(_drv2_ppfsingle, otypes='d')
@ -3028,7 +3085,8 @@ class rv_discrete(rv_generic):
self._construct_default_doc(longname=longname, self._construct_default_doc(longname=longname,
extradoc=extradoc) extradoc=extradoc)
else: else:
self._construct_doc() dct = dict(distdiscrete)
self._construct_doc(docdict_discrete, dct.get(self.name))
#discrete RV do not have the scale parameter, remove it #discrete RV do not have the scale parameter, remove it
self.__doc__ = self.__doc__.replace( self.__doc__ = self.__doc__.replace(
@ -3044,24 +3102,7 @@ class rv_discrete(rv_generic):
self.__doc__ = ''.join(['%s discrete random variable.' % longname, self.__doc__ = ''.join(['%s discrete random variable.' % longname,
'\n\n%(before_notes)s\n', docheaders['notes'], '\n\n%(before_notes)s\n', docheaders['notes'],
extradoc, '\n%(example)s']) extradoc, '\n%(example)s'])
self._construct_doc() self._construct_doc(docdict_discrete)
def _construct_doc(self):
"""Construct the instance docstring with string substitutions."""
tempdict = docdict_discrete.copy()
tempdict['name'] = self.name or 'distname'
tempdict['shapes'] = self.shapes or ''
if self.shapes is None:
# remove shapes from call parameters if there are none
for item in ['callparams', 'default', 'before_notes']:
tempdict[item] = tempdict[item].replace(
"\n%(shapes)s : array_like\n shape parameters", "")
for _i in range(2):
if self.shapes is None:
# necessary because we use %(shapes)s in two forms (w w/o ", ")
self.__doc__ = self.__doc__.replace("%(shapes)s, ", "")
self.__doc__ = doccer.docformat(self.__doc__, tempdict)
def _nonzero(self, k, *args): def _nonzero(self, k, *args):
return floor(k) == k return floor(k) == k
@ -3137,7 +3178,7 @@ class rv_discrete(rv_generic):
place(output, (1-cond0) + np.isnan(k), self.badvalue) place(output, (1-cond0) + np.isnan(k), self.badvalue)
if any(cond): if any(cond):
goodargs = argsreduce(cond, *((k,)+args)) goodargs = argsreduce(cond, *((k,)+args))
place(output, cond, self._pmf(*goodargs)) place(output, cond, np.clip(self._pmf(*goodargs), 0, 1))
if output.ndim == 0: if output.ndim == 0:
return output[()] return output[()]
return output return output
@ -3213,7 +3254,7 @@ class rv_discrete(rv_generic):
if any(cond): if any(cond):
goodargs = argsreduce(cond, *((k,)+args)) goodargs = argsreduce(cond, *((k,)+args))
place(output, cond, self._cdf(*goodargs)) place(output, cond, np.clip(self._cdf(*goodargs), 0, 1))
if output.ndim == 0: if output.ndim == 0:
return output[()] return output[()]
return output return output
@ -3291,7 +3332,7 @@ class rv_discrete(rv_generic):
place(output, cond2, 1.0) place(output, cond2, 1.0)
if any(cond): if any(cond):
goodargs = argsreduce(cond, *((k,)+args)) goodargs = argsreduce(cond, *((k,)+args))
place(output, cond, self._sf(*goodargs)) place(output, cond, np.clip(self._sf(*goodargs), 0, 1))
if output.ndim == 0: if output.ndim == 0:
return output[()] return output[()]
return output return output
@ -3382,7 +3423,7 @@ class rv_discrete(rv_generic):
def isf(self, q, *args, **kwds): def isf(self, q, *args, **kwds):
""" """
Inverse survival function (1-sf) at q of the given RV. Inverse survival function (inverse of `sf`) at q of the given RV.
Parameters Parameters
---------- ----------
@ -3555,3 +3596,36 @@ class rv_discrete(rv_generic):
if count > maxcount: if count > maxcount:
warnings.warn('expect(): sum did not converge', RuntimeWarning) warnings.warn('expect(): sum did not converge', RuntimeWarning)
return tot/invfac return tot/invfac
def get_distribution_names(namespace_pairs, rv_base_class):
"""
Collect names of statistical distributions and their generators.
Parameters
----------
namespace_pairs : sequence
A snapshot of (name, value) pairs in the namespace of a module.
rv_base_class : class
The base class of random variable generator classes in a module.
Returns
-------
distn_names : list of strings
Names of the statistical distributions.
distn_gen_names : list of strings
Names of the generators of the statistical distributions.
Note that these are not simply the names of the statistical
distributions, with a _gen suffix added.
"""
distn_names = []
distn_gen_names = []
for name, value in namespace_pairs:
if name.startswith('_'):
continue
if name.endswith('_gen') and issubclass(value, rv_base_class):
distn_gen_names.append(name)
if isinstance(value, rv_base_class):
distn_names.append(name)
return distn_names, distn_gen_names
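A self-contained sketch of the naming convention this relies on (using a stand-in base class, not the real rv_continuous):
>>> class rv_base(object):
...     pass
>>> class demo_gen(rv_base):
...     pass
>>> demo = demo_gen()
>>> pairs = [('demo_gen', demo_gen), ('demo', demo), ('_hidden', 1)]
>>> get_distribution_names(pairs, rv_base)
(['demo'], ['demo_gen'])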
@ -0,0 +1,116 @@
"""
Sane parameters for stats.distributions.
"""
distcont = [
['alpha', (3.5704770516650459,)],
['anglit', ()],
['arcsine', ()],
['beta', (2.3098496451481823, 0.62687954300963677)],
['betaprime', (5, 6)],
['bradford', (0.29891359763170633,)],
['burr', (10.5, 4.3)],
['cauchy', ()],
['chi', (78,)],
['chi2', (55,)],
['cosine', ()],
['dgamma', (1.1023326088288166,)],
['dweibull', (2.0685080649914673,)],
['erlang', (10,)],
['expon', ()],
['exponpow', (2.697119160358469,)],
['exponweib', (2.8923945291034436, 1.9505288745913174)],
['f', (29, 18)],
['fatiguelife', (29,)], # correction numargs = 1
['fisk', (3.0857548622253179,)],
['foldcauchy', (4.7164673455831894,)],
['foldnorm', (1.9521253373555869,)],
['frechet_l', (3.6279911255583239,)],
['frechet_r', (1.8928171603534227,)],
['gamma', (1.9932305483800778,)],
['gausshyper', (13.763771604130699, 3.1189636648681431,
2.5145980350183019, 5.1811649903971615)], # veryslow
['genexpon', (9.1325976465418908, 16.231956600590632, 3.2819552690843983)],
['genextreme', (-0.1,)],
['gengamma', (4.4162385429431925, 3.1193091679242761)],
['genhalflogistic', (0.77274727809929322,)],
['genlogistic', (0.41192440799679475,)],
['genpareto', (0.1,)], # use case with finite moments
['gilbrat', ()],
['gompertz', (0.94743713075105251,)],
['gumbel_l', ()],
['gumbel_r', ()],
['halfcauchy', ()],
['halflogistic', ()],
['halfnorm', ()],
['hypsecant', ()],
['invgamma', (4.0668996136993067,)],
['invgauss', (0.14546264555347513,)],
['invweibull', (10.58,)],
['johnsonsb', (4.3172675099141058, 3.1837781130785063)],
['johnsonsu', (2.554395574161155, 2.2482281679651965)],
['ksone', (1000,)], # replace 22 by 100 to avoid failing range, ticket 956
['kstwobign', ()],
['laplace', ()],
['levy', ()],
['levy_l', ()],
['levy_stable', (0.35667405469844993,
-0.67450531578494011)], # NotImplementedError
# rvs not tested
['loggamma', (0.41411931826052117,)],
['logistic', ()],
['loglaplace', (3.2505926592051435,)],
['lognorm', (0.95368226960575331,)],
['lomax', (1.8771398388773268,)],
['maxwell', ()],
['mielke', (10.4, 3.6)],
['nakagami', (4.9673794866666237,)],
['ncf', (27, 27, 0.41578441799226107)],
['nct', (14, 0.24045031331198066)],
['ncx2', (21, 1.0560465975116415)],
['norm', ()],
['pareto', (2.621716532144454,)],
['pearson3', (0.1,)],
['powerlaw', (1.6591133289905851,)],
['powerlognorm', (2.1413923530064087, 0.44639540782048337)],
['powernorm', (4.4453652254590779,)],
['rayleigh', ()],
['rdist', (0.9,)], # feels also slow
['recipinvgauss', (0.63004267809369119,)],
['reciprocal', (0.0062309367010521255, 1.0062309367010522)],
['rice', (0.7749725210111873,)],
['semicircular', ()],
['t', (2.7433514990818093,)],
['triang', (0.15785029824528218,)],
['truncexpon', (4.6907725456810478,)],
['truncnorm', (-1.0978730080013919, 2.7306754109031979)],
['truncnorm', (0.1, 2.)],
['tukeylambda', (3.1321477856738267,)],
['uniform', ()],
['vonmises', (3.9939042581071398,)],
['vonmises_line', (3.9939042581071398,)],
['wald', ()],
['weibull_max', (2.8687961709100187,)],
['weibull_min', (1.7866166930421596,)],
['wrapcauchy', (0.031071279018614728,)]]
distdiscrete = [
    ['bernoulli', (0.3,)],
    ['binom', (5, 0.4)],
    ['boltzmann', (1.4, 19)],
    ['dlaplace', (0.8,)],  # 0.5
    ['geom', (0.5,)],
    ['hypergeom', (30, 12, 6)],
    ['hypergeom', (21, 3, 12)],  # numpy.random (3,18,12) numpy ticket:921
    ['hypergeom', (21, 18, 11)],  # numpy.random (18,3,11) numpy ticket:921
['logser', (0.6,)], # reenabled, numpy ticket:921
['nbinom', (5, 0.5)],
['nbinom', (0.4, 0.4)], # from tickets: 583
['planck', (0.51,)], # 4.1
['poisson', (0.6,)],
['randint', (7, 31)],
['skellam', (15, 8)],
['zipf', (6.5,)]
]
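# Sketch of how these parameter tables are typically consumed in tests
# (scipy.stats stands in for the distribution namespace; illustrative only).
import scipy.stats as stats

for name, shapes in distcont[:5]:
    dist = getattr(stats, name, None)
    if dist is None:  # distribution not present in this scipy build
        continue
    median = dist.ppf(0.5, *shapes)
    assert abs(dist.cdf(median, *shapes) - 0.5) < 1e-6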

@ -3,13 +3,13 @@
# #
from __future__ import division, print_function, absolute_import from __future__ import division, print_function, absolute_import
from scipy.misc import doccer
from functools import wraps
import numpy as np import numpy as np
import scipy.linalg import scipy.linalg
from scipy.misc import doccer
from scipy.special import gammaln
__all__ = ['multivariate_normal']
__all__ = ['multivariate_normal', 'dirichlet']
_LOG_2PI = np.log(2 * np.pi) _LOG_2PI = np.log(2 * np.pi)
@ -53,13 +53,22 @@ def _process_parameters(dim, mean, cov):
cov.shape = (1, 1) cov.shape = (1, 1)
if mean.ndim != 1 or mean.shape[0] != dim: if mean.ndim != 1 or mean.shape[0] != dim:
raise ValueError("Array 'mean' must be vector of length %d." % dim) raise ValueError("Array 'mean' must be a vector of length %d." % dim)
if cov.ndim == 0: if cov.ndim == 0:
cov = cov * np.eye(dim) cov = cov * np.eye(dim)
elif cov.ndim == 1: elif cov.ndim == 1:
cov = np.diag(cov) cov = np.diag(cov)
elif cov.ndim == 2 and cov.shape != (dim, dim):
rows, cols = cov.shape
if rows != cols:
msg = ("Array 'cov' must be square if it is two dimensional,"
" but cov.shape = %s." % str(cov.shape))
else: else:
if cov.shape != (dim, dim): msg = ("Dimension mismatch: array 'cov' is of shape %s,"
" but 'mean' is a vector of length %d.")
msg = msg % (str(cov.shape), len(mean))
raise ValueError(msg)
elif cov.ndim > 2:
raise ValueError("Array 'cov' must be at most two-dimensional," raise ValueError("Array 'cov' must be at most two-dimensional,"
" but cov.ndim = %d" % cov.ndim) " but cov.ndim = %d" % cov.ndim)
@ -97,6 +106,41 @@ def _squeeze_output(out):
return out return out
def _eigvalsh_to_eps(spectrum, cond=None, rcond=None):
"""
Determine which eigenvalues are "small" given the spectrum.
    This is for compatibility across various linear algebra functions
    that should agree about whether or not a Hermitian matrix is
    numerically singular and what its numerical matrix rank is.
This is designed to be compatible with scipy.linalg.pinvh.
Parameters
----------
spectrum : 1d ndarray
Array of eigenvalues of a Hermitian matrix.
cond, rcond : float, optional
Cutoff for small eigenvalues.
        Eigenvalues smaller than rcond * largest_eigenvalue are
considered zero.
If None or -1, suitable machine precision is used.
Returns
-------
eps : float
Magnitude cutoff for numerical negligibility.
"""
if rcond is not None:
cond = rcond
if cond in [None, -1]:
t = spectrum.dtype.char.lower()
factor = {'f': 1E3, 'd': 1E6}
cond = factor[t] * np.finfo(t).eps
eps = cond * np.max(abs(spectrum))
return eps
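# Illustrative: for a float64 spectrum the factor is 1e6, so the cutoff
# is 1e6 * machine-eps * max|eigenvalue|.
import numpy as np
spectrum = np.array([4.0, 1.0, 1e-18])
eps = _eigvalsh_to_eps(spectrum)    # ~ 4 * 1e6 * 2.22e-16 ~ 8.9e-10
assert (spectrum > eps).sum() == 2  # the 1e-18 eigenvalue counts as zero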
def _pinv_1d(v, eps=1e-5): def _pinv_1d(v, eps=1e-5):
""" """
A helper function for computing the pseudoinverse. A helper function for computing the pseudoinverse.
@ -106,7 +150,7 @@ def _pinv_1d(v, eps=1e-5):
v : iterable of numbers v : iterable of numbers
This may be thought of as a vector of eigenvalues or singular values. This may be thought of as a vector of eigenvalues or singular values.
eps : float eps : float
Elements of v smaller than eps are considered negligible. Values with magnitude no greater than eps are considered negligible.
Returns Returns
------- -------
@ -114,83 +158,87 @@ def _pinv_1d(v, eps=1e-5):
A vector of pseudo-inverted numbers. A vector of pseudo-inverted numbers.
""" """
return np.array([0 if abs(x) < eps else 1/x for x in v], dtype=float) return np.array([0 if abs(x) <= eps else 1/x for x in v], dtype=float)
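# Illustrative: entries with magnitude at or below eps invert to exactly zero.
import numpy as np
assert np.allclose(_pinv_1d(np.array([2.0, 1e-12, -4.0]), eps=1e-5),
                   [0.5, 0.0, -0.25])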
def _psd_pinv_decomposed_log_pdet(mat, cond=None, rcond=None, class _PSD(object):
lower=True, check_finite=True):
""" """
Compute a decomposition of the pseudo-inverse and the logarithm of Compute coordinated functions of a symmetric positive semidefinite matrix.
the pseudo-determinant of a symmetric positive semi-definite
matrix. This class addresses two issues. Firstly it allows the pseudoinverse,
the logarithm of the pseudo-determinant, and the rank of the matrix
The pseudo-determinant of a matrix is defined as the product of to be computed using one call to eigh instead of three.
the non-zero eigenvalues, and coincides with the usual determinant Secondly it allows these functions to be computed in a way
for a full matrix. that gives mutually compatible results.
All of the functions are computed with a common understanding as to
which of the eigenvalues are to be considered negligibly small.
The functions are designed to coordinate with scipy.linalg.pinvh()
but not necessarily with np.linalg.det() or with np.linalg.matrix_rank().
Parameters Parameters
---------- ----------
mat : array_like M : 2d array-like
Input array of shape (`m`, `n`) Symmetric positive semidefinite matrix.
cond, rcond : float or None cond, rcond : float, optional
Cutoff for 'small' singular values. Cutoff for small eigenvalues.
Eigenvalues smaller than ``rcond*largest_eigenvalue`` Singular values smaller than rcond * largest_eigenvalue are
are considered zero. considered zero.
If None or -1, suitable machine precision is used. If None or -1, suitable machine precision is used.
lower : bool, optional lower : bool, optional
Whether the pertinent array data is taken from the lower or upper Whether the pertinent array data is taken from the lower
triangle of `mat`. (Default: lower) or upper triangle of M. (Default: lower)
check_finite : boolean, optional check_finite : bool, optional
Whether to check that the input matrix contains only finite numbers. Whether to check that the input matrices contain only finite
Disabling may give a performance gain, but may result in problems numbers. Disabling may give a performance gain, but may result
(crashes, non-termination) if the inputs do contain infinities or NaNs. in problems (crashes, non-termination) if the inputs do contain
infinities or NaNs.
allow_singular : bool, optional
Whether to allow a singular matrix. (Default: True)
Returns Notes
------- -----
M : array_like The arguments are similar to those of scipy.linalg.pinvh().
The pseudo-inverse of the input matrix is np.dot(M, M.T).
log_pdet : float
Logarithm of the pseudo-determinant of the matrix.
""" """
# Compute the symmetric eigendecomposition.
# The input covariance matrix is required to be real symmetric
# and positive semidefinite which implies that its eigenvalues
# are all real and non-negative,
# but clip them anyway to avoid numerical issues.
# TODO: the code to set cond/rcond is identical to that in
# scipy.linalg.{pinvh, pinv2} and if/when this function is subsumed
# into scipy.linalg it should probably be shared between all of
# these routines.
def __init__(self, M, cond=None, rcond=None, lower=True,
check_finite=True, allow_singular=True):
# Compute the symmetric eigendecomposition.
# Note that eigh takes care of array conversion, chkfinite, # Note that eigh takes care of array conversion, chkfinite,
# and assertion that the matrix is square. # and assertion that the matrix is square.
s, u = scipy.linalg.eigh(mat, lower=lower, check_finite=check_finite) s, u = scipy.linalg.eigh(M, lower=lower, check_finite=check_finite)
if rcond is not None:
cond = rcond
if cond in [None, -1]:
t = u.dtype.char.lower()
factor = {'f': 1E3, 'd': 1E6}
cond = factor[t] * np.finfo(t).eps
eps = cond * np.max(abs(s))
eps = _eigvalsh_to_eps(s, cond, rcond)
if np.min(s) < -eps: if np.min(s) < -eps:
raise ValueError('the covariance matrix must be positive semidefinite') raise ValueError('the input matrix must be positive semidefinite')
d = s[s > eps]
if len(d) < len(s) and not allow_singular:
raise np.linalg.LinAlgError('singular matrix')
s_pinv = _pinv_1d(s, eps) s_pinv = _pinv_1d(s, eps)
U = np.multiply(u, np.sqrt(s_pinv)) U = np.multiply(u, np.sqrt(s_pinv))
log_pdet = np.sum(np.log(s[s > eps]))
return U, log_pdet # Initialize the eagerly precomputed attributes.
self.rank = len(d)
self.U = U
self.log_pdet = np.sum(np.log(d))
# Initialize an attribute to be lazily computed.
self._pinv = None
@property
def pinv(self):
if self._pinv is None:
self._pinv = np.dot(self.U, self.U.T)
return self._pinv
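# Sketch (assumes the _PSD class above): a rank-deficient but positive
# semidefinite matrix is accepted, and rank/log_pdet/pinv are consistent.
import numpy as np
cov = np.array([[1.0, 1.0],
                [1.0, 1.0]])       # eigenvalues 0 and 2
psd = _PSD(cov)                    # allow_singular defaults to True here
assert psd.rank == 1
assert np.allclose(psd.log_pdet, np.log(2.0))  # pseudo-determinant = 2
assert np.allclose(np.dot(np.dot(cov, psd.pinv), cov), cov)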
_doc_default_callparams = \ _doc_default_callparams = """\
"""mean : array_like, optional mean : array_like, optional
Mean of the distribution (default zero) Mean of the distribution (default zero)
cov : array_like, optional cov : array_like, optional
Covariance matrix of the distribution (default one) Covariance matrix of the distribution (default one)
allow_singular : bool, optional
Whether to allow a singular covariance matrix. (Default: False)
""" """
_doc_callparams_note = \ _doc_callparams_note = \
@ -224,15 +272,13 @@ class multivariate_normal_gen(object):
The `mean` keyword specifies the mean. The `cov` keyword specifies the The `mean` keyword specifies the mean. The `cov` keyword specifies the
covariance matrix. covariance matrix.
.. versionadded:: 0.14.0
Methods Methods
------- -------
pdf(x, mean=None, cov=1) pdf(x, mean=None, cov=1, allow_singular=False)
Probability density function. Probability density function.
logpdf(x, mean=None, cov=1) logpdf(x, mean=None, cov=1, allow_singular=False)
Log of the probability density function. Log of the probability density function.
rvs(mean=None, cov=1) rvs(mean=None, cov=1, allow_singular=False, size=1)
Draw random samples from a multivariate normal distribution. Draw random samples from a multivariate normal distribution.
entropy() entropy()
Compute the differential entropy of the multivariate normal. Compute the differential entropy of the multivariate normal.
@ -247,7 +293,7 @@ class multivariate_normal_gen(object):
and covariance parameters, returning a "frozen" multivariate normal and covariance parameters, returning a "frozen" multivariate normal
random variable: random variable:
rv = multivariate_normal(mean=None, scale=1) rv = multivariate_normal(mean=None, cov=1, allow_singular=False)
- Frozen object with the same methods but holding the given - Frozen object with the same methods but holding the given
mean and covariance fixed. mean and covariance fixed.
@ -269,8 +315,11 @@ class multivariate_normal_gen(object):
where :math:`\mu` is the mean, :math:`\Sigma` the covariance matrix, where :math:`\mu` is the mean, :math:`\Sigma` the covariance matrix,
and :math:`k` is the dimension of the space where :math:`x` takes values. and :math:`k` is the dimension of the space where :math:`x` takes values.
.. versionadded:: 0.14.0
Examples Examples
-------- --------
>>> import matplotlib.pyplot as plt
>>> from scipy.stats import multivariate_normal >>> from scipy.stats import multivariate_normal
>>> x = np.linspace(0, 5, 10, endpoint=False) >>> x = np.linspace(0, 5, 10, endpoint=False)
>>> y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y >>> y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y
@ -294,16 +343,17 @@ class multivariate_normal_gen(object):
def __init__(self): def __init__(self):
self.__doc__ = doccer.docformat(self.__doc__, docdict_params) self.__doc__ = doccer.docformat(self.__doc__, docdict_params)
def __call__(self, mean=None, cov=1): def __call__(self, mean=None, cov=1, allow_singular=False):
""" """
Create a frozen multivariate normal distribution. Create a frozen multivariate normal distribution.
See `multivariate_normal_frozen` for more information. See `multivariate_normal_frozen` for more information.
""" """
return multivariate_normal_frozen(mean, cov) return multivariate_normal_frozen(mean, cov,
allow_singular=allow_singular)
def _logpdf(self, x, mean, prec_U, log_det_cov): def _logpdf(self, x, mean, prec_U, log_det_cov, rank):
""" """
Parameters Parameters
---------- ----------
@ -317,6 +367,8 @@ class multivariate_normal_gen(object):
is the precision matrix, i.e. inverse of the covariance matrix. is the precision matrix, i.e. inverse of the covariance matrix.
log_det_cov : float log_det_cov : float
Logarithm of the determinant of the covariance matrix Logarithm of the determinant of the covariance matrix
rank : int
Rank of the covariance matrix.
Notes Notes
----- -----
@ -324,12 +376,11 @@ class multivariate_normal_gen(object):
called directly; use 'logpdf' instead. called directly; use 'logpdf' instead.
""" """
dim = x.shape[-1]
dev = x - mean dev = x - mean
maha = np.sum(np.square(np.dot(dev, prec_U)), axis=-1) maha = np.sum(np.square(np.dot(dev, prec_U)), axis=-1)
return -0.5 * (dim * _LOG_2PI + log_det_cov + maha) return -0.5 * (rank * _LOG_2PI + log_det_cov + maha)
def logpdf(self, x, mean, cov): def logpdf(self, x, mean, cov, allow_singular=False):
""" """
Log of the multivariate normal probability density function. Log of the multivariate normal probability density function.
@ -351,11 +402,11 @@ class multivariate_normal_gen(object):
""" """
dim, mean, cov = _process_parameters(None, mean, cov) dim, mean, cov = _process_parameters(None, mean, cov)
x = _process_quantiles(x, dim) x = _process_quantiles(x, dim)
prec_U, log_det_cov = _psd_pinv_decomposed_log_pdet(cov) psd = _PSD(cov, allow_singular=allow_singular)
out = self._logpdf(x, mean, prec_U, log_det_cov) out = self._logpdf(x, mean, psd.U, psd.log_pdet, psd.rank)
return _squeeze_output(out) return _squeeze_output(out)
def pdf(self, x, mean, cov): def pdf(self, x, mean, cov, allow_singular=False):
""" """
Multivariate normal probability density function. Multivariate normal probability density function.
@ -377,8 +428,8 @@ class multivariate_normal_gen(object):
""" """
dim, mean, cov = _process_parameters(None, mean, cov) dim, mean, cov = _process_parameters(None, mean, cov)
x = _process_quantiles(x, dim) x = _process_quantiles(x, dim)
prec_U, log_det_cov = _psd_pinv_decomposed_log_pdet(cov) psd = _PSD(cov, allow_singular=allow_singular)
out = np.exp(self._logpdf(x, mean, prec_U, log_det_cov)) out = np.exp(self._logpdf(x, mean, psd.U, psd.log_pdet, psd.rank))
return _squeeze_output(out) return _squeeze_output(out)
def rvs(self, mean=None, cov=1, size=1): def rvs(self, mean=None, cov=1, size=1):
@ -425,13 +476,14 @@ class multivariate_normal_gen(object):
""" """
dim, mean, cov = _process_parameters(None, mean, cov) dim, mean, cov = _process_parameters(None, mean, cov)
return 1/2 * np.log(np.linalg.det(2 * np.pi * np.e * cov)) return 0.5 * np.log(np.linalg.det(2 * np.pi * np.e * cov))
multivariate_normal = multivariate_normal_gen() multivariate_normal = multivariate_normal_gen()
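# Sketch (module context, hypothetical numbers): with allow_singular=True
# a degenerate Gaussian is evaluated on its rank-1 support; the density
# matches a 1-D N(0, 2) along the support direction.
import numpy as np
from scipy.stats import norm

cov = np.array([[1.0, 1.0],
                [1.0, 1.0]])       # singular covariance, rank 1
x = np.array([0.5, 0.5])           # a point on the support line y = x
p = multivariate_normal.pdf(x, np.zeros(2), cov, allow_singular=True)
assert np.allclose(p, norm(0, np.sqrt(2)).pdf(np.sqrt(0.5)))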
class multivariate_normal_frozen(object): class multivariate_normal_frozen(object):
def __init__(self, mean=None, cov=1): def __init__(self, mean=None, cov=1, allow_singular=False):
""" """
Create a frozen multivariate normal distribution. Create a frozen multivariate normal distribution.
@ -441,6 +493,9 @@ class multivariate_normal_frozen(object):
Mean of the distribution (default zero) Mean of the distribution (default zero)
cov : array_like, optional cov : array_like, optional
Covariance matrix of the distribution (default one) Covariance matrix of the distribution (default one)
allow_singular : bool, optional
If this flag is True then tolerate a singular
covariance matrix (default False).
Examples Examples
-------- --------
@ -456,13 +511,13 @@ class multivariate_normal_frozen(object):
""" """
self.dim, self.mean, self.cov = _process_parameters(None, mean, cov) self.dim, self.mean, self.cov = _process_parameters(None, mean, cov)
self.prec_U, self._log_det_cov = _psd_pinv_decomposed_log_pdet(self.cov) self.cov_info = _PSD(self.cov, allow_singular=allow_singular)
self._mnorm = multivariate_normal_gen() self._mnorm = multivariate_normal_gen()
def logpdf(self, x): def logpdf(self, x):
x = _process_quantiles(x, self.dim) x = _process_quantiles(x, self.dim)
out = self._mnorm._logpdf(x, self.mean, self.prec_U, self._log_det_cov) out = self._mnorm._logpdf(x, self.mean, self.cov_info.U,
self.cov_info.log_pdet, self.cov_info.rank)
return _squeeze_output(out) return _squeeze_output(out)
def pdf(self, x): def pdf(self, x):
@ -481,7 +536,9 @@ class multivariate_normal_frozen(object):
Entropy of the multivariate normal distribution Entropy of the multivariate normal distribution
""" """
return 1/2 * (self.dim * (_LOG_2PI + 1) + self._log_det_cov) log_pdet = self.cov_info.log_pdet
rank = self.cov_info.rank
return 0.5 * (rank * (_LOG_2PI + 1) + log_pdet)
# Set frozen generator docstrings from corresponding docstrings in # Set frozen generator docstrings from corresponding docstrings in
@ -491,3 +548,337 @@ for name in ['logpdf', 'pdf', 'rvs']:
method_frozen = multivariate_normal_frozen.__dict__[name] method_frozen = multivariate_normal_frozen.__dict__[name]
method_frozen.__doc__ = doccer.docformat(method.__doc__, docdict_noparams) method_frozen.__doc__ = doccer.docformat(method.__doc__, docdict_noparams)
method.__doc__ = doccer.docformat(method.__doc__, docdict_params) method.__doc__ = doccer.docformat(method.__doc__, docdict_params)
_dirichlet_doc_default_callparams = """\
alpha : array_like
The concentration parameters. The number of entries determines the
dimensionality of the distribution.
"""
_dirichlet_doc_frozen_callparams = ""
_dirichlet_doc_frozen_callparams_note = \
"""See class definition for a detailed description of parameters."""
dirichlet_docdict_params = {
'_dirichlet_doc_default_callparams': _dirichlet_doc_default_callparams,
}
dirichlet_docdict_noparams = {
'_dirichlet_doc_default_callparams': _dirichlet_doc_frozen_callparams,
}
def _dirichlet_check_parameters(alpha):
alpha = np.asarray(alpha)
if np.min(alpha) <= 0:
raise ValueError("All parameters must be greater than 0")
elif alpha.ndim != 1:
raise ValueError("Parameter vector 'a' must be one dimensional, " +
"but a.shape = %s." % str(alpha.shape))
return alpha
def _dirichlet_check_input(alpha, x):
x = np.asarray(x)
if x.shape[0] + 1 != alpha.shape[0] and x.shape[0] != alpha.shape[0]:
raise ValueError("Vector 'x' must have one entry less then the" +
" parameter vector 'a', but alpha.shape = " +
"%s and " % alpha.shape +
"x.shape = %s." % x.shape)
if x.shape[0] != alpha.shape[0]:
xk = np.array([1 - np.sum(x, 0)])
if xk.ndim == 1:
x = np.append(x, xk)
elif xk.ndim == 2:
x = np.vstack((x, xk))
else:
raise ValueError("The input must be one dimensional or a two "
"dimensional matrix containing the entries.")
if np.min(x) < 0:
raise ValueError("Each entry in 'x' must be greater or equal zero.")
if np.max(x) > 1:
raise ValueError("Each entry in 'x' must be smaller or equal one.")
if (np.abs(np.sum(x, 0) - 1.0) > 10e-10).any():
raise ValueError("The input vector 'x' must lie within the normal " +
"simplex. but sum(x)=%f." % np.sum(x, 0))
return x
def _lnB(alpha):
r"""
Internal helper function to compute the log of the useful quotient
.. math::
        B(\alpha) = \frac{\prod_{i=1}^{K}\Gamma(\alpha_i)}{\Gamma\left(\sum_{i=1}^{K}\alpha_i\right)}
Parameters
----------
%(_dirichlet_doc_default_callparams)s
Returns
-------
    lnB : scalar
        Log of the helper quotient, internal use only
"""
return np.sum(gammaln(alpha)) - gammaln(np.sum(alpha))
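# Illustrative check: exp(_lnB(alpha)) equals prod(Gamma(a_i)) / Gamma(sum a_i).
import numpy as np
from scipy.special import gamma

alpha = np.array([2.0, 3.0, 4.0])
assert np.allclose(np.exp(_lnB(alpha)),
                   np.prod(gamma(alpha)) / gamma(alpha.sum()))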
class dirichlet_gen(object):
r"""
A Dirichlet random variable.
The `alpha` keyword specifies the concentration parameters of the
distribution.
.. versionadded:: 0.15.0
Methods
-------
pdf(x, alpha)
Probability density function.
logpdf(x, alpha)
Log of the probability density function.
rvs(alpha, size=1)
Draw random samples from a Dirichlet distribution.
mean(alpha)
The mean of the Dirichlet distribution
var(alpha)
The variance of the Dirichlet distribution
entropy(alpha)
        Compute the differential entropy of the Dirichlet distribution.
Parameters
----------
x : array_like
Quantiles, with the last axis of `x` denoting the components.
%(_dirichlet_doc_default_callparams)s
Alternatively, the object may be called (as a function) to fix
concentration parameters, returning a "frozen" Dirichlet
random variable:
rv = dirichlet(alpha)
- Frozen object with the same methods but holding the given
concentration parameters fixed.
Notes
-----
    Each :math:`\alpha` entry must be positive. The distribution has
    support only on the simplex defined by
.. math::
\sum_{i=1}^{K} x_i \le 1
The probability density function for `dirichlet` is
.. math::
f(x) = \frac{1}{\mathrm{B}(\boldsymbol\alpha)} \prod_{i=1}^K x_i^{\alpha_i - 1}
where
.. math::
\mathrm{B}(\boldsymbol\alpha) = \frac{\prod_{i=1}^K \Gamma(\alpha_i)}{\Gamma\bigl(\sum_{i=1}^K \alpha_i\bigr)}
    and :math:`\boldsymbol\alpha=(\alpha_1,\ldots,\alpha_K)` are the
    concentration parameters and :math:`K` is the dimension of the space
    where :math:`x` takes values.
"""
def __init__(self):
self.__doc__ = doccer.docformat(self.__doc__, dirichlet_docdict_params)
def __call__(self, alpha):
return dirichlet_frozen(alpha)
def _logpdf(self, x, alpha):
"""
Parameters
----------
x : ndarray
Points at which to evaluate the log of the probability
density function
%(_dirichlet_doc_default_callparams)s
Notes
-----
As this function does no argument checking, it should not be
called directly; use 'logpdf' instead.
"""
lnB = _lnB(alpha)
return - lnB + np.sum((np.log(x.T) * (alpha - 1)).T, 0)
def logpdf(self, x, alpha):
"""
Log of the Dirichlet probability density function.
Parameters
----------
x : array_like
Quantiles, with the last axis of `x` denoting the components.
%(_dirichlet_doc_default_callparams)s
Returns
-------
pdf : ndarray
Log of the probability density function evaluated at `x`
"""
alpha = _dirichlet_check_parameters(alpha)
x = _dirichlet_check_input(alpha, x)
out = self._logpdf(x, alpha)
return _squeeze_output(out)
def pdf(self, x, alpha):
"""
The Dirichlet probability density function.
Parameters
----------
x : array_like
Quantiles, with the last axis of `x` denoting the components.
%(_dirichlet_doc_default_callparams)s
Returns
-------
pdf : ndarray
The probability density function evaluated at `x`
"""
alpha = _dirichlet_check_parameters(alpha)
x = _dirichlet_check_input(alpha, x)
out = np.exp(self._logpdf(x, alpha))
return _squeeze_output(out)
def mean(self, alpha):
"""
        Compute the mean of the Dirichlet distribution.
Parameters
----------
%(_dirichlet_doc_default_callparams)s
Returns
-------
        mu : ndarray
Mean of the Dirichlet distribution
"""
alpha = _dirichlet_check_parameters(alpha)
out = alpha / (np.sum(alpha))
return _squeeze_output(out)
def var(self, alpha):
"""
        Compute the variance of the Dirichlet distribution.
Parameters
----------
%(_dirichlet_doc_default_callparams)s
Returns
-------
        v : ndarray
Variance of the Dirichlet distribution
"""
alpha = _dirichlet_check_parameters(alpha)
alpha0 = np.sum(alpha)
out = (alpha * (alpha0 - alpha)) / ((alpha0 * alpha0) * (alpha0 + 1))
return out
def entropy(self, alpha):
"""
        Compute the differential entropy of the Dirichlet distribution.
Parameters
----------
%(_dirichlet_doc_default_callparams)s
Returns
-------
h : scalar
Entropy of the Dirichlet distribution
"""
alpha = _dirichlet_check_parameters(alpha)
alpha0 = np.sum(alpha)
lnB = _lnB(alpha)
K = alpha.shape[0]
out = lnB + (alpha0 - K) * scipy.special.psi(alpha0) - np.sum(
(alpha - 1) * scipy.special.psi(alpha))
return _squeeze_output(out)
def rvs(self, alpha, size=1):
"""
Draw random samples from a Dirichlet distribution.
Parameters
----------
%(_dirichlet_doc_default_callparams)s
size : integer, optional
Number of samples to draw (default 1).
Returns
-------
rvs : ndarray or scalar
Random variates of size (`size`, `N`), where `N` is the
dimension of the random variable.
"""
alpha = _dirichlet_check_parameters(alpha)
return np.random.dirichlet(alpha, size=size)
dirichlet = dirichlet_gen()
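# Usage sketch (assumes the instance above): alpha = (1, 1, 1) is the
# uniform distribution on the 2-simplex, whose density is 2 everywhere.
import numpy as np
alpha = np.array([1.0, 1.0, 1.0])
x = np.array([0.2, 0.3, 0.5])
assert np.allclose(dirichlet.pdf(x, alpha), 2.0)
assert np.allclose(dirichlet.mean(alpha), 1.0 / 3.0)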
class dirichlet_frozen(object):
def __init__(self, alpha):
self.alpha = _dirichlet_check_parameters(alpha)
self._dirichlet = dirichlet_gen()
def logpdf(self, x):
return self._dirichlet.logpdf(x, self.alpha)
def pdf(self, x):
return self._dirichlet.pdf(x, self.alpha)
def mean(self):
return self._dirichlet.mean(self.alpha)
def var(self):
return self._dirichlet.var(self.alpha)
def entropy(self):
return self._dirichlet.entropy(self.alpha)
def rvs(self, size=1):
return self._dirichlet.rvs(self.alpha, size)
# Set frozen generator docstrings from corresponding docstrings in
# multivariate_normal_gen and fill in default strings in class docstrings
for name in ['logpdf', 'pdf', 'rvs', 'mean', 'var', 'entropy']:
method = dirichlet_gen.__dict__[name]
method_frozen = dirichlet_frozen.__dict__[name]
method_frozen.__doc__ = doccer.docformat(
method.__doc__, dirichlet_docdict_noparams)
method.__doc__ = doccer.docformat(method.__doc__, dirichlet_docdict_params)

@ -0,0 +1,54 @@
"""Functions copypasted from newer versions of numpy.
"""
from __future__ import division, print_function, absolute_import
import warnings
import numpy as np
from scipy.lib._version import NumpyVersion
if NumpyVersion(np.__version__) > '1.7.0.dev':
_assert_warns = np.testing.assert_warns
else:
def _assert_warns(warning_class, func, *args, **kw):
r"""
Fail unless the given callable throws the specified warning.
This definition is copypasted from numpy 1.9.0.dev.
The version in earlier numpy returns None.
Parameters
----------
warning_class : class
The class defining the warning that `func` is expected to throw.
func : callable
The callable to test.
*args : Arguments
Arguments passed to `func`.
**kwargs : Kwargs
Keyword arguments passed to `func`.
Returns
-------
The value returned by `func`.
"""
with warnings.catch_warnings(record=True) as l:
warnings.simplefilter('always')
result = func(*args, **kw)
if not len(l) > 0:
raise AssertionError("No warning raised when calling %s"
% func.__name__)
if not l[0].category is warning_class:
raise AssertionError("First warning for %s is not a "
"%s( is %s)" % (func.__name__, warning_class, l[0]))
return result
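# Usage sketch: fail unless the callable emits the expected warning; the
# 1.9.0.dev semantics above also hand back the callable's return value.
def _noisy(x):
    warnings.warn("deprecated", DeprecationWarning)
    return x + 1

result = _assert_warns(DeprecationWarning, _noisy, 1)
# result == 2 under the copied 1.9.0.dev behaviour; None on older numpy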
if NumpyVersion(np.__version__) >= '1.6.0':
count_nonzero = np.count_nonzero
else:
def count_nonzero(a):
return (a != 0).sum()

@ -246,9 +246,9 @@ def chi2_contingency(observed, correction=True, lambda_=None):
if np.any(expected == 0): if np.any(expected == 0):
# Include one of the positions where expected is zero in # Include one of the positions where expected is zero in
# the exception message. # the exception message.
zeropos = list(np.where(expected == 0)[0]) zeropos = list(zip(*np.where(expected == 0)))[0]
raise ValueError("The internally computed table of expected " raise ValueError("The internally computed table of expected "
"frequencies has a zero element at %s." % zeropos) "frequencies has a zero element at %s." % (zeropos,))
# The degrees of freedom # The degrees of freedom
dof = expected.size - sum(expected.shape) + expected.ndim - 1 dof = expected.size - sum(expected.shape) + expected.ndim - 1
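# Sketch of the fix above: report the first full (row, col, ...) index of
# a zero expected frequency instead of only its row coordinates.
import numpy as np
expected = np.array([[4.0, 0.0],
                     [3.0, 5.0]])
zeropos = list(zip(*np.where(expected == 0)))[0]
assert zeropos == (0, 1)  # a tuple that drops cleanly into the message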

@ -1,6 +1,6 @@
from __future__ import division from __future__ import division
import warnings import warnings
from wafo.wafodata import PlotData from wafo.containers import PlotData
from wafo.misc import findextrema from wafo.misc import findextrema
from scipy import special from scipy import special
import numpy as np import numpy as np

@ -7,7 +7,19 @@
# #
from __future__ import division, print_function, absolute_import from __future__ import division, print_function, absolute_import
from ._distn_infrastructure import entropy, rv_discrete, rv_continuous from ._distn_infrastructure import (entropy, rv_discrete, rv_continuous,
rv_frozen)
from . import _continuous_distns
from . import _discrete_distns
from ._continuous_distns import * from ._continuous_distns import *
from ._discrete_distns import * from ._discrete_distns import *
# For backwards compatibility e.g. pymc expects distributions.__all__.
__all__ = ['entropy', 'rv_discrete', 'rv_continuous']
# Add only the distribution names, not the *_gen names.
__all__ += _continuous_distns._distn_names
__all__ += _discrete_distns._distn_names

@ -7,10 +7,11 @@ Distributions
Author: Per A. Brodtkorb 2008 Author: Per A. Brodtkorb 2008
''' '''
from __future__ import division from __future__ import division, absolute_import
import warnings import warnings
from wafo.plotbackend import plotbackend
from wafo.misc import ecross, findcross from ..plotbackend import plotbackend
from ..misc import ecross, findcross
import numdifftools # @UnresolvedImport import numdifftools # @UnresolvedImport
@ -27,12 +28,10 @@ from numpy import (
from numpy import flatnonzero as nonzero from numpy import flatnonzero as nonzero
__all__ = [ __all__ = ['Profile', 'FitDistribution']
'Profile', 'FitDistribution'
]
floatinfo = np.finfo(float) floatinfo = np.finfo(float)
# arr = atleast_1d
arr = asarray arr = asarray
all = alltrue # @ReservedAssignment all = alltrue # @ReservedAssignment
@ -77,7 +76,8 @@ class rv_frozen(object):
def __init__(self, dist, *args, **kwds): def __init__(self, dist, *args, **kwds):
self.dist = dist self.dist = dist
args, loc, scale = dist._parse_args(*args, **kwds) args, loc, scale = dist._parse_args(*args, **kwds)
if len(args) == dist.numargs - 2: # isinstance(dist, rv_continuous): if len(args) == dist.numargs - 2: #
# if isinstance(dist, rv_continuous):
self.par = args + (loc, scale) self.par = args + (loc, scale)
else: # rv_discrete else: # rv_discrete
self.par = args + (loc,) self.par = args + (loc,)
@ -283,27 +283,25 @@ class Profile(object):
self._par = phatv.copy() self._par = phatv.copy()
# Set up variable to profile and _local_link function # Set up variable to profile and _local_link function
self.profile_x = not self.x == None self.profile_x = self.x is not None
self.profile_logSF = not (self.logSF == None or self.profile_x) self.profile_logSF = not (self.logSF is None or self.profile_x)
self.profile_par = not (self.profile_x or self.profile_logSF) self.profile_par = not (self.profile_x or self.profile_logSF)
if self.link == None: if self.link is None:
self.link = self.fit_dist.dist.link self.link = self.fit_dist.dist.link
if self.profile_par: if self.profile_par:
self._local_link = lambda fix_par, par: fix_par self._local_link = self._par_link
self.xlabel = 'phat(%d)' % self.i_fixed self.xlabel = 'phat(%d)' % self.i_fixed
p_opt = self._par[self.i_fixed] p_opt = self._par[self.i_fixed]
elif self.profile_x: elif self.profile_x:
self.logSF = fit_dist.logsf(self.x) self.logSF = fit_dist.logsf(self.x)
self._local_link = lambda fix_par, par: self.link( self._local_link = self._x_link
fix_par, self.logSF, par, self.i_fixed)
self.xlabel = 'x' self.xlabel = 'x'
p_opt = self.x p_opt = self.x
elif self.profile_logSF: elif self.profile_logSF:
p_opt = self.logSF p_opt = self.logSF
self.x = fit_dist.isf(exp(p_opt)) self.x = fit_dist.isf(exp(p_opt))
self._local_link = lambda fix_par, par: self.link( self._local_link = self._logSF_link
self.x, fix_par, par, self.i_fixed)
self.xlabel = 'log(SF)' self.xlabel = 'log(SF)'
else: else:
raise ValueError( raise ValueError(
@ -315,6 +313,15 @@ class Profile(object):
phatfree = phatv[self.i_free].copy() phatfree = phatv[self.i_free].copy()
self._set_profile(phatfree, p_opt) self._set_profile(phatfree, p_opt)
def _par_link(self, fix_par, par):
return fix_par
def _x_link(self, fix_par, par):
return self.link(fix_par, self.logSF, par, self.i_fixed)
def _logSF_link(self, fix_par, par):
return self.link(self.x, fix_par, par, self.i_fixed)
def _correct_Lmax(self, Lmax): def _correct_Lmax(self, Lmax):
if Lmax > self.Lmax: # foundNewphat = True if Lmax > self.Lmax: # foundNewphat = True
warnings.warn( warnings.warn(
@ -386,7 +393,7 @@ class Profile(object):
''' '''
linspace = numpy.linspace linspace = numpy.linspace
if self.pmin == None or self.pmax == None: if self.pmin is None or self.pmax is None:
pvar = self._get_variance() pvar = self._get_variance()
@ -395,12 +402,12 @@ class Profile(object):
p_crit = (-norm_ppf(self.alpha / 2.0) * p_crit = (-norm_ppf(self.alpha / 2.0) *
sqrt(numpy.ravel(pvar)) * 1.5) sqrt(numpy.ravel(pvar)) * 1.5)
if self.pmin == None: if self.pmin is None:
self.pmin = self._search_pmin(phatfree0, self.pmin = self._search_pmin(phatfree0,
p_opt - 5.0 * p_crit, p_opt) p_opt - 5.0 * p_crit, p_opt)
p_crit_low = (p_opt - self.pmin) / 5 p_crit_low = (p_opt - self.pmin) / 5
if self.pmax == None: if self.pmax is None:
self.pmax = self._search_pmax(phatfree0, self.pmax = self._search_pmax(phatfree0,
p_opt + 5.0 * p_crit, p_opt) p_opt + 5.0 * p_crit, p_opt)
p_crit_up = (self.pmax - p_opt) / 5 p_crit_up = (self.pmax - p_opt) / 5
@ -540,52 +547,6 @@ class Profile(object):
axis.set_xlabel(self.xlabel) axis.set_xlabel(self.xlabel)
def _discretize_adaptive(fun, a, b, tol=0.005, n=5):
'''
Automatic discretization of function, adaptive gridding.
'''
tiny = floatinfo.tiny
n += (np.mod(n, 2) == 0) # make sure n is odd
x = np.linspace(a, b, n)
fx = fun(x)
n2 = (n - 1) / 2
erri = np.hstack((np.zeros((n2, 1)), np.ones((n2, 1)))).ravel()
err = erri.max()
err0 = np.inf
# while (err != err0 and err > tol and n < nmax):
for j in range(50):
if err != err0 and np.any(erri > tol):
err0 = err
# find top errors
I, = np.where(erri > tol)
# double the sample rate in intervals with the most error
y = (np.vstack(((x[I] + x[I - 1]) / 2,
(x[I + 1] + x[I]) / 2)).T).ravel()
fy = fun(y)
fy0 = np.interp(y, x, fx)
erri = 0.5 * (abs((fy0 - fy) / (abs(fy0 + fy) + tiny)))
err = erri.max()
x = np.hstack((x, y))
I = x.argsort()
x = x[I]
erri = np.hstack((zeros(len(fx)), erri))[I]
fx = np.hstack((fx, fy))[I]
else:
break
else:
warnings.warn('Recursion level limit reached j=%d' % j)
return x, fx
# class to fit given distribution to data
class FitDistribution(rv_frozen): class FitDistribution(rv_frozen):
''' '''
@ -867,7 +828,7 @@ class FitDistribution(rv_frozen):
def _compute_cov(self): def _compute_cov(self):
'''Compute covariance '''Compute covariance
''' '''
somefixed = (self.par_fix != None) and any(isfinite(self.par_fix)) somefixed = (self.par_fix is not None) and any(isfinite(self.par_fix))
# H1 = numpy.asmatrix(self.dist.hessian_nnlf(self.par, self.data)) # H1 = numpy.asmatrix(self.dist.hessian_nnlf(self.par, self.data))
H = numpy.asmatrix(self.dist.hessian_nlogps(self.par, self.data)) H = numpy.asmatrix(self.dist.hessian_nlogps(self.par, self.data))
self.H = H self.H = H
@ -1000,7 +961,7 @@ class FitDistribution(rv_frozen):
self.plotresprb() self.plotresprb()
fixstr = '' fixstr = ''
if not self.par_fix == None: if self.par_fix is not None:
numfix = len(self.i_fixed) numfix = len(self.i_fixed)
if numfix > 0: if numfix > 0:
format0 = ', '.join(['%d'] * numfix) format0 = ', '.join(['%d'] * numfix)
@ -1160,7 +1121,7 @@ class FitDistribution(rv_frozen):
n = len(x) n = len(x)
np1 = n + 1 np1 = n + 1
if unknown_numpar == None: if unknown_numpar is None:
k = len(theta) k = len(theta)
else: else:
k = unknown_numpar k = unknown_numpar

@ -93,6 +93,10 @@ class gaussian_kde(object):
high_bounds. high_bounds.
kde.integrate_kde(other_kde) : float kde.integrate_kde(other_kde) : float
Integrate two kernel density estimates multiplied together. Integrate two kernel density estimates multiplied together.
kde.pdf(points) : ndarray
Alias for ``kde.evaluate(points)``.
kde.logpdf(points) : ndarray
Equivalent to ``np.log(kde.evaluate(points))``.
kde.resample(size=None) : ndarray kde.resample(size=None) : ndarray
Randomly sample a dataset from the estimated pdf. Randomly sample a dataset from the estimated pdf.
kde.set_bandwidth(bw_method='scott') : None kde.set_bandwidth(bw_method='scott') : None
@ -106,7 +110,6 @@ class gaussian_kde(object):
to provide a different method, or set it through a call to to provide a different method, or set it through a call to
`kde.set_bandwidth`. `kde.set_bandwidth`.
Notes Notes
----- -----
Bandwidth selection strongly influences the estimate obtained from the KDE Bandwidth selection strongly influences the estimate obtained from the KDE
@ -122,7 +125,7 @@ class gaussian_kde(object):
with ``n`` the number of data points and ``d`` the number of dimensions. with ``n`` the number of data points and ``d`` the number of dimensions.
Silverman's Rule [2]_, implemented as `silverman_factor`, is:: Silverman's Rule [2]_, implemented as `silverman_factor`, is::
n * (d + 2) / 4.)**(-1. / (d + 4)). (n * (d + 2) / 4.)**(-1. / (d + 4)).
Good general descriptions of kernel density estimation can be found in [1]_ Good general descriptions of kernel density estimation can be found in [1]_
and [2]_, the mathematics for this multi-dimensional implementation can be and [2]_, the mathematics for this multi-dimensional implementation can be
@ -388,11 +391,12 @@ class gaussian_kde(object):
large = other large = other
sum_cov = small.covariance + large.covariance sum_cov = small.covariance + large.covariance
sum_cov_chol = linalg.cho_factor(sum_cov)
result = 0.0 result = 0.0
for i in range(small.n): for i in range(small.n):
mean = small.dataset[:, i, newaxis] mean = small.dataset[:, i, newaxis]
diff = large.dataset - mean diff = large.dataset - mean
tdiff = dot(linalg.inv(sum_cov), diff) tdiff = linalg.cho_solve(sum_cov_chol, diff)
energies = sum(diff * tdiff, axis=0) / 2.0 energies = sum(diff * tdiff, axis=0) / 2.0
result += sum(exp(-energies), axis=0) result += sum(exp(-energies), axis=0)
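# Sketch of the change above: factor the SPD matrix once with cho_factor,
# then cho_solve replaces the repeated multiply-by-inverse; the results
# agree but the factorization route is cheaper and better conditioned.
import numpy as np
from scipy import linalg

sum_cov = np.array([[2.0, 0.3],
                    [0.3, 1.0]])
diff = np.array([[1.0, 2.0],
                 [0.5, -1.0]])
chol = linalg.cho_factor(sum_cov)
assert np.allclose(linalg.cho_solve(chol, diff),
                   np.dot(linalg.inv(sum_cov), diff))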
@ -511,3 +515,27 @@ class gaussian_kde(object):
self.covariance = self._data_covariance * self.factor**2 self.covariance = self._data_covariance * self.factor**2
self.inv_cov = self._data_inv_cov / self.factor**2 self.inv_cov = self._data_inv_cov / self.factor**2
self._norm_factor = sqrt(linalg.det(2*pi*self.covariance)) * self.n self._norm_factor = sqrt(linalg.det(2*pi*self.covariance)) * self.n
def pdf(self, x):
"""
Evaluate the estimated pdf on a provided set of points.
Notes
-----
This is an alias for `gaussian_kde.evaluate`. See the ``evaluate``
docstring for more details.
"""
return self.evaluate(x)
def logpdf(self, x):
"""
Evaluate the log of the estimated pdf on a provided set of points.
Notes
-----
See `gaussian_kde.evaluate` for more details; this method simply
returns ``np.log(gaussian_kde.evaluate(x))``.
"""
return np.log(self.evaluate(x))
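# Usage sketch (module context, illustrative data): the new aliases
# simply defer to evaluate().
rng = np.random.RandomState(0)
kde = gaussian_kde(rng.randn(200))
pts = np.linspace(-2.0, 2.0, 5)
assert np.allclose(kde.pdf(pts), kde.evaluate(pts))
assert np.allclose(kde.logpdf(pts), np.log(kde.evaluate(pts)))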

@ -8,29 +8,29 @@ import math
import warnings import warnings
import numpy as np import numpy as np
from numpy import (isscalar, r_, log, sum, around, unique, asarray, zeros, from numpy import (isscalar, r_, log, sum, around, unique, asarray,
arange, sort, amin, amax, any, atleast_1d, sqrt, ceil, zeros, arange, sort, amin, amax, any, atleast_1d, sqrt, ceil,
floor, array, poly1d, compress, not_equal, pi, exp, ravel, floor, array, poly1d, compress, not_equal, pi, exp, ravel, angle)
angle)
from numpy.testing.decorators import setastest from numpy.testing.decorators import setastest
from scipy.lib.six import string_types from scipy.lib.six import string_types
from ._numpy_compat import count_nonzero
from scipy import optimize from scipy import optimize
from scipy import special from scipy import special
from wafo.stats import statlib from . import statlib
from wafo.stats import stats from . import stats
from wafo.stats.stats import find_repeats from .stats import find_repeats
from wafo.stats import distributions from .contingency import chi2_contingency
from wafo.stats._distn_infrastructure import rv_generic from . import distributions
from ._distn_infrastructure import rv_generic
__all__ = ['mvsdist', __all__ = ['mvsdist',
'bayes_mvs', 'kstat', 'kstatvar', 'probplot', 'ppcc_max', 'bayes_mvs', 'kstat', 'kstatvar', 'probplot', 'ppcc_max', 'ppcc_plot',
'ppcc_plot',
'boxcox_llf', 'boxcox', 'boxcox_normmax', 'boxcox_normplot', 'boxcox_llf', 'boxcox', 'boxcox_normmax', 'boxcox_normplot',
'shapiro', 'anderson', 'ansari', 'bartlett', 'levene', 'binom_test', 'shapiro', 'anderson', 'ansari', 'bartlett', 'levene', 'binom_test',
'fligner', 'mood', 'wilcoxon', 'fligner', 'mood', 'wilcoxon', 'median_test',
'pdf_fromgamma', 'circmean', 'circvar', 'circstd', 'pdf_fromgamma', 'circmean', 'circvar', 'circstd', 'anderson_ksamp'
] ]
@ -80,8 +80,7 @@ def bayes_mvs(data, alpha=0.90):
""" """
res = mvsdist(data) res = mvsdist(data)
if alpha >= 1 or alpha <= 0: if alpha >= 1 or alpha <= 0:
raise ValueError( raise ValueError("0 < alpha < 1 is required, but alpha=%s was given." % alpha)
"0 < alpha < 1 is required, but alpha=%s was given." % alpha)
return tuple((x.mean(), x.interval(alpha)) for x in res) return tuple((x.mean(), x.interval(alpha)) for x in res)
@ -138,8 +137,7 @@ def mvsdist(data):
C = x.var() C = x.var()
if (n > 1000): # gaussian approximations for large n if (n > 1000): # gaussian approximations for large n
mdist = distributions.norm(loc=xbar, scale=math.sqrt(C/n)) mdist = distributions.norm(loc=xbar, scale=math.sqrt(C/n))
sdist = distributions.norm( sdist = distributions.norm(loc=math.sqrt(C), scale=math.sqrt(C/(2.*n)))
loc=math.sqrt(C), scale=math.sqrt(C / (2. * n)))
vdist = distributions.norm(loc=C, scale=math.sqrt(2.0/n)*C) vdist = distributions.norm(loc=C, scale=math.sqrt(2.0/n)*C)
else: else:
nm1 = n-1 nm1 = n-1
@ -422,7 +420,7 @@ def probplot(x, sparams=(), dist='norm', fit=True, plot=None):
osr = sort(x) osr = sort(x)
if fit or (plot is not None): if fit or (plot is not None):
# perform a linear fit. # perform a linear fit.
slope, intercept, r, _prob, _sterrest = stats.linregress(osm, osr) slope, intercept, r, prob, sterrest = stats.linregress(osm, osr)
if plot is not None: if plot is not None:
plot.plot(osm, osr, 'bo', osm, slope*osm + intercept, 'r-') plot.plot(osm, osr, 'bo', osm, slope*osm + intercept, 'r-')
@ -474,11 +472,10 @@ def ppcc_max(x, brack=(0.0, 1.0), dist='tukeylambda'):
# correlation # correlation
def tempfunc(shape, mi, yvals, func): def tempfunc(shape, mi, yvals, func):
xvals = func(mi, shape) xvals = func(mi, shape)
r, _prob = stats.pearsonr(xvals, yvals) r, prob = stats.pearsonr(xvals, yvals)
return 1-r return 1-r
return optimize.brent(tempfunc, brack=brack, return optimize.brent(tempfunc, brack=brack, args=(osm_uniform, osr, dist.ppf))
args=(osm_uniform, osr, dist.ppf))
def ppcc_plot(x,a,b,dist='tukeylambda', plot=None, N=80): def ppcc_plot(x,a,b,dist='tukeylambda', plot=None, N=80):
@ -493,7 +490,7 @@ def ppcc_plot(x, a, b, dist='tukeylambda', plot=None, N=80):
ppcc = svals*0.0 ppcc = svals*0.0
k = 0 k = 0
for sval in svals: for sval in svals:
_r1, r2 = probplot(x, sval, dist=dist, fit=1) r1,r2 = probplot(x,sval,dist=dist,fit=1)
ppcc[k] = r2[-1] ppcc[k] = r2[-1]
k += 1 k += 1
if plot is not None: if plot is not None:
@ -724,7 +721,7 @@ def boxcox(x, lmbda=None, alpha=None):
raise ValueError("Data must be positive.") raise ValueError("Data must be positive.")
if lmbda is not None: # single transformation if lmbda is not None: # single transformation
return special.boxcox(x, lmbda) # @UndefinedVariable return special.boxcox(x, lmbda)
# If lmbda=None, find the lmbda that maximizes the log-likelihood function. # If lmbda=None, find the lmbda that maximizes the log-likelihood function.
lmax = boxcox_normmax(x, method='mle') lmax = boxcox_normmax(x, method='mle')
@ -815,7 +812,7 @@ def boxcox_normmax(x, brack=(-2.0, 2.0), method='pearsonr'):
# correlation. # correlation.
y = boxcox(samps, lmbda) y = boxcox(samps, lmbda)
yvals = np.sort(y) yvals = np.sort(y)
r, _prob = stats.pearsonr(xvals, yvals) r, prob = stats.pearsonr(xvals, yvals)
return 1 - r return 1 - r
return optimize.brent(_eval_pearsonr, brack=brack, args=(xvals, x)) return optimize.brent(_eval_pearsonr, brack=brack, args=(xvals, x))
@ -836,7 +833,7 @@ def boxcox_normmax(x, brack=(-2.0, 2.0), method='pearsonr'):
methods = {'pearsonr': _pearsonr, methods = {'pearsonr': _pearsonr,
'mle': _mle, 'mle': _mle,
'all': _all} 'all': _all}
if not method in methods.keys(): if method not in methods.keys():
raise ValueError("Method %s not recognized." % method) raise ValueError("Method %s not recognized." % method)
optimfunc = methods[method] optimfunc = methods[method]
@ -994,7 +991,7 @@ def shapiro(x, a=None, reta=False):
init = 1 init = 1
y = sort(x) y = sort(x)
a, w, pw, ifault = statlib.swilk(y, a[:N//2], init) a, w, pw, ifault = statlib.swilk(y, a[:N//2], init)
if not ifault in [0, 2]: if ifault not in [0,2]:
warnings.warn(str(ifault)) warnings.warn(str(ifault))
if N > 5000: if N > 5000:
warnings.warn("p-value may not be accurate for N > 5000.") warnings.warn("p-value may not be accurate for N > 5000.")
@ -1022,7 +1019,7 @@ def anderson(x, dist='norm'):
Anderson-Darling test for data coming from a particular distribution Anderson-Darling test for data coming from a particular distribution
The Anderson-Darling test is a modification of the Kolmogorov- The Anderson-Darling test is a modification of the Kolmogorov-
Smirnov test kstest_ for the null hypothesis that a sample is Smirnov test `kstest` for the null hypothesis that a sample is
drawn from a population that follows a particular distribution. drawn from a population that follows a particular distribution.
For the Anderson-Darling test, the critical values depend on For the Anderson-Darling test, the critical values depend on
which distribution is being tested against. This function works which distribution is being tested against. This function works
@ -1083,7 +1080,7 @@ def anderson(x, dist='norm'):
pp. 591-595. pp. 591-595.
""" """
if not dist in ['norm', 'expon', 'gumbel', 'extreme1', 'logistic']: if dist not in ['norm','expon','gumbel','extreme1','logistic']:
raise ValueError("Invalid distribution; dist must be 'norm', " raise ValueError("Invalid distribution; dist must be 'norm', "
"'expon', 'gumbel', 'extreme1' or 'logistic'.") "'expon', 'gumbel', 'extreme1' or 'logistic'.")
y = sort(x) y = sort(x)
@ -1117,13 +1114,13 @@ def anderson(x, dist='norm'):
else: # (dist == 'gumbel') or (dist == 'extreme1'): else: # (dist == 'gumbel') or (dist == 'extreme1'):
# the following is incorrect, see ticket:1097 # the following is incorrect, see ticket:1097
#def fixedsolve(th,xj,N): #def fixedsolve(th,xj,N):
## val = stats.sum(xj)*1.0/N # val = stats.sum(xj)*1.0/N
## tmp = exp(-xj/th) # tmp = exp(-xj/th)
## term = sum(xj*tmp,axis=0) # term = sum(xj*tmp,axis=0)
## term /= sum(tmp,axis=0) # term /= sum(tmp,axis=0)
# return val - term # return val - term
## s = optimize.fixed_point(fixedsolve, 1.0, args=(x,N),xtol=1e-5) #s = optimize.fixed_point(fixedsolve, 1.0, args=(x,N),xtol=1e-5)
## xbar = -s*log(sum(exp(-x/s),axis=0)*1.0/N) #xbar = -s*log(sum(exp(-x/s),axis=0)*1.0/N)
xbar, s = distributions.gumbel_l.fit(x) xbar, s = distributions.gumbel_l.fit(x)
w = (y-xbar)/s w = (y-xbar)/s
z = distributions.gumbel_l.cdf(w) z = distributions.gumbel_l.cdf(w)
@ -1136,6 +1133,229 @@ def anderson(x, dist='norm'):
return A2, critical, sig return A2, critical, sig
def _anderson_ksamp_midrank(samples, Z, Zstar, k, n, N):
"""
Compute A2akN equation 7 of Scholz and Stephens.
Parameters
----------
samples : sequence of 1-D array_like
Array of sample arrays.
Z : array_like
Sorted array of all observations.
Zstar : array_like
Sorted array of unique observations.
k : int
Number of samples.
n : array_like
Number of observations in each sample.
N : int
Total number of observations.
Returns
-------
A2aKN : float
The A2aKN statistics of Scholz and Stephens 1987.
"""
A2akN = 0.
Z_ssorted_left = Z.searchsorted(Zstar, 'left')
if N == Zstar.size:
lj = 1.
else:
lj = Z.searchsorted(Zstar, 'right') - Z_ssorted_left
Bj = Z_ssorted_left + lj / 2.
for i in arange(0, k):
s = np.sort(samples[i])
s_ssorted_right = s.searchsorted(Zstar, side='right')
Mij = s_ssorted_right.astype(np.float)
fij = s_ssorted_right - s.searchsorted(Zstar, 'left')
Mij -= fij / 2.
inner = lj / float(N) * (N * Mij - Bj * n[i])**2 / \
(Bj * (N - Bj) - N * lj / 4.)
A2akN += inner.sum() / n[i]
A2akN *= (N - 1.) / N
return A2akN
def _anderson_ksamp_right(samples, Z, Zstar, k, n, N):
"""
Compute A2akN equation 6 of Scholz & Stephens.
Parameters
----------
samples : sequence of 1-D array_like
Array of sample arrays.
Z : array_like
Sorted array of all observations.
Zstar : array_like
Sorted array of unique observations.
k : int
Number of samples.
n : array_like
Number of observations in each sample.
N : int
Total number of observations.
Returns
-------
A2KN : float
The A2KN statistics of Scholz and Stephens 1987.
"""
A2kN = 0.
lj = Z.searchsorted(Zstar[:-1], 'right') - Z.searchsorted(Zstar[:-1],
'left')
Bj = lj.cumsum()
for i in arange(0, k):
s = np.sort(samples[i])
Mij = s.searchsorted(Zstar[:-1], side='right')
inner = lj / float(N) * (N * Mij - Bj * n[i])**2 / (Bj * (N - Bj))
A2kN += inner.sum() / n[i]
return A2kN
def anderson_ksamp(samples, midrank=True):
"""The Anderson-Darling test for k-samples.
The k-sample Anderson-Darling test is a modification of the
one-sample Anderson-Darling test. It tests the null hypothesis
that k-samples are drawn from the same population without having
to specify the distribution function of that population. The
critical values depend on the number of samples.
Parameters
----------
samples : sequence of 1-D array_like
        Arrays of sample data.
midrank : bool, optional
Type of Anderson-Darling test which is computed. Default
(True) is the midrank test applicable to continuous and
discrete populations. If False, the right side empirical
distribution is used.
Returns
-------
A2 : float
Normalized k-sample Anderson-Darling test statistic.
critical : array
The critical values for significance levels 25%, 10%, 5%, 2.5%, 1%.
p : float
An approximate significance level at which the null hypothesis for the
provided samples can be rejected.
Raises
------
ValueError
If less than 2 samples are provided, a sample is empty, or no
distinct observations are in the samples.
See Also
--------
ks_2samp : 2 sample Kolmogorov-Smirnov test
anderson : 1 sample Anderson-Darling test
Notes
-----
[1]_ Defines three versions of the k-sample Anderson-Darling test:
one for continuous distributions and two for discrete
distributions, in which ties between samples may occur. The
default of this routine is to compute the version based on the
midrank empirical distribution function. This test is applicable
to continuous and discrete data. If midrank is set to False, the
right side empirical distribution is used for a test for discrete
data. According to [1]_, the two discrete test statistics differ
only slightly if a few collisions due to round-off errors occur in
the test not adjusted for ties between samples.
.. versionadded:: 0.14.0
References
----------
.. [1] Scholz, F. W and Stephens, M. A. (1987), K-Sample
Anderson-Darling Tests, Journal of the American Statistical
Association, Vol. 82, pp. 918-924.
Examples
--------
>>> from scipy import stats
>>> np.random.seed(314159)
The null hypothesis that the two random samples come from the same
distribution can be rejected at the 5% level because the returned
test value is greater than the critical value for 5% (1.961) but
not at the 2.5% level. The interpolation gives an approximate
significance level of 3.1%:
>>> stats.anderson_ksamp([np.random.normal(size=50),
... np.random.normal(loc=0.5, size=30)])
(2.4615796189876105,
array([ 0.325, 1.226, 1.961, 2.718, 3.752]),
0.03134990135800783)
The null hypothesis cannot be rejected for three samples from an
identical distribution. The approximate p-value (87%) has to be
computed by extrapolation and may not be very accurate:
>>> stats.anderson_ksamp([np.random.normal(size=50),
... np.random.normal(size=30), np.random.normal(size=20)])
(-0.73091722665244196,
array([ 0.44925884, 1.3052767 , 1.9434184 , 2.57696569, 3.41634856]),
0.8789283903979661)
"""
k = len(samples)
if (k < 2):
raise ValueError("anderson_ksamp needs at least two samples")
samples = list(map(np.asarray, samples))
Z = np.sort(np.hstack(samples))
N = Z.size
Zstar = np.unique(Z)
if Zstar.size < 2:
raise ValueError("anderson_ksamp needs more than one distinct "
"observation")
n = np.array([sample.size for sample in samples])
if any(n == 0):
raise ValueError("anderson_ksamp encountered sample without "
"observations")
if midrank:
A2kN = _anderson_ksamp_midrank(samples, Z, Zstar, k, n, N)
else:
A2kN = _anderson_ksamp_right(samples, Z, Zstar, k, n, N)
h = (1. / arange(1, N)).sum()
H = (1. / n).sum()
g = 0
for l in arange(1, N-1):
inner = np.array([1. / ((N - l) * m) for m in arange(l+1, N)])
g += inner.sum()
a = (4*g - 6) * (k - 1) + (10 - 6*g)*H
b = (2*g - 4)*k**2 + 8*h*k + (2*g - 14*h - 4)*H - 8*h + 4*g - 6
c = (6*h + 2*g - 2)*k**2 + (4*h - 4*g + 6)*k + (2*h - 6)*H + 4*h
d = (2*h + 6)*k**2 - 4*h*k
sigmasq = (a*N**3 + b*N**2 + c*N + d) / ((N - 1.) * (N - 2.) * (N - 3.))
m = k - 1
A2 = (A2kN - m) / math.sqrt(sigmasq)
# The b_i values are the interpolation coefficients from Table 2
# of Scholz and Stephens 1987
b0 = np.array([0.675, 1.281, 1.645, 1.96, 2.326])
b1 = np.array([-0.245, 0.25, 0.678, 1.149, 1.822])
b2 = np.array([-0.105, -0.305, -0.362, -0.391, -0.396])
critical = b0 + b1 / math.sqrt(m) + b2 / m
pf = np.polyfit(critical, log(np.array([0.25, 0.1, 0.05, 0.025, 0.01])), 2)
if A2 < critical.min() or A2 > critical.max():
warnings.warn("approximate p-value will be computed by extrapolation")
p = math.exp(np.polyval(pf, A2))
return A2, critical, p
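# Sketch of the p-value step above: fit log(significance) as a quadratic
# in the critical values, then evaluate at the observed statistic.
import math
import numpy as np

m = 1  # k - 1 for a two-sample test
b0 = np.array([0.675, 1.281, 1.645, 1.96, 2.326])
b1 = np.array([-0.245, 0.25, 0.678, 1.149, 1.822])
b2 = np.array([-0.105, -0.305, -0.362, -0.391, -0.396])
critical = b0 + b1 / math.sqrt(m) + b2 / m
pf = np.polyfit(critical, np.log([0.25, 0.1, 0.05, 0.025, 0.01]), 2)
p = math.exp(np.polyval(pf, critical[2]))
# p is approximately 0.05: the quadratic passes near the tabled levels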
def ansari(x,y): def ansari(x,y):
""" """
Perform the Ansari-Bradley test for equal scale parameters Perform the Ansari-Bradley test for equal scale parameters
@ -1191,7 +1411,7 @@ def ansari(x, y):
if repeats and ((m < 55) or (n < 55)): if repeats and ((m < 55) or (n < 55)):
warnings.warn("Ties preclude use of exact statistic.") warnings.warn("Ties preclude use of exact statistic.")
if exact: if exact:
astart, a1, _ifault = statlib.gscale(n, m) astart, a1, ifault = statlib.gscale(n,m)
ind = AB-astart ind = AB-astart
total = sum(a1,axis=0) total = sum(a1,axis=0)
if ind < len(a1)/2.0: if ind < len(a1)/2.0:
@ -1219,11 +1439,9 @@ def ansari(x, y):
# compute sum(tj * rj**2,axis=0) # compute sum(tj * rj**2,axis=0)
fac = sum(symrank**2,axis=0) fac = sum(symrank**2,axis=0)
if N % 2: # N odd if N % 2: # N odd
varAB = m * n * \ varAB = m*n*(16*N*fac-(N+1)**4)/(16.0 * N**2 * (N-1))
(16 * N * fac - (N + 1) ** 4) / (16.0 * N ** 2 * (N - 1))
else: # N even else: # N even
varAB = m * n * \ varAB = m*n*(16*fac-N*(N+2)**2)/(16.0 * N * (N-1))
(16 * fac - N * (N + 2) ** 2) / (16.0 * N * (N - 1))
z = (AB - mnAB)/sqrt(varAB) z = (AB - mnAB)/sqrt(varAB)
pval = distributions.norm.sf(abs(z)) * 2.0 pval = distributions.norm.sf(abs(z)) * 2.0
return AB, pval return AB, pval
@ -1236,7 +1454,7 @@ def bartlett(*args):
Bartlett's test tests the null hypothesis that all input samples Bartlett's test tests the null hypothesis that all input samples
are from populations with equal variances. For samples are from populations with equal variances. For samples
from significantly non-normal populations, Levene's test from significantly non-normal populations, Levene's test
`levene`_ is more robust. `levene` is more robust.
Parameters Parameters
---------- ----------
@ -1269,8 +1487,7 @@ def bartlett(*args):
Ntot = sum(Ni,axis=0) Ntot = sum(Ni,axis=0)
spsq = sum((Ni-1)*ssq,axis=0)/(1.0*(Ntot-k)) spsq = sum((Ni-1)*ssq,axis=0)/(1.0*(Ntot-k))
numer = (Ntot*1.0-k)*log(spsq) - sum((Ni-1.0)*log(ssq),axis=0) numer = (Ntot*1.0-k)*log(spsq) - sum((Ni-1.0)*log(ssq),axis=0)
denom = 1.0 + (1.0 / (3 * (k - 1))) * \ denom = 1.0 + (1.0/(3*(k-1)))*((sum(1.0/(Ni-1.0),axis=0))-1.0/(Ntot-k))
((sum(1.0 / (Ni - 1.0), axis=0)) - 1.0 / (Ntot - k))
T = numer / denom T = numer / denom
pval = distributions.chi2.sf(T,k-1) # 1 - cdf pval = distributions.chi2.sf(T,k-1) # 1 - cdf
return T, pval return T, pval
@ -1328,8 +1545,8 @@ def levene(*args, **kwds):
proportiontocut = 0.05 proportiontocut = 0.05
for kw, value in kwds.items(): for kw, value in kwds.items():
if kw not in ['center', 'proportiontocut']: if kw not in ['center', 'proportiontocut']:
raise TypeError( raise TypeError("levene() got an unexpected keyword "
"levene() got an unexpected keyword argument '%s'" % kw) "argument '%s'" % kw)
if kw == 'center': if kw == 'center':
center = value center = value
else: else:
@ -1341,7 +1558,7 @@ def levene(*args, **kwds):
Ni = zeros(k) Ni = zeros(k)
Yci = zeros(k, 'd') Yci = zeros(k, 'd')
if not center in ['mean', 'median', 'trimmed']: if center not in ['mean', 'median', 'trimmed']:
raise ValueError("Keyword argument <center> must be 'mean', 'median'" raise ValueError("Keyword argument <center> must be 'mean', 'median'"
+ "or 'trimmed'.") + "or 'trimmed'.")
@ -1439,13 +1656,13 @@ def binom_test(x, n=None, p=0.5):
elif (x < p*n): elif (x < p*n):
i = np.arange(np.ceil(p*n), n+1) i = np.arange(np.ceil(p*n), n+1)
y = np.sum(distributions.binom.pmf(i, n, p) <= d*rerr, axis=0) y = np.sum(distributions.binom.pmf(i, n, p) <= d*rerr, axis=0)
pval = distributions.binom.cdf( pval = (distributions.binom.cdf(x, n, p) +
x, n, p) + distributions.binom.sf(n - y, n, p) distributions.binom.sf(n-y, n, p))
else: else:
i = np.arange(np.floor(p*n) + 1) i = np.arange(np.floor(p*n) + 1)
y = np.sum(distributions.binom.pmf(i, n, p) <= d*rerr, axis=0) y = np.sum(distributions.binom.pmf(i, n, p) <= d*rerr, axis=0)
pval = distributions.binom.cdf( pval = (distributions.binom.cdf(y-1, n, p) +
y - 1, n, p) + distributions.binom.sf(x - 1, n, p) distributions.binom.sf(x-1, n, p))
return min(1.0, pval) return min(1.0, pval)
@ -1473,11 +1690,10 @@ def fligner(*args, **kwds):
Parameters Parameters
---------- ----------
sample1, sample2, ... : array_like sample1, sample2, ... : array_like
arrays of sample data. Need not be the same length Arrays of sample data. Need not be the same length.
center : {'mean', 'median', 'trimmed'}, optional center : {'mean', 'median', 'trimmed'}, optional
keyword argument controlling which function of the data Keyword argument controlling which function of the data is used in
is used in computing the test statistic. The default computing the test statistic. The default is 'median'.
is 'median'.
proportiontocut : float, optional proportiontocut : float, optional
When `center` is 'trimmed', this gives the proportion of data points When `center` is 'trimmed', this gives the proportion of data points
to cut from each end. (See `scipy.stats.trim_mean`.) to cut from each end. (See `scipy.stats.trim_mean`.)
@ -1486,15 +1702,15 @@ def fligner(*args, **kwds):
Returns Returns
------- -------
Xsq : float Xsq : float
the test statistic The test statistic.
p-value : float p-value : float
the p-value for the hypothesis test The p-value for the hypothesis test.
Notes Notes
----- -----
As with Levene's test there are three variants As with Levene's test there are three variants of Fligner's test that
of Fligner's test that differ by the measure of central differ by the measure of central tendency used in the test. See `levene`
tendency used in the test. See `levene` for more information. for more information.
References References
---------- ----------
@ -1510,8 +1726,8 @@ def fligner(*args, **kwds):
proportiontocut = 0.05 proportiontocut = 0.05
for kw, value in kwds.items(): for kw, value in kwds.items():
if kw not in ['center', 'proportiontocut']: if kw not in ['center', 'proportiontocut']:
raise TypeError( raise TypeError("fligner() got an unexpected keyword "
"fligner() got an unexpected keyword argument '%s'" % kw) "argument '%s'" % kw)
if kw == 'center': if kw == 'center':
center = value center = value
else: else:
@ -1521,7 +1737,7 @@ def fligner(*args, **kwds):
if k < 2: if k < 2:
raise ValueError("Must enter at least two input sample vectors.") raise ValueError("Must enter at least two input sample vectors.")
if not center in ['mean', 'median', 'trimmed']: if center not in ['mean','median','trimmed']:
raise ValueError("Keyword argument <center> must be 'mean', 'median'" raise ValueError("Keyword argument <center> must be 'mean', 'median'"
+ "or 'trimmed'.") + "or 'trimmed'.")
@ -1578,7 +1794,7 @@ def mood(x, y, axis=0):
------- -------
z : scalar or ndarray z : scalar or ndarray
The z-score for the hypothesis test. For 1-D inputs a scalar is The z-score for the hypothesis test. For 1-D inputs a scalar is
returned; returned.
p-value : scalar ndarray p-value : scalar ndarray
The p-value for the hypothesis test. The p-value for the hypothesis test.
@ -1631,8 +1847,7 @@ def mood(x, y, axis=0):
axis = 0 axis = 0
# Determine shape of the result arrays # Determine shape of the result arrays
res_shape = tuple([x.shape[ax] res_shape = tuple([x.shape[ax] for ax in range(len(x.shape)) if ax != axis])
for ax in range(len(x.shape)) if ax != axis])
if not (res_shape == tuple([y.shape[ax] for ax in range(len(y.shape)) if if not (res_shape == tuple([y.shape[ax] for ax in range(len(y.shape)) if
ax != axis])): ax != axis])):
raise ValueError("Dimensions of x and y on all axes except `axis` " raise ValueError("Dimensions of x and y on all axes except `axis` "
@ -1732,7 +1947,7 @@ def wilcoxon(x, y=None, zero_method="wilcox", correction=False):
""" """
if not zero_method in ["wilcox", "pratt", "zsplit"]: if zero_method not in ["wilcox", "pratt", "zsplit"]:
raise ValueError("Zero method should be either 'wilcox' \ raise ValueError("Zero method should be either 'wilcox' \
or 'pratt' or 'zsplit'") or 'pratt' or 'zsplit'")
@ -1745,13 +1960,11 @@ def wilcoxon(x, y=None, zero_method="wilcox", correction=False):
d = x-y d = x-y
if zero_method == "wilcox": if zero_method == "wilcox":
# Keep all non-zero differences d = compress(not_equal(d, 0), d, axis=-1) # Keep all non-zero differences
d = compress(not_equal(d, 0), d, axis=-1)
count = len(d) count = len(d)
if (count < 10): if (count < 10):
warnings.warn( warnings.warn("Warning: sample size too small for normal approximation.")
"Warning: sample size too small for normal approximation.")
r = stats.rankdata(abs(d)) r = stats.rankdata(abs(d))
r_plus = sum((d > 0) * r, axis=0) r_plus = sum((d > 0) * r, axis=0)
r_minus = sum((d < 0) * r, axis=0) r_minus = sum((d < 0) * r, axis=0)
@ -1768,7 +1981,7 @@ def wilcoxon(x, y=None, zero_method="wilcox", correction=False):
if zero_method == "pratt": if zero_method == "pratt":
r = r[d != 0] r = r[d != 0]
_replist, repnum = find_repeats(r) replist, repnum = find_repeats(r)
if repnum.size != 0: if repnum.size != 0:
# Correction for repeated elements. # Correction for repeated elements.
se -= 0.5 * (repnum * (repnum * repnum - 1)).sum() se -= 0.5 * (repnum * (repnum * repnum - 1)).sum()
@ -1780,6 +1993,211 @@ def wilcoxon(x, y=None, zero_method="wilcox", correction=False):
return T, prob return T, prob
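The signed-rank bookkeeping above reduces to a few lines; a self-contained sketch of the statistic (assuming scipy's `rankdata` and differences with no zeros or ties):

    import numpy as np
    from scipy.stats import rankdata
    d = np.array([6., -1., 4., -3., 2.])  # paired differences
    r = rankdata(np.abs(d))               # ranks of |d|: [5, 1, 4, 3, 2]
    r_plus = np.sum((d > 0) * r)          # 11.0
    r_minus = np.sum((d < 0) * r)         # 4.0
    T = min(r_plus, r_minus)              # the Wilcoxon statistic, 4.0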
@setastest(False)
def median_test(*args, **kwds):
"""
Mood's median test.
Test that two or more samples come from populations with the same median.
Let ``n = len(args)`` be the number of samples. The "grand median" of
all the data is computed, and a contingency table is formed by
classifying the values in each sample as being above or below the grand
median. The contingency table, along with `correction` and `lambda_`,
are passed to `scipy.stats.chi2_contingency` to compute the test statistic
and p-value.
Parameters
----------
sample1, sample2, ... : array_like
The set of samples. There must be at least two samples.
Each sample must be a one-dimensional sequence containing at least
one value. The samples are not required to have the same length.
ties : str, optional
Determines how values equal to the grand median are classified in
the contingency table. The string must be one of::
"below":
Values equal to the grand median are counted as "below".
"above":
Values equal to the grand median are counted as "above".
"ignore":
Values equal to the grand median are not counted.
The default is "below".
correction : bool, optional
If True, *and* there are just two samples, apply Yates' correction
for continuity when computing the test statistic associated with
the contingency table. Default is True.
lambda_ : float or str, optional.
By default, the statistic computed in this test is Pearson's
chi-squared statistic. `lambda_` allows a statistic from the
Cressie-Read power divergence family to be used instead. See
`power_divergence` for details.
Default is 1 (Pearson's chi-squared statistic).
Returns
-------
stat : float
The test statistic. The statistic that is returned is determined by
`lambda_`. The default is Pearson's chi-squared statistic.
p : float
The p-value of the test.
m : float
The grand median.
table : ndarray
The contingency table. The shape of the table is (2, n), where
n is the number of samples. The first row holds the counts of the
values above the grand median, and the second row holds the counts
of the values below the grand median. The table allows further
analysis with, for example, `scipy.stats.chi2_contingency`, or with
`scipy.stats.fisher_exact` if there are two samples, without having
to recompute the table.
See Also
--------
kruskal : Compute the Kruskal-Wallis H-test for independent samples.
mannwhitneyu : Computes the Mann-Whitney rank test on samples x and y.
Notes
-----
.. versionadded:: 0.15.0
References
----------
.. [1] Mood, A. M., Introduction to the Theory of Statistics. McGraw-Hill
(1950), pp. 394-399.
.. [2] Zar, J. H., Biostatistical Analysis, 5th ed. Prentice Hall (2010).
See Sections 8.12 and 10.15.
Examples
--------
A biologist runs an experiment in which there are three groups of plants.
Group 1 has 16 plants, group 2 has 15 plants, and group 3 has 17 plants.
Each plant produces a number of seeds. The seed counts for each group
are::
Group 1: 10 14 14 18 20 22 24 25 31 31 32 39 43 43 48 49
Group 2: 28 30 31 33 34 35 36 40 44 55 57 61 91 92 99
Group 3: 0 3 9 22 23 25 25 33 34 34 40 45 46 48 62 67 84
The following code applies Mood's median test to these samples.
>>> g1 = [10, 14, 14, 18, 20, 22, 24, 25, 31, 31, 32, 39, 43, 43, 48, 49]
>>> g2 = [28, 30, 31, 33, 34, 35, 36, 40, 44, 55, 57, 61, 91, 92, 99]
>>> g3 = [0, 3, 9, 22, 23, 25, 25, 33, 34, 34, 40, 45, 46, 48, 62, 67, 84]
>>> stat, p, med, tbl = median_test(g1, g2, g3)
The median is
>>> med
34.0
and the contingency table is
>>> tbl
array([[ 5, 10, 7],
[11, 5, 10]])
`p` is too large to conclude that the medians are not the same:
>>> p
0.12609082774093244
The "G-test" can be performed by passing ``lambda_="log-likelihood"`` to
`median_test`.
>>> g, p, med, tbl = median_test(g1, g2, g3, lambda_="log-likelihood")
>>> p
0.12224779737117837
The median occurs several times in the data, so we'll get a different
result if, for example, ``ties="above"`` is used:
>>> stat, p, med, tbl = median_test(g1, g2, g3, ties="above")
>>> p
0.063873276069553273
>>> tbl
array([[ 5, 11, 9],
[11, 4, 8]])
This example demonstrates that if the data set is not large and there
are values equal to the median, the p-value can be sensitive to the
choice of `ties`.
"""
ties = kwds.pop('ties', 'below')
correction = kwds.pop('correction', True)
lambda_ = kwds.pop('lambda_', None)
if len(kwds) > 0:
bad_kwd = list(kwds.keys())[0]
raise TypeError("median_test() got an unexpected keyword "
"argument %r" % bad_kwd)
if len(args) < 2:
raise ValueError('median_test requires two or more samples.')
ties_options = ['below', 'above', 'ignore']
if ties not in ties_options:
raise ValueError("invalid 'ties' option '%s'; 'ties' must be one "
"of: %s" % (ties, str(ties_options)[1:-1]))
data = [np.asarray(arg) for arg in args]
# Validate the sizes and shapes of the arguments.
for k, d in enumerate(data):
if d.size == 0:
raise ValueError("Sample %d is empty. All samples must "
"contain at least one value." % (k + 1))
if d.ndim != 1:
raise ValueError("Sample %d has %d dimensions. All "
"samples must be one-dimensional sequences." %
(k + 1, d.ndim))
grand_median = np.median(np.concatenate(data))
# Create the contingency table.
table = np.zeros((2, len(data)), dtype=np.int64)
for k, sample in enumerate(data):
nabove = count_nonzero(sample > grand_median)
nbelow = count_nonzero(sample < grand_median)
nequal = sample.size - (nabove + nbelow)
table[0, k] += nabove
table[1, k] += nbelow
if ties == "below":
table[1, k] += nequal
elif ties == "above":
table[0, k] += nequal
# Check that no row or column of the table is all zero.
# Such a table can not be given to chi2_contingency, because it would have
# a zero in the table of expected frequencies.
rowsums = table.sum(axis=1)
if rowsums[0] == 0:
raise ValueError("All values are below the grand median (%r)." %
grand_median)
if rowsums[1] == 0:
raise ValueError("All values are above the grand median (%r)." %
grand_median)
if ties == "ignore":
# We already checked that each sample has at least one value, but it
# is possible that all those values equal the grand median. If `ties`
# is "ignore", that would result in a column of zeros in `table`. We
# check for that case here.
zero_cols = np.where((table == 0).all(axis=0))[0]
if len(zero_cols) > 0:
msg = ("All values in sample %d are equal to the grand "
"median (%r), so they are ignored, resulting in an "
"empty sample." % (zero_cols[0] + 1, grand_median))
raise ValueError(msg)
stat, p, dof, expected = chi2_contingency(table, lambda_=lambda_,
correction=correction)
return stat, p, grand_median, table
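As a cross-check of the table construction described in the docstring, the counts can be rebuilt by hand from the seed-count samples used there (a sketch):

    import numpy as np
    g1 = [10, 14, 14, 18, 20, 22, 24, 25, 31, 31, 32, 39, 43, 43, 48, 49]
    g2 = [28, 30, 31, 33, 34, 35, 36, 40, 44, 55, 57, 61, 91, 92, 99]
    g3 = [0, 3, 9, 22, 23, 25, 25, 33, 34, 34, 40, 45, 46, 48, 62, 67, 84]
    grand = np.median(np.concatenate([g1, g2, g3]))            # 34.0
    above = [np.sum(np.asarray(g) > grand) for g in (g1, g2, g3)]
    below = [np.sum(np.asarray(g) <= grand) for g in (g1, g2, g3)]
    table = np.array([above, below])  # [[5, 10, 7], [11, 5, 10]], matching
                                      # the docstring with ties="below"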
def _hermnorm(N): def _hermnorm(N):
# return the negatively normalized hermite polynomials up to order N-1 # return the negatively normalized hermite polynomials up to order N-1
# (inclusive) # (inclusive)
@ -1929,7 +2347,7 @@ def circstd(samples, high=2 * pi, low=0, axis=None):
# Tests to include (from R) -- some of these already in stats. # Tests to include (from R) -- some of these already in stats.
# ########
# X Ansari-Bradley # X Ansari-Bradley
# X Bartlett (and Levene) # X Bartlett (and Levene)
# X Binomial # X Binomial
@ -24,13 +24,9 @@ is a relatively new package, some API changes are still possible.
f_value_wilks_lambda f_value_wilks_lambda
find_repeats find_repeats
friedmanchisquare friedmanchisquare
gmean
hmean
kendalltau kendalltau
kendalltau_seasonal kendalltau_seasonal
kruskalwallis kruskalwallis
kruskalwallis
ks_twosamp
ks_twosamp ks_twosamp
kurtosis kurtosis
kurtosistest kurtosistest
@ -80,3 +76,4 @@ from __future__ import division, print_function, absolute_import
from .mstats_basic import * from .mstats_basic import *
from .mstats_extras import * from .mstats_extras import *
from scipy.stats import gmean, hmean
File diff suppressed because it is too large
@ -1,15 +1,12 @@
""" """
Additional statistics functions, with support to MA. Additional statistics functions with support for masked arrays.
:author: Pierre GF Gerard-Marchant
:contact: pierregm_at_uga_edu
:date: $Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $
:version: $Id: morestats.py 3473 2007-10-29 15:18:13Z jarrod.millman $
""" """
from __future__ import division, print_function, absolute_import
__author__ = "Pierre GF Gerard-Marchant" # Original author (2007): Pierre GF Gerard-Marchant
__docformat__ = "restructuredtext en"
from __future__ import division, print_function, absolute_import
__all__ = ['compare_medians_ms', __all__ = ['compare_medians_ms',
@ -19,6 +16,7 @@ __all__ = ['compare_medians_ms',
'rsh', 'rsh',
'trimmed_mean_ci',] 'trimmed_mean_ci',]
import numpy as np import numpy as np
from numpy import float_, int_, ndarray from numpy import float_, int_, ndarray
@ -30,9 +28,6 @@ from . import mstats_basic as mstats
from scipy.stats.distributions import norm, beta, t, binom from scipy.stats.distributions import norm, beta, t, binom
#####--------------------------------------------------------------------------
#---- --- Quantiles ---
#####--------------------------------------------------------------------------
def hdquantiles(data, prob=list([.25,.5,.75]), axis=None, var=False,): def hdquantiles(data, prob=list([.25,.5,.75]), axis=None, var=False,):
""" """
Computes quantile estimates with the Harrell-Davis method. Computes quantile estimates with the Harrell-Davis method.
@ -65,14 +60,14 @@ def hdquantiles(data, prob=list([.25,.5,.75]), axis=None, var=False,):
xsorted = np.squeeze(np.sort(data.compressed().view(ndarray))) xsorted = np.squeeze(np.sort(data.compressed().view(ndarray)))
# Don't use length here, in case we have a numpy scalar # Don't use length here, in case we have a numpy scalar
n = xsorted.size n = xsorted.size
#.........
hd = np.empty((2,len(prob)), float_) hd = np.empty((2,len(prob)), float_)
if n < 2: if n < 2:
hd.flat = np.nan hd.flat = np.nan
if var: if var:
return hd return hd
return hd[0] return hd[0]
#.........
v = np.arange(n+1) / float(n) v = np.arange(n+1) / float(n)
betacdf = beta.cdf betacdf = beta.cdf
for (i,p) in enumerate(prob): for (i,p) in enumerate(prob):
@ -89,7 +84,7 @@ def hdquantiles(data, prob=list([.25,.5,.75]), axis=None, var=False,):
hd[1, prob == 0] = hd[1, prob == 1] = np.nan hd[1, prob == 0] = hd[1, prob == 1] = np.nan
return hd return hd
return hd[0] return hd[0]
# Initialization & checks --------- # Initialization & checks
data = ma.array(data, copy=False, dtype=float_) data = ma.array(data, copy=False, dtype=float_)
p = np.array(prob, copy=False, ndmin=1) p = np.array(prob, copy=False, ndmin=1)
# Computes quantiles along axis (or globally) # Computes quantiles along axis (or globally)
@ -97,12 +92,11 @@ def hdquantiles(data, prob=list([.25,.5,.75]), axis=None, var=False,):
result = _hd_1D(data, p, var) result = _hd_1D(data, p, var)
else: else:
if data.ndim > 2: if data.ndim > 2:
raise ValueError("Array 'data' must be at most two dimensional, but got data.ndim = %d" % data.ndim) raise ValueError("Array 'data' must be at most two dimensional, "
"but got data.ndim = %d" % data.ndim)
result = ma.apply_along_axis(_hd_1D, axis, data, p, var) result = ma.apply_along_axis(_hd_1D, axis, data, p, var)
#
return ma.fix_invalid(result, copy=False)
#.............................................................................. return ma.fix_invalid(result, copy=False)
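The Harrell-Davis estimate is a beta-weighted average of the order statistics; the weight construction in isolation (a sketch for a single probability p):

    import numpy as np
    from scipy.stats import beta
    np.random.seed(0)
    xsorted = np.sort(np.random.rand(50))
    n = xsorted.size
    p = 0.5                                   # the median
    v = np.arange(n + 1) / float(n)
    w_ = beta.cdf(v, (n + 1) * p, (n + 1) * (1 - p))
    w = w_[1:] - w_[:-1]                      # weights, summing to 1
    hd = np.dot(w, xsorted)                   # Harrell-Davis estimate at p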
def hdmedian(data, axis=-1, var=False): def hdmedian(data, axis=-1, var=False):
@ -124,7 +118,6 @@ def hdmedian(data, axis=-1, var=False):
return result.squeeze() return result.squeeze()
#..............................................................................
def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None): def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
""" """
The standard error of the Harrell-Davis quantile estimates by jackknife. The standard error of the Harrell-Davis quantile estimates by jackknife.
@ -153,10 +146,10 @@ def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
hdsd = np.empty(len(prob), float_) hdsd = np.empty(len(prob), float_)
if n < 2: if n < 2:
hdsd.flat = np.nan hdsd.flat = np.nan
#.........
vv = np.arange(n) / float(n-1) vv = np.arange(n) / float(n-1)
betacdf = beta.cdf betacdf = beta.cdf
#
for (i,p) in enumerate(prob): for (i,p) in enumerate(prob):
_w = betacdf(vv, (n+1)*p, (n+1)*(1-p)) _w = betacdf(vv, (n+1)*p, (n+1)*(1-p))
w = _w[1:] - _w[:-1] w = _w[1:] - _w[:-1]
@ -166,7 +159,7 @@ def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
mx_var = np.array(mx_.var(), copy=False, ndmin=1) * n / float(n-1) mx_var = np.array(mx_.var(), copy=False, ndmin=1) * n / float(n-1)
hdsd[i] = float(n-1) * np.sqrt(np.diag(mx_var).diagonal() / float(n)) hdsd[i] = float(n-1) * np.sqrt(np.diag(mx_var).diagonal() / float(n))
return hdsd return hdsd
# Initialization & checks --------- # Initialization & checks
data = ma.array(data, copy=False, dtype=float_) data = ma.array(data, copy=False, dtype=float_)
p = np.array(prob, copy=False, ndmin=1) p = np.array(prob, copy=False, ndmin=1)
# Computes quantiles along axis (or globally) # Computes quantiles along axis (or globally)
@ -174,15 +167,12 @@ def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
result = _hdsd_1D(data, p) result = _hdsd_1D(data, p)
else: else:
if data.ndim > 2: if data.ndim > 2:
raise ValueError("Array 'data' must be at most two dimensional, but got data.ndim = %d" % data.ndim) raise ValueError("Array 'data' must be at most two dimensional, "
"but got data.ndim = %d" % data.ndim)
result = ma.apply_along_axis(_hdsd_1D, axis, data, p) result = ma.apply_along_axis(_hdsd_1D, axis, data, p)
#
return ma.fix_invalid(result, copy=False).ravel()
return ma.fix_invalid(result, copy=False).ravel()
#####--------------------------------------------------------------------------
#---- --- Confidence intervals ---
#####--------------------------------------------------------------------------
def trimmed_mean_ci(data, limits=(0.2,0.2), inclusive=(True,True), def trimmed_mean_ci(data, limits=(0.2,0.2), inclusive=(True,True),
alpha=0.05, axis=None): alpha=0.05, axis=None):
@ -198,9 +188,9 @@ def trimmed_mean_ci(data, limits=(0.2,0.2), inclusive=(True,True),
Tuple of the percentages to cut on each side of the array, with respect Tuple of the percentages to cut on each side of the array, with respect
to the number of unmasked data, as floats between 0. and 1. If ``n`` to the number of unmasked data, as floats between 0. and 1. If ``n``
is the number of unmasked data before trimming, then is the number of unmasked data before trimming, then
(``n`` * `limits[0]`)th smallest data and (``n`` * `limits[1]`)th (``n * limits[0]``)th smallest data and (``n * limits[1]``)th
largest data are masked. The total number of unmasked data after largest data are masked. The total number of unmasked data after
trimming is ``n`` * (1. - sum(`limits`)). trimming is ``n * (1. - sum(limits))``.
The value of one limit can be set to None to indicate an open interval. The value of one limit can be set to None to indicate an open interval.
Defaults to (0.2, 0.2). Defaults to (0.2, 0.2).
@ -234,8 +224,6 @@ def trimmed_mean_ci(data, limits=(0.2,0.2), inclusive=(True,True),
tppf = t.ppf(1-alpha/2.,df) tppf = t.ppf(1-alpha/2.,df)
return np.array((tmean - tppf*tstde, tmean+tppf*tstde)) return np.array((tmean - tppf*tstde, tmean+tppf*tstde))
#..............................................................................
def mjci(data, prob=[0.25,0.5,0.75], axis=None): def mjci(data, prob=[0.25,0.5,0.75], axis=None):
""" """
@ -258,7 +246,7 @@ def mjci(data, prob=[0.25,0.5,0.75], axis=None):
n = data.size n = data.size
prob = (np.array(p) * n + 0.5).astype(int_) prob = (np.array(p) * n + 0.5).astype(int_)
betacdf = beta.cdf betacdf = beta.cdf
#
mj = np.empty(len(prob), float_) mj = np.empty(len(prob), float_)
x = np.arange(1,n+1, dtype=float_) / n x = np.arange(1,n+1, dtype=float_) / n
y = x - 1./n y = x - 1./n
@ -269,10 +257,12 @@ def mjci(data, prob=[0.25,0.5,0.75], axis=None):
C2 = np.dot(W,data**2) C2 = np.dot(W,data**2)
mj[i] = np.sqrt(C2 - C1**2) mj[i] = np.sqrt(C2 - C1**2)
return mj return mj
#
data = ma.array(data, copy=False) data = ma.array(data, copy=False)
if data.ndim > 2: if data.ndim > 2:
raise ValueError("Array 'data' must be at most two dimensional, but got data.ndim = %d" % data.ndim) raise ValueError("Array 'data' must be at most two dimensional, "
"but got data.ndim = %d" % data.ndim)
p = np.array(prob, copy=False, ndmin=1) p = np.array(prob, copy=False, ndmin=1)
# Computes quantiles along axis (or globally) # Computes quantiles along axis (or globally)
if (axis is None): if (axis is None):
@ -280,8 +270,6 @@ def mjci(data, prob=[0.25,0.5,0.75], axis=None):
else: else:
return ma.apply_along_axis(_mjci_1D, axis, data, p) return ma.apply_along_axis(_mjci_1D, axis, data, p)
#..............................................................................
def mquantiles_cimj(data, prob=[0.25,0.50,0.75], alpha=0.05, axis=None): def mquantiles_cimj(data, prob=[0.25,0.50,0.75], alpha=0.05, axis=None):
""" """
@ -308,7 +296,6 @@ def mquantiles_cimj(data, prob=[0.25,0.50,0.75], alpha=0.05, axis=None):
return (xq - z * smj, xq + z * smj) return (xq - z * smj, xq + z * smj)
#.............................................................................
def median_cihs(data, alpha=0.05, axis=None): def median_cihs(data, alpha=0.05, axis=None):
""" """
Computes the alpha-level confidence interval for the median of the data. Computes the alpha-level confidence interval for the median of the data.
@ -353,12 +340,11 @@ def median_cihs(data, alpha=0.05, axis=None):
result = _cihs_1D(data.compressed(), alpha) result = _cihs_1D(data.compressed(), alpha)
else: else:
if data.ndim > 2: if data.ndim > 2:
raise ValueError("Array 'data' must be at most two dimensional, but got data.ndim = %d" % data.ndim) raise ValueError("Array 'data' must be at most two dimensional, "
"but got data.ndim = %d" % data.ndim)
result = ma.apply_along_axis(_cihs_1D, axis, data, alpha) result = ma.apply_along_axis(_cihs_1D, axis, data, alpha)
#
return result
#.............................................................................. return result
def compare_medians_ms(group_1, group_2, axis=None): def compare_medians_ms(group_1, group_2, axis=None):
@ -453,14 +439,13 @@ def rsh(data, points=None):
points = data points = data
else: else:
points = np.array(points, copy=False, ndmin=1) points = np.array(points, copy=False, ndmin=1)
if data.ndim != 1: if data.ndim != 1:
raise AttributeError("The input array should be 1D only !") raise AttributeError("The input array should be 1D only !")
n = data.count() n = data.count()
r = idealfourths(data, axis=None) r = idealfourths(data, axis=None)
h = 1.2 * (r[-1]-r[0]) / n**(1./5) h = 1.2 * (r[-1]-r[0]) / n**(1./5)
nhi = (data[:,None] <= points[None,:] + h).sum(0) nhi = (data[:,None] <= points[None,:] + h).sum(0)
nlo = (data[:,None] < points[None,:] - h).sum(0) nlo = (data[:,None] < points[None,:] - h).sum(0)
return (nhi-nlo) / (2.*n*h) return (nhi-nlo) / (2.*n*h)
###############################################################################
@ -112,6 +112,7 @@ Correlation Functions
pointbiserialr pointbiserialr
kendalltau kendalltau
linregress linregress
theilslopes
Inferential Stats Inferential Stats
----------------- -----------------
@ -170,7 +171,7 @@ from __future__ import division, print_function, absolute_import
import warnings import warnings
import math import math
#from .six import xrange from scipy.lib.six import xrange
# friedmanchisquare patch uses python sum # friedmanchisquare patch uses python sum
pysum = sum # save it before it gets overwritten pysum = sum # save it before it gets overwritten
@ -188,25 +189,23 @@ try:
from scipy.stats._rank import rankdata, tiecorrect from scipy.stats._rank import rankdata, tiecorrect
except: except:
rankdata = tiecorrect = None rankdata = tiecorrect = None
__all__ = ['find_repeats', 'gmean', 'hmean', 'mode', __all__ = ['find_repeats', 'gmean', 'hmean', 'mode', 'tmean', 'tvar',
'tmean', 'tvar', 'tmin', 'tmax', 'tstd', 'tsem', 'tmin', 'tmax', 'tstd', 'tsem', 'moment', 'variation',
'moment', 'variation', 'skew', 'kurtosis', 'describe', 'skew', 'kurtosis', 'describe', 'skewtest', 'kurtosistest',
'skewtest', 'kurtosistest', 'normaltest', 'jarque_bera', 'normaltest', 'jarque_bera', 'itemfreq',
'itemfreq', 'scoreatpercentile', 'percentileofscore', 'scoreatpercentile', 'percentileofscore', 'histogram',
'histogram', 'histogram2', 'cumfreq', 'relfreq', 'histogram2', 'cumfreq', 'relfreq', 'obrientransform',
'obrientransform', 'signaltonoise', 'sem', 'zmap', 'zscore', 'signaltonoise', 'sem', 'zmap', 'zscore', 'threshold',
'threshold', 'sigmaclip', 'trimboth', 'trim1', 'trim_mean', 'sigmaclip', 'trimboth', 'trim1', 'trim_mean', 'f_oneway',
'f_oneway', 'pearsonr', 'fisher_exact', 'pearsonr', 'fisher_exact', 'spearmanr', 'pointbiserialr',
'spearmanr', 'pointbiserialr', 'kendalltau', 'linregress', 'kendalltau', 'linregress', 'theilslopes', 'ttest_1samp',
'ttest_1samp', 'ttest_ind', 'ttest_rel', 'kstest', 'ttest_ind', 'ttest_rel', 'kstest', 'chisquare',
'chisquare', 'power_divergence', 'ks_2samp', 'mannwhitneyu', 'power_divergence', 'ks_2samp', 'mannwhitneyu',
'tiecorrect', 'ranksums', 'kruskal', 'friedmanchisquare', 'tiecorrect', 'ranksums', 'kruskal', 'friedmanchisquare',
'zprob', 'chisqprob', 'ksprob', 'fprob', 'betai', 'zprob', 'chisqprob', 'ksprob', 'fprob', 'betai',
'f_value_wilks_lambda', 'f_value', 'f_value_multivariate', 'f_value_wilks_lambda', 'f_value', 'f_value_multivariate',
'ss', 'square_of_sums', 'ss', 'square_of_sums', 'fastsort', 'rankdata', 'nanmean',
'fastsort', 'rankdata', 'nanstd', 'nanmedian', ]
'nanmean', 'nanstd', 'nanmedian',
]
def _chk_asarray(a, axis): def _chk_asarray(a, axis):
@ -250,19 +249,20 @@ def find_repeats(arr):
Examples Examples
-------- --------
>>> sp.stats.find_repeats([2, 1, 2, 3, 2, 2, 5]) >>> import scipy.stats as stats
>>> stats.find_repeats([2, 1, 2, 3, 2, 2, 5])
(array([ 2. ]), array([ 4 ], dtype=int32)) (array([ 2. ]), array([ 4 ], dtype=int32))
>>> sp.stats.find_repeats([[10, 20, 1, 2], [5, 5, 4, 4]]) >>> stats.find_repeats([[10, 20, 1, 2], [5, 5, 4, 4]])
(array([ 4., 5.]), array([2, 2], dtype=int32)) (array([ 4., 5.]), array([2, 2], dtype=int32))
""" """
v1,v2, n = futil.dfreps(arr) v1,v2, n = futil.dfreps(arr)
return v1[:n],v2[:n] return v1[:n],v2[:n]
# #######
# NAN friendly functions ### NAN friendly functions
# ########
def nanmean(x, axis=0): def nanmean(x, axis=0):
@ -387,11 +387,20 @@ def _nanmedian(arr1d): # This only works on 1d arrays
m : float m : float
The median. The median.
""" """
cond = 1 - np.isnan(arr1d) x = arr1d.copy()
x = np.sort(np.compress(cond, arr1d, axis=-1)) c = np.isnan(x)
if x.size == 0: s = np.where(c)[0]
if s.size == x.size:
warnings.warn("All-NaN slice encountered", RuntimeWarning)
return np.nan return np.nan
return np.median(x) elif s.size != 0:
# select non-nans at end of array
enonan = x[-s.size:][~c[-s.size:]]
# fill nans in beginning of array with non-nans of end
x[s[:enonan.size]] = enonan
# slice nans away
x = x[:-s.size]
return np.median(x, overwrite_input=True)
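The core trick of the replacement `_nanmedian` on the right-hand side, in isolation (a sketch; it assumes the slice holds at least one NaN and one non-NaN):

    import numpy as np
    x = np.array([1., np.nan, 3., np.nan, 5.])
    c = np.isnan(x)
    s = np.where(c)[0]                  # NaN positions: [1, 3]
    enonan = x[-s.size:][~c[-s.size:]]  # non-NaNs in the tail: [5.]
    x[s[:enonan.size]] = enonan         # fill leading NaN slots with them
    med = np.median(x[:-s.size], overwrite_input=True)   # 3.0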
def nanmedian(x, axis=0): def nanmedian(x, axis=0):
@ -413,7 +422,7 @@ def nanmedian(x, axis=0):
See Also See Also
-------- --------
nanstd, nanmean nanstd, nanmean, numpy.nanmedian
Examples Examples
-------- --------
@ -444,16 +453,17 @@ def nanmedian(x, axis=0):
x, axis = _chk_asarray(x, axis) x, axis = _chk_asarray(x, axis)
if x.ndim == 0: if x.ndim == 0:
return float(x.item()) return float(x.item())
x = x.copy() if hasattr(np, 'nanmedian'): # numpy 1.9 faster for some cases
return np.nanmedian(x, axis)
x = np.apply_along_axis(_nanmedian, axis, x) x = np.apply_along_axis(_nanmedian, axis, x)
if x.ndim == 0: if x.ndim == 0:
x = float(x.item()) x = float(x.item())
return x return x
# #####################################
# CENTRAL TENDENCY ######## ######## CENTRAL TENDENCY ########
# #####################################
def gmean(a, axis=0, dtype=None): def gmean(a, axis=0, dtype=None):
@ -498,8 +508,7 @@ def gmean(a, axis=0, dtype=None):
arrays automatically mask any non-finite values. arrays automatically mask any non-finite values.
""" """
# if not an ndarray object attempt to convert it if not isinstance(a, np.ndarray): # if not an ndarray object attempt to convert it
if not isinstance(a, np.ndarray):
log_a = np.log(np.array(a, dtype=dtype)) log_a = np.log(np.array(a, dtype=dtype))
elif dtype: # Must change the default dtype allowing array type elif dtype: # Must change the default dtype allowing array type
if isinstance(a,np.ma.MaskedArray): if isinstance(a,np.ma.MaskedArray):
@ -564,8 +573,7 @@ def hmean(a, axis=0, dtype=None):
size = a.shape[axis] size = a.shape[axis]
return size / np.sum(1.0/a, axis=axis, dtype=dtype) return size / np.sum(1.0/a, axis=axis, dtype=dtype)
else: else:
raise ValueError( raise ValueError("Harmonic mean only defined if all elements greater than zero")
"Harmonic mean only defined if all elements greater than zero")
def mode(a, axis=0): def mode(a, axis=0):
@ -610,7 +618,7 @@ def mode(a, axis=0):
scores = np.unique(np.ravel(a)) # get ALL unique values scores = np.unique(np.ravel(a)) # get ALL unique values
testshape = list(a.shape) testshape = list(a.shape)
testshape[axis] = 1 testshape[axis] = 1
oldmostfreq = np.zeros(testshape) oldmostfreq = np.zeros(testshape, dtype=a.dtype)
oldcounts = np.zeros(testshape) oldcounts = np.zeros(testshape)
for score in scores: for score in scores:
template = (a == score) template = (a == score)
@ -887,9 +895,9 @@ def tsem(a, limits=None, inclusive=(True, True)):
return sd / np.sqrt(am.count()) return sd / np.sqrt(am.count())
# #####################################
# MOMENTS ############# ############ MOMENTS #############
# #####################################
def moment(a, moment=1, axis=0): def moment(a, moment=1, axis=0):
""" """
@ -1061,9 +1069,7 @@ def kurtosis(a, axis=0, fisher=True, bias=True):
if can_correct.any(): if can_correct.any():
m2 = np.extract(can_correct, m2) m2 = np.extract(can_correct, m2)
m4 = np.extract(can_correct, m4) m4 = np.extract(can_correct, m4)
nval = 1.0 / \ nval = 1.0/(n-2)/(n-3)*((n*n-1.0)*m4/m2**2.0-3*(n-1)**2.0)
(n - 2) / (n - 3) * \
((n * n - 1.0) * m4 / m2 ** 2.0 - 3 * (n - 1) ** 2.0)
np.place(vals, can_correct, nval+3.0) np.place(vals, can_correct, nval+3.0)
if vals.ndim == 0: if vals.ndim == 0:
@ -1075,17 +1081,19 @@ def kurtosis(a, axis=0, fisher=True, bias=True):
return vals return vals
def describe(a, axis=0): def describe(a, axis=0, ddof=1):
""" """
Computes several descriptive statistics of the passed array. Computes several descriptive statistics of the passed array.
Parameters Parameters
---------- ----------
a : array_like a : array_like
data Input data.
axis : int or None axis : int, optional
axis along which statistics are calculated. If axis is None, then data Axis along which statistics are calculated. If axis is None, then data
array is raveled. The default axis is zero. array is raveled. The default axis is zero.
ddof : int, optional
Delta degrees of freedom. Default is 1.
Returns Returns
------- -------
@ -1107,22 +1115,21 @@ def describe(a, axis=0):
See Also See Also
-------- --------
skew skew, kurtosis
kurtosis
""" """
a, axis = _chk_asarray(a, axis) a, axis = _chk_asarray(a, axis)
n = a.shape[axis] n = a.shape[axis]
mm = (np.min(a, axis=axis), np.max(a, axis=axis)) mm = (np.min(a, axis=axis), np.max(a, axis=axis))
m = np.mean(a, axis=axis) m = np.mean(a, axis=axis)
v = np.var(a, axis=axis, ddof=1) v = np.var(a, axis=axis, ddof=ddof)
sk = skew(a, axis) sk = skew(a, axis)
kurt = kurtosis(a, axis) kurt = kurtosis(a, axis)
return n, mm, m, v, sk, kurt return n, mm, m, v, sk, kurt
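The new `ddof` argument only reaches the variance term; its effect in isolation:

    import numpy as np
    a = np.array([1., 2., 3., 4.])
    np.var(a, ddof=1)   # 1.666..., the sample variance `describe` defaults to
    np.var(a, ddof=0)   # 1.25, the population variance when ddof=0 is passed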
# #####################################
# NORMALITY TESTS ########## ######## NORMALITY TESTS ##########
# #####################################
def skewtest(a, axis=0): def skewtest(a, axis=0):
@ -1211,18 +1218,15 @@ def kurtosistest(a, axis=0):
int(n)) int(n))
b2 = kurtosis(a, axis, fisher=False) b2 = kurtosis(a, axis, fisher=False)
E = 3.0*(n-1) / (n+1) E = 3.0*(n-1) / (n+1)
varb2 = 24.0 * n * \ varb2 = 24.0*n*(n-2)*(n-3) / ((n+1)*(n+1.)*(n+3)*(n+5))
(n - 2) * (n - 3) / ((n + 1) * (n + 1) * (n + 3) * (n + 5))
x = (b2-E)/np.sqrt(varb2) x = (b2-E)/np.sqrt(varb2)
sqrtbeta1 = 6.0*(n*n-5*n+2)/((n+7)*(n+9)) * np.sqrt((6.0*(n+3)*(n+5)) / sqrtbeta1 = 6.0*(n*n-5*n+2)/((n+7)*(n+9)) * np.sqrt((6.0*(n+3)*(n+5)) /
(n*(n-2)*(n-3))) (n*(n-2)*(n-3)))
A = 6.0 + 8.0 / sqrtbeta1 * \ A = 6.0 + 8.0/sqrtbeta1 * (2.0/sqrtbeta1 + np.sqrt(1+4.0/(sqrtbeta1**2)))
(2.0 / sqrtbeta1 + np.sqrt(1 + 4.0 / (sqrtbeta1 ** 2)))
term1 = 1 - 2/(9.0*A) term1 = 1 - 2/(9.0*A)
denom = 1 + x*np.sqrt(2/(A-4.0)) denom = 1 + x*np.sqrt(2/(A-4.0))
denom = np.where(denom < 0, 99, denom) denom = np.where(denom < 0, 99, denom)
term2 = np.where( term2 = np.where(denom < 0, term1, np.power((1-2.0/A)/denom,1/3.0))
denom < 0, term1, np.power((1 - 2.0 / A) / denom, 1 / 3.0))
Z = (term1 - term2) / np.sqrt(2/(9.0*A)) Z = (term1 - term2) / np.sqrt(2/(9.0*A))
Z = np.where(denom == 99, 0, Z) Z = np.where(denom == 99, 0, Z)
if Z.ndim == 0: if Z.ndim == 0:
@ -1268,8 +1272,8 @@ def normaltest(a, axis=0):
""" """
a, axis = _chk_asarray(a, axis) a, axis = _chk_asarray(a, axis)
s, _p = skewtest(a, axis) s, _ = skewtest(a, axis)
k, _p = kurtosistest(a, axis) k, _ = kurtosistest(a, axis)
k2 = s*s + k*k k2 = s*s + k*k
return k2, chisqprob(k2,2) return k2, chisqprob(k2,2)
@ -1322,8 +1326,7 @@ def jarque_bera(x):
mu = x.mean() mu = x.mean()
diffx = x - mu diffx = x - mu
skewness = (1 / n * np.sum(diffx ** 3)) / \ skewness = (1 / n * np.sum(diffx**3)) / (1 / n * np.sum(diffx**2))**(3 / 2.)
(1 / n * np.sum(diffx ** 2)) ** (3 / 2.)
kurtosis = (1 / n * np.sum(diffx**4)) / (1 / n * np.sum(diffx**2))**2 kurtosis = (1 / n * np.sum(diffx**4)) / (1 / n * np.sum(diffx**2))**2
jb_value = n / 6 * (skewness**2 + (kurtosis - 3)**2 / 4) jb_value = n / 6 * (skewness**2 + (kurtosis - 3)**2 / 4)
p = 1 - distributions.chi2.cdf(jb_value, 2) p = 1 - distributions.chi2.cdf(jb_value, 2)
@ -1331,9 +1334,9 @@ def jarque_bera(x):
return jb_value, p return jb_value, p
# #####################################
# FREQUENCY FUNCTIONS ####### ###### FREQUENCY FUNCTIONS #######
# #####################################
def itemfreq(a): def itemfreq(a):
""" """
@ -1411,12 +1414,20 @@ def scoreatpercentile(a, per, limit=(), interpolation_method='fraction',
Returns Returns
------- -------
score : float (or sequence of floats) score : float or ndarray
Score at percentile. Score at percentile(s).
See Also See Also
-------- --------
percentileofscore percentileofscore, numpy.percentile
Notes
-----
This function will become obsolete in the future.
For NumPy 1.9 and higher, `numpy.percentile` provides all the functionality
that `scoreatpercentile` provides, and it is significantly faster.
Users with numpy >= 1.9 are therefore encouraged to use `numpy.percentile`
instead.
Examples Examples
-------- --------
@ -1426,17 +1437,19 @@ def scoreatpercentile(a, per, limit=(), interpolation_method='fraction',
49.5 49.5
""" """
# adapted from NumPy's percentile function # adapted from NumPy's percentile function. When we require numpy >= 1.8,
# the implementation of this function can be replaced by np.percentile.
a = np.asarray(a) a = np.asarray(a)
if a.size == 0:
# empty array, return nan(s) with shape matching `per`
if np.isscalar(per):
return np.nan
else:
return np.ones(np.asarray(per).shape, dtype=np.float64) * np.nan
if limit: if limit:
a = a[(limit[0] <= a) & (a <= limit[1])] a = a[(limit[0] <= a) & (a <= limit[1])]
if per == 0:
return a.min(axis=axis)
elif per == 100:
return a.max(axis=axis)
sorted = np.sort(a, axis=axis) sorted = np.sort(a, axis=axis)
if axis is None: if axis is None:
axis = 0 axis = 0
@ -1447,8 +1460,9 @@ def scoreatpercentile(a, per, limit=(), interpolation_method='fraction',
# handle sequence of per's without calling sort multiple times # handle sequence of per's without calling sort multiple times
def _compute_qth_percentile(sorted, per, interpolation_method, axis): def _compute_qth_percentile(sorted, per, interpolation_method, axis):
if not np.isscalar(per): if not np.isscalar(per):
return [_compute_qth_percentile(sorted, i, interpolation_method, axis) score = [_compute_qth_percentile(sorted, i, interpolation_method, axis)
for i in per] for i in per]
return np.array(score)
if (per < 0) or (per > 100): if (per < 0) or (per > 100):
raise ValueError("percentile must be in the range [0, 100]") raise ValueError("percentile must be in the range [0, 100]")
@ -1482,7 +1496,7 @@ def _compute_qth_percentile(sorted, per, interpolation_method, axis):
weights.shape = wshape weights.shape = wshape
sumval = weights.sum() sumval = weights.sum()
# Use np.add.reduce to coerce data type # Use np.add.reduce (== np.sum but a little faster) to coerce data type
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
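The equivalence claimed in the Notes above is easy to check (a sketch; the sequence form assumes numpy >= 1.9):

    import numpy as np
    a = np.arange(100)
    np.percentile(a, 50)            # 49.5, matching the docstring example
    np.percentile(a, [0, 50, 100])  # sequences of percentiles return an array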
@ -1608,8 +1622,7 @@ def histogram2(a, bins):
return n[1:]-n[:-1] return n[1:]-n[:-1]
def histogram(a, numbins=10, defaultlimits=None, weights=None, def histogram(a, numbins=10, defaultlimits=None, weights=None, printextras=False):
printextras=False):
""" """
Separates the range into several bins and returns the number of instances Separates the range into several bins and returns the number of instances
in each bin. in each bin.
@ -1711,12 +1724,13 @@ def cumfreq(a, numbins=10, defaultreallimits=None, weights=None):
Examples Examples
-------- --------
>>> import scipy.stats as stats
>>> x = [1, 4, 2, 1, 3, 1] >>> x = [1, 4, 2, 1, 3, 1]
>>> cumfreqs, lowlim, binsize, extrapoints = sp.stats.cumfreq(x, numbins=4) >>> cumfreqs, lowlim, binsize, extrapoints = stats.cumfreq(x, numbins=4)
>>> cumfreqs >>> cumfreqs
array([ 3., 4., 5., 6.]) array([ 3., 4., 5., 6.])
>>> cumfreqs, lowlim, binsize, extrapoints = \ >>> cumfreqs, lowlim, binsize, extrapoints = \
... sp.stats.cumfreq(x, numbins=4, defaultreallimits=(1.5, 5)) ... stats.cumfreq(x, numbins=4, defaultreallimits=(1.5, 5))
>>> cumfreqs >>> cumfreqs
array([ 1., 2., 3., 3.]) array([ 1., 2., 3., 3.])
>>> extrapoints >>> extrapoints
@ -1760,8 +1774,9 @@ def relfreq(a, numbins=10, defaultreallimits=None, weights=None):
Examples Examples
-------- --------
>>> import scipy.stats as stats
>>> a = np.array([1, 4, 2, 1, 3, 1]) >>> a = np.array([1, 4, 2, 1, 3, 1])
>>> relfreqs, lowlim, binsize, extrapoints = sp.stats.relfreq(a, numbins=4) >>> relfreqs, lowlim, binsize, extrapoints = stats.relfreq(a, numbins=4)
>>> relfreqs >>> relfreqs
array([ 0.5 , 0.16666667, 0.16666667, 0.16666667]) array([ 0.5 , 0.16666667, 0.16666667, 0.16666667])
>>> np.sum(relfreqs) # relative frequencies should add up to 1 >>> np.sum(relfreqs) # relative frequencies should add up to 1
@ -1773,9 +1788,9 @@ def relfreq(a, numbins=10, defaultreallimits=None, weights=None):
return h, l, b, e return h, l, b, e
# #####################################
# VARIABILITY FUNCTIONS ##### ###### VARIABILITY FUNCTIONS #####
# #####################################
def obrientransform(*args): def obrientransform(*args):
""" """
@ -1936,7 +1951,7 @@ def sem(a, axis=0, ddof=1):
""" """
a, axis = _chk_asarray(a, axis) a, axis = _chk_asarray(a, axis)
n = a.shape[axis] n = a.shape[axis]
s = np.std(a, axis=axis, ddof=ddof) / np.sqrt(n) # JP check normalization s = np.std(a, axis=axis, ddof=ddof) / np.sqrt(n)
return s return s
@ -2053,9 +2068,9 @@ def zmap(scores, compare, axis=0, ddof=0):
return (scores - mns) / sstd return (scores - mns) / sstd
# #####################################
# TRIMMING FUNCTIONS ####### ####### TRIMMING FUNCTIONS #######
# #####################################
def threshold(a, threshmin=None, threshmax=None, newval=0): def threshold(a, threshmin=None, threshmax=None, newval=0):
""" """
@ -2370,7 +2385,7 @@ def f_oneway(*args):
.. [2] Heiman, G.W. Research Methods in Statistics. 2002. .. [2] Heiman, G.W. Research Methods in Statistics. 2002.
""" """
args = list(map(np.asarray, args)) # convert to an numpy array args = [np.asarray(arg, dtype=float) for arg in args]
na = len(args) # ANOVA on 'na' groups, each in it's own array na = len(args) # ANOVA on 'na' groups, each in it's own array
alldata = np.concatenate(args) alldata = np.concatenate(args)
bign = len(alldata) bign = len(alldata)
@ -2378,6 +2393,7 @@ def f_oneway(*args):
ssbn = 0 ssbn = 0
for a in args: for a in args:
ssbn += square_of_sums(a) / float(len(a)) ssbn += square_of_sums(a) / float(len(a))
ssbn -= (square_of_sums(alldata) / float(bign)) ssbn -= (square_of_sums(alldata) / float(bign))
sswn = sstot - ssbn sswn = sstot - ssbn
dfbn = na - 1 dfbn = na - 1
@ -2385,7 +2401,7 @@ def f_oneway(*args):
msb = ssbn / float(dfbn) msb = ssbn / float(dfbn)
msw = sswn / float(dfwn) msw = sswn / float(dfwn)
f = msb / msw f = msb / msw
prob = fprob(dfbn, dfwn, f) prob = special.fdtrc(dfbn, dfwn, f) # equivalent to stats.f.sf
return f, prob return f, prob
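The `special.fdtrc` swap on the right-hand side is a straight replacement for the deprecated `fprob`; the two survival functions agree (a quick check):

    import numpy as np
    from scipy import special
    from scipy.stats import f
    dfbn, dfwn, F = 2, 27, 4.5
    assert np.allclose(special.fdtrc(dfbn, dfwn, F), f.sf(F, dfbn, dfwn))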
@ -2506,8 +2522,7 @@ def fisher_exact(table, alternative='two-sided'):
""" """
hypergeom = distributions.hypergeom hypergeom = distributions.hypergeom
# int32 is not enough for the algorithm c = np.asarray(table, dtype=np.int64) # int32 is not enough for the algorithm
c = np.asarray(table, dtype=np.int64)
if not c.shape == (2, 2): if not c.shape == (2, 2):
raise ValueError("The input `table` must be of shape (2, 2).") raise ValueError("The input `table` must be of shape (2, 2).")
@ -2580,7 +2595,7 @@ def fisher_exact(table, alternative='two-sided'):
pmode = hypergeom.pmf(mode, n1 + n2, n1, n) pmode = hypergeom.pmf(mode, n1 + n2, n1, n)
epsilon = 1 - 1e-4 epsilon = 1 - 1e-4
if float(np.abs(pexact - pmode)) / np.abs(np.max(pexact, pmode)) <= 1 - epsilon: if np.abs(pexact - pmode) / np.maximum(pexact, pmode) <= 1 - epsilon:
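# Note: the old left-hand expression used np.max(pexact, pmode), which
# misparses `pmode` as the `axis` argument; np.maximum is the elementwise
# two-argument maximum actually intended here.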
return oddsratio, 1. return oddsratio, 1.
elif c[0,0] < mode: elif c[0,0] < mode:
@ -2704,7 +2719,7 @@ def spearmanr(a, b=None, axis=0):
ar = np.apply_along_axis(rankdata,axisout,a) ar = np.apply_along_axis(rankdata,axisout,a)
br = None br = None
if not b is None: if b is not None:
b, axisout = _chk_asarray(b, axis) b, axisout = _chk_asarray(b, axis)
br = np.apply_along_axis(rankdata,axisout,b) br = np.apply_along_axis(rankdata,axisout,b)
n = a.shape[axisout] n = a.shape[axisout]
@ -2838,9 +2853,10 @@ def kendalltau(x, y, initial_lexsort=True):
Examples Examples
-------- --------
>>> import scipy.stats as stats
>>> x1 = [12, 2, 1, 12, 2] >>> x1 = [12, 2, 1, 12, 2]
>>> x2 = [1, 4, 7, 1, 0] >>> x2 = [1, 4, 7, 1, 0]
>>> tau, p_value = sp.stats.kendalltau(x1, x2) >>> tau, p_value = stats.kendalltau(x1, x2)
>>> tau >>> tau
-0.47140452079103173 -0.47140452079103173
>>> p_value >>> p_value
@ -2850,6 +2866,10 @@ def kendalltau(x, y, initial_lexsort=True):
x = np.asarray(x).ravel() x = np.asarray(x).ravel()
y = np.asarray(y).ravel() y = np.asarray(y).ravel()
if not x.size or not y.size:
return (np.nan, np.nan) # Return NaN if arrays are empty
n = np.int64(len(x)) n = np.int64(len(x))
temp = list(range(n)) # support structure used by mergesort temp = list(range(n)) # support structure used by mergesort
# this closure recursively sorts sections of perm[] by comparing # this closure recursively sorts sections of perm[] by comparing
@ -3031,9 +3051,126 @@ def linregress(x, y=None):
return slope, intercept, r, prob, sterrest return slope, intercept, r, prob, sterrest
# def theilslopes(y, x=None, alpha=0.95):
# INFERENTIAL STATISTICS ##### r"""
# Computes the Theil-Sen estimator for a set of points (x, y).
`theilslopes` implements a method for robust linear regression. It
computes the slope as the median of all slopes between paired values.
Parameters
----------
y : array_like
Dependent variable.
x : {None, array_like}, optional
Independent variable. If None, use ``arange(len(y))`` instead.
alpha : float
Confidence degree between 0 and 1. Default is 95% confidence.
Note that `alpha` is symmetric around 0.5, i.e. both 0.1 and 0.9 are
interpreted as "find the 90% confidence interval".
Returns
-------
medslope : float
Theil slope.
medintercept : float
Intercept of the Theil line, as ``median(y) - medslope*median(x)``.
lo_slope : float
Lower bound of the confidence interval on `medslope`.
up_slope : float
Upper bound of the confidence interval on `medslope`.
Notes
-----
The implementation of `theilslopes` follows [1]_. The intercept is
not defined in [1]_, and here it is defined as ``median(y) -
medslope*median(x)``, which is given in [3]_. Other definitions of
the intercept exist in the literature. A confidence interval for
the intercept is not given as this question is not addressed in
[1]_.
References
----------
.. [1] P.K. Sen, "Estimates of the regression coefficient based on Kendall's tau",
J. Am. Stat. Assoc., Vol. 63, pp. 1379-1389, 1968.
.. [2] H. Theil, "A rank-invariant method of linear and polynomial
regression analysis I, II and III", Nederl. Akad. Wetensch., Proc.
53:, pp. 386-392, pp. 521-525, pp. 1397-1412, 1950.
.. [3] W.L. Conover, "Practical nonparametric statistics", 2nd ed.,
John Wiley and Sons, New York, p. 493.
Examples
--------
>>> from scipy import stats
>>> import matplotlib.pyplot as plt
>>> x = np.linspace(-5, 5, num=150)
>>> y = x + np.random.normal(size=x.size)
>>> y[11:15] += 10 # add outliers
>>> y[-5:] -= 7
Compute the slope, intercept and 90% confidence interval. For comparison,
also compute the least-squares fit with `linregress`:
>>> res = stats.theilslopes(y, x, 0.90)
>>> lsq_res = stats.linregress(x, y)
Plot the results. The Theil-Sen regression line is shown in red, with the
dashed red lines illustrating the confidence interval of the slope (note
that the dashed red lines are not the confidence interval of the regression
as the confidence interval of the intercept is not included). The green
line shows the least-squares fit for comparison.
>>> fig = plt.figure()
>>> ax = fig.add_subplot(111)
>>> ax.plot(x, y, 'b.')
>>> ax.plot(x, res[1] + res[0] * x, 'r-')
>>> ax.plot(x, res[1] + res[2] * x, 'r--')
>>> ax.plot(x, res[1] + res[3] * x, 'r--')
>>> ax.plot(x, lsq_res[1] + lsq_res[0] * x, 'g-')
>>> plt.show()
"""
y = np.asarray(y).flatten()
if x is None:
x = np.arange(len(y), dtype=float)
else:
x = np.asarray(x, dtype=float).flatten()
if len(x) != len(y):
raise ValueError("Incompatible lengths ! (%s<>%s)" % (len(y),len(x)))
# Compute sorted slopes only when deltax > 0
deltax = x[:, np.newaxis] - x
deltay = y[:, np.newaxis] - y
slopes = deltay[deltax > 0] / deltax[deltax > 0]
slopes.sort()
medslope = np.median(slopes)
medinter = np.median(y) - medslope * np.median(x)
# Now compute confidence intervals
if alpha > 0.5:
alpha = 1. - alpha
z = distributions.norm.ppf(alpha / 2.)
# This implements (2.6) from Sen (1968)
_, nxreps = find_repeats(x)
_, nyreps = find_repeats(y)
nt = len(slopes) # N in Sen (1968)
ny = len(y) # n in Sen (1968)
# Equation 2.6 in Sen (1968):
sigsq = 1/18. * (ny * (ny-1) * (2*ny+5) -
np.sum(k * (k-1) * (2*k + 5) for k in nxreps) -
np.sum(k * (k-1) * (2*k + 5) for k in nyreps))
# Find the confidence interval indices in `slopes`
sigma = np.sqrt(sigsq)
Ru = min(int(np.round((nt - z*sigma)/2.)), len(slopes)-1)
Rl = max(int(np.round((nt + z*sigma)/2.)) - 1, 0)
delta = slopes[[Rl, Ru]]
return medslope, medinter, delta[0], delta[1]
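A quick numeric sketch of the estimator itself, without the confidence-interval machinery (assumes only numpy):

    import numpy as np
    x = np.array([0., 1., 2., 3.])
    y = np.array([1., 3., 2., 5.])
    dx = x[:, np.newaxis] - x
    dy = y[:, np.newaxis] - y
    pairwise = dy[dx > 0] / dx[dx > 0]   # the 6 pairwise slopes
    slope = np.median(pairwise)          # ~1.17 for these points
    intercept = np.median(y) - slope * np.median(x)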
#####################################
##### INFERENTIAL STATISTICS #####
#####################################
def ttest_1samp(a, popmean, axis=0): def ttest_1samp(a, popmean, axis=0):
""" """
@ -3105,8 +3242,7 @@ def ttest_1samp(a, popmean, axis=0):
def _ttest_finish(df,t): def _ttest_finish(df,t):
"""Common code between all 3 t-test functions.""" """Common code between all 3 t-test functions."""
# use np.abs to get upper tail prob = distributions.t.sf(np.abs(t), df) * 2 # use np.abs to get upper tail
prob = distributions.t.sf(np.abs(t), df) * 2
if t.ndim == 0: if t.ndim == 0:
t = t[()] t = t[()]
@ -3217,8 +3353,7 @@ def ttest_ind(a, b, axis=0, equal_var=True):
else: else:
vn1 = v1 / n1 vn1 = v1 / n1
vn2 = v2 / n2 vn2 = v2 / n2
df = ((vn1 + vn2) ** 2) / \ df = ((vn1 + vn2)**2) / ((vn1**2) / (n1 - 1) + (vn2**2) / (n2 - 1))
((vn1 ** 2) / (n1 - 1) + (vn2 ** 2) / (n2 - 1))
# If df is undefined, variances are zero (assumes n1 > 0 & n2 > 0). # If df is undefined, variances are zero (assumes n1 > 0 & n2 > 0).
# Hence it doesn't matter what df is as long as it's not NaN. # Hence it doesn't matter what df is as long as it's not NaN.
@ -3592,7 +3727,7 @@ def power_divergence(f_obs, f_exp=None, ddof=0, axis=0, lambda_=None):
are uniform and given by the mean of the observed frequencies. Here we are uniform and given by the mean of the observed frequencies. Here we
perform a G-test (i.e. use the log-likelihood ratio statistic): perform a G-test (i.e. use the log-likelihood ratio statistic):
>>> power_divergence([16, 18, 16, 14, 12, 12], method='log-likelihood') >>> power_divergence([16, 18, 16, 14, 12, 12], lambda_='log-likelihood')
(2.006573162632538, 0.84823476779463769) (2.006573162632538, 0.84823476779463769)
The expected frequencies can be given with the `f_exp` argument: The expected frequencies can be given with the `f_exp` argument:
@ -3873,8 +4008,8 @@ def ks_2samp(data1, data2):
""" """
data1, data2 = map(asarray, (data1, data2)) data1, data2 = map(asarray, (data1, data2))
#n1 = data1.shape[0] n1 = data1.shape[0]
#n2 = data2.shape[0] n2 = data2.shape[0]
n1 = len(data1) n1 = len(data1)
n2 = len(data2) n2 = len(data2)
data1 = np.sort(data1) data1 = np.sort(data1)
@ -3886,7 +4021,7 @@ def ks_2samp(data1, data2):
# Note: d absolute not signed distance # Note: d absolute not signed distance
en = np.sqrt(n1*n2/float(n1+n2)) en = np.sqrt(n1*n2/float(n1+n2))
try: try:
prob = ksprob((en + 0.12 + 0.11 / en) * d) prob = distributions.kstwobign.sf((en + 0.12 + 0.11 / en) * d)
except: except:
prob = 1.0 prob = 1.0
return d, prob return d, prob
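A usage sketch (two samples drawn from the same continuous parent, so `d` should be small and `p` large; exact values depend on the seed):

    import numpy as np
    np.random.seed(12345)
    d, p = ks_2samp(np.random.normal(size=200), np.random.normal(size=180))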
@ -3929,8 +4064,7 @@ def mannwhitneyu(x, y, use_continuity=True):
n2 = len(y) n2 = len(y)
ranked = rankdata(np.concatenate((x,y))) ranked = rankdata(np.concatenate((x,y)))
rankx = ranked[0:n1] # get the x-ranks rankx = ranked[0:n1] # get the x-ranks
# calc U for x u1 = n1*n2 + (n1*(n1+1))/2.0 - np.sum(rankx,axis=0) # calc U for x
u1 = n1 * n2 + (n1 * (n1 + 1)) / 2.0 - np.sum(rankx, axis=0)
u2 = n1*n2 - u1 # remainder is U for y u2 = n1*n2 - u1 # remainder is U for y
bigu = max(u1,u2) bigu = max(u1,u2)
smallu = min(u1,u2) smallu = min(u1,u2)
@ -3943,8 +4077,7 @@ def mannwhitneyu(x, y, use_continuity=True):
# normal approximation for prob calc with continuity correction # normal approximation for prob calc with continuity correction
z = abs((bigu-0.5-n1*n2/2.0) / sd) z = abs((bigu-0.5-n1*n2/2.0) / sd)
else: else:
# normal approximation for prob calc z = abs((bigu-n1*n2/2.0) / sd) # normal approximation for prob calc
z = abs((bigu - n1 * n2 / 2.0) / sd)
return smallu, distributions.norm.sf(z) # (1.0 - zprob(z)) return smallu, distributions.norm.sf(z) # (1.0 - zprob(z))
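The U bookkeeping above reduces to rank sums; a self-contained sketch (assuming scipy's `rankdata`):

    import numpy as np
    from scipy.stats import rankdata
    x = np.array([1., 4., 2., 5.])
    y = np.array([3., 6., 7.])
    n1, n2 = len(x), len(y)
    ranked = rankdata(np.concatenate((x, y)))
    u1 = n1 * n2 + n1 * (n1 + 1) / 2.0 - ranked[:n1].sum()  # 10.0
    u2 = n1 * n2 - u1                                       # 2.0; u1+u2 == n1*n2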
@ -4094,8 +4227,7 @@ def friedmanchisquare(*args):
""" """
k = len(args) k = len(args)
if k < 3: if k < 3:
raise ValueError( raise ValueError('\nLess than 3 levels. Friedman test not appropriate.\n')
'\nLess than 3 levels. Friedman test not appropriate.\n')
n = len(args[0]) n = len(args[0])
for i in range(1, k): for i in range(1, k):
@ -4111,7 +4243,7 @@ def friedmanchisquare(*args):
# Handle ties # Handle ties
ties = 0 ties = 0
for i in range(len(data)): for i in range(len(data)):
_replist, repnum = find_repeats(array(data[i])) replist, repnum = find_repeats(array(data[i]))
for t in repnum: for t in repnum:
ties += t*(t*t-1) ties += t*(t*t-1)
c = 1 - ties / float(k*(k*k-1)*n) c = 1 - ties / float(k*(k*k-1)*n)
@ -4121,11 +4253,13 @@ def friedmanchisquare(*args):
return chisq, chisqprob(chisq,k-1) return chisq, chisqprob(chisq,k-1)
# #####################################
# PROBABILITY CALCULATIONS #### #### PROBABILITY CALCULATIONS ####
# #####################################
zprob = special.ndtr zprob = np.deprecate(message='zprob is deprecated in scipy 0.14, '
'use norm.cdf or special.ndtr instead\n',
old_name='zprob')(special.ndtr)
def chisqprob(chisq, df): def chisqprob(chisq, df):
@ -4149,8 +4283,13 @@ def chisqprob(chisq, df):
""" """
return special.chdtrc(df,chisq) return special.chdtrc(df,chisq)
ksprob = special.kolmogorov ksprob = np.deprecate(message='ksprob is deprecated in scipy 0.14, '
fprob = special.fdtrc 'use stats.kstwobign.sf or special.kolmogorov instead\n',
old_name='ksprob')(special.kolmogorov)
fprob = np.deprecate(message='fprob is deprecated in scipy 0.14, '
'use stats.f.sf or special.fdtrc instead\n',
old_name='fprob')(special.fdtrc)
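The `np.deprecate` wrapper used above leaves the wrapped function intact and makes calls emit a DeprecationWarning; the pattern in isolation (a sketch, valid for the numpy of this era):

    import numpy as np
    from scipy import special
    ksprob_like = np.deprecate(message='use special.kolmogorov instead',
                               old_name='ksprob')(special.kolmogorov)
    # ksprob_like(1.0) warns, then delegates to special.kolmogorov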
def betai(a, b, x): def betai(a, b, x):
@ -4184,9 +4323,9 @@ def betai(a, b, x):
return special.betainc(a, b, x) return special.betainc(a, b, x)
# #####################################
# ANOVA CALCULATIONS ####### ####### ANOVA CALCULATIONS #######
# #####################################
def f_value_wilks_lambda(ER, EF, dfnum, dfden, a, b): def f_value_wilks_lambda(ER, EF, dfnum, dfden, a, b):
"""Calculation of Wilks lambda F-statistic for multivarite data, per """Calculation of Wilks lambda F-statistic for multivarite data, per
@ -4200,8 +4339,7 @@ def f_value_wilks_lambda(ER, EF, dfnum, dfden, a, b):
if (a-1)**2 + (b-1)**2 == 5: if (a-1)**2 + (b-1)**2 == 5:
q = 1 q = 1
else: else:
q = np.sqrt( q = np.sqrt(((a-1)**2*(b-1)**2 - 2) / ((a-1)**2 + (b-1)**2 - 5))
((a - 1) ** 2 * (b - 1) ** 2 - 2) / ((a - 1) ** 2 + (b - 1) ** 2 - 5))
n_um = (1 - lmbda**(1.0/q))*(a-1)*(b-1) n_um = (1 - lmbda**(1.0/q))*(a-1)*(b-1)
d_en = lmbda**(1.0/q) / (n_um*q - 0.5*(a-1)*(b-1) + 1) d_en = lmbda**(1.0/q) / (n_um*q - 0.5*(a-1)*(b-1) + 1)
return n_um / d_en return n_um / d_en
@ -4267,9 +4405,9 @@ def f_value_multivariate(ER, EF, dfnum, dfden):
return n_um / d_en return n_um / d_en
# #####################################
# SUPPORT FUNCTIONS ######## ####### SUPPORT FUNCTIONS ########
# #####################################
def ss(a, axis=0): def ss(a, axis=0):
""" """
@ -6,12 +6,11 @@ import warnings
import numpy as np import numpy as np
import numpy.testing as npt import numpy.testing as npt
#from scipy.lib._version import NumpyVersion from scipy.lib._version import NumpyVersion
from scipy import stats from wafo import stats
#NUMPY_BELOW_1_7 = NumpyVersion(np.__version__) < '1.7.0' NUMPY_BELOW_1_7 = NumpyVersion(np.__version__) < '1.7.0'
NUMPY_BELOW_1_7 = np.__version__ < '1.7.0'
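The restored NumpyVersion check matters because comparing version strings lexicographically breaks once a component reaches two digits; a quick illustration (scipy.lib._version is the path used above; later scipy releases moved it to scipy._lib._version):

from scipy.lib._version import NumpyVersion   # path as imported above

'1.10.0' < '1.7.0'                 # True lexicographically -- wrong
NumpyVersion('1.10.0') < '1.7.0'   # False -- correct semantic ordering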
def check_normalization(distfn, args, distname): def check_normalization(distfn, args, distname):
@ -77,8 +76,7 @@ def check_skew_expect(distfn, arg, m, v, s, msg):
def check_kurt_expect(distfn, arg, m, v, k, msg): def check_kurt_expect(distfn, arg, m, v, k, msg):
if np.isfinite(k): if np.isfinite(k):
m4e = distfn.expect(lambda x: np.power(x-m, 4), arg) m4e = distfn.expect(lambda x: np.power(x-m, 4), arg)
npt.assert_allclose( npt.assert_allclose(m4e, (k + 3.) * np.power(v, 2), atol=1e-5, rtol=1e-5,
m4e, (k + 3.) * np.power(v, 2), atol=1e-5, rtol=1e-5,
err_msg=msg + ' - kurtosis') err_msg=msg + ' - kurtosis')
else: else:
npt.assert_(np.isnan(k)) npt.assert_(np.isnan(k))
@ -116,7 +114,7 @@ def check_edge_support(distfn, args):
def check_named_args(distfn, x, shape_args, defaults, meths): def check_named_args(distfn, x, shape_args, defaults, meths):
# Check calling w/ named arguments. ## Check calling w/ named arguments.
# check consistency of shapes, numargs and _parse signature # check consistency of shapes, numargs and _parse signature
signature = inspect.getargspec(distfn._parse_args) signature = inspect.getargspec(distfn._parse_args)
@ -124,8 +122,7 @@ def check_named_args(distfn, x, shape_args, defaults, meths):
npt.assert_(signature.keywords is None) npt.assert_(signature.keywords is None)
npt.assert_(signature.defaults == defaults) npt.assert_(signature.defaults == defaults)
# self, a, b, loc=0, scale=1 shape_argnames = signature.args[1:-len(defaults)] # self, a, b, loc=0, scale=1
shape_argnames = signature.args[1:-len(defaults)]
if distfn.shapes: if distfn.shapes:
shapes_ = distfn.shapes.replace(',', ' ').split() shapes_ = distfn.shapes.replace(',', ' ').split()
else: else:
@ -144,7 +141,7 @@ def check_named_args(distfn, x, shape_args, defaults, meths):
k.update({names.pop(): a.pop()}) k.update({names.pop(): a.pop()})
v = [meth(x, *a, **k) for meth in meths] v = [meth(x, *a, **k) for meth in meths]
npt.assert_array_equal(vals, v) npt.assert_array_equal(vals, v)
if not 'n' in k.keys(): if 'n' not in k.keys():
# `n` is first parameter of moment(), so can't be used as named arg # `n` is first parameter of moment(), so can't be used as named arg
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning) warnings.simplefilter("ignore", UserWarning)
@ -154,3 +151,4 @@ def check_named_args(distfn, x, shape_args, defaults, meths):
# unknown arguments should not go through: # unknown arguments should not go through:
k.update({'kaboom': 42}) k.update({'kaboom': 42})
npt.assert_raises(TypeError, distfn.cdf, x, **k) npt.assert_raises(TypeError, distfn.cdf, x, **k)
@ -1,4 +1,5 @@
from __future__ import division, print_function, absolute_import from __future__ import division, print_function, absolute_import
import numpy as np import numpy as np
from numpy.testing import assert_array_almost_equal, run_module_suite from numpy.testing import assert_array_almost_equal, run_module_suite
from scipy.stats import \ from scipy.stats import \
@ -19,7 +20,7 @@ class TestBinnedStatistic(object):
x = self.x x = self.x
v = self.v v = self.v
count1, edges1, _bc = binned_statistic(x, v, 'count', bins=10) count1, edges1, bc = binned_statistic(x, v, 'count', bins=10)
count2, edges2 = np.histogram(x, bins=10) count2, edges2 = np.histogram(x, bins=10)
assert_array_almost_equal(count1, count2) assert_array_almost_equal(count1, count2)
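For context, a minimal standalone use of binned_statistic matching the test above (hypothetical data): with statistic='count' the values array is ignored and the counts agree with np.histogram.

import numpy as np
from scipy.stats import binned_statistic

x = np.random.rand(100)     # hypothetical samples
v = x**2                    # values; ignored by the 'count' statistic
count, edges, binnumber = binned_statistic(x, v, 'count', bins=10)
assert np.allclose(count, np.histogram(x, bins=10)[0])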
@ -29,7 +30,7 @@ class TestBinnedStatistic(object):
x = self.x x = self.x
v = self.v v = self.v
sum1, edges1, _bc = binned_statistic(x, v, 'sum', bins=10) sum1, edges1, bc = binned_statistic(x, v, 'sum', bins=10)
sum2, edges2 = np.histogram(x, bins=10, weights=v) sum2, edges2 = np.histogram(x, bins=10, weights=v)
assert_array_almost_equal(sum1, sum2) assert_array_almost_equal(sum1, sum2)
@ -39,8 +40,8 @@ class TestBinnedStatistic(object):
x = self.x x = self.x
v = self.v v = self.v
stat1, edges1, _bc = binned_statistic(x, v, 'mean', bins=10) stat1, edges1, bc = binned_statistic(x, v, 'mean', bins=10)
stat2, edges2, _bc = binned_statistic(x, v, np.mean, bins=10) stat2, edges2, bc = binned_statistic(x, v, np.mean, bins=10)
assert_array_almost_equal(stat1, stat2) assert_array_almost_equal(stat1, stat2)
assert_array_almost_equal(edges1, edges2) assert_array_almost_equal(edges1, edges2)
@ -49,8 +50,8 @@ class TestBinnedStatistic(object):
x = self.x x = self.x
v = self.v v = self.v
stat1, edges1, _bc = binned_statistic(x, v, 'std', bins=10) stat1, edges1, bc = binned_statistic(x, v, 'std', bins=10)
stat2, edges2, _bc = binned_statistic(x, v, np.std, bins=10) stat2, edges2, bc = binned_statistic(x, v, np.std, bins=10)
assert_array_almost_equal(stat1, stat2) assert_array_almost_equal(stat1, stat2)
assert_array_almost_equal(edges1, edges2) assert_array_almost_equal(edges1, edges2)
@ -59,8 +60,8 @@ class TestBinnedStatistic(object):
x = self.x x = self.x
v = self.v v = self.v
stat1, edges1, _bc = binned_statistic(x, v, 'median', bins=10) stat1, edges1, bc = binned_statistic(x, v, 'median', bins=10)
stat2, edges2, _bc = binned_statistic(x, v, np.median, bins=10) stat2, edges2, bc = binned_statistic(x, v, np.median, bins=10)
assert_array_almost_equal(stat1, stat2) assert_array_almost_equal(stat1, stat2)
assert_array_almost_equal(edges1, edges2) assert_array_almost_equal(edges1, edges2)
@ -69,7 +70,7 @@ class TestBinnedStatistic(object):
x = self.x[:20] x = self.x[:20]
v = self.v[:20] v = self.v[:20]
count1, _edges1, bc = binned_statistic(x, v, 'count', bins=3) count1, edges1, bc = binned_statistic(x, v, 'count', bins=3)
bc2 = np.array([3, 2, 1, 3, 2, 3, 3, 3, 3, 1, 1, 3, 3, 1, 2, 3, 1, bc2 = np.array([3, 2, 1, 3, 2, 3, 3, 3, 3, 1, 1, 3, 3, 1, 2, 3, 1,
1, 2, 1]) 1, 2, 1])
@ -86,7 +87,7 @@ class TestBinnedStatistic(object):
mean, bins, _ = binned_statistic(x[:15], data[:15]) mean, bins, _ = binned_statistic(x[:15], data[:15])
mean_range, bins_range, _ = binned_statistic(x, data, range=[(0, 14)]) mean_range, bins_range, _ = binned_statistic(x, data, range=[(0, 14)])
mean_range2, bins_range2, _ = binned_statistic(x, data, range=[(0, 14)]) mean_range2, bins_range2, _ = binned_statistic(x, data, range=(0, 14))
assert_array_almost_equal(mean, mean_range) assert_array_almost_equal(mean, mean_range)
assert_array_almost_equal(bins, bins_range) assert_array_almost_equal(bins, bins_range)
@ -98,8 +99,7 @@ class TestBinnedStatistic(object):
y = self.y y = self.y
v = self.v v = self.v
count1, binx1, biny1, _bc = binned_statistic_2d(x, y, v, 'count', count1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'count', bins=5)
bins=5)
count2, binx2, biny2 = np.histogram2d(x, y, bins=5) count2, binx2, biny2 = np.histogram2d(x, y, bins=5)
assert_array_almost_equal(count1, count2) assert_array_almost_equal(count1, count2)
@ -111,7 +111,7 @@ class TestBinnedStatistic(object):
y = self.y y = self.y
v = self.v v = self.v
sum1, binx1, biny1, _bc = binned_statistic_2d(x, y, v, 'sum', bins=5) sum1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'sum', bins=5)
sum2, binx2, biny2 = np.histogram2d(x, y, bins=5, weights=v) sum2, binx2, biny2 = np.histogram2d(x, y, bins=5, weights=v)
assert_array_almost_equal(sum1, sum2) assert_array_almost_equal(sum1, sum2)
@ -123,8 +123,8 @@ class TestBinnedStatistic(object):
y = self.y y = self.y
v = self.v v = self.v
stat1, binx1, biny1, _b = binned_statistic_2d(x, y, v, 'mean', bins=5) stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'mean', bins=5)
stat2, binx2, biny2, _b = binned_statistic_2d(x, y, v, np.mean, bins=5) stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5)
assert_array_almost_equal(stat1, stat2) assert_array_almost_equal(stat1, stat2)
assert_array_almost_equal(binx1, binx2) assert_array_almost_equal(binx1, binx2)
@ -135,8 +135,8 @@ class TestBinnedStatistic(object):
y = self.y y = self.y
v = self.v v = self.v
stat1, binx1, biny1, _bc = binned_statistic_2d(x, y, v, 'std', bins=5) stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'std', bins=5)
stat2, binx2, biny2, _bc = binned_statistic_2d(x, y, v, np.std, bins=5) stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.std, bins=5)
assert_array_almost_equal(stat1, stat2) assert_array_almost_equal(stat1, stat2)
assert_array_almost_equal(binx1, binx2) assert_array_almost_equal(binx1, binx2)
@ -147,9 +147,8 @@ class TestBinnedStatistic(object):
y = self.y y = self.y
v = self.v v = self.v
stat1, binx1, biny1, _ = binned_statistic_2d(x, y, v, 'median', bins=5) stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'median', bins=5)
stat2, binx2, biny2, _ = binned_statistic_2d(x, y, v, np.median, stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.median, bins=5)
bins=5)
assert_array_almost_equal(stat1, stat2) assert_array_almost_equal(stat1, stat2)
assert_array_almost_equal(binx1, binx2) assert_array_almost_equal(binx1, binx2)
@ -160,8 +159,7 @@ class TestBinnedStatistic(object):
y = self.y[:20] y = self.y[:20]
v = self.v[:20] v = self.v[:20]
count1, _binx1, _biny1, bc = binned_statistic_2d(x, y, v, 'count', count1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'count', bins=3)
bins=3)
bc2 = np.array([17, 11, 6, 16, 11, 17, 18, 17, 17, 7, 6, 18, 16, bc2 = np.array([17, 11, 6, 16, 11, 17, 18, 17, 17, 7, 6, 18, 16,
6, 11, 16, 6, 6, 11, 8]) 6, 11, 16, 6, 6, 11, 8])
@ -175,7 +173,7 @@ class TestBinnedStatistic(object):
X = self.X X = self.X
v = self.v v = self.v
count1, edges1, _bc = binned_statistic_dd(X, v, 'count', bins=3) count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
count2, edges2 = np.histogramdd(X, bins=3) count2, edges2 = np.histogramdd(X, bins=3)
assert_array_almost_equal(count1, count2) assert_array_almost_equal(count1, count2)
@ -185,7 +183,7 @@ class TestBinnedStatistic(object):
X = self.X X = self.X
v = self.v v = self.v
sum1, edges1, _bc = binned_statistic_dd(X, v, 'sum', bins=3) sum1, edges1, bc = binned_statistic_dd(X, v, 'sum', bins=3)
sum2, edges2 = np.histogramdd(X, bins=3, weights=v) sum2, edges2 = np.histogramdd(X, bins=3, weights=v)
assert_array_almost_equal(sum1, sum2) assert_array_almost_equal(sum1, sum2)
@ -195,8 +193,8 @@ class TestBinnedStatistic(object):
X = self.X X = self.X
v = self.v v = self.v
stat1, edges1, _bc = binned_statistic_dd(X, v, 'mean', bins=3) stat1, edges1, bc = binned_statistic_dd(X, v, 'mean', bins=3)
stat2, edges2, _bc = binned_statistic_dd(X, v, np.mean, bins=3) stat2, edges2, bc = binned_statistic_dd(X, v, np.mean, bins=3)
assert_array_almost_equal(stat1, stat2) assert_array_almost_equal(stat1, stat2)
assert_array_almost_equal(edges1, edges2) assert_array_almost_equal(edges1, edges2)
@ -205,8 +203,8 @@ class TestBinnedStatistic(object):
X = self.X X = self.X
v = self.v v = self.v
stat1, edges1, _bc = binned_statistic_dd(X, v, 'std', bins=3) stat1, edges1, bc = binned_statistic_dd(X, v, 'std', bins=3)
stat2, edges2, _bc = binned_statistic_dd(X, v, np.std, bins=3) stat2, edges2, bc = binned_statistic_dd(X, v, np.std, bins=3)
assert_array_almost_equal(stat1, stat2) assert_array_almost_equal(stat1, stat2)
assert_array_almost_equal(edges1, edges2) assert_array_almost_equal(edges1, edges2)
@ -215,8 +213,8 @@ class TestBinnedStatistic(object):
X = self.X X = self.X
v = self.v v = self.v
stat1, edges1, _bc = binned_statistic_dd(X, v, 'median', bins=3) stat1, edges1, bc = binned_statistic_dd(X, v, 'median', bins=3)
stat2, edges2, _bc = binned_statistic_dd(X, v, np.median, bins=3) stat2, edges2, bc = binned_statistic_dd(X, v, np.median, bins=3)
assert_array_almost_equal(stat1, stat2) assert_array_almost_equal(stat1, stat2)
assert_array_almost_equal(edges1, edges2) assert_array_almost_equal(edges1, edges2)
@ -225,7 +223,7 @@ class TestBinnedStatistic(object):
X = self.X[:20] X = self.X[:20]
v = self.v[:20] v = self.v[:20]
count1, _edges1, bc = binned_statistic_dd(X, v, 'count', bins=3) count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92, bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92,
32, 36, 91, 43, 87, 81, 81]) 32, 36, 91, 43, 87, 81, 81])
@ -237,5 +235,4 @@ class TestBinnedStatistic(object):
if __name__ == "__main__": if __name__ == "__main__":
#unittest.main()
run_module_suite() run_module_suite()
@ -13,6 +13,8 @@ from wafo.stats.tests.common_tests import (check_normalization, check_moment,
check_entropy, check_private_entropy, NUMPY_BELOW_1_7, check_entropy, check_private_entropy, NUMPY_BELOW_1_7,
check_edge_support, check_named_args) check_edge_support, check_named_args)
from wafo.stats._distr_params import distcont
""" """
Test all continuous distributions. Test all continuous distributions.
@ -26,98 +28,6 @@ not for numerically exact results.
DECIMAL = 5 # specify the precision of the tests # increased from 0 to 5 DECIMAL = 5 # specify the precision of the tests # increased from 0 to 5
distcont = [
['alpha', (3.5704770516650459,)],
['anglit', ()],
['arcsine', ()],
['beta', (2.3098496451481823, 0.62687954300963677)],
['betaprime', (5, 6)],
['bradford', (0.29891359763170633,)],
['burr', (10.5, 4.3)],
['cauchy', ()],
['chi', (78,)],
['chi2', (55,)],
['cosine', ()],
['dgamma', (1.1023326088288166,)],
['dweibull', (2.0685080649914673,)],
['erlang', (10,)],
['expon', ()],
['exponpow', (2.697119160358469,)],
['exponweib', (2.8923945291034436, 1.9505288745913174)],
['f', (29, 18)],
['fatiguelife', (29,)], # correction numargs = 1
['fisk', (3.0857548622253179,)],
['foldcauchy', (4.7164673455831894,)],
['foldnorm', (1.9521253373555869,)],
['frechet_l', (3.6279911255583239,)],
['frechet_r', (1.8928171603534227,)],
['gamma', (1.9932305483800778,)],
['gausshyper', (13.763771604130699, 3.1189636648681431,
2.5145980350183019, 5.1811649903971615)], # veryslow
['genexpon', (9.1325976465418908, 16.231956600590632, 3.2819552690843983)],
['genextreme', (-0.1,)],
['gengamma', (4.4162385429431925, 3.1193091679242761)],
['genhalflogistic', (0.77274727809929322,)],
['genlogistic', (0.41192440799679475,)],
['genpareto', (0.1,)], # use case with finite moments
['gilbrat', ()],
['gompertz', (0.94743713075105251,)],
['gumbel_l', ()],
['gumbel_r', ()],
['halfcauchy', ()],
['halflogistic', ()],
['halfnorm', ()],
['hypsecant', ()],
['invgamma', (4.0668996136993067,)],
['invgauss', (0.14546264555347513,)],
['invweibull', (10.58,)],
['johnsonsb', (4.3172675099141058, 3.1837781130785063)],
['johnsonsu', (2.554395574161155, 2.2482281679651965)],
['ksone', (1000,)], # replace 22 by 100 to avoid failing range, ticket 956
['kstwobign', ()],
['laplace', ()],
['levy', ()],
['levy_l', ()],
# ['levy_stable', (0.35667405469844993,
# -0.67450531578494011)], #NotImplementedError
# rvs not tested
['loggamma', (0.41411931826052117,)],
['logistic', ()],
['loglaplace', (3.2505926592051435,)],
['lognorm', (0.95368226960575331,)],
['lomax', (1.8771398388773268,)],
['maxwell', ()],
['mielke', (10.4, 3.6)],
['nakagami', (4.9673794866666237,)],
['ncf', (27, 27, 0.41578441799226107)],
['nct', (14, 0.24045031331198066)],
['ncx2', (21, 1.0560465975116415)],
['norm', ()],
['pareto', (2.621716532144454,)],
['pearson3', (0.1,)],
['powerlaw', (1.6591133289905851,)],
['powerlognorm', (2.1413923530064087, 0.44639540782048337)],
['powernorm', (4.4453652254590779,)],
['rayleigh', ()],
['rdist', (0.9,)], # feels also slow
['recipinvgauss', (0.63004267809369119,)],
['reciprocal', (0.0062309367010521255, 1.0062309367010522)],
['rice', (0.7749725210111873,)],
['semicircular', ()],
['t', (2.7433514990818093,)],
['triang', (0.15785029824528218,)],
['truncexpon', (4.6907725456810478,)],
['truncnorm', (-1.0978730080013919, 2.7306754109031979)],
['truncnorm', (0.1, 2.)],
['tukeylambda', (3.1321477856738267,)],
['uniform', ()],
['vonmises', (3.9939042581071398,)],
['vonmises_line', (3.9939042581071398,)],
['wald', ()],
['weibull_max', (2.8687961709100187,)],
['weibull_min', (1.7866166930421596,)],
['wrapcauchy', (0.031071279018614728,)]]
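The deleted table now lives in wafo.stats._distr_params (mirroring scipy.stats._distr_params), per the new import at the top of the file. Each entry pairs a distribution name with the shape parameters used as its test case; a sketch of how the generators below consume it:

from wafo.stats._distr_params import distcont   # as imported above
from wafo import stats

distname, arg = distcont[0]           # e.g. ['alpha', (3.5704...,)]
distfn = getattr(stats, distname)
sample = distfn.rvs(*arg, size=10)    # draw from the parametrized distribution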
## Last four of these fail all around. Need to be checked ## Last four of these fail all around. Need to be checked
distcont_extra = [ distcont_extra = [
['betaprime', (100, 86)], ['betaprime', (100, 86)],
@ -182,11 +92,12 @@ def _silence_fp_errors(func):
def test_cont_basic(): def test_cont_basic():
# this test skips slow distributions # this test skips slow distributions
with warnings.catch_warnings(): with warnings.catch_warnings():
# warnings.filterwarnings('ignore', warnings.filterwarnings('ignore', category=integrate.IntegrationWarning)
# category=integrate.IntegrationWarning)
for distname, arg in distcont[:]: for distname, arg in distcont[:]:
if distname in distslow: if distname in distslow:
continue continue
if distname == 'levy_stable':
continue
distfn = getattr(stats, distname) distfn = getattr(stats, distname)
np.random.seed(765456) np.random.seed(765456)
sn = 500 sn = 500
@ -231,15 +142,17 @@ def test_cont_basic():
yield knf(distname == 'truncnorm')(check_ppf_private), distfn, \ yield knf(distname == 'truncnorm')(check_ppf_private), distfn, \
arg, distname arg, distname
@npt.dec.slow @npt.dec.slow
def test_cont_basic_slow(): def test_cont_basic_slow():
# same as above for slow distributions # same as above for slow distributions
with warnings.catch_warnings(): with warnings.catch_warnings():
# warnings.filterwarnings('ignore', warnings.filterwarnings('ignore', category=integrate.IntegrationWarning)
# category=integrate.IntegrationWarning)
for distname, arg in distcont[:]: for distname, arg in distcont[:]:
if distname not in distslow: if distname not in distslow:
continue continue
if distname == 'levy_stable':
continue
distfn = getattr(stats, distname) distfn = getattr(stats, distname)
np.random.seed(765456) np.random.seed(765456)
sn = 500 sn = 500
@ -287,12 +200,13 @@ def test_cont_basic_slow():
@npt.dec.slow @npt.dec.slow
def test_moments(): def test_moments():
with warnings.catch_warnings(): with warnings.catch_warnings():
# warnings.filterwarnings('ignore', warnings.filterwarnings('ignore', category=integrate.IntegrationWarning)
# category=integrate.IntegrationWarning)
knf = npt.dec.knownfailureif knf = npt.dec.knownfailureif
fail_normalization = set(['vonmises', 'ksone']) fail_normalization = set(['vonmises', 'ksone'])
fail_higher = set(['vonmises', 'ksone', 'ncf']) fail_higher = set(['vonmises', 'ksone', 'ncf'])
for distname, arg in distcont[:]: for distname, arg in distcont[:]:
if distname == 'levy_stable':
continue
distfn = getattr(stats, distname) distfn = getattr(stats, distname)
m, v, s, k = distfn.stats(*arg, moments='mvsk') m, v, s, k = distfn.stats(*arg, moments='mvsk')
cond1, cond2 = distname in fail_normalization, distname in fail_higher cond1, cond2 = distname in fail_normalization, distname in fail_higher
@ -333,8 +247,7 @@ def check_sample_mean(sm, v, n, popmean):
def check_sample_var(sv,n, popvar): def check_sample_var(sv,n, popvar):
# two-sided chisquare test for sample variance equal to hypothesized # two-sided chisquare test for sample variance equal to hypothesized variance
# variance
df = n-1 df = n-1
chi2 = (n-1)*sv/float(popvar) chi2 = (n-1)*sv/float(popvar)
pval = stats.chisqprob(chi2,df)*2 pval = stats.chisqprob(chi2,df)*2
@ -365,16 +278,15 @@ def check_pdf(distfn, arg, msg):
eps = 1e-6 eps = 1e-6
pdfv = distfn.pdf(median, *arg) pdfv = distfn.pdf(median, *arg)
if (pdfv < 1e-4) or (pdfv > 1e4): if (pdfv < 1e-4) or (pdfv > 1e4):
# avoid checking a case where pdf is close to zero or huge # avoid checking a case where pdf is close to zero or huge (singularity)
# (singularity)
median = median + 0.1 median = median + 0.1
pdfv = distfn.pdf(median, *arg) pdfv = distfn.pdf(median, *arg)
cdfdiff = (distfn.cdf(median + eps, *arg) - cdfdiff = (distfn.cdf(median + eps, *arg) -
distfn.cdf(median - eps, *arg))/eps/2.0 distfn.cdf(median - eps, *arg))/eps/2.0
# replace with better diff and better test (more points), # replace with better diff and better test (more points),
# actually, this works pretty well # actually, this works pretty well
npt.assert_almost_equal(pdfv, cdfdiff, decimal=DECIMAL, npt.assert_almost_equal(pdfv, cdfdiff,
err_msg=msg + ' - cdf-pdf relationship') decimal=DECIMAL, err_msg=msg + ' - cdf-pdf relationship')
def check_pdf_logpdf(distfn, args, msg): def check_pdf_logpdf(distfn, args, msg):
@ -385,8 +297,7 @@ def check_pdf_logpdf(distfn, args, msg):
logpdf = distfn.logpdf(vals, *args) logpdf = distfn.logpdf(vals, *args)
pdf = pdf[pdf != 0] pdf = pdf[pdf != 0]
logpdf = logpdf[np.isfinite(logpdf)] logpdf = logpdf[np.isfinite(logpdf)]
npt.assert_almost_equal(np.log(pdf), logpdf, decimal=7, npt.assert_almost_equal(np.log(pdf), logpdf, decimal=7, err_msg=msg + " - logpdf-log(pdf) relationship")
err_msg=msg + " - logpdf-log(pdf) relationship")
def check_sf_logsf(distfn, args, msg): def check_sf_logsf(distfn, args, msg):
@ -397,8 +308,7 @@ def check_sf_logsf(distfn, args, msg):
logsf = distfn.logsf(vals, *args) logsf = distfn.logsf(vals, *args)
sf = sf[sf != 0] sf = sf[sf != 0]
logsf = logsf[np.isfinite(logsf)] logsf = logsf[np.isfinite(logsf)]
npt.assert_almost_equal(np.log(sf), logsf, decimal=7, npt.assert_almost_equal(np.log(sf), logsf, decimal=7, err_msg=msg + " - logsf-log(sf) relationship")
err_msg=msg + " - logsf-log(sf) relationship")
def check_cdf_logcdf(distfn, args, msg): def check_cdf_logcdf(distfn, args, msg):
@ -409,8 +319,7 @@ def check_cdf_logcdf(distfn, args, msg):
logcdf = distfn.logcdf(vals, *args) logcdf = distfn.logcdf(vals, *args)
cdf = cdf[cdf != 0] cdf = cdf[cdf != 0]
logcdf = logcdf[np.isfinite(logcdf)] logcdf = logcdf[np.isfinite(logcdf)]
npt.assert_almost_equal(np.log(cdf), logcdf, decimal=7, npt.assert_almost_equal(np.log(cdf), logcdf, decimal=7, err_msg=msg + " - logcdf-log(cdf) relationship")
err_msg=msg + " - logcdf-log(cdf) relationship")
def check_distribution_rvs(dist, args, alpha, rvs): def check_distribution_rvs(dist, args, alpha, rvs):
@ -2,37 +2,17 @@ from __future__ import division, print_function, absolute_import
import numpy.testing as npt import numpy.testing as npt
import numpy as np import numpy as np
try: from scipy.lib.six import xrange
from wafo.stats.six import xrange
except:
pass
from wafo import stats from wafo import stats
from wafo.stats.tests.common_tests import (check_normalization, check_moment, from wafo.stats.tests.common_tests import (check_normalization, check_moment,
check_mean_expect, check_mean_expect,
check_var_expect, check_skew_expect, check_kurt_expect, check_var_expect, check_skew_expect, check_kurt_expect,
check_entropy, check_private_entropy, check_edge_support, check_entropy, check_private_entropy, check_edge_support,
check_named_args) check_named_args)
from wafo.stats._distr_params import distdiscrete
knf = npt.dec.knownfailureif knf = npt.dec.knownfailureif
distdiscrete = [
['bernoulli', (0.3, )],
['binom', (5, 0.4)],
['boltzmann', (1.4, 19)],
['dlaplace', (0.8,)], # 0.5
['geom', (0.5,)],
['hypergeom', (30, 12, 6)],
['hypergeom', (21, 3, 12)], # numpy.random (3,18,12) numpy ticket:921
['hypergeom', (21, 18, 11)], # numpy.random (18,3,11) numpy ticket:921
['logser', (0.6,)], # reenabled, numpy ticket:921
['nbinom', (5, 0.5)],
['nbinom', (0.4, 0.4)], # from tickets: 583
['planck', (0.51,)], # 4.1
['poisson', (0.6,)],
['randint', (7, 31)],
['skellam', (15, 8)],
['zipf', (6.5,)]
]
def test_discrete_basic(): def test_discrete_basic():
for distname, arg in distdiscrete: for distname, arg in distdiscrete:
@ -40,7 +20,7 @@ def test_discrete_basic():
np.random.seed(9765456) np.random.seed(9765456)
rvs = distfn.rvs(size=2000, *arg) rvs = distfn.rvs(size=2000, *arg)
supp = np.unique(rvs) supp = np.unique(rvs)
#_m, v = distfn.stats(*arg) m, v = distfn.stats(*arg)
yield check_cdf_ppf, distfn, arg, supp, distname + ' cdf_ppf' yield check_cdf_ppf, distfn, arg, supp, distname + ' cdf_ppf'
yield check_pmf_cdf, distfn, arg, distname yield check_pmf_cdf, distfn, arg, distname
@ -84,7 +64,7 @@ def test_moments():
yield check_var_expect, distfn, arg, m, v, distname yield check_var_expect, distfn, arg, m, v, distname
yield check_skew_expect, distfn, arg, m, v, s, distname yield check_skew_expect, distfn, arg, m, v, s, distname
cond = distname in ['zipf'] cond = False #distname in ['zipf']
msg = distname + ' fails kurtosis' msg = distname + ' fails kurtosis'
yield knf(cond, msg)(check_kurt_expect), distfn, arg, m, v, k, distname yield knf(cond, msg)(check_kurt_expect), distfn, arg, m, v, k, distname
@ -185,9 +165,9 @@ def check_discrete_chisquare(distfn, arg, rvs, alpha, msg):
histsupp[0] = distfn.a histsupp[0] = distfn.a
# find sample frequencies and perform chisquare test # find sample frequencies and perform chisquare test
freq, _hsupp = np.histogram(rvs, histsupp) freq,hsupp = np.histogram(rvs,histsupp)
#cdfs = distfn.cdf(distsupp, *arg) cdfs = distfn.cdf(distsupp,*arg)
(_chis, pval) = stats.chisquare(np.array(freq), n * distmass) (chis,pval) = stats.chisquare(np.array(freq),n*distmass)
npt.assert_(pval > alpha, 'chisquare - test for %s' npt.assert_(pval > alpha, 'chisquare - test for %s'
' at arg = %s with pval = %s' % (msg,str(arg),str(pval))) ' at arg = %s with pval = %s' % (msg,str(arg),str(pval)))
@ -2,24 +2,25 @@
""" """
from __future__ import division, print_function, absolute_import from __future__ import division, print_function, absolute_import
#import unittest
import warnings import warnings
import re import re
import sys import sys
from numpy.testing import (TestCase, run_module_suite, assert_equal, from numpy.testing import (TestCase, run_module_suite, assert_equal,
assert_array_equal, assert_almost_equal, assert_array_equal, assert_almost_equal, assert_array_almost_equal,
assert_array_almost_equal,
assert_allclose, assert_, assert_raises, rand, dec) assert_allclose, assert_, assert_raises, rand, dec)
from nose import SkipTest from nose import SkipTest
import numpy import numpy
import numpy as np import numpy as np
from numpy import typecodes, array from numpy import typecodes, array
#from scipy.lib._version import NumpyVersion from scipy.lib._version import NumpyVersion
from scipy import special from scipy import special
import wafo.stats as stats import wafo.stats as stats
from wafo.stats._distn_infrastructure import argsreduce from wafo.stats._distn_infrastructure import argsreduce
import wafo.stats.distributions
from scipy.special import xlogy from scipy.special import xlogy
@ -27,7 +28,8 @@ from scipy.special import xlogy
DOCSTRINGS_STRIPPED = sys.flags.optimize > 1 DOCSTRINGS_STRIPPED = sys.flags.optimize > 1
# generate test cases to test cdf and distribution consistency # Generate test cases to test cdf and distribution consistency.
# Note that this list does not include all distributions.
dists = ['uniform','norm','lognorm','expon','beta', dists = ['uniform','norm','lognorm','expon','beta',
'powerlaw','bradford','burr','fisk','cauchy','halfcauchy', 'powerlaw','bradford','burr','fisk','cauchy','halfcauchy',
'foldcauchy','gamma','gengamma','loggamma', 'foldcauchy','gamma','gengamma','loggamma',
@ -41,6 +43,18 @@ dists = ['uniform', 'norm', 'lognorm', 'expon', 'beta',
'hypsecant', 'laplace', 'reciprocal','triang','tukeylambda', 'hypsecant', 'laplace', 'reciprocal','triang','tukeylambda',
'vonmises', 'vonmises_line', 'pearson3'] 'vonmises', 'vonmises_line', 'pearson3']
def _assert_hasattr(a, b, msg=None):
if msg is None:
msg = '%s does not have attribute %s' % (a, b)
assert_(hasattr(a, b), msg=msg)
def test_api_regression():
# https://github.com/scipy/scipy/issues/3802
_assert_hasattr(stats.distributions, 'f_gen')
# check function for test generator # check function for test generator
@ -110,7 +124,6 @@ def test_vonmises_line_support():
class TestRandInt(TestCase): class TestRandInt(TestCase):
def test_rvs(self): def test_rvs(self):
vals = stats.randint.rvs(5,30,size=100) vals = stats.randint.rvs(5,30,size=100)
assert_(numpy.all(vals < 30) & numpy.all(vals >= 5)) assert_(numpy.all(vals < 30) & numpy.all(vals >= 5))
@ -133,14 +146,12 @@ class TestRandInt(TestCase):
def test_cdf(self): def test_cdf(self):
x = numpy.r_[0:36:100j] x = numpy.r_[0:36:100j]
k = numpy.floor(x) k = numpy.floor(x)
out = numpy.select( out = numpy.select([k >= 30,k >= 5],[1.0,(k-5.0+1)/(30-5.0)],0)
[k >= 30, k >= 5], [1.0, (k - 5.0 + 1) / (30 - 5.0)], 0)
vals = stats.randint.cdf(x,5,30) vals = stats.randint.cdf(x,5,30)
assert_array_almost_equal(vals, out, decimal=12) assert_array_almost_equal(vals, out, decimal=12)
class TestBinom(TestCase): class TestBinom(TestCase):
def test_rvs(self): def test_rvs(self):
vals = stats.binom.rvs(10, 0.75, size=(2, 50)) vals = stats.binom.rvs(10, 0.75, size=(2, 50))
assert_(numpy.all(vals >= 0) & numpy.all(vals <= 10)) assert_(numpy.all(vals >= 0) & numpy.all(vals <= 10))
@ -175,9 +186,15 @@ class TestBinom(TestCase):
h = b.entropy() h = b.entropy()
assert_equal(h, 0.0) assert_equal(h, 0.0)
def test_warns_p0(self):
# no spurious warnings are generated for p=0; gh-3817
with warnings.catch_warnings():
warnings.simplefilter("error", RuntimeWarning)
assert_equal(stats.binom(n=2, p=0).mean(), 0)
assert_equal(stats.binom(n=2, p=0).std(), 0)
class TestBernoulli(TestCase): class TestBernoulli(TestCase):
def test_rvs(self): def test_rvs(self):
vals = stats.bernoulli.rvs(0.75, size=(2, 50)) vals = stats.bernoulli.rvs(0.75, size=(2, 50))
assert_(numpy.all(vals >= 0) & numpy.all(vals <= 1)) assert_(numpy.all(vals >= 0) & numpy.all(vals <= 1))
@ -206,7 +223,6 @@ class TestBernoulli(TestCase):
class TestNBinom(TestCase): class TestNBinom(TestCase):
def test_rvs(self): def test_rvs(self):
vals = stats.nbinom.rvs(10, 0.75, size=(2, 50)) vals = stats.nbinom.rvs(10, 0.75, size=(2, 50))
assert_(numpy.all(vals >= 0)) assert_(numpy.all(vals >= 0))
@ -225,7 +241,6 @@ class TestNBinom(TestCase):
class TestGeom(TestCase): class TestGeom(TestCase):
def test_rvs(self): def test_rvs(self):
vals = stats.geom.rvs(0.75, size=(2, 50)) vals = stats.geom.rvs(0.75, size=(2, 50))
assert_(numpy.all(vals >= 0)) assert_(numpy.all(vals >= 0))
@ -268,17 +283,14 @@ class TestGeom(TestCase):
class TestTruncnorm(TestCase): class TestTruncnorm(TestCase):
def test_ppf_ticket1131(self): def test_ppf_ticket1131(self):
vals = stats.truncnorm.ppf( vals = stats.truncnorm.ppf([-0.5,0,1e-4,0.5, 1-1e-4,1,2], -1., 1.,
[-0.5, 0, 1e-4, 0.5, 1 - 1e-4, 1, 2], -1., 1.,
loc=[3]*7, scale=2) loc=[3]*7, scale=2)
expected = np.array([np.nan, 1, 1.00056419, 3, 4.99943581, 5, np.nan]) expected = np.array([np.nan, 1, 1.00056419, 3, 4.99943581, 5, np.nan])
assert_array_almost_equal(vals, expected) assert_array_almost_equal(vals, expected)
def test_isf_ticket1131(self): def test_isf_ticket1131(self):
vals = stats.truncnorm.isf( vals = stats.truncnorm.isf([-0.5,0,1e-4,0.5, 1-1e-4,1,2], -1., 1.,
[-0.5, 0, 1e-4, 0.5, 1 - 1e-4, 1, 2], -1., 1.,
loc=[3]*7, scale=2) loc=[3]*7, scale=2)
expected = np.array([np.nan, 5, 4.99943581, 3, 1.00056419, 1, np.nan]) expected = np.array([np.nan, 5, 4.99943581, 3, 1.00056419, 1, np.nan])
assert_array_almost_equal(vals, expected) assert_array_almost_equal(vals, expected)
@ -308,7 +320,6 @@ class TestTruncnorm(TestCase):
class TestHypergeom(TestCase): class TestHypergeom(TestCase):
def test_rvs(self): def test_rvs(self):
vals = stats.hypergeom.rvs(20, 10, 3, size=(2, 50)) vals = stats.hypergeom.rvs(20, 10, 3, size=(2, 50))
assert_(numpy.all(vals >= 0) & assert_(numpy.all(vals >= 0) &
@ -331,6 +342,10 @@ class TestHypergeom(TestCase):
hgpmf = stats.hypergeom.pmf(2, tot, good, N) hgpmf = stats.hypergeom.pmf(2, tot, good, N)
assert_almost_equal(hgpmf, 0.0010114963068932233, 11) assert_almost_equal(hgpmf, 0.0010114963068932233, 11)
def test_cdf_above_one(self):
# for some values of parameters, hypergeom cdf was >1, see gh-2238
assert_(0 <= stats.hypergeom.cdf(30, 13397950, 4363, 12390) <= 1.0)
def test_precision2(self): def test_precision2(self):
# Test hypergeom precision for large numbers. See #1218. # Test hypergeom precision for large numbers. See #1218.
# Results compared with those from R. # Results compared with those from R.
@ -340,8 +355,7 @@ class TestHypergeom(TestCase):
quantile = 2e4 quantile = 2e4
res = [] res = []
for eaten in fruits_eaten: for eaten in fruits_eaten:
res.append( res.append(stats.hypergeom.sf(quantile, oranges + pears, oranges, eaten))
stats.hypergeom.sf(quantile, oranges + pears, oranges, eaten))
expected = np.array([0, 1.904153e-114, 2.752693e-66, 4.931217e-32, expected = np.array([0, 1.904153e-114, 2.752693e-66, 4.931217e-32,
8.265601e-11, 0.1237904, 1]) 8.265601e-11, 0.1237904, 1])
assert_allclose(res, expected, atol=0, rtol=5e-7) assert_allclose(res, expected, atol=0, rtol=5e-7)
@ -384,7 +398,6 @@ class TestLoggamma(TestCase):
class TestLogser(TestCase): class TestLogser(TestCase):
def test_rvs(self): def test_rvs(self):
vals = stats.logser.rvs(0.75, size=(2, 50)) vals = stats.logser.rvs(0.75, size=(2, 50))
assert_(numpy.all(vals >= 1)) assert_(numpy.all(vals >= 1))
@ -398,7 +411,6 @@ class TestLogser(TestCase):
class TestPareto(TestCase): class TestPareto(TestCase):
def test_stats(self): def test_stats(self):
# Check the stats() method with some simple values. Also check # Check the stats() method with some simple values. Also check
# that the calculations do not trigger RuntimeWarnings. # that the calculations do not trigger RuntimeWarnings.
@ -450,20 +462,114 @@ class TestPareto(TestCase):
m, v, s, k = stats.pareto.stats(4.0, moments='mvsk') m, v, s, k = stats.pareto.stats(4.0, moments='mvsk')
assert_allclose(m, 4.0 / 3.0) assert_allclose(m, 4.0 / 3.0)
assert_allclose(v, 4.0 / 18.0) assert_allclose(v, 4.0 / 18.0)
assert_allclose( assert_allclose(s, 2*(1+4.0)/(4.0-3) * np.sqrt((4.0-2)/4.0))
s, 2 * (1 + 4.0) / (4.0 - 3) * np.sqrt((4.0 - 2) / 4.0))
assert_equal(k, np.nan) assert_equal(k, np.nan)
m, v, s, k = stats.pareto.stats(4.5, moments='mvsk') m, v, s, k = stats.pareto.stats(4.5, moments='mvsk')
assert_allclose(m, 4.5 / 3.5) assert_allclose(m, 4.5 / 3.5)
assert_allclose(v, 4.5 / (3.5*3.5*2.5)) assert_allclose(v, 4.5 / (3.5*3.5*2.5))
assert_allclose(s, (2*5.5/1.5) * np.sqrt(2.5/4.5)) assert_allclose(s, (2*5.5/1.5) * np.sqrt(2.5/4.5))
assert_allclose( assert_allclose(k, 6*(4.5**3 + 4.5**2 - 6*4.5 - 2)/(4.5*1.5*0.5))
k, 6 * (4.5 ** 3 + 4.5 ** 2 - 6 * 4.5 - 2) / (4.5 * 1.5 * 0.5))
class TestGenpareto(TestCase):
def test_ab(self):
# c >= 0: a, b = [0, inf]
for c in [1., 0.]:
c = np.asarray(c)
stats.genpareto._argcheck(c) # ugh
assert_equal(stats.genpareto.a, 0.)
assert_(np.isposinf(stats.genpareto.b))
# c < 0: a=0, b=1/|c|
c = np.asarray(-2.)
stats.genpareto._argcheck(c)
assert_allclose([stats.genpareto.a, stats.genpareto.b], [0., 0.5])
def test_c0(self):
# with c=0, genpareto reduces to the exponential distribution
rv = stats.genpareto(c=0.)
x = np.linspace(0, 10., 30)
assert_allclose(rv.pdf(x), stats.expon.pdf(x))
assert_allclose(rv.cdf(x), stats.expon.cdf(x))
assert_allclose(rv.sf(x), stats.expon.sf(x))
q = np.linspace(0., 1., 10)
assert_allclose(rv.ppf(q), stats.expon.ppf(q))
def test_cm1(self):
# with c=-1, genpareto reduces to the uniform distr on [0, 1]
rv = stats.genpareto(c=-1.)
x = np.linspace(0, 10., 30)
assert_allclose(rv.pdf(x), stats.uniform.pdf(x))
assert_allclose(rv.cdf(x), stats.uniform.cdf(x))
assert_allclose(rv.sf(x), stats.uniform.sf(x))
q = np.linspace(0., 1., 10)
assert_allclose(rv.ppf(q), stats.uniform.ppf(q))
# logpdf(1., c=-1) should be zero
assert_allclose(rv.logpdf(1), 0)
def test_x_inf(self):
# make sure x=inf is handled gracefully
rv = stats.genpareto(c=0.1)
assert_allclose([rv.pdf(np.inf), rv.cdf(np.inf)], [0., 1.])
assert_(np.isneginf(rv.logpdf(np.inf)))
rv = stats.genpareto(c=0.)
assert_allclose([rv.pdf(np.inf), rv.cdf(np.inf)], [0., 1.])
assert_(np.isneginf(rv.logpdf(np.inf)))
rv = stats.genpareto(c=-1.)
assert_allclose([rv.pdf(np.inf), rv.cdf(np.inf)], [0., 1.])
assert_(np.isneginf(rv.logpdf(np.inf)))
def test_c_continuity(self):
# pdf is continuous at c=0, -1
x = np.linspace(0, 10, 30)
for c in [0, -1]:
pdf0 = stats.genpareto.pdf(x, c)
for dc in [1e-14, -1e-14]:
pdfc = stats.genpareto.pdf(x, c + dc)
assert_allclose(pdf0, pdfc, atol=1e-12)
cdf0 = stats.genpareto.cdf(x, c)
for dc in [1e-14, -1e-14]:
cdfc = stats.genpareto.cdf(x, c + dc)
assert_allclose(cdf0, cdfc, atol=1e-12)
def test_c_continuity_ppf(self):
q = np.r_[np.logspace(1e-12, 0.01, base=0.1),
np.linspace(0.01, 1, 30, endpoint=False),
1. - np.logspace(1e-12, 0.01, base=0.1)]
for c in [0., -1.]:
ppf0 = stats.genpareto.ppf(q, c)
for dc in [1e-14, -1e-14]:
ppfc = stats.genpareto.ppf(q, c + dc)
assert_allclose(ppf0, ppfc, atol=1e-12)
def test_c_continuity_isf(self):
q = np.r_[np.logspace(1e-12, 0.01, base=0.1),
np.linspace(0.01, 1, 30, endpoint=False),
1. - np.logspace(1e-12, 0.01, base=0.1)]
for c in [0., -1.]:
isf0 = stats.genpareto.isf(q, c)
for dc in [1e-14, -1e-14]:
isfc = stats.genpareto.isf(q, c + dc)
assert_allclose(isf0, isfc, atol=1e-12)
def test_cdf_ppf_roundtrip(self):
# this should pass with machine precision. hat tip @pbrod
q = np.r_[np.logspace(1e-12, 0.01, base=0.1),
np.linspace(0.01, 1, 30, endpoint=False),
1. - np.logspace(1e-12, 0.01, base=0.1)]
for c in [1e-8, -1e-18, 1e-15, -1e-15]:
assert_allclose(stats.genpareto.cdf(stats.genpareto.ppf(q, c), c),
q, atol=1e-15)
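A compact numeric restatement of the limiting cases these new tests assert (illustrative only; it relies on the reductions verified above):

import numpy as np
from wafo import stats

x = np.linspace(0, 0.9, 5)
assert np.allclose(stats.genpareto.pdf(x, 0.0), stats.expon.pdf(x))     # c = 0
assert np.allclose(stats.genpareto.pdf(x, -1.0), stats.uniform.pdf(x))  # c = -1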
class TestPearson3(TestCase): class TestPearson3(TestCase):
def test_rvs(self): def test_rvs(self):
vals = stats.pearson3.rvs(0.1, size=(2, 50)) vals = stats.pearson3.rvs(0.1, size=(2, 50))
assert_(numpy.shape(vals) == (2, 50)) assert_(numpy.shape(vals) == (2, 50))
@ -497,7 +603,6 @@ class TestPearson3(TestCase):
class TestPoisson(TestCase): class TestPoisson(TestCase):
def test_rvs(self): def test_rvs(self):
vals = stats.poisson.rvs(0.5, size=(2, 50)) vals = stats.poisson.rvs(0.5, size=(2, 50))
assert_(numpy.all(vals >= 0)) assert_(numpy.all(vals >= 0))
@ -516,7 +621,6 @@ class TestPoisson(TestCase):
class TestZipf(TestCase): class TestZipf(TestCase):
def test_rvs(self): def test_rvs(self):
vals = stats.zipf.rvs(1.5, size=(2, 50)) vals = stats.zipf.rvs(1.5, size=(2, 50))
assert_(numpy.all(vals >= 1)) assert_(numpy.all(vals >= 1))
@ -539,7 +643,6 @@ class TestZipf(TestCase):
class TestDLaplace(TestCase): class TestDLaplace(TestCase):
def test_rvs(self): def test_rvs(self):
vals = stats.dlaplace.rvs(1.5, size=(2, 50)) vals = stats.dlaplace.rvs(1.5, size=(2, 50))
assert_(numpy.shape(vals) == (2, 50)) assert_(numpy.shape(vals) == (2, 50))
@ -573,9 +676,8 @@ class TestDLaplace(TestCase):
class TestInvGamma(TestCase): class TestInvGamma(TestCase):
@dec.skipif(NumpyVersion(np.__version__) < '1.7.0',
# @dec.skipif(NumpyVersion(np.__version__) < '1.7.0', "assert_* funcs broken with inf/nan")
# "assert_* funcs broken with inf/nan")
def test_invgamma_inf_gh_1866(self): def test_invgamma_inf_gh_1866(self):
# invgamma's moments are only finite for a>n # invgamma's moments are only finite for a>n
# specific numbers checked w/ boost 1.54 # specific numbers checked w/ boost 1.54
@ -583,8 +685,7 @@ class TestInvGamma(TestCase):
warnings.simplefilter('error', RuntimeWarning) warnings.simplefilter('error', RuntimeWarning)
mvsk = stats.invgamma.stats(a=19.31, moments='mvsk') mvsk = stats.invgamma.stats(a=19.31, moments='mvsk')
assert_allclose(mvsk, assert_allclose(mvsk,
[0.05461496450, 0.0001723162534, [0.05461496450, 0.0001723162534, 1.020362676, 2.055616582])
1.020362676, 2.055616582])
a = [1.1, 3.1, 5.6] a = [1.1, 3.1, 5.6]
mvsk = stats.invgamma.stats(a=a, moments='mvsk') mvsk = stats.invgamma.stats(a=a, moments='mvsk')
@ -597,7 +698,6 @@ class TestInvGamma(TestCase):
class TestF(TestCase): class TestF(TestCase):
def test_f_moments(self): def test_f_moments(self):
# n-th moment of F distributions is only finite for n < dfd / 2 # n-th moment of F distributions is only finite for n < dfd / 2
m, v, s, k = stats.f.stats(11, 6.5, moments='mvsk') m, v, s, k = stats.f.stats(11, 6.5, moments='mvsk')
@ -612,10 +712,10 @@ class TestF(TestCase):
warnings.simplefilter('error', RuntimeWarning) warnings.simplefilter('error', RuntimeWarning)
stats.f.stats(dfn=[11]*4, dfd=[2, 4, 6, 8], moments='mvsk') stats.f.stats(dfn=[11]*4, dfd=[2, 4, 6, 8], moments='mvsk')
#@dec.knownfailureif(True, 'f stats does not properly broadcast') @dec.knownfailureif(True, 'f stats does not properly broadcast')
def test_stats_broadcast(self): def test_stats_broadcast(self):
# stats do not fully broadcast just yet # stats do not fully broadcast just yet
_mv = stats.f.stats(dfn=11, dfd=[11, 12]) mv = stats.f.stats(dfn=11, dfd=[11, 12])
def test_rvgeneric_std(): def test_rvgeneric_std():
@ -624,7 +724,6 @@ def test_rvgeneric_std():
class TestRvDiscrete(TestCase): class TestRvDiscrete(TestCase):
def test_rvs(self): def test_rvs(self):
states = [-1,0,1,2,3,4] states = [-1,0,1,2,3,4]
probability = [0.0,0.3,0.4,0.0,0.3,0.0] probability = [0.0,0.3,0.4,0.0,0.3,0.0]
@ -653,7 +752,6 @@ class TestRvDiscrete(TestCase):
class TestExpon(TestCase): class TestExpon(TestCase):
def test_zero(self): def test_zero(self):
assert_equal(stats.expon.pdf(0),1) assert_equal(stats.expon.pdf(0),1)
@ -663,7 +761,6 @@ class TestExpon(TestCase):
class TestGenExpon(TestCase): class TestGenExpon(TestCase):
def test_pdf_unity_area(self): def test_pdf_unity_area(self):
from scipy.integrate import simps from scipy.integrate import simps
# PDF should integrate to one # PDF should integrate to one
@ -678,15 +775,12 @@ class TestGenExpon(TestCase):
class TestExponpow(TestCase): class TestExponpow(TestCase):
def test_tail(self): def test_tail(self):
assert_almost_equal(stats.exponpow.cdf(1e-10, 2.), 1e-20) assert_almost_equal(stats.exponpow.cdf(1e-10, 2.), 1e-20)
assert_almost_equal( assert_almost_equal(stats.exponpow.isf(stats.exponpow.sf(5, .8), .8), 5)
stats.exponpow.isf(stats.exponpow.sf(5, .8), .8), 5)
class TestSkellam(TestCase): class TestSkellam(TestCase):
def test_pmf(self): def test_pmf(self):
# comparison to R # comparison to R
k = numpy.arange(-10, 15) k = numpy.arange(-10, 15)
@ -731,7 +825,6 @@ class TestSkellam(TestCase):
class TestLognorm(TestCase): class TestLognorm(TestCase):
def test_pdf(self): def test_pdf(self):
# Regression test for Ticket #1471: avoid nan with 0/0 situation # Regression test for Ticket #1471: avoid nan with 0/0 situation
with np.errstate(divide='ignore'): with np.errstate(divide='ignore'):
@ -740,7 +833,6 @@ class TestLognorm(TestCase):
class TestBeta(TestCase): class TestBeta(TestCase):
def test_logpdf(self): def test_logpdf(self):
# Regression test for Ticket #1326: avoid nan with 0*log(0) situation # Regression test for Ticket #1326: avoid nan with 0*log(0) situation
logpdf = stats.beta.logpdf(0,1,0.5) logpdf = stats.beta.logpdf(0,1,0.5)
@ -757,7 +849,6 @@ class TestBeta(TestCase):
class TestBetaPrime(TestCase): class TestBetaPrime(TestCase):
def test_logpdf(self): def test_logpdf(self):
alpha, beta = 267, 1472 alpha, beta = 267, 1472
x = np.array([0.2, 0.5, 0.6]) x = np.array([0.2, 0.5, 0.6])
@ -767,7 +858,6 @@ class TestBetaPrime(TestCase):
class TestGamma(TestCase): class TestGamma(TestCase):
def test_pdf(self): def test_pdf(self):
# a few test cases to compare with R # a few test cases to compare with R
pdf = stats.gamma.pdf(90, 394, scale=1./5) pdf = stats.gamma.pdf(90, 394, scale=1./5)
@ -785,23 +875,18 @@ class TestGamma(TestCase):
class TestChi2(TestCase): class TestChi2(TestCase):
# regression tests after precision improvements, ticket:1041, not verified # regression tests after precision improvements, ticket:1041, not verified
def test_precision(self): def test_precision(self):
assert_almost_equal( assert_almost_equal(stats.chi2.pdf(1000, 1000), 8.919133934753128e-003, 14)
stats.chi2.pdf(1000, 1000), 8.919133934753128e-003, 14)
assert_almost_equal(stats.chi2.pdf(100, 100), 0.028162503162596778, 14) assert_almost_equal(stats.chi2.pdf(100, 100), 0.028162503162596778, 14)
class TestArrayArgument(TestCase): # test for ticket:992 class TestArrayArgument(TestCase): # test for ticket:992
def test_noexception(self): def test_noexception(self):
rvs = stats.norm.rvs( rvs = stats.norm.rvs(loc=(np.arange(5)), scale=np.ones(5), size=(10,5))
loc=(np.arange(5)), scale=np.ones(5), size=(10, 5))
assert_equal(rvs.shape, (10,5)) assert_equal(rvs.shape, (10,5))
class TestDocstring(TestCase): class TestDocstring(TestCase):
def test_docstrings(self): def test_docstrings(self):
# See ticket #761 # See ticket #761
if stats.rayleigh.__doc__ is not None: if stats.rayleigh.__doc__ is not None:
@ -816,7 +901,6 @@ class TestDocstring(TestCase):
class TestEntropy(TestCase): class TestEntropy(TestCase):
def test_entropy_positive(self): def test_entropy_positive(self):
# See ticket #497 # See ticket #497
pk = [0.5,0.2,0.3] pk = [0.5,0.2,0.3]
@ -848,8 +932,8 @@ class TestEntropy(TestCase):
assert_array_almost_equal(stats.entropy(pk, qk), assert_array_almost_equal(stats.entropy(pk, qk),
[0.1933259, 0.18609809]) [0.1933259, 0.18609809])
# @dec.skipif(NumpyVersion(np.__version__) < '1.7.0', @dec.skipif(NumpyVersion(np.__version__) < '1.7.0',
# "assert_* funcs broken with inf/nan") "assert_* funcs broken with inf/nan")
def test_entropy_2d_zero(self): def test_entropy_2d_zero(self):
pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]] pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
qk = [[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]] qk = [[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]]
@ -1024,7 +1108,7 @@ class TestFitMethod(object):
assert_equal(a, 2) assert_equal(a, 2)
assert_equal(loc, 0) assert_equal(loc, 0)
assert_equal(scale, 1) assert_equal(scale, 1)
_da, db = mlefunc(a, b, x) da, db = mlefunc(a, b, x)
assert_allclose(db, 0, atol=1e-5) assert_allclose(db, 0, atol=1e-5)
# Same floc and fscale values as above, but reverse the data # Same floc and fscale values as above, but reverse the data
@ -1053,11 +1137,10 @@ class TestFitMethod(object):
class TestFrozen(TestCase): class TestFrozen(TestCase):
# Test that a frozen distribution gives the same results as the original # Test that a frozen distribution gives the same results as the original object.
# object. #
# Only tested for the normal distribution (with loc and scale specified) # Only tested for the normal distribution (with loc and scale specified)
# and for the gamma distribution (with a shape parameter specified). # and for the gamma distribution (with a shape parameter specified).
def test_norm(self): def test_norm(self):
dist = stats.norm dist = stats.norm
frozen = stats.norm(loc=10.0, scale=3.0) frozen = stats.norm(loc=10.0, scale=3.0)
@ -1170,13 +1253,35 @@ class TestFrozen(TestCase):
# the focus of this test. # the focus of this test.
assert_equal(m1, m2) assert_equal(m1, m2)
def test_ab(self):
# test that the support of a frozen distribution
# (i) remains frozen even if it changes for the original one
# (ii) is actually correct if the shape parameters are such that
# the values of [a, b] are not the default [0, inf]
# take a genpareto as an example where the support
# depends on the value of the shape parameter:
# for c > 0: a, b = 0, inf
# for c < 0: a, b = 0, -1/c
rv = stats.genpareto(c=-0.1)
a, b = rv.dist.a, rv.dist.b
assert_equal([a, b], [0., 10.])
stats.genpareto.pdf(0, c=0.1) # this changes genpareto.b
assert_equal([rv.dist.a, rv.dist.b], [a, b])
rv1 = stats.genpareto(c=0.1)
assert_(rv1.dist is not rv.dist)
def test_rv_frozen_in_namespace(self):
# Regression test for gh-3522
assert_(hasattr(stats.distributions, 'rv_frozen'))
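The frozen-distribution behavior under test, in miniature: freezing binds the shape, loc and scale once, and per test_ab above the support captured at creation time survives later calls that mutate the class-level bounds. A sketch:

from wafo import stats

rv = stats.gamma(2.0, loc=0.0, scale=3.0)   # frozen: parameters fixed here
rv.mean()     # == stats.gamma.mean(2.0, 0.0, 3.0) == 6.0
rv.pdf(1.0)   # no shape arguments needed anymore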
class TestExpect(TestCase): class TestExpect(TestCase):
# Test for expect method. # Test for expect method.
# #
# Uses normal distribution and beta distribution for finite bounds, and # Uses normal distribution and beta distribution for finite bounds, and
# hypergeom for discrete distribution with finite support # hypergeom for discrete distribution with finite support
def test_norm(self): def test_norm(self):
v = stats.norm.expect(lambda x: (x-5)*(x-5), loc=5, scale=2) v = stats.norm.expect(lambda x: (x-5)*(x-5), loc=5, scale=2)
assert_almost_equal(v, 4, decimal=14) assert_almost_equal(v, 4, decimal=14)
@ -1195,8 +1300,7 @@ class TestExpect(TestCase):
def test_beta(self): def test_beta(self):
# case with finite support interval # case with finite support interval
v = stats.beta.expect( v = stats.beta.expect(lambda x: (x-19/3.)*(x-19/3.), args=(10,5),
lambda x: (x - 19 / 3.) * (x - 19 / 3.), args=(10, 5),
loc=5, scale=2) loc=5, scale=2)
assert_almost_equal(v, 1./18., decimal=13) assert_almost_equal(v, 1./18., decimal=13)
@ -1226,8 +1330,7 @@ class TestExpect(TestCase):
assert_almost_equal(v, v_true, decimal=14) assert_almost_equal(v, v_true, decimal=14)
# with bounds, bounds equal to shifted support # with bounds, bounds equal to shifted support
v_bounds = stats.hypergeom.expect( v_bounds = stats.hypergeom.expect(lambda x: (x-9.)**2, args=(20, 10, 8),
lambda x: (x - 9.) ** 2, args=(20, 10, 8),
loc=5., lb=5, ub=13) loc=5., lb=5, ub=13)
assert_almost_equal(v_bounds, v_true, decimal=14) assert_almost_equal(v_bounds, v_true, decimal=14)
@ -1279,7 +1382,6 @@ class TestExpect(TestCase):
class TestNct(TestCase): class TestNct(TestCase):
def test_nc_parameter(self): def test_nc_parameter(self):
# Parameter values c<=0 were not enabled (gh-2402). # Parameter values c<=0 were not enabled (gh-2402).
# For negative values c and for c=0 results of rv.cdf(0) below were nan # For negative values c and for c=0 results of rv.cdf(0) below were nan
@ -1289,8 +1391,7 @@ class TestNct(TestCase):
assert_almost_equal(rv.cdf(0), 0.841344746069, decimal=10) assert_almost_equal(rv.cdf(0), 0.841344746069, decimal=10)
def test_broadcasting(self): def test_broadcasting(self):
res = stats.nct.pdf( res = stats.nct.pdf(5, np.arange(4,7)[:,None], np.linspace(0.1, 1, 4))
5, np.arange(4, 7)[:, None], np.linspace(0.1, 1, 4))
expected = array([[0.00321886, 0.00557466, 0.00918418, 0.01442997], expected = array([[0.00321886, 0.00557466, 0.00918418, 0.01442997],
[0.00217142, 0.00395366, 0.00683888, 0.01126276], [0.00217142, 0.00395366, 0.00683888, 0.01126276],
[0.00153078, 0.00291093, 0.00525206, 0.00900815]]) [0.00153078, 0.00291093, 0.00525206, 0.00900815]])
@ -1316,7 +1417,6 @@ class TestNct(TestCase):
class TestRice(TestCase): class TestRice(TestCase):
def test_rice_zero_b(self): def test_rice_zero_b(self):
# rice distribution should work with b=0, cf gh-2164 # rice distribution should work with b=0, cf gh-2164
x = [0.2, 1., 5.] x = [0.2, 1., 5.]
@ -1345,15 +1445,13 @@ class TestRice(TestCase):
class TestErlang(TestCase): class TestErlang(TestCase):
def test_erlang_runtimewarning(self): def test_erlang_runtimewarning(self):
# erlang should generate a RuntimeWarning if a non-integer # erlang should generate a RuntimeWarning if a non-integer
# shape parameter is used. # shape parameter is used.
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter("error", RuntimeWarning) warnings.simplefilter("error", RuntimeWarning)
# The non-integer shape parameter 1.3 should trigger a # The non-integer shape parameter 1.3 should trigger a RuntimeWarning
# RuntimeWarning
assert_raises(RuntimeWarning, assert_raises(RuntimeWarning,
stats.erlang.rvs, 1.3, loc=0, scale=1, size=4) stats.erlang.rvs, 1.3, loc=0, scale=1, size=4)
@ -1366,8 +1464,51 @@ class TestErlang(TestCase):
assert_allclose(result_erlang, result_gamma, rtol=1e-3) assert_allclose(result_erlang, result_gamma, rtol=1e-3)
class TestRdist(TestCase): class TestExponWeib(TestCase):
def test_pdf_logpdf(self):
# Regression test for gh-3508.
x = 0.1
a = 1.0
c = 100.0
p = stats.exponweib.pdf(x, a, c)
logp = stats.exponweib.logpdf(x, a, c)
# Expected values were computed with mpmath.
assert_allclose([p, logp],
[1.0000000000000054e-97, -223.35075402042244])
def test_a_is_1(self):
# For issue gh-3508.
# Check that when a=1, the pdf and logpdf methods of exponweib are the
# same as those of weibull_min.
x = np.logspace(-4, -1, 4)
a = 1
c = 100
p = stats.exponweib.pdf(x, a, c)
expected = stats.weibull_min.pdf(x, c)
assert_allclose(p, expected)
logp = stats.exponweib.logpdf(x, a, c)
expected = stats.weibull_min.logpdf(x, c)
assert_allclose(logp, expected)
def test_a_is_1_c_is_1(self):
# When a = 1 and c = 1, the distribution is exponential.
x = np.logspace(-8, 1, 10)
a = 1
c = 1
p = stats.exponweib.pdf(x, a, c)
expected = stats.expon.pdf(x)
assert_allclose(p, expected)
logp = stats.exponweib.logpdf(x, a, c)
expected = stats.expon.logpdf(x)
assert_allclose(logp, expected)
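Numerically, the reductions these tests verify look like this (illustrative): exponweib(a=1, c) coincides with weibull_min(c), and a = c = 1 collapses to the exponential.

import numpy as np
from wafo import stats

x = np.logspace(-4, -1, 4)
assert np.allclose(stats.exponweib.pdf(x, 1, 100.0),
                   stats.weibull_min.pdf(x, 100.0))
assert np.allclose(stats.exponweib.pdf(x, 1, 1), stats.expon.pdf(x))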
class TestRdist(TestCase):
@dec.slow @dec.slow
def test_rdist_cdf_gh1285(self): def test_rdist_cdf_gh1285(self):
# check workaround in rdist._cdf for issue gh-1285. # check workaround in rdist._cdf for issue gh-1285.
@ -1384,15 +1525,14 @@ def test_540_567():
assert_almost_equal(stats.norm.cdf(-1.7624320983),0.038998159702449846, assert_almost_equal(stats.norm.cdf(-1.7624320983),0.038998159702449846,
decimal=10, err_msg='test_540_567') decimal=10, err_msg='test_540_567')
assert_almost_equal(stats.norm.cdf(1.38629436112, loc=0.950273420309, assert_almost_equal(stats.norm.cdf(1.38629436112, loc=0.950273420309,
scale=0.204423758009), scale=0.204423758009),0.98353464004309321,
0.98353464004309321, decimal=10, decimal=10, err_msg='test_540_567')
err_msg='test_540_567')
def test_regression_ticket_1316(): def test_regression_ticket_1316():
# The following was raising an exception, because _construct_default_doc() # The following was raising an exception, because _construct_default_doc()
# did not handle the default keyword extradoc=None. See ticket #1316. # did not handle the default keyword extradoc=None. See ticket #1316.
_g = stats._continuous_distns.gamma_gen(name='gamma') g = stats._continuous_distns.gamma_gen(name='gamma')
def test_regression_ticket_1326(): def test_regression_ticket_1326():
@ -1401,8 +1541,7 @@ def test_regression_ticket_1326():
def test_regression_tukey_lambda(): def test_regression_tukey_lambda():
# Make sure that Tukey-Lambda distribution correctly handles non-positive # Make sure that Tukey-Lambda distribution correctly handles non-positive lambdas.
# lambdas.
x = np.linspace(-5.0, 5.0, 101) x = np.linspace(-5.0, 5.0, 101)
olderr = np.seterr(divide='ignore') olderr = np.seterr(divide='ignore')
@ -1431,6 +1570,7 @@ def test_regression_ticket_1421():
def test_nan_arguments_gh_issue_1362(): def test_nan_arguments_gh_issue_1362():
with np.errstate(invalid='ignore'):
assert_(np.isnan(stats.t.logcdf(1, np.nan))) assert_(np.isnan(stats.t.logcdf(1, np.nan)))
assert_(np.isnan(stats.t.cdf(1, np.nan))) assert_(np.isnan(stats.t.cdf(1, np.nan)))
assert_(np.isnan(stats.t.logsf(1, np.nan))) assert_(np.isnan(stats.t.logsf(1, np.nan)))
@ -1597,6 +1737,30 @@ def test_norm_logcdf():
np.seterr(**olderr) np.seterr(**olderr)
def test_levy_cdf_ppf():
# Test levy.cdf, including small arguments.
x = np.array([1000, 1.0, 0.5, 0.1, 0.01, 0.001])
# Expected values were calculated separately with mpmath.
# E.g.
# >>> mpmath.mp.dps = 100
# >>> x = mpmath.mp.mpf('0.01')
# >>> cdf = mpmath.erfc(mpmath.sqrt(1/(2*x)))
expected = np.array([0.9747728793699604,
0.3173105078629141,
0.1572992070502851,
0.0015654022580025495,
1.523970604832105e-23,
1.795832784800726e-219])
y = stats.levy.cdf(x)
assert_allclose(y, expected, rtol=1e-10)
# ppf(expected) should get us back to x.
xx = stats.levy.ppf(expected)
assert_allclose(xx, x, rtol=1e-13)
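The mpmath recipe in the comment is the closed form levy.cdf(x) = erfc(sqrt(1/(2x))), which can be cross-checked directly (illustrative):

import numpy as np
from scipy import special
from wafo import stats

x = np.array([0.5, 1.0, 10.0])
assert np.allclose(stats.levy.cdf(x), special.erfc(np.sqrt(1.0/(2*x))))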
def test_hypergeom_interval_1802(): def test_hypergeom_interval_1802():
# these two had endless loops # these two had endless loops
assert_equal(stats.hypergeom.interval(.95, 187601, 43192, 757), assert_equal(stats.hypergeom.interval(.95, 187601, 43192, 757),
@ -1654,7 +1818,7 @@ def test_ncx2_tails_ticket_955():
# Trac #955 -- check that the cdf computed by special functions # Trac #955 -- check that the cdf computed by special functions
# matches the integrated pdf # matches the integrated pdf
a = stats.ncx2.cdf(np.arange(20, 25, 0.2), 2, 1.07458615e+02) a = stats.ncx2.cdf(np.arange(20, 25, 0.2), 2, 1.07458615e+02)
b = stats.ncx2.veccdf(np.arange(20, 25, 0.2), 2, 1.07458615e+02) b = stats.ncx2._cdfvec(np.arange(20, 25, 0.2), 2, 1.07458615e+02)
assert_allclose(a, b, rtol=1e-3, atol=0) assert_allclose(a, b, rtol=1e-3, atol=0)
@ -1672,8 +1836,7 @@ def test_stats_shapes_argcheck():
mv2_augmented = tuple(np.r_[np.nan, _] for _ in mv2) mv2_augmented = tuple(np.r_[np.nan, _] for _ in mv2)
assert_equal(mv2_augmented, mv3) assert_equal(mv2_augmented, mv3)
# -1 is not a legal shape parameter mv3 = stats.lognorm.stats([2, 2.4, -1]) # -1 is not a legal shape parameter
mv3 = stats.lognorm.stats([2, 2.4, -1])
mv2 = stats.lognorm.stats([2, 2.4]) mv2 = stats.lognorm.stats([2, 2.4])
mv2_augmented = tuple(np.r_[_, np.nan] for _ in mv2) mv2_augmented = tuple(np.r_[_, np.nan] for _ in mv2)
assert_equal(mv2_augmented, mv3) assert_equal(mv2_augmented, mv3)
@ -1683,22 +1846,19 @@ def test_stats_shapes_argcheck():
# anyway, so some distributions may or may not fail. # anyway, so some distributions may or may not fail.
# Test subclassing distributions w/ explicit shapes ## Test subclassing distributions w/ explicit shapes
class _distr_gen(stats.rv_continuous): class _distr_gen(stats.rv_continuous):
def _pdf(self, x, a): def _pdf(self, x, a):
return 42 return 42
class _distr2_gen(stats.rv_continuous): class _distr2_gen(stats.rv_continuous):
def _cdf(self, x, a): def _cdf(self, x, a):
return 42 * a + x return 42 * a + x
class _distr3_gen(stats.rv_continuous): class _distr3_gen(stats.rv_continuous):
def _pdf(self, x, a, b): def _pdf(self, x, a, b):
return a + b return a + b
@ -1710,7 +1870,6 @@ class _distr3_gen(stats.rv_continuous):
class _distr6_gen(stats.rv_continuous): class _distr6_gen(stats.rv_continuous):
# Two shape parameters (both _pdf and _cdf defined, consistent shapes.) # Two shape parameters (both _pdf and _cdf defined, consistent shapes.)
def _pdf(self, x, a, b): def _pdf(self, x, a, b):
return a*x + b return a*x + b
@ -1772,7 +1931,6 @@ class TestSubclassingExplicitShapes(TestCase):
def test_shapes_signature(self): def test_shapes_signature(self):
# test explicit shapes which agree w/ the signature of _pdf # test explicit shapes which agree w/ the signature of _pdf
class _dist_gen(stats.rv_continuous): class _dist_gen(stats.rv_continuous):
def _pdf(self, x, a): def _pdf(self, x, a):
return stats.norm._pdf(x) * a return stats.norm._pdf(x) * a
@ -1782,7 +1940,6 @@ class TestSubclassingExplicitShapes(TestCase):
def test_shapes_signature_inconsistent(self): def test_shapes_signature_inconsistent(self):
# test explicit shapes which do not agree w/ the signature of _pdf # test explicit shapes which do not agree w/ the signature of _pdf
class _dist_gen(stats.rv_continuous): class _dist_gen(stats.rv_continuous):
def _pdf(self, x, a): def _pdf(self, x, a):
return stats.norm._pdf(x) * a return stats.norm._pdf(x) * a
@ -1793,7 +1950,6 @@ class TestSubclassingExplicitShapes(TestCase):
# test _pdf with only starargs # test _pdf with only starargs
# NB: **kwargs of pdf will never reach _pdf # NB: **kwargs of pdf will never reach _pdf
class _dist_gen(stats.rv_continuous): class _dist_gen(stats.rv_continuous):
def _pdf(self, x, *args): def _pdf(self, x, *args):
extra_kwarg = args[0] extra_kwarg = args[0]
return stats.norm._pdf(x) * extra_kwarg return stats.norm._pdf(x) * extra_kwarg
@ -1807,7 +1963,6 @@ class TestSubclassingExplicitShapes(TestCase):
# test _pdf with named & starargs # test _pdf with named & starargs
# NB: **kwargs of pdf will never reach _pdf # NB: **kwargs of pdf will never reach _pdf
class _dist_gen(stats.rv_continuous): class _dist_gen(stats.rv_continuous):
def _pdf(self, x, offset, *args): def _pdf(self, x, offset, *args):
extra_kwarg = args[0] extra_kwarg = args[0]
return stats.norm._pdf(x) * extra_kwarg + offset return stats.norm._pdf(x) * extra_kwarg + offset
@ -1822,11 +1977,9 @@ class TestSubclassingExplicitShapes(TestCase):
# **kwargs to _pdf are ignored. # **kwargs to _pdf are ignored.
# this is a limitation of the framework (_pdf(x, *goodargs)) # this is a limitation of the framework (_pdf(x, *goodargs))
class _distr_gen(stats.rv_continuous): class _distr_gen(stats.rv_continuous):
def _pdf(self, x, *args, **kwargs): def _pdf(self, x, *args, **kwargs):
# _pdf should handle *args, **kwargs itself. Here "handling" # _pdf should handle *args, **kwargs itself. Here "handling" is
# is ignoring *args and looking for ``extra_kwarg`` and using # ignoring *args and looking for ``extra_kwarg`` and using that.
# that.
extra_kwarg = kwargs.pop('extra_kwarg', 1) extra_kwarg = kwargs.pop('extra_kwarg', 1)
return stats.norm._pdf(x) * extra_kwarg return stats.norm._pdf(x) * extra_kwarg
@ -1836,7 +1989,6 @@ class TestSubclassingExplicitShapes(TestCase):
def shapes_empty_string(self): def shapes_empty_string(self):
# shapes='' is equivalent to shapes=None # shapes='' is equivalent to shapes=None
class _dist_gen(stats.rv_continuous): class _dist_gen(stats.rv_continuous):
def _pdf(self, x): def _pdf(self, x):
return stats.norm.pdf(x) return stats.norm.pdf(x)
@ -1889,7 +2041,6 @@ class TestSubclassingNoShapes(TestCase):
def test_defaults_raise(self): def test_defaults_raise(self):
# default arguments should raise # default arguments should raise
class _dist_gen(stats.rv_continuous): class _dist_gen(stats.rv_continuous):
def _pdf(self, x, a=42): def _pdf(self, x, a=42):
return 42 return 42
assert_raises(TypeError, _dist_gen, **dict(name='dummy')) assert_raises(TypeError, _dist_gen, **dict(name='dummy'))
@ -1897,7 +2048,6 @@ class TestSubclassingNoShapes(TestCase):
def test_starargs_raise(self): def test_starargs_raise(self):
# without explicit shapes, *args are not allowed # without explicit shapes, *args are not allowed
class _dist_gen(stats.rv_continuous): class _dist_gen(stats.rv_continuous):
def _pdf(self, x, a, *args): def _pdf(self, x, a, *args):
return 42 return 42
assert_raises(TypeError, _dist_gen, **dict(name='dummy')) assert_raises(TypeError, _dist_gen, **dict(name='dummy'))
@ -1905,7 +2055,6 @@ class TestSubclassingNoShapes(TestCase):
def test_kwargs_raise(self): def test_kwargs_raise(self):
# without explicit shapes, **kwargs are not allowed # without explicit shapes, **kwargs are not allowed
class _dist_gen(stats.rv_continuous): class _dist_gen(stats.rv_continuous):
def _pdf(self, x, a, **kwargs): def _pdf(self, x, a, **kwargs):
return 42 return 42
assert_raises(TypeError, _dist_gen, **dict(name='dummy')) assert_raises(TypeError, _dist_gen, **dict(name='dummy'))
@ -1927,5 +2076,4 @@ def test_infinite_input():
if __name__ == "__main__": if __name__ == "__main__":
#unittest.main()
run_module_suite() run_module_suite()

@ -33,6 +33,7 @@ failing_fits = [
'tukeylambda', 'tukeylambda',
'vonmises', 'vonmises',
'wrapcauchy', 'wrapcauchy',
'levy_stable'
] ]
# Don't run the fit test on these: # Don't run the fit test on these:
@ -45,7 +46,7 @@ skip_fit = [
def test_cont_fit(): def test_cont_fit():
# this tests the closeness of the estimated parameters to the true # this tests the closeness of the estimated parameters to the true
# parameters with fit method of continuous distributions # parameters with fit method of continuous distributions
# Note: slow, some distributions don't converge with sample size <= 10000 # Note: this is slow; some distributions don't converge with sample size <= 10000
for distname, arg in distcont: for distname, arg in distcont:
if distname not in skip_fit: if distname not in skip_fit:
@ -53,6 +54,7 @@ def test_cont_fit():
def check_cont_fit(distname,arg): def check_cont_fit(distname,arg):
options = dict(method='mps', floc=0.)
if distname in failing_fits: if distname in failing_fits:
# Skip failing fits unless overridden # Skip failing fits unless overridden
xfail = True xfail = True
@ -62,15 +64,17 @@ def check_cont_fit(distname, arg):
pass pass
if xfail: if xfail:
msg = "Fitting %s doesn't work reliably yet" % distname msg = "Fitting %s doesn't work reliably yet" % distname
msg += " [Set environment variable SCIPY_XFAIL=1 to run this " + \ msg += " [Set environment variable SCIPY_XFAIL=1 to run this test nevertheless.]"
"test nevertheless.]" #dec.knownfailureif(True, msg)(lambda: None)()
dec.knownfailureif(True, msg)(lambda: None)() options['floc']=0.
options['fscale']=1.
# print('Testing %s' % distname)
distfn = getattr(stats, distname) distfn = getattr(stats, distname)
truearg = np.hstack([arg,[0.0,1.0]]) truearg = np.hstack([arg,[0.0,1.0]])
diffthreshold = np.max(np.vstack([ diffthreshold = np.max(np.vstack([truearg*thresh_percent,
truearg * thresh_percent,
np.ones(distfn.numargs+2)*thresh_min]),0) np.ones(distfn.numargs+2)*thresh_min]),0)
for fit_size in fit_sizes: for fit_size in fit_sizes:
@ -80,15 +84,16 @@ def check_cont_fit(distname, arg):
with np.errstate(all='ignore'): with np.errstate(all='ignore'):
rvs = distfn.rvs(size=fit_size, *arg) rvs = distfn.rvs(size=fit_size, *arg)
# phat = distfn.fit2(rvs) # phat = distfn.fit2(rvs)
phat = distfn.fit2(rvs, method='mps')
phat = distfn.fit2(rvs, **options)
est = phat.par est = phat.par
#est = distfn.fit(rvs) # start with default values #est = distfn.fit(rvs) # start with default values
diff = est - truearg diff = est - truearg
# threshold for location # threshold for location
diffthreshold[-2] = np.max([np.abs(rvs.mean()) * thresh_percent, diffthreshold[-2] = np.max([np.abs(rvs.mean())*thresh_percent,thresh_min])
thresh_min])
if np.any(np.isnan(est)): if np.any(np.isnan(est)):
raise AssertionError('nan returned in fit') raise AssertionError('nan returned in fit')

@ -180,5 +180,23 @@ def test_kde_integer_input():
assert_array_almost_equal(kde(x1), y_expected, decimal=6) assert_array_almost_equal(kde(x1), y_expected, decimal=6)
def test_pdf_logpdf():
np.random.seed(1)
n_basesample = 50
xn = np.random.randn(n_basesample)
# Default
gkde = stats.gaussian_kde(xn)
xs = np.linspace(-15, 12, 25)
pdf = gkde.evaluate(xs)
pdf2 = gkde.pdf(xs)
assert_almost_equal(pdf, pdf2, decimal=12)
logpdf = np.log(pdf)
logpdf2 = gkde.logpdf(xs)
assert_almost_equal(logpdf, logpdf2, decimal=12)
if __name__ == "__main__": if __name__ == "__main__":
run_module_suite() run_module_suite()

@ -9,9 +9,8 @@ import warnings
import numpy as np import numpy as np
from numpy.random import RandomState from numpy.random import RandomState
from numpy.testing import (TestCase, run_module_suite, assert_array_equal, from numpy.testing import (TestCase, run_module_suite, assert_array_equal,
assert_almost_equal, assert_array_less, assert_almost_equal, assert_array_less, assert_array_almost_equal,
assert_array_almost_equal, assert_raises, assert_, assert_raises, assert_, assert_allclose, assert_equal, dec, assert_warns)
assert_allclose, assert_equal, dec)
from wafo import stats from wafo import stats
@ -37,7 +36,6 @@ g10 = [0.991, 0.995, 0.984, 0.994, 0.997, 0.997, 0.991, 0.998, 1.004, 0.997]
class TestShapiro(TestCase): class TestShapiro(TestCase):
def test_basic(self): def test_basic(self):
x1 = [0.11,7.87,4.61,10.14,7.95,3.14,0.46, x1 = [0.11,7.87,4.61,10.14,7.95,3.14,0.46,
4.43,0.21,4.75,0.71,1.52,3.24, 4.43,0.21,4.75,0.71,1.52,3.24,
@ -59,25 +57,24 @@ class TestShapiro(TestCase):
class TestAnderson(TestCase): class TestAnderson(TestCase):
def test_normal(self): def test_normal(self):
rs = RandomState(1234567890) rs = RandomState(1234567890)
x1 = rs.standard_exponential(size=50) x1 = rs.standard_exponential(size=50)
x2 = rs.standard_normal(size=50) x2 = rs.standard_normal(size=50)
A, crit, _sig = stats.anderson(x1) A,crit,sig = stats.anderson(x1)
assert_array_less(crit[:-1], A) assert_array_less(crit[:-1], A)
A, crit, _sig = stats.anderson(x2) A,crit,sig = stats.anderson(x2)
assert_array_less(A, crit[-2:]) assert_array_less(A, crit[-2:])
def test_expon(self): def test_expon(self):
rs = RandomState(1234567890) rs = RandomState(1234567890)
x1 = rs.standard_exponential(size=50) x1 = rs.standard_exponential(size=50)
x2 = rs.standard_normal(size=50) x2 = rs.standard_normal(size=50)
A, crit, _sig = stats.anderson(x1, 'expon') A,crit,sig = stats.anderson(x1,'expon')
assert_array_less(A, crit[-2:]) assert_array_less(A, crit[-2:])
olderr = np.seterr(all='ignore') olderr = np.seterr(all='ignore')
try: try:
A, crit, _sig = stats.anderson(x2, 'expon') A,crit,sig = stats.anderson(x2,'expon')
finally: finally:
np.seterr(**olderr) np.seterr(**olderr)
assert_(A > crit[-1]) assert_(A > crit[-1])
@ -86,6 +83,123 @@ class TestAnderson(TestCase):
assert_raises(ValueError, stats.anderson, [1], dist='plate_of_shrimp') assert_raises(ValueError, stats.anderson, [1], dist='plate_of_shrimp')
class TestAndersonKSamp(TestCase):
def test_example1a(self):
# Example data from Scholz & Stephens (1987), originally
# published in Lehmann (1995, Nonparametrics, Statistical
# Methods Based on Ranks, p. 309)
# Pass a mixture of lists and arrays
t1 = [38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0]
t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0])
t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8])
assert_warns(UserWarning, stats.anderson_ksamp, (t1, t2, t3, t4),
midrank=False)
with warnings.catch_warnings():
warnings.filterwarnings('ignore', message='approximate p-value')
Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4), midrank=False)
assert_almost_equal(Tk, 4.449, 3)
assert_array_almost_equal([0.4985, 1.3237, 1.9158, 2.4930, 3.2459],
tm, 4)
assert_almost_equal(p, 0.0021, 4)
def test_example1b(self):
# Example data from Scholz & Stephens (1987), originally
# published in Lehmann (1995, Nonparametrics, Statistical
# Methods Based on Ranks, p. 309)
# Pass arrays
t1 = np.array([38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0])
t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0])
t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8])
with warnings.catch_warnings():
warnings.filterwarnings('ignore', message='approximate p-value')
Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4), midrank=True)
assert_almost_equal(Tk, 4.480, 3)
assert_array_almost_equal([0.4985, 1.3237, 1.9158, 2.4930, 3.2459],
tm, 4)
assert_almost_equal(p, 0.0020, 4)
def test_example2a(self):
# Example data taken from an earlier technical report of
# Scholz and Stephens
# Pass lists instead of arrays
t1 = [194, 15, 41, 29, 33, 181]
t2 = [413, 14, 58, 37, 100, 65, 9, 169, 447, 184, 36, 201, 118]
t3 = [34, 31, 18, 18, 67, 57, 62, 7, 22, 34]
t4 = [90, 10, 60, 186, 61, 49, 14, 24, 56, 20, 79, 84, 44, 59, 29,
118, 25, 156, 310, 76, 26, 44, 23, 62]
t5 = [130, 208, 70, 101, 208]
t6 = [74, 57, 48, 29, 502, 12, 70, 21, 29, 386, 59, 27]
t7 = [55, 320, 56, 104, 220, 239, 47, 246, 176, 182, 33]
t8 = [23, 261, 87, 7, 120, 14, 62, 47, 225, 71, 246, 21, 42, 20, 5,
12, 120, 11, 3, 14, 71, 11, 14, 11, 16, 90, 1, 16, 52, 95]
t9 = [97, 51, 11, 4, 141, 18, 142, 68, 77, 80, 1, 16, 106, 206, 82,
54, 31, 216, 46, 111, 39, 63, 18, 191, 18, 163, 24]
t10 = [50, 44, 102, 72, 22, 39, 3, 15, 197, 188, 79, 88, 46, 5, 5, 36,
22, 139, 210, 97, 30, 23, 13, 14]
t11 = [359, 9, 12, 270, 603, 3, 104, 2, 438]
t12 = [50, 254, 5, 283, 35, 12]
t13 = [487, 18, 100, 7, 98, 5, 85, 91, 43, 230, 3, 130]
t14 = [102, 209, 14, 57, 54, 32, 67, 59, 134, 152, 27, 14, 230, 66,
61, 34]
with warnings.catch_warnings():
warnings.filterwarnings('ignore', message='approximate p-value')
Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4, t5, t6, t7, t8,
t9, t10, t11, t12, t13, t14),
midrank=False)
assert_almost_equal(Tk, 3.288, 3)
assert_array_almost_equal([0.5990, 1.3269, 1.8052, 2.2486, 2.8009],
tm, 4)
assert_almost_equal(p, 0.0041, 4)
def test_example2b(self):
# Example data taken from an earlier technical report of
# Scholz and Stephens
t1 = [194, 15, 41, 29, 33, 181]
t2 = [413, 14, 58, 37, 100, 65, 9, 169, 447, 184, 36, 201, 118]
t3 = [34, 31, 18, 18, 67, 57, 62, 7, 22, 34]
t4 = [90, 10, 60, 186, 61, 49, 14, 24, 56, 20, 79, 84, 44, 59, 29,
118, 25, 156, 310, 76, 26, 44, 23, 62]
t5 = [130, 208, 70, 101, 208]
t6 = [74, 57, 48, 29, 502, 12, 70, 21, 29, 386, 59, 27]
t7 = [55, 320, 56, 104, 220, 239, 47, 246, 176, 182, 33]
t8 = [23, 261, 87, 7, 120, 14, 62, 47, 225, 71, 246, 21, 42, 20, 5,
12, 120, 11, 3, 14, 71, 11, 14, 11, 16, 90, 1, 16, 52, 95]
t9 = [97, 51, 11, 4, 141, 18, 142, 68, 77, 80, 1, 16, 106, 206, 82,
54, 31, 216, 46, 111, 39, 63, 18, 191, 18, 163, 24]
t10 = [50, 44, 102, 72, 22, 39, 3, 15, 197, 188, 79, 88, 46, 5, 5, 36,
22, 139, 210, 97, 30, 23, 13, 14]
t11 = [359, 9, 12, 270, 603, 3, 104, 2, 438]
t12 = [50, 254, 5, 283, 35, 12]
t13 = [487, 18, 100, 7, 98, 5, 85, 91, 43, 230, 3, 130]
t14 = [102, 209, 14, 57, 54, 32, 67, 59, 134, 152, 27, 14, 230, 66,
61, 34]
with warnings.catch_warnings():
warnings.filterwarnings('ignore', message='approximate p-value')
Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4, t5, t6, t7, t8,
t9, t10, t11, t12, t13, t14),
midrank=True)
assert_almost_equal(Tk, 3.294, 3)
assert_array_almost_equal([0.5990, 1.3269, 1.8052, 2.2486, 2.8009],
tm, 4)
assert_almost_equal(p, 0.0041, 4)
def test_not_enough_samples(self):
assert_raises(ValueError, stats.anderson_ksamp, np.ones(5))
def test_no_distinct_observations(self):
assert_raises(ValueError, stats.anderson_ksamp,
(np.ones(5), np.ones(5)))
def test_empty_sample(self):
assert_raises(ValueError, stats.anderson_ksamp, (np.ones(5), []))
class TestAnsari(TestCase): class TestAnsari(TestCase):
def test_small(self): def test_small(self):
@ -98,13 +212,12 @@ class TestAnsari(TestCase):
def test_approx(self): def test_approx(self):
ramsay = np.array((111, 107, 100, 99, 102, 106, 109, 108, 104, 99, ramsay = np.array((111, 107, 100, 99, 102, 106, 109, 108, 104, 99,
101, 96, 97, 102, 107, 113, 116, 113, 110, 98)) 101, 96, 97, 102, 107, 113, 116, 113, 110, 98))
parekh = np.array((107, 108, 106, 98, 105, 103, 110, 105, 104, 100, parekh = np.array((107, 108, 106, 98, 105, 103, 110, 105, 104,
96, 108, 103, 104, 114, 114, 113, 108, 106, 99)) 100, 96, 108, 103, 104, 114, 114, 113, 108, 106, 99))
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.filterwarnings('ignore', warnings.filterwarnings('ignore',
message="Ties preclude use of exact " + message="Ties preclude use of exact statistic.")
"statistic.")
W, pval = stats.ansari(ramsay, parekh) W, pval = stats.ansari(ramsay, parekh)
assert_almost_equal(W,185.5,11) assert_almost_equal(W,185.5,11)
@ -145,8 +258,7 @@ class TestLevene(TestCase):
# Test that center='trimmed' gives the same result as center='mean' # Test that center='trimmed' gives the same result as center='mean'
# when proportiontocut=0. # when proportiontocut=0.
W1, pval1 = stats.levene(g1, g2, g3, center='mean') W1, pval1 = stats.levene(g1, g2, g3, center='mean')
W2, pval2 = stats.levene( W2, pval2 = stats.levene(g1, g2, g3, center='trimmed', proportiontocut=0.0)
g1, g2, g3, center='trimmed', proportiontocut=0.0)
assert_almost_equal(W1, W2) assert_almost_equal(W1, W2)
assert_almost_equal(pval1, pval2) assert_almost_equal(pval1, pval2)
@ -157,10 +269,8 @@ class TestLevene(TestCase):
x2 = np.random.permutation(x) x2 = np.random.permutation(x)
# Use center='trimmed' # Use center='trimmed'
W0, _pval0 = stats.levene(x, y, center='trimmed', W0, pval0 = stats.levene(x, y, center='trimmed', proportiontocut=0.125)
proportiontocut=0.125) W1, pval1 = stats.levene(x2, y, center='trimmed', proportiontocut=0.125)
W1, pval1 = stats.levene(
x2, y, center='trimmed', proportiontocut=0.125)
# Trim the data here, and use center='mean' # Trim the data here, and use center='mean'
W2, pval2 = stats.levene(x[1:-1], y[1:-1], center='mean') W2, pval2 = stats.levene(x[1:-1], y[1:-1], center='mean')
# Result should be the same. # Result should be the same.
@ -237,15 +347,13 @@ class TestFligner(TestCase):
# numbers from R: fligner.test in package stats # numbers from R: fligner.test in package stats
x1 = np.arange(5) x1 = np.arange(5)
assert_array_almost_equal(stats.fligner(x1,x1**2), assert_array_almost_equal(stats.fligner(x1,x1**2),
(3.2282229927203536, 0.072379187848207877), (3.2282229927203536, 0.072379187848207877), 11)
11)
def test_trimmed1(self): def test_trimmed1(self):
# Test that center='trimmed' gives the same result as center='mean' # Test that center='trimmed' gives the same result as center='mean'
# when proportiontocut=0. # when proportiontocut=0.
Xsq1, pval1 = stats.fligner(g1, g2, g3, center='mean') Xsq1, pval1 = stats.fligner(g1, g2, g3, center='mean')
Xsq2, pval2 = stats.fligner( Xsq2, pval2 = stats.fligner(g1, g2, g3, center='trimmed', proportiontocut=0.0)
g1, g2, g3, center='trimmed', proportiontocut=0.0)
assert_almost_equal(Xsq1, Xsq2) assert_almost_equal(Xsq1, Xsq2)
assert_almost_equal(pval1, pval2) assert_almost_equal(pval1, pval2)
@ -253,8 +361,7 @@ class TestFligner(TestCase):
x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0] x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0]
y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0] y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0]
# Use center='trimmed' # Use center='trimmed'
Xsq1, pval1 = stats.fligner( Xsq1, pval1 = stats.fligner(x, y, center='trimmed', proportiontocut=0.125)
x, y, center='trimmed', proportiontocut=0.125)
# Trim the data here, and use center='mean' # Trim the data here, and use center='mean'
Xsq2, pval2 = stats.fligner(x[1:-1], y[1:-1], center='mean') Xsq2, pval2 = stats.fligner(x[1:-1], y[1:-1], center='mean')
# Result should be the same. # Result should be the same.
@ -289,13 +396,11 @@ class TestFligner(TestCase):
class TestMood(TestCase): class TestMood(TestCase):
def test_mood(self): def test_mood(self):
# numbers from R: mood.test in package stats # numbers from R: mood.test in package stats
x1 = np.arange(5) x1 = np.arange(5)
assert_array_almost_equal(stats.mood(x1, x1**2), assert_array_almost_equal(stats.mood(x1, x1**2),
(-1.3830857299399906, 0.16663858066771478), (-1.3830857299399906, 0.16663858066771478), 11)
11)
def test_mood_order_of_args(self): def test_mood_order_of_args(self):
# z should change sign when the order of arguments changes, pvalue # z should change sign when the order of arguments changes, pvalue
@ -387,8 +492,7 @@ class TestMood(TestCase):
stats.mood(slice1, slice2)) stats.mood(slice1, slice2))
def test_mood_bad_arg(self): def test_mood_bad_arg(self):
# Raise ValueError when the sum of the lengths of the args is less than # Raise ValueError when the sum of the lengths of the args is less than 3
# 3
assert_raises(ValueError, stats.mood, [1], []) assert_raises(ValueError, stats.mood, [1], [])
@ -406,7 +510,7 @@ class TestProbplot(TestCase):
assert_allclose(osr, np.sort(x)) assert_allclose(osr, np.sort(x))
assert_allclose(osm, osm_expected) assert_allclose(osm, osm_expected)
_res, res_fit = stats.probplot(x, fit=True) res, res_fit = stats.probplot(x, fit=True)
res_fit_expected = [1.05361841, 0.31297795, 0.98741609] res_fit_expected = [1.05361841, 0.31297795, 0.98741609]
assert_allclose(res_fit, res_fit_expected) assert_allclose(res_fit, res_fit_expected)
@ -423,7 +527,7 @@ class TestProbplot(TestCase):
assert_allclose(osr1, osr2) assert_allclose(osr1, osr2)
assert_allclose(osr1, osr3) assert_allclose(osr1, osr3)
# Check giving (loc, scale) params for normal distribution # Check giving (loc, scale) params for normal distribution
_osm, _osr = stats.probplot(x, sparams=(), fit=False) osm, osr = stats.probplot(x, sparams=(), fit=False)
def test_dist_keyword(self): def test_dist_keyword(self):
np.random.seed(12345) np.random.seed(12345)
@ -437,9 +541,7 @@ class TestProbplot(TestCase):
assert_raises(AttributeError, stats.probplot, x, dist=[]) assert_raises(AttributeError, stats.probplot, x, dist=[])
class custom_dist(object): class custom_dist(object):
"""Some class that looks just enough like a distribution.""" """Some class that looks just enough like a distribution."""
def ppf(self, q): def ppf(self, q):
return stats.norm.ppf(q, loc=2) return stats.norm.ppf(q, loc=2)
@ -570,7 +672,7 @@ class TestBoxcox(TestCase):
lmbda = 2.5 lmbda = 2.5
x = stats.norm.rvs(loc=10, size=50000) x = stats.norm.rvs(loc=10, size=50000)
x_inv = (x * lmbda + 1)**(-lmbda) x_inv = (x * lmbda + 1)**(-lmbda)
_xt, maxlog = stats.boxcox(x_inv) xt, maxlog = stats.boxcox(x_inv)
assert_almost_equal(maxlog, -1 / lmbda, decimal=2) assert_almost_equal(maxlog, -1 / lmbda, decimal=2)
@ -601,18 +703,17 @@ class TestBoxcox(TestCase):
class TestBoxcoxNormmax(TestCase): class TestBoxcoxNormmax(TestCase):
def setUp(self): def setUp(self):
np.random.seed(12345) np.random.seed(12345)
self.x = stats.loggamma.rvs(5, size=50) + 5 self.x = stats.loggamma.rvs(5, size=50) + 5
def test_pearsonr(self): def test_pearsonr(self):
maxlog = stats.boxcox_normmax(self.x) maxlog = stats.boxcox_normmax(self.x)
assert_allclose(maxlog, 1.804465325046) assert_allclose(maxlog, 1.804465, rtol=1e-6)
def test_mle(self): def test_mle(self):
maxlog = stats.boxcox_normmax(self.x, method='mle') maxlog = stats.boxcox_normmax(self.x, method='mle')
assert_allclose(maxlog, 1.758101454114) assert_allclose(maxlog, 1.758101, rtol=1e-6)
# Check that boxcox() uses 'mle' # Check that boxcox() uses 'mle'
_, maxlog_boxcox = stats.boxcox(self.x) _, maxlog_boxcox = stats.boxcox(self.x)
@ -620,11 +721,10 @@ class TestBoxcoxNormmax(TestCase):
def test_all(self): def test_all(self):
maxlog_all = stats.boxcox_normmax(self.x, method='all') maxlog_all = stats.boxcox_normmax(self.x, method='all')
assert_allclose(maxlog_all, [1.804465325046, 1.758101454114]) assert_allclose(maxlog_all, [1.804465, 1.758101], rtol=1e-6)
class TestBoxcoxNormplot(TestCase): class TestBoxcoxNormplot(TestCase):
def setUp(self): def setUp(self):
np.random.seed(7654321) np.random.seed(7654321)
self.x = stats.loggamma.rvs(5, size=500) + 5 self.x = stats.loggamma.rvs(5, size=500) + 5
@ -662,7 +762,6 @@ class TestBoxcoxNormplot(TestCase):
class TestCircFuncs(TestCase): class TestCircFuncs(TestCase):
def test_circfuncs(self): def test_circfuncs(self):
x = np.array([355,5,2,359,10,350]) x = np.array([355,5,2,359,10,350])
M = stats.circmean(x, high=360) M = stats.circmean(x, high=360)
@ -803,5 +902,108 @@ def test_wilcoxon_tie():
assert_allclose(p, expected_p, rtol=1e-6) assert_allclose(p, expected_p, rtol=1e-6)
class TestMedianTest(TestCase):
def test_bad_n_samples(self):
# median_test requires at least two samples.
assert_raises(ValueError, stats.median_test, [1, 2, 3])
def test_empty_sample(self):
# Each sample must contain at least one value.
assert_raises(ValueError, stats.median_test, [], [1, 2, 3])
def test_empty_when_ties_ignored(self):
# The grand median is 1, and all values in the first argument are
# equal to the grand median. With ties="ignore", those values are
# ignored, which results in the first sample being (in effect) empty.
# This should raise a ValueError.
assert_raises(ValueError, stats.median_test,
[1, 1, 1, 1], [2, 0, 1], [2, 0], ties="ignore")
def test_empty_contingency_row(self):
# The grand median is 1, and with the default ties="below", all the
# values in the samples are counted as being below the grand median.
# This would result in a row of zeros in the contingency table, which is
# an error.
assert_raises(ValueError, stats.median_test, [1, 1, 1], [1, 1, 1])
# With ties="above", all the values are counted as above the
# grand median.
assert_raises(ValueError, stats.median_test, [1, 1, 1], [1, 1, 1],
ties="above")
def test_bad_ties(self):
assert_raises(ValueError, stats.median_test, [1, 2, 3], [4, 5], ties="foo")
def test_bad_keyword(self):
assert_raises(TypeError, stats.median_test, [1, 2, 3], [4, 5], foo="foo")
def test_simple(self):
x = [1, 2, 3]
y = [1, 2, 3]
stat, p, med, tbl = stats.median_test(x, y)
# The median is floating point, but this equality test should be safe.
assert_equal(med, 2.0)
assert_array_equal(tbl, [[1, 1], [2, 2]])
# The expected frequencies of the contingency table equal the observed
# table, so the statistic should be 0 and the p-value should be 1.
assert_equal(stat, 0)
assert_equal(p, 1)
def test_ties_options(self):
# Test the contingency table calculation.
x = [1, 2, 3, 4]
y = [5, 6]
z = [7, 8, 9]
# grand median is 5.
# Default 'ties' option is "below".
stat, p, m, tbl = stats.median_test(x, y, z)
assert_equal(m, 5)
assert_equal(tbl, [[0, 1, 3], [4, 1, 0]])
stat, p, m, tbl = stats.median_test(x, y, z, ties="ignore")
assert_equal(m, 5)
assert_equal(tbl, [[0, 1, 3], [4, 0, 0]])
stat, p, m, tbl = stats.median_test(x, y, z, ties="above")
assert_equal(m, 5)
assert_equal(tbl, [[0, 2, 3], [4, 0, 0]])
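# A sketch of how the contingency tables above arise (an assumption about
# the implementation, inferred from the expected tables): row 0 counts the
# values above the grand median, row 1 the rest, and 'ties' decides which
# row receives the values equal to the grand median.
def _median_test_table(samples, ties="below"):
    grand = np.median(np.concatenate([np.asarray(s) for s in samples]))
    above = np.array([np.sum(np.asarray(s) > grand) for s in samples])
    below = np.array([np.sum(np.asarray(s) < grand) for s in samples])
    equal = np.array([np.sum(np.asarray(s) == grand) for s in samples])
    if ties == "above":
        above = above + equal
    elif ties == "below":
        below = below + equal
    # ties == "ignore": values equal to the grand median are dropped
    return np.vstack([above, below])
# With the x, y, z above, _median_test_table([x, y, z]) reproduces
# [[0, 1, 3], [4, 1, 0]] for the default ties="below".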
def test_basic(self):
# median_test calls chi2_contingency to compute the test statistic
# and p-value. Make sure it hasn't screwed up the call...
x = [1, 2, 3, 4, 5]
y = [2, 4, 6, 8]
stat, p, m, tbl = stats.median_test(x, y)
assert_equal(m, 4)
assert_equal(tbl, [[1, 2], [4, 2]])
exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl)
assert_allclose(stat, exp_stat)
assert_allclose(p, exp_p)
stat, p, m, tbl = stats.median_test(x, y, lambda_=0)
assert_equal(m, 4)
assert_equal(tbl, [[1, 2], [4, 2]])
exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl, lambda_=0)
assert_allclose(stat, exp_stat)
assert_allclose(p, exp_p)
stat, p, m, tbl = stats.median_test(x, y, correction=False)
assert_equal(m, 4)
assert_equal(tbl, [[1, 2], [4, 2]])
exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl, correction=False)
assert_allclose(stat, exp_stat)
assert_allclose(p, exp_p)
if __name__ == "__main__": if __name__ == "__main__":
run_module_suite() run_module_suite()

@ -1,5 +1,5 @@
""" """
Tests for the stats.mstats module (support for maskd arrays) Tests for the stats.mstats module (support for masked arrays)
""" """
from __future__ import division, print_function, absolute_import from __future__ import division, print_function, absolute_import
@ -13,14 +13,13 @@ from numpy.ma import masked, nomask
import wafo.stats.mstats as mstats import wafo.stats.mstats as mstats
from wafo import stats from wafo import stats
from numpy.testing import TestCase, run_module_suite from numpy.testing import TestCase, run_module_suite
from numpy.testing.decorators import skipif
from numpy.ma.testutils import (assert_equal, assert_almost_equal, from numpy.ma.testutils import (assert_equal, assert_almost_equal,
assert_array_almost_equal, assert_array_almost_equal, assert_array_almost_equal_nulp, assert_,
assert_array_almost_equal_nulp, assert_,
assert_allclose, assert_raises) assert_allclose, assert_raises)
class TestMquantiles(TestCase): class TestMquantiles(TestCase):
def test_mquantiles_limit_keyword(self): def test_mquantiles_limit_keyword(self):
# Regression test for Trac ticket #867 # Regression test for Trac ticket #867
data = np.array([[6., 7., 1.], data = np.array([[6., 7., 1.],
@ -42,7 +41,6 @@ class TestMquantiles(TestCase):
class TestGMean(TestCase): class TestGMean(TestCase):
def test_1D(self): def test_1D(self):
a = (1,2,3,4) a = (1,2,3,4)
actual = mstats.gmean(a) actual = mstats.gmean(a)
@ -61,6 +59,14 @@ class TestGMean(TestCase):
desired1 = mstats.gmean(a,axis=-1) desired1 = mstats.gmean(a,axis=-1)
assert_almost_equal(actual, desired1, decimal=14) assert_almost_equal(actual, desired1, decimal=14)
@skipif(not hasattr(np, 'float96'), 'cannot find float96 so skipping')
def test_1D_float96(self):
a = ma.array((1,2,3,4), mask=(0,0,0,1))
actual_dt = mstats.gmean(a, dtype=np.float96)
desired_dt = np.power(1 * 2 * 3, 1. / 3.).astype(np.float96)
assert_almost_equal(actual_dt, desired_dt, decimal=14)
assert_(actual_dt.dtype == desired_dt.dtype)
def test_2D(self): def test_2D(self):
a = ma.array(((1, 2, 3, 4), (1, 2, 3, 4), (1, 2, 3, 4)), a = ma.array(((1, 2, 3, 4), (1, 2, 3, 4), (1, 2, 3, 4)),
mask=((0, 0, 0, 0), (1, 0, 0, 1), (0, 1, 1, 0))) mask=((0, 0, 0, 0), (1, 0, 0, 1), (0, 1, 1, 0)))
@ -79,7 +85,6 @@ class TestGMean(TestCase):
class TestHMean(TestCase): class TestHMean(TestCase):
def test_1D(self): def test_1D(self):
a = (1,2,3,4) a = (1,2,3,4)
actual = mstats.hmean(a) actual = mstats.hmean(a)
@ -95,6 +100,15 @@ class TestHMean(TestCase):
desired1 = mstats.hmean(a,axis=-1) desired1 = mstats.hmean(a,axis=-1)
assert_almost_equal(actual, desired1, decimal=14) assert_almost_equal(actual, desired1, decimal=14)
@skipif(not hasattr(np, 'float96'), 'cannot find float96 so skipping')
def test_1D_float96(self):
a = ma.array((1,2,3,4), mask=(0,0,0,1))
actual_dt = mstats.hmean(a, dtype=np.float96)
desired_dt = np.asarray(3. / (1./1 + 1./2 + 1./3),
dtype=np.float96)
assert_almost_equal(actual_dt, desired_dt, decimal=14)
assert_(actual_dt.dtype == desired_dt.dtype)
def test_2D(self): def test_2D(self):
a = ma.array(((1,2,3,4),(1,2,3,4),(1,2,3,4)), a = ma.array(((1,2,3,4),(1,2,3,4),(1,2,3,4)),
mask=((0,0,0,0),(1,0,0,1),(0,1,1,0))) mask=((0,0,0,0),(1,0,0,1),(0,1,1,0)))
@ -111,33 +125,31 @@ class TestHMean(TestCase):
class TestRanking(TestCase): class TestRanking(TestCase):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
TestCase.__init__(self, *args, **kwargs) TestCase.__init__(self, *args, **kwargs)
def test_ranking(self): def test_ranking(self):
x = ma.array([0,1,1,1,2,3,4,5,5,6,]) x = ma.array([0,1,1,1,2,3,4,5,5,6,])
assert_almost_equal( assert_almost_equal(mstats.rankdata(x),
mstats.rankdata(x), [1, 3, 3, 3, 5, 6, 7, 8.5, 8.5, 10]) [1,3,3,3,5,6,7,8.5,8.5,10])
x[[3,4]] = masked x[[3,4]] = masked
assert_almost_equal( assert_almost_equal(mstats.rankdata(x),
mstats.rankdata(x), [1, 2.5, 2.5, 0, 0, 4, 5, 6.5, 6.5, 8]) [1,2.5,2.5,0,0,4,5,6.5,6.5,8])
assert_almost_equal(mstats.rankdata(x, use_missing=True), assert_almost_equal(mstats.rankdata(x, use_missing=True),
[1,2.5,2.5,4.5,4.5,4,5,6.5,6.5,8]) [1,2.5,2.5,4.5,4.5,4,5,6.5,6.5,8])
x = ma.array([0,1,5,1,2,4,3,5,1,6,]) x = ma.array([0,1,5,1,2,4,3,5,1,6,])
assert_almost_equal( assert_almost_equal(mstats.rankdata(x),
mstats.rankdata(x), [1, 3, 8.5, 3, 5, 7, 6, 8.5, 3, 10]) [1,3,8.5,3,5,7,6,8.5,3,10])
x = ma.array([[0,1,1,1,2], [3,4,5,5,6,]]) x = ma.array([[0,1,1,1,2], [3,4,5,5,6,]])
assert_almost_equal( assert_almost_equal(mstats.rankdata(x),
mstats.rankdata(x), [[1, 3, 3, 3, 5], [6, 7, 8.5, 8.5, 10]]) [[1,3,3,3,5], [6,7,8.5,8.5,10]])
assert_almost_equal( assert_almost_equal(mstats.rankdata(x, axis=1),
mstats.rankdata(x, axis=1), [[1, 3, 3, 3, 5], [1, 2, 3.5, 3.5, 5]]) [[1,3,3,3,5], [1,2,3.5,3.5,5]])
assert_almost_equal( assert_almost_equal(mstats.rankdata(x,axis=0),
mstats.rankdata(x, axis=0), [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2, ]]) [[1,1,1,1,1], [2,2,2,2,2,]])
class TestCorr(TestCase): class TestCorr(TestCase):
def test_pearsonr(self): def test_pearsonr(self):
# Tests some computations of Pearson's r # Tests some computations of Pearson's r
x = ma.arange(10) x = ma.arange(10)
@ -173,8 +185,7 @@ class TestCorr(TestCase):
# Tests some computations of Spearman's rho # Tests some computations of Spearman's rho
(x, y) = ([5.05,6.75,3.21,2.66],[1.65,2.64,2.64,6.95]) (x, y) = ([5.05,6.75,3.21,2.66],[1.65,2.64,2.64,6.95])
assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555) assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
(x, y) = ([5.05, 6.75, 3.21, 2.66, np.nan], (x, y) = ([5.05,6.75,3.21,2.66,np.nan],[1.65,2.64,2.64,6.95,np.nan])
[1.65, 2.64, 2.64, 6.95, np.nan])
(x, y) = (ma.fix_invalid(x), ma.fix_invalid(y)) (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555) assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
@ -220,8 +231,8 @@ class TestCorr(TestCase):
[0.18,0.53,0.20,0.04]) [0.18,0.53,0.20,0.04])
def test_pointbiserial(self): def test_pointbiserial(self):
x = [1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, x = [1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, -1] 0,0,0,0,1,-1]
y = [14.8,13.8,12.4,10.1,7.1,6.1,5.8,4.6,4.3,3.5,3.3,3.2,3.0, y = [14.8,13.8,12.4,10.1,7.1,6.1,5.8,4.6,4.3,3.5,3.3,3.2,3.0,
2.8,2.8,2.5,2.4,2.3,2.1,1.7,1.7,1.5,1.3,1.3,1.2,1.2,1.1, 2.8,2.8,2.5,2.4,2.3,2.1,1.7,1.7,1.5,1.3,1.3,1.2,1.2,1.1,
0.8,0.7,0.6,0.5,0.2,0.2,0.1,np.nan] 0.8,0.7,0.6,0.5,0.2,0.2,0.1,np.nan]
@ -234,8 +245,7 @@ class TestTrimming(TestCase):
a = ma.arange(10) a = ma.arange(10)
assert_equal(mstats.trim(a), [0,1,2,3,4,5,6,7,8,9]) assert_equal(mstats.trim(a), [0,1,2,3,4,5,6,7,8,9])
a = ma.arange(10) a = ma.arange(10)
assert_equal( assert_equal(mstats.trim(a,(2,8)), [None,None,2,3,4,5,6,7,8,None])
mstats.trim(a, (2, 8)), [None, None, 2, 3, 4, 5, 6, 7, 8, None])
a = ma.arange(10) a = ma.arange(10)
assert_equal(mstats.trim(a,limits=(2,8),inclusive=(False,False)), assert_equal(mstats.trim(a,limits=(2,8),inclusive=(False,False)),
[None,None,None,3,4,5,6,7,None,None]) [None,None,None,3,4,5,6,7,None,None])
@ -249,21 +259,24 @@ class TestTrimming(TestCase):
[None, None, 2, 3, 4, None, 6, 7, 8, None, None, None]) [None, None, 2, 3, 4, None, 6, 7, 8, None, None, None])
x = ma.arange(100).reshape(10, 10) x = ma.arange(100).reshape(10, 10)
expected = [1]*10 + [0]*70 + [1]*20
trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=None) trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=None)
assert_equal(trimx._mask.ravel(), [1] * 10 + [0] * 70 + [1] * 20) assert_equal(trimx._mask.ravel(), expected)
trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=0) trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=0)
assert_equal(trimx._mask.ravel(), [1] * 10 + [0] * 70 + [1] * 20) assert_equal(trimx._mask.ravel(), expected)
trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=-1) trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=-1)
assert_equal(trimx._mask.T.ravel(), [1] * 10 + [0] * 70 + [1] * 20) assert_equal(trimx._mask.T.ravel(), expected)
# same as above, but with an extra masked row inserted
x = ma.arange(110).reshape(11, 10) x = ma.arange(110).reshape(11, 10)
x[1] = masked x[1] = masked
expected = [1]*20 + [0]*70 + [1]*20
trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=None) trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=None)
assert_equal(trimx._mask.ravel(), [1] * 20 + [0] * 70 + [1] * 20) assert_equal(trimx._mask.ravel(), expected)
trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=0) trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=0)
assert_equal(trimx._mask.ravel(), [1] * 20 + [0] * 70 + [1] * 20) assert_equal(trimx._mask.ravel(), expected)
trimx = mstats.trim(x.T, (0.1,0.2), relative=True, axis=-1) trimx = mstats.trim(x.T, (0.1,0.2), relative=True, axis=-1)
assert_equal(trimx.T._mask.ravel(), [1] * 20 + [0] * 70 + [1] * 20) assert_equal(trimx.T._mask.ravel(), expected)
def test_trim_old(self): def test_trim_old(self):
x = ma.arange(100) x = ma.arange(100)
@ -272,8 +285,7 @@ class TestTrimming(TestCase):
x[50:70] = masked x[50:70] = masked
trimx = mstats.trimboth(x) trimx = mstats.trimboth(x)
assert_equal(trimx.count(), 48) assert_equal(trimx.count(), 48)
assert_equal( assert_equal(trimx._mask, [1]*16 + [0]*34 + [1]*20 + [0]*14 + [1]*16)
trimx._mask, [1] * 16 + [0] * 34 + [1] * 20 + [0] * 14 + [1] * 16)
x._mask = nomask x._mask = nomask
x.shape = (10,10) x.shape = (10,10)
assert_equal(mstats.trimboth(x).count(), 60) assert_equal(mstats.trimboth(x).count(), 60)
@ -401,21 +413,15 @@ class TestMoments(TestCase):
assert_equal(mstats.mode(ma2, axis=None), (0,3)) assert_equal(mstats.mode(ma2, axis=None), (0,3))
assert_equal(mstats.mode(a3, axis=None), (1,1)) assert_equal(mstats.mode(a3, axis=None), (1,1))
assert_equal(mstats.mode(ma3, axis=None), (2,1)) assert_equal(mstats.mode(ma3, axis=None), (2,1))
assert_equal( assert_equal(mstats.mode(a2, axis=0), ([[0,0,0,1,1]], [[1,1,1,1,1]]))
mstats.mode(a2, axis=0), ([[0, 0, 0, 1, 1]], [[1, 1, 1, 1, 1]])) assert_equal(mstats.mode(ma2, axis=0), ([[0,0,0,1,1]], [[1,1,1,1,1]]))
assert_equal( assert_equal(mstats.mode(a2, axis=-1), ([[0],[3],[3]], [[3],[3],[1]]))
mstats.mode(ma2, axis=0), ([[0, 0, 0, 1, 1]], [[1, 1, 1, 1, 1]])) assert_equal(mstats.mode(ma2, axis=-1), ([[0],[1],[0]], [[3],[1],[0]]))
assert_equal(
mstats.mode(a2, axis=-1), ([[0], [3], [3]], [[3], [3], [1]]))
assert_equal(
mstats.mode(ma2, axis=-1), ([[0], [1], [0]], [[3], [1], [0]]))
assert_equal(mstats.mode(ma4, axis=0), ([[3,2]], [[1,1]])) assert_equal(mstats.mode(ma4, axis=0), ([[3,2]], [[1,1]]))
assert_equal( assert_equal(mstats.mode(ma4, axis=-1), ([[2],[3],[5]], [[1],[1],[1]]))
mstats.mode(ma4, axis=-1), ([[2], [3], [5]], [[1], [1], [1]]))
class TestPercentile(TestCase): class TestPercentile(TestCase):
def setUp(self): def setUp(self):
self.a1 = [3,4,5,10,-3,-5,6] self.a1 = [3,4,5,10,-3,-5,6]
self.a2 = [3,-6,-2,8,7,4,2,1] self.a2 = [3,-6,-2,8,7,4,2,1]
@ -437,7 +443,6 @@ class TestPercentile(TestCase):
class TestVariability(TestCase): class TestVariability(TestCase):
""" Comparison numbers are found using R v.1.5.1 """ Comparison numbers are found using R v.1.5.1
note that length(testcase) = 4 note that length(testcase) = 4
""" """
@ -530,6 +535,27 @@ def test_regress_simple():
assert_almost_equal(intercept, 10.211269918932341) assert_almost_equal(intercept, 10.211269918932341)
def test_theilslopes():
# Test for basic slope and intercept.
slope, intercept, lower, upper = mstats.theilslopes([0,1,1])
assert_almost_equal(slope, 0.5)
assert_almost_equal(intercept, 0.5)
# Test for correct masking.
y = np.ma.array([0,1,100,1], mask=[False, False, True, False])
slope, intercept, lower, upper = mstats.theilslopes(y)
assert_almost_equal(slope, 1./3)
assert_almost_equal(intercept, 2./3)
# Test of confidence intervals from example in Sen (1968).
x = [1, 2, 3, 4, 10, 12, 18]
y = [9, 15, 19, 20, 45, 55, 78]
slope, intercept, lower, upper = mstats.theilslopes(y, x, 0.07)
assert_almost_equal(slope, 4)
assert_almost_equal(upper, 4.38, decimal=2)
assert_almost_equal(lower, 3.71, decimal=2)
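# For context, a small sketch of the estimator the first assertions
# exercise (assuming the usual convention that the returned intercept is
# median(y) - slope * median(x)):
def _theil_slope_sketch(y, x=None):
    y = np.asarray(y, dtype=float)
    x = np.arange(len(y), dtype=float) if x is None else np.asarray(x, dtype=float)
    # the Theil slope is the median of all pairwise slopes
    slopes = [(y[j] - y[i]) / (x[j] - x[i])
              for i in range(len(y)) for j in range(i + 1, len(y))]
    slope = np.median(slopes)
    return slope, np.median(y) - slope * np.median(x)
# _theil_slope_sketch([0, 1, 1]) gives (0.5, 0.5), matching the first
# two assertions above.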
def test_plotting_positions(): def test_plotting_positions():
# Regression test for #1256 # Regression test for #1256
pos = mstats.plotting_positions(np.arange(3), 0, 0) pos = mstats.plotting_positions(np.arange(3), 0, 0)
@ -540,8 +566,10 @@ class TestNormalitytests():
def test_vs_nonmasked(self): def test_vs_nonmasked(self):
x = np.array((-2,-1,0,1,2,3)*4)**2 x = np.array((-2,-1,0,1,2,3)*4)**2
assert_array_almost_equal(mstats.normaltest(x), stats.normaltest(x)) assert_array_almost_equal(mstats.normaltest(x),
assert_array_almost_equal(mstats.skewtest(x), stats.skewtest(x)) stats.normaltest(x))
assert_array_almost_equal(mstats.skewtest(x),
stats.skewtest(x))
assert_array_almost_equal(mstats.kurtosistest(x), assert_array_almost_equal(mstats.kurtosistest(x),
stats.kurtosistest(x)) stats.kurtosistest(x))
@ -581,7 +609,6 @@ class TestNormalitytests():
#TODO: for all ttest functions, add tests with masked array inputs #TODO: for all ttest functions, add tests with masked array inputs
class TestTtest_rel(): class TestTtest_rel():
def test_vs_nonmasked(self): def test_vs_nonmasked(self):
np.random.seed(1234567) np.random.seed(1234567)
outcome = np.random.randn(20, 4) + [0, 0, 1, 2] outcome = np.random.randn(20, 4) + [0, 0, 1, 2]
@ -618,7 +645,6 @@ class TestTtest_rel():
class TestTtest_ind(): class TestTtest_ind():
def test_vs_nonmasked(self): def test_vs_nonmasked(self):
np.random.seed(1234567) np.random.seed(1234567)
outcome = np.random.randn(20, 4) + [0, 0, 1, 2] outcome = np.random.randn(20, 4) + [0, 0, 1, 2]
@ -646,7 +672,6 @@ class TestTtest_ind():
class TestTtest_1samp(): class TestTtest_1samp():
def test_vs_nonmasked(self): def test_vs_nonmasked(self):
np.random.seed(1234567) np.random.seed(1234567)
outcome = np.random.randn(20, 4) + [0, 0, 1, 2] outcome = np.random.randn(20, 4) + [0, 0, 1, 2]
@ -673,5 +698,358 @@ class TestTtest_1samp():
assert_(np.all(np.isnan(res1))) assert_(np.all(np.isnan(res1)))
class TestCompareWithStats(TestCase):
"""
Class to compare mstats results with stats results.
It is generally assumed that scipy.stats is at a more mature stage than
stats.mstats. If a routine in mstats produces results similar to those in
scipy.stats, this is also considered a proper validation of the scipy.mstats
routine.
Different sample sizes are used for testing, as some problems between stats
and mstats depend on sample size.
Author: Alexander Loew
NOTE that some tests fail. This might be caused by
a) actual differences or bugs between stats and mstats
b) numerical inaccuracies
c) different definitions of routine interfaces
These failures need to be checked. The current workaround is to disable
these tests and issue reports on scipy-dev.
"""
def get_n(self):
""" Returns list of sample sizes to be used for comparison. """
return [1000, 100, 10, 5]
def generate_xy_sample(self, n):
# This routine generates numpy arrays and corresponding masked arrays
# with the same data, but with additional masked values appended
np.random.seed(1234567)
x = np.random.randn(n)
y = x + np.random.randn(n)
xm = np.ones(len(x) + 5) * 1e16
ym = np.ones(len(y) + 5) * 1e16
xm[0:len(x)] = x
ym[0:len(y)] = y
mask = xm > 9e15
xm = np.ma.array(xm, mask=mask)
ym = np.ma.array(ym, mask=mask)
return x, y, xm, ym
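# Sanity check of the masking scheme: the five sentinel entries (1e16) are
# the only values above the 9e15 cutoff, so the masked arrays compress
# back to the original data, e.g.
#   x, y, xm, ym = self.generate_xy_sample(10)
#   np.allclose(xm.compressed(), x)   # -> True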
def generate_xy_sample2D(self, n, nx):
x = np.ones((n, nx)) * np.nan
y = np.ones((n, nx)) * np.nan
xm = np.ones((n+5, nx)) * np.nan
ym = np.ones((n+5, nx)) * np.nan
for i in range(nx):
x[:,i], y[:,i], dx, dy = self.generate_xy_sample(n)
xm[0:n, :] = x[0:n]
ym[0:n, :] = y[0:n]
xm = np.ma.array(xm, mask=np.isnan(xm))
ym = np.ma.array(ym, mask=np.isnan(ym))
return x, y, xm, ym
def test_linregress(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
res1 = stats.linregress(x, y)
res2 = stats.mstats.linregress(xm, ym)
assert_allclose(np.asarray(res1), np.asarray(res2))
def test_pearsonr(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r, p = stats.pearsonr(x, y)
rm, pm = stats.mstats.pearsonr(xm, ym)
assert_almost_equal(r, rm, decimal=14)
assert_almost_equal(p, pm, decimal=14)
def test_spearmanr(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r, p = stats.spearmanr(x, y)
rm, pm = stats.mstats.spearmanr(xm, ym)
assert_almost_equal(r, rm, 14)
assert_almost_equal(p, pm, 14)
def test_gmean(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.gmean(abs(x))
rm = stats.mstats.gmean(abs(xm))
assert_allclose(r, rm, rtol=1e-13)
r = stats.gmean(abs(y))
rm = stats.mstats.gmean(abs(ym))
assert_allclose(r, rm, rtol=1e-13)
def test_hmean(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.hmean(abs(x))
rm = stats.mstats.hmean(abs(xm))
assert_almost_equal(r, rm, 10)
r = stats.hmean(abs(y))
rm = stats.mstats.hmean(abs(ym))
assert_almost_equal(r, rm, 10)
def test_skew(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.skew(x)
rm = stats.mstats.skew(xm)
assert_almost_equal(r, rm, 10)
r = stats.skew(y)
rm = stats.mstats.skew(ym)
assert_almost_equal(r, rm, 10)
def test_moment(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.moment(x)
rm = stats.mstats.moment(xm)
assert_almost_equal(r, rm, 10)
r = stats.moment(y)
rm = stats.mstats.moment(ym)
assert_almost_equal(r, rm, 10)
def test_signaltonoise(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.signaltonoise(x)
rm = stats.mstats.signaltonoise(xm)
assert_almost_equal(r, rm, 10)
r = stats.signaltonoise(y)
rm = stats.mstats.signaltonoise(ym)
assert_almost_equal(r, rm, 10)
def test_betai(self):
np.random.seed(12345)
for i in range(10):
a = np.random.rand() * 5.
b = np.random.rand() * 200.
assert_equal(stats.betai(a, b, 0.), 0.)
assert_equal(stats.betai(a, b, 1.), 1.)
assert_equal(stats.mstats.betai(a, b, 0.), 0.)
assert_equal(stats.mstats.betai(a, b, 1.), 1.)
x = np.random.rand()
assert_almost_equal(stats.betai(a, b, x),
stats.mstats.betai(a, b, x), decimal=13)
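# Background for the endpoint checks above (an assumption about the
# definition): betai(a, b, x) appears to be the regularized incomplete
# beta function I_x(a, b), which satisfies I_0(a, b) = 0 and
# I_1(a, b) = 1 for any positive a, b.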
def test_zscore(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
#reference solution
zx = (x - x.mean()) / x.std()
zy = (y - y.mean()) / y.std()
#validate stats
assert_allclose(stats.zscore(x), zx, rtol=1e-10)
assert_allclose(stats.zscore(y), zy, rtol=1e-10)
#compare stats and mstats
assert_allclose(stats.zscore(x), stats.mstats.zscore(xm[0:len(x)]),
rtol=1e-10)
assert_allclose(stats.zscore(y), stats.mstats.zscore(ym[0:len(y)]),
rtol=1e-10)
def test_kurtosis(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.kurtosis(x)
rm = stats.mstats.kurtosis(xm)
assert_almost_equal(r, rm, 10)
r = stats.kurtosis(y)
rm = stats.mstats.kurtosis(ym)
assert_almost_equal(r, rm, 10)
def test_sem(self):
# example from stats.sem doc
a = np.arange(20).reshape(5,4)
am = np.ma.array(a)
r = stats.sem(a,ddof=1)
rm = stats.mstats.sem(am, ddof=1)
assert_allclose(r, 2.82842712, atol=1e-5)
assert_allclose(rm, 2.82842712, atol=1e-5)
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=0),
stats.sem(x, axis=None, ddof=0), decimal=13)
assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=0),
stats.sem(y, axis=None, ddof=0), decimal=13)
assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=1),
stats.sem(x, axis=None, ddof=1), decimal=13)
assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=1),
stats.sem(y, axis=None, ddof=1), decimal=13)
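# Quick arithmetic check of the 2.82842712 constant above: each column of
# a is [c, c+4, c+8, c+12, c+16], whose sample standard deviation (ddof=1)
# is sqrt(40), so sem = sqrt(40) / sqrt(5) = 2*sqrt(2) ~ 2.8284271.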
def test_describe(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.describe(x, ddof=1)
rm = stats.mstats.describe(xm, ddof=1)
for ii in range(6):
assert_almost_equal(np.asarray(r[ii]),
np.asarray(rm[ii]),
decimal=12)
def test_rankdata(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.rankdata(x)
rm = stats.mstats.rankdata(x)
assert_allclose(r, rm)
def test_tmean(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
assert_almost_equal(stats.tmean(x),stats.mstats.tmean(xm), 14)
assert_almost_equal(stats.tmean(y),stats.mstats.tmean(ym), 14)
def test_tmax(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
assert_almost_equal(stats.tmax(x,2.),
stats.mstats.tmax(xm,2.), 10)
assert_almost_equal(stats.tmax(y,2.),
stats.mstats.tmax(ym,2.), 10)
def test_tmin(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
assert_equal(stats.tmin(x),stats.mstats.tmin(xm))
assert_equal(stats.tmin(y),stats.mstats.tmin(ym))
assert_almost_equal(stats.tmin(x,lowerlimit=-1.),
stats.mstats.tmin(xm,lowerlimit=-1.), 10)
assert_almost_equal(stats.tmin(y,lowerlimit=-1.),
stats.mstats.tmin(ym,lowerlimit=-1.), 10)
def test_zmap(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
z = stats.zmap(x,y)
zm = stats.mstats.zmap(xm,ym)
assert_allclose(z, zm[0:len(z)], atol=1e-10)
def test_variation(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
assert_almost_equal(stats.variation(x), stats.mstats.variation(xm),
decimal=12)
assert_almost_equal(stats.variation(y), stats.mstats.variation(ym),
decimal=12)
def test_tvar(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
assert_almost_equal(stats.tvar(x), stats.mstats.tvar(xm),
decimal=12)
assert_almost_equal(stats.tvar(y), stats.mstats.tvar(ym),
decimal=12)
def test_trimboth(self):
a = np.arange(20)
b = stats.trimboth(a, 0.1)
bm = stats.mstats.trimboth(a, 0.1)
assert_allclose(b, bm.data[~bm.mask])
def test_tsem(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
assert_almost_equal(stats.tsem(x),stats.mstats.tsem(xm), decimal=14)
assert_almost_equal(stats.tsem(y),stats.mstats.tsem(ym), decimal=14)
assert_almost_equal(stats.tsem(x,limits=(-2.,2.)),
stats.mstats.tsem(xm,limits=(-2.,2.)),
decimal=14)
def test_skewtest(self):
# this test is for 1D data
for n in self.get_n():
if n > 8:
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.skewtest(x)
rm = stats.mstats.skewtest(xm)
assert_equal(r[0], rm[0])
# TODO: this check is not performed, as it is a known issue that
# mstats returns a slightly different p-value. What is a bit
# strange is that other tests, like test_maskedarray_input, don't
# fail!
#~ assert_almost_equal(r[1], rm[1])
def test_skewtest_2D_notmasked(self):
# a normal ndarray is passed to the masked function
x = np.random.random((20, 2)) * 20.
r = stats.skewtest(x)
rm = stats.mstats.skewtest(x)
assert_allclose(np.asarray(r), np.asarray(rm))
def test_skewtest_2D_WithMask(self):
nx = 2
for n in self.get_n():
if n > 8:
x, y, xm, ym = self.generate_xy_sample2D(n, nx)
r = stats.skewtest(x)
rm = stats.mstats.skewtest(xm)
assert_equal(r[0][0],rm[0][0])
assert_equal(r[0][1],rm[0][1])
def test_normaltest(self):
np.seterr(over='raise')
for n in self.get_n():
if n > 8:
with warnings.catch_warnings():
warnings.filterwarnings('ignore', category=UserWarning)
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.normaltest(x)
rm = stats.mstats.normaltest(xm)
assert_allclose(np.asarray(r), np.asarray(rm))
def test_find_repeats(self):
x = np.asarray([1,1,2,2,3,3,3,4,4,4,4]).astype('float')
tmp = np.asarray([1,1,2,2,3,3,3,4,4,4,4,5,5,5,5]).astype('float')
mask = (tmp == 5.)
xm = np.ma.array(tmp, mask=mask)
r = stats.find_repeats(x)
rm = stats.mstats.find_repeats(xm)
assert_equal(r,rm)
def test_kendalltau(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.kendalltau(x, y)
rm = stats.mstats.kendalltau(xm, ym)
assert_almost_equal(r[0], rm[0], decimal=10)
assert_almost_equal(r[1], rm[1], decimal=7)
def test_obrientransform(self):
for n in self.get_n():
x, y, xm, ym = self.generate_xy_sample(n)
r = stats.obrientransform(x)
rm = stats.mstats.obrientransform(xm)
assert_almost_equal(r.T, rm[0:len(x)])
if __name__ == "__main__": if __name__ == "__main__":
run_module_suite() run_module_suite()

@ -4,22 +4,34 @@ Test functions for multivariate normal distributions.
""" """
from __future__ import division, print_function, absolute_import from __future__ import division, print_function, absolute_import
from numpy.testing import (assert_almost_equal, from numpy.testing import (
run_module_suite, assert_allclose, assert_equal, assert_raises) assert_allclose,
assert_almost_equal,
assert_array_almost_equal,
assert_equal,
assert_raises,
run_module_suite,
)
import numpy import numpy
import numpy as np import numpy as np
import scipy.linalg import scipy.linalg
#import wafo.stats._multivariate from wafo.stats._multivariate import _PSD, _lnB
from wafo.stats import multivariate_normal from wafo.stats import multivariate_normal
from wafo.stats import dirichlet, beta
from wafo.stats import norm from wafo.stats import norm
from wafo.stats._multivariate import _psd_pinv_decomposed_log_pdet
from scipy.integrate import romb from scipy.integrate import romb
def test_input_shape():
mu = np.arange(3)
cov = np.identity(2)
assert_raises(ValueError, multivariate_normal.pdf, (0, 1), mu, cov)
assert_raises(ValueError, multivariate_normal.pdf, (0, 1, 2), mu, cov)
def test_scalar_values(): def test_scalar_values():
np.random.seed(1234) np.random.seed(1234)
@ -47,6 +59,63 @@ def test_logpdf():
assert_allclose(d1, np.log(d2)) assert_allclose(d1, np.log(d2))
def test_rank():
# Check that the rank is detected correctly.
np.random.seed(1234)
n = 4
mean = np.random.randn(n)
for expected_rank in range(1, n + 1):
s = np.random.randn(n, expected_rank)
cov = np.dot(s, s.T)
distn = multivariate_normal(mean, cov, allow_singular=True)
assert_equal(distn.cov_info.rank, expected_rank)
def _sample_orthonormal_matrix(n):
M = np.random.randn(n, n)
u, s, v = scipy.linalg.svd(M)
return u
def test_degenerate_distributions():
for n in range(1, 5):
x = np.random.randn(n)
for k in range(1, n + 1):
# Sample a small covariance matrix.
s = np.random.randn(k, k)
cov_kk = np.dot(s, s.T)
# Embed the small covariance matrix into a larger low rank matrix.
cov_nn = np.zeros((n, n))
cov_nn[:k, :k] = cov_kk
# Define a rotation of the larger low rank matrix.
u = _sample_orthonormal_matrix(n)
cov_rr = np.dot(u, np.dot(cov_nn, u.T))
y = np.dot(u, x)
# Check some identities.
distn_kk = multivariate_normal(np.zeros(k), cov_kk,
allow_singular=True)
distn_nn = multivariate_normal(np.zeros(n), cov_nn,
allow_singular=True)
distn_rr = multivariate_normal(np.zeros(n), cov_rr,
allow_singular=True)
assert_equal(distn_kk.cov_info.rank, k)
assert_equal(distn_nn.cov_info.rank, k)
assert_equal(distn_rr.cov_info.rank, k)
pdf_kk = distn_kk.pdf(x[:k])
pdf_nn = distn_nn.pdf(x)
pdf_rr = distn_rr.pdf(y)
assert_allclose(pdf_kk, pdf_nn)
assert_allclose(pdf_kk, pdf_rr)
logpdf_kk = distn_kk.logpdf(x[:k])
logpdf_nn = distn_nn.logpdf(x)
logpdf_rr = distn_rr.logpdf(y)
assert_allclose(logpdf_kk, logpdf_nn)
assert_allclose(logpdf_kk, logpdf_rr)
def test_large_pseudo_determinant(): def test_large_pseudo_determinant():
# Check that large pseudo-determinants are handled appropriately. # Check that large pseudo-determinants are handled appropriately.
@ -67,11 +136,12 @@ def test_large_pseudo_determinant():
# np.linalg.slogdet is only available in numpy 1.6+ # np.linalg.slogdet is only available in numpy 1.6+
# but scipy currently supports numpy 1.5.1. # but scipy currently supports numpy 1.5.1.
#assert_allclose(np.linalg.slogdet(cov[:npos, :npos]), (1, large_total_log)) # assert_allclose(np.linalg.slogdet(cov[:npos, :npos]),
# (1, large_total_log))
# Check the pseudo-determinant. # Check the pseudo-determinant.
U, log_pdet = _psd_pinv_decomposed_log_pdet(cov) psd = _PSD(cov)
assert_allclose(log_pdet, large_total_log) assert_allclose(psd.log_pdet, large_total_log)
def test_broadcasting(): def test_broadcasting():
@ -160,33 +230,43 @@ def test_pseudodet_pinv():
# Set cond so that the lowest eigenvalue is below the cutoff # Set cond so that the lowest eigenvalue is below the cutoff
cond = 1e-5 cond = 1e-5
U, log_pdet = _psd_pinv_decomposed_log_pdet(cov, cond) psd = _PSD(cov, cond=cond)
pinv = np.dot(U, U.T) psd_pinv = _PSD(psd.pinv, cond=cond)
_, log_pdet_pinv = _psd_pinv_decomposed_log_pdet(pinv, cond)
# Check that the log pseudo-determinant agrees with the sum # Check that the log pseudo-determinant agrees with the sum
# of the logs of all but the smallest eigenvalue # of the logs of all but the smallest eigenvalue
assert_allclose(log_pdet, np.sum(np.log(s[:-1]))) assert_allclose(psd.log_pdet, np.sum(np.log(s[:-1])))
# Check that the pseudo-determinant of the pseudo-inverse # Check that the pseudo-determinant of the pseudo-inverse
# agrees with 1 / pseudo-determinant # agrees with 1 / pseudo-determinant
assert_allclose(-log_pdet, log_pdet_pinv) assert_allclose(-psd.log_pdet, psd_pinv.log_pdet)
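What _PSD.log_pdet has to deliver can be written down directly from an eigendecomposition; the following standalone sketch (not the wafo implementation) reproduces both assertions above, including the reciprocal relation for the pseudo-inverse:

import numpy as np

def log_pdet(cov, cond=1e-12):
    # Log pseudo-determinant: sum of logs of the eigenvalues above a
    # relative cutoff -- the quantity the refactored _PSD exposes.
    s = np.linalg.eigvalsh(cov)
    eps = cond * np.max(np.abs(s))
    return np.log(s[s > eps]).sum()

cov = np.diag([1.0, 2.0, 4.0, 1e-10])
pinv = np.linalg.pinv(cov, rcond=1e-5)    # cutoff drops the tiny eigenvalue
assert np.isclose(log_pdet(cov, cond=1e-5), np.log(8.0))
assert np.isclose(log_pdet(pinv, cond=1e-5), -np.log(8.0))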
def test_exception_nonsquare_cov(): def test_exception_nonsquare_cov():
cov = [[1, 2, 3], [4, 5, 6]] cov = [[1, 2, 3], [4, 5, 6]]
assert_raises(ValueError, _psd_pinv_decomposed_log_pdet, cov) assert_raises(ValueError, _PSD, cov)
def test_exception_nonfinite_cov(): def test_exception_nonfinite_cov():
cov_nan = [[1, 0], [0, np.nan]] cov_nan = [[1, 0], [0, np.nan]]
assert_raises(ValueError, _psd_pinv_decomposed_log_pdet, cov_nan) assert_raises(ValueError, _PSD, cov_nan)
cov_inf = [[1, 0], [0, np.inf]] cov_inf = [[1, 0], [0, np.inf]]
assert_raises(ValueError, _psd_pinv_decomposed_log_pdet, cov_inf) assert_raises(ValueError, _PSD, cov_inf)
def test_exception_non_psd_cov(): def test_exception_non_psd_cov():
cov = [[1, 0], [0, -1]] cov = [[1, 0], [0, -1]]
assert_raises(ValueError, _psd_pinv_decomposed_log_pdet, cov) assert_raises(ValueError, _PSD, cov)
def test_exception_singular_cov():
np.random.seed(1234)
x = np.random.randn(5)
mean = np.random.randn(5)
cov = np.ones((5, 5))
e = np.linalg.LinAlgError
assert_raises(e, multivariate_normal, mean, cov)
assert_raises(e, multivariate_normal.pdf, x, mean, cov)
assert_raises(e, multivariate_normal.logpdf, x, mean, cov)
def test_R_values(): def test_R_values():
@ -216,6 +296,14 @@ def test_R_values():
assert_allclose(pdf, r_pdf, atol=1e-10) assert_allclose(pdf, r_pdf, atol=1e-10)
def test_multivariate_normal_rvs_zero_covariance():
mean = np.zeros(2)
covariance = np.zeros((2, 2))
model = multivariate_normal(mean, covariance, allow_singular=True)
sample = model.rvs()
assert_equal(sample, [0, 0])
def test_rvs_shape(): def test_rvs_shape():
# Check that rvs parses the mean and covariance correctly, and returns # Check that rvs parses the mean and covariance correctly, and returns
# an array of the right shape # an array of the right shape
@ -271,5 +359,127 @@ def test_entropy():
assert_almost_equal(desired, rv.entropy()) assert_almost_equal(desired, rv.entropy())
def test_lnB():
alpha = np.array([1, 1, 1])
desired = .5 # e^lnB = 1/2 for [1, 1, 1]
assert_almost_equal(np.exp(_lnB(alpha)), desired)
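Assuming _lnB is the standard log of the multivariate beta function (the Dirichlet normalizer, which is consistent with the expected value of 1/2 above), an equivalent standalone sketch is:

import numpy as np
from scipy.special import gammaln

def lnB(alpha):
    # log B(alpha) = sum(log Gamma(a_i)) - log Gamma(sum(a_i))
    alpha = np.asarray(alpha, dtype=float)
    return gammaln(alpha).sum() - gammaln(alpha.sum())

assert np.isclose(np.exp(lnB([1, 1, 1])), 0.5)  # Gamma(1)^3 / Gamma(3) = 1/2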
def test_frozen_dirichlet():
np.random.seed(2846)
n = np.random.randint(1, 32)
alpha = np.random.uniform(10e-10, 100, n)
d = dirichlet(alpha)
assert_equal(d.var(), dirichlet.var(alpha))
assert_equal(d.mean(), dirichlet.mean(alpha))
assert_equal(d.entropy(), dirichlet.entropy(alpha))
num_tests = 10
for i in range(num_tests):
x = np.random.uniform(10e-10, 100, n)
x /= np.sum(x)
assert_equal(d.pdf(x[:-1]), dirichlet.pdf(x[:-1], alpha))
assert_equal(d.logpdf(x[:-1]), dirichlet.logpdf(x[:-1], alpha))
def test_simple_values():
alpha = np.array([1, 1])
d = dirichlet(alpha)
assert_almost_equal(d.mean(), 0.5)
assert_almost_equal(d.var(), 1. / 12.)
b = beta(1, 1)
assert_almost_equal(d.mean(), b.mean())
assert_almost_equal(d.var(), b.var())
def test_K_and_K_minus_1_calls_equal():
# Test that calls with K and K-1 entries yield the same results.
np.random.seed(2846)
n = np.random.randint(1, 32)
alpha = np.random.uniform(10e-10, 100, n)
d = dirichlet(alpha)
num_tests = 10
for i in range(num_tests):
x = np.random.uniform(10e-10, 100, n)
x /= np.sum(x)
assert_almost_equal(d.pdf(x[:-1]), d.pdf(x))
def test_multiple_entry_calls():
# Test that calls with multiple x vectors as matrix work
np.random.seed(2846)
n = np.random.randint(1, 32)
alpha = np.random.uniform(10e-10, 100, n)
d = dirichlet(alpha)
num_tests = 10
num_multiple = 5
xm = None
for i in range(num_tests):
for m in range(num_multiple):
x = np.random.uniform(10e-10, 100, n)
x /= np.sum(x)
if xm is not None:
xm = np.vstack((xm, x))
else:
xm = x
rm = d.pdf(xm.T)
rs = None
for xs in xm:
r = d.pdf(xs)
if rs is not None:
rs = np.append(rs, r)
else:
rs = r
assert_array_almost_equal(rm, rs)
def test_2D_dirichlet_is_beta():
np.random.seed(2846)
alpha = np.random.uniform(10e-10, 100, 2)
d = dirichlet(alpha)
b = beta(alpha[0], alpha[1])
num_tests = 10
for i in range(num_tests):
x = np.random.uniform(10e-10, 100, 2)
x /= np.sum(x)
assert_almost_equal(b.pdf(x), d.pdf([x]))
assert_almost_equal(b.mean(), d.mean()[0])
assert_almost_equal(b.var(), d.var()[0])
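The 2-D Dirichlet/beta agreement follows directly from the densities; a self-contained sketch using the explicit Dirichlet formula (so it depends neither on wafo.stats nor on scipy shipping a dirichlet distribution):

import numpy as np
from scipy.special import gammaln
from scipy.stats import beta

def dirichlet_logpdf(x, alpha):
    # Explicit Dirichlet log-density on the simplex (x sums to 1).
    x, alpha = np.asarray(x, float), np.asarray(alpha, float)
    lnB = gammaln(alpha).sum() - gammaln(alpha.sum())
    return np.sum((alpha - 1) * np.log(x)) - lnB

a, b, x = 2.5, 0.7, 0.3
assert np.isclose(dirichlet_logpdf([x, 1 - x], [a, b]),
                  beta(a, b).logpdf(x))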
def test_dimensions_mismatch():
# Regression test for GH #3493. Check that setting up a PDF with a mean of
# length M and a covariance matrix of size (N, N), where M != N, raises a
# ValueError with an informative error message.
mu = np.array([0.0, 0.0])
sigma = np.array([[1.0]])
assert_raises(ValueError, multivariate_normal, mu, sigma)
# A simple check that the right error message was passed along. Checking
# that the entire message is there, word for word, would be somewhat
# fragile, so we just check for the leading part.
try:
multivariate_normal(mu, sigma)
except ValueError as e:
msg = "Dimension mismatch"
assert_equal(str(e)[:len(msg)], msg)
if __name__ == "__main__": if __name__ == "__main__":
run_module_suite() run_module_suite()

@ -8,13 +8,15 @@
""" """
from __future__ import division, print_function, absolute_import from __future__ import division, print_function, absolute_import
import sys
import warnings import warnings
from collections import namedtuple from collections import namedtuple
from numpy.testing import TestCase, assert_, assert_equal, \ from numpy.testing import (TestCase, assert_, assert_equal,
assert_almost_equal, assert_array_almost_equal, assert_array_equal, \ assert_almost_equal, assert_array_almost_equal,
assert_approx_equal, assert_raises, run_module_suite, \ assert_array_equal, assert_approx_equal,
assert_allclose, dec assert_raises, run_module_suite, assert_allclose,
dec)
import numpy.ma.testutils as mat import numpy.ma.testutils as mat
from numpy import array, arange, float32, float64, power from numpy import array, arange, float32, float64, power
import numpy as np import numpy as np
@ -170,6 +172,14 @@ class TestNanFunc(TestCase):
m = stats.nanmedian(self.X) m = stats.nanmedian(self.X)
assert_approx_equal(m, np.median(self.X)) assert_approx_equal(m, np.median(self.X))
def test_nanmedian_axis(self):
# Check nanmedian with axis
X = self.X.reshape(3,3)
m = stats.nanmedian(X, axis=0)
assert_equal(m, np.median(X, axis=0))
m = stats.nanmedian(X, axis=1)
assert_equal(m, np.median(X, axis=1))
def test_nanmedian_some(self): def test_nanmedian_some(self):
# Check nanmedian when some values only are nan. # Check nanmedian when some values only are nan.
m = stats.nanmedian(self.Xsome) m = stats.nanmedian(self.Xsome)
@ -177,8 +187,21 @@ class TestNanFunc(TestCase):
def test_nanmedian_all(self): def test_nanmedian_all(self):
# Check nanmedian when all values are nan. # Check nanmedian when all values are nan.
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
m = stats.nanmedian(self.Xall) m = stats.nanmedian(self.Xall)
assert_(np.isnan(m)) assert_(np.isnan(m))
assert_equal(len(w), 1)
assert_(issubclass(w[0].category, RuntimeWarning))
def test_nanmedian_all_axis(self):
# Check nanmedian when all values are nan.
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
m = stats.nanmedian(self.Xall.reshape(3,3), axis=1)
assert_(np.isnan(m).all())
assert_equal(len(w), 3)
assert_(issubclass(w[0].category, RuntimeWarning))
def test_nanmedian_scalars(self): def test_nanmedian_scalars(self):
# Check nanmedian for scalar inputs. See ticket #1098. # Check nanmedian for scalar inputs. See ticket #1098.
@ -449,6 +472,11 @@ class TestFisherExact(TestCase):
res.append(stats.fisher_exact(table, alternative="greater")[1]) res.append(stats.fisher_exact(table, alternative="greater")[1])
assert_allclose(res, pval, atol=0, rtol=1e-7) assert_allclose(res, pval, atol=0, rtol=1e-7)
def test_gh3014(self):
# check if issue #3014 has been fixed.
# before, this would have raised a ValueError

odds, pvalue = stats.fisher_exact([[1, 2], [9, 84419233]])
class TestCorrSpearmanr(TestCase): class TestCorrSpearmanr(TestCase):
""" W.II.D. Compute a correlation matrix on all the variables. """ W.II.D. Compute a correlation matrix on all the variables.
@ -601,9 +629,12 @@ def test_kendalltau():
assert_approx_equal(res[1], expected[1]) assert_approx_equal(res[1], expected[1])
# with only ties in one or both inputs # with only ties in one or both inputs
assert_(np.all(np.isnan(stats.kendalltau([2,2,2], [2,2,2])))) assert_equal(stats.kendalltau([2,2,2], [2,2,2]), (np.nan, np.nan))
assert_(np.all(np.isnan(stats.kendalltau([2,0,2], [2,2,2])))) assert_equal(stats.kendalltau([2,0,2], [2,2,2]), (np.nan, np.nan))
assert_(np.all(np.isnan(stats.kendalltau([2,2,2], [2,0,2])))) assert_equal(stats.kendalltau([2,2,2], [2,0,2]), (np.nan, np.nan))
# empty arrays provided as input
assert_equal(stats.kendalltau([], []), (np.nan, np.nan))
# check two different sort methods # check two different sort methods
assert_approx_equal(stats.kendalltau(x1, x2, initial_lexsort=False)[1], assert_approx_equal(stats.kendalltau(x1, x2, initial_lexsort=False)[1],
@ -718,6 +749,21 @@ class TestRegression(TestCase):
assert_(not np.isnan(res[4])) # stderr should stay finite assert_(not np.isnan(res[4])) # stderr should stay finite
def test_theilslopes():
# Basic slope test.
slope, intercept, lower, upper = stats.theilslopes([0,1,1])
assert_almost_equal(slope, 0.5)
assert_almost_equal(intercept, 0.5)
# Test of confidence intervals.
x = [1, 2, 3, 4, 10, 12, 18]
y = [9, 15, 19, 20, 45, 55, 78]
slope, intercept, lower, upper = stats.theilslopes(y, x, 0.07)
assert_almost_equal(slope, 4)
assert_almost_equal(upper, 4.38, decimal=2)
assert_almost_equal(lower, 3.71, decimal=2)
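The slope that theilslopes is being tested against is just the median of all pairwise slopes; a minimal sketch of that point estimate (the confidence bounds checked above come from Kendall-tau order statistics and are omitted here):

import numpy as np

def theil_slope(y, x):
    # Median of all pairwise slopes (y[j]-y[i])/(x[j]-x[i]), i < j.
    x, y = np.asarray(x, float), np.asarray(y, float)
    i, j = np.triu_indices(len(x), k=1)
    keep = x[j] != x[i]                   # skip pairs with equal x
    return np.median((y[j] - y[i])[keep] / (x[j] - x[i])[keep])

# default x in stats.theilslopes is arange(len(y)):
assert np.isclose(theil_slope([0, 1, 1], [0, 1, 2]), 0.5)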
class TestHistogram(TestCase): class TestHistogram(TestCase):
# Tests that histogram works as it should, and keeps old behaviour # Tests that histogram works as it should, and keeps old behaviour
# #
@ -1032,9 +1078,23 @@ class TestScoreatpercentile(TestCase):
assert_equal(scoreatperc(np.array([1, 10, 100]), 50, limit=(1, 10), assert_equal(scoreatperc(np.array([1, 10, 100]), 50, limit=(1, 10),
interpolation_method='higher'), 10) interpolation_method='higher'), 10)
def test_sequence(self): def test_sequence_per(self):
x = arange(8) * 0.5 x = arange(8) * 0.5
assert_equal(stats.scoreatpercentile(x, [0, 100, 50]), [0, 3.5, 1.75]) expected = np.array([0, 3.5, 1.75])
res = stats.scoreatpercentile(x, [0, 100, 50])
assert_allclose(res, expected)
assert_(isinstance(res, np.ndarray))
# Test with ndarray. Regression test for gh-2861
assert_allclose(stats.scoreatpercentile(x, np.array([0, 100, 50])),
expected)
# Also test combination of 2-D array, axis not None and array-like per
res2 = stats.scoreatpercentile(np.arange(12).reshape((3,4)),
np.array([0, 1, 100, 100]), axis=1)
expected2 = array([[0, 4, 8],
[0.03, 4.03, 8.03],
[3, 7, 11],
[3, 7, 11]])
assert_allclose(res2, expected2)
def test_axis(self): def test_axis(self):
scoreatperc = stats.scoreatpercentile scoreatperc = stats.scoreatpercentile
@ -1054,6 +1114,11 @@ class TestScoreatpercentile(TestCase):
assert_raises(ValueError, stats.scoreatpercentile, [1], 101) assert_raises(ValueError, stats.scoreatpercentile, [1], 101)
assert_raises(ValueError, stats.scoreatpercentile, [1], -1) assert_raises(ValueError, stats.scoreatpercentile, [1], -1)
def test_empty(self):
assert_equal(stats.scoreatpercentile([], 50), np.nan)
assert_equal(stats.scoreatpercentile(np.array([[], []]), 50), np.nan)
assert_equal(stats.scoreatpercentile([], [50, 99]), [np.nan, np.nan])
class TestItemfreq(object): class TestItemfreq(object):
a = [5, 7, 1, 2, 1, 5, 7] * 10 a = [5, 7, 1, 2, 1, 5, 7] * 10
@ -1089,7 +1154,7 @@ class TestItemfreq(object):
bb = np.array(list(zip(b, b)), dt) bb = np.array(list(zip(b, b)), dt)
v = stats.itemfreq(aa) v = stats.itemfreq(aa)
# Arrays don't compare equal because v[:,0] is object array # Arrays don't compare equal because v[:,0] is object array
assert_equal(v[2, 0], bb[2]) assert_equal(tuple(v[2, 0]), tuple(bb[2]))
class TestMode(TestCase): class TestMode(TestCase):
@ -1099,6 +1164,71 @@ class TestMode(TestCase):
assert_almost_equal(vals[0][0],6) assert_almost_equal(vals[0][0],6)
assert_almost_equal(vals[1][0],3) assert_almost_equal(vals[1][0],3)
def test_axes(self):
data1 = [10,10,30,40]
data2 = [10,10,10,10]
data3 = [20,10,20,20]
data4 = [30,30,30,30]
data5 = [40,30,30,30]
arr = np.array([data1, data2, data3, data4, data5])
vals = stats.mode(arr, axis=None)
assert_almost_equal(vals[0],np.array([30]))
assert_almost_equal(vals[1],np.array([8]))
vals = stats.mode(arr, axis=0)
assert_almost_equal(vals[0],np.array([[10,10,30,30]]))
assert_almost_equal(vals[1],np.array([[2,3,3,2]]))
vals = stats.mode(arr, axis=1)
assert_almost_equal(vals[0],np.array([[10],[10],[20],[30],[30]]))
assert_almost_equal(vals[1],np.array([[2],[4],[3],[4],[3]]))
def test_strings(self):
data1 = ['rain', 'showers', 'showers']
vals = stats.mode(data1)
expected = ['showers']
assert_equal(vals[0][0], 'showers')
assert_equal(vals[1][0], 2)
@dec.knownfailureif(sys.version_info > (3,), 'numpy github issue 641')
def test_mixed_objects(self):
objects = [10, True, np.nan, 'hello', 10]
arr = np.empty((5,), dtype=object)
arr[:] = objects
vals = stats.mode(arr)
assert_equal(vals[0][0], 10)
assert_equal(vals[1][0], 2)
def test_objects(self):
"""Python objects must be sortable (le + eq) and have ne defined
for np.unique to work. hash is for set.
"""
class Point(object):
def __init__(self, x):
self.x = x
def __eq__(self, other):
return self.x == other.x
def __ne__(self, other):
return self.x != other.x
def __lt__(self, other):
return self.x < other.x
def __hash__(self):
return hash(self.x)
points = [Point(x) for x in [1,2,3,4,3,2,2,2]]
arr = np.empty((8,), dtype=object)
arr[:] = points
assert len(set(points)) == 4
assert_equal(np.unique(arr).shape, (4,))
vals = stats.mode(arr)
assert_equal(vals[0][0], Point(2))
assert_equal(vals[1][0], 4)
class TestVariability(TestCase): class TestVariability(TestCase):
@ -1660,6 +1790,7 @@ def test_chisquare_masked_arrays():
# Empty arrays: # Empty arrays:
# A data set with length 0 returns a masked scalar. # A data set with length 0 returns a masked scalar.
with np.errstate(invalid='ignore'):
chisq, p = stats.chisquare(np.ma.array([])) chisq, p = stats.chisquare(np.ma.array([]))
assert_(isinstance(chisq, np.ma.MaskedArray)) assert_(isinstance(chisq, np.ma.MaskedArray))
assert_equal(chisq.shape, ()) assert_equal(chisq.shape, ())
@ -1675,6 +1806,7 @@ def test_chisquare_masked_arrays():
# empty3.T is an array containing 3 data sets, each with length 0, # empty3.T is an array containing 3 data sets, each with length 0,
# so an array of size (3,) is returned, with all values masked. # so an array of size (3,) is returned, with all values masked.
with np.errstate(invalid='ignore'):
chisq, p = stats.chisquare(empty3.T) chisq, p = stats.chisquare(empty3.T)
assert_(isinstance(chisq, np.ma.MaskedArray)) assert_(isinstance(chisq, np.ma.MaskedArray))
assert_equal(chisq.shape, (3,)) assert_equal(chisq.shape, (3,))
@ -2622,22 +2754,25 @@ class TestSigamClip(object):
class TestFOneWay(TestCase): class TestFOneWay(TestCase):
def test_trivial(self): def test_trivial(self):
# A trivial test of stats.f_oneway, with F=0. # A trivial test of stats.f_oneway, with F=0.
F, p = stats.f_oneway([0,2], [0,2]) F, p = stats.f_oneway([0,2], [0,2])
assert_equal(F, 0.0) assert_equal(F, 0.0)
def test_basic(self): def test_basic(self):
# A test of stats.f_oneway, with F=2.
F, p = stats.f_oneway([0,2], [2,4])
# Despite being a floating point calculation, this data should # Despite being a floating point calculation, this data should
# result in F being exactly 2.0. # result in F being exactly 2.0.
F, p = stats.f_oneway([0,2], [2,4])
assert_equal(F, 2.0) assert_equal(F, 2.0)
def test_large_integer_array(self):
a = np.array([655, 788], dtype=np.uint16)
b = np.array([789, 772], dtype=np.uint16)
F, p = stats.f_oneway(a, b)
assert_almost_equal(F, 0.77450216931805538)
class TestKruskal(TestCase):
class TestKruskal(TestCase):
def test_simple(self): def test_simple(self):
x = [1] x = [1]
y = [2] y = [2]

@ -297,9 +297,11 @@ def test_hygfz():
assert_almost_equal(1.0464328112173522, hygfz(0.1, 0.2, 0.3, 0.5)) assert_almost_equal(1.0464328112173522, hygfz(0.1, 0.2, 0.3, 0.5))
assert_almost_equal(1.2027034401166194, hygfz(0.1, 0.2, 0.3, 0.95)) assert_almost_equal(1.2027034401166194, hygfz(0.1, 0.2, 0.3, 0.95))
#assert_equal(1.661006238211309e-07, hygfz(5, -300, 10, 0.5)) #assert_equal(1.661006238211309e-07, hygfz(5, -300, 10, 0.5))
assert_equal(0.118311386286, hygfz(0.5, -99.0, 1.5, 0.5625)) #assert_equal(0.118311386286, hygfz(0.5, -99.0, 1.5, 0.5625))
assert_equal(0.0965606007742, hygfz(0.5, -149.0, 1.5, 0.5625)) #assert_equal(0.0965606007742, hygfz(0.5, -149.0, 1.5, 0.5625))
assert_equal(0.49234384000963544+0.60513406166123973j, hygfz(1, 1, 4, 3+4j)) #assert_equal(0.49234384000963544 + 0.60513406166123973j,
# hygfz(1, 1, 4, 3 + 4j))
def test_common_shape(): def test_common_shape():
A = np.ones((4, 1)) A = np.ones((4, 1))

@ -2,5 +2,6 @@
Transform package in WAFO Toolbox. Transform package in WAFO Toolbox.
""" """
from core import * from .core import *
import models from . import models
from . import estimation

@ -4,12 +4,14 @@ from __future__ import division
#import numpy as np #import numpy as np
from numpy import trapz, sqrt, linspace # @UnresolvedImport from numpy import trapz, sqrt, linspace # @UnresolvedImport
from wafo.wafodata import PlotData from wafo.containers import PlotData
from wafo.misc import tranproc # , trangood from wafo.misc import tranproc # , trangood
__all__ = ['TrData', 'TrCommon'] __all__ = ['TrData', 'TrCommon']
class TrCommon(object): class TrCommon(object):
""" """
<generic> transformation model, g. <generic> transformation model, g.
@ -102,8 +104,10 @@ class TrCommon(object):
tranproc tranproc
""" """
return self._gauss2dat(y, *yi) return self._gauss2dat(y, *yi)
def _gauss2dat(self, y, *yi): def _gauss2dat(self, y, *yi):
pass pass
def dat2gauss(self, x, *xi): def dat2gauss(self, x, *xi):
""" """
Transforms non-linear data, x, to Gaussian scale. Transforms non-linear data, x, to Gaussian scale.
@ -111,8 +115,8 @@ class TrCommon(object):
Parameters Parameters
---------- ----------
x, x1,...,xn : array-like x, x1,...,xn : array-like
input vectors with non-linear data values, where xi is the i'th time input vectors with non-linear data values, where xi is the i'th
derivative of x. (n<=4) time derivative of x. (n<=4)
Returns Returns
------- -------
y, y1,...,yn : array-like y, y1,...,yn : array-like
@ -124,18 +128,21 @@ class TrCommon(object):
tranproc. tranproc.
""" """
return self._dat2gauss(x, *xi) return self._dat2gauss(x, *xi)
def _dat2gauss(self, x, *xi): def _dat2gauss(self, x, *xi):
pass pass
class TrData(PlotData, TrCommon): class TrData(PlotData, TrCommon):
__doc__ = TrCommon.__doc__.split('mean')[0].replace('<generic>','Data' #@ReservedAssignment __doc__ = TrCommon.__doc__.split('mean')[0].replace('<generic>',
) + """ 'Data') + """
data : array-like data : array-like
Gaussian values, Y Gaussian values, Y
args : array-like args : array-like
non-Gaussian values, X non-Gaussian values, X
ymean, ysigma : real, scalars (default ymean=0, ysigma=1) ymean, ysigma : real, scalars (default ymean=0, ysigma=1)
mean and standard-deviation, respectively, of the process in Gaussian world. mean and standard-deviation, respectively, of the process in Gaussian
world.
mean, sigma : real, scalars mean, sigma : real, scalars
mean and standard-deviation, respectively, of the non-Gaussian process. mean and standard-deviation, respectively, of the non-Gaussian process.
Default: Default:
@ -167,6 +174,7 @@ class TrData(PlotData, TrCommon):
>>> g.dist2gauss() < 1e-16 >>> g.dist2gauss() < 1e-16
True True
""" """
def __init__(self, *args, **kwds): def __init__(self, *args, **kwds):
options = dict(title='Transform', options = dict(title='Transform',
xlab='x', ylab='g(x)', xlab='x', ylab='g(x)',
@ -187,7 +195,8 @@ class TrData(PlotData, TrCommon):
ym = self.ymean - self.ysigma ym = self.ymean - self.ysigma
self.sigma = (self.gauss2dat(yp) - self.gauss2dat(ym)) / 2. self.sigma = (self.gauss2dat(yp) - self.gauss2dat(ym)) / 2.
self.children = [PlotData((self.args-self.mean)/self.sigma, self.args)] self.children = [
PlotData((self.args - self.mean) / self.sigma, self.args)]
def trdata(self): def trdata(self):
return self return self
@ -198,6 +207,9 @@ class TrData(PlotData, TrCommon):
def _dat2gauss(self, x, *xi): def _dat2gauss(self, x, *xi):
return tranproc(self.args, self.data, x, *xi) return tranproc(self.args, self.data, x, *xi)
class EstimateTransform(object):
pass
def main(): def main():
pass pass

@ -5,16 +5,6 @@ TrHermite
TrOchi TrOchi
TrLinear TrLinear
''' '''
#-------------------------------------------------------------------------------
# Name: transform.models
# Purpose:
#
# Author: pab
#
# Created: 24.11.2008
# Copyright: (c) pab 2008
# Licence: <your licence>
#-------------------------------------------------------------------------------
# !/usr/bin/env python # !/usr/bin/env python
from __future__ import division from __future__ import division
from scipy.optimize import brentq from scipy.optimize import brentq
@ -42,8 +32,11 @@ _example = '''
>>> g2 = tm.<generic>(mean=me, var=va, skew=sk, kurt=ku, ysigma=std) >>> g2 = tm.<generic>(mean=me, var=va, skew=sk, kurt=ku, ysigma=std)
>>> xs = g2.gauss2dat(ys[:,1:]) # Transformed to the real world >>> xs = g2.gauss2dat(ys[:,1:]) # Transformed to the real world
''' '''
class TrCommon2(TrCommon): class TrCommon2(TrCommon):
__doc__ = TrCommon.__doc__ # @ReservedAssignment __doc__ = TrCommon.__doc__ # @ReservedAssignment
def trdata(self, x=None, xnmin=-5, xnmax=5, n=513): def trdata(self, x=None, xnmin=-5, xnmax=5, n=513):
""" """
Return a discretized transformation model. Return a discretized transformation model.
@ -74,6 +67,7 @@ class TrCommon2(TrCommon):
return TrData(yn, x, mean=self.mean, sigma=self.sigma) return TrData(yn, x, mean=self.mean, sigma=self.sigma)
class TrHermite(TrCommon2): class TrHermite(TrCommon2):
__doc__ = TrCommon2.__doc__.replace('<generic>', 'Hermite' # @ReservedAssignment __doc__ = TrCommon2.__doc__.replace('<generic>', 'Hermite' # @ReservedAssignment
) + """ ) + """
@ -135,6 +129,7 @@ class TrHermite(TrCommon2):
'Nonlinear vibration models for extremes and fatigue.' 'Nonlinear vibration models for extremes and fatigue.'
J. Engng. Mech., ASCE, Vol 114, No 10, pp 1772-1790 J. Engng. Mech., ASCE, Vol 114, No 10, pp 1772-1790
""" """
def __init__(self, *args, **kwds): def __init__(self, *args, **kwds):
super(TrHermite, self).__init__(*args, **kwds) super(TrHermite, self).__init__(*args, **kwds)
self.pardef = kwds.get('pardef', 1) self.pardef = kwds.get('pardef', 1)
@ -167,11 +162,13 @@ class TrHermite(TrCommon2):
if (ga2 < 0) or (12 < ga2): if (ga2 < 0) or (12 < ga2):
warnings.warn('Kurtosis must be between 0 and 12') warnings.warn('Kurtosis must be between 0 and 12')
self._c3 = skew / 6 * (1 - 0.015 * abs(skew) + 0.3 * skew ** 2) / (1 + 0.2 * ga2) self._c3 = skew / 6 * \
(1 - 0.015 * abs(skew) + 0.3 * skew ** 2) / (1 + 0.2 * ga2)
if ga2 == 0.: if ga2 == 0.:
self._c4 = 0.0 self._c4 = 0.0
else: else:
c41 = (1. - 1.43 * skew ** 2. / ga2) ** (1. - 0.1 * (ga2 + 3.) ** 0.8) expon = 1. - 0.1 * (ga2 + 3.) ** 0.8
c41 = (1. - 1.43 * skew ** 2. / ga2) ** (expon)
self._c4 = 0.1 * ((1. + 1.25 * ga2) ** (1. / 3.) - 1.) * c41 self._c4 = 0.1 * ((1. + 1.25 * ga2) ** (1. / 3.) - 1.) * c41
if not np.isfinite(self._c3) or not np.isfinite(self._c4): if not np.isfinite(self._c3) or not np.isfinite(self._c4):
@ -199,11 +196,12 @@ class TrHermite(TrCommon2):
self._backward = None self._backward = None
else: else:
Km1 = np.sqrt(1. + 2. * c3 ** 2 + 6 * c4 ** 2) Km1 = np.sqrt(1. + 2. * c3 ** 2 + 6 * c4 ** 2)
p = np.poly1d(np.r_[c4, c3, 1. - 3. * c4, -c3] / Km1) # backward G # backward G
p = np.poly1d(np.r_[c4, c3, 1. - 3. * c4, -c3] / Km1)
self._forward = None self._forward = None
self._backward = p self._backward = p
#% Check if it is a strictly increasing function. # Check if it is a strictly increasing function.
dp = p.deriv(m=1) # % Derivative dp = p.deriv(m=1) # % Derivative
r = dp.r # % Find roots of the derivative r = dp.r # % Find roots of the derivative
r = r[where(abs(imag(r)) < eps)] # Keep only real roots r = r[where(abs(imag(r)) < eps)] # Keep only real roots
@ -219,6 +217,7 @@ class TrHermite(TrCommon2):
The derivative of g(x) is infinite at x = %g''' % self._x_limit The derivative of g(x) is infinite at x = %g''' % self._x_limit
warnings.warn(txt1) warnings.warn(txt1)
return return
def check_forward(self, x): def check_forward(self, x):
if not (self._x_limit is None): if not (self._x_limit is None):
x00 = self._x_limit x00 = self._x_limit
@ -232,8 +231,8 @@ class TrHermite(TrCommon2):
if np.mod(cdef, 2): if np.mod(cdef, 2):
errtxt = 'Unable to invert the polynomial \n %s' % txt2 errtxt = 'Unable to invert the polynomial \n %s' % txt2
raise ValueError(errtxt) raise ValueError(errtxt)
np.disp('However, successfully inverted the polynomial\n %s' % txt2) np.disp(
'However, successfully inverted the polynomial\n %s' % txt2)
def _dat2gauss(self, x, *xi): def _dat2gauss(self, x, *xi):
if len(xi) > 0: if len(xi) > 0:
@ -257,8 +256,7 @@ class TrHermite(TrCommon2):
# self.check_forward(y) # self.check_forward(y)
if self._backward is None: if self._backward is None:
#% Inverting the polynomial # Inverting the polynomial
#%~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
xn = self._poly_inv(self._forward, yn) xn = self._poly_inv(self._forward, yn)
else: else:
xn = self._backward(yn) xn = self._backward(yn)
@ -290,24 +288,23 @@ class TrHermite(TrCommon2):
c = coefs[2] - xn / p.coeffs[0] c = coefs[2] - xn / p.coeffs[0]
x0 = a / 3. x0 = a / 3.
#% substitute xn = z-x0 and divide by c4 => z^3 + 3*p1*z+2*q0 = 0 # substitute xn = z-x0 and divide by c4 => z^3 + 3*p1*z+2*q0 = 0
p1 = b / 3 - x0 ** 2 p1 = b / 3 - x0 ** 2
# p1 = (b-a**2/3)/3 # p1 = (b-a**2/3)/3
# q0 = (c + x0*(2.*x0/3.-b))/2. # q0 = (c + x0*(2.*x0/3.-b))/2.
# q0 = x0**3 -a*b/6 +c/2 # q0 = x0**3 -a*b/6 +c/2
q0 = x0 * (x0 ** 2 - b / 2) + c / 2 q0 = x0 * (x0 ** 2 - b / 2) + c / 2
## # z^3+3*p1*z+2*q0=0 # z^3+3*p1*z+2*q0=0
## c3 = self._c3 # c3 = self._c3
## c4 = self._c4 # c4 = self._c4
## b1 = 1./(3.*c4) # b1 = 1./(3.*c4)
## #x0 = c3*b1 # x0 = c3*b1
## #% substitute u = z-x0 and divide by c4 => z^3 + 3*c*z+2*q0 = 0 # % substitute u = z-x0 and divide by c4 => z^3 + 3*c*z+2*q0 = 0
## #p1 = b1-1.-x0**2. # p1 = b1-1.-x0**2.
## Km1 = np.sqrt(1.+2.*c3**2+6*c4**2) # Km1 = np.sqrt(1.+2.*c3**2+6*c4**2)
## q0 = x0**3-1.5*b1*(x0+xn*Km1) # q0 = x0**3-1.5*b1*(x0+xn*Km1)
# q0 = x0**3-1.5*b1*(x0+xn) # q0 = x0**3-1.5*b1*(x0+xn)
if not (self._x_limit is None): # % Three real roots if not (self._x_limit is None): # % Three real roots
d = sqrt(-p1) d = sqrt(-p1)
@ -318,7 +315,7 @@ class TrHermite(TrCommon2):
return 2. * d * cos(theta1 + th2[ix]) - x0 return 2. * d * cos(theta1 + th2[ix]) - x0
else: # %Only one real root exist else: # %Only one real root exist
q1 = sqrt((q0) ** 2 + p1 ** 3) q1 = sqrt((q0) ** 2 + p1 ** 3)
#% Find the real root of the monic polynomial # Find the real root of the monic polynomial
A0 = (q1 - q0) ** (1. / 3.) A0 = (q1 - q0) ** (1. / 3.)
B0 = -(q1 + q0) ** (1. / 3.) B0 = -(q1 + q0) ** (1. / 3.)
return A0 + B0 - x0 # % real root return A0 + B0 - x0 # % real root
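The single-real-root branch above is Cardano's formula for the depressed cubic z**3 + 3*p1*z + 2*q0 = 0, valid when q0**2 + p1**3 > 0. A quick standalone check (np.cbrt needs numpy >= 1.10):

import numpy as np

def depressed_cubic_root(p1, q0):
    # Cardano: the single real root of z**3 + 3*p1*z + 2*q0 = 0
    # when q0**2 + p1**3 > 0, mirroring the A0 + B0 branch above.
    q1 = np.sqrt(q0 ** 2 + p1 ** 3)
    A0 = np.cbrt(q1 - q0)
    B0 = -np.cbrt(q1 + q0)
    return A0 + B0

z = depressed_cubic_root(1.0, 2.0)        # root is exactly -1 here
assert np.isclose(z ** 3 + 3 * 1.0 * z + 2 * 2.0, 0.0)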
@ -327,7 +324,6 @@ class TrHermite(TrCommon2):
#%x=-(A0+B0)/2+(A0-B0)*sqrt(-3)/2-x0 #%x=-(A0+B0)/2+(A0-B0)*sqrt(-3)/2-x0
class TrLinear(TrCommon2): class TrLinear(TrCommon2):
__doc__ = TrCommon2.__doc__.replace('<generic>', 'Linear' # @ReservedAssignment __doc__ = TrCommon2.__doc__.replace('<generic>', 'Linear' # @ReservedAssignment
) + """ ) + """
@ -355,6 +351,7 @@ class TrLinear(TrCommon2):
spec2skew, ochitr, lc2tr, dat2tr spec2skew, ochitr, lc2tr, dat2tr
""" """
def _dat2gauss(self, x, *xi): def _dat2gauss(self, x, *xi):
sratio = atleast_1d(self.ysigma / self.sigma) sratio = atleast_1d(self.ysigma / self.sigma)
y = (atleast_1d(x) - self.mean) * sratio + self.ymean y = (atleast_1d(x) - self.mean) * sratio + self.ymean
@ -401,7 +398,8 @@ class TrOchi(TrCommon2):
Note Note
---- ----
Transformation, g, does not have continuous derivatives of 2nd order or higher. Transformation, g, does not have continuous derivatives of 2nd order or
higher.
Example Example
------- -------
@ -454,8 +452,8 @@ class TrOchi(TrCommon2):
# Set up the 2D non-linear equations for a and sig2^2: # Set up the 2D non-linear equations for a and sig2^2:
# g1='[x(2)-2.*x(1).^2.*x(2).^2-P1, 2.*x(1).*x(2).^2.*(3-8.*x(1).^2.*x(2))-P2 ]' # g1='[x(2)-2.*x(1).^2.*x(2).^2-P1, 2.*x(1).*x(2).^2.*(3-8.*x(1).^2.*x(2))-P2 ]'
# Or solve the following 1D non-linear equation for sig2^2: # Or solve the following 1D non-linear equation for sig2^2:
g2 = lambda x:-sqrt(abs(x - 1) * 2) * (3. * x - 4 * abs(x - 1)) + abs(skew) g2 = lambda x: -sqrt(abs(x - 1) * 2) * \
(3. * x - 4 * abs(x - 1)) + abs(skew)
a1 = 1. # % Start interval where sig2^2 is located. a1 = 1. # % Start interval where sig2^2 is located.
a2 = 2. a2 = 2.
@ -466,8 +464,8 @@ class TrOchi(TrCommon2):
gam_b = 3 * a gam_b = 3 * a
sigma2 = sqrt(sig22) sigma2 = sqrt(sig22)
#% Solve the following 2nd order equation to obtain ma2 # Solve the following 2nd order equation to obtain ma2
#% a*(sig2^2+ma2^2)+ma2 = 0 # a*(sig2^2+ma2^2)+ma2 = 0
my2 = (-1. - sqrt(1. - 4. * a ** 2 * sig22)) / a # % Largest mean my2 = (-1. - sqrt(1. - 4. * a ** 2 * sig22)) / a # % Largest mean
mean2 = a * sig22 / my2 # % choose the smallest mean mean2 = a * sig22 / my2 # % choose the smallest mean
@ -535,6 +533,7 @@ class TrOchi(TrCommon2):
xn.shape = yn.shape xn.shape = yn.shape
return sigma * xn + mean return sigma * xn + mean
def main(): def main():
import pylab import pylab
g = TrHermite(skew=0.1, kurt=3.01) g = TrHermite(skew=0.1, kurt=3.01)
@ -554,6 +553,3 @@ if __name__ == '__main__':
doctest.testmod() doctest.testmod()
else: else:
main() main()

@ -9,27 +9,34 @@ from scipy import integrate
__all__ = ['PlotData', 'AxisLabels'] __all__ = ['PlotData', 'AxisLabels']
def empty_copy(obj): def empty_copy(obj):
class Empty(obj.__class__): class Empty(obj.__class__):
def __init__(self): def __init__(self):
pass pass
newcopy = Empty() newcopy = Empty()
newcopy.__class__ = obj.__class__ newcopy.__class__ = obj.__class__
return newcopy return newcopy
def _set_seed(iseed): def _set_seed(iseed):
if iseed != None: if iseed != None:
try: try:
np.random.set_state(iseed) np.random.set_state(iseed)
except: except:
np.random.seed(iseed) np.random.seed(iseed)
def now(): def now():
''' '''
Return current date and time as a string Return current date and time as a string
''' '''
return strftime("%a, %d %b %Y %H:%M:%S", gmtime()) return strftime("%a, %d %b %Y %H:%M:%S", gmtime())
class PlotData(object): class PlotData(object):
''' '''
Container class for data objects in WAFO Container class for data objects in WAFO
@ -66,6 +73,7 @@ class PlotData(object):
specdata, specdata,
covdata covdata
''' '''
def __init__(self, data=None, args=None, *args2, **kwds): def __init__(self, data=None, args=None, *args2, **kwds):
self.data = data self.data = data
self.args = args self.args = args
@ -90,7 +98,8 @@ class PlotData(object):
if not plotflag and self.children != None: if not plotflag and self.children != None:
plotbackend.hold('on') plotbackend.hold('on')
tmp = [] tmp = []
child_args = kwds.pop('plot_args_children', tuple(self.plot_args_children)) child_args = kwds.pop(
'plot_args_children', tuple(self.plot_args_children))
child_kwds = dict(self.plot_kwds_children).copy() child_kwds = dict(self.plot_kwds_children).copy()
child_kwds.update(kwds.pop('plot_kwds_children', {})) child_kwds.update(kwds.pop('plot_kwds_children', {}))
child_kwds['axis'] = axis child_kwds['axis'] = axis
@ -157,10 +166,10 @@ class PlotData(object):
x = self.args x = self.args
ix = np.flatnonzero((a < x) & (x < b)) ix = np.flatnonzero((a < x) & (x < b))
xi = np.hstack((a, x.take(ix), b)) xi = np.hstack((a, x.take(ix), b))
fi = np.hstack((self.eval_points(a),self.data.take(ix),self.eval_points(b))) fi = np.hstack(
(self.eval_points(a), self.data.take(ix), self.eval_points(b)))
return fun(fi, xi, **kwds) return fun(fi, xi, **kwds)
def show(self): def show(self):
self.plotter.show() self.plotter.show()
@ -192,15 +201,19 @@ class PlotData(object):
class AxisLabels: class AxisLabels:
def __init__(self, title='', xlab='', ylab='', zlab='', **kwds): def __init__(self, title='', xlab='', ylab='', zlab='', **kwds):
self.title = title self.title = title
self.xlab = xlab self.xlab = xlab
self.ylab = ylab self.ylab = ylab
self.zlab = zlab self.zlab = zlab
def __repr__(self): def __repr__(self):
return self.__str__() return self.__str__()
def __str__(self): def __str__(self):
return '%s\n%s\n%s\n%s\n' % (self.title, self.xlab, self.ylab, self.zlab) return '%s\n%s\n%s\n%s\n' % (self.title, self.xlab, self.ylab, self.zlab)
def copy(self): def copy(self):
newcopy = empty_copy(self) newcopy = empty_copy(self)
newcopy.__dict__.update(self.__dict__) newcopy.__dict__.update(self.__dict__)
@ -218,7 +231,9 @@ class AxisLabels:
except: except:
pass pass
class Plotter_1d(object): class Plotter_1d(object):
""" """
Parameters Parameters
@ -235,6 +250,7 @@ class Plotter_1d(object):
step : stair-step plot step : stair-step plot
scatter : scatter plot scatter : scatter plot
""" """
def __init__(self, plotmethod='plot'): def __init__(self, plotmethod='plot'):
self.plotfun = None self.plotfun = None
if plotmethod is None: if plotmethod is None:
@ -277,6 +293,7 @@ class Plotter_1d(object):
h1 = plot1d(axis, x, data, dataCI, plotflag, *args, **kwds) h1 = plot1d(axis, x, data, dataCI, plotflag, *args, **kwds)
return h1 return h1
def plot1d(axis, args, data, dataCI, plotflag, *varargin, **kwds): def plot1d(axis, args, data, dataCI, plotflag, *varargin, **kwds):
plottype = np.mod(plotflag, 10) plottype = np.mod(plotflag, 10)
@ -289,18 +306,20 @@ def plot1d(axis, args, data, dataCI, plotflag, *varargin, **kwds):
elif plottype == 3: elif plottype == 3:
H = axis.stem(args, data, *varargin, **kwds) H = axis.stem(args, data, *varargin, **kwds)
elif plottype == 4: elif plottype == 4:
H = axis.errorbar(args, data, yerr=[dataCI[:,0] - data, dataCI[:,1] - data], *varargin, **kwds) H = axis.errorbar(
args, data, yerr=[dataCI[:, 0] - data, dataCI[:, 1] - data], *varargin, **kwds)
elif plottype == 5: elif plottype == 5:
H = axis.bar(args, data, *varargin, **kwds) H = axis.bar(args, data, *varargin, **kwds)
elif plottype == 6: elif plottype == 6:
level = 0 level = 0
if np.isfinite(level): if np.isfinite(level):
H = axis.fill_between(args, data, level, *varargin, **kwds); H = axis.fill_between(args, data, level, *varargin, **kwds)
else: else:
H = axis.fill_between(args, data, *varargin, **kwds); H = axis.fill_between(args, data, *varargin, **kwds)
elif plottype == 7: elif plottype == 7:
H = axis.plot(args, data, *varargin, **kwds) H = axis.plot(args, data, *varargin, **kwds)
H = axis.fill_between(args, dataCI[:,0], dataCI[:,1], alpha=0.2, color='r'); H = axis.fill_between(
args, dataCI[:, 0], dataCI[:, 1], alpha=0.2, color='r')
scale = plotscale(plotflag) scale = plotscale(plotflag)
logXscale = 'x' in scale logXscale = 'x' in scale
@ -323,16 +342,17 @@ def plot1d(axis, args, data, dataCI, plotflag, *varargin, **kwds):
ax[3] = 11 * np.log10(fmax1) ax[3] = 11 * np.log10(fmax1)
ax[2] = ax[3] - 40 ax[2] = ax[3] - 40
else: else:
ax[3] = 1.15 * fmax1; ax[3] = 1.15 * fmax1
ax[2] = ax[3] * 1e-4; ax[2] = ax[3] * 1e-4
axis.axis(ax) axis.axis(ax)
if np.any(dataCI) and plottype < 3: if np.any(dataCI) and plottype < 3:
axis.hold(True) axis.hold(True)
plot1d(axis, args, dataCI, (), plotflag, 'r--'); plot1d(axis, args, dataCI, (), plotflag, 'r--')
return H return H
def plotscale(plotflag): def plotscale(plotflag):
''' '''
Return plotscale from plotflag Return plotscale from plotflag
@ -388,10 +408,12 @@ def plotscale(plotflag):
logZscaleId = (np.mod(scaleId // 100, 10) > 0) * 4 logZscaleId = (np.mod(scaleId // 100, 10) > 0) * 4
scaleId = logYscaleId + logXscaleId + logZscaleId scaleId = logYscaleId + logXscaleId + logZscaleId
scales = ['linear', 'xlog', 'ylog', 'xylog', 'zlog', 'xzlog', 'yzlog', 'xyzlog'] scales = ['linear', 'xlog', 'ylog', 'xylog',
'zlog', 'xzlog', 'yzlog', 'xyzlog']
return scales[scaleId] return scales[scaleId]
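As a sanity check on the digit decoding in plotscale: the hundreds digit of plotflag toggles x-log, the thousands digit y-log and the ten-thousands digit z-log. A small sketch that mirrors the arithmetic above:

# illustrative decoding of the scale digits in plotflag:
for flag, expected in [(1, 'linear'), (101, 'xlog'),
                       (1001, 'ylog'), (1101, 'xylog')]:
    scale_id = flag // 100
    idx = ((scale_id % 10 > 0) * 1 + (scale_id // 10 % 10 > 0) * 2
           + (scale_id // 100 % 10 > 0) * 4)
    scales = ['linear', 'xlog', 'ylog', 'xylog',
              'zlog', 'xzlog', 'yzlog', 'xyzlog']
    assert scales[idx] == expected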
def transformdata(x, f, plotflag): def transformdata(x, f, plotflag):
transFlag = np.mod(plotflag // 10, 10) transFlag = np.mod(plotflag // 10, 10)
if transFlag == 0: if transFlag == 0:
@ -407,11 +429,14 @@ def transformdata(x, f, plotflag):
data = -np.log1p(-cumtrapz(f, x)) data = -np.log1p(-cumtrapz(f, x))
else: else:
if any(f < 0): if any(f < 0):
raise ValueError('Invalid plotflag: Data or dataCI is negative, but must be positive') raise ValueError(
'Invalid plotflag: Data or dataCI is negative, but must be positive')
data = 10 * np.log10(f) data = 10 * np.log10(f)
return data return data
class Plotter_2d(Plotter_1d): class Plotter_2d(Plotter_1d):
""" """
Parameters Parameters
---------- ----------
@ -432,6 +457,7 @@ class Plotter_2d(Plotter_1d):
h1 = plot2d(axis, wdata, plotflag, *args, **kwds) h1 = plot2d(axis, wdata, plotflag, *args, **kwds)
return h1 return h1
def plot2d(axis, wdata, plotflag, *args, **kwds): def plot2d(axis, wdata, plotflag, *args, **kwds):
f = wdata f = wdata
if isinstance(wdata.args, (list, tuple)): if isinstance(wdata.args, (list, tuple)):
@ -440,7 +466,8 @@ def plot2d(axis, wdata, plotflag, *args, **kwds):
args1 = tuple((wdata.args,)) + (wdata.data,) + args args1 = tuple((wdata.args,)) + (wdata.data,) + args
if plotflag in (1, 6, 7, 8, 9): if plotflag in (1, 6, 7, 8, 9):
isPL = False isPL = False
if hasattr(f, 'clevels') and len(f.clevels) > 0: # check if contour levels is submitted # check if contour levels is submitted
if hasattr(f, 'clevels') and len(f.clevels) > 0:
CL = f.clevels CL = f.clevels
isPL = hasattr(f, 'plevels') and f.plevels is not None isPL = hasattr(f, 'plevels') and f.plevels is not None
if isPL: if isPL:
@ -448,11 +475,12 @@ def plot2d(axis, wdata, plotflag, *args, **kwds):
else: else:
dmax = np.max(f.data) dmax = np.max(f.data)
dmin = np.min(f.data) dmin = np.min(f.data)
CL = dmax - (dmax - dmin) * (1 - np.r_[0.01, 0.025, 0.05, 0.1, 0.2, 0.4, 0.5, 0.75]) CL = dmax - (dmax - dmin) * \
(1 - np.r_[0.01, 0.025, 0.05, 0.1, 0.2, 0.4, 0.5, 0.75])
clvec = np.sort(CL) clvec = np.sort(CL)
if plotflag in [1, 8, 9]: if plotflag in [1, 8, 9]:
h = axis.contour(*args1, levels=CL, **kwds); h = axis.contour(*args1, levels=CL, **kwds)
# else: # else:
# [cs hcs] = contour3(f.x{:},f.f,CL,sym); # [cs hcs] = contour3(f.x{:},f.f,CL,sym);
@ -460,10 +488,12 @@ def plot2d(axis, wdata, plotflag, *args, **kwds):
ncl = len(clvec) ncl = len(clvec)
if ncl > 12: if ncl > 12:
ncl = 12 ncl = 12
warnings.warn('Only the first 12 levels will be listed in table.') warnings.warn(
'Only the first 12 levels will be listed in table.')
clvals = PL[:ncl] if isPL else clvec[:ncl] clvals = PL[:ncl] if isPL else clvec[:ncl]
unused_axcl = cltext(clvals, percent=isPL) # print contour level text # print contour level text
unused_axcl = cltext(clvals, percent=isPL)
elif any(plotflag == [7, 9]): elif any(plotflag == [7, 9]):
axis.clabel(h) axis.clabel(h)
else: else:
@ -471,7 +501,8 @@ def plot2d(axis, wdata, plotflag, *args, **kwds):
elif plotflag == 2: elif plotflag == 2:
h = axis.mesh(*args1, **kwds) h = axis.mesh(*args1, **kwds)
elif plotflag == 3: elif plotflag == 3:
h = axis.surf(*args1, **kwds) #shading interp % flat, faceted % surfc # shading interp % flat, faceted % surfc
h = axis.surf(*args1, **kwds)
elif plotflag == 4: elif plotflag == 4:
h = axis.waterfall(*args1, **kwds) h = axis.waterfall(*args1, **kwds)
elif plotflag == 5: elif plotflag == 5:
@ -487,6 +518,7 @@ def plot2d(axis, wdata, plotflag, *args, **kwds):
# end # end
# pass # pass
def test_eval_points(): def test_eval_points():
plotbackend.ioff() plotbackend.ioff()
x = np.linspace(0, 5, 21) x = np.linspace(0, 5, 21)
@ -496,14 +528,19 @@ def test_eval_points():
d.plot('.') d.plot('.')
di.plot() di.plot()
di.show() di.show()
def test_integrate(): def test_integrate():
x = np.linspace(0, 5, 60) x = np.linspace(0, 5, 60)
d = PlotData(np.sin(x), x) d = PlotData(np.sin(x), x)
print(d.integrate(0, np.pi / 2, method='simps')) print(d.integrate(0, np.pi / 2, method='simps'))
def test_docstrings(): def test_docstrings():
import doctest import doctest
doctest.testmod() doctest.testmod()
def main(): def main():
pass pass

@ -9,6 +9,7 @@ from dispersion_relation import w2k, k2w #@UnusedImport
__all__ = ['w2k', 'k2w', 'sensor_typeid', 'sensor_type', 'TransferFunction'] __all__ = ['w2k', 'k2w', 'sensor_typeid', 'sensor_type', 'TransferFunction']
def hyperbolic_ratio(a, b, sa, sb): def hyperbolic_ratio(a, b, sa, sb):
''' '''
Return ratio of hyperbolic functions Return ratio of hyperbolic functions
@ -19,7 +20,8 @@ def hyperbolic_ratio(a, b, sa, sb):
a, b : array-like a, b : array-like
argument vectors of the same size argument vectors of the same size
sa, sb : scalar integers sa, sb : scalar integers
defining the hyperbolic function used, i.e., f(x,1)=cosh(x), f(x,-1)=sinh(x) defining the hyperbolic function used, i.e.,
f(x,1)=cosh(x), f(x,-1)=sinh(x)
Returns Returns
------- -------
@ -68,7 +70,9 @@ def hyperbolic_ratio(a, b, sa, sb):
ind = np.flatnonzero(den != 0) ind = np.flatnonzero(den != 0)
iden.flat[ind] = 1.0 / den[ind] iden.flat[ind] = 1.0 / den[ind]
val = np.where(num == den, 1, num * iden) val = np.where(num == den, 1, num * iden)
return signRatio * exp(ak - bk) * val #((sak+exp(-2*ak))/(sbk+exp(-2*bk))) # ((sak+exp(-2*ak))/(sbk+exp(-2*bk)))
return signRatio * exp(ak - bk) * val
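hyperbolic_ratio exists because forming cosh or sinh directly overflows for large arguments; a simplified sketch of the sa = sb = 1 case shows the rescaling trick (exact for any a, b, since cosh is even):

import numpy as np

def cosh_ratio(a, b):
    # Overflow-safe cosh(a)/cosh(b) via
    #   cosh(a)/cosh(b) = exp(a-b) * (1 + exp(-2a)) / (1 + exp(-2b)),
    # the sa = sb = 1 case of hyperbolic_ratio above.
    a, b = np.abs(a), np.abs(b)
    return np.exp(a - b) * (1 + np.exp(-2 * a)) / (1 + np.exp(-2 * b))

# naive np.cosh overflows near arguments of 1000; the ratio stays finite:
print(cosh_ratio(1000.0, 1001.0))   # ~exp(-1) = 0.3679
assert np.isclose(cosh_ratio(3.0, 4.0), np.cosh(3.0) / np.cosh(4.0))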
def sensor_typeid(*sensortypes): def sensor_typeid(*sensortypes):
''' Return ID for sensortype name ''' Return ID for sensortype name
@ -97,9 +101,9 @@ def sensor_typeid(*sensortypes):
12, 'U_t' : Water particle acceleration in x-direction 12, 'U_t' : Water particle acceleration in x-direction
13, 'V_t' : Water particle acceleration in y-direction 13, 'V_t' : Water particle acceleration in y-direction
14, 'W_t' : Water particle acceleration in z-direction 14, 'W_t' : Water particle acceleration in z-direction
15, 'X_p' : Water particle displacement in x-direction from its mean position 15, 'X_p' : Water particle displacement in x-direction from mean pos.
16, 'Y_p' : Water particle displacement in y-direction from its mean position 16, 'Y_p' : Water particle displacement in y-direction from mean pos.
17, 'Z_p' : Water particle displacement in z-direction from its mean position 17, 'Z_p' : Water particle displacement in z-direction from mean pos.
Example: Example:
>>> sensor_typeid('W','v') >>> sensor_typeid('W','v')
@ -121,7 +125,6 @@ def sensor_typeid(*sensortypes):
raise ValueError('Input must be a string!') raise ValueError('Input must be a string!')
def sensor_type(*sensorids): def sensor_type(*sensorids):
''' '''
Return sensortype name Return sensortype name
@ -149,9 +152,9 @@ def sensor_type(*sensorids):
12, 'U_t' : Water particle acceleration in x-direction 12, 'U_t' : Water particle acceleration in x-direction
13, 'V_t' : Water particle acceleration in y-direction 13, 'V_t' : Water particle acceleration in y-direction
14, 'W_t' : Water particle acceleration in z-direction 14, 'W_t' : Water particle acceleration in z-direction
15, 'X_p' : Water particle displacement in x-direction from its mean position 15, 'X_p' : Water particle displacement in x-direction from mean pos.
16, 'Y_p' : Water particle displacement in y-direction from its mean position 16, 'Y_p' : Water particle displacement in y-direction from mean pos.
17, 'Z_p' : Water particle displacement in z-direction from its mean position 17, 'Z_p' : Water particle displacement in z-direction from mean pos.
Example: Example:
>>> sensor_type(range(3)) >>> sensor_type(range(3))
@ -162,8 +165,8 @@ def sensor_type(*sensorids):
sensor_typeid, tran sensor_typeid, tran
''' '''
valid_names = ('n', 'n_t', 'n_tt', 'n_x', 'n_y', 'n_xx', 'n_yy', 'n_xy', valid_names = ('n', 'n_t', 'n_tt', 'n_x', 'n_y', 'n_xx', 'n_yy', 'n_xy',
'p', 'u', 'v', 'w', 'u_t', 'v_t', 'w_t', 'x_p', 'y_p', 'z_p', 'p', 'u', 'v', 'w', 'u_t', 'v_t', 'w_t', 'x_p', 'y_p',
nan) 'z_p', nan)
ids = atleast_1d(*sensorids) ids = atleast_1d(*sensorids)
if isinstance(ids, list): if isinstance(ids, list):
ids = hstack(ids) ids = hstack(ids)
@ -171,7 +174,9 @@ def sensor_type(*sensorids):
ids = where(((ids < 0) | (n < ids)), n, ids) ids = where(((ids < 0) | (n < ids)), n, ids)
return tuple(valid_names[i] for i in ids) return tuple(valid_names[i] for i in ids)
class TransferFunction(object): class TransferFunction(object):
''' '''
Class for computing transfer functions based on linear wave theory Class for computing transfer functions based on linear wave theory
of the system with input surface elevation, of the system with input surface elevation,
@ -189,8 +194,8 @@ class TransferFunction(object):
( theta = 0 -> positive x axis theta = pi/2 -> positive y axis) ( theta = 0 -> positive x axis theta = pi/2 -> positive y axis)
Member variables Member variables
---------------- ----------------
pos : [x,y,z] pos : [x,y,z], (default [0,0,0])
vector giving coordinate position relative to [x0 y0 z0] (default [0,0,0]) vector giving coordinate position relative to [x0 y0 z0]
sensortype = string sensortype = string
defining the sensortype or transfer function in output. defining the sensortype or transfer function in output.
0, 'n' : Surface elevation (n=Eta) (default) 0, 'n' : Surface elevation (n=Eta) (default)
@ -208,17 +213,17 @@ class TransferFunction(object):
12, 'U_t' : Water particle acceleration in x-direction 12, 'U_t' : Water particle acceleration in x-direction
13, 'V_t' : Water particle acceleration in y-direction 13, 'V_t' : Water particle acceleration in y-direction
14, 'W_t' : Water particle acceleration in z-direction 14, 'W_t' : Water particle acceleration in z-direction
15, 'X_p' : Water particle displacement in x-direction from its mean position 15, 'X_p' : Water particle displacement in x-direction from mean pos.
16, 'Y_p' : Water particle displacement in y-direction from its mean position 16, 'Y_p' : Water particle displacement in y-direction from mean pos.
17, 'Z_p' : Water particle displacement in z-direction from its mean position 17, 'Z_p' : Water particle displacement in z-direction from mean pos.
h : real scalar h : real scalar
water depth (default inf) water depth (default inf)
g : real scalar g : real scalar
acceleration of gravity (default 9.81 m/s**2) acceleration of gravity (default 9.81 m/s**2)
rho : real scalar rho : real scalar
water density (default 1028 kg/m**3) water density (default 1028 kg/m**3)
bet : 1 or -1 bet : 1 or -1 (default 1)
1, theta given in terms of directions toward which waves travel (default) 1, theta given in terms of directions toward which waves travel
-1, theta given in terms of directions from which waves come -1, theta given in terms of directions from which waves come
igam : 1,2 or 3 igam : 1,2 or 3
1, if z is measured positive upward from mean water level (default) 1, if z is measured positive upward from mean water level (default)
@ -243,9 +248,9 @@ class TransferFunction(object):
... tf.sensortype = stype ... tf.sensortype = stype
... Hw, Gwt = tf.tran(w0,th0) ... Hw, Gwt = tf.tran(w0,th0)
... vals.append((Hw*Gwt*eta0).real.ravel()) ... vals.append((Hw*Gwt*eta0).real.ravel())
... vals[i]
... fh = plt.plot(t, vals[i]) fh = plt.plot(t, vals[i])
>>> plt.show() plt.show()
See also See also
@ -258,10 +263,12 @@ class TransferFunction(object):
"On the measurement of directional spectra", "On the measurement of directional spectra",
Applied Ocean Research, Vol 16, pp 283-294 Applied Ocean Research, Vol 16, pp 283-294
''' '''
def __init__(self, pos=(0, 0, 0), sensortype='n', h=inf, g=9.81, rho=1028, def __init__(self, pos=(0, 0, 0), sensortype='n', h=inf, g=9.81, rho=1028,
bet=1, igam=1, thetax=90, thetay=0): bet=1, igam=1, thetax=90, thetay=0):
self.pos = pos self.pos = pos
self.sensortype = sensortype if isinstance(sensortype, str) else sensor_type(sensortype) self.sensortype = sensortype if isinstance(
sensortype, str) else sensor_type(sensortype)
self.h = h self.h = h
self.g = g self.g = g
self.rho = rho self.rho = rho
@ -299,8 +306,8 @@ class TransferFunction(object):
vector of directions in radians Length Nt (default 0) vector of directions in radians Length Nt (default 0)
( theta = 0 -> positive x axis theta = pi/2 -> positive y axis) ( theta = 0 -> positive x axis theta = pi/2 -> positive y axis)
kw : array-like kw : array-like
vector of wave numbers corresponding to angular frequencies, w. Length Nf vector of wave numbers corresponding to angular frequencies, w.
(default calculated with w2k) Length Nf (default calculated with w2k)
Returns Returns
------- -------
@ -311,7 +318,8 @@ class TransferFunction(object):
w (columns) and theta (rows) size Nt x Nf w (columns) and theta (rows) size Nt x Nf
''' '''
if kw is None: if kw is None:
kw, unusedkw2 = w2k(w, 0, self.h) #wave number as function of angular frequency # wave number as function of angular frequency
kw, unusedkw2 = w2k(w, 0, self.h)
w, theta, kw = np.atleast_1d(w, theta, kw) w, theta, kw = np.atleast_1d(w, theta, kw)
# make sure they have the correct orientation # make sure they have the correct orientation
@ -327,7 +335,7 @@ class TransferFunction(object):
ind = np.flatnonzero(1 - np.isfinite(Hw)) ind = np.flatnonzero(1 - np.isfinite(Hw))
Hw.flat[ind] = 0 Hw.flat[ind] = 0
sgn = np.sign(Hw); sgn = np.sign(Hw)
k0 = np.flatnonzero(sgn < 0) k0 = np.flatnonzero(sgn < 0)
if len(k0): # make sure Hw>=0 ie. transfer negative signs to Gwt if len(k0): # make sure Hw>=0 ie. transfer negative signs to Gwt
Gwt[:, k0] = -Gwt[:, k0] Gwt[:, k0] = -Gwt[:, k0]
@ -335,11 +343,13 @@ class TransferFunction(object):
if self.igam == 2: if self.igam == 2:
# pab 09 Oct.2002: bug fix # pab 09 Oct.2002: bug fix
# Changing igam to 2 should affect the directional result in the same way as changing eta to -eta! # Changing igam to 2 should affect the directional result in the
# same way as changing eta to -eta!
Gwt = -Gwt Gwt = -Gwt
return Hw, Gwt return Hw, Gwt
__call__ = tran __call__ = tran
#---Private member methods #---Private member methods
def _get_ee_cthxy(self, theta, kw): def _get_ee_cthxy(self, theta, kw):
# convert from angle in degrees to radians # convert from angle in degrees to radians
bet = self.bet bet = self.bet
@ -352,16 +362,19 @@ class TransferFunction(object):
# Compute location complex exponential # Compute location complex exponential
x, y, unused_z = list(self.pos) x, y, unused_z = list(self.pos)
ee = exp((1j * (x * cthx + y * cthy)) * kw) # exp(i*k(w)*(x*cos(theta)+y*sin(theta)) size Nt X Nf # exp(i*k(w)*(x*cos(theta)+y*sin(theta)) size Nt X Nf
ee = exp((1j * (x * cthx + y * cthy)) * kw)
return ee, cthx, cthy return ee, cthx, cthy
def _get_zk(self, kw): def _get_zk(self, kw):
h = self.h h = self.h
z = self.pos[2] z = self.pos[2]
if self.igam == 1: if self.igam == 1:
zk = kw * (h + z) # z measured positive upward from mean water level (default) # z measured positive upward from mean water level (default)
zk = kw * (h + z)
elif self.igam == 2: elif self.igam == 2:
zk = kw * (h - z) # z measured positive downward from mean water level # z measured positive downward from mean water level
zk = kw * (h - z)
else: else:
zk = kw * z # z measured positive upward from sea floor zk = kw * z # z measured positive upward from sea floor
return zk return zk
@ -377,7 +390,8 @@ class TransferFunction(object):
def _n_t(self, w, theta, kw): def _n_t(self, w, theta, kw):
''' n_t = Eta_t ''' ''' n_t = Eta_t '''
ee, unused_cthx, unused_cthy = self._get_ee_cthxy(theta, kw) ee, unused_cthx, unused_cthy = self._get_ee_cthxy(theta, kw)
return w, -1j * ee; return w, -1j * ee
def _n_tt(self, w, theta, kw): def _n_tt(self, w, theta, kw):
'''n_tt = Eta_tt''' '''n_tt = Eta_tt'''
ee, unused_cthx, unused_cthy = self._get_ee_cthxy(theta, kw) ee, unused_cthx, unused_cthy = self._get_ee_cthxy(theta, kw)
@ -388,6 +402,7 @@ class TransferFunction(object):
''' n_x = Eta_x = x-slope''' ''' n_x = Eta_x = x-slope'''
ee, cthx, unused_cthy = self._get_ee_cthxy(theta, kw) ee, cthx, unused_cthy = self._get_ee_cthxy(theta, kw)
return kw, 1j * cthx * ee return kw, 1j * cthx * ee
def _n_y(self, w, theta, kw): def _n_y(self, w, theta, kw):
''' n_y = Eta_y = y-slope''' ''' n_y = Eta_y = y-slope'''
ee, unused_cthx, cthy = self._get_ee_cthxy(theta, kw) ee, unused_cthx, cthy = self._get_ee_cthxy(theta, kw)
@ -398,10 +413,12 @@ class TransferFunction(object):
''' n_xx = Eta_xx = Surface curvature (x-dir)''' ''' n_xx = Eta_xx = Surface curvature (x-dir)'''
ee, cthx, unused_cthy = self._get_ee_cthxy(theta, kw) ee, cthx, unused_cthy = self._get_ee_cthxy(theta, kw)
return kw ** 2, -(cthx ** 2) * ee return kw ** 2, -(cthx ** 2) * ee
def _n_yy(self, w, theta, kw): def _n_yy(self, w, theta, kw):
''' n_yy = Eta_yy = Surface curvature (y-dir)''' ''' n_yy = Eta_yy = Surface curvature (y-dir)'''
ee, unused_cthx, cthy = self._get_ee_cthxy(theta, kw) ee, unused_cthx, cthy = self._get_ee_cthxy(theta, kw)
return kw ** 2, -cthy ** 2 * ee return kw ** 2, -cthy ** 2 * ee
def _n_xy(self, w, theta, kw): def _n_xy(self, w, theta, kw):
''' n_xy = Eta_xy = Surface curvature (xy-dir)''' ''' n_xy = Eta_xy = Surface curvature (xy-dir)'''
ee, cthx, cthy = self._get_ee_cthxy(theta, kw) ee, cthx, cthy = self._get_ee_cthxy(theta, kw)
@ -413,7 +430,8 @@ class TransferFunction(object):
ee, unused_cthx, unused_cthy = self._get_ee_cthxy(theta, kw) ee, unused_cthx, unused_cthy = self._get_ee_cthxy(theta, kw)
hk = kw * self.h hk = kw * self.h
zk = self._get_zk(kw) zk = self._get_zk(kw)
return self.rho * self.g * hyperbolic_ratio(zk, hk, 1, 1), ee #hyperbolic_ratio = cosh(zk)/cosh(hk) # hyperbolic_ratio = cosh(zk)/cosh(hk)
return self.rho * self.g * hyperbolic_ratio(zk, hk, 1, 1), ee
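In deep water this pressure ratio reduces to the familiar exponential decay of linear wave theory: cosh(k*(h+z))/cosh(k*h) -> exp(k*z) as k*h grows, with z <= 0 below the mean water level. A quick numerical check (the values below are illustrative, not from the source):

import numpy as np

rho, g, k, h, z = 1028.0, 9.81, 0.1, 500.0, -10.0
ratio = np.cosh(k * (h + z)) / np.cosh(k * h)   # hyperbolic_ratio(zk, hk, 1, 1)
assert np.isclose(ratio, np.exp(k * z), rtol=1e-6)
print(rho * g * ratio)   # dynamic pressure per metre of surface elevation, Pa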
#---- Water particle velocities --- #---- Water particle velocities ---
def _u(self, w, theta, kw): def _u(self, w, theta, kw):
@ -421,19 +439,24 @@ class TransferFunction(object):
ee, cthx, unused_cthy = self._get_ee_cthxy(theta, kw) ee, cthx, unused_cthy = self._get_ee_cthxy(theta, kw)
hk = kw * self.h hk = kw * self.h
zk = self._get_zk(kw) zk = self._get_zk(kw)
return w * hyperbolic_ratio(zk, hk, 1, -1), cthx * ee# w*cosh(zk)/sinh(hk), cos(theta)*ee # w*cosh(zk)/sinh(hk), cos(theta)*ee
return w * hyperbolic_ratio(zk, hk, 1, -1), cthx * ee
def _v(self, w, theta, kw): def _v(self, w, theta, kw):
'''V = y-velocity''' '''V = y-velocity'''
ee, unused_cthx, cthy = self._get_ee_cthxy(theta, kw) ee, unused_cthx, cthy = self._get_ee_cthxy(theta, kw)
hk = kw * self.h hk = kw * self.h
zk = self._get_zk(kw) zk = self._get_zk(kw)
return w * hyperbolic_ratio(zk, hk, 1, -1), cthy * ee # w*cosh(zk)/sinh(hk), sin(theta)*ee # w*cosh(zk)/sinh(hk), sin(theta)*ee
return w * hyperbolic_ratio(zk, hk, 1, -1), cthy * ee
def _w(self, w, theta, kw): def _w(self, w, theta, kw):
''' W = z-velocity''' ''' W = z-velocity'''
ee, unused_cthx, unused_cthy = self._get_ee_cthxy(theta, kw) ee, unused_cthx, unused_cthy = self._get_ee_cthxy(theta, kw)
hk = kw * self.h hk = kw * self.h
zk = self._get_zk(kw) zk = self._get_zk(kw)
return w * hyperbolic_ratio(zk, hk, -1, -1), -1j * ee # w*sinh(zk)/sinh(hk), -? # w*sinh(zk)/sinh(hk), -?
return w * hyperbolic_ratio(zk, hk, -1, -1), -1j * ee
#---- Water particle acceleration --- #---- Water particle acceleration ---
def _u_t(self, w, theta, kw): def _u_t(self, w, theta, kw):
@@ -441,20 +464,24 @@ class TransferFunction(object):
         ee, cthx, unused_cthy = self._get_ee_cthxy(theta, kw)
         hk = kw * self.h
         zk = self._get_zk(kw)
-        return (w ** 2) * hyperbolic_ratio(zk, hk, 1, -1), -1j * cthx * ee # w^2*cosh(zk)/sinh(hk), ?
+        # w^2*cosh(zk)/sinh(hk), ?
+        return (w ** 2) * hyperbolic_ratio(zk, hk, 1, -1), -1j * cthx * ee
 
     def _v_t(self, w, theta, kw):
         ''' V_t = y-acceleration'''
         ee, unused_cthx, cthy = self._get_ee_cthxy(theta, kw)
         hk = kw * self.h
         zk = self._get_zk(kw)
-        return (w ** 2) * hyperbolic_ratio(zk, hk, 1, -1), -1j * cthy * ee # w^2*cosh(zk)/sinh(hk), ?
+        # w^2*cosh(zk)/sinh(hk), ?
+        return (w ** 2) * hyperbolic_ratio(zk, hk, 1, -1), -1j * cthy * ee
 
     def _w_t(self, w, theta, kw):
         ''' W_t = z-acceleration'''
         ee, unused_cthx, unused_cthy = self._get_ee_cthxy(theta, kw)
         hk = kw * self.h
         zk = self._get_zk(kw)
-        return (w ** 2) * hyperbolic_ratio(zk, hk, -1, -1), -ee # w*sinh(zk)/sinh(hk), ?
+        # w*sinh(zk)/sinh(hk), ?
+        return (w ** 2) * hyperbolic_ratio(zk, hk, -1, -1), -ee
 
     #---- Water particle displacement ---
     def _x_p(self, w, theta, kw):
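Note the pattern in this hunk: each acceleration transfer function is the matching velocity one times -1j*w, i.e. a time derivative under the exp(-1j*w*t) convention that the -1j factors suggest. A quick numeric cross-check, assuming the two returned factors get multiplied downstream and using arbitrary test values:

    import numpy as np

    w, kw, h, z = 1.2, 0.15, 20.0, -2.0
    cthx, ee = np.cos(0.3), np.exp(1j * 0.7)
    zk, hk = kw * (h + z), kw * h
    ratio = np.cosh(zk) / np.sinh(hk)               # hyperbolic_ratio(zk, hk, 1, -1)
    u_tf = (w * ratio) * (cthx * ee)                # _u
    u_t_tf = (w ** 2 * ratio) * (-1j * cthx * ee)   # _u_t
    assert np.allclose(u_t_tf, -1j * w * u_tf)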
@@ -462,13 +489,17 @@ class TransferFunction(object):
         ee, cthx, unused_cthy = self._get_ee_cthxy(theta, kw)
         hk = kw * self.h
         zk = self._get_zk(kw)
-        return hyperbolic_ratio(zk, hk, 1, -1), 1j * cthx * ee # cosh(zk)./sinh(hk), ?
+        # cosh(zk)./sinh(hk), ?
+        return hyperbolic_ratio(zk, hk, 1, -1), 1j * cthx * ee
 
     def _y_p(self, w, theta, kw):
         ''' Y_p = y-displacement'''
         ee, unused_cthx, cthy = self._get_ee_cthxy(theta, kw)
         hk = kw * self.h
         zk = self._get_zk(kw)
-        return hyperbolic_ratio(zk, hk, 1, -1), 1j * cthy * ee # cosh(zk)./sinh(hk), ?
+        # cosh(zk)./sinh(hk), ?
+        return hyperbolic_ratio(zk, hk, 1, -1), 1j * cthy * ee
 
     def _z_p(self, w, theta, kw):
         ''' Z_p = z-displacement'''
         ee, unused_cthx, unused_cthy = self._get_ee_cthxy(theta, kw)
@@ -531,7 +562,7 @@ class TransferFunction(object):
 # '''
 #
 #
-# # Assume seastate with jonswap spectrum:
+# Assume seastate with jonswap spectrum:
 #
 # Tp = 4 * np.sqrt(Hm0)
 # gam = jonswap_peakfact(Hm0, Tp)
@@ -542,13 +573,13 @@ class TransferFunction(object):
 # hk = kw * h
 # zk1 = kw * z
 # zk = hk + zk1 # z measured positive upward from mean water level (default)
-# #zk = hk-zk1; % z measured positive downward from mean water level
-# #zk1 = -zk1;
-# #zk = zk1; % z measured positive upward from sea floor
+# zk = hk-zk1; % z measured positive downward from mean water level
+# zk1 = -zk1;
+# zk = zk1; % z measured positive upward from sea floor
 #
-# # cosh(zk)/cosh(hk) approx exp(zk) for large h
-# # hyperbolic_ratio(zk,hk,1,1) = cosh(zk)/cosh(hk)
-# # pr = np.where(np.pi < hk, np.exp(zk1), hyperbolic_ratio(zk, hk, 1, 1))
+# cosh(zk)/cosh(hk) approx exp(zk) for large h
+# hyperbolic_ratio(zk,hk,1,1) = cosh(zk)/cosh(hk)
+# pr = np.where(np.pi < hk, np.exp(zk1), hyperbolic_ratio(zk, hk, 1, 1))
 # pr = hyperbolic_ratio(zk, hk, 1, 1)
 # pressure = (rho * g * Hm0 / 2) * pr
 #
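The commented-out recipe above is enough to reconstruct the linear pressure amplitude; a hedged sketch with the deep-water shortcut from the np.where line (pressure_sketch and the seawater-density default are illustrative, not wafo API):

    import numpy as np

    def pressure_sketch(k, z, h, Hm0, rho=1028.0, g=9.81):
        # zk = k*(z + h) with z positive upward from the mean water level;
        # cosh(zk)/cosh(hk) ~ exp(k*z) once hk > pi (deep water).
        hk = k * h
        zk = hk + k * z
        pr = np.where(np.pi < hk, np.exp(k * z), np.cosh(zk) / np.cosh(hk))
        return rho * g * (Hm0 / 2) * pr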
@@ -559,7 +590,14 @@ class TransferFunction(object):
 #
 # return pressure
 
+
+def test_docstrings():
+    import doctest
+    print('Testing docstrings in %s' % __file__)
+    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
+
 
 def main():
     sensor_type(range(21))
 
 if __name__ == '__main__':
-    pass
+    test_docstrings()
@@ -6,11 +6,13 @@ w2k - Translates from frequency to wave number
 """
 import warnings
 #import numpy as np
-from numpy import (atleast_1d, sqrt, ones_like, zeros_like, arctan2, where, tanh, any, #@UnresolvedImport
-    sin, cos, sign, inf, flatnonzero, finfo, cosh, abs) #@UnresolvedImport
+from numpy import (atleast_1d, sqrt, ones_like, zeros_like, arctan2, where,
+                   tanh, any, sin, cos, sign, inf,
+                   flatnonzero, finfo, cosh, abs)
 
 __all__ = ['k2w', 'w2k']
 
+
 def k2w(k1, k2=0e0, h=inf, g=9.81, u1=0e0, u2=0e0):
     ''' Translates from wave number to frequency
         using the dispersion relation
@@ -53,7 +55,7 @@ def k2w(k1, k2=0e0, h=inf, g=9.81, u1=0e0, u2=0e0):
     Example
     -------
     >>> from numpy import arange
-    >>> import wafo.spectrum.dispersion_relation as wsd
+    >>> import wafo.wave_theory.dispersion_relation as wsd
     >>> wsd.k2w(arange(0.01,.5,0.2))[0]
     array([ 0.3132092 ,  1.43530485,  2.00551739])
     >>> wsd.k2w(arange(0.01,.5,0.2),h=20)[0]
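The doctest values follow directly from the dispersion relation w = sqrt(g*k*tanh(k*h)). A scalar sketch for the no-current case (k2w_scalar is a hypothetical name; the real k2w apparently also folds in the Doppler shift from the u1, u2 currents):

    import numpy as np

    def k2w_scalar(k, h=np.inf, g=9.81):
        # tanh(k*h) -> 1 in infinitely deep water
        tanh_kh = 1.0 if np.isinf(h) else np.tanh(k * h)
        return np.sqrt(g * k * tanh_kh)

    # k2w_scalar(0.01) -> 0.3132092..., k2w_scalar(0.01, h=20) -> 0.13914927...,
    # matching the first entries of the doctests above.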
@@ -84,6 +86,7 @@ def k2w(k1, k2=0e0, h=inf, g=9.81, u1=0e0, u2=0e0):
     return w, theta
 
+
 def w2k(w, theta=0.0, h=inf, g=9.81, count_limit=100):
     '''
     Translates from frequency to wave number
@@ -107,7 +110,8 @@ def w2k(w, theta=0.0, h=inf, g=9.81, count_limit=100):
     Description
     -----------
-    Uses Newton Raphson method to find the wave number k in the dispersion relation
+    Uses Newton Raphson method to find the wave number k in the dispersion
+    relation
         w**2= g*k*tanh(k*h).
     The solution k(w) => k1 = k(w)*cos(theta)
                          k2 = k(w)*sin(theta)
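A minimal scalar version of that Newton iteration, assuming a single positive frequency and finite depth (w2k_scalar is a hypothetical name; it mirrors the hn update in the loop further below):

    import numpy as np

    def w2k_scalar(w, h=20.0, g=9.81, tol=1e-12, max_iter=100):
        # Newton steps on f(k) = g*k*tanh(k*h) - w**2, starting from the
        # deep-water guess k0 = w**2/g.
        if w == 0:
            return 0.0
        k = w ** 2 / g
        for _ in range(max_iter):
            kh = k * h
            hn = (k * np.tanh(kh) - w ** 2 / g) / \
                (np.tanh(kh) + kh / np.cosh(kh) ** 2)
            k -= hn
            if abs(hn) < tol * k:
                break
        return k

    # w2k_scalar(1.0) -> 0.10503601..., matching test_w2k_finite_water_depth
    # later in this commit.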
@@ -118,7 +122,7 @@ def w2k(w, theta=0.0, h=inf, g=9.81, count_limit=100):
     Example
     -------
     >>> import pylab as plb
-    >>> import wafo.spectrum.dispersion_relation as wsd
+    >>> import wafo.wave_theory.dispersion_relation as wsd
     >>> w = plb.linspace(0,3);
     >>> h = plb.plot(w,w2k(w)[0])
     >>> wsd.w2k(range(4))[0]
@@ -143,13 +147,9 @@ def w2k(w, theta=0.0, h=inf, g=9.81, count_limit=100):
         k1 = k * cos(th)
         return k1, k2
 
     if gi.size > 1:
-        txt0 = '''
-        Finite depth in combination with 3D normalization (len(g)=2) is not implemented yet.
-        '''
-        raise ValueError(txt0)
+        raise ValueError('Finite depth in combination with 3D normalization' +
+                         ' (len(g)=2) is not implemented yet.')
 
     find = flatnonzero
     eps = finfo(float).eps
@@ -171,7 +171,8 @@ def w2k(w, theta=0.0, h=inf, g=9.81, count_limit=100):
     while (ix.size > 0 and count < count_limit):
         ki = k[ix]
         kh = ki * hi[ix]
-        hn[ix] = (ki*tanh(kh)-wi[ix]**2.0/gi)/(tanh(kh)+kh/(cosh(kh)**2.0))
+        hn[ix] = (ki * tanh(kh) - wi[ix] ** 2.0 / gi) / \
+            (tanh(kh) + kh / (cosh(kh) ** 2.0))
         knew = ki - hn[ix]
         # Make sure that the current guess is not zero.
         # When Newton's Method suggests steps that lead to zero guesses
@@ -182,15 +183,15 @@ def w2k(w, theta=0.0, h=inf, g=9.81, count_limit=100):
             hn[ix[ksmall]] = ki[ksmall] - knew[ksmall]
 
         k[ix] = knew
-        # disp(['Iteration ',num2str(count),' Number of points left: ' num2str(length(ix)) ]),
+        # disp(['Iteration ',num2str(count),' Number of points left: '
+        #       num2str(length(ix)) ]),
         ix = find((abs(hn) > sqrt(eps) * abs(k)) * abs(hn) > sqrt(eps))
         count += 1
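The convergence test on the ix line above relies on boolean-times-float coercion: a component stays active only while its Newton step hn is large both relative to its k and in absolute terms. A tiny demonstration with made-up values:

    import numpy as np

    eps = np.finfo(float).eps
    hn = np.array([1e-3, 1e-12])
    k = np.array([1.0, 1.0])
    active = (np.abs(hn) > np.sqrt(eps) * np.abs(k)) * np.abs(hn) > np.sqrt(eps)
    print(np.flatnonzero(active))  # [0]: only the first entry keeps iterating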
     if count == count_limit:
-        txt1 = ''' W2K did not converge.
-        The maximum error in the last step was: %13.8f''' % max(hn[ix])
-        warnings.warn(txt1)
+        warnings.warn('W2K did not converge. The maximum error in the ' +
+                      'last step was: %13.8f' % max(hn[ix]))
 
     k.shape = oshape
@@ -198,9 +199,12 @@ def w2k(w, theta=0.0, h=inf, g=9.81, count_limit=100):
     k1 = k * cos(th)
     return k1, k2
 
 
-def main():
+
+def test_docstrings():
     import doctest
-    doctest.testmod()
+    print('Testing docstrings in %s' % __file__)
+    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
 
 if __name__ == '__main__':
-    main()
+    test_docstrings()
@@ -6,21 +6,25 @@ Created on 19. juli 2010
 import numpy as np
 from wafo.wave_theory.dispersion_relation import w2k, k2w  # @UnusedImport
 
+
 def test_k2w_infinite_water_depth():
     vals = k2w(np.arange(0.01, .5, 0.2))[0]
     true_vals = np.array([0.3132092, 1.43530485, 2.00551739])
     assert((np.abs(vals - true_vals) < 1e-7).all())
 
+
 def test_k2w_finite_water_depth():
     vals = k2w(np.arange(0.01, .5, 0.2), h=20)[0]
     true_vals = np.array([0.13914927, 1.43498213, 2.00551724])
     assert((np.abs(vals - true_vals) < 1e-7).all())
 
+
 def test_w2k_infinite_water_depth():
     vals = w2k(range(4))[0]
     true_vals = np.array([0., 0.1019368, 0.4077472, 0.91743119])
     assert((np.abs(vals - true_vals) < 1e-7).all())
 
+
 def test_w2k_finite_water_depth():
     vals = w2k(range(4), h=20)[0]
     true_vals = np.array([0., 0.10503601, 0.40774726, 0.91743119])
     assert((np.abs(vals - true_vals) < 1e-7).all())
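These regression checks run fine without a test framework; a minimal driver, assuming it is appended to this test module so the four functions above are in scope:

    if __name__ == '__main__':
        test_k2w_infinite_water_depth()
        test_k2w_finite_water_depth()
        test_w2k_infinite_water_depth()
        test_w2k_finite_water_depth()
        print('dispersion_relation tests passed')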