pbrod 8 years ago
commit 71b1ca0a17

@ -1,3 +1,4 @@
from .kdetools import *
from .gridding import *
from .kernels import *
from .demo import *

@ -0,0 +1,305 @@
'''
Created on 2 Jan 2017
@author: pab
'''
from __future__ import absolute_import, division
import scipy.stats
import numpy as np
import warnings
from wafo.plotbackend import plotbackend as plt
from wafo.kdetools import Kernel, TKDE, KDE, KRegression, BKRegression
try:
from wafo import fig
except ImportError:
warnings.warn('fig import only supported on Windows')
__all__ = ['kde_demo1', 'kde_demo2', 'kde_demo3', 'kde_demo4', 'kde_demo5',
'kreg_demo1', ]
def kde_demo1():
"""KDEDEMO1 Demonstrate the smoothing parameter impact on KDE.
KDEDEMO1 shows the true density (dotted) compared to KDE based on 7
observations (solid) and their individual kernels (dashed) for 3
different values of the smoothing parameter, hs.
"""
st = scipy.stats
x = np.linspace(-4, 4, 101)
x0 = x / 2.0
data = np.random.normal(loc=0, scale=1.0, size=7)
kernel = Kernel('gauss')
hs = kernel.hns(data)
h_vec = [hs / 2, hs, 2 * hs]
for ix, h in enumerate(h_vec):
plt.figure(ix)
kde = KDE(data, hs=h, kernel=kernel)
f2 = kde(x, output='plot', title='h_s = {0:2.2f}'.format(float(h)),
ylab='Density')
f2.plot('k-')
plt.plot(x, st.norm.pdf(x, 0, 1), 'k:')
n = len(data)
plt.plot(data, np.zeros(data.shape), 'bx')
y = kernel(x0) / (n * h * kernel.norm_factor(d=1, n=n))
for i in range(n):
plt.plot(data[i] + x0 * h, y, 'b--')
plt.plot([data[i], data[i]], [0, np.max(y)], 'b')
plt.axis([min(x), max(x), 0, 0.5])
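# The plot above is literally this sum: the KDE is an average of n rescaled
# kernel bumps, f(x) = sum_i K((x - x_i)/h) / (n*h).  A minimal plain-numpy
# sketch of that formula (illustrative only; the demos use the KDE class):
def _gauss_kde_by_hand(x, data, h):
    """Gaussian KDE at points x, written out as the kernel sum."""
    bumps = scipy.stats.norm.pdf((np.asarray(x)[:, None] - data[None, :]) / h)
    return bumps.sum(axis=-1) / (len(data) * h)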
def kde_demo2():
'''Demonstrate the difference between transformation- and ordinary-KDE.
KDEDEMO2 shows that the transformation KDE is a better estimate for
Rayleigh distributed data around 0 than the ordinary KDE.
'''
st = scipy.stats
data = st.rayleigh.rvs(scale=1, size=300)
x = np.linspace(1.5e-2, 5, 55)
kde = KDE(data)
f = kde(output='plot', title='Ordinary KDE (hs={0:})'.format(kde.hs))
plt.figure(0)
f.plot()
plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')
# plotnorm((data).^(L2)) # gives a straight line => L2 = 0.5 reasonable
hs = Kernel('gauss').get_smoothing(data**0.5)
tkde = TKDE(data, hs=hs, L2=0.5)
ft = tkde(x, output='plot',
title='Transformation KDE (hs={0:})'.format(tkde.tkde.hs))
plt.figure(1)
ft.plot()
plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')
plt.figure(0)
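# What TKDE(..., L2=0.5) does, in outline: estimate an ordinary KDE of the
# transformed data y = x**L2, then map the density back through the Jacobian
# of the transform, f_X(x) = f_Y(x**L2) * L2 * x**(L2 - 1), which keeps the
# estimate sensible at the x = 0 boundary.  A sketch assuming x > 0 (the real
# TKDE also handles bandwidths and grids):
def _tkde_by_hand(x, data, L2=0.5):
    """Hand-rolled transformation KDE for positive data."""
    x = np.asarray(x)
    kde_y = KDE(data ** L2)  # ordinary KDE in the transformed space
    return kde_y.eval_points(x ** L2) * L2 * x ** (L2 - 1)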
def kde_demo3():
    '''Demonstrate the difference between transformation- and ordinary-KDE in 2D.
KDEDEMO3 shows that the transformation KDE is a better estimate for
Rayleigh distributed data around 0 than the ordinary KDE.
'''
st = scipy.stats
data = st.rayleigh.rvs(scale=1, size=(2, 300))
# x = np.linspace(1.5e-3, 5, 55)
kde = KDE(data)
f = kde(output='plot', title='Ordinary KDE', plotflag=1)
plt.figure(0)
f.plot()
plt.plot(data[0], data[1], '.')
# plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable
hs = Kernel('gauss').get_smoothing(data**0.5)
tkde = TKDE(data, hs=hs, L2=0.5)
ft = tkde.eval_grid_fast(
output='plot', title='Transformation KDE', plotflag=1)
plt.figure(1)
ft.plot()
plt.plot(data[0], data[1], '.')
plt.figure(0)
def kde_demo4(N=50):
    '''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior
    for 1D multimodal distributions.
    KDEDEMO4 shows that the improved Sheather-Jones plug-in smoothing is
    better than the normal reference rules (in this case hns).
'''
st = scipy.stats
data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(N,)),
st.norm.rvs(loc=-5, scale=1, size=(N,))))
# x = np.linspace(1.5e-3, 5, 55)
kde = KDE(data, kernel=Kernel('gauss', 'hns'))
f = kde(output='plot', title='Ordinary KDE', plotflag=1)
kde1 = KDE(data, kernel=Kernel('gauss', 'hisj'))
f1 = kde1(output='plot', label='Ordinary KDE', plotflag=1)
plt.figure(0)
f.plot('r', label='hns={0}'.format(kde.hs))
# plt.figure(2)
f1.plot('b', label='hisj={0}'.format(kde1.hs))
x = np.linspace(-9, 9)
plt.plot(x, (st.norm.pdf(x, loc=-5, scale=1) +
st.norm.pdf(x, loc=5, scale=1)) / 2, 'k:',
label='True density')
plt.legend()
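# The two bandwidth rules compared above can also be inspected directly: hns
# is a normal-reference rule (derived for unimodal, roughly Gaussian data, so
# it oversmooths a bimodal sample), while hisj is the improved Sheather-Jones
# plug-in.  A small sketch:
def _print_bandwidths(data):
    """Print the two bandwidth choices compared in kde_demo4/kde_demo5."""
    kernel = Kernel('gauss')
    print('hns  =', kernel.hns(data))
    print('hisj =', kernel.hisj(data))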
def kde_demo5(N=500):
    '''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior
    for 2D multimodal distributions.
    KDEDEMO5 shows that the improved Sheather-Jones plug-in smoothing is
    better than the normal reference rules (in this case hns).
'''
st = scipy.stats
data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(2, N,)),
st.norm.rvs(loc=-5, scale=1, size=(2, N,))))
kde = KDE(data, kernel=Kernel('gauss', 'hns'))
f = kde(output='plot', plotflag=1,
title='Ordinary KDE, hns={0:s}'.format(str(list(kde.hs))))
kde1 = KDE(data, kernel=Kernel('gauss', 'hisj'))
f1 = kde1(output='plot', plotflag=1,
title='Ordinary KDE, hisj={0:s}'.format(str(list(kde1.hs))))
plt.figure(0)
plt.clf()
f.plot()
plt.plot(data[0], data[1], '.')
plt.figure(1)
plt.clf()
f1.plot()
plt.plot(data[0], data[1], '.')
def kreg_demo1(hs=None, fast=False, fun='hisj'):
"""Compare KRegression to KernelReg from statsmodels.nonparametric
"""
N = 100
# ei = np.random.normal(loc=0, scale=0.075, size=(N,))
ei = np.array([
-0.08508516, 0.10462496, 0.07694448, -0.03080661, 0.05777525,
0.06096313, -0.16572389, 0.01838912, -0.06251845, -0.09186784,
-0.04304887, -0.13365788, -0.0185279, -0.07289167, 0.02319097,
0.06887854, -0.08938374, -0.15181813, 0.03307712, 0.08523183,
-0.0378058, -0.06312874, 0.01485772, 0.06307944, -0.0632959,
0.18963205, 0.0369126, -0.01485447, 0.04037722, 0.0085057,
-0.06912903, 0.02073998, 0.1174351, 0.17599277, -0.06842139,
0.12587608, 0.07698113, -0.0032394, -0.12045792, -0.03132877,
0.05047314, 0.02013453, 0.04080741, 0.00158392, 0.10237899,
-0.09069682, 0.09242174, -0.15445323, 0.09190278, 0.07138498,
0.03002497, 0.02495252, 0.01286942, 0.06449978, 0.03031802,
0.11754861, -0.02322272, 0.00455867, -0.02132251, 0.09119446,
-0.03210086, -0.06509545, 0.07306443, 0.04330647, 0.078111,
-0.04146907, 0.05705476, 0.02492201, -0.03200572, -0.02859788,
-0.05893749, 0.00089538, 0.0432551, 0.04001474, 0.04888828,
-0.17708392, 0.16478644, 0.1171006, 0.11664846, 0.01410477,
-0.12458953, -0.11692081, 0.0413047, -0.09292439, -0.07042327,
0.14119701, -0.05114335, 0.04994696, -0.09520663, 0.04829406,
-0.01603065, -0.1933216, 0.19352763, 0.11819496, 0.04567619,
-0.08348306, 0.00812816, -0.00908206, 0.14528945, 0.02901065])
x = np.linspace(0, 1, N)
va_1 = 0.3 ** 2
va_2 = 0.7 ** 2
y0 = np.exp(-x ** 2 / (2 * va_1)) + 1.3*np.exp(-(x - 1) ** 2 / (2 * va_2))
y = y0 + ei
kernel = Kernel('gauss', fun=fun)
hopt = kernel.hisj(x)
kreg = KRegression(
x, y, p=0, hs=hs, kernel=kernel, xmin=-2 * hopt, xmax=1 + 2 * hopt)
    # Note: assigning kreg.__call__ = kreg.eval_grid_fast has no effect on
    # kreg(x) (special methods are looked up on the class), so dispatch
    # explicitly instead.
    eval_fun = kreg.eval_grid_fast if fast else kreg
    f = eval_fun(x, output='plot', title='Kernel regression', plotflag=1)
plt.figure(0)
f.plot(label='p=0')
kreg.p = 1
    f1 = eval_fun(x, output='plot', title='Kernel regression', plotflag=1)
f1.plot(label='p=1')
# print(f1.data)
plt.plot(x, y, '.', label='data')
plt.plot(x, y0, 'k', label='True model')
from statsmodels.nonparametric.kernel_regression import KernelReg
    kreg2 = KernelReg(y, x, var_type='c')
y2 = kreg2.fit(x)
plt.plot(x, y2[0], 'm', label='statsmodel')
plt.legend()
plt.show()
print(kreg.tkde.tkde._inv_hs)
print(kreg.tkde.tkde.hs)
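# KRegression with p=0 is the local-constant (Nadaraya-Watson) estimator,
#   m(x) = sum_i K((x - x_i)/h) * y_i / sum_i K((x - x_i)/h),
# while p=1 fits a local straight line instead of a constant.  A sketch of
# the p=0 curve with a Gaussian kernel (illustrative; KRegression does this
# internally):
def _nadaraya_watson(x, xi, yi, h):
    """Minimal local-constant kernel regression."""
    w = scipy.stats.norm.pdf((np.asarray(x)[:, None] - xi[None, :]) / h)
    return (w * yi).sum(axis=-1) / w.sum(axis=-1)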
def _get_data(n=100, symmetric=False, loc1=1.1, scale1=0.6, scale2=1.0):
"""
Return test data for binomial regression demo.
"""
st = scipy.stats
dist = st.norm
norm1 = scale2 * (dist.pdf(-loc1, loc=-loc1, scale=scale1) +
dist.pdf(-loc1, loc=loc1, scale=scale1))
def fun1(x):
return ((dist.pdf(x, loc=-loc1, scale=scale1) +
dist.pdf(x, loc=loc1, scale=scale1)) / norm1).clip(max=1.0)
x = np.sort(6 * np.random.rand(n, 1) - 3, axis=0)
y = (fun1(x) > np.random.rand(n, 1)).ravel()
# y = (np.cos(x)>2*np.random.rand(n, 1)-1).ravel()
x = x.ravel()
if symmetric:
xi = np.hstack((x.ravel(), -x.ravel()))
yi = np.hstack((y, y))
i = np.argsort(xi)
x = xi[i]
y = yi[i]
return x, y, fun1
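# In short: _get_data draws y_i ~ Bernoulli(p(x_i)) with success probability
# p = fun1, a two-bump curve clipped to [0, 1]; check_bkregression below then
# tries to recover p from the 0/1 responses alone.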
def check_bkregression():
"""
Check binomial regression
"""
plt.ion()
k = 0
for _i, n in enumerate([50, 100, 300, 600]):
x, y, fun1 = _get_data(n, symmetric=True, loc1=0.1,
scale1=0.6, scale2=0.75)
bkreg = BKRegression(x, y, a=0.05, b=0.05)
fbest = bkreg.prb_search_best(
hsfun='hste', alpha=0.05, color='g', label='Transit_D')
figk = plt.figure(k)
ax = figk.gca()
k += 1
# fbest.score.plot(axis=ax)
# axsize = ax.axis()
# ax.vlines(fbest.hs,axsize[2]+1,axsize[3])
# ax.set(yscale='log')
fbest.labels.title = 'N = {:d}'.format(n)
fbest.plot(axis=ax)
ax.plot(x, fun1(x), 'r')
ax.legend(frameon=False, markerscale=4)
# ax = plt.gca()
ax.set_yticklabels(ax.get_yticks() * 100.0)
ax.grid(True)
fig.tile(range(0, k))
plt.ioff()
plt.show('hold')
if __name__ == '__main__':
# kde_demo5()
# check_bkregression()
kreg_demo1(hs=0.04, fast=True)
plt.show('hold')

@ -21,18 +21,11 @@ from numpy import sqrt, atleast_2d, meshgrid
from numpy.fft import fftn, ifftn
from wafo.misc import nextpow2
from wafo.containers import PlotData
from wafo.plotbackend import plotbackend as plt
from wafo.testing import test_docstrings
from wafo.kdetools.kernels import iqrange, qlevels, Kernel
from wafo.kdetools.gridding import gridcount
try:
from wafo import fig
except ImportError:
warnings.warn('fig import only supported on Windows')
__all__ = ['TKDE', 'KDE', 'kde_demo1', 'kde_demo2', 'test_docstrings',
'KRegression', 'BKRegression']
__all__ = ['TKDE', 'KDE', 'test_docstrings', 'KRegression', 'BKRegression']
_TINY = np.finfo(float).machar.tiny
# _REALMIN = np.finfo(float).machar.xmin
@ -413,32 +406,11 @@ class TKDE(_KDE):
for i, v2 in enumerate(L2.tolist()):
factor = v2 * np.sign(v2) if v2 else 1
pdf *= np.where(v2 == 1, 1, points[i] ** (v2 - 1) * factor)
if (np.abs(np.diff(pdf)).max() > 10).any():
            msg = ''' Numerical problems may have occurred due to the power
            transformation. Check the KDE for spurious spikes'''
warnings.warn(msg)
return pdf
def eval_grid_fast2(self, *args, **kwds):
"""Evaluate the estimated pdf on a grid.
Parameters
----------
arg_0,arg_1,... arg_d-1 : vectors
            Alternatively, if no vectors are passed in, then
arg_i = gauss2dat(linspace(dat2gauss(self.xmin[i]),
dat2gauss(self.xmax[i]), self.inc))
output : string optional
'value' if value output
'data' if object output
Returns
-------
values : array-like
The values evaluated at meshgrid(*args).
"""
return self.eval_grid_fun(self._eval_grid_fast, *args, **kwds)
_assert_warn((np.abs(np.diff(pdf)).max() < 10).all(), '''
        Numerical problems may have occurred due to the power transformation.
Check the KDE for spurious spikes''')
return pdf
def _interpolate(self, points, f, *args, **kwds):
ipoints = meshgrid(*args) # if self.d > 1 else args
@ -1179,284 +1151,5 @@ class BKRegression(object):
return prb_best
def kde_demo1():
"""KDEDEMO1 Demonstrate the smoothing parameter impact on KDE.
KDEDEMO1 shows the true density (dotted) compared to KDE based on 7
observations (solid) and their individual kernels (dashed) for 3
different values of the smoothing parameter, hs.
"""
st = scipy.stats
x = np.linspace(-4, 4, 101)
x0 = x / 2.0
data = np.random.normal(loc=0, scale=1.0, size=7)
kernel = Kernel('gauss')
hs = kernel.hns(data)
hVec = [hs / 2, hs, 2 * hs]
for ix, h in enumerate(hVec):
plt.figure(ix)
kde = KDE(data, hs=h, kernel=kernel)
f2 = kde(x, output='plot', title='h_s = {0:2.2f}'.format(float(h)),
ylab='Density')
f2.plot('k-')
plt.plot(x, st.norm.pdf(x, 0, 1), 'k:')
n = len(data)
plt.plot(data, np.zeros(data.shape), 'bx')
y = kernel(x0) / (n * h * kernel.norm_factor(d=1, n=n))
for i in range(n):
plt.plot(data[i] + x0 * h, y, 'b--')
plt.plot([data[i], data[i]], [0, np.max(y)], 'b')
plt.axis([min(x), max(x), 0, 0.5])
def kde_demo2():
'''Demonstrate the difference between transformation- and ordinary-KDE.
KDEDEMO2 shows that the transformation KDE is a better estimate for
Rayleigh distributed data around 0 than the ordinary KDE.
'''
st = scipy.stats
data = st.rayleigh.rvs(scale=1, size=300)
x = np.linspace(1.5e-2, 5, 55)
kde = KDE(data)
f = kde(output='plot', title='Ordinary KDE (hs={0:})'.format(kde.hs))
plt.figure(0)
f.plot()
plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')
# plotnorm((data).^(L2)) # gives a straight line => L2 = 0.5 reasonable
hs = Kernel('gauss').get_smoothing(data**0.5)
tkde = TKDE(data, hs=hs, L2=0.5)
ft = tkde(x, output='plot',
title='Transformation KDE (hs={0:})'.format(tkde.tkde.hs))
plt.figure(1)
ft.plot()
plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')
plt.figure(0)
def kde_demo3():
    '''Demonstrate the difference between transformation- and ordinary-KDE in 2D.
KDEDEMO3 shows that the transformation KDE is a better estimate for
Rayleigh distributed data around 0 than the ordinary KDE.
'''
st = scipy.stats
data = st.rayleigh.rvs(scale=1, size=(2, 300))
# x = np.linspace(1.5e-3, 5, 55)
kde = KDE(data)
f = kde(output='plot', title='Ordinary KDE', plotflag=1)
plt.figure(0)
f.plot()
plt.plot(data[0], data[1], '.')
# plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable
hs = Kernel('gauss').get_smoothing(data**0.5)
tkde = TKDE(data, hs=hs, L2=0.5)
ft = tkde.eval_grid_fast(
output='plot', title='Transformation KDE', plotflag=1)
plt.figure(1)
ft.plot()
plt.plot(data[0], data[1], '.')
plt.figure(0)
def kde_demo4(N=50):
    '''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior
    for 1D multimodal distributions.
    KDEDEMO4 shows that the improved Sheather-Jones plug-in smoothing is
    better than the normal reference rules (in this case hns).
'''
st = scipy.stats
data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(N,)),
st.norm.rvs(loc=-5, scale=1, size=(N,))))
# x = np.linspace(1.5e-3, 5, 55)
kde = KDE(data, kernel=Kernel('gauss', 'hns'))
f = kde(output='plot', title='Ordinary KDE', plotflag=1)
kde1 = KDE(data, kernel=Kernel('gauss', 'hisj'))
f1 = kde1(output='plot', label='Ordinary KDE', plotflag=1)
plt.figure(0)
f.plot('r', label='hns={0}'.format(kde.hs))
# plt.figure(2)
f1.plot('b', label='hisj={0}'.format(kde1.hs))
x = np.linspace(-9, 9)
plt.plot(x, (st.norm.pdf(x, loc=-5, scale=1) +
st.norm.pdf(x, loc=5, scale=1)) / 2, 'k:',
label='True density')
plt.legend()
def kde_demo5(N=500):
    '''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior
    for 2D multimodal distributions.
    KDEDEMO5 shows that the improved Sheather-Jones plug-in smoothing is
    better than the normal reference rules (in this case hns).
'''
st = scipy.stats
data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(2, N,)),
st.norm.rvs(loc=-5, scale=1, size=(2, N,))))
kde = KDE(data, kernel=Kernel('gauss', 'hns'))
f = kde(output='plot', plotflag=1,
title='Ordinary KDE, hns={0:s}'.format(str(list(kde.hs))))
kde1 = KDE(data, kernel=Kernel('gauss', 'hisj'))
f1 = kde1(output='plot', plotflag=1,
title='Ordinary KDE, hisj={0:s}'.format(str(list(kde1.hs))))
plt.figure(0)
plt.clf()
f.plot()
plt.plot(data[0], data[1], '.')
plt.figure(1)
plt.clf()
f1.plot()
plt.plot(data[0], data[1], '.')
def kreg_demo1(hs=None, fast=False, fun='hisj'):
""""""
N = 100
# ei = np.random.normal(loc=0, scale=0.075, size=(N,))
ei = np.array([
-0.08508516, 0.10462496, 0.07694448, -0.03080661, 0.05777525,
0.06096313, -0.16572389, 0.01838912, -0.06251845, -0.09186784,
-0.04304887, -0.13365788, -0.0185279, -0.07289167, 0.02319097,
0.06887854, -0.08938374, -0.15181813, 0.03307712, 0.08523183,
-0.0378058, -0.06312874, 0.01485772, 0.06307944, -0.0632959,
0.18963205, 0.0369126, -0.01485447, 0.04037722, 0.0085057,
-0.06912903, 0.02073998, 0.1174351, 0.17599277, -0.06842139,
0.12587608, 0.07698113, -0.0032394, -0.12045792, -0.03132877,
0.05047314, 0.02013453, 0.04080741, 0.00158392, 0.10237899,
-0.09069682, 0.09242174, -0.15445323, 0.09190278, 0.07138498,
0.03002497, 0.02495252, 0.01286942, 0.06449978, 0.03031802,
0.11754861, -0.02322272, 0.00455867, -0.02132251, 0.09119446,
-0.03210086, -0.06509545, 0.07306443, 0.04330647, 0.078111,
-0.04146907, 0.05705476, 0.02492201, -0.03200572, -0.02859788,
-0.05893749, 0.00089538, 0.0432551, 0.04001474, 0.04888828,
-0.17708392, 0.16478644, 0.1171006, 0.11664846, 0.01410477,
-0.12458953, -0.11692081, 0.0413047, -0.09292439, -0.07042327,
0.14119701, -0.05114335, 0.04994696, -0.09520663, 0.04829406,
-0.01603065, -0.1933216, 0.19352763, 0.11819496, 0.04567619,
-0.08348306, 0.00812816, -0.00908206, 0.14528945, 0.02901065])
x = np.linspace(0, 1, N)
va_1 = 0.3 ** 2
va_2 = 0.7 ** 2
y0 = np.exp(-x ** 2 / (2 * va_1)) + 1.3*np.exp(-(x - 1) ** 2 / (2 * va_2))
y = y0 + ei
kernel = Kernel('gauss', fun=fun)
hopt = kernel.hisj(x)
kreg = KRegression(
x, y, p=0, hs=hs, kernel=kernel, xmin=-2 * hopt, xmax=1 + 2 * hopt)
    # Note: assigning kreg.__call__ = kreg.eval_grid_fast has no effect on
    # kreg(x) (special methods are looked up on the class), so dispatch
    # explicitly instead.
    eval_fun = kreg.eval_grid_fast if fast else kreg
    f = eval_fun(x, output='plot', title='Kernel regression', plotflag=1)
plt.figure(0)
f.plot(label='p=0')
kreg.p = 1
    f1 = eval_fun(x, output='plot', title='Kernel regression', plotflag=1)
f1.plot(label='p=1')
# print(f1.data)
plt.plot(x, y, '.', label='data')
plt.plot(x, y0, 'k', label='True model')
from statsmodels.nonparametric.kernel_regression import KernelReg
    kreg2 = KernelReg(y, x, var_type='c')
y2 = kreg2.fit(x)
plt.plot(x, y2[0], 'm', label='statsmodel')
plt.legend()
plt.show()
print(kreg.tkde.tkde._inv_hs)
print(kreg.tkde.tkde.hs)
def _get_data(n=100, symmetric=False, loc1=1.1, scale1=0.6, scale2=1.0):
st = scipy.stats
dist = st.norm
norm1 = scale2 * (dist.pdf(-loc1, loc=-loc1, scale=scale1) +
dist.pdf(-loc1, loc=loc1, scale=scale1))
def fun1(x):
return ((dist.pdf(x, loc=-loc1, scale=scale1) +
dist.pdf(x, loc=loc1, scale=scale1)) / norm1).clip(max=1.0)
x = np.sort(6 * np.random.rand(n, 1) - 3, axis=0)
y = (fun1(x) > np.random.rand(n, 1)).ravel()
# y = (np.cos(x)>2*np.random.rand(n, 1)-1).ravel()
x = x.ravel()
if symmetric:
xi = np.hstack((x.ravel(), -x.ravel()))
yi = np.hstack((y, y))
i = np.argsort(xi)
x = xi[i]
y = yi[i]
return x, y, fun1
def check_bkregression():
plt.ion()
k = 0
for _i, n in enumerate([50, 100, 300, 600]):
x, y, fun1 = _get_data(n, symmetric=True, loc1=0.1,
scale1=0.6, scale2=0.75)
bkreg = BKRegression(x, y, a=0.05, b=0.05)
fbest = bkreg.prb_search_best(
hsfun='hste', alpha=0.05, color='g', label='Transit_D')
figk = plt.figure(k)
ax = figk.gca()
k += 1
# fbest.score.plot(axis=ax)
# axsize = ax.axis()
# ax.vlines(fbest.hs,axsize[2]+1,axsize[3])
# ax.set(yscale='log')
fbest.labels.title = 'N = {:d}'.format(n)
fbest.plot(axis=ax)
ax.plot(x, fun1(x), 'r')
ax.legend(frameon=False, markerscale=4)
# ax = plt.gca()
ax.set_yticklabels(ax.get_yticks() * 100.0)
ax.grid(True)
fig.tile(range(0, k))
plt.ioff()
plt.show('hold')
if __name__ == '__main__':
if False:
test_docstrings(__file__)
else:
# kde_demo5()
# check_bkregression()
kreg_demo1(hs=0.04, fast=True)
plt.show('hold')

@ -484,6 +484,8 @@ class Kernel(object):
'laplace' - Laplace kernel.
'logistic' - Logistic kernel.
    Note that only the first 4 letters of the kernel name are needed.
    fun : string
        String defining the smoothing function/bandwidth.
Examples
--------
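    A minimal sketch of selecting the bandwidth rule by name (the data here
    is illustrative, so the resulting bandwidth will vary):

    >>> import numpy as np
    >>> import wafo.kdetools as wk
    >>> data = np.random.normal(size=50)
    >>> gauss = wk.Kernel('gauss', fun='hisj')
    >>> hs = gauss.get_smoothing(data)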

@ -26,6 +26,12 @@ class TestKde(unittest.TestCase):
0.72433808, 1.92973094, 0.44749838, 1.36508452])
self.x = np.linspace(0, max(self.data) + 1, 10)
def test_default_bandwidth_and_inc(self):
kde0 = wk.KDE(self.data, hs=-1, alpha=0.0, inc=None)
print(kde0.hs.tolist(), kde0.inc)
assert_allclose(kde0.hs, 0.19682759537327105)
assert_allclose(kde0.inc, 64)
def test0_KDE1D(self):
data, x = self.data, self.x
@ -36,6 +42,11 @@ class TestKde(unittest.TestCase):
0.52219649, 0.3906213, 0.26381501, 0.16407362,
0.08270612, 0.02991145, 0.00720821])
fx = kde0.eval_points(x)
assert_allclose(fx, [0.2039735, 0.40252503, 0.54595078,
0.52219649, 0.3906213, 0.26381501, 0.16407362,
0.08270612, 0.02991145, 0.00720821])
fx = kde0.eval_grid(x, r=1)
assert_allclose(-fx, [0.11911419724002906, 0.13440000694772541,
0.044400116190638696, -0.0677695267531197,
@ -85,17 +96,19 @@ class TestKde(unittest.TestCase):
x = np.linspace(0.01, max(data) + 1, 10)
kde = wk.TKDE(data, hs=0.5, L2=0.5)
f = kde(x)
assert_allclose(f, [1.03982714, 0.45839018, 0.39514782, 0.32860602,
0.26433318, 0.20717946, 0.15907684, 0.1201074,
0.08941027, 0.06574882])
f = kde.eval_points(x)
assert_allclose(f, [1.03982714, 0.45839018, 0.39514782, 0.32860602,
0.26433318, 0.20717946, 0.15907684, 0.1201074,
0.08941027, 0.06574882])
f = kde.eval_grid(x)
assert_allclose(f, [1.03982714, 0.45839018, 0.39514782, 0.32860602,
0.26433318, 0.20717946, 0.15907684, 0.1201074,
0.08941027, 0.06574882])
assert_allclose(np.trapz(f, x), 0.94787730659349068)
f = kde.eval_grid_fast(x)
assert_allclose(f, [1.0401892415290148, 0.45838973393693677,
0.39514689240671547, 0.32860531818532457,
0.2643330110605783, 0.20717975528556506,
0.15907696844388747, 0.12010770443337843,
0.08941129458260941, 0.06574899139165799])
f = kde.eval_grid_fast2(x)
assert_allclose(f, [1.0401892415290148, 0.45838973393693677,
0.39514689240671547, 0.32860531818532457,
0.2643330110605783, 0.20717975528556506,
@ -170,17 +183,28 @@ class TestKde(unittest.TestCase):
x = np.linspace(0, max(np.ravel(data)) + 1, 3)
kde0 = wk.KDE(data, hs=0.5, alpha=0.0, inc=512)
assert_allclose(kde0.eval_grid(x, x),
[[3.27260963e-02, 4.21654678e-02, 5.85338634e-04],
[6.78845466e-02, 1.42195839e-01, 1.41676003e-03],
[1.39466746e-04, 4.26983850e-03, 2.52736185e-05]])
f0 = kde0.eval_grid_fast(x, x, output='plot')
t = [[0.0443506097653615, 0.06433530873456418, 0.0041353838654317856],
[0.07218297149063724, 0.1235819591878892, 0.009288890372002473],
[0.001613328022214066, 0.00794857884864038, 0.0005874786787715641]
]
assert_allclose(kde0.eval_grid_fast(x, x), t)
assert_allclose(f0.data, t)
def test_2d_default_bandwidth(self):
# N = 20
# data = np.random.rayleigh(1, size=(2, N))
data = DATA2D
kde0 = wk.KDE(data, kernel=wk.Kernel('epan', 'hmns'), inc=512)
assert_allclose(kde0.hs, [[0.8838122391117693, 0.08341940479019105],
[0.08341940479019104, 0.7678179747855731]])
self.assertRaises(ValueError, kde0.eval_points, [1, 2, 3])
assert_allclose(kde0.eval_points([1, 2]), 0.11329600006973661)
class TestRegression(unittest.TestCase):
@ -313,9 +337,8 @@ class TestRegression(unittest.TestCase):
bkreg = wk.BKRegression(x, y, a=0.05, b=0.05)
fbest = bkreg.prb_search_best(hsfun='hste', alpha=0.05, color='g')
print(fbest.data[::10].tolist())
# print(fbest.data[::10].tolist())
assert_allclose(fbest.data[::10],
[1.80899736e-15, 0, 6.48351162e-16, 6.61404311e-15,
1.10010120e-12, 1.36709203e-10, 1.11994766e-08,
5.73040143e-07, 1.68974054e-05, 2.68633448e-04,
@ -328,6 +351,27 @@ class TestRegression(unittest.TestCase):
2.68633448e-04, 1.68974054e-05, 5.73040143e-07,
1.11994760e-08, 1.36708818e-10, 1.09965904e-12,
5.43806309e-15, 0.0, 0, 0], atol=1e-10)
bkreg = wk.BKRegression(x, y, method='wilson')
fbest = bkreg.prb_search_best(hsfun='hste', alpha=0.05, color='g')
assert_allclose(fbest.data[::10],
[3.2321397702105376e-15, 4.745626420805898e-17,
6.406118940191104e-16, 5.648884668051452e-16,
3.499875381296387e-16, 1.0090442883241678e-13,
4.264723863193633e-11, 9.29288388831705e-09,
9.610074789043923e-07, 4.086642453634508e-05,
0.0008305202502773989, 0.00909121197102206,
0.05490768364395013, 0.1876637145781381,
0.4483015169104682, 0.8666709816557657,
0.9916656713022183, 0.9996648903706271,
0.999990921956741, 0.9999909219567404,
0.999664890370625, 0.9916656713022127,
0.8666709816557588, 0.4483015169104501,
0.18766371457812697, 0.054907683643947366,
0.009091211971022042, 0.0008305202502770367,
4.086642453593762e-05, 9.610074786590158e-07,
9.292883469982049e-09, 4.264660017463372e-11,
1.005284921271869e-13, -0.0, -0.0, -0.0, -0.0, -0.0],
atol=1e-10)
if __name__ == "__main__":
# import sys;sys.argv = ['', 'Test.testName']

@ -3,10 +3,10 @@ from scipy.stats._distn_infrastructure import * # @UnusedWildImport
from scipy.stats._distn_infrastructure import (_skew, # @UnusedImport
_kurtosis, _ncx2_log_pdf, # @IgnorePep8 @UnusedImport
_ncx2_pdf, _ncx2_cdf) # @UnusedImport @IgnorePep8
from .estimation import FitDistribution
from .estimation import FitDistribution, rv_frozen # @Reimport
from ._constants import _XMAX, _XMIN
from wafo.misc import lazyselect as _lazyselect
from wafo.misc import lazywhere as _lazywhere
from wafo.misc import lazyselect as _lazyselect # @UnusedImport
from wafo.misc import lazywhere as _lazywhere # @UnusedImport
_doc_default_example = """\
Examples
@ -70,137 +70,6 @@ Accurate confidence interval with profile loglikelihood
"""
# Frozen RV class
class rv_frozen(object):
    ''' Frozen continuous or discrete 1D Random Variable object (RV)
Methods
-------
rvs(size=1)
Random variates.
pdf(x)
Probability density function.
cdf(x)
        Cumulative distribution function.
sf(x)
Survival function (1-cdf --- sometimes more accurate).
ppf(q)
Percent point function (inverse of cdf --- percentiles).
isf(q)
Inverse survival function (inverse of sf).
stats(moments='mv')
Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
moment(n)
n-th order non-central moment of distribution.
entropy()
(Differential) entropy of the RV.
interval(alpha)
Confidence interval with equal areas around the median.
expect(func, lb, ub, conditional=False)
Calculate expected value of a function with respect to the
distribution.
'''
def __init__(self, dist, *args, **kwds):
# create a new instance
self.dist = dist # .__class__(**dist._ctor_param)
shapes, loc, scale = self.dist._parse_args(*args, **kwds)
if isinstance(dist, rv_continuous):
self.par = shapes + (loc, scale)
else: # rv_discrete
self.par = shapes + (loc,)
self.a = self.dist.a
self.b = self.dist.b
self.shapes = self.dist.shapes
# @property
# def shapes(self):
# return self.dist.shapes
@property
def random_state(self):
return self.dist._random_state
@random_state.setter
def random_state(self, seed):
self.dist._random_state = check_random_state(seed)
def pdf(self, x):
''' Probability density function at x of the given RV.'''
return self.dist.pdf(x, *self.par)
def logpdf(self, x):
return self.dist.logpdf(x, *self.par)
def cdf(self, x):
'''Cumulative distribution function at x of the given RV.'''
return self.dist.cdf(x, *self.par)
def logcdf(self, x):
return self.dist.logcdf(x, *self.par)
def ppf(self, q):
'''Percent point function (inverse of cdf) at q of the given RV.'''
return self.dist.ppf(q, *self.par)
def isf(self, q):
'''Inverse survival function at q of the given RV.'''
return self.dist.isf(q, *self.par)
def rvs(self, size=None, random_state=None):
kwds = {'size': size, 'random_state': random_state}
return self.dist.rvs(*self.par, **kwds)
def sf(self, x):
'''Survival function (1-cdf) at x of the given RV.'''
return self.dist.sf(x, *self.par)
def logsf(self, x):
return self.dist.logsf(x, *self.par)
def stats(self, moments='mv'):
''' Some statistics of the given RV'''
kwds = dict(moments=moments)
return self.dist.stats(*self.par, **kwds)
def median(self):
return self.dist.median(*self.par)
def mean(self):
return self.dist.mean(*self.par)
def var(self):
return self.dist.var(*self.par)
def std(self):
return self.dist.std(*self.par)
def moment(self, n):
return self.dist.moment(n, *self.par)
def entropy(self):
return self.dist.entropy(*self.par)
def pmf(self, k):
'''Probability mass function at k of the given RV'''
return self.dist.pmf(k, *self.par)
def logpmf(self, k):
return self.dist.logpmf(k, *self.par)
def interval(self, alpha):
return self.dist.interval(alpha, *self.par)
def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds):
if isinstance(self.dist, rv_continuous):
            a, loc, scale = self.par[:-2], self.par[-2], self.par[-1]
return self.dist.expect(func, a, loc, scale, lb, ub, conditional,
**kwds)
a, loc = self.par[:-1], self.par[-1]
if kwds:
raise ValueError("Discrete expect does not accept **kwds.")
return self.dist.expect(func, a, loc, lb, ub, conditional)
def freeze(self, *args, **kwds):
"""Freeze the distribution for the given arguments.
@ -219,41 +88,6 @@ def freeze(self, *args, **kwds):
return rv_frozen(self, *args, **kwds)
# def link(self, x, logSF, theta, i):
# '''
# Return theta[i] as function of quantile, survival probability and
# theta[j] for j!=i.
#
# Parameters
# ----------
# x : quantile
# logSF : logarithm of the survival probability
# theta : list
# all distribution parameters including location and scale.
#
# Returns
# -------
# theta[i] : real scalar
# fixed distribution parameter theta[i] as function of x, logSF and
# theta[j] where j != i.
#
# LINK is a function connecting the fixed distribution parameter theta[i]
# with the quantile (x) and the survival probability (SF) and the
# remaining free distribution parameters theta[j] for j!=i, i.e.:
# theta[i] = link(x, logSF, theta, i),
# where logSF = log(Prob(X>x; theta)).
#
# See also
# estimation.Profile
# '''
# return self._link(x, logSF, theta, i)
#
#
# def _link(self, x, logSF, theta, i):
# msg = 'Link function not implemented for the {} distribution'
# raise NotImplementedError(msg.format(self.name))
def _resolve_ties(self, log_dprb, x, args, scale):
dx = np.diff(x, axis=0)
tie = dx == 0
@ -293,7 +127,6 @@ def _nlogps_and_penalty(self, x, scale, args):
if n_bad > 0:
penalty = 100.0 * np.log(_XMAX) * n_bad
return -np.sum(log_dprb[finite_log_dprb], axis=0) + penalty
return -np.sum(log_dprb, axis=0)
@ -353,7 +186,7 @@ def _nnlf_and_penalty(self, x, args):
return -np.sum(logpdf, axis=0)
def _penalized_nnlf(self, theta, x, penalty=None):
def _penalized_nnlf(self, theta, x):
''' Return negative loglikelihood function,
i.e., - sum (log pdf(x, theta), axis=0)
where theta are the parameters (including loc and scale)
@ -622,8 +455,7 @@ def _open_support_mask(self, x):
rv_generic.freeze = freeze
rv_discrete.freeze = freeze
rv_continuous.freeze = freeze
#rv_continuous.link = link
#rv_continuous._link = _link
rv_continuous._penalized_nlogps = _penalized_nlogps
rv_continuous._penalized_nnlf = _penalized_nnlf
rv_continuous._reduce_func = _reduce_func

@ -9,11 +9,11 @@ Author: Per A. Brodtkorb 2008
from __future__ import division, absolute_import
import warnings
from scipy.stats import rv_continuous
from scipy.stats._distn_infrastructure import check_random_state
from wafo.plotbackend import plotbackend as plt
from wafo.misc import ecross, findcross
from wafo.stats._constants import _EPS
from wafo.stats._distn_infrastructure import rv_frozen, rv_continuous
from scipy._lib.six import string_types
import numdifftools as nd # @UnresolvedImport
from scipy import special
@ -36,6 +36,26 @@ arr = asarray
# all = alltrue # @ReservedAssignment
def _assert_warn(cond, msg):
if not cond:
warnings.warn(msg)
def _assert(cond, msg):
if not cond:
raise ValueError(msg)
def _assert_index(cond, msg):
if not cond:
raise IndexError(msg)
def _assert_not_implemented(cond, msg):
if not cond:
raise NotImplementedError(msg)
def _burr_link(x, logsf, phat, ix):
c, d, loc, scale = phat
logp = log(-expm1(logsf))
@ -90,43 +110,43 @@ def _genpareto_link(x, logsf, phat, ix):
# Stuart Coles (2004)
# "An introduction to statistical modelling of extreme values".
# Springer series in statistics
_assert_not_implemented(ix != 0, 'link(x,logsf,phat,i) where i=0 is '
'not implemented!')
c, loc, scale = phat
if c == 0:
return _expon_link(x, logsf, phat[1:], ix-1)
if ix == 2:
# Reorganizing w.r.t.scale, Eq. 4.13 and 4.14, pp 81 in
# Coles (2004) gives
# link = -(x-loc)*c/expm1(-c*logsf)
if c != 0.0:
phati = (x - loc) * c / expm1(-c * logsf)
else:
phati = -(x - loc) / logsf
elif ix == 1:
if c != 0:
phati = x + scale * expm1(c * logsf) / c
else:
phati = x + scale * logsf
elif ix == 0:
raise NotImplementedError(
'link(x,logsf,phat,i) where i=0 is not implemented!')
else:
return (x - loc) * c / expm1(-c * logsf)
if ix == 1:
return x + scale * expm1(c * logsf) / c
raise IndexError('Index to the fixed parameter is out of bounds')
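# Where the ix == 2 branch comes from, under scipy's genpareto convention
# SF(x) = (1 + c*(x - loc)/scale)**(-1/c):
#   logsf = -log1p(c*(x - loc)/scale)/c
#   =>  c*(x - loc)/scale = expm1(-c*logsf)
#   =>  scale = (x - loc)*c/expm1(-c*logsf),
# which is the value returned above.  The shape c itself (ix == 0) has no
# closed-form link, hence the _assert_not_implemented guard.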
def _gumbel_r_link(x, logsf, phat, ix):
loc, scale = phat
loglogP = log(-log(-expm1(logsf)))
if ix == 1:
return -(x - loc) / loglogP
    if ix == 0:
return x + scale * loglogP
raise IndexError('Index to the fixed parameter is out of bounds')
return phati
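# Same idea for gumbel_r, whose CDF is exp(-exp(-(x - loc)/scale)): with
# loglogP = log(-log(-expm1(logsf))) = -(x - loc)/scale, solving gives
# scale = -(x - loc)/loglogP (ix == 1) and loc = x + scale*loglogP (ix == 0).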
def _genextreme_link(x, logsf, phat, ix):
_assert_not_implemented(ix != 0, 'link(x,logsf,phat,i) where i=0 is not '
'implemented!')
c, loc, scale = phat
if c == 0:
return _gumbel_r_link(x, logsf, phat[1:], ix-1)
loglogP = log(-log(-expm1(logsf)))
if ix == 2:
# link = -(x-loc)*c/expm1(c*log(-logP))
if c != 0.0:
return -(x - loc) * c / expm1(c * loglogP)
return -(x - loc) / loglogP
if ix == 1:
if c != 0:
return x + scale * expm1(c * loglogP) / c
return x + scale * loglogP
if ix == 0:
raise NotImplementedError(
'link(x,logsf,phat,i) where i=0 is not implemented!')
raise IndexError('Index to the fixed parameter is out of bounds')
@ -168,6 +188,7 @@ LINKS = dict(expon=_expon_link,
frechet_r=_weibull_min_link,
genpareto=_genpareto_link,
genexpon=_genexpon_link,
gumbel_r=_gumbel_r_link,
rayleigh=_rayleigh_link,
             truncrayleigh=_truncrayleigh_link,
genextreme=_genextreme_link,
@ -259,7 +280,7 @@ class Profile(object):
self._set_indexes(fit_dist, i)
method = fit_dist.method.lower()
self._set_plot_labels(fit_dist, method)
self._set_plot_labels(method)
Lmax = self._loglike_max(fit_dist, method)
self.Lmax = Lmax
@ -268,13 +289,16 @@ class Profile(object):
self._set_profile()
def _set_plot_labels(self, fit_dist, method):
def _set_plot_labels(self, method, title='', xlabel=''):
if not title:
title = '{:s} params'.format(self.fit_dist.dist.name)
if not xlabel:
xlabel = 'phat[{}]'.format(np.ravel(self.i_fixed)[0])
percent = 100 * (1.0 - self.alpha)
self.title = '{:g}% CI for {:s} params'.format(percent,
fit_dist.dist.name)
self.title = '{:g}% CI for {:s}'.format(percent, title)
like_txt = 'likelihood' if method == 'ml' else 'product spacing'
self.ylabel = 'Profile log' + like_txt
self.xlabel = 'phat[{}]'.format(np.ravel(self.i_fixed)[0])
self.xlabel = xlabel
@staticmethod
def _loglike_max(fit_dist, method):
@ -400,7 +424,17 @@ class Profile(object):
warnings.warn(str(err))
def _get_variance(self):
return self.fit_dist.par_cov[self.i_fixed, :][:, self.i_fixed]
invfun = getattr(self, '_myinvfun', None)
if invfun is not None:
i_notfixed = self.i_notfixed
pcov = self.fit_dist.par_cov[i_notfixed, :][:, i_notfixed]
gradfun = nd.Gradient(invfun)
phatv = self._par
drl = gradfun(phatv[i_notfixed])
pvar = np.sum(np.dot(drl, pcov) * drl)
return pvar
pvar = self.fit_dist.par_cov[self.i_fixed, :][:, self.i_fixed]
return pvar
def _approx_p_min_max(self, p_opt):
pvar = self._get_variance()
@ -414,9 +448,9 @@ class Profile(object):
p_low, p_up = self._approx_p_min_max(p_opt)
pmin, pmax = self.pmin, self.pmax
if pmin is None:
pmin = self._search_pmin(phatfree0, p_low, p_opt)
pmin = self._search_p_min_max(phatfree0, p_low, p_opt, 'min')
if pmax is None:
pmax = self._search_pmax(phatfree0, p_up, p_opt)
pmax = self._search_p_min_max(phatfree0, p_up, p_opt, 'max')
return pmin, pmax
def _adaptive_pvec(self, p_opt, pmin, pmax):
@ -438,58 +472,43 @@ class Profile(object):
return self._adaptive_pvec(p_opt, pmin, pmax)
return np.linspace(self.pmin, self.pmax, self.n)
def _search_pmin(self, phatfree0, p_min0, p_opt):
phatfree = phatfree0.copy()
dp = np.maximum((p_opt - p_min0)/40, 0.01)*10
Lmax, phatfree = self._profile_optimum(phatfree, p_opt)
p_min_opt = p_min0
for j in range(51):
p_min = p_opt - dp
Lmax, phatfree = self._profile_optimum(phatfree, p_min)
# print((dp, p_min, p_min_opt, Lmax))
def _update_p_opt(self, p_minmax_opt, dp, Lmax, p_minmax, j):
# print((dp, p_minmax, p_minmax_opt, Lmax))
converged = False
if np.isnan(Lmax):
dp *= 0.33
elif Lmax < self.alpha_cross_level - self.alpha_Lrange * 5 * (j + 1):
p_min_opt = p_min
p_minmax_opt = p_minmax
dp *= 0.33
elif Lmax < self.alpha_cross_level:
p_min_opt = p_min
break
p_minmax_opt = p_minmax
converged = True
else:
dp *= 1.67
else:
msg = 'Exceeded max iterations. (p_min0={}, p_min={}, p={})'
warnings.warn(msg.format(p_min0, p_min_opt, p_opt))
# print('search_pmin iterations={}'.format(j))
return p_min_opt
return p_minmax_opt, dp, converged
def _search_pmax(self, phatfree0, p_max0, p_opt):
def _search_p_min_max(self, phatfree0, p_minmax0, p_opt, direction):
phatfree = phatfree0.copy()
dp = (p_max0 - p_opt)/40
if dp < 1e-2:
dp = 0.1
sign = dict(min=-1, max=1)[direction]
dp = np.maximum(sign*(p_minmax0 - p_opt) / 40, 0.01) * 10
Lmax, phatfree = self._profile_optimum(phatfree, p_opt)
p_max_opt = p_opt
for j in range(51):
p_max = p_opt + dp
Lmax, phatfree = self._profile_optimum(phatfree, p_max)
if np.isnan(Lmax):
dp *= 0.33
elif Lmax < self.alpha_cross_level - self.alpha_Lrange*5*(j+1):
p_max_opt = p_max
dp *= 0.33
elif Lmax < self.alpha_cross_level:
p_max_opt = p_max
break
else:
dp *= 1.67
else:
msg = 'Exceeded max iterations. (p={}, p_max={}, p_max0 = {})'
warnings.warn(msg.format(p_opt, p_max_opt, p_max0))
# print('search_pmax iterations={}'.format(j))
return p_max_opt
p_minmax_opt = p_minmax0
j = 0
converged = False
# for j in range(51):
while j < 51 and not converged:
j += 1
p_minmax = p_opt + sign * dp
Lmax, phatfree = self._profile_optimum(phatfree, p_minmax)
p_minmax_opt, dp, converged = self._update_p_opt(p_minmax_opt, dp,
Lmax, p_minmax, j)
_assert_warn(j < 50, 'Exceeded max iterations. '
'(p_{0}0={1}, p_{0}={2}, p={3})'.format(direction,
p_minmax0,
p_minmax_opt,
p_opt))
# print('search_pmin iterations={}'.format(j))
return p_minmax_opt
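    # The search brackets the CI endpoint geometrically: _update_p_opt shrinks
    # the step dp (by a factor ~1/3) when the profile value is NaN or far
    # below the crossing level, grows it (by ~5/3) while still above it, and
    # stops once Lmax drops below alpha_cross_level.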
def _profile_fun(self, free_par, fix_par):
''' Return negative of loglike or logps function
@ -504,37 +523,40 @@ class Profile(object):
par[self.i_fixed] = self._local_link(fix_par, par)
return self.fit_dist.fitfun(par)
def get_bounds(self, alpha=0.05):
'''Return confidence interval for profiled parameter
'''
if alpha < self.alpha:
msg = 'Might not be able to return bounds with alpha less than {}'
warnings.warn(msg.format(self.alpha))
cross_level = self.Lmax - 0.5 * chi2isf(alpha, 1)
ind = findcross(self.data, cross_level)
n = len(ind)
def _check_bounds(self, cross_level, ind, n):
if n == 0:
warnings.warn('''Number of crossings is zero, i.e.,
upper and lower bound is not found!''')
bounds = (self.pmin, self.pmax)
warnings.warn('Number of crossings is zero, i.e., upper and lower '
'bound is not found!')
bounds = self.pmin, self.pmax
elif n == 1:
x0 = ecross(self.args, self.data, ind, cross_level)
is_upcrossing = self.data[ind] < self.data[ind + 1]
if is_upcrossing:
bounds = (x0, self.pmax)
bounds = x0, self.pmax
warnings.warn('Upper bound is larger')
else:
bounds = (self.pmin, x0)
bounds = self.pmin, x0
warnings.warn('Lower bound is smaller')
elif n == 2:
bounds = ecross(self.args, self.data, ind, cross_level)
else:
warnings.warn('Number of crossings too large! Something is wrong!')
bounds = ecross(self.args, self.data, ind[[0, -1]], cross_level)
return bounds
def get_bounds(self, alpha=0.05):
'''Return confidence interval for profiled parameter
'''
_assert_warn(self.alpha <= alpha, 'Might not be able to return bounds '
'with alpha less than {}'.format(self.alpha))
cross_level = self.Lmax - 0.5 * chi2isf(alpha, 1)
ind = findcross(self.data, cross_level)
n = len(ind)
if n == 2:
bounds = ecross(self.args, self.data, ind, cross_level)
else:
bounds = self._check_bounds(cross_level, ind, n)
return bounds
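    # This is the standard profile-likelihood interval: the 100*(1 - alpha)%
    # CI is the set {p : Lmax - L_p(p) <= 0.5*chi2isf(alpha, 1)}, so the
    # bounds are the crossings of the profile curve self.data with
    # cross_level.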
def plot(self, axis=None):
'''
Plot profile function for p_opt with 100(1-alpha)% confidence interval.
@ -561,8 +583,25 @@ class Profile(object):
def plot_all_profiles(phats, plot=None):
if plot is not None:
plt = plot
def _remove_title_or_ylabel(plt, n, j):
if j != 0:
plt.title('')
if j != n // 2:
plt.ylabel('')
def _profile(phats, k):
profile_phat_k = Profile(phats, i=k)
m = 0
while hasattr(profile_phat_k, 'best_par') and m < 7:
# iterate to find optimum phat!
phats.fit(*profile_phat_k.best_par)
profile_phat_k = Profile(phats, i=k)
m += 1
return profile_phat_k
if plot is None:
plot = plt
if phats.par_fix:
indices = phats.i_notfixed
@ -571,20 +610,12 @@ def plot_all_profiles(phats, plot=None):
n = len(indices)
for j, k in enumerate(indices):
plt.subplot(n, 1, j+1)
profile_phat_k = Profile(phats, i=k)
m = 0
while hasattr(profile_phat_k, 'best_par') and m < 7:
phats.fit(*profile_phat_k.best_par)
profile_phat_k = Profile(phats, i=k)
m += 1
profile_phat_k = _profile(phats, k)
profile_phat_k.plot()
if j != 0:
plt.title('')
if j != n//2:
plt.ylabel('')
plt.subplots_adjust(hspace=0.5)
_remove_title_or_ylabel(plt, n, j)
plot.subplots_adjust(hspace=0.5)
par_txt = ('{:1.2g}, '*len(phats.par))[:-2].format(*phats.par)
    plt.suptitle('phat = [{}] (fit method: {})'.format(par_txt, phats.method))
    plot.suptitle('phat = [{}] (fit method: {})'.format(par_txt, phats.method))
return phats
@ -681,21 +712,9 @@ class ProfileQuantile(Profile):
prb = exp(self.log_sf)
return self.fit_dist.dist.isf(prb, *mphat)
def _get_variance(self):
i_notfixed = self.i_notfixed
phatv = self._par
gradfun = nd.Gradient(self._myinvfun)
drl = gradfun(phatv[self.i_notfixed])
pcov = self.fit_dist.par_cov[i_notfixed, :][:, i_notfixed]
pvar = np.sum(np.dot(drl, pcov) * drl)
return pvar
def _set_plot_labels(self, fit_dist, method):
super(ProfileQuantile, self)._set_plot_labels(fit_dist, method)
percent = 100 * (1.0 - self.alpha)
self.title = '{:g}% CI for {:s} quantile'.format(percent,
fit_dist.dist.name)
self.xlabel = 'x'
def _set_plot_labels(self, method, title='', xlabel='x'):
title = '{:s} quantile'.format(self.fit_dist.dist.name)
super(ProfileQuantile, self)._set_plot_labels(method, title, xlabel)
class ProfileProbability(Profile):
@ -783,27 +802,151 @@ class ProfileProbability(Profile):
fix_par = self.link(self.x, fixed_log_sf, par, self.i_fixed)
return fix_par
def _myprbfun(self, phatnotfixed):
def _myinvfun(self, phatnotfixed):
"""_myprbfun"""
mphat = self._par.copy()
mphat[self.i_notfixed] = phatnotfixed
logsf = self.fit_dist.dist.logsf(self.x, *mphat)
return np.where(np.isfinite(logsf), logsf, np.nan)
def _get_variance(self):
i_notfixed = self.i_notfixed
phatv = self._par
gradfun = nd.Gradient(self._myprbfun)
drl = gradfun(phatv[self.i_notfixed])
pcov = self.fit_dist.par_cov[i_notfixed, :][:, i_notfixed]
pvar = np.sum(np.dot(drl, pcov) * drl)
return pvar
def _set_plot_labels(self, method, title='', xlabel=''):
title = '{:s} probability'.format(self.fit_dist.dist.name)
xlabel = 'log(sf)'
super(ProfileProbability, self)._set_plot_labels(method, title, xlabel)
def _set_plot_labels(self, fit_dist, method):
super(ProfileProbability, self)._set_plot_labels(fit_dist, method)
percent = 100 * (1.0 - self.alpha)
self.title = '{:g}% CI for {:s} probability'.format(percent,
fit_dist.dist.name)
self.xlabel = 'log(sf)'
# Frozen RV class
class rv_frozen(object):
    ''' Frozen continuous or discrete 1D Random Variable object (RV)
Methods
-------
rvs(size=1)
Random variates.
pdf(x)
Probability density function.
cdf(x)
        Cumulative distribution function.
sf(x)
Survival function (1-cdf --- sometimes more accurate).
ppf(q)
Percent point function (inverse of cdf --- percentiles).
isf(q)
Inverse survival function (inverse of sf).
stats(moments='mv')
Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
moment(n)
n-th order non-central moment of distribution.
entropy()
(Differential) entropy of the RV.
interval(alpha)
Confidence interval with equal areas around the median.
expect(func, lb, ub, conditional=False)
Calculate expected value of a function with respect to the
distribution.
'''
def __init__(self, dist, *args, **kwds):
# create a new instance
self.dist = dist # .__class__(**dist._ctor_param)
shapes, loc, scale = self.dist._parse_args(*args, **kwds)
if isinstance(dist, rv_continuous):
self.par = shapes + (loc, scale)
else: # rv_discrete
self.par = shapes + (loc,)
# a, b may be set in _argcheck, depending on *args, **kwds. Ouch.
self.dist._argcheck(*shapes)
self.a = self.dist.a
self.b = self.dist.b
self.shapes = self.dist.shapes
# @property
# def shapes(self):
# return self.dist.shapes
@property
def random_state(self):
return self.dist._random_state
@random_state.setter
def random_state(self, seed):
self.dist._random_state = check_random_state(seed)
def pdf(self, x):
''' Probability density function at x of the given RV.'''
return self.dist.pdf(x, *self.par)
def logpdf(self, x):
return self.dist.logpdf(x, *self.par)
def cdf(self, x):
'''Cumulative distribution function at x of the given RV.'''
return self.dist.cdf(x, *self.par)
def logcdf(self, x):
return self.dist.logcdf(x, *self.par)
def ppf(self, q):
'''Percent point function (inverse of cdf) at q of the given RV.'''
return self.dist.ppf(q, *self.par)
def isf(self, q):
'''Inverse survival function at q of the given RV.'''
return self.dist.isf(q, *self.par)
def rvs(self, size=None, random_state=None):
kwds = {'size': size, 'random_state': random_state}
return self.dist.rvs(*self.par, **kwds)
def sf(self, x):
'''Survival function (1-cdf) at x of the given RV.'''
return self.dist.sf(x, *self.par)
def logsf(self, x):
return self.dist.logsf(x, *self.par)
def stats(self, moments='mv'):
''' Some statistics of the given RV'''
kwds = dict(moments=moments)
return self.dist.stats(*self.par, **kwds)
def median(self):
return self.dist.median(*self.par)
def mean(self):
return self.dist.mean(*self.par)
def var(self):
return self.dist.var(*self.par)
def std(self):
return self.dist.std(*self.par)
def moment(self, n):
return self.dist.moment(n, *self.par)
def entropy(self):
return self.dist.entropy(*self.par)
def pmf(self, k):
'''Probability mass function at k of the given RV'''
return self.dist.pmf(k, *self.par)
def logpmf(self, k):
return self.dist.logpmf(k, *self.par)
def interval(self, alpha):
return self.dist.interval(alpha, *self.par)
def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds):
if isinstance(self.dist, rv_continuous):
            a, loc, scale = self.par[:-2], self.par[-2], self.par[-1]
return self.dist.expect(func, a, loc, scale, lb, ub, conditional,
**kwds)
a, loc = self.par[:-1], self.par[-1]
if kwds:
raise ValueError("Discrete expect does not accept **kwds.")
return self.dist.expect(func, a, loc, lb, ub, conditional)
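# Usage sketch for the frozen wrapper (any continuous distribution works the
# same way; the names below are illustrative):
#
#     from scipy.stats import norm
#     frozen = rv_frozen(norm, loc=0.0, scale=2.0)
#     frozen.pdf(1.0); frozen.cdf(1.0); frozen.rvs(size=5)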
class FitDistribution(rv_frozen):
@ -1015,7 +1158,7 @@ class FitDistribution(rv_frozen):
t.append('%s = %s\n' % (par, str(getattr(self, par))))
return ''.join(t)
def _reduce_func(self, args, kwds):
def _convert_fshapes2fnum(self, kwds):
# First of all, convert fshapes params to fnum: eg for stats.beta,
# shapes='a, b'. To fix `a`, can specify either `f1` or `fa`.
# Convert the latter into the former.
@ -1030,10 +1173,13 @@ class FitDistribution(rv_frozen):
raise ValueError("Duplicate entry for %s." % key)
else:
kwds[key] = val
return kwds
def _unpack_args_kwds(self, args, kwds):
kwds = self._convert_fshapes2fnum(kwds)
args = list(args)
nargs = len(args)
fixedn = []
names = ['f%d' % n for n in range(nargs - 2)] + ['floc', 'fscale']
names = ['f%d' % n for n in range(len(args) - 2)] + ['floc', 'fscale']
x0 = []
for n, key in enumerate(names):
if key in kwds:
@ -1041,7 +1187,12 @@ class FitDistribution(rv_frozen):
args[n] = kwds.pop(key)
else:
x0.append(args[n])
return x0, args, fixedn
def _reduce_func(self, args, kwds):
x0, args, fixedn = self._unpack_args_kwds(args, kwds)
nargs = len(args)
fitfun = self._fitfun
if len(fixedn) == 0:
@ -1322,6 +1473,29 @@ class FitDistribution(rv_frozen):
ci.append((np.nan, np.nan))
return np.array(ci)
def _fit_summary_text(self):
fixstr = ''
if self.par_fix is not None:
numfix = len(self.i_fixed)
if numfix > 0:
format0 = ', '.join(['%d'] * numfix)
format1 = ', '.join(['%g'] * numfix)
phatistr = format0 % tuple(self.i_fixed)
phatvstr = format1 % tuple(self.par[self.i_fixed])
fixstr = 'Fixed: phat[{0:s}] = {1:s} '.format(phatistr,
phatvstr)
subtxt = ('Fit method: {0:s}, Fit p-value: {1:2.2f} {2:s}, ' +
'phat=[{3:s}], {4:s}')
par_txt = ('{:1.2g}, ' * len(self.par))[:-2].format(*self.par)
try:
LL_txt = 'Lps_max={:2.2g}, Ll_max={:2.2g}'.format(self.LPSmax,
self.LLmax)
except Exception:
LL_txt = 'Lps_max={}, Ll_max={}'.format(self.LPSmax, self.LLmax)
txt = subtxt.format(self.method.upper(), self.pvalue, fixstr, par_txt,
LL_txt)
return txt
def plotfitsummary(self, axes=None, fig=None):
        ''' Plot various diagnostic plots to assess the quality of the fit.
@ -1345,29 +1519,9 @@ class FitDistribution(rv_frozen):
if fig is None:
fig = plt.gcf()
fixstr = ''
if self.par_fix is not None:
numfix = len(self.i_fixed)
if numfix > 0:
format0 = ', '.join(['%d'] * numfix)
format1 = ', '.join(['%g'] * numfix)
phatistr = format0 % tuple(self.i_fixed)
phatvstr = format1 % tuple(self.par[self.i_fixed])
fixstr = 'Fixed: phat[{0:s}] = {1:s} '.format(phatistr,
phatvstr)
subtxt = ('Fit method: {0:s}, Fit p-value: {1:2.2f} {2:s}, ' +
'phat=[{3:s}], {4:s}')
par_txt = ('{:1.2g}, '*len(self.par))[:-2].format(*self.par)
try:
LL_txt = 'Lps_max={:2.2g}, Ll_max={:2.2g}'.format(self.LPSmax,
self.LLmax)
except Exception:
LL_txt = 'Lps_max={}, Ll_max={}'.format(self.LPSmax, self.LLmax)
try:
fig.text(0.05, 0.01, subtxt.format(self.method.upper(),
self.pvalue, fixstr, par_txt,
LL_txt))
txt = self._fit_summary_text()
fig.text(0.05, 0.01, txt)
except AttributeError:
pass
@ -1560,8 +1714,8 @@ def test1():
phat = FitDistribution(dist, R, floc=0.5, method='ml')
phats = FitDistribution(dist, R, floc=0.5, method='mps')
# import matplotlib.pyplot as plt
# plt.figure(0)
# plot_all_profiles(phat, plot=plt)
plt.figure(0)
plot_all_profiles(phat, plot=plt)
plt.figure(1)
phats.plotfitsummary()
@ -1596,5 +1750,5 @@ def test1():
if __name__ == '__main__':
test1()
# test_doctstrings()
# test1()
test_doctstrings()
