pbrod 8 years ago
commit 71b1ca0a17

@ -1,3 +1,4 @@
from .kdetools import *
from .gridding import *
from .kernels import *
+from .demo import *
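With this wildcard re-export the demo functions become reachable from the package namespace. A minimal usage sketch (assuming wafo is installed):

    import wafo.kdetools as wk
    wk.kde_demo1()  # runs the smoothing-parameter demo defined in demo.py below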

@ -0,0 +1,305 @@
'''
Created on 2. jan. 2017
@author: pab
'''
from __future__ import absolute_import, division
import scipy.stats
import numpy as np
import warnings
from wafo.plotbackend import plotbackend as plt
from wafo.kdetools import Kernel, TKDE, KDE, KRegression, BKRegression
try:
from wafo import fig
except ImportError:
warnings.warn('fig import only supported on Windows')
__all__ = ['kde_demo1', 'kde_demo2', 'kde_demo3', 'kde_demo4', 'kde_demo5',
'kreg_demo1', ]
def kde_demo1():
"""KDEDEMO1 Demonstrate the smoothing parameter impact on KDE.
KDEDEMO1 shows the true density (dotted) compared to KDE based on 7
observations (solid) and their individual kernels (dashed) for 3
different values of the smoothing parameter, hs.
"""
st = scipy.stats
x = np.linspace(-4, 4, 101)
x0 = x / 2.0
data = np.random.normal(loc=0, scale=1.0, size=7)
kernel = Kernel('gauss')
hs = kernel.hns(data)
h_vec = [hs / 2, hs, 2 * hs]
for ix, h in enumerate(h_vec):
plt.figure(ix)
kde = KDE(data, hs=h, kernel=kernel)
f2 = kde(x, output='plot', title='h_s = {0:2.2f}'.format(float(h)),
ylab='Density')
f2.plot('k-')
plt.plot(x, st.norm.pdf(x, 0, 1), 'k:')
n = len(data)
plt.plot(data, np.zeros(data.shape), 'bx')
y = kernel(x0) / (n * h * kernel.norm_factor(d=1, n=n))
for i in range(n):
plt.plot(data[i] + x0 * h, y, 'b--')
plt.plot([data[i], data[i]], [0, np.max(y)], 'b')
plt.axis([min(x), max(x), 0, 0.5])
def kde_demo2():
'''Demonstrate the difference between transformation- and ordinary-KDE.
KDEDEMO2 shows that the transformation KDE is a better estimate for
Rayleigh distributed data around 0 than the ordinary KDE.
'''
st = scipy.stats
data = st.rayleigh.rvs(scale=1, size=300)
x = np.linspace(1.5e-2, 5, 55)
kde = KDE(data)
f = kde(output='plot', title='Ordinary KDE (hs={0:})'.format(kde.hs))
plt.figure(0)
f.plot()
plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')
# plotnorm((data).^(L2)) # gives a straight line => L2 = 0.5 reasonable
hs = Kernel('gauss').get_smoothing(data**0.5)
tkde = TKDE(data, hs=hs, L2=0.5)
ft = tkde(x, output='plot',
title='Transformation KDE (hs={0:})'.format(tkde.tkde.hs))
plt.figure(1)
ft.plot()
plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')
plt.figure(0)
def kde_demo3():
'''Demonstrate the difference between transformation- and ordinary-KDE in 2D.
KDEDEMO3 shows that the transformation KDE is a better estimate for
Rayleigh distributed data around 0 than the ordinary KDE.
'''
st = scipy.stats
data = st.rayleigh.rvs(scale=1, size=(2, 300))
# x = np.linspace(1.5e-3, 5, 55)
kde = KDE(data)
f = kde(output='plot', title='Ordinary KDE', plotflag=1)
plt.figure(0)
f.plot()
plt.plot(data[0], data[1], '.')
# plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable
hs = Kernel('gauss').get_smoothing(data**0.5)
tkde = TKDE(data, hs=hs, L2=0.5)
ft = tkde.eval_grid_fast(
output='plot', title='Transformation KDE', plotflag=1)
plt.figure(1)
ft.plot()
plt.plot(data[0], data[1], '.')
plt.figure(0)
def kde_demo4(N=50):
'''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior
for 1D multimodal distributions
KDEDEMO4 shows that the improved Sheather-Jones plug-in smoothing is
better than the normal reference rule (in this case hns).
'''
st = scipy.stats
data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(N,)),
st.norm.rvs(loc=-5, scale=1, size=(N,))))
# x = np.linspace(1.5e-3, 5, 55)
kde = KDE(data, kernel=Kernel('gauss', 'hns'))
f = kde(output='plot', title='Ordinary KDE', plotflag=1)
kde1 = KDE(data, kernel=Kernel('gauss', 'hisj'))
f1 = kde1(output='plot', label='Ordinary KDE', plotflag=1)
plt.figure(0)
f.plot('r', label='hns={0}'.format(kde.hs))
# plt.figure(2)
f1.plot('b', label='hisj={0}'.format(kde1.hs))
x = np.linspace(-9, 9)
plt.plot(x, (st.norm.pdf(x, loc=-5, scale=1) +
st.norm.pdf(x, loc=5, scale=1)) / 2, 'k:',
label='True density')
plt.legend()
def kde_demo5(N=500):
'''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior
for 2D multimodal distributions
KDEDEMO5 shows that the improved Sheather-Jones plug-in smoothing is
better than the normal reference rule (in this case hns).
'''
st = scipy.stats
data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(2, N,)),
st.norm.rvs(loc=-5, scale=1, size=(2, N,))))
kde = KDE(data, kernel=Kernel('gauss', 'hns'))
f = kde(output='plot', plotflag=1,
title='Ordinary KDE, hns={0:s}'.format(str(list(kde.hs))))
kde1 = KDE(data, kernel=Kernel('gauss', 'hisj'))
f1 = kde1(output='plot', plotflag=1,
title='Ordinary KDE, hisj={0:s}'.format(str(list(kde1.hs))))
plt.figure(0)
plt.clf()
f.plot()
plt.plot(data[0], data[1], '.')
plt.figure(1)
plt.clf()
f1.plot()
plt.plot(data[0], data[1], '.')
def kreg_demo1(hs=None, fast=False, fun='hisj'):
"""Compare KRegression to KernelReg from statsmodels.nonparametric
"""
N = 100
# ei = np.random.normal(loc=0, scale=0.075, size=(N,))
ei = np.array([
-0.08508516, 0.10462496, 0.07694448, -0.03080661, 0.05777525,
0.06096313, -0.16572389, 0.01838912, -0.06251845, -0.09186784,
-0.04304887, -0.13365788, -0.0185279, -0.07289167, 0.02319097,
0.06887854, -0.08938374, -0.15181813, 0.03307712, 0.08523183,
-0.0378058, -0.06312874, 0.01485772, 0.06307944, -0.0632959,
0.18963205, 0.0369126, -0.01485447, 0.04037722, 0.0085057,
-0.06912903, 0.02073998, 0.1174351, 0.17599277, -0.06842139,
0.12587608, 0.07698113, -0.0032394, -0.12045792, -0.03132877,
0.05047314, 0.02013453, 0.04080741, 0.00158392, 0.10237899,
-0.09069682, 0.09242174, -0.15445323, 0.09190278, 0.07138498,
0.03002497, 0.02495252, 0.01286942, 0.06449978, 0.03031802,
0.11754861, -0.02322272, 0.00455867, -0.02132251, 0.09119446,
-0.03210086, -0.06509545, 0.07306443, 0.04330647, 0.078111,
-0.04146907, 0.05705476, 0.02492201, -0.03200572, -0.02859788,
-0.05893749, 0.00089538, 0.0432551, 0.04001474, 0.04888828,
-0.17708392, 0.16478644, 0.1171006, 0.11664846, 0.01410477,
-0.12458953, -0.11692081, 0.0413047, -0.09292439, -0.07042327,
0.14119701, -0.05114335, 0.04994696, -0.09520663, 0.04829406,
-0.01603065, -0.1933216, 0.19352763, 0.11819496, 0.04567619,
-0.08348306, 0.00812816, -0.00908206, 0.14528945, 0.02901065])
x = np.linspace(0, 1, N)
va_1 = 0.3 ** 2
va_2 = 0.7 ** 2
y0 = np.exp(-x ** 2 / (2 * va_1)) + 1.3*np.exp(-(x - 1) ** 2 / (2 * va_2))
y = y0 + ei
kernel = Kernel('gauss', fun=fun)
hopt = kernel.hisj(x)
kreg = KRegression(
x, y, p=0, hs=hs, kernel=kernel, xmin=-2 * hopt, xmax=1 + 2 * hopt)
if fast:
kreg.__call__ = kreg.eval_grid_fast
f = kreg(x, output='plot', title='Kernel regression', plotflag=1)
plt.figure(0)
f.plot(label='p=0')
kreg.p = 1
f1 = kreg(x, output='plot', title='Kernel regression', plotflag=1)
f1.plot(label='p=1')
# print(f1.data)
plt.plot(x, y, '.', label='data')
plt.plot(x, y0, 'k', label='True model')
from statsmodels.nonparametric.kernel_regression import KernelReg
kreg2 = KernelReg(y, x, var_type='c')
y2 = kreg2.fit(x)
plt.plot(x, y2[0], 'm', label='statsmodel')
plt.legend()
plt.show()
print(kreg.tkde.tkde._inv_hs)
print(kreg.tkde.tkde.hs)
def _get_data(n=100, symmetric=False, loc1=1.1, scale1=0.6, scale2=1.0):
"""
Return test data for binomial regression demo.
"""
st = scipy.stats
dist = st.norm
norm1 = scale2 * (dist.pdf(-loc1, loc=-loc1, scale=scale1) +
dist.pdf(-loc1, loc=loc1, scale=scale1))
def fun1(x):
return ((dist.pdf(x, loc=-loc1, scale=scale1) +
dist.pdf(x, loc=loc1, scale=scale1)) / norm1).clip(max=1.0)
x = np.sort(6 * np.random.rand(n, 1) - 3, axis=0)
y = (fun1(x) > np.random.rand(n, 1)).ravel()
# y = (np.cos(x)>2*np.random.rand(n, 1)-1).ravel()
x = x.ravel()
if symmetric:
xi = np.hstack((x.ravel(), -x.ravel()))
yi = np.hstack((y, y))
i = np.argsort(xi)
x = xi[i]
y = yi[i]
return x, y, fun1
def check_bkregression():
"""
Check binomial regression
"""
plt.ion()
k = 0
for _i, n in enumerate([50, 100, 300, 600]):
x, y, fun1 = _get_data(n, symmetric=True, loc1=0.1,
scale1=0.6, scale2=0.75)
bkreg = BKRegression(x, y, a=0.05, b=0.05)
fbest = bkreg.prb_search_best(
hsfun='hste', alpha=0.05, color='g', label='Transit_D')
figk = plt.figure(k)
ax = figk.gca()
k += 1
# fbest.score.plot(axis=ax)
# axsize = ax.axis()
# ax.vlines(fbest.hs,axsize[2]+1,axsize[3])
# ax.set(yscale='log')
fbest.labels.title = 'N = {:d}'.format(n)
fbest.plot(axis=ax)
ax.plot(x, fun1(x), 'r')
ax.legend(frameon=False, markerscale=4)
# ax = plt.gca()
ax.set_yticklabels(ax.get_yticks() * 100.0)
ax.grid(True)
fig.tile(range(0, k))
plt.ioff()
plt.show('hold')
if __name__ == '__main__':
# kde_demo5()
# check_bkregression()
kreg_demo1(hs=0.04, fast=True)
plt.show('hold')

@ -21,18 +21,11 @@ from numpy import sqrt, atleast_2d, meshgrid
 from numpy.fft import fftn, ifftn
 from wafo.misc import nextpow2
 from wafo.containers import PlotData
-from wafo.plotbackend import plotbackend as plt
 from wafo.testing import test_docstrings
 from wafo.kdetools.kernels import iqrange, qlevels, Kernel
 from wafo.kdetools.gridding import gridcount
-try:
-    from wafo import fig
-except ImportError:
-    warnings.warn('fig import only supported on Windows')
-__all__ = ['TKDE', 'KDE', 'kde_demo1', 'kde_demo2', 'test_docstrings',
-           'KRegression', 'BKRegression']
+__all__ = ['TKDE', 'KDE', 'test_docstrings', 'KRegression', 'BKRegression']

 _TINY = np.finfo(float).machar.tiny
 # _REALMIN = np.finfo(float).machar.xmin
@ -413,32 +406,11 @@ class TKDE(_KDE):
         for i, v2 in enumerate(L2.tolist()):
             factor = v2 * np.sign(v2) if v2 else 1
             pdf *= np.where(v2 == 1, 1, points[i] ** (v2 - 1) * factor)
-        if (np.abs(np.diff(pdf)).max() > 10).any():
-            msg = ''' Numerical problems may have occured due to the power
-            transformation. Check the KDE for spurious spikes'''
-            warnings.warn(msg)
-        return pdf
-
-    def eval_grid_fast2(self, *args, **kwds):
-        """Evaluate the estimated pdf on a grid.
-
-        Parameters
-        ----------
-        arg_0, arg_1, ... arg_d-1 : vectors
-            Alternatively, if no vectors is passed in then
-            arg_i = gauss2dat(linspace(dat2gauss(self.xmin[i]),
-                                       dat2gauss(self.xmax[i]), self.inc))
-        output : string optional
-            'value' if value output
-            'data' if object output
-
-        Returns
-        -------
-        values : array-like
-            The values evaluated at meshgrid(*args).
-        """
-        return self.eval_grid_fun(self._eval_grid_fast, *args, **kwds)
+        _assert_warn((np.abs(np.diff(pdf)).max() < 10).all(), '''
+        Numerical problems may have occurred due to the power transformation.
+        Check the KDE for spurious spikes''')
+        return pdf

     def _interpolate(self, points, f, *args, **kwds):
         ipoints = meshgrid(*args)  # if self.d > 1 else args
@ -1179,284 +1151,5 @@ class BKRegression(object):
return prb_best return prb_best
def kde_demo1():
"""KDEDEMO1 Demonstrate the smoothing parameter impact on KDE.
KDEDEMO1 shows the true density (dotted) compared to KDE based on 7
observations (solid) and their individual kernels (dashed) for 3
different values of the smoothing parameter, hs.
"""
st = scipy.stats
x = np.linspace(-4, 4, 101)
x0 = x / 2.0
data = np.random.normal(loc=0, scale=1.0, size=7)
kernel = Kernel('gauss')
hs = kernel.hns(data)
hVec = [hs / 2, hs, 2 * hs]
for ix, h in enumerate(hVec):
plt.figure(ix)
kde = KDE(data, hs=h, kernel=kernel)
f2 = kde(x, output='plot', title='h_s = {0:2.2f}'.format(float(h)),
ylab='Density')
f2.plot('k-')
plt.plot(x, st.norm.pdf(x, 0, 1), 'k:')
n = len(data)
plt.plot(data, np.zeros(data.shape), 'bx')
y = kernel(x0) / (n * h * kernel.norm_factor(d=1, n=n))
for i in range(n):
plt.plot(data[i] + x0 * h, y, 'b--')
plt.plot([data[i], data[i]], [0, np.max(y)], 'b')
plt.axis([min(x), max(x), 0, 0.5])
def kde_demo2():
'''Demonstrate the difference between transformation- and ordinary-KDE.
KDEDEMO2 shows that the transformation KDE is a better estimate for
Rayleigh distributed data around 0 than the ordinary KDE.
'''
st = scipy.stats
data = st.rayleigh.rvs(scale=1, size=300)
x = np.linspace(1.5e-2, 5, 55)
kde = KDE(data)
f = kde(output='plot', title='Ordinary KDE (hs={0:})'.format(kde.hs))
plt.figure(0)
f.plot()
plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')
# plotnorm((data).^(L2)) # gives a straight line => L2 = 0.5 reasonable
hs = Kernel('gauss').get_smoothing(data**0.5)
tkde = TKDE(data, hs=hs, L2=0.5)
ft = tkde(x, output='plot',
title='Transformation KDE (hs={0:})'.format(tkde.tkde.hs))
plt.figure(1)
ft.plot()
plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')
plt.figure(0)
def kde_demo3():
'''Demonstrate the difference between transformation and ordinary-KDE in 2D
KDEDEMO3 shows that the transformation KDE is a better estimate for
Rayleigh distributed data around 0 than the ordinary KDE.
'''
st = scipy.stats
data = st.rayleigh.rvs(scale=1, size=(2, 300))
# x = np.linspace(1.5e-3, 5, 55)
kde = KDE(data)
f = kde(output='plot', title='Ordinary KDE', plotflag=1)
plt.figure(0)
f.plot()
plt.plot(data[0], data[1], '.')
# plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable
hs = Kernel('gauss').get_smoothing(data**0.5)
tkde = TKDE(data, hs=hs, L2=0.5)
ft = tkde.eval_grid_fast(
output='plot', title='Transformation KDE', plotflag=1)
plt.figure(1)
ft.plot()
plt.plot(data[0], data[1], '.')
plt.figure(0)
def kde_demo4(N=50):
'''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior
for 1D multimodal distributions
KDEDEMO4 shows that the improved Sheather-Jones plug-in smoothing is a
better compared to normal reference rules (in this case the hns)
'''
st = scipy.stats
data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(N,)),
st.norm.rvs(loc=-5, scale=1, size=(N,))))
# x = np.linspace(1.5e-3, 5, 55)
kde = KDE(data, kernel=Kernel('gauss', 'hns'))
f = kde(output='plot', title='Ordinary KDE', plotflag=1)
kde1 = KDE(data, kernel=Kernel('gauss', 'hisj'))
f1 = kde1(output='plot', label='Ordinary KDE', plotflag=1)
plt.figure(0)
f.plot('r', label='hns={0}'.format(kde.hs))
# plt.figure(2)
f1.plot('b', label='hisj={0}'.format(kde1.hs))
x = np.linspace(-9, 9)
plt.plot(x, (st.norm.pdf(x, loc=-5, scale=1) +
st.norm.pdf(x, loc=5, scale=1)) / 2, 'k:',
label='True density')
plt.legend()
def kde_demo5(N=500):
'''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior
for 2D multimodal distributions
KDEDEMO5 shows that the improved Sheather-Jones plug-in smoothing is better
compared to normal reference rules (in this case the hns)
'''
st = scipy.stats
data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(2, N,)),
st.norm.rvs(loc=-5, scale=1, size=(2, N,))))
kde = KDE(data, kernel=Kernel('gauss', 'hns'))
f = kde(output='plot', plotflag=1,
title='Ordinary KDE, hns={0:s}'.format(str(list(kde.hs))))
kde1 = KDE(data, kernel=Kernel('gauss', 'hisj'))
f1 = kde1(output='plot', plotflag=1,
title='Ordinary KDE, hisj={0:s}'.format(str(list(kde1.hs))))
plt.figure(0)
plt.clf()
f.plot()
plt.plot(data[0], data[1], '.')
plt.figure(1)
plt.clf()
f1.plot()
plt.plot(data[0], data[1], '.')
def kreg_demo1(hs=None, fast=False, fun='hisj'):
""""""
N = 100
# ei = np.random.normal(loc=0, scale=0.075, size=(N,))
ei = np.array([
-0.08508516, 0.10462496, 0.07694448, -0.03080661, 0.05777525,
0.06096313, -0.16572389, 0.01838912, -0.06251845, -0.09186784,
-0.04304887, -0.13365788, -0.0185279, -0.07289167, 0.02319097,
0.06887854, -0.08938374, -0.15181813, 0.03307712, 0.08523183,
-0.0378058, -0.06312874, 0.01485772, 0.06307944, -0.0632959,
0.18963205, 0.0369126, -0.01485447, 0.04037722, 0.0085057,
-0.06912903, 0.02073998, 0.1174351, 0.17599277, -0.06842139,
0.12587608, 0.07698113, -0.0032394, -0.12045792, -0.03132877,
0.05047314, 0.02013453, 0.04080741, 0.00158392, 0.10237899,
-0.09069682, 0.09242174, -0.15445323, 0.09190278, 0.07138498,
0.03002497, 0.02495252, 0.01286942, 0.06449978, 0.03031802,
0.11754861, -0.02322272, 0.00455867, -0.02132251, 0.09119446,
-0.03210086, -0.06509545, 0.07306443, 0.04330647, 0.078111,
-0.04146907, 0.05705476, 0.02492201, -0.03200572, -0.02859788,
-0.05893749, 0.00089538, 0.0432551, 0.04001474, 0.04888828,
-0.17708392, 0.16478644, 0.1171006, 0.11664846, 0.01410477,
-0.12458953, -0.11692081, 0.0413047, -0.09292439, -0.07042327,
0.14119701, -0.05114335, 0.04994696, -0.09520663, 0.04829406,
-0.01603065, -0.1933216, 0.19352763, 0.11819496, 0.04567619,
-0.08348306, 0.00812816, -0.00908206, 0.14528945, 0.02901065])
x = np.linspace(0, 1, N)
va_1 = 0.3 ** 2
va_2 = 0.7 ** 2
y0 = np.exp(-x ** 2 / (2 * va_1)) + 1.3*np.exp(-(x - 1) ** 2 / (2 * va_2))
y = y0 + ei
kernel = Kernel('gauss', fun=fun)
hopt = kernel.hisj(x)
kreg = KRegression(
x, y, p=0, hs=hs, kernel=kernel, xmin=-2 * hopt, xmax=1 + 2 * hopt)
if fast:
kreg.__call__ = kreg.eval_grid_fast
f = kreg(x, output='plot', title='Kernel regression', plotflag=1)
plt.figure(0)
f.plot(label='p=0')
kreg.p = 1
f1 = kreg(x, output='plot', title='Kernel regression', plotflag=1)
f1.plot(label='p=1')
# print(f1.data)
plt.plot(x, y, '.', label='data')
plt.plot(x, y0, 'k', label='True model')
from statsmodels.nonparametric.kernel_regression import KernelReg
kreg2 = KernelReg(y, x, ('c'))
y2 = kreg2.fit(x)
plt.plot(x, y2[0], 'm', label='statsmodel')
plt.legend()
plt.show()
print(kreg.tkde.tkde._inv_hs)
print(kreg.tkde.tkde.hs)
def _get_data(n=100, symmetric=False, loc1=1.1, scale1=0.6, scale2=1.0):
st = scipy.stats
dist = st.norm
norm1 = scale2 * (dist.pdf(-loc1, loc=-loc1, scale=scale1) +
dist.pdf(-loc1, loc=loc1, scale=scale1))
def fun1(x):
return ((dist.pdf(x, loc=-loc1, scale=scale1) +
dist.pdf(x, loc=loc1, scale=scale1)) / norm1).clip(max=1.0)
x = np.sort(6 * np.random.rand(n, 1) - 3, axis=0)
y = (fun1(x) > np.random.rand(n, 1)).ravel()
# y = (np.cos(x)>2*np.random.rand(n, 1)-1).ravel()
x = x.ravel()
if symmetric:
xi = np.hstack((x.ravel(), -x.ravel()))
yi = np.hstack((y, y))
i = np.argsort(xi)
x = xi[i]
y = yi[i]
return x, y, fun1
def check_bkregression():
plt.ion()
k = 0
for _i, n in enumerate([50, 100, 300, 600]):
x, y, fun1 = _get_data(n, symmetric=True, loc1=0.1,
scale1=0.6, scale2=0.75)
bkreg = BKRegression(x, y, a=0.05, b=0.05)
fbest = bkreg.prb_search_best(
hsfun='hste', alpha=0.05, color='g', label='Transit_D')
figk = plt.figure(k)
ax = figk.gca()
k += 1
# fbest.score.plot(axis=ax)
# axsize = ax.axis()
# ax.vlines(fbest.hs,axsize[2]+1,axsize[3])
# ax.set(yscale='log')
fbest.labels.title = 'N = {:d}'.format(n)
fbest.plot(axis=ax)
ax.plot(x, fun1(x), 'r')
ax.legend(frameon=False, markerscale=4)
# ax = plt.gca()
ax.set_yticklabels(ax.get_yticks() * 100.0)
ax.grid(True)
fig.tile(range(0, k))
plt.ioff()
plt.show('hold')
 if __name__ == '__main__':
-    if False:
-        test_docstrings(__file__)
-    else:
-        # kde_demo5()
-        # check_bkregression()
-        kreg_demo1(hs=0.04, fast=True)
-        plt.show('hold')
+    test_docstrings(__file__)

@ -483,7 +483,9 @@ class Kernel(object):
     'rectangular' - Rectangular kernel.
     'laplace'     - Laplace kernel.
     'logistic'    - Logistic kernel.

     Note that only the first 4 letters of the kernel name are needed.
+
+    fun : string
+        defining smoothing function/bandwidth.

     Examples
     --------
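For orientation, the second constructor argument selects the bandwidth rule, exactly as the demos in this commit do; a small self-contained sketch (the random data is hypothetical):

    import numpy as np
    gk = Kernel('gauss', fun='hisj')             # improved Sheather-Jones plug-in
    hs = gk.get_smoothing(np.random.randn(100))  # bandwidth from the chosen rule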

@ -26,6 +26,12 @@ class TestKde(unittest.TestCase):
                               0.72433808, 1.92973094, 0.44749838, 1.36508452])
         self.x = np.linspace(0, max(self.data) + 1, 10)

+    def test_default_bandwidth_and_inc(self):
+        kde0 = wk.KDE(self.data, hs=-1, alpha=0.0, inc=None)
+        print(kde0.hs.tolist(), kde0.inc)
+        assert_allclose(kde0.hs, 0.19682759537327105)
+        assert_allclose(kde0.inc, 64)

     def test0_KDE1D(self):
         data, x = self.data, self.x
@ -36,6 +42,11 @@ class TestKde(unittest.TestCase):
                              0.52219649, 0.3906213, 0.26381501, 0.16407362,
                              0.08270612, 0.02991145, 0.00720821])
+        fx = kde0.eval_points(x)
+        assert_allclose(fx, [0.2039735, 0.40252503, 0.54595078,
+                             0.52219649, 0.3906213, 0.26381501, 0.16407362,
+                             0.08270612, 0.02991145, 0.00720821])
         fx = kde0.eval_grid(x, r=1)
         assert_allclose(-fx, [0.11911419724002906, 0.13440000694772541,
                               0.044400116190638696, -0.0677695267531197,
@ -85,17 +96,19 @@ class TestKde(unittest.TestCase):
         x = np.linspace(0.01, max(data) + 1, 10)
         kde = wk.TKDE(data, hs=0.5, L2=0.5)
         f = kde(x)
+        assert_allclose(f, [1.03982714, 0.45839018, 0.39514782, 0.32860602,
+                            0.26433318, 0.20717946, 0.15907684, 0.1201074,
+                            0.08941027, 0.06574882])
+        f = kde.eval_points(x)
+        assert_allclose(f, [1.03982714, 0.45839018, 0.39514782, 0.32860602,
+                            0.26433318, 0.20717946, 0.15907684, 0.1201074,
+                            0.08941027, 0.06574882])
+        f = kde.eval_grid(x)
         assert_allclose(f, [1.03982714, 0.45839018, 0.39514782, 0.32860602,
                             0.26433318, 0.20717946, 0.15907684, 0.1201074,
                             0.08941027, 0.06574882])
         assert_allclose(np.trapz(f, x), 0.94787730659349068)
         f = kde.eval_grid_fast(x)
+        assert_allclose(f, [1.0401892415290148, 0.45838973393693677,
+                            0.39514689240671547, 0.32860531818532457,
+                            0.2643330110605783, 0.20717975528556506,
+                            0.15907696844388747, 0.12010770443337843,
+                            0.08941129458260941, 0.06574899139165799])
+        f = kde.eval_grid_fast2(x)
         assert_allclose(f, [1.0401892415290148, 0.45838973393693677,
                             0.39514689240671547, 0.32860531818532457,
                             0.2643330110605783, 0.20717975528556506,
@ -170,17 +183,28 @@ class TestKde(unittest.TestCase):
         x = np.linspace(0, max(np.ravel(data)) + 1, 3)

         kde0 = wk.KDE(data, hs=0.5, alpha=0.0, inc=512)
         assert_allclose(kde0.eval_grid(x, x),
                         [[3.27260963e-02, 4.21654678e-02, 5.85338634e-04],
                          [6.78845466e-02, 1.42195839e-01, 1.41676003e-03],
                          [1.39466746e-04, 4.26983850e-03, 2.52736185e-05]])
+        f0 = kde0.eval_grid_fast(x, x, output='plot')
         t = [[0.0443506097653615, 0.06433530873456418, 0.0041353838654317856],
              [0.07218297149063724, 0.1235819591878892, 0.009288890372002473],
              [0.001613328022214066, 0.00794857884864038, 0.0005874786787715641]
              ]
-        assert_allclose(kde0.eval_grid_fast(x, x), t)
+        assert_allclose(f0.data, t)
+
+    def test_2d_default_bandwidth(self):
+        # N = 20
+        # data = np.random.rayleigh(1, size=(2, N))
+        data = DATA2D
+        kde0 = wk.KDE(data, kernel=wk.Kernel('epan', 'hmns'), inc=512)
+        assert_allclose(kde0.hs, [[0.8838122391117693, 0.08341940479019105],
+                                  [0.08341940479019104, 0.7678179747855731]])
+        self.assertRaises(ValueError, kde0.eval_points, [1, 2, 3])
+        assert_allclose(kde0.eval_points([1, 2]), 0.11329600006973661)
class TestRegression(unittest.TestCase):
@ -313,9 +337,8 @@ class TestRegression(unittest.TestCase):
         bkreg = wk.BKRegression(x, y, a=0.05, b=0.05)
         fbest = bkreg.prb_search_best(hsfun='hste', alpha=0.05, color='g')
-        print(fbest.data[::10].tolist())
+        # print(fbest.data[::10].tolist())
         assert_allclose(fbest.data[::10],
                         [1.80899736e-15, 0, 6.48351162e-16, 6.61404311e-15,
                          1.10010120e-12, 1.36709203e-10, 1.11994766e-08,
                          5.73040143e-07, 1.68974054e-05, 2.68633448e-04,
@ -328,6 +351,27 @@ class TestRegression(unittest.TestCase):
                          2.68633448e-04, 1.68974054e-05, 5.73040143e-07,
                          1.11994760e-08, 1.36708818e-10, 1.09965904e-12,
                          5.43806309e-15, 0.0, 0, 0], atol=1e-10)
bkreg = wk.BKRegression(x, y, method='wilson')
fbest = bkreg.prb_search_best(hsfun='hste', alpha=0.05, color='g')
assert_allclose(fbest.data[::10],
[3.2321397702105376e-15, 4.745626420805898e-17,
6.406118940191104e-16, 5.648884668051452e-16,
3.499875381296387e-16, 1.0090442883241678e-13,
4.264723863193633e-11, 9.29288388831705e-09,
9.610074789043923e-07, 4.086642453634508e-05,
0.0008305202502773989, 0.00909121197102206,
0.05490768364395013, 0.1876637145781381,
0.4483015169104682, 0.8666709816557657,
0.9916656713022183, 0.9996648903706271,
0.999990921956741, 0.9999909219567404,
0.999664890370625, 0.9916656713022127,
0.8666709816557588, 0.4483015169104501,
0.18766371457812697, 0.054907683643947366,
0.009091211971022042, 0.0008305202502770367,
4.086642453593762e-05, 9.610074786590158e-07,
9.292883469982049e-09, 4.264660017463372e-11,
1.005284921271869e-13, -0.0, -0.0, -0.0, -0.0, -0.0],
atol=1e-10)
if __name__ == "__main__":
    # import sys;sys.argv = ['', 'Test.testName']

@ -3,10 +3,10 @@ from scipy.stats._distn_infrastructure import * # @UnusedWildImport
 from scipy.stats._distn_infrastructure import (_skew,  # @UnusedImport
     _kurtosis, _ncx2_log_pdf,  # @IgnorePep8 @UnusedImport
     _ncx2_pdf, _ncx2_cdf)  # @UnusedImport @IgnorePep8
-from .estimation import FitDistribution
+from .estimation import FitDistribution, rv_frozen  # @Reimport
 from ._constants import _XMAX, _XMIN
-from wafo.misc import lazyselect as _lazyselect
-from wafo.misc import lazywhere as _lazywhere
+from wafo.misc import lazyselect as _lazyselect  # @UnusedImport
+from wafo.misc import lazywhere as _lazywhere  # @UnusedImport

 _doc_default_example = """\
 Examples
@ -70,137 +70,6 @@ Accurate confidence interval with profile loglikelihood
""" """
# Frozen RV class
class rv_frozen(object):
''' Frozen continous or discrete 1D Random Variable object (RV)
Methods
-------
rvs(size=1)
Random variates.
pdf(x)
Probability density function.
cdf(x)
Cumulative density function.
sf(x)
Survival function (1-cdf --- sometimes more accurate).
ppf(q)
Percent point function (inverse of cdf --- percentiles).
isf(q)
Inverse survival function (inverse of sf).
stats(moments='mv')
Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
moment(n)
n-th order non-central moment of distribution.
entropy()
(Differential) entropy of the RV.
interval(alpha)
Confidence interval with equal areas around the median.
expect(func, lb, ub, conditional=False)
Calculate expected value of a function with respect to the
distribution.
'''
def __init__(self, dist, *args, **kwds):
# create a new instance
self.dist = dist # .__class__(**dist._ctor_param)
shapes, loc, scale = self.dist._parse_args(*args, **kwds)
if isinstance(dist, rv_continuous):
self.par = shapes + (loc, scale)
else: # rv_discrete
self.par = shapes + (loc,)
self.a = self.dist.a
self.b = self.dist.b
self.shapes = self.dist.shapes
# @property
# def shapes(self):
# return self.dist.shapes
@property
def random_state(self):
return self.dist._random_state
@random_state.setter
def random_state(self, seed):
self.dist._random_state = check_random_state(seed)
def pdf(self, x):
''' Probability density function at x of the given RV.'''
return self.dist.pdf(x, *self.par)
def logpdf(self, x):
return self.dist.logpdf(x, *self.par)
def cdf(self, x):
'''Cumulative distribution function at x of the given RV.'''
return self.dist.cdf(x, *self.par)
def logcdf(self, x):
return self.dist.logcdf(x, *self.par)
def ppf(self, q):
'''Percent point function (inverse of cdf) at q of the given RV.'''
return self.dist.ppf(q, *self.par)
def isf(self, q):
'''Inverse survival function at q of the given RV.'''
return self.dist.isf(q, *self.par)
def rvs(self, size=None, random_state=None):
kwds = {'size': size, 'random_state': random_state}
return self.dist.rvs(*self.par, **kwds)
def sf(self, x):
'''Survival function (1-cdf) at x of the given RV.'''
return self.dist.sf(x, *self.par)
def logsf(self, x):
return self.dist.logsf(x, *self.par)
def stats(self, moments='mv'):
''' Some statistics of the given RV'''
kwds = dict(moments=moments)
return self.dist.stats(*self.par, **kwds)
def median(self):
return self.dist.median(*self.par)
def mean(self):
return self.dist.mean(*self.par)
def var(self):
return self.dist.var(*self.par)
def std(self):
return self.dist.std(*self.par)
def moment(self, n):
return self.dist.moment(n, *self.par)
def entropy(self):
return self.dist.entropy(*self.par)
def pmf(self, k):
'''Probability mass function at k of the given RV'''
return self.dist.pmf(k, *self.par)
def logpmf(self, k):
return self.dist.logpmf(k, *self.par)
def interval(self, alpha):
return self.dist.interval(alpha, *self.par)
def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds):
if isinstance(self.dist, rv_continuous):
a, loc, scale = self.par[:-2], self.par[:-2], self.par[-1]
return self.dist.expect(func, a, loc, scale, lb, ub, conditional,
**kwds)
a, loc = self.par[:-1], self.par[-1]
if kwds:
raise ValueError("Discrete expect does not accept **kwds.")
return self.dist.expect(func, a, loc, lb, ub, conditional)
def freeze(self, *args, **kwds):
    """Freeze the distribution for the given arguments.
@ -219,41 +88,6 @@ def freeze(self, *args, **kwds):
    return rv_frozen(self, *args, **kwds)
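Since freeze is monkey-patched onto the scipy classes further down (rv_generic.freeze = freeze), every distribution frozen through wafo returns this rv_frozen. A rough sketch of the effect (assuming wafo.stats re-exports the patched distributions):

    import wafo.stats as ws
    rv = ws.norm.freeze(loc=1.0, scale=0.5)  # wafo's rv_frozen, not scipy's
    print(rv.mean(), rv.std())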
# def link(self, x, logSF, theta, i):
# '''
# Return theta[i] as function of quantile, survival probability and
# theta[j] for j!=i.
#
# Parameters
# ----------
# x : quantile
# logSF : logarithm of the survival probability
# theta : list
# all distribution parameters including location and scale.
#
# Returns
# -------
# theta[i] : real scalar
# fixed distribution parameter theta[i] as function of x, logSF and
# theta[j] where j != i.
#
# LINK is a function connecting the fixed distribution parameter theta[i]
# with the quantile (x) and the survival probability (SF) and the
# remaining free distribution parameters theta[j] for j!=i, i.e.:
# theta[i] = link(x, logSF, theta, i),
# where logSF = log(Prob(X>x; theta)).
#
# See also
# estimation.Profile
# '''
# return self._link(x, logSF, theta, i)
#
#
# def _link(self, x, logSF, theta, i):
# msg = 'Link function not implemented for the {} distribution'
# raise NotImplementedError(msg.format(self.name))
def _resolve_ties(self, log_dprb, x, args, scale):
    dx = np.diff(x, axis=0)
    tie = dx == 0
@ -293,7 +127,6 @@ def _nlogps_and_penalty(self, x, scale, args):
    if n_bad > 0:
        penalty = 100.0 * np.log(_XMAX) * n_bad
        return -np.sum(log_dprb[finite_log_dprb], axis=0) + penalty
    return -np.sum(log_dprb, axis=0)
@ -353,7 +186,7 @@ def _nnlf_and_penalty(self, x, args):
     return -np.sum(logpdf, axis=0)

-def _penalized_nnlf(self, theta, x, penalty=None):
+def _penalized_nnlf(self, theta, x):
     ''' Return negative loglikelihood function,
     i.e., - sum (log pdf(x, theta), axis=0)
     where theta are the parameters (including loc and scale)
@ -622,8 +455,7 @@ def _open_support_mask(self, x):
 rv_generic.freeze = freeze
 rv_discrete.freeze = freeze
 rv_continuous.freeze = freeze
-# rv_continuous.link = link
-# rv_continuous._link = _link
 rv_continuous._penalized_nlogps = _penalized_nlogps
 rv_continuous._penalized_nnlf = _penalized_nnlf
 rv_continuous._reduce_func = _reduce_func

@ -9,11 +9,11 @@ Author: Per A. Brodtkorb 2008
 from __future__ import division, absolute_import
 import warnings
+from scipy.stats import rv_continuous
+from scipy.stats._distn_infrastructure import check_random_state
 from wafo.plotbackend import plotbackend as plt
 from wafo.misc import ecross, findcross
 from wafo.stats._constants import _EPS
-from wafo.stats._distn_infrastructure import rv_frozen, rv_continuous
 from scipy._lib.six import string_types
 import numdifftools as nd  # @UnresolvedImport
 from scipy import special
@ -36,6 +36,26 @@ arr = asarray
# all = alltrue  # @ReservedAssignment
def _assert_warn(cond, msg):
if not cond:
warnings.warn(msg)
def _assert(cond, msg):
if not cond:
raise ValueError(msg)
def _assert_index(cond, msg):
if not cond:
raise IndexError(msg)
def _assert_not_implemented(cond, msg):
if not cond:
raise NotImplementedError(msg)
def _burr_link(x, logsf, phat, ix):
    c, d, loc, scale = phat
    logp = log(-expm1(logsf))
@ -90,43 +110,43 @@ def _genpareto_link(x, logsf, phat, ix):
     # Stuart Coles (2004)
     # "An introduction to statistical modelling of extreme values".
     # Springer series in statistics
+    _assert_not_implemented(ix != 0, 'link(x,logsf,phat,i) where i=0 is '
+                            'not implemented!')
     c, loc, scale = phat
+    if c == 0:
+        return _expon_link(x, logsf, phat[1:], ix - 1)
     if ix == 2:
         # Reorganizing w.r.t. scale, Eq. 4.13 and 4.14, pp 81 in
         # Coles (2004) gives
         #   link = -(x-loc)*c/expm1(-c*logsf)
-        if c != 0.0:
-            phati = (x - loc) * c / expm1(-c * logsf)
-        else:
-            phati = -(x - loc) / logsf
-    elif ix == 1:
-        if c != 0:
-            phati = x + scale * expm1(c * logsf) / c
-        else:
-            phati = x + scale * logsf
-    elif ix == 0:
-        raise NotImplementedError(
-            'link(x,logsf,phat,i) where i=0 is not implemented!')
-    else:
-        raise IndexError('Index to the fixed parameter is out of bounds')
-    return phati
+        return (x - loc) * c / expm1(-c * logsf)
+    if ix == 1:
+        return x + scale * expm1(c * logsf) / c
+    raise IndexError('Index to the fixed parameter is out of bounds')
+
+
+def _gumbel_r_link(x, logsf, phat, ix):
+    loc, scale = phat
+    loglogP = log(-log(-expm1(logsf)))
+    if ix == 1:
+        return -(x - loc) / loglogP
+    if ix == 0:  # note: the commit repeats `if ix == 1:` here; `ix == 0` is intended
+        return x + scale * loglogP
+    raise IndexError('Index to the fixed parameter is out of bounds')


 def _genextreme_link(x, logsf, phat, ix):
+    _assert_not_implemented(ix != 0, 'link(x,logsf,phat,i) where i=0 is not '
+                            'implemented!')
     c, loc, scale = phat
+    if c == 0:
+        return _gumbel_r_link(x, logsf, phat[1:], ix - 1)
     loglogP = log(-log(-expm1(logsf)))
     if ix == 2:
         # link = -(x-loc)*c/expm1(c*log(-logP))
-        if c != 0.0:
-            return -(x - loc) * c / expm1(c * loglogP)
-        return -(x - loc) / loglogP
+        return -(x - loc) * c / expm1(c * loglogP)
     if ix == 1:
-        if c != 0:
-            return x + scale * expm1(c * loglogP) / c
-        return x + scale * loglogP
-    if ix == 0:
-        raise NotImplementedError(
-            'link(x,logsf,phat,i) where i=0 is not implemented!')
+        return x + scale * expm1(c * loglogP) / c
     raise IndexError('Index to the fixed parameter is out of bounds')
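As a sanity check on the genpareto link (a hypothetical snippet, not part of the commit): fixing x and logsf and solving for the scale with ix=2 recovers the scale that generated logsf.

    import numpy as np
    from scipy.stats import genpareto

    c, loc = 0.5, 0.0
    x = 2.0
    logsf = genpareto.logsf(x, c, loc, scale=1.0)
    scale = (x - loc) * c / np.expm1(-c * logsf)  # the ix == 2 branch
    assert np.isclose(scale, 1.0)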
@ -168,6 +188,7 @@ LINKS = dict(expon=_expon_link,
              frechet_r=_weibull_min_link,
              genpareto=_genpareto_link,
              genexpon=_genexpon_link,
+             gumbel_r=_gumbel_r_link,
              rayleigh=_rayleigh_link,
              trunclayleigh=_trunclayleigh_link,
              genextreme=_genextreme_link,
@ -259,7 +280,7 @@ class Profile(object):
         self._set_indexes(fit_dist, i)
         method = fit_dist.method.lower()
-        self._set_plot_labels(fit_dist, method)
+        self._set_plot_labels(method)

         Lmax = self._loglike_max(fit_dist, method)
         self.Lmax = Lmax
@ -268,13 +289,16 @@ class Profile(object):
         self._set_profile()

-    def _set_plot_labels(self, fit_dist, method):
+    def _set_plot_labels(self, method, title='', xlabel=''):
+        if not title:
+            title = '{:s} params'.format(self.fit_dist.dist.name)
+        if not xlabel:
+            xlabel = 'phat[{}]'.format(np.ravel(self.i_fixed)[0])
         percent = 100 * (1.0 - self.alpha)
-        self.title = '{:g}% CI for {:s} params'.format(percent,
-                                                       fit_dist.dist.name)
+        self.title = '{:g}% CI for {:s}'.format(percent, title)
         like_txt = 'likelihood' if method == 'ml' else 'product spacing'
         self.ylabel = 'Profile log' + like_txt
-        self.xlabel = 'phat[{}]'.format(np.ravel(self.i_fixed)[0])
+        self.xlabel = xlabel

     @staticmethod
     def _loglike_max(fit_dist, method):
@ -400,7 +424,17 @@ class Profile(object):
             warnings.warn(str(err))

     def _get_variance(self):
-        return self.fit_dist.par_cov[self.i_fixed, :][:, self.i_fixed]
+        invfun = getattr(self, '_myinvfun', None)
+        if invfun is not None:
+            i_notfixed = self.i_notfixed
+            pcov = self.fit_dist.par_cov[i_notfixed, :][:, i_notfixed]
+            gradfun = nd.Gradient(invfun)
+            phatv = self._par
+            drl = gradfun(phatv[i_notfixed])
+            pvar = np.sum(np.dot(drl, pcov) * drl)
+            return pvar
+        pvar = self.fit_dist.par_cov[self.i_fixed, :][:, self.i_fixed]
+        return pvar
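The _myinvfun branch is the delta method: the variance of a smooth function of the estimates is the gradient contracted with the parameter covariance. A standalone sketch with hypothetical numbers:

    import numpy as np

    drl = np.array([0.2, -1.1])       # gradient of the inverse link at phat
    pcov = np.array([[0.04, 0.01],
                     [0.01, 0.09]])   # covariance of the free parameters
    pvar = np.sum(np.dot(drl, pcov) * drl)  # same contraction as above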
    def _approx_p_min_max(self, p_opt):
        pvar = self._get_variance()
@ -414,9 +448,9 @@ class Profile(object):
         p_low, p_up = self._approx_p_min_max(p_opt)
         pmin, pmax = self.pmin, self.pmax
         if pmin is None:
-            pmin = self._search_pmin(phatfree0, p_low, p_opt)
+            pmin = self._search_p_min_max(phatfree0, p_low, p_opt, 'min')
         if pmax is None:
-            pmax = self._search_pmax(phatfree0, p_up, p_opt)
+            pmax = self._search_p_min_max(phatfree0, p_up, p_opt, 'max')
         return pmin, pmax

     def _adaptive_pvec(self, p_opt, pmin, pmax):
@ -438,58 +472,43 @@ class Profile(object):
             return self._adaptive_pvec(p_opt, pmin, pmax)
         return np.linspace(self.pmin, self.pmax, self.n)

-    def _search_pmin(self, phatfree0, p_min0, p_opt):
-        phatfree = phatfree0.copy()
-
-        dp = np.maximum((p_opt - p_min0)/40, 0.01)*10
-        Lmax, phatfree = self._profile_optimum(phatfree, p_opt)
-        p_min_opt = p_min0
-        for j in range(51):
-            p_min = p_opt - dp
-            Lmax, phatfree = self._profile_optimum(phatfree, p_min)
-            # print((dp, p_min, p_min_opt, Lmax))
-            if np.isnan(Lmax):
-                dp *= 0.33
-            elif Lmax < self.alpha_cross_level - self.alpha_Lrange*5*(j+1):
-                p_min_opt = p_min
-                dp *= 0.33
-            elif Lmax < self.alpha_cross_level:
-                p_min_opt = p_min
-                break
-            else:
-                dp *= 1.67
-        else:
-            msg = 'Exceeded max iterations. (p_min0={}, p_min={}, p={})'
-            warnings.warn(msg.format(p_min0, p_min_opt, p_opt))
-        # print('search_pmin iterations={}'.format(j))
-        return p_min_opt
-
-    def _search_pmax(self, phatfree0, p_max0, p_opt):
-        phatfree = phatfree0.copy()
-
-        dp = (p_max0 - p_opt)/40
-        if dp < 1e-2:
-            dp = 0.1
-        Lmax, phatfree = self._profile_optimum(phatfree, p_opt)
-        p_max_opt = p_opt
-        for j in range(51):
-            p_max = p_opt + dp
-            Lmax, phatfree = self._profile_optimum(phatfree, p_max)
-            if np.isnan(Lmax):
-                dp *= 0.33
-            elif Lmax < self.alpha_cross_level - self.alpha_Lrange*5*(j+1):
-                p_max_opt = p_max
-                dp *= 0.33
-            elif Lmax < self.alpha_cross_level:
-                p_max_opt = p_max
-                break
-            else:
-                dp *= 1.67
-        else:
-            msg = 'Exceeded max iterations. (p={}, p_max={}, p_max0 = {})'
-            warnings.warn(msg.format(p_opt, p_max_opt, p_max0))
-        # print('search_pmax iterations={}'.format(j))
-        return p_max_opt
+    def _update_p_opt(self, p_minmax_opt, dp, Lmax, p_minmax, j):
+        # print((dp, p_minmax, p_minmax_opt, Lmax))
+        converged = False
+        if np.isnan(Lmax):
+            dp *= 0.33
+        elif Lmax < self.alpha_cross_level - self.alpha_Lrange * 5 * (j + 1):
+            p_minmax_opt = p_minmax
+            dp *= 0.33
+        elif Lmax < self.alpha_cross_level:
+            p_minmax_opt = p_minmax
+            converged = True
+        else:
+            dp *= 1.67
+        return p_minmax_opt, dp, converged
+
+    def _search_p_min_max(self, phatfree0, p_minmax0, p_opt, direction):
+        phatfree = phatfree0.copy()
+        sign = dict(min=-1, max=1)[direction]
+        dp = np.maximum(sign * (p_minmax0 - p_opt) / 40, 0.01) * 10
+        Lmax, phatfree = self._profile_optimum(phatfree, p_opt)
+        p_minmax_opt = p_minmax0
+        j = 0
+        converged = False
+        # for j in range(51):
+        while j < 51 and not converged:
+            j += 1
+            p_minmax = p_opt + sign * dp
+            Lmax, phatfree = self._profile_optimum(phatfree, p_minmax)
+            p_minmax_opt, dp, converged = self._update_p_opt(p_minmax_opt, dp,
+                                                             Lmax, p_minmax, j)
+        _assert_warn(j < 50, 'Exceeded max iterations. '
+                     '(p_{0}0={1}, p_{0}={2}, p={3})'.format(direction,
+                                                             p_minmax0,
+                                                             p_minmax_opt,
+                                                             p_opt))
+        # print('search_pmin iterations={}'.format(j))
+        return p_minmax_opt

     def _profile_fun(self, free_par, fix_par):
         ''' Return negative of loglike or logps function
@ -504,37 +523,40 @@ class Profile(object):
         par[self.i_fixed] = self._local_link(fix_par, par)
         return self.fit_dist.fitfun(par)

-    def get_bounds(self, alpha=0.05):
-        '''Return confidence interval for profiled parameter
-        '''
-        if alpha < self.alpha:
-            msg = 'Might not be able to return bounds with alpha less than {}'
-            warnings.warn(msg.format(self.alpha))
-        cross_level = self.Lmax - 0.5 * chi2isf(alpha, 1)
-        ind = findcross(self.data, cross_level)
-        n = len(ind)
+    def _check_bounds(self, cross_level, ind, n):
         if n == 0:
-            warnings.warn('''Number of crossings is zero, i.e.,
-            upper and lower bound is not found!''')
-            bounds = (self.pmin, self.pmax)
+            warnings.warn('Number of crossings is zero, i.e., upper and lower '
+                          'bound is not found!')
+            bounds = self.pmin, self.pmax
         elif n == 1:
             x0 = ecross(self.args, self.data, ind, cross_level)
             is_upcrossing = self.data[ind] < self.data[ind + 1]
             if is_upcrossing:
-                bounds = (x0, self.pmax)
+                bounds = x0, self.pmax
                 warnings.warn('Upper bound is larger')
             else:
-                bounds = (self.pmin, x0)
+                bounds = self.pmin, x0
                 warnings.warn('Lower bound is smaller')
-        elif n == 2:
-            bounds = ecross(self.args, self.data, ind, cross_level)
         else:
             warnings.warn('Number of crossings too large! Something is wrong!')
             bounds = ecross(self.args, self.data, ind[[0, -1]], cross_level)
         return bounds
+
+    def get_bounds(self, alpha=0.05):
+        '''Return confidence interval for profiled parameter
+        '''
+        _assert_warn(self.alpha <= alpha, 'Might not be able to return bounds '
+                     'with alpha less than {}'.format(self.alpha))
+        cross_level = self.Lmax - 0.5 * chi2isf(alpha, 1)
+        ind = findcross(self.data, cross_level)
+        n = len(ind)
+        if n == 2:
+            bounds = ecross(self.args, self.data, ind, cross_level)
+        else:
+            bounds = self._check_bounds(cross_level, ind, n)
+        return bounds
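The crossing level follows Wilks' theorem: a 100(1-alpha)% profile-likelihood interval keeps every parameter value whose profile log-likelihood lies within half a chi-square(1) quantile of the maximum. An illustrative check with scipy (the module itself uses chi2isf):

    from scipy.stats import chi2

    drop = 0.5 * chi2.isf(0.05, 1)  # ~1.92, so cross_level = Lmax - 1.92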
    def plot(self, axis=None):
        '''
        Plot profile function for p_opt with 100(1-alpha)% confidence interval.
@ -561,8 +583,25 @@ class Profile(object):
 def plot_all_profiles(phats, plot=None):
-    if plot is not None:
-        plt = plot
+    def _remove_title_or_ylabel(plt, n, j):
+        if j != 0:
+            plt.title('')
+        if j != n // 2:
+            plt.ylabel('')
+
+    def _profile(phats, k):
+        profile_phat_k = Profile(phats, i=k)
+        m = 0
+        while hasattr(profile_phat_k, 'best_par') and m < 7:
+            # iterate to find optimum phat!
+            phats.fit(*profile_phat_k.best_par)
+            profile_phat_k = Profile(phats, i=k)
+            m += 1
+        return profile_phat_k
+
+    if plot is None:
+        plot = plt

     if phats.par_fix:
         indices = phats.i_notfixed
@ -571,20 +610,12 @@ def plot_all_profiles(phats, plot=None):
     n = len(indices)
     for j, k in enumerate(indices):
         plt.subplot(n, 1, j+1)
-        profile_phat_k = Profile(phats, i=k)
-        m = 0
-        while hasattr(profile_phat_k, 'best_par') and m < 7:
-            phats.fit(*profile_phat_k.best_par)
-            profile_phat_k = Profile(phats, i=k)
-            m += 1
+        profile_phat_k = _profile(phats, k)
         profile_phat_k.plot()
-        if j != 0:
-            plt.title('')
-        if j != n//2:
-            plt.ylabel('')
-    plt.subplots_adjust(hspace=0.5)
+        _remove_title_or_ylabel(plt, n, j)
+    plot.subplots_adjust(hspace=0.5)

     par_txt = ('{:1.2g}, '*len(phats.par))[:-2].format(*phats.par)
-    plt.suptitle('phat = [{}] (fit metod: {})'.format(par_txt, phats.method))
+    plot.suptitle('phat = [{}] (fit method: {})'.format(par_txt, phats.method))
     return phats
@ -681,21 +712,9 @@ class ProfileQuantile(Profile):
         prb = exp(self.log_sf)
         return self.fit_dist.dist.isf(prb, *mphat)

-    def _get_variance(self):
-        i_notfixed = self.i_notfixed
-        phatv = self._par
-        gradfun = nd.Gradient(self._myinvfun)
-        drl = gradfun(phatv[self.i_notfixed])
-        pcov = self.fit_dist.par_cov[i_notfixed, :][:, i_notfixed]
-        pvar = np.sum(np.dot(drl, pcov) * drl)
-        return pvar
-
-    def _set_plot_labels(self, fit_dist, method):
-        super(ProfileQuantile, self)._set_plot_labels(fit_dist, method)
-        percent = 100 * (1.0 - self.alpha)
-        self.title = '{:g}% CI for {:s} quantile'.format(percent,
-                                                         fit_dist.dist.name)
-        self.xlabel = 'x'
+    def _set_plot_labels(self, method, title='', xlabel='x'):
+        title = '{:s} quantile'.format(self.fit_dist.dist.name)
+        super(ProfileQuantile, self)._set_plot_labels(method, title, xlabel)
class ProfileProbability(Profile):
@ -783,27 +802,151 @@ class ProfileProbability(Profile):
     fix_par = self.link(self.x, fixed_log_sf, par, self.i_fixed)
     return fix_par

-    def _myprbfun(self, phatnotfixed):
-        """_myprbfun"""
+    def _myinvfun(self, phatnotfixed):
         mphat = self._par.copy()
         mphat[self.i_notfixed] = phatnotfixed
         logsf = self.fit_dist.dist.logsf(self.x, *mphat)
         return np.where(np.isfinite(logsf), logsf, np.nan)

-    def _get_variance(self):
-        i_notfixed = self.i_notfixed
-        phatv = self._par
-        gradfun = nd.Gradient(self._myprbfun)
-        drl = gradfun(phatv[self.i_notfixed])
-        pcov = self.fit_dist.par_cov[i_notfixed, :][:, i_notfixed]
-        pvar = np.sum(np.dot(drl, pcov) * drl)
-        return pvar
-
-    def _set_plot_labels(self, fit_dist, method):
-        super(ProfileProbability, self)._set_plot_labels(fit_dist, method)
-        percent = 100 * (1.0 - self.alpha)
-        self.title = '{:g}% CI for {:s} probability'.format(percent,
-                                                            fit_dist.dist.name)
-        self.xlabel = 'log(sf)'
+    def _set_plot_labels(self, method, title='', xlabel=''):
+        title = '{:s} probability'.format(self.fit_dist.dist.name)
+        xlabel = 'log(sf)'
+        super(ProfileProbability, self)._set_plot_labels(method, title, xlabel)


# Frozen RV class
class rv_frozen(object):
    ''' Frozen continuous or discrete 1D Random Variable object (RV)

    Methods
-------
rvs(size=1)
Random variates.
pdf(x)
Probability density function.
cdf(x)
Cumulative distribution function.
sf(x)
Survival function (1-cdf --- sometimes more accurate).
ppf(q)
Percent point function (inverse of cdf --- percentiles).
isf(q)
Inverse survival function (inverse of sf).
stats(moments='mv')
Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
moment(n)
n-th order non-central moment of distribution.
entropy()
(Differential) entropy of the RV.
interval(alpha)
Confidence interval with equal areas around the median.
expect(func, lb, ub, conditional=False)
Calculate expected value of a function with respect to the
distribution.
'''
def __init__(self, dist, *args, **kwds):
# create a new instance
self.dist = dist # .__class__(**dist._ctor_param)
shapes, loc, scale = self.dist._parse_args(*args, **kwds)
if isinstance(dist, rv_continuous):
self.par = shapes + (loc, scale)
else: # rv_discrete
self.par = shapes + (loc,)
# a, b may be set in _argcheck, depending on *args, **kwds. Ouch.
self.dist._argcheck(*shapes)
self.a = self.dist.a
self.b = self.dist.b
self.shapes = self.dist.shapes
# @property
# def shapes(self):
# return self.dist.shapes
@property
def random_state(self):
return self.dist._random_state
@random_state.setter
def random_state(self, seed):
self.dist._random_state = check_random_state(seed)
def pdf(self, x):
''' Probability density function at x of the given RV.'''
return self.dist.pdf(x, *self.par)
def logpdf(self, x):
return self.dist.logpdf(x, *self.par)
def cdf(self, x):
'''Cumulative distribution function at x of the given RV.'''
return self.dist.cdf(x, *self.par)
def logcdf(self, x):
return self.dist.logcdf(x, *self.par)
def ppf(self, q):
'''Percent point function (inverse of cdf) at q of the given RV.'''
return self.dist.ppf(q, *self.par)
def isf(self, q):
'''Inverse survival function at q of the given RV.'''
return self.dist.isf(q, *self.par)
def rvs(self, size=None, random_state=None):
kwds = {'size': size, 'random_state': random_state}
return self.dist.rvs(*self.par, **kwds)
def sf(self, x):
'''Survival function (1-cdf) at x of the given RV.'''
return self.dist.sf(x, *self.par)
def logsf(self, x):
return self.dist.logsf(x, *self.par)
def stats(self, moments='mv'):
''' Some statistics of the given RV'''
kwds = dict(moments=moments)
return self.dist.stats(*self.par, **kwds)
def median(self):
return self.dist.median(*self.par)
def mean(self):
return self.dist.mean(*self.par)
def var(self):
return self.dist.var(*self.par)
def std(self):
return self.dist.std(*self.par)
def moment(self, n):
return self.dist.moment(n, *self.par)
def entropy(self):
return self.dist.entropy(*self.par)
def pmf(self, k):
'''Probability mass function at k of the given RV'''
return self.dist.pmf(k, *self.par)
def logpmf(self, k):
return self.dist.logpmf(k, *self.par)
def interval(self, alpha):
return self.dist.interval(alpha, *self.par)
def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds):
if isinstance(self.dist, rv_continuous):
a, loc, scale = self.par[:-2], self.par[:-2], self.par[-1]
return self.dist.expect(func, a, loc, scale, lb, ub, conditional,
**kwds)
a, loc = self.par[:-1], self.par[-1]
if kwds:
raise ValueError("Discrete expect does not accept **kwds.")
return self.dist.expect(func, a, loc, lb, ub, conditional)
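A minimal usage sketch of this frozen-RV interface (hypothetical values; any scipy distribution works once the class is in scope):

    from scipy.stats import norm

    frozen = rv_frozen(norm, loc=0.0, scale=2.0)
    print(frozen.pdf(0.0), frozen.cdf(1.0), frozen.interval(0.95))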
class FitDistribution(rv_frozen):
@ -1015,7 +1158,7 @@ class FitDistribution(rv_frozen):
         t.append('%s = %s\n' % (par, str(getattr(self, par))))
         return ''.join(t)

-    def _reduce_func(self, args, kwds):
+    def _convert_fshapes2fnum(self, kwds):
         # First of all, convert fshapes params to fnum: eg for stats.beta,
         # shapes='a, b'. To fix `a`, can specify either `f0` or `fa`.
         # Convert the latter into the former.
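This mirrors scipy's fit conventions: a named shape is fixed via its f<name> alias, which _convert_fshapes2fnum rewrites to the positional f<num> form. An equivalent plain-scipy illustration:

    from scipy import stats

    data = stats.beta.rvs(2.0, 3.0, size=100, random_state=0)
    # 'fa' (named) is converted to 'f0' (positional) internally
    a, b, loc, scale = stats.beta.fit(data, fa=2.0, floc=0.0, fscale=1.0)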
@ -1030,10 +1173,13 @@ class FitDistribution(rv_frozen):
                     raise ValueError("Duplicate entry for %s." % key)
                 else:
                     kwds[key] = val
+        return kwds
+
+    def _unpack_args_kwds(self, args, kwds):
+        kwds = self._convert_fshapes2fnum(kwds)
         args = list(args)
-        nargs = len(args)
         fixedn = []
-        names = ['f%d' % n for n in range(nargs - 2)] + ['floc', 'fscale']
+        names = ['f%d' % n for n in range(len(args) - 2)] + ['floc', 'fscale']
         x0 = []
         for n, key in enumerate(names):
             if key in kwds:
@ -1041,7 +1187,12 @@ class FitDistribution(rv_frozen):
                 args[n] = kwds.pop(key)
             else:
                 x0.append(args[n])
+        return x0, args, fixedn
+
+    def _reduce_func(self, args, kwds):
+        x0, args, fixedn = self._unpack_args_kwds(args, kwds)
+        nargs = len(args)

         fitfun = self._fitfun

         if len(fixedn) == 0:
@ -1055,7 +1206,7 @@ class FitDistribution(rv_frozen):
        def restore(args, theta):
            # Replace with theta for all numbers not in fixedn
            # This allows the non-fixed values to vary, but
            # we still call self.nnlf with all parameters.
            i = 0
            for n in range(nargs):
                if n not in fixedn:
@ -1219,7 +1370,7 @@ class FitDistribution(rv_frozen):
        H = np.asmatrix(self._hessian(self._fitfun, self.par, self.data))
        # H = -nd.Hessian(lambda par: self._fitfun(par, self.data),
        #                 method='forward')(self.par)
        self.H = H
        try:
            if somefixed:
@ -1322,6 +1473,29 @@ class FitDistribution(rv_frozen):
            ci.append((np.nan, np.nan))
        return np.array(ci)
def _fit_summary_text(self):
fixstr = ''
if self.par_fix is not None:
numfix = len(self.i_fixed)
if numfix > 0:
format0 = ', '.join(['%d'] * numfix)
format1 = ', '.join(['%g'] * numfix)
phatistr = format0 % tuple(self.i_fixed)
phatvstr = format1 % tuple(self.par[self.i_fixed])
fixstr = 'Fixed: phat[{0:s}] = {1:s} '.format(phatistr,
phatvstr)
subtxt = ('Fit method: {0:s}, Fit p-value: {1:2.2f} {2:s}, ' +
'phat=[{3:s}], {4:s}')
par_txt = ('{:1.2g}, ' * len(self.par))[:-2].format(*self.par)
try:
LL_txt = 'Lps_max={:2.2g}, Ll_max={:2.2g}'.format(self.LPSmax,
self.LLmax)
except Exception:
LL_txt = 'Lps_max={}, Ll_max={}'.format(self.LPSmax, self.LLmax)
txt = subtxt.format(self.method.upper(), self.pvalue, fixstr, par_txt,
LL_txt)
return txt
    def plotfitsummary(self, axes=None, fig=None):
        ''' Plot various diagnostic plots to assess the quality of the fit.
@ -1345,29 +1519,9 @@ class FitDistribution(rv_frozen):
         if fig is None:
             fig = plt.gcf()

-        fixstr = ''
-        if self.par_fix is not None:
-            numfix = len(self.i_fixed)
-            if numfix > 0:
-                format0 = ', '.join(['%d'] * numfix)
-                format1 = ', '.join(['%g'] * numfix)
-                phatistr = format0 % tuple(self.i_fixed)
-                phatvstr = format1 % tuple(self.par[self.i_fixed])
-                fixstr = 'Fixed: phat[{0:s}] = {1:s} '.format(phatistr,
-                                                              phatvstr)
-        subtxt = ('Fit method: {0:s}, Fit p-value: {1:2.2f} {2:s}, ' +
-                  'phat=[{3:s}], {4:s}')
-        par_txt = ('{:1.2g}, '*len(self.par))[:-2].format(*self.par)
-        try:
-            LL_txt = 'Lps_max={:2.2g}, Ll_max={:2.2g}'.format(self.LPSmax,
-                                                              self.LLmax)
-        except Exception:
-            LL_txt = 'Lps_max={}, Ll_max={}'.format(self.LPSmax, self.LLmax)
         try:
-            fig.text(0.05, 0.01, subtxt.format(self.method.upper(),
-                                               self.pvalue, fixstr, par_txt,
-                                               LL_txt))
+            txt = self._fit_summary_text()
+            fig.text(0.05, 0.01, txt)
         except AttributeError:
             pass
@ -1560,8 +1714,8 @@ def test1():
     phat = FitDistribution(dist, R, floc=0.5, method='ml')
     phats = FitDistribution(dist, R, floc=0.5, method='mps')
     # import matplotlib.pyplot as plt
-    # plt.figure(0)
-    # plot_all_profiles(phat, plot=plt)
+    plt.figure(0)
+    plot_all_profiles(phat, plot=plt)
     plt.figure(1)
     phats.plotfitsummary()
@ -1596,5 +1750,5 @@ def test1():
 if __name__ == '__main__':
-    test1()
-    # test_doctstrings()
+    # test1()
+    test_doctstrings()
