Added savitzky_golay savitzky_golay_piecewise sgolay2d

Added evar
Added some work in progress
master
Per.Andreas.Brodtkorb 13 years ago
parent 65101025d7
commit dd0c86e59b

@ -11,6 +11,7 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import division from __future__ import division
import numpy as np import numpy as np
import scipy.signal
import scipy.sparse as sp import scipy.sparse as sp
import scipy.sparse.linalg #@UnusedImport import scipy.sparse.linalg #@UnusedImport
from numpy.ma.core import ones, zeros, prod, sin from numpy.ma.core import ones, zeros, prod, sin
@ -19,7 +20,234 @@ from numpy.lib.shape_base import vstack
from numpy.lib.function_base import linspace from numpy.lib.function_base import linspace
import polynomial as pl import polynomial as pl
__all__ = ['PPform', 'SmoothSpline', 'stineman_interp'] __all__ = ['PPform', 'savitzky_golay', 'savitzky_golay_piecewise', 'sgolay2d','SmoothSpline', 'stineman_interp']
def savitzky_golay(y, window_size, order, deriv=0):
r"""Smooth (and optionally differentiate) data with a Savitzky-Golay filter.
The Savitzky-Golay filter removes high frequency noise from data.
It has the advantage of preserving the original shape and
features of the signal better than other types of filtering
approaches, such as moving averages techhniques.
Parameters
----------
y : array_like, shape (N,)
the values of the time history of the signal.
window_size : int
the length of the window. Must be an odd integer number.
order : int
the order of the polynomial used in the filtering.
Must be less then `window_size` - 1.
deriv: int
the order of the derivative to compute (default = 0 means only smoothing)
Returns
-------
ys : ndarray, shape (N)
the smoothed signal (or it's n-th derivative).
Notes
-----
The Savitzky-Golay is a type of low-pass filter, particularly
suited for smoothing noisy data. The main idea behind this
approach is to make for each point a least-square fit with a
polynomial of high order over a odd-sized window centered at
the point.
Examples
--------
>>> t = np.linspace(-4, 4, 500)
>>> y = np.exp( -t**2 ) + np.random.normal(0, 0.05, t.shape)
>>> ysg = savitzky_golay(y, window_size=31, order=4)
>>> import matplotlib.pyplot as plt
>>> plt.plot(t, y, label='Noisy signal')
>>> plt.plot(t, np.exp(-t**2), 'k', lw=1.5, label='Original signal')
>>> plt.plot(t, ysg, 'r', label='Filtered signal')
>>> plt.legend()
>>> plt.show()
References
----------
.. [1] A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of
Data by Simplified Least Squares Procedures. Analytical
Chemistry, 1964, 36 (8), pp 1627-1639.
.. [2] Numerical Recipes 3rd Edition: The Art of Scientific Computing
W.H. Press, S.A. Teukolsky, W.T. Vetterling, B.P. Flannery
Cambridge University Press ISBN-13: 9780521880688
"""
try:
window_size = np.abs(np.int(window_size))
order = np.abs(np.int(order))
except ValueError, msg:
raise ValueError("window_size and order have to be of type int")
if window_size % 2 != 1 or window_size < 1:
raise TypeError("window_size size must be a positive odd number")
if window_size < order + 2:
raise TypeError("window_size is too small for the polynomials order")
order_range = range(order+1)
half_window = (window_size -1) // 2
# precompute coefficients
b = np.mat([[k**i for i in order_range] for k in range(-half_window, half_window+1)])
m = np.linalg.pinv(b).A[deriv]
# pad the signal at the extremes with
# values taken from the signal itself
firstvals = y[0] - np.abs( y[1:half_window+1][::-1] - y[0] )
lastvals = y[-1] + np.abs(y[-half_window-1:-1][::-1] - y[-1])
y = np.concatenate((firstvals, y, lastvals))
return np.convolve( m, y, mode='valid')
def savitzky_golay_piecewise(xvals, data, kernel=11, order =4):
'''
One of the most popular applications of S-G filter, apart from smoothing UV-VIS
and IR spectra, is smoothing of curves obtained in electroanalytical experiments.
In cyclic voltammetry, voltage (being the abcissa) changes like a triangle wave.
And in the signal there are cusps at the turning points (at switching potentials)
which should never be smoothed. In this case, Savitzky-Golay smoothing should be
done piecewise, ie. separately on pieces monotonic in x
Example
-------
>>> n = 1e3
>>> x = np.linspace(0, 25, n)
>>> y = np.round(sin(x))
>>> sig2 = linspace(0,0.5,50)
# As an example, this figure shows the effect of an additive noise with a variance
# of 0.2 (original signal (black), noisy signal (red) and filtered signal (blue dots)).
>>> yn = y + sqrt(0.2)*np.random.randn(*x.shape)
>>> yr = savitzky_golay_piecewise(x, yn, kernel=11, order=4)
>>> plt.plot(x, yn, 'r', x, y, 'k', x, yr, 'b.')
'''
turnpoint=0
last=len(xvals)
if xvals[1]>xvals[0] : #x is increasing?
for i in range(1,last) : #yes
if xvals[i]<xvals[i-1] : #search where x starts to fall
turnpoint=i
break
else: #no, x is decreasing
for i in range(1,last) : #search where it starts to rise
if xvals[i]>xvals[i-1] :
turnpoint=i
break
if turnpoint==0 : #no change in direction of x
return savitzky_golay(data, kernel, order)
else:
#smooth the first piece
firstpart=savitzky_golay(data[0:turnpoint],kernel,order)
#recursively smooth the rest
rest=savitzky_golay_piecewise(xvals[turnpoint:], data[turnpoint:], kernel, order)
return np.concatenate((firstpart,rest))
def sgolay2d ( z, window_size, order, derivative=None):
"""
Savitsky - Golay filters can also be used to smooth two dimensional data affected
by noise. The algorithm is exactly the same as for the one dimensional case, only
the math is a bit more tricky. The basic algorithm is as follow:
for each point of the two dimensional matrix extract a sub - matrix, centered at
that point and with a size equal to an odd number "window_size".
for this sub - matrix compute a least - square fit of a polynomial surface, defined as
p(x, y) = a0 + a1 * x + a2 * y + a3 * x2 + a4 * y2 + a5 * x * y + ... .
Note that x and y are equal to zero at the central point.
replace the initial central point with the value computed with the fit.
Note that because the fit coefficients are linear with respect to the data spacing, they can pre - computed for efficiency. Moreover, it is important to appropriately pad the borders of the data, with a mirror image of the data itself, so that the evaluation of the fit at the borders of the data can happen smoothly.
Here is the code for two dimensional filtering.
Example
-------
# create some sample twoD data
>>> x = np.linspace(-3,3,100)
>>> y = np.linspace(-3,3,100)
>>> X, Y = np.meshgrid(x,y)
>>> Z = np.exp( -(X**2+Y**2))
# add noise
>>> Zn = Z + np.random.normal( 0, 0.2, Z.shape )
# filter it
>>> Zf = sgolay2d( Zn, window_size=29, order=4)
# do some plotting
>>> import matplotlib.pyplot as plt
>>> plt.matshow(Z)
>>> plt.matshow(Zn)
>>> plt.matshow(Zf)
"""
# number of terms in the polynomial expression
n_terms = (order + 1) * (order + 2) / 2.0
if window_size % 2 == 0:
raise ValueError('window_size must be odd')
if window_size ** 2 < n_terms:
raise ValueError('order is too high for the window size')
half_size = window_size // 2
# exponents of the polynomial.
# p(x,y) = a0 + a1*x + a2*y + a3*x^2 + a4*y^2 + a5*x*y + ...
# this line gives a list of two item tuple. Each tuple contains
# the exponents of the k-th term. First element of tuple is for x
# second element for y.
# Ex. exps = [(0,0), (1,0), (0,1), (2,0), (1,1), (0,2), ...]
exps = [ (k - n, n) for k in range(order + 1) for n in range(k + 1) ]
# coordinates of points
ind = np.arange(-half_size, half_size + 1, dtype=np.float64)
dx = np.repeat(ind, window_size)
dy = np.tile(ind, [window_size, 1]).reshape(window_size ** 2,)
# build matrix of system of equation
A = np.empty((window_size ** 2, len(exps)))
for i, exp in enumerate(exps):
A[:, i] = (dx ** exp[0]) * (dy ** exp[1])
# pad input array with appropriate values at the four borders
new_shape = z.shape[0] + 2 * half_size, z.shape[1] + 2 * half_size
Z = np.zeros((new_shape))
# top band
band = z[0, :]
Z[:half_size, half_size:-half_size] = band - np.abs(np.flipud(z[1:half_size + 1, :]) - band)
# bottom band
band = z[-1, :]
Z[-half_size:, half_size:-half_size] = band + np.abs(np.flipud(z[-half_size - 1:-1, :]) - band)
# left band
band = np.tile(z[:, 0].reshape(-1, 1), [1, half_size])
Z[half_size:-half_size, :half_size] = band - np.abs(np.fliplr(z[:, 1:half_size + 1]) - band)
# right band
band = np.tile(z[:, -1].reshape(-1, 1), [1, half_size])
Z[half_size:-half_size, -half_size:] = band + np.abs(np.fliplr(z[:, -half_size - 1:-1]) - band)
# central band
Z[half_size:-half_size, half_size:-half_size] = z
# top left corner
band = z[0, 0]
Z[:half_size, :half_size] = band - np.abs(np.flipud(np.fliplr(z[1:half_size + 1, 1:half_size + 1])) - band)
# bottom right corner
band = z[-1, -1]
Z[-half_size:, -half_size:] = band + np.abs(np.flipud(np.fliplr(z[-half_size - 1:-1, -half_size - 1:-1])) - band)
# top right corner
band = Z[half_size, -half_size:]
Z[:half_size, -half_size:] = band - np.abs(np.flipud(Z[half_size + 1:2 * half_size + 1, -half_size:]) - band)
# bottom left corner
band = Z[-half_size:, half_size].reshape(-1, 1)
Z[-half_size:, :half_size] = band - np.abs(np.fliplr(Z[-half_size:, half_size + 1:2 * half_size + 1]) - band)
# solve system and convolve
if derivative == None:
m = np.linalg.pinv(A)[0].reshape((window_size, -1))
return scipy.signal.fftconvolve(Z, m, mode='valid')
elif derivative == 'col':
c = np.linalg.pinv(A)[1].reshape((window_size, -1))
return scipy.signal.fftconvolve(Z, -c, mode='valid')
elif derivative == 'row':
r = np.linalg.pinv(A)[2].reshape((window_size, -1))
return scipy.signal.fftconvolve(Z, -r, mode='valid')
elif derivative == 'both':
c = np.linalg.pinv(A)[1].reshape((window_size, -1))
r = np.linalg.pinv(A)[2].reshape((window_size, -1))
return scipy.signal.fftconvolve(Z, -r, mode='valid'), scipy.signal.fftconvolve(Z, -c, mode='valid')
class PPform(object): class PPform(object):
"""The ppform of the piecewise polynomials is given in terms of coefficients """The ppform of the piecewise polynomials is given in terms of coefficients

@ -2780,6 +2780,480 @@ def gridcount(data, X, y=1):
c = c.transpose(*T) # make sure c is stored in the same way as meshgrid c = c.transpose(*T) # make sure c is stored in the same way as meshgrid
return c return c
def evar(y):
'''
Noise variance estimation.
Assuming that the deterministic function Y has additive Gaussian noise,
EVAR(Y) returns an estimated variance of this noise.
Note:
----
A thin-plate smoothing spline model is used to smooth Y. It is assumed
that the model whose generalized cross-validation score is minimum can
provide the variance of the additive noise. A few tests showed that
EVAR works very well with "not too irregular" functions.
Examples:
--------
1D signal
>>> n = 1e6;
>>> x = np.linspace(0,100,n);
>>> y = np.cos(x/10)+(x/50)
>>> var0 = 0.02 # noise variance
>>> yn = y + sqrt(var0)*np.random.randn(*y.shape)
>>> evar(yn) #estimated variance
2D function
>>> xp = np.linspace(0,1,50)
>>> x, y = np.meshgrid(xp,xp)
>>> f = np.exp(x+y) + np.sin((x-2*y)*3)
>>> var0 = 0.04 # noise variance
>>> fn = f + sqrt(var0)*np.random.randn(*f.shape)
>>> evar(fn) estimated variance
3D function
>>> yp = np.linspace(-2,2,50)
>>> [x,y,z] = meshgrid(yp,yp,yp, sparse=True)
>>> f = x*exp(-x**2-y**2-z**2)
>>> var0 = 0.5 # noise variance
>>> fn = f + sqrt(var0)*np.random.randn(*f.shape)
>>> evar(fn) estimated variance
Other example
-------------
http://www.biomecardio.com/matlab/evar.html
Note:
----
EVAR is only adapted to evenly-gridded 1-D to N-D data.
See also
--------
VAR, STD, SMOOTHN
'''
# Damien Garcia -- 2008/04, revised 2009/10
y = np.atleast_1d(y)
d = y.ndim
sh0 = y.shape
S = np.zeros(sh0)
sh1 = np.ones((d,))
cos = np.cos
pi = np.pi
for i in range(d):
ni = sh0[i]
sh1[i] = ni
t = np.arange(ni).reshape(sh1)/ni
S += cos(pi*t)
sh1[i] = 1
S2 = 2*(d-S).ravel()
# N-D Discrete Cosine Transform of Y
dcty2 = dctn(y).ravel()**2
def score_fun(L, S2, dcty2):
# Generalized cross validation score
M = 1-1./(1+10**L*S2)
noisevar = (dcty2*M**2).mean()
return noisevar/M.mean()**2
#fun = lambda x : score_fun(x, S2, dcty2)
Lopt = optimize.fminbound(score_fun, -38, 38, args=(S2, dcty2))
M = 1-1./(1+10**Lopt*S2)
noisevar = (dcty2*M**2).mean()
return noisevar
def unfinished_smoothn(data,s=None, weight=None, robust=False, z0=None, tolz=1e-3, maxiter=100):
'''
SMOOTHN Robust spline smoothing for 1-D to N-D data.
SMOOTHN provides a fast, automatized and robust discretized smoothing
spline for data of any dimension.
Z = SMOOTHN(Y) automatically smoothes the uniformly-sampled array Y. Y
can be any N-D noisy array (time series, images, 3D data,...). Non
finite data (NaN or Inf) are treated as missing values.
Z = SMOOTHN(Y,S) smoothes the array Y using the smoothing parameter S.
S must be a real positive scalar. The larger S is, the smoother the
output will be. If the smoothing parameter S is omitted (see previous
option) or empty (i.e. S = []), it is automatically determined using
the generalized cross-validation (GCV) method.
Z = SMOOTHN(Y,W) or Z = SMOOTHN(Y,W,S) specifies a weighting array W of
real positive values, that must have the same size as Y. Note that a
nil weight corresponds to a missing value.
Robust smoothing
----------------
Z = SMOOTHN(...,'robust') carries out a robust smoothing that minimizes
the influence of outlying data.
[Z,S] = SMOOTHN(...) also returns the calculated value for S so that
you can fine-tune the smoothing subsequently if needed.
An iteration process is used in the presence of weighted and/or missing
values. Z = SMOOTHN(...,OPTION_NAME,OPTION_VALUE) smoothes with the
termination parameters specified by OPTION_NAME and OPTION_VALUE. They
can contain the following criteria:
-----------------
TolZ: Termination tolerance on Z (default = 1e-3)
TolZ must be in ]0,1[
MaxIter: Maximum number of iterations allowed (default = 100)
Initial: Initial value for the iterative process (default =
original data)
-----------------
Syntax: [Z,...] = SMOOTHN(...,'MaxIter',500,'TolZ',1e-4,'Initial',Z0);
[Z,S,EXITFLAG] = SMOOTHN(...) returns a boolean value EXITFLAG that
describes the exit condition of SMOOTHN:
1 SMOOTHN converged.
0 Maximum number of iterations was reached.
Class Support
-------------
Input array can be numeric or logical. The returned array is of class
double.
Notes
-----
The N-D (inverse) discrete cosine transform functions <a
href="matlab:web('http://www.biomecardio.com/matlab/dctn.html')"
>DCTN</a> and <a
href="matlab:web('http://www.biomecardio.com/matlab/idctn.html')"
>IDCTN</a> are required.
To be made
----------
Estimate the confidence bands (see Wahba 1983, Nychka 1988).
Reference
---------
Garcia D, Robust smoothing of gridded data in one and higher dimensions
with missing values. Computational Statistics & Data Analysis, 2010.
<a
href="matlab:web('http://www.biomecardio.com/pageshtm/publi/csda10.pdf')">PDF download</a>
Examples:
--------
1-D example
x = linspace(0,100,2^8);
y = cos(x/10)+(x/50).^2 + randn(size(x))/10;
y([70 75 80]) = [5.5 5 6];
z = smoothn(y); Regular smoothing
zr = smoothn(y,'robust'); Robust smoothing
subplot(121), plot(x,y,'r.',x,z,'k','LineWidth',2)
axis square, title('Regular smoothing')
subplot(122), plot(x,y,'r.',x,zr,'k','LineWidth',2)
axis square, title('Robust smoothing')
2-D example
xp = 0:.02:1;
[x,y] = meshgrid(xp);
f = exp(x+y) + sin((x-2*y)*3);
fn = f + randn(size(f))*0.5;
fs = smoothn(fn);
subplot(121), surf(xp,xp,fn), zlim([0 8]), axis square
subplot(122), surf(xp,xp,fs), zlim([0 8]), axis square
2-D example with missing data
n = 256;
y0 = peaks(n);
y = y0 + rand(size(y0))*2;
I = randperm(n^2);
y(I(1:n^2*0.5)) = NaN; lose 1/2 of data
y(40:90,140:190) = NaN; create a hole
z = smoothn(y); smooth data
subplot(2,2,1:2), imagesc(y), axis equal off
title('Noisy corrupt data')
subplot(223), imagesc(z), axis equal off
title('Recovered data ...')
subplot(224), imagesc(y0), axis equal off
title('... compared with original data')
3-D example
[x,y,z] = meshgrid(-2:.2:2);
xslice = [-0.8,1]; yslice = 2; zslice = [-2,0];
vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06;
subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic')
title('Noisy data')
v = smoothn(vn);
subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic')
title('Smoothed data')
Cardioid
t = linspace(0,2*pi,1000);
x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1;
y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1;
z = smoothn(complex(x,y));
plot(x,y,'r.',real(z),imag(z),'k','linewidth',2)
axis equal tight
Cellular vortical flow
[x,y] = meshgrid(linspace(0,1,24));
Vx = cos(2*pi*x+pi/2).*cos(2*pi*y);
Vy = sin(2*pi*x+pi/2).*sin(2*pi*y);
Vx = Vx + sqrt(0.05)*randn(24,24); adding Gaussian noise
Vy = Vy + sqrt(0.05)*randn(24,24); adding Gaussian noise
I = randperm(numel(Vx));
Vx(I(1:30)) = (rand(30,1)-0.5)*5; adding outliers
Vy(I(1:30)) = (rand(30,1)-0.5)*5; adding outliers
Vx(I(31:60)) = NaN; missing values
Vy(I(31:60)) = NaN; missing values
Vs = smoothn(complex(Vx,Vy),'robust'); automatic smoothing
subplot(121), quiver(x,y,Vx,Vy,2.5), axis square
title('Noisy velocity field')
subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square
title('Smoothed velocity field')
See also SMOOTH, SMOOTH3, DCTN, IDCTN.
-- Damien Garcia -- 2009/03, revised 2010/11
Visit my <a
href="matlab:web('http://www.biomecardio.com/matlab/smoothn.html')">website</a> for more details about SMOOTHN
'''
y = np.atleast_1d(data)
sizy = y.shape
noe = y.size
if noe<2:
return data
weightstr = 'bisquare'
W = np.ones(sizy)
# Smoothness parameter and weights
if weight is None:
pass
elif isinstance(weight, str):
weightstr = W.lower()
# Weights. Zero weights are assigned to not finite values (Inf or NaN),
# (Inf/NaN values = missing data).
IsFinite = np.isfinite(y);
nof = sum(IsFinite) # number of finite elements
W = W*IsFinite
if any(W<0):
raise ValueError('Weights must all be >=0')
else:
W = W/W.max()
#% Weighted or missing data?
# isweighted = any(W(:)<1);
# %---
# % Robust smoothing?
# isrobust = any(strcmpi(varargin,'robust'));
# %---
# % Automatic smoothing?
# isauto = isempty(s);
# %---
# % DCTN and IDCTN are required
# test4DCTNandIDCTN
#
# %% Creation of the Lambda tensor
# %---
# % Lambda contains the eingenvalues of the difference matrix used in this
# % penalized least squares process.
# d = ndims(y);
# Lambda = zeros(sizy);
# for i = 1:d
# siz0 = ones(1,d);
# siz0(i) = sizy(i);
# Lambda = bsxfun(@plus,Lambda,...
# cos(pi*(reshape(1:sizy(i),siz0)-1)/sizy(i)));
# end
# Lambda = -2*(d-Lambda);
# if ~isauto, Gamma = 1./(1+s*Lambda.^2); end
#
# %% Upper and lower bound for the smoothness parameter
# % The average leverage (h) is by definition in [0 1]. Weak smoothing occurs
# % if h is close to 1, while over-smoothing appears when h is near 0. Upper
# % and lower bounds for h are given to avoid under- or over-smoothing. See
# % equation relating h to the smoothness parameter (Equation #12 in the
# % referenced CSDA paper).
# N = sum(sizy~=1); % tensor rank of the y-array
# hMin = 1e-6; hMax = 0.99;
# sMinBnd = (((1+sqrt(1+8*hMax.^(2/N)))/4./hMax.^(2/N)).^2-1)/16;
# sMaxBnd = (((1+sqrt(1+8*hMin.^(2/N)))/4./hMin.^(2/N)).^2-1)/16;
#
# %% Initialize before iterating
# %---
# Wtot = W;
# %--- Initial conditions for z
# if isweighted
# %--- With weighted/missing data
# % An initial guess is provided to ensure faster convergence. For that
# % purpose, a nearest neighbor interpolation followed by a coarse
# % smoothing are performed.
# %---
# if isinitial % an initial guess (z0) has been provided
# z = z0;
# else
# z = InitialGuess(y,IsFinite);
# end
# else
# z = zeros(sizy);
# end
# %---
# z0 = z;
# y(~IsFinite) = 0; % arbitrary values for missing y-data
# %---
# tol = 1;
# RobustIterativeProcess = true;
# RobustStep = 1;
# nit = 0;
# %--- Error on p. Smoothness parameter s = 10^p
# errp = 0.1;
# opt = optimset('TolX',errp);
# %--- Relaxation factor RF: to speedup convergence
# RF = 1 + 0.75*isweighted;
#
# %% Main iterative process
# %---
# while RobustIterativeProcess
# %--- "amount" of weights (see the function GCVscore)
# aow = sum(Wtot(:))/noe; % 0 < aow <= 1
# %---
# while tol>TolZ && nit<MaxIter
# nit = nit+1;
# DCTy = dctn(Wtot.*(y-z)+z);
# if isauto && ~rem(log2(nit),1)
# %---
# % The generalized cross-validation (GCV) method is used.
# % We seek the smoothing parameter s that minimizes the GCV
# % score i.e. s = Argmin(GCVscore).
# % Because this process is time-consuming, it is performed from
# % time to time (when nit is a power of 2)
# %---
# fminbnd(@gcv,log10(sMinBnd),log10(sMaxBnd),opt);
# end
# z = RF*idctn(Gamma.*DCTy) + (1-RF)*z;
#
# % if no weighted/missing data => tol=0 (no iteration)
# tol = isweighted*norm(z0(:)-z(:))/norm(z(:));
#
# z0 = z; % re-initialization
# end
# exitflag = nit<MaxIter;
#
# if isrobust %-- Robust Smoothing: iteratively re-weighted process
# %--- average leverage
# h = sqrt(1+16*s); h = sqrt(1+h)/sqrt(2)/h; h = h^N;
# %--- take robust weights into account
# Wtot = W.*RobustWeights(y-z,IsFinite,h,weightstr);
# %--- re-initialize for another iterative weighted process
# isweighted = true; tol = 1; nit = 0;
# %---
# RobustStep = RobustStep+1;
# RobustIterativeProcess = RobustStep<4; % 3 robust steps are enough.
# else
# RobustIterativeProcess = false; % stop the whole process
# end
# end
#
# %% Warning messages
# %---
# if isauto
# if abs(log10(s)-log10(sMinBnd))<errp
# warning('MATLAB:smoothn:SLowerBound',...
# ['s = ' num2str(s,'%.3e') ': the lower bound for s ',...
# 'has been reached. Put s as an input variable if required.'])
# elseif abs(log10(s)-log10(sMaxBnd))<errp
# warning('MATLAB:smoothn:SUpperBound',...
# ['s = ' num2str(s,'%.3e') ': the upper bound for s ',...
# 'has been reached. Put s as an input variable if required.'])
# end
# end
# if nargout<3 && ~exitflag
# warning('MATLAB:smoothn:MaxIter',...
# ['Maximum number of iterations (' int2str(MaxIter) ') has ',...
# 'been exceeded. Increase MaxIter option or decrease TolZ value.'])
# end
#
#
# %% GCV score
# %---
# function GCVscore = gcv(p)
# % Search the smoothing parameter s that minimizes the GCV score
# %---
# s = 10^p;
# Gamma = 1./(1+s*Lambda.^2);
# %--- RSS = Residual sum-of-squares
# if aow>0.9 % aow = 1 means that all of the data are equally weighted
# % very much faster: does not require any inverse DCT
# RSS = norm(DCTy(:).*(Gamma(:)-1))^2;
# else
# % take account of the weights to calculate RSS:
# yhat = idctn(Gamma.*DCTy);
# RSS = norm(sqrt(Wtot(IsFinite)).*(y(IsFinite)-yhat(IsFinite)))^2;
# end
# %---
# TrH = sum(Gamma(:));
# GCVscore = RSS/nof/(1-TrH/noe)^2;
# end
#
# end
#
# %% Robust weights
# function W = RobustWeights(r,I,h,wstr)
# % weights for robust smoothing.
# MAD = median(abs(r(I)-median(r(I)))); % median absolute deviation
# u = abs(r/(1.4826*MAD)/sqrt(1-h)); % studentized residuals
# if strcmp(wstr,'cauchy')
# c = 2.385; W = 1./(1+(u/c).^2); % Cauchy weights
# elseif strcmp(wstr,'talworth')
# c = 2.795; W = u<c; % Talworth weights
# else
# c = 4.685; W = (1-(u/c).^2).^2.*((u/c)<1); % bisquare weights
# end
# W(isnan(W)) = 0;
# end
#
# %% Test for DCTN and IDCTN
# function test4DCTNandIDCTN
# if ~exist('dctn','file')
# error('MATLAB:smoothn:MissingFunction',...
# ['DCTN and IDCTN are required. Download DCTN <a href="matlab:web(''',...
# 'http://www.biomecardio.com/matlab/dctn.html'')">here</a>.'])
# elseif ~exist('idctn','file')
# error('MATLAB:smoothn:MissingFunction',...
# ['DCTN and IDCTN are required. Download IDCTN <a href="matlab:web(''',...
# 'http://www.biomecardio.com/matlab/idctn.html'')">here</a>.'])
# end
# end
#
# %% Initial Guess with weighted/missing data
# function z = InitialGuess(y,I)
# %-- nearest neighbor interpolation (in case of missing values)
# if any(~I(:))
# if license('test','image_toolbox')
# [z,L] = bwdist(I);
# z = y;
# z(~I) = y(L(~I));
# else
# % If BWDIST does not exist, NaN values are all replaced with the
# % same scalar. The initial guess is not optimal and a warning
# % message thus appears.
# z = y;
# z(~I) = mean(y(I));
# warning('MATLAB:smoothn:InitialGuess',...
# ['BWDIST (Image Processing Toolbox) does not exist. ',...
# 'The initial guess may not be optimal; additional',...
# ' iterations can thus be required to ensure complete',...
# ' convergence. Increase ''MaxIter'' criterion if necessary.'])
# end
# else
# z = y;
# end
# %-- coarse fast smoothing using one-tenth of the DCT coefficients
# siz = size(z);
# z = dctn(z);
# for k = 1:ndims(z)
# z(ceil(siz(k)/10)+1:end,:) = 0;
# z = reshape(z,circshift(siz,[0 1-k]));
# z = shiftdim(z,1);
# end
# z = idctn(z);
# end
def kde_demo1(): def kde_demo1():
''' '''

@ -205,6 +205,193 @@ def polyder(p, m=1):
return poly1d(val) return poly1d(val)
return val return val
def unfinished_polydeg(x,y):
'''
Return optimal degree for polynomial fitting.
N = POLYDEG(X,Y) finds the optimal degree for polynomial fitting
according to the Akaike's information criterion.
Assuming that you want to find the degree N of a polynomial that fits
the data Y(X) best in a least-squares sense, the Akaike's information
criterion is defined by:
2*(N+1) + n*(log(2*pi*RSS/n)+1)
where n is the number of points and RSS is the residual sum of squares.
The optimal degree N is defined here as that which minimizes <a
href="matlab:web('http://en.wikipedia.org/wiki/Akaike_Information_Criterion')">AIC</a>.
Notes:
-----
If the number of data is small, POLYDEG may tend to return:
N = (number of points)-1.
ORTHOFIT is more appropriate than POLYFIT for polynomial fitting with
relatively high degrees.
Examples:
--------
load census
n = polydeg(cdate,pop)
x = linspace(0,10,300);
y = sin(x.^3/100).^2 + 0.05*randn(size(x));
n = polydeg(x,y)
ys = orthofit(x,y,n);
plot(x,y,'.',x,ys,'k')
Damien Garcia, 02/2008, revised 01/2010
See also POLYFIT, ORTHOFIT.
'''
x, y = np.atleast_1d(x, y)
N = len(x)
## Search the optimal degree minimizing the Akaike's information criterion
# ---
# y(x) are fitted in a least-squares sense using a polynomial of degree n
# developed in a series of orthogonal polynomials.
p = y.mean()
ys = np.ones((N,))*p
AIC = 2+N*(np.log(2*pi*((ys-y)**2).sum()/N)+1)+ 4/(N-2) #correction for small sample sizes
p = zeros((2,2))
p[1,0] = x.mean()
PL = np.ones((2,N))
PL[1] = x-p[1,0]
n = 1
nit = 0
# While-loop is stopped when a minimum is detected. 3 more steps are
# required to take AIC noise into account and to ensure that this minimum
# is a (likely) global minimum.
while nit<3:
if n>0:
p[0,n] = sum(x*PL[:,n]**2)/sum(PL[:,n]**2)
p[1,n] = sum(x*PL[:,n-1]*PL[:,n])/sum(PL[:,n-1]**2)
PL[:,n] = (x-p[0,n+1])*PL[:,n]-p[1,n+1]*PL[:,n-1]
#end
tmp = sum(y*PL)/sum(PL**2)
ys = sum(PL*tmp,axis=-1)
# -- Akaike's Information Criterion
aic = 2*(n+1)+N*(np.log(2*pi*sum((ys-y.ravel()**2)/N)+1)) + 2*(n+1)*(n+2)/(N-n-2)
if aic>=AIC:
nit += 1
else:
nit = 0
AIC = aic
n = n+1
if n>=N:
break
n = n-nit-1
return n
def unfinished_orthofit(x,y,n):
'''
ORTHOFIT Fit polynomial to data.
YS = ORTHOFIT(X,Y,N) smooths/fits data Y(X) in a least-squares sense
using a polynomial of degree N and returns the smoothed data YS.
[YS,YI] = ORTHOFIT(X,Y,N,XI) also returns the values YI of the fitting
polynomial at the points of a different XI array.
YI = ORTHOFIT(X,Y,N,XI) returns only the values YI of the fitting
polynomial at the points of the XI array.
[YS,P] = ORTHOFIT(X,Y,N) returns the polynomial coefficients P for use
with POLYVAL.
Notes:
-----
ORTHOFIT smooths/fits data using a polynomial of degree N developed in
a sequence of orthogonal polynomials. ORTHOFIT is more appropriate than
POLYFIT for polynomial fitting and smoothing since this method does not
involve any matrix linear system but a simple recursive procedure.
Degrees much higher than 30 could be used with orthogonal polynomials,
whereas badly conditioned matrices may appear with a classical
polynomial fitting of degree typically higher than 10.
To avoid using unnecessarily high degrees, you may let the function
POLYDEG choose it for you. POLYDEG finds an optimal polynomial degree
according to the Akaike's information criterion (available <a
href="matlab:web('http://www.biomecardio.com/matlab/polydeg.html')">here</a>).
Example:
-------
x = linspace(0,10,300);
y = sin(x.^3/100).^2 + 0.05*randn(size(x));
ys = orthofit(x,y,25);
plot(x,y,'.',x,ys,'k')
try POLYFIT for comparison...
Automatic degree determination with <a
href="matlab:web('http://www.biomecardio.com/matlab/polydeg.html')">POLYDEG</a>
n = polydeg(x,y);
ys = orthofit(x,y,n);
plot(x,y,'.',x,ys,'k')
Reference: Méthodes de calcul numérique 2. JP Nougier. Hermes Science
Publications, 2001. Section 4.7 pp 116-121
Damien Garcia, 09/2007, revised 01/2010
See also POLYDEG, POLYFIT, POLYVAL.
'''
x, y = np.atleast_1d(x,y)
# Particular case: n=0
if n==0:
p = y.mean()
ys = np.ones(y.shape)*p
return p, ys
# Reshape
x = x.ravel()
siz0 = y.shape
y = y.ravel()
# Coefficients of the orthogonal polynomials
p = np.zeros((3,n+1))
p[1,1] = x.mean()
N = len(x)
PL = np.ones((N,n+1))
PL[:,1] = x-p[1,1]
for i in range(2,n+1):
p[1,i] = sum(x*PL[:,i-1]**2)/sum(PL[:,i-1]**2)
p[2,i] = sum(x*PL[:,i-2]*PL[:,i-1])/sum(PL[:,i-2]**2)
PL[:,i] = (x-p[1,i])*PL[:,i-1]-p[2,i]*PL[:,i-2]
#end
p[0,:] = sum(PL*y)/sum(PL**2);
# ys = smoothed y
#ys = sum(PL*p(0,:) axis=1)
#ys.shape = siz0
# Coefficients of the polynomial in its final form
yi = np.zeros((n+1,n+1))
yi[0,n] = 1
yi[1,n-1:n+1] = 1 -p[1,1]
for i in range(2, n+1):
yi[i,:] = np.hstack((yi[i-1,1:], 0))-p[1,i]*yi[i-1,:]-p[2,i]*yi[i-2,:];
p = sum(p[0,:]*yi, axis=0)
return p
def polyreloc(p, x, y=0.0): def polyreloc(p, x, y=0.0):
""" """
Relocate polynomial Relocate polynomial

Loading…
Cancel
Save