diff --git a/pywafo/src/wafo/interpolate.py b/pywafo/src/wafo/interpolate.py index 60e3223..0cef674 100644 --- a/pywafo/src/wafo/interpolate.py +++ b/pywafo/src/wafo/interpolate.py @@ -11,6 +11,7 @@ #!/usr/bin/env python from __future__ import division import numpy as np +import scipy.signal import scipy.sparse as sp import scipy.sparse.linalg #@UnusedImport from numpy.ma.core import ones, zeros, prod, sin @@ -19,7 +20,234 @@ from numpy.lib.shape_base import vstack from numpy.lib.function_base import linspace import polynomial as pl -__all__ = ['PPform', 'SmoothSpline', 'stineman_interp'] +__all__ = ['PPform', 'savitzky_golay', 'savitzky_golay_piecewise', 'sgolay2d','SmoothSpline', 'stineman_interp'] + +def savitzky_golay(y, window_size, order, deriv=0): + r"""Smooth (and optionally differentiate) data with a Savitzky-Golay filter. + The Savitzky-Golay filter removes high frequency noise from data. + It has the advantage of preserving the original shape and + features of the signal better than other types of filtering + approaches, such as moving averages techhniques. + + Parameters + ---------- + y : array_like, shape (N,) + the values of the time history of the signal. + window_size : int + the length of the window. Must be an odd integer number. + order : int + the order of the polynomial used in the filtering. + Must be less then `window_size` - 1. + deriv: int + the order of the derivative to compute (default = 0 means only smoothing) + Returns + ------- + ys : ndarray, shape (N) + the smoothed signal (or it's n-th derivative). + + Notes + ----- + The Savitzky-Golay is a type of low-pass filter, particularly + suited for smoothing noisy data. The main idea behind this + approach is to make for each point a least-square fit with a + polynomial of high order over a odd-sized window centered at + the point. + + Examples + -------- + >>> t = np.linspace(-4, 4, 500) + >>> y = np.exp( -t**2 ) + np.random.normal(0, 0.05, t.shape) + >>> ysg = savitzky_golay(y, window_size=31, order=4) + >>> import matplotlib.pyplot as plt + >>> plt.plot(t, y, label='Noisy signal') + >>> plt.plot(t, np.exp(-t**2), 'k', lw=1.5, label='Original signal') + >>> plt.plot(t, ysg, 'r', label='Filtered signal') + >>> plt.legend() + >>> plt.show() + + References + ---------- + .. [1] A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of + Data by Simplified Least Squares Procedures. Analytical + Chemistry, 1964, 36 (8), pp 1627-1639. + .. [2] Numerical Recipes 3rd Edition: The Art of Scientific Computing + W.H. Press, S.A. Teukolsky, W.T. Vetterling, B.P. Flannery + Cambridge University Press ISBN-13: 9780521880688 + """ + try: + window_size = np.abs(np.int(window_size)) + order = np.abs(np.int(order)) + except ValueError, msg: + raise ValueError("window_size and order have to be of type int") + if window_size % 2 != 1 or window_size < 1: + raise TypeError("window_size size must be a positive odd number") + if window_size < order + 2: + raise TypeError("window_size is too small for the polynomials order") + order_range = range(order+1) + half_window = (window_size -1) // 2 + # precompute coefficients + b = np.mat([[k**i for i in order_range] for k in range(-half_window, half_window+1)]) + m = np.linalg.pinv(b).A[deriv] + # pad the signal at the extremes with + # values taken from the signal itself + firstvals = y[0] - np.abs( y[1:half_window+1][::-1] - y[0] ) + lastvals = y[-1] + np.abs(y[-half_window-1:-1][::-1] - y[-1]) + y = np.concatenate((firstvals, y, lastvals)) + return np.convolve( m, y, mode='valid') + +def savitzky_golay_piecewise(xvals, data, kernel=11, order =4): + ''' + One of the most popular applications of S-G filter, apart from smoothing UV-VIS + and IR spectra, is smoothing of curves obtained in electroanalytical experiments. + In cyclic voltammetry, voltage (being the abcissa) changes like a triangle wave. + And in the signal there are cusps at the turning points (at switching potentials) + which should never be smoothed. In this case, Savitzky-Golay smoothing should be + done piecewise, ie. separately on pieces monotonic in x + + Example + ------- + >>> n = 1e3 + >>> x = np.linspace(0, 25, n) + >>> y = np.round(sin(x)) + >>> sig2 = linspace(0,0.5,50) + + # As an example, this figure shows the effect of an additive noise with a variance + # of 0.2 (original signal (black), noisy signal (red) and filtered signal (blue dots)). + + >>> yn = y + sqrt(0.2)*np.random.randn(*x.shape) + >>> yr = savitzky_golay_piecewise(x, yn, kernel=11, order=4) + >>> plt.plot(x, yn, 'r', x, y, 'k', x, yr, 'b.') + ''' + turnpoint=0 + last=len(xvals) + if xvals[1]>xvals[0] : #x is increasing? + for i in range(1,last) : #yes + if xvals[i]xvals[i-1] : + turnpoint=i + break + if turnpoint==0 : #no change in direction of x + return savitzky_golay(data, kernel, order) + else: + #smooth the first piece + firstpart=savitzky_golay(data[0:turnpoint],kernel,order) + #recursively smooth the rest + rest=savitzky_golay_piecewise(xvals[turnpoint:], data[turnpoint:], kernel, order) + return np.concatenate((firstpart,rest)) + +def sgolay2d ( z, window_size, order, derivative=None): + """ + Savitsky - Golay filters can also be used to smooth two dimensional data affected + by noise. The algorithm is exactly the same as for the one dimensional case, only + the math is a bit more tricky. The basic algorithm is as follow: + for each point of the two dimensional matrix extract a sub - matrix, centered at + that point and with a size equal to an odd number "window_size". + for this sub - matrix compute a least - square fit of a polynomial surface, defined as + p(x, y) = a0 + a1 * x + a2 * y + a3 * x2 + a4 * y2 + a5 * x * y + ... . + Note that x and y are equal to zero at the central point. + replace the initial central point with the value computed with the fit. + Note that because the fit coefficients are linear with respect to the data spacing, they can pre - computed for efficiency. Moreover, it is important to appropriately pad the borders of the data, with a mirror image of the data itself, so that the evaluation of the fit at the borders of the data can happen smoothly. + Here is the code for two dimensional filtering. + + Example + ------- + # create some sample twoD data + >>> x = np.linspace(-3,3,100) + >>> y = np.linspace(-3,3,100) + >>> X, Y = np.meshgrid(x,y) + >>> Z = np.exp( -(X**2+Y**2)) + + # add noise + >>> Zn = Z + np.random.normal( 0, 0.2, Z.shape ) + + # filter it + >>> Zf = sgolay2d( Zn, window_size=29, order=4) + + # do some plotting + >>> import matplotlib.pyplot as plt + >>> plt.matshow(Z) + >>> plt.matshow(Zn) + >>> plt.matshow(Zf) + """ + # number of terms in the polynomial expression + n_terms = (order + 1) * (order + 2) / 2.0 + + if window_size % 2 == 0: + raise ValueError('window_size must be odd') + + if window_size ** 2 < n_terms: + raise ValueError('order is too high for the window size') + + half_size = window_size // 2 + + # exponents of the polynomial. + # p(x,y) = a0 + a1*x + a2*y + a3*x^2 + a4*y^2 + a5*x*y + ... + # this line gives a list of two item tuple. Each tuple contains + # the exponents of the k-th term. First element of tuple is for x + # second element for y. + # Ex. exps = [(0,0), (1,0), (0,1), (2,0), (1,1), (0,2), ...] + exps = [ (k - n, n) for k in range(order + 1) for n in range(k + 1) ] + + # coordinates of points + ind = np.arange(-half_size, half_size + 1, dtype=np.float64) + dx = np.repeat(ind, window_size) + dy = np.tile(ind, [window_size, 1]).reshape(window_size ** 2,) + + # build matrix of system of equation + A = np.empty((window_size ** 2, len(exps))) + for i, exp in enumerate(exps): + A[:, i] = (dx ** exp[0]) * (dy ** exp[1]) + + # pad input array with appropriate values at the four borders + new_shape = z.shape[0] + 2 * half_size, z.shape[1] + 2 * half_size + Z = np.zeros((new_shape)) + # top band + band = z[0, :] + Z[:half_size, half_size:-half_size] = band - np.abs(np.flipud(z[1:half_size + 1, :]) - band) + # bottom band + band = z[-1, :] + Z[-half_size:, half_size:-half_size] = band + np.abs(np.flipud(z[-half_size - 1:-1, :]) - band) + # left band + band = np.tile(z[:, 0].reshape(-1, 1), [1, half_size]) + Z[half_size:-half_size, :half_size] = band - np.abs(np.fliplr(z[:, 1:half_size + 1]) - band) + # right band + band = np.tile(z[:, -1].reshape(-1, 1), [1, half_size]) + Z[half_size:-half_size, -half_size:] = band + np.abs(np.fliplr(z[:, -half_size - 1:-1]) - band) + # central band + Z[half_size:-half_size, half_size:-half_size] = z + + # top left corner + band = z[0, 0] + Z[:half_size, :half_size] = band - np.abs(np.flipud(np.fliplr(z[1:half_size + 1, 1:half_size + 1])) - band) + # bottom right corner + band = z[-1, -1] + Z[-half_size:, -half_size:] = band + np.abs(np.flipud(np.fliplr(z[-half_size - 1:-1, -half_size - 1:-1])) - band) + + # top right corner + band = Z[half_size, -half_size:] + Z[:half_size, -half_size:] = band - np.abs(np.flipud(Z[half_size + 1:2 * half_size + 1, -half_size:]) - band) + # bottom left corner + band = Z[-half_size:, half_size].reshape(-1, 1) + Z[-half_size:, :half_size] = band - np.abs(np.fliplr(Z[-half_size:, half_size + 1:2 * half_size + 1]) - band) + + # solve system and convolve + if derivative == None: + m = np.linalg.pinv(A)[0].reshape((window_size, -1)) + return scipy.signal.fftconvolve(Z, m, mode='valid') + elif derivative == 'col': + c = np.linalg.pinv(A)[1].reshape((window_size, -1)) + return scipy.signal.fftconvolve(Z, -c, mode='valid') + elif derivative == 'row': + r = np.linalg.pinv(A)[2].reshape((window_size, -1)) + return scipy.signal.fftconvolve(Z, -r, mode='valid') + elif derivative == 'both': + c = np.linalg.pinv(A)[1].reshape((window_size, -1)) + r = np.linalg.pinv(A)[2].reshape((window_size, -1)) + return scipy.signal.fftconvolve(Z, -r, mode='valid'), scipy.signal.fftconvolve(Z, -c, mode='valid') class PPform(object): """The ppform of the piecewise polynomials is given in terms of coefficients diff --git a/pywafo/src/wafo/kdetools.py b/pywafo/src/wafo/kdetools.py index 5243db4..d822980 100644 --- a/pywafo/src/wafo/kdetools.py +++ b/pywafo/src/wafo/kdetools.py @@ -2780,6 +2780,480 @@ def gridcount(data, X, y=1): c = c.transpose(*T) # make sure c is stored in the same way as meshgrid return c +def evar(y): + ''' + Noise variance estimation. + Assuming that the deterministic function Y has additive Gaussian noise, + EVAR(Y) returns an estimated variance of this noise. + + Note: + ---- + A thin-plate smoothing spline model is used to smooth Y. It is assumed + that the model whose generalized cross-validation score is minimum can + provide the variance of the additive noise. A few tests showed that + EVAR works very well with "not too irregular" functions. + + Examples: + -------- + 1D signal + >>> n = 1e6; + >>> x = np.linspace(0,100,n); + >>> y = np.cos(x/10)+(x/50) + >>> var0 = 0.02 # noise variance + >>> yn = y + sqrt(var0)*np.random.randn(*y.shape) + >>> evar(yn) #estimated variance + + 2D function + >>> xp = np.linspace(0,1,50) + >>> x, y = np.meshgrid(xp,xp) + >>> f = np.exp(x+y) + np.sin((x-2*y)*3) + >>> var0 = 0.04 # noise variance + >>> fn = f + sqrt(var0)*np.random.randn(*f.shape) + >>> evar(fn) estimated variance + + 3D function + >>> yp = np.linspace(-2,2,50) + >>> [x,y,z] = meshgrid(yp,yp,yp, sparse=True) + >>> f = x*exp(-x**2-y**2-z**2) + >>> var0 = 0.5 # noise variance + >>> fn = f + sqrt(var0)*np.random.randn(*f.shape) + >>> evar(fn) estimated variance + + Other example + ------------- + http://www.biomecardio.com/matlab/evar.html + + Note: + ---- + EVAR is only adapted to evenly-gridded 1-D to N-D data. + + See also + -------- + VAR, STD, SMOOTHN + ''' + + # Damien Garcia -- 2008/04, revised 2009/10 + y = np.atleast_1d(y) + d = y.ndim + sh0 = y.shape + + S = np.zeros(sh0) + sh1 = np.ones((d,)) + cos = np.cos + pi = np.pi + for i in range(d): + ni = sh0[i] + sh1[i] = ni + t = np.arange(ni).reshape(sh1)/ni + S += cos(pi*t) + sh1[i] = 1 + + S2 = 2*(d-S).ravel() + # N-D Discrete Cosine Transform of Y + dcty2 = dctn(y).ravel()**2 + def score_fun(L, S2, dcty2): + # Generalized cross validation score + M = 1-1./(1+10**L*S2) + noisevar = (dcty2*M**2).mean() + return noisevar/M.mean()**2 + #fun = lambda x : score_fun(x, S2, dcty2) + Lopt = optimize.fminbound(score_fun, -38, 38, args=(S2, dcty2)) + M = 1-1./(1+10**Lopt*S2) + noisevar = (dcty2*M**2).mean() + return noisevar + +def unfinished_smoothn(data,s=None, weight=None, robust=False, z0=None, tolz=1e-3, maxiter=100): + ''' + SMOOTHN Robust spline smoothing for 1-D to N-D data. + SMOOTHN provides a fast, automatized and robust discretized smoothing + spline for data of any dimension. + + Z = SMOOTHN(Y) automatically smoothes the uniformly-sampled array Y. Y + can be any N-D noisy array (time series, images, 3D data,...). Non + finite data (NaN or Inf) are treated as missing values. + + Z = SMOOTHN(Y,S) smoothes the array Y using the smoothing parameter S. + S must be a real positive scalar. The larger S is, the smoother the + output will be. If the smoothing parameter S is omitted (see previous + option) or empty (i.e. S = []), it is automatically determined using + the generalized cross-validation (GCV) method. + + Z = SMOOTHN(Y,W) or Z = SMOOTHN(Y,W,S) specifies a weighting array W of + real positive values, that must have the same size as Y. Note that a + nil weight corresponds to a missing value. + + Robust smoothing + ---------------- + Z = SMOOTHN(...,'robust') carries out a robust smoothing that minimizes + the influence of outlying data. + + [Z,S] = SMOOTHN(...) also returns the calculated value for S so that + you can fine-tune the smoothing subsequently if needed. + + An iteration process is used in the presence of weighted and/or missing + values. Z = SMOOTHN(...,OPTION_NAME,OPTION_VALUE) smoothes with the + termination parameters specified by OPTION_NAME and OPTION_VALUE. They + can contain the following criteria: + ----------------- + TolZ: Termination tolerance on Z (default = 1e-3) + TolZ must be in ]0,1[ + MaxIter: Maximum number of iterations allowed (default = 100) + Initial: Initial value for the iterative process (default = + original data) + ----------------- + Syntax: [Z,...] = SMOOTHN(...,'MaxIter',500,'TolZ',1e-4,'Initial',Z0); + + [Z,S,EXITFLAG] = SMOOTHN(...) returns a boolean value EXITFLAG that + describes the exit condition of SMOOTHN: + 1 SMOOTHN converged. + 0 Maximum number of iterations was reached. + + Class Support + ------------- + Input array can be numeric or logical. The returned array is of class + double. + + Notes + ----- + The N-D (inverse) discrete cosine transform functions DCTN and IDCTN are required. + + To be made + ---------- + Estimate the confidence bands (see Wahba 1983, Nychka 1988). + + Reference + --------- + Garcia D, Robust smoothing of gridded data in one and higher dimensions + with missing values. Computational Statistics & Data Analysis, 2010. + PDF download + + Examples: + -------- + 1-D example + x = linspace(0,100,2^8); + y = cos(x/10)+(x/50).^2 + randn(size(x))/10; + y([70 75 80]) = [5.5 5 6]; + z = smoothn(y); Regular smoothing + zr = smoothn(y,'robust'); Robust smoothing + subplot(121), plot(x,y,'r.',x,z,'k','LineWidth',2) + axis square, title('Regular smoothing') + subplot(122), plot(x,y,'r.',x,zr,'k','LineWidth',2) + axis square, title('Robust smoothing') + + 2-D example + xp = 0:.02:1; + [x,y] = meshgrid(xp); + f = exp(x+y) + sin((x-2*y)*3); + fn = f + randn(size(f))*0.5; + fs = smoothn(fn); + subplot(121), surf(xp,xp,fn), zlim([0 8]), axis square + subplot(122), surf(xp,xp,fs), zlim([0 8]), axis square + + 2-D example with missing data + n = 256; + y0 = peaks(n); + y = y0 + rand(size(y0))*2; + I = randperm(n^2); + y(I(1:n^2*0.5)) = NaN; lose 1/2 of data + y(40:90,140:190) = NaN; create a hole + z = smoothn(y); smooth data + subplot(2,2,1:2), imagesc(y), axis equal off + title('Noisy corrupt data') + subplot(223), imagesc(z), axis equal off + title('Recovered data ...') + subplot(224), imagesc(y0), axis equal off + title('... compared with original data') + + 3-D example + [x,y,z] = meshgrid(-2:.2:2); + xslice = [-0.8,1]; yslice = 2; zslice = [-2,0]; + vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06; + subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic') + title('Noisy data') + v = smoothn(vn); + subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic') + title('Smoothed data') + + Cardioid + t = linspace(0,2*pi,1000); + x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1; + y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1; + z = smoothn(complex(x,y)); + plot(x,y,'r.',real(z),imag(z),'k','linewidth',2) + axis equal tight + + Cellular vortical flow + [x,y] = meshgrid(linspace(0,1,24)); + Vx = cos(2*pi*x+pi/2).*cos(2*pi*y); + Vy = sin(2*pi*x+pi/2).*sin(2*pi*y); + Vx = Vx + sqrt(0.05)*randn(24,24); adding Gaussian noise + Vy = Vy + sqrt(0.05)*randn(24,24); adding Gaussian noise + I = randperm(numel(Vx)); + Vx(I(1:30)) = (rand(30,1)-0.5)*5; adding outliers + Vy(I(1:30)) = (rand(30,1)-0.5)*5; adding outliers + Vx(I(31:60)) = NaN; missing values + Vy(I(31:60)) = NaN; missing values + Vs = smoothn(complex(Vx,Vy),'robust'); automatic smoothing + subplot(121), quiver(x,y,Vx,Vy,2.5), axis square + title('Noisy velocity field') + subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square + title('Smoothed velocity field') + + See also SMOOTH, SMOOTH3, DCTN, IDCTN. + + -- Damien Garcia -- 2009/03, revised 2010/11 + Visit my website for more details about SMOOTHN + ''' + + y = np.atleast_1d(data) + sizy = y.shape + noe = y.size + if noe<2: + return data + + weightstr = 'bisquare' + W = np.ones(sizy) + # Smoothness parameter and weights + if weight is None: + pass + elif isinstance(weight, str): + weightstr = W.lower() + + # Weights. Zero weights are assigned to not finite values (Inf or NaN), + # (Inf/NaN values = missing data). + IsFinite = np.isfinite(y); + nof = sum(IsFinite) # number of finite elements + W = W*IsFinite + if any(W<0): + raise ValueError('Weights must all be >=0') + else: + W = W/W.max() + + + #% Weighted or missing data? +# isweighted = any(W(:)<1); +# %--- +# % Robust smoothing? +# isrobust = any(strcmpi(varargin,'robust')); +# %--- +# % Automatic smoothing? +# isauto = isempty(s); +# %--- +# % DCTN and IDCTN are required +# test4DCTNandIDCTN +# +# %% Creation of the Lambda tensor +# %--- +# % Lambda contains the eingenvalues of the difference matrix used in this +# % penalized least squares process. +# d = ndims(y); +# Lambda = zeros(sizy); +# for i = 1:d +# siz0 = ones(1,d); +# siz0(i) = sizy(i); +# Lambda = bsxfun(@plus,Lambda,... +# cos(pi*(reshape(1:sizy(i),siz0)-1)/sizy(i))); +# end +# Lambda = -2*(d-Lambda); +# if ~isauto, Gamma = 1./(1+s*Lambda.^2); end +# +# %% Upper and lower bound for the smoothness parameter +# % The average leverage (h) is by definition in [0 1]. Weak smoothing occurs +# % if h is close to 1, while over-smoothing appears when h is near 0. Upper +# % and lower bounds for h are given to avoid under- or over-smoothing. See +# % equation relating h to the smoothness parameter (Equation #12 in the +# % referenced CSDA paper). +# N = sum(sizy~=1); % tensor rank of the y-array +# hMin = 1e-6; hMax = 0.99; +# sMinBnd = (((1+sqrt(1+8*hMax.^(2/N)))/4./hMax.^(2/N)).^2-1)/16; +# sMaxBnd = (((1+sqrt(1+8*hMin.^(2/N)))/4./hMin.^(2/N)).^2-1)/16; +# +# %% Initialize before iterating +# %--- +# Wtot = W; +# %--- Initial conditions for z +# if isweighted +# %--- With weighted/missing data +# % An initial guess is provided to ensure faster convergence. For that +# % purpose, a nearest neighbor interpolation followed by a coarse +# % smoothing are performed. +# %--- +# if isinitial % an initial guess (z0) has been provided +# z = z0; +# else +# z = InitialGuess(y,IsFinite); +# end +# else +# z = zeros(sizy); +# end +# %--- +# z0 = z; +# y(~IsFinite) = 0; % arbitrary values for missing y-data +# %--- +# tol = 1; +# RobustIterativeProcess = true; +# RobustStep = 1; +# nit = 0; +# %--- Error on p. Smoothness parameter s = 10^p +# errp = 0.1; +# opt = optimset('TolX',errp); +# %--- Relaxation factor RF: to speedup convergence +# RF = 1 + 0.75*isweighted; +# +# %% Main iterative process +# %--- +# while RobustIterativeProcess +# %--- "amount" of weights (see the function GCVscore) +# aow = sum(Wtot(:))/noe; % 0 < aow <= 1 +# %--- +# while tol>TolZ && nit tol=0 (no iteration) +# tol = isweighted*norm(z0(:)-z(:))/norm(z(:)); +# +# z0 = z; % re-initialization +# end +# exitflag = nit0.9 % aow = 1 means that all of the data are equally weighted +# % very much faster: does not require any inverse DCT +# RSS = norm(DCTy(:).*(Gamma(:)-1))^2; +# else +# % take account of the weights to calculate RSS: +# yhat = idctn(Gamma.*DCTy); +# RSS = norm(sqrt(Wtot(IsFinite)).*(y(IsFinite)-yhat(IsFinite)))^2; +# end +# %--- +# TrH = sum(Gamma(:)); +# GCVscore = RSS/nof/(1-TrH/noe)^2; +# end +# +# end +# +# %% Robust weights +# function W = RobustWeights(r,I,h,wstr) +# % weights for robust smoothing. +# MAD = median(abs(r(I)-median(r(I)))); % median absolute deviation +# u = abs(r/(1.4826*MAD)/sqrt(1-h)); % studentized residuals +# if strcmp(wstr,'cauchy') +# c = 2.385; W = 1./(1+(u/c).^2); % Cauchy weights +# elseif strcmp(wstr,'talworth') +# c = 2.795; W = uhere.']) +# elseif ~exist('idctn','file') +# error('MATLAB:smoothn:MissingFunction',... +# ['DCTN and IDCTN are required. Download IDCTN here.']) +# end +# end +# +# %% Initial Guess with weighted/missing data +# function z = InitialGuess(y,I) +# %-- nearest neighbor interpolation (in case of missing values) +# if any(~I(:)) +# if license('test','image_toolbox') +# [z,L] = bwdist(I); +# z = y; +# z(~I) = y(L(~I)); +# else +# % If BWDIST does not exist, NaN values are all replaced with the +# % same scalar. The initial guess is not optimal and a warning +# % message thus appears. +# z = y; +# z(~I) = mean(y(I)); +# warning('MATLAB:smoothn:InitialGuess',... +# ['BWDIST (Image Processing Toolbox) does not exist. ',... +# 'The initial guess may not be optimal; additional',... +# ' iterations can thus be required to ensure complete',... +# ' convergence. Increase ''MaxIter'' criterion if necessary.']) +# end +# else +# z = y; +# end +# %-- coarse fast smoothing using one-tenth of the DCT coefficients +# siz = size(z); +# z = dctn(z); +# for k = 1:ndims(z) +# z(ceil(siz(k)/10)+1:end,:) = 0; +# z = reshape(z,circshift(siz,[0 1-k])); +# z = shiftdim(z,1); +# end +# z = idctn(z); +# end + def kde_demo1(): ''' diff --git a/pywafo/src/wafo/polynomial.py b/pywafo/src/wafo/polynomial.py index 7e3fa8c..9fe9ffc 100644 --- a/pywafo/src/wafo/polynomial.py +++ b/pywafo/src/wafo/polynomial.py @@ -205,6 +205,193 @@ def polyder(p, m=1): return poly1d(val) return val +def unfinished_polydeg(x,y): + ''' + Return optimal degree for polynomial fitting. + N = POLYDEG(X,Y) finds the optimal degree for polynomial fitting + according to the Akaike's information criterion. + + Assuming that you want to find the degree N of a polynomial that fits + the data Y(X) best in a least-squares sense, the Akaike's information + criterion is defined by: + 2*(N+1) + n*(log(2*pi*RSS/n)+1) + where n is the number of points and RSS is the residual sum of squares. + The optimal degree N is defined here as that which minimizes AIC. + + Notes: + ----- + If the number of data is small, POLYDEG may tend to return: + N = (number of points)-1. + + ORTHOFIT is more appropriate than POLYFIT for polynomial fitting with + relatively high degrees. + + Examples: + -------- + load census + n = polydeg(cdate,pop) + + x = linspace(0,10,300); + y = sin(x.^3/100).^2 + 0.05*randn(size(x)); + n = polydeg(x,y) + ys = orthofit(x,y,n); + plot(x,y,'.',x,ys,'k') + + Damien Garcia, 02/2008, revised 01/2010 + + See also POLYFIT, ORTHOFIT. + ''' + x, y = np.atleast_1d(x, y) + + N = len(x) + + + ## Search the optimal degree minimizing the Akaike's information criterion + # --- + # y(x) are fitted in a least-squares sense using a polynomial of degree n + # developed in a series of orthogonal polynomials. + + + p = y.mean() + ys = np.ones((N,))*p + AIC = 2+N*(np.log(2*pi*((ys-y)**2).sum()/N)+1)+ 4/(N-2) #correction for small sample sizes + + p = zeros((2,2)) + p[1,0] = x.mean() + PL = np.ones((2,N)) + PL[1] = x-p[1,0] + + n = 1 + nit = 0 + + # While-loop is stopped when a minimum is detected. 3 more steps are + # required to take AIC noise into account and to ensure that this minimum + # is a (likely) global minimum. + + while nit<3: + if n>0: + p[0,n] = sum(x*PL[:,n]**2)/sum(PL[:,n]**2) + p[1,n] = sum(x*PL[:,n-1]*PL[:,n])/sum(PL[:,n-1]**2) + PL[:,n] = (x-p[0,n+1])*PL[:,n]-p[1,n+1]*PL[:,n-1] + #end + + tmp = sum(y*PL)/sum(PL**2) + ys = sum(PL*tmp,axis=-1) + + # -- Akaike's Information Criterion + aic = 2*(n+1)+N*(np.log(2*pi*sum((ys-y.ravel()**2)/N)+1)) + 2*(n+1)*(n+2)/(N-n-2) + + + if aic>=AIC: + nit += 1 + else: + nit = 0 + AIC = aic + + n = n+1 + + if n>=N: + break + n = n-nit-1 + + return n + +def unfinished_orthofit(x,y,n): + ''' + ORTHOFIT Fit polynomial to data. + YS = ORTHOFIT(X,Y,N) smooths/fits data Y(X) in a least-squares sense + using a polynomial of degree N and returns the smoothed data YS. + + [YS,YI] = ORTHOFIT(X,Y,N,XI) also returns the values YI of the fitting + polynomial at the points of a different XI array. + + YI = ORTHOFIT(X,Y,N,XI) returns only the values YI of the fitting + polynomial at the points of the XI array. + + [YS,P] = ORTHOFIT(X,Y,N) returns the polynomial coefficients P for use + with POLYVAL. + + Notes: + ----- + ORTHOFIT smooths/fits data using a polynomial of degree N developed in + a sequence of orthogonal polynomials. ORTHOFIT is more appropriate than + POLYFIT for polynomial fitting and smoothing since this method does not + involve any matrix linear system but a simple recursive procedure. + Degrees much higher than 30 could be used with orthogonal polynomials, + whereas badly conditioned matrices may appear with a classical + polynomial fitting of degree typically higher than 10. + + To avoid using unnecessarily high degrees, you may let the function + POLYDEG choose it for you. POLYDEG finds an optimal polynomial degree + according to the Akaike's information criterion (available here). + + Example: + ------- + x = linspace(0,10,300); + y = sin(x.^3/100).^2 + 0.05*randn(size(x)); + ys = orthofit(x,y,25); + plot(x,y,'.',x,ys,'k') + try POLYFIT for comparison... + + Automatic degree determination with POLYDEG + n = polydeg(x,y); + ys = orthofit(x,y,n); + plot(x,y,'.',x,ys,'k') + + Reference: Méthodes de calcul numérique 2. JP Nougier. Hermes Science + Publications, 2001. Section 4.7 pp 116-121 + + Damien Garcia, 09/2007, revised 01/2010 + + See also POLYDEG, POLYFIT, POLYVAL. + ''' + x, y = np.atleast_1d(x,y) + # Particular case: n=0 + if n==0: + p = y.mean() + ys = np.ones(y.shape)*p + return p, ys + + + # Reshape + x = x.ravel() + siz0 = y.shape + y = y.ravel() + + # Coefficients of the orthogonal polynomials + p = np.zeros((3,n+1)) + p[1,1] = x.mean() + + N = len(x) + PL = np.ones((N,n+1)) + PL[:,1] = x-p[1,1] + + for i in range(2,n+1): + p[1,i] = sum(x*PL[:,i-1]**2)/sum(PL[:,i-1]**2) + p[2,i] = sum(x*PL[:,i-2]*PL[:,i-1])/sum(PL[:,i-2]**2) + PL[:,i] = (x-p[1,i])*PL[:,i-1]-p[2,i]*PL[:,i-2] + #end + p[0,:] = sum(PL*y)/sum(PL**2); + + # ys = smoothed y + #ys = sum(PL*p(0,:) axis=1) + #ys.shape = siz0 + + + # Coefficients of the polynomial in its final form + + yi = np.zeros((n+1,n+1)) + yi[0,n] = 1 + yi[1,n-1:n+1] = 1 -p[1,1] + for i in range(2, n+1): + yi[i,:] = np.hstack((yi[i-1,1:], 0))-p[1,i]*yi[i-1,:]-p[2,i]*yi[i-2,:]; + + p = sum(p[0,:]*yi, axis=0) + return p + def polyreloc(p, x, y=0.0): """ Relocate polynomial