diff --git a/pywafo/src/wafo/integrate.py b/pywafo/src/wafo/integrate.py index 35cbd10..96758d2 100644 --- a/pywafo/src/wafo/integrate.py +++ b/pywafo/src/wafo/integrate.py @@ -1,1491 +1,1498 @@ -from __future__ import division -import warnings -import copy -import numpy as np -from numpy import pi, sqrt, ones, zeros #@UnresolvedImport -from scipy import integrate as intg -import scipy.special.orthogonal as ort -from scipy import special as sp -from wafo.plotbackend import plotbackend as plt -from scipy.integrate import simps, trapz #@UnusedImport -from wafo.misc import is_numlike -from wafo.demos import humps - -_POINTS_AND_WEIGHTS = {} - -__all__ = ['dea3', 'clencurt', 'romberg', - 'h_roots','j_roots', 'la_roots','p_roots','qrule', - 'gaussq', 'richardson', 'quadgr', 'qdemo'] - - -def dea3(v0, v1, v2): - ''' - Extrapolate a slowly convergent sequence - - Parameters - ---------- - v0,v1,v2 : array-like - 3 values of a convergent sequence to extrapolate - - Returns - ------- - result : array-like - extrapolated value - abserr : array-like - absolute error estimate - - Description - ----------- - DEA3 attempts to extrapolate nonlinearly to a better estimate - of the sequence's limiting value, thus improving the rate of - convergence. The routine is based on the epsilon algorithm of - P. Wynn, see [1]_. - - Example - ------- - # integrate sin(x) from 0 to pi/2 - - >>> import numpy as np - >>> Ei= np.zeros(3) - >>> linfun = lambda k : np.linspace(0, np.pi/2., 2.**(k+5)+1) - >>> for k in np.arange(3): - ... x = linfun(k) - ... Ei[k] = np.trapz(np.sin(x),x) - >>> En, err = dea3(Ei[0],Ei[1],Ei[2]) - >>> En, err - (array([ 1.]), array([ 0.0002008])) - >>> TrueErr = Ei-1. - >>> TrueErr - array([ -2.0080568e-04, -5.0199908e-05, -1.2549882e-05]) - - See also - -------- - dea - - Reference - --------- - .. [1] C. Brezinski (1977) - "Acceleration de la convergence en analyse numerique", - "Lecture Notes in Math.", vol. 584, - Springer-Verlag, New York, 1977. - ''' - - E0, E1, E2 = np.atleast_1d(v0, v1, v2) - abs = np.abs #@ReservedAssignment - max = np.maximum #@ReservedAssignment - ten = 10.0 - one = ones(1) - small = np.finfo(float).eps #1.0e-16 #spacing(one) - delta2 = E2 - E1 - delta1 = E1 - E0 - err2 = abs(delta2) - err1 = abs(delta1) - tol2 = max(abs(E2), abs(E1)) * small - tol1 = max(abs(E1), abs(E0)) * small - - result = zeros(E0.shape) - abserr = result.copy() - converged = (err1 <= tol1) & (err2 <= tol2).ravel() - k0, = converged.nonzero() - if k0.size > 0 : - #%C IF E0, E1 AND E2 ARE EQUAL TO WITHIN MACHINE - #%C ACCURACY, CONVERGENCE IS ASSUMED. - result[k0] = E2[k0] - abserr[k0] = err1[k0] + err2[k0] + E2[k0] * small * ten - - k1, = (1 - converged).nonzero() - - if k1.size > 0 : - with warnings.catch_warnings(): - warnings.simplefilter("ignore") # ignore division by zero and overflow - ss = one / delta2[k1] - one / delta1[k1] - smallE2 = (abs(ss * E1[k1]) <= 1.0e-3).ravel() - k2 = k1[smallE2.nonzero()] - if k2.size > 0 : - result[k2] = E2[k2] - abserr[k2] = err1[k2] + err2[k2] + E2[k2] * small * ten - - k4, = (1 - smallE2).nonzero() - if k4.size > 0 : - k3 = k1[k4] - result[k3] = E1[k3] + one / ss[k4] - abserr[k3] = err1[k3] + err2[k3] + abs(result[k3] - E2[k3]) - - return result, abserr - -def clencurt(fun, a, b, n0=5, trace=False, *args): - ''' - Numerical evaluation of an integral, Clenshaw-Curtis method. - - Parameters - ---------- - fun : callable - a, b : array-like - Lower and upper integration limit, respectively. 
- n : integer - defines number of evaluation points (default 5) - - Returns - ------- - Q = evaluated integral - tol = Estimate of the approximation error - - Notes - ----- - CLENCURT approximates the integral of f(x) from a to b - using an 2*n+1 points Clenshaw-Curtis formula. - The error estimate is usually a conservative estimate of the - approximation error. - The integral is exact for polynomials of degree 2*n or less. - - Example - ------- - >>> import numpy as np - >>> val,err = clencurt(np.exp,0,2) - >>> abs(val-np.expm1(2))< err, err<1e-10 - (array([ True], dtype=bool), array([ True], dtype=bool)) - - - See also - -------- - simpson, - gaussq - - References - ---------- - [1] Goodwin, E.T. (1961), - "Modern Computing Methods", - 2nd edition, New yourk: Philosophical Library, pp. 78--79 - - [2] Clenshaw, C.W. and Curtis, A.R. (1960), - Numerische Matematik, Vol. 2, pp. 197--205 - ''' - - - #% make sure n is even - n = 2 * n0 - a, b = np.atleast_1d(a, b) - a_shape = a.shape - af = a.ravel() - bf = b.ravel() - - Na = np.prod(a_shape) - - s = np.r_[0:n + 1] - s2 = np.r_[0:n + 1:2] - s2.shape = (-1, 1) - x1 = np.cos(np.pi * s / n) - x1.shape = (-1, 1) - x = x1 * (bf - af) / 2. + (bf + af) / 2 - - if hasattr(fun, '__call__'): - f = fun(x) - else: - x0 = np.flipud(fun[:, 0]) - n = len(x0) - 1 - if abs(x - x0) > 1e-8: - raise ValueError('Input vector x must equal cos(pi*s/n)*(b-a)/2+(b+a)/2') - - f = np.flipud(fun[:, 1::]) - - if trace: - plt.plot(x, f, '+') - - # using a Gauss-Lobatto variant, i.e., first and last - # term f(a) and f(b) is multiplied with 0.5 - f[0, :] = f[0, :] / 2 - f[n, :] = f[n, :] / 2 - -## % x = cos(pi*0:n/n) -## % f = f(x) -## % -## % N+1 -## % c(k) = (2/N) sum f''(n)*cos(pi*(2*k-2)*(n-1)/N), 1 <= k <= N/2+1. -## % n=1 - fft = np.fft.fft - tmp = np.real(fft(f[:n, :], axis=0)) - c = 2 / n * (tmp[0:n / 2 + 1, :] + np.cos(np.pi * s2) * f[n, :]) -## % old call -## % c = 2/n * cos(s2*s'*pi/n) * f - c[0, :] = c[0, :] / 2 - c[n / 2, :] = c[n / 2, :] / 2 - -## % alternative call -## % c = dct(f) - - - c = c[0:n / 2 + 1, :] / ((s2 - 1) * (s2 + 1)) - Q = (af - bf) * np.sum(c, axis=0) - #Q = (a-b).*sum( c(1:n/2+1,:)./repmat((s2-1).*(s2+1),1,Na)) - - abserr = (bf - af) * np.abs(c[n / 2, :]) - - if Na > 1: - abserr = np.reshape(abserr, a_shape) - Q = np.reshape(Q, a_shape) - return Q, abserr - -def romberg(fun, a, b, releps=1e-3, abseps=1e-3): - ''' - Numerical integration with the Romberg method - - Parameters - ---------- - fun : callable - function to integrate - a, b : real scalars - lower and upper integration limits, respectively. - releps, abseps : scalar, optional - requested relative and absolute error, respectively. - - Returns - ------- - Q : scalar - value of integral - abserr : scalar - estimated absolute error of integral - - ROMBERG approximates the integral of F(X) from A to B - using Romberg's method of integration. The function F - must return a vector of output values if a vector of input values is given. - - - Example - ------- - >>> import numpy as np - >>> [q,err] = romberg(np.sqrt,0,10,0,1e-4) - >>> q,err - (array([ 21.0818511]), array([ 6.6163547e-05])) - ''' - h = b - a - hMin = 1.0e-9 - # Max size of extrapolation table - tableLimit = max(min(np.round(np.log2(h / hMin)), 30), 3) - - rom = zeros((2, tableLimit)) - - rom[0, 0] = h * (fun(a) + fun(b)) / 2 - ipower = 1 - fp = ones(tableLimit) * 4 - - #Ih1 = 0 - Ih2 = 0. 
- Ih4 = rom[0, 0] - abserr = Ih4 - #epstab = zeros(1,decdigs+7) - #newflg = 1 - #[res,abserr,epstab,newflg] = dea(newflg,Ih4,abserr,epstab) - two = 1 - one = 0 - for i in xrange(1, tableLimit): - h *= 0.5 - Un5 = np.sum(fun(a + np.arange(1, 2 * ipower, 2) * h)) * h - - # trapezoidal approximations - #T2n = 0.5 * (Tn + Un) = 0.5*Tn + Un5 - rom[two, 0] = 0.5 * rom[one, 0] + Un5 - - fp[i] = 4 * fp[i - 1] - # Richardson extrapolation - for k in xrange(i): - #rom(2,k+1)=(fp(k)*rom(2,k)-rom(1,k))/(fp(k)-1) - rom[two, k + 1] = rom[two, k] + (rom[two, k] - rom[one, k]) / (fp[k] - 1) - - Ih1 = Ih2 - Ih2 = Ih4 - - Ih4 = rom[two, i] - - if (2 <= i): - [res, abserr] = dea3(Ih1, Ih2, Ih4) - # Ih4 = res - if (abserr <= max(abseps, releps * abs(res))): - break - - # rom(1,1:i) = rom(2,1:i) - two = one - one = (one + 1) % 2 - ipower *= 2 - return res, abserr - -def h_roots(n, method='newton'): - ''' - Returns the roots (x) of the nth order Hermite polynomial, - H_n(x), and weights (w) to use in Gaussian Quadrature over - [-inf,inf] with weighting function exp(-x**2). - - Parameters - ---------- - n : integer - number of roots - method : 'newton' or 'eigenvalue' - uses Newton Raphson to find zeros of the Hermite polynomial (Fast) - or eigenvalue of the jacobi matrix (Slow) to obtain the nodes and - weights, respectively. - - Returns - ------- - x : ndarray - roots - w : ndarray - weights - - Example - ------- - >>> import numpy as np - >>> [x,w] = h_roots(10) - >>> np.sum(x*w) - -5.2516042729766621e-19 - - See also - -------- - qrule, gaussq - - References - ---------- - [1] Golub, G. H. and Welsch, J. H. (1969) - 'Calculation of Gaussian Quadrature Rules' - Mathematics of Computation, vol 23,page 221-230, - - [2]. Stroud and Secrest (1966), 'gaussian quadrature formulas', - prentice-hall, Englewood cliffs, n.j. - ''' - - - if not method.startswith('n'): - return ort.h_roots(n) - else: - sqrt = np.sqrt - max_iter = 10 - releps = 3e-14 - C = [9.084064e-01, 5.214976e-02, 2.579930e-03, 3.986126e-03] - #PIM4=0.7511255444649425 - PIM4 = np.pi ** (-1. / 4) - - # The roots are symmetric about the origin, so we have to - # find only half of them. - m = int(np.fix((n + 1) / 2)) - - # Initial approximations to the roots go into z. - anu = 2.0 * n + 1 - rhs = np.arange(3, 4 * m, 4) * np.pi / anu - r3 = rhs ** (1. / 3) - r2 = r3 ** 2 - theta = r3 * (C[0] + r2 * (C[1] + r2 * (C[2] + r2 * C[3]))) - z = sqrt(anu) * np.cos(theta) - - L = zeros((3, len(z))) - k0 = 0 - kp1 = 1 - for _its in xrange(max_iter): - #Newtons method carried out simultaneously on the roots. - L[k0, :] = 0 - L[kp1, :] = PIM4 - - for j in xrange(1, n + 1): - #%Loop up the recurrence relation to get the Hermite - #%polynomials evaluated at z. - km1 = k0 - k0 = kp1 - kp1 = np.mod(kp1 + 1, 3) - - L[kp1, :] = z * sqrt(2 / j) * L[k0, :] - np.sqrt((j - 1) / j) * L[km1, :] - - - # L now contains the desired Hermite polynomials. - # We next compute pp, the derivatives, - # by the relation (4.5.21) using p2, the polynomials - # of one lower order. - - pp = sqrt(2 * n) * L[k0, :] - dz = L[kp1, :] / pp - - z = z - dz # Newtons formula. - - if not np.any(abs(dz) > releps): - break - else: - warnings.warn('too many iterations!') - - x = np.empty(n) - w = np.empty(n) - x[0:m] = z # Store the root - x[n - 1:n - m - 1:-1] = -z # and its symmetric counterpart. - w[0:m] = 2. / pp ** 2 # Compute the weight - w[n - 1:n - m - 1:-1] = w[0:m] # and its symmetric counterpart. 
- return x, w - -def j_roots(n, alpha, beta, method='newton'): - ''' - Returns the roots (x) of the nth order Jacobi polynomial, P^(alpha,beta)_n(x) - and weights (w) to use in Gaussian Quadrature over [-1,1] with weighting - function (1-x)**alpha (1+x)**beta with alpha,beta > -1. - - Parameters - ---------- - n : integer - number of roots - alpha,beta : scalars - defining shape of Jacobi polynomial - method : 'newton' or 'eigenvalue' - uses Newton Raphson to find zeros of the Hermite polynomial (Fast) - or eigenvalue of the jacobi matrix (Slow) to obtain the nodes and - weights, respectively. - - Returns - ------- - x : ndarray - roots - w : ndarray - weights - - - Example - -------- - >>> [x,w]= j_roots(10,0,0) - >>> sum(x*w) - 2.7755575615628914e-16 - - See also - -------- - qrule, gaussq - - - Reference - --------- - [1] Golub, G. H. and Welsch, J. H. (1969) - 'Calculation of Gaussian Quadrature Rules' - Mathematics of Computation, vol 23,page 221-230, - - [2]. Stroud and Secrest (1966), 'gaussian quadrature formulas', - prentice-hall, Englewood cliffs, n.j. - ''' - - if not method.startswith('n'): - [x, w] = ort.j_roots(n, alpha, beta) - else: - - max_iter = 10 - releps = 3e-14 - - # Initial approximations to the roots go into z. - alfbet = alpha + beta - - - z = np.cos(np.pi * (np.arange(1, n + 1) - 0.25 + 0.5 * alpha) / (n + 0.5 * (alfbet + 1))) - - L = zeros((3, len(z))) - k0 = 0 - kp1 = 1 - for _its in xrange(max_iter): - #Newton's method carried out simultaneously on the roots. - tmp = 2 + alfbet - L[k0, :] = 1 - L[kp1, :] = (alpha - beta + tmp * z) / 2 - - for j in xrange(2, n + 1): - #Loop up the recurrence relation to get the Jacobi - #polynomials evaluated at z. - km1 = k0 - k0 = kp1 - kp1 = np.mod(kp1 + 1, 3) - - a = 2. * j * (j + alfbet) * tmp - tmp = tmp + 2 - c = 2 * (j - 1 + alpha) * (j - 1 + beta) * tmp - b = (tmp - 1) * (alpha ** 2 - beta ** 2 + tmp * (tmp - 2) * z) - - L[kp1, :] = (b * L[k0, :] - c * L[km1, :]) / a - - #L now contains the desired Jacobi polynomials. - #We next compute pp, the derivatives with a standard - # relation involving the polynomials of one lower order. - - pp = (n * (alpha - beta - tmp * z) * L[kp1, :] + 2 * (n + alpha) * (n + beta) * L[k0, :]) / (tmp * (1 - z ** 2)) - dz = L[kp1, :] / pp - z = z - dz # Newton's formula. - - - if not any(abs(dz) > releps * abs(z)): - break - else: - warnings.warn('too many iterations in jrule') - - x = z # %Store the root and the weight. - w = np.exp(sp.gammaln(alpha + n) + sp.gammaln(beta + n) - sp.gammaln(n + 1) - - sp.gammaln(alpha + beta + n + 1)) * tmp * 2 ** alfbet / (pp * L[k0, :]) - - return x, w - -def la_roots(n, alpha=0, method='newton'): - ''' - Returns the roots (x) of the nth order generalized (associated) Laguerre - polynomial, L^(alpha)_n(x), and weights (w) to use in Gaussian quadrature over - [0,inf] with weighting function exp(-x) x**alpha with alpha > -1. - - Parameters - ---------- - n : integer - number of roots - method : 'newton' or 'eigenvalue' - uses Newton Raphson to find zeros of the Laguerre polynomial (Fast) - or eigenvalue of the jacobi matrix (Slow) to obtain the nodes and - weights, respectively. - - Returns - ------- - x : ndarray - roots - w : ndarray - weights - - Example - ------- - >>> import numpy as np - >>> [x,w] = h_roots(10) - >>> np.sum(x*w) - -5.2516042729766621e-19 - - See also - -------- - qrule, gaussq - - References - ---------- - [1] Golub, G. H. and Welsch, J. H. 
(1969) - 'Calculation of Gaussian Quadrature Rules' - Mathematics of Computation, vol 23,page 221-230, - - [2]. Stroud and Secrest (1966), 'gaussian quadrature formulas', - prentice-hall, Englewood cliffs, n.j. - ''' - - if alpha <= -1: - raise ValueError('alpha must be greater than -1') - - if not method.startswith('n'): - return ort.la_roots(n, alpha) - else: - max_iter = 10 - releps = 3e-14 - C = [9.084064e-01, 5.214976e-02, 2.579930e-03, 3.986126e-03] - - # Initial approximations to the roots go into z. - anu = 4.0 * n + 2.0 * alpha + 2.0 - rhs = np.arange(4 * n - 1, 2, -4) * np.pi / anu - r3 = rhs ** (1. / 3) - r2 = r3 ** 2 - theta = r3 * (C[0] + r2 * (C[1] + r2 * (C[2] + r2 * C[3]))) - z = anu * np.cos(theta) ** 2 - - dz = zeros(len(z)) - L = zeros((3, len(z))) - Lp = zeros((1, len(z))) - pp = zeros((1, len(z))) - k0 = 0 - kp1 = 1 - k = slice(len(z)) - for _its in xrange(max_iter): - #%Newton's method carried out simultaneously on the roots. - L[k0, k] = 0. - L[kp1, k] = 1. - - for jj in xrange(1, n + 1): - # Loop up the recurrence relation to get the Laguerre - # polynomials evaluated at z. - km1 = k0 - k0 = kp1 - kp1 = np.mod(kp1 + 1, 3) - - L[kp1, k] = ((2 * jj - 1 + alpha - z[k]) * L[k0, k] - (jj - 1 + alpha) * L[km1, k]) / jj - #end - #%L now contains the desired Laguerre polynomials. - #%We next compute pp, the derivatives with a standard - #% relation involving the polynomials of one lower order. - - Lp[k] = L[k0, k] - pp[k] = (n * L[kp1, k] - (n + alpha) * Lp[k]) / z[k] - - dz[k] = L[kp1, k] / pp[k] - z[k] = z[k] - dz[k]# % Newton?s formula. - #%k = find((abs(dz) > releps.*z)) - - - if not np.any(abs(dz) > releps): - break - else: - warnings.warn('too many iterations!') - - x = z - w = -np.exp(sp.gammaln(alpha + n) - sp.gammaln(n)) / (pp * n * Lp) - return x, w - -def p_roots(n, method='newton', a= -1, b=1): - ''' - Returns the roots (x) of the nth order Legendre polynomial, P_n(x), - and weights (w) to use in Gaussian Quadrature over [-1,1] with weighting - function 1. - - Parameters - ---------- - n : integer - number of roots - method : 'newton' or 'eigenvalue' - uses Newton Raphson to find zeros of the Hermite polynomial (Fast) - or eigenvalue of the jacobi matrix (Slow) to obtain the nodes and - weights, respectively. - - Returns - ------- - x : ndarray - roots - w : ndarray - weights - - - Example - ------- - Integral of exp(x) from a = 0 to b = 3 is: exp(3)-exp(0)= - >>> import numpy as np - >>> [x,w] = p_roots(11,a=0,b=3) - >>> np.sum(np.exp(x)*w) - 19.085536923187668 - - See also - -------- - quadg. - - - References - ---------- - [1] Davis and Rabinowitz (1975) 'Methods of Numerical Integration', page 365, - Academic Press. - - [2] Golub, G. H. and Welsch, J. H. (1969) - 'Calculation of Gaussian Quadrature Rules' - Mathematics of Computation, vol 23,page 221-230, - - [3] Stroud and Secrest (1966), 'gaussian quadrature formulas', - prentice-hall, Englewood cliffs, n.j. 
- ''' - - if not method.startswith('n'): - x, w = ort.p_roots(n) - else: - - m = int(np.fix((n + 1) / 2)) - - mm = 4 * m - 1 - t = (np.pi / (4 * n + 2)) * np.arange(3, mm + 1, 4) - nn = (1 - (1 - 1 / n) / (8 * n * n)) - xo = nn * np.cos(t) - - if method.endswith('1'): - - # Compute the zeros of the N+1 Legendre Polynomial - # using the recursion relation and the Newton-Raphson method - - - # Legendre-Gauss Polynomials - L = zeros((3, m)) - - # Derivative of LGP - Lp = zeros((m,)) - dx = zeros((m,)) - - releps = 1e-15 - max_iter = 100 - # Compute the zeros of the N+1 Legendre Polynomial - # using the recursion relation and the Newton-Raphson method - - # Iterate until new points are uniformly within epsilon of old points - k = slice(m) - k0 = 0 - kp1 = 1 - for _ix in xrange(max_iter): - L[k0, k] = 1 - L[kp1, k] = xo[k] - - for jj in xrange(2, n + 1): - km1 = k0 - k0 = kp1 - kp1 = np.mod(k0 + 1, 3) - L[kp1, k] = ((2 * jj - 1) * xo[k] * L[k0, k] - (jj - 1) * L[km1, k]) / jj - - Lp[k] = n * (L[k0, k] - xo[k] * L[kp1, k]) / (1 - xo[k] ** 2) - - dx[k] = L[kp1, k] / Lp[k] - xo[k] = xo[k] - dx[k] - k, = np.nonzero((abs(dx) > releps * np.abs(xo))) - if len(k) == 0: - break - else: - warnings.warn('Too many iterations!') - - x = -xo - w = 2. / ((1 - x ** 2) * (Lp ** 2)) - else: - # Algorithm given by Davis and Rabinowitz in 'Methods - # of Numerical Integration', page 365, Academic Press, 1975. - - e1 = n * (n + 1) - - for _j in xrange(2): - pkm1 = 1 - pk = xo - for k in xrange(2, n + 1): - t1 = xo * pk - pkp1 = t1 - pkm1 - (t1 - pkm1) / k + t1 - pkm1 = pk - pk = pkp1 - - den = 1. - xo * xo - d1 = n * (pkm1 - xo * pk) - dpn = d1 / den - d2pn = (2. * xo * dpn - e1 * pk) / den - d3pn = (4. * xo * d2pn + (2 - e1) * dpn) / den - d4pn = (6. * xo * d3pn + (6 - e1) * d2pn) / den - u = pk / dpn - v = d2pn / dpn - h = -u * (1 + (.5 * u) * (v + u * (v * v - u * d3pn / (3 * dpn)))) - p = pk + h * (dpn + (.5 * h) * (d2pn + (h / 3) * (d3pn + .25 * h * d4pn))) - dp = dpn + h * (d2pn + (.5 * h) * (d3pn + h * d4pn / 3)) - h = h - p / dp - xo = xo + h - - x = -xo - h - fx = d1 - h * e1 * (pk + (h / 2) * (dpn + (h / 3) * (d2pn + (h / 4) * (d3pn + (.2 * h) * d4pn)))) - w = 2 * (1 - x ** 2) / (fx ** 2) - - if (m + m) > n: - x[m - 1] = 0.0 - - if not ((m + m) == n): - m = m - 1 - - x = np.hstack((x, -x[m - 1::-1])) - w = np.hstack((w, w[m - 1::-1])) - - - if (a != -1) | (b != 1): - # Linear map from[-1,1] to [a,b] - dh = (b - a) / 2 - x = dh * (x + 1) + a - w = w * dh - - return x, w - -def qrule(n, wfun=1, alpha=0, beta=0): - ''' - Return nodes and weights for Gaussian quadratures. - - Parameters - ---------- - n : integer - number of base points - wfun : integer - defining the weight function, p(x). (default wfun = 1) - 1,11,21: p(x) = 1 a =-1, b = 1 Gauss-Legendre - 2,12 : p(x) = exp(-x^2) a =-inf, b = inf Hermite - 3,13 : p(x) = x^alpha*exp(-x) a = 0, b = inf Laguerre - 4,14 : p(x) = (x-a)^alpha*(b-x)^beta a =-1, b = 1 Jacobi - 5 : p(x) = 1/sqrt((x-a)*(b-x)), a =-1, b = 1 Chebyshev 1'st kind - 6 : p(x) = sqrt((x-a)*(b-x)), a =-1, b = 1 Chebyshev 2'nd kind - 7 : p(x) = sqrt((x-a)/(b-x)), a = 0, b = 1 - 8 : p(x) = 1/sqrt(b-x), a = 0, b = 1 - 9 : p(x) = sqrt(b-x), a = 0, b = 1 - - Returns - ------- - bp = base points (abscissas) - wf = weight factors - - The Gaussian Quadrature integrates a (2n-1)th order - polynomial exactly and the integral is of the form - b n - Int ( p(x)* F(x) ) dx = Sum ( wf_j* F( bp_j ) ) - a j=1 - where p(x) is the weight function. 
- For Jacobi and Laguerre: alpha, beta >-1 (default alpha=beta=0) - - Examples: - --------- - >>> [bp,wf] = qrule(10) - >>> sum(bp**2*wf) # integral of x^2 from a = -1 to b = 1 - 0.66666666666666641 - >>> [bp,wf] = qrule(10,2) - >>> sum(bp**2*wf) # integral of exp(-x.^2)*x.^2 from a = -inf to b = inf - 0.88622692545275772 - >>> [bp,wf] = qrule(10,4,1,2) - >>> (bp*wf).sum() # integral of (x+1)*(1-x)^2 from a = -1 to b = 1 - 0.26666666666666755 - - See also - -------- - gaussq - - Reference - --------- - Abromowitz and Stegun (1954) - (for method 5 to 9) - ''' - - if (alpha <= -1) | (beta <= -1): - raise ValueError('alpha and beta must be greater than -1') - - if wfun == 1: # Gauss-Legendre - [bp, wf] = p_roots(n) - elif wfun == 2: # Hermite - [bp, wf] = h_roots(n) - elif wfun == 3: # Generalized Laguerre - [bp, wf] = la_roots(n, alpha) - elif wfun == 4: #Gauss-Jacobi - [bp, wf] = j_roots(n, alpha, beta) - elif wfun == 5: # p(x)=1/sqrt((x-a)*(b-x)), a=-1 and b=1 (default) - jj = np.arange(1, n + 1) - wf = ones(n) * np.pi / n - bp = np.cos((2 * jj - 1) * np.pi / (2 * n)) - - elif wfun == 6: # p(x)=sqrt((x-a)*(b-x)), a=-1 and b=1 - jj = np.arange(1, n + 1) - xj = jj * np.pi / (n + 1) - wf = np.pi / (n + 1) * np.sin(xj) ** 2 - bp = np.cos(xj) - - elif wfun == 7: # p(x)=sqrt((x-a)/(b-x)), a=0 and b=1 - jj = np.arange(1, n + 1) - xj = (jj - 0.5) * pi / (2 * n + 1) - bp = np.cos(xj) ** 2 - wf = 2 * np.pi * bp / (2 * n + 1) - - elif wfun == 8: # p(x)=1/sqrt(b-x), a=0 and b=1 - [bp1, wf1] = p_roots(2 * n) - k, = np.where(0 <= bp1) - wf = 2 * wf1[k] - bp = 1 - bp1[k] ** 2 - - elif wfun == 9: # p(x)=np.sqrt(b-x), a=0 and b=1 - [bp1, wf1] = p_roots(2 * n + 1) - k, = np.where(0 < bp1) - wf = 2 * bp1[k] ** 2 * wf1[k] - bp = 1 - bp1[k] ** 2 - else: - raise ValueError('unknown weight function') - return bp, wf - - -def gaussq(fun, a, b, reltol=1e-3, abstol=1e-3, alpha=0, beta=0, wfun=1, - trace=False, args=None): - ''' - Numerically evaluate integral, Gauss quadrature. - - Parameters - ---------- - fun : callable - a,b : array-like - lower and upper integration limits, respectively. - reltol, abstol : real scalars, optional - relative and absolute tolerance, respectively. (default reltol=abstool=1e-3). - wfun : scalar integer, optional - defining the weight function, p(x). (default wfun = 1) - 1 : p(x) = 1 a =-1, b = 1 Gauss-Legendre - 2 : p(x) = exp(-x^2) a =-inf, b = inf Hermite - 3 : p(x) = x^alpha*exp(-x) a = 0, b = inf Laguerre - 4 : p(x) = (x-a)^alpha*(b-x)^beta a =-1, b = 1 Jacobi - 5 : p(x) = 1/sqrt((x-a)*(b-x)), a =-1, b = 1 Chebyshev 1'st kind - 6 : p(x) = sqrt((x-a)*(b-x)), a =-1, b = 1 Chebyshev 2'nd kind - 7 : p(x) = sqrt((x-a)/(b-x)), a = 0, b = 1 - 8 : p(x) = 1/sqrt(b-x), a = 0, b = 1 - 9 : p(x) = sqrt(b-x), a = 0, b = 1 - trace : bool, optional - If non-zero a point plot of the integrand (default False). - gn : scalar integer - number of base points to start the integration with (default 2). - alpha, beta : real scalars, optional - Shape parameters of Laguerre or Jacobi weight function - (alpha,beta>-1) (default alpha=beta=0) - - Returns - ------- - val : ndarray - evaluated integral - err : ndarray - error estimate, absolute tolerance abs(int-intold) - - Notes - ----- - GAUSSQ numerically evaluate integral using a Gauss quadrature. 
- The Quadrature integrates a (2m-1)th order polynomial exactly and the - integral is of the form - b - Int (p(x)* Fun(x)) dx - a - GAUSSQ is vectorized to accept integration limits A, B and - coefficients P1,P2,...Pn, as matrices or scalars and the - result is the common size of A, B and P1,P2,...,Pn. - - Examples - --------- - integration of x**2 from 0 to 2 and from 1 to 4 - - >>> from scitools import numpyutils as npt - >>> A = [0, 1]; B = [2,4] - >>> fun = npt.wrap2callable('x**2') - >>> [val1,err1] = gaussq(fun,A,B) - >>> val1 - array([ 2.6666667, 21. ]) - >>> err1 - array([ 1.7763568e-15, 1.0658141e-14]) - - Integration of x^2*exp(-x) from zero to infinity: - >>> fun2 = npt.wrap2callable('1') - >>> val2, err2 = gaussq(fun2, 0, npt.inf, wfun=3, alpha=2) - >>> val3, err3 = gaussq(lambda x: x**2,0, npt.inf, wfun=3, alpha=0) - >>> val2, err2 - (array([ 2.]), array([ 6.6613381e-15])) - >>> val3, err3 - (array([ 2.]), array([ 1.7763568e-15])) - - Integrate humps from 0 to 2 and from 1 to 4 - >>> val4, err4 = gaussq(humps,A,B) - - See also - -------- - qrule - gaussq2d - ''' - global _POINTS_AND_WEIGHTS - max_iter = 11 - gn = 2 - if not hasattr(fun, '__call__'): - raise ValueError('Function must be callable') - - A, B = np.atleast_1d(a, b) - a_shape = np.atleast_1d(A.shape) - b_shape = np.atleast_1d(B.shape) - - if np.prod(a_shape) == 1: # make sure the integration limits have correct size - A = A * ones(b_shape) - a_shape = b_shape - elif np.prod(b_shape) == 1: - B = B * ones(a_shape) - elif any(a_shape != b_shape): - raise ValueError('The integration limits must have equal size!') - - - if args is None: - num_parameters = 0 - else: - num_parameters = len(args) - P0 = copy.deepcopy(args) - isvector1 = zeros(num_parameters) - - nk = np.prod(a_shape) #% # of integrals we have to compute - for ix in xrange(num_parameters): - if is_numlike(P0[ix]): - p0_shape = np.shape(P0[ix]) - Np0 = np.prod(p0_shape) - isvector1[ix] = (Np0 > 1) - if isvector1[ix]: - if nk == 1: - a_shape = p0_shape - nk = Np0 - A = A * ones(a_shape) - B = B * ones(a_shape) - elif nk != Np0: - raise ValueError('The input must have equal size!') - - P0[ix].shape = (-1, 1) # make sure it is a column - - - k = np.arange(nk) - val = zeros(nk) - val_old = zeros(nk) - abserr = zeros(nk) - - - #setup mapping parameters - A.shape = (-1, 1) - B.shape = (-1, 1) - jacob = (B - A) / 2 - - shift = 1 - if wfun == 1:# Gauss-legendre - dx = jacob - elif wfun == 2 or wfun == 3: - shift = 0 - jacob = ones((nk, 1)) - A = zeros((nk, 1)) - dx = jacob - elif wfun == 4: - dx = jacob ** (alpha + beta + 1) - elif wfun == 5: - dx = ones((nk, 1)) - elif wfun == 6: - dx = jacob ** 2 - elif wfun == 7: - shift = 0 - jacob = jacob * 2 - dx = jacob - elif wfun == 8: - shift = 0 - jacob = jacob * 2 - dx = sqrt(jacob) - elif wfun == 9: - shift = 0 - jacob = jacob * 2 - dx = sqrt(jacob) ** 3 - else: - raise ValueError('unknown option') - - dx = dx.ravel() - - if trace: - x_trace = [0, ]*max_iter - y_trace = [0, ]*max_iter - - - if num_parameters > 0: - ix_vec, = np.where(isvector1) - if len(ix_vec): - P1 = copy.copy(P0) - - #% Break out of the iteration loop for three reasons: - #% 1) the last update is very small (compared to int and compared to reltol) - #% 2) There are more than 11 iterations. This should NEVER happen. 
- - - for ix in xrange(max_iter): - x_and_w = 'wfun%d_%d_%g_%g' % (wfun, gn, alpha, beta) - if x_and_w in _POINTS_AND_WEIGHTS: - xn, w = _POINTS_AND_WEIGHTS[x_and_w] - else: - xn, w = qrule(gn, wfun, alpha, beta) - _POINTS_AND_WEIGHTS[x_and_w] = (xn, w) - - # calculate the x values - x = (xn + shift) * jacob[k, :] + A[k, :] - - - # calculate function values y=fun(x,p1,p2,....,pn) - if num_parameters > 0: - if len(ix_vec): - #% Expand vector to the correct size - for iy in ix_vec: - P1[iy] = P0[iy][k, :] - - y = fun(x, **P1) - else: - y = fun(x, **P0) - else: - y = fun(x) - - - val[k] = np.sum(w * y, axis=1) * dx[k] # do the integration sum(y.*w) - - - if trace: - x_trace.append(x.ravel()) - y_trace.append(y.ravel()) - - hfig = plt.plot(x, y, 'r.') - #hold on - #drawnow,shg - #if trace>1: - # pause - - plt.setp(hfig, 'color', 'b') - - - abserr[k] = abs(val_old[k] - val[k]) #absolute tolerance - if ix > 1: - - k, = np.where(abserr > np.maximum(abs(reltol * val), abstol)) # abserr > abs(abstol))%indices to integrals which did not converge - nk = len(k)# of integrals we have to compute again - if nk : - val_old[k] = val[k] - else: - break - - gn *= 2 #double the # of basepoints and weights - else: - if nk > 1: - if (nk == np.prod(a_shape)): - tmptxt = 'All integrals did not converge--singularities likely!' - else: - tmptxt = '%d integrals did not converge--singularities likely!' % (nk,) - - else: - tmptxt = 'Integral did not converge--singularity likely!' - warnings.warn(tmptxt) - - val.shape = a_shape # make sure int is the same size as the integration limits - abserr.shape = a_shape - - if trace > 0: - plt.clf() - plt.plot(np.hstack(x_trace), np.hstack(y_trace), '+') - return val, abserr - -def richardson(Q, k): - # license BSD - # Richardson extrapolation with parameter estimation - c = np.real((Q[k - 1] - Q[k - 2]) / (Q[k] - Q[k - 1])) - 1. - #% The lower bound 0.07 admits the singularity x.^-0.9 - c = max(c, 0.07) - R = Q[k] + (Q[k] - Q[k - 1]) / c - return R - -def quadgr(fun, a, b, abseps=1e-5, max_iter=17): - ''' - Gauss-Legendre quadrature with Richardson extrapolation. - - [Q,ERR] = QUADGR(FUN,A,B,TOL) approximates the integral of a function - FUN from A to B with an absolute error tolerance TOL. FUN is a function - handle and must accept vector arguments. TOL is 1e-6 by default. Q is - the integral approximation and ERR is an estimate of the absolute error. - - QUADGR uses a 12-point Gauss-Legendre quadrature. The error estimate is - based on successive interval bisection. Richardson extrapolation - accelerates the convergence for some integrals, especially integrals - with endpoint singularities. - - Examples - -------- - >>> import numpy as np - >>> Q, err = quadgr(np.log,0,1) - >>> quadgr(np.exp,0,9999*1j*np.pi) - (-2.0000000000122662, 2.1933237448479304e-09) - - >>> quadgr(lambda x: np.sqrt(4-x**2),0,2,1e-12) - (3.1415926535897811, 1.5809575870662229e-13) - - >>> quadgr(lambda x: x**-0.75,0,1) - (4.0000000000000266, 5.6843418860808015e-14) - - >>> quadgr(lambda x: 1./np.sqrt(1-x**2),-1,1) - (3.141596056985029, 6.2146261559092864e-06) - - >>> quadgr(lambda x: np.exp(-x**2),-np.inf,np.inf,1e-9) #% sqrt(pi) - (1.7724538509055152, 1.9722334876348668e-11) - - >>> quadgr(lambda x: np.cos(x)*np.exp(-x),0,np.inf,1e-9) - (0.50000000000000044, 7.3296813063450372e-11) - - See also - -------- - QUAD, - QUADGK - ''' - # Author: jonas.lundgren@saabgroup.com, 2009. 
license BSD - # Order limits (required if infinite limits) - if a == b: - Q = b - a - err = b - a - return Q, err - elif np.real(a) > np.real(b): - reverse = True - a, b = b, a - else: - reverse = False - - - #% Infinite limits - if np.isinf(a) | np.isinf(b): - # Check real limits - if ~ np.isreal(a) | ~np.isreal(b) | np.isnan(a) | np.isnan(b): - raise ValueError('Infinite intervals must be real.') - - # Change of variable - if np.isfinite(a) & np.isinf(b): - # a to inf - fun1 = lambda t : fun(a + t / (1 - t)) / (1 - t) ** 2 - [Q, err] = quadgr(fun1, 0, 1, abseps) - elif np.isinf(a) & np.isfinite(b): - # -inf to b - fun2 = lambda t: fun(b + t / (1 + t)) / (1 + t) ** 2 - [Q, err] = quadgr(fun2, -1, 0, abseps) - else: # -inf to inf - fun1 = lambda t: fun(t / (1 - t)) / (1 - t) ** 2 - fun2 = lambda t: fun(t / (1 + t)) / (1 + t) ** 2 - [Q1, err1] = quadgr(fun1, 0, 1, abseps / 2) - [Q2, err2] = quadgr(fun2, -1, 0, abseps / 2) - Q = Q1 + Q2 - err = err1 + err2 - - # Reverse direction - if reverse: - Q = -Q - return Q, err - - # Gauss-Legendre quadrature (12-point) - xq = np.asarray([0.12523340851146894, 0.36783149899818018, 0.58731795428661748, - 0.76990267419430469, 0.9041172563704748, 0.98156063424671924]) - wq = np.asarray([0.24914704581340288, 0.23349253653835478, 0.20316742672306584, - 0.16007832854334636, 0.10693932599531818, 0.047175336386511842]) - xq = np.hstack((xq, -xq)) - wq = np.hstack((wq, wq)) - nq = len(xq) -# iscomplex = (np.iscomplex(a) | np.iscomplex(b)).any() -# if iscomplex: -# dtype = np.complex128 -# else: - dtype = np.float64 - - # Initiate vectors -# max_iter = 17 # Max number of iterations - Q0 = zeros(max_iter, dtype=dtype) # Quadrature - Q1 = zeros(max_iter, dtype=dtype) # First Richardson extrapolation - Q2 = zeros(max_iter, dtype=dtype) # Second Richardson extrapolation - - # One interval - hh = (b - a) / 2 # Half interval length - x = (a + b) / 2 + hh * xq # Nodes - # Quadrature - Q0[0] = hh * np.sum(wq * fun(x), axis=0) - - # Successive bisection of intervals - for k in xrange(1, max_iter): - - # Interval bisection - hh = hh / 2 - x = np.hstack([x + a, x + b]) / 2 - # Quadrature - Q0[k] = hh * np.sum(wq * np.sum(np.reshape(fun(x), (-1, nq)), axis=0), axis=0) - - # Richardson extrapolation - if k >= 5: - Q1[k] = richardson(Q0, k) - Q2[k] = richardson(Q1, k) - elif k >= 3: - Q1[k] = richardson(Q0, k) - - - #% Estimate absolute error - if k >= 6: - Qv = np.hstack((Q0[k], Q1[k], Q2[k])) - Qw = np.hstack((Q0[k - 1], Q1[k - 1], Q2[k - 1])) - elif k >= 4: - Qv = np.hstack((Q0[k], Q1[k])) - Qw = np.hstack((Q0[k - 1], Q1[k - 1])) - else: - Qv = np.atleast_1d(Q0[k]) - Qw = Q0[k - 1] - - errors = np.atleast_1d(abs(Qv - Qw)) - j = errors.argmin() - err = errors[j] - Q = Qv[j] - if k >= 2 : #and not iscomplex: - _val, err1 = dea3(Q0[k - 2], Q0[k - 1], Q0[k]) - - # Convergence - if (err < abseps) | ~np.isfinite(Q): - break - else: - warnings.warn('Max number of iterations reached without convergence.') - - if ~ np.isfinite(Q): - warnings.warn('Integral approximation is Infinite or NaN.') - - - # The error estimate should not be zero - err = err + 2 * np.finfo(Q).eps - # Reverse direction - if reverse: - Q = -Q - - return Q, err - -def qdemo(f, a, b): - ''' - Compares different quadrature rules. - - Parameters - ---------- - f : callable - function - a,b : scalars - lower and upper integration limits - - Details - ------- - qdemo(f,a,b) computes and compares various approximations to - the integral of f from a to b. 
Three approximations are used, - the composite trapezoid, Simpson's, and Boole's rules, all with - equal length subintervals. - In a case like qdemo(exp,0,3) one can see the expected - convergence rates for each of the three methods. - In a case like qdemo(sqrt,0,3), the convergence rate is limited - not by the method, but by the singularity of the integrand. - - Example - ------- - >>> import numpy as np - >>> qdemo(np.exp,0,3) - true value = 19.08553692 - ftn Trapezoid Simpsons Booles - evals approx error approx error approx error - 3, 22.5366862979, 3.4511493747, 19.5061466023, 0.4206096791, 19.4008539142, 0.3153169910 - 5, 19.9718950387, 0.8863581155, 19.1169646189, 0.0314276957, 19.0910191534, 0.0054822302 - 9, 19.3086731081, 0.2231361849, 19.0875991312, 0.0020622080, 19.0856414320, 0.0001045088 - 17, 19.1414188470, 0.0558819239, 19.0856674267, 0.0001305035, 19.0855386464, 0.0000017232 - 33, 19.0995135407, 0.0139766175, 19.0855451052, 0.0000081821, 19.0855369505, 0.0000000273 - 65, 19.0890314614, 0.0034945382, 19.0855374350, 0.0000005118, 19.0855369236, 0.0000000004 - 129, 19.0864105817, 0.0008736585, 19.0855369552, 0.0000000320, 19.0855369232, 0.0000000000 - 257, 19.0857553393, 0.0002184161, 19.0855369252, 0.0000000020, 19.0855369232, 0.0000000000 - 513, 19.0855915273, 0.0000546041, 19.0855369233, 0.0000000001, 19.0855369232, 0.0000000000 - ftn Clenshaw Chebychev Gauss-L - evals approx error approx error approx error - 3, 19.5061466023, 0.4206096791, 0.0000000000, 1.0000000000, 19.0803304585, 0.0052064647 - 5, 19.0834145766, 0.0021223465, 0.0000000000, 1.0000000000, 19.0855365951, 0.0000003281 - 9, 19.0855369150, 0.0000000082, 0.0000000000, 1.0000000000, 19.0855369232, 0.0000000000 - 17, 19.0855369232, 0.0000000000, 0.0000000000, 1.0000000000, 19.0855369232, 0.0000000000 - 33, 19.0855369232, 0.0000000000, 0.0000000000, 1.0000000000, 19.0855369232, 0.0000000000 - 65, 19.0855369232, 0.0000000000, 0.0000000000, 1.0000000000, 19.0855369232, 0.0000000000 - 129, 19.0855369232, 0.0000000000, 0.0000000000, 1.0000000000, 19.0855369232, 0.0000000000 - 257, 19.0855369232, 0.0000000000, 0.0000000000, 1.0000000000, 19.0855369232, 0.0000000000 - 513, 19.0855369232, 0.0000000000, 0.0000000000, 1.0000000000, 19.0855369232, 0.0000000000 - - ''' - # use quad8 with small tolerance to get "true" value - #true1 = quad8(f,a,b,1e-10) - #[true tol]= gaussq(f,a,b,1e-12) - #[true tol] = agakron(f,a,b,1e-13) - true_val, _tol = intg.quad(f, a, b) - print('true value = %12.8f' % (true_val,)) - kmax = 9 - neval = zeros(kmax, dtype=int) - qt = zeros(kmax) - qs = zeros(kmax) - qb = zeros(kmax) - qc = zeros(kmax) - qc2 = zeros(kmax) - qg = zeros(kmax) - - et = ones(kmax) - es = ones(kmax) - eb = ones(kmax) - ec = ones(kmax) - ec2 = ones(kmax) - ec3 = ones(kmax) - eg = ones(kmax) - # try various approximations - - for k in xrange(kmax): - n = 2 ** (k + 1) + 1 - neval[k] = n - h = (b - a) / (n - 1) - x = np.linspace(a, b, n) - y = f(x) - - # trapezoid approximation - q = np.trapz(y, x) - #h*( (y(1)+y(n))/2 + sum(y(2:n-1)) ) - qt[k] = q - et[k] = abs(q - true_val) - # Simpson approximation - q = intg.simps(y, x) - #(h/3)*( y(1)+y(n) + 4*sum(y(2:2:n-1)) + 2*sum(y(3:2:n-2)) ) - qs[k] = q - es[k] = abs(q - true_val) - # Boole's rule - #q = boole(x,y) - q = (2 * h / 45) * (7 * (y[0] + y[-1]) + 12 * np.sum(y[2:n - 1:4]) - + 32 * np.sum(y[1:n - 1:2]) + 14 * np.sum(y[4:n - 3:4])) - qb[k] = q - eb[k] = abs(q - true_val) - - # Clenshaw-Curtis - [q, ec3[k]] = clencurt(f, a, b, (n - 1) / 2) - qc[k] = q - ec[k] = abs(q - true_val) 
- - # Chebychev - #ck = chebfit(f,n,a,b) - #q = chebval(b,chebint(ck,a,b),a,b) - #qc2[k] = q; ec2[k] = abs(q - true) - - # Gauss-Legendre quadrature - q = intg.fixed_quad(f, a, b, n=n)[0] - #[x, w]=qrule(n,1) - #x = (b-a)/2*x + (a+b)/2 % Transform base points X. - #w = (b-a)/2*w % Adjust weigths. - #q = sum(feval(f,x)*w) - qg[k] = q - eg[k] = abs(q - true_val) - - - #% display results - formats = ['%4.0f, ', ] + ['%10.10f, ', ]*6 - formats[-1] = formats[-1].split(',')[0] - data = np.vstack((neval, qt, et, qs, es, qb, eb)).T - print(' ftn Trapezoid Simpson''s Boole''s') - print('evals approx error approx error approx error') - - for k in xrange(kmax): - tmp = data[k].tolist() - print(''.join(fi % t for fi, t in zip(formats, tmp))) - - # display results - data = np.vstack((neval, qc, ec, qc2, ec2, qg, eg)).T - print(' ftn Clenshaw Chebychev Gauss-L') - print('evals approx error approx error approx error') - for k in xrange(kmax): - tmp = data[k].tolist() - print(''.join(fi % t for fi, t in zip(formats, tmp))) - - - plt.loglog(neval, np.vstack((et, es, eb, ec, ec2, eg)).T) - plt.xlabel('number of function evaluations') - plt.ylabel('error') - plt.legend(('Trapezoid', 'Simpsons', 'Booles', 'Clenshaw', 'Chebychev', 'Gauss-L')) - #ec3' - - - - -def main(): -# val, err = clencurt(np.exp, 0, 2) -# valt = np.exp(2) - np.exp(0) -# [Q, err] = quadgr(lambda x: x ** 2, 1, 4, 1e-9) -# [Q, err] = quadgr(humps, 1, 4, 1e-9) -# -# [x, w] = h_roots(11, 'newton') -# sum(w) -# [x2, w2] = la_roots(11, 1, 't') -# -# from scitools import numpyutils as npu #@UnresolvedImport -# fun = npu.wrap2callable('x**2') -# p0 = fun(0) -# A = [0, 1, 1]; B = [2, 4, 3] -# area, err = gaussq(fun, A, B) -# -# fun = npu.wrap2callable('x**2') -# [val1, err1] = gaussq(fun, A, B) -# -# -# #Integration of x^2*exp(-x) from zero to infinity: -# fun2 = npu.wrap2callable('1') -# [val2, err2] = gaussq(fun2, 0, np.inf, wfun=3, alpha=2) -# [val2, err2] = gaussq(lambda x: x ** 2, 0, np.inf, wfun=3, alpha=0) -# -# #Integrate humps from 0 to 2 and from 1 to 4 -# [val3, err3] = gaussq(humps, A, B) -# -# [x, w] = p_roots(11, 'newton', 1, 3) -# y = np.sum(x ** 2 * w) - - x = np.linspace(0, np.pi / 2) - q0 = np.trapz(humps(x), x) - [q, err] = romberg(humps, 0, np.pi / 2, 1e-4) - print q, err - -def test_docstrings(): - np.set_printoptions(precision=7) - import doctest - doctest.testmod() - -if __name__ == '__main__': - test_docstrings() - #main() +from __future__ import division +import warnings +import copy +import numpy as np +from numpy import pi, sqrt, ones, zeros # @UnresolvedImport +from scipy import integrate as intg +import scipy.special.orthogonal as ort +from scipy import special as sp +from wafo.plotbackend import plotbackend as plt +from scipy.integrate import simps, trapz # @UnusedImport +from wafo.misc import is_numlike +from wafo.demos import humps + +_POINTS_AND_WEIGHTS = {} + +__all__ = ['dea3', 'clencurt', 'romberg', + 'h_roots', 'j_roots', 'la_roots', 'p_roots', 'qrule', + 'gaussq', 'richardson', 'quadgr', 'qdemo'] + + +def dea3(v0, v1, v2): + ''' + Extrapolate a slowly convergent sequence + + Parameters + ---------- + v0,v1,v2 : array-like + 3 values of a convergent sequence to extrapolate + + Returns + ------- + result : array-like + extrapolated value + abserr : array-like + absolute error estimate + + Description + ----------- + DEA3 attempts to extrapolate nonlinearly to a better estimate + of the sequence's limiting value, thus improving the rate of + convergence. The routine is based on the epsilon algorithm of + P. 
Wynn, see [1]_. + + Example + ------- + # integrate sin(x) from 0 to pi/2 + + >>> import numpy as np + >>> Ei= np.zeros(3) + >>> linfun = lambda k : np.linspace(0, np.pi/2., 2.**(k+5)+1) + >>> for k in np.arange(3): + ... x = linfun(k) + ... Ei[k] = np.trapz(np.sin(x),x) + >>> En, err = dea3(Ei[0],Ei[1],Ei[2]) + >>> En, err + (array([ 1.]), array([ 0.0002008])) + >>> TrueErr = Ei-1. + >>> TrueErr + array([ -2.0080568e-04, -5.0199908e-05, -1.2549882e-05]) + + See also + -------- + dea + + Reference + --------- + .. [1] C. Brezinski (1977) + "Acceleration de la convergence en analyse numerique", + "Lecture Notes in Math.", vol. 584, + Springer-Verlag, New York, 1977. + ''' + + E0, E1, E2 = np.atleast_1d(v0, v1, v2) + abs = np.abs # @ReservedAssignment + max = np.maximum # @ReservedAssignment + ten = 10.0 + one = ones(1) + small = np.finfo(float).eps # 1.0e-16 #spacing(one) + delta2 = E2 - E1 + delta1 = E1 - E0 + err2 = abs(delta2) + err1 = abs(delta1) + tol2 = max(abs(E2), abs(E1)) * small + tol1 = max(abs(E1), abs(E0)) * small + + result = zeros(E0.shape) + abserr = result.copy() + converged = (err1 <= tol1) & (err2 <= tol2).ravel() + k0, = converged.nonzero() + if k0.size > 0: + #%C IF E0, E1 AND E2 ARE EQUAL TO WITHIN MACHINE + #%C ACCURACY, CONVERGENCE IS ASSUMED. + result[k0] = E2[k0] + abserr[k0] = err1[k0] + err2[k0] + E2[k0] * small * ten + + k1, = (1 - converged).nonzero() + + if k1.size > 0: + with warnings.catch_warnings(): + # ignore division by zero and overflow + warnings.simplefilter("ignore") + ss = one / delta2[k1] - one / delta1[k1] + smallE2 = (abs(ss * E1[k1]) <= 1.0e-3).ravel() + k2 = k1[smallE2.nonzero()] + if k2.size > 0: + result[k2] = E2[k2] + abserr[k2] = err1[k2] + err2[k2] + E2[k2] * small * ten + + k4, = (1 - smallE2).nonzero() + if k4.size > 0: + k3 = k1[k4] + result[k3] = E1[k3] + one / ss[k4] + abserr[k3] = err1[k3] + err2[k3] + abs(result[k3] - E2[k3]) + + return result, abserr + + +def clencurt(fun, a, b, n0=5, trace=False, *args): + ''' + Numerical evaluation of an integral, Clenshaw-Curtis method. + + Parameters + ---------- + fun : callable + a, b : array-like + Lower and upper integration limit, respectively. + n : integer + defines number of evaluation points (default 5) + + Returns + ------- + Q = evaluated integral + tol = Estimate of the approximation error + + Notes + ----- + CLENCURT approximates the integral of f(x) from a to b + using an 2*n+1 points Clenshaw-Curtis formula. + The error estimate is usually a conservative estimate of the + approximation error. + The integral is exact for polynomials of degree 2*n or less. + + Example + ------- + >>> import numpy as np + >>> val,err = clencurt(np.exp,0,2) + >>> abs(val-np.expm1(2))< err, err<1e-10 + (array([ True], dtype=bool), array([ True], dtype=bool)) + + + See also + -------- + simpson, + gaussq + + References + ---------- + [1] Goodwin, E.T. (1961), + "Modern Computing Methods", + 2nd edition, New yourk: Philosophical Library, pp. 78--79 + + [2] Clenshaw, C.W. and Curtis, A.R. (1960), + Numerische Matematik, Vol. 2, pp. 197--205 + ''' + + #% make sure n is even + n = 2 * n0 + a, b = np.atleast_1d(a, b) + a_shape = a.shape + af = a.ravel() + bf = b.ravel() + + Na = np.prod(a_shape) + + s = np.r_[0:n + 1] + s2 = np.r_[0:n + 1:2] + s2.shape = (-1, 1) + x1 = np.cos(np.pi * s / n) + x1.shape = (-1, 1) + x = x1 * (bf - af) / 2. 
+ (bf + af) / 2 + + if hasattr(fun, '__call__'): + f = fun(x) + else: + x0 = np.flipud(fun[:, 0]) + n = len(x0) - 1 + if abs(x - x0) > 1e-8: + raise ValueError( + 'Input vector x must equal cos(pi*s/n)*(b-a)/2+(b+a)/2') + + f = np.flipud(fun[:, 1::]) + + if trace: + plt.plot(x, f, '+') + + # using a Gauss-Lobatto variant, i.e., first and last + # term f(a) and f(b) is multiplied with 0.5 + f[0, :] = f[0, :] / 2 + f[n, :] = f[n, :] / 2 + +# % x = cos(pi*0:n/n) +# % f = f(x) +# % +# % N+1 +# % c(k) = (2/N) sum f''(n)*cos(pi*(2*k-2)*(n-1)/N), 1 <= k <= N/2+1. +# % n=1 + fft = np.fft.fft + tmp = np.real(fft(f[:n, :], axis=0)) + c = 2 / n * (tmp[0:n / 2 + 1, :] + np.cos(np.pi * s2) * f[n, :]) +# % old call +# % c = 2/n * cos(s2*s'*pi/n) * f + c[0, :] = c[0, :] / 2 + c[n / 2, :] = c[n / 2, :] / 2 + +# % alternative call +# % c = dct(f) + + c = c[0:n / 2 + 1, :] / ((s2 - 1) * (s2 + 1)) + Q = (af - bf) * np.sum(c, axis=0) + # Q = (a-b).*sum( c(1:n/2+1,:)./repmat((s2-1).*(s2+1),1,Na)) + + abserr = (bf - af) * np.abs(c[n / 2, :]) + + if Na > 1: + abserr = np.reshape(abserr, a_shape) + Q = np.reshape(Q, a_shape) + return Q, abserr + + +def romberg(fun, a, b, releps=1e-3, abseps=1e-3): + ''' + Numerical integration with the Romberg method + + Parameters + ---------- + fun : callable + function to integrate + a, b : real scalars + lower and upper integration limits, respectively. + releps, abseps : scalar, optional + requested relative and absolute error, respectively. + + Returns + ------- + Q : scalar + value of integral + abserr : scalar + estimated absolute error of integral + + ROMBERG approximates the integral of F(X) from A to B + using Romberg's method of integration. The function F + must return a vector of output values if a vector of input values is given. + + + Example + ------- + >>> import numpy as np + >>> [q,err] = romberg(np.sqrt,0,10,0,1e-4) + >>> q,err + (array([ 21.0818511]), array([ 6.6163547e-05])) + ''' + h = b - a + hMin = 1.0e-9 + # Max size of extrapolation table + tableLimit = max(min(np.round(np.log2(h / hMin)), 30), 3) + + rom = zeros((2, tableLimit)) + + rom[0, 0] = h * (fun(a) + fun(b)) / 2 + ipower = 1 + fp = ones(tableLimit) * 4 + + #Ih1 = 0 + Ih2 = 0. + Ih4 = rom[0, 0] + abserr = Ih4 + #epstab = zeros(1,decdigs+7) + #newflg = 1 + #[res,abserr,epstab,newflg] = dea(newflg,Ih4,abserr,epstab) + two = 1 + one = 0 + for i in xrange(1, tableLimit): + h *= 0.5 + Un5 = np.sum(fun(a + np.arange(1, 2 * ipower, 2) * h)) * h + + # trapezoidal approximations + # T2n = 0.5 * (Tn + Un) = 0.5*Tn + Un5 + rom[two, 0] = 0.5 * rom[one, 0] + Un5 + + fp[i] = 4 * fp[i - 1] + # Richardson extrapolation + for k in xrange(i): + # rom(2,k+1)=(fp(k)*rom(2,k)-rom(1,k))/(fp(k)-1) + rom[two, k + 1] = rom[two, k] + \ + (rom[two, k] - rom[one, k]) / (fp[k] - 1) + + Ih1 = Ih2 + Ih2 = Ih4 + + Ih4 = rom[two, i] + + if (2 <= i): + [res, abserr] = dea3(Ih1, Ih2, Ih4) + # Ih4 = res + if (abserr <= max(abseps, releps * abs(res))): + break + + # rom(1,1:i) = rom(2,1:i) + two = one + one = (one + 1) % 2 + ipower *= 2 + return res, abserr + + +def h_roots(n, method='newton'): + ''' + Returns the roots (x) of the nth order Hermite polynomial, + H_n(x), and weights (w) to use in Gaussian Quadrature over + [-inf,inf] with weighting function exp(-x**2). + + Parameters + ---------- + n : integer + number of roots + method : 'newton' or 'eigenvalue' + uses Newton Raphson to find zeros of the Hermite polynomial (Fast) + or eigenvalue of the jacobi matrix (Slow) to obtain the nodes and + weights, respectively. 
+ + Returns + ------- + x : ndarray + roots + w : ndarray + weights + + Example + ------- + >>> import numpy as np + >>> [x,w] = h_roots(10) + >>> np.sum(x*w) + -5.2516042729766621e-19 + + See also + -------- + qrule, gaussq + + References + ---------- + [1] Golub, G. H. and Welsch, J. H. (1969) + 'Calculation of Gaussian Quadrature Rules' + Mathematics of Computation, vol 23,page 221-230, + + [2]. Stroud and Secrest (1966), 'gaussian quadrature formulas', + prentice-hall, Englewood cliffs, n.j. + ''' + + if not method.startswith('n'): + return ort.h_roots(n) + else: + sqrt = np.sqrt + max_iter = 10 + releps = 3e-14 + C = [9.084064e-01, 5.214976e-02, 2.579930e-03, 3.986126e-03] + # PIM4=0.7511255444649425 + PIM4 = np.pi ** (-1. / 4) + + # The roots are symmetric about the origin, so we have to + # find only half of them. + m = int(np.fix((n + 1) / 2)) + + # Initial approximations to the roots go into z. + anu = 2.0 * n + 1 + rhs = np.arange(3, 4 * m, 4) * np.pi / anu + r3 = rhs ** (1. / 3) + r2 = r3 ** 2 + theta = r3 * (C[0] + r2 * (C[1] + r2 * (C[2] + r2 * C[3]))) + z = sqrt(anu) * np.cos(theta) + + L = zeros((3, len(z))) + k0 = 0 + kp1 = 1 + for _its in xrange(max_iter): + # Newtons method carried out simultaneously on the roots. + L[k0, :] = 0 + L[kp1, :] = PIM4 + + for j in xrange(1, n + 1): + #%Loop up the recurrence relation to get the Hermite + #%polynomials evaluated at z. + km1 = k0 + k0 = kp1 + kp1 = np.mod(kp1 + 1, 3) + + L[kp1, :] = (z * sqrt(2 / j) * L[k0, :] - + np.sqrt((j - 1) / j) * L[km1, :]) + + # L now contains the desired Hermite polynomials. + # We next compute pp, the derivatives, + # by the relation (4.5.21) using p2, the polynomials + # of one lower order. + + pp = sqrt(2 * n) * L[k0, :] + dz = L[kp1, :] / pp + + z = z - dz # Newtons formula. + + if not np.any(abs(dz) > releps): + break + else: + warnings.warn('too many iterations!') + + x = np.empty(n) + w = np.empty(n) + x[0:m] = z # Store the root + x[n - 1:n - m - 1:-1] = -z # and its symmetric counterpart. + w[0:m] = 2. / pp ** 2 # Compute the weight + w[n - 1:n - m - 1:-1] = w[0:m] # and its symmetric counterpart. + return x, w + + +def j_roots(n, alpha, beta, method='newton'): + ''' + Returns the roots of the nth order Jacobi polynomial, P^(alpha,beta)_n(x) + and weights (w) to use in Gaussian Quadrature over [-1,1] with weighting + function (1-x)**alpha (1+x)**beta with alpha,beta > -1. + + Parameters + ---------- + n : integer + number of roots + alpha,beta : scalars + defining shape of Jacobi polynomial + method : 'newton' or 'eigenvalue' + uses Newton Raphson to find zeros of the Hermite polynomial (Fast) + or eigenvalue of the jacobi matrix (Slow) to obtain the nodes and + weights, respectively. + + Returns + ------- + x : ndarray + roots + w : ndarray + weights + + + Example + -------- + >>> [x,w]= j_roots(10,0,0) + >>> sum(x*w) + 2.7755575615628914e-16 + + See also + -------- + qrule, gaussq + + + Reference + --------- + [1] Golub, G. H. and Welsch, J. H. (1969) + 'Calculation of Gaussian Quadrature Rules' + Mathematics of Computation, vol 23,page 221-230, + + [2]. Stroud and Secrest (1966), 'gaussian quadrature formulas', + prentice-hall, Englewood cliffs, n.j. + ''' + + if not method.startswith('n'): + [x, w] = ort.j_roots(n, alpha, beta) + else: + + max_iter = 10 + releps = 3e-14 + + # Initial approximations to the roots go into z. 
+ alfbet = alpha + beta + + z = np.cos(np.pi * (np.arange(1, n + 1) - 0.25 + 0.5 * alpha) / + (n + 0.5 * (alfbet + 1))) + + L = zeros((3, len(z))) + k0 = 0 + kp1 = 1 + for _its in xrange(max_iter): + # Newton's method carried out simultaneously on the roots. + tmp = 2 + alfbet + L[k0, :] = 1 + L[kp1, :] = (alpha - beta + tmp * z) / 2 + + for j in xrange(2, n + 1): + # Loop up the recurrence relation to get the Jacobi + # polynomials evaluated at z. + km1 = k0 + k0 = kp1 + kp1 = np.mod(kp1 + 1, 3) + + a = 2. * j * (j + alfbet) * tmp + tmp = tmp + 2 + c = 2 * (j - 1 + alpha) * (j - 1 + beta) * tmp + b = (tmp - 1) * (alpha ** 2 - beta ** 2 + tmp * (tmp - 2) * z) + + L[kp1, :] = (b * L[k0, :] - c * L[km1, :]) / a + + # L now contains the desired Jacobi polynomials. + # We next compute pp, the derivatives with a standard + # relation involving the polynomials of one lower order. + + pp = (n * (alpha - beta - tmp * z) * L[kp1, :] + + 2 * (n + alpha) * (n + beta) * L[k0, :]) / (tmp * (1 - z ** 2)) + dz = L[kp1, :] / pp + z = z - dz # Newton's formula. + + if not any(abs(dz) > releps * abs(z)): + break + else: + warnings.warn('too many iterations in jrule') + + x = z # %Store the root and the weight. + f = (sp.gammaln(alpha + n) + sp.gammaln(beta + n) - + sp.gammaln(n + 1) - sp.gammaln(alpha + beta + n + 1)) + w = (np.exp(f) * tmp * 2 ** alfbet / (pp * L[k0, :])) + + return x, w + + +def la_roots(n, alpha=0, method='newton'): + ''' + Returns the roots (x) of the nth order generalized (associated) Laguerre + polynomial, L^(alpha)_n(x), and weights (w) to use in Gaussian quadrature + over [0,inf] with weighting function exp(-x) x**alpha with alpha > -1. + + Parameters + ---------- + n : integer + number of roots + method : 'newton' or 'eigenvalue' + uses Newton Raphson to find zeros of the Laguerre polynomial (Fast) + or eigenvalue of the jacobi matrix (Slow) to obtain the nodes and + weights, respectively. + + Returns + ------- + x : ndarray + roots + w : ndarray + weights + + Example + ------- + >>> import numpy as np + >>> [x,w] = h_roots(10) + >>> np.sum(x*w) + -5.2516042729766621e-19 + + See also + -------- + qrule, gaussq + + References + ---------- + [1] Golub, G. H. and Welsch, J. H. (1969) + 'Calculation of Gaussian Quadrature Rules' + Mathematics of Computation, vol 23,page 221-230, + + [2]. Stroud and Secrest (1966), 'gaussian quadrature formulas', + prentice-hall, Englewood cliffs, n.j. + ''' + + if alpha <= -1: + raise ValueError('alpha must be greater than -1') + + if not method.startswith('n'): + return ort.la_roots(n, alpha) + else: + max_iter = 10 + releps = 3e-14 + C = [9.084064e-01, 5.214976e-02, 2.579930e-03, 3.986126e-03] + + # Initial approximations to the roots go into z. + anu = 4.0 * n + 2.0 * alpha + 2.0 + rhs = np.arange(4 * n - 1, 2, -4) * np.pi / anu + r3 = rhs ** (1. / 3) + r2 = r3 ** 2 + theta = r3 * (C[0] + r2 * (C[1] + r2 * (C[2] + r2 * C[3]))) + z = anu * np.cos(theta) ** 2 + + dz = zeros(len(z)) + L = zeros((3, len(z))) + Lp = zeros((1, len(z))) + pp = zeros((1, len(z))) + k0 = 0 + kp1 = 1 + k = slice(len(z)) + for _its in xrange(max_iter): + #%Newton's method carried out simultaneously on the roots. + L[k0, k] = 0. + L[kp1, k] = 1. + + for jj in xrange(1, n + 1): + # Loop up the recurrence relation to get the Laguerre + # polynomials evaluated at z. + km1 = k0 + k0 = kp1 + kp1 = np.mod(kp1 + 1, 3) + + L[kp1, k] = ((2 * jj - 1 + alpha - z[k]) * L[ + k0, k] - (jj - 1 + alpha) * L[km1, k]) / jj + # end + #%L now contains the desired Laguerre polynomials. 
+ #%We next compute pp, the derivatives with a standard + #% relation involving the polynomials of one lower order. + + Lp[k] = L[k0, k] + pp[k] = (n * L[kp1, k] - (n + alpha) * Lp[k]) / z[k] + + dz[k] = L[kp1, k] / pp[k] + z[k] = z[k] - dz[k] # % Newton?s formula. + #%k = find((abs(dz) > releps.*z)) + + if not np.any(abs(dz) > releps): + break + else: + warnings.warn('too many iterations!') + + x = z + w = -np.exp(sp.gammaln(alpha + n) - sp.gammaln(n)) / (pp * n * Lp) + return x, w + + +def p_roots(n, method='newton', a=-1, b=1): + ''' + Returns the roots (x) of the nth order Legendre polynomial, P_n(x), + and weights (w) to use in Gaussian Quadrature over [-1,1] with weighting + function 1. + + Parameters + ---------- + n : integer + number of roots + method : 'newton' or 'eigenvalue' + uses Newton Raphson to find zeros of the Hermite polynomial (Fast) + or eigenvalue of the jacobi matrix (Slow) to obtain the nodes and + weights, respectively. + + Returns + ------- + x : ndarray + roots + w : ndarray + weights + + + Example + ------- + Integral of exp(x) from a = 0 to b = 3 is: exp(3)-exp(0)= + >>> import numpy as np + >>> [x,w] = p_roots(11,a=0,b=3) + >>> np.sum(np.exp(x)*w) + 19.085536923187668 + + See also + -------- + quadg. + + + References + ---------- + [1] Davis and Rabinowitz (1975) 'Methods of Numerical Integration', + page 365, Academic Press. + + [2] Golub, G. H. and Welsch, J. H. (1969) + 'Calculation of Gaussian Quadrature Rules' + Mathematics of Computation, vol 23,page 221-230, + + [3] Stroud and Secrest (1966), 'gaussian quadrature formulas', + prentice-hall, Englewood cliffs, n.j. + ''' + + if not method.startswith('n'): + x, w = ort.p_roots(n) + else: + + m = int(np.fix((n + 1) / 2)) + + mm = 4 * m - 1 + t = (np.pi / (4 * n + 2)) * np.arange(3, mm + 1, 4) + nn = (1 - (1 - 1 / n) / (8 * n * n)) + xo = nn * np.cos(t) + + if method.endswith('1'): + + # Compute the zeros of the N+1 Legendre Polynomial + # using the recursion relation and the Newton-Raphson method + + # Legendre-Gauss Polynomials + L = zeros((3, m)) + + # Derivative of LGP + Lp = zeros((m,)) + dx = zeros((m,)) + + releps = 1e-15 + max_iter = 100 + # Compute the zeros of the N+1 Legendre Polynomial + # using the recursion relation and the Newton-Raphson method + + # Iterate until new points are uniformly within epsilon of old + # points + k = slice(m) + k0 = 0 + kp1 = 1 + for _ix in xrange(max_iter): + L[k0, k] = 1 + L[kp1, k] = xo[k] + + for jj in xrange(2, n + 1): + km1 = k0 + k0 = kp1 + kp1 = np.mod(k0 + 1, 3) + L[kp1, k] = ((2 * jj - 1) * xo[k] * L[ + k0, k] - (jj - 1) * L[km1, k]) / jj + + Lp[k] = n * (L[k0, k] - xo[k] * L[kp1, k]) / (1 - xo[k] ** 2) + + dx[k] = L[kp1, k] / Lp[k] + xo[k] = xo[k] - dx[k] + k, = np.nonzero((abs(dx) > releps * np.abs(xo))) + if len(k) == 0: + break + else: + warnings.warn('Too many iterations!') + + x = -xo + w = 2. / ((1 - x ** 2) * (Lp ** 2)) + else: + # Algorithm given by Davis and Rabinowitz in 'Methods + # of Numerical Integration', page 365, Academic Press, 1975. + + e1 = n * (n + 1) + + for _j in xrange(2): + pkm1 = 1 + pk = xo + for k in xrange(2, n + 1): + t1 = xo * pk + pkp1 = t1 - pkm1 - (t1 - pkm1) / k + t1 + pkm1 = pk + pk = pkp1 + + den = 1. - xo * xo + d1 = n * (pkm1 - xo * pk) + dpn = d1 / den + d2pn = (2. * xo * dpn - e1 * pk) / den + d3pn = (4. * xo * d2pn + (2 - e1) * dpn) / den + d4pn = (6. 
* xo * d3pn + (6 - e1) * d2pn) / den + u = pk / dpn + v = d2pn / dpn + h = (-u * (1 + (.5 * u) * (v + u * + (v * v - u * d3pn / (3 * dpn))))) + p = (pk + h * (dpn + (.5 * h) * (d2pn + (h / 3) * + (d3pn + .25 * h * d4pn)))) + dp = dpn + h * (d2pn + (.5 * h) * (d3pn + h * d4pn / 3)) + h = h - p / dp + xo = xo + h + + x = -xo - h + fx = (d1 - h * e1 * (pk + (h / 2) * + (dpn + (h / 3) * (d2pn + (h / 4) * + (d3pn + (.2 * h) * d4pn))))) + w = 2 * (1 - x ** 2) / (fx ** 2) + + if (m + m) > n: + x[m - 1] = 0.0 + + if not ((m + m) == n): + m = m - 1 + + x = np.hstack((x, -x[m - 1::-1])) + w = np.hstack((w, w[m - 1::-1])) + + if (a != -1) | (b != 1): + # Linear map from[-1,1] to [a,b] + dh = (b - a) / 2 + x = dh * (x + 1) + a + w = w * dh + + return x, w + + +def qrule(n, wfun=1, alpha=0, beta=0): + ''' + Return nodes and weights for Gaussian quadratures. + + Parameters + ---------- + n : integer + number of base points + wfun : integer + defining the weight function, p(x). (default wfun = 1) + 1,11,21: p(x) = 1 a =-1, b = 1 Gauss-Legendre + 2,12 : p(x) = exp(-x^2) a =-inf, b = inf Hermite + 3,13 : p(x) = x^alpha*exp(-x) a = 0, b = inf Laguerre + 4,14 : p(x) = (x-a)^alpha*(b-x)^beta a =-1, b = 1 Jacobi + 5 : p(x) = 1/sqrt((x-a)*(b-x)), a =-1, b = 1 Chebyshev 1'st kind + 6 : p(x) = sqrt((x-a)*(b-x)), a =-1, b = 1 Chebyshev 2'nd kind + 7 : p(x) = sqrt((x-a)/(b-x)), a = 0, b = 1 + 8 : p(x) = 1/sqrt(b-x), a = 0, b = 1 + 9 : p(x) = sqrt(b-x), a = 0, b = 1 + + Returns + ------- + bp = base points (abscissas) + wf = weight factors + + The Gaussian Quadrature integrates a (2n-1)th order + polynomial exactly and the integral is of the form + b n + Int ( p(x)* F(x) ) dx = Sum ( wf_j* F( bp_j ) ) + a j=1 + where p(x) is the weight function. + For Jacobi and Laguerre: alpha, beta >-1 (default alpha=beta=0) + + Examples: + --------- + >>> [bp,wf] = qrule(10) + >>> sum(bp**2*wf) # integral of x^2 from a = -1 to b = 1 + 0.66666666666666641 + >>> [bp,wf] = qrule(10,2) + >>> sum(bp**2*wf) # integral of exp(-x.^2)*x.^2 from a = -inf to b = inf + 0.88622692545275772 + >>> [bp,wf] = qrule(10,4,1,2) + >>> (bp*wf).sum() # integral of (x+1)*(1-x)^2 from a = -1 to b = 1 + 0.26666666666666755 + + See also + -------- + gaussq + + Reference + --------- + Abromowitz and Stegun (1954) + (for method 5 to 9) + ''' + + if (alpha <= -1) | (beta <= -1): + raise ValueError('alpha and beta must be greater than -1') + + if wfun == 1: # Gauss-Legendre + [bp, wf] = p_roots(n) + elif wfun == 2: # Hermite + [bp, wf] = h_roots(n) + elif wfun == 3: # Generalized Laguerre + [bp, wf] = la_roots(n, alpha) + elif wfun == 4: # Gauss-Jacobi + [bp, wf] = j_roots(n, alpha, beta) + elif wfun == 5: # p(x)=1/sqrt((x-a)*(b-x)), a=-1 and b=1 (default) + jj = np.arange(1, n + 1) + wf = ones(n) * np.pi / n + bp = np.cos((2 * jj - 1) * np.pi / (2 * n)) + + elif wfun == 6: # p(x)=sqrt((x-a)*(b-x)), a=-1 and b=1 + jj = np.arange(1, n + 1) + xj = jj * np.pi / (n + 1) + wf = np.pi / (n + 1) * np.sin(xj) ** 2 + bp = np.cos(xj) + + elif wfun == 7: # p(x)=sqrt((x-a)/(b-x)), a=0 and b=1 + jj = np.arange(1, n + 1) + xj = (jj - 0.5) * pi / (2 * n + 1) + bp = np.cos(xj) ** 2 + wf = 2 * np.pi * bp / (2 * n + 1) + + elif wfun == 8: # p(x)=1/sqrt(b-x), a=0 and b=1 + [bp1, wf1] = p_roots(2 * n) + k, = np.where(0 <= bp1) + wf = 2 * wf1[k] + bp = 1 - bp1[k] ** 2 + + elif wfun == 9: # p(x)=np.sqrt(b-x), a=0 and b=1 + [bp1, wf1] = p_roots(2 * n + 1) + k, = np.where(0 < bp1) + wf = 2 * bp1[k] ** 2 * wf1[k] + bp = 1 - bp1[k] ** 2 + else: + raise ValueError('unknown weight 
function') + return bp, wf + + +def gaussq(fun, a, b, reltol=1e-3, abstol=1e-3, alpha=0, beta=0, wfun=1, + trace=False, args=None): + ''' + Numerically evaluate integral, Gauss quadrature. + + Parameters + ---------- + fun : callable + a,b : array-like + lower and upper integration limits, respectively. + reltol, abstol : real scalars, optional + relative and absolute tolerance, respectively. + (default reltol=abstool=1e-3). + wfun : scalar integer, optional + defining the weight function, p(x). (default wfun = 1) + 1 : p(x) = 1 a =-1, b = 1 Gauss-Legendre + 2 : p(x) = exp(-x^2) a =-inf, b = inf Hermite + 3 : p(x) = x^alpha*exp(-x) a = 0, b = inf Laguerre + 4 : p(x) = (x-a)^alpha*(b-x)^beta a =-1, b = 1 Jacobi + 5 : p(x) = 1/sqrt((x-a)*(b-x)), a =-1, b = 1 Chebyshev 1'st kind + 6 : p(x) = sqrt((x-a)*(b-x)), a =-1, b = 1 Chebyshev 2'nd kind + 7 : p(x) = sqrt((x-a)/(b-x)), a = 0, b = 1 + 8 : p(x) = 1/sqrt(b-x), a = 0, b = 1 + 9 : p(x) = sqrt(b-x), a = 0, b = 1 + trace : bool, optional + If non-zero a point plot of the integrand (default False). + gn : scalar integer + number of base points to start the integration with (default 2). + alpha, beta : real scalars, optional + Shape parameters of Laguerre or Jacobi weight function + (alpha,beta>-1) (default alpha=beta=0) + + Returns + ------- + val : ndarray + evaluated integral + err : ndarray + error estimate, absolute tolerance abs(int-intold) + + Notes + ----- + GAUSSQ numerically evaluate integral using a Gauss quadrature. + The Quadrature integrates a (2m-1)th order polynomial exactly and the + integral is of the form + b + Int (p(x)* Fun(x)) dx + a + GAUSSQ is vectorized to accept integration limits A, B and + coefficients P1,P2,...Pn, as matrices or scalars and the + result is the common size of A, B and P1,P2,...,Pn. + + Examples + --------- + integration of x**2 from 0 to 2 and from 1 to 4 + + >>> from scitools import numpyutils as npt + >>> A = [0, 1]; B = [2,4] + >>> fun = npt.wrap2callable('x**2') + >>> [val1,err1] = gaussq(fun,A,B) + >>> val1 + array([ 2.6666667, 21. 
]) + >>> err1 + array([ 1.7763568e-15, 1.0658141e-14]) + + Integration of x^2*exp(-x) from zero to infinity: + >>> fun2 = npt.wrap2callable('1') + >>> val2, err2 = gaussq(fun2, 0, npt.inf, wfun=3, alpha=2) + >>> val3, err3 = gaussq(lambda x: x**2,0, npt.inf, wfun=3, alpha=0) + >>> val2, err2 + (array([ 2.]), array([ 6.6613381e-15])) + >>> val3, err3 + (array([ 2.]), array([ 1.7763568e-15])) + + Integrate humps from 0 to 2 and from 1 to 4 + >>> val4, err4 = gaussq(humps,A,B) + + See also + -------- + qrule + gaussq2d + ''' + global _POINTS_AND_WEIGHTS + max_iter = 11 + gn = 2 + if not hasattr(fun, '__call__'): + raise ValueError('Function must be callable') + + A, B = np.atleast_1d(a, b) + a_shape = np.atleast_1d(A.shape) + b_shape = np.atleast_1d(B.shape) + + # make sure the integration limits have correct size + if np.prod(a_shape) == 1: + A = A * ones(b_shape) + a_shape = b_shape + elif np.prod(b_shape) == 1: + B = B * ones(a_shape) + elif any(a_shape != b_shape): + raise ValueError('The integration limits must have equal size!') + + if args is None: + num_parameters = 0 + else: + num_parameters = len(args) + P0 = copy.deepcopy(args) + isvector1 = zeros(num_parameters) + + nk = np.prod(a_shape) # % # of integrals we have to compute + for ix in xrange(num_parameters): + if is_numlike(P0[ix]): + p0_shape = np.shape(P0[ix]) + Np0 = np.prod(p0_shape) + isvector1[ix] = (Np0 > 1) + if isvector1[ix]: + if nk == 1: + a_shape = p0_shape + nk = Np0 + A = A * ones(a_shape) + B = B * ones(a_shape) + elif nk != Np0: + raise ValueError('The input must have equal size!') + + P0[ix].shape = (-1, 1) # make sure it is a column + + k = np.arange(nk) + val = zeros(nk) + val_old = zeros(nk) + abserr = zeros(nk) + + # setup mapping parameters + A.shape = (-1, 1) + B.shape = (-1, 1) + jacob = (B - A) / 2 + + shift = 1 + if wfun == 1: # Gauss-legendre + dx = jacob + elif wfun == 2 or wfun == 3: + shift = 0 + jacob = ones((nk, 1)) + A = zeros((nk, 1)) + dx = jacob + elif wfun == 4: + dx = jacob ** (alpha + beta + 1) + elif wfun == 5: + dx = ones((nk, 1)) + elif wfun == 6: + dx = jacob ** 2 + elif wfun == 7: + shift = 0 + jacob = jacob * 2 + dx = jacob + elif wfun == 8: + shift = 0 + jacob = jacob * 2 + dx = sqrt(jacob) + elif wfun == 9: + shift = 0 + jacob = jacob * 2 + dx = sqrt(jacob) ** 3 + else: + raise ValueError('unknown option') + + dx = dx.ravel() + + if trace: + x_trace = [0, ] * max_iter + y_trace = [0, ] * max_iter + + if num_parameters > 0: + ix_vec, = np.where(isvector1) + if len(ix_vec): + P1 = copy.copy(P0) + + # Break out of the iteration loop for three reasons: + # 1) the last update is very small (compared to int and to reltol) + # 2) There are more than 11 iterations. This should NEVER happen. 
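+    # Each pass doubles the number of base points (gn) and re-evaluates only
+    # the integrals indexed by k that have not yet converged; the Gauss nodes
+    # and weights are cached in _POINTS_AND_WEIGHTS, keyed on
+    # (wfun, gn, alpha, beta), so repeated calls avoid recomputing them.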
+ + for ix in xrange(max_iter): + x_and_w = 'wfun%d_%d_%g_%g' % (wfun, gn, alpha, beta) + if x_and_w in _POINTS_AND_WEIGHTS: + xn, w = _POINTS_AND_WEIGHTS[x_and_w] + else: + xn, w = qrule(gn, wfun, alpha, beta) + _POINTS_AND_WEIGHTS[x_and_w] = (xn, w) + + # calculate the x values + x = (xn + shift) * jacob[k, :] + A[k, :] + + # calculate function values y=fun(x,p1,p2,....,pn) + if num_parameters > 0: + if len(ix_vec): + #% Expand vector to the correct size + for iy in ix_vec: + P1[iy] = P0[iy][k, :] + + y = fun(x, **P1) + else: + y = fun(x, **P0) + else: + y = fun(x) + + val[k] = np.sum(w * y, axis=1) * dx[k] # do the integration sum(y.*w) + + if trace: + x_trace.append(x.ravel()) + y_trace.append(y.ravel()) + + hfig = plt.plot(x, y, 'r.') + # hold on + # drawnow,shg + # if trace>1: + # pause + + plt.setp(hfig, 'color', 'b') + + abserr[k] = abs(val_old[k] - val[k]) # absolute tolerance + if ix > 1: + k, = np.where(abserr > np.maximum(abs(reltol * val), abstol)) + # abserr > abs(abstol))%indices to integrals which + # did not converge + nk = len(k) # of integrals we have to compute again + if nk: + val_old[k] = val[k] + else: + break + + gn *= 2 # double the # of basepoints and weights + else: + if nk > 1: + if (nk == np.prod(a_shape)): + tmptxt = 'All integrals did not converge' + else: + tmptxt = '%d integrals did not converge' % (nk,) + else: + tmptxt = 'Integral did not converge--singularity likely!' + warnings.warn(tmptxt + '--singularities likely!') + + # make sure int is the same size as the integration limits + val.shape = a_shape + abserr.shape = a_shape + + if trace > 0: + plt.clf() + plt.plot(np.hstack(x_trace), np.hstack(y_trace), '+') + return val, abserr + + +def richardson(Q, k): + # license BSD + # Richardson extrapolation with parameter estimation + c = np.real((Q[k - 1] - Q[k - 2]) / (Q[k] - Q[k - 1])) - 1. + #% The lower bound 0.07 admits the singularity x.^-0.9 + c = max(c, 0.07) + R = Q[k] + (Q[k] - Q[k - 1]) / c + return R + + +def quadgr(fun, a, b, abseps=1e-5, max_iter=17): + ''' + Gauss-Legendre quadrature with Richardson extrapolation. + + [Q,ERR] = QUADGR(FUN,A,B,TOL) approximates the integral of a function + FUN from A to B with an absolute error tolerance TOL. FUN is a function + handle and must accept vector arguments. TOL is 1e-6 by default. Q is + the integral approximation and ERR is an estimate of the absolute error. + + QUADGR uses a 12-point Gauss-Legendre quadrature. The error estimate is + based on successive interval bisection. Richardson extrapolation + accelerates the convergence for some integrals, especially integrals + with endpoint singularities. + + Examples + -------- + >>> import numpy as np + >>> Q, err = quadgr(np.log,0,1) + >>> quadgr(np.exp,0,9999*1j*np.pi) + (-2.0000000000122662, 2.1933237448479304e-09) + + >>> quadgr(lambda x: np.sqrt(4-x**2),0,2,1e-12) + (3.1415926535897811, 1.5809575870662229e-13) + + >>> quadgr(lambda x: x**-0.75,0,1) + (4.0000000000000266, 5.6843418860808015e-14) + + >>> quadgr(lambda x: 1./np.sqrt(1-x**2),-1,1) + (3.141596056985029, 6.2146261559092864e-06) + + >>> quadgr(lambda x: np.exp(-x**2),-np.inf,np.inf,1e-9) #% sqrt(pi) + (1.7724538509055152, 1.9722334876348668e-11) + + >>> quadgr(lambda x: np.cos(x)*np.exp(-x),0,np.inf,1e-9) + (0.50000000000000044, 7.3296813063450372e-11) + + See also + -------- + QUAD, + QUADGK + ''' + # Author: jonas.lundgren@saabgroup.com, 2009. 
license BSD + # Order limits (required if infinite limits) + if a == b: + Q = b - a + err = b - a + return Q, err + elif np.real(a) > np.real(b): + reverse = True + a, b = b, a + else: + reverse = False + + #% Infinite limits + if np.isinf(a) | np.isinf(b): + # Check real limits + if ~ np.isreal(a) | ~np.isreal(b) | np.isnan(a) | np.isnan(b): + raise ValueError('Infinite intervals must be real.') + + # Change of variable + if np.isfinite(a) & np.isinf(b): + # a to inf + fun1 = lambda t: fun(a + t / (1 - t)) / (1 - t) ** 2 + [Q, err] = quadgr(fun1, 0, 1, abseps) + elif np.isinf(a) & np.isfinite(b): + # -inf to b + fun2 = lambda t: fun(b + t / (1 + t)) / (1 + t) ** 2 + [Q, err] = quadgr(fun2, -1, 0, abseps) + else: # -inf to inf + fun1 = lambda t: fun(t / (1 - t)) / (1 - t) ** 2 + fun2 = lambda t: fun(t / (1 + t)) / (1 + t) ** 2 + [Q1, err1] = quadgr(fun1, 0, 1, abseps / 2) + [Q2, err2] = quadgr(fun2, -1, 0, abseps / 2) + Q = Q1 + Q2 + err = err1 + err2 + + # Reverse direction + if reverse: + Q = -Q + return Q, err + + # Gauss-Legendre quadrature (12-point) + xq = np.asarray( + [0.12523340851146894, 0.36783149899818018, 0.58731795428661748, + 0.76990267419430469, 0.9041172563704748, 0.98156063424671924]) + wq = np.asarray( + [0.24914704581340288, 0.23349253653835478, 0.20316742672306584, + 0.16007832854334636, 0.10693932599531818, 0.047175336386511842]) + xq = np.hstack((xq, -xq)) + wq = np.hstack((wq, wq)) + nq = len(xq) +# iscomplex = (np.iscomplex(a) | np.iscomplex(b)).any() +# if iscomplex: +# dtype = np.complex128 +# else: + dtype = np.float64 + + # Initiate vectors +# max_iter = 17 # Max number of iterations + Q0 = zeros(max_iter, dtype=dtype) # Quadrature + Q1 = zeros(max_iter, dtype=dtype) # First Richardson extrapolation + Q2 = zeros(max_iter, dtype=dtype) # Second Richardson extrapolation + + # One interval + hh = (b - a) / 2 # Half interval length + x = (a + b) / 2 + hh * xq # Nodes + # Quadrature + Q0[0] = hh * np.sum(wq * fun(x), axis=0) + + # Successive bisection of intervals + for k in xrange(1, max_iter): + + # Interval bisection + hh = hh / 2 + x = np.hstack([x + a, x + b]) / 2 + # Quadrature + Q0[k] = hh * \ + np.sum(wq * np.sum(np.reshape(fun(x), (-1, nq)), axis=0), axis=0) + + # Richardson extrapolation + if k >= 5: + Q1[k] = richardson(Q0, k) + Q2[k] = richardson(Q1, k) + elif k >= 3: + Q1[k] = richardson(Q0, k) + + #% Estimate absolute error + if k >= 6: + Qv = np.hstack((Q0[k], Q1[k], Q2[k])) + Qw = np.hstack((Q0[k - 1], Q1[k - 1], Q2[k - 1])) + elif k >= 4: + Qv = np.hstack((Q0[k], Q1[k])) + Qw = np.hstack((Q0[k - 1], Q1[k - 1])) + else: + Qv = np.atleast_1d(Q0[k]) + Qw = Q0[k - 1] + + errors = np.atleast_1d(abs(Qv - Qw)) + j = errors.argmin() + err = errors[j] + Q = Qv[j] + if k >= 2: # and not iscomplex: + _val, err1 = dea3(Q0[k - 2], Q0[k - 1], Q0[k]) + + # Convergence + if (err < abseps) | ~np.isfinite(Q): + break + else: + warnings.warn('Max number of iterations reached without convergence.') + + if ~ np.isfinite(Q): + warnings.warn('Integral approximation is Infinite or NaN.') + + # The error estimate should not be zero + err = err + 2 * np.finfo(Q).eps + # Reverse direction + if reverse: + Q = -Q + + return Q, err + + +def qdemo(f, a, b): + ''' + Compares different quadrature rules. + + Parameters + ---------- + f : callable + function + a,b : scalars + lower and upper integration limits + + Details + ------- + qdemo(f,a,b) computes and compares various approximations to + the integral of f from a to b. 
Three approximations are used, + the composite trapezoid, Simpson's, and Boole's rules, all with + equal length subintervals. + In a case like qdemo(exp,0,3) one can see the expected + convergence rates for each of the three methods. + In a case like qdemo(sqrt,0,3), the convergence rate is limited + not by the method, but by the singularity of the integrand. + + Example + ------- + >>> import numpy as np + >>> qdemo(np.exp,0,3) + true value = 19.08553692 + ftn Trapezoid Simpsons Booles + evals approx error approx error approx error + 3, 22.5366862979, 3.4511493747, 19.5061466023, 0.4206096791, 19.4008539142, 0.3153169910 + 5, 19.9718950387, 0.8863581155, 19.1169646189, 0.0314276957, 19.0910191534, 0.0054822302 + 9, 19.3086731081, 0.2231361849, 19.0875991312, 0.0020622080, 19.0856414320, 0.0001045088 + 17, 19.1414188470, 0.0558819239, 19.0856674267, 0.0001305035, 19.0855386464, 0.0000017232 + 33, 19.0995135407, 0.0139766175, 19.0855451052, 0.0000081821, 19.0855369505, 0.0000000273 + 65, 19.0890314614, 0.0034945382, 19.0855374350, 0.0000005118, 19.0855369236, 0.0000000004 + 129, 19.0864105817, 0.0008736585, 19.0855369552, 0.0000000320, 19.0855369232, 0.0000000000 + 257, 19.0857553393, 0.0002184161, 19.0855369252, 0.0000000020, 19.0855369232, 0.0000000000 + 513, 19.0855915273, 0.0000546041, 19.0855369233, 0.0000000001, 19.0855369232, 0.0000000000 + ftn Clenshaw Chebychev Gauss-L + evals approx error approx error approx error + 3, 19.5061466023, 0.4206096791, 0.0000000000, 1.0000000000, 19.0803304585, 0.0052064647 + 5, 19.0834145766, 0.0021223465, 0.0000000000, 1.0000000000, 19.0855365951, 0.0000003281 + 9, 19.0855369150, 0.0000000082, 0.0000000000, 1.0000000000, 19.0855369232, 0.0000000000 + 17, 19.0855369232, 0.0000000000, 0.0000000000, 1.0000000000, 19.0855369232, 0.0000000000 + 33, 19.0855369232, 0.0000000000, 0.0000000000, 1.0000000000, 19.0855369232, 0.0000000000 + 65, 19.0855369232, 0.0000000000, 0.0000000000, 1.0000000000, 19.0855369232, 0.0000000000 + 129, 19.0855369232, 0.0000000000, 0.0000000000, 1.0000000000, 19.0855369232, 0.0000000000 + 257, 19.0855369232, 0.0000000000, 0.0000000000, 1.0000000000, 19.0855369232, 0.0000000000 + 513, 19.0855369232, 0.0000000000, 0.0000000000, 1.0000000000, 19.0855369232, 0.0000000000 + ''' + # use quad8 with small tolerance to get "true" value + #true1 = quad8(f,a,b,1e-10) + #[true tol]= gaussq(f,a,b,1e-12) + #[true tol] = agakron(f,a,b,1e-13) + true_val, _tol = intg.quad(f, a, b) + print('true value = %12.8f' % (true_val,)) + kmax = 9 + neval = zeros(kmax, dtype=int) + qt = zeros(kmax) + qs = zeros(kmax) + qb = zeros(kmax) + qc = zeros(kmax) + qc2 = zeros(kmax) + qg = zeros(kmax) + + et = ones(kmax) + es = ones(kmax) + eb = ones(kmax) + ec = ones(kmax) + ec2 = ones(kmax) + ec3 = ones(kmax) + eg = ones(kmax) + # try various approximations + + for k in xrange(kmax): + n = 2 ** (k + 1) + 1 + neval[k] = n + h = (b - a) / (n - 1) + x = np.linspace(a, b, n) + y = f(x) + + # trapezoid approximation + q = np.trapz(y, x) + # h*( (y(1)+y(n))/2 + sum(y(2:n-1)) ) + qt[k] = q + et[k] = abs(q - true_val) + # Simpson approximation + q = intg.simps(y, x) + #(h/3)*( y(1)+y(n) + 4*sum(y(2:2:n-1)) + 2*sum(y(3:2:n-2)) ) + qs[k] = q + es[k] = abs(q - true_val) + # Boole's rule + #q = boole(x,y) + q = (2 * h / 45) * (7 * (y[0] + y[-1]) + 12 * np.sum(y[2:n - 1:4]) + + 32 * np.sum(y[1:n - 1:2]) + + 14 * np.sum(y[4:n - 3:4])) + qb[k] = q + eb[k] = abs(q - true_val) + + # Clenshaw-Curtis + [q, ec3[k]] = clencurt(f, a, b, (n - 1) / 2) + qc[k] = q + ec[k] = abs(q - 
true_val) + + # Chebychev + #ck = chebfit(f,n,a,b) + #q = chebval(b,chebint(ck,a,b),a,b) + #qc2[k] = q; ec2[k] = abs(q - true) + + # Gauss-Legendre quadrature + q = intg.fixed_quad(f, a, b, n=n)[0] + #[x, w]=qrule(n,1) + # x = (b-a)/2*x + (a+b)/2 % Transform base points X. + # w = (b-a)/2*w % Adjust weigths. + #q = sum(feval(f,x)*w) + qg[k] = q + eg[k] = abs(q - true_val) + + #% display results + formats = ['%4.0f, ', ] + ['%10.10f, ', ] * 6 + formats[-1] = formats[-1].split(',')[0] + data = np.vstack((neval, qt, et, qs, es, qb, eb)).T + print(' ftn Trapezoid Simpson''s Boole''s') + print('evals approx error approx error approx error') + + for k in xrange(kmax): + tmp = data[k].tolist() + print(''.join(fi % t for fi, t in zip(formats, tmp))) + + # display results + data = np.vstack((neval, qc, ec, qc2, ec2, qg, eg)).T + print(' ftn Clenshaw Chebychev Gauss-L') + print('evals approx error approx error approx error') + for k in xrange(kmax): + tmp = data[k].tolist() + print(''.join(fi % t for fi, t in zip(formats, tmp))) + + plt.loglog(neval, np.vstack((et, es, eb, ec, ec2, eg)).T) + plt.xlabel('number of function evaluations') + plt.ylabel('error') + plt.legend( + ('Trapezoid', 'Simpsons', 'Booles', 'Clenshaw', 'Chebychev', 'Gauss-L')) + # ec3' + + +def main(): +# val, err = clencurt(np.exp, 0, 2) +# valt = np.exp(2) - np.exp(0) +# [Q, err] = quadgr(lambda x: x ** 2, 1, 4, 1e-9) +# [Q, err] = quadgr(humps, 1, 4, 1e-9) +# +# [x, w] = h_roots(11, 'newton') +# sum(w) +# [x2, w2] = la_roots(11, 1, 't') +# +# from scitools import numpyutils as npu #@UnresolvedImport +# fun = npu.wrap2callable('x**2') +# p0 = fun(0) +# A = [0, 1, 1]; B = [2, 4, 3] +# area, err = gaussq(fun, A, B) +# +# fun = npu.wrap2callable('x**2') +# [val1, err1] = gaussq(fun, A, B) +# +# +# Integration of x^2*exp(-x) from zero to infinity: +# fun2 = npu.wrap2callable('1') +# [val2, err2] = gaussq(fun2, 0, np.inf, wfun=3, alpha=2) +# [val2, err2] = gaussq(lambda x: x ** 2, 0, np.inf, wfun=3, alpha=0) +# +# Integrate humps from 0 to 2 and from 1 to 4 +# [val3, err3] = gaussq(humps, A, B) +# +# [x, w] = p_roots(11, 'newton', 1, 3) +# y = np.sum(x ** 2 * w) + + x = np.linspace(0, np.pi / 2) + _q0 = np.trapz(humps(x), x) + [q, err] = romberg(humps, 0, np.pi / 2, 1e-4) + print q, err + + +def test_docstrings(): + np.set_printoptions(precision=7) + import doctest + doctest.testmod() + +if __name__ == '__main__': + test_docstrings() + # main() diff --git a/pywafo/src/wafo/interpolate.py b/pywafo/src/wafo/interpolate.py index e8e855e..7ec6f68 100644 --- a/pywafo/src/wafo/interpolate.py +++ b/pywafo/src/wafo/interpolate.py @@ -1,1156 +1,1249 @@ -#------------------------------------------------------------------------------- -# Name: module1 -# Purpose: -# -# Author: pab -# -# Created: 30.12.2008 -# Copyright: (c) pab 2008 -# Licence: -#------------------------------------------------------------------------------- -#!/usr/bin/env python -from __future__ import division -import numpy as np -import scipy.signal -import scipy.sparse as sp -import scipy.sparse.linalg #@UnusedImport -from numpy.ma.core import ones, zeros, prod, sin -from numpy import diff, pi, inf #@UnresolvedImport -from numpy.lib.shape_base import vstack -from numpy.lib.function_base import linspace -from scipy.interpolate import PiecewisePolynomial - -import polynomial as pl - - -__all__ = ['PPform', 'savitzky_golay', 'savitzky_golay_piecewise', 'sgolay2d','SmoothSpline', - 'pchip_slopes','slopes','stineman_interp', 'Pchip','StinemanInterp', 'CubicHermiteSpline'] - -def 
savitzky_golay(y, window_size, order, deriv=0): - r"""Smooth (and optionally differentiate) data with a Savitzky-Golay filter. - The Savitzky-Golay filter removes high frequency noise from data. - It has the advantage of preserving the original shape and - features of the signal better than other types of filtering - approaches, such as moving averages techhniques. - - Parameters - ---------- - y : array_like, shape (N,) - the values of the time history of the signal. - window_size : int - the length of the window. Must be an odd integer number. - order : int - the order of the polynomial used in the filtering. - Must be less then `window_size` - 1. - deriv: int - the order of the derivative to compute (default = 0 means only smoothing) - Returns - ------- - ys : ndarray, shape (N) - the smoothed signal (or it's n-th derivative). - - Notes - ----- - The Savitzky-Golay is a type of low-pass filter, particularly - suited for smoothing noisy data. The test_doctstrings idea behind this - approach is to make for each point a least-square fit with a - polynomial of high order over a odd-sized window centered at - the point. - - Examples - -------- - >>> t = np.linspace(-4, 4, 500) - >>> y = np.exp( -t**2 ) + np.random.normal(0, 0.05, t.shape) - >>> ysg = savitzky_golay(y, window_size=31, order=4) - >>> import matplotlib.pyplot as plt - >>> h=plt.plot(t, y, label='Noisy signal') - >>> h=plt.plot(t, np.exp(-t**2), 'k', lw=1.5, label='Original signal') - >>> h=plt.plot(t, ysg, 'r', label='Filtered signal') - >>> h=plt.legend() - >>> plt.show() - - References - ---------- - .. [1] A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of - Data by Simplified Least Squares Procedures. Analytical - Chemistry, 1964, 36 (8), pp 1627-1639. - .. [2] Numerical Recipes 3rd Edition: The Art of Scientific Computing - W.H. Press, S.A. Teukolsky, W.T. Vetterling, B.P. Flannery - Cambridge University Press ISBN-13: 9780521880688 - """ - try: - window_size = np.abs(np.int(window_size)) - order = np.abs(np.int(order)) - except ValueError: - raise ValueError("window_size and order have to be of type int") - if window_size % 2 != 1 or window_size < 1: - raise TypeError("window_size size must be a positive odd number") - if window_size < order + 2: - raise TypeError("window_size is too small for the polynomials order") - order_range = range(order+1) - half_window = (window_size -1) // 2 - # precompute coefficients - b = np.mat([[k**i for i in order_range] for k in range(-half_window, half_window+1)]) - m = np.linalg.pinv(b).A[deriv] - # pad the signal at the extremes with - # values taken from the signal itself - firstvals = y[0] - np.abs( y[1:half_window+1][::-1] - y[0] ) - lastvals = y[-1] + np.abs(y[-half_window-1:-1][::-1] - y[-1]) - y = np.concatenate((firstvals, y, lastvals)) - return np.convolve( m, y, mode='valid') - -def savitzky_golay_piecewise(xvals, data, kernel=11, order =4): - ''' - One of the most popular applications of S-G filter, apart from smoothing UV-VIS - and IR spectra, is smoothing of curves obtained in electroanalytical experiments. - In cyclic voltammetry, voltage (being the abcissa) changes like a triangle wave. - And in the signal there are cusps at the turning points (at switching potentials) - which should never be smoothed. In this case, Savitzky-Golay smoothing should be - done piecewise, ie. 
separately on pieces monotonic in x - - Example - ------- - >>> import numpy as np - >>> import matplotlib.pyplot as plt - >>> n = 1e3 - >>> x = np.linspace(0, 25, n) - >>> y = np.round(sin(x)) - >>> sig2 = linspace(0,0.5,50) - - # As an example, this figure shows the effect of an additive noise with a variance - # of 0.2 (original signal (black), noisy signal (red) and filtered signal (blue dots)). - - >>> yn = y + np.sqrt(0.2)*np.random.randn(*x.shape) - >>> yr = savitzky_golay_piecewise(x, yn, kernel=11, order=4) - >>> h=plt.plot(x, yn, 'r', x, y, 'k', x, yr, 'b.') - ''' - turnpoint=0 - last=len(xvals) - if xvals[1]>xvals[0] : #x is increasing? - for i in range(1,last) : #yes - if xvals[i]xvals[i-1] : - turnpoint=i - break - if turnpoint==0 : #no change in direction of x - return savitzky_golay(data, kernel, order) - else: - #smooth the first piece - firstpart=savitzky_golay(data[0:turnpoint],kernel,order) - #recursively smooth the rest - rest=savitzky_golay_piecewise(xvals[turnpoint:], data[turnpoint:], kernel, order) - return np.concatenate((firstpart,rest)) - -def sgolay2d ( z, window_size, order, derivative=None): - """ - Savitsky - Golay filters can also be used to smooth two dimensional data affected - by noise. The algorithm is exactly the same as for the one dimensional case, only - the math is a bit more tricky. The basic algorithm is as follow: - for each point of the two dimensional matrix extract a sub - matrix, centered at - that point and with a size equal to an odd number "window_size". - for this sub - matrix compute a least - square fit of a polynomial surface, defined as - p(x, y) = a0 + a1 * x + a2 * y + a3 * x2 + a4 * y2 + a5 * x * y + ... . - Note that x and y are equal to zero at the central point. - replace the initial central point with the value computed with the fit. - Note that because the fit coefficients are linear with respect to the data spacing, they can pre - computed for efficiency. Moreover, it is important to appropriately pad the borders of the data, with a mirror image of the data itself, so that the evaluation of the fit at the borders of the data can happen smoothly. - Here is the code for two dimensional filtering. - - Example - ------- - # create some sample twoD data - >>> x = np.linspace(-3,3,100) - >>> y = np.linspace(-3,3,100) - >>> X, Y = np.meshgrid(x,y) - >>> Z = np.exp( -(X**2+Y**2)) - - # add noise - >>> Zn = Z + np.random.normal( 0, 0.2, Z.shape ) - - # filter it - >>> Zf = sgolay2d( Zn, window_size=29, order=4) - - # do some plotting - >>> import matplotlib.pyplot as plt - >>> h=plt.matshow(Z) - >>> h=plt.matshow(Zn) - >>> h=plt.matshow(Zf) - """ - # number of terms in the polynomial expression - n_terms = (order + 1) * (order + 2) / 2.0 - - if window_size % 2 == 0: - raise ValueError('window_size must be odd') - - if window_size ** 2 < n_terms: - raise ValueError('order is too high for the window size') - - half_size = window_size // 2 - - # exponents of the polynomial. - # p(x,y) = a0 + a1*x + a2*y + a3*x^2 + a4*y^2 + a5*x*y + ... - # this line gives a list of two item tuple. Each tuple contains - # the exponents of the k-th term. First element of tuple is for x - # second element for y. - # Ex. exps = [(0,0), (1,0), (0,1), (2,0), (1,1), (0,2), ...] 
- exps = [ (k - n, n) for k in range(order + 1) for n in range(k + 1) ] - - # coordinates of points - ind = np.arange(-half_size, half_size + 1, dtype=np.float64) - dx = np.repeat(ind, window_size) - dy = np.tile(ind, [window_size, 1]).reshape(window_size ** 2,) - - # build matrix of system of equation - A = np.empty((window_size ** 2, len(exps))) - for i, exp in enumerate(exps): - A[:, i] = (dx ** exp[0]) * (dy ** exp[1]) - - # pad input array with appropriate values at the four borders - new_shape = z.shape[0] + 2 * half_size, z.shape[1] + 2 * half_size - Z = np.zeros((new_shape)) - # top band - band = z[0, :] - Z[:half_size, half_size:-half_size] = band - np.abs(np.flipud(z[1:half_size + 1, :]) - band) - # bottom band - band = z[-1, :] - Z[-half_size:, half_size:-half_size] = band + np.abs(np.flipud(z[-half_size - 1:-1, :]) - band) - # left band - band = np.tile(z[:, 0].reshape(-1, 1), [1, half_size]) - Z[half_size:-half_size, :half_size] = band - np.abs(np.fliplr(z[:, 1:half_size + 1]) - band) - # right band - band = np.tile(z[:, -1].reshape(-1, 1), [1, half_size]) - Z[half_size:-half_size, -half_size:] = band + np.abs(np.fliplr(z[:, -half_size - 1:-1]) - band) - # central band - Z[half_size:-half_size, half_size:-half_size] = z - - # top left corner - band = z[0, 0] - Z[:half_size, :half_size] = band - np.abs(np.flipud(np.fliplr(z[1:half_size + 1, 1:half_size + 1])) - band) - # bottom right corner - band = z[-1, -1] - Z[-half_size:, -half_size:] = band + np.abs(np.flipud(np.fliplr(z[-half_size - 1:-1, -half_size - 1:-1])) - band) - - # top right corner - band = Z[half_size, -half_size:] - Z[:half_size, -half_size:] = band - np.abs(np.flipud(Z[half_size + 1:2 * half_size + 1, -half_size:]) - band) - # bottom left corner - band = Z[-half_size:, half_size].reshape(-1, 1) - Z[-half_size:, :half_size] = band - np.abs(np.fliplr(Z[-half_size:, half_size + 1:2 * half_size + 1]) - band) - - # solve system and convolve - if derivative == None: - m = np.linalg.pinv(A)[0].reshape((window_size, -1)) - return scipy.signal.fftconvolve(Z, m, mode='valid') - elif derivative == 'col': - c = np.linalg.pinv(A)[1].reshape((window_size, -1)) - return scipy.signal.fftconvolve(Z, -c, mode='valid') - elif derivative == 'row': - r = np.linalg.pinv(A)[2].reshape((window_size, -1)) - return scipy.signal.fftconvolve(Z, -r, mode='valid') - elif derivative == 'both': - c = np.linalg.pinv(A)[1].reshape((window_size, -1)) - r = np.linalg.pinv(A)[2].reshape((window_size, -1)) - return scipy.signal.fftconvolve(Z, -r, mode='valid'), scipy.signal.fftconvolve(Z, -c, mode='valid') - -class PPform(object): - """The ppform of the piecewise polynomials is given in terms of coefficients - and breaks. The polynomial in the ith interval is - x_{i} <= x < x_{i+1} - - S_i = sum(coefs[m,i]*(x-breaks[i])^(k-m), m=0..k) - where k is the degree of the polynomial. 
- - Example - ------- - >>> import matplotlib.pyplot as plt - >>> coef = np.array([[1,1]]) # unit step function - >>> coef = np.array([[1,1],[0,1]]) # linear from 0 to 2 - >>> coef = np.array([[1,1],[1,1],[0,2]]) # linear from 0 to 2 - >>> breaks = [0,1,2] - >>> self = PPform(coef, breaks) - >>> x = linspace(-1,3) - >>> h=plt.plot(x,self(x)) - """ - def __init__(self, coeffs, breaks, fill=0.0, sort=False, a=None, b=None): - if sort: - self.breaks = np.sort(breaks) - else: - self.breaks = np.asarray(breaks) - if a is None: - a = self.breaks[0] - if b is None: - b = self.breaks[-1] - self.coeffs = np.asarray(coeffs) - self.order = self.coeffs.shape[0] - self.fill = fill - self.a = a - self.b = b - - def __call__(self, xnew): - saveshape = np.shape(xnew) - xnew = np.ravel(xnew) - res = np.empty_like(xnew) - mask = (self.a <= xnew) & (xnew <= self.b) - res[~mask] = self.fill - xx = xnew.compress(mask) - indxs = np.searchsorted(self.breaks[:-1], xx) - 1 - indxs = indxs.clip(0, len(self.breaks)) - pp = self.coeffs - dx = xx - self.breaks.take(indxs) - if True: - v = pp[0, indxs] - for i in xrange(1, self.order): - v = dx * v + pp[i, indxs] - values = v - else: - V = np.vander(dx, N=self.order) - # values = np.diag(dot(V,pp[:,indxs])) - dot = np.dot - values = np.array([dot(V[k, :], pp[:, indxs[k]]) for k in xrange(len(xx))]) - - res[mask] = values - res.shape = saveshape - return res - - def linear_extrapolate(self, output=True): - ''' - Return a 1D PPform which extrapolate linearly outside its basic interval - ''' - - max_order = 2 - - if self.order <= max_order: - if output: - return self - else: - return - breaks = self.breaks.copy() - coefs = self.coeffs.copy() - #pieces = len(breaks) - 1 - - # Add new breaks beyond each end - breaks2add = breaks[[0, -1]] + np.array([-1, 1]) - newbreaks = np.hstack([breaks2add[0], breaks, breaks2add[1]]) - - dx = newbreaks[[0, -2]] - breaks[[0, -2]] - - dx = dx.ravel() - - # Get coefficients for the new last polynomial piece (a_n) - # by just relocate the previous last polynomial and - # then set all terms of order > maxOrder to zero - - a_nn = coefs[:, -1] - dxN = dx[-1] - - a_n = pl.polyreloc(a_nn, -dxN) # Relocate last polynomial - #set to zero all terms of order > maxOrder - a_n[0:self.order - max_order] = 0 - - #Get the coefficients for the new first piece (a_1) - # by first setting all terms of order > maxOrder to zero and then - # relocate the polynomial. 
- - - #Set to zero all terms of order > maxOrder, i.e., not using them - a_11 = coefs[self.order - max_order::, 0] - dx1 = dx[0] - - a_1 = pl.polyreloc(a_11, -dx1) # Relocate first polynomial - a_1 = np.hstack([zeros(self.order - max_order), a_1]) - - newcoefs = np.hstack([ a_1.reshape(-1, 1), coefs, a_n.reshape(-1, 1)]) - if output: - return PPform(newcoefs, newbreaks, a= -inf, b=inf) - else: - self.coeffs = newcoefs - self.breaks = newbreaks - self.a = -inf - self.b = inf - - def derivative(self): - """ - Return first derivative of the piecewise polynomial - """ - - cof = pl.polyder(self.coeffs) - brks = self.breaks.copy() - return PPform(cof, brks, fill=self.fill) - - - def integrate(self): - """ - Return the indefinite integral of the piecewise polynomial - """ - cof = pl.polyint(self.coeffs) - - pieces = len(self.breaks) - 1 - if 1 < pieces : - # evaluate each integrated polynomial at the right endpoint of its interval - xs = diff(self.breaks[:-1, ...], axis=0) - index = np.arange(pieces - 1) - - vv = xs * cof[0, index] - k = self.order - for i in xrange(1, k): - vv = xs * (vv + cof[i, index]) - - cof[-1] = np.hstack((0, vv)).cumsum() - - return PPform(cof, self.breaks, fill=self.fill) - - - -## def fromspline(cls, xk, cvals, order, fill=0.0): -## N = len(xk)-1 -## sivals = np.empty((order+1,N), dtype=float) -## for m in xrange(order,-1,-1): -## fact = spec.gamma(m+1) -## res = _fitpack._bspleval(xk[:-1], xk, cvals, order, m) -## res /= fact -## sivals[order-m,:] = res -## return cls(sivals, xk, fill=fill) - -class SmoothSpline(PPform): - """ - Cubic Smoothing Spline. - - Parameters - ---------- - x : array-like - x-coordinates of data. (vector) - y : array-like - y-coordinates of data. (vector or matrix) - p : real scalar - smoothing parameter between 0 and 1: - 0 -> LS-straight line - 1 -> cubic spline interpolant - lin_extrap : bool - if False regular smoothing spline - if True a smoothing spline with a constraint on the ends to - ensure linear extrapolation outside the range of the data (default) - var : array-like - variance of each y(i) (default 1) - - Returns - ------- - pp : ppform - If xx is not given, return self-form of the spline. - - Given the approximate values - - y(i) = g(x(i))+e(i) - - of some smooth function, g, where e(i) is the error. 
SMOOTH tries to - recover g from y by constructing a function, f, which minimizes - - p * sum (Y(i) - f(X(i)))^2/d2(i) + (1-p) * int (f'')^2 - - - Example - ------- - >>> import numpy as np - >>> import matplotlib.pyplot as plt - >>> x = np.linspace(0,1) - >>> y = np.exp(x)+1e-1*np.random.randn(x.size) - >>> pp9 = SmoothSpline(x, y, p=.9) - >>> pp99 = SmoothSpline(x, y, p=.99, var=0.01) - >>> h=plt.plot(x,y, x,pp99(x),'g', x,pp9(x),'k', x,np.exp(x),'r') - - See also - -------- - lc2tr, dat2tr - - - References - ---------- - Carl de Boor (1978) - 'Practical Guide to Splines' - Springer Verlag - Uses EqXIV.6--9, self 239 - """ - def __init__(self, xx, yy, p=None, lin_extrap=True, var=1): - coefs, brks = self._compute_coefs(xx, yy, p, var) - super(SmoothSpline, self).__init__(coefs, brks) - if lin_extrap: - self.linear_extrapolate(output=False) - - def _compute_coefs(self, xx, yy, p=None, var=1): - x, y = np.atleast_1d(xx, yy) - x = x.ravel() - dx = np.diff(x) - must_sort = (dx < 0).any() - if must_sort: - ind = x.argsort() - x = x[ind] - y = y[..., ind] - dx = np.diff(x) - - n = len(x) - - #ndy = y.ndim - szy = y.shape - - nd = prod(szy[:-1]) - ny = szy[-1] - - if n < 2: - raise ValueError('There must be >=2 data points.') - elif (dx <= 0).any(): - raise ValueError('Two consecutive values in x can not be equal.') - elif n != ny: - raise ValueError('x and y must have the same length.') - - dydx = np.diff(y) / dx - - if (n == 2) : #% straight line - coefs = np.vstack([dydx.ravel(), y[0, :]]) - else: - - dx1 = 1. / dx - D = sp.spdiags(var * ones(n), 0, n, n) # The variance - - u, p = self._compute_u(p, D, dydx, dx, dx1, n) - dx1.shape = (n - 1, -1) - dx.shape = (n - 1, -1) - zrs = zeros(nd) - if p < 1: - ai = (y - (6 * (1 - p) * D * diff(vstack([zrs, - diff(vstack([zrs, u, zrs]), axis=0) * dx1, - zrs]), axis=0)).T).T #faster than yi-6*(1-p)*Q*u - else: - ai = y.reshape(n, -1) - - # The piecewise polynominals are written as - # fi=ai+bi*(x-xi)+ci*(x-xi)^2+di*(x-xi)^3 - # where the derivatives in the knots according to Carl de Boor are: - # ddfi = 6*p*[0;u] = 2*ci; - # dddfi = 2*diff([ci;0])./dx = 6*di; - # dfi = diff(ai)./dx-(ci+di.*dx).*dx = bi; - - ci = np.vstack([zrs, 3 * p * u]) - di = (diff(vstack([ci, zrs]), axis=0) * dx1 / 3); - bi = (diff(ai, axis=0) * dx1 - (ci + di * dx) * dx) - ai = ai[:n - 1, ...] - if nd > 1: - di = di.T - ci = ci.T - ai = ai.T - if not any(di): - if not any(ci): - coefs = vstack([bi.ravel(), ai.ravel()]) - else: - coefs = vstack([ci.ravel(), bi.ravel(), ai.ravel()]) - else: - coefs = vstack([di.ravel(), ci.ravel(), bi.ravel(), ai.ravel()]) - - return coefs, x - - def _compute_u(self, p, D, dydx, dx, dx1, n): - if p is None or p != 0: - data = [dx[1:n - 1], 2 * (dx[:n - 2] + dx[1:n - 1]), dx[:n - 2]] - R = sp.spdiags(data, [-1, 0, 1], n - 2, n - 2) - - if p is None or p < 1: - Q = sp.spdiags([dx1[:n - 2], -(dx1[:n - 2] + dx1[1:n - 1]), dx1[1:n - 1]], [0, -1, -2], n, n - 2) - QDQ = (Q.T * D * Q) - if p is None or p < 0: - # Estimate p - p = 1. / (1. + QDQ.diagonal().sum() / (100. 
* R.diagonal().sum()** 2)); - - if p == 0: - QQ = 6 * QDQ - else: - QQ = (6 * (1 - p)) * (QDQ) + p * R - else: - QQ = R - - # Make sure it uses symmetric matrix solver - ddydx = diff(dydx, axis=0) - sp.linalg.use_solver(useUmfpack=True) - u = 2 * sp.linalg.spsolve((QQ + QQ.T), ddydx) - return u.reshape(n - 2, -1), p - -def _edge_case(m0, d1): - return np.where((d1==0) | (m0==0), 0.0, 1.0/(1.0/m0+1.0/d1)) - -def pchip_slopes(x, y): - # Determine the derivatives at the points y_k, d_k, by using - # PCHIP algorithm is: - # We choose the derivatives at the point x_k by - # Let m_k be the slope of the kth segment (between k and k+1) - # If m_k=0 or m_{k-1}=0 or sgn(m_k) != sgn(m_{k-1}) then d_k == 0 - # else use weighted harmonic mean: - # w_1 = 2h_k + h_{k-1}, w_2 = h_k + 2h_{k-1} - # 1/d_k = 1/(w_1 + w_2)*(w_1 / m_k + w_2 / m_{k-1}) - # where h_k is the spacing between x_k and x_{k+1} - - hk = x[1:] - x[:-1] - mk = (y[1:] - y[:-1]) / hk - smk = np.sign(mk) - condition = ((smk[1:] != smk[:-1]) | (mk[1:]==0) | (mk[:-1]==0)) - - w1 = 2*hk[1:] + hk[:-1] - w2 = hk[1:] + 2*hk[:-1] - whmean = 1.0/(w1+w2)*(w1/mk[1:] + w2/mk[:-1]) - - dk = np.zeros_like(y) - dk[1:-1][condition] = 0.0 - dk[1:-1][~condition] = 1.0/whmean[~condition] - - # For end-points choose d_0 so that 1/d_0 = 1/m_0 + 1/d_1 unless - # one of d_1 or m_0 is 0, then choose d_0 = 0 - - dk[0] = _edge_case(mk[0],dk[1]) - dk[-1] = _edge_case(mk[-1],dk[-2]) - return dk - -def slopes(x,y, method='parabola', tension=0, monotone=False): - ''' - Return estimated slopes y'(x) - - Parameters - ---------- - x, y : array-like - array containing the x- and y-data, respectively. - x must be sorted low to high... (no repeats) while - y can have repeated values. - method : string - defining method of estimation for yp. Valid options are: - 'Catmull-Rom' yp = (y[k+1]-y[k-1])/(x[k+1]-x[k-1]) - 'Cardinal' yp = (1-tension) * (y[k+1]-y[k-1])/(x[k+1]-x[k-1]) - 'parabola' - 'secant' average secants - yp = 0.5*((y[k+1]-y[k])/(x[k+1]-x[k]) + (y[k]-y[k-1])/(x[k]-x[k-1])) - tension : real scalar between 0 and 1. 
- tension parameter used in Cardinal method - monotone : bool - If True modifies yp to preserve monoticity - - Returns - ------- - yp : ndarray - estimated slope - - References: - ----------- - Wikipedia: Monotone cubic interpolation - Cubic Hermite spline - - ''' - x = np.asarray(x, np.float_) - y = np.asarray(y, np.float_) - yp = np.zeros(y.shape, np.float_) - - - dx = x[1:] - x[:-1] - # Compute the slopes of the secant lines between successive points - dydx = (y[1:] - y[:-1]) / dx - - method = method.lower() - if method.startswith('p'): #parabola'): - yp[1:-1] = (dydx[:-1] * dx[1:] + dydx[1:] * dx[:-1]) / (dx[1:] + dx[:-1]) - yp[0] = 2.0 * dydx[0] - yp[1] - yp[-1] = 2.0 * dydx[-1] - yp[-2] - else: - # At the endpoints - use one-sided differences - yp[0] = dydx[0] - yp[-1] = dydx[-1] - if method.startswith('s'): #secant'): - # In the middle - use the average of the secants - yp[1:-1] = (dydx[:-1] + dydx[1:]) / 2.0 - else: # Cardinal or Catmull-Rom method - yp[1:-1] = (y[2:] - y[:-2]) / (x[2:] - x[:-2]) - if method.startswith('car'): #cardinal'): - yp = (1-tension) * yp - - if monotone: - # Special case: intervals where y[k] == y[k+1] - # Setting these slopes to zero guarantees the spline connecting - # these points will be flat which preserves monotonicity - ii, = (dydx == 0.0).nonzero() - yp[ii] = 0.0 - yp[ii+1] = 0.0 - - alpha = yp[:-1]/dydx - beta = yp[1:]/dydx - dist = alpha**2 + beta**2 - tau = 3.0 / np.sqrt(dist) - - # To prevent overshoot or undershoot, restrict the position vector - # (alpha, beta) to a circle of radius 3. If (alpha**2 + beta**2)>9, - # then set m[k] = tau[k]alpha[k]delta[k] and m[k+1] = tau[k]beta[b]delta[k] - # where tau = 3/sqrt(alpha**2 + beta**2). - - # Find the indices that need adjustment - indices_to_fix, = (dist > 9.0).nonzero() - for ii in indices_to_fix: - yp[ii] = tau[ii] * alpha[ii] * dydx[ii] - yp[ii+1] = tau[ii] * beta[ii] * dydx[ii] - - return yp - -def stineman_interp(xi, x, y, yp=None): - """ - Given data vectors *x* and *y*, the slope vector *yp* and a new - abscissa vector *xi*, the function :func:`stineman_interp` uses - Stineman interpolation to calculate a vector *yi* corresponding to - *xi*. - - Here's an example that generates a coarse sine curve, then - interpolates over a finer abscissa:: - - x = linspace(0,2*pi,20); y = sin(x); yp = cos(x) - xi = linspace(0,2*pi,40); - yi = stineman_interp(xi,x,y,yp); - plot(x,y,'o',xi,yi) - - The interpolation method is described in the article A - CONSISTENTLY WELL BEHAVED METHOD OF INTERPOLATION by Russell - W. Stineman. The article appeared in the July 1980 issue of - Creative Computing with a note from the editor stating that while - they were: - - not an academic journal but once in a while something serious - and original comes in adding that this was - "apparently a real solution" to a well known problem. - - For *yp* = *None*, the routine automatically determines the slopes - using the :func:`slopes` routine. - - *x* is assumed to be sorted in increasing order. - - For values ``xi[j] < x[0]`` or ``xi[j] > x[-1]``, the routine - tries an extrapolation. The relevance of the data obtained from - this, of course, is questionable... - - Original implementation by Halldor Bjornsson, Icelandic - Meteorolocial Office, March 2006 halldor at vedur.is - - Completely reworked and optimized for Python by Norbert Nemec, - Institute of Theoretical Physics, University or Regensburg, April - 2006 Norbert.Nemec at physik.uni-regensburg.de - """ - - # Cast key variables as float. 
- x = np.asarray(x, np.float_) - y = np.asarray(y, np.float_) - assert x.shape == y.shape - #N = len(y) - - if yp is None: - yp = slopes(x, y) - else: - yp = np.asarray(yp, np.float_) - - xi = np.asarray(xi, np.float_) - #yi = np.zeros(xi.shape, np.float_) - - # calculate linear slopes - dx = x[1:] - x[:-1] - dy = y[1:] - y[:-1] - s = dy / dx #note length of s is N-1 so last element is #N-2 - - # find the segment each xi is in - # this line actually is the key to the efficiency of this implementation - idx = np.searchsorted(x[1:-1], xi) - - # now we have generally: x[idx[j]] <= xi[j] <= x[idx[j]+1] - # except at the boundaries, where it may be that xi[j] < x[0] or xi[j] > x[-1] - - # the y-values that would come out from a linear interpolation: - sidx = s.take(idx) - xidx = x.take(idx) - yidx = y.take(idx) - xidxp1 = x.take(idx + 1) - yo = yidx + sidx * (xi - xidx) - - # the difference that comes when using the slopes given in yp - dy1 = (yp.take(idx) - sidx) * (xi - xidx) # using the yp slope of the left point - dy2 = (yp.take(idx + 1) - sidx) * (xi - xidxp1) # using the yp slope of the right point - - dy1dy2 = dy1 * dy2 - # The following is optimized for Python. The solution actually - # does more calculations than necessary but exploiting the power - # of numpy, this is far more efficient than coding a loop by hand - # in Python - dy1mdy2 = np.where(dy1dy2,dy1-dy2,np.inf) - dy1pdy2 = np.where(dy1dy2,dy1+dy2,np.inf) - yi = yo + dy1dy2 * np.choose(np.array(np.sign(dy1dy2), np.int32) + 1, - ((2 * xi - xidx - xidxp1) / ((dy1mdy2) * (xidxp1 - xidx)), - 0.0, - 1 / (dy1pdy2))) - return yi - -class StinemanInterp(object): - ''' - Returns the values of an interpolating function that runs through a set of points according to the algorithm of Stineman (1980). - - Parameters - --------- - x,y : array-like - coordinates of points defining the interpolating function. - yp : array-like - slopes of the interpolating function at x. Optional: only given if they are known, else the argument is not used. - method : string - method for computing the slope at the given points if the slope is not known. With method= - "parabola" calculates the slopes from a parabola through every three points. - Notes - ----- - The interpolation method is described in an article by Russell W. Stineman (1980) - - According to Stineman, the interpolation procedure has "the following properties: - - If values of the ordinates of the specified points change monotonically, and the slopes of the line segments joining - the points change monotonically, then the interpolating curve and its slope will change monotonically. - If the slopes of the line segments joining the specified points change monotonically, then the slopes of the interpolating - curve will change monotonically. Suppose that the conditions in (1) or (2) are satisfied by a set of points, but a small - change in the ordinate or slope at one of the points will result conditions (1) or (2) being not longer satisfied. Then - making this small change in the ordinate or slope at a point will cause no more than a small change in the interpolating - curve." The method is based on rational interpolation with specially chosen rational functions to satisfy the above three - conditions. - - Slopes computed at the given points with the methods provided by the `StinemanInterp' function satisfy Stineman's requirements. 
- The original method suggested by Stineman (method="scaledstineman", the default, and "stineman") result in lower slopes near - abrupt steps or spikes in the point sequence, and therefore a smaller tendency for overshooting. The method based on a second - degree polynomial (method="parabola") provides better approximation to smooth functions, but it results in in higher slopes - near abrupt steps or spikes and can lead to some overshooting where Stineman's method does not. Both methods lead to much - less tendency for `spurious' oscillations than traditional interplation methods based on polynomials, such as splines - (see the examples section). - - Stineman states that "The complete assurance that the procedure will never generate `wild' points makes it attractive as a - general purpose procedure". - - This interpolation method has been implemented in Matlab and R in addition to Python. - - Examples - -------- - >>> import wafo.interpolate as wi - >>> import numpy as np - >>> import matplotlib.pyplot as plt - >>> x = np.linspace(0,2*pi,20) - >>> y = np.sin(x); yp = np.cos(x) - >>> xi = np.linspace(0,2*pi,40); - >>> yi = wi.StinemanInterp(x,y)(xi) - >>> yi1 = wi.CubicHermiteSpline(x,y, yp)(xi) - >>> yi2 = wi.Pchip(x,y, method='parabola')(xi) - >>> h=plt.subplot(211) - >>> h=plt.plot(x,y,'o',xi,yi,'r', xi,yi1, 'g', xi,yi1, 'b') - >>> h=plt.subplot(212) - >>> h=plt.plot(xi,np.abs(sin(xi)-yi), 'r', xi, np.abs(sin(xi)-yi1), 'g', xi, np.abs(sin(xi)-yi2), 'b') - - References - ---------- - Stineman, R. W. A Consistently Well Behaved Method of Interpolation. Creative Computing (1980), volume 6, number 7, p. 54-57. - - See Also - -------- - slopes, Pchip - ''' - def __init__(self, x,y,yp=None,method='parabola', monotone=False): - if yp is None: - yp = slopes(x, y, method, monotone) - self.x = np.asarray(x, np.float_) - self.y = np.asarray(y, np.float_) - self.yp = np.asarray(yp, np.float_) - - def __call__(self, xi): - xi = np.asarray(xi, np.float_) - x = self.x - y = self.y - yp = self.yp - # calculate linear slopes - dx = x[1:] - x[:-1] - dy = y[1:] - y[:-1] - s = dy / dx #note length of s is N-1 so last element is #N-2 - - # find the segment each xi is in - # this line actually is the key to the efficiency of this implementation - idx = np.searchsorted(x[1:-1], xi) - - # now we have generally: x[idx[j]] <= xi[j] <= x[idx[j]+1] - # except at the boundaries, where it may be that xi[j] < x[0] or xi[j] > x[-1] - - # the y-values that would come out from a linear interpolation: - sidx = s.take(idx) - xidx = x.take(idx) - yidx = y.take(idx) - xidxp1 = x.take(idx + 1) - yo = yidx + sidx * (xi - xidx) - - # the difference that comes when using the slopes given in yp - dy1 = (yp.take(idx) - sidx) * (xi - xidx) # using the yp slope of the left point - dy2 = (yp.take(idx + 1) - sidx) * (xi - xidxp1) # using the yp slope of the right point - - dy1dy2 = dy1 * dy2 - # The following is optimized for Python. 
The solution actually - # does more calculations than necessary but exploiting the power - # of numpy, this is far more efficient than coding a loop by hand - # in Python - dy1mdy2 = np.where(dy1dy2,dy1-dy2,np.inf) - dy1pdy2 = np.where(dy1dy2,dy1+dy2,np.inf) - yi = yo + dy1dy2 * np.choose(np.array(np.sign(dy1dy2), np.int32) + 1, - ((2 * xi - xidx - xidxp1) / ((dy1mdy2) * (xidxp1 - xidx)), - 0.0, - 1 / (dy1pdy2))) - return yi - -class StinemanInterp2(PiecewisePolynomial): - def __init__(self, x, y, yp=None, method='parabola', monotone=False): - if yp is None: - yp = slopes(x, y, method, monotone=monotone) - super(StinemanInterp2,self).__init__(x, zip(y,yp)) - -class CubicHermiteSpline(PiecewisePolynomial): - ''' - Piecewise Cubic Hermite Interpolation using Catmull-Rom - method for computing the slopes. - ''' - def __init__(self, x, y, yp=None, method='Catmull-Rom'): - if yp is None: - yp = slopes(x, y, method, monotone=False) - super(CubicHermiteSpline, self).__init__(x, zip(y,yp), orders=3) - -class Pchip(PiecewisePolynomial): - """PCHIP 1-d monotonic cubic interpolation - - Description - ----------- - x and y are arrays of values used to approximate some function f: - y = f(x) - This class factory function returns a callable class whose __call__ method - uses monotonic cubic, interpolation to find the value of new points. - - Parameters - ---------- - x : array - A 1D array of monotonically increasing real values. x cannot - include duplicate values (otherwise f is overspecified) - y : array - A 1-D array of real values. y's length along the interpolation - axis must be equal to the length of x. - yp : array - slopes of the interpolating function at x. Optional: only given if they are known, else the argument is not used. - method : string - method for computing the slope at the given points if the slope is not known. With method= - "parabola" calculates the slopes from a parabola through every three points. - - Assumes x is sorted in monotonic order (e.g. x[1] > x[0]) - - Example - ------- - >>> import wafo.interpolate as wi - - # Create a step function (will demonstrate monotonicity) - >>> x = np.arange(7.0) - 3.0 - >>> y = np.array([-1.0, -1,-1,0,1,1,1]) - - # Interpolate using monotonic piecewise Hermite cubic spline - >>> xvec = np.arange(599.)/100. - 3.0 - >>> yvec = wi.Pchip(x, y)(xvec) - - # Call the Scipy cubic spline interpolator - >>> from scipy.interpolate import interpolate - >>> function = interpolate.interp1d(x, y, kind='cubic') - >>> yvec1 = function(xvec) - - # Non-montonic cubic Hermite spline interpolator using - # Catmul-Rom method for computing slopes... - >>> yvec2 = wi.CubicHermiteSpline(x,y)(xvec) - - >>> yvec3 = wi.StinemanInterp(x, y)(xvec) - - # Plot the results - >>> import matplotlib.pyplot as plt - >>> h=plt.plot(x, y, 'ro') - >>> h=plt.plot(xvec, yvec, 'b') - >>> h=plt.plot(xvec, yvec1, 'k') - >>> h=plt.plot(xvec, yvec2, 'g') - >>> h=plt.plot(xvec, yvec3, 'm') - >>> h=plt.title("pchip() step function test") - - >>> h=plt.xlabel("X") - >>> h=plt.ylabel("Y") - >>> h=plt.title("Comparing pypchip() vs. Scipy interp1d() vs. 
non-monotonic CHS") - >>> legends = ["Data", "pypchip()", "interp1d","CHS", 'SI'] - >>> h=plt.legend(legends, loc="upper left") - >>> plt.show() - - """ - def __init__(self, x, y, yp=None, method='secant'): - if yp is None: - yp = slopes(x, y, method=method, monotone=True) - super(Pchip, self).__init__(x, zip(y,yp), orders=3) - -def test_smoothing_spline(): - x = linspace(0, 2 * pi + pi / 4, 20) - y = sin(x) #+ np.random.randn(x.size) - pp = SmoothSpline(x, y, p=1) - x1 = linspace(-1, 2 * pi + pi / 4 + 1, 20) - y1 = pp(x1) - pp1 = pp.derivative() - pp0 = pp1.integrate() - dy1 = pp1(x1) - y01 = pp0(x1) - #dy = y-y1 - import matplotlib.pyplot as plb - - plb.plot(x, y, x1, y1, '.', x1, dy1, 'ro', x1, y01, 'r-') - plb.show() - pass - #tck = interpolate.splrep(x, y, s=len(x)) - -def compare_methods(): - ############################################################ - # Sine wave test - ############################################################ - fun = np.sin - # Create a example vector containing a sine wave. - x = np.arange(30.0)/10. - y = fun(x) - - # Interpolate the data above to the grid defined by "xvec" - xvec = np.arange(250.)/100. - - # Initialize the interpolator slopes - # Create the pchip slopes - m = slopes(x, y, method='parabola', monotone=True) - m1 = slopes(x, y, method='parabola', monotone=False) - m2 = slopes(x, y, method='catmul', monotone=False) - m3 = pchip_slopes(x, y) - - # Call the monotonic piece-wise Hermite cubic interpolator - yvec = Pchip(x, y, m)(xvec) - yvec1 = Pchip(x, y, m1)(xvec) - yvec2 = Pchip(x, y, m2)(xvec) - yvec3 = Pchip(x, y, m3)(xvec) - - import matplotlib.pyplot as plt - - plt.figure() - plt.plot(x,y, 'ro', xvec, fun(xvec),'r') - plt.title("pchip() Sin test code") - - # Plot the interpolated points - plt.plot(xvec, yvec, xvec, yvec1, xvec, yvec2,'g.',xvec, yvec3) - plt.legend(['true','true','parbola_monoton','parabola','catmul','pchip'], frameon=False, loc=0) - plt.ioff() - plt.show() - - - -def demo_monoticity(): - # Step function test... - import matplotlib.pyplot as plt - plt.figure(2) - plt.title("pchip() step function test") - # Create a step function (will demonstrate monotonicity) - x = np.arange(7.0) - 3.0 - y = np.array([-1.0, -1,-1,0,1,1,1]) - - # Interpolate using monotonic piecewise Hermite cubic spline - xvec = np.arange(599.)/100. - 3.0 - - # Create the pchip slopes - m = slopes(x,y, monotone=True) -# m1 = slopes(x, y, monotone=False) -# m2 = slopes(x,y,method='catmul',monotone=False) - m3 = pchip_slopes(x, y) - # Interpolate... - yvec = Pchip(x, y, m)(xvec) - - # Call the Scipy cubic spline interpolator - from scipy.interpolate import interpolate as ip - function = ip.interp1d(x, y, kind='cubic') - yvec2 = function(xvec) - - # Non-montonic cubic Hermite spline interpolator using - # Catmul-Rom method for computing slopes... - yvec3 = CubicHermiteSpline(x,y)(xvec) - yvec4 = StinemanInterp(x, y)(xvec) - yvec5 = Pchip(x, y, m3)(xvec) #@UnusedVariable - - # Plot the results - plt.plot(x, y, 'ro', label='Data') - plt.plot(xvec, yvec, 'b', label='Pchip') - plt.plot(xvec, yvec2, 'k', label='interp1d') - plt.plot(xvec, yvec3, 'g', label='CHS') - plt.plot(xvec, yvec4, 'm', label='Stineman') - #plt.plot(xvec, yvec5, 'yo', label='Pchip2') - plt.xlabel("X") - plt.ylabel("Y") - plt.title("Comparing Pchip() vs. Scipy interp1d() vs. 
non-monotonic CHS") -# legends = ["Data", "Pchip()", "interp1d","CHS", 'Stineman'] - plt.legend(loc="upper left", frameon=False) - plt.ioff() - plt.show() - -def test_doctstrings(): - from scipy import interpolate - import matplotlib.pyplot as plt - import matplotlib - matplotlib.interactive(True) - - coef = np.array([[1, 1], [0, 1]]) # linear from 0 to 2 - #coef = np.array([[1,1],[1,1],[0,2]]) # linear from 0 to 2 - breaks = [0, 1, 2] - pp = PPform(coef, breaks, a= -100, b=100) - x = linspace(-1, 3, 20) - y = pp(x) #@UnusedVariable - - x = linspace(0, 2 * pi + pi / 4, 20) - y = x + np.random.randn(x.size) - tck = interpolate.splrep(x, y, s=len(x)) - xnew = linspace(0, 2 * pi, 100) - ynew = interpolate.splev(xnew, tck, der=0) - tck0 = interpolate.splmake(xnew, ynew, order=3, kind='smoothest', conds=None) - pp = interpolate.ppform.fromspline(*tck0) - - plt.plot(x, y, "x", xnew, ynew, xnew, sin(xnew), x, y, "b") - plt.legend(['Linear', 'Cubic Spline', 'True']) - plt.title('Cubic-spline interpolation') - - - t = np.arange(0, 1.1, .1) - x = np.sin(2 * np.pi * t) - y = np.cos(2 * np.pi * t) - tck1, u = interpolate.splprep([t, y], s=0) #@UnusedVariable - tck2 = interpolate.splrep(t, y, s=len(t), task=0) - #interpolate.spl - tck = interpolate.splmake(t, y, order=3, kind='smoothest', conds=None) - self = interpolate.ppform.fromspline(*tck2) - plt.plot(t, self(t)) - pass - -def test_pp(): - coef = np.array([[1, 1], [0, 0]]) # linear from 0 to 2 @UnusedVariable - - coef = np.array([[1, 1], [1, 1], [0, 2]]) # quadratic from 0 to 1 and 1 to 2. - dc = pl.polyder(coef, 1) - c2 = pl.polyint(dc, 1) #@UnusedVariable - breaks = [0, 1, 2] - pp = PPform(coef, breaks) - pp(0.5) - pp(1) - pp(1.5) - dpp = pp.derivative() - import pylab as plb - x = plb.linspace(-1, 3) - plb.plot(x, pp(x), x, dpp(x), '.') - plb.show() - - -def test_docstrings(): - import doctest - doctest.testmod() - - -if __name__ == '__main__': -# test_docstrings() - #test_doctstrings() - #test_smoothing_spline() - #compare_methods() - demo_monoticity() \ No newline at end of file +#------------------------------------------------------------------------- +# Name: module1 +# Purpose: +# +# Author: pab +# +# Created: 30.12.2008 +# Copyright: (c) pab 2008 +# Licence: +#------------------------------------------------------------------------- +#!/usr/bin/env python +from __future__ import division +import numpy as np +import scipy.signal +import scipy.special as spec +import scipy.sparse as sp +import scipy.sparse.linalg # @UnusedImport +from numpy.ma.core import ones, zeros, prod, sin +from numpy import diff, pi, inf # @UnresolvedImport +from numpy.lib.shape_base import vstack +from numpy.lib.function_base import linspace +from scipy.interpolate import PiecewisePolynomial + +import polynomial as pl + + +__all__ = [ + 'PPform', 'savitzky_golay', 'savitzky_golay_piecewise', 'sgolay2d', + 'SmoothSpline', 'pchip_slopes', 'slopes', 'stineman_interp', 'Pchip', + 'StinemanInterp', 'CubicHermiteSpline'] + + +def savitzky_golay(y, window_size, order, deriv=0): + """Smooth (and optionally differentiate) data with a Savitzky-Golay filter. + The Savitzky-Golay filter removes high frequency noise from data. + It has the advantage of preserving the original shape and + features of the signal better than other types of filtering + approaches, such as moving averages techhniques. + + Parameters + ---------- + y : array_like, shape (N,) + the values of the time history of the signal. + window_size : int + the length of the window. Must be an odd integer number. 
+    order : int
+        the order of the polynomial used in the filtering.
+        Must be less than `window_size` - 1.
+    deriv: int
+        order of the derivative to compute (default = 0 means only smoothing)
+
+    Returns
+    -------
+    ys : ndarray, shape (N)
+        the smoothed signal (or its n-th derivative).
+
+    Notes
+    -----
+    The Savitzky-Golay filter is a type of low-pass filter, particularly
+    suited for smoothing noisy data. The main idea behind this
+    approach is to make for each point a least-squares fit with a
+    polynomial of high order over an odd-sized window centered at
+    the point.
+
+    Examples
+    --------
+    >>> t = np.linspace(-4, 4, 500)
+    >>> y = np.exp( -t**2 ) + np.random.normal(0, 0.05, t.shape)
+    >>> ysg = savitzky_golay(y, window_size=31, order=4)
+    >>> import matplotlib.pyplot as plt
+    >>> h=plt.plot(t, y, label='Noisy signal')
+    >>> h=plt.plot(t, np.exp(-t**2), 'k', lw=1.5, label='Original signal')
+    >>> h=plt.plot(t, ysg, 'r', label='Filtered signal')
+    >>> h=plt.legend()
+    >>> plt.show()
+
+    References
+    ----------
+    .. [1] A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of
+       Data by Simplified Least Squares Procedures. Analytical
+       Chemistry, 1964, 36 (8), pp 1627-1639.
+    .. [2] Numerical Recipes 3rd Edition: The Art of Scientific Computing
+       W.H. Press, S.A. Teukolsky, W.T. Vetterling, B.P. Flannery
+       Cambridge University Press ISBN-13: 9780521880688
+    """
+    try:
+        window_size = np.abs(np.int(window_size))
+        order = np.abs(np.int(order))
+    except ValueError:
+        raise ValueError("window_size and order have to be of type int")
+    if window_size % 2 != 1 or window_size < 1:
+        raise TypeError("window_size must be a positive odd number")
+    if window_size < order + 2:
+        raise TypeError("window_size is too small for the polynomial's order")
+    order_range = range(order + 1)
+    half_window = (window_size - 1) // 2
+    # precompute coefficients
+    b = np.mat([[k ** i for i in order_range]
+                for k in range(-half_window, half_window + 1)])
+    m = np.linalg.pinv(b).A[deriv]
+    # pad the signal at the extremes with
+    # values taken from the signal itself
+    firstvals = y[0] - np.abs(y[1:half_window + 1][::-1] - y[0])
+    lastvals = y[-1] + np.abs(y[-half_window - 1:-1][::-1] - y[-1])
+    y = np.concatenate((firstvals, y, lastvals))
+    return np.convolve(m, y, mode='valid')
+
+
+def savitzky_golay_piecewise(xvals, data, kernel=11, order=4):
+    '''
+    One of the most popular applications of the S-G filter, apart from
+    smoothing UV-VIS and IR spectra, is smoothing of curves obtained in
+    electroanalytical experiments. In cyclic voltammetry, voltage (being the
+    abscissa) changes like a triangle wave, and the signal has cusps at the
+    turning points (at switching potentials) which should never be smoothed.
+    In this case, Savitzky-Golay smoothing should be done piecewise, i.e.
+    separately on pieces monotonic in x.
+
+    Example
+    -------
+    >>> import numpy as np
+    >>> import matplotlib.pyplot as plt
+    >>> n = 1e3
+    >>> x = np.linspace(0, 25, n)
+    >>> y = np.round(sin(x))
+    >>> sig2 = linspace(0,0.5,50)
+
+    # As an example, this figure shows the effect of an additive noise with a
+    # variance of 0.2 (original signal (black), noisy signal (red) and
+    # filtered signal (blue dots)).
+
+    >>> yn = y + np.sqrt(0.2)*np.random.randn(*x.shape)
+    >>> yr = savitzky_golay_piecewise(x, yn, kernel=11, order=4)
+    >>> h=plt.plot(x, yn, 'r', x, y, 'k', x, yr, 'b.')
+    '''
+    turnpoint = 0
+    last = len(xvals)
+    if xvals[1] > xvals[0]:  # x is increasing? 
+ for i in range(1, last): # yes + if xvals[i] < xvals[i - 1]: # search where x starts to fall + turnpoint = i + break + else: # no, x is decreasing + for i in range(1, last): # search where it starts to rise + if xvals[i] > xvals[i - 1]: + turnpoint = i + break + if turnpoint == 0: # no change in direction of x + return savitzky_golay(data, kernel, order) + else: + # smooth the first piece + firstpart = savitzky_golay(data[0:turnpoint], kernel, order) + # recursively smooth the rest + rest = savitzky_golay_piecewise( + xvals[turnpoint:], data[turnpoint:], kernel, order) + return np.concatenate((firstpart, rest)) + + +def sgolay2d(z, window_size, order, derivative=None): + """ + Savitsky - Golay filters can also be used to smooth two dimensional data + affected by noise. The algorithm is exactly the same as for the one + dimensional case, only the math is a bit more tricky. The basic algorithm + is as follow: for each point of the two dimensional matrix extract a sub + - matrix, centered at that point and with a size equal to an odd number + "window_size". for this sub - matrix compute a least - square fit of a + polynomial surface, defined as + p(x, y) = a0 + a1 * x + a2 * y + a3 * x2 + a4 * y2 + a5 * x * y + ... . + + Note that x and y are equal to zero at the central point. + replace the initial central point with the value computed with the fit. + Note that because the fit coefficients are linear with respect to the data + spacing, they can pre - computed for efficiency. Moreover, it is important + to appropriately pad the borders of the data, with a mirror image of the + data itself, so that the evaluation of the fit at the borders of the data + can happen smoothly. + Here is the code for two dimensional filtering. + + Example + ------- + # create some sample twoD data + >>> x = np.linspace(-3,3,100) + >>> y = np.linspace(-3,3,100) + >>> X, Y = np.meshgrid(x,y) + >>> Z = np.exp( -(X**2+Y**2)) + + # add noise + >>> Zn = Z + np.random.normal( 0, 0.2, Z.shape ) + + # filter it + >>> Zf = sgolay2d( Zn, window_size=29, order=4) + + # do some plotting + >>> import matplotlib.pyplot as plt + >>> h=plt.matshow(Z) + >>> h=plt.matshow(Zn) + >>> h=plt.matshow(Zf) + """ + # number of terms in the polynomial expression + n_terms = (order + 1) * (order + 2) / 2.0 + + if window_size % 2 == 0: + raise ValueError('window_size must be odd') + + if window_size ** 2 < n_terms: + raise ValueError('order is too high for the window size') + + half_size = window_size // 2 + + # exponents of the polynomial. + # p(x,y) = a0 + a1*x + a2*y + a3*x^2 + a4*y^2 + a5*x*y + ... + # this line gives a list of two item tuple. Each tuple contains + # the exponents of the k-th term. First element of tuple is for x + # second element for y. + # Ex. exps = [(0,0), (1,0), (0,1), (2,0), (1,1), (0,2), ...] 
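+    # (for order=2, for instance, the comprehension below yields exactly the
+    #  six pairs shown above, in agreement with
+    #  n_terms = (order+1)*(order+2)/2 computed earlier)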
+ exps = [(k - n, n) for k in range(order + 1) for n in range(k + 1)] + + # coordinates of points + ind = np.arange(-half_size, half_size + 1, dtype=np.float64) + dx = np.repeat(ind, window_size) + dy = np.tile(ind, [window_size, 1]).reshape(window_size ** 2,) + + # build matrix of system of equation + A = np.empty((window_size ** 2, len(exps))) + for i, exp in enumerate(exps): + A[:, i] = (dx ** exp[0]) * (dy ** exp[1]) + + # pad input array with appropriate values at the four borders + new_shape = z.shape[0] + 2 * half_size, z.shape[1] + 2 * half_size + Z = np.zeros((new_shape)) + # top band + band = z[0, :] + Z[:half_size, half_size:-half_size] = band - \ + np.abs(np.flipud(z[1:half_size + 1, :]) - band) + # bottom band + band = z[-1, :] + Z[-half_size:, half_size:-half_size] = band + \ + np.abs(np.flipud(z[-half_size - 1:-1, :]) - band) + # left band + band = np.tile(z[:, 0].reshape(-1, 1), [1, half_size]) + Z[half_size:-half_size, :half_size] = band - \ + np.abs(np.fliplr(z[:, 1:half_size + 1]) - band) + # right band + band = np.tile(z[:, -1].reshape(-1, 1), [1, half_size]) + Z[half_size:-half_size, -half_size:] = band + \ + np.abs(np.fliplr(z[:, -half_size - 1:-1]) - band) + # central band + Z[half_size:-half_size, half_size:-half_size] = z + + # top left corner + band = z[0, 0] + Z[:half_size, :half_size] = band - \ + np.abs( + np.flipud(np.fliplr(z[1:half_size + 1, 1:half_size + 1])) - band) + # bottom right corner + band = z[-1, -1] + Z[-half_size:, -half_size:] = band + \ + np.abs(np.flipud(np.fliplr(z[-half_size - 1:-1, -half_size - 1:-1])) - + band) + + # top right corner + band = Z[half_size, -half_size:] + Z[:half_size, -half_size:] = band - \ + np.abs( + np.flipud(Z[half_size + 1:2 * half_size + 1, -half_size:]) - band) + # bottom left corner + band = Z[-half_size:, half_size].reshape(-1, 1) + Z[-half_size:, :half_size] = band - \ + np.abs( + np.fliplr(Z[-half_size:, half_size + 1:2 * half_size + 1]) - band) + + # solve system and convolve + if derivative == None: + m = np.linalg.pinv(A)[0].reshape((window_size, -1)) + return scipy.signal.fftconvolve(Z, m, mode='valid') + elif derivative == 'col': + c = np.linalg.pinv(A)[1].reshape((window_size, -1)) + return scipy.signal.fftconvolve(Z, -c, mode='valid') + elif derivative == 'row': + r = np.linalg.pinv(A)[2].reshape((window_size, -1)) + return scipy.signal.fftconvolve(Z, -r, mode='valid') + elif derivative == 'both': + c = np.linalg.pinv(A)[1].reshape((window_size, -1)) + r = np.linalg.pinv(A)[2].reshape((window_size, -1)) + return (scipy.signal.fftconvolve(Z, -r, mode='valid'), + scipy.signal.fftconvolve(Z, -c, mode='valid')) + + +class PPform(object): + + """The ppform of the piecewise polynomials + is given in terms of coefficients and breaks. + The polynomial in the ith interval is + x_{i} <= x < x_{i+1} + + S_i = sum(coefs[m,i]*(x-breaks[i])^(k-m), m=0..k) + where k is the degree of the polynomial. 
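+
+    For instance, a single linear piece (k = 1) evaluates as
+    S_i = coefs[0, i]*(x - breaks[i]) + coefs[1, i],
+    i.e. a local slope and intercept measured from the left break point.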
+ + Example + ------- + >>> import matplotlib.pyplot as plt + >>> coef = np.array([[1,1]]) # unit step function + >>> coef = np.array([[1,1],[0,1]]) # linear from 0 to 2 + >>> coef = np.array([[1,1],[1,1],[0,2]]) # linear from 0 to 2 + >>> breaks = [0,1,2] + >>> self = PPform(coef, breaks) + >>> x = linspace(-1,3) + >>> h=plt.plot(x,self(x)) + """ + + def __init__(self, coeffs, breaks, fill=0.0, sort=False, a=None, b=None): + if sort: + self.breaks = np.sort(breaks) + else: + self.breaks = np.asarray(breaks) + if a is None: + a = self.breaks[0] + if b is None: + b = self.breaks[-1] + self.coeffs = np.asarray(coeffs) + self.order = self.coeffs.shape[0] + self.fill = fill + self.a = a + self.b = b + + def __call__(self, xnew): + saveshape = np.shape(xnew) + xnew = np.ravel(xnew) + res = np.empty_like(xnew) + mask = (self.a <= xnew) & (xnew <= self.b) + res[~mask] = self.fill + xx = xnew.compress(mask) + indxs = np.searchsorted(self.breaks[:-1], xx) - 1 + indxs = indxs.clip(0, len(self.breaks)) + pp = self.coeffs + dx = xx - self.breaks.take(indxs) + if True: + v = pp[0, indxs] + for i in xrange(1, self.order): + v = dx * v + pp[i, indxs] + values = v + else: + V = np.vander(dx, N=self.order) + # values = np.diag(dot(V,pp[:,indxs])) + dot = np.dot + values = np.array([dot(V[k, :], pp[:, indxs[k]]) + for k in xrange(len(xx))]) + + res[mask] = values + res.shape = saveshape + return res + + def linear_extrapolate(self, output=True): + ''' + Return 1D PPform which extrapolate linearly outside its basic interval + ''' + + max_order = 2 + + if self.order <= max_order: + if output: + return self + else: + return + breaks = self.breaks.copy() + coefs = self.coeffs.copy() + #pieces = len(breaks) - 1 + + # Add new breaks beyond each end + breaks2add = breaks[[0, -1]] + np.array([-1, 1]) + newbreaks = np.hstack([breaks2add[0], breaks, breaks2add[1]]) + + dx = newbreaks[[0, -2]] - breaks[[0, -2]] + + dx = dx.ravel() + + # Get coefficients for the new last polynomial piece (a_n) + # by just relocate the previous last polynomial and + # then set all terms of order > maxOrder to zero + + a_nn = coefs[:, -1] + dxN = dx[-1] + + a_n = pl.polyreloc(a_nn, -dxN) # Relocate last polynomial + # set to zero all terms of order > maxOrder + a_n[0:self.order - max_order] = 0 + + # Get the coefficients for the new first piece (a_1) + # by first setting all terms of order > maxOrder to zero and then + # relocate the polynomial. 
+ + # Set to zero all terms of order > maxOrder, i.e., not using them + a_11 = coefs[self.order - max_order::, 0] + dx1 = dx[0] + + a_1 = pl.polyreloc(a_11, -dx1) # Relocate first polynomial + a_1 = np.hstack([zeros(self.order - max_order), a_1]) + + newcoefs = np.hstack([a_1.reshape(-1, 1), coefs, a_n.reshape(-1, 1)]) + if output: + return PPform(newcoefs, newbreaks, a=-inf, b=inf) + else: + self.coeffs = newcoefs + self.breaks = newbreaks + self.a = -inf + self.b = inf + + def derivative(self): + """ + Return first derivative of the piecewise polynomial + """ + + cof = pl.polyder(self.coeffs) + brks = self.breaks.copy() + return PPform(cof, brks, fill=self.fill) + + def integrate(self): + """ + Return the indefinite integral of the piecewise polynomial + """ + cof = pl.polyint(self.coeffs) + + pieces = len(self.breaks) - 1 + if 1 < pieces: + # evaluate each integrated polynomial at the right endpoint of its + # interval + xs = diff(self.breaks[:-1, ...], axis=0) + index = np.arange(pieces - 1) + + vv = xs * cof[0, index] + k = self.order + for i in xrange(1, k): + vv = xs * (vv + cof[i, index]) + + cof[-1] = np.hstack((0, vv)).cumsum() + + return PPform(cof, self.breaks, fill=self.fill) + +# def fromspline(self, xk, cvals, order, fill=0.0): +# N = len(xk) - 1 +# sivals = np.empty((order + 1, N), dtype=float) +# for m in xrange(order, -1, -1): +# fact = spec.gamma(m + 1) +# res = _fitpack._bspleval(xk[:-1], xk, cvals, order, m) +# res /= fact +# sivals[order - m, :] = res +# return self(sivals, xk, fill=fill) + + +class SmoothSpline(PPform): + + """ + Cubic Smoothing Spline. + + Parameters + ---------- + x : array-like + x-coordinates of data. (vector) + y : array-like + y-coordinates of data. (vector or matrix) + p : real scalar + smoothing parameter between 0 and 1: + 0 -> LS-straight line + 1 -> cubic spline interpolant + lin_extrap : bool + if False regular smoothing spline + if True a smoothing spline with a constraint on the ends to + ensure linear extrapolation outside the range of the data (default) + var : array-like + variance of each y(i) (default 1) + + Returns + ------- + pp : ppform + If xx is not given, return self-form of the spline. + + Given the approximate values + + y(i) = g(x(i))+e(i) + + of some smooth function, g, where e(i) is the error. 
SMOOTH tries to + recover g from y by constructing a function, f, which minimizes + + p * sum (Y(i) - f(X(i)))^2/d2(i) + (1-p) * int (f'')^2 + + + Example + ------- + >>> import numpy as np + >>> import matplotlib.pyplot as plt + >>> x = np.linspace(0,1) + >>> y = np.exp(x)+1e-1*np.random.randn(x.size) + >>> pp9 = SmoothSpline(x, y, p=.9) + >>> pp99 = SmoothSpline(x, y, p=.99, var=0.01) + >>> h=plt.plot(x,y, x,pp99(x),'g', x,pp9(x),'k', x,np.exp(x),'r') + + See also + -------- + lc2tr, dat2tr + + + References + ---------- + Carl de Boor (1978) + 'Practical Guide to Splines' + Springer Verlag + Uses EqXIV.6--9, self 239 + """ + + def __init__(self, xx, yy, p=None, lin_extrap=True, var=1): + coefs, brks = self._compute_coefs(xx, yy, p, var) + super(SmoothSpline, self).__init__(coefs, brks) + if lin_extrap: + self.linear_extrapolate(output=False) + + def _compute_coefs(self, xx, yy, p=None, var=1): + x, y = np.atleast_1d(xx, yy) + x = x.ravel() + dx = np.diff(x) + must_sort = (dx < 0).any() + if must_sort: + ind = x.argsort() + x = x[ind] + y = y[..., ind] + dx = np.diff(x) + + n = len(x) + + #ndy = y.ndim + szy = y.shape + + nd = prod(szy[:-1]) + ny = szy[-1] + + if n < 2: + raise ValueError('There must be >=2 data points.') + elif (dx <= 0).any(): + raise ValueError('Two consecutive values in x can not be equal.') + elif n != ny: + raise ValueError('x and y must have the same length.') + + dydx = np.diff(y) / dx + + if (n == 2): # % straight line + coefs = np.vstack([dydx.ravel(), y[0, :]]) + else: + + dx1 = 1. / dx + D = sp.spdiags(var * ones(n), 0, n, n) # The variance + + u, p = self._compute_u(p, D, dydx, dx, dx1, n) + dx1.shape = (n - 1, -1) + dx.shape = (n - 1, -1) + zrs = zeros(nd) + if p < 1: + # faster than yi-6*(1-p)*Q*u + ai = (y - (6 * (1 - p) * D * + diff(vstack([zrs, + diff(vstack([zrs, u, zrs]), axis=0) * dx1, + zrs]), axis=0)).T).T + else: + ai = y.reshape(n, -1) + + # The piecewise polynominals are written as + # fi=ai+bi*(x-xi)+ci*(x-xi)^2+di*(x-xi)^3 + # where the derivatives in the knots according to Carl de Boor are: + # ddfi = 6*p*[0;u] = 2*ci; + # dddfi = 2*diff([ci;0])./dx = 6*di; + # dfi = diff(ai)./dx-(ci+di.*dx).*dx = bi; + + ci = np.vstack([zrs, 3 * p * u]) + di = (diff(vstack([ci, zrs]), axis=0) * dx1 / 3) + bi = (diff(ai, axis=0) * dx1 - (ci + di * dx) * dx) + ai = ai[:n - 1, ...] + if nd > 1: + di = di.T + ci = ci.T + ai = ai.T + if not any(di): + if not any(ci): + coefs = vstack([bi.ravel(), ai.ravel()]) + else: + coefs = vstack([ci.ravel(), bi.ravel(), ai.ravel()]) + else: + coefs = vstack( + [di.ravel(), ci.ravel(), bi.ravel(), ai.ravel()]) + + return coefs, x + + def _compute_u(self, p, D, dydx, dx, dx1, n): + if p is None or p != 0: + data = [dx[1:n - 1], 2 * (dx[:n - 2] + dx[1:n - 1]), dx[:n - 2]] + R = sp.spdiags(data, [-1, 0, 1], n - 2, n - 2) + + if p is None or p < 1: + Q = sp.spdiags( + [dx1[:n - 2], -(dx1[:n - 2] + dx1[1:n - 1]), dx1[1:n - 1]], + [0, -1, -2], n, n - 2) + QDQ = (Q.T * D * Q) + if p is None or p < 0: + # Estimate p + p = 1. / \ + (1. + QDQ.diagonal().sum() / + (100. 
* R.diagonal().sum() ** 2)) + + if p == 0: + QQ = 6 * QDQ + else: + QQ = (6 * (1 - p)) * (QDQ) + p * R + else: + QQ = R + + # Make sure it uses symmetric matrix solver + ddydx = diff(dydx, axis=0) + sp.linalg.use_solver(useUmfpack=True) + u = 2 * sp.linalg.spsolve((QQ + QQ.T), ddydx) + return u.reshape(n - 2, -1), p + + +def _edge_case(m0, d1): + return np.where((d1 == 0) | (m0 == 0), 0.0, 1.0 / (1.0 / m0 + 1.0 / d1)) + + +def pchip_slopes(x, y): + # Determine the derivatives at the points y_k, d_k, by using + # PCHIP algorithm is: + # We choose the derivatives at the point x_k by + # Let m_k be the slope of the kth segment (between k and k+1) + # If m_k=0 or m_{k-1}=0 or sgn(m_k) != sgn(m_{k-1}) then d_k == 0 + # else use weighted harmonic mean: + # w_1 = 2h_k + h_{k-1}, w_2 = h_k + 2h_{k-1} + # 1/d_k = 1/(w_1 + w_2)*(w_1 / m_k + w_2 / m_{k-1}) + # where h_k is the spacing between x_k and x_{k+1} + + hk = x[1:] - x[:-1] + mk = (y[1:] - y[:-1]) / hk + smk = np.sign(mk) + condition = ((smk[1:] != smk[:-1]) | (mk[1:] == 0) | (mk[:-1] == 0)) + + w1 = 2 * hk[1:] + hk[:-1] + w2 = hk[1:] + 2 * hk[:-1] + whmean = 1.0 / (w1 + w2) * (w1 / mk[1:] + w2 / mk[:-1]) + + dk = np.zeros_like(y) + dk[1:-1][condition] = 0.0 + dk[1:-1][~condition] = 1.0 / whmean[~condition] + + # For end-points choose d_0 so that 1/d_0 = 1/m_0 + 1/d_1 unless + # one of d_1 or m_0 is 0, then choose d_0 = 0 + + dk[0] = _edge_case(mk[0], dk[1]) + dk[-1] = _edge_case(mk[-1], dk[-2]) + return dk + + +def slopes(x, y, method='parabola', tension=0, monotone=False): + ''' + Return estimated slopes y'(x) + + Parameters + ---------- + x, y : array-like + array containing the x- and y-data, respectively. + x must be sorted low to high... (no repeats) while + y can have repeated values. + method : string + defining method of estimation for yp. Valid options are: + 'Catmull-Rom' yp = (y[k+1]-y[k-1])/(x[k+1]-x[k-1]) + 'Cardinal' yp = (1-tension) * (y[k+1]-y[k-1])/(x[k+1]-x[k-1]) + 'parabola' + 'secant' average secants + yp = 0.5*((y[k+1]-y[k])/(x[k+1]-x[k]) + (y[k]-y[k-1])/(x[k]-x[k-1])) + tension : real scalar between 0 and 1. 
+ tension parameter used in Cardinal method + monotone : bool + If True modifies yp to preserve monoticity + + Returns + ------- + yp : ndarray + estimated slope + + References: + ----------- + Wikipedia: Monotone cubic interpolation + Cubic Hermite spline + + ''' + x = np.asarray(x, np.float_) + y = np.asarray(y, np.float_) + yp = np.zeros(y.shape, np.float_) + + dx = x[1:] - x[:-1] + # Compute the slopes of the secant lines between successive points + dydx = (y[1:] - y[:-1]) / dx + + method = method.lower() + if method.startswith('p'): # parabola'): + yp[1:-1] = (dydx[:-1] * dx[1:] + dydx[1:] * dx[:-1]) / \ + (dx[1:] + dx[:-1]) + yp[0] = 2.0 * dydx[0] - yp[1] + yp[-1] = 2.0 * dydx[-1] - yp[-2] + else: + # At the endpoints - use one-sided differences + yp[0] = dydx[0] + yp[-1] = dydx[-1] + if method.startswith('s'): # secant'): + # In the middle - use the average of the secants + yp[1:-1] = (dydx[:-1] + dydx[1:]) / 2.0 + else: # Cardinal or Catmull-Rom method + yp[1:-1] = (y[2:] - y[:-2]) / (x[2:] - x[:-2]) + if method.startswith('car'): # cardinal'): + yp = (1 - tension) * yp + + if monotone: + # Special case: intervals where y[k] == y[k+1] + # Setting these slopes to zero guarantees the spline connecting + # these points will be flat which preserves monotonicity + ii, = (dydx == 0.0).nonzero() + yp[ii] = 0.0 + yp[ii + 1] = 0.0 + + alpha = yp[:-1] / dydx + beta = yp[1:] / dydx + dist = alpha ** 2 + beta ** 2 + tau = 3.0 / np.sqrt(dist) + + # To prevent overshoot or undershoot, restrict the position vector + # (alpha, beta) to a circle of radius 3. If (alpha**2 + beta**2)>9, + # then set m[k] = tau[k]alpha[k]delta[k] and + # m[k+1] = tau[k]beta[b]delta[k] + # where tau = 3/sqrt(alpha**2 + beta**2). + + # Find the indices that need adjustment + indices_to_fix, = (dist > 9.0).nonzero() + for ii in indices_to_fix: + yp[ii] = tau[ii] * alpha[ii] * dydx[ii] + yp[ii + 1] = tau[ii] * beta[ii] * dydx[ii] + + return yp + + +def stineman_interp(xi, x, y, yp=None): + """ + Given data vectors *x* and *y*, the slope vector *yp* and a new + abscissa vector *xi*, the function :func:`stineman_interp` uses + Stineman interpolation to calculate a vector *yi* corresponding to + *xi*. + + Here's an example that generates a coarse sine curve, then + interpolates over a finer abscissa:: + + x = linspace(0,2*pi,20); y = sin(x); yp = cos(x) + xi = linspace(0,2*pi,40); + yi = stineman_interp(xi,x,y,yp); + plot(x,y,'o',xi,yi) + + The interpolation method is described in the article A + CONSISTENTLY WELL BEHAVED METHOD OF INTERPOLATION by Russell + W. Stineman. The article appeared in the July 1980 issue of + Creative Computing with a note from the editor stating that while + they were: + + not an academic journal but once in a while something serious + and original comes in adding that this was + "apparently a real solution" to a well known problem. + + For *yp* = *None*, the routine automatically determines the slopes + using the :func:`slopes` routine. + + *x* is assumed to be sorted in increasing order. + + For values ``xi[j] < x[0]`` or ``xi[j] > x[-1]``, the routine + tries an extrapolation. The relevance of the data obtained from + this, of course, is questionable... + + Original implementation by Halldor Bjornsson, Icelandic + Meteorolocial Office, March 2006 halldor at vedur.is + + Completely reworked and optimized for Python by Norbert Nemec, + Institute of Theoretical Physics, University or Regensburg, April + 2006 Norbert.Nemec at physik.uni-regensburg.de + """ + + # Cast key variables as float. 
+    x = np.asarray(x, np.float_)
+    y = np.asarray(y, np.float_)
+    assert x.shape == y.shape
+    #N = len(y)
+
+    if yp is None:
+        yp = slopes(x, y)
+    else:
+        yp = np.asarray(yp, np.float_)
+
+    xi = np.asarray(xi, np.float_)
+    #yi = np.zeros(xi.shape, np.float_)
+
+    # calculate linear slopes
+    dx = x[1:] - x[:-1]
+    dy = y[1:] - y[:-1]
+    s = dy / dx  # note: len(s) is N-1, so the last valid index is N-2
+
+    # find the segment each xi is in
+    # this line actually is the key to the efficiency of this implementation
+    idx = np.searchsorted(x[1:-1], xi)
+
+    # now we have generally: x[idx[j]] <= xi[j] <= x[idx[j]+1]
+    # except at the boundaries, where it may be that xi[j] < x[0] or xi[j] >
+    # x[-1]
+
+    # the y-values that would come out from a linear interpolation:
+    sidx = s.take(idx)
+    xidx = x.take(idx)
+    yidx = y.take(idx)
+    xidxp1 = x.take(idx + 1)
+    yo = yidx + sidx * (xi - xidx)
+
+    # the difference that comes when using the slopes given in yp
+    # using the yp slope of the left point
+    dy1 = (yp.take(idx) - sidx) * (xi - xidx)
+    # using the yp slope of the right point
+    dy2 = (yp.take(idx + 1) - sidx) * (xi - xidxp1)
+
+    dy1dy2 = dy1 * dy2
+    # The following is optimized for Python. The solution actually
+    # does more calculations than necessary but exploiting the power
+    # of numpy, this is far more efficient than coding a loop by hand
+    # in Python
+    dy1mdy2 = np.where(dy1dy2, dy1 - dy2, np.inf)
+    dy1pdy2 = np.where(dy1dy2, dy1 + dy2, np.inf)
+    yi = yo + dy1dy2 * np.choose(
+        np.array(np.sign(dy1dy2), np.int32) + 1,
+        ((2 * xi - xidx - xidxp1) / ((dy1mdy2) * (xidxp1 - xidx)), 0.0,
+         1 / (dy1pdy2)))
+    return yi
+
+
+class StinemanInterp(object):
+
+    '''
+    Returns an interpolating function
+    that runs through a set of points according to the algorithm of
+    Stineman (1980).
+
+    Parameters
+    ----------
+    x,y : array-like
+        coordinates of points defining the interpolating function.
+    yp : array-like
+        slopes of the interpolating function at x.
+        Optional: only given if they are known, else the argument is not used.
+    method : string
+        method for computing the slope at the given points if the slope is
+        not known. With method="parabola" the slopes are calculated from a
+        parabola through every three points.
+
+    Notes
+    -----
+    The interpolation method is described by Russell W. Stineman (1980).
+
+    According to Stineman, the interpolation procedure has "the following
+    properties:
+
+    1. If values of the ordinates of the specified points change
+       monotonically, and the slopes of the line segments joining the points
+       change monotonically, then the interpolating curve and its slope will
+       change monotonically.
+    2. If the slopes of the line segments joining the specified points change
+       monotonically, then the slopes of the interpolating curve will change
+       monotonically.
+    3. Suppose that the conditions in (1) or (2) are satisfied by a set of
+       points, but a small change in the ordinate or slope at one of the
+       points will result in conditions (1) or (2) no longer being satisfied.
+       Then making this small change in the ordinate or slope at a point will
+       cause no more than a small change in the interpolating curve."
+
+    The method is based on rational interpolation with specially chosen
+    rational functions to satisfy the above three conditions.
+
+    Slopes computed at the given points with the methods provided by the
+    `StinemanInterp' function satisfy Stineman's requirements. 
+ The original method suggested by Stineman(method="scaledstineman", the + default, and "stineman") result in lower slopes near abrupt steps or spikes + in the point sequence, and therefore a smaller tendency for overshooting. + The method based on a second degree polynomial(method="parabola") provides + better approximation to smooth functions, but it results in in higher + slopes near abrupt steps or spikes and can lead to some overshooting where + Stineman's method does not. Both methods lead to much less tendency for + `spurious' oscillations than traditional interplation methods based on + polynomials, such as splines + (see the examples section). + + Stineman states that "The complete assurance that the procedure will never + generate `wild' points makes it attractive as a general purpose procedure". + + This interpolation method has been implemented in Matlab and R in addition + to Python. + + Examples + -------- + >>> import wafo.interpolate as wi + >>> import numpy as np + >>> import matplotlib.pyplot as plt + >>> x = np.linspace(0,2*pi,20) + >>> y = np.sin(x); yp = np.cos(x) + >>> xi = np.linspace(0,2*pi,40); + >>> yi = wi.StinemanInterp(x,y)(xi) + >>> yi1 = wi.CubicHermiteSpline(x,y, yp)(xi) + >>> yi2 = wi.Pchip(x,y, method='parabola')(xi) + >>> h=plt.subplot(211) + >>> h=plt.plot(x,y,'o',xi,yi,'r', xi,yi1, 'g', xi,yi1, 'b') + >>> h=plt.subplot(212) + >>> h=plt.plot(xi,np.abs(sin(xi)-yi), 'r', + ... xi, np.abs(sin(xi)-yi1), 'g', + ... xi, np.abs(sin(xi)-yi2), 'b') + + References + ---------- + Stineman, R. W. A Consistently Well Behaved Method of Interpolation. + Creative Computing (1980), volume 6, number 7, p. 54-57. + + See Also + -------- + slopes, Pchip + ''' + def __init__(self, x, y, yp=None, method='parabola', monotone=False): + if yp is None: + yp = slopes(x, y, method, monotone) + self.x = np.asarray(x, np.float_) + self.y = np.asarray(y, np.float_) + self.yp = np.asarray(yp, np.float_) + + def __call__(self, xi): + xi = np.asarray(xi, np.float_) + x = self.x + y = self.y + yp = self.yp + # calculate linear slopes + dx = x[1:] - x[:-1] + dy = y[1:] - y[:-1] + s = dy / dx # note length of s is N-1 so last element is #N-2 + + # find the segment each xi is in + # this line actually is the key to the efficiency of this + # implementation + idx = np.searchsorted(x[1:-1], xi) + + # now we have generally: x[idx[j]] <= xi[j] <= x[idx[j]+1] + # except at the boundaries, where it may be that xi[j] < x[0] or xi[j] + # > x[-1] + + # the y-values that would come out from a linear interpolation: + sidx = s.take(idx) + xidx = x.take(idx) + yidx = y.take(idx) + xidxp1 = x.take(idx + 1) + yo = yidx + sidx * (xi - xidx) + + # the difference that comes when using the slopes given in yp + # using the yp slope of the left point + dy1 = (yp.take(idx) - sidx) * (xi - xidx) + # using the yp slope of the right point + dy2 = (yp.take(idx + 1) - sidx) * (xi - xidxp1) + + dy1dy2 = dy1 * dy2 + # The following is optimized for Python. 
The solution actually + # does more calculations than necessary but exploiting the power + # of numpy, this is far more efficient than coding a loop by hand + # in Python + dy1mdy2 = np.where(dy1dy2, dy1 - dy2, np.inf) + dy1pdy2 = np.where(dy1dy2, dy1 + dy2, np.inf) + yi = yo + dy1dy2 * np.choose( + np.array(np.sign(dy1dy2), np.int32) + 1, + ((2 * xi - xidx - xidxp1) / ((dy1mdy2) * (xidxp1 - xidx)), 0.0, + 1 / (dy1pdy2))) + return yi + + +class StinemanInterp2(PiecewisePolynomial): + + def __init__(self, x, y, yp=None, method='parabola', monotone=False): + if yp is None: + yp = slopes(x, y, method, monotone=monotone) + super(StinemanInterp2, self).__init__(x, zip(y, yp)) + + +class CubicHermiteSpline(PiecewisePolynomial): + + ''' + Piecewise Cubic Hermite Interpolation using Catmull-Rom + method for computing the slopes. + ''' + + def __init__(self, x, y, yp=None, method='Catmull-Rom'): + if yp is None: + yp = slopes(x, y, method, monotone=False) + super(CubicHermiteSpline, self).__init__(x, zip(y, yp), orders=3) + + +class Pchip(PiecewisePolynomial): + + """PCHIP 1-d monotonic cubic interpolation + + Description + ----------- + x and y are arrays of values used to approximate some function f: + y = f(x) + This class factory function returns a callable class whose __call__ method + uses monotonic cubic, interpolation to find the value of new points. + + Parameters + ---------- + x : array + A 1D array of monotonically increasing real values. x cannot + include duplicate values (otherwise f is overspecified) + y : array + A 1-D array of real values. y's length along the interpolation + axis must be equal to the length of x. + yp : array + slopes of the interpolating function at x. + Optional: only given if they are known, else the argument is not used. + method : string + method for computing the slope at the given points if the slope is not + known. With method="parabola" calculates the slopes from a parabola + through every three points. + + Assumes x is sorted in monotonic order (e.g. x[1] > x[0]) + + Example + ------- + >>> import wafo.interpolate as wi + + # Create a step function (will demonstrate monotonicity) + >>> x = np.arange(7.0) - 3.0 + >>> y = np.array([-1.0, -1,-1,0,1,1,1]) + + # Interpolate using monotonic piecewise Hermite cubic spline + >>> xvec = np.arange(599.)/100. - 3.0 + >>> yvec = wi.Pchip(x, y)(xvec) + + # Call the Scipy cubic spline interpolator + >>> from scipy.interpolate import interpolate + >>> function = interpolate.interp1d(x, y, kind='cubic') + >>> yvec1 = function(xvec) + + # Non-montonic cubic Hermite spline interpolator using + # Catmul-Rom method for computing slopes... + >>> yvec2 = wi.CubicHermiteSpline(x,y)(xvec) + + >>> yvec3 = wi.StinemanInterp(x, y)(xvec) + + # Plot the results + >>> import matplotlib.pyplot as plt + >>> h=plt.plot(x, y, 'ro') + >>> h=plt.plot(xvec, yvec, 'b') + >>> h=plt.plot(xvec, yvec1, 'k') + >>> h=plt.plot(xvec, yvec2, 'g') + >>> h=plt.plot(xvec, yvec3, 'm') + >>> h=plt.title("pchip() step function test") + + >>> h=plt.xlabel("X") + >>> h=plt.ylabel("Y") + >>> h=plt.title("Comparing pypchip() vs. Scipy interp1d() vs. 
non-monotonic CHS") + >>> legends = ["Data", "pypchip()", "interp1d","CHS", 'SI'] + >>> h=plt.legend(legends, loc="upper left") + >>> plt.show() + + """ + + def __init__(self, x, y, yp=None, method='secant'): + if yp is None: + yp = slopes(x, y, method=method, monotone=True) + super(Pchip, self).__init__(x, zip(y, yp), orders=3) + + +def test_smoothing_spline(): + x = linspace(0, 2 * pi + pi / 4, 20) + y = sin(x) # + np.random.randn(x.size) + pp = SmoothSpline(x, y, p=1) + x1 = linspace(-1, 2 * pi + pi / 4 + 1, 20) + y1 = pp(x1) + pp1 = pp.derivative() + pp0 = pp1.integrate() + dy1 = pp1(x1) + y01 = pp0(x1) + #dy = y-y1 + import matplotlib.pyplot as plb + + plb.plot(x, y, x1, y1, '.', x1, dy1, 'ro', x1, y01, 'r-') + plb.show() + pass + #tck = interpolate.splrep(x, y, s=len(x)) + + +def compare_methods(): + # + # Sine wave test + # + fun = np.sin + # Create a example vector containing a sine wave. + x = np.arange(30.0) / 10. + y = fun(x) + + # Interpolate the data above to the grid defined by "xvec" + xvec = np.arange(250.) / 100. + + # Initialize the interpolator slopes + # Create the pchip slopes + m = slopes(x, y, method='parabola', monotone=True) + m1 = slopes(x, y, method='parabola', monotone=False) + m2 = slopes(x, y, method='catmul', monotone=False) + m3 = pchip_slopes(x, y) + + # Call the monotonic piece-wise Hermite cubic interpolator + yvec = Pchip(x, y, m)(xvec) + yvec1 = Pchip(x, y, m1)(xvec) + yvec2 = Pchip(x, y, m2)(xvec) + yvec3 = Pchip(x, y, m3)(xvec) + + import matplotlib.pyplot as plt + + plt.figure() + plt.plot(x, y, 'ro', xvec, fun(xvec), 'r') + plt.title("pchip() Sin test code") + + # Plot the interpolated points + plt.plot(xvec, yvec, xvec, yvec1, xvec, yvec2, 'g.', xvec, yvec3) + plt.legend( + ['true', 'true', 'parbola_monoton', 'parabola', 'catmul', 'pchip'], + frameon=False, loc=0) + plt.ioff() + plt.show() + + +def demo_monoticity(): + # Step function test... + import matplotlib.pyplot as plt + plt.figure(2) + plt.title("pchip() step function test") + # Create a step function (will demonstrate monotonicity) + x = np.arange(7.0) - 3.0 + y = np.array([-1.0, -1, -1, 0, 1, 1, 1]) + + # Interpolate using monotonic piecewise Hermite cubic spline + xvec = np.arange(599.) / 100. - 3.0 + + # Create the pchip slopes + m = slopes(x, y, monotone=True) +# m1 = slopes(x, y, monotone=False) +# m2 = slopes(x,y,method='catmul',monotone=False) + m3 = pchip_slopes(x, y) + # Interpolate... + yvec = Pchip(x, y, m)(xvec) + + # Call the Scipy cubic spline interpolator + from scipy.interpolate import interpolate as ip + function = ip.interp1d(x, y, kind='cubic') + yvec2 = function(xvec) + + # Non-montonic cubic Hermite spline interpolator using + # Catmul-Rom method for computing slopes... + yvec3 = CubicHermiteSpline(x, y)(xvec) + yvec4 = StinemanInterp(x, y)(xvec) + yvec5 = Pchip(x, y, m3)(xvec) # @UnusedVariable + + # Plot the results + plt.plot(x, y, 'ro', label='Data') + plt.plot(xvec, yvec, 'b', label='Pchip') + plt.plot(xvec, yvec2, 'k', label='interp1d') + plt.plot(xvec, yvec3, 'g', label='CHS') + plt.plot(xvec, yvec4, 'm', label='Stineman') + #plt.plot(xvec, yvec5, 'yo', label='Pchip2') + plt.xlabel("X") + plt.ylabel("Y") + plt.title("Comparing Pchip() vs. Scipy interp1d() vs. 
non-monotonic CHS") +# legends = ["Data", "Pchip()", "interp1d","CHS", 'Stineman'] + plt.legend(loc="upper left", frameon=False) + plt.ioff() + plt.show() + + +def test_func(): + from scipy import interpolate + import matplotlib.pyplot as plt + import matplotlib + matplotlib.interactive(False) + + coef = np.array([[1, 1], [0, 1]]) # linear from 0 to 2 + # coef = np.array([[1,1],[1,1],[0,2]]) # linear from 0 to 2 + breaks = [0, 1, 2] + pp = PPform(coef, breaks, a=-100, b=100) + x = linspace(-1, 3, 20) + y = pp(x) # @UnusedVariable + + x = linspace(0, 2 * pi + pi / 4, 20) + y = sin(x) + np.random.randn(x.size) + tck = interpolate.splrep(x, y, s=len(x)) # @UndefinedVariable + xnew = linspace(0, 2 * pi, 100) + ynew = interpolate.splev(xnew, tck, der=0) # @UndefinedVariable + tck0 = interpolate.splmake( # @UndefinedVariable + xnew, ynew, order=3, kind='smoothest', conds=None) + pp = interpolate.ppform.fromspline(*tck0) # @UndefinedVariable + + plt.plot(x, y, "x", xnew, ynew, xnew, sin(xnew), x, y, "b", x, pp(x), 'g') + plt.legend(['Linear', 'Cubic Spline', 'True']) + plt.title('Cubic-spline interpolation') + plt.show() + + t = np.arange(0, 1.1, .1) + x = np.sin(2 * np.pi * t) + y = np.cos(2 * np.pi * t) + _tck1, _u = interpolate.splprep([t, y], s=0) # @UndefinedVariable + tck2 = interpolate.splrep(t, y, s=len(t), task=0) # @UndefinedVariable + # interpolate.spl + tck = interpolate.splmake(t, y, order=3, kind='smoothest', conds=None) # @UndefinedVariable + self = interpolate.ppform.fromspline(*tck2) # @UndefinedVariable + plt.plot(t, self(t)) + plt.show() + pass + + +def test_pp(): + coef = np.array([[1, 1], [0, 0]]) # linear from 0 to 2 @UnusedVariable + + # quadratic from 0 to 1 and 1 to 2. + coef = np.array([[1, 1], [1, 1], [0, 2]]) + dc = pl.polyder(coef, 1) + c2 = pl.polyint(dc, 1) # @UnusedVariable + breaks = [0, 1, 2] + pp = PPform(coef, breaks) + pp(0.5) + pp(1) + pp(1.5) + dpp = pp.derivative() + import pylab as plb + x = plb.linspace(-1, 3) + plb.plot(x, pp(x), x, dpp(x), '.') + plb.show() + + +def test_docstrings(): + import doctest + doctest.testmod() + + +if __name__ == '__main__': + test_func() + # test_doctstrings() + # test_smoothing_spline() + # compare_methods() + #demo_monoticity() diff --git a/pywafo/src/wafo/kdetools.py b/pywafo/src/wafo/kdetools.py index 4e78f62..1c358fc 100644 --- a/pywafo/src/wafo/kdetools.py +++ b/pywafo/src/wafo/kdetools.py @@ -1,4293 +1,4507 @@ -#------------------------------------------------------------------------------- -# Name: kdetools -# Purpose: -# -# Author: pab -# -# Created: 01.11.2008 -# Copyright: (c) pab 2008 -# Licence: LGPL -#------------------------------------------------------------------------------- -#!/usr/bin/env python -from __future__ import division -import copy -import numpy as np -import scipy -import warnings -from itertools import product -from scipy import interpolate, linalg, optimize, sparse, special, stats -from scipy.special import gamma -from scipy.ndimage.morphology import distance_transform_edt -from numpy import pi, sqrt, atleast_2d, exp, newaxis #@UnresolvedImport - -from wafo.misc import meshgrid, nextpow2, tranproc #, trangood -from wafo.wafodata import PlotData -from wafo.dctpack import dct, dctn, idctn -from wafo.plotbackend import plotbackend as plt -try: - from wafo import fig -except ImportError: - print 'fig import only supported on Windows' - -_TINY = np.finfo(float).machar.tiny - -def _invnorm(q): - return special.ndtri(q) - -_stats_epan = (1. / 5, 3. / 5, np.inf) -_stats_biwe = (1. / 7, 5. / 7, 45. 
/ 2) -_stats_triw = (1. / 9, 350. / 429, np.inf) -_stats_rect = (1. / 3, 1. / 2, np.inf) -_stats_tria = (1. / 6, 2. / 3, np.inf) -_stats_lapl = (2, 1. / 4, np.inf) -_stats_logi = (pi ** 2 / 3, 1. / 6, 1 / 42) -_stats_gaus = (1, 1. / (2 * sqrt(pi)), 3. / (8 * sqrt(pi))) - -__all__ = ['sphere_volume', 'TKDE', 'KDE', 'Kernel', 'accum', 'qlevels', - 'iqrange', 'gridcount', 'kde_demo1', 'kde_demo2', 'test_docstrings'] -def sphere_volume(d, r=1.0): - """ - Returns volume of d-dimensional sphere with radius r - - Parameters - ---------- - d : scalar or array_like - dimension of sphere - r : scalar or array_like - radius of sphere (default 1) - - Example - ------- - >>> sphere_volume(2., r=2.) - 12.566370614359172 - >>> sphere_volume(2., r=1.) - 3.1415926535897931 - - Reference - --------- - Wand,M.P. and Jones, M.C. (1995) - 'Kernel smoothing' - Chapman and Hall, pp 105 - """ - return (r ** d) * 2.0 * pi ** (d / 2.0) / (d * gamma(d / 2.0)) - -class KDEgauss(object): - """ Kernel-Density Estimator base class. - - Parameters - ---------- - data : (# of dims, # of data)-array - datapoints to estimate from - hs : array-like (optional) - smooting parameter vector/matrix. - (default compute from data using kernel.get_smoothing function) - alpha : real scalar (optional) - sensitivity parameter (default 0 regular KDE) - A good choice might be alpha = 0.5 ( or 1/D) - alpha = 0 Regular KDE (hs is constant) - 0 < alpha <= 1 Adaptive KDE (Make hs change) - - - Members - ------- - d : int - number of dimensions - n : int - number of datapoints - - Methods - ------- - kde.eval_grid_fast(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde(x0, x1,..., xd) : array - same as kde.eval_grid_fast(x0, x1,..., xd) - """ - - def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, xmax=None, inc=512): - self.dataset = atleast_2d(data) - self.hs = hs - self.kernel = kernel if kernel else Kernel('gauss') - self.alpha = alpha - self.xmin = xmin - self.xmax = xmax - self.inc = inc - self.initialize() - - def initialize(self): - self.d, self.n = self.dataset.shape - self._set_xlimits() - self._initialize() - - def _initialize(self): - self._compute_smoothing() - - def _compute_smoothing(self): - """Computes the smoothing matrix - """ - get_smoothing = self.kernel.get_smoothing - h = self.hs - if h is None: - h = get_smoothing(self.dataset) - h = np.atleast_1d(h) - hsiz = h.shape - - if (len(hsiz) == 1) or (self.d == 1): - if max(hsiz) == 1: - h = h * np.ones(self.d) - else: - h.shape = (self.d,) # make sure it has the correct dimension - - # If h negative calculate automatic values - ind, = np.where(h <= 0) - for i in ind.tolist(): # - h[i] = get_smoothing(self.dataset[i]) - deth = h.prod() - self.inv_hs = np.diag(1.0 / h) - else: #fully general smoothing matrix - deth = linalg.det(h) - if deth <= 0: - raise ValueError('bandwidth matrix h must be positive definit!') - self.inv_hs = linalg.inv(h) - self.hs = h - self._norm_factor = deth * self.n - - def _set_xlimits(self): - amin = self.dataset.min(axis= -1) - amax = self.dataset.max(axis= -1) - iqr = iqrange(self.dataset, axis= -1) - sigma = np.minimum(np.std(self.dataset, axis= -1, ddof=1), iqr / 1.34) - #xyzrange = amax - amin - #offset = xyzrange / 4.0 - offset = 2 * sigma - if self.xmin is None: - self.xmin = amin - offset - else: - self.xmin = self.xmin * np.ones((self.d,1)) - if self.xmax is None: - self.xmax = amax + offset - else: - self.xmax = self.xmax * np.ones((self.d,1)) - - def eval_grid_fast(self, *args, **kwds): 
- """Evaluate the estimated pdf on a grid. - - Parameters - ---------- - arg_0,arg_1,... arg_d-1 : vectors - Alternatively, if no vectors is passed in then - arg_i = linspace(self.xmin[i], self.xmax[i], self.inc) - output : string optional - 'value' if value output - 'data' if object output - - Returns - ------- - values : array-like - The values evaluated at meshgrid(*args). - - """ - if len(args) == 0: - args = [] - for i in range(self.d): - args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc)) - self.args = args - return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds) - - def _eval_grid_fast(self, *args, **kwds): - X = np.vstack(args) - d, inc = X.shape - #dx = X[:, 1] - X[:, 0] - R = X.max(axis=-1)- X.min(axis=-1) - - t_star = (self.hs/R)**2 - I = (np.asfarray(np.arange(0, inc))*pi)**2 - In = [] - - for i in range(d): - In.append(I * t_star[i] * 0.5) - - Inc = meshgrid(*In) if d > 1 else In - - kw = np.zeros((inc,)*d) - for i in range(d): - kw += exp(-Inc[i]) - y = kwds.get('y', 1.0) - d, n = self.dataset.shape - # Find the binned kernel weights, c. - c = gridcount(self.dataset, X, y=y)/n - # Perform the convolution. - at = dctn(c) * kw - z = idctn(at)*at.size/np.prod(R) - return z*(z>0.0) - - def _eval_grid_fun(self, eval_grd, *args, **kwds): - output = kwds.pop('output', 'value') - f = eval_grd(*args, **kwds) - if output == 'value': - return f - else: - titlestr = 'Kernel density estimate (%s)' % self.kernel.name - kwds2 = dict(title=titlestr) - kwds2['plot_kwds'] = dict(plotflag=1) - kwds2.update(**kwds) - args = self.args - if self.d == 1: - args = args[0] - wdata = PlotData(f, args, **kwds2) - if self.d > 1: - PL = np.r_[10:90:20, 95, 99, 99.9] - try: - ql = qlevels(f, p=PL) - wdata.clevels = ql - wdata.plevels = PL - except: - pass - return wdata - - def _check_shape(self, points): - points = atleast_2d(points) - d, m = points.shape - if d != self.d: - if d == 1 and m == self.d: - # points was passed in as a row vector - points = np.reshape(points, (self.d, 1)) - else: - msg = "points have dimension %s, dataset has dimension %s" % (d, - self.d) - raise ValueError(msg) - return points - def eval_points(self, points, **kwds): - """Evaluate the estimated pdf on a set of points. - - Parameters - ---------- - points : (# of dimensions, # of points)-array - Alternatively, a (# of dimensions,) vector can be passed in and - treated as a single point. - - Returns - ------- - values : (# of points,)-array - The values at each point. - - Raises - ------ - ValueError if the dimensionality of the input points is different than - the dimensionality of the KDE. - """ - - points = self._check_shape(points) - return self._eval_points(points, **kwds) - - def _eval_points(self, points, **kwds): - pass - - __call__ = eval_grid_fast -class _KDE(object): - """ Kernel-Density Estimator base class. - - Parameters - ---------- - data : (# of dims, # of data)-array - datapoints to estimate from - hs : array-like (optional) - smooting parameter vector/matrix. - (default compute from data using kernel.get_smoothing function) - kernel : kernel function object. 
- kernel must have get_smoothing method - alpha : real scalar (optional) - sensitivity parameter (default 0 regular KDE) - A good choice might be alpha = 0.5 ( or 1/D) - alpha = 0 Regular KDE (hs is constant) - 0 < alpha <= 1 Adaptive KDE (Make hs change) - - - Members - ------- - d : int - number of dimensions - n : int - number of datapoints - - Methods - ------- - kde.eval_grid_fast(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_grid(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_points(points) : array - evaluate the estimated pdf on a provided set of points - kde(x0, x1,..., xd) : array - same as kde.eval_grid(x0, x1,..., xd) - """ - - def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, xmax=None, inc=512): - self.dataset = atleast_2d(data) - self.hs = hs - self.kernel = kernel if kernel else Kernel('gauss') - self.alpha = alpha - self.xmin = xmin - self.xmax = xmax - self.inc = inc - self.initialize() - - def initialize(self): - self.d, self.n = self.dataset.shape - if self.n>1: - self._set_xlimits() - self._initialize() - - def _initialize(self): - pass - - def _set_xlimits(self): - amin = self.dataset.min(axis= -1) - amax = self.dataset.max(axis= -1) - iqr = iqrange(self.dataset, axis= -1) - self._sigma = np.minimum(np.std(self.dataset, axis= -1, ddof=1), iqr / 1.34) - #xyzrange = amax - amin - #offset = xyzrange / 4.0 - offset = self._sigma - if self.xmin is None: - self.xmin = amin - offset - else: - self.xmin = self.xmin * np.ones((self.d,1)) - if self.xmax is None: - self.xmax = amax + offset - else: - self.xmax = self.xmax * np.ones((self.d,1)) - - - - def eval_grid_fast(self, *args, **kwds): - """Evaluate the estimated pdf on a grid. - - Parameters - ---------- - arg_0,arg_1,... arg_d-1 : vectors - Alternatively, if no vectors is passed in then - arg_i = linspace(self.xmin[i], self.xmax[i], self.inc) - output : string optional - 'value' if value output - 'data' if object output - - Returns - ------- - values : array-like - The values evaluated at meshgrid(*args). - - """ - if len(args) == 0: - args = [] - for i in range(self.d): - args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc)) - self.args = args - return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds) - - def _eval_grid_fast(self, *args, **kwds): - pass - - def eval_grid(self, *args, **kwds): - """Evaluate the estimated pdf on a grid. - - Parameters - ---------- - arg_0,arg_1,... arg_d-1 : vectors - Alternatively, if no vectors is passed in then - arg_i = linspace(self.xmin[i], self.xmax[i], self.inc) - output : string optional - 'value' if value output - 'data' if object output - - Returns - ------- - values : array-like - The values evaluated at meshgrid(*args). 
- - """ - if len(args) == 0: - args = [] - for i in range(self.d): - args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc)) - self.args = args - return self._eval_grid_fun(self._eval_grid, *args, **kwds) - def _eval_grid(self, *args): - pass - def _eval_grid_fun(self, eval_grd, *args, **kwds): - output = kwds.pop('output', 'value') - f = eval_grd(*args, **kwds) - if output == 'value': - return f - else: - titlestr = 'Kernel density estimate (%s)' % self.kernel.name - kwds2 = dict(title=titlestr) - - kwds2['plot_kwds'] = kwds.pop('plot_kwds', dict(plotflag=1)) - kwds2.update(**kwds) - args = self.args - if self.d == 1: - args = args[0] - wdata = PlotData(f, args, **kwds2) - if self.d > 1: - PL = np.r_[10:90:20, 95, 99, 99.9] - try: - ql = qlevels(f, p=PL) - wdata.clevels = ql - wdata.plevels = PL - except: - pass - return wdata - - def _check_shape(self, points): - points = atleast_2d(points) - d, m = points.shape - if d != self.d: - if d == 1 and m == self.d: - # points was passed in as a row vector - points = np.reshape(points, (self.d, 1)) - else: - msg = "points have dimension %s, dataset has dimension %s" % (d, - self.d) - raise ValueError(msg) - return points - def eval_points(self, points, **kwds): - """Evaluate the estimated pdf on a set of points. - - Parameters - ---------- - points : (# of dimensions, # of points)-array - Alternatively, a (# of dimensions,) vector can be passed in and - treated as a single point. - - Returns - ------- - values : (# of points,)-array - The values at each point. - - Raises - ------ - ValueError if the dimensionality of the input points is different than - the dimensionality of the KDE. - """ - - points = self._check_shape(points) - return self._eval_points(points, **kwds) - - def _eval_points(self, points, **kwds): - pass - - __call__ = eval_grid - -class TKDE(_KDE): - """ Transformation Kernel-Density Estimator. - - Parameters - ---------- - dataset : (# of dims, # of data)-array - datapoints to estimate from - hs : array-like (optional) - smooting parameter vector/matrix. - (default compute from data using kernel.get_smoothing function) - kernel : kernel function object. - kernel must have get_smoothing method - alpha : real scalar (optional) - sensitivity parameter (default 0 regular KDE) - A good choice might be alpha = 0.5 ( or 1/D) - alpha = 0 Regular KDE (hs is constant) - 0 < alpha <= 1 Adaptive KDE (Make hs change) - xmin, xmax : vectors - specifying the default argument range for the kde.eval_grid methods. - For the kde.eval_grid_fast methods the values must cover the range of the data. - (default min(data)-range(data)/4, max(data)-range(data)/4) - If a single value of xmin or xmax is given then the boundary is the is - the same for all dimensions. - inc : scalar integer - defining the default dimension of the output from kde.eval_grid methods (default 512) - (For kde.eval_grid_fast: A value below 50 is very fast to compute but - may give some inaccuracies. Values between 100 and 500 give very - accurate results) - L2 : array-like - vector of transformation parameters (default 1 no transformation) - t(xi;L2) = xi^L2*sign(L2) for L2(i) ~= 0 - t(xi;L2) = log(xi) for L2(i) == 0 - If single value of L2 is given then the transformation is the same in all directions. 
- - Members - ------- - d : int - number of dimensions - n : int - number of datapoints - - Methods - ------- - kde.eval_grid_fast(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_grid(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_points(points) : array - evaluate the estimated pdf on a provided set of points - kde(x0, x1,..., xd) : array - same as kde.eval_grid(x0, x1,..., xd) - - - Example - ------- - N = 20 - data = np.random.rayleigh(1, size=(N,)) - >>> data = np.array([ 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887, - ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, - ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, - ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) - - >>> import wafo.kdetools as wk - >>> x = np.linspace(0.01, max(data.ravel()) + 1, 10) - >>> kde = wk.TKDE(data, hs=0.5, L2=0.5) - >>> f = kde(x) - >>> f - array([ 1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318, - 0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882]) - - >>> kde.eval_grid(x) - array([ 1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318, - 0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882]) - - >>> kde.eval_grid_fast(x) - array([ 1.06437223, 0.46203314, 0.39593137, 0.32781899, 0.26276433, - 0.20532206, 0.15723498, 0.11843998, 0.08797755, 0. ]) - - import pylab as plb - h1 = plb.plot(x, f) # 1D probability density plot - t = np.trapz(f, x) - """ - - def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, - xmax=None, inc=512, L2=None): - self.L2 = L2 - super(TKDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc) - - def _initialize(self): - self._check_xmin() - tdataset = self._dat2gaus(self.dataset) - xmin = self.xmin - if xmin is not None: - xmin = self._dat2gaus(np.reshape(xmin,(-1,1))) - xmax = self.xmax - if xmax is not None: - xmax = self._dat2gaus(np.reshape(xmax,(-1,1))) - self.tkde = KDE(tdataset, self.hs, self.kernel, self.alpha, xmin, xmax, - self.inc) - if self.inc is None: - self.inc = self.tkde.inc - def _check_xmin(self): - if self.L2 is not None: - amin = self.dataset.min(axis= -1) - L2 = np.atleast_1d(self.L2) * np.ones(self.d) # default no transformation - self.xmin = np.where(L2 != 1, np.maximum(self.xmin, amin / 100.0), self.xmin).reshape((-1,1)) - - def _dat2gaus(self, points): - if self.L2 is None: - return points # default no transformation - - L2 = np.atleast_1d(self.L2) * np.ones(self.d) # default no transformation - - tpoints = copy.copy(points) - for i, v2 in enumerate(L2.tolist()): - tpoints[i] = np.log(points[i]) if v2 == 0 else points[i] ** v2 - return tpoints - - def _gaus2dat(self, tpoints): - if self.L2 is None: - return tpoints # default no transformation - - L2 = np.atleast_1d(self.L2) * np.ones(self.d) # default no transformation - - points = copy.copy(tpoints) - for i, v2 in enumerate(L2.tolist()): - points[i] = np.exp(tpoints[i]) if v2 == 0 else tpoints[i] ** (1.0 / v2) - return points - - def _scale_pdf(self, pdf, points): - if self.L2 is None: - return pdf - L2 = np.atleast_1d(self.L2) * np.ones(self.d) # default no transformation - for i, v2 in enumerate(L2.tolist()): - factor = v2 * np.sign(v2) if v2 else 1 - pdf *= np.where(v2 == 1, 1, points[i] ** (v2 - 1) * factor) - if (np.abs(np.diff(pdf)).max() > 10).any(): - msg = ''' Numerical problems may have occured due to the power - transformation. 
Check the KDE for spurious spikes''' - warnings.warn(msg) - return pdf - - def eval_grid_fast2(self, *args, **kwds): - """Evaluate the estimated pdf on a grid. - - Parameters - ---------- - arg_0,arg_1,... arg_d-1 : vectors - Alternatively, if no vectors is passed in then - arg_i = gauss2dat(linspace(dat2gauss(self.xmin[i]), dat2gauss(self.xmax[i]), self.inc)) - output : string optional - 'value' if value output - 'data' if object output - - Returns - ------- - values : array-like - The values evaluated at meshgrid(*args). - - """ - return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds) - - def _eval_grid_fast(self, *args, **kwds): - if self.L2 is None: - f = self.tkde.eval_grid_fast(*args, **kwds) - self.args = self.tkde.args - return f - #targs = self._dat2gaus(list(args)) if len(args) else args - tf = self.tkde.eval_grid_fast() - self.args = self._gaus2dat(list(self.tkde.args)) - points = meshgrid(*self.args) if self.d > 1 else self.args - f = self._scale_pdf(tf, points) - if len(args): - ipoints = meshgrid(*args) if self.d>1 else args - #shape0 = points[0].shape - #shape0i = ipoints[0].shape - for i in range(self.d): - points[i].shape = (-1,) - #ipoints[i].shape = (-1,) - points = np.asarray(points).T - #ipoints = np.asarray(ipoints).T - fi = interpolate.griddata(points, f.ravel(), tuple(ipoints), method='linear', - fill_value=0.0) - #fi.shape = shape0i - self.args = args - r = kwds.get('r', 0) - if r==0: - return fi*(fi>0) - else: - return fi - return f - def _eval_grid(self, *args, **kwds): - if self.L2 is None: - return self.tkde.eval_grid(*args, **kwds) - targs = self._dat2gaus(list(args)) - tf = self.tkde.eval_grid(*targs, **kwds) - points = meshgrid(*args) if self.d > 1 else self.args - f = self._scale_pdf(tf, points) - return f - - def _eval_points(self, points): - """Evaluate the estimated pdf on a set of points. - - Parameters - ---------- - points : (# of dimensions, # of points)-array - Alternatively, a (# of dimensions,) vector can be passed in and - treated as a single point. - - Returns - ------- - values : (# of points,)-array - The values at each point. - - Raises - ------ - ValueError if the dimensionality of the input points is different than - the dimensionality of the KDE. - """ - if self.L2 is None: - return self.tkde.eval_points(points) - - tpoints = self._dat2gaus(points) - tf = self.tkde.eval_points(tpoints) - f = self._scale_pdf(tf, points) - return f - -class KDE(_KDE): - """ Kernel-Density Estimator. - - Parameters - ---------- - data : (# of dims, # of data)-array - datapoints to estimate from - hs : array-like (optional) - smooting parameter vector/matrix. - (default compute from data using kernel.get_smoothing function) - kernel : kernel function object. - kernel must have get_smoothing method - alpha : real scalar (optional) - sensitivity parameter (default 0 regular KDE) - A good choice might be alpha = 0.5 ( or 1/D) - alpha = 0 Regular KDE (hs is constant) - 0 < alpha <= 1 Adaptive KDE (Make hs change) - xmin, xmax : vectors - specifying the default argument range for the kde.eval_grid methods. - For the kde.eval_grid_fast methods the values must cover the range of the data. - (default min(data)-range(data)/4, max(data)-range(data)/4) - If a single value of xmin or xmax is given then the boundary is the is - the same for all dimensions. 
- inc : scalar integer - defining the default dimension of the output from kde.eval_grid methods (default 512) - (For kde.eval_grid_fast: A value below 50 is very fast to compute but - may give some inaccuracies. Values between 100 and 500 give very - accurate results) - - Members - ------- - d : int - number of dimensions - n : int - number of datapoints - - Methods - ------- - kde.eval_grid_fast(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_grid(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_points(points) : array - evaluate the estimated pdf on a provided set of points - kde(x0, x1,..., xd) : array - same as kde.eval_grid(x0, x1,..., xd) - - - Example - ------- - N = 20 - data = np.random.rayleigh(1, size=(N,)) - >>> data = np.array([ 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887, - ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, - ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, - ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) - - >>> x = np.linspace(0, max(data.ravel()) + 1, 10) - >>> import wafo.kdetools as wk - >>> kde = wk.KDE(data, hs=0.5, alpha=0.5) - >>> f = kde(x) - >>> f - array([ 0.17252055, 0.41014271, 0.61349072, 0.57023834, 0.37198073, - 0.21409279, 0.12738463, 0.07460326, 0.03956191, 0.01887164]) - - >>> kde.eval_grid(x) - array([ 0.17252055, 0.41014271, 0.61349072, 0.57023834, 0.37198073, - 0.21409279, 0.12738463, 0.07460326, 0.03956191, 0.01887164]) - - >>> kde0 = wk.KDE(data, hs=0.5, alpha=0.0) - >>> kde0.eval_points(x) - array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , - 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) - - >>> kde0.eval_grid(x) - array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , - 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) - >>> f = kde0.eval_grid(x, output='plotobj') - >>> f.data - array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , - 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) - - >>> f = kde0.eval_grid_fast() - >>> np.interp(x, kde0.args[0], f) - array([ 0.21227584, 0.41256459, 0.5495661 , 0.5176579 , 0.38431616, - 0.2591162 , 0.15978948, 0.07889179, 0.02769818, 0.00791829]) - >>> f1 = kde0.eval_grid_fast(output='plot') - >>> np.interp(x, f1.args, f1.data) - array([ 0.21227584, 0.41256459, 0.5495661 , 0.5176579 , 0.38431616, - 0.2591162 , 0.15978948, 0.07889179, 0.02769818, 0.00791829]) - >>> h = f1.plot() - - import pylab as plb - h1 = plb.plot(x, f) # 1D probability density plot - t = np.trapz(f, x) - """ - - def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, xmax=None, inc=512): - super(KDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc) - - def _initialize(self): - self._compute_smoothing() - self._lambda = np.ones(self.n) - if self.alpha > 0: - #pilot = KDE(self.dataset, hs=self.hs, kernel=self.kernel, alpha=0) - #f = pilot.eval_points(self.dataset) # get a pilot estimate by regular KDE (alpha=0) - f = self.eval_points(self.dataset) # pilot estimate - g = np.exp(np.mean(np.log(f))) - self._lambda = (f / g) ** (-self.alpha) - - if self.inc is None: - unused_tau, tau = self.kernel.effective_support() - xyzrange = 8 * self._sigma - L1 = 10 - self.inc = 2 ** nextpow2(max(48,(L1 * xyzrange/ (tau * self.hs) ).max())) - pass - def _compute_smoothing(self): - """Computes the smoothing matrix - """ - get_smoothing = self.kernel.get_smoothing - h = self.hs - if h is None: - h = 
get_smoothing(self.dataset) - h = np.atleast_1d(h) - hsiz = h.shape - - if (len(hsiz) == 1) or (self.d == 1): - if max(hsiz) == 1: - h = h * np.ones(self.d) - else: - h.shape = (self.d,) # make sure it has the correct dimension - - # If h negative calculate automatic values - ind, = np.where(h <= 0) - for i in ind.tolist(): # - h[i] = get_smoothing(self.dataset[i]) - deth = h.prod() - self.inv_hs = np.diag(1.0 / h) - else: #fully general smoothing matrix - deth = linalg.det(h) - if deth <= 0: - raise ValueError('bandwidth matrix h must be positive definit!') - self.inv_hs = linalg.inv(h) - self.hs = h - self._norm_factor = deth * self.n - - - def _eval_grid_fast(self, *args, **kwds): - X = np.vstack(args) - d, inc = X.shape - dx = X[:, 1] - X[:, 0] - - Xn = [] - nfft0 = 2 * inc - nfft = (nfft0,) * d - x0 = np.linspace(-inc, inc, nfft0+1) - for i in range(d): - Xn.append(x0[:-1] * dx[i]) - - Xnc = meshgrid(*Xn) if d > 1 else Xn - - shape0 = Xnc[0].shape - for i in range(d): - Xnc[i].shape = (-1,) - - Xn = np.dot(self.inv_hs, np.vstack(Xnc)) - - # Obtain the kernel weights. - kw = self.kernel(Xn) - - #plt.plot(kw) - #plt.draw() - #plt.show() - norm_fact0 = (kw.sum()*dx.prod()*self.n) - norm_fact = (self._norm_factor * self.kernel.norm_factor(d, self.n)) - if np.abs(norm_fact0-norm_fact)>0.05*norm_fact: - warnings.warn('Numerical inaccuracy due to too low discretization. Increase the discretization of the evaluation grid (inc=%d)!' % inc) - norm_fact = norm_fact0 - - kw = kw/norm_fact - r = kwds.get('r', 0) - if r!=0: - kw *= np.vstack(Xnc) ** r if d>1 else Xnc[0] - kw.shape = shape0 - kw = np.fft.ifftshift(kw) - fftn = np.fft.fftn - ifftn = np.fft.ifftn - - y = kwds.get('y', 1.0) - #if self.alpha>0: - # y = y / self._lambda**d - - # Find the binned kernel weights, c. - c = gridcount(self.dataset, X, y=y) - # Perform the convolution. - z = np.real(ifftn(fftn(c, s=nfft) * fftn(kw))) - - ix = (slice(0, inc),)*d - if r==0: - return z[ix] * (z[ix] > 0.0) - else: - return z[ix] - def _eval_grid(self, *args, **kwds): - - grd = meshgrid(*args) if len(args) > 1 else list(args) - shape0 = grd[0].shape - d = len(grd) - for i in range(d): - grd[i] = grd[i].ravel() - f = self.eval_points(np.vstack(grd), **kwds) - return f.reshape(shape0) - - - def _eval_points(self, points, **kwds): - """Evaluate the estimated pdf on a set of points. - - Parameters - ---------- - points : (# of dimensions, # of points)-array - Alternatively, a (# of dimensions,) vector can be passed in and - treated as a single point. - - Returns - ------- - values : (# of points,)-array - The values at each point. - - Raises - ------ - ValueError if the dimensionality of the input points is different than - the dimensionality of the KDE. 
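-        Notes
-        -----
-        Informal summary of the computation below (for the default r=0):
-        the estimate at x is
-            sum_i y_i * kernel(inv_hs * (x - X_i) / lambda_i) / lambda_i**d
-        divided by det(hs) * n * kernel.norm_factor(d, n), where the
-        lambda_i are the adaptive bandwidth factors (all equal to one when
-        alpha == 0).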
- """ - d, m = points.shape - - result = np.zeros((m,)) - - r = kwds.get('r', 0) - if r==0: - fun = lambda xi : 1 - else: - fun = lambda xi : (xi ** r).sum(axis=0) - - if m >= self.n: - y = kwds.get('y', np.ones(self.n)) - # there are more points than data, so loop over data - for i in range(self.n): - diff = self.dataset[:, i, np.newaxis] - points - tdiff = np.dot(self.inv_hs / self._lambda[i], diff) - result += y[i] * fun(diff) * self.kernel(tdiff) / self._lambda[i] ** d - else: - y = kwds.get('y', 1) - # loop over points - for i in range(m): - diff = self.dataset - points[:, i, np.newaxis] - tdiff = np.dot(self.inv_hs, diff / self._lambda[np.newaxis, :]) - tmp = y * fun(diff) * self.kernel(tdiff) / self._lambda ** d - result[i] = tmp.sum(axis= -1) - - result /= (self._norm_factor * self.kernel.norm_factor(d, self.n)) - - return result - -class KRegression(_KDE): - """ Kernel-Regression - - Parameters - ---------- - data : (# of dims, # of data)-array - datapoints to estimate from - y : # of data - array - response variable - p : scalar integer (0 or 1) - Nadaraya-Watson estimator if p=0, - local linear estimator if p=1. - hs : array-like (optional) - smooting parameter vector/matrix. - (default compute from data using kernel.get_smoothing function) - kernel : kernel function object. - kernel must have get_smoothing method - alpha : real scalar (optional) - sensitivity parameter (default 0 regular KDE) - A good choice might be alpha = 0.5 ( or 1/D) - alpha = 0 Regular KDE (hs is constant) - 0 < alpha <= 1 Adaptive KDE (Make hs change) - xmin, xmax : vectors - specifying the default argument range for the kde.eval_grid methods. - For the kde.eval_grid_fast methods the values must cover the range of the data. - (default min(data)-range(data)/4, max(data)-range(data)/4) - If a single value of xmin or xmax is given then the boundary is the is - the same for all dimensions. - inc : scalar integer - defining the default dimension of the output from kde.eval_grid methods (default 128) - (For kde.eval_grid_fast: A value below 50 is very fast to compute but - may give some inaccuracies. 
Values between 100 and 500 give very - accurate results) - - Members - ------- - d : int - number of dimensions - n : int - number of datapoints - - Methods - ------- - kde.eval_grid_fast(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_grid(x0, x1,..., xd) : array - evaluate the estimated pdf on meshgrid(x0, x1,..., xd) - kde.eval_points(points) : array - evaluate the estimated pdf on a provided set of points - kde(x0, x1,..., xd) : array - same as kde.eval_grid(x0, x1,..., xd) - - - Example - ------- - >>> N = 100 - >>> ei = np.random.normal(loc=0, scale=0.075, size=(N,)) - - >>> x = np.linspace(0, 1, N) - >>> import wafo.kdetools as wk - - >>> y = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2)) + ei - >>> kreg = wk.KRegression(x, y) - >>> f = kreg(output='plotobj', title='Kernel regression', plotflag=1) - >>> h = f.plot(label='p=0') - """ - - def __init__(self, data, y, p=0, hs=None, kernel=None, alpha=0.0, xmin=None, xmax=None, inc=128, L2=None): - - self.tkde = TKDE(data, hs=hs, kernel=kernel, alpha=alpha, xmin=xmin,xmax=xmax, inc=inc, L2=L2) - self.y = y - self.p = p - - def eval_grid_fast(self, *args, **kwds): - self._grdfun = self.tkde.eval_grid_fast - return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds) - - def eval_grid(self, *args, **kwds): - self._grdfun = self.tkde.eval_grid - return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds) - - def _eval_gridfun(self, *args, **kwds): - grdfun = self._grdfun - s0 = grdfun(*args, r=0) - t0 = grdfun(*args, r=0, y=self.y) - if self.p==0: - return (t0 / (s0 + _TINY)).clip(min=-_REALMAX, max=_REALMAX) - elif self.p==1: - s1 = grdfun(*args, r=1) - s2 = grdfun(*args, r=2) - t1 = grdfun(*args, r=1, y=self.y) - return ((s2 * t0 - s1 * t1) / (s2 * s0 - s1**2)).clip(min=-_REALMAX, max=_REALMAX) - __call__ = eval_grid_fast - -class BKRegression(object): - ''' - Kernel-Regression on binomial data - - method : {'beta', 'wilson'} - method is one of the following - 'beta', return Bayesian Credible interval using beta-distribution. - 'wilson', return Wilson score interval - a, b : scalars - parameters of the beta distribution defining the apriori distribution of p, i.e., - the Bayes estimator for p: p = (y+a)/(n+a+b). - Setting a=b=0.5 gives Jeffreys interval. - ''' - def __init__(self, *args, **kwds): - self.method = kwds.pop('method','beta') - self.a = max(kwds.pop('a', 0.5), _TINY) - self.b = max(kwds.pop('b', 0.5), _TINY) - self.kreg = KRegression(*args, **kwds) - self.hs_e = None # defines bin width (i.e. 
smoothing) in empirical estimate -# self.x = self.kreg.tkde.dataset -# self.y = self.kreg.y - def _set_smoothing(self,hs): - self.kreg.tkde.hs = hs - self.kreg.tkde.initialize() - - x = property(fget=lambda cls: cls.kreg.tkde.dataset.squeeze()) - y = property(fget=lambda cls: cls.kreg.y) - kernel = property(fget=lambda cls: cls.kreg.tkde.kernel) - hs = property(fset=_set_smoothing, fget=lambda cls: cls.kreg.tkde.hs) - - def _get_max_smoothing(self, fun=None): - ''' - Return maximum value for smoothing parameter - ''' - x = self.x - y = self.y - if fun is None: - get_smoothing = self.kernel.get_smoothing - else: - get_smoothing = getattr(self.kernel, fun) - - hs1 = get_smoothing(x) - #hx = np.median(np.abs(x-np.median(x)))/0.6745*(4.0/(3*n))**0.2 - if (y==True).any(): - hs2 = get_smoothing(x[y==True]) - #hy = np.median(np.abs(y-np.mean(y)))/0.6745*(4.0/(3*n))**0.2 - else: - hs2 = 4*hs1 - #hy = 4*hx - - hopt = sqrt(hs1*hs2) - return hopt, hs1, hs2 - - def get_grid(self, hs_e=None): - if hs_e is None: - if self.hs_e is None: - hs1 = self._get_max_smoothing('hste')[0] - hs2 = self._get_max_smoothing('hos')[0] - self.hs_e = sqrt(hs1*hs2) - hs_e = self.hs_e - x = self.x - xmin, xmax = x.min(), x.max() - ni = max(2*int((xmax-xmin)/hs_e)+3,5) - sml = hs_e #*0.1 - xi = np.linspace(xmin-sml,xmax+sml, ni) - return xi - - def prb_ci(self, n, p, alpha=0.05, **kwds): - ''' - Return Confidence Interval for the binomial probability p - - Parameters - ---------- - n : array-like - number of Bernoulli trials - p : array-like - estimated probability of success in each trial - alpha : scalar - confidence level - method : {'beta', 'wilson'} - method is one of the following - 'beta', return Bayesian Credible interval using beta-distribution. - 'wilson', return Wilson score interval - a, b : scalars - parameters of the beta distribution defining the apriori distribution of p, i.e., - the Bayes estimator for p: p = (y+a)/(n+a+b). - Setting a=b=0.5 gives Jeffreys interval. 
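-        Notes
-        -----
-        Informal summary of the expressions evaluated below. The Wilson
-        score interval is xc +/- hw with
-            xc = (p + z**2/(2*n)) / (1 + z**2/n)
-            hw = z*sqrt(p*(1-p)/n + z**2/(4*n**2)) / (1 + z**2/n)
-        and z = -invnorm(alpha/2), while the Bayesian 'beta' interval uses
-        the alpha/2 and 1-alpha/2 quantiles of a Beta(n*p+a, n*(1-p)+b)
-        distribution.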
- ''' - if self.method.startswith('w'): - #Wilson score - z0 = -_invnorm(alpha/2) - den = 1+(z0**2./n); - xc=(p+(z0**2)/(2*n))/den; - halfwidth=(z0*sqrt((p*(1-p)/n)+(z0**2/(4*(n**2)))))/den - plo = (xc-halfwidth).clip(min=0) # wilson score - pup = (xc+halfwidth).clip(max=1.0) # wilson score - else: - # Jeffreys intervall a=b=0.5 - #st.beta.isf(alpha/2, y+a, n-y+b) y = n*p, n-y = n*(1-p) - a = self.a - b = self.b - st = stats - pup = np.where(p==1, 1, st.beta.isf(alpha/2, n*p+a, n*(1-p)+b)) - plo = np.where(p==0, 0, st.beta.isf(1-alpha/2, n*p+a, n*(1-p)+b)) - return plo, pup - - def prb_empirical(self, xi=None, hs_e=None, alpha=0.05, color='r', **kwds): - ''' - Returns empirical binomial probabiltity - - Parameters - ---------- - x : ndarray - position vector - y : ndarray - binomial response variable (zeros and ones) - alpha : scalar - confidence level - color: - used in plot - - Returns - ------- - P(x) : PlotData object - empirical probability - ''' - if xi is None: - xi = self.get_grid(hs_e) - - x = self.x - y = self.y - - c = gridcount(x, xi) #+ self.a + self.b # count data - if (y==True).any(): - c0 = gridcount(x[y==True],xi) #+ self.a # count success - else: - c0 = np.zeros(xi.shape) - prb = np.where(c==0, 0, c0/(c+_TINY)) # assume prb==0 for c==0 - CI = np.vstack(self.prb_ci(c, prb, alpha,**kwds)) - - #prb_e = PlotData(prb, xi, plotmethod='scatter', plot_kwds=dict(color=color, s=5, picker=5)) - prb_e = PlotData(prb, xi, plotmethod='plot', plot_args=['.'],plot_kwds=dict(markersize=6, color=color, picker=5)) - #prb_e = PlotData(prb, xi, plotmethod='errorbar', plot_kwds=dict(color=color, yerr=np.abs(prb-CI))) - prb_e.dataCI = CI.T - prb_e.count = c - return prb_e - - def prb_smoothed(self, prb_e, hs, alpha=0.05, color='r', label=''): - ''' - Return smoothed binomial probability - - Parameters - ---------- - prb_e : PlotData object with empirical binomial probabilites - hs : smoothing parameter - alpha : confidence level - color : color of plot object - label : label for plot object - ''' - - x_e = prb_e.args - n_e = len(x_e) - dx_e = x_e[1]-x_e[0] - n = self.x.size - - x_s = np.linspace(x_e[0],x_e[-1], 10*n_e+1) - self.hs = hs - - prb_s = self.kreg(x_s, output='plotobj', title='', plot_kwds=dict(color=color, linewidth=2)) #dict(plotflag=7)) - m_nan = np.isnan(prb_s.data) - if m_nan.any(): # assume 0/0 division - prb_s.data[m_nan] = 0.0 - - #prb_s.data[np.isnan(prb_s.data)] = 0 - c_s = self.kreg.tkde.eval_grid_fast(x_s) * dx_e * n # expected number of data in each bin - plo, pup = self.prb_ci(c_s, prb_s.data, alpha) - - prb_s.dataCI = np.vstack((plo,pup)).T - prb_s.prediction_error_avg = np.trapz(pup-plo, x_s)/(x_s[-1]-x_s[0]) - - if label: - prb_s.plot_kwds['label'] = label - prb_s.children = [PlotData([plo, pup],x_s, - plotmethod='fill_between', - plot_kwds=dict(alpha=0.2, color=color)), - prb_e] - - # empirical oversmooths the data -# p_s = prb_s.eval_points(self.x) -# dp_s = np.diff(prb_s.data) -# k = (dp_s[:-1]*dp_s[1:]<0).sum() # numpeaks -# p_e = self.y -# n_s = interpolate.interp1d(x_s, c_s)(self.x) -# plo, pup = self.prb_ci(n_s, p_s, alpha) -# sigmai = (pup-plo) -# aicc = (((p_e-p_s)/sigmai)**2).sum()+ 2*k*(k+1)/np.maximum(n-k+1,1) - - - p_e = prb_e.eval_points(x_s) - p_s = prb_s.data - dp_s = np.sign(np.diff(p_s)) - k = (dp_s[:-1]!=dp_s[1:]).sum() # numpeaks - - #sigmai = (pup-plo)+_EPS - #aicc = (((p_e-p_s)/sigmai)**2).sum()+ 2*k*(k+1)/np.maximum(n_e-k+1,1) + np.abs((p_e-pup).clip(min=0)-(p_e-plo).clip(max=0)).sum() - sigmai = _logit(pup) - _logit(plo) + _EPS - aicc = 
(((_logit(p_e)-_logit(p_s))/sigmai)**2).sum()+ 2*k*(k+1)/np.maximum(n_e-k+1,1) + np.abs((p_e-pup).clip(min=0)-(p_e-plo).clip(max=0)).sum() - - prb_s.aicc = aicc - #prb_s.labels.title = '' - #prb_s.labels.title='perr=%1.3f,aicc=%1.3f, n=%d, hs=%1.3f' % (prb_s.prediction_error_avg,aicc,n,hs) - - return prb_s - - def prb_search_best(self, prb_e=None, hsvec=None, hsfun='hste', alpha=0.05, color='r', label=''): - ''' - Return best smoothed binomial probability - - Parameters - ---------- - prb_e : PlotData object with empirical binomial probabilites - hsvec : arraylike - vector smoothing parameters (default np.linspace(hsmax*0.1,hsmax,55)) - hsfun : - method for calculating hsmax - - ''' - if prb_e is None: - prb_e = self.prb_empirical(hs_e=self.hs_e, alpha=alpha, color=color) - if hsvec is None: - hsmax = self._get_max_smoothing(hsfun)[0] #@UnusedVariable - hsmax = max(hsmax, self.hs_e) - hsvec = np.linspace(hsmax*0.2,hsmax,55) - - - hs_best = hsvec[-1]+0.1 - prb_best = self.prb_smoothed(prb_e, hs_best, alpha, color, label) - aicc = np.zeros(np.size(hsvec)) - for i, hi in enumerate(hsvec): - f = self.prb_smoothed(prb_e, hi, alpha, color, label) - aicc[i] = f.aicc - if f.aicc<=prb_best.aicc: - prb_best = f - hs_best = hi - prb_best.score = PlotData(aicc,hsvec) - prb_best.hs = hs_best - self._set_smoothing(hs_best) - return prb_best - -class _Kernel(object): - def __init__(self, r=1.0, stats=None): - self.r = r # radius of kernel - self.stats = stats - def norm_factor(self, d=1, n=None): - return 1.0 - def norm_kernel(self, x): - X = np.atleast_2d(x) - return self._kernel(X) / self.norm_factor(*X.shape) - def kernel(self, x): - return self._kernel(np.atleast_2d(x)) - def deriv4_6_8_10(self, t, numout=4): - raise Exception('Method not implemented for this kernel!') - def effective_support(self): - ''' - Return the effective support of kernel. - - The kernel must be symmetric and compactly supported on [-tau tau] - if the kernel has infinite support then the kernel must have - the effective support in [-tau tau], i.e., be negligible outside the range - ''' - return self._effective_support() - def _effective_support(self): - return - self.r, self.r - __call__ = kernel - -class _KernelMulti(_Kernel): - # p=0; %Sphere = rect for 1D - # p=1; %Multivariate Epanechnikov kernel. - # p=2; %Multivariate Bi-weight Kernel - # p=3; %Multi variate Tri-weight Kernel - # p=4; %Multi variate Four-weight Kernel - def __init__(self, r=1.0, p=1, stats=None): - self.r = r - self.p = p - self.stats = stats - def norm_factor(self, d=1, n=None): - r = self.r - p = self.p - c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(d, r) / np.prod(np.r_[(d + 2):(2 * p + d + 1):2])# normalizing constant - return c - def _kernel(self, x): - r = self.r - p = self.p - x2 = x ** 2 - return ((1.0 - x2.sum(axis=0) / r ** 2).clip(min=0.0)) ** p - -mkernel_epanechnikov = _KernelMulti(p=1, stats=_stats_epan) -mkernel_biweight = _KernelMulti(p=2, stats=_stats_biwe) -mkernel_triweight = _KernelMulti(p=3, stats=_stats_triw) - -class _KernelProduct(_KernelMulti): - # p=0; %rectangular - # p=1; %1D product Epanechnikov kernel. 
- # p=2; %1D product Bi-weight Kernel - # p=3; %1D product Tri-weight Kernel - # p=4; %1D product Four-weight Kernel - - def norm_factor(self, d=1, n=None): - r = self.r - p = self.p - c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(1, r) / np.prod(np.r_[(1 + 2):(2 * p + 2):2])# normalizing constant - return c ** d - def _kernel(self, x): - r = self.r # radius - pdf = (1 - (x / r) ** 2).clip(min=0.0) - return pdf.prod(axis=0) - -mkernel_p1epanechnikov = _KernelProduct(p=1, stats=_stats_epan) -mkernel_p1biweight = _KernelProduct(p=2, stats=_stats_biwe) -mkernel_p1triweight = _KernelProduct(p=3, stats=_stats_triw) - - -class _KernelRectangular(_Kernel): - def _kernel(self, x): - return np.where(np.all(np.abs(x) <= self.r, axis=0), 1, 0.0) - def norm_factor(self, d=1, n=None): - r = self.r - return (2 * r) ** d -mkernel_rectangular = _KernelRectangular(stats=_stats_rect) - -class _KernelTriangular(_Kernel): - def _kernel(self, x): - pdf = (1 - np.abs(x)).clip(min=0.0) - return pdf.prod(axis=0) -mkernel_triangular = _KernelTriangular(stats=_stats_tria) - -class _KernelGaussian(_Kernel): - def _kernel(self, x): - sigma = self.r / 4.0 - x2 = (x / sigma) ** 2 - return exp(-0.5 * x2.sum(axis=0)) - def norm_factor(self, d=1, n=None): - sigma = self.r / 4.0 - return (2 * pi * sigma) ** (d / 2.0) - def deriv4_6_8_10(self, t, numout=4): - ''' - Returns 4th, 6th, 8th and 10th derivatives of the kernel function. - ''' - phi0 = exp(-0.5 * t ** 2) / sqrt(2 * pi) - p4 = [1, 0, -6, 0, +3] - p4val = np.polyval(p4, t) * phi0 - if numout == 1: - return p4val - out = [p4val] - pn = p4 - for unusedix in range(numout - 1): - pnp1 = np.polyadd(-np.r_[pn, 0], np.polyder(pn)) - pnp2 = np.polyadd(-np.r_[pnp1, 0], np.polyder(pnp1)) - out.append(np.polyval(pnp2, t) * phi0) - pn = pnp2 - return out - -mkernel_gaussian = _KernelGaussian(r=4.0, stats=_stats_gaus) - -#def mkernel_gaussian(X): -# x2 = X ** 2 -# d = X.shape[0] -# return (2 * pi) ** (-d / 2) * exp(-0.5 * x2.sum(axis=0)) - -class _KernelLaplace(_Kernel): - def _kernel(self, x): - absX = np.abs(x) - return exp(-absX.sum(axis=0)) - def norm_factor(self, d=1, n=None): - return 2 ** d -mkernel_laplace = _KernelLaplace(r=7.0, stats=_stats_lapl) - -class _KernelLogistic(_Kernel): - def _kernel(self, x): - s = exp(-x) - return np.prod(1.0 / (s + 1) ** 2, axis=0) -mkernel_logistic = _KernelLogistic(r=7.0, stats=_stats_logi) - -_MKERNEL_DICT = dict( - epan=mkernel_epanechnikov, - biwe=mkernel_biweight, - triw=mkernel_triweight, - p1ep=mkernel_p1epanechnikov, - p1bi=mkernel_p1biweight, - p1tr=mkernel_p1triweight, - rect=mkernel_rectangular, - tria=mkernel_triangular, - lapl=mkernel_laplace, - logi=mkernel_logistic, - gaus=mkernel_gaussian - ) -_KERNEL_EXPONENT_DICT = dict(re=0, sp=0, ep=1, bi=2, tr=3, fo=4, fi=5, si=6, se=7) - -class Kernel(object): - ''' - Multivariate kernel - - Parameters - ---------- - name : string - defining the kernel. Valid options are: - 'epanechnikov' - Epanechnikov kernel. - 'biweight' - Bi-weight kernel. - 'triweight' - Tri-weight kernel. - 'p1epanechnikov' - product of 1D Epanechnikov kernel. - 'p1biweight' - product of 1D Bi-weight kernel. - 'p1triweight' - product of 1D Tri-weight kernel. - 'triangular' - Triangular kernel. - 'gaussian' - Gaussian kernel - 'rectangular' - Rectangular kernel. - 'laplace' - Laplace kernel. - 'logistic' - Logistic kernel. - Note that only the first 4 letters of the kernel name is needed. 
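-    fun : string
-        name of the bandwidth selector used by get_smoothing, i.e., one of
-        the methods defined on this class such as 'hste' (default), 'hns',
-        'hos', 'hisj', 'hstt', 'hscv' or 'hldpi'.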
- - Examples - -------- - N = 20 - data = np.random.rayleigh(1, size=(N,)) - >>> data = np.array([ 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887, - ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, - ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, - ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) - - >>> import wafo.kdetools as wk - >>> gauss = wk.Kernel('gaussian') - >>> gauss.stats() - (1, 0.28209479177387814, 0.21157109383040862) - >>> gauss.hscv(data) - array([ 0.21555043]) - >>> gauss.hstt(data) - array([ 0.15165387]) - >>> gauss.hste(data) - array([ 0.18942238]) - >>> gauss.hldpi(data) - array([ 0.1718688]) - - >>> wk.Kernel('laplace').stats() - (2, 0.25, inf) - - >>> triweight = wk.Kernel('triweight'); triweight.stats() - (0.1111111111111111, 0.81585081585081587, inf) - - >>> triweight(np.linspace(-1,1,11)) - array([ 0. , 0.046656, 0.262144, 0.592704, 0.884736, 1. , - 0.884736, 0.592704, 0.262144, 0.046656, 0. ]) - >>> triweight.hns(data) - array([ 0.82087056]) - >>> triweight.hos(data) - array([ 0.88265652]) - >>> triweight.hste(data) - array([ 0.56570278]) - >>> triweight.hscv(data) - array([ 0.64193201]) - - See also - -------- - mkernel - - References - ---------- - B. W. Silverman (1986) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp. 43, 76 - - Wand, M. P. and Jones, M. C. (1995) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp 31, 103, 175 - ''' - def __init__(self, name, fun='hste'): #'hns'): - self.kernel = _MKERNEL_DICT[name[:4]] - #self.name = self.kernel.__name__.replace('mkernel_', '').title() - try: - self.get_smoothing = getattr(self, fun) - except: - self.get_smoothing = self.hste - def _get_name(self): - return self.kernel.__class__.__name__.replace('_Kernel', '').title() - name = property(_get_name) - def get_smoothing(self, *args, **kwds): - pass - def stats(self): - ''' Return some 1D statistics of the kernel. - - Returns - ------- - mu2 : real scalar - 2'nd order moment, i.e.,int(x^2*kernel(x)) - R : real scalar - integral of squared kernel, i.e., int(kernel(x)^2) - Rdd : real scalar - integral of squared double derivative of kernel, i.e., int( (kernel''(x))^2 ). - - Reference - --------- - Wand,M.P. and Jones, M.C. (1995) - 'Kernel smoothing' - Chapman and Hall, pp 176. - ''' - return self.kernel.stats - #name = self.name[2:6] if self.name[:2].lower() == 'p1' else self.name[:4] - #return _KERNEL_STATS_DICT[name.lower()] - def deriv4_6_8_10(self, t, numout=4): - return self.kernel.deriv4_6_8_10(t, numout) - - def effective_support(self): - return self.kernel.effective_support() - - def hns(self, data): - ''' - Returns Normal Scale Estimate of Smoothing Parameter. - - Parameter - --------- - data : 2D array - shape d x n (d = # dimensions ) - - Returns - ------- - h : array-like - one dimensional optimal value for smoothing parameter - given the data and kernel. size D - - HNS only gives an optimal value with respect to mean integrated - square error, when the true underlying distribution - is Gaussian. This works reasonably well if the data resembles a - Gaussian distribution. However if the distribution is asymmetric, - multimodal or have long tails then HNS may return a to large - smoothing parameter, i.e., the KDE may be oversmoothed and mask - important features of the data. (=> large bias). - One way to remedy this is to reduce H by multiplying with a constant - factor, e.g., 0.85. 
Another is to try different values for H and make a - visual check by eye. - - Example: - data = rndnorm(0, 1,20,1) - h = hns(data,'epan'); - - See also: - --------- - hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt, kde - - Reference: - --------- - B. W. Silverman (1986) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp 43-48 - Wand,M.P. and Jones, M.C. (1995) - 'Kernel smoothing' - Chapman and Hall, pp 60--63 - ''' - - A = np.atleast_2d(data) - n = A.shape[1] - - # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) - mu2, R, unusedRdd = self.stats() - AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) - iqr = iqrange(A, axis=1) # interquartile range - stdA = np.std(A, axis=1, ddof=1) - # use of interquartile range guards against outliers. - # the use of interquartile range is better if - # the distribution is skew or have heavy tails - # This lessen the chance of oversmoothing. - return np.where(iqr > 0, np.minimum(stdA, iqr / 1.349), stdA) * AMISEconstant - - def hos(self, data): - ''' Returns Oversmoothing Parameter. - - - - h = one dimensional maximum smoothing value for smoothing parameter - given the data and kernel. size 1 x D - data = data matrix, size N x D (D = # dimensions ) - - The oversmoothing or maximal smoothing principle relies on the fact - that there is a simple upper bound for the AMISE-optimal bandwidth for - estimation of densities with a fixed value of a particular scale - measure. While HOS will give too large bandwidth for optimal estimation - of a general density it provides an excellent starting point for - subjective choice of bandwidth. A sensible strategy is to plot an - estimate with bandwidth HOS and then sucessively look at plots based on - convenient fractions of HOS to see what features are present in the - data for various amount of smoothing. The relation to HNS is given by: - - HOS = HNS/0.93 - - Example: - data = rndnorm(0, 1,20,1) - h = hos(data,'epan'); - - See also hste, hbcv, hboot, hldpi, hlscv, hscv, hstt, kde, kdefun - - Reference - --------- - B. W. Silverman (1986) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp 43-48 - - Wand,M.P. and Jones, M.C. (1986) - 'Kernel smoothing' - Chapman and Hall, pp 60--63 - ''' - return self.hns(data) / 0.93 - def hmns(self, data): - ''' - Returns Multivariate Normal Scale Estimate of Smoothing Parameter. - - CALL: h = hmns(data,kernel) - - h = M dimensional optimal value for smoothing parameter - given the data and kernel. size D x D - data = data matrix, size D x N (D = # dimensions ) - kernel = 'epanechnikov' - Epanechnikov kernel. - 'biweight' - Bi-weight kernel. - 'triweight' - Tri-weight kernel. - 'gaussian' - Gaussian kernel - - Note that only the first 4 letters of the kernel name is needed. - - HMNS only gives a optimal value with respect to mean integrated - square error, when the true underlying distribution is - Multivariate Gaussian. This works reasonably well if the data resembles a - Multivariate Gaussian distribution. However if the distribution is - asymmetric, multimodal or have long tails then HNS is maybe more - appropriate. - - Example: - data = rndnorm(0, 1,20,2) - h = hmns(data,'epan'); - - See also - -------- - - hns, hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt - - Reference - ---------- - B. W. Silverman (1986) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp 43-48, 87 - - Wand,M.P. and Jones, M.C. 
(1995) - 'Kernel smoothing' - Chapman and Hall, pp 60--63, 86--88 - ''' - # TODO: implement more kernels - - A = np.atleast_2d(data) - d, n = A.shape - - if d == 1: - return self.hns(data) - name = self.name[:4].lower() - if name == 'epan': # Epanechnikov kernel - a = (8.0 * (d + 4.0) * (2 * sqrt(pi)) ** d / sphere_volume(d)) ** (1. / (4.0 + d)) - elif name == 'biwe': # Bi-weight kernel - a = 2.7779; - if d > 2: - raise ValueError('not implemented for d>2') - elif name == 'triw': # Triweight - a = 3.12; - if d > 2: - raise ValueError('not implemented for d>2') - elif name == 'gaus': # Gaussian kernel - a = (4.0 / (d + 2.0)) ** (1. / (d + 4.0)) - else: - raise ValueError('Unknown kernel.') - - covA = scipy.cov(A) - - return a * linalg.sqrtm(covA).real * n ** (-1. / (d + 4)) - def hste(self, data, h0=None, inc=128, maxit=100, releps=0.01, abseps=0.0): - '''HSTE 2-Stage Solve the Equation estimate of smoothing parameter. - - CALL: hs = hste(data,kernel,h0) - - hs = one dimensional value for smoothing parameter - given the data and kernel. size 1 x D - data = data matrix, size N x D (D = # dimensions ) - kernel = 'gaussian' - Gaussian kernel (default) - ( currently the only supported kernel) - h0 = initial starting guess for hs (default h0=hns(A,kernel)) - - Example: - x = rndnorm(0,1,50,1); - hs = hste(x,'gauss'); - - See also hbcv, hboot, hos, hldpi, hlscv, hscv, hstt, kde, kdefun - - Reference: - B. W. Silverman (1986) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp 57--61 - - Wand,M.P. and Jones, M.C. (1986) - 'Kernel smoothing' - Chapman and Hall, pp 74--75 - ''' - # TODO: NB: this routine can be made faster: - # TODO: replace the iteration in the end with a Newton Raphson scheme - - A = np.atleast_2d(data) - d, n = A.shape - - # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) - mu2, R, unusedRdd = self.stats() - - AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) - STEconstant = R / (mu2 ** (2) * n) - - sigmaA = self.hns(A) / AMISEconstant - if h0 is None: - h0 = sigmaA * AMISEconstant - - h = np.asarray(h0, dtype=float) - - nfft = inc * 2 - amin = A.min(axis=1) # Find the minimum value of A. - amax = A.max(axis=1) #Find the maximum value of A. - arange = amax - amin # Find the range of A. - - #% xa holds the x 'axis' vector, defining a grid of x values where - #% the k.d. function will be evaluated. - - ax1 = amin - arange / 8.0 - bx1 = amax + arange / 8.0 - - kernel2 = Kernel('gauss') - mu2, R, unusedRdd = kernel2.stats() - STEconstant2 = R / (mu2 ** (2) * n) - fft = np.fft.fft - ifft = np.fft.ifft - - for dim in range(d): - s = sigmaA[dim] - ax = ax1[dim] - bx = bx1[dim] - - xa = np.linspace(ax, bx, inc) - xn = np.linspace(0, bx - ax, inc) - - c = gridcount(A[dim], xa) - - # Step 1 - psi6NS = -15 / (16 * sqrt(pi) * s ** 7) - psi8NS = 105 / (32 * sqrt(pi) * s ** 9) - - # Step 2 - k40, k60 = kernel2.deriv4_6_8_10(0, numout=2) - g1 = (-2 * k40 / (mu2 * psi6NS * n)) ** (1.0 / 7) - g2 = (-2 * k60 / (mu2 * psi8NS * n)) ** (1.0 / 9) - - # Estimate psi6 given g2. - kw4, kw6 = kernel2.deriv4_6_8_10(xn / g2, numout=2) # kernel weights. - kw = np.r_[kw6, 0, kw6[-1:0:-1]] # Apply fftshift to kw. - z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. - psi6 = np.sum(c * z[:inc]) / (n * (n - 1) * g2 ** 7) - - # Estimate psi4 given g1. - kw4 = kernel2.deriv4_6_8_10(xn / g1, numout=1) # kernel weights. - kw = np.r_[kw4, 0, kw4[-1:0:-1]] #Apply 'fftshift' to kw. - z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. 
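-            # Note: psi6 (above) and psi4 (below) estimate the density
-            # functionals psi_r = int f^(r)(x)*f(x) dx for r=6 and r=4,
-            # using pilot bandwidths g2 and g1 derived from the
-            # normal-reference values psi8NS and psi6NS.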
- psi4 = np.sum(c * z[:inc]) / (n * (n - 1) * g1 ** 5) - - - - h1 = h[dim] - h_old = 0 - count = 0 - - while ((abs(h_old - h1) > max(releps * h1, abseps)) and (count < maxit)): - count += 1 - h_old = h1 - - # Step 3 - gamma = ((2 * k40 * mu2 * psi4 * h1 ** 5) / (-psi6 * R)) ** (1.0 / 7) - - # Now estimate psi4 given gamma. - kw4 = kernel2.deriv4_6_8_10(xn / gamma, numout=1) #kernel weights. - kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply 'fftshift' to kw. - z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. - - psi4Gamma = np.sum(c * z[:inc]) / (n * (n - 1) * gamma ** 5) - - # Step 4 - h1 = (STEconstant2 / psi4Gamma) ** (1.0 / 5) - - # Kernel other than Gaussian scale bandwidth - h1 = h1 * (STEconstant / STEconstant2) ** (1.0 / 5) - - - if count >= maxit: - warnings.warn('The obtained value did not converge.') - - h[dim] = h1 - #end % for dim loop - return h - def hisj(self, data, inc=512, L=7): - ''' - HISJ Improved Sheather-Jones estimate of smoothing parameter. - - Unlike many other implementations, this one is immune to problems - caused by multimodal densities with widely separated modes. The - estimation does not deteriorate for multimodal densities, because - it do not assume a parametric model for the data. - - Parameters - ---------- - data - a vector of data from which the density estimate is constructed; - inc - the number of mesh points used in the uniform discretization - - Returns - ------- - bandwidth - the optimal bandwidth - - Reference - --------- - Kernel density estimation via diffusion - Z. I. Botev, J. F. Grotowski, and D. P. Kroese (2010) - Annals of Statistics, Volume 38, Number 5, pages 2916-2957. - ''' - A = np.atleast_2d(data) - d, n = A.shape - - # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) - mu2, R, unusedRdd = self.stats() - STEconstant = R / (n * mu2 ** 2) - - amin = A.min(axis=1) # Find the minimum value of A. - amax = A.max(axis=1) # Find the maximum value of A. - arange = amax - amin # Find the range of A. - - #% xa holds the x 'axis' vector, defining a grid of x values where - #% the k.d. function will be evaluated. - - ax1 = amin - arange / 8.0 - bx1 = amax + arange / 8.0 - - kernel2 = Kernel('gauss') - mu2, R, unusedRdd = kernel2.stats() - STEconstant2 = R / (mu2 ** (2) * n) - - def fixed_point(t, N, I, a2): - ''' this implements the function t-zeta*gamma^[L](t)''' - - prod = np.prod - #L = 7 - logI = np.log(I) - f = 2 * pi**(2 * L) * (a2 * exp(L * logI -I * pi ** 2 * t)).sum() - for s in range(L - 1, 1, -1): - K0 = prod(np.r_[1:2*s:2]) / sqrt(2 * pi); - const = (1 + (1. / 2) ** (s + 1. / 2)) / 3 - time = (2 * const * K0 / N / f) ** (2. / (3 + 2 * s)) - f = 2 * pi ** (2 * s) * (a2 * exp(s * logI-I * pi ** 2 * time)).sum() - return t - (2 * N * sqrt(pi) * f) ** (-2. 
/ 5) - - h = np.empty(d) - for dim in range(d): - ax = ax1[dim] - bx = bx1[dim] - xa = np.linspace(ax, bx, inc) - R = bx-ax - - c = gridcount(A[dim], xa) - N = len(set(A[dim])) - #a = dct(c/c.sum(), norm=None) - a = dct(c/len(A[dim]), norm=None) - - # now compute the optimal bandwidth^2 using the referenced method - I = np.asfarray(np.arange(1, inc))**2 - a2 = (a[1:]/2)**2 - fun = lambda t: fixed_point(t, N, I, a2) - x = np.linspace(0, 0.1, 150) - ai = x[0] - f0 = fun(ai) - for bi in x[1:]: - f1 = fun(bi) - if f1*f0<=0: - #print('ai = %g, bi = %g' % (ai,bi)) - break - else: - ai = bi - #y = np.asarray([fun(j) for j in x]) - #plt.figure(1) - #plt.plot(x,y) - #plt.show() - - # use fzero to solve the equation t=zeta*gamma^[5](t) - try: - t_star = optimize.brentq(fun, a=ai, b=bi) - except: - t_star = 0.28*N**(-2./5) - warnings.warn('Failure in obtaining smoothing parameter') - - # smooth the discrete cosine transform of initial data using t_star - # a_t = a*exp(-np.arange(inc)**2*pi**2*t_star/2) - # now apply the inverse discrete cosine transform - #density = idct(a_t)/R; - - # take the rescaling of the data into account - bandwidth = sqrt(t_star)*R - - # Kernel other than Gaussian scale bandwidth - h[dim] = bandwidth * (STEconstant / STEconstant2) ** (1.0 / 5) - #end % for dim loop - return h - - def hstt(self, data, h0=None, inc=128, maxit=100, releps=0.01, abseps=0.0): - '''HSTT Scott-Tapia-Thompson estimate of smoothing parameter. - - CALL: hs = hstt(data,kernel) - - hs = one dimensional value for smoothing parameter - given the data and kernel. size 1 x D - data = data matrix, size N x D (D = # dimensions ) - kernel = 'epanechnikov' - Epanechnikov kernel. (default) - 'biweight' - Bi-weight kernel. - 'triweight' - Tri-weight kernel. - 'triangular' - Triangular kernel. - 'gaussian' - Gaussian kernel - 'rectangular' - Rectangular kernel. - 'laplace' - Laplace kernel. - 'logistic' - Logistic kernel. - - HSTT returns Scott-Tapia-Thompson (STT) estimate of smoothing - parameter. This is a Solve-The-Equation rule (STE). - Simulation studies shows that the STT estimate of HS - is a good choice under a variety of models. A comparison with - likelihood cross-validation (LCV) indicates that LCV performs slightly - better for short tailed densities. - However, STT method in contrast to LCV is insensitive to outliers. - - Example - ------- - x = rndnorm(0,1,50,1); - hs = hstt(x,'gauss'); - - See also - -------- - hste, hbcv, hboot, hos, hldpi, hlscv, hscv, kde, kdebin - - - - Reference - --------- - B. W. Silverman (1986) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp 57--61 - ''' - A = np.atleast_2d(data) - d, n = A.shape - - # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) - mu2, R, unusedRdd = self.stats() - - AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) - STEconstant = R / (mu2 ** (2) * n) - - sigmaA = self.hns(A) / AMISEconstant - if h0 is None: - h0 = sigmaA * AMISEconstant - - h = np.asarray(h0, dtype=float) - - nfft = inc * 2 - amin = A.min(axis=1) # Find the minimum value of A. - amax = A.max(axis=1) #Find the maximum value of A. - arange = amax - amin # Find the range of A. - - #% xa holds the x 'axis' vector, defining a grid of x values where - #% the k.d. function will be evaluated. 
- - ax1 = amin - arange / 8.0 - bx1 = amax + arange / 8.0 - - fft = np.fft.fft - ifft = np.fft.ifft - for dim in range(d): - s = sigmaA[dim] - datan = A[dim] / s - ax = ax1[dim] / s - bx = bx1[dim] / s - - xa = np.linspace(ax, bx, inc) - xn = np.linspace(0, bx - ax, inc) - - c = gridcount(datan, xa) - - count = 1 - h_old = 0 - h1 = h[dim] / s - delta = (bx - ax) / (inc - 1) - while ((abs(h_old - h1) > max(releps * h1, abseps)) and (count < maxit)): - count += 1 - h_old = h1 - - kw4 = self.kernel(xn / h1) / (n * h1 * self.norm_factor(d=1)) - kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply 'fftshift' to kw. - f = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. - - # Estimate psi4=R(f'') using simple finite differences and quadrature. - ix = np.arange(1, inc - 1) - z = ((f[ix + 1] - 2 * f[ix] + f[ix - 1]) / delta ** 2) ** 2 - psi4 = delta * z.sum() - h1 = (STEconstant / psi4) ** (1. / 5); - - if count >= maxit: - warnings.warn('The obtained value did not converge.') - - h[dim] = h1 * s - #end % for dim loop - return h - - - def hscv(self, data, hvec=None, inc=128, maxit=100, fulloutput=False): - ''' - HSCV Smoothed cross-validation estimate of smoothing parameter. - - CALL: [hs,hvec,score] = hscv(data,kernel,hvec); - - hs = smoothing parameter - hvec = vector defining possible values of hs - (default linspace(0.25*h0,h0,100), h0=0.62) - score = score vector - data = data vector - kernel = 'gaussian' - Gaussian kernel the only supported - - Note that only the first 4 letters of the kernel name is needed. - - Example: - data = rndnorm(0,1,20,1) - [hs hvec score] = hscv(data,'epan'); - plot(hvec,score) - See also hste, hbcv, hboot, hos, hldpi, hlscv, hstt, kde, kdefun - - Wand,M.P. and Jones, M.C. (1986) - 'Kernel smoothing' - Chapman and Hall, pp 75--79 - ''' - # TODO: Add support for other kernels than Gaussian - A = np.atleast_2d(data) - d, n = A.shape - - # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) - mu2, R, unusedRdd = self.stats() - - AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) - STEconstant = R / (mu2 ** (2) * n) - - sigmaA = self.hns(A) / AMISEconstant - if hvec is None: - H = AMISEconstant / 0.93 - hvec = np.linspace(0.25 * H, H, maxit) - hvec = np.asarray(hvec, dtype=float) - - steps = len(hvec) - score = np.zeros(steps) - - nfft = inc * 2 - amin = A.min(axis=1) # Find the minimum value of A. - amax = A.max(axis=1) #Find the maximum value of A. - arange = amax - amin # Find the range of A. - - #% xa holds the x 'axis' vector, defining a grid of x values where - #% the k.d. function will be evaluated. - - ax1 = amin - arange / 8.0 - bx1 = amax + arange / 8.0 - - kernel2 = Kernel('gauss') - mu2, R, unusedRdd = kernel2.stats() - STEconstant2 = R / (mu2 ** (2) * n) - fft = np.fft.fft - ifft = np.fft.ifft - - - h = np.zeros(d) - hvec = hvec * (STEconstant2 / STEconstant) ** (1. / 5.) - - k40, k60, k80, k100 = kernel2.deriv4_6_8_10(0, numout=4) - psi8 = 105 / (32 * sqrt(pi)); - psi12 = 3465. / (512 * sqrt(pi)) - g1 = (-2. * k60 / (mu2 * psi8 * n)) ** (1. / 9.) - g2 = (-2. * k100 / (mu2 * psi12 * n)) ** (1. / 13.) 
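-        # Note: g1 and g2 are normal-reference pilot bandwidths; in the loop
-        # below they are used to estimate the density functionals psi6 and
-        # psi10, which in turn give the pilot bandwidths g3 and g4 for the
-        # psi4 and psi8 estimates entering the smoothed cross-validation
-        # score.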
- - for dim in range(d): - s = sigmaA[dim] - ax = ax1[dim] / s - bx = bx1[dim] / s - datan = A[dim] / s - - xa = np.linspace(ax, bx, inc) - xn = np.linspace(0, bx - ax, inc) - - c = gridcount(datan, xa) - - kw4, kw6 = kernel2.deriv4_6_8_10(xn / g1, numout=2) - kw = np.r_[kw6, 0, kw6[-1:0:-1]] - z = np.real(ifft(fft(c, nfft) * fft(kw))) - psi6 = np.sum(c * z[:inc]) / (n ** 2 * g1 ** 7) - - kw4, kw6, kw8, kw10 = kernel2.deriv4_6_8_10(xn / g2, numout=4) - kw = np.r_[kw10, 0, kw10[-1:0:-1]] - z = np.real(ifft(fft(c, nfft) * fft(kw))) - psi10 = np.sum(c * z[:inc]) / (n ** 2 * g2 ** 11) - - g3 = (-2. * k40 / (mu2 * psi6 * n)) ** (1. / 7.) - g4 = (-2. * k80 / (mu2 * psi10 * n)) ** (1. / 11.) - - kw4 = kernel2.deriv4_6_8_10(xn / g3, numout=1) - kw = np.r_[kw4, 0, kw4[-1:0:-1]] - z = np.real(ifft(fft(c, nfft) * fft(kw))) - psi4 = np.sum(c * z[:inc]) / (n ** 2 * g3 ** 5) - - kw4, kw6, kw8 = kernel2.deriv4_6_8_10(xn / g3, numout=3) - kw = np.r_[kw8, 0, kw8[-1:0:-1]] - z = np.real(ifft(fft(c, nfft) * fft(kw))) - psi8 = np.sum(c * z[:inc]) / (n ** 2 * g4 ** 9) - - const = (441. / (64 * pi)) ** (1. / 18.) * (4 * pi) ** (-1. / 5.) * psi4 ** (-2. / 5.) * psi8 ** (-1. / 9.) - - M = np.atleast_2d(datan) - - Y = (M - M.T).ravel() - - for i in range(steps): - g = const * n ** (-23. / 45) * hvec[i] ** (-2) - sig1 = sqrt(2 * hvec[i] ** 2 + 2 * g ** 2) - sig2 = sqrt(hvec[i] ** 2 + 2 * g ** 2) - sig3 = sqrt(2 * g ** 2) - term2 = np.sum(kernel2(Y / sig1) / sig1 - 2 * kernel2(Y / sig2) / sig2 + kernel2(Y / sig3) / sig3) - - score[i] = 1. / (n * hvec[i] * 2. * sqrt(pi)) + term2 / n ** 2 - - idx = score.argmin() - # Kernel other than Gaussian scale bandwidth - h[dim] = hvec[idx] * (STEconstant / STEconstant2) ** (1 / 5) - if idx == 0: - warnings.warn('Optimum is probably lower than hs=%g for dim=%d' % (h[dim] * s, dim)) - elif idx == maxit - 1: - warnings.warn('Optimum is probably higher than hs=%g for dim=%d' % (h[dim] * s, dim)) - - hvec = hvec * (STEconstant / STEconstant2) ** (1 / 5) - if fulloutput: - return h * sigmaA, score, hvec, sigmaA - else: - return h * sigmaA - - def hldpi(self, data, L=2, inc=128): - '''HLDPI L-stage Direct Plug-In estimate of smoothing parameter. - - CALL: hs = hldpi(data,kernel,L) - - hs = one dimensional value for smoothing parameter - given the data and kernel. size 1 x D - data = data matrix, size N x D (D = # dimensions ) - kernel = 'epanechnikov' - Epanechnikov kernel. - 'biweight' - Bi-weight kernel. - 'triweight' - Tri-weight kernel. - 'triangluar' - Triangular kernel. - 'gaussian' - Gaussian kernel - 'rectangular' - Rectanguler kernel. - 'laplace' - Laplace kernel. - 'logistic' - Logistic kernel. - L = 0,1,2,3,... (default 2) - - Note that only the first 4 letters of the kernel name is needed. - - Example: - x = rndnorm(0,1,50,1); - hs = hldpi(x,'gauss',1); - - See also hste, hbcv, hboot, hos, hlscv, hscv, hstt, kde, kdefun - - Wand,M.P. and Jones, M.C. (1995) - 'Kernel smoothing' - Chapman and Hall, pp 67--74 - ''' - A = np.atleast_2d(data) - d, n = A.shape - - # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) - mu2, R, unusedRdd = self.stats() - - AMISEconstant = (8 * sqrt(pi) * R / (3 * n * mu2 ** 2)) ** (1. / 5) - STEconstant = R / (n * mu2 ** 2) - - sigmaA = self.hns(A) / AMISEconstant - - - nfft = inc * 2 - amin = A.min(axis=1) # Find the minimum value of A. - amax = A.max(axis=1) #Find the maximum value of A. - arange = amax - amin # Find the range of A. - - #% xa holds the x 'axis' vector, defining a grid of x values where - #% the k.d. function will be evaluated. 
- - ax1 = amin - arange / 8.0 - bx1 = amax + arange / 8.0 - - kernel2 = Kernel('gauss') - mu2, unusedR, unusedRdd = kernel2.stats() - - fft = np.fft.fft - ifft = np.fft.ifft - - h = np.zeros(d) - for dim in range(d): - s = sigmaA[dim] - datan = A[dim] #/ s - ax = ax1[dim] #/ s - bx = bx1[dim] #/ s - - xa = np.linspace(ax, bx, inc) - xn = np.linspace(0, bx - ax, inc) - - c = gridcount(datan, xa) - - r = 2 * L + 4 - rd2 = L + 2 - - # Eq. 3.7 in Wand and Jones (1995) - PSI_r = (-1) ** (rd2) * np.prod(np.r_[rd2 + 1:r+1]) / (sqrt(pi) * (2 * s) ** (r + 1)); - #PSI_r = (-1) ** (rd2) * np.prod(np.r_[rd2 + 1:r]) / (sqrt(pi) * (2.0) ** (r + 1)); - PSI = PSI_r - if L > 0: - # High order derivatives of the Gaussian kernel - Kd = kernel2.deriv4_6_8_10(0, numout=L) - - # L-stage iterations to estimate PSI_4 - for ix in range(L, 0, -1): - gi = (-2 * Kd[ix-1] / (mu2 * PSI * n)) ** (1. / (2 * ix + 5)) - - # Obtain the kernel weights. - KW0 = kernel2.deriv4_6_8_10(xn / gi, numout=ix) - if ix > 1: - KW0 = KW0[-1] - kw = np.r_[KW0, 0, KW0[inc - 1:0:-1]] # Apply 'fftshift' to kw. - - # Perform the convolution. - z = np.real(ifft(fft(c, nfft) * fft(kw))) - - PSI = np.sum(c * z[:inc]) / (n ** 2 * gi ** (2 * ix + 3)) - #end - #end - h[dim] = (STEconstant / PSI) ** (1. / 5) - return h - def norm_factor(self, d=1, n=None): - return self.kernel.norm_factor(d, n) - def eval_points(self, points): - return self.kernel(np.atleast_2d(points)) - __call__ = eval_points - -def mkernel(X, kernel): - ''' - MKERNEL Multivariate Kernel Function. - - Paramaters - --------- - X : array-like - matrix size d x n (d = # dimensions, n = # evaluation points) - kernel : string - defining kernel - 'epanechnikov' - Epanechnikov kernel. - 'biweight' - Bi-weight kernel. - 'triweight' - Tri-weight kernel. - 'p1epanechnikov' - product of 1D Epanechnikov kernel. - 'p1biweight' - product of 1D Bi-weight kernel. - 'p1triweight' - product of 1D Tri-weight kernel. - 'triangular' - Triangular kernel. - 'gaussian' - Gaussian kernel - 'rectangular' - Rectangular kernel. - 'laplace' - Laplace kernel. - 'logistic' - Logistic kernel. - Note that only the first 4 letters of the kernel name is needed. - Returns - ------- - z : ndarray - kernel function values evaluated at X - - - See also - -------- - kde, kdefun, kdebin - - References - ---------- - B. W. Silverman (1986) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp. 43, 76 - - Wand, M. P. and Jones, M. C. (1995) - 'Density estimation for statistics and data analysis' - Chapman and Hall, pp 31, 103, 175 - ''' - fun = _MKERNEL_DICT[kernel[:4]] - return fun(np.atleast_2d(X)) - -def accumsum(accmap, a, size, dtype=None): - if dtype is None: - dtype = a.dtype - size = np.atleast_1d(size) - if len(size)>1: - binx = accmap[:,0] - biny = accmap[:,1] - out = sparse.coo_matrix((a.ravel(), (binx, biny)),shape=size, dtype=dtype).tocsr() - else: - binx = accmap.ravel() - zero = np.zeros(len(binx)) - out = sparse.coo_matrix((a.ravel(), (binx, zero)),shape=(size,1), dtype=dtype).tocsr() - return out - -def accumsum2(accmap, a, size): - return np.bincount(accmap.ravel(), a.ravel(), np.array(size).max()) - -def accum(accmap, a, func=None, size=None, fill_value=0, dtype=None): - """ - An accumulation function similar to Matlab's `accumarray` function. - - Parameters - ---------- - accmap : ndarray - This is the "accumulation map". It maps input (i.e. indices into - `a`) to their destination in the output array. The first `a.ndim` - dimensions of `accmap` must be the same as `a.shape`. 
That is, - `accmap.shape[:a.ndim]` must equal `a.shape`. For example, if `a` - has shape (15,4), then `accmap.shape[:2]` must equal (15,4). In this - case `accmap[i,j]` gives the index into the output array where - element (i,j) of `a` is to be accumulated. If the output is, say, - a 2D, then `accmap` must have shape (15,4,2). The value in the - last dimension give indices into the output array. If the output is - 1D, then the shape of `accmap` can be either (15,4) or (15,4,1) - a : ndarray - The input data to be accumulated. - func : callable or None - The accumulation function. The function will be passed a list - of values from `a` to be accumulated. - If None, numpy.sum is assumed. - size : ndarray or None - The size of the output array. If None, the size will be determined - from `accmap`. - fill_value : scalar - The default value for elements of the output array. - dtype : numpy data type, or None - The data type of the output array. If None, the data type of - `a` is used. - - Returns - ------- - out : ndarray - The accumulated results. - - The shape of `out` is `size` if `size` is given. Otherwise the - shape is determined by the (lexicographically) largest indices of - the output found in `accmap`. - - - Examples - -------- - >>> from numpy import array, prod - >>> a = array([[1,2,3],[4,-1,6],[-1,8,9]]) - >>> a - array([[ 1, 2, 3], - [ 4, -1, 6], - [-1, 8, 9]]) - >>> # Sum the diagonals. - >>> accmap = array([[0,1,2],[2,0,1],[1,2,0]]) - >>> s = accum(accmap, a) - >>> s - array([ 9, 7, 15]) - >>> # A 2D output, from sub-arrays with shapes and positions like this: - >>> # [ (2,2) (2,1)] - >>> # [ (1,2) (1,1)] - >>> accmap = array([ - ... [[0,0],[0,0],[0,1]], - ... [[0,0],[0,0],[0,1]], - ... [[1,0],[1,0],[1,1]]]) - >>> # Accumulate using a product. - >>> accum(accmap, a, func=prod, dtype=float) - array([[ -8., 18.], - [ -8., 9.]]) - >>> # Same accmap, but create an array of lists of values. - >>> accum(accmap, a, func=lambda x: x, dtype='O') - array([[[1, 2, 4, -1], [3, 6]], - [[-1, 8], [9]]], dtype=object) - """ - - # Check for bad arguments and handle the defaults. - if accmap.shape[:a.ndim] != a.shape: - raise ValueError("The initial dimensions of accmap must be the same as a.shape") - if func is None: - func = np.sum - if dtype is None: - dtype = a.dtype - if accmap.shape == a.shape: - accmap = np.expand_dims(accmap, -1) - adims = tuple(range(a.ndim)) - if size is None: - size = 1 + np.squeeze(np.apply_over_axes(np.max, accmap, axes=adims)) - size = np.atleast_1d(size) - - # Create an array of python lists of values. - vals = np.empty(size, dtype='O') - for s in product(*[range(k) for k in size]): - vals[s] = [] - for s in product(*[range(k) for k in a.shape]): - indx = tuple(accmap[s]) - val = a[s] - vals[indx].append(val) - - # Create the output array. - out = np.empty(size, dtype=dtype) - for s in product(*[range(k) for k in size]): - if vals[s] == []: - out[s] = fill_value - else: - out[s] = func(vals[s]) - return out - -def qlevels(pdf, p=(10, 30, 50, 70, 90, 95, 99, 99.9), x1=None, x2=None): - '''QLEVELS Calculates quantile levels which encloses P% of PDF - - CALL: [ql PL] = qlevels(pdf,PL,x1,x2); - - ql = the discrete quantile levels. - pdf = joint point density function matrix or vector - PL = percent level (default [10:20:90 95 99 99.9]) - x1,x2 = vectors of the spacing of the variables - (Default unit spacing) - - QLEVELS numerically integrates PDF by decreasing height and find the - quantile levels which encloses P% of the distribution. 
If X1 and - (or) X2 is unspecified it is assumed that dX1 and dX2 is constant. - NB! QLEVELS normalizes the integral of PDF to N/(N+0.001) before - calculating QL in order to reflect the sampling of PDF is finite. - Currently only able to handle 1D and 2D PDF's if dXi is not constant (i=1,2). - - Example - ------- - >>> import wafo.stats as ws - >>> x = np.linspace(-8,8,2001); - >>> PL = np.r_[10:90:20, 90, 95, 99, 99.9] - >>> qlevels(ws.norm.pdf(x),p=PL, x1=x); - array([ 0.39591707, 0.37058719, 0.31830968, 0.23402133, 0.10362052, - 0.05862129, 0.01449505, 0.00178806]) - - # compared with the exact values - >>> ws.norm.pdf(ws.norm.ppf((100-PL)/200)) - array([ 0.39580488, 0.370399 , 0.31777657, 0.23315878, 0.10313564, - 0.05844507, 0.01445974, 0.00177719]) - - See also - -------- - qlevels2, tranproc - ''' - - norm = 1 # normalize cdf to unity - pdf = np.atleast_1d(pdf) - if any(pdf.ravel() < 0): - raise ValueError('This is not a pdf since one or more values of pdf is negative') - - fsiz = pdf.shape - fsizmin = min(fsiz) - if fsizmin == 0: - return [] - - N = np.prod(fsiz); - d = len(fsiz) - if x1 is None or ((x2 is None) and d > 2): - fdfi = pdf.ravel() - else: - if d == 1: # pdf in one dimension - dx22 = np.ones(1) - else: # % pdf in two dimensions - dx2 = np.diff(x2.ravel())*0.5; - dx22 = np.r_[0, dx2] + np.r_[dx2, 0]; - - dx1 = np.diff(x1.ravel())*0.5 - dx11 = np.r_[0 , dx1] + np.r_[dx1, 0] - dx1x2 = dx22[:, None] * dx11 - fdfi = (pdf * dx1x2).ravel(); - - - p = np.atleast_1d(p) - - if np.any((p < 0) | (100 < p)): - raise ValueError('PL must satisfy 0 <= PL <= 100') - - - p2 = p / 100.0 - ind = np.argsort(pdf.ravel()) # sort by height of pdf - ind = ind[::-1] - fi = pdf.flat[ind] - - Fi = np.cumsum(fdfi[ind]) # integration in the order of decreasing height of pdf - - if norm: # %normalize Fi to make sure int pdf dx1 dx2 approx 1 - Fi = Fi / Fi[-1] * N / (N + 1.5e-8) - - maxFi = np.max(Fi) - if maxFi > 1: - warnings.warn('this is not a pdf since cdf>1! normalizing') - - Fi = Fi / Fi[-1] * N / (N + 1.5e-8) - - elif maxFi < .95: - msg = '''The given pdf is too sparsely sampled since cdf<.95. - Thus QL is questionable''' - warnings.warn(msg) - - ind, = np.where(np.diff(np.r_[Fi, 1]) > 0) # make sure Fi is strictly increasing by not considering duplicate values - ui = tranproc(Fi[ind], fi[ind], p2) # calculating the inverse of Fi to find the index - # to the desired quantile level - # ui=smooth(Fi(ind),fi(ind),1,p2(:),1) % alternative - # res=ui-ui2 - - if np.any(ui >= max(pdf.ravel())): - warnings.warn('The lowest percent level is too close to 0%') - - if np.any(ui <= min(pdf.ravel())): - msg = '''The given pdf is too sparsely sampled or - the highest percent level is too close to 100%''' - warnings.warn(msg) - ui[ui < 0] = 0.0 - - return ui - -def qlevels2(data, p=(10,30,50,70,90, 95, 99, 99.9), method=1): - ''' - QLEVELS2 Calculates quantile levels which encloses P% of data - - CALL: [ql PL] = qlevels2(data,PL,method); - - ql = the discrete quantile levels, size D X Np - Parameters - ---------- - data : data matrix, size D x N (D = # of dimensions) - p : percent level vector, length Np (default [10:20:90 95 99 99.9]) - method : integer - 1 Interpolation so that F(X_(k)) == (k-0.5)/n. (default) - 2 Interpolation so that F(X_(k)) == k/(n+1). - 3 Based on the empirical distribution. - - Returns - ------- - - QLEVELS2 sort the columns of data in ascending order and find the - quantile levels for each column which encloses P% of the data. 
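-    Notes
-    -----
-    The quantile levels are the (100-p)'th percentiles of the data along the
-    last axis, i.e., the implementation below reduces to
-    percentile(data, 100-p, axis=-1, method=method).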
- - Examples : % Finding quantile levels enclosing P% of data: - -------- - >>> import wafo.stats as ws - >>> PL = np.r_[10:90:20, 90, 95, 99, 99.9] - >>> xs = ws.norm.rvs(size=2500000) - >>> np.round(qlevels2(ws.norm.pdf(xs), p=PL), decimals=3) - array([ 0.396, 0.37 , 0.318, 0.233, 0.103, 0.058, 0.014, 0.002]) - - # compared with the exact values - >>> ws.norm.pdf(ws.norm.ppf((100-PL)/200)) - array([ 0.39580488, 0.370399 , 0.31777657, 0.23315878, 0.10313564, - 0.05844507, 0.01445974, 0.00177719]) - - # Finding the median of xs: - >>> '%2.2f' % np.abs(qlevels2(xs,50)[0]) - '0.00' - - See also - -------- - qlevels - ''' - q = 100-np.atleast_1d(p) - return percentile(data, q, axis=-1, method=method) - - -_PKDICT = {1: lambda k, w, n: (k - w) / (n - 1), - 2: lambda k, w, n: (k - w / 2) / n, - 3: lambda k, w, n: k / n, - 4: lambda k, w, n: k / (n + 1), - 5: lambda k, w, n: (k - w / 3) / (n + 1 / 3), - 6: lambda k, w, n: (k - w * 3 / 8) / (n + 1 / 4)} -def _compute_qth_weighted_percentile(a, q, axis, out, method, weights, overwrite_input): - # normalise weight vector such that sum of the weight vector equals to n - q = np.atleast_1d(q) / 100.0 - if (q < 0).any() or (q > 1).any(): - raise ValueError, "percentile must be in the range [0,100]" - - shape0 = a.shape - if axis is None: - sorted_ = a.ravel() - else: - taxes = range(a.ndim) - taxes[-1], taxes[axis] = taxes[axis], taxes[-1] - sorted_ = np.transpose(a, taxes).reshape(-1, shape0[axis]) - - ind = sorted_.argsort(axis= -1) - if overwrite_input: - sorted_.sort(axis= -1) - else: - sorted_ = np.sort(sorted_, axis= -1) - - w = np.atleast_1d(weights) - n = len(w) - w = w * n / w.sum() - - # Work on each column separately because of weight vector - m = sorted_.shape[0] - nq = len(q) - y = np.zeros((m, nq)) - pk_fun = _PKDICT.get(method, 1) - for i in range(m): - sortedW = w[ind[i]] # rearrange the weight according to ind - k = sortedW.cumsum() # cumulative weight - pk = pk_fun(k, sortedW, n) # different algorithm to compute percentile - # Interpolation between pk and sorted_ for given value of q - y[i] = np.interp(q, pk, sorted_[i]) - if axis is None: - return np.squeeze(y) - else: - shape1 = list(shape0) - shape1[axis], shape1[-1] = shape1[-1], nq - return np.squeeze(np.transpose(y.reshape(shape1), taxes)) - -#method=1: p(k) = k/(n-1) -#method=2: p(k) = (k+0.5)/n. -#method=3: p(k) = (k+1)/n -#method=4: p(k) = (k+1)/(n+1) -#method=5: p(k) = (k+2/3)/(n+1/3) -#method=6: p(k) = (k+5/8)/(n+1/4) - -_KDICT = {1:lambda p, n: p * (n - 1), - 2:lambda p, n: p * n - 0.5, - 3:lambda p, n: p * n - 1, - 4:lambda p, n: p * (n + 1) - 1, - 5:lambda p, n: p * (n + 1. / 3) - 2. / 3, - 6:lambda p, n: p * (n + 1. / 4) - 5. 
/ 8} -def _compute_qth_percentile(sorted_, q, axis, out, method): - if not np.isscalar(q): - p = [_compute_qth_percentile(sorted_, qi, axis, None, method) - for qi in q] - if out is not None: - out.flat = p - return p - - q = q / 100.0 - if (q < 0) or (q > 1): - raise ValueError, "percentile must be in the range [0,100]" - - indexer = [slice(None)] * sorted_.ndim - Nx = sorted_.shape[axis] - k_fun = _KDICT.get(method, 1) - index = np.clip(k_fun(q, Nx), 0, Nx - 1) - i = int(index) - if i == index: - indexer[axis] = slice(i, i + 1) - weights1 = np.array(1) - sumval = 1.0 - else: - indexer[axis] = slice(i, i + 2) - j = i + 1 - weights1 = np.array([(j - index), (index - i)], float) - wshape = [1] * sorted_.ndim - wshape[axis] = 2 - weights1.shape = wshape - sumval = weights1.sum() - - # Use add.reduce in both cases to coerce data type as well as - # check and use out array. - return np.add.reduce(sorted_[indexer] * weights1, axis=axis, out=out) / sumval - -def percentile(a, q, axis=None, out=None, overwrite_input=False, method=1, weights=None): - """ - Compute the qth percentile of the data along the specified axis. - - Returns the qth percentile of the array elements. - - Parameters - ---------- - a : array_like - Input array or object that can be converted to an array. - q : float in range of [0,100] (or sequence of floats) - percentile to compute which must be between 0 and 100 inclusive - axis : {None, int}, optional - Axis along which the percentiles are computed. The default (axis=None) - is to compute the median along a flattened version of the array. - out : ndarray, optional - Alternative output array in which to place the result. It must - have the same shape and buffer length as the expected output, - but the type (of the output) will be cast if necessary. - overwrite_input : {False, True}, optional - If True, then allow use of memory of input array (a) for - calculations. The input array will be modified by the call to - median. This will save memory when you do not need to preserve - the contents of the input array. Treat the input as undefined, - but it will probably be fully or partially sorted. Default is - False. Note that, if `overwrite_input` is True and the input - is not already an ndarray, an error will be raised. - method : scalar integer - defining the interpolation method. Valid options are - 1 : p[k] = k/(n-1). In this case, p[k] = mode[F(x[k])]. - This is used by S. (default) - 2 : p[k] = (k+0.5)/n. That is a piecewise linear function where - the knots are the values midway through the steps of the - empirical cdf. This is popular amongst hydrologists. - Matlab also uses this formula. - 3 : p[k] = (k+1)/n. That is, linear interpolation of the empirical cdf. - 4 : p[k] = (k+1)/(n+1). Thus p[k] = E[F(x[k])]. - This is used by Minitab and by SPSS. - 5 : p[k] = (k+2/3)/(n+1/3). Then p[k] =~ median[F(x[k])]. - The resulting quantile estimates are approximately - median-unbiased regardless of the distribution of x. - 6 : p[k] = (k+5/8)/(n+1/4). The resulting quantile estimates are - approximately unbiased for the expected order statistics - if x is normally distributed. - - Returns - ------- - pcntile : ndarray - A new array holding the result (unless `out` is specified, in - which case that array is returned instead). If the input contains - integers, or floats of smaller precision than 64, then the output - data-type is float64. Otherwise, the output data-type is the same - as that of the input. 
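A short sketch of how the method parameter shifts the plotting positions, using the k-formulas listed above (it assumes this module is importable as wafo.kdetools, as in the examples below):

    import numpy as np
    import wafo.kdetools as wk

    a = np.arange(1, 11)               # 1, 2, ..., 10  (n = 10)
    wk.percentile(a, 25, method=1)     # k = q*(n-1)   -> 3.25
    wk.percentile(a, 25, method=4)     # k = q*(n+1)-1 -> 2.75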
- - See Also - -------- - mean, median - - Notes - ----- - Given a vector V of length N, the qth percentile of V is the qth ranked - value in a sorted copy of V. A weighted average of the two nearest neighbors - is used if the normalized ranking does not match q exactly. - The same as the median if q is 0.5; the same as the min if q is 0; - and the same as the max if q is 1 - - Examples - -------- - >>> import wafo.kdetools as wk - >>> a = np.array([[10, 7, 4], [3, 2, 1]]) - >>> a - array([[10, 7, 4], - [ 3, 2, 1]]) - >>> wk.percentile(a, 50) - 3.5 - >>> wk.percentile(a, 50, axis=0) - array([ 6.5, 4.5, 2.5]) - >>> wk.percentile(a, 50, axis=0, weights=np.ones(2)) - array([ 6.5, 4.5, 2.5]) - >>> wk.percentile(a, 50, axis=1) - array([ 7., 2.]) - >>> wk.percentile(a, 50, axis=1, weights=np.ones(3)) - array([ 7., 2.]) - >>> m = wk.percentile(a, 50, axis=0) - >>> out = np.zeros_like(m) - >>> wk.percentile(a, 50, axis=0, out=m) - array([ 6.5, 4.5, 2.5]) - >>> m - array([ 6.5, 4.5, 2.5]) - >>> b = a.copy() - >>> wk.percentile(b, 50, axis=1, overwrite_input=True) - array([ 7., 2.]) - >>> assert not np.all(a==b) - >>> b = a.copy() - >>> wk.percentile(b, 50, axis=None, overwrite_input=True) - 3.5 - >>> np.all(a==b) - False - - """ - a = np.asarray(a) - try: - if q == 0: - return a.min(axis=axis, out=out) - elif q == 100: - return a.max(axis=axis, out=out) - except: - pass - if weights is not None: - return _compute_qth_weighted_percentile(a, q, axis, out, method, weights, overwrite_input) - elif overwrite_input: - if axis is None: - sorted_ = a.ravel() - sorted_.sort() - else: - a.sort(axis=axis) - sorted_ = a - else: - sorted_ = np.sort(a, axis=axis) - if axis is None: - axis = 0 - - return _compute_qth_percentile(sorted_, q, axis, out, method) - -def iqrange(data, axis=None): - ''' - Returns the Inter Quartile Range of data - - Parameters - ---------- - data : array-like - Input array or object that can be converted to an array. - axis : {None, int}, optional - Axis along which the percentiles are computed. The default (axis=None) - is to compute the median along a flattened version of the array. - Returns - ------- - r : array-like - abs(np.percentile(data, 75, axis)-np.percentile(data, 25, axis)) - - Notes - ----- - IQRANGE is a robust measure of spread. The use of interquartile range - guards against outliers if the distribution have heavy tails. - - Example - ------- - >>> a = np.arange(101) - >>> iqrange(a) - 50.0 - - See also - -------- - np.std - ''' - return np.abs(np.percentile(data, 75, axis=axis) - np.percentile(data, 25, axis=axis)) - -def bitget(int_type, offset): - ''' - Returns the value of the bit at the offset position in int_type. - - Example - ------- - >>> bitget(5, np.r_[0:4]) - array([1, 0, 1, 0]) - ''' - - return np.bitwise_and(int_type, 1 << offset) >> offset - - -def gridcount(data, X, y=1): - ''' - Returns D-dimensional histogram using linear binning. - - Parameters - ---------- - data = column vectors with D-dimensional data, shape D x Nd - X = row vectors defining discretization, shape D x N - Must include the range of the data. - - Returns - ------- - c = gridcount, shape N x N x ... x N - - GRIDCOUNT obtains the grid counts using linear binning. - There are 2 strategies: simple- or linear- binning. - Suppose that an observation occurs at x and that the nearest point - below and above is y and z, respectively. Then simple binning strategy - assigns a unit weight to either y or z, whichever is closer. 
Linear - binning, on the other hand, assigns the grid point at y with the weight - of (z-x)/(z-y) and the gridpoint at z a weight of (y-x)/(z-y). - - In terms of approximation error of using gridcounts as pdf-estimate, - linear binning is significantly more accurate than simple binning. - - NOTE: The interval [min(X);max(X)] must include the range of the data. - The order of C is permuted in the same order as - meshgrid for D==2 or D==3. - - Example - ------- - >>> import numpy as np - >>> import wafo.kdetools as wk - >>> import pylab as plb - >>> N = 20; - >>> data = np.random.rayleigh(1,N) - >>> x = np.linspace(0,max(data)+1,50) - >>> dx = x[1]-x[0] - - >>> c = wk.gridcount(data,x) - - >>> h = plb.plot(x,c,'.') # 1D histogram - >>> pdf = c/dx/N - >>> h1 = plb.plot(x, pdf) # 1D probability density plot - >>> '%1.3f' % np.trapz(pdf, x) - '1.000' - - See also - -------- - bincount, accum, kdebin - - Reference - ---------- - Wand,M.P. and Jones, M.C. (1995) - 'Kernel smoothing' - Chapman and Hall, pp 182-192 - ''' - dat = np.atleast_2d(data) - x = np.atleast_2d(X) - y = np.atleast_1d(y).ravel() - d = dat.shape[0] - d1, inc = x.shape - - if d != d1: - raise ValueError('Dimension 0 of data and X do not match.') - - dx = np.diff(x[:, :2], axis=1) - xlo = x[:, 0] - xup = x[:, -1] - - datlo = dat.min(axis=1) - datup = dat.max(axis=1) - if ((datlo < xlo) | (xup < datup)).any(): - raise ValueError('X does not include whole range of the data!') - - csiz = np.repeat(inc, d) - use_sparse = False - if use_sparse: - acfun = accumsum # faster than accum - else: - acfun = accumsum2 #accum - - binx = np.asarray(np.floor((dat - xlo[:, newaxis]) / dx), dtype=int) - w = dx.prod() - abs = np.abs #@ReservedAssignment - if d == 1: - x.shape = (-1,) - c = np.asarray((acfun(binx, (x[binx + 1] - dat) * y, size=(inc, )) + - acfun(binx+1, (dat - x[binx]) * y, size=(inc, ))) / w).ravel() - -# elif d == 2: -# b2 = binx[1] -# b1 = binx[0] -# c_ = np.c_ -# stk = np.vstack -# c = (acfun(c_[b1, b2] , abs(np.prod(stk([X[0, b1 + 1], X[1, b2 + 1]]) - dat, axis=0)), size=[inc, inc]) + -# acfun(c_[b1 + 1, b2] , abs(np.prod(stk([X[0, b1], X[1, b2 + 1]]) - dat, axis=0)), size=[inc, inc]) + -# acfun(c_[b1 , b2 + 1], abs(np.prod(stk([X[0, b1 + 1], X[1, b2]]) - dat, axis=0)), size=[inc, inc]) + -# acfun(c_[b1 + 1, b2 + 1], abs(np.prod(stk([X[0, b1], X[1, b2]]) - dat, axis=0)), size=[inc, inc])) / w -# c = c.T # make sure c is stored in the same way as meshgrid - else: # % d>2 - - Nc = csiz.prod() - c = np.zeros((Nc,)) - - fact2 = np.asarray(np.reshape(inc * np.arange(d), (d, -1)), dtype=int) - fact1 = np.asarray(np.reshape(csiz.cumprod() / inc, (d, -1)), dtype=int) - #fact1 = fact1(ones(n,1),:); - bt0 = [0, 0] - X1 = X.ravel() - for ir in xrange(2 ** (d - 1)): - bt0[0] = np.reshape(bitget(ir, np.arange(d)), (d, -1)) - bt0[1] = 1 - bt0[0] - for ix in xrange(2): - one = np.mod(ix, 2) - two = np.mod(ix + 1, 2) - # Convert to linear index - b1 = np.sum((binx + bt0[one]) * fact1, axis=0) #linear index to c - bt2 = bt0[two] + fact2 - b2 = binx + bt2 # linear index to X - c += acfun(b1, abs(np.prod(X1[b2] - dat, axis=0)) * y, size=(Nc,)) - - c = np.reshape(c/w , csiz, order='F') - - T = range(d) - T[1], T[0] = T[0], T[1] - c = c.transpose(*T) # make sure c is stored in the same way as meshgrid - return c - -def evar(y): - ''' - Noise variance estimation. - Assuming that the deterministic function Y has additive Gaussian noise, - EVAR(Y) returns an estimated variance of this noise. 
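The linear-binning weights used by gridcount above can be written out for the 1-D case as a short sketch (illustrative only; gridcount itself handles arbitrary dimension and accumulates the weights on the full grid):

    import numpy as np

    x = np.linspace(0.0, 1.0, 6)                  # grid with spacing dx = 0.2
    dx = x[1] - x[0]
    data = np.array([0.33, 0.7])

    c = np.zeros(x.size)
    i = np.floor((data - x[0]) / dx).astype(int)  # nearest grid point below each observation
    w_up = (data - x[i]) / dx                     # weight (x-y)/(z-y) for the point above
    np.add.at(c, i, 1.0 - w_up)                   # weight (z-x)/(z-y) for the point below
    np.add.at(c, i + 1, w_up)
    # c sums to len(data); 0.33 contributes 0.35 to the grid point 0.2 and 0.65 to 0.4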
- - Note: - ---- - A thin-plate smoothing spline model is used to smooth Y. It is assumed - that the model whose generalized cross-validation score is minimum can - provide the variance of the additive noise. A few tests showed that - EVAR works very well with "not too irregular" functions. - - Examples: - -------- - 1D signal - >>> n = 1e6 - >>> x = np.linspace(0,100,n); - >>> y = np.cos(x/10)+(x/50) - >>> var0 = 0.02 # noise variance - >>> yn = y + sqrt(var0)*np.random.randn(*y.shape) - >>> s = evar(yn) #estimated variance - >>> np.abs(s-var0)/var0 < 3.5/np.sqrt(n) - True - - 2D function - >>> xp = np.linspace(0,1,50) - >>> x, y = np.meshgrid(xp,xp) - >>> f = np.exp(x+y) + np.sin((x-2*y)*3) - >>> var0 = 0.04 # noise variance - >>> fn = f + sqrt(var0)*np.random.randn(*f.shape) - >>> s = evar(fn) # estimated variance - >>> np.abs(s-var0)/var0 < 3.5/np.sqrt(50) - True - - 3D function - >>> yp = np.linspace(-2,2,50) - >>> [x,y,z] = meshgrid(yp,yp,yp, sparse=True) - >>> f = x*exp(-x**2-y**2-z**2) - >>> var0 = 0.5 # noise variance - >>> fn = f + sqrt(var0)*np.random.randn(*f.shape) - >>> s = evar(fn) # estimated variance - >>> np.abs(s-var0)/var0 < 3.5/np.sqrt(50) - True - - - Other example - ------------- - http://www.biomecardio.com/matlab/evar.html - - Note: - ---- - EVAR is only adapted to evenly-gridded 1-D to N-D data. - - See also - -------- - VAR, STD, SMOOTHN - ''' - - # Damien Garcia -- 2008/04, revised 2009/10 - y = np.atleast_1d(y) - d = y.ndim - sh0 = y.shape - - S = np.zeros(sh0) - sh1 = np.ones((d,)) - cos = np.cos - pi = np.pi - for i in range(d): - ni = sh0[i] - sh1[i] = ni - t = np.arange(ni).reshape(sh1)/ni - S += cos(pi*t) - sh1[i] = 1 - - S2 = 2*(d-S).ravel() - # N-D Discrete Cosine Transform of Y - dcty2 = dctn(y).ravel()**2 - def score_fun(L, S2, dcty2): - # Generalized cross validation score - M = 1-1./(1+10**L*S2) - noisevar = (dcty2*M**2).mean() - return noisevar/M.mean()**2 - #fun = lambda x : score_fun(x, S2, dcty2) - Lopt = optimize.fminbound(score_fun, -38, 38, args=(S2, dcty2)) - M = 1.0-1.0/(1+10**Lopt*S2) - noisevar = (dcty2*M**2).mean() - return noisevar - -def smoothn(data, s=None, weight=None, robust=False, z0=None, tolz=1e-3, maxiter=100, fulloutput=False): - ''' - SMOOTHN fast and robust spline smoothing for 1-D to N-D data. - - - Parameters - ---------- - data : array like - uniformly-sampled data array to smooth. Non finite values (NaN or Inf) - are treated as missing values. - s : real positive scalar - smooting parameter. The larger S is, the smoother the output will be. - Default value is automatically determined using the generalized - cross-validation (GCV) method. - weight : string or array weights - weighting array of real positive values, that must have the same size as DATA. - Note that a zero weight corresponds to a missing value. - robust : bool - If true carry out a robust smoothing that minimizes the influence of outlying data. - tolz : real positive scalar - Termination tolerance on Z (default = 1e-3) - maxiter : scalar integer - Maximum number of iterations allowed (default = 100) - z0 : array-like - Initial value for the iterative process (default = original data) - - Returns - ------- - z : array like - smoothed data - - To be made - ---------- - Estimate the confidence bands (see Wahba 1983, Nychka 1988). - - Reference - --------- - Garcia D, Robust smoothing of gridded data in one and higher dimensions - with missing values. Computational Statistics & Data Analysis, 2010. 
- http://www.biomecardio.com/pageshtm/publi/csda10.pdf - - Examples: - -------- - - 1-D example - >>> import matplotlib.pyplot as plt - >>> x = np.linspace(0,100,2**8) - >>> y = np.cos(x/10)+(x/50)**2 + np.random.randn(*x.shape)/10 - >>> y[np.r_[70, 75, 80]] = np.array([5.5, 5, 6]) - >>> z = smoothn(y) # Regular smoothing - >>> zr = smoothn(y,robust=True) # Robust smoothing - >>> h=plt.subplot(121), - >>> h = plt.plot(x,y,'r.',x,z,'k',linewidth=2) - >>> h=plt.title('Regular smoothing') - >>> h=plt.subplot(122) - >>> h=plt.plot(x,y,'r.',x,zr,'k',linewidth=2) - >>> h=plt.title('Robust smoothing') - - 2-D example - >>> xp = np.r_[0:1:.02] - >>> [x,y] = np.meshgrid(xp,xp) - >>> f = np.exp(x+y) + np.sin((x-2*y)*3); - >>> fn = f + np.random.randn(*f.shape)*0.5; - >>> fs = smoothn(fn); - >>> h=plt.subplot(121), - >>> h=plt.contourf(xp,xp,fn) - >>> h=plt.subplot(122) - >>> h=plt.contourf(xp,xp,fs) - - 2-D example with missing data - n = 256; - y0 = peaks(n); - y = y0 + rand(size(y0))*2; - I = randperm(n^2); - y(I(1:n^2*0.5)) = NaN; lose 1/2 of data - y(40:90,140:190) = NaN; create a hole - z = smoothn(y); smooth data - subplot(2,2,1:2), imagesc(y), axis equal off - title('Noisy corrupt data') - subplot(223), imagesc(z), axis equal off - title('Recovered data ...') - subplot(224), imagesc(y0), axis equal off - title('... compared with original data') - - 3-D example - [x,y,z] = meshgrid(-2:.2:2); - xslice = [-0.8,1]; yslice = 2; zslice = [-2,0]; - vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06; - subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic') - title('Noisy data') - v = smoothn(vn); - subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic') - title('Smoothed data') - - Cardioid - - t = linspace(0,2*pi,1000); - x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1; - y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1; - z = smoothn(complex(x,y)); - plot(x,y,'r.',real(z),imag(z),'k','linewidth',2) - axis equal tight - - Cellular vortical flow - [x,y] = meshgrid(linspace(0,1,24)); - Vx = cos(2*pi*x+pi/2).*cos(2*pi*y); - Vy = sin(2*pi*x+pi/2).*sin(2*pi*y); - Vx = Vx + sqrt(0.05)*randn(24,24); adding Gaussian noise - Vy = Vy + sqrt(0.05)*randn(24,24); adding Gaussian noise - I = randperm(numel(Vx)); - Vx(I(1:30)) = (rand(30,1)-0.5)*5; adding outliers - Vy(I(1:30)) = (rand(30,1)-0.5)*5; adding outliers - Vx(I(31:60)) = NaN; missing values - Vy(I(31:60)) = NaN; missing values - Vs = smoothn(complex(Vx,Vy),'robust'); automatic smoothing - subplot(121), quiver(x,y,Vx,Vy,2.5), axis square - title('Noisy velocity field') - subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square - title('Smoothed velocity field') - - See also SMOOTH, SMOOTH3, DCTN, IDCTN. - - -- Damien Garcia -- 2009/03, revised 2010/11 - Visit my website for more details about SMOOTHN - ''' - - y = np.atleast_1d(data) - sizy = y.shape - noe = y.size - if noe<2: - return data - - weightstr = 'bisquare' - W = np.ones(sizy) - # Smoothness parameter and weights - if weight is None: - pass - elif isinstance(weight, str): - weightstr = weight.lower() - else: - W = weight - - # Weights. Zero weights are assigned to not finite values (Inf or NaN), - # (Inf/NaN values = missing data). - IsFinite = np.isfinite(y) - nof = IsFinite.sum() # number of finite elements - W = W * IsFinite - if (W<0).any(): - raise ValueError('Weights must all be >=0') - else: - W = W/W.max() - - # Weighted or missing data? - isweighted = (W<1).any() - - - # Automatic smoothing? 
- isauto = s is None - # Creation of the Lambda tensor - # Lambda contains the eingenvalues of the difference matrix used in this - # penalized least squares process. - d = y.ndim - Lambda = np.zeros(sizy) - siz0 = [1,]*d - for i in range(d): - siz0[i] = sizy[i] - Lambda = Lambda + np.cos(pi*np.arange(sizy[i])/sizy[i]).reshape(siz0) - siz0[i] = 1 - - Lambda = -2*(d-Lambda) - if not isauto: - Gamma = 1./(1 + s * Lambda ** 2) - - # Upper and lower bound for the smoothness parameter - # The average leverage (h) is by definition in [0 1]. Weak smoothing occurs - # if h is close to 1, while over-smoothing appears when h is near 0. Upper - # and lower bounds for h are given to avoid under- or over-smoothing. See - # equation relating h to the smoothness parameter (Equation #12 in the - # referenced CSDA paper). - N = (np.array(sizy)!=1).sum() # tensor rank of the y-array - hMin = 1e-6; - hMax = 0.99; - sMinBnd = (((1+sqrt(1+8*hMax**(2./N)))/4./hMax**(2./N))**2-1)/16 - sMaxBnd = (((1+sqrt(1+8*hMin**(2./N)))/4./hMin**(2./N))**2-1)/16 - - # Initialize before iterating - - Wtot = W; - # Initial conditions for z - if isweighted: - # With weighted/missing data - # An initial guess is provided to ensure faster convergence. For that - # purpose, a nearest neighbor interpolation followed by a coarse - # smoothing are performed. - - if z0 is None: - z = InitialGuess(y,IsFinite) - else: - # an initial guess (z0) has been provided - z = z0 - else: - z = np.zeros(sizy) - z0 = z - y[~IsFinite] = 0 # arbitrary values for missing y-data - - tol = 1 - RobustIterativeProcess = True - RobustStep = 1; - - # Error on p. Smoothness parameter s = 10^p - errp = 0.1; - - # Relaxation factor RF: to speedup convergence - RF = 1 + 0.75 if weight is None else 1.0 - - norm = linalg.norm - # Main iterative process - while RobustIterativeProcess: - # "amount" of weights (see the function GCVscore) - aow = Wtot.sum()/noe # 0 < aow <= 1 - exitflag = True - for nit in range(1,maxiter+1): - DCTy = dctn(Wtot*(y-z)+z) - if isauto and not np.remainder(np.log2(nit),1): - - # The generalized cross-validation (GCV) method is used. - # We seek the smoothing parameter s that minimizes the GCV - # score i.e. s = Argmin(GCVscore). - # Because this process is time-consuming, it is performed from - # time to time (when nit is a power of 2) - log10s = optimize.fminbound(gcv, np.log10(sMinBnd),np.log10(sMaxBnd), args=(aow, Lambda, DCTy, y, Wtot, IsFinite, nof, noe), - xtol=errp, full_output=False, disp=False) - s = 10**log10s - Gamma = 1.0/(1+s*Lambda**2) - z = RF*idctn(Gamma*DCTy) + (1-RF)*z - - # if no weighted/missing data => tol=0 (no iteration) - tol = norm(z0.ravel()-z.ravel())/norm(z.ravel()) if isweighted else 0.0 - if tol<=tolz: - break - z0 = z # re-initialization - else: - exitflag = False #nit0.9: # aow = 1 means that all of the data are equally weighted - # very much faster: does not require any inverse DCT - RSS = linalg.norm(DCTy.ravel()*(Gamma.ravel()-1))**2 - else: - # take account of the weights to calculate RSS: - yhat = idctn(Gamma*DCTy) - RSS = linalg.norm(sqrt(Wtot[IsFinite])*(y[IsFinite]-yhat[IsFinite]))**2 - #end - - TrH = Gamma.sum() - GCVscore = RSS/nof/(1.0-TrH/noe)**2 - return GCVscore - - -# Robust weights -def RobustWeights(r,I,h,wstr): - #weights for robust smoothing. 
- MAD = np.median(abs(r[I]-np.median(r[I]))) # median absolute deviation - u = abs(r/(1.4826*MAD)/sqrt(1-h)) # studentized residuals - if wstr == 'cauchy': - c = 2.385; - W = 1./(1+(u/c)**2) # Cauchy weights - elif wstr=='talworth': - c = 2.795 - W = u L2 = 0.5 reasonable - - tkde = TKDE(data, L2=0.5) - ft = tkde(x, output='plot', title='Transformation KDE (hs=%g)' % tkde.tkde.hs) - plt.figure(1) - ft.plot() - - plt.plot(x, st.rayleigh.pdf(x, scale=1), ':') - - plt.figure(0) - -def kde_demo3(): - '''Demonstrate the difference between transformation and ordinary-KDE in 2D - - KDEDEMO3 shows that the transformation KDE is a better estimate for - Rayleigh distributed data around 0 than the ordinary KDE. - ''' - import scipy.stats as st - data = st.rayleigh.rvs(scale=1, size=(2,300)) - - #x = np.linspace(1.5e-3, 5, 55) - - kde = KDE(data) - f = kde(output='plot', title='Ordinary KDE', plotflag=1) - plt.figure(0) - f.plot() - - plt.plot(data[0], data[1], '.') - - #plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable - - tkde = TKDE(data, L2=0.5) - ft = tkde.eval_grid_fast(output='plot', title='Transformation KDE', plotflag=1) - - - plt.figure(1) - ft.plot() - - plt.plot(data[0],data[1], '.') - - plt.figure(0) - - - -def kde_demo4(N=50): - '''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior - for 1D multimodal distributions - - KDEDEMO4 shows that the improved Sheather-Jones plug-in smoothing is a better - compared to normal reference rules (in this case the hns) - ''' - import scipy.stats as st - - data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(N,)), - st.norm.rvs(loc=-5, scale=1, size=(N,)))) - - #x = np.linspace(1.5e-3, 5, 55) - - kde = KDE(data, kernel=Kernel('gauss', 'hns')) - f = kde(output='plot', title='Ordinary KDE', plotflag=1) - - - - kde1 = KDE(data, kernel=Kernel('gauss', 'hisj')) - f1 = kde1(output='plot', label='Ordinary KDE', plotflag=1) - - plt.figure(0) - f.plot('r', label='hns=%g' % kde.hs) - #plt.figure(2) - f1.plot('b', label='hisj=%g' % kde1.hs) - x = np.linspace(-4,4) - for loc in [-5,5]: - plt.plot(x + loc, st.norm.pdf(x, 0, scale=1)/2, 'k:', label='True density') - plt.legend() - -def kde_demo5(N=500): - '''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior - for 2D multimodal distributions - - KDEDEMO5 shows that the improved Sheather-Jones plug-in smoothing is better - compared to normal reference rules (in this case the hns) - ''' - import scipy.stats as st - - data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(2,N,)), - st.norm.rvs(loc=-5, scale=1, size=(2,N,)))) - kde = KDE(data, kernel=Kernel('gauss', 'hns')) - f = kde(output='plot', title='Ordinary KDE (hns=%g %g)' % tuple(kde.hs.tolist()), plotflag=1) - - kde1 = KDE(data, kernel=Kernel('gauss', 'hisj')) - f1 = kde1(output='plot', title='Ordinary KDE (hisj=%g %g)' % tuple(kde1.hs.tolist()), plotflag=1) - - plt.figure(0) - plt.clf() - f.plot() - plt.plot(data[0], data[1], '.') - plt.figure(1) - plt.clf() - f1.plot() - plt.plot(data[0], data[1], '.') - -def kreg_demo1(hs=None, fast=False, fun='hisj'): - ''' - - ''' - N = 100 - #ei = np.random.normal(loc=0, scale=0.075, size=(N,)) - ei = np.array([-0.08508516, 0.10462496, 0.07694448, -0.03080661, 0.05777525, - 0.06096313, -0.16572389, 0.01838912, -0.06251845, -0.09186784, - -0.04304887, -0.13365788, -0.0185279 , -0.07289167, 0.02319097, - 0.06887854, -0.08938374, -0.15181813, 0.03307712, 0.08523183, - -0.0378058 , -0.06312874, 0.01485772, 0.06307944, -0.0632959 , - 0.18963205, 0.0369126 , -0.01485447, 
0.04037722, 0.0085057 , - -0.06912903, 0.02073998, 0.1174351 , 0.17599277, -0.06842139, - 0.12587608, 0.07698113, -0.0032394 , -0.12045792, -0.03132877, - 0.05047314, 0.02013453, 0.04080741, 0.00158392, 0.10237899, - -0.09069682, 0.09242174, -0.15445323, 0.09190278, 0.07138498, - 0.03002497, 0.02495252, 0.01286942, 0.06449978, 0.03031802, - 0.11754861, -0.02322272, 0.00455867, -0.02132251, 0.09119446, - -0.03210086, -0.06509545, 0.07306443, 0.04330647, 0.078111 , - -0.04146907, 0.05705476, 0.02492201, -0.03200572, -0.02859788, - -0.05893749, 0.00089538, 0.0432551 , 0.04001474, 0.04888828, - -0.17708392, 0.16478644, 0.1171006 , 0.11664846, 0.01410477, - -0.12458953, -0.11692081, 0.0413047 , -0.09292439, -0.07042327, - 0.14119701, -0.05114335, 0.04994696, -0.09520663, 0.04829406, - -0.01603065, -0.1933216 , 0.19352763, 0.11819496, 0.04567619, - -0.08348306, 0.00812816, -0.00908206, 0.14528945, 0.02901065]) - x = np.linspace(0, 1, N) - - - y0 = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2)) - y = y0 + ei - kernel = Kernel('gauss',fun=fun) - hopt = kernel.hisj(x) - kreg = KRegression(x, y, p=0, hs=hs, kernel=kernel, xmin=-2*hopt, xmax=1+2*hopt) - if fast: - kreg.__call__ = kreg.eval_grid_fast - - f = kreg(output='plot', title='Kernel regression', plotflag=1) - plt.figure(0) - f.plot(label='p=0') - - - kreg.p=1 - f1 = kreg(output='plot', title='Kernel regression', plotflag=1) - f1.plot(label='p=1') - #print(f1.data) - plt.plot(x, y, '.', label='data') - plt.plot(x, y0, 'k', label='True model') - plt.legend() - - plt.show() - - print(kreg.tkde.tkde.inv_hs) - print(kreg.tkde.tkde.hs) - -_REALMIN = np.finfo(float).machar.xmin -_REALMAX = np.finfo(float).machar.xmax -_EPS = np.finfo(float).eps -def _logit(p): - pc = p.clip(min=0, max=1) - return (np.log(pc)-np.log1p(-pc)).clip(min=-40,max=40) -def _logitinv(x): - return 1.0/(np.exp(-x)+1) - - -def _get_data(n=100, symmetric=False, loc1=1.1, scale1=0.6, scale2=1.0): - import scipy.stats as st - #from sg_filter import SavitzkyGolay - dist = st.norm - - - norm1 = scale2*(dist.pdf(-loc1, loc=-loc1, scale=scale1) + dist.pdf(-loc1, loc=loc1, scale=scale1)) - fun1 = lambda x : ((dist.pdf(x, loc=-loc1, scale=scale1) + dist.pdf(x, loc=loc1, scale=scale1))/norm1).clip(max=1.0) - - x = np.sort(6*np.random.rand(n,1)-3, axis=0) - - y = (fun1(x)>np.random.rand(n, 1)).ravel() - #y = (np.cos(x)>2*np.random.rand(n, 1)-1).ravel() - x = x.ravel() - - if symmetric: - xi = np.hstack((x.ravel(),-x.ravel())) - yi = np.hstack((y, y)) - i = np.argsort(xi) - x = xi[i] - y = yi[i] - return x, y, fun1 - - - - -def kreg_demo2(n=100, hs=None, symmetric=False, fun='hisj', plotlog=False): - x,y, fun1 = _get_data(n, symmetric) - kreg_demo3(x,y,fun1, hs=None, fun='hisj', plotlog=False) - -def kreg_demo3(x,y, fun1, hs=None, fun='hisj', plotlog=False): - st = stats - - alpha=0.1 - z0 = -_invnorm(alpha/2) - - - n = x.size - hopt, hs1, hs2 =_get_regression_smooting(x,y,fun='hos') - if hs is None: - hs = hopt - - forward = _logit - reverse = _logitinv - #forward = np.log - #reverse = np.exp - - xmin, xmax = x.min(), x.max() - ni = max(2*int((xmax-xmin)/hopt)+3,5) - print(ni) - print(xmin, xmax) - sml = hopt*0.1 - xi = np.linspace(xmin-sml,xmax+sml, ni) - xiii = np.linspace(xmin-sml,xmax+sml, 4*ni+1) - - c = gridcount(x, xi) - if (y==True).any(): - c0 = gridcount(x[y==True],xi) - else: - c0 = np.zeros(xi.shape) - yi = np.where(c==0, 0, c0/c) - - kreg = KRegression(x, y, hs=hs, p=0) - fiii = kreg(xiii) - yiii = interpolate.interp1d(xi, yi)(xiii) - fit = fun1(xiii).clip(max=1.0) 
- df = np.diff(fiii) - eerr = np.abs((yiii-fiii)).std()+ 0.5*(df[:-1]*df[1:]<0).sum()/n - err = (fiii-fit).std() - f = kreg(xiii,output='plotobj', title='%s err=%1.3f,eerr=%1.3f, n=%d, hs=%1.3f, hs1=%1.3f, hs2=%1.3f' % (fun,err,eerr,n,hs, hs1,hs2), plotflag=1) - - - #yi[yi==0] = 1.0/(c[c!=0].min()+4) - #yi[yi==1] = 1-1.0/(c[c!=0].min()+4) - #yi[yi==0] = fi[yi==0] - - #yi[yi==0] = np.exp(stineman_interp(xi[yi==0], xi[yi>0],np.log(yi[yi>0]))) - #yi[yi==0] = fun1(xi[yi==0]) - try: - yi[yi==0] = yi[yi>0].min()/sqrt(n) - except: - yi[yi==0] = 1./n - yi[yi==1] =1-(1-yi[yi<1].max())/sqrt(n) - - logity = forward(yi) - - - gkreg = KRegression(xi, logity, hs=hs, xmin=xmin-hopt,xmax=xmax+hopt) - fg = gkreg.eval_grid(xi,output='plotobj', title='Kernel regression', plotflag=1) - sa = (fg.data-logity).std() - sa2 = iqrange(fg.data-logity) / 1.349 - #print('sa=%g %g' % (sa, sa2)) - sa = min(sa,sa2) - -# plt.figure(1) -# plt.plot(xi, slogity-logity,'r.') -# #plt.plot(xi, logity-,'b.') -# plt.plot(xi, fg.data-logity, 'b.') -# plt.show() -# return - - - fg = gkreg.eval_grid(xiii,output='plotobj', title='Kernel regression', plotflag=1) - pi = reverse(fg.data) - - dx = xi[1]-xi[0] - ckreg = KDE(x,hs=hs) - #ci = ckreg.eval_grid_fast(xi)*n*dx - ciii = ckreg.eval_grid_fast(xiii)*dx* x.size #n*(1+symmetric) - -# sa1 = np.sqrt(1./(ciii*pi*(1-pi))) -# plo3 = reverse(fg.data-z0*sa) -# pup3 = reverse(fg.data+z0*sa) - fg.data = pi - pi = f.data - - - # ref Casella and Berger (1990) "Statistical inference" pp444 -# a = 2*pi + z0**2/(ciii+1e-16) -# b = 2*(1+z0**2/(ciii+1e-16)) -# plo2 = ((a-sqrt(a**2-2*pi**2*b))/b).clip(min=0,max=1) -# pup2 = ((a+sqrt(a**2-2*pi**2*b))/b).clip(min=0,max=1) - - # Jeffreys intervall a=b=0.5 - #st.beta.isf(alpha/2, x+a, n-x+b) - ab = 0.07 #0.055 - pi1 = pi #fun1(xiii) - pup2 = np.where(pi==1, 1, st.beta.isf(alpha/2, ciii*pi1+ab, ciii*(1-pi1)+ab)) - plo2 = np.where(pi==0, 0, st.beta.isf(1-alpha/2, ciii*pi1+ab, ciii*(1-pi1)+ab)) - - averr = np.trapz(pup2-plo2, xiii)/(xiii[-1]-xiii[0]) + 0.5*(df[:-1]*df[1:]<0).sum() - - #f2 = kreg_demo4(x, y, hs, hopt) - # Wilson score - den = 1+(z0**2./ciii); - xc=(pi1+(z0**2)/(2*ciii))/den; - halfwidth=(z0*sqrt((pi1*(1-pi1)/ciii)+(z0**2/(4*(ciii**2)))))/den - plo = (xc-halfwidth).clip(min=0) # wilson score - pup = (xc+halfwidth).clip(max=1.0) # wilson score - #pup = (pi + z0*np.sqrt(pi*(1-pi)/ciii)).clip(min=0,max=1) # dont use - #plo = (pi - z0*np.sqrt(pi*(1-pi)/ciii)).clip(min=0,max=1) - - #mi = kreg.eval_grid(x) - #sigma = (stineman_interp(x, xiii, pup)-stineman_interp(x, xiii, plo))/4 - #aic = np.abs((y-mi)/sigma).std()+ 0.5*(df[:-1]*df[1:]<0).sum()/n - #aic = np.abs((yiii-fiii)/(pup-plo)).std()+ 0.5*(df[:-1]*df[1:]<0).sum() + ((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() - - k = (df[:-1]*df[1:]<0).sum() # numpeaks - sigmai = (pup-plo) - aic = (((yiii-fiii)/sigmai)**2).sum()+ 2*k*(k+1)/np.maximum(ni-k+1,1) + np.abs((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() - - #aic = (((yiii-fiii)/sigmai)**2).sum()+ 2*k*(k+1)/(ni-k+1) + np.abs((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() - - #aic = averr + ((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() - - fg.plot(label='KReg grid aic=%2.3f' % (aic) ) - f.plot(label='KReg averr=%2.3f ' %(averr)) - labtxt = '%d CI' % (int(100*(1-alpha))) - plt.fill_between(xiii, pup, plo, alpha=0.20,color='r', linestyle='--', label=labtxt) - plt.fill_between(xiii, pup2, plo2,alpha = 0.20, color='b', linestyle=':',label='%d CI2' % (int(100*(1-alpha)))) - plt.plot(xiii, fun1(xiii), 'r', label='True model') 
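#         The two pointwise intervals computed above, written out for a single cell as a
#         self-contained sketch (n is the expected count in the cell, p the smoothed
#         probability, and ab the same ad-hoc offset used above):
#
#             import numpy as np
#             from scipy import stats
#
#             alpha, ab = 0.05, 0.07
#             z0 = -stats.norm.ppf(alpha / 2)        # about 1.96
#             n, p = 25.0, 0.3
#
#             # Jeffreys-type interval from the beta distribution
#             pup = stats.beta.isf(alpha / 2, n * p + ab, n * (1 - p) + ab)
#             plo = stats.beta.isf(1 - alpha / 2, n * p + ab, n * (1 - p) + ab)
#
#             # Wilson score interval
#             den = 1 + z0 ** 2 / n
#             xc = (p + z0 ** 2 / (2 * n)) / den
#             hw = z0 * np.sqrt(p * (1 - p) / n + z0 ** 2 / (4 * n ** 2)) / den
#             plo_w, pup_w = max(xc - hw, 0.0), min(xc + hw, 1.0)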
- plt.scatter(xi,yi, label='data') - print('maxp = %g' % (np.nanmax(f.data))) - print('hs = %g' %( kreg.tkde.tkde.hs)) - plt.legend() - h = plt.gca() - if plotlog: - plt.setp(h,yscale='log') - #plt.show() - return hs1, hs2 - - - -def kreg_demo4(x,y, hs, hopt, alpha=0.05): - st = stats - - n = x.size - xmin, xmax = x.min(), x.max() - ni = max(2*int((xmax-xmin)/hopt)+3,5) - - sml = hopt*0.1 - xi = np.linspace(xmin-sml,xmax+sml, ni) - xiii = np.linspace(xmin-sml,xmax+sml, 4*ni+1) - - kreg = KRegression(x, y, hs=hs, p=0) - - - dx = xi[1]-xi[0] - ciii = kreg.tkde.eval_grid_fast(xiii) * dx * x.size -# ckreg = KDE(x,hs=hs) -# ciiii = ckreg.eval_grid_fast(xiii)*dx* x.size #n*(1+symmetric) - - f = kreg(xiii, output='plotobj') #, plot_kwds=dict(plotflag=7)) - pi = f.data - - # Jeffreys intervall a=b=0.5 - #st.beta.isf(alpha/2, x+a, n-x+b) - ab = 0.07 #0.5 - pi1 = pi - pup = np.where(pi1==1, 1, st.beta.isf(alpha/2, ciii*pi1+ab, ciii*(1-pi1)+ab)) - plo = np.where(pi1==0, 0, st.beta.isf(1-alpha/2, ciii*pi1+ab, ciii*(1-pi1)+ab)) - - - # Wilson score - # z0 = -_invnorm(alpha/2) -# den = 1+(z0**2./ciii); -# xc=(pi1+(z0**2)/(2*ciii))/den; -# halfwidth=(z0*sqrt((pi1*(1-pi1)/ciii)+(z0**2/(4*(ciii**2)))))/den -# plo2 = (xc-halfwidth).clip(min=0) # wilson score -# pup2 = (xc+halfwidth).clip(max=1.0) # wilson score - - #f.dataCI = np.vstack((plo,pup)).T - f.prediction_error_avg = np.trapz(pup-plo, xiii)/(xiii[-1]-xiii[0]) - fiii = f.data - - c = gridcount(x, xi) - if (y==True).any(): - c0 = gridcount(x[y==True],xi) - else: - c0 = np.zeros(xi.shape) - yi = np.where(c==0, 0, c0/c) - - f.children = [PlotData([plo, pup],xiii,plotmethod='fill_between',plot_kwds=dict(alpha=0.2, color='r')), - PlotData(yi,xi,plotmethod='scatter', plot_kwds=dict(color='r', s=5))] - - yiii = interpolate.interp1d(xi, yi)(xiii) - df = np.diff(fiii) - k = (df[:-1]*df[1:]<0).sum() # numpeaks - sigmai = (pup-plo) - aicc = (((yiii-fiii)/sigmai)**2).sum()+ 2*k*(k+1)/np.maximum(ni-k+1,1) + np.abs((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() - - f.aicc = aicc - f.labels.title='perr=%1.3f,aicc=%1.3f, n=%d, hs=%1.3f' % (f.prediction_error_avg,aicc,n,hs) - - return f - -def check_kreg_demo3(): - - - plt.ion() - k = 0 - for n in [50, 100,300,600, 4000]: - x,y, fun1 = _get_data(n, symmetric=True,loc1=1.0, scale1=0.6, scale2=1.25) - k0 = k - - for fun in ['hste', ]: - hsmax, hs1, hs2 =_get_regression_smooting(x,y,fun=fun) #@UnusedVariable - for hi in np.linspace(hsmax*0.25,hsmax,9): - plt.figure(k) - k +=1 - unused = kreg_demo3(x,y,fun1, hs=hi, fun=fun, plotlog=False) - - #kreg_demo2(n=n,symmetric=True,fun='hste', plotlog=False) - fig.tile(range(k0,k)) - plt.ioff() - plt.show() - -def check_kreg_demo4(): - plt.ion() - #test_docstrings() - #kde_demo2() - #kreg_demo1(fast=True) - #kde_gauss_demo() - #kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True) - k = 0 - for i, n in enumerate([100,300,600,4000]): #@UnusedVariable - x,y, fun1 = _get_data(n, symmetric=True,loc1=0.1, scale1=0.6, scale2=0.75) - #k0 = k - hopt1, _h1, _h2 = _get_regression_smooting(x,y,fun='hos') - hopt2, _h1, _h2 = _get_regression_smooting(x,y,fun='hste') - hopt = sqrt(hopt1*hopt2) - #hopt = _get_regression_smooting(x,y,fun='hos')[0] - for j, fun in enumerate(['hste']): # , 'hisj', 'hns', 'hstt' @UnusedVariable - hsmax, _hs1, _hs2 =_get_regression_smooting(x,y,fun=fun) - - fmax = kreg_demo4(x, y, hsmax+0.1, hopt) - for hi in np.linspace(hsmax*0.1,hsmax,55): - f = kreg_demo4(x, y, hi, hopt) - if f.aicc<=fmax.aicc: - fmax = f - plt.figure(k) - k +=1 - fmax.plot() - 
plt.plot(x, fun1(x),'r') - - #kreg_demo2(n=n,symmetric=True,fun='hste', plotlog=False) - fig.tile(range(0,k)) - plt.ioff() - plt.show() - -def check_regression_bin(): - plt.ion() - #test_docstrings() - #kde_demo2() - #kreg_demo1(fast=True) - #kde_gauss_demo() - #kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True) - k = 0 - for i, n in enumerate([100,300,600,4000]): #@UnusedVariable - x,y, fun1 = _get_data(n, symmetric=True,loc1=0.1, scale1=0.6, scale2=0.75) - fbest = regressionbin(x, y, alpha=0.05, color='g', label='Transit_D') - - figk = plt.figure(k) - ax = figk.gca() - k +=1 - fbest.plot(axis=ax) - ax.plot(x, fun1(x),'r') - ax.legend(frameon=False, markerscale=4) - #ax = plt.gca() - ax.set_yticklabels(ax.get_yticks()*100.0) - ax.grid(True) - - fig.tile(range(0,k)) - plt.ioff() - plt.show() - -def check_bkregression(): - plt.ion() - k = 0 - for i, n in enumerate([50, 100,300,600]): #@UnusedVariable - x,y, fun1 = _get_data(n, symmetric=True,loc1=0.1, scale1=0.6, scale2=0.75) - bkreg = BKRegression(x,y) - fbest = bkreg.prb_search_best(hsfun='hste', alpha=0.05, color='g', label='Transit_D') - - - figk = plt.figure(k) - ax = figk.gca() - k +=1 -# fbest.score.plot(axis=ax) -# axsize = ax.axis() -# ax.vlines(fbest.hs,axsize[2]+1,axsize[3]) -# ax.set(yscale='log') - fbest.plot(axis=ax) - ax.plot(x, fun1(x),'r') - ax.legend(frameon=False, markerscale=4) - #ax = plt.gca() - ax.set_yticklabels(ax.get_yticks()*100.0) - ax.grid(True) - - fig.tile(range(0,k)) - plt.ioff() - plt.show() - -def _get_regression_smooting(x,y,fun='hste'): - hs1 = Kernel('gauss', fun=fun).get_smoothing(x) - #hx = np.median(np.abs(x-np.median(x)))/0.6745*(4.0/(3*n))**0.2 - if (y==True).any(): - hs2 = Kernel('gauss', fun=fun).get_smoothing(x[y==True]) - #hy = np.median(np.abs(y-np.mean(y)))/0.6745*(4.0/(3*n))**0.2 - else: - hs2 = 4*hs1 - #hy = 4*hx - - #hy2 = Kernel('gauss', fun=fun).get_smoothing(y) - #kernel = Kernel('gauss',fun=fun) - #hopt = (hs1+2*hs2)/3 - #hopt = (hs1+4*hs2)/5 #kernel.get_smoothing(x) - #hopt = hs2 - hopt = sqrt(hs1*hs2) - return hopt, hs1, hs2 - -def empirical_bin_prb(x,y, hopt, color='r'): - ''' - Returns empirical binomial probabiltity - - Parameters - ---------- - x : ndarray - position ve - y : ndarray - binomial response variable (zeros and ones) - - Returns - ------- - P(x) : PlotData object - empirical probability - ''' - xmin, xmax = x.min(), x.max() - ni = max(2*int((xmax-xmin)/hopt)+3,5) - - sml = hopt #*0.1 - xi = np.linspace(xmin-sml,xmax+sml, ni) - - c = gridcount(x, xi) - if (y==True).any(): - c0 = gridcount(x[y==True],xi) - else: - c0 = np.zeros(xi.shape) - yi = np.where(c==0, 0, c0/c) - return PlotData(yi,xi, plotmethod='scatter', plot_kwds=dict(color=color, s=5)) - -def smoothed_bin_prb(x,y, hs, hopt, alpha=0.05, color='r', label='', bin_prb=None): - ''' - Parameters - ---------- - x,y - hs : smoothing parameter - hopt : spacing in empirical_bin_prb - alpha : confidence level - color : color of plot object - bin_prb : PlotData object with empirical bin prb - ''' - if bin_prb is None: - bin_prb = empirical_bin_prb(x, y, hopt, color) - - xi = bin_prb.args - yi = bin_prb.data - ni = len(xi) - dxi = xi[1]-xi[0] - - n = x.size - - xiii = np.linspace(xi[0],xi[-1], 10*ni+1) - - kreg = KRegression(x, y, hs=hs, p=0) - ciii = kreg.tkde.eval_grid_fast(xiii) * dxi * n # expected number of data in each bin - - f = kreg(xiii, output='plotobj') #, plot_kwds=dict(plotflag=7)) - pi = f.data - - st = stats - # Jeffreys intervall a=b=0.5 - #st.beta.isf(alpha/2, x+a, n-x+b) - ab = 0.07 #0.5 - pi1 = 
pi - pup = np.where(pi1==1, 1, st.beta.isf(alpha/2, ciii*pi1+ab, ciii*(1-pi1)+ab)) - plo = np.where(pi1==0, 0, st.beta.isf(1-alpha/2, ciii*pi1+ab, ciii*(1-pi1)+ab)) - - - # Wilson score - # z0 = -_invnorm(alpha/2) -# den = 1+(z0**2./ciii); -# xc=(pi1+(z0**2)/(2*ciii))/den; -# halfwidth=(z0*sqrt((pi1*(1-pi1)/ciii)+(z0**2/(4*(ciii**2)))))/den -# plo2 = (xc-halfwidth).clip(min=0) # wilson score -# pup2 = (xc+halfwidth).clip(max=1.0) # wilson score - - #f.dataCI = np.vstack((plo,pup)).T - f.prediction_error_avg = np.trapz(pup-plo, xiii)/(xiii[-1]-xiii[0]) - fiii = f.data - - f.plot_kwds['color'] = color - f.plot_kwds['linewidth']=2 - if label: - f.plot_kwds['label'] = label - f.children = [PlotData([plo, pup],xiii,plotmethod='fill_between',plot_kwds=dict(alpha=0.2, color=color)), - bin_prb] - - yiii = interpolate.interp1d(xi, yi)(xiii) - df = np.diff(fiii) - k = (df[:-1]*df[1:]<0).sum() # numpeaks - sigmai = (pup-plo) - aicc = (((yiii-fiii)/sigmai)**2).sum()+ 2*k*(k+1)/np.maximum(ni-k+1,1) + np.abs((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() - - f.aicc = aicc - f.fun = kreg - f.labels.title='perr=%1.3f,aicc=%1.3f, n=%d, hs=%1.3f' % (f.prediction_error_avg,aicc,n,hs) - - return f - -def regressionbin(x,y, alpha=0.05, color='r', label=''): - ''' - Return kernel regression estimate for binomial data - - Parameters - ---------- - x : arraylike - positions - y : arraylike - of 0 and 1 - ''' - hopt1, h1,h2 = _get_regression_smooting(x,y,fun='hos') #@UnusedVariable - hopt2, h1,h2 = _get_regression_smooting(x,y,fun='hste') #@UnusedVariable - hopt = sqrt(hopt1*hopt2) - - fbest = smoothed_bin_prb(x, y, hopt2+0.1, hopt, alpha, color, label) - bin_prb = fbest.children[-1] - for fun in ['hste']: # , 'hisj', 'hns', 'hstt' - hsmax, hs1, hs2 =_get_regression_smooting(x,y,fun=fun) #@UnusedVariable - for hi in np.linspace(hsmax*0.1,hsmax,55): - f = smoothed_bin_prb(x, y, hi, hopt, alpha, color, label, bin_prb) - if f.aicc<=fbest.aicc: - fbest = f - #hbest = hi - return fbest -def kde_gauss_demo(n=50): - ''' - KDEDEMO Demonstrate the KDEgauss - - KDEDEMO1 shows the true density (dotted) compared to KDE based on 7 - observations (solid) and their individual kernels (dashed) for 3 - different values of the smoothing parameter, hs. 
- ''' - - st = stats - #x = np.linspace(-4, 4, 101) - #data = np.random.normal(loc=0, scale=1.0, size=n) - #data = np.random.exponential(scale=1.0, size=n) -# n1 = 128 -# I = (np.arange(n1)*pi)**2 *0.01*0.5 -# kw = exp(-I) -# plt.plot(idctn(kw)) -# return - #dist = st.norm - dist = st.expon - data = dist.rvs(loc=0, scale=1.0, size=n) - d, N = np.atleast_2d(data).shape #@UnusedVariable - - if d==1: - plot_options = [dict(color='red'), dict(color='green'), dict(color='black')] - else: - plot_options = [dict(colors='red'), dict(colors='green'), dict(colors='black')] - - plt.figure(1) - kde0 = KDE(data, kernel=Kernel('gauss', 'hste')) - f0 = kde0.eval_grid_fast(output='plot', ylab='Density') - f0.plot(**plot_options[0]) - - kde1 = TKDE(data, kernel=Kernel('gauss', 'hisj'), L2=.5) - f1 = kde1.eval_grid_fast(output='plot', ylab='Density') - f1.plot(**plot_options[1]) - - kde2 = KDEgauss(data) - f2 = kde2(output='plot', ylab='Density') - x = f2.args - f2.plot(**plot_options[2]) - - fmax = dist.pdf(x, 0, 1).max() - if d==1: - plt.plot(x, dist.pdf(x, 0, 1), 'k:') - plt.axis([x.min(), x.max(), 0, fmax]) - plt.show() - print(fmax/f2.data.max()) - format_ = ''.join(('%g, ')*d) - format_ = 'hs0=%s hs1=%s hs2=%s' % (format_, format_, format_) - print(format_ % tuple(kde0.hs.tolist()+kde1.tkde.hs.tolist()+kde2.hs.tolist())) - print('inc0 = %d, inc1 = %d, inc2 = %d' % (kde0.inc, kde1.inc,kde2.inc)) - - -def test_docstrings(): - import doctest - doctest.testmod() - -if __name__ == '__main__': - test_docstrings() - - #check_bkregression() - #check_regression_bin() - #check_kreg_demo3() - #check_kreg_demo4() - - - #test_smoothn_2d() - #test_smoothn_cardioid() - - - #kde_demo2() - #kreg_demo1(fast=True) - #kde_gauss_demo() - #kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True) +#------------------------------------------------------------------------- +# Name: kdetools +# Purpose: +# +# Author: pab +# +# Created: 01.11.2008 +# Copyright: (c) pab 2008 +# Licence: LGPL +#------------------------------------------------------------------------- +#!/usr/bin/env python +from __future__ import division +import copy +import numpy as np +import scipy +import warnings +from itertools import product +from scipy import interpolate, linalg, optimize, sparse, special, stats +from scipy.special import gamma +from scipy.ndimage.morphology import distance_transform_edt +from numpy import pi, sqrt, atleast_2d, exp, newaxis # @UnresolvedImport + +from wafo.misc import meshgrid, nextpow2, tranproc # , trangood +from wafo.wafodata import PlotData +from wafo.dctpack import dct, dctn, idctn +from wafo.plotbackend import plotbackend as plt +try: + from wafo import fig +except ImportError: + print 'fig import only supported on Windows' + +_TINY = np.finfo(float).machar.tiny + + +def _invnorm(q): + return special.ndtri(q) + +_stats_epan = (1. / 5, 3. / 5, np.inf) +_stats_biwe = (1. / 7, 5. / 7, 45. / 2) +_stats_triw = (1. / 9, 350. / 429, np.inf) +_stats_rect = (1. / 3, 1. / 2, np.inf) +_stats_tria = (1. / 6, 2. / 3, np.inf) +_stats_lapl = (2, 1. / 4, np.inf) +_stats_logi = (pi ** 2 / 3, 1. / 6, 1 / 42) +_stats_gaus = (1, 1. / (2 * sqrt(pi)), 3. 
/ (8 * sqrt(pi))) + +__all__ = ['sphere_volume', 'TKDE', 'KDE', 'Kernel', 'accum', 'qlevels', + 'iqrange', 'gridcount', 'kde_demo1', 'kde_demo2', 'test_docstrings'] + + +def sphere_volume(d, r=1.0): + """ + Returns volume of d-dimensional sphere with radius r + + Parameters + ---------- + d : scalar or array_like + dimension of sphere + r : scalar or array_like + radius of sphere (default 1) + + Example + ------- + >>> sphere_volume(2., r=2.) + 12.566370614359172 + >>> sphere_volume(2., r=1.) + 3.1415926535897931 + + Reference + --------- + Wand,M.P. and Jones, M.C. (1995) + 'Kernel smoothing' + Chapman and Hall, pp 105 + """ + return (r ** d) * 2.0 * pi ** (d / 2.0) / (d * gamma(d / 2.0)) + + +class KDEgauss(object): + + """ Kernel-Density Estimator base class. + + Parameters + ---------- + data : (# of dims, # of data)-array + datapoints to estimate from + hs : array-like (optional) + smooting parameter vector/matrix. + (default compute from data using kernel.get_smoothing function) + alpha : real scalar (optional) + sensitivity parameter (default 0 regular KDE) + A good choice might be alpha = 0.5 ( or 1/D) + alpha = 0 Regular KDE (hs is constant) + 0 < alpha <= 1 Adaptive KDE (Make hs change) + + + Members + ------- + d : int + number of dimensions + n : int + number of datapoints + + Methods + ------- + kde.eval_grid_fast(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde(x0, x1,..., xd) : array + same as kde.eval_grid_fast(x0, x1,..., xd) + """ + + def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, + xmax=None, inc=512): + self.dataset = atleast_2d(data) + self.hs = hs + self.kernel = kernel if kernel else Kernel('gauss') + self.alpha = alpha + self.xmin = xmin + self.xmax = xmax + self.inc = inc + self.initialize() + + def initialize(self): + self.d, self.n = self.dataset.shape + self._set_xlimits() + self._initialize() + + def _initialize(self): + self._compute_smoothing() + + def _compute_smoothing(self): + """Computes the smoothing matrix + """ + get_smoothing = self.kernel.get_smoothing + h = self.hs + if h is None: + h = get_smoothing(self.dataset) + h = np.atleast_1d(h) + hsiz = h.shape + + if (len(hsiz) == 1) or (self.d == 1): + if max(hsiz) == 1: + h = h * np.ones(self.d) + else: + h.shape = (self.d,) # make sure it has the correct dimension + + # If h negative calculate automatic values + ind, = np.where(h <= 0) + for i in ind.tolist(): + h[i] = get_smoothing(self.dataset[i]) + deth = h.prod() + self.inv_hs = np.diag(1.0 / h) + else: # fully general smoothing matrix + deth = linalg.det(h) + if deth <= 0: + raise ValueError( + 'bandwidth matrix h must be positive definit!') + self.inv_hs = linalg.inv(h) + self.hs = h + self._norm_factor = deth * self.n + + def _set_xlimits(self): + amin = self.dataset.min(axis=-1) + amax = self.dataset.max(axis=-1) + iqr = iqrange(self.dataset, axis=-1) + sigma = np.minimum(np.std(self.dataset, axis=-1, ddof=1), iqr / 1.34) + #xyzrange = amax - amin + #offset = xyzrange / 4.0 + offset = 2 * sigma + if self.xmin is None: + self.xmin = amin - offset + else: + self.xmin = self.xmin * np.ones((self.d, 1)) + if self.xmax is None: + self.xmax = amax + offset + else: + self.xmax = self.xmax * np.ones((self.d, 1)) + + def eval_grid_fast(self, *args, **kwds): + """Evaluate the estimated pdf on a grid. + + Parameters + ---------- + arg_0,arg_1,... 
arg_d-1 : vectors + Alternatively, if no vectors is passed in then + arg_i = linspace(self.xmin[i], self.xmax[i], self.inc) + output : string optional + 'value' if value output + 'data' if object output + + Returns + ------- + values : array-like + The values evaluated at meshgrid(*args). + + """ + if len(args) == 0: + args = [] + for i in range(self.d): + args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc)) + self.args = args + return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds) + + def _eval_grid_fast(self, *args, **kwds): + X = np.vstack(args) + d, inc = X.shape + #dx = X[:, 1] - X[:, 0] + R = X.max(axis=-1) - X.min(axis=-1) + + t_star = (self.hs / R) ** 2 + I = (np.asfarray(np.arange(0, inc)) * pi) ** 2 + In = [] + + for i in range(d): + In.append(I * t_star[i] * 0.5) + + Inc = meshgrid(*In) if d > 1 else In + + kw = np.zeros((inc,) * d) + for i in range(d): + kw += exp(-Inc[i]) + y = kwds.get('y', 1.0) + d, n = self.dataset.shape + # Find the binned kernel weights, c. + c = gridcount(self.dataset, X, y=y) / n + # Perform the convolution. + at = dctn(c) * kw + z = idctn(at) * at.size / np.prod(R) + return z * (z > 0.0) + + def _eval_grid_fun(self, eval_grd, *args, **kwds): + output = kwds.pop('output', 'value') + f = eval_grd(*args, **kwds) + if output == 'value': + return f + else: + titlestr = 'Kernel density estimate (%s)' % self.kernel.name + kwds2 = dict(title=titlestr) + kwds2['plot_kwds'] = dict(plotflag=1) + kwds2.update(**kwds) + args = self.args + if self.d == 1: + args = args[0] + wdata = PlotData(f, args, **kwds2) + if self.d > 1: + PL = np.r_[10:90:20, 95, 99, 99.9] + try: + ql = qlevels(f, p=PL) + wdata.clevels = ql + wdata.plevels = PL + except: + pass + return wdata + + def _check_shape(self, points): + points = atleast_2d(points) + d, m = points.shape + if d != self.d: + if d == 1 and m == self.d: + # points was passed in as a row vector + points = np.reshape(points, (self.d, 1)) + else: + msg = "points have dimension %s, dataset has dimension %s" + raise ValueError(msg % (d, self.d)) + return points + + def eval_points(self, points, **kwds): + """Evaluate the estimated pdf on a set of points. + + Parameters + ---------- + points : (# of dimensions, # of points)-array + Alternatively, a (# of dimensions,) vector can be passed in and + treated as a single point. + + Returns + ------- + values : (# of points,)-array + The values at each point. + + Raises + ------ + ValueError if the dimensionality of the input points is different than + the dimensionality of the KDE. + """ + + points = self._check_shape(points) + return self._eval_points(points, **kwds) + + def _eval_points(self, points, **kwds): + pass + + __call__ = eval_grid_fast + + +class _KDE(object): + + """ Kernel-Density Estimator base class. + + Parameters + ---------- + data : (# of dims, # of data)-array + datapoints to estimate from + hs : array-like (optional) + smooting parameter vector/matrix. + (default compute from data using kernel.get_smoothing function) + kernel : kernel function object. 
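The fast grid evaluation in KDEgauss above is a Gaussian convolution of binned data carried out in the cosine (DCT) domain. A 1-D sketch of the same idea, using scipy's DCT directly rather than wafo.dctpack (illustrative only; the normalisation details differ from the general d-dimensional code above):

    import numpy as np
    from scipy.fftpack import dct, idct

    data = np.random.randn(200)
    inc = 256
    hs = 0.4                                       # bandwidth
    xmin, xmax = data.min() - 3 * hs, data.max() + 3 * hs
    R = xmax - xmin
    dx = R / inc

    mass, _ = np.histogram(data, bins=inc, range=(xmin, xmax))
    mass = mass / float(data.size)                 # probability mass per bin

    k = np.arange(inc)
    kw = np.exp(-0.5 * (k * np.pi * hs / R) ** 2)  # Gaussian kernel in the cosine domain
    f = idct(dct(mass, norm='ortho') * kw, norm='ortho') / dx
    f = np.clip(f, 0, None)                        # grid estimate of the density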
+ kernel must have get_smoothing method + alpha : real scalar (optional) + sensitivity parameter (default 0 regular KDE) + A good choice might be alpha = 0.5 ( or 1/D) + alpha = 0 Regular KDE (hs is constant) + 0 < alpha <= 1 Adaptive KDE (Make hs change) + + Members + ------- + d : int + number of dimensions + n : int + number of datapoints + + Methods + ------- + kde.eval_grid_fast(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_grid(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_points(points) : array + evaluate the estimated pdf on a provided set of points + kde(x0, x1,..., xd) : array + same as kde.eval_grid(x0, x1,..., xd) + """ + + def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, + xmax=None, inc=512): + self.dataset = atleast_2d(data) + self.hs = hs + self.kernel = kernel if kernel else Kernel('gauss') + self.alpha = alpha + self.xmin = xmin + self.xmax = xmax + self.inc = inc + self.initialize() + + def initialize(self): + self.d, self.n = self.dataset.shape + if self.n > 1: + self._set_xlimits() + self._initialize() + + def _initialize(self): + pass + + def _set_xlimits(self): + amin = self.dataset.min(axis=-1) + amax = self.dataset.max(axis=-1) + iqr = iqrange(self.dataset, axis=-1) + self._sigma = np.minimum( + np.std(self.dataset, axis=-1, ddof=1), iqr / 1.34) + #xyzrange = amax - amin + #offset = xyzrange / 4.0 + offset = self._sigma + if self.xmin is None: + self.xmin = amin - offset + else: + self.xmin = self.xmin * np.ones((self.d, 1)) + if self.xmax is None: + self.xmax = amax + offset + else: + self.xmax = self.xmax * np.ones((self.d, 1)) + + def eval_grid_fast(self, *args, **kwds): + """Evaluate the estimated pdf on a grid. + + Parameters + ---------- + arg_0,arg_1,... arg_d-1 : vectors + Alternatively, if no vectors is passed in then + arg_i = linspace(self.xmin[i], self.xmax[i], self.inc) + output : string optional + 'value' if value output + 'data' if object output + + Returns + ------- + values : array-like + The values evaluated at meshgrid(*args). + + """ + if len(args) == 0: + args = [] + for i in range(self.d): + args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc)) + self.args = args + return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds) + + def _eval_grid_fast(self, *args, **kwds): + pass + + def eval_grid(self, *args, **kwds): + """Evaluate the estimated pdf on a grid. + + Parameters + ---------- + arg_0,arg_1,... arg_d-1 : vectors + Alternatively, if no vectors is passed in then + arg_i = linspace(self.xmin[i], self.xmax[i], self.inc) + output : string optional + 'value' if value output + 'data' if object output + + Returns + ------- + values : array-like + The values evaluated at meshgrid(*args). 
+ + """ + if len(args) == 0: + args = [] + for i in range(self.d): + args.append(np.linspace(self.xmin[i], self.xmax[i], self.inc)) + self.args = args + return self._eval_grid_fun(self._eval_grid, *args, **kwds) + + def _eval_grid(self, *args): + pass + + def _eval_grid_fun(self, eval_grd, *args, **kwds): + output = kwds.pop('output', 'value') + f = eval_grd(*args, **kwds) + if output == 'value': + return f + else: + titlestr = 'Kernel density estimate (%s)' % self.kernel.name + kwds2 = dict(title=titlestr) + + kwds2['plot_kwds'] = kwds.pop('plot_kwds', dict(plotflag=1)) + kwds2.update(**kwds) + args = self.args + if self.d == 1: + args = args[0] + wdata = PlotData(f, args, **kwds2) + if self.d > 1: + PL = np.r_[10:90:20, 95, 99, 99.9] + try: + ql = qlevels(f, p=PL) + wdata.clevels = ql + wdata.plevels = PL + except: + pass + return wdata + + def _check_shape(self, points): + points = atleast_2d(points) + d, m = points.shape + if d != self.d: + if d == 1 and m == self.d: + # points was passed in as a row vector + points = np.reshape(points, (self.d, 1)) + else: + msg = "points have dimension %s, dataset has dimension %s" + raise ValueError(msg % (d, self.d)) + return points + + def eval_points(self, points, **kwds): + """Evaluate the estimated pdf on a set of points. + + Parameters + ---------- + points : (# of dimensions, # of points)-array + Alternatively, a (# of dimensions,) vector can be passed in and + treated as a single point. + + Returns + ------- + values : (# of points,)-array + The values at each point. + + Raises + ------ + ValueError if the dimensionality of the input points is different than + the dimensionality of the KDE. + """ + + points = self._check_shape(points) + return self._eval_points(points, **kwds) + + def _eval_points(self, points, **kwds): + pass + + __call__ = eval_grid + + +class TKDE(_KDE): + + """ Transformation Kernel-Density Estimator. + + Parameters + ---------- + dataset : (# of dims, # of data)-array + datapoints to estimate from + hs : array-like (optional) + smooting parameter vector/matrix. + (default compute from data using kernel.get_smoothing function) + kernel : kernel function object. + kernel must have get_smoothing method + alpha : real scalar (optional) + sensitivity parameter (default 0 regular KDE) + A good choice might be alpha = 0.5 ( or 1/D) + alpha = 0 Regular KDE (hs is constant) + 0 < alpha <= 1 Adaptive KDE (Make hs change) + xmin, xmax : vectors + specifying the default argument range for the kde.eval_grid methods. + For the kde.eval_grid_fast methods the values must cover the range of + the data. (default min(data)-range(data)/4, max(data)-range(data)/4) + If a single value of xmin or xmax is given then the boundary is the is + the same for all dimensions. + inc : scalar integer + defining the default dimension of the output from kde.eval_grid methods + (default 512) + (For kde.eval_grid_fast: A value below 50 is very fast to compute but + may give some inaccuracies. Values between 100 and 500 give very + accurate results) + L2 : array-like + vector of transformation parameters (default 1 no transformation) + t(xi;L2) = xi^L2*sign(L2) for L2(i) ~= 0 + t(xi;L2) = log(xi) for L2(i) == 0 + If single value of L2 is given then the transformation is the same in + all directions. 
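The back-transformation of the estimated density follows the usual change-of-variables rule: with t(x) = x**L2 (or log(x) for L2 == 0), the density on the original scale is f_X(x) = f_T(t(x)) * |dt/dx|. A small sketch for L2 = 0.5 (illustrative; the _scale_pdf method below applies the same factor in the general case):

    import numpy as np
    from scipy.stats import norm

    L2 = 0.5
    x = np.linspace(0.01, 4, 200)
    t = x ** L2                               # data mapped to the transformed scale
    f_t = norm.pdf(t, loc=1.0, scale=0.3)     # a density estimated on the transformed scale
    f_x = f_t * L2 * x ** (L2 - 1)            # back-transformed density, |dt/dx| = L2*x**(L2-1)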
+ + Members + ------- + d : int + number of dimensions + n : int + number of datapoints + + Methods + ------- + kde.eval_grid_fast(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_grid(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_points(points) : array + evaluate the estimated pdf on a provided set of points + kde(x0, x1,..., xd) : array + same as kde.eval_grid(x0, x1,..., xd) + + Example + ------- + N = 20 + data = np.random.rayleigh(1, size=(N,)) + >>> data = np.array([ + ... 0.75355792, 0.72779194, 0.94149169, 0.07841119,2.32291887, + ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, + ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, + ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) + + >>> import wafo.kdetools as wk + >>> x = np.linspace(0.01, max(data.ravel()) + 1, 10) + >>> kde = wk.TKDE(data, hs=0.5, L2=0.5) + >>> f = kde(x) + >>> f + array([ 1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318, + 0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882]) + + >>> kde.eval_grid(x) + array([ 1.03982714, 0.45839018, 0.39514782, 0.32860602, 0.26433318, + 0.20717946, 0.15907684, 0.1201074 , 0.08941027, 0.06574882]) + + >>> kde.eval_grid_fast(x) + array([ 1.06437223, 0.46203314, 0.39593137, 0.32781899, 0.26276433, + 0.20532206, 0.15723498, 0.11843998, 0.08797755, 0. ]) + + import pylab as plb + h1 = plb.plot(x, f) # 1D probability density plot + t = np.trapz(f, x) + """ + + def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, + xmax=None, inc=512, L2=None): + self.L2 = L2 + super(TKDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc) + + def _initialize(self): + self._check_xmin() + tdataset = self._dat2gaus(self.dataset) + xmin = self.xmin + if xmin is not None: + xmin = self._dat2gaus(np.reshape(xmin, (-1, 1))) + xmax = self.xmax + if xmax is not None: + xmax = self._dat2gaus(np.reshape(xmax, (-1, 1))) + self.tkde = KDE(tdataset, self.hs, self.kernel, self.alpha, xmin, xmax, + self.inc) + if self.inc is None: + self.inc = self.tkde.inc + + def _check_xmin(self): + if self.L2 is not None: + amin = self.dataset.min(axis=-1) + # default no transformation + L2 = np.atleast_1d(self.L2) * np.ones(self.d) + self.xmin = np.where(L2 != 1, np.maximum( + self.xmin, amin / 100.0), self.xmin).reshape((-1, 1)) + + def _dat2gaus(self, points): + if self.L2 is None: + return points # default no transformation + + # default no transformation + L2 = np.atleast_1d(self.L2) * np.ones(self.d) + + tpoints = copy.copy(points) + for i, v2 in enumerate(L2.tolist()): + tpoints[i] = np.log(points[i]) if v2 == 0 else points[i] ** v2 + return tpoints + + def _gaus2dat(self, tpoints): + if self.L2 is None: + return tpoints # default no transformation + + # default no transformation + L2 = np.atleast_1d(self.L2) * np.ones(self.d) + + points = copy.copy(tpoints) + for i, v2 in enumerate(L2.tolist()): + points[i] = np.exp( + tpoints[i]) if v2 == 0 else tpoints[i] ** (1.0 / v2) + return points + + def _scale_pdf(self, pdf, points): + if self.L2 is None: + return pdf + # default no transformation + L2 = np.atleast_1d(self.L2) * np.ones(self.d) + for i, v2 in enumerate(L2.tolist()): + factor = v2 * np.sign(v2) if v2 else 1 + pdf *= np.where(v2 == 1, 1, points[i] ** (v2 - 1) * factor) + if (np.abs(np.diff(pdf)).max() > 10).any(): + msg = ''' Numerical problems may have occured due to the power + transformation. 
Check the KDE for spurious spikes''' + warnings.warn(msg) + return pdf + + def eval_grid_fast2(self, *args, **kwds): + """Evaluate the estimated pdf on a grid. + + Parameters + ---------- + arg_0,arg_1,... arg_d-1 : vectors + Alternatively, if no vectors is passed in then + arg_i = gauss2dat(linspace(dat2gauss(self.xmin[i]), + dat2gauss(self.xmax[i]), self.inc)) + output : string optional + 'value' if value output + 'data' if object output + + Returns + ------- + values : array-like + The values evaluated at meshgrid(*args). + """ + return self._eval_grid_fun(self._eval_grid_fast, *args, **kwds) + + def _eval_grid_fast(self, *args, **kwds): + if self.L2 is None: + f = self.tkde.eval_grid_fast(*args, **kwds) + self.args = self.tkde.args + return f + #targs = self._dat2gaus(list(args)) if len(args) else args + tf = self.tkde.eval_grid_fast() + self.args = self._gaus2dat(list(self.tkde.args)) + points = meshgrid(*self.args) if self.d > 1 else self.args + f = self._scale_pdf(tf, points) + if len(args): + ipoints = meshgrid(*args) if self.d > 1 else args + #shape0 = points[0].shape + #shape0i = ipoints[0].shape + for i in range(self.d): + points[i].shape = (-1,) + #ipoints[i].shape = (-1,) + points = np.asarray(points).T + #ipoints = np.asarray(ipoints).T + fi = interpolate.griddata( # @UndefinedVariable + points, f.ravel(), tuple(ipoints), method='linear', + fill_value=0.0) + #fi.shape = shape0i + self.args = args + r = kwds.get('r', 0) + if r == 0: + return fi * (fi > 0) + else: + return fi + return f + + def _eval_grid(self, *args, **kwds): + if self.L2 is None: + return self.tkde.eval_grid(*args, **kwds) + targs = self._dat2gaus(list(args)) + tf = self.tkde.eval_grid(*targs, **kwds) + points = meshgrid(*args) if self.d > 1 else self.args + f = self._scale_pdf(tf, points) + return f + + def _eval_points(self, points): + """Evaluate the estimated pdf on a set of points. + + Parameters + ---------- + points : (# of dimensions, # of points)-array + Alternatively, a (# of dimensions,) vector can be passed in and + treated as a single point. + + Returns + ------- + values : (# of points,)-array + The values at each point. + + Raises + ------ + ValueError if the dimensionality of the input points is different than + the dimensionality of the KDE. + """ + if self.L2 is None: + return self.tkde.eval_points(points) + + tpoints = self._dat2gaus(points) + tf = self.tkde.eval_points(tpoints) + f = self._scale_pdf(tf, points) + return f + + +class KDE(_KDE): + + """ Kernel-Density Estimator. + + Parameters + ---------- + data : (# of dims, # of data)-array + datapoints to estimate from + hs : array-like (optional) + smooting parameter vector/matrix. + (default compute from data using kernel.get_smoothing function) + kernel : kernel function object. + kernel must have get_smoothing method + alpha : real scalar (optional) + sensitivity parameter (default 0 regular KDE) + A good choice might be alpha = 0.5 ( or 1/D) + alpha = 0 Regular KDE (hs is constant) + 0 < alpha <= 1 Adaptive KDE (Make hs change) + xmin, xmax : vectors + specifying the default argument range for the kde.eval_grid methods. + For the kde.eval_grid_fast methods the values must cover the range of + the data. + (default min(data)-range(data)/4, max(data)-range(data)/4) + If a single value of xmin or xmax is given then the boundary is the is + the same for all dimensions. 
+ inc : scalar integer + defining the default dimension of the output from kde.eval_grid methods + (default 512) + (For kde.eval_grid_fast: A value below 50 is very fast to compute but + may give some inaccuracies. Values between 100 and 500 give very + accurate results) + + Members + ------- + d : int + number of dimensions + n : int + number of datapoints + + Methods + ------- + kde.eval_grid_fast(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_grid(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_points(points) : array + evaluate the estimated pdf on a provided set of points + kde(x0, x1,..., xd) : array + same as kde.eval_grid(x0, x1,..., xd) + + Example + ------- + N = 20 + data = np.random.rayleigh(1, size=(N,)) + >>> data = np.array([ + ... 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887, + ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, + ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, + ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) + + >>> x = np.linspace(0, max(data.ravel()) + 1, 10) + >>> import wafo.kdetools as wk + >>> kde = wk.KDE(data, hs=0.5, alpha=0.5) + >>> f = kde(x) + >>> f + array([ 0.17252055, 0.41014271, 0.61349072, 0.57023834, 0.37198073, + 0.21409279, 0.12738463, 0.07460326, 0.03956191, 0.01887164]) + + >>> kde.eval_grid(x) + array([ 0.17252055, 0.41014271, 0.61349072, 0.57023834, 0.37198073, + 0.21409279, 0.12738463, 0.07460326, 0.03956191, 0.01887164]) + + >>> kde0 = wk.KDE(data, hs=0.5, alpha=0.0) + >>> kde0.eval_points(x) + array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , + 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) + + >>> kde0.eval_grid(x) + array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , + 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) + >>> f = kde0.eval_grid(x, output='plotobj') + >>> f.data + array([ 0.2039735 , 0.40252503, 0.54595078, 0.52219649, 0.3906213 , + 0.26381501, 0.16407362, 0.08270612, 0.02991145, 0.00720821]) + + >>> f = kde0.eval_grid_fast() + >>> np.interp(x, kde0.args[0], f) + array([ 0.21227584, 0.41256459, 0.5495661 , 0.5176579 , 0.38431616, + 0.2591162 , 0.15978948, 0.07889179, 0.02769818, 0.00791829]) + >>> f1 = kde0.eval_grid_fast(output='plot') + >>> np.interp(x, f1.args, f1.data) + array([ 0.21227584, 0.41256459, 0.5495661 , 0.5176579 , 0.38431616, + 0.2591162 , 0.15978948, 0.07889179, 0.02769818, 0.00791829]) + >>> h = f1.plot() + + import pylab as plb + h1 = plb.plot(x, f) # 1D probability density plot + t = np.trapz(f, x) + """ + + def __init__(self, data, hs=None, kernel=None, alpha=0.0, xmin=None, + xmax=None, inc=512): + super(KDE, self).__init__(data, hs, kernel, alpha, xmin, xmax, inc) + + def _initialize(self): + self._compute_smoothing() + self._lambda = np.ones(self.n) + if self.alpha > 0: + #pilot = KDE(self.dataset, hs=self.hs, kernel=self.kernel, alpha=0) + # f = pilot.eval_points(self.dataset) # get a pilot estimate by + # regular KDE (alpha=0) + f = self.eval_points(self.dataset) # pilot estimate + g = np.exp(np.mean(np.log(f))) + self._lambda = (f / g) ** (-self.alpha) + + if self.inc is None: + unused_tau, tau = self.kernel.effective_support() + xyzrange = 8 * self._sigma + L1 = 10 + self.inc = 2 ** nextpow2( + max(48, (L1 * xyzrange / (tau * self.hs)).max())) + pass + + def _compute_smoothing(self): + """Computes the smoothing matrix + """ + get_smoothing = self.kernel.get_smoothing + h = self.hs + 
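+        # If no bandwidth is supplied, ask the kernel for a data-driven value.
+        # A scalar or vector h is broadcast to one value per dimension and any
+        # non-positive entry is recomputed from the data, while a full d x d
+        # matrix is used directly via its determinant and inverse below.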
if h is None: + h = get_smoothing(self.dataset) + h = np.atleast_1d(h) + hsiz = h.shape + + if (len(hsiz) == 1) or (self.d == 1): + if max(hsiz) == 1: + h = h * np.ones(self.d) + else: + h.shape = (self.d,) # make sure it has the correct dimension + + # If h negative calculate automatic values + ind, = np.where(h <= 0) + for i in ind.tolist(): + h[i] = get_smoothing(self.dataset[i]) + deth = h.prod() + self.inv_hs = np.diag(1.0 / h) + else: # fully general smoothing matrix + deth = linalg.det(h) + if deth <= 0: + raise ValueError( + 'bandwidth matrix h must be positive definit!') + self.inv_hs = linalg.inv(h) + self.hs = h + self._norm_factor = deth * self.n + + def _eval_grid_fast(self, *args, **kwds): + X = np.vstack(args) + d, inc = X.shape + dx = X[:, 1] - X[:, 0] + + Xn = [] + nfft0 = 2 * inc + nfft = (nfft0,) * d + x0 = np.linspace(-inc, inc, nfft0 + 1) + for i in range(d): + Xn.append(x0[:-1] * dx[i]) + + Xnc = meshgrid(*Xn) if d > 1 else Xn + + shape0 = Xnc[0].shape + for i in range(d): + Xnc[i].shape = (-1,) + + Xn = np.dot(self.inv_hs, np.vstack(Xnc)) + + # Obtain the kernel weights. + kw = self.kernel(Xn) + + # plt.plot(kw) + # plt.draw() + # plt.show() + norm_fact0 = (kw.sum() * dx.prod() * self.n) + norm_fact = (self._norm_factor * self.kernel.norm_factor(d, self.n)) + if np.abs(norm_fact0 - norm_fact) > 0.05 * norm_fact: + warnings.warn( + 'Numerical inaccuracy due to too low discretization. ' + + 'Increase the discretization of the evaluation grid (inc=%d)!' + % inc) + norm_fact = norm_fact0 + + kw = kw / norm_fact + r = kwds.get('r', 0) + if r != 0: + kw *= np.vstack(Xnc) ** r if d > 1 else Xnc[0] + kw.shape = shape0 + kw = np.fft.ifftshift(kw) + fftn = np.fft.fftn + ifftn = np.fft.ifftn + + y = kwds.get('y', 1.0) + # if self.alpha>0: + # y = y / self._lambda**d + + # Find the binned kernel weights, c. + c = gridcount(self.dataset, X, y=y) + # Perform the convolution. + z = np.real(ifftn(fftn(c, s=nfft) * fftn(kw))) + + ix = (slice(0, inc),) * d + if r == 0: + return z[ix] * (z[ix] > 0.0) + else: + return z[ix] + + def _eval_grid(self, *args, **kwds): + + grd = meshgrid(*args) if len(args) > 1 else list(args) + shape0 = grd[0].shape + d = len(grd) + for i in range(d): + grd[i] = grd[i].ravel() + f = self.eval_points(np.vstack(grd), **kwds) + return f.reshape(shape0) + + def _eval_points(self, points, **kwds): + """Evaluate the estimated pdf on a set of points. + + Parameters + ---------- + points : (# of dimensions, # of points)-array + Alternatively, a (# of dimensions,) vector can be passed in and + treated as a single point. + + Returns + ------- + values : (# of points,)-array + The values at each point. + + Raises + ------ + ValueError if the dimensionality of the input points is different than + the dimensionality of the KDE. 
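+
+        Notes
+        -----
+        For efficiency the kernel sum is evaluated by looping over whichever
+        of the data set or the evaluation points is the smaller of the two.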
+ """ + d, m = points.shape + + result = np.zeros((m,)) + + r = kwds.get('r', 0) + if r == 0: + fun = lambda xi: 1 + else: + fun = lambda xi: (xi ** r).sum(axis=0) + + if m >= self.n: + y = kwds.get('y', np.ones(self.n)) + # there are more points than data, so loop over data + for i in range(self.n): + diff = self.dataset[:, i, np.newaxis] - points + tdiff = np.dot(self.inv_hs / self._lambda[i], diff) + result += y[i] * fun(diff) * self.kernel( + tdiff) / self._lambda[i] ** d + else: + y = kwds.get('y', 1) + # loop over points + for i in range(m): + diff = self.dataset - points[:, i, np.newaxis] + tdiff = np.dot(self.inv_hs, diff / self._lambda[np.newaxis, :]) + tmp = y * fun(diff) * self.kernel(tdiff) / self._lambda ** d + result[i] = tmp.sum(axis=-1) + + result /= (self._norm_factor * self.kernel.norm_factor(d, self.n)) + + return result + + +class KRegression(_KDE): + + """ Kernel-Regression + + Parameters + ---------- + data : (# of dims, # of data)-array + datapoints to estimate from + y : # of data - array + response variable + p : scalar integer (0 or 1) + Nadaraya-Watson estimator if p=0, + local linear estimator if p=1. + hs : array-like (optional) + smooting parameter vector/matrix. + (default compute from data using kernel.get_smoothing function) + kernel : kernel function object. + kernel must have get_smoothing method + alpha : real scalar (optional) + sensitivity parameter (default 0 regular KDE) + A good choice might be alpha = 0.5 ( or 1/D) + alpha = 0 Regular KDE (hs is constant) + 0 < alpha <= 1 Adaptive KDE (Make hs change) + xmin, xmax : vectors + specifying the default argument range for the kde.eval_grid methods. + For the kde.eval_grid_fast methods the values must cover the range of + the data. + (default min(data)-range(data)/4, max(data)-range(data)/4) + If a single value of xmin or xmax is given then the boundary is the is + the same for all dimensions. + inc : scalar integer + defining the default dimension of the output from kde.eval_grid methods + (default 128) + (For kde.eval_grid_fast: A value below 50 is very fast to compute but + may give some inaccuracies. 
Values between 100 and 500 give very + accurate results) + + Members + ------- + d : int + number of dimensions + n : int + number of datapoints + + Methods + ------- + kde.eval_grid_fast(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_grid(x0, x1,..., xd) : array + evaluate the estimated pdf on meshgrid(x0, x1,..., xd) + kde.eval_points(points) : array + evaluate the estimated pdf on a provided set of points + kde(x0, x1,..., xd) : array + same as kde.eval_grid(x0, x1,..., xd) + + Example + ------- + >>> N = 100 + >>> ei = np.random.normal(loc=0, scale=0.075, size=(N,)) + + >>> x = np.linspace(0, 1, N) + >>> import wafo.kdetools as wk + + >>> y = 2*np.exp(-x**2/(2*0.3**2))+3*np.exp(-(x-1)**2/(2*0.7**2)) + ei + >>> kreg = wk.KRegression(x, y) + >>> f = kreg(output='plotobj', title='Kernel regression', plotflag=1) + >>> h = f.plot(label='p=0') + """ + + def __init__(self, data, y, p=0, hs=None, kernel=None, alpha=0.0, + xmin=None, xmax=None, inc=128, L2=None): + + self.tkde = TKDE(data, hs=hs, kernel=kernel, + alpha=alpha, xmin=xmin, xmax=xmax, inc=inc, L2=L2) + self.y = y + self.p = p + + def eval_grid_fast(self, *args, **kwds): + self._grdfun = self.tkde.eval_grid_fast + return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds) + + def eval_grid(self, *args, **kwds): + self._grdfun = self.tkde.eval_grid + return self.tkde._eval_grid_fun(self._eval_gridfun, *args, **kwds) + + def _eval_gridfun(self, *args, **kwds): + grdfun = self._grdfun + s0 = grdfun(*args, r=0) + t0 = grdfun(*args, r=0, y=self.y) + if self.p == 0: + return (t0 / (s0 + _TINY)).clip(min=-_REALMAX, max=_REALMAX) + elif self.p == 1: + s1 = grdfun(*args, r=1) + s2 = grdfun(*args, r=2) + t1 = grdfun(*args, r=1, y=self.y) + return ((s2 * t0 - s1 * t1) / + (s2 * s0 - s1 ** 2)).clip(min=-_REALMAX, max=_REALMAX) + __call__ = eval_grid_fast + + +class BKRegression(object): + + ''' + Kernel-Regression on binomial data + + method : {'beta', 'wilson'} + method is one of the following + 'beta', return Bayesian Credible interval using beta-distribution. + 'wilson', return Wilson score interval + a, b : scalars + parameters of the beta distribution defining the apriori distribution + of p, i.e., the Bayes estimator for p: p = (y+a)/(n+a+b). + Setting a=b=0.5 gives Jeffreys interval. + ''' + + def __init__(self, *args, **kwds): + self.method = kwds.pop('method', 'beta') + self.a = max(kwds.pop('a', 0.5), _TINY) + self.b = max(kwds.pop('b', 0.5), _TINY) + self.kreg = KRegression(*args, **kwds) + # defines bin width (i.e. 
smoothing) in empirical estimate + self.hs_e = None +# self.x = self.kreg.tkde.dataset +# self.y = self.kreg.y + + def _set_smoothing(self, hs): + self.kreg.tkde.hs = hs + self.kreg.tkde.initialize() + + x = property(fget=lambda cls: cls.kreg.tkde.dataset.squeeze()) + y = property(fget=lambda cls: cls.kreg.y) + kernel = property(fget=lambda cls: cls.kreg.tkde.kernel) + hs = property(fset=_set_smoothing, fget=lambda cls: cls.kreg.tkde.hs) + + def _get_max_smoothing(self, fun=None): + ''' + Return maximum value for smoothing parameter + ''' + x = self.x + y = self.y + if fun is None: + get_smoothing = self.kernel.get_smoothing + else: + get_smoothing = getattr(self.kernel, fun) + + hs1 = get_smoothing(x) + #hx = np.median(np.abs(x-np.median(x)))/0.6745*(4.0/(3*n))**0.2 + if (y == True).any(): + hs2 = get_smoothing(x[y == True]) + #hy = np.median(np.abs(y-np.mean(y)))/0.6745*(4.0/(3*n))**0.2 + else: + hs2 = 4 * hs1 + #hy = 4*hx + + hopt = sqrt(hs1 * hs2) + return hopt, hs1, hs2 + + def get_grid(self, hs_e=None): + if hs_e is None: + if self.hs_e is None: + hs1 = self._get_max_smoothing('hste')[0] + hs2 = self._get_max_smoothing('hos')[0] + self.hs_e = sqrt(hs1 * hs2) + hs_e = self.hs_e + x = self.x + xmin, xmax = x.min(), x.max() + ni = max(2 * int((xmax - xmin) / hs_e) + 3, 5) + sml = hs_e # *0.1 + xi = np.linspace(xmin - sml, xmax + sml, ni) + return xi + + def prb_ci(self, n, p, alpha=0.05, **kwds): + ''' + Return Confidence Interval for the binomial probability p + + Parameters + ---------- + n : array-like + number of Bernoulli trials + p : array-like + estimated probability of success in each trial + alpha : scalar + confidence level + method : {'beta', 'wilson'} + method is one of the following + 'beta', return Bayesian Credible interval using beta-distribution. + 'wilson', return Wilson score interval + a, b : scalars + parameters of the beta distribution defining the apriori + distribution of p, i.e., + the Bayes estimator for p: p = (y+a)/(n+a+b). + Setting a=b=0.5 gives Jeffreys interval. + ''' + if self.method.startswith('w'): + # Wilson score + z0 = -_invnorm(alpha / 2) + den = 1 + (z0 ** 2. 
/ n) + xc = (p + (z0 ** 2) / (2 * n)) / den + halfwidth = (z0 * sqrt((p * (1 - p) / n) + + (z0 ** 2 / (4 * (n ** 2))))) / den + plo = (xc - halfwidth).clip(min=0) # wilson score + pup = (xc + halfwidth).clip(max=1.0) # wilson score + else: + # Jeffreys intervall a=b=0.5 + # st.beta.isf(alpha/2, y+a, n-y+b) y = n*p, n-y = n*(1-p) + a = self.a + b = self.b + st = stats + pup = np.where( + p == 1, 1, st.beta.isf(alpha / 2, n * p + a, n * (1 - p) + b)) + plo = np.where(p == 0, 0, + st.beta.isf(1 - alpha / 2, + n * p + a, n * (1 - p) + b)) + return plo, pup + + def prb_empirical(self, xi=None, hs_e=None, alpha=0.05, color='r', **kwds): + ''' + Returns empirical binomial probabiltity + + Parameters + ---------- + x : ndarray + position vector + y : ndarray + binomial response variable (zeros and ones) + alpha : scalar + confidence level + color: + used in plot + + Returns + ------- + P(x) : PlotData object + empirical probability + ''' + if xi is None: + xi = self.get_grid(hs_e) + + x = self.x + y = self.y + + c = gridcount(x, xi) # + self.a + self.b # count data + if (y == True).any(): + c0 = gridcount(x[y == True], xi) # + self.a # count success + else: + c0 = np.zeros(xi.shape) + prb = np.where(c == 0, 0, c0 / (c + _TINY)) # assume prb==0 for c==0 + CI = np.vstack(self.prb_ci(c, prb, alpha, **kwds)) + + prb_e = PlotData(prb, xi, plotmethod='plot', plot_args=['.'], + plot_kwds=dict(markersize=6, color=color, picker=5)) + prb_e.dataCI = CI.T + prb_e.count = c + return prb_e + + def prb_smoothed(self, prb_e, hs, alpha=0.05, color='r', label=''): + ''' + Return smoothed binomial probability + + Parameters + ---------- + prb_e : PlotData object with empirical binomial probabilites + hs : smoothing parameter + alpha : confidence level + color : color of plot object + label : label for plot object + ''' + + x_e = prb_e.args + n_e = len(x_e) + dx_e = x_e[1] - x_e[0] + n = self.x.size + + x_s = np.linspace(x_e[0], x_e[-1], 10 * n_e + 1) + self.hs = hs + + prb_s = self.kreg(x_s, output='plotobj', title='', plot_kwds=dict( + color=color, linewidth=2)) # dict(plotflag=7)) + m_nan = np.isnan(prb_s.data) + if m_nan.any(): # assume 0/0 division + prb_s.data[m_nan] = 0.0 + + #prb_s.data[np.isnan(prb_s.data)] = 0 + # expected number of data in each bin + c_s = self.kreg.tkde.eval_grid_fast(x_s) * dx_e * n + plo, pup = self.prb_ci(c_s, prb_s.data, alpha) + + prb_s.dataCI = np.vstack((plo, pup)).T + prb_s.prediction_error_avg = np.trapz( + pup - plo, x_s) / (x_s[-1] - x_s[0]) + + if label: + prb_s.plot_kwds['label'] = label + prb_s.children = [PlotData([plo, pup], x_s, + plotmethod='fill_between', + plot_kwds=dict(alpha=0.2, color=color)), + prb_e] + + # empirical oversmooths the data +# p_s = prb_s.eval_points(self.x) +# dp_s = np.diff(prb_s.data) +# k = (dp_s[:-1]*dp_s[1:]<0).sum() # numpeaks +# p_e = self.y +# n_s = interpolate.interp1d(x_s, c_s)(self.x) +# plo, pup = self.prb_ci(n_s, p_s, alpha) +# sigmai = (pup-plo) +# aicc = (((p_e-p_s)/sigmai)**2).sum()+ 2*k*(k+1)/np.maximum(n-k+1,1) + + p_e = prb_e.eval_points(x_s) + p_s = prb_s.data + dp_s = np.sign(np.diff(p_s)) + k = (dp_s[:-1] != dp_s[1:]).sum() # numpeaks + + #sigmai = (pup-plo)+_EPS + #aicc = (((p_e-p_s)/sigmai)**2).sum()+ 2*k*(k+1)/np.maximum(n_e-k+1,1) + # + np.abs((p_e-pup).clip(min=0)-(p_e-plo).clip(max=0)).sum() + sigmai = _logit(pup) - _logit(plo) + _EPS + aicc = ((((_logit(p_e) - _logit(p_s)) / sigmai) ** 2).sum() + + 2 * k * (k + 1) / np.maximum(n_e - k + 1, 1) + + np.abs((p_e - pup).clip(min=0) - (p_e - plo).clip(max=0)).sum()) + + 
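+        # The quantity above is an AICc-style score computed on the logit
+        # scale: it penalises the misfit between the empirical and smoothed
+        # probabilities, the number of turning points k in the smoothed curve,
+        # and excursions of the empirical probabilities outside the confidence
+        # band. prb_search_best uses it to rank candidate bandwidths.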
prb_s.aicc = aicc + #prb_s.labels.title = '' + #prb_s.labels.title='perr=%1.3f,aicc=%1.3f, n=%d, hs=%1.3f' % + #(prb_s.prediction_error_avg,aicc,n,hs) + + return prb_s + + def prb_search_best(self, prb_e=None, hsvec=None, hsfun='hste', + alpha=0.05, color='r', label=''): + ''' + Return best smoothed binomial probability + + Parameters + ---------- + prb_e : PlotData object with empirical binomial probabilites + hsvec : arraylike + vector smoothing parameters + (default np.linspace(hsmax*0.1,hsmax,55)) + hsfun : + method for calculating hsmax + + ''' + if prb_e is None: + prb_e = self.prb_empirical( + hs_e=self.hs_e, alpha=alpha, color=color) + if hsvec is None: + hsmax = self._get_max_smoothing(hsfun)[0] # @UnusedVariable + hsmax = max(hsmax, self.hs_e) + hsvec = np.linspace(hsmax * 0.2, hsmax, 55) + + hs_best = hsvec[-1] + 0.1 + prb_best = self.prb_smoothed(prb_e, hs_best, alpha, color, label) + aicc = np.zeros(np.size(hsvec)) + for i, hi in enumerate(hsvec): + f = self.prb_smoothed(prb_e, hi, alpha, color, label) + aicc[i] = f.aicc + if f.aicc <= prb_best.aicc: + prb_best = f + hs_best = hi + prb_best.score = PlotData(aicc, hsvec) + prb_best.hs = hs_best + self._set_smoothing(hs_best) + return prb_best + + +class _Kernel(object): + + def __init__(self, r=1.0, stats=None): + self.r = r # radius of kernel + self.stats = stats + + def norm_factor(self, d=1, n=None): + return 1.0 + + def norm_kernel(self, x): + X = np.atleast_2d(x) + return self._kernel(X) / self.norm_factor(*X.shape) + + def kernel(self, x): + return self._kernel(np.atleast_2d(x)) + + def deriv4_6_8_10(self, t, numout=4): + raise Exception('Method not implemented for this kernel!') + + def effective_support(self): + ''' + Return the effective support of kernel. + + The kernel must be symmetric and compactly supported on [-tau tau] + if the kernel has infinite support then the kernel must have the + effective support in [-tau tau], i.e., be negligible outside the range + ''' + return self._effective_support() + + def _effective_support(self): + return - self.r, self.r + __call__ = kernel + + +class _KernelMulti(_Kernel): + # p=0; %Sphere = rect for 1D + # p=1; %Multivariate Epanechnikov kernel. + # p=2; %Multivariate Bi-weight Kernel + # p=3; %Multi variate Tri-weight Kernel + # p=4; %Multi variate Four-weight Kernel + + def __init__(self, r=1.0, p=1, stats=None): + self.r = r + self.p = p + self.stats = stats + + def norm_factor(self, d=1, n=None): + r = self.r + p = self.p + c = 2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(d, r) / np.prod( + np.r_[(d + 2):(2 * p + d + 1):2]) # normalizing constant + return c + + def _kernel(self, x): + r = self.r + p = self.p + x2 = x ** 2 + return ((1.0 - x2.sum(axis=0) / r ** 2).clip(min=0.0)) ** p + +mkernel_epanechnikov = _KernelMulti(p=1, stats=_stats_epan) +mkernel_biweight = _KernelMulti(p=2, stats=_stats_biwe) +mkernel_triweight = _KernelMulti(p=3, stats=_stats_triw) + + +class _KernelProduct(_KernelMulti): + # p=0; %rectangular + # p=1; %1D product Epanechnikov kernel. 
+ # p=2; %1D product Bi-weight Kernel + # p=3; %1D product Tri-weight Kernel + # p=4; %1D product Four-weight Kernel + + def norm_factor(self, d=1, n=None): + r = self.r + p = self.p + c = (2 ** p * np.prod(np.r_[1:p + 1]) * sphere_volume(1, r) / + np.prod(np.r_[(1 + 2):(2 * p + 2):2])) + return c ** d + + def _kernel(self, x): + r = self.r # radius + pdf = (1 - (x / r) ** 2).clip(min=0.0) + return pdf.prod(axis=0) + +mkernel_p1epanechnikov = _KernelProduct(p=1, stats=_stats_epan) +mkernel_p1biweight = _KernelProduct(p=2, stats=_stats_biwe) +mkernel_p1triweight = _KernelProduct(p=3, stats=_stats_triw) + + +class _KernelRectangular(_Kernel): + + def _kernel(self, x): + return np.where(np.all(np.abs(x) <= self.r, axis=0), 1, 0.0) + + def norm_factor(self, d=1, n=None): + r = self.r + return (2 * r) ** d +mkernel_rectangular = _KernelRectangular(stats=_stats_rect) + + +class _KernelTriangular(_Kernel): + + def _kernel(self, x): + pdf = (1 - np.abs(x)).clip(min=0.0) + return pdf.prod(axis=0) +mkernel_triangular = _KernelTriangular(stats=_stats_tria) + + +class _KernelGaussian(_Kernel): + + def _kernel(self, x): + sigma = self.r / 4.0 + x2 = (x / sigma) ** 2 + return exp(-0.5 * x2.sum(axis=0)) + + def norm_factor(self, d=1, n=None): + sigma = self.r / 4.0 + return (2 * pi * sigma) ** (d / 2.0) + + def deriv4_6_8_10(self, t, numout=4): + ''' + Returns 4th, 6th, 8th and 10th derivatives of the kernel function. + ''' + phi0 = exp(-0.5 * t ** 2) / sqrt(2 * pi) + p4 = [1, 0, -6, 0, +3] + p4val = np.polyval(p4, t) * phi0 + if numout == 1: + return p4val + out = [p4val] + pn = p4 + for unusedix in range(numout - 1): + pnp1 = np.polyadd(-np.r_[pn, 0], np.polyder(pn)) + pnp2 = np.polyadd(-np.r_[pnp1, 0], np.polyder(pnp1)) + out.append(np.polyval(pnp2, t) * phi0) + pn = pnp2 + return out + +mkernel_gaussian = _KernelGaussian(r=4.0, stats=_stats_gaus) + +# def mkernel_gaussian(X): +# x2 = X ** 2 +# d = X.shape[0] +# return (2 * pi) ** (-d / 2) * exp(-0.5 * x2.sum(axis=0)) + + +class _KernelLaplace(_Kernel): + + def _kernel(self, x): + absX = np.abs(x) + return exp(-absX.sum(axis=0)) + + def norm_factor(self, d=1, n=None): + return 2 ** d +mkernel_laplace = _KernelLaplace(r=7.0, stats=_stats_lapl) + + +class _KernelLogistic(_Kernel): + + def _kernel(self, x): + s = exp(-x) + return np.prod(1.0 / (s + 1) ** 2, axis=0) +mkernel_logistic = _KernelLogistic(r=7.0, stats=_stats_logi) + +_MKERNEL_DICT = dict( + epan=mkernel_epanechnikov, + biwe=mkernel_biweight, + triw=mkernel_triweight, + p1ep=mkernel_p1epanechnikov, + p1bi=mkernel_p1biweight, + p1tr=mkernel_p1triweight, + rect=mkernel_rectangular, + tria=mkernel_triangular, + lapl=mkernel_laplace, + logi=mkernel_logistic, + gaus=mkernel_gaussian +) +_KERNEL_EXPONENT_DICT = dict( + re=0, sp=0, ep=1, bi=2, tr=3, fo=4, fi=5, si=6, se=7) + + +class Kernel(object): + + ''' + Multivariate kernel + + Parameters + ---------- + name : string + defining the kernel. Valid options are: + 'epanechnikov' - Epanechnikov kernel. + 'biweight' - Bi-weight kernel. + 'triweight' - Tri-weight kernel. + 'p1epanechnikov' - product of 1D Epanechnikov kernel. + 'p1biweight' - product of 1D Bi-weight kernel. + 'p1triweight' - product of 1D Tri-weight kernel. + 'triangular' - Triangular kernel. + 'gaussian' - Gaussian kernel + 'rectangular' - Rectangular kernel. + 'laplace' - Laplace kernel. + 'logistic' - Logistic kernel. + Note that only the first 4 letters of the kernel name is needed. 
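+    fun : string
+        name of the bandwidth selection method bound to get_smoothing
+        (default 'hste'). Any of the bandwidth estimators defined on this
+        class, e.g. 'hns', 'hos', 'hisj', 'hldpi', 'hscv' or 'hstt', may be
+        given; unknown names fall back to 'hste'.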
+ + Examples + -------- + N = 20 + data = np.random.rayleigh(1, size=(N,)) + >>> data = np.array([ + ... 0.75355792, 0.72779194, 0.94149169, 0.07841119, 2.32291887, + ... 1.10419995, 0.77055114, 0.60288273, 1.36883635, 1.74754326, + ... 1.09547561, 1.01671133, 0.73211143, 0.61891719, 0.75903487, + ... 1.8919469 , 0.72433808, 1.92973094, 0.44749838, 1.36508452]) + + >>> import wafo.kdetools as wk + >>> gauss = wk.Kernel('gaussian') + >>> gauss.stats() + (1, 0.28209479177387814, 0.21157109383040862) + >>> gauss.hscv(data) + array([ 0.21555043]) + >>> gauss.hstt(data) + array([ 0.15165387]) + >>> gauss.hste(data) + array([ 0.18942238]) + >>> gauss.hldpi(data) + array([ 0.1718688]) + + >>> wk.Kernel('laplace').stats() + (2, 0.25, inf) + + >>> triweight = wk.Kernel('triweight'); triweight.stats() + (0.1111111111111111, 0.81585081585081587, inf) + + >>> triweight(np.linspace(-1,1,11)) + array([ 0. , 0.046656, 0.262144, 0.592704, 0.884736, 1. , + 0.884736, 0.592704, 0.262144, 0.046656, 0. ]) + >>> triweight.hns(data) + array([ 0.82087056]) + >>> triweight.hos(data) + array([ 0.88265652]) + >>> triweight.hste(data) + array([ 0.56570278]) + >>> triweight.hscv(data) + array([ 0.64193201]) + + See also + -------- + mkernel + + References + ---------- + B. W. Silverman (1986) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp. 43, 76 + + Wand, M. P. and Jones, M. C. (1995) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp 31, 103, 175 + ''' + + def __init__(self, name, fun='hste'): # 'hns'): + self.kernel = _MKERNEL_DICT[name[:4]] + #self.name = self.kernel.__name__.replace('mkernel_', '').title() + try: + self.get_smoothing = getattr(self, fun) + except: + self.get_smoothing = self.hste + + def _get_name(self): + return self.kernel.__class__.__name__.replace('_Kernel', '').title() + name = property(_get_name) + + def get_smoothing(self, *args, **kwds): + pass + + def stats(self): + ''' Return some 1D statistics of the kernel. + + Returns + ------- + mu2 : real scalar + 2'nd order moment, i.e.,int(x^2*kernel(x)) + R : real scalar + integral of squared kernel, i.e., int(kernel(x)^2) + Rdd : real scalar + integral of squared double derivative of kernel, + i.e., int( (kernel''(x))^2 ). + + Reference + --------- + Wand,M.P. and Jones, M.C. (1995) + 'Kernel smoothing' + Chapman and Hall, pp 176. + ''' + return self.kernel.stats + + def deriv4_6_8_10(self, t, numout=4): + return self.kernel.deriv4_6_8_10(t, numout) + + def effective_support(self): + return self.kernel.effective_support() + + def hns(self, data): + ''' + Returns Normal Scale Estimate of Smoothing Parameter. + + Parameter + --------- + data : 2D array + shape d x n (d = # dimensions ) + + Returns + ------- + h : array-like + one dimensional optimal value for smoothing parameter + given the data and kernel. size D + + HNS only gives an optimal value with respect to mean integrated + square error, when the true underlying distribution + is Gaussian. This works reasonably well if the data resembles a + Gaussian distribution. However if the distribution is asymmetric, + multimodal or have long tails then HNS may return a to large + smoothing parameter, i.e., the KDE may be oversmoothed and mask + important features of the data. (=> large bias). + One way to remedy this is to reduce H by multiplying with a constant + factor, e.g., 0.85. Another is to try different values for H and make a + visual check by eye. 
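+
+        For the Gaussian kernel the rule reduces to the familiar normal
+        reference value h = (4/(3*n))**(1/5)*sigma (about
+        1.06*sigma*n**(-1/5)), with sigma replaced by the robust scale
+        estimate min(std, iqr/1.349).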
+ + Example: + data = rndnorm(0, 1,20,1) + h = hns(data,'epan') + + See also: + --------- + hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt, kde + + Reference: + --------- + B. W. Silverman (1986) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp 43-48 + Wand,M.P. and Jones, M.C. (1995) + 'Kernel smoothing' + Chapman and Hall, pp 60--63 + ''' + + A = np.atleast_2d(data) + n = A.shape[1] + + # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) + mu2, R, unusedRdd = self.stats() + AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) + iqr = iqrange(A, axis=1) # interquartile range + stdA = np.std(A, axis=1, ddof=1) + # use of interquartile range guards against outliers. + # the use of interquartile range is better if + # the distribution is skew or have heavy tails + # This lessen the chance of oversmoothing. + return np.where(iqr > 0, + np.minimum(stdA, iqr / 1.349), stdA) * AMISEconstant + + def hos(self, data): + ''' + Returns Oversmoothing Parameter. + + Parameter + --------- + data = data matrix, size N x D (D = # dimensions ) + + Returns + ------- + h : vector size 1 x D + one dimensional maximum smoothing value for smoothing parameter + given the data and kernel. + + The oversmoothing or maximal smoothing principle relies on the fact + that there is a simple upper bound for the AMISE-optimal bandwidth for + estimation of densities with a fixed value of a particular scale + measure. While HOS will give too large bandwidth for optimal estimation + of a general density it provides an excellent starting point for + subjective choice of bandwidth. A sensible strategy is to plot an + estimate with bandwidth HOS and then sucessively look at plots based on + convenient fractions of HOS to see what features are present in the + data for various amount of smoothing. The relation to HNS is given by: + + HOS = HNS/0.93 + + Example: + -------- + data = rndnorm(0, 1,20,1) + h = hos(data,'epan'); + + See also hste, hbcv, hboot, hldpi, hlscv, hscv, hstt, kde, kdefun + + Reference + --------- + B. W. Silverman (1986) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp 43-48 + + Wand,M.P. and Jones, M.C. (1986) + 'Kernel smoothing' + Chapman and Hall, pp 60--63 + ''' + return self.hns(data) / 0.93 + + def hmns(self, data): + ''' + Returns Multivariate Normal Scale Estimate of Smoothing Parameter. + + CALL: h = hmns(data,kernel) + + h = M dimensional optimal value for smoothing parameter + given the data and kernel. size D x D + data = data matrix, size D x N (D = # dimensions ) + kernel = 'epanechnikov' - Epanechnikov kernel. + 'biweight' - Bi-weight kernel. + 'triweight' - Tri-weight kernel. + 'gaussian' - Gaussian kernel + + Note that only the first 4 letters of the kernel name is needed. + + HMNS only gives a optimal value with respect to mean integrated + square error, when the true underlying distribution is Multivariate + Gaussian. This works reasonably well if the data resembles a + Multivariate Gaussian distribution. However if the distribution is + asymmetric, multimodal or have long tails then HNS is maybe more + appropriate. + + Example: + data = rndnorm(0, 1,20,2) + h = hmns(data,'epan') + + See also + -------- + + hns, hste, hbcv, hboot, hos, hldpi, hlscv, hscv, hstt + + Reference + ---------- + B. W. Silverman (1986) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp 43-48, 87 + + Wand,M.P. and Jones, M.C. 
(1995) + 'Kernel smoothing' + Chapman and Hall, pp 60--63, 86--88 + ''' + # TODO: implement more kernels + + A = np.atleast_2d(data) + d, n = A.shape + + if d == 1: + return self.hns(data) + name = self.name[:4].lower() + if name == 'epan': # Epanechnikov kernel + a = (8.0 * (d + 4.0) * (2 * sqrt(pi)) + ** d / sphere_volume(d)) ** (1. / (4.0 + d)) + elif name == 'biwe': # Bi-weight kernel + a = 2.7779 + if d > 2: + raise ValueError('not implemented for d>2') + elif name == 'triw': # Triweight + a = 3.12 + if d > 2: + raise ValueError('not implemented for d>2') + elif name == 'gaus': # Gaussian kernel + a = (4.0 / (d + 2.0)) ** (1. / (d + 4.0)) + else: + raise ValueError('Unknown kernel.') + + covA = scipy.cov(A) + + return a * linalg.sqrtm(covA).real * n ** (-1. / (d + 4)) + + def hste(self, data, h0=None, inc=128, maxit=100, releps=0.01, abseps=0.0): + '''HSTE 2-Stage Solve the Equation estimate of smoothing parameter. + + CALL: hs = hste(data,kernel,h0) + + hs = one dimensional value for smoothing parameter + given the data and kernel. size 1 x D + data = data matrix, size N x D (D = # dimensions ) + kernel = 'gaussian' - Gaussian kernel (default) + ( currently the only supported kernel) + h0 = initial starting guess for hs (default h0=hns(A,kernel)) + + Example: + x = rndnorm(0,1,50,1); + hs = hste(x,'gauss'); + + See also hbcv, hboot, hos, hldpi, hlscv, hscv, hstt, kde, kdefun + + Reference + --------- + B. W. Silverman (1986) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp 57--61 + + Wand,M.P. and Jones, M.C. (1986) + 'Kernel smoothing' + Chapman and Hall, pp 74--75 + ''' + # TODO: NB: this routine can be made faster: + # TODO: replace the iteration in the end with a Newton Raphson scheme + + A = np.atleast_2d(data) + d, n = A.shape + + # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) + mu2, R, unusedRdd = self.stats() + + AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) + STEconstant = R / (mu2 ** (2) * n) + + sigmaA = self.hns(A) / AMISEconstant + if h0 is None: + h0 = sigmaA * AMISEconstant + + h = np.asarray(h0, dtype=float) + + nfft = inc * 2 + amin = A.min(axis=1) # Find the minimum value of A. + amax = A.max(axis=1) # Find the maximum value of A. + arange = amax - amin # Find the range of A. + + #% xa holds the x 'axis' vector, defining a grid of x values where + #% the k.d. function will be evaluated. + + ax1 = amin - arange / 8.0 + bx1 = amax + arange / 8.0 + + kernel2 = Kernel('gauss') + mu2, R, unusedRdd = kernel2.stats() + STEconstant2 = R / (mu2 ** (2) * n) + fft = np.fft.fft + ifft = np.fft.ifft + + for dim in range(d): + s = sigmaA[dim] + ax = ax1[dim] + bx = bx1[dim] + + xa = np.linspace(ax, bx, inc) + xn = np.linspace(0, bx - ax, inc) + + c = gridcount(A[dim], xa) + + # Step 1 + psi6NS = -15 / (16 * sqrt(pi) * s ** 7) + psi8NS = 105 / (32 * sqrt(pi) * s ** 9) + + # Step 2 + k40, k60 = kernel2.deriv4_6_8_10(0, numout=2) + g1 = (-2 * k40 / (mu2 * psi6NS * n)) ** (1.0 / 7) + g2 = (-2 * k60 / (mu2 * psi8NS * n)) ** (1.0 / 9) + + # Estimate psi6 given g2. + # kernel weights. + kw4, kw6 = kernel2.deriv4_6_8_10(xn / g2, numout=2) + # Apply fftshift to kw. + kw = np.r_[kw6, 0, kw6[-1:0:-1]] + z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. + psi6 = np.sum(c * z[:inc]) / (n * (n - 1) * g2 ** 7) + + # Estimate psi4 given g1. + kw4 = kernel2.deriv4_6_8_10(xn / g1, numout=1) # kernel weights. + kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply 'fftshift' to kw. + z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. 
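+            # The binned FFT convolution above approximates the kernel
+            # functional psi4(g1) = sum_i sum_j K4((X_i - X_j)/g1) /
+            # (n*(n-1)*g1**5), where K4 is the 4th derivative of the Gaussian
+            # kernel; psi4 then drives the solve-the-equation iteration below.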
+ psi4 = np.sum(c * z[:inc]) / (n * (n - 1) * g1 ** 5) + + h1 = h[dim] + h_old = 0 + count = 0 + + while ((abs(h_old - h1) > max(releps * h1, abseps)) and + (count < maxit)): + count += 1 + h_old = h1 + + # Step 3 + gamma = ((2 * k40 * mu2 * psi4 * h1 ** 5) / + (-psi6 * R)) ** (1.0 / 7) + + # Now estimate psi4 given gamma. + #kernel weights. + kw4 = kernel2.deriv4_6_8_10(xn / gamma, numout=1) + kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply 'fftshift' to kw. + z = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. + + psi4Gamma = np.sum(c * z[:inc]) / (n * (n - 1) * gamma ** 5) + + # Step 4 + h1 = (STEconstant2 / psi4Gamma) ** (1.0 / 5) + + # Kernel other than Gaussian scale bandwidth + h1 = h1 * (STEconstant / STEconstant2) ** (1.0 / 5) + + if count >= maxit: + warnings.warn('The obtained value did not converge.') + + h[dim] = h1 + # end % for dim loop + return h + + def hisj(self, data, inc=512, L=7): + ''' + HISJ Improved Sheather-Jones estimate of smoothing parameter. + + Unlike many other implementations, this one is immune to problems + caused by multimodal densities with widely separated modes. The + estimation does not deteriorate for multimodal densities, because + it do not assume a parametric model for the data. + + Parameters + ---------- + data - a vector of data from which the density estimate is constructed + inc - the number of mesh points used in the uniform discretization + + Returns + ------- + bandwidth - the optimal bandwidth + + Reference + --------- + Kernel density estimation via diffusion + Z. I. Botev, J. F. Grotowski, and D. P. Kroese (2010) + Annals of Statistics, Volume 38, Number 5, pages 2916-2957. + ''' + A = np.atleast_2d(data) + d, n = A.shape + + # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) + mu2, R, unusedRdd = self.stats() + STEconstant = R / (n * mu2 ** 2) + + amin = A.min(axis=1) # Find the minimum value of A. + amax = A.max(axis=1) # Find the maximum value of A. + arange = amax - amin # Find the range of A. + + #% xa holds the x 'axis' vector, defining a grid of x values where + #% the k.d. function will be evaluated. + + ax1 = amin - arange / 8.0 + bx1 = amax + arange / 8.0 + + kernel2 = Kernel('gauss') + mu2, R, unusedRdd = kernel2.stats() + STEconstant2 = R / (mu2 ** (2) * n) + + def fixed_point(t, N, I, a2): + ''' this implements the function t-zeta*gamma^[L](t)''' + + prod = np.prod + #L = 7 + logI = np.log(I) + f = 2 * \ + pi ** (2 * L) * (a2 * exp(L * logI - I * pi ** 2 * t)).sum() + for s in range(L - 1, 1, -1): + K0 = prod(np.r_[1:2 * s:2]) / sqrt(2 * pi) + const = (1 + (1. / 2) ** (s + 1. / 2)) / 3 + time = (2 * const * K0 / N / f) ** (2. / (3 + 2 * s)) + f = 2 * \ + pi ** (2 * s) * \ + (a2 * exp(s * logI - I * pi ** 2 * time)).sum() + return t - (2 * N * sqrt(pi) * f) ** (-2. 
/ 5) + + h = np.empty(d) + for dim in range(d): + ax = ax1[dim] + bx = bx1[dim] + xa = np.linspace(ax, bx, inc) + R = bx - ax + + c = gridcount(A[dim], xa) + N = len(set(A[dim])) + #a = dct(c/c.sum(), norm=None) + a = dct(c / len(A[dim]), norm=None) + + # now compute the optimal bandwidth^2 using the referenced method + I = np.asfarray(np.arange(1, inc)) ** 2 + a2 = (a[1:] / 2) ** 2 + fun = lambda t: fixed_point(t, N, I, a2) + x = np.linspace(0, 0.1, 150) + ai = x[0] + f0 = fun(ai) + for bi in x[1:]: + f1 = fun(bi) + if f1 * f0 <= 0: + #print('ai = %g, bi = %g' % (ai,bi)) + break + else: + ai = bi + #y = np.asarray([fun(j) for j in x]) + # plt.figure(1) + # plt.plot(x,y) + # plt.show() + + # use fzero to solve the equation t=zeta*gamma^[5](t) + try: + t_star = optimize.brentq(fun, a=ai, b=bi) + except: + t_star = 0.28 * N ** (-2. / 5) + warnings.warn('Failure in obtaining smoothing parameter') + + # smooth the discrete cosine transform of initial data using t_star + # a_t = a*exp(-np.arange(inc)**2*pi**2*t_star/2) + # now apply the inverse discrete cosine transform + #density = idct(a_t)/R; + + # take the rescaling of the data into account + bandwidth = sqrt(t_star) * R + + # Kernel other than Gaussian scale bandwidth + h[dim] = bandwidth * (STEconstant / STEconstant2) ** (1.0 / 5) + # end % for dim loop + return h + + def hstt(self, data, h0=None, inc=128, maxit=100, releps=0.01, abseps=0.0): + '''HSTT Scott-Tapia-Thompson estimate of smoothing parameter. + + CALL: hs = hstt(data,kernel) + + hs = one dimensional value for smoothing parameter + given the data and kernel. size 1 x D + data = data matrix, size N x D (D = # dimensions ) + kernel = 'epanechnikov' - Epanechnikov kernel. (default) + 'biweight' - Bi-weight kernel. + 'triweight' - Tri-weight kernel. + 'triangular' - Triangular kernel. + 'gaussian' - Gaussian kernel + 'rectangular' - Rectangular kernel. + 'laplace' - Laplace kernel. + 'logistic' - Logistic kernel. + + HSTT returns Scott-Tapia-Thompson (STT) estimate of smoothing + parameter. This is a Solve-The-Equation rule (STE). + Simulation studies shows that the STT estimate of HS + is a good choice under a variety of models. A comparison with + likelihood cross-validation (LCV) indicates that LCV performs slightly + better for short tailed densities. + However, STT method in contrast to LCV is insensitive to outliers. + + Example + ------- + x = rndnorm(0,1,50,1); + hs = hstt(x,'gauss'); + + See also + -------- + hste, hbcv, hboot, hos, hldpi, hlscv, hscv, kde, kdebin + + Reference + --------- + B. W. Silverman (1986) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp 57--61 + ''' + A = np.atleast_2d(data) + d, n = A.shape + + # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) + mu2, R, unusedRdd = self.stats() + + AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) + STEconstant = R / (mu2 ** (2) * n) + + sigmaA = self.hns(A) / AMISEconstant + if h0 is None: + h0 = sigmaA * AMISEconstant + + h = np.asarray(h0, dtype=float) + + nfft = inc * 2 + amin = A.min(axis=1) # Find the minimum value of A. + amax = A.max(axis=1) # Find the maximum value of A. + arange = amax - amin # Find the range of A. + + #% xa holds the x 'axis' vector, defining a grid of x values where + #% the k.d. function will be evaluated. 
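+        # For each dimension the loop below alternates between a binned kernel
+        # density estimate computed with the current bandwidth and a
+        # finite-difference estimate of R(f''), and updates h with the
+        # solve-the-equation formula until it converges or maxit is reached.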
+ + ax1 = amin - arange / 8.0 + bx1 = amax + arange / 8.0 + + fft = np.fft.fft + ifft = np.fft.ifft + for dim in range(d): + s = sigmaA[dim] + datan = A[dim] / s + ax = ax1[dim] / s + bx = bx1[dim] / s + + xa = np.linspace(ax, bx, inc) + xn = np.linspace(0, bx - ax, inc) + + c = gridcount(datan, xa) + + count = 1 + h_old = 0 + h1 = h[dim] / s + delta = (bx - ax) / (inc - 1) + while ((abs(h_old - h1) > max(releps * h1, abseps)) and + (count < maxit)): + count += 1 + h_old = h1 + + kw4 = self.kernel(xn / h1) / (n * h1 * self.norm_factor(d=1)) + kw = np.r_[kw4, 0, kw4[-1:0:-1]] # Apply 'fftshift' to kw. + f = np.real(ifft(fft(c, nfft) * fft(kw))) # convolution. + + # Estimate psi4=R(f'') using simple finite differences and + # quadrature. + ix = np.arange(1, inc - 1) + z = ((f[ix + 1] - 2 * f[ix] + f[ix - 1]) / delta ** 2) ** 2 + psi4 = delta * z.sum() + h1 = (STEconstant / psi4) ** (1. / 5) + + if count >= maxit: + warnings.warn('The obtained value did not converge.') + + h[dim] = h1 * s + # end % for dim loop + return h + + def hscv(self, data, hvec=None, inc=128, maxit=100, fulloutput=False): + ''' + HSCV Smoothed cross-validation estimate of smoothing parameter. + + CALL: [hs,hvec,score] = hscv(data,kernel,hvec) + + hs = smoothing parameter + hvec = vector defining possible values of hs + (default linspace(0.25*h0,h0,100), h0=0.62) + score = score vector + data = data vector + kernel = 'gaussian' - Gaussian kernel the only supported + + Note that only the first 4 letters of the kernel name is needed. + + Example: + data = rndnorm(0,1,20,1) + [hs hvec score] = hscv(data,'epan'); + plot(hvec,score) + See also hste, hbcv, hboot, hos, hldpi, hlscv, hstt, kde, kdefun + + Wand,M.P. and Jones, M.C. (1986) + 'Kernel smoothing' + Chapman and Hall, pp 75--79 + ''' + # TODO: Add support for other kernels than Gaussian + A = np.atleast_2d(data) + d, n = A.shape + + # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) + mu2, R, unusedRdd = self.stats() + + AMISEconstant = (8 * sqrt(pi) * R / (3 * mu2 ** 2 * n)) ** (1. / 5) + STEconstant = R / (mu2 ** (2) * n) + + sigmaA = self.hns(A) / AMISEconstant + if hvec is None: + H = AMISEconstant / 0.93 + hvec = np.linspace(0.25 * H, H, maxit) + hvec = np.asarray(hvec, dtype=float) + + steps = len(hvec) + score = np.zeros(steps) + + nfft = inc * 2 + amin = A.min(axis=1) # Find the minimum value of A. + amax = A.max(axis=1) # Find the maximum value of A. + arange = amax - amin # Find the range of A. + + #% xa holds the x 'axis' vector, defining a grid of x values where + #% the k.d. function will be evaluated. + + ax1 = amin - arange / 8.0 + bx1 = amax + arange / 8.0 + + kernel2 = Kernel('gauss') + mu2, R, unusedRdd = kernel2.stats() + STEconstant2 = R / (mu2 ** (2) * n) + fft = np.fft.fft + ifft = np.fft.ifft + + h = np.zeros(d) + hvec = hvec * (STEconstant2 / STEconstant) ** (1. / 5.) + + k40, k60, k80, k100 = kernel2.deriv4_6_8_10(0, numout=4) + psi8 = 105 / (32 * sqrt(pi)) + psi12 = 3465. / (512 * sqrt(pi)) + g1 = (-2. * k60 / (mu2 * psi8 * n)) ** (1. / 9.) + g2 = (-2. * k100 / (mu2 * psi12 * n)) ** (1. / 13.) 
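+        # k40..k100 are Gaussian kernel derivatives at zero, and psi8, psi12
+        # the normal-reference functionals; g1 and g2 act as pilot bandwidths
+        # for estimating psi6 and psi10 inside the loop, which in turn yield
+        # the pilot bandwidths g3 and g4 used for the psi4 and psi8 estimates.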
+ + for dim in range(d): + s = sigmaA[dim] + ax = ax1[dim] / s + bx = bx1[dim] / s + datan = A[dim] / s + + xa = np.linspace(ax, bx, inc) + xn = np.linspace(0, bx - ax, inc) + + c = gridcount(datan, xa) + + kw4, kw6 = kernel2.deriv4_6_8_10(xn / g1, numout=2) + kw = np.r_[kw6, 0, kw6[-1:0:-1]] + z = np.real(ifft(fft(c, nfft) * fft(kw))) + psi6 = np.sum(c * z[:inc]) / (n ** 2 * g1 ** 7) + + kw4, kw6, kw8, kw10 = kernel2.deriv4_6_8_10(xn / g2, numout=4) + kw = np.r_[kw10, 0, kw10[-1:0:-1]] + z = np.real(ifft(fft(c, nfft) * fft(kw))) + psi10 = np.sum(c * z[:inc]) / (n ** 2 * g2 ** 11) + + g3 = (-2. * k40 / (mu2 * psi6 * n)) ** (1. / 7.) + g4 = (-2. * k80 / (mu2 * psi10 * n)) ** (1. / 11.) + + kw4 = kernel2.deriv4_6_8_10(xn / g3, numout=1) + kw = np.r_[kw4, 0, kw4[-1:0:-1]] + z = np.real(ifft(fft(c, nfft) * fft(kw))) + psi4 = np.sum(c * z[:inc]) / (n ** 2 * g3 ** 5) + + kw4, kw6, kw8 = kernel2.deriv4_6_8_10(xn / g3, numout=3) + kw = np.r_[kw8, 0, kw8[-1:0:-1]] + z = np.real(ifft(fft(c, nfft) * fft(kw))) + psi8 = np.sum(c * z[:inc]) / (n ** 2 * g4 ** 9) + + const = (441. / (64 * pi)) ** (1. / 18.) * \ + (4 * pi) ** (-1. / 5.) * \ + psi4 ** (-2. / 5.) * psi8 ** (-1. / 9.) + + M = np.atleast_2d(datan) + + Y = (M - M.T).ravel() + + for i in range(steps): + g = const * n ** (-23. / 45) * hvec[i] ** (-2) + sig1 = sqrt(2 * hvec[i] ** 2 + 2 * g ** 2) + sig2 = sqrt(hvec[i] ** 2 + 2 * g ** 2) + sig3 = sqrt(2 * g ** 2) + term2 = np.sum(kernel2(Y / sig1) / sig1 - 2 * kernel2( + Y / sig2) / sig2 + kernel2(Y / sig3) / sig3) + + score[i] = 1. / (n * hvec[i] * 2. * sqrt(pi)) + term2 / n ** 2 + + idx = score.argmin() + # Kernel other than Gaussian scale bandwidth + h[dim] = hvec[idx] * (STEconstant / STEconstant2) ** (1 / 5) + if idx == 0: + warnings.warn( + 'Optimum is probably lower than hs=%g for dim=%d' % + (h[dim] * s, dim)) + elif idx == maxit - 1: + warnings.warn( + 'Optimum is probably higher than hs=%g for dim=%d' % + (h[dim] * s, dim)) + + hvec = hvec * (STEconstant / STEconstant2) ** (1 / 5) + if fulloutput: + return h * sigmaA, score, hvec, sigmaA + else: + return h * sigmaA + + def hldpi(self, data, L=2, inc=128): + '''HLDPI L-stage Direct Plug-In estimate of smoothing parameter. + + CALL: hs = hldpi(data,kernel,L) + + hs = one dimensional value for smoothing parameter + given the data and kernel. size 1 x D + data = data matrix, size N x D (D = # dimensions ) + kernel = 'epanechnikov' - Epanechnikov kernel. + 'biweight' - Bi-weight kernel. + 'triweight' - Tri-weight kernel. + 'triangluar' - Triangular kernel. + 'gaussian' - Gaussian kernel + 'rectangular' - Rectanguler kernel. + 'laplace' - Laplace kernel. + 'logistic' - Logistic kernel. + L = 0,1,2,3,... (default 2) + + Note that only the first 4 letters of the kernel name is needed. + + Example: + x = rndnorm(0,1,50,1); + hs = hldpi(x,'gauss',1); + + See also hste, hbcv, hboot, hos, hlscv, hscv, hstt, kde, kdefun + + Wand,M.P. and Jones, M.C. (1995) + 'Kernel smoothing' + Chapman and Hall, pp 67--74 + ''' + A = np.atleast_2d(data) + d, n = A.shape + + # R= int(mkernel(x)^2), mu2= int(x^2*mkernel(x)) + mu2, R, unusedRdd = self.stats() + + AMISEconstant = (8 * sqrt(pi) * R / (3 * n * mu2 ** 2)) ** (1. / 5) + STEconstant = R / (n * mu2 ** 2) + + sigmaA = self.hns(A) / AMISEconstant + + nfft = inc * 2 + amin = A.min(axis=1) # Find the minimum value of A. + amax = A.max(axis=1) # Find the maximum value of A. + arange = amax - amin # Find the range of A. + + #% xa holds the x 'axis' vector, defining a grid of x values where + #% the k.d. 
function will be evaluated. + + ax1 = amin - arange / 8.0 + bx1 = amax + arange / 8.0 + + kernel2 = Kernel('gauss') + mu2, unusedR, unusedRdd = kernel2.stats() + + fft = np.fft.fft + ifft = np.fft.ifft + + h = np.zeros(d) + for dim in range(d): + s = sigmaA[dim] + datan = A[dim] # / s + ax = ax1[dim] # / s + bx = bx1[dim] # / s + + xa = np.linspace(ax, bx, inc) + xn = np.linspace(0, bx - ax, inc) + + c = gridcount(datan, xa) + + r = 2 * L + 4 + rd2 = L + 2 + + # Eq. 3.7 in Wand and Jones (1995) + PSI_r = (-1) ** (rd2) * np.prod( + np.r_[rd2 + 1:r + 1]) / (sqrt(pi) * (2 * s) ** (r + 1)) + PSI = PSI_r + if L > 0: + # High order derivatives of the Gaussian kernel + Kd = kernel2.deriv4_6_8_10(0, numout=L) + + # L-stage iterations to estimate PSI_4 + for ix in range(L, 0, -1): + gi = (-2 * Kd[ix - 1] / + (mu2 * PSI * n)) ** (1. / (2 * ix + 5)) + + # Obtain the kernel weights. + KW0 = kernel2.deriv4_6_8_10(xn / gi, numout=ix) + if ix > 1: + KW0 = KW0[-1] + # Apply 'fftshift' to kw. + kw = np.r_[KW0, 0, KW0[inc - 1:0:-1]] + + # Perform the convolution. + z = np.real(ifft(fft(c, nfft) * fft(kw))) + + PSI = np.sum(c * z[:inc]) / (n ** 2 * gi ** (2 * ix + 3)) + # end + # end + h[dim] = (STEconstant / PSI) ** (1. / 5) + return h + + def norm_factor(self, d=1, n=None): + return self.kernel.norm_factor(d, n) + + def eval_points(self, points): + return self.kernel(np.atleast_2d(points)) + __call__ = eval_points + + +def mkernel(X, kernel): + ''' + MKERNEL Multivariate Kernel Function. + + Paramaters + ---------- + X : array-like + matrix size d x n (d = # dimensions, n = # evaluation points) + kernel : string + defining kernel + 'epanechnikov' - Epanechnikov kernel. + 'biweight' - Bi-weight kernel. + 'triweight' - Tri-weight kernel. + 'p1epanechnikov' - product of 1D Epanechnikov kernel. + 'p1biweight' - product of 1D Bi-weight kernel. + 'p1triweight' - product of 1D Tri-weight kernel. + 'triangular' - Triangular kernel. + 'gaussian' - Gaussian kernel + 'rectangular' - Rectangular kernel. + 'laplace' - Laplace kernel. + 'logistic' - Logistic kernel. + Note that only the first 4 letters of the kernel name is needed. + + Returns + ------- + z : ndarray + kernel function values evaluated at X + + See also + -------- + kde, kdefun, kdebin + + References + ---------- + B. W. Silverman (1986) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp. 43, 76 + + Wand, M. P. and Jones, M. C. (1995) + 'Density estimation for statistics and data analysis' + Chapman and Hall, pp 31, 103, 175 + ''' + fun = _MKERNEL_DICT[kernel[:4]] + return fun(np.atleast_2d(X)) + + +def accumsum(accmap, a, size, dtype=None): + if dtype is None: + dtype = a.dtype + size = np.atleast_1d(size) + if len(size) > 1: + binx = accmap[:, 0] + biny = accmap[:, 1] + out = sparse.coo_matrix( + (a.ravel(), (binx, biny)), shape=size, dtype=dtype).tocsr() + else: + binx = accmap.ravel() + zero = np.zeros(len(binx)) + out = sparse.coo_matrix( + (a.ravel(), (binx, zero)), shape=(size, 1), dtype=dtype).tocsr() + return out + + +def accumsum2(accmap, a, size): + return np.bincount(accmap.ravel(), a.ravel(), np.array(size).max()) + + +def accum(accmap, a, func=None, size=None, fill_value=0, dtype=None): + """ + An accumulation function similar to Matlab's `accumarray` function. + + Parameters + ---------- + accmap : ndarray + This is the "accumulation map". It maps input (i.e. indices into + `a`) to their destination in the output array. The first `a.ndim` + dimensions of `accmap` must be the same as `a.shape`. 
That is, + `accmap.shape[:a.ndim]` must equal `a.shape`. For example, if `a` + has shape (15,4), then `accmap.shape[:2]` must equal (15,4). In this + case `accmap[i,j]` gives the index into the output array where + element (i,j) of `a` is to be accumulated. If the output is, say, + a 2D, then `accmap` must have shape (15,4,2). The value in the + last dimension give indices into the output array. If the output is + 1D, then the shape of `accmap` can be either (15,4) or (15,4,1) + a : ndarray + The input data to be accumulated. + func : callable or None + The accumulation function. The function will be passed a list + of values from `a` to be accumulated. + If None, numpy.sum is assumed. + size : ndarray or None + The size of the output array. If None, the size will be determined + from `accmap`. + fill_value : scalar + The default value for elements of the output array. + dtype : numpy data type, or None + The data type of the output array. If None, the data type of + `a` is used. + + Returns + ------- + out : ndarray + The accumulated results. + + The shape of `out` is `size` if `size` is given. Otherwise the + shape is determined by the (lexicographically) largest indices of + the output found in `accmap`. + + + Examples + -------- + >>> from numpy import array, prod + >>> a = array([[1,2,3],[4,-1,6],[-1,8,9]]) + >>> a + array([[ 1, 2, 3], + [ 4, -1, 6], + [-1, 8, 9]]) + >>> # Sum the diagonals. + >>> accmap = array([[0,1,2],[2,0,1],[1,2,0]]) + >>> s = accum(accmap, a) + >>> s + array([ 9, 7, 15]) + >>> # A 2D output, from sub-arrays with shapes and positions like this: + >>> # [ (2,2) (2,1)] + >>> # [ (1,2) (1,1)] + >>> accmap = array([ + ... [[0,0],[0,0],[0,1]], + ... [[0,0],[0,0],[0,1]], + ... [[1,0],[1,0],[1,1]]]) + >>> # Accumulate using a product. + >>> accum(accmap, a, func=prod, dtype=float) + array([[ -8., 18.], + [ -8., 9.]]) + >>> # Same accmap, but create an array of lists of values. + >>> accum(accmap, a, func=lambda x: x, dtype='O') + array([[[1, 2, 4, -1], [3, 6]], + [[-1, 8], [9]]], dtype=object) + """ + + # Check for bad arguments and handle the defaults. + if accmap.shape[:a.ndim] != a.shape: + raise ValueError( + "The initial dimensions of accmap must be the same as a.shape") + if func is None: + func = np.sum + if dtype is None: + dtype = a.dtype + if accmap.shape == a.shape: + accmap = np.expand_dims(accmap, -1) + adims = tuple(range(a.ndim)) + if size is None: + size = 1 + np.squeeze(np.apply_over_axes(np.max, accmap, axes=adims)) + size = np.atleast_1d(size) + + # Create an array of python lists of values. + vals = np.empty(size, dtype='O') + for s in product(*[range(k) for k in size]): + vals[s] = [] + for s in product(*[range(k) for k in a.shape]): + indx = tuple(accmap[s]) + val = a[s] + vals[indx].append(val) + + # Create the output array. + out = np.empty(size, dtype=dtype) + for s in product(*[range(k) for k in size]): + if vals[s] == []: + out[s] = fill_value + else: + out[s] = func(vals[s]) + return out + + +def qlevels(pdf, p=(10, 30, 50, 70, 90, 95, 99, 99.9), x1=None, x2=None): + '''QLEVELS Calculates quantile levels which encloses P% of PDF + + CALL: [ql PL] = qlevels(pdf,PL,x1,x2); + + ql = the discrete quantile levels. + pdf = joint point density function matrix or vector + PL = percent level (default [10:20:90 95 99 99.9]) + x1,x2 = vectors of the spacing of the variables + (Default unit spacing) + + QLEVELS numerically integrates PDF by decreasing height and find the + quantile levels which encloses P% of the distribution. 
If X1 and + (or) X2 is unspecified it is assumed that dX1 and dX2 is constant. + NB! QLEVELS normalizes the integral of PDF to N/(N+0.001) before + calculating QL in order to reflect the sampling of PDF is finite. + Currently only able to handle 1D and 2D PDF's if dXi is not constant + (i=1,2). + + Example + ------- + >>> import wafo.stats as ws + >>> x = np.linspace(-8,8,2001); + >>> PL = np.r_[10:90:20, 90, 95, 99, 99.9] + >>> qlevels(ws.norm.pdf(x),p=PL, x1=x); + array([ 0.39591707, 0.37058719, 0.31830968, 0.23402133, 0.10362052, + 0.05862129, 0.01449505, 0.00178806]) + + # compared with the exact values + >>> ws.norm.pdf(ws.norm.ppf((100-PL)/200)) + array([ 0.39580488, 0.370399 , 0.31777657, 0.23315878, 0.10313564, + 0.05844507, 0.01445974, 0.00177719]) + + See also + -------- + qlevels2, tranproc + ''' + + norm = 1 # normalize cdf to unity + pdf = np.atleast_1d(pdf) + if any(pdf.ravel() < 0): + raise ValueError( + 'This is not a pdf since one or more values of pdf is negative') + + fsiz = pdf.shape + fsizmin = min(fsiz) + if fsizmin == 0: + return [] + + N = np.prod(fsiz) + d = len(fsiz) + if x1 is None or ((x2 is None) and d > 2): + fdfi = pdf.ravel() + else: + if d == 1: # pdf in one dimension + dx22 = np.ones(1) + else: # % pdf in two dimensions + dx2 = np.diff(x2.ravel()) * 0.5 + dx22 = np.r_[0, dx2] + np.r_[dx2, 0] + + dx1 = np.diff(x1.ravel()) * 0.5 + dx11 = np.r_[0, dx1] + np.r_[dx1, 0] + dx1x2 = dx22[:, None] * dx11 + fdfi = (pdf * dx1x2).ravel() + + p = np.atleast_1d(p) + + if np.any((p < 0) | (100 < p)): + raise ValueError('PL must satisfy 0 <= PL <= 100') + + p2 = p / 100.0 + ind = np.argsort(pdf.ravel()) # sort by height of pdf + ind = ind[::-1] + fi = pdf.flat[ind] + + # integration in the order of decreasing height of pdf + Fi = np.cumsum(fdfi[ind]) + + if norm: # %normalize Fi to make sure int pdf dx1 dx2 approx 1 + Fi = Fi / Fi[-1] * N / (N + 1.5e-8) + + maxFi = np.max(Fi) + if maxFi > 1: + warnings.warn('this is not a pdf since cdf>1! normalizing') + + Fi = Fi / Fi[-1] * N / (N + 1.5e-8) + + elif maxFi < .95: + msg = '''The given pdf is too sparsely sampled since cdf<.95. + Thus QL is questionable''' + warnings.warn(msg) + + # make sure Fi is strictly increasing by not considering duplicate values + ind, = np.where(np.diff(np.r_[Fi, 1]) > 0) + # calculating the inverse of Fi to find the index + ui = tranproc(Fi[ind], fi[ind], p2) + # to the desired quantile level + # ui=smooth(Fi(ind),fi(ind),1,p2(:),1) % alternative + # res=ui-ui2 + + if np.any(ui >= max(pdf.ravel())): + warnings.warn('The lowest percent level is too close to 0%') + + if np.any(ui <= min(pdf.ravel())): + msg = '''The given pdf is too sparsely sampled or + the highest percent level is too close to 100%''' + warnings.warn(msg) + ui[ui < 0] = 0.0 + + return ui + + +def qlevels2(data, p=(10, 30, 50, 70, 90, 95, 99, 99.9), method=1): + ''' + QLEVELS2 Calculates quantile levels which encloses P% of data + + CALL: [ql PL] = qlevels2(data,PL,method); + + ql = the discrete quantile levels, size D X Np + Parameters + ---------- + data : data matrix, size D x N (D = # of dimensions) + p : percent level vector, length Np (default [10:20:90 95 99 99.9]) + method : integer + 1 Interpolation so that F(X_(k)) == (k-0.5)/n. (default) + 2 Interpolation so that F(X_(k)) == k/(n+1). + 3 Based on the empirical distribution. + + Returns + ------- + + QLEVELS2 sort the columns of data in ascending order and find the + quantile levels for each column which encloses P% of the data. 
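+
+    Internally this is simply percentile(data, 100 - p, axis=-1,
+    method=method): the level enclosing P% of the data in a row is the
+    (100-P)th percentile of that row.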
+ + Examples : Finding quantile levels enclosing P% of data: + -------- + >>> import wafo.stats as ws + >>> PL = np.r_[10:90:20, 90, 95, 99, 99.9] + >>> xs = ws.norm.rvs(size=2500000) + >>> np.round(qlevels2(ws.norm.pdf(xs), p=PL), decimals=3) + array([ 0.396, 0.37 , 0.318, 0.233, 0.103, 0.058, 0.014, 0.002]) + + # compared with the exact values + >>> ws.norm.pdf(ws.norm.ppf((100-PL)/200)) + array([ 0.39580488, 0.370399 , 0.31777657, 0.23315878, 0.10313564, + 0.05844507, 0.01445974, 0.00177719]) + + # Finding the median of xs: + >>> '%2.2f' % np.abs(qlevels2(xs,50)[0]) + '0.00' + + See also + -------- + qlevels + ''' + q = 100 - np.atleast_1d(p) + return percentile(data, q, axis=-1, method=method) + + +_PKDICT = {1: lambda k, w, n: (k - w) / (n - 1), + 2: lambda k, w, n: (k - w / 2) / n, + 3: lambda k, w, n: k / n, + 4: lambda k, w, n: k / (n + 1), + 5: lambda k, w, n: (k - w / 3) / (n + 1 / 3), + 6: lambda k, w, n: (k - w * 3 / 8) / (n + 1 / 4)} + + +def _compute_qth_weighted_percentile(a, q, axis, out, method, weights, + overwrite_input): + # normalise weight vector such that sum of the weight vector equals to n + q = np.atleast_1d(q) / 100.0 + if (q < 0).any() or (q > 1).any(): + raise ValueError("percentile must be in the range [0,100]") + + shape0 = a.shape + if axis is None: + sorted_ = a.ravel() + else: + taxes = range(a.ndim) + taxes[-1], taxes[axis] = taxes[axis], taxes[-1] + sorted_ = np.transpose(a, taxes).reshape(-1, shape0[axis]) + + ind = sorted_.argsort(axis=-1) + if overwrite_input: + sorted_.sort(axis=-1) + else: + sorted_ = np.sort(sorted_, axis=-1) + + w = np.atleast_1d(weights) + n = len(w) + w = w * n / w.sum() + + # Work on each column separately because of weight vector + m = sorted_.shape[0] + nq = len(q) + y = np.zeros((m, nq)) + pk_fun = _PKDICT.get(method, 1) + for i in range(m): + sortedW = w[ind[i]] # rearrange the weight according to ind + k = sortedW.cumsum() # cumulative weight + # different algorithm to compute percentile + pk = pk_fun(k, sortedW, n) + # Interpolation between pk and sorted_ for given value of q + y[i] = np.interp(q, pk, sorted_[i]) + if axis is None: + return np.squeeze(y) + else: + shape1 = list(shape0) + shape1[axis], shape1[-1] = shape1[-1], nq + return np.squeeze(np.transpose(y.reshape(shape1), taxes)) + +# method=1: p(k) = k/(n-1) +# method=2: p(k) = (k+0.5)/n. +# method=3: p(k) = (k+1)/n +# method=4: p(k) = (k+1)/(n+1) +# method=5: p(k) = (k+2/3)/(n+1/3) +# method=6: p(k) = (k+5/8)/(n+1/4) + +_KDICT = {1: lambda p, n: p * (n - 1), + 2: lambda p, n: p * n - 0.5, + 3: lambda p, n: p * n - 1, + 4: lambda p, n: p * (n + 1) - 1, + 5: lambda p, n: p * (n + 1. / 3) - 2. / 3, + 6: lambda p, n: p * (n + 1. / 4) - 5. 
/ 8} + + +def _compute_qth_percentile(sorted_, q, axis, out, method): + if not np.isscalar(q): + p = [_compute_qth_percentile(sorted_, qi, axis, None, method) + for qi in q] + if out is not None: + out.flat = p + return p + + q = q / 100.0 + if (q < 0) or (q > 1): + raise ValueError("percentile must be in the range [0,100]") + + indexer = [slice(None)] * sorted_.ndim + Nx = sorted_.shape[axis] + k_fun = _KDICT.get(method, 1) + index = np.clip(k_fun(q, Nx), 0, Nx - 1) + i = int(index) + if i == index: + indexer[axis] = slice(i, i + 1) + weights1 = np.array(1) + sumval = 1.0 + else: + indexer[axis] = slice(i, i + 2) + j = i + 1 + weights1 = np.array([(j - index), (index - i)], float) + wshape = [1] * sorted_.ndim + wshape[axis] = 2 + weights1.shape = wshape + sumval = weights1.sum() + + # Use add.reduce in both cases to coerce data type as well as + # check and use out array. + return np.add.reduce(sorted_[indexer] * weights1, + axis=axis, out=out) / sumval + + +def percentile(a, q, axis=None, out=None, overwrite_input=False, method=1, + weights=None): + """ + Compute the qth percentile of the data along the specified axis. + + Returns the qth percentile of the array elements. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + q : float in range of [0,100] (or sequence of floats) + percentile to compute which must be between 0 and 100 inclusive + axis : {None, int}, optional + Axis along which the percentiles are computed. The default (axis=None) + is to compute the median along a flattened version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. + overwrite_input : {False, True}, optional + If True, then allow use of memory of input array (a) for + calculations. The input array will be modified by the call to + median. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. Note that, if `overwrite_input` is True and the input + is not already an ndarray, an error will be raised. + method : scalar integer + defining the interpolation method. Valid options are + 1 : p[k] = k/(n-1). In this case, p[k] = mode[F(x[k])]. + This is used by S. (default) + 2 : p[k] = (k+0.5)/n. That is a piecewise linear function where + the knots are the values midway through the steps of the + empirical cdf. This is popular amongst hydrologists. + Matlab also uses this formula. + 3 : p[k] = (k+1)/n. That is, linear interpolation of the empirical cdf. + 4 : p[k] = (k+1)/(n+1). Thus p[k] = E[F(x[k])]. + This is used by Minitab and by SPSS. + 5 : p[k] = (k+2/3)/(n+1/3). Then p[k] =~ median[F(x[k])]. + The resulting quantile estimates are approximately + median-unbiased regardless of the distribution of x. + 6 : p[k] = (k+5/8)/(n+1/4). The resulting quantile estimates are + approximately unbiased for the expected order statistics + if x is normally distributed. + + Returns + ------- + pcntile : ndarray + A new array holding the result (unless `out` is specified, in + which case that array is returned instead). If the input contains + integers, or floats of smaller precision than 64, then the output + data-type is float64. Otherwise, the output data-type is the same + as that of the input. 
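+
+    A quick arithmetic sketch of the `method` formulas above (illustrative
+    numbers only): with n = 5 observations and q = 50, method 1 gives
+    k = 0.5*(5-1) = 2 and method 2 gives k = 0.5*5 - 0.5 = 2, so both pick
+    the middle order statistic exactly; the choices differ mainly for
+    quantiles near the tails and for small samples.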
+ + See Also + -------- + mean, median + + Notes + ----- + Given a vector V of length N, the qth percentile of V is the qth ranked + value in a sorted copy of V. A weighted average of the two nearest + neighbors is used if the normalized ranking does not match q exactly. + The same as the median if q is 0.5; the same as the min if q is 0; + and the same as the max if q is 1 + + Examples + -------- + >>> import wafo.kdetools as wk + >>> a = np.array([[10, 7, 4], [3, 2, 1]]) + >>> a + array([[10, 7, 4], + [ 3, 2, 1]]) + >>> wk.percentile(a, 50) + 3.5 + >>> wk.percentile(a, 50, axis=0) + array([ 6.5, 4.5, 2.5]) + >>> wk.percentile(a, 50, axis=0, weights=np.ones(2)) + array([ 6.5, 4.5, 2.5]) + >>> wk.percentile(a, 50, axis=1) + array([ 7., 2.]) + >>> wk.percentile(a, 50, axis=1, weights=np.ones(3)) + array([ 7., 2.]) + >>> m = wk.percentile(a, 50, axis=0) + >>> out = np.zeros_like(m) + >>> wk.percentile(a, 50, axis=0, out=m) + array([ 6.5, 4.5, 2.5]) + >>> m + array([ 6.5, 4.5, 2.5]) + >>> b = a.copy() + >>> wk.percentile(b, 50, axis=1, overwrite_input=True) + array([ 7., 2.]) + >>> assert not np.all(a==b) + >>> b = a.copy() + >>> wk.percentile(b, 50, axis=None, overwrite_input=True) + 3.5 + >>> np.all(a==b) + False + + """ + a = np.asarray(a) + try: + if q == 0: + return a.min(axis=axis, out=out) + elif q == 100: + return a.max(axis=axis, out=out) + except: + pass + if weights is not None: + return _compute_qth_weighted_percentile(a, q, axis, out, method, + weights, overwrite_input) + elif overwrite_input: + if axis is None: + sorted_ = a.ravel() + sorted_.sort() + else: + a.sort(axis=axis) + sorted_ = a + else: + sorted_ = np.sort(a, axis=axis) + if axis is None: + axis = 0 + + return _compute_qth_percentile(sorted_, q, axis, out, method) + + +def iqrange(data, axis=None): + ''' + Returns the Inter Quartile Range of data + + Parameters + ---------- + data : array-like + Input array or object that can be converted to an array. + axis : {None, int}, optional + Axis along which the percentiles are computed. The default (axis=None) + is to compute the median along a flattened version of the array. + + Returns + ------- + r : array-like + abs(np.percentile(data, 75, axis)-np.percentile(data, 25, axis)) + + Notes + ----- + IQRANGE is a robust measure of spread. The use of interquartile range + guards against outliers if the distribution have heavy tails. + + Example + ------- + >>> a = np.arange(101) + >>> iqrange(a) + 50.0 + + See also + -------- + np.std + ''' + return np.abs(np.percentile(data, 75, axis=axis) - + np.percentile(data, 25, axis=axis)) + + +def bitget(int_type, offset): + ''' + Returns the value of the bit at the offset position in int_type. + + Example + ------- + >>> bitget(5, np.r_[0:4]) + array([1, 0, 1, 0]) + ''' + return np.bitwise_and(int_type, 1 << offset) >> offset + + +def gridcount(data, X, y=1): + ''' + Returns D-dimensional histogram using linear binning. + + Parameters + ---------- + data = column vectors with D-dimensional data, shape D x Nd + X = row vectors defining discretization, shape D x N + Must include the range of the data. + + Returns + ------- + c = gridcount, shape N x N x ... x N + + GRIDCOUNT obtains the grid counts using linear binning. + There are 2 strategies: simple- or linear- binning. + Suppose that an observation occurs at x and that the nearest point + below and above is y and z, respectively. Then simple binning strategy + assigns a unit weight to either y or z, whichever is closer. 
Linear + binning, on the other hand, assigns the grid point at y with the weight + of (z-x)/(z-y) and the gridpoint at z a weight of (y-x)/(z-y). + + In terms of approximation error of using gridcounts as pdf-estimate, + linear binning is significantly more accurate than simple binning. + + NOTE: The interval [min(X);max(X)] must include the range of the data. + The order of C is permuted in the same order as + meshgrid for D==2 or D==3. + + Example + ------- + >>> import numpy as np + >>> import wafo.kdetools as wk + >>> import pylab as plb + >>> N = 20; + >>> data = np.random.rayleigh(1,N) + >>> x = np.linspace(0,max(data)+1,50) + >>> dx = x[1]-x[0] + + >>> c = wk.gridcount(data,x) + + >>> h = plb.plot(x,c,'.') # 1D histogram + >>> pdf = c/dx/N + >>> h1 = plb.plot(x, pdf) # 1D probability density plot + >>> '%1.3f' % np.trapz(pdf, x) + '1.000' + + See also + -------- + bincount, accum, kdebin + + Reference + ---------- + Wand,M.P. and Jones, M.C. (1995) + 'Kernel smoothing' + Chapman and Hall, pp 182-192 + ''' + dat = np.atleast_2d(data) + x = np.atleast_2d(X) + y = np.atleast_1d(y).ravel() + d = dat.shape[0] + d1, inc = x.shape + + if d != d1: + raise ValueError('Dimension 0 of data and X do not match.') + + dx = np.diff(x[:, :2], axis=1) + xlo = x[:, 0] + xup = x[:, -1] + + datlo = dat.min(axis=1) + datup = dat.max(axis=1) + if ((datlo < xlo) | (xup < datup)).any(): + raise ValueError('X does not include whole range of the data!') + + csiz = np.repeat(inc, d) + use_sparse = False + if use_sparse: + acfun = accumsum # faster than accum + else: + acfun = accumsum2 # accum + + binx = np.asarray(np.floor((dat - xlo[:, newaxis]) / dx), dtype=int) + w = dx.prod() + abs = np.abs # @ReservedAssignment + if d == 1: + x.shape = (-1,) + c = np.asarray((acfun(binx, (x[binx + 1] - dat) * y, size=(inc, )) + + acfun(binx + 1, (dat - x[binx]) * y, size=(inc, ))) / + w).ravel() + else: # % d>2 + + Nc = csiz.prod() + c = np.zeros((Nc,)) + + fact2 = np.asarray(np.reshape(inc * np.arange(d), (d, -1)), dtype=int) + fact1 = np.asarray( + np.reshape(csiz.cumprod() / inc, (d, -1)), dtype=int) + # fact1 = fact1(ones(n,1),:); + bt0 = [0, 0] + X1 = X.ravel() + for ir in xrange(2 ** (d - 1)): + bt0[0] = np.reshape(bitget(ir, np.arange(d)), (d, -1)) + bt0[1] = 1 - bt0[0] + for ix in xrange(2): + one = np.mod(ix, 2) + two = np.mod(ix + 1, 2) + # Convert to linear index + #linear index to c + b1 = np.sum((binx + bt0[one]) * fact1, axis=0) + bt2 = bt0[two] + fact2 + b2 = binx + bt2 # linear index to X + c += acfun( + b1, abs(np.prod(X1[b2] - dat, axis=0)) * y, size=(Nc,)) + + c = np.reshape(c / w, csiz, order='F') + + T = range(d) + T[1], T[0] = T[0], T[1] + # make sure c is stored in the same way as meshgrid + c = c.transpose(*T) + return c + + +def evar(y): + ''' + Noise variance estimation. + Assuming that the deterministic function Y has additive Gaussian noise, + EVAR(Y) returns an estimated variance of this noise. + + Note: + ---- + A thin-plate smoothing spline model is used to smooth Y. It is assumed + that the model whose generalized cross-validation score is minimum can + provide the variance of the additive noise. A few tests showed that + EVAR works very well with "not too irregular" functions. 
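+
+    Sketch of what the code below actually does (for orientation only):
+    Y is transformed with an N-D discrete cosine transform, a smoothing
+    factor M = 1 - 1/(1 + 10**L * s2) is formed, where s2 is built from
+    cosines of the normalized grid coordinates, L is chosen by minimizing
+    the GCV score mean(dct(Y)**2 * M**2) / mean(M)**2, and the noise
+    variance is then estimated as mean(dct(Y)**2 * M**2) at that optimum.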
+ + Examples: + -------- + 1D signal + >>> n = 1e6 + >>> x = np.linspace(0,100,n); + >>> y = np.cos(x/10)+(x/50) + >>> var0 = 0.02 # noise variance + >>> yn = y + sqrt(var0)*np.random.randn(*y.shape) + >>> s = evar(yn) #estimated variance + >>> np.abs(s-var0)/var0 < 3.5/np.sqrt(n) + True + + 2D function + >>> xp = np.linspace(0,1,50) + >>> x, y = np.meshgrid(xp,xp) + >>> f = np.exp(x+y) + np.sin((x-2*y)*3) + >>> var0 = 0.04 # noise variance + >>> fn = f + sqrt(var0)*np.random.randn(*f.shape) + >>> s = evar(fn) # estimated variance + >>> np.abs(s-var0)/var0 < 3.5/np.sqrt(50) + True + + 3D function + >>> yp = np.linspace(-2,2,50) + >>> [x,y,z] = meshgrid(yp,yp,yp, sparse=True) + >>> f = x*exp(-x**2-y**2-z**2) + >>> var0 = 0.5 # noise variance + >>> fn = f + sqrt(var0)*np.random.randn(*f.shape) + >>> s = evar(fn) # estimated variance + >>> np.abs(s-var0)/var0 < 3.5/np.sqrt(50) + True + + Other example + ------------- + http://www.biomecardio.com/matlab/evar.html + + Note: + ---- + EVAR is only adapted to evenly-gridded 1-D to N-D data. + + See also + -------- + VAR, STD, SMOOTHN + ''' + + # Damien Garcia -- 2008/04, revised 2009/10 + y = np.atleast_1d(y) + d = y.ndim + sh0 = y.shape + + S = np.zeros(sh0) + sh1 = np.ones((d,)) + cos = np.cos + pi = np.pi + for i in range(d): + ni = sh0[i] + sh1[i] = ni + t = np.arange(ni).reshape(sh1) / ni + S += cos(pi * t) + sh1[i] = 1 + + S2 = 2 * (d - S).ravel() + # N-D Discrete Cosine Transform of Y + dcty2 = dctn(y).ravel() ** 2 + + def score_fun(L, S2, dcty2): + # Generalized cross validation score + M = 1 - 1. / (1 + 10 ** L * S2) + noisevar = (dcty2 * M ** 2).mean() + return noisevar / M.mean() ** 2 + #fun = lambda x : score_fun(x, S2, dcty2) + Lopt = optimize.fminbound(score_fun, -38, 38, args=(S2, dcty2)) + M = 1.0 - 1.0 / (1 + 10 ** Lopt * S2) + noisevar = (dcty2 * M ** 2).mean() + return noisevar + + +def smoothn(data, s=None, weight=None, robust=False, z0=None, tolz=1e-3, + maxiter=100, fulloutput=False): + ''' + SMOOTHN fast and robust spline smoothing for 1-D to N-D data. + + Parameters + ---------- + data : array like + uniformly-sampled data array to smooth. Non finite values (NaN or Inf) + are treated as missing values. + s : real positive scalar + smooting parameter. The larger S is, the smoother the output will be. + Default value is automatically determined using the generalized + cross-validation (GCV) method. + weight : string or array weights + weighting array of real positive values, that must have the same size + as DATA. Note that a zero weight corresponds to a missing value. + robust : bool + If true carry out a robust smoothing that minimizes the influence of + outlying data. + tolz : real positive scalar + Termination tolerance on Z (default = 1e-3) + maxiter : scalar integer + Maximum number of iterations allowed (default = 100) + z0 : array-like + Initial value for the iterative process (default = original data) + + Returns + ------- + z : array like + smoothed data + + To be made + ---------- + Estimate the confidence bands (see Wahba 1983, Nychka 1988). + + Reference + --------- + Garcia D, Robust smoothing of gridded data in one and higher dimensions + with missing values. Computational Statistics & Data Analysis, 2010. 
+ http://www.biomecardio.com/pageshtm/publi/csda10.pdf + + Examples: + -------- + + 1-D example + >>> import matplotlib.pyplot as plt + >>> x = np.linspace(0,100,2**8) + >>> y = np.cos(x/10)+(x/50)**2 + np.random.randn(*x.shape)/10 + >>> y[np.r_[70, 75, 80]] = np.array([5.5, 5, 6]) + >>> z = smoothn(y) # Regular smoothing + >>> zr = smoothn(y,robust=True) # Robust smoothing + >>> h=plt.subplot(121), + >>> h = plt.plot(x,y,'r.',x,z,'k',linewidth=2) + >>> h=plt.title('Regular smoothing') + >>> h=plt.subplot(122) + >>> h=plt.plot(x,y,'r.',x,zr,'k',linewidth=2) + >>> h=plt.title('Robust smoothing') + + 2-D example + >>> xp = np.r_[0:1:.02] + >>> [x,y] = np.meshgrid(xp,xp) + >>> f = np.exp(x+y) + np.sin((x-2*y)*3); + >>> fn = f + np.random.randn(*f.shape)*0.5; + >>> fs = smoothn(fn); + >>> h=plt.subplot(121), + >>> h=plt.contourf(xp,xp,fn) + >>> h=plt.subplot(122) + >>> h=plt.contourf(xp,xp,fs) + + 2-D example with missing data + n = 256; + y0 = peaks(n); + y = y0 + rand(size(y0))*2; + I = randperm(n^2); + y(I(1:n^2*0.5)) = NaN; lose 1/2 of data + y(40:90,140:190) = NaN; create a hole + z = smoothn(y); smooth data + subplot(2,2,1:2), imagesc(y), axis equal off + title('Noisy corrupt data') + subplot(223), imagesc(z), axis equal off + title('Recovered data ...') + subplot(224), imagesc(y0), axis equal off + title('... compared with original data') + + 3-D example + [x,y,z] = meshgrid(-2:.2:2); + xslice = [-0.8,1]; yslice = 2; zslice = [-2,0]; + vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06; + subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic') + title('Noisy data') + v = smoothn(vn); + subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic') + title('Smoothed data') + + Cardioid + + t = linspace(0,2*pi,1000); + x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1; + y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1; + z = smoothn(complex(x,y)); + plot(x,y,'r.',real(z),imag(z),'k','linewidth',2) + axis equal tight + + Cellular vortical flow + [x,y] = meshgrid(linspace(0,1,24)); + Vx = cos(2*pi*x+pi/2).*cos(2*pi*y); + Vy = sin(2*pi*x+pi/2).*sin(2*pi*y); + Vx = Vx + sqrt(0.05)*randn(24,24); adding Gaussian noise + Vy = Vy + sqrt(0.05)*randn(24,24); adding Gaussian noise + I = randperm(numel(Vx)); + Vx(I(1:30)) = (rand(30,1)-0.5)*5; adding outliers + Vy(I(1:30)) = (rand(30,1)-0.5)*5; adding outliers + Vx(I(31:60)) = NaN; missing values + Vy(I(31:60)) = NaN; missing values + Vs = smoothn(complex(Vx,Vy),'robust'); automatic smoothing + subplot(121), quiver(x,y,Vx,Vy,2.5), axis square + title('Noisy velocity field') + subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square + title('Smoothed velocity field') + + See also SMOOTH, SMOOTH3, DCTN, IDCTN. + + -- Damien Garcia -- 2009/03, revised 2010/11 + Visit + http://www.biomecardio.com/matlab/smoothn.html + for more details about SMOOTHN + ''' + + y = np.atleast_1d(data) + sizy = y.shape + noe = y.size + if noe < 2: + return data + + weightstr = 'bisquare' + W = np.ones(sizy) + # Smoothness parameter and weights + if weight is None: + pass + elif isinstance(weight, str): + weightstr = weight.lower() + else: + W = weight + + # Weights. Zero weights are assigned to not finite values (Inf or NaN), + # (Inf/NaN values = missing data). + IsFinite = np.isfinite(y) + nof = IsFinite.sum() # number of finite elements + W = W * IsFinite + if (W < 0).any(): + raise ValueError('Weights must all be >=0') + else: + W = W / W.max() + + # Weighted or missing data? + isweighted = (W < 1).any() + + # Automatic smoothing? 
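+    # (if no smoothing parameter s was supplied, it is selected further
+    #  down by minimizing the generalized cross-validation (GCV) score)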
+ isauto = s is None + # Creation of the Lambda tensor + # Lambda contains the eingenvalues of the difference matrix used in this + # penalized least squares process. + d = y.ndim + Lambda = np.zeros(sizy) + siz0 = [1, ] * d + for i in range(d): + siz0[i] = sizy[i] + Lambda = Lambda + \ + np.cos(pi * np.arange(sizy[i]) / sizy[i]).reshape(siz0) + siz0[i] = 1 + + Lambda = -2 * (d - Lambda) + if not isauto: + Gamma = 1. / (1 + s * Lambda ** 2) + + # Upper and lower bound for the smoothness parameter + # The average leverage (h) is by definition in [0 1]. Weak smoothing occurs + # if h is close to 1, while over-smoothing appears when h is near 0. Upper + # and lower bounds for h are given to avoid under- or over-smoothing. See + # equation relating h to the smoothness parameter (Equation #12 in the + # referenced CSDA paper). + N = (np.array(sizy) != 1).sum() # tensor rank of the y-array + hMin = 1e-6 + hMax = 0.99 + sMinBnd = (((1 + sqrt(1 + 8 * hMax ** (2. / N))) / 4. / + hMax ** (2. / N)) ** 2 - 1) / 16 + sMaxBnd = (((1 + sqrt(1 + 8 * hMin ** (2. / N))) / 4. / + hMin ** (2. / N)) ** 2 - 1) / 16 + + # Initialize before iterating + + Wtot = W + # Initial conditions for z + if isweighted: + # With weighted/missing data + # An initial guess is provided to ensure faster convergence. For that + # purpose, a nearest neighbor interpolation followed by a coarse + # smoothing are performed. + + if z0 is None: + z = InitialGuess(y, IsFinite) + else: + # an initial guess (z0) has been provided + z = z0 + else: + z = np.zeros(sizy) + z0 = z + y[~IsFinite] = 0 # arbitrary values for missing y-data + + tol = 1 + RobustIterativeProcess = True + RobustStep = 1 + + # Error on p. Smoothness parameter s = 10^p + errp = 0.1 + + # Relaxation factor RF: to speedup convergence + RF = 1 + 0.75 if weight is None else 1.0 + + norm = linalg.norm + # Main iterative process + while RobustIterativeProcess: + # "amount" of weights (see the function GCVscore) + aow = Wtot.sum() / noe # 0 < aow <= 1 + exitflag = True + for nit in range(1, maxiter + 1): + DCTy = dctn(Wtot * (y - z) + z) + if isauto and not np.remainder(np.log2(nit), 1): + + # The generalized cross-validation (GCV) method is used. + # We seek the smoothing parameter s that minimizes the GCV + # score i.e. s = Argmin(GCVscore). + # Because this process is time-consuming, it is performed from + # time to time (when nit is a power of 2) + log10s = optimize.fminbound( + gcv, np.log10(sMinBnd), np.log10(sMaxBnd), + args=(aow, Lambda, DCTy, y, Wtot, IsFinite, nof, noe), + xtol=errp, full_output=False, disp=False) + s = 10 ** log10s + Gamma = 1.0 / (1 + s * Lambda ** 2) + z = RF * idctn(Gamma * DCTy) + (1 - RF) * z + + # if no weighted/missing data => tol=0 (no iteration) + tol = norm(z0.ravel() - z.ravel()) / norm( + z.ravel()) if isweighted else 0.0 + if tol <= tolz: + break + z0 = z # re-initialization + else: + exitflag = False # nit 0.9: # aow = 1 means that all of the data are equally weighted + # very much faster: does not require any inverse DCT + RSS = linalg.norm(DCTy.ravel() * (Gamma.ravel() - 1)) ** 2 + else: + # take account of the weights to calculate RSS: + yhat = idctn(Gamma * DCTy) + RSS = linalg.norm( + sqrt(Wtot[IsFinite]) * (y[IsFinite] - yhat[IsFinite])) ** 2 + # end + + TrH = Gamma.sum() + GCVscore = RSS / nof / (1.0 - TrH / noe) ** 2 + return GCVscore + + +# Robust weights +def RobustWeights(r, I, h, wstr): + # weights for robust smoothing. 
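+    # The residuals are studentized with a robust scale estimate,
+    # 1.4826*MAD (median absolute deviation of the finite residuals),
+    # and the leverage h; the chosen weight function (bisquare by
+    # default, 'cauchy' or 'talworth' on request) then downweights
+    # outlying observations.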
+ MAD = np.median(abs(r[I] - np.median(r[I]))) # median absolute deviation + u = abs(r / (1.4826 * MAD) / sqrt(1 - h)) # studentized residuals + if wstr == 'cauchy': + c = 2.385 + W = 1. / (1 + (u / c) ** 2) # Cauchy weights + elif wstr == 'talworth': + c = 2.795 + W = u < c # Talworth weights + else: # bisquare weights + c = 4.685 + W = (1 - (u / c) ** 2) ** 2 * ((u / c) < 1) + + W[np.isnan(W)] = 0 + return W + +# Initial Guess with weighted/missing data + + +def InitialGuess(y, I): + # nearest neighbor interpolation (in case of missing values) + z = y + if (1 - I).any(): + + if True: # license('test','image_toolbox') + notI = ~I + z, L = distance_transform_edt(notI, return_indices=True) + #[z,L] = bwdist(I); + z[notI] = y[L.flat[notI]] + else: + #% If BWDIST does not exist, NaN values are all replaced with the + #% same scalar. The initial guess is not optimal and a warning + #% message thus appears. + z[1 - I] = y[I].mean() + + # coarse fast smoothing using one-tenth of the DCT coefficients + siz = z.shape + d = z.ndim + z = dctn(z) + for k in range(d): + z[int((siz[k] + 0.5) / 10) + 1::, ...] = 0 + z = z.reshape(np.roll(siz, -k)) + z = z.transpose(np.roll(range(z.ndim), -1)) + #z = shiftdim(z,1); + # end + z = idctn(z) + + return z + + +def test_smoothn_1d(): + x = np.linspace(0, 100, 2 ** 8) + y = np.cos(x / 10) + (x / 50) ** 2 + np.random.randn(x.size) / 10 + y[np.r_[70, 75, 80]] = np.array([5.5, 5, 6]) + z = smoothn(y) # Regular smoothing + zr = smoothn(y, robust=True) # Robust smoothing + plt.subplot(121), + unused_h = plt.plot(x, y, 'r.', x, z, 'k', linewidth=2) + plt.title('Regular smoothing') + plt.subplot(122) + plt.plot(x, y, 'r.', x, zr, 'k', linewidth=2) + plt.title('Robust smoothing') + plt.show() + + +def test_smoothn_2d(): + + #import mayavi.mlab as plt + xp = np.r_[0:1:.02] + [x, y] = np.meshgrid(xp, xp) + f = np.exp(x + y) + np.sin((x - 2 * y) * 3) + fn = f + np.random.randn(*f.shape) * 0.5 + fs, s = smoothn(fn, fulloutput=True) # @UnusedVariable + fs2 = smoothn(fn, s=2 * s) + plt.subplot(131), + plt.contourf(xp, xp, fn) + plt.subplot(132), + plt.contourf(xp, xp, fs2) + plt.subplot(133), + plt.contourf(xp, xp, f) + plt.show() + + +def test_smoothn_cardioid(): + t = np.linspace(0, 2 * pi, 1000) + cos = np.cos + sin = np.sin + randn = np.random.randn + x = 2 * cos(t) * (1 - cos(t)) + randn(t.size) * 0.1 + y = 2 * sin(t) * (1 - cos(t)) + randn(t.size) * 0.1 + z = smoothn(x + 1j * y) + plt.plot(x, y, 'r.', z.real, z.imag, 'k', linewidth=2) + plt.show() + + +def kde_demo1(): + ''' + KDEDEMO1 Demonstrate the smoothing parameter impact on KDE + + KDEDEMO1 shows the true density (dotted) compared to KDE based on 7 + observations (solid) and their individual kernels (dashed) for 3 + different values of the smoothing parameter, hs. 
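+
+    The dashed curves are the individual kernels, scaled so that their
+    pointwise sum over the seven observations reproduces the solid KDE
+    curve.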
+ ''' + + import scipy.stats as st + x = np.linspace(-4, 4, 101) + x0 = x / 2.0 + data = np.random.normal(loc=0, scale=1.0, size=7) + kernel = Kernel('gauss') + hs = kernel.hns(data) + hVec = [hs / 2, hs, 2 * hs] + + for ix, h in enumerate(hVec): + plt.figure(ix) + kde = KDE(data, hs=h, kernel=kernel) + f2 = kde(x, output='plot', title='h_s = %2.2f' % h, ylab='Density') + f2.plot('k-') + + plt.plot(x, st.norm.pdf(x, 0, 1), 'k:') + n = len(data) + plt.plot(data, np.zeros(data.shape), 'bx') + y = kernel(x0) / (n * h * kernel.norm_factor(d=1, n=n)) + for i in range(n): + plt.plot(data[i] + x0 * h, y, 'b--') + plt.plot([data[i], data[i]], [0, np.max(y)], 'b') + + plt.axis([x.min(), x.max(), 0, 0.5]) + + +def kde_demo2(): + '''Demonstrate the difference between transformation- and ordinary-KDE. + + KDEDEMO2 shows that the transformation KDE is a better estimate for + Rayleigh distributed data around 0 than the ordinary KDE. + ''' + import scipy.stats as st + data = st.rayleigh.rvs(scale=1, size=300) + + x = np.linspace(1.5e-2, 5, 55) + + kde = KDE(data) + f = kde(output='plot', title='Ordinary KDE (hs=%g)' % kde.hs) + plt.figure(0) + f.plot() + + plt.plot(x, st.rayleigh.pdf(x, scale=1), ':') + + # plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable + + tkde = TKDE(data, L2=0.5) + ft = tkde(x, output='plot', title='Transformation KDE (hs=%g)' % + tkde.tkde.hs) + plt.figure(1) + ft.plot() + + plt.plot(x, st.rayleigh.pdf(x, scale=1), ':') + + plt.figure(0) + + +def kde_demo3(): + '''Demonstrate the difference between transformation and ordinary-KDE in 2D + + KDEDEMO3 shows that the transformation KDE is a better estimate for + Rayleigh distributed data around 0 than the ordinary KDE. + ''' + import scipy.stats as st + data = st.rayleigh.rvs(scale=1, size=(2, 300)) + + #x = np.linspace(1.5e-3, 5, 55) + + kde = KDE(data) + f = kde(output='plot', title='Ordinary KDE', plotflag=1) + plt.figure(0) + f.plot() + + plt.plot(data[0], data[1], '.') + + # plotnorm((data).^(L2)) % gives a straight line => L2 = 0.5 reasonable + + tkde = TKDE(data, L2=0.5) + ft = tkde.eval_grid_fast( + output='plot', title='Transformation KDE', plotflag=1) + + plt.figure(1) + ft.plot() + + plt.plot(data[0], data[1], '.') + + plt.figure(0) + + +def kde_demo4(N=50): + '''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior + for 1D multimodal distributions + + KDEDEMO4 shows that the improved Sheather-Jones plug-in smoothing is a + better compared to normal reference rules (in this case the hns) + ''' + import scipy.stats as st + + data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(N,)), + st.norm.rvs(loc=-5, scale=1, size=(N,)))) + + #x = np.linspace(1.5e-3, 5, 55) + + kde = KDE(data, kernel=Kernel('gauss', 'hns')) + f = kde(output='plot', title='Ordinary KDE', plotflag=1) + + kde1 = KDE(data, kernel=Kernel('gauss', 'hisj')) + f1 = kde1(output='plot', label='Ordinary KDE', plotflag=1) + + plt.figure(0) + f.plot('r', label='hns=%g' % kde.hs) + # plt.figure(2) + f1.plot('b', label='hisj=%g' % kde1.hs) + x = np.linspace(-4, 4) + for loc in [-5, 5]: + plt.plot(x + loc, st.norm.pdf(x, 0, scale=1) + / 2, 'k:', label='True density') + plt.legend() + + +def kde_demo5(N=500): + '''Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior + for 2D multimodal distributions + + KDEDEMO5 shows that the improved Sheather-Jones plug-in smoothing is better + compared to normal reference rules (in this case the hns) + ''' + import scipy.stats as st + + data = np.hstack((st.norm.rvs(loc=5, 
scale=1, size=(2, N,)), + st.norm.rvs(loc=-5, scale=1, size=(2, N,)))) + kde = KDE(data, kernel=Kernel('gauss', 'hns')) + f = kde(output='plot', title='Ordinary KDE (hns=%g %g)' % + tuple(kde.hs.tolist()), plotflag=1) + + kde1 = KDE(data, kernel=Kernel('gauss', 'hisj')) + f1 = kde1(output='plot', title='Ordinary KDE (hisj=%g %g)' % + tuple(kde1.hs.tolist()), plotflag=1) + + plt.figure(0) + plt.clf() + f.plot() + plt.plot(data[0], data[1], '.') + plt.figure(1) + plt.clf() + f1.plot() + plt.plot(data[0], data[1], '.') + + +def kreg_demo1(hs=None, fast=False, fun='hisj'): + ''' + ''' + N = 100 + #ei = np.random.normal(loc=0, scale=0.075, size=(N,)) + ei = np.array( + [-0.08508516, 0.10462496, 0.07694448, -0.03080661, 0.05777525, + 0.06096313, -0.16572389, 0.01838912, - + 0.06251845, -0.09186784, + -0.04304887, -0.13365788, - + 0.0185279, -0.07289167, 0.02319097, + 0.06887854, -0.08938374, - + 0.15181813, 0.03307712, 0.08523183, + -0.0378058, - + 0.06312874, 0.01485772, 0.06307944, -0.0632959, + 0.18963205, 0.0369126, - + 0.01485447, 0.04037722, 0.0085057, + -0.06912903, 0.02073998, 0.1174351, 0.17599277, - + 0.06842139, + 0.12587608, 0.07698113, - + 0.0032394, -0.12045792, -0.03132877, + 0.05047314, 0.02013453, 0.04080741, 0.00158392, 0.10237899, + -0.09069682, 0.09242174, - + 0.15445323, 0.09190278, 0.07138498, + 0.03002497, 0.02495252, 0.01286942, 0.06449978, 0.03031802, + 0.11754861, -0.02322272, 0.00455867, - + 0.02132251, 0.09119446, + -0.03210086, - + 0.06509545, 0.07306443, 0.04330647, 0.078111, + -0.04146907, 0.05705476, 0.02492201, - + 0.03200572, -0.02859788, + - + 0.05893749, 0.00089538, 0.0432551, 0.04001474, 0.04888828, + - + 0.17708392, 0.16478644, 0.1171006, 0.11664846, 0.01410477, + -0.12458953, -0.11692081, 0.0413047, - + 0.09292439, -0.07042327, + 0.14119701, -0.05114335, 0.04994696, - + 0.09520663, 0.04829406, + -0.01603065, - + 0.1933216, 0.19352763, 0.11819496, 0.04567619, + -0.08348306, 0.00812816, -0.00908206, 0.14528945, 0.02901065]) + x = np.linspace(0, 1, N) + + y0 = 2 * np.exp(-x ** 2 / (2 * 0.3 ** 2)) + \ + 3 * np.exp(-(x - 1) ** 2 / (2 * 0.7 ** 2)) + y = y0 + ei + kernel = Kernel('gauss', fun=fun) + hopt = kernel.hisj(x) + kreg = KRegression( + x, y, p=0, hs=hs, kernel=kernel, xmin=-2 * hopt, xmax=1 + 2 * hopt) + if fast: + kreg.__call__ = kreg.eval_grid_fast + + f = kreg(output='plot', title='Kernel regression', plotflag=1) + plt.figure(0) + f.plot(label='p=0') + + kreg.p = 1 + f1 = kreg(output='plot', title='Kernel regression', plotflag=1) + f1.plot(label='p=1') + # print(f1.data) + plt.plot(x, y, '.', label='data') + plt.plot(x, y0, 'k', label='True model') + plt.legend() + + plt.show() + + print(kreg.tkde.tkde.inv_hs) + print(kreg.tkde.tkde.hs) + +_REALMIN = np.finfo(float).machar.xmin +_REALMAX = np.finfo(float).machar.xmax +_EPS = np.finfo(float).eps + + +def _logit(p): + pc = p.clip(min=0, max=1) + return (np.log(pc) - np.log1p(-pc)).clip(min=-40, max=40) + + +def _logitinv(x): + return 1.0 / (np.exp(-x) + 1) + + +def _get_data(n=100, symmetric=False, loc1=1.1, scale1=0.6, scale2=1.0): + import scipy.stats as st + #from sg_filter import SavitzkyGolay + dist = st.norm + + norm1 = scale2 * \ + (dist.pdf(-loc1, loc=-loc1, scale=scale1) + + dist.pdf(-loc1, loc=loc1, scale=scale1)) + fun1 = lambda x: ((dist.pdf(x, loc=-loc1, scale=scale1) + + dist.pdf(x, loc=loc1, scale=scale1)) / + norm1).clip(max=1.0) + + x = np.sort(6 * np.random.rand(n, 1) - 3, axis=0) + + y = (fun1(x) > np.random.rand(n, 1)).ravel() + #y = (np.cos(x)>2*np.random.rand(n, 1)-1).ravel() + x = 
x.ravel() + + if symmetric: + xi = np.hstack((x.ravel(), -x.ravel())) + yi = np.hstack((y, y)) + i = np.argsort(xi) + x = xi[i] + y = yi[i] + return x, y, fun1 + + +def kreg_demo2(n=100, hs=None, symmetric=False, fun='hisj', plotlog=False): + x, y, fun1 = _get_data(n, symmetric) + kreg_demo3(x, y, fun1, hs=None, fun='hisj', plotlog=False) + + +def kreg_demo3(x, y, fun1, hs=None, fun='hisj', plotlog=False): + st = stats + + alpha = 0.1 + z0 = -_invnorm(alpha / 2) + + n = x.size + hopt, hs1, hs2 = _get_regression_smooting(x, y, fun='hos') + if hs is None: + hs = hopt + + forward = _logit + reverse = _logitinv + #forward = np.log + #reverse = np.exp + + xmin, xmax = x.min(), x.max() + ni = max(2 * int((xmax - xmin) / hopt) + 3, 5) + print(ni) + print(xmin, xmax) + sml = hopt * 0.1 + xi = np.linspace(xmin - sml, xmax + sml, ni) + xiii = np.linspace(xmin - sml, xmax + sml, 4 * ni + 1) + + c = gridcount(x, xi) + if (y == True).any(): + c0 = gridcount(x[y == True], xi) + else: + c0 = np.zeros(xi.shape) + yi = np.where(c == 0, 0, c0 / c) + + kreg = KRegression(x, y, hs=hs, p=0) + fiii = kreg(xiii) + yiii = interpolate.interp1d(xi, yi)(xiii) + fit = fun1(xiii).clip(max=1.0) + df = np.diff(fiii) + eerr = np.abs((yiii - fiii)).std() + 0.5 * (df[:-1] * df[1:] < 0).sum() / n + err = (fiii - fit).std() + f = kreg( + xiii, output='plotobj', title='%s err=%1.3f,eerr=%1.3f, n=%d, hs=%1.3f, hs1=%1.3f, hs2=%1.3f' % + (fun, err, eerr, n, hs, hs1, hs2), plotflag=1) + + #yi[yi==0] = 1.0/(c[c!=0].min()+4) + #yi[yi==1] = 1-1.0/(c[c!=0].min()+4) + #yi[yi==0] = fi[yi==0] + #yi[yi==0] = np.exp(stineman_interp(xi[yi==0], xi[yi>0],np.log(yi[yi>0]))) + #yi[yi==0] = fun1(xi[yi==0]) + try: + yi[yi == 0] = yi[yi > 0].min() / sqrt(n) + except: + yi[yi == 0] = 1. / n + yi[yi == 1] = 1 - (1 - yi[yi < 1].max()) / sqrt(n) + + logity = forward(yi) + + gkreg = KRegression(xi, logity, hs=hs, xmin=xmin - hopt, xmax=xmax + hopt) + fg = gkreg.eval_grid( + xi, output='plotobj', title='Kernel regression', plotflag=1) + sa = (fg.data - logity).std() + sa2 = iqrange(fg.data - logity) / 1.349 + #print('sa=%g %g' % (sa, sa2)) + sa = min(sa, sa2) + +# plt.figure(1) +# plt.plot(xi, slogity-logity,'r.') +# plt.plot(xi, logity-,'b.') +# plt.plot(xi, fg.data-logity, 'b.') +# plt.show() +# return + + fg = gkreg.eval_grid( + xiii, output='plotobj', title='Kernel regression', plotflag=1) + pi = reverse(fg.data) + + dx = xi[1] - xi[0] + ckreg = KDE(x, hs=hs) + #ci = ckreg.eval_grid_fast(xi)*n*dx + ciii = ckreg.eval_grid_fast(xiii) * dx * x.size # n*(1+symmetric) + +# sa1 = np.sqrt(1./(ciii*pi*(1-pi))) +# plo3 = reverse(fg.data-z0*sa) +# pup3 = reverse(fg.data+z0*sa) + fg.data = pi + pi = f.data + + # ref Casella and Berger (1990) "Statistical inference" pp444 +# a = 2*pi + z0**2/(ciii+1e-16) +# b = 2*(1+z0**2/(ciii+1e-16)) +# plo2 = ((a-sqrt(a**2-2*pi**2*b))/b).clip(min=0,max=1) +# pup2 = ((a+sqrt(a**2-2*pi**2*b))/b).clip(min=0,max=1) + # Jeffreys intervall a=b=0.5 + #st.beta.isf(alpha/2, x+a, n-x+b) + ab = 0.07 # 0.055 + pi1 = pi # fun1(xiii) + pup2 = np.where(pi == 1, 1, st.beta.isf( + alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab)) + plo2 = np.where(pi == 0, 0, st.beta.isf( + 1 - alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab)) + + averr = np.trapz(pup2 - plo2, xiii) / ( + xiii[-1] - xiii[0]) + 0.5 * (df[:-1] * df[1:] < 0).sum() + + #f2 = kreg_demo4(x, y, hs, hopt) + # Wilson score + den = 1 + (z0 ** 2. 
/ ciii) + xc = (pi1 + (z0 ** 2) / (2 * ciii)) / den + halfwidth = ( + z0 * sqrt((pi1 * (1 - pi1) / ciii) + (z0 ** 2 / (4 * (ciii ** 2))))) / den + plo = (xc - halfwidth).clip(min=0) # wilson score + pup = (xc + halfwidth).clip(max=1.0) # wilson score + # pup = (pi + z0*np.sqrt(pi*(1-pi)/ciii)).clip(min=0,max=1) # dont use + #plo = (pi - z0*np.sqrt(pi*(1-pi)/ciii)).clip(min=0,max=1) + + #mi = kreg.eval_grid(x) + #sigma = (stineman_interp(x, xiii, pup)-stineman_interp(x, xiii, plo))/4 + #aic = np.abs((y-mi)/sigma).std()+ 0.5*(df[:-1]*df[1:]<0).sum()/n + #aic = np.abs((yiii-fiii)/(pup-plo)).std()+ 0.5*(df[:-1]*df[1:]<0).sum() + ((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() + + k = (df[:-1] * df[1:] < 0).sum() # numpeaks + sigmai = (pup - plo) + aic = (((yiii - fiii) / sigmai) ** 2).sum() + 2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \ + np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum() + + #aic = (((yiii-fiii)/sigmai)**2).sum()+ 2*k*(k+1)/(ni-k+1) + np.abs((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() + + #aic = averr + ((yiii-pup).clip(min=0)-(yiii-plo).clip(max=0)).sum() + + fg.plot(label='KReg grid aic=%2.3f' % (aic)) + f.plot(label='KReg averr=%2.3f ' % (averr)) + labtxt = '%d CI' % (int(100 * (1 - alpha))) + plt.fill_between(xiii, pup, plo, alpha=0.20, + color='r', linestyle='--', label=labtxt) + plt.fill_between(xiii, pup2, plo2, alpha=0.20, color='b', + linestyle=':', label='%d CI2' % (int(100 * (1 - alpha)))) + plt.plot(xiii, fun1(xiii), 'r', label='True model') + plt.scatter(xi, yi, label='data') + print('maxp = %g' % (np.nanmax(f.data))) + print('hs = %g' % (kreg.tkde.tkde.hs)) + plt.legend() + h = plt.gca() + if plotlog: + plt.setp(h, yscale='log') + # plt.show() + return hs1, hs2 + + +def kreg_demo4(x, y, hs, hopt, alpha=0.05): + st = stats + + n = x.size + xmin, xmax = x.min(), x.max() + ni = max(2 * int((xmax - xmin) / hopt) + 3, 5) + + sml = hopt * 0.1 + xi = np.linspace(xmin - sml, xmax + sml, ni) + xiii = np.linspace(xmin - sml, xmax + sml, 4 * ni + 1) + + kreg = KRegression(x, y, hs=hs, p=0) + + dx = xi[1] - xi[0] + ciii = kreg.tkde.eval_grid_fast(xiii) * dx * x.size +# ckreg = KDE(x,hs=hs) +# ciiii = ckreg.eval_grid_fast(xiii)*dx* x.size #n*(1+symmetric) + + f = kreg(xiii, output='plotobj') # , plot_kwds=dict(plotflag=7)) + pi = f.data + + # Jeffreys intervall a=b=0.5 + #st.beta.isf(alpha/2, x+a, n-x+b) + ab = 0.07 # 0.5 + pi1 = pi + pup = np.where(pi1 == 1, 1, st.beta.isf( + alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab)) + plo = np.where(pi1 == 0, 0, st.beta.isf( + 1 - alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab)) + + # Wilson score + # z0 = -_invnorm(alpha/2) +# den = 1+(z0**2./ciii); +# xc=(pi1+(z0**2)/(2*ciii))/den; +# halfwidth=(z0*sqrt((pi1*(1-pi1)/ciii)+(z0**2/(4*(ciii**2)))))/den +# plo2 = (xc-halfwidth).clip(min=0) # wilson score +# pup2 = (xc+halfwidth).clip(max=1.0) # wilson score + #f.dataCI = np.vstack((plo,pup)).T + f.prediction_error_avg = np.trapz(pup - plo, xiii) / (xiii[-1] - xiii[0]) + fiii = f.data + + c = gridcount(x, xi) + if (y == True).any(): + c0 = gridcount(x[y == True], xi) + else: + c0 = np.zeros(xi.shape) + yi = np.where(c == 0, 0, c0 / c) + + f.children = [PlotData( + [plo, pup], xiii, plotmethod='fill_between', plot_kwds=dict(alpha=0.2, color='r')), + PlotData(yi, xi, plotmethod='scatter', plot_kwds=dict(color='r', s=5))] + + yiii = interpolate.interp1d(xi, yi)(xiii) + df = np.diff(fiii) + k = (df[:-1] * df[1:] < 0).sum() # numpeaks + sigmai = (pup - plo) + aicc = (((yiii - fiii) / sigmai) ** 
2).sum() + 2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \ + np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum() + + f.aicc = aicc + f.labels.title = 'perr=%1.3f,aicc=%1.3f, n=%d, hs=%1.3f' % ( + f.prediction_error_avg, aicc, n, hs) + + return f + + +def check_kreg_demo3(): + + plt.ion() + k = 0 + for n in [50, 100, 300, 600, 4000]: + x, y, fun1 = _get_data( + n, symmetric=True, loc1=1.0, scale1=0.6, scale2=1.25) + k0 = k + + for fun in ['hste', ]: + #@UnusedVariable + hsmax, hs1, hs2 = _get_regression_smooting(x, y, fun=fun) + for hi in np.linspace(hsmax * 0.25, hsmax, 9): + plt.figure(k) + k += 1 + unused = kreg_demo3(x, y, fun1, hs=hi, fun=fun, plotlog=False) + + #kreg_demo2(n=n,symmetric=True,fun='hste', plotlog=False) + fig.tile(range(k0, k)) + plt.ioff() + plt.show() + + +def check_kreg_demo4(): + plt.ion() + # test_docstrings() + # kde_demo2() + # kreg_demo1(fast=True) + # kde_gauss_demo() + #kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True) + k = 0 + for i, n in enumerate([100, 300, 600, 4000]): # @UnusedVariable + x, y, fun1 = _get_data( + n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75) + #k0 = k + hopt1, _h1, _h2 = _get_regression_smooting(x, y, fun='hos') + hopt2, _h1, _h2 = _get_regression_smooting(x, y, fun='hste') + hopt = sqrt(hopt1 * hopt2) + #hopt = _get_regression_smooting(x,y,fun='hos')[0] + # , 'hisj', 'hns', 'hstt' @UnusedVariable + for j, fun in enumerate(['hste']): + hsmax, _hs1, _hs2 = _get_regression_smooting(x, y, fun=fun) + + fmax = kreg_demo4(x, y, hsmax + 0.1, hopt) + for hi in np.linspace(hsmax * 0.1, hsmax, 55): + f = kreg_demo4(x, y, hi, hopt) + if f.aicc <= fmax.aicc: + fmax = f + plt.figure(k) + k += 1 + fmax.plot() + plt.plot(x, fun1(x), 'r') + + #kreg_demo2(n=n,symmetric=True,fun='hste', plotlog=False) + fig.tile(range(0, k)) + plt.ioff() + plt.show() + + +def check_regression_bin(): + plt.ion() + # test_docstrings() + # kde_demo2() + # kreg_demo1(fast=True) + # kde_gauss_demo() + #kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True) + k = 0 + for i, n in enumerate([100, 300, 600, 4000]): # @UnusedVariable + x, y, fun1 = _get_data( + n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75) + fbest = regressionbin(x, y, alpha=0.05, color='g', label='Transit_D') + + figk = plt.figure(k) + ax = figk.gca() + k += 1 + fbest.plot(axis=ax) + ax.plot(x, fun1(x), 'r') + ax.legend(frameon=False, markerscale=4) + #ax = plt.gca() + ax.set_yticklabels(ax.get_yticks() * 100.0) + ax.grid(True) + + fig.tile(range(0, k)) + plt.ioff() + plt.show() + + +def check_bkregression(): + plt.ion() + k = 0 + for i, n in enumerate([50, 100, 300, 600]): # @UnusedVariable + x, y, fun1 = _get_data( + n, symmetric=True, loc1=0.1, scale1=0.6, scale2=0.75) + bkreg = BKRegression(x, y) + fbest = bkreg.prb_search_best( + hsfun='hste', alpha=0.05, color='g', label='Transit_D') + + figk = plt.figure(k) + ax = figk.gca() + k += 1 +# fbest.score.plot(axis=ax) +# axsize = ax.axis() +# ax.vlines(fbest.hs,axsize[2]+1,axsize[3]) +# ax.set(yscale='log') + fbest.plot(axis=ax) + ax.plot(x, fun1(x), 'r') + ax.legend(frameon=False, markerscale=4) + #ax = plt.gca() + ax.set_yticklabels(ax.get_yticks() * 100.0) + ax.grid(True) + + fig.tile(range(0, k)) + plt.ioff() + plt.show() + + +def _get_regression_smooting(x, y, fun='hste'): + hs1 = Kernel('gauss', fun=fun).get_smoothing(x) + #hx = np.median(np.abs(x-np.median(x)))/0.6745*(4.0/(3*n))**0.2 + if (y == True).any(): + hs2 = Kernel('gauss', fun=fun).get_smoothing(x[y == True]) + #hy = 
np.median(np.abs(y-np.mean(y)))/0.6745*(4.0/(3*n))**0.2 + else: + hs2 = 4 * hs1 + #hy = 4*hx + + #hy2 = Kernel('gauss', fun=fun).get_smoothing(y) + #kernel = Kernel('gauss',fun=fun) + #hopt = (hs1+2*hs2)/3 + # hopt = (hs1+4*hs2)/5 #kernel.get_smoothing(x) + #hopt = hs2 + hopt = sqrt(hs1 * hs2) + return hopt, hs1, hs2 + + +def empirical_bin_prb(x, y, hopt, color='r'): + ''' + Returns empirical binomial probabiltity + + Parameters + ---------- + x : ndarray + position ve + y : ndarray + binomial response variable (zeros and ones) + + Returns + ------- + P(x) : PlotData object + empirical probability + ''' + xmin, xmax = x.min(), x.max() + ni = max(2 * int((xmax - xmin) / hopt) + 3, 5) + + sml = hopt # *0.1 + xi = np.linspace(xmin - sml, xmax + sml, ni) + + c = gridcount(x, xi) + if (y == True).any(): + c0 = gridcount(x[y == True], xi) + else: + c0 = np.zeros(xi.shape) + yi = np.where(c == 0, 0, c0 / c) + return PlotData(yi, xi, plotmethod='scatter', plot_kwds=dict(color=color, s=5)) + + +def smoothed_bin_prb(x, y, hs, hopt, alpha=0.05, color='r', label='', bin_prb=None): + ''' + Parameters + ---------- + x,y + hs : smoothing parameter + hopt : spacing in empirical_bin_prb + alpha : confidence level + color : color of plot object + bin_prb : PlotData object with empirical bin prb + ''' + if bin_prb is None: + bin_prb = empirical_bin_prb(x, y, hopt, color) + + xi = bin_prb.args + yi = bin_prb.data + ni = len(xi) + dxi = xi[1] - xi[0] + + n = x.size + + xiii = np.linspace(xi[0], xi[-1], 10 * ni + 1) + + kreg = KRegression(x, y, hs=hs, p=0) + # expected number of data in each bin + ciii = kreg.tkde.eval_grid_fast(xiii) * dxi * n + + f = kreg(xiii, output='plotobj') # , plot_kwds=dict(plotflag=7)) + pi = f.data + + st = stats + # Jeffreys intervall a=b=0.5 + #st.beta.isf(alpha/2, x+a, n-x+b) + ab = 0.07 # 0.5 + pi1 = pi + pup = np.where(pi1 == 1, 1, st.beta.isf( + alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab)) + plo = np.where(pi1 == 0, 0, st.beta.isf( + 1 - alpha / 2, ciii * pi1 + ab, ciii * (1 - pi1) + ab)) + + # Wilson score + # z0 = -_invnorm(alpha/2) +# den = 1+(z0**2./ciii); +# xc=(pi1+(z0**2)/(2*ciii))/den; +# halfwidth=(z0*sqrt((pi1*(1-pi1)/ciii)+(z0**2/(4*(ciii**2)))))/den +# plo2 = (xc-halfwidth).clip(min=0) # wilson score +# pup2 = (xc+halfwidth).clip(max=1.0) # wilson score + #f.dataCI = np.vstack((plo,pup)).T + f.prediction_error_avg = np.trapz(pup - plo, xiii) / (xiii[-1] - xiii[0]) + fiii = f.data + + f.plot_kwds['color'] = color + f.plot_kwds['linewidth'] = 2 + if label: + f.plot_kwds['label'] = label + f.children = [PlotData( + [plo, pup], xiii, plotmethod='fill_between', plot_kwds=dict(alpha=0.2, color=color)), + bin_prb] + + yiii = interpolate.interp1d(xi, yi)(xiii) + df = np.diff(fiii) + k = (df[:-1] * df[1:] < 0).sum() # numpeaks + sigmai = (pup - plo) + aicc = (((yiii - fiii) / sigmai) ** 2).sum() + 2 * k * (k + 1) / np.maximum(ni - k + 1, 1) + \ + np.abs((yiii - pup).clip(min=0) - (yiii - plo).clip(max=0)).sum() + + f.aicc = aicc + f.fun = kreg + f.labels.title = 'perr=%1.3f,aicc=%1.3f, n=%d, hs=%1.3f' % ( + f.prediction_error_avg, aicc, n, hs) + + return f + + +def regressionbin(x, y, alpha=0.05, color='r', label=''): + ''' + Return kernel regression estimate for binomial data + + Parameters + ---------- + x : arraylike + positions + y : arraylike + of 0 and 1 + ''' + # @UnusedVariable + hopt1, h1, h2 = _get_regression_smooting(x, y, fun='hos') + # @UnusedVariable + hopt2, h1, h2 = _get_regression_smooting(x, y, fun='hste') + hopt = sqrt(hopt1 * hopt2) + + fbest = 
smoothed_bin_prb(x, y, hopt2 + 0.1, hopt, alpha, color, label) + bin_prb = fbest.children[-1] + for fun in ['hste']: # , 'hisj', 'hns', 'hstt' + #@UnusedVariable + hsmax, hs1, hs2 = _get_regression_smooting(x, y, fun=fun) + for hi in np.linspace(hsmax * 0.1, hsmax, 55): + f = smoothed_bin_prb(x, y, hi, hopt, alpha, color, label, bin_prb) + if f.aicc <= fbest.aicc: + fbest = f + #hbest = hi + return fbest + + +def kde_gauss_demo(n=50): + ''' + KDEDEMO Demonstrate the KDEgauss + + KDEDEMO1 shows the true density (dotted) compared to KDE based on 7 + observations (solid) and their individual kernels (dashed) for 3 + different values of the smoothing parameter, hs. + ''' + + st = stats + #x = np.linspace(-4, 4, 101) + #data = np.random.normal(loc=0, scale=1.0, size=n) + #data = np.random.exponential(scale=1.0, size=n) +# n1 = 128 +# I = (np.arange(n1)*pi)**2 *0.01*0.5 +# kw = exp(-I) +# plt.plot(idctn(kw)) +# return + #dist = st.norm + dist = st.expon + data = dist.rvs(loc=0, scale=1.0, size=n) + d, N = np.atleast_2d(data).shape # @UnusedVariable + + if d == 1: + plot_options = [dict(color='red'), dict( + color='green'), dict(color='black')] + else: + plot_options = [dict(colors='red'), dict( + colors='green'), dict(colors='black')] + + plt.figure(1) + kde0 = KDE(data, kernel=Kernel('gauss', 'hste')) + f0 = kde0.eval_grid_fast(output='plot', ylab='Density') + f0.plot(**plot_options[0]) + + kde1 = TKDE(data, kernel=Kernel('gauss', 'hisj'), L2=.5) + f1 = kde1.eval_grid_fast(output='plot', ylab='Density') + f1.plot(**plot_options[1]) + + kde2 = KDEgauss(data) + f2 = kde2(output='plot', ylab='Density') + x = f2.args + f2.plot(**plot_options[2]) + + fmax = dist.pdf(x, 0, 1).max() + if d == 1: + plt.plot(x, dist.pdf(x, 0, 1), 'k:') + plt.axis([x.min(), x.max(), 0, fmax]) + plt.show() + print(fmax / f2.data.max()) + format_ = ''.join(('%g, ') * d) + format_ = 'hs0=%s hs1=%s hs2=%s' % (format_, format_, format_) + print( + format_ % tuple(kde0.hs.tolist() + kde1.tkde.hs.tolist() + kde2.hs.tolist())) + print('inc0 = %d, inc1 = %d, inc2 = %d' % (kde0.inc, kde1.inc, kde2.inc)) + + +def test_docstrings(): + import doctest + doctest.testmod() + +if __name__ == '__main__': + test_docstrings() + + # check_bkregression() + # check_regression_bin() + # check_kreg_demo3() + # check_kreg_demo4() + + + # test_smoothn_2d() + # test_smoothn_cardioid() + + + # kde_demo2() + # kreg_demo1(fast=True) + # kde_gauss_demo() + #kreg_demo2(n=120,symmetric=True,fun='hste', plotlog=True) diff --git a/pywafo/src/wafo/magic.py b/pywafo/src/wafo/magic.py index 2f2a1f2..0ee6c87 100644 --- a/pywafo/src/wafo/magic.py +++ b/pywafo/src/wafo/magic.py @@ -1,15 +1,16 @@ -# -*- coding: utf-8 -*- -""" -Created on Tue Apr 17 13:59:12 2012 - -@author: pab -""" -import numpy as np - -def magic(n): - ix = np.arange(n)+1 - J, I = np.meshgrid(ix,ix) - A = np.mod(I+J-(n+3)/2,n) - B = np.mod(I+2*J-2,n) - M = n*A + B + 1 - return M \ No newline at end of file +# -*- coding: utf-8 -*- +""" +Created on Tue Apr 17 13:59:12 2012 + +@author: pab +""" +import numpy as np + + +def magic(n): + ix = np.arange(n) + 1 + J, I = np.meshgrid(ix, ix) + A = np.mod(I + J - (n + 3) / 2, n) + B = np.mod(I + 2 * J - 2, n) + M = n * A + B + 1 + return M diff --git a/pywafo/src/wafo/meshgrid.py b/pywafo/src/wafo/meshgrid.py index 538cbd6..a72fb8a 100644 --- a/pywafo/src/wafo/meshgrid.py +++ b/pywafo/src/wafo/meshgrid.py @@ -1,133 +1,136 @@ -import numpy as np -def meshgrid(*xi, **kwargs): - """ - Return coordinate matrices from one or more coordinate vectors. 
- - Make N-D coordinate arrays for vectorized evaluations of - N-D scalar/vector fields over N-D grids, given - one-dimensional coordinate arrays x1, x2,..., xn. - - Parameters - ---------- - x1, x2,..., xn : array_like - 1-D arrays representing the coordinates of a grid. - indexing : 'xy' or 'ij' (optional) - cartesian ('xy', default) or matrix ('ij') indexing of output - sparse : True or False (default) (optional) - If True a sparse grid is returned in order to conserve memory. - copy : True (default) or False (optional) - If False a view into the original arrays are returned in order to - conserve memory. Please note that sparse=False, copy=False will likely - return non-contiguous arrays. Furthermore, more than one element of a - broadcasted array may refer to a single memory location. If you - need to write to the arrays, make copies first. - - Returns - ------- - X1, X2,..., XN : ndarray - For vectors `x1`, `x2`,..., 'xn' with lengths ``Ni=len(xi)`` , - return ``(N1, N2, N3,...Nn)`` shaped arrays if indexing='ij' - or ``(N2, N1, N3,...Nn)`` shaped arrays if indexing='xy' - with the elements of `xi` repeated to fill the matrix along - the first dimension for `x1`, the second for `x2` and so on. - - Notes - ----- - This function supports both indexing conventions through the indexing keyword - argument. Giving the string 'ij' returns a meshgrid with matrix indexing, - while 'xy' returns a meshgrid with Cartesian indexing. The difference is - illustrated by the following code snippet: - - xv, yv = meshgrid(x, y, sparse=False, indexing='ij') - for i in range(nx): - for j in range(ny): - # treat xv[i,j], yv[i,j] - - xv, yv = meshgrid(x, y, sparse=False, indexing='xy') - for i in range(nx): - for j in range(ny): - # treat xv[j,i], yv[j,i] - - See Also - -------- - index_tricks.mgrid : Construct a multi-dimensional "meshgrid" - using indexing notation. - index_tricks.ogrid : Construct an open multi-dimensional "meshgrid" - using indexing notation. - - Examples - -------- - >>> nx, ny = (3, 2) - >>> x = np.linspace(0, 1, nx) - >>> y = np.linspace(0, 1, ny) - >>> xv, yv = meshgrid(x, y) - >>> xv - array([[ 0. , 0.5, 1. ], - [ 0. , 0.5, 1. ]]) - >>> yv - array([[ 0., 0., 0.], - [ 1., 1., 1.]]) - >>> xv, yv = meshgrid(x, y, sparse=True) # make sparse output arrays - >>> xv - array([[ 0. , 0.5, 1. ]]) - >>> yv - array([[ 0.], - [ 1.]]) - - `meshgrid` is very useful to evaluate functions on a grid. 
- - >>> x = np.arange(-5, 5, 0.1) - >>> y = np.arange(-5, 5, 0.1) - >>> xx, yy = meshgrid(x, y, sparse=True) - >>> z = np.sin(xx**2+yy**2)/(xx**2+yy**2) - - >>> import matplotlib.pyplot as plt - >>> h = plt.contourf(x,y,z) - """ - copy_ = kwargs.get('copy', True) - args = np.atleast_1d(*xi) - ndim = len(args) - - if not isinstance(args, list) or ndim<2: - raise TypeError('meshgrid() takes 2 or more arguments (%d given)' % int(ndim>0)) - - sparse = kwargs.get('sparse', False) - indexing = kwargs.get('indexing', 'xy') - - s0 = (1,)*ndim - output = [x.reshape(s0[:i] + (-1,) + s0[i + 1::]) for i, x in enumerate(args)] - - shape = [x.size for x in output] - - if indexing == 'xy': - # switch first and second axis - output[0].shape = (1, -1) + (1,)*(ndim - 2) - output[1].shape = (-1, 1) + (1,)*(ndim - 2) - shape[0], shape[1] = shape[1], shape[0] - - if sparse: - if copy_: - return [x.copy() for x in output] - else: - return output - else: - # Return the full N-D matrix (not only the 1-D vector) - if copy_: - mult_fact = np.ones(shape, dtype=int) - return [x * mult_fact for x in output] - else: - return np.broadcast_arrays(*output) - - -def ndgrid(*args, **kwargs): - """ - Same as calling meshgrid with indexing='ij' (see meshgrid for - documentation). - """ - kwargs['indexing'] = 'ij' - return meshgrid(*args, **kwargs) - -if __name__ == '__main__': - import doctest - doctest.testmod() - +import numpy as np + + +def meshgrid(*xi, **kwargs): + """ + Return coordinate matrices from one or more coordinate vectors. + + Make N-D coordinate arrays for vectorized evaluations of + N-D scalar/vector fields over N-D grids, given + one-dimensional coordinate arrays x1, x2,..., xn. + + Parameters + ---------- + x1, x2,..., xn : array_like + 1-D arrays representing the coordinates of a grid. + indexing : 'xy' or 'ij' (optional) + cartesian ('xy', default) or matrix ('ij') indexing of output + sparse : True or False (default) (optional) + If True a sparse grid is returned in order to conserve memory. + copy : True (default) or False (optional) + If False a view into the original arrays are returned in order to + conserve memory. Please note that sparse=False, copy=False will likely + return non-contiguous arrays. Furthermore, more than one element of a + broadcasted array may refer to a single memory location. If you + need to write to the arrays, make copies first. + + Returns + ------- + X1, X2,..., XN : ndarray + For vectors `x1`, `x2`,..., 'xn' with lengths ``Ni=len(xi)`` , + return ``(N1, N2, N3,...Nn)`` shaped arrays if indexing='ij' + or ``(N2, N1, N3,...Nn)`` shaped arrays if indexing='xy' + with the elements of `xi` repeated to fill the matrix along + the first dimension for `x1`, the second for `x2` and so on. + + Notes + ----- + This function supports both indexing conventions through the indexing + keyword argument. Giving the string 'ij' returns a meshgrid with matrix + indexing, while 'xy' returns a meshgrid with Cartesian indexing. The + difference is illustrated by the following code snippet: + + xv, yv = meshgrid(x, y, sparse=False, indexing='ij') + for i in range(nx): + for j in range(ny): + # treat xv[i,j], yv[i,j] + + xv, yv = meshgrid(x, y, sparse=False, indexing='xy') + for i in range(nx): + for j in range(ny): + # treat xv[j,i], yv[j,i] + + See Also + -------- + index_tricks.mgrid : Construct a multi-dimensional "meshgrid" + using indexing notation. + index_tricks.ogrid : Construct an open multi-dimensional "meshgrid" + using indexing notation. 
+ + Examples + -------- + >>> nx, ny = (3, 2) + >>> x = np.linspace(0, 1, nx) + >>> y = np.linspace(0, 1, ny) + >>> xv, yv = meshgrid(x, y) + >>> xv + array([[ 0. , 0.5, 1. ], + [ 0. , 0.5, 1. ]]) + >>> yv + array([[ 0., 0., 0.], + [ 1., 1., 1.]]) + >>> xv, yv = meshgrid(x, y, sparse=True) # make sparse output arrays + >>> xv + array([[ 0. , 0.5, 1. ]]) + >>> yv + array([[ 0.], + [ 1.]]) + + `meshgrid` is very useful to evaluate functions on a grid. + + >>> x = np.arange(-5, 5, 0.1) + >>> y = np.arange(-5, 5, 0.1) + >>> xx, yy = meshgrid(x, y, sparse=True) + >>> z = np.sin(xx**2+yy**2)/(xx**2+yy**2) + + >>> import matplotlib.pyplot as plt + >>> h = plt.contourf(x,y,z) + """ + copy_ = kwargs.get('copy', True) + args = np.atleast_1d(*xi) + ndim = len(args) + + if not isinstance(args, list) or ndim < 2: + raise TypeError( + 'meshgrid() takes 2 or more arguments (%d given)' % int(ndim > 0)) + + sparse = kwargs.get('sparse', False) + indexing = kwargs.get('indexing', 'xy') + + s0 = (1,) * ndim + output = [x.reshape(s0[:i] + (-1,) + s0[i + 1::]) + for i, x in enumerate(args)] + + shape = [x.size for x in output] + + if indexing == 'xy': + # switch first and second axis + output[0].shape = (1, -1) + (1,) * (ndim - 2) + output[1].shape = (-1, 1) + (1,) * (ndim - 2) + shape[0], shape[1] = shape[1], shape[0] + + if sparse: + if copy_: + return [x.copy() for x in output] + else: + return output + else: + # Return the full N-D matrix (not only the 1-D vector) + if copy_: + mult_fact = np.ones(shape, dtype=int) + return [x * mult_fact for x in output] + else: + return np.broadcast_arrays(*output) + + +def ndgrid(*args, **kwargs): + """ + Same as calling meshgrid with indexing='ij' (see meshgrid for + documentation). + """ + kwargs['indexing'] = 'ij' + return meshgrid(*args, **kwargs) + +if __name__ == '__main__': + import doctest + doctest.testmod() diff --git a/pywafo/src/wafo/misc.py b/pywafo/src/wafo/misc.py index e833c98..3c1f57c 100644 --- a/pywafo/src/wafo/misc.py +++ b/pywafo/src/wafo/misc.py @@ -1,2527 +1,2606 @@ -''' -Misc -''' -from __future__ import division - -import sys -import fractions -import numpy as np -from numpy import (abs, amax, any, logical_and, arange, linspace, atleast_1d, #atleast_2d, - array, asarray, broadcast_arrays, ceil, floor, frexp, hypot, - sqrt, arctan2, sin, cos, exp, log, mod, diff, empty_like, - finfo, inf, pi, interp, isnan, isscalar, zeros, ones, linalg, - r_, sign, unique, hstack, vstack, nonzero, where, extract) -from scipy.special import gammaln -from scipy.integrate import trapz, simps -import warnings -from plotbackend import plotbackend -from collections import OrderedDict - - -try: - import c_library as clib #@UnresolvedImport -except: - clib = None -floatinfo = finfo(float) - - -__all__ = ['is_numlike', 'JITImport', 'DotDict', 'Bunch', 'printf', 'sub_dict_select', - 'parse_kwargs', 'detrendma', 'ecross', 'findcross', - 'findextrema', 'findpeaks', 'findrfc', 'rfcfilter', 'findtp', 'findtc', - 'findoutliers', 'common_shape', 'argsreduce', - 'stirlerr', 'getshipchar', 'betaloge', 'gravity', 'nextpow2', - 'discretize', 'polar2cart', 'cart2polar', 'meshgrid', 'ndgrid', - 'trangood', 'tranproc', 'plot_histgrm', 'num2pistr', 'test_docstrings'] - - -def is_numlike(obj): - 'return true if *obj* looks like a number' - try: - obj + 1 - except TypeError: - return False - else: - return True - -class JITImport(object): - ''' - Just In Time Import of module - - Example - ------- - >>> np = JITImport('numpy') - >>> np.exp(0)==1.0 - True - ''' - def __init__(self, 
module_name): - self._module_name = module_name - self._module = None - def __getattr__(self, attr): - try: - return getattr(self._module, attr) - except: - if self._module is None: - self._module = __import__(self._module_name, None, None, ['*']) - #assert(isinstance(self._module, types.ModuleType), 'module') - return getattr(self._module, attr) - else: - raise - -class DotDict(dict): - ''' Implement dot access to dict values - - Example - ------- - >>> d = DotDict(test1=1,test2=3) - >>> d.test1 - 1 - ''' - __getattr__ = dict.__getitem__ - -class Bunch(object): - ''' Implement keyword argument initialization of class - - Example - ------- - >>> d = Bunch(test1=1,test2=3) - >>> d.test1 - 1 - ''' - def __init__(self, **kwargs): - self.__dict__.update(kwargs) - def keys(self): - return self.__dict__.keys() - def update(self, ** kwargs): - self.__dict__.update(kwargs) - -def printf(format, *args): #@ReservedAssignment - sys.stdout.write(format % args) - - -def sub_dict_select(somedict, somekeys): - ''' - Extracting a Subset from Dictionary - - Example - -------- - # Update options dict from keyword arguments if - # the keyword exists in options - >>> opt = dict(arg1=2, arg2=3) - >>> kwds = dict(arg2=100,arg3=1000) - >>> sub_dict = sub_dict_select(kwds,opt.keys()) - >>> opt.update(sub_dict) - >>> opt - {'arg1': 2, 'arg2': 100} - - See also - -------- - dict_intersection - ''' - #slower: validKeys = set(somedict).intersection(somekeys) - return dict((k, somedict[k]) for k in somekeys if k in somedict) - - -def parse_kwargs(options, **kwargs): - ''' Update options dict from keyword arguments if the keyword exists in options - - Example - >>> opt = dict(arg1=2, arg2=3) - >>> opt = parse_kwargs(opt,arg2=100) - >>> print opt - {'arg1': 2, 'arg2': 100} - >>> opt2 = dict(arg2=101) - >>> opt = parse_kwargs(opt,**opt2) - - See also sub_dict_select - ''' - - newopts = sub_dict_select(kwargs, options.keys()) - if len(newopts) > 0: - options.update(newopts) - return options - -def testfun(*args, **kwargs): - opts = dict(opt1=1, opt2=2) - if len(args) == 1 and len(kwargs) == 0 and type(args[0]) is str and args[0].startswith('default'): - return opts - opts = parse_kwargs(opts, **kwargs) - return opts - -def detrendma(x, L): - """ - Removes a trend from data using a moving average - of size 2*L+1. 
If 2*L+1 > len(x) then the mean is removed - - Parameters - ---------- - x : vector or matrix of column vectors - of data - L : scalar, integer - defines the size of the moving average window - - Returns - ------- - y : ndarray - detrended data - - Examples - -------- - >>> import wafo.misc as wm - >>> import pylab as plb - >>> exp = plb.exp; cos = plb.cos; randn = plb.randn - >>> x = plb.linspace(0,1,200) - >>> y = exp(x)+cos(5*2*pi*x)+1e-1*randn(x.size) - >>> y0 = wm.detrendma(y,20); tr = y-y0 - >>> h = plb.plot(x, y, x, y0, 'r', x, exp(x), 'k', x, tr, 'm') - - >>> plb.close('all') - - See also - -------- - Reconstruct - """ - - if L <= 0: - raise ValueError('L must be positive') - if L != round(L): - raise ValueError('L must be an integer') - - x1 = atleast_1d(x) - if x1.shape[0] == 1: - x1 = x1.ravel() - - n = x1.shape[0] - if n < 2 * L + 1: # only able to remove the mean - return x1 - x1.mean(axis=0) - - - mn = x1[0:2 * L + 1].mean(axis=0) - y = empty_like(x1) - y[0:L] = x1[0:L] - mn - - ix = r_[L:(n - L)] - trend = ((x1[ix + L] - x1[ix - L]) / (2 * L + 1)).cumsum(axis=0) + mn - y[ix] = x1[ix] - trend - y[n - L::] = x1[n - L::] - trend[-1] - return y - -def ecross(t, f, ind, v=0): - ''' - Extracts exact level v crossings - - ECROSS interpolates t and f linearly to find the exact level v - crossings, i.e., the points where f(t0) = v - - Parameters - ---------- - t,f : vectors - of arguments and functions values, respectively. - ind : ndarray of integers - indices to level v crossings as found by findcross. - v : scalar or vector (of size(ind)) - defining the level(s) to cross. - - Returns - ------- - t0 : vector - of exact level v crossings. - - Example - ------- - >>> from matplotlib import pylab as plb - >>> import wafo.misc as wm - >>> ones = plb.ones - >>> t = plb.linspace(0,7*plb.pi,250) - >>> x = plb.sin(t) - >>> ind = wm.findcross(x,0.75) - >>> ind - array([ 9, 25, 80, 97, 151, 168, 223, 239]) - >>> t0 = wm.ecross(t,x,ind,0.75) - >>> t0 - array([ 0.84910514, 2.2933879 , 7.13205663, 8.57630119, - 13.41484739, 14.85909194, 19.69776067, 21.14204343]) - >>> a = plb.plot(t, x, '.', t[ind], x[ind], 'r.', t, ones(t.shape)*0.75, - ... t0, ones(t0.shape)*0.75, 'g.') - - >>> plb.close('all') - - See also - -------- - findcross - ''' - # Tested on: Python 2.5 - # revised pab Feb2004 - # By pab 18.06.2001 - return t[ind] + (v - f[ind]) * (t[ind + 1] - t[ind]) / (f[ind + 1] - f[ind]) - -def _findcross(xn): - '''Return indices to zero up and downcrossings of a vector - ''' - if clib is not None: - ind, m = clib.findcross(xn, 0.0) - return ind[:m] - - n = len(xn) - iz, = (xn == 0).nonzero() - if len(iz) > 0: - # Trick to avoid turning points on the crossinglevel. - if iz[0] == 0: - if len(iz) == n: - warnings.warn('All values are equal to crossing level!') - return zeros(0, dtype=np.int) - - diz = diff(iz) - if len(diz) > 0 and (diz > 1).any(): - ix = iz[(diz > 1).argmax()] - else: - ix = iz[-1] - - #x(ix) is a up crossing if x(1:ix) = v and x(ix+1) > v. - #x(ix) is a downcrossing if x(1:ix) = v and x(ix+1) < v. - xn[0:ix + 1] = -xn[ix + 1] - iz = iz[ix + 1::] - - for ix in iz.tolist(): - xn[ix] = xn[ix - 1] - - #% indices to local level crossings ( without turningpoints) - ind, = (xn[:n - 1] * xn[1:] < 0).nonzero() - return ind - -def findcross(x, v=0.0, kind=None): - ''' - Return indices to level v up and/or downcrossings of a vector - - Parameters - ---------- - x : array_like - vector with sampled values. - v : scalar, real - level v. 
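(Aside, not part of the patch.) The single line of arithmetic inside ecross above is ordinary linear interpolation; this standalone sketch applies it to one pair of samples straddling the level v.

t0, t1 = 1.0, 2.0      # sample times around the crossing
f0, f1 = 0.5, 1.0      # sampled values; the level v = 0.75 lies between them
v = 0.75
t_cross = t0 + (v - f0) * (t1 - t0) / (f1 - f0)
print(t_cross)         # 1.5: v sits halfway between f0 and f1, so t does too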
- kind : string - defines type of wave or crossing returned. Possible options are - 'dw' : downcrossing wave - 'uw' : upcrossing wave - 'cw' : crest wave - 'tw' : trough wave - 'd' : downcrossings only - 'u' : upcrossings only - None : All crossings will be returned - - Returns - ------- - ind : array-like - indices to the crossings in the original sequence x. - - Example - ------- - >>> from matplotlib import pylab as plb - >>> import wafo.misc as wm - >>> ones = plb.ones - >>> findcross([0, 1, -1, 1],0) - array([0, 1, 2]) - >>> v = 0.75 - >>> t = plb.linspace(0,7*plb.pi,250) - >>> x = plb.sin(t) - >>> ind = wm.findcross(x,v) # all crossings - >>> ind - array([ 9, 25, 80, 97, 151, 168, 223, 239]) - >>> t0 = plb.plot(t,x,'.',t[ind],x[ind],'r.', t, ones(t.shape)*v) - >>> ind2 = wm.findcross(x,v,'u') - >>> ind2 - array([ 9, 80, 151, 223]) - >>> t0 = plb.plot(t[ind2],x[ind2],'o') - >>> plb.close('all') - - See also - -------- - crossdef - wavedef - ''' - xn = np.int8(sign(atleast_1d(x).ravel() - v)) #@UndefinedVariable - ind = _findcross(xn) - if ind.size == 0: - warnings.warn('No level v = %0.5g crossings found in x' % v) - return ind - - if kind not in ('du', 'all', None): - if kind == 'd': #downcrossings only - t_0 = int(xn[ind[0] + 1] > 0) - ind = ind[t_0::2] - elif kind == 'u': #upcrossings only - t_0 = int(xn[ind[0] + 1] < 0) - ind = ind[t_0::2] - elif kind in ('dw', 'uw', 'tw', 'cw'): - #make sure that the first is a level v down-crossing if wdef=='dw' - #or make sure that the first is a level v up-crossing if wdef=='uw' - #make sure that the first is a level v down-crossing if wdef=='tw' - #or make sure that the first is a level v up-crossing if wdef=='cw' - xor = lambda a, b: a ^ b - first_is_down_crossing = int(xn[ind[0]] > xn[ind[0] + 1]) - if xor(first_is_down_crossing, kind in ('dw', 'tw')): - ind = ind[1::] - - n_c = ind.size # number of level v crossings - # make sure the number of troughs and crests are according to the - # wavedef, i.e., make sure length(ind) is odd if dw or uw - # and even if tw or cw - is_odd = mod(n_c, 2) - if xor(is_odd, kind in ('dw', 'uw')): - ind = ind[:-1] - else: - raise ValueError('Unknown wave/crossing definition!') - return ind - -def findextrema(x): - ''' - Return indices to minima and maxima of a vector - - Parameters - ---------- - x : vector with sampled values. - - Returns - ------- - ind : indices to minima and maxima in the original sequence x. - - Examples - -------- - >>> import numpy as np - >>> import pylab as pb - >>> import wafo.misc as wm - >>> t = np.linspace(0,7*np.pi,250) - >>> x = np.sin(t) - >>> ind = wm.findextrema(x) - >>> a = pb.plot(t,x,'.',t[ind],x[ind],'r.') - >>> pb.close('all') - - See also - -------- - findcross - crossdef - ''' - xn = atleast_1d(x).ravel() - return findcross(diff(xn), 0.0) + 1 -def findpeaks(data, n=2, min_h=None, min_p=0.0): - ''' - Find peaks of vector or matrix possibly rainflow filtered - - Parameters - ---------- - data = matrix or vector - n = The n highest peaks are found (if exist). (default 2) - min_h = The threshold in the rainflowfilter (default 0.05*range(S(:))). - A zero value will return all the peaks of S. - min_p = 0..1, Only the peaks that are higher than - min_p*max(max(S)) min_p*(the largest peak in S) - are returned (default 0). - Returns - ix = - linear index to peaks of S - - Example: - - Find highest 8 peaks that are not - less that 0.3*"global max" and have - rainflow amplitude larger than 5. 
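(Aside, not part of the patch.) findextrema above is literally findcross(diff(x), 0) + 1; the plain-numpy sketch below shows the underlying idea that turning points sit where the first difference changes sign.

import numpy as np

x = np.array([0.0, 1.0, 0.5, 1.5, 0.2])
dx = np.diff(x)
# a sign change between consecutive differences marks a local max or min
ind = np.nonzero(dx[:-1] * dx[1:] < 0)[0] + 1
print(ind)             # [1 2 3]: max at x[1], min at x[2], max at x[3]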
- >>> import numpy as np - >>> import wafo.misc as wm - >>> x = np.arange(0,10,0.01) - >>> data = x**2+10*np.sin(3*x)+0.5*np.sin(50*x) - >>> wm.findpeaks(data, n=8, min_h=5, min_p=0.3) - array([908, 694, 481]) - - See also - -------- - findtp - ''' - S = np.atleast_1d(data) - smax = S.max() - if min_h is None: - smin = S.min() - min_h = 0.05 * (smax - smin) - ndim = S.ndim - S = np.atleast_2d(S) - nrows, mcols = S.shape - - # Finding turningpoints of the spectrum - # Returning only those with rainflowcycle heights greater than h_min - indP = [] # indices to peaks - ind = [] - for iy in range(nrows): # % find all peaks - TuP = findtp(S[iy], min_h) - if len(TuP): - ind = TuP[1::2] #; % extract indices to maxima only - else: # % did not find any , try maximum - ind = np.atleast_1d(S[iy].argmax()) - - if ndim > 1: - if iy == 0: - ind2 = np.flatnonzero(S[iy, ind] > S[iy + 1, ind]) - elif iy == nrows - 1: - ind2 = np.flatnonzero(S[iy, ind] > S[iy - 1, ind]) - else: - ind2 = np.flatnonzero((S[iy, ind] > S[iy - 1, ind]) & (S[iy, ind] > S[iy + 1, ind])) - - if len(ind2): - indP.append((ind[ind2] + iy * mcols)) - - if ndim > 1: - ind = np.hstack(indP) if len(indP) else [] - if len(ind) == 0: - return [] - - peaks = S.take(ind) - ind2 = peaks.argsort()[::-1] - - - # keeping only the Np most significant peak frequencies. - nmax = min(n, len(ind)) - ind = ind[ind2[:nmax]] - if (min_p > 0) : - # Keeping only peaks larger than min_p percent relative to the maximum peak - ind = ind[(S.take(ind) > min_p * smax)] - - return ind - -def findrfc_astm(tp): - """ - Return rainflow counted cycles - - Nieslony's Matlab implementation of the ASTM standard practice for rainflow - counting ported to a Python C module. - - Parameters - ---------- - tp : array-like - vector of turningpoints (NB! Only values, not sampled times) - - Returns - ------- - sig_rfc : array-like - array of shape (n,3) with: - sig_rfc[:,0] Cycles amplitude - sig_rfc[:,1] Cycles mean value - sig_rfc[:,2] Cycle type, half (=0.5) or full (=1.0) - """ - - y1 = atleast_1d(tp).ravel() - sig_rfc, cnr = clib.findrfc3_astm(y1) - # the sig_rfc was constructed too big in rainflow.rf3, so - # reduce the sig_rfc array as done originally by a matlab mex c function - n = len(sig_rfc) - sig_rfc = sig_rfc.__getslice__(0, n - cnr[0]) - # sig_rfc holds the actual rainflow counted cycles, not the indices - return sig_rfc - -def findrfc(tp, h=0.0, method='clib'): - ''' - Return indices to rainflow cycles of a sequence of TP. - - Parameters - ----------- - tp : array-like - vector of turningpoints (NB! Only values, not sampled times) - h : real scalar - rainflow threshold. If h>0, then all rainflow cycles with height - smaller than h are removed. - method : string, optional - 'clib' 'None' - Specify 'clib' for calling the c_functions, otherwise fallback to - the Python implementation. - - Returns - ------- - ind : ndarray of int - indices to the rainflow cycles of the original sequence TP. - - Example: - -------- - >>> import pylab as pb - >>> import wafo.misc as wm - >>> t = pb.linspace(0,7*np.pi,250) - >>> x = pb.sin(t)+0.1*np.sin(50*t) - >>> ind = wm.findextrema(x) - >>> ti, tp = t[ind], x[ind] - >>> a = pb.plot(t,x,'.',ti,tp,'r.') - >>> ind1 = wm.findrfc(tp,0.3); ind1 - array([ 0, 9, 32, 53, 74, 95, 116, 137]) - >>> ind2 = wm.findrfc(tp,0.3, method=''); ind2 - array([ 0, 9, 32, 53, 74, 95, 116, 137]) - >>> a = pb.plot(ti[ind1],tp[ind1]) - >>> pb.close('all') - - See also - -------- - rfcfilter, - findtp. 
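(Aside, not part of the patch.) A usage sketch of findrfc mirroring the doctest above, assuming the wafo package is importable. Note that findrfc works on turning-point values, not on the raw signal.

import numpy as np
import wafo.misc as wm

t = np.linspace(0, 7 * np.pi, 250)
x = np.sin(t) + 0.1 * np.sin(50 * t)
ind = wm.findextrema(x)        # indices of the turning points in x
tp = x[ind]                    # findrfc expects the turning-point values
ind1 = wm.findrfc(tp, 0.3)     # drop rainflow cycles with height below 0.3
# per the doctest above this gives array([  0,   9,  32,  53,  74,  95, 116, 137])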
- ''' - # TODO: merge rfcfilter and findrfc - y1 = atleast_1d(tp).ravel() - - n = len(y1) - ind = zeros(0, dtype=np.int) - ix = 0 - if y1[0] > y1[1]: - #first is a max, ignore it - y = y1[1::] - NC = floor((n - 1) / 2) - 1 - Tstart = 1 - else: - y = y1 - NC = floor(n / 2) - 1 - Tstart = 0 - - if (NC < 1): - return ind #No RFC cycles*/ - - if (y[0] > y[1]) and (y[1] > y[2]): - warnings.warn('This is not a sequence of turningpoints, exit') - return ind - - if (y[0] < y[1]) and (y[1] < y[2]): - warnings.warn('This is not a sequence of turningpoints, exit') - return ind - - if clib is None or method not in ('clib'): - ind = zeros(n, dtype=np.int) - NC = np.int(NC) - for i in xrange(NC): - Tmi = Tstart + 2 * i - Tpl = Tstart + 2 * i + 2 - xminus = y[2 * i] - xplus = y[2 * i + 2] - - if(i != 0): - j = i - 1 - while ((j >= 0) and (y[2 * j + 1] <= y[2 * i + 1])): - if (y[2 * j] < xminus): - xminus = y[2 * j] - Tmi = Tstart + 2 * j - j -= 1 - if (xminus >= xplus): - if (y[2 * i + 1] - xminus >= h): - ind[ix] = Tmi - ix += 1 - ind[ix] = (Tstart + 2 * i + 1) - ix += 1 - #goto L180 continue - else: - j = i + 1 - while (j < NC): - if (y[2 * j + 1] >= y[2 * i + 1]): - break #goto L170 - if((y[2 * j + 2] <= xplus)): - xplus = y[2 * j + 2] - Tpl = (Tstart + 2 * j + 2) - j += 1 - else: - if ((y[2 * i + 1] - xminus) >= h): - ind[ix] = Tmi - ix += 1 - ind[ix] = (Tstart + 2 * i + 1) - ix += 1 - #iy = i - continue - - - #goto L180 - #L170: - if (xplus <= xminus): - if ((y[2 * i + 1] - xminus) >= h): - ind[ix] = Tmi - ix += 1 - ind[ix] = (Tstart + 2 * i + 1) - ix += 1 - elif ((y[2 * i + 1] - xplus) >= h): - ind[ix] = (Tstart + 2 * i + 1) - ix += 1 - ind[ix] = Tpl - ix += 1 - - #L180: - #iy=i - # /* for i */ - else: - ind, ix = clib.findrfc(y, h) - return np.sort(ind[:ix]) - -def mctp2rfc(fmM, fMm=None): - ''' - Return Rainflow matrix given a Markov matrix of a Markov chain of turning points - - computes f_rfc = f_mM + F_mct(f_mM). - - Parameters - ---------- - fmM = the min2max Markov matrix, - fMm = the max2min Markov matrix, - - Returns - ------- - f_rfc = the rainflow matrix, - - Example: - ------- - >>> fmM = np.array([[ 0.0183, 0.0160, 0.0002, 0.0000, 0], - ... [0.0178, 0.5405, 0.0952, 0, 0], - ... [0.0002, 0.0813, 0, 0, 0], - ... [0.0000, 0, 0, 0, 0], - ... [ 0, 0, 0, 0, 0]]) - - >>> mctp2rfc(fmM) - array([[ 2.66998090e-02, 7.79970042e-03, 4.90607697e-07, - 0.00000000e+00, 0.00000000e+00], - [ 9.59962873e-03, 5.48500862e-01, 9.53995094e-02, - 0.00000000e+00, 0.00000000e+00], - [ 5.62297379e-07, 8.14994377e-02, 0.00000000e+00, - 0.00000000e+00, 0.00000000e+00], - [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - 0.00000000e+00, 0.00000000e+00], - [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, - 0.00000000e+00, 0.00000000e+00]]) - - ''' - - if fMm is None: - fmM = np.atleast_1d(fmM) - fMm = fmM.copy() - else: - fmM, fMm = np.atleast_1d(fmM, fMm) - f_mM, f_Mm = fmM.copy(), fMm.copy() - N = max(f_mM.shape) - f_max = np.sum(f_mM, axis=1) - f_min = np.sum(f_mM, axis=0) - f_rfc = zeros((N, N)) - f_rfc[N - 2, 0] = f_max[N - 2] - f_rfc[0, N - 2] = f_min[N - 2] - for k in range(2, N - 1): - for i in range(1, k): - AA = f_mM[N - 1 - k:N - 1 - k + i, k - i:k] - AA1 = f_Mm[N - 1 - k:N - 1 - k + i, k - i:k] - RAA = f_rfc[N - 1 - k:N - 1 - k + i, k - i:k] - nA = max(AA.shape) - MA = f_max[N - 1 - k:N - 1 - k + i] - mA = f_min[k - i:k] - SA = AA.sum() - SRA = RAA.sum() - - DRFC = SA - SRA - NT = min(mA[0] - sum(RAA[:, 0]), MA[0] - sum(RAA[0, :])) # ?? 
check - NT = max(NT, 0) # ??check - - if NT > 1e-6 * max(MA[0], mA[0]): - NN = MA - np.sum(AA, axis=1) # T - e = (mA - np.sum(AA, axis=0)) # T - e = np.flipud(e) - PmM = np.rot90(AA.copy()) - for j in range(nA): - norm = mA[nA - 1 - j] - if norm != 0: - PmM[j, :] = PmM[j, :] / norm - e[j] = e[j] / norm - #end - #end - fx = 0.0; - if max(abs(e)) > 1e-6 and max(abs(NN)) > 1e-6 * max(MA[0], mA[0]): - PMm = AA1.copy() - for j in range(nA): - norm = MA[j] - if norm != 0: - PMm[j, :] = PMm[j, :] / norm; - #end - #end - PMm = np.fliplr(PMm) - - A = PMm - B = PmM - - if nA == 1: - fx = NN * (A / (1 - B * A) * e) - else: - rh = np.eye(A.shape[0]) - np.dot(B, A) - fx = np.dot(NN, np.dot(A, linalg.solve(rh, e))) #least squares - #end - #end - f_rfc[N - 1 - k, k - i] = fx + DRFC - - # check2=[ DRFC fx] - # pause - else: - f_rfc[N - 1 - k, k - i] = 0.0 - #end - #end - m0 = max(0, f_min[0] - np.sum(f_rfc[N - k + 1:N, 0])) - M0 = max(0, f_max[N - 1 - k] - np.sum(f_rfc[N - 1 - k, 1:k])) - f_rfc[N - 1 - k, 0] = min(m0, M0) - #% n_loops_left=N-k+1 - #end - - for k in range(1, N): - M0 = max(0, f_max[0] - np.sum(f_rfc[0, N - k:N])); - m0 = max(0, f_min[N - 1 - k] - np.sum(f_rfc[1:k+1, N - 1 - k])); - f_rfc[0, N - 1 - k] = min(m0, M0) - #end - -# %clf -# %subplot(1,2,2) -# %pcolor(levels(paramm),levels(paramM),flipud(f_mM)) -# % title('Markov matrix') -# % ylabel('max'), xlabel('min') -# %axis([paramm(1) paramm(2) paramM(1) paramM(2)]) -# %axis('square') -# -# %subplot(1,2,1) -# %pcolor(levels(paramm),levels(paramM),flipud(f_rfc)) -# % title('Rainflow matrix') -# % ylabel('max'), xlabel('rfc-min') -# %axis([paramm(1) paramm(2) paramM(1) paramM(2)]) -# %axis('square') - - return f_rfc - - - -def rfcfilter(x, h, method=0): - """ - Rainflow filter a signal. - - Parameters - ----------- - x : vector - Signal. [nx1] - h : real, scalar - Threshold for rainflow filter. - method : scalar, integer - 0 : removes cycles with range < h. (default) - 1 : removes cycles with range <= h. - - Returns - -------- - y = Rainflow filtered signal. - - Examples: - --------- - # 1. Filtered signal y is the turning points of x. - >>> import wafo.data - >>> import wafo.misc as wm - >>> x = wafo.data.sea() - >>> y = wm.rfcfilter(x[:,1], h=0, method=1) - >>> y.shape - (2172,) - >>> y[0:5] - array([-1.2004945 , 0.83950546, -0.09049454, -0.02049454, -0.09049454]) - >>> y[-5::] - array([ 0.18950546, 0.15950546, 0.91950546, -0.51049454, -0.48049454]) - - # 2. This removes all rainflow cycles with range less than 0.5. 
- >>> y1 = wm.rfcfilter(x[:,1], h=0.5) - >>> y1.shape - (863,) - >>> y1[0:5] - array([-1.2004945 , 0.83950546, -0.43049454, 0.34950546, -0.51049454]) - >>> y1[-5::] - array([-0.64049454, 0.65950546, -1.0004945 , 0.91950546, -0.51049454]) - - >>> ind = wm.findtp(x[:,1], h=0.5) - >>> y2 = x[ind,1] - >>> y2[0:5] - array([-1.2004945 , 0.83950546, -0.43049454, 0.34950546, -0.51049454]) - >>> y2[-5::] - array([ 0.83950546, -0.64049454, 0.65950546, -1.0004945 , 0.91950546]) - - See also - -------- - findrfc - """ - # TODO merge rfcfilter and findrfc - y = atleast_1d(x).ravel() - n = len(y) - t = zeros(n, dtype=np.int) - j = 0 - t0 = 0 - y0 = y[t0] - - z0 = 0 - if method == 0: - cmpfun1 = lambda a, b: a <= b - cmpfun2 = lambda a, b: a < b - else: - cmpfun1 = lambda a, b: a < b - cmpfun2 = lambda a, b: a <= b - - # The rainflow filter - for tim1, yi in enumerate(y[1::]): - fpi = y0 + h - fmi = y0 - h - ti = tim1 + 1 - #yi = y[ti] - - if z0 == 0: - if cmpfun1(yi, fmi): - z1 = -1 - elif cmpfun1(fpi, yi): - z1 = +1 - else: - z1 = 0 - t1, y1 = (t0, y0) if z1 == 0 else (ti, yi) - else: - if (((z0 == +1) & cmpfun1(yi, fmi)) | ((z0 == -1) & cmpfun2(yi, fpi))): - z1 = -1 - elif (((z0 == +1) & cmpfun2(fmi, yi)) | ((z0 == -1) & cmpfun1(fpi, yi))): - z1 = +1 - else: - warnings.warn('Something wrong, i=%d' % tim1) - - # Update y1 - if z1 != z0: - t1, y1 = ti, yi - elif z1 == -1: - # y1 = min([y0 xi]) - t1, y1 = (t0, y0) if y0 < yi else (ti, yi) - elif z1 == +1: - # y1 = max([y0 xi]) - t1, y1 = (t0, y0) if y0 > yi else (ti, yi) - - # Update y if y0 is a turning point - if abs(z0 - z1) == 2: - j += 1 - t[j] = t0 - - # Update t0, y0, z0 - t0, y0, z0 = t1, y1, z1 - #end - - #% Update y if last y0 is greater than (or equal) threshold - if cmpfun1(h, abs(y0 - y[t[j]])): - j += 1 - t[j] = t0 - return y[t[:j+1]] - -def findtp(x, h=0.0, kind=None): - ''' - Return indices to turning points (tp) of data, optionally rainflowfiltered. - - Parameters - ---------- - x : vector - signal - h : real, scalar - rainflow threshold - if h<0, then ind = range(len(x)) - if h=0, then tp is a sequence of turning points (default) - if h>0, then all rainflow cycles with height smaller than - h are removed. - kind : string - defines the type of wave or indicate the ASTM rainflow counting method. - Possible options are 'astm' 'mw' 'Mw' or 'none'. - If None all rainflow filtered min and max - will be returned, otherwise only the rainflow filtered - min and max, which define a wave according to the - wave definition, will be returned. - - Returns - ------- - ind : arraylike - indices to the turning points in the original sequence. 
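(Aside, not part of the patch.) The doctests above suggest two equivalent routes to a rainflow-filtered signal, sketched below under the assumption that wafo and its bundled sea data are available: filter the signal directly with rfcfilter, or take indices from findtp and index into the signal.

import wafo.data
import wafo.misc as wm

x = wafo.data.sea()
y1 = wm.rfcfilter(x[:, 1], h=0.5)      # rainflow filter the signal directly
ind = wm.findtp(x[:, 1], h=0.5)        # ...or get indices to filtered turning points
y2 = x[ind, 1]
# per the doctests above, y1 and y2 hold the same turning-point values,
# apart from the end-point handling shown there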
- - Example: - -------- - >>> import wafo.data - >>> import pylab as plb - >>> import wafo.misc as wm - >>> x = wafo.data.sea() - >>> x1 = x[0:200,:] - >>> itp = wm.findtp(x1[:,1],0,'Mw') - >>> itph = wm.findtp(x1[:,1],0.3,'Mw') - >>> tp = x1[itp,:] - >>> tph = x1[itph,:] - >>> a = plb.plot(x1[:,0],x1[:,1],tp[:,0],tp[:,1],'ro',tph[:,1],tph[:,1],'k.') - >>> plb.close('all') - >>> itp - array([ 11, 21, 22, 24, 26, 28, 31, 39, 43, 45, 47, 51, 56, - 64, 70, 78, 82, 84, 89, 94, 101, 108, 119, 131, 141, 148, - 149, 150, 159, 173, 184, 190, 199]) - >>> itph - array([ 11, 28, 31, 39, 47, 51, 56, 64, 70, 78, 89, 94, 101, - 108, 119, 131, 141, 148, 159, 173, 184, 190, 199]) - - See also - --------- - findtc - findcross - findextrema - findrfc - ''' - n = len(x) - if h < 0.0: - return arange(n) - - ind = findextrema(x) - - if ind.size < 2: - return None - - #% In order to get the exact up-crossing intensity from rfc by - #% mm2lc(tp2mm(rfc)) we have to add the indices - #% to the last value (and also the first if the - #% sequence of turning points does not start with a minimum). - - if kind == 'astm': - # the Nieslony approach always put the first loading point as the first - # turning point. - if x[ind[0]] != x[0]: # add the first turning point is the first of the signal - ind = np.r_[0, ind, n - 1] - else: # only add the last point of the signal - ind = np.r_[ind, n - 1] - else: - if x[ind[0]] > x[ind[1]]: # adds indices to first and last value - ind = r_[0, ind, n - 1] - else: # adds index to the last value - ind = r_[ind, n - 1] - - if h > 0.0: - ind1 = findrfc(x[ind], h) - ind = ind[ind1] - - if kind in ('mw', 'Mw'): - xor = lambda a, b: a ^ b - # make sure that the first is a Max if wdef == 'Mw' - # or make sure that the first is a min if wdef == 'mw' - first_is_max = (x[ind[0]] > x[ind[1]]) - - remove_first = xor(first_is_max, kind.startswith('Mw')) - if remove_first: - ind = ind[1::] - - # make sure the number of minima and Maxima are according to the - # wavedef. i.e., make sure Nm=length(ind) is odd - if (mod(ind.size, 2)) != 1: - ind = ind[:-1] - return ind - -def findtc(x_in, v=None, kind=None): - """ - Return indices to troughs and crests of data. - - Parameters - ---------- - x : vector - surface elevation. - v : real scalar - reference level (default v = mean of x). - - kind : string - defines the type of wave. Possible options are - 'dw', 'uw', 'tw', 'cw' or None. - If None indices to all troughs and crests will be returned, - otherwise only the paired ones will be returned - according to the wavedefinition. - - Returns - -------- - tc_ind : vector of ints - indices to the trough and crest turningpoints of sequence x. - v_ind : vector of ints - indices to the level v crossings of the original - sequence x. 
(d,u) - - Example: - -------- - >>> import wafo.data - >>> import pylab as plb - >>> import wafo.misc as wm - >>> x = wafo.data.sea() - >>> x1 = x[0:200,:] - >>> itc, iv = wm.findtc(x1[:,1],0,'dw') - >>> tc = x1[itc,:] - >>> a = plb.plot(x1[:,0],x1[:,1],tc[:,0],tc[:,1],'ro') - >>> plb.close('all') - - See also - -------- - findtp - findcross, - wavedef - """ - - x = atleast_1d(x_in) - if v is None: - v = x.mean() - - v_ind = findcross(x, v, kind) - n_c = v_ind.size - if n_c <= 2: - warnings.warn('There are no waves!') - return zeros(0, dtype=np.int), zeros(0, dtype=np.int) - - # determine the number of trough2crest (or crest2trough) cycles - isodd = mod(n_c, 2) - if isodd: - n_tc = int((n_c - 1) / 2) - else: - n_tc = int((n_c - 2) / 2) - - #% allocate variables before the loop increases the speed - ind = zeros(n_c - 1, dtype=np.int) - - first_is_down_crossing = (x[v_ind[0]] > x[v_ind[0] + 1]) - if first_is_down_crossing: - for i in xrange(n_tc): - #% trough - j = 2 * i - ind[j] = x[v_ind[j] + 1:v_ind[j + 1] + 1].argmin() - #% crest - ind[j + 1] = x[v_ind[j + 1] + 1:v_ind[j + 2] + 1].argmax() - - if (2 * n_tc + 1 < n_c) and (kind in (None, 'tw')): - #% trough - ind[n_c - 2] = x[v_ind[n_c - 2] + 1:v_ind[n_c - 1]].argmin() - - else: # %%%% the first is a up-crossing - for i in xrange(n_tc): - #% trough - j = 2 * i - ind[j] = x[v_ind[j] + 1:v_ind[j + 1] + 1].argmax() - #% crest - ind[j + 1] = x[v_ind[j + 1] + 1:v_ind[j + 2] + 1].argmin() - - if (2 * n_tc + 1 < n_c) and (kind in (None, 'cw')): - #% trough - ind[n_c - 2] = x[v_ind[n_c - 2] + 1:v_ind[n_c - 1]].argmax() - - return v_ind[:n_c - 1] + ind + 1, v_ind - -def findoutliers(x, zcrit=0.0, dcrit=None, ddcrit=None, verbose=False): - """ - Return indices to spurious points of data - - Parameters - ---------- - x : vector - of data values. - zcrit : real scalar - critical distance between consecutive points. - dcrit : real scalar - critical distance of Dx used for determination of spurious - points. (Default 1.5 standard deviation of x) - ddcrit : real scalar - critical distance of DDx used for determination of spurious - points. (Default 1.5 standard deviation of x) - - Returns - ------- - inds : ndarray of integers - indices to spurious points. - indg : ndarray of integers - indices to the rest of the points. - - Notes - ----- - Consecutive points less than zcrit apart are considered as spurious. - The point immediately after and before are also removed. Jumps greater than - dcrit in Dxn and greater than ddcrit in D^2xn are also considered as spurious. - (All distances to be interpreted in the vertical direction.) - Another good choice for dcrit and ddcrit are: - - dcrit = 5*dT and ddcrit = 9.81/2*dT**2 - - where dT is the timestep between points. - - Examples - -------- - >>> import numpy as np - >>> import wafo - >>> import wafo.misc as wm - >>> xx = wafo.data.sea() - >>> dt = np.diff(xx[:2,0]) - >>> dcrit = 5*dt - >>> ddcrit = 9.81/2*dt*dt - >>> zcrit = 0 - >>> [inds, indg] = wm.findoutliers(xx[:,1],zcrit,dcrit,ddcrit,verbose=True) - Found 0 spurious positive jumps of Dx - Found 0 spurious negative jumps of Dx - Found 37 spurious positive jumps of D^2x - Found 200 spurious negative jumps of D^2x - Found 244 consecutive equal values - Found the total of 1152 spurious points - - #waveplot(xx,'-',xx(inds,:),1,1,1) - - See also - -------- - waveplot, reconstruct - """ - - - # finding outliers - findjumpsDx = True # find jumps in Dx - # two point spikes and Spikes dcrit above/under the - # previous and the following point are spurios. 
- findSpikes = False #find spikes - findDspikes = False # find double (two point) spikes - findjumpsD2x = True # find jumps in D^2x - findNaN = True # % find missing values - - xn = asarray(x).flatten() - - if xn.size < 2: - raise ValueError('The vector must have more than 2 elements!') - - - ind = zeros(0, dtype=int) - #indg=[] - indmiss = isnan(xn) - if findNaN and indmiss.any(): - ind, = nonzero(indmiss) - if verbose: - print('Found %d missing points' % ind.size) - xn[indmiss] = 0. #%set NaN's to zero - - if dcrit is None: - dcrit = 1.5 * xn.std() - if verbose: - print('dcrit is set to %g' % dcrit) - - if ddcrit is None: - ddcrit = 1.5 * xn.std() - if verbose: - print('ddcrit is set to %g' % ddcrit) - - dxn = diff(xn) - ddxn = diff(dxn) - - if findSpikes: # finding spurious spikes - tmp, = nonzero((dxn[:-1] > dcrit) * (dxn[1::] < -dcrit) | - (dxn[:-1] < -dcrit) * (dxn[1::] > dcrit)) - if tmp.size > 0: - tmp = tmp + 1 - ind = hstack((ind, tmp)) - if verbose: - print('Found %d spurious spikes' % tmp.size) - - if findDspikes: #,% finding spurious double (two point) spikes - tmp, = nonzero((dxn[:-2] > dcrit) * (dxn[2::] < -dcrit) | - (dxn[:-2] < -dcrit) * (dxn[2::] > dcrit)) - if tmp.size > 0: - tmp = tmp + 1 - ind = hstack((ind, tmp, tmp + 1)) #%removing both points - if verbose: - print('Found %d spurious two point (double) spikes' % tmp.size) - - if findjumpsDx: # ,% finding spurious jumps in Dx - tmp, = nonzero(dxn > dcrit) - if verbose: - print('Found %d spurious positive jumps of Dx' % tmp.size) - if tmp.size > 0: - ind = hstack((ind, tmp + 1)) #removing the point after the jump - - tmp, = nonzero(dxn < -dcrit) - if verbose: - print('Found %d spurious negative jumps of Dx' % tmp.size) - if tmp.size > 0: - ind = hstack((ind, tmp)) #removing the point before the jump - - if findjumpsD2x: # ,% finding spurious jumps in D^2x - tmp, = nonzero(ddxn > ddcrit) - if tmp.size > 0: - tmp = tmp + 1 - ind = hstack((ind, tmp)) # removing the jump - - if verbose: - print('Found %d spurious positive jumps of D^2x' % tmp.size) - - tmp, = nonzero(ddxn < -ddcrit) - if tmp.size > 0: - tmp = tmp + 1 - ind = hstack((ind, tmp)) # removing the jump - - if verbose: - print('Found %d spurious negative jumps of D^2x' % tmp.size) - - if zcrit >= 0.0: - #% finding consecutive values less than zcrit apart. - indzeros = (abs(dxn) <= zcrit) - indz, = nonzero(indzeros) - if indz.size > 0: - indz = indz + 1 - #%finding the beginning and end of consecutive equal values - indtr, = nonzero((diff(indzeros))) - indtr = indtr + 1 - #%indices to consecutive equal points - if True: # removing the point before + all equal points + the point after - ind = hstack((ind, indtr - 1, indz, indtr, indtr + 1)) - else: # % removing all points + the point after - ind = hstack((ind, indz, indtr, indtr + 1)) - - if verbose: - if zcrit == 0.: - print('Found %d consecutive equal values' % indz.size) - else: - print('Found %d consecutive values less than %g apart.' % (indz.size, zcrit)) - indg = ones(xn.size, dtype=bool) - - if ind.size > 1: - ind = unique(ind) - indg[ind] = 0 - indg, = nonzero(indg) - - if verbose: - print('Found the total of %d spurious points' % ind.size) - - return ind, indg - -def common_shape(*args, ** kwds): - ''' - Return the common shape of a sequence of arrays - - Parameters - ----------- - *args : arraylike - sequence of arrays - **kwds : - shape - - Returns - ------- - shape : tuple - common shape of the elements of args. 
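(Aside, not part of the patch.) common_shape follows numpy's broadcasting rules, so numpy itself can serve as a cross-check; the standalone snippet below uses np.broadcast on the same kinds of operands as the docstring example.

import numpy as np

A = np.ones((4, 1))
B = 2                     # scalars broadcast against anything
C = np.ones((1, 5)) * 5
print(np.broadcast(A, B, C).shape)    # (4, 5): length-1 axes stretch to match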
- - Raises - ------ - An error is raised if some of the arrays do not conform - to the common shape according to the broadcasting rules in numpy. - - Examples - -------- - >>> import numpy as np - >>> import wafo.misc as wm - >>> A = np.ones((4,1)) - >>> B = 2 - >>> C = np.ones((1,5))*5 - >>> wm.common_shape(A,B,C) - (4, 5) - >>> wm.common_shape(A,B,C,shape=(3,4,1)) - (3, 4, 5) - - See also - -------- - broadcast, broadcast_arrays - ''' - args = map(asarray, args) - shapes = [x.shape for x in args] - shape = kwds.get('shape') - if shape is not None: - if not isinstance(shape, (list, tuple)): - shape = (shape,) - shapes.append(tuple(shape)) - if len(set(shapes)) == 1: - # Common case where nothing needs to be broadcasted. - return tuple(shapes[0]) - shapes = [list(s) for s in shapes] - nds = [len(s) for s in shapes] - biggest = max(nds) - # Go through each array and prepend dimensions of length 1 to each of the - # shapes in order to make the number of dimensions equal. - for i in range(len(shapes)): - diff = biggest - nds[i] - if diff > 0: - shapes[i] = [1] * diff + shapes[i] - - # Check each dimension for compatibility. A dimension length of 1 is - # accepted as compatible with any other length. - c_shape = [] - for axis in range(biggest): - lengths = [s[axis] for s in shapes] - unique = set(lengths + [1]) - if len(unique) > 2: - # There must be at least two non-1 lengths for this axis. - raise ValueError("shape mismatch: two or more arrays have " - "incompatible dimensions on axis %r." % (axis,)) - elif len(unique) == 2: - # There is exactly one non-1 length. The common shape will take this - # value. - unique.remove(1) - new_length = unique.pop() - c_shape.append(new_length) - else: - # Every array has a length of 1 on this axis. Strides can be left - # alone as nothing is broadcasted. - c_shape.append(1) - - return tuple(c_shape) - -def argsreduce(condition, * args): - """ Return the elements of each input array that satisfy some condition. - - Parameters - ---------- - condition : array_like - An array whose nonzero or True entries indicate the elements of each - input array to extract. The shape of 'condition' must match the common - shape of the input arrays according to the broadcasting rules in numpy. - arg1, arg2, arg3, ... : array_like - one or more input arrays. - - Returns - ------- - narg1, narg2, narg3, ... : ndarray - sequence of extracted copies of the input arrays converted to the same - size as the nonzero values of condition. - - Example - ------- - >>> import wafo.misc as wm - >>> import numpy as np - >>> rand = np.random.random_sample - >>> A = rand((4,5)) - >>> B = 2 - >>> C = rand((1,5)) - >>> cond = np.ones(A.shape) - >>> [A1,B1,C1] = wm.argsreduce(cond,A,B,C) - >>> B1.shape - (20,) - >>> cond[2,:] = 0 - >>> [A2,B2,C2] = wm.argsreduce(cond,A,B,C) - >>> B2.shape - (15,) - - See also - -------- - numpy.extract - """ - newargs = atleast_1d(*args) - if not isinstance(newargs, list): - newargs = [newargs, ] - expand_arr = (condition == condition) - return [extract(condition, arr1 * expand_arr) for arr1 in newargs] - - -def stirlerr(n): - ''' - Return error of Stirling approximation, i.e., log(n!) - log( sqrt(2*pi*n)*(n/exp(1))**n ) - - Example - ------- - >>> import wafo.misc as wm - >>> wm.stirlerr(2) - array([ 0.0413407]) - - See also - --------- - binom - - - Reference - ----------- - Catherine Loader (2000). 
- Fast and Accurate Computation of Binomial Probabilities - - - ''' - - S0 = 0.083333333333333333333 # /* 1/12 */ - S1 = 0.00277777777777777777778 # /* 1/360 */ - S2 = 0.00079365079365079365079365 # /* 1/1260 */ - S3 = 0.000595238095238095238095238 # /* 1/1680 */ - S4 = 0.0008417508417508417508417508 # /* 1/1188 */ - - n1 = atleast_1d(n) - - y = gammaln(n1 + 1) - log(sqrt(2 * pi * n1) * (n1 / exp(1)) ** n1) - - - nn = n1 * n1 - - n500 = 500 < n1 - y[n500] = (S0 - S1 / nn[n500]) / n1[n500] - n80 = logical_and(80 < n1, n1 <= 500) - if any(n80): - y[n80] = (S0 - (S1 - S2 / nn[n80]) / nn[n80]) / n1[n80] - n35 = logical_and(35 < n1, n1 <= 80) - if any(n35): - nn35 = nn[n35] - y[n35] = (S0 - (S1 - (S2 - S3 / nn35) / nn35) / nn35) / n1[n35] - - n15 = logical_and(15 < n1, n1 <= 35) - if any(n15): - nn15 = nn[n15] - y[n15] = (S0 - (S1 - (S2 - (S3 - S4 / nn15) / nn15) / nn15) / nn15) / n1[n15] - - return y - -def getshipchar(value=None, property="max_deadweight", **kwds): #@ReservedAssignment - ''' - Return ship characteristics from value of one ship-property - - Parameters - ---------- - value : scalar - value to use in the estimation. - property : string - defining the ship property used in the estimation. Options are: - 'max_deadweight','length','beam','draft','service_speed', - 'propeller_diameter'. - The length was found from statistics of 40 vessels of size 85 to - 100000 tonn. An exponential curve through 0 was selected, and the - factor and exponent that minimized the standard deviation of the relative - error was selected. (The error returned is the same for any ship.) The - servicespeed was found for ships above 1000 tonns only. - The propeller diameter formula is from [1]_. - - Returns - ------- - sc : dict - containing estimated mean values and standard-deviations of ship characteristics: - max_deadweight [kkg], (weight of cargo, fuel etc.) - length [m] - beam [m] - draught [m] - service_speed [m/s] - propeller_diameter [m] - - Example - --------- - >>> import wafo.misc as wm - >>> sc = wm.getshipchar(10,'service_speed') - >>> for key in sorted(sc): key, sc[key] - ('beam', 29.0) - ('beamSTD', 2.9000000000000004) - ('draught', 9.6) - ('draughtSTD', 2.112) - ('length', 216.0) - ('lengthSTD', 2.011309883194276) - ('max_deadweight', 30969.0) - ('max_deadweightSTD', 3096.9) - ('propeller_diameter', 6.761165385916601) - ('propeller_diameterSTD', 0.20267047566705432) - ('service_speed', 10.0) - ('service_speedSTD', 0) - - Other units: 1 ft = 0.3048 m and 1 knot = 0.5144 m/s - - - Reference - --------- - .. [1] Gray and Greeley, (1978), - "Source level model for propeller blade rate radiation for the world's merchant - fleet", Bolt Beranek and Newman Technical Memorandum No. 458. 
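(Aside, not part of the patch.) The stirlerr helper implemented a little further up evaluates, for moderate n, its defining expression directly, which makes it easy to sanity-check with scipy in a standalone sketch.

import numpy as np
from scipy.special import gammaln

n = 2.0
# log(n!) - log( sqrt(2*pi*n) * (n/e)**n ), the quantity stirlerr computes
err = gammaln(n + 1) - np.log(np.sqrt(2 * np.pi * n) * (n / np.e) ** n)
print(err)   # ~0.0413407, matching the stirlerr doctest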
- ''' - if value is None: - names = kwds.keys() - if len(names)!=1: - raise ValueError('Only on keyword') - property = names[0] #@ReservedAssignment - value = kwds[property] - value = np.atleast_1d(value) - valid_props = dict(l='length', b='beam', d='draught', m='max_deadweigth', - s='service_speed', p='propeller_diameter') - prop = valid_props[property[0]] - - prop2max_dw = dict(length=lambda x: (x / 3.45) ** (2.5), - beam=lambda x: ((x / 1.78) ** (1 / 0.27)), - draught=lambda x: ((x / 0.8) ** (1 / 0.24)), - service_speed=lambda x: ((x / 1.14) ** (1 / 0.21)), - propeller_diameter=lambda x: (((x / 0.12) ** (4 / 3) / 3.45) ** (2.5))) - - max_deadweight = prop2max_dw.get(prop, lambda x: x)(value) - propertySTD = prop + 'STD' - - length = round(3.45 * max_deadweight ** 0.40) - length_err = length ** 0.13 - - beam = round(1.78 * max_deadweight ** 0.27 * 10) / 10 - beam_err = beam * 0.10 - - draught = round(0.80 * max_deadweight ** 0.24 * 10) / 10 - draught_err = draught * 0.22 - - #S = round(2/3*(L)**0.525) - speed = round(1.14 * max_deadweight ** 0.21 * 10) / 10 - speed_err = speed * 0.10 - - - p_diam = 0.12 * length ** (3.0 / 4.0) - p_diam_err = 0.12 * length_err ** (3.0 / 4.0) - - max_deadweight = round(max_deadweight) - max_deadweightSTD = 0.1 * max_deadweight - - shipchar = OrderedDict(beam=beam, beamSTD=beam_err, - draught=draught, draughtSTD=draught_err, - length=length, lengthSTD=length_err, - max_deadweight=max_deadweight, max_deadweightSTD=max_deadweightSTD, - propeller_diameter=p_diam, propeller_diameterSTD=p_diam_err, - service_speed=speed, service_speedSTD=speed_err) - - shipchar[propertySTD] = 0 - return shipchar - -def betaloge(z, w): - ''' - Natural Logarithm of beta function. - - CALL betaloge(z,w) - - BETALOGE computes the natural logarithm of the beta - function for corresponding elements of Z and W. The arrays Z and - W must be real and nonnegative. Both arrays must be the same size, - or either can be scalar. BETALOGE is defined as: - - y = LOG(BETA(Z,W)) = gammaln(Z)+gammaln(W)-gammaln(Z+W) - - and is obtained without computing BETA(Z,W). Since the beta - function can range over very large or very small values, its - logarithm is sometimes more useful. - This implementation is more accurate than the BETALN implementation - for large arguments - - Example - ------- - >>> import wafo.misc as wm - >>> wm.betaloge(3,2) - array([-2.48490665]) - - See also - -------- - betaln, beta - ''' - # y = gammaln(z)+gammaln(w)-gammaln(z+w) - zpw = z + w - return (stirlerr(z) + stirlerr(w) + 0.5 * log(2 * pi) + (w - 0.5) * log(w) - + (z - 0.5) * log(z) - stirlerr(zpw) - (zpw - 0.5) * log(zpw)) - - # stirlings approximation: - # (-(zpw-0.5).*log(zpw) +(w-0.5).*log(w)+(z-0.5).*log(z) +0.5*log(2*pi)) - #return y - -def gravity(phi=45): - ''' Returns the constant acceleration of gravity - - GRAVITY calculates the acceleration of gravity - using the international gravitational formulae [1]_: - - g = 9.78049*(1+0.0052884*sin(phir)**2-0.0000059*sin(2*phir)**2) - where - phir = phi*pi/180 - - Parameters - ---------- - phi : {float, int} - latitude in degrees - - Returns - -------- - g : ndarray - acceleration of gravity [m/s**2] - - Examples - -------- - >>> import wafo.misc as wm - >>> import numpy as np - >>> phi = np.linspace(0,45,5) - >>> wm.gravity(phi) - array([ 9.78049 , 9.78245014, 9.78803583, 9.79640552, 9.80629387]) - - See also - -------- - wdensity - - References - ---------- - .. 
[1] Irgens, Fridtjov (1987) - "Formelsamling i mekanikk: - statikk, fasthetsl?re, dynamikk fluidmekanikk" - tapir forlag, University of Trondheim, - ISBN 82-519-0786-1, pp 19 - - ''' - - phir = phi * pi / 180. # change from degrees to radians - return 9.78049 * (1. + 0.0052884 * sin(phir) ** 2. - 0.0000059 * sin(2 * phir) ** 2.) - -def nextpow2(x): - ''' - Return next higher power of 2 - - Example - ------- - >>> import wafo.misc as wm - >>> wm.nextpow2(10) - 4 - >>> wm.nextpow2(np.arange(5)) - 3 - ''' - t = isscalar(x) or len(x) - if (t > 1): - f, n = frexp(t) - else: - f, n = frexp(abs(x)) - - if (f == 0.5): - n = n - 1 - return n - -def discretize(fun, a, b, tol=0.005, n=5, method='linear'): - ''' - Automatic discretization of function - - Parameters - ---------- - fun : callable - function to discretize - a,b : real scalars - evaluation limits - tol : real, scalar - absoute error tolerance - n : scalar integer - number of values - method : string - defining method of gridding, options are 'linear' and 'adaptive' - - Returns - ------- - x : discretized values - y : fun(x) - - Example - ------- - >>> import wafo.misc as wm - >>> import numpy as np - >>> import pylab as plb - >>> x,y = wm.discretize(np.cos, 0, np.pi) - >>> xa,ya = wm.discretize(np.cos, 0, np.pi, method='adaptive') - >>> t = plb.plot(x, y, xa, ya, 'r.') - >>> plb.show() - - >>> plb.close('all') - - ''' - if method.startswith('a'): - return _discretize_adaptive(fun, a, b, tol, n) - else: - return _discretize_linear(fun, a, b, tol, n) - -def _discretize_linear(fun, a, b, tol=0.005, n=5): - ''' - Automatic discretization of function, linear gridding - ''' - tiny = floatinfo.tiny - - - x = linspace(a, b, n) - y = fun(x) - - err0 = inf - err = 10000 - nmax = 2 ** 20 - while (err != err0 and err > tol and n < nmax): - err0 = err - x0 = x - y0 = y - n = 2 * (n - 1) + 1 - x = linspace (a, b, n) - y = fun(x) - y00 = interp(x, x0, y0) - err = 0.5 * amax(abs((y00 - y) / (abs(y00 + y) + tiny))) - return x, y - -def _discretize_adaptive(fun, a, b, tol=0.005, n=5): - ''' - Automatic discretization of function, adaptive gridding. - ''' - tiny = floatinfo.tiny - n += (mod(n, 2) == 0) # make sure n is odd - x = linspace(a, b, n) - fx = fun(x) - - n2 = (n - 1) / 2 - erri = hstack((zeros((n2, 1)), ones((n2, 1)))).ravel() - err = erri.max() - err0 = inf - #while (err != err0 and err > tol and n < nmax): - for j in range(50): - if err != err0 and np.any(erri > tol): - err0 = err - # find top errors - - I, = where(erri > tol) - # double the sample rate in intervals with the most error - y = (vstack(((x[I] + x[I - 1]) / 2, (x[I + 1] + x[I]) / 2)).T).ravel() - fy = fun(y) - - fy0 = interp(y, x, fx) - erri = 0.5 * (abs((fy0 - fy) / (abs(fy0 + fy) + tiny))) - - err = erri.max() - - x = hstack((x, y)) - - I = x.argsort() - x = x[I] - erri = hstack((zeros(len(fx)), erri))[I] - fx = hstack((fx, fy))[I] - - else: - break - else: - warnings.warn('Recursion level limit reached j=%d' % j) - - return x, fx - - -def polar2cart(theta, rho, z=None): - ''' - Transform polar coordinates into 2D cartesian coordinates. - - Returns - ------- - x, y : array-like - Cartesian coordinates, x = rho*cos(theta), y = rho*sin(theta) - - See also - -------- - cart2polar - ''' - x, y = rho * cos(theta), rho * sin(theta) - if z is None: - return x, y - else: - return x, y, z - - -def cart2polar(x, y, z=None): - ''' Transform 2D cartesian coordinates into polar coordinates. 
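(Aside, not part of the patch.) polar2cart and cart2polar, defined around this point in the hunk, are exact inverses of each other away from the origin; a standalone round-trip check:

import numpy as np

theta, rho = np.pi / 3, 2.0
x, y = rho * np.cos(theta), rho * np.sin(theta)        # polar2cart
theta2, rho2 = np.arctan2(y, x), np.hypot(x, y)        # cart2polar
print(np.allclose([theta2, rho2], [theta, rho]))       # True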
- - Returns - ------- - theta : array-like - arctan2(y,x) - rho : array-like - sqrt(x**2+y**2) - - See also - -------- - polar2cart - ''' - t, r = arctan2(y, x), hypot(x, y) - if z is None: - return t, r - else: - return t, r, z - - -def meshgrid(*xi, **kwargs): - """ - Return coordinate matrices from one or more coordinate vectors. - - Make N-D coordinate arrays for vectorized evaluations of - N-D scalar/vector fields over N-D grids, given - one-dimensional coordinate arrays x1, x2,..., xn. - - Parameters - ---------- - x1, x2,..., xn : array_like - 1-D arrays representing the coordinates of a grid. - indexing : 'xy' or 'ij' (optional) - cartesian ('xy', default) or matrix ('ij') indexing of output - sparse : True or False (default) (optional) - If True a sparse grid is returned in order to conserve memory. - copy : True (default) or False (optional) - If False a view into the original arrays are returned in order to - conserve memory - - Returns - ------- - X1, X2,..., XN : ndarray - For vectors `x1`, `x2`,..., 'xn' with lengths ``Ni=len(xi)`` , - return ``(N1, N2, N3,...Nn)`` shaped arrays if indexing='ij' - or ``(N2, N1, N3,...Nn)`` shaped arrays if indexing='xy' - with the elements of `xi` repeated to fill the matrix along - the first dimension for `x1`, the second for `x2` and so on. - - See Also - -------- - index_tricks.mgrid : Construct a multi-dimensional "meshgrid" - using indexing notation. - index_tricks.ogrid : Construct an open multi-dimensional "meshgrid" - using indexing notation. - - Examples - -------- - >>> x = np.linspace(0,1,3) # coordinates along x axis - >>> y = np.linspace(0,1,2) # coordinates along y axis - >>> xv, yv = meshgrid(x,y) # extend x and y for a 2D xy grid - >>> xv - array([[ 0. , 0.5, 1. ], - [ 0. , 0.5, 1. ]]) - >>> yv - array([[ 0., 0., 0.], - [ 1., 1., 1.]]) - >>> xv, yv = meshgrid(x,y, sparse=True) # make sparse output arrays - >>> xv - array([[ 0. , 0.5, 1. ]]) - >>> yv - array([[ 0.], - [ 1.]]) - - >>> meshgrid(x,y,sparse=True,indexing='ij') # change to matrix indexing - [array([[ 0. ], - [ 0.5], - [ 1. ]]), array([[ 0., 1.]])] - >>> meshgrid(x,y,indexing='ij') - [array([[ 0. , 0. ], - [ 0.5, 0.5], - [ 1. , 1. ]]), array([[ 0., 1.], - [ 0., 1.], - [ 0., 1.]])] - - >>> meshgrid(0,1,5) # just a 3D point - [array([[[0]]]), array([[[1]]]), array([[[5]]])] - >>> map(np.squeeze,meshgrid(0,1,5)) # just a 3D point - [array(0), array(1), array(5)] - >>> meshgrid(3) - array([3]) - >>> meshgrid(y) # 1D grid y is just returned - array([ 0., 1.]) - - `meshgrid` is very useful to evaluate functions on a grid. 
- - >>> x = np.arange(-5, 5, 0.1) - >>> y = np.arange(-5, 5, 0.1) - >>> xx, yy = meshgrid(x, y, sparse=True) - >>> z = np.sin(xx**2+yy**2)/(xx**2+yy**2) - """ - copy_ = kwargs.get('copy', True) - args = atleast_1d(*xi) - if not isinstance(args, list): - if args.size > 0: - return args.copy() if copy_ else args - else: - raise TypeError('meshgrid() take 1 or more arguments (0 given)') - - sparse = kwargs.get('sparse', False) - indexing = kwargs.get('indexing', 'xy') # 'ij' - - - ndim = len(args) - s0 = (1,) * ndim - output = [x.reshape(s0[:i] + (-1,) + s0[i + 1::]) for i, x in enumerate(args)] - - shape = [x.size for x in output] - - if indexing == 'xy': - # switch first and second axis - output[0].shape = (1, -1) + (1,) * (ndim - 2) - output[1].shape = (-1, 1) + (1,) * (ndim - 2) - shape[0], shape[1] = shape[1], shape[0] - - if sparse: - if copy_: - return [x.copy() for x in output] - else: - return output - else: - # Return the full N-D matrix (not only the 1-D vector) - if copy_: - mult_fact = ones(shape, dtype=int) - return [x * mult_fact for x in output] - else: - return broadcast_arrays(*output) - - -def ndgrid(*args, **kwargs): - """ - Same as calling meshgrid with indexing='ij' (see meshgrid for - documentation). - """ - kwargs['indexing'] = 'ij' - return meshgrid(*args, ** kwargs) - -def trangood(x, f, min_n=None, min_x=None, max_x=None, max_n=inf): - """ - Make sure transformation is efficient. - - Parameters - ------------ - x, f : array_like - input transform function, (x,f(x)). - min_n : scalar, int - minimum number of points in the good transform. - (Default x.shape[0]) - min_x : scalar, real - minimum x value to transform. (Default min(x)) - max_x : scalar, real - maximum x value to transform. (Default max(x)) - max_n : scalar, int - maximum number of points in the good transform - (default inf) - Returns - ------- - x, f : array_like - the good transform function. - - TRANGOOD interpolates f linearly and optionally - extrapolate it linearly outside the range of x - with X uniformly spaced. - - See also - --------- - tranproc, - numpy.interp - """ - xo, fo = atleast_1d(x, f) - #n = xo.size - if (xo.ndim != 1): - raise ValueError('x must be a vector.') - if (fo.ndim != 1): - raise ValueError('f must be a vector.') - - i = xo.argsort() - xo = xo[i] - fo = fo[i] - del i - dx = diff(xo) - if (any(dx <= 0)): - raise ValueError('Duplicate x-values not allowed.') - - nf = fo.shape[0] - - if max_x is None: - max_x = xo[-1] - if min_x is None: - min_x = xo[0] - if min_n is None: - min_n = nf - if (min_n < 2): - min_n = 2 - if (max_n < 2): - max_n = 2 - - ddx = diff(dx) - xn = xo[-1] - x0 = xo[0] - L = float(xn - x0) - eps = floatinfo.eps - if ((nf < min_n) or (max_n < nf) or any(abs(ddx) > 10 * eps * (L))): -## % pab 07.01.2001: Always choose the stepsize df so that -## % it is an exactly representable number. -## % This is important when calculating numerical derivatives and is -## % accomplished by the following. - dx = L / (min(min_n, max_n) - 1) - dx = (dx + 2.) - 2. 
- xi = arange(x0, xn + dx / 2., dx) - #% New call pab 11.11.2000: This is much quicker - fo = interp(xi, xo, fo) - xo = xi - -# x is now uniformly spaced - dx = xo[1] - xo[0] - - # Extrapolate linearly outside the range of ff - if (min_x < xo[0]): - x1 = dx * arange(floor((min_x - xo[0]) / dx), -2) - f2 = fo[0] + x1 * (fo[1] - fo[0]) / (xo[1] - xo[0]) - fo = hstack((f2, fo)) - xo = hstack((x1 + xo[0], xo)) - - if (max_x > xo[-1]): - x1 = dx * arange(1, ceil((max_x - xo[-1]) / dx) + 1) - f2 = f[-1] + x1 * (f[-1] - f[-2]) / (xo[-1] - xo[-2]) - fo = hstack((fo, f2)) - xo = hstack((xo, x1 + xo[-1])) - - return xo, fo - -def tranproc(x, f, x0, *xi): - """ - Transforms process X and up to four derivatives - using the transformation f. - - Parameters - ---------- - x,f : array-like - [x,f(x)], transform function, y = f(x). - x0, x1,...,xn : vectors - where xi is the i'th time derivative of x0. 0<=N<=4. - - Returns - ------- - y0, y1,...,yn : vectors - where yi is the i'th time derivative of y0 = f(x0). - - By the basic rules of derivation: - Y1 = f'(X0)*X1 - Y2 = f''(X0)*X1^2 + f'(X0)*X2 - Y3 = f'''(X0)*X1^3 + f'(X0)*X3 + 3*f''(X0)*X1*X2 - Y4 = f''''(X0)*X1^4 + f'(X0)*X4 + 6*f'''(X0)*X1^2*X2 - + f''(X0)*(3*X2^2 + 4*X1*X3) - - The derivation of f is performed numerically with a central difference - method with linear extrapolation towards the beginning and end of f, - respectively. - - Example - -------- - Derivative of g and the transformed Gaussian model. - >>> import pylab as plb - >>> import wafo.misc as wm - >>> import wafo.transform.models as wtm - >>> tr = wtm.TrHermite() - >>> x = linspace(-5,5,501) - >>> g = tr(x) - >>> gder = wm.tranproc(x, g, x, ones(g.shape[0])) - >>> h = plb.plot(x, g, x, gder[1]) - - plb.plot(x,pdfnorm(g)*gder[1],x,pdfnorm(x)) - plb.legend('Transformed model','Gaussian model') - - >>> plb.close('all') - - See also - -------- - trangood. - """ - - eps = floatinfo.eps - xo, fo, x0 = atleast_1d(x, f, x0) - xi = atleast_1d(*xi) - if not isinstance(xi, list): - xi = [xi, ] - N = len(xi) # N = number of derivatives - nmax = ceil((xo.ptp()) * 10 ** (7. / max(N, 1))) - xo, fo = trangood(xo, fo, min_x=min(x0), max_x=max(x0), max_n=nmax) - - n = f.shape[0] - #y = x0.copy() - xu = (n - 1) * (x0 - xo[0]) / (xo[-1] - xo[0]) - - fi = asarray(floor(xu), dtype=int) - fi = where(fi == n - 1, fi - 1, fi) - - xu = xu - fi - y0 = fo[fi] + (fo[fi + 1] - fo[fi]) * xu - - y = y0 - - if N > 0: - y = [y0] - hn = xo[1] - xo[0] - if hn ** N < sqrt(eps): - print('Numerical problems may occur for the derivatives in tranproc.') - warnings.warn('The sampling of the transformation may be too small.') - - #% Transform X with the derivatives of f. - fxder = zeros((N, x0.size)) - fder = vstack((xo, fo)) - for k in range(N): #% Derivation of f(x) using a difference method. - n = fder.shape[-1] - #%fder = [(fder(1:n-1,1)+fder(2:n,1))/2 diff(fder(:,2))./diff(fder(:,1))] - fder = vstack([(fder[0, 0:n - 1] + fder[0, 1:n]) / 2, diff(fder[1, :]) / hn]) - fxder[k] = tranproc(fder[0], fder[1], x0) - - # Calculate the transforms of the derivatives of X. - # First time derivative of y: y1 = f'(x)*x1 - - y1 = fxder[0] * xi[0] - y.append(y1) - if N > 1: - - # Second time derivative of y: - # y2 = f''(x)*x1.^2+f'(x)*x2 - y2 = fxder[1] * xi[0] ** 2. 
+ fxder[0] * xi[1] - y.append(y2) - if N > 2: - # Third time derivative of y: - # y3 = f'''(x)*x1.^3+f'(x)*x3 +3*f''(x)*x1*x2 - y3 = fxder[2] * xi[0] ** 3 + fxder[0] * xi[2] + \ - 3 * fxder[1] * xi[0] * xi[1] - y.append(y3) - if N > 3: - # Fourth time derivative of y: - # y4 = f''''(x)*x1.^4+f'(x)*x4 - # +6*f'''(x)*x1^2*x2+f''(x)*(3*x2^2+4x1*x3) - y4 = (fxder[3] * xi[0] ** 4. + fxder[0] * xi[3] + \ - 6. * fxder[2] * xi[0] ** 2. * xi[1] + \ - fxder[1] * (3. * xi[1] ** 2. + 4. * xi[0] * xi[1])) - y.append(y4) - if N > 4: - warnings.warn('Transformation of derivatives of order>4 not supported.') - return y #y0,y1,y2,y3,y4 -def good_bins(data=None, range=None, num_bins=None, num_data=None, odd=False, loose=True): #@ReservedAssignment - ''' Return good bins for histogram - - Parameters - ---------- - data : array-like - the data - range : (float, float) - minimum and maximum range of bins (default data.min(), data.max()) - num_bins : scalar integer - approximate number of bins wanted (default depending on num_data=len(data)) - odd : bool - placement of bins (0 or 1) (default 0) - loose : bool - if True add extra space to min and max - if False the bins are made tight to the min and max - - Example - ------- - >>> import wafo.misc as wm - >>> wm.good_bins(range=(0,5), num_bins=6) - array([-1., 0., 1., 2., 3., 4., 5., 6.]) - >>> wm.good_bins(range=(0,5), num_bins=6, loose=False) - array([ 0., 1., 2., 3., 4., 5.]) - >>> wm.good_bins(range=(0,5), num_bins=6, odd=True) - array([-1.5, -0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5]) - >>> wm.good_bins(range=(0,5), num_bins=6, odd=True, loose=False) - array([-0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5]) - ''' - - if data is not None: - x = np.atleast_1d(data) - num_data = len(x) - - mn, mx = range if range else (x.min(), x.max()) - - if num_bins is None: - num_bins = np.ceil(4 * np.sqrt(np.sqrt(num_data))) - - d = float(mx - mn) / num_bins * 2 - e = np.floor(np.log(d) / np.log(10)); - m = np.floor(d / 10 ** e) - if m > 5: - m = 5 - elif m > 2: - m = 2 - - d = m * 10 ** e - mn = (np.floor(mn / d) - loose) * d - odd * d / 2 - mx = (np.ceil(mx / d) + loose) * d + odd * d / 2 - limits = np.arange(mn, mx + d / 2, d) - return limits - -def plot_histgrm(data, bins=None, range=None, normed=False, weights=None, lintype='b-'): #@ReservedAssignment - ''' - Plot histogram - - Parameters - ----------- - data : array-like - the data - bins : int or sequence of scalars, optional - If an int, it defines the number of equal-width - bins in the given range (4 * sqrt(sqrt(len(data)), by default). - If a sequence, it defines the bin edges, including the - rightmost edge, allowing for non-uniform bin widths. - range : (float, float), optional - The lower and upper range of the bins. If not provided, range - is simply ``(data.min(), data.max())``. Values outside the range are - ignored. - normed : bool, optional - If False, the result will contain the number of samples in each bin. - If True, the result is the value of the probability *density* function - at the bin, normalized such that the *integral* over the range is 1. - weights : array_like, optional - An array of weights, of the same shape as `data`. Each value in `data` - only contributes its associated weight towards the bin count - (instead of 1). If `normed` is True, the weights are normalized, - so that the integral of the density over the range remains 1 - lintype : specify color and lintype, see PLOT for possibilities. 
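(Aside, not part of the patch.) The bin-width rounding inside good_bins above picks 1, 2 or 5 times a power of ten; the standalone sketch below isolates that rule (the helper name nice_width is made up for illustration).

import numpy as np

def nice_width(raw):
    # round a raw bin width down to 1, 2 or 5 times a power of ten
    e = np.floor(np.log10(raw))
    m = np.floor(raw / 10 ** e)
    m = 5 if m > 5 else (2 if m > 2 else m)
    return m * 10 ** e

print(nice_width(0.37))   # 0.2
print(nice_width(7.3))    # 5.0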
- - Returns - ------- - h : list - of plot-objects - - Example - ------- - >>> import pylab as plb - >>> import wafo.misc as wm - >>> import wafo.stats as ws - >>> R = ws.weibull_min.rvs(2,loc=0,scale=2, size=100) - >>> h0 = wm.plot_histgrm(R, 20, normed=True) - >>> x = linspace(-3,16,200) - >>> h1 = plb.plot(x,ws.weibull_min.pdf(x,2,0,2),'r') - - See also - -------- - wafo.misc.good_bins - numpy.histogram - ''' - - x = np.atleast_1d(data) - if bins is None: - bins = np.ceil(4 * np.sqrt(np.sqrt(len(x)))) - - bin_, limits = np.histogram(data, bins=bins, normed=normed, weights=weights) #, new=True) - limits.shape = (-1, 1) - xx = limits.repeat(3, axis=1) - xx.shape = (-1,) - xx = xx[1:-1] - bin_.shape = (-1, 1) - yy = bin_.repeat(3, axis=1) - #yy[0,0] = 0.0 # pdf - yy[:, 0] = 0.0 # histogram - yy.shape = (-1,) - yy = np.hstack((yy, 0.0)) - return plotbackend.plot(xx, yy, lintype, limits, limits * 0) - -def num2pistr(x, n=3): - ''' - Convert a scalar to a text string in fractions of pi - if the numerator is less than 10 and not equal 0 - and if the denominator is less than 10. - - Parameters - ---------- - x = a scalar - n = maximum digits of precision. (default 3) - Returns - ------- - xtxt = a text string in fractions of pi - - Example - >>> import wafo.misc as wm - >>> t = wm.num2pistr(np.pi*3/4) - >>> t=='3\\pi/4' - True - ''' - - frac = fractions.Fraction.from_float(x / pi).limit_denominator(10000000) - num = frac.numerator - den = frac.denominator - if (den < 10) and (num < 10) and (num != 0): - dtxt = '' if abs(den) == 1 else '/%d' % den - if abs(num) == 1: # % numerator - ntxt = '-' if num == -1 else '' - else: - ntxt = '%d' % num - xtxt = ntxt + r'\pi' + dtxt - else: - format = '%0.' + '%dg' % n #@ReservedAssignment - xtxt = format % x - return xtxt - -def fourier(data, t=None, T=None, m=None, n=None, method='trapz'): - ''' - Returns Fourier coefficients. - - Parameters - ---------- - data : array-like - vector or matrix of row vectors with data points shape p x n. - t : array-like - vector with n values indexed from 1 to N. - T : real scalar - primitive period of signal, i.e., smallest period. (default T = t[-1]-t[0] - m : scalar integer - defines no of harmonics desired (default M = N) - n : scalar integer - no of data points (default len(t)) - method : string - integration method used - - Returns - ------- - a,b = Fourier coefficients size m x p - - FOURIER finds the coefficients for a Fourier series representation - of the signal x(t) (given in digital form). It is assumed the signal - is periodic over T. N is the number of data points, and M-1 is the - number of coefficients. - - The signal can be estimated by using M-1 harmonics by: - M-1 - x[i] = 0.5*a[0] + sum (a[n]*c[n,i] + b[n]*s[n,i]) - n=1 - where - c[n,i] = cos(2*pi*(n-1)*t[i]/T) - s[n,i] = sin(2*pi*(n-1)*t[i]/T) - - Note that a[0] is the "dc value". - Remaining values are a[1], a[2], ... , a[M-1]. 
- - Example - ------- - >>> import wafo.misc as wm - >>> import numpy as np - >>> T = 2*np.pi - >>> t = np.linspace(0,4*T) - >>> x = np.sin(t) - >>> a, b = wm.fourier(x, t, T=T, m=5) - >>> (np.round(a.ravel()), np.round(b.ravel())) - (array([ 0., -0., 0., -0., 0.]), array([ 0., 4., -0., -0., 0.])) - - See also - -------- - fft - ''' - x = np.atleast_2d(data) - p, n = x.shape - if t is None: - t = np.arange(n) - else: - t = np.atleast_1d(t) - - n = len(t) if n is None else n - m = n if n is None else m - T = t[-1] - t[0] if T is None else T - - if method.startswith('trapz'): - intfun = trapz - elif method.startswith('simp'): - intfun = simps - - # Define the vectors for computing the Fourier coefficients - t.shape = (1, -1) - a = zeros((m, p)) - b = zeros((m, p)) - a[0] = intfun(x, t, axis= -1) - - # Compute M-1 more coefficients - tmp = 2 * pi * t / T - #% tmp = 2*pi*(0:N-1).'/(N-1); - for i in range(1, m): - a[i] = intfun(x * cos(i * tmp), t, axis= -1) - b[i] = intfun(x * sin(i * tmp), t, axis= -1) - - a = a / pi - b = b / pi - - # Alternative: faster for large M, but gives different results than above. -# nper = diff(t([1 end]))/T; %No of periods given -# if nper == round(nper): -# N1 = n/nper -# else: -# N1 = n -# -# -# -# # Fourier coefficients by fft -# Fcof1 = 2*ifft(x(1:N1,:),[],1); -# Pcor = [1; exp(sqrt(-1)*(1:M-1).'*t(1))]; % correction term to get -# % the correct integration limits -# Fcof = Fcof1(1:M,:).*Pcor(:,ones(1,P)); -# a = real(Fcof(1:M,:)); -# b = imag(Fcof(1:M,:)); - - return a, b - - - -def _test_find_cross(): - t = findcross([0, 0, 1, -1, 1], 0) #@UnusedVariable - -def _test_common_shape(): - - A = ones((4, 1)) - B = 2 - C = ones((1, 5)) * 5 - common_shape(A, B, C) - - common_shape(A, B, C, shape=(3, 4, 1)) - - A = ones((4, 1)) - B = 2 - C = ones((1, 5)) * 5 - common_shape(A, B, C, shape=(4, 5)) - - -def _test_meshgrid(): - x = array([-1, -0.5, 1, 4, 5], float) - y = array([0, -2, -5], float) - xv, yv = meshgrid(x, y, sparse=False) - print(xv) - print(yv) - xv, yv = meshgrid(x, y, sparse=True) # make sparse output arrays - print(xv) - print(yv) - print(meshgrid(0, 1, 5, sparse=True)) # just a 3D point - print(meshgrid([0, 1, 5], sparse=True)) # just a 3D point - xv, yv = meshgrid(y, y) - yv[0, 0] = 10 - print(xv) - print(yv) -## >>> xv -## array([[ 0. , 0.5, 1. ]]) -## >>> yv -## array([[ 0.], -## [ 1.]]) -## array([[-1. , -0.5, 1. , 4. , 5. ], -## [-1. , -0.5, 1. , 4. , 5. ], -## [-1. , -0.5, 1. , 4. , 5. 
]]) -## -## array([[ 0., 0., 0., 0., 0.], -## [-2., -2., -2., -2., -2.], -## [-5., -5., -5., -5., -5.]]) -def _test_tranproc(): - import wafo.transform.models as wtm - tr = wtm.TrHermite() - x = linspace(-5, 5, 501) - g = tr(x) - _gder = tranproc(x, g, x, ones(g.size)) - pass - #>>> gder(:,1) = g(:,1) - #>>> plot(g(:,1),[g(:,2),gder(:,2)]) - #>>> plot(g(:,1),pdfnorm(g(:,2)).*gder(:,2),g(:,1),pdfnorm(g(:,1))) - #>>> legend('Transformed model','Gaussian model') -def _test_detrend(): - import pylab as plb - cos = plb.cos;randn = plb.randn - x = linspace(0, 1, 200) - y = exp(x) + cos(5 * 2 * pi * x) + 1e-1 * randn(x.size) - y0 = detrendma(y, 20);tr = y - y0 - plb.plot(x, y, x, y0, 'r', x, exp(x), 'k', x, tr, 'm') - -def _test_extrema(): - import pylab as pb - from pylab import plot - t = pb.linspace(0, 7 * pi, 250) - x = pb.sin(t) + 0.1 * sin(50 * t) - ind = findextrema(x) - ti, tp = t[ind], x[ind] - plot(t, x, '.', ti, tp, 'r.') - _ind1 = findrfc(tp, 0.3) - - - -def _test_discretize(): - import pylab as plb - x, y = discretize(cos, 0, pi) - plb.plot(x, y) - plb.show() - plb.close('all') - - -def _test_stirlerr(): - x = linspace(1, 5, 6) - print stirlerr(x) - print stirlerr(1) - print getshipchar(1000) - print betaloge(3, 2) - -def _test_parse_kwargs(): - opt = dict(arg1=1, arg2=3) - print opt - opt = parse_kwargs(opt, arg1=5) - print opt - opt2 = dict(arg3=15) - opt = parse_kwargs(opt, **opt2) - print opt - - opt0 = testfun('default') - print opt0 - opt0.update(opt1=100) - print opt0 - opt0 = parse_kwargs(opt0, opt2=200) - print opt0 - out1 = testfun(opt0['opt1'], **opt0) - print out1 - -def test_docstrings(): - import doctest - doctest.testmod() - -if __name__ == "__main__": - test_docstrings() +''' +Misc +''' +from __future__ import division + +import sys +import fractions +import numpy as np +from numpy import ( + abs, amax, any, logical_and, arange, linspace, atleast_1d, # atleast_2d, + array, asarray, broadcast_arrays, ceil, floor, frexp, hypot, + sqrt, arctan2, sin, cos, exp, log, mod, diff, empty_like, + finfo, inf, pi, interp, isnan, isscalar, zeros, ones, linalg, + r_, sign, unique, hstack, vstack, nonzero, where, extract) +from scipy.special import gammaln +from scipy.integrate import trapz, simps +import warnings +from plotbackend import plotbackend +from collections import OrderedDict + + +try: + import c_library as clib # @UnresolvedImport +except: + clib = None +floatinfo = finfo(float) + + +__all__ = [ + 'is_numlike', 'JITImport', 'DotDict', 'Bunch', 'printf', 'sub_dict_select', + 'parse_kwargs', 'detrendma', 'ecross', 'findcross', + 'findextrema', 'findpeaks', 'findrfc', 'rfcfilter', 'findtp', 'findtc', + 'findoutliers', 'common_shape', 'argsreduce', + 'stirlerr', 'getshipchar', 'betaloge', 'gravity', 'nextpow2', + 'discretize', 'polar2cart', 'cart2polar', 'meshgrid', 'ndgrid', + 'trangood', 'tranproc', 'plot_histgrm', 'num2pistr', 'test_docstrings'] + + +def is_numlike(obj): + 'return true if *obj* looks like a number' + try: + obj + 1 + except TypeError: + return False + else: + return True + + +class JITImport(object): + + ''' + Just In Time Import of module + + Example + ------- + >>> np = JITImport('numpy') + >>> np.exp(0)==1.0 + True + ''' + + def __init__(self, module_name): + self._module_name = module_name + self._module = None + + def __getattr__(self, attr): + try: + return getattr(self._module, attr) + except: + if self._module is None: + self._module = __import__(self._module_name, None, None, ['*']) + #assert(isinstance(self._module, types.ModuleType), 'module') 
+ return getattr(self._module, attr) + else: + raise + + +class DotDict(dict): + + ''' Implement dot access to dict values + + Example + ------- + >>> d = DotDict(test1=1,test2=3) + >>> d.test1 + 1 + ''' + __getattr__ = dict.__getitem__ + + +class Bunch(object): + + ''' Implement keyword argument initialization of class + + Example + ------- + >>> d = Bunch(test1=1,test2=3) + >>> d.test1 + 1 + ''' + + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + def keys(self): + return self.__dict__.keys() + + def update(self, ** kwargs): + self.__dict__.update(kwargs) + + +def printf(format, *args): # @ReservedAssignment + sys.stdout.write(format % args) + + +def sub_dict_select(somedict, somekeys): + ''' + Extracting a Subset from Dictionary + + Example + -------- + # Update options dict from keyword arguments if + # the keyword exists in options + >>> opt = dict(arg1=2, arg2=3) + >>> kwds = dict(arg2=100,arg3=1000) + >>> sub_dict = sub_dict_select(kwds,opt.keys()) + >>> opt.update(sub_dict) + >>> opt + {'arg1': 2, 'arg2': 100} + + See also + -------- + dict_intersection + ''' + # slower: validKeys = set(somedict).intersection(somekeys) + return dict((k, somedict[k]) for k in somekeys if k in somedict) + + +def parse_kwargs(options, **kwargs): + ''' Update options dict from keyword arguments if it exists in options + + Example + >>> opt = dict(arg1=2, arg2=3) + >>> opt = parse_kwargs(opt,arg2=100) + >>> print opt + {'arg1': 2, 'arg2': 100} + >>> opt2 = dict(arg2=101) + >>> opt = parse_kwargs(opt,**opt2) + + See also sub_dict_select + ''' + + newopts = sub_dict_select(kwargs, options.keys()) + if len(newopts) > 0: + options.update(newopts) + return options + + +def testfun(*args, **kwargs): + opts = dict(opt1=1, opt2=2) + if (len(args) == 1 and len(kwargs) == 0 and type(args[0]) is str and + args[0].startswith('default')): + return opts + opts = parse_kwargs(opts, **kwargs) + return opts + + +def detrendma(x, L): + """ + Removes a trend from data using a moving average + of size 2*L+1. If 2*L+1 > len(x) then the mean is removed + + Parameters + ---------- + x : vector or matrix of column vectors + of data + L : scalar, integer + defines the size of the moving average window + + Returns + ------- + y : ndarray + detrended data + + Examples + -------- + >>> import wafo.misc as wm + >>> import pylab as plb + >>> exp = plb.exp; cos = plb.cos; randn = plb.randn + >>> x = plb.linspace(0,1,200) + >>> y = exp(x)+cos(5*2*pi*x)+1e-1*randn(x.size) + >>> y0 = wm.detrendma(y,20); tr = y-y0 + >>> h = plb.plot(x, y, x, y0, 'r', x, exp(x), 'k', x, tr, 'm') + + >>> plb.close('all') + + See also + -------- + Reconstruct + """ + + if L <= 0: + raise ValueError('L must be positive') + if L != round(L): + raise ValueError('L must be an integer') + + x1 = atleast_1d(x) + if x1.shape[0] == 1: + x1 = x1.ravel() + + n = x1.shape[0] + if n < 2 * L + 1: # only able to remove the mean + return x1 - x1.mean(axis=0) + + mn = x1[0:2 * L + 1].mean(axis=0) + y = empty_like(x1) + y[0:L] = x1[0:L] - mn + + ix = r_[L:(n - L)] + trend = ((x1[ix + L] - x1[ix - L]) / (2 * L + 1)).cumsum(axis=0) + mn + y[ix] = x1[ix] - trend + y[n - L::] = x1[n - L::] - trend[-1] + return y + + +def ecross(t, f, ind, v=0): + ''' + Extracts exact level v crossings + + ECROSS interpolates t and f linearly to find the exact level v + crossings, i.e., the points where f(t0) = v + + Parameters + ---------- + t,f : vectors + of arguments and functions values, respectively. 
+ ind : ndarray of integers + indices to level v crossings as found by findcross. + v : scalar or vector (of size(ind)) + defining the level(s) to cross. + + Returns + ------- + t0 : vector + of exact level v crossings. + + Example + ------- + >>> from matplotlib import pylab as plb + >>> import wafo.misc as wm + >>> ones = plb.ones + >>> t = plb.linspace(0,7*plb.pi,250) + >>> x = plb.sin(t) + >>> ind = wm.findcross(x,0.75) + >>> ind + array([ 9, 25, 80, 97, 151, 168, 223, 239]) + >>> t0 = wm.ecross(t,x,ind,0.75) + >>> t0 + array([ 0.84910514, 2.2933879 , 7.13205663, 8.57630119, + 13.41484739, 14.85909194, 19.69776067, 21.14204343]) + >>> a = plb.plot(t, x, '.', t[ind], x[ind], 'r.', t, ones(t.shape)*0.75, + ... t0, ones(t0.shape)*0.75, 'g.') + + >>> plb.close('all') + + See also + -------- + findcross + ''' + # Tested on: Python 2.5 + # revised pab Feb2004 + # By pab 18.06.2001 + return (t[ind] + (v - f[ind]) * (t[ind + 1] - t[ind]) / + (f[ind + 1] - f[ind])) + + +def _findcross(xn): + '''Return indices to zero up and downcrossings of a vector + ''' + if clib is not None: + ind, m = clib.findcross(xn, 0.0) + return ind[:m] + + n = len(xn) + iz, = (xn == 0).nonzero() + if len(iz) > 0: + # Trick to avoid turning points on the crossinglevel. + if iz[0] == 0: + if len(iz) == n: + warnings.warn('All values are equal to crossing level!') + return zeros(0, dtype=np.int) + + diz = diff(iz) + if len(diz) > 0 and (diz > 1).any(): + ix = iz[(diz > 1).argmax()] + else: + ix = iz[-1] + + # x(ix) is a up crossing if x(1:ix) = v and x(ix+1) > v. + # x(ix) is a downcrossing if x(1:ix) = v and x(ix+1) < v. + xn[0:ix + 1] = -xn[ix + 1] + iz = iz[ix + 1::] + + for ix in iz.tolist(): + xn[ix] = xn[ix - 1] + + #% indices to local level crossings ( without turningpoints) + ind, = (xn[:n - 1] * xn[1:] < 0).nonzero() + return ind + + +def findcross(x, v=0.0, kind=None): + ''' + Return indices to level v up and/or downcrossings of a vector + + Parameters + ---------- + x : array_like + vector with sampled values. + v : scalar, real + level v. + kind : string + defines type of wave or crossing returned. Possible options are + 'dw' : downcrossing wave + 'uw' : upcrossing wave + 'cw' : crest wave + 'tw' : trough wave + 'd' : downcrossings only + 'u' : upcrossings only + None : All crossings will be returned + + Returns + ------- + ind : array-like + indices to the crossings in the original sequence x. 
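+
+    Notes
+    -----
+    For kind in ('dw', 'uw', 'tw', 'cw') the crossings are trimmed so that
+    the sequence starts with the crossing type required by the wave
+    definition and contains an odd ('dw', 'uw') or even ('tw', 'cw')
+    number of crossings.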
+ + Example + ------- + >>> from matplotlib import pylab as plb + >>> import wafo.misc as wm + >>> ones = plb.ones + >>> findcross([0, 1, -1, 1],0) + array([0, 1, 2]) + >>> v = 0.75 + >>> t = plb.linspace(0,7*plb.pi,250) + >>> x = plb.sin(t) + >>> ind = wm.findcross(x,v) # all crossings + >>> ind + array([ 9, 25, 80, 97, 151, 168, 223, 239]) + >>> t0 = plb.plot(t,x,'.',t[ind],x[ind],'r.', t, ones(t.shape)*v) + >>> ind2 = wm.findcross(x,v,'u') + >>> ind2 + array([ 9, 80, 151, 223]) + >>> t0 = plb.plot(t[ind2],x[ind2],'o') + >>> plb.close('all') + + See also + -------- + crossdef + wavedef + ''' + xn = np.int8(sign(atleast_1d(x).ravel() - v)) # @UndefinedVariable + ind = _findcross(xn) + if ind.size == 0: + warnings.warn('No level v = %0.5g crossings found in x' % v) + return ind + + if kind not in ('du', 'all', None): + if kind == 'd': # downcrossings only + t_0 = int(xn[ind[0] + 1] > 0) + ind = ind[t_0::2] + elif kind == 'u': # upcrossings only + t_0 = int(xn[ind[0] + 1] < 0) + ind = ind[t_0::2] + elif kind in ('dw', 'uw', 'tw', 'cw'): + # make sure the first is a level v down-crossing if wdef=='dw' + # or make sure the first is a level v up-crossing if wdef=='uw' + # make sure the first is a level v down-crossing if wdef=='tw' + # or make sure the first is a level v up-crossing if + # wdef=='cw' + xor = lambda a, b: a ^ b + first_is_down_crossing = int(xn[ind[0]] > xn[ind[0] + 1]) + if xor(first_is_down_crossing, kind in ('dw', 'tw')): + ind = ind[1::] + + n_c = ind.size # number of level v crossings + # make sure the number of troughs and crests are according to the + # wavedef, i.e., make sure length(ind) is odd if dw or uw + # and even if tw or cw + is_odd = mod(n_c, 2) + if xor(is_odd, kind in ('dw', 'uw')): + ind = ind[:-1] + else: + raise ValueError('Unknown wave/crossing definition!') + return ind + + +def findextrema(x): + ''' + Return indices to minima and maxima of a vector + + Parameters + ---------- + x : vector with sampled values. + + Returns + ------- + ind : indices to minima and maxima in the original sequence x. + + Examples + -------- + >>> import numpy as np + >>> import pylab as pb + >>> import wafo.misc as wm + >>> t = np.linspace(0,7*np.pi,250) + >>> x = np.sin(t) + >>> ind = wm.findextrema(x) + >>> a = pb.plot(t,x,'.',t[ind],x[ind],'r.') + >>> pb.close('all') + + See also + -------- + findcross + crossdef + ''' + xn = atleast_1d(x).ravel() + return findcross(diff(xn), 0.0) + 1 + + +def findpeaks(data, n=2, min_h=None, min_p=0.0): + ''' + Find peaks of vector or matrix possibly rainflow filtered + + Parameters + ---------- + data = matrix or vector + n = The n highest peaks are found (if exist). (default 2) + min_h = The threshold in the rainflowfilter (default 0.05*range(S(:))). + A zero value will return all the peaks of S. + min_p = 0..1, Only the peaks that are higher than + min_p*max(max(S)) min_p*(the largest peak in S) + are returned (default 0). + Returns + ix = + linear index to peaks of S + + Example: + + Find highest 8 peaks that are not + less that 0.3*"global max" and have + rainflow amplitude larger than 5. 
+ >>> import numpy as np + >>> import wafo.misc as wm + >>> x = np.arange(0,10,0.01) + >>> data = x**2+10*np.sin(3*x)+0.5*np.sin(50*x) + >>> wm.findpeaks(data, n=8, min_h=5, min_p=0.3) + array([908, 694, 481]) + + See also + -------- + findtp + ''' + S = np.atleast_1d(data) + smax = S.max() + if min_h is None: + smin = S.min() + min_h = 0.05 * (smax - smin) + ndim = S.ndim + S = np.atleast_2d(S) + nrows, mcols = S.shape + + # Finding turningpoints of the spectrum + # Returning only those with rainflowcycle heights greater than h_min + indP = [] # indices to peaks + ind = [] + for iy in range(nrows): # % find all peaks + TuP = findtp(S[iy], min_h) + if len(TuP): + ind = TuP[1::2] # ; % extract indices to maxima only + else: # % did not find any , try maximum + ind = np.atleast_1d(S[iy].argmax()) + + if ndim > 1: + if iy == 0: + ind2 = np.flatnonzero(S[iy, ind] > S[iy + 1, ind]) + elif iy == nrows - 1: + ind2 = np.flatnonzero(S[iy, ind] > S[iy - 1, ind]) + else: + ind2 = np.flatnonzero((S[iy, ind] > S[iy - 1, ind]) & + (S[iy, ind] > S[iy + 1, ind])) + + if len(ind2): + indP.append((ind[ind2] + iy * mcols)) + + if ndim > 1: + ind = np.hstack(indP) if len(indP) else [] + if len(ind) == 0: + return [] + + peaks = S.take(ind) + ind2 = peaks.argsort()[::-1] + + # keeping only the Np most significant peak frequencies. + nmax = min(n, len(ind)) + ind = ind[ind2[:nmax]] + if (min_p > 0): + # Keeping only peaks larger than min_p percent relative to the maximum + # peak + ind = ind[(S.take(ind) > min_p * smax)] + + return ind + + +def findrfc_astm(tp): + """ + Return rainflow counted cycles + + Nieslony's Matlab implementation of the ASTM standard practice for rainflow + counting ported to a Python C module. + + Parameters + ---------- + tp : array-like + vector of turningpoints (NB! Only values, not sampled times) + + Returns + ------- + sig_rfc : array-like + array of shape (n,3) with: + sig_rfc[:,0] Cycles amplitude + sig_rfc[:,1] Cycles mean value + sig_rfc[:,2] Cycle type, half (=0.5) or full (=1.0) + """ + + y1 = atleast_1d(tp).ravel() + sig_rfc, cnr = clib.findrfc3_astm(y1) + # the sig_rfc was constructed too big in rainflow.rf3, so + # reduce the sig_rfc array as done originally by a matlab mex c function + n = len(sig_rfc) + sig_rfc = sig_rfc.__getslice__(0, n - cnr[0]) + # sig_rfc holds the actual rainflow counted cycles, not the indices + return sig_rfc + + +def findrfc(tp, h=0.0, method='clib'): + ''' + Return indices to rainflow cycles of a sequence of TP. + + Parameters + ----------- + tp : array-like + vector of turningpoints (NB! Only values, not sampled times) + h : real scalar + rainflow threshold. If h>0, then all rainflow cycles with height + smaller than h are removed. + method : string, optional + 'clib' 'None' + Specify 'clib' for calling the c_functions, otherwise fallback to + the Python implementation. + + Returns + ------- + ind : ndarray of int + indices to the rainflow cycles of the original sequence TP. + + Example: + -------- + >>> import pylab as pb + >>> import wafo.misc as wm + >>> t = pb.linspace(0,7*np.pi,250) + >>> x = pb.sin(t)+0.1*np.sin(50*t) + >>> ind = wm.findextrema(x) + >>> ti, tp = t[ind], x[ind] + >>> a = pb.plot(t,x,'.',ti,tp,'r.') + >>> ind1 = wm.findrfc(tp,0.3); ind1 + array([ 0, 9, 32, 53, 74, 95, 116, 137]) + >>> ind2 = wm.findrfc(tp,0.3, method=''); ind2 + array([ 0, 9, 32, 53, 74, 95, 116, 137]) + >>> a = pb.plot(ti[ind1],tp[ind1]) + >>> pb.close('all') + + See also + -------- + rfcfilter, + findtp. 
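+
+    Notes
+    -----
+    The returned indices are sorted, so ``tp[ind]`` picks out the rainflow
+    filtered turning points (compare rfcfilter).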
+ ''' + # TODO: merge rfcfilter and findrfc + y1 = atleast_1d(tp).ravel() + + n = len(y1) + ind = zeros(0, dtype=np.int) + ix = 0 + if y1[0] > y1[1]: + # first is a max, ignore it + y = y1[1::] + NC = floor((n - 1) / 2) - 1 + Tstart = 1 + else: + y = y1 + NC = floor(n / 2) - 1 + Tstart = 0 + + if (NC < 1): + return ind # No RFC cycles*/ + + if (y[0] > y[1]) and (y[1] > y[2]): + warnings.warn('This is not a sequence of turningpoints, exit') + return ind + + if (y[0] < y[1]) and (y[1] < y[2]): + warnings.warn('This is not a sequence of turningpoints, exit') + return ind + + if clib is None or method not in ('clib'): + ind = zeros(n, dtype=np.int) + NC = np.int(NC) + for i in xrange(NC): + Tmi = Tstart + 2 * i + Tpl = Tstart + 2 * i + 2 + xminus = y[2 * i] + xplus = y[2 * i + 2] + + if(i != 0): + j = i - 1 + while ((j >= 0) and (y[2 * j + 1] <= y[2 * i + 1])): + if (y[2 * j] < xminus): + xminus = y[2 * j] + Tmi = Tstart + 2 * j + j -= 1 + if (xminus >= xplus): + if (y[2 * i + 1] - xminus >= h): + ind[ix] = Tmi + ix += 1 + ind[ix] = (Tstart + 2 * i + 1) + ix += 1 + # goto L180 continue + else: + j = i + 1 + while (j < NC): + if (y[2 * j + 1] >= y[2 * i + 1]): + break # goto L170 + if((y[2 * j + 2] <= xplus)): + xplus = y[2 * j + 2] + Tpl = (Tstart + 2 * j + 2) + j += 1 + else: + if ((y[2 * i + 1] - xminus) >= h): + ind[ix] = Tmi + ix += 1 + ind[ix] = (Tstart + 2 * i + 1) + ix += 1 + #iy = i + continue + + # goto L180 + # L170: + if (xplus <= xminus): + if ((y[2 * i + 1] - xminus) >= h): + ind[ix] = Tmi + ix += 1 + ind[ix] = (Tstart + 2 * i + 1) + ix += 1 + elif ((y[2 * i + 1] - xplus) >= h): + ind[ix] = (Tstart + 2 * i + 1) + ix += 1 + ind[ix] = Tpl + ix += 1 + + # L180: + # iy=i + # /* for i */ + else: + ind, ix = clib.findrfc(y, h) + return np.sort(ind[:ix]) + + +def mctp2rfc(fmM, fMm=None): + ''' + Return Rainflow matrix given a Markov matrix of a Markov chain + of turning points + + computes f_rfc = f_mM + F_mct(f_mM). + + Parameters + ---------- + fmM = the min2max Markov matrix, + fMm = the max2min Markov matrix, + + Returns + ------- + f_rfc = the rainflow matrix, + + Example: + ------- + >>> fmM = np.array([[ 0.0183, 0.0160, 0.0002, 0.0000, 0], + ... [0.0178, 0.5405, 0.0952, 0, 0], + ... [0.0002, 0.0813, 0, 0, 0], + ... [0.0000, 0, 0, 0, 0], + ... [ 0, 0, 0, 0, 0]]) + + >>> mctp2rfc(fmM) + array([[ 2.66998090e-02, 7.79970042e-03, 4.90607697e-07, + 0.00000000e+00, 0.00000000e+00], + [ 9.59962873e-03, 5.48500862e-01, 9.53995094e-02, + 0.00000000e+00, 0.00000000e+00], + [ 5.62297379e-07, 8.14994377e-02, 0.00000000e+00, + 0.00000000e+00, 0.00000000e+00], + [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 0.00000000e+00, 0.00000000e+00], + [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 0.00000000e+00, 0.00000000e+00]]) + + ''' + + if fMm is None: + fmM = np.atleast_1d(fmM) + fMm = fmM.copy() + else: + fmM, fMm = np.atleast_1d(fmM, fMm) + f_mM, f_Mm = fmM.copy(), fMm.copy() + N = max(f_mM.shape) + f_max = np.sum(f_mM, axis=1) + f_min = np.sum(f_mM, axis=0) + f_rfc = zeros((N, N)) + f_rfc[N - 2, 0] = f_max[N - 2] + f_rfc[0, N - 2] = f_min[N - 2] + for k in range(2, N - 1): + for i in range(1, k): + AA = f_mM[N - 1 - k:N - 1 - k + i, k - i:k] + AA1 = f_Mm[N - 1 - k:N - 1 - k + i, k - i:k] + RAA = f_rfc[N - 1 - k:N - 1 - k + i, k - i:k] + nA = max(AA.shape) + MA = f_max[N - 1 - k:N - 1 - k + i] + mA = f_min[k - i:k] + SA = AA.sum() + SRA = RAA.sum() + + DRFC = SA - SRA + # ?? 
check + NT = min(mA[0] - sum(RAA[:, 0]), MA[0] - sum(RAA[0, :])) + NT = max(NT, 0) # ??check + + if NT > 1e-6 * max(MA[0], mA[0]): + NN = MA - np.sum(AA, axis=1) # T + e = (mA - np.sum(AA, axis=0)) # T + e = np.flipud(e) + PmM = np.rot90(AA.copy()) + for j in range(nA): + norm = mA[nA - 1 - j] + if norm != 0: + PmM[j, :] = PmM[j, :] / norm + e[j] = e[j] / norm + # end + # end + fx = 0.0 + if (max(abs(e)) > 1e-6 and + max(abs(NN)) > 1e-6 * max(MA[0], mA[0])): + PMm = AA1.copy() + for j in range(nA): + norm = MA[j] + if norm != 0: + PMm[j, :] = PMm[j, :] / norm + # end + # end + PMm = np.fliplr(PMm) + + A = PMm + B = PmM + + if nA == 1: + fx = NN * (A / (1 - B * A) * e) + else: + rh = np.eye(A.shape[0]) - np.dot(B, A) + #least squares + fx = np.dot(NN, np.dot(A, linalg.solve(rh, e))) + # end + # end + f_rfc[N - 1 - k, k - i] = fx + DRFC + + # check2=[ DRFC fx] + # pause + else: + f_rfc[N - 1 - k, k - i] = 0.0 + # end + # end + m0 = max(0, f_min[0] - np.sum(f_rfc[N - k + 1:N, 0])) + M0 = max(0, f_max[N - 1 - k] - np.sum(f_rfc[N - 1 - k, 1:k])) + f_rfc[N - 1 - k, 0] = min(m0, M0) + #% n_loops_left=N-k+1 + # end + + for k in range(1, N): + M0 = max(0, f_max[0] - np.sum(f_rfc[0, N - k:N])) + m0 = max(0, f_min[N - 1 - k] - np.sum(f_rfc[1:k + 1, N - 1 - k])) + f_rfc[0, N - 1 - k] = min(m0, M0) + # end + +# %clf +# %subplot(1,2,2) +# %pcolor(levels(paramm),levels(paramM),flipud(f_mM)) +# % title('Markov matrix') +# % ylabel('max'), xlabel('min') +# %axis([paramm(1) paramm(2) paramM(1) paramM(2)]) +# %axis('square') +# +# %subplot(1,2,1) +# %pcolor(levels(paramm),levels(paramM),flipud(f_rfc)) +# % title('Rainflow matrix') +# % ylabel('max'), xlabel('rfc-min') +# %axis([paramm(1) paramm(2) paramM(1) paramM(2)]) +# %axis('square') + + return f_rfc + + +def rfcfilter(x, h, method=0): + """ + Rainflow filter a signal. + + Parameters + ----------- + x : vector + Signal. [nx1] + h : real, scalar + Threshold for rainflow filter. + method : scalar, integer + 0 : removes cycles with range < h. (default) + 1 : removes cycles with range <= h. + + Returns + -------- + y = Rainflow filtered signal. + + Examples: + --------- + # 1. Filtered signal y is the turning points of x. + >>> import wafo.data + >>> import wafo.misc as wm + >>> x = wafo.data.sea() + >>> y = wm.rfcfilter(x[:,1], h=0, method=1) + >>> y.shape + (2172,) + >>> y[0:5] + array([-1.2004945 , 0.83950546, -0.09049454, -0.02049454, -0.09049454]) + >>> y[-5::] + array([ 0.18950546, 0.15950546, 0.91950546, -0.51049454, -0.48049454]) + + # 2. This removes all rainflow cycles with range less than 0.5. 
+ >>> y1 = wm.rfcfilter(x[:,1], h=0.5) + >>> y1.shape + (863,) + >>> y1[0:5] + array([-1.2004945 , 0.83950546, -0.43049454, 0.34950546, -0.51049454]) + >>> y1[-5::] + array([-0.64049454, 0.65950546, -1.0004945 , 0.91950546, -0.51049454]) + + >>> ind = wm.findtp(x[:,1], h=0.5) + >>> y2 = x[ind,1] + >>> y2[0:5] + array([-1.2004945 , 0.83950546, -0.43049454, 0.34950546, -0.51049454]) + >>> y2[-5::] + array([ 0.83950546, -0.64049454, 0.65950546, -1.0004945 , 0.91950546]) + + See also + -------- + findrfc + """ + # TODO merge rfcfilter and findrfc + y = atleast_1d(x).ravel() + n = len(y) + t = zeros(n, dtype=np.int) + j = 0 + t0 = 0 + y0 = y[t0] + + z0 = 0 + if method == 0: + cmpfun1 = lambda a, b: a <= b + cmpfun2 = lambda a, b: a < b + else: + cmpfun1 = lambda a, b: a < b + cmpfun2 = lambda a, b: a <= b + + # The rainflow filter + for tim1, yi in enumerate(y[1::]): + fpi = y0 + h + fmi = y0 - h + ti = tim1 + 1 + #yi = y[ti] + + if z0 == 0: + if cmpfun1(yi, fmi): + z1 = -1 + elif cmpfun1(fpi, yi): + z1 = +1 + else: + z1 = 0 + t1, y1 = (t0, y0) if z1 == 0 else (ti, yi) + else: + if (((z0 == +1) & cmpfun1(yi, fmi)) | + ((z0 == -1) & cmpfun2(yi, fpi))): + z1 = -1 + elif (((z0 == +1) & cmpfun2(fmi, yi)) | + ((z0 == -1) & cmpfun1(fpi, yi))): + z1 = +1 + else: + warnings.warn('Something wrong, i=%d' % tim1) + + # Update y1 + if z1 != z0: + t1, y1 = ti, yi + elif z1 == -1: + # y1 = min([y0 xi]) + t1, y1 = (t0, y0) if y0 < yi else (ti, yi) + elif z1 == +1: + # y1 = max([y0 xi]) + t1, y1 = (t0, y0) if y0 > yi else (ti, yi) + + # Update y if y0 is a turning point + if abs(z0 - z1) == 2: + j += 1 + t[j] = t0 + + # Update t0, y0, z0 + t0, y0, z0 = t1, y1, z1 + # end + + #% Update y if last y0 is greater than (or equal) threshold + if cmpfun1(h, abs(y0 - y[t[j]])): + j += 1 + t[j] = t0 + return y[t[:j + 1]] + + +def findtp(x, h=0.0, kind=None): + ''' + Return indices to turning points (tp) of data, optionally rainflowfiltered. + + Parameters + ---------- + x : vector + signal + h : real, scalar + rainflow threshold + if h<0, then ind = range(len(x)) + if h=0, then tp is a sequence of turning points (default) + if h>0, then all rainflow cycles with height smaller than + h are removed. + kind : string + defines the type of wave or indicate the ASTM rainflow counting method. + Possible options are 'astm' 'mw' 'Mw' or 'none'. + If None all rainflow filtered min and max + will be returned, otherwise only the rainflow filtered + min and max, which define a wave according to the + wave definition, will be returned. + + Returns + ------- + ind : arraylike + indices to the turning points in the original sequence. + + Example: + -------- + >>> import wafo.data + >>> import pylab as plb + >>> import wafo.misc as wm + >>> x = wafo.data.sea() + >>> x1 = x[0:200,:] + >>> itp = wm.findtp(x1[:,1],0,'Mw') + >>> itph = wm.findtp(x1[:,1],0.3,'Mw') + >>> tp = x1[itp,:] + >>> tph = x1[itph,:] + >>> a = plb.plot(x1[:,0],x1[:,1], + ... tp[:,0],tp[:,1],'ro', + ... 
tph[:,1],tph[:,1],'k.') + >>> plb.close('all') + >>> itp + array([ 11, 21, 22, 24, 26, 28, 31, 39, 43, 45, 47, 51, 56, + 64, 70, 78, 82, 84, 89, 94, 101, 108, 119, 131, 141, 148, + 149, 150, 159, 173, 184, 190, 199]) + >>> itph + array([ 11, 28, 31, 39, 47, 51, 56, 64, 70, 78, 89, 94, 101, + 108, 119, 131, 141, 148, 159, 173, 184, 190, 199]) + + See also + --------- + findtc + findcross + findextrema + findrfc + ''' + n = len(x) + if h < 0.0: + return arange(n) + + ind = findextrema(x) + + if ind.size < 2: + return None + + #% In order to get the exact up-crossing intensity from rfc by + #% mm2lc(tp2mm(rfc)) we have to add the indices + #% to the last value (and also the first if the + #% sequence of turning points does not start with a minimum). + + if kind == 'astm': + # the Nieslony approach always put the first loading point as the first + # turning point. + # add the first turning point is the first of the signal + if x[ind[0]] != x[0]: + ind = np.r_[0, ind, n - 1] + else: # only add the last point of the signal + ind = np.r_[ind, n - 1] + else: + if x[ind[0]] > x[ind[1]]: # adds indices to first and last value + ind = r_[0, ind, n - 1] + else: # adds index to the last value + ind = r_[ind, n - 1] + + if h > 0.0: + ind1 = findrfc(x[ind], h) + ind = ind[ind1] + + if kind in ('mw', 'Mw'): + xor = lambda a, b: a ^ b + # make sure that the first is a Max if wdef == 'Mw' + # or make sure that the first is a min if wdef == 'mw' + first_is_max = (x[ind[0]] > x[ind[1]]) + + remove_first = xor(first_is_max, kind.startswith('Mw')) + if remove_first: + ind = ind[1::] + + # make sure the number of minima and Maxima are according to the + # wavedef. i.e., make sure Nm=length(ind) is odd + if (mod(ind.size, 2)) != 1: + ind = ind[:-1] + return ind + + +def findtc(x_in, v=None, kind=None): + """ + Return indices to troughs and crests of data. + + Parameters + ---------- + x : vector + surface elevation. + v : real scalar + reference level (default v = mean of x). + + kind : string + defines the type of wave. Possible options are + 'dw', 'uw', 'tw', 'cw' or None. + If None indices to all troughs and crests will be returned, + otherwise only the paired ones will be returned + according to the wavedefinition. + + Returns + -------- + tc_ind : vector of ints + indices to the trough and crest turningpoints of sequence x. + v_ind : vector of ints + indices to the level v crossings of the original + sequence x. 
(d,u) + + Example: + -------- + >>> import wafo.data + >>> import pylab as plb + >>> import wafo.misc as wm + >>> x = wafo.data.sea() + >>> x1 = x[0:200,:] + >>> itc, iv = wm.findtc(x1[:,1],0,'dw') + >>> tc = x1[itc,:] + >>> a = plb.plot(x1[:,0],x1[:,1],tc[:,0],tc[:,1],'ro') + >>> plb.close('all') + + See also + -------- + findtp + findcross, + wavedef + """ + + x = atleast_1d(x_in) + if v is None: + v = x.mean() + + v_ind = findcross(x, v, kind) + n_c = v_ind.size + if n_c <= 2: + warnings.warn('There are no waves!') + return zeros(0, dtype=np.int), zeros(0, dtype=np.int) + + # determine the number of trough2crest (or crest2trough) cycles + isodd = mod(n_c, 2) + if isodd: + n_tc = int((n_c - 1) / 2) + else: + n_tc = int((n_c - 2) / 2) + + #% allocate variables before the loop increases the speed + ind = zeros(n_c - 1, dtype=np.int) + + first_is_down_crossing = (x[v_ind[0]] > x[v_ind[0] + 1]) + if first_is_down_crossing: + for i in xrange(n_tc): + #% trough + j = 2 * i + ind[j] = x[v_ind[j] + 1:v_ind[j + 1] + 1].argmin() + #% crest + ind[j + 1] = x[v_ind[j + 1] + 1:v_ind[j + 2] + 1].argmax() + + if (2 * n_tc + 1 < n_c) and (kind in (None, 'tw')): + #% trough + ind[n_c - 2] = x[v_ind[n_c - 2] + 1:v_ind[n_c - 1]].argmin() + + else: # %%%% the first is a up-crossing + for i in xrange(n_tc): + #% trough + j = 2 * i + ind[j] = x[v_ind[j] + 1:v_ind[j + 1] + 1].argmax() + #% crest + ind[j + 1] = x[v_ind[j + 1] + 1:v_ind[j + 2] + 1].argmin() + + if (2 * n_tc + 1 < n_c) and (kind in (None, 'cw')): + #% trough + ind[n_c - 2] = x[v_ind[n_c - 2] + 1:v_ind[n_c - 1]].argmax() + + return v_ind[:n_c - 1] + ind + 1, v_ind + + +def findoutliers(x, zcrit=0.0, dcrit=None, ddcrit=None, verbose=False): + """ + Return indices to spurious points of data + + Parameters + ---------- + x : vector + of data values. + zcrit : real scalar + critical distance between consecutive points. + dcrit : real scalar + critical distance of Dx used for determination of spurious + points. (Default 1.5 standard deviation of x) + ddcrit : real scalar + critical distance of DDx used for determination of spurious + points. (Default 1.5 standard deviation of x) + + Returns + ------- + inds : ndarray of integers + indices to spurious points. + indg : ndarray of integers + indices to the rest of the points. + + Notes + ----- + Consecutive points less than zcrit apart are considered as spurious. + The point immediately after and before are also removed. Jumps greater than + dcrit in Dxn and greater than ddcrit in D^2xn are also considered as + spurious. + (All distances to be interpreted in the vertical direction.) + Another good choice for dcrit and ddcrit are: + + dcrit = 5*dT and ddcrit = 9.81/2*dT**2 + + where dT is the timestep between points. + + Examples + -------- + >>> import numpy as np + >>> import wafo + >>> import wafo.misc as wm + >>> xx = wafo.data.sea() + >>> dt = np.diff(xx[:2,0]) + >>> dcrit = 5*dt + >>> ddcrit = 9.81/2*dt*dt + >>> zcrit = 0 + >>> [inds, indg] = wm.findoutliers(xx[:,1],zcrit,dcrit,ddcrit,verbose=True) + Found 0 spurious positive jumps of Dx + Found 0 spurious negative jumps of Dx + Found 37 spurious positive jumps of D^2x + Found 200 spurious negative jumps of D^2x + Found 244 consecutive equal values + Found the total of 1152 spurious points + + #waveplot(xx,'-',xx(inds,:),1,1,1) + + See also + -------- + waveplot, reconstruct + """ + + # finding outliers + findjumpsDx = True # find jumps in Dx + # two point spikes and Spikes dcrit above/under the + # previous and the following point are spurios. 
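+    # (these two spike checks are currently switched off)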
+ findSpikes = False # find spikes + findDspikes = False # find double (two point) spikes + findjumpsD2x = True # find jumps in D^2x + findNaN = True # % find missing values + + xn = asarray(x).flatten() + + if xn.size < 2: + raise ValueError('The vector must have more than 2 elements!') + + ind = zeros(0, dtype=int) + # indg=[] + indmiss = isnan(xn) + if findNaN and indmiss.any(): + ind, = nonzero(indmiss) + if verbose: + print('Found %d missing points' % ind.size) + xn[indmiss] = 0. # %set NaN's to zero + + if dcrit is None: + dcrit = 1.5 * xn.std() + if verbose: + print('dcrit is set to %g' % dcrit) + + if ddcrit is None: + ddcrit = 1.5 * xn.std() + if verbose: + print('ddcrit is set to %g' % ddcrit) + + dxn = diff(xn) + ddxn = diff(dxn) + + if findSpikes: # finding spurious spikes + tmp, = nonzero((dxn[:-1] > dcrit) * (dxn[1::] < -dcrit) | + (dxn[:-1] < -dcrit) * (dxn[1::] > dcrit)) + if tmp.size > 0: + tmp = tmp + 1 + ind = hstack((ind, tmp)) + if verbose: + print('Found %d spurious spikes' % tmp.size) + + if findDspikes: # ,% finding spurious double (two point) spikes + tmp, = nonzero((dxn[:-2] > dcrit) * (dxn[2::] < -dcrit) | + (dxn[:-2] < -dcrit) * (dxn[2::] > dcrit)) + if tmp.size > 0: + tmp = tmp + 1 + ind = hstack((ind, tmp, tmp + 1)) # %removing both points + if verbose: + print('Found %d spurious two point (double) spikes' % tmp.size) + + if findjumpsDx: # ,% finding spurious jumps in Dx + tmp, = nonzero(dxn > dcrit) + if verbose: + print('Found %d spurious positive jumps of Dx' % tmp.size) + if tmp.size > 0: + ind = hstack((ind, tmp + 1)) # removing the point after the jump + + tmp, = nonzero(dxn < -dcrit) + if verbose: + print('Found %d spurious negative jumps of Dx' % tmp.size) + if tmp.size > 0: + ind = hstack((ind, tmp)) # removing the point before the jump + + if findjumpsD2x: # ,% finding spurious jumps in D^2x + tmp, = nonzero(ddxn > ddcrit) + if tmp.size > 0: + tmp = tmp + 1 + ind = hstack((ind, tmp)) # removing the jump + + if verbose: + print('Found %d spurious positive jumps of D^2x' % tmp.size) + + tmp, = nonzero(ddxn < -ddcrit) + if tmp.size > 0: + tmp = tmp + 1 + ind = hstack((ind, tmp)) # removing the jump + + if verbose: + print('Found %d spurious negative jumps of D^2x' % tmp.size) + + if zcrit >= 0.0: + #% finding consecutive values less than zcrit apart. + indzeros = (abs(dxn) <= zcrit) + indz, = nonzero(indzeros) + if indz.size > 0: + indz = indz + 1 + #%finding the beginning and end of consecutive equal values + indtr, = nonzero((diff(indzeros))) + indtr = indtr + 1 + #%indices to consecutive equal points + # removing the point before + all equal points + the point after + if True: + ind = hstack((ind, indtr - 1, indz, indtr, indtr + 1)) + else: # % removing all points + the point after + ind = hstack((ind, indz, indtr, indtr + 1)) + + if verbose: + if zcrit == 0.: + print('Found %d consecutive equal values' % indz.size) + else: + print('Found %d consecutive values less than %g apart.' % + (indz.size, zcrit)) + indg = ones(xn.size, dtype=bool) + + if ind.size > 1: + ind = unique(ind) + indg[ind] = 0 + indg, = nonzero(indg) + + if verbose: + print('Found the total of %d spurious points' % ind.size) + + return ind, indg + + +def common_shape(*args, ** kwds): + ''' + Return the common shape of a sequence of arrays + + Parameters + ----------- + *args : arraylike + sequence of arrays + **kwds : + shape + + Returns + ------- + shape : tuple + common shape of the elements of args. 
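+        If the keyword ``shape`` is given, it is broadcast together with
+        the shapes of the arrays.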
+ + Raises + ------ + An error is raised if some of the arrays do not conform + to the common shape according to the broadcasting rules in numpy. + + Examples + -------- + >>> import numpy as np + >>> import wafo.misc as wm + >>> A = np.ones((4,1)) + >>> B = 2 + >>> C = np.ones((1,5))*5 + >>> wm.common_shape(A,B,C) + (4, 5) + >>> wm.common_shape(A,B,C,shape=(3,4,1)) + (3, 4, 5) + + See also + -------- + broadcast, broadcast_arrays + ''' + args = map(asarray, args) + shapes = [x.shape for x in args] + shape = kwds.get('shape') + if shape is not None: + if not isinstance(shape, (list, tuple)): + shape = (shape,) + shapes.append(tuple(shape)) + if len(set(shapes)) == 1: + # Common case where nothing needs to be broadcasted. + return tuple(shapes[0]) + shapes = [list(s) for s in shapes] + nds = [len(s) for s in shapes] + biggest = max(nds) + # Go through each array and prepend dimensions of length 1 to each of the + # shapes in order to make the number of dimensions equal. + for i in range(len(shapes)): + diff = biggest - nds[i] + if diff > 0: + shapes[i] = [1] * diff + shapes[i] + + # Check each dimension for compatibility. A dimension length of 1 is + # accepted as compatible with any other length. + c_shape = [] + for axis in range(biggest): + lengths = [s[axis] for s in shapes] + unique = set(lengths + [1]) + if len(unique) > 2: + # There must be at least two non-1 lengths for this axis. + raise ValueError("shape mismatch: two or more arrays have " + "incompatible dimensions on axis %r." % (axis,)) + elif len(unique) == 2: + # There is exactly one non-1 length. + # The common shape will take this value. + unique.remove(1) + new_length = unique.pop() + c_shape.append(new_length) + else: + # Every array has a length of 1 on this axis. Strides can be left + # alone as nothing is broadcasted. + c_shape.append(1) + + return tuple(c_shape) + + +def argsreduce(condition, * args): + """ Return the elements of each input array that satisfy some condition. + + Parameters + ---------- + condition : array_like + An array whose nonzero or True entries indicate the elements of each + input array to extract. The shape of 'condition' must match the common + shape of the input arrays according to the broadcasting rules in numpy. + arg1, arg2, arg3, ... : array_like + one or more input arrays. + + Returns + ------- + narg1, narg2, narg3, ... : ndarray + sequence of extracted copies of the input arrays converted to the same + size as the nonzero values of condition. + + Example + ------- + >>> import wafo.misc as wm + >>> import numpy as np + >>> rand = np.random.random_sample + >>> A = rand((4,5)) + >>> B = 2 + >>> C = rand((1,5)) + >>> cond = np.ones(A.shape) + >>> [A1,B1,C1] = wm.argsreduce(cond,A,B,C) + >>> B1.shape + (20,) + >>> cond[2,:] = 0 + >>> [A2,B2,C2] = wm.argsreduce(cond,A,B,C) + >>> B2.shape + (15,) + + See also + -------- + numpy.extract + """ + newargs = atleast_1d(*args) + if not isinstance(newargs, list): + newargs = [newargs, ] + expand_arr = (condition == condition) + return [extract(condition, arr1 * expand_arr) for arr1 in newargs] + + +def stirlerr(n): + ''' + Return error of Stirling approximation, + i.e., log(n!) - log( sqrt(2*pi*n)*(n/exp(1))**n ) + + Example + ------- + >>> import wafo.misc as wm + >>> wm.stirlerr(2) + array([ 0.0413407]) + + See also + --------- + binom + + + Reference + ----------- + Catherine Loader (2000). 
+ Fast and Accurate Computation of Binomial Probabilities + + + ''' + + S0 = 0.083333333333333333333 # /* 1/12 */ + S1 = 0.00277777777777777777778 # /* 1/360 */ + S2 = 0.00079365079365079365079365 # /* 1/1260 */ + S3 = 0.000595238095238095238095238 # /* 1/1680 */ + S4 = 0.0008417508417508417508417508 # /* 1/1188 */ + + n1 = atleast_1d(n) + + y = gammaln(n1 + 1) - log(sqrt(2 * pi * n1) * (n1 / exp(1)) ** n1) + + nn = n1 * n1 + + n500 = 500 < n1 + y[n500] = (S0 - S1 / nn[n500]) / n1[n500] + n80 = logical_and(80 < n1, n1 <= 500) + if any(n80): + y[n80] = (S0 - (S1 - S2 / nn[n80]) / nn[n80]) / n1[n80] + n35 = logical_and(35 < n1, n1 <= 80) + if any(n35): + nn35 = nn[n35] + y[n35] = (S0 - (S1 - (S2 - S3 / nn35) / nn35) / nn35) / n1[n35] + + n15 = logical_and(15 < n1, n1 <= 35) + if any(n15): + nn15 = nn[n15] + y[n15] = ( + S0 - (S1 - (S2 - (S3 - S4 / nn15) / nn15) / nn15) / nn15) / n1[n15] + + return y + +#@ReservedAssignment + + +def getshipchar(value=None, property="max_deadweight", # @ReservedAssignment + **kwds): + ''' + Return ship characteristics from value of one ship-property + + Parameters + ---------- + value : scalar + value to use in the estimation. + property : string + defining the ship property used in the estimation. Options are: + 'max_deadweight','length','beam','draft','service_speed', + 'propeller_diameter'. + The length was found from statistics of 40 vessels of size 85 to + 100000 tonn. An exponential curve through 0 was selected, and the + factor and exponent that minimized the standard deviation of the + relative error was selected. (The error returned is the same for + any ship.) The servicespeed was found for ships above 1000 tonns + only. The propeller diameter formula is from [1]_. + + Returns + ------- + sc : dict + containing estimated mean values and standard-deviations of ship + characteristics: + max_deadweight [kkg], (weight of cargo, fuel etc.) + length [m] + beam [m] + draught [m] + service_speed [m/s] + propeller_diameter [m] + + Example + --------- + >>> import wafo.misc as wm + >>> sc = wm.getshipchar(10,'service_speed') + >>> for key in sorted(sc): key, sc[key] + ('beam', 29.0) + ('beamSTD', 2.9000000000000004) + ('draught', 9.6) + ('draughtSTD', 2.112) + ('length', 216.0) + ('lengthSTD', 2.011309883194276) + ('max_deadweight', 30969.0) + ('max_deadweightSTD', 3096.9) + ('propeller_diameter', 6.761165385916601) + ('propeller_diameterSTD', 0.20267047566705432) + ('service_speed', 10.0) + ('service_speedSTD', 0) + + Other units: 1 ft = 0.3048 m and 1 knot = 0.5144 m/s + + + Reference + --------- + .. [1] Gray and Greeley, (1978), + "Source level model for propeller blade rate radiation for the world's + merchant fleet", Bolt Beranek and Newman Technical Memorandum No. 458. 
+ ''' + if value is None: + names = kwds.keys() + if len(names) != 1: + raise ValueError('Only on keyword') + property = names[0] # @ReservedAssignment + value = kwds[property] + value = np.atleast_1d(value) + valid_props = dict(l='length', b='beam', d='draught', m='max_deadweigth', + s='service_speed', p='propeller_diameter') + prop = valid_props[property[0]] + + prop2max_dw = dict(length=lambda x: (x / 3.45) ** (2.5), + beam=lambda x: ((x / 1.78) ** (1 / 0.27)), + draught=lambda x: ((x / 0.8) ** (1 / 0.24)), + service_speed=lambda x: ((x / 1.14) ** (1 / 0.21)), + propeller_diameter=lambda x: (((x / 0.12) ** (4 / 3) / + 3.45) ** (2.5))) + + max_deadweight = prop2max_dw.get(prop, lambda x: x)(value) + propertySTD = prop + 'STD' + + length = round(3.45 * max_deadweight ** 0.40) + length_err = length ** 0.13 + + beam = round(1.78 * max_deadweight ** 0.27 * 10) / 10 + beam_err = beam * 0.10 + + draught = round(0.80 * max_deadweight ** 0.24 * 10) / 10 + draught_err = draught * 0.22 + + #S = round(2/3*(L)**0.525) + speed = round(1.14 * max_deadweight ** 0.21 * 10) / 10 + speed_err = speed * 0.10 + + p_diam = 0.12 * length ** (3.0 / 4.0) + p_diam_err = 0.12 * length_err ** (3.0 / 4.0) + + max_deadweight = round(max_deadweight) + max_deadweightSTD = 0.1 * max_deadweight + + shipchar = OrderedDict(beam=beam, beamSTD=beam_err, + draught=draught, draughtSTD=draught_err, + length=length, lengthSTD=length_err, + max_deadweight=max_deadweight, + max_deadweightSTD=max_deadweightSTD, + propeller_diameter=p_diam, + propeller_diameterSTD=p_diam_err, + service_speed=speed, service_speedSTD=speed_err) + + shipchar[propertySTD] = 0 + return shipchar + + +def betaloge(z, w): + ''' + Natural Logarithm of beta function. + + CALL betaloge(z,w) + + BETALOGE computes the natural logarithm of the beta + function for corresponding elements of Z and W. The arrays Z and + W must be real and nonnegative. Both arrays must be the same size, + or either can be scalar. BETALOGE is defined as: + + y = LOG(BETA(Z,W)) = gammaln(Z)+gammaln(W)-gammaln(Z+W) + + and is obtained without computing BETA(Z,W). Since the beta + function can range over very large or very small values, its + logarithm is sometimes more useful. + This implementation is more accurate than the BETALN implementation + for large arguments + + Example + ------- + >>> import wafo.misc as wm + >>> wm.betaloge(3,2) + array([-2.48490665]) + + See also + -------- + betaln, beta + ''' + # y = gammaln(z)+gammaln(w)-gammaln(z+w) + zpw = z + w + return (stirlerr(z) + stirlerr(w) + 0.5 * log(2 * pi) + (w - 0.5) * log(w) + + (z - 0.5) * log(z) - stirlerr(zpw) - (zpw - 0.5) * log(zpw)) + + # stirlings approximation: + # (-(zpw-0.5).*log(zpw) +(w-0.5).*log(w)+(z-0.5).*log(z) +0.5*log(2*pi)) + # return y + + +def gravity(phi=45): + ''' Returns the constant acceleration of gravity + + GRAVITY calculates the acceleration of gravity + using the international gravitational formulae [1]_: + + g = 9.78049*(1+0.0052884*sin(phir)**2-0.0000059*sin(2*phir)**2) + where + phir = phi*pi/180 + + Parameters + ---------- + phi : {float, int} + latitude in degrees + + Returns + -------- + g : ndarray + acceleration of gravity [m/s**2] + + Examples + -------- + >>> import wafo.misc as wm + >>> import numpy as np + >>> phi = np.linspace(0,45,5) + >>> wm.gravity(phi) + array([ 9.78049 , 9.78245014, 9.78803583, 9.79640552, 9.80629387]) + + See also + -------- + wdensity + + References + ---------- + .. 
[1] Irgens, Fridtjov (1987) + "Formelsamling i mekanikk: + statikk, fasthetsl?re, dynamikk fluidmekanikk" + tapir forlag, University of Trondheim, + ISBN 82-519-0786-1, pp 19 + + ''' + + phir = phi * pi / 180. # change from degrees to radians + return 9.78049 * (1. + 0.0052884 * sin(phir) ** 2. - + 0.0000059 * sin(2 * phir) ** 2.) + + +def nextpow2(x): + ''' + Return next higher power of 2 + + Example + ------- + >>> import wafo.misc as wm + >>> wm.nextpow2(10) + 4 + >>> wm.nextpow2(np.arange(5)) + 3 + ''' + t = isscalar(x) or len(x) + if (t > 1): + f, n = frexp(t) + else: + f, n = frexp(abs(x)) + + if (f == 0.5): + n = n - 1 + return n + + +def discretize(fun, a, b, tol=0.005, n=5, method='linear'): + ''' + Automatic discretization of function + + Parameters + ---------- + fun : callable + function to discretize + a,b : real scalars + evaluation limits + tol : real, scalar + absoute error tolerance + n : scalar integer + number of values + method : string + defining method of gridding, options are 'linear' and 'adaptive' + + Returns + ------- + x : discretized values + y : fun(x) + + Example + ------- + >>> import wafo.misc as wm + >>> import numpy as np + >>> import pylab as plb + >>> x,y = wm.discretize(np.cos, 0, np.pi) + >>> xa,ya = wm.discretize(np.cos, 0, np.pi, method='adaptive') + >>> t = plb.plot(x, y, xa, ya, 'r.') + >>> plb.show() + + >>> plb.close('all') + + ''' + if method.startswith('a'): + return _discretize_adaptive(fun, a, b, tol, n) + else: + return _discretize_linear(fun, a, b, tol, n) + + +def _discretize_linear(fun, a, b, tol=0.005, n=5): + ''' + Automatic discretization of function, linear gridding + ''' + tiny = floatinfo.tiny + + x = linspace(a, b, n) + y = fun(x) + + err0 = inf + err = 10000 + nmax = 2 ** 20 + while (err != err0 and err > tol and n < nmax): + err0 = err + x0 = x + y0 = y + n = 2 * (n - 1) + 1 + x = linspace(a, b, n) + y = fun(x) + y00 = interp(x, x0, y0) + err = 0.5 * amax(abs((y00 - y) / (abs(y00 + y) + tiny))) + return x, y + + +def _discretize_adaptive(fun, a, b, tol=0.005, n=5): + ''' + Automatic discretization of function, adaptive gridding. + ''' + tiny = floatinfo.tiny + n += (mod(n, 2) == 0) # make sure n is odd + x = linspace(a, b, n) + fx = fun(x) + + n2 = (n - 1) / 2 + erri = hstack((zeros((n2, 1)), ones((n2, 1)))).ravel() + err = erri.max() + err0 = inf + # while (err != err0 and err > tol and n < nmax): + for j in range(50): + if err != err0 and np.any(erri > tol): + err0 = err + # find top errors + + I, = where(erri > tol) + # double the sample rate in intervals with the most error + y = (vstack(((x[I] + x[I - 1]) / 2, + (x[I + 1] + x[I]) / 2)).T).ravel() + fy = fun(y) + + fy0 = interp(y, x, fx) + erri = 0.5 * (abs((fy0 - fy) / (abs(fy0 + fy) + tiny))) + + err = erri.max() + + x = hstack((x, y)) + + I = x.argsort() + x = x[I] + erri = hstack((zeros(len(fx)), erri))[I] + fx = hstack((fx, fy))[I] + + else: + break + else: + warnings.warn('Recursion level limit reached j=%d' % j) + + return x, fx + + +def polar2cart(theta, rho, z=None): + ''' + Transform polar coordinates into 2D cartesian coordinates. + + Returns + ------- + x, y : array-like + Cartesian coordinates, x = rho*cos(theta), y = rho*sin(theta) + + See also + -------- + cart2polar + ''' + x, y = rho * cos(theta), rho * sin(theta) + if z is None: + return x, y + else: + return x, y, z + + +def cart2polar(x, y, z=None): + ''' Transform 2D cartesian coordinates into polar coordinates. 
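+
+    Parameters
+    ----------
+    x, y : array-like
+        cartesian coordinates
+    z : array-like, optional
+        third coordinate, returned unchanged if given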
+ + Returns + ------- + theta : array-like + arctan2(y,x) + rho : array-like + sqrt(x**2+y**2) + + See also + -------- + polar2cart + ''' + t, r = arctan2(y, x), hypot(x, y) + if z is None: + return t, r + else: + return t, r, z + + +def meshgrid(*xi, **kwargs): + """ + Return coordinate matrices from one or more coordinate vectors. + + Make N-D coordinate arrays for vectorized evaluations of + N-D scalar/vector fields over N-D grids, given + one-dimensional coordinate arrays x1, x2,..., xn. + + Parameters + ---------- + x1, x2,..., xn : array_like + 1-D arrays representing the coordinates of a grid. + indexing : 'xy' or 'ij' (optional) + cartesian ('xy', default) or matrix ('ij') indexing of output + sparse : True or False (default) (optional) + If True a sparse grid is returned in order to conserve memory. + copy : True (default) or False (optional) + If False a view into the original arrays are returned in order to + conserve memory + + Returns + ------- + X1, X2,..., XN : ndarray + For vectors `x1`, `x2`,..., 'xn' with lengths ``Ni=len(xi)`` , + return ``(N1, N2, N3,...Nn)`` shaped arrays if indexing='ij' + or ``(N2, N1, N3,...Nn)`` shaped arrays if indexing='xy' + with the elements of `xi` repeated to fill the matrix along + the first dimension for `x1`, the second for `x2` and so on. + + See Also + -------- + index_tricks.mgrid : Construct a multi-dimensional "meshgrid" + using indexing notation. + index_tricks.ogrid : Construct an open multi-dimensional "meshgrid" + using indexing notation. + + Examples + -------- + >>> x = np.linspace(0,1,3) # coordinates along x axis + >>> y = np.linspace(0,1,2) # coordinates along y axis + >>> xv, yv = meshgrid(x,y) # extend x and y for a 2D xy grid + >>> xv + array([[ 0. , 0.5, 1. ], + [ 0. , 0.5, 1. ]]) + >>> yv + array([[ 0., 0., 0.], + [ 1., 1., 1.]]) + >>> xv, yv = meshgrid(x,y, sparse=True) # make sparse output arrays + >>> xv + array([[ 0. , 0.5, 1. ]]) + >>> yv + array([[ 0.], + [ 1.]]) + + >>> meshgrid(x,y,sparse=True,indexing='ij') # change to matrix indexing + [array([[ 0. ], + [ 0.5], + [ 1. ]]), array([[ 0., 1.]])] + >>> meshgrid(x,y,indexing='ij') + [array([[ 0. , 0. ], + [ 0.5, 0.5], + [ 1. , 1. ]]), array([[ 0., 1.], + [ 0., 1.], + [ 0., 1.]])] + + >>> meshgrid(0,1,5) # just a 3D point + [array([[[0]]]), array([[[1]]]), array([[[5]]])] + >>> map(np.squeeze,meshgrid(0,1,5)) # just a 3D point + [array(0), array(1), array(5)] + >>> meshgrid(3) + array([3]) + >>> meshgrid(y) # 1D grid y is just returned + array([ 0., 1.]) + + `meshgrid` is very useful to evaluate functions on a grid. 
+ + >>> x = np.arange(-5, 5, 0.1) + >>> y = np.arange(-5, 5, 0.1) + >>> xx, yy = meshgrid(x, y, sparse=True) + >>> z = np.sin(xx**2+yy**2)/(xx**2+yy**2) + """ + copy_ = kwargs.get('copy', True) + args = atleast_1d(*xi) + if not isinstance(args, list): + if args.size > 0: + return args.copy() if copy_ else args + else: + raise TypeError('meshgrid() take 1 or more arguments (0 given)') + + sparse = kwargs.get('sparse', False) + indexing = kwargs.get('indexing', 'xy') # 'ij' + + ndim = len(args) + s0 = (1,) * ndim + output = [x.reshape(s0[:i] + (-1,) + s0[i + 1::]) + for i, x in enumerate(args)] + + shape = [x.size for x in output] + + if indexing == 'xy': + # switch first and second axis + output[0].shape = (1, -1) + (1,) * (ndim - 2) + output[1].shape = (-1, 1) + (1,) * (ndim - 2) + shape[0], shape[1] = shape[1], shape[0] + + if sparse: + if copy_: + return [x.copy() for x in output] + else: + return output + else: + # Return the full N-D matrix (not only the 1-D vector) + if copy_: + mult_fact = ones(shape, dtype=int) + return [x * mult_fact for x in output] + else: + return broadcast_arrays(*output) + + +def ndgrid(*args, **kwargs): + """ + Same as calling meshgrid with indexing='ij' (see meshgrid for + documentation). + """ + kwargs['indexing'] = 'ij' + return meshgrid(*args, ** kwargs) + + +def trangood(x, f, min_n=None, min_x=None, max_x=None, max_n=inf): + """ + Make sure transformation is efficient. + + Parameters + ------------ + x, f : array_like + input transform function, (x,f(x)). + min_n : scalar, int + minimum number of points in the good transform. + (Default x.shape[0]) + min_x : scalar, real + minimum x value to transform. (Default min(x)) + max_x : scalar, real + maximum x value to transform. (Default max(x)) + max_n : scalar, int + maximum number of points in the good transform + (default inf) + Returns + ------- + x, f : array_like + the good transform function. + + TRANGOOD interpolates f linearly and optionally + extrapolate it linearly outside the range of x + with X uniformly spaced. + + See also + --------- + tranproc, + numpy.interp + """ + xo, fo = atleast_1d(x, f) + #n = xo.size + if (xo.ndim != 1): + raise ValueError('x must be a vector.') + if (fo.ndim != 1): + raise ValueError('f must be a vector.') + + i = xo.argsort() + xo = xo[i] + fo = fo[i] + del i + dx = diff(xo) + if (any(dx <= 0)): + raise ValueError('Duplicate x-values not allowed.') + + nf = fo.shape[0] + + if max_x is None: + max_x = xo[-1] + if min_x is None: + min_x = xo[0] + if min_n is None: + min_n = nf + if (min_n < 2): + min_n = 2 + if (max_n < 2): + max_n = 2 + + ddx = diff(dx) + xn = xo[-1] + x0 = xo[0] + L = float(xn - x0) + eps = floatinfo.eps + if ((nf < min_n) or (max_n < nf) or any(abs(ddx) > 10 * eps * (L))): +# % pab 07.01.2001: Always choose the stepsize df so that +# % it is an exactly representable number. +# % This is important when calculating numerical derivatives and is +# % accomplished by the following. + dx = L / (min(min_n, max_n) - 1) + dx = (dx + 2.) - 2. 
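+        # extend the upper limit by dx/2 so that arange includes xn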
+        xi = arange(x0, xn + dx / 2., dx)
+        #% New call pab 11.11.2000: This is much quicker
+        fo = interp(xi, xo, fo)
+        xo = xi
+
+# x is now uniformly spaced
+    dx = xo[1] - xo[0]
+
+    # Extrapolate linearly outside the range of f
+    if (min_x < xo[0]):
+        x1 = dx * arange(floor((min_x - xo[0]) / dx), -2)
+        f2 = fo[0] + x1 * (fo[1] - fo[0]) / (xo[1] - xo[0])
+        fo = hstack((f2, fo))
+        xo = hstack((x1 + xo[0], xo))
+
+    if (max_x > xo[-1]):
+        x1 = dx * arange(1, ceil((max_x - xo[-1]) / dx) + 1)
+        f2 = fo[-1] + x1 * (fo[-1] - fo[-2]) / (xo[-1] - xo[-2])
+        fo = hstack((fo, f2))
+        xo = hstack((xo, x1 + xo[-1]))
+
+    return xo, fo
+
+
+def tranproc(x, f, x0, *xi):
+    """
+    Transforms process X and up to four derivatives
+    using the transformation f.
+
+    Parameters
+    ----------
+    x,f : array-like
+        [x,f(x)], transform function, y = f(x).
+    x0, x1,...,xn : vectors
+        where xi is the i'th time derivative of x0. 0<=N<=4.
+
+    Returns
+    -------
+    y0, y1,...,yn : vectors
+        where yi is the i'th time derivative of y0 = f(x0).
+
+    By the basic rules of derivation:
+    Y1 = f'(X0)*X1
+    Y2 = f''(X0)*X1^2 + f'(X0)*X2
+    Y3 = f'''(X0)*X1^3 + f'(X0)*X3 + 3*f''(X0)*X1*X2
+    Y4 = f''''(X0)*X1^4 + f'(X0)*X4 + 6*f'''(X0)*X1^2*X2
+       + f''(X0)*(3*X2^2 + 4*X1*X3)
+
+    The differentiation of f is performed numerically with a central
+    difference method with linear extrapolation towards the beginning
+    and end of f, respectively.
+
+    Example
+    --------
+    Derivative of g and the transformed Gaussian model.
+    >>> import pylab as plb
+    >>> import wafo.misc as wm
+    >>> import wafo.transform.models as wtm
+    >>> tr = wtm.TrHermite()
+    >>> x = linspace(-5,5,501)
+    >>> g = tr(x)
+    >>> gder = wm.tranproc(x, g, x, ones(g.shape[0]))
+    >>> h = plb.plot(x, g, x, gder[1])
+
+    plb.plot(x,pdfnorm(g)*gder[1],x,pdfnorm(x))
+    plb.legend('Transformed model','Gaussian model')
+
+    >>> plb.close('all')
+
+    See also
+    --------
+    trangood.
+    """
+
+    eps = floatinfo.eps
+    xo, fo, x0 = atleast_1d(x, f, x0)
+    xi = atleast_1d(*xi)
+    if not isinstance(xi, list):
+        xi = [xi, ]
+    N = len(xi)  # N = number of derivatives
+    nmax = ceil((xo.ptp()) * 10 ** (7. / max(N, 1)))
+    xo, fo = trangood(xo, fo, min_x=min(x0), max_x=max(x0), max_n=nmax)
+
+    n = fo.shape[0]
+    #y = x0.copy()
+    xu = (n - 1) * (x0 - xo[0]) / (xo[-1] - xo[0])
+
+    fi = asarray(floor(xu), dtype=int)
+    fi = where(fi == n - 1, fi - 1, fi)
+
+    xu = xu - fi
+    y0 = fo[fi] + (fo[fi + 1] - fo[fi]) * xu
+
+    y = y0
+
+    if N > 0:
+        y = [y0]
+        hn = xo[1] - xo[0]
+        if hn ** N < sqrt(eps):
+            msg = ('Numerical problems may occur for the derivatives in ' +
+                   'tranproc.\nThe sampling of the transformation may be too small.')
+            warnings.warn(msg)
+
+        # Transform X with the derivatives of f.
+        fxder = zeros((N, x0.size))
+        fder = vstack((xo, fo))
+        for k in range(N):  # Derivation of f(x) using a difference method.
+            n = fder.shape[-1]
+            fder = vstack([(fder[0, 0:n - 1] + fder[0, 1:n]) / 2,
+                           diff(fder[1, :]) / hn])
+            fxder[k] = tranproc(fder[0], fder[1], x0)
+
+        # Calculate the transforms of the derivatives of X.
+        # First time derivative of y: y1 = f'(x)*x1
+
+        y1 = fxder[0] * xi[0]
+        y.append(y1)
+        if N > 1:
+
+            # Second time derivative of y:
+            #    y2 = f''(x)*x1.^2+f'(x)*x2
+            y2 = fxder[1] * xi[0] ** 2. + fxder[0] * xi[1]
+            y.append(y2)
+            if N > 2:
+                # Third time derivative of y:
+                #    y3 = f'''(x)*x1.^3+f'(x)*x3 +3*f''(x)*x1*x2
+                y3 = fxder[2] * xi[0] ** 3 + fxder[0] * xi[2] + \
+                    3 * fxder[1] * xi[0] * xi[1]
+                y.append(y3)
+                if N > 3:
+                    # Fourth time derivative of y:
+                    #    y4 = f''''(x)*x1.^4+f'(x)*x4
+                    #       +6*f'''(x)*x1^2*x2+f''(x)*(3*x2^2+4*x1*x3)
+                    y4 = (fxder[3] * xi[0] ** 4. + fxder[0] * xi[3] +
+                          6. * fxder[2] * xi[0] ** 2. * xi[1] +
+                          fxder[1] * (3. * xi[1] ** 2. + 4. * xi[0] * xi[2]))
+                    y.append(y4)
+                    if N > 4:
+                        warnings.warn('Transformation of derivatives of ' +
+                                      'order>4 not supported.')
+    return y  # y0,y1,y2,y3,y4
+
+
+def good_bins(data=None, range=None, num_bins=None,  # @ReservedAssignment
+              num_data=None, odd=False, loose=True):
+    ''' Return good bins for histogram
+
+    Parameters
+    ----------
+    data : array-like
+        the data
+    range : (float, float)
+        minimum and maximum range of bins (default data.min(), data.max())
+    num_bins : scalar integer
+        approximate number of bins wanted
+        (default depending on num_data=len(data))
+    odd : bool
+        if True the bin edges are placed at half-integer multiples of the
+        bin width (default False)
+    loose : bool
+        if True add extra space to min and max
+        if False the bins are made tight to the min and max
+
+    Example
+    -------
+    >>> import wafo.misc as wm
+    >>> wm.good_bins(range=(0,5), num_bins=6)
+    array([-1., 0., 1., 2., 3., 4., 5., 6.])
+    >>> wm.good_bins(range=(0,5), num_bins=6, loose=False)
+    array([ 0., 1., 2., 3., 4., 5.])
+    >>> wm.good_bins(range=(0,5), num_bins=6, odd=True)
+    array([-1.5, -0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5])
+    >>> wm.good_bins(range=(0,5), num_bins=6, odd=True, loose=False)
+    array([-0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5])
+    '''
+
+    if data is not None:
+        x = np.atleast_1d(data)
+        num_data = len(x)
+
+    mn, mx = range if range else (x.min(), x.max())
+
+    if num_bins is None:
+        num_bins = np.ceil(4 * np.sqrt(np.sqrt(num_data)))
+
+    d = float(mx - mn) / num_bins * 2
+    e = np.floor(np.log(d) / np.log(10))
+    m = np.floor(d / 10 ** e)
+    if m > 5:
+        m = 5
+    elif m > 2:
+        m = 2
+
+    d = m * 10 ** e
+    mn = (np.floor(mn / d) - loose) * d - odd * d / 2
+    mx = (np.ceil(mx / d) + loose) * d + odd * d / 2
+    limits = np.arange(mn, mx + d / 2, d)
+    return limits
+
+
+def plot_histgrm(data, bins=None, range=None,  # @ReservedAssignment
+                 normed=False, weights=None, lintype='b-'):
+    '''
+    Plot histogram
+
+    Parameters
+    -----------
+    data : array-like
+        the data
+    bins : int or sequence of scalars, optional
+        If an int, it defines the number of equal-width
+        bins in the given range (4 * sqrt(sqrt(len(data))) by default).
+        If a sequence, it defines the bin edges, including the
+        rightmost edge, allowing for non-uniform bin widths.
+    range : (float, float), optional
+        The lower and upper range of the bins. If not provided, range
+        is simply ``(data.min(), data.max())``. Values outside the range are
+        ignored.
+    normed : bool, optional
+        If False, the result will contain the number of samples in each bin.
+        If True, the result is the value of the probability *density* function
+        at the bin, normalized such that the *integral* over the range is 1.
+    weights : array_like, optional
+        An array of weights, of the same shape as `data`. Each value in `data`
+        only contributes its associated weight towards the bin count
+        (instead of 1). If `normed` is True, the weights are normalized,
+        so that the integral of the density over the range remains 1.
+    lintype : string
+        specify color and line type, see plotbackend.plot for possibilities.
+
+    Returns
+    -------
+    h : list
+        of plot-objects
+
+    Example
+    -------
+    >>> import pylab as plb
+    >>> import wafo.misc as wm
+    >>> import wafo.stats as ws
+    >>> R = ws.weibull_min.rvs(2,loc=0,scale=2, size=100)
+    >>> h0 = wm.plot_histgrm(R, 20, normed=True)
+    >>> x = linspace(-3,16,200)
+    >>> h1 = plb.plot(x,ws.weibull_min.pdf(x,2,0,2),'r')
+
+    See also
+    --------
+    wafo.misc.good_bins
+    numpy.histogram
+    '''
+
+    x = np.atleast_1d(data)
+    if bins is None:
+        bins = np.ceil(4 * np.sqrt(np.sqrt(len(x))))
+
+    #, new=True)
+    bin_, limits = np.histogram(
+        data, bins=bins, normed=normed, weights=weights)
+    limits.shape = (-1, 1)
+    xx = limits.repeat(3, axis=1)
+    xx.shape = (-1,)
+    xx = xx[1:-1]
+    bin_.shape = (-1, 1)
+    yy = bin_.repeat(3, axis=1)
+    # yy[0,0] = 0.0 # pdf
+    yy[:, 0] = 0.0  # histogram
+    yy.shape = (-1,)
+    yy = np.hstack((yy, 0.0))
+    return plotbackend.plot(xx, yy, lintype, limits, limits * 0)
+
+
+def num2pistr(x, n=3):
+    '''
+    Convert a scalar to a text string in fractions of pi
+    if the numerator is less than 10 and not equal 0
+    and if the denominator is less than 10.
+
+    Parameters
+    ----------
+    x : scalar
+    n : scalar integer
+        maximum digits of precision. (default 3)
+
+    Returns
+    -------
+    xtxt : string
+        a text string in fractions of pi
+
+    Example
+    -------
+    >>> import wafo.misc as wm
+    >>> t = wm.num2pistr(np.pi*3/4)
+    >>> t=='3\\pi/4'
+    True
+    '''
+
+    frac = fractions.Fraction.from_float(x / pi).limit_denominator(10000000)
+    num = frac.numerator
+    den = frac.denominator
+    if (den < 10) and (num < 10) and (num != 0):
+        dtxt = '' if abs(den) == 1 else '/%d' % den
+        if abs(num) == 1:  # % numerator
+            ntxt = '-' if num == -1 else ''
+        else:
+            ntxt = '%d' % num
+        xtxt = ntxt + r'\pi' + dtxt
+    else:
+        format = '%0.' + '%dg' % n  # @ReservedAssignment
+        xtxt = format % x
+    return xtxt
+
+
+def fourier(data, t=None, T=None, m=None, n=None, method='trapz'):
+    '''
+    Returns Fourier coefficients.
+
+    Parameters
+    ----------
+    data : array-like
+        vector or matrix of row vectors with data points shape p x n.
+    t : array-like
+        vector with the n sampling times of the signal.
+        (default np.arange(n))
+    T : real scalar
+        primitive period of signal, i.e., smallest period.
+        (default T = t[-1]-t[0])
+    m : scalar integer
+        number of harmonics desired (default M = N)
+    n : scalar integer
+        number of data points (default len(t))
+    method : string
+        integration method used
+
+    Returns
+    -------
+    a, b : arrays
+        Fourier coefficients, each of size m x p
+
+    FOURIER finds the coefficients for a Fourier series representation
+    of the signal x(t) (given in digital form). It is assumed the signal
+    is periodic over T. N is the number of data points, and M-1 is the
+    number of harmonics used.
+
+    The signal can be estimated by using M-1 harmonics by:
+                        M-1
+        x[i] = 0.5*a[0] + sum (a[n]*c[n,i] + b[n]*s[n,i])
+                        n=1
+    where
+        c[n,i] = cos(2*pi*n*t[i]/T)
+        s[n,i] = sin(2*pi*n*t[i]/T)
+
+    Note that a[0] is the "dc value".
+    Remaining values are a[1], a[2], ... , a[M-1].
+
+    Example
+    -------
+    >>> import wafo.misc as wm
+    >>> import numpy as np
+    >>> T = 2*np.pi
+    >>> t = np.linspace(0,4*T)
+    >>> x = np.sin(t)
+    >>> a, b = wm.fourier(x, t, T=T, m=5)
+    >>> (np.round(a.ravel()), np.round(b.ravel()))
+    (array([ 0., -0., 0., -0., 0.]), array([ 0., 4., -0., -0., 0.]))
+
+    See also
+    --------
+    fft
+    '''
+    x = np.atleast_2d(data)
+    p, n = x.shape
+    if t is None:
+        t = np.arange(n)
+    else:
+        t = np.atleast_1d(t)
+
+    n = len(t) if n is None else n
+    m = n if m is None else m
+    T = t[-1] - t[0] if T is None else T
+
+    if method.startswith('trapz'):
+        intfun = trapz
+    elif method.startswith('simp'):
+        intfun = simps
+
+    # Define the vectors for computing the Fourier coefficients
+    t.shape = (1, -1)
+    a = zeros((m, p))
+    b = zeros((m, p))
+    a[0] = intfun(x, t, axis=-1)
+
+    # Compute M-1 more coefficients
+    tmp = 2 * pi * t / T
+    #% tmp = 2*pi*(0:N-1).'/(N-1);
+    for i in range(1, m):
+        a[i] = intfun(x * cos(i * tmp), t, axis=-1)
+        b[i] = intfun(x * sin(i * tmp), t, axis=-1)
+
+    a = a / pi
+    b = b / pi
+
+    # Alternative: faster for large M, but gives different results than above.
+#    nper = diff(t([1 end]))/T; %No of periods given
+#    if nper == round(nper):
+#        N1 = n/nper
+#    else:
+#        N1 = n
+#
+#
+#
+# Fourier coefficients by fft
+#    Fcof1 = 2*ifft(x(1:N1,:),[],1);
+#    Pcor = [1; exp(sqrt(-1)*(1:M-1).'*t(1))]; % correction term to get
+#    % the correct integration limits
+#    Fcof = Fcof1(1:M,:).*Pcor(:,ones(1,P));
+#    a = real(Fcof(1:M,:));
+#    b = imag(Fcof(1:M,:));
+
+    return a, b
+
+
+def _test_find_cross():
+    t = findcross([0, 0, 1, -1, 1], 0)  # @UnusedVariable
+
+
+def _test_common_shape():
+
+    A = ones((4, 1))
+    B = 2
+    C = ones((1, 5)) * 5
+    common_shape(A, B, C)
+
+    common_shape(A, B, C, shape=(3, 4, 1))
+
+    A = ones((4, 1))
+    B = 2
+    C = ones((1, 5)) * 5
+    common_shape(A, B, C, shape=(4, 5))
+
+
+def _test_meshgrid():
+    x = array([-1, -0.5, 1, 4, 5], float)
+    y = array([0, -2, -5], float)
+    xv, yv = meshgrid(x, y, sparse=False)
+    print(xv)
+    print(yv)
+    xv, yv = meshgrid(x, y, sparse=True)  # make sparse output arrays
+    print(xv)
+    print(yv)
+    print(meshgrid(0, 1, 5, sparse=True))  # just a 3D point
+    print(meshgrid([0, 1, 5], sparse=True))  # just a 3D point
+    xv, yv = meshgrid(y, y)
+    yv[0, 0] = 10
+    print(xv)
+    print(yv)
+# >>> xv
+## array([[ 0. , 0.5, 1. ]])
+# >>> yv
+# array([[ 0.],
+#        [ 1.]])
+# array([[-1. , -0.5, 1. , 4. , 5. ],
+##       [-1. , -0.5, 1. , 4. , 5. ],
+#        [-1. , -0.5, 1. , 4. , 5. ]])
+#
+# array([[ 0., 0., 0., 0., 0.],
+##       [-2., -2., -2., -2., -2.],
+#        [-5., -5., -5., -5., -5.]])
+
+
+def _test_tranproc():
+    import wafo.transform.models as wtm
+    tr = wtm.TrHermite()
+    x = linspace(-5, 5, 501)
+    g = tr(x)
+    _gder = tranproc(x, g, x, ones(g.size))
+    pass
+    #>>> gder(:,1) = g(:,1)
+    #>>> plot(g(:,1),[g(:,2),gder(:,2)])
+    #>>> plot(g(:,1),pdfnorm(g(:,2)).*gder(:,2),g(:,1),pdfnorm(g(:,1)))
+    #>>> legend('Transformed model','Gaussian model')
+
+
+def _test_detrend():
+    import pylab as plb
+    cos = plb.cos
+    randn = plb.randn
+    x = linspace(0, 1, 200)
+    y = exp(x) + cos(5 * 2 * pi * x) + 1e-1 * randn(x.size)
+    y0 = detrendma(y, 20)
+    tr = y - y0
+    plb.plot(x, y, x, y0, 'r', x, exp(x), 'k', x, tr, 'm')
+
+
+def _test_extrema():
+    import pylab as pb
+    from pylab import plot
+    t = pb.linspace(0, 7 * pi, 250)
+    x = pb.sin(t) + 0.1 * sin(50 * t)
+    ind = findextrema(x)
+    ti, tp = t[ind], x[ind]
+    plot(t, x, '.', ti, tp, 'r.')
+    _ind1 = findrfc(tp, 0.3)
+
+
+def _test_discretize():
+    import pylab as plb
+    x, y = discretize(cos, 0, pi)
+    plb.plot(x, y)
+    plb.show()
+    plb.close('all')
+
+
+def _test_stirlerr():
+    x = linspace(1, 5, 6)
+    print stirlerr(x)
+    print stirlerr(1)
+    print getshipchar(1000)
+    print betaloge(3, 2)
+
+
+def _test_parse_kwargs():
+    opt = dict(arg1=1, arg2=3)
+    print opt
+    opt = parse_kwargs(opt, arg1=5)
+    print opt
+    opt2 = dict(arg3=15)
+    opt = parse_kwargs(opt, **opt2)
+    print opt
+
+    opt0 = testfun('default')
+    print opt0
+    opt0.update(opt1=100)
+    print opt0
+    opt0 = parse_kwargs(opt0, opt2=200)
+    print opt0
+    out1 = testfun(opt0['opt1'], **opt0)
+    print out1
+
+
+def test_docstrings():
+    import doctest
+    doctest.testmod()
+
+if __name__ == "__main__":
+    test_docstrings()
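A minimal usage sketch, not part of the patch above: it shows how the coefficients returned by fourier() can be fed back into the truncated series 0.5*a[0] + sum_n (a[n]*cos(2*pi*n*t/T) + b[n]*sin(2*pi*n*t/T)) that the docstring describes. It assumes the patched wafo.misc module is importable as wm; the test signal and grid size are illustrative only.

import numpy as np
import wafo.misc as wm

T = 2 * np.pi                          # with T = 2*pi the 1/pi normalisation used by
t = np.linspace(0, T, 257)             # fourier() matches the standard series coefficients
x = np.sin(t) + 0.5 * np.cos(2 * t)    # band-limited test signal: b[1] ~ 1, a[2] ~ 0.5
a, b = wm.fourier(x, t, T=T, m=5)      # m given explicitly; a, b have shape (m, 1)

tt = t.ravel()                         # fourier() sets t.shape = (1, -1), so flatten before reuse
xr = 0.5 * a[0]                        # dc term
for k in range(1, a.shape[0]):         # add the remaining m - 1 harmonics
    xr = xr + (a[k] * np.cos(2 * np.pi * k * tt / T) +
               b[k] * np.sin(2 * np.pi * k * tt / T))

print(np.max(np.abs(xr - x)))          # close to zero for this band-limited, periodic signal

Note that when the record spans several periods, as in the docstring example above, the coefficients come out scaled by the number of periods (b[1] is about 4 there), so reconstruct from a single period or rescale accordingly.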