"""This module contains all the functions needed for variogram analysis """ import sklearn.metrics.pairwise as pairwise import numpy as np def lagindices(pwdist, lag, tol): ''' Input: (pwdist) square NumPy array of pairwise distances (lag) the distance, h, between points (tol) the tolerance we are comfortable with around (lag) Output: (ind) list of tuples; the first element is the row of (data) for one point, the second element is the row of a point (lag)+/-(tol) away from the first point, e.g., (3,5) corresponds fo data[3,:], and data[5,:] ''' # grab the coordinates in a given range: lag +/- tolerance i, j = np.where((pwdist >= lag - tol) & (pwdist < lag + tol)) # zip the coordinates into a list indices = list(zip(i, j)) # take out the repeated elements, # since p is a *symmetric* distance matrix indices = np.array([i for i in indices if i[1] > i[0]]) return indices def semivariance(data, indices): ''' Input: (data) NumPy array where the fris t two columns are the spatial coordinates, x and y, and the third column is the variable of interest (indices) indices of paired data points in (data) Output: (z) semivariance value at lag (h) +/- (tol) ''' # take the squared difference between # the values of the variable of interest z = [(data[i] - data[j])**2.0 for i, j in indices] # the semivariance is half the mean squared difference return np.mean(z) / 2.0 def semivariogram(t, data, lags, tol): ''' Input: (data) NumPy array where the fris t two columns are the spatial coordinates, x and y (lag) the distance, h, between points (tol) the tolerance we are comfortable with around (lag) Output: (sv) <2xN> NumPy array of lags and semivariogram values ''' return variogram(t, data, lags, tol, 'semivariogram') def covariance(data, indices): ''' Input: (data) NumPy array where the fris t two columns are the spatial coordinates, x and y (lag) the distance, h, between points (tol) the tolerance we are comfortable with around (lag) Output: (z) covariance value at lag (h) +/- (tol) ''' # grab the indices of the points # that are lag +/- tolerance apart m_tail = np.mean([data[i] for i, j in indices]) m_head = np.mean([data[j] for i, j in indices]) m = m_tail * m_head z = [data[i] * data[j] - m for i, j in indices] return np.mean(z) def covariogram(t, data, lags, tol): ''' Input: (data) NumPy array where the fris t two columns are the spatial coordinates, x and y (lag) the distance, h, between points (tol) the tolerance we are comfortable with around (lag) Output: (cv) <2xN> NumPy array of lags and covariogram values ''' return variogram(t, data, lags, tol, 'covariogram') def variogram(t, data, lags, tol, method): ''' Input: (data) NumPy array where the fris t two columns are the spatial coordinates, x and y (lag) the distance, h, between points (tol) the tolerance we are comfortable with around (lag) (method) either 'semivariogram', or 'covariogram' Output: (cv) <2xN> NumPy array of lags and variogram values ''' # calculate the pairwise distances pwdist = pairwise.pairwise_distances(np.reshape(np.array(t), (-1,1))) # create a list of lists of indices of points having the ~same lag index = [lagindices(pwdist, lag, tol) for lag in lags] # calculate the variogram at different lags given some tolerance if method in ['semivariogram', 'semi', 'sv', 's']: v = [semivariance(data, indices) for indices in index] elif method in ['covariogram', 'cov', 'co', 'cv', 'c']: v = [covariance(data, indices) for indices in index] # bundle the semivariogram values with their lags return np.array(list(zip(lags, v))).T