"""Functions for variogram analysis of data indexed by a 1-D coordinate."""

import numpy as np

# Accepted spellings of the (method) argument of variogram().
_SEMIVARIOGRAM_NAMES = ('semivariogram', 'semi', 'sv', 's')
_COVARIOGRAM_NAMES = ('covariogram', 'cov', 'co', 'cv', 'c')


def _as_pairs(indices):
    """Coerce (indices) to an <Nx2> integer array; empty input gives <0x2>."""
    return np.asarray(indices, dtype=np.intp).reshape(-1, 2)


def lagindices(pwdist, lag, tol):
    """
    Find the pairs of points whose separation falls in one lag bin.

    Input:  (pwdist) square, symmetric array of pairwise distances
            (lag)    the distance, h, between points
            (tol)    the tolerance we are comfortable with around (lag)
    Output: (ind)    <Nx2> integer NumPy array; each row (i, j), i < j,
                     names two points whose distance d satisfies
                     lag - tol <= d < lag + tol,
                     e.g., (3, 5) corresponds to data[3] and data[5].
                     The array is <0x2> when no pair falls in the bin.
    """
    pwdist = np.asarray(pwdist)
    # grab the coordinates in a given range: lag +/- tolerance
    rows, cols = np.where((pwdist >= lag - tol) & (pwdist < lag + tol))
    # (pwdist) is symmetric, so keep only the strict upper triangle;
    # this also drops the zero-distance diagonal
    upper = cols > rows
    return np.column_stack((rows[upper], cols[upper]))


def semivariance(data, indices):
    """
    Compute the semivariance over a set of paired observations.

    Input:  (data)    array-like of the variable of interest,
                      indexed by the entries of (indices)
            (indices) pairs (i, j) of positions in (data),
                      as returned by lagindices()
    Output: (z)       half the mean squared difference between the
                      paired values; NaN when (indices) is empty
    """
    pairs = _as_pairs(indices)
    if pairs.shape[0] == 0:
        # an empty lag bin has no defined semivariance
        return np.nan
    values = np.asarray(data, dtype=float)
    diff = values[pairs[:, 0]] - values[pairs[:, 1]]
    # the semivariance is half the mean squared difference
    return np.mean(diff ** 2) / 2.0


def semivariogram(t, data, lags, tol):
    """
    Compute the semivariogram of (data) at each of (lags).

    Input:  (t)    coordinates of the observations (flattened to 1-D)
            (data) array-like of the variable of interest
            (lags) iterable of distances, h, between points
            (tol)  the tolerance we are comfortable with around each lag
    Output: (sv)   <2xN> NumPy array of lags and semivariogram values
    """
    return variogram(t, data, lags, tol, 'semivariogram')


def covariance(data, indices):
    """
    Compute the covariance over a set of paired observations.

    Input:  (data)    array-like of the variable of interest,
                      indexed by the entries of (indices)
            (indices) pairs (i, j) of positions in (data),
                      as returned by lagindices()
    Output: (z)       mean of data[i] * data[j] minus the product of the
                      tail (i) mean and the head (j) mean;
                      NaN when (indices) is empty
    """
    pairs = _as_pairs(indices)
    if pairs.shape[0] == 0:
        # an empty lag bin has no defined covariance
        return np.nan
    values = np.asarray(data, dtype=float)
    tail = values[pairs[:, 0]]
    head = values[pairs[:, 1]]
    return np.mean(tail * head - np.mean(tail) * np.mean(head))


def covariogram(t, data, lags, tol):
    """
    Compute the covariogram of (data) at each of (lags).

    Input:  (t)    coordinates of the observations (flattened to 1-D)
            (data) array-like of the variable of interest
            (lags) iterable of distances, h, between points
            (tol)  the tolerance we are comfortable with around each lag
    Output: (cv)   <2xN> NumPy array of lags and covariogram values
    """
    return variogram(t, data, lags, tol, 'covariogram')


def variogram(t, data, lags, tol, method):
    """
    Compute a semivariogram or covariogram of (data) at each of (lags).

    Input:  (t)      coordinates of the observations (flattened to 1-D)
            (data)   array-like of the variable of interest
            (lags)   iterable of distances, h, between points
            (tol)    the tolerance we are comfortable with around each lag
            (method) 'semivariogram' (or 'semi', 'sv', 's'), or
                     'covariogram' (or 'cov', 'co', 'cv', 'c')
    Output: (v)      <2xN> NumPy array of lags and variogram values
    Raises: ValueError if (method) is not one of the names above
    """
    if method in _SEMIVARIOGRAM_NAMES:
        statistic = semivariance
    elif method in _COVARIOGRAM_NAMES:
        statistic = covariance
    else:
        raise ValueError('unknown variogram method: %r' % (method,))
    # (lags) is walked twice below, so materialise it in case it is a
    # one-shot iterator
    lags = list(lags)
    # pairwise distances; for a single coordinate the Euclidean distance
    # is simply the absolute difference
    coords = np.asarray(t, dtype=float).ravel()
    pwdist = np.abs(coords[:, None] - coords[None, :])
    # calculate the variogram at different lags given some tolerance
    v = [statistic(data, lagindices(pwdist, lag, tol)) for lag in lags]
    # bundle the variogram values with their lags
    return np.array([lags, v], dtype=float)