# You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

# 99 lines
# 4.1 KiB
# Python

"""This module contains all the functions needed for variogram analysis """
import sklearn.metrics.pairwise as pairwise
import numpy as np
def lagindices(pwdist, lag, tol):
    '''
    Find the pairs of points whose separation falls inside one lag bin.

    Input:  (pwdist) square, symmetric array of pairwise distances
                     (anything np.asarray can convert)
            (lag)    the distance, h, between points
            (tol)    the tolerance we are comfortable with around (lag)
    Output: (ind)    <Nx2> integer NumPy array; each row (i, j), i < j,
                     names two points whose distance d satisfies
                     lag - tol <= d < lag + tol, e.g., [3, 5]
                     corresponds to data[3] and data[5].  The shape is
                     (0, 2) when no pair falls inside the bin.
    '''
    pwdist = np.asarray(pwdist)
    # grab the coordinates in a given range: lag +/- tolerance
    # (lower bound inclusive, upper bound exclusive)
    rows, cols = np.where((pwdist >= lag - tol) & (pwdist < lag + tol))
    # keep the strict upper triangle only: pwdist is a *symmetric*
    # distance matrix, so (i, j) and (j, i) are the same pair, and the
    # diagonal would pair a point with itself
    upper = cols > rows
    # one row per pair, in the row-major order np.where produced
    return np.column_stack((rows[upper], cols[upper]))
def semivariance(data, indices):
    '''
    Semivariance of (data) over a set of paired observations.

    Input:  (data)    sequence or NumPy array holding the variable of
                      interest; it is indexed directly by the entries
                      of (indices)
            (indices) pairs (i, j) of positions in (data), e.g. the
                      output of lagindices() for one lag
    Output: (z)       half the mean squared difference between the
                      paired values; NaN when (indices) is empty
    '''
    values = np.asarray(data)
    # accept any iterable of pairs, not just the array lagindices() returns
    if isinstance(indices, np.ndarray):
        pairs = indices
    else:
        pairs = np.array(list(indices))
    # an empty lag bin has no defined semivariance; answer NaN directly
    # instead of letting np.mean warn about an empty slice
    if pairs.size == 0:
        return np.nan
    tail = values[pairs[:, 0]]
    head = values[pairs[:, 1]]
    # the semivariance is half the mean squared difference
    return np.mean((tail - head) ** 2.0) / 2.0
def semivariogram(t, data, lags, tol):
    '''
    Shorthand for variogram() with the 'semivariogram' method.

    Input:  (t)    sample coordinates, forwarded to variogram()
            (data) values of the variable of interest, forwarded as is
            (lags) the distances, h, at which to evaluate
            (tol)  the tolerance we are comfortable with around each lag
    Output: (sv)   <2xN> NumPy array of lags and semivariogram values,
                   exactly as returned by variogram()
    '''
    sv = variogram(t, data, lags, tol, method='semivariogram')
    return sv
def covariance(data, indices):
    '''
    Covariance of (data) between the two ends of a set of point pairs.

    Input:  (data)    sequence or NumPy array holding the variable of
                      interest; it is indexed directly by the entries
                      of (indices)
            (indices) pairs (i, j) of positions in (data), e.g. the
                      output of lagindices() for one lag
    Output: (z)       mean of data[i] * data[j] minus the product of
                      the mean over all i ("tail") and the mean over
                      all j ("head"); NaN when (indices) is empty
    '''
    values = np.asarray(data)
    # accept any iterable of pairs, not just the array lagindices() returns
    if isinstance(indices, np.ndarray):
        pairs = indices
    else:
        pairs = np.array(list(indices))
    # an empty lag bin has no defined covariance; answer NaN directly
    # instead of letting np.mean warn about an empty slice
    if pairs.size == 0:
        return np.nan
    tail = values[pairs[:, 0]]
    head = values[pairs[:, 1]]
    # product of the tail mean and the head mean
    m = np.mean(tail) * np.mean(head)
    return np.mean(tail * head - m)
def covariogram(t, data, lags, tol):
    '''
    Shorthand for variogram() with the 'covariogram' method.

    Input:  (t)    sample coordinates, forwarded to variogram()
            (data) values of the variable of interest, forwarded as is
            (lags) the distances, h, at which to evaluate
            (tol)  the tolerance we are comfortable with around each lag
    Output: (cv)   <2xN> NumPy array of lags and covariogram values,
                   exactly as returned by variogram()
    '''
    cv = variogram(t, data, lags, tol, method='covariogram')
    return cv
def variogram(t, data, lags, tol, method):
    '''
    Evaluate an empirical semivariogram or covariogram at several lags.

    Input:  (t)      coordinates of the samples; flattened into a single
                     column before distances are taken, so each entry
                     is treated as one scalar position
            (data)   values of the variable of interest, indexed by the
                     same positions as (t)
            (lags)   iterable of distances, h, between points
            (tol)    the tolerance we are comfortable with around each lag
            (method) 'semivariogram' (aliases 'semi', 'sv', 's') or
                     'covariogram' (aliases 'cov', 'co', 'cv', 'c')
    Output: (v)      <2xN> NumPy array; row 0 holds the lags, row 1 the
                     variogram values
    Raises: ValueError if (method) is not one of the names above
    '''
    # pick the estimator first so a bad method fails fast with a clear
    # message rather than as an unbound local at the return statement
    if method in ('semivariogram', 'semi', 'sv', 's'):
        estimator = semivariance
    elif method in ('covariogram', 'cov', 'co', 'cv', 'c'):
        estimator = covariance
    else:
        raise ValueError(
            "unknown method %r; expected 'semivariogram' or 'covariogram'"
            % (method,)
        )
    # lags is walked twice below; materialize it so a one-shot iterator
    # is not already exhausted when the results are bundled
    lags = list(lags)
    # calculate the pairwise distances
    pwdist = pairwise.pairwise_distances(np.reshape(np.array(t), (-1, 1)))
    # create a list of arrays of index pairs having the ~same lag
    index = [lagindices(pwdist, lag, tol) for lag in lags]
    # calculate the variogram at different lags given some tolerance
    v = [estimator(data, indices) for indices in index]
    # bundle the variogram values with their lags
    return np.array(list(zip(lags, v))).T