You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
99 lines
4.1 KiB
Python
99 lines
4.1 KiB
Python
6 years ago
|
"""This module contains all the functions needed for variogram analysis """
|
||
|
|
||
|
import sklearn.metrics.pairwise as pairwise
|
||
|
import numpy as np
|
||
|
|
||
|
def lagindices(pwdist, lag, tol):
|
||
|
'''
|
||
|
Input: (pwdist) square NumPy array of pairwise distances
|
||
|
(lag) the distance, h, between points
|
||
|
(tol) the tolerance we are comfortable with around (lag)
|
||
|
Output: (ind) list of tuples; the first element is the row of
|
||
|
(data) for one point, the second element is the row
|
||
|
of a point (lag)+/-(tol) away from the first point,
|
||
|
e.g., (3,5) corresponds fo data[3,:], and data[5,:]
|
||
|
'''
|
||
|
# grab the coordinates in a given range: lag +/- tolerance
|
||
|
i, j = np.where((pwdist >= lag - tol) & (pwdist < lag + tol))
|
||
|
# zip the coordinates into a list
|
||
|
indices = list(zip(i, j))
|
||
|
# take out the repeated elements,
|
||
|
# since p is a *symmetric* distance matrix
|
||
|
indices = np.array([i for i in indices if i[1] > i[0]])
|
||
|
return indices
|
||
|
|
||
|
|
||
|
def semivariance(data, indices):
|
||
|
'''
|
||
|
Input: (data) NumPy array where the fris t two columns
|
||
|
are the spatial coordinates, x and y, and
|
||
|
the third column is the variable of interest
|
||
|
(indices) indices of paired data points in (data)
|
||
|
Output: (z) semivariance value at lag (h) +/- (tol)
|
||
|
'''
|
||
|
# take the squared difference between
|
||
|
# the values of the variable of interest
|
||
|
z = [(data[i] - data[j])**2.0 for i, j in indices]
|
||
|
# the semivariance is half the mean squared difference
|
||
|
return np.mean(z) / 2.0
|
||
|
|
||
|
def semivariogram(t, data, lags, tol):
|
||
|
'''
|
||
|
Input: (data) NumPy array where the fris t two columns
|
||
|
are the spatial coordinates, x and y
|
||
|
(lag) the distance, h, between points
|
||
|
(tol) the tolerance we are comfortable with around (lag)
|
||
|
Output: (sv) <2xN> NumPy array of lags and semivariogram values
|
||
|
'''
|
||
|
return variogram(t, data, lags, tol, 'semivariogram')
|
||
|
|
||
|
|
||
|
def covariance(data, indices):
|
||
|
'''
|
||
|
Input: (data) NumPy array where the fris t two columns
|
||
|
are the spatial coordinates, x and y
|
||
|
(lag) the distance, h, between points
|
||
|
(tol) the tolerance we are comfortable with around (lag)
|
||
|
Output: (z) covariance value at lag (h) +/- (tol)
|
||
|
'''
|
||
|
# grab the indices of the points
|
||
|
# that are lag +/- tolerance apart
|
||
|
m_tail = np.mean([data[i] for i, j in indices])
|
||
|
m_head = np.mean([data[j] for i, j in indices])
|
||
|
m = m_tail * m_head
|
||
|
z = [data[i] * data[j] - m for i, j in indices]
|
||
|
return np.mean(z)
|
||
|
|
||
|
|
||
|
def covariogram(t, data, lags, tol):
|
||
|
'''
|
||
|
Input: (data) NumPy array where the fris t two columns
|
||
|
are the spatial coordinates, x and y
|
||
|
(lag) the distance, h, between points
|
||
|
(tol) the tolerance we are comfortable with around (lag)
|
||
|
Output: (cv) <2xN> NumPy array of lags and covariogram values
|
||
|
'''
|
||
|
return variogram(t, data, lags, tol, 'covariogram')
|
||
|
|
||
|
|
||
|
def variogram(t, data, lags, tol, method):
|
||
|
'''
|
||
|
Input: (data) NumPy array where the fris t two columns
|
||
|
are the spatial coordinates, x and y
|
||
|
(lag) the distance, h, between points
|
||
|
(tol) the tolerance we are comfortable with around (lag)
|
||
|
(method) either 'semivariogram', or 'covariogram'
|
||
|
Output: (cv) <2xN> NumPy array of lags and variogram values
|
||
|
'''
|
||
|
# calculate the pairwise distances
|
||
|
pwdist = pairwise.pairwise_distances(np.reshape(np.array(t), (-1,1)))
|
||
|
# create a list of lists of indices of points having the ~same lag
|
||
|
index = [lagindices(pwdist, lag, tol) for lag in lags]
|
||
|
# calculate the variogram at different lags given some tolerance
|
||
|
if method in ['semivariogram', 'semi', 'sv', 's']:
|
||
|
v = [semivariance(data, indices) for indices in index]
|
||
|
elif method in ['covariogram', 'cov', 'co', 'cv', 'c']:
|
||
|
v = [covariance(data, indices) for indices in index]
|
||
|
# bundle the semivariogram values with their lags
|
||
|
return np.array(list(zip(lags, v))).T
|