updated functions folder
parent
f8e1397412
commit
925c23ce26
@ -0,0 +1,98 @@
|
||||
"""This module contains all the functions needed for variogram analysis """
|
||||
|
||||
import sklearn.metrics.pairwise as pairwise
|
||||
import numpy as np
|
||||
|
||||
def lagindices(pwdist, lag, tol):
|
||||
'''
|
||||
Input: (pwdist) square NumPy array of pairwise distances
|
||||
(lag) the distance, h, between points
|
||||
(tol) the tolerance we are comfortable with around (lag)
|
||||
Output: (ind) list of tuples; the first element is the row of
|
||||
(data) for one point, the second element is the row
|
||||
of a point (lag)+/-(tol) away from the first point,
|
||||
e.g., (3,5) corresponds fo data[3,:], and data[5,:]
|
||||
'''
|
||||
# grab the coordinates in a given range: lag +/- tolerance
|
||||
i, j = np.where((pwdist >= lag - tol) & (pwdist < lag + tol))
|
||||
# zip the coordinates into a list
|
||||
indices = list(zip(i, j))
|
||||
# take out the repeated elements,
|
||||
# since p is a *symmetric* distance matrix
|
||||
indices = np.array([i for i in indices if i[1] > i[0]])
|
||||
return indices
|
||||
|
||||
|
||||
def semivariance(data, indices):
|
||||
'''
|
||||
Input: (data) NumPy array where the fris t two columns
|
||||
are the spatial coordinates, x and y, and
|
||||
the third column is the variable of interest
|
||||
(indices) indices of paired data points in (data)
|
||||
Output: (z) semivariance value at lag (h) +/- (tol)
|
||||
'''
|
||||
# take the squared difference between
|
||||
# the values of the variable of interest
|
||||
z = [(data[i] - data[j])**2.0 for i, j in indices]
|
||||
# the semivariance is half the mean squared difference
|
||||
return np.mean(z) / 2.0
|
||||
|
||||
def semivariogram(t, data, lags, tol):
|
||||
'''
|
||||
Input: (data) NumPy array where the fris t two columns
|
||||
are the spatial coordinates, x and y
|
||||
(lag) the distance, h, between points
|
||||
(tol) the tolerance we are comfortable with around (lag)
|
||||
Output: (sv) <2xN> NumPy array of lags and semivariogram values
|
||||
'''
|
||||
return variogram(t, data, lags, tol, 'semivariogram')
|
||||
|
||||
|
||||
def covariance(data, indices):
|
||||
'''
|
||||
Input: (data) NumPy array where the fris t two columns
|
||||
are the spatial coordinates, x and y
|
||||
(lag) the distance, h, between points
|
||||
(tol) the tolerance we are comfortable with around (lag)
|
||||
Output: (z) covariance value at lag (h) +/- (tol)
|
||||
'''
|
||||
# grab the indices of the points
|
||||
# that are lag +/- tolerance apart
|
||||
m_tail = np.mean([data[i] for i, j in indices])
|
||||
m_head = np.mean([data[j] for i, j in indices])
|
||||
m = m_tail * m_head
|
||||
z = [data[i] * data[j] - m for i, j in indices]
|
||||
return np.mean(z)
|
||||
|
||||
|
||||
def covariogram(t, data, lags, tol):
|
||||
'''
|
||||
Input: (data) NumPy array where the fris t two columns
|
||||
are the spatial coordinates, x and y
|
||||
(lag) the distance, h, between points
|
||||
(tol) the tolerance we are comfortable with around (lag)
|
||||
Output: (cv) <2xN> NumPy array of lags and covariogram values
|
||||
'''
|
||||
return variogram(t, data, lags, tol, 'covariogram')
|
||||
|
||||
|
||||
def variogram(t, data, lags, tol, method):
|
||||
'''
|
||||
Input: (data) NumPy array where the fris t two columns
|
||||
are the spatial coordinates, x and y
|
||||
(lag) the distance, h, between points
|
||||
(tol) the tolerance we are comfortable with around (lag)
|
||||
(method) either 'semivariogram', or 'covariogram'
|
||||
Output: (cv) <2xN> NumPy array of lags and variogram values
|
||||
'''
|
||||
# calculate the pairwise distances
|
||||
pwdist = pairwise.pairwise_distances(np.reshape(np.array(t), (-1,1)))
|
||||
# create a list of lists of indices of points having the ~same lag
|
||||
index = [lagindices(pwdist, lag, tol) for lag in lags]
|
||||
# calculate the variogram at different lags given some tolerance
|
||||
if method in ['semivariogram', 'semi', 'sv', 's']:
|
||||
v = [semivariance(data, indices) for indices in index]
|
||||
elif method in ['covariogram', 'cov', 'co', 'cv', 'c']:
|
||||
v = [covariance(data, indices) for indices in index]
|
||||
# bundle the semivariogram values with their lags
|
||||
return np.array(list(zip(lags, v))).T
|
Loading…
Reference in New Issue