CoastSat_WRL/functions/variograms.py

"""This module contains all the functions needed for variogram analysis """

import sklearn.metrics.pairwise as pairwise
import numpy as np

def lagindices(pwdist, lag, tol):
    '''
    Input:  (pwdist) square NumPy array of pairwise distances
            (lag)    the distance, h, between points
            (tol)    the tolerance we are comfortable with around (lag)
    Output: (ind)    list of tuples; the first element is the row of
                     (data) for one point, the second element is the row
                     of a point (lag)+/-(tol) away from the first point,
                     e.g., (3,5) corresponds fo data[3,:], and data[5,:]
    '''
    # grab the coordinates in a given range: lag +/- tolerance
    i, j = np.where((pwdist >= lag - tol) & (pwdist < lag + tol))
    # zip the coordinates into a list
    indices = list(zip(i, j))
    # take out the repeated elements,
    # since p is a *symmetric* distance matrix
    indices = np.array([i for i in indices if i[1] > i[0]])
    return indices


def semivariance(data, indices):
    '''
    Input:  (data)    NumPy array where the fris t two columns
                      are the spatial coordinates, x and y, and
                      the third column is the variable of interest
            (indices) indices of paired data points in (data)
    Output:  (z)      semivariance value at lag (h) +/- (tol)
    '''
    # take the squared difference between
    # the values of the variable of interest
    z = [(data[i] - data[j])**2.0 for i, j in indices]
    # the semivariance is half the mean squared difference
    return np.mean(z) / 2.0

def semivariogram(t, data, lags, tol):
    '''
    Input:  (data) NumPy array where the fris t two columns
                   are the spatial coordinates, x and y
            (lag)  the distance, h, between points
            (tol)  the tolerance we are comfortable with around (lag)
    Output: (sv)   <2xN> NumPy array of lags and semivariogram values
    '''
    return variogram(t, data, lags, tol, 'semivariogram')


def covariance(data, indices):
    '''
    Input:  (data) NumPy array where the fris t two columns
                   are the spatial coordinates, x and y
            (lag)  the distance, h, between points
            (tol)  the tolerance we are comfortable with around (lag)
    Output:  (z)   covariance value at lag (h) +/- (tol)
    '''
    # grab the indices of the points
    # that are lag +/- tolerance apart
    m_tail = np.mean([data[i] for i, j in indices])
    m_head = np.mean([data[j] for i, j in indices])
    m = m_tail * m_head
    z = [data[i] * data[j] - m for i, j in indices]
    return np.mean(z)


def covariogram(t, data, lags, tol):
    '''
    Input:  (data) NumPy array where the fris t two columns
                   are the spatial coordinates, x and y
            (lag)  the distance, h, between points
            (tol)  the tolerance we are comfortable with around (lag)
    Output: (cv)   <2xN> NumPy array of lags and covariogram values
    '''
    return variogram(t, data, lags, tol, 'covariogram')


def variogram(t, data, lags, tol, method):
    '''
    Input:  (data) NumPy array where the fris t two columns
                   are the spatial coordinates, x and y
            (lag)  the distance, h, between points
            (tol)  the tolerance we are comfortable with around (lag)
            (method) either 'semivariogram', or 'covariogram'
    Output: (cv)   <2xN> NumPy array of lags and variogram values
    '''
    # calculate the pairwise distances
    pwdist = pairwise.pairwise_distances(np.reshape(np.array(t), (-1,1)))
    # create a list of lists of indices of points having the ~same lag
    index = [lagindices(pwdist, lag, tol) for lag in lags]
    # calculate the variogram at different lags given some tolerance
    if method in ['semivariogram', 'semi', 'sv', 's']:
        v = [semivariance(data, indices) for indices in index]
    elif method in ['covariogram', 'cov', 'co', 'cv', 'c']:
        v = [covariance(data, indices) for indices in index]
    # bundle the semivariogram values with their lags
    return np.array(list(zip(lags, v))).T