updated functions folder

8 years ago · 925c23ce26
parent f8e1397412
commit 925c23ce26
3 changed files with 169 additions and 18 deletions
--- a/functions/data_analysis.py
+++ b/functions/data_analysis.py
@ -194,11 +194,14 @@ def calculate_chainage(sds, transects, orientation, along_dist):
                max_cross = np.nanmax(xy_rot[0,:])
                min_cross = np.nanmin(xy_rot[0,:])
                std_cross = np.nanstd(xy_rot[0,:])
-                
+                ###################################################
                if std_cross > 10: # if large std, take the most seaward point 
                    mean_cross = max_cross
                    median_cross = max_cross
                    min_cross = max_cross
 #                    mean_cross = np.nan
 #                    median_cross = np.nan
 #                    min_cross = np.nan  
                # store the statistics
                chainage_mtx[i,j,:] = np.array([mean_cross, median_cross, max_cross,
@ -243,10 +246,13 @@ def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
    # create 3 figures       
    fig1 = plt.figure()
    gs1 = gridspec.GridSpec(chain_sds.shape[1], 1)
    axfig1 = []
    fig2 = plt.figure()
    gs2 = gridspec.GridSpec(2, chain_sds.shape[1])
    axfig2 = []
    fig3 = plt.figure()
    gs3 = gridspec.GridSpec(2,1)
    axfig3 = []
    dates_sds_num = np.array([_.toordinal() for _ in dates_sds])
    stats = dict([])
@ -340,13 +346,16 @@ def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
        # make time-series plot
        plt.figure(fig1.number)
-        fig1.add_subplot(gs1[i,0])
+        ax = fig1.add_subplot(gs1[i,0])
-        plt.plot(dates_sur, chain_sur, 'o-', color='C1', markersize=4, label='survey all')
+        axfig1.append(ax)
-        plt.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2, label='survey interp')
+        plt.plot(dates_sur, chain_sur, '-', color='C1', markersize=2, label='survey data')
-        plt.plot(dates_fin, chain_sds_fin, 'o--', color='b', markersize=4, label='SDS')
+#        plt.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2, label='survey interp')
-        plt.title(pfname, fontweight='bold')
+        plt.plot(dates_fin, chain_sds_fin, 'o--', color='C0', markersize=4, alpha=1, label='satellite data')
-#        plt.xlim([dates_sds[0], dates_sds[-1]])
+        strtitle = '%s     (correlation = %.2f)' % (pfname, correlation)
-        plt.ylabel('chainage [m]')
+        plt.title(strtitle, fontweight='bold')
        plt.xlim([dates_sds[0], dates_sds[-1]])
        plt.ylabel('cross-shore position [m]')
        plt.legend()
        # make scatter plot
        plt.figure(fig2.number)
@ -358,9 +367,9 @@ def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
        ymax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)])
        ymin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)])
        plt.plot([xmin, xmax], [ymin, ymax], 'k--')
-        plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'b:')
+        plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'r:')
-        str_corr = ' y = %.2f x + %.2f\n R2 = %.2f' % (slope, intercept, R2)
+        str_corr = ' y = %.2f x + %.2f\n R2 = %.2f\n n = %d' % (slope, intercept, R2, len(diff_chain))
-        plt.text(xmin, ymax-5, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
+        plt.text(xmin, 0.9*ymax, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
        plt.xlabel('chainage survey [m]')
        plt.ylabel('chainage satellite [m]')
        plt.title(pfname, fontweight='bold')
@ -411,9 +420,9 @@ def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
    ymax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)])
    ymin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)])
    plt.plot([xmin, xmax], [ymin, ymax], 'k--')
-    plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'b:')
+    plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'r:')
-    str_corr = ' y = %.2f x + %.2f\n R2 = %.2f' % (slope, intercept, R2)
+    str_corr = ' y = %.2f x + %.2f\n R2 = %.2f\n n = %d' % (slope, intercept, R2, len(diff_chain_all))
-    plt.text(xmin, ymax-5, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
+    plt.text(xmin, 0.9*ymax, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
    plt.xlabel('chainage survey [m]')
    plt.ylabel('chainage satellite [m]')
    plt.title(pfname, fontweight='bold')
@ -424,9 +433,14 @@ def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
    density = plt.hist(diff_chain_all, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k')
    plt.xlim([-50, 50])
    plt.xlabel('error [m]')
    plt.ylabel('pdf')
    str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90) 
    plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.3), horizontalalignment='left', fontsize=10)
    fig3.set_size_inches(9.2, 9.28)
    fig3.set_tight_layout(True)  
 #    for i in range(len(axfig1)):
 #        axfig1[i].set_ylim([0,150]) # Narrabeen data
 #        axfig1[i].set_ylim([25,110]) # Tairua data            
    return stats
--- a/functions/sds.py
+++ b/functions/sds.py
@ -686,6 +686,26 @@ def classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, plot_bool):
    im_water = im_classif == 3
    im_labels = np.stack((im_sand,im_swash,im_water), axis=-1)  
    # only select the patches that are beaches
 #    try:
 #        labels_sand = measure.label(im_sand)
 #        values = np.unique(labels_sand)
 #        se = morphology.disk(5)
 #        im_sand_new = np.zeros((im_ms_ps.shape[0],im_ms_ps.shape[1])).astype('bool')
 #        counter = 0
 #        for j in range(1,len(values)):
 #            patch_sand = labels_sand == values[j]
 #            im_buffer = morphology.binary_dilation(patch_sand, se)
 #            sum_inter = sum(sum(np.logical_and(im_buffer,im_swash)))
 #            if sum_inter >= 20:
 #                im_sand_new = np.logical_or(im_sand_new, patch_sand)
 #                counter = counter + 1
 #        if counter >= 1:
 #            im_labels[:,:,0] = im_sand_new 
 #    except:
 #        print('nothing')
    if plot_bool:
        # display on top of pansharpened RGB
        im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)
@ -778,6 +798,25 @@ def classify_image_NN_nopan(im_ms_ps, cloud_mask, min_beach_size, plot_bool):
    im_water = im_classif == 3
    im_labels = np.stack((im_sand,im_swash,im_water), axis=-1)  
    # only select the patches that are beaches
 #    try:
 #        labels_sand = measure.label(im_sand)
 #        values = np.unique(labels_sand)
 #        se = morphology.disk(5)
 #        im_sand_new = np.zeros((im_ms_ps.shape[0],im_ms_ps.shape[1])).astype('bool')
 #        counter = 0
 #        for j in range(1,len(values)):
 #            patch_sand = labels_sand == values[j]
 #            im_buffer = morphology.binary_dilation(patch_sand, se)
 #            sum_inter = sum(sum(np.logical_and(im_buffer,im_swash)))
 #            if sum_inter >= 20:
 #                im_sand_new = np.logical_or(im_sand_new, patch_sand)
 #                counter = counter + 1
 #        if counter >= 1:
 #            im_labels[:,:,0] = im_sand_new 
 #    except:
 #        print('nothing')
    if plot_bool:
        # display on top of pansharpened RGB
        im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)
--- a/functions/variograms.py
+++ b/functions/variograms.py
@ -0,0 +1,98 @@
 """This module contains all the functions needed for variogram analysis """
 import sklearn.metrics.pairwise as pairwise
 import numpy as np
 def lagindices(pwdist, lag, tol):
    '''
    Find the pairs of points whose separation falls within (lag) +/- (tol).

    Input:  (pwdist) square NumPy array of pairwise distances
            (lag)    the distance, h, between points
            (tol)    the tolerance we are comfortable with around (lag)
    Output: (ind)    <Nx2> integer NumPy array; each row (i, j), with i < j,
                     holds the row of (data) for one point and the row of a
                     point (lag)+/-(tol) away from it, e.g., (3,5)
                     corresponds to data[3,:], and data[5,:].
                     The array has shape (0, 2) when no pair qualifies.
    '''
    # flag the distances in the half-open range [lag - tol, lag + tol)
    in_range = (pwdist >= lag - tol) & (pwdist < lag + tol)
    rows, cols = np.where(in_range)
    # keep only the strict upper triangle: pwdist is treated as a
    # *symmetric* distance matrix, so (j, i) would repeat (i, j), and
    # the diagonal pairs a point with itself
    upper = cols > rows
    return np.column_stack((rows[upper], cols[upper]))
 def semivariance(data, indices):
    '''
    Compute the semivariance of (data) over a set of point pairs.

    Input:  (data)    sequence or NumPy array holding the variable of
                      interest; it is indexed directly by the point
                      numbers stored in (indices)
            (indices) pairs (i, j) of points in (data), e.g. the <Nx2>
                      array of pairs found for one lag
    Output:  (z)      semivariance value, i.e. half the mean squared
                      difference over all pairs; NaN when (indices)
                      contains no pair
    '''
    pairs = np.asarray(indices)
    # an empty lag bin has no defined semivariance: return NaN explicitly
    # rather than letting np.mean warn about an empty slice
    if pairs.size == 0:
        return np.nan
    values = np.asarray(data)
    # squared difference between the two points of every pair
    sq_diff = (values[pairs[:, 0]] - values[pairs[:, 1]]) ** 2.0
    # the semivariance is half the mean squared difference
    return np.mean(sq_diff) / 2.0
 def semivariogram(t, data, lags, tol):
    '''
    Convenience wrapper: run variogram() with the 'semivariogram' method.

    Input:  (t)    coordinates of the samples; variogram() turns them into
                   a single column and takes their pairwise distances
            (data) values of the variable of interest, one per sample
            (lags) the distances, h, at which to evaluate the semivariogram
            (tol)  the tolerance we are comfortable with around each lag
    Output: (sv)   <2xN> NumPy array of lags and semivariogram values
    '''
    sv = variogram(t, data, lags, tol, method='semivariogram')
    return sv
 def covariance(data, indices):
    '''
    Compute the covariance of (data) over a set of point pairs.

    Input:  (data)    sequence or NumPy array holding the variable of
                      interest; it is indexed directly by the point
                      numbers stored in (indices)
            (indices) pairs (i, j) of points in (data), e.g. the <Nx2>
                      array of pairs found for one lag
    Output:  (z)      covariance value over all pairs: the mean of
                      data[i] * data[j] minus the product of the mean
                      tail value and the mean head value; NaN when
                      (indices) contains no pair
    '''
    pairs = np.asarray(indices)
    # an empty lag bin has no defined covariance: return NaN explicitly
    # rather than letting np.mean warn about an empty slice
    if pairs.size == 0:
        return np.nan
    values = np.asarray(data)
    # values at the first (tail) and second (head) point of every pair
    tail = values[pairs[:, 0]]
    head = values[pairs[:, 1]]
    m = np.mean(tail) * np.mean(head)
    return np.mean(tail * head - m)
 def covariogram(t, data, lags, tol):
    '''
    Convenience wrapper: run variogram() with the 'covariogram' method.

    Input:  (t)    coordinates of the samples; variogram() turns them into
                   a single column and takes their pairwise distances
            (data) values of the variable of interest, one per sample
            (lags) the distances, h, at which to evaluate the covariogram
            (tol)  the tolerance we are comfortable with around each lag
    Output: (cv)   <2xN> NumPy array of lags and covariogram values
    '''
    cv = variogram(t, data, lags, tol, method='covariogram')
    return cv
 def variogram(t, data, lags, tol, method):
    '''
    Evaluate a semivariogram or covariogram of (data) at the given lags.

    Input:  (t)    coordinates of the samples; they are reshaped into a
                   single column and their pairwise distances are used
                   as the separation between samples
            (data) values of the variable of interest, one per sample
            (lags) the distances, h, at which to evaluate the variogram
            (tol)  the tolerance we are comfortable with around each lag
            (method) either 'semivariogram', or 'covariogram' (the short
                   forms 'semi', 'sv', 's' and 'cov', 'co', 'cv', 'c'
                   are accepted too)
    Output: (cv)   <2xN> NumPy array of lags and variogram values
    Raises: ValueError if (method) is not one of the accepted names
    '''
    # choose the estimator first, so that an unknown method fails with a
    # clear error instead of an unbound variable at the very end
    if method in ['semivariogram', 'semi', 'sv', 's']:
        estimator = semivariance
    elif method in ['covariogram', 'cov', 'co', 'cv', 'c']:
        estimator = covariance
    else:
        raise ValueError("unknown variogram method %r: expected "
                         "'semivariogram' or 'covariogram'" % (method,))
    # calculate the pairwise distances
    pwdist = pairwise.pairwise_distances(np.reshape(np.array(t), (-1, 1)))
    # create a list of arrays of indices of points having the ~same lag
    index = [lagindices(pwdist, lag, tol) for lag in lags]
    # calculate the variogram at different lags given some tolerance
    v = [estimator(data, indices) for indices in index]
    # bundle the variogram values with their lags
    return np.array(list(zip(lags, v))).T