updated functions folder

7 years ago · 925c23ce26
parent f8e1397412
commit 925c23ce26
3 changed files with 169 additions and 18 deletions
--- a/functions/data_analysis.py
+++ b/functions/data_analysis.py
@ -194,12 +194,15 @@ def calculate_chainage(sds, transects, orientation, along_dist):
                max_cross = np.nanmax(xy_rot[0,:])
                min_cross = np.nanmin(xy_rot[0,:])
                std_cross = np.nanstd(xy_rot[0,:])
-                
-                if std_cross > 10: # if large std, take the most seaward point
+                ###################################################
+                if std_cross > 10: # if large std, take the most seaward point 
                    mean_cross = max_cross
                    median_cross = max_cross
                    min_cross = max_cross
-                
+#                    mean_cross = np.nan
+#                    median_cross = np.nan
+#                    min_cross = np.nan  
+                    
                # store the statistics
                chainage_mtx[i,j,:] = np.array([mean_cross, median_cross, max_cross,
                            min_cross, n_points, std_cross])   
@ -243,10 +246,13 @@ def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
    # create 3 figures       
    fig1 = plt.figure()
    gs1 = gridspec.GridSpec(chain_sds.shape[1], 1)
+    axfig1 = []
    fig2 = plt.figure()
    gs2 = gridspec.GridSpec(2, chain_sds.shape[1])
+    axfig2 = []
    fig3 = plt.figure()
    gs3 = gridspec.GridSpec(2,1)
+    axfig3 = []
    
    dates_sds_num = np.array([_.toordinal() for _ in dates_sds])
    stats = dict([])
@ -340,13 +346,16 @@ def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
        
        # make time-series plot
        plt.figure(fig1.number)
-        fig1.add_subplot(gs1[i,0])
-        plt.plot(dates_sur, chain_sur, 'o-', color='C1', markersize=4, label='survey all')
-        plt.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2, label='survey interp')
-        plt.plot(dates_fin, chain_sds_fin, 'o--', color='b', markersize=4, label='SDS')
-        plt.title(pfname, fontweight='bold')
-#        plt.xlim([dates_sds[0], dates_sds[-1]])
-        plt.ylabel('chainage [m]')
+        ax = fig1.add_subplot(gs1[i,0])
+        axfig1.append(ax)
+        plt.plot(dates_sur, chain_sur, '-', color='C1', markersize=2, label='survey data')
+#        plt.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2, label='survey interp')
+        plt.plot(dates_fin, chain_sds_fin, 'o--', color='C0', markersize=4, alpha=1, label='satellite data')
+        strtitle = '%s     (correlation = %.2f)' % (pfname, correlation)
+        plt.title(strtitle, fontweight='bold')
+        plt.xlim([dates_sds[0], dates_sds[-1]])
+        plt.ylabel('cross-shore position [m]')
+        plt.legend()
        
        # make scatter plot
        plt.figure(fig2.number)
@ -358,9 +367,9 @@ def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
        ymax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)])
        ymin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)])
        plt.plot([xmin, xmax], [ymin, ymax], 'k--')
-        plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'b:')
-        str_corr = ' y = %.2f x + %.2f\n R2 = %.2f' % (slope, intercept, R2)
-        plt.text(xmin, ymax-5, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
+        plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'r:')
+        str_corr = ' y = %.2f x + %.2f\n R2 = %.2f\n n = %d' % (slope, intercept, R2, len(diff_chain))
+        plt.text(xmin, 0.9*ymax, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
        plt.xlabel('chainage survey [m]')
        plt.ylabel('chainage satellite [m]')
        plt.title(pfname, fontweight='bold')
@ -411,22 +420,27 @@ def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
    ymax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)])
    ymin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)])
    plt.plot([xmin, xmax], [ymin, ymax], 'k--')
-    plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'b:')
-    str_corr = ' y = %.2f x + %.2f\n R2 = %.2f' % (slope, intercept, R2)
-    plt.text(xmin, ymax-5, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
+    plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'r:')
+    str_corr = ' y = %.2f x + %.2f\n R2 = %.2f\n n = %d' % (slope, intercept, R2, len(diff_chain_all))
+    plt.text(xmin, 0.9*ymax, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
    plt.xlabel('chainage survey [m]')
    plt.ylabel('chainage satellite [m]')
    plt.title(pfname, fontweight='bold')
-
+    
    fig3.add_subplot(gs3[1,0])
    binwidth = 3
    bins = np.arange(min(diff_chain_all), max(diff_chain_all) + binwidth, binwidth)
    density = plt.hist(diff_chain_all, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k')
    plt.xlim([-50, 50])
    plt.xlabel('error [m]')
+    plt.ylabel('pdf')
    str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90) 
    plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.3), horizontalalignment='left', fontsize=10)
    fig3.set_size_inches(9.2, 9.28)
-    fig3.set_tight_layout(True)              
+    fig3.set_tight_layout(True)  
+
+#    for i in range(len(axfig1)):
+#        axfig1[i].set_ylim([0,150]) # Narrabeen data
+#        axfig1[i].set_ylim([25,110]) # Tairua data            
        
    return stats
--- a/functions/sds.py
+++ b/functions/sds.py
@ -686,6 +686,26 @@ def classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, plot_bool):
    im_water = im_classif == 3
    im_labels = np.stack((im_sand,im_swash,im_water), axis=-1)  
    
+    
+    # only select the patches that are beaches
+#    try:
+#        labels_sand = measure.label(im_sand)
+#        values = np.unique(labels_sand)
+#        se = morphology.disk(5)
+#        im_sand_new = np.zeros((im_ms_ps.shape[0],im_ms_ps.shape[1])).astype('bool')
+#        counter = 0
+#        for j in range(1,len(values)):
+#            patch_sand = labels_sand == values[j]
+#            im_buffer = morphology.binary_dilation(patch_sand, se)
+#            sum_inter = sum(sum(np.logical_and(im_buffer,im_swash)))
+#            if sum_inter >= 20:
+#                im_sand_new = np.logical_or(im_sand_new, patch_sand)
+#                counter = counter + 1
+#        if counter >= 1:
+#            im_labels[:,:,0] = im_sand_new 
+#    except:
+#        print('nothing')
+    
    if plot_bool:
        # display on top of pansharpened RGB
        im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)
@ -778,6 +798,25 @@ def classify_image_NN_nopan(im_ms_ps, cloud_mask, min_beach_size, plot_bool):
    im_water = im_classif == 3
    im_labels = np.stack((im_sand,im_swash,im_water), axis=-1)  
    
+    # only select the patches that are beaches
+#    try:
+#        labels_sand = measure.label(im_sand)
+#        values = np.unique(labels_sand)
+#        se = morphology.disk(5)
+#        im_sand_new = np.zeros((im_ms_ps.shape[0],im_ms_ps.shape[1])).astype('bool')
+#        counter = 0
+#        for j in range(1,len(values)):
+#            patch_sand = labels_sand == values[j]
+#            im_buffer = morphology.binary_dilation(patch_sand, se)
+#            sum_inter = sum(sum(np.logical_and(im_buffer,im_swash)))
+#            if sum_inter >= 20:
+#                im_sand_new = np.logical_or(im_sand_new, patch_sand)
+#                counter = counter + 1
+#        if counter >= 1:
+#            im_labels[:,:,0] = im_sand_new 
+#    except:
+#        print('nothing')
+    
    if plot_bool:
        # display on top of pansharpened RGB
        im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)
--- a/functions/variograms.py
+++ b/functions/variograms.py
@ -0,0 +1,98 @@
+"""This module contains all the functions needed for variogram analysis """
+
+import sklearn.metrics.pairwise as pairwise
+import numpy as np
+
+def lagindices(pwdist, lag, tol):
+    '''
+    Input:  (pwdist) square NumPy array of pairwise distances
+            (lag)    the distance, h, between points
+            (tol)    the tolerance we are comfortable with around (lag)
+    Output: (ind)    list of tuples; the first element is the row of
+                     (data) for one point, the second element is the row
+                     of a point (lag)+/-(tol) away from the first point,
+                     e.g., (3,5) corresponds fo data[3,:], and data[5,:]
+    '''
+    # grab the coordinates in a given range: lag +/- tolerance
+    i, j = np.where((pwdist >= lag - tol) & (pwdist < lag + tol))
+    # zip the coordinates into a list
+    indices = list(zip(i, j))
+    # take out the repeated elements,
+    # since p is a *symmetric* distance matrix
+    indices = np.array([i for i in indices if i[1] > i[0]])
+    return indices
+
+
+def semivariance(data, indices):
+    '''
+    Input:  (data)    NumPy array where the fris t two columns
+                      are the spatial coordinates, x and y, and
+                      the third column is the variable of interest
+            (indices) indices of paired data points in (data)
+    Output:  (z)      semivariance value at lag (h) +/- (tol)
+    '''
+    # take the squared difference between
+    # the values of the variable of interest
+    z = [(data[i] - data[j])**2.0 for i, j in indices]
+    # the semivariance is half the mean squared difference
+    return np.mean(z) / 2.0
+  
+def semivariogram(t, data, lags, tol):
+    '''
+    Input:  (data) NumPy array where the fris t two columns
+                   are the spatial coordinates, x and y
+            (lag)  the distance, h, between points
+            (tol)  the tolerance we are comfortable with around (lag)
+    Output: (sv)   <2xN> NumPy array of lags and semivariogram values
+    '''
+    return variogram(t, data, lags, tol, 'semivariogram')
+
+
+def covariance(data, indices):
+    '''
+    Input:  (data) NumPy array where the fris t two columns
+                   are the spatial coordinates, x and y
+            (lag)  the distance, h, between points
+            (tol)  the tolerance we are comfortable with around (lag)
+    Output:  (z)   covariance value at lag (h) +/- (tol)
+    '''
+    # grab the indices of the points
+    # that are lag +/- tolerance apart
+    m_tail = np.mean([data[i] for i, j in indices])
+    m_head = np.mean([data[j] for i, j in indices])
+    m = m_tail * m_head
+    z = [data[i] * data[j] - m for i, j in indices]
+    return np.mean(z)
+
+
+def covariogram(t, data, lags, tol):
+    '''
+    Input:  (data) NumPy array where the fris t two columns
+                   are the spatial coordinates, x and y
+            (lag)  the distance, h, between points
+            (tol)  the tolerance we are comfortable with around (lag)
+    Output: (cv)   <2xN> NumPy array of lags and covariogram values
+    '''
+    return variogram(t, data, lags, tol, 'covariogram')
+
+
+def variogram(t, data, lags, tol, method):
+    '''
+    Input:  (data) NumPy array where the fris t two columns
+                   are the spatial coordinates, x and y
+            (lag)  the distance, h, between points
+            (tol)  the tolerance we are comfortable with around (lag)
+            (method) either 'semivariogram', or 'covariogram'
+    Output: (cv)   <2xN> NumPy array of lags and variogram values
+    '''
+    # calculate the pairwise distances
+    pwdist = pairwise.pairwise_distances(np.reshape(np.array(t), (-1,1)))
+    # create a list of lists of indices of points having the ~same lag
+    index = [lagindices(pwdist, lag, tol) for lag in lags]
+    # calculate the variogram at different lags given some tolerance
+    if method in ['semivariogram', 'semi', 'sv', 's']:
+        v = [semivariance(data, indices) for indices in index]
+    elif method in ['covariogram', 'cov', 'co', 'cv', 'c']:
+        v = [covariance(data, indices) for indices in index]
+    # bundle the semivariogram values with their lags
+    return np.array(list(zip(lags, v))).T