diff --git a/functions/data_analysis.py b/functions/data_analysis.py index 9244311..7ba36d5 100644 --- a/functions/data_analysis.py +++ b/functions/data_analysis.py @@ -194,12 +194,15 @@ def calculate_chainage(sds, transects, orientation, along_dist): max_cross = np.nanmax(xy_rot[0,:]) min_cross = np.nanmin(xy_rot[0,:]) std_cross = np.nanstd(xy_rot[0,:]) - - if std_cross > 10: # if large std, take the most seaward point + ################################################### + if std_cross > 10: # if large std, take the most seaward point mean_cross = max_cross median_cross = max_cross min_cross = max_cross - +# mean_cross = np.nan +# median_cross = np.nan +# min_cross = np.nan + # store the statistics chainage_mtx[i,j,:] = np.array([mean_cross, median_cross, max_cross, min_cross, n_points, std_cross]) @@ -243,10 +246,13 @@ def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5): # create 3 figures fig1 = plt.figure() gs1 = gridspec.GridSpec(chain_sds.shape[1], 1) + axfig1 = [] fig2 = plt.figure() gs2 = gridspec.GridSpec(2, chain_sds.shape[1]) + axfig2 = [] fig3 = plt.figure() gs3 = gridspec.GridSpec(2,1) + axfig3 = [] dates_sds_num = np.array([_.toordinal() for _ in dates_sds]) stats = dict([]) @@ -340,13 +346,16 @@ def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5): # make time-series plot plt.figure(fig1.number) - fig1.add_subplot(gs1[i,0]) - plt.plot(dates_sur, chain_sur, 'o-', color='C1', markersize=4, label='survey all') - plt.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2, label='survey interp') - plt.plot(dates_fin, chain_sds_fin, 'o--', color='b', markersize=4, label='SDS') - plt.title(pfname, fontweight='bold') -# plt.xlim([dates_sds[0], dates_sds[-1]]) - plt.ylabel('chainage [m]') + ax = fig1.add_subplot(gs1[i,0]) + axfig1.append(ax) + plt.plot(dates_sur, chain_sur, '-', color='C1', markersize=2, label='survey data') +# plt.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2, label='survey interp') + plt.plot(dates_fin, chain_sds_fin, 'o--', color='C0', markersize=4, alpha=1, label='satellite data') + strtitle = '%s (correlation = %.2f)' % (pfname, correlation) + plt.title(strtitle, fontweight='bold') + plt.xlim([dates_sds[0], dates_sds[-1]]) + plt.ylabel('cross-shore position [m]') + plt.legend() # make scatter plot plt.figure(fig2.number) @@ -358,9 +367,9 @@ def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5): ymax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)]) ymin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)]) plt.plot([xmin, xmax], [ymin, ymax], 'k--') - plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'b:') - str_corr = ' y = %.2f x + %.2f\n R2 = %.2f' % (slope, intercept, R2) - plt.text(xmin, ymax-5, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left') + plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'r:') + str_corr = ' y = %.2f x + %.2f\n R2 = %.2f\n n = %d' % (slope, intercept, R2, len(diff_chain)) + plt.text(xmin, 0.9*ymax, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left') plt.xlabel('chainage survey [m]') plt.ylabel('chainage satellite [m]') plt.title(pfname, fontweight='bold') @@ -411,22 +420,27 @@ def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5): ymax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)]) ymin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)]) plt.plot([xmin, xmax], [ymin, ymax], 'k--') - plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'b:') - str_corr = ' y = %.2f x + %.2f\n R2 = %.2f' % (slope, intercept, R2) - plt.text(xmin, ymax-5, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left') + plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'r:') + str_corr = ' y = %.2f x + %.2f\n R2 = %.2f\n n = %d' % (slope, intercept, R2, len(diff_chain_all)) + plt.text(xmin, 0.9*ymax, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left') plt.xlabel('chainage survey [m]') plt.ylabel('chainage satellite [m]') plt.title(pfname, fontweight='bold') - + fig3.add_subplot(gs3[1,0]) binwidth = 3 bins = np.arange(min(diff_chain_all), max(diff_chain_all) + binwidth, binwidth) density = plt.hist(diff_chain_all, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k') plt.xlim([-50, 50]) plt.xlabel('error [m]') + plt.ylabel('pdf') str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90) plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.3), horizontalalignment='left', fontsize=10) fig3.set_size_inches(9.2, 9.28) - fig3.set_tight_layout(True) + fig3.set_tight_layout(True) + +# for i in range(len(axfig1)): +# axfig1[i].set_ylim([0,150]) # Narrabeen data +# axfig1[i].set_ylim([25,110]) # Tairua data return stats \ No newline at end of file diff --git a/functions/sds.py b/functions/sds.py index 301753a..7a32837 100644 --- a/functions/sds.py +++ b/functions/sds.py @@ -686,6 +686,26 @@ def classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, plot_bool): im_water = im_classif == 3 im_labels = np.stack((im_sand,im_swash,im_water), axis=-1) + + # only select the patches that are beaches +# try: +# labels_sand = measure.label(im_sand) +# values = np.unique(labels_sand) +# se = morphology.disk(5) +# im_sand_new = np.zeros((im_ms_ps.shape[0],im_ms_ps.shape[1])).astype('bool') +# counter = 0 +# for j in range(1,len(values)): +# patch_sand = labels_sand == values[j] +# im_buffer = morphology.binary_dilation(patch_sand, se) +# sum_inter = sum(sum(np.logical_and(im_buffer,im_swash))) +# if sum_inter >= 20: +# im_sand_new = np.logical_or(im_sand_new, patch_sand) +# counter = counter + 1 +# if counter >= 1: +# im_labels[:,:,0] = im_sand_new +# except: +# print('nothing') + if plot_bool: # display on top of pansharpened RGB im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False) @@ -778,6 +798,25 @@ def classify_image_NN_nopan(im_ms_ps, cloud_mask, min_beach_size, plot_bool): im_water = im_classif == 3 im_labels = np.stack((im_sand,im_swash,im_water), axis=-1) + # only select the patches that are beaches +# try: +# labels_sand = measure.label(im_sand) +# values = np.unique(labels_sand) +# se = morphology.disk(5) +# im_sand_new = np.zeros((im_ms_ps.shape[0],im_ms_ps.shape[1])).astype('bool') +# counter = 0 +# for j in range(1,len(values)): +# patch_sand = labels_sand == values[j] +# im_buffer = morphology.binary_dilation(patch_sand, se) +# sum_inter = sum(sum(np.logical_and(im_buffer,im_swash))) +# if sum_inter >= 20: +# im_sand_new = np.logical_or(im_sand_new, patch_sand) +# counter = counter + 1 +# if counter >= 1: +# im_labels[:,:,0] = im_sand_new +# except: +# print('nothing') + if plot_bool: # display on top of pansharpened RGB im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False) diff --git a/functions/variograms.py b/functions/variograms.py new file mode 100644 index 0000000..295c8b4 --- /dev/null +++ b/functions/variograms.py @@ -0,0 +1,98 @@ +"""This module contains all the functions needed for variogram analysis """ + +import sklearn.metrics.pairwise as pairwise +import numpy as np + +def lagindices(pwdist, lag, tol): + ''' + Input: (pwdist) square NumPy array of pairwise distances + (lag) the distance, h, between points + (tol) the tolerance we are comfortable with around (lag) + Output: (ind) list of tuples; the first element is the row of + (data) for one point, the second element is the row + of a point (lag)+/-(tol) away from the first point, + e.g., (3,5) corresponds fo data[3,:], and data[5,:] + ''' + # grab the coordinates in a given range: lag +/- tolerance + i, j = np.where((pwdist >= lag - tol) & (pwdist < lag + tol)) + # zip the coordinates into a list + indices = list(zip(i, j)) + # take out the repeated elements, + # since p is a *symmetric* distance matrix + indices = np.array([i for i in indices if i[1] > i[0]]) + return indices + + +def semivariance(data, indices): + ''' + Input: (data) NumPy array where the fris t two columns + are the spatial coordinates, x and y, and + the third column is the variable of interest + (indices) indices of paired data points in (data) + Output: (z) semivariance value at lag (h) +/- (tol) + ''' + # take the squared difference between + # the values of the variable of interest + z = [(data[i] - data[j])**2.0 for i, j in indices] + # the semivariance is half the mean squared difference + return np.mean(z) / 2.0 + +def semivariogram(t, data, lags, tol): + ''' + Input: (data) NumPy array where the fris t two columns + are the spatial coordinates, x and y + (lag) the distance, h, between points + (tol) the tolerance we are comfortable with around (lag) + Output: (sv) <2xN> NumPy array of lags and semivariogram values + ''' + return variogram(t, data, lags, tol, 'semivariogram') + + +def covariance(data, indices): + ''' + Input: (data) NumPy array where the fris t two columns + are the spatial coordinates, x and y + (lag) the distance, h, between points + (tol) the tolerance we are comfortable with around (lag) + Output: (z) covariance value at lag (h) +/- (tol) + ''' + # grab the indices of the points + # that are lag +/- tolerance apart + m_tail = np.mean([data[i] for i, j in indices]) + m_head = np.mean([data[j] for i, j in indices]) + m = m_tail * m_head + z = [data[i] * data[j] - m for i, j in indices] + return np.mean(z) + + +def covariogram(t, data, lags, tol): + ''' + Input: (data) NumPy array where the fris t two columns + are the spatial coordinates, x and y + (lag) the distance, h, between points + (tol) the tolerance we are comfortable with around (lag) + Output: (cv) <2xN> NumPy array of lags and covariogram values + ''' + return variogram(t, data, lags, tol, 'covariogram') + + +def variogram(t, data, lags, tol, method): + ''' + Input: (data) NumPy array where the fris t two columns + are the spatial coordinates, x and y + (lag) the distance, h, between points + (tol) the tolerance we are comfortable with around (lag) + (method) either 'semivariogram', or 'covariogram' + Output: (cv) <2xN> NumPy array of lags and variogram values + ''' + # calculate the pairwise distances + pwdist = pairwise.pairwise_distances(np.reshape(np.array(t), (-1,1))) + # create a list of lists of indices of points having the ~same lag + index = [lagindices(pwdist, lag, tol) for lag in lags] + # calculate the variogram at different lags given some tolerance + if method in ['semivariogram', 'semi', 'sv', 's']: + v = [semivariance(data, indices) for indices in index] + elif method in ['covariogram', 'cov', 'co', 'cv', 'c']: + v = [covariance(data, indices) for indices in index] + # bundle the semivariogram values with their lags + return np.array(list(zip(lags, v))).T