# -*- coding: utf-8 -*-
#==========================================================#
# Extract shorelines from Landsat images
#==========================================================#

# Initial settings
import os
import numpy as np
import matplotlib.pyplot as plt
import ee
import pdb

# other modules
from osgeo import gdal, ogr, osr
import pickle
import matplotlib.cm as cm
from pylab import ginput

# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.measure as measure
import skimage.morphology as morphology

# machine learning modules
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, Normalizer
import joblib  # sklearn.externals.joblib is deprecated in recent scikit-learn

# import own modules
import functions.utils as utils
import functions.sds as sds

# some settings
np.seterr(all='ignore')  # ignore division-by-zero and NaN warnings
plt.rcParams['axes.grid'] = True
plt.rcParams['figure.max_open_warning'] = 100
ee.Initialize()

# parameters
cloud_thresh = 0.5        # maximum fraction of cloud cover accepted
plot_bool = False         # if True, show intermediate plots
min_contour_points = 100  # minimum number of points contained in each water line
output_epsg = 28356       # GDA94 / MGA Zone 56
buffer_size = 10          # radius (in pixels) of disk for buffer (pixel classification)
min_beach_size = 50       # minimum number of pixels in a beach (pixel classification)

# satellite and site
satname = 'L8'
sitename = 'NARRA'
#sitename = 'OLDBAR'

# load metadata (timestamps, georeferencing accuracy, epsg code, reference points)
filepath = os.path.join(os.getcwd(), 'data', satname, sitename)
with open(os.path.join(filepath, sitename + '_timestamps' + '.pkl'), 'rb') as f:
    timestamps = pickle.load(f)
with open(os.path.join(filepath, sitename + '_accuracy_georef' + '.pkl'), 'rb') as f:
    acc_georef = pickle.load(f)
with open(os.path.join(filepath, sitename + '_epsgcode' + '.pkl'), 'rb') as f:
    input_epsg = pickle.load(f)
with open(os.path.join(filepath, sitename + '_refpoints' + '.pkl'), 'rb') as f:
    refpoints = pickle.load(f)

# sort timestamps and georef accuracy (downloaded images are sorted by date in the directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]

# path to images (sort the filenames so they align with the sorted timestamps)
file_path_pan = os.path.join(os.getcwd(), 'data', satname, sitename, 'pan')
file_path_ms = os.path.join(os.getcwd(), 'data', satname, sitename, 'ms')
file_names_pan = sorted(os.listdir(file_path_pan))
file_names_ms = sorted(os.listdir(file_path_ms))
N = len(file_names_pan)

# initialise some variables
cloud_cover_ts = []
date_acquired_ts = []
acc_georef_ts = []
idx_skipped = []
idx_nocloud = []
t = []
shorelines = []

#%%
# loop over all downloaded images
for i in range(N):

    # read pan image
    fn_pan = os.path.join(file_path_pan, file_names_pan[i])
    data = gdal.Open(fn_pan, gdal.GA_ReadOnly)
    georef = np.array(data.GetGeoTransform())
    # band index k avoids reusing the outer loop variable name
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im_pan = np.stack(bands, 2)[:,:,0]
    nrows = im_pan.shape[0]
    ncols = im_pan.shape[1]

    # read ms image
    fn_ms = os.path.join(file_path_ms, file_names_ms[i])
    data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im_ms = np.stack(bands, 2)

    # cloud mask from the QA band, resized to the pan resolution with
    # nearest-neighbour interpolation (order 0) to keep it boolean
    im_qa = im_ms[:,:,5]
    cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
    cloud_mask = transform.resize(cloud_mask, (im_pan.shape[0], im_pan.shape[1]),
                                  order=0, preserve_range=True, mode='constant').astype('bool')
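    # Illustrative sketch only (an assumption, not the project's implementation):
    # sds.create_cloud_mask is defined in functions/sds.py. Under the Landsat 8
    # Collection 1 BQA convention, where bit 4 flags clouds, an equivalent mask
    # at ms resolution could be derived directly from the QA band as follows.
    if plot_bool:
        qa_bits = im_qa.astype(np.uint16)                   # BQA values are 16-bit flags
        cloud_sketch = np.bitwise_and(qa_bits, 1 << 4) > 0  # True on cloudy pixels
        plt.figure()
        plt.imshow(cloud_sketch, cmap='gray')
        plt.title('cloud mask sketch (BQA bit 4)')
        plt.draw()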
    # resize the ms bands to the pan resolution using bilinear interpolation (order 1)
    im_ms = transform.resize(im_ms, (im_pan.shape[0], im_pan.shape[1]),
                             order=1, preserve_range=True, mode='constant')

    # check for -inf or nan values and add them to the cloud mask
    im_inf = np.isin(im_ms[:,:,0], -np.inf)
    im_nan = np.isnan(im_ms[:,:,0])
    cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)

    # calculate cloud cover and skip the image if it is too high
    cloud_cover = np.sum(cloud_mask) / (cloud_mask.shape[0] * cloud_mask.shape[1])
    if cloud_cover > cloud_thresh:
        print('skip ' + str(i) + ' - cloudy (' + str(cloud_cover) + ')')
        idx_skipped.append(i)
        continue
    idx_nocloud.append(i)

    # check if an image for that date is already present
    # (the 10-character acquisition date sits right after the '<satname>_<sitename>_' prefix)
    idx_date = len(satname) + 1 + len(sitename) + 1
    date_acquired = file_names_pan[i][idx_date:idx_date + 10]
    if date_acquired in date_acquired_ts:
        # find the index of the repeated image
        idx_samedate = utils.find_indices(date_acquired_ts, lambda e: e == date_acquired)
        idx_samedate = idx_samedate[0]
        print('cloud cover ' + str(cloud_cover) + ' - ' + str(cloud_cover_ts[idx_samedate]))
        print('acc georef ' + str(acc_georef_sorted[i]) + ' - ' + str(acc_georef_ts[idx_samedate]))
        # keep the image with less cloud cover or better georeferencing accuracy
        if cloud_cover < cloud_cover_ts[idx_samedate] - 0.01:
            skip = False
        elif acc_georef_sorted[i] < acc_georef_ts[idx_samedate]:
            skip = False
        else:
            skip = True
        if skip:
            print('skip ' + str(i) + ' - repeated')
            idx_skipped.append(i)
            continue
        else:
            del shorelines[idx_samedate]
            del t[idx_samedate]
            del cloud_cover_ts[idx_samedate]
            del date_acquired_ts[idx_samedate]
            del acc_georef_ts[idx_samedate]
            print('keep ' + str(i) + ' - deleted ' + str(idx_samedate))

    # pansharpen the rgb bands
    im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, plot_bool)
    im_display = sds.rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 100, False)
    # append the down-sized NIR and SWIR bands (pansharpening is not possible for them)
    im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2)

    # classify the image into 4 classes (sand, whitewater, water, other) with the NN classifier
    im_classif = sds.classify_image_NN(im_ms_ps, im_pan, cloud_mask, True)

    # labels
    im_sand = im_classif == 1
    im_swash = im_classif == 2
    im_water = im_classif == 3
    vec_sand = im_sand.reshape(ncols*nrows)
    vec_water = im_water.reshape(ncols*nrows)
    vec_swash = im_swash.reshape(ncols*nrows)

    t.append(timestamps_sorted[i])
    cloud_cover_ts.append(cloud_cover)
    acc_georef_ts.append(acc_georef_sorted[i])
    date_acquired_ts.append(date_acquired)

    # calculate the water indices
    im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, plot_bool)
    im_ndmwi = sds.nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, plot_bool)
    im_nir = im_ms_ps[:,:,3]
    im_swir = im_ms_ps[:,:,4]
    im_ind = np.stack((im_ndwi, im_ndmwi), axis=-1)
    vec_ind = im_ind.reshape(nrows*ncols, 2)

    # keep only the beach (remove isolated sand patches)
    im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)

    # buffer around the beach (overrides the radius set in the parameters above)
    buffer_size = 7
    se = morphology.disk(buffer_size)
    im_buffer = morphology.binary_dilation(im_sand, se)
    vec_buffer = im_buffer.reshape(nrows*ncols)

    # display the buffered area only
    im = np.copy(im_display)
    im[~im_buffer,0] = 0
    im[~im_buffer,1] = 0
    im[~im_buffer,2] = 0
    plt.figure()
    plt.imshow(im)
    plt.draw()

    # pixel intensities of each class inside the beach buffer
    int_water = vec_ind[np.logical_and(vec_buffer, vec_water),:]
    int_sand = vec_ind[np.logical_and(vec_buffer, vec_sand),:]
    int_swash = vec_ind[np.logical_and(vec_buffer, vec_swash),:]
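    # Illustrative sketch (an assumption about sds.nd_index, which lives in
    # functions/sds.py): a normalised-difference index is typically
    # (b1 - b2) / (b1 + b2) with cloudy pixels masked out, e.g. for the NDWI:
    if plot_bool:
        with np.errstate(divide='ignore', invalid='ignore'):
            ndwi_sketch = ((im_ms_ps[:,:,3] - im_ms_ps[:,:,1]) /
                           (im_ms_ps[:,:,3] + im_ms_ps[:,:,1]))
        ndwi_sketch[cloud_mask] = np.nan  # mask clouds, as im_ndwi presumably does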
    # histograms of the two indices for water, sand and swash pixels
    fig, ax = plt.subplots(2, 1, sharex=True)
    ax[0].hist(int_water[:,0], bins=100, label='water')
    ax[0].hist(int_sand[:,0], bins=100, alpha=0.5, label='sand')
    ax[0].hist(int_swash[:,0], bins=100, alpha=0.5, label='swash')
    ax[0].legend()
    ax[0].set_title('Water Index NIR-G')
    ax[1].hist(int_water[:,1], bins=100, label='water')
    ax[1].hist(int_sand[:,1], bins=100, alpha=0.5, label='sand')
    ax[1].hist(int_swash[:,1], bins=100, alpha=0.5, label='swash')
    ax[1].legend()
    ax[1].set_title('Modified Water Index SWIR-G')
    plt.draw()

    # Otsu thresholds computed on the water and sand pixels only
    int_all = np.append(int_water, int_sand, axis=0)
    t1 = filters.threshold_otsu(int_all[:,0])
    t2 = filters.threshold_otsu(int_all[:,1])

    # contour each index image at its own threshold
    contours1 = measure.find_contours(im_ndwi, t1)
    contours2 = measure.find_contours(im_ndmwi, t2)

    plt.figure()
    plt.imshow(im_display)
    for contour in contours1:
        plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='c')
    for contour in contours2:
        plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='m')
    plt.draw()

    # display RGB, the two indices and the NIR and SWIR bands side by side
    plt.figure()
    ax1 = plt.subplot(1,5,1)
    plt.imshow(im_display)
    plt.xticks([])
    plt.yticks([])
    plt.axis('off')
    plt.title('RGB')
    plt.subplot(1,5,2, sharex=ax1, sharey=ax1)
    plt.imshow(im_ndwi, cmap='seismic')
    plt.xticks([])
    plt.yticks([])
    plt.axis('off')
    plt.title('NDWI')
    plt.subplot(1,5,3, sharex=ax1, sharey=ax1)
    plt.imshow(im_ndmwi, cmap='seismic')
    plt.xticks([])
    plt.yticks([])
    plt.axis('off')
    plt.title('NDMWI')
    plt.subplot(1,5,4, sharex=ax1, sharey=ax1)
    plt.imshow(im_nir, cmap='seismic')
    plt.xticks([])
    plt.yticks([])
    plt.axis('off')
    plt.title('NIR')
    plt.subplot(1,5,5, sharex=ax1, sharey=ax1)
    plt.imshow(im_swir, cmap='seismic')
    plt.xticks([])
    plt.yticks([])
    plt.axis('off')
    plt.title('SWIR')

    # probability density and cumulative distributions, with the whole-image
    # Otsu threshold, for each of NDWI, NDMWI, NIR and SWIR
    fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 2, figsize=(8,6))
    ax1[0].set_title('Probability density function')
    ax1[1].set_title('Cumulative distribution')
    for axrow, (im, label) in zip([ax1, ax2, ax3, ax4],
                                  [(im_ndwi, 'NDWI'), (im_ndmwi, 'NDMWI'),
                                   (im_nir, 'NIR'), (im_swir, 'SWIR')]):
        t_otsu = filters.threshold_otsu(im)
        vals = axrow[0].hist(im.reshape(nrows*ncols), bins=300)
        axrow[0].plot([t_otsu, t_otsu], [0, np.max(vals[0])], 'r-', label='Otsu threshold')
        vals = axrow[1].hist(im.reshape(nrows*ncols), bins=300, cumulative=True, histtype='step')
        axrow[1].plot([t_otsu, t_otsu], [0, np.max(vals[0])], 'r-', label='Otsu threshold')
        axrow[0].set_ylabel(label)
    plt.draw()

#%%
    # calculate NDWI
    im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, plot_bool)
    # detect edges
    wl_pix = sds.find_wl_contours(im_ndwi, cloud_mask, min_contour_points, True)
    # convert from pixel to world coordinates
    wl_coords = sds.convert_pix2world(wl_pix, georef)
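    # Illustrative sketch (an assumption about sds.convert_pix2world): a GDAL
    # geotransform gt maps pixel (row, col) to world coordinates through the
    # affine relations x = gt[0] + col*gt[1] + row*gt[2] and
    # y = gt[3] + col*gt[4] + row*gt[5], e.g. for the first contour vertex:
    if plot_bool and len(wl_pix) > 0:
        row0, col0 = wl_pix[0][0, 0], wl_pix[0][0, 1]  # skimage contours are (row, col)
        x0 = georef[0] + col0*georef[1] + row0*georef[2]
        y0 = georef[3] + col0*georef[4] + row0*georef[5]
        print('first contour vertex in world coordinates: (%.1f, %.1f)' % (x0, y0))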
    # convert to the output epsg spatial reference
    wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)

    # classify sand pixels with the unsupervised classifier
    im_sand = sds.classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix,
                                             False, min_beach_size, plot_bool)

    # plot a figure to select the correct water line and discard cloudy images
    plt.figure()
    cmap = cm.get_cmap('jet')
    plt.subplot(121)
    plt.imshow(im_ms_ps[:,:,[2,1,0]])
    colours = cmap(np.linspace(0, 1, num=len(wl_pix)))
    for j, contour in enumerate(wl_pix):
        plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color=colours[j,:])
    plt.axis('image')
    plt.title(file_names_pan[i])
    plt.subplot(122)
    centroids = []
    colours = cmap(np.linspace(0, 1, num=len(wl)))
    for j, contour in enumerate(wl):
        centroids.append([np.mean(contour[:, 0]), np.mean(contour[:, 1])])
        plt.plot(contour[:, 0], contour[:, 1], linewidth=2, color=colours[j,:])
        plt.plot(np.mean(contour[:, 0]), np.mean(contour[:, 1]), 'o', color=colours[j,:])
    plt.plot(refpoints[:,0], refpoints[:,1], 'k.')
    plt.axis('equal')
    plt.title(file_names_pan[i])
    mng = plt.get_current_fig_manager()
    mng.window.showMaximized()
    plt.tight_layout()
    plt.draw()

    # click on the left image to discard, otherwise on the closest centroid in the
    # right image (pixel coordinates are small, world coordinates are large,
    # hence the 10000 cut-off)
    pt_in = np.array(ginput(n=1, timeout=1000))
    if pt_in[0][0] < 10000:
        print('skip ' + str(i) + ' - manual')
        idx_skipped.append(i)
        continue

    # keep the contour whose centroid is closest to the click
    dist_centroid = [np.linalg.norm(c - pt_in) for c in centroids]
    shorelines.append(wl[np.argmin(dist_centroid)])

# plot all shorelines
plt.figure()
plt.axis('equal')
for j in range(len(shorelines)):
    plt.plot(shorelines[j][:,0], shorelines[j][:,1])
plt.draw()

output = {'t': t,
          'shorelines': shorelines,
          'cloud_cover': cloud_cover_ts,
          'acc_georef': acc_georef_ts}

#with open(os.path.join(filepath, sitename + '_output' + '.pkl'), 'wb') as f:
#    pickle.dump(output, f)
#
#with open(os.path.join(filepath, sitename + '_skipped' + '.pkl'), 'wb') as f:
#    pickle.dump(idx_skipped, f)
#
#with open(os.path.join(filepath, sitename + '_idxnocloud' + '.pkl'), 'wb') as f:
#    pickle.dump(idx_nocloud, f)
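# To reload the saved results in a later session (assuming the dumps above are
# uncommented), something like this would work:
#
#with open(os.path.join(filepath, sitename + '_output' + '.pkl'), 'rb') as f:
#    output = pickle.load(f)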