"""This module contains all the functions needed for extracting satellite-derived shorelines (SDS) Author: Kilian Vos, Water Research Laboratory, University of New South Wales """ # Initial settings import os import numpy as np import matplotlib.pyplot as plt import pdb # other modules from osgeo import gdal, ogr, osr import scipy.interpolate as interpolate from datetime import datetime, timedelta import matplotlib.patches as mpatches import matplotlib.lines as mlines import matplotlib.cm as cm from matplotlib import gridspec from pylab import ginput import pickle # image processing modules import skimage.filters as filters import skimage.exposure as exposure import skimage.transform as transform import sklearn.decomposition as decomposition import skimage.measure as measure import skimage.morphology as morphology # machine learning modules from sklearn.externals import joblib from shapely.geometry import LineString import SDS_tools, SDS_preprocess np.seterr(all='ignore') # raise/ignore divisions by 0 and nans def nd_index(im1, im2, cloud_mask): """ Computes normalised difference index on 2 images (2D), given a cloud mask (2D). KV WRL 2018 Arguments: ----------- im1, im2: np.array Images (2D) with which to calculate the ND index cloud_mask: np.array 2D cloud mask with True where cloud pixels are Returns: ----------- im_nd: np.array Image (2D) containing the ND index """ # reshape the cloud mask vec_mask = cloud_mask.reshape(im1.shape[0] * im1.shape[1]) # initialise with NaNs vec_nd = np.ones(len(vec_mask)) * np.nan # reshape the two images vec1 = im1.reshape(im1.shape[0] * im1.shape[1]) vec2 = im2.reshape(im2.shape[0] * im2.shape[1]) # compute the normalised difference index temp = np.divide(vec1[~vec_mask] - vec2[~vec_mask], vec1[~vec_mask] + vec2[~vec_mask]) vec_nd[~vec_mask] = temp # reshape into image im_nd = vec_nd.reshape(im1.shape[0], im1.shape[1]) return im_nd def classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size): """ Classifies every pixel in the image in one of 4 classes: - sand --> label = 1 - whitewater (breaking waves and swash) --> label = 2 - water --> label = 3 - other (vegetation, buildings, rocks...) --> label = 0 The classifier is a Neural Network, trained with 7000 pixels for the class SAND and 1500 pixels for each of the other classes. This is because the class of interest for my application is SAND and I wanted to minimize the classification error for that class. 

def classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size):
    """
    Classifies every pixel in the image into one of 4 classes:
        - sand                                    --> label = 1
        - whitewater (breaking waves and swash)   --> label = 2
        - water                                   --> label = 3
        - other (vegetation, buildings, rocks...) --> label = 0

    The classifier is a Neural Network, trained with 7000 pixels for the
    class SAND and 1500 pixels for each of the other classes. This is because
    the class of interest for this application is SAND and the classification
    error for that class had to be minimised.

    KV WRL 2018

    Arguments:
    -----------
        im_ms_ps: np.array
            Pansharpened RGB + downsampled NIR and SWIR
        im_pan: np.array
            Panchromatic band
        cloud_mask: np.array
            2D cloud mask with True where cloud pixels are
        min_beach_size: int
            minimum number of connected pixels for a sand patch to be kept

    Returns:
    -----------
        im_classif: np.array
            2D image containing labels
        im_labels: np.array of booleans
            3D image containing a boolean image for each class
            (im_classif == label)
    """
    # load classifier
    clf = joblib.load(os.path.join('classifiers', 'NN_4classes_withpan.pkl'))

    # calculate features
    n_features = 10
    im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features))
    im_features[:,:,[0,1,2,3,4]] = im_ms_ps
    im_features[:,:,5] = im_pan
    im_features[:,:,6] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask)  # ND(NIR-G)
    im_features[:,:,7] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], cloud_mask)  # ND(NIR-R)
    im_features[:,:,8] = nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], cloud_mask)  # ND(B-R)
    im_features[:,:,9] = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask)  # ND(SWIR-G)

    # remove NaNs and clouds
    vec_features = im_features.reshape((im_ms_ps.shape[0] * im_ms_ps.shape[1], n_features))
    vec_cloud = cloud_mask.reshape(cloud_mask.shape[0] * cloud_mask.shape[1])
    vec_nan = np.any(np.isnan(vec_features), axis=1)
    vec_mask = np.logical_or(vec_cloud, vec_nan)
    vec_features = vec_features[~vec_mask, :]

    # predict with NN classifier
    labels = clf.predict(vec_features)

    # recompose image
    vec_classif = np.zeros((cloud_mask.shape[0] * cloud_mask.shape[1]))
    vec_classif[~vec_mask] = labels
    im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1]))

    # create boolean images for the labels of interest
    im_sand = im_classif == 1
    # remove small patches of sand
    im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
    im_swash = im_classif == 2
    im_water = im_classif == 3
    im_labels = np.stack((im_sand, im_swash, im_water), axis=-1)

    return im_classif, im_labels

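
# Sketch of the reshape/mask/predict/recompose pattern that classify_image_NN
# relies on, with a simple thresholding rule standing in for the pickled
# Neural Network (the dummy image, mask and rule are assumptions for
# illustration only).
def _example_recompose_pattern():
    """Toy version of the vectorise -> mask -> predict -> reshape pipeline."""
    im = np.random.rand(4, 5, 3)               # fake 3-band image
    cloud_mask = np.zeros((4, 5), dtype=bool)
    cloud_mask[0, 0] = True                    # one cloudy pixel
    vec = im.reshape((4 * 5, 3))
    vec_mask = cloud_mask.reshape(4 * 5)
    # dummy 'classifier': label 1 where band 0 is bright, 0 elsewhere
    labels = (vec[~vec_mask, 0] > 0.5).astype(float)
    vec_classif = np.zeros(4 * 5)              # masked pixels keep label 0
    vec_classif[~vec_mask] = labels
    return vec_classif.reshape((4, 5))
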

def classify_image_NN_nopan(im_ms_ps, cloud_mask, min_beach_size):
    """
    To be used for multispectral images that do not have a panchromatic band
    (L5 and S2). Classifies every pixel in the image into one of 4 classes:
        - sand                                    --> label = 1
        - whitewater (breaking waves and swash)   --> label = 2
        - water                                   --> label = 3
        - other (vegetation, buildings, rocks...) --> label = 0

    The classifier is a Neural Network, trained with 7000 pixels for the
    class SAND and 1500 pixels for each of the other classes. This is because
    the class of interest for this application is SAND and the classification
    error for that class had to be minimised.

    KV WRL 2018

    Arguments:
    -----------
        im_ms_ps: np.array
            RGB + downsampled NIR and SWIR (no pansharpening for L5 and S2)
        cloud_mask: np.array
            2D cloud mask with True where cloud pixels are
        min_beach_size: int
            minimum number of connected pixels for a sand patch to be kept

    Returns:
    -----------
        im_classif: np.ndarray
            2D image containing labels
        im_labels: np.ndarray of booleans
            3D image containing a boolean image for each class
            (im_classif == label)
    """
    # load classifier
    clf = joblib.load(os.path.join('classifiers', 'NN_4classes_nopan.pkl'))

    # calculate features
    n_features = 9
    im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features))
    im_features[:,:,[0,1,2,3,4]] = im_ms_ps
    im_features[:,:,5] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask)  # ND(NIR-G)
    im_features[:,:,6] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], cloud_mask)  # ND(NIR-R)
    im_features[:,:,7] = nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], cloud_mask)  # ND(B-R)
    im_features[:,:,8] = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask)  # ND(SWIR-G)

    # remove NaNs and clouds
    vec_features = im_features.reshape((im_ms_ps.shape[0] * im_ms_ps.shape[1], n_features))
    vec_cloud = cloud_mask.reshape(cloud_mask.shape[0] * cloud_mask.shape[1])
    vec_nan = np.any(np.isnan(vec_features), axis=1)
    vec_mask = np.logical_or(vec_cloud, vec_nan)
    vec_features = vec_features[~vec_mask, :]

    # predict with NN classifier
    labels = clf.predict(vec_features)

    # recompose image
    vec_classif = np.zeros((cloud_mask.shape[0] * cloud_mask.shape[1]))
    vec_classif[~vec_mask] = labels
    im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1]))

    # create boolean images for the labels of interest
    im_sand = im_classif == 1
    # remove small patches of sand
    im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
    im_swash = im_classif == 2
    im_water = im_classif == 3
    im_labels = np.stack((im_sand, im_swash, im_water), axis=-1)

    return im_classif, im_labels


def find_wl_contours1(im_ndwi, cloud_mask):
    """
    Traditional method for shoreline detection. Finds the water line by
    thresholding the Normalized Difference Water Index and applying the
    Marching Squares algorithm to contour the iso-value corresponding to
    the threshold.

    KV WRL 2018

    Arguments:
    -----------
        im_ndwi: np.ndarray
            Image (2D) with the NDWI (water index)
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are

    Returns:
    -----------
        contours_wl: list of np.arrays
            contains the (row, column) coordinates of the contour lines
    """
    # reshape image to vector
    vec_ndwi = im_ndwi.reshape(im_ndwi.shape[0] * im_ndwi.shape[1])
    vec_mask = cloud_mask.reshape(cloud_mask.shape[0] * cloud_mask.shape[1])
    vec = vec_ndwi[~vec_mask]
    # apply Otsu's threshold
    vec = vec[~np.isnan(vec)]
    t_otsu = filters.threshold_otsu(vec)
    # use the Marching Squares algorithm to detect contours on the NDWI image
    contours = measure.find_contours(im_ndwi, t_otsu)

    # remove contours that contain NaNs (due to cloud pixels in the contour)
    contours_nonans = []
    for k in range(len(contours)):
        if np.any(np.isnan(contours[k])):
            index_nan = np.where(np.isnan(contours[k]))[0]
            contours_temp = np.delete(contours[k], index_nan, axis=0)
            if len(contours_temp) > 1:
                contours_nonans.append(contours_temp)
        else:
            contours_nonans.append(contours[k])
    contours = contours_nonans

    return contours

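
# Sketch of the threshold-then-contour idea behind find_wl_contours1, on a
# tiny synthetic water-index image (the values are assumptions for
# illustration): Otsu's method picks the threshold separating the two
# intensity modes, and Marching Squares traces the corresponding iso-line.
def _example_otsu_contour():
    """Toy Otsu threshold + Marching Squares contouring."""
    im = np.array([[0.8, 0.7, 0.1],
                   [0.9, 0.6, 0.0],
                   [0.7, 0.2, 0.1]])
    t_otsu = filters.threshold_otsu(im.flatten())  # separates wet/dry modes
    contours = measure.find_contours(im, t_otsu)   # list of (row, col) arrays
    return t_otsu, contours
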

def find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size):
    """
    New robust method for extracting shorelines. Incorporates the
    classification component to refine the threshold and make it specific to
    the sand/water interface.

    KV WRL 2018

    Arguments:
    -----------
        im_ms_ps: np.array
            Pansharpened RGB + downsampled NIR and SWIR
        im_labels: np.array
            3D image containing a boolean image for each class in the order
            (sand, swash, water)
        cloud_mask: np.array
            2D cloud mask with True where cloud pixels are
        buffer_size: int
            size of the buffer around the sandy beach

    Returns:
    -----------
        contours_wi: list of np.arrays
            contains the (row, column) coordinates of the contour lines
            extracted with the NDWI (Normalized Difference Water Index)
        contours_mwi: list of np.arrays
            contains the (row, column) coordinates of the contour lines
            extracted with the MNDWI (Modified Normalized Difference Water Index)
    """
    nrows = cloud_mask.shape[0]
    ncols = cloud_mask.shape[1]

    # calculate Modified Normalized Difference Water Index (SWIR - G)
    im_mwi = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask)
    # calculate Normalized Difference Water Index (NIR - G)
    im_wi = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask)
    # stack indices together (column 0: NDWI, column 1: MNDWI)
    im_ind = np.stack((im_wi, im_mwi), axis=-1)
    vec_ind = im_ind.reshape(nrows*ncols, 2)

    # reshape labels into vectors
    vec_sand = im_labels[:,:,0].reshape(ncols*nrows)
    vec_water = im_labels[:,:,2].reshape(ncols*nrows)

    # create a buffer around the sandy beach
    se = morphology.disk(buffer_size)
    im_buffer = morphology.binary_dilation(im_labels[:,:,0], se)
    vec_buffer = im_buffer.reshape(nrows*ncols)

    # select water/sand pixels that are within the buffer
    int_water = vec_ind[np.logical_and(vec_buffer, vec_water), :]
    int_sand = vec_ind[np.logical_and(vec_buffer, vec_sand), :]

    # make sure both classes have a similar number of pixels before thresholding
    if len(int_water) > 0 and len(int_sand) > 0:
        if np.argmin([int_sand.shape[0], int_water.shape[0]]) == 1:
            if (int_sand.shape[0] - int_water.shape[0])/int_water.shape[0] > 0.5:
                int_sand = int_sand[np.random.randint(0, int_sand.shape[0], int_water.shape[0]), :]
        else:
            if (int_water.shape[0] - int_sand.shape[0])/int_sand.shape[0] > 0.5:
                int_water = int_water[np.random.randint(0, int_water.shape[0], int_sand.shape[0]), :]

    # threshold the sand/water intensities
    # (column 0 holds the NDWI values and column 1 the MNDWI values,
    # following the stacking order of im_ind above)
    int_all = np.append(int_water, int_sand, axis=0)
    t_wi = filters.threshold_otsu(int_all[:, 0])
    t_mwi = filters.threshold_otsu(int_all[:, 1])

    # find contours with the Marching Squares algorithm, restricted to the buffer
    im_wi_buffer = np.copy(im_wi)
    im_wi_buffer[~im_buffer] = np.nan
    im_mwi_buffer = np.copy(im_mwi)
    im_mwi_buffer[~im_buffer] = np.nan
    contours_wi = measure.find_contours(im_wi_buffer, t_wi)
    contours_mwi = measure.find_contours(im_mwi_buffer, t_mwi)

    # remove contour points that are NaNs (around clouds)
    contours = contours_wi
    contours_nonans = []
    for k in range(len(contours)):
        if np.any(np.isnan(contours[k])):
            index_nan = np.where(np.isnan(contours[k]))[0]
            contours_temp = np.delete(contours[k], index_nan, axis=0)
            if len(contours_temp) > 1:
                contours_nonans.append(contours_temp)
        else:
            contours_nonans.append(contours[k])
    contours_wi = contours_nonans

    contours = contours_mwi
    contours_nonans = []
    for k in range(len(contours)):
        if np.any(np.isnan(contours[k])):
            index_nan = np.where(np.isnan(contours[k]))[0]
            contours_temp = np.delete(contours[k], index_nan, axis=0)
            if len(contours_temp) > 1:
                contours_nonans.append(contours_temp)
        else:
            contours_nonans.append(contours[k])
    contours_mwi = contours_nonans

    return contours_wi, contours_mwi

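
# Sketch of the sand-buffer step used by find_wl_contours2: the sand mask is
# dilated with a disk-shaped structuring element so that only pixels near the
# sand/water interface are considered when computing the thresholds (the mask
# and radius below are illustrative).
def _example_sand_buffer():
    """Toy buffer around a sandy pixel via binary dilation."""
    im_sand = np.zeros((7, 7), dtype=bool)
    im_sand[3, 3] = True                        # a single sandy pixel
    se = morphology.disk(2)                     # structuring element, radius 2
    im_buffer = morphology.binary_dilation(im_sand, se)
    return im_buffer                            # True within 2 px of the sand
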

def process_shoreline(contours, georef, image_epsg, settings):
    """
    Converts the contours from pixel to world coordinates, discards contours
    that are too short and, if a reference shoreline is provided in the
    settings, keeps only the points close to it.
    """
    # convert pixel coordinates to world coordinates
    contours_world = SDS_tools.convert_pix2world(contours, georef)
    # convert world coordinates to desired spatial reference system
    contours_epsg = SDS_tools.convert_epsg(contours_world, image_epsg, settings['output_epsg'])

    # remove contours that have a perimeter < min_length_sl (provided in
    # settings dict), i.e. the very small contours that do not correspond
    # to the shoreline
    contours_long = []
    for wl in contours_epsg:
        coords = [(wl[k, 0], wl[k, 1]) for k in range(len(wl))]
        a = LineString(coords)  # shapely LineString structure
        if a.length >= settings['min_length_sl']:
            contours_long.append(wl)

    # format points into np.array
    x_points = np.array([])
    y_points = np.array([])
    for k in range(len(contours_long)):
        x_points = np.append(x_points, contours_long[k][:, 0])
        y_points = np.append(y_points, contours_long[k][:, 1])
    contours_array = np.transpose(np.array([x_points, y_points]))

    # if a reference shoreline has been manually digitised
    if 'refsl' in settings.keys():
        # only keep the points that are within a certain distance (defined in
        # settings) of the reference shoreline; this avoids false detections
        # and removes obvious outliers
        temp = np.zeros((len(contours_array))).astype(bool)
        for k in range(len(settings['refsl'])):
            temp = np.logical_or(np.linalg.norm(contours_array - settings['refsl'][k, [0, 1]],
                                                axis=1) < settings['max_dist_ref'], temp)
        shoreline = contours_array[temp]
    else:
        shoreline = contours_array

    return shoreline

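
# Sketch of the minimum-length filter applied by process_shoreline: each
# contour is wrapped in a shapely LineString and discarded if its length is
# below settings['min_length_sl'] (the coordinates and threshold below are
# illustrative).
def _example_min_length_filter():
    """Toy length check on a single 5 m contour against a 10 m threshold."""
    wl = np.array([[0.0, 0.0], [3.0, 4.0]])    # two points, 5 m apart
    line = LineString([(wl[k, 0], wl[k, 1]) for k in range(len(wl))])
    min_length_sl = 10.0                       # assumed threshold in metres
    return line.length >= min_length_sl        # False: contour is dropped
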

def show_detection(im_ms, cloud_mask, im_labels, shoreline, image_epsg, georef,
                   settings, date, satname):
    """
    Shows the detected shoreline overlaid on the RGB, classified and MNDWI
    images, lets the user keep or skip the detection by clicking on the
    <keep> or <skip> button, and saves a .jpg of the figure.
    """
    # subfolder to store the .jpg files
    filepath = os.path.join(os.getcwd(), 'data', settings['sitename'],
                            'jpg_files', 'detection')

    # display RGB image
    im_RGB = SDS_preprocess.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9)
    # display classified image
    im_class = np.copy(im_RGB)
    cmap = cm.get_cmap('tab20c')
    colorpalette = cmap(np.arange(0, 13, 1))
    colours = np.zeros((3, 4))
    colours[0, :] = colorpalette[5]
    colours[1, :] = np.array([204/255, 1, 1, 1])
    colours[2, :] = np.array([0, 91/255, 1, 1])
    for k in range(0, im_labels.shape[2]):
        im_class[im_labels[:,:,k], 0] = colours[k, 0]
        im_class[im_labels[:,:,k], 1] = colours[k, 1]
        im_class[im_labels[:,:,k], 2] = colours[k, 2]
    # display MNDWI grayscale image
    im_mwi = nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask)

    # transform world coordinates of shoreline into pixel coordinates
    sl_pix = SDS_tools.convert_world2pix(SDS_tools.convert_epsg(shoreline,
                                                                settings['output_epsg'],
                                                                image_epsg)[:,[0,1]], georef)
    # make figure
    fig = plt.figure()
    gs = gridspec.GridSpec(1, 3)
    gs.update(bottom=0.05, top=0.95)
    ax1 = fig.add_subplot(gs[0, 0])
    plt.imshow(im_RGB)
    plt.plot(sl_pix[:, 0], sl_pix[:, 1], 'k--')
    plt.axis('off')
    ax1.set_anchor('W')
    btn_keep = plt.text(0, 0.9, 'keep', size=16, ha="left", va="top",
                        transform=ax1.transAxes,
                        bbox=dict(boxstyle="square", ec='k', fc='w'))
    btn_skip = plt.text(1, 0.9, 'skip', size=16, ha="right", va="top",
                        transform=ax1.transAxes,
                        bbox=dict(boxstyle="square", ec='k', fc='w'))
    plt.title('Click on <keep> if shoreline detection is correct. '
              'Click on <skip> if false detection')

    ax2 = fig.add_subplot(gs[0, 1])
    plt.imshow(im_class)
    plt.plot(sl_pix[:, 0], sl_pix[:, 1], 'k--')
    plt.axis('off')
    ax2.set_anchor('W')
    orange_patch = mpatches.Patch(color=colours[0, :], label='sand')
    white_patch = mpatches.Patch(color=colours[1, :], label='whitewater')
    blue_patch = mpatches.Patch(color=colours[2, :], label='water')
    black_line = mlines.Line2D([], [], color='k', linestyle='--', label='shoreline')
    plt.legend(handles=[orange_patch, white_patch, blue_patch, black_line],
               bbox_to_anchor=(1, 0.5), fontsize=9)

    ax3 = fig.add_subplot(gs[0, 2])
    plt.imshow(im_mwi, cmap='bwr')
    plt.plot(sl_pix[:, 0], sl_pix[:, 1], 'k--')
    plt.axis('off')
    cb = plt.colorbar()
    cb.ax.tick_params(labelsize=10)
    cb.set_label('MNDWI values')
    ax3.set_anchor('W')
    fig.set_size_inches([12.53, 9.3])
    fig.set_tight_layout(True)
    mng = plt.get_current_fig_manager()
    mng.window.showMaximized()  # maximise figure window (Qt backend)

    # wait for user's selection (<keep> or <skip>)
    pt = ginput(n=1, timeout=100, show_clicks=True)
    pt = np.array(pt)
    # if the user clicks on the right half of the first image (next to the
    # <skip> button), return skip_image = True
    if pt[0][0] > im_ms.shape[1]/2:
        skip_image = True
        plt.close()
    else:
        skip_image = False
        ax1.set_title(date + ' ' + satname)
        btn_skip.set_visible(False)
        btn_keep.set_visible(False)
        fig.savefig(os.path.join(filepath, date + '_' + satname + '.jpg'), dpi=150)
        plt.close()

    return skip_image


def extract_shorelines(metadata, settings):
    """
    Extracts shorelines from all the satellite images downloaded for the
    site, and saves the output structure as a .pkl file.
    """
    sitename = settings['sitename']

    # initialise output structure
    out = dict([])
    # create a subfolder to store the .jpg images showing the detection
    filepath_jpg = os.path.join(os.getcwd(), 'data', sitename, 'jpg_files', 'detection')
    os.makedirs(filepath_jpg, exist_ok=True)

    # loop through satellite list
    for satname in metadata.keys():

        # access the images
        if satname == 'L5':
            # access downloaded Landsat 5 images
            filepath = os.path.join(os.getcwd(), 'data', sitename, satname, '30m')
            filenames = os.listdir(filepath)
        elif satname == 'L7':
            # access downloaded Landsat 7 images
            filepath_pan = os.path.join(os.getcwd(), 'data', sitename, 'L7', 'pan')
            filepath_ms = os.path.join(os.getcwd(), 'data', sitename, 'L7', 'ms')
            filenames_pan = os.listdir(filepath_pan)
            filenames_ms = os.listdir(filepath_ms)
            if not len(filenames_pan) == len(filenames_ms):
                raise Exception('error: not the same number of files for pan and ms')
            filepath = [filepath_pan, filepath_ms]
            filenames = filenames_pan
        elif satname == 'L8':
            # access downloaded Landsat 8 images
            filepath_pan = os.path.join(os.getcwd(), 'data', sitename, 'L8', 'pan')
            filepath_ms = os.path.join(os.getcwd(), 'data', sitename, 'L8', 'ms')
            filenames_pan = os.listdir(filepath_pan)
            filenames_ms = os.listdir(filepath_ms)
            if not len(filenames_pan) == len(filenames_ms):
                raise Exception('error: not the same number of files for pan and ms')
            filepath = [filepath_pan, filepath_ms]
            filenames = filenames_pan
        elif satname == 'S2':
            # access downloaded Sentinel 2 images
            filepath10 = os.path.join(os.getcwd(), 'data', sitename, satname, '10m')
            filenames10 = os.listdir(filepath10)
            filepath20 = os.path.join(os.getcwd(), 'data', sitename, satname, '20m')
            filenames20 = os.listdir(filepath20)
            filepath60 = os.path.join(os.getcwd(), 'data', sitename, satname, '60m')
            filenames60 = os.listdir(filepath60)
            if (not len(filenames10) == len(filenames20)) or (not len(filenames20) == len(filenames60)):
                raise Exception('error: not the same number of files for 10, 20 and 60 m')
            filepath = [filepath10, filepath20, filepath60]
            filenames = filenames10

        # initialise some variables
        out_timestamp = []    # datetime at which the image was acquired (UTC time)
        out_shoreline = []    # vector of shoreline points
        out_filename = []     # filename of the images from which the shorelines were derived
        out_cloudcover = []   # cloud cover of the images
        out_geoaccuracy = []  # georeferencing accuracy of the images
        out_idxkeep = []      # indices that were kept during the analysis (cloudy images are skipped)

        # loop through the images
        for i in range(len(filenames)):
            # get image filename
            fn = SDS_tools.get_filenames(filenames[i], filepath, satname)
            # preprocess image (cloud mask + pansharpening/downsampling)
            im_ms, georef, cloud_mask = SDS_preprocess.preprocess_single(fn, satname)
            # get image spatial reference system (epsg code) from metadata dict
            image_epsg = metadata[satname]['epsg'][i]
            # calculate cloud cover
            cloud_cover = np.divide(sum(sum(cloud_mask.astype(int))),
                                    (cloud_mask.shape[0]*cloud_mask.shape[1]))
            # skip image if cloud cover is above threshold
            if cloud_cover > settings['cloud_thresh']:
                continue
            # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
            im_classif, im_labels = classify_image_NN_nopan(im_ms, cloud_mask,
                                                            settings['min_beach_size'])
            # extract water line contours:
            # if there aren't any sandy pixels, use find_wl_contours1 (traditional method),
            # otherwise use find_wl_contours2 (enhanced method with classification)
            if sum(sum(im_labels[:,:,0])) == 0:
                # compute MNDWI (SWIR-Green normalized index) grayscale image
                im_mndwi = nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask)
                # find water contours on the MNDWI grayscale image
                contours_mwi = find_wl_contours1(im_mndwi, cloud_mask)
            else:
                # use classification to refine threshold and extract sand/water interface
                contours_wi, contours_mwi = find_wl_contours2(im_ms, im_labels, cloud_mask,
                                                              settings['buffer_size'])
            # extract clean shoreline from water contours
            shoreline = process_shoreline(contours_mwi, georef, image_epsg, settings)
            if settings['check_detection']:
                date = filenames[i][:10]
                skip_image = show_detection(im_ms, cloud_mask, im_labels, shoreline,
                                            image_epsg, georef, settings, date, satname)
                if skip_image:
                    continue

            # fill and save output structure
            out_timestamp.append(metadata[satname]['dates'][i])
            out_shoreline.append(shoreline)
            out_filename.append(filenames[i])
            out_cloudcover.append(cloud_cover)
            out_geoaccuracy.append(metadata[satname]['acc_georef'][i])
            out_idxkeep.append(i)

        out[satname] = {
            'timestamp': out_timestamp,
            'shoreline': out_shoreline,
            'filename': out_filename,
            'cloudcover': out_cloudcover,
            'geoaccuracy': out_geoaccuracy,
            'idxkeep': out_idxkeep
        }

    # add some metadata
    out['meta'] = {
        'timestamp': 'UTC time',
        'shoreline': 'coordinate system epsg : ' + str(settings['output_epsg']),
        'cloudcover': 'calculated on the cropped image',
        'geoaccuracy': 'RMSE error based on GCPs',
        'idxkeep': 'indices of the images that were kept to extract a shoreline'
    }

    # save output structure as out.pkl
    filepath = os.path.join(os.getcwd(), 'data', sitename)
    with open(os.path.join(filepath, sitename + '_out.pkl'), 'wb') as f:
        pickle.dump(out, f)

    return out

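
# Usage sketch for the full pipeline. All settings keys below are read
# somewhere in this module; the values (site name, EPSG code, thresholds) are
# illustrative assumptions, and `metadata` is the structure produced by the
# download/preprocessing step (not shown here). 'refsl' and 'max_dist_ref'
# may additionally be provided to filter detections against a reference
# shoreline.
#
#   settings = {
#       'sitename': 'EXAMPLE_SITE',
#       'output_epsg': 28356,       # e.g. GDA94 / MGA zone 56
#       'cloud_thresh': 0.5,        # max fraction of cloudy pixels
#       'min_beach_size': 20,       # pixels, smallest sand patch kept
#       'buffer_size': 7,           # pixels, dilation radius around the sand
#       'min_length_sl': 200,       # metres, shortest contour kept
#       'check_detection': True,    # show each detection for manual QC
#   }
#   out = extract_shorelines(metadata, settings)
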