From 6f3555cbfc85e8b9e46cfaf92c8f588b6d11edf4 Mon Sep 17 00:00:00 2001 From: Kilian Vos Date: Sun, 30 Sep 2018 16:52:06 +1000 Subject: [PATCH] Supprimer 'functions/sds.py' --- functions/sds.py | 1206 ---------------------------------------------- 1 file changed, 1206 deletions(-) delete mode 100644 functions/sds.py diff --git a/functions/sds.py b/functions/sds.py deleted file mode 100644 index 7a32837..0000000 --- a/functions/sds.py +++ /dev/null @@ -1,1206 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Thu Mar 1 11:20:35 2018 - -@author: z5030440 -""" - -"""This module contains all the functions needed for extracting satellite derived shoreline (SDS) """ - -# Initial settings -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.patches as mpatches -from matplotlib import gridspec -import pdb -import ee - -# other modules -from osgeo import gdal, ogr, osr -import tempfile -from urllib.request import urlretrieve -import zipfile -import scipy.interpolate as interpolate - -# image processing modules -import skimage.filters as filters -import skimage.exposure as exposure -import skimage.transform as transform -import sklearn.decomposition as decomposition -import skimage.measure as measure -import skimage.morphology as morphology - -# machine learning modules -from sklearn.cluster import KMeans -from sklearn.neural_network import MLPClassifier -from sklearn.externals import joblib - - -# import own modules -from functions.utils import * - - -# Download from ee server function - -def download_tif(image, polygon, bandsId, filepath): - """downloads tif image (region and bands) from the ee server and stores it in a temp file""" - url = ee.data.makeDownloadUrl(ee.data.getDownloadId({ - 'image': image.serialize(), - 'region': polygon, - 'bands': bandsId, - 'filePerBand': 'false', - 'name': 'data', - })) - local_zip, headers = urlretrieve(url) - with zipfile.ZipFile(local_zip) as local_zipfile: - return local_zipfile.extract('data.tif', filepath) - -def create_cloud_mask(im_qa, satname, plot_bool): - """ - Creates a cloud mask from the image containing the QA band information - - KV WRL 2018 - - Arguments: - ----------- - im_qa: np.ndarray - Image containing the QA band - satname: string - short name for the satellite (L8, L7, S2) - plot_bool: boolean - True if plot is wanted - - Returns: - ----------- - cloud_mask : np.ndarray of booleans - A boolean array with True where the cloud are present - """ - - # convert QA bits - if satname == 'L8': - cloud_values = [2800, 2804, 2808, 2812, 6896, 6900, 6904, 6908] - elif satname == 'L7' or satname == 'L5' or satname == 'L4': - cloud_values = [752, 756, 760, 764] - elif satname == 'S2': - cloud_values = [1024, 2048] # 1024 = dense cloud, 2048 = cirrus clouds - - cloud_mask = np.isin(im_qa, cloud_values) - # remove isolated cloud pixels (there are some in the swash and they cause problems) - if sum(sum(cloud_mask)) > 0: - morphology.remove_small_objects(cloud_mask, min_size=10, connectivity=1, in_place=True) - - if plot_bool: - plt.figure() - plt.imshow(cloud_mask, cmap='gray') - plt.draw() - - #cloud_shadow_values = [2976, 2980, 2984, 2988, 3008, 3012, 3016, 3020] - #cloud_shadow_mask = np.isin(im_qa, cloud_shadow_values) - - return cloud_mask - -def rescale_image_intensity(im, cloud_mask, prob_high, plot_bool): - """ - Rescales the intensity of an image (multispectral or single band) by applying - a cloud mask and clipping the prob_high upper percentile. This functions allows - to stretch the contrast of an image. - - KV WRL 2018 - - Arguments: - ----------- - im: np.ndarray - Image to rescale, can be 3D (multispectral) or 2D (single band) - cloud_mask: np.ndarray - 2D cloud mask with True where cloud pixels are - prob_high: float - probability of exceedence used to calculate the upper percentile - plot_bool: boolean - True if plot is wanted - - Returns: - ----------- - im_adj: np.ndarray - The rescaled image - """ - prc_low = 0 # lower percentile - vec_mask = cloud_mask.reshape(im.shape[0] * im.shape[1]) - - if plot_bool: - plt.figure() - - if len(im.shape) > 2: - vec = im.reshape(im.shape[0] * im.shape[1], im.shape[2]) - vec_adj = np.ones((len(vec_mask), im.shape[2])) * np.nan - - for i in range(im.shape[2]): - prc_high = np.percentile(vec[~vec_mask, i], prob_high) - vec_rescaled = exposure.rescale_intensity(vec[~vec_mask, i], in_range=(prc_low, prc_high)) - vec_adj[~vec_mask,i] = vec_rescaled - - if plot_bool: - plt.subplot(np.floor(im.shape[2]/2) + 1, np.floor(im.shape[2]/2), i+1) - plt.hist(vec[~vec_mask, i], bins=200, label='original') - plt.hist(vec_rescaled, bins=200, alpha=0.5, label='rescaled') - plt.legend() - plt.title('Band' + str(i+1)) - plt.show() - - im_adj = vec_adj.reshape(im.shape[0], im.shape[1], im.shape[2]) - - if plot_bool: - plt.figure() - ax1 = plt.subplot(121) - plt.imshow(im[:,:,[2,1,0]]) - plt.axis('off') - plt.title('Original') - ax2 = plt.subplot(122, sharex=ax1, sharey=ax1) - plt.imshow(im_adj[:,:,[2,1,0]]) - plt.axis('off') - plt.title('Rescaled') - plt.show() - - else: - vec = im.reshape(im.shape[0] * im.shape[1]) - vec_adj = np.ones(len(vec_mask)) * np.nan - prc_high = np.percentile(vec[~vec_mask], prob_high) - vec_rescaled = exposure.rescale_intensity(vec[~vec_mask], in_range=(prc_low, prc_high)) - vec_adj[~vec_mask] = vec_rescaled - - if plot_bool: - plt.hist(vec[~vec_mask], bins=200, label='original') - plt.hist(vec_rescaled, bins=200, alpha=0.5, label='rescaled') - plt.legend() - plt.title('Single band') - plt.show() - - im_adj = vec_adj.reshape(im.shape[0], im.shape[1]) - - if plot_bool: - plt.figure() - ax1 = plt.subplot(121) - plt.imshow(im, cmap='gray') - plt.axis('off') - plt.title('Original') - ax2 = plt.subplot(122, sharex=ax1, sharey=ax1) - plt.imshow(im_adj, cmap='gray') - plt.axis('off') - plt.title('Rescaled') - plt.show() - - return im_adj - - -def hist_match(source, template): - """ - Adjust the pixel values of a grayscale image such that its histogram - matches that of a target image - - Arguments: - ----------- - source: np.ndarray - Image to transform; the histogram is computed over the flattened - array - template: np.ndarray - Template image; can have different dimensions to source - Returns: - ----------- - matched: np.ndarray - The transformed output image - """ - - oldshape = source.shape - source = source.ravel() - template = template.ravel() - - # get the set of unique pixel values and their corresponding indices and - # counts - s_values, bin_idx, s_counts = np.unique(source, return_inverse=True, - return_counts=True) - t_values, t_counts = np.unique(template, return_counts=True) - - # take the cumsum of the counts and normalize by the number of pixels to - # get the empirical cumulative distribution functions for the source and - # template images (maps pixel value --> quantile) - s_quantiles = np.cumsum(s_counts).astype(np.float64) - s_quantiles /= s_quantiles[-1] - t_quantiles = np.cumsum(t_counts).astype(np.float64) - t_quantiles /= t_quantiles[-1] - - # interpolate linearly to find the pixel values in the template image - # that correspond most closely to the quantiles in the source image - interp_t_values = np.interp(s_quantiles, t_quantiles, t_values) - - return interp_t_values[bin_idx].reshape(oldshape) - -def pansharpen(im_ms, im_pan, cloud_mask, plot_bool): - """ - Pansharpens a multispectral image (3D), using the panchromatic band (2D) - and a cloud mask - - KV WRL 2018 - - Arguments: - ----------- - im_ms: np.ndarray - Multispectral image to pansharpen (3D) - im_pan: np.ndarray - Panchromatic band (2D) - cloud_mask: np.ndarray - 2D cloud mask with True where cloud pixels are - plot_bool: boolean - True if plot is wanted - - Returns: - ----------- - im_ms_ps: np.ndarray - Pansharpened multisoectral image (3D) - """ - - # reshape image into vector and apply cloud mask - vec = im_ms.reshape(im_ms.shape[0] * im_ms.shape[1], im_ms.shape[2]) - vec_mask = cloud_mask.reshape(im_ms.shape[0] * im_ms.shape[1]) - vec = vec[~vec_mask, :] - # apply PCA to RGB bands - pca = decomposition.PCA() - vec_pcs = pca.fit_transform(vec) - - # replace 1st PC with pan band (after matching histograms) - vec_pan = im_pan.reshape(im_pan.shape[0] * im_pan.shape[1]) - vec_pan = vec_pan[~vec_mask] - -# plt.figure() -# ax1 = plt.subplot(131) -# plt.imshow(im_pan, cmap='gray') -# plt.title('Pan band') -# plt.subplot(132, sharex=ax1, sharey=ax1) -# plt.imshow(vec_pcs[:,0].reshape(im_pan.shape[0],im_pan.shape[1]), cmap='gray') -# plt.title('PC1') -# plt.subplot(133, sharex=ax1, sharey=ax1) -# plt.imshow(hist_match(vec_pan, vec_pcs[:,0]).reshape(im_pan.shape[0],im_pan.shape[1]), cmap='gray') -# plt.title('Pan band histmatched') -# -# plt.figure() -# plt.hist(hist_match(vec_pan, vec_pcs[:,0]), bins=300) -# plt.hist(vec_pcs[:,0], bins=300, alpha=0.5) -# plt.hist(vec_pan, bins=300, alpha=0.5) -# plt.draw() - - vec_pcs[:,0] = hist_match(vec_pan, vec_pcs[:,0]) - vec_ms_ps = pca.inverse_transform(vec_pcs) - - # reshape vector into image - vec_ms_ps_full = np.ones((len(vec_mask), im_ms.shape[2])) * np.nan - vec_ms_ps_full[~vec_mask,:] = vec_ms_ps - im_ms_ps = vec_ms_ps_full.reshape(im_ms.shape[0], im_ms.shape[1], im_ms.shape[2]) - - if plot_bool: - - plt.figure() - ax1 = plt.subplot(121) - plt.imshow(rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)) - plt.axis('off') - plt.title('Original') - ax2 = plt.subplot(122, sharex=ax1, sharey=ax1) - plt.imshow(rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)) - plt.axis('off') - plt.title('Pansharpened') - plt.show() - - return im_ms_ps - -def nd_index(im1, im2, cloud_mask, plot_bool): - """ - Computes normalised difference index on 2 images (2D), given a cloud mask (2D) - - KV WRL 2018 - - Arguments: - ----------- - im1, im2: np.ndarray - Images (2D) with which to calculate the ND index - cloud_mask: np.ndarray - 2D cloud mask with True where cloud pixels are - plot_bool: boolean - True if plot is wanted - - Returns: ----------- - im_nd: np.ndarray - - Image (2D) containing the ND index - """ - vec_mask = cloud_mask.reshape(im1.shape[0] * im1.shape[1]) - vec_nd = np.ones(len(vec_mask)) * np.nan - vec1 = im1.reshape(im1.shape[0] * im1.shape[1]) - vec2 = im2.reshape(im2.shape[0] * im2.shape[1]) - temp = np.divide(vec1[~vec_mask] - vec2[~vec_mask], - vec1[~vec_mask] + vec2[~vec_mask]) - vec_nd[~vec_mask] = temp - im_nd = vec_nd.reshape(im1.shape[0], im1.shape[1]) - - if plot_bool: - plt.figure() - plt.imshow(im_nd, cmap='seismic') - plt.colorbar() - plt.title('Normalised index') - plt.show() - - return im_nd - -def find_wl_contours(im_ndwi, cloud_mask, plot_bool): - """ - Finds the water line by thresholding the Normalized Difference Water Index and applying the Marching - Squares Algorithm - - KV WRL 2018 - - Arguments: - ----------- - im_ndwi: np.ndarray - Image (2D) with the NDWI (water index) - cloud_mask: np.ndarray - 2D cloud mask with True where cloud pixels are - plot_bool: boolean - True if plot is wanted - - Returns: ----------- - contours_wl: list of np.arrays - contains the (row,column) coordinates of the contour lines - - """ - - # reshape image to vector - vec_ndwi = im_ndwi.reshape(im_ndwi.shape[0] * im_ndwi.shape[1]) - vec_mask = cloud_mask.reshape(cloud_mask.shape[0] * cloud_mask.shape[1]) - vec = vec_ndwi[~vec_mask] - # apply otsu's threshold - t_otsu = filters.threshold_otsu(vec) - # use Marching Squares algorithm to detect contours on ndwi image - contours = measure.find_contours(im_ndwi, t_otsu) - - # remove contour points that are nans - contours_nonans = [] - for k in range(len(contours)): - if np.any(np.isnan(contours[k])): - index_nan = np.where(np.isnan(contours[k]))[0] - contours_temp = np.delete(contours[k], index_nan, axis=0) - if len(contours_temp) > 1: - contours_nonans.append(contours_temp) - else: - contours_nonans.append(contours[k]) - contours = contours_nonans - - if plot_bool: - # plot otsu's histogram segmentation - plt.figure() - vals = plt.hist(vec, bins=200) - plt.plot([t_otsu, t_otsu],[0, np.max(vals[0])], 'r-', label='Otsu threshold') - plt.legend() - plt.show() - - # plot the water line contours on top of water index - plt.figure() - plt.imshow(im_ndwi, cmap='seismic') - plt.colorbar() - for i,contour in enumerate(contours): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k') - plt.axis('image') - plt.title('Detected water lines') - plt.show() - - return contours - -def convert_pix2world(points, crs_vec): - """ - Converts pixel coordinates (row,columns) to world projected coordinates - performing an affine transformation - - KV WRL 2018 - - Arguments: - ----------- - points: np.ndarray or list of np.ndarray - array with 2 columns (rows first and columns second) - crs_vec: np.ndarray - vector of 6 elements [Xtr, Xscale, Xshear, Ytr, Yshear, Yscale] - - Returns: ----------- - points_converted: np.ndarray or list of np.ndarray - converted coordinates, first columns with X and second column with Y - - """ - - # make affine transformation matrix - aff_mat = np.array([[crs_vec[1], crs_vec[2], crs_vec[0]], - [crs_vec[4], crs_vec[5], crs_vec[3]], - [0, 0, 1]]) - # create affine transformation - tform = transform.AffineTransform(aff_mat) - - if type(points) is list: - points_converted = [] - # iterate over the list - for i, arr in enumerate(points): - tmp = arr[:,[1,0]] - points_converted.append(tform(tmp)) - - elif type(points) is np.ndarray: - tmp = points[:,[1,0]] - points_converted = tform(tmp) - - else: - print('invalid input type') - raise - - return points_converted - -def convert_world2pix(points, crs_vec): - """ - Converts world projected coordinates (X,Y) to image coordinates (row,column) - performing an affine transformation - - KV WRL 2018 - - Arguments: - ----------- - points: np.ndarray or list of np.ndarray - array with 2 columns (rows first and columns second) - crs_vec: np.ndarray - vector of 6 elements [Xtr, Xscale, Xshear, Ytr, Yshear, Yscale] - - Returns: ----------- - points_converted: np.ndarray or list of np.ndarray - converted coordinates, first columns with row and second column with column - - """ - - # make affine transformation matrix - aff_mat = np.array([[crs_vec[1], crs_vec[2], crs_vec[0]], - [crs_vec[4], crs_vec[5], crs_vec[3]], - [0, 0, 1]]) - # create affine transformation - tform = transform.AffineTransform(aff_mat) - - if type(points) is list: - points_converted = [] - # iterate over the list - for i, arr in enumerate(points): - points_converted.append(tform.inverse(points)) - - elif type(points) is np.ndarray: - points_converted = tform.inverse(points) - - else: - print('invalid input type') - raise - - return points_converted - - -def convert_epsg(points, epsg_in, epsg_out): - """ - Converts from one spatial reference to another using the epsg codes - - KV WRL 2018 - - Arguments: - ----------- - points: np.ndarray or list of np.ndarray - array with 2 columns (rows first and columns second) - epsg_in: int - epsg code of the spatial reference in which the input is - epsg_out: int - epsg code of the spatial reference in which the output will be - - Returns: ----------- - points_converted: np.ndarray or list of np.ndarray - converted coordinates - - """ - - # define input and output spatial references - inSpatialRef = osr.SpatialReference() - inSpatialRef.ImportFromEPSG(epsg_in) - outSpatialRef = osr.SpatialReference() - outSpatialRef.ImportFromEPSG(epsg_out) - # create a coordinates transform - coordTransform = osr.CoordinateTransformation(inSpatialRef, outSpatialRef) - # transform points - if type(points) is list: - points_converted = [] - # iterate over the list - for i, arr in enumerate(points): - points_converted.append(np.array(coordTransform.TransformPoints(arr))) - - elif type(points) is np.ndarray: - points_converted = np.array(coordTransform.TransformPoints(points)) - - else: - print('invalid input type') - raise - - return points_converted - -def classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, buffer_size, min_beach_size, plot_bool): - """ - Classifies sand pixels using an unsupervised algorithm (Kmeans) - Set buffer size to False if you want to classify the entire image, - otherwise buffer size defines the buffer around the shoreline in which - pixels are considered for classification. - This classification is not robust and is only used to train a supervised algorithm - - KV WRL 2018 - - Arguments: - ----------- - im_ms_ps: np.ndarray - Pansharpened RGB + downsampled NIR and SWIR - im_pan: - Panchromatic band - cloud_mask: np.ndarray - 2D cloud mask with True where cloud pixels are - wl_pix: list of np.ndarray - list of arrays containig the pixel coordinates of the water line - buffer_size: int or False - radius of the disk used to create a buffer around the water line - when False, the entire image is considered for kmeans - min_beach_size: int - minimum number of connected pixels belonging to a single beach - plot_bool: boolean - True if plot is wanted - - Returns: ----------- - im_sand: np.ndarray - 2D binary image containing True where sand pixels are located - - """ - # reshape the 2D images into vectors - vec_ms_ps = im_ms_ps.reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1], im_ms_ps.shape[2]) - vec_pan = im_pan.reshape(im_pan.shape[0]*im_pan.shape[1]) - vec_mask = cloud_mask.reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1]) - # add B,G,R,NIR and pan bands to the vector of features - vec_features = np.zeros((vec_ms_ps.shape[0], 5)) - vec_features[:,[0,1,2,3]] = vec_ms_ps[:,[0,1,2,3]] - vec_features[:,4] = vec_pan - - if buffer_size: - # create binary image with ones where the detected water lines is - im_buffer = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1])) - for i, contour in enumerate(wl_pix): - indices = [(int(_[0]), int(_[1])) for _ in list(np.round(contour))] - for j, idx in enumerate(indices): - im_buffer[idx] = 1 - # perform a dilation on the binary image - se = morphology.disk(buffer_size) - im_buffer = morphology.binary_dilation(im_buffer, se) - vec_buffer = (im_buffer == 1).reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1]) - else: - vec_buffer = np.ones((vec_pan.shape[0])) - # add cloud mask to buffer - vec_buffer= np.logical_and(vec_buffer, ~vec_mask) - # perform kmeans (6 clusters) - kmeans = KMeans(n_clusters=6, random_state=0).fit(vec_features[vec_buffer,:]) - labels = np.ones((len(vec_mask))) * np.nan - labels[vec_buffer] = kmeans.labels_ - im_labels = labels.reshape(im_ms_ps.shape[0], im_ms_ps.shape[1]) - # find the class with maximum reflection in the B,G,R,Pan - im_sand = im_labels == np.argmax(np.mean(kmeans.cluster_centers_[:,[0,1,2,4]], axis=1)) - im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2) - im_sand = morphology.binary_erosion(im_sand, morphology.disk(1)) -# im_sand = morphology.binary_dilation(im_sand, morphology.disk(1)) - - if plot_bool: - im = np.copy(rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)) - im[im_sand,0] = 0 - im[im_sand,1] = 0 - im[im_sand,2] = 1 - plt.figure() - plt.imshow(im) - plt.axis('image') - plt.title('Sand classification') - plt.show() - - return im_sand - -def classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, plot_bool): - """ - Classifies every pixel in the image in one of 4 classes: - - sand --> label = 1 - - whitewater (breaking waves and swash) --> label = 2 - - water --> label = 3 - - other (vegetation, buildings, rocks...) --> label = 0 - - The classifier is a Neural Network, trained with 7000 pixels for the class SAND and 1500 pixels for - each of the other classes. This is because the class of interest for my application is SAND and I - wanted to minimize the classification error for that class - - KV WRL 2018 - - Arguments: - ----------- - im_ms_ps: np.ndarray - Pansharpened RGB + downsampled NIR and SWIR - im_pan: - Panchromatic band - cloud_mask: np.ndarray - 2D cloud mask with True where cloud pixels are - plot_bool: boolean - True if plot is wanted - - Returns: ----------- - im_classif: np.ndarray - 2D image containing labels - im_labels: np.ndarray of booleans - 3D image containing a boolean image for each class (im_classif == label) - - """ - - # load classifier - clf = joblib.load('functions/NeuralNet_classif.pkl') - - # calculate features - n_features = 10 - im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features)) - im_features[:,:,[0,1,2,3,4]] = im_ms_ps - im_features[:,:,5] = im_pan - im_features[:,:,6] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, False) # (NIR-G) - im_features[:,:,7] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], cloud_mask, False) # ND(NIR-R) - im_features[:,:,8] = nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], cloud_mask, False) # ND(B-R) - im_features[:,:,9] = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False) # ND(SWIR-G) - # remove NaNs and clouds - vec_features = im_features.reshape((im_ms_ps.shape[0] * im_ms_ps.shape[1], n_features)) - vec_cloud = cloud_mask.reshape(cloud_mask.shape[0]*cloud_mask.shape[1]) - vec_nan = np.any(np.isnan(vec_features), axis=1) - vec_mask = np.logical_or(vec_cloud, vec_nan) - vec_features = vec_features[~vec_mask, :] - # predict with NN classifier - labels = clf.predict(vec_features) - # recompose image - vec_classif = np.zeros((cloud_mask.shape[0]*cloud_mask.shape[1])) - vec_classif[~vec_mask] = labels - im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1])) - - # labels - im_sand = im_classif == 1 - im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2) - im_swash = im_classif == 2 - im_water = im_classif == 3 - im_labels = np.stack((im_sand,im_swash,im_water), axis=-1) - - - # only select the patches that are beaches -# try: -# labels_sand = measure.label(im_sand) -# values = np.unique(labels_sand) -# se = morphology.disk(5) -# im_sand_new = np.zeros((im_ms_ps.shape[0],im_ms_ps.shape[1])).astype('bool') -# counter = 0 -# for j in range(1,len(values)): -# patch_sand = labels_sand == values[j] -# im_buffer = morphology.binary_dilation(patch_sand, se) -# sum_inter = sum(sum(np.logical_and(im_buffer,im_swash))) -# if sum_inter >= 20: -# im_sand_new = np.logical_or(im_sand_new, patch_sand) -# counter = counter + 1 -# if counter >= 1: -# im_labels[:,:,0] = im_sand_new -# except: -# print('nothing') - - if plot_bool: - # display on top of pansharpened RGB - im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False) - im = np.copy(im_display) - # define colours for plot - colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]]) - for k in range(0,im_labels.shape[2]): - im[im_labels[:,:,k],0] = colours[k,0] - im[im_labels[:,:,k],1] = colours[k,1] - im[im_labels[:,:,k],2] = colours[k,2] - - plt.figure() - ax1 = plt.subplot(121) - plt.imshow(im_display) - plt.axis('off') - plt.title('Image') - ax2 = plt.subplot(122, sharex=ax1, sharey=ax1) - plt.imshow(im) - plt.axis('off') - plt.title('NN classifier') - mng = plt.get_current_fig_manager() - mng.window.showMaximized() - plt.tight_layout() - plt.draw() - - return im_classif, im_labels - -def classify_image_NN_nopan(im_ms_ps, cloud_mask, min_beach_size, plot_bool): - """ - Classifies every pixel in the image in one of 4 classes: - - sand --> label = 1 - - whitewater (breaking waves and swash) --> label = 2 - - water --> label = 3 - - other (vegetation, buildings, rocks...) --> label = 0 - - The classifier is a Neural Network, trained with 7000 pixels for the class SAND and 1500 pixels for - each of the other classes. This is because the class of interest for my application is SAND and I - wanted to minimize the classification error for that class - - KV WRL 2018 - - Arguments: - ----------- - im_ms_ps: np.ndarray - Pansharpened RGB + downsampled NIR and SWIR - im_pan: - Panchromatic band - cloud_mask: np.ndarray - 2D cloud mask with True where cloud pixels are - plot_bool: boolean - True if plot is wanted - - Returns: ----------- - im_classif: np.ndarray - 2D image containing labels - im_labels: np.ndarray of booleans - 3D image containing a boolean image for each class (im_classif == label) - - """ - - # load classifier - clf = joblib.load('functions/NeuralNet_classif_nopan.pkl') - - # calculate features - n_features = 9 - im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features)) - im_features[:,:,[0,1,2,3,4]] = im_ms_ps - im_features[:,:,5] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, False) # (NIR-G) - im_features[:,:,6] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], cloud_mask, False) # ND(NIR-R) - im_features[:,:,7] = nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], cloud_mask, False) # ND(B-R) - im_features[:,:,8] = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False) # ND(SWIR-G) - # remove NaNs and clouds - vec_features = im_features.reshape((im_ms_ps.shape[0] * im_ms_ps.shape[1], n_features)) - vec_cloud = cloud_mask.reshape(cloud_mask.shape[0]*cloud_mask.shape[1]) - vec_nan = np.any(np.isnan(vec_features), axis=1) - vec_mask = np.logical_or(vec_cloud, vec_nan) - vec_features = vec_features[~vec_mask, :] - # predict with NN classifier - labels = clf.predict(vec_features) - - # recompose image - vec_classif = np.zeros((cloud_mask.shape[0]*cloud_mask.shape[1])) - vec_classif[~vec_mask] = labels - im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1])) - - # labels - im_sand = im_classif == 1 - im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2) - im_swash = im_classif == 2 - im_water = im_classif == 3 - im_labels = np.stack((im_sand,im_swash,im_water), axis=-1) - - # only select the patches that are beaches -# try: -# labels_sand = measure.label(im_sand) -# values = np.unique(labels_sand) -# se = morphology.disk(5) -# im_sand_new = np.zeros((im_ms_ps.shape[0],im_ms_ps.shape[1])).astype('bool') -# counter = 0 -# for j in range(1,len(values)): -# patch_sand = labels_sand == values[j] -# im_buffer = morphology.binary_dilation(patch_sand, se) -# sum_inter = sum(sum(np.logical_and(im_buffer,im_swash))) -# if sum_inter >= 20: -# im_sand_new = np.logical_or(im_sand_new, patch_sand) -# counter = counter + 1 -# if counter >= 1: -# im_labels[:,:,0] = im_sand_new -# except: -# print('nothing') - - if plot_bool: - # display on top of pansharpened RGB - im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False) - im = np.copy(im_display) - # define colours for plot - colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]]) - for k in range(0,im_labels.shape[2]): - im[im_labels[:,:,k],0] = colours[k,0] - im[im_labels[:,:,k],1] = colours[k,1] - im[im_labels[:,:,k],2] = colours[k,2] - - plt.figure() - ax1 = plt.subplot(121) - plt.imshow(im_display) - plt.axis('off') - plt.title('Image') - ax2 = plt.subplot(122, sharex=ax1, sharey=ax1) - plt.imshow(im) - plt.axis('off') - plt.title('NN classifier') - mng = plt.get_current_fig_manager() - mng.window.showMaximized() - plt.tight_layout() - plt.draw() - - return im_classif, im_labels - -def find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size, plot_bool): - """ - New method for extracting shorelines (more robust) - - KV WRL 2018 - - Arguments: - ----------- - im_ms_ps: np.ndarray - Pansharpened RGB + downsampled NIR and SWIR - im_labels: np.ndarray - 3D image containing a boolean image for each class in the order (sand, swash, water) - cloud_mask: np.ndarray - 2D cloud mask with True where cloud pixels are - buffer_size: int - size of the buffer around the sandy beach - plot_bool: boolean - True if plot is wanted - - Returns: ----------- - contours_wi: list of np.arrays - contains the (row,column) coordinates of the contour lines extracted with the Water Index - contours_mwi: list of np.arrays - contains the (row,column) coordinates of the contour lines extracted with the Modified Water Index - - """ - - nrows = cloud_mask.shape[0] - ncols = cloud_mask.shape[1] - im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False) - - # calculate Normalized Difference Modified Water Index (SWIR - G) - im_mwi = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False) - # calculate Normalized Difference Modified Water Index (NIR - G) - im_wi = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, False) - # stack indices together - im_ind = np.stack((im_wi, im_mwi), axis=-1) - vec_ind = im_ind.reshape(nrows*ncols,2) - - # process labels - vec_sand = im_labels[:,:,0].reshape(ncols*nrows) - vec_swash = im_labels[:,:,1].reshape(ncols*nrows) - vec_water = im_labels[:,:,2].reshape(ncols*nrows) - - # create a buffer around the sandy beach - se = morphology.disk(buffer_size) - im_buffer = morphology.binary_dilation(im_labels[:,:,0], se) - vec_buffer = im_buffer.reshape(nrows*ncols) - - # select water/sand/swash pixels that are within the buffer - int_water = vec_ind[np.logical_and(vec_buffer,vec_water),:] - int_sand = vec_ind[np.logical_and(vec_buffer,vec_sand),:] - int_swash = vec_ind[np.logical_and(vec_buffer,vec_swash),:] - - # threshold the sand/water intensities - int_all = np.append(int_water,int_sand, axis=0) - t_mwi = filters.threshold_otsu(int_all[:,0]) - t_wi = filters.threshold_otsu(int_all[:,1]) - - # find contour with MS algorithm - im_wi_buffer = np.copy(im_wi) - im_wi_buffer[~im_buffer] = np.nan - im_mwi_buffer = np.copy(im_mwi) - im_mwi_buffer[~im_buffer] = np.nan - contours_wi = measure.find_contours(im_wi_buffer, t_wi) - contours_mwi = measure.find_contours(im_mwi, t_mwi) # WARNING (on entire image) - - # remove contour points that are nans (around clouds) - - contours = contours_wi - contours_nonans = [] - for k in range(len(contours)): - if np.any(np.isnan(contours[k])): - index_nan = np.where(np.isnan(contours[k]))[0] - contours_temp = np.delete(contours[k], index_nan, axis=0) - if len(contours_temp) > 1: - contours_nonans.append(contours_temp) - else: - contours_nonans.append(contours[k]) - contours_wi = contours_nonans - - contours = contours_mwi - contours_nonans = [] - for k in range(len(contours)): - if np.any(np.isnan(contours[k])): - index_nan = np.where(np.isnan(contours[k]))[0] - contours_temp = np.delete(contours[k], index_nan, axis=0) - if len(contours_temp) > 1: - contours_nonans.append(contours_temp) - else: - contours_nonans.append(contours[k]) - contours_mwi = contours_nonans - - if plot_bool: - - im = np.copy(im_display) - # define colours for plot - colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]]) - for k in range(0,im_labels.shape[2]): - im[im_labels[:,:,k],0] = colours[k,0] - im[im_labels[:,:,k],1] = colours[k,1] - im[im_labels[:,:,k],2] = colours[k,2] - - fig = plt.figure() - gs = gridspec.GridSpec(3, 3, height_ratios=[1, 1, 3]) - - ax1 = fig.add_subplot(gs[0,:]) - vals = plt.hist(int_water[:,0], bins=100, label='water') - plt.hist(int_sand[:,0], bins=100, alpha=0.5, label='sand') - plt.hist(int_swash[:,0], bins=100, alpha=0.5, label='swash') - plt.plot([t_wi, t_wi], [0, np.max(vals[0])], 'r-') - plt.legend() - plt.title('Water Index NIR-G') - - ax2 = fig.add_subplot(gs[1,:], sharex=ax1) - vals = plt.hist(int_water[:,1], bins=100, label='water') - plt.hist(int_sand[:,1], bins=100, alpha=0.5, label='sand') - plt.hist(int_swash[:,1], bins=100, alpha=0.5, label='swash') - plt.plot([t_mwi, t_mwi], [0, np.max(vals[0])], 'r-') - plt.legend() - plt.title('Modified Water Index SWIR-G') - - ax3 = fig.add_subplot(gs[2,0]) - plt.imshow(im) - for i,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k') - for i,contour in enumerate(contours_wi): plt.plot(contour[:, 1], contour[:, 0], linestyle='--', linewidth=1, color='w') - plt.grid(False) - plt.xticks([]) - plt.yticks([]) - - ax4 = fig.add_subplot(gs[2,1], sharex=ax3, sharey=ax3) - plt.imshow(im_display) - for i,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k') - for i,contour in enumerate(contours_wi): plt.plot(contour[:, 1], contour[:, 0], linestyle='--', linewidth=1, color='w') - plt.grid(False) - plt.xticks([]) - plt.yticks([]) - - ax5 = fig.add_subplot(gs[2,2], sharex=ax3, sharey=ax3) - plt.imshow(im_mwi, cmap='seismic') - for i,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k') - for i,contour in enumerate(contours_wi): plt.plot(contour[:, 1], contour[:, 0], linestyle='--', linewidth=1, color='w') - plt.grid(False) - plt.xticks([]) - plt.yticks([]) - -# plt.gcf().set_size_inches(17.99,7.55) - mng = plt.get_current_fig_manager() - mng.window.showMaximized() - plt.gcf().set_tight_layout(True) - plt.draw() - - return contours_wi, contours_mwi - -def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5): - """ - Compare sds with groundtruth data from topographic surveys / argus shorelines - - KV WRL 2018 - - Arguments: - ----------- - dates_sds: list - list of dates corresponding to each row in chain_sds - chain_sds: np.ndarray - array with time series of chainage for each transect (each transect is one column) - topo_profiles: dict - dict containing the dates and chainage of the groundtruth - mod: 0 or 1 - 0 for linear interpolation between 2 closest surveys, 1 for only nearest neighbour - min_days: int - minimum number of days for which the data can be compared - - Returns: ----------- - stats: dict - contains all the statistics of the comparison - - """ - - # create 3 figures - fig1 = plt.figure() - gs1 = gridspec.GridSpec(chain_sds.shape[1], 1) - fig2 = plt.figure() - gs2 = gridspec.GridSpec(2, chain_sds.shape[1]) - fig3 = plt.figure() - gs3 = gridspec.GridSpec(2,1) - - dates_sds_num = np.array([_.toordinal() for _ in dates_sds]) - stats = dict([]) - data_fin = dict([]) - - # for each transect compare and plot the data - for i in range(chain_sds.shape[1]): - - pfname = list(topo_profiles.keys())[i] - stats[pfname] = dict([]) - data_fin[pfname] = dict([]) - - dates_sur = topo_profiles[pfname]['dates'] - chain_sur = topo_profiles[pfname]['chainage'] - - # convert to datenum - dates_sur_num = np.array([_.toordinal() for _ in dates_sur]) - - chain_sur_interp = [] - diff_days = [] - - for j, satdate in enumerate(dates_sds_num): - - temp_diff = satdate - dates_sur_num - - if mod==0: - # select measurement before and after sat image date and interpolate - - ind_before = np.where(temp_diff == temp_diff[temp_diff > 0][-1])[0] - if ind_before == len(temp_diff)-1: - chain_sur_interp.append(np.nan) - diff_days.append(np.abs(satdate-dates_sur_num[ind_before])[0]) - continue - ind_after = np.where(temp_diff == temp_diff[temp_diff < 0][0])[0] - tempx = np.zeros(2) - tempx[0] = dates_sur_num[ind_before] - tempx[1] = dates_sur_num[ind_after] - tempy = np.zeros(2) - tempy[0] = chain_sur[ind_before] - tempy[1] = chain_sur[ind_after] - diff_days.append(np.abs(np.max([satdate-tempx[0], satdate-tempx[1]]))) - # interpolate - f = interpolate.interp1d(tempx, tempy) - chain_sur_interp.append(f(satdate)) - - elif mod==1: - # select the closest measurement - - idx_closest = find_indices(np.abs(temp_diff), lambda e: e == np.min(np.abs(temp_diff)))[0] - diff_days.append(np.abs(satdate-dates_sur_num[idx_closest])) - if diff_days[j] > mindays: - chain_sur_interp.append(np.nan) - else: - chain_sur_interp.append(chain_sur[idx_closest]) - - chain_sur_interp = np.array(chain_sur_interp) - - # remove nan values - idx_sur_nan = ~np.isnan(chain_sur_interp) - idx_sat_nan = ~np.isnan(chain_sds[:,i]) - idx_nan = np.logical_and(idx_sur_nan, idx_sat_nan) - - # groundtruth and sds - chain_sur_fin = chain_sur_interp[idx_nan] - chain_sds_fin = chain_sds[idx_nan,i] - dates_fin = [k for (k, v) in zip(dates_sds, idx_nan) if v] - diff_chain = chain_sur_fin - chain_sds_fin - - # calculate statistics - rmse = np.sqrt(np.nanmean((diff_chain)**2)) - mean = np.nanmean(diff_chain) - std = np.nanstd(diff_chain) - q90 = np.percentile(np.abs(diff_chain), 90) - - # store data - stats[pfname]['rmse'] = rmse - stats[pfname]['mean'] = mean - stats[pfname]['std'] = std - stats[pfname]['q90'] = q90 - stats[pfname]['diffdays'] = diff_days - - data_fin[pfname]['dates'] = dates_fin - data_fin[pfname]['sds'] = chain_sds_fin - data_fin[pfname]['survey'] = chain_sur_fin - - # make time-series plot - plt.figure(fig1.number) - ax = fig1.add_subplot(gs1[i,0]) - plt.plot(dates_sur, chain_sur, 'o-', color='C1', markersize=4, label='survey all') - plt.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2, label='survey interp') - plt.plot(dates_fin, chain_sds_fin, 'o--', color='b', markersize=4, label='SDS') - plt.title(pfname, fontweight='bold') - plt.xlim([dates_sds[0], dates_sds[-1]]) - plt.ylabel('chainage [m]') - - # make scatter plot - plt.figure(fig2.number) - ax1 = fig2.add_subplot(gs2[0,i]) - plt.axis('equal') - plt.plot(chain_sur_fin, chain_sds_fin, 'ko', markersize=4, markerfacecolor='w', alpha=0.7) - xmax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)]) - xmin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)]) - ymax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)]) - ymin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)]) - plt.plot([xmin, xmax], [ymin, ymax], 'r--') - correlation = np.corrcoef(chain_sur_fin, chain_sds_fin)[0,1] - str_corr = 'r = %.2f' % (correlation) - plt.text(xmin, ymax, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left') - plt.xlabel('chainage survey [m]') - plt.ylabel('chainage satellite [m]') - plt.title(pfname, fontweight='bold') - - ax2 = fig2.add_subplot(gs2[1,i]) - binwidth = 3 - bins = np.arange(min(diff_chain), max(diff_chain) + binwidth, binwidth) - density = plt.hist(diff_chain, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k') - plt.xlim([-50, 50]) - plt.xlabel('error [m]') - str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90) - plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.5), horizontalalignment='left', fontsize=10) - - fig1.set_size_inches(19.2, 9.28) - fig1.set_tight_layout(True) - fig2.set_size_inches(19.2, 9.28) - fig2.set_tight_layout(True) - - # plot all the data together - chain_sds_all = [] - chain_sur_all = [] - for i in range(chain_sds.shape[1]): - pfname = list(topo_profiles.keys())[i] - chain_sds_all = np.append(chain_sds_all,data_fin[pfname]['sds']) - chain_sur_all = np.append(chain_sur_all,data_fin[pfname]['survey']) - - diff_chain_all = chain_sur_all - chain_sds_all - - # calculate statistics - rmse = np.sqrt(np.nanmean((diff_chain_all)**2)) - mean = np.nanmean(diff_chain_all) - std = np.nanstd(diff_chain_all) - q90 = np.percentile(np.abs(diff_chain_all), 90) - - stats['all'] = {'rmse':rmse,'mean':mean,'std':std,'q90':q90} - - # make plot with all datapoints (from all the transects) - plt.figure(fig3.number) - ax1 = fig3.add_subplot(gs3[0,0]) - plt.axis('equal') - plt.plot(chain_sur_all, chain_sds_all, 'ko', markersize=4, markerfacecolor='w', alpha=0.7) - xmax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)]) - xmin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)]) - ymax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)]) - ymin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)]) - plt.plot([xmin, xmax], [ymin, ymax], 'r--') - correlation = np.corrcoef(chain_sur_all, chain_sds_all)[0,1] - str_corr = 'r = %.2f' % (correlation) - plt.text(xmin, ymax, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left') - plt.xlabel('chainage survey [m]') - plt.ylabel('chainage satellite [m]') - plt.title(pfname, fontweight='bold') - - ax2 = fig3.add_subplot(gs3[1,0]) - binwidth = 3 - bins = np.arange(min(diff_chain_all), max(diff_chain_all) + binwidth, binwidth) - density = plt.hist(diff_chain_all, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k') - plt.xlim([-50, 50]) - plt.xlabel('error [m]') - str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90) - plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.5), horizontalalignment='left', fontsize=10) - fig3.set_size_inches(9.2, 9.28) - fig3.set_tight_layout(True) - - return stats - \ No newline at end of file