CoastSat_WRL/functions/sds.py

# -*- coding: utf-8 -*-
"""
Created on Thu Mar  1 11:20:35 2018

@author: z5030440
"""

"""This module contains all the functions needed for extracting satellite derived shoreline (SDS) """

# Initial settings
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import gridspec
import pdb
import ee

# other modules
from osgeo import gdal, ogr, osr
import tempfile
from urllib.request import urlretrieve
import zipfile
import scipy.interpolate as interpolate

# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.measure as measure
import skimage.morphology as morphology

# machine learning modules
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
from sklearn.externals import joblib


# import own modules
from functions.utils import *


# Download from ee server function

def download_tif(image, polygon, bandsId, filepath):
    """downloads tif image (region and bands) from the ee server and stores it in a temp file"""
    url = ee.data.makeDownloadUrl(ee.data.getDownloadId({
        'image': image.serialize(),
        'region': polygon,
        'bands': bandsId,
        'filePerBand': 'false',
        'name': 'data',
        }))
    local_zip, headers = urlretrieve(url)
    with zipfile.ZipFile(local_zip) as local_zipfile:
        return local_zipfile.extract('data.tif', filepath)

def create_cloud_mask(im_qa, satname, plot_bool):
    """
    Creates a cloud mask from the image containing the QA band information

    KV WRL 2018

    Arguments:
    -----------
        im_qa: np.ndarray
            Image containing the QA band
        satname: string
            short name for the satellite (L8, L7, S2)
        plot_bool: boolean
            True if plot is wanted

    Returns:
    -----------
        cloud_mask : np.ndarray of booleans
            A boolean array with True where the cloud are present
    """

    # convert QA bits
    if satname == 'L8':
        cloud_values = [2800, 2804, 2808, 2812, 6896, 6900, 6904, 6908]
    elif satname == 'L7' or satname == 'L5' or satname == 'L4':
        cloud_values = [752, 756, 760, 764]
    elif satname == 'S2':
        cloud_values = [1024, 2048] # 1024 = dense cloud, 2048 = cirrus clouds

    cloud_mask = np.isin(im_qa, cloud_values)
    # remove isolated cloud pixels (there are some in the swash and they cause problems)
    if sum(sum(cloud_mask)) > 0:
        morphology.remove_small_objects(cloud_mask, min_size=10, connectivity=1, in_place=True)

    if plot_bool:
        plt.figure()
        plt.imshow(cloud_mask, cmap='gray')
        plt.draw()

    #cloud_shadow_values = [2976, 2980, 2984, 2988, 3008, 3012, 3016, 3020]
    #cloud_shadow_mask = np.isin(im_qa, cloud_shadow_values)

    return cloud_mask

def rescale_image_intensity(im, cloud_mask, prob_high, plot_bool):
    """
    Rescales the intensity of an image (multispectral or single band) by applying
    a cloud mask and clipping the prob_high upper percentile. This functions allows
    to stretch the contrast of an image.

    KV WRL 2018

    Arguments:
    -----------
        im: np.ndarray
            Image to rescale, can be 3D (multispectral) or 2D (single band)
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        prob_high: float
            probability of exceedence used to calculate the upper percentile
        plot_bool: boolean
            True if plot is wanted

    Returns:
    -----------
        im_adj: np.ndarray
            The rescaled image
    """
    prc_low = 0 # lower percentile
    vec_mask = cloud_mask.reshape(im.shape[0] * im.shape[1])

    if plot_bool:
        plt.figure()

    if len(im.shape) > 2:
        vec =  im.reshape(im.shape[0] * im.shape[1], im.shape[2])
        vec_adj = np.ones((len(vec_mask), im.shape[2])) * np.nan

        for i in range(im.shape[2]):
            prc_high = np.percentile(vec[~vec_mask, i], prob_high)
            vec_rescaled = exposure.rescale_intensity(vec[~vec_mask, i], in_range=(prc_low, prc_high))
            vec_adj[~vec_mask,i] = vec_rescaled

            if plot_bool:
                plt.subplot(np.floor(im.shape[2]/2) + 1, np.floor(im.shape[2]/2), i+1)
                plt.hist(vec[~vec_mask, i], bins=200, label='original')
                plt.hist(vec_rescaled, bins=200, alpha=0.5, label='rescaled')
                plt.legend()
                plt.title('Band' + str(i+1))
                plt.show()

        im_adj = vec_adj.reshape(im.shape[0], im.shape[1], im.shape[2])

        if plot_bool:
            plt.figure()
            ax1 = plt.subplot(121)
            plt.imshow(im[:,:,[2,1,0]])
            plt.axis('off')
            plt.title('Original')
            ax2 = plt.subplot(122, sharex=ax1, sharey=ax1)
            plt.imshow(im_adj[:,:,[2,1,0]])
            plt.axis('off')
            plt.title('Rescaled')
            plt.show()

    else:
        vec =  im.reshape(im.shape[0] * im.shape[1])
        vec_adj = np.ones(len(vec_mask)) * np.nan
        prc_high = np.percentile(vec[~vec_mask], prob_high)
        vec_rescaled = exposure.rescale_intensity(vec[~vec_mask], in_range=(prc_low, prc_high))
        vec_adj[~vec_mask] = vec_rescaled

        if plot_bool:
            plt.hist(vec[~vec_mask], bins=200, label='original')
            plt.hist(vec_rescaled, bins=200, alpha=0.5, label='rescaled')
            plt.legend()
            plt.title('Single band')
            plt.show()

        im_adj = vec_adj.reshape(im.shape[0], im.shape[1])

        if plot_bool:
            plt.figure()
            ax1 = plt.subplot(121)
            plt.imshow(im, cmap='gray')
            plt.axis('off')
            plt.title('Original')
            ax2 = plt.subplot(122, sharex=ax1, sharey=ax1)
            plt.imshow(im_adj, cmap='gray')
            plt.axis('off')
            plt.title('Rescaled')
            plt.show()

    return im_adj


def hist_match(source, template):
    """
    Adjust the pixel values of a grayscale image such that its histogram
    matches that of a target image

    Arguments:
    -----------
        source: np.ndarray
            Image to transform; the histogram is computed over the flattened
            array
        template: np.ndarray
            Template image; can have different dimensions to source
    Returns:
    -----------
        matched: np.ndarray
            The transformed output image
    """

    oldshape = source.shape
    source = source.ravel()
    template = template.ravel()

    # get the set of unique pixel values and their corresponding indices and
    # counts
    s_values, bin_idx, s_counts = np.unique(source, return_inverse=True,
                                            return_counts=True)
    t_values, t_counts = np.unique(template, return_counts=True)

    # take the cumsum of the counts and normalize by the number of pixels to
    # get the empirical cumulative distribution functions for the source and
    # template images (maps pixel value --> quantile)
    s_quantiles = np.cumsum(s_counts).astype(np.float64)
    s_quantiles /= s_quantiles[-1]
    t_quantiles = np.cumsum(t_counts).astype(np.float64)
    t_quantiles /= t_quantiles[-1]

    # interpolate linearly to find the pixel values in the template image
    # that correspond most closely to the quantiles in the source image
    interp_t_values = np.interp(s_quantiles, t_quantiles, t_values)

    return interp_t_values[bin_idx].reshape(oldshape)

def pansharpen(im_ms, im_pan, cloud_mask, plot_bool):
    """
    Pansharpens a multispectral image (3D), using the panchromatic band (2D)
    and a cloud mask

    KV WRL 2018

    Arguments:
    -----------
        im_ms: np.ndarray
            Multispectral image to pansharpen (3D)
        im_pan: np.ndarray
            Panchromatic band (2D)
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        plot_bool: boolean
            True if plot is wanted

    Returns:
    -----------
        im_ms_ps: np.ndarray
            Pansharpened multisoectral image (3D)
    """

    # reshape image into vector and apply cloud mask
    vec = im_ms.reshape(im_ms.shape[0] * im_ms.shape[1], im_ms.shape[2])
    vec_mask = cloud_mask.reshape(im_ms.shape[0] * im_ms.shape[1])
    vec = vec[~vec_mask, :]
    # apply PCA to RGB bands
    pca = decomposition.PCA()
    vec_pcs = pca.fit_transform(vec)

    # replace 1st PC with pan band (after matching histograms)
    vec_pan = im_pan.reshape(im_pan.shape[0] * im_pan.shape[1])
    vec_pan = vec_pan[~vec_mask]

#    plt.figure()
#    ax1 = plt.subplot(131)
#    plt.imshow(im_pan, cmap='gray')
#    plt.title('Pan band')
#    plt.subplot(132, sharex=ax1, sharey=ax1)
#    plt.imshow(vec_pcs[:,0].reshape(im_pan.shape[0],im_pan.shape[1]), cmap='gray')
#    plt.title('PC1')
#    plt.subplot(133, sharex=ax1, sharey=ax1)
#    plt.imshow(hist_match(vec_pan, vec_pcs[:,0]).reshape(im_pan.shape[0],im_pan.shape[1]), cmap='gray')
#    plt.title('Pan band histmatched')
#
#    plt.figure()
#    plt.hist(hist_match(vec_pan, vec_pcs[:,0]), bins=300)
#    plt.hist(vec_pcs[:,0], bins=300, alpha=0.5)
#    plt.hist(vec_pan, bins=300, alpha=0.5)
#    plt.draw()

    vec_pcs[:,0] = hist_match(vec_pan, vec_pcs[:,0])
    vec_ms_ps = pca.inverse_transform(vec_pcs)

    # reshape vector into image
    vec_ms_ps_full = np.ones((len(vec_mask), im_ms.shape[2])) * np.nan
    vec_ms_ps_full[~vec_mask,:] = vec_ms_ps
    im_ms_ps = vec_ms_ps_full.reshape(im_ms.shape[0], im_ms.shape[1], im_ms.shape[2])

    if plot_bool:

        plt.figure()
        ax1 = plt.subplot(121)
        plt.imshow(rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False))
        plt.axis('off')
        plt.title('Original')
        ax2 = plt.subplot(122, sharex=ax1, sharey=ax1)
        plt.imshow(rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False))
        plt.axis('off')
        plt.title('Pansharpened')
        plt.show()

    return im_ms_ps

def nd_index(im1, im2, cloud_mask, plot_bool):
    """
    Computes normalised difference index on 2 images (2D), given a cloud mask (2D)

    KV WRL 2018

    Arguments:
    -----------
        im1, im2: np.ndarray
            Images (2D) with which to calculate the ND index
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        plot_bool: boolean
            True if plot is wanted

    Returns:    -----------
        im_nd: np.ndarray

            Image (2D) containing the ND index
    """
    vec_mask = cloud_mask.reshape(im1.shape[0] * im1.shape[1])
    vec_nd = np.ones(len(vec_mask)) * np.nan
    vec1 = im1.reshape(im1.shape[0] * im1.shape[1])
    vec2 = im2.reshape(im2.shape[0] * im2.shape[1])
    temp = np.divide(vec1[~vec_mask] - vec2[~vec_mask],
                     vec1[~vec_mask] + vec2[~vec_mask])
    vec_nd[~vec_mask] = temp
    im_nd = vec_nd.reshape(im1.shape[0], im1.shape[1])

    if plot_bool:
        plt.figure()
        plt.imshow(im_nd, cmap='seismic')
        plt.colorbar()
        plt.title('Normalised index')
        plt.show()

    return im_nd

def find_wl_contours(im_ndwi, cloud_mask, plot_bool):
    """
    Finds the water line by thresholding the Normalized Difference Water Index and applying the Marching
    Squares Algorithm

    KV WRL 2018

    Arguments:
    -----------
        im_ndwi: np.ndarray
            Image (2D) with the NDWI (water index)
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        plot_bool: boolean
            True if plot is wanted

    Returns:    -----------
        contours_wl: list of np.arrays
            contains the (row,column) coordinates of the contour lines

    """

    # reshape image to vector
    vec_ndwi = im_ndwi.reshape(im_ndwi.shape[0] * im_ndwi.shape[1])
    vec_mask = cloud_mask.reshape(cloud_mask.shape[0] * cloud_mask.shape[1])
    vec = vec_ndwi[~vec_mask]
    # apply otsu's threshold
    t_otsu = filters.threshold_otsu(vec)
    # use Marching Squares algorithm to detect contours on ndwi image
    contours = measure.find_contours(im_ndwi, t_otsu)

    # remove contour points that are nans
    contours_nonans = []
    for k in range(len(contours)):
        if np.any(np.isnan(contours[k])):
            index_nan = np.where(np.isnan(contours[k]))[0]
            contours_temp = np.delete(contours[k], index_nan, axis=0)
            if len(contours_temp) > 1:
                contours_nonans.append(contours_temp)
        else:
            contours_nonans.append(contours[k])
    contours = contours_nonans

    if plot_bool:
        # plot otsu's histogram segmentation
        plt.figure()
        vals = plt.hist(vec, bins=200)
        plt.plot([t_otsu, t_otsu],[0, np.max(vals[0])], 'r-', label='Otsu threshold')
        plt.legend()
        plt.show()

        # plot the water line contours on top of water index
        plt.figure()
        plt.imshow(im_ndwi, cmap='seismic')
        plt.colorbar()
        for i,contour in enumerate(contours): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k')
        plt.axis('image')
        plt.title('Detected water lines')
        plt.show()

    return contours

def convert_pix2world(points, crs_vec):
    """
    Converts pixel coordinates (row,columns) to world projected coordinates
    performing an affine transformation

    KV WRL 2018

    Arguments:
    -----------
        points: np.ndarray or list of np.ndarray
            array with 2 columns (rows first and columns second)
        crs_vec: np.ndarray
            vector of 6 elements [Xtr, Xscale, Xshear, Ytr, Yshear, Yscale]

    Returns:    -----------
        points_converted: np.ndarray or list of np.ndarray
            converted coordinates, first columns with X and second column with Y

    """

    # make affine transformation matrix
    aff_mat = np.array([[crs_vec[1], crs_vec[2], crs_vec[0]],
                       [crs_vec[4], crs_vec[5], crs_vec[3]],
                       [0, 0, 1]])
    # create affine transformation
    tform = transform.AffineTransform(aff_mat)

    if type(points) is list:
        points_converted = []
        # iterate over the list
        for i, arr in enumerate(points):
            tmp = arr[:,[1,0]]
            points_converted.append(tform(tmp))

    elif type(points) is np.ndarray:
        tmp = points[:,[1,0]]
        points_converted = tform(tmp)

    else:
        print('invalid input type')
        raise

    return points_converted

def convert_world2pix(points, crs_vec):
    """
    Converts world projected coordinates (X,Y) to image coordinates (row,column)
    performing an affine transformation

    KV WRL 2018

    Arguments:
    -----------
        points: np.ndarray or list of np.ndarray
            array with 2 columns (rows first and columns second)
        crs_vec: np.ndarray
            vector of 6 elements [Xtr, Xscale, Xshear, Ytr, Yshear, Yscale]

    Returns:    -----------
        points_converted: np.ndarray or list of np.ndarray
            converted coordinates, first columns with row and second column with column

    """

    # make affine transformation matrix
    aff_mat = np.array([[crs_vec[1], crs_vec[2], crs_vec[0]],
                       [crs_vec[4], crs_vec[5], crs_vec[3]],
                       [0, 0, 1]])
    # create affine transformation
    tform = transform.AffineTransform(aff_mat)

    if type(points) is list:
        points_converted = []
        # iterate over the list
        for i, arr in enumerate(points):
            points_converted.append(tform.inverse(points))

    elif type(points) is np.ndarray:
        points_converted = tform.inverse(points)

    else:
        print('invalid input type')
        raise

    return points_converted


def convert_epsg(points, epsg_in, epsg_out):
    """
    Converts from one spatial reference to another using the epsg codes

    KV WRL 2018

    Arguments:
    -----------
        points: np.ndarray or list of np.ndarray
            array with 2 columns (rows first and columns second)
        epsg_in: int
            epsg code of the spatial reference in which the input is
        epsg_out: int
            epsg code of the spatial reference in which the output will be

    Returns:    -----------
        points_converted: np.ndarray or list of np.ndarray
            converted coordinates

    """

    # define input and output spatial references
    inSpatialRef = osr.SpatialReference()
    inSpatialRef.ImportFromEPSG(epsg_in)
    outSpatialRef = osr.SpatialReference()
    outSpatialRef.ImportFromEPSG(epsg_out)
    # create a coordinates transform
    coordTransform = osr.CoordinateTransformation(inSpatialRef, outSpatialRef)
    # transform points
    if type(points) is list:
        points_converted = []
        # iterate over the list
        for i, arr in enumerate(points):
            points_converted.append(np.array(coordTransform.TransformPoints(arr)))

    elif type(points) is np.ndarray:
        points_converted = np.array(coordTransform.TransformPoints(points))

    else:
        print('invalid input type')
        raise

    return points_converted

def classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, buffer_size, min_beach_size, plot_bool):
    """
    Classifies sand pixels using an unsupervised algorithm (Kmeans)
    Set buffer size to False if you want to classify the entire image,
    otherwise buffer size defines the buffer around the shoreline in which
    pixels are considered for classification.
    This classification is not robust and is only used to train a supervised algorithm

    KV WRL 2018

    Arguments:
    -----------
        im_ms_ps: np.ndarray
            Pansharpened RGB + downsampled NIR and SWIR
        im_pan:
            Panchromatic band
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        wl_pix: list of np.ndarray
            list of arrays containig the pixel coordinates of the water line
        buffer_size: int or False
            radius of the disk used to create a buffer around the water line
            when False, the entire image is considered for kmeans
        min_beach_size: int
            minimum number of connected pixels belonging to a single beach
        plot_bool: boolean
            True if plot is wanted

    Returns:    -----------
        im_sand: np.ndarray
            2D binary image containing True where sand pixels are located

    """
    # reshape the 2D images into vectors
    vec_ms_ps = im_ms_ps.reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1], im_ms_ps.shape[2])
    vec_pan = im_pan.reshape(im_pan.shape[0]*im_pan.shape[1])
    vec_mask = cloud_mask.reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1])
    # add B,G,R,NIR and pan bands to the vector of features
    vec_features = np.zeros((vec_ms_ps.shape[0], 5))
    vec_features[:,[0,1,2,3]] = vec_ms_ps[:,[0,1,2,3]]
    vec_features[:,4] = vec_pan

    if buffer_size:
        # create binary image with ones where the detected water lines is
        im_buffer = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1]))
        for i, contour in enumerate(wl_pix):
                indices = [(int(_[0]), int(_[1])) for _ in list(np.round(contour))]
                for j, idx in enumerate(indices):
                    im_buffer[idx] = 1
        # perform a dilation on the binary image
        se = morphology.disk(buffer_size)
        im_buffer = morphology.binary_dilation(im_buffer, se)
        vec_buffer = (im_buffer == 1).reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1])
    else:
        vec_buffer = np.ones((vec_pan.shape[0]))
    # add cloud mask to buffer
    vec_buffer= np.logical_and(vec_buffer, ~vec_mask)
    # perform kmeans (6 clusters)
    kmeans = KMeans(n_clusters=6, random_state=0).fit(vec_features[vec_buffer,:])
    labels = np.ones((len(vec_mask))) * np.nan
    labels[vec_buffer] = kmeans.labels_
    im_labels = labels.reshape(im_ms_ps.shape[0], im_ms_ps.shape[1])
    # find the class with maximum reflection in the B,G,R,Pan
    im_sand = im_labels == np.argmax(np.mean(kmeans.cluster_centers_[:,[0,1,2,4]], axis=1))
    im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
    im_sand = morphology.binary_erosion(im_sand, morphology.disk(1))
#    im_sand = morphology.binary_dilation(im_sand, morphology.disk(1))

    if plot_bool:
        im = np.copy(rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False))
        im[im_sand,0] = 0
        im[im_sand,1] = 0
        im[im_sand,2] = 1
        plt.figure()
        plt.imshow(im)
        plt.axis('image')
        plt.title('Sand classification')
        plt.show()

    return im_sand

def classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, plot_bool):
    """
    Classifies every pixel in the image in one of 4 classes:
        - sand                                          --> label = 1
        - whitewater (breaking waves and swash)         --> label = 2
        - water                                         --> label = 3
        - other (vegetation, buildings, rocks...)       --> label = 0

    The classifier is a Neural Network, trained with 7000 pixels for the class SAND and 1500 pixels for
    each of the other classes. This is because the class of interest for my application is SAND and I
    wanted to minimize the classification error for that class

    KV WRL 2018

    Arguments:
    -----------
        im_ms_ps: np.ndarray
            Pansharpened RGB + downsampled NIR and SWIR
        im_pan:
            Panchromatic band
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        plot_bool: boolean
            True if plot is wanted

    Returns:    -----------
        im_classif: np.ndarray
            2D image containing labels
        im_labels: np.ndarray of booleans
            3D image containing a boolean image for each class (im_classif == label)

    """

    # load classifier
    clf = joblib.load('functions/NeuralNet_classif.pkl')

    # calculate features
    n_features = 10
    im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features))
    im_features[:,:,[0,1,2,3,4]] = im_ms_ps
    im_features[:,:,5] = im_pan
    im_features[:,:,6] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, False) # (NIR-G)
    im_features[:,:,7] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], cloud_mask, False) # ND(NIR-R)
    im_features[:,:,8] = nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], cloud_mask, False) # ND(B-R)
    im_features[:,:,9] = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False) # ND(SWIR-G)
    # remove NaNs and clouds
    vec_features = im_features.reshape((im_ms_ps.shape[0] * im_ms_ps.shape[1], n_features))
    vec_cloud = cloud_mask.reshape(cloud_mask.shape[0]*cloud_mask.shape[1])
    vec_nan = np.any(np.isnan(vec_features), axis=1)
    vec_mask = np.logical_or(vec_cloud, vec_nan)
    vec_features = vec_features[~vec_mask, :]
    # predict with NN classifier
    labels = clf.predict(vec_features)
    # recompose image
    vec_classif = np.zeros((cloud_mask.shape[0]*cloud_mask.shape[1]))
    vec_classif[~vec_mask] = labels
    im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1]))

    # labels
    im_sand = im_classif == 1
    im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
    im_swash = im_classif == 2
    im_water = im_classif == 3
    im_labels = np.stack((im_sand,im_swash,im_water), axis=-1)


    # only select the patches that are beaches
#    try:
#        labels_sand = measure.label(im_sand)
#        values = np.unique(labels_sand)
#        se = morphology.disk(5)
#        im_sand_new = np.zeros((im_ms_ps.shape[0],im_ms_ps.shape[1])).astype('bool')
#        counter = 0
#        for j in range(1,len(values)):
#            patch_sand = labels_sand == values[j]
#            im_buffer = morphology.binary_dilation(patch_sand, se)
#            sum_inter = sum(sum(np.logical_and(im_buffer,im_swash)))
#            if sum_inter >= 20:
#                im_sand_new = np.logical_or(im_sand_new, patch_sand)
#                counter = counter + 1
#        if counter >= 1:
#            im_labels[:,:,0] = im_sand_new
#    except:
#        print('nothing')

    if plot_bool:
        # display on top of pansharpened RGB
        im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)
        im = np.copy(im_display)
        # define colours for plot
        colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
        for k in range(0,im_labels.shape[2]):
            im[im_labels[:,:,k],0] = colours[k,0]
            im[im_labels[:,:,k],1] = colours[k,1]
            im[im_labels[:,:,k],2] = colours[k,2]

        plt.figure()
        ax1 = plt.subplot(121)
        plt.imshow(im_display)
        plt.axis('off')
        plt.title('Image')
        ax2 = plt.subplot(122, sharex=ax1, sharey=ax1)
        plt.imshow(im)
        plt.axis('off')
        plt.title('NN classifier')
        mng = plt.get_current_fig_manager()
        mng.window.showMaximized()
        plt.tight_layout()
        plt.draw()

    return im_classif, im_labels

def classify_image_NN_nopan(im_ms_ps, cloud_mask, min_beach_size, plot_bool):
    """
    Classifies every pixel in the image in one of 4 classes:
        - sand                                          --> label = 1
        - whitewater (breaking waves and swash)         --> label = 2
        - water                                         --> label = 3
        - other (vegetation, buildings, rocks...)       --> label = 0

    The classifier is a Neural Network, trained with 7000 pixels for the class SAND and 1500 pixels for
    each of the other classes. This is because the class of interest for my application is SAND and I
    wanted to minimize the classification error for that class

    KV WRL 2018

    Arguments:
    -----------
        im_ms_ps: np.ndarray
            Pansharpened RGB + downsampled NIR and SWIR
        im_pan:
            Panchromatic band
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        plot_bool: boolean
            True if plot is wanted

    Returns:    -----------
        im_classif: np.ndarray
            2D image containing labels
        im_labels: np.ndarray of booleans
            3D image containing a boolean image for each class (im_classif == label)

    """

    # load classifier
    clf = joblib.load('functions/NeuralNet_classif_nopan.pkl')

    # calculate features
    n_features = 9
    im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features))
    im_features[:,:,[0,1,2,3,4]] = im_ms_ps
    im_features[:,:,5] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, False) # (NIR-G)
    im_features[:,:,6] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], cloud_mask, False) # ND(NIR-R)
    im_features[:,:,7] = nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], cloud_mask, False) # ND(B-R)
    im_features[:,:,8] = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False) # ND(SWIR-G)
    # remove NaNs and clouds
    vec_features = im_features.reshape((im_ms_ps.shape[0] * im_ms_ps.shape[1], n_features))
    vec_cloud = cloud_mask.reshape(cloud_mask.shape[0]*cloud_mask.shape[1])
    vec_nan = np.any(np.isnan(vec_features), axis=1)
    vec_mask = np.logical_or(vec_cloud, vec_nan)
    vec_features = vec_features[~vec_mask, :]
    # predict with NN classifier
    labels = clf.predict(vec_features)

    # recompose image
    vec_classif = np.zeros((cloud_mask.shape[0]*cloud_mask.shape[1]))
    vec_classif[~vec_mask] = labels
    im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1]))

    # labels
    im_sand = im_classif == 1
    im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
    im_swash = im_classif == 2
    im_water = im_classif == 3
    im_labels = np.stack((im_sand,im_swash,im_water), axis=-1)

    # only select the patches that are beaches
#    try:
#        labels_sand = measure.label(im_sand)
#        values = np.unique(labels_sand)
#        se = morphology.disk(5)
#        im_sand_new = np.zeros((im_ms_ps.shape[0],im_ms_ps.shape[1])).astype('bool')
#        counter = 0
#        for j in range(1,len(values)):
#            patch_sand = labels_sand == values[j]
#            im_buffer = morphology.binary_dilation(patch_sand, se)
#            sum_inter = sum(sum(np.logical_and(im_buffer,im_swash)))
#            if sum_inter >= 20:
#                im_sand_new = np.logical_or(im_sand_new, patch_sand)
#                counter = counter + 1
#        if counter >= 1:
#            im_labels[:,:,0] = im_sand_new
#    except:
#        print('nothing')

    if plot_bool:
        # display on top of pansharpened RGB
        im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)
        im = np.copy(im_display)
        # define colours for plot
        colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
        for k in range(0,im_labels.shape[2]):
            im[im_labels[:,:,k],0] = colours[k,0]
            im[im_labels[:,:,k],1] = colours[k,1]
            im[im_labels[:,:,k],2] = colours[k,2]

        plt.figure()
        ax1 = plt.subplot(121)
        plt.imshow(im_display)
        plt.axis('off')
        plt.title('Image')
        ax2 = plt.subplot(122, sharex=ax1, sharey=ax1)
        plt.imshow(im)
        plt.axis('off')
        plt.title('NN classifier')
        mng = plt.get_current_fig_manager()
        mng.window.showMaximized()
        plt.tight_layout()
        plt.draw()

    return im_classif, im_labels

def find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size, plot_bool):
    """
    New method for extracting shorelines (more robust)

    KV WRL 2018

    Arguments:
    -----------
        im_ms_ps: np.ndarray
            Pansharpened RGB + downsampled NIR and SWIR
        im_labels: np.ndarray
            3D image containing a boolean image for each class in the order (sand, swash, water)
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        buffer_size: int
            size of the buffer around the sandy beach
        plot_bool: boolean
            True if plot is wanted

    Returns:    -----------
        contours_wi: list of np.arrays
            contains the (row,column) coordinates of the contour lines extracted with the Water Index
        contours_mwi: list of np.arrays
            contains the (row,column) coordinates of the contour lines extracted with the Modified Water Index

    """

    nrows = cloud_mask.shape[0]
    ncols = cloud_mask.shape[1]
    im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)

    # calculate Normalized Difference Modified Water Index (SWIR - G)
    im_mwi = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False)
    # calculate Normalized Difference Modified Water Index (NIR - G)
    im_wi = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, False)
    # stack indices together
    im_ind = np.stack((im_wi, im_mwi), axis=-1)
    vec_ind = im_ind.reshape(nrows*ncols,2)

    # process labels
    vec_sand = im_labels[:,:,0].reshape(ncols*nrows)
    vec_swash = im_labels[:,:,1].reshape(ncols*nrows)
    vec_water = im_labels[:,:,2].reshape(ncols*nrows)

    # create a buffer around the sandy beach
    se = morphology.disk(buffer_size)
    im_buffer = morphology.binary_dilation(im_labels[:,:,0], se)
    vec_buffer = im_buffer.reshape(nrows*ncols)

    # select water/sand/swash pixels that are within the buffer
    int_water = vec_ind[np.logical_and(vec_buffer,vec_water),:]
    int_sand = vec_ind[np.logical_and(vec_buffer,vec_sand),:]
    int_swash = vec_ind[np.logical_and(vec_buffer,vec_swash),:]

    # threshold the sand/water intensities
    int_all = np.append(int_water,int_sand, axis=0)
    t_mwi = filters.threshold_otsu(int_all[:,0])
    t_wi = filters.threshold_otsu(int_all[:,1])

    # find contour with MS algorithm
    im_wi_buffer = np.copy(im_wi)
    im_wi_buffer[~im_buffer] = np.nan
    im_mwi_buffer = np.copy(im_mwi)
    im_mwi_buffer[~im_buffer] = np.nan
    contours_wi = measure.find_contours(im_wi_buffer, t_wi)
    contours_mwi = measure.find_contours(im_mwi, t_mwi) # WARNING (on entire image)

    # remove contour points that are nans (around clouds)

    contours = contours_wi
    contours_nonans = []
    for k in range(len(contours)):
        if np.any(np.isnan(contours[k])):
            index_nan = np.where(np.isnan(contours[k]))[0]
            contours_temp = np.delete(contours[k], index_nan, axis=0)
            if len(contours_temp) > 1:
                contours_nonans.append(contours_temp)
        else:
            contours_nonans.append(contours[k])
    contours_wi = contours_nonans

    contours = contours_mwi
    contours_nonans = []
    for k in range(len(contours)):
        if np.any(np.isnan(contours[k])):
            index_nan = np.where(np.isnan(contours[k]))[0]
            contours_temp = np.delete(contours[k], index_nan, axis=0)
            if len(contours_temp) > 1:
                contours_nonans.append(contours_temp)
        else:
            contours_nonans.append(contours[k])
    contours_mwi = contours_nonans

    if plot_bool:

        im = np.copy(im_display)
        # define colours for plot
        colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
        for k in range(0,im_labels.shape[2]):
            im[im_labels[:,:,k],0] = colours[k,0]
            im[im_labels[:,:,k],1] = colours[k,1]
            im[im_labels[:,:,k],2] = colours[k,2]

        fig = plt.figure()
        gs = gridspec.GridSpec(3, 3, height_ratios=[1, 1, 3])

        ax1 = fig.add_subplot(gs[0,:])
        vals = plt.hist(int_water[:,0], bins=100, label='water')
        plt.hist(int_sand[:,0], bins=100, alpha=0.5, label='sand')
        plt.hist(int_swash[:,0], bins=100, alpha=0.5, label='swash')
        plt.plot([t_wi, t_wi], [0, np.max(vals[0])], 'r-')
        plt.legend()
        plt.title('Water Index NIR-G')

        ax2 = fig.add_subplot(gs[1,:], sharex=ax1)
        vals = plt.hist(int_water[:,1], bins=100, label='water')
        plt.hist(int_sand[:,1], bins=100, alpha=0.5, label='sand')
        plt.hist(int_swash[:,1], bins=100, alpha=0.5, label='swash')
        plt.plot([t_mwi, t_mwi], [0, np.max(vals[0])], 'r-')
        plt.legend()
        plt.title('Modified Water Index SWIR-G')

        ax3 = fig.add_subplot(gs[2,0])
        plt.imshow(im)
        for i,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k')
        for i,contour in enumerate(contours_wi): plt.plot(contour[:, 1], contour[:, 0], linestyle='--', linewidth=1, color='w')
        plt.grid(False)
        plt.xticks([])
        plt.yticks([])

        ax4 = fig.add_subplot(gs[2,1], sharex=ax3, sharey=ax3)
        plt.imshow(im_display)
        for i,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k')
        for i,contour in enumerate(contours_wi): plt.plot(contour[:, 1], contour[:, 0], linestyle='--', linewidth=1, color='w')
        plt.grid(False)
        plt.xticks([])
        plt.yticks([])

        ax5 = fig.add_subplot(gs[2,2], sharex=ax3, sharey=ax3)
        plt.imshow(im_mwi, cmap='seismic')
        for i,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k')
        for i,contour in enumerate(contours_wi): plt.plot(contour[:, 1], contour[:, 0], linestyle='--', linewidth=1, color='w')
        plt.grid(False)
        plt.xticks([])
        plt.yticks([])

#        plt.gcf().set_size_inches(17.99,7.55)
        mng = plt.get_current_fig_manager()
        mng.window.showMaximized()
        plt.gcf().set_tight_layout(True)
        plt.draw()

    return contours_wi, contours_mwi

def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
    """
    Compare sds with groundtruth data from topographic surveys / argus shorelines

    KV WRL 2018

    Arguments:
    -----------
        dates_sds: list
            list of dates corresponding to each row in chain_sds
        chain_sds: np.ndarray
            array with time series of chainage for each transect (each transect is one column)
        topo_profiles: dict
            dict containing the dates and chainage of the groundtruth
        mod: 0 or 1
            0 for linear interpolation between 2 closest surveys, 1 for only nearest neighbour
        min_days: int
            minimum number of days for which the data can be compared

    Returns:    -----------
        stats: dict
            contains all the statistics of the comparison

    """

    # create 3 figures
    fig1 = plt.figure()
    gs1 = gridspec.GridSpec(chain_sds.shape[1], 1)
    fig2 = plt.figure()
    gs2 = gridspec.GridSpec(2, chain_sds.shape[1])
    fig3 = plt.figure()
    gs3 = gridspec.GridSpec(2,1)

    dates_sds_num = np.array([_.toordinal() for _ in dates_sds])
    stats = dict([])
    data_fin = dict([])

    # for each transect compare and plot the data
    for i in range(chain_sds.shape[1]):

        pfname = list(topo_profiles.keys())[i]
        stats[pfname] = dict([])
        data_fin[pfname] = dict([])

        dates_sur = topo_profiles[pfname]['dates']
        chain_sur = topo_profiles[pfname]['chainage']

        # convert to datenum
        dates_sur_num = np.array([_.toordinal() for _ in dates_sur])

        chain_sur_interp = []
        diff_days = []

        for j, satdate in enumerate(dates_sds_num):

            temp_diff = satdate - dates_sur_num

            if mod==0:
                # select measurement before and after sat image date and interpolate

                ind_before = np.where(temp_diff == temp_diff[temp_diff > 0][-1])[0]
                if ind_before == len(temp_diff)-1:
                    chain_sur_interp.append(np.nan)
                    diff_days.append(np.abs(satdate-dates_sur_num[ind_before])[0])
                    continue
                ind_after = np.where(temp_diff == temp_diff[temp_diff < 0][0])[0]
                tempx = np.zeros(2)
                tempx[0] = dates_sur_num[ind_before]
                tempx[1] = dates_sur_num[ind_after]
                tempy = np.zeros(2)
                tempy[0] = chain_sur[ind_before]
                tempy[1] = chain_sur[ind_after]
                diff_days.append(np.abs(np.max([satdate-tempx[0], satdate-tempx[1]])))
                # interpolate
                f = interpolate.interp1d(tempx, tempy)
                chain_sur_interp.append(f(satdate))

            elif mod==1:
                # select the closest measurement

                idx_closest = find_indices(np.abs(temp_diff), lambda e: e == np.min(np.abs(temp_diff)))[0]
                diff_days.append(np.abs(satdate-dates_sur_num[idx_closest]))
                if diff_days[j] > mindays:
                    chain_sur_interp.append(np.nan)
                else:
                    chain_sur_interp.append(chain_sur[idx_closest])

        chain_sur_interp = np.array(chain_sur_interp)

        # remove nan values
        idx_sur_nan = ~np.isnan(chain_sur_interp)
        idx_sat_nan = ~np.isnan(chain_sds[:,i])
        idx_nan = np.logical_and(idx_sur_nan, idx_sat_nan)

        # groundtruth and sds
        chain_sur_fin = chain_sur_interp[idx_nan]
        chain_sds_fin = chain_sds[idx_nan,i]
        dates_fin = [k for (k, v) in zip(dates_sds, idx_nan) if v]
        diff_chain = chain_sur_fin - chain_sds_fin

        # calculate statistics
        rmse = np.sqrt(np.nanmean((diff_chain)**2))
        mean = np.nanmean(diff_chain)
        std = np.nanstd(diff_chain)
        q90 = np.percentile(np.abs(diff_chain), 90)

        # store data
        stats[pfname]['rmse'] = rmse
        stats[pfname]['mean'] = mean
        stats[pfname]['std'] = std
        stats[pfname]['q90'] = q90
        stats[pfname]['diffdays'] = diff_days

        data_fin[pfname]['dates'] = dates_fin
        data_fin[pfname]['sds'] = chain_sds_fin
        data_fin[pfname]['survey'] = chain_sur_fin

        # make time-series plot
        plt.figure(fig1.number)
        ax = fig1.add_subplot(gs1[i,0])
        plt.plot(dates_sur, chain_sur, 'o-', color='C1', markersize=4, label='survey all')
        plt.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2, label='survey interp')
        plt.plot(dates_fin, chain_sds_fin, 'o--', color='b', markersize=4, label='SDS')
        plt.title(pfname, fontweight='bold')
        plt.xlim([dates_sds[0], dates_sds[-1]])
        plt.ylabel('chainage [m]')

        # make scatter plot
        plt.figure(fig2.number)
        ax1 = fig2.add_subplot(gs2[0,i])
        plt.axis('equal')
        plt.plot(chain_sur_fin, chain_sds_fin, 'ko', markersize=4, markerfacecolor='w', alpha=0.7)
        xmax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)])
        xmin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)])
        ymax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)])
        ymin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)])
        plt.plot([xmin, xmax], [ymin, ymax], 'r--')
        correlation = np.corrcoef(chain_sur_fin, chain_sds_fin)[0,1]
        str_corr = 'r = %.2f' % (correlation)
        plt.text(xmin, ymax, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
        plt.xlabel('chainage survey [m]')
        plt.ylabel('chainage satellite [m]')
        plt.title(pfname, fontweight='bold')

        ax2 = fig2.add_subplot(gs2[1,i])
        binwidth = 3
        bins = np.arange(min(diff_chain), max(diff_chain) + binwidth, binwidth)
        density = plt.hist(diff_chain, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k')
        plt.xlim([-50, 50])
        plt.xlabel('error [m]')
        str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90)
        plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.5), horizontalalignment='left', fontsize=10)

    fig1.set_size_inches(19.2, 9.28)
    fig1.set_tight_layout(True)
    fig2.set_size_inches(19.2, 9.28)
    fig2.set_tight_layout(True)

    # plot all the data together
    chain_sds_all = []
    chain_sur_all = []
    for i in range(chain_sds.shape[1]):
        pfname = list(topo_profiles.keys())[i]
        chain_sds_all = np.append(chain_sds_all,data_fin[pfname]['sds'])
        chain_sur_all = np.append(chain_sur_all,data_fin[pfname]['survey'])

    diff_chain_all = chain_sur_all - chain_sds_all

    # calculate statistics
    rmse = np.sqrt(np.nanmean((diff_chain_all)**2))
    mean = np.nanmean(diff_chain_all)
    std = np.nanstd(diff_chain_all)
    q90 = np.percentile(np.abs(diff_chain_all), 90)

    stats['all'] = {'rmse':rmse,'mean':mean,'std':std,'q90':q90}

    # make plot with all datapoints (from all the transects)
    plt.figure(fig3.number)
    ax1 = fig3.add_subplot(gs3[0,0])
    plt.axis('equal')
    plt.plot(chain_sur_all, chain_sds_all, 'ko', markersize=4, markerfacecolor='w', alpha=0.7)
    xmax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)])
    xmin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)])
    ymax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)])
    ymin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)])
    plt.plot([xmin, xmax], [ymin, ymax], 'r--')
    correlation = np.corrcoef(chain_sur_all, chain_sds_all)[0,1]
    str_corr = 'r = %.2f' % (correlation)
    plt.text(xmin, ymax, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
    plt.xlabel('chainage survey [m]')
    plt.ylabel('chainage satellite [m]')
    plt.title(pfname, fontweight='bold')

    ax2 = fig3.add_subplot(gs3[1,0])
    binwidth = 3
    bins = np.arange(min(diff_chain_all), max(diff_chain_all) + binwidth, binwidth)
    density = plt.hist(diff_chain_all, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k')
    plt.xlim([-50, 50])
    plt.xlabel('error [m]')
    str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90)
    plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.5), horizontalalignment='left', fontsize=10)
    fig3.set_size_inches(9.2, 9.28)
    fig3.set_tight_layout(True)

    return stats