You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1206 lines
45 KiB
Python

# -*- coding: utf-8 -*-
"""
Created on Thu Mar 1 11:20:35 2018
@author: z5030440
"""
"""This module contains all the functions needed for extracting satellite-derived shorelines (SDS)."""
# Initial settings
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import gridspec
import pdb
import ee
# other modules
from osgeo import gdal, ogr, osr
import tempfile
from urllib.request import urlretrieve
import zipfile
import scipy.interpolate as interpolate
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.measure as measure
import skimage.morphology as morphology
# machine learning modules
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
from sklearn.externals import joblib
# import own modules
from functions.utils import *
# Download from ee server function
def download_tif(image, polygon, bandsId, filepath):
    """
    Downloads a tif image (region and bands) from the ee server and extracts it
    into filepath

    Arguments:
    -----------
        image:
            image to download; it is serialized with image.serialize() before
            being sent (presumably an ee.Image -- confirm against callers)
        polygon:
            region to download, forwarded unchanged as the 'region' parameter
        bandsId:
            bands to download, forwarded unchanged as the 'bands' parameter
        filepath: str
            directory into which 'data.tif' is extracted

    Returns:
    -----------
        str
            path of the extracted 'data.tif' file, as returned by ZipFile.extract
    """
    import os  # local import: only needed to remove the temporary archive

    url = ee.data.makeDownloadUrl(ee.data.getDownloadId({
        'image': image.serialize(),
        'region': polygon,
        'bands': bandsId,
        'filePerBand': 'false',
        'name': 'data',
    }))
    # without a target filename urlretrieve() stores the archive in a temp file
    local_zip, _ = urlretrieve(url)
    try:
        with zipfile.ZipFile(local_zip) as local_zipfile:
            return local_zipfile.extract('data.tif', filepath)
    finally:
        # the temp archive is not needed once 'data.tif' has been extracted;
        # removing it here avoids piling up one zip per downloaded image
        try:
            os.remove(local_zip)
        except OSError:
            pass
def create_cloud_mask(im_qa, satname, plot_bool):
    """
    Creates a cloud mask from the image containing the QA band information

    KV WRL 2018

    Arguments:
    -----------
        im_qa: np.ndarray
            Image containing the QA band
        satname: string
            short name for the satellite ('L8', 'L7', 'L5', 'L4' or 'S2')
        plot_bool: boolean
            True if plot is wanted

    Returns:
    -----------
        cloud_mask : np.ndarray of booleans
            A boolean array with True where the clouds are present

    Raises:
    -----------
        ValueError
            if satname is not one of the supported satellites
    """
    # QA values that are flagged as cloud for each satellite
    if satname == 'L8':
        cloud_values = [2800, 2804, 2808, 2812, 6896, 6900, 6904, 6908]
    elif satname in ('L7', 'L5', 'L4'):
        cloud_values = [752, 756, 760, 764]
    elif satname == 'S2':
        cloud_values = [1024, 2048]  # 1024 = dense cloud, 2048 = cirrus clouds
    else:
        # previously this fell through and failed later on an unbound variable
        raise ValueError('unknown satellite name: ' + repr(satname))

    cloud_mask = np.isin(im_qa, cloud_values)

    # remove isolated cloud pixels (there are some in the swash and they cause problems)
    if cloud_mask.any():
        # use the returned array rather than the deprecated in_place argument
        cloud_mask = morphology.remove_small_objects(cloud_mask, min_size=10, connectivity=1)

    if plot_bool:
        plt.figure()
        plt.imshow(cloud_mask, cmap='gray')
        plt.draw()

    return cloud_mask
def rescale_image_intensity(im, cloud_mask, prob_high, plot_bool):
    """
    Rescales the intensity of an image (multispectral or single band) by applying
    a cloud mask and clipping at an upper percentile. This function allows
    to stretch the contrast of an image.

    KV WRL 2018

    Arguments:
    -----------
        im: np.ndarray
            Image to rescale, can be 3D (multispectral) or 2D (single band)
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        prob_high: float
            percentile (0-100, passed to np.percentile) of the cloud-free pixels
            used as the upper bound of the input range
        plot_bool: boolean
            True if plot is wanted (the RGB plot of a 3D image shows bands [2,1,0])

    Returns:
    -----------
        im_adj: np.ndarray
            The rescaled image, same shape as im, with NaN on cloud pixels
    """
    prc_low = 0  # lower bound of the input range (an intensity value, not a percentile)
    vec_mask = cloud_mask.reshape(im.shape[0] * im.shape[1])

    if plot_bool:
        plt.figure()

    if len(im.shape) > 2:
        n_bands = im.shape[2]
        vec = im.reshape(im.shape[0] * im.shape[1], n_bands)
        vec_adj = np.ones((len(vec_mask), n_bands)) * np.nan

        # integer histogram grid that always has at least n_bands cells
        # (the former float grid from np.floor was too small for 1 or 3 bands)
        n_cols = max(1, n_bands // 2)
        n_rows = -(-n_bands // n_cols)  # ceiling division

        # each band is stretched independently using only the cloud-free pixels
        for i in range(n_bands):
            prc_high = np.percentile(vec[~vec_mask, i], prob_high)
            vec_rescaled = exposure.rescale_intensity(vec[~vec_mask, i],
                                                      in_range=(prc_low, prc_high))
            vec_adj[~vec_mask, i] = vec_rescaled

            if plot_bool:
                plt.subplot(n_rows, n_cols, i + 1)
                plt.hist(vec[~vec_mask, i], bins=200, label='original')
                plt.hist(vec_rescaled, bins=200, alpha=0.5, label='rescaled')
                plt.legend()
                plt.title('Band' + str(i+1))
                plt.show()

        im_adj = vec_adj.reshape(im.shape[0], im.shape[1], im.shape[2])

        if plot_bool:
            plt.figure()
            ax1 = plt.subplot(121)
            plt.imshow(im[:, :, [2, 1, 0]])
            plt.axis('off')
            plt.title('Original')
            plt.subplot(122, sharex=ax1, sharey=ax1)
            plt.imshow(im_adj[:, :, [2, 1, 0]])
            plt.axis('off')
            plt.title('Rescaled')
            plt.show()

    else:
        vec = im.reshape(im.shape[0] * im.shape[1])
        vec_adj = np.ones(len(vec_mask)) * np.nan
        prc_high = np.percentile(vec[~vec_mask], prob_high)
        vec_rescaled = exposure.rescale_intensity(vec[~vec_mask],
                                                  in_range=(prc_low, prc_high))
        vec_adj[~vec_mask] = vec_rescaled

        if plot_bool:
            plt.hist(vec[~vec_mask], bins=200, label='original')
            plt.hist(vec_rescaled, bins=200, alpha=0.5, label='rescaled')
            plt.legend()
            plt.title('Single band')
            plt.show()

        im_adj = vec_adj.reshape(im.shape[0], im.shape[1])

        if plot_bool:
            plt.figure()
            ax1 = plt.subplot(121)
            plt.imshow(im, cmap='gray')
            plt.axis('off')
            plt.title('Original')
            plt.subplot(122, sharex=ax1, sharey=ax1)
            plt.imshow(im_adj, cmap='gray')
            plt.axis('off')
            plt.title('Rescaled')
            plt.show()

    return im_adj
def hist_match(source, template):
    """
    Remaps the pixel values of source so that its histogram matches the
    histogram of template

    Arguments:
    -----------
        source: np.ndarray
            Image to transform; the histogram is computed over the flattened array
        template: np.ndarray
            Template image; can have different dimensions to source

    Returns:
    -----------
        matched: np.ndarray
            The transformed image, with the same shape as source
    """
    out_shape = source.shape
    src_flat = source.ravel()
    tpl_flat = template.ravel()

    # unique values of each image; for the source we also need, for every
    # pixel, the position of its value in the sorted list of unique values
    _, inverse, src_counts = np.unique(src_flat, return_inverse=True, return_counts=True)
    tpl_values, tpl_counts = np.unique(tpl_flat, return_counts=True)

    # empirical CDFs (pixel value --> quantile) of both images
    src_cdf = np.cumsum(src_counts).astype(np.float64)
    src_cdf /= src_cdf[-1]
    tpl_cdf = np.cumsum(tpl_counts).astype(np.float64)
    tpl_cdf /= tpl_cdf[-1]

    # for each source quantile, linearly interpolate the template value that
    # sits at the same quantile, then map it back onto the pixels
    matched_values = np.interp(src_cdf, tpl_cdf, tpl_values)
    matched = matched_values[inverse]
    return matched.reshape(out_shape)
def pansharpen(im_ms, im_pan, cloud_mask, plot_bool):
    """
    Pansharpens a multispectral image (3D) with the panchromatic band (2D),
    ignoring the pixels flagged in the cloud mask

    KV WRL 2018

    Arguments:
    -----------
        im_ms: np.ndarray
            Multispectral image to pansharpen (3D)
        im_pan: np.ndarray
            Panchromatic band (2D)
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        plot_bool: boolean
            True if plot is wanted

    Returns:
    -----------
        im_ms_ps: np.ndarray
            Pansharpened multispectral image (3D), NaN on cloud pixels
    """
    n_pix = im_ms.shape[0] * im_ms.shape[1]
    n_bands = im_ms.shape[2]

    # flatten the images and keep the cloud-free pixels only
    clear = ~cloud_mask.reshape(n_pix)
    ms_clear = im_ms.reshape(n_pix, n_bands)[clear, :]
    pan_clear = im_pan.reshape(im_pan.shape[0] * im_pan.shape[1])[clear]

    # principal components of the multispectral bands
    pca = decomposition.PCA()
    pcs = pca.fit_transform(ms_clear)

    # swap the 1st PC for the pan band (histogram-matched to that PC) and go
    # back to band space
    pcs[:, 0] = hist_match(pan_clear, pcs[:, 0])
    sharpened = pca.inverse_transform(pcs)

    # put the cloud-free pixels back into a full-size image
    full = np.full((n_pix, n_bands), np.nan)
    full[clear, :] = sharpened
    im_ms_ps = full.reshape(im_ms.shape[0], im_ms.shape[1], n_bands)

    if plot_bool:
        plt.figure()
        ax_ref = plt.subplot(121)
        plt.imshow(rescale_image_intensity(im_ms[:, :, [2, 1, 0]], cloud_mask, 99.9, False))
        plt.axis('off')
        plt.title('Original')
        plt.subplot(122, sharex=ax_ref, sharey=ax_ref)
        plt.imshow(rescale_image_intensity(im_ms_ps[:, :, [2, 1, 0]], cloud_mask, 99.9, False))
        plt.axis('off')
        plt.title('Pansharpened')
        plt.show()

    return im_ms_ps
def nd_index(im1, im2, cloud_mask, plot_bool):
    """
    Computes the normalised difference index (im1 - im2) / (im1 + im2) on
    2 images (2D), given a cloud mask (2D)

    KV WRL 2018

    Arguments:
    -----------
        im1, im2: np.ndarray
            Images (2D) with which to calculate the ND index
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        plot_bool: boolean
            True if plot is wanted

    Returns:
    -----------
        im_nd: np.ndarray
            Image (2D, float) containing the ND index, NaN on cloud pixels
    """
    vec_mask = cloud_mask.reshape(im1.shape[0] * im1.shape[1])
    vec_nd = np.ones(len(vec_mask)) * np.nan

    # cast to float before the arithmetic: with unsigned integer bands the
    # difference (and, for small dtypes, the sum) would silently wrap around
    vec1 = im1.reshape(im1.shape[0] * im1.shape[1])[~vec_mask].astype(np.float64)
    vec2 = im2.reshape(im2.shape[0] * im2.shape[1])[~vec_mask].astype(np.float64)

    vec_nd[~vec_mask] = np.divide(vec1 - vec2, vec1 + vec2)
    im_nd = vec_nd.reshape(im1.shape[0], im1.shape[1])

    if plot_bool:
        plt.figure()
        plt.imshow(im_nd, cmap='seismic')
        plt.colorbar()
        plt.title('Normalised index')
        plt.show()

    return im_nd
def find_wl_contours(im_ndwi, cloud_mask, plot_bool):
    """
    Finds the water line by thresholding the Normalized Difference Water Index
    (Otsu's threshold) and tracing the threshold level with the Marching
    Squares Algorithm

    KV WRL 2018

    Arguments:
    -----------
        im_ndwi: np.ndarray
            Image (2D) with the NDWI (water index)
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        plot_bool: boolean
            True if plot is wanted

    Returns:
    -----------
        contours: list of np.arrays
            contains the (row,column) coordinates of the contour lines
    """
    # cloud-free index values, as a vector
    clear = ~cloud_mask.reshape(cloud_mask.shape[0] * cloud_mask.shape[1])
    ndwi_clear = im_ndwi.reshape(im_ndwi.shape[0] * im_ndwi.shape[1])[clear]

    # Otsu's threshold on the cloud-free values, contours on the full image
    t_otsu = filters.threshold_otsu(ndwi_clear)
    raw_contours = measure.find_contours(im_ndwi, t_otsu)

    # drop the contour points that contain NaNs; a contour that had NaNs is
    # kept only if more than one point is left
    contours = []
    for contour in raw_contours:
        nan_rows = np.isnan(contour).any(axis=1)
        if not nan_rows.any():
            contours.append(contour)
            continue
        trimmed = contour[~nan_rows]
        if len(trimmed) > 1:
            contours.append(trimmed)

    if plot_bool:
        # histogram of the index with Otsu's threshold
        plt.figure()
        counts, _, _ = plt.hist(ndwi_clear, bins=200)
        plt.plot([t_otsu, t_otsu], [0, np.max(counts)], 'r-', label='Otsu threshold')
        plt.legend()
        plt.show()

        # water line contours on top of the water index
        plt.figure()
        plt.imshow(im_ndwi, cmap='seismic')
        plt.colorbar()
        for contour in contours:
            plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k')
        plt.axis('image')
        plt.title('Detected water lines')
        plt.show()

    return contours
def convert_pix2world(points, crs_vec):
    """
    Converts pixel coordinates (row,columns) to world projected coordinates
    performing an affine transformation

    KV WRL 2018

    Arguments:
    -----------
        points: np.ndarray or list of np.ndarray
            array with 2 columns (rows first and columns second)
        crs_vec: np.ndarray
            vector of 6 elements [Xtr, Xscale, Xshear, Ytr, Yshear, Yscale]

    Returns:
    -----------
        points_converted: np.ndarray or list of np.ndarray
            converted coordinates, first column with X and second column with Y

    Raises:
    -----------
        TypeError
            if points is neither a list nor an np.ndarray
    """
    # validate first so that an invalid input fails with a meaningful error
    # (the former bare 'raise' produced an unrelated RuntimeError)
    if not isinstance(points, (list, np.ndarray)):
        raise TypeError('invalid input type: expected np.ndarray or list of '
                        'np.ndarray, got ' + type(points).__name__)

    # make affine transformation matrix
    aff_mat = np.array([[crs_vec[1], crs_vec[2], crs_vec[0]],
                        [crs_vec[4], crs_vec[5], crs_vec[3]],
                        [0, 0, 1]])
    # create affine transformation
    tform = transform.AffineTransform(aff_mat)

    # the transform expects (column, row), hence the column swap
    if isinstance(points, list):
        return [tform(arr[:, [1, 0]]) for arr in points]
    return tform(points[:, [1, 0]])
def convert_world2pix(points, crs_vec):
    """
    Converts world projected coordinates (X,Y) to image coordinates
    performing the inverse of the affine transformation defined by crs_vec

    KV WRL 2018

    Arguments:
    -----------
        points: np.ndarray or list of np.ndarray
            array with 2 columns (X first and Y second)
        crs_vec: np.ndarray
            vector of 6 elements [Xtr, Xscale, Xshear, Ytr, Yshear, Yscale]

    Returns:
    -----------
        points_converted: np.ndarray or list of np.ndarray
            converted pixel coordinates, returned exactly as produced by the
            inverse transform.
            NOTE(review): no column swap is applied here, unlike in
            convert_pix2world, so the output looks like (column, row) rather
            than (row, column) -- confirm against callers before relying on
            the order.

    Raises:
    -----------
        TypeError
            if points is neither a list nor an np.ndarray
    """
    # validate first so that an invalid input fails with a meaningful error
    # (the former bare 'raise' produced an unrelated RuntimeError)
    if not isinstance(points, (list, np.ndarray)):
        raise TypeError('invalid input type: expected np.ndarray or list of '
                        'np.ndarray, got ' + type(points).__name__)

    # make affine transformation matrix
    aff_mat = np.array([[crs_vec[1], crs_vec[2], crs_vec[0]],
                        [crs_vec[4], crs_vec[5], crs_vec[3]],
                        [0, 0, 1]])
    # create affine transformation
    tform = transform.AffineTransform(aff_mat)

    if isinstance(points, list):
        # convert each array of the list (the whole list used to be passed
        # to the transform on every iteration instead of the current array)
        return [tform.inverse(arr) for arr in points]
    return tform.inverse(points)
def convert_epsg(points, epsg_in, epsg_out):
    """
    Converts from one spatial reference to another using the epsg codes

    KV WRL 2018

    Arguments:
    -----------
        points: np.ndarray or list of np.ndarray
            array(s) of coordinates, one point per row
        epsg_in: int
            epsg code of the spatial reference in which the input is
        epsg_out: int
            epsg code of the spatial reference in which the output will be

    Returns:
    -----------
        points_converted: np.ndarray or list of np.ndarray
            converted coordinates, i.e. the output of TransformPoints wrapped
            in an array (presumably one x, y, z row per point -- confirm
            against the installed GDAL version)

    Raises:
    -----------
        TypeError
            if points is neither a list nor an np.ndarray
    """
    # validate first so that an invalid input fails with a meaningful error
    # (the former bare 'raise' produced an unrelated RuntimeError)
    if not isinstance(points, (list, np.ndarray)):
        raise TypeError('invalid input type: expected np.ndarray or list of '
                        'np.ndarray, got ' + type(points).__name__)

    # define input and output spatial references
    inSpatialRef = osr.SpatialReference()
    inSpatialRef.ImportFromEPSG(epsg_in)
    outSpatialRef = osr.SpatialReference()
    outSpatialRef.ImportFromEPSG(epsg_out)
    # create a coordinates transform
    coordTransform = osr.CoordinateTransformation(inSpatialRef, outSpatialRef)

    # transform points
    if isinstance(points, list):
        return [np.array(coordTransform.TransformPoints(arr)) for arr in points]
    return np.array(coordTransform.TransformPoints(points))
def classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, buffer_size, min_beach_size, plot_bool):
    """
    Classifies sand pixels using an unsupervised algorithm (Kmeans).
    Set buffer size to False if you want to classify the entire image,
    otherwise buffer size defines the buffer around the shoreline in which
    pixels are considered for classification.
    This classification is not robust and is only used to train a supervised algorithm

    KV WRL 2018

    Arguments:
    -----------
        im_ms_ps: np.ndarray
            Pansharpened RGB + downsampled NIR and SWIR
        im_pan: np.ndarray
            Panchromatic band
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        wl_pix: list of np.ndarray
            list of arrays containing the pixel coordinates of the water line
        buffer_size: int or False
            radius of the disk used to create a buffer around the water line
            when False, the entire image is considered for kmeans
        min_beach_size: int
            minimum number of connected pixels belonging to a single beach
        plot_bool: boolean
            True if plot is wanted

    Returns:
    -----------
        im_sand: np.ndarray
            2D binary image containing True where sand pixels are located
    """
    n_rows = im_ms_ps.shape[0]
    n_cols = im_ms_ps.shape[1]
    n_pix = n_rows * n_cols

    # flatten the images into vectors
    ms_flat = im_ms_ps.reshape(n_pix, im_ms_ps.shape[2])
    pan_flat = im_pan.reshape(im_pan.shape[0] * im_pan.shape[1])
    cloud_flat = cloud_mask.reshape(n_pix)

    # feature vector: B, G, R, NIR and pan
    features = np.empty((ms_flat.shape[0], 5))
    features[:, :4] = ms_flat[:, [0, 1, 2, 3]]
    features[:, 4] = pan_flat

    if buffer_size:
        # binary image with ones on the (rounded) water line points ...
        im_buffer = np.zeros((n_rows, n_cols))
        for contour in wl_pix:
            rc = np.round(contour).astype(int)
            im_buffer[rc[:, 0], rc[:, 1]] = 1
        # ... dilated with a disk to obtain the buffer
        im_buffer = morphology.binary_dilation(im_buffer, morphology.disk(buffer_size))
        in_buffer = (im_buffer == 1).reshape(n_pix)
    else:
        in_buffer = np.ones(pan_flat.shape[0], dtype=bool)

    # cloud pixels never take part in the clustering
    in_buffer = np.logical_and(in_buffer, ~cloud_flat)

    # kmeans with 6 clusters on the selected pixels
    kmeans = KMeans(n_clusters=6, random_state=0).fit(features[in_buffer, :])
    labels = np.full(len(cloud_flat), np.nan)
    labels[in_buffer] = kmeans.labels_
    im_labels = labels.reshape(n_rows, n_cols)

    # sand is taken to be the cluster with maximum mean reflection in B,G,R,Pan
    brightness = np.mean(kmeans.cluster_centers_[:, [0, 1, 2, 4]], axis=1)
    im_sand = im_labels == np.argmax(brightness)
    im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
    im_sand = morphology.binary_erosion(im_sand, morphology.disk(1))

    if plot_bool:
        # paint the sand pixels in blue on top of the rescaled RGB image
        im = np.copy(rescale_image_intensity(im_ms_ps[:, :, [2, 1, 0]], cloud_mask, 99.9, False))
        for channel, value in enumerate((0, 0, 1)):
            im[im_sand, channel] = value
        plt.figure()
        plt.imshow(im)
        plt.axis('image')
        plt.title('Sand classification')
        plt.show()

    return im_sand
def classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, plot_bool):
    """
    Classifies every pixel in the image in one of 4 classes:
        - sand                                          --> label = 1
        - whitewater (breaking waves and swash)         --> label = 2
        - water                                         --> label = 3
        - other (vegetation, buildings, rocks...)       --> label = 0

    The classifier is a Neural Network, trained with 7000 pixels for the class SAND and 1500 pixels for
    each of the other classes. This is because the class of interest for my application is SAND and I
    wanted to minimize the classification error for that class

    KV WRL 2018

    Arguments:
    -----------
        im_ms_ps: np.ndarray
            Pansharpened RGB + downsampled NIR and SWIR (5 bands)
        im_pan: np.ndarray
            Panchromatic band
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        min_beach_size: int
            minimum number of connected sand pixels for a patch to be kept as sand
        plot_bool: boolean
            True if plot is wanted

    Returns:
    -----------
        im_classif: np.ndarray
            2D image containing labels (cloud and NaN pixels are left at 0)
        im_labels: np.ndarray of booleans
            3D image containing a boolean image for each class in the order
            (sand, swash, water); the sand layer has the small patches removed
    """
    # load classifier
    # NOTE: joblib.load unpickles the file, so only trusted classifier files
    # must be used; the relative path is resolved from the working directory
    clf = joblib.load('functions/NeuralNet_classif.pkl')

    # calculate features
    n_features = 10
    im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features))
    im_features[:, :, [0, 1, 2, 3, 4]] = im_ms_ps
    im_features[:, :, 5] = im_pan
    im_features[:, :, 6] = nd_index(im_ms_ps[:, :, 3], im_ms_ps[:, :, 1], cloud_mask, False)  # ND(NIR-G)
    im_features[:, :, 7] = nd_index(im_ms_ps[:, :, 3], im_ms_ps[:, :, 2], cloud_mask, False)  # ND(NIR-R)
    im_features[:, :, 8] = nd_index(im_ms_ps[:, :, 0], im_ms_ps[:, :, 2], cloud_mask, False)  # ND(B-R)
    im_features[:, :, 9] = nd_index(im_ms_ps[:, :, 4], im_ms_ps[:, :, 1], cloud_mask, False)  # ND(SWIR-G)

    # remove NaNs and clouds
    vec_features = im_features.reshape((im_ms_ps.shape[0] * im_ms_ps.shape[1], n_features))
    vec_cloud = cloud_mask.reshape(cloud_mask.shape[0] * cloud_mask.shape[1])
    vec_nan = np.any(np.isnan(vec_features), axis=1)
    vec_mask = np.logical_or(vec_cloud, vec_nan)
    vec_features = vec_features[~vec_mask, :]

    # predict with NN classifier
    labels = clf.predict(vec_features)

    # recompose image (masked pixels keep the initial value 0)
    vec_classif = np.zeros((cloud_mask.shape[0] * cloud_mask.shape[1]))
    vec_classif[~vec_mask] = labels
    im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1]))

    # labels
    im_sand = im_classif == 1
    im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
    im_swash = im_classif == 2
    im_water = im_classif == 3
    im_labels = np.stack((im_sand, im_swash, im_water), axis=-1)

    # NOTE: a disabled experiment that kept only the sand patches adjacent to
    # swash pixels used to live here as commented-out code

    if plot_bool:
        # display on top of pansharpened RGB
        im_display = rescale_image_intensity(im_ms_ps[:, :, [2, 1, 0]], cloud_mask, 99.9, False)
        im = np.copy(im_display)
        # define colours for plot (sand, swash, water)
        colours = np.array([[1, 128/255, 0/255], [204/255, 1, 1], [0, 0, 204/255]])
        for k in range(0, im_labels.shape[2]):
            im[im_labels[:, :, k], 0] = colours[k, 0]
            im[im_labels[:, :, k], 1] = colours[k, 1]
            im[im_labels[:, :, k], 2] = colours[k, 2]

        plt.figure()
        ax1 = plt.subplot(121)
        plt.imshow(im_display)
        plt.axis('off')
        plt.title('Image')
        plt.subplot(122, sharex=ax1, sharey=ax1)
        plt.imshow(im)
        plt.axis('off')
        plt.title('NN classifier')
        # maximising the window only exists on some backends (Qt); skip it
        # elsewhere instead of crashing the classification
        window = getattr(plt.get_current_fig_manager(), 'window', None)
        if hasattr(window, 'showMaximized'):
            window.showMaximized()
        plt.tight_layout()
        plt.draw()

    return im_classif, im_labels
def classify_image_NN_nopan(im_ms_ps, cloud_mask, min_beach_size, plot_bool):
    """
    Same classification as classify_image_NN, but with a classifier that does
    not use the panchromatic band. Every pixel is classified in one of 4 classes:
        - sand                                          --> label = 1
        - whitewater (breaking waves and swash)         --> label = 2
        - water                                         --> label = 3
        - other (vegetation, buildings, rocks...)       --> label = 0

    The classifier is a Neural Network, trained with 7000 pixels for the class SAND and 1500 pixels for
    each of the other classes. This is because the class of interest for my application is SAND and I
    wanted to minimize the classification error for that class

    KV WRL 2018

    Arguments:
    -----------
        im_ms_ps: np.ndarray
            RGB + NIR and SWIR image (5 bands)
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        min_beach_size: int
            minimum number of connected sand pixels for a patch to be kept as sand
        plot_bool: boolean
            True if plot is wanted

    Returns:
    -----------
        im_classif: np.ndarray
            2D image containing labels (cloud and NaN pixels are left at 0)
        im_labels: np.ndarray of booleans
            3D image containing a boolean image for each class in the order
            (sand, swash, water); the sand layer has the small patches removed
    """
    # load classifier
    # NOTE: joblib.load unpickles the file, so only trusted classifier files
    # must be used; the relative path is resolved from the working directory
    clf = joblib.load('functions/NeuralNet_classif_nopan.pkl')

    # calculate features
    n_features = 9
    im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features))
    im_features[:, :, [0, 1, 2, 3, 4]] = im_ms_ps
    im_features[:, :, 5] = nd_index(im_ms_ps[:, :, 3], im_ms_ps[:, :, 1], cloud_mask, False)  # ND(NIR-G)
    im_features[:, :, 6] = nd_index(im_ms_ps[:, :, 3], im_ms_ps[:, :, 2], cloud_mask, False)  # ND(NIR-R)
    im_features[:, :, 7] = nd_index(im_ms_ps[:, :, 0], im_ms_ps[:, :, 2], cloud_mask, False)  # ND(B-R)
    im_features[:, :, 8] = nd_index(im_ms_ps[:, :, 4], im_ms_ps[:, :, 1], cloud_mask, False)  # ND(SWIR-G)

    # remove NaNs and clouds
    vec_features = im_features.reshape((im_ms_ps.shape[0] * im_ms_ps.shape[1], n_features))
    vec_cloud = cloud_mask.reshape(cloud_mask.shape[0] * cloud_mask.shape[1])
    vec_nan = np.any(np.isnan(vec_features), axis=1)
    vec_mask = np.logical_or(vec_cloud, vec_nan)
    vec_features = vec_features[~vec_mask, :]

    # predict with NN classifier
    labels = clf.predict(vec_features)

    # recompose image (masked pixels keep the initial value 0)
    vec_classif = np.zeros((cloud_mask.shape[0] * cloud_mask.shape[1]))
    vec_classif[~vec_mask] = labels
    im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1]))

    # labels
    im_sand = im_classif == 1
    im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
    im_swash = im_classif == 2
    im_water = im_classif == 3
    im_labels = np.stack((im_sand, im_swash, im_water), axis=-1)

    # NOTE: a disabled experiment that kept only the sand patches adjacent to
    # swash pixels used to live here as commented-out code

    if plot_bool:
        # display on top of the RGB image
        im_display = rescale_image_intensity(im_ms_ps[:, :, [2, 1, 0]], cloud_mask, 99.9, False)
        im = np.copy(im_display)
        # define colours for plot (sand, swash, water)
        colours = np.array([[1, 128/255, 0/255], [204/255, 1, 1], [0, 0, 204/255]])
        for k in range(0, im_labels.shape[2]):
            im[im_labels[:, :, k], 0] = colours[k, 0]
            im[im_labels[:, :, k], 1] = colours[k, 1]
            im[im_labels[:, :, k], 2] = colours[k, 2]

        plt.figure()
        ax1 = plt.subplot(121)
        plt.imshow(im_display)
        plt.axis('off')
        plt.title('Image')
        plt.subplot(122, sharex=ax1, sharey=ax1)
        plt.imshow(im)
        plt.axis('off')
        plt.title('NN classifier')
        # maximising the window only exists on some backends (Qt); skip it
        # elsewhere instead of crashing the classification
        window = getattr(plt.get_current_fig_manager(), 'window', None)
        if hasattr(window, 'showMaximized'):
            window.showMaximized()
        plt.tight_layout()
        plt.draw()

    return im_classif, im_labels
def _remove_nan_points(contours):
    """Drops the NaN points of each contour; a contour that had NaNs is kept only if more than 1 point is left"""
    cleaned = []
    for contour in contours:
        nan_rows = np.isnan(contour).any(axis=1)
        if nan_rows.any():
            contour = contour[~nan_rows]
            if len(contour) <= 1:
                continue
        cleaned.append(contour)
    return cleaned


def find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size, plot_bool):
    """
    New method for extracting shorelines (more robust): the water indices are
    thresholded with Otsu's method using only the sand and water pixels located
    in a buffer around the sandy beach

    KV WRL 2018

    Arguments:
    -----------
        im_ms_ps: np.ndarray
            Pansharpened RGB + downsampled NIR and SWIR
        im_labels: np.ndarray
            3D image containing a boolean image for each class in the order (sand, swash, water)
        cloud_mask: np.ndarray
            2D cloud mask with True where cloud pixels are
        buffer_size: int
            size of the buffer around the sandy beach
        plot_bool: boolean
            True if plot is wanted

    Returns:
    -----------
        contours_wi: list of np.arrays
            contains the (row,column) coordinates of the contour lines extracted with the
            Water Index (computed inside the buffer only)
        contours_mwi: list of np.arrays
            contains the (row,column) coordinates of the contour lines extracted with the
            Modified Water Index (computed on the entire image)
    """
    nrows = cloud_mask.shape[0]
    ncols = cloud_mask.shape[1]
    im_display = rescale_image_intensity(im_ms_ps[:, :, [2, 1, 0]], cloud_mask, 99.9, False)

    # calculate Normalized Difference Modified Water Index (SWIR - G)
    im_mwi = nd_index(im_ms_ps[:, :, 4], im_ms_ps[:, :, 1], cloud_mask, False)
    # calculate Normalized Difference Water Index (NIR - G)
    im_wi = nd_index(im_ms_ps[:, :, 3], im_ms_ps[:, :, 1], cloud_mask, False)
    # stack indices together: column 0 = WI, column 1 = MWI
    im_ind = np.stack((im_wi, im_mwi), axis=-1)
    vec_ind = im_ind.reshape(nrows*ncols, 2)

    # process labels
    vec_sand = im_labels[:, :, 0].reshape(ncols*nrows)
    vec_swash = im_labels[:, :, 1].reshape(ncols*nrows)
    vec_water = im_labels[:, :, 2].reshape(ncols*nrows)

    # create a buffer around the sandy beach
    se = morphology.disk(buffer_size)
    im_buffer = morphology.binary_dilation(im_labels[:, :, 0], se)
    vec_buffer = im_buffer.reshape(nrows*ncols)

    # select water/sand/swash pixels that are within the buffer
    int_water = vec_ind[np.logical_and(vec_buffer, vec_water), :]
    int_sand = vec_ind[np.logical_and(vec_buffer, vec_sand), :]
    int_swash = vec_ind[np.logical_and(vec_buffer, vec_swash), :]

    # threshold the sand/water intensities; each threshold is computed on the
    # column of its own index (they used to be swapped: the MWI threshold was
    # taken from the WI column and vice versa)
    int_all = np.append(int_water, int_sand, axis=0)
    t_wi = filters.threshold_otsu(int_all[:, 0])
    t_mwi = filters.threshold_otsu(int_all[:, 1])

    # find contour with MS algorithm
    im_wi_buffer = np.copy(im_wi)
    im_wi_buffer[~im_buffer] = np.nan
    contours_wi = measure.find_contours(im_wi_buffer, t_wi)
    # WARNING: the MWI contours are extracted on the entire image, not only
    # inside the buffer
    contours_mwi = measure.find_contours(im_mwi, t_mwi)

    # remove contour points that are nans (around clouds)
    contours_wi = _remove_nan_points(contours_wi)
    contours_mwi = _remove_nan_points(contours_mwi)

    if plot_bool:
        im = np.copy(im_display)
        # define colours for plot (sand, swash, water)
        colours = np.array([[1, 128/255, 0/255], [204/255, 1, 1], [0, 0, 204/255]])
        for k in range(0, im_labels.shape[2]):
            im[im_labels[:, :, k], 0] = colours[k, 0]
            im[im_labels[:, :, k], 1] = colours[k, 1]
            im[im_labels[:, :, k], 2] = colours[k, 2]

        fig = plt.figure()
        gs = gridspec.GridSpec(3, 3, height_ratios=[1, 1, 3])

        # histograms of the two indices with their thresholds
        ax1 = fig.add_subplot(gs[0, :])
        vals = plt.hist(int_water[:, 0], bins=100, label='water')
        plt.hist(int_sand[:, 0], bins=100, alpha=0.5, label='sand')
        plt.hist(int_swash[:, 0], bins=100, alpha=0.5, label='swash')
        plt.plot([t_wi, t_wi], [0, np.max(vals[0])], 'r-')
        plt.legend()
        plt.title('Water Index NIR-G')

        fig.add_subplot(gs[1, :], sharex=ax1)
        vals = plt.hist(int_water[:, 1], bins=100, label='water')
        plt.hist(int_sand[:, 1], bins=100, alpha=0.5, label='sand')
        plt.hist(int_swash[:, 1], bins=100, alpha=0.5, label='swash')
        plt.plot([t_mwi, t_mwi], [0, np.max(vals[0])], 'r-')
        plt.legend()
        plt.title('Modified Water Index SWIR-G')

        # both sets of contours on top of: classified image, RGB image, MWI
        ax_ref = None
        for col, (background, cmap) in enumerate([(im, None), (im_display, None), (im_mwi, 'seismic')]):
            if ax_ref is None:
                ax_ref = fig.add_subplot(gs[2, col])
            else:
                fig.add_subplot(gs[2, col], sharex=ax_ref, sharey=ax_ref)
            plt.imshow(background, cmap=cmap)
            for contour in contours_mwi:
                plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k')
            for contour in contours_wi:
                plt.plot(contour[:, 1], contour[:, 0], linestyle='--', linewidth=1, color='w')
            plt.grid(False)
            plt.xticks([])
            plt.yticks([])

        # maximising the window only exists on some backends (Qt); skip it
        # elsewhere instead of crashing the shoreline extraction
        window = getattr(plt.get_current_fig_manager(), 'window', None)
        if hasattr(window, 'showMaximized'):
            window.showMaximized()
        plt.gcf().set_tight_layout(True)
        plt.draw()

    return contours_wi, contours_mwi
def _survey_chainage_at(satdate, dates_sur_num, chain_sur, mod, mindays):
    """
    Estimate the surveyed chainage at one satellite acquisition date.

    Arguments:
    -----------
        satdate: int
            ordinal day of the satellite image
        dates_sur_num: np.ndarray
            ordinal days of the surveys
        chain_sur: np.ndarray
            surveyed chainage, one value per entry of dates_sur_num
        mod: 0 or 1
            0 for linear interpolation between the 2 enclosing surveys,
            1 for nearest neighbour
        mindays: int
            largest gap (in days) accepted in nearest neighbour mode

    Returns:
    -----------
        chainage: float
            surveyed chainage at satdate, np.nan when it cannot be estimated
        gap: number
            number of days separating the image from the survey(s) used
    """
    # positive offset: survey taken before the image, negative: after
    offsets = satdate - dates_sur_num
    if offsets.size == 0:
        return np.nan, np.nan

    if mod == 1:
        # np.argmin returns the first occurrence in case of ties
        idx_closest = int(np.argmin(np.abs(offsets)))
        gap = np.abs(offsets[idx_closest])
        if gap > mindays:
            return np.nan, gap
        return chain_sur[idx_closest], gap

    # mod == 0: a survey on the same day needs no interpolation
    same_day = np.flatnonzero(offsets == 0)
    if same_day.size > 0:
        return chain_sur[same_day[0]], 0

    before = np.flatnonzero(offsets > 0)
    after = np.flatnonzero(offsets < 0)
    if before.size == 0 or after.size == 0:
        # image outside the surveyed period: no extrapolation is attempted
        return np.nan, np.min(np.abs(offsets))

    # closest survey on each side (does not rely on the surveys being sorted)
    idx_before = before[np.argmin(offsets[before])]
    idx_after = after[np.argmax(offsets[after])]
    date_before = float(dates_sur_num[idx_before])
    date_after = float(dates_sur_num[idx_after])
    chainage = np.interp(satdate, [date_before, date_after],
                         [chain_sur[idx_before], chain_sur[idx_after]])
    # NOTE(review): this is the gap to the earlier survey only, which is what
    # the previous abs(max(...)) expression evaluated to -- confirm whether the
    # larger of the two gaps was intended.
    return chainage, satdate - date_before


def _error_stats(diff_chain):
    """Return rmse, mean, std and 90th percentile of |error| (np.nan if no data)."""
    diff_chain = np.asarray(diff_chain, dtype=float)
    if diff_chain.size == 0:
        return {'rmse': np.nan, 'mean': np.nan, 'std': np.nan, 'q90': np.nan}
    return {'rmse': np.sqrt(np.nanmean(diff_chain**2)),
            'mean': np.nanmean(diff_chain),
            'std': np.nanstd(diff_chain),
            'q90': np.percentile(np.abs(diff_chain), 90)}


def _plot_scatter_and_hist(fig, gs, col, chain_sur, chain_sds, err, title):
    """Draw the survey-vs-satellite scatter (row 0) and error histogram (row 1)."""
    has_data = chain_sur.size > 0

    # scatter plot with the 1:1 line and the correlation coefficient
    ax_scatter = fig.add_subplot(gs[0, col])
    ax_scatter.axis('equal')
    if has_data:
        ax_scatter.plot(chain_sur, chain_sds, 'ko', markersize=4,
                        markerfacecolor='w', alpha=0.7)
        lower = np.min([np.nanmin(chain_sds), np.nanmin(chain_sur)])
        upper = np.max([np.nanmax(chain_sds), np.nanmax(chain_sur)])
        ax_scatter.plot([lower, upper], [lower, upper], 'r--')
        correlation = np.corrcoef(chain_sur, chain_sds)[0, 1]
        str_corr = 'r = %.2f' % (correlation)
        ax_scatter.text(lower, upper, str_corr,
                        bbox=dict(facecolor=[0.7, 0.7, 0.7], alpha=0.5),
                        horizontalalignment='left')
    ax_scatter.set_xlabel('chainage survey [m]')
    ax_scatter.set_ylabel('chainage satellite [m]')
    ax_scatter.set_title(title, fontweight='bold')

    # histogram of the errors with the summary statistics
    ax_hist = fig.add_subplot(gs[1, col])
    if has_data:
        diff_chain = chain_sur - chain_sds
        binwidth = 3
        bins = np.arange(np.min(diff_chain), np.max(diff_chain) + binwidth, binwidth)
        if bins.size < 2:
            # all errors identical: np.arange yields a single edge, so add one
            bins = np.array([np.min(diff_chain), np.min(diff_chain) + binwidth])
        density = ax_hist.hist(diff_chain, bins=bins, density=True,
                               color=[0.8, 0.8, 0.8], edgecolor='k')
        str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (
            err['rmse'], err['mean'], err['std'], err['q90'])
        ax_hist.text(15, np.max(density[0]) - 0.015, str_stats,
                     bbox=dict(facecolor=[0.8, 0.8, 0.8], alpha=0.5),
                     horizontalalignment='left', fontsize=10)
    ax_hist.set_xlim([-50, 50])
    ax_hist.set_xlabel('error [m]')


def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
    """
    Compare sds with groundtruth data from topographic surveys / argus shorelines

    Three figures are created: a time-series per transect, a scatter plot and
    error histogram per transect, and a scatter plot and error histogram with
    the data of all the transects pooled together.

    KV WRL 2018

    Arguments:
    -----------
        dates_sds: list
            list of dates corresponding to each row in chain_sds
        chain_sds: np.ndarray
            array with time series of chainage for each transect (each transect is one column)
        topo_profiles: dict
            dict containing the dates ('dates') and chainage ('chainage') of the
            groundtruth. Profiles are paired with the columns of chain_sds in
            dict order, so there must be at least one profile per column.
        mod: 0 or 1
            0 for linear interpolation between 2 closest surveys, 1 for only nearest neighbour.
            With 0, images acquired before the first or after the last survey
            are not compared and a survey on the same day as the image is used directly.
        mindays: int
            maximum number of days between image and survey for which the data
            can be compared (only applied when mod is 1)

    Returns:
    -----------
        stats: dict
            contains all the statistics of the comparison: one entry per profile
            ('rmse', 'mean', 'std', 'q90', 'diffdays') and an entry 'all' for the
            pooled data ('rmse', 'mean', 'std', 'q90'). Statistics are np.nan
            when there is no valid pair to compare.

    Raises:
    -----------
        ValueError
            if mod is neither 0 nor 1
    """
    if mod not in (0, 1):
        raise ValueError('mod must be 0 (interpolation) or 1 (nearest neighbour)')

    n_transects = chain_sds.shape[1]
    pfnames = list(topo_profiles)

    # create 3 figures
    fig1 = plt.figure()
    gs1 = gridspec.GridSpec(n_transects, 1)
    fig2 = plt.figure()
    gs2 = gridspec.GridSpec(2, n_transects)
    fig3 = plt.figure()
    gs3 = gridspec.GridSpec(2, 1)

    dates_sds_num = np.array([_.toordinal() for _ in dates_sds])
    stats = dict([])
    # start with an empty float array so that pooling works with no transect
    sds_parts = [np.empty(0)]
    sur_parts = [np.empty(0)]

    # for each transect compare and plot the data
    for i in range(n_transects):
        pfname = pfnames[i]
        dates_sur = topo_profiles[pfname]['dates']
        chain_sur = topo_profiles[pfname]['chainage']
        chain_sur_arr = np.asarray(chain_sur, dtype=float)
        # convert to datenum
        dates_sur_num = np.array([_.toordinal() for _ in dates_sur])

        # groundtruth chainage at each satellite date
        chain_sur_interp = []
        diff_days = []
        for satdate in dates_sds_num:
            chainage, gap = _survey_chainage_at(satdate, dates_sur_num,
                                                chain_sur_arr, mod, mindays)
            chain_sur_interp.append(chainage)
            diff_days.append(gap)
        chain_sur_interp = np.array(chain_sur_interp, dtype=float)

        # keep the dates where both groundtruth and sds are available
        idx_valid = np.logical_and(~np.isnan(chain_sur_interp),
                                   ~np.isnan(chain_sds[:, i]))
        chain_sur_fin = chain_sur_interp[idx_valid]
        chain_sds_fin = chain_sds[idx_valid, i]
        dates_fin = [k for (k, v) in zip(dates_sds, idx_valid) if v]

        # calculate statistics and store data
        err = _error_stats(chain_sur_fin - chain_sds_fin)
        stats[pfname] = dict(err)
        stats[pfname]['diffdays'] = diff_days
        sds_parts.append(chain_sds_fin)
        sur_parts.append(chain_sur_fin)

        # make time-series plot
        ax = fig1.add_subplot(gs1[i, 0])
        ax.plot(dates_sur, chain_sur, 'o-', color='C1', markersize=4, label='survey all')
        ax.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2,
                label='survey interp')
        ax.plot(dates_fin, chain_sds_fin, 'o--', color='b', markersize=4, label='SDS')
        ax.set_title(pfname, fontweight='bold')
        if len(dates_sds) > 0:
            ax.set_xlim([dates_sds[0], dates_sds[-1]])
        ax.set_ylabel('chainage [m]')

        # make scatter plot and histogram
        _plot_scatter_and_hist(fig2, gs2, i, chain_sur_fin, chain_sds_fin, err, pfname)

    fig1.set_size_inches(19.2, 9.28)
    fig1.set_tight_layout(True)
    fig2.set_size_inches(19.2, 9.28)
    fig2.set_tight_layout(True)

    # pool the data of all the transects together
    chain_sds_all = np.concatenate(sds_parts)
    chain_sur_all = np.concatenate(sur_parts)
    err_all = _error_stats(chain_sur_all - chain_sds_all)
    stats['all'] = err_all

    # make plot with all datapoints (from all the transects)
    _plot_scatter_and_hist(fig3, gs3, 0, chain_sur_all, chain_sds_all, err_all,
                           'all transects')
    fig3.set_size_inches(9.2, 9.28)
    fig3.set_tight_layout(True)

    return stats