You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
geetools_VH/extract_shorelines_test.py

322 lines
11 KiB
Python

# -*- coding: utf-8 -*-
#==========================================================#
# Extract shorelines from Landsat images
#==========================================================#
# Initial settings
import os
import numpy as np
import matplotlib.pyplot as plt
import ee
import pdb
# other modules
from osgeo import gdal, ogr, osr
import pickle
import matplotlib.cm as cm
from pylab import ginput
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.measure as measure
import skimage.morphology as morphology
# machine learning modules
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.externals import joblib
# import own modules
import functions.utils as utils
import functions.sds as sds
# ---- library settings ----
# numpy: do not warn/raise on floating-point problems (divisions by 0, nans)
np.seterr(all='ignore')
# matplotlib: allow up to 100 open figures before warning, grid on by default
plt.rcParams['figure.max_open_warning'] = 100
plt.rcParams['axes.grid'] = True
# initialise the Earth Engine module
ee.Initialize()

# ---- processing parameters ----
# threshold for cloud cover (images above this fraction are skipped)
cloud_thresh = 0.3
# if you want the plots
plot_bool = False
# minimum number of points contained in each water line
min_contour_points = 100
# EPSG code of the output coordinates: GDA94 / MGA Zone 56
output_epsg = 28356
# radius (in pixels) of disk for buffer (pixel classification)
buffer_size = 10
# number of pixels in a beach (pixel classification)
min_beach_size = 30
# satellite and site whose image collection is processed
satname = 'L8'
sitename = 'NARRA'
# alternative site: sitename = 'OLDBAR'

# folder that contains the pickled metadata of the collection
filepath = os.path.join(os.getcwd(), 'data', satname, sitename)

# read the four metadata pickles in the same order as before:
# timestamps, georeferencing accuracy, epsg code, reference points
_pkl_contents = {}
for _pkl_suffix in ('_timestamps', '_accuracy_georef', '_epsgcode', '_refpoints'):
    with open(os.path.join(filepath, sitename + _pkl_suffix + '.pkl'), 'rb') as f:
        _pkl_contents[_pkl_suffix] = pickle.load(f)
timestamps = _pkl_contents['_timestamps']
acc_georef = _pkl_contents['_accuracy_georef']
input_epsg = _pkl_contents['_epsgcode']
refpoints = _pkl_contents['_refpoints']

# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
# idx_sorted is the permutation that puts the timestamps in ascending order;
# applying it to both lists keeps them aligned with each other
idx_sorted = sorted(range(len(timestamps)), key=lambda k: timestamps[k])
timestamps_sorted = [timestamps[k] for k in idx_sorted]
acc_georef_sorted = [acc_georef[k] for k in idx_sorted]
# path to images (panchromatic and multispectral folders of the site)
file_path_pan = os.path.join(os.getcwd(), 'data', satname, sitename, 'pan')
file_path_ms = os.path.join(os.getcwd(), 'data', satname, sitename, 'ms')
# os.listdir() returns the entries in arbitrary order, but index i is used to
# pair a pan file with its ms file and with the date-sorted metadata, so both
# listings are sorted explicitly.
# NOTE(review): assumes the file names sort chronologically (acquisition date
# embedded in the name) — confirm against the download script
file_names_pan = sorted(os.listdir(file_path_pan))
file_names_ms = sorted(os.listdir(file_path_ms))
# number of images in the collection
N = len(file_names_pan)

# initialise some variables (one entry per image that is kept)
cloud_cover_ts = []      # cloud cover of each kept image
date_acquired_ts = []    # acquisition date string taken from the file name
acc_georef_ts = []       # georeferencing accuracy of each kept image
idx_skipped = []         # indices of images skipped (cloudy or repeated date)
idx_nocloud = []         # indices of images below the cloud cover threshold
t = []                   # timestamps of the kept images
shorelines = []          # shorelines of the kept images
#%%
for i in [20]:#range(N):
# read pan image
fn_pan = os.path.join(file_path_pan, file_names_pan[i])
data = gdal.Open(fn_pan, gdal.GA_ReadOnly)
# geotransform of the panchromatic raster, kept as a numpy array
georef = np.array(data.GetGeoTransform())
# the band counter is called `b` (not `i`) so it can never be confused with,
# or on Python 2 overwrite, the image index `i` of the enclosing loop;
# GetRasterBand is called with b + 1, i.e. band numbers start at 1
bands = [data.GetRasterBand(b + 1).ReadAsArray() for b in range(data.RasterCount)]
# only the first band of the pan file is used
im_pan = np.stack(bands, 2)[:, :, 0]
nrows = im_pan.shape[0]
ncols = im_pan.shape[1]
# read ms image (all bands stacked along the third axis)
fn_ms = os.path.join(file_path_ms, file_names_ms[i])
data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
bands = [data.GetRasterBand(b + 1).ReadAsArray() for b in range(data.RasterCount)]
im_ms = np.stack(bands, 2)
# cloud mask
im_qa = im_ms[:,:,5]
cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
cloud_mask = transform.resize(cloud_mask, (im_pan.shape[0], im_pan.shape[1]),
order=0, preserve_range=True,
mode='constant').astype('bool_')
# resize the image using bilinear interpolation (order 1)
im_ms = transform.resize(im_ms,(im_pan.shape[0], im_pan.shape[1]),
order=1, preserve_range=True, mode='constant')
# check if -inf or nan values and add to cloud mask
im_inf = np.isin(im_ms[:,:,0], -np.inf)
im_nan = np.isnan(im_ms[:,:,0])
cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
# calculate cloud cover and skip image if too high
cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
if cloud_cover > cloud_thresh:
print('skip ' + str(i) + ' - cloudy (' + str(cloud_cover) + ')')
idx_skipped.append(i)
continue
idx_nocloud.append(i)
# check if image for that date already exists and choose the best in terms of cloud cover and georeferencing
if file_names_pan[i][len(satname)+1+len(sitename)+1:len(satname)+1+len(sitename)+1+10] in date_acquired_ts:
# find the index of the image that is repeated
idx_samedate = utils.find_indices(date_acquired_ts, lambda e : e == file_names_pan[i][9:19])
idx_samedate = idx_samedate[0]
print('cloud cover ' + str(cloud_cover) + ' - ' + str(cloud_cover_ts[idx_samedate]))
print('acc georef ' + str(acc_georef_sorted[i]) + ' - ' + str(acc_georef_ts[idx_samedate]))
# keep image with less cloud cover or best georeferencing accuracy
if cloud_cover < cloud_cover_ts[idx_samedate] - 0.01:
skip = False
elif acc_georef_sorted[i] < acc_georef_ts[idx_samedate]:
skip = False
else:
skip = True
if skip:
print('skip ' + str(i) + ' - repeated')
idx_skipped.append(i)
continue
else:
del shorelines[idx_samedate]
del t[idx_samedate]
del cloud_cover_ts[idx_samedate]
del date_acquired_ts[idx_samedate]
del acc_georef_ts[idx_samedate]
print('keep ' + str(i) + ' - deleted ' + str(idx_samedate))
# pansharpen rgb image
im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, plot_bool)
# rescale pansharpened RGB for visualisation
im_display = sds.rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 100, False)
# add down-sized bands for NIR and SWIR (since pansharpening is not possible)
im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2)
# classify image in 4 classes (sand, whitewater, water, other) with NN classifier
im_classif, im_labels = sds.classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, True)
t.append(timestamps_sorted[i])
cloud_cover_ts.append(cloud_cover)
acc_georef_ts.append(acc_georef_sorted[i])
date_acquired_ts.append(file_names_pan[i][9:19])
# boolean masks for class labels 1, 2 and 3 (sand, swash, water)
im_sand, im_swash, im_water = (im_classif == label for label in (1, 2, 3))
# same masks as 1-D vectors, one element per pixel
vec_sand = im_sand.reshape(nrows * ncols)
vec_swash = im_swash.reshape(nrows * ncols)
vec_water = im_water.reshape(nrows * ncols)
# NIR and SWIR bands (indices 3 and 4 of the pansharpened stack)
im_nir = im_ms_ps[:, :, 3]
im_swir = im_ms_ps[:, :, 4]
# calculate indices: each one combines NIR or SWIR with band 1
im_ndwi = sds.nd_index(im_nir, im_ms_ps[:, :, 1], cloud_mask, plot_bool)
im_ndmwi = sds.nd_index(im_swir, im_ms_ps[:, :, 1], cloud_mask, plot_bool)
# stack both indices along a new last axis, then flatten to one row per pixel
# (column 0: ndwi, column 1: ndmwi)
im_ind = np.stack([im_ndwi, im_ndmwi], axis=-1)
vec_ind = im_ind.reshape(nrows * ncols, 2)
# remove noise and only keep the sand belonging to large beaches
# (in_place=True: im_sand itself is modified)
# NOTE(review): `in_place` is deprecated in recent scikit-image releases —
# check the installed version
morphology.remove_small_objects(im_sand, min_size=50, connectivity=2, in_place=True)

# create a buffer around beach by dilating the sand mask with a disk
# NOTE(review): this rebinds buffer_size, which is set to 10 in the
# parameters at the top of the script — confirm which value is intended
buffer_size = 7
se = morphology.disk(buffer_size)
im_buffer = morphology.binary_dilation(im_sand, se)
vec_buffer = im_buffer.reshape(nrows * ncols)

# display buffer: everything outside the buffer is set to 1 in the first
# three channels of the RGB copy and to NaN in the water index copy
outside_buffer = ~im_buffer
im = np.copy(im_display)
for channel in range(3):
    im[outside_buffer, channel] = 1
im2 = np.copy(im_ndmwi)
im2[outside_buffer] = np.nan

plt.figure()
# left panel: RGB image restricted to the buffer
ax1 = plt.subplot(1, 2, 1)
plt.imshow(im)
plt.axis('off')
plt.title('RGB')
# right panel: water index restricted to the buffer, axes linked to the left panel
ax2 = plt.subplot(1, 2, 2, sharex=ax1, sharey=ax1)
plt.imshow(im2, cmap='seismic')
plt.colorbar()
plt.axis('off')
plt.title('Water Index')
plt.tight_layout()
plt.draw()
# select water/sand/swash pixels that are within the buffer
# (rows of vec_ind; column 0 is ndwi, column 1 is ndmwi)
int_water = vec_ind[vec_buffer & vec_water, :]
int_sand = vec_ind[vec_buffer & vec_sand, :]
int_swash = vec_ind[vec_buffer & vec_swash, :]
# append sand and water (swash is left out of the threshold computation)
int_all = np.concatenate((int_water, int_sand), axis=0)
# Otsu threshold for each index
t_ndwi = filters.threshold_otsu(int_all[:, 0])
t_ndmwi = filters.threshold_otsu(int_all[:, 1])

# one histogram panel per index, with the threshold drawn as a red line
fig, ax = plt.subplots(2, 1, sharex=True)
hist_panels = (
    (0, t_ndwi, 'Water Index NIR-G'),
    (1, t_ndmwi, 'Modified Water Index SWIR-G'),
)
for col, thresh, panel_title in hist_panels:
    vals = ax[col].hist(int_water[:, col], bins=100, label='water')
    ax[col].hist(int_sand[:, col], bins=100, alpha=0.5, label='sand')
    ax[col].hist(int_swash[:, col], bins=100, alpha=0.5, label='swash')
    # the line is as tall as the highest bin of the water histogram
    ax[col].plot([thresh, thresh], [0, np.max(vals[0])], 'r-')
    ax[col].legend()
    ax[col].set_title(panel_title)
plt.draw()
# contour each water index at its threshold, using only the pixels inside
# the buffer (everything outside is set to NaN first)
im_ndwi_buffer = np.copy(im_ndwi)
im_ndwi_buffer[~im_buffer] = np.nan
contours1 = measure.find_contours(im_ndwi_buffer, t_ndwi)
im_ndmwi_buffer = np.copy(im_ndmwi)
im_ndmwi_buffer[~im_buffer] = np.nan
contours2 = measure.find_contours(im_ndmwi_buffer, t_ndmwi)

# The contour loops below iterate over the contours directly. They used to be
# `for i, contour in enumerate(...)`, which overwrote the image index `i` of
# the enclosing loop although the counter was never used.
plt.figure()
# panel 1: RGB image with the class labels painted on top
ax1 = plt.subplot(1, 3, 1)
im = np.copy(im_display)
# define colours for plot (one RGB triple per layer of im_labels)
colours = np.array([[1, 128/255, 0/255], [204/255, 1, 1], [0, 0, 204/255]])
for k in range(im_labels.shape[2]):
    im[im_labels[:, :, k], 0] = colours[k, 0]
    im[im_labels[:, :, k], 1] = colours[k, 1]
    im[im_labels[:, :, k], 2] = colours[k, 2]
plt.imshow(im)
# column 1 of a contour is plotted as x, column 0 as y
for contour in contours2:
    plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k')
plt.tight_layout()
plt.grid(False)
plt.draw()
# panel 2: plain RGB image with the contours
plt.subplot(1, 3, 2, sharex=ax1, sharey=ax1)
plt.imshow(im_display)
for contour in contours2:
    plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k')
plt.tight_layout()
plt.grid(False)
plt.draw()
# panel 3: the index that was contoured (im_ndmwi) with the contours
plt.subplot(1, 3, 3, sharex=ax1, sharey=ax1)
plt.imshow(im_ndmwi, cmap='seismic')
plt.colorbar()
for contour in contours2:
    plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k')
plt.tight_layout()
plt.grid(False)
plt.draw()
# plot of all the indices: five panels side by side, axes linked to the first
plt.figure()
# (image, colormap, title) for each panel, left to right; the RGB image is
# shown without an explicit colormap
index_panels = [
    (im_display, None, 'RGB'),
    (im_ndwi, 'seismic', 'NDWI'),
    (im_ndmwi, 'seismic', 'NDMWI'),
    (im_nir, 'seismic', 'NIR'),
    (im_swir, 'seismic', 'SWIR'),
]
for panel_no, (panel_im, panel_cmap, panel_title) in enumerate(index_panels, start=1):
    if panel_no == 1:
        # the first panel owns the axes that the others share
        ax1 = plt.subplot(1, 5, panel_no)
    else:
        plt.subplot(1, 5, panel_no, sharex=ax1, sharey=ax1)
    plt.imshow(panel_im, cmap=panel_cmap)
    plt.xticks([])
    plt.yticks([])
    plt.axis('off')
    plt.title(panel_title)