#==========================================================#
#==========================================================#
# Extract shorelines from Landsat and Sentinel-2 images
#==========================================================#
#==========================================================#

#==========================================================#
# Initial settings
#==========================================================#

import os
import numpy as np
import matplotlib.pyplot as plt
import ee
import pdb

# other modules
from osgeo import gdal, ogr, osr
import pickle
import matplotlib.cm as cm
from pylab import ginput
from shapely.geometry import LineString

# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.measure as measure
import skimage.morphology as morphology

# machine learning modules
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.externals import joblib  # deprecated in recent scikit-learn; use `import joblib` there

# import own modules
import functions.utils as utils
import functions.sds as sds

# some other settings
np.seterr(all='ignore')  # ignore divisions by 0 and nans
plt.rcParams['axes.grid'] = True
plt.rcParams['figure.max_open_warning'] = 100
ee.Initialize()

#==========================================================#
# Parameters
#==========================================================#

sitename = 'NARRA'

cloud_thresh = 0.7     # maximum fraction of cloudy pixels before an image is skipped
plot_bool = False      # if True, show intermediate plots
output_epsg = 28356    # spatial reference of the output (GDA94 / MGA Zone 56)
buffer_size = 7        # radius (in pixels) of the disk used to buffer sandy pixels (pixel classification)
min_beach_size = 20    # minimum number of pixels for a beach (pixel classification)
dist_ref = 100         # maximum distance (m) from the reference points to keep a shoreline point
min_length_wl = 200    # minimum length (m) of a shoreline LineString to be kept
manual_bool = True     # if True, manually check each image

output = dict([])

#==========================================================#
# Metadata
#==========================================================#

filepath = os.path.join(os.getcwd(), 'data', sitename)
with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'rb') as f:
    metadata = pickle.load(f)

# load the reference points used further down to discard shoreline contours
# that are too far from the beach.
# NOTE: the original script used `refpoints` below without ever defining it;
# it is assumed here that the reference points were digitised beforehand and
# pickled next to the metadata. The filename below is a guess -- adjust it to
# wherever the reference points were actually saved.
with open(os.path.join(filepath, sitename + '_refpoints' + '.pkl'), 'rb') as f:
    refpoints = pickle.load(f)

#%%
#==========================================================#
# Read S2 images
#==========================================================#

satname = 'S2'
dates = metadata[satname]['dates']
input_epsg = 32756  # metadata[satname]['epsg']

# path to images
filepath10 = os.path.join(os.getcwd(), 'data', sitename, satname, '10m')
filenames10 = os.listdir(filepath10)
filepath20 = os.path.join(os.getcwd(), 'data', sitename, satname, '20m')
filenames20 = os.listdir(filepath20)
filepath60 = os.path.join(os.getcwd(), 'data', sitename, satname, '60m')
filenames60 = os.listdir(filepath60)
if (not len(filenames10) == len(filenames20)) or (not len(filenames20) == len(filenames60)):
    raise ValueError('not the same number of files for 10, 20 and 60 m')
N = len(filenames10)

# initialise variables
cloud_cover_ts = []
acc_georef_ts = []
date_acquired_ts = []
filename_ts = []
satname_ts = []
timestamp = []
shorelines = []
idx_skipped = []

spacing = '=========================================================='
msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
print(msg)

for i in range(N):

    # read 10m bands
    fn = os.path.join(filepath10, filenames10[i])
    data = gdal.Open(fn, gdal.GA_ReadOnly)
    georef = np.array(data.GetGeoTransform())
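    # GDAL's geotransform is the 6-element affine mapping from pixel indices
    # (col, row) to projected coordinates:
    #   X = georef[0] + col*georef[1] + row*georef[2]
    #   Y = georef[3] + col*georef[4] + row*georef[5]
    # georef[0]/georef[3] give the top-left corner of the image, georef[1] and
    # georef[5] the pixel width and height (height negative for north-up
    # images), and georef[2]/georef[4] the rotation terms (0 for axis-aligned
    # rasters). This is what sds.convert_pix2world uses further down.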
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im10 = np.stack(bands, 2)
    im10 = im10/10000  # TOA reflectance scaled by 10000

    # if image is only zeros, skip it
    if np.sum(im10) < 1:
        print('skip ' + str(i) + ' - no data')
        idx_skipped.append(i)
        continue

    nrows = im10.shape[0]
    ncols = im10.shape[1]

    # read 20m band (SWIR1)
    fn = os.path.join(filepath20, filenames20[i])
    data = gdal.Open(fn, gdal.GA_ReadOnly)
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im20 = np.stack(bands, 2)
    im20 = im20[:,:,0]
    im20 = im20/10000  # TOA reflectance scaled by 10000
    im_swir = transform.resize(im20, (nrows, ncols), order=1, preserve_range=True, mode='constant')
    im_swir = np.expand_dims(im_swir, axis=2)

    # append the resampled SWIR1 band to the 10m bands
    im_ms = np.append(im10, im_swir, axis=2)

    # read 60m band (QA)
    fn = os.path.join(filepath60, filenames60[i])
    data = gdal.Open(fn, gdal.GA_ReadOnly)
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im60 = np.stack(bands, 2)
    im_qa = im60[:,:,0]
    cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
    cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0,
                                  preserve_range=True, mode='constant').astype('bool_')
    # (cast back to bool, as in the Landsat blocks below: resize returns floats)

    # check for -inf or nan values on any band and add them to the cloud mask
    for k in range(im_ms.shape[2]):
        im_inf = np.isin(im_ms[:,:,k], -np.inf)
        im_nan = np.isnan(im_ms[:,:,k])
        cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)

    # calculate cloud cover and, if above threshold, skip the image
    cloud_cover = np.sum(cloud_mask.astype(int))/(cloud_mask.shape[0]*cloud_mask.shape[1])
    if cloud_cover > cloud_thresh:
        print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
        idx_skipped.append(i)
        continue

    # rescale image intensity for display purposes
    im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)

    # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
    im_classif, im_labels = sds.classify_image_NN_nopan(im_ms, cloud_mask, min_beach_size, plot_bool)

    # if there aren't any sandy pixels, use a global threshold on the NDWI image
    if np.sum(im_labels[:,:,0]) == 0:
        im_ndwi = sds.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask, plot_bool)
        # stored as contours_mwi so the code below works in both branches
        # (the original assigned this to an unused `contours` variable,
        # leaving contours_mwi undefined)
        contours_mwi = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
    else:
        # use a threshold specific to the sand/water interface
        contours_wi, contours_mwi = sds.find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size, plot_bool)

    # convert from pixel to world coordinates
    wl_coords = sds.convert_pix2world(contours_mwi, georef)
    # convert to the output epsg spatial reference
    wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)

    # remove contour lines that have a perimeter < min_length_wl
    wl_good = []
    for l, wls in enumerate(wl):
        coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
        a = LineString(coords)  # shapely LineString structure
        if a.length >= min_length_wl:
            wl_good.append(wls)

    # format points and only keep the ones close to the reference points
    x_points = np.array([])
    y_points = np.array([])
    for k in range(len(wl_good)):
        x_points = np.append(x_points, wl_good[k][:,0])
        y_points = np.append(y_points, wl_good[k][:,1])
    wl_good = np.transpose(np.array([x_points, y_points]))
    temp = np.zeros(len(wl_good)).astype(bool)
    for k in range(len(refpoints)):
        temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp)
    wl_final = wl_good[temp]

    # plot output
    plt.figure()
    im = np.copy(im_display)
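    # overlay colours, one row per class layer in im_labels (same order as the
    # class list above): sand -> orange, whitewater -> light cyan, water -> blue;
    # 'other' and cloud-masked pixels keep the RGB background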
    colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
    for k in range(0, im_labels.shape[2]):
        im[im_labels[:,:,k],0] = colours[k,0]
        im[im_labels[:,:,k],1] = colours[k,1]
        im[im_labels[:,:,k],2] = colours[k,2]
    plt.imshow(im)
    for k, contour in enumerate(contours_mwi):
        plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
    plt.title(satname + ' ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d')
              + ' acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m')
    plt.draw()

    pt_in = np.array(ginput(n=1, timeout=1000))
    plt.close()

    # if the image is rejected, skip it (a click on the bottom half of the
    # figure rejects the image, since image row coordinates increase downwards)
    if pt_in[0][1] > nrows/2:
        print('skip ' + str(i) + ' - rejected')
        idx_skipped.append(i)
        continue

    # if accepted, store the data
    cloud_cover_ts.append(cloud_cover)
    acc_georef_ts.append(metadata[satname]['acc_georef'][i])
    filename_ts.append(filenames10[i])
    satname_ts.append(satname)
    date_acquired_ts.append(filenames10[i][:10])
    timestamp.append(metadata[satname]['dates'][i])
    shorelines.append(wl_final)

# store in output structure
output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
                   'metadata':{'filenames':filename_ts, 'satname':satname_ts,
                               'cloud_cover':cloud_cover_ts, 'acc_georef':acc_georef_ts}}
del idx_skipped

#%%
#==========================================================#
# Read L7&L8 images
#==========================================================#

satname = 'L8'
dates = metadata[satname]['dates']
input_epsg = 32656  # metadata[satname]['epsg']

# path to images
filepath_pan = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8', 'pan')
filepath_ms = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8', 'ms')
filenames_pan = os.listdir(filepath_pan)
filenames_ms = os.listdir(filepath_ms)
if not len(filenames_pan) == len(filenames_ms):
    raise ValueError('not the same number of files for pan and ms')
N = len(filenames_pan)

# initialise variables
cloud_cover_ts = []
acc_georef_ts = []
date_acquired_ts = []
filename_ts = []
satname_ts = []
timestamp = []
shorelines = []
idx_skipped = []

spacing = '=========================================================='
msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
print(msg)

for i in range(N):

    # get satellite name ('L7' or 'L8') from the filename
    sat = filenames_pan[i][20:22]

    # read pan image
    fn_pan = os.path.join(filepath_pan, filenames_pan[i])
    data = gdal.Open(fn_pan, gdal.GA_ReadOnly)
    georef = np.array(data.GetGeoTransform())
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im_pan = np.stack(bands, 2)[:,:,0]
    nrows = im_pan.shape[0]
    ncols = im_pan.shape[1]

    # read ms image
    fn_ms = os.path.join(filepath_ms, filenames_ms[i])
    data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im_ms = np.stack(bands, 2)

    # cloud mask
    im_qa = im_ms[:,:,5]
    cloud_mask = sds.create_cloud_mask(im_qa, sat, plot_bool)
    cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0,
                                  preserve_range=True, mode='constant').astype('bool_')

    # resize the ms bands to the pan grid using bilinear interpolation (order 1)
    im_ms = im_ms[:,:,:5]
    im_ms = transform.resize(im_ms, (nrows, ncols), order=1, preserve_range=True, mode='constant')
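    # note on the interpolation orders: the cloud mask is resampled with
    # order=0 (nearest neighbour) so it stays binary, while the reflectance
    # bands use order=1 (bilinear) so values are interpolated smoothly between
    # neighbouring pixels.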
    # check for -inf or nan values on any band (including pan) and add them to the cloud mask
    for k in range(im_ms.shape[2]+1):
        if k == 5:
            im_inf = np.isin(im_pan, -np.inf)
            im_nan = np.isnan(im_pan)
        else:
            im_inf = np.isin(im_ms[:,:,k], -np.inf)
            im_nan = np.isnan(im_ms[:,:,k])
        cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)

    # calculate cloud cover and skip the image if above threshold
    cloud_cover = np.sum(cloud_mask.astype(int))/(cloud_mask.shape[0]*cloud_mask.shape[1])
    if cloud_cover > cloud_thresh:
        print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
        idx_skipped.append(i)
        continue

    # pansharpen the image (different bands for L7 and L8, since the L7 pan
    # band spans green to near-infrared while the L8 pan band only covers the
    # visible range)
    if sat == 'L7':
        # pansharpen (Green, Red, NIR) and append the merely resampled Blue and SWIR1 bands
        im_ms_ps = sds.pansharpen(im_ms[:,:,[1,2,3]], im_pan, cloud_mask, plot_bool)
        im_ms_ps = np.append(im_ms[:,:,[0]], im_ms_ps, axis=2)
        im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[4]], axis=2)
        im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)
    elif sat == 'L8':
        # pansharpen the RGB bands and append the merely resampled NIR and SWIR1 bands
        im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, plot_bool)
        im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2)
        im_display = sds.rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)

    # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
    im_classif, im_labels = sds.classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, plot_bool)

    # if there aren't any sandy pixels, use a global threshold on the NDWI image
    if np.sum(im_labels[:,:,0]) == 0:
        im_ndwi = sds.nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, plot_bool)
        # stored as contours_mwi so the code below works in both branches
        contours_mwi = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
    else:
        # use a threshold specific to the sand/water interface
        contours_wi, contours_mwi = sds.find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size, plot_bool)

    # convert from pixel to world coordinates
    wl_coords = sds.convert_pix2world(contours_mwi, georef)
    # convert to the output epsg spatial reference
    wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)

    # remove contour lines that have a perimeter < min_length_wl
    wl_good = []
    for l, wls in enumerate(wl):
        coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
        a = LineString(coords)  # shapely LineString structure
        if a.length >= min_length_wl:
            wl_good.append(wls)

    # format points and only keep the ones close to the reference points
    x_points = np.array([])
    y_points = np.array([])
    for k in range(len(wl_good)):
        x_points = np.append(x_points, wl_good[k][:,0])
        y_points = np.append(y_points, wl_good[k][:,1])
    wl_good = np.transpose(np.array([x_points, y_points]))
    temp = np.zeros(len(wl_good)).astype(bool)
    for k in range(len(refpoints)):
        temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp)
    wl_final = wl_good[temp]

    # plot output
    plt.figure()
    plt.subplot(121)
    im = np.copy(im_display)
    colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
    for k in range(0, im_labels.shape[2]):
        im[im_labels[:,:,k],0] = colours[k,0]
        im[im_labels[:,:,k],1] = colours[k,1]
        im[im_labels[:,:,k],2] = colours[k,2]
    plt.imshow(im)
    for k, contour in enumerate(contours_mwi):
        plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
    plt.title(sat + ' ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d')
              + ' acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m')

    pt_in = np.array(ginput(n=1, timeout=1000))
    plt.close()

    # if the image is rejected, skip it (click on the bottom half of the figure to reject)
    if pt_in[0][1] > nrows/2:
        print('skip ' + str(i) + ' - rejected')
        idx_skipped.append(i)
        continue

    # if accepted, store the data
    cloud_cover_ts.append(cloud_cover)
    acc_georef_ts.append(metadata[satname]['acc_georef'][i])
    filename_ts.append(filenames_pan[i])
    satname_ts.append(sat)
    date_acquired_ts.append(filenames_pan[i][:10])
    timestamp.append(metadata[satname]['dates'][i])
    shorelines.append(wl_final)

# store in output structure
output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
                   'metadata':{'filenames':filename_ts, 'satname':satname_ts,
                               'cloud_cover':cloud_cover_ts, 'acc_georef':acc_georef_ts}}
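# each satellite key of `output` now holds the same structure:
#   'dates'       : datetimes of the accepted images
#   'shorelines'  : one Nx2 array of (x, y) coordinates (in output_epsg) per image
#   'idx_skipped' : indices of the skipped images
#   'metadata'    : per-image filenames, satellite name, cloud cover and
#                   georeferencing accuracy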
del idx_skipped

#%%
#==========================================================#
# Read L5 images
#==========================================================#

satname = 'L5'
dates = metadata[satname]['dates']
input_epsg = 32656  # metadata[satname]['epsg']

# path to images
filepath_img = os.path.join(os.getcwd(), 'data', sitename, satname, '30m')
filenames = os.listdir(filepath_img)
N = len(filenames)

# initialise variables
cloud_cover_ts = []
acc_georef_ts = []
date_acquired_ts = []
filename_ts = []
satname_ts = []
timestamp = []
shorelines = []
idx_skipped = []

spacing = '=========================================================='
msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
print(msg)

for i in range(N):

    # read ms image
    fn = os.path.join(filepath_img, filenames[i])
    data = gdal.Open(fn, gdal.GA_ReadOnly)
    georef = np.array(data.GetGeoTransform())
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im_ms = np.stack(bands, 2)

    # target grid for resampling to half the original pixel size (30 m -> 15 m)
    nrows = im_ms.shape[0]*2
    ncols = im_ms.shape[1]*2

    # cloud mask
    im_qa = im_ms[:,:,5]
    im_ms = im_ms[:,:,:-1]
    cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
    cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0,
                                  preserve_range=True, mode='constant').astype('bool_')

    # resize the image using bilinear interpolation (order 1)
    im_ms = transform.resize(im_ms, (nrows, ncols), order=1, preserve_range=True, mode='constant')

    # adjust the georef vector (pixel size becomes 15 m and the origin is
    # shifted to the centre of the new corner pixel)
    georef[1] = 15
    georef[5] = -15
    georef[0] = georef[0] + 7.5
    georef[3] = georef[3] - 7.5

    # check for -inf or nan values on any band and add them to the cloud mask
    for k in range(im_ms.shape[2]):
        im_inf = np.isin(im_ms[:,:,k], -np.inf)
        im_nan = np.isnan(im_ms[:,:,k])
        cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)

    # calculate cloud cover and skip the image if above threshold
    cloud_cover = np.sum(cloud_mask.astype(int))/(cloud_mask.shape[0]*cloud_mask.shape[1])
    if cloud_cover > cloud_thresh:
        print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
        idx_skipped.append(i)
        continue

    # rescale image intensity for display purposes
    im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)

    # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
    im_classif, im_labels = sds.classify_image_NN_nopan(im_ms, cloud_mask, min_beach_size, plot_bool)

    # if there aren't any sandy pixels, use a global threshold on the NDWI image
    if np.sum(im_labels[:,:,0]) == 0:
        im_ndwi = sds.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask, plot_bool)
        # stored as contours_mwi so the code below works in both branches
        contours_mwi = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
    else:
        # use a threshold specific to the sand/water interface
        contours_wi, contours_mwi = sds.find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size, plot_bool)

    # convert from pixel to world coordinates
    wl_coords = sds.convert_pix2world(contours_mwi, georef)
    # convert to the output epsg spatial reference
    wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)

    # remove contour lines that have a perimeter < min_length_wl
    wl_good = []
    for l, wls in enumerate(wl):
        coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
        a = LineString(coords)  # shapely LineString structure
        if a.length >= min_length_wl:
            wl_good.append(wls)
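    # keep only the shoreline points lying within dist_ref of at least one
    # reference point: each pass of the second loop below computes the distance
    # from every point to one reference point and ORs the resulting boolean
    # mask into `temp`.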
    # format points and only keep the ones close to the reference points
    x_points = np.array([])
    y_points = np.array([])
    for k in range(len(wl_good)):
        x_points = np.append(x_points, wl_good[k][:,0])
        y_points = np.append(y_points, wl_good[k][:,1])
    wl_good = np.transpose(np.array([x_points, y_points]))
    temp = np.zeros(len(wl_good)).astype(bool)
    for k in range(len(refpoints)):
        temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp)
    wl_final = wl_good[temp]

    # plot output
    plt.figure()
    plt.subplot(121)
    im = np.copy(im_display)
    colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
    for k in range(0, im_labels.shape[2]):
        im[im_labels[:,:,k],0] = colours[k,0]
        im[im_labels[:,:,k],1] = colours[k,1]
        im[im_labels[:,:,k],2] = colours[k,2]
    plt.imshow(im)
    for k, contour in enumerate(contours_mwi):
        plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
    plt.title(satname + ' ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d')
              + ' acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m')

    plt.subplot(122)
    plt.axis('equal')
    plt.axis('off')
    plt.plot(refpoints[:,0], refpoints[:,1], 'k.')
    plt.plot(wl_final[:,0], wl_final[:,1], 'r.')

    mng = plt.get_current_fig_manager()
    mng.window.showMaximized()  # requires the Qt matplotlib backend
    plt.tight_layout()
    plt.draw()

    pt_in = np.array(ginput(n=1, timeout=1000))
    plt.close()

    # if the image is rejected, skip it (click on the bottom half of the figure to reject)
    if pt_in[0][1] > nrows/2:
        print('skip ' + str(i) + ' - rejected')
        idx_skipped.append(i)
        continue

    # if accepted, store the data
    cloud_cover_ts.append(cloud_cover)
    acc_georef_ts.append(metadata[satname]['acc_georef'][i])
    filename_ts.append(filenames[i])
    satname_ts.append(satname)
    date_acquired_ts.append(filenames[i][:10])
    timestamp.append(metadata[satname]['dates'][i])
    shorelines.append(wl_final)

# store in output structure
output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
                   'metadata':{'filenames':filename_ts, 'satname':satname_ts,
                               'cloud_cover':cloud_cover_ts, 'acc_georef':acc_georef_ts}}
del idx_skipped

#==========================================================#
#==========================================================#
#==========================================================#
#==========================================================#

#%%
# save output
with open(os.path.join(filepath, sitename + '_output' + '.pkl'), 'wb') as f:
    pickle.dump(output, f)

# save idx_skipped
#idx_skipped = dict([])
#for satname in list(output.keys()):
#    idx_skipped[satname] = output[satname]['idx_skipped']
#with open(os.path.join(filepath, sitename + '_idxskipped' + '.pkl'), 'wb') as f:
#    pickle.dump(idx_skipped, f)
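# a minimal sketch of how the saved output can be loaded back for further
# analysis (commented out, like the idx_skipped block above; assumes the same
# sitename and filepath as in this script):
#with open(os.path.join(filepath, sitename + '_output' + '.pkl'), 'rb') as f:
#    output = pickle.load(f)
#for satname in output.keys():
#    print('%s: %d shorelines' % (satname, len(output[satname]['shorelines'])))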