#==========================================================#
#==========================================================#
# Extract shorelines from Landsat images
#==========================================================#
#==========================================================#

#==========================================================#
# Initial settings
#==========================================================#
import os
import numpy as np
import matplotlib.pyplot as plt
import ee
import pdb

# other modules
from osgeo import gdal, ogr, osr
import pickle
import matplotlib.cm as cm
from pylab import ginput
from shapely.geometry import LineString

# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.measure as measure
import skimage.morphology as morphology

# machine learning modules
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.externals import joblib

# import own modules
import functions.utils as utils
import functions.sds as sds

# some other settings
# silence numpy floating-point warnings (divisions by 0, nans)
np.seterr(all='ignore')
# draw a grid on every axes and allow up to 100 open figures before warning
plt.rcParams.update({
    'axes.grid': True,
    'figure.max_open_warning': 100,
})
ee.Initialize()

#==========================================================#
# Parameters
#==========================================================#
# name of the site; also the name of its folder under ./data
sitename = 'NARRA'
# threshold for cloud cover: images above this fraction are skipped
cloud_thresh = 0.7
# set to True if you want the plots
plot_bool = False
# GDA94 / MGA Zone 56
output_epsg = 28356
# radius (in pixels) of disk for buffer (pixel classification)
buffer_size = 7
# number of pixels in a beach (pixel classification)
min_beach_size = 20
# maximum distance from reference point
dist_ref = 100
# minimum length of shoreline LineString to be kept
min_length_wl = 200

# results, filled in per satellite mission
output = {}
#==========================================================#
# Metadata
#==========================================================#

filepath = os.path.join(os.getcwd(), 'data', sitename)
# NOTE: unpickling can execute arbitrary code; only load metadata files that
# were produced locally by this project.
with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'rb') as f:
    metadata = pickle.load(f)

#%%
#==========================================================#
# Read S2 images
#==========================================================#


def _read_band_stack(path):
    """Read all bands of a raster file with GDAL.

    Arguments:
        path: path to the raster file.

    Returns:
        (im, georef): the bands stacked along axis 2 and the dataset's
        geotransform as a numpy array.

    Raises:
        IOError: if GDAL cannot open the file.
    """
    dataset = gdal.Open(path, gdal.GA_ReadOnly)
    if dataset is None:
        raise IOError('error: could not open ' + path)
    geotransform = np.array(dataset.GetGeoTransform())
    band_arrays = [dataset.GetRasterBand(k + 1).ReadAsArray()
                   for k in range(dataset.RasterCount)]
    # drop the reference so that GDAL can release the file
    dataset = None
    return np.stack(band_arrays, 2), geotransform


satname = 'S2'
dates = metadata[satname]['dates']
input_epsg = metadata[satname]['epsg']

# path to images
# os.listdir() returns entries in arbitrary order, so the listings are sorted
# to make index i deterministic across the three resolution folders.
# NOTE(review): this assumes the three folders use filenames that sort the
# same way and that the metadata lists follow that same order - confirm.
filepath10 = os.path.join(filepath, satname, '10m')
filenames10 = sorted(os.listdir(filepath10))
filepath20 = os.path.join(filepath, satname, '20m')
filenames20 = sorted(os.listdir(filepath20))
filepath60 = os.path.join(filepath, satname, '60m')
filenames60 = sorted(os.listdir(filepath60))
if not (len(filenames10) == len(filenames20) == len(filenames60)):
    # a bare string cannot be raised in Python 3, use a real exception
    raise ValueError('error: not the same amount of files for 10, 20 and 60 m')
N = len(filenames10)

# initialise variables
cloud_cover_ts = []
acc_georef_ts = []
date_acquired_ts = []
filename_ts = []
satname_ts = []
timestamp = []
shorelines = []
idx_skipped = []

spacing = '=========================================================='
msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
print(msg)

for i, (fn10, fn20, fn60) in enumerate(zip(filenames10, filenames20, filenames60)):

    # read 10m bands
    im10, georef = _read_band_stack(os.path.join(filepath10, fn10))
    im10 = im10/10000 # TOA scaled to 10000

    # if image is only zeros, skip it
    if np.sum(im10) < 1:
        print('skip ' + str(i) + ' - no data')
        idx_skipped.append(i)
        continue

    nrows = im10.shape[0]
    ncols = im10.shape[1]

    # read 20m band (SWIR1)
    im20, _ = _read_band_stack(os.path.join(filepath20, fn20))
    im20 = im20[:,:,0]
    im20 = im20/10000 # TOA scaled to 10000
    # resample (bilinear) onto the grid of the 10m bands
    im_swir = transform.resize(im20, (nrows, ncols), order=1,
                               preserve_range=True, mode='constant')
    im_swir = np.expand_dims(im_swir, axis=2)

    # append resampled swir band to the 10m bands
    im_ms = np.append(im10, im_swir, axis=2)

    # read 60m band (QA)
    im60, _ = _read_band_stack(os.path.join(filepath60, fn60))
    im_qa = im60[:,:,0]
    cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
    # nearest-neighbour resampling (order=0) onto the grid of the 10m bands
    cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0,
                                  preserve_range=True, mode='constant')

    # check if -inf or nan values on any band and add to cloud mask
    im_invalid = np.any(np.logical_or(np.isneginf(im_ms), np.isnan(im_ms)), axis=2)
    cloud_mask = np.logical_or(cloud_mask, im_invalid)

    # calculate cloud cover (fraction of masked pixels) and if above threshold, skip it
    cloud_cover = np.sum(cloud_mask)/cloud_mask.size
    if cloud_cover > cloud_thresh:
        print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
        idx_skipped.append(i)
        continue

    # rescale image intensity for display purposes
    im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)

    # plot rgb image
    plt.figure()
    plt.axis('off')
    plt.imshow(im_display)

    # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
    im_classif, im_labels = sds.classify_image_NN_nopan(im_ms, cloud_mask, min_beach_size, plot_bool)

    # store the data
    cloud_cover_ts.append(cloud_cover)
    acc_georef_ts.append(metadata[satname]['acc_georef'][i])
    filename_ts.append(fn10)
    satname_ts.append(satname)
    date_acquired_ts.append(fn10[:10])
    timestamp.append(dates[i])

# store in output structure
output[satname] = {'dates':timestamp,
                   'idx_skipped':idx_skipped,
                   'metadata':{'filenames':filename_ts,
                               'satname':satname_ts,
                               'cloud_cover':cloud_cover_ts,
                               'acc_georef':acc_georef_ts}}

# save output
#with open(os.path.join(filepath, sitename + '_output' + satname + '.pkl'), 'wb') as f:
#    pickle.dump(output, f)