diff --git a/download_images.py b/download_images.py
new file mode 100644
index 0000000..a44fca8
--- /dev/null
+++ b/download_images.py
@@ -0,0 +1,390 @@
+#==========================================================#
+#==========================================================#
+# Download L5, L7, L8, S2 images of a given area
+#==========================================================#
+#==========================================================#
+
+
+
+#==========================================================#
+# Initial settings
+#==========================================================#
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+import pdb
+import ee
+
+# other modules
+from osgeo import gdal, ogr, osr
+from urllib.request import urlretrieve
+import zipfile
+from datetime import datetime
+import pytz
+import pickle
+
+# import own modules
+import functions.utils as utils
+import functions.sds as sds
+
+np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
+ee.Initialize()
+
+#==========================================================#
+# Location
+#==========================================================#
+
+## location (Narrabeen-Collaroy beach)
+#polygon = [[[151.301454, -33.700754],
+#            [151.311453, -33.702075],
+#            [151.307237, -33.739761],
+#            [151.294220, -33.736329],
+#            [151.301454, -33.700754]]];
+
+# location (Tairua beach)
+sitename = 'TAIRUA'
+polygon = [[[175.835574, -36.982022],
+            [175.888220, -36.980680],
+            [175.893527, -37.029610],
+            [175.833444, -37.031767],
+            [175.835574, -36.982022]]];
+
+# initialise metadata dictionary (stores timestamps and georeferencing accuracy of each image)
+metadata = dict([])
+
+# create directories
+try:
+    os.makedirs(os.path.join(os.getcwd(), 'data', sitename))
+except FileExistsError:
+    print('directory already exists')
+
+
+#%%
+#==========================================================#
+#==========================================================#
+# L5
+#==========================================================#
+#==========================================================#
+
+
+
+# define filenames for images
+suffix = '.tif'
+filepath = os.path.join(os.getcwd(), 'data', sitename, 'L5', '30m')
+try:
+    os.makedirs(filepath)
+except FileExistsError:
+    print('directory already exists')
+
+#==========================================================#
+# Select L5 collection
+#==========================================================#
+
+satname = 'L5'
+input_col = ee.ImageCollection('LANDSAT/LT05/C01/T1_TOA')
+
+# filter by location
+flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
+n_img = flt_col.size().getInfo()
+print('Number of images covering ' + sitename, n_img)
+im_all = flt_col.getInfo().get('features')
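+
+# Sanity check (illustrative only): each element of im_all is a dictionary with
+# an 'id' used below to rebuild the ee.Image, and a 'properties' dictionary
+# holding the fields read in the loop ('system:time_start' in ms since epoch
+# and, when present, 'GEOMETRIC_RMSE_MODEL'). For example:
+# print(im_all[0]['id'])
+# print(im_all[0]['properties']['system:time_start'])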
+
+
+#==========================================================#
+# Main loop through images
+#==========================================================#
+
+timestamps = []
+acc_georef = []
+all_names = []
+for i in range(n_img):
+
+    # find each image in ee database
+    im = ee.Image(im_all[i].get('id'))
+
+    im_dic = im.getInfo()
+    im_bands = im_dic.get('bands')
+    t = im_dic['properties']['system:time_start']
+    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
+    timestamps.append(im_timestamp)
+    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
+    im_epsg = int(im_dic['bands'][0]['crs'][5:])
+    try:
+        acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
+    except KeyError:
+        acc_georef.append(12)
+        print('No geometric rmse model property')
+
+    # delete the dimensions key from the band dictionaries, otherwise the entire image is extracted
+    for j in range(len(im_bands)): del im_bands[j]['dimensions']
+
+    # bands for L5
+    ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[7]]
+
+    # filenames
+    filename = im_date + '_' + satname + '_' + sitename + suffix
+
+    print(i)
+    if filename in all_names:
+        filename = im_date + '_' + satname + '_' + sitename + '_dup' + suffix
+    all_names.append(filename)
+
+    local_data = sds.download_tif(im, polygon, ms_bands, filepath)
+    os.rename(local_data, os.path.join(filepath, filename))
+
+# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
+timestamps_sorted = sorted(timestamps)
+idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
+acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
+
+metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}
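+
+# Optional check (sketch): open the first downloaded tif with GDAL and confirm
+# its size and georeferencing, e.g.
+# ds = gdal.Open(os.path.join(filepath, all_names[0]))
+# print(ds.RasterXSize, ds.RasterYSize, ds.GetGeoTransform())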
+
+#%%
+#==========================================================#
+#==========================================================#
+# L7&L8
+#==========================================================#
+#==========================================================#
+
+
+
+# define filenames for images
+suffix = '.tif'
+filepath = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8')
+filepath_pan = os.path.join(filepath, 'pan')
+filepath_ms = os.path.join(filepath, 'ms')
+try:
+    os.makedirs(filepath_pan)
+    os.makedirs(filepath_ms)
+except FileExistsError:
+    print('directory already exists')
+
+#==========================================================#
+# Select L7 collection
+#==========================================================#
+
+satname = 'L7'
+input_col = ee.ImageCollection('LANDSAT/LE07/C01/T1_RT_TOA')
+
+# filter by location
+flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
+n_img = flt_col.size().getInfo()
+print('Number of images covering ' + sitename, n_img)
+im_all = flt_col.getInfo().get('features')
+
+#==========================================================#
+# Main loop through images
+#==========================================================#
+
+timestamps = []
+acc_georef = []
+all_names = []
+for i in range(n_img):
+
+    # find each image in ee database
+    im = ee.Image(im_all[i].get('id'))
+
+    im_dic = im.getInfo()
+    im_bands = im_dic.get('bands')
+    t = im_dic['properties']['system:time_start']
+    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
+    timestamps.append(im_timestamp)
+    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
+    im_epsg = int(im_dic['bands'][0]['crs'][5:])
+    try:
+        acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
+    except KeyError:
+        acc_georef.append(12)
+        print('No geometric rmse model property')
+
+    # delete the dimensions key from the band dictionaries, otherwise the entire image is extracted
+    for j in range(len(im_bands)): del im_bands[j]['dimensions']
+
+    # bands for L7
+    pan_band = [im_bands[8]]
+    ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[9]]
+
+    # filenames
+    filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
+    filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
+
+    print(i)
+    if filename_pan in all_names:
+        filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
+        filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
+    all_names.append(filename_pan)
+
+    local_data_pan = sds.download_tif(im, polygon, pan_band, filepath_pan)
+    os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
+    local_data_ms = sds.download_tif(im, polygon, ms_bands, filepath_ms)
+    os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
+
+#==========================================================#
+# Select L8 collection
+#==========================================================#
+
+satname = 'L8'
+input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
+
+# filter by location
+flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
+n_img = flt_col.size().getInfo()
+print('Number of images covering ' + sitename, n_img)
+im_all = flt_col.getInfo().get('features')
+
+#==========================================================#
+# Main loop through images
+#==========================================================#
+
+for i in range(n_img):
+
+    # find each image in ee database
+    im = ee.Image(im_all[i].get('id'))
+
+    im_dic = im.getInfo()
+    im_bands = im_dic.get('bands')
+    t = im_dic['properties']['system:time_start']
+    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
+    timestamps.append(im_timestamp)
+    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
+    im_epsg = int(im_dic['bands'][0]['crs'][5:])
+    try:
+        acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
+    except KeyError:
+        acc_georef.append(12)
+        print('No geometric rmse model property')
+
+    # delete the dimensions key from the band dictionaries, otherwise the entire image is extracted
+    for j in range(len(im_bands)): del im_bands[j]['dimensions']
+
+    # bands for L8
+    pan_band = [im_bands[7]]
+    ms_bands = [im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[5], im_bands[11]]
+
+    # filenames
+    filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
+    filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
+
+    print(i)
+    if filename_pan in all_names:
+        filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
+        filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
+    all_names.append(filename_pan)
+
+    local_data_pan = sds.download_tif(im, polygon, pan_band, filepath_pan)
+    os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
+    local_data_ms = sds.download_tif(im, polygon, ms_bands, filepath_ms)
+    os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
+
+
+# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
+timestamps_sorted = sorted(timestamps)
+idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
+acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
+
+metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}
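+
+# NB: timestamps, acc_georef and all_names were accumulated over both the L7
+# and L8 loops above, so this entry (stored under 'L8') describes the combined
+# L7&L8 image set.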
+
+#%%
+#==========================================================#
+#==========================================================#
+# S2
+#==========================================================#
+#==========================================================#
+
+
+
+# define filenames for images
+suffix = '.tif'
+filepath = os.path.join(os.getcwd(), 'data', sitename, 'S2')
+try:
+    os.makedirs(os.path.join(filepath, '10m'))
+    os.makedirs(os.path.join(filepath, '20m'))
+    os.makedirs(os.path.join(filepath, '60m'))
+except FileExistsError:
+    print('directory already exists')
+
+#==========================================================#
+# Select S2 collection
+#==========================================================#
+
+satname = 'S2'
+input_col = ee.ImageCollection('COPERNICUS/S2')
+
+# filter by location
+flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
+n_img = flt_col.size().getInfo()
+print('Number of images covering ' + sitename, n_img)
+im_all = flt_col.getInfo().get('features')
+
+#==========================================================#
+# Main loop through images
+#==========================================================#
+
+timestamps = []
+acc_georef = []
+all_names = []
+for i in range(n_img):
+
+    # find each image in ee database
+    im = ee.Image(im_all[i].get('id'))
+
+    im_dic = im.getInfo()
+    im_bands = im_dic.get('bands')
+    t = im_dic['properties']['system:time_start']
+    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
+    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
+    timestamps.append(im_timestamp)
+    im_epsg = int(im_dic['bands'][0]['crs'][5:])
+    try:
+        if im_dic['properties']['GEOMETRIC_QUALITY_FLAG'] == 'PASSED':
+            acc_georef.append(1)
+        else:
+            acc_georef.append(0)
+    except KeyError:
+        acc_georef.append(0)
+
+    # delete the dimensions key from the band dictionaries, otherwise the entire image is extracted
+    for j in range(len(im_bands)): del im_bands[j]['dimensions']
+
+    # bands for S2
+    bands10 = [im_bands[1], im_bands[2], im_bands[3], im_bands[7]]
+    bands20 = [im_bands[11]]
+    bands60 = [im_bands[15]]
+
+    # filenames
+    filename10 = im_date + '_' + satname + '_' + sitename + '_' + '10m' + suffix
+    filename20 = im_date + '_' + satname + '_' + sitename + '_' + '20m' + suffix
+    filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + suffix
+
+    print(i)
+    if filename10 in all_names:
+        filename10 = im_date + '_' + satname + '_' + sitename + '_' + '10m' + '_dup' + suffix
+        filename20 = im_date + '_' + satname + '_' + sitename + '_' + '20m' + '_dup' + suffix
+        filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + '_dup' + suffix
+    all_names.append(filename10)
+
+    local_data = sds.download_tif(im, polygon, bands10, filepath)
+    os.rename(local_data, os.path.join(filepath, '10m', filename10))
+
+    local_data = sds.download_tif(im, polygon, bands20, filepath)
+    os.rename(local_data, os.path.join(filepath, '20m', filename20))
+
+    local_data = sds.download_tif(im, polygon, bands60, filepath)
+    os.rename(local_data, os.path.join(filepath, '60m', filename60))
+
+# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
+timestamps_sorted = sorted(timestamps)
+idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
+acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
+
+metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}
+
+
+
+#%% save metadata
+
+filepath = os.path.join(os.getcwd(), 'data', sitename)
+with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'wb') as f:
+    pickle.dump(metadata, f)
+
\ No newline at end of file
diff --git a/functions/NeuralNet_classif_nopan.pkl b/functions/NeuralNet_classif_nopan.pkl
new file mode 100644
index 0000000..2a780fd
Binary files /dev/null and b/functions/NeuralNet_classif_nopan.pkl differ
diff --git a/functions/data_analysis.py b/functions/data_analysis.py
new file mode 100644
index 0000000..9244311
--- /dev/null
+++ b/functions/data_analysis.py
@@ -0,0 +1,432 @@
+"""This module contains all the functions needed for data analysis """
+
+# Initial settings
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+from matplotlib import gridspec
+import pdb
+import ee
+
+# other modules
+from osgeo import gdal, ogr, osr
+import scipy.interpolate as interpolate
+import scipy.stats as sstats
+
+# image processing modules
+import skimage.filters as filters
+import skimage.exposure as exposure
+import skimage.transform as transform
+import sklearn.decomposition as decomposition
+import skimage.measure as measure
+import skimage.morphology as morphology
+
+# machine learning modules
+from sklearn.cluster import KMeans
+from sklearn.neural_network import MLPClassifier
+from sklearn.externals import joblib
+
+import time
+
+# import own modules
+import functions.utils as utils
+
+def get_tide(dates_sds, dates_tide, tide_level):
+    " returns the tide level closest in time (within half an hour) to each SDS date "
+    tide = []
+    for i in range(len(dates_sds)):
+        dates_diff = np.abs(np.array([ (dates_sds[i] - _).total_seconds() for _ in dates_tide]))
+        if np.min(dates_diff) <= 1800: # half-an-hour
+            idx_closest = np.argmin(dates_diff)
+            tide.append(tide_level[idx_closest])
+        else:
+            tide.append(np.nan)
+    tide = np.array(tide)
+
+    return tide
+
+def remove_duplicates(output, satname):
+    " removes duplicates from the output structure, keeps the one with lower cloud cover or better georeferencing "
+    dates = output['dates']
+    dates_str = [_.strftime('%Y%m%d') for _ in dates]
+    dupl = utils.duplicates_dict(dates_str)
+    if dupl:
+        output_nodup = dict([])
+        idx_remove = []
+        if satname == 'L8' or satname == 'L5':
+            for k,v in dupl.items():
+
+                idx1 = v[0]
+                idx2 = v[1]
+
+                c1 = output['metadata']['cloud_cover'][idx1]
+                c2 = output['metadata']['cloud_cover'][idx2]
+                g1 = output['metadata']['acc_georef'][idx1]
+                g2 = output['metadata']['acc_georef'][idx2]
+
+                if c1 < c2 - 0.01:
+                    idx_remove.append(idx2)
+                elif g1 < g2 - 0.1:
+                    idx_remove.append(idx2)
+                else:
+                    idx_remove.append(idx1)
+
+        else:
+            for k,v in dupl.items():
+
+                idx1 = v[0]
+                idx2 = v[1]
+
+                c1 = output['metadata']['cloud_cover'][idx1]
+                c2 = output['metadata']['cloud_cover'][idx2]
+
+                if c1 < c2 - 0.01:
+                    idx_remove.append(idx2)
+                else:
+                    idx_remove.append(idx1)
+
+        idx_remove = sorted(idx_remove)
+        idx_all = np.arange(len(dates_str))
+        idx_keep = list(np.where(~np.isin(idx_all,idx_remove))[0])
+
+        output_nodup['dates'] = [output['dates'][k] for k in idx_keep]
+        output_nodup['shorelines'] = [output['shorelines'][k] for k in idx_keep]
+        output_nodup['metadata'] = dict([])
+        for key in list(output['metadata'].keys()):
+            output_nodup['metadata'][key] = [output['metadata'][key][k] for k in idx_keep]
+        print(satname + ' : ' + str(len(idx_remove)) + ' duplicates')
+        return output_nodup
+
+    else:
+        print(satname + ' : ' + 'no duplicates')
+        return output
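+
+# Illustrative usage (assumes an `output` dict as built by read_images.py,
+# with one entry per satellite; dates_tide/tide_level would come from a
+# tide-gauge record and are hypothetical here):
+# output['L8'] = remove_duplicates(output['L8'], 'L8')
+# tide = get_tide(output['L8']['dates'], dates_tide, tide_level)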
+
+
+def merge(output):
+    " merges data from the different satellites "
+
+    # stack all lists together under one key
+    output_all = {'dates':[], 'shorelines':[],
+                  'metadata':{'filenames':[], 'satname':[], 'cloud_cover':[], 'acc_georef':[]}}
+    for satname in list(output.keys()):
+        output_all['dates'] = output_all['dates'] + output[satname]['dates']
+        output_all['shorelines'] = output_all['shorelines'] + output[satname]['shorelines']
+        for key in list(output[satname]['metadata'].keys()):
+            output_all['metadata'][key] = output_all['metadata'][key] + output[satname]['metadata'][key]
+
+    output_all_sorted = {'dates':[], 'shorelines':[],
+                         'metadata':{'filenames':[], 'satname':[], 'cloud_cover':[], 'acc_georef':[]}}
+    # sort the dates
+    idx_sorted = sorted(range(len(output_all['dates'])), key=output_all['dates'].__getitem__)
+    output_all_sorted['dates'] = [output_all['dates'][i] for i in idx_sorted]
+    output_all_sorted['shorelines'] = [output_all['shorelines'][i] for i in idx_sorted]
+    for key in list(output_all['metadata'].keys()):
+        output_all_sorted['metadata'][key] = [output_all['metadata'][key][i] for i in idx_sorted]
+
+    return output_all_sorted
+
+def create_transects(x0, y0, orientation, chainage_length):
+    " creates shore-normal transects "
+
+    transects = []
+
+    for k in range(len(x0)):
+
+        # orientation of cross-shore profile
+        phi = (90 - orientation[k])*np.pi/180
+
+        # create a vector using the chainage length
+        x = np.linspace(0,chainage_length,chainage_length+1)
+        y = np.zeros(len(x))
+        coords = np.zeros((len(x),2))
+        coords[:,0] = x
+        coords[:,1] = y
+
+        # translate and rotate the vector using the origin and orientation
+        tf = transform.EuclideanTransform(rotation=phi, translation=(x0[k],y0[k]))
+        coords_tf = tf(coords)
+
+        transects.append(coords_tf)
+
+    return transects
+
+def calculate_chainage(sds, transects, orientation, along_dist):
+    " intersects SDS with transects and computes the chainage position "
+
+    chainage_mtx = np.zeros((len(sds),len(transects),6))
+
+    for i in range(len(sds)):
+
+        sl = sds[i]
+
+        for j in range(len(transects)):
+
+            # compute rotation matrix
+            X0 = transects[j][0,0]
+            Y0 = transects[j][0,1]
+            phi = (90 - orientation[j])*np.pi/180
+            Mrot = np.array([[np.cos(phi), np.sin(phi)],[-np.sin(phi), np.cos(phi)]])
+
+            # calculate point-to-line distance between shoreline points and the profile
+            p1 = np.array([X0,Y0])
+            p2 = transects[j][-1,:]
+            p3 = sl
+            d = np.abs(np.cross(p2-p1,p3-p1)/np.linalg.norm(p2-p1))
+            idx_close = utils.find_indices(d, lambda e: e <= along_dist)
+
+            # check if there are SDS points around the profile or not
+            if not idx_close:
+                chainage_mtx[i,j,:] = np.tile(np.nan,(1,6))
+
+            else:
+                # change of base to shore-normal coordinate system
+                xy_close = np.array([sl[idx_close,0],sl[idx_close,1]]) - np.tile(np.array([[X0],[Y0]]), (1,len(sl[idx_close])))
+                xy_rot = np.matmul(Mrot, xy_close)
+
+                # put nan values if the chainage is negative (MAKE SURE TO PICK ORIGIN CORRECTLY)
+                if np.any(xy_rot[0,:] < 0):
+                    xy_rot[0,np.where(xy_rot[0,:] < 0)] = np.nan
+
+                # compute mean, median, max, min and std of chainage position
+                n_points = len(xy_rot[0,:])
+                mean_cross = np.nanmean(xy_rot[0,:])
+                median_cross = np.nanmedian(xy_rot[0,:])
+                max_cross = np.nanmax(xy_rot[0,:])
+                min_cross = np.nanmin(xy_rot[0,:])
+                std_cross = np.nanstd(xy_rot[0,:])
+
+                if std_cross > 10: # if large std, take the most seaward point
+                    mean_cross = max_cross
+                    median_cross = max_cross
+                    min_cross = max_cross
+
+                # store the statistics
+                chainage_mtx[i,j,:] = np.array([mean_cross, median_cross, max_cross,
+                                                min_cross, n_points, std_cross])
+
+    # format into dictionary
+    chainage = dict([])
+    chainage['mean'] = chainage_mtx[:,:,0]
+    chainage['median'] = chainage_mtx[:,:,1]
+    chainage['max'] = chainage_mtx[:,:,2]
+    chainage['min'] = chainage_mtx[:,:,3]
+    chainage['npoints'] = chainage_mtx[:,:,4]
+    chainage['std'] = chainage_mtx[:,:,5]
+
+    return chainage
+
+def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
+    """
+    Compare sds with groundtruth data from topographic surveys / argus shorelines
+
+    KV WRL 2018
+
+    Arguments:
+    -----------
+    dates_sds: list
+        list of dates corresponding to each row in chain_sds
+    chain_sds: np.ndarray
+        array with time series of chainage for each transect (each transect is one column)
+    topo_profiles: dict
+        dict containing the dates and chainage of the groundtruth
+    mod: 0 or 1
+        0 for linear interpolation between the 2 closest surveys, 1 for nearest neighbour only
+    mindays: int
+        minimum number of days for which the data can be compared
+
+    Returns:
+    -----------
+    stats: dict
+        contains all the statistics
of the comparison + + """ + + # create 3 figures + fig1 = plt.figure() + gs1 = gridspec.GridSpec(chain_sds.shape[1], 1) + fig2 = plt.figure() + gs2 = gridspec.GridSpec(2, chain_sds.shape[1]) + fig3 = plt.figure() + gs3 = gridspec.GridSpec(2,1) + + dates_sds_num = np.array([_.toordinal() for _ in dates_sds]) + stats = dict([]) + data_fin = dict([]) + + # for each transect compare and plot the data + for i in range(chain_sds.shape[1]): + + pfname = list(topo_profiles.keys())[i] + stats[pfname] = dict([]) + data_fin[pfname] = dict([]) + + dates_sur = topo_profiles[pfname]['dates'] + chain_sur = topo_profiles[pfname]['chainage'] + + # convert to datenum + dates_sur_num = np.array([_.toordinal() for _ in dates_sur]) + + chain_sur_interp = [] + diff_days = [] + + for j, satdate in enumerate(dates_sds_num): + + temp_diff = satdate - dates_sur_num + + if mod==0: + # select measurement before and after sat image date and interpolate + + ind_before = np.where(temp_diff == temp_diff[temp_diff > 0][-1])[0] + if ind_before == len(temp_diff)-1: + chain_sur_interp.append(np.nan) + diff_days.append(np.abs(satdate-dates_sur_num[ind_before])[0]) + continue + ind_after = np.where(temp_diff == temp_diff[temp_diff < 0][0])[0] + tempx = np.zeros(2) + tempx[0] = dates_sur_num[ind_before] + tempx[1] = dates_sur_num[ind_after] + tempy = np.zeros(2) + tempy[0] = chain_sur[ind_before] + tempy[1] = chain_sur[ind_after] + diff_days.append(np.abs(np.max([satdate-tempx[0], satdate-tempx[1]]))) + # interpolate + f = interpolate.interp1d(tempx, tempy) + chain_sur_interp.append(f(satdate)) + + elif mod==1: + # select the closest measurement + + idx_closest = utils.find_indices(np.abs(temp_diff), lambda e: e == np.min(np.abs(temp_diff)))[0] + diff_days.append(np.abs(satdate-dates_sur_num[idx_closest])) + if diff_days[j] > mindays: + chain_sur_interp.append(np.nan) + else: + chain_sur_interp.append(chain_sur[idx_closest]) + + chain_sur_interp = np.array(chain_sur_interp) + + # remove nan values + idx_sur_nan = ~np.isnan(chain_sur_interp) + idx_sat_nan = ~np.isnan(chain_sds[:,i]) + idx_nan = np.logical_and(idx_sur_nan, idx_sat_nan) + + # groundtruth and sds + chain_sur_fin = chain_sur_interp[idx_nan] + chain_sds_fin = chain_sds[idx_nan,i] + dates_fin = [k for (k, v) in zip(dates_sds, idx_nan) if v] + + # calculate statistics + slope, intercept, rvalue, pvalue, std_err = sstats.linregress(chain_sur_fin, chain_sds_fin) + R2 = rvalue**2 + correlation = np.corrcoef(chain_sur_fin, chain_sds_fin)[0,1] + diff_chain = chain_sur_fin - chain_sds_fin + + rmse = np.sqrt(np.nanmean((diff_chain)**2)) + mean = np.nanmean(diff_chain) + std = np.nanstd(diff_chain) + q90 = np.percentile(np.abs(diff_chain), 90) + + # store data + stats[pfname]['rmse'] = rmse + stats[pfname]['mean'] = mean + stats[pfname]['std'] = std + stats[pfname]['q90'] = q90 + stats[pfname]['diffdays'] = diff_days + stats[pfname]['corr'] = correlation + stats[pfname]['linfit'] = {'slope':slope, 'intercept':intercept, 'R2':R2, 'pvalue':pvalue} + + data_fin[pfname]['dates'] = dates_fin + data_fin[pfname]['sds'] = chain_sds_fin + data_fin[pfname]['survey'] = chain_sur_fin + + # make time-series plot + plt.figure(fig1.number) + fig1.add_subplot(gs1[i,0]) + plt.plot(dates_sur, chain_sur, 'o-', color='C1', markersize=4, label='survey all') + plt.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2, label='survey interp') + plt.plot(dates_fin, chain_sds_fin, 'o--', color='b', markersize=4, label='SDS') + plt.title(pfname, fontweight='bold') +# 
plt.xlim([dates_sds[0], dates_sds[-1]]) + plt.ylabel('chainage [m]') + + # make scatter plot + plt.figure(fig2.number) + fig2.add_subplot(gs2[0,i]) + plt.axis('equal') + plt.plot(chain_sur_fin, chain_sds_fin, 'ko', markersize=4, markerfacecolor='w', alpha=0.7) + xmax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)]) + xmin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)]) + ymax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)]) + ymin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)]) + plt.plot([xmin, xmax], [ymin, ymax], 'k--') + plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'b:') + str_corr = ' y = %.2f x + %.2f\n R2 = %.2f' % (slope, intercept, R2) + plt.text(xmin, ymax-5, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left') + plt.xlabel('chainage survey [m]') + plt.ylabel('chainage satellite [m]') + plt.title(pfname, fontweight='bold') + + fig2.add_subplot(gs2[1,i]) + binwidth = 3 + bins = np.arange(min(diff_chain), max(diff_chain) + binwidth, binwidth) + density = plt.hist(diff_chain, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k') + plt.xlim([-50, 50]) + plt.xlabel('error [m]') + str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90) + plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.3), horizontalalignment='left', fontsize=10) + + fig1.set_size_inches(19.2, 9.28) + fig1.set_tight_layout(True) + fig2.set_size_inches(19.2, 9.28) + fig2.set_tight_layout(True) + + # all transects together + chain_sds_all = [] + chain_sur_all = [] + for i in range(chain_sds.shape[1]): + pfname = list(topo_profiles.keys())[i] + chain_sds_all = np.append(chain_sds_all,data_fin[pfname]['sds']) + chain_sur_all = np.append(chain_sur_all,data_fin[pfname]['survey']) + + # calculate statistics + slope, intercept, rvalue, pvalue, std_err = sstats.linregress(chain_sur_all, chain_sds_all) + R2 = rvalue**2 + correlation = np.corrcoef(chain_sur_all, chain_sds_all)[0,1] + diff_chain_all = chain_sur_all - chain_sds_all + + rmse = np.sqrt(np.nanmean((diff_chain_all)**2)) + mean = np.nanmean(diff_chain_all) + std = np.nanstd(diff_chain_all) + q90 = np.percentile(np.abs(diff_chain_all), 90) + + stats['all'] = {'rmse':rmse,'mean':mean,'std':std,'q90':q90, 'corr':correlation, + 'linfit':{'slope':slope, 'intercept':intercept, 'R2':R2, 'pvalue':pvalue}} + + # make plot + plt.figure(fig3.number) + fig3.add_subplot(gs3[0,0]) + plt.axis('equal') + plt.plot(chain_sur_all, chain_sds_all, 'ko', markersize=4, markerfacecolor='w', alpha=0.7) + xmax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)]) + xmin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)]) + ymax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)]) + ymin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)]) + plt.plot([xmin, xmax], [ymin, ymax], 'k--') + plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'b:') + str_corr = ' y = %.2f x + %.2f\n R2 = %.2f' % (slope, intercept, R2) + plt.text(xmin, ymax-5, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left') + plt.xlabel('chainage survey [m]') + plt.ylabel('chainage satellite [m]') + plt.title(pfname, fontweight='bold') + + fig3.add_subplot(gs3[1,0]) + binwidth = 3 + bins = np.arange(min(diff_chain_all), max(diff_chain_all) + binwidth, binwidth) + density = plt.hist(diff_chain_all, bins=bins, density=True, color=[0.8, 0.8, 0.8], 
edgecolor='k') + plt.xlim([-50, 50]) + plt.xlabel('error [m]') + str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90) + plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.3), horizontalalignment='left', fontsize=10) + fig3.set_size_inches(9.2, 9.28) + fig3.set_tight_layout(True) + + return stats \ No newline at end of file diff --git a/functions/sds.py b/functions/sds.py index ebcee1b..301753a 100644 --- a/functions/sds.py +++ b/functions/sds.py @@ -5,11 +5,13 @@ Created on Thu Mar 1 11:20:35 2018 @author: z5030440 """ -"""This script contains the functions needed for satellite derived shoreline (SDS) extraction""" +"""This module contains all the functions needed for extracting satellite derived shoreline (SDS) """ # Initial settings import numpy as np import matplotlib.pyplot as plt +import matplotlib.patches as mpatches +from matplotlib import gridspec import pdb import ee @@ -18,6 +20,7 @@ from osgeo import gdal, ogr, osr import tempfile from urllib.request import urlretrieve import zipfile +import scipy.interpolate as interpolate # image processing modules import skimage.filters as filters @@ -39,7 +42,7 @@ from functions.utils import * # Download from ee server function -def download_tif(image, polygon, bandsId): +def download_tif(image, polygon, bandsId, filepath): """downloads tif image (region and bands) from the ee server and stores it in a temp file""" url = ee.data.makeDownloadUrl(ee.data.getDownloadId({ 'image': image.serialize(), @@ -50,40 +53,7 @@ def download_tif(image, polygon, bandsId): })) local_zip, headers = urlretrieve(url) with zipfile.ZipFile(local_zip) as local_zipfile: - return local_zipfile.extract('data.tif', tempfile.mkdtemp()) - -def load_image(image, polygon, bandsId): - """ - Loads an ee.Image() as a np.array. e.Image() is retrieved from the EE database. - The geographic area and bands to select can be specified - - KV WRL 2018 - - Arguments: - ----------- - image: ee.Image() - image objec from the EE database - polygon: list - coordinates of the points creating a polygon. Each point is a list with 2 values - bandsId: list - bands to select, each band is a dictionnary in the list containing the following keys: - crs, crs_transform, data_type and id. NOTE: you have to remove the key dimensions, otherwise - the entire image is retrieved. 
- - Returns: - ----------- - image_array : np.ndarray - An array containing the image (2D if one band, otherwise 3D) - georef : np.ndarray - 6 element vector containing the crs_parameters - [X_ul_corner Xscale Xshear Y_ul_corner Yshear Yscale] - """ - - local_tif_filename = download_tif(image, polygon, bandsId) - dataset = gdal.Open(local_tif_filename, gdal.GA_ReadOnly) - georef = np.array(dataset.GetGeoTransform()) - bands = [dataset.GetRasterBand(i + 1).ReadAsArray() for i in range(dataset.RasterCount)] - return np.stack(bands, 2), georef + return local_zipfile.extract('data.tif', filepath) def create_cloud_mask(im_qa, satname, plot_bool): """ @@ -109,8 +79,10 @@ def create_cloud_mask(im_qa, satname, plot_bool): # convert QA bits if satname == 'L8': cloud_values = [2800, 2804, 2808, 2812, 6896, 6900, 6904, 6908] - elif satname == 'L7': + elif satname == 'L7' or satname == 'L5' or satname == 'L4': cloud_values = [752, 756, 760, 764] + elif satname == 'S2': + cloud_values = [1024, 2048] # 1024 = dense cloud, 2048 = cirrus clouds cloud_mask = np.isin(im_qa, cloud_values) # remove isolated cloud pixels (there are some in the swash and they cause problems) @@ -127,109 +99,6 @@ def create_cloud_mask(im_qa, satname, plot_bool): return cloud_mask -def read_eeimage(im, polygon, sat_name, plot_bool): - """ - Read an ee.Image() object and returns the panchromatic band, multispectral bands (B, G, R, NIR, SWIR) - and a cloud mask. All outputs are at 15m resolution (bilinear interpolation for the multispectral bands) - - KV WRL 2018 - - Arguments: - ----------- - im: ee.Image() - Image to read from the Google Earth Engine database - plot_bool: boolean - True if plot is wanted - - Returns: - ----------- - im_pan: np.ndarray (2D) - The panchromatic band (15m) - im_ms: np.ndarray (3D) - The multispectral bands interpolated at 15m - im_cloud: np.ndarray (2D) - The cloud mask at 15m - crs_params: list - EPSG code and affine transformation parameters - """ - - im_dic = im.getInfo() - # save metadata - im_meta = im_dic.get('properties') - meta = {'timestamp':im_meta['system:time_start'], - 'date_acquired':im_meta['DATE_ACQUIRED'], - 'geom_rmse_model':im_meta['GEOMETRIC_RMSE_MODEL'], - 'gcp_model':im_meta['GROUND_CONTROL_POINTS_MODEL'], - 'quality':im_meta['IMAGE_QUALITY_OLI'], - 'sun_azimuth':im_meta['SUN_AZIMUTH'], - 'sun_elevation':im_meta['SUN_ELEVATION']} - - im_bands = im_dic.get('bands') - - # delete dimensions key from dictionnary, otherwise the entire image is extracted - for i in range(len(im_bands)): del im_bands[i]['dimensions'] - - # load panchromatic band - pan_band = [im_bands[7]] - im_pan, crs_pan = load_image(im, polygon, pan_band) - im_pan = im_pan[:,:,0] - - # load the multispectral bands (B2,B3,B4,B5,B6) = (blue,green,red,nir,swir1) - ms_bands = [im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[5]] - im_ms_30m, crs_ms = load_image(im, polygon, ms_bands) - - # create cloud mask - qa_band = [im_bands[11]] - im_qa, crs_qa = load_image(im, polygon, qa_band) - im_qa = im_qa[:,:,0] - im_cloud = create_cloud_mask(im_qa, sat_name, plot_bool) - im_cloud = transform.resize(im_cloud, (im_pan.shape[0], im_pan.shape[1]), - order=0, preserve_range=True, mode='constant').astype('bool_') - - # resize the image using bilinear interpolation (order 1) - im_ms = transform.resize(im_ms_30m,(im_pan.shape[0], im_pan.shape[1]), - order=1, preserve_range=True, mode='constant') - - # check if -inf values (means out of image) and add to cloud mask - im_inf = np.isin(im_ms[:,:,0], -np.inf) - im_nan = 
np.isnan(im_ms[:,:,0]) - im_cloud = np.logical_or(np.logical_or(im_cloud, im_inf), im_nan) - - # get the crs parameters for the image at 15m and 30m resolution - crs = {'crs_15m':crs_pan, 'crs_30m':crs_ms, 'epsg_code':int(pan_band[0]['crs'][5:])} - - if plot_bool: - - # if there are -inf in the image, set them to 0 before plotting - if sum(sum(np.isin(im_ms_30m[:,:,0], -np.inf).astype(int))) > 0: - idx = np.isin(im_ms_30m[:,:,0], -np.inf) - im_ms_30m[idx,0] = 0; im_ms_30m[idx,1] = 0; im_ms_30m[idx,2] = 0; - im_ms_30m[idx,3] = 0; im_ms_30m[idx,4] = 0 - - plt.figure() - - plt.subplot(221) - plt.imshow(im_pan, cmap='gray') - plt.title('PANCHROMATIC') - - plt.subplot(222) - plt.imshow(im_ms_30m[:,:,[2,1,0]]) - plt.title('RGB') - - - plt.subplot(223) - plt.imshow(im_ms_30m[:,:,3], cmap='gray') - plt.title('NIR') - - plt.subplot(224) - plt.imshow(im_ms_30m[:,:,4], cmap='gray') - plt.title('SWIR') - - plt.show() - - return im_pan, im_ms, im_cloud, crs, meta - - def rescale_image_intensity(im, cloud_mask, prob_high, plot_bool): """ Rescales the intensity of an image (multispectral or single band) by applying @@ -395,9 +264,28 @@ def pansharpen(im_ms, im_pan, cloud_mask, plot_bool): # apply PCA to RGB bands pca = decomposition.PCA() vec_pcs = pca.fit_transform(vec) + # replace 1st PC with pan band (after matching histograms) vec_pan = im_pan.reshape(im_pan.shape[0] * im_pan.shape[1]) vec_pan = vec_pan[~vec_mask] + +# plt.figure() +# ax1 = plt.subplot(131) +# plt.imshow(im_pan, cmap='gray') +# plt.title('Pan band') +# plt.subplot(132, sharex=ax1, sharey=ax1) +# plt.imshow(vec_pcs[:,0].reshape(im_pan.shape[0],im_pan.shape[1]), cmap='gray') +# plt.title('PC1') +# plt.subplot(133, sharex=ax1, sharey=ax1) +# plt.imshow(hist_match(vec_pan, vec_pcs[:,0]).reshape(im_pan.shape[0],im_pan.shape[1]), cmap='gray') +# plt.title('Pan band histmatched') +# +# plt.figure() +# plt.hist(hist_match(vec_pan, vec_pcs[:,0]), bins=300) +# plt.hist(vec_pcs[:,0], bins=300, alpha=0.5) +# plt.hist(vec_pan, bins=300, alpha=0.5) +# plt.draw() + vec_pcs[:,0] = hist_match(vec_pan, vec_pcs[:,0]) vec_ms_ps = pca.inverse_transform(vec_pcs) @@ -407,13 +295,14 @@ def pansharpen(im_ms, im_pan, cloud_mask, plot_bool): im_ms_ps = vec_ms_ps_full.reshape(im_ms.shape[0], im_ms.shape[1], im_ms.shape[2]) if plot_bool: + plt.figure() ax1 = plt.subplot(121) - plt.imshow(rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 100, False)) + plt.imshow(rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)) plt.axis('off') plt.title('Original') ax2 = plt.subplot(122, sharex=ax1, sharey=ax1) - plt.imshow(rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 100, False)) + plt.imshow(rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)) plt.axis('off') plt.title('Pansharpened') plt.show() @@ -458,9 +347,10 @@ def nd_index(im1, im2, cloud_mask, plot_bool): return im_nd -def find_wl_contours(im_ndwi, cloud_mask, min_contour_points, plot_bool): +def find_wl_contours(im_ndwi, cloud_mask, plot_bool): """ - Computes normalised difference index on 2 images (2D), given a cloud mask (2D) + Finds the water line by thresholding the Normalized Difference Water Index and applying the Marching + Squares Algorithm KV WRL 2018 @@ -470,8 +360,6 @@ def find_wl_contours(im_ndwi, cloud_mask, min_contour_points, plot_bool): Image (2D) with the NDWI (water index) cloud_mask: np.ndarray 2D cloud mask with True where cloud pixels are - min_contour_points: int - minimum number of points in each contour line plot_bool: boolean True if 
plot is wanted @@ -489,17 +377,19 @@ def find_wl_contours(im_ndwi, cloud_mask, min_contour_points, plot_bool): t_otsu = filters.threshold_otsu(vec) # use Marching Squares algorithm to detect contours on ndwi image contours = measure.find_contours(im_ndwi, t_otsu) - # filter water lines - contours_wl = [] - for i, contour in enumerate(contours): - # remove contour points that are around clouds (nan values) - if np.any(np.isnan(contour)): - index_nan = np.where(np.isnan(contour))[0] - contour = np.delete(contour, index_nan, axis=0) - # remove contours that have only few points (less than min_contour_points) - if contour.shape[0] > min_contour_points: - contours_wl.append(contour) - + + # remove contour points that are nans + contours_nonans = [] + for k in range(len(contours)): + if np.any(np.isnan(contours[k])): + index_nan = np.where(np.isnan(contours[k]))[0] + contours_temp = np.delete(contours[k], index_nan, axis=0) + if len(contours_temp) > 1: + contours_nonans.append(contours_temp) + else: + contours_nonans.append(contours[k]) + contours = contours_nonans + if plot_bool: # plot otsu's histogram segmentation plt.figure() @@ -512,12 +402,12 @@ def find_wl_contours(im_ndwi, cloud_mask, min_contour_points, plot_bool): plt.figure() plt.imshow(im_ndwi, cmap='seismic') plt.colorbar() - for i,contour in enumerate(contours_wl): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k') + for i,contour in enumerate(contours): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k') plt.axis('image') plt.title('Detected water lines') plt.show() - return contours_wl + return contours def convert_pix2world(points, crs_vec): """ @@ -563,6 +453,49 @@ def convert_pix2world(points, crs_vec): return points_converted +def convert_world2pix(points, crs_vec): + """ + Converts world projected coordinates (X,Y) to image coordinates (row,column) + performing an affine transformation + + KV WRL 2018 + + Arguments: + ----------- + points: np.ndarray or list of np.ndarray + array with 2 columns (rows first and columns second) + crs_vec: np.ndarray + vector of 6 elements [Xtr, Xscale, Xshear, Ytr, Yshear, Yscale] + + Returns: ----------- + points_converted: np.ndarray or list of np.ndarray + converted coordinates, first columns with row and second column with column + + """ + + # make affine transformation matrix + aff_mat = np.array([[crs_vec[1], crs_vec[2], crs_vec[0]], + [crs_vec[4], crs_vec[5], crs_vec[3]], + [0, 0, 1]]) + # create affine transformation + tform = transform.AffineTransform(aff_mat) + + if type(points) is list: + points_converted = [] + # iterate over the list + for i, arr in enumerate(points): + points_converted.append(tform.inverse(points)) + + elif type(points) is np.ndarray: + points_converted = tform.inverse(points) + + else: + print('invalid input type') + raise + + return points_converted + + def convert_epsg(points, epsg_in, epsg_out): """ Converts from one spatial reference to another using the epsg codes @@ -676,7 +609,7 @@ def classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, buffer_size # im_sand = morphology.binary_dilation(im_sand, morphology.disk(1)) if plot_bool: - im = np.copy(rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 100, False)) + im = np.copy(rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)) im[im_sand,0] = 0 im[im_sand,1] = 0 im[im_sand,2] = 1 @@ -688,7 +621,7 @@ def classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, buffer_size return im_sand -def classify_image_NN(im_ms_ps, im_pan, cloud_mask, 
plot_bool): +def classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, plot_bool): """ Classifies every pixel in the image in one of 4 classes: - sand --> label = 1 @@ -714,8 +647,10 @@ def classify_image_NN(im_ms_ps, im_pan, cloud_mask, plot_bool): True if plot is wanted Returns: ----------- - im_labels: np.ndarray - 2D binary image containing True where sand pixels are located + im_classif: np.ndarray + 2D image containing labels + im_labels: np.ndarray of booleans + 3D image containing a boolean image for each class (im_classif == label) """ @@ -743,17 +678,117 @@ def classify_image_NN(im_ms_ps, im_pan, cloud_mask, plot_bool): vec_classif = np.zeros((cloud_mask.shape[0]*cloud_mask.shape[1])) vec_classif[~vec_mask] = labels im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1])) -# im_classif = morphology.remove_small_objects(im_classif, min_size=min_beach_size, connectivity=2) - + + # labels + im_sand = im_classif == 1 + im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2) + im_swash = im_classif == 2 + im_water = im_classif == 3 + im_labels = np.stack((im_sand,im_swash,im_water), axis=-1) + if plot_bool: - im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 100, False) + # display on top of pansharpened RGB + im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False) im = np.copy(im_display) - colours = np.array([[1,0,0],[1,1,0],[0,1,1],[0,0,1]]) - for k in range(4): - im[im_classif == k,0] = colours[k,0] - im[im_classif == k,1] = colours[k,1] - im[im_classif == k,2] = colours[k,2] - + # define colours for plot + colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]]) + for k in range(0,im_labels.shape[2]): + im[im_labels[:,:,k],0] = colours[k,0] + im[im_labels[:,:,k],1] = colours[k,1] + im[im_labels[:,:,k],2] = colours[k,2] + + plt.figure() + ax1 = plt.subplot(121) + plt.imshow(im_display) + plt.axis('off') + plt.title('Image') + ax2 = plt.subplot(122, sharex=ax1, sharey=ax1) + plt.imshow(im) + plt.axis('off') + plt.title('NN classifier') + mng = plt.get_current_fig_manager() + mng.window.showMaximized() + plt.tight_layout() + plt.draw() + + return im_classif, im_labels + +def classify_image_NN_nopan(im_ms_ps, cloud_mask, min_beach_size, plot_bool): + """ + Classifies every pixel in the image in one of 4 classes: + - sand --> label = 1 + - whitewater (breaking waves and swash) --> label = 2 + - water --> label = 3 + - other (vegetation, buildings, rocks...) --> label = 0 + + The classifier is a Neural Network, trained with 7000 pixels for the class SAND and 1500 pixels for + each of the other classes. 
This is because the class of interest for my application is SAND and I + wanted to minimize the classification error for that class + + KV WRL 2018 + + Arguments: + ----------- + im_ms_ps: np.ndarray + Pansharpened RGB + downsampled NIR and SWIR + im_pan: + Panchromatic band + cloud_mask: np.ndarray + 2D cloud mask with True where cloud pixels are + plot_bool: boolean + True if plot is wanted + + Returns: ----------- + im_classif: np.ndarray + 2D image containing labels + im_labels: np.ndarray of booleans + 3D image containing a boolean image for each class (im_classif == label) + + """ + + # load classifier + clf = joblib.load('functions/NeuralNet_classif_nopan.pkl') + + # calculate features + n_features = 9 + im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features)) + im_features[:,:,[0,1,2,3,4]] = im_ms_ps + im_features[:,:,5] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, False) # (NIR-G) + im_features[:,:,6] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], cloud_mask, False) # ND(NIR-R) + im_features[:,:,7] = nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], cloud_mask, False) # ND(B-R) + im_features[:,:,8] = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False) # ND(SWIR-G) + # remove NaNs and clouds + vec_features = im_features.reshape((im_ms_ps.shape[0] * im_ms_ps.shape[1], n_features)) + vec_cloud = cloud_mask.reshape(cloud_mask.shape[0]*cloud_mask.shape[1]) + vec_nan = np.any(np.isnan(vec_features), axis=1) + vec_mask = np.logical_or(vec_cloud, vec_nan) + vec_features = vec_features[~vec_mask, :] + # predict with NN classifier + labels = clf.predict(vec_features) + + # recompose image + vec_classif = np.zeros((cloud_mask.shape[0]*cloud_mask.shape[1])) + vec_classif[~vec_mask] = labels + im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1])) + + # labels + im_sand = im_classif == 1 + im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2) + im_swash = im_classif == 2 + im_water = im_classif == 3 + im_labels = np.stack((im_sand,im_swash,im_water), axis=-1) + + if plot_bool: + # display on top of pansharpened RGB + im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False) + im = np.copy(im_display) + # define colours for plot + colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]]) + for k in range(0,im_labels.shape[2]): + im[im_labels[:,:,k],0] = colours[k,0] + im[im_labels[:,:,k],1] = colours[k,1] + im[im_labels[:,:,k],2] = colours[k,2] + plt.figure() ax1 = plt.subplot(121) plt.imshow(im_display) @@ -762,8 +797,371 @@ def classify_image_NN(im_ms_ps, im_pan, cloud_mask, plot_bool): ax2 = plt.subplot(122, sharex=ax1, sharey=ax1) plt.imshow(im) plt.axis('off') - plt.title('NN') + plt.title('NN classifier') + mng = plt.get_current_fig_manager() + mng.window.showMaximized() + plt.tight_layout() + plt.draw() + + return im_classif, im_labels + +def find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size, plot_bool): + """ + New method for extracting shorelines (more robust) + + KV WRL 2018 + + Arguments: + ----------- + im_ms_ps: np.ndarray + Pansharpened RGB + downsampled NIR and SWIR + im_labels: np.ndarray + 3D image containing a boolean image for each class in the order (sand, swash, water) + cloud_mask: np.ndarray + 2D cloud mask with True where cloud pixels are + buffer_size: int + size of the buffer around the sandy beach + plot_bool: boolean + True if plot is wanted + + Returns: ----------- + contours_wi: list of np.arrays + contains the (row,column) 
coordinates of the contour lines extracted with the Water Index + contours_mwi: list of np.arrays + contains the (row,column) coordinates of the contour lines extracted with the Modified Water Index + + """ + + nrows = cloud_mask.shape[0] + ncols = cloud_mask.shape[1] + im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False) + + # calculate Normalized Difference Modified Water Index (SWIR - G) + im_mwi = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False) + # calculate Normalized Difference Modified Water Index (NIR - G) + im_wi = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, False) + # stack indices together + im_ind = np.stack((im_wi, im_mwi), axis=-1) + vec_ind = im_ind.reshape(nrows*ncols,2) + + # process labels + vec_sand = im_labels[:,:,0].reshape(ncols*nrows) + vec_swash = im_labels[:,:,1].reshape(ncols*nrows) + vec_water = im_labels[:,:,2].reshape(ncols*nrows) + + # create a buffer around the sandy beach + se = morphology.disk(buffer_size) + im_buffer = morphology.binary_dilation(im_labels[:,:,0], se) + vec_buffer = im_buffer.reshape(nrows*ncols) + + # select water/sand/swash pixels that are within the buffer + int_water = vec_ind[np.logical_and(vec_buffer,vec_water),:] + int_sand = vec_ind[np.logical_and(vec_buffer,vec_sand),:] + int_swash = vec_ind[np.logical_and(vec_buffer,vec_swash),:] + + # threshold the sand/water intensities + int_all = np.append(int_water,int_sand, axis=0) + t_mwi = filters.threshold_otsu(int_all[:,0]) + t_wi = filters.threshold_otsu(int_all[:,1]) + + # find contour with MS algorithm + im_wi_buffer = np.copy(im_wi) + im_wi_buffer[~im_buffer] = np.nan + im_mwi_buffer = np.copy(im_mwi) + im_mwi_buffer[~im_buffer] = np.nan + contours_wi = measure.find_contours(im_wi_buffer, t_wi) + contours_mwi = measure.find_contours(im_mwi, t_mwi) # WARNING (on entire image) + + # remove contour points that are nans (around clouds) + + contours = contours_wi + contours_nonans = [] + for k in range(len(contours)): + if np.any(np.isnan(contours[k])): + index_nan = np.where(np.isnan(contours[k]))[0] + contours_temp = np.delete(contours[k], index_nan, axis=0) + if len(contours_temp) > 1: + contours_nonans.append(contours_temp) + else: + contours_nonans.append(contours[k]) + contours_wi = contours_nonans + + contours = contours_mwi + contours_nonans = [] + for k in range(len(contours)): + if np.any(np.isnan(contours[k])): + index_nan = np.where(np.isnan(contours[k]))[0] + contours_temp = np.delete(contours[k], index_nan, axis=0) + if len(contours_temp) > 1: + contours_nonans.append(contours_temp) + else: + contours_nonans.append(contours[k]) + contours_mwi = contours_nonans + + if plot_bool: + + im = np.copy(im_display) + # define colours for plot + colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]]) + for k in range(0,im_labels.shape[2]): + im[im_labels[:,:,k],0] = colours[k,0] + im[im_labels[:,:,k],1] = colours[k,1] + im[im_labels[:,:,k],2] = colours[k,2] + + fig = plt.figure() + gs = gridspec.GridSpec(3, 3, height_ratios=[1, 1, 3]) + + ax1 = fig.add_subplot(gs[0,:]) + vals = plt.hist(int_water[:,0], bins=100, label='water') + plt.hist(int_sand[:,0], bins=100, alpha=0.5, label='sand') + plt.hist(int_swash[:,0], bins=100, alpha=0.5, label='swash') + plt.plot([t_wi, t_wi], [0, np.max(vals[0])], 'r-') + plt.legend() + plt.title('Water Index NIR-G') + + ax2 = fig.add_subplot(gs[1,:], sharex=ax1) + vals = plt.hist(int_water[:,1], bins=100, label='water') + plt.hist(int_sand[:,1], bins=100, alpha=0.5, label='sand') + 
plt.hist(int_swash[:,1], bins=100, alpha=0.5, label='swash') + plt.plot([t_mwi, t_mwi], [0, np.max(vals[0])], 'r-') + plt.legend() + plt.title('Modified Water Index SWIR-G') + + ax3 = fig.add_subplot(gs[2,0]) + plt.imshow(im) + for i,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k') + for i,contour in enumerate(contours_wi): plt.plot(contour[:, 1], contour[:, 0], linestyle='--', linewidth=1, color='w') + plt.grid(False) + plt.xticks([]) + plt.yticks([]) + + ax4 = fig.add_subplot(gs[2,1], sharex=ax3, sharey=ax3) + plt.imshow(im_display) + for i,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k') + for i,contour in enumerate(contours_wi): plt.plot(contour[:, 1], contour[:, 0], linestyle='--', linewidth=1, color='w') + plt.grid(False) + plt.xticks([]) + plt.yticks([]) + + ax5 = fig.add_subplot(gs[2,2], sharex=ax3, sharey=ax3) + plt.imshow(im_mwi, cmap='seismic') + for i,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k') + for i,contour in enumerate(contours_wi): plt.plot(contour[:, 1], contour[:, 0], linestyle='--', linewidth=1, color='w') + plt.grid(False) + plt.xticks([]) + plt.yticks([]) + +# plt.gcf().set_size_inches(17.99,7.55) + mng = plt.get_current_fig_manager() + mng.window.showMaximized() + plt.gcf().set_tight_layout(True) plt.draw() + + return contours_wi, contours_mwi + +def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5): + """ + Compare sds with groundtruth data from topographic surveys / argus shorelines - return im_classif + KV WRL 2018 + + Arguments: + ----------- + dates_sds: list + list of dates corresponding to each row in chain_sds + chain_sds: np.ndarray + array with time series of chainage for each transect (each transect is one column) + topo_profiles: dict + dict containing the dates and chainage of the groundtruth + mod: 0 or 1 + 0 for linear interpolation between 2 closest surveys, 1 for only nearest neighbour + min_days: int + minimum number of days for which the data can be compared + + Returns: ----------- + stats: dict + contains all the statistics of the comparison + """ + + # create 3 figures + fig1 = plt.figure() + gs1 = gridspec.GridSpec(chain_sds.shape[1], 1) + fig2 = plt.figure() + gs2 = gridspec.GridSpec(2, chain_sds.shape[1]) + fig3 = plt.figure() + gs3 = gridspec.GridSpec(2,1) + + dates_sds_num = np.array([_.toordinal() for _ in dates_sds]) + stats = dict([]) + data_fin = dict([]) + + # for each transect compare and plot the data + for i in range(chain_sds.shape[1]): + + pfname = list(topo_profiles.keys())[i] + stats[pfname] = dict([]) + data_fin[pfname] = dict([]) + + dates_sur = topo_profiles[pfname]['dates'] + chain_sur = topo_profiles[pfname]['chainage'] + + # convert to datenum + dates_sur_num = np.array([_.toordinal() for _ in dates_sur]) + + chain_sur_interp = [] + diff_days = [] + + for j, satdate in enumerate(dates_sds_num): + + temp_diff = satdate - dates_sur_num + + if mod==0: + # select measurement before and after sat image date and interpolate + + ind_before = np.where(temp_diff == temp_diff[temp_diff > 0][-1])[0] + if ind_before == len(temp_diff)-1: + chain_sur_interp.append(np.nan) + diff_days.append(np.abs(satdate-dates_sur_num[ind_before])[0]) + continue + ind_after = np.where(temp_diff == temp_diff[temp_diff < 0][0])[0] + tempx = np.zeros(2) + tempx[0] = dates_sur_num[ind_before] + tempx[1] = dates_sur_num[ind_after] + tempy = np.zeros(2) + tempy[0] = chain_sur[ind_before] + 
tempy[1] = chain_sur[ind_after] + diff_days.append(np.abs(np.max([satdate-tempx[0], satdate-tempx[1]]))) + # interpolate + f = interpolate.interp1d(tempx, tempy) + chain_sur_interp.append(f(satdate)) + + elif mod==1: + # select the closest measurement + + idx_closest = find_indices(np.abs(temp_diff), lambda e: e == np.min(np.abs(temp_diff)))[0] + diff_days.append(np.abs(satdate-dates_sur_num[idx_closest])) + if diff_days[j] > mindays: + chain_sur_interp.append(np.nan) + else: + chain_sur_interp.append(chain_sur[idx_closest]) + + chain_sur_interp = np.array(chain_sur_interp) + + # remove nan values + idx_sur_nan = ~np.isnan(chain_sur_interp) + idx_sat_nan = ~np.isnan(chain_sds[:,i]) + idx_nan = np.logical_and(idx_sur_nan, idx_sat_nan) + + # groundtruth and sds + chain_sur_fin = chain_sur_interp[idx_nan] + chain_sds_fin = chain_sds[idx_nan,i] + dates_fin = [k for (k, v) in zip(dates_sds, idx_nan) if v] + diff_chain = chain_sur_fin - chain_sds_fin + + # calculate statistics + rmse = np.sqrt(np.nanmean((diff_chain)**2)) + mean = np.nanmean(diff_chain) + std = np.nanstd(diff_chain) + q90 = np.percentile(np.abs(diff_chain), 90) + + # store data + stats[pfname]['rmse'] = rmse + stats[pfname]['mean'] = mean + stats[pfname]['std'] = std + stats[pfname]['q90'] = q90 + stats[pfname]['diffdays'] = diff_days + + data_fin[pfname]['dates'] = dates_fin + data_fin[pfname]['sds'] = chain_sds_fin + data_fin[pfname]['survey'] = chain_sur_fin + + # make time-series plot + plt.figure(fig1.number) + ax = fig1.add_subplot(gs1[i,0]) + plt.plot(dates_sur, chain_sur, 'o-', color='C1', markersize=4, label='survey all') + plt.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2, label='survey interp') + plt.plot(dates_fin, chain_sds_fin, 'o--', color='b', markersize=4, label='SDS') + plt.title(pfname, fontweight='bold') + plt.xlim([dates_sds[0], dates_sds[-1]]) + plt.ylabel('chainage [m]') + + # make scatter plot + plt.figure(fig2.number) + ax1 = fig2.add_subplot(gs2[0,i]) + plt.axis('equal') + plt.plot(chain_sur_fin, chain_sds_fin, 'ko', markersize=4, markerfacecolor='w', alpha=0.7) + xmax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)]) + xmin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)]) + ymax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)]) + ymin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)]) + plt.plot([xmin, xmax], [ymin, ymax], 'r--') + correlation = np.corrcoef(chain_sur_fin, chain_sds_fin)[0,1] + str_corr = 'r = %.2f' % (correlation) + plt.text(xmin, ymax, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left') + plt.xlabel('chainage survey [m]') + plt.ylabel('chainage satellite [m]') + plt.title(pfname, fontweight='bold') + + ax2 = fig2.add_subplot(gs2[1,i]) + binwidth = 3 + bins = np.arange(min(diff_chain), max(diff_chain) + binwidth, binwidth) + density = plt.hist(diff_chain, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k') + plt.xlim([-50, 50]) + plt.xlabel('error [m]') + str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90) + plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.5), horizontalalignment='left', fontsize=10) + + fig1.set_size_inches(19.2, 9.28) + fig1.set_tight_layout(True) + fig2.set_size_inches(19.2, 9.28) + fig2.set_tight_layout(True) + + # plot all the data together + chain_sds_all = [] + chain_sur_all = [] + for i in range(chain_sds.shape[1]): + pfname = list(topo_profiles.keys())[i] + 
chain_sds_all = np.append(chain_sds_all,data_fin[pfname]['sds']) + chain_sur_all = np.append(chain_sur_all,data_fin[pfname]['survey']) + + diff_chain_all = chain_sur_all - chain_sds_all + + # calculate statistics + rmse = np.sqrt(np.nanmean((diff_chain_all)**2)) + mean = np.nanmean(diff_chain_all) + std = np.nanstd(diff_chain_all) + q90 = np.percentile(np.abs(diff_chain_all), 90) + + stats['all'] = {'rmse':rmse,'mean':mean,'std':std,'q90':q90} + + # make plot with all datapoints (from all the transects) + plt.figure(fig3.number) + ax1 = fig3.add_subplot(gs3[0,0]) + plt.axis('equal') + plt.plot(chain_sur_all, chain_sds_all, 'ko', markersize=4, markerfacecolor='w', alpha=0.7) + xmax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)]) + xmin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)]) + ymax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)]) + ymin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)]) + plt.plot([xmin, xmax], [ymin, ymax], 'r--') + correlation = np.corrcoef(chain_sur_all, chain_sds_all)[0,1] + str_corr = 'r = %.2f' % (correlation) + plt.text(xmin, ymax, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left') + plt.xlabel('chainage survey [m]') + plt.ylabel('chainage satellite [m]') + plt.title('all transects', fontweight='bold') + + ax2 = fig3.add_subplot(gs3[1,0]) + binwidth = 3 + bins = np.arange(min(diff_chain_all), max(diff_chain_all) + binwidth, binwidth) + density = plt.hist(diff_chain_all, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k') + plt.xlim([-50, 50]) + plt.xlabel('error [m]') + str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90) + plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.5), horizontalalignment='left', fontsize=10) + fig3.set_size_inches(9.2, 9.28) + fig3.set_tight_layout(True) + + return stats + \ No newline at end of file diff --git a/functions/utils.py b/functions/utils.py index 019c0d4..efd804e 100644 --- a/functions/utils.py +++ b/functions/utils.py @@ -8,7 +8,9 @@ Contains all the utilities, convenience functions and small functions that do si """ import matplotlib.pyplot as plt +from datetime import datetime, timedelta import numpy as np +import scipy.io as sio import pdb @@ -59,3 +61,50 @@ def find_indices(lst, condition): def reject_outliers(data, m=2): "rejects outliers in a numpy array" return data[abs(data - np.mean(data)) < m * np.std(data)] + +def duplicates_dict(lst): + "return duplicates and indices" + # nested function + def duplicates(lst, item): + return [i for i, x in enumerate(lst) if x == item] + + return dict((x, duplicates(lst, x)) for x in set(lst) if lst.count(x) > 1) + +def datenum2datetime(datenum): + "convert datenum to datetime" + # takes in a Matlab datenum and outputs a python datetime + time = [datetime.fromordinal(int(dn)) + timedelta(days=float(dn)%1) - timedelta(days = 366) for dn in datenum] + return time + +def loadmat(filename): + ''' + this function should be called instead of sio.loadmat directly + as it cures the problem of not properly recovering python dictionaries + from mat files. It calls the function _check_keys to cure all entries + which are still mat-objects + ''' + data = sio.loadmat(filename, struct_as_record=False, squeeze_me=True) + return _check_keys(data) + +def _check_keys(d): + ''' + checks if entries in dictionary are mat-objects.
If yes + _todict is called to change them to nested dictionaries ''' + for key in d: + if isinstance(d[key], sio.matlab.mio5_params.mat_struct): + d[key] = _todict(d[key]) + return d + +def _todict(matobj): + ''' + A recursive function which constructs nested dictionaries from mat-objects + ''' + d = {} + for strg in matobj._fieldnames: + elem = matobj.__dict__[strg] + if isinstance(elem, sio.matlab.mio5_params.mat_struct): + d[strg] = _todict(elem) + else: + d[strg] = elem + return d \ No newline at end of file diff --git a/read_images.py b/read_images.py new file mode 100644 index 0000000..8f289aa --- /dev/null +++ b/read_images.py @@ -0,0 +1,589 @@ +#==========================================================# +#==========================================================# +# Extract shorelines from Landsat and Sentinel-2 images +#==========================================================# +#==========================================================# + + +#==========================================================# +# Initial settings +#==========================================================# + +import os +import numpy as np +import matplotlib.pyplot as plt +import ee +import pdb + +# other modules +from osgeo import gdal, ogr, osr +import pickle +import matplotlib.cm as cm +from pylab import ginput +from shapely.geometry import LineString + +# image processing modules +import skimage.filters as filters +import skimage.exposure as exposure +import skimage.transform as transform +import sklearn.decomposition as decomposition +import skimage.measure as measure +import skimage.morphology as morphology + +# machine learning modules +from sklearn.model_selection import train_test_split +from sklearn.neural_network import MLPClassifier +from sklearn.preprocessing import StandardScaler, Normalizer +from sklearn.externals import joblib + +# import own modules +import functions.utils as utils +import functions.sds as sds + +# some other settings +np.seterr(all='ignore') # raise/ignore divisions by 0 and nans +plt.rcParams['axes.grid'] = True +plt.rcParams['figure.max_open_warning'] = 100 +ee.Initialize() + +#==========================================================# +# Parameters +#==========================================================# + +sitename = 'NARRA' + +cloud_thresh = 0.7 # threshold for cloud cover +plot_bool = False # if you want the plots +output_epsg = 28356 # GDA94 / MGA Zone 56 +buffer_size = 7 # radius (in pixels) of disk for buffer (pixel classification) +min_beach_size = 20 # number of pixels in a beach (pixel classification) +dist_ref = 100 # maximum distance from reference point +min_length_wl = 200 # minimum length of shoreline LineString to be kept +manual_bool = True # to manually check images + + +output = dict([]) + +#==========================================================# +# Metadata +#==========================================================# + +filepath = os.path.join(os.getcwd(), 'data', sitename) +with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'rb') as f: + metadata = pickle.load(f) + + +#%% +#==========================================================# +# Read S2 images +#==========================================================# + +satname = 'S2' +dates = metadata[satname]['dates'] +input_epsg = 32756 # metadata[satname]['epsg'] + +# path to images +filepath10 = os.path.join(os.getcwd(), 'data', sitename, satname, '10m') +filenames10 = os.listdir(filepath10) +filepath20 = os.path.join(os.getcwd(), 'data', sitename, satname, 
'20m') + filenames20 = os.listdir(filepath20) + filepath60 = os.path.join(os.getcwd(), 'data', sitename, satname, '60m') + filenames60 = os.listdir(filepath60) + if (not len(filenames10) == len(filenames20)) or (not len(filenames20) == len(filenames60)): + raise ValueError('not the same number of files for the 10, 20 and 60 m bands') + N = len(filenames10) + + # initialise variables + cloud_cover_ts = [] + acc_georef_ts = [] + date_acquired_ts = [] + filename_ts = [] + satname_ts = [] + timestamp = [] + shorelines = [] + idx_skipped = [] + + spacing = '==========================================================' + msg = ' %s\n %s\n %s' % (spacing, satname, spacing) + print(msg) + + for i in range(N): + + # read 10m bands + fn = os.path.join(filepath10, filenames10[i]) + data = gdal.Open(fn, gdal.GA_ReadOnly) + georef = np.array(data.GetGeoTransform()) + bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)] + im10 = np.stack(bands, 2) + im10 = im10/10000 # TOA scaled to 10000 + + # if image is only zeros, skip it + if sum(sum(sum(im10))) < 1: + print('skip ' + str(i) + ' - no data') + idx_skipped.append(i) + continue + + nrows = im10.shape[0] + ncols = im10.shape[1] + + # read 20m band (SWIR1) + fn = os.path.join(filepath20, filenames20[i]) + data = gdal.Open(fn, gdal.GA_ReadOnly) + bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)] + im20 = np.stack(bands, 2) + im20 = im20[:,:,0] + im20 = im20/10000 # TOA scaled to 10000 + im_swir = transform.resize(im20, (nrows, ncols), order=1, preserve_range=True, mode='constant') + im_swir = np.expand_dims(im_swir, axis=2) + + # append the re-sampled swir band to the 10m bands + im_ms = np.append(im10, im_swir, axis=2) + + # read 60m band (QA) + fn = os.path.join(filepath60, filenames60[i]) + data = gdal.Open(fn, gdal.GA_ReadOnly) + bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)] + im60 = np.stack(bands, 2) + im_qa = im60[:,:,0] + cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool) + cloud_mask = transform.resize(cloud_mask,(nrows, ncols), order=0, preserve_range=True, mode='constant') + # check if -inf or nan values on any band and add to cloud mask + for k in range(im_ms.shape[2]): + im_inf = np.isin(im_ms[:,:,k], -np.inf) + im_nan = np.isnan(im_ms[:,:,k]) + cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan) + + # calculate cloud cover and if above threshold, skip it + cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1]) + if cloud_cover > cloud_thresh: + print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)') + idx_skipped.append(i) + continue + + # rescale image intensity for display purposes + im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False) + + # classify image in 4 classes (sand, whitewater, water, other) with NN classifier + im_classif, im_labels = sds.classify_image_NN_nopan(im_ms, cloud_mask, min_beach_size, plot_bool) + + # if there aren't any sandy pixels + if sum(sum(im_labels[:,:,0])) == 0: + # use global threshold + im_ndwi = sds.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask, plot_bool) + contours_mwi = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool) + else: + # use specific threshold + contours_wi, contours_mwi = sds.find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size, plot_bool) + + # convert from pixels to world coordinates + wl_coords = sds.convert_pix2world(contours_mwi, georef) + # convert to output epsg spatial reference + 
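# (input_epsg 32756 is the scene's native WGS84 / UTM zone 56S projection; output_epsg 28356 is GDA94 / MGA Zone 56, set in the parameters above) + 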
wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg) + + # remove contour lines that have a length < min_length_wl + wl_good = [] + for l, wls in enumerate(wl): + coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))] + a = LineString(coords) # shapely LineString structure + if a.length >= min_length_wl: + wl_good.append(wls) + + # format points and only select the ones close to the refpoints + # (refpoints: array of reference shoreline points, assumed to have been loaded earlier in the session) + x_points = np.array([]) + y_points = np.array([]) + for k in range(len(wl_good)): + x_points = np.append(x_points,wl_good[k][:,0]) + y_points = np.append(y_points,wl_good[k][:,1]) + wl_good = np.transpose(np.array([x_points,y_points])) + temp = np.zeros((len(wl_good))).astype(bool) + for k in range(len(refpoints)): + temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp) + wl_final = wl_good[temp] + + + # plot output + plt.figure() + im = np.copy(im_display) + colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]]) + for k in range(0,im_labels.shape[2]): + im[im_labels[:,:,k],0] = colours[k,0] + im[im_labels[:,:,k],1] = colours[k,1] + im[im_labels[:,:,k],2] = colours[k,2] + plt.imshow(im) + for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--') + plt.title(satname + ' ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + ' acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' ) + plt.draw() + + pt_in = np.array(ginput(n=1, timeout=1000)) + plt.close() + + # if image is rejected, skip it + if pt_in[0][1] > nrows/2: + print('skip ' + str(i) + ' - rejected') + idx_skipped.append(i) + continue + + # if accepted, store the data + cloud_cover_ts.append(cloud_cover) + acc_georef_ts.append(metadata[satname]['acc_georef'][i]) + + filename_ts.append(filenames10[i]) + satname_ts.append(satname) + date_acquired_ts.append(filenames10[i][:10]) + + timestamp.append(metadata[satname]['dates'][i]) + shorelines.append(wl_final) + +# store in output structure +output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped, + 'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts, + 'acc_georef':acc_georef_ts}} +del idx_skipped + + +#%% +#==========================================================# +# Read L7&L8 images +#==========================================================# + +satname = 'L8' +dates = metadata[satname]['dates'] +input_epsg = 32656 # metadata[satname]['epsg'] + +# path to images +filepath_pan = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8', 'pan') +filepath_ms = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8', 'ms') +filenames_pan = os.listdir(filepath_pan) +filenames_ms = os.listdir(filepath_ms) +if (not len(filenames_pan) == len(filenames_ms)): + raise ValueError('not the same number of files for pan and ms') +N = len(filenames_pan) + +# initialise variables +cloud_cover_ts = [] +acc_georef_ts = [] +date_acquired_ts = [] +filename_ts = [] +satname_ts = [] +timestamp = [] +shorelines = [] +idx_skipped = [] + + +spacing = '==========================================================' +msg = ' %s\n %s\n %s' % (spacing, satname, spacing) +print(msg) + +for i in range(N): + + # get satellite name + sat = filenames_pan[i][20:22] + + # read pan image + fn_pan = os.path.join(filepath_pan, filenames_pan[i]) + data = gdal.Open(fn_pan, gdal.GA_ReadOnly) + georef = np.array(data.GetGeoTransform()) + bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)] + im_pan = np.stack(bands, 2)[:,:,0] + 
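# the pan band is at 15 m resolution; its grid sets the target size for resampling the 30 m ms bands below + 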
nrows = im_pan.shape[0] + ncols = im_pan.shape[1] + + # read ms image + fn_ms = os.path.join(filepath_ms, filenames_ms[i]) + data = gdal.Open(fn_ms, gdal.GA_ReadOnly) + bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)] + im_ms = np.stack(bands, 2) + + # cloud mask + im_qa = im_ms[:,:,5] + cloud_mask = sds.create_cloud_mask(im_qa, sat, plot_bool) + cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0, preserve_range=True, mode='constant').astype('bool_') + # resize the image using bilinear interpolation (order 1) + im_ms = im_ms[:,:,:5] + im_ms = transform.resize(im_ms,(nrows, ncols), order=1, preserve_range=True, mode='constant') + + # check if -inf or nan values on any band (including pan) and add to cloud mask + for k in range(im_ms.shape[2]+1): + if k == 5: + im_inf = np.isin(im_pan, -np.inf) + im_nan = np.isnan(im_pan) + else: + im_inf = np.isin(im_ms[:,:,k], -np.inf) + im_nan = np.isnan(im_ms[:,:,k]) + cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan) + + # calculate cloud cover and skip image if above threshold + cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1]) + if cloud_cover > cloud_thresh: + print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)') + idx_skipped.append(i) + continue + + # Pansharpen image (different for L8 and L7) + if sat == 'L7': + # pansharpen (Green, Red, NIR) and downsample Blue and SWIR1 + im_ms_ps = sds.pansharpen(im_ms[:,:,[1,2,3]], im_pan, cloud_mask, plot_bool) + im_ms_ps = np.append(im_ms[:,:,[0]], im_ms_ps, axis=2) + im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[4]], axis=2) + im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False) + elif sat == 'L8': + # pansharpen RGB image and downsample NIR and SWIR1 + im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, plot_bool) + im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2) + im_display = sds.rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False) + + # classify image in 4 classes (sand, whitewater, water, other) with NN classifier + im_classif, im_labels = sds.classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, plot_bool) + + # if there aren't any sandy pixels + if sum(sum(im_labels[:,:,0])) == 0: + # use global threshold + im_ndwi = sds.nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, plot_bool) + contours_mwi = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool) + else: + # use specific threshold + contours_wi, contours_mwi = sds.find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size, plot_bool) + + # convert from pixels to world coordinates + wl_coords = sds.convert_pix2world(contours_mwi, georef) + # convert to output epsg spatial reference + wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg) + + # remove contour lines that have a length < min_length_wl + wl_good = [] + for l, wls in enumerate(wl): + coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))] + a = LineString(coords) # shapely LineString structure + if a.length >= min_length_wl: + wl_good.append(wls) + + # format points and only select the ones close to the refpoints + x_points = np.array([]) + y_points = np.array([]) + for k in range(len(wl_good)): + x_points = np.append(x_points,wl_good[k][:,0]) + y_points = np.append(y_points,wl_good[k][:,1]) + wl_good = np.transpose(np.array([x_points,y_points])) + temp = np.zeros((len(wl_good))).astype(bool) + for k in range(len(refpoints)): + temp = 
np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp) + wl_final = wl_good[temp] + + # plot output + plt.figure() + plt.subplot(121) + im = np.copy(im_display) + colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]]) + for k in range(0,im_labels.shape[2]): + im[im_labels[:,:,k],0] = colours[k,0] + im[im_labels[:,:,k],1] = colours[k,1] + im[im_labels[:,:,k],2] = colours[k,2] + plt.imshow(im) + for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--') + plt.title(sat + ' ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + ' acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' ) + + pt_in = np.array(ginput(n=1, timeout=1000)) + plt.close() + + # if image is rejected, skip it + if pt_in[0][1] > nrows/2: + print('skip ' + str(i) + ' - rejected') + idx_skipped.append(i) + continue + + # if accepted, store the data + cloud_cover_ts.append(cloud_cover) + acc_georef_ts.append(metadata[satname]['acc_georef'][i]) + + filename_ts.append(filenames_pan[i]) + satname_ts.append(sat) + date_acquired_ts.append(filenames_pan[i][:10]) + + timestamp.append(metadata[satname]['dates'][i]) + shorelines.append(wl_final) + +# store in output structure +output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped, + 'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts, + 'acc_georef':acc_georef_ts}} + +del idx_skipped + + + +#%% +#==========================================================# +# Read L5 images +#==========================================================# + +satname = 'L5' +dates = metadata[satname]['dates'] +input_epsg = 32656 # metadata[satname]['epsg'] + +# path to images +filepath_img = os.path.join(os.getcwd(), 'data', sitename, satname, '30m') +filenames = os.listdir(filepath_img) +N = len(filenames) + +# initialise variables +cloud_cover_ts = [] +acc_georef_ts = [] +date_acquired_ts = [] +filename_ts = [] +satname_ts = [] +timestamp = [] +shorelines = [] +idx_skipped = [] + + +spacing = '==========================================================' +msg = ' %s\n %s\n %s' % (spacing, satname, spacing) +print(msg) + +for i in range(N): + + # read ms image + fn = os.path.join(filepath_img, filenames[i]) + data = gdal.Open(fn, gdal.GA_ReadOnly) + georef = np.array(data.GetGeoTransform()) + bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)] + im_ms = np.stack(bands, 2) + + # resample to 15 m (half the original 30 m pixel size) + nrows = im_ms.shape[0]*2 + ncols = im_ms.shape[1]*2 + + # cloud mask + im_qa = im_ms[:,:,5] + im_ms = im_ms[:,:,:-1] + cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool) + cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0, preserve_range=True, mode='constant').astype('bool_') + + # resize the image using bilinear interpolation (order 1) + im_ms = transform.resize(im_ms,(nrows, ncols), order=1, preserve_range=True, mode='constant') + + # adjust georef vector (scale becomes 15m and origin is adjusted to the center of new corner pixel) + georef[1] = 15 + georef[5] = -15 + georef[0] = georef[0] + 7.5 + georef[3] = georef[3] - 7.5 + + # check if -inf or nan values on any band and add to cloud mask + for k in range(im_ms.shape[2]): + im_inf = np.isin(im_ms[:,:,k], -np.inf) + im_nan = np.isnan(im_ms[:,:,k]) + cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan) + + # calculate cloud cover and skip image if above threshold + cloud_cover = 
sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1]) + if cloud_cover > cloud_thresh: + print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)') + idx_skipped.append(i) + continue + + # rescale image intensity for display purposes + im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False) + + # classify image in 4 classes (sand, whitewater, water, other) with NN classifier + im_classif, im_labels = sds.classify_image_NN_nopan(im_ms, cloud_mask, min_beach_size, plot_bool) + + # if there aren't any sandy pixels + if sum(sum(im_labels[:,:,0])) == 0: + # use global threshold + im_ndwi = sds.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask, plot_bool) + contours_mwi = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool) + else: + # use specific threshold + contours_wi, contours_mwi = sds.find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size, plot_bool) + + # convert from pixels to world coordinates + wl_coords = sds.convert_pix2world(contours_mwi, georef) + # convert to output epsg spatial reference + wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg) + + # remove contour lines that have a length < min_length_wl + wl_good = [] + for l, wls in enumerate(wl): + coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))] + a = LineString(coords) # shapely LineString structure + if a.length >= min_length_wl: + wl_good.append(wls) + + # format points and only select the ones close to the refpoints + x_points = np.array([]) + y_points = np.array([]) + for k in range(len(wl_good)): + x_points = np.append(x_points,wl_good[k][:,0]) + y_points = np.append(y_points,wl_good[k][:,1]) + wl_good = np.transpose(np.array([x_points,y_points])) + temp = np.zeros((len(wl_good))).astype(bool) + for k in range(len(refpoints)): + temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp) + wl_final = wl_good[temp] + + # plot output + plt.figure() + plt.subplot(121) + im = np.copy(im_display) + colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]]) + for k in range(0,im_labels.shape[2]): + im[im_labels[:,:,k],0] = colours[k,0] + im[im_labels[:,:,k],1] = colours[k,1] + im[im_labels[:,:,k],2] = colours[k,2] + plt.imshow(im) + for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--') + plt.title(satname + ' ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + ' acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' ) + plt.subplot(122) + plt.axis('equal') + plt.axis('off') + plt.plot(refpoints[:,0], refpoints[:,1], 'k.') + plt.plot(wl_final[:,0], wl_final[:,1], 'r.') + mng = plt.get_current_fig_manager() + mng.window.showMaximized() + plt.tight_layout() + plt.draw() + + pt_in = np.array(ginput(n=1, timeout=1000)) + plt.close() + + # if image is rejected, skip it + if pt_in[0][1] > nrows/2: + print('skip ' + str(i) + ' - rejected') + idx_skipped.append(i) + continue + + # if accepted, store the data + cloud_cover_ts.append(cloud_cover) + acc_georef_ts.append(metadata[satname]['acc_georef'][i]) + + filename_ts.append(filenames[i]) + satname_ts.append(satname) + date_acquired_ts.append(filenames[i][:10]) + + timestamp.append(metadata[satname]['dates'][i]) + shorelines.append(wl_final) + +# store in output structure +output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped, + 'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts, + 
'acc_georef':acc_georef_ts}} + +del idx_skipped + +#==========================================================# +#==========================================================# +#==========================================================# +#==========================================================# + +#%% +# save output +with open(os.path.join(filepath, sitename + '_output' + '.pkl'), 'wb') as f: + pickle.dump(output, f) + +# save idx_skipped +#idx_skipped = dict([]) +#for satname in list(output.keys()): +# idx_skipped[satname] = output[satname]['idx_skipped'] +#with open(os.path.join(filepath, sitename + '_idxskipped' + '.pkl'), 'wb') as f: +# pickle.dump(idx_skipped, f) + \ No newline at end of file diff --git a/sl_comparison_NARRA.py b/sl_comparison_NARRA.py deleted file mode 100644 index ac9a3b0..0000000 --- a/sl_comparison_NARRA.py +++ /dev/null @@ -1,382 +0,0 @@ -# -*- coding: utf-8 -*- - -#==========================================================# -# Compare Narrabeen SDS with 3D quadbike surveys -#==========================================================# - -# Initial settings - -import os -import numpy as np -import matplotlib.pyplot as plt -import pdb -import ee - -import matplotlib.dates as mdates -import matplotlib.cm as cm -from datetime import datetime, timedelta -import pickle -import pytz -import scipy.io as sio -import scipy.interpolate as interpolate -import statsmodels.api as sm -import skimage.measure as measure - -# my functions -import functions.utils as utils - -# some settings -np.seterr(all='ignore') # raise/ignore divisions by 0 and nans -plt.rcParams['axes.grid'] = True -plt.rcParams['figure.max_open_warning'] = 100 - -au_tz = pytz.timezone('Australia/Sydney') - -# load quadbike dates and convert from datenum to datetime -filename = 'data\quadbike\survey_dates.mat' -filepath = os.path.join(os.getcwd(), filename) -dates_quad = sio.loadmat(filepath)['dates'] # matrix containing year, month, day -dates_quad = [datetime(dates_quad[i,0], dates_quad[i,1], dates_quad[i,2], - tzinfo=au_tz) for i in range(dates_quad.shape[0])] - -# load timestamps from satellite images -satname = 'L8' -sitename = 'NARRA' -filepath = os.path.join(os.getcwd(), 'data', satname, sitename) -with open(os.path.join(filepath, sitename + '_output_new' + '.pkl'), 'rb') as f: - output = pickle.load(f) - -dates_l8 = output['t'] -# convert to AEST -dates_l8 = [_.astimezone(au_tz) for _ in dates_l8] -# remove duplicates -dates_l8_str = [_.strftime('%Y%m%d') for _ in dates_l8] -dupl = utils.duplicates_dict(dates_l8_str) -idx_remove = [] -for k,v in dupl.items(): - - idx1 = v[0] - idx2 = v[1] - - c1 = output['cloud_cover'][idx1] - c2 = output['cloud_cover'][idx2] - g1 = output['acc_georef'][idx1] - g2 = output['acc_georef'][idx2] - - if c1 < c2 - 0.01: - idx_remove.append(idx2) - elif g1 < g2 - 0.1: - idx_remove.append(idx2) - else: - idx_remove.append(idx1) -idx_remove = sorted(idx_remove) -idx_all = np.linspace(0,70,71) -idx_keep = list(np.where(~np.isin(idx_all,idx_remove))[0]) -output['t'] = [output['t'][k] for k in idx_keep] -output['shorelines'] = [output['shorelines'][k] for k in idx_keep] -output['cloud_cover'] = [output['cloud_cover'][k] for k in idx_keep] -output['acc_georef'] = [output['acc_georef'][k] for k in idx_keep] -# convert to AEST -dates_l8 = output['t'] -dates_l8 = [_.astimezone(au_tz) for _ in dates_l8] - -# load wave data (already AEST) -filename = 'data\wave\SydneyProcessed.mat' -filepath = os.path.join(os.getcwd(), filename) -wave_data = sio.loadmat(filepath) -idx = 
utils.find_indices(wave_data['dates'][:,0], lambda e: e >= dates_l8[0].year and e <= dates_l8[-1].year) -hsig = np.array([wave_data['Hsig'][i][0] for i in idx]) -wdir = np.array([wave_data['Wdir'][i][0] for i in idx]) -dates_wave = [datetime(wave_data['dates'][i,0], wave_data['dates'][i,1], - wave_data['dates'][i,2], wave_data['dates'][i,3], - wave_data['dates'][i,4], wave_data['dates'][i,5], - tzinfo=au_tz) for i in idx] - -# load tide data (already AEST) -filename = 'SydTideData.mat' -filepath = os.path.join(os.getcwd(), 'data', 'tide', filename) -tide_data = sio.loadmat(filepath) -idx = utils.find_indices(tide_data['dates'][:,0], lambda e: e >= dates_l8[0].year and e <= dates_l8[-1].year) -tide = np.array([tide_data['tide'][i][0] for i in idx]) -dates_tide = [datetime(tide_data['dates'][i,0], tide_data['dates'][i,1], - tide_data['dates'][i,2], tide_data['dates'][i,3], - tide_data['dates'][i,4], tide_data['dates'][i,5], - tzinfo=au_tz) for i in idx] - -#%% make a plot of all the dates with wave data -orange = [255/255,140/255,0] -blue = [0,191/255,255/255] -f = plt.figure() -months = mdates.MonthLocator() -month_fmt = mdates.DateFormatter('%b %Y') -days = mdates.DayLocator() -years = [2013,2014,2015,2016] -for k in range(len(years)): - sel_year = years[k] - ax = plt.subplot(4,1,k+1) - idx_year = utils.find_indices(dates_wave, lambda e : e.year >= sel_year and e.year <= sel_year) - plt.plot([dates_wave[i] for i in idx_year], [hsig[i] for i in idx_year], 'k-', linewidth=0.5) - hsigmax = np.nanmax([hsig[i] for i in idx_year]) - cbool = True - for j in range(len(dates_quad)): - if dates_quad[j].year == sel_year: - if cbool: - plt.plot([dates_quad[j], dates_quad[j]], [0, hsigmax], color=orange, label='survey') - cbool = False - else: - plt.plot([dates_quad[j], dates_quad[j]], [0, hsigmax], color=orange) - cbool = True - for j in range(len(dates_l8)): - if dates_l8[j].year == sel_year: - if cbool: - plt.plot([dates_l8[j], dates_l8[j]], [0, hsigmax], color=blue, label='landsat8') - cbool = False - else: - plt.plot([dates_l8[j], dates_l8[j]], [0, hsigmax], color=blue) - if k == 3: - plt.legend() - plt.xlim((datetime(sel_year,1,1), datetime(sel_year,12,31, tzinfo=au_tz))) - plt.ylim((0, hsigmax)) - plt.ylabel('Hs [m]') - ax.xaxis.set_major_locator = months - ax.xaxis.set_major_formatter(month_fmt) -f.subplots_adjust(hspace=0.2) -plt.draw() - -#%% calculate difference between dates (quad and sat) -diff_days = [ [(x - _).days for _ in dates_quad] for x in dates_l8] -max_diff = 14 -idx_closest = [utils.find_indices(_, lambda e: abs(e) <= max_diff) for _ in diff_days] -# store in dates_diff dictionnary -dates_diff = [] -cloud_cover = [] -for i in range(len(idx_closest)): - if not idx_closest[i]: - continue - elif len(idx_closest[i]) > 1: - idx_best = np.argmin(np.abs([diff_days[i][_] for _ in idx_closest[i]])) - dates_temp = [dates_quad[_] for _ in idx_closest[i]] - days_temp = [diff_days[i][_] for _ in idx_closest[i]] - dates_diff.append({"date sat": dates_l8[i], - "date quad": dates_temp[idx_best], - "days diff": days_temp[idx_best]}) - else: - dates_diff.append({"date sat": dates_l8[i], - "date quad": dates_quad[idx_closest[i][0]], - "days diff": diff_days[i][idx_closest[i][0]] - }) - # store cloud data - cloud_cover.append(output['cloud_cover'][i]) - -# store wave data -wave_hsig = [] -for i in range(len(dates_diff)): - wave_hsig.append(hsig[np.argmin(np.abs(np.array([(dates_diff[i]['date sat'] - _).total_seconds() for _ in dates_wave])))]) - -# make a plot -plt.figure() -counter = 0 -for i in 
range(len(dates_diff)): - counter = counter + 1 - if dates_diff[i]['date quad'] > dates_diff[i]['date sat']: - date_min = dates_diff[i]['date sat'] - date_max = dates_diff[i]['date quad'] - color1 = orange - color2 = blue - else: - date_min = dates_diff[i]['date quad'] - date_max = dates_diff[i]['date sat'] - color1 = blue - color2 = orange - idx_t = utils.find_indices(dates_wave, lambda e : e >= date_min and e <= date_max) - hsigmax = np.nanmax([hsig[i] for i in idx_t]) - hsigmin = np.nanmin([hsig[i] for i in idx_t]) - if counter > 9: - counter = 1 - plt.figure() - ax = plt.subplot(3,3,counter) - plt.plot([dates_wave[i] for i in idx_t], [hsig[i] for i in idx_t], 'k-', linewidth=1.5) - plt.plot([date_min, date_min], [0, 4.5], color=color2, label='survey') - plt.plot([date_max, date_max], [0, 4.5], color=color1, label='landsat8') - plt.ylabel('Hs [m]') - ax.xaxis.set_major_locator(mdates.DayLocator(tz=au_tz)) - ax.xaxis.set_minor_locator(mdates.HourLocator(tz=au_tz)) - ax.xaxis.set_major_formatter(mdates.DateFormatter('%d')) - ax.xaxis.set_minor_locator(months) - plt.title(dates_diff[i]['date sat'].strftime('%b %Y') + ' (' + str(abs(dates_diff[i]['days diff'])) + ' days)') - plt.draw() - plt.gcf().subplots_adjust(hspace=0.5) - -# mean day difference -np.mean([ np.abs(_['days diff']) for _ in dates_diff]) - -#%% Compare shorelines in elevation - -dist_buffer = 50 # buffer of points selected for interpolation - -# load quadbike .mat files -foldername = 'data\quadbike\surveys3D' -folderpath = os.path.join(os.getcwd(), foldername) -filenames = os.listdir(folderpath) - -# get the satellite shorelines -sl = output['shorelines'] - -# get dates from filenames -dates_quad = [datetime(int(_[6:10]), int(_[11:13]), int(_[14:16]), tzinfo= au_tz) for _ in filenames] - -zav = [] -ztide = [] -sl_gt = [] -for i in range(len(dates_diff)): - - sl_smooth = sl[i] - - # select closest 3D survey and load .mat file - idx_closest = np.argmin(np.abs(np.array([(dates_diff[i]['date sat'] - _).days for _ in dates_quad]))) - survey3d = sio.loadmat(os.path.join(folderpath, filenames[idx_closest])) - # reshape to a vector - xs = survey3d['x'].reshape(survey3d['x'].shape[0] * survey3d['x'].shape[1]) - ys = survey3d['y'].reshape(survey3d['y'].shape[0] * survey3d['y'].shape[1]) - zs = survey3d['z'].reshape(survey3d['z'].shape[0] * survey3d['z'].shape[1]) - # remove nan values - idx_nan = np.isnan(zs) - xs = xs[~idx_nan] - ys = ys[~idx_nan] - zs = zs[~idx_nan] - - # find water level at the time the image was acquired - idx_closest = np.argmin(np.abs(np.array([(dates_diff[i]['date sat'] - _).total_seconds() for _ in dates_tide]))) - tide_level = tide[idx_closest] - ztide.append(tide_level) - - # find contour corresponding to the water level on 3D surface (if below minimum, add 0.05m increments) - if tide_level < np.nanmin(survey3d['z']): - tide_level = np.nanmin(survey3d['z']) - sl_tide = measure.find_contours(survey3d['z'], tide_level) - sl_tide = sl_tide[np.argmax(np.array([len(_) for _ in sl_tide]))] - count = 0 - while len(sl_tide) < 900: - count = count + 1 - tide_level = tide_level + 0.05*count - sl_tide = measure.find_contours(survey3d['z'], tide_level) - sl_tide = sl_tide[np.argmax(np.array([len(_) for _ in sl_tide]))] - print('added ' + str(0.05*count) + ' cm - contour with ' + str(len(sl_tide)) + ' points') - else: - sl_tide = measure.find_contours(survey3d['z'], tide_level) - sl_tide = sl_tide[np.argmax(np.array([len(_) for _ in sl_tide]))] - # remove nans - if np.any(np.isnan(sl_tide)): - index_nan = 
np.where(np.isnan(sl_tide))[0] - sl_tide = np.delete(sl_tide, index_nan, axis=0) - # get x,y coordinates - xtide = [survey3d['x'][int(np.round(sl_tide[m,0])), int(np.round(sl_tide[m,1]))] for m in range(sl_tide.shape[0])] - ytide = [survey3d['y'][int(np.round(sl_tide[m,0])), int(np.round(sl_tide[m,1]))] for m in range(sl_tide.shape[0])] - sl_gt.append(np.transpose(np.array([np.array(xtide), np.array(ytide)]))) - # interpolate SDS on 3D surface to get elevation (point by point) - zq = [] - for j in range(sl_smooth.shape[0]): - xq = sl_smooth[j,0] - yq = sl_smooth[j,1] - dist_q = np.linalg.norm(np.transpose(np.array([[xq - _ for _ in xs],[yq - _ for _ in ys]])), axis=1) - idx_buffer = dist_q <= dist_buffer - if sum(idx_buffer) > 0: - tck = interpolate.bisplrep(xs[idx_buffer], ys[idx_buffer], zs[idx_buffer]) - zq.append(interpolate.bisplev(xq, yq, tck)) - - zq = np.array(zq) - plt.figure() - plt.hist(zq, bins=100) - plt.draw() -# plt.figure() -# plt.axis('equal') -# plt.scatter(xs, ys, s=10, c=zs, marker='o', cmap=cm.get_cmap('jet'), -# label='quad data') -# plt.plot(xs[idx_buffer], ys[idx_buffer], 'ko') -# plt.plot(xq,yq,'ro') -# plt.draw() - - # store the alongshore median elevation - zav.append(np.median(utils.reject_outliers(zq, m=2))) - - # make plot - red = [255/255, 0, 0] - gray = [0.75, 0.75, 0.75] - plt.figure() - plt.subplot(121) - plt.axis('equal') - plt.scatter(xs, ys, s=10, c=zs, marker='o', cmap=cm.get_cmap('jet'), - label='3D survey') - plt.plot(xtide, ytide, '--', color=gray, linewidth=2.5, label='tide level contour') - plt.plot(sl_smooth[:,0], sl_smooth[:,1], '-', color=red, linewidth=2.5, label='SDS') -# plt.plot(sl[i][idx_beach,0], sl[i][idx_beach,1], 'w-', linewidth=2) - plt.xlabel('Eastings [m]') - plt.ylabel('Northings [m]') - plt.title('Shoreline comparison') - plt.colorbar(label='mAHD') - plt.legend() - plt.ylim((6266100, 6267000)) - plt.subplot(122) - plt.plot(np.linspace(0,1,len(zq)), zq, 'ko-', markersize=5) - plt.plot([0, 1], [zav[i], zav[i]], 'r-', label='median') - plt.plot([0, 1], [ztide[i], ztide[i]], 'g--', label = 'measured tide') - plt.xlabel('Northings [m]') - plt.ylabel('Elevation [mAHD]') - plt.title('Alongshore SDS elevation') - plt.legend() - mng = plt.get_current_fig_manager() - mng.window.showMaximized() - plt.tight_layout() - plt.draw() - - print(i) - -#%% Calculate some error statistics -zav = np.array(zav) -ztide = np.array(ztide) - -f = plt.figure() -plt.subplot(3,1,1) -plt.bar(np.linspace(1,len(zav),len(zav)), zav-ztide) -plt.ylabel('Error in z [m]') -plt.title('Elevation error') -plt.xticks([]) -plt.draw() - -plt.subplot(3,1,2) -plt.bar(np.linspace(1,len(zav),len(zav)), wave_hsig, color=orange) -plt.ylabel('Hsig [m]') -plt.xticks([]) -plt.draw() - -plt.subplot(3,1,3) -plt.bar(np.linspace(1,len(zav),len(zav)), np.array(cloud_cover)*100, color='g') -plt.ylabel('Cloud cover %') -plt.xlabel('comparison #') -plt.grid(False) -plt.grid(axis='y') -f.subplots_adjust(hspace=0) -plt.draw() - -np.sqrt(np.mean((zav - ztide)**2)) - - - -#%% plot to show LOWESS smoothing -#i = 0 -#idx_beach = [np.min(np.linalg.norm(sl[i][k,:] - narrabeach, axis=1)) < dist_thresh for k in range(sl[i].shape[0])] -#x = sl[i][idx_beach,0] -#y = sl[i][idx_beach,1] -#sl_smooth = lowess(x,y, frac=1./10, it = 10) -# -#plt.figure() -#plt.axis('equal') -#plt.scatter -#plt.plot(x,y,'bo', linewidth=2, label='original SDS') -#plt.plot(sl_smooth[:,1], sl_smooth[:,0], 'ro', linewidth=2, label='smoothed SDS') -#plt.legend() -#plt.xlabel('Eastings [m]') -#plt.ylabel('Northings [m]') 
-#plt.title('Local weighted scatterplot smoothing (LOWESS)') -#plt.draw() -