updated master

added download_images and read_images scripts
master
kvos 7 years ago
parent cd74f6c39c
commit 62f5c5330f

@ -0,0 +1,390 @@
#==========================================================#
#==========================================================#
# Download L5, L7, L8, S2 images of a given area
#==========================================================#
#==========================================================#
#==========================================================#
# Initial settings
#==========================================================#
import os
import numpy as np
import matplotlib.pyplot as plt
import pdb
import ee
# other modules
from osgeo import gdal, ogr, osr
from urllib.request import urlretrieve
import zipfile
from datetime import datetime
import pytz
import pickle
# import own modules
import functions.utils as utils
import functions.sds as sds
np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
ee.Initialize()
#==========================================================#
# Location
#==========================================================#
## location (Narrabeen-Collaroy beach)
#polygon = [[[151.301454, -33.700754],
# [151.311453, -33.702075],
# [151.307237, -33.739761],
# [151.294220, -33.736329],
# [151.301454, -33.700754]]];
# location (Tairua beach)
sitename = 'TAIRUA'
polygon = [[[175.835574, -36.982022],
[175.888220, -36.980680],
[175.893527, -37.029610],
[175.833444, -37.031767],
[175.835574, -36.982022]]];
# initialise metadata dictionnary (stores timestamps and georefencing accuracy of each image)
metadata = dict([])
# create directories
try:
os.makedirs(os.path.join(os.getcwd(), 'data',sitename))
except:
print('directory already exists')
#%%
#==========================================================#
#==========================================================#
# L5
#==========================================================#
#==========================================================#
# define filenames for images
suffix = '.tif'
filepath = os.path.join(os.getcwd(), 'data', sitename, 'L5', '30m')
try:
os.makedirs(filepath)
except:
print('directory already exists')
#==========================================================#
# Select L5 collection
#==========================================================#
satname = 'L5'
input_col = ee.ImageCollection('LANDSAT/LT05/C01/T1_TOA')
# filter by location
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
n_img = flt_col.size().getInfo()
print('Number of images covering ' + sitename, n_img)
im_all = flt_col.getInfo().get('features')
#==========================================================#
# Main loop trough images
#==========================================================#
timestamps = []
acc_georef = []
all_names = []
for i in range(n_img):
# find each image in ee database
im = ee.Image(im_all[i].get('id'))
im_dic = im.getInfo()
im_bands = im_dic.get('bands')
t = im_dic['properties']['system:time_start']
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
timestamps.append(im_timestamp)
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
im_epsg = int(im_dic['bands'][0]['crs'][5:])
try:
acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
except:
acc_georef.append(12)
print('No geometric rmse model property')
# delete dimensions key from dictionnary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for L5
ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[7]]
# filenames
filename = im_date + '_' + satname + '_' + sitename + suffix
print(i)
if any(filename in _ for _ in all_names):
filename = im_date + '_' + satname + '_' + sitename + '_dup' + suffix
all_names.append(filename)
local_data = sds.download_tif(im, polygon, ms_bands, filepath)
os.rename(local_data, os.path.join(filepath, filename))
# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}
#%%
#==========================================================#
#==========================================================#
# L7&L8
#==========================================================#
#==========================================================#
# define filenames for images
suffix = '.tif'
filepath = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8')
filepath_pan = os.path.join(filepath, 'pan')
filepath_ms = os.path.join(filepath, 'ms')
try:
os.makedirs(filepath_pan)
os.makedirs(filepath_ms)
except:
print('directory already exists')
#==========================================================#
# Select L7 collection
#==========================================================#
satname = 'L7'
input_col = ee.ImageCollection('LANDSAT/LE07/C01/T1_RT_TOA')
# filter by location
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
n_img = flt_col.size().getInfo()
print('Number of images covering ' + sitename, n_img)
im_all = flt_col.getInfo().get('features')
#==========================================================#
# Main loop trough images
#==========================================================#
timestamps = []
acc_georef = []
all_names = []
for i in range(n_img):
# find each image in ee database
im = ee.Image(im_all[i].get('id'))
im_dic = im.getInfo()
im_bands = im_dic.get('bands')
t = im_dic['properties']['system:time_start']
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
timestamps.append(im_timestamp)
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
im_epsg = int(im_dic['bands'][0]['crs'][5:])
try:
acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
except:
acc_georef.append(12)
print('No geometric rmse model property')
# delete dimensions key from dictionnary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for L7
pan_band = [im_bands[8]]
ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[9]]
# filenames
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
print(i)
if any(filename_pan in _ for _ in all_names):
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
all_names.append(filename_pan)
local_data_pan = sds.download_tif(im, polygon, pan_band, filepath_pan)
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
local_data_ms = sds.download_tif(im, polygon, ms_bands, filepath_ms)
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
#==========================================================#
# Select L8 collection
#==========================================================#
satname = 'L8'
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
# filter by location
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
n_img = flt_col.size().getInfo()
print('Number of images covering Narrabeen:', n_img)
im_all = flt_col.getInfo().get('features')
#==========================================================#
# Main loop trough images
#==========================================================#
for i in range(n_img):
# find each image in ee database
im = ee.Image(im_all[i].get('id'))
im_dic = im.getInfo()
im_bands = im_dic.get('bands')
t = im_dic['properties']['system:time_start']
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
timestamps.append(im_timestamp)
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
im_epsg = int(im_dic['bands'][0]['crs'][5:])
try:
acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
except:
acc_georef.append(12)
print('No geometric rmse model property')
# delete dimensions key from dictionnary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for L8
pan_band = [im_bands[7]]
ms_bands = [im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[5], im_bands[11]]
# filenames
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
print(i)
if any(filename_pan in _ for _ in all_names):
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
all_names.append(filename_pan)
local_data_pan = sds.download_tif(im, polygon, pan_band, filepath_pan)
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
local_data_ms = sds.download_tif(im, polygon, ms_bands, filepath_ms)
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}
#%%
#==========================================================#
#==========================================================#
# S2
#==========================================================#
#==========================================================#
# define filenames for images
suffix = '.tif'
filepath = os.path.join(os.getcwd(), 'data', sitename, 'S2')
try:
os.makedirs(os.path.join(filepath, '10m'))
os.makedirs(os.path.join(filepath, '20m'))
os.makedirs(os.path.join(filepath, '60m'))
except:
print('directory already exists')
#==========================================================#
# Select L2 collection
#==========================================================#
satname = 'S2'
input_col = ee.ImageCollection('COPERNICUS/S2')
# filter by location
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
n_img = flt_col.size().getInfo()
print('Number of images covering ' + sitename, n_img)
im_all = flt_col.getInfo().get('features')
#==========================================================#
# Main loop trough images
#==========================================================#
timestamps = []
acc_georef = []
all_names = []
for i in range(n_img):
# find each image in ee database
im = ee.Image(im_all[i].get('id'))
im_dic = im.getInfo()
im_bands = im_dic.get('bands')
t = im_dic['properties']['system:time_start']
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
timestamps.append(im_timestamp)
im_epsg = int(im_dic['bands'][0]['crs'][5:])
try:
if im_dic['properties']['GEOMETRIC_QUALITY_FLAG'] == 'PASSED':
acc_georef.append(1)
else:
acc_georef.append(0)
except:
acc_georef.append(0)
# delete dimensions key from dictionnary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for S2
bands10 = [im_bands[1], im_bands[2], im_bands[3], im_bands[7]]
bands20 = [im_bands[11]]
bands60 = [im_bands[15]]
# filenames
filename10 = im_date + '_' + satname + '_' + sitename + '_' + '10m' + suffix
filename20 = im_date + '_' + satname + '_' + sitename + '_' + '20m' + suffix
filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + suffix
print(i)
if any(filename10 in _ for _ in all_names):
filename10 = im_date + '_' + satname + '_' + sitename + '_' + '10m' + '_dup' + suffix
filename20 = im_date + '_' + satname + '_' + sitename + '_' + '20m' + '_dup' + suffix
filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + '_dup' + suffix
all_names.append(filename10)
local_data = sds.download_tif(im, polygon, bands10, filepath)
os.rename(local_data, os.path.join(filepath, '10m', filename10))
local_data = sds.download_tif(im, polygon, bands20, filepath)
os.rename(local_data, os.path.join(filepath, '20m', filename20))
local_data = sds.download_tif(im, polygon, bands60, filepath)
os.rename(local_data, os.path.join(filepath, '60m', filename60))
# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}
#%% save metadata
filepath = os.path.join(os.getcwd(), 'data', sitename)
with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'wb') as f:
pickle.dump(metadata, f)

@ -0,0 +1,432 @@
"""This module contains all the functions needed for data analysis """
# Initial settings
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import gridspec
import pdb
import ee
# other modules
from osgeo import gdal, ogr, osr
import scipy.interpolate as interpolate
import scipy.stats as sstats
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.measure as measure
import skimage.morphology as morphology
# machine learning modules
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
from sklearn.externals import joblib
import time
# import own modules
import functions.utils as utils
def get_tide(dates_sds, dates_tide, tide_level):
tide = []
for i in range(len(dates_sds)):
dates_diff = np.abs(np.array([ (dates_sds[i] - _).total_seconds() for _ in dates_tide]))
if np.min(dates_diff) <= 1800: # half-an-hour
idx_closest = np.argmin(dates_diff)
tide.append(tide_level[idx_closest])
else:
tide.append(np.nan)
tide = np.array(tide)
return tide
def remove_duplicates(output, satname):
" removes duplicates from output structure, keep the one with less cloud cover or best georeferencing "
dates = output['dates']
dates_str = [_.strftime('%Y%m%d') for _ in dates]
dupl = utils.duplicates_dict(dates_str)
if dupl:
output_nodup = dict([])
idx_remove = []
if satname == 'L8' or satname == 'L5':
for k,v in dupl.items():
idx1 = v[0]
idx2 = v[1]
c1 = output['metadata']['cloud_cover'][idx1]
c2 = output['metadata']['cloud_cover'][idx2]
g1 = output['metadata']['acc_georef'][idx1]
g2 = output['metadata']['acc_georef'][idx2]
if c1 < c2 - 0.01:
idx_remove.append(idx2)
elif g1 < g2 - 0.1:
idx_remove.append(idx2)
else:
idx_remove.append(idx1)
else:
for k,v in dupl.items():
idx1 = v[0]
idx2 = v[1]
c1 = output['metadata']['cloud_cover'][idx1]
c2 = output['metadata']['cloud_cover'][idx2]
if c1 < c2 - 0.01:
idx_remove.append(idx2)
else:
idx_remove.append(idx1)
idx_remove = sorted(idx_remove)
idx_all = np.linspace(0, len(dates_str)-1, len(dates_str))
idx_keep = list(np.where(~np.isin(idx_all,idx_remove))[0])
output_nodup['dates'] = [output['dates'][k] for k in idx_keep]
output_nodup['shorelines'] = [output['shorelines'][k] for k in idx_keep]
output_nodup['metadata'] = dict([])
for key in list(output['metadata'].keys()):
output_nodup['metadata'][key] = [output['metadata'][key][k] for k in idx_keep]
print(satname + ' : ' + str(len(idx_remove)) + ' duplicates')
return output_nodup
else:
print(satname + ' : ' + 'no duplicates')
return output
def merge(output):
" merges data from the different satellites "
# stack all list together under one key
output_all = {'dates':[], 'shorelines':[],
'metadata':{'filenames':[], 'satname':[], 'cloud_cover':[], 'acc_georef':[]}}
for satname in list(output.keys()):
output_all['dates'] = output_all['dates'] + output[satname]['dates']
output_all['shorelines'] = output_all['shorelines'] + output[satname]['shorelines']
for key in list(output[satname]['metadata'].keys()):
output_all['metadata'][key] = output_all['metadata'][key] + output[satname]['metadata'][key]
output_all_sorted = {'dates':[], 'shorelines':[],
'metadata':{'filenames':[], 'satname':[], 'cloud_cover':[], 'acc_georef':[]}}
# sort the dates
idx_sorted = sorted(range(len(output_all['dates'])), key=output_all['dates'].__getitem__)
output_all_sorted['dates'] = [output_all['dates'][i] for i in idx_sorted]
output_all_sorted['shorelines'] = [output_all['shorelines'][i] for i in idx_sorted]
for key in list(output_all['metadata'].keys()):
output_all_sorted['metadata'][key] = [output_all['metadata'][key][i] for i in idx_sorted]
return output_all_sorted
def create_transects(x0, y0, orientation, chainage_length):
" creates shore-normal transects "
transects = []
for k in range(len(x0)):
# orientation of cross-shore profile
phi = (90 - orientation[k])*np.pi/180
# create a vector using the chainage length
x = np.linspace(0,chainage_length,chainage_length+1)
y = np.zeros(len(x))
coords = np.zeros((len(x),2))
coords[:,0] = x
coords[:,1] = y
# translate and rotate the vector using the origin and orientation
tf = transform.EuclideanTransform(rotation=phi, translation=(x0[k],y0[k]))
coords_tf = tf(coords)
transects.append(coords_tf)
return transects
def calculate_chainage(sds, transects, orientation, along_dist):
" intersect SDS with transect and compute chainage position "
chainage_mtx = np.zeros((len(sds),len(transects),6))
for i in range(len(sds)):
sl = sds[i]
for j in range(len(transects)):
# compute rotation matrix
X0 = transects[j][0,0]
Y0 = transects[j][0,1]
phi = (90 - orientation[j])*np.pi/180
Mrot = np.array([[np.cos(phi), np.sin(phi)],[-np.sin(phi), np.cos(phi)]])
# calculate point to line distance between shoreline points and profile
p1 = np.array([X0,Y0])
p2 = transects[j][-1,:]
p3 = sl
d = np.abs(np.cross(p2-p1,p3-p1)/np.linalg.norm(p2-p1))
idx_close = utils.find_indices(d, lambda e: e <= along_dist)
# check if there are SDS points around the profile or not
if not idx_close:
chainage_mtx[i,j,:] = np.tile(np.nan,(1,6))
else:
# change of base to shore-normal coordinate system
xy_close = np.array([sl[idx_close,0],sl[idx_close,1]]) - np.tile(np.array([[X0],[Y0]]), (1,len(sl[idx_close])))
xy_rot = np.matmul(Mrot, xy_close)
# put nan values if the chainage is negative (MAKE SURE TO PICK ORIGIN CORRECTLY)
if np.any(xy_rot[0,:] < 0):
xy_rot[0,np.where(xy_rot[0,:] < 0)] = np.nan
# compute mean, median max and std of chainage position
n_points = len(xy_rot[0,:])
mean_cross = np.nanmean(xy_rot[0,:])
median_cross = np.nanmedian(xy_rot[0,:])
max_cross = np.nanmax(xy_rot[0,:])
min_cross = np.nanmin(xy_rot[0,:])
std_cross = np.nanstd(xy_rot[0,:])
if std_cross > 10: # if large std, take the most seaward point
mean_cross = max_cross
median_cross = max_cross
min_cross = max_cross
# store the statistics
chainage_mtx[i,j,:] = np.array([mean_cross, median_cross, max_cross,
min_cross, n_points, std_cross])
# format into dictionnary
chainage = dict([])
chainage['mean'] = chainage_mtx[:,:,0]
chainage['median'] = chainage_mtx[:,:,1]
chainage['max'] = chainage_mtx[:,:,2]
chainage['min'] = chainage_mtx[:,:,3]
chainage['npoints'] = chainage_mtx[:,:,4]
chainage['std'] = chainage_mtx[:,:,5]
return chainage
def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
"""
Compare sds with groundtruth data from topographic surveys / argus shorelines
KV WRL 2018
Arguments:
-----------
dates_sds: list
list of dates corresponding to each row in chain_sds
chain_sds: np.ndarray
array with time series of chainage for each transect (each transect is one column)
topo_profiles: dict
dict containing the dates and chainage of the groundtruth
mod: 0 or 1
0 for linear interpolation between 2 closest surveys, 1 for only nearest neighbour
min_days: int
minimum number of days for which the data can be compared
Returns: -----------
stats: dict
contains all the statistics of the comparison
"""
# create 3 figures
fig1 = plt.figure()
gs1 = gridspec.GridSpec(chain_sds.shape[1], 1)
fig2 = plt.figure()
gs2 = gridspec.GridSpec(2, chain_sds.shape[1])
fig3 = plt.figure()
gs3 = gridspec.GridSpec(2,1)
dates_sds_num = np.array([_.toordinal() for _ in dates_sds])
stats = dict([])
data_fin = dict([])
# for each transect compare and plot the data
for i in range(chain_sds.shape[1]):
pfname = list(topo_profiles.keys())[i]
stats[pfname] = dict([])
data_fin[pfname] = dict([])
dates_sur = topo_profiles[pfname]['dates']
chain_sur = topo_profiles[pfname]['chainage']
# convert to datenum
dates_sur_num = np.array([_.toordinal() for _ in dates_sur])
chain_sur_interp = []
diff_days = []
for j, satdate in enumerate(dates_sds_num):
temp_diff = satdate - dates_sur_num
if mod==0:
# select measurement before and after sat image date and interpolate
ind_before = np.where(temp_diff == temp_diff[temp_diff > 0][-1])[0]
if ind_before == len(temp_diff)-1:
chain_sur_interp.append(np.nan)
diff_days.append(np.abs(satdate-dates_sur_num[ind_before])[0])
continue
ind_after = np.where(temp_diff == temp_diff[temp_diff < 0][0])[0]
tempx = np.zeros(2)
tempx[0] = dates_sur_num[ind_before]
tempx[1] = dates_sur_num[ind_after]
tempy = np.zeros(2)
tempy[0] = chain_sur[ind_before]
tempy[1] = chain_sur[ind_after]
diff_days.append(np.abs(np.max([satdate-tempx[0], satdate-tempx[1]])))
# interpolate
f = interpolate.interp1d(tempx, tempy)
chain_sur_interp.append(f(satdate))
elif mod==1:
# select the closest measurement
idx_closest = utils.find_indices(np.abs(temp_diff), lambda e: e == np.min(np.abs(temp_diff)))[0]
diff_days.append(np.abs(satdate-dates_sur_num[idx_closest]))
if diff_days[j] > mindays:
chain_sur_interp.append(np.nan)
else:
chain_sur_interp.append(chain_sur[idx_closest])
chain_sur_interp = np.array(chain_sur_interp)
# remove nan values
idx_sur_nan = ~np.isnan(chain_sur_interp)
idx_sat_nan = ~np.isnan(chain_sds[:,i])
idx_nan = np.logical_and(idx_sur_nan, idx_sat_nan)
# groundtruth and sds
chain_sur_fin = chain_sur_interp[idx_nan]
chain_sds_fin = chain_sds[idx_nan,i]
dates_fin = [k for (k, v) in zip(dates_sds, idx_nan) if v]
# calculate statistics
slope, intercept, rvalue, pvalue, std_err = sstats.linregress(chain_sur_fin, chain_sds_fin)
R2 = rvalue**2
correlation = np.corrcoef(chain_sur_fin, chain_sds_fin)[0,1]
diff_chain = chain_sur_fin - chain_sds_fin
rmse = np.sqrt(np.nanmean((diff_chain)**2))
mean = np.nanmean(diff_chain)
std = np.nanstd(diff_chain)
q90 = np.percentile(np.abs(diff_chain), 90)
# store data
stats[pfname]['rmse'] = rmse
stats[pfname]['mean'] = mean
stats[pfname]['std'] = std
stats[pfname]['q90'] = q90
stats[pfname]['diffdays'] = diff_days
stats[pfname]['corr'] = correlation
stats[pfname]['linfit'] = {'slope':slope, 'intercept':intercept, 'R2':R2, 'pvalue':pvalue}
data_fin[pfname]['dates'] = dates_fin
data_fin[pfname]['sds'] = chain_sds_fin
data_fin[pfname]['survey'] = chain_sur_fin
# make time-series plot
plt.figure(fig1.number)
fig1.add_subplot(gs1[i,0])
plt.plot(dates_sur, chain_sur, 'o-', color='C1', markersize=4, label='survey all')
plt.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2, label='survey interp')
plt.plot(dates_fin, chain_sds_fin, 'o--', color='b', markersize=4, label='SDS')
plt.title(pfname, fontweight='bold')
# plt.xlim([dates_sds[0], dates_sds[-1]])
plt.ylabel('chainage [m]')
# make scatter plot
plt.figure(fig2.number)
fig2.add_subplot(gs2[0,i])
plt.axis('equal')
plt.plot(chain_sur_fin, chain_sds_fin, 'ko', markersize=4, markerfacecolor='w', alpha=0.7)
xmax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)])
xmin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)])
ymax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)])
ymin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)])
plt.plot([xmin, xmax], [ymin, ymax], 'k--')
plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'b:')
str_corr = ' y = %.2f x + %.2f\n R2 = %.2f' % (slope, intercept, R2)
plt.text(xmin, ymax-5, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
plt.xlabel('chainage survey [m]')
plt.ylabel('chainage satellite [m]')
plt.title(pfname, fontweight='bold')
fig2.add_subplot(gs2[1,i])
binwidth = 3
bins = np.arange(min(diff_chain), max(diff_chain) + binwidth, binwidth)
density = plt.hist(diff_chain, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k')
plt.xlim([-50, 50])
plt.xlabel('error [m]')
str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90)
plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.3), horizontalalignment='left', fontsize=10)
fig1.set_size_inches(19.2, 9.28)
fig1.set_tight_layout(True)
fig2.set_size_inches(19.2, 9.28)
fig2.set_tight_layout(True)
# all transects together
chain_sds_all = []
chain_sur_all = []
for i in range(chain_sds.shape[1]):
pfname = list(topo_profiles.keys())[i]
chain_sds_all = np.append(chain_sds_all,data_fin[pfname]['sds'])
chain_sur_all = np.append(chain_sur_all,data_fin[pfname]['survey'])
# calculate statistics
slope, intercept, rvalue, pvalue, std_err = sstats.linregress(chain_sur_all, chain_sds_all)
R2 = rvalue**2
correlation = np.corrcoef(chain_sur_all, chain_sds_all)[0,1]
diff_chain_all = chain_sur_all - chain_sds_all
rmse = np.sqrt(np.nanmean((diff_chain_all)**2))
mean = np.nanmean(diff_chain_all)
std = np.nanstd(diff_chain_all)
q90 = np.percentile(np.abs(diff_chain_all), 90)
stats['all'] = {'rmse':rmse,'mean':mean,'std':std,'q90':q90, 'corr':correlation,
'linfit':{'slope':slope, 'intercept':intercept, 'R2':R2, 'pvalue':pvalue}}
# make plot
plt.figure(fig3.number)
fig3.add_subplot(gs3[0,0])
plt.axis('equal')
plt.plot(chain_sur_all, chain_sds_all, 'ko', markersize=4, markerfacecolor='w', alpha=0.7)
xmax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)])
xmin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)])
ymax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)])
ymin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)])
plt.plot([xmin, xmax], [ymin, ymax], 'k--')
plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'b:')
str_corr = ' y = %.2f x + %.2f\n R2 = %.2f' % (slope, intercept, R2)
plt.text(xmin, ymax-5, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
plt.xlabel('chainage survey [m]')
plt.ylabel('chainage satellite [m]')
plt.title(pfname, fontweight='bold')
fig3.add_subplot(gs3[1,0])
binwidth = 3
bins = np.arange(min(diff_chain_all), max(diff_chain_all) + binwidth, binwidth)
density = plt.hist(diff_chain_all, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k')
plt.xlim([-50, 50])
plt.xlabel('error [m]')
str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90)
plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.3), horizontalalignment='left', fontsize=10)
fig3.set_size_inches(9.2, 9.28)
fig3.set_tight_layout(True)
return stats

@ -5,11 +5,13 @@ Created on Thu Mar 1 11:20:35 2018
@author: z5030440 @author: z5030440
""" """
"""This script contains the functions needed for satellite derived shoreline (SDS) extraction""" """This module contains all the functions needed for extracting satellite derived shoreline (SDS) """
# Initial settings # Initial settings
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import gridspec
import pdb import pdb
import ee import ee
@ -18,6 +20,7 @@ from osgeo import gdal, ogr, osr
import tempfile import tempfile
from urllib.request import urlretrieve from urllib.request import urlretrieve
import zipfile import zipfile
import scipy.interpolate as interpolate
# image processing modules # image processing modules
import skimage.filters as filters import skimage.filters as filters
@ -39,7 +42,7 @@ from functions.utils import *
# Download from ee server function # Download from ee server function
def download_tif(image, polygon, bandsId): def download_tif(image, polygon, bandsId, filepath):
"""downloads tif image (region and bands) from the ee server and stores it in a temp file""" """downloads tif image (region and bands) from the ee server and stores it in a temp file"""
url = ee.data.makeDownloadUrl(ee.data.getDownloadId({ url = ee.data.makeDownloadUrl(ee.data.getDownloadId({
'image': image.serialize(), 'image': image.serialize(),
@ -50,40 +53,7 @@ def download_tif(image, polygon, bandsId):
})) }))
local_zip, headers = urlretrieve(url) local_zip, headers = urlretrieve(url)
with zipfile.ZipFile(local_zip) as local_zipfile: with zipfile.ZipFile(local_zip) as local_zipfile:
return local_zipfile.extract('data.tif', tempfile.mkdtemp()) return local_zipfile.extract('data.tif', filepath)
def load_image(image, polygon, bandsId):
"""
Loads an ee.Image() as a np.array. e.Image() is retrieved from the EE database.
The geographic area and bands to select can be specified
KV WRL 2018
Arguments:
-----------
image: ee.Image()
image objec from the EE database
polygon: list
coordinates of the points creating a polygon. Each point is a list with 2 values
bandsId: list
bands to select, each band is a dictionnary in the list containing the following keys:
crs, crs_transform, data_type and id. NOTE: you have to remove the key dimensions, otherwise
the entire image is retrieved.
Returns:
-----------
image_array : np.ndarray
An array containing the image (2D if one band, otherwise 3D)
georef : np.ndarray
6 element vector containing the crs_parameters
[X_ul_corner Xscale Xshear Y_ul_corner Yshear Yscale]
"""
local_tif_filename = download_tif(image, polygon, bandsId)
dataset = gdal.Open(local_tif_filename, gdal.GA_ReadOnly)
georef = np.array(dataset.GetGeoTransform())
bands = [dataset.GetRasterBand(i + 1).ReadAsArray() for i in range(dataset.RasterCount)]
return np.stack(bands, 2), georef
def create_cloud_mask(im_qa, satname, plot_bool): def create_cloud_mask(im_qa, satname, plot_bool):
""" """
@ -109,8 +79,10 @@ def create_cloud_mask(im_qa, satname, plot_bool):
# convert QA bits # convert QA bits
if satname == 'L8': if satname == 'L8':
cloud_values = [2800, 2804, 2808, 2812, 6896, 6900, 6904, 6908] cloud_values = [2800, 2804, 2808, 2812, 6896, 6900, 6904, 6908]
elif satname == 'L7': elif satname == 'L7' or satname == 'L5' or satname == 'L4':
cloud_values = [752, 756, 760, 764] cloud_values = [752, 756, 760, 764]
elif satname == 'S2':
cloud_values = [1024, 2048] # 1024 = dense cloud, 2048 = cirrus clouds
cloud_mask = np.isin(im_qa, cloud_values) cloud_mask = np.isin(im_qa, cloud_values)
# remove isolated cloud pixels (there are some in the swash and they cause problems) # remove isolated cloud pixels (there are some in the swash and they cause problems)
@ -127,109 +99,6 @@ def create_cloud_mask(im_qa, satname, plot_bool):
return cloud_mask return cloud_mask
def read_eeimage(im, polygon, sat_name, plot_bool):
"""
Read an ee.Image() object and returns the panchromatic band, multispectral bands (B, G, R, NIR, SWIR)
and a cloud mask. All outputs are at 15m resolution (bilinear interpolation for the multispectral bands)
KV WRL 2018
Arguments:
-----------
im: ee.Image()
Image to read from the Google Earth Engine database
plot_bool: boolean
True if plot is wanted
Returns:
-----------
im_pan: np.ndarray (2D)
The panchromatic band (15m)
im_ms: np.ndarray (3D)
The multispectral bands interpolated at 15m
im_cloud: np.ndarray (2D)
The cloud mask at 15m
crs_params: list
EPSG code and affine transformation parameters
"""
im_dic = im.getInfo()
# save metadata
im_meta = im_dic.get('properties')
meta = {'timestamp':im_meta['system:time_start'],
'date_acquired':im_meta['DATE_ACQUIRED'],
'geom_rmse_model':im_meta['GEOMETRIC_RMSE_MODEL'],
'gcp_model':im_meta['GROUND_CONTROL_POINTS_MODEL'],
'quality':im_meta['IMAGE_QUALITY_OLI'],
'sun_azimuth':im_meta['SUN_AZIMUTH'],
'sun_elevation':im_meta['SUN_ELEVATION']}
im_bands = im_dic.get('bands')
# delete dimensions key from dictionnary, otherwise the entire image is extracted
for i in range(len(im_bands)): del im_bands[i]['dimensions']
# load panchromatic band
pan_band = [im_bands[7]]
im_pan, crs_pan = load_image(im, polygon, pan_band)
im_pan = im_pan[:,:,0]
# load the multispectral bands (B2,B3,B4,B5,B6) = (blue,green,red,nir,swir1)
ms_bands = [im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[5]]
im_ms_30m, crs_ms = load_image(im, polygon, ms_bands)
# create cloud mask
qa_band = [im_bands[11]]
im_qa, crs_qa = load_image(im, polygon, qa_band)
im_qa = im_qa[:,:,0]
im_cloud = create_cloud_mask(im_qa, sat_name, plot_bool)
im_cloud = transform.resize(im_cloud, (im_pan.shape[0], im_pan.shape[1]),
order=0, preserve_range=True, mode='constant').astype('bool_')
# resize the image using bilinear interpolation (order 1)
im_ms = transform.resize(im_ms_30m,(im_pan.shape[0], im_pan.shape[1]),
order=1, preserve_range=True, mode='constant')
# check if -inf values (means out of image) and add to cloud mask
im_inf = np.isin(im_ms[:,:,0], -np.inf)
im_nan = np.isnan(im_ms[:,:,0])
im_cloud = np.logical_or(np.logical_or(im_cloud, im_inf), im_nan)
# get the crs parameters for the image at 15m and 30m resolution
crs = {'crs_15m':crs_pan, 'crs_30m':crs_ms, 'epsg_code':int(pan_band[0]['crs'][5:])}
if plot_bool:
# if there are -inf in the image, set them to 0 before plotting
if sum(sum(np.isin(im_ms_30m[:,:,0], -np.inf).astype(int))) > 0:
idx = np.isin(im_ms_30m[:,:,0], -np.inf)
im_ms_30m[idx,0] = 0; im_ms_30m[idx,1] = 0; im_ms_30m[idx,2] = 0;
im_ms_30m[idx,3] = 0; im_ms_30m[idx,4] = 0
plt.figure()
plt.subplot(221)
plt.imshow(im_pan, cmap='gray')
plt.title('PANCHROMATIC')
plt.subplot(222)
plt.imshow(im_ms_30m[:,:,[2,1,0]])
plt.title('RGB')
plt.subplot(223)
plt.imshow(im_ms_30m[:,:,3], cmap='gray')
plt.title('NIR')
plt.subplot(224)
plt.imshow(im_ms_30m[:,:,4], cmap='gray')
plt.title('SWIR')
plt.show()
return im_pan, im_ms, im_cloud, crs, meta
def rescale_image_intensity(im, cloud_mask, prob_high, plot_bool): def rescale_image_intensity(im, cloud_mask, prob_high, plot_bool):
""" """
Rescales the intensity of an image (multispectral or single band) by applying Rescales the intensity of an image (multispectral or single band) by applying
@ -395,9 +264,28 @@ def pansharpen(im_ms, im_pan, cloud_mask, plot_bool):
# apply PCA to RGB bands # apply PCA to RGB bands
pca = decomposition.PCA() pca = decomposition.PCA()
vec_pcs = pca.fit_transform(vec) vec_pcs = pca.fit_transform(vec)
# replace 1st PC with pan band (after matching histograms) # replace 1st PC with pan band (after matching histograms)
vec_pan = im_pan.reshape(im_pan.shape[0] * im_pan.shape[1]) vec_pan = im_pan.reshape(im_pan.shape[0] * im_pan.shape[1])
vec_pan = vec_pan[~vec_mask] vec_pan = vec_pan[~vec_mask]
# plt.figure()
# ax1 = plt.subplot(131)
# plt.imshow(im_pan, cmap='gray')
# plt.title('Pan band')
# plt.subplot(132, sharex=ax1, sharey=ax1)
# plt.imshow(vec_pcs[:,0].reshape(im_pan.shape[0],im_pan.shape[1]), cmap='gray')
# plt.title('PC1')
# plt.subplot(133, sharex=ax1, sharey=ax1)
# plt.imshow(hist_match(vec_pan, vec_pcs[:,0]).reshape(im_pan.shape[0],im_pan.shape[1]), cmap='gray')
# plt.title('Pan band histmatched')
#
# plt.figure()
# plt.hist(hist_match(vec_pan, vec_pcs[:,0]), bins=300)
# plt.hist(vec_pcs[:,0], bins=300, alpha=0.5)
# plt.hist(vec_pan, bins=300, alpha=0.5)
# plt.draw()
vec_pcs[:,0] = hist_match(vec_pan, vec_pcs[:,0]) vec_pcs[:,0] = hist_match(vec_pan, vec_pcs[:,0])
vec_ms_ps = pca.inverse_transform(vec_pcs) vec_ms_ps = pca.inverse_transform(vec_pcs)
@ -407,13 +295,14 @@ def pansharpen(im_ms, im_pan, cloud_mask, plot_bool):
im_ms_ps = vec_ms_ps_full.reshape(im_ms.shape[0], im_ms.shape[1], im_ms.shape[2]) im_ms_ps = vec_ms_ps_full.reshape(im_ms.shape[0], im_ms.shape[1], im_ms.shape[2])
if plot_bool: if plot_bool:
plt.figure() plt.figure()
ax1 = plt.subplot(121) ax1 = plt.subplot(121)
plt.imshow(rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 100, False)) plt.imshow(rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False))
plt.axis('off') plt.axis('off')
plt.title('Original') plt.title('Original')
ax2 = plt.subplot(122, sharex=ax1, sharey=ax1) ax2 = plt.subplot(122, sharex=ax1, sharey=ax1)
plt.imshow(rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 100, False)) plt.imshow(rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False))
plt.axis('off') plt.axis('off')
plt.title('Pansharpened') plt.title('Pansharpened')
plt.show() plt.show()
@ -458,9 +347,10 @@ def nd_index(im1, im2, cloud_mask, plot_bool):
return im_nd return im_nd
def find_wl_contours(im_ndwi, cloud_mask, min_contour_points, plot_bool): def find_wl_contours(im_ndwi, cloud_mask, plot_bool):
""" """
Computes normalised difference index on 2 images (2D), given a cloud mask (2D) Finds the water line by thresholding the Normalized Difference Water Index and applying the Marching
Squares Algorithm
KV WRL 2018 KV WRL 2018
@ -470,8 +360,6 @@ def find_wl_contours(im_ndwi, cloud_mask, min_contour_points, plot_bool):
Image (2D) with the NDWI (water index) Image (2D) with the NDWI (water index)
cloud_mask: np.ndarray cloud_mask: np.ndarray
2D cloud mask with True where cloud pixels are 2D cloud mask with True where cloud pixels are
min_contour_points: int
minimum number of points in each contour line
plot_bool: boolean plot_bool: boolean
True if plot is wanted True if plot is wanted
@ -489,16 +377,18 @@ def find_wl_contours(im_ndwi, cloud_mask, min_contour_points, plot_bool):
t_otsu = filters.threshold_otsu(vec) t_otsu = filters.threshold_otsu(vec)
# use Marching Squares algorithm to detect contours on ndwi image # use Marching Squares algorithm to detect contours on ndwi image
contours = measure.find_contours(im_ndwi, t_otsu) contours = measure.find_contours(im_ndwi, t_otsu)
# filter water lines
contours_wl = [] # remove contour points that are nans
for i, contour in enumerate(contours): contours_nonans = []
# remove contour points that are around clouds (nan values) for k in range(len(contours)):
if np.any(np.isnan(contour)): if np.any(np.isnan(contours[k])):
index_nan = np.where(np.isnan(contour))[0] index_nan = np.where(np.isnan(contours[k]))[0]
contour = np.delete(contour, index_nan, axis=0) contours_temp = np.delete(contours[k], index_nan, axis=0)
# remove contours that have only few points (less than min_contour_points) if len(contours_temp) > 1:
if contour.shape[0] > min_contour_points: contours_nonans.append(contours_temp)
contours_wl.append(contour) else:
contours_nonans.append(contours[k])
contours = contours_nonans
if plot_bool: if plot_bool:
# plot otsu's histogram segmentation # plot otsu's histogram segmentation
@ -512,12 +402,12 @@ def find_wl_contours(im_ndwi, cloud_mask, min_contour_points, plot_bool):
plt.figure() plt.figure()
plt.imshow(im_ndwi, cmap='seismic') plt.imshow(im_ndwi, cmap='seismic')
plt.colorbar() plt.colorbar()
for i,contour in enumerate(contours_wl): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k') for i,contour in enumerate(contours): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k')
plt.axis('image') plt.axis('image')
plt.title('Detected water lines') plt.title('Detected water lines')
plt.show() plt.show()
return contours_wl return contours
def convert_pix2world(points, crs_vec): def convert_pix2world(points, crs_vec):
""" """
@ -563,6 +453,49 @@ def convert_pix2world(points, crs_vec):
return points_converted return points_converted
def convert_world2pix(points, crs_vec):
"""
Converts world projected coordinates (X,Y) to image coordinates (row,column)
performing an affine transformation
KV WRL 2018
Arguments:
-----------
points: np.ndarray or list of np.ndarray
array with 2 columns (rows first and columns second)
crs_vec: np.ndarray
vector of 6 elements [Xtr, Xscale, Xshear, Ytr, Yshear, Yscale]
Returns: -----------
points_converted: np.ndarray or list of np.ndarray
converted coordinates, first columns with row and second column with column
"""
# make affine transformation matrix
aff_mat = np.array([[crs_vec[1], crs_vec[2], crs_vec[0]],
[crs_vec[4], crs_vec[5], crs_vec[3]],
[0, 0, 1]])
# create affine transformation
tform = transform.AffineTransform(aff_mat)
if type(points) is list:
points_converted = []
# iterate over the list
for i, arr in enumerate(points):
points_converted.append(tform.inverse(points))
elif type(points) is np.ndarray:
points_converted = tform.inverse(points)
else:
print('invalid input type')
raise
return points_converted
def convert_epsg(points, epsg_in, epsg_out): def convert_epsg(points, epsg_in, epsg_out):
""" """
Converts from one spatial reference to another using the epsg codes Converts from one spatial reference to another using the epsg codes
@ -676,7 +609,7 @@ def classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, buffer_size
# im_sand = morphology.binary_dilation(im_sand, morphology.disk(1)) # im_sand = morphology.binary_dilation(im_sand, morphology.disk(1))
if plot_bool: if plot_bool:
im = np.copy(rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 100, False)) im = np.copy(rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False))
im[im_sand,0] = 0 im[im_sand,0] = 0
im[im_sand,1] = 0 im[im_sand,1] = 0
im[im_sand,2] = 1 im[im_sand,2] = 1
@ -688,7 +621,7 @@ def classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, buffer_size
return im_sand return im_sand
def classify_image_NN(im_ms_ps, im_pan, cloud_mask, plot_bool): def classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, plot_bool):
""" """
Classifies every pixel in the image in one of 4 classes: Classifies every pixel in the image in one of 4 classes:
- sand --> label = 1 - sand --> label = 1
@ -714,8 +647,10 @@ def classify_image_NN(im_ms_ps, im_pan, cloud_mask, plot_bool):
True if plot is wanted True if plot is wanted
Returns: ----------- Returns: -----------
im_labels: np.ndarray im_classif: np.ndarray
2D binary image containing True where sand pixels are located 2D image containing labels
im_labels: np.ndarray of booleans
3D image containing a boolean image for each class (im_classif == label)
""" """
@ -743,16 +678,24 @@ def classify_image_NN(im_ms_ps, im_pan, cloud_mask, plot_bool):
vec_classif = np.zeros((cloud_mask.shape[0]*cloud_mask.shape[1])) vec_classif = np.zeros((cloud_mask.shape[0]*cloud_mask.shape[1]))
vec_classif[~vec_mask] = labels vec_classif[~vec_mask] = labels
im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1])) im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1]))
# im_classif = morphology.remove_small_objects(im_classif, min_size=min_beach_size, connectivity=2)
# labels
im_sand = im_classif == 1
im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
im_swash = im_classif == 2
im_water = im_classif == 3
im_labels = np.stack((im_sand,im_swash,im_water), axis=-1)
if plot_bool: if plot_bool:
im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 100, False) # display on top of pansharpened RGB
im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)
im = np.copy(im_display) im = np.copy(im_display)
colours = np.array([[1,0,0],[1,1,0],[0,1,1],[0,0,1]]) # define colours for plot
for k in range(4): colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
im[im_classif == k,0] = colours[k,0] for k in range(0,im_labels.shape[2]):
im[im_classif == k,1] = colours[k,1] im[im_labels[:,:,k],0] = colours[k,0]
im[im_classif == k,2] = colours[k,2] im[im_labels[:,:,k],1] = colours[k,1]
im[im_labels[:,:,k],2] = colours[k,2]
plt.figure() plt.figure()
ax1 = plt.subplot(121) ax1 = plt.subplot(121)
@ -762,8 +705,463 @@ def classify_image_NN(im_ms_ps, im_pan, cloud_mask, plot_bool):
ax2 = plt.subplot(122, sharex=ax1, sharey=ax1) ax2 = plt.subplot(122, sharex=ax1, sharey=ax1)
plt.imshow(im) plt.imshow(im)
plt.axis('off') plt.axis('off')
plt.title('NN') plt.title('NN classifier')
mng = plt.get_current_fig_manager()
mng.window.showMaximized()
plt.tight_layout()
plt.draw() plt.draw()
return im_classif return im_classif, im_labels
def classify_image_NN_nopan(im_ms_ps, cloud_mask, min_beach_size, plot_bool):
"""
Classifies every pixel in the image in one of 4 classes:
- sand --> label = 1
- whitewater (breaking waves and swash) --> label = 2
- water --> label = 3
- other (vegetation, buildings, rocks...) --> label = 0
The classifier is a Neural Network, trained with 7000 pixels for the class SAND and 1500 pixels for
each of the other classes. This is because the class of interest for my application is SAND and I
wanted to minimize the classification error for that class
KV WRL 2018
Arguments:
-----------
im_ms_ps: np.ndarray
Pansharpened RGB + downsampled NIR and SWIR
im_pan:
Panchromatic band
cloud_mask: np.ndarray
2D cloud mask with True where cloud pixels are
plot_bool: boolean
True if plot is wanted
Returns: -----------
im_classif: np.ndarray
2D image containing labels
im_labels: np.ndarray of booleans
3D image containing a boolean image for each class (im_classif == label)
"""
# load classifier
clf = joblib.load('functions/NeuralNet_classif_nopan.pkl')
# calculate features
n_features = 9
im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features))
im_features[:,:,[0,1,2,3,4]] = im_ms_ps
im_features[:,:,5] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, False) # (NIR-G)
im_features[:,:,6] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], cloud_mask, False) # ND(NIR-R)
im_features[:,:,7] = nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], cloud_mask, False) # ND(B-R)
im_features[:,:,8] = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False) # ND(SWIR-G)
# remove NaNs and clouds
vec_features = im_features.reshape((im_ms_ps.shape[0] * im_ms_ps.shape[1], n_features))
vec_cloud = cloud_mask.reshape(cloud_mask.shape[0]*cloud_mask.shape[1])
vec_nan = np.any(np.isnan(vec_features), axis=1)
vec_mask = np.logical_or(vec_cloud, vec_nan)
vec_features = vec_features[~vec_mask, :]
# predict with NN classifier
labels = clf.predict(vec_features)
# recompose image
vec_classif = np.zeros((cloud_mask.shape[0]*cloud_mask.shape[1]))
vec_classif[~vec_mask] = labels
im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1]))
# labels
im_sand = im_classif == 1
im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
im_swash = im_classif == 2
im_water = im_classif == 3
im_labels = np.stack((im_sand,im_swash,im_water), axis=-1)
if plot_bool:
# display on top of pansharpened RGB
im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)
im = np.copy(im_display)
# define colours for plot
colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
for k in range(0,im_labels.shape[2]):
im[im_labels[:,:,k],0] = colours[k,0]
im[im_labels[:,:,k],1] = colours[k,1]
im[im_labels[:,:,k],2] = colours[k,2]
plt.figure()
ax1 = plt.subplot(121)
plt.imshow(im_display)
plt.axis('off')
plt.title('Image')
ax2 = plt.subplot(122, sharex=ax1, sharey=ax1)
plt.imshow(im)
plt.axis('off')
plt.title('NN classifier')
mng = plt.get_current_fig_manager()
mng.window.showMaximized()
plt.tight_layout()
plt.draw()
return im_classif, im_labels
def find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size, plot_bool):
"""
New method for extracting shorelines (more robust)
KV WRL 2018
Arguments:
-----------
im_ms_ps: np.ndarray
Pansharpened RGB + downsampled NIR and SWIR
im_labels: np.ndarray
3D image containing a boolean image for each class in the order (sand, swash, water)
cloud_mask: np.ndarray
2D cloud mask with True where cloud pixels are
buffer_size: int
size of the buffer around the sandy beach
plot_bool: boolean
True if plot is wanted
Returns: -----------
contours_wi: list of np.arrays
contains the (row,column) coordinates of the contour lines extracted with the Water Index
contours_mwi: list of np.arrays
contains the (row,column) coordinates of the contour lines extracted with the Modified Water Index
"""
nrows = cloud_mask.shape[0]
ncols = cloud_mask.shape[1]
im_display = rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)
# calculate Normalized Difference Modified Water Index (SWIR - G)
im_mwi = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False)
# calculate Normalized Difference Modified Water Index (NIR - G)
im_wi = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, False)
# stack indices together
im_ind = np.stack((im_wi, im_mwi), axis=-1)
vec_ind = im_ind.reshape(nrows*ncols,2)
# process labels
vec_sand = im_labels[:,:,0].reshape(ncols*nrows)
vec_swash = im_labels[:,:,1].reshape(ncols*nrows)
vec_water = im_labels[:,:,2].reshape(ncols*nrows)
# create a buffer around the sandy beach
se = morphology.disk(buffer_size)
im_buffer = morphology.binary_dilation(im_labels[:,:,0], se)
vec_buffer = im_buffer.reshape(nrows*ncols)
# select water/sand/swash pixels that are within the buffer
int_water = vec_ind[np.logical_and(vec_buffer,vec_water),:]
int_sand = vec_ind[np.logical_and(vec_buffer,vec_sand),:]
int_swash = vec_ind[np.logical_and(vec_buffer,vec_swash),:]
# threshold the sand/water intensities
int_all = np.append(int_water,int_sand, axis=0)
t_mwi = filters.threshold_otsu(int_all[:,0])
t_wi = filters.threshold_otsu(int_all[:,1])
# find contour with MS algorithm
im_wi_buffer = np.copy(im_wi)
im_wi_buffer[~im_buffer] = np.nan
im_mwi_buffer = np.copy(im_mwi)
im_mwi_buffer[~im_buffer] = np.nan
contours_wi = measure.find_contours(im_wi_buffer, t_wi)
contours_mwi = measure.find_contours(im_mwi, t_mwi) # WARNING (on entire image)
# remove contour points that are nans (around clouds)
contours = contours_wi
contours_nonans = []
for k in range(len(contours)):
if np.any(np.isnan(contours[k])):
index_nan = np.where(np.isnan(contours[k]))[0]
contours_temp = np.delete(contours[k], index_nan, axis=0)
if len(contours_temp) > 1:
contours_nonans.append(contours_temp)
else:
contours_nonans.append(contours[k])
contours_wi = contours_nonans
contours = contours_mwi
contours_nonans = []
for k in range(len(contours)):
if np.any(np.isnan(contours[k])):
index_nan = np.where(np.isnan(contours[k]))[0]
contours_temp = np.delete(contours[k], index_nan, axis=0)
if len(contours_temp) > 1:
contours_nonans.append(contours_temp)
else:
contours_nonans.append(contours[k])
contours_mwi = contours_nonans
if plot_bool:
im = np.copy(im_display)
# define colours for plot
colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
for k in range(0,im_labels.shape[2]):
im[im_labels[:,:,k],0] = colours[k,0]
im[im_labels[:,:,k],1] = colours[k,1]
im[im_labels[:,:,k],2] = colours[k,2]
fig = plt.figure()
gs = gridspec.GridSpec(3, 3, height_ratios=[1, 1, 3])
ax1 = fig.add_subplot(gs[0,:])
vals = plt.hist(int_water[:,0], bins=100, label='water')
plt.hist(int_sand[:,0], bins=100, alpha=0.5, label='sand')
plt.hist(int_swash[:,0], bins=100, alpha=0.5, label='swash')
plt.plot([t_wi, t_wi], [0, np.max(vals[0])], 'r-')
plt.legend()
plt.title('Water Index NIR-G')
ax2 = fig.add_subplot(gs[1,:], sharex=ax1)
vals = plt.hist(int_water[:,1], bins=100, label='water')
plt.hist(int_sand[:,1], bins=100, alpha=0.5, label='sand')
plt.hist(int_swash[:,1], bins=100, alpha=0.5, label='swash')
plt.plot([t_mwi, t_mwi], [0, np.max(vals[0])], 'r-')
plt.legend()
plt.title('Modified Water Index SWIR-G')
ax3 = fig.add_subplot(gs[2,0])
plt.imshow(im)
for i,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k')
for i,contour in enumerate(contours_wi): plt.plot(contour[:, 1], contour[:, 0], linestyle='--', linewidth=1, color='w')
plt.grid(False)
plt.xticks([])
plt.yticks([])
ax4 = fig.add_subplot(gs[2,1], sharex=ax3, sharey=ax3)
plt.imshow(im_display)
for i,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k')
for i,contour in enumerate(contours_wi): plt.plot(contour[:, 1], contour[:, 0], linestyle='--', linewidth=1, color='w')
plt.grid(False)
plt.xticks([])
plt.yticks([])
ax5 = fig.add_subplot(gs[2,2], sharex=ax3, sharey=ax3)
plt.imshow(im_mwi, cmap='seismic')
for i,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=3, color='k')
for i,contour in enumerate(contours_wi): plt.plot(contour[:, 1], contour[:, 0], linestyle='--', linewidth=1, color='w')
plt.grid(False)
plt.xticks([])
plt.yticks([])
# plt.gcf().set_size_inches(17.99,7.55)
mng = plt.get_current_fig_manager()
mng.window.showMaximized()
plt.gcf().set_tight_layout(True)
plt.draw()
return contours_wi, contours_mwi
def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
"""
Compare sds with groundtruth data from topographic surveys / argus shorelines
KV WRL 2018
Arguments:
-----------
dates_sds: list
list of dates corresponding to each row in chain_sds
chain_sds: np.ndarray
array with time series of chainage for each transect (each transect is one column)
topo_profiles: dict
dict containing the dates and chainage of the groundtruth
mod: 0 or 1
0 for linear interpolation between 2 closest surveys, 1 for only nearest neighbour
min_days: int
minimum number of days for which the data can be compared
Returns: -----------
stats: dict
contains all the statistics of the comparison
"""
# create 3 figures
fig1 = plt.figure()
gs1 = gridspec.GridSpec(chain_sds.shape[1], 1)
fig2 = plt.figure()
gs2 = gridspec.GridSpec(2, chain_sds.shape[1])
fig3 = plt.figure()
gs3 = gridspec.GridSpec(2,1)
dates_sds_num = np.array([_.toordinal() for _ in dates_sds])
stats = dict([])
data_fin = dict([])
# for each transect compare and plot the data
for i in range(chain_sds.shape[1]):
pfname = list(topo_profiles.keys())[i]
stats[pfname] = dict([])
data_fin[pfname] = dict([])
dates_sur = topo_profiles[pfname]['dates']
chain_sur = topo_profiles[pfname]['chainage']
# convert to datenum
dates_sur_num = np.array([_.toordinal() for _ in dates_sur])
chain_sur_interp = []
diff_days = []
for j, satdate in enumerate(dates_sds_num):
temp_diff = satdate - dates_sur_num
if mod==0:
# select measurement before and after sat image date and interpolate
ind_before = np.where(temp_diff == temp_diff[temp_diff > 0][-1])[0]
if ind_before == len(temp_diff)-1:
chain_sur_interp.append(np.nan)
diff_days.append(np.abs(satdate-dates_sur_num[ind_before])[0])
continue
ind_after = np.where(temp_diff == temp_diff[temp_diff < 0][0])[0]
tempx = np.zeros(2)
tempx[0] = dates_sur_num[ind_before]
tempx[1] = dates_sur_num[ind_after]
tempy = np.zeros(2)
tempy[0] = chain_sur[ind_before]
tempy[1] = chain_sur[ind_after]
diff_days.append(np.abs(np.max([satdate-tempx[0], satdate-tempx[1]])))
# interpolate
f = interpolate.interp1d(tempx, tempy)
chain_sur_interp.append(f(satdate))
elif mod==1:
# select the closest measurement
idx_closest = find_indices(np.abs(temp_diff), lambda e: e == np.min(np.abs(temp_diff)))[0]
diff_days.append(np.abs(satdate-dates_sur_num[idx_closest]))
if diff_days[j] > mindays:
chain_sur_interp.append(np.nan)
else:
chain_sur_interp.append(chain_sur[idx_closest])
chain_sur_interp = np.array(chain_sur_interp)
# remove nan values
idx_sur_nan = ~np.isnan(chain_sur_interp)
idx_sat_nan = ~np.isnan(chain_sds[:,i])
idx_nan = np.logical_and(idx_sur_nan, idx_sat_nan)
# groundtruth and sds
chain_sur_fin = chain_sur_interp[idx_nan]
chain_sds_fin = chain_sds[idx_nan,i]
dates_fin = [k for (k, v) in zip(dates_sds, idx_nan) if v]
diff_chain = chain_sur_fin - chain_sds_fin
# calculate statistics
rmse = np.sqrt(np.nanmean((diff_chain)**2))
mean = np.nanmean(diff_chain)
std = np.nanstd(diff_chain)
q90 = np.percentile(np.abs(diff_chain), 90)
# store data
stats[pfname]['rmse'] = rmse
stats[pfname]['mean'] = mean
stats[pfname]['std'] = std
stats[pfname]['q90'] = q90
stats[pfname]['diffdays'] = diff_days
data_fin[pfname]['dates'] = dates_fin
data_fin[pfname]['sds'] = chain_sds_fin
data_fin[pfname]['survey'] = chain_sur_fin
# make time-series plot
plt.figure(fig1.number)
ax = fig1.add_subplot(gs1[i,0])
plt.plot(dates_sur, chain_sur, 'o-', color='C1', markersize=4, label='survey all')
plt.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2, label='survey interp')
plt.plot(dates_fin, chain_sds_fin, 'o--', color='b', markersize=4, label='SDS')
plt.title(pfname, fontweight='bold')
plt.xlim([dates_sds[0], dates_sds[-1]])
plt.ylabel('chainage [m]')
# make scatter plot
plt.figure(fig2.number)
ax1 = fig2.add_subplot(gs2[0,i])
plt.axis('equal')
plt.plot(chain_sur_fin, chain_sds_fin, 'ko', markersize=4, markerfacecolor='w', alpha=0.7)
xmax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)])
xmin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)])
ymax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)])
ymin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)])
plt.plot([xmin, xmax], [ymin, ymax], 'r--')
correlation = np.corrcoef(chain_sur_fin, chain_sds_fin)[0,1]
str_corr = 'r = %.2f' % (correlation)
plt.text(xmin, ymax, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
plt.xlabel('chainage survey [m]')
plt.ylabel('chainage satellite [m]')
plt.title(pfname, fontweight='bold')
ax2 = fig2.add_subplot(gs2[1,i])
binwidth = 3
bins = np.arange(min(diff_chain), max(diff_chain) + binwidth, binwidth)
density = plt.hist(diff_chain, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k')
plt.xlim([-50, 50])
plt.xlabel('error [m]')
str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90)
plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.5), horizontalalignment='left', fontsize=10)
fig1.set_size_inches(19.2, 9.28)
fig1.set_tight_layout(True)
fig2.set_size_inches(19.2, 9.28)
fig2.set_tight_layout(True)
# plot all the data together
chain_sds_all = []
chain_sur_all = []
for i in range(chain_sds.shape[1]):
pfname = list(topo_profiles.keys())[i]
chain_sds_all = np.append(chain_sds_all,data_fin[pfname]['sds'])
chain_sur_all = np.append(chain_sur_all,data_fin[pfname]['survey'])
diff_chain_all = chain_sur_all - chain_sds_all
# calculate statistics
rmse = np.sqrt(np.nanmean((diff_chain_all)**2))
mean = np.nanmean(diff_chain_all)
std = np.nanstd(diff_chain_all)
q90 = np.percentile(np.abs(diff_chain_all), 90)
stats['all'] = {'rmse':rmse,'mean':mean,'std':std,'q90':q90}
# make plot with all datapoints (from all the transects)
plt.figure(fig3.number)
ax1 = fig3.add_subplot(gs3[0,0])
plt.axis('equal')
plt.plot(chain_sur_all, chain_sds_all, 'ko', markersize=4, markerfacecolor='w', alpha=0.7)
xmax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)])
xmin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)])
ymax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)])
ymin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)])
plt.plot([xmin, xmax], [ymin, ymax], 'r--')
correlation = np.corrcoef(chain_sur_all, chain_sds_all)[0,1]
str_corr = 'r = %.2f' % (correlation)
plt.text(xmin, ymax, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
plt.xlabel('chainage survey [m]')
plt.ylabel('chainage satellite [m]')
plt.title(pfname, fontweight='bold')
ax2 = fig3.add_subplot(gs3[1,0])
binwidth = 3
bins = np.arange(min(diff_chain_all), max(diff_chain_all) + binwidth, binwidth)
density = plt.hist(diff_chain_all, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k')
plt.xlim([-50, 50])
plt.xlabel('error [m]')
str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90)
plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.5), horizontalalignment='left', fontsize=10)
fig3.set_size_inches(9.2, 9.28)
fig3.set_tight_layout(True)
return stats

@ -8,7 +8,9 @@ Contains all the utilities, convenience functions and small functions that do si
""" """
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import numpy as np import numpy as np
import scipy.io as sio
import pdb import pdb
@ -59,3 +61,50 @@ def find_indices(lst, condition):
def reject_outliers(data, m=2): def reject_outliers(data, m=2):
"rejects outliers in a numpy array" "rejects outliers in a numpy array"
return data[abs(data - np.mean(data)) < m * np.std(data)] return data[abs(data - np.mean(data)) < m * np.std(data)]
def duplicates_dict(lst):
"return duplicates and indices"
# nested function
def duplicates(lst, item):
return [i for i, x in enumerate(lst) if x == item]
return dict((x, duplicates(lst, x)) for x in set(lst) if lst.count(x) > 1)
def datenum2datetime(datenum):
"convert datenum to datetime"
#takes in datenum and outputs python datetime
time = [datetime.fromordinal(int(dn)) + timedelta(days=float(dn)%1) - timedelta(days = 366) for dn in datenum]
return time
def loadmat(filename):
'''
this function should be called instead of direct spio.loadmat
as it cures the problem of not properly recovering python dictionaries
from mat files. It calls the function check keys to cure all entries
which are still mat-objects
'''
data = sio.loadmat(filename, struct_as_record=False, squeeze_me=True)
return _check_keys(data)
def _check_keys(dict):
'''
checks if entries in dictionary are mat-objects. If yes
todict is called to change them to nested dictionaries
'''
for key in dict:
if isinstance(dict[key], sio.matlab.mio5_params.mat_struct):
dict[key] = _todict(dict[key])
return dict
def _todict(matobj):
'''
A recursive function which constructs from matobjects nested dictionaries
'''
dict = {}
for strg in matobj._fieldnames:
elem = matobj.__dict__[strg]
if isinstance(elem, sio.matlab.mio5_params.mat_struct):
dict[strg] = _todict(elem)
else:
dict[strg] = elem
return dict

@ -0,0 +1,589 @@
#==========================================================#
#==========================================================#
# Extract shorelines from Landsat images
#==========================================================#
#==========================================================#
#==========================================================#
# Initial settings
#==========================================================#
import os
import numpy as np
import matplotlib.pyplot as plt
import ee
import pdb
# other modules
from osgeo import gdal, ogr, osr
import pickle
import matplotlib.cm as cm
from pylab import ginput
from shapely.geometry import LineString
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.measure as measure
import skimage.morphology as morphology
# machine learning modules
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.externals import joblib
# import own modules
import functions.utils as utils
import functions.sds as sds
# some other settings
np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
plt.rcParams['axes.grid'] = True
plt.rcParams['figure.max_open_warning'] = 100
ee.Initialize()
#==========================================================#
# Parameters
#==========================================================#
sitename = 'NARRA'
cloud_thresh = 0.7 # threshold for cloud cover
plot_bool = False # if you want the plots
output_epsg = 28356 # GDA94 / MGA Zone 56
buffer_size = 7 # radius (in pixels) of disk for buffer (pixel classification)
min_beach_size = 20 # number of pixels in a beach (pixel classification)
dist_ref = 100 # maximum distance from reference point
min_length_wl = 200 # minimum length of shoreline LineString to be kept
manual_bool = True # to manually check images
output = dict([])
#==========================================================#
# Metadata
#==========================================================#
filepath = os.path.join(os.getcwd(), 'data', sitename)
with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'rb') as f:
metadata = pickle.load(f)
#%%
#==========================================================#
# Read S2 images
#==========================================================#
satname = 'S2'
dates = metadata[satname]['dates']
input_epsg = 32756 # metadata[satname]['epsg']
# path to images
filepath10 = os.path.join(os.getcwd(), 'data', sitename, satname, '10m')
filenames10 = os.listdir(filepath10)
filepath20 = os.path.join(os.getcwd(), 'data', sitename, satname, '20m')
filenames20 = os.listdir(filepath20)
filepath60 = os.path.join(os.getcwd(), 'data', sitename, satname, '60m')
filenames60 = os.listdir(filepath60)
if (not len(filenames10) == len(filenames20)) or (not len(filenames20) == len(filenames60)):
raise 'error: not the same amount of files for 10, 20 and 60 m'
N = len(filenames10)
# initialise variables
cloud_cover_ts = []
acc_georef_ts = []
date_acquired_ts = []
filename_ts = []
satname_ts = []
timestamp = []
shorelines = []
idx_skipped = []
spacing = '=========================================================='
msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
print(msg)
for i in range(N):
# read 10m bands
fn = os.path.join(filepath10, filenames10[i])
data = gdal.Open(fn, gdal.GA_ReadOnly)
georef = np.array(data.GetGeoTransform())
bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
im10 = np.stack(bands, 2)
im10 = im10/10000 # TOA scaled to 10000
# if image is only zeros, skip it
if sum(sum(sum(im10))) < 1:
print('skip ' + str(i) + ' - no data')
idx_skipped.append(i)
continue
nrows = im10.shape[0]
ncols = im10.shape[1]
# read 20m band (SWIR1)
fn = os.path.join(filepath20, filenames20[i])
data = gdal.Open(fn, gdal.GA_ReadOnly)
bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
im20 = np.stack(bands, 2)
im20 = im20[:,:,0]
im20 = im20/10000 # TOA scaled to 10000
im_swir = transform.resize(im20, (nrows, ncols), order=1, preserve_range=True, mode='constant')
im_swir = np.expand_dims(im_swir, axis=2)
# append down-sampled swir band to the 10m bands
im_ms = np.append(im10, im_swir, axis=2)
# read 60m band (QA)
fn = os.path.join(filepath60, filenames60[i])
data = gdal.Open(fn, gdal.GA_ReadOnly)
bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
im60 = np.stack(bands, 2)
im_qa = im60[:,:,0]
cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
cloud_mask = transform.resize(cloud_mask,(nrows, ncols), order=0, preserve_range=True, mode='constant')
# check if -inf or nan values on any band and add to cloud mask
for k in range(im_ms.shape[2]):
im_inf = np.isin(im_ms[:,:,k], -np.inf)
im_nan = np.isnan(im_ms[:,:,k])
cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
# calculate cloud cover and if above threshold, skip it
cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
if cloud_cover > cloud_thresh:
print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
idx_skipped.append(i)
continue
# rescale image intensity for display purposes
im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)
# classify image in 4 classes (sand, whitewater, water, other) with NN classifier
im_classif, im_labels = sds.classify_image_NN_nopan(im_ms, cloud_mask, min_beach_size, plot_bool)
# if there aren't any sandy pixels
if sum(sum(im_labels[:,:,0])) == 0 :
# use global threshold
im_ndwi = sds.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask, plot_bool)
contours = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
else:
# use specific threhsold
contours_wi, contours_mwi = sds.find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size, plot_bool)
# convert from pixels to world coordinates
wl_coords = sds.convert_pix2world(contours_mwi, georef)
# convert to output epsg spatial reference
wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)
# remove contour lines that have a perimeter < min_length_wl
wl_good = []
for l, wls in enumerate(wl):
coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
a = LineString(coords) # shapely LineString structure
if a.length >= min_length_wl:
wl_good.append(wls)
# format points and only select the ones close to the refpoints
x_points = np.array([])
y_points = np.array([])
for k in range(len(wl_good)):
x_points = np.append(x_points,wl_good[k][:,0])
y_points = np.append(y_points,wl_good[k][:,1])
wl_good = np.transpose(np.array([x_points,y_points]))
temp = np.zeros((len(wl_good))).astype(bool)
for k in range(len(refpoints)):
temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp)
wl_final = wl_good[temp]
# plot output
plt.figure()
im = np.copy(im_display)
colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
for k in range(0,im_labels.shape[2]):
im[im_labels[:,:,k],0] = colours[k,0]
im[im_labels[:,:,k],1] = colours[k,1]
im[im_labels[:,:,k],2] = colours[k,2]
plt.imshow(im)
for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
plt.title(satname + ' ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + ' acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' )
plt.draw()
pt_in = np.array(ginput(n=1, timeout=1000))
plt.close()
# if image is rejected, skip it
if pt_in[0][1] > nrows/2:
print('skip ' + str(i) + ' - rejected')
idx_skipped.append(i)
continue
# if accepted, store the data
cloud_cover_ts.append(cloud_cover)
acc_georef_ts.append(metadata[satname]['acc_georef'][i])
filename_ts.append(filenames10[i])
satname_ts.append(satname)
date_acquired_ts.append(filenames10[i][:10])
timestamp.append(metadata[satname]['dates'][i])
shorelines.append(wl_final)
# store in output structure
output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts,
'acc_georef':acc_georef_ts}}
del idx_skipped
#%%
#==========================================================#
# Read L7&L8 images
#==========================================================#
satname = 'L8'
dates = metadata[satname]['dates']
input_epsg = 32656 # metadata[satname]['epsg']
# path to images
filepath_pan = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8', 'pan')
filepath_ms = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8', 'ms')
filenames_pan = os.listdir(filepath_pan)
filenames_ms = os.listdir(filepath_ms)
if (not len(filenames_pan) == len(filenames_ms)):
raise 'error: not the same amount of files for pan and ms'
N = len(filenames_pan)
# initialise variables
cloud_cover_ts = []
acc_georef_ts = []
date_acquired_ts = []
filename_ts = []
satname_ts = []
timestamp = []
shorelines = []
idx_skipped = []
spacing = '=========================================================='
msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
print(msg)
for i in range(N):
# get satellite name
sat = filenames_pan[i][20:22]
# read pan image
fn_pan = os.path.join(filepath_pan, filenames_pan[i])
data = gdal.Open(fn_pan, gdal.GA_ReadOnly)
georef = np.array(data.GetGeoTransform())
bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
im_pan = np.stack(bands, 2)[:,:,0]
nrows = im_pan.shape[0]
ncols = im_pan.shape[1]
# read ms image
fn_ms = os.path.join(filepath_ms, filenames_ms[i])
data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
im_ms = np.stack(bands, 2)
# cloud mask
im_qa = im_ms[:,:,5]
cloud_mask = sds.create_cloud_mask(im_qa, sat, plot_bool)
cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0, preserve_range=True, mode='constant').astype('bool_')
# resize the image using bilinear interpolation (order 1)
im_ms = im_ms[:,:,:5]
im_ms = transform.resize(im_ms,(nrows, ncols), order=1, preserve_range=True, mode='constant')
# check if -inf or nan values on any band and add to cloud mask
for k in range(im_ms.shape[2]+1):
if k == 5:
im_inf = np.isin(im_pan, -np.inf)
im_nan = np.isnan(im_pan)
else:
im_inf = np.isin(im_ms[:,:,k], -np.inf)
im_nan = np.isnan(im_ms[:,:,k])
cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
# calculate cloud cover and skip image if above threshold
cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
if cloud_cover > cloud_thresh:
print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
idx_skipped.append(i)
continue
# Pansharpen image (different for L8 and L7)
if sat == 'L7':
# pansharpen (Green, Red, NIR) and downsample Blue and SWIR1
im_ms_ps = sds.pansharpen(im_ms[:,:,[1,2,3]], im_pan, cloud_mask, plot_bool)
im_ms_ps = np.append(im_ms[:,:,[0]], im_ms_ps, axis=2)
im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[4]], axis=2)
im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)
elif sat == 'L8':
# pansharpen RGB image and downsample NIR and SWIR1
im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, plot_bool)
im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2)
im_display = sds.rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)
# classify image in 4 classes (sand, whitewater, water, other) with NN classifier
im_classif, im_labels = sds.classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, plot_bool)
# if there aren't any sandy pixels
if sum(sum(im_labels[:,:,0])) == 0 :
# use global threshold
im_ndwi = sds.nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, plot_bool)
contours = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
else:
# use specific threhsold
contours_wi, contours_mwi = sds.find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size, plot_bool)
# convert from pixels to world coordinates
wl_coords = sds.convert_pix2world(contours_mwi, georef)
# convert to output epsg spatial reference
wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)
# remove contour lines that have a perimeter < min_length_wl
wl_good = []
for l, wls in enumerate(wl):
coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
a = LineString(coords) # shapely LineString structure
if a.length >= min_length_wl:
wl_good.append(wls)
# format points and only select the ones close to the refpoints
x_points = np.array([])
y_points = np.array([])
for k in range(len(wl_good)):
x_points = np.append(x_points,wl_good[k][:,0])
y_points = np.append(y_points,wl_good[k][:,1])
wl_good = np.transpose(np.array([x_points,y_points]))
temp = np.zeros((len(wl_good))).astype(bool)
for k in range(len(refpoints)):
temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp)
wl_final = wl_good[temp]
# plot output
plt.figure()
plt.subplot(121)
im = np.copy(im_display)
colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
for k in range(0,im_labels.shape[2]):
im[im_labels[:,:,k],0] = colours[k,0]
im[im_labels[:,:,k],1] = colours[k,1]
im[im_labels[:,:,k],2] = colours[k,2]
plt.imshow(im)
for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
plt.title(sat + ' ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + ' acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' )
pt_in = np.array(ginput(n=1, timeout=1000))
plt.close()
# if image is rejected, skip it
if pt_in[0][1] > nrows/2:
print('skip ' + str(i) + ' - rejected')
idx_skipped.append(i)
continue
# if accepted, store the data
cloud_cover_ts.append(cloud_cover)
acc_georef_ts.append(metadata[satname]['acc_georef'][i])
filename_ts.append(filenames_pan[i])
satname_ts.append(sat)
date_acquired_ts.append(filenames_pan[i][:10])
timestamp.append(metadata[satname]['dates'][i])
shorelines.append(wl_final)
# store in output structure
output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts,
'acc_georef':acc_georef_ts}}
del idx_skipped
#%%
#==========================================================#
# Read L5 images
#==========================================================#
satname = 'L5'
dates = metadata[satname]['dates']
input_epsg = 32656 # metadata[satname]['epsg']
# path to images
filepath_img = os.path.join(os.getcwd(), 'data', sitename, satname, '30m')
filenames = os.listdir(filepath_img)
N = len(filenames)
# initialise variables
cloud_cover_ts = []
acc_georef_ts = []
date_acquired_ts = []
filename_ts = []
satname_ts = []
timestamp = []
shorelines = []
idx_skipped = []
spacing = '=========================================================='
msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
print(msg)
for i in range(N):
# read ms image
fn = os.path.join(filepath_img, filenames[i])
data = gdal.Open(fn, gdal.GA_ReadOnly)
georef = np.array(data.GetGeoTransform())
bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
im_ms = np.stack(bands, 2)
# down-sample to half hte original pixel size
nrows = im_ms.shape[0]*2
ncols = im_ms.shape[1]*2
# cloud mask
im_qa = im_ms[:,:,5]
im_ms = im_ms[:,:,:-1]
cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0, preserve_range=True, mode='constant').astype('bool_')
# resize the image using bilinear interpolation (order 1)
im_ms = transform.resize(im_ms,(nrows, ncols), order=1, preserve_range=True, mode='constant')
# adjust georef vector (scale becomes 15m and origin is adjusted to the center of new corner pixel)
georef[1] = 15
georef[5] = -15
georef[0] = georef[0] + 7.5
georef[3] = georef[3] - 7.5
# check if -inf or nan values on any band and add to cloud mask
for k in range(im_ms.shape[2]):
im_inf = np.isin(im_ms[:,:,k], -np.inf)
im_nan = np.isnan(im_ms[:,:,k])
cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
# calculate cloud cover and skip image if above threshold
cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
if cloud_cover > cloud_thresh:
print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
idx_skipped.append(i)
continue
# rescale image intensity for display purposes
im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)
# classify image in 4 classes (sand, whitewater, water, other) with NN classifier
im_classif, im_labels = sds.classify_image_NN_nopan(im_ms, cloud_mask, min_beach_size, plot_bool)
# if there aren't any sandy pixels
if sum(sum(im_labels[:,:,0])) == 0 :
# use global threshold
im_ndwi = sds.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask, plot_bool)
contours = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
else:
# use specific threhsold
contours_wi, contours_mwi = sds.find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size, plot_bool)
# convert from pixels to world coordinates
wl_coords = sds.convert_pix2world(contours_mwi, georef)
# convert to output epsg spatial reference
wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)
# remove contour lines that have a perimeter < min_length_wl
wl_good = []
for l, wls in enumerate(wl):
coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
a = LineString(coords) # shapely LineString structure
if a.length >= min_length_wl:
wl_good.append(wls)
# format points and only select the ones close to the refpoints
x_points = np.array([])
y_points = np.array([])
for k in range(len(wl_good)):
x_points = np.append(x_points,wl_good[k][:,0])
y_points = np.append(y_points,wl_good[k][:,1])
wl_good = np.transpose(np.array([x_points,y_points]))
temp = np.zeros((len(wl_good))).astype(bool)
for k in range(len(refpoints)):
temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp)
wl_final = wl_good[temp]
# plot output
plt.figure()
plt.subplot(121)
im = np.copy(im_display)
colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
for k in range(0,im_labels.shape[2]):
im[im_labels[:,:,k],0] = colours[k,0]
im[im_labels[:,:,k],1] = colours[k,1]
im[im_labels[:,:,k],2] = colours[k,2]
plt.imshow(im)
for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
plt.title(satname + ' ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + ' acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' )
plt.subplot(122)
plt.axis('equal')
plt.axis('off')
plt.plot(refpoints[:,0], refpoints[:,1], 'k.')
plt.plot(wl_final[:,0], wl_final[:,1], 'r.')
mng = plt.get_current_fig_manager()
mng.window.showMaximized()
plt.tight_layout()
plt.draw()
pt_in = np.array(ginput(n=1, timeout=1000))
plt.close()
# if image is rejected, skip it
if pt_in[0][1] > nrows/2:
print('skip ' + str(i) + ' - rejected')
idx_skipped.append(i)
continue
# if accepted, store the data
cloud_cover_ts.append(cloud_cover)
acc_georef_ts.append(metadata[satname]['acc_georef'][i])
filename_ts.append(filenames[i])
satname_ts.append(satname)
date_acquired_ts.append(filenames[i][:10])
timestamp.append(metadata[satname]['dates'][i])
shorelines.append(wl_final)
# store in output structure
output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts,
'acc_georef':acc_georef_ts}}
del idx_skipped
#==========================================================#
#==========================================================#
#==========================================================#
#==========================================================#
#%%
# save output
with open(os.path.join(filepath, sitename + '_output' + '.pkl'), 'wb') as f:
pickle.dump(output, f)
# save idx_skipped
#idx_skipped = dict([])
#for satname in list(output.keys()):
# idx_skipped[satname] = output[satname]['idx_skipped']
#with open(os.path.join(filepath, sitename + '_idxskipped' + '.pkl'), 'wb') as f:
# pickle.dump(idx_skipped, f)

@ -1,382 +0,0 @@
# -*- coding: utf-8 -*-
#==========================================================#
# Compare Narrabeen SDS with 3D quadbike surveys
#==========================================================#
# Initial settings
import os
import numpy as np
import matplotlib.pyplot as plt
import pdb
import ee
import matplotlib.dates as mdates
import matplotlib.cm as cm
from datetime import datetime, timedelta
import pickle
import pytz
import scipy.io as sio
import scipy.interpolate as interpolate
import statsmodels.api as sm
import skimage.measure as measure
# my functions
import functions.utils as utils
# some settings
np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
plt.rcParams['axes.grid'] = True
plt.rcParams['figure.max_open_warning'] = 100
au_tz = pytz.timezone('Australia/Sydney')
# load quadbike dates and convert from datenum to datetime
filename = 'data\quadbike\survey_dates.mat'
filepath = os.path.join(os.getcwd(), filename)
dates_quad = sio.loadmat(filepath)['dates'] # matrix containing year, month, day
dates_quad = [datetime(dates_quad[i,0], dates_quad[i,1], dates_quad[i,2],
tzinfo=au_tz) for i in range(dates_quad.shape[0])]
# load timestamps from satellite images
satname = 'L8'
sitename = 'NARRA'
filepath = os.path.join(os.getcwd(), 'data', satname, sitename)
with open(os.path.join(filepath, sitename + '_output_new' + '.pkl'), 'rb') as f:
output = pickle.load(f)
dates_l8 = output['t']
# convert to AEST
dates_l8 = [_.astimezone(au_tz) for _ in dates_l8]
# remove duplicates
dates_l8_str = [_.strftime('%Y%m%d') for _ in dates_l8]
dupl = utils.duplicates_dict(dates_l8_str)
idx_remove = []
for k,v in dupl.items():
idx1 = v[0]
idx2 = v[1]
c1 = output['cloud_cover'][idx1]
c2 = output['cloud_cover'][idx2]
g1 = output['acc_georef'][idx1]
g2 = output['acc_georef'][idx2]
if c1 < c2 - 0.01:
idx_remove.append(idx2)
elif g1 < g2 - 0.1:
idx_remove.append(idx2)
else:
idx_remove.append(idx1)
idx_remove = sorted(idx_remove)
idx_all = np.linspace(0,70,71)
idx_keep = list(np.where(~np.isin(idx_all,idx_remove))[0])
output['t'] = [output['t'][k] for k in idx_keep]
output['shorelines'] = [output['shorelines'][k] for k in idx_keep]
output['cloud_cover'] = [output['cloud_cover'][k] for k in idx_keep]
output['acc_georef'] = [output['acc_georef'][k] for k in idx_keep]
# convert to AEST
dates_l8 = output['t']
dates_l8 = [_.astimezone(au_tz) for _ in dates_l8]
# load wave data (already AEST)
filename = 'data\wave\SydneyProcessed.mat'
filepath = os.path.join(os.getcwd(), filename)
wave_data = sio.loadmat(filepath)
idx = utils.find_indices(wave_data['dates'][:,0], lambda e: e >= dates_l8[0].year and e <= dates_l8[-1].year)
hsig = np.array([wave_data['Hsig'][i][0] for i in idx])
wdir = np.array([wave_data['Wdir'][i][0] for i in idx])
dates_wave = [datetime(wave_data['dates'][i,0], wave_data['dates'][i,1],
wave_data['dates'][i,2], wave_data['dates'][i,3],
wave_data['dates'][i,4], wave_data['dates'][i,5],
tzinfo=au_tz) for i in idx]
# load tide data (already AEST)
filename = 'SydTideData.mat'
filepath = os.path.join(os.getcwd(), 'data', 'tide', filename)
tide_data = sio.loadmat(filepath)
idx = utils.find_indices(tide_data['dates'][:,0], lambda e: e >= dates_l8[0].year and e <= dates_l8[-1].year)
tide = np.array([tide_data['tide'][i][0] for i in idx])
dates_tide = [datetime(tide_data['dates'][i,0], tide_data['dates'][i,1],
tide_data['dates'][i,2], tide_data['dates'][i,3],
tide_data['dates'][i,4], tide_data['dates'][i,5],
tzinfo=au_tz) for i in idx]
#%% make a plot of all the dates with wave data
orange = [255/255,140/255,0]
blue = [0,191/255,255/255]
f = plt.figure()
months = mdates.MonthLocator()
month_fmt = mdates.DateFormatter('%b %Y')
days = mdates.DayLocator()
years = [2013,2014,2015,2016]
for k in range(len(years)):
sel_year = years[k]
ax = plt.subplot(4,1,k+1)
idx_year = utils.find_indices(dates_wave, lambda e : e.year >= sel_year and e.year <= sel_year)
plt.plot([dates_wave[i] for i in idx_year], [hsig[i] for i in idx_year], 'k-', linewidth=0.5)
hsigmax = np.nanmax([hsig[i] for i in idx_year])
cbool = True
for j in range(len(dates_quad)):
if dates_quad[j].year == sel_year:
if cbool:
plt.plot([dates_quad[j], dates_quad[j]], [0, hsigmax], color=orange, label='survey')
cbool = False
else:
plt.plot([dates_quad[j], dates_quad[j]], [0, hsigmax], color=orange)
cbool = True
for j in range(len(dates_l8)):
if dates_l8[j].year == sel_year:
if cbool:
plt.plot([dates_l8[j], dates_l8[j]], [0, hsigmax], color=blue, label='landsat8')
cbool = False
else:
plt.plot([dates_l8[j], dates_l8[j]], [0, hsigmax], color=blue)
if k == 3:
plt.legend()
plt.xlim((datetime(sel_year,1,1), datetime(sel_year,12,31, tzinfo=au_tz)))
plt.ylim((0, hsigmax))
plt.ylabel('Hs [m]')
ax.xaxis.set_major_locator = months
ax.xaxis.set_major_formatter(month_fmt)
f.subplots_adjust(hspace=0.2)
plt.draw()
#%% calculate difference between dates (quad and sat)
diff_days = [ [(x - _).days for _ in dates_quad] for x in dates_l8]
max_diff = 14
idx_closest = [utils.find_indices(_, lambda e: abs(e) <= max_diff) for _ in diff_days]
# store in dates_diff dictionnary
dates_diff = []
cloud_cover = []
for i in range(len(idx_closest)):
if not idx_closest[i]:
continue
elif len(idx_closest[i]) > 1:
idx_best = np.argmin(np.abs([diff_days[i][_] for _ in idx_closest[i]]))
dates_temp = [dates_quad[_] for _ in idx_closest[i]]
days_temp = [diff_days[i][_] for _ in idx_closest[i]]
dates_diff.append({"date sat": dates_l8[i],
"date quad": dates_temp[idx_best],
"days diff": days_temp[idx_best]})
else:
dates_diff.append({"date sat": dates_l8[i],
"date quad": dates_quad[idx_closest[i][0]],
"days diff": diff_days[i][idx_closest[i][0]]
})
# store cloud data
cloud_cover.append(output['cloud_cover'][i])
# store wave data
wave_hsig = []
for i in range(len(dates_diff)):
wave_hsig.append(hsig[np.argmin(np.abs(np.array([(dates_diff[i]['date sat'] - _).total_seconds() for _ in dates_wave])))])
# make a plot
plt.figure()
counter = 0
for i in range(len(dates_diff)):
counter = counter + 1
if dates_diff[i]['date quad'] > dates_diff[i]['date sat']:
date_min = dates_diff[i]['date sat']
date_max = dates_diff[i]['date quad']
color1 = orange
color2 = blue
else:
date_min = dates_diff[i]['date quad']
date_max = dates_diff[i]['date sat']
color1 = blue
color2 = orange
idx_t = utils.find_indices(dates_wave, lambda e : e >= date_min and e <= date_max)
hsigmax = np.nanmax([hsig[i] for i in idx_t])
hsigmin = np.nanmin([hsig[i] for i in idx_t])
if counter > 9:
counter = 1
plt.figure()
ax = plt.subplot(3,3,counter)
plt.plot([dates_wave[i] for i in idx_t], [hsig[i] for i in idx_t], 'k-', linewidth=1.5)
plt.plot([date_min, date_min], [0, 4.5], color=color2, label='survey')
plt.plot([date_max, date_max], [0, 4.5], color=color1, label='landsat8')
plt.ylabel('Hs [m]')
ax.xaxis.set_major_locator(mdates.DayLocator(tz=au_tz))
ax.xaxis.set_minor_locator(mdates.HourLocator(tz=au_tz))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d'))
ax.xaxis.set_minor_locator(months)
plt.title(dates_diff[i]['date sat'].strftime('%b %Y') + ' (' + str(abs(dates_diff[i]['days diff'])) + ' days)')
plt.draw()
plt.gcf().subplots_adjust(hspace=0.5)
# mean day difference
np.mean([ np.abs(_['days diff']) for _ in dates_diff])
#%% Compare shorelines in elevation
dist_buffer = 50 # buffer of points selected for interpolation
# load quadbike .mat files
foldername = 'data\quadbike\surveys3D'
folderpath = os.path.join(os.getcwd(), foldername)
filenames = os.listdir(folderpath)
# get the satellite shorelines
sl = output['shorelines']
# get dates from filenames
dates_quad = [datetime(int(_[6:10]), int(_[11:13]), int(_[14:16]), tzinfo= au_tz) for _ in filenames]
zav = []
ztide = []
sl_gt = []
for i in range(len(dates_diff)):
sl_smooth = sl[i]
# select closest 3D survey and load .mat file
idx_closest = np.argmin(np.abs(np.array([(dates_diff[i]['date sat'] - _).days for _ in dates_quad])))
survey3d = sio.loadmat(os.path.join(folderpath, filenames[idx_closest]))
# reshape to a vector
xs = survey3d['x'].reshape(survey3d['x'].shape[0] * survey3d['x'].shape[1])
ys = survey3d['y'].reshape(survey3d['y'].shape[0] * survey3d['y'].shape[1])
zs = survey3d['z'].reshape(survey3d['z'].shape[0] * survey3d['z'].shape[1])
# remove nan values
idx_nan = np.isnan(zs)
xs = xs[~idx_nan]
ys = ys[~idx_nan]
zs = zs[~idx_nan]
# find water level at the time the image was acquired
idx_closest = np.argmin(np.abs(np.array([(dates_diff[i]['date sat'] - _).total_seconds() for _ in dates_tide])))
tide_level = tide[idx_closest]
ztide.append(tide_level)
# find contour corresponding to the water level on 3D surface (if below minimum, add 0.05m increments)
if tide_level < np.nanmin(survey3d['z']):
tide_level = np.nanmin(survey3d['z'])
sl_tide = measure.find_contours(survey3d['z'], tide_level)
sl_tide = sl_tide[np.argmax(np.array([len(_) for _ in sl_tide]))]
count = 0
while len(sl_tide) < 900:
count = count + 1
tide_level = tide_level + 0.05*count
sl_tide = measure.find_contours(survey3d['z'], tide_level)
sl_tide = sl_tide[np.argmax(np.array([len(_) for _ in sl_tide]))]
print('added ' + str(0.05*count) + ' cm - contour with ' + str(len(sl_tide)) + ' points')
else:
sl_tide = measure.find_contours(survey3d['z'], tide_level)
sl_tide = sl_tide[np.argmax(np.array([len(_) for _ in sl_tide]))]
# remove nans
if np.any(np.isnan(sl_tide)):
index_nan = np.where(np.isnan(sl_tide))[0]
sl_tide = np.delete(sl_tide, index_nan, axis=0)
# get x,y coordinates
xtide = [survey3d['x'][int(np.round(sl_tide[m,0])), int(np.round(sl_tide[m,1]))] for m in range(sl_tide.shape[0])]
ytide = [survey3d['y'][int(np.round(sl_tide[m,0])), int(np.round(sl_tide[m,1]))] for m in range(sl_tide.shape[0])]
sl_gt.append(np.transpose(np.array([np.array(xtide), np.array(ytide)])))
# interpolate SDS on 3D surface to get elevation (point by point)
zq = []
for j in range(sl_smooth.shape[0]):
xq = sl_smooth[j,0]
yq = sl_smooth[j,1]
dist_q = np.linalg.norm(np.transpose(np.array([[xq - _ for _ in xs],[yq - _ for _ in ys]])), axis=1)
idx_buffer = dist_q <= dist_buffer
if sum(idx_buffer) > 0:
tck = interpolate.bisplrep(xs[idx_buffer], ys[idx_buffer], zs[idx_buffer])
zq.append(interpolate.bisplev(xq, yq, tck))
zq = np.array(zq)
plt.figure()
plt.hist(zq, bins=100)
plt.draw()
# plt.figure()
# plt.axis('equal')
# plt.scatter(xs, ys, s=10, c=zs, marker='o', cmap=cm.get_cmap('jet'),
# label='quad data')
# plt.plot(xs[idx_buffer], ys[idx_buffer], 'ko')
# plt.plot(xq,yq,'ro')
# plt.draw()
# store the alongshore median elevation
zav.append(np.median(utils.reject_outliers(zq, m=2)))
# make plot
red = [255/255, 0, 0]
gray = [0.75, 0.75, 0.75]
plt.figure()
plt.subplot(121)
plt.axis('equal')
plt.scatter(xs, ys, s=10, c=zs, marker='o', cmap=cm.get_cmap('jet'),
label='3D survey')
plt.plot(xtide, ytide, '--', color=gray, linewidth=2.5, label='tide level contour')
plt.plot(sl_smooth[:,0], sl_smooth[:,1], '-', color=red, linewidth=2.5, label='SDS')
# plt.plot(sl[i][idx_beach,0], sl[i][idx_beach,1], 'w-', linewidth=2)
plt.xlabel('Eastings [m]')
plt.ylabel('Northings [m]')
plt.title('Shoreline comparison')
plt.colorbar(label='mAHD')
plt.legend()
plt.ylim((6266100, 6267000))
plt.subplot(122)
plt.plot(np.linspace(0,1,len(zq)), zq, 'ko-', markersize=5)
plt.plot([0, 1], [zav[i], zav[i]], 'r-', label='median')
plt.plot([0, 1], [ztide[i], ztide[i]], 'g--', label = 'measured tide')
plt.xlabel('Northings [m]')
plt.ylabel('Elevation [mAHD]')
plt.title('Alongshore SDS elevation')
plt.legend()
mng = plt.get_current_fig_manager()
mng.window.showMaximized()
plt.tight_layout()
plt.draw()
print(i)
#%% Calculate some error statistics
zav = np.array(zav)
ztide = np.array(ztide)
f = plt.figure()
plt.subplot(3,1,1)
plt.bar(np.linspace(1,len(zav),len(zav)), zav-ztide)
plt.ylabel('Error in z [m]')
plt.title('Elevation error')
plt.xticks([])
plt.draw()
plt.subplot(3,1,2)
plt.bar(np.linspace(1,len(zav),len(zav)), wave_hsig, color=orange)
plt.ylabel('Hsig [m]')
plt.xticks([])
plt.draw()
plt.subplot(3,1,3)
plt.bar(np.linspace(1,len(zav),len(zav)), np.array(cloud_cover)*100, color='g')
plt.ylabel('Cloud cover %')
plt.xlabel('comparison #')
plt.grid(False)
plt.grid(axis='y')
f.subplots_adjust(hspace=0)
plt.draw()
np.sqrt(np.mean((zav - ztide)**2))
#%% plot to show LOWESS smoothing
#i = 0
#idx_beach = [np.min(np.linalg.norm(sl[i][k,:] - narrabeach, axis=1)) < dist_thresh for k in range(sl[i].shape[0])]
#x = sl[i][idx_beach,0]
#y = sl[i][idx_beach,1]
#sl_smooth = lowess(x,y, frac=1./10, it = 10)
#
#plt.figure()
#plt.axis('equal')
#plt.scatter
#plt.plot(x,y,'bo', linewidth=2, label='original SDS')
#plt.plot(sl_smooth[:,1], sl_smooth[:,0], 'ro', linewidth=2, label='smoothed SDS')
#plt.legend()
#plt.xlabel('Eastings [m]')
#plt.ylabel('Northings [m]')
#plt.title('Local weighted scatterplot smoothing (LOWESS)')
#plt.draw()
Loading…
Cancel
Save