major updates

Kilian Vos 6 years ago
parent 2e3b90316f
commit a0b49c7dcf

3
.gitignore vendored

@ -5,4 +5,5 @@
*.mp4
*.gif
*.jpg
*.pkl
*.xml

@ -0,0 +1,62 @@
<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2">
<Document>
<name>NARRA</name>
<Style id="poly-000000-1200-77-nodesc-normal">
<LineStyle>
<color>ff000000</color>
<width>1.2</width>
</LineStyle>
<PolyStyle>
<color>4d000000</color>
<fill>1</fill>
<outline>1</outline>
</PolyStyle>
<BalloonStyle>
<text><![CDATA[<h3>$[name]</h3>]]></text>
</BalloonStyle>
</Style>
<Style id="poly-000000-1200-77-nodesc-highlight">
<LineStyle>
<color>ff000000</color>
<width>1.8</width>
</LineStyle>
<PolyStyle>
<color>4d000000</color>
<fill>1</fill>
<outline>1</outline>
</PolyStyle>
<BalloonStyle>
<text><![CDATA[<h3>$[name]</h3>]]></text>
</BalloonStyle>
</Style>
<StyleMap id="poly-000000-1200-77-nodesc">
<Pair>
<key>normal</key>
<styleUrl>#poly-000000-1200-77-nodesc-normal</styleUrl>
</Pair>
<Pair>
<key>highlight</key>
<styleUrl>#poly-000000-1200-77-nodesc-highlight</styleUrl>
</Pair>
</StyleMap>
<Placemark>
<name>Polygon 1</name>
<styleUrl>#poly-000000-1200-77-nodesc</styleUrl>
<Polygon>
<outerBoundaryIs>
<LinearRing>
<tessellate>1</tessellate>
<coordinates>
151.2957545,-33.7012561,0
151.297557,-33.7388075,0
151.312234,-33.7390216,0
151.311204,-33.701399,0
151.2957545,-33.7012561,0
</coordinates>
</LinearRing>
</outerBoundaryIs>
</Polygon>
</Placemark>
</Document>
</kml>
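The polygon above (NARRA) is the area of interest used by the download functions. As a rough illustration, its lon/lat ring can be read back into the nested-list format that the inputs dictionary expects; the file name and helper below are hypothetical, not part of the commit.

```python
# minimal sketch: read the outer ring of a KML polygon (file name is an assumption)
import xml.etree.ElementTree as ET

def polygon_from_kml(fn):
    ns = {'kml': 'http://www.opengis.net/kml/2.2'}
    root = ET.parse(fn).getroot()
    # the <coordinates> element holds "lon,lat,alt" triplets separated by whitespace
    coords = root.find('.//kml:coordinates', ns).text.split()
    return [[[float(c.split(',')[0]), float(c.split(',')[1])] for c in coords]]

polygon = polygon_from_kml('NARRA.kml')  # [[[151.2957545, -33.7012561], ...]]
```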

@ -1,25 +1,37 @@
"""This module contains all the functions needed to download the satellite images from GEE """This module contains all the functions needed to download the satellite images from the Google
Earth Engine Server
Author: Kilian Vos, Water Research Laboratory, University of New South Wales Author: Kilian Vos, Water Research Laboratory, University of New South Wales
""" """
# Initial settings # load modules
import os import os
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import pdb import pdb
# earth engine modules
import ee import ee
from urllib.request import urlretrieve from urllib.request import urlretrieve
import zipfile
import copy
import gdal_merge
# additional modules
from datetime import datetime from datetime import datetime
import pytz import pytz
import pickle import pickle
import zipfile import skimage.morphology as morphology
# own modules
import SDS_preprocess, SDS_tools
np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
# initialise connection with GEE server # initialise connection with GEE server
ee.Initialize() ee.Initialize()
# Functions
def download_tif(image, polygon, bandsId, filepath): def download_tif(image, polygon, bandsId, filepath):
""" """
Downloads a .TIF image from the ee server and stores it in a temp file Downloads a .TIF image from the ee server and stores it in a temp file
@ -49,34 +61,48 @@ def download_tif(image, polygon, bandsId, filepath):
return local_zipfile.extract('data.tif', filepath)
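Only the final return of download_tif appears in this hunk. Judging from the imports above (urlretrieve, zipfile) and that return statement, the body presumably requests a zipped GeoTIFF of the selected bands over the polygon and extracts it; the sketch below is an assumed reconstruction, not the committed code (the request parameters in particular are guesses).

```python
# assumed body of download_tif, based on the imports and the return statement shown above
def download_tif(image, polygon, bandsId, filepath):
    # build a download request for the selected bands clipped to the polygon
    url = ee.data.makeDownloadUrl(ee.data.getDownloadId({
        'image': image.serialize(),
        'region': polygon,
        'bands': bandsId,
        'filePerBand': 'false',
        'name': 'data',
    }))
    # fetch the zip archive and extract the single GeoTIFF into filepath
    local_zip, headers = urlretrieve(url)
    with zipfile.ZipFile(local_zip) as local_zipfile:
        return local_zipfile.extract('data.tif', filepath)
```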
def get_images(inputs):
"""
Downloads all images from Landsat 5, Landsat 7, Landsat 8 and Sentinel-2 covering the area of
interest and acquired between the specified dates.
The downloaded images are in .TIF format and organised in subfolders, divided by satellite
mission and pixel resolution.
KV WRL 2018
Arguments:
-----------
inputs: dict
dictionary that contains the following fields:
'sitename': str
String containing the name of the site
'polygon': list
polygon containing the lon/lat coordinates to be extracted
longitudes in the first column and latitudes in the second column
'dates': list of str
list that contains 2 strings with the initial and final dates in format 'yyyy-mm-dd'
e.g. ['1987-01-01', '2018-01-01']
'sat_list': list of str
list that contains the names of the satellite missions to include
e.g. ['L5', 'L7', 'L8', 'S2']
Returns:
-----------
metadata: dict
contains all the information about the satellite images that were downloaded
"""
# read inputs dictionary
sitename = inputs['sitename']
polygon = inputs['polygon']
dates = inputs['dates']
sat_list = inputs['sat_list']
# format in which the images are downloaded
suffix = '.tif'
# initialize metadata dictionary (stores timestamps and georeferencing accuracy of each image)
metadata = dict([])
# create directories
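A usage sketch of the refactored call, with the old positional arguments folded into the inputs dict (the dates are placeholders; the polygon reuses the NARRA coordinates from the KML above):

```python
# hypothetical example of calling the refactored function
inputs = {
    'sitename': 'NARRA',
    'polygon': [[[151.2957545, -33.7012561], [151.297557, -33.7388075],
                 [151.312234, -33.7390216], [151.311204, -33.701399],
                 [151.2957545, -33.7012561]]],
    'dates': ['2017-12-01', '2018-01-01'],   # placeholder date range
    'sat_list': ['L5', 'L7', 'L8', 'S2'],
}
metadata = get_images(inputs)
```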
@ -89,7 +115,7 @@ def get_images(sitename,polygon,dates,sat):
# download L5 images
#=============================================================================================#
if 'L5' in sat_list or 'Landsat5' in sat_list:
satname = 'L5'
# create a subfolder to store L5 images
@ -105,19 +131,27 @@ def get_images(sitename,polygon,dates,sat):
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
# get all images in the filtered collection
im_all = flt_col.getInfo().get('features')
# remove very cloudy images (>95% cloud)
cloud_cover = [_['properties']['CLOUD_COVER'] for _ in im_all]
if np.any([_ > 95 for _ in cloud_cover]):
idx_delete = np.where([_ > 95 for _ in cloud_cover])[0]
im_all_cloud = [x for k,x in enumerate(im_all) if k not in idx_delete]
else:
im_all_cloud = im_all
n_img = len(im_all_cloud)
# print how many images there are
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
# loop through images
timestamps = []
acc_georef = []
filenames = []
all_names = []
im_epsg = []
for i in range(n_img):
# find each image in ee database
im = ee.Image(im_all_cloud[i].get('id'))
# read metadata
im_dic = im.getInfo()
# get bands
@ -136,32 +170,38 @@ def get_images(sitename,polygon,dates,sat):
except:
# default value of accuracy (RMSE = 12m)
acc_georef.append(12)
# delete dimensions key from dictionary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for L5
ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[7]]
# filenames for the images
filename = im_date + '_' + satname + '_' + sitename + suffix
# if two images taken at the same date add 'dup' in the name (duplicate)
if any(filename in _ for _ in all_names):
filename = im_date + '_' + satname + '_' + sitename + '_dup' + suffix
all_names.append(filename)
filenames.append(filename)
# download .TIF image
local_data = download_tif(im, polygon, ms_bands, filepath)
# update filename
try:
os.rename(local_data, os.path.join(filepath, filename))
except:
os.remove(os.path.join(filepath, filename))
os.rename(local_data, os.path.join(filepath, filename))
print(i+1, end='..')
# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
filenames_sorted = [filenames[j] for j in idx_sorted]
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
# save into dict
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
'epsg':im_epsg_sorted, 'filenames':filenames_sorted}
print('\nFinished with ' + satname)
@ -169,7 +209,7 @@ def get_images(sitename,polygon,dates,sat):
# download L7 images
#=============================================================================================#
if 'L7' in sat_list or 'Landsat7' in sat_list:
satname = 'L7'
# create subfolders (one for 30m multispectral bands and one for 15m pan bands)
@ -188,19 +228,27 @@ def get_images(sitename,polygon,dates,sat):
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
# get all images in the filtered collection
im_all = flt_col.getInfo().get('features')
# remove very cloudy images (>95% cloud)
cloud_cover = [_['properties']['CLOUD_COVER'] for _ in im_all]
if np.any([_ > 95 for _ in cloud_cover]):
idx_delete = np.where([_ > 95 for _ in cloud_cover])[0]
im_all_cloud = [x for k,x in enumerate(im_all) if k not in idx_delete]
else:
im_all_cloud = im_all
n_img = len(im_all_cloud)
# print how many images there are
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
# loop through images
timestamps = []
acc_georef = []
filenames = []
all_names = []
im_epsg = []
for i in range(n_img):
# find each image in ee database
im = ee.Image(im_all_cloud[i].get('id'))
# read metadata
im_dic = im.getInfo()
# get bands
@ -219,7 +267,6 @@ def get_images(sitename,polygon,dates,sat):
except:
# default value of accuracy (RMSE = 12m)
acc_georef.append(12)
# delete dimensions key from dictionary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for L7
@ -232,31 +279,42 @@ def get_images(sitename,polygon,dates,sat):
if any(filename_pan in _ for _ in all_names):
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
all_names.append(filename_pan)
filenames.append(filename_pan)
# download .TIF image
local_data_pan = download_tif(im, polygon, pan_band, filepath_pan)
local_data_ms = download_tif(im, polygon, ms_bands, filepath_ms)
# update filename
try:
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
except:
os.remove(os.path.join(filepath_pan, filename_pan))
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
try:
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
except:
os.remove(os.path.join(filepath_ms, filename_ms))
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
print(i+1, end='..')
# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
filenames_sorted = [filenames[j] for j in idx_sorted]
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
# save into dict
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
'epsg':im_epsg_sorted, 'filenames':filenames_sorted}
print('\nFinished with ' + satname)
#=============================================================================================#
# download L8 images
#=============================================================================================#
if 'L8' in sat_list or 'Landsat8' in sat_list:
satname = 'L8'
# create subfolders (one for 30m multispectral bands and one for 15m pan bands)
@ -275,19 +333,27 @@ def get_images(sitename,polygon,dates,sat):
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
# get all images in the filtered collection
im_all = flt_col.getInfo().get('features')
# remove very cloudy images (>95% cloud)
cloud_cover = [_['properties']['CLOUD_COVER'] for _ in im_all]
if np.any([_ > 95 for _ in cloud_cover]):
idx_delete = np.where([_ > 95 for _ in cloud_cover])[0]
im_all_cloud = [x for k,x in enumerate(im_all) if k not in idx_delete]
else:
im_all_cloud = im_all
n_img = len(im_all_cloud)
# print how many images there are
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
# loop through images
timestamps = []
acc_georef = []
filenames = []
all_names = []
im_epsg = []
for i in range(n_img):
# find each image in ee database
im = ee.Image(im_all_cloud[i].get('id'))
# read metadata
im_dic = im.getInfo()
# get bands
@ -306,7 +372,6 @@ def get_images(sitename,polygon,dates,sat):
except:
# default value of accuracy (RMSE = 12m)
acc_georef.append(12)
# delete dimensions key from dictionary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for L8
@ -319,30 +384,41 @@ def get_images(sitename,polygon,dates,sat):
if any(filename_pan in _ for _ in all_names):
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
all_names.append(filename_pan)
filenames.append(filename_pan)
# download .TIF image
local_data_pan = download_tif(im, polygon, pan_band, filepath_pan)
local_data_ms = download_tif(im, polygon, ms_bands, filepath_ms)
# update filename
try:
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
except:
os.remove(os.path.join(filepath_pan, filename_pan))
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
try:
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
except:
os.remove(os.path.join(filepath_ms, filename_ms))
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
print(i+1, end='..')
# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
filenames_sorted = [filenames[j] for j in idx_sorted]
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
'epsg':im_epsg_sorted, 'filenames':filenames_sorted}
print('\nFinished with ' + satname)
#=============================================================================================#
# download S2 images
#=============================================================================================#
if 'S2' in sat_list or 'Sentinel2' in sat_list:
satname = 'S2'
# create subfolders for the 10m, 20m and 60m multispectral bands
@ -359,20 +435,60 @@ def get_images(sitename,polygon,dates,sat):
# filter by location and dates
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
# get all images in the filtered collection
im_all = flt_col.getInfo().get('features')
# remove duplicates in the collection (there are many in S2 collection)
timestamps = [datetime.fromtimestamp(_['properties']['system:time_start']/1000,
tz=pytz.utc) for _ in im_all]
# utm zone projection
utm_zones = np.array([int(_['bands'][0]['crs'][5:]) for _ in im_all])
utm_zone_selected = np.max(np.unique(utm_zones))
# find the images that were acquired at the same time but have different utm zones
idx_all = np.arange(0,len(im_all),1)
idx_covered = np.ones(len(im_all)).astype(bool)
idx_delete = []
i = 0
while 1:
same_time = np.abs([(timestamps[i]-_).total_seconds() for _ in timestamps]) < 60*60*24
idx_same_time = np.where(same_time)[0]
same_utm = utm_zones == utm_zone_selected
idx_temp = np.where([same_time[j] == True and same_utm[j] == False for j in idx_all])[0]
idx_keep = idx_same_time[[_ not in idx_temp for _ in idx_same_time ]]
# if more than 2 images with same date and same utm, drop the last ones
if len(idx_keep) > 2:
idx_temp = np.append(idx_temp,idx_keep[-(len(idx_keep)-2):])
for j in idx_temp:
idx_delete.append(j)
idx_covered[idx_same_time] = False
if np.any(idx_covered):
i = np.where(idx_covered)[0][0]
else:
break
# update the collection by deleting all those images that have same timestamp and different
# utm projection
im_all_updated = [x for k,x in enumerate(im_all) if k not in idx_delete]
# remove very cloudy images (>95% cloud)
cloud_cover = [_['properties']['CLOUDY_PIXEL_PERCENTAGE'] for _ in im_all_updated]
if np.any([_ > 95 for _ in cloud_cover]):
idx_delete = np.where([_ > 95 for _ in cloud_cover])[0]
im_all_cloud = [x for k,x in enumerate(im_all_updated) if k not in idx_delete]
else:
im_all_cloud = im_all_updated
n_img = len(im_all_cloud)
# print how many images there are
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
# loop through images
timestamps = []
acc_georef = []
filenames = []
all_names = []
im_epsg = []
for i in range(n_img):
# find each image in ee database
im = ee.Image(im_all_cloud[i].get('id'))
# read metadata
im_dic = im.getInfo()
# get bands
@ -394,39 +510,290 @@ def get_images(sitename,polygon,dates,sat):
filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + suffix
# if two images taken at the same date add '_dup' to the name (duplicate)
if any(filename10 in _ for _ in all_names):
filename10 = filename10[:filename10.find('.')] + '_dup' + suffix
filename20 = filename20[:filename20.find('.')] + '_dup' + suffix
filename60 = filename60[:filename60.find('.')] + '_dup' + suffix
all_names.append(filename10)
filenames.append(filename10)
# download .TIF image and update filename
local_data = download_tif(im, polygon, bands10, os.path.join(filepath, '10m'))
try:
os.rename(local_data, os.path.join(filepath, '10m', filename10))
except:
os.remove(os.path.join(filepath, '10m', filename10))
os.rename(local_data, os.path.join(filepath, '10m', filename10))
local_data = download_tif(im, polygon, bands20, os.path.join(filepath, '20m'))
try:
os.rename(local_data, os.path.join(filepath, '20m', filename20))
except:
os.remove(os.path.join(filepath, '20m', filename20))
os.rename(local_data, os.path.join(filepath, '20m', filename20))
local_data = download_tif(im, polygon, bands60, os.path.join(filepath, '60m'))
try:
os.rename(local_data, os.path.join(filepath, '60m', filename60))
except:
os.remove(os.path.join(filepath, '60m', filename60))
os.rename(local_data, os.path.join(filepath, '60m', filename60))
# save timestamp, epsg code and georeferencing accuracy (1 if passed, -1 if not passed)
timestamps.append(im_timestamp)
im_epsg.append(int(im_dic['bands'][0]['crs'][5:]))
# Sentinel-2 products don't provide a georeferencing accuracy (RMSE as in Landsat)
# but they have a flag indicating if the geometric quality control was passed or failed
# if passed a value of 1 is stored, if failed a value of -1 is stored in the metadata
try:
if im_dic['properties']['GEOMETRIC_QUALITY_FLAG'] == 'PASSED':
acc_georef.append(1)
else:
acc_georef.append(-1)
except:
acc_georef.append(-1)
print(i+1, end='..')
# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
filenames_sorted = [filenames[j] for j in idx_sorted]
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
'epsg':im_epsg_sorted, 'filenames':filenames_sorted}
print('\nFinished with ' + satname)
# merge overlapping images (only if polygon is at the edge of an image)
if 'S2' in metadata.keys():
metadata = merge_overlapping_images(metadata,inputs)
# save metadata dict
filepath = os.path.join(os.getcwd(), 'data', sitename)
with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'wb') as f:
pickle.dump(metadata, f)
return metadata
def merge_overlapping_images(metadata,inputs):
"""
When the area of interest is located at the boundary between 2 images, there will be overlap
between the 2 images and both will be downloaded from Google Earth Engine. This function
merges the 2 images, so that the area of interest is covered by only 1 image.
KV WRL 2018
Arguments:
-----------
metadata: dict
contains all the information about the satellite images that were downloaded
inputs: dict
dictionary that contains the following fields:
'sitename': str
String containing the name of the site
'polygon': list
polygon containing the lon/lat coordinates to be extracted
longitudes in the first column and latitudes in the second column
'dates': list of str
list that contains 2 strings with the initial and final dates in format 'yyyy-mm-dd'
e.g. ['1987-01-01', '2018-01-01']
'sat_list': list of str
list that contains the names of the satellite missions to include
e.g. ['L5', 'L7', 'L8', 'S2']
Returns:
-----------
metadata: dict
updated metadata with the information of the merged images
"""
# only for Sentinel-2 at this stage (could be implemented for Landsat as well)
sat = 'S2'
filepath = os.path.join(os.getcwd(), 'data', inputs['sitename'])
# find the images that are overlapping (same date in S2 filenames)
filenames = metadata[sat]['filenames']
filenames_copy = filenames.copy()
# loop through all the filenames and find the pairs of overlapping images (same date and time of acquisition)
pairs = []
for i,fn in enumerate(filenames):
filenames_copy[i] = []
# find duplicate
boolvec = [fn[:22] == _[:22] for _ in filenames_copy]
if np.any(boolvec):
idx_dup = np.where(boolvec)[0][0]
if len(filenames[i]) > len(filenames[idx_dup]):
pairs.append([idx_dup,i])
else:
pairs.append([i,idx_dup])
msg = 'Merging %d pairs of overlapping images...' % len(pairs)
print(msg)
# for each pair of images, merge them into one complete image
for i,pair in enumerate(pairs):
print(i+1, end='..')
fn_im = []
for index in range(len(pair)):
# read image
fn_im.append([os.path.join(filepath, 'S2', '10m', filenames[pair[index]]),
os.path.join(filepath, 'S2', '20m', filenames[pair[index]].replace('10m','20m')),
os.path.join(filepath, 'S2', '60m', filenames[pair[index]].replace('10m','60m'))])
im_ms, georef, cloud_mask, im_extra, imQA = SDS_preprocess.preprocess_single(fn_im[index], sat)
# in Sentinel-2 images close to the edge of the image there are some artefacts:
# squares with constant pixel intensities. They need to be masked in the
# raster (GEOTIFF). This can be done using the image standard deviation, which
# is close to 0 for the artefacts.
# First mask the 10m bands
if len(im_ms) > 0:
im_std = SDS_tools.image_std(im_ms[:,:,0],1)
im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
mask = morphology.dilation(im_binary, morphology.square(3))
for k in range(im_ms.shape[2]):
im_ms[mask,k] = np.nan
SDS_tools.mask_raster(fn_im[index][0], mask)
# Then mask the 20m band
im_std = SDS_tools.image_std(im_extra,1)
im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
mask = morphology.dilation(im_binary, morphology.square(3))
im_extra[mask] = np.nan
SDS_tools.mask_raster(fn_im[index][1], mask)
else:
continue
# make a figure for quality control
# plt.figure()
# plt.subplot(221)
# plt.imshow(im_ms[:,:,[2,1,0]])
# plt.title('imRGB')
# plt.subplot(222)
# plt.imshow(im20, cmap='gray')
# plt.title('im20')
# plt.subplot(223)
# plt.imshow(imQA, cmap='gray')
# plt.title('imQA')
# plt.subplot(224)
# plt.title(fn_im[index][0][-30:])
# merge masked 10m bands
fn_merged = os.path.join(os.getcwd(), 'merged.tif')
gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][0], fn_im[1][0]])
os.chmod(fn_im[0][0], 0o777)
os.remove(fn_im[0][0])
os.chmod(fn_im[1][0], 0o777)
os.remove(fn_im[1][0])
os.rename(fn_merged, fn_im[0][0])
# merge masked 20m band (SWIR band)
fn_merged = os.path.join(os.getcwd(), 'merged.tif')
gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][1], fn_im[1][1]])
os.chmod(fn_im[0][1], 0o777)
os.remove(fn_im[0][1])
os.chmod(fn_im[1][1], 0o777)
os.remove(fn_im[1][1])
os.rename(fn_merged, fn_im[0][1])
# merge QA band (60m band)
fn_merged = os.path.join(os.getcwd(), 'merged.tif')
gdal_merge.main(['', '-o', fn_merged, '-n', 'nan', fn_im[0][2], fn_im[1][2]])
os.chmod(fn_im[0][2], 0o777)
os.remove(fn_im[0][2])
os.chmod(fn_im[1][2], 0o777)
os.remove(fn_im[1][2])
os.rename(fn_merged, fn_im[0][2])
# update the metadata dict (delete all the duplicates)
metadata2 = copy.deepcopy(metadata)
filenames_copy = metadata2[sat]['filenames']
index_list = []
for i in range(len(filenames_copy)):
if filenames_copy[i].find('dup') == -1:
index_list.append(i)
for key in metadata2[sat].keys():
metadata2[sat][key] = [metadata2[sat][key][_] for _ in index_list]
return metadata2
def remove_cloudy_images(metadata,inputs,cloud_thresh):
"""
Deletes the .TIF file of images that have a cloud cover percentage that is above the cloud
threshold.
KV WRL 2018
Arguments:
-----------
metadata: dict
contains all the information about the satellite images that were downloaded
inputs: dict
dictionary that contains the following fields:
'sitename': str
String containing the name of the site
'polygon': list
polygon containing the lon/lat coordinates to be extracted
longitudes in the first column and latitudes in the second column
'dates': list of str
list that contains 2 strings with the initial and final dates in format 'yyyy-mm-dd'
e.g. ['1987-01-01', '2018-01-01']
'sat_list': list of str
list that contains the names of the satellite missions to include
e.g. ['L5', 'L7', 'L8', 'S2']
cloud_thresh: float
value between 0 and 1 indicating the maximum cloud fraction in the image that is accepted
Returns:
-----------
metadata: dict
updated metadata without the cloudy images that were removed
"""
# create a deep copy
metadata2 = copy.deepcopy(metadata)
for satname in metadata.keys():
# get the image filenames
filepath = SDS_tools.get_filepath(inputs,satname)
filenames = metadata[satname]['filenames']
# loop through images
idx_good = []
for i in range(len(filenames)):
# image filename
fn = SDS_tools.get_filenames(filenames[i],filepath, satname)
# preprocess image (cloud mask + pansharpening/downsampling)
im_ms, georef, cloud_mask, im_extra, imQA = SDS_preprocess.preprocess_single(fn, satname)
# calculate cloud cover
cloud_cover = np.divide(sum(sum(cloud_mask.astype(int))),
(cloud_mask.shape[0]*cloud_mask.shape[1]))
# skip image if cloud cover is above threshold
if cloud_cover > cloud_thresh or cloud_cover == 1:
# remove image files
if satname == 'L5':
os.chmod(fn, 0o777)
os.remove(fn)
else:
for j in range(len(fn)):
os.chmod(fn[j], 0o777)
os.remove(fn[j])
else:
idx_good.append(i)
msg = '\n%d cloudy images were removed for %s.' % (len(filenames)-len(idx_good), satname)
print(msg)
# update the metadata dict (delete all cloudy images)
for key in metadata2[satname].keys():
metadata2[satname][key] = [metadata2[satname][key][_] for _ in idx_good]
return metadata2
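A short usage sketch of the new clean-up step (the threshold is illustrative):

```python
# hypothetical clean-up pass after downloading: delete images above 50% cloud cover
cloud_thresh = 0.5   # illustrative value between 0 and 1
metadata = remove_cloudy_images(metadata, inputs, cloud_thresh)
```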

@ -1,28 +1,36 @@
"""This module contains all the functions needed to preprocess the satellite images: creating a """This module contains all the functions needed to preprocess the satellite images before the
cloud mask and pansharpening/downsampling the images. shoreline can be extracted. This includes creating a cloud mask and
pansharpening/downsampling the multispectral bands.
Author: Kilian Vos, Water Research Laboratory, University of New South Wales Author: Kilian Vos, Water Research Laboratory, University of New South Wales
""" """
# Initial settings # load modules
import os import os
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from osgeo import gdal, ogr, osr import pdb
# image processing modules
import skimage.transform as transform import skimage.transform as transform
import skimage.morphology as morphology import skimage.morphology as morphology
import sklearn.decomposition as decomposition import sklearn.decomposition as decomposition
import skimage.exposure as exposure import skimage.exposure as exposure
# other modules
from osgeo import gdal, ogr, osr
from pylab import ginput from pylab import ginput
import pickle import pickle
import pdb import matplotlib.path as mpltPath
# own modules
import SDS_tools import SDS_tools
# Functions np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
def create_cloud_mask(im_qa, satname): def create_cloud_mask(im_qa, satname):
""" """
Creates a cloud mask from the image containing the QA band information. Creates a cloud mask using the information contained in the QA band.
KV WRL 2018 KV WRL 2018
@ -31,15 +39,15 @@ def create_cloud_mask(im_qa, satname):
im_qa: np.array
Image containing the QA band
satname: string
short name for the satellite (L5, L7, L8 or S2)
Returns:
-----------
cloud_mask : np.array
A boolean array with True if a pixel is cloudy and False otherwise
"""
# convert QA bits (the bits allocated to cloud cover vary depending on the satellite mission)
if satname == 'L8':
cloud_values = [2800, 2804, 2808, 2812, 6896, 6900, 6904, 6908]
elif satname == 'L7' or satname == 'L5' or satname == 'L4':
@ -50,7 +58,7 @@ def create_cloud_mask(im_qa, satname):
# find which pixels have bits corresponding to cloud values
cloud_mask = np.isin(im_qa, cloud_values)
# remove isolated cloud pixels (there are some in the swash zone and they are not clouds)
if sum(sum(cloud_mask)) > 0 and sum(sum(~cloud_mask)) > 0:
morphology.remove_small_objects(cloud_mask, min_size=10, connectivity=1, in_place=True)
@ -100,8 +108,10 @@ def hist_match(source, template):
def pansharpen(im_ms, im_pan, cloud_mask):
"""
Pansharpens a multispectral image, using the panchromatic band and a cloud mask.
A PCA is applied to the image, then the 1st PC is replaced with the panchromatic band.
Note that it is essential to match the histograms of the 1st PC and the panchromatic band
before replacing and inverting the PCA.
KV WRL 2018
@ -117,14 +127,14 @@ def pansharpen(im_ms, im_pan, cloud_mask):
Returns:
-----------
im_ms_ps: np.ndarray
Pansharpened multispectral image (3D)
"""
# reshape image into vector and apply cloud mask
vec = im_ms.reshape(im_ms.shape[0] * im_ms.shape[1], im_ms.shape[2])
vec_mask = cloud_mask.reshape(im_ms.shape[0] * im_ms.shape[1])
vec = vec[~vec_mask, :]
# apply PCA to multispectral bands
pca = decomposition.PCA()
vec_pcs = pca.fit_transform(vec)
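The hunk stops after the PCA projection. The remaining steps the docstring describes (histogram-match the panchromatic band to the 1st PC, substitute it, invert the PCA and rebuild the image) would look roughly like the sketch below; skimage's match_histograms stands in for the module's own hist_match, and the block is an illustration rather than the committed code.

```python
import numpy as np
import sklearn.decomposition as decomposition
import skimage.exposure as exposure

def pansharpen_sketch(im_ms, im_pan, cloud_mask):
    # flatten the multispectral bands and drop cloudy pixels
    vec = im_ms.reshape(-1, im_ms.shape[2])
    vec_mask = cloud_mask.reshape(-1)
    vec = vec[~vec_mask, :]
    # PCA on the cloud-free multispectral pixels
    pca = decomposition.PCA()
    vec_pcs = pca.fit_transform(vec)
    # match the pan band's histogram to the 1st PC, substitute it, then invert the PCA
    vec_pan = im_pan.reshape(-1)[~vec_mask]
    vec_pcs[:, 0] = exposure.match_histograms(vec_pan, vec_pcs[:, 0])
    vec_ps = pca.inverse_transform(vec_pcs)
    # write the pansharpened values back into a full-size image (cloudy pixels stay NaN)
    out = np.full((im_ms.shape[0] * im_ms.shape[1], im_ms.shape[2]), np.nan)
    out[~vec_mask, :] = vec_ps
    return out.reshape(im_ms.shape)
```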
@ -146,7 +156,7 @@ def rescale_image_intensity(im, cloud_mask, prob_high):
""" """
Rescales the intensity of an image (multispectral or single band) by applying Rescales the intensity of an image (multispectral or single band) by applying
a cloud mask and clipping the prob_high upper percentile. This functions allows a cloud mask and clipping the prob_high upper percentile. This functions allows
to stretch the contrast of an image for visualisation purposes. to stretch the contrast of an image, only for visualisation purposes.
KV WRL 2018 KV WRL 2018
@ -201,7 +211,10 @@ def rescale_image_intensity(im, cloud_mask, prob_high):
def preprocess_single(fn, satname):
"""
Reads the image and outputs the pansharpened/down-sampled multispectral bands, the
georeferencing vector of the image (coordinates of the upper left pixel), the cloud mask and
the QA band. For Landsat 7-8 it also outputs the panchromatic band and for Sentinel-2 it also
outputs the 20m SWIR band.
KV WRL 2018
@ -209,7 +222,8 @@ def preprocess_single(fn, satname):
-----------
fn: str or list of str
filename of the .TIF file containing the image
for L7, L8 and S2 this is a list of filenames, one filename for each band at different
resolution (30m and 15m for Landsat 7-8, 10m, 20m, 60m for Sentinel-2)
satname: str
name of the satellite mission (e.g., 'L5')
@ -222,6 +236,11 @@ def preprocess_single(fn, satname):
coordinates of the top-left pixel of the image
cloud_mask: np.array
2D cloud mask with True where cloud pixels are
im_extra : np.array
2D array containing the 20m resolution SWIR band for Sentinel-2 and the 15m resolution
panchromatic band for Landsat 7 and Landsat 8. This field is empty for Landsat 5.
imQA: np.array
2D array containing the QA band, from which the cloud_mask can be computed.
"""
@ -267,6 +286,9 @@ def preprocess_single(fn, satname):
# calculate cloud cover
cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
# no extra image for Landsat 5 (they are all 30 m bands)
im_extra = []
imQA = im_qa
#=============================================================================================#
# L7 images
@ -324,6 +346,9 @@ def preprocess_single(fn, satname):
im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[4]], axis=2)
im_ms = im_ms_ps.copy()
# the extra image is the 15m panchromatic band
im_extra = im_pan
imQA = im_qa
#=============================================================================================#
# L8 images
@ -380,6 +405,9 @@ def preprocess_single(fn, satname):
im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2)
im_ms = im_ms_ps.copy()
# the extra image is the 15m panchromatic band
im_extra = im_pan
imQA = im_qa
#=============================================================================================#
# S2 images
@ -400,7 +428,7 @@ def preprocess_single(fn, satname):
georef = []
# skip the image by giving it a full cloud_mask
cloud_mask = np.ones((im10.shape[0],im10.shape[1])).astype('bool')
return im_ms, georef, cloud_mask, [], []
# size of 10m bands
nrows = im10.shape[0]
@ -427,8 +455,8 @@ def preprocess_single(fn, satname):
data = gdal.Open(fn60, gdal.GA_ReadOnly)
bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
im60 = np.stack(bands, 2)
imQA = im60[:,:,0]
cloud_mask = create_cloud_mask(imQA, satname)
# resize the cloud mask using nearest neighbour interpolation (order 0)
cloud_mask = transform.resize(cloud_mask,(nrows, ncols), order=0, preserve_range=True,
mode='constant')
@ -440,8 +468,10 @@ def preprocess_single(fn, satname):
# calculate cloud cover
cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
# the extra image is the 20m SWIR band
im_extra = im20
return im_ms, georef, cloud_mask, im_extra, imQA
def create_jpg(im_ms, cloud_mask, date, satname, filepath):
@ -476,21 +506,32 @@ def create_jpg(im_ms, cloud_mask, date, satname, filepath):
fig = plt.figure()
fig.set_size_inches([18,9])
fig.set_tight_layout(True)
ax1 = fig.add_subplot(111)
ax1.axis('off')
ax1.imshow(im_RGB)
ax1.set_title(date + ' ' + satname, fontsize=16)
# if im_RGB.shape[1] > 2*im_RGB.shape[0]:
# ax1 = fig.add_subplot(311)
# ax2 = fig.add_subplot(312)
# ax3 = fig.add_subplot(313)
# else:
# ax1 = fig.add_subplot(131)
# ax2 = fig.add_subplot(132)
# ax3 = fig.add_subplot(133)
# # RGB
# ax1.axis('off')
# ax1.imshow(im_RGB)
# ax1.set_title(date + ' ' + satname, fontsize=16)
# # NIR
# ax2.axis('off')
# ax2.imshow(im_NIR, cmap='seismic')
# ax2.set_title('Near Infrared', fontsize=16)
# # SWIR
# ax3.axis('off')
# ax3.imshow(im_SWIR, cmap='seismic')
# ax3.set_title('Short-wave Infrared', fontsize=16)
# save figure
plt.rcParams['savefig.jpeg_quality'] = 100
fig.savefig(os.path.join(filepath,
@ -498,28 +539,29 @@ def create_jpg(im_ms, cloud_mask, date, satname, filepath):
plt.close()
def save_jpg(metadata, settings):
"""
Saves a .jpg image for all the images contained in metadata.
KV WRL 2018
Arguments:
-----------
metadata: dict
contains all the information about the satellite images that were downloaded
settings: dict
contains the following fields:
'cloud_thresh': float
value between 0 and 1 indicating the maximum cloud fraction in the image that is accepted
'sitename': string
name of the site (also name of the folder where the images are stored)
Returns:
-----------
Generates .jpg files for all the satellite images available
"""
sitename = settings['inputs']['sitename']
cloud_thresh = settings['cloud_thresh']
# create subfolder to store the jpg files
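A usage sketch of the renamed function with the settings structure its docstring describes (values are placeholders; inputs is the same dict passed to get_images):

```python
# hypothetical settings dict for save_jpg
settings = {
    'inputs': inputs,        # dict with 'sitename', 'polygon', 'dates', 'sat_list'
    'cloud_thresh': 0.5,     # illustrative maximum cloud fraction
}
save_jpg(metadata, settings)
```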
@ -531,65 +573,57 @@ def preprocess_all_images(metadata, settings):
# loop through satellite list
for satname in metadata.keys():
filepath = SDS_tools.get_filepath(settings['inputs'],satname)
filenames = metadata[satname]['filenames']
# loop through images
for i in range(len(filenames)):
# image filename
fn = SDS_tools.get_filenames(filenames[i],filepath, satname)
# read and preprocess image
im_ms, georef, cloud_mask, im_extra, imQA = preprocess_single(fn, satname)
# calculate cloud cover
cloud_cover = np.divide(sum(sum(cloud_mask.astype(int))),
(cloud_mask.shape[0]*cloud_mask.shape[1]))
# skip image if cloud cover is above threshold
if cloud_cover > cloud_thresh or cloud_cover == 1:
continue
# save .jpg with date and satellite in the title
date = filenames[i][:10]
create_jpg(im_ms, cloud_mask, date, satname, filepath_jpg)
def get_reference_sl_manual(metadata, settings):
"""
Allows the user to manually digitize a reference shoreline that is used to seed the shoreline
detection algorithm. The reference shoreline helps to detect the outliers, making the shoreline
detection more robust.
KV WRL 2018
Arguments:
-----------
metadata: dict
contains all the information about the satellite images that were downloaded
settings: dict
contains the following fields:
'cloud_thresh': float
value between 0 and 1 indicating the maximum cloud fraction in the image that is accepted
'sitename': string
name of the site (also name of the folder where the images are stored)
'output_epsg': int
epsg code of the desired spatial reference system
Returns:
-----------
ref_sl: np.array
coordinates of the reference shoreline that was manually digitized
"""
sitename = settings['inputs']['sitename']
# check if reference shoreline already exists in the corresponding folder
filepath = os.path.join(os.getcwd(), 'data', sitename)
filename = sitename + '_ref_sl.pkl'
if filename in os.listdir(filepath):
@ -599,23 +633,31 @@ def get_reference_sl(metadata, settings):
return refsl
else:
# first try to use S2 images (10m res for manually digitizing the reference shoreline)
if 'S2' in metadata.keys():
satname = 'S2'
filepath = SDS_tools.get_filepath(settings['inputs'],satname)
filenames = metadata[satname]['filenames']
# if no S2 images, try L8 (15m res in the RGB with pansharpening)
elif not 'S2' in metadata.keys() and 'L8' in metadata.keys():
satname = 'L8'
filepath = SDS_tools.get_filepath(settings['inputs'],satname)
filenames = metadata[satname]['filenames']
# if no S2 images and no L8, use L5 images (L7 images have black diagonal bands making it
# hard to manually digitize a shoreline)
elif not 'S2' in metadata.keys() and not 'L8' in metadata.keys() and 'L5' in metadata.keys():
satname = 'L5'
filepath = SDS_tools.get_filepath(settings['inputs'],satname)
filenames = metadata[satname]['filenames']
else:
print('You cannot digitize the shoreline on L7 images, add another L8, S2 or L5 to your dataset.')
# loop through the images
for i in range(len(filenames)):
# read image
fn = SDS_tools.get_filenames(filenames[i],filepath, satname)
im_ms, georef, cloud_mask, im_extra, imQA = preprocess_single(fn, satname)
# calculate cloud cover
cloud_cover = np.divide(sum(sum(cloud_mask.astype(int))),
(cloud_mask.shape[0]*cloud_mask.shape[1]))
@ -624,44 +666,110 @@ def get_reference_sl(metadata, settings):
continue
# rescale image intensity for display purposes
im_RGB = rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9)
# plot the image RGB on a figure
fig = plt.figure()
fig.set_size_inches([18,9])
fig.set_tight_layout(True)
plt.axis('off')
plt.imshow(im_RGB)
# decide if the image is good enough for digitizing the shoreline
plt.title('click <keep> if image is clear enough to digitize the shoreline.\n' +
'If not (too cloudy) click on <skip> to get another image', fontsize=14)
keep_button = plt.text(0, 0.9, 'keep', size=16, ha="left", va="top",
transform=plt.gca().transAxes,
bbox=dict(boxstyle="square", ec='k',fc='w'))
skip_button = plt.text(1, 0.9, 'skip', size=16, ha="right", va="top",
transform=plt.gca().transAxes,
bbox=dict(boxstyle="square", ec='k',fc='w'))
mng = plt.get_current_fig_manager()
mng.window.showMaximized()
# let user click on the image once
pt_input = ginput(n=1, timeout=1000000, show_clicks=True)
pt_input = np.array(pt_input)
# if clicks next to <skip>, show another image
if pt_input[0][0] > im_ms.shape[1]/2:
plt.close()
continue
else:
# remove keep and skip buttons
keep_button.set_visible(False)
skip_button.set_visible(False)
# update title (instructions)
plt.title('Digitize the shoreline on this image by clicking on it.\n' +
'When finished digitizing the shoreline click on the scroll wheel ' +
'(middle click).', fontsize=14)
plt.draw()
# let user click on the shoreline # let user click on the shoreline
pts = ginput(n=5000, timeout=100000, show_clicks=True) pts = ginput(n=50000, timeout=100000, show_clicks=True)
pts_pix = np.array(pts) pts_pix = np.array(pts)
plt.close() plt.close()
# convert image coordinates to world coordinates # convert image coordinates to world coordinates
pts_world = SDS_tools.convert_pix2world(pts_pix[:,[1,0]], georef) pts_world = SDS_tools.convert_pix2world(pts_pix[:,[1,0]], georef)
image_epsg = metadata[satname]['epsg'][i] image_epsg = metadata[satname]['epsg'][i]
pts_coords = SDS_tools.convert_epsg(pts_world, image_epsg, settings['output_epsg']) pts_coords = SDS_tools.convert_epsg(pts_world, image_epsg, settings['output_epsg'])
# save the reference shoreline
filepath = os.path.join(os.getcwd(), 'data', sitename)
with open(os.path.join(filepath, sitename + '_ref_sl.pkl'), 'wb') as f: with open(os.path.join(filepath, sitename + '_ref_sl.pkl'), 'wb') as f:
pickle.dump(pts_coords, f) pickle.dump(pts_coords, f)
print('Reference shoreline has been saved') print('Reference shoreline has been saved')
break break
return pts_coords return pts_coords
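# Sketch of the pixel-to-world conversion that SDS_tools.convert_pix2world performs,
# assuming the georef vector follows the GDAL geotransform convention
# [Xtr, Xscale, Xshear, Ytr, Yshear, Yscale]; the values below are made up and the real
# implementation may additionally apply half-pixel offsets.
import numpy as np
georef = np.array([342000.0, 10.0, 0.0, 6266000.0, 0.0, -10.0])  # hypothetical 10 m grid
pts_pix = np.array([[120.5, 45.2], [130.0, 50.0]])               # (row, column) pairs
rows, cols = pts_pix[:, 0], pts_pix[:, 1]
x = georef[0] + cols*georef[1] + rows*georef[2]
y = georef[3] + cols*georef[4] + rows*georef[5]
pts_world = np.column_stack([x, y])
print(pts_world)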
def get_reference_sl_Australia(settings):
"""
Automatically finds a reference shoreline from a high resolution coastline of Australia
(Smartline from Geoscience Australia). It finds the points of the national coastline vector
that are situated inside the area of interest (polygon).
KV WRL 2018
Arguments:
-----------
settings: dict
contains the following fields:
'cloud_thresh': float
value between 0 and 1 indicating the maximum cloud fraction in the image that is accepted
'sitename': string
name of the site (also name of the folder where the images are stored)
'output_epsg': int
epsg code of the desired spatial reference system
Returns:
-----------
ref_sl: np.array
coordinates of the reference shoreline found in the shapefile
"""
# load high-resolution shoreline of Australia
filename = os.path.join(os.getcwd(), 'data', 'shoreline_Australia.pkl')
with open(filename, 'rb') as f:
sl = pickle.load(f)
# spatial reference system of this shoreline
sl_epsg = 4283 # GDA94 geographic
# only select the points that sit inside the area of interest (polygon)
polygon = settings['inputs']['polygon']
# spatial reference system of the polygon (latitudes and longitudes)
polygon_epsg = 4326 # WGS84 geographic
polygon = SDS_tools.convert_epsg(np.array(polygon[0]), polygon_epsg, sl_epsg)[:,:-1]
# use matplotlib function Path
path = mpltPath.Path(polygon)
sl_inside = sl[np.where(path.contains_points(sl))]
# convert to desired output coordinate system
ref_sl = SDS_tools.convert_epsg(sl_inside, sl_epsg, settings['output_epsg'])[:,:-1]
# make a figure for quality control
plt.figure()
plt.axis('equal')
plt.xlabel('Eastings [m]')
plt.ylabel('Northings [m]')
plt.plot(ref_sl[:,0], ref_sl[:,1], 'r.')
polygon = SDS_tools.convert_epsg(polygon, sl_epsg, settings['output_epsg'])[:,:-1]
plt.plot(polygon[:,0], polygon[:,1], 'k-')
return ref_sl
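# Minimal sketch of the point-in-polygon selection used above, with made-up shoreline
# points and a rectangular area of interest:
import numpy as np
from matplotlib import path as mpltPath
sl = np.array([[151.30, -33.71], [151.31, -33.72], [151.40, -33.80]])   # hypothetical points
polygon = np.array([[151.29, -33.74], [151.29, -33.70],
                    [151.32, -33.70], [151.32, -33.74]])                # rectangular AOI
inside = mpltPath.Path(polygon).contains_points(sl)
sl_inside = sl[inside]   # keeps the first two points, drops the one outside the rectangle
print(sl_inside)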

@ -3,23 +3,12 @@
Author: Kilian Vos, Water Research Laboratory, University of New South Wales Author: Kilian Vos, Water Research Laboratory, University of New South Wales
""" """
# Initial settings # load modules
import os import os
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import pdb import pdb
# other modules
from osgeo import gdal, ogr, osr
import scipy.interpolate as interpolate
from datetime import datetime, timedelta
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
import matplotlib.cm as cm
from matplotlib import gridspec
from pylab import ginput
import pickle
# image processing modules # image processing modules
import skimage.filters as filters import skimage.filters as filters
import skimage.exposure as exposure import skimage.exposure as exposure
@ -32,7 +21,20 @@ import skimage.morphology as morphology
from sklearn.externals import joblib from sklearn.externals import joblib
from shapely.geometry import LineString from shapely.geometry import LineString
# other modules
from osgeo import gdal, ogr, osr
import scipy.interpolate as interpolate
from datetime import datetime, timedelta
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
import matplotlib.cm as cm
from matplotlib import gridspec
from pylab import ginput
import pickle
# own modules
import SDS_tools, SDS_preprocess import SDS_tools, SDS_preprocess
np.seterr(all='ignore') # raise/ignore divisions by 0 and nans np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
@ -70,139 +72,154 @@ def nd_index(im1, im2, cloud_mask):
return im_nd return im_nd
def classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size): def calculate_features(im_ms, cloud_mask, im_bool):
""" """
Classifies every pixel in the image in one of 4 classes: Calculates a range of features on the image that are used for the supervised classification.
- sand --> label = 1 The features include spectral normalized-difference indices and standard deviation of the image.
- whitewater (breaking waves and swash) --> label = 2
- water --> label = 3
- other (vegetation, buildings, rocks...) --> label = 0
The classifier is a Neural Network, trained with 7000 pixels for the class SAND and 1500
pixels for each of the other classes. This is because the class of interest for my application
is SAND and I wanted to minimize the classification error for that class.
KV WRL 2018 KV WRL 2018
Arguments: Arguments:
----------- -----------
im_ms_ps: np.array im_ms: np.array
Pansharpened RGB + downsampled NIR and SWIR RGB + downsampled NIR and SWIR
im_pan:
Panchromatic band
cloud_mask: np.array cloud_mask: np.array
2D cloud mask with True where cloud pixels are 2D cloud mask with True where cloud pixels are
plot_bool: boolean im_bool: np.array
True if plot is wanted 2D array of boolean indicating where on the image to calculate the features
Returns: ----------- Returns: -----------
im_classif: np.array features: np.array
2D image containing labels matrix containing each feature (columns) calculated for all
im_labels: np.array of booleans the pixels (rows) indicated in im_bool
3D image containing a boolean image for each class (im_classif == label) """
"""
# load classifier # add all the multispectral bands
clf = joblib.load('.\\classifiers\\NN_4classes_withpan.pkl') features = np.expand_dims(im_ms[im_bool,0],axis=1)
for k in range(1,im_ms.shape[2]):
# calculate features feature = np.expand_dims(im_ms[im_bool,k],axis=1)
n_features = 10 features = np.append(features, feature, axis=-1)
im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features)) # NIR-G
im_features[:,:,[0,1,2,3,4]] = im_ms_ps im_NIRG = nd_index(im_ms[:,:,3], im_ms[:,:,1], cloud_mask)
im_features[:,:,5] = im_pan features = np.append(features, np.expand_dims(im_NIRG[im_bool],axis=1), axis=-1)
im_features[:,:,6] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, False) # (NIR-G) # SWIR-G
im_features[:,:,7] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], cloud_mask, False) # ND(NIR-R) im_SWIRG = nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask)
im_features[:,:,8] = nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], cloud_mask, False) # ND(B-R) features = np.append(features, np.expand_dims(im_SWIRG[im_bool],axis=1), axis=-1)
im_features[:,:,9] = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False) # ND(SWIR-G) # NIR-R
# remove NaNs and clouds im_NIRR = nd_index(im_ms[:,:,3], im_ms[:,:,2], cloud_mask)
vec_features = im_features.reshape((im_ms_ps.shape[0] * im_ms_ps.shape[1], n_features)) features = np.append(features, np.expand_dims(im_NIRR[im_bool],axis=1), axis=-1)
vec_cloud = cloud_mask.reshape(cloud_mask.shape[0]*cloud_mask.shape[1]) # SWIR-NIR
vec_nan = np.any(np.isnan(vec_features), axis=1) im_SWIRNIR = nd_index(im_ms[:,:,4], im_ms[:,:,3], cloud_mask)
vec_mask = np.logical_or(vec_cloud, vec_nan) features = np.append(features, np.expand_dims(im_SWIRNIR[im_bool],axis=1), axis=-1)
vec_features = vec_features[~vec_mask, :] # B-R
# predict with NN classifier im_BR = nd_index(im_ms[:,:,0], im_ms[:,:,2], cloud_mask)
labels = clf.predict(vec_features) features = np.append(features, np.expand_dims(im_BR[im_bool],axis=1), axis=-1)
# recompose image # calculate standard deviation of individual bands
vec_classif = np.zeros((cloud_mask.shape[0]*cloud_mask.shape[1])) for k in range(im_ms.shape[2]):
vec_classif[~vec_mask] = labels im_std = SDS_tools.image_std(im_ms[:,:,k], 1)
im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1])) features = np.append(features, np.expand_dims(im_std[im_bool],axis=1), axis=-1)
# calculate standard deviation of the spectral indices
# labels im_std = SDS_tools.image_std(im_NIRG, 1)
im_sand = im_classif == 1 features = np.append(features, np.expand_dims(im_std[im_bool],axis=1), axis=-1)
# remove small patches of sand im_std = SDS_tools.image_std(im_SWIRG, 1)
im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2) features = np.append(features, np.expand_dims(im_std[im_bool],axis=1), axis=-1)
im_swash = im_classif == 2 im_std = SDS_tools.image_std(im_NIRR, 1)
im_water = im_classif == 3 features = np.append(features, np.expand_dims(im_std[im_bool],axis=1), axis=-1)
im_labels = np.stack((im_sand,im_swash,im_water), axis=-1) im_std = SDS_tools.image_std(im_SWIRNIR, 1)
features = np.append(features, np.expand_dims(im_std[im_bool],axis=1), axis=-1)
return im_classif, im_labels im_std = SDS_tools.image_std(im_BR, 1)
features = np.append(features, np.expand_dims(im_std[im_bool],axis=1), axis=-1)
def classify_image_NN_nopan(im_ms_ps, cloud_mask, min_beach_size): return features
def classify_image_NN(im_ms, im_extra, cloud_mask, min_beach_size, satname):
""" """
To be used for multispectral images that do not have a panchromatic band (L5 and S2).
Classifies every pixel in the image in one of 4 classes: Classifies every pixel in the image in one of 4 classes:
- sand --> label = 1 - sand --> label = 1
- whitewater (breaking waves and swash) --> label = 2 - whitewater (breaking waves and swash) --> label = 2
- water --> label = 3 - water --> label = 3
- other (vegetation, buildings, rocks...) --> label = 0 - other (vegetation, buildings, rocks...) --> label = 0
The classifier is a Neural Network, trained with 7000 pixels for the class SAND and 1500 The classifier is a Neural Network, trained on several sites in New South Wales, Australia.
pixels for each of the other classes. This is because the class of interest for my application
is SAND and I wanted to minimize the classification error for that class.
KV WRL 2018 KV WRL 2018
Arguments: Arguments:
----------- -----------
im_ms_ps: np.array im_ms: np.array
Pansharpened RGB + downsampled NIR and SWIR Pansharpened RGB + downsampled NIR and SWIR
im_pan: im_extra:
Panchromatic band only used for Landsat 7 and 8 where im_extra is the panchromatic band
cloud_mask: np.array cloud_mask: np.array
2D cloud mask with True where cloud pixels are 2D cloud mask with True where cloud pixels are
min_beach_size: int
minimum number of pixels that have to be connected in the SAND class
Returns: ----------- Returns: -----------
im_classif: np.ndarray im_classif: np.array
2D image containing labels 2D image containing labels
im_labels: np.ndarray of booleans im_labels: np.array of booleans
3D image containing a boolean image for each class (im_classif == label) 3D image containing a boolean image for each class (im_classif == label)
""" """
# load classifier if satname == 'L5':
clf = joblib.load('.\\classifiers\\NN_4classes_nopan.pkl') # load classifier (without panchromatic band)
clf = joblib.load(os.path.join(os.getcwd(), 'classifiers', 'NN_4classes_nopan.pkl'))
# calculate features # calculate features
n_features = 9 n_features = 9
im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features)) im_features = np.zeros((im_ms.shape[0], im_ms.shape[1], n_features))
im_features[:,:,[0,1,2,3,4]] = im_ms_ps im_features[:,:,[0,1,2,3,4]] = im_ms
im_features[:,:,5] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask) # (NIR-G) im_features[:,:,5] = nd_index(im_ms[:,:,3], im_ms[:,:,1], cloud_mask) # (NIR-G)
im_features[:,:,6] = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], cloud_mask) # ND(NIR-R) im_features[:,:,6] = nd_index(im_ms[:,:,3], im_ms[:,:,2], cloud_mask) # ND(NIR-R)
im_features[:,:,7] = nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], cloud_mask) # ND(B-R) im_features[:,:,7] = nd_index(im_ms[:,:,0], im_ms[:,:,2], cloud_mask) # ND(B-R)
im_features[:,:,8] = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask) # ND(SWIR-G) im_features[:,:,8] = nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask) # ND(SWIR-G)
# remove NaNs and clouds vec_features = im_features.reshape((im_ms.shape[0] * im_ms.shape[1], n_features))
vec_features = im_features.reshape((im_ms_ps.shape[0] * im_ms_ps.shape[1], n_features))
elif satname in ['L7','L8']:
# load classifier (with panchromatic band)
clf = joblib.load(os.path.join(os.getcwd(), 'classifiers', 'NN_4classes_withpan.pkl'))
# calculate features
n_features = 10
im_features = np.zeros((im_ms.shape[0], im_ms.shape[1], n_features))
im_features[:,:,[0,1,2,3,4]] = im_ms
im_features[:,:,5] = im_extra
im_features[:,:,6] = nd_index(im_ms[:,:,3], im_ms[:,:,1], cloud_mask) # (NIR-G)
im_features[:,:,7] = nd_index(im_ms[:,:,3], im_ms[:,:,2], cloud_mask) # ND(NIR-R)
im_features[:,:,8] = nd_index(im_ms[:,:,0], im_ms[:,:,2], cloud_mask) # ND(B-R)
im_features[:,:,9] = nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask) # ND(SWIR-G)
vec_features = im_features.reshape((im_ms.shape[0] * im_ms.shape[1], n_features))
elif satname == 'S2':
# load classifier (special classifier for Sentinel-2 images)
clf = joblib.load(os.path.join(os.getcwd(), 'classifiers', 'NN_4classes_S2.pkl'))
# calculate features
vec_features = calculate_features(im_ms, cloud_mask, np.ones(cloud_mask.shape).astype(bool))
vec_features[np.isnan(vec_features)] = 1e-9 # NaN values are created when the std is too close to 0
# remove NaNs and cloudy pixels
vec_cloud = cloud_mask.reshape(cloud_mask.shape[0]*cloud_mask.shape[1]) vec_cloud = cloud_mask.reshape(cloud_mask.shape[0]*cloud_mask.shape[1])
vec_nan = np.any(np.isnan(vec_features), axis=1) vec_nan = np.any(np.isnan(vec_features), axis=1)
vec_mask = np.logical_or(vec_cloud, vec_nan) vec_mask = np.logical_or(vec_cloud, vec_nan)
vec_features = vec_features[~vec_mask, :] vec_features = vec_features[~vec_mask, :]
# predict with NN classifier
# classify pixels
labels = clf.predict(vec_features) labels = clf.predict(vec_features)
# recompose image # recompose image
vec_classif = np.zeros((cloud_mask.shape[0]*cloud_mask.shape[1])) vec_classif = np.nan*np.ones((cloud_mask.shape[0]*cloud_mask.shape[1]))
vec_classif[~vec_mask] = labels vec_classif[~vec_mask] = labels
im_classif = vec_classif.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1])) im_classif = vec_classif.reshape((cloud_mask.shape[0], cloud_mask.shape[1]))
# labels # create a stack of boolean images for each label
im_sand = im_classif == 1 im_sand = im_classif == 1
# remove small patches of sand
im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
im_swash = im_classif == 2 im_swash = im_classif == 2
im_water = im_classif == 3 im_water = im_classif == 3
im_labels = np.stack((im_sand,im_swash,im_water), axis=-1) # remove small patches of sand or water that could be around the image (usually noise)
im_sand = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
im_water = morphology.remove_small_objects(im_water, min_size=min_beach_size, connectivity=2)
im_labels = np.stack((im_sand,im_swash,im_water), axis=-1)
return im_classif, im_labels return im_classif, im_labels
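# Self-contained illustration of the label stacking and small-object removal done above,
# using a synthetic 2D label image instead of a real classification:
import numpy as np
import skimage.morphology as morphology
im_classif = np.zeros((6, 6))
im_classif[0, 0] = 1                # an isolated "sand" pixel (noise)
im_classif[2:5, 2:5] = 1            # a real sand patch
im_classif[5, :] = 3                # a strip of water
im_sand = morphology.remove_small_objects(im_classif == 1, min_size=3, connectivity=2)
im_swash = im_classif == 2
im_water = im_classif == 3
im_labels = np.stack((im_sand, im_swash, im_water), axis=-1)
print(im_labels.shape, int(im_labels[:, :, 0].sum()))   # (6, 6, 3) 9 -> the noise pixel is gone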
@ -237,7 +254,7 @@ def find_wl_contours1(im_ndwi, cloud_mask):
# use Marching Squares algorithm to detect contours on ndwi image # use Marching Squares algorithm to detect contours on ndwi image
contours = measure.find_contours(im_ndwi, t_otsu) contours = measure.find_contours(im_ndwi, t_otsu)
# remove contours that have nans (due to cloud pixels in the contour) # remove contours that contain NaNs (due to cloud pixels in the contour)
contours_nonans = [] contours_nonans = []
for k in range(len(contours)): for k in range(len(contours)):
if np.any(np.isnan(contours[k])): if np.any(np.isnan(contours[k])):
@ -251,31 +268,32 @@ def find_wl_contours1(im_ndwi, cloud_mask):
return contours return contours
def find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size): def find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size):
""" """
New robust method for extracting shorelines. Incorporates the classification component to New robust method for extracting shorelines. Incorporates the classification component to
refube the treshold and make it specific to the sand/water interface. refine the threshold and make it specific to the sand/water interface.
KV WRL 2018 KV WRL 2018
Arguments: Arguments:
----------- -----------
im_ms_ps: np.array im_ms: np.array
Pansharpened RGB + downsampled NIR and SWIR RGB + downsampled NIR and SWIR
im_labels: np.array im_labels: np.array
3D image containing a boolean image for each class in the order (sand, swash, water) 3D image containing a boolean image for each class in the order (sand, swash, water)
cloud_mask: np.array cloud_mask: np.array
2D cloud mask with True where cloud pixels are 2D cloud mask with True where cloud pixels are
buffer_size: int buffer_size: int
size of the buffer around the sandy beach size of the buffer around the sandy beach over which the pixels are considered in the
thresholding algorithm.
Returns: ----------- Returns: -----------
contours_wi: list of np.arrays contours_wi: list of np.arrays
contains the (row,column) coordinates of the contour lines extracted with the contains the (row,column) coordinates of the contour lines extracted from the
NDWI (Normalized Difference Water Index) NDWI (Normalized Difference Water Index) image
contours_mwi: list of np.arrays contours_mwi: list of np.arrays
contains the (row,column) coordinates of the contour lines extracted with the contains the (row,column) coordinates of the contour lines extracted from the
MNDWI (Modified Normalized Difference Water Index) MNDWI (Modified Normalized Difference Water Index) image
""" """
@ -283,9 +301,9 @@ def find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size):
ncols = cloud_mask.shape[1] ncols = cloud_mask.shape[1]
# calculate Normalized Difference Modified Water Index (SWIR - G) # calculate Normalized Difference Modified Water Index (SWIR - G)
im_mwi = nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask) im_mwi = nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask)
# calculate Normalized Difference Modified Water Index (NIR - G) # calculate Normalized Difference Modified Water Index (NIR - G)
im_wi = nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask) im_wi = nd_index(im_ms[:,:,3], im_ms[:,:,1], cloud_mask)
# stack indices together # stack indices together
im_ind = np.stack((im_wi, im_mwi), axis=-1) im_ind = np.stack((im_wi, im_mwi), axis=-1)
vec_ind = im_ind.reshape(nrows*ncols,2) vec_ind = im_ind.reshape(nrows*ncols,2)
@ -306,16 +324,14 @@ def find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size):
# make sure both classes have the same number of pixels before thresholding # make sure both classes have the same number of pixels before thresholding
if len(int_water) > 0 and len(int_sand) > 0: if len(int_water) > 0 and len(int_sand) > 0:
if np.argmin([int_sand.shape[0],int_water.shape[0]]) == 1: if np.argmin([int_sand.shape[0],int_water.shape[0]]) == 1:
if (int_sand.shape[0] - int_water.shape[0])/int_water.shape[0] > 0.5: int_sand = int_sand[np.random.choice(int_sand.shape[0],int_water.shape[0], replace=False),:]
int_sand = int_sand[np.random.randint(0,int_sand.shape[0],int_water.shape[0]),:]
else: else:
if (int_water.shape[0] - int_sand.shape[0])/int_sand.shape[0] > 0.5: int_water = int_water[np.random.choice(int_water.shape[0],int_sand.shape[0], replace=False),:]
int_water = int_water[np.random.randint(0,int_water.shape[0],int_sand.shape[0]),:]
# threshold the sand/water intensities # threshold the sand/water intensities
int_all = np.append(int_water,int_sand, axis=0) int_all = np.append(int_water,int_sand, axis=0)
t_mwi = filters.threshold_otsu(int_all[:,0]) t_mwi = filters.threshold_otsu(int_all[:,0])
t_wi = filters.threshold_otsu(int_all[:,1]) t_wi = filters.threshold_otsu(int_all[:,1])
# find contour with MS algorithm # find contour with MS algorithm
im_wi_buffer = np.copy(im_wi) im_wi_buffer = np.copy(im_wi)
@ -325,7 +341,7 @@ def find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size):
contours_wi = measure.find_contours(im_wi_buffer, t_wi) contours_wi = measure.find_contours(im_wi_buffer, t_wi)
contours_mwi = measure.find_contours(im_mwi, t_mwi) contours_mwi = measure.find_contours(im_mwi, t_mwi)
# remove contour points that are nans (around clouds) # remove contour points that are NaNs (around clouds)
contours = contours_wi contours = contours_wi
contours_nonans = [] contours_nonans = []
for k in range(len(contours)): for k in range(len(contours)):
@ -337,7 +353,7 @@ def find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size):
else: else:
contours_nonans.append(contours[k]) contours_nonans.append(contours[k])
contours_wi = contours_nonans contours_wi = contours_nonans
# repeat for MNDWI contours
contours = contours_mwi contours = contours_mwi
contours_nonans = [] contours_nonans = []
for k in range(len(contours)): for k in range(len(contours)):
@ -353,6 +369,33 @@ def find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size):
return contours_wi, contours_mwi return contours_wi, contours_mwi
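# Sketch of the balanced Otsu thresholding used above, on synthetic sand/water index
# samples (all values made up):
import numpy as np
import skimage.filters as filters
np.random.seed(0)
int_water = np.random.normal(0.4, 0.05, (500, 1))    # hypothetical water index values
int_sand = np.random.normal(-0.3, 0.05, (2000, 1))   # hypothetical sand index values
# subsample the larger class so both contribute equally to the threshold
if int_sand.shape[0] > int_water.shape[0]:
    int_sand = int_sand[np.random.choice(int_sand.shape[0], int_water.shape[0], replace=False), :]
else:
    int_water = int_water[np.random.choice(int_water.shape[0], int_sand.shape[0], replace=False), :]
int_all = np.append(int_water, int_sand, axis=0)
t_mwi = filters.threshold_otsu(int_all[:, 0])
print(t_mwi)   # threshold falls between the two modes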
def process_shoreline(contours, georef, image_epsg, settings): def process_shoreline(contours, georef, image_epsg, settings):
"""
Converts the contours from image coordinates to world coordinates. This function also removes
the contours that are too small to be a shoreline (based on the parameter
settings['min_length_sl'])
KV WRL 2018
Arguments:
-----------
contours: np.array or list of np.array
image contours as detected by the function find_contours
georef: np.array
vector of 6 elements [Xtr, Xscale, Xshear, Ytr, Yshear, Yscale]
image_epsg: int
spatial reference system of the image from which the contours were extracted
settings: dict
contains important parameters for processing the shoreline:
output_epsg: output spatial reference system
min_length_sl: minimum length of shoreline perimeter to be kept (in meters)
reference_sl: [optional] reference shoreline coordinates
max_dist_ref: max distance (in meters) allowed from a reference shoreline
Returns: -----------
shoreline: np.array
array of points with the X and Y coordinates of the shoreline
"""
# convert pixel coordinates to world coordinates # convert pixel coordinates to world coordinates
contours_world = SDS_tools.convert_pix2world(contours, georef) contours_world = SDS_tools.convert_pix2world(contours, georef)
@ -390,13 +433,47 @@ def process_shoreline(contours, georef, image_epsg, settings):
def show_detection(im_ms, cloud_mask, im_labels, shoreline,image_epsg, georef, def show_detection(im_ms, cloud_mask, im_labels, shoreline,image_epsg, georef,
settings, date, satname): settings, date, satname):
"""
Shows the detected shoreline to the user for visual quality control. The user can select "keep"
if the shoreline detection is correct or "skip" if it is incorrect.
# subfolder to store the .jpg files KV WRL 2018
filepath = os.path.join(os.getcwd(), 'data', settings['sitename'], 'jpg_files', 'detection')
Arguments:
-----------
im_ms: np.array
RGB + downsampled NIR and SWIR
cloud_mask: np.array
2D cloud mask with True where cloud pixels are
im_labels: np.array
3D image containing a boolean image for each class in the order (sand, swash, water)
shoreline: np.array
array of points with the X and Y coordinates of the shoreline
image_epsg: int
spatial reference system of the image from which the contours were extracted
georef: np.array
vector of 6 elements [Xtr, Xscale, Xshear, Ytr, Yshear, Yscale]
settings: dict
contains important parameters for processing the shoreline
date: string
date at which the image was taken
satname: string
indicates the satname (L5,L7,L8 or S2)
Returns: -----------
skip_image: boolean
True if the user wants to skip the image, False otherwise.
"""
sitename = settings['inputs']['sitename']
# subfolder where the .jpg file is stored if the user accepts the shoreline detection
filepath = os.path.join(os.getcwd(), 'data', sitename, 'jpg_files', 'detection')
# display RGB image
im_RGB = SDS_preprocess.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9) im_RGB = SDS_preprocess.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9)
# display classified image
# compute classified image
im_class = np.copy(im_RGB) im_class = np.copy(im_RGB)
cmap = cm.get_cmap('tab20c') cmap = cm.get_cmap('tab20c')
colorpalette = cmap(np.arange(0,13,1)) colorpalette = cmap(np.arange(0,13,1))
@ -408,60 +485,86 @@ def show_detection(im_ms, cloud_mask, im_labels, shoreline,image_epsg, georef,
im_class[im_labels[:,:,k],0] = colours[k,0] im_class[im_labels[:,:,k],0] = colours[k,0]
im_class[im_labels[:,:,k],1] = colours[k,1] im_class[im_labels[:,:,k],1] = colours[k,1]
im_class[im_labels[:,:,k],2] = colours[k,2] im_class[im_labels[:,:,k],2] = colours[k,2]
# display MNDWI grayscale image
# compute MNDWI grayscale image
im_mwi = nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask) im_mwi = nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask)
# transform world coordinates of shoreline into pixel coordinates # transform world coordinates of shoreline into pixel coordinates
sl_pix = SDS_tools.convert_world2pix(SDS_tools.convert_epsg(shoreline, settings['output_epsg'], # use try/except in case there are no coordinates to be transformed (shoreline = [])
image_epsg)[:,[0,1]], georef) try:
# make figure sl_pix = SDS_tools.convert_world2pix(SDS_tools.convert_epsg(shoreline,
settings['output_epsg'],
image_epsg)[:,[0,1]], georef)
except:
# if try fails, just add nan into the shoreline vector so the next parts can still run
sl_pix = np.array([[np.nan, np.nan],[np.nan, np.nan]])
# according to the image shape, decide whether it is better to have the images in the subplot
# in different rows or different columns
fig = plt.figure() fig = plt.figure()
gs = gridspec.GridSpec(1, 3) if im_RGB.shape[1] > 2*im_RGB.shape[0]:
gs.update(bottom=0.05, top=0.95) # vertical subplots
ax1 = fig.add_subplot(gs[0,0]) gs = gridspec.GridSpec(3, 1)
plt.imshow(im_RGB) gs.update(bottom=0.03, top=0.97, left=0.03, right=0.97)
plt.plot(sl_pix[:,0], sl_pix[:,1], 'k--') ax1 = fig.add_subplot(gs[0,0])
plt.axis('off') ax2 = fig.add_subplot(gs[1,0])
ax1.set_anchor('W') ax3 = fig.add_subplot(gs[2,0])
else:
# horizontal subplots
gs = gridspec.GridSpec(1, 3)
gs.update(bottom=0.05, top=0.95, left=0.05, right=0.95)
ax1 = fig.add_subplot(gs[0,0])
ax2 = fig.add_subplot(gs[0,1])
ax3 = fig.add_subplot(gs[0,2])
# create image 1 (RGB)
ax1.imshow(im_RGB)
ax1.plot(sl_pix[:,0], sl_pix[:,1], 'k.', markersize=3)
ax1.axis('off')
btn_keep = plt.text(0, 0.9, 'keep', size=16, ha="left", va="top", btn_keep = plt.text(0, 0.9, 'keep', size=16, ha="left", va="top",
transform=ax1.transAxes, transform=ax1.transAxes,
bbox=dict(boxstyle="square", ec='k',fc='w')) bbox=dict(boxstyle="square", ec='k',fc='w'))
btn_skip = plt.text(1, 0.9, 'skip', size=16, ha="right", va="top", btn_skip = plt.text(1, 0.9, 'skip', size=16, ha="right", va="top",
transform=ax1.transAxes, transform=ax1.transAxes,
bbox=dict(boxstyle="square", ec='k',fc='w')) bbox=dict(boxstyle="square", ec='k',fc='w'))
plt.title('Click on <keep> if shoreline detection is correct. Click on <skip> if false detection') ax1.set_title(sitename + ' ' + date + ' ' + satname, fontweight='bold', fontsize=16)
ax2 = fig.add_subplot(gs[0,1])
plt.imshow(im_class) # create image 2 (classification)
plt.plot(sl_pix[:,0], sl_pix[:,1], 'k--') ax2.imshow(im_class)
plt.axis('off') ax2.plot(sl_pix[:,0], sl_pix[:,1], 'k.', markersize=3)
ax2.set_anchor('W') ax2.axis('off')
orange_patch = mpatches.Patch(color=colours[0,:], label='sand') orange_patch = mpatches.Patch(color=colours[0,:], label='sand')
white_patch = mpatches.Patch(color=colours[1,:], label='whitewater') white_patch = mpatches.Patch(color=colours[1,:], label='whitewater')
blue_patch = mpatches.Patch(color=colours[2,:], label='water') blue_patch = mpatches.Patch(color=colours[2,:], label='water')
black_line = mlines.Line2D([],[],color='k',linestyle='--', label='shoreline') black_line = mlines.Line2D([],[],color='k',linestyle='--', label='shoreline')
plt.legend(handles=[orange_patch,white_patch,blue_patch, black_line], bbox_to_anchor=(1, 0.5), fontsize=9) ax2.legend(handles=[orange_patch,white_patch,blue_patch, black_line],
ax3 = fig.add_subplot(gs[0,2]) bbox_to_anchor=(1, 0.5), fontsize=9)
plt.imshow(im_mwi, cmap='bwr') # create image 3 (MNDWI)
plt.plot(sl_pix[:,0], sl_pix[:,1], 'k--') ax3.imshow(im_mwi, cmap='bwr')
plt.axis('off') ax3.plot(sl_pix[:,0], sl_pix[:,1], 'k.', markersize=3)
cb = plt.colorbar() ax3.axis('off')
cb.ax.tick_params(labelsize=10)
cb.set_label('MNDWI values') # additional options
ax3.set_anchor('W') # ax1.set_anchor('W')
# ax2.set_anchor('W')
# cb = plt.colorbar()
# cb.ax.tick_params(labelsize=10)
# cb.set_label('MNDWI values')
# ax3.set_anchor('W')
fig.set_size_inches([12.53, 9.3]) fig.set_size_inches([12.53, 9.3])
fig.set_tight_layout(True)
mng = plt.get_current_fig_manager() mng = plt.get_current_fig_manager()
mng.window.showMaximized() mng.window.showMaximized()
# wait for user's selection (<keep> or <skip>) # wait for user's selection: <keep> or <skip>
pt = ginput(n=1, timeout=100, show_clicks=True) pt = ginput(n=1, timeout=100000, show_clicks=True)
pt = np.array(pt) pt = np.array(pt)
# if clicks next to <skip>, return skip_image = True # if user clicks around the <skip> button, return skip_image = True
if pt[0][0] > im_ms.shape[1]/2: if pt[0][0] > im_ms.shape[1]/2:
skip_image = True skip_image = True
plt.close() plt.close()
else: else:
skip_image = False skip_image = False
ax1.set_title(date + ' ' + satname)
btn_skip.set_visible(False) btn_skip.set_visible(False)
btn_keep.set_visible(False) btn_keep.set_visible(False)
fig.savefig(os.path.join(filepath, date + '_' + satname + '.jpg'), dpi=150) fig.savefig(os.path.join(filepath, date + '_' + satname + '.jpg'), dpi=150)
@ -471,11 +574,41 @@ def show_detection(im_ms, cloud_mask, im_labels, shoreline,image_epsg, georef,
def extract_shorelines(metadata, settings): def extract_shorelines(metadata, settings):
"""
sitename = settings['sitename'] Extracts shorelines from satellite images.
KV WRL 2018
Arguments:
-----------
metadata: dict
contains all the information about the satellite images that were downloaded
inputs: dict
contains the following fields:
sitename: str
String containing the name of the site
polygon: list
polygon containing the lon/lat coordinates to be extracted
longitudes in the first column and latitudes in the second column
dates: list of str
list that contains 2 strings with the initial and final dates in format
'yyyy-mm-dd' e.g. ['1987-01-01', '2018-01-01']
sat_list: list of str
list that contains the names of the satellite missions to include
e.g. ['L5', 'L7', 'L8', 'S2']
Returns:
-----------
output: dict
contains the extracted shorelines and corresponding dates.
"""
sitename = settings['inputs']['sitename']
# initialise output structure # initialise output structure
out = dict([]) output = dict([])
# create a subfolder to store the .jpg images showing the detection # create a subfolder to store the .jpg images showing the detection
filepath_jpg = os.path.join(os.getcwd(), 'data', sitename, 'jpg_files', 'detection') filepath_jpg = os.path.join(os.getcwd(), 'data', sitename, 'jpg_files', 'detection')
try: try:
@ -486,58 +619,25 @@ def extract_shorelines(metadata, settings):
# loop through satellite list # loop through satellite list
for satname in metadata.keys(): for satname in metadata.keys():
# access the images # get images
if satname == 'L5': filepath = SDS_tools.get_filepath(settings['inputs'],satname)
# access downloaded Landsat 5 images filenames = metadata[satname]['filenames']
filepath = os.path.join(os.getcwd(), 'data', sitename, satname, '30m')
filenames = os.listdir(filepath)
elif satname == 'L7':
# access downloaded Landsat 7 images
filepath_pan = os.path.join(os.getcwd(), 'data', sitename, 'L7', 'pan')
filepath_ms = os.path.join(os.getcwd(), 'data', sitename, 'L7', 'ms')
filenames_pan = os.listdir(filepath_pan)
filenames_ms = os.listdir(filepath_ms)
if (not len(filenames_pan) == len(filenames_ms)):
raise 'error: not the same amount of files for pan and ms'
filepath = [filepath_pan, filepath_ms]
filenames = filenames_pan
elif satname == 'L8':
# access downloaded Landsat 7 images
filepath_pan = os.path.join(os.getcwd(), 'data', sitename, 'L8', 'pan')
filepath_ms = os.path.join(os.getcwd(), 'data', sitename, 'L8', 'ms')
filenames_pan = os.listdir(filepath_pan)
filenames_ms = os.listdir(filepath_ms)
if (not len(filenames_pan) == len(filenames_ms)):
raise 'error: not the same amount of files for pan and ms'
filepath = [filepath_pan, filepath_ms]
filenames = filenames_pan
elif satname == 'S2':
# access downloaded Sentinel 2 images
filepath10 = os.path.join(os.getcwd(), 'data', sitename, satname, '10m')
filenames10 = os.listdir(filepath10)
filepath20 = os.path.join(os.getcwd(), 'data', sitename, satname, '20m')
filenames20 = os.listdir(filepath20)
filepath60 = os.path.join(os.getcwd(), 'data', sitename, satname, '60m')
filenames60 = os.listdir(filepath60)
if (not len(filenames10) == len(filenames20)) or (not len(filenames20) == len(filenames60)):
raise 'error: not the same amount of files for 10, 20 and 60 m'
filepath = [filepath10, filepath20, filepath60]
filenames = filenames10
# initialise some variables # initialise some variables
out_timestamp = [] # datetime at which the image was acquired (UTC time) output_timestamp = [] # datetime at which the image was acquired (UTC time)
out_shoreline = [] # vector of shoreline points output_shoreline = [] # vector of shoreline points
out_filename = [] # filename of the images from which the shorelines were derived output_filename = [] # filename of the images from which the shorelines were derived
out_cloudcover = [] # cloud cover of the images output_cloudcover = [] # cloud cover of the images
out_geoaccuracy = [] # georeferencing accuracy of the images output_geoaccuracy = [] # georeferencing accuracy of the images
out_idxkeep = [] # indices that were kept during the analysis (cloudy images are skipped) output_idxkeep = [] # indices that were kept during the analysis (cloudy images are skipped)
# loop through the images # loop through the images
for i in range(len(filenames)): for i in range(len(filenames)):
# get image filename # get image filename
fn = SDS_tools.get_filenames(filenames[i],filepath, satname) fn = SDS_tools.get_filenames(filenames[i],filepath, satname)
# preprocess image (cloud mask + pansharpening/downsampling) # preprocess image (cloud mask + pansharpening/downsampling)
im_ms, georef, cloud_mask = SDS_preprocess.preprocess_single(fn, satname) im_ms, georef, cloud_mask, im_extra, imQA = SDS_preprocess.preprocess_single(fn, satname)
# get image spatial reference system (epsg code) from metadata dict # get image spatial reference system (epsg code) from metadata dict
image_epsg = metadata[satname]['epsg'][i] image_epsg = metadata[satname]['epsg'][i]
# calculate cloud cover # calculate cloud cover
@ -546,22 +646,28 @@ def extract_shorelines(metadata, settings):
# skip image if cloud cover is above threshold # skip image if cloud cover is above threshold
if cloud_cover > settings['cloud_thresh']: if cloud_cover > settings['cloud_thresh']:
continue continue
# classify image in 4 classes (sand, whitewater, water, other) with NN classifier # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
im_classif, im_labels = classify_image_NN_nopan(im_ms, cloud_mask, im_classif, im_labels = classify_image_NN(im_ms, im_extra, cloud_mask,
settings['min_beach_size']) settings['min_beach_size'], satname)
# extract water line contours # extract water line contours
# if there aren't any sandy pixels, use find_wl_contours1 (traditional method), # if there aren't any sandy pixels, use find_wl_contours1 (traditional method),
# otherwise use find_wl_contours2 (enhanced method with classification) # otherwise use find_wl_contours2 (enhanced method with classification)
if sum(sum(im_labels[:,:,0])) == 0 : try: # use try/except structure for long runs
# compute MNDWI (SWIR-Green normalized index) grayscale image if sum(sum(im_labels[:,:,0])) == 0 :
im_mndwi = nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask) # compute MNDWI (SWIR-Green normalized index) grayscale image
# find water contours on MNDWI grayscale image im_mndwi = nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask)
contours_mwi = find_wl_contours1(im_mndwi, cloud_mask) # find water contours on MNDWI grayscale image
else: contours_mwi = find_wl_contours1(im_mndwi, cloud_mask)
# use classification to refine threshold and extract sand/water interface else:
contours_wi, contours_mwi = find_wl_contours2(im_ms, im_labels, # use classification to refine threshold and extract sand/water interface
cloud_mask, settings['buffer_size']) contours_wi, contours_mwi = find_wl_contours2(im_ms, im_labels,
# extract clean shoreline from water contours cloud_mask, settings['buffer_size'])
except:
continue
# process water contours into shorelines
shoreline = process_shoreline(contours_mwi, georef, image_epsg, settings) shoreline = process_shoreline(contours_mwi, georef, image_epsg, settings)
if settings['check_detection']: if settings['check_detection']:
@ -571,34 +677,35 @@ def extract_shorelines(metadata, settings):
if skip_image: if skip_image:
continue continue
# fill and save output structure # fill and save output structure
out_timestamp.append(metadata[satname]['dates'][i]) output_timestamp.append(metadata[satname]['dates'][i])
out_shoreline.append(shoreline) output_shoreline.append(shoreline)
out_filename.append(filenames[i]) output_filename.append(filenames[i])
out_cloudcover.append(cloud_cover) output_cloudcover.append(cloud_cover)
out_geoaccuracy.append(metadata[satname]['acc_georef'][i]) output_geoaccuracy.append(metadata[satname]['acc_georef'][i])
out_idxkeep.append(i) output_idxkeep.append(i)
out[satname] = { output[satname] = {
'timestamp': out_timestamp, 'timestamp': output_timestamp,
'shoreline': out_shoreline, 'shoreline': output_shoreline,
'filename': out_filename, 'filename': output_filename,
'cloudcover': out_cloudcover, 'cloudcover': output_cloudcover,
'geoaccuracy': out_geoaccuracy, 'geoaccuracy': output_geoaccuracy,
'idxkeep': out_idxkeep 'idxkeep': output_idxkeep
} }
# add some metadata # add some metadata
out['meta'] = { output['meta'] = {
'timestamp': 'UTC time', 'timestamp': 'UTC time',
'shoreline': 'coordinate system epsg : ' + str(settings['output_epsg']), 'shoreline': 'coordinate system epsg : ' + str(settings['output_epsg']),
'cloudcover': 'calculated on the cropped image', 'cloudcover': 'calculated on the cropped image',
'geoaccuracy': 'RMSE error based on GCPs', 'geoaccuracy': 'RMSE error based on GCPs',
'idxkeep': 'indices of the images that were kept to extract a shoreline' 'idxkeep': 'indices of the images that were kept to extract a shoreline'
} }
# save output structure as out.pkl
# save output structure as output.pkl
filepath = os.path.join(os.getcwd(), 'data', sitename) filepath = os.path.join(os.getcwd(), 'data', sitename)
with open(os.path.join(filepath, sitename + '_out.pkl'), 'wb') as f: with open(os.path.join(filepath, sitename + '_output.pkl'), 'wb') as f:
pickle.dump(out, f) pickle.dump(output, f)
return out return output
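# Hypothetical driver for the functions above; the module names (SDS_download,
# SDS_shoreline) and all parameter values are placeholders to be adapted to the
# actual file names and site in this repository.
import SDS_download, SDS_shoreline
inputs = {'sitename': 'NARRA',
          'polygon': [[[151.2957, -33.7012], [151.2975, -33.7388],
                       [151.3122, -33.7390], [151.3112, -33.7013], [151.2957, -33.7012]]],
          'dates': ['2017-12-01', '2018-01-01'],
          'sat_list': ['S2']}
settings = {'inputs': inputs,
            'cloud_thresh': 0.5,      # maximum accepted cloud fraction
            'output_epsg': 28356,     # e.g. GDA94 / MGA zone 56
            'min_beach_size': 20,
            'buffer_size': 7,
            'min_length_sl': 200,
            'check_detection': True}
metadata = SDS_download.get_images(inputs)          # assuming get_images returns the metadata dict
output = SDS_shoreline.extract_shorelines(metadata, settings)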

@ -3,15 +3,17 @@
Author: Kilian Vos, Water Research Laboratory, University of New South Wales Author: Kilian Vos, Water Research Laboratory, University of New South Wales
""" """
# Initial settings # load modules
import os import os
import numpy as np import numpy as np
import matplotlib.pyplot as plt
import pdb
# other modules
from osgeo import gdal, ogr, osr from osgeo import gdal, ogr, osr
import skimage.transform as transform import skimage.transform as transform
import simplekml import simplekml
import pdb from scipy.ndimage.filters import uniform_filter
# Functions
def convert_pix2world(points, georef): def convert_pix2world(points, georef):
""" """
@ -143,6 +145,21 @@ def convert_epsg(points, epsg_in, epsg_out):
return points_converted return points_converted
def coords_from_kml(fn): def coords_from_kml(fn):
"""
Extracts coordinates from a .kml file.
KV WRL 2018
Arguments:
-----------
fn: str
filepath + filename of the kml file to be read
Returns: -----------
polygon: list
coordinates extracted from the .kml file
"""
# read .kml file # read .kml file
with open(fn) as kmlFile: with open(fn) as kmlFile:
@ -152,6 +169,7 @@ def coords_from_kml(fn):
str2 = '</coordinates>' str2 = '</coordinates>'
subdoc = doc[doc.find(str1)+len(str1):doc.find(str2)] subdoc = doc[doc.find(str1)+len(str1):doc.find(str2)]
coordlist = subdoc.split('\n') coordlist = subdoc.split('\n')
# read coordinates
polygon = [] polygon = []
for i in range(1,len(coordlist)-1): for i in range(1,len(coordlist)-1):
polygon.append([float(coordlist[i].split(',')[0]), float(coordlist[i].split(',')[1])]) polygon.append([float(coordlist[i].split(',')[0]), float(coordlist[i].split(',')[1])])
@ -159,29 +177,196 @@ def coords_from_kml(fn):
return [polygon] return [polygon]
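# Quick illustration of the <coordinates> parsing above, on an inline string instead of a file:
doc = "<coordinates>\n151.29,-33.70,0\n151.31,-33.73,0\n151.31,-33.70,0\n</coordinates>"
str1, str2 = '<coordinates>', '</coordinates>'
subdoc = doc[doc.find(str1)+len(str1):doc.find(str2)]
coordlist = subdoc.split('\n')
polygon = []
for i in range(1, len(coordlist)-1):
    polygon.append([float(coordlist[i].split(',')[0]), float(coordlist[i].split(',')[1])])
print(polygon)   # [[151.29, -33.7], [151.31, -33.73], [151.31, -33.7]]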
def save_kml(coords, epsg): def save_kml(coords, epsg):
"""
Saves coordinates with specified spatial reference system into a .kml file in WGS84.
KV WRL 2018
Arguments:
-----------
coords: np.array
coordinates (2 columns) to be converted into a .kml file
Returns:
-----------
Saves 'coords.kml' in the current folder.
"""
kml = simplekml.Kml() kml = simplekml.Kml()
coords_wgs84 = convert_epsg(coords, epsg, 4326) coords_wgs84 = convert_epsg(coords, epsg, 4326)
kml.newlinestring(name='coords', coords=coords_wgs84) kml.newlinestring(name='coords', coords=coords_wgs84)
kml.save('coords.kml') kml.save('coords.kml')
def get_filepath(inputs,satname):
"""
Create filepath to the different folders containing the satellite images.
KV WRL 2018
Arguments:
-----------
inputs: dict
dictionary that contains the following fields:
'sitename': str
String containing the name of the site
'polygon': list
polygon containing the lon/lat coordinates to be extracted
longitudes in the first column and latitudes in the second column
'dates': list of str
list that contains 2 strings with the initial and final dates in format 'yyyy-mm-dd'
e.g. ['1987-01-01', '2018-01-01']
'sat_list': list of str
list that contains the names of the satellite missions to include
e.g. ['L5', 'L7', 'L8', 'S2']
satname: str
short name of the satellite mission
Returns:
-----------
filepath: str or list of str
contains the filepath(s) to the folder(s) containing the satellite images
"""
sitename = inputs['sitename']
# access the images
if satname == 'L5':
# access downloaded Landsat 5 images
filepath = os.path.join(os.getcwd(), 'data', sitename, satname, '30m')
elif satname == 'L7':
# access downloaded Landsat 7 images
filepath_pan = os.path.join(os.getcwd(), 'data', sitename, 'L7', 'pan')
filepath_ms = os.path.join(os.getcwd(), 'data', sitename, 'L7', 'ms')
filenames_pan = os.listdir(filepath_pan)
filenames_ms = os.listdir(filepath_ms)
if (not len(filenames_pan) == len(filenames_ms)):
raise Exception('error: not the same number of files for pan and ms')
filepath = [filepath_pan, filepath_ms]
elif satname == 'L8':
# access downloaded Landsat 8 images
filepath_pan = os.path.join(os.getcwd(), 'data', sitename, 'L8', 'pan')
filepath_ms = os.path.join(os.getcwd(), 'data', sitename, 'L8', 'ms')
filenames_pan = os.listdir(filepath_pan)
filenames_ms = os.listdir(filepath_ms)
if (not len(filenames_pan) == len(filenames_ms)):
raise Exception('error: not the same number of files for pan and ms')
filepath = [filepath_pan, filepath_ms]
elif satname == 'S2':
# access downloaded Sentinel 2 images
filepath10 = os.path.join(os.getcwd(), 'data', sitename, satname, '10m')
filenames10 = os.listdir(filepath10)
filepath20 = os.path.join(os.getcwd(), 'data', sitename, satname, '20m')
filenames20 = os.listdir(filepath20)
filepath60 = os.path.join(os.getcwd(), 'data', sitename, satname, '60m')
filenames60 = os.listdir(filepath60)
if (not len(filenames10) == len(filenames20)) or (not len(filenames20) == len(filenames60)):
raise Exception('error: not the same number of files for the 10, 20 and 60 m bands')
filepath = [filepath10, filepath20, filepath60]
return filepath
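# Example of the folder layout get_filepath expects for Sentinel-2 (paths are illustrative;
# the folders must already exist because the S2 branch calls os.listdir on them):
inputs = {'sitename': 'NARRA'}
filepath = get_filepath(inputs, 'S2')
# -> [ .../data/NARRA/S2/10m, .../data/NARRA/S2/20m, .../data/NARRA/S2/60m ]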
def get_filenames(filename, filepath, satname): def get_filenames(filename, filepath, satname):
"""
Creates filepath + filename for all the bands belonging to the same image.
KV WRL 2018
Arguments:
-----------
filename: str
name of the downloaded satellite image as found in the metadata
filepath: str or list of str
contains the filepath(s) to the folder(s) containing the satellite images
satname: str
short name of the satellite mission
Returns:
-----------
fn: str or list of str
contains the filepath + filenames to access the satellite image
"""
if satname == 'L5': if satname == 'L5':
fn = os.path.join(filepath, filename) fn = os.path.join(filepath, filename)
if satname == 'L7' or satname == 'L8': if satname == 'L7' or satname == 'L8':
idx = filename.find('.tif') filename_ms = filename.replace('pan','ms')
filename_ms = filename[:idx-3] + 'ms.tif'
fn = [os.path.join(filepath[0], filename), fn = [os.path.join(filepath[0], filename),
os.path.join(filepath[1], filename_ms)] os.path.join(filepath[1], filename_ms)]
if satname == 'S2': if satname == 'S2':
idx = filename.find('.tif') filename20 = filename.replace('10m','20m')
filename20 = filename[:idx-3] + '20m.tif' filename60 = filename.replace('10m','60m')
filename60 = filename[:idx-3] + '60m.tif'
fn = [os.path.join(filepath[0], filename), fn = [os.path.join(filepath[0], filename),
os.path.join(filepath[1], filename20), os.path.join(filepath[1], filename20),
os.path.join(filepath[2], filename60)] os.path.join(filepath[2], filename60)]
return fn return fn
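# get_filenames simply swaps the resolution/band tag inside the filename, e.g. for Sentinel-2:
fn10 = '2018-01-05-00-00-00_S2_NARRA_10m.tif'   # hypothetical filename
print(fn10.replace('10m', '20m'))               # 2018-01-05-00-00-00_S2_NARRA_20m.tif
print(fn10.replace('10m', '60m'))               # 2018-01-05-00-00-00_S2_NARRA_60m.tif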
def image_std(image, radius):
"""
Calculates the standard deviation of an image, using a moving window of specified radius.
Arguments:
-----------
image: np.array
2D array containing the pixel intensities of a single-band image
radius: int
radius defining the moving window used to calculate the standard deviation. For example,
radius = 1 will produce a 3x3 moving window.
Returns:
-----------
win_std: np.array
2D array containing the standard deviation of the image
"""
# convert to float
image = image.astype(float)
# first pad the image
image_padded = np.pad(image, radius, 'reflect')
# window size
win_rows, win_cols = radius*2 + 1, radius*2 + 1
# calculate std
win_mean = uniform_filter(image_padded, (win_rows, win_cols))
win_sqr_mean = uniform_filter(image_padded**2, (win_rows, win_cols))
win_var = win_sqr_mean - win_mean**2
win_std = np.sqrt(win_var)
# remove padding
win_std = win_std[radius:-radius, radius:-radius]
return win_std
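# Quick check of the moving-window standard deviation above against a direct computation
# on a tiny synthetic image (assumes image_std as defined above is available):
import numpy as np
image = np.arange(25, dtype=float).reshape(5, 5)
win_std = image_std(image, radius=1)
direct = np.std(image[1:4, 1:4])           # std of the 3x3 window centred on pixel (2, 2)
print(np.isclose(win_std[2, 2], direct))   # True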
def mask_raster(fn, mask):
"""
Masks a .tif raster using GDAL.
Arguments:
-----------
fn: str
filepath + filename of the .tif raster
mask: np.array
array of boolean where True indicates the pixels that are to be masked
Returns:
-----------
overwrites the .tif file directly
"""
# open raster
raster = gdal.Open(fn, gdal.GA_Update)
# mask raster
for i in range(raster.RasterCount):
out_band = raster.GetRasterBand(i+1)
out_data = out_band.ReadAsArray()
out_band.SetNoDataValue(0)
no_data_value = out_band.GetNoDataValue()
out_data[mask] = no_data_value
out_band.WriteArray(out_data)
# close dataset and flush cache
raster = None
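# Minimal end-to-end check of mask_raster on a throw-away single-band GeoTIFF
# (the file name is a placeholder):
import numpy as np
from osgeo import gdal
fn = 'tmp_mask_test.tif'
driver = gdal.GetDriverByName('GTiff')
ds = driver.Create(fn, 4, 4, 1, gdal.GDT_Float32)
ds.GetRasterBand(1).WriteArray(np.ones((4, 4), dtype=np.float32))
ds = None                               # flush to disk
mask = np.zeros((4, 4), dtype=bool)
mask[0, :] = True                       # mask out the first row
mask_raster(fn, mask)
check = gdal.Open(fn).GetRasterBand(1).ReadAsArray()
print(check[0, :])                      # first row is now the no-data value (0)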

@ -0,0 +1,540 @@
#!/usr/bin/env python
###############################################################################
# $Id$
#
# Project: InSAR Peppers
# Purpose: Module to extract data from many rasters into one output.
# Author: Frank Warmerdam, warmerdam@pobox.com
#
###############################################################################
# Copyright (c) 2000, Atlantis Scientific Inc. (www.atlsci.com)
# Copyright (c) 2009-2011, Even Rouault <even dot rouault at mines-paris dot org>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
###############################################################################
# changes 29Apr2011
# If the input image is a multi-band one, use all the channels in
# building the stack.
# anssi.pekkarinen@fao.org
import math
import sys
import time
from osgeo import gdal
try:
progress = gdal.TermProgress_nocb
except:
progress = gdal.TermProgress
__version__ = '$id$'[5:-1]
verbose = 0
quiet = 0
# =============================================================================
def raster_copy( s_fh, s_xoff, s_yoff, s_xsize, s_ysize, s_band_n,
t_fh, t_xoff, t_yoff, t_xsize, t_ysize, t_band_n,
nodata=None ):
if verbose != 0:
print('Copy %d,%d,%d,%d to %d,%d,%d,%d.'
% (s_xoff, s_yoff, s_xsize, s_ysize,
t_xoff, t_yoff, t_xsize, t_ysize ))
if nodata is not None:
return raster_copy_with_nodata(
s_fh, s_xoff, s_yoff, s_xsize, s_ysize, s_band_n,
t_fh, t_xoff, t_yoff, t_xsize, t_ysize, t_band_n,
nodata )
s_band = s_fh.GetRasterBand( s_band_n )
m_band = None
# Works only in binary mode and doesn't take into account
# intermediate transparency values for compositing.
if s_band.GetMaskFlags() != gdal.GMF_ALL_VALID:
m_band = s_band.GetMaskBand()
elif s_band.GetColorInterpretation() == gdal.GCI_AlphaBand:
m_band = s_band
if m_band is not None:
return raster_copy_with_mask(
s_fh, s_xoff, s_yoff, s_xsize, s_ysize, s_band_n,
t_fh, t_xoff, t_yoff, t_xsize, t_ysize, t_band_n,
m_band )
s_band = s_fh.GetRasterBand( s_band_n )
t_band = t_fh.GetRasterBand( t_band_n )
data = s_band.ReadRaster( s_xoff, s_yoff, s_xsize, s_ysize,
t_xsize, t_ysize, t_band.DataType )
t_band.WriteRaster( t_xoff, t_yoff, t_xsize, t_ysize,
data, t_xsize, t_ysize, t_band.DataType )
return 0
# =============================================================================
def raster_copy_with_nodata( s_fh, s_xoff, s_yoff, s_xsize, s_ysize, s_band_n,
t_fh, t_xoff, t_yoff, t_xsize, t_ysize, t_band_n,
nodata ):
try:
import numpy as Numeric
except ImportError:
import Numeric
s_band = s_fh.GetRasterBand( s_band_n )
t_band = t_fh.GetRasterBand( t_band_n )
data_src = s_band.ReadAsArray( s_xoff, s_yoff, s_xsize, s_ysize,
t_xsize, t_ysize )
data_dst = t_band.ReadAsArray( t_xoff, t_yoff, t_xsize, t_ysize )
nodata_test = Numeric.equal(data_src,nodata)
to_write = Numeric.choose( nodata_test, (data_src, data_dst) )
t_band.WriteArray( to_write, t_xoff, t_yoff )
return 0
# =============================================================================
def raster_copy_with_mask( s_fh, s_xoff, s_yoff, s_xsize, s_ysize, s_band_n,
t_fh, t_xoff, t_yoff, t_xsize, t_ysize, t_band_n,
m_band ):
try:
import numpy as Numeric
except ImportError:
import Numeric
s_band = s_fh.GetRasterBand( s_band_n )
t_band = t_fh.GetRasterBand( t_band_n )
data_src = s_band.ReadAsArray( s_xoff, s_yoff, s_xsize, s_ysize,
t_xsize, t_ysize )
data_mask = m_band.ReadAsArray( s_xoff, s_yoff, s_xsize, s_ysize,
t_xsize, t_ysize )
data_dst = t_band.ReadAsArray( t_xoff, t_yoff, t_xsize, t_ysize )
mask_test = Numeric.equal(data_mask, 0)
to_write = Numeric.choose( mask_test, (data_src, data_dst) )
t_band.WriteArray( to_write, t_xoff, t_yoff )
return 0
# =============================================================================
def names_to_fileinfos( names ):
"""
Translate a list of GDAL filenames, into file_info objects.
names -- list of valid GDAL dataset names.
Returns a list of file_info objects. There may be fewer file_info objects
than names if some of the names could not be opened as GDAL files.
"""
file_infos = []
for name in names:
fi = file_info()
if fi.init_from_name( name ) == 1:
file_infos.append( fi )
return file_infos
# *****************************************************************************
class file_info:
"""A class holding information about a GDAL file."""
def init_from_name(self, filename):
"""
Initialize file_info from filename
filename -- Name of file to read.
Returns 1 on success or 0 if the file can't be opened.
"""
fh = gdal.Open( filename )
if fh is None:
return 0
self.filename = filename
self.bands = fh.RasterCount
self.xsize = fh.RasterXSize
self.ysize = fh.RasterYSize
self.band_type = fh.GetRasterBand(1).DataType
self.projection = fh.GetProjection()
self.geotransform = fh.GetGeoTransform()
self.ulx = self.geotransform[0]
self.uly = self.geotransform[3]
self.lrx = self.ulx + self.geotransform[1] * self.xsize
self.lry = self.uly + self.geotransform[5] * self.ysize
ct = fh.GetRasterBand(1).GetRasterColorTable()
if ct is not None:
self.ct = ct.Clone()
else:
self.ct = None
return 1
def report( self ):
print('Filename: '+ self.filename)
print('File Size: %dx%dx%d'
% (self.xsize, self.ysize, self.bands))
print('Pixel Size: %f x %f'
% (self.geotransform[1],self.geotransform[5]))
print('UL:(%f,%f) LR:(%f,%f)'
% (self.ulx,self.uly,self.lrx,self.lry))
def copy_into( self, t_fh, s_band = 1, t_band = 1, nodata_arg=None ):
"""
Copy this files image into target file.
This method will compute the overlap area of the file_info objects
file, and the target gdal.Dataset object, and copy the image data
for the common window area. It is assumed that the files are in
a compatible projection ... no checking or warping is done. However,
if the destination file is a different resolution, or different
image pixel type, the appropriate resampling and conversions will
be done (using normal GDAL promotion/demotion rules).
t_fh -- gdal.Dataset object for the file into which some or all
of this file may be copied.
Returns 1 on success (or if nothing needs to be copied), and zero on
failure.
"""
t_geotransform = t_fh.GetGeoTransform()
t_ulx = t_geotransform[0]
t_uly = t_geotransform[3]
t_lrx = t_geotransform[0] + t_fh.RasterXSize * t_geotransform[1]
t_lry = t_geotransform[3] + t_fh.RasterYSize * t_geotransform[5]
# figure out intersection region
tgw_ulx = max(t_ulx,self.ulx)
tgw_lrx = min(t_lrx,self.lrx)
if t_geotransform[5] < 0:
tgw_uly = min(t_uly,self.uly)
tgw_lry = max(t_lry,self.lry)
else:
tgw_uly = max(t_uly,self.uly)
tgw_lry = min(t_lry,self.lry)
# do they even intersect?
if tgw_ulx >= tgw_lrx:
return 1
if t_geotransform[5] < 0 and tgw_uly <= tgw_lry:
return 1
if t_geotransform[5] > 0 and tgw_uly >= tgw_lry:
return 1
# compute target window in pixel coordinates.
tw_xoff = int((tgw_ulx - t_geotransform[0]) / t_geotransform[1] + 0.1)
tw_yoff = int((tgw_uly - t_geotransform[3]) / t_geotransform[5] + 0.1)
tw_xsize = int((tgw_lrx - t_geotransform[0])/t_geotransform[1] + 0.5) \
- tw_xoff
tw_ysize = int((tgw_lry - t_geotransform[3])/t_geotransform[5] + 0.5) \
- tw_yoff
if tw_xsize < 1 or tw_ysize < 1:
return 1
# Compute source window in pixel coordinates.
sw_xoff = int((tgw_ulx - self.geotransform[0]) / self.geotransform[1])
sw_yoff = int((tgw_uly - self.geotransform[3]) / self.geotransform[5])
sw_xsize = int((tgw_lrx - self.geotransform[0]) \
/ self.geotransform[1] + 0.5) - sw_xoff
sw_ysize = int((tgw_lry - self.geotransform[3]) \
/ self.geotransform[5] + 0.5) - sw_yoff
if sw_xsize < 1 or sw_ysize < 1:
return 1
# Open the source file, and copy the selected region.
s_fh = gdal.Open( self.filename )
return raster_copy( s_fh, sw_xoff, sw_yoff, sw_xsize, sw_ysize, s_band,
t_fh, tw_xoff, tw_yoff, tw_xsize, tw_ysize, t_band,
nodata_arg )
# =============================================================================
def Usage():
print('Usage: gdal_merge.py [-o out_filename] [-of out_format] [-co NAME=VALUE]*')
print(' [-ps pixelsize_x pixelsize_y] [-tap] [-separate] [-q] [-v] [-pct]')
print(' [-ul_lr ulx uly lrx lry] [-init "value [value...]"]')
print(' [-n nodata_value] [-a_nodata output_nodata_value]')
print(' [-ot datatype] [-createonly] input_files')
print(' [--help-general]')
print('')
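# Example invocation (hypothetical file names): merge two tiles into a single
# GeoTIFF, ignoring pixels whose value is 0 in the inputs:
#   python gdal_merge.py -o merged.tif -of GTiff -n 0 tile_west.tif tile_east.tif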
# =============================================================================
#
# Program mainline.
#
def main( argv=None ):
global verbose, quiet
verbose = 0
quiet = 0
names = []
format = 'GTiff'
out_file = 'out.tif'
ulx = None
psize_x = None
separate = 0
copy_pct = 0
nodata = None
a_nodata = None
create_options = []
pre_init = []
band_type = None
createonly = 0
bTargetAlignedPixels = False
start_time = time.time()
gdal.AllRegister()
if argv is None:
argv = sys.argv
argv = gdal.GeneralCmdLineProcessor( argv )
if argv is None:
sys.exit( 0 )
# Parse command line arguments.
i = 1
while i < len(argv):
arg = argv[i]
if arg == '-o':
i = i + 1
out_file = argv[i]
elif arg == '-v':
verbose = 1
elif arg == '-q' or arg == '-quiet':
quiet = 1
elif arg == '-createonly':
createonly = 1
elif arg == '-separate':
separate = 1
elif arg == '-seperate':
separate = 1  # accept the common misspelling of -separate as well
elif arg == '-pct':
copy_pct = 1
elif arg == '-ot':
i = i + 1
band_type = gdal.GetDataTypeByName( argv[i] )
if band_type == gdal.GDT_Unknown:
print('Unknown GDAL data type: %s' % argv[i])
sys.exit( 1 )
elif arg == '-init':
i = i + 1
str_pre_init = argv[i].split()
for x in str_pre_init:
pre_init.append(float(x))
elif arg == '-n':
i = i + 1
nodata = float(argv[i])
elif arg == '-a_nodata':
i = i + 1
a_nodata = float(argv[i])
elif arg == '-f':
# for backward compatibility.
i = i + 1
format = argv[i]
elif arg == '-of':
i = i + 1
format = argv[i]
elif arg == '-co':
i = i + 1
create_options.append( argv[i] )
elif arg == '-ps':
psize_x = float(argv[i+1])
psize_y = -1 * abs(float(argv[i+2]))
i = i + 2
elif arg == '-tap':
bTargetAlignedPixels = True
elif arg == '-ul_lr':
ulx = float(argv[i+1])
uly = float(argv[i+2])
lrx = float(argv[i+3])
lry = float(argv[i+4])
i = i + 4
elif arg[:1] == '-':
print('Unrecognized command option: %s' % arg)
Usage()
sys.exit( 1 )
else:
names.append(arg)
i = i + 1
if len(names) == 0:
print('No input files selected.')
Usage()
sys.exit( 1 )
Driver = gdal.GetDriverByName(format)
if Driver is None:
print('Format driver %s not found, pick a supported driver.' % format)
sys.exit( 1 )
DriverMD = Driver.GetMetadata()
if 'DCAP_CREATE' not in DriverMD:
print('Format driver %s does not support creation and piecewise writing.\nPlease select a format that does, such as GTiff (the default) or HFA (Erdas Imagine).' % format)
sys.exit( 1 )
# Collect information on all the source files.
file_infos = names_to_fileinfos( names )
if ulx is None:
ulx = file_infos[0].ulx
uly = file_infos[0].uly
lrx = file_infos[0].lrx
lry = file_infos[0].lry
for fi in file_infos:
ulx = min(ulx, fi.ulx)
uly = max(uly, fi.uly)
lrx = max(lrx, fi.lrx)
lry = min(lry, fi.lry)
if psize_x is None:
psize_x = file_infos[0].geotransform[1]
psize_y = file_infos[0].geotransform[5]
if band_type is None:
band_type = file_infos[0].band_type
# Try opening as an existing file.
gdal.PushErrorHandler( 'CPLQuietErrorHandler' )
t_fh = gdal.Open( out_file, gdal.GA_Update )
gdal.PopErrorHandler()
# Create output file if it does not already exist.
if t_fh is None:
if bTargetAlignedPixels:
ulx = math.floor(ulx / psize_x) * psize_x
lrx = math.ceil(lrx / psize_x) * psize_x
lry = math.floor(lry / -psize_y) * -psize_y
uly = math.ceil(uly / -psize_y) * -psize_y
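# For example (hypothetical numbers), with psize_x = 10 and an input extent of
# ulx = 123456.7, lrx = 123499.2, the -tap option snaps the mosaic bounds to
# ulx = 123450.0 and lrx = 123500.0 so that they fall on the 10 m pixel grid.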
geotransform = [ulx, psize_x, 0, uly, 0, psize_y]
xsize = int((lrx - ulx) / geotransform[1] + 0.5)
ysize = int((lry - uly) / geotransform[5] + 0.5)
if separate != 0:
bands=0
for fi in file_infos:
bands=bands + fi.bands
else:
bands = file_infos[0].bands
t_fh = Driver.Create( out_file, xsize, ysize, bands,
band_type, create_options )
if t_fh is None:
print('Creation failed, terminating gdal_merge.')
sys.exit( 1 )
t_fh.SetGeoTransform( geotransform )
t_fh.SetProjection( file_infos[0].projection )
if copy_pct:
t_fh.GetRasterBand(1).SetRasterColorTable(file_infos[0].ct)
else:
if separate != 0:
bands=0
for fi in file_infos:
bands=bands + fi.bands
if t_fh.RasterCount < bands :
print('Existing output file has fewer bands than the input files. Delete it before re-running gdal_merge. Terminating gdal_merge.')
sys.exit( 1 )
else:
bands = min(file_infos[0].bands,t_fh.RasterCount)
# Do we need to set nodata value ?
if a_nodata is not None:
for i in range(t_fh.RasterCount):
t_fh.GetRasterBand(i+1).SetNoDataValue(a_nodata)
# Do we need to pre-initialize the whole mosaic file to some value?
if pre_init is not None:
if t_fh.RasterCount <= len(pre_init):
for i in range(t_fh.RasterCount):
t_fh.GetRasterBand(i+1).Fill( pre_init[i] )
elif len(pre_init) == 1:
for i in range(t_fh.RasterCount):
t_fh.GetRasterBand(i+1).Fill( pre_init[0] )
# Copy data from source files into output file.
t_band = 1
if quiet == 0 and verbose == 0:
progress( 0.0 )
fi_processed = 0
for fi in file_infos:
if createonly != 0:
continue
if verbose != 0:
print("")
print("Processing file %5d of %5d, %6.3f%% completed in %d minutes."
% (fi_processed+1,len(file_infos),
fi_processed * 100.0 / len(file_infos),
int(round((time.time() - start_time)/60.0)) ))
fi.report()
if separate == 0 :
for band in range(1, bands+1):
fi.copy_into( t_fh, band, band, nodata )
else:
for band in range(1, fi.bands+1):
fi.copy_into( t_fh, band, t_band, nodata )
t_band = t_band+1
fi_processed = fi_processed+1
if quiet == 0 and verbose == 0:
progress( fi_processed / float(len(file_infos)) )
# Force file to be closed.
t_fh = None
if __name__ == '__main__':
sys.exit(main())
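# The merger can also be driven programmatically (hypothetical file names); main()
# skips the first element of argv as the program name, so e.g.:
#   import gdal_merge
#   gdal_merge.main(['gdal_merge.py', '-o', 'merged.tif', '-n', '0', 'part1.tif', 'part2.tif'])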

@ -0,0 +1,285 @@
#==========================================================#
# Create a classifier for satellite images
#==========================================================#
# load modules
import os
import pickle
import warnings
import numpy as np
import matplotlib.cm as cm
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
from pylab import ginput
import SDS_download, SDS_preprocess, SDS_shoreline, SDS_tools, SDS_classification
filepath_sites = os.path.join(os.getcwd(), 'polygons')
sites = os.listdir(filepath_sites)
for site in sites:
polygon = SDS_tools.coords_from_kml(os.path.join(filepath_sites,site))
# load Sentinel-2 images
inputs = {
'polygon': polygon,
'dates': ['2016-10-01', '2016-11-01'],
'sat_list': ['S2'],
'sitename': site[:site.find('.')]
}
satname = inputs['sat_list'][0]
metadata = SDS_download.get_images(inputs)
metadata = SDS_download.remove_cloudy_images(metadata,inputs,0.2)
filepath = os.path.join(os.getcwd(), 'data', inputs['sitename'])
with open(os.path.join(filepath, inputs['sitename'] + '_metadata_' + satname + '.pkl'), 'wb') as f:
pickle.dump(metadata, f)
#with open(os.path.join(filepath, inputs['sitename'] + '_metadata_' + satname + '.pkl'), 'rb') as f:
# metadata = pickle.load(f)
# settings needed to run the shoreline extraction
settings = {
# general parameters:
'cloud_thresh': 0.1, # threshold on maximum cloud cover
'output_epsg': 28356, # epsg code of spatial reference system desired for the output
# shoreline detection parameters:
'min_beach_size': 20, # minimum number of connected pixels for a beach
'buffer_size': 7, # radius (in pixels) of disk for buffer around sandy pixels
'min_length_sl': 200, # minimum length of shoreline perimeter to be kept
'max_dist_ref': 100, # max distance (in meters) allowed from a reference shoreline
# quality control:
'check_detection': True, # if True, shows each shoreline detection and lets the user
# decide which ones are correct and which ones are false due to
# the presence of clouds
# also add the inputs
'inputs': inputs
}
# preprocess images (cloud masking, pansharpening/down-sampling)
SDS_preprocess.preprocess_all_images(metadata, settings)
training_data = dict([])
training_data['sand'] = dict([])
training_data['swash'] = dict([])
training_data['water'] = dict([])
training_data['land'] = dict([])
# read images
filepath = SDS_tools.get_filepath(inputs,satname)
filenames = metadata[satname]['filenames']
for i in range(len(filenames)):
fn = SDS_tools.get_filenames(filenames[i],filepath,satname)
im_ms, georef, cloud_mask, im20, imQA = SDS_preprocess.preprocess_single(fn,satname)
nrow = im_ms.shape[0]
ncol = im_ms.shape[1]
im_RGB = SDS_preprocess.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9)
plt.figure()
mng = plt.get_current_fig_manager()
mng.window.showMaximized()
plt.imshow(im_RGB)
plt.axis('off')
# Digitize sandy pixels
plt.title('Digitize SAND pixels', fontweight='bold', fontsize=15)
pt = ginput(n=1000, timeout=100000, show_clicks=True)
if len(pt) > 0:
pt = np.round(pt).astype(int)
im_sand = np.zeros((nrow,ncol))
for k in range(len(pt)):
im_sand[pt[k,1],pt[k,0]] = 1
im_RGB[pt[k,1],pt[k,0],0] = 1
im_RGB[pt[k,1],pt[k,0],1] = 0
im_RGB[pt[k,1],pt[k,0],2] = 0
im_sand = im_sand.astype(bool)
features = SDS_classification.calculate_features(im_ms, cloud_mask, im_sand)
else:
im_sand = np.zeros((nrow,ncol)).astype(bool)
features = []
training_data['sand'][filenames[i]] = {'pixels':im_sand,'features':features}
# Digitize swash pixels
plt.title('Digitize SWASH pixels', fontweight='bold', fontsize=15)
plt.draw()
pt = ginput(n=1000, timeout=100000, show_clicks=True)
if len(pt) > 0:
pt = np.round(pt).astype(int)
im_swash = np.zeros((nrow,ncol))
for k in range(len(pt)):
im_swash[pt[k,1],pt[k,0]] = 1
im_RGB[pt[k,1],pt[k,0],0] = 0
im_RGB[pt[k,1],pt[k,0],1] = 1
im_RGB[pt[k,1],pt[k,0],2] = 0
im_swash = im_swash.astype(bool)
features = SDS_classification.calculate_features(im_ms, cloud_mask, im_swash)
else:
im_swash = np.zeros((nrow,ncol)).astype(bool)
features = []
training_data['swash'][filenames[i]] = {'pixels':im_swash,'features':features}
# Digitize rectangle containing water pixels
plt.title('Click 2 points to draw a rectangle in the WATER', fontweight='bold', fontsize=15)
plt.draw()
pt = ginput(n=2, timeout=100000, show_clicks=True)
if len(pt) > 0:
pt = np.round(pt).astype(int)
idx_row = np.arange(np.min(pt[:,1]),np.max(pt[:,1])+1,1)
idx_col = np.arange(np.min(pt[:,0]),np.max(pt[:,0])+1,1)
xx, yy = np.meshgrid(idx_row,idx_col, indexing='ij')
rows = xx.reshape(xx.shape[0]*xx.shape[1])
cols = yy.reshape(yy.shape[0]*yy.shape[1])
im_water = np.zeros((nrow,ncol)).astype(bool)
for k in range(len(rows)):
im_water[rows[k],cols[k]] = 1
im_RGB[rows[k],cols[k],0] = 0
im_RGB[rows[k],cols[k],1] = 0
im_RGB[rows[k],cols[k],2] = 1
im_water = im_water.astype(bool)
features = SDS_classification.calculate_features(im_ms, cloud_mask, im_water)
else:
im_water = np.zeros((nrow,ncol)).astype(bool)
features = []
training_data['water'][filenames[i]] = {'pixels':im_water,'features':features}
# Digitize rectangle containing land pixels
plt.title('Click 2 points to draw a rectangle on the LAND', fontweight='bold', fontsize=15)
plt.draw()
pt = ginput(n=2, timeout=100000, show_clicks=True)
plt.close()
if len(pt) > 0:
pt = np.round(pt).astype(int)
idx_row = np.arange(np.min(pt[:,1]),np.max(pt[:,1])+1,1)
idx_col = np.arange(np.min(pt[:,0]),np.max(pt[:,0])+1,1)
xx, yy = np.meshgrid(idx_row,idx_col, indexing='ij')
rows = xx.reshape(xx.shape[0]*xx.shape[1])
cols = yy.reshape(yy.shape[0]*yy.shape[1])
im_land = np.zeros((nrow,ncol)).astype(bool)
for k in range(len(rows)):
im_land[rows[k],cols[k]] = 1
im_RGB[rows[k],cols[k],0] = 1
im_RGB[rows[k],cols[k],1] = 1
im_RGB[rows[k],cols[k],2] = 0
im_land = im_land.astype(bool)
features = SDS_classification.calculate_features(im_ms, cloud_mask, im_land)
else:
im_land = np.zeros((nrow,ncol)).astype(bool)
features = []
training_data['land'][filenames[i]] = {'pixels':im_land,'features':features}
plt.figure()
plt.title('Classified image')
plt.imshow(im_RGB)
# save training data for each site
filepath = os.path.join(os.getcwd(), 'data', inputs['sitename'])
with open(os.path.join(filepath, inputs['sitename'] + '_training_' + satname + '.pkl'), 'wb') as f:
pickle.dump(training_data, f)
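#%%
# A minimal sketch (not part of the original workflow) of how the saved training data
# could be turned into a classifier, assuming each non-empty 'features' entry is an
# array of shape (n_pixels, n_features), e.g. with scikit-learn:
#from sklearn.neural_network import MLPClassifier
#X = []; y = []
#for label, key in enumerate(['sand', 'swash', 'water', 'land']):
#    for fn in training_data[key].keys():
#        feats = training_data[key][fn]['features']
#        if len(feats) > 0:
#            X.append(feats)
#            y.append(label*np.ones(feats.shape[0]))
#X = np.concatenate(X, axis=0)
#y = np.concatenate(y, axis=0)
#classifier = MLPClassifier(hidden_layer_sizes=(100,), solver='adam')
#classifier.fit(X, y)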
#%%
## load Landsat 5 images
#inputs = {
# 'polygon': polygon,
# 'dates': ['1987-01-01', '1988-01-01'],
# 'sat_list': ['L5'],
# 'sitename': site[:site.find('.')]
# }
#metadata = SDS_download.get_images(inputs)
#
## load Landsat 7 images
#inputs = {
# 'polygon': polygon,
# 'dates': ['2001-01-01', '2002-01-01'],
# 'sat_list': ['L7'],
# 'sitename': site[:site.find('.')]
# }
#metadata = SDS_download.get_images(inputs)
#
## load Landsat 8 images
#inputs = {
# 'polygon': polygon,
# 'dates': ['2014-01-01', '2015-01-01'],
# 'sat_list': ['L8'],
# 'sitename': site[:site.find('.')]
# }
#metadata = SDS_download.get_images(inputs)
#%% clean the Landsat collections
#import ee
#from datetime import datetime, timedelta
#import pytz
#import copy
#ee.Initialize()
#site = sites[0]
#dates = ['2017-12-01', '2017-12-25']
#polygon = SDS_tools.coords_from_kml(os.path.join(filepath_sites,site))
## Landsat collection
#input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
## filter by location and dates
#flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(inputs['dates'][0],inputs['dates'][1])
## get all images in the filtered collection
#im_all = flt_col.getInfo().get('features')
#cloud_cover = [_['properties']['CLOUD_COVER'] for _ in im_all]
#if np.any([_ > 90 for _ in cloud_cover]):
# idx_delete = np.where([_ > 90 for _ in cloud_cover])[0]
# im_all_cloud = [x for k,x in enumerate(im_all) if k not in idx_delete]
#%% clean the S2 collection
#import ee
#from datetime import datetime, timedelta
#import pytz
#import copy
#ee.Initialize()
## Sentinel2 collection
#input_col = ee.ImageCollection('COPERNICUS/S2')
## filter by location and dates
#flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(inputs['dates'][0],inputs['dates'][1])
## get all images in the filtered collection
#im_all = flt_col.getInfo().get('features')
#
## remove duplicates (there are many in S2 collection)
## timestamps
#timestamps = [datetime.fromtimestamp(_['properties']['system:time_start']/1000, tz=pytz.utc) for _ in im_all]
## utm zones
#utm_zones = np.array([int(_['bands'][0]['crs'][5:]) for _ in im_all])
#utm_zone_selected = np.max(np.unique(utm_zones))
#idx_all = np.arange(0,len(im_all),1)
#idx_covered = np.ones(len(im_all)).astype(bool)
#idx_delete = []
#i = 0
#while 1:
# same_time = np.abs([(timestamps[i]-_).total_seconds() for _ in timestamps]) < 60*60*24
# idx_same_time = np.where(same_time)[0]
# same_utm = utm_zones == utm_zone_selected
# idx_temp = np.where([same_time[j] == True and same_utm[j] == False for j in idx_all])[0]
# idx_keep = idx_same_time[[_ not in idx_temp for _ in idx_same_time ]]
# if len(idx_keep) > 2: # if more than 2 images with same date and same utm, drop the last one
# idx_temp = np.append(idx_temp,idx_keep[-1])
# for j in idx_temp:
# idx_delete.append(j)
# idx_covered[idx_same_time] = False
# if np.any(idx_covered):
# i = np.where(idx_covered)[0][0]
# else:
# break
#im_all_updated = [x for k,x in enumerate(im_all) if k not in idx_delete]
#
## remove very cloudy images (>90% cloud)
#cloud_cover = [_['properties']['CLOUDY_PIXEL_PERCENTAGE'] for _ in im_all_updated]
#if np.any([_ > 90 for _ in cloud_cover]):
# idx_delete = np.where([_ > 90 for _ in cloud_cover])[0]
# im_all_cloud = [x for k,x in enumerate(im_all_updated) if k not in idx_delete]

@ -135,7 +135,7 @@
" metadata = pickle.load(f)\n", " metadata = pickle.load(f)\n",
" \n", " \n",
"# [OPTIONAL] saves .jpg files of the preprocessed images (cloud mask and pansharpening/down-sampling) \n", "# [OPTIONAL] saves .jpg files of the preprocessed images (cloud mask and pansharpening/down-sampling) \n",
"#SDS_preprocess.preprocess_all_images(metadata, settings)\n", "#SDS_preprocess.save_jpg(metadata, settings)\n",
"\n", "\n",
"# [OPTIONAL] to avoid false detections and identify obvious outliers there is the option to\n", "# [OPTIONAL] to avoid false detections and identify obvious outliers there is the option to\n",
"# create a reference shoreline position (manually clicking on a satellite image)\n", "# create a reference shoreline position (manually clicking on a satellite image)\n",

@ -8,41 +8,44 @@ import pickle
 import warnings
 warnings.filterwarnings("ignore")
 import matplotlib.pyplot as plt
-import SDS_download, SDS_preprocess, SDS_shoreline
+import SDS_download, SDS_preprocess, SDS_shoreline, SDS_tools
 # define the area of interest (longitude, latitude)
-polygon = [[[151.301454, -33.700754],
-            [151.311453, -33.702075],
-            [151.307237, -33.739761],
-            [151.294220, -33.736329],
-            [151.301454, -33.700754]]]
+polygon = SDS_tools.coords_from_kml('NARRA.kml')
 # define dates of interest
-dates = ['2017-12-01', '2018-01-01']
+dates = ['2015-01-01', '2019-01-01']
 # define satellite missions
-sat_list = ['L5', 'L7', 'L8', 'S2']
+sat_list = ['S2']
 # give a name to the site
 sitename = 'NARRA'
+# put all the inputs into a dictionary
+inputs = {
+    'polygon': polygon,
+    'dates': dates,
+    'sat_list': sat_list,
+    'sitename': sitename
+    }
 # download satellite images (also saves metadata.pkl)
-#SDS_download.get_images(sitename, polygon, dates, sat_list)
+metadata = SDS_download.get_images(inputs)
-# load metadata structure (contains information on the downloaded satellite images and is created
-# after all images have been successfully downloaded)
+# if you have already downloaded the images, just load the metadata file
 filepath = os.path.join(os.getcwd(), 'data', sitename)
 with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'rb') as f:
     metadata = pickle.load(f)
-# parameters and settings
+#%%
+# settings needed to run the shoreline extraction
 settings = {
-    'sitename': sitename,
     # general parameters:
-    'cloud_thresh': 0.5, # threshold on maximum cloud cover
+    'cloud_thresh': 0.2, # threshold on maximum cloud cover
-    'output_epsg': 28356, # epsg code of the desired output spatial reference system
+    'output_epsg': 28356, # epsg code of spatial reference system desired for the output
     # shoreline detection parameters:
     'min_beach_size': 20, # minimum number of connected pixels for a beach
@ -51,30 +54,34 @@ settings = {
     'max_dist_ref': 100, # max distance (in meters) allowed from a reference shoreline
     # quality control:
-    'check_detection': True # if True, shows each shoreline detection and lets the user
+    'check_detection': True, # if True, shows each shoreline detection and lets the user
                              # decide which ones are correct and which ones are false due to
                              # the presence of clouds
+    # also add the inputs
+    'inputs': inputs
     }
 # preprocess images (cloud masking, pansharpening/down-sampling)
-SDS_preprocess.preprocess_all_images(metadata, settings)
+#SDS_preprocess.save_jpg(metadata, settings)
-# create a reference shoreline (used to identify outliers and false detections)
+# create a reference shoreline (helps to identify outliers and false detections)
-settings['refsl'] = SDS_preprocess.get_reference_sl(metadata, settings)
+settings['refsl'] = SDS_preprocess.get_reference_sl_manual(metadata, settings)
+#settings['refsl'] = SDS_preprocess.get_reference_sl_Australia(settings)
 # extract shorelines from all images (also saves output.pkl)
-out = SDS_shoreline.extract_shorelines(metadata, settings)
+output = SDS_shoreline.extract_shorelines(metadata, settings)
 # plot shorelines
 plt.figure()
 plt.axis('equal')
 plt.xlabel('Eastings [m]')
 plt.ylabel('Northings [m]')
-for satname in out.keys():
+for satname in output.keys():
     if satname == 'meta':
         continue
-    for i in range(len(out[satname]['shoreline'])):
-        sl = out[satname]['shoreline'][i]
-        date = out[satname]['timestamp'][i]
-        plt.plot(sl[:, 0], sl[:, 1], '-', label=date.strftime('%d-%m-%Y'))
+    for i in range(len(output[satname]['shoreline'])):
+        sl = output[satname]['shoreline'][i]
+        date = output[satname]['timestamp'][i]
+        plt.plot(sl[:, 0], sl[:, 1], '.', label=date.strftime('%d-%m-%Y'))
 plt.legend()