You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
geetools_VH/SDS_download.py

799 lines
34 KiB
Python

6 years ago
"""This module contains all the functions needed to download the satellite images from the Google
Earth Engine Server
Author: Kilian Vos, Water Research Laboratory, University of New South Wales
"""
6 years ago
# load modules
import os
import numpy as np
import matplotlib.pyplot as plt
import pdb
6 years ago
# earth engine modules
import ee
from urllib.request import urlretrieve
6 years ago
import zipfile
import copy
import gdal_merge
# additional modules
from datetime import datetime
import pytz
import pickle
6 years ago
import skimage.morphology as morphology
# own modules
import SDS_preprocess, SDS_tools
np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
# initialise connection with GEE server
ee.Initialize()
def download_tif(image, polygon, bandsId, filepath):
"""
Downloads a .TIF image from the ee server and stores it in a temp file
Arguments:
-----------
image: ee.Image
Image object to be downloaded
polygon: list
polygon containing the lon/lat coordinates to be extracted
longitudes in the first column and latitudes in the second column
bandsId: list of dict
list of bands to be downloaded
filepath: location where the temporary file should be saved
"""
url = ee.data.makeDownloadUrl(ee.data.getDownloadId({
'image': image.serialize(),
'region': polygon,
'bands': bandsId,
'filePerBand': 'false',
'name': 'data',
}))
local_zip, headers = urlretrieve(url)
with zipfile.ZipFile(local_zip) as local_zipfile:
return local_zipfile.extract('data.tif', filepath)
6 years ago
def get_images(inputs):
"""
6 years ago
Downloads all images from Landsat 5, Landsat 7, Landsat 8 and Sentinel-2 covering the area of
interest and acquired between the specified dates.
The downloaded images are in .TIF format and organised in subfolders, divided by satellite
mission and pixel resolution.
KV WRL 2018
Arguments:
-----------
6 years ago
inputs: dict
dictionnary that contains the following fields:
'sitename': str
String containig the name of the site
6 years ago
'polygon': list
polygon containing the lon/lat coordinates to be extracted
longitudes in the first column and latitudes in the second column
6 years ago
'dates': list of str
list that contains 2 strings with the initial and final dates in format 'yyyy-mm-dd'
e.g. ['1987-01-01', '2018-01-01']
6 years ago
'sat_list': list of str
list that contains the names of the satellite missions to include
e.g. ['L5', 'L7', 'L8', 'S2']
6 years ago
Returns:
-----------
metadata: dict
contains all the information about the satellite images that were downloaded
"""
6 years ago
# read inputs dictionnary
sitename = inputs['sitename']
polygon = inputs['polygon']
dates = inputs['dates']
sat_list= inputs['sat_list']
# format in which the images are downloaded
suffix = '.tif'
6 years ago
# initialize metadata dictionnary (stores timestamps and georefencing accuracy of each image)
metadata = dict([])
# create directories
try:
os.makedirs(os.path.join(os.getcwd(), 'data',sitename))
except:
print('')
#=============================================================================================#
# download L5 images
#=============================================================================================#
6 years ago
if 'L5' in sat_list or 'Landsat5' in sat_list:
satname = 'L5'
# create a subfolder to store L5 images
filepath = os.path.join(os.getcwd(), 'data', sitename, satname, '30m')
try:
os.makedirs(filepath)
except:
print('')
# Landsat 5 collection
input_col = ee.ImageCollection('LANDSAT/LT05/C01/T1_TOA')
# filter by location and dates
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
# get all images in the filtered collection
im_all = flt_col.getInfo().get('features')
6 years ago
# remove very cloudy images (>95% cloud)
cloud_cover = [_['properties']['CLOUD_COVER'] for _ in im_all]
if np.any([_ > 95 for _ in cloud_cover]):
idx_delete = np.where([_ > 95 for _ in cloud_cover])[0]
im_all_cloud = [x for k,x in enumerate(im_all) if k not in idx_delete]
else:
im_all_cloud = im_all
n_img = len(im_all_cloud)
# print how many images there are
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
# loop trough images
timestamps = []
acc_georef = []
6 years ago
filenames = []
all_names = []
im_epsg = []
for i in range(n_img):
# find each image in ee database
6 years ago
im = ee.Image(im_all_cloud[i].get('id'))
# read metadata
im_dic = im.getInfo()
# get bands
im_bands = im_dic.get('bands')
# get time of acquisition (UNIX time)
t = im_dic['properties']['system:time_start']
# convert to datetime
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
timestamps.append(im_timestamp)
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
# get EPSG code of reference system
im_epsg.append(int(im_dic['bands'][0]['crs'][5:]))
# get geometric accuracy
try:
acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
except:
# default value of accuracy (RMSE = 12m)
acc_georef.append(12)
# delete dimensions key from dictionnary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for L5
ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[7]]
# filenames for the images
filename = im_date + '_' + satname + '_' + sitename + suffix
6 years ago
# if two images taken at the same date add 'dup' in the name (duplicate)
if any(filename in _ for _ in all_names):
filename = im_date + '_' + satname + '_' + sitename + '_dup' + suffix
all_names.append(filename)
6 years ago
filenames.append(filename)
# download .TIF image
local_data = download_tif(im, polygon, ms_bands, filepath)
# update filename
6 years ago
try:
os.rename(local_data, os.path.join(filepath, filename))
except:
os.remove(os.path.join(filepath, filename))
os.rename(local_data, os.path.join(filepath, filename))
print(i+1, end='..')
6 years ago
# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
6 years ago
filenames_sorted = [filenames[j] for j in idx_sorted]
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
# save into dict
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
6 years ago
'epsg':im_epsg_sorted, 'filenames':filenames_sorted}
print('\nFinished with ' + satname)
#=============================================================================================#
# download L7 images
#=============================================================================================#
6 years ago
if 'L7' in sat_list or 'Landsat7' in sat_list:
satname = 'L7'
# create subfolders (one for 30m multispectral bands and one for 15m pan bands)
filepath = os.path.join(os.getcwd(), 'data', sitename, 'L7')
filepath_pan = os.path.join(filepath, 'pan')
filepath_ms = os.path.join(filepath, 'ms')
try:
os.makedirs(filepath_pan)
os.makedirs(filepath_ms)
except:
print('')
# landsat 7 collection
input_col = ee.ImageCollection('LANDSAT/LE07/C01/T1_RT_TOA')
# filter by location and dates
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
# get all images in the filtered collection
im_all = flt_col.getInfo().get('features')
6 years ago
# remove very cloudy images (>95% cloud)
cloud_cover = [_['properties']['CLOUD_COVER'] for _ in im_all]
if np.any([_ > 95 for _ in cloud_cover]):
idx_delete = np.where([_ > 95 for _ in cloud_cover])[0]
im_all_cloud = [x for k,x in enumerate(im_all) if k not in idx_delete]
else:
im_all_cloud = im_all
n_img = len(im_all_cloud)
# print how many images there are
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
# loop trough images
timestamps = []
acc_georef = []
6 years ago
filenames = []
all_names = []
im_epsg = []
for i in range(n_img):
# find each image in ee database
6 years ago
im = ee.Image(im_all_cloud[i].get('id'))
# read metadata
im_dic = im.getInfo()
# get bands
im_bands = im_dic.get('bands')
# get time of acquisition (UNIX time)
t = im_dic['properties']['system:time_start']
# convert to datetime
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
timestamps.append(im_timestamp)
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
# get EPSG code of reference system
im_epsg.append(int(im_dic['bands'][0]['crs'][5:]))
# get geometric accuracy
try:
acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
except:
# default value of accuracy (RMSE = 12m)
acc_georef.append(12)
# delete dimensions key from dictionnary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for L7
pan_band = [im_bands[8]]
ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[9]]
# filenames for the images
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
# if two images taken at the same date add 'dup' in the name
if any(filename_pan in _ for _ in all_names):
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
6 years ago
all_names.append(filename_pan)
filenames.append(filename_pan)
# download .TIF image
local_data_pan = download_tif(im, polygon, pan_band, filepath_pan)
local_data_ms = download_tif(im, polygon, ms_bands, filepath_ms)
# update filename
6 years ago
try:
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
except:
os.remove(os.path.join(filepath_pan, filename_pan))
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
try:
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
except:
os.remove(os.path.join(filepath_ms, filename_ms))
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
print(i+1, end='..')
# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
6 years ago
filenames_sorted = [filenames[j] for j in idx_sorted]
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
# save into dict
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
6 years ago
'epsg':im_epsg_sorted, 'filenames':filenames_sorted}
print('\nFinished with ' + satname)
#=============================================================================================#
# download L8 images
#=============================================================================================#
6 years ago
if 'L8' in sat_list or 'Landsat8' in sat_list:
satname = 'L8'
# create subfolders (one for 30m multispectral bands and one for 15m pan bands)
filepath = os.path.join(os.getcwd(), 'data', sitename, 'L8')
filepath_pan = os.path.join(filepath, 'pan')
filepath_ms = os.path.join(filepath, 'ms')
try:
os.makedirs(filepath_pan)
os.makedirs(filepath_ms)
except:
print('')
# landsat 8 collection
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
# filter by location and dates
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
# get all images in the filtered collection
im_all = flt_col.getInfo().get('features')
6 years ago
# remove very cloudy images (>95% cloud)
cloud_cover = [_['properties']['CLOUD_COVER'] for _ in im_all]
if np.any([_ > 95 for _ in cloud_cover]):
idx_delete = np.where([_ > 95 for _ in cloud_cover])[0]
im_all_cloud = [x for k,x in enumerate(im_all) if k not in idx_delete]
else:
im_all_cloud = im_all
n_img = len(im_all_cloud)
# print how many images there are
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
# loop trough images
timestamps = []
acc_georef = []
6 years ago
filenames = []
all_names = []
im_epsg = []
for i in range(n_img):
# find each image in ee database
6 years ago
im = ee.Image(im_all_cloud[i].get('id'))
# read metadata
im_dic = im.getInfo()
# get bands
im_bands = im_dic.get('bands')
# get time of acquisition (UNIX time)
t = im_dic['properties']['system:time_start']
# convert to datetime
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
timestamps.append(im_timestamp)
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
# get EPSG code of reference system
im_epsg.append(int(im_dic['bands'][0]['crs'][5:]))
# get geometric accuracy
try:
acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
except:
# default value of accuracy (RMSE = 12m)
acc_georef.append(12)
# delete dimensions key from dictionnary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for L8
pan_band = [im_bands[7]]
ms_bands = [im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[5], im_bands[11]]
# filenames for the images
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
# if two images taken at the same date add 'dup' in the name
if any(filename_pan in _ for _ in all_names):
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
6 years ago
all_names.append(filename_pan)
filenames.append(filename_pan)
# download .TIF image
local_data_pan = download_tif(im, polygon, pan_band, filepath_pan)
local_data_ms = download_tif(im, polygon, ms_bands, filepath_ms)
# update filename
6 years ago
try:
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
except:
os.remove(os.path.join(filepath_pan, filename_pan))
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
try:
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
except:
os.remove(os.path.join(filepath_ms, filename_ms))
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
print(i+1, end='..')
# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
6 years ago
filenames_sorted = [filenames[j] for j in idx_sorted]
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
6 years ago
'epsg':im_epsg_sorted, 'filenames':filenames_sorted}
print('\nFinished with ' + satname)
#=============================================================================================#
# download S2 images
#=============================================================================================#
6 years ago
if 'S2' in sat_list or 'Sentinel2' in sat_list:
satname = 'S2'
# create subfolders for the 10m, 20m and 60m multipectral bands
filepath = os.path.join(os.getcwd(), 'data', sitename, 'S2')
try:
os.makedirs(os.path.join(filepath, '10m'))
os.makedirs(os.path.join(filepath, '20m'))
os.makedirs(os.path.join(filepath, '60m'))
except:
print('')
# Sentinel2 collection
input_col = ee.ImageCollection('COPERNICUS/S2')
# filter by location and dates
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
# get all images in the filtered collection
6 years ago
im_all = flt_col.getInfo().get('features')
# remove duplicates in the collection (there are many in S2 collection)
timestamps = [datetime.fromtimestamp(_['properties']['system:time_start']/1000,
tz=pytz.utc) for _ in im_all]
# utm zone projection
utm_zones = np.array([int(_['bands'][0]['crs'][5:]) for _ in im_all])
utm_zone_selected = np.max(np.unique(utm_zones))
# find the images that were acquired at the same time but have different utm zones
idx_all = np.arange(0,len(im_all),1)
idx_covered = np.ones(len(im_all)).astype(bool)
idx_delete = []
i = 0
while 1:
same_time = np.abs([(timestamps[i]-_).total_seconds() for _ in timestamps]) < 60*60*24
idx_same_time = np.where(same_time)[0]
same_utm = utm_zones == utm_zone_selected
idx_temp = np.where([same_time[j] == True and same_utm[j] == False for j in idx_all])[0]
idx_keep = idx_same_time[[_ not in idx_temp for _ in idx_same_time ]]
# if more than 2 images with same date and same utm, drop the last ones
if len(idx_keep) > 2:
idx_temp = np.append(idx_temp,idx_keep[-(len(idx_keep)-2):])
for j in idx_temp:
idx_delete.append(j)
idx_covered[idx_same_time] = False
if np.any(idx_covered):
i = np.where(idx_covered)[0][0]
else:
break
# update the collection by deleting all those images that have same timestamp and different
# utm projection
im_all_updated = [x for k,x in enumerate(im_all) if k not in idx_delete]
# remove very cloudy images (>95% cloud)
cloud_cover = [_['properties']['CLOUDY_PIXEL_PERCENTAGE'] for _ in im_all_updated]
if np.any([_ > 95 for _ in cloud_cover]):
idx_delete = np.where([_ > 95 for _ in cloud_cover])[0]
im_all_cloud = [x for k,x in enumerate(im_all_updated) if k not in idx_delete]
else:
im_all_cloud = im_all_updated
n_img = len(im_all_cloud)
# print how many images there are
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
# loop trough images
timestamps = []
acc_georef = []
6 years ago
filenames = []
all_names = []
im_epsg = []
for i in range(n_img):
# find each image in ee database
6 years ago
im = ee.Image(im_all_cloud[i].get('id'))
# read metadata
im_dic = im.getInfo()
# get bands
im_bands = im_dic.get('bands')
# get time of acquisition (UNIX time)
t = im_dic['properties']['system:time_start']
# convert to datetime
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
# delete dimensions key from dictionnary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for S2
bands10 = [im_bands[1], im_bands[2], im_bands[3], im_bands[7]]
bands20 = [im_bands[11]]
bands60 = [im_bands[15]]
# filenames for images
filename10 = im_date + '_' + satname + '_' + sitename + '_' + '10m' + suffix
filename20 = im_date + '_' + satname + '_' + sitename + '_' + '20m' + suffix
filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + suffix
# if two images taken at the same date skip the second image (they are the same)
if any(filename10 in _ for _ in all_names):
6 years ago
filename10 = filename10[:filename10.find('.')] + '_dup' + suffix
filename20 = filename20[:filename20.find('.')] + '_dup' + suffix
filename60 = filename60[:filename60.find('.')] + '_dup' + suffix
all_names.append(filename10)
6 years ago
filenames.append(filename10)
# download .TIF image and update filename
local_data = download_tif(im, polygon, bands10, os.path.join(filepath, '10m'))
6 years ago
try:
os.rename(local_data, os.path.join(filepath, '10m', filename10))
except:
os.remove(os.path.join(filepath, '10m', filename10))
os.rename(local_data, os.path.join(filepath, '10m', filename10))
local_data = download_tif(im, polygon, bands20, os.path.join(filepath, '20m'))
6 years ago
try:
os.rename(local_data, os.path.join(filepath, '20m', filename20))
except:
os.remove(os.path.join(filepath, '20m', filename20))
os.rename(local_data, os.path.join(filepath, '20m', filename20))
local_data = download_tif(im, polygon, bands60, os.path.join(filepath, '60m'))
6 years ago
try:
os.rename(local_data, os.path.join(filepath, '60m', filename60))
except:
os.remove(os.path.join(filepath, '60m', filename60))
os.rename(local_data, os.path.join(filepath, '60m', filename60))
# save timestamp, epsg code and georeferencing accuracy (1 if passed 0 if not passed)
timestamps.append(im_timestamp)
im_epsg.append(int(im_dic['bands'][0]['crs'][5:]))
6 years ago
# Sentinel-2 products don't provide a georeferencing accuracy (RMSE as in Landsat)
# but they have a flag indicating if the geometric quality control was passed or failed
# if passed a value of 1 is stored if faile a value of -1 is stored in the metadata
try:
if im_dic['properties']['GEOMETRIC_QUALITY_FLAG'] == 'PASSED':
acc_georef.append(1)
else:
6 years ago
acc_georef.append(-1)
except:
6 years ago
acc_georef.append(-1)
print(i+1, end='..')
# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
6 years ago
filenames_sorted = [filenames[j] for j in idx_sorted]
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
6 years ago
'epsg':im_epsg_sorted, 'filenames':filenames_sorted}
print('\nFinished with ' + satname)
# merge overlapping images (only if polygon is at the edge of an image)
if 'S2' in metadata.keys():
metadata = merge_overlapping_images(metadata,inputs)
# save metadata dict
filepath = os.path.join(os.getcwd(), 'data', sitename)
with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'wb') as f:
6 years ago
pickle.dump(metadata, f)
return metadata
def merge_overlapping_images(metadata,inputs):
"""
When the area of interest is located at the boundary between 2 images, there will be overlap
between the 2 images and both will be downloaded from Google Earth Engine. This function
merges the 2 images, so that the area of interest is covered by only 1 image.
KV WRL 2018
Arguments:
-----------
metadata: dict
contains all the information about the satellite images that were downloaded
inputs: dict
dictionnary that contains the following fields:
'sitename': str
String containig the name of the site
'polygon': list
polygon containing the lon/lat coordinates to be extracted
longitudes in the first column and latitudes in the second column
'dates': list of str
list that contains 2 strings with the initial and final dates in format 'yyyy-mm-dd'
e.g. ['1987-01-01', '2018-01-01']
'sat_list': list of str
list that contains the names of the satellite missions to include
e.g. ['L5', 'L7', 'L8', 'S2']
Returns:
-----------
metadata: dict
updated metadata with the information of the merged images
"""
# only for Sentinel-2 at this stage (could be implemented for Landsat as well)
sat = 'S2'
filepath = os.path.join(os.getcwd(), 'data', inputs['sitename'])
# find the images that are overlapping (same date in S2 filenames)
filenames = metadata[sat]['filenames']
filenames_copy = filenames.copy()
# loop through all the filenames and find the pairs of overlapping images (same date and time of acquisition)
pairs = []
for i,fn in enumerate(filenames):
filenames_copy[i] = []
# find duplicate
boolvec = [fn[:22] == _[:22] for _ in filenames_copy]
if np.any(boolvec):
idx_dup = np.where(boolvec)[0][0]
if len(filenames[i]) > len(filenames[idx_dup]):
pairs.append([idx_dup,i])
else:
pairs.append([i,idx_dup])
msg = 'Merging %d pairs of overlapping images...' % len(pairs)
print(msg)
# for each pair of images, merge them into one complete image
for i,pair in enumerate(pairs):
print(i+1, end='..')
fn_im = []
for index in range(len(pair)):
# read image
fn_im.append([os.path.join(filepath, 'S2', '10m', filenames[pair[index]]),
os.path.join(filepath, 'S2', '20m', filenames[pair[index]].replace('10m','20m')),
os.path.join(filepath, 'S2', '60m', filenames[pair[index]].replace('10m','60m'))])
im_ms, georef, cloud_mask, im_extra, imQA = SDS_preprocess.preprocess_single(fn_im[index], sat)
# in Sentinel2 images close to the edge of the image there are some artefacts,
# that are squares with constant pixel intensities. They need to be masked in the
# raster (GEOTIFF). It can be done using the image standard deviation, which
# indicates values close to 0 for the artefacts.
# First mask the 10m bands
if len(im_ms) > 0:
im_std = SDS_tools.image_std(im_ms[:,:,0],1)
im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
mask = morphology.dilation(im_binary, morphology.square(3))
for k in range(im_ms.shape[2]):
im_ms[mask,k] = np.nan
SDS_tools.mask_raster(fn_im[index][0], mask)
# Then mask the 20m band
im_std = SDS_tools.image_std(im_extra,1)
im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
mask = morphology.dilation(im_binary, morphology.square(3))
im_extra[mask] = np.nan
SDS_tools.mask_raster(fn_im[index][1], mask)
else:
continue
# make a figure for quality control
# plt.figure()
# plt.subplot(221)
# plt.imshow(im_ms[:,:,[2,1,0]])
# plt.title('imRGB')
# plt.subplot(222)
# plt.imshow(im20, cmap='gray')
# plt.title('im20')
# plt.subplot(223)
# plt.imshow(imQA, cmap='gray')
# plt.title('imQA')
# plt.subplot(224)
# plt.title(fn_im[index][0][-30:])
# merge masked 10m bands
fn_merged = os.path.join(os.getcwd(), 'merged.tif')
gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][0], fn_im[1][0]])
os.chmod(fn_im[0][0], 0o777)
os.remove(fn_im[0][0])
os.chmod(fn_im[1][0], 0o777)
os.remove(fn_im[1][0])
os.rename(fn_merged, fn_im[0][0])
# merge masked 20m band (SWIR band)
fn_merged = os.path.join(os.getcwd(), 'merged.tif')
gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][1], fn_im[1][1]])
os.chmod(fn_im[0][1], 0o777)
os.remove(fn_im[0][1])
os.chmod(fn_im[1][1], 0o777)
os.remove(fn_im[1][1])
os.rename(fn_merged, fn_im[0][1])
# merge QA band (60m band)
fn_merged = os.path.join(os.getcwd(), 'merged.tif')
gdal_merge.main(['', '-o', fn_merged, '-n', 'nan', fn_im[0][2], fn_im[1][2]])
os.chmod(fn_im[0][2], 0o777)
os.remove(fn_im[0][2])
os.chmod(fn_im[1][2], 0o777)
os.remove(fn_im[1][2])
os.rename(fn_merged, fn_im[0][2])
# update the metadata dict (delete all the duplicates)
metadata2 = copy.deepcopy(metadata)
filenames_copy = metadata2[sat]['filenames']
index_list = []
for i in range(len(filenames_copy)):
if filenames_copy[i].find('dup') == -1:
index_list.append(i)
for key in metadata2[sat].keys():
metadata2[sat][key] = [metadata2[sat][key][_] for _ in index_list]
return metadata2
def remove_cloudy_images(metadata,inputs,cloud_thresh):
"""
Deletes the .TIF file of images that have a cloud cover percentage that is above the cloud
threshold.
KV WRL 2018
Arguments:
-----------
metadata: dict
contains all the information about the satellite images that were downloaded
inputs: dict
dictionnary that contains the following fields:
'sitename': str
String containig the name of the site
'polygon': list
polygon containing the lon/lat coordinates to be extracted
longitudes in the first column and latitudes in the second column
'dates': list of str
list that contains 2 strings with the initial and final dates in format 'yyyy-mm-dd'
e.g. ['1987-01-01', '2018-01-01']
'sat_list': list of str
list that contains the names of the satellite missions to include
e.g. ['L5', 'L7', 'L8', 'S2']
cloud_thresh: float
value between 0 and 1 indicating the maximum cloud fraction in the image that is accepted
Returns:
-----------
metadata: dict
updated metadata with the information of the merged images
"""
# create a deep copy
metadata2 = copy.deepcopy(metadata)
for satname in metadata.keys():
# get the image filenames
filepath = SDS_tools.get_filepath(inputs,satname)
filenames = metadata[satname]['filenames']
# loop through images
idx_good = []
for i in range(len(filenames)):
# image filename
fn = SDS_tools.get_filenames(filenames[i],filepath, satname)
# preprocess image (cloud mask + pansharpening/downsampling)
im_ms, georef, cloud_mask, im_extra, imQA = SDS_preprocess.preprocess_single(fn, satname)
# calculate cloud cover
cloud_cover = np.divide(sum(sum(cloud_mask.astype(int))),
(cloud_mask.shape[0]*cloud_mask.shape[1]))
# skip image if cloud cover is above threshold
if cloud_cover > cloud_thresh or cloud_cover == 1:
# remove image files
if satname == 'L5':
os.chmod(fn, 0o777)
os.remove(fn)
else:
for j in range(len(fn)):
os.chmod(fn[j], 0o777)
os.remove(fn[j])
else:
idx_good.append(i)
msg = '\n%d cloudy images were removed for %s.' % (len(filenames)-len(idx_good), satname)
print(msg)
# update the metadata dict (delete all cloudy images)
for key in metadata2[satname].keys():
metadata2[satname][key] = [metadata2[satname][key][_] for _ in idx_good]
return metadata2