You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
432 lines
19 KiB
Python
432 lines
19 KiB
Python
"""This module contains all the functions needed to download the satellite images from GEE
|
|
|
|
Author: Kilian Vos, Water Research Laboratory, University of New South Wales
|
|
"""
|
|
|
|
# Initial settings
|
|
import os
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
import pdb
|
|
import ee
|
|
from urllib.request import urlretrieve
|
|
from datetime import datetime
|
|
import pytz
|
|
import pickle
|
|
import zipfile
|
|
|
|
# initialise connection with GEE server
|
|
# NOTE: this call runs at import time, so merely importing this module requires a working
# Earth Engine setup (presumably previously stored credentials - TODO confirm).
ee.Initialize()
|
|
|
|
# Functions
|
|
|
|
def download_tif(image, polygon, bandsId, filepath):
    """
    Downloads a .TIF image from the ee server and extracts it into a folder

    The server delivers a zip archive; the archive is fetched into a temporary file, the
    'data.tif' member is extracted into filepath and the temporary archive is then deleted.

    Arguments:
    -----------
        image: ee.Image
            Image object to be downloaded
        polygon: list
            polygon containing the lon/lat coordinates to be extracted
            longitudes in the first column and latitudes in the second column
        bandsId: list of dict
            list of bands to be downloaded
        filepath: str
            folder into which 'data.tif' is extracted

    Returns:
    -----------
        str
            path to the extracted 'data.tif' file (the caller is expected to rename it)

    """

    url = ee.data.makeDownloadUrl(ee.data.getDownloadId({
        'image': image.serialize(),
        'region': polygon,
        'bands': bandsId,
        'filePerBand': 'false',
        'name': 'data',
        }))
    # urlretrieve stores the archive in a temporary file that it never removes by itself
    local_zip, _ = urlretrieve(url)
    try:
        with zipfile.ZipFile(local_zip) as local_zipfile:
            return local_zipfile.extract('data.tif', filepath)
    finally:
        # the archive is closed at this point; remove it so that one zip per downloaded
        # image does not pile up in the temporary directory
        try:
            os.remove(local_zip)
        except OSError:
            # best-effort cleanup: a leftover temporary file must not hide the result
            pass
|
|
|
|
|
|
# Download settings for each supported mission, listed in the order they are processed.
#   'aliases'         : names accepted in the `sat` argument of get_images
#   'collection'      : identifier of the image collection on the ee server
#   'products'        : one (subfolder, filename tag, band indices) tuple per file written
#                       for each image; the band indices refer to the image's band list and
#                       the first product's filename is the one used to detect duplicates
#   'skip_duplicates' : True  -> a second image with the same acquisition time is skipped
#                       False -> it is downloaded with '_dup' added to its name
_MISSIONS = (
    {'satname': 'L5',
     'aliases': ('L5', 'Landsat5'),
     'collection': 'LANDSAT/LT05/C01/T1_TOA',
     'products': (('30m', '', (0, 1, 2, 3, 4, 7)),),
     'skip_duplicates': False},
    {'satname': 'L7',
     'aliases': ('L7', 'Landsat7'),
     'collection': 'LANDSAT/LE07/C01/T1_RT_TOA',
     'products': (('pan', '_pan', (8,)),
                  ('ms', '_ms', (0, 1, 2, 3, 4, 9))),
     'skip_duplicates': False},
    {'satname': 'L8',
     'aliases': ('L8', 'Landsat8'),
     'collection': 'LANDSAT/LC08/C01/T1_RT_TOA',
     'products': (('pan', '_pan', (7,)),
                  ('ms', '_ms', (1, 2, 3, 4, 5, 11))),
     'skip_duplicates': False},
    {'satname': 'S2',
     'aliases': ('S2', 'Sentinel2'),
     'collection': 'COPERNICUS/S2',
     'products': (('10m', '_10m', (1, 2, 3, 7)),
                  ('20m', '_20m', (11,)),
                  ('60m', '_60m', (15,))),
     'skip_duplicates': True},
)


def _georef_accuracy(properties, satname):
    """Returns the georeferencing accuracy entry stored in the metadata for one image."""
    if satname == 'S2':
        # Sentinel-2: 1 if the geometric quality flag is 'PASSED', 0 otherwise (or if missing)
        return 1 if properties.get('GEOMETRIC_QUALITY_FLAG') == 'PASSED' else 0
    try:
        return properties['GEOMETRIC_RMSE_MODEL']
    except KeyError:
        # default value of accuracy (RMSE = 12m)
        print('No geometric rmse model property')
        return 12


def _download_mission(mission, sitename, polygon, dates, suffix):
    """Downloads every image of one mission and returns its metadata dictionary."""
    satname = mission['satname']
    products = mission['products']

    # create one subfolder per product; each folder is created independently so that an
    # already existing folder does not prevent the creation of the other ones
    folders = [os.path.join(os.getcwd(), 'data', sitename, satname, subfolder)
               for subfolder, _, _ in products]
    for folder in folders:
        os.makedirs(folder, exist_ok=True)

    # filter the collection by location and dates
    input_col = ee.ImageCollection(mission['collection'])
    flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0], dates[1])
    # get all images in the filtered collection
    im_all = flt_col.getInfo().get('features') or []
    # the count is taken from the fetched list itself, which avoids a second request to the
    # server and guarantees that the loop below never indexes past the list
    n_img = len(im_all)
    print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)

    timestamps = []
    acc_georef = []
    im_epsg = []
    all_names = set()
    first_tag = products[0][1]

    # loop through images
    for i, feature in enumerate(im_all):

        # find each image in ee database and read its metadata
        im = ee.Image(feature.get('id'))
        im_dic = im.getInfo()
        im_bands = im_dic.get('bands')
        properties = im_dic['properties']

        # time of acquisition (UNIX time in milliseconds) converted to a UTC datetime
        im_timestamp = datetime.fromtimestamp(properties['system:time_start']/1000, tz=pytz.utc)
        im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')

        # delete dimensions key from dictionary, otherwise the entire image is extracted
        for band in im_bands:
            band.pop('dimensions', None)

        # if two images were taken at the same date, either skip the second one or add
        # 'dup' to its name, depending on the mission
        stem = im_date + '_' + satname + '_' + sitename
        dup = ''
        if stem + first_tag + suffix in all_names:
            if mission['skip_duplicates']:
                continue
            dup = '_dup'
        all_names.add(stem + first_tag + dup + suffix)

        # download each product and give the extracted 'data.tif' its final name
        for folder, (_, tag, band_indices) in zip(folders, products):
            bands = [im_bands[k] for k in band_indices]
            local_data = download_tif(im, polygon, bands, folder)
            # os.replace overwrites an existing target on every platform, whereas
            # os.rename raises on Windows when the file is already there (e.g. on a re-run)
            os.replace(local_data, os.path.join(folder, stem + tag + dup + suffix))

        # save timestamp, EPSG code of the reference system and georeferencing accuracy
        timestamps.append(im_timestamp)
        im_epsg.append(int(im_bands[0]['crs'][5:]))
        acc_georef.append(_georef_accuracy(properties, satname))
        print(i, end='..')

    # sort by acquisition time (downloaded images are sorted by date in directory)
    idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
    print('Finished with ' + satname)

    return {'dates': [timestamps[j] for j in idx_sorted],
            'acc_georef': [acc_georef[j] for j in idx_sorted],
            'epsg': [im_epsg[j] for j in idx_sorted]}


def get_images(sitename,polygon,dates,sat):
    """
    Downloads all images from Landsat 5, Landsat 7, Landsat 8 and Sentinel-2 covering the given
    polygon and acquired during the given dates. The images are organised in subfolders and divided
    by satellite mission and pixel resolution.

    The images are written under <current working directory>/data/<sitename>/ and a dictionary
    with the acquisition dates, georeferencing accuracy and EPSG code of the images of each
    mission is pickled to <sitename>_metadata.pkl in that same folder.

    KV WRL 2018

    Arguments:
    -----------
        sitename: str
            String containing the name of the site
        polygon: list
            polygon containing the lon/lat coordinates to be extracted
            longitudes in the first column and latitudes in the second column
        dates: list of str
            list that contains 2 strings with the initial and final dates in format 'yyyy-mm-dd'
            e.g. ['1987-01-01', '2018-01-01']
        sat: list of str
            list that contains the names of the satellite missions to include
            e.g. ['L5', 'L7', 'L8', 'S2']

    """

    # format in which the images are downloaded
    suffix = '.tif'

    # metadata dictionary (stores timestamps and georeferencing accuracy of each image)
    metadata = {}

    # create the site directory (no error if it already exists, real failures are raised)
    site_dir = os.path.join(os.getcwd(), 'data', sitename)
    os.makedirs(site_dir, exist_ok=True)

    # download the images of every requested mission
    for mission in _MISSIONS:
        if any(alias in sat for alias in mission['aliases']):
            metadata[mission['satname']] = _download_mission(mission, sitename, polygon,
                                                             dates, suffix)

    # save metadata dict
    with open(os.path.join(site_dir, sitename + '_metadata' + '.pkl'), 'wb') as f:
        pickle.dump(metadata, f)