You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
432 lines
19 KiB
Python
432 lines
19 KiB
Python
6 years ago
|
"""This module contains all the functions needed to download the satellite images from GEE
|
||
|
|
||
|
Author: Kilian Vos, Water Research Laboratory, University of New South Wales
|
||
|
"""
|
||
|
|
||
|
# Initial settings
|
||
|
import os
|
||
|
import numpy as np
|
||
|
import matplotlib.pyplot as plt
|
||
|
import pdb
|
||
|
import ee
|
||
|
from urllib.request import urlretrieve
|
||
|
from datetime import datetime
|
||
|
import pytz
|
||
|
import pickle
|
||
|
import zipfile
|
||
|
|
||
|
# initialise connection with GEE server
|
||
|
ee.Initialize()
|
||
|
|
||
|
# Functions
|
||
|
|
||
|
def download_tif(image, polygon, bandsId, filepath):
|
||
|
"""
|
||
|
Downloads a .TIF image from the ee server and stores it in a temp file
|
||
|
|
||
|
Arguments:
|
||
|
-----------
|
||
|
image: ee.Image
|
||
|
Image object to be downloaded
|
||
|
polygon: list
|
||
|
polygon containing the lon/lat coordinates to be extracted
|
||
|
longitudes in the first column and latitudes in the second column
|
||
|
bandsId: list of dict
|
||
|
list of bands to be downloaded
|
||
|
filepath: location where the temporary file should be saved
|
||
|
|
||
|
"""
|
||
|
|
||
|
url = ee.data.makeDownloadUrl(ee.data.getDownloadId({
|
||
|
'image': image.serialize(),
|
||
|
'region': polygon,
|
||
|
'bands': bandsId,
|
||
|
'filePerBand': 'false',
|
||
|
'name': 'data',
|
||
|
}))
|
||
|
local_zip, headers = urlretrieve(url)
|
||
|
with zipfile.ZipFile(local_zip) as local_zipfile:
|
||
|
return local_zipfile.extract('data.tif', filepath)
|
||
|
|
||
|
|
||
|
def get_images(sitename,polygon,dates,sat):
|
||
|
"""
|
||
|
Downloads all images from Landsat 5, Landsat 7, Landsat 8 and Sentinel-2 covering the given
|
||
|
polygon and acquired during the given dates. The images are organised in subfolders and divided
|
||
|
by satellite mission and pixel resolution.
|
||
|
|
||
|
KV WRL 2018
|
||
|
|
||
|
Arguments:
|
||
|
-----------
|
||
|
sitename: str
|
||
|
String containig the name of the site
|
||
|
polygon: list
|
||
|
polygon containing the lon/lat coordinates to be extracted
|
||
|
longitudes in the first column and latitudes in the second column
|
||
|
dates: list of str
|
||
|
list that contains 2 strings with the initial and final dates in format 'yyyy-mm-dd'
|
||
|
e.g. ['1987-01-01', '2018-01-01']
|
||
|
sat: list of str
|
||
|
list that contains the names of the satellite missions to include
|
||
|
e.g. ['L5', 'L7', 'L8', 'S2']
|
||
|
|
||
|
"""
|
||
|
|
||
|
# format in which the images are downloaded
|
||
|
suffix = '.tif'
|
||
|
|
||
|
# initialise metadata dictionnary (stores timestamps and georefencing accuracy of each image)
|
||
|
metadata = dict([])
|
||
|
|
||
|
# create directories
|
||
|
try:
|
||
|
os.makedirs(os.path.join(os.getcwd(), 'data',sitename))
|
||
|
except:
|
||
|
print('')
|
||
|
|
||
|
#=============================================================================================#
|
||
|
# download L5 images
|
||
|
#=============================================================================================#
|
||
|
|
||
|
if 'L5' in sat or 'Landsat5' in sat:
|
||
|
|
||
|
satname = 'L5'
|
||
|
# create a subfolder to store L5 images
|
||
|
filepath = os.path.join(os.getcwd(), 'data', sitename, satname, '30m')
|
||
|
try:
|
||
|
os.makedirs(filepath)
|
||
|
except:
|
||
|
print('')
|
||
|
|
||
|
# Landsat 5 collection
|
||
|
input_col = ee.ImageCollection('LANDSAT/LT05/C01/T1_TOA')
|
||
|
# filter by location and dates
|
||
|
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
|
||
|
# get all images in the filtered collection
|
||
|
im_all = flt_col.getInfo().get('features')
|
||
|
# print how many images there are for the user
|
||
|
n_img = flt_col.size().getInfo()
|
||
|
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
|
||
|
|
||
|
# loop trough images
|
||
|
timestamps = []
|
||
|
acc_georef = []
|
||
|
all_names = []
|
||
|
im_epsg = []
|
||
|
for i in range(n_img):
|
||
|
|
||
|
# find each image in ee database
|
||
|
im = ee.Image(im_all[i].get('id'))
|
||
|
# read metadata
|
||
|
im_dic = im.getInfo()
|
||
|
# get bands
|
||
|
im_bands = im_dic.get('bands')
|
||
|
# get time of acquisition (UNIX time)
|
||
|
t = im_dic['properties']['system:time_start']
|
||
|
# convert to datetime
|
||
|
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
|
||
|
timestamps.append(im_timestamp)
|
||
|
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
|
||
|
# get EPSG code of reference system
|
||
|
im_epsg.append(int(im_dic['bands'][0]['crs'][5:]))
|
||
|
# get geometric accuracy
|
||
|
try:
|
||
|
acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
|
||
|
except:
|
||
|
# default value of accuracy (RMSE = 12m)
|
||
|
acc_georef.append(12)
|
||
|
print('No geometric rmse model property')
|
||
|
# delete dimensions key from dictionnary, otherwise the entire image is extracted
|
||
|
for j in range(len(im_bands)): del im_bands[j]['dimensions']
|
||
|
# bands for L5
|
||
|
ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[7]]
|
||
|
# filenames for the images
|
||
|
filename = im_date + '_' + satname + '_' + sitename + suffix
|
||
|
# if two images taken at the same date add 'dup' in the name
|
||
|
if any(filename in _ for _ in all_names):
|
||
|
filename = im_date + '_' + satname + '_' + sitename + '_dup' + suffix
|
||
|
all_names.append(filename)
|
||
|
# download .TIF image
|
||
|
local_data = download_tif(im, polygon, ms_bands, filepath)
|
||
|
# update filename
|
||
|
os.rename(local_data, os.path.join(filepath, filename))
|
||
|
print(i, end='..')
|
||
|
|
||
|
# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
|
||
|
timestamps_sorted = sorted(timestamps)
|
||
|
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
|
||
|
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
|
||
|
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
|
||
|
# save into dict
|
||
|
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
|
||
|
'epsg':im_epsg_sorted}
|
||
|
print('Finished with ' + satname)
|
||
|
|
||
|
|
||
|
|
||
|
#=============================================================================================#
|
||
|
# download L7 images
|
||
|
#=============================================================================================#
|
||
|
|
||
|
if 'L7' in sat or 'Landsat7' in sat:
|
||
|
|
||
|
satname = 'L7'
|
||
|
# create subfolders (one for 30m multispectral bands and one for 15m pan bands)
|
||
|
filepath = os.path.join(os.getcwd(), 'data', sitename, 'L7')
|
||
|
filepath_pan = os.path.join(filepath, 'pan')
|
||
|
filepath_ms = os.path.join(filepath, 'ms')
|
||
|
try:
|
||
|
os.makedirs(filepath_pan)
|
||
|
os.makedirs(filepath_ms)
|
||
|
except:
|
||
|
print('')
|
||
|
|
||
|
# landsat 7 collection
|
||
|
input_col = ee.ImageCollection('LANDSAT/LE07/C01/T1_RT_TOA')
|
||
|
# filter by location and dates
|
||
|
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
|
||
|
# get all images in the filtered collection
|
||
|
im_all = flt_col.getInfo().get('features')
|
||
|
# print how many images there are for the user
|
||
|
n_img = flt_col.size().getInfo()
|
||
|
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
|
||
|
|
||
|
# loop trough images
|
||
|
timestamps = []
|
||
|
acc_georef = []
|
||
|
all_names = []
|
||
|
im_epsg = []
|
||
|
for i in range(n_img):
|
||
|
|
||
|
# find each image in ee database
|
||
|
im = ee.Image(im_all[i].get('id'))
|
||
|
# read metadata
|
||
|
im_dic = im.getInfo()
|
||
|
# get bands
|
||
|
im_bands = im_dic.get('bands')
|
||
|
# get time of acquisition (UNIX time)
|
||
|
t = im_dic['properties']['system:time_start']
|
||
|
# convert to datetime
|
||
|
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
|
||
|
timestamps.append(im_timestamp)
|
||
|
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
|
||
|
# get EPSG code of reference system
|
||
|
im_epsg.append(int(im_dic['bands'][0]['crs'][5:]))
|
||
|
# get geometric accuracy
|
||
|
try:
|
||
|
acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
|
||
|
except:
|
||
|
# default value of accuracy (RMSE = 12m)
|
||
|
acc_georef.append(12)
|
||
|
print('No geometric rmse model property')
|
||
|
# delete dimensions key from dictionnary, otherwise the entire image is extracted
|
||
|
for j in range(len(im_bands)): del im_bands[j]['dimensions']
|
||
|
# bands for L7
|
||
|
pan_band = [im_bands[8]]
|
||
|
ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[9]]
|
||
|
# filenames for the images
|
||
|
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
|
||
|
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
|
||
|
# if two images taken at the same date add 'dup' in the name
|
||
|
if any(filename_pan in _ for _ in all_names):
|
||
|
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
|
||
|
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
|
||
|
all_names.append(filename_pan)
|
||
|
# download .TIF image
|
||
|
local_data_pan = download_tif(im, polygon, pan_band, filepath_pan)
|
||
|
local_data_ms = download_tif(im, polygon, ms_bands, filepath_ms)
|
||
|
# update filename
|
||
|
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
|
||
|
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
|
||
|
print(i, end='..')
|
||
|
|
||
|
# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
|
||
|
timestamps_sorted = sorted(timestamps)
|
||
|
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
|
||
|
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
|
||
|
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
|
||
|
# save into dict
|
||
|
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
|
||
|
'epsg':im_epsg_sorted}
|
||
|
print('Finished with ' + satname)
|
||
|
|
||
|
|
||
|
#=============================================================================================#
|
||
|
# download L8 images
|
||
|
#=============================================================================================#
|
||
|
|
||
|
if 'L8' in sat or 'Landsat8' in sat:
|
||
|
|
||
|
satname = 'L8'
|
||
|
# create subfolders (one for 30m multispectral bands and one for 15m pan bands)
|
||
|
filepath = os.path.join(os.getcwd(), 'data', sitename, 'L8')
|
||
|
filepath_pan = os.path.join(filepath, 'pan')
|
||
|
filepath_ms = os.path.join(filepath, 'ms')
|
||
|
try:
|
||
|
os.makedirs(filepath_pan)
|
||
|
os.makedirs(filepath_ms)
|
||
|
except:
|
||
|
print('')
|
||
|
|
||
|
# landsat 8 collection
|
||
|
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
|
||
|
# filter by location and dates
|
||
|
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
|
||
|
# get all images in the filtered collection
|
||
|
im_all = flt_col.getInfo().get('features')
|
||
|
# print how many images there are for the user
|
||
|
n_img = flt_col.size().getInfo()
|
||
|
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
|
||
|
|
||
|
# loop trough images
|
||
|
timestamps = []
|
||
|
acc_georef = []
|
||
|
all_names = []
|
||
|
im_epsg = []
|
||
|
for i in range(n_img):
|
||
|
|
||
|
# find each image in ee database
|
||
|
im = ee.Image(im_all[i].get('id'))
|
||
|
# read metadata
|
||
|
im_dic = im.getInfo()
|
||
|
# get bands
|
||
|
im_bands = im_dic.get('bands')
|
||
|
# get time of acquisition (UNIX time)
|
||
|
t = im_dic['properties']['system:time_start']
|
||
|
# convert to datetime
|
||
|
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
|
||
|
timestamps.append(im_timestamp)
|
||
|
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
|
||
|
# get EPSG code of reference system
|
||
|
im_epsg.append(int(im_dic['bands'][0]['crs'][5:]))
|
||
|
# get geometric accuracy
|
||
|
try:
|
||
|
acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
|
||
|
except:
|
||
|
# default value of accuracy (RMSE = 12m)
|
||
|
acc_georef.append(12)
|
||
|
print('No geometric rmse model property')
|
||
|
# delete dimensions key from dictionnary, otherwise the entire image is extracted
|
||
|
for j in range(len(im_bands)): del im_bands[j]['dimensions']
|
||
|
# bands for L8
|
||
|
pan_band = [im_bands[7]]
|
||
|
ms_bands = [im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[5], im_bands[11]]
|
||
|
# filenames for the images
|
||
|
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
|
||
|
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
|
||
|
# if two images taken at the same date add 'dup' in the name
|
||
|
if any(filename_pan in _ for _ in all_names):
|
||
|
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
|
||
|
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
|
||
|
all_names.append(filename_pan)
|
||
|
# download .TIF image
|
||
|
local_data_pan = download_tif(im, polygon, pan_band, filepath_pan)
|
||
|
local_data_ms = download_tif(im, polygon, ms_bands, filepath_ms)
|
||
|
# update filename
|
||
|
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
|
||
|
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
|
||
|
print(i, end='..')
|
||
|
|
||
|
# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
|
||
|
timestamps_sorted = sorted(timestamps)
|
||
|
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
|
||
|
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
|
||
|
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
|
||
|
|
||
|
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
|
||
|
'epsg':im_epsg_sorted}
|
||
|
print('Finished with ' + satname)
|
||
|
|
||
|
#=============================================================================================#
|
||
|
# download S2 images
|
||
|
#=============================================================================================#
|
||
|
|
||
|
if 'S2' in sat or 'Sentinel2' in sat:
|
||
|
|
||
|
satname = 'S2'
|
||
|
# create subfolders for the 10m, 20m and 60m multipectral bands
|
||
|
filepath = os.path.join(os.getcwd(), 'data', sitename, 'S2')
|
||
|
try:
|
||
|
os.makedirs(os.path.join(filepath, '10m'))
|
||
|
os.makedirs(os.path.join(filepath, '20m'))
|
||
|
os.makedirs(os.path.join(filepath, '60m'))
|
||
|
except:
|
||
|
print('')
|
||
|
|
||
|
# Sentinel2 collection
|
||
|
input_col = ee.ImageCollection('COPERNICUS/S2')
|
||
|
# filter by location and dates
|
||
|
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
|
||
|
# get all images in the filtered collection
|
||
|
im_all = flt_col.getInfo().get('features')
|
||
|
# print how many images there are
|
||
|
n_img = flt_col.size().getInfo()
|
||
|
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
|
||
|
|
||
|
# loop trough images
|
||
|
timestamps = []
|
||
|
acc_georef = []
|
||
|
all_names = []
|
||
|
im_epsg = []
|
||
|
for i in range(n_img):
|
||
|
|
||
|
# find each image in ee database
|
||
|
im = ee.Image(im_all[i].get('id'))
|
||
|
# read metadata
|
||
|
im_dic = im.getInfo()
|
||
|
# get bands
|
||
|
im_bands = im_dic.get('bands')
|
||
|
# get time of acquisition (UNIX time)
|
||
|
t = im_dic['properties']['system:time_start']
|
||
|
# convert to datetime
|
||
|
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
|
||
|
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
|
||
|
# delete dimensions key from dictionnary, otherwise the entire image is extracted
|
||
|
for j in range(len(im_bands)): del im_bands[j]['dimensions']
|
||
|
# bands for S2
|
||
|
bands10 = [im_bands[1], im_bands[2], im_bands[3], im_bands[7]]
|
||
|
bands20 = [im_bands[11]]
|
||
|
bands60 = [im_bands[15]]
|
||
|
# filenames for images
|
||
|
filename10 = im_date + '_' + satname + '_' + sitename + '_' + '10m' + suffix
|
||
|
filename20 = im_date + '_' + satname + '_' + sitename + '_' + '20m' + suffix
|
||
|
filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + suffix
|
||
|
# if two images taken at the same date skip the second image (they are the same)
|
||
|
if any(filename10 in _ for _ in all_names):
|
||
|
continue
|
||
|
all_names.append(filename10)
|
||
|
# download .TIF image and update filename
|
||
|
local_data = download_tif(im, polygon, bands10, os.path.join(filepath, '10m'))
|
||
|
os.rename(local_data, os.path.join(filepath, '10m', filename10))
|
||
|
local_data = download_tif(im, polygon, bands20, os.path.join(filepath, '20m'))
|
||
|
os.rename(local_data, os.path.join(filepath, '20m', filename20))
|
||
|
local_data = download_tif(im, polygon, bands60, os.path.join(filepath, '60m'))
|
||
|
os.rename(local_data, os.path.join(filepath, '60m', filename60))
|
||
|
|
||
|
# save timestamp, epsg code and georeferencing accuracy (1 if passed 0 if not passed)
|
||
|
timestamps.append(im_timestamp)
|
||
|
im_epsg.append(int(im_dic['bands'][0]['crs'][5:]))
|
||
|
try:
|
||
|
if im_dic['properties']['GEOMETRIC_QUALITY_FLAG'] == 'PASSED':
|
||
|
acc_georef.append(1)
|
||
|
else:
|
||
|
acc_georef.append(0)
|
||
|
except:
|
||
|
acc_georef.append(0)
|
||
|
print(i, end='..')
|
||
|
|
||
|
# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
|
||
|
timestamps_sorted = sorted(timestamps)
|
||
|
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
|
||
|
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
|
||
|
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
|
||
|
|
||
|
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
|
||
|
'epsg':im_epsg_sorted}
|
||
|
print('Finished with ' + satname)
|
||
|
|
||
|
# save metadata dict
|
||
|
filepath = os.path.join(os.getcwd(), 'data', sitename)
|
||
|
with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'wb') as f:
|
||
|
pickle.dump(metadata, f)
|