You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
geetools_VH/SDS_download.py

432 lines
19 KiB
Python

"""This module contains all the functions needed to download the satellite images from GEE
Author: Kilian Vos, Water Research Laboratory, University of New South Wales
"""
# Initial settings
import os
import numpy as np
import matplotlib.pyplot as plt
import pdb
import ee
from urllib.request import urlretrieve
from datetime import datetime
import pytz
import pickle
import zipfile
# initialise connection with GEE server
ee.Initialize()
# Functions
def download_tif(image, polygon, bandsId, filepath):
"""
Downloads a .TIF image from the ee server and stores it in a temp file
Arguments:
-----------
image: ee.Image
Image object to be downloaded
polygon: list
polygon containing the lon/lat coordinates to be extracted
longitudes in the first column and latitudes in the second column
bandsId: list of dict
list of bands to be downloaded
filepath: location where the temporary file should be saved
"""
url = ee.data.makeDownloadUrl(ee.data.getDownloadId({
'image': image.serialize(),
'region': polygon,
'bands': bandsId,
'filePerBand': 'false',
'name': 'data',
}))
local_zip, headers = urlretrieve(url)
with zipfile.ZipFile(local_zip) as local_zipfile:
return local_zipfile.extract('data.tif', filepath)
def get_images(sitename,polygon,dates,sat):
"""
Downloads all images from Landsat 5, Landsat 7, Landsat 8 and Sentinel-2 covering the given
polygon and acquired during the given dates. The images are organised in subfolders and divided
by satellite mission and pixel resolution.
KV WRL 2018
Arguments:
-----------
sitename: str
String containig the name of the site
polygon: list
polygon containing the lon/lat coordinates to be extracted
longitudes in the first column and latitudes in the second column
dates: list of str
list that contains 2 strings with the initial and final dates in format 'yyyy-mm-dd'
e.g. ['1987-01-01', '2018-01-01']
sat: list of str
list that contains the names of the satellite missions to include
e.g. ['L5', 'L7', 'L8', 'S2']
"""
# format in which the images are downloaded
suffix = '.tif'
# initialise metadata dictionnary (stores timestamps and georefencing accuracy of each image)
metadata = dict([])
# create directories
try:
os.makedirs(os.path.join(os.getcwd(), 'data',sitename))
except:
print('')
#=============================================================================================#
# download L5 images
#=============================================================================================#
if 'L5' in sat or 'Landsat5' in sat:
satname = 'L5'
# create a subfolder to store L5 images
filepath = os.path.join(os.getcwd(), 'data', sitename, satname, '30m')
try:
os.makedirs(filepath)
except:
print('')
# Landsat 5 collection
input_col = ee.ImageCollection('LANDSAT/LT05/C01/T1_TOA')
# filter by location and dates
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
# get all images in the filtered collection
im_all = flt_col.getInfo().get('features')
# print how many images there are for the user
n_img = flt_col.size().getInfo()
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
# loop trough images
timestamps = []
acc_georef = []
all_names = []
im_epsg = []
for i in range(n_img):
# find each image in ee database
im = ee.Image(im_all[i].get('id'))
# read metadata
im_dic = im.getInfo()
# get bands
im_bands = im_dic.get('bands')
# get time of acquisition (UNIX time)
t = im_dic['properties']['system:time_start']
# convert to datetime
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
timestamps.append(im_timestamp)
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
# get EPSG code of reference system
im_epsg.append(int(im_dic['bands'][0]['crs'][5:]))
# get geometric accuracy
try:
acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
except:
# default value of accuracy (RMSE = 12m)
acc_georef.append(12)
print('No geometric rmse model property')
# delete dimensions key from dictionnary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for L5
ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[7]]
# filenames for the images
filename = im_date + '_' + satname + '_' + sitename + suffix
# if two images taken at the same date add 'dup' in the name
if any(filename in _ for _ in all_names):
filename = im_date + '_' + satname + '_' + sitename + '_dup' + suffix
all_names.append(filename)
# download .TIF image
local_data = download_tif(im, polygon, ms_bands, filepath)
# update filename
os.rename(local_data, os.path.join(filepath, filename))
print(i, end='..')
# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
# save into dict
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
'epsg':im_epsg_sorted}
print('Finished with ' + satname)
#=============================================================================================#
# download L7 images
#=============================================================================================#
if 'L7' in sat or 'Landsat7' in sat:
satname = 'L7'
# create subfolders (one for 30m multispectral bands and one for 15m pan bands)
filepath = os.path.join(os.getcwd(), 'data', sitename, 'L7')
filepath_pan = os.path.join(filepath, 'pan')
filepath_ms = os.path.join(filepath, 'ms')
try:
os.makedirs(filepath_pan)
os.makedirs(filepath_ms)
except:
print('')
# landsat 7 collection
input_col = ee.ImageCollection('LANDSAT/LE07/C01/T1_RT_TOA')
# filter by location and dates
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
# get all images in the filtered collection
im_all = flt_col.getInfo().get('features')
# print how many images there are for the user
n_img = flt_col.size().getInfo()
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
# loop trough images
timestamps = []
acc_georef = []
all_names = []
im_epsg = []
for i in range(n_img):
# find each image in ee database
im = ee.Image(im_all[i].get('id'))
# read metadata
im_dic = im.getInfo()
# get bands
im_bands = im_dic.get('bands')
# get time of acquisition (UNIX time)
t = im_dic['properties']['system:time_start']
# convert to datetime
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
timestamps.append(im_timestamp)
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
# get EPSG code of reference system
im_epsg.append(int(im_dic['bands'][0]['crs'][5:]))
# get geometric accuracy
try:
acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
except:
# default value of accuracy (RMSE = 12m)
acc_georef.append(12)
print('No geometric rmse model property')
# delete dimensions key from dictionnary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for L7
pan_band = [im_bands[8]]
ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[9]]
# filenames for the images
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
# if two images taken at the same date add 'dup' in the name
if any(filename_pan in _ for _ in all_names):
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
all_names.append(filename_pan)
# download .TIF image
local_data_pan = download_tif(im, polygon, pan_band, filepath_pan)
local_data_ms = download_tif(im, polygon, ms_bands, filepath_ms)
# update filename
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
print(i, end='..')
# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
# save into dict
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
'epsg':im_epsg_sorted}
print('Finished with ' + satname)
#=============================================================================================#
# download L8 images
#=============================================================================================#
if 'L8' in sat or 'Landsat8' in sat:
satname = 'L8'
# create subfolders (one for 30m multispectral bands and one for 15m pan bands)
filepath = os.path.join(os.getcwd(), 'data', sitename, 'L8')
filepath_pan = os.path.join(filepath, 'pan')
filepath_ms = os.path.join(filepath, 'ms')
try:
os.makedirs(filepath_pan)
os.makedirs(filepath_ms)
except:
print('')
# landsat 8 collection
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
# filter by location and dates
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
# get all images in the filtered collection
im_all = flt_col.getInfo().get('features')
# print how many images there are for the user
n_img = flt_col.size().getInfo()
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
# loop trough images
timestamps = []
acc_georef = []
all_names = []
im_epsg = []
for i in range(n_img):
# find each image in ee database
im = ee.Image(im_all[i].get('id'))
# read metadata
im_dic = im.getInfo()
# get bands
im_bands = im_dic.get('bands')
# get time of acquisition (UNIX time)
t = im_dic['properties']['system:time_start']
# convert to datetime
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
timestamps.append(im_timestamp)
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
# get EPSG code of reference system
im_epsg.append(int(im_dic['bands'][0]['crs'][5:]))
# get geometric accuracy
try:
acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
except:
# default value of accuracy (RMSE = 12m)
acc_georef.append(12)
print('No geometric rmse model property')
# delete dimensions key from dictionnary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for L8
pan_band = [im_bands[7]]
ms_bands = [im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[5], im_bands[11]]
# filenames for the images
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
# if two images taken at the same date add 'dup' in the name
if any(filename_pan in _ for _ in all_names):
filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
all_names.append(filename_pan)
# download .TIF image
local_data_pan = download_tif(im, polygon, pan_band, filepath_pan)
local_data_ms = download_tif(im, polygon, ms_bands, filepath_ms)
# update filename
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
print(i, end='..')
# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
'epsg':im_epsg_sorted}
print('Finished with ' + satname)
#=============================================================================================#
# download S2 images
#=============================================================================================#
if 'S2' in sat or 'Sentinel2' in sat:
satname = 'S2'
# create subfolders for the 10m, 20m and 60m multipectral bands
filepath = os.path.join(os.getcwd(), 'data', sitename, 'S2')
try:
os.makedirs(os.path.join(filepath, '10m'))
os.makedirs(os.path.join(filepath, '20m'))
os.makedirs(os.path.join(filepath, '60m'))
except:
print('')
# Sentinel2 collection
input_col = ee.ImageCollection('COPERNICUS/S2')
# filter by location and dates
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(dates[0],dates[1])
# get all images in the filtered collection
im_all = flt_col.getInfo().get('features')
# print how many images there are
n_img = flt_col.size().getInfo()
print('Number of ' + satname + ' images covering ' + sitename + ':', n_img)
# loop trough images
timestamps = []
acc_georef = []
all_names = []
im_epsg = []
for i in range(n_img):
# find each image in ee database
im = ee.Image(im_all[i].get('id'))
# read metadata
im_dic = im.getInfo()
# get bands
im_bands = im_dic.get('bands')
# get time of acquisition (UNIX time)
t = im_dic['properties']['system:time_start']
# convert to datetime
im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
# delete dimensions key from dictionnary, otherwise the entire image is extracted
for j in range(len(im_bands)): del im_bands[j]['dimensions']
# bands for S2
bands10 = [im_bands[1], im_bands[2], im_bands[3], im_bands[7]]
bands20 = [im_bands[11]]
bands60 = [im_bands[15]]
# filenames for images
filename10 = im_date + '_' + satname + '_' + sitename + '_' + '10m' + suffix
filename20 = im_date + '_' + satname + '_' + sitename + '_' + '20m' + suffix
filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + suffix
# if two images taken at the same date skip the second image (they are the same)
if any(filename10 in _ for _ in all_names):
continue
all_names.append(filename10)
# download .TIF image and update filename
local_data = download_tif(im, polygon, bands10, os.path.join(filepath, '10m'))
os.rename(local_data, os.path.join(filepath, '10m', filename10))
local_data = download_tif(im, polygon, bands20, os.path.join(filepath, '20m'))
os.rename(local_data, os.path.join(filepath, '20m', filename20))
local_data = download_tif(im, polygon, bands60, os.path.join(filepath, '60m'))
os.rename(local_data, os.path.join(filepath, '60m', filename60))
# save timestamp, epsg code and georeferencing accuracy (1 if passed 0 if not passed)
timestamps.append(im_timestamp)
im_epsg.append(int(im_dic['bands'][0]['crs'][5:]))
try:
if im_dic['properties']['GEOMETRIC_QUALITY_FLAG'] == 'PASSED':
acc_georef.append(1)
else:
acc_georef.append(0)
except:
acc_georef.append(0)
print(i, end='..')
# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
im_epsg_sorted = [im_epsg[j] for j in idx_sorted]
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted,
'epsg':im_epsg_sorted}
print('Finished with ' + satname)
# save metadata dict
filepath = os.path.join(os.getcwd(), 'data', sitename)
with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'wb') as f:
pickle.dump(metadata, f)