You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
geetools_VH/download_images.py

390 lines
13 KiB
Python

#==========================================================#
#==========================================================#
# Download L5, L7, L8, S2 images of a given area
#==========================================================#
#==========================================================#
#==========================================================#
# Initial settings
#==========================================================#
import os
import numpy as np
import matplotlib.pyplot as plt
import pdb
import ee
# other modules
from osgeo import gdal, ogr, osr
from urllib.request import urlretrieve
import zipfile
from datetime import datetime
import pytz
import pickle
# import own modules
import functions.utils as utils
import functions.sds as sds
# Silence all NumPy floating-point warnings (division by zero, invalid
# operations producing NaN, overflow and underflow) for the whole script.
np.seterr(all='ignore')
# Initialise the Earth Engine client library; this has to happen before the
# ee.ImageCollection / ee.Image / ee.Geometry calls made further down.
ee.Initialize()
#==========================================================#
# Location
#==========================================================#
# Site definition.
# `polygon` is a single closed ring of [x, y] vertices (the first and last
# vertex are identical); it is passed to ee.Geometry.Polygon further down,
# so the values are presumably [longitude, latitude] in degrees.
#
# Alternative site (Narrabeen-Collaroy beach), kept for reference:
# polygon = [[[151.301454, -33.700754],
#             [151.311453, -33.702075],
#             [151.307237, -33.739761],
#             [151.294220, -33.736329],
#             [151.301454, -33.700754]]]

# location (Tairua beach)
sitename = 'TAIRUA'
polygon = [[[175.835574, -36.982022],
            [175.888220, -36.980680],
            [175.893527, -37.029610],
            [175.833444, -37.031767],
            [175.835574, -36.982022]]]

# initialise metadata dictionary (stores timestamps and georeferencing
# accuracy of each image, keyed by satellite name)
metadata = {}

# create the data directory of the site under the current working directory.
# Only "already exists" is tolerated: any other failure (permissions, invalid
# path, ...) propagates instead of being misreported as an existing directory.
try:
    os.makedirs(os.path.join(os.getcwd(), 'data', sitename))
except FileExistsError:
    print('directory already exists')
#%%
#==========================================================#
#==========================================================#
# L5
#==========================================================#
#==========================================================#
# Download every Landsat 5 image covering the site as a multispectral GeoTIFF
# into data/<sitename>/L5/30m and record, per image, its acquisition time and
# georeferencing accuracy in metadata['L5'].

# define filenames for images
suffix = '.tif'
filepath = os.path.join(os.getcwd(), 'data', sitename, 'L5', '30m')
# only an existing directory is tolerated; other errors are real failures
try:
    os.makedirs(filepath)
except FileExistsError:
    print('directory already exists')

#==========================================================#
# Select L5 collection
#==========================================================#
satname = 'L5'
input_col = ee.ImageCollection('LANDSAT/LT05/C01/T1_TOA')
# filter by location
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
n_img = flt_col.size().getInfo()
print('Number of images covering ' + sitename, n_img)
im_all = flt_col.getInfo().get('features')

#==========================================================#
# Main loop through images
#==========================================================#
timestamps = []
acc_georef = []
all_names = []
# stays None when the collection is empty, so the metadata entry below can
# still be written instead of failing on an undefined name
im_epsg = None
# iterate over the features that were actually fetched rather than trusting
# the count returned by the separate size() request
for i, feature in enumerate(im_all):
    # find each image in ee database
    im = ee.Image(feature.get('id'))
    im_dic = im.getInfo()
    im_bands = im_dic.get('bands')
    properties = im_dic['properties']
    # acquisition time is divided by 1000 to get seconds (i.e. it is stored
    # in milliseconds) and converted to a timezone-aware UTC datetime
    t = properties['system:time_start']
    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
    timestamps.append(im_timestamp)
    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
    # numeric code of the first band's crs string, skipping its first
    # 5 characters (presumably the 'EPSG:' prefix)
    im_epsg = int(im_bands[0]['crs'][5:])
    try:
        acc_georef.append(properties['GEOMETRIC_RMSE_MODEL'])
    except KeyError:
        # property missing: fall back to a default accuracy value of 12
        acc_georef.append(12)
        print('No geometric rmse model property')
    # delete dimensions key from dictionary, otherwise the entire image is extracted
    for band in im_bands:
        band.pop('dimensions', None)
    # bands for L5
    ms_bands = [im_bands[k] for k in (0, 1, 2, 3, 4, 7)]
    # filenames
    filename = im_date + '_' + satname + '_' + sitename + suffix
    print(i)
    # a second image with the same timestamp gets a '_dup' marker
    # NOTE(review): a third image with the same timestamp would reuse the
    # '_dup' name and replace the second one
    if filename in all_names:
        filename = im_date + '_' + satname + '_' + sitename + '_dup' + suffix
    all_names.append(filename)
    local_data = sds.download_tif(im, polygon, ms_bands, filepath)
    os.rename(local_data, os.path.join(filepath, filename))

# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
timestamps_sorted = [timestamps[j] for j in idx_sorted]
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
metadata[satname] = {'dates': timestamps_sorted,
                     'acc_georef': acc_georef_sorted,
                     'epsg': im_epsg}
#%%
#==========================================================#
#==========================================================#
# L7&L8
#==========================================================#
#==========================================================#
# Download every Landsat 7 and Landsat 8 image covering the site as two
# GeoTIFFs each (panchromatic band and multispectral bands) into
# data/<sitename>/L7&L8/pan and data/<sitename>/L7&L8/ms, and record the
# acquisition times and georeferencing accuracy of both satellites together.

# define filenames for images
suffix = '.tif'
filepath = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8')
filepath_pan = os.path.join(filepath, 'pan')
filepath_ms = os.path.join(filepath, 'ms')
# create each directory on its own, so that an already existing 'pan' folder
# does not prevent the 'ms' folder from being created
for directory in (filepath_pan, filepath_ms):
    try:
        os.makedirs(directory)
    except FileExistsError:
        print('directory already exists')

# one entry per satellite:
# (satname, collection id, index of the pan band, indices of the ms bands)
landsat_sensors = [
    ('L7', 'LANDSAT/LE07/C01/T1_RT_TOA', 8, (0, 1, 2, 3, 4, 9)),
    ('L8', 'LANDSAT/LC08/C01/T1_RT_TOA', 7, (1, 2, 3, 4, 5, 11)),
]

# these accumulate over BOTH satellites
timestamps = []
acc_georef = []
all_names = []
# stays None when both collections are empty, so the metadata entry below can
# still be written instead of failing on an undefined name
im_epsg = None

for satname, collection_id, pan_index, ms_indices in landsat_sensors:
    #==========================================================#
    # Select L7 / L8 collection
    #==========================================================#
    input_col = ee.ImageCollection(collection_id)
    # filter by location
    flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
    n_img = flt_col.size().getInfo()
    # report the configured site name for both satellites
    print('Number of images covering ' + sitename, n_img)
    im_all = flt_col.getInfo().get('features')

    #==========================================================#
    # Main loop through images
    #==========================================================#
    # iterate over the features that were actually fetched rather than
    # trusting the count returned by the separate size() request
    for i, feature in enumerate(im_all):
        # find each image in ee database
        im = ee.Image(feature.get('id'))
        im_dic = im.getInfo()
        im_bands = im_dic.get('bands')
        properties = im_dic['properties']
        # acquisition time is divided by 1000 to get seconds (i.e. it is
        # stored in milliseconds) and converted to a UTC datetime
        t = properties['system:time_start']
        im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
        timestamps.append(im_timestamp)
        im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
        # numeric code of the first band's crs string, skipping its first
        # 5 characters (presumably the 'EPSG:' prefix)
        im_epsg = int(im_bands[0]['crs'][5:])
        try:
            acc_georef.append(properties['GEOMETRIC_RMSE_MODEL'])
        except KeyError:
            # property missing: fall back to a default accuracy value of 12
            acc_georef.append(12)
            print('No geometric rmse model property')
        # delete dimensions key from dictionary, otherwise the entire image is extracted
        for band in im_bands:
            band.pop('dimensions', None)
        # bands for the current satellite
        pan_band = [im_bands[pan_index]]
        ms_bands = [im_bands[k] for k in ms_indices]
        # filenames
        filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
        filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
        print(i)
        # a second image with the same timestamp gets a '_dup' marker; only
        # the pan name is tracked, the ms name always follows it
        if filename_pan in all_names:
            filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
            filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
        all_names.append(filename_pan)
        local_data_pan = sds.download_tif(im, polygon, pan_band, filepath_pan)
        os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
        local_data_ms = sds.download_tif(im, polygon, ms_bands, filepath_ms)
        os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))

# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
timestamps_sorted = [timestamps[j] for j in idx_sorted]
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
# NOTE(review): satname is 'L8' here, so the combined L7 + L8 records are
# stored under the 'L8' key only (unchanged from the original behaviour);
# confirm this is what the code reading the metadata expects
metadata[satname] = {'dates': timestamps_sorted,
                     'acc_georef': acc_georef_sorted,
                     'epsg': im_epsg}
#%%
#==========================================================#
#==========================================================#
# S2
#==========================================================#
#==========================================================#
# Download every Sentinel-2 image covering the site as three GeoTIFFs each
# (one per band group, stored in the 10m, 20m and 60m sub-folders of
# data/<sitename>/S2) and record, per image, its acquisition time and a
# 0/1 georeferencing quality flag in metadata['S2'].

# define filenames for images
suffix = '.tif'
filepath = os.path.join(os.getcwd(), 'data', sitename, 'S2')
# create each directory on its own, so that an already existing '10m' folder
# does not prevent the '20m' and '60m' folders from being created
for resolution in ('10m', '20m', '60m'):
    try:
        os.makedirs(os.path.join(filepath, resolution))
    except FileExistsError:
        print('directory already exists')

#==========================================================#
# Select S2 collection
#==========================================================#
satname = 'S2'
input_col = ee.ImageCollection('COPERNICUS/S2')
# filter by location
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
n_img = flt_col.size().getInfo()
print('Number of images covering ' + sitename, n_img)
im_all = flt_col.getInfo().get('features')

#==========================================================#
# Main loop through images
#==========================================================#
timestamps = []
acc_georef = []
all_names = []
# stays None when the collection is empty, so the metadata entry below can
# still be written instead of failing on an undefined name
im_epsg = None
# iterate over the features that were actually fetched rather than trusting
# the count returned by the separate size() request
for i, feature in enumerate(im_all):
    # find each image in ee database
    im = ee.Image(feature.get('id'))
    im_dic = im.getInfo()
    im_bands = im_dic.get('bands')
    properties = im_dic['properties']
    # acquisition time is divided by 1000 to get seconds (i.e. it is stored
    # in milliseconds) and converted to a timezone-aware UTC datetime
    t = properties['system:time_start']
    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
    timestamps.append(im_timestamp)
    # numeric code of the first band's crs string, skipping its first
    # 5 characters (presumably the 'EPSG:' prefix)
    im_epsg = int(im_bands[0]['crs'][5:])
    # georeferencing accuracy: 1 when the quality flag is 'PASSED',
    # 0 when it has another value or is missing
    if properties.get('GEOMETRIC_QUALITY_FLAG') == 'PASSED':
        acc_georef.append(1)
    else:
        acc_georef.append(0)
    # delete dimensions key from dictionary, otherwise the entire image is extracted
    for band in im_bands:
        band.pop('dimensions', None)
    # bands for S2
    bands10 = [im_bands[k] for k in (1, 2, 3, 7)]
    bands20 = [im_bands[11]]
    bands60 = [im_bands[15]]
    # filenames
    filename10 = im_date + '_' + satname + '_' + sitename + '_' + '10m' + suffix
    filename20 = im_date + '_' + satname + '_' + sitename + '_' + '20m' + suffix
    filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + suffix
    print(i)
    # a second image with the same timestamp gets a '_dup' marker; only the
    # 10m name is tracked, the 20m and 60m names always follow it
    if filename10 in all_names:
        filename10 = im_date + '_' + satname + '_' + sitename + '_' + '10m' + '_dup' + suffix
        filename20 = im_date + '_' + satname + '_' + sitename + '_' + '20m' + '_dup' + suffix
        filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + '_dup' + suffix
    all_names.append(filename10)
    # each band group is downloaded into the S2 folder first and then moved
    # into its resolution sub-folder under the final name
    for bands, resolution, filename in ((bands10, '10m', filename10),
                                        (bands20, '20m', filename20),
                                        (bands60, '60m', filename60)):
        local_data = sds.download_tif(im, polygon, bands, filepath)
        os.rename(local_data, os.path.join(filepath, resolution, filename))

# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
timestamps_sorted = [timestamps[j] for j in idx_sorted]
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
metadata[satname] = {'dates': timestamps_sorted,
                     'acc_georef': acc_georef_sorted,
                     'epsg': im_epsg}
#%% save metadata
# Pickle the metadata dictionary to data/<sitename>/<sitename>_metadata.pkl
# under the current working directory.
filepath = os.path.join(os.getcwd(), 'data', sitename)
metadata_path = os.path.join(filepath, sitename + '_metadata' + '.pkl')
with open(metadata_path, 'wb') as f:
    pickle.dump(metadata, f)