#==========================================================#
#==========================================================#
# Download L5, L7, L8, S2 images of a given area
#==========================================================#
#==========================================================#

#==========================================================#
# Initial settings
#==========================================================#
import os
import numpy as np
import matplotlib.pyplot as plt
import pdb
import ee

# other modules
from osgeo import gdal, ogr, osr
from urllib.request import urlretrieve
import zipfile
from datetime import datetime
import pytz
import pickle

# import own modules
import functions.utils as utils
import functions.sds as sds

np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
ee.Initialize()

#==========================================================#
# Helper functions
#==========================================================#

def _landsat_accuracy(properties):
    """Return the GEOMETRIC_RMSE_MODEL property of a Landsat image.

    Falls back to 12 (and prints a notice) when the property is missing.
    """
    try:
        return properties['GEOMETRIC_RMSE_MODEL']
    except KeyError:
        print('No geometric rmse model property')
        return 12


def _s2_accuracy(properties):
    """Return 1 if the S2 GEOMETRIC_QUALITY_FLAG is 'PASSED', otherwise 0."""
    return 1 if properties.get('GEOMETRIC_QUALITY_FLAG') == 'PASSED' else 0


def _dup_tag(key, seen):
    """Return the filename tag that keeps images sharing `key` distinct.

    `seen` maps each key to the number of times it has been encountered.
    The first image gets '', the second '_dup' and any further ones
    '_dup2', '_dup3', ... so that no downloaded file is overwritten.
    """
    count = seen.get(key, 0)
    seen[key] = count + 1
    if count == 0:
        return ''
    if count == 1:
        return '_dup'
    return '_dup' + str(count)


def _sorted_metadata(timestamps, acc_georef, epsg):
    """Build a metadata entry with dates and accuracies sorted by date."""
    idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
    return {'dates': [timestamps[j] for j in idx_sorted],
            'acc_georef': [acc_georef[j] for j in idx_sorted],
            'epsg': epsg}


def _download_collection(satname, collection_id, sitename, polygon, outputs,
                         accuracy_fn, seen, suffix='.tif'):
    """Download every image of an ee collection that intersects `polygon`.

    Arguments:
        satname: short satellite name used in the filenames (e.g. 'L5')
        collection_id: identifier passed to ee.ImageCollection
        sitename: name of the site, used in the filenames
        polygon: polygon coordinates passed to ee.Geometry.Polygon and
            to sds.download_tif
        outputs: list of (band_indices, label, download_dir, target_dir);
            for each image, the bands at `band_indices` are downloaded with
            sds.download_tif into `download_dir` and the resulting file is
            moved to `target_dir` with `label` inserted in its name
        accuracy_fn: function mapping the image properties to the
            georeferencing accuracy value stored for that image
        seen: dict shared between calls that must not produce clashing
            filenames (see _dup_tag)
        suffix: file extension of the renamed files

    Returns:
        (timestamps, acc_georef, epsg): the acquisition dates (UTC) and the
        accuracy values in download order, and the EPSG code of the last
        image processed (None if the collection is empty).
    """
    input_col = ee.ImageCollection(collection_id)
    # filter by location
    flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
    n_img = flt_col.size().getInfo()
    print('Number of images covering ' + sitename, n_img)
    im_all = flt_col.getInfo().get('features')

    timestamps = []
    acc_georef = []
    im_epsg = None
    for i, feature in enumerate(im_all):
        # find each image in ee database
        im = ee.Image(feature.get('id'))
        im_dic = im.getInfo()
        im_bands = im_dic.get('bands')
        properties = im_dic['properties']
        t = properties['system:time_start']
        im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
        timestamps.append(im_timestamp)
        im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
        im_epsg = int(im_bands[0]['crs'][5:])
        acc_georef.append(accuracy_fn(properties))
        # delete dimensions key from dictionary, otherwise the entire image is extracted
        for band in im_bands:
            band.pop('dimensions', None)
        print(i)
        # tag images that share a timestamp so their files do not clash
        dup = _dup_tag(im_date + '_' + satname, seen)
        for band_indices, label, download_dir, target_dir in outputs:
            bands = [im_bands[k] for k in band_indices]
            filename = im_date + '_' + satname + '_' + sitename + label + dup + suffix
            local_data = sds.download_tif(im, polygon, bands, download_dir)
            os.rename(local_data, os.path.join(target_dir, filename))

    return timestamps, acc_georef, im_epsg

#==========================================================#
# Location
#==========================================================#

## location (Narrabeen-Collaroy beach)
#polygon = [[[151.301454, -33.700754],
#            [151.311453, -33.702075],
#            [151.307237, -33.739761],
#            [151.294220, -33.736329],
#            [151.301454, -33.700754]]];

# location (Tairua beach)
sitename = 'TAIRUA'
polygon = [[[175.835574, -36.982022],
            [175.888220, -36.980680],
            [175.893527, -37.029610],
            [175.833444, -37.031767],
            [175.835574, -36.982022]]]

# initialise metadata dictionary (stores timestamps and georeferencing accuracy of each image)
metadata = {}

# create directories
os.makedirs(os.path.join(os.getcwd(), 'data', sitename), exist_ok=True)

#%%
#==========================================================#
#==========================================================#
# L5
#==========================================================#
#==========================================================#

# define filenames for images
suffix = '.tif'
filepath = os.path.join(os.getcwd(), 'data', sitename, 'L5', '30m')
os.makedirs(filepath, exist_ok=True)

#==========================================================#
# Select L5 collection and loop through images
#==========================================================#

satname = 'L5'
# bands for L5
outputs = [((0, 1, 2, 3, 4, 7), '', filepath, filepath)]
timestamps, acc_georef, im_epsg = _download_collection(
    satname, 'LANDSAT/LT05/C01/T1_TOA', sitename, polygon, outputs,
    _landsat_accuracy, {}, suffix)

# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
metadata[satname] = _sorted_metadata(timestamps, acc_georef, im_epsg)

#%%
#==========================================================#
#==========================================================#
# L7&L8
#==========================================================#
#==========================================================#

# define filenames for images
suffix = '.tif'
filepath = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8')
filepath_pan = os.path.join(filepath, 'pan')
filepath_ms = os.path.join(filepath, 'ms')
# create each directory independently so that one existing directory
# does not prevent the other from being created
os.makedirs(filepath_pan, exist_ok=True)
os.makedirs(filepath_ms, exist_ok=True)

# L7 and L8 images are stored in the same directories
seen = {}

#==========================================================#
# Select L7 collection and loop through images
#==========================================================#

# bands for L7 (panchromatic first, then multispectral)
outputs = [((8,), '_pan', filepath_pan, filepath_pan),
           ((0, 1, 2, 3, 4, 9), '_ms', filepath_ms, filepath_ms)]
timestamps_l7, acc_georef_l7, epsg_l7 = _download_collection(
    'L7', 'LANDSAT/LE07/C01/T1_RT_TOA', sitename, polygon, outputs,
    _landsat_accuracy, seen, suffix)

#==========================================================#
# Select L8 collection and loop through images
#==========================================================#

satname = 'L8'
# bands for L8 (panchromatic first, then multispectral)
outputs = [((7,), '_pan', filepath_pan, filepath_pan),
           ((1, 2, 3, 4, 5, 11), '_ms', filepath_ms, filepath_ms)]
timestamps_l8, acc_georef_l8, epsg_l8 = _download_collection(
    satname, 'LANDSAT/LC08/C01/T1_RT_TOA', sitename, polygon, outputs,
    _landsat_accuracy, seen, suffix)

# the L7 and L8 records are merged and stored together under the 'L8' key
timestamps = timestamps_l7 + timestamps_l8
acc_georef = acc_georef_l7 + acc_georef_l8
# epsg of the last image processed (falls back to L7 if there is no L8 image)
im_epsg = epsg_l8 if epsg_l8 is not None else epsg_l7

# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
metadata[satname] = _sorted_metadata(timestamps, acc_georef, im_epsg)

#%%
#==========================================================#
#==========================================================#
# S2
#==========================================================#
#==========================================================#

# define filenames for images
suffix = '.tif'
filepath = os.path.join(os.getcwd(), 'data', sitename, 'S2')
for resolution in ('10m', '20m', '60m'):
    os.makedirs(os.path.join(filepath, resolution), exist_ok=True)

#==========================================================#
# Select S2 collection and loop through images
#==========================================================#

satname = 'S2'
# bands for S2: files are downloaded into filepath and then moved
# into the sub-directory of their resolution
outputs = [((1, 2, 3, 7), '_10m', filepath, os.path.join(filepath, '10m')),
           ((11,), '_20m', filepath, os.path.join(filepath, '20m')),
           ((15,), '_60m', filepath, os.path.join(filepath, '60m'))]
timestamps, acc_georef, im_epsg = _download_collection(
    satname, 'COPERNICUS/S2', sitename, polygon, outputs,
    _s2_accuracy, {}, suffix)

# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
metadata[satname] = _sorted_metadata(timestamps, acc_georef, im_epsg)

#%% save metadata

filepath = os.path.join(os.getcwd(), 'data', sitename)
with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'wb') as f:
    pickle.dump(metadata, f)