geetools_VH/read_images.py

# -*- coding: utf-8 -*-

#==========================================================#
# Extract shorelines from Landsat images
#==========================================================#

# Initial settings
import os
import numpy as np
import matplotlib.pyplot as plt
import ee
import pdb

# other modules
from osgeo import gdal, ogr, osr
import pickle
import matplotlib.cm as cm
from pylab import ginput

# image processing modules
import skimage.filters as filters 
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.measure as measure

# import own modules
import functions.utils as utils
import functions.sds_old1 as sds

# global numpy / matplotlib / earth-engine configuration
np.seterr(all='ignore') # do not warn on divisions by 0 and nans
plt.rcParams.update({
    'axes.grid': True,                  # draw a grid on every axes
    'figure.max_open_warning': 100,     # this script opens one figure per processed image
})
ee.Initialize() # initialise the `ee` client library at import time

# ---------------------------------------------------------------- #
# processing parameters
# ---------------------------------------------------------------- #
# images whose cloud-cover fraction exceeds this value are skipped
cloud_thresh = 0.5
# flag passed to the processing functions: True if you want the plots
plot_bool = False
# upper probability to clip and rescale pixel intensity
prob_high = 99.9
# minimum number of points contained in each water line
min_contour_points = 100
# epsg code of the output coordinates: GDA94 / MGA Zone 56
output_epsg = 28356
# radius (in pixels) of disk for buffer (pixel classification)
buffer_size = 10
# number of pixels in a beach (pixel classification)
min_beach_size = 50

# satellite and site identifying the image collection; both are used to
# build the data directory and the metadata file names
satname = 'L8'
sitename = 'NARRA'
#sitename = 'OLDBAR'

# Load the pickled metadata of the collection: timestamps, georeferencing
# accuracy, epsg code of the images and reference points
filepath = os.path.join(os.getcwd(), 'data', satname, sitename)
metadata = {}
for suffix in ('_timestamps', '_accuracy_georef', '_epsgcode', '_refpoints'):
    with open(os.path.join(filepath, sitename + suffix + '.pkl'), 'rb') as f:
        metadata[suffix] = pickle.load(f)
timestamps = metadata['_timestamps']
acc_georef = metadata['_accuracy_georef']
input_epsg = metadata['_epsgcode']
refpoints = metadata['_refpoints']

# sort timestamps and georef accuracy (downloaded images are sorted by date in directory)
# the same index permutation is applied to both series so they stay aligned
idx_sorted = sorted(range(len(timestamps)), key=lambda j: timestamps[j])
timestamps_sorted = [timestamps[j] for j in idx_sorted]
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]

# path to images
file_path_pan = os.path.join(os.getcwd(), 'data', satname, sitename, 'pan')
file_path_ms = os.path.join(os.getcwd(), 'data', satname, sitename, 'ms')
# os.listdir() returns the entries in arbitrary order, but the processing loop
# pairs file_names_pan[i] with file_names_ms[i] and with the i-th sorted
# timestamp, so the listings are sorted explicitly.
# NOTE(review): assumes the pan and ms file names sort identically and that
# lexicographic order is chronological (date embedded in the name) - confirm
file_names_pan = sorted(os.listdir(file_path_pan))
file_names_ms = sorted(os.listdir(file_path_ms))
# number of images to process
N = len(file_names_pan)

# accumulators filled by the processing loop
# per-image series kept for the selected images
cloud_cover_ts, date_acquired_ts, acc_georef_ts = [], [], []
# indices of the images that were skipped / that passed the cloud threshold
idx_skipped, idx_nocloud = [], []
# timestamps and shorelines of the selected images
t, shorelines = [], []

# Main loop over the pan/ms image pairs. For each pair: build the cloud mask,
# skip cloudy or duplicated acquisitions, extract the candidate water lines
# and let the user select the shoreline (or discard the image) by clicking.
for i in range(N):
    # acquisition date: the 10 characters that follow the satname and sitename
    # parts of the file name (each followed by one separator character).
    # Computed once so the duplicate check and the stored date always agree,
    # whatever the length of sitename.
    idx_date = len(satname) + 1 + len(sitename) + 1
    date_acquired = file_names_pan[i][idx_date:idx_date + 10]

    # read pan image
    fn_pan = os.path.join(file_path_pan, file_names_pan[i])
    data = gdal.Open(fn_pan, gdal.GA_ReadOnly)
    georef = np.array(data.GetGeoTransform())
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im_pan = np.stack(bands, 2)[:,:,0]
    # read ms image
    fn_ms = os.path.join(file_path_ms, file_names_ms[i])
    data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im_ms = np.stack(bands, 2)
    # cloud mask (built from band index 5 of the ms stack, used as QA band)
    im_qa = im_ms[:,:,5]
    cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
    # bring the mask to the pan resolution without interpolating (order 0)
    cloud_mask = transform.resize(cloud_mask, (im_pan.shape[0], im_pan.shape[1]),
                                  order=0, preserve_range=True,
                                  mode='constant').astype('bool_')
    # resize the image using bilinear interpolation (order 1)
    im_ms = transform.resize(im_ms, (im_pan.shape[0], im_pan.shape[1]),
                             order=1, preserve_range=True, mode='constant')
    # check if -inf or nan values and add to cloud mask
    im_inf = np.isin(im_ms[:,:,0], -np.inf)
    im_nan = np.isnan(im_ms[:,:,0])
    cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
    # fraction of masked pixels in the image
    cloud_cover = np.sum(cloud_mask) / cloud_mask.size
    if cloud_cover > cloud_thresh:
        print('skip ' + str(i) + ' - cloudy (' + str(cloud_cover) + ')')
        idx_skipped.append(i)
        continue
    idx_nocloud.append(i)

    # check if image for that date is already present
    if date_acquired in date_acquired_ts:
        # index of the image that is repeated (at most one entry per date is
        # kept, since a replaced entry is deleted below)
        idx_samedate = date_acquired_ts.index(date_acquired)
        print('cloud cover ' + str(cloud_cover) + ' - ' + str(cloud_cover_ts[idx_samedate]))
        print('acc georef ' + str(acc_georef_sorted[i]) + ' - ' + str(acc_georef_ts[idx_samedate]))
        # keep image with less cloud cover or best georeferencing accuracy
        is_better = (cloud_cover < cloud_cover_ts[idx_samedate] - 0.01
                     or acc_georef_sorted[i] < acc_georef_ts[idx_samedate])
        if not is_better:
            print('skip ' + str(i) + ' - repeated')
            idx_skipped.append(i)
            continue
        # drop the previous image of that date from every output series;
        # shorelines must be deleted too, otherwise it gets out of step with t
        # NOTE(review): if the new image is discarded manually further down,
        # the date ends up with no entry at all - confirm this is intended
        del shorelines[idx_samedate]
        del t[idx_samedate]
        del cloud_cover_ts[idx_samedate]
        del date_acquired_ts[idx_samedate]
        del acc_georef_ts[idx_samedate]
        print('keep ' + str(i) + ' - deleted ' + str(idx_samedate))

    # rescale intensities
    im_ms = sds.rescale_image_intensity(im_ms, cloud_mask, prob_high, plot_bool)
    im_pan = sds.rescale_image_intensity(im_pan, cloud_mask, prob_high, plot_bool)
    # pansharpen rgb image
    im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, True)
    # add down-sized bands for NIR and SWIR (since pansharpening is not possible)
    im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2)
    # calculate NDWI
    im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, plot_bool)
    # detect edges
    wl_pix = sds.find_wl_contours(im_ndwi, cloud_mask, min_contour_points, True)
    # convert from pixels to world coordinates
    wl_coords = sds.convert_pix2world(wl_pix, georef)
    # convert to output epsg spatial reference
    wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)

    # classify sand pixels
#    im_sand = sds.classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, False, min_beach_size, True)

    # plot a figure to select the correct water line and discard cloudy images
    plt.figure()
    # plt.get_cmap is used because cm.get_cmap was removed from recent Matplotlib
    cmap = plt.get_cmap('jet')
    # left panel: image with the contours in pixel coordinates
    plt.subplot(121)
    plt.imshow(im_ms_ps[:,:,[2,1,0]])
    colours = cmap(np.linspace(0, 1, num=len(wl_pix)))  # one colour per contour
    for j, contour in enumerate(wl_pix):
        plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color=colours[j,:])
    plt.axis('image')
    plt.title(file_names_pan[i])
    # right panel: contours, their centroids and the reference points in
    # output-epsg coordinates
    plt.subplot(122)
    centroids = []
    colours = cmap(np.linspace(0, 1, num=len(wl)))
    for j, contour in enumerate(wl):
        centroid = [np.mean(contour[:, 0]), np.mean(contour[:, 1])]
        centroids.append(centroid)
        plt.plot(contour[:, 0], contour[:, 1], linewidth=2, color=colours[j,:])
        plt.plot(centroid[0], centroid[1], 'o', color=colours[j,:])
    plt.plot(refpoints[:,0], refpoints[:,1], 'k.')
    plt.axis('equal')
    plt.title(file_names_pan[i])
    # NOTE(review): window.showMaximized() looks Qt-specific - confirm backend
    mng = plt.get_current_fig_manager()
    mng.window.showMaximized()
    plt.tight_layout()
    plt.draw()

    # click on the left image to discard, otherwise on the closest centroid in the right image
    pt_in = np.array(ginput(n=1, timeout=1000))
    # ginput returns an empty list when it times out or the input is ended
    # without a click; treat it as a skip instead of failing on pt_in[0][0]
    if pt_in.size == 0:
        print('skip ' + str(i) + ' - no selection')
        idx_skipped.append(i)
        continue
    # a small x value is taken as a click on the left (pixel-coordinate) panel
    # NOTE(review): assumes x world coordinates are always >= 10000 and the
    # image is narrower than 10000 pixels - confirm for other sites/epsg codes
    if pt_in[0][0] < 10000:
        print('skip ' + str(i) + ' - manual')
        idx_skipped.append(i)
        continue
    # nothing to select when no water line was detected (argmin would fail)
    if not centroids:
        print('skip ' + str(i) + ' - no water line')
        idx_skipped.append(i)
        continue
    # get contour that was selected (click closest to centroid)
    dist_centroid = [np.linalg.norm(centroid - pt_in) for centroid in centroids]
    shorelines.append(wl[np.argmin(dist_centroid)])

    # store the metadata of the selected image (kept in step with shorelines)
    t.append(timestamps_sorted[i])
    cloud_cover_ts.append(cloud_cover)
    acc_georef_ts.append(acc_georef_sorted[i])
    date_acquired_ts.append(date_acquired)
    
    
#plt.figure()
#plt.axis('equal')
#for j in range(len(shorelines)):
#    plt.plot(shorelines[j][:,0], shorelines[j][:,1])
#plt.draw()
    
# gather the results of the selected images in a single dictionary
output = dict(
    t=t,
    shorelines=shorelines,
    cloud_cover=cloud_cover_ts,
    acc_georef=acc_georef_ts,
)

#with open(os.path.join(filepath, sitename + '_output' + '.pkl'), 'wb') as f:
#    pickle.dump(output, f)
#    
#with open(os.path.join(filepath, sitename + '_skipped' + '.pkl'), 'wb') as f:
#    pickle.dump(idx_skipped, f)
#
#with open(os.path.join(filepath, sitename + '_idxnocloud' + '.pkl'), 'wb') as f:
#    pickle.dump(idx_nocloud, f)
new implementation 7 years ago			`# -- coding: utf-8 --`

updated most functions and workflow 7 years ago			`#==========================================================#`
			`# Extract shorelines from Landsat images`
			`#==========================================================#`
new implementation 7 years ago
			`# Initial settings`
			`import os`
			`import numpy as np`
			`import matplotlib.pyplot as plt`
			`import ee`
			`import pdb`

			`# other modules`
			`from osgeo import gdal, ogr, osr`
			`import pickle`
			`import matplotlib.cm as cm`
			`from pylab import ginput`

			`# image processing modules`
			`import skimage.filters as filters`
			`import skimage.exposure as exposure`
			`import skimage.transform as transform`
			`import sklearn.decomposition as decomposition`
			`import skimage.measure as measure`

			`# import own modules`
			`import functions.utils as utils`
testing the NN classifier 7 years ago			`import functions.sds_old1 as sds`
new implementation 7 years ago
work on shoreline comparison 7 years ago			`# some settings`
new implementation 7 years ago			`np.seterr(all='ignore') # raise/ignore divisions by 0 and nans`
reorganisation 7 years ago			`plt.rcParams['axes.grid'] = True`
			`plt.rcParams['figure.max_open_warning'] = 100`
new implementation 7 years ago			`ee.Initialize()`

work on shoreline comparison 7 years ago			`# parameters`
new implementation 7 years ago			`cloud_thresh = 0.5 # threshold for cloud cover`
updated most functions and workflow 7 years ago			`plot_bool = False # if you want the plots`
new implementation 7 years ago			`prob_high = 99.9 # upper probability to clip and rescale pixel intensity`
			`min_contour_points = 100# minimum number of points contained in each water line`
			`output_epsg = 28356 # GDA94 / MGA Zone 56`
updated sand classification if buffer_size = False, then classify the entire image 7 years ago			`buffer_size = 10 # radius (in pixels) of disk for buffer (pixel classification)`
			`min_beach_size = 50 # number of pixels in a beach (pixel classification)`
new implementation 7 years ago
work on shoreline comparison 7 years ago			`# load metadata (timestamps and epsg code) for the collection`
new implementation 7 years ago			`satname = 'L8'`
add georeferencing accuracy in the metadata 7 years ago			`sitename = 'NARRA'`
			`#sitename = 'OLDBAR'`

			`# Load metadata`
new implementation 7 years ago			`filepath = os.path.join(os.getcwd(), 'data', satname, sitename)`
			`with open(os.path.join(filepath, sitename + '_timestamps' + '.pkl'), 'rb') as f:`
			`timestamps = pickle.load(f)`
add georeferencing accuracy in the metadata 7 years ago			`with open(os.path.join(filepath, sitename + '_accuracy_georef' + '.pkl'), 'rb') as f:`
			`acc_georef = pickle.load(f)`
new implementation 7 years ago			`with open(os.path.join(filepath, sitename + '_epsgcode' + '.pkl'), 'rb') as f:`
			`input_epsg = pickle.load(f)`
updated most functions and workflow 7 years ago			`with open(os.path.join(filepath, sitename + '_refpoints' + '.pkl'), 'rb') as f:`
			`refpoints = pickle.load(f)`
add georeferencing accuracy in the metadata 7 years ago			`# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)`
			`timestamps_sorted = sorted(timestamps)`
			`idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)`
			`acc_georef_sorted = [acc_georef[j] for j in idx_sorted]`
new implementation 7 years ago
work on shoreline comparison 7 years ago			`# path to images`
updated most functions and workflow 7 years ago			`file_path_pan = os.path.join(os.getcwd(), 'data', satname, sitename, 'pan')`
			`file_path_ms = os.path.join(os.getcwd(), 'data', satname, sitename, 'ms')`
new implementation 7 years ago			`file_names_pan = os.listdir(file_path_pan)`
			`file_names_ms = os.listdir(file_path_ms)`
			`N = len(file_names_pan)`
work on shoreline comparison 7 years ago
			`# initialise some variables`
			`cloud_cover_ts = []`
			`date_acquired_ts = []`
add georeferencing accuracy in the metadata 7 years ago			`acc_georef_ts = []`
work on shoreline comparison 7 years ago			`idx_skipped = []`
updated most functions and workflow 7 years ago			`idx_nocloud = []`
new implementation 7 years ago			`t = []`
			`shorelines = []`

			`for i in range(N):`
			`# read pan image`
			`fn_pan = os.path.join(file_path_pan, file_names_pan[i])`
			`data = gdal.Open(fn_pan, gdal.GA_ReadOnly)`
			`georef = np.array(data.GetGeoTransform())`
			`bands = [data.GetRasterBand(i + 1).ReadAsArray() for i in range(data.RasterCount)]`
			`im_pan = np.stack(bands, 2)[:,:,0]`
			`# read ms image`
			`fn_ms = os.path.join(file_path_ms, file_names_ms[i])`
			`data = gdal.Open(fn_ms, gdal.GA_ReadOnly)`
			`bands = [data.GetRasterBand(i + 1).ReadAsArray() for i in range(data.RasterCount)]`
			`im_ms = np.stack(bands, 2)`
			`# cloud mask`
			`im_qa = im_ms[:,:,5]`
reorganisation 7 years ago			`cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)`
new implementation 7 years ago			`cloud_mask = transform.resize(cloud_mask, (im_pan.shape[0], im_pan.shape[1]),`
			`order=0, preserve_range=True,`
			`mode='constant').astype('bool_')`
			`# resize the image using bilinear interpolation (order 1)`
			`im_ms = transform.resize(im_ms,(im_pan.shape[0], im_pan.shape[1]),`
			`order=1, preserve_range=True, mode='constant')`
			`# check if -inf or nan values and add to cloud mask`
			`im_inf = np.isin(im_ms[:,:,0], -np.inf)`
			`im_nan = np.isnan(im_ms[:,:,0])`
			`cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)`
work on shoreline comparison 7 years ago			`cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])`
			`if cloud_cover > cloud_thresh:`
add georeferencing accuracy in the metadata 7 years ago			`print('skip ' + str(i) + ' - cloudy (' + str(cloud_cover) + ')')`
work on shoreline comparison 7 years ago			`idx_skipped.append(i)`
new implementation 7 years ago			`continue`
updated most functions and workflow 7 years ago			`idx_nocloud.append(i)`
add georeferencing accuracy in the metadata 7 years ago			`# check if image for that date is already present`
updated most functions and workflow 7 years ago			`if file_names_pan[i][len(satname)+1+len(sitename)+1:len(satname)+1+len(sitename)+1+10] in date_acquired_ts:`
add georeferencing accuracy in the metadata 7 years ago			`# find the index of the image that is repeated`
work on shoreline comparison 7 years ago			`idx_samedate = utils.find_indices(date_acquired_ts, lambda e : e == file_names_pan[i][9:19])`
			`idx_samedate = idx_samedate[0]`
add georeferencing accuracy in the metadata 7 years ago			`print('cloud cover ' + str(cloud_cover) + ' - ' + str(cloud_cover_ts[idx_samedate]))`
			`print('acc georef ' + str(acc_georef_sorted[i]) + ' - ' + str(acc_georef_ts[idx_samedate]))`
			`# keep image with less cloud cover or best georeferencing accuracy`
			`if cloud_cover < cloud_cover_ts[idx_samedate] - 0.01:`
			`skip = False`
			`elif acc_georef_sorted[i] < acc_georef_ts[idx_samedate]:`
			`skip = False`
			`else:`
			`skip = True`
			`if skip:`
			`print('skip ' + str(i) + ' - repeated')`
work on shoreline comparison 7 years ago			`idx_skipped.append(i)`
			`continue`
			`else:`
updated sand classification if buffer_size = False, then classify the entire image 7 years ago			`# del shorelines[idx_samedate]`
work on shoreline comparison 7 years ago			`del t[idx_samedate]`
			`del cloud_cover_ts[idx_samedate]`
			`del date_acquired_ts[idx_samedate]`
add georeferencing accuracy in the metadata 7 years ago			`del acc_georef_ts[idx_samedate]`
			`print('keep ' + str(i) + ' - deleted ' + str(idx_samedate))`
work on shoreline comparison 7 years ago
new implementation 7 years ago			`# rescale intensities`
			`im_ms = sds.rescale_image_intensity(im_ms, cloud_mask, prob_high, plot_bool)`
			`im_pan = sds.rescale_image_intensity(im_pan, cloud_mask, prob_high, plot_bool)`
			`# pansharpen rgb image`
testing the NN classifier 7 years ago			`im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, True)`
new implementation 7 years ago			`# add down-sized bands for NIR and SWIR (since pansharpening is not possible)`
			`im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2)`
			`# calculate NDWI`
			`im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, plot_bool)`
			`# detect edges`
testing the NN classifier 7 years ago			`wl_pix = sds.find_wl_contours(im_ndwi, cloud_mask, min_contour_points, True)`
new implementation 7 years ago			`# convert from pixels to world coordinates`
			`wl_coords = sds.convert_pix2world(wl_pix, georef)`
			`# convert to output epsg spatial reference`
			`wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)`

updated sand classification if buffer_size = False, then classify the entire image 7 years ago			`# classify sand pixels`
testing the NN classifier 7 years ago			`# im_sand = sds.classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, False, min_beach_size, True)`
updated sand classification if buffer_size = False, then classify the entire image 7 years ago
testing the NN classifier 7 years ago			`# plot a figure to select the correct water line and discard cloudy images`
			`plt.figure()`
			`cmap = cm.get_cmap('jet')`
			`plt.subplot(121)`
			`plt.imshow(im_ms_ps[:,:,[2,1,0]])`
			`for j,contour in enumerate(wl_pix):`
			`colours = cmap(np.linspace(0, 1, num=len(wl_pix)))`
			`plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color=colours[j,:])`
			`plt.axis('image')`
			`plt.title(file_names_pan[i])`
			`plt.subplot(122)`
			`centroids = []`
			`for j,contour in enumerate(wl):`
			`colours = cmap(np.linspace(0, 1, num=len(wl)))`
			`centroids.append([np.mean(contour[:, 0]),np.mean(contour[:, 1])])`
			`plt.plot(contour[:, 0], contour[:, 1], linewidth=2, color=colours[j,:])`
			`plt.plot(np.mean(contour[:, 0]), np.mean(contour[:, 1]), 'o', color=colours[j,:])`
			`plt.plot(refpoints[:,0], refpoints[:,1], 'k.')`
			`plt.axis('equal')`
			`plt.title(file_names_pan[i])`
			`mng = plt.get_current_fig_manager()`
			`mng.window.showMaximized()`
			`plt.tight_layout()`
			`plt.draw()`
			`# click on the left image to discard, otherwise on the closest centroid in the right image`
			`pt_in = np.array(ginput(n=1, timeout=1000))`
			`if pt_in[0][0] < 10000:`
			`print('skip ' + str(i) + ' - manual')`
			`idx_skipped.append(i)`
			`continue`
			`# get contour that was selected (click closest to centroid)`
			`dist_centroid = [np.linalg.norm(_ - pt_in) for _ in centroids]`
			`shorelines.append(wl[np.argmin(dist_centroid)])`
updated sand classification if buffer_size = False, then classify the entire image 7 years ago
new implementation 7 years ago			`t.append(timestamps_sorted[i])`
work on shoreline comparison 7 years ago			`cloud_cover_ts.append(cloud_cover)`
add georeferencing accuracy in the metadata 7 years ago			`acc_georef_ts.append(acc_georef_sorted[i])`
work on shoreline comparison 7 years ago			`date_acquired_ts.append(file_names_pan[i][9:19])`
new implementation 7 years ago
updated most functions and workflow 7 years ago
new implementation 7 years ago			`#plt.figure()`
			`#plt.axis('equal')`
			`#for j in range(len(shorelines)):`
			`# plt.plot(shorelines[j][:,0], shorelines[j][:,1])`
			`#plt.draw()`

add georeferencing accuracy in the metadata 7 years ago			`output = {'t':t, 'shorelines':shorelines, 'cloud_cover':cloud_cover_ts, 'acc_georef':acc_georef_ts}`
reorganisation 7 years ago
add georeferencing accuracy in the metadata 7 years ago			`#with open(os.path.join(filepath, sitename + '_output' + '.pkl'), 'wb') as f:`
			`# pickle.dump(output, f)`
			`#`
			`#with open(os.path.join(filepath, sitename + '_skipped' + '.pkl'), 'wb') as f:`
			`# pickle.dump(idx_skipped, f)`
			`#`
			`#with open(os.path.join(filepath, sitename + '_idxnocloud' + '.pkl'), 'wb') as f:`
			`# pickle.dump(idx_nocloud, f)`