updated master

added download_images and read_images scripts
updated gitignore
26 changed files with 2460 additions and 1814 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,7 @@
 *.pyc
 *.mat
 *.tif
 *.png
 *.mp4
 *.gif
 *.jpg
--- a/README.md
+++ b/README.md
@ -1,6 +1,9 @@
 This toolbox uses the Google Earth Engine Python API to explore collections of publicly available satellite imagery (Landsat, Sentinel).
 It has .py routines to:
-	- read and preprocess satellite images (cloud masking, contrast stretching)
+- read and preprocess satellite images (cloud masking, contrast stretching)
-	- pansharpen Landsat 8 images
+- pansharpen Landsat 8 images
-	- extract shorelines with the Marching Squares algorithm
+- extract shorelines with the Marching Squares algorithm
 - classify image in 4 classes (sand, whitewater, water, other) using a Neural Network classifier
 Requirements: all the packages contained in py3_environments.txt
--- a/compare_shorelines.py
+++ b/compare_shorelines.py
@ -1,222 +0,0 @@
 # -*- coding: utf-8 -*-
 # Preamble
 import ee
 import matplotlib.pyplot as plt
 import matplotlib.cm as cm
 import numpy as np
 import pandas as pd
 from datetime import datetime
 import pickle
 import pdb
 import pytz
 from pylab import ginput
 import scipy.io as sio
 import scipy.interpolate
 import os
 # image processing modules
 import skimage.filters as filters 
 import skimage.exposure as exposure
 import skimage.transform as transform
 import sklearn.decomposition as decomposition
 import skimage.morphology as morphology
 import skimage.measure as measure
 # my functions
 import functions.utils as utils
 import functions.sds as sds
 np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
 ee.Initialize()
 au_tz = pytz.timezone('Australia/Sydney')
 #%%
 # load SDS shorelines
 with open('data\data_gt_l8.pkl', 'rb') as f:
    data = pickle.load(f)
 # load quadbike dates and convert from datenum to datetime
 suffix = '.mat'
 dir_name = os.getcwd()
 file_name = 'data\quadbike_dates'
 file_path = os.path.join(dir_name, file_name + suffix)
 quad_dates = sio.loadmat(file_path)['dates']
 dt_quad = []
 for i in range(quad_dates.shape[0]): 
    dt_quad.append(datetime(quad_dates[i,0], quad_dates[i,1], quad_dates[i,2], tzinfo=au_tz))
 # remove overlapping images, keep the one with lowest cloud_cover 
 n = len(data['cloud_cover'])
 idx_worst = []
 for i in range(n):
    date_im = data['date_acquired'][i]
    idx_double = np.isin(data['date_acquired'], date_im)
    if sum(idx_double.astype(int)) > 1:
        idx_worst.append(np.where(idx_double)[0][np.argmax(np.array(data['cloud_cover'])[idx_double])])
 dt_sat = []
 new_meta = {'contours':[],
            'cloud_cover':[],
            'geom_rmse_model':[],
            'gcp_model':[],
            'quality':[],
            'sun_azimuth':[],
            'sun_elevation':[]}
 for i in range(n):
    if not np.isin(i,idx_worst):
        dt_sat.append(data['dt'][i].astimezone(au_tz))
        new_meta['contours'].append(data['contours'][i])
        new_meta['cloud_cover'].append(data['cloud_cover'][i])
        new_meta['geom_rmse_model'].append(data['geom_rmse_model'][i])
        new_meta['gcp_model'].append(data['gcp_model'][i])
        new_meta['quality'].append(data['quality'][i])
        new_meta['sun_azimuth'].append(data['sun_azimuth'][i])
        new_meta['sun_elevation'].append(data['sun_elevation'][i])
 # calculate difference between days
 diff_days = [ [(x - _).days for _ in dt_quad] for x in dt_sat]
 day_thresh = 15
 idx_close = [utils.find_indices(_, lambda e: abs(e) < day_thresh) for _ in diff_days]
 # put everything in a dictionnary and save it
 wl_comp = []
 for i in range(len(dt_sat)):
    wl_comp.append({'sat dt': dt_sat[i],
                    'quad dt': [dt_quad[_] for _ in idx_close[i]],
                    'days diff': [diff_days[i][_] for _ in idx_close[i]],
                    'contours':  new_meta['contours'][i],
                    'cloud_cover':  new_meta['cloud_cover'][i],
                    'geom_rmse_model':  new_meta['geom_rmse_model'][i],
                    'gcp_model':  new_meta['gcp_model'][i],
                    'quality':  new_meta['quality'][i],
                    'sun_azimuth':  new_meta['sun_azimuth'][i],
                    'sun_elevation':  new_meta['sun_elevation'][i]})
 with open('wl_l8_comparison.pkl', 'wb') as f:
    pickle.dump(wl_comp, f)
 #%%
 with open('data\wl_l8_comparison.pkl', 'rb') as f:
    wl = pickle.load(f)
 # load quadbike dates and convert from datenum to datetime
 suffix = '.mat'
 dir_name = os.getcwd()
 subfolder_name = 'data\quadbike_surveys'
 file_path = os.path.join(dir_name, subfolder_name)
 file_names = os.listdir(file_path)
 for i in range(len(file_names)):
    fn_mat = os.path.join(file_path, file_names[i])
    years = int(file_names[i][6:10])
    months = int(file_names[i][11:13])
    days = int(file_names[i][14:16])
    for j in range(len(wl)):
        if wl[j]['quad dt'][0] == datetime(years, months, days, tzinfo=au_tz):
            quad_mat = sio.loadmat(fn_mat)
            wl[j].update({'quad_data':{'x':quad_mat['x'],
                         'y':quad_mat['y'],
                         'z':quad_mat['z'],
                         'dt': datetime(years, months, days, tzinfo=au_tz)}})
 with open('data\wl_final.pkl', 'wb') as f:
    pickle.dump(wl, f) 
 #%%
 with open('data\wl_final.pkl', 'rb') as f:
    wl = pickle.load(f) 
 i = 0        
 x = wl[i]['quad_data']['x']
 y = wl[i]['quad_data']['y']
 z = wl[i]['quad_data']['z']
 x = x.reshape(x.shape[0] * x.shape[1])
 y = y.reshape(y.shape[0] * y.shape[1])
 z = z.reshape(z.shape[0] * z.shape[1])
 idx_nan = np.isnan(z)
 x_nan = x[idx_nan]
 y_nan = y[idx_nan]
 z_nan = z[idx_nan]
 x_nonan = x[~idx_nan]
 y_nonan = y[~idx_nan]
 z_nonan = z[~idx_nan]
 xs = x_nonan[::10]
 ys = y_nonan[::10]
 zs = z_nonan[::10]
 xq = wl[i]['contours'][:,0]
 yq = wl[i]['contours'][:,1]
 # cut xq around xs
 np.min(xs)
 np.max(xs)
 np.min(ys)
 np.max(ys)
 idx_x = np.logical_and(xq < np.max(xs), xq > np.min(xs))
 idx_y = np.logical_and(yq < np.max(ys), yq > np.min(ys))
 idx_in = np.logical_and(idx_x, idx_y)
 xq = xq[idx_in]
 yq = yq[idx_in]
 for i in range(len(xq)):
    idx_x = np.logical_and(xs < xq[i] + 10, xs > xq[i] - 10)
    idx_y = np.logical_and(ys < yq[i] + 10, ys > yq[i] - 10)
    xint = xs[idx_x]
    yint = ys[idx_y]
 f = interpolate.interp2d(xs, ys, zs, kind='linear')
 zq = f(xq,yq)
 plt.figure()
 plt.grid()
 plt.scatter(xs, ys, s=10, c=zs, marker='o', cmap=cm.get_cmap('jet'),
            label='quad data')
 plt.plot(xq,yq,'r-o', markersize=5, label='SDS')
 plt.axis('equal')
 plt.legend()
 plt.colorbar(label='mAHD')
 plt.xlabel('Eastings [m]')
 plt.ylabel('Northings [m]')
 plt.show()
 plt.figure()
 plt.plot(zq[:,0])
 plt.show()
 plt.figure()
 plt.grid()
 plt.scatter(x_nonan, y_nonan, s=10, c=z_nonan, marker='o', cmap=cm.get_cmap('jet'),
            label='quad data')
 #plt.plot(x_nan, y_nan, 'k.', label='nans')
 plt.plot(xq,yq,'r-o', markersize=5, label='SDS')
 plt.axis('equal')
 plt.legend()
 plt.colorbar(label='mAHD')
 plt.xlabel('Eastings [m]')
 plt.ylabel('Northings [m]')
 plt.show()
 z2 = scipy.interpolate.griddata([x, y], z, [xq, yq], method='linear')
 f_interp = scipy.interpolate.interp2d(x1,y1,z1, kind='linear')
 sio.savemat('shoreline1.mat', {'x':xq, 'y':yq})
 from scipy import interpolate
 x = np.arange(-5.01, 5.01, 0.01)
 y = np.arange(-5.01, 5.01, 0.01)
 xx, yy = np.meshgrid(x, y)
 z = np.sin(xx**2+yy**2)
 f = interpolate.interp2d(x, y, z, kind='cubic')
 xnew = np.arange(-5.01, 5.01, 1e-2)
 ynew = np.arange(-5.01, 5.01, 1e-2)
 znew = f(xnew, ynew)
 plt.plot(x, z[:, 0], 'ro-', xnew, znew[:, 0], 'b-')
 plt.show()
--- a/compare_shorelines2.py
+++ b/compare_shorelines2.py
@ -1,47 +0,0 @@
 # -*- coding: utf-8 -*-
 # Preamble
 import ee
 import matplotlib.pyplot as plt
 import matplotlib.cm as cm
 import numpy as np
 import pandas as pd
 from datetime import datetime
 import pickle
 import pdb
 import pytz
 from pylab import ginput
 import scipy.io as sio
 import scipy.interpolate
 import os
 # image processing modules
 import skimage.filters as filters 
 import skimage.exposure as exposure
 import skimage.transform as transform
 import sklearn.decomposition as decomposition
 import skimage.morphology as morphology
 import skimage.measure as measure
 # my functions
 import functions.utils as utils
 import functions.sds as sds
 np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
 ee.Initialize()
 with open('data\wl_final.pkl', 'rb') as f:
    wl = pickle.load(f)
 i = 0        
 x = wl[i]['quad_data']['x']
 y = wl[i]['quad_data']['y']
 z = wl[i]['quad_data']['z']
 x = x.reshape(x.shape[0] * x.shape[1])
 y = y.reshape(y.shape[0] * y.shape[1])
 z = z.reshape(z.shape[0] * z.shape[1])
 idx_nan = np.isnan(z)
 x = x[~idx_nan]
 y = y[~idx_nan]
 z = z[~idx_nan]
--- a/data/data_2016.pkl
+++ b/data/data_2016.pkl
--- a/data/data_gt_l8.pkl
+++ b/data/data_gt_l8.pkl
--- a/data/idx_nogt.pkl
+++ b/data/idx_nogt.pkl
--- a/data/narra_beach.pkl
+++ b/data/narra_beach.pkl
--- a/data/quadbike_dates.mat
+++ b/data/quadbike_dates.mat
--- a/data/wl_final.pkl
+++ b/data/wl_final.pkl
--- a/data/wl_l8_comparison.pkl
+++ b/data/wl_l8_comparison.pkl
--- a/download_images.py
+++ b/download_images.py
@ -0,0 +1,390 @@
 #==========================================================#
 #==========================================================#
 # Download L5, L7, L8, S2 images of a given area 
 #==========================================================#
 #==========================================================#
 #==========================================================#
 # Initial settings
 #==========================================================#
 import os
 import numpy as np
 import matplotlib.pyplot as plt
 import pdb
 import ee
 # other modules
 from osgeo import gdal, ogr, osr
 from urllib.request import urlretrieve
 import zipfile
 from datetime import datetime
 import pytz
 import pickle
 # import own modules
 import functions.utils as utils
 import functions.sds as sds
 np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
 ee.Initialize()
 #==========================================================#
 # Location
 #==========================================================#
 ## location (Narrabeen-Collaroy beach)
 #polygon = [[[151.301454, -33.700754],
 #               [151.311453, -33.702075], 
 #               [151.307237, -33.739761],
 #               [151.294220, -33.736329],
 #               [151.301454, -33.700754]]]; 
 # location (Tairua beach)
 sitename = 'TAIRUA'
 polygon = [[[175.835574, -36.982022],
               [175.888220, -36.980680], 
               [175.893527, -37.029610],
               [175.833444, -37.031767],
               [175.835574, -36.982022]]];
 # initialise metadata dictionnary (stores timestamps and georefencing accuracy of each image)       
 metadata = dict([])
 # create directories
 try:
    os.makedirs(os.path.join(os.getcwd(), 'data',sitename))
 except:
    print('directory already exists')
 #%%
 #==========================================================#
 #==========================================================#
 # L5
 #==========================================================#
 #==========================================================#
 # define filenames for images
 suffix = '.tif'
 filepath = os.path.join(os.getcwd(), 'data', sitename, 'L5', '30m')
 try:
    os.makedirs(filepath)
 except:
    print('directory already exists')
 #==========================================================#
 # Select L5 collection 
 #==========================================================#
 satname = 'L5'
 input_col = ee.ImageCollection('LANDSAT/LT05/C01/T1_TOA')
 # filter by location
 flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
 n_img = flt_col.size().getInfo()
 print('Number of images covering ' + sitename, n_img)
 im_all = flt_col.getInfo().get('features')
 #==========================================================#
 # Main loop trough images
 #==========================================================#
 timestamps = []
 acc_georef = []
 all_names = []
 for i in range(n_img):
    # find each image in ee database
    im = ee.Image(im_all[i].get('id'))
    im_dic = im.getInfo()
    im_bands = im_dic.get('bands')
    t = im_dic['properties']['system:time_start']
    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
    timestamps.append(im_timestamp)
    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
    im_epsg = int(im_dic['bands'][0]['crs'][5:])
    try:
        acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
    except:
        acc_georef.append(12)   
        print('No geometric rmse model property')
    # delete dimensions key from dictionnary, otherwise the entire image is extracted
    for j in range(len(im_bands)): del im_bands[j]['dimensions']
    # bands for L5
    ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[7]]
    # filenames
    filename = im_date + '_' + satname + '_' + sitename + suffix
    print(i)
    if any(filename in _ for _ in all_names):
        filename = im_date + '_' + satname + '_' + sitename + '_dup' + suffix 
    all_names.append(filename)
    local_data = sds.download_tif(im, polygon, ms_bands, filepath)
    os.rename(local_data, os.path.join(filepath, filename))
 # sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
 timestamps_sorted = sorted(timestamps)
 idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
 acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
 metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}
 #%%
 #==========================================================#
 #==========================================================#
 # L7&L8
 #==========================================================#
 #==========================================================#
 # define filenames for images
 suffix = '.tif'
 filepath = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8')
 filepath_pan = os.path.join(filepath, 'pan')
 filepath_ms = os.path.join(filepath, 'ms')
 try:
    os.makedirs(filepath_pan)
    os.makedirs(filepath_ms)
 except:
    print('directory already exists')           
 #==========================================================#
 # Select L7 collection 
 #==========================================================#
 satname = 'L7'   
 input_col = ee.ImageCollection('LANDSAT/LE07/C01/T1_RT_TOA')
 # filter by location
 flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
 n_img = flt_col.size().getInfo()
 print('Number of images covering ' + sitename, n_img)
 im_all = flt_col.getInfo().get('features')
 #==========================================================#
 # Main loop trough images
 #==========================================================#
 timestamps = []
 acc_georef = []
 all_names = []
 for i in range(n_img):
    # find each image in ee database
    im = ee.Image(im_all[i].get('id'))
    im_dic = im.getInfo()
    im_bands = im_dic.get('bands')
    t = im_dic['properties']['system:time_start']
    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
    timestamps.append(im_timestamp)
    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
    im_epsg = int(im_dic['bands'][0]['crs'][5:])
    try:
        acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
    except:
        acc_georef.append(12)   
        print('No geometric rmse model property')
    # delete dimensions key from dictionnary, otherwise the entire image is extracted
    for j in range(len(im_bands)): del im_bands[j]['dimensions']
    # bands for L7
    pan_band = [im_bands[8]]
    ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[9]]
    # filenames
    filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
    filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
    print(i)
    if any(filename_pan in _ for _ in all_names):
        filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
        filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix 
    all_names.append(filename_pan)
    local_data_pan = sds.download_tif(im, polygon, pan_band, filepath_pan)
    os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
    local_data_ms = sds.download_tif(im, polygon, ms_bands, filepath_ms)
    os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
 #==========================================================#
 # Select L8 collection 
 #==========================================================#
 satname = 'L8'
 input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
 # filter by location
 flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
 n_img = flt_col.size().getInfo()
 print('Number of images covering Narrabeen:', n_img)
 im_all = flt_col.getInfo().get('features')
 #==========================================================#
 # Main loop trough images
 #==========================================================#
 for i in range(n_img):
    # find each image in ee database
    im = ee.Image(im_all[i].get('id'))
    im_dic = im.getInfo()
    im_bands = im_dic.get('bands')
    t = im_dic['properties']['system:time_start']
    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
    timestamps.append(im_timestamp)
    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
    im_epsg = int(im_dic['bands'][0]['crs'][5:])
    try:
        acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
    except:
        acc_georef.append(12)   
        print('No geometric rmse model property')
    # delete dimensions key from dictionnary, otherwise the entire image is extracted
    for j in range(len(im_bands)): del im_bands[j]['dimensions']
    # bands for L8    
    pan_band = [im_bands[7]]
    ms_bands = [im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[5], im_bands[11]]
    # filenames
    filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
    filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
    print(i)
    if any(filename_pan in _ for _ in all_names):
        filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
        filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix 
    all_names.append(filename_pan)
    local_data_pan = sds.download_tif(im, polygon, pan_band, filepath_pan)
    os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
    local_data_ms = sds.download_tif(im, polygon, ms_bands, filepath_ms)
    os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
 # sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
 timestamps_sorted = sorted(timestamps)
 idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
 acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
 metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}
 #%%
 #==========================================================#
 #==========================================================#
 # S2
 #==========================================================#
 #==========================================================#
 # define filenames for images
 suffix = '.tif'
 filepath = os.path.join(os.getcwd(), 'data', sitename, 'S2')
 try:
    os.makedirs(os.path.join(filepath, '10m'))
    os.makedirs(os.path.join(filepath, '20m'))
    os.makedirs(os.path.join(filepath, '60m'))
 except:
    print('directory already exists') 
 #==========================================================#
 # Select L2 collection 
 #==========================================================#
 satname = 'S2'
 input_col = ee.ImageCollection('COPERNICUS/S2')
 # filter by location
 flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
 n_img = flt_col.size().getInfo()
 print('Number of images covering ' + sitename, n_img)
 im_all = flt_col.getInfo().get('features') 
 #==========================================================#
 # Main loop trough images
 #==========================================================#
 timestamps = []
 acc_georef = []
 all_names = []
 for i in range(n_img):
    # find each image in ee database
    im = ee.Image(im_all[i].get('id'))
    im_dic = im.getInfo()
    im_bands = im_dic.get('bands')
    t = im_dic['properties']['system:time_start']
    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
    timestamps.append(im_timestamp)
    im_epsg = int(im_dic['bands'][0]['crs'][5:])
    try:
        if im_dic['properties']['GEOMETRIC_QUALITY_FLAG'] == 'PASSED':
            acc_georef.append(1)
        else:
            acc_georef.append(0)
    except:
        acc_georef.append(0)
    # delete dimensions key from dictionnary, otherwise the entire image is extracted
    for j in range(len(im_bands)): del im_bands[j]['dimensions']
    # bands for S2
    bands10 = [im_bands[1], im_bands[2], im_bands[3], im_bands[7]]
    bands20 = [im_bands[11]]
    bands60 = [im_bands[15]]
    # filenames
    filename10 = im_date + '_' + satname + '_' + sitename + '_' + '10m' + suffix
    filename20 = im_date + '_' + satname + '_' + sitename + '_' + '20m' + suffix
    filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + suffix
    print(i)
    if any(filename10 in _ for _ in all_names):
        filename10 = im_date + '_' + satname + '_' + sitename + '_' + '10m' + '_dup' + suffix
        filename20 = im_date + '_' + satname + '_' + sitename + '_' + '20m' + '_dup' + suffix
        filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + '_dup' + suffix
    all_names.append(filename10)
    local_data = sds.download_tif(im, polygon, bands10, filepath)
    os.rename(local_data, os.path.join(filepath, '10m', filename10))
    local_data = sds.download_tif(im, polygon, bands20, filepath)
    os.rename(local_data, os.path.join(filepath, '20m', filename20))
    local_data = sds.download_tif(im, polygon, bands60, filepath)
    os.rename(local_data, os.path.join(filepath, '60m', filename60))
 # sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
 timestamps_sorted = sorted(timestamps)
 idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
 acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
 metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}
 #%% save metadata
 filepath = os.path.join(os.getcwd(), 'data', sitename)
 with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'wb') as f:
    pickle.dump(metadata, f)  
--- a/functions/NeuralNet_classif.pkl
+++ b/functions/NeuralNet_classif.pkl
--- a/functions/NeuralNet_classif_nopan.pkl
+++ b/functions/NeuralNet_classif_nopan.pkl
--- a/functions/data_analysis.py
+++ b/functions/data_analysis.py
@ -0,0 +1,432 @@
 """This module contains all the functions needed for data analysis """
 # Initial settings
 import numpy as np
 import matplotlib.pyplot as plt
 import matplotlib.patches as mpatches
 from matplotlib import gridspec
 import pdb
 import ee
 # other modules
 from osgeo import gdal, ogr, osr
 import scipy.interpolate as interpolate
 import scipy.stats as sstats
 # image processing modules
 import skimage.filters as filters 
 import skimage.exposure as exposure
 import skimage.transform as transform
 import sklearn.decomposition as decomposition
 import skimage.measure as measure
 import skimage.morphology as morphology
 # machine learning modules
 from sklearn.cluster import KMeans
 from sklearn.neural_network import MLPClassifier
 from sklearn.externals import joblib
 import time
 # import own modules
 import functions.utils as utils
 def get_tide(dates_sds, dates_tide, tide_level):
    tide = []
    for i in range(len(dates_sds)):
        dates_diff = np.abs(np.array([ (dates_sds[i] - _).total_seconds() for _ in dates_tide]))
        if np.min(dates_diff) <= 1800: # half-an-hour
            idx_closest = np.argmin(dates_diff)
            tide.append(tide_level[idx_closest])
        else:
            tide.append(np.nan)
    tide = np.array(tide)
    return tide
 def remove_duplicates(output, satname):
    " removes duplicates from output structure, keep the one with less cloud cover or best georeferencing "
    dates = output['dates']
    dates_str = [_.strftime('%Y%m%d') for _ in dates]
    dupl = utils.duplicates_dict(dates_str)
    if dupl:
        output_nodup = dict([])
        idx_remove = []
        if satname == 'L8' or satname == 'L5':
            for k,v in dupl.items():
                idx1 = v[0]
                idx2 = v[1]
                c1 = output['metadata']['cloud_cover'][idx1]
                c2 = output['metadata']['cloud_cover'][idx2]
                g1 = output['metadata']['acc_georef'][idx1]
                g2 = output['metadata']['acc_georef'][idx2]
                if c1 < c2 - 0.01:
                    idx_remove.append(idx2)
                elif g1 < g2 - 0.1:
                    idx_remove.append(idx2)
                else:
                    idx_remove.append(idx1)
        else:
            for k,v in dupl.items():
                idx1 = v[0]
                idx2 = v[1]
                c1 = output['metadata']['cloud_cover'][idx1]
                c2 = output['metadata']['cloud_cover'][idx2]
                if c1 < c2 - 0.01:
                    idx_remove.append(idx2)
                else:
                    idx_remove.append(idx1)
        idx_remove = sorted(idx_remove)
        idx_all = np.linspace(0, len(dates_str)-1, len(dates_str))
        idx_keep = list(np.where(~np.isin(idx_all,idx_remove))[0])        
        output_nodup['dates'] = [output['dates'][k] for k in idx_keep]
        output_nodup['shorelines'] = [output['shorelines'][k] for k in idx_keep]
        output_nodup['metadata'] = dict([])
        for key in list(output['metadata'].keys()):
            output_nodup['metadata'][key] = [output['metadata'][key][k] for k in idx_keep]
        print(satname + ' : ' + str(len(idx_remove)) + ' duplicates')
        return output_nodup
    else: 
        print(satname + ' : ' + 'no duplicates')
        return output
 def merge(output):
    " merges data from the different satellites "
    # stack all list together under one key
    output_all = {'dates':[], 'shorelines':[],
                  'metadata':{'filenames':[], 'satname':[], 'cloud_cover':[], 'acc_georef':[]}}
    for satname in list(output.keys()):
        output_all['dates'] = output_all['dates'] + output[satname]['dates']
        output_all['shorelines'] = output_all['shorelines'] + output[satname]['shorelines']
        for key in list(output[satname]['metadata'].keys()):
            output_all['metadata'][key]  = output_all['metadata'][key] + output[satname]['metadata'][key]   
    output_all_sorted = {'dates':[], 'shorelines':[],
                         'metadata':{'filenames':[], 'satname':[], 'cloud_cover':[], 'acc_georef':[]}}
    # sort the dates
    idx_sorted = sorted(range(len(output_all['dates'])), key=output_all['dates'].__getitem__)
    output_all_sorted['dates'] = [output_all['dates'][i] for i in idx_sorted]
    output_all_sorted['shorelines'] = [output_all['shorelines'][i] for i in idx_sorted]
    for key in list(output_all['metadata'].keys()):
        output_all_sorted['metadata'][key] = [output_all['metadata'][key][i] for i in idx_sorted]
    return output_all_sorted
 def create_transects(x0, y0, orientation, chainage_length):
    " creates shore-normal transects "
    transects = []
    for k in range(len(x0)):
        # orientation of cross-shore profile
        phi = (90 - orientation[k])*np.pi/180
        # create a vector using the chainage length
        x = np.linspace(0,chainage_length,chainage_length+1)
        y = np.zeros(len(x))
        coords = np.zeros((len(x),2))
        coords[:,0] = x
        coords[:,1] = y
        # translate and rotate the vector using the origin and orientation
        tf = transform.EuclideanTransform(rotation=phi, translation=(x0[k],y0[k]))
        coords_tf = tf(coords)
        transects.append(coords_tf)
    return transects
 def calculate_chainage(sds, transects, orientation, along_dist):
    " intersect SDS with transect and compute chainage position "
    chainage_mtx = np.zeros((len(sds),len(transects),6))
    for i in range(len(sds)):
        sl = sds[i]
        for j in range(len(transects)): 
            # compute rotation matrix
            X0 = transects[j][0,0]
            Y0 = transects[j][0,1]
            phi = (90 - orientation[j])*np.pi/180
            Mrot = np.array([[np.cos(phi), np.sin(phi)],[-np.sin(phi), np.cos(phi)]])
            # calculate point to line distance between shoreline points and profile
            p1 = np.array([X0,Y0])
            p2 = transects[j][-1,:]
            p3 = sl
            d = np.abs(np.cross(p2-p1,p3-p1)/np.linalg.norm(p2-p1))
            idx_close = utils.find_indices(d, lambda e: e <= along_dist)
            # check if there are SDS points around the profile or not 
            if not idx_close:
                chainage_mtx[i,j,:] = np.tile(np.nan,(1,6))
            else:
                # change of base to shore-normal coordinate system
                xy_close = np.array([sl[idx_close,0],sl[idx_close,1]]) - np.tile(np.array([[X0],[Y0]]), (1,len(sl[idx_close])))
                xy_rot = np.matmul(Mrot, xy_close)
                # put nan values if the chainage is negative (MAKE SURE TO PICK ORIGIN CORRECTLY)
                if np.any(xy_rot[0,:] < 0):
                    xy_rot[0,np.where(xy_rot[0,:] < 0)] = np.nan
                # compute mean, median max and std of chainage position
                n_points = len(xy_rot[0,:])
                mean_cross = np.nanmean(xy_rot[0,:])
                median_cross = np.nanmedian(xy_rot[0,:])
                max_cross = np.nanmax(xy_rot[0,:])
                min_cross = np.nanmin(xy_rot[0,:])
                std_cross = np.nanstd(xy_rot[0,:])
                if std_cross > 10: # if large std, take the most seaward point
                    mean_cross = max_cross
                    median_cross = max_cross
                    min_cross = max_cross
                # store the statistics
                chainage_mtx[i,j,:] = np.array([mean_cross, median_cross, max_cross,
                            min_cross, n_points, std_cross])   
    # format into dictionnary
    chainage = dict([])
    chainage['mean'] = chainage_mtx[:,:,0]
    chainage['median'] = chainage_mtx[:,:,1]
    chainage['max'] = chainage_mtx[:,:,2]
    chainage['min'] = chainage_mtx[:,:,3]
    chainage['npoints'] = chainage_mtx[:,:,4]
    chainage['std'] = chainage_mtx[:,:,5]
    return chainage
 def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
    """
    Compare sds with groundtruth data from topographic surveys / argus shorelines
    KV WRL 2018
    Arguments:
    -----------
        dates_sds: list
            list of dates corresponding to each row in chain_sds
        chain_sds: np.ndarray
            array with time series of chainage for each transect (each transect is one column)
        topo_profiles: dict
            dict containing the dates and chainage of the groundtruth
        mod: 0 or 1
            0 for linear interpolation between 2 closest surveys, 1 for only nearest neighbour
        min_days: int
            minimum number of days for which the data can be compared     
    Returns:    -----------
        stats: dict
            contains all the statistics of the comparison
    """       
    # create 3 figures       
    fig1 = plt.figure()
    gs1 = gridspec.GridSpec(chain_sds.shape[1], 1)
    fig2 = plt.figure()
    gs2 = gridspec.GridSpec(2, chain_sds.shape[1])
    fig3 = plt.figure()
    gs3 = gridspec.GridSpec(2,1)
    dates_sds_num = np.array([_.toordinal() for _ in dates_sds])
    stats = dict([])
    data_fin = dict([])
    # for each transect compare and plot the data
    for i in range(chain_sds.shape[1]):
        pfname = list(topo_profiles.keys())[i]
        stats[pfname] = dict([])
        data_fin[pfname] = dict([])
        dates_sur = topo_profiles[pfname]['dates']
        chain_sur = topo_profiles[pfname]['chainage']
        # convert to datenum
        dates_sur_num = np.array([_.toordinal() for _ in dates_sur])
        chain_sur_interp = []
        diff_days = []
        for j, satdate in enumerate(dates_sds_num):
            temp_diff = satdate - dates_sur_num
            if mod==0:
                # select measurement before and after sat image date and interpolate
                ind_before = np.where(temp_diff == temp_diff[temp_diff > 0][-1])[0]     
                if ind_before == len(temp_diff)-1:
                    chain_sur_interp.append(np.nan)
                    diff_days.append(np.abs(satdate-dates_sur_num[ind_before])[0])
                    continue         
                ind_after = np.where(temp_diff == temp_diff[temp_diff < 0][0])[0]            
                tempx = np.zeros(2)
                tempx[0] = dates_sur_num[ind_before]
                tempx[1] = dates_sur_num[ind_after]
                tempy = np.zeros(2)
                tempy[0] = chain_sur[ind_before]
                tempy[1] = chain_sur[ind_after]
                diff_days.append(np.abs(np.max([satdate-tempx[0], satdate-tempx[1]])))                
                # interpolate
                f = interpolate.interp1d(tempx, tempy)
                chain_sur_interp.append(f(satdate))
            elif mod==1:
                # select the closest measurement
                idx_closest = utils.find_indices(np.abs(temp_diff), lambda e: e == np.min(np.abs(temp_diff)))[0]
                diff_days.append(np.abs(satdate-dates_sur_num[idx_closest]))
                if diff_days[j] > mindays:
                    chain_sur_interp.append(np.nan)
                else:
                    chain_sur_interp.append(chain_sur[idx_closest])
        chain_sur_interp = np.array(chain_sur_interp)
        # remove nan values
        idx_sur_nan = ~np.isnan(chain_sur_interp)
        idx_sat_nan = ~np.isnan(chain_sds[:,i])
        idx_nan = np.logical_and(idx_sur_nan, idx_sat_nan)
        # groundtruth and sds
        chain_sur_fin = chain_sur_interp[idx_nan]
        chain_sds_fin = chain_sds[idx_nan,i]
        dates_fin = [k for (k, v) in zip(dates_sds, idx_nan) if v]
        # calculate statistics
        slope, intercept, rvalue, pvalue, std_err = sstats.linregress(chain_sur_fin, chain_sds_fin) 
        R2 = rvalue**2
        correlation = np.corrcoef(chain_sur_fin, chain_sds_fin)[0,1]
        diff_chain = chain_sur_fin - chain_sds_fin
        rmse = np.sqrt(np.nanmean((diff_chain)**2))
        mean = np.nanmean(diff_chain)
        std = np.nanstd(diff_chain)
        q90 = np.percentile(np.abs(diff_chain), 90)
        # store data
        stats[pfname]['rmse'] = rmse
        stats[pfname]['mean'] = mean
        stats[pfname]['std'] = std
        stats[pfname]['q90'] = q90
        stats[pfname]['diffdays'] = diff_days
        stats[pfname]['corr'] = correlation
        stats[pfname]['linfit'] = {'slope':slope, 'intercept':intercept, 'R2':R2, 'pvalue':pvalue}
        data_fin[pfname]['dates'] = dates_fin
        data_fin[pfname]['sds'] = chain_sds_fin
        data_fin[pfname]['survey'] = chain_sur_fin
        # make time-series plot
        plt.figure(fig1.number)
        fig1.add_subplot(gs1[i,0])
        plt.plot(dates_sur, chain_sur, 'o-', color='C1', markersize=4, label='survey all')
        plt.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2, label='survey interp')
        plt.plot(dates_fin, chain_sds_fin, 'o--', color='b', markersize=4, label='SDS')
        plt.title(pfname, fontweight='bold')
 #        plt.xlim([dates_sds[0], dates_sds[-1]])
        plt.ylabel('chainage [m]')
        # make scatter plot
        plt.figure(fig2.number)
        fig2.add_subplot(gs2[0,i])
        plt.axis('equal')
        plt.plot(chain_sur_fin, chain_sds_fin, 'ko', markersize=4, markerfacecolor='w', alpha=0.7)
        xmax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)])
        xmin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)])
        ymax = np.max([np.nanmax(chain_sds_fin),np.nanmax(chain_sur_fin)])
        ymin = np.min([np.nanmin(chain_sds_fin),np.nanmin(chain_sur_fin)])
        plt.plot([xmin, xmax], [ymin, ymax], 'k--')
        plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'b:')
        str_corr = ' y = %.2f x + %.2f\n R2 = %.2f' % (slope, intercept, R2)
        plt.text(xmin, ymax-5, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
        plt.xlabel('chainage survey [m]')
        plt.ylabel('chainage satellite [m]')
        plt.title(pfname, fontweight='bold')
        fig2.add_subplot(gs2[1,i])
        binwidth = 3
        bins = np.arange(min(diff_chain), max(diff_chain) + binwidth, binwidth)
        density = plt.hist(diff_chain, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k')
        plt.xlim([-50, 50])
        plt.xlabel('error [m]')
        str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90) 
        plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.3), horizontalalignment='left', fontsize=10)
    fig1.set_size_inches(19.2, 9.28)
    fig1.set_tight_layout(True)
    fig2.set_size_inches(19.2, 9.28)
    fig2.set_tight_layout(True)
    # all transects together
    chain_sds_all = []
    chain_sur_all = []
    for i in range(chain_sds.shape[1]):
        pfname = list(topo_profiles.keys())[i]
        chain_sds_all = np.append(chain_sds_all,data_fin[pfname]['sds'])
        chain_sur_all = np.append(chain_sur_all,data_fin[pfname]['survey'])
    # calculate statistics
    slope, intercept, rvalue, pvalue, std_err = sstats.linregress(chain_sur_all, chain_sds_all) 
    R2 = rvalue**2
    correlation = np.corrcoef(chain_sur_all, chain_sds_all)[0,1]
    diff_chain_all = chain_sur_all - chain_sds_all
    rmse = np.sqrt(np.nanmean((diff_chain_all)**2))
    mean = np.nanmean(diff_chain_all)
    std = np.nanstd(diff_chain_all)
    q90 = np.percentile(np.abs(diff_chain_all), 90)
    stats['all'] = {'rmse':rmse,'mean':mean,'std':std,'q90':q90, 'corr':correlation,
         'linfit':{'slope':slope, 'intercept':intercept, 'R2':R2, 'pvalue':pvalue}}
    # make plot
    plt.figure(fig3.number)
    fig3.add_subplot(gs3[0,0])
    plt.axis('equal')
    plt.plot(chain_sur_all, chain_sds_all, 'ko', markersize=4, markerfacecolor='w', alpha=0.7)
    xmax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)])
    xmin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)])
    ymax = np.max([np.nanmax(chain_sds_all),np.nanmax(chain_sur_all)])
    ymin = np.min([np.nanmin(chain_sds_all),np.nanmin(chain_sur_all)])
    plt.plot([xmin, xmax], [ymin, ymax], 'k--')
    plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'b:')
    str_corr = ' y = %.2f x + %.2f\n R2 = %.2f' % (slope, intercept, R2)
    plt.text(xmin, ymax-5, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
    plt.xlabel('chainage survey [m]')
    plt.ylabel('chainage satellite [m]')
    plt.title(pfname, fontweight='bold')
    fig3.add_subplot(gs3[1,0])
    binwidth = 3
    bins = np.arange(min(diff_chain_all), max(diff_chain_all) + binwidth, binwidth)
    density = plt.hist(diff_chain_all, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k')
    plt.xlim([-50, 50])
    plt.xlabel('error [m]')
    str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90) 
    plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.3), horizontalalignment='left', fontsize=10)
    fig3.set_size_inches(9.2, 9.28)
    fig3.set_tight_layout(True)              
    return stats
--- a/functions/sds.py
+++ b/functions/sds.py
--- a/functions/utils.py
+++ b/functions/utils.py
@ -8,8 +8,9 @@ Contains all the utilities, convenience functions and small functions that do si
 """
 import matplotlib.pyplot as plt
 from datetime import datetime, timedelta
 import numpy as np
-import datetime
+import scipy.io as sio
 import pdb
@ -56,3 +57,54 @@ def compare_images(im1, im2):
 def find_indices(lst, condition):
    "imitation of MATLAB find function"
    return [i for i, elem in enumerate(lst) if condition(elem)]
 def reject_outliers(data, m=2):
    "rejects outliers in a numpy array"
    return data[abs(data - np.mean(data)) < m * np.std(data)]
 def duplicates_dict(lst):
    "return duplicates and indices"
    # nested function
    def duplicates(lst, item):
            return [i for i, x in enumerate(lst) if x == item]
    return dict((x, duplicates(lst, x)) for x in set(lst) if lst.count(x) > 1)
 def datenum2datetime(datenum):
    "convert datenum to datetime"
    #takes in datenum and outputs python datetime
    time = [datetime.fromordinal(int(dn)) + timedelta(days=float(dn)%1) - timedelta(days = 366) for dn in datenum]
    return time
 def loadmat(filename):
    '''
    this function should be called instead of direct spio.loadmat
    as it cures the problem of not properly recovering python dictionaries
    from mat files. It calls the function check keys to cure all entries
    which are still mat-objects
    '''
    data = sio.loadmat(filename, struct_as_record=False, squeeze_me=True)
    return _check_keys(data)
 def _check_keys(dict):
    '''
    checks if entries in dictionary are mat-objects. If yes
    todict is called to change them to nested dictionaries
    '''
    for key in dict:
        if isinstance(dict[key], sio.matlab.mio5_params.mat_struct):
            dict[key] = _todict(dict[key])
    return dict        
 def _todict(matobj):
    '''
    A recursive function which constructs from matobjects nested dictionaries
    '''
    dict = {}
    for strg in matobj._fieldnames:
        elem = matobj.__dict__[strg]
        if isinstance(elem, sio.matlab.mio5_params.mat_struct):
            dict[strg] = _todict(elem)
        else:
            dict[strg] = elem
    return dict
--- a/geocheck.py
+++ b/geocheck.py
@ -1,221 +0,0 @@
 # -*- coding: utf-8 -*-
 """
 Created on Thu Mar  1 14:32:08 2018
@author: z5030440
 Main code to extract shorelines from Landsat imagery
 """
 # Preamble
 import ee
 from IPython import display
 import math
 import matplotlib.pyplot as plt
 import numpy as np
 import pdb
 # image processing modules
 import skimage.filters as filters 
 import skimage.exposure as exposure
 import skimage.transform as transform
 import sklearn.decomposition as decomposition
 import skimage.morphology as morphology
 import skimage.measure as measure
 from shapely.geometry import Polygon
 from osgeo import gdal
 from osgeo import osr
 import tempfile
 import urllib
 from urllib.request import urlretrieve
 import zipfile
 # my modules
 from utils import *
 # from sds import *
 np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
 ee.Initialize()
 plot_bool = True # if you want the plots
 def download_tif(image, bandsId):
    """downloads tif image (region and bands) from the ee server and stores it in a temp file"""
    url = ee.data.makeDownloadUrl(ee.data.getDownloadId({
        'image': image.serialize(),
        'bands': bandsId,
        'filePerBand': 'false',
        'name': 'data',
        }))
    local_zip, headers = urlretrieve(url)
    with zipfile.ZipFile(local_zip) as local_zipfile:
        return local_zipfile.extract('data.tif', tempfile.mkdtemp())
 def load_image(image, bandsId): 
    """loads an ee.Image() as a np.array. e.Image() is retrieved from the EE database."""   
    local_tif_filename = download_tif(image, bandsId)
    dataset = gdal.Open(local_tif_filename, gdal.GA_ReadOnly)
    bands = [dataset.GetRasterBand(i + 1).ReadAsArray() for i in range(dataset.RasterCount)]
    return np.stack(bands, 2), dataset
 im = ee.Image('LANDSAT/LC08/C01/T1_RT_TOA/LC08_089083_20130411')
 lon = [151.2820816040039, 151.3425064086914]
 lat = [-33.68206818063878, -33.74775138989556]
 polygon = [[lon[0], lat[0]], [lon[1], lat[0]], [lon[1], lat[1]], [lon[0], lat[1]]];
 # get image metadata into dictionnary
 im_dic = im.getInfo()
 im_bands = im_dic.get('bands')
 # delete dimensions key from dictionnary, otherwise the entire image is extracted
 #for i in range(len(im_bands)): del im_bands[i]['dimensions']
 pan_band = [im_bands[7]]
 ms_bands = [im_bands[1], im_bands[2], im_bands[3]]
 im_full, dataset_full = load_image(im, ms_bands)
 plt.figure()
 plt.imshow(np.clip(im_full[:,:,[2,1,0]] * 3, 0, 1))
 plt.show()
 #%%
 def download_tif(image, polygon, bandsId):
    """downloads tif image (region and bands) from the ee server and stores it in a temp file"""
    url = ee.data.makeDownloadUrl(ee.data.getDownloadId({
        'image': image.serialize(),
        'region': polygon,
        'bands': bandsId,
        'filePerBand': 'false',
        'name': 'data',
        }))
    local_zip, headers = urlretrieve(url)
    with zipfile.ZipFile(local_zip) as local_zipfile:
        return local_zipfile.extract('data.tif', tempfile.mkdtemp())
 def load_image(image, polygon, bandsId): 
    """
    Loads an ee.Image() as a np.array. e.Image() is retrieved from the EE database.
    The geographic area and bands to select can be specified
    KV WRL 2018
    Arguments:
    -----------
        image: ee.Image()
            image objec from the EE database
        polygon: list
            coordinates of the points creating a polygon. Each point is a list with 2 values
        bandsId: list
            bands to select, each band is a dictionnary in the list containing the following keys:
            crs, crs_transform, data_type and id. NOTE: you have to remove the key dimensions, otherwise
            the entire image is retrieved.
    Returns:
    -----------
        image_array : np.ndarray
            An array containing the image (2D if one band, otherwise 3D)
    """
    local_tif_filename = download_tif(image, polygon, bandsId)
    dataset = gdal.Open(local_tif_filename, gdal.GA_ReadOnly)
    bands = [dataset.GetRasterBand(i + 1).ReadAsArray() for i in range(dataset.RasterCount)]
    return np.stack(bands, 2), dataset
 for i in range(len(im_bands)): del im_bands[i]['dimensions']
 ms_bands = [im_bands[1], im_bands[2], im_bands[3]]
 im_cropped, dataset_cropped = load_image(im, polygon, ms_bands)
 plt.figure()
 plt.imshow(np.clip(im_cropped[:,:,[2,1,0]] * 3, 0, 1))
 plt.show()
 #%%
 crs_full  = dataset_full.GetGeoTransform()
 crs_cropped  = dataset_cropped.GetGeoTransform()
 scale = crs_full[1]
 ul_full = np.array([crs_full[0], crs_full[3]])
 ul_cropped = np.array([crs_cropped[0], crs_cropped[3]])
 delta = np.abs(ul_full - ul_cropped)/scale
 u0 = delta[0].astype('int')
 v0 = delta[1].astype('int')
 im_full[v0,u0,:]
 im_cropped[0,0,:]
 lrx = ul_cropped[0] + (dataset_cropped.RasterXSize * scale)
 lry = ul_cropped[1] + (dataset_cropped.RasterYSize * (-scale))
 lr_cropped = np.array([lrx, lry])
 delta = np.abs(ul_full - lr_cropped)/scale
 u1 = delta[0].astype('int')
 v1 = delta[1].astype('int')
 im_cropped2 = im_full[v0:v1,u0:u1,:]
 #%%
 crs_full  = dataset_full.GetGeoTransform()
 source = osr.SpatialReference()
 source.ImportFromWkt(dataset_full.GetProjection())
 target = osr.SpatialReference()
 target.ImportFromEPSG(4326)
 transform = osr.CoordinateTransformation(source, target)
 transform.TransformPoint(ulx, uly)
 #%%
 crs_cropped  = dataset_cropped.GetGeoTransform()
 ulx = crs_cropped[0]
 uly = crs_cropped[3]
 source = osr.SpatialReference()
 source.ImportFromWkt(dataset_cropped.GetProjection())
 target = osr.SpatialReference()
 target.ImportFromEPSG(4326)
 transform = osr.CoordinateTransformation(source, target)
 transform.TransformPoint(lrx, lry)
 #%%
 source = osr.SpatialReference()
 source.ImportFromEPSG(4326)
 target = osr.SpatialReference()
 target.ImportFromEPSG(32656)
 coords = transform.TransformPoint(151.2820816040039, -33.68206818063878)
 coords[0] - ulx
 coords[1] - uly
 #%%
 x_ul_full = ms_bands[0]['crs_transform'][2]
 y_ul_full = ms_bands[0]['crs_transform'][5]
 scale = ms_bands[0]['crs_transform'][0]
 x_ul_cropped = np.array([340756.105840223, 346357.851288875, 346474.839525944, 340877.362938763])
 y_ul_cropped = np.array([-3728229.45372866, -3728137.91775723, -3735421.58347927,  -3735513.20696522])
 dx = abs(x_ul_full - x_ul_cropped)
 dy = abs(y_ul_full - y_ul_cropped)
 u_coord = np.round(dx/scale).astype('int')
 v_coord = np.round(dy/scale).astype('int')
 im_cropped2 = im_full[np.min(v_coord):np.max(v_coord), np.min(u_coord):np.max(u_coord),:]
 plt.figure()
 plt.imshow(np.clip(im_cropped2[:,:,[2,1,0]] * 3, 0, 1), cmap='gray')
 plt.show()
 sum(sum(sum(np.equal(im_cropped,im_cropped2).astype('int')-1)))
--- a/get_coordinates.py
+++ b/get_coordinates.py
@ -1,96 +0,0 @@
 # -*- coding: utf-8 -*-
 """
 Created on Fri Mar 23 12:46:04 2018
@author: z5030440
 """
 # Preamble
 import ee
 import matplotlib.pyplot as plt
 import matplotlib.cm as cm
 import numpy as np
 import pandas as pd
 from datetime import datetime
 import pickle
 import pdb
 import pytz
 from pylab import ginput
 # image processing modules
 import skimage.filters as filters 
 import skimage.exposure as exposure
 import skimage.transform as transform
 import sklearn.decomposition as decomposition
 import skimage.morphology as morphology
 import skimage.measure as measure
 # my functions
 import functions.utils as utils
 import functions.sds as sds
 np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
 ee.Initialize()
 #%% Select images
 # parameters
 plot_bool = False # if you want the plots
 prob_high = 99.9 # upper probability to clip and rescale pixel intensity
 min_contour_points = 100 # minimum number of points contained in each water line
 output_epsg = 28356 # GDA94 / MGA Zone 56
 cloud_threshold = 0.8
 # select collection
 input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
 # location (Narrabeen-Collaroy beach)
 rect_narra = [[[151.3473129272461,-33.69035274454718],
              [151.2820816040039,-33.68206818063878], 
              [151.27281188964844,-33.74775138989556],
              [151.3425064086914,-33.75231878701767],
              [151.3473129272461,-33.69035274454718]]];
 #rect_narra = [[[151.301454, -33.700754],
 #               [151.311453, -33.702075], 
 #               [151.307237, -33.739761],
 #               [151.294220, -33.736329],
 #               [151.301454, -33.700754]]];
 # Dates
 start_date = '2016-01-01'
 end_date = '2016-12-31'
 # filter by location
 flt_col = input_col.filterBounds(ee.Geometry.Polygon(rect_narra)).filterDate(start_date, end_date)
 n_img = flt_col.size().getInfo()
 print('Number of images covering Narrabeen:', n_img)
 im_all = flt_col.getInfo().get('features')
 # find each image in ee database
 im = ee.Image(im_all[0].get('id')) 
 # load image as np.array
 im_pan, im_ms, im_cloud, crs, meta = sds.read_eeimage(im, rect_narra, plot_bool)
 # rescale intensities
 im_ms = sds.rescale_image_intensity(im_ms, im_cloud, prob_high, plot_bool)
 im_pan = sds.rescale_image_intensity(im_pan, im_cloud, prob_high, plot_bool)
 # pansharpen rgb image
 im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, im_cloud, plot_bool)
 plt.figure()
 plt.imshow(im_ms_ps[:,:,[2,1,0]])
 plt.show()
 pts = ginput(15)
 points = np.array(pts)
 plt.plot(points[:,0], points[:,1], 'ko')
 plt.show()
 pts_coords = sds.convert_pix2world(points[:,[1,0]], crs['crs_15m'])
 pts = sds.convert_epsg(pts_coords, crs['epsg_code'], output_epsg)
 with open('data/narra_beach.pkl', 'wb') as f:
    pickle.dump(pts, f)
--- a/plot_cloud_cover.py
+++ b/plot_cloud_cover.py
@ -1,119 +0,0 @@
 # -*- coding: utf-8 -*-
 """
 Created on Thu Mar  1 14:32:08 2018
@author: z5030440
 Main code to extract shorelines from Landsat imagery
 """
 # Preamble
 import ee
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from datetime import datetime
 import pytz
 import pdb
 # image processing modules
 import skimage.filters as filters 
 import skimage.exposure as exposure
 import skimage.transform as transform
 import sklearn.decomposition as decomposition
 import skimage.morphology as morphology
 import skimage.measure as measure
 # my functions
 import functions.utils as utils
 import functions.sds as sds
 np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
 ee.Initialize()
 # parameters
 plot_bool = False # if you want the plots
 prob_high = 99.9 # upper probability to clip and rescale pixel intensity
 min_contour_points = 100 # minimum number of points contained in each water line
 # select collection
 input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_TOA')
 # location (Narrabeen-Collaroy beach)
 rect_narra = [[[151.3473129272461,-33.69035274454718],
              [151.2820816040039,-33.68206818063878], 
              [151.27281188964844,-33.74775138989556],
              [151.3425064086914,-33.75231878701767],
              [151.3473129272461,-33.69035274454718]]];
 # Dates
 start_date = '2016-01-01'
 end_date = '2016-12-31'
 # filter by location
 flt_col = input_col.filterBounds(ee.Geometry.Polygon(rect_narra))#.filterDate(start_date, end_date)
 n_img = flt_col.size().getInfo()
 print('Number of images covering Narrabeen:', n_img)
 im_all = flt_col.getInfo().get('features')
 props = {'cloud_cover_cropped':[],
         'cloud_cover':[],
         'cloud_cover_land':[],
         'date_acquired':[],
         'geom_rmse_model':[],
         'geom_rmse_verify':[],
         'gcp_model':[],
         'gcp_verify':[],
         'quality':[],
         'sun_azimuth':[],
         'sun_elevation':[]}
 t = []
 # loop through all images
 for i in range(n_img):
    # find each image in ee database
    im = ee.Image(im_all[i].get('id'))
    im_bands = im_all[i].get('bands')
    im_props = im_all[i]['properties']
    # compute cloud cover on cropped image    
    for j in range(len(im_bands)): del im_bands[j]['dimensions']
    qa_band = [im_bands[11]]
    im_qa, crs_qa = sds.load_image(im, rect_narra, qa_band)
    im_qa = im_qa[:,:,0]
    im_cloud = sds.create_cloud_mask(im_qa)
    props['cloud_cover_cropped'].append(100*sum(sum(im_cloud.astype(int)))/(im_cloud.shape[0]*im_cloud.shape[1]))
    # extract image metadata
    props['cloud_cover'].append(im_props['CLOUD_COVER'])
    props['cloud_cover_land' ].append(im_props['CLOUD_COVER_LAND'])
    props['date_acquired'].append(im_props['DATE_ACQUIRED'])
    props['geom_rmse_model'].append(im_props['GEOMETRIC_RMSE_MODEL']) 
    props['gcp_model'].append(im_props['GROUND_CONTROL_POINTS_MODEL'])
    props['quality'].append(im_props['IMAGE_QUALITY_OLI'])
    props['sun_azimuth'].append(im_props['SUN_AZIMUTH'])
    props['sun_elevation'].append(im_props['SUN_ELEVATION'])
    # try structure as sometimes the geometry cannot be verified
    try:
        props['geom_rmse_verify'].append(im_props['GEOMETRIC_RMSE_VERIFY'])
        props['gcp_verify'].append(im_props['GROUND_CONTROL_POINTS_VERIFY'])
    except:
        props['geom_rmse_verify'].append(np.nan)
        props['gcp_verify'].append(np.nan) 
    # record exact time of acquisition
    t.append(im_props['system:time_start'])
 #%% create pd.DataFrame with datetime index
 dt = [];
 fmt = '%Y-%m-%d %H:%M:%S %Z%z'
 au_tz = pytz.timezone('Australia/Sydney')
 for k in range(len(t)): dt.append(datetime.fromtimestamp(t[k]/1000, tz=au_tz))  
 df = pd.DataFrame(data = props, index=dt , columns=list(props.keys()))
 df.to_pickle('meta_l8.pkl')
 #df['cloud_cover_cropped'].groupby(df.index.month).count().plot.bar()
 #df_monthly = df['cloud_cover_cropped'].groupby(df.index.month)
--- a/py3_environment.txt
+++ b/py3_environment.txt
@ -0,0 +1,177 @@
 # This file may be used to create an environment using:
 # $ conda create --name <env> --file <this file>
 # platform: win-64
@EXPLICIT
 https://repo.continuum.io/pkgs/main/win-64/alabaster-0.7.10-py36hcd07829_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/asn1crypto-0.24.0-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/astroid-1.6.1-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/babel-2.5.3-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/backports-1.0-py36h81696a8_1.tar.bz2
 https://repo.continuum.io/pkgs/free/win-64/backports.weakref-1.0rc1-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/free/win-64/bleach-1.5.0-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/bokeh-0.12.14-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/ca-certificates-2017.08.26-h94faf87_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/certifi-2018.1.18-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/cffi-1.11.4-py36hfa6e2cd_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/chardet-3.0.4-py36h420ce6e_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/click-6.7-py36hec8c647_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/cloudpickle-0.5.2-py36_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/colorama-0.3.9-py36h029ae33_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/cryptography-2.1.4-py36he1d7878_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/curl-7.58.0-h7602738_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/cycler-0.10.0-py36h009560c_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/dask-0.17.0-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/dask-core-0.17.0-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/decorator-4.2.1-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/distributed-1.21.0-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/docutils-0.14-py36h6012d8f_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/entrypoints-0.2.3-py36hfd66bb0_2.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/expat-2.2.5-hcc4222d_0.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/freetype-2.8.1-vc14_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/freexl-1.0.4-h342dbcb_5.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/geos-3.6.2-h9ef7328_2.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/hdf4-4.2.13-h712560f_2.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/hdf5-1.10.1-h98b8871_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/heapdict-1.0.0-py36_2.tar.bz2
 https://repo.continuum.io/pkgs/free/win-64/html5lib-0.9999999-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/icc_rt-2017.0.4-h97af966_0.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/icu-58.2-vc14_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/idna-2.6-py36h148d497_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/imageio-2.3.0-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/imagesize-0.7.1-py36he29f638_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/intel-openmp-2018.0.0-hd92c6cd_8.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/ipykernel-4.8.0-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/ipython-6.2.1-py36h9cf0123_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/ipython_genutils-0.2.0-py36h3c5d0ee_0.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/ipywidgets-7.1.1-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/isort-4.2.15-py36h6198cc5_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/jedi-0.11.1-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/jinja2-2.10-py36h292fed1_0.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/jpeg-9b-vc14_2.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/jsonschema-2.6.0-py36h7636477_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/jupyter-1.0.0-py36_4.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/jupyter_client-5.2.2-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/jupyter_console-5.2.0-py36h6d89b47_1.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/jupyter_contrib_core-0.3.3-py36_1.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/jupyter_contrib_nbextensions-0.4.0-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/jupyter_core-4.4.0-py36h56e9d50_0.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/jupyter_highlight_selected_word-0.1.0-py36_0.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/jupyter_latex_envs-1.3.8.2-py36_1.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/jupyter_nbextensions_configurator-0.4.0-py36_0.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/jupyterlab-0.31.5-py36_1.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/jupyterlab_launcher-0.10.3-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/kealib-1.4.7-ha5b336b_5.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/krb5-1.14.2-h63dfc2a_6.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/lazy-object-proxy-1.3.1-py36hd1c21d2_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/libboost-1.65.1-he51fdeb_4.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/libcurl-7.58.0-h7602738_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/libgdal-2.2.2-h2727f2b_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/libiconv-1.15-h1df5818_7.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/libkml-1.3.0-hc65d273_3.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/libnetcdf-4.4.1.1-h825a56a_8.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.34-vc14_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/libpq-9.6.6-hfe3f2bf_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/libprotobuf-3.4.1-h3dba5dd_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/libspatialite-4.3.0a-h383548d_18.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/libssh2-1.8.0-hd619d38_4.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/libtiff-4.0.9-vc14_0.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/libxml2-2.9.5-vc14_1.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/libxslt-1.1.32-vc14_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/locket-0.2.0-py36hfed976d_1.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/lxml-4.1.1-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/free/win-64/markdown-2.6.9-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/markupsafe-1.0-py36h0e26971_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/matplotlib-2.1.2-py36h016c42a_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/mccabe-0.6.1-py36hb41005a_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/mistune-0.8.3-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/mkl-2018.0.1-h2108138_4.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/msgpack-python-0.5.1-py36he980bc4_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/nbconvert-5.3.1-py36h8dc0fde_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/nbformat-4.4.0-py36h3a5bc1b_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/networkx-2.1-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/nodejs-8.9.3-hd6b2f15_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/notebook-5.4.0-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/numpy-1.14.1-py36hb69e940_2.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/numpydoc-0.7.0-py36ha25429e_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/olefile-0.45.1-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/openjpeg-2.2.0-h29c51c3_2.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/openssl-1.0.2n-vc14_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/packaging-16.8-py36ha0986f6_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pandas-0.22.0-py36h6538335_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pandoc-1.19.2.1-hb2460c7_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pandocfilters-1.4.2-py36h3ef6317_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/parso-0.1.1-py36hae3edee_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/partd-0.3.8-py36hc8e763b_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/patsy-0.5.0-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pickleshare-0.7.4-py36h9de030f_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pillow-5.0.0-py36h0738816_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pip-9.0.1-py36h226ae91_4.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/proj4-4.9.3-hcf24537_7.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/prompt_toolkit-1.0.15-py36h60b8f86_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/protobuf-3.4.1-py36h07fa351_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/psutil-5.4.3-py36hfa6e2cd_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pycodestyle-2.3.1-py36h7cc55cd_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pycparser-2.18-py36hd053e01_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pyflakes-1.6.0-py36h0b975d6_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pygments-2.2.0-py36hb010967_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pylint-1.8.2-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pyopenssl-17.5.0-py36h5b7d817_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pyparsing-2.2.0-py36h785a196_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pyqt-5.6.0-py36hb5ed885_5.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pysocks-1.6.7-py36h698d350_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/python-3.6.4-h6538335_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/python-dateutil-2.6.1-py36h509ddcb_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pytz-2018.3-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pywavelets-0.5.2-py36hc649158_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pywinpty-0.5-py36h6538335_1.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/pyyaml-3.12-py36_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/pyzmq-16.0.3-py36he714bf5_0.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/qt-5.6.2-vc14_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/qtawesome-0.4.4-py36h5aa48f6_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/qtconsole-4.3.1-py36h99a29a9_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/qtpy-1.3.1-py36hb8717c5_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/requests-2.18.4-py36h4371aae_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/rope-0.10.7-py36had63a69_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/scikit-image-0.13.1-py36hfa6e2cd_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/scikit-learn-0.19.1-py36h53aea1b_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/scipy-1.0.0-py36h1260518_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/send2trash-1.4.2-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/setuptools-38.4.0-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/shapely-1.6.4-py36h2a969d5_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/simplegeneric-0.8.1-py36_2.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/sip-4.18.1-py36h9c25514_2.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/six-1.11.0-py36h4db2310_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/snowballstemmer-1.2.1-py36h763602f_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/sortedcontainers-1.5.9-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/sphinx-1.6.6-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/sphinxcontrib-1.0-py36hbbac3d2_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/sphinxcontrib-websupport-1.0.1-py36hb5e5916_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/spyder-3.2.8-py36_0.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/sqlite-3.20.1-vc14_2.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/statsmodels-0.8.0-py36h6189b4c_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/tblib-1.3.2-py36h30f5020_0.tar.bz2
 https://repo.continuum.io/pkgs/free/win-64/tensorflow-1.2.1-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/terminado-0.8.1-py36_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/testpath-0.3.1-py36h2698cfe_0.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/tk-8.6.7-vc14_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/toolz-0.9.0-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/tornado-4.5.3-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/traitlets-4.3.2-py36h096827d_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/typing-3.6.2-py36hb035bda_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/urllib3-1.22-py36h276f60a_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/vc-14-h0510ff6_3.tar.bz2
 https://repo.continuum.io/pkgs/free/win-64/vs2015_runtime-14.0.25420-0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/wcwidth-0.1.7-py36h3d5aa90_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/webencodings-0.5.1-py36h67c50ae_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/werkzeug-0.14.1-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/wheel-0.30.0-py36h6c3ec14_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/widgetsnbextension-3.1.0-py36_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/win_inet_pton-1.0.1-py36he67d7fd_1.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/wincertstore-0.2-py36h7fe50ca_0.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/winpty-0.4.3-vc14_2.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/wrapt-1.10.11-py36he5f5981_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/xerces-c-3.2.0-h44c76bb_2.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/xz-5.2.3-h7c615d8_2.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/yaml-0.1.7-vc14_0.tar.bz2
 https://repo.continuum.io/pkgs/main/win-64/zict-0.1.3-py36h2d8e73e_0.tar.bz2
 https://conda.anaconda.org/conda-forge/win-64/zlib-1.2.11-vc14_0.tar.bz2
--- a/read_images.py
+++ b/read_images.py
@ -0,0 +1,589 @@
 #==========================================================#
 #==========================================================#
 # Extract shorelines from Landsat images
 #==========================================================#
 #==========================================================#
 #==========================================================#
 # Initial settings
 #==========================================================#
 import os
 import numpy as np
 import matplotlib.pyplot as plt
 import ee
 import pdb
 # other modules
 from osgeo import gdal, ogr, osr
 import pickle
 import matplotlib.cm as cm
 from pylab import ginput
 from shapely.geometry import LineString
 # image processing modules
 import skimage.filters as filters 
 import skimage.exposure as exposure
 import skimage.transform as transform
 import sklearn.decomposition as decomposition
 import skimage.measure as measure
 import skimage.morphology as morphology
 # machine learning modules
 from sklearn.model_selection import train_test_split
 from sklearn.neural_network import MLPClassifier
 from sklearn.preprocessing import StandardScaler, Normalizer 
 from sklearn.externals import joblib
 # import own modules
 import functions.utils as utils
 import functions.sds as sds
 # some other settings
 np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
 plt.rcParams['axes.grid'] = True
 plt.rcParams['figure.max_open_warning'] = 100
 ee.Initialize()
 #==========================================================#
 # Parameters
 #==========================================================#
 sitename = 'NARRA'
 cloud_thresh = 0.7      # threshold for cloud cover
 plot_bool = False       # if you want the plots
 output_epsg = 28356     # GDA94 / MGA Zone 56
 buffer_size = 7         # radius (in pixels) of disk for buffer (pixel classification)
 min_beach_size = 20     # number of pixels in a beach (pixel classification)
 dist_ref = 100          # maximum distance from reference point
 min_length_wl = 200     # minimum length of shoreline LineString to be kept 
 manual_bool = True      # to manually check images
 output = dict([])
 #==========================================================#
 # Metadata
 #==========================================================#
 filepath = os.path.join(os.getcwd(), 'data', sitename)
 with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'rb') as f:
    metadata = pickle.load(f)
 #%%    
 #==========================================================#
 # Read S2 images
 #==========================================================#
 satname = 'S2'
 dates = metadata[satname]['dates']
 input_epsg = 32756 # metadata[satname]['epsg']
 # path to images
 filepath10 = os.path.join(os.getcwd(), 'data', sitename, satname, '10m')
 filenames10 = os.listdir(filepath10)
 filepath20 = os.path.join(os.getcwd(), 'data', sitename, satname, '20m')
 filenames20 = os.listdir(filepath20)
 filepath60 = os.path.join(os.getcwd(), 'data', sitename, satname, '60m')
 filenames60 = os.listdir(filepath60)
 if (not len(filenames10) == len(filenames20)) or (not len(filenames20) == len(filenames60)):
    raise 'error: not the same amount of files for 10, 20 and 60 m'
 N = len(filenames10)
 # initialise variables
 cloud_cover_ts = []
 acc_georef_ts = []
 date_acquired_ts = []
 filename_ts = []
 satname_ts = []
 timestamp = []
 shorelines = []
 idx_skipped = []
 spacing = '=========================================================='
 msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
 print(msg)
 for i in range(N):
    # read 10m bands
    fn = os.path.join(filepath10, filenames10[i])
    data = gdal.Open(fn, gdal.GA_ReadOnly)
    georef = np.array(data.GetGeoTransform())
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im10 = np.stack(bands, 2)
    im10 = im10/10000 # TOA scaled to 10000
    # if image is only zeros, skip it
    if sum(sum(sum(im10))) < 1:
        print('skip ' + str(i) + ' - no data')
        idx_skipped.append(i)
        continue        
    nrows = im10.shape[0]
    ncols = im10.shape[1]
    # read 20m band (SWIR1)
    fn = os.path.join(filepath20, filenames20[i])
    data = gdal.Open(fn, gdal.GA_ReadOnly)
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im20 = np.stack(bands, 2)
    im20 = im20[:,:,0]
    im20 = im20/10000 # TOA scaled to 10000
    im_swir = transform.resize(im20, (nrows, ncols), order=1, preserve_range=True, mode='constant')
    im_swir = np.expand_dims(im_swir, axis=2)
    # append down-sampled swir band to the 10m bands
    im_ms = np.append(im10, im_swir, axis=2)
    # read 60m band (QA)
    fn = os.path.join(filepath60, filenames60[i])
    data = gdal.Open(fn, gdal.GA_ReadOnly)
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im60 = np.stack(bands, 2)
    im_qa = im60[:,:,0]
    cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
    cloud_mask = transform.resize(cloud_mask,(nrows, ncols), order=0, preserve_range=True, mode='constant')
    # check if -inf or nan values on any band and add to cloud mask
    for k in range(im_ms.shape[2]):   
            im_inf = np.isin(im_ms[:,:,k], -np.inf)
            im_nan = np.isnan(im_ms[:,:,k])
            cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
    # calculate cloud cover and if above threshold, skip it
    cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
    if cloud_cover > cloud_thresh:
        print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
        idx_skipped.append(i)
        continue
    # rescale image intensity for display purposes
    im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)
    # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
    im_classif, im_labels = sds.classify_image_NN_nopan(im_ms, cloud_mask, min_beach_size, plot_bool)
    # if there aren't any sandy pixels
    if sum(sum(im_labels[:,:,0])) == 0 :
        # use global threshold
        im_ndwi = sds.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask, plot_bool)
        contours = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
    else:
        # use specific threhsold
        contours_wi, contours_mwi = sds.find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size, plot_bool)
    # convert from pixels to world coordinates
    wl_coords = sds.convert_pix2world(contours_mwi, georef)
    # convert to output epsg spatial reference
    wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)
    # remove contour lines that have a perimeter < min_length_wl
    wl_good = []
    for l, wls in enumerate(wl):
        coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
        a = LineString(coords) # shapely LineString structure
        if a.length >= min_length_wl:
            wl_good.append(wls)
    # format points and only select the ones close to the refpoints
    x_points = np.array([])
    y_points = np.array([])
    for k in range(len(wl_good)):
        x_points = np.append(x_points,wl_good[k][:,0])
        y_points = np.append(y_points,wl_good[k][:,1])
    wl_good = np.transpose(np.array([x_points,y_points]))
    temp = np.zeros((len(wl_good))).astype(bool)
    for k in range(len(refpoints)): 
        temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp) 
    wl_final = wl_good[temp]
    # plot output
    plt.figure()
    im = np.copy(im_display)
    colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
    for k in range(0,im_labels.shape[2]):
        im[im_labels[:,:,k],0] = colours[k,0]
        im[im_labels[:,:,k],1] = colours[k,1]
        im[im_labels[:,:,k],2] = colours[k,2]
    plt.imshow(im)
    for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
    plt.title(satname + '     ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + '      acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' )
    plt.draw()
    pt_in = np.array(ginput(n=1, timeout=1000))
    plt.close()
    # if image is rejected, skip it
    if pt_in[0][1] > nrows/2:
        print('skip ' + str(i) + ' - rejected')
        idx_skipped.append(i)
        continue
    # if accepted, store the data
    cloud_cover_ts.append(cloud_cover)
    acc_georef_ts.append(metadata[satname]['acc_georef'][i])
    filename_ts.append(filenames10[i])
    satname_ts.append(satname)
    date_acquired_ts.append(filenames10[i][:10])
    timestamp.append(metadata[satname]['dates'][i])
    shorelines.append(wl_final)
 # store in output structure
 output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
      'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts,
                  'acc_georef':acc_georef_ts}}
 del idx_skipped
 #%%
 #==========================================================#
 # Read L7&L8 images
 #==========================================================#
 satname = 'L8'
 dates = metadata[satname]['dates']
 input_epsg = 32656 # metadata[satname]['epsg']
 # path to images
 filepath_pan = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8', 'pan')
 filepath_ms = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8', 'ms')
 filenames_pan = os.listdir(filepath_pan)
 filenames_ms = os.listdir(filepath_ms)
 if (not len(filenames_pan) == len(filenames_ms)):
    raise 'error: not the same amount of files for pan and ms'
 N = len(filenames_pan)
 # initialise variables
 cloud_cover_ts = []
 acc_georef_ts = []
 date_acquired_ts = []
 filename_ts = []
 satname_ts = []
 timestamp = []
 shorelines = []
 idx_skipped = []
 spacing = '=========================================================='
 msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
 print(msg)
 for i in range(N):
    # get satellite name
    sat = filenames_pan[i][20:22]
    # read pan image
    fn_pan = os.path.join(filepath_pan, filenames_pan[i])
    data = gdal.Open(fn_pan, gdal.GA_ReadOnly)
    georef = np.array(data.GetGeoTransform())
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im_pan = np.stack(bands, 2)[:,:,0]
    nrows = im_pan.shape[0]
    ncols = im_pan.shape[1]
    # read ms image
    fn_ms = os.path.join(filepath_ms, filenames_ms[i])
    data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im_ms = np.stack(bands, 2)
    # cloud mask
    im_qa = im_ms[:,:,5]
    cloud_mask = sds.create_cloud_mask(im_qa, sat, plot_bool)
    cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0, preserve_range=True, mode='constant').astype('bool_')    
    # resize the image using bilinear interpolation (order 1)
    im_ms = im_ms[:,:,:5]
    im_ms = transform.resize(im_ms,(nrows, ncols), order=1, preserve_range=True, mode='constant')
    # check if -inf or nan values on any band and add to cloud mask
    for k in range(im_ms.shape[2]+1): 
        if k == 5:
            im_inf = np.isin(im_pan, -np.inf)
            im_nan = np.isnan(im_pan)        
        else:  
            im_inf = np.isin(im_ms[:,:,k], -np.inf)
            im_nan = np.isnan(im_ms[:,:,k])
        cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
    # calculate cloud cover and skip image if above threshold
    cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
    if cloud_cover > cloud_thresh:
        print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
        idx_skipped.append(i)
        continue
    # Pansharpen image (different for L8 and L7)
    if sat == 'L7':
        # pansharpen (Green, Red, NIR) and downsample Blue and SWIR1
        im_ms_ps = sds.pansharpen(im_ms[:,:,[1,2,3]], im_pan, cloud_mask, plot_bool)
        im_ms_ps = np.append(im_ms[:,:,[0]], im_ms_ps, axis=2)
        im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[4]], axis=2)
        im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)
    elif sat == 'L8':
        # pansharpen RGB image and downsample NIR and SWIR1
        im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, plot_bool)
        im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2) 
        im_display = sds.rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)
    # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
    im_classif, im_labels = sds.classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, plot_bool)
    # if there aren't any sandy pixels
    if sum(sum(im_labels[:,:,0])) == 0 :
        # use global threshold
        im_ndwi = sds.nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, plot_bool)
        contours = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
    else:
        # use specific threhsold
        contours_wi, contours_mwi = sds.find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size, plot_bool)
    # convert from pixels to world coordinates
    wl_coords = sds.convert_pix2world(contours_mwi, georef)
    # convert to output epsg spatial reference
    wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)
    # remove contour lines that have a perimeter < min_length_wl
    wl_good = []
    for l, wls in enumerate(wl):
        coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
        a = LineString(coords) # shapely LineString structure
        if a.length >= min_length_wl:
            wl_good.append(wls)   
    # format points and only select the ones close to the refpoints
    x_points = np.array([])
    y_points = np.array([])
    for k in range(len(wl_good)):
        x_points = np.append(x_points,wl_good[k][:,0])
        y_points = np.append(y_points,wl_good[k][:,1])
    wl_good = np.transpose(np.array([x_points,y_points]))
    temp = np.zeros((len(wl_good))).astype(bool)
    for k in range(len(refpoints)): 
        temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp) 
    wl_final = wl_good[temp]   
    # plot output
    plt.figure()
    plt.subplot(121)
    im = np.copy(im_display)
    colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
    for k in range(0,im_labels.shape[2]):
        im[im_labels[:,:,k],0] = colours[k,0]
        im[im_labels[:,:,k],1] = colours[k,1]
        im[im_labels[:,:,k],2] = colours[k,2]
    plt.imshow(im)
    for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
    plt.title(sat + '     ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + '      acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' )
    pt_in = np.array(ginput(n=1, timeout=1000))
    plt.close()
    # if image is rejected, skip it
    if pt_in[0][1] > nrows/2:
        print('skip ' + str(i) + ' - rejected')
        idx_skipped.append(i)
        continue
    # if accepted, store the data
    cloud_cover_ts.append(cloud_cover)
    acc_georef_ts.append(metadata[satname]['acc_georef'][i])
    filename_ts.append(filenames_pan[i])
    satname_ts.append(sat)
    date_acquired_ts.append(filenames_pan[i][:10])
    timestamp.append(metadata[satname]['dates'][i])
    shorelines.append(wl_final)
 # store in output structure
 output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
      'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts,
                  'acc_georef':acc_georef_ts}}    
 del idx_skipped
 #%%
 #==========================================================#
 # Read L5 images
 #==========================================================#
 satname = 'L5'
 dates = metadata[satname]['dates']
 input_epsg = 32656 # metadata[satname]['epsg']
 # path to images
 filepath_img = os.path.join(os.getcwd(), 'data', sitename, satname, '30m')
 filenames = os.listdir(filepath_img)
 N = len(filenames)
 # initialise variables
 cloud_cover_ts = []
 acc_georef_ts = []
 date_acquired_ts = []
 filename_ts = []
 satname_ts = []
 timestamp = []
 shorelines = []
 idx_skipped = []
 spacing = '=========================================================='
 msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
 print(msg)
 for i in range(N):
    # read ms image
    fn = os.path.join(filepath_img, filenames[i])
    data = gdal.Open(fn, gdal.GA_ReadOnly)
    georef = np.array(data.GetGeoTransform())
    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
    im_ms = np.stack(bands, 2)
    # down-sample to half hte original pixel size
    nrows = im_ms.shape[0]*2
    ncols = im_ms.shape[1]*2
    # cloud mask
    im_qa = im_ms[:,:,5]
    im_ms = im_ms[:,:,:-1]
    cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
    cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0, preserve_range=True, mode='constant').astype('bool_')
    # resize the image using bilinear interpolation (order 1)
    im_ms = transform.resize(im_ms,(nrows, ncols), order=1, preserve_range=True, mode='constant')
    # adjust georef vector (scale becomes 15m and origin is adjusted to the center of new corner pixel)
    georef[1] = 15
    georef[5] = -15
    georef[0] = georef[0] + 7.5
    georef[3] = georef[3] - 7.5
    # check if -inf or nan values on any band and add to cloud mask
    for k in range(im_ms.shape[2]):   
        im_inf = np.isin(im_ms[:,:,k], -np.inf)
        im_nan = np.isnan(im_ms[:,:,k])
        cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
    # calculate cloud cover and skip image if above threshold
    cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
    if cloud_cover > cloud_thresh:
        print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
        idx_skipped.append(i)
        continue
    # rescale image intensity for display purposes
    im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)
    # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
    im_classif, im_labels = sds.classify_image_NN_nopan(im_ms, cloud_mask, min_beach_size, plot_bool)
    # if there aren't any sandy pixels
    if sum(sum(im_labels[:,:,0])) == 0 :
        # use global threshold
        im_ndwi = sds.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask, plot_bool)
        contours = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
    else:
        # use specific threhsold
        contours_wi, contours_mwi = sds.find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size, plot_bool)
    # convert from pixels to world coordinates
    wl_coords = sds.convert_pix2world(contours_mwi, georef)
    # convert to output epsg spatial reference
    wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)
    # remove contour lines that have a perimeter < min_length_wl
    wl_good = []
    for l, wls in enumerate(wl):
        coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
        a = LineString(coords) # shapely LineString structure
        if a.length >= min_length_wl:
            wl_good.append(wls)
    # format points and only select the ones close to the refpoints
    x_points = np.array([])
    y_points = np.array([])
    for k in range(len(wl_good)):
        x_points = np.append(x_points,wl_good[k][:,0])
        y_points = np.append(y_points,wl_good[k][:,1])
    wl_good = np.transpose(np.array([x_points,y_points]))
    temp = np.zeros((len(wl_good))).astype(bool)
    for k in range(len(refpoints)): 
        temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp) 
    wl_final = wl_good[temp]
    # plot output
    plt.figure()
    plt.subplot(121)
    im = np.copy(im_display)
    colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
    for k in range(0,im_labels.shape[2]):
        im[im_labels[:,:,k],0] = colours[k,0]
        im[im_labels[:,:,k],1] = colours[k,1]
        im[im_labels[:,:,k],2] = colours[k,2]
    plt.imshow(im)
    for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
    plt.title(satname + '     ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + '      acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' )
    plt.subplot(122)
    plt.axis('equal')
    plt.axis('off')
    plt.plot(refpoints[:,0], refpoints[:,1], 'k.')
    plt.plot(wl_final[:,0], wl_final[:,1], 'r.')
    mng = plt.get_current_fig_manager()                                         
    mng.window.showMaximized()
    plt.tight_layout()
    plt.draw()
    pt_in = np.array(ginput(n=1, timeout=1000))
    plt.close()
    # if image is rejected, skip it
    if pt_in[0][1] > nrows/2:
        print('skip ' + str(i) + ' - rejected')
        idx_skipped.append(i)
        continue
    # if accepted, store the data
    cloud_cover_ts.append(cloud_cover)
    acc_georef_ts.append(metadata[satname]['acc_georef'][i])
    filename_ts.append(filenames[i])
    satname_ts.append(satname)
    date_acquired_ts.append(filenames[i][:10])
    timestamp.append(metadata[satname]['dates'][i])
    shorelines.append(wl_final)
 # store in output structure
 output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
      'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts,
                  'acc_georef':acc_georef_ts}}    
 del idx_skipped
 #==========================================================#
 #==========================================================#
 #==========================================================#
 #==========================================================#
 #%%
 # save output
 with open(os.path.join(filepath, sitename + '_output' + '.pkl'), 'wb') as f:
    pickle.dump(output, f)
 # save idx_skipped
 #idx_skipped = dict([])
 #for satname in list(output.keys()):
 #    idx_skipped[satname] = output[satname]['idx_skipped']
 #with open(os.path.join(filepath, sitename + '_idxskipped' + '.pkl'), 'wb') as f:
 #    pickle.dump(idx_skipped, f)
--- a/sds_extract.py
+++ b/sds_extract.py
@ -1,13 +1,10 @@
 # -*- coding: utf-8 -*-
 """
 Created on Thu Mar  1 14:32:08 2018
-@author: z5030440
+#==========================================================#
-
+# Extract shorelines from Landsat images
-Main code to extract shorelines from Landsat imagery
+#==========================================================#
 """
 # Preamble
 # Initial settings
 import ee
 import matplotlib.pyplot as plt
 import matplotlib.cm as cm
@ -27,206 +24,88 @@ import sklearn.decomposition as decomposition
 import skimage.morphology as morphology
 import skimage.measure as measure
-# my functions
+# my modules
 import functions.utils as utils
 import functions.sds as sds
 # some settings
 np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
 plt.rcParams['axes.grid'] = False
 plt.rcParams['figure.max_open_warning'] = 100
 ee.Initialize()
 #%% Select images
 # parameters
-plot_bool = False # if you want the plots
+cloud_thresh = 0.5      # threshold for cloud cover
-prob_high = 99.9 # upper probability to clip and rescale pixel intensity
+plot_bool = True      # if you want the plots
-min_contour_points = 100 # minimum number of points contained in each water line
+min_contour_points = 100# minimum number of points contained in each water line
 output_epsg = 28356     # GDA94 / MGA Zone 56
-cloud_threshold = 0.8
+buffer_size = 10        # radius of disk for buffer (sand classif parameter)
 min_beach_size = 50     # number of pixels in a beach (sand classif parameter)
 # select collection
-input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
+satname = 'L8'
 input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA') # Landsat 8 Tier 1 TOA
 # location (Narrabeen-Collaroy beach)
-rect_narra = [[[151.3473129272461,-33.69035274454718],
+polygon = [[[151.3473129272461,-33.69035274454718],
            [151.2820816040039,-33.68206818063878], 
            [151.27281188964844,-33.74775138989556],
            [151.3425064086914,-33.75231878701767],
            [151.3473129272461,-33.69035274454718]]];
-with open('data/narra_beach.pkl', 'rb') as f:
+# dates
-    pts_beach = pickle.load(f)
+start_date = '2013-01-01'
-               
+end_date = '2018-12-31'
 #rect_narra = [[[151.301454, -33.700754],
 #               [151.311453, -33.702075], 
 #               [151.307237, -33.739761],
 #               [151.294220, -33.736329],
 #               [151.301454, -33.700754]]];
-# Dates
+# filter by location and date
-start_date = '2016-01-01'
+flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(start_date, end_date)
 end_date = '2016-12-31'
 # filter by location
 flt_col = input_col.filterBounds(ee.Geometry.Polygon(rect_narra)).filterDate(start_date, end_date)
 n_img = flt_col.size().getInfo()
-print('Number of images covering Narrabeen:', n_img)
+print('Number of images covering the polygon:', n_img)
 im_all = flt_col.getInfo().get('features')
-#%% Extract shorelines
+i = 0 # first image
-metadata = {'timestamp':[],
+    
-            'date_acquired':[],
+# find image in ee database
            'cloud_cover':[],
            'geom_rmse_model':[],
            'gcp_model':[],
            'quality':[],
            'sun_azimuth':[],
            'sun_elevation':[]}
 skipped_images = np.zeros((n_img,1)).astype(bool)
 output_wl = []
 # loop through all images
 for i in range(n_img):
    # find each image in ee database
    im = ee.Image(im_all[i].get('id')) 
    # load image as np.array
    im_pan, im_ms, im_cloud, crs, meta = sds.read_eeimage(im, rect_narra, plot_bool)
    # if clouds -> skip the image
    if sum(sum(im_cloud.astype(int)))/(im_cloud.shape[0]*im_cloud.shape[1]) > cloud_threshold:
        skipped_images[i] = True
        continue
    # rescale intensities
    im_ms = sds.rescale_image_intensity(im_ms, im_cloud, prob_high, plot_bool)
    im_pan = sds.rescale_image_intensity(im_pan, im_cloud, prob_high, plot_bool)
    # pansharpen rgb image
    im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, im_cloud, plot_bool)
    # add down-sized bands for NIR and SWIR (since pansharpening is not possible)
    im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2) 
    # calculate NDWI
    im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], im_cloud, plot_bool)
    # edge detection
    wl_pix = sds.find_wl_contours(im_ndwi, im_cloud, min_contour_points, plot_bool)
    plt.figure()
    plt.imshow(im_ms_ps[:,:,[2,1,0]])
    for i,contour in enumerate(wl_pix): plt.plot(contour[:, 1], contour[:, 0], linewidth=2)
    plt.axis('image')
    plt.title('Detected water lines')
    plt.show()
    # convert from pixels to world coordinates
    wl_coords = sds.convert_pix2world(wl_pix, crs['crs_15m'])
    # convert to output epsg spatial reference
    wl = sds.convert_epsg(wl_coords, crs['epsg_code'], output_epsg)
    # find contour closest to narrabeen beach
    sum_dist = np.zeros(len(wl))
    for k,contour in enumerate(wl):
        min_dist = np.zeros(len(pts_beach))
        for j,pt in enumerate(pts_beach):
            min_dist[j] = np.min(np.linalg.norm(contour - pt, axis=1))
        sum_dist[k] = np.sum(min_dist)/len(min_dist)
    try:
        wl_beach = wl[np.argmin(sum_dist)]
 #        plt.figure()
 #        plt.axis('equal')
 #        plt.plot(pts_beach[:,0], pts_beach[:,1], 'ko')
 #        plt.plot(wl_beach[:,0], wl_beach[:,1], 'r')
 #        plt.show()
    except:
        wl_beach = []
    # plot for QA
    plt.figure()
    plt.imshow(im_ms_ps[:,:,[2,1,0]])
    for k,contour in enumerate(wl_pix): plt.plot(contour[:, 1], contour[:, 0], linewidth=2)
    if len(wl_beach) > 0:
        plt.plot(wl_pix[np.argmin(sum_dist)][:,1], wl_pix[np.argmin(sum_dist)][:,0], linewidth=3, color='w')
    plt.axis('image')
    plt.title('im ' + str(i) + ' : ' + datetime.strftime(datetime
                                    .fromtimestamp(meta['timestamp']/1000, tz=pytz.utc)
                                    .astimezone(pytz.timezone('Australia/Sydney')), '%Y-%m-%d %H:%M:%S %Z%z'))
    plt.show()
    # store metadata of each image in dict
    metadata['timestamp'].append(meta['timestamp'])
    metadata['date_acquired'].append(meta['date_acquired'])
    metadata['cloud_cover'].append(sum(sum(im_cloud.astype(int)))/(im_cloud.shape[0]*im_cloud.shape[1]))
    metadata['geom_rmse_model'].append(meta['geom_rmse_model'])
    metadata['gcp_model'].append(meta['gcp_model'])
    metadata['quality'].append(meta['quality'])
    metadata['sun_azimuth'].append(meta['sun_azimuth'])
    metadata['sun_elevation'].append(meta['sun_elevation'])
    # store water lines
    output_wl.append(wl_beach)
    print(i)
 # generate datetimes
 #fmt = '%Y-%m-%d %H:%M:%S %Z%z'
 #au_tz = pytz.timezone('Australia/Sydney')
 dt = [];
 t = metadata['timestamp']
 for k in range(len(t)): dt.append(datetime.fromtimestamp(t[k]/1000, tz=pytz.utc)) 
 # save outputs
 data = metadata.copy()
 data.update({'dt':dt})
 data.update({'contours':output_wl})  
 #with open('data_2016.pkl', 'wb') as f:
 #    pickle.dump(data, f) 
 #%% Load data
 #with open('data_2016.pkl', 'rb') as f:
 #    data = pickle.load(f)
 # load backgroud image
 i = 0
 im = ee.Image(im_all[i].get('id')) 
-im_pan, im_ms, im_cloud, crs, meta = sds.read_eeimage(im, rect_narra, plot_bool)
+
-im_ms = sds.rescale_image_intensity(im_ms, im_cloud, prob_high, plot_bool)
+# load image as np.array
-im_pan = sds.rescale_image_intensity(im_pan, im_cloud, prob_high, plot_bool)
+im_pan, im_ms, cloud_mask, crs, meta = sds.read_eeimage(im, polygon, satname, plot_bool)
-im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, im_cloud, plot_bool)
+
 # mask -inf or nan values on the image and add to cloud_mask
 im_inf = np.isin(im_ms[:,:,0], -np.inf)
 im_nan = np.isnan(im_ms[:,:,0])
 cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
 cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
 print('Cloud cover : ' + str(int(round(100*cloud_cover))) + ' %')
 # pansharpen rgb image
 im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, plot_bool)
 # add down-sized bands for NIR and SWIR (since pansharpening is not possible)
 im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2) 
 # calculate NDWI
 im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, plot_bool)
 # edge detection
 wl_pix = sds.find_wl_contours(im_ndwi, cloud_mask, min_contour_points, plot_bool)
 plt.figure()
 plt.imshow(im_ms_ps[:,:,[2,1,0]])
 for i,contour in enumerate(wl_pix): plt.plot(contour[:, 1], contour[:, 0], linewidth=2)
 plt.axis('image')
-plt.title('2016 shorelines')
+plt.title('Detected water lines')
 n = len(data['cloud_cover'])
 idx_best = []
 # remove overlapping images, based on cloud cover
 for i in range(n):
    date_im = data['date_acquired'][i]
    idx = np.isin(data['date_acquired'], date_im)
    best = np.where(idx)[0][np.argmin(np.array(data['cloud_cover'])[idx])]
    if ~np.isin(best, idx_best):
        idx_best.append(best)
 point_narra = np.array([342500, 6266990])
 plt.figure()
 plt.axis('equal')
 plt.grid()
 cmap = cm.get_cmap('jet')
 colours = cmap(np.linspace(0, 1, num=len(idx_best)))
 for i, idx in enumerate(idx_best):
    for j in range(len(data['contours'][i])):
        if np.any(np.linalg.norm(data['contours'][i][j][:,[0,1]] - point_narra, axis=1) < 200):
            plt.plot(data['contours'][i][j][:,0], data['contours'][i][j][:,1],
                     label=str(data['date_acquired'][i]),
                     linewidth=2, color=colours[i,:])
 plt.legend()
 plt.show()
 # convert from pixels to world coordinates
 wl_coords = sds.convert_pix2world(wl_pix, crs['crs_15m'])
 # convert to output epsg spatial reference
 wl = sds.convert_epsg(wl_coords, crs['epsg_code'], output_epsg)
 # classify sand pixels with Kmeans
 #im_sand = sds.classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, buffer_size, min_beach_size, plot_bool)
-
+# classify image in 4 classes (sand, whitewater, water, other) with NN classifier
 im_classif = sds.classify_image_NN(im_ms_ps, im_pan, cloud_mask, plot_bool)
--- a/sds_extract_allcode.py
+++ b/sds_extract_allcode.py
@ -1,359 +0,0 @@
 # -*- coding: utf-8 -*-
 """
 Created on Wed Feb 21 18:05:01 2018
@author: z5030440
 """
 #%% Initial settings
 # import packages
 import ee
 from IPython import display
 import math
 import matplotlib.pyplot as plt
 import numpy as np
 from osgeo import gdal
 import tempfile
 import tensorflow as tf
 import urllib
 from urllib.request import urlretrieve
 import zipfile
 import skimage.filters as filters 
 import skimage.exposure as exposure
 import skimage.transform as transform
 import sklearn.decomposition as decomposition
 import skimage.morphology as morphology
 # import scripts
 from GEEImageFunctions import *
 np.seterr(all='ignore') # raise divisions by 0 and nans
 ee.Initialize()
 # Load image collection and filter it based on location (Narrabeen)
 input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
 #n_img = input_col.size().getInfo()
 #print('Number of images in collection:', n_img)
 # filter based on location (Narrabeen-Collaroy)
 rect_narra = [[[151.3473129272461,-33.69035274454718],
              [151.2820816040039,-33.68206818063878], 
              [151.27281188964844,-33.74775138989556],
              [151.3425064086914,-33.75231878701767],
              [151.3473129272461,-33.69035274454718]]]; 
 flt_col = input_col.filterBounds(ee.Geometry.Polygon(rect_narra))
 n_img = flt_col.size().getInfo()
 print('Number of images covering Narrabeen:', n_img)
 # Select the most recent image and download it
 im = ee.Image(flt_col.sort('SENSING_TIME',False).first())
 im_dic = im.getInfo()
 image_prop = im_dic.get('properties')
 im_bands = im_dic.get('bands')
 for i in range(len(im_bands)): del im_bands[i]['dimensions'] # delete the dimensions key
 # download the panchromatic band (B8)
 pan_band = [im_bands[7]]
 im_pan = load_image(im, rect_narra, pan_band)
 im_pan = im_pan[:,:,0]
 size_pan = im_pan.shape
 vec_pan = im_pan.reshape(size_pan[0] * size_pan[1])
 # download the QA band (BQA)
 qa_band = [im_bands[11]]
 im_qa = load_image(im, rect_narra, qa_band)
 im_qa = im_qa[:,:,0]
 # convert QA bits
 cloud_values = [2800, 2804, 2808, 2812, 6896, 6900, 6904, 6908]
 cloud_shadow_values = [2976, 2980, 2984, 2988, 3008, 3012, 3016, 3020]
 # Create cloud mask (resized to be applied to the Pan band)
 im_cloud = np.isin(im_qa, cloud_values)
 im_cloud_shadow = np.isin(im_qa, cloud_shadow_values)
 im_cloud_res = transform.resize(im_cloud,(im_pan.shape[0], im_pan.shape[1]), order=0, preserve_range=True).astype('bool_')
 vec_cloud = im_cloud.reshape(im_cloud.shape[0] * im_cloud.shape[1])
 vec_cloud_res = im_cloud_res.reshape(size_pan[0] * size_pan[1])
 # download the other bands (B2,B3,B4,B5,B6) = (blue,green,red,nir,swir1)
 ms_bands = [im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[5]]
 im_ms = load_image(im, rect_narra, ms_bands)
 size_ms = im_ms.shape
 vec_ms = im_ms.reshape(size_ms[0] * size_ms[1], size_ms[2])
 # Plot the RGB image and cloud masks
 plt.figure()
 ax1 = plt.subplot(121)
 plt.imshow(im_ms[:,:,[2,1,0]])
 plt.title('RGB')
 ax2 = plt.subplot(122)
 plt.imshow(im_cloud, cmap='gray')
 plt.title('Cloud mask')
 #ax3 = plt.subplot(133, sharex=ax1, sharey=ax1)
 #plt.imshow(im_cloud_shadow)
 #plt.title('Cloud mask shadow')
 plt.show()
 # Resize multispectral bands (30m) to the size of the pan band (15m) using bilinear interpolation
 im_ms_res = transform.resize(im_ms,(size_pan[0], size_pan[1]), order=1, preserve_range=True, mode='constant')
 vec_ms_res = im_ms_res.reshape(size_pan[0] * size_pan[1], size_ms[2])
 # Adjust intensities (set cloud pixels to 0 intensity)
 cloud_value = np.nan
 prc_low = 0 # lower percentile
 prob_high = 99.9 # upper percentile probability to clip
 # Rescale intensities between 0 and 1
 vec_ms_adj = np.ones((len(vec_cloud_res),size_ms[2])) * np.nan
 for i in range(im_ms.shape[2]):
    prc_high = np.percentile(vec_ms_res[~vec_cloud_res,i], prob_high)
    vec_rescaled = exposure.rescale_intensity(vec_ms_res[~vec_cloud_res,i], in_range=(prc_low,prc_high))
    plt.figure()
    plt.hist(vec_rescaled, bins = 300)
    plt.show()
    vec_ms_adj[~vec_cloud_res,i] = vec_rescaled
 im_ms_adj = vec_ms_adj.reshape(size_pan[0], size_pan[1], size_ms[2])
 # same for the pan band
 vec_pan_adj = np.ones(len(vec_cloud_res)) * np.nan
 prc_high = np.percentile(vec_pan[~vec_cloud_res],prob_high)
 vec_rescaled = exposure.rescale_intensity(vec_pan[~vec_cloud_res], in_range=(prc_low,prc_high))
 plt.figure()
 plt.hist(vec_rescaled, bins = 300)
 plt.show()
 vec_pan_adj[~vec_cloud_res] = vec_rescaled
 im_pan_adj = vec_pan_adj.reshape(size_pan[0], size_pan[1])
 # Plot adjusted images
 plt.figure()
 plt.subplot(131)
 plt.imshow(im_pan_adj, cmap='gray')
 plt.title('PANCHROMATIC (15 m pixel)')
 plt.subplot(132)
 plt.imshow(im_ms_adj[:,:,[2,1,0]])
 plt.title('RGB (30 m pixel)')
 plt.show()
 plt.subplot(133)
 plt.imshow(im_ms_adj[:,:,[3,1,0]])
 plt.title('NIR-GB (30 m pixel)')
 plt.show()
 #%% Pansharpening (PCA)
 # Run PCA on selected bands
 sel_bands = [0,1,2]
 temp = vec_ms_adj[:,sel_bands]
 vec_ms_adj_nocloud = temp[~vec_cloud_res,:]
 pca = decomposition.PCA()
 vec_pcs = pca.fit_transform(vec_ms_adj_nocloud)
 vec_pcs_all = np.ones((len(vec_cloud_res),len(sel_bands))) * np.nan
 vec_pcs_all[~vec_cloud_res,:] = vec_pcs
 im_pcs = vec_pcs_all.reshape(size_pan[0], size_pan[1], vec_pcs.shape[1])
 plt.figure()
 plt.subplot(221)
 plt.imshow(im_pcs[:,:,0], cmap='gray')
 plt.title('Component 1')
 plt.subplot(222)
 plt.imshow(im_pcs[:,:,1], cmap='gray')
 plt.title('Component 2')
 plt.subplot(223)
 plt.imshow(im_pcs[:,:,2], cmap='gray')
 plt.title('Component 3')
 plt.show()
 # Compare the Pan image with the 1st Principal component
 compare_images(im_pan_adj,im_pcs[:,:,0])
 intensity_histogram(im_pan_adj)
 intensity_histogram(im_pcs[:,:,0])
 # Match histogram of the pan image with the 1st principal component and replace the 1st component
 vec_pcs[:,0] = hist_match(vec_pan_adj[~vec_cloud_res], vec_pcs[:,0])
 vec_ms_ps = pca.inverse_transform(vec_pcs)
 # normalise between 0 and 1
 for i in range(vec_pcs.shape[1]):
    vec_ms_ps[:,i] = np.divide(vec_ms_ps[:,i] - np.min(vec_ms_ps[:,i]),
               np.max(vec_ms_ps[:,i]) - np.min(vec_ms_ps[:,i]))
 vec_ms_ps_all = np.ones((len(vec_cloud_res),len(sel_bands))) * np.nan
 vec_ms_ps_all[~vec_cloud_res,:] = vec_ms_ps
 im_ms_ps = vec_ms_ps_all.reshape(size_pan[0], size_pan[1], len(sel_bands))
 vec_ms_ps_all = np.append(vec_ms_ps_all, vec_ms_adj[:,[3,4]], axis=1)
 im_ms_ps = np.append(im_ms_ps, im_ms_adj[:,:,[3,4]], axis=2)
 # Plot adjusted images
 plt.figure()
 plt.subplot(121)
 plt.imshow(im_ms_adj[:,:,[2,1,0]])
 plt.title('Original RGB')
 plt.show()
 plt.subplot(122)
 plt.imshow(im_ms_ps[:,:,[2,1,0]])
 plt.title('Pansharpened RGB')
 plt.show()
 plt.figure()
 plt.subplot(121)
 plt.imshow(im_ms_adj[:,:,[3,1,0]])
 plt.title('Original NIR-GB')
 plt.show()
 plt.subplot(122)
 plt.imshow(im_ms_ps[:,:,[3,1,0]])
 plt.title('Pansharpened NIR-GB')
 plt.show()
 #%% Compute Normalized Difference Water Index (NDWI)
 # With NIR
 vec_ndwi_nir = np.ones(len(vec_cloud_res)) * np.nan
 temp = np.divide(vec_ms_ps_all[~vec_cloud_res,3] - vec_ms_ps_all[~vec_cloud_res,1],
                         vec_ms_ps_all[~vec_cloud_res,3] + vec_ms_ps_all[~vec_cloud_res,1])
 vec_ndwi_nir[~vec_cloud_res] = temp
 im_ndwi_nir = vec_ndwi_nir.reshape(size_pan[0], size_pan[1])
 # With SWIR_1
 vec_ndwi_swir = np.ones(len(vec_cloud_res)) * np.nan
 temp = np.divide(vec_ms_ps_all[~vec_cloud_res,4] - vec_ms_ps_all[~vec_cloud_res,1],
                         vec_ms_ps_all[~vec_cloud_res,4] + vec_ms_ps_all[~vec_cloud_res,1])
 vec_ndwi_swir[~vec_cloud_res] = temp
 im_ndwi_swir = vec_ndwi_swir.reshape(size_pan[0], size_pan[1])
 plt.figure()
 ax1 = plt.subplot(211)
 plt.hist(vec_ndwi_nir[~vec_cloud_res], bins=300, label='NIR')
 plt.hist(vec_ndwi_swir[~vec_cloud_res], bins=300, label='SWIR', alpha=0.5)
 plt.legend()
 ax2 = plt.subplot(212, sharex=ax1)
 plt.hist(vec_ndwi_nir[~vec_cloud_res], bins=300, cumulative=True, histtype='step', label='NIR')
 plt.hist(vec_ndwi_swir[~vec_cloud_res], bins=300, cumulative=True, histtype='step', label='SWIR')
 plt.legend()
 plt.show()
 compare_images(im_ndwi_nir,im_ndwi_swir)
 plt.figure()
 plt.imshow(im_ndwi_nir, cmap='seismic')
 plt.title('Water Index')
 plt.colorbar()
 plt.show()
 #%% Extract shorelines (NIR)
 ndwi_nir = vec_ndwi_nir[~vec_cloud_res]
 t_otsu = filters.threshold_otsu(ndwi_nir)
 t_min = filters.threshold_minimum(ndwi_nir)
 t_mean = filters.threshold_mean(ndwi_nir)
 t_li = filters.threshold_li(ndwi_nir)
 # try all thresholding algorithms
 plt.figure()
 plt.hist(ndwi_nir, bins=300)
 plt.plot([t_otsu, t_otsu],[0, 15000], 'r-', label='Otsu threshold')
 #plt.plot([t_min, t_min],[0, 15000], 'g-', label='min')
 #plt.plot([t_mean, t_mean],[0, 15000], 'y-', label='mean')
 #plt.plot([t_li, t_li],[0, 15000], 'm-', label='li')
 plt.legend()
 plt.show()
 plt.figure()
 plt.imshow(im_ndwi_nir > t_otsu, cmap='gray')
 plt.title('Binary image')
 plt.show()
 im_bin = im_ndwi_nir > t_otsu
 im_open = morphology.binary_opening(im_bin,morphology.disk(1))
 im_close = morphology.binary_closing(im_open,morphology.disk(1))
 im_bin_coast_in = im_close ^ morphology.erosion(im_close,morphology.disk(1))
 im_bin_sl_in = morphology.remove_small_objects(im_bin_coast_in,100,8)
 compare_images(im_close,im_bin_sl_in)
 plt.figure()
 plt.subplot(121)
 plt.imshow(im_close, cmap='gray')
 plt.title('morphological closing')
 plt.subplot(122)
 plt.imshow(im_bin_sl_in, cmap='gray')
 plt.title('Water mark')
 plt.show()
 im_bin_coast_out = morphology.dilation(im_close,morphology.disk(1)) ^ im_close
 im_bin_sl_out = morphology.remove_small_objects(im_bin_coast_out,100,8)
 # Plot shorelines on top of RGB image
 im_rgb_sl = np.copy(im_ms_ps[:,:,[2,1,0]])
 im_rgb_sl[im_bin_sl_in,0] = 0
 im_rgb_sl[im_bin_sl_in,1] = 1
 im_rgb_sl[im_bin_sl_in,2] = 1
 im_rgb_sl[im_bin_sl_out,0] = 1
 im_rgb_sl[im_bin_sl_out,1] = 0
 im_rgb_sl[im_bin_sl_out,2] = 1
 plt.figure()
 plt.imshow(im_rgb_sl)
 plt.title('Pansharpened RGB')
 plt.show()
 #%% Extract shorelines SWIR
 ndwi_swir = vec_ndwi_swir[~vec_cloud_res]
 t_otsu = filters.threshold_otsu(ndwi_swir)
 plt.figure()
 plt.hist(ndwi_swir, bins=300)
 plt.plot([t_otsu, t_otsu],[0, 15000], 'r-', label='Otsu threshold')
 #plt.plot([t_min, t_min],[0, 15000], 'g-', label='min')
 #plt.plot([t_mean, t_mean],[0, 15000], 'y-', label='mean')
 #plt.plot([t_li, t_li],[0, 15000], 'm-', label='li')
 plt.legend()
 plt.show()
 plt.figure()
 plt.imshow(im_ndwi_swir > t_otsu, cmap='gray')
 plt.title('Binary image')
 plt.show()
 im_bin = im_ndwi_swir > t_otsu
 im_open = morphology.binary_opening(im_bin,morphology.disk(1))
 im_close = morphology.binary_closing(im_open,morphology.disk(1))
 im_bin_coast_in = im_close ^ morphology.erosion(im_close,morphology.disk(1))
 im_bin_sl_in = morphology.remove_small_objects(im_bin_coast_in,100,8)
 compare_images(im_close,im_bin_sl_in)
 im_bin_coast_out = morphology.dilation(im_close,morphology.disk(1)) ^ im_close
 im_bin_sl_out = morphology.remove_small_objects(im_bin_coast_out,100,8)
 # Plot shorelines on top of RGB image
 im_rgb_sl = np.copy(im_ms_ps[:,:,[2,1,0]])
 im_rgb_sl[im_bin_sl_in,0] = 0
 im_rgb_sl[im_bin_sl_in,1] = 1
 im_rgb_sl[im_bin_sl_in,2] = 1
 im_rgb_sl[im_bin_sl_out,0] = 1
 im_rgb_sl[im_bin_sl_out,1] = 0
 im_rgb_sl[im_bin_sl_out,2] = 1
 plt.figure()
 plt.imshow(im_rgb_sl)
 plt.title('Pansharpened RGB')
 plt.show()
--- a/sds_extract_test.py
+++ b/sds_extract_test.py
@ -1,260 +0,0 @@
 # -*- coding: utf-8 -*-
 """
 Created on Thu Mar  1 14:32:08 2018
@author: z5030440
 Main code to extract shorelines from Landsat imagery
 """
 # Preamble
 import ee
 import matplotlib.pyplot as plt
 import matplotlib.cm as cm
 import numpy as np
 import pandas as pd
 from datetime import datetime
 import pickle
 import pdb
 import pytz
 from pylab import ginput
 # image processing modules
 import skimage.filters as filters 
 import skimage.exposure as exposure
 import skimage.transform as transform
 import sklearn.decomposition as decomposition
 import skimage.morphology as morphology
 import skimage.measure as measure
 # my functions
 import functions.utils as utils
 import functions.sds as sds
 np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
 ee.Initialize()
 #%% Select images
 # parameters
 plot_bool = False # if you want the plots
 prob_high = 99.9 # upper probability to clip and rescale pixel intensity
 min_contour_points = 100 # minimum number of points contained in each water line
 output_epsg = 28356 # GDA94 / MGA Zone 56
 cloud_threshold = 0.7
 # select collection
 input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
 # location (Narrabeen-Collaroy beach)
 rect_narra = [[[151.3473129272461,-33.69035274454718],
              [151.2820816040039,-33.68206818063878], 
              [151.27281188964844,-33.74775138989556],
              [151.3425064086914,-33.75231878701767],
              [151.3473129272461,-33.69035274454718]]];
 with open('data/narra_beach.pkl', 'rb') as f:
    pts_beach = pickle.load(f)
 with open('data/idx_nogt.pkl', 'rb') as f:
    idx_nogt = pickle.load(f)  
 idx_nogt = np.array(idx_nogt)
 #rect_narra = [[[151.301454, -33.700754],
 #               [151.311453, -33.702075], 
 #               [151.307237, -33.739761],
 #               [151.294220, -33.736329],
 #               [151.301454, -33.700754]]];
 # Dates
 start_date = '2016-01-01'
 end_date = '2016-12-31'
 # filter by location
 flt_col = input_col.filterBounds(ee.Geometry.Polygon(rect_narra))#.filterDate(start_date, end_date)
 n_img = flt_col.size().getInfo()
 print('Number of images covering Narrabeen:', n_img)
 im_all = flt_col.getInfo().get('features')
 #%% Extract shorelines
 metadata = {'timestamp':[],
            'date_acquired':[],
            'cloud_cover':[],
            'geom_rmse_model':[],
            'gcp_model':[],
            'quality':[],
            'sun_azimuth':[],
            'sun_elevation':[]}
 skipped_images = np.zeros((n_img,1)).astype(bool)
 output_wl = []
 # loop through all images
 for i in range(n_img):
    if np.isin(i, idx_nogt):
        continue
    # find each image in ee database
    im = ee.Image(im_all[i].get('id')) 
    # load image as np.array
    im_pan, im_ms, im_cloud, crs, meta = sds.read_eeimage(im, rect_narra, plot_bool)
    # if clouds -> skip the image
    if sum(sum(im_cloud.astype(int)))/(im_cloud.shape[0]*im_cloud.shape[1]) > cloud_threshold:
        skipped_images[i] = True
        continue
    # rescale intensities
    im_ms = sds.rescale_image_intensity(im_ms, im_cloud, prob_high, plot_bool)
    im_pan = sds.rescale_image_intensity(im_pan, im_cloud, prob_high, plot_bool)
    # pansharpen rgb image
    im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, im_cloud, plot_bool)
    # add down-sized bands for NIR and SWIR (since pansharpening is not possible)
    im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2) 
    # calculate NDWI
    im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], im_cloud, plot_bool)
    # edge detection
    wl_pix = sds.find_wl_contours(im_ndwi, im_cloud, min_contour_points, plot_bool)
    # convert from pixels to world coordinates
    wl_coords = sds.convert_pix2world(wl_pix, crs['crs_15m'])
    # convert to output epsg spatial reference
    wl = sds.convert_epsg(wl_coords, crs['epsg_code'], output_epsg)
    # find contour closest to narrabeen beach
    sum_dist = np.zeros(len(wl))
    for k,contour in enumerate(wl):
        min_dist = np.zeros(len(pts_beach))
        for j,pt in enumerate(pts_beach):
            min_dist[j] = np.min(np.linalg.norm(contour - pt, axis=1))
        sum_dist[k] = np.sum(min_dist)/len(min_dist)
    try:
        wl_beach = wl[np.argmin(sum_dist)]
 #        plt.figure()
 #        plt.axis('equal')
 #        plt.plot(pts_beach[:,0], pts_beach[:,1], 'ko')
 #        plt.plot(wl_beach[:,0], wl_beach[:,1], 'r')
 #        plt.show()
    except:
        wl_beach = []
    plt.figure()
    plt.imshow(im_ms_ps[:,:,[2,1,0]])
    for k,contour in enumerate(wl_pix): plt.plot(contour[:, 1], contour[:, 0], linewidth=2)
    if len(wl_beach) > 0:
        plt.plot(wl_pix[np.argmin(sum_dist)][:,1], wl_pix[np.argmin(sum_dist)][:,0], linewidth=3, color='w')
    plt.axis('image')
    plt.title('im ' + str(i) + ' : ' + datetime.strftime(datetime
                                    .fromtimestamp(meta['timestamp']/1000, tz=pytz.utc)
                                    .astimezone(pytz.timezone('Australia/Sydney')), '%Y-%m-%d %H:%M:%S %Z%z'))
    plt.show()
    # manually validate shoreline detection
    input_pt = np.array(ginput(1))
    if input_pt[0,1] > 300:
        skipped_images[i] = True
        continue
    # store metadata of each image in dict
    metadata['timestamp'].append(meta['timestamp'])
    metadata['date_acquired'].append(meta['date_acquired'])
    metadata['cloud_cover'].append(sum(sum(im_cloud.astype(int)))/(im_cloud.shape[0]*im_cloud.shape[1]))
    metadata['geom_rmse_model'].append(meta['geom_rmse_model'])
    metadata['gcp_model'].append(meta['gcp_model'])
    metadata['quality'].append(meta['quality'])
    metadata['sun_azimuth'].append(meta['sun_azimuth'])
    metadata['sun_elevation'].append(meta['sun_elevation'])
    # store water lines
    output_wl.append(wl_beach)
    print(i)
 # generate datetimes
 #fmt = '%Y-%m-%d %H:%M:%S %Z%z'
 #au_tz = pytz.timezone('Australia/Sydney')
 dt = [];
 t = metadata['timestamp']
 for k in range(len(t)): dt.append(datetime.fromtimestamp(t[k]/1000, tz=pytz.utc)) 
 # save outputs
 data = metadata.copy()
 data.update({'dt':dt})
 data.update({'contours':output_wl})  
 with open('data_gt15d_32_56.pkl', 'wb') as f:
    pickle.dump(data, f) 
 #%% Load data
 ##with open('data_2016.pkl', 'rb') as f:
 ##    data = pickle.load(f)
 #    
 #
 ## load backgroud image
 #i = 0
 #im = ee.Image(im_all[i].get('id')) 
 #im_pan, im_ms, im_cloud, crs, meta = sds.read_eeimage(im, rect_narra, plot_bool)
 #im_ms = sds.rescale_image_intensity(im_ms, im_cloud, prob_high, plot_bool)
 #im_pan = sds.rescale_image_intensity(im_pan, im_cloud, prob_high, plot_bool)
 #im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, im_cloud, plot_bool)
 #
 #plt.figure()
 #plt.imshow(im_ms_ps[:,:,[2,1,0]])
 #plt.axis('image')
 #plt.title('2016 shorelines')
 #
 #n = len(data['cloud_cover'])
 #idx_best = []
 ## remove overlapping images, based on cloud cover
 #for i in range(n):
 #    date_im = data['date_acquired'][i]
 #    idx = np.isin(data['date_acquired'], date_im)
 #    best = np.where(idx)[0][np.argmin(np.array(data['cloud_cover'])[idx])]
 #    if ~np.isin(best, idx_best):
 #        idx_best.append(best)
 #
 #point_narra = np.array([342500, 6266990])
 #plt.figure()
 #plt.axis('equal')
 #plt.grid()
 #cmap = cm.get_cmap('jet')
 #colours = cmap(np.linspace(0, 1, num=len(idx_best)))
 #for i, idx in enumerate(idx_best):
 #    for j in range(len(data['contours'][i])):
 #        if np.any(np.linalg.norm(data['contours'][i][j][:,[0,1]] - point_narra, axis=1) < 200):
 #            plt.plot(data['contours'][i][j][:,0], data['contours'][i][j][:,1],
 #                     label=str(data['date_acquired'][i]),
 #                     linewidth=2, color=colours[i,:])
 #            
 #plt.legend()
 #plt.show()       
 #
 #pts_narra = sds.convert_epsg(pts_narra, output_epsg, 4326)
 #
 ##kml.newlinestring(name="beach",
 ##         coords = [(_[0], _[1]) for _ in pts_narra])
 ##kml.save("narra.kml")
 #%%
 #with open('data_gt15d_0_31.pkl', 'rb') as f:
 #    data1 = pickle.load(f)
 #with open('data_gt15d_32_56.pkl', 'rb') as f:
 #    data2 = pickle.load(f)
 #with open('data_gt15d_99_193.pkl', 'rb') as f:
 #    data3 = pickle.load(f)  
 #    
 #data = []
 #data = data1.copy()
 #for k,cat in enumerate(data.keys()):
 #    for j in range(len(data2[cat])):
 #        data[cat].append(data2[cat][j])
 #    for j in range(len(data3[cat])):
 #        data[cat].append(data3[cat][j])
 #    
 #
 #with open('data_gt_l8.pkl', 'wb') as f:
 #    pickle.dump(data, f) 
--- a/time_coverage.py
+++ b/time_coverage.py
@ -1,136 +0,0 @@
 # -*- coding: utf-8 -*-
 """
 Created on Tue Mar 20 16:15:51 2018
@author: z5030440
 """
 import scipy.io as sio
 import os
 import ee
 import matplotlib.pyplot as plt
 import matplotlib.dates as mdates
 import numpy as np
 import pandas as pd
 from datetime import datetime, timedelta
 import pickle
 import pdb
 import pytz
 # image processing modules
 import skimage.filters as filters 
 import skimage.exposure as exposure
 import skimage.transform as transform
 import skimage.morphology as morphology
 import skimage.measure as measure
 import sklearn.decomposition as decomposition
 from scipy import spatial
 # my functions
 import functions.utils as utils
 import functions.sds as sds
 #plt.rcParams['axes.grid'] = True
 au_tz = pytz.timezone('Australia/Sydney')
 # load quadbike dates and convert from datenum to datetime
 suffix = '.mat'
 dir_name = os.getcwd()
 file_name = 'data\quadbike_dates'
 file_path = os.path.join(dir_name, file_name + suffix)
 quad_dates = sio.loadmat(file_path)['dates']
 dt_quad = []
 for i in range(quad_dates.shape[0]): 
    dt_quad.append(datetime(quad_dates[i,0], quad_dates[i,1], quad_dates[i,2], tzinfo=au_tz))
 # load satellite datetimes (in UTC) and convert to AEST time
 input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
 # location (Narrabeen-Collaroy beach)
 rect_narra = [[[151.3473129272461,-33.69035274454718],
              [151.2820816040039,-33.68206818063878], 
              [151.27281188964844,-33.74775138989556],
              [151.3425064086914,-33.75231878701767],
              [151.3473129272461,-33.69035274454718]]];
 flt_col = input_col.filterBounds(ee.Geometry.Polygon(rect_narra))               
 n_img = flt_col.size().getInfo()
 print('Number of images covering Narrabeen:', n_img)
 im_all = flt_col.getInfo().get('features')
 # extract datetimes from image metadata
 dt_sat = [_['properties']['system:time_start'] for _ in im_all]
 dt_sat = [datetime.fromtimestamp(_/1000, tz=pytz.utc) for _ in dt_sat]
 dt_sat = [_.astimezone(au_tz) for _ in dt_sat]
 # calculate days difference
 diff_days = [ [(x - _).days for _ in dt_quad] for x in dt_sat]
 day_thresh = 15
 idx = [utils.find_indices(_, lambda e: abs(e) < day_thresh) for _ in diff_days]
 dt_diff = []
 idx_nogt = []
 for i in range(n_img):
    if not idx[i]:
        idx_nogt.append(i)
        continue
    dt_diff.append({"sat dt": dt_sat[i],
               "quad dt": [dt_quad[_] for _ in idx[i]],
               "days diff": [diff_days[i][_] for _ in idx[i]] })
 with open('idx_nogt.pkl', 'wb') as f:
    pickle.dump(idx_nogt, f)     
 #%%  
 dates_sat = mdates.date2num(dt_sat)
 dates_quad = mdates.date2num(dt_quad)
 plt.figure()
 plt.plot_date(dates_sat, np.zeros((n_img,1)))
 plt.plot_date(dates_quad, np.ones((len(dates_quad),1)))
 plt.show()
 data = pd.read_pickle('data_2016.pkl')
 dt_sat = [_.astimezone(au_tz) for _ in data['dt']]
 [ (_ - dt_sat[0]).days for _ in dt_quad]
 dn_sat = []
 for i in range(len(dt_sat)): dn_sat.append(dt_sat[i].toordinal())
 dn_sat = np.array(dn_sat)
 dn_sur = []
 for i in range(len(dt_survey)): dn_sur.append(dt_survey[i].toordinal())
 dn_sur = np.array(dn_sur)
 distances = np.zeros((len(dn_sat),4)).astype('int32')
 indexes = np.zeros((len(dn_sat),2)).astype('int32')
 for i in range(len(dn_sat)):
    distances[i,0] = np.sort(abs(dn_sat[i] - dn_sur))[0]
    distances[i,1] = np.sort(abs(dn_sat[i] - dn_sur))[1]
    distances[i,2] = dt_sat[i].year
    distances[i,3] = dt_sat[i].month
    indexes[i,0] = np.where(abs(dn_sat[i] - dn_sur) == np.sort(abs(dn_sat[i] - dn_sur))[0])[0][0]
    indexes[i,1] = np.where(abs(dn_sat[i] - dn_sur) == np.sort(abs(dn_sat[i] - dn_sur))[1])[0][0]
 years = [2013, 2014, 2015, 2016]
 months = mdates.MonthLocator()
 days = mdates.DayLocator()
 month_fmt = mdates.DateFormatter('%b')
 f, ax = plt.subplots(4, 1)
 for i, ca in enumerate(ax):
    ca.xaxis.set_major_locator(months)
    ca.xaxis.set_major_formatter(month_fmt)
    ca.xaxis.set_minor_locator(days)
    ca.set_ylabel(str(years[i]))
    for j in range(len(dt_sat)):
        if dt_sat[j].year == years[i]:
            ca.plot(dt_sat[j],0, 'bo', markerfacecolor='b')
 #f.subplots_adjust(hspace=0)
 #plt.setp([a.get_xticklabels() for a in f.axes[:-1]], visible=False)            
 plt.plot(dt_survey, np.zeros([len(dt_survey),1]), 'bo')
 plt.plot(dt_sat, np.ones([len(dt_sat),1]), 'ro')
 plt.yticks([])
 plt.show()
Author	SHA1	Message	Date
kvos	62f5c5330f	updated master added download_images and read_images scripts	8 years ago
kvos	cd74f6c39c	updated gitignore	8 years ago
Kilian Vos	0e18b6d4d0	update README.md	8 years ago
kvos	ee556de2fe	Neural Network image classification NN trained to classify each pixel of the image in 4 classes ( sand, whitewater, water, other)	8 years ago
kvos	e92fd60ba2	uploaded p3_environment.txt	8 years ago
kvos	2abac763b1	added sand classification	8 years ago
Kilian Vos	ca90712623	Update 'README.md'	8 years ago
kvos	1882c98d9b	update sds.py module	8 years ago
Kilian Vos	b964426cd4	gitingnore file	8 years ago
Kilian Vos	ea32adf79b	clean master most things transferred to development branch	8 years ago