Compare commits


10 Commits

Author       SHA1         Message                                                                 Date
kvos         62f5c5330f   updated master (added download_images and read_images scripts)          7 years ago
kvos         cd74f6c39c   updated gitignore                                                        7 years ago
Kilian Vos   0e18b6d4d0   update README.md                                                         7 years ago
kvos         ee556de2fe   Neural Network image classification (NN trained to classify each pixel of the image in 4 classes: sand, whitewater, water, other)   7 years ago
kvos         e92fd60ba2   uploaded p3_environment.txt                                              7 years ago
kvos         2abac763b1   added sand classification                                                7 years ago
Kilian Vos   ca90712623   Update 'README.md'                                                       7 years ago
kvos         1882c98d9b   update sds.py module                                                     7 years ago
Kilian Vos   b964426cd4   gitingnore file                                                          7 years ago
Kilian Vos   ea32adf79b   clean master (most things transferred to development branch)            7 years ago

.gitignore vendored (+7 lines)

@@ -0,0 +1,7 @@
*.pyc
*.mat
*.tif
*.png
*.mp4
*.gif
*.jpg

@@ -4,3 +4,6 @@ It has .py routines to:
- read and preprocess satellite images (cloud masking, contrast stretching)
- pansharpen Landsat 8 images
- extract shorelines with the Marching Squares algorithm (see the sketch below)
- classify images into 4 classes (sand, whitewater, water, other) using a Neural Network classifier
Requirements: all the packages listed in py3_environments.txt
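
As a minimal illustration of the Marching Squares step (a hedged sketch: the random array below stands in for a real water-index raster such as NDWI), `skimage.measure.find_contours` traces the iso-line separating water from land:

```python
import numpy as np
import skimage.measure as measure

ndwi = np.random.rand(50, 50)                # stand-in for a real water index image
contours = measure.find_contours(ndwi, 0.5)  # iso-value separating water and land
shoreline = max(contours, key=len)           # keep the longest contour
print(shoreline.shape)                       # (n_points, 2) in (row, col) pixel coordinates
```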

@@ -1,222 +0,0 @@
# -*- coding: utf-8 -*-
# Preamble
import ee
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import pandas as pd
from datetime import datetime
import pickle
import pdb
import pytz
from pylab import ginput
import scipy.io as sio
import scipy.interpolate
import os
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.morphology as morphology
import skimage.measure as measure
# my functions
import functions.utils as utils
import functions.sds as sds
np.seterr(all='ignore') # ignore division-by-zero and NaN warnings
ee.Initialize()
au_tz = pytz.timezone('Australia/Sydney')
#%%
# load SDS shorelines
with open(os.path.join('data', 'data_gt_l8.pkl'), 'rb') as f:
    data = pickle.load(f)
# load quadbike dates and convert from datenum to datetime
suffix = '.mat'
dir_name = os.getcwd()
file_name = os.path.join('data', 'quadbike_dates')
file_path = os.path.join(dir_name, file_name + suffix)
quad_dates = sio.loadmat(file_path)['dates']
dt_quad = []
for i in range(quad_dates.shape[0]):
    dt_quad.append(datetime(quad_dates[i,0], quad_dates[i,1], quad_dates[i,2], tzinfo=au_tz))
# remove overlapping images, keep the one with the lowest cloud cover
n = len(data['cloud_cover'])
idx_worst = []
for i in range(n):
    date_im = data['date_acquired'][i]
    idx_double = np.isin(data['date_acquired'], date_im)
    if sum(idx_double.astype(int)) > 1:
        idx_worst.append(np.where(idx_double)[0][np.argmax(np.array(data['cloud_cover'])[idx_double])])
dt_sat = []
new_meta = {'contours': [],
            'cloud_cover': [],
            'geom_rmse_model': [],
            'gcp_model': [],
            'quality': [],
            'sun_azimuth': [],
            'sun_elevation': []}
for i in range(n):
    if not np.isin(i, idx_worst):
        dt_sat.append(data['dt'][i].astimezone(au_tz))
        new_meta['contours'].append(data['contours'][i])
        new_meta['cloud_cover'].append(data['cloud_cover'][i])
        new_meta['geom_rmse_model'].append(data['geom_rmse_model'][i])
        new_meta['gcp_model'].append(data['gcp_model'][i])
        new_meta['quality'].append(data['quality'][i])
        new_meta['sun_azimuth'].append(data['sun_azimuth'][i])
        new_meta['sun_elevation'].append(data['sun_elevation'][i])
# calculate the difference in days between each satellite date and each survey date
diff_days = [[(x - _).days for _ in dt_quad] for x in dt_sat]
day_thresh = 15
idx_close = [utils.find_indices(_, lambda e: abs(e) < day_thresh) for _ in diff_days]
# put everything in a dictionary and save it
wl_comp = []
for i in range(len(dt_sat)):
    wl_comp.append({'sat dt': dt_sat[i],
                    'quad dt': [dt_quad[_] for _ in idx_close[i]],
                    'days diff': [diff_days[i][_] for _ in idx_close[i]],
                    'contours': new_meta['contours'][i],
                    'cloud_cover': new_meta['cloud_cover'][i],
                    'geom_rmse_model': new_meta['geom_rmse_model'][i],
                    'gcp_model': new_meta['gcp_model'][i],
                    'quality': new_meta['quality'][i],
                    'sun_azimuth': new_meta['sun_azimuth'][i],
                    'sun_elevation': new_meta['sun_elevation'][i]})
with open('wl_l8_comparison.pkl', 'wb') as f:
    pickle.dump(wl_comp, f)
#%%
with open(os.path.join('data', 'wl_l8_comparison.pkl'), 'rb') as f:
    wl = pickle.load(f)
# load the quadbike surveys and attach each one to the matching SDS date
suffix = '.mat'
dir_name = os.getcwd()
subfolder_name = os.path.join('data', 'quadbike_surveys')
file_path = os.path.join(dir_name, subfolder_name)
file_names = os.listdir(file_path)
for i in range(len(file_names)):
    fn_mat = os.path.join(file_path, file_names[i])
    years = int(file_names[i][6:10])
    months = int(file_names[i][11:13])
    days = int(file_names[i][14:16])
    for j in range(len(wl)):
        if wl[j]['quad dt'][0] == datetime(years, months, days, tzinfo=au_tz):
            quad_mat = sio.loadmat(fn_mat)
            wl[j].update({'quad_data': {'x': quad_mat['x'],
                                        'y': quad_mat['y'],
                                        'z': quad_mat['z'],
                                        'dt': datetime(years, months, days, tzinfo=au_tz)}})
with open(os.path.join('data', 'wl_final.pkl'), 'wb') as f:
    pickle.dump(wl, f)
#%%
with open(os.path.join('data', 'wl_final.pkl'), 'rb') as f:
    wl = pickle.load(f)
i = 0
x = wl[i]['quad_data']['x']
y = wl[i]['quad_data']['y']
z = wl[i]['quad_data']['z']
x = x.reshape(x.shape[0] * x.shape[1])
y = y.reshape(y.shape[0] * y.shape[1])
z = z.reshape(z.shape[0] * z.shape[1])
idx_nan = np.isnan(z)
x_nan = x[idx_nan]
y_nan = y[idx_nan]
z_nan = z[idx_nan]
x_nonan = x[~idx_nan]
y_nonan = y[~idx_nan]
z_nonan = z[~idx_nan]
# subsample the survey points to speed up the interpolation
xs = x_nonan[::10]
ys = y_nonan[::10]
zs = z_nonan[::10]
xq = wl[i]['contours'][:,0]
yq = wl[i]['contours'][:,1]
# keep only the SDS points that fall inside the survey extent
idx_x = np.logical_and(xq < np.max(xs), xq > np.min(xs))
idx_y = np.logical_and(yq < np.max(ys), yq > np.min(ys))
idx_in = np.logical_and(idx_x, idx_y)
xq = xq[idx_in]
yq = yq[idx_in]
# interpolate the survey elevations at the SDS points
f = scipy.interpolate.interp2d(xs, ys, zs, kind='linear')
zq = f(xq, yq)
plt.figure()
plt.grid()
plt.scatter(xs, ys, s=10, c=zs, marker='o', cmap=cm.get_cmap('jet'),
            label='quad data')
plt.plot(xq,yq,'r-o', markersize=5, label='SDS')
plt.axis('equal')
plt.legend()
plt.colorbar(label='mAHD')
plt.xlabel('Eastings [m]')
plt.ylabel('Northings [m]')
plt.show()
plt.figure()
plt.plot(zq[:,0])
plt.show()
plt.figure()
plt.grid()
plt.scatter(x_nonan, y_nonan, s=10, c=z_nonan, marker='o', cmap=cm.get_cmap('jet'),
            label='quad data')
#plt.plot(x_nan, y_nan, 'k.', label='nans')
plt.plot(xq,yq,'r-o', markersize=5, label='SDS')
plt.axis('equal')
plt.legend()
plt.colorbar(label='mAHD')
plt.xlabel('Eastings [m]')
plt.ylabel('Northings [m]')
plt.show()
# alternative: griddata directly on the scattered (NaN-free) survey points
z2 = scipy.interpolate.griddata((x_nonan, y_nonan), z_nonan, (xq, yq), method='linear')
sio.savemat('shoreline1.mat', {'x':xq, 'y':yq})
# interp2d sanity check (example from the scipy documentation)
from scipy import interpolate
x = np.arange(-5.01, 5.01, 0.01)
y = np.arange(-5.01, 5.01, 0.01)
xx, yy = np.meshgrid(x, y)
z = np.sin(xx**2 + yy**2)
f = interpolate.interp2d(x, y, z, kind='cubic')
xnew = np.arange(-5.01, 5.01, 1e-2)
ynew = np.arange(-5.01, 5.01, 1e-2)
znew = f(xnew, ynew)
plt.plot(x, z[:, 0], 'ro-', xnew, znew[:, 0], 'b-')
plt.show()

@@ -1,47 +0,0 @@
# -*- coding: utf-8 -*-
# Preamble
import ee
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import pandas as pd
from datetime import datetime
import pickle
import pdb
import pytz
from pylab import ginput
import scipy.io as sio
import scipy.interpolate
import os
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.morphology as morphology
import skimage.measure as measure
# my functions
import functions.utils as utils
import functions.sds as sds
np.seterr(all='ignore') # ignore division-by-zero and NaN warnings
ee.Initialize()
with open(os.path.join('data', 'wl_final.pkl'), 'rb') as f:
    wl = pickle.load(f)
i = 0
x = wl[i]['quad_data']['x']
y = wl[i]['quad_data']['y']
z = wl[i]['quad_data']['z']
x = x.reshape(x.shape[0] * x.shape[1])
y = y.reshape(y.shape[0] * y.shape[1])
z = z.reshape(z.shape[0] * z.shape[1])
idx_nan = np.isnan(z)
x = x[~idx_nan]
y = y[~idx_nan]
z = z[~idx_nan]

Binary files not shown (7 files).

@@ -0,0 +1,390 @@
#==========================================================#
#==========================================================#
# Download L5, L7, L8, S2 images of a given area
#==========================================================#
#==========================================================#
#==========================================================#
# Initial settings
#==========================================================#
import os
import numpy as np
import matplotlib.pyplot as plt
import pdb
import ee
# other modules
from osgeo import gdal, ogr, osr
from urllib.request import urlretrieve
import zipfile
from datetime import datetime
import pytz
import pickle
# import own modules
import functions.utils as utils
import functions.sds as sds
np.seterr(all='ignore') # ignore division-by-zero and NaN warnings
ee.Initialize()
#==========================================================#
# Location
#==========================================================#
## location (Narrabeen-Collaroy beach)
#polygon = [[[151.301454, -33.700754],
# [151.311453, -33.702075],
# [151.307237, -33.739761],
# [151.294220, -33.736329],
# [151.301454, -33.700754]]];
# location (Tairua beach)
sitename = 'TAIRUA'
polygon = [[[175.835574, -36.982022],
[175.888220, -36.980680],
[175.893527, -37.029610],
[175.833444, -37.031767],
[175.835574, -36.982022]]];
# initialise metadata dictionary (stores timestamps and georeferencing accuracy of each image)
metadata = dict([])
# create directories
os.makedirs(os.path.join(os.getcwd(), 'data', sitename), exist_ok=True)
#%%
#==========================================================#
#==========================================================#
# L5
#==========================================================#
#==========================================================#
# define filenames for images
suffix = '.tif'
filepath = os.path.join(os.getcwd(), 'data', sitename, 'L5', '30m')
os.makedirs(filepath, exist_ok=True)
#==========================================================#
# Select L5 collection
#==========================================================#
satname = 'L5'
input_col = ee.ImageCollection('LANDSAT/LT05/C01/T1_TOA')
# filter by location
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
n_img = flt_col.size().getInfo()
print('Number of images covering ' + sitename, n_img)
im_all = flt_col.getInfo().get('features')
#==========================================================#
# Main loop through images
#==========================================================#
timestamps = []
acc_georef = []
all_names = []
for i in range(n_img):
    # find each image in ee database
    im = ee.Image(im_all[i].get('id'))
    im_dic = im.getInfo()
    im_bands = im_dic.get('bands')
    t = im_dic['properties']['system:time_start']
    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
    timestamps.append(im_timestamp)
    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
    im_epsg = int(im_dic['bands'][0]['crs'][5:])
    try:
        acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
    except KeyError:
        acc_georef.append(12)
        print('No geometric rmse model property')
    # delete dimensions key from dictionary, otherwise the entire image is extracted
    for j in range(len(im_bands)): del im_bands[j]['dimensions']
    # bands for L5
    ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[7]]
    # filenames
    filename = im_date + '_' + satname + '_' + sitename + suffix
    print(i)
    if any(filename in _ for _ in all_names):
        filename = im_date + '_' + satname + '_' + sitename + '_dup' + suffix
    all_names.append(filename)
    local_data = sds.download_tif(im, polygon, ms_bands, filepath)
    os.rename(local_data, os.path.join(filepath, filename))
# sort timestamps and georef accuracy (downloaded images are sorted by date in the directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}
#%%
#==========================================================#
#==========================================================#
# L7&L8
#==========================================================#
#==========================================================#
# define filenames for images
suffix = '.tif'
filepath = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8')
filepath_pan = os.path.join(filepath, 'pan')
filepath_ms = os.path.join(filepath, 'ms')
os.makedirs(filepath_pan, exist_ok=True)
os.makedirs(filepath_ms, exist_ok=True)
#==========================================================#
# Select L7 collection
#==========================================================#
satname = 'L7'
input_col = ee.ImageCollection('LANDSAT/LE07/C01/T1_RT_TOA')
# filter by location
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
n_img = flt_col.size().getInfo()
print('Number of images covering ' + sitename, n_img)
im_all = flt_col.getInfo().get('features')
#==========================================================#
# Main loop through images
#==========================================================#
timestamps = []
acc_georef = []
all_names = []
for i in range(n_img):
    # find each image in ee database
    im = ee.Image(im_all[i].get('id'))
    im_dic = im.getInfo()
    im_bands = im_dic.get('bands')
    t = im_dic['properties']['system:time_start']
    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
    timestamps.append(im_timestamp)
    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
    im_epsg = int(im_dic['bands'][0]['crs'][5:])
    try:
        acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
    except KeyError:
        acc_georef.append(12)
        print('No geometric rmse model property')
    # delete dimensions key from dictionary, otherwise the entire image is extracted
    for j in range(len(im_bands)): del im_bands[j]['dimensions']
    # bands for L7
    pan_band = [im_bands[8]]
    ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[9]]
    # filenames
    filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
    filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
    print(i)
    if any(filename_pan in _ for _ in all_names):
        filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
        filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
    all_names.append(filename_pan)
    local_data_pan = sds.download_tif(im, polygon, pan_band, filepath_pan)
    os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
    local_data_ms = sds.download_tif(im, polygon, ms_bands, filepath_ms)
    os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
#==========================================================#
# Select L8 collection
#==========================================================#
satname = 'L8'
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
# filter by location
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
n_img = flt_col.size().getInfo()
print('Number of images covering ' + sitename, n_img)
im_all = flt_col.getInfo().get('features')
#==========================================================#
# Main loop through images
#==========================================================#
# timestamps, acc_georef and all_names keep accumulating from the L7 loop,
# so L7 and L8 end up in a single metadata entry
for i in range(n_img):
    # find each image in ee database
    im = ee.Image(im_all[i].get('id'))
    im_dic = im.getInfo()
    im_bands = im_dic.get('bands')
    t = im_dic['properties']['system:time_start']
    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
    timestamps.append(im_timestamp)
    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
    im_epsg = int(im_dic['bands'][0]['crs'][5:])
    try:
        acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
    except KeyError:
        acc_georef.append(12)
        print('No geometric rmse model property')
    # delete dimensions key from dictionary, otherwise the entire image is extracted
    for j in range(len(im_bands)): del im_bands[j]['dimensions']
    # bands for L8
    pan_band = [im_bands[7]]
    ms_bands = [im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[5], im_bands[11]]
    # filenames
    filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
    filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
    print(i)
    if any(filename_pan in _ for _ in all_names):
        filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
        filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix
    all_names.append(filename_pan)
    local_data_pan = sds.download_tif(im, polygon, pan_band, filepath_pan)
    os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
    local_data_ms = sds.download_tif(im, polygon, ms_bands, filepath_ms)
    os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
# sort timestamps and georef accuracy (downloaded images are sorted by date in the directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}
#%%
#==========================================================#
#==========================================================#
# S2
#==========================================================#
#==========================================================#
# define filenames for images
suffix = '.tif'
filepath = os.path.join(os.getcwd(), 'data', sitename, 'S2')
os.makedirs(os.path.join(filepath, '10m'), exist_ok=True)
os.makedirs(os.path.join(filepath, '20m'), exist_ok=True)
os.makedirs(os.path.join(filepath, '60m'), exist_ok=True)
#==========================================================#
# Select S2 collection
#==========================================================#
satname = 'S2'
input_col = ee.ImageCollection('COPERNICUS/S2')
# filter by location
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
n_img = flt_col.size().getInfo()
print('Number of images covering ' + sitename, n_img)
im_all = flt_col.getInfo().get('features')
#==========================================================#
# Main loop through images
#==========================================================#
timestamps = []
acc_georef = []
all_names = []
for i in range(n_img):
    # find each image in ee database
    im = ee.Image(im_all[i].get('id'))
    im_dic = im.getInfo()
    im_bands = im_dic.get('bands')
    t = im_dic['properties']['system:time_start']
    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
    timestamps.append(im_timestamp)
    im_epsg = int(im_dic['bands'][0]['crs'][5:])
    try:
        if im_dic['properties']['GEOMETRIC_QUALITY_FLAG'] == 'PASSED':
            acc_georef.append(1)
        else:
            acc_georef.append(0)
    except KeyError:
        acc_georef.append(0)
    # delete dimensions key from dictionary, otherwise the entire image is extracted
    for j in range(len(im_bands)): del im_bands[j]['dimensions']
    # bands for S2
    bands10 = [im_bands[1], im_bands[2], im_bands[3], im_bands[7]]
    bands20 = [im_bands[11]]
    bands60 = [im_bands[15]]
    # filenames
    filename10 = im_date + '_' + satname + '_' + sitename + '_' + '10m' + suffix
    filename20 = im_date + '_' + satname + '_' + sitename + '_' + '20m' + suffix
    filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + suffix
    print(i)
    if any(filename10 in _ for _ in all_names):
        filename10 = im_date + '_' + satname + '_' + sitename + '_' + '10m' + '_dup' + suffix
        filename20 = im_date + '_' + satname + '_' + sitename + '_' + '20m' + '_dup' + suffix
        filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + '_dup' + suffix
    all_names.append(filename10)
    local_data = sds.download_tif(im, polygon, bands10, filepath)
    os.rename(local_data, os.path.join(filepath, '10m', filename10))
    local_data = sds.download_tif(im, polygon, bands20, filepath)
    os.rename(local_data, os.path.join(filepath, '20m', filename20))
    local_data = sds.download_tif(im, polygon, bands60, filepath)
    os.rename(local_data, os.path.join(filepath, '60m', filename60))
# sort timestamps and georef accuracy (downloaded images are sorted by date in the directory)
timestamps_sorted = sorted(timestamps)
idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}
#%% save metadata
filepath = os.path.join(os.getcwd(), 'data', sitename)
with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'wb') as f:
    pickle.dump(metadata, f)
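#%% illustrative sketch: reading the saved metadata back in
# (each metadata[satname] entry holds 'dates', 'acc_georef' and 'epsg')
# with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'rb') as f:
#     metadata = pickle.load(f)
# metadata['L8']['dates'][0], metadata['L8']['epsg']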

Binary file not shown.

@@ -0,0 +1,432 @@
"""This module contains all the functions needed for data analysis """
# Initial settings
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import gridspec
import pdb
import ee
# other modules
from osgeo import gdal, ogr, osr
import scipy.interpolate as interpolate
import scipy.stats as sstats
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.measure as measure
import skimage.morphology as morphology
# machine learning modules
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
from sklearn.externals import joblib
import time
# import own modules
import functions.utils as utils
def get_tide(dates_sds, dates_tide, tide_level):
    "finds the tide level at each SDS date (closest record within half an hour, else NaN)"
    tide = []
    for i in range(len(dates_sds)):
        dates_diff = np.abs(np.array([(dates_sds[i] - _).total_seconds() for _ in dates_tide]))
        if np.min(dates_diff) <= 1800: # half an hour
            idx_closest = np.argmin(dates_diff)
            tide.append(tide_level[idx_closest])
        else:
            tide.append(np.nan)
    tide = np.array(tide)
    return tide
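# illustrative check with synthetic data: the first SDS date falls within 30
# minutes of a tide record, the second does not
# from datetime import datetime, timedelta
# dates_tide = [datetime(2018, 1, 1) + timedelta(hours=h) for h in range(24)]
# tide_level = np.linspace(-1, 1, 24)
# dates_sds = [datetime(2018, 1, 1, 6, 10), datetime(2018, 1, 2, 12, 0)]
# get_tide(dates_sds, dates_tide, tide_level)   # -> array([tide_level[6], nan])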
def remove_duplicates(output, satname):
    "removes duplicates from the output structure, keeping the image with the lower cloud cover or better georeferencing"
    dates = output['dates']
    dates_str = [_.strftime('%Y%m%d') for _ in dates]
    dupl = utils.duplicates_dict(dates_str)
    if dupl:
        output_nodup = dict([])
        idx_remove = []
        if satname == 'L8' or satname == 'L5':
            for k, v in dupl.items():
                idx1 = v[0]
                idx2 = v[1]
                c1 = output['metadata']['cloud_cover'][idx1]
                c2 = output['metadata']['cloud_cover'][idx2]
                g1 = output['metadata']['acc_georef'][idx1]
                g2 = output['metadata']['acc_georef'][idx2]
                if c1 < c2 - 0.01:
                    idx_remove.append(idx2)
                elif g1 < g2 - 0.1:
                    idx_remove.append(idx2)
                else:
                    idx_remove.append(idx1)
        else:
            for k, v in dupl.items():
                idx1 = v[0]
                idx2 = v[1]
                c1 = output['metadata']['cloud_cover'][idx1]
                c2 = output['metadata']['cloud_cover'][idx2]
                if c1 < c2 - 0.01:
                    idx_remove.append(idx2)
                else:
                    idx_remove.append(idx1)
        idx_remove = sorted(idx_remove)
        idx_all = np.arange(len(dates_str))
        idx_keep = list(np.where(~np.isin(idx_all, idx_remove))[0])
        output_nodup['dates'] = [output['dates'][k] for k in idx_keep]
        output_nodup['shorelines'] = [output['shorelines'][k] for k in idx_keep]
        output_nodup['metadata'] = dict([])
        for key in list(output['metadata'].keys()):
            output_nodup['metadata'][key] = [output['metadata'][key][k] for k in idx_keep]
        print(satname + ' : ' + str(len(idx_remove)) + ' duplicates')
        return output_nodup
    else:
        print(satname + ' : ' + 'no duplicates')
        return output
def merge(output):
    "merges data from the different satellites"
    # stack all lists together under one key
    output_all = {'dates':[], 'shorelines':[],
                  'metadata':{'filenames':[], 'satname':[], 'cloud_cover':[], 'acc_georef':[]}}
    for satname in list(output.keys()):
        output_all['dates'] = output_all['dates'] + output[satname]['dates']
        output_all['shorelines'] = output_all['shorelines'] + output[satname]['shorelines']
        for key in list(output[satname]['metadata'].keys()):
            output_all['metadata'][key] = output_all['metadata'][key] + output[satname]['metadata'][key]
    output_all_sorted = {'dates':[], 'shorelines':[],
                         'metadata':{'filenames':[], 'satname':[], 'cloud_cover':[], 'acc_georef':[]}}
    # sort by date
    idx_sorted = sorted(range(len(output_all['dates'])), key=output_all['dates'].__getitem__)
    output_all_sorted['dates'] = [output_all['dates'][i] for i in idx_sorted]
    output_all_sorted['shorelines'] = [output_all['shorelines'][i] for i in idx_sorted]
    for key in list(output_all['metadata'].keys()):
        output_all_sorted['metadata'][key] = [output_all['metadata'][key][i] for i in idx_sorted]
    return output_all_sorted
def create_transects(x0, y0, orientation, chainage_length):
    "creates shore-normal transects"
    transects = []
    for k in range(len(x0)):
        # orientation of cross-shore profile
        phi = (90 - orientation[k])*np.pi/180
        # create a vector using the chainage length
        x = np.linspace(0, chainage_length, chainage_length+1)
        y = np.zeros(len(x))
        coords = np.zeros((len(x),2))
        coords[:,0] = x
        coords[:,1] = y
        # translate and rotate the vector using the origin and orientation
        tf = transform.EuclideanTransform(rotation=phi, translation=(x0[k], y0[k]))
        coords_tf = tf(coords)
        transects.append(coords_tf)
    return transects
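# illustrative: a single 100 m transect starting at the origin with
# orientation 90 (degrees clockwise from north, i.e. pointing along +x),
# sampled every metre
# tr = create_transects(x0=[0], y0=[0], orientation=[90], chainage_length=100)
# tr[0][0] -> array([0., 0.]) ; tr[0][-1] -> array([100., 0.])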
def calculate_chainage(sds, transects, orientation, along_dist):
    "intersects SDS with the transects and computes the chainage position"
    chainage_mtx = np.zeros((len(sds), len(transects), 6))
    for i in range(len(sds)):
        sl = sds[i]
        for j in range(len(transects)):
            # compute rotation matrix
            X0 = transects[j][0,0]
            Y0 = transects[j][0,1]
            phi = (90 - orientation[j])*np.pi/180
            Mrot = np.array([[np.cos(phi), np.sin(phi)], [-np.sin(phi), np.cos(phi)]])
            # calculate point-to-line distance between the shoreline points and the profile
            p1 = np.array([X0, Y0])
            p2 = transects[j][-1,:]
            p3 = sl
            d = np.abs(np.cross(p2-p1, p3-p1)/np.linalg.norm(p2-p1))
            idx_close = utils.find_indices(d, lambda e: e <= along_dist)
            # check if there are SDS points around the profile or not
            if not idx_close:
                chainage_mtx[i,j,:] = np.tile(np.nan, (1,6))
            else:
                # change of basis to the shore-normal coordinate system
                xy_close = np.array([sl[idx_close,0], sl[idx_close,1]]) - np.tile(np.array([[X0],[Y0]]), (1, len(sl[idx_close])))
                xy_rot = np.matmul(Mrot, xy_close)
                # put nan values if the chainage is negative (MAKE SURE TO PICK THE ORIGIN CORRECTLY)
                if np.any(xy_rot[0,:] < 0):
                    xy_rot[0, np.where(xy_rot[0,:] < 0)] = np.nan
                # compute mean, median, max, min and std of the chainage position
                n_points = len(xy_rot[0,:])
                mean_cross = np.nanmean(xy_rot[0,:])
                median_cross = np.nanmedian(xy_rot[0,:])
                max_cross = np.nanmax(xy_rot[0,:])
                min_cross = np.nanmin(xy_rot[0,:])
                std_cross = np.nanstd(xy_rot[0,:])
                if std_cross > 10: # if large std, take the most seaward point
                    mean_cross = max_cross
                    median_cross = max_cross
                    min_cross = max_cross
                # store the statistics
                chainage_mtx[i,j,:] = np.array([mean_cross, median_cross, max_cross,
                                                min_cross, n_points, std_cross])
    # format into dictionary
    chainage = dict([])
    chainage['mean'] = chainage_mtx[:,:,0]
    chainage['median'] = chainage_mtx[:,:,1]
    chainage['max'] = chainage_mtx[:,:,2]
    chainage['min'] = chainage_mtx[:,:,3]
    chainage['npoints'] = chainage_mtx[:,:,4]
    chainage['std'] = chainage_mtx[:,:,5]
    return chainage
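# illustrative, self-contained check: a synthetic shoreline parallel to the
# x-axis at y = 50 m, measured on a transect pointing along +y (orientation 0),
# should give a median chainage of 50 m
# sl = np.array([[xx, 50.0] for xx in np.linspace(-20, 20, 41)])
# tr = create_transects(x0=[0], y0=[0], orientation=[0], chainage_length=100)
# calculate_chainage([sl], tr, orientation=[0], along_dist=10)['median']  # -> [[50.]]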
def compare_sds(dates_sds, chain_sds, topo_profiles, mod=0, mindays=5):
    """
    Compare SDS with groundtruth data from topographic surveys / argus shorelines
    KV WRL 2018
    Arguments:
    -----------
    dates_sds: list
        list of dates corresponding to each row in chain_sds
    chain_sds: np.ndarray
        array with time series of chainage for each transect (each transect is one column)
    topo_profiles: dict
        dict containing the dates and chainage of the groundtruth
    mod: 0 or 1
        0 for linear interpolation between the 2 closest surveys, 1 for nearest neighbour only
    mindays: int
        minimum number of days for which the data can be compared
    Returns:
    -----------
    stats: dict
        contains all the statistics of the comparison
    """
    # create 3 figures
    fig1 = plt.figure()
    gs1 = gridspec.GridSpec(chain_sds.shape[1], 1)
    fig2 = plt.figure()
    gs2 = gridspec.GridSpec(2, chain_sds.shape[1])
    fig3 = plt.figure()
    gs3 = gridspec.GridSpec(2, 1)
    dates_sds_num = np.array([_.toordinal() for _ in dates_sds])
    stats = dict([])
    data_fin = dict([])
    # for each transect compare and plot the data
    for i in range(chain_sds.shape[1]):
        pfname = list(topo_profiles.keys())[i]
        stats[pfname] = dict([])
        data_fin[pfname] = dict([])
        dates_sur = topo_profiles[pfname]['dates']
        chain_sur = topo_profiles[pfname]['chainage']
        # convert to datenum
        dates_sur_num = np.array([_.toordinal() for _ in dates_sur])
        chain_sur_interp = []
        diff_days = []
        for j, satdate in enumerate(dates_sds_num):
            temp_diff = satdate - dates_sur_num
            if mod == 0:
                # select the measurements before and after the sat image date and interpolate
                ind_before = np.where(temp_diff == temp_diff[temp_diff > 0][-1])[0]
                if ind_before == len(temp_diff) - 1:
                    chain_sur_interp.append(np.nan)
                    diff_days.append(np.abs(satdate - dates_sur_num[ind_before])[0])
                    continue
                ind_after = np.where(temp_diff == temp_diff[temp_diff < 0][0])[0]
                tempx = np.zeros(2)
                tempx[0] = dates_sur_num[ind_before]
                tempx[1] = dates_sur_num[ind_after]
                tempy = np.zeros(2)
                tempy[0] = chain_sur[ind_before]
                tempy[1] = chain_sur[ind_after]
                diff_days.append(np.abs(np.max([satdate - tempx[0], satdate - tempx[1]])))
                # interpolate
                f = interpolate.interp1d(tempx, tempy)
                chain_sur_interp.append(f(satdate))
            elif mod == 1:
                # select the closest measurement
                idx_closest = utils.find_indices(np.abs(temp_diff), lambda e: e == np.min(np.abs(temp_diff)))[0]
                diff_days.append(np.abs(satdate - dates_sur_num[idx_closest]))
                if diff_days[j] > mindays:
                    chain_sur_interp.append(np.nan)
                else:
                    chain_sur_interp.append(chain_sur[idx_closest])
        chain_sur_interp = np.array(chain_sur_interp)
        # remove nan values
        idx_sur_nan = ~np.isnan(chain_sur_interp)
        idx_sat_nan = ~np.isnan(chain_sds[:,i])
        idx_nan = np.logical_and(idx_sur_nan, idx_sat_nan)
        # groundtruth and sds
        chain_sur_fin = chain_sur_interp[idx_nan]
        chain_sds_fin = chain_sds[idx_nan,i]
        dates_fin = [k for (k, v) in zip(dates_sds, idx_nan) if v]
        # calculate statistics
        slope, intercept, rvalue, pvalue, std_err = sstats.linregress(chain_sur_fin, chain_sds_fin)
        R2 = rvalue**2
        correlation = np.corrcoef(chain_sur_fin, chain_sds_fin)[0,1]
        diff_chain = chain_sur_fin - chain_sds_fin
        rmse = np.sqrt(np.nanmean((diff_chain)**2))
        mean = np.nanmean(diff_chain)
        std = np.nanstd(diff_chain)
        q90 = np.percentile(np.abs(diff_chain), 90)
        # store data
        stats[pfname]['rmse'] = rmse
        stats[pfname]['mean'] = mean
        stats[pfname]['std'] = std
        stats[pfname]['q90'] = q90
        stats[pfname]['diffdays'] = diff_days
        stats[pfname]['corr'] = correlation
        stats[pfname]['linfit'] = {'slope':slope, 'intercept':intercept, 'R2':R2, 'pvalue':pvalue}
        data_fin[pfname]['dates'] = dates_fin
        data_fin[pfname]['sds'] = chain_sds_fin
        data_fin[pfname]['survey'] = chain_sur_fin
        # make time-series plot
        plt.figure(fig1.number)
        fig1.add_subplot(gs1[i,0])
        plt.plot(dates_sur, chain_sur, 'o-', color='C1', markersize=4, label='survey all')
        plt.plot(dates_fin, chain_sur_fin, 'o', color=[0.3, 0.3, 0.3], markersize=2, label='survey interp')
        plt.plot(dates_fin, chain_sds_fin, 'o--', color='b', markersize=4, label='SDS')
        plt.title(pfname, fontweight='bold')
        # plt.xlim([dates_sds[0], dates_sds[-1]])
        plt.ylabel('chainage [m]')
        # make scatter plot
        plt.figure(fig2.number)
        fig2.add_subplot(gs2[0,i])
        plt.axis('equal')
        plt.plot(chain_sur_fin, chain_sds_fin, 'ko', markersize=4, markerfacecolor='w', alpha=0.7)
        xmax = np.max([np.nanmax(chain_sds_fin), np.nanmax(chain_sur_fin)])
        xmin = np.min([np.nanmin(chain_sds_fin), np.nanmin(chain_sur_fin)])
        ymax = np.max([np.nanmax(chain_sds_fin), np.nanmax(chain_sur_fin)])
        ymin = np.min([np.nanmin(chain_sds_fin), np.nanmin(chain_sur_fin)])
        plt.plot([xmin, xmax], [ymin, ymax], 'k--')
        plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'b:')
        str_corr = ' y = %.2f x + %.2f\n R2 = %.2f' % (slope, intercept, R2)
        plt.text(xmin, ymax-5, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
        plt.xlabel('chainage survey [m]')
        plt.ylabel('chainage satellite [m]')
        plt.title(pfname, fontweight='bold')
        fig2.add_subplot(gs2[1,i])
        binwidth = 3
        bins = np.arange(min(diff_chain), max(diff_chain) + binwidth, binwidth)
        density = plt.hist(diff_chain, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k')
        plt.xlim([-50, 50])
        plt.xlabel('error [m]')
        str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90)
        plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.3), horizontalalignment='left', fontsize=10)
    fig1.set_size_inches(19.2, 9.28)
    fig1.set_tight_layout(True)
    fig2.set_size_inches(19.2, 9.28)
    fig2.set_tight_layout(True)
    # all transects together
    chain_sds_all = []
    chain_sur_all = []
    for i in range(chain_sds.shape[1]):
        pfname = list(topo_profiles.keys())[i]
        chain_sds_all = np.append(chain_sds_all, data_fin[pfname]['sds'])
        chain_sur_all = np.append(chain_sur_all, data_fin[pfname]['survey'])
    # calculate statistics
    slope, intercept, rvalue, pvalue, std_err = sstats.linregress(chain_sur_all, chain_sds_all)
    R2 = rvalue**2
    correlation = np.corrcoef(chain_sur_all, chain_sds_all)[0,1]
    diff_chain_all = chain_sur_all - chain_sds_all
    rmse = np.sqrt(np.nanmean((diff_chain_all)**2))
    mean = np.nanmean(diff_chain_all)
    std = np.nanstd(diff_chain_all)
    q90 = np.percentile(np.abs(diff_chain_all), 90)
    stats['all'] = {'rmse':rmse, 'mean':mean, 'std':std, 'q90':q90, 'corr':correlation,
                    'linfit':{'slope':slope, 'intercept':intercept, 'R2':R2, 'pvalue':pvalue}}
    # make plot
    plt.figure(fig3.number)
    fig3.add_subplot(gs3[0,0])
    plt.axis('equal')
    plt.plot(chain_sur_all, chain_sds_all, 'ko', markersize=4, markerfacecolor='w', alpha=0.7)
    xmax = np.max([np.nanmax(chain_sds_all), np.nanmax(chain_sur_all)])
    xmin = np.min([np.nanmin(chain_sds_all), np.nanmin(chain_sur_all)])
    ymax = np.max([np.nanmax(chain_sds_all), np.nanmax(chain_sur_all)])
    ymin = np.min([np.nanmin(chain_sds_all), np.nanmin(chain_sur_all)])
    plt.plot([xmin, xmax], [ymin, ymax], 'k--')
    plt.plot([xmin, xmax], [xmin*slope + intercept, xmax*slope + intercept], 'b:')
    str_corr = ' y = %.2f x + %.2f\n R2 = %.2f' % (slope, intercept, R2)
    plt.text(xmin, ymax-5, str_corr, bbox=dict(facecolor=[0.7,0.7,0.7], alpha=0.5), horizontalalignment='left')
    plt.xlabel('chainage survey [m]')
    plt.ylabel('chainage satellite [m]')
    plt.title('all transects', fontweight='bold')
    fig3.add_subplot(gs3[1,0])
    binwidth = 3
    bins = np.arange(min(diff_chain_all), max(diff_chain_all) + binwidth, binwidth)
    density = plt.hist(diff_chain_all, bins=bins, density=True, color=[0.8, 0.8, 0.8], edgecolor='k')
    plt.xlim([-50, 50])
    plt.xlabel('error [m]')
    str_stats = ' rmse = %.1f\n mean = %.1f\n std = %.1f\n q90 = %.1f' % (rmse, mean, std, q90)
    plt.text(15, np.max(density[0])-0.015, str_stats, bbox=dict(facecolor=[0.8,0.8,0.8], alpha=0.3), horizontalalignment='left', fontsize=10)
    fig3.set_size_inches(9.2, 9.28)
    fig3.set_tight_layout(True)
    return stats

File diff suppressed because it is too large.

@@ -8,8 +8,9 @@ Contains all the utilities, convenience functions and small functions that do si
"""
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import numpy as np
import datetime
import scipy.io as sio
import pdb
@ -56,3 +57,54 @@ def compare_images(im1, im2):
def find_indices(lst, condition):
    "imitation of MATLAB find function"
    return [i for i, elem in enumerate(lst) if condition(elem)]
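# e.g. find_indices([4, -1, 7], lambda e: e > 0) returns [0, 2]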
def reject_outliers(data, m=2):
    "rejects outliers in a numpy array"
    return data[abs(data - np.mean(data)) < m * np.std(data)]
def duplicates_dict(lst):
    "return duplicates and indices"
    # nested function
    def duplicates(lst, item):
        return [i for i, x in enumerate(lst) if x == item]
    return dict((x, duplicates(lst, x)) for x in set(lst) if lst.count(x) > 1)
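# e.g. duplicates_dict(['a', 'b', 'a']) returns {'a': [0, 2]}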
def datenum2datetime(datenum):
    "convert datenum to datetime"
    # takes in a list of MATLAB datenums and outputs python datetimes;
    # MATLAB counts days from 00-Jan-0000, python ordinals from 01-Jan-0001,
    # hence the 366-day offset
    time = [datetime.fromordinal(int(dn)) + timedelta(days=float(dn) % 1) - timedelta(days=366) for dn in datenum]
    return time
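# e.g. datenum2datetime([737061.0]) returns [datetime(2018, 1, 1, 0, 0)],
# since MATLAB's datenum 737061 is 2018-01-01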
def loadmat(filename):
    '''
    this function should be called instead of sio.loadmat directly,
    as it cures the problem of not properly recovering python dictionaries
    from mat files. It calls _check_keys to cure all entries
    which are still mat-objects
    '''
    data = sio.loadmat(filename, struct_as_record=False, squeeze_me=True)
    return _check_keys(data)
def _check_keys(d):
    '''
    checks if entries in the dictionary are mat-objects. If yes,
    _todict is called to change them to nested dictionaries
    '''
    for key in d:
        if isinstance(d[key], sio.matlab.mio5_params.mat_struct):
            d[key] = _todict(d[key])
    return d
def _todict(matobj):
    '''
    a recursive function which constructs nested dictionaries from mat-objects
    '''
    d = {}
    for strg in matobj._fieldnames:
        elem = matobj.__dict__[strg]
        if isinstance(elem, sio.matlab.mio5_params.mat_struct):
            d[strg] = _todict(elem)
        else:
            d[strg] = elem
    return d
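# illustrative: nested MATLAB structs come back as plain dicts, e.g.
# mat = loadmat('survey.mat')   # 'survey.mat' is a hypothetical file
# mat['data']['x']              # instead of mat_struct attribute access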

@@ -1,221 +0,0 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 1 14:32:08 2018
@author: z5030440
Main code to extract shorelines from Landsat imagery
"""
# Preamble
import ee
from IPython import display
import math
import matplotlib.pyplot as plt
import numpy as np
import pdb
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.morphology as morphology
import skimage.measure as measure
from shapely.geometry import Polygon
from osgeo import gdal
from osgeo import osr
import tempfile
import urllib
from urllib.request import urlretrieve
import zipfile
# my modules
from utils import *
# from sds import *
np.seterr(all='ignore') # ignore division-by-zero and NaN warnings
ee.Initialize()
plot_bool = True # if you want the plots
def download_tif(image, bandsId):
    """downloads a tif image (region and bands) from the EE server and stores it in a temp file"""
    url = ee.data.makeDownloadUrl(ee.data.getDownloadId({
        'image': image.serialize(),
        'bands': bandsId,
        'filePerBand': 'false',
        'name': 'data',
    }))
    local_zip, headers = urlretrieve(url)
    with zipfile.ZipFile(local_zip) as local_zipfile:
        return local_zipfile.extract('data.tif', tempfile.mkdtemp())
def load_image(image, bandsId):
    """loads an ee.Image() as a np.array. The ee.Image() is retrieved from the EE database."""
    local_tif_filename = download_tif(image, bandsId)
    dataset = gdal.Open(local_tif_filename, gdal.GA_ReadOnly)
    bands = [dataset.GetRasterBand(i + 1).ReadAsArray() for i in range(dataset.RasterCount)]
    return np.stack(bands, 2), dataset
im = ee.Image('LANDSAT/LC08/C01/T1_RT_TOA/LC08_089083_20130411')
lon = [151.2820816040039, 151.3425064086914]
lat = [-33.68206818063878, -33.74775138989556]
polygon = [[lon[0], lat[0]], [lon[1], lat[0]], [lon[1], lat[1]], [lon[0], lat[1]]];
# get image metadata into dictionary
im_dic = im.getInfo()
im_bands = im_dic.get('bands')
# delete dimensions key from dictionary, otherwise the entire image is extracted
#for i in range(len(im_bands)): del im_bands[i]['dimensions']
pan_band = [im_bands[7]]
ms_bands = [im_bands[1], im_bands[2], im_bands[3]]
im_full, dataset_full = load_image(im, ms_bands)
plt.figure()
plt.imshow(np.clip(im_full[:,:,[2,1,0]] * 3, 0, 1))
plt.show()
#%%
def download_tif(image, polygon, bandsId):
    """downloads a tif image (region and bands) from the EE server and stores it in a temp file"""
    url = ee.data.makeDownloadUrl(ee.data.getDownloadId({
        'image': image.serialize(),
        'region': polygon,
        'bands': bandsId,
        'filePerBand': 'false',
        'name': 'data',
    }))
    local_zip, headers = urlretrieve(url)
    with zipfile.ZipFile(local_zip) as local_zipfile:
        return local_zipfile.extract('data.tif', tempfile.mkdtemp())
def load_image(image, polygon, bandsId):
    """
    Loads an ee.Image() as a np.array. The ee.Image() is retrieved from the EE database.
    The geographic area and bands to select can be specified
    KV WRL 2018
    Arguments:
    -----------
    image: ee.Image()
        image object from the EE database
    polygon: list
        coordinates of the points creating a polygon. Each point is a list with 2 values
    bandsId: list
        bands to select, each band is a dictionary in the list containing the following keys:
        crs, crs_transform, data_type and id. NOTE: you have to remove the key dimensions, otherwise
        the entire image is retrieved.
    Returns:
    -----------
    image_array : np.ndarray
        An array containing the image (2D if one band, otherwise 3D)
    """
    local_tif_filename = download_tif(image, polygon, bandsId)
    dataset = gdal.Open(local_tif_filename, gdal.GA_ReadOnly)
    bands = [dataset.GetRasterBand(i + 1).ReadAsArray() for i in range(dataset.RasterCount)]
    return np.stack(bands, 2), dataset
for i in range(len(im_bands)): del im_bands[i]['dimensions']
ms_bands = [im_bands[1], im_bands[2], im_bands[3]]
im_cropped, dataset_cropped = load_image(im, polygon, ms_bands)
plt.figure()
plt.imshow(np.clip(im_cropped[:,:,[2,1,0]] * 3, 0, 1))
plt.show()
#%%
crs_full = dataset_full.GetGeoTransform()   # (ulx, xres, xskew, uly, yskew, yres)
crs_cropped = dataset_cropped.GetGeoTransform()
scale = crs_full[1]
ul_full = np.array([crs_full[0], crs_full[3]])
ul_cropped = np.array([crs_cropped[0], crs_cropped[3]])
delta = np.abs(ul_full - ul_cropped)/scale
u0 = delta[0].astype('int')
v0 = delta[1].astype('int')
im_full[v0,u0,:]
im_cropped[0,0,:]
lrx = ul_cropped[0] + (dataset_cropped.RasterXSize * scale)
lry = ul_cropped[1] + (dataset_cropped.RasterYSize * (-scale))
lr_cropped = np.array([lrx, lry])
delta = np.abs(ul_full - lr_cropped)/scale
u1 = delta[0].astype('int')
v1 = delta[1].astype('int')
im_cropped2 = im_full[v0:v1,u0:u1,:]
#%%
crs_full = dataset_full.GetGeoTransform()
ulx = crs_full[0]
uly = crs_full[3]
source = osr.SpatialReference()
source.ImportFromWkt(dataset_full.GetProjection())
target = osr.SpatialReference()
target.ImportFromEPSG(4326)
transform = osr.CoordinateTransformation(source, target)
transform.TransformPoint(ulx, uly)
#%%
crs_cropped = dataset_cropped.GetGeoTransform()
ulx = crs_cropped[0]
uly = crs_cropped[3]
source = osr.SpatialReference()
source.ImportFromWkt(dataset_cropped.GetProjection())
target = osr.SpatialReference()
target.ImportFromEPSG(4326)
transform = osr.CoordinateTransformation(source, target)
transform.TransformPoint(lrx, lry)
#%%
source = osr.SpatialReference()
source.ImportFromEPSG(4326)
target = osr.SpatialReference()
target.ImportFromEPSG(32656)
transform = osr.CoordinateTransformation(source, target)
coords = transform.TransformPoint(151.2820816040039, -33.68206818063878)
coords[0] - ulx
coords[1] - uly
#%%
x_ul_full = ms_bands[0]['crs_transform'][2]
y_ul_full = ms_bands[0]['crs_transform'][5]
scale = ms_bands[0]['crs_transform'][0]
x_ul_cropped = np.array([340756.105840223, 346357.851288875, 346474.839525944, 340877.362938763])
y_ul_cropped = np.array([-3728229.45372866, -3728137.91775723, -3735421.58347927, -3735513.20696522])
dx = abs(x_ul_full - x_ul_cropped)
dy = abs(y_ul_full - y_ul_cropped)
u_coord = np.round(dx/scale).astype('int')
v_coord = np.round(dy/scale).astype('int')
im_cropped2 = im_full[np.min(v_coord):np.max(v_coord), np.min(u_coord):np.max(u_coord),:]
plt.figure()
plt.imshow(np.clip(im_cropped2[:,:,[2,1,0]] * 3, 0, 1), cmap='gray')
plt.show()
# count mismatching pixels (0 if the two crops are identical)
np.sum(np.equal(im_cropped, im_cropped2).astype('int') - 1)

@@ -1,96 +0,0 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 23 12:46:04 2018
@author: z5030440
"""
# Preamble
import ee
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import pandas as pd
from datetime import datetime
import pickle
import pdb
import pytz
from pylab import ginput
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.morphology as morphology
import skimage.measure as measure
# my functions
import functions.utils as utils
import functions.sds as sds
np.seterr(all='ignore') # ignore division-by-zero and NaN warnings
ee.Initialize()
#%% Select images
# parameters
plot_bool = False # if you want the plots
prob_high = 99.9 # upper probability to clip and rescale pixel intensity
min_contour_points = 100 # minimum number of points contained in each water line
output_epsg = 28356 # GDA94 / MGA Zone 56
cloud_threshold = 0.8
# select collection
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
# location (Narrabeen-Collaroy beach)
rect_narra = [[[151.3473129272461,-33.69035274454718],
[151.2820816040039,-33.68206818063878],
[151.27281188964844,-33.74775138989556],
[151.3425064086914,-33.75231878701767],
[151.3473129272461,-33.69035274454718]]];
#rect_narra = [[[151.301454, -33.700754],
# [151.311453, -33.702075],
# [151.307237, -33.739761],
# [151.294220, -33.736329],
# [151.301454, -33.700754]]];
# Dates
start_date = '2016-01-01'
end_date = '2016-12-31'
# filter by location
flt_col = input_col.filterBounds(ee.Geometry.Polygon(rect_narra)).filterDate(start_date, end_date)
n_img = flt_col.size().getInfo()
print('Number of images covering Narrabeen:', n_img)
im_all = flt_col.getInfo().get('features')
# find each image in ee database
im = ee.Image(im_all[0].get('id'))
# load image as np.array
im_pan, im_ms, im_cloud, crs, meta = sds.read_eeimage(im, rect_narra, plot_bool)
# rescale intensities
im_ms = sds.rescale_image_intensity(im_ms, im_cloud, prob_high, plot_bool)
im_pan = sds.rescale_image_intensity(im_pan, im_cloud, prob_high, plot_bool)
# pansharpen rgb image
im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, im_cloud, plot_bool)
plt.figure()
plt.imshow(im_ms_ps[:,:,[2,1,0]])
plt.show()
pts = ginput(15)
points = np.array(pts)
plt.plot(points[:,0], points[:,1], 'ko')
plt.show()
pts_coords = sds.convert_pix2world(points[:,[1,0]], crs['crs_15m'])
pts = sds.convert_epsg(pts_coords, crs['epsg_code'], output_epsg)
with open('data/narra_beach.pkl', 'wb') as f:
    pickle.dump(pts, f)

@@ -1,119 +0,0 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 1 14:32:08 2018
@author: z5030440
Main code to extract shorelines from Landsat imagery
"""
# Preamble
import ee
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime
import pytz
import pdb
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.morphology as morphology
import skimage.measure as measure
# my functions
import functions.utils as utils
import functions.sds as sds
np.seterr(all='ignore') # ignore division-by-zero and NaN warnings
ee.Initialize()
# parameters
plot_bool = False # if you want the plots
prob_high = 99.9 # upper probability to clip and rescale pixel intensity
min_contour_points = 100 # minimum number of points contained in each water line
# select collection
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_TOA')
# location (Narrabeen-Collaroy beach)
rect_narra = [[[151.3473129272461,-33.69035274454718],
[151.2820816040039,-33.68206818063878],
[151.27281188964844,-33.74775138989556],
[151.3425064086914,-33.75231878701767],
[151.3473129272461,-33.69035274454718]]];
# Dates
start_date = '2016-01-01'
end_date = '2016-12-31'
# filter by location
flt_col = input_col.filterBounds(ee.Geometry.Polygon(rect_narra))#.filterDate(start_date, end_date)
n_img = flt_col.size().getInfo()
print('Number of images covering Narrabeen:', n_img)
im_all = flt_col.getInfo().get('features')
props = {'cloud_cover_cropped':[],
'cloud_cover':[],
'cloud_cover_land':[],
'date_acquired':[],
'geom_rmse_model':[],
'geom_rmse_verify':[],
'gcp_model':[],
'gcp_verify':[],
'quality':[],
'sun_azimuth':[],
'sun_elevation':[]}
t = []
# loop through all images
for i in range(n_img):
    # find each image in ee database
    im = ee.Image(im_all[i].get('id'))
    im_bands = im_all[i].get('bands')
    im_props = im_all[i]['properties']
    # compute cloud cover on the cropped image
    for j in range(len(im_bands)): del im_bands[j]['dimensions']
    qa_band = [im_bands[11]]
    im_qa, crs_qa = sds.load_image(im, rect_narra, qa_band)
    im_qa = im_qa[:,:,0]
    im_cloud = sds.create_cloud_mask(im_qa)
    props['cloud_cover_cropped'].append(100*np.sum(im_cloud.astype(int))/(im_cloud.shape[0]*im_cloud.shape[1]))
    # extract image metadata
    props['cloud_cover'].append(im_props['CLOUD_COVER'])
    props['cloud_cover_land'].append(im_props['CLOUD_COVER_LAND'])
    props['date_acquired'].append(im_props['DATE_ACQUIRED'])
    props['geom_rmse_model'].append(im_props['GEOMETRIC_RMSE_MODEL'])
    props['gcp_model'].append(im_props['GROUND_CONTROL_POINTS_MODEL'])
    props['quality'].append(im_props['IMAGE_QUALITY_OLI'])
    props['sun_azimuth'].append(im_props['SUN_AZIMUTH'])
    props['sun_elevation'].append(im_props['SUN_ELEVATION'])
    # try structure as sometimes the geometry cannot be verified
    try:
        props['geom_rmse_verify'].append(im_props['GEOMETRIC_RMSE_VERIFY'])
        props['gcp_verify'].append(im_props['GROUND_CONTROL_POINTS_VERIFY'])
    except KeyError:
        props['geom_rmse_verify'].append(np.nan)
        props['gcp_verify'].append(np.nan)
    # record exact time of acquisition
    t.append(im_props['system:time_start'])
#%% create pd.DataFrame with datetime index
dt = []
fmt = '%Y-%m-%d %H:%M:%S %Z%z'
au_tz = pytz.timezone('Australia/Sydney')
for k in range(len(t)): dt.append(datetime.fromtimestamp(t[k]/1000, tz=au_tz))
df = pd.DataFrame(data=props, index=dt, columns=list(props.keys()))
df.to_pickle('meta_l8.pkl')
#df['cloud_cover_cropped'].groupby(df.index.month).count().plot.bar()
#df_monthly = df['cloud_cover_cropped'].groupby(df.index.month)

@@ -0,0 +1,177 @@
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: win-64
@EXPLICIT
https://repo.continuum.io/pkgs/main/win-64/alabaster-0.7.10-py36hcd07829_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/asn1crypto-0.24.0-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/astroid-1.6.1-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/babel-2.5.3-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/backports-1.0-py36h81696a8_1.tar.bz2
https://repo.continuum.io/pkgs/free/win-64/backports.weakref-1.0rc1-py36_0.tar.bz2
https://repo.continuum.io/pkgs/free/win-64/bleach-1.5.0-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/bokeh-0.12.14-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/ca-certificates-2017.08.26-h94faf87_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/certifi-2018.1.18-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/cffi-1.11.4-py36hfa6e2cd_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/chardet-3.0.4-py36h420ce6e_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/click-6.7-py36hec8c647_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/cloudpickle-0.5.2-py36_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/colorama-0.3.9-py36h029ae33_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/cryptography-2.1.4-py36he1d7878_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/curl-7.58.0-h7602738_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/cycler-0.10.0-py36h009560c_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/dask-0.17.0-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/dask-core-0.17.0-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/decorator-4.2.1-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/distributed-1.21.0-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/docutils-0.14-py36h6012d8f_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/entrypoints-0.2.3-py36hfd66bb0_2.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/expat-2.2.5-hcc4222d_0.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/freetype-2.8.1-vc14_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/freexl-1.0.4-h342dbcb_5.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/geos-3.6.2-h9ef7328_2.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/hdf4-4.2.13-h712560f_2.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/hdf5-1.10.1-h98b8871_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/heapdict-1.0.0-py36_2.tar.bz2
https://repo.continuum.io/pkgs/free/win-64/html5lib-0.9999999-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/icc_rt-2017.0.4-h97af966_0.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/icu-58.2-vc14_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/idna-2.6-py36h148d497_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/imageio-2.3.0-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/imagesize-0.7.1-py36he29f638_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/intel-openmp-2018.0.0-hd92c6cd_8.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/ipykernel-4.8.0-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/ipython-6.2.1-py36h9cf0123_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/ipython_genutils-0.2.0-py36h3c5d0ee_0.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/ipywidgets-7.1.1-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/isort-4.2.15-py36h6198cc5_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/jedi-0.11.1-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/jinja2-2.10-py36h292fed1_0.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/jpeg-9b-vc14_2.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/jsonschema-2.6.0-py36h7636477_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/jupyter-1.0.0-py36_4.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/jupyter_client-5.2.2-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/jupyter_console-5.2.0-py36h6d89b47_1.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/jupyter_contrib_core-0.3.3-py36_1.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/jupyter_contrib_nbextensions-0.4.0-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/jupyter_core-4.4.0-py36h56e9d50_0.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/jupyter_highlight_selected_word-0.1.0-py36_0.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/jupyter_latex_envs-1.3.8.2-py36_1.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/jupyter_nbextensions_configurator-0.4.0-py36_0.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/jupyterlab-0.31.5-py36_1.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/jupyterlab_launcher-0.10.3-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/kealib-1.4.7-ha5b336b_5.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/krb5-1.14.2-h63dfc2a_6.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/lazy-object-proxy-1.3.1-py36hd1c21d2_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/libboost-1.65.1-he51fdeb_4.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/libcurl-7.58.0-h7602738_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/libgdal-2.2.2-h2727f2b_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/libiconv-1.15-h1df5818_7.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/libkml-1.3.0-hc65d273_3.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/libnetcdf-4.4.1.1-h825a56a_8.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.34-vc14_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/libpq-9.6.6-hfe3f2bf_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/libprotobuf-3.4.1-h3dba5dd_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/libspatialite-4.3.0a-h383548d_18.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/libssh2-1.8.0-hd619d38_4.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/libtiff-4.0.9-vc14_0.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/libxml2-2.9.5-vc14_1.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/libxslt-1.1.32-vc14_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/locket-0.2.0-py36hfed976d_1.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/lxml-4.1.1-py36_0.tar.bz2
https://repo.continuum.io/pkgs/free/win-64/markdown-2.6.9-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/markupsafe-1.0-py36h0e26971_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/matplotlib-2.1.2-py36h016c42a_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/mccabe-0.6.1-py36hb41005a_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/mistune-0.8.3-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/mkl-2018.0.1-h2108138_4.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/msgpack-python-0.5.1-py36he980bc4_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/nbconvert-5.3.1-py36h8dc0fde_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/nbformat-4.4.0-py36h3a5bc1b_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/networkx-2.1-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/nodejs-8.9.3-hd6b2f15_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/notebook-5.4.0-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/numpy-1.14.1-py36hb69e940_2.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/numpydoc-0.7.0-py36ha25429e_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/olefile-0.45.1-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/openjpeg-2.2.0-h29c51c3_2.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/openssl-1.0.2n-vc14_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/packaging-16.8-py36ha0986f6_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pandas-0.22.0-py36h6538335_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pandoc-1.19.2.1-hb2460c7_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pandocfilters-1.4.2-py36h3ef6317_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/parso-0.1.1-py36hae3edee_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/partd-0.3.8-py36hc8e763b_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/patsy-0.5.0-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pickleshare-0.7.4-py36h9de030f_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pillow-5.0.0-py36h0738816_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pip-9.0.1-py36h226ae91_4.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/proj4-4.9.3-hcf24537_7.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/prompt_toolkit-1.0.15-py36h60b8f86_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/protobuf-3.4.1-py36h07fa351_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/psutil-5.4.3-py36hfa6e2cd_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pycodestyle-2.3.1-py36h7cc55cd_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pycparser-2.18-py36hd053e01_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pyflakes-1.6.0-py36h0b975d6_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pygments-2.2.0-py36hb010967_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pylint-1.8.2-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pyopenssl-17.5.0-py36h5b7d817_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pyparsing-2.2.0-py36h785a196_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pyqt-5.6.0-py36hb5ed885_5.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pysocks-1.6.7-py36h698d350_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/python-3.6.4-h6538335_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/python-dateutil-2.6.1-py36h509ddcb_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pytz-2018.3-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pywavelets-0.5.2-py36hc649158_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pywinpty-0.5-py36h6538335_1.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/pyyaml-3.12-py36_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/pyzmq-16.0.3-py36he714bf5_0.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/qt-5.6.2-vc14_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/qtawesome-0.4.4-py36h5aa48f6_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/qtconsole-4.3.1-py36h99a29a9_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/qtpy-1.3.1-py36hb8717c5_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/requests-2.18.4-py36h4371aae_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/rope-0.10.7-py36had63a69_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/scikit-image-0.13.1-py36hfa6e2cd_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/scikit-learn-0.19.1-py36h53aea1b_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/scipy-1.0.0-py36h1260518_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/send2trash-1.4.2-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/setuptools-38.4.0-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/shapely-1.6.4-py36h2a969d5_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/simplegeneric-0.8.1-py36_2.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/sip-4.18.1-py36h9c25514_2.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/six-1.11.0-py36h4db2310_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/snowballstemmer-1.2.1-py36h763602f_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/sortedcontainers-1.5.9-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/sphinx-1.6.6-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/sphinxcontrib-1.0-py36hbbac3d2_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/sphinxcontrib-websupport-1.0.1-py36hb5e5916_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/spyder-3.2.8-py36_0.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/sqlite-3.20.1-vc14_2.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/statsmodels-0.8.0-py36h6189b4c_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/tblib-1.3.2-py36h30f5020_0.tar.bz2
https://repo.continuum.io/pkgs/free/win-64/tensorflow-1.2.1-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/terminado-0.8.1-py36_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/testpath-0.3.1-py36h2698cfe_0.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/tk-8.6.7-vc14_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/toolz-0.9.0-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/tornado-4.5.3-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/traitlets-4.3.2-py36h096827d_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/typing-3.6.2-py36hb035bda_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/urllib3-1.22-py36h276f60a_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/vc-14-h0510ff6_3.tar.bz2
https://repo.continuum.io/pkgs/free/win-64/vs2015_runtime-14.0.25420-0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/wcwidth-0.1.7-py36h3d5aa90_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/webencodings-0.5.1-py36h67c50ae_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/werkzeug-0.14.1-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/wheel-0.30.0-py36h6c3ec14_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/widgetsnbextension-3.1.0-py36_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/win_inet_pton-1.0.1-py36he67d7fd_1.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/wincertstore-0.2-py36h7fe50ca_0.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/winpty-0.4.3-vc14_2.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/wrapt-1.10.11-py36he5f5981_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/xerces-c-3.2.0-h44c76bb_2.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/xz-5.2.3-h7c615d8_2.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/yaml-0.1.7-vc14_0.tar.bz2
https://repo.continuum.io/pkgs/main/win-64/zict-0.1.3-py36h2d8e73e_0.tar.bz2
https://conda.anaconda.org/conda-forge/win-64/zlib-1.2.11-vc14_0.tar.bz2

@ -0,0 +1,589 @@
#==========================================================#
#==========================================================#
# Extract shorelines from Landsat images
#==========================================================#
#==========================================================#
#==========================================================#
# Initial settings
#==========================================================#
import os
import numpy as np
import matplotlib.pyplot as plt
import ee
import pdb
# other modules
from osgeo import gdal, ogr, osr
import pickle
import matplotlib.cm as cm
from pylab import ginput
from shapely.geometry import LineString
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.measure as measure
import skimage.morphology as morphology
# machine learning modules
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.externals import joblib
# import own modules
import functions.utils as utils
import functions.sds as sds
# some other settings
np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
plt.rcParams['axes.grid'] = True
plt.rcParams['figure.max_open_warning'] = 100
ee.Initialize()
#==========================================================#
# Parameters
#==========================================================#
sitename = 'NARRA'
cloud_thresh = 0.7 # threshold for cloud cover
plot_bool = False # if you want the plots
output_epsg = 28356 # GDA94 / MGA Zone 56
buffer_size = 7 # radius (in pixels) of disk for buffer (pixel classification)
min_beach_size = 20 # number of pixels in a beach (pixel classification)
dist_ref = 100 # maximum distance from reference point
min_length_wl = 200 # minimum length of shoreline LineString to be kept
manual_bool = True # to manually check images
output = dict([])
#==========================================================#
# Metadata
#==========================================================#
filepath = os.path.join(os.getcwd(), 'data', sitename)
with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'rb') as f:
metadata = pickle.load(f)
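# NOTE: the contour filtering below relies on 'refpoints', an Nx2 array of
# reference shoreline coordinates (in output_epsg) that is never defined in
# this script; a minimal sketch of how it might be loaded (hypothetical file):
#with open(os.path.join(filepath, sitename + '_refpoints.pkl'), 'rb') as f:
#    refpoints = pickle.load(f)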
#%%
#==========================================================#
# Read S2 images
#==========================================================#
satname = 'S2'
dates = metadata[satname]['dates']
input_epsg = 32756 # metadata[satname]['epsg']
# path to images
filepath10 = os.path.join(os.getcwd(), 'data', sitename, satname, '10m')
filenames10 = os.listdir(filepath10)
filepath20 = os.path.join(os.getcwd(), 'data', sitename, satname, '20m')
filenames20 = os.listdir(filepath20)
filepath60 = os.path.join(os.getcwd(), 'data', sitename, satname, '60m')
filenames60 = os.listdir(filepath60)
if not (len(filenames10) == len(filenames20) == len(filenames60)):
raise ValueError('not the same number of files for the 10, 20 and 60 m bands')
N = len(filenames10)
# initialise variables
cloud_cover_ts = []
acc_georef_ts = []
date_acquired_ts = []
filename_ts = []
satname_ts = []
timestamp = []
shorelines = []
idx_skipped = []
spacing = '=========================================================='
msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
print(msg)
for i in range(N):
# read 10m bands
fn = os.path.join(filepath10, filenames10[i])
data = gdal.Open(fn, gdal.GA_ReadOnly)
georef = np.array(data.GetGeoTransform())
bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
im10 = np.stack(bands, 2)
im10 = im10/10000 # S2 L1C digital numbers are TOA reflectance scaled by 10000
# if image is only zeros, skip it
if im10.sum() < 1:
print('skip ' + str(i) + ' - no data')
idx_skipped.append(i)
continue
nrows = im10.shape[0]
ncols = im10.shape[1]
# read 20m band (SWIR1)
fn = os.path.join(filepath20, filenames20[i])
data = gdal.Open(fn, gdal.GA_ReadOnly)
bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
im20 = np.stack(bands, 2)
im20 = im20[:,:,0]
im20 = im20/10000 # TOA scaled to 10000
im_swir = transform.resize(im20, (nrows, ncols), order=1, preserve_range=True, mode='constant')
im_swir = np.expand_dims(im_swir, axis=2)
# append the SWIR1 band (up-sampled from 20 m to 10 m) to the 10 m bands
im_ms = np.append(im10, im_swir, axis=2)
# read 60m band (QA)
fn = os.path.join(filepath60, filenames60[i])
data = gdal.Open(fn, gdal.GA_ReadOnly)
bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
im60 = np.stack(bands, 2)
im_qa = im60[:,:,0]
cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0, preserve_range=True, mode='constant').astype('bool_')
# check if -inf or nan values on any band and add to cloud mask
for k in range(im_ms.shape[2]):
im_inf = np.isin(im_ms[:,:,k], -np.inf)
im_nan = np.isnan(im_ms[:,:,k])
cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
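# at this point cloud_mask flags both cloudy pixels and any pixel with invalid (-inf or NaN) data in any band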
# calculate cloud cover and if above threshold, skip it
cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
if cloud_cover > cloud_thresh:
print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
idx_skipped.append(i)
continue
# rescale image intensity for display purposes
im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)
# classify image in 4 classes (sand, whitewater, water, other) with NN classifier
im_classif, im_labels = sds.classify_image_NN_nopan(im_ms, cloud_mask, min_beach_size, plot_bool)
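# im_labels is assumed to be a boolean stack with one layer per non-'other' class (0: sand, 1: whitewater, 2: water)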
# if there aren't any sandy pixels
if sum(sum(im_labels[:,:,0])) == 0:
# use a global threshold on the whole image
im_ndwi = sds.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask, plot_bool)
contours_mwi = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
else:
# use a class-specific threshold
contours_wi, contours_mwi = sds.find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size, plot_bool)
# convert from pixels to world coordinates
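# convert_pix2world presumably applies the GDAL affine geotransform, i.e.
# x = georef[0] + col*georef[1] + row*georef[2], y = georef[3] + col*georef[4] + row*georef[5]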
wl_coords = sds.convert_pix2world(contours_mwi, georef)
# convert to output epsg spatial reference
wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)
# remove contour lines that have a perimeter < min_length_wl
wl_good = []
for wls in wl:
coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
a = LineString(coords) # shapely LineString structure
if a.length >= min_length_wl:
wl_good.append(wls)
# format points and only select the ones close to the refpoints
x_points = np.array([])
y_points = np.array([])
for k in range(len(wl_good)):
x_points = np.append(x_points,wl_good[k][:,0])
y_points = np.append(y_points,wl_good[k][:,1])
wl_good = np.transpose(np.array([x_points,y_points]))
temp = np.zeros((len(wl_good))).astype(bool)
for k in range(len(refpoints)):
temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp)
wl_final = wl_good[temp]
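# wl_final keeps only the shoreline points lying within dist_ref metres of at least one reference point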
# plot output
plt.figure()
im = np.copy(im_display)
colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
for k in range(0,im_labels.shape[2]):
im[im_labels[:,:,k],0] = colours[k,0]
im[im_labels[:,:,k],1] = colours[k,1]
im[im_labels[:,:,k],2] = colours[k,2]
plt.imshow(im)
for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
plt.title(satname + ' ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + ' acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' )
plt.draw()
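# manual QA: ginput returns the (x, y) of a single click; a click on the lower half of the figure rejects the image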
pt_in = np.array(ginput(n=1, timeout=1000))
plt.close()
# if image is rejected, skip it
if pt_in[0][1] > nrows/2:
print('skip ' + str(i) + ' - rejected')
idx_skipped.append(i)
continue
# if accepted, store the data
cloud_cover_ts.append(cloud_cover)
acc_georef_ts.append(metadata[satname]['acc_georef'][i])
filename_ts.append(filenames10[i])
satname_ts.append(satname)
date_acquired_ts.append(filenames10[i][:10])
timestamp.append(metadata[satname]['dates'][i])
shorelines.append(wl_final)
# store in output structure
output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts,
'acc_georef':acc_georef_ts}}
del idx_skipped
#%%
#==========================================================#
# Read L7&L8 images
#==========================================================#
satname = 'L8'
dates = metadata[satname]['dates']
input_epsg = 32656 # metadata[satname]['epsg']
# path to images
filepath_pan = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8', 'pan')
filepath_ms = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8', 'ms')
filenames_pan = os.listdir(filepath_pan)
filenames_ms = os.listdir(filepath_ms)
if len(filenames_pan) != len(filenames_ms):
raise ValueError('not the same number of files for the pan and ms bands')
N = len(filenames_pan)
# initialise variables
cloud_cover_ts = []
acc_georef_ts = []
date_acquired_ts = []
filename_ts = []
satname_ts = []
timestamp = []
shorelines = []
idx_skipped = []
spacing = '=========================================================='
msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
print(msg)
for i in range(N):
# get satellite name
sat = filenames_pan[i][20:22]
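# characters 20:22 of the filename are assumed to hold the satellite code ('L7' or 'L8')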
# read pan image
fn_pan = os.path.join(filepath_pan, filenames_pan[i])
data = gdal.Open(fn_pan, gdal.GA_ReadOnly)
georef = np.array(data.GetGeoTransform())
bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
im_pan = np.stack(bands, 2)[:,:,0]
nrows = im_pan.shape[0]
ncols = im_pan.shape[1]
# read ms image
fn_ms = os.path.join(filepath_ms, filenames_ms[i])
data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
im_ms = np.stack(bands, 2)
# cloud mask
im_qa = im_ms[:,:,5]
cloud_mask = sds.create_cloud_mask(im_qa, sat, plot_bool)
cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0, preserve_range=True, mode='constant').astype('bool_')
# resize the image using bilinear interpolation (order 1)
im_ms = im_ms[:,:,:5]
im_ms = transform.resize(im_ms,(nrows, ncols), order=1, preserve_range=True, mode='constant')
# check if -inf or nan values on any band and add to cloud mask
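# k = 0..4 checks the five ms bands, k = 5 checks the pan band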
for k in range(im_ms.shape[2]+1):
if k == 5:
im_inf = np.isin(im_pan, -np.inf)
im_nan = np.isnan(im_pan)
else:
im_inf = np.isin(im_ms[:,:,k], -np.inf)
im_nan = np.isnan(im_ms[:,:,k])
cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
# calculate cloud cover and skip image if above threshold
cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
if cloud_cover > cloud_thresh:
print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
idx_skipped.append(i)
continue
# Pansharpen image (different for L8 and L7)
if sat == 'L7':
# pansharpen (Green, Red, NIR) and downsample Blue and SWIR1
im_ms_ps = sds.pansharpen(im_ms[:,:,[1,2,3]], im_pan, cloud_mask, plot_bool)
im_ms_ps = np.append(im_ms[:,:,[0]], im_ms_ps, axis=2)
im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[4]], axis=2)
im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)
elif sat == 'L8':
# pansharpen RGB image and downsample NIR and SWIR1
im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, plot_bool)
im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2)
im_display = sds.rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)
# classify image in 4 classes (sand, whitewater, water, other) with NN classifier
im_classif, im_labels = sds.classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, plot_bool)
# if there aren't any sandy pixels
if sum(sum(im_labels[:,:,0])) == 0:
# use a global threshold on the whole image
im_ndwi = sds.nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, plot_bool)
contours_mwi = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
else:
# use a class-specific threshold
contours_wi, contours_mwi = sds.find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size, plot_bool)
# convert from pixels to world coordinates
wl_coords = sds.convert_pix2world(contours_mwi, georef)
# convert to output epsg spatial reference
wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)
# remove contour lines that have a perimeter < min_length_wl
wl_good = []
for wls in wl:
coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
a = LineString(coords) # shapely LineString structure
if a.length >= min_length_wl:
wl_good.append(wls)
# format points and only select the ones close to the refpoints
x_points = np.array([])
y_points = np.array([])
for k in range(len(wl_good)):
x_points = np.append(x_points,wl_good[k][:,0])
y_points = np.append(y_points,wl_good[k][:,1])
wl_good = np.transpose(np.array([x_points,y_points]))
temp = np.zeros((len(wl_good))).astype(bool)
for k in range(len(refpoints)):
temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp)
wl_final = wl_good[temp]
# plot output
plt.figure()
plt.subplot(121)
im = np.copy(im_display)
colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
for k in range(0,im_labels.shape[2]):
im[im_labels[:,:,k],0] = colours[k,0]
im[im_labels[:,:,k],1] = colours[k,1]
im[im_labels[:,:,k],2] = colours[k,2]
plt.imshow(im)
for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
plt.title(sat + ' ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + ' acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' )
pt_in = np.array(ginput(n=1, timeout=1000))
plt.close()
# if image is rejected, skip it
if pt_in[0][1] > nrows/2:
print('skip ' + str(i) + ' - rejected')
idx_skipped.append(i)
continue
# if accepted, store the data
cloud_cover_ts.append(cloud_cover)
acc_georef_ts.append(metadata[satname]['acc_georef'][i])
filename_ts.append(filenames_pan[i])
satname_ts.append(sat)
date_acquired_ts.append(filenames_pan[i][:10])
timestamp.append(metadata[satname]['dates'][i])
shorelines.append(wl_final)
# store in output structure
output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts,
'acc_georef':acc_georef_ts}}
del idx_skipped
#%%
#==========================================================#
# Read L5 images
#==========================================================#
satname = 'L5'
dates = metadata[satname]['dates']
input_epsg = 32656 # metadata[satname]['epsg']
# path to images
filepath_img = os.path.join(os.getcwd(), 'data', sitename, satname, '30m')
filenames = os.listdir(filepath_img)
N = len(filenames)
# initialise variables
cloud_cover_ts = []
acc_georef_ts = []
date_acquired_ts = []
filename_ts = []
satname_ts = []
timestamp = []
shorelines = []
idx_skipped = []
spacing = '=========================================================='
msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
print(msg)
for i in range(N):
# read ms image
fn = os.path.join(filepath_img, filenames[i])
data = gdal.Open(fn, gdal.GA_ReadOnly)
georef = np.array(data.GetGeoTransform())
bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
im_ms = np.stack(bands, 2)
# resample to half the original pixel size (30 m -> 15 m) by doubling rows and columns
nrows = im_ms.shape[0]*2
ncols = im_ms.shape[1]*2
# cloud mask
im_qa = im_ms[:,:,5]
im_ms = im_ms[:,:,:-1]
cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0, preserve_range=True, mode='constant').astype('bool_')
# resize the image using bilinear interpolation (order 1)
im_ms = transform.resize(im_ms,(nrows, ncols), order=1, preserve_range=True, mode='constant')
# adjust georef vector (scale becomes 15m and origin is adjusted to the center of new corner pixel)
georef[1] = 15
georef[5] = -15
georef[0] = georef[0] + 7.5
georef[3] = georef[3] - 7.5
# check if -inf or nan values on any band and add to cloud mask
for k in range(im_ms.shape[2]):
im_inf = np.isin(im_ms[:,:,k], -np.inf)
im_nan = np.isnan(im_ms[:,:,k])
cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
# calculate cloud cover and skip image if above threshold
cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
if cloud_cover > cloud_thresh:
print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
idx_skipped.append(i)
continue
# rescale image intensity for display purposes
im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)
# classify image in 4 classes (sand, whitewater, water, other) with NN classifier
im_classif, im_labels = sds.classify_image_NN_nopan(im_ms, cloud_mask, min_beach_size, plot_bool)
# if there aren't any sandy pixels
if sum(sum(im_labels[:,:,0])) == 0:
# use a global threshold on the whole image
im_ndwi = sds.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask, plot_bool)
contours_mwi = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
else:
# use a class-specific threshold
contours_wi, contours_mwi = sds.find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size, plot_bool)
# convert from pixels to world coordinates
wl_coords = sds.convert_pix2world(contours_mwi, georef)
# convert to output epsg spatial reference
wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)
# remove contour lines that have a perimeter < min_length_wl
wl_good = []
for wls in wl:
coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
a = LineString(coords) # shapely LineString structure
if a.length >= min_length_wl:
wl_good.append(wls)
# format points and only select the ones close to the refpoints
x_points = np.array([])
y_points = np.array([])
for k in range(len(wl_good)):
x_points = np.append(x_points,wl_good[k][:,0])
y_points = np.append(y_points,wl_good[k][:,1])
wl_good = np.transpose(np.array([x_points,y_points]))
temp = np.zeros((len(wl_good))).astype(bool)
for k in range(len(refpoints)):
temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp)
wl_final = wl_good[temp]
# plot output
plt.figure()
plt.subplot(121)
im = np.copy(im_display)
colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
for k in range(0,im_labels.shape[2]):
im[im_labels[:,:,k],0] = colours[k,0]
im[im_labels[:,:,k],1] = colours[k,1]
im[im_labels[:,:,k],2] = colours[k,2]
plt.imshow(im)
for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
plt.title(satname + ' ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + ' acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' )
plt.subplot(122)
plt.axis('equal')
plt.axis('off')
plt.plot(refpoints[:,0], refpoints[:,1], 'k.')
plt.plot(wl_final[:,0], wl_final[:,1], 'r.')
mng = plt.get_current_fig_manager()
mng.window.showMaximized()
plt.tight_layout()
plt.draw()
pt_in = np.array(ginput(n=1, timeout=1000))
plt.close()
# if image is rejected, skip it
if pt_in[0][1] > nrows/2:
print('skip ' + str(i) + ' - rejected')
idx_skipped.append(i)
continue
# if accepted, store the data
cloud_cover_ts.append(cloud_cover)
acc_georef_ts.append(metadata[satname]['acc_georef'][i])
filename_ts.append(filenames[i])
satname_ts.append(satname)
date_acquired_ts.append(filenames[i][:10])
timestamp.append(metadata[satname]['dates'][i])
shorelines.append(wl_final)
# store in output structure
output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts,
'acc_georef':acc_georef_ts}}
del idx_skipped
#==========================================================#
#==========================================================#
#==========================================================#
#==========================================================#
#%%
# save output
with open(os.path.join(filepath, sitename + '_output' + '.pkl'), 'wb') as f:
pickle.dump(output, f)
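# the saved output can later be reloaded with, e.g.:
#with open(os.path.join(filepath, sitename + '_output' + '.pkl'), 'rb') as f:
#    output = pickle.load(f)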
# save idx_skipped
#idx_skipped = dict([])
#for satname in list(output.keys()):
# idx_skipped[satname] = output[satname]['idx_skipped']
#with open(os.path.join(filepath, sitename + '_idxskipped' + '.pkl'), 'wb') as f:
# pickle.dump(idx_skipped, f)

@ -1,13 +1,10 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 1 14:32:08 2018
@author: z5030440
Main code to extract shorelines from Landsat imagery
"""
# Preamble
#==========================================================#
# Extract shorelines from Landsat images
#==========================================================#
# Initial settings
import ee
import matplotlib.pyplot as plt
import matplotlib.cm as cm
@ -27,90 +24,72 @@ import sklearn.decomposition as decomposition
import skimage.morphology as morphology
import skimage.measure as measure
# my functions
# my modules
import functions.utils as utils
import functions.sds as sds
# some settings
np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
plt.rcParams['axes.grid'] = False
plt.rcParams['figure.max_open_warning'] = 100
ee.Initialize()
#%% Select images
# parameters
plot_bool = False # if you want the plots
prob_high = 99.9 # upper probability to clip and rescale pixel intensity
cloud_thresh = 0.5 # threshold for cloud cover
plot_bool = True # if you want the plots
min_contour_points = 100 # minimum number of points contained in each water line
output_epsg = 28356 # GDA94 / MGA Zone 56
cloud_threshold = 0.8
buffer_size = 10 # radius of disk for buffer (sand classif parameter)
min_beach_size = 50 # number of pixels in a beach (sand classif parameter)
# select collection
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
satname = 'L8'
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA') # Landsat 8 Tier 1 TOA
# location (Narrabeen-Collaroy beach)
rect_narra = [[[151.3473129272461,-33.69035274454718],
polygon = [[[151.3473129272461,-33.69035274454718],
[151.2820816040039,-33.68206818063878],
[151.27281188964844,-33.74775138989556],
[151.3425064086914,-33.75231878701767],
[151.3473129272461,-33.69035274454718]]];
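# closed ring of [longitude, latitude] vertices (first vertex repeated at the end)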
with open('data/narra_beach.pkl', 'rb') as f:
pts_beach = pickle.load(f)
#rect_narra = [[[151.301454, -33.700754],
# [151.311453, -33.702075],
# [151.307237, -33.739761],
# [151.294220, -33.736329],
# [151.301454, -33.700754]]];
# dates
start_date = '2013-01-01'
end_date = '2018-12-31'
# Dates
start_date = '2016-01-01'
end_date = '2016-12-31'
# filter by location
flt_col = input_col.filterBounds(ee.Geometry.Polygon(rect_narra)).filterDate(start_date, end_date)
# filter by location and date
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(start_date, end_date)
n_img = flt_col.size().getInfo()
print('Number of images covering Narrabeen:', n_img)
print('Number of images covering the polygon:', n_img)
im_all = flt_col.getInfo().get('features')
#%% Extract shorelines
metadata = {'timestamp':[],
'date_acquired':[],
'cloud_cover':[],
'geom_rmse_model':[],
'gcp_model':[],
'quality':[],
'sun_azimuth':[],
'sun_elevation':[]}
skipped_images = np.zeros((n_img,1)).astype(bool)
output_wl = []
# loop through all images
for i in range(n_img):
# find each image in ee database
i = 0 # first image
# find image in ee database
im = ee.Image(im_all[i].get('id'))
# load image as np.array
im_pan, im_ms, im_cloud, crs, meta = sds.read_eeimage(im, rect_narra, plot_bool)
# if clouds -> skip the image
if sum(sum(im_cloud.astype(int)))/(im_cloud.shape[0]*im_cloud.shape[1]) > cloud_threshold:
skipped_images[i] = True
continue
# load image as np.array
im_pan, im_ms, cloud_mask, crs, meta = sds.read_eeimage(im, polygon, satname, plot_bool)
# rescale intensities
im_ms = sds.rescale_image_intensity(im_ms, im_cloud, prob_high, plot_bool)
im_pan = sds.rescale_image_intensity(im_pan, im_cloud, prob_high, plot_bool)
# mask -inf or nan values on the image and add to cloud_mask
im_inf = np.isin(im_ms[:,:,0], -np.inf)
im_nan = np.isnan(im_ms[:,:,0])
cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
print('Cloud cover : ' + str(int(round(100*cloud_cover))) + ' %')
# pansharpen rgb image
im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, im_cloud, plot_bool)
im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, plot_bool)
# add down-sized bands for NIR and SWIR (since pansharpening is not possible)
im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2)
# calculate NDWI
im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], im_cloud, plot_bool)
im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, plot_bool)
# edge detection
wl_pix = sds.find_wl_contours(im_ndwi, im_cloud, min_contour_points, plot_bool)
wl_pix = sds.find_wl_contours(im_ndwi, cloud_mask, min_contour_points, plot_bool)
plt.figure()
plt.imshow(im_ms_ps[:,:,[2,1,0]])
@ -121,112 +100,12 @@ for i in range(n_img):
# convert from pixels to world coordinates
wl_coords = sds.convert_pix2world(wl_pix, crs['crs_15m'])
# convert to output epsg spatial reference
wl = sds.convert_epsg(wl_coords, crs['epsg_code'], output_epsg)
# find contour closest to narrabeen beach
sum_dist = np.zeros(len(wl))
for k,contour in enumerate(wl):
min_dist = np.zeros(len(pts_beach))
for j,pt in enumerate(pts_beach):
min_dist[j] = np.min(np.linalg.norm(contour - pt, axis=1))
sum_dist[k] = np.sum(min_dist)/len(min_dist)
try:
wl_beach = wl[np.argmin(sum_dist)]
# plt.figure()
# plt.axis('equal')
# plt.plot(pts_beach[:,0], pts_beach[:,1], 'ko')
# plt.plot(wl_beach[:,0], wl_beach[:,1], 'r')
# plt.show()
except ValueError: # np.argmin fails on an empty list when no contours were found
wl_beach = []
# plot for QA
plt.figure()
plt.imshow(im_ms_ps[:,:,[2,1,0]])
for k,contour in enumerate(wl_pix): plt.plot(contour[:, 1], contour[:, 0], linewidth=2)
if len(wl_beach) > 0:
plt.plot(wl_pix[np.argmin(sum_dist)][:,1], wl_pix[np.argmin(sum_dist)][:,0], linewidth=3, color='w')
plt.axis('image')
plt.title('im ' + str(i) + ' : ' + datetime.strftime(datetime
.fromtimestamp(meta['timestamp']/1000, tz=pytz.utc)
.astimezone(pytz.timezone('Australia/Sydney')), '%Y-%m-%d %H:%M:%S %Z%z'))
plt.show()
# store metadata of each image in dict
metadata['timestamp'].append(meta['timestamp'])
metadata['date_acquired'].append(meta['date_acquired'])
metadata['cloud_cover'].append(sum(sum(im_cloud.astype(int)))/(im_cloud.shape[0]*im_cloud.shape[1]))
metadata['geom_rmse_model'].append(meta['geom_rmse_model'])
metadata['gcp_model'].append(meta['gcp_model'])
metadata['quality'].append(meta['quality'])
metadata['sun_azimuth'].append(meta['sun_azimuth'])
metadata['sun_elevation'].append(meta['sun_elevation'])
# store water lines
output_wl.append(wl_beach)
print(i)
# generate datetimes
#fmt = '%Y-%m-%d %H:%M:%S %Z%z'
#au_tz = pytz.timezone('Australia/Sydney')
dt = [];
t = metadata['timestamp']
for k in range(len(t)): dt.append(datetime.fromtimestamp(t[k]/1000, tz=pytz.utc))
# save outputs
data = metadata.copy()
data.update({'dt':dt})
data.update({'contours':output_wl})
#with open('data_2016.pkl', 'wb') as f:
# pickle.dump(data, f)
#%% Load data
#with open('data_2016.pkl', 'rb') as f:
# data = pickle.load(f)
# load background image
i = 0
im = ee.Image(im_all[i].get('id'))
im_pan, im_ms, im_cloud, crs, meta = sds.read_eeimage(im, rect_narra, plot_bool)
im_ms = sds.rescale_image_intensity(im_ms, im_cloud, prob_high, plot_bool)
im_pan = sds.rescale_image_intensity(im_pan, im_cloud, prob_high, plot_bool)
im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, im_cloud, plot_bool)
plt.figure()
plt.imshow(im_ms_ps[:,:,[2,1,0]])
plt.axis('image')
plt.title('2016 shorelines')
n = len(data['cloud_cover'])
idx_best = []
# remove overlapping images, based on cloud cover
for i in range(n):
date_im = data['date_acquired'][i]
idx = np.isin(data['date_acquired'], date_im)
best = np.where(idx)[0][np.argmin(np.array(data['cloud_cover'])[idx])]
if ~np.isin(best, idx_best):
idx_best.append(best)
point_narra = np.array([342500, 6266990])
plt.figure()
plt.axis('equal')
plt.grid()
cmap = cm.get_cmap('jet')
colours = cmap(np.linspace(0, 1, num=len(idx_best)))
for i, idx in enumerate(idx_best):
for j in range(len(data['contours'][i])):
if np.any(np.linalg.norm(data['contours'][i][j][:,[0,1]] - point_narra, axis=1) < 200):
plt.plot(data['contours'][i][j][:,0], data['contours'][i][j][:,1],
label=str(data['date_acquired'][i]),
linewidth=2, color=colours[i,:])
plt.legend()
plt.show()
# classify sand pixels with Kmeans
#im_sand = sds.classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, buffer_size, min_beach_size, plot_bool)
# classify image in 4 classes (sand, whitewater, water, other) with NN classifier
im_classif = sds.classify_image_NN(im_ms_ps, im_pan, cloud_mask, plot_bool)

@ -1,359 +0,0 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 21 18:05:01 2018
@author: z5030440
"""
#%% Initial settings
# import packages
import ee
from IPython import display
import math
import matplotlib.pyplot as plt
import numpy as np
from osgeo import gdal
import tempfile
import tensorflow as tf
import urllib
from urllib.request import urlretrieve
import zipfile
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.morphology as morphology
# import scripts
from GEEImageFunctions import *
np.seterr(all='ignore') # ignore divisions by 0 and NaNs
ee.Initialize()
# Load image collection and filter it based on location (Narrabeen)
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
#n_img = input_col.size().getInfo()
#print('Number of images in collection:', n_img)
# filter based on location (Narrabeen-Collaroy)
rect_narra = [[[151.3473129272461,-33.69035274454718],
[151.2820816040039,-33.68206818063878],
[151.27281188964844,-33.74775138989556],
[151.3425064086914,-33.75231878701767],
[151.3473129272461,-33.69035274454718]]];
flt_col = input_col.filterBounds(ee.Geometry.Polygon(rect_narra))
n_img = flt_col.size().getInfo()
print('Number of images covering Narrabeen:', n_img)
# Select the most recent image and download it
im = ee.Image(flt_col.sort('SENSING_TIME',False).first())
im_dic = im.getInfo()
image_prop = im_dic.get('properties')
im_bands = im_dic.get('bands')
for i in range(len(im_bands)): del im_bands[i]['dimensions'] # delete the dimensions key
# download the panchromatic band (B8)
pan_band = [im_bands[7]]
im_pan = load_image(im, rect_narra, pan_band)
im_pan = im_pan[:,:,0]
size_pan = im_pan.shape
vec_pan = im_pan.reshape(size_pan[0] * size_pan[1])
# download the QA band (BQA)
qa_band = [im_bands[11]]
im_qa = load_image(im, rect_narra, qa_band)
im_qa = im_qa[:,:,0]
# convert QA bits
cloud_values = [2800, 2804, 2808, 2812, 6896, 6900, 6904, 6908]
cloud_shadow_values = [2976, 2980, 2984, 2988, 3008, 3012, 3016, 3020]
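# these BQA values are assumed to correspond to the medium/high-confidence cloud
# and cloud-shadow flags of Landsat 8 Collection 1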
# Create cloud mask (resized to be applied to the Pan band)
im_cloud = np.isin(im_qa, cloud_values)
im_cloud_shadow = np.isin(im_qa, cloud_shadow_values)
im_cloud_res = transform.resize(im_cloud,(im_pan.shape[0], im_pan.shape[1]), order=0, preserve_range=True).astype('bool_')
vec_cloud = im_cloud.reshape(im_cloud.shape[0] * im_cloud.shape[1])
vec_cloud_res = im_cloud_res.reshape(size_pan[0] * size_pan[1])
# download the other bands (B2,B3,B4,B5,B6) = (blue,green,red,nir,swir1)
ms_bands = [im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[5]]
im_ms = load_image(im, rect_narra, ms_bands)
size_ms = im_ms.shape
vec_ms = im_ms.reshape(size_ms[0] * size_ms[1], size_ms[2])
# Plot the RGB image and cloud masks
plt.figure()
ax1 = plt.subplot(121)
plt.imshow(im_ms[:,:,[2,1,0]])
plt.title('RGB')
ax2 = plt.subplot(122)
plt.imshow(im_cloud, cmap='gray')
plt.title('Cloud mask')
#ax3 = plt.subplot(133, sharex=ax1, sharey=ax1)
#plt.imshow(im_cloud_shadow)
#plt.title('Cloud mask shadow')
plt.show()
# Resize multispectral bands (30m) to the size of the pan band (15m) using bilinear interpolation
im_ms_res = transform.resize(im_ms,(size_pan[0], size_pan[1]), order=1, preserve_range=True, mode='constant')
vec_ms_res = im_ms_res.reshape(size_pan[0] * size_pan[1], size_ms[2])
# Adjust intensities (cloud pixels are set to NaN)
cloud_value = np.nan
prc_low = 0 # lower percentile
prob_high = 99.9 # upper percentile probability to clip
# Rescale intensities between 0 and 1
vec_ms_adj = np.ones((len(vec_cloud_res),size_ms[2])) * np.nan
for i in range(im_ms.shape[2]):
prc_high = np.percentile(vec_ms_res[~vec_cloud_res,i], prob_high)
vec_rescaled = exposure.rescale_intensity(vec_ms_res[~vec_cloud_res,i], in_range=(prc_low,prc_high))
plt.figure()
plt.hist(vec_rescaled, bins = 300)
plt.show()
vec_ms_adj[~vec_cloud_res,i] = vec_rescaled
im_ms_adj = vec_ms_adj.reshape(size_pan[0], size_pan[1], size_ms[2])
# same for the pan band
vec_pan_adj = np.ones(len(vec_cloud_res)) * np.nan
prc_high = np.percentile(vec_pan[~vec_cloud_res],prob_high)
vec_rescaled = exposure.rescale_intensity(vec_pan[~vec_cloud_res], in_range=(prc_low,prc_high))
plt.figure()
plt.hist(vec_rescaled, bins = 300)
plt.show()
vec_pan_adj[~vec_cloud_res] = vec_rescaled
im_pan_adj = vec_pan_adj.reshape(size_pan[0], size_pan[1])
# Plot adjusted images
plt.figure()
plt.subplot(131)
plt.imshow(im_pan_adj, cmap='gray')
plt.title('PANCHROMATIC (15 m pixel)')
plt.subplot(132)
plt.imshow(im_ms_adj[:,:,[2,1,0]])
plt.title('RGB (30 m pixel)')
plt.show()
plt.subplot(133)
plt.imshow(im_ms_adj[:,:,[3,1,0]])
plt.title('NIR-GB (30 m pixel)')
plt.show()
#%% Pansharpening (PCA)
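# rationale: the first principal component of the selected bands carries most of
# the spatial detail, so replacing it with the histogram-matched 15 m pan band
# and inverting the PCA yields a pansharpened image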
# Run PCA on selected bands
sel_bands = [0,1,2]
temp = vec_ms_adj[:,sel_bands]
vec_ms_adj_nocloud = temp[~vec_cloud_res,:]
pca = decomposition.PCA()
vec_pcs = pca.fit_transform(vec_ms_adj_nocloud)
vec_pcs_all = np.ones((len(vec_cloud_res),len(sel_bands))) * np.nan
vec_pcs_all[~vec_cloud_res,:] = vec_pcs
im_pcs = vec_pcs_all.reshape(size_pan[0], size_pan[1], vec_pcs.shape[1])
plt.figure()
plt.subplot(221)
plt.imshow(im_pcs[:,:,0], cmap='gray')
plt.title('Component 1')
plt.subplot(222)
plt.imshow(im_pcs[:,:,1], cmap='gray')
plt.title('Component 2')
plt.subplot(223)
plt.imshow(im_pcs[:,:,2], cmap='gray')
plt.title('Component 3')
plt.show()
# Compare the Pan image with the 1st Principal component
compare_images(im_pan_adj,im_pcs[:,:,0])
intensity_histogram(im_pan_adj)
intensity_histogram(im_pcs[:,:,0])
# Match histogram of the pan image with the 1st principal component and replace the 1st component
vec_pcs[:,0] = hist_match(vec_pan_adj[~vec_cloud_res], vec_pcs[:,0])
vec_ms_ps = pca.inverse_transform(vec_pcs)
# normalise between 0 and 1
for i in range(vec_pcs.shape[1]):
vec_ms_ps[:,i] = np.divide(vec_ms_ps[:,i] - np.min(vec_ms_ps[:,i]),
np.max(vec_ms_ps[:,i]) - np.min(vec_ms_ps[:,i]))
vec_ms_ps_all = np.ones((len(vec_cloud_res),len(sel_bands))) * np.nan
vec_ms_ps_all[~vec_cloud_res,:] = vec_ms_ps
im_ms_ps = vec_ms_ps_all.reshape(size_pan[0], size_pan[1], len(sel_bands))
vec_ms_ps_all = np.append(vec_ms_ps_all, vec_ms_adj[:,[3,4]], axis=1)
im_ms_ps = np.append(im_ms_ps, im_ms_adj[:,:,[3,4]], axis=2)
# Plot adjusted images
plt.figure()
plt.subplot(121)
plt.imshow(im_ms_adj[:,:,[2,1,0]])
plt.title('Original RGB')
plt.show()
plt.subplot(122)
plt.imshow(im_ms_ps[:,:,[2,1,0]])
plt.title('Pansharpened RGB')
plt.show()
plt.figure()
plt.subplot(121)
plt.imshow(im_ms_adj[:,:,[3,1,0]])
plt.title('Original NIR-GB')
plt.show()
plt.subplot(122)
plt.imshow(im_ms_ps[:,:,[3,1,0]])
plt.title('Pansharpened NIR-GB')
plt.show()
#%% Compute Normalized Difference Water Index (NDWI)
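# normalized difference of two bands, (b1 - b2) / (b1 + b2): computed here with
# NIR vs green, and below with SWIR1 vs green (a modified water index); water
# and land pixels fall on opposite sides of zero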
# With NIR
vec_ndwi_nir = np.ones(len(vec_cloud_res)) * np.nan
temp = np.divide(vec_ms_ps_all[~vec_cloud_res,3] - vec_ms_ps_all[~vec_cloud_res,1],
vec_ms_ps_all[~vec_cloud_res,3] + vec_ms_ps_all[~vec_cloud_res,1])
vec_ndwi_nir[~vec_cloud_res] = temp
im_ndwi_nir = vec_ndwi_nir.reshape(size_pan[0], size_pan[1])
# With SWIR_1
vec_ndwi_swir = np.ones(len(vec_cloud_res)) * np.nan
temp = np.divide(vec_ms_ps_all[~vec_cloud_res,4] - vec_ms_ps_all[~vec_cloud_res,1],
vec_ms_ps_all[~vec_cloud_res,4] + vec_ms_ps_all[~vec_cloud_res,1])
vec_ndwi_swir[~vec_cloud_res] = temp
im_ndwi_swir = vec_ndwi_swir.reshape(size_pan[0], size_pan[1])
plt.figure()
ax1 = plt.subplot(211)
plt.hist(vec_ndwi_nir[~vec_cloud_res], bins=300, label='NIR')
plt.hist(vec_ndwi_swir[~vec_cloud_res], bins=300, label='SWIR', alpha=0.5)
plt.legend()
ax2 = plt.subplot(212, sharex=ax1)
plt.hist(vec_ndwi_nir[~vec_cloud_res], bins=300, cumulative=True, histtype='step', label='NIR')
plt.hist(vec_ndwi_swir[~vec_cloud_res], bins=300, cumulative=True, histtype='step', label='SWIR')
plt.legend()
plt.show()
compare_images(im_ndwi_nir,im_ndwi_swir)
plt.figure()
plt.imshow(im_ndwi_nir, cmap='seismic')
plt.title('Water Index')
plt.colorbar()
plt.show()
#%% Extract shorelines (NIR)
ndwi_nir = vec_ndwi_nir[~vec_cloud_res]
t_otsu = filters.threshold_otsu(ndwi_nir)
t_min = filters.threshold_minimum(ndwi_nir)
t_mean = filters.threshold_mean(ndwi_nir)
t_li = filters.threshold_li(ndwi_nir)
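# Otsu's method picks the threshold that maximises the between-class variance of the (roughly bimodal) water/land histogram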
# try all thresholding algorithms
plt.figure()
plt.hist(ndwi_nir, bins=300)
plt.plot([t_otsu, t_otsu],[0, 15000], 'r-', label='Otsu threshold')
#plt.plot([t_min, t_min],[0, 15000], 'g-', label='min')
#plt.plot([t_mean, t_mean],[0, 15000], 'y-', label='mean')
#plt.plot([t_li, t_li],[0, 15000], 'm-', label='li')
plt.legend()
plt.show()
plt.figure()
plt.imshow(im_ndwi_nir > t_otsu, cmap='gray')
plt.title('Binary image')
plt.show()
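# morphological cleaning: opening then closing removes salt-and-pepper noise;
# XOR-ing the cleaned mask with its erosion leaves a one-pixel-wide contour
# along the land/water boundary, and remove_small_objects drops spurious blobs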
im_bin = im_ndwi_nir > t_otsu
im_open = morphology.binary_opening(im_bin,morphology.disk(1))
im_close = morphology.binary_closing(im_open,morphology.disk(1))
im_bin_coast_in = im_close ^ morphology.erosion(im_close,morphology.disk(1))
im_bin_sl_in = morphology.remove_small_objects(im_bin_coast_in,100,8)
compare_images(im_close,im_bin_sl_in)
plt.figure()
plt.subplot(121)
plt.imshow(im_close, cmap='gray')
plt.title('morphological closing')
plt.subplot(122)
plt.imshow(im_bin_sl_in, cmap='gray')
plt.title('Water mark')
plt.show()
im_bin_coast_out = morphology.dilation(im_close,morphology.disk(1)) ^ im_close
im_bin_sl_out = morphology.remove_small_objects(im_bin_coast_out,100,8)
# Plot shorelines on top of RGB image
im_rgb_sl = np.copy(im_ms_ps[:,:,[2,1,0]])
im_rgb_sl[im_bin_sl_in,0] = 0
im_rgb_sl[im_bin_sl_in,1] = 1
im_rgb_sl[im_bin_sl_in,2] = 1
im_rgb_sl[im_bin_sl_out,0] = 1
im_rgb_sl[im_bin_sl_out,1] = 0
im_rgb_sl[im_bin_sl_out,2] = 1
plt.figure()
plt.imshow(im_rgb_sl)
plt.title('Pansharpened RGB')
plt.show()
#%% Extract shorelines SWIR
ndwi_swir = vec_ndwi_swir[~vec_cloud_res]
t_otsu = filters.threshold_otsu(ndwi_swir)
plt.figure()
plt.hist(ndwi_swir, bins=300)
plt.plot([t_otsu, t_otsu],[0, 15000], 'r-', label='Otsu threshold')
#plt.plot([t_min, t_min],[0, 15000], 'g-', label='min')
#plt.plot([t_mean, t_mean],[0, 15000], 'y-', label='mean')
#plt.plot([t_li, t_li],[0, 15000], 'm-', label='li')
plt.legend()
plt.show()
plt.figure()
plt.imshow(im_ndwi_swir > t_otsu, cmap='gray')
plt.title('Binary image')
plt.show()
im_bin = im_ndwi_swir > t_otsu
im_open = morphology.binary_opening(im_bin,morphology.disk(1))
im_close = morphology.binary_closing(im_open,morphology.disk(1))
im_bin_coast_in = im_close ^ morphology.erosion(im_close,morphology.disk(1))
im_bin_sl_in = morphology.remove_small_objects(im_bin_coast_in,100,8)
compare_images(im_close,im_bin_sl_in)
im_bin_coast_out = morphology.dilation(im_close,morphology.disk(1)) ^ im_close
im_bin_sl_out = morphology.remove_small_objects(im_bin_coast_out,100,8)
# Plot shorelines on top of RGB image
im_rgb_sl = np.copy(im_ms_ps[:,:,[2,1,0]])
im_rgb_sl[im_bin_sl_in,0] = 0
im_rgb_sl[im_bin_sl_in,1] = 1
im_rgb_sl[im_bin_sl_in,2] = 1
im_rgb_sl[im_bin_sl_out,0] = 1
im_rgb_sl[im_bin_sl_out,1] = 0
im_rgb_sl[im_bin_sl_out,2] = 1
plt.figure()
plt.imshow(im_rgb_sl)
plt.title('Pansharpened RGB')
plt.show()

@ -1,260 +0,0 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 1 14:32:08 2018
@author: z5030440
Main code to extract shorelines from Landsat imagery
"""
# Preamble
import ee
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import pandas as pd
from datetime import datetime
import pickle
import pdb
import pytz
from pylab import ginput
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.morphology as morphology
import skimage.measure as measure
# my functions
import functions.utils as utils
import functions.sds as sds
np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
ee.Initialize()
#%% Select images
# parameters
plot_bool = False # if you want the plots
prob_high = 99.9 # upper probability to clip and rescale pixel intensity
min_contour_points = 100 # minimum number of points contained in each water line
output_epsg = 28356 # GDA94 / MGA Zone 56
cloud_threshold = 0.7
# select collection
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
# location (Narrabeen-Collaroy beach)
rect_narra = [[[151.3473129272461,-33.69035274454718],
[151.2820816040039,-33.68206818063878],
[151.27281188964844,-33.74775138989556],
[151.3425064086914,-33.75231878701767],
[151.3473129272461,-33.69035274454718]]];
with open('data/narra_beach.pkl', 'rb') as f:
pts_beach = pickle.load(f)
with open('data/idx_nogt.pkl', 'rb') as f:
idx_nogt = pickle.load(f)
idx_nogt = np.array(idx_nogt)
#rect_narra = [[[151.301454, -33.700754],
# [151.311453, -33.702075],
# [151.307237, -33.739761],
# [151.294220, -33.736329],
# [151.301454, -33.700754]]];
# Dates
start_date = '2016-01-01'
end_date = '2016-12-31'
# filter by location
flt_col = input_col.filterBounds(ee.Geometry.Polygon(rect_narra))#.filterDate(start_date, end_date)
n_img = flt_col.size().getInfo()
print('Number of images covering Narrabeen:', n_img)
im_all = flt_col.getInfo().get('features')
#%% Extract shorelines
metadata = {'timestamp':[],
'date_acquired':[],
'cloud_cover':[],
'geom_rmse_model':[],
'gcp_model':[],
'quality':[],
'sun_azimuth':[],
'sun_elevation':[]}
skipped_images = np.zeros((n_img,1)).astype(bool)
output_wl = []
# loop through all images
for i in range(n_img):
if np.isin(i, idx_nogt):
continue
# find each image in ee database
im = ee.Image(im_all[i].get('id'))
# load image as np.array
im_pan, im_ms, im_cloud, crs, meta = sds.read_eeimage(im, rect_narra, plot_bool)
# if clouds -> skip the image
if sum(sum(im_cloud.astype(int)))/(im_cloud.shape[0]*im_cloud.shape[1]) > cloud_threshold:
skipped_images[i] = True
continue
# rescale intensities
im_ms = sds.rescale_image_intensity(im_ms, im_cloud, prob_high, plot_bool)
im_pan = sds.rescale_image_intensity(im_pan, im_cloud, prob_high, plot_bool)
# pansharpen rgb image
im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, im_cloud, plot_bool)
# add down-sized bands for NIR and SWIR (since pansharpening is not possible)
im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2)
# calculate NDWI
im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], im_cloud, plot_bool)
# edge detection
wl_pix = sds.find_wl_contours(im_ndwi, im_cloud, min_contour_points, plot_bool)
# convert from pixels to world coordinates
wl_coords = sds.convert_pix2world(wl_pix, crs['crs_15m'])
# convert to output epsg spatial reference
wl = sds.convert_epsg(wl_coords, crs['epsg_code'], output_epsg)
# find contour closest to narrabeen beach
sum_dist = np.zeros(len(wl))
for k,contour in enumerate(wl):
min_dist = np.zeros(len(pts_beach))
for j,pt in enumerate(pts_beach):
min_dist[j] = np.min(np.linalg.norm(contour - pt, axis=1))
sum_dist[k] = np.sum(min_dist)/len(min_dist)
try:
wl_beach = wl[np.argmin(sum_dist)]
# plt.figure()
# plt.axis('equal')
# plt.plot(pts_beach[:,0], pts_beach[:,1], 'ko')
# plt.plot(wl_beach[:,0], wl_beach[:,1], 'r')
# plt.show()
except ValueError: # np.argmin fails on an empty list when no contours were found
wl_beach = []
plt.figure()
plt.imshow(im_ms_ps[:,:,[2,1,0]])
for k,contour in enumerate(wl_pix): plt.plot(contour[:, 1], contour[:, 0], linewidth=2)
if len(wl_beach) > 0:
plt.plot(wl_pix[np.argmin(sum_dist)][:,1], wl_pix[np.argmin(sum_dist)][:,0], linewidth=3, color='w')
plt.axis('image')
plt.title('im ' + str(i) + ' : ' + datetime.strftime(datetime
.fromtimestamp(meta['timestamp']/1000, tz=pytz.utc)
.astimezone(pytz.timezone('Australia/Sydney')), '%Y-%m-%d %H:%M:%S %Z%z'))
plt.show()
# manually validate shoreline detection
input_pt = np.array(ginput(1))
if input_pt[0,1] > 300:
skipped_images[i] = True
continue
# store metadata of each image in dict
metadata['timestamp'].append(meta['timestamp'])
metadata['date_acquired'].append(meta['date_acquired'])
metadata['cloud_cover'].append(sum(sum(im_cloud.astype(int)))/(im_cloud.shape[0]*im_cloud.shape[1]))
metadata['geom_rmse_model'].append(meta['geom_rmse_model'])
metadata['gcp_model'].append(meta['gcp_model'])
metadata['quality'].append(meta['quality'])
metadata['sun_azimuth'].append(meta['sun_azimuth'])
metadata['sun_elevation'].append(meta['sun_elevation'])
# store water lines
output_wl.append(wl_beach)
print(i)
# generate datetimes
#fmt = '%Y-%m-%d %H:%M:%S %Z%z'
#au_tz = pytz.timezone('Australia/Sydney')
dt = [];
t = metadata['timestamp']
for k in range(len(t)): dt.append(datetime.fromtimestamp(t[k]/1000, tz=pytz.utc))
# save outputs
data = metadata.copy()
data.update({'dt':dt})
data.update({'contours':output_wl})
with open('data_gt15d_32_56.pkl', 'wb') as f:
pickle.dump(data, f)
#%% Load data
##with open('data_2016.pkl', 'rb') as f:
## data = pickle.load(f)
#
#
## load background image
#i = 0
#im = ee.Image(im_all[i].get('id'))
#im_pan, im_ms, im_cloud, crs, meta = sds.read_eeimage(im, rect_narra, plot_bool)
#im_ms = sds.rescale_image_intensity(im_ms, im_cloud, prob_high, plot_bool)
#im_pan = sds.rescale_image_intensity(im_pan, im_cloud, prob_high, plot_bool)
#im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, im_cloud, plot_bool)
#
#plt.figure()
#plt.imshow(im_ms_ps[:,:,[2,1,0]])
#plt.axis('image')
#plt.title('2016 shorelines')
#
#n = len(data['cloud_cover'])
#idx_best = []
## remove overlapping images, based on cloud cover
#for i in range(n):
# date_im = data['date_acquired'][i]
# idx = np.isin(data['date_acquired'], date_im)
# best = np.where(idx)[0][np.argmin(np.array(data['cloud_cover'])[idx])]
# if ~np.isin(best, idx_best):
# idx_best.append(best)
#
#point_narra = np.array([342500, 6266990])
#plt.figure()
#plt.axis('equal')
#plt.grid()
#cmap = cm.get_cmap('jet')
#colours = cmap(np.linspace(0, 1, num=len(idx_best)))
#for i, idx in enumerate(idx_best):
# for j in range(len(data['contours'][i])):
# if np.any(np.linalg.norm(data['contours'][i][j][:,[0,1]] - point_narra, axis=1) < 200):
# plt.plot(data['contours'][i][j][:,0], data['contours'][i][j][:,1],
# label=str(data['date_acquired'][i]),
# linewidth=2, color=colours[i,:])
#
#plt.legend()
#plt.show()
#
#pts_narra = sds.convert_epsg(pts_narra, output_epsg, 4326)
#
##kml.newlinestring(name="beach",
## coords = [(_[0], _[1]) for _ in pts_narra])
##kml.save("narra.kml")
#%%
#with open('data_gt15d_0_31.pkl', 'rb') as f:
# data1 = pickle.load(f)
#with open('data_gt15d_32_56.pkl', 'rb') as f:
# data2 = pickle.load(f)
#with open('data_gt15d_99_193.pkl', 'rb') as f:
# data3 = pickle.load(f)
#
#data = []
#data = data1.copy()
#for k,cat in enumerate(data.keys()):
# for j in range(len(data2[cat])):
# data[cat].append(data2[cat][j])
# for j in range(len(data3[cat])):
# data[cat].append(data3[cat][j])
#
#
#with open('data_gt_l8.pkl', 'wb') as f:
# pickle.dump(data, f)

@ -1,136 +0,0 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 20 16:15:51 2018
@author: z5030440
"""
import scipy.io as sio
import os
import ee
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import pickle
import pdb
import pytz
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import skimage.morphology as morphology
import skimage.measure as measure
import sklearn.decomposition as decomposition
from scipy import spatial
# my functions
import functions.utils as utils
import functions.sds as sds
#plt.rcParams['axes.grid'] = True
au_tz = pytz.timezone('Australia/Sydney')
# load quadbike dates and convert from datenum to datetime
suffix = '.mat'
dir_name = os.getcwd()
file_name = 'data\quadbike_dates'
file_path = os.path.join(dir_name, file_name + suffix)
quad_dates = sio.loadmat(file_path)['dates']
dt_quad = []
for i in range(quad_dates.shape[0]):
dt_quad.append(datetime(quad_dates[i,0], quad_dates[i,1], quad_dates[i,2], tzinfo=au_tz))
# load satellite datetimes (in UTC) and convert to AEST time
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
# location (Narrabeen-Collaroy beach)
rect_narra = [[[151.3473129272461,-33.69035274454718],
[151.2820816040039,-33.68206818063878],
[151.27281188964844,-33.74775138989556],
[151.3425064086914,-33.75231878701767],
[151.3473129272461,-33.69035274454718]]];
flt_col = input_col.filterBounds(ee.Geometry.Polygon(rect_narra))
n_img = flt_col.size().getInfo()
print('Number of images covering Narrabeen:', n_img)
im_all = flt_col.getInfo().get('features')
# extract datetimes from image metadata
dt_sat = [_['properties']['system:time_start'] for _ in im_all]
dt_sat = [datetime.fromtimestamp(_/1000, tz=pytz.utc) for _ in dt_sat]
dt_sat = [_.astimezone(au_tz) for _ in dt_sat]
# calculate days difference
diff_days = [ [(x - _).days for _ in dt_quad] for x in dt_sat]
day_thresh = 15
idx = [utils.find_indices(_, lambda e: abs(e) < day_thresh) for _ in diff_days]
dt_diff = []
idx_nogt = []
for i in range(n_img):
if not idx[i]:
idx_nogt.append(i)
continue
dt_diff.append({"sat dt": dt_sat[i],
"quad dt": [dt_quad[_] for _ in idx[i]],
"days diff": [diff_days[i][_] for _ in idx[i]] })
with open('idx_nogt.pkl', 'wb') as f:
pickle.dump(idx_nogt, f)
#%%
dates_sat = mdates.date2num(dt_sat)
dates_quad = mdates.date2num(dt_quad)
plt.figure()
plt.plot_date(dates_sat, np.zeros((n_img,1)))
plt.plot_date(dates_quad, np.ones((len(dates_quad),1)))
plt.show()
data = pd.read_pickle('data_2016.pkl')
dt_sat = [_.astimezone(au_tz) for _ in data['dt']]
[ (_ - dt_sat[0]).days for _ in dt_quad]
dn_sat = []
for i in range(len(dt_sat)): dn_sat.append(dt_sat[i].toordinal())
dn_sat = np.array(dn_sat)
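# NOTE: dt_survey is not defined in this script; it presumably held survey datetimes analogous to dt_quad above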
dn_sur = []
for i in range(len(dt_survey)): dn_sur.append(dt_survey[i].toordinal())
dn_sur = np.array(dn_sur)
distances = np.zeros((len(dn_sat),4)).astype('int32')
indexes = np.zeros((len(dn_sat),2)).astype('int32')
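# for each satellite date, record the two smallest day-offsets to a survey
# (columns 0-1), the year and month (columns 2-3), and the indices of the two
# closest surveys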
for i in range(len(dn_sat)):
distances[i,0] = np.sort(abs(dn_sat[i] - dn_sur))[0]
distances[i,1] = np.sort(abs(dn_sat[i] - dn_sur))[1]
distances[i,2] = dt_sat[i].year
distances[i,3] = dt_sat[i].month
indexes[i,0] = np.where(abs(dn_sat[i] - dn_sur) == np.sort(abs(dn_sat[i] - dn_sur))[0])[0][0]
indexes[i,1] = np.where(abs(dn_sat[i] - dn_sur) == np.sort(abs(dn_sat[i] - dn_sur))[1])[0][0]
years = [2013, 2014, 2015, 2016]
months = mdates.MonthLocator()
days = mdates.DayLocator()
month_fmt = mdates.DateFormatter('%b')
f, ax = plt.subplots(4, 1)
for i, ca in enumerate(ax):
ca.xaxis.set_major_locator(months)
ca.xaxis.set_major_formatter(month_fmt)
ca.xaxis.set_minor_locator(days)
ca.set_ylabel(str(years[i]))
for j in range(len(dt_sat)):
if dt_sat[j].year == years[i]:
ca.plot(dt_sat[j],0, 'bo', markerfacecolor='b')
#f.subplots_adjust(hspace=0)
#plt.setp([a.get_xticklabels() for a in f.axes[:-1]], visible=False)
plt.plot(dt_survey, np.zeros([len(dt_survey),1]), 'bo')
plt.plot(dt_sat, np.ones([len(dt_sat),1]), 'ro')
plt.yticks([])
plt.show()