create NN for sand classif

7 years ago · 0af37d9c6a
parent 35f421bfcb
commit 0af37d9c6a
41 changed files with 123368 additions and 195 deletions
--- a/classify_sand.py
+++ b/classify_sand.py
@ -1,181 +0,0 @@
-# -*- coding: utf-8 -*-
-
-#==========================================================#
-# Extract shorelines from Landsat images
-#==========================================================#
-
-# Initial settings
-import ee
-import matplotlib.pyplot as plt
-import matplotlib.cm as cm
-import numpy as np
-import pandas as pd
-from datetime import datetime
-import pickle
-import pdb
-import pytz
-
-
-# image processing modules
-import skimage.filters as filters 
-import skimage.exposure as exposure
-import skimage.transform as transform
-import sklearn.decomposition as decomposition
-import skimage.morphology as morphology
-import skimage.measure as measure
-
-# machine learning modules
-from sklearn.cluster import KMeans
-
-# my modules
-import functions.utils as utils
-import functions.sds as sds
-
-# some settings
-np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
-plt.rcParams['axes.grid'] = False
-plt.rcParams['figure.max_open_warning'] = 100
-ee.Initialize()
-
-# parameters
-cloud_thresh = 0.5      # threshold for cloud cover
-plot_bool = True        # if you want the plots
-prob_high = 99.9        # upper probability to clip and rescale pixel intensity
-min_contour_points = 100# minimum number of points contained in each water line
-output_epsg = 28356     # GDA94 / MGA Zone 56
-buffer_size = 10        # radius (in pixels) of disk for buffer (pixel classification)
-min_beach_size = 50     # number of pixels in a beach (pixel classification)
-
-# select collection
-satname = 'L8'
-input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA') # Landsat 8 Tier 1 TOA
-
-# location (Narrabeen-Collaroy beach)
-polygon = [[[151.3473129272461,-33.69035274454718],
-            [151.2820816040039,-33.68206818063878], 
-            [151.27281188964844,-33.74775138989556],
-            [151.3425064086914,-33.75231878701767],
-            [151.3473129272461,-33.69035274454718]]];
-
-# dates
-start_date = '2013-01-01'
-end_date = '2018-12-31'
-
-# filter by location and date
-flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(start_date, end_date)
-
-n_img = flt_col.size().getInfo()
-print('Number of images covering the polygon:', n_img)
-im_all = flt_col.getInfo().get('features')
-
-i = 0 # first image
-    
-# find image in ee database
-im = ee.Image(im_all[i].get('id')) 
-
-# load image as np.array
-im_pan, im_ms, im_cloud, crs, meta = sds.read_eeimage(im, polygon, satname, plot_bool)
-
-# rescale intensities
-im_ms = sds.rescale_image_intensity(im_ms, im_cloud, prob_high, plot_bool)
-im_pan = sds.rescale_image_intensity(im_pan, im_cloud, prob_high, plot_bool)
-
-# pansharpen rgb image
-im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, im_cloud, plot_bool)
-
-# add down-sized bands for NIR and SWIR (since pansharpening is not possible)
-im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2) 
-
-# calculate NDWI
-im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], im_cloud, plot_bool)
-
-# edge detection
-wl_pix = sds.find_wl_contours(im_ndwi, im_cloud, min_contour_points, plot_bool)
-
-# convert from pixels to world coordinates
-wl_coords = sds.convert_pix2world(wl_pix, crs['crs_15m'])
-
-# convert to output epsg spatial reference
-wl = sds.convert_epsg(wl_coords, crs['epsg_code'], output_epsg)
-
-# classify sand pixels
-im_sand = sds.classify_sand_unsupervised(im_ms_ps, im_pan, im_cloud, wl_pix, buffer_size, min_beach_size, plot_bool)
-
-#%%
-plt.figure()
-plt.imshow(im_ms_ps[:,:,[2,1,0]])
-for i,contour in enumerate(wl_pix): plt.plot(contour[:, 1], contour[:, 0], linewidth=2)
-plt.axis('image')
-plt.title('Detected water lines')
-plt.show()
-
-vec = im_ms_ps.reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1], im_ms_ps.shape[2])
-vec_pan = im_pan.reshape(im_pan.shape[0]*im_pan.shape[1])
-features = np.zeros((len(vec), 5))
-features[:,[0,1,2,3]] = vec[:,[0,1,2,3]]
-features[:,4] = vec_pan
-vec_mask = im_cloud.reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1])
-
-# create buffer
-im_buffer = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1]))
-for i, contour in enumerate(wl_pix):
-        indices = [(int(_[0]), int(_[1])) for _ in list(np.round(contour))]
-        for j, idx in enumerate(indices):
-            im_buffer[idx] = 1
-        
-        
-plt.figure()
-plt.imshow(im_buffer)
-plt.draw()
-
-se = morphology.disk(buffer_size)
-im_buffer = morphology.binary_dilation(im_buffer, se)
-
-plt.figure()
-plt.imshow(im_buffer)
-plt.draw()
-
-vec_buffer = (im_buffer == 1).reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1])
-
-vec_buffer= np.logical_and(vec_buffer, ~vec_mask)
-
-#vec_buffer = np.ravel_multi_index(z,(im_ms_ps.shape[0], im_ms_ps.shape[1]))
-
-
-kmeans = KMeans(n_clusters=6, random_state=0).fit(vec[vec_buffer,:])
-labels = kmeans.labels_
-labels_full = np.ones((len(vec_mask))) * np.nan
-labels_full[vec_buffer] = labels
-im_labels = labels_full.reshape(im_ms_ps.shape[0], im_ms_ps.shape[1])
-
-plt.figure()
-plt.imshow(im_labels)
-plt.axis('equal')
-plt.draw()
-
-utils.compare_images(im_labels, im_pan)
-
-plt.figure()
-for i in range(6): plt.plot(kmeans.cluster_centers_[i,:])
-plt.draw()
-
-im_sand = im_labels == np.argmax(np.mean(kmeans.cluster_centers_[:,[0,1,2,4]], axis=1))
-
-im_sand2 = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
-im_sand3 = morphology.binary_dilation(im_sand2, morphology.disk(1))
-plt.figure()
-plt.imshow(im_sand3)
-plt.draw()
-
-im_ms_ps[im_sand3,0] = 0
-im_ms_ps[im_sand3,1] = 0
-im_ms_ps[im_sand3,2] = 1
-
-
-plt.figure()
-plt.imshow(im_ms_ps[:,:,[2,1,0]])
-plt.axis('image')
-plt.title('Sand classification')
-plt.show()
-
-#%%
--- a/data/L8/NARRA_all/NARRA_all_accuracy_georef.pkl
+++ b/data/L8/NARRA_all/NARRA_all_accuracy_georef.pkl
--- a/data/L8/NARRA_all/NARRA_all_clf.pkl
+++ b/data/L8/NARRA_all/NARRA_all_clf.pkl
--- a/data/L8/NARRA_all/NARRA_all_epsgcode.pkl
+++ b/data/L8/NARRA_all/NARRA_all_epsgcode.pkl
@ -0,0 +1 @@
+<EFBFBD>M<>.
--- a/data/L8/NARRA_all/NARRA_all_sand_idxskip.pkl
+++ b/data/L8/NARRA_all/NARRA_all_sand_idxskip.pkl
--- a/data/L8/NARRA_all/NARRA_all_sand_train.pkl
+++ b/data/L8/NARRA_all/NARRA_all_sand_train.pkl
--- a/data/L8/NARRA_all/NARRA_all_skipped_sandclassif.pkl
+++ b/data/L8/NARRA_all/NARRA_all_skipped_sandclassif.pkl
--- a/data/L8/NARRA_all/NARRA_all_timestamps.pkl
+++ b/data/L8/NARRA_all/NARRA_all_timestamps.pkl
--- a/data/L8/NARRA_all/NARRA_all_train_neg.pkl
+++ b/data/L8/NARRA_all/NARRA_all_train_neg.pkl
--- a/data/L8/NARRA_all/NARRA_all_train_pos.pkl
+++ b/data/L8/NARRA_all/NARRA_all_train_pos.pkl
--- a/data/L8/NARRA_all/NARRA_all_training_data_wnans.pkl
+++ b/data/L8/NARRA_all/NARRA_all_training_data_wnans.pkl
--- a/download_images_L8.py
+++ b/download_images_L8.py
@ -47,12 +47,18 @@ def download_tif(image, polygon, bandsId, filepath):
    
 # select collection
 input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
+# Location (Narrabeen all)
+polygon = [[[151.3473129272461,-33.69035274454718],
+            [151.2820816040039,-33.68206818063878], 
+            [151.27281188964844,-33.74775138989556],
+            [151.3425064086914,-33.75231878701767],
+            [151.3473129272461,-33.69035274454718]]];
 # location (Narrabeen-Collaroy beach)
-polygon = [[[151.301454, -33.700754],
-               [151.311453, -33.702075], 
-               [151.307237, -33.739761],
-               [151.294220, -33.736329],
-               [151.301454, -33.700754]]];
+#polygon = [[[151.301454, -33.700754],
+#               [151.311453, -33.702075], 
+#               [151.307237, -33.739761],
+#               [151.294220, -33.736329],
+#               [151.301454, -33.700754]]];
 # location (Oldbar beach)
 #polygon = [[[152.664508, -31.896163],
 #               [152.665827, -31.897112], 
@ -77,7 +83,8 @@ print('Number of images covering the area:', n_img)
 im_all = flt_col.getInfo().get('features')

 satname = 'L8'
-sitename = 'NARRA'
+sitename = 'NARRA_all'
+#sitename = 'NARRA'
 #sitename = 'OLDBAR'
 suffix = '.tif'
 filepath = os.path.join(os.getcwd(), 'data', satname, sitename)
@ -115,15 +122,15 @@ for i in range(n_img):
        filename_ms = satname + '_' + sitename + '_' + im_date + '_ms' + '_r' + suffix 
    all_names_pan.append(filename_pan)
    
-#    local_data_pan = download_tif(im, polygon, pan_band, filepath_pan)
-#    os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
-#    local_data_ms = download_tif(im, polygon, ms_bands, filepath_ms)
-#    os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
+    local_data_pan = download_tif(im, polygon, pan_band, filepath_pan)
+    os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
+    local_data_ms = download_tif(im, polygon, ms_bands, filepath_ms)
+    os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
    
    
-#with open(os.path.join(filepath, sitename + '_timestamps' + '.pkl'), 'wb') as f:
-#    pickle.dump(timestamps, f)  
-#with open(os.path.join(filepath, sitename + '_epsgcode' + '.pkl'), 'wb') as f:
-#    pickle.dump(im_epsg, f)      
+with open(os.path.join(filepath, sitename + '_timestamps' + '.pkl'), 'wb') as f:
+    pickle.dump(timestamps, f)  
+with open(os.path.join(filepath, sitename + '_epsgcode' + '.pkl'), 'wb') as f:
+    pickle.dump(im_epsg, f)      
 with open(os.path.join(filepath, sitename + '_accuracy_georef' + '.pkl'), 'wb') as f:
    pickle.dump(acc_georef, f)  
--- a/old/oldbar/L8OLDBAR_inlet2_shorelines.kml
+++ b/old/oldbar/L8OLDBAR_inlet2_shorelines.kml
--- a/old/oldbar/L8OLDBAR_inlet_shorelines.kml
+++ b/old/oldbar/L8OLDBAR_inlet_shorelines.kml
--- a/old/oldbar/L8OLDBAR_shorelines.kml
+++ b/old/oldbar/L8OLDBAR_shorelines.kml
--- a/old/oldbar/time_coverage_oldbar.py
+++ b/old/oldbar/time_coverage_oldbar.py
--- a/old/oldcodes/box_head.py
+++ b/old/oldcodes/box_head.py
--- a/old/oldcodes/classify_sand_test.py
+++ b/old/oldcodes/classify_sand_test.py
@ -0,0 +1,476 @@
+# -*- coding: utf-8 -*-
+
+#==========================================================#
+# Extract shorelines from Landsat images
+#==========================================================#
+
+# Initial settings
+import ee
+import matplotlib.pyplot as plt
+import matplotlib.cm as cm
+import numpy as np
+import pandas as pd
+from datetime import datetime
+import pickle
+import pdb
+import pytz
+from pylab import ginput
+from osgeo import gdal
+
+
+
+# image processing modules
+import skimage.filters as filters 
+import skimage.exposure as exposure
+import skimage.transform as transform
+import sklearn.decomposition as decomposition
+import skimage.morphology as morphology
+import skimage.measure as measure
+import skimage.color as color
+import skimage.feature as feature
+# machine learning modules
+from sklearn.cluster import KMeans
+
+# my modules
+import functions.utils as utils
+import functions.sds as sds
+
+# some settings
+np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
+plt.rcParams['axes.grid'] = False
+plt.rcParams['figure.max_open_warning'] = 100
+ee.Initialize()
+
+# parameters
+cloud_thresh = 0.5      # threshold for cloud cover
+plot_bool = False        # if you want the plots
+prob_high = 99.9        # upper probability to clip and rescale pixel intensity
+min_contour_points = 100# minimum number of points contained in each water line
+output_epsg = 28356     # GDA94 / MGA Zone 56
+buffer_size = 10        # radius (in pixels) of disk for buffer (pixel classification)
+min_beach_size = 50     # number of pixels in a beach (pixel classification)
+
+# select collection
+satname = 'L8'
+input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA') # Landsat 8 Tier 1 TOA
+
+# location (Narrabeen-Collaroy beach)
+polygon = [[[151.3473129272461,-33.69035274454718],
+            [151.2820816040039,-33.68206818063878], 
+            [151.27281188964844,-33.74775138989556],
+            [151.3425064086914,-33.75231878701767],
+            [151.3473129272461,-33.69035274454718]]];
+
+# dates
+start_date = '2013-01-01'
+end_date = '2018-12-31'
+
+# filter by location and date
+flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(start_date, end_date)
+
+n_img = flt_col.size().getInfo()
+print('Number of images covering the polygon:', n_img)
+im_all = flt_col.getInfo().get('features')
+
+i = 0 # first image
+
+# find image in ee database
+im = ee.Image(im_all[i].get('id')) 
+
+# load image as np.array
+im_pan, im_ms, im_cloud, crs, meta = sds.read_eeimage(im, polygon, satname, plot_bool)
+
+# rescale intensities
+im_ms = sds.rescale_image_intensity(im_ms, im_cloud, prob_high, plot_bool)
+im_pan = sds.rescale_image_intensity(im_pan, im_cloud, prob_high, plot_bool)
+
+# pansharpen rgb image
+im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, im_cloud, plot_bool)
+
+# add down-sized bands for NIR and SWIR (since pansharpening is not possible)
+im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2) 
+
+# calculate NDWI
+im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], im_cloud, plot_bool)
+
+# edge detection
+wl_pix = sds.find_wl_contours(im_ndwi, im_cloud, min_contour_points, plot_bool)
+
+# convert from pixels to world coordinates
+wl_coords = sds.convert_pix2world(wl_pix, crs['crs_15m'])
+
+# convert to output epsg spatial reference
+wl = sds.convert_epsg(wl_coords, crs['epsg_code'], output_epsg)
+
+# classify sand pixels
+im_sand = sds.classify_sand_unsupervised(im_ms_ps, im_pan, im_cloud, wl_pix, buffer_size, min_beach_size, plot_bool)
+
+#pt_in = np.array(ginput(n=1, timeout=1000))
+#if pt_in[0][0] < im_ms_ps.shape[1]/2:
+win = np.ones((3,3))
+
+im_features = np.zeros((sum(sum(im_sand)), 20))
+im_features[:,[0,1,2,3,4]] = im_ms_ps[im_sand,:] # B G R NIR SWIR
+im_features[:,5] = im_pan[im_sand] # Pan
+im_features[:,6] = im_ndwi[im_sand] # NDWI
+im_features[:,7] = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], im_cloud, False)[im_sand] # NDVI
+im_features[:,8] = sds.nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], im_cloud, False)[im_sand] # ND Blue - Red
+for i in range(9):
+    im_features[:,i+9] = ndimage.generic_filter(im_features[:,i], np.std, footprint = win)
+     
+    
+im_ms_ps[im_sand,:]
+ 
+im_grey = color.rgb2grey(im_ms_ps[:,:,[2,1,0]])
+plt.figure()
+plt.imshow(im_grey, cmap='gray')
+plt.draw()
+
+counts, bins = np.histogram(im_grey[~im_cloud], bins=255)
+im_grey_d = np.digitize(im_grey, bins=bins) - 1
+
+from scipy import ndimage
+
+varianceMatrix1 = ndimage.generic_filter(im_grey_d, np.max, footprint = np.ones((3,3)))
+varianceMatrix2 = ndimage.generic_filter(im_grey_d, np.min, footprint = np.ones((3,3)))
+varianceMatrix = varianceMatrix1 - varianceMatrix2
+im_grey = color.rgb2grey(im_ms_ps[:,:,[2,1,0]])
+plt.figure()
+plt.imshow(varianceMatrix, cmap='gray')
+plt.draw()
+#%%
+data = gdal.Open('l8_test.tif', gdal.GA_ReadOnly)
+bands = [data.GetRasterBand(i + 1).ReadAsArray() for i in range(data.RasterCount)]
+im = np.stack(bands, 2)
+
+im_test = im[:,:,3]
+
+plt.figure()
+plt.imshow(im_test, cmap='gray')
+plt.axis('image')
+plt.draw()
+
+im_stats = np.zeros((im_test.shape[0], im_test.shape[1], 6))
+winsize = 5
+prop_names = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM')
+for i in range(im_test.shape[0]):
+    print(int(np.round(100*i/im_test.shape[0])))
+    for j in range(im_test.shape[1]):
+        #windows needs to fit completely in image
+        if i <2 or j <2:
+            continue
+        if i > (im_test.shape[0] - 3) or j > (im_test.shape[1] - 3):
+            continue
+
+        #Calculate GLCM on a 3x3 window
+        glcm_window = im_test[i-2: i+3, j-2 : j+3]
+        glcm = feature.greycomatrix(glcm_window, [1], [0],  symmetric = True, normed = True )
+
+        #Calculate contrast and replace center pixel
+        for k in range(6): im_stats[i,j,k] = feature.greycoprops(glcm, prop_names[k])
+
+plt.figure()    
+for i in range(6):
+    plt.subplot(2,3,i+1)
+    plt.imshow(im_stats[:,:,i], cmap='jet')
+    plt.axis('image')
+    plt.title(prop_names[i])
+    plt.draw()
+    
+pixel_loc = [200, 200]
+im_stats[pixel_loc[0], pixel_loc[1], 3]
+#%%
+
+for i in range(im_grey_d.shape[0]):
+    print(int(np.round(100*i/im_grey_d.shape[0])))
+    for j in range(im_grey_d.shape[1]):
+        #windows needs to fit completely in image
+        if i <1 or j <1:
+            continue
+        if i > (im_grey_d.shape[0] - 1) or j > (im_grey_d.shape[0] - 1):
+            continue
+
+        #Calculate GLCM on a 3x3 window
+        glcm_window = im_grey_d[i-1: i+1, j-1 : j+1]
+        glcm = feature.greycomatrix(glcm_window, [1,2], [0], levels=256,  symmetric = True, normed = True )
+
+        #Calculate contrast and replace center pixel
+        im_stats[i,j,0] = feature.greycoprops(glcm, 'contrast')
+        im_stats[i,j,1] = feature.greycoprops(glcm, 'dissimilarity')
+        im_stats[i,j,2] = feature.greycoprops(glcm, 'homogeneity')
+        im_stats[i,j,3] = feature.greycoprops(glcm, 'energy')
+        im_stats[i,j,4] = feature.greycoprops(glcm, 'correlation')
+        im_stats[i,j,5] = feature.greycoprops(glcm, 'ASM')
+
+plt.figure()    
+for i in range(6):
+    plt.subplot(2,3,i+1)
+    plt.imshow(im_stats[:,:,i], cmap='jet')
+    plt.axis('image')
+    plt.draw()
+#%%
+from multiprocessing import Pool
+from itertools import product
+
+N = 10000000
+pool = Pool() #defaults to number of available CPU's
+a = np.ones((N))
+b = np.ones((N))*2
+out = np.zeros((N))
+
+t = time.time()
+for i in range(len(a)):
+    out[i] = a[i]*b[i]
+elapsed = time.time() - t
+print(elapsed)
+
+def fun(a,b):
+    return a*b
+
+chunksize = 20 #this may take some guessing ... take a look at the docs to decide
+for ind, res in enumerate(pool.map(fun, range(N)), chunksize):
+    output.flat[ind] = res
+#%%
+
+import gdal, osr
+import numpy as np
+from scipy.interpolate import RectBivariateSpline
+from numpy.lib.stride_tricks import as_strided as ast
+import dask.array as da
+from joblib import Parallel, delayed, cpu_count
+import os
+from skimage.feature import greycomatrix, greycoprops
+
+def im_resize(im,Nx,Ny):
+    '''
+    resize array by bivariate spline interpolation
+    '''
+    ny, nx = np.shape(im)
+    xx = np.linspace(0,nx,Nx)
+    yy = np.linspace(0,ny,Ny)
+
+    try:
+        im = da.from_array(im, chunks=1000)   #dask implementation
+    except:
+        pass
+
+    newKernel = RectBivariateSpline(np.r_[:ny],np.r_[:nx],im)
+    return newKernel(yy,xx)
+
+def p_me(Z, win):
+    '''
+    loop to calculate greycoprops
+    '''
+    try:
+        glcm = greycomatrix(Z, [5], [0], 256, symmetric=True, normed=True)
+        cont = greycoprops(glcm, 'contrast')
+        diss = greycoprops(glcm, 'dissimilarity')
+        homo = greycoprops(glcm, 'homogeneity')
+        eng = greycoprops(glcm, 'energy')
+        corr = greycoprops(glcm, 'correlation')
+        ASM = greycoprops(glcm, 'ASM')
+        return (cont, diss, homo, eng, corr, ASM)
+    except:
+        return (0,0,0,0,0,0)
+
+def norm_shape(shap):
+   '''
+   Normalize numpy array shapes so they're always expressed as a tuple,
+   even for one-dimensional shapes.
+   '''
+   try:
+      i = int(shap)
+      return (i,)
+   except TypeError:
+      # shape was not a number
+      pass
+
+   try:
+      t = tuple(shap)
+      return t
+   except TypeError:
+      # shape was not iterable
+      pass
+
+   raise TypeError('shape must be an int, or a tuple of ints')
+
+def sliding_window(a, ws, ss = None, flatten = True):
+    '''
+    Source: http://www.johnvinyard.com/blog/?p=268#more-268
+    Parameters:
+        a  - an n-dimensional numpy array
+        ws - an int (a is 1D) or tuple (a is 2D or greater) representing the size 
+             of each dimension of the window
+        ss - an int (a is 1D) or tuple (a is 2D or greater) representing the 
+             amount to slide the window in each dimension. If not specified, it
+             defaults to ws.
+        flatten - if True, all slices are flattened, otherwise, there is an 
+                  extra dimension for each dimension of the input.
+
+    Returns
+        an array containing each n-dimensional window from a
+    '''      
+    if None is ss:
+        # ss was not provided. the windows will not overlap in any direction.
+        ss = ws
+    ws = norm_shape(ws)
+    ss = norm_shape(ss)
+    # convert ws, ss, and a.shape to numpy arrays
+    ws = np.array(ws)
+    ss = np.array(ss)
+    shap = np.array(a.shape)
+    # ensure that ws, ss, and a.shape all have the same number of dimensions
+    ls = [len(shap),len(ws),len(ss)]
+    if 1 != len(set(ls)):
+        raise ValueError(\
+        'a.shape, ws and ss must all have the same length. They were %s' % str(ls))
+
+    # ensure that ws is smaller than a in every dimension
+    if np.any(ws > shap):
+        raise ValueError(\
+        'ws cannot be larger than a in any dimension.\
+     a.shape was %s and ws was %s' % (str(a.shape),str(ws)))
+
+    # how many slices will there be in each dimension?
+    newshape = norm_shape(((shap - ws) // ss) + 1)
+
+
+    # the shape of the strided array will be the number of slices in each dimension
+    # plus the shape of the window (tuple addition)
+    newshape += norm_shape(ws)
+
+
+    # the strides tuple will be the array's strides multiplied by step size, plus
+    # the array's strides (tuple addition)
+    newstrides = norm_shape(np.array(a.strides) * ss) + a.strides
+    a = ast(a,shape = newshape,strides = newstrides)
+    if not flatten:
+        return a
+    # Collapse strided so that it has one more dimension than the window.  I.e.,
+    # the new array is a flat list of slices.
+    meat = len(ws) if ws.shape else 0
+    firstdim = (np.product(newshape[:-meat]),) if ws.shape else ()
+    dim = firstdim + (newshape[-meat:])
+    # remove any dimensions with size 1
+    dim = filter(lambda i : i != 1,dim)
+    return a.reshape(dim), newshape
+
+#Stuff to change
+win = 3
+meter = str(win/4)
+merge = im_grey_d
+Z,ind = sliding_window(merge,(win,win),(1,1))
+
+Ny, Nx = np.shape(merge)
+
+w = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(p_me)(Z[k]) for k in xrange(len(Z)))
+
+cont = [a[0] for a in w]
+diss = [a[1] for a in w]
+homo = [a[2] for a in w]
+eng  = [a[3] for a in w]
+corr = [a[4] for a in w]
+ASM  = [a[5] for a in w]
+
+
+#Reshape to match number of windows
+plt_cont = np.reshape(cont , ( ind[0], ind[1] ) )
+plt_diss = np.reshape(diss , ( ind[0], ind[1] ) )
+plt_homo = np.reshape(homo , ( ind[0], ind[1] ) )
+plt_eng = np.reshape(eng , ( ind[0], ind[1] ) )
+plt_corr = np.reshape(corr , ( ind[0], ind[1] ) )
+plt_ASM =  np.reshape(ASM , ( ind[0], ind[1] ) )
+del cont, diss, homo, eng, corr, ASM
+
+#Resize Images to receive texture and define filenames
+contrast = im_resize(plt_cont,Nx,Ny)
+contrast[merge==0]=np.nan
+dissimilarity = im_resize(plt_diss,Nx,Ny)
+dissimilarity[merge==0]=np.nan    
+homogeneity = im_resize(plt_homo,Nx,Ny)
+homogeneity[merge==0]=np.nan
+energy = im_resize(plt_eng,Nx,Ny)
+energy[merge==0]=np.nan
+correlation = im_resize(plt_corr,Nx,Ny)
+correlation[merge==0]=np.nan
+ASM = im_resize(plt_ASM,Nx,Ny)
+ASM[merge==0]=np.nan
+
+#%%
+plt.figure()
+plt.imshow(im_ms_ps[:,:,[2,1,0]])
+for i,contour in enumerate(wl_pix): plt.plot(contour[:, 1], contour[:, 0], linewidth=2)
+plt.axis('image')
+plt.title('Detected water lines')
+plt.show()
+
+vec = im_ms_ps.reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1], im_ms_ps.shape[2])
+vec_pan = im_pan.reshape(im_pan.shape[0]*im_pan.shape[1])
+features = np.zeros((len(vec), 5))
+features[:,[0,1,2,3]] = vec[:,[0,1,2,3]]
+features[:,4] = vec_pan
+vec_mask = im_cloud.reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1])
+
+# create buffer
+im_buffer = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1]))
+for i, contour in enumerate(wl_pix):
+        indices = [(int(_[0]), int(_[1])) for _ in list(np.round(contour))]
+        for j, idx in enumerate(indices):
+            im_buffer[idx] = 1
+        
+        
+plt.figure()
+plt.imshow(im_buffer)
+plt.draw()
+
+se = morphology.disk(buffer_size)
+im_buffer = morphology.binary_dilation(im_buffer, se)
+
+plt.figure()
+plt.imshow(im_buffer)
+plt.draw()
+
+vec_buffer = (im_buffer == 1).reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1])
+
+vec_buffer= np.logical_and(vec_buffer, ~vec_mask)
+
+#vec_buffer = np.ravel_multi_index(z,(im_ms_ps.shape[0], im_ms_ps.shape[1]))
+
+
+kmeans = KMeans(n_clusters=6, random_state=0).fit(vec[vec_buffer,:])
+labels = kmeans.labels_
+labels_full = np.ones((len(vec_mask))) * np.nan
+labels_full[vec_buffer] = labels
+im_labels = labels_full.reshape(im_ms_ps.shape[0], im_ms_ps.shape[1])
+
+plt.figure()
+plt.imshow(im_labels)
+plt.axis('equal')
+plt.draw()
+
+utils.compare_images(im_labels, im_pan)
+
+plt.figure()
+for i in range(6): plt.plot(kmeans.cluster_centers_[i,:])
+plt.draw()
+
+im_sand = im_labels == np.argmax(np.mean(kmeans.cluster_centers_[:,[0,1,2,4]], axis=1))
+
+im_sand2 = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
+im_sand3 = morphology.binary_dilation(im_sand2, morphology.disk(1))
+plt.figure()
+plt.imshow(im_sand3)
+plt.draw()
+
+im_ms_ps[im_sand3,0] = 0
+im_ms_ps[im_sand3,1] = 0
+im_ms_ps[im_sand3,2] = 1
+
+
+plt.figure()
+plt.imshow(im_ms_ps[:,:,[2,1,0]])
+plt.axis('image')
+plt.title('Sand classification')
+plt.show()
+
+#%%
--- a/old/oldcodes/compare_shorelines.py
+++ b/old/oldcodes/compare_shorelines.py
--- a/old/oldcodes/compare_shorelines2.py
+++ b/old/oldcodes/compare_shorelines2.py
--- a/old/oldcodes/geocheck.py
+++ b/old/oldcodes/geocheck.py
--- a/old/oldcodes/get_coordinates.py
+++ b/old/oldcodes/get_coordinates.py
--- a/old/oldcodes/l8_calendar_dates.py
+++ b/old/oldcodes/l8_calendar_dates.py
@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+
+
+from datetime import datetime, timedelta
+import pytz
+import csv
+import pandas as pd
+au_tz = pytz.timezone('Australia/Sydney')
+
+dt1 = datetime(2018, 4, 17, tzinfo= au_tz)
+
+dt = []
+dt.append(dt1)
+for i in range(1,100):
+    dt1 = dt[i-1]
+    dt.append(dt1 + timedelta(days=16))
+
+
+dtstr = [_.strftime('%d %b %Y') for _ in dt]
+
+df = pd.DataFrame(dtstr)
+df.to_csv('L7_NARRA_dates.csv', index=False, header=False)
+
--- a/old/oldcodes/plot_cloud_cover.py
+++ b/old/oldcodes/plot_cloud_cover.py
--- a/old/oldcodes/sds_extract.py
+++ b/old/oldcodes/sds_extract.py
--- a/old/oldcodes/sds_extract_allcode.py
+++ b/old/oldcodes/sds_extract_allcode.py
--- a/old/oldcodes/sds_extract_test.py
+++ b/old/oldcodes/sds_extract_test.py
--- a/old/oldcodes/time_coverage.py
+++ b/old/oldcodes/time_coverage.py
--- a/old/olddata/NARRA_output.pkl
+++ b/old/olddata/NARRA_output.pkl
--- a/old/olddata/data_2016.pkl
+++ b/old/olddata/data_2016.pkl
--- a/old/olddata/data_gt_l8.pkl
+++ b/old/olddata/data_gt_l8.pkl
--- a/old/olddata/idx_nogt.pkl
+++ b/old/olddata/idx_nogt.pkl
--- a/old/olddata/wl_final.pkl
+++ b/old/olddata/wl_final.pkl
--- a/old/olddata/wl_l8_comparison.pkl
+++ b/old/olddata/wl_l8_comparison.pkl
--- a/sand_classification/NN_all.pkl
+++ b/sand_classification/NN_all.pkl
--- a/sand_classification/NN_new.pkl
+++ b/sand_classification/NN_new.pkl
--- a/sand_classification/wekaNN/NNdefaultparams.txt
+++ b/sand_classification/wekaNN/NNdefaultparams.txt
@ -0,0 +1,26 @@
+Time taken to build model: 44.94 seconds
+
+=== Stratified cross-validation ===
+=== Summary ===
+
+Correctly Classified Instances       79136               99.2799 %
+Incorrectly Classified Instances       574                0.7201 %
+Kappa statistic                          0.9838
+Mean absolute error                      0.0105
+Root mean squared error                  0.0781
+Relative absolute error                  2.3774 %
+Root relative squared error             16.5717 %
+Total Number of Instances            79710     
+
+=== Detailed Accuracy By Class ===
+
+                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
+                 0.995    0.011    0.994      0.995    0.995      0.984    0.998     0.999     0
+                 0.989    0.005    0.990      0.989    0.989      0.984    0.998     0.997     1
+Weighted Avg.    0.993    0.009    0.993      0.993    0.993      0.984    0.998     0.998     
+
+=== Confusion Matrix ===
+
+     a     b   <-- classified as
+ 52960   270 |     a = 0
+   304 26176 |     b = 1
--- a/sand_classification/wekaNN/training_data.csv
+++ b/sand_classification/wekaNN/training_data.csv
--- a/sand_create_train.py
+++ b/sand_create_train.py
@ -0,0 +1,245 @@
+# -*- coding: utf-8 -*-
+
+#==========================================================#
+# Create a training data
+#==========================================================#
+
+# Initial settings
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+import ee
+import pdb
+import time
+import pandas as pd
+# other modules
+from osgeo import gdal, ogr, osr
+import pickle
+import matplotlib.cm as cm
+from pylab import ginput
+
+# image processing modules
+import skimage.filters as filters 
+import skimage.exposure as exposure
+import skimage.transform as transform
+import sklearn.decomposition as decomposition
+import skimage.measure as measure
+import skimage.morphology as morphology
+from scipy import ndimage
+
+# machine learning modules
+from sklearn.model_selection import train_test_split
+from sklearn.neural_network import MLPClassifier
+from sklearn.preprocessing import StandardScaler, Normalizer 
+from sklearn.externals import joblib
+
+# import own modules
+import functions.utils as utils
+import functions.sds as sds
+
+# some settings
+np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
+plt.rcParams['axes.grid'] = True
+plt.rcParams['figure.max_open_warning'] = 100
+ee.Initialize()
+
+# parameters
+cloud_thresh = 0.5      # threshold for cloud cover
+plot_bool = False      # if you want the plots
+prob_high = 99.9        # upper probability to clip and rescale pixel intensity
+min_contour_points = 100# minimum number of points contained in each water line
+output_epsg = 28356     # GDA94 / MGA Zone 56
+buffer_size = 10        # radius (in pixels) of disk for buffer (pixel classification)
+min_beach_size = 50     # number of pixels in a beach (pixel classification)
+
+# load metadata (timestamps and epsg code) for the collection
+satname = 'L8'
+sitename = 'NARRA_all'
+#sitename = 'NARRA'
+#sitename = 'OLDBAR'
+
+# Load metadata
+filepath = os.path.join(os.getcwd(), 'data', satname, sitename)
+
+# path to images
+file_path_pan = os.path.join(os.getcwd(), 'data', satname, sitename, 'pan')
+file_path_ms = os.path.join(os.getcwd(), 'data', satname, sitename, 'ms')
+file_names_pan = os.listdir(file_path_pan)
+file_names_ms = os.listdir(file_path_ms)
+N = len(file_names_pan)
+
+# initialise some variables
+idx_skipped = []
+idx_nocloud = []
+n_features = 10
+train_pos = np.nan*np.ones((1,n_features))
+train_neg = np.nan*np.ones((1,n_features))
+columns = ('B','G','R','NIR','SWIR','Pan','WI','VI','BR', 'mWI', 'SAND')
+
+#%%
+for i in range(N):
+    # read pan image
+    fn_pan = os.path.join(file_path_pan, file_names_pan[i])
+    data = gdal.Open(fn_pan, gdal.GA_ReadOnly)
+    georef = np.array(data.GetGeoTransform())
+    bands = [data.GetRasterBand(i + 1).ReadAsArray() for i in range(data.RasterCount)]
+    im_pan = np.stack(bands, 2)[:,:,0]
+    # read ms image
+    fn_ms = os.path.join(file_path_ms, file_names_ms[i])
+    data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
+    bands = [data.GetRasterBand(i + 1).ReadAsArray() for i in range(data.RasterCount)]
+    im_ms = np.stack(bands, 2)
+    # cloud mask
+    im_qa = im_ms[:,:,5]
+    cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
+    cloud_mask = transform.resize(cloud_mask, (im_pan.shape[0], im_pan.shape[1]),
+                                order=0, preserve_range=True, 
+                                mode='constant').astype('bool_')    
+    # resize the image using bilinear interpolation (order 1)
+    im_ms = transform.resize(im_ms,(im_pan.shape[0], im_pan.shape[1]),
+                             order=1, preserve_range=True, mode='constant')
+    # check if -inf or nan values and add to cloud mask
+    im_inf = np.isin(im_ms[:,:,0], -np.inf)
+    im_nan = np.isnan(im_ms[:,:,0])
+    cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
+    # skip if cloud cover is more than the threshold
+    cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
+    if cloud_cover > cloud_thresh:
+        print('skip ' + str(i) + ' - cloudy (' + str(cloud_cover) + ')')
+        idx_skipped.append(i)
+        continue
+    idx_nocloud.append(i)
+            
+    # rescale intensities
+    im_ms = sds.rescale_image_intensity(im_ms, cloud_mask, prob_high, plot_bool)
+    im_pan = sds.rescale_image_intensity(im_pan, cloud_mask, prob_high, plot_bool)
+    # pansharpen rgb image
+    im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, plot_bool)
+    nrow = im_ms_ps.shape[0]
+    ncol = im_ms_ps.shape[1]
+    # add down-sized bands for NIR and SWIR (since pansharpening is not possible)
+    im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2) 
+    # calculate NDWI
+    im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, plot_bool)
+    # detect edges
+    wl_pix = sds.find_wl_contours(im_ndwi, cloud_mask, min_contour_points, plot_bool)
+    # classify sand pixels with Kmeans
+    im_sand = sds.classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, buffer_size, min_beach_size, plot_bool)
+    
+    # plot a figure to manually select which images to keep
+    im = np.copy(im_ms_ps)
+    im[im_sand,0] = 0
+    im[im_sand,1] = 0
+    im[im_sand,2] = 1
+    plt.figure()
+    plt.imshow(im[:,:,[2,1,0]])
+    plt.axis('image')
+    plt.title('Sand classification')
+    plt.show()
+    mng = plt.get_current_fig_manager()                                         
+    mng.window.showMaximized()
+    plt.tight_layout()   
+    plt.draw()
+    
+    
+    
+    # click a point
+    # top-left quadrant:   keep classif as pos and click somewhere for neg
+    # bottom-left:         keep classif as neg
+    # any right quadrant:  discard image
+    pt_in = np.array(ginput(n=1, timeout=1000))
+    if pt_in[0][0] < im_ms_ps.shape[1]/2:
+        
+        im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features))
+        im_features[:,:,[0,1,2,3,4]] = im_ms_ps
+        im_features[:,:,5] = im_pan
+        im_features[:,:,6] = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, False) # (NIR-G)
+        im_features[:,:,7] = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], cloud_mask, False) # (NIR-R)
+        im_features[:,:,8] = sds.nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], cloud_mask, False) # (B-R)
+        im_features[:,:,9] = sds.nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False) # (SWIR-G)
+        # win = np.ones((3,3))
+        # im_features[:,:,9] = ndimage.generic_filter(im_features[:,:,5], np.std, footprint=win)
+        # im_features[:,:,10] = ndimage.generic_filter(im_features[:,:,5], np.max, footprint=win) - ndimage.generic_filter(im_features[:,:,5], np.min, footprint=win)
+        
+        if pt_in[0][1] < im_ms_ps.shape[0]/2:
+            # positive examples
+            vec_pos = im_features[im_sand,:]
+            train_pos = np.append(train_pos, vec_pos, axis=0)   
+            
+            # click where negative examples are
+            pt_neg = np.round(np.array(ginput(n=1, timeout=1000))[0])
+            radius = int(round(np.sqrt(sum(sum(im_sand)))))
+            idx_rows = np.linspace(0,radius-1,radius).astype(int) + int(pt_neg[1])
+            idx_cols = np.linspace(0,radius-1,radius).astype(int) + int(pt_neg[0])
+            xx, yy = np.meshgrid(idx_rows,idx_cols, indexing='ij')
+            row_neg = xx.reshape(radius*radius)
+            col_neg = yy.reshape(radius*radius)
+            im_nosand = np.zeros((nrow,ncol)).astype(bool)
+            for i in range(len(row_neg)): 
+                im_nosand[row_neg[i],col_neg[i]] = True
+                im_ms_ps[row_neg[i],col_neg[i],0] = 1
+                im_ms_ps[row_neg[i],col_neg[i],1] = 1
+                im_ms_ps[row_neg[i],col_neg[i],2] = 0
+            plt.imshow(im_ms_ps[:,:,[2,1,0]])
+            plt.draw()
+        
+            # negative examples
+            vec_neg = im_features[im_nosand,:]
+            train_neg = np.append(train_neg, vec_neg, axis=0)
+            
+        else:
+            # negative examples
+            vec_neg = im_features[im_sand,:]
+            train_neg = np.append(train_neg, vec_neg, axis=0)  
+
+    else:
+        print('skip ' + str(i))
+        idx_skipped.append(i)
+        
+# format data 
+train_pos = train_pos[1:,:]
+train_neg = train_neg[1:,:]
+n_pos = len(train_pos)
+n_neg = len(train_neg)
+training_data = np.zeros((n_pos+n_neg, n_features+1))
+training_data[:n_pos,:n_features] = train_pos
+training_data[n_pos:n_pos+n_neg,:n_features] = train_neg
+training_data[:n_pos,n_features] = np.ones((n_pos))
+df_train = pd.DataFrame(training_data, columns=columns)
+df_train.dropna(axis=0, how='any', inplace=True)
+sand_train = np.array(df_train)
+
+# save data
+#with open(os.path.join(filepath, sitename + '_sand_idxskip' + '.pkl'), 'wb') as f:
+#    pickle.dump(idx_skipped, f)
+#with open(os.path.join(filepath, sitename + '_sand_train' + '.pkl'), 'wb') as f:
+#    pickle.dump(sand_train, f)
+#df_train.to_csv('training_data.csv')  
+
+#%% Train neural network on data
+
+# load training data
+with open(os.path.join(filepath, sitename + '_sand_train' + '.pkl'), 'rb') as f:
+    sand_train = pickle.load(f)
+    
+n_features = sand_train.shape[1] - 1
+X = sand_train[:,0:n_features]
+y = sand_train[:,n_features]
+
+# divide in train and test
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
+
+#scaler = StandardScaler()
+#scaler.fit(X_train)
+#X_train = scaler.transform(X_train)
+#X_test = scaler.transform(X_test)
+
+# run NN on train dat and evaluate on test data
+clf = MLPClassifier()
+clf.fit(X_train,y_train)
+clf.score(X_test,y_test)
+
+# save NN model
+joblib.dump(clf, os.path.join(os.getcwd(), 'sand_classification', 'NN_small.pkl'))
+
+
--- a/sand_runNN.py
+++ b/sand_runNN.py
@ -0,0 +1,176 @@
+# -*- coding: utf-8 -*-
+
+#==========================================================#
+# Run Neural Network on image to extract sandy pixels
+#==========================================================#
+
+# Initial settings
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+import ee
+import pdb
+import time
+import pandas as pd
+# other modules
+from osgeo import gdal, ogr, osr
+import pickle
+import matplotlib.cm as cm
+from pylab import ginput
+
+# image processing modules
+import skimage.filters as filters 
+import skimage.exposure as exposure
+import skimage.transform as transform
+import sklearn.decomposition as decomposition
+import skimage.measure as measure
+import skimage.morphology as morphology
+from scipy import ndimage
+
+# machine learning modules
+from sklearn.model_selection import train_test_split
+from sklearn.neural_network import MLPClassifier
+from sklearn.preprocessing import StandardScaler, Normalizer 
+from sklearn.externals import joblib
+
+# import own modules
+import functions.utils as utils
+import functions.sds as sds
+
+# some settings
+np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
+plt.rcParams['axes.grid'] = True
+plt.rcParams['figure.max_open_warning'] = 100
+ee.Initialize()
+
+# parameters
+cloud_thresh = 0.5      # threshold for cloud cover
+plot_bool = False      # if you want the plots
+prob_high = 100        # upper probability to clip and rescale pixel intensity
+min_contour_points = 100# minimum number of points contained in each water line
+output_epsg = 28356     # GDA94 / MGA Zone 56
+buffer_size = 10        # radius (in pixels) of disk for buffer (pixel classification)
+min_beach_size = 50     # number of pixels in a beach (pixel classification)
+
+# load metadata (timestamps and epsg code) for the collection
+satname = 'L8'
+sitename = 'NARRA_all'
+#sitename = 'NARRA'
+#sitename = 'OLDBAR'
+
+# Load metadata
+filepath = os.path.join(os.getcwd(), 'data', satname, sitename)
+
+# path to images
+file_path_pan = os.path.join(os.getcwd(), 'data', satname, sitename, 'pan')
+file_path_ms = os.path.join(os.getcwd(), 'data', satname, sitename, 'ms')
+file_names_pan = os.listdir(file_path_pan)
+file_names_ms = os.listdir(file_path_ms)
+N = len(file_names_pan)
+
+# initialise some variables
+idx_skipped = []
+idx_nocloud = []
+n_features = 10
+train_pos = np.nan*np.ones((1,n_features))
+train_neg = np.nan*np.ones((1,n_features))
+columns = ('B','G','R','NIR','SWIR','Pan','WI','VI','BR', 'mWI', 'SAND')
+
+clf = joblib.load(os.path.join(os.getcwd(), 'sand_classification', 'NN_new.pkl'))
+#%%
+for i in range(N):
+    # read pan image
+    fn_pan = os.path.join(file_path_pan, file_names_pan[i])
+    data = gdal.Open(fn_pan, gdal.GA_ReadOnly)
+    georef = np.array(data.GetGeoTransform())
+    bands = [data.GetRasterBand(i + 1).ReadAsArray() for i in range(data.RasterCount)]
+    im_pan = np.stack(bands, 2)[:,:,0]
+    # read ms image
+    fn_ms = os.path.join(file_path_ms, file_names_ms[i])
+    data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
+    bands = [data.GetRasterBand(i + 1).ReadAsArray() for i in range(data.RasterCount)]
+    im_ms = np.stack(bands, 2)
+    # cloud mask
+    im_qa = im_ms[:,:,5]
+    cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
+    cloud_mask = transform.resize(cloud_mask, (im_pan.shape[0], im_pan.shape[1]),
+                                order=0, preserve_range=True, 
+                                mode='constant').astype('bool_')    
+    # resize the image using bilinear interpolation (order 1)
+    im_ms = transform.resize(im_ms,(im_pan.shape[0], im_pan.shape[1]),
+                             order=1, preserve_range=True, mode='constant')
+    # check if -inf or nan values and add to cloud mask
+    im_inf = np.isin(im_ms[:,:,0], -np.inf)
+    im_nan = np.isnan(im_ms[:,:,0])
+    cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
+    # skip if cloud cover is more than the threshold
+    cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
+    if cloud_cover > cloud_thresh:
+        print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
+        idx_skipped.append(i)
+        continue
+    idx_nocloud.append(i)
+            
+    # rescale intensities
+    im_ms = sds.rescale_image_intensity(im_ms, cloud_mask, prob_high, plot_bool)
+    im_pan = sds.rescale_image_intensity(im_pan, cloud_mask, prob_high, plot_bool)
+    # pansharpen rgb image
+    im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, plot_bool)
+    nrow = im_ms_ps.shape[0]
+    ncol = im_ms_ps.shape[1]
+    # add down-sized bands for NIR and SWIR (since pansharpening is not possible)
+    im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2) 
+    # calculate NDWI
+    im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, plot_bool)
+    # detect edges
+    wl_pix = sds.find_wl_contours(im_ndwi, cloud_mask, min_contour_points, plot_bool)
+    # classify sand pixels with Kmeans
+    im_sand = sds.classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, buffer_size, min_beach_size, plot_bool)
+        
+    # calculate features
+    im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features))
+    im_features[:,:,[0,1,2,3,4]] = im_ms_ps
+    im_features[:,:,5] = im_pan
+    im_features[:,:,6] = im_ndwi
+    im_features[:,:,7] = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], cloud_mask, False) # ND(NIR-R)
+    im_features[:,:,8] = sds.nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], cloud_mask, False) # ND(B-R)
+    im_features[:,:,9] = sds.nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False) # (SWIR-G)
+    # remove NaNs and clouds
+    vec = im_features.reshape((im_ms_ps.shape[0] * im_ms_ps.shape[1], n_features))
+    vec_cloud = cloud_mask.reshape(cloud_mask.shape[0]*cloud_mask.shape[1])
+    vec_nan = np.any(np.isnan(vec), axis=1)
+    vec_mask = np.logical_or(vec_cloud, vec_nan)
+    vec = vec[~vec_mask, :]
+    # predict with NN
+    y = clf.predict(vec)
+    # recompose image
+    vec_new = np.zeros((cloud_mask.shape[0]*cloud_mask.shape[1])) 
+    vec_new[~vec_mask] = y
+    im_classif = vec_new.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1]))
+    im_classif = im_classif.astype(bool)
+    im_classif = morphology.remove_small_objects(im_classif, min_size=min_beach_size, connectivity=2)
+    
+    # make comparison plot between NN and Kmeans
+    im1 = np.copy(im_ms_ps)
+    im1[im_classif,0] = 0
+    im1[im_classif,1] = 0
+    im1[im_classif,2] = 1
+    
+    im2 = np.copy(im_ms_ps)
+    im2[im_sand,0] = 0
+    im2[im_sand,1] = 0
+    im2[im_sand,2] = 1
+    
+    plt.figure()
+    ax1 = plt.subplot(121)
+    plt.imshow(im1[:,:,[2,1,0]])
+    plt.axis('off')
+    plt.title('NN')
+    ax2 = plt.subplot(122, sharex=ax1, sharey=ax1)
+    plt.imshow(im2[:,:,[2,1,0]])
+    plt.axis('off')
+    plt.title('Kmeans')
+    mng = plt.get_current_fig_manager()                                         
+    mng.window.showMaximized()
+    plt.tight_layout()   
+    plt.draw()
--- a/training_data.csv
+++ b/training_data.csv