Code to download and read sentinel 2 images

8 years ago · f8e1397412
parent 62f5c5330f
commit f8e1397412
2 changed files with 35 additions and 665 deletions
--- a/download_images.py
+++ b/download_images.py
@ -35,20 +35,14 @@ ee.Initialize()
 #==========================================================#

 ## location (Narrabeen-Collaroy beach)
-#polygon = [[[151.301454, -33.700754],
-#               [151.311453, -33.702075], 
-#               [151.307237, -33.739761],
-#               [151.294220, -33.736329],
-#               [151.301454, -33.700754]]]; 
+sitename = 'NARRA'
+polygon = [[[151.301454, -33.700754],
+           [151.311453, -33.702075], 
+           [151.307237, -33.739761],
+           [151.294220, -33.736329],
+           [151.301454, -33.700754]]]; 
+

-# location (Tairua beach)
-sitename = 'TAIRUA'
-polygon = [[[175.835574, -36.982022],
-               [175.888220, -36.980680], 
-               [175.893527, -37.029610],
-               [175.833444, -37.031767],
-               [175.835574, -36.982022]]];
-            
 # initialise metadata dictionnary (stores timestamps and georefencing accuracy of each image)       
 metadata = dict([])

@ -57,232 +51,6 @@ try:
    os.makedirs(os.path.join(os.getcwd(), 'data',sitename))
 except:
    print('directory already exists')
-        
-
-#%%
-#==========================================================#
-#==========================================================#
-# L5
-#==========================================================#
-#==========================================================#
-
-
-
-# define filenames for images
-suffix = '.tif'
-filepath = os.path.join(os.getcwd(), 'data', sitename, 'L5', '30m')
-try:
-    os.makedirs(filepath)
-except:
-    print('directory already exists')
-    
-#==========================================================#
-# Select L5 collection 
-#==========================================================#
-
-satname = 'L5'
-input_col = ee.ImageCollection('LANDSAT/LT05/C01/T1_TOA')
-
-# filter by location
-flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
-n_img = flt_col.size().getInfo()
-print('Number of images covering ' + sitename, n_img)
-im_all = flt_col.getInfo().get('features')
-
-
-#==========================================================#
-# Main loop trough images
-#==========================================================#
-
-timestamps = []
-acc_georef = []
-all_names = []
-for i in range(n_img):
-    
-    # find each image in ee database
-    im = ee.Image(im_all[i].get('id'))
-
-    im_dic = im.getInfo()
-    im_bands = im_dic.get('bands')
-    t = im_dic['properties']['system:time_start']
-    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
-    timestamps.append(im_timestamp)
-    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
-    im_epsg = int(im_dic['bands'][0]['crs'][5:])
-    try:
-        acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
-    except:
-        acc_georef.append(12)   
-        print('No geometric rmse model property')
-    
-    # delete dimensions key from dictionnary, otherwise the entire image is extracted
-    for j in range(len(im_bands)): del im_bands[j]['dimensions']
-    
-    # bands for L5
-    ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[7]]
-    
-    # filenames
-    filename = im_date + '_' + satname + '_' + sitename + suffix
-
-    print(i)
-    if any(filename in _ for _ in all_names):
-        filename = im_date + '_' + satname + '_' + sitename + '_dup' + suffix 
-    all_names.append(filename)
-    
-    local_data = sds.download_tif(im, polygon, ms_bands, filepath)
-    os.rename(local_data, os.path.join(filepath, filename))
-
-# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
-timestamps_sorted = sorted(timestamps)
-idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
-acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
-
-metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}
-
-#%%
-#==========================================================#
-#==========================================================#
-# L7&L8
-#==========================================================#
-#==========================================================#
-    
-
-
-# define filenames for images
-suffix = '.tif'
-filepath = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8')
-filepath_pan = os.path.join(filepath, 'pan')
-filepath_ms = os.path.join(filepath, 'ms')
-try:
-    os.makedirs(filepath_pan)
-    os.makedirs(filepath_ms)
-except:
-    print('directory already exists')           
-
-#==========================================================#
-# Select L7 collection 
-#==========================================================#
-     
-satname = 'L7'   
-input_col = ee.ImageCollection('LANDSAT/LE07/C01/T1_RT_TOA')
-
-# filter by location
-flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
-n_img = flt_col.size().getInfo()
-print('Number of images covering ' + sitename, n_img)
-im_all = flt_col.getInfo().get('features')
-
-#==========================================================#
-# Main loop trough images
-#==========================================================#
-
-timestamps = []
-acc_georef = []
-all_names = []
-for i in range(n_img):
-    
-    # find each image in ee database
-    im = ee.Image(im_all[i].get('id'))
-    
-    im_dic = im.getInfo()
-    im_bands = im_dic.get('bands')
-    t = im_dic['properties']['system:time_start']
-    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
-    timestamps.append(im_timestamp)
-    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
-    im_epsg = int(im_dic['bands'][0]['crs'][5:])
-    try:
-        acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
-    except:
-        acc_georef.append(12)   
-        print('No geometric rmse model property')
-    
-    # delete dimensions key from dictionnary, otherwise the entire image is extracted
-    for j in range(len(im_bands)): del im_bands[j]['dimensions']
-    
-    # bands for L7
-    pan_band = [im_bands[8]]
-    ms_bands = [im_bands[0], im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[9]]
-    
-    # filenames
-    filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
-    filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
-
-    print(i)
-    if any(filename_pan in _ for _ in all_names):
-        filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
-        filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix 
-    all_names.append(filename_pan)
-    
-    local_data_pan = sds.download_tif(im, polygon, pan_band, filepath_pan)
-    os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
-    local_data_ms = sds.download_tif(im, polygon, ms_bands, filepath_ms)
-    os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
-
-#==========================================================#
-# Select L8 collection 
-#==========================================================#
-    
-satname = 'L8'
-input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
-
-# filter by location
-flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon))
-n_img = flt_col.size().getInfo()
-print('Number of images covering Narrabeen:', n_img)
-im_all = flt_col.getInfo().get('features')
-
-#==========================================================#
-# Main loop trough images
-#==========================================================#
-
-for i in range(n_img):
-    
-    # find each image in ee database
-    im = ee.Image(im_all[i].get('id'))
-    
-    im_dic = im.getInfo()
-    im_bands = im_dic.get('bands')
-    t = im_dic['properties']['system:time_start']
-    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
-    timestamps.append(im_timestamp)
-    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
-    im_epsg = int(im_dic['bands'][0]['crs'][5:])
-    try:
-        acc_georef.append(im_dic['properties']['GEOMETRIC_RMSE_MODEL'])
-    except:
-        acc_georef.append(12)   
-        print('No geometric rmse model property')
-    
-    # delete dimensions key from dictionnary, otherwise the entire image is extracted
-    for j in range(len(im_bands)): del im_bands[j]['dimensions']
-    
-    # bands for L8    
-    pan_band = [im_bands[7]]
-    ms_bands = [im_bands[1], im_bands[2], im_bands[3], im_bands[4], im_bands[5], im_bands[11]]
-
-    # filenames
-    filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + suffix
-    filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + suffix
-
-    print(i)
-    if any(filename_pan in _ for _ in all_names):
-        filename_pan = im_date + '_' + satname + '_' + sitename + '_pan' + '_dup' + suffix
-        filename_ms = im_date + '_' + satname + '_' + sitename + '_ms' + '_dup' + suffix 
-    all_names.append(filename_pan)
-    
-    local_data_pan = sds.download_tif(im, polygon, pan_band, filepath_pan)
-    os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
-    local_data_ms = sds.download_tif(im, polygon, ms_bands, filepath_ms)
-    os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
-
-
-# sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
-timestamps_sorted = sorted(timestamps)
-idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
-acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
-
-metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}

 #%%
 #==========================================================#
@ -323,6 +91,7 @@ im_all = flt_col.getInfo().get('features')
 timestamps = []
 acc_georef = []
 all_names = []
+im_epsg = []
 for i in range(n_img):
    
    # find each image in ee database
@ -333,15 +102,6 @@ for i in range(n_img):
    t = im_dic['properties']['system:time_start']
    im_timestamp = datetime.fromtimestamp(t/1000, tz=pytz.utc)
    im_date = im_timestamp.strftime('%Y-%m-%d-%H-%M-%S')
-    timestamps.append(im_timestamp)
-    im_epsg = int(im_dic['bands'][0]['crs'][5:])
-    try:
-        if im_dic['properties']['GEOMETRIC_QUALITY_FLAG'] == 'PASSED':
-            acc_georef.append(1)
-        else:
-            acc_georef.append(0)
-    except:
-        acc_georef.append(0)
    
    # delete dimensions key from dictionnary, otherwise the entire image is extracted
    for j in range(len(im_bands)): del im_bands[j]['dimensions']
@ -358,9 +118,7 @@ for i in range(n_img):
    
    print(i)
    if any(filename10 in _ for _ in all_names):
-        filename10 = im_date + '_' + satname + '_' + sitename + '_' + '10m' + '_dup' + suffix
-        filename20 = im_date + '_' + satname + '_' + sitename + '_' + '20m' + '_dup' + suffix
-        filename60 = im_date + '_' + satname + '_' + sitename + '_' + '60m' + '_dup' + suffix
+        continue
    all_names.append(filename10)
    
    local_data = sds.download_tif(im, polygon, bands10, filepath)
@ -372,12 +130,24 @@ for i in range(n_img):
    local_data = sds.download_tif(im, polygon, bands60, filepath)
    os.rename(local_data, os.path.join(filepath, '60m', filename60))
    
+    timestamps.append(im_timestamp)
+    im_epsg.append(int(im_dic['bands'][0]['crs'][5:]))
+    try:
+        if im_dic['properties']['GEOMETRIC_QUALITY_FLAG'] == 'PASSED':
+            acc_georef.append(1)
+        else:
+            acc_georef.append(0)
+    except:
+        acc_georef.append(0)
+    
+
 # sort timestamps and georef accuracy (dowloaded images are sorted by date in directory)
 timestamps_sorted = sorted(timestamps)
 idx_sorted = sorted(range(len(timestamps)), key=timestamps.__getitem__)
 acc_georef_sorted = [acc_georef[j] for j in idx_sorted]
+im_epsg_sorted = [im_epsg[j] for j in idx_sorted]

-metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg}
+metadata[satname] = {'dates':timestamps_sorted, 'acc_georef':acc_georef_sorted, 'epsg':im_epsg_sorted}



--- a/read_images.py
+++ b/read_images.py
@ -59,8 +59,6 @@ buffer_size = 7         # radius (in pixels) of disk for buffer (pixel classific
 min_beach_size = 20     # number of pixels in a beach (pixel classification)
 dist_ref = 100          # maximum distance from reference point
 min_length_wl = 200     # minimum length of shoreline LineString to be kept 
-manual_bool = True      # to manually check images
-

 output = dict([])

@ -71,16 +69,15 @@ output = dict([])
 filepath = os.path.join(os.getcwd(), 'data', sitename)
 with open(os.path.join(filepath, sitename + '_metadata' + '.pkl'), 'rb') as f:
    metadata = pickle.load(f)
-
-
-#%%    
+    
+#%%
 #==========================================================#
 # Read S2 images
 #==========================================================#

 satname = 'S2'
 dates = metadata[satname]['dates']
-input_epsg = 32756 # metadata[satname]['epsg']
+input_epsg = metadata[satname]['epsg']

 # path to images
 filepath10 = os.path.join(os.getcwd(), 'data', sitename, satname, '10m')
@ -163,427 +160,30 @@ for i in range(N):
    # rescale image intensity for display purposes
    im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)
    
-    # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
-    im_classif, im_labels = sds.classify_image_NN_nopan(im_ms, cloud_mask, min_beach_size, plot_bool)
-    
-    # if there aren't any sandy pixels
-    if sum(sum(im_labels[:,:,0])) == 0 :
-        # use global threshold
-        im_ndwi = sds.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask, plot_bool)
-        contours = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
-    else:
-        # use specific threhsold
-        contours_wi, contours_mwi = sds.find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size, plot_bool)
-
-    # convert from pixels to world coordinates
-    wl_coords = sds.convert_pix2world(contours_mwi, georef)
-    # convert to output epsg spatial reference
-    wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)
-    
-    # remove contour lines that have a perimeter < min_length_wl
-    wl_good = []
-    for l, wls in enumerate(wl):
-        coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
-        a = LineString(coords) # shapely LineString structure
-        if a.length >= min_length_wl:
-            wl_good.append(wls)
-
-    # format points and only select the ones close to the refpoints
-    x_points = np.array([])
-    y_points = np.array([])
-    for k in range(len(wl_good)):
-        x_points = np.append(x_points,wl_good[k][:,0])
-        y_points = np.append(y_points,wl_good[k][:,1])
-    wl_good = np.transpose(np.array([x_points,y_points]))
-    temp = np.zeros((len(wl_good))).astype(bool)
-    for k in range(len(refpoints)): 
-        temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp) 
-    wl_final = wl_good[temp]
-    
-
-    # plot output
+    # plot rgb image
    plt.figure()
-    im = np.copy(im_display)
-    colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
-    for k in range(0,im_labels.shape[2]):
-        im[im_labels[:,:,k],0] = colours[k,0]
-        im[im_labels[:,:,k],1] = colours[k,1]
-        im[im_labels[:,:,k],2] = colours[k,2]
-    plt.imshow(im)
-    for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
-    plt.title(satname + '     ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + '      acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' )
-    plt.draw()
-    
-    pt_in = np.array(ginput(n=1, timeout=1000))
-    plt.close()
+    plt.axis('off')
+    plt.imshow(im_display)
    
-    # if image is rejected, skip it
-    if pt_in[0][1] > nrows/2:
-        print('skip ' + str(i) + ' - rejected')
-        idx_skipped.append(i)
-        continue
+    # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
+    im_classif, im_labels = sds.classify_image_NN_nopan(im_ms, cloud_mask, min_beach_size, plot_bool)
        
-    # if accepted, store the data
+    # store the data
    cloud_cover_ts.append(cloud_cover)
    acc_georef_ts.append(metadata[satname]['acc_georef'][i])
-    
    filename_ts.append(filenames10[i])
    satname_ts.append(satname)
    date_acquired_ts.append(filenames10[i][:10])
-
    timestamp.append(metadata[satname]['dates'][i])
-    shorelines.append(wl_final)

 # store in output structure
-output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
+output[satname] = {'dates':timestamp, 'idx_skipped':idx_skipped,
      'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts,
                  'acc_georef':acc_georef_ts}}
-del idx_skipped
-
-
-#%%
-#==========================================================#
-# Read L7&L8 images
-#==========================================================#
-
-satname = 'L8'
-dates = metadata[satname]['dates']
-input_epsg = 32656 # metadata[satname]['epsg']
-
-# path to images
-filepath_pan = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8', 'pan')
-filepath_ms = os.path.join(os.getcwd(), 'data', sitename, 'L7&L8', 'ms')
-filenames_pan = os.listdir(filepath_pan)
-filenames_ms = os.listdir(filepath_ms)
-if (not len(filenames_pan) == len(filenames_ms)):
-    raise 'error: not the same amount of files for pan and ms'
-N = len(filenames_pan)
-
-# initialise variables
-cloud_cover_ts = []
-acc_georef_ts = []
-date_acquired_ts = []
-filename_ts = []
-satname_ts = []
-timestamp = []
-shorelines = []
-idx_skipped = []
-
- 
-spacing = '=========================================================='
-msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
-print(msg)
-
-for i in range(N):
-    
-    # get satellite name
-    sat = filenames_pan[i][20:22]
-    
-    # read pan image
-    fn_pan = os.path.join(filepath_pan, filenames_pan[i])
-    data = gdal.Open(fn_pan, gdal.GA_ReadOnly)
-    georef = np.array(data.GetGeoTransform())
-    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
-    im_pan = np.stack(bands, 2)[:,:,0]
-    nrows = im_pan.shape[0]
-    ncols = im_pan.shape[1]
-
-    # read ms image
-    fn_ms = os.path.join(filepath_ms, filenames_ms[i])
-    data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
-    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
-    im_ms = np.stack(bands, 2)
-    
-    # cloud mask
-    im_qa = im_ms[:,:,5]
-    cloud_mask = sds.create_cloud_mask(im_qa, sat, plot_bool)
-    cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0, preserve_range=True, mode='constant').astype('bool_')    
-    # resize the image using bilinear interpolation (order 1)
-    im_ms = im_ms[:,:,:5]
-    im_ms = transform.resize(im_ms,(nrows, ncols), order=1, preserve_range=True, mode='constant')
-    
-    # check if -inf or nan values on any band and add to cloud mask
-    for k in range(im_ms.shape[2]+1): 
-        if k == 5:
-            im_inf = np.isin(im_pan, -np.inf)
-            im_nan = np.isnan(im_pan)        
-        else:  
-            im_inf = np.isin(im_ms[:,:,k], -np.inf)
-            im_nan = np.isnan(im_ms[:,:,k])
-        cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
-
-    # calculate cloud cover and skip image if above threshold
-    cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
-    if cloud_cover > cloud_thresh:
-        print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
-        idx_skipped.append(i)
-        continue
-    
-    # Pansharpen image (different for L8 and L7)
-    if sat == 'L7':
-        # pansharpen (Green, Red, NIR) and downsample Blue and SWIR1
-        im_ms_ps = sds.pansharpen(im_ms[:,:,[1,2,3]], im_pan, cloud_mask, plot_bool)
-        im_ms_ps = np.append(im_ms[:,:,[0]], im_ms_ps, axis=2)
-        im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[4]], axis=2)
-        im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)
-    elif sat == 'L8':
-        # pansharpen RGB image and downsample NIR and SWIR1
-        im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, plot_bool)
-        im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2) 
-        im_display = sds.rescale_image_intensity(im_ms_ps[:,:,[2,1,0]], cloud_mask, 99.9, False)
-
-    # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
-    im_classif, im_labels = sds.classify_image_NN(im_ms_ps, im_pan, cloud_mask, min_beach_size, plot_bool)
-    
-    # if there aren't any sandy pixels
-    if sum(sum(im_labels[:,:,0])) == 0 :
-        # use global threshold
-        im_ndwi = sds.nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, plot_bool)
-        contours = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
-    else:
-        # use specific threhsold
-        contours_wi, contours_mwi = sds.find_wl_contours2(im_ms_ps, im_labels, cloud_mask, buffer_size, plot_bool)
-    
-    # convert from pixels to world coordinates
-    wl_coords = sds.convert_pix2world(contours_mwi, georef)
-    # convert to output epsg spatial reference
-    wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)
-    
-    # remove contour lines that have a perimeter < min_length_wl
-    wl_good = []
-    for l, wls in enumerate(wl):
-        coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
-        a = LineString(coords) # shapely LineString structure
-        if a.length >= min_length_wl:
-            wl_good.append(wls)   
-    
-    # format points and only select the ones close to the refpoints
-    x_points = np.array([])
-    y_points = np.array([])
-    for k in range(len(wl_good)):
-        x_points = np.append(x_points,wl_good[k][:,0])
-        y_points = np.append(y_points,wl_good[k][:,1])
-    wl_good = np.transpose(np.array([x_points,y_points]))
-    temp = np.zeros((len(wl_good))).astype(bool)
-    for k in range(len(refpoints)): 
-        temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp) 
-    wl_final = wl_good[temp]   
-        
-    # plot output
-    plt.figure()
-    plt.subplot(121)
-    im = np.copy(im_display)
-    colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
-    for k in range(0,im_labels.shape[2]):
-        im[im_labels[:,:,k],0] = colours[k,0]
-        im[im_labels[:,:,k],1] = colours[k,1]
-        im[im_labels[:,:,k],2] = colours[k,2]
-    plt.imshow(im)
-    for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
-    plt.title(sat + '     ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + '      acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' )
-
-    pt_in = np.array(ginput(n=1, timeout=1000))
-    plt.close()
-    
-    # if image is rejected, skip it
-    if pt_in[0][1] > nrows/2:
-        print('skip ' + str(i) + ' - rejected')
-        idx_skipped.append(i)
-        continue
-        
-    # if accepted, store the data
-    cloud_cover_ts.append(cloud_cover)
-    acc_georef_ts.append(metadata[satname]['acc_georef'][i])
-    
-    filename_ts.append(filenames_pan[i])
-    satname_ts.append(sat)
-    date_acquired_ts.append(filenames_pan[i][:10])
-
-    timestamp.append(metadata[satname]['dates'][i])
-    shorelines.append(wl_final)
-
-# store in output structure
-output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
-      'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts,
-                  'acc_georef':acc_georef_ts}}    
-
-del idx_skipped
-
-
-
-#%%
-#==========================================================#
-# Read L5 images
-#==========================================================#
-
-satname = 'L5'
-dates = metadata[satname]['dates']
-input_epsg = 32656 # metadata[satname]['epsg']
-
-# path to images
-filepath_img = os.path.join(os.getcwd(), 'data', sitename, satname, '30m')
-filenames = os.listdir(filepath_img)
-N = len(filenames)
-
-# initialise variables
-cloud_cover_ts = []
-acc_georef_ts = []
-date_acquired_ts = []
-filename_ts = []
-satname_ts = []
-timestamp = []
-shorelines = []
-idx_skipped = []
-

-spacing = '=========================================================='
-msg = ' %s\n %s\n %s' % (spacing, satname, spacing)
-print(msg)
-
-for i in range(N):
-    
-    # read ms image
-    fn = os.path.join(filepath_img, filenames[i])
-    data = gdal.Open(fn, gdal.GA_ReadOnly)
-    georef = np.array(data.GetGeoTransform())
-    bands = [data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)]
-    im_ms = np.stack(bands, 2)
-
-    # down-sample to half hte original pixel size
-    nrows = im_ms.shape[0]*2
-    ncols = im_ms.shape[1]*2
-    
-    # cloud mask
-    im_qa = im_ms[:,:,5]
-    im_ms = im_ms[:,:,:-1]
-    cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
-    cloud_mask = transform.resize(cloud_mask, (nrows, ncols), order=0, preserve_range=True, mode='constant').astype('bool_')
-    
-    # resize the image using bilinear interpolation (order 1)
-    im_ms = transform.resize(im_ms,(nrows, ncols), order=1, preserve_range=True, mode='constant')
-    
-    # adjust georef vector (scale becomes 15m and origin is adjusted to the center of new corner pixel)
-    georef[1] = 15
-    georef[5] = -15
-    georef[0] = georef[0] + 7.5
-    georef[3] = georef[3] - 7.5
-    
-    # check if -inf or nan values on any band and add to cloud mask
-    for k in range(im_ms.shape[2]):   
-        im_inf = np.isin(im_ms[:,:,k], -np.inf)
-        im_nan = np.isnan(im_ms[:,:,k])
-        cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
-        
-    # calculate cloud cover and skip image if above threshold
-    cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
-    if cloud_cover > cloud_thresh:
-        print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
-        idx_skipped.append(i)
-        continue
-    
-    # rescale image intensity for display purposes
-    im_display = sds.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9, False)
-    
-    # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
-    im_classif, im_labels = sds.classify_image_NN_nopan(im_ms, cloud_mask, min_beach_size, plot_bool)
-        
-    # if there aren't any sandy pixels
-    if sum(sum(im_labels[:,:,0])) == 0 :
-        # use global threshold
-        im_ndwi = sds.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask, plot_bool)
-        contours = sds.find_wl_contours(im_ndwi, cloud_mask, plot_bool)
-    else:
-        # use specific threhsold
-        contours_wi, contours_mwi = sds.find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size, plot_bool)
-
-    # convert from pixels to world coordinates
-    wl_coords = sds.convert_pix2world(contours_mwi, georef)
-    # convert to output epsg spatial reference
-    wl = sds.convert_epsg(wl_coords, input_epsg, output_epsg)
-    
-    # remove contour lines that have a perimeter < min_length_wl
-    wl_good = []
-    for l, wls in enumerate(wl):
-        coords = [(wls[k,0], wls[k,1]) for k in range(len(wls))]
-        a = LineString(coords) # shapely LineString structure
-        if a.length >= min_length_wl:
-            wl_good.append(wls)
-
-    # format points and only select the ones close to the refpoints
-    x_points = np.array([])
-    y_points = np.array([])
-    for k in range(len(wl_good)):
-        x_points = np.append(x_points,wl_good[k][:,0])
-        y_points = np.append(y_points,wl_good[k][:,1])
-    wl_good = np.transpose(np.array([x_points,y_points]))
-    temp = np.zeros((len(wl_good))).astype(bool)
-    for k in range(len(refpoints)): 
-        temp = np.logical_or(np.linalg.norm(wl_good - refpoints[k,[0,1]], axis=1) < dist_ref, temp) 
-    wl_final = wl_good[temp]
-    
-    # plot output
-    plt.figure()
-    plt.subplot(121)
-    im = np.copy(im_display)
-    colours = np.array([[1,128/255,0/255],[204/255,1,1],[0,0,204/255]])
-    for k in range(0,im_labels.shape[2]):
-        im[im_labels[:,:,k],0] = colours[k,0]
-        im[im_labels[:,:,k],1] = colours[k,1]
-        im[im_labels[:,:,k],2] = colours[k,2]
-    plt.imshow(im)
-    for k,contour in enumerate(contours_mwi): plt.plot(contour[:, 1], contour[:, 0], linewidth=2, color='k', linestyle='--')
-    plt.title(satname + '     ' + metadata[satname]['dates'][i].strftime('%Y-%m-%d') + '      acc : ' + str(metadata[satname]['acc_georef'][i]) + ' m' )
-    plt.subplot(122)
-    plt.axis('equal')
-    plt.axis('off')
-    plt.plot(refpoints[:,0], refpoints[:,1], 'k.')
-    plt.plot(wl_final[:,0], wl_final[:,1], 'r.')
-    mng = plt.get_current_fig_manager()                                         
-    mng.window.showMaximized()
-    plt.tight_layout()
-    plt.draw()
-
-    pt_in = np.array(ginput(n=1, timeout=1000))
-    plt.close()
-    
-    # if image is rejected, skip it
-    if pt_in[0][1] > nrows/2:
-        print('skip ' + str(i) + ' - rejected')
-        idx_skipped.append(i)
-        continue
-        
-    # if accepted, store the data
-    cloud_cover_ts.append(cloud_cover)
-    acc_georef_ts.append(metadata[satname]['acc_georef'][i])
-    
-    filename_ts.append(filenames[i])
-    satname_ts.append(satname)
-    date_acquired_ts.append(filenames[i][:10])
-
-    timestamp.append(metadata[satname]['dates'][i])
-    shorelines.append(wl_final)
-
-# store in output structure
-output[satname] = {'dates':timestamp, 'shorelines':shorelines, 'idx_skipped':idx_skipped,
-      'metadata':{'filenames':filename_ts, 'satname':satname_ts, 'cloud_cover':cloud_cover_ts,
-                  'acc_georef':acc_georef_ts}}    
-
-del idx_skipped
+# save output
+#with open(os.path.join(filepath, sitename + '_output' + satname + '.pkl'), 'wb') as f:
+#    pickle.dump(output, f)

-#==========================================================#
-#==========================================================#
-#==========================================================#
-#==========================================================#

-#%%
-# save output
-with open(os.path.join(filepath, sitename + '_output' + '.pkl'), 'wb') as f:
-    pickle.dump(output, f)
-    
-# save idx_skipped
-#idx_skipped = dict([])
-#for satname in list(output.keys()):
-#    idx_skipped[satname] = output[satname]['idx_skipped']
-#with open(os.path.join(filepath, sitename + '_idxskipped' + '.pkl'), 'wb') as f:
-#    pickle.dump(idx_skipped, f)