Create NN for sand classification

development
kvos 7 years ago
parent 35f421bfcb
commit 0af37d9c6a

@ -1,181 +0,0 @@
# -*- coding: utf-8 -*-
#==========================================================#
# Extract shorelines from Landsat images
#==========================================================#
# Initial settings
import ee
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import pandas as pd
from datetime import datetime
import pickle
import pdb
import pytz
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.morphology as morphology
import skimage.measure as measure
# machine learning modules
from sklearn.cluster import KMeans
# my modules
import functions.utils as utils
import functions.sds as sds
# some settings
np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
plt.rcParams['axes.grid'] = False
plt.rcParams['figure.max_open_warning'] = 100
ee.Initialize()
# parameters
cloud_thresh = 0.5 # threshold for cloud cover
plot_bool = True # if you want the plots
prob_high = 99.9 # upper probability to clip and rescale pixel intensity
min_contour_points = 100 # minimum number of points contained in each water line
output_epsg = 28356 # GDA94 / MGA Zone 56
buffer_size = 10 # radius (in pixels) of disk for buffer (pixel classification)
min_beach_size = 50 # number of pixels in a beach (pixel classification)
# select collection
satname = 'L8'
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA') # Landsat 8 Tier 1 TOA
# location (Narrabeen-Collaroy beach)
polygon = [[[151.3473129272461,-33.69035274454718],
[151.2820816040039,-33.68206818063878],
[151.27281188964844,-33.74775138989556],
[151.3425064086914,-33.75231878701767],
[151.3473129272461,-33.69035274454718]]];
# dates
start_date = '2013-01-01'
end_date = '2018-12-31'
# filter by location and date
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(start_date, end_date)
n_img = flt_col.size().getInfo()
print('Number of images covering the polygon:', n_img)
im_all = flt_col.getInfo().get('features')
i = 0 # first image
# find image in ee database
im = ee.Image(im_all[i].get('id'))
# load image as np.array
im_pan, im_ms, im_cloud, crs, meta = sds.read_eeimage(im, polygon, satname, plot_bool)
# rescale intensities
im_ms = sds.rescale_image_intensity(im_ms, im_cloud, prob_high, plot_bool)
im_pan = sds.rescale_image_intensity(im_pan, im_cloud, prob_high, plot_bool)
# pansharpen rgb image
im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, im_cloud, plot_bool)
# add down-sized bands for NIR and SWIR (since pansharpening is not possible)
im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2)
# calculate NDWI
im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], im_cloud, plot_bool)
# edge detection
wl_pix = sds.find_wl_contours(im_ndwi, im_cloud, min_contour_points, plot_bool)
# convert from pixels to world coordinates
wl_coords = sds.convert_pix2world(wl_pix, crs['crs_15m'])
# convert to output epsg spatial reference
wl = sds.convert_epsg(wl_coords, crs['epsg_code'], output_epsg)
# classify sand pixels
im_sand = sds.classify_sand_unsupervised(im_ms_ps, im_pan, im_cloud, wl_pix, buffer_size, min_beach_size, plot_bool)
#%%
plt.figure()
plt.imshow(im_ms_ps[:,:,[2,1,0]])
for i,contour in enumerate(wl_pix): plt.plot(contour[:, 1], contour[:, 0], linewidth=2)
plt.axis('image')
plt.title('Detected water lines')
plt.show()
vec = im_ms_ps.reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1], im_ms_ps.shape[2])
vec_pan = im_pan.reshape(im_pan.shape[0]*im_pan.shape[1])
features = np.zeros((len(vec), 5))
features[:,[0,1,2,3]] = vec[:,[0,1,2,3]]
features[:,4] = vec_pan
vec_mask = im_cloud.reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1])
# create buffer
im_buffer = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1]))
for i, contour in enumerate(wl_pix):
indices = [(int(_[0]), int(_[1])) for _ in list(np.round(contour))]
for j, idx in enumerate(indices):
im_buffer[idx] = 1
plt.figure()
plt.imshow(im_buffer)
plt.draw()
se = morphology.disk(buffer_size)
im_buffer = morphology.binary_dilation(im_buffer, se)
plt.figure()
plt.imshow(im_buffer)
plt.draw()
vec_buffer = (im_buffer == 1).reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1])
vec_buffer = np.logical_and(vec_buffer, ~vec_mask)
#vec_buffer = np.ravel_multi_index(z,(im_ms_ps.shape[0], im_ms_ps.shape[1]))
kmeans = KMeans(n_clusters=6, random_state=0).fit(vec[vec_buffer,:])
labels = kmeans.labels_
labels_full = np.ones((len(vec_mask))) * np.nan
labels_full[vec_buffer] = labels
im_labels = labels_full.reshape(im_ms_ps.shape[0], im_ms_ps.shape[1])
plt.figure()
plt.imshow(im_labels)
plt.axis('equal')
plt.draw()
utils.compare_images(im_labels, im_pan)
plt.figure()
for i in range(6): plt.plot(kmeans.cluster_centers_[i,:])
plt.draw()
im_sand = im_labels == np.argmax(np.mean(kmeans.cluster_centers_[:,[0,1,2,4]], axis=1))
im_sand2 = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
im_sand3 = morphology.binary_dilation(im_sand2, morphology.disk(1))
plt.figure()
plt.imshow(im_sand3)
plt.draw()
im_ms_ps[im_sand3,0] = 0
im_ms_ps[im_sand3,1] = 0
im_ms_ps[im_sand3,2] = 1
plt.figure()
plt.imshow(im_ms_ps[:,:,[2,1,0]])
plt.axis('image')
plt.title('Sand classification')
plt.show()
#%%

@ -47,12 +47,18 @@ def download_tif(image, polygon, bandsId, filepath):
# select collection
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA')
# Location (Narrabeen all)
polygon = [[[151.3473129272461,-33.69035274454718],
            [151.2820816040039,-33.68206818063878],
            [151.27281188964844,-33.74775138989556],
            [151.3425064086914,-33.75231878701767],
            [151.3473129272461,-33.69035274454718]]];
# location (Narrabeen-Collaroy beach)
#polygon = [[[151.301454, -33.700754],
#            [151.311453, -33.702075],
#            [151.307237, -33.739761],
#            [151.294220, -33.736329],
#            [151.301454, -33.700754]]];
# location (Oldbar beach)
#polygon = [[[152.664508, -31.896163],
#            [152.665827, -31.897112],
@ -77,7 +83,8 @@ print('Number of images covering the area:', n_img)
im_all = flt_col.getInfo().get('features')
satname = 'L8'
sitename = 'NARRA_all'
#sitename = 'NARRA'
#sitename = 'OLDBAR'
suffix = '.tif'
filepath = os.path.join(os.getcwd(), 'data', satname, sitename)
@ -115,15 +122,15 @@ for i in range(n_img):
filename_ms = satname + '_' + sitename + '_' + im_date + '_ms' + '_r' + suffix
all_names_pan.append(filename_pan)
local_data_pan = download_tif(im, polygon, pan_band, filepath_pan)
os.rename(local_data_pan, os.path.join(filepath_pan, filename_pan))
local_data_ms = download_tif(im, polygon, ms_bands, filepath_ms)
os.rename(local_data_ms, os.path.join(filepath_ms, filename_ms))
with open(os.path.join(filepath, sitename + '_timestamps' + '.pkl'), 'wb') as f:
pickle.dump(timestamps, f)
with open(os.path.join(filepath, sitename + '_epsgcode' + '.pkl'), 'wb') as f:
pickle.dump(im_epsg, f)
with open(os.path.join(filepath, sitename + '_accuracy_georef' + '.pkl'), 'wb') as f:
pickle.dump(acc_georef, f)

@ -0,0 +1,476 @@
# -*- coding: utf-8 -*-
#==========================================================#
# Extract shorelines from Landsat images
#==========================================================#
# Initial settings
import ee
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import pandas as pd
from datetime import datetime
import pickle
import pdb
import pytz
from pylab import ginput
from osgeo import gdal
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.morphology as morphology
import skimage.measure as measure
import skimage.color as color
import skimage.feature as feature
# machine learning modules
from sklearn.cluster import KMeans
# my modules
import functions.utils as utils
import functions.sds as sds
# some settings
np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
plt.rcParams['axes.grid'] = False
plt.rcParams['figure.max_open_warning'] = 100
ee.Initialize()
# parameters
cloud_thresh = 0.5 # threshold for cloud cover
plot_bool = False # if you want the plots
prob_high = 99.9 # upper probability to clip and rescale pixel intensity
min_contour_points = 100 # minimum number of points contained in each water line
output_epsg = 28356 # GDA94 / MGA Zone 56
buffer_size = 10 # radius (in pixels) of disk for buffer (pixel classification)
min_beach_size = 50 # number of pixels in a beach (pixel classification)
# select collection
satname = 'L8'
input_col = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA') # Landsat 8 Tier 1 TOA
# location (Narrabeen-Collaroy beach)
polygon = [[[151.3473129272461,-33.69035274454718],
[151.2820816040039,-33.68206818063878],
[151.27281188964844,-33.74775138989556],
[151.3425064086914,-33.75231878701767],
[151.3473129272461,-33.69035274454718]]];
# dates
start_date = '2013-01-01'
end_date = '2018-12-31'
# filter by location and date
flt_col = input_col.filterBounds(ee.Geometry.Polygon(polygon)).filterDate(start_date, end_date)
n_img = flt_col.size().getInfo()
print('Number of images covering the polygon:', n_img)
im_all = flt_col.getInfo().get('features')
i = 0 # first image
# find image in ee database
im = ee.Image(im_all[i].get('id'))
# load image as np.array
im_pan, im_ms, im_cloud, crs, meta = sds.read_eeimage(im, polygon, satname, plot_bool)
# rescale intensities
im_ms = sds.rescale_image_intensity(im_ms, im_cloud, prob_high, plot_bool)
im_pan = sds.rescale_image_intensity(im_pan, im_cloud, prob_high, plot_bool)
# pansharpen rgb image
im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, im_cloud, plot_bool)
# add down-sized bands for NIR and SWIR (since pansharpening is not possible)
im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2)
# calculate NDWI
im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], im_cloud, plot_bool)
# edge detection
wl_pix = sds.find_wl_contours(im_ndwi, im_cloud, min_contour_points, plot_bool)
# convert from pixels to world coordinates
wl_coords = sds.convert_pix2world(wl_pix, crs['crs_15m'])
# convert to output epsg spatial reference
wl = sds.convert_epsg(wl_coords, crs['epsg_code'], output_epsg)
# classify sand pixels
im_sand = sds.classify_sand_unsupervised(im_ms_ps, im_pan, im_cloud, wl_pix, buffer_size, min_beach_size, plot_bool)
#pt_in = np.array(ginput(n=1, timeout=1000))
#if pt_in[0][0] < im_ms_ps.shape[1]/2:
from scipy import ndimage
win = np.ones((3,3))
im_features = np.zeros((sum(sum(im_sand)), 20))
im_features[:,[0,1,2,3,4]] = im_ms_ps[im_sand,:] # B G R NIR SWIR
im_features[:,5] = im_pan[im_sand] # Pan
im_features[:,6] = im_ndwi[im_sand] # NDWI
im_features[:,7] = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], im_cloud, False)[im_sand] # NDVI
im_features[:,8] = sds.nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], im_cloud, False)[im_sand] # ND Blue - Red
# texture features: 3x3 standard deviation of each band/index above, computed on the
# full 2D image and then sampled at the sand pixels
im_bands = np.dstack((im_ms_ps, im_pan, im_ndwi,
sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], im_cloud, False),
sds.nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], im_cloud, False)))
for i in range(9):
im_features[:,i+9] = ndimage.generic_filter(im_bands[:,:,i], np.std, footprint=win)[im_sand]
im_ms_ps[im_sand,:]
im_grey = color.rgb2grey(im_ms_ps[:,:,[2,1,0]])
plt.figure()
plt.imshow(im_grey, cmap='gray')
plt.draw()
counts, bins = np.histogram(im_grey[~im_cloud], bins=255)
im_grey_d = np.digitize(im_grey, bins=bins) - 1
from scipy import ndimage
varianceMatrix1 = ndimage.generic_filter(im_grey_d, np.max, footprint = np.ones((3,3)))
varianceMatrix2 = ndimage.generic_filter(im_grey_d, np.min, footprint = np.ones((3,3)))
varianceMatrix = varianceMatrix1 - varianceMatrix2
im_grey = color.rgb2grey(im_ms_ps[:,:,[2,1,0]])
plt.figure()
plt.imshow(varianceMatrix, cmap='gray')
plt.draw()
#%%
data = gdal.Open('l8_test.tif', gdal.GA_ReadOnly)
bands = [data.GetRasterBand(i + 1).ReadAsArray() for i in range(data.RasterCount)]
im = np.stack(bands, 2)
im_test = im[:,:,3]
plt.figure()
plt.imshow(im_test, cmap='gray')
plt.axis('image')
plt.draw()
im_stats = np.zeros((im_test.shape[0], im_test.shape[1], 6))
winsize = 5
prop_names = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM')
for i in range(im_test.shape[0]):
print(int(np.round(100*i/im_test.shape[0])))
for j in range(im_test.shape[1]):
#window needs to fit completely in the image
if i < 2 or j < 2:
continue
if i > (im_test.shape[0] - 3) or j > (im_test.shape[1] - 3):
continue
#calculate the GLCM on a 5x5 window centred on the pixel
glcm_window = im_test[i-2: i+3, j-2 : j+3]
glcm = feature.greycomatrix(glcm_window, [1], [0], symmetric = True, normed = True )
#calculate the six GLCM properties and store them at the centre pixel
for k in range(6): im_stats[i,j,k] = feature.greycoprops(glcm, prop_names[k])
plt.figure()
for i in range(6):
plt.subplot(2,3,i+1)
plt.imshow(im_stats[:,:,i], cmap='jet')
plt.axis('image')
plt.title(prop_names[i])
plt.draw()
pixel_loc = [200, 200]
im_stats[pixel_loc[0], pixel_loc[1], 3]
#%%
for i in range(im_grey_d.shape[0]):
print(int(np.round(100*i/im_grey_d.shape[0])))
for j in range(im_grey_d.shape[1]):
#window needs to fit completely in the image
if i < 1 or j < 1:
continue
if i > (im_grey_d.shape[0] - 2) or j > (im_grey_d.shape[1] - 2):
continue
#calculate the GLCM on a 3x3 window centred on the pixel
glcm_window = im_grey_d[i-1: i+2, j-1 : j+2]
glcm = feature.greycomatrix(glcm_window, [1,2], [0], levels=256, symmetric = True, normed = True )
#calculate the six GLCM properties and store them at the centre pixel
im_stats[i,j,0] = feature.greycoprops(glcm, 'contrast')
im_stats[i,j,1] = feature.greycoprops(glcm, 'dissimilarity')
im_stats[i,j,2] = feature.greycoprops(glcm, 'homogeneity')
im_stats[i,j,3] = feature.greycoprops(glcm, 'energy')
im_stats[i,j,4] = feature.greycoprops(glcm, 'correlation')
im_stats[i,j,5] = feature.greycoprops(glcm, 'ASM')
plt.figure()
for i in range(6):
plt.subplot(2,3,i+1)
plt.imshow(im_stats[:,:,i], cmap='jet')
plt.axis('image')
plt.draw()
#%%
from multiprocessing import Pool
import time
N = 10000000
a = np.ones((N))
b = np.ones((N))*2
out = np.zeros((N))
# baseline: plain python loop for the element-wise product
t = time.time()
for i in range(len(a)):
out[i] = a[i]*b[i]
elapsed = time.time() - t
print(elapsed)
def fun(args):
return args[0]*args[1]
# parallel version with a process pool (defaults to the number of available CPUs)
pool = Pool()
chunksize = 20 # this may take some guessing ... take a look at the docs to decide
for ind, res in enumerate(pool.map(fun, zip(a, b), chunksize)):
out[ind] = res
#%%
import gdal, osr
import numpy as np
from scipy.interpolate import RectBivariateSpline
from numpy.lib.stride_tricks import as_strided as ast
import dask.array as da
from joblib import Parallel, delayed, cpu_count
import os
from skimage.feature import greycomatrix, greycoprops
def im_resize(im,Nx,Ny):
'''
resize array by bivariate spline interpolation
'''
ny, nx = np.shape(im)
xx = np.linspace(0,nx,Nx)
yy = np.linspace(0,ny,Ny)
try:
im = da.from_array(im, chunks=1000) #dask implementation
except:
pass
newKernel = RectBivariateSpline(np.r_[:ny],np.r_[:nx],im)
return newKernel(yy,xx)
def p_me(Z, win):
'''
loop to calculate greycoprops
'''
try:
glcm = greycomatrix(Z, [5], [0], 256, symmetric=True, normed=True)
cont = greycoprops(glcm, 'contrast')
diss = greycoprops(glcm, 'dissimilarity')
homo = greycoprops(glcm, 'homogeneity')
eng = greycoprops(glcm, 'energy')
corr = greycoprops(glcm, 'correlation')
ASM = greycoprops(glcm, 'ASM')
return (cont, diss, homo, eng, corr, ASM)
except:
return (0,0,0,0,0,0)
def norm_shape(shap):
'''
Normalize numpy array shapes so they're always expressed as a tuple,
even for one-dimensional shapes.
'''
try:
i = int(shap)
return (i,)
except TypeError:
# shape was not a number
pass
try:
t = tuple(shap)
return t
except TypeError:
# shape was not iterable
pass
raise TypeError('shape must be an int, or a tuple of ints')
def sliding_window(a, ws, ss = None, flatten = True):
'''
Source: http://www.johnvinyard.com/blog/?p=268#more-268
Parameters:
a - an n-dimensional numpy array
ws - an int (a is 1D) or tuple (a is 2D or greater) representing the size
of each dimension of the window
ss - an int (a is 1D) or tuple (a is 2D or greater) representing the
amount to slide the window in each dimension. If not specified, it
defaults to ws.
flatten - if True, all slices are flattened, otherwise, there is an
extra dimension for each dimension of the input.
Returns
an array containing each n-dimensional window from a
'''
if None is ss:
# ss was not provided. the windows will not overlap in any direction.
ss = ws
ws = norm_shape(ws)
ss = norm_shape(ss)
# convert ws, ss, and a.shape to numpy arrays
ws = np.array(ws)
ss = np.array(ss)
shap = np.array(a.shape)
# ensure that ws, ss, and a.shape all have the same number of dimensions
ls = [len(shap),len(ws),len(ss)]
if 1 != len(set(ls)):
raise ValueError(\
'a.shape, ws and ss must all have the same length. They were %s' % str(ls))
# ensure that ws is smaller than a in every dimension
if np.any(ws > shap):
raise ValueError(\
'ws cannot be larger than a in any dimension.\
a.shape was %s and ws was %s' % (str(a.shape),str(ws)))
# how many slices will there be in each dimension?
newshape = norm_shape(((shap - ws) // ss) + 1)
# the shape of the strided array will be the number of slices in each dimension
# plus the shape of the window (tuple addition)
newshape += norm_shape(ws)
# the strides tuple will be the array's strides multiplied by step size, plus
# the array's strides (tuple addition)
newstrides = norm_shape(np.array(a.strides) * ss) + a.strides
a = ast(a,shape = newshape,strides = newstrides)
if not flatten:
return a
# Collapse strided so that it has one more dimension than the window. I.e.,
# the new array is a flat list of slices.
meat = len(ws) if ws.shape else 0
firstdim = (np.product(newshape[:-meat]),) if ws.shape else ()
dim = firstdim + (newshape[-meat:])
# remove any dimensions with size 1
dim = tuple(i for i in dim if i != 1)
return a.reshape(dim), newshape
#Stuff to change
win = 3
meter = str(win/4)
merge = im_grey_d
Z,ind = sliding_window(merge,(win,win),(1,1))
Ny, Nx = np.shape(merge)
w = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(p_me)(Z[k], win) for k in range(len(Z)))
cont = [a[0] for a in w]
diss = [a[1] for a in w]
homo = [a[2] for a in w]
eng = [a[3] for a in w]
corr = [a[4] for a in w]
ASM = [a[5] for a in w]
#Reshape to match number of windows
plt_cont = np.reshape(cont , ( ind[0], ind[1] ) )
plt_diss = np.reshape(diss , ( ind[0], ind[1] ) )
plt_homo = np.reshape(homo , ( ind[0], ind[1] ) )
plt_eng = np.reshape(eng , ( ind[0], ind[1] ) )
plt_corr = np.reshape(corr , ( ind[0], ind[1] ) )
plt_ASM = np.reshape(ASM , ( ind[0], ind[1] ) )
del cont, diss, homo, eng, corr, ASM
#Resize Images to receive texture and define filenames
contrast = im_resize(plt_cont,Nx,Ny)
contrast[merge==0]=np.nan
dissimilarity = im_resize(plt_diss,Nx,Ny)
dissimilarity[merge==0]=np.nan
homogeneity = im_resize(plt_homo,Nx,Ny)
homogeneity[merge==0]=np.nan
energy = im_resize(plt_eng,Nx,Ny)
energy[merge==0]=np.nan
correlation = im_resize(plt_corr,Nx,Ny)
correlation[merge==0]=np.nan
ASM = im_resize(plt_ASM,Nx,Ny)
ASM[merge==0]=np.nan
#%%
plt.figure()
plt.imshow(im_ms_ps[:,:,[2,1,0]])
for i,contour in enumerate(wl_pix): plt.plot(contour[:, 1], contour[:, 0], linewidth=2)
plt.axis('image')
plt.title('Detected water lines')
plt.show()
vec = im_ms_ps.reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1], im_ms_ps.shape[2])
vec_pan = im_pan.reshape(im_pan.shape[0]*im_pan.shape[1])
features = np.zeros((len(vec), 5))
features[:,[0,1,2,3]] = vec[:,[0,1,2,3]]
features[:,4] = vec_pan
vec_mask = im_cloud.reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1])
# create buffer
im_buffer = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1]))
for i, contour in enumerate(wl_pix):
indices = [(int(_[0]), int(_[1])) for _ in list(np.round(contour))]
for j, idx in enumerate(indices):
im_buffer[idx] = 1
plt.figure()
plt.imshow(im_buffer)
plt.draw()
se = morphology.disk(buffer_size)
im_buffer = morphology.binary_dilation(im_buffer, se)
plt.figure()
plt.imshow(im_buffer)
plt.draw()
vec_buffer = (im_buffer == 1).reshape(im_ms_ps.shape[0] * im_ms_ps.shape[1])
vec_buffer = np.logical_and(vec_buffer, ~vec_mask)
#vec_buffer = np.ravel_multi_index(z,(im_ms_ps.shape[0], im_ms_ps.shape[1]))
kmeans = KMeans(n_clusters=6, random_state=0).fit(vec[vec_buffer,:])
labels = kmeans.labels_
labels_full = np.ones((len(vec_mask))) * np.nan
labels_full[vec_buffer] = labels
im_labels = labels_full.reshape(im_ms_ps.shape[0], im_ms_ps.shape[1])
plt.figure()
plt.imshow(im_labels)
plt.axis('equal')
plt.draw()
utils.compare_images(im_labels, im_pan)
plt.figure()
for i in range(6): plt.plot(kmeans.cluster_centers_[i,:])
plt.draw()
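# label as sand the cluster whose centre is brightest on average over the B, G, R
# and SWIR bands (columns 0, 1, 2 and 4 of the pansharpened image)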
im_sand = im_labels == np.argmax(np.mean(kmeans.cluster_centers_[:,[0,1,2,4]], axis=1))
im_sand2 = morphology.remove_small_objects(im_sand, min_size=min_beach_size, connectivity=2)
im_sand3 = morphology.binary_dilation(im_sand2, morphology.disk(1))
plt.figure()
plt.imshow(im_sand3)
plt.draw()
im_ms_ps[im_sand3,0] = 0
im_ms_ps[im_sand3,1] = 0
im_ms_ps[im_sand3,2] = 1
plt.figure()
plt.imshow(im_ms_ps[:,:,[2,1,0]])
plt.axis('image')
plt.title('Sand classification')
plt.show()
#%%

@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
from datetime import datetime, timedelta
import pytz
import csv
import pandas as pd
au_tz = pytz.timezone('Australia/Sydney')
dt1 = datetime(2018, 4, 17, tzinfo= au_tz)
dt = []
dt.append(dt1)
for i in range(1,100):
dt1 = dt[i-1]
dt.append(dt1 + timedelta(days=16))
dtstr = [_.strftime('%d %b %Y') for _ in dt]
df = pd.DataFrame(dtstr)
df.to_csv('L7_NARRA_dates.csv', index=False, header=False)

Binary file not shown.

Binary file not shown.

@ -0,0 +1,26 @@
Time taken to build model: 44.94 seconds
=== Stratified cross-validation ===
=== Summary ===
Correctly Classified Instances 79136 99.2799 %
Incorrectly Classified Instances 574 0.7201 %
Kappa statistic 0.9838
Mean absolute error 0.0105
Root mean squared error 0.0781
Relative absolute error 2.3774 %
Root relative squared error 16.5717 %
Total Number of Instances 79710
=== Detailed Accuracy By Class ===
TP Rate FP Rate Precision Recall F-Measure MCC ROC Area PRC Area Class
0.995 0.011 0.994 0.995 0.995 0.984 0.998 0.999 0
0.989 0.005 0.990 0.989 0.989 0.984 0.998 0.997 1
Weighted Avg. 0.993 0.009 0.993 0.993 0.993 0.984 0.998 0.998
=== Confusion Matrix ===
a b <-- classified as
52960 270 | a = 0
304 26176 | b = 1
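As a sanity check on the report above, the overall accuracy and kappa statistic follow directly from the confusion matrix. A minimal numpy sketch (illustrative only, not part of the repository):

import numpy as np
cm = np.array([[52960, 270],
               [304, 26176]])                 # confusion matrix reported above
total = cm.sum()                              # 79710 instances
accuracy = np.trace(cm) / total               # ~0.9928 -> 99.28 % correctly classified
p_e = (cm.sum(axis=1) * cm.sum(axis=0)).sum() / total**2   # chance agreement
kappa = (accuracy - p_e) / (1 - p_e)          # ~0.984, matching the kappa statistic
print(accuracy, kappa)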

File diff suppressed because it is too large.

@ -0,0 +1,245 @@
# -*- coding: utf-8 -*-
#==========================================================#
# Create a training data
#==========================================================#
# Initial settings
import os
import numpy as np
import matplotlib.pyplot as plt
import ee
import pdb
import time
import pandas as pd
# other modules
from osgeo import gdal, ogr, osr
import pickle
import matplotlib.cm as cm
from pylab import ginput
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.measure as measure
import skimage.morphology as morphology
from scipy import ndimage
# machine learning modules
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.externals import joblib
# import own modules
import functions.utils as utils
import functions.sds as sds
# some settings
np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
plt.rcParams['axes.grid'] = True
plt.rcParams['figure.max_open_warning'] = 100
ee.Initialize()
# parameters
cloud_thresh = 0.5 # threshold for cloud cover
plot_bool = False # if you want the plots
prob_high = 99.9 # upper probability to clip and rescale pixel intensity
min_contour_points = 100 # minimum number of points contained in each water line
output_epsg = 28356 # GDA94 / MGA Zone 56
buffer_size = 10 # radius (in pixels) of disk for buffer (pixel classification)
min_beach_size = 50 # number of pixels in a beach (pixel classification)
# load metadata (timestamps and epsg code) for the collection
satname = 'L8'
sitename = 'NARRA_all'
#sitename = 'NARRA'
#sitename = 'OLDBAR'
# Load metadata
filepath = os.path.join(os.getcwd(), 'data', satname, sitename)
# path to images
file_path_pan = os.path.join(os.getcwd(), 'data', satname, sitename, 'pan')
file_path_ms = os.path.join(os.getcwd(), 'data', satname, sitename, 'ms')
file_names_pan = os.listdir(file_path_pan)
file_names_ms = os.listdir(file_path_ms)
N = len(file_names_pan)
# initialise some variables
idx_skipped = []
idx_nocloud = []
n_features = 10
train_pos = np.nan*np.ones((1,n_features))
train_neg = np.nan*np.ones((1,n_features))
columns = ('B','G','R','NIR','SWIR','Pan','WI','VI','BR', 'mWI', 'SAND')
#%%
for i in range(N):
# read pan image
fn_pan = os.path.join(file_path_pan, file_names_pan[i])
data = gdal.Open(fn_pan, gdal.GA_ReadOnly)
georef = np.array(data.GetGeoTransform())
bands = [data.GetRasterBand(i + 1).ReadAsArray() for i in range(data.RasterCount)]
im_pan = np.stack(bands, 2)[:,:,0]
# read ms image
fn_ms = os.path.join(file_path_ms, file_names_ms[i])
data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
bands = [data.GetRasterBand(i + 1).ReadAsArray() for i in range(data.RasterCount)]
im_ms = np.stack(bands, 2)
# cloud mask
im_qa = im_ms[:,:,5]
cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
cloud_mask = transform.resize(cloud_mask, (im_pan.shape[0], im_pan.shape[1]),
order=0, preserve_range=True,
mode='constant').astype('bool_')
# resize the image using bilinear interpolation (order 1)
im_ms = transform.resize(im_ms,(im_pan.shape[0], im_pan.shape[1]),
order=1, preserve_range=True, mode='constant')
# check if -inf or nan values and add to cloud mask
im_inf = np.isin(im_ms[:,:,0], -np.inf)
im_nan = np.isnan(im_ms[:,:,0])
cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
# skip if cloud cover is more than the threshold
cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
if cloud_cover > cloud_thresh:
print('skip ' + str(i) + ' - cloudy (' + str(cloud_cover) + ')')
idx_skipped.append(i)
continue
idx_nocloud.append(i)
# rescale intensities
im_ms = sds.rescale_image_intensity(im_ms, cloud_mask, prob_high, plot_bool)
im_pan = sds.rescale_image_intensity(im_pan, cloud_mask, prob_high, plot_bool)
# pansharpen rgb image
im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, plot_bool)
nrow = im_ms_ps.shape[0]
ncol = im_ms_ps.shape[1]
# add down-sized bands for NIR and SWIR (since pansharpening is not possible)
im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2)
# calculate NDWI
im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, plot_bool)
# detect edges
wl_pix = sds.find_wl_contours(im_ndwi, cloud_mask, min_contour_points, plot_bool)
# classify sand pixels with Kmeans
im_sand = sds.classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, buffer_size, min_beach_size, plot_bool)
# plot a figure to manually select which images to keep
im = np.copy(im_ms_ps)
im[im_sand,0] = 0
im[im_sand,1] = 0
im[im_sand,2] = 1
plt.figure()
plt.imshow(im[:,:,[2,1,0]])
plt.axis('image')
plt.title('Sand classification')
plt.show()
mng = plt.get_current_fig_manager()
mng.window.showMaximized()
plt.tight_layout()
plt.draw()
# click a point
# top-left quadrant: keep classif as pos and click somewhere for neg
# bottom-left: keep classif as neg
# any right quadrant: discard image
pt_in = np.array(ginput(n=1, timeout=1000))
if pt_in[0][0] < im_ms_ps.shape[1]/2:
im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features))
im_features[:,:,[0,1,2,3,4]] = im_ms_ps
im_features[:,:,5] = im_pan
im_features[:,:,6] = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, False) # (NIR-G)
im_features[:,:,7] = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], cloud_mask, False) # (NIR-R)
im_features[:,:,8] = sds.nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], cloud_mask, False) # (B-R)
im_features[:,:,9] = sds.nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False) # (SWIR-G)
# win = np.ones((3,3))
# im_features[:,:,9] = ndimage.generic_filter(im_features[:,:,5], np.std, footprint=win)
# im_features[:,:,10] = ndimage.generic_filter(im_features[:,:,5], np.max, footprint=win) - ndimage.generic_filter(im_features[:,:,5], np.min, footprint=win)
if pt_in[0][1] < im_ms_ps.shape[0]/2:
# positive examples
vec_pos = im_features[im_sand,:]
train_pos = np.append(train_pos, vec_pos, axis=0)
# click where negative examples are
pt_neg = np.round(np.array(ginput(n=1, timeout=1000))[0])
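# take a square patch of side 'radius' anchored at the clicked point; the side is chosen
# so that the patch holds roughly as many pixels as the sand class, keeping the
# positive and negative examples balanced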
radius = int(round(np.sqrt(sum(sum(im_sand)))))
idx_rows = np.linspace(0,radius-1,radius).astype(int) + int(pt_neg[1])
idx_cols = np.linspace(0,radius-1,radius).astype(int) + int(pt_neg[0])
xx, yy = np.meshgrid(idx_rows,idx_cols, indexing='ij')
row_neg = xx.reshape(radius*radius)
col_neg = yy.reshape(radius*radius)
im_nosand = np.zeros((nrow,ncol)).astype(bool)
for i in range(len(row_neg)):
im_nosand[row_neg[i],col_neg[i]] = True
im_ms_ps[row_neg[i],col_neg[i],0] = 1
im_ms_ps[row_neg[i],col_neg[i],1] = 1
im_ms_ps[row_neg[i],col_neg[i],2] = 0
plt.imshow(im_ms_ps[:,:,[2,1,0]])
plt.draw()
# negative examples
vec_neg = im_features[im_nosand,:]
train_neg = np.append(train_neg, vec_neg, axis=0)
else:
# negative examples
vec_neg = im_features[im_sand,:]
train_neg = np.append(train_neg, vec_neg, axis=0)
else:
print('skip ' + str(i))
idx_skipped.append(i)
# format data
train_pos = train_pos[1:,:]
train_neg = train_neg[1:,:]
n_pos = len(train_pos)
n_neg = len(train_neg)
training_data = np.zeros((n_pos+n_neg, n_features+1))
training_data[:n_pos,:n_features] = train_pos
training_data[n_pos:n_pos+n_neg,:n_features] = train_neg
training_data[:n_pos,n_features] = np.ones((n_pos))
df_train = pd.DataFrame(training_data, columns=columns)
df_train.dropna(axis=0, how='any', inplace=True)
sand_train = np.array(df_train)
# save data
#with open(os.path.join(filepath, sitename + '_sand_idxskip' + '.pkl'), 'wb') as f:
# pickle.dump(idx_skipped, f)
#with open(os.path.join(filepath, sitename + '_sand_train' + '.pkl'), 'wb') as f:
# pickle.dump(sand_train, f)
#df_train.to_csv('training_data.csv')
#%% Train neural network on data
# load training data
with open(os.path.join(filepath, sitename + '_sand_train' + '.pkl'), 'rb') as f:
sand_train = pickle.load(f)
n_features = sand_train.shape[1] - 1
X = sand_train[:,0:n_features]
y = sand_train[:,n_features]
# divide in train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
#scaler = StandardScaler()
#scaler.fit(X_train)
#X_train = scaler.transform(X_train)
#X_test = scaler.transform(X_test)
# run NN on train dat and evaluate on test data
clf = MLPClassifier()
clf.fit(X_train,y_train)
clf.score(X_test,y_test)
# save NN model
joblib.dump(clf, os.path.join(os.getcwd(), 'sand_classification', 'NN_small.pkl'))
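A hedged sketch of how the hold-out score above could be extended to a stratified cross-validation with a confusion matrix and kappa statistic, using standard scikit-learn utilities (cross_val_predict, confusion_matrix and cohen_kappa_score are sklearn functions, not part of this repository; X and y are the arrays built above):

from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix, cohen_kappa_score
# 5-fold stratified predictions for every sample (an integer cv with a classifier
# defaults to StratifiedKFold)
y_cv = cross_val_predict(MLPClassifier(), X, y, cv=5)
print(confusion_matrix(y, y_cv))
print(cohen_kappa_score(y, y_cv))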

@ -0,0 +1,176 @@
# -*- coding: utf-8 -*-
#==========================================================#
# Run Neural Network on image to extract sandy pixels
#==========================================================#
# Initial settings
import os
import numpy as np
import matplotlib.pyplot as plt
import ee
import pdb
import time
import pandas as pd
# other modules
from osgeo import gdal, ogr, osr
import pickle
import matplotlib.cm as cm
from pylab import ginput
# image processing modules
import skimage.filters as filters
import skimage.exposure as exposure
import skimage.transform as transform
import sklearn.decomposition as decomposition
import skimage.measure as measure
import skimage.morphology as morphology
from scipy import ndimage
# machine learning modules
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.externals import joblib
# import own modules
import functions.utils as utils
import functions.sds as sds
# some settings
np.seterr(all='ignore') # raise/ignore divisions by 0 and nans
plt.rcParams['axes.grid'] = True
plt.rcParams['figure.max_open_warning'] = 100
ee.Initialize()
# parameters
cloud_thresh = 0.5 # threshold for cloud cover
plot_bool = False # if you want the plots
prob_high = 100 # upper probability to clip and rescale pixel intensity
min_contour_points = 100 # minimum number of points contained in each water line
output_epsg = 28356 # GDA94 / MGA Zone 56
buffer_size = 10 # radius (in pixels) of disk for buffer (pixel classification)
min_beach_size = 50 # number of pixels in a beach (pixel classification)
# load metadata (timestamps and epsg code) for the collection
satname = 'L8'
sitename = 'NARRA_all'
#sitename = 'NARRA'
#sitename = 'OLDBAR'
# Load metadata
filepath = os.path.join(os.getcwd(), 'data', satname, sitename)
# path to images
file_path_pan = os.path.join(os.getcwd(), 'data', satname, sitename, 'pan')
file_path_ms = os.path.join(os.getcwd(), 'data', satname, sitename, 'ms')
file_names_pan = os.listdir(file_path_pan)
file_names_ms = os.listdir(file_path_ms)
N = len(file_names_pan)
# initialise some variables
idx_skipped = []
idx_nocloud = []
n_features = 10
train_pos = np.nan*np.ones((1,n_features))
train_neg = np.nan*np.ones((1,n_features))
columns = ('B','G','R','NIR','SWIR','Pan','WI','VI','BR', 'mWI', 'SAND')
clf = joblib.load(os.path.join(os.getcwd(), 'sand_classification', 'NN_new.pkl'))
#%%
for i in range(N):
# read pan image
fn_pan = os.path.join(file_path_pan, file_names_pan[i])
data = gdal.Open(fn_pan, gdal.GA_ReadOnly)
georef = np.array(data.GetGeoTransform())
bands = [data.GetRasterBand(i + 1).ReadAsArray() for i in range(data.RasterCount)]
im_pan = np.stack(bands, 2)[:,:,0]
# read ms image
fn_ms = os.path.join(file_path_ms, file_names_ms[i])
data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
bands = [data.GetRasterBand(i + 1).ReadAsArray() for i in range(data.RasterCount)]
im_ms = np.stack(bands, 2)
# cloud mask
im_qa = im_ms[:,:,5]
cloud_mask = sds.create_cloud_mask(im_qa, satname, plot_bool)
cloud_mask = transform.resize(cloud_mask, (im_pan.shape[0], im_pan.shape[1]),
order=0, preserve_range=True,
mode='constant').astype('bool_')
# resize the image using bilinear interpolation (order 1)
im_ms = transform.resize(im_ms,(im_pan.shape[0], im_pan.shape[1]),
order=1, preserve_range=True, mode='constant')
# check if -inf or nan values and add to cloud mask
im_inf = np.isin(im_ms[:,:,0], -np.inf)
im_nan = np.isnan(im_ms[:,:,0])
cloud_mask = np.logical_or(np.logical_or(cloud_mask, im_inf), im_nan)
# skip if cloud cover is more than the threshold
cloud_cover = sum(sum(cloud_mask.astype(int)))/(cloud_mask.shape[0]*cloud_mask.shape[1])
if cloud_cover > cloud_thresh:
print('skip ' + str(i) + ' - cloudy (' + str(np.round(cloud_cover*100).astype(int)) + '%)')
idx_skipped.append(i)
continue
idx_nocloud.append(i)
# rescale intensities
im_ms = sds.rescale_image_intensity(im_ms, cloud_mask, prob_high, plot_bool)
im_pan = sds.rescale_image_intensity(im_pan, cloud_mask, prob_high, plot_bool)
# pansharpen rgb image
im_ms_ps = sds.pansharpen(im_ms[:,:,[0,1,2]], im_pan, cloud_mask, plot_bool)
nrow = im_ms_ps.shape[0]
ncol = im_ms_ps.shape[1]
# add down-sized bands for NIR and SWIR (since pansharpening is not possible)
im_ms_ps = np.append(im_ms_ps, im_ms[:,:,[3,4]], axis=2)
# calculate NDWI
im_ndwi = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,1], cloud_mask, plot_bool)
# detect edges
wl_pix = sds.find_wl_contours(im_ndwi, cloud_mask, min_contour_points, plot_bool)
# classify sand pixels with Kmeans
im_sand = sds.classify_sand_unsupervised(im_ms_ps, im_pan, cloud_mask, wl_pix, buffer_size, min_beach_size, plot_bool)
# calculate features
im_features = np.zeros((im_ms_ps.shape[0], im_ms_ps.shape[1], n_features))
im_features[:,:,[0,1,2,3,4]] = im_ms_ps
im_features[:,:,5] = im_pan
im_features[:,:,6] = im_ndwi
im_features[:,:,7] = sds.nd_index(im_ms_ps[:,:,3], im_ms_ps[:,:,2], cloud_mask, False) # ND(NIR-R)
im_features[:,:,8] = sds.nd_index(im_ms_ps[:,:,0], im_ms_ps[:,:,2], cloud_mask, False) # ND(B-R)
im_features[:,:,9] = sds.nd_index(im_ms_ps[:,:,4], im_ms_ps[:,:,1], cloud_mask, False) # (SWIR-G)
# remove NaNs and clouds
vec = im_features.reshape((im_ms_ps.shape[0] * im_ms_ps.shape[1], n_features))
vec_cloud = cloud_mask.reshape(cloud_mask.shape[0]*cloud_mask.shape[1])
vec_nan = np.any(np.isnan(vec), axis=1)
vec_mask = np.logical_or(vec_cloud, vec_nan)
vec = vec[~vec_mask, :]
# predict with NN
y = clf.predict(vec)
# recompose image
vec_new = np.zeros((cloud_mask.shape[0]*cloud_mask.shape[1]))
vec_new[~vec_mask] = y
im_classif = vec_new.reshape((im_ms_ps.shape[0], im_ms_ps.shape[1]))
im_classif = im_classif.astype(bool)
im_classif = morphology.remove_small_objects(im_classif, min_size=min_beach_size, connectivity=2)
# make comparison plot between NN and Kmeans
im1 = np.copy(im_ms_ps)
im1[im_classif,0] = 0
im1[im_classif,1] = 0
im1[im_classif,2] = 1
im2 = np.copy(im_ms_ps)
im2[im_sand,0] = 0
im2[im_sand,1] = 0
im2[im_sand,2] = 1
plt.figure()
ax1 = plt.subplot(121)
plt.imshow(im1[:,:,[2,1,0]])
plt.axis('off')
plt.title('NN')
ax2 = plt.subplot(122, sharex=ax1, sharey=ax1)
plt.imshow(im2[:,:,[2,1,0]])
plt.axis('off')
plt.title('Kmeans')
mng = plt.get_current_fig_manager()
mng.window.showMaximized()
plt.tight_layout()
plt.draw()

File diff suppressed because it is too large.