Initial commit
commit 85fa9e55c2
@@ -0,0 +1,11 @@
# Jupyter NB Checkpoints
.ipynb_checkpoints/

# exclude data from source control by default
/data/

# Pycharm
.idea

# DotEnv configuration
.env
@@ -0,0 +1,45 @@

#################################################################################
# PROJECT RULES                                                                 #
#################################################################################

.PHONY: mat-to-csv
mat-to-csv: ##@data Converts raw .mat files to .csv for python
	cd ./src/data/ && python mat_to_csv.py

sites-csv-to-shp: ./data/interim/sites.csv
	cd ./src/data && python csv_to_shp.py

#################################################################################
# Self Documenting Commands                                                     #
#################################################################################

.DEFAULT_GOAL := help
.PHONY: help

# Refer to https://gist.github.com/prwhite/8168133

# COLORS
GREEN := $(shell tput -Txterm setaf 2)
WHITE := $(shell tput -Txterm setaf 7)
YELLOW := $(shell tput -Txterm setaf 3)
RESET := $(shell tput -Txterm sgr0)

# Add the following 'help' target to your Makefile
# And add help text after each target name starting with '\#\#'
# A category can be added with @category
HELP_FUN = \
    %help; \
    while(<>) { push @{$$help{$$2 // 'options'}}, [$$1, $$3] if /^([a-zA-Z\-]+)\s*:.*\#\#(?:@([a-zA-Z\-]+))?\s(.*)$$/ }; \
    print "usage: make [target]\n\n"; \
    for (sort keys %help) { \
    print "${WHITE}$$_:${RESET}\n"; \
    for (@{$$help{$$_}}) { \
    $$sep = " " x (32 - length $$_->[0]); \
    print "  ${YELLOW}$$_->[0]${RESET}$$sep${GREEN}$$_->[1]${RESET}\n"; \
    }; \
    print "\n"; }

help: ##@other Show this help.
	@perl -e '$(HELP_FUN)' $(MAKEFILE_LIST)
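
With the targets above, `make help` should print something along these lines (an illustrative sketch only; colours omitted, and `sites-csv-to-shp` is not listed because it has no `##` help comment):

```
usage: make [target]

data:
  mat-to-csv                      Converts raw .mat files to .csv for python

other:
  help                            Show this help.
```
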
@@ -0,0 +1,21 @@
# 2016 Narrabeen Storm EWS Performance

This repository investigates whether the storm impacts (as defined by Sallenger, 2000) of the June 2016 Narrabeen Storm could have been forecast in advance.

## Repository and analysis format

This repository follows the [Cookiecutter Data Science](https://drivendata.github.io/cookiecutter-data-science/) structure where possible. The analysis is done in Python (see the `/src/` folder), with some interactive, exploratory notebooks located at `/notebooks`.
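
For orientation, the folders referenced throughout this analysis are laid out roughly as follows (abridged to the paths that appear in this repository):

```
.
├── Makefile                 <- project rules and self-documenting `make help`
├── data
│   ├── raw                  <- original .mat files, aerial LIDAR and profile features
│   ├── interim              <- flattened .csv / .shp files written by the scripts in /src/data
│   └── processed            <- final outputs
├── notebooks
│   ├── 01_exploration.ipynb
│   └── qgis.qgz
└── src
    └── data                 <- mat_to_csv.py, csv_to_shp.py
```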

## Where to start?

Check out the Jupyter notebook `./notebooks/01_exploration.ipynb`, which shows how to import the data and includes some interactive widgets.
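
If you prefer a script, the interim CSVs produced by `make mat-to-csv` can be loaded directly with pandas. This is a minimal sketch mirroring the import code in `/src/data/`, and it assumes the files in `/data/interim/` have already been generated:

```python
import os

import pandas as pd

data_folder = './data/interim'

# Waves and tides are indexed by (site_id, datetime); profiles by (site_id, profile_type, x)
df_waves = pd.read_csv(os.path.join(data_folder, 'waves.csv'), index_col=[0, 1])
df_tides = pd.read_csv(os.path.join(data_folder, 'tides.csv'), index_col=[0, 1])
df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0, 1, 2])
df_sites = pd.read_csv(os.path.join(data_folder, 'sites.csv'), index_col=[0])
```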

## Available data

Raw, interim and processed data used in this analysis are kept in the `/data/` folder. A short example of selecting a single profile from the interim data follows the list below.

- `/data/raw/processed_shorelines`: This data was received from Tom Beuzen in October 2018. It consists of pre/post-storm profiles at sections spaced every 100 m along beaches ranging from Dee Why to Nambucca. Profiles are based on raw aerial LIDAR and were processed by Mitch Harley. Tides and waves (10 m contour and reverse-shoaled deepwater) for each individual 100 m section are also provided.
- `/data/raw/raw_lidar`: This is the raw pre/post-storm aerial LIDAR which was taken for the June 2016 storm. The `.las` files are the raw files, which have been processed into `.tiff` files using `PDAL`. Note that these files have not been corrected for systematic errors, so actual elevations should be taken from the `processed_shorelines` folder. Obtained in November 2018 from Mitch Harley, from the black external HDD labelled "UNSW LIDAR".
- `/data/raw/profile_features`: Dune toe and crest locations based on pre-storm LIDAR. Refer to `/notebooks/qgis.qgz`, which shows how they were manually extracted. Note that the shapefiles only show the location (lat/lon) of the dune crest and toe; for actual elevations, these locations need to be related to the processed shorelines.
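
Since `profiles.csv` is written with a `(site_id, profile_type, x)` index (see `main()` in `/src/data/mat_to_csv.py`), an individual pre-storm cross-section can be pulled straight out of the index. A minimal sketch, where `NARRA0001` is a hypothetical site id following the `{beach}{0001}` pattern created by `get_unique_sites()`:

```python
import pandas as pd

df_profiles = pd.read_csv('./data/interim/profiles.csv', index_col=[0, 1, 2])

# 'NARRA0001' is a hypothetical example id; substitute a real one from sites.csv.
prestorm = df_profiles.loc[('NARRA0001', 'prestorm')]
print(prestorm.head())  # elevation z indexed by cross-shore distance x
```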

## Notebooks

- `/notebooks/01_exploration.ipynb`: Shows how to import the processed shorelines, waves and tides. An interactive widget plots the location and cross-sections.
- `/notebooks/qgis.qgz`: A QGIS file which is used to explore the aerial LIDAR data in `/data/raw/raw_lidar`. By examining the pre-storm LIDAR, dune crest and dune toe lines are manually extracted. These are stored in `/data/raw/profile_features/`.

File diff suppressed because one or more lines are too long
Binary file not shown.
@@ -0,0 +1,13 @@
import pandas as pd
import os


def main():
    # Load the interim data files produced by mat_to_csv.py
    data_folder = './data/interim'
    df_waves = pd.read_csv(os.path.join(data_folder, 'waves.csv'), index_col=[0, 1])
    df_tides = pd.read_csv(os.path.join(data_folder, 'tides.csv'), index_col=[0, 1])
    df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0, 1, 2])
    df_sites = pd.read_csv(os.path.join(data_folder, 'sites.csv'), index_col=[0])


if __name__ == '__main__':
    main()
@@ -0,0 +1,38 @@
"""
Converts .csv files to .shp files
"""

from fiona.crs import from_epsg
import fiona
from shapely.geometry import Point, mapping
from fiona import collection
import pandas as pd
import os


def sites_csv_to_shp(input_csv='./data/interim/sites.csv', output_shp='./data/interim/sites.shp'):
    """
    Converts our dataframe of sites to .shp to load in QGIS
    :param input_csv: path to the sites .csv file
    :param output_shp: path of the .shp file to write
    :return:
    """
    df_sites = pd.read_csv(input_csv, index_col=[0])

    schema = {
        'geometry': 'Point',
        'properties': {
            'beach': 'str',
            'site_id': 'str'
        }
    }
    with fiona.open(output_shp, 'w', crs=from_epsg(4326), driver='ESRI Shapefile', schema=schema) as output:
        for index, row in df_sites.iterrows():
            point = Point(row['lon'], row['lat'])
            prop = {
                'beach': row['beach'],
                'site_id': index,
            }
            output.write({'geometry': mapping(point), 'properties': prop})


if __name__ == '__main__':
    sites_csv_to_shp()
@@ -0,0 +1,180 @@
"""
Converts raw .mat files into a flattened .csv structure which can be imported into python pandas.
"""

import logging.config
from datetime import datetime, timedelta

import pandas as pd
from mat4py import loadmat

logging.config.fileConfig('../logging.conf', disable_existing_loggers=False)
logger = logging.getLogger(__name__)


def parse_waves(waves_mat):
    """
    Parses the raw waves.mat file and returns a pandas dataframe
    :param waves_mat:
    :return:
    """
    logger.info('Parsing %s', waves_mat)
    mat_data = loadmat(waves_mat)['data']
    rows = []
    for i in range(0, len(mat_data['site'])):
        for j in range(0, len(mat_data['dates'][i])):
            rows.append({
                'beach': mat_data['site'][i],
                'lon': mat_data['lon'][i],
                'lat': mat_data['lat'][i],
                'datetime': matlab_datenum_to_datetime(mat_data['dates'][i][j][0]),
                'Hs': mat_data['H'][i][j][0],
                'Hs0': mat_data['Ho'][i][j][0],
                'Tp': mat_data['T'][i][j][0],
                'dir': mat_data['D'][i][j][0],
                'E': mat_data['E'][i][j][0],
                'P': mat_data['P'][i][j][0],
                'Exs': mat_data['Exs'][i][j][0],
                'Pxs': mat_data['Pxs'][i][j][0],
            })

    df = pd.DataFrame(rows)
    df['datetime'] = df['datetime'].dt.round('1s')
    return df


def parse_tides(tides_mat):
    """
    Parses the raw tides.mat file and returns a pandas dataframe
    :param tides_mat:
    :return:
    """
    logger.info('Parsing %s', tides_mat)
    mat_data = loadmat(tides_mat)['data']
    rows = []
    for i in range(0, len(mat_data['site'])):
        for j in range(0, len(mat_data['time'])):
            rows.append({
                'beach': mat_data['site'][i][0],
                'lon': mat_data['lons'][i][0],
                'lat': mat_data['lats'][i][0],
                'datetime': matlab_datenum_to_datetime(mat_data['time'][j][0]),
                'tide': mat_data['tide'][i][j]
            })

    df = pd.DataFrame(rows)
    df['datetime'] = df['datetime'].dt.round('1s')
    return df


def parse_profiles(profiles_mat):
    """
    Parses the raw profiles.mat file and returns a pandas dataframe
    :param profiles_mat:
    :return:
    """
    logger.info('Parsing %s', profiles_mat)
    mat_data = loadmat(profiles_mat)['data']
    rows = []
    for i in range(0, len(mat_data['site'])):
        for j in range(0, len(mat_data['pfx'][i])):
            for profile_type in ['prestorm', 'poststorm']:

                if profile_type == 'prestorm':
                    z = mat_data['pf1'][i][j][0]
                elif profile_type == 'poststorm':
                    z = mat_data['pf2'][i][j][0]

                rows.append({
                    'beach': mat_data['site'][i],
                    'lon': mat_data['lon'][i],
                    'lat': mat_data['lat'][i],
                    'profile_type': profile_type,
                    'x': mat_data['pfx'][i][j][0],
                    'z': z,
                })

    df = pd.DataFrame(rows)
    return df


def matlab_datenum_to_datetime(matlab_datenum):
    # Matlab datenums count days from year 0; subtracting 366 days aligns them
    # with Python's proleptic Gregorian ordinal (https://stackoverflow.com/a/13965852).
    return datetime.fromordinal(int(matlab_datenum)) + timedelta(days=matlab_datenum % 1) - timedelta(
        days=366)


def get_unique_sites(dfs, cols=['beach', 'lat', 'lon']):
    """
    Generates a dataframe of unique sites based on beach names, lats and lons. Creates a unique site ID for each.
    :param dfs:
    :param cols:
    :return:
    """

    rows = []
    df_all = pd.concat([df[cols] for df in dfs])
    beach_groups = df_all.groupby(['beach'])
    for beach_name, beach_group in beach_groups:
        site_groups = beach_group.groupby(['lat', 'lon'])
        siteNo = 1
        for site_name, site_group in site_groups:
            site = '{}{:04d}'.format(beach_name, siteNo)
            rows.append({'site_id': site,
                         'lat': site_name[0],
                         'lon': site_name[1],
                         'beach': beach_name})
            siteNo += 1

    df = pd.DataFrame(rows)

    return df


def replace_unique_sites(df, df_sites, cols=['beach', 'lat', 'lon']):
    """
    Replaces beach/lat/lon columns with the unique site_id
    :param df:
    :param df_sites:
    :return:
    """

    df_merged = df.merge(df_sites, on=cols)

    # Check that all our records have a unique site identifier
    n_unmatched = len(df) - len(df_merged)
    if n_unmatched > 0:
        logger.warning('%d of %d records could not be matched with a unique site', n_unmatched, len(df))

    df_merged = df_merged.drop(columns=cols)

    return df_merged


def main():
    df_waves = parse_waves(waves_mat='../../data/raw/waves.mat')
    df_tides = parse_tides(tides_mat='../../data/raw/tides.mat')
    df_profiles = parse_profiles(profiles_mat='../../data/raw/profiles.mat')
    df_sites = get_unique_sites(dfs=[df_waves, df_tides, df_profiles])

    logger.info('Identifying unique sites')
    df_waves = replace_unique_sites(df_waves, df_sites)
    df_tides = replace_unique_sites(df_tides, df_sites)
    df_profiles = replace_unique_sites(df_profiles, df_sites)

    logger.info('Setting pandas index')
    df_profiles.set_index(['site_id', 'profile_type', 'x'], inplace=True)
    df_waves.set_index(['site_id', 'datetime'], inplace=True)
    df_tides.set_index(['site_id', 'datetime'], inplace=True)
    df_sites.set_index(['site_id'], inplace=True)

    logger.info('Outputting .csv files')
    df_profiles.to_csv('../../data/interim/profiles.csv')
    df_tides.to_csv('../../data/interim/tides.csv')
    df_waves.to_csv('../../data/interim/waves.csv')
    df_sites.to_csv('../../data/interim/sites.csv')
    logger.info('Done!')


if __name__ == '__main__':
    main()
@@ -0,0 +1,76 @@
import pandas as pd
import os
import fiona
from shapely.geometry import LineString, Point
from shapely.geometry import shape
from shapely.ops import transform
import pyproj
from functools import partial
import numpy as np


def shapes_from_shp(shp_file):
    """
    Parses a shape file and returns a list of shapely shapes
    :param shp_file:
    :return:
    """
    shapes = []
    for feat in fiona.open(shp_file):
        shapes.append(shape(feat['geometry']))
    return shapes


def convert_coord_systems(g1, in_coord_system='EPSG:4326', out_coord_system='EPSG:28356'):
    """
    Converts coordinates from one coordinate system to another. Needed because shapefiles are usually defined in
    lat/lon but should be converted to a projected system to calculate distances.
    https://gis.stackexchange.com/a/127432
    :param g1: shapely geometry to reproject
    :param in_coord_system: Default is lat/lon WGS84
    :param out_coord_system: Default is GDA94 / MGA Zone 56 for the NSW coastline
    :return:
    """
    project = partial(
        pyproj.transform,
        pyproj.Proj(init=in_coord_system),  # source coordinate system
        pyproj.Proj(init=out_coord_system))  # destination coordinate system

    g2 = transform(project, g1)  # apply projection
    return g2


def distance_to_intersection(lat, lon, orientation, line_strings):
    """
    Returns the distance at which a line drawn from a lat/lon at an orientation intersects a line string
    :param lat:
    :param lon:
    :param orientation: Angle, clockwise positive from true north in degrees, of the tangent to the shoreline facing
                        towards the land.
    :param line_strings:
    :return:
    """
    start_point = Point(lon, lat)
    start_point = convert_coord_systems(start_point)

    distance = 1000  # m; look up to 1000 m for an intersection

    # For a bearing measured clockwise from true north, the easting offset is
    # distance * sin(bearing) and the northing offset is distance * cos(bearing).
    new_point = Point(start_point.x + distance * np.sin(np.deg2rad(orientation)),
                      start_point.y + distance * np.cos(np.deg2rad(orientation)))
    profile_line = LineString([start_point, new_point])

    # Check whether profile_line intersects with any lines in line_strings
    for line_string in line_strings:
        intersection_points = profile_line.intersection(line_string)
        if not intersection_points.is_empty:
            return intersection_points.distance(start_point)

    return None


def get_sites_dune_crest_toe():
    data_folder = './data/interim'
    df_sites = pd.read_csv(os.path.join(data_folder, 'sites.csv'), index_col=[0])

    # Import the dune crest shapefile and project it for distance calculations
    for f in ['./data/raw/profile_features/dune_crests.shp']:
        shapes = shapes_from_shp(f)
        shapes = [convert_coord_systems(x) for x in shapes]

    # Iterate through each site
@@ -0,0 +1,27 @@
[loggers]
keys=root, matplotlib

[handlers]
keys=consoleHandler

[formatters]
keys=simpleFormatter

[logger_root]
level=DEBUG
handlers=consoleHandler

[logger_matplotlib]
level=WARNING
handlers=consoleHandler
qualname=matplotlib

[handler_consoleHandler]
class=StreamHandler
level=DEBUG
formatter=simpleFormatter
args=(sys.stdout,)

[formatter_simpleFormatter]
format=%(asctime)s %(name)-17s %(levelname)-8s %(message)s
datefmt=%a, %d %b %Y %H:%M:%S