Initial commit
commit 85fa9e55c2
@@ -0,0 +1,11 @@
# Jupyter NB Checkpoints
.ipynb_checkpoints/

# exclude data from source control by default
/data/

# Pycharm
.idea

# DotEnv configuration
.env
@@ -0,0 +1,45 @@

#################################################################################
# PROJECT RULES                                                                 #
#################################################################################

.PHONY: mat-to-csv
mat-to-csv: ##@data Converts raw .mat files to .csv for python
	cd ./src/data/ && python mat_to_csv.py

sites-csv-to-shp: ./data/interim/sites.csv
	cd ./src/data && python csv_to_shp.py

#################################################################################
# Self Documenting Commands                                                     #
#################################################################################

.DEFAULT_GOAL := help
.PHONY: help

# Refer to https://gist.github.com/prwhite/8168133

# COLORS
GREEN := $(shell tput -Txterm setaf 2)
WHITE := $(shell tput -Txterm setaf 7)
YELLOW := $(shell tput -Txterm setaf 3)
RESET := $(shell tput -Txterm sgr0)

# Add the following 'help' target to your Makefile
# And add help text after each target name starting with '\#\#'
# A category can be added with @category
HELP_FUN = \
    %help; \
    while(<>) { push @{$$help{$$2 // 'options'}}, [$$1, $$3] if /^([a-zA-Z\-]+)\s*:.*\#\#(?:@([a-zA-Z\-]+))?\s(.*)$$/ }; \
    print "usage: make [target]\n\n"; \
    for (sort keys %help) { \
    print "${WHITE}$$_:${RESET}\n"; \
    for (@{$$help{$$_}}) { \
    $$sep = " " x (32 - length $$_->[0]); \
    print "  ${YELLOW}$$_->[0]${RESET}$$sep${GREEN}$$_->[1]${RESET}\n"; \
    }; \
    print "\n"; }

help: ##@other Show this help.
	@perl -e '$(HELP_FUN)' $(MAKEFILE_LIST)
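
With the targets above, `make help` should print something along these lines (an illustrative sketch only; colours omitted, and `sites-csv-to-shp` is not listed because it has no `##` help comment):

```
usage: make [target]

data:
  mat-to-csv                      Converts raw .mat files to .csv for python

other:
  help                            Show this help.
```
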
@@ -0,0 +1,21 @@
# 2016 Narrabeen Storm EWS Performance

This repository investigates whether the storm impacts (as defined by Sallenger, 2000) of the June 2016 Narrabeen Storm could have been forecast in advance.

## Repository and analysis format

This repository follows the [Cookiecutter Data Science](https://drivendata.github.io/cookiecutter-data-science/) structure where possible. The analysis is done in Python (see the `/src/` folder), with some interactive, exploratory notebooks located at `/notebooks`.
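
For orientation, the folders referenced throughout this analysis are laid out roughly as follows (abridged to the paths that appear in this repository):

```
.
├── Makefile                 <- project rules and self-documenting `make help`
├── data
│   ├── raw                  <- original .mat files, aerial LIDAR and profile features
│   ├── interim              <- flattened .csv / .shp files written by the scripts in /src/data
│   └── processed            <- final outputs
├── notebooks
│   ├── 01_exploration.ipynb
│   └── qgis.qgz
└── src
    └── data                 <- mat_to_csv.py, csv_to_shp.py
```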

## Where to start?

Check out the Jupyter notebook `./notebooks/01_exploration.ipynb`, which shows how to import the data and includes some interactive widgets.
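
If you prefer a script, the interim CSVs produced by `make mat-to-csv` can be loaded directly with pandas. This is a minimal sketch mirroring the import code in `/src/data/`, and it assumes the files in `/data/interim/` have already been generated:

```python
import os

import pandas as pd

data_folder = './data/interim'

# Waves and tides are indexed by (site_id, datetime); profiles by (site_id, profile_type, x)
df_waves = pd.read_csv(os.path.join(data_folder, 'waves.csv'), index_col=[0, 1])
df_tides = pd.read_csv(os.path.join(data_folder, 'tides.csv'), index_col=[0, 1])
df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0, 1, 2])
df_sites = pd.read_csv(os.path.join(data_folder, 'sites.csv'), index_col=[0])
```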

## Available data

Raw, interim and processed data used in this analysis are kept in the `/data/` folder. A short example of selecting a single profile from the interim data follows the list below.

- `/data/raw/processed_shorelines`: This data was received from Tom Beuzen in October 2018. It consists of pre/post-storm profiles at sections spaced every 100 m along beaches ranging from Dee Why to Nambucca. Profiles are based on raw aerial LIDAR and were processed by Mitch Harley. Tides and waves (10 m contour and reverse-shoaled deepwater) for each individual 100 m section are also provided.
- `/data/raw/raw_lidar`: This is the raw pre/post-storm aerial LIDAR which was taken for the June 2016 storm. The `.las` files are the raw files, which have been processed into `.tiff` files using `PDAL`. Note that these files have not been corrected for systematic errors, so actual elevations should be taken from the `processed_shorelines` folder. Obtained in November 2018 from Mitch Harley, from the black external HDD labelled "UNSW LIDAR".
- `/data/raw/profile_features`: Dune toe and crest locations based on pre-storm LIDAR. Refer to `/notebooks/qgis.qgz`, which shows how they were manually extracted. Note that the shapefiles only show the location (lat/lon) of the dune crest and toe; for actual elevations, these locations need to be related to the processed shorelines.
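
Since `profiles.csv` is written with a `(site_id, profile_type, x)` index (see `main()` in `/src/data/mat_to_csv.py`), an individual pre-storm cross-section can be pulled straight out of the index. A minimal sketch, where `NARRA0001` is a hypothetical site id following the `{beach}{0001}` pattern created by `get_unique_sites()`:

```python
import pandas as pd

df_profiles = pd.read_csv('./data/interim/profiles.csv', index_col=[0, 1, 2])

# 'NARRA0001' is a hypothetical example id; substitute a real one from sites.csv.
prestorm = df_profiles.loc[('NARRA0001', 'prestorm')]
print(prestorm.head())  # elevation z indexed by cross-shore distance x
```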

## Notebooks

- `/notebooks/01_exploration.ipynb`: Shows how to import the processed shorelines, waves and tides. An interactive widget plots the location and cross-sections.
- `/notebooks/qgis.qgz`: A QGIS file which is used to explore the aerial LIDAR data in `/data/raw/raw_lidar`. By examining the pre-storm LIDAR, dune crest and dune toe lines are manually extracted. These are stored in `/data/raw/profile_features/`.

File diff suppressed because one or more lines are too long
Binary file not shown.
@@ -0,0 +1,13 @@
import pandas as pd
import os


def main():
    # Load the interim data files produced by mat_to_csv.py
    data_folder = './data/interim'
    df_waves = pd.read_csv(os.path.join(data_folder, 'waves.csv'), index_col=[0, 1])
    df_tides = pd.read_csv(os.path.join(data_folder, 'tides.csv'), index_col=[0, 1])
    df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0, 1, 2])
    df_sites = pd.read_csv(os.path.join(data_folder, 'sites.csv'), index_col=[0])


if __name__ == '__main__':
    main()
@@ -0,0 +1,38 @@
"""
Converts .csv files to .shp files
"""

from fiona.crs import from_epsg
import fiona
from shapely.geometry import Point, mapping
from fiona import collection
import pandas as pd
import os


def sites_csv_to_shp(input_csv='./data/interim/sites.csv', output_shp='./data/interim/sites.shp'):
    """
    Converts our dataframe of sites to .shp to load in QGIS
    :param input_csv: path to the sites .csv file
    :param output_shp: path of the .shp file to write
    :return:
    """
    df_sites = pd.read_csv(input_csv, index_col=[0])

    schema = {
        'geometry': 'Point',
        'properties': {
            'beach': 'str',
            'site_id': 'str'
        }
    }
    with fiona.open(output_shp, 'w', crs=from_epsg(4326), driver='ESRI Shapefile', schema=schema) as output:
        for index, row in df_sites.iterrows():
            point = Point(row['lon'], row['lat'])
            prop = {
                'beach': row['beach'],
                'site_id': index,
            }
            output.write({'geometry': mapping(point), 'properties': prop})


if __name__ == '__main__':
    sites_csv_to_shp()
@@ -0,0 +1,180 @@
"""
Converts raw .mat files into a flattened .csv structure which can be imported into python pandas.
"""

import logging.config
from datetime import datetime, timedelta

import pandas as pd
from mat4py import loadmat

logging.config.fileConfig('../logging.conf', disable_existing_loggers=False)
logger = logging.getLogger(__name__)


def parse_waves(waves_mat):
    """
    Parses the raw waves.mat file and returns a pandas dataframe
    :param waves_mat:
    :return:
    """
    logger.info('Parsing %s', waves_mat)
    mat_data = loadmat(waves_mat)['data']
    rows = []
    for i in range(0, len(mat_data['site'])):
        for j in range(0, len(mat_data['dates'][i])):
            rows.append({
                'beach': mat_data['site'][i],
                'lon': mat_data['lon'][i],
                'lat': mat_data['lat'][i],
                'datetime': matlab_datenum_to_datetime(mat_data['dates'][i][j][0]),
                'Hs': mat_data['H'][i][j][0],
                'Hs0': mat_data['Ho'][i][j][0],
                'Tp': mat_data['T'][i][j][0],
                'dir': mat_data['D'][i][j][0],
                'E': mat_data['E'][i][j][0],
                'P': mat_data['P'][i][j][0],
                'Exs': mat_data['Exs'][i][j][0],
                'Pxs': mat_data['Pxs'][i][j][0],
            })

    df = pd.DataFrame(rows)
    df['datetime'] = df['datetime'].dt.round('1s')
    return df


def parse_tides(tides_mat):
    """
    Parses the raw tides.mat file and returns a pandas dataframe
    :param tides_mat:
    :return:
    """
    logger.info('Parsing %s', tides_mat)
    mat_data = loadmat(tides_mat)['data']
    rows = []
    for i in range(0, len(mat_data['site'])):
        for j in range(0, len(mat_data['time'])):
            rows.append({
                'beach': mat_data['site'][i][0],
                'lon': mat_data['lons'][i][0],
                'lat': mat_data['lats'][i][0],
                'datetime': matlab_datenum_to_datetime(mat_data['time'][j][0]),
                'tide': mat_data['tide'][i][j]
            })

    df = pd.DataFrame(rows)
    df['datetime'] = df['datetime'].dt.round('1s')
    return df


def parse_profiles(profiles_mat):
    """
    Parses the raw profiles.mat file and returns a pandas dataframe
    :param profiles_mat:
    :return:
    """
    logger.info('Parsing %s', profiles_mat)
    mat_data = loadmat(profiles_mat)['data']
    rows = []
    for i in range(0, len(mat_data['site'])):
        for j in range(0, len(mat_data['pfx'][i])):
            for profile_type in ['prestorm', 'poststorm']:

                if profile_type == 'prestorm':
                    z = mat_data['pf1'][i][j][0]
                elif profile_type == 'poststorm':
                    z = mat_data['pf2'][i][j][0]

                rows.append({
                    'beach': mat_data['site'][i],
                    'lon': mat_data['lon'][i],
                    'lat': mat_data['lat'][i],
                    'profile_type': profile_type,
                    'x': mat_data['pfx'][i][j][0],
                    'z': z,
                })

    df = pd.DataFrame(rows)
    return df


def matlab_datenum_to_datetime(matlab_datenum):
    # Matlab datenums count days from year 0; subtracting 366 days aligns them
    # with Python's proleptic Gregorian ordinal (https://stackoverflow.com/a/13965852).
    return datetime.fromordinal(int(matlab_datenum)) + timedelta(days=matlab_datenum % 1) - timedelta(
        days=366)


def get_unique_sites(dfs, cols=['beach', 'lat', 'lon']):
    """
    Generates a dataframe of unique sites based on beach names, lats and lons. Creates a unique site ID for each.
    :param dfs:
    :param cols:
    :return:
    """

    rows = []
    df_all = pd.concat([df[cols] for df in dfs])
    beach_groups = df_all.groupby(['beach'])
    for beach_name, beach_group in beach_groups:
        site_groups = beach_group.groupby(['lat', 'lon'])
        siteNo = 1
        for site_name, site_group in site_groups:
            site = '{}{:04d}'.format(beach_name, siteNo)
            rows.append({'site_id': site,
                         'lat': site_name[0],
                         'lon': site_name[1],
                         'beach': beach_name})
            siteNo += 1

    df = pd.DataFrame(rows)

    return df


def replace_unique_sites(df, df_sites, cols=['beach', 'lat', 'lon']):
    """
    Replaces beach/lat/lon columns with the unique site_id
    :param df:
    :param df_sites:
    :return:
    """

    df_merged = df.merge(df_sites, on=cols)

    # Check that all our records have a unique site identifier
    n_unmatched = len(df) - len(df_merged)
    if n_unmatched > 0:
        logger.warning('%d of %d records could not be matched with a unique site', n_unmatched, len(df))

    df_merged = df_merged.drop(columns=cols)

    return df_merged


def main():
    df_waves = parse_waves(waves_mat='../../data/raw/waves.mat')
    df_tides = parse_tides(tides_mat='../../data/raw/tides.mat')
    df_profiles = parse_profiles(profiles_mat='../../data/raw/profiles.mat')
    df_sites = get_unique_sites(dfs=[df_waves, df_tides, df_profiles])

    logger.info('Identifying unique sites')
    df_waves = replace_unique_sites(df_waves, df_sites)
    df_tides = replace_unique_sites(df_tides, df_sites)
    df_profiles = replace_unique_sites(df_profiles, df_sites)

    logger.info('Setting pandas index')
    df_profiles.set_index(['site_id', 'profile_type', 'x'], inplace=True)
    df_waves.set_index(['site_id', 'datetime'], inplace=True)
    df_tides.set_index(['site_id', 'datetime'], inplace=True)
    df_sites.set_index(['site_id'], inplace=True)

    logger.info('Outputting .csv files')
    df_profiles.to_csv('../../data/interim/profiles.csv')
    df_tides.to_csv('../../data/interim/tides.csv')
    df_waves.to_csv('../../data/interim/waves.csv')
    df_sites.to_csv('../../data/interim/sites.csv')
    logger.info('Done!')


if __name__ == '__main__':
    main()
@@ -0,0 +1,76 @@
import pandas as pd
import os
import fiona
from shapely.geometry import LineString, Point
from shapely.geometry import shape
from shapely.ops import transform
import pyproj
from functools import partial
import numpy as np


def shapes_from_shp(shp_file):
    """
    Parses a shape file and returns a list of shapely shapes
    :param shp_file:
    :return:
    """
    shapes = []
    for feat in fiona.open(shp_file):
        shapes.append(shape(feat['geometry']))
    return shapes


def convert_coord_systems(g1, in_coord_system='EPSG:4326', out_coord_system='EPSG:28356'):
    """
    Converts coordinates from one coordinate system to another. Needed because shapefiles are usually defined in
    lat/lon but should be converted to a projected system to calculate distances.
    https://gis.stackexchange.com/a/127432
    :param g1: shapely geometry to reproject
    :param in_coord_system: Default is lat/lon WGS84
    :param out_coord_system: Default is GDA94 / MGA Zone 56 for the NSW coastline
    :return:
    """
    project = partial(
        pyproj.transform,
        pyproj.Proj(init=in_coord_system),  # source coordinate system
        pyproj.Proj(init=out_coord_system))  # destination coordinate system

    g2 = transform(project, g1)  # apply projection
    return g2


def distance_to_intersection(lat, lon, orientation, line_strings):
    """
    Returns the distance at which a line drawn from a lat/lon at an orientation intersects a line string
    :param lat:
    :param lon:
    :param orientation: Angle, clockwise positive from true north in degrees, of the tangent to the shoreline facing
                        towards the land.
    :param line_strings:
    :return:
    """
    start_point = Point(lon, lat)
    start_point = convert_coord_systems(start_point)

    distance = 1000  # m; look up to 1000 m for an intersection

    # For a bearing measured clockwise from true north, the easting offset is
    # distance * sin(bearing) and the northing offset is distance * cos(bearing).
    new_point = Point(start_point.x + distance * np.sin(np.deg2rad(orientation)),
                      start_point.y + distance * np.cos(np.deg2rad(orientation)))
    profile_line = LineString([start_point, new_point])

    # Check whether profile_line intersects with any lines in line_strings
    for line_string in line_strings:
        intersection_points = profile_line.intersection(line_string)
        if not intersection_points.is_empty:
            return intersection_points.distance(start_point)

    return None


def get_sites_dune_crest_toe():
    data_folder = './data/interim'
    df_sites = pd.read_csv(os.path.join(data_folder, 'sites.csv'), index_col=[0])

    # Import the dune crest shapefile and project it for distance calculations
    for f in ['./data/raw/profile_features/dune_crests.shp']:
        shapes = shapes_from_shp(f)
        shapes = [convert_coord_systems(x) for x in shapes]

    # Iterate through each site
@@ -0,0 +1,27 @@
[loggers]
keys=root, matplotlib

[handlers]
keys=consoleHandler

[formatters]
keys=simpleFormatter

[logger_root]
level=DEBUG
handlers=consoleHandler

[logger_matplotlib]
level=WARNING
handlers=consoleHandler
qualname=matplotlib

[handler_consoleHandler]
class=StreamHandler
level=DEBUG
formatter=simpleFormatter
args=(sys.stdout,)

[formatter_simpleFormatter]
format=%(asctime)s %(name)-17s %(levelname)-8s %(message)s
datefmt=%a, %d %b %Y %H:%M:%S