Merge branch 'feature/refactor-commands' into develop

develop | Chris Leaman, 6 years ago | commit f73ceb5bcf

.env (20 lines changed)

@@ -0,0 +1,20 @@
# Environment variables go here; they are read automatically when using "pipenv run python 'file.py'"
# Location where data is backed up to. A copy of the data set can be pulled from here.
DATA_BACKUP_DIR="J:/Coastal/Temp/CKL/nsw_2016_storm_impact/data"
# Location where the matlab interpreter is located. Required for a couple of data processing scripts.
MATLAB_PATH="C:/Program Files/MATLAB/R2016b/bin/win64/MATLAB.exe"
# Number of threads to use for multi-core processing. Used when calculating time-varying beach slope when estimating
# total water level.
MULTIPROCESS_THREADS=2
# The settings below should be left as is unless you know what you're doing.
# We want to create the pipenv virtualenv in the current folder
PIPENV_VENV_IN_PROJECT=1
# Need to set PYTHONPATH so that relative imports can be properly used with pipenv
# Refer to https://stackoverflow.com/q/52986500 and https://stackoverflow.com/a/49797761
PYTHONPATH=${PWD}
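Once pipenv loads this file, the values above are ordinary environment variables. A minimal sketch (not part of the commit) of how a script in this repository can read them; the fallback of 4 threads mirrors the default used in src/analysis/forecast_twl.py below:

import os

# Thread count for multiprocessing, falling back to 4 if .env has not been loaded
multiprocess_threads = int(os.environ.get("MULTIPROCESS_THREADS", 4))
# Backup location used by the Makefile's rclone push-data/pull-data targets
data_backup_dir = os.environ.get("DATA_BACKUP_DIR")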

.gitignore (vendored, 7 lines changed)

@@ -7,11 +7,14 @@
# Pycharm
.idea
# DotEnv configuration
.env
# Matlab
*.asv
# DotEnv configuration
# .env
# Python
__pycache__/
*.py[cod]
*$py.class
/.venv/

Makefile
@@ -1,9 +1,24 @@
DATA_BACKUP_DIR = "J:/Coastal/Temp/CKL/nsw_2016_storm_impact/data"
###############################
# Load environment variables
#################################################################################
# PROJECT RULES #
#################################################################################
.PHONY: push-data mat_to_csv sites-csv-to-shp
include .env
export $(shell sed 's/=.*//' .env)
CURRENT_DIR = $(shell pwd)
###############################
# Create python virtual environment
.PHONY: venv-init
venv-init: ##@environment Setup virtual environment
pip install pipenv
pipenv --python 3.7
pipenv install
###############################
# Get data from network drive
push-data: ##@data Copies data from ./data/ to data backup directory
rclone copy ./data/ $(DATA_BACKUP_DIR) --exclude "*.las" --progress
@@ -12,15 +27,125 @@ pull-data: ##@data Copies data from data backup directory to ./data/
# We probably don't want to pull the raw LIDAR .las files, so let's exclude them
rclone copy $(DATA_BACKUP_DIR) ./data/ --exclude "*.las" --progress
###############################
# Process data
.PHONY: process-mat
process-mat: ./data/interim/sites.csv ./data/interim/waves.csv ./data/interim/profiles.csv ./data/interim/tides.csv ##@data Process all .mat to .csv
# Calculates beach orientations at each profile
./data/raw/processed_shorelines/orientations.mat: ./data/raw/processed_shorelines/profiles.mat
$(MATLAB_PATH) -nosplash -r "cd $(CURRENT_DIR); run('./src/data/beach_orientations.m'); quit"
# Produces a .csv of sites where our beach cross-sections are located
./data/interim/sites.csv: ./data/raw/processed_shorelines/*.mat
pipenv run python ./src/data/parse_mat.py create-sites-csv \
--waves-mat "./data/raw/processed_shorelines/waves.mat" \
--tides-mat "./data/raw/processed_shorelines/tides.mat" \
--profiles-mat "./data/raw/processed_shorelines/profiles.mat" \
--orientations-mat "./data/raw/processed_shorelines/orientations.mat" \
--output-file "./data/interim/sites.csv"
# Produces a .csv of waves for each site
./data/interim/waves.csv: ./data/interim/sites.csv ./data/raw/processed_shorelines/waves.mat
pipenv run python ./src/data/parse_mat.py create-waves-csv \
--waves-mat "./data/raw/processed_shorelines/waves.mat" \
--sites-csv "./data/interim/sites.csv" \
--output-file "./data/interim/waves.csv"
# Produces a .csv of profiles for each site
./data/interim/profiles.csv: ./data/interim/sites.csv ./data/raw/processed_shorelines/profiles.mat
pipenv run python ./src/data/parse_mat.py create-profiles-csv \
--profiles-mat "./data/raw/processed_shorelines/profiles.mat" \
--sites-csv "./data/interim/sites.csv" \
--output-file "./data/interim/profiles.csv"
# Produces a .csv of tides for each site
./data/interim/tides.csv: ./data/interim/sites.csv ./data/raw/processed_shorelines/tides.mat
pipenv run python ./src/data/parse_mat.py create-tides-csv \
--tides-mat "./data/raw/processed_shorelines/tides.mat" \
--sites-csv "./data/interim/sites.csv" \
--output-file "./data/interim/tides.csv"
# Creates a .shp of our sites to load into QGis
./data/interim/sites.shp: ./data/interim/sites.csv
pipenv run python ./src/data/csv_to_shp.py sites-csv-to-shp \
--input-csv "./data/interim/sites.csv" \
--output-shp "./data/interim/sites.shp"
# Creates a .csv of our dune toe and crest profile features
./data/interim/profile_features.csv: ./data/raw/profile_features/dune_crests.shp ./data/raw/profile_features/dune_toes.shp ./data/interim/sites.csv ./data/interim/profiles.csv
pipenv run python ./src/data/profile_features.py create-profile-features \
--dune-crest-shp "./data/raw/profile_features/dune_crests.shp" \
--dune-toe-shp "./data/raw/profile_features/dune_toes.shp" \
--sites-csv "./data/interim/sites.csv" \
--profiles-csv "./data/interim/profiles.csv" \
--output-csv "./data/interim/profile_features.csv"
# Creates a forecast of TWL using sto06 and the time-varying prestorm foreshore slope
./data/interim/twl_foreshore_slope_sto06.csv: ./data/interim/waves.csv ./data/interim/tides.csv ./data/interim/profiles.csv ./data/interim/sites.csv ./data/interim/profile_features.csv
pipenv run python ./src/analysis/forecast_twl.py create-twl-forecast \
--waves-csv "./data/interim/waves.csv" \
--tides-csv "./data/interim/tides.csv" \
--profiles-csv "./data/interim/profiles.csv" \
--profile-features-csv "./data/interim/profile_features.csv" \
--runup-function "sto06" \
--slope "foreshore" \
--output-file "./data/interim/twl_foreshore_slope_sto06.csv"
# Creates a forecast of TWL using sto06 and the prestorm mean foreshore slope
./data/interim/twl_mean_slope_sto06.csv: ./data/interim/waves.csv ./data/interim/tides.csv ./data/interim/profiles.csv ./data/interim/sites.csv ./data/interim/profile_features.csv
pipenv run python ./src/analysis/forecast_twl.py create-twl-forecast \
--waves-csv "./data/interim/waves.csv" \
--tides-csv "./data/interim/tides.csv" \
--profiles-csv "./data/interim/profiles.csv" \
--profile-features-csv "./data/interim/profile_features.csv" \
--runup-function "sto06" \
--slope "mean" \
--output-file "./data/interim/twl_mean_slope_sto06.csv"
./data/interim/impacts_observed.csv: ./data/interim/profiles.csv ./data/interim/profile_features.csv
pipenv run python ./src/analysis/observed_storm_impacts.py create-observed-impacts \
--profiles-csv "./data/interim/profiles.csv" \
--profile-features-csv "./data/interim/profile_features.csv" \
--output-file "./data/interim/impacts_observed.csv"
./data/interim/impacts_forecasted_mean_slope_sto06.csv: ./data/interim/profile_features.csv ./data/interim/twl_mean_slope_sto06.csv
pipenv run python ./src/analysis/forecasted_storm_impacts.py create-forecasted-impacts \
--profile-features-csv "./data/interim/profile_features.csv" \
--forecasted-twl-csv "./data/interim/twl_mean_slope_sto06.csv" \
--output-file "./data/interim/impacts_forecasted_mean_slope_sto06.csv"
./data/interim/impacts_forecasted_foreshore_slope_sto06.csv: ./data/interim/profile_features.csv ./data/interim/twl_foreshore_slope_sto06.csv
pipenv run python ./src/analysis/forecasted_storm_impacts.py create-forecasted-impacts \
--profile-features-csv "./data/interim/profile_features.csv" \
--forecasted-twl-csv "./data/interim/twl_foreshore_slope_sto06.csv" \
--output-file "./data/interim/impacts_forecasted_foreshore_slope_sto06.csv"
#################################################################################
# PROJECT RULES #
#################################################################################
.PHONY: push-data parse_mat sites-csv-to-shp
mat-to-csv: ##@data Converts raw .mat files to .csv for python
cd ./src/data/ && python mat_to_csv.py
cd ./src/data/ && python parse_mat.py
sites-csv-to-shp: ##@data Create the sites.shp from sites.csv
cd ./src/data && python csv_to_shp.py sites_csv_to_shp "..\..\data\interim\sites.csv" "..\..\data\interim\sites.shp"
#################################################################################
# Self Documenting Commands #
#################################################################################
###############################
# Misc commands
format: ./src/*.py ##@misc Check python file formatting
pipenv run black --line-length 120 "src/"
###############################
# Help command
.DEFAULT_GOAL := help
.PHONY: help

Pipfile
@@ -0,0 +1,25 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true
[dev-packages]
[packages]
numpy = "*"
scipy = "*"
pandas = "*"
matplotlib = "*"
click = "*"
mat4py = "*"
black = "*"
shapely = "*"
fiona = {file = "https://download.lfd.uci.edu/pythonlibs/h2ufg7oq/Fiona-1.7.13-cp37-cp37m-win_amd64.whl"}
gdal = {file = "https://download.lfd.uci.edu/pythonlibs/h2ufg7oq/GDAL-2.3.2-cp37-cp37m-win_amd64.whl"}
pyproj = {file = "https://download.lfd.uci.edu/pythonlibs/h2ufg7oq/pyproj-1.9.5.1-cp37-cp37m-win_amd64.whl"}
[requires]
python_version = "3.7"
[pipenv]
allow_prereleases = true

Pipfile.lock (generated, 309 lines changed)

@@ -0,0 +1,309 @@
{
"_meta": {
"hash": {
"sha256": "33595d902a1ea304411921342c1637944162170fafe98ebf510f973a4cba9bb1"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.7"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"appdirs": {
"hashes": [
"sha256:9e5896d1372858f8dd3344faf4e5014d21849c756c8d5701f78f8a103b372d92",
"sha256:d8b24664561d0d34ddfaec54636d502d7cea6e29c3eaf68f3df6180863e2166e"
],
"version": "==1.4.3"
},
"attrs": {
"hashes": [
"sha256:10cbf6e27dbce8c30807caf056c8eb50917e0eaafe86347671b57254006c3e69",
"sha256:ca4be454458f9dec299268d472aaa5a11f67a4ff70093396e1ceae9c76cf4bbb"
],
"version": "==18.2.0"
},
"black": {
"hashes": [
"sha256:817243426042db1d36617910df579a54f1afd659adb96fc5032fcf4b36209739",
"sha256:e030a9a28f542debc08acceb273f228ac422798e5215ba2a791a6ddeaaca22a5"
],
"index": "pypi",
"version": "==18.9b0"
},
"click": {
"hashes": [
"sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13",
"sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7"
],
"index": "pypi",
"version": "==7.0"
},
"click-plugins": {
"hashes": [
"sha256:b1ee1ccc9421c73007fe290680d97984eb6eaf5f4512b7620c6aa46031d6cb6b",
"sha256:dfed74b5063546a137de99baaaf742b4de4337ad2b3e1df5ec7c8a256adc0847"
],
"version": "==1.0.4"
},
"cligj": {
"hashes": [
"sha256:20f24ce9abfde3f758aec3399e6811b936b6772f360846c662c19bf5537b4f14",
"sha256:60c93dda4499562eb87509a8ff3535a7441053b766c9c26bcf874a732f939c7c",
"sha256:6c7d52d529a78712491974f975c33473f430c0f7beb18c0d7a402a743dcb460a"
],
"version": "==0.5.0"
},
"cycler": {
"hashes": [
"sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d",
"sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8"
],
"version": "==0.10.0"
},
"fiona": {
"file": "https://download.lfd.uci.edu/pythonlibs/h2ufg7oq/Fiona-1.7.13-cp37-cp37m-win_amd64.whl",
"hashes": [
"sha256:425a98dcf87c06a481e256803c8c68d7d4cd04f5e4505c4ab32a13571a3cc57a"
],
"index": "pypi",
"version": "==1.7.13"
},
"gdal": {
"file": "https://download.lfd.uci.edu/pythonlibs/h2ufg7oq/GDAL-2.3.2-cp37-cp37m-win_amd64.whl",
"hashes": [
"sha256:2f6c36ee59f9b24fb16514e4fce8b73e7833714feb9b8397f91662256e1b12d8"
],
"index": "pypi",
"version": "==2.3.2"
},
"kiwisolver": {
"hashes": [
"sha256:0ee4ed8b3ae8f5f712b0aa9ebd2858b5b232f1b9a96b0943dceb34df2a223bc3",
"sha256:0f7f532f3c94e99545a29f4c3f05637f4d2713e7fd91b4dd8abfc18340b86cd5",
"sha256:1a078f5dd7e99317098f0e0d490257fd0349d79363e8c923d5bb76428f318421",
"sha256:1aa0b55a0eb1bd3fa82e704f44fb8f16e26702af1a073cc5030eea399e617b56",
"sha256:2874060b91e131ceeff00574b7c2140749c9355817a4ed498e82a4ffa308ecbc",
"sha256:379d97783ba8d2934d52221c833407f20ca287b36d949b4bba6c75274bcf6363",
"sha256:3b791ddf2aefc56382aadc26ea5b352e86a2921e4e85c31c1f770f527eb06ce4",
"sha256:4329008a167fac233e398e8a600d1b91539dc33c5a3eadee84c0d4b04d4494fa",
"sha256:45813e0873bbb679334a161b28cb9606d9665e70561fd6caa8863e279b5e464b",
"sha256:53a5b27e6b5717bdc0125338a822605084054c80f382051fb945d2c0e6899a20",
"sha256:574f24b9805cb1c72d02b9f7749aa0cc0b81aa82571be5201aa1453190390ae5",
"sha256:66f82819ff47fa67a11540da96966fb9245504b7f496034f534b81cacf333861",
"sha256:79e5fe3ccd5144ae80777e12973027bd2f4f5e3ae8eb286cabe787bed9780138",
"sha256:83410258eb886f3456714eea4d4304db3a1fc8624623fc3f38a487ab36c0f653",
"sha256:8b6a7b596ce1d2a6d93c3562f1178ebd3b7bb445b3b0dd33b09f9255e312a965",
"sha256:9576cb63897fbfa69df60f994082c3f4b8e6adb49cccb60efb2a80a208e6f996",
"sha256:95a25d9f3449046ecbe9065be8f8380c03c56081bc5d41fe0fb964aaa30b2195",
"sha256:a424f048bebc4476620e77f3e4d1f282920cef9bc376ba16d0b8fe97eec87cde",
"sha256:aaec1cfd94f4f3e9a25e144d5b0ed1eb8a9596ec36d7318a504d813412563a85",
"sha256:acb673eecbae089ea3be3dcf75bfe45fc8d4dcdc951e27d8691887963cf421c7",
"sha256:b15bc8d2c2848a4a7c04f76c9b3dc3561e95d4dabc6b4f24bfabe5fd81a0b14f",
"sha256:b1c240d565e977d80c0083404c01e4d59c5772c977fae2c483f100567f50847b",
"sha256:c595693de998461bcd49b8d20568c8870b3209b8ea323b2a7b0ea86d85864694",
"sha256:ce3be5d520b4d2c3e5eeb4cd2ef62b9b9ab8ac6b6fedbaa0e39cdb6f50644278",
"sha256:e0f910f84b35c36a3513b96d816e6442ae138862257ae18a0019d2fc67b041dc",
"sha256:ea36e19ac0a483eea239320aef0bd40702404ff8c7e42179a2d9d36c5afcb55c",
"sha256:efabbcd4f406b532206b8801058c8bab9e79645b9880329253ae3322b7b02cd5",
"sha256:f923406e6b32c86309261b8195e24e18b6a8801df0cfc7814ac44017bfcb3939"
],
"version": "==1.0.1"
},
"mat4py": {
"hashes": [
"sha256:8272ce80747120ff44200b1fde341c657595813e1adf61262e44b52642c10dbe"
],
"index": "pypi",
"version": "==0.4.1"
},
"matplotlib": {
"hashes": [
"sha256:16aa61846efddf91df623bbb4598e63be1068a6b6a2e6361cc802b41c7a286eb",
"sha256:1975b71a33ac986bb39b6d5cfbc15c7b1f218f1134efb4eb3881839d6ae69984",
"sha256:2b222744bd54781e6cc0b717fa35a54e5f176ba2ced337f27c5b435b334ef854",
"sha256:317643c0e88fad55414347216362b2e229c130edd5655fea5f8159a803098468",
"sha256:4269ce3d1b897d46fc3cc2273a0cc2a730345bb47e4456af662e6fca85c89dd7",
"sha256:65214fd668975077cdf8d408ccf2b2d6bdf73b4e6895a79f8e99ce4f0b43fcdb",
"sha256:74bc213ab8a92d86a0b304d9359d1e1d14168d4c6121b83862c9d8a88b89a738",
"sha256:88949be0db54755995dfb0210d0099a8712a3c696c860441971354c3debfc4af",
"sha256:8e1223d868be89423ec95ada5f37aa408ee64fe76ccb8e4d5f533699ba4c0e4a",
"sha256:9fa00f2d7a552a95fa6016e498fdeb6d74df537853dda79a9055c53dfc8b6e1a",
"sha256:c27fd46cab905097ba4bc28d5ba5289930f313fb1970c9d41092c9975b80e9b4",
"sha256:c94b792af431f6adb6859eb218137acd9a35f4f7442cea57e4a59c54751c36af",
"sha256:f4c12a01eb2dc16693887a874ba948b18c92f425c4d329639ece6d3bb8e631bb"
],
"index": "pypi",
"version": "==3.0.2"
},
"munch": {
"hashes": [
"sha256:6ae3d26b837feacf732fb8aa5b842130da1daf221f5af9f9d4b2a0a6414b0d51"
],
"version": "==2.3.2"
},
"numpy": {
"hashes": [
"sha256:0df89ca13c25eaa1621a3f09af4c8ba20da849692dcae184cb55e80952c453fb",
"sha256:154c35f195fd3e1fad2569930ca51907057ae35e03938f89a8aedae91dd1b7c7",
"sha256:18e84323cdb8de3325e741a7a8dd4a82db74fde363dce32b625324c7b32aa6d7",
"sha256:1e8956c37fc138d65ded2d96ab3949bd49038cc6e8a4494b1515b0ba88c91565",
"sha256:23557bdbca3ccbde3abaa12a6e82299bc92d2b9139011f8c16ca1bb8c75d1e95",
"sha256:24fd645a5e5d224aa6e39d93e4a722fafa9160154f296fd5ef9580191c755053",
"sha256:36e36b6868e4440760d4b9b44587ea1dc1f06532858d10abba98e851e154ca70",
"sha256:3d734559db35aa3697dadcea492a423118c5c55d176da2f3be9c98d4803fc2a7",
"sha256:416a2070acf3a2b5d586f9a6507bb97e33574df5bd7508ea970bbf4fc563fa52",
"sha256:4a22dc3f5221a644dfe4a63bf990052cc674ef12a157b1056969079985c92816",
"sha256:4d8d3e5aa6087490912c14a3c10fbdd380b40b421c13920ff468163bc50e016f",
"sha256:4f41fd159fba1245e1958a99d349df49c616b133636e0cf668f169bce2aeac2d",
"sha256:561ef098c50f91fbac2cc9305b68c915e9eb915a74d9038ecf8af274d748f76f",
"sha256:56994e14b386b5c0a9b875a76d22d707b315fa037affc7819cda08b6d0489756",
"sha256:73a1f2a529604c50c262179fcca59c87a05ff4614fe8a15c186934d84d09d9a5",
"sha256:7da99445fd890206bfcc7419f79871ba8e73d9d9e6b82fe09980bc5bb4efc35f",
"sha256:99d59e0bcadac4aa3280616591fb7bcd560e2218f5e31d5223a2e12a1425d495",
"sha256:a4cc09489843c70b22e8373ca3dfa52b3fab778b57cf81462f1203b0852e95e3",
"sha256:a61dc29cfca9831a03442a21d4b5fd77e3067beca4b5f81f1a89a04a71cf93fa",
"sha256:b1853df739b32fa913cc59ad9137caa9cc3d97ff871e2bbd89c2a2a1d4a69451",
"sha256:b1f44c335532c0581b77491b7715a871d0dd72e97487ac0f57337ccf3ab3469b",
"sha256:b261e0cb0d6faa8fd6863af26d30351fd2ffdb15b82e51e81e96b9e9e2e7ba16",
"sha256:c857ae5dba375ea26a6228f98c195fec0898a0fd91bcf0e8a0cae6d9faf3eca7",
"sha256:cf5bb4a7d53a71bb6a0144d31df784a973b36d8687d615ef6a7e9b1809917a9b",
"sha256:db9814ff0457b46f2e1d494c1efa4111ca089e08c8b983635ebffb9c1573361f",
"sha256:df04f4bad8a359daa2ff74f8108ea051670cafbca533bb2636c58b16e962989e",
"sha256:ecf81720934a0e18526177e645cbd6a8a21bb0ddc887ff9738de07a1df5c6b61",
"sha256:edfa6fba9157e0e3be0f40168eb142511012683ac3dc82420bee4a3f3981b30e"
],
"index": "pypi",
"version": "==1.15.4"
},
"pandas": {
"hashes": [
"sha256:11975fad9edbdb55f1a560d96f91830e83e29bed6ad5ebf506abda09818eaf60",
"sha256:12e13d127ca1b585dd6f6840d3fe3fa6e46c36a6afe2dbc5cb0b57032c902e31",
"sha256:1c87fcb201e1e06f66e23a61a5fea9eeebfe7204a66d99df24600e3f05168051",
"sha256:242e9900de758e137304ad4b5663c2eff0d798c2c3b891250bd0bd97144579da",
"sha256:26c903d0ae1542890cb9abadb4adcb18f356b14c2df46e4ff657ae640e3ac9e7",
"sha256:2e1e88f9d3e5f107b65b59cd29f141995597b035d17cc5537e58142038942e1a",
"sha256:31b7a48b344c14691a8e92765d4023f88902ba3e96e2e4d0364d3453cdfd50db",
"sha256:4fd07a932b4352f8a8973761ab4e84f965bf81cc750fb38e04f01088ab901cb8",
"sha256:5b24ca47acf69222e82530e89111dd9d14f9b970ab2cd3a1c2c78f0c4fbba4f4",
"sha256:647b3b916cc8f6aeba240c8171be3ab799c3c1b2ea179a3be0bd2712c4237553",
"sha256:66b060946046ca27c0e03e9bec9bba3e0b918bafff84c425ca2cc2e157ce121e",
"sha256:6efa9fa6e1434141df8872d0fa4226fc301b17aacf37429193f9d70b426ea28f",
"sha256:be4715c9d8367e51dbe6bc6d05e205b1ae234f0dc5465931014aa1c4af44c1ba",
"sha256:bea90da782d8e945fccfc958585210d23de374fa9294a9481ed2abcef637ebfc",
"sha256:d318d77ab96f66a59e792a481e2701fba879e1a453aefeebdb17444fe204d1ed",
"sha256:d785fc08d6f4207437e900ffead930a61e634c5e4f980ba6d3dc03c9581748c7",
"sha256:de9559287c4fe8da56e8c3878d2374abc19d1ba2b807bfa7553e912a8e5ba87c",
"sha256:f4f98b190bb918ac0bc0e3dd2ab74ff3573da9f43106f6dba6385406912ec00f",
"sha256:f71f1a7e2d03758f6e957896ed696254e2bc83110ddbc6942018f1a232dd9dad",
"sha256:fb944c8f0b0ab5c1f7846c686bc4cdf8cde7224655c12edcd59d5212cd57bec0"
],
"index": "pypi",
"version": "==0.23.4"
},
"pyparsing": {
"hashes": [
"sha256:40856e74d4987de5d01761a22d1621ae1c7f8774585acae358aa5c5936c6c90b",
"sha256:f353aab21fd474459d97b709e527b5571314ee5f067441dc9f88e33eecd96592"
],
"version": "==2.3.0"
},
"pyproj": {
"file": "https://download.lfd.uci.edu/pythonlibs/h2ufg7oq/pyproj-1.9.5.1-cp37-cp37m-win_amd64.whl",
"hashes": [
"sha256:2b8d0e937e1fa28b65bb351930ab2df9b5bd78e4cc953f7a5a415ff206a3acde"
],
"index": "pypi",
"version": "==1.9.5.1"
},
"python-dateutil": {
"hashes": [
"sha256:063df5763652e21de43de7d9e00ccf239f953a832941e37be541614732cdfc93",
"sha256:88f9287c0174266bb0d8cedd395cfba9c58e87e5ad86b2ce58859bc11be3cf02"
],
"version": "==2.7.5"
},
"pytz": {
"hashes": [
"sha256:31cb35c89bd7d333cd32c5f278fca91b523b0834369e757f4c5641ea252236ca",
"sha256:8e0f8568c118d3077b46be7d654cc8167fa916092e28320cde048e54bfc9f1e6"
],
"version": "==2018.7"
},
"scipy": {
"hashes": [
"sha256:0611ee97296265af4a21164a5323f8c1b4e8e15c582d3dfa7610825900136bb7",
"sha256:08237eda23fd8e4e54838258b124f1cd141379a5f281b0a234ca99b38918c07a",
"sha256:0e645dbfc03f279e1946cf07c9c754c2a1859cb4a41c5f70b25f6b3a586b6dbd",
"sha256:0e9bb7efe5f051ea7212555b290e784b82f21ffd0f655405ac4f87e288b730b3",
"sha256:108c16640849e5827e7d51023efb3bd79244098c3f21e4897a1007720cb7ce37",
"sha256:340ef70f5b0f4e2b4b43c8c8061165911bc6b2ad16f8de85d9774545e2c47463",
"sha256:3ad73dfc6f82e494195144bd3a129c7241e761179b7cb5c07b9a0ede99c686f3",
"sha256:3b243c77a822cd034dad53058d7c2abf80062aa6f4a32e9799c95d6391558631",
"sha256:404a00314e85eca9d46b80929571b938e97a143b4f2ddc2b2b3c91a4c4ead9c5",
"sha256:423b3ff76957d29d1cce1bc0d62ebaf9a3fdfaf62344e3fdec14619bb7b5ad3a",
"sha256:42d9149a2fff7affdd352d157fa5717033767857c11bd55aa4a519a44343dfef",
"sha256:625f25a6b7d795e8830cb70439453c9f163e6870e710ec99eba5722775b318f3",
"sha256:698c6409da58686f2df3d6f815491fd5b4c2de6817a45379517c92366eea208f",
"sha256:729f8f8363d32cebcb946de278324ab43d28096f36593be6281ca1ee86ce6559",
"sha256:8190770146a4c8ed5d330d5b5ad1c76251c63349d25c96b3094875b930c44692",
"sha256:878352408424dffaa695ffedf2f9f92844e116686923ed9aa8626fc30d32cfd1",
"sha256:8b984f0821577d889f3c7ca8445564175fb4ac7c7f9659b7c60bef95b2b70e76",
"sha256:8f841bbc21d3dad2111a94c490fb0a591b8612ffea86b8e5571746ae76a3deac",
"sha256:c22b27371b3866c92796e5d7907e914f0e58a36d3222c5d436ddd3f0e354227a",
"sha256:d0cdd5658b49a722783b8b4f61a6f1f9c75042d0e29a30ccb6cacc9b25f6d9e2",
"sha256:d40dc7f494b06dcee0d303e51a00451b2da6119acbeaccf8369f2d29e28917ac",
"sha256:d8491d4784aceb1f100ddb8e31239c54e4afab8d607928a9f7ef2469ec35ae01",
"sha256:dfc5080c38dde3f43d8fbb9c0539a7839683475226cf83e4b24363b227dfe552",
"sha256:e24e22c8d98d3c704bb3410bce9b69e122a8de487ad3dbfe9985d154e5c03a40",
"sha256:e7a01e53163818d56eabddcafdc2090e9daba178aad05516b20c6591c4811020",
"sha256:ee677635393414930541a096fc8e61634304bb0153e4e02b75685b11eba14cae",
"sha256:f0521af1b722265d824d6ad055acfe9bd3341765735c44b5a4d0069e189a0f40",
"sha256:f25c281f12c0da726c6ed00535ca5d1622ec755c30a3f8eafef26cf43fede694"
],
"index": "pypi",
"version": "==1.1.0"
},
"shapely": {
"hashes": [
"sha256:045e991636787c22bf3e18b57cdaa200681acc0e5db0720123643909d99ad32b",
"sha256:2e8398aacf67cfdfcd64154738c809fea52008afefb4704103f43face369230d",
"sha256:56b8184ef9cf2e2e1dd09ccfe341028af08ea57254524c9458e7f115655385af",
"sha256:7268fd767dc88ef083a528a1e8977a358c7a56cb349aae9e4c36913cfba30857",
"sha256:7e06705e0a20e10f0ce35b233b32b57f6b77044e58e2ad4023d6e64f6c3719a7",
"sha256:937502b7f7bfea39910e30617a30d74ce1b6585895b3d8a2a4602c223a0dd73c",
"sha256:99dc867fe6519c1af1840cceea8bcf5dd1ece077207bdcb19072cdb4fbda8584",
"sha256:9e45485c49fd9ee81a81be756e648a0c1c125e770e3ed42845350d75a46723ad",
"sha256:e3c3eb85f7d4308ccbfcdd23513bfe201b193673c98400219b9a480b903b3033",
"sha256:eb4f295b1ff558857d8061ff7716b1e10ec3c24b5b784bccb51dc87e6fd3ad07",
"sha256:f87c677c0b176827167d1ebad37bba36a9e6baf61f608ff8ef4b9d9ff002c3c3",
"sha256:ffe14cf22da9c95aa87a287ddb96202e3cbb4ec1ec862050d9e4b114307fa206"
],
"index": "pypi",
"version": "==1.7a1"
},
"six": {
"hashes": [
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
],
"version": "==1.11.0"
},
"toml": {
"hashes": [
"sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c",
"sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e"
],
"version": "==0.10.0"
}
},
"develop": {}
}

README.md
@@ -1,56 +1,61 @@
# 2016 Narrabeen Storm EWS Performance
This repository investigates whether the storm impacts (i.e. Sallenger, 2000) of the June 2016 Narrabeen Storm could
have been forecasted in advance.
## Repository and analysis format
This repository follows the [Cookiecutter Data Science](https://drivendata.github.io/cookiecutter-data-science/)
structure where possible. The analysis is done in python (look at the `/src/` folder) with some interactive,
exploratory notebooks located at `/notebooks`.
Development is conducted using a [gitflow](https://www.atlassian
.com/git/tutorials/comparing-workflows/gitflow-workflow) approach - mainly the `master` branch stores the official
release history and the `develop` branch serves as an integration branch for features. Other `hotfix` and `feature`
branches should be created and merged as necessary.
## Where to start?
Check .env
Uses pipenv
1. Clone this repository.
2. Pull data from WRL coastal J drive with `make pull-data`
3. Check out jupyter notebook `./notebooks/01_exploration.ipynb` which has an example of how to import the data and
some interactive widgets.
## Requirements
The following requirements are needed to run various bits:
- [Python 3.6+](https://conda.io/docs/user-guide/install/windows.html): Used for processing and analysing data.
Jupyter notebooks are used for exploratory analysis and communication.
- [QGIS](https://www.qgis.org/en/site/forusers/download): Used for looking at raw LIDAR pre/post storm surveys and
extracting dune crests/toes
- [rclone](https://rclone.org/downloads/): Data is not tracked by this repository, but is backed up to a remote
Chris Leaman working directory located on the WRL coastal drive. Rclone is used to sync local and remote copies.
Ensure rclone.exe is located on your `PATH` environment variable.
- [gnuMake](http://gnuwin32.sourceforge.net/packages/make.htm): A list of commands for processing data is provided in
the `./Makefile`. Use gnuMake to launch these commands. Ensure make.exe is located on your `PATH` environment variable.
## Available data
Raw, interim and processed data used in this analysis is kept in the `/data/` folder. Data is not tracked in the
repository due to size constraints, but stored locally. A mirror is kept of the coastal folder J drive which you can
use to push/pull to, using rclone. In order to get the data, run `make pull-data`.
List of data:
- `/data/raw/processed_shorelines`: This data was received from Tom Beuzen in October 2018. It consists of pre/post
storm profiles at every 100 m section along beaches ranging from Dee Why to Nambucca. Profiles are based on raw
aerial LIDAR and were processed by Mitch Harley. Tides and waves (10 m contour and reverse shoaled deepwater) for
each individual 100 m section are also provided.
- `/data/raw/raw_lidar`: This is the raw pre/post storm aerial LIDAR which was taken for the June 2016 storm. `.las`
files are the raw files which have been processed into `.tiff` files using `PDAL`. Note that these files have not
been corrected for systematic errors, so actual elevations should be taken from the `processed_shorelines` folder.
Obtained November 2018 from Mitch Harley from the black external HDD labeled "UNSW LIDAR".
- `/data/raw/profile_features`: Dune toe and crest locations based on prestorm LIDAR. Refer to `/notebooks/qgis.qgz`
as this shows how they were manually extracted. Note that the shapefiles only show the location (lat/lon) of the dune
crest and toe. For actual elevations, these locations need to be related to the processed shorelines.
## Notebooks
- `/notebooks/01_exploration.ipynb`: Shows how to import processed shorelines, waves and tides. An interactive widget
plots the location and cross sections.
- `/notebooks/qgis.qgz`: A QGIS file which is used to explore the aerial LIDAR data in `/data/raw/raw_lidar`. By
examining the pre-storm lidar, dune crest and dune toe lines are manually extracted. These are stored in the
`/data/profile_features/`.
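For orientation, a minimal sketch (not part of the commit) of the kind of import the exploration notebook performs; paths assume the repository root as the working directory and the interim files produced by `make process-mat`:

import pandas as pd

# Interim .csv files are written with multi-indexes (site_id, plus datetime or x where applicable)
df_sites = pd.read_csv("./data/interim/sites.csv", index_col=[0])
df_waves = pd.read_csv("./data/interim/waves.csv", index_col=[0, 1])
df_tides = pd.read_csv("./data/interim/tides.csv", index_col=[0, 1])
df_profiles = pd.read_csv("./data/interim/profiles.csv", index_col=[0, 1, 2])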
## TODO
https://ljvmiranda921.github.io/notebook/2018/06/21/precommits-using-black-and-flake8/

@@ -1,13 +1,15 @@
import pandas as pd
import os
def main():
data_folder = './data/interim'
df_waves = pd.read_csv(os.path.join(data_folder, 'waves.csv'), index_col=[0,1])
df_tides = pd.read_csv(os.path.join(data_folder, 'tides.csv'), index_col=[0,1])
df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0,1,2])
df_sites = pd.read_csv(os.path.join(data_folder, 'sites.csv'),index_col=[0])
data_folder = "./data/interim"
df_waves = pd.read_csv(os.path.join(data_folder, "waves.csv"), index_col=[0, 1])
df_tides = pd.read_csv(os.path.join(data_folder, "tides.csv"), index_col=[0, 1])
df_profiles = pd.read_csv(os.path.join(data_folder, "profiles.csv"), index_col=[0, 1, 2])
df_sites = pd.read_csv(os.path.join(data_folder, "sites.csv"), index_col=[0])
if __name__ == '__main__':
if __name__ == "__main__":
main()

@@ -7,7 +7,7 @@ import os
import pandas as pd
logging.config.fileConfig('./src/logging.conf', disable_existing_loggers=False)
logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
logger = logging.getLogger(__name__)
@@ -18,15 +18,16 @@ def compare_impacts(df_forecasted, df_observed):
:param df_observed:
:return:
"""
df_compared = df_forecasted.merge(df_observed, left_index=True, right_index=True,
suffixes=['_forecasted', '_observed'])
df_compared = df_forecasted.merge(
df_observed, left_index=True, right_index=True, suffixes=["_forecasted", "_observed"]
)
return df_compared
if __name__ == '__main__':
logger.info('Importing existing data')
data_folder = './data/interim'
df_forecasted = pd.read_csv(os.path.join(data_folder, 'impacts_forecasted_mean_slope_sto06.csv'), index_col=[0])
df_observed = pd.read_csv(os.path.join(data_folder, 'impacts_observed.csv'), index_col=[0])
if __name__ == "__main__":
logger.info("Importing existing data")
data_folder = "./data/interim"
df_forecasted = pd.read_csv(os.path.join(data_folder, "impacts_forecasted_mean_slope_sto06.csv"), index_col=[0])
df_observed = pd.read_csv(os.path.join(data_folder, "impacts_observed.csv"), index_col=[0])
df_compared = compare_impacts(df_forecasted, df_observed)
df_compared.to_csv(os.path.join(data_folder, 'impacts_observed_vs_forecasted_mean_slope_sto06.csv'))
df_compared.to_csv(os.path.join(data_folder, "impacts_observed_vs_forecasted_mean_slope_sto06.csv"))
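For context, a toy illustration (not part of the commit) of what the suffixes argument above produces when both frames carry the same column name:

import pandas as pd

idx = pd.Index(["SITE0001"], name="site_id")  # illustrative site_id only
df_forecasted = pd.DataFrame({"storm_regime": ["swash"]}, index=idx)
df_observed = pd.DataFrame({"storm_regime": ["collision"]}, index=idx)
df_compared = df_forecasted.merge(
    df_observed, left_index=True, right_index=True, suffixes=["_forecasted", "_observed"]
)
print(df_compared.columns.tolist())  # ['storm_regime_forecasted', 'storm_regime_observed']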

src/analysis/forecast_twl.py
@@ -1,62 +1,74 @@
import logging.config
import os
from multiprocessing import Pool
import click
import numpy as np
import numpy.ma as ma
import pandas as pd
from scipy import stats
from src.analysis.runup_models import sto06_individual, sto06
logging.config.fileConfig('./src/logging.conf', disable_existing_loggers=False)
from src.analysis import runup_models
logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
logger = logging.getLogger(__name__)
MULTIPROCESS_THREADS = int(os.environ.get("MULTIPROCESS_THREADS", 4))
def forecast_twl(df_tides, df_profiles, df_waves, df_profile_features, runup_function, n_processes=4,
slope='foreshore'):
def forecast_twl(
df_tides,
df_profiles,
df_waves,
df_profile_features,
runup_function,
n_processes=MULTIPROCESS_THREADS,
slope="foreshore",
):
# Use df_waves as a base
df_twl = df_waves.copy()
# Merge tides
logger.info('Merging tides')
logger.info("Merging tides")
df_twl = df_twl.merge(df_tides, left_index=True, right_index=True)
# Estimate foreshore slope. Do the analysis per site_id. This is so we only have to query the x and z
# cross-section profiles once per site.
logger.info('Calculating beach slopes')
site_ids = df_twl.index.get_level_values('site_id').unique()
logger.info("Calculating beach slopes")
site_ids = df_twl.index.get_level_values("site_id").unique()
# site_ids = [x for x in site_ids if 'NARRA' in x] # todo remove this - for testing narrabeen only
if slope == 'foreshore':
if slope == "foreshore":
# Process each site_id with a different process and combine results at the end
with Pool(processes=n_processes) as pool:
results = pool.starmap(foreshore_slope_for_site_id,
[(site_id, df_twl, df_profiles) for site_id in site_ids])
df_twl['beta'] = pd.concat(results)
results = pool.starmap(
foreshore_slope_for_site_id, [(site_id, df_twl, df_profiles) for site_id in site_ids]
)
df_twl["beta"] = pd.concat(results)
elif slope == 'mean':
elif slope == "mean":
# todo mean beach profile
df_temp = df_twl.join(df_profile_features, how='inner')
df_temp['mhw'] = 0.5
df_temp = df_twl.join(df_profile_features, how="inner")
df_temp["mhw"] = 0.5
with Pool(processes=n_processes) as pool:
results = pool.starmap(mean_slope_for_site_id,
[(site_id, df_temp, df_profiles, 'dune_toe_z', 'mhw') for site_id in site_ids])
df_twl['beta'] = pd.concat(results)
results = pool.starmap(
mean_slope_for_site_id, [(site_id, df_temp, df_profiles, "dune_toe_z", "mhw") for site_id in site_ids]
)
df_twl["beta"] = pd.concat(results)
# Estimate runup
R2, setup, S_total, S_inc, S_ig = runup_function(df_twl, Hs0_col='Hs0', Tp_col='Tp', beta_col='beta')
R2, setup, S_total, S_inc, S_ig = runup_function(df_twl, Hs0_col="Hs0", Tp_col="Tp", beta_col="beta")
df_twl['R2'] = R2
df_twl['setup'] = setup
df_twl['S_total'] = S_total
df_twl["R2"] = R2
df_twl["setup"] = setup
df_twl["S_total"] = S_total
# Estimate TWL
df_twl['R_high'] = df_twl['tide'] + df_twl['R2']
df_twl['R_low'] = df_twl['tide'] + 1.1 * df_twl['setup'] - 1.1 / 2 * df_twl['S_total']
df_twl["R_high"] = df_twl["tide"] + df_twl["R2"]
df_twl["R_low"] = df_twl["tide"] + 1.1 * df_twl["setup"] - 1.1 / 2 * df_twl["S_total"]
# Drop unneeded columns
df_twl.drop(columns=['E', 'Exs', 'P', 'Pxs', 'dir'], inplace=True, errors='ignore')
df_twl.drop(columns=["E", "Exs", "P", "Pxs", "dir"], inplace=True, errors="ignore")
return df_twl
@@ -74,15 +86,21 @@ def mean_slope_for_site_id(site_id, df_twl, df_profiles, top_elevation_col, btm_
# Get the prestorm beach profile
profile = df_profiles.query("site_id =='{}' and profile_type == 'prestorm'".format(site_id))
profile_x = profile.index.get_level_values('x').tolist()
profile_x = profile.index.get_level_values("x").tolist()
profile_z = profile.z.tolist()
df_twl_site = df_twl.query("site_id == '{}'".format(site_id))
df_beta = df_twl_site.apply(lambda row: slope_from_profile(profile_x=profile_x, profile_z=profile_z,
top_elevation=row[top_elevation_col],
btm_elevation=row[btm_elevation_col],
method='end_points'), axis=1)
df_beta = df_twl_site.apply(
lambda row: slope_from_profile(
profile_x=profile_x,
profile_z=profile_z,
top_elevation=row[top_elevation_col],
btm_elevation=row[btm_elevation_col],
method="end_points",
),
axis=1,
)
return df_beta
@@ -99,16 +117,22 @@ def foreshore_slope_for_site_id(site_id, df_twl, df_profiles):
# Get the prestorm beach profile
profile = df_profiles.query("site_id =='{}' and profile_type == 'prestorm'".format(site_id))
profile_x = profile.index.get_level_values('x').tolist()
profile_x = profile.index.get_level_values("x").tolist()
profile_z = profile.z.tolist()
df_twl_site = df_twl.query("site_id == '{}'".format(site_id))
df_beta = df_twl_site.apply(lambda row: foreshore_slope_from_profile(profile_x=profile_x, profile_z=profile_z,
tide=row.tide,
runup_function=sto06_individual,
Hs0=row.Hs0,
Tp=row.Tp), axis=1)
df_beta = df_twl_site.apply(
lambda row: foreshore_slope_from_profile(
profile_x=profile_x,
profile_z=profile_z,
tide=row.tide,
runup_function=runup_models.sto06_individual,
Hs0=row.Hs0,
Tp=row.Tp,
),
axis=1,
)
return df_beta
@@ -137,9 +161,13 @@ def foreshore_slope_from_profile(profile_x, profile_z, tide, runup_function, **k
while True:
R2, setup, S_total, _, _ = runup_function(beta=beta, **kwargs)
beta_new = slope_from_profile(profile_x=profile_x, profile_z=profile_z, method='end_points',
top_elevation=tide + setup + S_total / 2,
btm_elevation=tide + setup - S_total / 2)
beta_new = slope_from_profile(
profile_x=profile_x,
profile_z=profile_z,
method="end_points",
top_elevation=tide + setup + S_total / 2,
btm_elevation=tide + setup - S_total / 2,
)
# Return None if we can't find a slope, usually because the elevations we've specified are above/below our
# profile x and z coordinates.
@@ -158,7 +186,7 @@ def foreshore_slope_from_profile(profile_x, profile_z, tide, runup_function, **k
iteration_count += 1
def slope_from_profile(profile_x, profile_z, top_elevation, btm_elevation, method='end_points'):
def slope_from_profile(profile_x, profile_z, top_elevation, btm_elevation, method="end_points"):
"""
Returns a slope (beta) from a bed profile, given the top and bottom elevations of where the slope should be taken.
:param x: List of x bed profile coordinates
@@ -173,16 +201,10 @@ def slope_from_profile(profile_x, profile_z, top_elevation, btm_elevation, metho
if any([x is None for x in [profile_x, profile_z, top_elevation, btm_elevation]]):
return None
end_points = {
'top': {
'z': top_elevation,
},
'btm': {
'z': btm_elevation,
}}
end_points = {"top": {"z": top_elevation}, "btm": {"z": btm_elevation}}
for end_type in end_points.keys():
elevation = end_points[end_type]['z']
elevation = end_points[end_type]["z"]
intersection_x = crossings(profile_x, profile_z, elevation)
# No intersections found
@@ -191,26 +213,26 @@ def slope_from_profile(profile_x, profile_z, top_elevation, btm_elevation, metho
# One intersection
elif len(intersection_x) == 1:
end_points[end_type]['x'] = intersection_x[0]
end_points[end_type]["x"] = intersection_x[0]
# More than one intersection
else:
if end_type == 'top':
if end_type == "top":
# For top elevation, take most seaward intersection
end_points[end_type]['x'] = intersection_x[-1]
end_points[end_type]["x"] = intersection_x[-1]
else:
# For bottom elevation, take most landward intersection that is seaward of top elevation
end_points[end_type]['x'] = [x for x in intersection_x if x > end_points['top']['x']][0]
end_points[end_type]["x"] = [x for x in intersection_x if x > end_points["top"]["x"]][0]
if method == 'end_points':
x_top = end_points['top']['x']
x_btm = end_points['btm']['x']
z_top = end_points['top']['z']
z_btm = end_points['btm']['z']
if method == "end_points":
x_top = end_points["top"]["x"]
x_btm = end_points["btm"]["x"]
z_top = end_points["top"]["z"]
z_btm = end_points["btm"]["z"]
return -(z_top - z_btm) / (x_top - x_btm)
elif method == 'least_squares':
profile_mask = [True if end_points['top']['x'] < pts < end_points['btm']['x'] else False for pts in x]
elif method == "least_squares":
profile_mask = [True if end_points["top"]["x"] < pts < end_points["btm"]["x"] else False for pts in profile_x]
slope_x = np.array(profile_x)[profile_mask].tolist()
slope_z = np.array(profile_z)[profile_mask].tolist()
slope, _, _, _, _ = stats.linregress(slope_x, slope_z)
@@ -245,23 +267,42 @@ def crossings(profile_x, profile_z, constant_z):
return [profile_x[i] - (profile_x[i] - profile_x[i + 1]) / (z[i] - z[i + 1]) * (z[i]) for i in indicies]
if __name__ == '__main__':
logger.info('Importing data')
data_folder = './data/interim'
df_waves = pd.read_csv(os.path.join(data_folder, 'waves.csv'), index_col=[0, 1])
df_tides = pd.read_csv(os.path.join(data_folder, 'tides.csv'), index_col=[0, 1])
df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0, 1, 2])
df_sites = pd.read_csv(os.path.join(data_folder, 'sites.csv'), index_col=[0])
df_profile_features = pd.read_csv(os.path.join(data_folder, 'profile_features.csv'), index_col=[0])
logger.info('Forecasting TWL')
df_twl_foreshore_slope_sto06 = forecast_twl(df_tides, df_profiles, df_waves, df_profile_features,
runup_function=sto06, slope='foreshore')
df_twl_foreshore_slope_sto06.to_csv(os.path.join(data_folder, 'twl_foreshore_slope_sto06.csv'))
df_twl_mean_slope_sto06 = forecast_twl(df_tides, df_profiles, df_waves, df_profile_features,
runup_function=sto06, slope='mean')
df_twl_mean_slope_sto06.to_csv(os.path.join(data_folder, 'twl_mean_slope_sto06.csv'))
logger.info('Done')
@click.command()
@click.option("--waves-csv", required=True, help="")
@click.option("--tides-csv", required=True, help="")
@click.option("--profiles-csv", required=True, help="")
@click.option("--profile-features-csv", required=True, help="")
@click.option("--runup-function", required=True, help="", type=click.Choice(["sto06"]))
@click.option("--slope", required=True, help="", type=click.Choice(["foreshore", "mean"]))
@click.option("--output-file", required=True, help="")
def create_twl_forecast(waves_csv, tides_csv, profiles_csv, profile_features_csv, runup_function, slope, output_file):
logger.info("Creating forecast of total water levels")
logger.info("Importing data")
df_waves = pd.read_csv(waves_csv, index_col=[0, 1])
df_tides = pd.read_csv(tides_csv, index_col=[0, 1])
df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2])
df_profile_features = pd.read_csv(profile_features_csv, index_col=[0])
logger.info("Forecasting TWL")
df_twl_foreshore_slope_sto06 = forecast_twl(
df_tides,
df_profiles,
df_waves,
df_profile_features,
runup_function=getattr(runup_models, runup_function),
slope=slope,
)
df_twl_foreshore_slope_sto06.to_csv(output_file)
logger.info("Saved to %s", output_file)
logger.info("Done!")
@click.group()
def cli():
pass
if __name__ == "__main__":
cli.add_command(create_twl_forecast)
cli()
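A quick, hypothetical check of slope_from_profile above with its default end_points method. The synthetic profile and elevations are made up, and the import assumes the repository root is the working directory (so PYTHONPATH from .env and ./src/logging.conf both resolve):

from src.analysis.forecast_twl import slope_from_profile

# Uniformly sloping synthetic profile: z drops 2 m for every 10 m travelled seaward
profile_x = [0, 10, 20, 30]
profile_z = [5, 3, 1, -1]
beta = slope_from_profile(profile_x, profile_z, top_elevation=4, btm_elevation=0)
print(beta)  # 0.2: a 4 m drop between the two crossings over a 20 m horizontal run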

src/analysis/forecasted_storm_impacts.py
@@ -4,10 +4,10 @@ Estimates the forecasted storm impacts based on the forecasted water level and d
import logging.config
import os
import click
import pandas as pd
logging.config.fileConfig('./src/logging.conf', disable_existing_loggers=False)
logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
logger = logging.getLogger(__name__)
@@ -19,20 +19,19 @@ def forecasted_impacts(df_profile_features, df_forecasted_twl):
:param df_forecasted_twl:
:return:
"""
logger.info('Getting forecasted storm regimes')
logger.info("Getting forecasted storm impacts")
df_forecasted_impacts = pd.DataFrame(index=df_profile_features.index)
# For each site, find the maximum R_high value and the corresponding R_low value.
idx = df_forecasted_twl.groupby(level=['site_id'])['R_high'].idxmax().dropna()
df_r_vals = df_forecasted_twl.loc[idx, ['R_high', 'R_low']].reset_index(['datetime'])
df_forecasted_impacts = df_forecasted_impacts.merge(df_r_vals, how='left', left_index=True, right_index=True)
idx = df_forecasted_twl.groupby(level=["site_id"])["R_high"].idxmax().dropna()
df_r_vals = df_forecasted_twl.loc[idx, ["R_high", "R_low"]].reset_index(["datetime"])
df_forecasted_impacts = df_forecasted_impacts.merge(df_r_vals, how="left", left_index=True, right_index=True)
# Join with df_profile features to find dune toe and crest elevations
df_forecasted_impacts = df_forecasted_impacts.merge(df_profile_features[['dune_toe_z', 'dune_crest_z']],
how='left',
left_index=True,
right_index=True)
df_forecasted_impacts = df_forecasted_impacts.merge(
df_profile_features[["dune_toe_z", "dune_crest_z"]], how="left", left_index=True, right_index=True
)
# Compare R_high and R_low with dune crest and toe elevations
df_forecasted_impacts = storm_regime(df_forecasted_impacts)
@@ -47,27 +46,49 @@ def storm_regime(df_forecasted_impacts):
:param df_forecasted_impacts:
:return:
"""
logger.info('Getting forecasted storm regimes')
logger.info("Getting forecasted storm regimes")
df_forecasted_impacts.loc[
df_forecasted_impacts.R_high <= df_forecasted_impacts.dune_toe_z, "storm_regime"
] = "swash"
df_forecasted_impacts.loc[
df_forecasted_impacts.R_high <= df_forecasted_impacts.dune_toe_z, 'storm_regime'] = 'swash'
df_forecasted_impacts.dune_toe_z <= df_forecasted_impacts.R_high, "storm_regime"
] = "collision"
df_forecasted_impacts.loc[
df_forecasted_impacts.dune_toe_z <= df_forecasted_impacts.R_high, 'storm_regime'] = 'collision'
df_forecasted_impacts.loc[(df_forecasted_impacts.dune_crest_z <= df_forecasted_impacts.R_high) &
(df_forecasted_impacts.R_low <= df_forecasted_impacts.dune_crest_z),
'storm_regime'] = 'overwash'
df_forecasted_impacts.loc[(df_forecasted_impacts.dune_crest_z <= df_forecasted_impacts.R_low) &
(df_forecasted_impacts.dune_crest_z <= df_forecasted_impacts.R_high),
'storm_regime'] = 'inundation'
(df_forecasted_impacts.dune_crest_z <= df_forecasted_impacts.R_high)
& (df_forecasted_impacts.R_low <= df_forecasted_impacts.dune_crest_z),
"storm_regime",
] = "overwash"
df_forecasted_impacts.loc[
(df_forecasted_impacts.dune_crest_z <= df_forecasted_impacts.R_low)
& (df_forecasted_impacts.dune_crest_z <= df_forecasted_impacts.R_high),
"storm_regime",
] = "inundation"
return df_forecasted_impacts
if __name__ == '__main__':
logger.info('Importing existing data')
data_folder = './data/interim'
df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0, 1, 2])
df_profile_features = pd.read_csv(os.path.join(data_folder, 'profile_features.csv'), index_col=[0])
df_forecasted_twl = pd.read_csv(os.path.join(data_folder, 'twl_mean_slope_sto06.csv'), index_col=[0, 1])
@click.command()
@click.option("--profile-features-csv", required=True, help="")
@click.option("--forecasted-twl-csv", required=True, help="")
@click.option("--output-file", required=True, help="")
def create_forecasted_impacts(profile_features_csv, forecasted_twl_csv, output_file):
logger.info("Creating observed wave impacts")
logger.info("Importing existing data")
df_profile_features = pd.read_csv(profile_features_csv, index_col=[0])
df_forecasted_twl = pd.read_csv(forecasted_twl_csv, index_col=[0, 1])
df_forecasted_impacts = forecasted_impacts(df_profile_features, df_forecasted_twl)
df_forecasted_impacts.to_csv(os.path.join(data_folder, 'impacts_forecasted_mean_slope_sto06.csv'))
df_forecasted_impacts.to_csv(output_file)
logger.info("Saved to %s", output_file)
logger.info("Done!")
@click.group()
def cli():
pass
if __name__ == "__main__":
cli.add_command(create_forecasted_impacts)
cli()
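To make the regime thresholds above concrete, a small illustrative example (not part of the commit; elevations in metres are made up, and the import assumes the repository root is the working directory):

import pandas as pd

from src.analysis.forecasted_storm_impacts import storm_regime

# One row per Sallenger (2000) regime, with a 2 m dune toe and 6 m dune crest throughout
df = pd.DataFrame(
    {
        "R_high": [1.0, 3.0, 6.5, 7.0],
        "R_low": [0.5, 2.0, 4.0, 6.5],
        "dune_toe_z": [2.0, 2.0, 2.0, 2.0],
        "dune_crest_z": [6.0, 6.0, 6.0, 6.0],
    }
)
print(storm_regime(df).storm_regime.tolist())  # ['swash', 'collision', 'overwash', 'inundation']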

src/analysis/observed_storm_impacts.py
@@ -1,11 +1,11 @@
import logging.config
import os
import click
import numpy as np
import pandas as pd
from scipy.integrate import simps
logging.config.fileConfig('./src/logging.conf', disable_existing_loggers=False)
logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
logger = logging.getLogger(__name__)
@@ -29,14 +29,14 @@ def volume_change(df_profiles, df_profile_features, zone):
:param zone: Either 'swash' or 'dune_face'
:return:
"""
logger.info('Calculating change in beach volume in {} zone'.format(zone))
logger.info("Calculating change in beach volume in {} zone".format(zone))
df_vol_changes = pd.DataFrame(index=df_profile_features.index)
df_profiles = df_profiles.sort_index()
sites = df_profiles.groupby(level=['site_id'])
sites = df_profiles.groupby(level=["site_id"])
for site_id, df_site in sites:
logger.debug('Calculating change in beach volume at {} in {} zone'.format(site_id, zone))
logger.debug("Calculating change in beach volume at {} in {} zone".format(site_id, zone))
prestorm_dune_toe_x = df_profile_features.loc[df_profile_features.index == site_id].dune_toe_x.tolist()
prestorm_dune_crest_x = df_profile_features.loc[df_profile_features.index == site_id].dune_crest_x.tolist()
@@ -50,36 +50,44 @@ def volume_change(df_profiles, df_profile_features, zone):
# Find last x coordinate where we have both prestorm and poststorm measurements. If we don't do this,
# the prestorm and poststorm values are going to be calculated over different lengths.
df_zone = df_site.dropna(subset=['z'])
x_last_obs = min([max(df_zone.query("profile_type == '{}'".format(profile_type)).index.get_level_values('x'))
for profile_type in ['prestorm', 'poststorm']])
df_zone = df_site.dropna(subset=["z"])
x_last_obs = min(
[
max(df_zone.query("profile_type == '{}'".format(profile_type)).index.get_level_values("x"))
for profile_type in ["prestorm", "poststorm"]
]
)
# Where we want to measure pre and post storm volume is dependent on the zone selected
if zone == 'swash':
if zone == "swash":
x_min = prestorm_dune_toe_x
x_max = x_last_obs
elif zone == 'dune_face':
elif zone == "dune_face":
x_min = prestorm_dune_crest_x
x_max = prestorm_dune_toe_x
else:
logger.warning('Zone argument not properly specified. Please check')
logger.warning("Zone argument not properly specified. Please check")
x_min = None
x_max = None
# Now, compute the volume of sand between the x-coordinates prestorm_dune_toe_x and x_swash_last for both prestorm
# and post storm profiles.
prestorm_vol = beach_volume(x=df_zone.query("profile_type=='prestorm'").index.get_level_values('x'),
z=df_zone.query("profile_type=='prestorm'").z,
x_min=x_min,
x_max=x_max)
poststorm_vol = beach_volume(x=df_zone.query("profile_type=='poststorm'").index.get_level_values('x'),
z=df_zone.query("profile_type=='poststorm'").z,
x_min=x_min,
x_max=x_max)
df_vol_changes.loc[site_id, 'prestorm_{}_vol'.format(zone)] = prestorm_vol
df_vol_changes.loc[site_id, 'poststorm_{}_vol'.format(zone)] = poststorm_vol
df_vol_changes.loc[site_id, '{}_vol_change'.format(zone)] = prestorm_vol - poststorm_vol
prestorm_vol = beach_volume(
x=df_zone.query("profile_type=='prestorm'").index.get_level_values("x"),
z=df_zone.query("profile_type=='prestorm'").z,
x_min=x_min,
x_max=x_max,
)
poststorm_vol = beach_volume(
x=df_zone.query("profile_type=='poststorm'").index.get_level_values("x"),
z=df_zone.query("profile_type=='poststorm'").z,
x_min=x_min,
x_max=x_max,
)
df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = prestorm_vol
df_vol_changes.loc[site_id, "poststorm_{}_vol".format(zone)] = poststorm_vol
df_vol_changes.loc[site_id, "{}_vol_change".format(zone)] = prestorm_vol - poststorm_vol
return df_vol_changes
@@ -110,28 +118,67 @@ def storm_regime(df_observed_impacts):
:param df_observed_impacts:
:return:
"""
logger.info('Getting observed storm regimes')
df_observed_impacts.loc[df_observed_impacts.swash_vol_change < 3, 'storm_regime'] = 'swash'
df_observed_impacts.loc[df_observed_impacts.dune_face_vol_change > 3, 'storm_regime'] = 'collision'
logger.info("Getting observed storm regimes")
df_observed_impacts.loc[df_observed_impacts.swash_vol_change < 3, "storm_regime"] = "swash"
df_observed_impacts.loc[df_observed_impacts.dune_face_vol_change > 3, "storm_regime"] = "collision"
return df_observed_impacts
if __name__ == '__main__':
logger.info('Importing existing data')
data_folder = './data/interim'
df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0, 1, 2])
df_profile_features = pd.read_csv(os.path.join(data_folder, 'profile_features.csv'), index_col=[0])
if __name__ == "__main__":
logger.info("Importing existing data")
data_folder = "./data/interim"
df_profiles = pd.read_csv(os.path.join(data_folder, "profiles.csv"), index_col=[0, 1, 2])
df_profile_features = pd.read_csv(os.path.join(data_folder, "profile_features.csv"), index_col=[0])
logger.info("Creating new dataframe for observed impacts")
df_observed_impacts = pd.DataFrame(index=df_profile_features.index)
logger.info("Getting pre/post storm volumes")
df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash")
df_dune_face_vol_changes = volume_change(df_profiles, df_profile_features, zone="dune_face")
df_observed_impacts = df_observed_impacts.join([df_swash_vol_changes, df_dune_face_vol_changes])
# Classify regime based on volume changes
df_observed_impacts = storm_regime(df_observed_impacts)
# Save dataframe to csv
df_observed_impacts.to_csv(os.path.join(data_folder, "impacts_observed.csv"))
@click.command()
@click.option("--profiles-csv", required=True, help="")
@click.option("--profile-features-csv", required=True, help="")
@click.option("--output-file", required=True, help="")
def create_observed_impacts(profiles_csv, profile_features_csv, output_file):
logger.info('Creating new dataframe for observed impacts')
logger.info("Creating observed wave impacts")
logger.info("Importing data")
df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2])
df_profile_features = pd.read_csv(profile_features_csv, index_col=[0])
logger.info("Creating new dataframe for observed impacts")
df_observed_impacts = pd.DataFrame(index=df_profile_features.index)
logger.info('Getting pre/post storm volumes')
df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone='swash')
df_dune_face_vol_changes = volume_change(df_profiles, df_profile_features, zone='dune_face')
logger.info("Getting pre/post storm volumes")
df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash")
df_dune_face_vol_changes = volume_change(df_profiles, df_profile_features, zone="dune_face")
df_observed_impacts = df_observed_impacts.join([df_swash_vol_changes, df_dune_face_vol_changes])
# Classify regime based on volume changes
df_observed_impacts = storm_regime(df_observed_impacts)
# Save dataframe to csv
df_observed_impacts.to_csv(os.path.join(data_folder, 'impacts_observed.csv'))
df_observed_impacts.to_csv(output_file)
logger.info("Saved to %s", output_file)
logger.info("Done!")
@click.group()
def cli():
pass
if __name__ == "__main__":
cli.add_command(create_observed_impacts)
cli()
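Similarly, a toy illustration (not part of the commit) of the observed classification above, which keys off the volume-change threshold of 3 used in storm_regime; values are made up and the import assumes the repository root is the working directory:

import pandas as pd

from src.analysis.observed_storm_impacts import storm_regime

# Negligible swash change -> swash regime; large dune face erosion -> collision regime
df = pd.DataFrame({"swash_vol_change": [1.5, 25.0], "dune_face_vol_change": [0.0, 15.0]})
print(storm_regime(df).storm_regime.tolist())  # ['swash', 'collision']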

src/analysis/runup_models.py
@@ -1,6 +1,7 @@
import numpy as np
import pandas as pd
def sto06_individual(Hs0, Tp, beta):
Lp = 9.8 * Tp ** 2 / 2 / np.pi
@@ -9,17 +10,18 @@ def sto06_individual(Hs0, Tp, beta):
S_inc = 0.75 * beta * np.sqrt(Hs0 * Lp)
# Dissipative conditions
if beta / (Hs0/Lp)**(0.5) <= 0.3:
if beta / (Hs0 / Lp) ** (0.5) <= 0.3:
setup = 0.016 * (Hs0 * Lp) ** 0.5
S_total = 0.046 * (Hs0 * Lp) ** 0.5
R2 = 0.043 * (Hs0 * Lp) ** 0.5
else:
setup = 0.35 * beta * (Hs0 * Lp) ** 0.5
S_total = np.sqrt(S_inc ** 2 + S_ig **2)
S_total = np.sqrt(S_inc ** 2 + S_ig ** 2)
R2 = 1.1 * (setup + S_total / 2)
return R2, setup, S_total, S_inc, S_ig
def sto06(df, Hs0_col, Tp_col, beta_col):
"""
Vectorized version of Stockdon06 which can be used with dataframes
@@ -30,22 +32,23 @@ def sto06(df, Hs0_col, Tp_col, beta_col):
:return:
"""
Lp = 9.8 * df[Tp_col] ** 2 / 2 / np.pi
# General equation
S_ig = pd.to_numeric(0.06 * np.sqrt(df[Hs0_col] * Lp), errors='coerce')
S_inc = pd.to_numeric(0.75 * df[beta_col] * np.sqrt(df[Hs0_col] * Lp), errors='coerce')
setup = pd.to_numeric(0.35 * df[beta_col] * np.sqrt(df[Hs0_col] * Lp), errors='coerce')
S_ig = pd.to_numeric(0.06 * np.sqrt(df[Hs0_col] * Lp), errors="coerce")
S_inc = pd.to_numeric(0.75 * df[beta_col] * np.sqrt(df[Hs0_col] * Lp), errors="coerce")
setup = pd.to_numeric(0.35 * df[beta_col] * np.sqrt(df[Hs0_col] * Lp), errors="coerce")
S_total = np.sqrt(S_inc ** 2 + S_ig ** 2)
R2 = 1.1 * (setup + S_total / 2)
# Dissipative conditions
dissipative = df[beta_col] / (df[Hs0_col] / Lp)**(0.5) <= 0.3
setup.loc[dissipative,:] = 0.016 * (df[Hs0_col] * Lp) ** (0.5) # eqn 16
S_total.loc[dissipative,:] = 0.046 * (df[Hs0_col] * Lp) ** (0.5) # eqn 17
R2.loc[dissipative,:] = 0.043 * (df[Hs0_col] * Lp) ** (0.5) # eqn 18
dissipative = df[beta_col] / (df[Hs0_col] / Lp) ** (0.5) <= 0.3
setup.loc[dissipative, :] = 0.016 * (df[Hs0_col] * Lp) ** (0.5) # eqn 16
S_total.loc[dissipative, :] = 0.046 * (df[Hs0_col] * Lp) ** (0.5) # eqn 17
R2.loc[dissipative, :] = 0.043 * (df[Hs0_col] * Lp) ** (0.5) # eqn 18
return R2, setup, S_total, S_inc, S_ig
if __name__ == '__main__':
if __name__ == "__main__":
pass
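As a rough worked example of the scalar Stockdon et al. (2006) estimate above (wave parameters are illustrative only; the import assumes the repository root is on PYTHONPATH as set in .env):

from src.analysis.runup_models import sto06_individual

# 3 m offshore significant wave height, 10 s peak period, foreshore slope of 0.1
R2, setup, S_total, S_inc, S_ig = sto06_individual(Hs0=3.0, Tp=10.0, beta=0.1)
print(round(R2, 2))  # approximately 1.98 m of runup (non-dissipative branch, since beta / (Hs0 / Lp) ** 0.5 > 0.3)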

src/data/beach_orientations.m
@@ -1,19 +1,39 @@
% Calculate orientation of the beach profile at each unique site and save to .mat file
% Calculate orientation of the beach profile at each unique site and save to .mat file. Orientation is
% the number of degrees, anticlockwise from east, perpendicular to the shoreline (pointing towards
% land).
%% Setup
% Needs the following coastal tools:
% J:\Coastal\Tools\MALT Logspiral Transformation
% J:\Coastal\Tools\Coordinate Transformations
addpath('J:\Coastal\Tools\MALT Logspiral Transformation')
addpath('J:\Coastal\Tools\Coordinate Transformations')
clear
clc
%% Options
% Where is the profiles file located? This should contain a structure including the .lat and .lon
% for each analysed cross section
profilesFile = '..\..\data\raw\processed_shorelines\profiles.mat';
% Where should we store the processed beach orientations?
outputFile = '..\..\data\raw\processed_shorelines\orientations.mat';
% How far in meters does the profile extend towards land and sea? Used to provide end points of the
% cross section
distance = 200;
%% Script
% Load profile data, this is where we want to calculate orientations.
warning('off','all')
data = load('C:\Users\z5189959\Desktop\nsw_2016_storm_impact\data\raw\processed_shorelines\profiles.mat');
data = load(profilesFile);
data = data.data;
% Save results to variable
output = [];
for ii = 1:n
for ii = 1:length(data)
disp(num2str(ii))
lat = data(ii).lat;
lon = data(ii).lon;
@ -21,14 +41,41 @@ for ii = 1:n
[x,y,utmzone] = deg2utm(lat,lon);
if strcmp(beach, 'BOOM') == 1 || strcmp(beach, 'HARGn') == 1 || strcmp(beach, 'BILG') == 1 || strcmp(beach, 'HARGs') == 1 || strcmp(beach, 'DEEWHYn') == 1
% log spiral transformation file is out of date. Talk to Mitch
continue
end
% These are straight beaches, load the transformation file directly and read the rotation angle.
parameterDir = 'J:\Coastal\Tools\MALT Logspiral Transformation';
parameterFile = [parameterDir, filesep, beach, '.mat'];
parameterMat = load(parameterFile);
fields = fieldnames(parameterMat);
field = fields(1,1);
site = getfield(parameterMat, field{1});
rot_angle = site.rot_angle; % Angle of the shoreline counter clockwise from east
% Figure out end points in utm coordinates
x_land = x - distance * cos(deg2rad(rot_angle));
y_land = y + distance * sin(deg2rad(rot_angle));
x_sea = x + distance * cos(deg2rad(rot_angle));
y_sea = y - distance * sin(deg2rad(rot_angle));
[lat_land,lon_land] = utm2deg(x_land,y_land,utmzone);
[lat_sea,lon_sea] = utm2deg(x_sea,y_sea,utmzone);
if strcmp(beach, 'AVOCAs') == 1
% negative solution. Talk to Mitch
row.lat_center = lat;
row.lon_center = lon;
row.lat_land = lat_land;
row.lon_land = lon_land;
row.lat_sea = lat_sea;
row.lon_sea = lon_sea;
row.orientation = rot_angle + 90; % Perpendicular to shoreline, pointing towards land
row.beach = beach;
output = [output; row];
continue
end
% if strcmp(beach, 'AVOCAs') == 1
% % negative solution. Talk to Mitch
% continue
% end
% Get the sp log spiral transformed coordinates
xyz.x = x;
@ -56,7 +103,12 @@ for ii = 1:n
[lat_sea,lon_sea] = utm2deg(xyz_sea.x,xyz_sea.y,utmzone);
% Orientation in degrees anticlockwise from east, pointing towards land
orientation = radtodeg(atan2((xyz_land.y - xyz_sea.y), (xyz_land.x - xyz_sea.x)));
try
orientation = radtodeg(atan2((xyz_land.y - xyz_sea.y), (xyz_land.x - xyz_sea.x)));
catch
disp(['Cannot calculate orientation: ' beach])
continue
end
row.lat_center = lat;
row.lon_center = lon;
@ -70,4 +122,4 @@ for ii = 1:n
end
warning('on','all')
save('orientations.mat','output','-v7')
save(outputFile,'output','-v7')
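In Python terms, an orientation defined this way (degrees anticlockwise from east, pointing landward) can be turned into landward/seaward end points around a profile centre as in the sketch below (UTM metres assumed; this mirrors the landward/seaward construction used in distance_to_intersection later in this changeset):

import numpy as np

def profile_endpoints(x, y, orientation_deg, distance=200):
    """Return ((x_land, y_land), (x_sea, y_sea)) for a cross section centred at (x, y).
    orientation_deg points towards land, measured anticlockwise from east (sketch only)."""
    theta = np.deg2rad(orientation_deg)
    landward = (x + distance * np.cos(theta), y + distance * np.sin(theta))
    seaward = (x - distance * np.cos(theta), y - distance * np.sin(theta))
    return landward, seaward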

@ -7,11 +7,15 @@ import fiona
import pandas as pd
from fiona.crs import from_epsg
from shapely.geometry import Point, mapping
import logging.config
logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
logger = logging.getLogger(__name__)
@click.command()
@click.argument('input_csv')
@click.argument('output_shp')
@click.option("--input-csv", required=True, help=".csv file to convert")
@click.option("--output-shp", required=True, help="where to store .shp file")
def sites_csv_to_shp(input_csv, output_shp):
"""
Converts our dataframe of sites to .shp to load in QGis
@ -19,23 +23,16 @@ def sites_csv_to_shp(input_csv, output_shp):
:param output_shp:
:return:
"""
logger.info("Converting %s to %s", input_csv, output_shp)
df_sites = pd.read_csv(input_csv, index_col=[0])
schema = {
'geometry': 'Point',
'properties': {
'beach': 'str',
'site_id': 'str'
}
}
with fiona.open(output_shp, 'w', crs=from_epsg(4326), driver='ESRI Shapefile', schema=schema) as output:
schema = {"geometry": "Point", "properties": {"beach": "str", "site_id": "str"}}
with fiona.open(output_shp, "w", crs=from_epsg(4326), driver="ESRI Shapefile", schema=schema) as output:
for index, row in df_sites.iterrows():
point = Point(row['lon'], row['lat'])
prop = {
'beach': row['beach'],
'site_id': index,
}
output.write({'geometry': mapping(point), 'properties': prop})
point = Point(row["lon"], row["lat"])
prop = {"beach": row["beach"], "site_id": index}
output.write({"geometry": mapping(point), "properties": prop})
logger.info("Done!")
@click.group()
@ -43,6 +40,6 @@ def cli():
pass
if __name__ == '__main__':
if __name__ == "__main__":
cli.add_command(sites_csv_to_shp)
cli()
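A quick way to sanity-check the generated shapefile is to read it back with fiona (the path below is an example only):

import fiona

with fiona.open("./data/interim/sites.shp") as src:
    print(src.crs)  # expect {'init': 'epsg:4326'}
    first = next(iter(src))
    print(first["properties"]["site_id"], first["geometry"]["coordinates"])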

@ -1,263 +0,0 @@
"""
Converts raw .mat files into a flattened .csv structure which can be imported into python pandas.
"""
import logging.config
from datetime import datetime, timedelta
import pandas as pd
from mat4py import loadmat
import numpy as np
logging.config.fileConfig('./src/logging.conf', disable_existing_loggers=False)
logger = logging.getLogger(__name__)
def parse_orientations(orientations_mat):
"""
Parses the raw orientations.mat file and returns a pandas dataframe. Note that orientations are the direction
towards land measured in degrees anti-clockwise from east.
:param orientations_mat:
:return:
"""
logger.info('Parsing %s', orientations_mat)
mat_data = loadmat(orientations_mat)['output']
rows = []
for i in range(0, len(mat_data['beach'])):
rows.append({
'beach': mat_data['beach'][i],
'orientation': mat_data['orientation'][i],
'lat_center': mat_data['lat_center'][i],
'lon_center': mat_data['lon_center'][i],
'lat_land': mat_data['lat_land'][i],
'lon_land': mat_data['lon_land'][i],
'lat_sea': mat_data['lat_sea'][i],
'lon_sea': mat_data['lon_sea'][i],
})
df = pd.DataFrame(rows)
return df
def combine_sites_and_orientaions(df_sites, df_orientations):
"""
Replaces beach/lat/lon columns with the unique site_id.
:param dfs:
:param df_sites:
:return:
"""
df_merged_sites = df_sites.merge(df_orientations[['beach', 'lat_center', 'lon_center', 'orientation']],
left_on=['beach', 'lat', 'lon'],
right_on=['beach', 'lat_center', 'lon_center'])
# Check that all our records have a unique site identifier
n_unmatched = len(df_sites) - len(df_merged_sites)
if n_unmatched > 0:
logger.warning('Not all records (%d of %d) matched with an orientation', n_unmatched, len(df_sites))
# Drop extra columns
df_merged_sites = df_merged_sites.drop(columns = ['lat_center', 'lon_center'])
return df_merged_sites
def specify_lat_lon_profile_center(df_sites, x_val=200):
"""
Specify which x-coordinate in the beach profile cross section the lat/lon corresponds to
:param df_sites:
:return:
"""
df_sites['profile_x_lat_lon'] = x_val
return df_sites
def parse_waves(waves_mat):
"""
Parses the raw waves.mat file and returns a pandas dataframe
:param waves_mat:
:return:
"""
logger.info('Parsing %s', waves_mat)
mat_data = loadmat(waves_mat)['data']
rows = []
for i in range(0, len(mat_data['site'])):
for j in range(0, len(mat_data['dates'][i])):
rows.append({
'beach': mat_data['site'][i],
'lon': mat_data['lon'][i],
'lat': mat_data['lat'][i],
'datetime': matlab_datenum_to_datetime(mat_data['dates'][i][j][0]),
'Hs': mat_data['H'][i][j][0],
'Hs0': mat_data['Ho'][i][j][0],
'Tp': mat_data['T'][i][j][0],
'dir': mat_data['D'][i][j][0],
'E': mat_data['E'][i][j][0],
'P': mat_data['P'][i][j][0],
'Exs': mat_data['Exs'][i][j][0],
'Pxs': mat_data['Pxs'][i][j][0],
})
df = pd.DataFrame(rows)
df['datetime'] = df['datetime'].dt.round('1s')
return df
def parse_tides(tides_mat):
"""
Parses the raw tides.mat file and returns a pandas dataframe
:param tides_mat:
:return:
"""
logger.info('Parsing %s', tides_mat)
mat_data = loadmat(tides_mat)['data']
rows = []
for i in range(0, len(mat_data['site'])):
for j in range(0, len(mat_data['time'])):
rows.append({
'beach': mat_data['site'][i][0],
'lon': mat_data['lons'][i][0],
'lat': mat_data['lats'][i][0],
'datetime': matlab_datenum_to_datetime(mat_data['time'][j][0]),
'tide': mat_data['tide'][i][j]
})
df = pd.DataFrame(rows)
df['datetime'] = df['datetime'].dt.round('1s')
return df
def parse_profiles(profiles_mat):
"""
Parses the raw profiles.mat file and returns a pandas dataframe
:param tides_mat:
:return:
"""
logger.info('Parsing %s', profiles_mat)
mat_data = loadmat(profiles_mat)['data']
rows = []
for i in range(0, len(mat_data['site'])):
for j in range(0, len(mat_data['pfx'][i])):
for profile_type in ['prestorm', 'poststorm']:
if profile_type == 'prestorm':
z = mat_data['pf1'][i][j][0]
if profile_type == 'poststorm':
z = mat_data['pf2'][i][j][0]
rows.append({
'beach': mat_data['site'][i],
'lon': mat_data['lon'][i],
'lat': mat_data['lat'][i],
'profile_type': profile_type,
'x': mat_data['pfx'][i][j][0],
'z': z,
})
df = pd.DataFrame(rows)
return df
def remove_zeros(df_profiles):
"""
When parsing the pre/post storm profiles, the end of some profiles have constant values of zero. Let's change
these to NaNs for consistancy. Didn't use pandas fillnan because 0 may still be a valid value.
:param df:
:return:
"""
df_profiles = df_profiles.sort_index()
groups = df_profiles.groupby(level=['site_id','profile_type'])
for key, _ in groups:
logger.debug('Removing zeros from {} profile at {}'.format(key[1], key[0]))
idx_site = (df_profiles.index.get_level_values('site_id') == key[0]) & \
(df_profiles.index.get_level_values('profile_type') == key[1])
df_profile = df_profiles[idx_site]
x_last_ele = df_profile[df_profile.z!=0].index.get_level_values('x')[-1]
df_profiles.loc[idx_site & (df_profiles.index.get_level_values('x')>x_last_ele), 'z'] = np.nan
return df_profiles
def matlab_datenum_to_datetime(matlab_datenum):
# https://stackoverflow.com/a/13965852
return datetime.fromordinal(int(matlab_datenum)) + timedelta(days=matlab_datenum % 1) - timedelta(
days=366)
def get_unique_sites(dfs, cols=['beach', 'lat', 'lon']):
"""
Generates a dataframe of unique sites based on beach names, lats and lons. Creates a unique site ID for each.
:param dfs:
:param cols:
:return:
"""
rows = []
df_all = pd.concat([df[cols] for df in dfs])
beach_groups = df_all.groupby(['beach'])
for beach_name, beach_group in beach_groups:
site_groups = beach_group.groupby(['lat', 'lon'])
siteNo = 1
for site_name, site_group in site_groups:
site = '{}{:04d}'.format(beach_name, siteNo)
rows.append({'site_id': site,
'lat': site_name[0],
'lon': site_name[1],
'beach': beach_name})
siteNo += 1
df = pd.DataFrame(rows)
return df
def replace_unique_sites(df, df_sites, cols=['beach', 'lat', 'lon']):
"""
Replaces beach/lat/lon columns with the unique site_id
:param dfs:
:param df_sites:
:return:
"""
df_merged = df.merge(df_sites, on=cols)
# Check that all our records have a unique site identifier
n_unmatched = len(df) - len(df_merged)
if n_unmatched > 0:
logger.warning('Not all records (%d of %d) matched with a unique site', n_unmatched, len(df))
df_merged = df_merged.drop(columns=cols)
return df_merged
def main():
df_waves = parse_waves(waves_mat='./data/raw/processed_shorelines/waves.mat')
df_tides = parse_tides(tides_mat='./data/raw/processed_shorelines/tides.mat')
df_profiles = parse_profiles(profiles_mat='./data/raw/processed_shorelines/profiles.mat')
df_sites = get_unique_sites(dfs=[df_waves, df_tides, df_profiles])
df_orientations = parse_orientations(orientations_mat='./data/raw/processed_shorelines/orientations.mat')
logger.info('Identifying unique sites')
df_waves = replace_unique_sites(df_waves, df_sites)
df_tides = replace_unique_sites(df_tides, df_sites)
df_profiles = replace_unique_sites(df_profiles, df_sites)
logger.info('Combine orientations into sites')
df_sites = combine_sites_and_orientaions(df_sites, df_orientations)
df_sites = specify_lat_lon_profile_center(df_sites)
logger.info('Setting pandas index')
df_profiles.set_index(['site_id', 'profile_type', 'x'], inplace=True)
df_waves.set_index(['site_id', 'datetime'], inplace=True)
df_tides.set_index(['site_id', 'datetime'], inplace=True)
df_sites.set_index(['site_id'], inplace=True)
logger.info('Nanning profile zero elevations')
df_profiles = remove_zeros(df_profiles)
logger.info('Outputting .csv files')
df_profiles.to_csv('./data/interim/profiles.csv')
df_tides.to_csv('./data/interim/tides.csv')
df_waves.to_csv('./data/interim/waves.csv')
df_sites.to_csv('./data/interim/sites.csv')
logger.info('Done!')
if __name__ == '__main__':
main()

@ -0,0 +1,342 @@
"""
Converts raw .mat files into a flattened .csv structure which can be imported into python pandas.
"""
import logging.config
from datetime import datetime, timedelta
import click
import pandas as pd
from mat4py import loadmat
import numpy as np
logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
logger = logging.getLogger(__name__)
def parse_orientations(orientations_mat):
"""
Parses the raw orientations.mat file and returns a pandas dataframe. Note that orientations are the direction
towards land measured in degrees anti-clockwise from east.
:param orientations_mat:
:return:
"""
logger.info("Parsing %s", orientations_mat)
mat_data = loadmat(orientations_mat)["output"]
rows = []
for i in range(0, len(mat_data["beach"])):
rows.append(
{
"beach": mat_data["beach"][i],
"orientation": mat_data["orientation"][i],
"lat_center": mat_data["lat_center"][i],
"lon_center": mat_data["lon_center"][i],
"lat_land": mat_data["lat_land"][i],
"lon_land": mat_data["lon_land"][i],
"lat_sea": mat_data["lat_sea"][i],
"lon_sea": mat_data["lon_sea"][i],
}
)
df = pd.DataFrame(rows)
return df
def combine_sites_and_orientaions(df_sites, df_orientations):
"""
Merges beach orientations into the sites dataframe, matching on beach/lat/lon.
:param df_sites:
:param df_orientations:
:return:
"""
df_merged_sites = df_sites.merge(
df_orientations[["beach", "lat_center", "lon_center", "orientation"]],
left_on=["beach", "lat", "lon"],
right_on=["beach", "lat_center", "lon_center"],
)
# Check that all our records have a unique site identifier
n_unmatched = len(df_sites) - len(df_merged_sites)
if n_unmatched > 0:
logger.warning("Not all records (%d of %d) matched with an orientation", n_unmatched, len(df_sites))
# Drop extra columns
df_merged_sites = df_merged_sites.drop(columns=["lat_center", "lon_center"])
return df_merged_sites
def specify_lat_lon_profile_center(df_sites, x_val=200):
"""
Specify which x-coordinate in the beach profile cross section the lat/lon corresponds to
:param df_sites:
:return:
"""
df_sites["profile_x_lat_lon"] = x_val
return df_sites
def parse_waves(waves_mat):
"""
Parses the raw waves.mat file and returns a pandas dataframe
:param waves_mat:
:return:
"""
logger.info("Parsing %s", waves_mat)
mat_data = loadmat(waves_mat)["data"]
rows = []
for i in range(0, len(mat_data["site"])):
for j in range(0, len(mat_data["dates"][i])):
rows.append(
{
"beach": mat_data["site"][i],
"lon": mat_data["lon"][i],
"lat": mat_data["lat"][i],
"datetime": matlab_datenum_to_datetime(mat_data["dates"][i][j][0]),
"Hs": mat_data["H"][i][j][0],
"Hs0": mat_data["Ho"][i][j][0],
"Tp": mat_data["T"][i][j][0],
"dir": mat_data["D"][i][j][0],
"E": mat_data["E"][i][j][0],
"P": mat_data["P"][i][j][0],
"Exs": mat_data["Exs"][i][j][0],
"Pxs": mat_data["Pxs"][i][j][0],
}
)
df = pd.DataFrame(rows)
df["datetime"] = df["datetime"].dt.round("1s")
return df
def parse_tides(tides_mat):
"""
Parses the raw tides.mat file and returns a pandas dataframe
:param tides_mat:
:return:
"""
logger.info("Parsing %s", tides_mat)
mat_data = loadmat(tides_mat)["data"]
rows = []
for i in range(0, len(mat_data["site"])):
for j in range(0, len(mat_data["time"])):
rows.append(
{
"beach": mat_data["site"][i][0],
"lon": mat_data["lons"][i][0],
"lat": mat_data["lats"][i][0],
"datetime": matlab_datenum_to_datetime(mat_data["time"][j][0]),
"tide": mat_data["tide"][i][j],
}
)
df = pd.DataFrame(rows)
df["datetime"] = df["datetime"].dt.round("1s")
return df
def parse_profiles(profiles_mat):
"""
Parses the raw profiles.mat file and returns a pandas dataframe
:param profiles_mat:
:return:
"""
logger.info("Parsing %s", profiles_mat)
mat_data = loadmat(profiles_mat)["data"]
rows = []
for i in range(0, len(mat_data["site"])):
for j in range(0, len(mat_data["pfx"][i])):
for profile_type in ["prestorm", "poststorm"]:
if profile_type == "prestorm":
z = mat_data["pf1"][i][j][0]
if profile_type == "poststorm":
z = mat_data["pf2"][i][j][0]
rows.append(
{
"beach": mat_data["site"][i],
"lon": mat_data["lon"][i],
"lat": mat_data["lat"][i],
"profile_type": profile_type,
"x": mat_data["pfx"][i][j][0],
"z": z,
}
)
df = pd.DataFrame(rows)
return df
def remove_zeros(df_profiles):
"""
When parsing the pre/post storm profiles, the ends of some profiles contain constant runs of zeros. Change
these to NaNs for consistency. pandas fillna is not used because 0 may still be a valid elevation.
:param df_profiles:
:return:
"""
df_profiles = df_profiles.sort_index()
groups = df_profiles.groupby(level=["site_id", "profile_type"])
for key, _ in groups:
logger.debug("Removing zeros from {} profile at {}".format(key[1], key[0]))
idx_site = (df_profiles.index.get_level_values("site_id") == key[0]) & (
df_profiles.index.get_level_values("profile_type") == key[1]
)
df_profile = df_profiles[idx_site]
x_last_ele = df_profile[df_profile.z != 0].index.get_level_values("x")[-1]
df_profiles.loc[idx_site & (df_profiles.index.get_level_values("x") > x_last_ele), "z"] = np.nan
return df_profiles
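A minimal illustration of remove_zeros, assuming the (site_id, profile_type, x) multi-index set up by the CLI commands below (the site id here is made up):

import numpy as np
import pandas as pd

idx = pd.MultiIndex.from_product(
    [["NARRA0001"], ["prestorm"], [0, 10, 20, 30]], names=["site_id", "profile_type", "x"]
)
df = pd.DataFrame({"z": [5.0, 2.0, 0.0, 0.0]}, index=idx)
df = remove_zeros(df)
print(df["z"].tolist())  # [5.0, 2.0, nan, nan] -- zeros after the last non-zero elevation become NaN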
def matlab_datenum_to_datetime(matlab_datenum):
"""
Adapted from https://stackoverflow.com/a/13965852
:param matlab_datenum:
:return:
"""
return datetime.fromordinal(int(matlab_datenum)) + timedelta(days=matlab_datenum % 1) - timedelta(days=366)
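For example, MATLAB's datenum('01-Jan-2000 12:00') is 730486.5; the 366-day offset accounts for MATLAB counting days from year 0000 while datetime.fromordinal counts from 0001-01-01:

matlab_datenum_to_datetime(730486.5)
# datetime.datetime(2000, 1, 1, 12, 0)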
def get_unique_sites(dfs, cols=["beach", "lat", "lon"]):
"""
Generates a dataframe of unique sites based on beach names, lats and lons. Creates a unique site ID for each.
:param dfs:
:param cols:
:return:
"""
rows = []
df_all = pd.concat([df[cols] for df in dfs])
beach_groups = df_all.groupby(["beach"])
for beach_name, beach_group in beach_groups:
site_groups = beach_group.groupby(["lat", "lon"])
siteNo = 1
for site_name, site_group in site_groups:
site = "{}{:04d}".format(beach_name, siteNo)
rows.append({"site_id": site, "lat": site_name[0], "lon": site_name[1], "beach": beach_name})
siteNo += 1
df = pd.DataFrame(rows)
return df
def replace_unique_sites(df, df_sites, cols=["lat", "lon"]):
"""
Replaces beach/lat/lon columns with the unique site_id
:param df:
:param df_sites:
:return:
"""
# Make the sites index a column, so it can be merged into df
df_sites["site_id"] = df_sites.index.get_level_values("site_id")
# Merging on a float can lead to subtle bugs. Lets convert lat/lons to integers and merge on that instead
precision = 8
df_sites["lat_int"] = np.round(df_sites["lat"] * 10 ** precision).astype(np.int64)
df_sites["lon_int"] = np.round(df_sites["lon"] * 10 ** precision).astype(np.int64)
df["lat_int"] = np.round(df["lat"] * 10 ** precision).astype(np.int64)
df["lon_int"] = np.round(df["lon"] * 10 ** precision).astype(np.int64)
df_merged = df.merge(df_sites, on=["lat_int", "lon_int"])
# Check that all our records have a unique site identifier
n_unmatched = len(df) - len(df_merged)
if n_unmatched > 0:
logger.warning("Not all records (%d of %d) matched with a unique site", n_unmatched, len(df))
df_merged = df_merged.drop(
columns=[
"lat_x",
"lon_x",
"lat_int",
"lon_int",
"beach_y",
"beach_x",
"lat_y",
"lon_y",
"orientation",
"profile_x_lat_lon",
]
)
return df_merged
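The reason for merging on scaled integers rather than raw floats can be seen in a two-line example (illustrative values only):

import numpy as np
import pandas as pd

left = pd.DataFrame({"lat": [0.1 + 0.2]})                      # 0.30000000000000004
right = pd.DataFrame({"lat": [0.3], "site_id": ["SITE0001"]})
print(len(left.merge(right, on="lat")))                        # 0 -- float keys do not match exactly
precision = 8
for df in (left, right):
    df["lat_int"] = np.round(df["lat"] * 10 ** precision).astype(np.int64)
print(len(left.merge(right, on="lat_int")))                    # 1 -- scaled-integer keys match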
@click.command(short_help="create sites.csv")
@click.option("--waves-mat", required=True, help=".mat file containing wave records")
@click.option("--tides-mat", required=True, help=".mat file containing tide records")
@click.option("--profiles-mat", required=True, help=".mat file containing beach profiles")
@click.option("--orientations-mat", required=True, help=".mat file containing orientation of beach profiles")
@click.option("--output-file", required=True, help="where to save sites.csv")
def create_sites_csv(waves_mat, tides_mat, profiles_mat, orientations_mat, output_file):
logger.info("Creating %s", output_file)
df_waves = parse_waves(waves_mat=waves_mat)
df_tides = parse_tides(tides_mat=tides_mat)
df_profiles = parse_profiles(profiles_mat=profiles_mat)
df_orientations = parse_orientations(orientations_mat=orientations_mat)
df_sites = get_unique_sites(dfs=[df_waves, df_tides, df_profiles])
df_sites = combine_sites_and_orientaions(df_sites, df_orientations)
df_sites = specify_lat_lon_profile_center(df_sites)
df_sites.set_index(["site_id"], inplace=True)
df_sites.to_csv(output_file)
logger.info("Created %s", output_file)
@click.command(short_help="create waves.csv")
@click.option("--waves-mat", required=True, help=".mat file containing wave records")
@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
@click.option("--output-file", required=True, help="where to save waves.csv")
def create_waves_csv(waves_mat, sites_csv, output_file):
logger.info("Creating %s", output_file)
df_waves = parse_waves(waves_mat=waves_mat)
df_sites = pd.read_csv(sites_csv, index_col=[0])
df_waves = replace_unique_sites(df_waves, df_sites)
df_waves.set_index(["site_id", "datetime"], inplace=True)
df_waves.sort_index(inplace=True)
df_waves.to_csv(output_file)
logger.info("Created %s", output_file)
@click.command(short_help="create profiles.csv")
@click.option("--profiles-mat", required=True, help=".mat file containing beach profiles")
@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
@click.option("--output-file", required=True, help="where to save profiles.csv")
def create_profiles_csv(profiles_mat, sites_csv, output_file):
logger.info("Creating %s", output_file)
df_profiles = parse_profiles(profiles_mat=profiles_mat)
df_sites = pd.read_csv(sites_csv, index_col=[0])
df_profiles = replace_unique_sites(df_profiles, df_sites)
df_profiles.set_index(["site_id", "profile_type", "x"], inplace=True)
df_profiles.sort_index(inplace=True)
df_profiles.to_csv(output_file)
logger.info("Created %s", output_file)
@click.command(short_help="create profiles.csv")
@click.option("--tides-mat", required=True, help=".mat file containing tides")
@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
@click.option("--output-file", required=True, help="where to save tides.csv")
def create_tides_csv(tides_mat, sites_csv, output_file):
logger.info("Creating %s", output_file)
df_tides = parse_tides(tides_mat=tides_mat)
df_sites = pd.read_csv(sites_csv, index_col=[0])
df_tides = replace_unique_sites(df_tides, df_sites)
df_tides.set_index(["site_id", "datetime"], inplace=True)
df_tides.sort_index(inplace=True)
df_tides.to_csv(output_file)
logger.info("Created %s", output_file)
@click.group()
def cli():
pass
if __name__ == "__main__":
cli.add_command(create_sites_csv)
cli.add_command(create_waves_csv)
cli.add_command(create_profiles_csv)
cli.add_command(create_tides_csv)
cli()

@ -1,6 +1,6 @@
import os
from functools import partial
import click
import fiona
import numpy as np
import pandas as pd
@ -8,6 +8,11 @@ import pyproj
from shapely.geometry import LineString, Point
from shapely.geometry import shape
from shapely.ops import transform
import logging.config
logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
logger = logging.getLogger(__name__)
def shapes_from_shp(shp_file):
@ -19,14 +24,14 @@ def shapes_from_shp(shp_file):
shapes = []
ids = []
properties = []
for feat in fiona.open(shp_file, 'r'):
shapes.append(shape(feat['geometry']))
ids.append(feat['id'])
properties.append(feat['properties'])
for feat in fiona.open(shp_file, "r"):
shapes.append(shape(feat["geometry"]))
ids.append(feat["id"])
properties.append(feat["properties"])
return shapes, ids, properties
def convert_coord_systems(g1, in_coord_system='EPSG:4326', out_coord_system='EPSG:28356'):
def convert_coord_systems(g1, in_coord_system="EPSG:4326", out_coord_system="EPSG:28356"):
"""
Converts coordinates from one coordinate system to another. Needed because shapefiles are usually defined in
lat/lon but should be converted to GDA to calculate distances.
@ -38,7 +43,8 @@ def convert_coord_systems(g1, in_coord_system='EPSG:4326', out_coord_system='EPS
project = partial(
pyproj.transform,
pyproj.Proj(init=in_coord_system), # source coordinate system
pyproj.Proj(init=out_coord_system)) # destination coordinate system
pyproj.Proj(init=out_coord_system),
) # destination coordinate system
g2 = transform(project, g1) # apply projection
return g2
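Usage sketch for convert_coord_systems (the point and the projected coordinates are indicative only):

from shapely.geometry import Point

p_wgs84 = Point(151.30, -33.70)     # lon, lat on the NSW coast
p_mga56 = convert_coord_systems(p_wgs84)
print(p_mga56.x, p_mga56.y)         # roughly 3.4e5 m easting, 6.27e6 m northing in EPSG:28356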
@ -59,15 +65,19 @@ def distance_to_intersection(lat, lon, landward_orientation, beach, line_strings
start_point = convert_coord_systems(start_point)
distance = 1000  # m; look up to 1000 m for an intersection
landward_point = Point(start_point.coords.xy[0] + distance * np.cos(np.deg2rad(landward_orientation)),
start_point.coords.xy[1] + distance * np.sin(np.deg2rad(landward_orientation)))
landward_point = Point(
start_point.coords.xy[0] + distance * np.cos(np.deg2rad(landward_orientation)),
start_point.coords.xy[1] + distance * np.sin(np.deg2rad(landward_orientation)),
)
landward_line = LineString([start_point, landward_point])
seaward_point = Point(start_point.coords.xy[0] - distance * np.cos(np.deg2rad(landward_orientation)),
start_point.coords.xy[1] - distance * np.sin(np.deg2rad(landward_orientation)))
seaward_point = Point(
start_point.coords.xy[0] - distance * np.cos(np.deg2rad(landward_orientation)),
start_point.coords.xy[1] - distance * np.sin(np.deg2rad(landward_orientation)),
)
seaward_line = LineString([start_point, seaward_point])
# Look at relevant line_strings which have the same beach property in order to reduce computation time
line_strings = [s for s, p in zip(line_strings, line_properties) if p['beach'] == beach]
line_strings = [s for s, p in zip(line_strings, line_properties) if p["beach"] == beach]
# Check whether the profile line intersects any line in line_strings. If the intersection point lies landward
# of the profile start, the distance is taken as negative; seaward intersections are positive.
@ -99,7 +109,7 @@ def beach_profile_elevation(x_coord, df_profiles, profile_type, site_id):
# Get profile
df_profile = df_profiles.query('profile_type == "{}" and site_id =="{}"'.format(profile_type, site_id))
return np.interp(x_coord, df_profile.index.get_level_values('x'), df_profile['z'])
return np.interp(x_coord, df_profile.index.get_level_values("x"), df_profile["z"])
def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp):
@ -111,51 +121,52 @@ def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp):
# Get site information. Base our profile features on each site
df_profile_features = df_sites
features = {
'dune_crest':
{
'file': dune_crest_shp
},
'dune_toe':
{
'file': dune_toe_shp
},
}
features = {"dune_crest": {"file": dune_crest_shp}, "dune_toe": {"file": dune_toe_shp}}
# Import our dune crest and toes
for feat in features.keys():
shapes, _, properties = shapes_from_shp(features[feat]['file'])
shapes, _, properties = shapes_from_shp(features[feat]["file"])
shapes = [convert_coord_systems(x) for x in shapes]
# Figure out the x coordinates of our crest and toes, by looking at where our beach sections intersect our
# shape files.
col_name = '{}_x'.format(feat)
df_profile_features[col_name] = df_profile_features['profile_x_lat_lon'] + \
df_profile_features.apply(lambda row:
distance_to_intersection(
row['lat'], row['lon'], row['orientation'],
row['beach'], shapes, properties),
axis=1)
col_name = "{}_x".format(feat)
df_profile_features[col_name] = df_profile_features["profile_x_lat_lon"] + df_profile_features.apply(
lambda row: distance_to_intersection(
row["lat"], row["lon"], row["orientation"], row["beach"], shapes, properties
),
axis=1,
)
# Get the elevations of the crest and toe
col_name = '{}_z'.format(feat)
df_profile_features[col_name] = df_profile_features.apply(lambda row:
beach_profile_elevation(
row['{}_x'.format(feat)],
df_profiles,
'prestorm',
row.name),
axis=1)
df_profile_features = df_profile_features.drop(columns=['beach', 'lat', 'lon', 'orientation'])
return df_profile_features
col_name = "{}_z".format(feat)
df_profile_features[col_name] = df_profile_features.apply(
lambda row: beach_profile_elevation(row["{}_x".format(feat)], df_profiles, "prestorm", row.name), axis=1
)
df_profile_features = df_profile_features.drop(columns=["beach", "lat", "lon", "orientation"])
return df_profile_features
if __name__ == '__main__':
data_folder = './data/interim'
df_sites = pd.read_csv(os.path.join(data_folder, 'sites.csv'), index_col=[0])
df_profiles = pd.read_csv(os.path.join(data_folder, 'profiles.csv'), index_col=[0, 1, 2])
dune_crest_shp = './data/raw/profile_features/dune_crests.shp'
dune_toe_shp = './data/raw/profile_features/dune_toes.shp'
@click.command(short_help="create .csv of dune toe and crest positions")
@click.option("--dune-crest-shp", required=True, help=".csv file to convert")
@click.option("--dune-toe-shp", required=True, help="where to store .shp file")
@click.option("--sites-csv", required=True, help="where to store .shp file")
@click.option("--profiles-csv", required=True, help="where to store .shp file")
@click.option("--output-csv", required=True, help="where to store .shp file")
def create_profile_features(dune_crest_shp, dune_toe_shp, sites_csv, profiles_csv, output_csv):
logger.info("Creating .csv of dune crests and toes")
df_sites = pd.read_csv(sites_csv, index_col=[0])
df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2])
df_profile_features = parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp)
df_profile_features.to_csv('./data/interim/profile_features.csv')
df_profile_features.to_csv(output_csv)
logger.info("Done!")
@click.group()
def cli():
pass
if __name__ == "__main__":
cli.add_command(create_profile_features)
cli()
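A simplified sketch of how distance_to_intersection and parse_profile_features combine (orientation 0 deg means land lies due east, everything in projected metres; values are illustrative):

from shapely.geometry import LineString, Point

# A dune crest 30 m landward of the profile centre gives a negative offset, so with
# profile_x_lat_lon = 200 m its chainage along the cross section is 200 - 30 = 170 m.
centre = Point(0, 0)
landward_line = LineString([centre, Point(1000, 0)])
crest_line = LineString([(30, -50), (30, 50)])
d = centre.distance(landward_line.intersection(crest_line))   # 30.0
dune_crest_x = 200 - d                                         # 170.0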

@ -1,5 +1,5 @@
[loggers]
keys=root, matplotlib
keys=root, matplotlib, fiona
[handlers]
keys=consoleHandler
@ -16,9 +16,14 @@ level=WARNING
handlers=consoleHandler
qualname=matplotlib
[logger_fiona]
level=WARNING
handlers=consoleHandler
qualname=fiona
[handler_consoleHandler]
class=StreamHandler
level=DEBUG
level=INFO
formatter=simpleFormatter
args=(sys.stdout,)
