diff --git a/.env b/.env
index de5e5c4..8a4468b 100644
--- a/.env
+++ b/.env
@@ -10,11 +10,9 @@ MATLAB_PATH="C:/Program Files/MATLAB/R2016b/bin/win64/MATLAB.exe"
 # total water level.
 MULTIPROCESS_THREADS=2
 
-# GDAL_DATA="C://Users//z5189959//Desktop//nsw-2016-storm-impact//.venv//Library//share//gdal"
-
 # The settings below should be left as is unless you know what you're doing.
 # Need to set pythonpath so that relative imports can be properly used in with pipenv
 # Refer to https://stackoverflow.com/q/52986500 and https://stackoverflow.com/a/49797761
-PYTHONPATH=${PWD}
\ No newline at end of file
+# PYTHONPATH=${PWD}
\ No newline at end of file
diff --git a/src/analysis/forecast_twl.py b/src/analysis/forecast_twl.py
index e15df6e..ddc93a6 100644
--- a/src/analysis/forecast_twl.py
+++ b/src/analysis/forecast_twl.py
@@ -7,8 +7,7 @@ import numpy.ma as ma
 import pandas as pd
 from scipy import stats
 
-
-from src.analysis import runup_models
+import runup_models
 
 logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
 logger = logging.getLogger(__name__)
@@ -266,6 +265,7 @@ def crossings(profile_x, profile_z, constant_z):
     z = np.subtract(profile_z, constant_z)
 
     # Find all indices right before any crossing.
+    # TODO Sometimes this can give a runtime warning https://stackoverflow.com/a/36489085
     indicies = np.where(z[:-1] * z[1:] < 0)[0]
 
     # Use linear interpolation to find intersample crossings.
diff --git a/src/analysis/observed_storm_impacts.py b/src/analysis/observed_storm_impacts.py
index 0aadbbe..80bdbda 100644
--- a/src/analysis/observed_storm_impacts.py
+++ b/src/analysis/observed_storm_impacts.py
@@ -48,6 +48,11 @@ def volume_change(df_profiles, df_profile_features, zone):
         if np.isnan(prestorm_dune_toe_x):
             prestorm_dune_toe_x = prestorm_dune_crest_x
 
+        # If there is no prestorm or poststorm profile, skip the site and continue
+        profile_lengths = [len(df_site.query("profile_type == '{}'".format(x))) for x in ['prestorm', 'poststorm']]
+        if any([length == 0 for length in profile_lengths]):
+            continue
+
         # Find last x coordinate where we have both prestorm and poststorm measurements. If we don't do this,
         # the prestorm and poststorm values are going to be calculated over different lengths.
         df_zone = df_site.dropna(subset=["z"])
@@ -128,25 +133,26 @@
     return df_observed_impacts
 
 
-if __name__ == "__main__":
-    logger.info("Importing existing data")
-    data_folder = "./data/interim"
-    df_profiles = pd.read_csv(os.path.join(data_folder, "profiles.csv"), index_col=[0, 1, 2])
-    df_profile_features = pd.read_csv(os.path.join(data_folder, "profile_features.csv"), index_col=[0])
-
-    logger.info("Creating new dataframe for observed impacts")
-    df_observed_impacts = pd.DataFrame(index=df_profile_features.index)
-
-    logger.info("Getting pre/post storm volumes")
-    df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash")
-    df_dune_face_vol_changes = volume_change(df_profiles, df_profile_features, zone="dune_face")
-    df_observed_impacts = df_observed_impacts.join([df_swash_vol_changes, df_dune_face_vol_changes])
-
-    # Classify regime based on volume changes
-    df_observed_impacts = storm_regime(df_observed_impacts)
-
-    # Save dataframe to csv
-    df_observed_impacts.to_csv(os.path.join(data_folder, "impacts_observed.csv"))
+#
+# if __name__ == "__main__":
+#     logger.info("Importing existing data")
+#     data_folder = "./data/interim"
+#     df_profiles = pd.read_csv(os.path.join(data_folder, "profiles.csv"), index_col=[0, 1, 2])
+#     df_profile_features = pd.read_csv(os.path.join(data_folder, "profile_features.csv"), index_col=[0])
+#
+#     logger.info("Creating new dataframe for observed impacts")
+#     df_observed_impacts = pd.DataFrame(index=df_profile_features.index)
+#
+#     logger.info("Getting pre/post storm volumes")
+#     df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash")
+#     df_dune_face_vol_changes = volume_change(df_profiles, df_profile_features, zone="dune_face")
+#     df_observed_impacts = df_observed_impacts.join([df_swash_vol_changes, df_dune_face_vol_changes])
+#
+#     # Classify regime based on volume changes
+#     df_observed_impacts = storm_regime(df_observed_impacts)
+#
+#     # Save dataframe to csv
+#     df_observed_impacts.to_csv(os.path.join(data_folder, "impacts_observed.csv"))
 
 
 @click.command()
diff --git a/src/data/__init__.py b/src/data/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/data/csv_to_shp.py b/src/data/csv_to_shp.py
index 39525fa..a5438c2 100644
--- a/src/data/csv_to_shp.py
+++ b/src/data/csv_to_shp.py
@@ -2,15 +2,20 @@
 Converts .csv files to .shape files
 """
 
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 import click
 import fiona
 import pandas as pd
 from fiona.crs import from_epsg
 from shapely.geometry import Point, mapping
-import logging.config
 
-logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
-logger = logging.getLogger(__name__)
+from utils import setup_logging
+
+logger = setup_logging()
 
 
 @click.command()
@@ -25,11 +30,11 @@ def sites_csv_to_shp(input_csv, output_shp):
     """
     logger.info("Converting %s to %s", input_csv, output_shp)
     df_sites = pd.read_csv(input_csv, index_col=[0])
-
+    logger.info(os.environ.get("GDAL_DATA", None))
     schema = {"geometry": "Point", "properties": {"beach": "str", "site_id": "str"}}
     with fiona.open(output_shp, "w", crs=from_epsg(4326), driver="ESRI Shapefile", schema=schema) as output:
         for index, row in df_sites.iterrows():
-            point = Point(row["lon"], row["lat"])
+            point = Point(row["x_200_lon"], row["x_200_lat"])
             prop = {"beach": row["beach"], "site_id": index}
             output.write({"geometry": mapping(point), "properties": prop})
     logger.info("Done!")
diff --git a/src/data/parse_mat.py b/src/data/parse_mat.py
index d6f7cb2..de17dfc 100644
--- a/src/data/parse_mat.py
+++ b/src/data/parse_mat.py
@@ -2,15 +2,25 @@
 Converts raw .mat files into a flattened .csv structure which can be imported into python pandas.
 """
 
-import logging.config
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 from datetime import datetime, timedelta
+import math
+
+
 import click
+import numpy as np
 import pandas as pd
 from mat4py import loadmat
-import numpy as np
+from shapely.geometry import Point
+
+from profile_features import convert_coord_systems
+from utils import setup_logging
 
-logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
-logger = logging.getLogger(__name__)
+logger = setup_logging()
 
 
 def parse_orientations(orientations_mat):
@@ -134,7 +144,7 @@ def parse_tides(tides_mat):
     return df
 
 
-def parse_profiles(profiles_mat):
+def parse_profiles_and_sites(profiles_mat):
     """
     Parses the raw profiles.mat file and returns a pandas dataframe
     :param tides_mat:
@@ -142,39 +152,108 @@
     """
     logger.info("Parsing %s", profiles_mat)
     mat_data = loadmat(profiles_mat)["data"]
-    rows = []
-    for i in range(0, len(mat_data["site"])):
-        for j in range(0, len(mat_data["pfx"][i])):
-            for profile_type in ["prestorm", "poststorm"]:
-
-                if profile_type == "prestorm":
-                    z = mat_data["pf1"][i][j][0]
-                if profile_type == "poststorm":
-                    z = mat_data["pf2"][i][j][0]
-
-                rows.append(
-                    {
-                        "beach": mat_data["site"][i],
-                        "lon": mat_data["lon"][i],
-                        "lat": mat_data["lat"][i],
-                        "profile_type": profile_type,
-                        "x": mat_data["pfx"][i][j][0],
-                        "z": z,
-                    }
-                )
+    profile_rows = []
+    site_rows = []
+    site_counter = 0
+
+    for i, site in enumerate(mat_data["site"]):
+
+        # Give each site a unique id
+        if len(site_rows) == 0 or site_rows[-1]["beach"] != site:
+            site_counter = 1
+        else:
+            site_counter += 1
+        site_id = "{}{:04d}".format(site, site_counter)
+
+        # Initialize location of x=200m latitude and longitude
+        x_200_lat = np.nan
+        x_200_lon = np.nan
+
+        # Want to calculate the orientation
+        orientation = {}
+
+        for x, lat, lon, z_prestorm, z_poststorm, easting, northing in zip(
+            mat_data["x"][i],
+            mat_data["lats"][i],
+            mat_data["lons"][i],
+            mat_data["Zpre"][i],
+            mat_data["Zpost"][i],
+            mat_data["eastings"][i],
+            mat_data["northings"][i],
+        ):
+
+            # Only extract pre and post storm profiles
+            for j, profile_type in enumerate(["prestorm", "poststorm"]):
+
+                if mat_data["isgood"][i][j] == 1:
+                    land_lim = mat_data["landlims"][i][j]
+                    survey_datetime = matlab_datenum_to_datetime(mat_data["surveydates"][i][j])
+
+                    if profile_type == "prestorm":
+                        z = z_prestorm
+                    else:
+                        z = z_poststorm
+
+                    # Keep a record of where the center of the profile is located, and the locations of the land
+                    # and sea
+
+                    # TODO: This code isn't very transferable. What if we don't have lat/lons at 200 m?
+                    # Relook at this
+                    if x[0] == 200:
+                        x_200_lat = lat[0]
+                        x_200_lon = lon[0]
+                    elif x[0] == 0:
+                        orientation["land_easting"] = easting[0]
+                        orientation["land_northing"] = northing[0]
+                    elif x[0] == 400:
+                        orientation["sea_easting"] = easting[0]
+                        orientation["sea_northing"] = northing[0]
+
+                    profile_rows.append(
+                        {
+                            "site_id": site_id,
+                            "lon": lon[0],
+                            "lat": lat[0],
+                            "profile_type": profile_type,
+                            "x": x[0],
+                            "z": z[0],
+                            "land_lim": land_lim,
+                            "survey_datetime": survey_datetime,
+                        }
+                    )
+
+        orientation = math.degrees(
+            math.atan2(
+                orientation["land_northing"] - orientation["sea_northing"],
+                orientation["land_easting"] - orientation["sea_easting"],
+            )
+        )
+        site_rows.append(
+            {
+                "site_id": site_id,
+                "beach": site,
+                "lat": x_200_lat,
+                "lon": x_200_lon,
+                "orientation": orientation,
+                "profile_x_lat_lon": 200,
+            }
+        )
 
-    df = pd.DataFrame(rows)
-    return df
+    df_profiles = pd.DataFrame(profile_rows)
+    df_sites = pd.DataFrame(site_rows)
+
+    logger.info("Parsed profiles and sites")
+    return df_profiles, df_sites
 
 
 def remove_zeros(df_profiles):
     """
     When parsing the pre/post storm profiles, the end of some profiles have constant values of zero. Let's change
     these to NaNs for consistancy. Didn't use pandas fillnan because 0 may still be a valid value.
-    :param df:
+    :param df_profiles:
     :return:
     """
+    logger.info("Removing zeros from end of profiles")
     df_profiles = df_profiles.sort_index()
     groups = df_profiles.groupby(level=["site_id", "profile_type"])
     for key, _ in groups:
@@ -185,6 +264,7 @@ def remove_zeros(df_profiles):
         df_profile = df_profiles[idx_site]
         x_last_ele = df_profile[df_profile.z != 0].index.get_level_values("x")[-1]
         df_profiles.loc[idx_site & (df_profiles.index.get_level_values("x") > x_last_ele), "z"] = np.nan
+    logger.info("Removed zeros from end of profiles")
 
     return df_profiles
 
@@ -198,31 +278,7 @@ def matlab_datenum_to_datetime(matlab_datenum):
     return datetime.fromordinal(int(matlab_datenum)) + timedelta(days=matlab_datenum % 1) - timedelta(days=366)
 
 
-def get_unique_sites(dfs, cols=["beach", "lat", "lon"]):
-    """
-    Generates a dataframe of unique sites based on beach names, lats and lons. Creates a unique site ID for each.
-    :param dfs:
-    :param cols:
-    :return:
-    """
-
-    rows = []
-    df_all = pd.concat([df[cols] for df in dfs])
-    beach_groups = df_all.groupby(["beach"])
-    for beach_name, beach_group in beach_groups:
-        site_groups = beach_group.groupby(["lat", "lon"])
-        siteNo = 1
-        for site_name, site_group in site_groups:
-            site = "{}{:04d}".format(beach_name, siteNo)
-            rows.append({"site_id": site, "lat": site_name[0], "lon": site_name[1], "beach": beach_name})
-            siteNo += 1
-
-    df = pd.DataFrame(rows)
-
-    return df
-
-
-def replace_unique_sites(df, df_sites, cols=["lat", "lon"]):
+def replace_unique_sites(df, df_sites):
     """
     Replaces beach/lat/lon columns with the unique site_id
    :param dfs:
@@ -232,56 +288,37 @@
     # Make the sites index a column, so it can be merged into df
     df_sites["site_id"] = df_sites.index.get_level_values("site_id")
 
-    # Merging on a float can lead to subtle bugs.
-    # Lets convert lat/lons to integers and merge on that instead
-    precision = 8
-    df_sites["lat_int"] = np.round(df_sites["lat"] * 10 ** precision).astype(np.int64)
-    df_sites["lon_int"] = np.round(df_sites["lon"] * 10 ** precision).astype(np.int64)
-    df["lat_int"] = np.round(df["lat"] * 10 ** precision).astype(np.int64)
-    df["lon_int"] = np.round(df["lon"] * 10 ** precision).astype(np.int64)
+    # Create eastings and northings so we can calculate distances
+    site_points = [convert_coord_systems(Point(lon, lat)).xy for lon, lat in zip(df_sites["lon"], df_sites["lat"])]
+    df_sites["easting"] = [x[0][0] for x in site_points]
+    df_sites["northing"] = [x[1][0] for x in site_points]
 
-    df_merged = df.merge(df_sites, on=["lat_int", "lon_int"])
+    # Process each unique lat/lon combination in groups
+    groups = df.groupby(["lat", "lon"])
+    for (lat, lon), df_group in groups:
 
-    # Check that all our records have a unique site identifier
-    n_unmatched = len(df) - len(df_merged)
-    if n_unmatched > 0:
-        logger.warning("Not all records (%d of %d) matched with a unique site", n_unmatched, len(df))
-
-    df_merged = df_merged.drop(
-        columns=[
-            "lat_x",
-            "lon_x",
-            "lat_int",
-            "lon_int",
-            "beach_y",
-            "beach_x",
-            "lat_y",
-            "lon_y",
-            "orientation",
-            "profile_x_lat_lon",
-        ]
-    )
+        # Calculate distances from each point to each site and determine closest site
+        easting, northing = [x[0] for x in convert_coord_systems(Point(lon, lat)).xy]
+        distances_to_sites = np.sqrt((df_sites["easting"] - easting) ** 2 + (df_sites["northing"] - northing) ** 2)
+        min_distance = distances_to_sites.min()
+        closest_site = distances_to_sites.idxmin()
 
-    return df_merged
+        # Do some logging so we can check later.
+        if min_distance > 1:
+            logger.warning("Closest site to (%.4f,%.4f) is %s (%.2f m away)", lat, lon, closest_site, min_distance)
+        else:
+            logger.info("Closest site to (%.4f,%.4f) is %s (%.2f m away)", lat, lon, closest_site, min_distance)
+
+        # Assign site_id based on closest site
+        df.loc[df_group.index, "site_id"] = closest_site
 
-
-@click.command(short_help="create sites.csv")
-@click.option("--waves-mat", required=True, help=".mat file containing wave records")
-@click.option("--tides-mat", required=True, help=".mat file containing tide records")
-@click.option("--profiles-mat", required=True, help=".mat file containing beach profiles")
-@click.option("--orientations-mat", required=True, help=".mat file containing orientation of beach profiles")
-@click.option("--output-file", required=True, help="where to save sites.csv")
-def create_sites_csv(waves_mat, tides_mat, profiles_mat, orientations_mat, output_file):
-    logger.info("Creating %s", output_file)
-    df_waves = parse_waves(waves_mat=waves_mat)
-    df_tides = parse_tides(tides_mat=tides_mat)
-    df_profiles = parse_profiles(profiles_mat=profiles_mat)
-    df_orientations = parse_orientations(orientations_mat=orientations_mat)
-    df_sites = get_unique_sites(dfs=[df_waves, df_tides, df_profiles])
-    df_sites = combine_sites_and_orientaions(df_sites, df_orientations)
-    df_sites = specify_lat_lon_profile_center(df_sites)
-    df_sites.set_index(["site_id"], inplace=True)
-    df_sites.to_csv(output_file)
-    logger.info("Created %s", output_file)
+    nan_count = df.site_id.isna().sum()
+    if nan_count > 0:
+        logger.warning("Not all records (%d of %d) matched with a unique site", nan_count, len(df))
+
+    df = df.drop(columns=["lat", "lon", "beach"])
+
+    return df
 
 
 @click.command(short_help="create waves.csv")
@@ -301,17 +338,22 @@ def create_waves_csv(waves_mat, sites_csv, output_file):
 @click.command(short_help="create profiles.csv")
 @click.option("--profiles-mat", required=True, help=".mat file containing beach profiles")
-@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
-@click.option("--output-file", required=True, help="where to save profiles.csv")
-def create_profiles_csv(profiles_mat, sites_csv, output_file):
-    logger.info("Creating %s", output_file)
-    df_profiles = parse_profiles(profiles_mat=profiles_mat)
-    df_sites = pd.read_csv(sites_csv, index_col=[0])
-    df_profiles = replace_unique_sites(df_profiles, df_sites)
+@click.option("--profiles-output-file", required=True, help="where to save profiles.csv")
+@click.option("--sites-output-file", required=True, help="where to save sites.csv")
+def create_sites_and_profiles_csv(profiles_mat, profiles_output_file, sites_output_file):
+    logger.info("Creating sites and profiles csvs")
+    df_profiles, df_sites = parse_profiles_and_sites(profiles_mat=profiles_mat)
     df_profiles.set_index(["site_id", "profile_type", "x"], inplace=True)
     df_profiles.sort_index(inplace=True)
-    df_profiles.to_csv(output_file)
-    logger.info("Created %s", output_file)
+    df_profiles = remove_zeros(df_profiles)
+
+    df_sites.set_index(["site_id"], inplace=True)
+    df_sites.sort_index(inplace=True)
+
+    df_profiles.to_csv(profiles_output_file)
+    logger.info("Created %s", profiles_output_file)
+    df_sites.to_csv(sites_output_file)
+    logger.info("Created %s", sites_output_file)
 
 
 @click.command(short_help="create profiles.csv")
@@ -335,8 +377,7 @@ def cli():
 
 
 if __name__ == "__main__":
-    cli.add_command(create_sites_csv)
     cli.add_command(create_waves_csv)
-    cli.add_command(create_profiles_csv)
+    cli.add_command(create_sites_and_profiles_csv)
     cli.add_command(create_tides_csv)
     cli()
diff --git a/src/data/profile_features.py b/src/data/profile_features.py
index 78ba406..cb63568 100644
--- a/src/data/profile_features.py
+++ b/src/data/profile_features.py
@@ -15,22 +15,6 @@ logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
 logger = logging.getLogger(__name__)
 
 
-def shapes_from_shp(shp_file):
-    """
-    Parses a shape file and returns a list of shapely shapes, ids and properties
-    :param shp_file:
-    :return:
-    """
-    shapes = []
-    ids = []
-    properties = []
-    for feat in fiona.open(shp_file, "r"):
-        shapes.append(shape(feat["geometry"]))
-        ids.append(feat["id"])
-        properties.append(feat["properties"])
-    return shapes, ids, properties
-
-
 def convert_coord_systems(g1, in_coord_system="EPSG:4326", out_coord_system="EPSG:28356"):
     """
     Converts coordinates from one coordinates system to another.
     Needed because shapefiles are usually defined in
@@ -50,6 +34,22 @@ def convert_coord_systems(g1, in_coord_system="EPSG:4326", out_coord_system="EPS
     return g2
 
 
+def shapes_from_shp(shp_file):
+    """
+    Parses a shape file and returns a list of shapely shapes, ids and properties
+    :param shp_file:
+    :return:
+    """
+    shapes = []
+    ids = []
+    properties = []
+    for feat in fiona.open(shp_file, "r"):
+        shapes.append(shape(feat["geometry"]))
+        ids.append(feat["id"])
+        properties.append(feat["properties"])
+    return shapes, ids, properties
+
+
 def distance_to_intersection(lat, lon, landward_orientation, beach, line_strings, line_properties):
     """
     Returns the distance at whjch a line drawn from a lat/lon at an orientation intersects a line stinrg
@@ -143,7 +143,7 @@ def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp):
         lambda row: beach_profile_elevation(row["{}_x".format(feat)], df_profiles, "prestorm", row.name), axis=1
     )
 
-    df_profile_features = df_profile_features.drop(columns=["beach", "lat", "lon", "orientation"])
+    df_profile_features = df_profile_features.drop(columns=["beach", "lat", "lon", "orientation", "profile_x_lat_lon"])
     return df_profile_features
diff --git a/src/logging.yaml b/src/logging.yaml
new file mode 100644
index 0000000..fc82b31
--- /dev/null
+++ b/src/logging.yaml
@@ -0,0 +1,50 @@
+---
+version: 1
+disable_existing_loggers: False
+formatters:
+    simple:
+        format: "%(asctime)s - %(filename)s - %(levelname)s - %(message)s"
+
+handlers:
+    console:
+        class: logging.StreamHandler
+        level: DEBUG
+        formatter: simple
+        stream: ext://sys.stdout
+
+    info_file_handler:
+        class: logging.handlers.RotatingFileHandler
+        level: INFO
+        formatter: simple
+        filename: info.log
+        maxBytes: 10485760 # 10MB
+        backupCount: 3
+        encoding: utf8
+
+    warning_file_handler:
+        class: logging.handlers.RotatingFileHandler
+        level: WARNING
+        formatter: simple
+        filename: warning.log
+        maxBytes: 10485760 # 10MB
+        backupCount: 3
+        encoding: utf8
+
+    error_file_handler:
+        class: logging.handlers.RotatingFileHandler
+        level: ERROR
+        formatter: simple
+        filename: error.log
+        maxBytes: 10485760 # 10MB
+        backupCount: 3
+        encoding: utf8
+
+loggers:
+    my_module:
+        level: ERROR
+        handlers: [console]
+        propagate: no
+
+root:
+    level: INFO
+    handlers: [console, info_file_handler, error_file_handler, warning_file_handler]
diff --git a/src/utils.py b/src/utils.py
new file mode 100644
index 0000000..0df1fc6
--- /dev/null
+++ b/src/utils.py
@@ -0,0 +1,16 @@
+import logging.config
+import os
+import yaml
+
+
+def setup_logging(path="./src/logging.yaml", default_level=logging.INFO):
+    """
+    Setup logging configuration
+    """
+    if os.path.exists(path):
+        with open(path, "rt") as f:
+            config = yaml.safe_load(f.read())
+        logging.config.dictConfig(config)
+    else:
+        logging.basicConfig(level=default_level)
+    return logging.getLogger(__name__)
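Note (not part of the patch): a minimal sketch of how the new setup_logging() helper in src/utils.py is intended to be used, matching the pattern this diff applies in csv_to_shp.py and parse_mat.py. It assumes the script runs from the repository root (so the default "./src/logging.yaml" path resolves) and that src/ is on sys.path, as the modified scripts arrange via sys.path.append; the file path and message values below are illustrative only.

from utils import setup_logging

# Loads handlers from ./src/logging.yaml (console plus rotating info/warning/error
# log files); falls back to logging.basicConfig(level=logging.INFO) if the YAML
# config is missing.
logger = setup_logging()
logger.info("Parsing %s", "./data/raw/profiles.mat")  # illustrative path
logger.warning("Closest site is %.2f m away", 12.5)   # illustrative value, also captured in warning.log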