Fix formatting

Branch: develop
Author: Chris Leaman, 6 years ago
Parent: 43e85224e8
Commit: 4e5b0ce81c

@@ -19,7 +19,10 @@ def compare_impacts(df_forecasted, df_observed):
     :return:
     """
     df_compared = df_forecasted.merge(
-        df_observed, left_index=True, right_index=True, suffixes=["_forecasted", "_observed"]
+        df_observed,
+        left_index=True,
+        right_index=True,
+        suffixes=["_forecasted", "_observed"],
     )
     return df_compared
@@ -27,7 +30,14 @@ def compare_impacts(df_forecasted, df_observed):
 if __name__ == "__main__":
     logger.info("Importing existing data")
     data_folder = "./data/interim"
-    df_forecasted = pd.read_csv(os.path.join(data_folder, "impacts_forecasted_mean_slope_sto06.csv"), index_col=[0])
-    df_observed = pd.read_csv(os.path.join(data_folder, "impacts_observed.csv"), index_col=[0])
+    df_forecasted = pd.read_csv(
+        os.path.join(data_folder, "impacts_forecasted_mean_slope_sto06.csv"),
+        index_col=[0],
+    )
+    df_observed = pd.read_csv(
+        os.path.join(data_folder, "impacts_observed.csv"), index_col=[0]
+    )
     df_compared = compare_impacts(df_forecasted, df_observed)
-    df_compared.to_csv(os.path.join(data_folder, "impacts_observed_vs_forecasted_mean_slope_sto06.csv"))
+    df_compared.to_csv(
+        os.path.join(data_folder, "impacts_observed_vs_forecasted_mean_slope_sto06.csv")
+    )
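
The rewrapped merge above is behaviour-preserving: it joins the forecasted and observed impacts on their shared site_id index, suffixing the overlapping column names. A minimal sketch of the pattern, with made-up sites and regimes:

import pandas as pd

# Made-up impacts, indexed by site_id like the real csvs.
df_forecasted = pd.DataFrame(
    {"storm_regime": ["swash", "collision"]},
    index=pd.Index(["site_a", "site_b"], name="site_id"),
)
df_observed = pd.DataFrame(
    {"storm_regime": ["swash", "overwash"]},
    index=pd.Index(["site_a", "site_b"], name="site_id"),
)

# Index-on-index merge, as in compare_impacts(): the shared column name
# picks up the "_forecasted"/"_observed" suffixes.
df_compared = df_forecasted.merge(
    df_observed, left_index=True, right_index=True, suffixes=["_forecasted", "_observed"]
)
print(df_compared.columns.tolist())
# ['storm_regime_forecasted', 'storm_regime_observed']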

@@ -20,18 +20,24 @@ def forecasted_impacts(df_profile_features, df_forecasted_twl):
     """
     logger.info("Getting forecasted storm impacts")
-    df_forecasted_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique())
+    df_forecasted_impacts = pd.DataFrame(
+        index=df_profile_features.index.get_level_values("site_id").unique()
+    )
     # For each site, find the maximum R_high value and the corresponding R_low value.
     idx = df_forecasted_twl.groupby(level=["site_id"])["R_high"].idxmax().dropna()
-    df_r_vals = df_forecasted_twl.loc[idx, ["R_high", "R_low"]].reset_index(["datetime"])
-    df_forecasted_impacts = df_forecasted_impacts.merge(df_r_vals, how="left", left_index=True, right_index=True)
+    df_r_vals = df_forecasted_twl.loc[idx, ["R_high", "R_low"]].reset_index(
+        ["datetime"]
+    )
+    df_forecasted_impacts = df_forecasted_impacts.merge(
+        df_r_vals, how="left", left_index=True, right_index=True
+    )
     # Join with df_profile features to find dune toe and crest elevations
     df_forecasted_impacts = df_forecasted_impacts.merge(
-        df_profile_features.query("profile_type=='prestorm'").reset_index("profile_type")[
-            ["dune_toe_z", "dune_crest_z"]
-        ],
+        df_profile_features.query("profile_type=='prestorm'").reset_index(
+            "profile_type"
+        )[["dune_toe_z", "dune_crest_z"]],
         how="left",
         left_on=["site_id"],
         right_on=["site_id"],
@@ -71,7 +77,12 @@ def storm_regime(df_forecasted_impacts):
     return df_forecasted_impacts
-def twl_exceedence_time(df_profile_features, df_forecasted_twl, z_twl_col="R_high", z_exceedence_col="dune_toe_z"):
+def twl_exceedence_time(
+    df_profile_features,
+    df_forecasted_twl,
+    z_twl_col="R_high",
+    z_exceedence_col="dune_toe_z",
+):
     """
     Returns a dataframe of the number of hours the twl exceeded a certain z elevation.
     May need to use this https://stackoverflow.com/a/53656968 if datetimes are not consistent.
@@ -85,11 +96,15 @@ def twl_exceedence_time(df_profile_features, df_forecasted_twl, z_twl_col="R_hig
     # Get a dataframe of prestorm dune toes organised by site_id
     df_dune_toes = (
-        df_profile_features.query('profile_type=="prestorm"').reset_index("profile_type")[z_exceedence_col].to_frame()
+        df_profile_features.query('profile_type=="prestorm"')
+        .reset_index("profile_type")[z_exceedence_col]
+        .to_frame()
     )
     # Merge dune toes into site_id
-    df_merged = df_forecasted_twl.merge(df_dune_toes, left_on=["site_id"], right_on=["site_id"])
+    df_merged = df_forecasted_twl.merge(
+        df_dune_toes, left_on=["site_id"], right_on=["site_id"]
+    )
     # Return the sum of hours that twl exceeded the level
     return (
@@ -114,7 +129,9 @@ def create_forecasted_impacts(profile_features_csv, forecasted_twl_csv, output_f
     df_forecasted_impacts = forecasted_impacts(df_profile_features, df_forecasted_twl)
     df_forecasted_impacts = df_forecasted_impacts.merge(
-        twl_exceedence_time(df_profile_features, df_forecasted_twl), left_on=["site_id"], right_on=["site_id"]
+        twl_exceedence_time(df_profile_features, df_forecasted_twl),
+        left_on=["site_id"],
+        right_on=["site_id"],
     )
     df_forecasted_impacts.to_csv(output_file)
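
The key trick in forecasted_impacts() is groupby().idxmax(): it returns the full (site_id, datetime) index label of each site's peak R_high, so the subsequent .loc pulls the R_low from the same timestamp. A small self-contained sketch with made-up numbers:

import pandas as pd

# Hypothetical TWL records: two sites, two timestamps each.
df_twl = pd.DataFrame(
    {"R_high": [1.2, 2.4, 3.1, 2.9], "R_low": [0.4, 0.9, 1.5, 1.2]},
    index=pd.MultiIndex.from_product(
        [["site_a", "site_b"], ["2016-06-05 00:00", "2016-06-05 01:00"]],
        names=["site_id", "datetime"],
    ),
)

# idxmax() yields one (site_id, datetime) tuple per site, so .loc selects
# the whole row at each site's peak; reset_index keeps site_id as the index.
idx = df_twl.groupby(level=["site_id"])["R_high"].idxmax().dropna()
df_r_vals = df_twl.loc[idx, ["R_high", "R_low"]].reset_index("datetime")
print(df_r_vals)  # one row per site: R_high 2.4/3.1 with the matching R_low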

@@ -30,12 +30,16 @@ def volume_change(df_profiles, df_profile_features, zone):
     """
     logger.info("Calculating change in beach volume in {} zone".format(zone))
-    df_vol_changes = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique())
+    df_vol_changes = pd.DataFrame(
+        index=df_profile_features.index.get_level_values("site_id").unique()
+    )
     df_profiles = df_profiles.sort_index()
     sites = df_profiles.groupby(level=["site_id"])
     for site_id, df_site in sites:
-        logger.debug("Calculating change in beach volume at {} in {} zone".format(site_id, zone))
+        logger.debug(
+            "Calculating change in beach volume at {} in {} zone".format(site_id, zone)
+        )
         # TODO Change this query to an index
         query = "site_id=='{}'&profile_type=='prestorm'".format(site_id)
@@ -51,7 +55,10 @@ def volume_change(df_profiles, df_profile_features, zone):
             prestorm_dune_toe_x = prestorm_dune_crest_x
         # If no prestorm or poststorm profile, skip site and continue
-        profile_lengths = [len(df_site.query("profile_type == '{}'".format(x))) for x in ["prestorm", "poststorm"]]
+        profile_lengths = [
+            len(df_site.query("profile_type == '{}'".format(x)))
+            for x in ["prestorm", "poststorm"]
+        ]
         if any([length == 0 for length in profile_lengths]):
             continue
@@ -60,13 +67,21 @@ def volume_change(df_profiles, df_profile_features, zone):
         df_zone = df_site.dropna(subset=["z"])
         x_last_obs = min(
             [
-                max(df_zone.query("profile_type == '{}'".format(profile_type)).index.get_level_values("x"))
+                max(
+                    df_zone.query(
+                        "profile_type == '{}'".format(profile_type)
+                    ).index.get_level_values("x")
+                )
                 for profile_type in ["prestorm", "poststorm"]
             ]
         )
         x_first_obs = max(
             [
-                min(df_zone.query("profile_type == '{}'".format(profile_type)).index.get_level_values("x"))
+                min(
+                    df_zone.query(
+                        "profile_type == '{}'".format(profile_type)
+                    ).index.get_level_values("x")
+                )
                 for profile_type in ["prestorm", "poststorm"]
             ]
         )
@@ -100,16 +115,24 @@ def volume_change(df_profiles, df_profile_features, zone):
         # Volume change needs to be calculated including a tolerance for LIDAR accuracy. If difference between
         # profiles is less than 20 cm, consider them as zero difference.
-        prestorm_z = df_zone.query("profile_type=='prestorm'").reset_index("profile_type").z
-        poststorm_z = df_zone.query("profile_type=='poststorm'").reset_index("profile_type").z
+        prestorm_z = (
+            df_zone.query("profile_type=='prestorm'").reset_index("profile_type").z
+        )
+        poststorm_z = (
+            df_zone.query("profile_type=='poststorm'").reset_index("profile_type").z
+        )
         diff_z = prestorm_z - poststorm_z
         diff_z[abs(diff_z) < 0.2] = 0
-        diff_vol = beach_volume(x=diff_z.index.get_level_values("x"), z=diff_z, x_min=x_min, x_max=x_max)
+        diff_vol = beach_volume(
+            x=diff_z.index.get_level_values("x"), z=diff_z, x_min=x_min, x_max=x_max
+        )
         df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = prestorm_vol
         df_vol_changes.loc[site_id, "poststorm_{}_vol".format(zone)] = poststorm_vol
         df_vol_changes.loc[site_id, "{}_vol_change".format(zone)] = diff_vol
-        df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = diff_vol / prestorm_vol * 100
+        df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = (
+            diff_vol / prestorm_vol * 100
+        )
     return df_vol_changes
@@ -142,15 +165,19 @@ def storm_regime(df_observed_impacts):
     """
     logger.info("Getting observed storm regimes")
-    swash = (df_observed_impacts.dune_face_pct_change <= 2) & (df_observed_impacts.dune_face_vol_change <= 3)
-    collision = (df_observed_impacts.dune_face_pct_change >= 2) | (df_observed_impacts.dune_face_vol_change > 3)
+    swash = (df_observed_impacts.dune_face_pct_change <= 2) & (
+        df_observed_impacts.dune_face_vol_change <= 3
+    )
+    collision = (df_observed_impacts.dune_face_pct_change >= 2) | (
+        df_observed_impacts.dune_face_vol_change > 3
+    )
     df_observed_impacts.loc[swash, "storm_regime"] = "swash"
     df_observed_impacts.loc[collision, "storm_regime"] = "collision"
-    # TODO We may be able to identify observed regimes by looking at the change in crest and toe elevation. This would be useful for
+    # TODO We may be able to identify observed regimes by looking at the change in crest and toe elevation. This would be useful for
     # locations where we have overwash and cannot calculate the change in volume correctly. Otherwise, maybe it's better to put it in manually.
     return df_observed_impacts
@@ -160,7 +187,9 @@ def overwrite_impacts(df_observed_impacts, df_raw_features):
     :param df_raw_profile_features:
     :return:
     """
-    df_observed_impacts.update(df_raw_features.rename(columns={"observed_storm_regime": "storm_regime"}))
+    df_observed_impacts.update(
+        df_raw_features.rename(columns={"observed_storm_regime": "storm_regime"})
+    )
     return df_observed_impacts
@@ -169,11 +198,15 @@ def overwrite_impacts(df_observed_impacts, df_raw_features):
 @click.option("--profile-features-crest-toes-csv", required=True, help="")
 @click.option("--raw-profile-features-csv", required=True, help="")
 @click.option("--output-file", required=True, help="")
-def create_observed_impacts(profiles_csv, profile_features_crest_toes_csv, raw_profile_features_csv, output_file):
+def create_observed_impacts(
+    profiles_csv, profile_features_crest_toes_csv, raw_profile_features_csv, output_file
+):
     profiles_csv = "./data/interim/profiles.csv"
     profile_features_crest_toes_csv = "./data/interim/profile_features_crest_toes.csv"
-    raw_profile_features_csv = "./data/raw/profile_features_chris_leaman/profile_features_chris_leaman.csv"
+    raw_profile_features_csv = (
+        "./data/raw/profile_features_chris_leaman/profile_features_chris_leaman.csv"
+    )
     logger.info("Creating observed wave impacts")
     logger.info("Importing data")
@@ -181,12 +214,18 @@ def create_observed_impacts(profiles_csv, profile_features_crest_toes_csv, raw_p
     df_profile_features = pd.read_csv(profile_features_crest_toes_csv, index_col=[0, 1])
     logger.info("Creating new dataframe for observed impacts")
-    df_observed_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique())
+    df_observed_impacts = pd.DataFrame(
+        index=df_profile_features.index.get_level_values("site_id").unique()
+    )
     logger.info("Getting pre/post storm volumes")
     df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash")
-    df_dune_face_vol_changes = volume_change(df_profiles, df_profile_features, zone="dune_face")
-    df_observed_impacts = df_observed_impacts.join([df_swash_vol_changes, df_dune_face_vol_changes])
+    df_dune_face_vol_changes = volume_change(
+        df_profiles, df_profile_features, zone="dune_face"
+    )
+    df_observed_impacts = df_observed_impacts.join(
+        [df_swash_vol_changes, df_dune_face_vol_changes]
+    )
     # Classify regime based on volume changes
     df_observed_impacts = storm_regime(df_observed_impacts)
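
storm_regime() classifies sites with boolean masks and .loc assignment; because "collision" is assigned after "swash", it wins wherever both masks happen to be true. A toy example with hypothetical volume changes:

import pandas as pd

# Hypothetical dune-face results for three sites.
df = pd.DataFrame(
    {"dune_face_pct_change": [0.5, 8.0, 1.0], "dune_face_vol_change": [1.0, 20.0, 5.0]},
    index=["site_a", "site_b", "site_c"],
)

# Build boolean Series, then label rows via .loc; later assignments
# overwrite earlier ones where both masks are True, so order matters.
swash = (df.dune_face_pct_change <= 2) & (df.dune_face_vol_change <= 3)
collision = (df.dune_face_pct_change >= 2) | (df.dune_face_vol_change > 3)
df.loc[swash, "storm_regime"] = "swash"
df.loc[collision, "storm_regime"] = "collision"
print(df.storm_regime.tolist())  # ['swash', 'collision', 'collision']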

@@ -16,7 +16,9 @@ from logs import setup_logging
 logger = setup_logging()
-def lat_lon_from_profile_x_coord(center_lat_lon, orientation, center_profile_x, x_coord):
+def lat_lon_from_profile_x_coord(
+    center_lat_lon, orientation, center_profile_x, x_coord
+):
     """
     Returns the lat/lon of a point on a profile with the given x_coord
     :param center_lat_lon: Shapely point of lat/lon of profile center
@@ -31,7 +33,9 @@ def lat_lon_from_profile_x_coord(center_lat_lon, orientation, center_profile_x,
     point_x = center_x + (center_profile_x - x_coord) * np.cos(np.deg2rad(orientation))
     point_y = center_y + (center_profile_x - x_coord) * np.sin(np.deg2rad(orientation))
     point_xy = Point(point_x, point_y)
-    point_lat_lon = convert_coord_systems(point_xy, in_coord_system="EPSG:28356", out_coord_system="EPSG:4326")
+    point_lat_lon = convert_coord_systems(
+        point_xy, in_coord_system="EPSG:28356", out_coord_system="EPSG:4326"
+    )
     return point_lat_lon
@@ -41,7 +45,9 @@ def lat_lon_from_profile_x_coord(center_lat_lon, orientation, center_profile_x,
 @click.option("--crest-toes-csv", required=True, help=".csv file to convert")
 @click.option("--impacts-csv", required=True, help=".csv file to convert")
 @click.option("--output-geojson", required=True, help="where to store .geojson file")
-def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, output_geojson):
+def R_high_to_geojson(
+    sites_csv, profiles_csv, crest_toes_csv, impacts_csv, output_geojson
+):
     """
     Converts impact R_high into a lat/lon geojson that we can plot in QGIS
     :param sites_csv:
@@ -58,10 +64,14 @@ def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, outp
     # Create geojson file
     schema = {
         "geometry": "Point",
-        "properties": OrderedDict([("beach", "str"), ("site_id", "str"), ("elevation", "float")]),
+        "properties": OrderedDict(
+            [("beach", "str"), ("site_id", "str"), ("elevation", "float")]
+        ),
     }
-    with fiona.open(output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema) as output:
+    with fiona.open(
+        output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema
+    ) as output:
         for index, row in df_impacts.iterrows():
             site_id = index
@@ -72,12 +82,18 @@ def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, outp
             # Get poststorm profile
             df_profile = df_profiles.loc[(site_id, "prestorm")]
-            int_x = crossings(df_profile.index.get_level_values("x").tolist(), df_profile.z.tolist(), R_high_z)
+            int_x = crossings(
+                df_profile.index.get_level_values("x").tolist(),
+                df_profile.z.tolist(),
+                R_high_z,
+            )
             # Take the intersection closest to the dune face.
             try:
-                x_cols = [x for x in df_crest_toes.columns if '_x' in x]
-                dune_face_x = np.mean(df_crest_toes.loc[(site_id, "prestorm"),x_cols].tolist())
+                x_cols = [x for x in df_crest_toes.columns if "_x" in x]
+                dune_face_x = np.mean(
+                    df_crest_toes.loc[(site_id, "prestorm"), x_cols].tolist()
+                )
                 int_x = min(int_x, key=lambda x: abs(x - dune_face_x))
             except:
                 continue
@@ -91,7 +107,9 @@ def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, outp
                 x_coord=int_x,
             )
-            prop = OrderedDict([("beach", beach), ("site_id", site_id), ("elevation", R_high_z)])
+            prop = OrderedDict(
+                [("beach", beach), ("site_id", site_id), ("elevation", R_high_z)]
+            )
             output.write({"geometry": mapping(point_lat_lon), "properties": prop})
@@ -99,7 +117,9 @@ def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, outp
 @click.option("--sites-csv", required=True, help=".csv file to convert")
 @click.option("--profile-features-csv", required=True, help=".csv file to convert")
 @click.option("--output-geojson", required=True, help="where to store .geojson file")
-def profile_features_crest_toes_to_geojson(sites_csv, profile_features_csv, output_geojson):
+def profile_features_crest_toes_to_geojson(
+    sites_csv, profile_features_csv, output_geojson
+):
     """
     Converts profile_features containing dune toes and crest locations to a geojson we can load into QGIS
     :param sites_csv:
@@ -131,7 +151,9 @@ def profile_features_crest_toes_to_geojson(sites_csv, profile_features_csv, outp
         ),
     }
-    with fiona.open(output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema) as output:
+    with fiona.open(
+        output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema
+    ) as output:
         for index, row in df_profile_features.iterrows():
             beach = index[:-4]
             site_id = index
@@ -185,9 +207,14 @@ def sites_csv_to_geojson(input_csv, output_geojson):
     df_sites = pd.read_csv(input_csv, index_col=[0])
     logger.info(os.environ.get("GDAL_DATA", None))
-    schema = {"geometry": "LineString", "properties": OrderedDict([("beach", "str"), ("site_id", "str")])}
+    schema = {
+        "geometry": "LineString",
+        "properties": OrderedDict([("beach", "str"), ("site_id", "str")]),
+    }
-    with fiona.open(output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema) as output:
+    with fiona.open(
+        output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema
+    ) as output:
         for index, row in df_sites.iterrows():
             center_lat_lon = Point(row["lon"], row["lat"])
@@ -215,10 +242,16 @@ def sites_csv_to_geojson(input_csv, output_geojson):
 @click.command()
 @click.option("--sites-csv", required=True, help="sites.csv file to convert")
-@click.option("--observed-impacts-csv", required=True, help="impacts-observed.csv file to convert")
-@click.option("--forecast-impacts-csv", required=True, help="impacts-forecast.csv file to convert")
+@click.option(
+    "--observed-impacts-csv", required=True, help="impacts-observed.csv file to convert"
+)
+@click.option(
+    "--forecast-impacts-csv", required=True, help="impacts-forecast.csv file to convert"
+)
 @click.option("--output-geojson", required=True, help="where to store .geojson file")
-def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, output_geojson):
+def impacts_to_geojson(
+    sites_csv, observed_impacts_csv, forecast_impacts_csv, output_geojson
+):
     """
     Converts impacts observed and forecasted to a geojson for visualization in QGIS
     :param sites_csv:
@@ -231,7 +264,9 @@ def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, ou
     # Get information from .csv and read into pandas dataframe
     df_sites = pd.read_csv(sites_csv, index_col=[0])
     df_observed = pd.read_csv(observed_impacts_csv, index_col=[0])
-    df_forecast = pd.read_csv(forecast_impacts_csv, index_col=[0]).rename({"storm_regime": "forecast_storm_regime"})
+    df_forecast = pd.read_csv(forecast_impacts_csv, index_col=[0]).rename(
+        {"storm_regime": "forecast_storm_regime"}
+    )
     # Rename columns, so we can distinguish between forecast and observed
     df_observed = df_observed.rename(columns={"storm_regime": "observed_storm_regime"})
@@ -241,7 +276,9 @@ def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, ou
     df = pd.concat([df_sites, df_observed, df_forecast], sort=True, axis=1)
     # Make new column for accuracy of forecast. Use underpredict/correct/overpredict classes
-    df.loc[df.observed_storm_regime == df.forecast_storm_regime, "forecast_accuray"] = "correct"
+    df.loc[
+        df.observed_storm_regime == df.forecast_storm_regime, "forecast_accuracy"
+    ] = "correct"
     # Observed/Forecasted/Class for each combination
     classes = [
@@ -256,7 +293,10 @@ def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, ou
         ("overwash", "overwash", "correct"),
     ]
     for c in classes:
-        df.loc[(df.observed_storm_regime == c[0]) & (df.forecast_storm_regime == c[1]), "forecast_accuracy"] = c[2]
+        df.loc[
+            (df.observed_storm_regime == c[0]) & (df.forecast_storm_regime == c[1]),
+            "forecast_accuracy",
+        ] = c[2]
     schema = {
         "geometry": "Point",
@@ -271,7 +311,9 @@ def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, ou
         ),
     }
-    with fiona.open(output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema) as output:
+    with fiona.open(
+        output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema
+    ) as output:
         for index, row in df.iterrows():
             # Locate the marker at the seaward end of the profile to avoid cluttering the coastline.
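
All four commands in this file share the fiona writing pattern these hunks rewrap: declare a schema, open a GeoJSON collection, then write one feature dict per row. A stripped-down sketch (hypothetical path and values; assumes fiona 1.x, where fiona.crs.from_epsg is available):

from collections import OrderedDict

import fiona
from fiona.crs import from_epsg
from shapely.geometry import Point, mapping

# The schema fixes the property names and types up front; every feature
# written must match it.
schema = {
    "geometry": "Point",
    "properties": OrderedDict([("site_id", "str"), ("elevation", "float")]),
}
with fiona.open(
    "example.geojson", "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema
) as output:
    # One feature: shapely geometry serialised via mapping(), plus properties.
    prop = OrderedDict([("site_id", "site_a"), ("elevation", 2.5)])
    output.write({"geometry": mapping(Point(151.3, -33.7)), "properties": prop})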

@@ -27,10 +27,19 @@ def parse_crest_toes(df_raw_features, df_profiles):
     # Puts profiles_features_csv into format expected by rest of analysis
     df_crest_toes = df_raw_features.reset_index().melt(
         id_vars=["site_id"],
-        value_vars=["prestorm_dune_crest_x", "prestorm_dune_toe_x", "poststorm_dune_crest_x", "poststorm_dune_toe_x"],
+        value_vars=[
+            "prestorm_dune_crest_x",
+            "prestorm_dune_toe_x",
+            "poststorm_dune_crest_x",
+            "poststorm_dune_toe_x",
+        ],
     )
-    df_crest_toes["profile_type"] = df_crest_toes.variable.str.extract(r"(prestorm|poststorm)")
-    df_crest_toes["point_type"] = df_crest_toes.variable.str.extract(r"(dune_crest_x|dune_toe_x)")
+    df_crest_toes["profile_type"] = df_crest_toes.variable.str.extract(
+        r"(prestorm|poststorm)"
+    )
+    df_crest_toes["point_type"] = df_crest_toes.variable.str.extract(
+        r"(dune_crest_x|dune_toe_x)"
+    )
     df_crest_toes = df_crest_toes.drop(columns=["variable"])
     df_crest_toes = df_crest_toes.sort_values("site_id")
     df_crest_toes = df_crest_toes.set_index(["site_id", "profile_type", "point_type"])
@@ -52,7 +61,9 @@ def parse_crest_toes(df_raw_features, df_profiles):
             x_val = df_crest_toes.loc[(site_id, param), "dune_{}_x".format(loc)]
             if np.isnan(x_val):
-                df_crest_toes.loc[(site_id, param), "dune_{}_z".format(loc)] = np.nan
+                df_crest_toes.loc[
+                    (site_id, param), "dune_{}_z".format(loc)
+                ] = np.nan
                 continue
             # Try to get the value from the other profile if we return nan or empty dataframe
@@ -121,7 +132,9 @@ def parse_dune_crest_toes(df_sites, crest_mat, toe_mat):
     # Want the site_id instead of the site_no, so merge in df_sites
     df_sites.reset_index(inplace=True)
-    df_profile_features = df_sites[["site_no", "site_id"]].merge(df_profile_features, how="outer", on=["site_no"])
+    df_profile_features = df_sites[["site_no", "site_id"]].merge(
+        df_profile_features, how="outer", on=["site_no"]
+    )
     df_profile_features.drop(columns=["site_no"], inplace=True)
     df_profile_features.set_index(["site_id", "profile_type"], inplace=True)
     df_profile_features.sort_index(inplace=True)
@@ -257,7 +270,9 @@ def parse_profiles_and_sites(profiles_mat):
             z = z_site[j]
             land_lim = mat_data["landlims"][i][j]
-            survey_datetime = matlab_datenum_to_datetime(mat_data["surveydates"][i][j])
+            survey_datetime = matlab_datenum_to_datetime(
+                mat_data["surveydates"][i][j]
+            )
             # Keep a record of where the center of the profile is located, and the locations of the land
             # and sea
@@ -333,7 +348,9 @@ def remove_zeros(df_profiles):
         )
         df_profile = df_profiles[idx_site]
         x_last_ele = df_profile[df_profile.z == 0].index.get_level_values("x")[0]
-        df_profiles.loc[idx_site & (df_profiles.index.get_level_values("x") > x_last_ele), "z"] = np.nan
+        df_profiles.loc[
+            idx_site & (df_profiles.index.get_level_values("x") > x_last_ele), "z"
+        ] = np.nan
     logger.info("Removed zeros from end of profiles")
     return df_profiles
@@ -345,7 +362,11 @@ def matlab_datenum_to_datetime(matlab_datenum):
     :param matlab_datenum:
     :return:
     """
-    return datetime.fromordinal(int(matlab_datenum)) + timedelta(days=matlab_datenum % 1) - timedelta(days=366)
+    return (
+        datetime.fromordinal(int(matlab_datenum))
+        + timedelta(days=matlab_datenum % 1)
+        - timedelta(days=366)
+    )
 def replace_unique_sites(df, df_sites):
@@ -359,7 +380,10 @@ def replace_unique_sites(df, df_sites):
     df_sites["site_id"] = df_sites.index.get_level_values("site_id")
     # Create eastings and northings so we can calculate distances
-    site_points = [convert_coord_systems(Point(lon, lat)).xy for lon, lat in zip(df_sites["lon"], df_sites["lat"])]
+    site_points = [
+        convert_coord_systems(Point(lon, lat)).xy
+        for lon, lat in zip(df_sites["lon"], df_sites["lat"])
+    ]
     df_sites["easting"] = [x[0][0] for x in site_points]
     df_sites["northing"] = [x[1][0] for x in site_points]
@@ -369,22 +393,39 @@ def replace_unique_sites(df, df_sites):
         # Calculate distances from each point to each site and determine closest site
         easting, northing = [x[0] for x in convert_coord_systems(Point(lon, lat)).xy]
-        distances_to_sites = np.sqrt((df_sites["easting"] - easting) ** 2 + (df_sites["northing"] - northing) ** 2)
+        distances_to_sites = np.sqrt(
+            (df_sites["easting"] - easting) ** 2
+            + (df_sites["northing"] - northing) ** 2
+        )
         min_distance = distances_to_sites.min()
         closest_site = distances_to_sites.idxmin()
         # Do some logging so we can check later.
         if min_distance > 1:
-            logger.warning("Closest site to (%.4f,%.4f) is %s (%.2f m away)", lat, lon, closest_site, min_distance)
+            logger.warning(
+                "Closest site to (%.4f,%.4f) is %s (%.2f m away)",
+                lat,
+                lon,
+                closest_site,
+                min_distance,
+            )
         else:
-            logger.info("Closest site to (%.4f,%.4f) is %s (%.2f m away)", lat, lon, closest_site, min_distance)
+            logger.info(
+                "Closest site to (%.4f,%.4f) is %s (%.2f m away)",
+                lat,
+                lon,
+                closest_site,
+                min_distance,
+            )
         # Assign site_id based on closest site
         df.loc[df_group.index, "site_id"] = closest_site
     nan_count = df.site_id.isna().sum()
     if nan_count > 0:
-        logger.warning("Not all records (%d of %d) matched with a unique site", nan_count, len(df))
+        logger.warning(
+            "Not all records (%d of %d) matched with a unique site", nan_count, len(df)
+        )
     df = df.drop(columns=["lat", "lon", "beach"])
@@ -393,7 +434,9 @@ def replace_unique_sites(df, df_sites):
 @click.command(short_help="create waves.csv")
 @click.option("--waves-mat", required=True, help=".mat file containing wave records")
-@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
+@click.option(
+    "--sites-csv", required=True, help=".csv file description of cross section sites"
+)
 @click.option("--output-file", required=True, help="where to save waves.csv")
 def create_waves_csv(waves_mat, sites_csv, output_file):
     logger.info("Creating %s", output_file)
@@ -420,7 +463,9 @@ def create_waves_csv(waves_mat, sites_csv, output_file):
 @click.command(short_help="create profile_features.csv")
-@click.option("--profile-features-csv", required=True, help=".mat file containing wave records")
+@click.option(
+    "--profile-features-csv", required=True, help=".mat file containing wave records"
+)
 @click.option("--profiles-csv", required=True, help=".mat file containing wave records")
 @click.option("--output-file", required=True, help="where to save waves.csv")
 def create_crest_toes(profile_features_csv, profiles_csv, output_file):
@@ -435,10 +480,16 @@ def create_crest_toes(profile_features_csv, profiles_csv, output_file):
 @click.command(short_help="create profiles.csv")
-@click.option("--profiles-mat", required=True, help=".mat file containing beach profiles")
-@click.option("--profiles-output-file", required=True, help="where to save profiles.csv")
+@click.option(
+    "--profiles-mat", required=True, help=".mat file containing beach profiles"
+)
+@click.option(
+    "--profiles-output-file", required=True, help="where to save profiles.csv"
+)
 @click.option("--sites-output-file", required=True, help="where to save sites.csv")
-def create_sites_and_profiles_csv(profiles_mat, profiles_output_file, sites_output_file):
+def create_sites_and_profiles_csv(
+    profiles_mat, profiles_output_file, sites_output_file
+):
     logger.info("Creating sites and profiles csvs")
     df_profiles, df_sites = parse_profiles_and_sites(profiles_mat=profiles_mat)
     df_profiles.set_index(["site_id", "profile_type", "x"], inplace=True)
@@ -456,7 +507,9 @@ def create_sites_and_profiles_csv(profiles_mat, profiles_output_file, sites_outp
 @click.command(short_help="create profiles.csv")
 @click.option("--tides-mat", required=True, help=".mat file containing tides")
-@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
+@click.option(
+    "--sites-csv", required=True, help=".csv file description of cross section sites"
+)
 @click.option("--output-file", required=True, help="where to save tides.csv")
 def create_tides_csv(tides_mat, sites_csv, output_file):
     logger.info("Creating %s", output_file)
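
The matlab_datenum_to_datetime() helper rewrapped above converts MATLAB serial date numbers to Python datetimes: MATLAB counts days from year 0 while Python ordinals start at year 1, hence the 366-day offset, and the fractional part carries the time of day. A worked check (730486 is MATLAB's datenum for 1 Jan 2000):

from datetime import datetime, timedelta

def matlab_datenum_to_datetime(matlab_datenum):
    # MATLAB day 1 is 1 Jan 0000; Python ordinal 1 is 1 Jan 0001.
    # Subtracting 366 days bridges the two epochs.
    return (
        datetime.fromordinal(int(matlab_datenum))
        + timedelta(days=matlab_datenum % 1)
        - timedelta(days=366)
    )

print(matlab_datenum_to_datetime(730486.5))  # 2000-01-01 12:00:00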

@@ -31,7 +31,9 @@ def shapes_from_shp(shp_file):
     return shapes, ids, properties
-def distance_to_intersection(lat, lon, landward_orientation, beach, line_strings, line_properties):
+def distance_to_intersection(
+    lat, lon, landward_orientation, beach, line_strings, line_properties
+):
     """
     Returns the distance at which a line drawn from a lat/lon at an orientation intersects a line string
     :param lat:
@@ -58,7 +60,9 @@ def distance_to_intersection(lat, lon, landward_orientation, beach, line_strings
     seaward_line = LineString([start_point, seaward_point])
     # Look at relevant line_strings which have the same beach property in order to reduce computation time
-    line_strings = [s for s, p in zip(line_strings, line_properties) if p["beach"] == beach]
+    line_strings = [
+        s for s, p in zip(line_strings, line_properties) if p["beach"] == beach
+    ]
     # Check whether profile_line intersects with any lines in line_string. If intersection point is landwards,
     # consider this negative, otherwise seawards is positive.
@@ -89,7 +93,9 @@ def beach_profile_elevation(x_coord, df_profiles, profile_type, site_id):
         return None
     # Get profile
-    df_profile = df_profiles.query('profile_type == "{}" and site_id =="{}"'.format(profile_type, site_id))
+    df_profile = df_profiles.query(
+        'profile_type == "{}" and site_id =="{}"'.format(profile_type, site_id)
+    )
     return np.interp(x_coord, df_profile.index.get_level_values("x"), df_profile["z"])
@@ -102,7 +108,10 @@ def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp):
     # Get site information. Base our profile features on each site
     df_profile_features = df_sites
-    features = {"dune_crest": {"file": dune_crest_shp}, "dune_toe": {"file": dune_toe_shp}}
+    features = {
+        "dune_crest": {"file": dune_crest_shp},
+        "dune_toe": {"file": dune_toe_shp},
+    }
     # Import our dune crest and toes
     for feat in features.keys():
@@ -112,19 +121,31 @@ def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp):
         # Figure out the x coordinates of our crest and toes, by looking at where our beach sections intersect our
         # shape files.
         col_name = "{}_x".format(feat)
-        df_profile_features[col_name] = df_profile_features["profile_x_lat_lon"] + df_profile_features.apply(
+        df_profile_features[col_name] = df_profile_features[
+            "profile_x_lat_lon"
+        ] + df_profile_features.apply(
             lambda row: distance_to_intersection(
-                row["lat"], row["lon"], row["orientation"], row["beach"], shapes, properties
+                row["lat"],
+                row["lon"],
+                row["orientation"],
+                row["beach"],
+                shapes,
+                properties,
             ),
             axis=1,
         )
         # Get the elevations of the crest and toe
         col_name = "{}_z".format(feat)
         df_profile_features[col_name] = df_profile_features.apply(
-            lambda row: beach_profile_elevation(row["{}_x".format(feat)], df_profiles, "prestorm", row.name), axis=1
+            lambda row: beach_profile_elevation(
+                row["{}_x".format(feat)], df_profiles, "prestorm", row.name
+            ),
+            axis=1,
        )
-    df_profile_features = df_profile_features.drop(columns=["beach", "lat", "lon", "orientation", "profile_x_lat_lon"])
+    df_profile_features = df_profile_features.drop(
+        columns=["beach", "lat", "lon", "orientation", "profile_x_lat_lon"]
+    )
     return df_profile_features
@@ -134,10 +155,14 @@ def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp):
 @click.option("--sites-csv", required=True, help="where to store .shp file")
 @click.option("--profiles-csv", required=True, help="where to store .shp file")
 @click.option("--output-csv", required=True, help="where to store .shp file")
-def create_profile_features(dune_crest_shp, dune_toe_shp, sites_csv, profiles_csv, output_csv):
+def create_profile_features(
+    dune_crest_shp, dune_toe_shp, sites_csv, profiles_csv, output_csv
+):
     logger.info("Creating .csv of dune crests and toes")
     df_sites = pd.read_csv(sites_csv, index_col=[0])
     df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2])
-    df_profile_features = parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp)
+    df_profile_features = parse_profile_features(
+        df_sites, df_profiles, dune_crest_shp, dune_toe_shp
+    )
     df_profile_features.to_csv(output_csv)
     logger.info("Done!")
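
For reference, the geometry behind distance_to_intersection() (rewrapped above) is plain shapely: cast a line from the profile's start point, intersect it with a dune feature line, and measure the along-line distance. A sketch with made-up coordinates, ignoring the beach filtering and landward/seaward sign handling done by the real function:

from shapely.geometry import LineString, Point

profile_line = LineString([(0, 0), (100, 0)])        # cast seaward from (0, 0)
dune_crest_line = LineString([(40, -10), (40, 10)])  # crosses the profile at x=40

intersection = profile_line.intersection(dune_crest_line)
if not intersection.is_empty:
    # project() gives the distance along profile_line to the intersection.
    print(profile_line.project(intersection))  # 40.0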

@@ -32,11 +32,16 @@ def crossings(profile_x, profile_z, constant_z):
     indicies = np.where(z[:-1] * z[1:] < 0)[0]
     # Use linear interpolation to find intersample crossings.
-    return [profile_x[i] - (profile_x[i] - profile_x[i + 1]) / (z[i] - z[i + 1]) * (z[i]) for i in indicies]
+    return [
+        profile_x[i] - (profile_x[i] - profile_x[i + 1]) / (z[i] - z[i + 1]) * (z[i])
+        for i in indicies
+    ]
 # TODO Think of a better way to do this than having to manually specify the coordinate systems
-def convert_coord_systems(g1, in_coord_system="EPSG:4326", out_coord_system="EPSG:28356"):
+def convert_coord_systems(
+    g1, in_coord_system="EPSG:4326", out_coord_system="EPSG:28356"
+):
     """
     Converts coordinates from one coordinate system to another. Needed because shapefiles are usually defined in
     lat/lon but should be converted to GDA to calculate distances.
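
crossings() shifts the profile so the target elevation sits at zero, finds sign changes between neighbouring samples, and interpolates linearly. A worked example follows; the z = np.subtract(...) setup line is an assumption, since it is not shown in this hunk, and note that exact zeros at sample points are skipped by the strict < 0 test:

import numpy as np

def crossings(profile_x, profile_z, constant_z):
    # Shift the profile so the target elevation sits at z=0 (assumed setup
    # line, not shown in the hunk above), then find sign changes between
    # neighbouring samples and interpolate linearly within each interval.
    z = np.subtract(profile_z, constant_z)
    indicies = np.where(z[:-1] * z[1:] < 0)[0]
    return [
        profile_x[i] - (profile_x[i] - profile_x[i + 1]) / (z[i] - z[i + 1]) * (z[i])
        for i in indicies
    ]

# A profile falling from z=2 to z=-2 crosses z=0 halfway between
# x=1 (z=1) and x=2 (z=-1):
print(crossings([0, 1, 2, 3], [2, 1, -1, -2], 0))  # [1.5]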
