From 4e5b0ce81c7af005bee5f1b2733b579bb529d349 Mon Sep 17 00:00:00 2001
From: Chris Leaman
Date: Fri, 8 Feb 2019 16:55:11 +1100
Subject: [PATCH] Fix formatting

---
 src/analysis/compare_impacts.py          | 18 +++--
 src/analysis/forecasted_storm_impacts.py | 37 +++++++---
 src/analysis/observed_storm_impacts.py   | 77 +++++++++++++-----
 src/data/csv_to_geojson.py               | 82 +++++++++++++------
 src/data/parse_mat.py                    | 91 +++++++++++++++++-----
 src/data/parse_shp.py                    | 45 +++++++++---
 src/utils.py                             |  9 ++-
 7 files changed, 275 insertions(+), 84 deletions(-)

diff --git a/src/analysis/compare_impacts.py b/src/analysis/compare_impacts.py
index 9894365..3f7ab1c 100644
--- a/src/analysis/compare_impacts.py
+++ b/src/analysis/compare_impacts.py
@@ -19,7 +19,10 @@ def compare_impacts(df_forecasted, df_observed):
     :return:
     """
     df_compared = df_forecasted.merge(
-        df_observed, left_index=True, right_index=True, suffixes=["_forecasted", "_observed"]
+        df_observed,
+        left_index=True,
+        right_index=True,
+        suffixes=["_forecasted", "_observed"],
     )
     return df_compared

@@ -27,7 +30,14 @@ def compare_impacts(df_forecasted, df_observed):
 if __name__ == "__main__":
     logger.info("Importing existing data")
     data_folder = "./data/interim"
-    df_forecasted = pd.read_csv(os.path.join(data_folder, "impacts_forecasted_mean_slope_sto06.csv"), index_col=[0])
-    df_observed = pd.read_csv(os.path.join(data_folder, "impacts_observed.csv"), index_col=[0])
+    df_forecasted = pd.read_csv(
+        os.path.join(data_folder, "impacts_forecasted_mean_slope_sto06.csv"),
+        index_col=[0],
+    )
+    df_observed = pd.read_csv(
+        os.path.join(data_folder, "impacts_observed.csv"), index_col=[0]
+    )
     df_compared = compare_impacts(df_forecasted, df_observed)
-    df_compared.to_csv(os.path.join(data_folder, "impacts_observed_vs_forecasted_mean_slope_sto06.csv"))
+    df_compared.to_csv(
+        os.path.join(data_folder, "impacts_observed_vs_forecasted_mean_slope_sto06.csv")
+    )
diff --git a/src/analysis/forecasted_storm_impacts.py b/src/analysis/forecasted_storm_impacts.py
index ca4de5e..e02c171 100644
--- a/src/analysis/forecasted_storm_impacts.py
+++ b/src/analysis/forecasted_storm_impacts.py
@@ -20,18 +20,24 @@ def forecasted_impacts(df_profile_features, df_forecasted_twl):
     """
     logger.info("Getting forecasted storm impacts")

-    df_forecasted_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique())
+    df_forecasted_impacts = pd.DataFrame(
+        index=df_profile_features.index.get_level_values("site_id").unique()
+    )

     # For each site, find the maximum R_high value and the corresponding R_low value.
     idx = df_forecasted_twl.groupby(level=["site_id"])["R_high"].idxmax().dropna()
-    df_r_vals = df_forecasted_twl.loc[idx, ["R_high", "R_low"]].reset_index(["datetime"])
-    df_forecasted_impacts = df_forecasted_impacts.merge(df_r_vals, how="left", left_index=True, right_index=True)
+    df_r_vals = df_forecasted_twl.loc[idx, ["R_high", "R_low"]].reset_index(
+        ["datetime"]
+    )
+    df_forecasted_impacts = df_forecasted_impacts.merge(
+        df_r_vals, how="left", left_index=True, right_index=True
+    )

     # Join with df_profile features to find dune toe and crest elevations
     df_forecasted_impacts = df_forecasted_impacts.merge(
-        df_profile_features.query("profile_type=='prestorm'").reset_index("profile_type")[
-            ["dune_toe_z", "dune_crest_z"]
-        ],
+        df_profile_features.query("profile_type=='prestorm'").reset_index(
+            "profile_type"
+        )[["dune_toe_z", "dune_crest_z"]],
         how="left",
         left_on=["site_id"],
         right_on=["site_id"],
@@ -71,7 +77,12 @@ def storm_regime(df_forecasted_impacts):
     return df_forecasted_impacts


-def twl_exceedence_time(df_profile_features, df_forecasted_twl, z_twl_col="R_high", z_exceedence_col="dune_toe_z"):
+def twl_exceedence_time(
+    df_profile_features,
+    df_forecasted_twl,
+    z_twl_col="R_high",
+    z_exceedence_col="dune_toe_z",
+):
     """
     Returns a dataframe of number of hours the twl exceeded a certain z elevation. May need to use this
     https://stackoverflow.com/a/53656968 if datetimes are not consistent.
@@ -85,11 +96,15 @@ def twl_exceedence_time(df_profile_features, df_forecasted_twl, z_twl_col="R_hig

     # Get a dataframe of prestorm dune toes organised by site_id
     df_dune_toes = (
-        df_profile_features.query('profile_type=="prestorm"').reset_index("profile_type")[z_exceedence_col].to_frame()
+        df_profile_features.query('profile_type=="prestorm"')
+        .reset_index("profile_type")[z_exceedence_col]
+        .to_frame()
     )

     # Merge dune toes into site_id
-    df_merged = df_forecasted_twl.merge(df_dune_toes, left_on=["site_id"], right_on=["site_id"])
+    df_merged = df_forecasted_twl.merge(
+        df_dune_toes, left_on=["site_id"], right_on=["site_id"]
+    )

     # Return the sum of hours that twl exceeded the level
     return (
@@ -114,7 +129,9 @@ def create_forecasted_impacts(profile_features_csv, forecasted_twl_csv, output_f
     df_forecasted_impacts = forecasted_impacts(df_profile_features, df_forecasted_twl)

     df_forecasted_impacts = df_forecasted_impacts.merge(
-        twl_exceedence_time(df_profile_features, df_forecasted_twl), left_on=["site_id"], right_on=["site_id"]
+        twl_exceedence_time(df_profile_features, df_forecasted_twl),
+        left_on=["site_id"],
+        right_on=["site_id"],
     )

     df_forecasted_impacts.to_csv(output_file)
diff --git a/src/analysis/observed_storm_impacts.py b/src/analysis/observed_storm_impacts.py
index e8537a2..b0653d5 100644
--- a/src/analysis/observed_storm_impacts.py
+++ b/src/analysis/observed_storm_impacts.py
@@ -30,12 +30,16 @@ def volume_change(df_profiles, df_profile_features, zone):
     """
     logger.info("Calculating change in beach volume in {} zone".format(zone))

-    df_vol_changes = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique())
+    df_vol_changes = pd.DataFrame(
+        index=df_profile_features.index.get_level_values("site_id").unique()
+    )
     df_profiles = df_profiles.sort_index()
     sites = df_profiles.groupby(level=["site_id"])

     for site_id, df_site in sites:
-        logger.debug("Calculating change in beach volume at {} in {} zone".format(site_id, zone))
+        logger.debug(
+            "Calculating change in beach volume at {} in {} zone".format(site_id, zone)
+        )

         # TODO Change this query to an index
         query = "site_id=='{}'&profile_type=='prestorm'".format(site_id)
"site_id=='{}'&profile_type=='prestorm'".format(site_id) @@ -51,7 +55,10 @@ def volume_change(df_profiles, df_profile_features, zone): prestorm_dune_toe_x = prestorm_dune_crest_x # If no prestorm and poststorm profiles, skip site and continue - profile_lengths = [len(df_site.query("profile_type == '{}'".format(x))) for x in ["prestorm", "poststorm"]] + profile_lengths = [ + len(df_site.query("profile_type == '{}'".format(x))) + for x in ["prestorm", "poststorm"] + ] if any([length == 0 for length in profile_lengths]): continue @@ -60,13 +67,21 @@ def volume_change(df_profiles, df_profile_features, zone): df_zone = df_site.dropna(subset=["z"]) x_last_obs = min( [ - max(df_zone.query("profile_type == '{}'".format(profile_type)).index.get_level_values("x")) + max( + df_zone.query( + "profile_type == '{}'".format(profile_type) + ).index.get_level_values("x") + ) for profile_type in ["prestorm", "poststorm"] ] ) x_first_obs = max( [ - min(df_zone.query("profile_type == '{}'".format(profile_type)).index.get_level_values("x")) + min( + df_zone.query( + "profile_type == '{}'".format(profile_type) + ).index.get_level_values("x") + ) for profile_type in ["prestorm", "poststorm"] ] ) @@ -100,16 +115,24 @@ def volume_change(df_profiles, df_profile_features, zone): # Volume change needs to be calculated including a tolerance for LIDAR accuracy. If difference between # profiles is less than 20 cm, consider them as zero difference. - prestorm_z = df_zone.query("profile_type=='prestorm'").reset_index("profile_type").z - poststorm_z = df_zone.query("profile_type=='poststorm'").reset_index("profile_type").z + prestorm_z = ( + df_zone.query("profile_type=='prestorm'").reset_index("profile_type").z + ) + poststorm_z = ( + df_zone.query("profile_type=='poststorm'").reset_index("profile_type").z + ) diff_z = prestorm_z - poststorm_z diff_z[abs(diff_z) < 0.2] = 0 - diff_vol = beach_volume(x=diff_z.index.get_level_values("x"), z=diff_z, x_min=x_min, x_max=x_max) + diff_vol = beach_volume( + x=diff_z.index.get_level_values("x"), z=diff_z, x_min=x_min, x_max=x_max + ) df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = prestorm_vol df_vol_changes.loc[site_id, "poststorm_{}_vol".format(zone)] = poststorm_vol df_vol_changes.loc[site_id, "{}_vol_change".format(zone)] = diff_vol - df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = diff_vol / prestorm_vol * 100 + df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = ( + diff_vol / prestorm_vol * 100 + ) return df_vol_changes @@ -142,15 +165,19 @@ def storm_regime(df_observed_impacts): """ logger.info("Getting observed storm regimes") - swash = (df_observed_impacts.dune_face_pct_change <= 2) & (df_observed_impacts.dune_face_vol_change <= 3) - collision = (df_observed_impacts.dune_face_pct_change >= 2) | (df_observed_impacts.dune_face_vol_change > 3) + swash = (df_observed_impacts.dune_face_pct_change <= 2) & ( + df_observed_impacts.dune_face_vol_change <= 3 + ) + collision = (df_observed_impacts.dune_face_pct_change >= 2) | ( + df_observed_impacts.dune_face_vol_change > 3 + ) df_observed_impacts.loc[swash, "storm_regime"] = "swash" df_observed_impacts.loc[collision, "storm_regime"] = "collision" - # TODO We may be able to identify observed regimes by looking at the change in crest and toe elevation. This would be useful for + # TODO We may be able to identify observed regimes by looking at the change in crest and toe elevation. This would be useful for # locations where we have overwash and cannot calculate the change in volume correctly. 
-
+
     return df_observed_impacts


@@ -160,7 +187,9 @@ def overwrite_impacts(df_observed_impacts, df_raw_features):
     :param df_raw_profile_features:
     :return:
     """
-    df_observed_impacts.update(df_raw_features.rename(columns={"observed_storm_regime": "storm_regime"}))
+    df_observed_impacts.update(
+        df_raw_features.rename(columns={"observed_storm_regime": "storm_regime"})
+    )
     return df_observed_impacts


@@ -169,11 +198,15 @@ def overwrite_impacts(df_observed_impacts, df_raw_features):
 @click.option("--profile-features-crest-toes-csv", required=True, help="")
 @click.option("--raw-profile-features-csv", required=True, help="")
 @click.option("--output-file", required=True, help="")
-def create_observed_impacts(profiles_csv, profile_features_crest_toes_csv, raw_profile_features_csv, output_file):
+def create_observed_impacts(
+    profiles_csv, profile_features_crest_toes_csv, raw_profile_features_csv, output_file
+):

     profiles_csv = "./data/interim/profiles.csv"
     profile_features_crest_toes_csv = "./data/interim/profile_features_crest_toes.csv"
-    raw_profile_features_csv = "./data/raw/profile_features_chris_leaman/profile_features_chris_leaman.csv"
+    raw_profile_features_csv = (
+        "./data/raw/profile_features_chris_leaman/profile_features_chris_leaman.csv"
+    )

     logger.info("Creating observed wave impacts")
     logger.info("Importing data")
@@ -181,12 +214,18 @@ def create_observed_impacts(profiles_csv, profile_features_crest_toes_csv, raw_p
     df_profile_features = pd.read_csv(profile_features_crest_toes_csv, index_col=[0, 1])

     logger.info("Creating new dataframe for observed impacts")
-    df_observed_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique())
+    df_observed_impacts = pd.DataFrame(
+        index=df_profile_features.index.get_level_values("site_id").unique()
+    )

     logger.info("Getting pre/post storm volumes")
     df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash")
-    df_dune_face_vol_changes = volume_change(df_profiles, df_profile_features, zone="dune_face")
-    df_observed_impacts = df_observed_impacts.join([df_swash_vol_changes, df_dune_face_vol_changes])
+    df_dune_face_vol_changes = volume_change(
+        df_profiles, df_profile_features, zone="dune_face"
+    )
+    df_observed_impacts = df_observed_impacts.join(
+        [df_swash_vol_changes, df_dune_face_vol_changes]
+    )

     # Classify regime based on volume changes
     df_observed_impacts = storm_regime(df_observed_impacts)
diff --git a/src/data/csv_to_geojson.py b/src/data/csv_to_geojson.py
index e7ca651..a339e13 100644
--- a/src/data/csv_to_geojson.py
+++ b/src/data/csv_to_geojson.py
@@ -16,7 +16,9 @@ from logs import setup_logging
 logger = setup_logging()


-def lat_lon_from_profile_x_coord(center_lat_lon, orientation, center_profile_x, x_coord):
+def lat_lon_from_profile_x_coord(
+    center_lat_lon, orientation, center_profile_x, x_coord
+):
     """
     Returns the lat/lon of a point on a profile with the given x_coord
     :param center_lat_lon: Shapely point of lat/lon of profile center
@@ -31,7 +33,9 @@ def lat_lon_from_profile_x_coord(center_lat_lon, orientation, center_profile_x,
     point_x = center_x + (center_profile_x - x_coord) * np.cos(np.deg2rad(orientation))
     point_y = center_y + (center_profile_x - x_coord) * np.sin(np.deg2rad(orientation))
     point_xy = Point(point_x, point_y)
-    point_lat_lon = convert_coord_systems(point_xy, in_coord_system="EPSG:28356", out_coord_system="EPSG:4326")
+    point_lat_lon = convert_coord_systems(
+        point_xy, in_coord_system="EPSG:28356", out_coord_system="EPSG:4326"
+    )
out_coord_system="EPSG:4326" + ) return point_lat_lon @@ -41,7 +45,9 @@ def lat_lon_from_profile_x_coord(center_lat_lon, orientation, center_profile_x, @click.option("--crest-toes-csv", required=True, help=".csv file to convert") @click.option("--impacts-csv", required=True, help=".csv file to convert") @click.option("--output-geojson", required=True, help="where to store .geojson file") -def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, output_geojson): +def R_high_to_geojson( + sites_csv, profiles_csv, crest_toes_csv, impacts_csv, output_geojson +): """ Converts impact R_high into a lat/lon geojson that we can plot in QGIS :param sites_csv: @@ -58,10 +64,14 @@ def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, outp # Create geojson file schema = { "geometry": "Point", - "properties": OrderedDict([("beach", "str"), ("site_id", "str"), ("elevation", "float")]), + "properties": OrderedDict( + [("beach", "str"), ("site_id", "str"), ("elevation", "float")] + ), } - with fiona.open(output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema) as output: + with fiona.open( + output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema + ) as output: for index, row in df_impacts.iterrows(): site_id = index @@ -72,12 +82,18 @@ def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, outp # Get poststorm profile df_profile = df_profiles.loc[(site_id, "prestorm")] - int_x = crossings(df_profile.index.get_level_values("x").tolist(), df_profile.z.tolist(), R_high_z) + int_x = crossings( + df_profile.index.get_level_values("x").tolist(), + df_profile.z.tolist(), + R_high_z, + ) # Take the intersection closest to the dune face. try: - x_cols = [x for x in df_crest_toes.columns if '_x' in x] - dune_face_x = np.mean(df_crest_toes.loc[(site_id, "prestorm"),x_cols].tolist()) + x_cols = [x for x in df_crest_toes.columns if "_x" in x] + dune_face_x = np.mean( + df_crest_toes.loc[(site_id, "prestorm"), x_cols].tolist() + ) int_x = min(int_x, key=lambda x: abs(x - dune_face_x)) except: continue @@ -91,7 +107,9 @@ def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, outp x_coord=int_x, ) - prop = OrderedDict([("beach", beach), ("site_id", site_id), ("elevation", R_high_z)]) + prop = OrderedDict( + [("beach", beach), ("site_id", site_id), ("elevation", R_high_z)] + ) output.write({"geometry": mapping(point_lat_lon), "properties": prop}) @@ -99,7 +117,9 @@ def R_high_to_geojson(sites_csv, profiles_csv, crest_toes_csv, impacts_csv, outp @click.option("--sites-csv", required=True, help=".csv file to convert") @click.option("--profile-features-csv", required=True, help=".csv file to convert") @click.option("--output-geojson", required=True, help="where to store .geojson file") -def profile_features_crest_toes_to_geojson(sites_csv, profile_features_csv, output_geojson): +def profile_features_crest_toes_to_geojson( + sites_csv, profile_features_csv, output_geojson +): """ Converts profile_features containing dune toes and crest locations to a geojson we can load into QGIS :param sites_csv: @@ -131,7 +151,9 @@ def profile_features_crest_toes_to_geojson(sites_csv, profile_features_csv, outp ), } - with fiona.open(output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema) as output: + with fiona.open( + output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema + ) as output: for index, row in df_profile_features.iterrows(): beach = index[:-4] site_id = index @@ -185,9 
     df_sites = pd.read_csv(input_csv, index_col=[0])
     logger.info(os.environ.get("GDAL_DATA", None))

-    schema = {"geometry": "LineString", "properties": OrderedDict([("beach", "str"), ("site_id", "str")])}
+    schema = {
+        "geometry": "LineString",
+        "properties": OrderedDict([("beach", "str"), ("site_id", "str")]),
+    }

-    with fiona.open(output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema) as output:
+    with fiona.open(
+        output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema
+    ) as output:
         for index, row in df_sites.iterrows():

             center_lat_lon = Point(row["lon"], row["lat"])
@@ -215,10 +242,16 @@ def sites_csv_to_geojson(input_csv, output_geojson):

 @click.command()
 @click.option("--sites-csv", required=True, help="sites.csv file to convert")
-@click.option("--observed-impacts-csv", required=True, help="impacts-observed.csv file to convert")
-@click.option("--forecast-impacts-csv", required=True, help="impacts-forecast.csv file to convert")
+@click.option(
+    "--observed-impacts-csv", required=True, help="impacts-observed.csv file to convert"
+)
+@click.option(
+    "--forecast-impacts-csv", required=True, help="impacts-forecast.csv file to convert"
+)
 @click.option("--output-geojson", required=True, help="where to store .geojson file")
-def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, output_geojson):
+def impacts_to_geojson(
+    sites_csv, observed_impacts_csv, forecast_impacts_csv, output_geojson
+):
     """
     Converts impacts observed and forecasted to a geojson for visualization in QGIS
     :param sites_csv:
@@ -231,7 +264,9 @@ def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, ou
     # Get information from .csv and read into pandas dataframe
     df_sites = pd.read_csv(sites_csv, index_col=[0])
     df_observed = pd.read_csv(observed_impacts_csv, index_col=[0])
-    df_forecast = pd.read_csv(forecast_impacts_csv, index_col=[0]).rename({"storm_regime": "forecast_storm_regime"})
+    df_forecast = pd.read_csv(forecast_impacts_csv, index_col=[0]).rename(
+        {"storm_regime": "forecast_storm_regime"}
+    )

     # Rename columns, so we can distinguish between forecast and observed
     df_observed = df_observed.rename(columns={"storm_regime": "observed_storm_regime"})
@@ -241,7 +276,9 @@ def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, ou
     df = pd.concat([df_sites, df_observed, df_forecast], sort=True, axis=1)

     # Make new column for accuracy of forecast. Use underpredict/correct/overpredict classes
-    df.loc[df.observed_storm_regime == df.forecast_storm_regime, "forecast_accuray"] = "correct"
+    df.loc[
+        df.observed_storm_regime == df.forecast_storm_regime, "forecast_accuray"
+    ] = "correct"

     # Observed/Forecasted/Class for each combination
     classes = [
@@ -256,7 +293,10 @@ def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, ou
         ("overwash", "overwash", "correct"),
     ]
     for c in classes:
-        df.loc[(df.observed_storm_regime == c[0]) & (df.forecast_storm_regime == c[1]), "forecast_accuracy"] = c[2]
+        df.loc[
+            (df.observed_storm_regime == c[0]) & (df.forecast_storm_regime == c[1]),
+            "forecast_accuracy",
+        ] = c[2]

     schema = {
         "geometry": "Point",
@@ -271,7 +311,9 @@ def impacts_to_geojson(sites_csv, observed_impacts_csv, forecast_impacts_csv, ou
         ),
     }

-    with fiona.open(output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema) as output:
+    with fiona.open(
+        output_geojson, "w", driver="GeoJSON", crs=from_epsg(4326), schema=schema
+    ) as output:
         for index, row in df.iterrows():

             # Locate the marker at the seaward end of the profile to avoid cluttering the coastline.
diff --git a/src/data/parse_mat.py b/src/data/parse_mat.py
index 10a3868..ffec095 100644
--- a/src/data/parse_mat.py
+++ b/src/data/parse_mat.py
@@ -27,10 +27,19 @@ def parse_crest_toes(df_raw_features, df_profiles):
     # Puts profiles_features_csv into format expected by rest of analysis
     df_crest_toes = df_raw_features.reset_index().melt(
         id_vars=["site_id"],
-        value_vars=["prestorm_dune_crest_x", "prestorm_dune_toe_x", "poststorm_dune_crest_x", "poststorm_dune_toe_x"],
+        value_vars=[
+            "prestorm_dune_crest_x",
+            "prestorm_dune_toe_x",
+            "poststorm_dune_crest_x",
+            "poststorm_dune_toe_x",
+        ],
+    )
+    df_crest_toes["profile_type"] = df_crest_toes.variable.str.extract(
+        r"(prestorm|poststorm)"
+    )
+    df_crest_toes["point_type"] = df_crest_toes.variable.str.extract(
+        r"(dune_crest_x|dune_toe_x)"
     )
-    df_crest_toes["profile_type"] = df_crest_toes.variable.str.extract(r"(prestorm|poststorm)")
-    df_crest_toes["point_type"] = df_crest_toes.variable.str.extract(r"(dune_crest_x|dune_toe_x)")
     df_crest_toes = df_crest_toes.drop(columns=["variable"])
     df_crest_toes = df_crest_toes.sort_values("site_id")
     df_crest_toes = df_crest_toes.set_index(["site_id", "profile_type", "point_type"])
@@ -52,7 +61,9 @@ def parse_crest_toes(df_raw_features, df_profiles):
             x_val = df_crest_toes.loc[(site_id, param), "dune_{}_x".format(loc)]

             if np.isnan(x_val):
-                df_crest_toes.loc[(site_id, param), "dune_{}_z".format(loc)] = np.nan
+                df_crest_toes.loc[
+                    (site_id, param), "dune_{}_z".format(loc)
+                ] = np.nan
                 continue

             # Try get the value from the other profile if we return nan or empty dataframe
@@ -121,7 +132,9 @@ def parse_dune_crest_toes(df_sites, crest_mat, toe_mat):

     # Want the site_id instead of the site_no, so merge in df_sites
     df_sites.reset_index(inplace=True)
-    df_profile_features = df_sites[["site_no", "site_id"]].merge(df_profile_features, how="outer", on=["site_no"])
+    df_profile_features = df_sites[["site_no", "site_id"]].merge(
+        df_profile_features, how="outer", on=["site_no"]
+    )
     df_profile_features.drop(columns=["site_no"], inplace=True)
     df_profile_features.set_index(["site_id", "profile_type"], inplace=True)
     df_profile_features.sort_index(inplace=True)
@@ -257,7 +270,9 @@ def parse_profiles_and_sites(profiles_mat):
             z = z_site[j]
             land_lim = mat_data["landlims"][i][j]
-            survey_datetime = matlab_datenum_to_datetime(mat_data["surveydates"][i][j])
+            survey_datetime = matlab_datenum_to_datetime(
+                mat_data["surveydates"][i][j]
+            )

             # Keep a record of the where the center of the profile is located, and the locations of the land
             # and sea
@@ -333,7 +348,9 @@ def remove_zeros(df_profiles):
         )
         df_profile = df_profiles[idx_site]
         x_last_ele = df_profile[df_profile.z == 0].index.get_level_values("x")[0]
-        df_profiles.loc[idx_site & (df_profiles.index.get_level_values("x") > x_last_ele), "z"] = np.nan
+        df_profiles.loc[
+            idx_site & (df_profiles.index.get_level_values("x") > x_last_ele), "z"
+        ] = np.nan

     logger.info("Removed zeros from end of profiles")
     return df_profiles
@@ -345,7 +362,11 @@ def matlab_datenum_to_datetime(matlab_datenum):
     :param matlab_datenum:
     :return:
     """
-    return datetime.fromordinal(int(matlab_datenum)) + timedelta(days=matlab_datenum % 1) - timedelta(days=366)
+    return (
+        datetime.fromordinal(int(matlab_datenum))
+        + timedelta(days=matlab_datenum % 1)
+        - timedelta(days=366)
+    )


 def replace_unique_sites(df, df_sites):
@@ -359,7 +380,10 @@ def replace_unique_sites(df, df_sites):
     df_sites["site_id"] = df_sites.index.get_level_values("site_id")

     # Create eastings and northings so we can calculate distances
-    site_points = [convert_coord_systems(Point(lon, lat)).xy for lon, lat in zip(df_sites["lon"], df_sites["lat"])]
+    site_points = [
+        convert_coord_systems(Point(lon, lat)).xy
+        for lon, lat in zip(df_sites["lon"], df_sites["lat"])
+    ]
     df_sites["easting"] = [x[0][0] for x in site_points]
     df_sites["northing"] = [x[1][0] for x in site_points]

@@ -369,22 +393,39 @@ def replace_unique_sites(df, df_sites):

         # Calculate distances from each point to each site and determine closest site
         easting, northing = [x[0] for x in convert_coord_systems(Point(lon, lat)).xy]
-        distances_to_sites = np.sqrt((df_sites["easting"] - easting) ** 2 + (df_sites["northing"] - northing) ** 2)
+        distances_to_sites = np.sqrt(
+            (df_sites["easting"] - easting) ** 2
+            + (df_sites["northing"] - northing) ** 2
+        )
         min_distance = distances_to_sites.min()
         closest_site = distances_to_sites.idxmin()

         # Do some logging so we can check later.
         if min_distance > 1:
-            logger.warning("Closest site to (%.4f,%.4f) is %s (%.2f m away)", lat, lon, closest_site, min_distance)
+            logger.warning(
+                "Closest site to (%.4f,%.4f) is %s (%.2f m away)",
+                lat,
+                lon,
+                closest_site,
+                min_distance,
+            )
         else:
-            logger.info("Closest site to (%.4f,%.4f) is %s (%.2f m away)", lat, lon, closest_site, min_distance)
+            logger.info(
+                "Closest site to (%.4f,%.4f) is %s (%.2f m away)",
+                lat,
+                lon,
+                closest_site,
+                min_distance,
+            )

         # Assign site_id based on closest site
         df.loc[df_group.index, "site_id"] = closest_site

     nan_count = df.site_id.isna().sum()
     if nan_count > 0:
-        logger.warning("Not all records (%d of %d) matched with a unique site", nan_count, len(df))
+        logger.warning(
+            "Not all records (%d of %d) matched with a unique site", nan_count, len(df)
+        )

     df = df.drop(columns=["lat", "lon", "beach"])

@@ -393,7 +434,9 @@ def replace_unique_sites(df, df_sites):

 @click.command(short_help="create waves.csv")
 @click.option("--waves-mat", required=True, help=".mat file containing wave records")
-@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
+@click.option(
+    "--sites-csv", required=True, help=".csv file description of cross section sites"
+)
 @click.option("--output-file", required=True, help="where to save waves.csv")
 def create_waves_csv(waves_mat, sites_csv, output_file):
     logger.info("Creating %s", output_file)
@@ -420,7 +463,9 @@ def create_waves_csv(waves_mat, sites_csv, output_file):


 @click.command(short_help="create profile_features.csv")
-@click.option("--profile-features-csv", required=True, help=".mat file containing wave records")
+@click.option(
+    "--profile-features-csv", required=True, help=".mat file containing wave records"
+)
 @click.option("--profiles-csv", required=True, help=".mat file containing wave records")
 @click.option("--output-file", required=True, help="where to save waves.csv")
 def create_crest_toes(profile_features_csv, profiles_csv, output_file):
@@ -435,10 +480,16 @@ def create_crest_toes(profile_features_csv, profiles_csv, output_file):


 @click.command(short_help="create profiles.csv")
-@click.option("--profiles-mat", required=True, help=".mat file containing beach profiles")
-@click.option("--profiles-output-file", required=True, help="where to save profiles.csv")
+@click.option(
+    "--profiles-mat", required=True, help=".mat file containing beach profiles"
+)
+@click.option(
+    "--profiles-output-file", required=True, help="where to save profiles.csv"
+)
 @click.option("--sites-output-file", required=True, help="where to save sites.csv")
-def create_sites_and_profiles_csv(profiles_mat, profiles_output_file, sites_output_file):
+def create_sites_and_profiles_csv(
+    profiles_mat, profiles_output_file, sites_output_file
+):
     logger.info("Creating sites and profiles csvs")
     df_profiles, df_sites = parse_profiles_and_sites(profiles_mat=profiles_mat)
     df_profiles.set_index(["site_id", "profile_type", "x"], inplace=True)
@@ -456,7 +507,9 @@ def create_sites_and_profiles_csv(profiles_mat, profiles_output_file, sites_outp

 @click.command(short_help="create profiles.csv")
 @click.option("--tides-mat", required=True, help=".mat file containing tides")
-@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
+@click.option(
+    "--sites-csv", required=True, help=".csv file description of cross section sites"
+)
 @click.option("--output-file", required=True, help="where to save tides.csv")
 def create_tides_csv(tides_mat, sites_csv, output_file):
logger.info("Creating %s", output_file) diff --git a/src/data/parse_shp.py b/src/data/parse_shp.py index 00a7f17..25a4cfa 100644 --- a/src/data/parse_shp.py +++ b/src/data/parse_shp.py @@ -31,7 +31,9 @@ def shapes_from_shp(shp_file): return shapes, ids, properties -def distance_to_intersection(lat, lon, landward_orientation, beach, line_strings, line_properties): +def distance_to_intersection( + lat, lon, landward_orientation, beach, line_strings, line_properties +): """ Returns the distance at whjch a line drawn from a lat/lon at an orientation intersects a line stinrg :param lat: @@ -58,7 +60,9 @@ def distance_to_intersection(lat, lon, landward_orientation, beach, line_strings seaward_line = LineString([start_point, seaward_point]) # Look at relevant line_strings which have the same beach property in order to reduce computation time - line_strings = [s for s, p in zip(line_strings, line_properties) if p["beach"] == beach] + line_strings = [ + s for s, p in zip(line_strings, line_properties) if p["beach"] == beach + ] # Check whether profile_line intersects with any lines in line_string. If intersection point is landwards, # consider this negative, otherwise seawards is positive. @@ -89,7 +93,9 @@ def beach_profile_elevation(x_coord, df_profiles, profile_type, site_id): return None # Get profile - df_profile = df_profiles.query('profile_type == "{}" and site_id =="{}"'.format(profile_type, site_id)) + df_profile = df_profiles.query( + 'profile_type == "{}" and site_id =="{}"'.format(profile_type, site_id) + ) return np.interp(x_coord, df_profile.index.get_level_values("x"), df_profile["z"]) @@ -102,7 +108,10 @@ def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp): # Get site information. Base our profile features on each site df_profile_features = df_sites - features = {"dune_crest": {"file": dune_crest_shp}, "dune_toe": {"file": dune_toe_shp}} + features = { + "dune_crest": {"file": dune_crest_shp}, + "dune_toe": {"file": dune_toe_shp}, + } # Import our dune crest and toes for feat in features.keys(): @@ -112,19 +121,31 @@ def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp): # Figure out the x coordinates of our crest and toes, by looking at where our beach sections intersect our # shape files. 
col_name = "{}_x".format(feat) - df_profile_features[col_name] = df_profile_features["profile_x_lat_lon"] + df_profile_features.apply( + df_profile_features[col_name] = df_profile_features[ + "profile_x_lat_lon" + ] + df_profile_features.apply( lambda row: distance_to_intersection( - row["lat"], row["lon"], row["orientation"], row["beach"], shapes, properties + row["lat"], + row["lon"], + row["orientation"], + row["beach"], + shapes, + properties, ), axis=1, ) # Get the elevations of the crest and toe col_name = "{}_z".format(feat) df_profile_features[col_name] = df_profile_features.apply( - lambda row: beach_profile_elevation(row["{}_x".format(feat)], df_profiles, "prestorm", row.name), axis=1 + lambda row: beach_profile_elevation( + row["{}_x".format(feat)], df_profiles, "prestorm", row.name + ), + axis=1, ) - df_profile_features = df_profile_features.drop(columns=["beach", "lat", "lon", "orientation", "profile_x_lat_lon"]) + df_profile_features = df_profile_features.drop( + columns=["beach", "lat", "lon", "orientation", "profile_x_lat_lon"] + ) return df_profile_features @@ -134,10 +155,14 @@ def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp): @click.option("--sites-csv", required=True, help="where to store .shp file") @click.option("--profiles-csv", required=True, help="where to store .shp file") @click.option("--output-csv", required=True, help="where to store .shp file") -def create_profile_features(dune_crest_shp, dune_toe_shp, sites_csv, profiles_csv, output_csv): +def create_profile_features( + dune_crest_shp, dune_toe_shp, sites_csv, profiles_csv, output_csv +): logger.info("Creating .csv of dune crests and toes") df_sites = pd.read_csv(sites_csv, index_col=[0]) df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2]) - df_profile_features = parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp) + df_profile_features = parse_profile_features( + df_sites, df_profiles, dune_crest_shp, dune_toe_shp + ) df_profile_features.to_csv(output_csv) logger.info("Done!") diff --git a/src/utils.py b/src/utils.py index 0809ad5..f375ea6 100644 --- a/src/utils.py +++ b/src/utils.py @@ -32,11 +32,16 @@ def crossings(profile_x, profile_z, constant_z): indicies = np.where(z[:-1] * z[1:] < 0)[0] # Use linear interpolation to find intersample crossings. - return [profile_x[i] - (profile_x[i] - profile_x[i + 1]) / (z[i] - z[i + 1]) * (z[i]) for i in indicies] + return [ + profile_x[i] - (profile_x[i] - profile_x[i + 1]) / (z[i] - z[i + 1]) * (z[i]) + for i in indicies + ] # TODO Think of a better way to do this than having to manually specify the coordinate systems -def convert_coord_systems(g1, in_coord_system="EPSG:4326", out_coord_system="EPSG:28356"): +def convert_coord_systems( + g1, in_coord_system="EPSG:4326", out_coord_system="EPSG:28356" +): """ Converts coordinates from one coordinates system to another. Needed because shapefiles are usually defined in lat/lon but should be converted to GDA to calculated distances.