diff --git a/Makefile b/Makefile index b3bb230..1196d12 100644 --- a/Makefile +++ b/Makefile @@ -102,12 +102,17 @@ impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/inte # --output-csv "./data/interim/profile_features.csv" # Create a .csv of our dune toe and crest profile features from Tom Beuzen's .mat file +# Also apply an overwrite of some values, using an excel sheet ./data/interim/profile_features.csv: ./data/raw/profile_features_tom_beuzen/*.mat ./data/interim/sites.csv activate ./.venv && python ./src/cli.py create-profile-features \ --crest-mat "./data/raw/profile_features_tom_beuzen/J16_DuneCrest.mat" \ --toe-mat "./data/raw/profile_features_tom_beuzen/J16_DuneToe.mat" \ --sites-csv "./data/interim/sites.csv" \ - --output-file "./data/interim/profile_features.csv" + --output-file "./data/interim/profile_features.csv" \ + && python ./src/cli.py apply-profile-features-overwrite \ + --interim_file "./data/interim/profile_features.csv" \ + --overwrite_file "./data/raw/profile_features_chris_leaman/profile_features_chris_leaman.xlsx" \ + --profile_file "./data/interim/profiles.csv" # Creates a forecast of twl using sto06 and prestorm time varying prestorm foreshore slope ./data/interim/twl_foreshore_slope_sto06.csv: ./data/interim/waves.csv ./data/interim/tides.csv ./data/interim/profiles.csv ./data/interim/sites.csv ./data/interim/profile_features.csv diff --git a/src/cli.py b/src/cli.py index e924453..0323345 100644 --- a/src/cli.py +++ b/src/cli.py @@ -10,6 +10,7 @@ import data.csv_to_shp as csv_to_shp import analysis.forecast_twl as forecast_twl import analysis.forecasted_storm_impacts as forecasted_storm_impacts import analysis.observed_storm_impacts as observed_storm_impacts +import data.apply_manual_overwrites as apply_manual_overwrites # Disable numpy warnings import warnings @@ -32,4 +33,5 @@ if __name__ == "__main__": cli.add_command(forecast_twl.create_twl_forecast) 
"""
After generating interim data files based on raw data, we may need to overwrite some rows with manual data.
"""

import click
import numpy as np
import pandas as pd

from utils import setup_logging

logger = setup_logging()

# Value in the overwrite sheet which means "delete the existing x value".
_REMOVE_FLAG = "remove"

# Dune features whose x values can be overwritten and whose z values are then recalculated.
_FEATURES = ("crest", "toe")

# Profile types that get recalculated, mapped to the profile used as a fallback for the z lookup.
_FALLBACK_PROFILE = {"prestorm": "poststorm", "poststorm": "prestorm"}

# Absolute tolerance used to match an x value against the profile x values. The interim file is written with
# three decimals, so an x value read back from it can differ from the profile x by rounding only.
_X_TOLERANCE = 1e-3


def _lookup_profile_z(df_site, profile_type, x_val):
    """
    Find the z value at a given x value in the profiles of a single site.
    :param df_site: Profile rows of one site as a flat dataframe with 'profile_type', 'x' and 'z' columns, or None
        if there is no profile data for the site
    :param profile_type: Profile to look in first, either 'prestorm' or 'poststorm'
    :param x_val: x value (float) to look up
    :return: z of the requested profile at x_val. If that profile has no row or only NaN at x_val, z of the other
        profile. NaN if neither profile has a value.
    """
    if df_site is None:
        return np.nan

    # Compare numerically. Building a query string with the number in quotes compares x against a string and
    # depends on how the float happens to be formatted.
    at_x = df_site[np.isclose(df_site["x"].astype(float), x_val, rtol=0.0, atol=_X_TOLERANCE)]

    for candidate in (profile_type, _FALLBACK_PROFILE[profile_type]):
        z_vals = at_x.loc[at_x["profile_type"] == candidate, "z"].dropna()
        if not z_vals.empty:
            return z_vals.iloc[0]

    return np.nan


def overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite=True):
    """
    Overwrite dune crest/toe x values of the interim profile features with manual values and recalculate the
    matching z values from the profiles.
    :param df_interim: Profile features ('./data/interim/profile_features.csv'), indexed by site_id and
        profile_type
    :param df_overwrite: Manual values from the excel sheet, indexed by site_id and profile_type. In the
        dune_crest_x/dune_toe_x columns a number replaces the x value, 'remove' clears it and an empty cell keeps
        the original value.
    :param df_profiles: Profiles ('./data/interim/profiles.csv') with site_id, profile_type and x as index levels
        or columns, and a z column
    :param overwrite: Not used by this function, kept so existing callers keep working. Writing the result to
        file (or not) is up to the caller.
    :return: Copy of df_interim with the manual values applied, sorted by index
    """

    # Only rows which are present in both dataframes end up in df_merged (inner join)
    df_merged = df_interim.merge(df_overwrite, left_index=True, right_index=True, suffixes=["", "_overwrite"])

    # Apply the manual x values. Note that a NaN value in the overwrite column means keep the original value.
    for loc in _FEATURES:
        x_col = "dune_{}_x".format(loc)
        overwrite_col = "{}_overwrite".format(x_col)

        # The sheet does not need to contain both features
        if overwrite_col not in df_merged.columns:
            continue

        new_x = df_merged[overwrite_col]
        remove_rows = new_x == _REMOVE_FLAG
        replace_rows = new_x.notnull() & ~remove_rows

        df_merged.loc[remove_rows, x_col] = np.nan
        if replace_rows.any():
            # The sheet column mixes text and numbers, so convert explicitly. Anything that is neither a number
            # nor 'remove' raises a ValueError here.
            df_merged.loc[replace_rows, x_col] = pd.to_numeric(new_x[replace_rows])

    # The sheet may list only one profile type for a site, so collect the rows which actually exist
    rows_by_site = {}
    for site_id, profile_type in df_merged.index:
        rows_by_site.setdefault(site_id, []).append(profile_type)

    # Split the profiles by site once, instead of searching the complete dataframe for every site
    flat_profiles = df_profiles.reset_index()
    flat_profiles = flat_profiles[flat_profiles["site_id"].isin(list(rows_by_site))]
    profiles_by_site = {site_id: df_site for site_id, df_site in flat_profiles.groupby("site_id")}

    # Recalculate z values from x coordinates
    for site_id, profile_types in rows_by_site.items():

        logger.info("Overwriting dune crest/toes with manual values: {}".format(site_id))
        df_site = profiles_by_site.get(site_id)

        for profile_type in profile_types:

            # Only prestorm and poststorm features are recalculated
            if profile_type not in _FALLBACK_PROFILE:
                continue

            row = (site_id, profile_type)
            for loc in _FEATURES:
                x_val = df_merged.loc[row, "dune_{}_x".format(loc)]

                if pd.isnull(x_val):
                    # No x value means there is no z value either
                    z_val = np.nan
                else:
                    z_val = _lookup_profile_z(df_site, profile_type, float(x_val))
                    if pd.isnull(z_val):
                        logger.warning(
                            "No z value found at x={} for {} ({}), dune_{}_z set to NaN".format(
                                x_val, site_id, profile_type, loc
                            )
                        )

                # Put results back into merged dataframe
                df_merged.loc[row, "dune_{}_z".format(loc)] = z_val

    # Drop the helper columns which came from the overwrite sheet
    df_merged = df_merged.drop(columns=["dune_crest_x_overwrite", "dune_toe_x_overwrite", "comment"], errors="ignore")

    # Merge back into interim data frame. Use concat/duplicates since .update will not update nan values.
    # df_merged comes first, so its rows win over the original rows with the same index.
    df_final = pd.concat([df_merged, df_interim])
    df_final = df_final[~df_final.index.duplicated(keep="first")]

    return df_final.sort_index()


@click.command(short_help="overwrite profile_features with manual excel sheet")
@click.option("--interim_file", required=True, help="path of profile_features.csv")
@click.option("--overwrite_file", required=True, help="path of excel file with overwrite data")
@click.option("--profile_file", required=True, help="path of profiles.csv")
@click.option("--overwrite/--no-overwrite", default=True)
def apply_profile_features_overwrite(interim_file, overwrite_file, profile_file, overwrite):
    """
    Apply the manual excel sheet to the interim profile features file.
    :param interim_file: Path of profile_features.csv. Rewritten in place unless --no-overwrite is given.
    :param overwrite_file: Path of the excel file with the manual values
    :param profile_file: Path of profiles.csv, used to recalculate z values
    :param overwrite: If False, the values are still calculated but interim_file is left untouched
    :return:
    """
    logger.info("Overwriting profile features with manual excel file")

    # Load files
    df_interim = pd.read_csv(interim_file, index_col=[0, 1])
    df_overwrite = pd.read_excel(overwrite_file)
    df_profiles = pd.read_csv(profile_file, index_col=[0, 1, 2])
    if {"site_id", "profile_type"}.issubset(df_overwrite.columns):
        df_overwrite = df_overwrite.set_index(["site_id", "profile_type"])

    # Replace interim values with overwrite values
    df_interim = overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite)

    # Write to csv, but only if we are allowed to replace the interim file
    if overwrite:
        df_interim.to_csv(interim_file, float_format="%.3f")
    else:
        logger.info("--no-overwrite given, {} not written".format(interim_file))

    logger.info("Done!")