Add override of profile features

7 years ago · 4b9e69f2f0
parent 12070a1acf
commit 4b9e69f2f0
3 changed files with 111 additions and 1 deletions
--- a/7
+++ b/7
@ -102,12 +102,17 @@ impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/inte
 # 	--output-csv "./data/interim/profile_features.csv"
 # Create a .csv of our dune toe and crest profile features from Tom Beuzen's .mat file
 # Also apply an overwrite of some values, using an excel sheet
 ./data/interim/profile_features.csv: ./data/raw/profile_features_tom_beuzen/*.mat ./data/interim/sites.csv
 	activate ./.venv && python ./src/cli.py create-profile-features \
 	--crest-mat "./data/raw/profile_features_tom_beuzen/J16_DuneCrest.mat" \
 	--toe-mat "./data/raw/profile_features_tom_beuzen/J16_DuneToe.mat" \
 	--sites-csv "./data/interim/sites.csv" \
-	--output-file "./data/interim/profile_features.csv"
+	--output-file "./data/interim/profile_features.csv" \
 	&& python ./src/cli.py apply-profile-features-overwrite \
 	--interim_file "./data/interim/profile_features.csv" \
 	--overwrite_file "./data/raw/profile_features_chris_leaman/profile_features_chris_leaman.xlsx" \
 	--profile_file "./data/interim/profiles.csv"
 # Creates a forecast of twl using sto06 and a time-varying prestorm foreshore slope
 ./data/interim/twl_foreshore_slope_sto06.csv: ./data/interim/waves.csv ./data/interim/tides.csv ./data/interim/profiles.csv ./data/interim/sites.csv ./data/interim/profile_features.csv
--- a/src/cli.py
+++ b/src/cli.py
@ -10,6 +10,7 @@ import data.csv_to_shp as csv_to_shp
 import analysis.forecast_twl as forecast_twl
 import analysis.forecasted_storm_impacts as forecasted_storm_impacts
 import analysis.observed_storm_impacts as observed_storm_impacts
 import data.apply_manual_overwrites as apply_manual_overwrites
 # Disable numpy warnings
 import warnings
@ -32,4 +33,5 @@ if __name__ == "__main__":
    cli.add_command(forecast_twl.create_twl_forecast)
    cli.add_command(forecasted_storm_impacts.create_forecasted_impacts)
    cli.add_command(observed_storm_impacts.create_observed_impacts)
    cli.add_command(apply_manual_overwrites.apply_profile_features_overwrite)
    cli()
--- a/src/data/apply_manual_overwrites.py
+++ b/src/data/apply_manual_overwrites.py
@ -0,0 +1,103 @@
 """
 After generating interim data files based on raw data, we may need to overwrite some rows with manual data.
 """
 import pandas as pd
 import numpy as np
 import click
 from utils import setup_logging
 logger = setup_logging()
 def overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite=True):
    """
    Overwrite the interim profile features file with an excel file.
    :param interim_file: Should be './data/interim/profile_features.csv'
    :param overwrite_file: Should be './data/raw/profile_features_chris_leaman/profile_features_chris_leaman.csv'
    :param overwrite: Whether or not to overwrite the original interim_file. If false, file will not be written
    :return:
    """
    # Merge
    df_merged = df_interim.merge(df_overwrite, left_index=True, right_index=True, suffixes=["", "_overwrite"])
    # Remove x vals if overwrite file as remove
    df_merged.loc[df_merged.dune_crest_x_overwrite == "remove", "dune_crest_x"] = np.nan
    df_merged.loc[df_merged.dune_toe_x_overwrite == "remove", "dune_toe_x"] = np.nan
    # Put in new x vals. Note that a NaN value in the overwrite column, means keep the original value.
    idx = (df_merged.dune_crest_x_overwrite.notnull()) & (df_merged.dune_crest_x_overwrite != "remove")
    df_merged.loc[idx, "dune_crest_x"] = df_merged.loc[idx, "dune_crest_x_overwrite"]
    idx = (df_merged.dune_toe_x_overwrite.notnull()) & (df_merged.dune_toe_x_overwrite != "remove")
    df_merged.loc[idx, "dune_toe_x"] = df_merged.loc[idx, "dune_toe_x_overwrite"]
    # Recalculate z values from x coordinates
    for site_id in df_merged.index.get_level_values("site_id").unique():
        logger.info("Overwriting dune crest/toes with manual values: {}".format(site_id))
        # Get profiles
        df_profile = df_profiles.query('site_id=="{}"'.format(site_id))
        for param in ["prestorm", "poststorm"]:
            for loc in ["crest", "toe"]:
                # Get x value to find corresponding z value
                x_val = df_merged.loc[(site_id, param), "dune_{}_x".format(loc)]
                if np.isnan(x_val):
                    df_merged.loc[(site_id, param), "dune_{}_z".format(loc)] = np.nan
                    continue
                # Get the corresponding z value for our x value
                query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'.format(site_id, param, x_val)
                # Try get the value from the other profile if we return nan or empty dataframe
                if df_profile.query(query).empty:
                    if param == "prestorm":
                        query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'.format(site_id, "poststorm", x_val)
                    elif param == "poststorm":
                        query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'.format(site_id, "prestorm", x_val)
                    z_val = df_profile.query(query).iloc[0].z
                else:
                    z_val = df_profile.query(query).iloc[0].z
                # Put results back into merged dataframe
                df_merged.loc[(site_id, param), "dune_{}_z".format(loc)] = z_val
    # Drop columns
    df_merged = df_merged.drop(columns=["dune_crest_x_overwrite", "dune_toe_x_overwrite", "comment"], errors="ignore")
    # Merge back into interim data frame. Use concat/duplicates since .update will not update nan values
    df_final = pd.concat([df_merged, df_interim])
    df_final = df_final[~df_final.index.duplicated(keep="first")]
    df_final = df_final.sort_index()
    # Write to file
    return df_final
# CLI command: apply the manual excel overwrites to the interim profile features csv.
#   --interim_file    csv of profile features; read with its first two columns as index and,
#                     unless --no-overwrite is given, rewritten in place
#   --overwrite_file  excel sheet with the manual values
#   --profile_file    csv of profiles; read with its first three columns as index
#   --overwrite/--no-overwrite  whether to write the result back to --interim_file
# (Documented with comments instead of a docstring so the click --help output stays the same.)
@click.command(short_help="overwrite profile_features with manual excel sheet")
@click.option("--interim_file", required=True, help="path of profile_features.csv")
@click.option("--overwrite_file", required=True, help="path of excel file with overwrite data")
@click.option("--profile_file", required=True, help="path of profiles.csv")
@click.option("--overwrite/--no-overwrite", default=True)
def apply_profile_features_overwrite(interim_file, overwrite_file, profile_file, overwrite):
    logger.info("Overwriting profile features with manual excel file")

    # Load files
    df_interim = pd.read_csv(interim_file, index_col=[0, 1])
    df_overwrite = pd.read_excel(overwrite_file)
    df_profiles = pd.read_csv(profile_file, index_col=[0, 1, 2])

    # Index the manual sheet like the interim file, so both can be joined on their index
    if "site_id" in df_overwrite.columns and "profile_type" in df_overwrite.columns:
        df_overwrite = df_overwrite.set_index(["site_id", "profile_type"])

    # Replace interim values with overwrite values
    df_interim = overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite)

    # Write to csv. Honour --no-overwrite: previously the flag was accepted but the interim
    # file was rewritten regardless.
    if overwrite:
        df_interim.to_csv(interim_file, float_format="%.3f")
    else:
        logger.info("--no-overwrite given, not writing {}".format(interim_file))
    logger.info("Done!")