Add override of profile features
parent
12070a1acf
commit
4b9e69f2f0
@ -0,0 +1,103 @@
|
||||
"""
|
||||
After generating interim data files based on raw data, we may need to overwrite some rows with manual data.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import click
|
||||
from utils import setup_logging
|
||||
|
||||
# Module-level logger used by the functions in this file; obtained from the
# project's utils.setup_logging helper (its configuration is defined there).
logger = setup_logging()
|
||||
|
||||
|
||||
def _lookup_profile_z(df_profile, site_id, profile_type, x_val):
    """
    Return the z value recorded in ``df_profile`` at ``x_val``.

    The requested profile type is tried first. If that query returns no rows, the
    other profile type (prestorm <-> poststorm) is tried instead.

    :param df_profile: Profile rows, already filtered to ``site_id``
    :param site_id: Site identifier used in the query
    :param profile_type: Profile type to try first ("prestorm" or "poststorm")
    :param x_val: x coordinate whose z value is wanted
    :return: z value of the first matching row
    :raises IndexError: if no row matches in either profile type
    """
    other_type = {"prestorm": "poststorm", "poststorm": "prestorm"}.get(profile_type)

    for candidate in (profile_type, other_type):
        if candidate is None:
            continue

        # NOTE(review): x is compared against a quoted (string) value, exactly as
        # in the original implementation. Confirm this matches the dtype of x in
        # the profiles data before changing it.
        query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'.format(site_id, candidate, x_val)

        # Evaluate each query once and reuse the result.
        df_match = df_profile.query(query)
        if not df_match.empty:
            return df_match.iloc[0].z

    raise IndexError(
        'No profile row found for site_id "{}" at x={} in either the prestorm or poststorm profile'.format(
            site_id, x_val
        )
    )


def overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite=True):
    """
    Apply manual dune crest/toe x overrides to the interim profile features.

    Rows of ``df_overwrite`` are matched to ``df_interim`` on their index. For each
    matched row and each of ``dune_crest_x`` / ``dune_toe_x``:

    * an overwrite value of ``"remove"`` sets the x value to NaN,
    * any other non-null overwrite value replaces the x value,
    * a null overwrite value keeps the original x value.

    The matching ``dune_crest_z`` / ``dune_toe_z`` values are then looked up again
    from ``df_profiles`` (NaN when the x value is NaN). Rows of ``df_interim`` with
    no matching overwrite row are returned unchanged.

    :param df_interim: Interim profile features. The index must have a level named
        ``site_id``; rows are addressed as ``(site_id, profile_type)``.
    :param df_overwrite: Manual overrides with the same index as ``df_interim`` and
        columns ``dune_crest_x`` and ``dune_toe_x``. An optional ``comment`` column
        is dropped from the result.
    :param df_profiles: Profile data in which ``site_id``, ``profile_type`` and ``x``
        can be queried, with a ``z`` column.
    :param overwrite: Not used by this function; kept so existing callers keep
        working. Nothing is written to disk here.
    :return: Data frame holding every row of ``df_interim``, with overridden rows
        updated, sorted by index.
    :raises IndexError: if a non-null x value is found in neither the prestorm nor
        the poststorm profile of its site.
    """

    # Inner merge: only rows present in both frames are edited below. Overlapping
    # column names coming from the overwrite frame get the "_overwrite" suffix.
    df_merged = df_interim.merge(df_overwrite, left_index=True, right_index=True, suffixes=("", "_overwrite"))

    for loc in ["crest", "toe"]:
        x_col = "dune_{}_x".format(loc)
        overwrite_col = "{}_overwrite".format(x_col)

        # Remove x vals where the overwrite file says "remove"
        df_merged.loc[df_merged[overwrite_col] == "remove", x_col] = np.nan

        # Put in new x vals. A NaN in the overwrite column means keep the original value.
        idx = (df_merged[overwrite_col].notnull()) & (df_merged[overwrite_col] != "remove")
        df_merged.loc[idx, x_col] = df_merged.loc[idx, overwrite_col]

    # Recalculate z values from x coordinates
    for site_id in df_merged.index.get_level_values("site_id").unique():

        logger.info("Overwriting dune crest/toes with manual values: {}".format(site_id))

        # Profiles of this site only; queried once per site rather than once per lookup.
        df_profile = df_profiles.query('site_id=="{}"'.format(site_id))

        for param in ["prestorm", "poststorm"]:

            # The overwrite data may hold only one profile type for a site, so
            # skip the type that did not survive the merge instead of raising KeyError.
            if (site_id, param) not in df_merged.index:
                continue

            for loc in ["crest", "toe"]:
                x_val = df_merged.loc[(site_id, param), "dune_{}_x".format(loc)]

                # pd.isnull also copes with None/object cells, which np.isnan rejects.
                if pd.isnull(x_val):
                    z_val = np.nan
                else:
                    z_val = _lookup_profile_z(df_profile, site_id, param, x_val)

                # Put results back into merged dataframe
                df_merged.loc[(site_id, param), "dune_{}_z".format(loc)] = z_val

    # Drop the helper columns that came from the overwrite data
    df_merged = df_merged.drop(columns=["dune_crest_x_overwrite", "dune_toe_x_overwrite", "comment"], errors="ignore")

    # Merge back into interim data frame. Use concat/duplicates since .update will
    # not update nan values. The edited rows come first so keep="first" prefers them.
    df_final = pd.concat([df_merged, df_interim])
    df_final = df_final[~df_final.index.duplicated(keep="first")]

    return df_final.sort_index()
|
||||
|
||||
|
||||
@click.command(short_help="overwrite profile_features with manual excel sheet")
@click.option("--interim_file", required=True, help="path of profile_features.csv")
@click.option("--overwrite_file", required=True, help="path of excel file with overwrite data")
@click.option("--profile_file", required=True, help="path of profiles.csv")
@click.option("--overwrite/--no-overwrite", default=True)
def apply_profile_features_overwrite(interim_file, overwrite_file, profile_file, overwrite):
    # Command line entry point: load the interim profile features, the manual
    # overwrite sheet and the profiles, apply the overrides, and write the result
    # back to interim_file.
    #
    # interim_file:   csv read with its first two columns as the index
    # overwrite_file: excel sheet holding the manual values
    # profile_file:   csv read with its first three columns as the index
    # overwrite:      when False (--no-overwrite) the result is computed but
    #                 interim_file is left untouched
    #
    # Documented with comments rather than a docstring so that the text click
    # prints for --help stays unchanged.
    logger.info("Overwriting profile features with manual excel file")

    # Load files
    df_interim = pd.read_csv(interim_file, index_col=[0, 1])
    df_overwrite = pd.read_excel(overwrite_file)
    df_profiles = pd.read_csv(profile_file, index_col=[0, 1, 2])

    # Index the overwrite sheet like the interim data so the two can be merged on index.
    # NOTE(review): if either column is missing the sheet keeps its default index
    # and the index-based merge cannot line the rows up; confirm whether that case
    # should be rejected explicitly.
    if "site_id" in df_overwrite.columns and "profile_type" in df_overwrite.columns:
        df_overwrite = df_overwrite.set_index(["site_id", "profile_type"])

    # Replace interim values with overwrite values
    df_interim = overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite)

    # Write to csv only when asked to; previously --no-overwrite was accepted but
    # ignored and the file was always rewritten.
    if overwrite:
        df_interim.to_csv(interim_file, float_format="%.3f")
    else:
        logger.info("Skipping write of {} (--no-overwrite)".format(interim_file))

    logger.info("Done!")
|
Loading…
Reference in New Issue