Refactor overwriting dune crest/toes and impacts
Uses one central .csv file contained in ./data/raw/profile_features_chris_leaman
develop
parent
e1d95a1752
commit
3af90601ef
@ -1,103 +0,0 @@
|
|||||||
"""
|
|
||||||
After generating interim data files based on raw data, we may need to overwrite some rows with manual data.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import click
|
|
||||||
from logs import setup_logging
|
|
||||||
|
|
||||||
logger = setup_logging()
|
|
||||||
|
|
||||||
|
|
||||||
def overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite=True):
    """
    Overwrite dune crest/toe values of the interim profile features with manual values.

    Rows of ``df_overwrite`` are matched to rows of ``df_interim`` on their index.
    For every matched row, ``dune_crest_x`` and ``dune_toe_x`` are:

    * set to NaN when the overwrite value is the string ``"remove"``,
    * replaced when the overwrite value is any other non-null value,
    * left untouched when the overwrite value is null.

    ``dune_crest_z`` and ``dune_toe_z`` are then looked up again in ``df_profiles``
    at the (possibly new) x coordinate. If the row's own profile type has no point
    at that x, the other profile type (prestorm <-> poststorm) is used instead.

    :param df_interim: interim profile features. The index must have a level named
        ``site_id``; rows are addressed as ``(site_id, profile_type)``.
    :param df_overwrite: manual values, indexed like ``df_interim``, providing the
        columns ``dune_crest_x`` and ``dune_toe_x`` (and optionally ``comment``).
    :param df_profiles: profile points that can be queried by ``site_id``,
        ``profile_type`` and ``x``, with a ``z`` column.
    :param overwrite: not used by this function; kept so existing callers that
        pass it keep working.
    :return: sorted dataframe in which the overwritten rows take precedence over
        the corresponding rows of ``df_interim``.
    :raises IndexError: if neither profile type has a point at the requested x.
    """
    # Default (inner) merge: only rows present in both frames are kept.
    df_merged = df_interim.merge(df_overwrite, left_index=True, right_index=True, suffixes=["", "_overwrite"])

    dune_locations = ("crest", "toe")

    for loc in dune_locations:
        x_col = "dune_{}_x".format(loc)
        overwrite_col = "{}_overwrite".format(x_col)
        overwrite_vals = df_merged[overwrite_col]

        # Remove x vals where the overwrite data says "remove"
        df_merged.loc[overwrite_vals == "remove", x_col] = np.nan

        # Put in new x vals. A NaN in the overwrite column means keep the original value.
        idx = overwrite_vals.notnull() & (overwrite_vals != "remove")
        df_merged.loc[idx, x_col] = df_merged.loc[idx, overwrite_col]

    # Recalculate z values from x coordinates
    other_profile_type = {"prestorm": "poststorm", "poststorm": "prestorm"}
    point_query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'

    for site_id in df_merged.index.get_level_values("site_id").unique():

        logger.info("Overwriting dune crest/toes with manual values: {}".format(site_id))

        # Get profiles for this site only, so the per-point queries search less data
        df_profile = df_profiles.query('site_id=="{}"'.format(site_id))

        for param in ["prestorm", "poststorm"]:

            # The merge only keeps rows present in the overwrite data, so a site may
            # have just one of the two profile types here. Nothing to recalculate then.
            if (site_id, param) not in df_merged.index:
                continue

            for loc in dune_locations:
                x_col = "dune_{}_x".format(loc)
                z_col = "dune_{}_z".format(loc)

                # Get x value to find corresponding z value
                x_val = df_merged.loc[(site_id, param), x_col]
                if np.isnan(x_val):
                    df_merged.loc[(site_id, param), z_col] = np.nan
                    continue

                # Get the corresponding z value for our x value
                df_point = df_profile.query(point_query.format(site_id, param, x_val))

                # Try the other profile type if this one has no point at x
                if df_point.empty:
                    df_point = df_profile.query(point_query.format(site_id, other_profile_type[param], x_val))

                # Raises IndexError if the other profile type has no point at x either
                z_val = df_point.iloc[0].z

                # Put results back into merged dataframe
                df_merged.loc[(site_id, param), z_col] = z_val

    # Drop helper columns that came from the overwrite data
    df_merged = df_merged.drop(columns=["dune_crest_x_overwrite", "dune_toe_x_overwrite", "comment"], errors="ignore")

    # Merge back into interim data frame. Use concat/duplicates since .update will not update nan values.
    # df_merged comes first so that its rows win when duplicates are dropped.
    df_final = pd.concat([df_merged, df_interim])
    df_final = df_final[~df_final.index.duplicated(keep="first")]
    df_final = df_final.sort_index()

    return df_final
|
|
||||||
|
|
||||||
|
|
||||||
@click.command(short_help="overwrite profile_features with manual excel sheet")
@click.option("--interim_file", required=True, help="path of profile_features.csv")
@click.option("--overwrite_file", required=True, help="path of excel file with overwrite data")
@click.option("--profile_file", required=True, help="path of profiles.csv")
@click.option("--overwrite/--no-overwrite", default=True)
def apply_profile_features_overwrite(interim_file, overwrite_file, profile_file, overwrite):
    """
    Apply manual dune crest/toe values to the interim profile features file.

    Loads the three input files, replaces the interim values with the manual
    ones and, unless --no-overwrite is given, writes the result back to
    interim_file.

    :param interim_file: path of the profile features csv; its first two columns
        are used as the index
    :param overwrite_file: path of the excel file with the manual values
    :param profile_file: path of the profiles csv; its first three columns are
        used as the index
    :param overwrite: if False, interim_file is left untouched
    """
    logger.info("Overwriting profile features with manual excel file")

    # Load files
    df_interim = pd.read_csv(interim_file, index_col=[0, 1])
    df_overwrite = pd.read_excel(overwrite_file)
    df_profiles = pd.read_csv(profile_file, index_col=[0, 1, 2])

    # The overwrite sheet is merged on its index, so index it like the interim file
    if "site_id" in df_overwrite.columns and "profile_type" in df_overwrite.columns:
        df_overwrite = df_overwrite.set_index(["site_id", "profile_type"])

    # Replace interim values with overwrite values
    df_interim = overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite)

    # Write to csv, honouring the --overwrite/--no-overwrite flag
    if overwrite:
        df_interim.to_csv(interim_file, float_format="%.3f")
    else:
        logger.info("Not writing {} because --no-overwrite was given".format(interim_file))

    logger.info("Done!")
|
|
Loading…
Reference in New Issue