Add override of profile features

develop
Chris Leaman 6 years ago
parent 12070a1acf
commit 4b9e69f2f0

@ -102,12 +102,17 @@ impacts: ./data/interim/impacts_forecasted_foreshore_slope_sto06.csv ./data/inte
# --output-csv "./data/interim/profile_features.csv"
# Create a .csv of our dune toe and crest profile features from Tom Beuzen's .mat file
# Also apply an overwrite of some values, using an excel sheet
./data/interim/profile_features.csv: ./data/raw/profile_features_tom_beuzen/*.mat ./data/interim/sites.csv
activate ./.venv && python ./src/cli.py create-profile-features \
--crest-mat "./data/raw/profile_features_tom_beuzen/J16_DuneCrest.mat" \
--toe-mat "./data/raw/profile_features_tom_beuzen/J16_DuneToe.mat" \
--sites-csv "./data/interim/sites.csv" \
--output-file "./data/interim/profile_features.csv"
--output-file "./data/interim/profile_features.csv" \
&& python ./src/cli.py apply-profile-features-overwrite \
--interim_file "./data/interim/profile_features.csv" \
--overwrite_file "./data/raw/profile_features_chris_leaman/profile_features_chris_leaman.xlsx" \
--profile_file "./data/interim/profiles.csv"
# Creates a forecast of twl using sto06 and the time-varying prestorm foreshore slope
./data/interim/twl_foreshore_slope_sto06.csv: ./data/interim/waves.csv ./data/interim/tides.csv ./data/interim/profiles.csv ./data/interim/sites.csv ./data/interim/profile_features.csv

@ -10,6 +10,7 @@ import data.csv_to_shp as csv_to_shp
import analysis.forecast_twl as forecast_twl
import analysis.forecasted_storm_impacts as forecasted_storm_impacts
import analysis.observed_storm_impacts as observed_storm_impacts
import data.apply_manual_overwrites as apply_manual_overwrites
# Disable numpy warnings
import warnings
@ -32,4 +33,5 @@ if __name__ == "__main__":
cli.add_command(forecast_twl.create_twl_forecast)
cli.add_command(forecasted_storm_impacts.create_forecasted_impacts)
cli.add_command(observed_storm_impacts.create_observed_impacts)
cli.add_command(apply_manual_overwrites.apply_profile_features_overwrite)
cli()

@ -0,0 +1,103 @@
"""
After generating interim data files based on raw data, we may need to overwrite some rows with manual data.
"""
import pandas as pd
import numpy as np
import click
from utils import setup_logging
logger = setup_logging()
def _lookup_profile_z(df_profile, site_id, profile_type, x_val):
    """
    Return the z value of the first profile row matching the site, profile type and x value.

    :param df_profile: profile rows to search; must be queryable by site_id, profile_type and x
    :param site_id: site identifier to match
    :param profile_type: profile type to match, "prestorm" or "poststorm"
    :param x_val: x coordinate to match
    :return: z value of the first matching row, or None if no row matches
    """
    # NOTE(review): x is compared against a quoted value, exactly as the query was originally
    # written - confirm pandas coerces this the way the profile data needs.
    query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'.format(site_id, profile_type, x_val)
    df_match = df_profile.query(query)
    if df_match.empty:
        return None
    return df_match.iloc[0].z


def overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite=True):
    """
    Overwrite dune crest/toe features in the interim profile features with manual values.

    For every row present in both df_interim and df_overwrite, dune_crest_x and dune_toe_x are
    replaced by the manual value: the string "remove" clears the x value, a null keeps the
    original value, and anything else replaces it. The matching dune_crest_z/dune_toe_z values
    are then looked up again from df_profiles. Rows of df_interim that have no overwrite row
    are returned unchanged.

    :param df_interim: profile features indexed by (site_id, profile_type), with dune_crest_x
        and dune_toe_x columns
    :param df_overwrite: manual values on the same index, with dune_crest_x and dune_toe_x
        columns and optionally a comment column
    :param df_profiles: profile data that can be queried by site_id, profile_type and x and has
        a z column
    :param overwrite: not used by this function; kept so existing callers keep working
    :return: new dataframe of profile features, sorted by index. Nothing is written to disk.
    :raises IndexError: if an x value cannot be found in either the prestorm or the poststorm
        profile of its site
    """
    # Inner join on the index: only rows that have a manual entry need recalculating
    df_merged = df_interim.merge(df_overwrite, left_index=True, right_index=True, suffixes=["", "_overwrite"])

    for loc in ["crest", "toe"]:
        x_col = "dune_{}_x".format(loc)
        overwrite_col = "dune_{}_x_overwrite".format(loc)

        # Remove x vals where the overwrite file says "remove"
        is_remove = df_merged[overwrite_col] == "remove"
        df_merged.loc[is_remove, x_col] = np.nan

        # Put in new x vals. A null in the overwrite column means keep the original value.
        is_replace = df_merged[overwrite_col].notnull() & ~is_remove
        df_merged.loc[is_replace, x_col] = df_merged.loc[is_replace, overwrite_col]

    # Recalculate z values from x coordinates
    for site_id in df_merged.index.get_level_values("site_id").unique():
        logger.info("Overwriting dune crest/toes with manual values: {}".format(site_id))

        # Narrow the profiles down to this site once, rather than for every lookup
        df_profile = df_profiles.query('site_id=="{}"'.format(site_id))

        for param in ["prestorm", "poststorm"]:
            # The overwrite file may list only one profile type for a site; nothing to update then
            if (site_id, param) not in df_merged.index:
                continue

            for loc in ["crest", "toe"]:
                x_col = "dune_{}_x".format(loc)
                z_col = "dune_{}_z".format(loc)

                # Without an x value there is no z value either
                x_val = df_merged.loc[(site_id, param), x_col]
                if pd.isnull(x_val):
                    df_merged.loc[(site_id, param), z_col] = np.nan
                    continue

                # Get the corresponding z value, falling back to the other profile type if this
                # profile has no point at that x value
                z_val = _lookup_profile_z(df_profile, site_id, param, x_val)
                if z_val is None:
                    other_param = "poststorm" if param == "prestorm" else "prestorm"
                    z_val = _lookup_profile_z(df_profile, site_id, other_param, x_val)
                if z_val is None:
                    raise IndexError(
                        "No profile point at x={} for site {} (needed for {} dune {})".format(
                            x_val, site_id, param, loc
                        )
                    )

                # Put results back into merged dataframe
                df_merged.loc[(site_id, param), z_col] = z_val

    # Drop the helper columns that came from the overwrite file
    df_merged = df_merged.drop(columns=["dune_crest_x_overwrite", "dune_toe_x_overwrite", "comment"], errors="ignore")

    # Merge back into interim data frame. Use concat/duplicates since .update will not update nan values.
    # The merged rows come first so they win over the original rows with the same index.
    df_final = pd.concat([df_merged, df_interim])
    df_final = df_final[~df_final.index.duplicated(keep="first")]
    df_final = df_final.sort_index()

    return df_final
@click.command(short_help="overwrite profile_features with manual excel sheet")
@click.option("--interim_file", required=True, help="path of profile_features.csv")
@click.option("--overwrite_file", required=True, help="path of excel file with overwrite data")
@click.option("--profile_file", required=True, help="path of profiles.csv")
@click.option("--overwrite/--no-overwrite", default=True)
def apply_profile_features_overwrite(interim_file, overwrite_file, profile_file, overwrite):
    """
    Apply manual dune crest/toe values from an excel sheet to the profile features csv.

    :param interim_file: path of the profile features csv; read, and rewritten in place when
        overwrite is true
    :param overwrite_file: path of the excel file holding the manual values
    :param profile_file: path of the profiles csv used to look up z values
    :param overwrite: if false (--no-overwrite), the result is calculated but interim_file is
        left untouched
    """
    logger.info("Overwriting profile features with manual excel file")

    # Load files
    df_interim = pd.read_csv(interim_file, index_col=[0, 1])
    df_overwrite = pd.read_excel(overwrite_file)
    df_profiles = pd.read_csv(profile_file, index_col=[0, 1, 2])

    # Index the manual values the same way as the interim data so they can be joined on the index
    if "site_id" in df_overwrite.columns and "profile_type" in df_overwrite.columns:
        df_overwrite = df_overwrite.set_index(["site_id", "profile_type"])

    # Replace interim values with overwrite values
    df_interim = overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite)

    # Write to csv, but only if asked to. Previously --no-overwrite was ignored here.
    if overwrite:
        df_interim.to_csv(interim_file, float_format="%.3f")
    else:
        logger.info("--no-overwrite given, not writing {}".format(interim_file))
    logger.info("Done!")
Loading…
Cancel
Save