Refactor overwriting dune crest/toes and impacts

Uses one central .csv file contained in ./data/raw/profile_features_chris_leaman
develop
Chris Leaman 6 years ago
parent e1d95a1752
commit 3af90601ef

@ -148,16 +148,33 @@ def storm_regime(df_observed_impacts):
return df_observed_impacts
def overwrite_impacts(df_observed_impacts, df_raw_features):
    """
    Overwrites calculated impacts with impacts manually specified in the raw
    profile_features file.

    :param df_observed_impacts: DataFrame of impacts calculated from the
        profiles, containing a 'storm_regime' column.
    :param df_raw_features: DataFrame of manually specified features. Its
        'observed_storm_regime' column is renamed to 'storm_regime' and used
        to overwrite the calculated values. NaN entries are skipped by
        DataFrame.update, so those rows keep their calculated regime.
    :return: df_observed_impacts with manual overwrites applied. Note the
        DataFrame is mutated in place and also returned for convenience.
    """
    df_observed_impacts.update(
        df_raw_features.rename(columns={"observed_storm_regime": "storm_regime"})
    )
    return df_observed_impacts
# NOTE(review): this region is diff residue — the old and new versions of this
# command are interleaved (option lines, def lines, and read_csv lines appear
# twice). The body is also cut by a hunk header below; lines are missing here.
@click.command()
@click.option("--profiles-csv", required=True, help="")
# NOTE(review): old option, superseded by --profile-features-crest-toes-csv below.
@click.option("--profile-features-csv", required=True, help="")
@click.option("--profile-features-crest-toes-csv", required=True, help="")
@click.option("--raw-profile-features-csv", required=True,help="")
@click.option("--output-file", required=True, help="")
# NOTE(review): old signature, replaced by the line below it.
def create_observed_impacts(profiles_csv, profile_features_csv, output_file):
def create_observed_impacts(profiles_csv, profile_features_crest_toes_csv, raw_profile_features_csv,output_file):
# NOTE(review): the next three lines hard-code local paths and silently
# override whatever the CLI options above supplied — they look like leftover
# debug code and should be removed before this command is used in production.
profiles_csv = './data/interim/profiles.csv'
profile_features_crest_toes_csv= './data/interim/profile_features_crest_toes.csv'
raw_profile_features_csv = './data/raw/profile_features_chris_leaman/profile_features_chris_leaman.csv'
logger.info("Creating observed wave impacts")
logger.info("Importing data")
# Profiles are indexed by (site_id, profile_type, x); crest/toes by (site_id, profile_type).
df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2])
# NOTE(review): old read, replaced by the line below it.
df_profile_features = pd.read_csv(profile_features_csv, index_col=[0, 1])
df_profile_features = pd.read_csv(profile_features_crest_toes_csv, index_col=[0, 1])
logger.info("Creating new dataframe for observed impacts")
# One row of observed impacts per unique site.
df_observed_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique())
@ -170,6 +187,10 @@ def create_observed_impacts(profiles_csv, profile_features_csv, output_file):
# Classify regime based on volume changes
df_observed_impacts = storm_regime(df_observed_impacts)
# Overwrite storm impacts with manually picked impacts
df_raw_features = pd.read_csv(raw_profile_features_csv, index_col=[0])
df_observed_impacts = overwrite_impacts(df_observed_impacts, df_raw_features)
# Save dataframe to csv
df_observed_impacts.to_csv(output_file, float_format="%.4f")

@ -10,7 +10,6 @@ import click
import analysis.forecast_twl as forecast_twl
import analysis.forecasted_storm_impacts as forecasted_storm_impacts
import analysis.observed_storm_impacts as observed_storm_impacts
import data.apply_manual_overwrites as apply_manual_overwrites
import data.csv_to_geojson as csv_to_geojson
import data.parse_mat as parse_mat
@ -23,15 +22,14 @@ def cli():
if __name__ == "__main__":
    # Register every sub-command on the click group. The registration for
    # apply_manual_overwrites is removed here because its import was deleted
    # in this change — keeping it would raise NameError at startup.
    cli.add_command(csv_to_geojson.impacts_to_geojson)
    cli.add_command(csv_to_geojson.profile_features_crest_toes_to_geojson)
    cli.add_command(csv_to_geojson.R_high_to_geojson)
    cli.add_command(csv_to_geojson.sites_csv_to_geojson)
    cli.add_command(forecast_twl.create_twl_forecast)
    cli.add_command(forecasted_storm_impacts.create_forecasted_impacts)
    cli.add_command(observed_storm_impacts.create_observed_impacts)
    cli.add_command(parse_mat.create_crest_toes)
    cli.add_command(parse_mat.create_sites_and_profiles_csv)
    cli.add_command(parse_mat.create_tides_csv)
    cli.add_command(parse_mat.create_waves_csv)

@ -1,103 +0,0 @@
"""
After generating interim data files based on raw data, we may need to overwrite some rows with manual data.
"""
import pandas as pd
import numpy as np
import click
from logs import setup_logging
logger = setup_logging()
def overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite=True):
    """
    Overwrite the interim profile features with manually specified values.

    :param df_interim: Interim features indexed by (site_id, profile_type),
        e.g. loaded from './data/interim/profile_features.csv'.
    :param df_overwrite: Manual overwrites indexed by (site_id, profile_type).
        For dune_crest_x_overwrite / dune_toe_x_overwrite columns: a numeric
        value replaces the interim x, the string "remove" blanks it to NaN,
        and NaN keeps the interim value.
    :param df_profiles: Beach profiles indexed by (site_id, profile_type, x),
        used to look up the z elevation for each overwritten x coordinate.
    :param overwrite: NOTE(review): accepted but never used in this function —
        the docstring in the original claimed it controls file writing, but
        writing happens in the caller. Confirm and either use or drop it.
    :return: df_interim with overwrites applied and z values recalculated.
    """
    # Merge the overwrite columns alongside the interim columns; overwrite
    # columns get the "_overwrite" suffix.
    df_merged = df_interim.merge(df_overwrite, left_index=True, right_index=True, suffixes=["", "_overwrite"])

    # The literal string "remove" in the overwrite file blanks out the x value.
    df_merged.loc[df_merged.dune_crest_x_overwrite == "remove", "dune_crest_x"] = np.nan
    df_merged.loc[df_merged.dune_toe_x_overwrite == "remove", "dune_toe_x"] = np.nan

    # Apply the new x values. A NaN in the overwrite column means keep the
    # original value, so only non-null, non-"remove" entries are copied.
    idx = (df_merged.dune_crest_x_overwrite.notnull()) & (df_merged.dune_crest_x_overwrite != "remove")
    df_merged.loc[idx, "dune_crest_x"] = df_merged.loc[idx, "dune_crest_x_overwrite"]
    idx = (df_merged.dune_toe_x_overwrite.notnull()) & (df_merged.dune_toe_x_overwrite != "remove")
    df_merged.loc[idx, "dune_toe_x"] = df_merged.loc[idx, "dune_toe_x_overwrite"]

    # Recalculate z values from the (possibly changed) x coordinates by
    # looking up the profile elevation at that chainage.
    for site_id in df_merged.index.get_level_values("site_id").unique():
        logger.info("Overwriting dune crest/toes with manual values: {}".format(site_id))

        # Restrict to this site's profiles once, outside the inner loops.
        df_profile = df_profiles.query('site_id=="{}"'.format(site_id))

        for param in ["prestorm", "poststorm"]:
            for loc in ["crest", "toe"]:
                # x coordinate whose elevation we need to look up.
                x_val = df_merged.loc[(site_id, param), "dune_{}_x".format(loc)]
                if np.isnan(x_val):
                    # No x coordinate -> no elevation either.
                    df_merged.loc[(site_id, param), "dune_{}_z".format(loc)] = np.nan
                    continue

                # NOTE(review): x is compared as a quoted string in the query;
                # this presumably relies on pandas coercing the literal back to
                # numeric — confirm it matches float x values exactly.
                query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'.format(site_id, param, x_val)

                # Fall back to the other profile type when this one has no row
                # at that x (e.g. the poststorm survey is shorter).
                if df_profile.query(query).empty:
                    if param == "prestorm":
                        query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'.format(site_id, "poststorm", x_val)
                    elif param == "poststorm":
                        query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'.format(site_id, "prestorm", x_val)
                    # NOTE(review): if the fallback query is also empty this
                    # .iloc[0] raises IndexError — confirm that is acceptable.
                    z_val = df_profile.query(query).iloc[0].z
                else:
                    z_val = df_profile.query(query).iloc[0].z

                # Store the looked-up elevation back on the merged frame.
                df_merged.loc[(site_id, param), "dune_{}_z".format(loc)] = z_val

    # Drop the helper columns; errors="ignore" tolerates their absence.
    df_merged = df_merged.drop(columns=["dune_crest_x_overwrite", "dune_toe_x_overwrite", "comment"], errors="ignore")

    # Merge back into the interim frame. concat + drop-duplicates is used
    # (instead of .update) because .update will not write NaN values.
    df_final = pd.concat([df_merged, df_interim])
    df_final = df_final[~df_final.index.duplicated(keep="first")]
    df_final = df_final.sort_index()

    return df_final
@click.command(short_help="overwrite profile_features with manual excel sheet")
@click.option("--interim_file", required=True, help="path of profile_features.csv")
@click.option("--overwrite_file", required=True, help="path of excel file with overwrite data")
@click.option("--profile_file", required=True, help="path of profiles.csv")
@click.option("--overwrite/--no-overwrite", default=True)
def apply_profile_features_overwrite(interim_file, overwrite_file, profile_file, overwrite):
    """
    Overwrite interim profile features with values from a manual excel sheet.

    Reads the interim features, applies the manual overwrites (recalculating
    z elevations from the profiles), and writes the result back over
    interim_file unless --no-overwrite was given.
    """
    logger.info("Overwriting profile features with manual excel file")

    # Load files
    df_interim = pd.read_csv(interim_file, index_col=[0, 1])
    df_overwrite = pd.read_excel(overwrite_file)
    df_profiles = pd.read_csv(profile_file, index_col=[0, 1, 2])

    # The excel sheet may or may not already carry the index columns.
    if "site_id" in df_overwrite.columns and "profile_type" in df_overwrite.columns:
        df_overwrite = df_overwrite.set_index(["site_id", "profile_type"])

    # Replace interim values with overwrite values
    df_interim = overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite)

    # Write to csv. Fix: previously the file was written unconditionally,
    # making the --no-overwrite flag a no-op, contrary to the documented
    # contract that the original file is only replaced when overwrite is True.
    if overwrite:
        df_interim.to_csv(interim_file, float_format="%.3f")

    logger.info("Done!")

@ -17,33 +17,68 @@ from logs import setup_logging
logger = setup_logging()
def parse_crest_toes(df_raw_features, df_profiles):
    """
    Parses profile_features_chris_leaman.csv into the format expected by the
    rest of the analysis, and looks up the z elevation for each manually
    picked dune crest/toe x coordinate.

    :param df_raw_features: DataFrame indexed by site_id with columns
        prestorm_dune_crest_x, prestorm_dune_toe_x, poststorm_dune_crest_x
        and poststorm_dune_toe_x.
    :param df_profiles: Beach profiles indexed by (site_id, profile_type, x)
        with a z column.
    :return: DataFrame indexed by (site_id, profile_type) with columns
        dune_crest_x, dune_toe_x, dune_crest_z, dune_toe_z.
    """
    # Reshape the wide per-site columns into one row per
    # (site_id, profile_type) with dune_crest_x / dune_toe_x columns.
    df_crest_toes = df_raw_features.reset_index().melt(
        id_vars=["site_id"],
        value_vars=[
            "prestorm_dune_crest_x",
            "prestorm_dune_toe_x",
            "poststorm_dune_crest_x",
            "poststorm_dune_toe_x",
        ],
    )
    df_crest_toes["profile_type"] = df_crest_toes.variable.str.extract(r"(prestorm|poststorm)")
    df_crest_toes["point_type"] = df_crest_toes.variable.str.extract(r"(dune_crest_x|dune_toe_x)")
    df_crest_toes = df_crest_toes.drop(columns=["variable"])
    df_crest_toes = df_crest_toes.sort_values("site_id")
    df_crest_toes = df_crest_toes.set_index(["site_id", "profile_type", "point_type"])
    df_crest_toes = df_crest_toes.unstack()
    df_crest_toes.columns = df_crest_toes.columns.droplevel()

    # Now calculate the corresponding z elevation for each x coordinate.
    for site_id in df_crest_toes.index.get_level_values("site_id").unique():
        logger.info('Calculating dune toe/crest z elevations for {}'.format(site_id))

        # Restrict to this site's profiles once, outside the inner loops.
        idx = pd.IndexSlice
        df_profile = df_profiles.loc[idx[site_id, :, :], :]

        for param in ["prestorm", "poststorm"]:
            for loc in ["crest", "toe"]:
                # x coordinate whose elevation we need to look up.
                x_val = df_crest_toes.loc[(site_id, param), "dune_{}_x".format(loc)]
                if np.isnan(x_val):
                    # No picked x -> no elevation either.
                    df_crest_toes.loc[(site_id, param), "dune_{}_z".format(loc)] = np.nan
                    continue

                # NOTE(review): .loc raises KeyError when x_val is absent from
                # the profile index, so the .empty fallback below may never be
                # reached — confirm whether missing x values should be handled
                # with a try/except instead.
                df_z = df_profile.loc[idx[site_id, param, x_val], :]
                if df_z.empty:
                    # Fall back to the other profile type when this one has no
                    # row at that x (e.g. the poststorm survey is shorter).
                    if param == "prestorm":
                        new_param = "poststorm"
                    elif param == "poststorm":
                        new_param = "prestorm"
                    z_val = df_profile.loc[idx[site_id, new_param, x_val], :].z
                else:
                    z_val = df_z.z

                # Store the looked-up elevation back on the result frame.
                df_crest_toes.loc[(site_id, param), "dune_{}_z".format(loc)] = z_val

    return df_crest_toes
def parse_dune_crest_toes(df_sites, crest_mat, toe_mat):
"""
@ -93,39 +128,6 @@ def parse_dune_crest_toes(df_sites, crest_mat, toe_mat):
return df_profile_features
def combine_sites_and_orientaions(df_sites, df_orientations):
    """
    Attach orientation data to each site record.

    Sites are matched to orientation records on (beach, lat, lon) vs
    (beach, lat_center, lon_center); the helper lat_center/lon_center
    columns are dropped from the result.

    :param df_sites: sites dataframe with beach/lat/lon columns.
    :param df_orientations: orientations dataframe with beach,
        lat_center, lon_center and orientation columns.
    :return: merged dataframe of sites with an orientation column.
    """
    orientation_cols = ["beach", "lat_center", "lon_center", "orientation"]
    df_merged_sites = df_sites.merge(
        df_orientations[orientation_cols],
        left_on=["beach", "lat", "lon"],
        right_on=["beach", "lat_center", "lon_center"],
    )

    # Any site without a matching orientation record is silently dropped by
    # the inner merge, so warn when the row counts disagree.
    n_unmatched = len(df_sites) - len(df_merged_sites)
    if n_unmatched > 0:
        logger.warning("Not all records (%d of %d) matched with an orientation", n_unmatched, len(df_sites))

    # The matching helper columns are no longer needed.
    return df_merged_sites.drop(columns=["lat_center", "lon_center"])
def specify_lat_lon_profile_center(df_sites, x_val=200):
    """
    Record which x-coordinate of the beach profile cross section each site's
    lat/lon refers to.

    :param df_sites: sites dataframe (mutated in place).
    :param x_val: cross-section chainage assigned to every site (default 200).
    :return: df_sites with a 'profile_x_lat_lon' column set to x_val.
    """
    # Every site uses the same reference chainage along its cross section.
    df_sites.loc[:, "profile_x_lat_lon"] = x_val
    return df_sites
def parse_waves(waves_mat):
"""
@ -403,19 +405,35 @@ def create_waves_csv(waves_mat, sites_csv, output_file):
logger.info("Created %s", output_file)
@click.command(short_help="create profile_features_crest_toes.csv")
@click.option("--profile-features-csv", required=True, help=".csv file of manually picked dune crest/toe x coordinates")
@click.option("--profiles-csv", required=True, help=".csv file containing beach profiles")
@click.option("--output-file", required=True, help="where to save profile_features_crest_toes.csv")
def create_crest_toes(profile_features_csv, profiles_csv, output_file):
    """
    Create the crest/toes csv from the manually picked profile features,
    looking up z elevations from the beach profiles.
    """
    logger.info("Creating %s", output_file)

    # Raw features are indexed by site_id; profiles by (site_id, profile_type, x).
    df_raw_features = pd.read_csv(profile_features_csv, index_col=[0])
    df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2])

    df_crest_toes = parse_crest_toes(df_raw_features, df_profiles)

    df_crest_toes.to_csv(output_file, float_format="%.3f")
    logger.info("Created %s", output_file)
@click.command(short_help="create profiles.csv")
@click.option("--profiles-mat", required=True, help=".mat file containing beach profiles")
@click.option("--profiles-output-file", required=True, help="where to save profiles.csv")
@ -432,7 +450,7 @@ def create_sites_and_profiles_csv(profiles_mat, profiles_output_file, sites_outp
df_profiles.to_csv(profiles_output_file)
logger.info("Created %s", profiles_output_file)
df_sites.to_csv(sites_output_file)
df_sites.to_csv(sites_output_file,float_format="%.3f")
logger.info("Created %s", sites_output_file)

Loading…
Cancel
Save