From 3af90601efa81f059d44afe58fcfa2f8f3541f2e Mon Sep 17 00:00:00 2001 From: Chris Leaman Date: Wed, 19 Dec 2018 16:14:27 +1100 Subject: [PATCH] Refactor overwriting dune crest/toes and impacts Uses one, central .csv file contained in ./data/raw/profile_features_chris_leaman --- src/analysis/observed_storm_impacts.py | 27 ++++- src/cli.py | 6 +- src/data/apply_manual_overwrites.py | 103 ------------------ src/data/parse_mat.py | 144 ++++++++++++++----------- 4 files changed, 107 insertions(+), 173 deletions(-) delete mode 100644 src/data/apply_manual_overwrites.py diff --git a/src/analysis/observed_storm_impacts.py b/src/analysis/observed_storm_impacts.py index 1046cd3..fd9f7b4 100644 --- a/src/analysis/observed_storm_impacts.py +++ b/src/analysis/observed_storm_impacts.py @@ -148,16 +148,33 @@ def storm_regime(df_observed_impacts): return df_observed_impacts +def overwrite_impacts(df_observed_impacts, df_raw_features): + """ + Overwrites calculated impacts with impacts manually specified in profile_features file + :param df_raw_profile_features: + :return: + """ + df_observed_impacts.update(df_raw_features.rename(columns={ + 'observed_storm_regime':'storm_regime'})) + return df_observed_impacts + + + @click.command() @click.option("--profiles-csv", required=True, help="") -@click.option("--profile-features-csv", required=True, help="") +@click.option("--profile-features-crest-toes-csv", required=True, help="") +@click.option("--raw-profile-features-csv", required=True,help="") @click.option("--output-file", required=True, help="") -def create_observed_impacts(profiles_csv, profile_features_csv, output_file): +def create_observed_impacts(profiles_csv, profile_features_crest_toes_csv, raw_profile_features_csv,output_file): + + profiles_csv = './data/interim/profiles.csv' + profile_features_crest_toes_csv= './data/interim/profile_features_crest_toes.csv' + raw_profile_features_csv = './data/raw/profile_features_chris_leaman/profile_features_chris_leaman.csv' logger.info("Creating observed wave impacts") logger.info("Importing data") df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2]) - df_profile_features = pd.read_csv(profile_features_csv, index_col=[0, 1]) + df_profile_features = pd.read_csv(profile_features_crest_toes_csv, index_col=[0, 1]) logger.info("Creating new dataframe for observed impacts") df_observed_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique()) @@ -170,6 +187,10 @@ def create_observed_impacts(profiles_csv, profile_features_csv, output_file): # Classify regime based on volume changes df_observed_impacts = storm_regime(df_observed_impacts) + # Overwrite storm impacts with manually picked impacts + df_raw_features = pd.read_csv(raw_profile_features_csv, index_col=[0]) + df_observed_impacts = overwrite_impacts(df_observed_impacts, df_raw_features) + # Save dataframe to csv df_observed_impacts.to_csv(output_file, float_format="%.4f") diff --git a/src/cli.py b/src/cli.py index f617459..c290548 100644 --- a/src/cli.py +++ b/src/cli.py @@ -10,7 +10,6 @@ import click import analysis.forecast_twl as forecast_twl import analysis.forecasted_storm_impacts as forecasted_storm_impacts import analysis.observed_storm_impacts as observed_storm_impacts -import data.apply_manual_overwrites as apply_manual_overwrites import data.csv_to_geojson as csv_to_geojson import data.parse_mat as parse_mat @@ -23,15 +22,14 @@ def cli(): if __name__ == "__main__": - cli.add_command(apply_manual_overwrites.apply_profile_features_overwrite) cli.add_command(csv_to_geojson.impacts_to_geojson) - cli.add_command(csv_to_geojson.profile_features_to_geojson) + cli.add_command(csv_to_geojson.profile_features_crest_toes_to_geojson) cli.add_command(csv_to_geojson.R_high_to_geojson) cli.add_command(csv_to_geojson.sites_csv_to_geojson) cli.add_command(forecast_twl.create_twl_forecast) cli.add_command(forecasted_storm_impacts.create_forecasted_impacts) cli.add_command(observed_storm_impacts.create_observed_impacts) - cli.add_command(parse_mat.create_profile_features) + cli.add_command(parse_mat.create_crest_toes) cli.add_command(parse_mat.create_sites_and_profiles_csv) cli.add_command(parse_mat.create_tides_csv) cli.add_command(parse_mat.create_waves_csv) diff --git a/src/data/apply_manual_overwrites.py b/src/data/apply_manual_overwrites.py deleted file mode 100644 index ddcb0c0..0000000 --- a/src/data/apply_manual_overwrites.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -After generating interim data files based on raw data, we may need to overwrite some rows with manual data. -""" - -import pandas as pd -import numpy as np -import click -from logs import setup_logging - -logger = setup_logging() - - -def overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite=True): - """ - Overwrite the interim profile features file with an excel file. - :param interim_file: Should be './data/interim/profile_features.csv' - :param overwrite_file: Should be './data/raw/profile_features_chris_leaman/profile_features_chris_leaman.csv' - :param overwrite: Whether or not to overwrite the original interim_file. If false, file will not be written - :return: - """ - - # Merge - df_merged = df_interim.merge(df_overwrite, left_index=True, right_index=True, suffixes=["", "_overwrite"]) - - # Remove x vals if overwrite file as remove - df_merged.loc[df_merged.dune_crest_x_overwrite == "remove", "dune_crest_x"] = np.nan - df_merged.loc[df_merged.dune_toe_x_overwrite == "remove", "dune_toe_x"] = np.nan - - # Put in new x vals. Note that a NaN value in the overwrite column, means keep the original value. - idx = (df_merged.dune_crest_x_overwrite.notnull()) & (df_merged.dune_crest_x_overwrite != "remove") - df_merged.loc[idx, "dune_crest_x"] = df_merged.loc[idx, "dune_crest_x_overwrite"] - - idx = (df_merged.dune_toe_x_overwrite.notnull()) & (df_merged.dune_toe_x_overwrite != "remove") - df_merged.loc[idx, "dune_toe_x"] = df_merged.loc[idx, "dune_toe_x_overwrite"] - - # Recalculate z values from x coordinates - for site_id in df_merged.index.get_level_values("site_id").unique(): - - logger.info("Overwriting dune crest/toes with manual values: {}".format(site_id)) - - # Get profiles - df_profile = df_profiles.query('site_id=="{}"'.format(site_id)) - - for param in ["prestorm", "poststorm"]: - for loc in ["crest", "toe"]: - - # Get x value to find corresponding z value - x_val = df_merged.loc[(site_id, param), "dune_{}_x".format(loc)] - if np.isnan(x_val): - df_merged.loc[(site_id, param), "dune_{}_z".format(loc)] = np.nan - continue - - # Get the corresponding z value for our x value - query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'.format(site_id, param, x_val) - - # Try get the value from the other profile if we return nan or empty dataframe - if df_profile.query(query).empty: - if param == "prestorm": - query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'.format(site_id, "poststorm", x_val) - elif param == "poststorm": - query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'.format(site_id, "prestorm", x_val) - z_val = df_profile.query(query).iloc[0].z - - else: - z_val = df_profile.query(query).iloc[0].z - - # Put results back into merged dataframe - df_merged.loc[(site_id, param), "dune_{}_z".format(loc)] = z_val - - # Drop columns - df_merged = df_merged.drop(columns=["dune_crest_x_overwrite", "dune_toe_x_overwrite", "comment"], errors="ignore") - - # Merge back into interim data frame. Use concat/duplicates since .update will not update nan values - df_final = pd.concat([df_merged, df_interim]) - df_final = df_final[~df_final.index.duplicated(keep="first")] - df_final = df_final.sort_index() - - # Write to file - return df_final - - -@click.command(short_help="overwrite profile_features with manual excel sheet") -@click.option("--interim_file", required=True, help="path of profile_features.csv") -@click.option("--overwrite_file", required=True, help="path of excel file with overwrite data") -@click.option("--profile_file", required=True, help="path of profiles.csv") -@click.option("--overwrite/--no-overwrite", default=True) -def apply_profile_features_overwrite(interim_file, overwrite_file, profile_file, overwrite): - logger.info("Overwriting profile features with manual excel file") - - # Load files - df_interim = pd.read_csv(interim_file, index_col=[0, 1]) - df_overwrite = pd.read_excel(overwrite_file) - df_profiles = pd.read_csv(profile_file, index_col=[0, 1, 2]) - if "site_id" in df_overwrite.columns and "profile_type" in df_overwrite.columns: - df_overwrite = df_overwrite.set_index(["site_id", "profile_type"]) - - # Replace interim values with overwrite values - df_interim = overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite) - - # Write to csv - df_interim.to_csv(interim_file, float_format="%.3f") - - logger.info("Done!") diff --git a/src/data/parse_mat.py b/src/data/parse_mat.py index 56db1a1..e56d669 100644 --- a/src/data/parse_mat.py +++ b/src/data/parse_mat.py @@ -17,33 +17,68 @@ from logs import setup_logging logger = setup_logging() -def parse_orientations(orientations_mat): +def parse_crest_toes(df_raw_features, df_profiles): """ - Parses the raw orientations.mat file and returns a pandas dataframe. Note that orientations are the direction - towards land measured in degrees anti-clockwise from east. - :param orientations_mat: + Parses profile_features_chris_leaman.csv + :param profile_features_csv: :return: """ - logger.info("Parsing %s", orientations_mat) - mat_data = loadmat(orientations_mat)["output"] - rows = [] - for i in range(0, len(mat_data["beach"])): - rows.append( - { - "beach": mat_data["beach"][i], - "orientation": mat_data["orientation"][i], - "lat_center": mat_data["lat_center"][i], - "lon_center": mat_data["lon_center"][i], - "lat_land": mat_data["lat_land"][i], - "lon_land": mat_data["lon_land"][i], - "lat_sea": mat_data["lat_sea"][i], - "lon_sea": mat_data["lon_sea"][i], - } - ) - df = pd.DataFrame(rows) - return df + # Puts profiles_features_csv into format expected by rest of analysis + df_crest_toes = df_raw_features.reset_index().melt(id_vars=['site_id'], + value_vars=['prestorm_dune_crest_x', 'prestorm_dune_toe_x', + 'poststorm_dune_crest_x', 'poststorm_dune_toe_x']) + df_crest_toes['profile_type'] = df_crest_toes.variable.str.extract(r'(prestorm|poststorm)') + df_crest_toes['point_type'] = df_crest_toes.variable.str.extract(r'(dune_crest_x|dune_toe_x)') + df_crest_toes = df_crest_toes.drop(columns=['variable']) + df_crest_toes = df_crest_toes.sort_values('site_id') + df_crest_toes = df_crest_toes.set_index(['site_id', 'profile_type', 'point_type']) + df_crest_toes = df_crest_toes.unstack() + df_crest_toes.columns = df_crest_toes.columns.droplevel() + + # Now let's calculate the corresponding z elevations for each of our x coordinates + for site_id in df_crest_toes.index.get_level_values("site_id").unique(): + logger.info('Calculating dune toe/crest z elevations for {}'.format(site_id)) + + # Get profile for this site + idx = pd.IndexSlice + df_profile = df_profiles.loc[idx[site_id, :,:], :] + + for param in ["prestorm", "poststorm"]: + for loc in ["crest", "toe"]: + + # Get x value to find corresponding z value + x_val = df_crest_toes.loc[(site_id, param), "dune_{}_x".format(loc)] + + if np.isnan(x_val): + df_crest_toes.loc[(site_id, param), "dune_{}_z".format(loc)] = np.nan + continue + + # Try get the value from the other profile if we return nan or empty dataframe + df_z = df_profile.loc[idx[site_id, param, x_val],:] + if df_z.empty: + if param == "prestorm": + new_param = 'poststorm' + elif param == "poststorm": + new_param = 'prestorm' + z_val = df_profile.loc[idx[site_id, new_param, x_val],:].z + else: + z_val = df_z.z + # # Try get the value from the other profile if we return nan or empty dataframe + # if df_profile.query(query).empty: + # if param == "prestorm": + # query = query.replace('prestorm', 'poststorm') + # elif param == "poststorm": + # query = query.replace('poststorm', 'prestorm') + # z_val = df_profile.query(query).iloc[0].z + # else: + # z_val = df_profile.query(query).iloc[0].z + + # Put results back into merged dataframe + df_crest_toes.loc[(site_id, param), "dune_{}_z".format(loc)] = z_val + + return df_crest_toes def parse_dune_crest_toes(df_sites, crest_mat, toe_mat): """ @@ -93,39 +128,6 @@ def parse_dune_crest_toes(df_sites, crest_mat, toe_mat): return df_profile_features -def combine_sites_and_orientaions(df_sites, df_orientations): - """ - Replaces beach/lat/lon columns with the unique site_id. - :param dfs: - :param df_sites: - :return: - """ - df_merged_sites = df_sites.merge( - df_orientations[["beach", "lat_center", "lon_center", "orientation"]], - left_on=["beach", "lat", "lon"], - right_on=["beach", "lat_center", "lon_center"], - ) - - # Check that all our records have a unique site identifier - n_unmatched = len(df_sites) - len(df_merged_sites) - if n_unmatched > 0: - logger.warning("Not all records (%d of %d) matched with an orientation", n_unmatched, len(df_sites)) - - # Drop extra columns - df_merged_sites = df_merged_sites.drop(columns=["lat_center", "lon_center"]) - - return df_merged_sites - - -def specify_lat_lon_profile_center(df_sites, x_val=200): - """ - Specify which x-coordinate in the beach profile cross section the lat/lon corresponds to - :param df_sites: - :return: - """ - df_sites["profile_x_lat_lon"] = x_val - return df_sites - def parse_waves(waves_mat): """ @@ -403,19 +405,35 @@ def create_waves_csv(waves_mat, sites_csv, output_file): logger.info("Created %s", output_file) +# @click.command(short_help="create profile_features.csv") +# @click.option("--crest-mat", required=True, help=".mat file containing wave records") +# @click.option("--toe-mat", required=True, help=".mat file containing wave records") +# @click.option("--sites-csv", required=True, help=".csv file description of cross section sites") +# @click.option("--output-file", required=True, help="where to save waves.csv") +# def create_profile_features(crest_mat, toe_mat, sites_csv, output_file): +# logger.info("Creating %s", output_file) +# df_sites = pd.read_csv(sites_csv, index_col=[0]) +# df_profile_features = parse_dune_crest_toes(df_sites, crest_mat, toe_mat) +# df_profile_features.to_csv(output_file) +# logger.info("Created %s", output_file) + + @click.command(short_help="create profile_features.csv") -@click.option("--crest-mat", required=True, help=".mat file containing wave records") -@click.option("--toe-mat", required=True, help=".mat file containing wave records") -@click.option("--sites-csv", required=True, help=".csv file description of cross section sites") +@click.option("--profile-features-csv", required=True, help=".mat file containing wave records") +@click.option("--profiles-csv", required=True, help=".mat file containing wave records") @click.option("--output-file", required=True, help="where to save waves.csv") -def create_profile_features(crest_mat, toe_mat, sites_csv, output_file): +def create_crest_toes(profile_features_csv, profiles_csv, output_file): logger.info("Creating %s", output_file) - df_sites = pd.read_csv(sites_csv, index_col=[0]) - df_profile_features = parse_dune_crest_toes(df_sites, crest_mat, toe_mat) - df_profile_features.to_csv(output_file) + + df_raw_features = pd.read_csv(profile_features_csv, index_col=[0]) + df_profiles = pd.read_csv(profiles_csv, index_col=[0,1,2]) + df_crest_toes = parse_crest_toes(df_raw_features, df_profiles) + + df_crest_toes.to_csv(output_file,float_format="%.3f") logger.info("Created %s", output_file) + @click.command(short_help="create profiles.csv") @click.option("--profiles-mat", required=True, help=".mat file containing beach profiles") @click.option("--profiles-output-file", required=True, help="where to save profiles.csv") @@ -432,7 +450,7 @@ def create_sites_and_profiles_csv(profiles_mat, profiles_output_file, sites_outp df_profiles.to_csv(profiles_output_file) logger.info("Created %s", profiles_output_file) - df_sites.to_csv(sites_output_file) + df_sites.to_csv(sites_output_file,float_format="%.3f") logger.info("Created %s", sites_output_file)