From 3af90601efa81f059d44afe58fcfa2f8f3541f2e Mon Sep 17 00:00:00 2001
From: Chris Leaman <ckleaman@gmail.com>
Date: Wed, 19 Dec 2018 16:14:27 +1100
Subject: [PATCH] Refactor overwriting dune crest/toes and impacts

Uses one central .csv file for the manually specified dune crests/toes and
storm impacts, contained in ./data/raw/profile_features_chris_leaman
---
 src/analysis/observed_storm_impacts.py |  27 ++++-
 src/cli.py                             |   6 +-
 src/data/apply_manual_overwrites.py    | 103 ------------------
 src/data/parse_mat.py                  | 144 ++++++++++++++-----------
 4 files changed, 107 insertions(+), 173 deletions(-)
 delete mode 100644 src/data/apply_manual_overwrites.py

diff --git a/src/analysis/observed_storm_impacts.py b/src/analysis/observed_storm_impacts.py
index 1046cd3..fd9f7b4 100644
--- a/src/analysis/observed_storm_impacts.py
+++ b/src/analysis/observed_storm_impacts.py
@@ -148,16 +148,33 @@ def storm_regime(df_observed_impacts):
     return df_observed_impacts
 
 
+def overwrite_impacts(df_observed_impacts, df_raw_features):
+    """
+    Overwrites calculated impacts with impacts manually specified in profile_features file
+    :param df_observed_impacts: calculated impacts, updated in place with DataFrame.update()
+    :param df_raw_features: manual values, 'observed_storm_regime' is used as 'storm_regime'
+    :return: df_observed_impacts, after it has been updated
+    """
+    df_observed_impacts.update(df_raw_features.rename(columns={
+        'observed_storm_regime':'storm_regime'}))
+    return df_observed_impacts
+
+
 @click.command()
 @click.option("--profiles-csv", required=True, help="")
-@click.option("--profile-features-csv", required=True, help="")
+@click.option("--profile-features-crest-toes-csv", required=True, help="")
+@click.option("--raw-profile-features-csv", required=True, help="")
 @click.option("--output-file", required=True, help="")
-def create_observed_impacts(profiles_csv, profile_features_csv, output_file):
+def create_observed_impacts(profiles_csv, profile_features_crest_toes_csv, raw_profile_features_csv, output_file):
+    """
+    Creates the observed impacts .csv from the input .csv files given on the command line
+    """
+    # Paths are taken from the click options as-is (they are not overridden with fixed paths)
 
     logger.info("Creating observed wave impacts")
     logger.info("Importing data")
     df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2])
-    df_profile_features = pd.read_csv(profile_features_csv, index_col=[0, 1])
+    df_profile_features = pd.read_csv(profile_features_crest_toes_csv, index_col=[0, 1])
 
     logger.info("Creating new dataframe for observed impacts")
     df_observed_impacts = pd.DataFrame(index=df_profile_features.index.get_level_values("site_id").unique())
@@ -170,6 +187,10 @@ def create_observed_impacts(profiles_csv, profile_features_csv, output_file):
     # Classify regime based on volume changes
     df_observed_impacts = storm_regime(df_observed_impacts)
 
+    # Overwrite storm impacts with manually picked impacts
+    df_raw_features = pd.read_csv(raw_profile_features_csv, index_col=[0])
+    df_observed_impacts = overwrite_impacts(df_observed_impacts, df_raw_features)
+
     # Save dataframe to csv
     df_observed_impacts.to_csv(output_file, float_format="%.4f")
 
diff --git a/src/cli.py b/src/cli.py
index f617459..c290548 100644
--- a/src/cli.py
+++ b/src/cli.py
@@ -10,7 +10,6 @@ import click
 import analysis.forecast_twl as forecast_twl
 import analysis.forecasted_storm_impacts as forecasted_storm_impacts
 import analysis.observed_storm_impacts as observed_storm_impacts
-import data.apply_manual_overwrites as apply_manual_overwrites
 import data.csv_to_geojson as csv_to_geojson
 import data.parse_mat as parse_mat
 
@@ -23,15 +22,14 @@ def cli():
 
 
 if __name__ == "__main__":
-    cli.add_command(apply_manual_overwrites.apply_profile_features_overwrite)
     cli.add_command(csv_to_geojson.impacts_to_geojson)
-    cli.add_command(csv_to_geojson.profile_features_to_geojson)
+    cli.add_command(csv_to_geojson.profile_features_crest_toes_to_geojson)
     cli.add_command(csv_to_geojson.R_high_to_geojson)
     cli.add_command(csv_to_geojson.sites_csv_to_geojson)
     cli.add_command(forecast_twl.create_twl_forecast)
     cli.add_command(forecasted_storm_impacts.create_forecasted_impacts)
     cli.add_command(observed_storm_impacts.create_observed_impacts)
-    cli.add_command(parse_mat.create_profile_features)
+    cli.add_command(parse_mat.create_crest_toes)
     cli.add_command(parse_mat.create_sites_and_profiles_csv)
     cli.add_command(parse_mat.create_tides_csv)
     cli.add_command(parse_mat.create_waves_csv)
diff --git a/src/data/apply_manual_overwrites.py b/src/data/apply_manual_overwrites.py
deleted file mode 100644
index ddcb0c0..0000000
--- a/src/data/apply_manual_overwrites.py
+++ /dev/null
@@ -1,103 +0,0 @@
-"""
-After generating interim data files based on raw data, we may need to overwrite some rows with manual data.
-"""
-
-import pandas as pd
-import numpy as np
-import click
-from logs import setup_logging
-
-logger = setup_logging()
-
-
-def overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite=True):
-    """
-    Overwrite the interim profile features file with an excel file.
-    :param interim_file: Should be './data/interim/profile_features.csv'
-    :param overwrite_file: Should be './data/raw/profile_features_chris_leaman/profile_features_chris_leaman.csv'
-    :param overwrite: Whether or not to overwrite the original interim_file. If false, file will not be written
-    :return:
-    """
-
-    # Merge
-    df_merged = df_interim.merge(df_overwrite, left_index=True, right_index=True, suffixes=["", "_overwrite"])
-
-    # Remove x vals if overwrite file as remove
-    df_merged.loc[df_merged.dune_crest_x_overwrite == "remove", "dune_crest_x"] = np.nan
-    df_merged.loc[df_merged.dune_toe_x_overwrite == "remove", "dune_toe_x"] = np.nan
-
-    # Put in new x vals. Note that a NaN value in the overwrite column, means keep the original value.
-    idx = (df_merged.dune_crest_x_overwrite.notnull()) & (df_merged.dune_crest_x_overwrite != "remove")
-    df_merged.loc[idx, "dune_crest_x"] = df_merged.loc[idx, "dune_crest_x_overwrite"]
-
-    idx = (df_merged.dune_toe_x_overwrite.notnull()) & (df_merged.dune_toe_x_overwrite != "remove")
-    df_merged.loc[idx, "dune_toe_x"] = df_merged.loc[idx, "dune_toe_x_overwrite"]
-
-    # Recalculate z values from x coordinates
-    for site_id in df_merged.index.get_level_values("site_id").unique():
-
-        logger.info("Overwriting dune crest/toes with manual values: {}".format(site_id))
-
-        # Get profiles
-        df_profile = df_profiles.query('site_id=="{}"'.format(site_id))
-
-        for param in ["prestorm", "poststorm"]:
-            for loc in ["crest", "toe"]:
-
-                # Get x value to find corresponding z value
-                x_val = df_merged.loc[(site_id, param), "dune_{}_x".format(loc)]
-                if np.isnan(x_val):
-                    df_merged.loc[(site_id, param), "dune_{}_z".format(loc)] = np.nan
-                    continue
-
-                # Get the corresponding z value for our x value
-                query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'.format(site_id, param, x_val)
-
-                # Try get the value from the other profile if we return nan or empty dataframe
-                if df_profile.query(query).empty:
-                    if param == "prestorm":
-                        query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'.format(site_id, "poststorm", x_val)
-                    elif param == "poststorm":
-                        query = 'site_id=="{}" & profile_type=="{}" & x=="{}"'.format(site_id, "prestorm", x_val)
-                    z_val = df_profile.query(query).iloc[0].z
-
-                else:
-                    z_val = df_profile.query(query).iloc[0].z
-
-                # Put results back into merged dataframe
-                df_merged.loc[(site_id, param), "dune_{}_z".format(loc)] = z_val
-
-    # Drop columns
-    df_merged = df_merged.drop(columns=["dune_crest_x_overwrite", "dune_toe_x_overwrite", "comment"], errors="ignore")
-
-    # Merge back into interim data frame. Use concat/duplicates since .update will not update nan values
-    df_final = pd.concat([df_merged, df_interim])
-    df_final = df_final[~df_final.index.duplicated(keep="first")]
-    df_final = df_final.sort_index()
-
-    # Write to file
-    return df_final
-
-
-@click.command(short_help="overwrite profile_features with manual excel sheet")
-@click.option("--interim_file", required=True, help="path of profile_features.csv")
-@click.option("--overwrite_file", required=True, help="path of excel file with overwrite data")
-@click.option("--profile_file", required=True, help="path of profiles.csv")
-@click.option("--overwrite/--no-overwrite", default=True)
-def apply_profile_features_overwrite(interim_file, overwrite_file, profile_file, overwrite):
-    logger.info("Overwriting profile features with manual excel file")
-
-    # Load files
-    df_interim = pd.read_csv(interim_file, index_col=[0, 1])
-    df_overwrite = pd.read_excel(overwrite_file)
-    df_profiles = pd.read_csv(profile_file, index_col=[0, 1, 2])
-    if "site_id" in df_overwrite.columns and "profile_type" in df_overwrite.columns:
-        df_overwrite = df_overwrite.set_index(["site_id", "profile_type"])
-
-    # Replace interim values with overwrite values
-    df_interim = overwrite_profile_features(df_interim, df_overwrite, df_profiles, overwrite)
-
-    # Write to csv
-    df_interim.to_csv(interim_file, float_format="%.3f")
-
-    logger.info("Done!")
diff --git a/src/data/parse_mat.py b/src/data/parse_mat.py
index 56db1a1..e56d669 100644
--- a/src/data/parse_mat.py
+++ b/src/data/parse_mat.py
@@ -17,33 +17,68 @@ from logs import setup_logging
 logger = setup_logging()
 
 
-def parse_orientations(orientations_mat):
+def parse_crest_toes(df_raw_features, df_profiles):
     """
-    Parses the raw orientations.mat file and returns a pandas dataframe. Note that orientations are the direction
-    towards land measured in degrees anti-clockwise from east.
-    :param orientations_mat:
+    Parses profile_features_chris_leaman.csv into the dune crest/toe x and z values of each profile
+    :param df_raw_features: manually picked features, indexed by site_id, with the columns
+        {prestorm,poststorm}_dune_{crest,toe}_x
+    :param df_profiles: profiles indexed by (site_id, profile_type, x) with a z column
     :return:
+        dataframe indexed by (site_id, profile_type) with dune_{crest,toe}_{x,z} columns
     """
-    logger.info("Parsing %s", orientations_mat)
-    mat_data = loadmat(orientations_mat)["output"]
-    rows = []
-    for i in range(0, len(mat_data["beach"])):
-        rows.append(
-            {
-                "beach": mat_data["beach"][i],
-                "orientation": mat_data["orientation"][i],
-                "lat_center": mat_data["lat_center"][i],
-                "lon_center": mat_data["lon_center"][i],
-                "lat_land": mat_data["lat_land"][i],
-                "lon_land": mat_data["lon_land"][i],
-                "lat_sea": mat_data["lat_sea"][i],
-                "lon_sea": mat_data["lon_sea"][i],
-            }
-        )
 
-    df = pd.DataFrame(rows)
-    return df
+    # Puts profiles_features_csv into format expected by rest of analysis, i.e. one row per
+    # (site_id, profile_type) with dune_crest_x and dune_toe_x as columns
+    df_crest_toes = df_raw_features.reset_index().melt(id_vars=['site_id'],
+                               value_vars=['prestorm_dune_crest_x', 'prestorm_dune_toe_x',
+                                      'poststorm_dune_crest_x', 'poststorm_dune_toe_x'])
+    df_crest_toes['profile_type'] = df_crest_toes.variable.str.extract(r'(prestorm|poststorm)')
+    df_crest_toes['point_type'] = df_crest_toes.variable.str.extract(r'(dune_crest_x|dune_toe_x)')
+    df_crest_toes = df_crest_toes.drop(columns=['variable'])
+    df_crest_toes = df_crest_toes.sort_values('site_id')
+    df_crest_toes = df_crest_toes.set_index(['site_id', 'profile_type', 'point_type'])
+    df_crest_toes = df_crest_toes.unstack()
+    df_crest_toes.columns = df_crest_toes.columns.droplevel()
+
+    # If an x coordinate is missing from one profile, its z value is read from the other one
+    other_profile_type = {"prestorm": "poststorm", "poststorm": "prestorm"}
+
+    # Now let's calculate the corresponding z elevations for each of our x coordinates
+    for site_id in df_crest_toes.index.get_level_values("site_id").unique():
+        logger.info('Calculating dune toe/crest z elevations for {}'.format(site_id))
+
+        # Get profile for this site
+        df_profile = df_profiles.loc[pd.IndexSlice[site_id, :, :], :]
+
+        for param in ["prestorm", "poststorm"]:
+            for loc in ["crest", "toe"]:
+                x_col = "dune_{}_x".format(loc)
+                z_col = "dune_{}_z".format(loc)
+
+                # Get x value to find corresponding z value
+                x_val = df_crest_toes.loc[(site_id, param), x_col]
+
+                # No x coordinate was picked for this feature, so there is no z value either
+                if np.isnan(x_val):
+                    df_crest_toes.loc[(site_id, param), z_col] = np.nan
+                    continue
+
+                # Looking up a missing (site_id, profile_type, x) key with .loc raises a KeyError
+                # rather than returning an empty dataframe, so catch it to fall back to the other
+                # profile. If the x value is in neither profile, the KeyError propagates.
+                try:
+                    z_val = df_profile.loc[(site_id, param, x_val), :].z
+                except KeyError:
+                    fallback = other_profile_type[param]
+                    logger.warning('x=%s not found in %s profile of %s, using %s profile',
+                                   x_val, param, site_id, fallback)
+                    z_val = df_profile.loc[(site_id, fallback, x_val), :].z
 
+                # Put results back into merged dataframe
+                df_crest_toes.loc[(site_id, param), z_col] = z_val
+
+    return df_crest_toes
+
 
 def parse_dune_crest_toes(df_sites, crest_mat, toe_mat):
     """
@@ -93,39 +128,6 @@ def parse_dune_crest_toes(df_sites, crest_mat, toe_mat):
     return df_profile_features
 
 
-def combine_sites_and_orientaions(df_sites, df_orientations):
-    """
-    Replaces beach/lat/lon columns with the unique site_id.
-    :param dfs:
-    :param df_sites:
-    :return:
-    """
-    df_merged_sites = df_sites.merge(
-        df_orientations[["beach", "lat_center", "lon_center", "orientation"]],
-        left_on=["beach", "lat", "lon"],
-        right_on=["beach", "lat_center", "lon_center"],
-    )
-
-    # Check that all our records have a unique site identifier
-    n_unmatched = len(df_sites) - len(df_merged_sites)
-    if n_unmatched > 0:
-        logger.warning("Not all records (%d of %d) matched with an orientation", n_unmatched, len(df_sites))
-
-    # Drop extra columns
-    df_merged_sites = df_merged_sites.drop(columns=["lat_center", "lon_center"])
-
-    return df_merged_sites
-
-
-def specify_lat_lon_profile_center(df_sites, x_val=200):
-    """
-    Specify which x-coordinate in the beach profile cross section the lat/lon corresponds to
-    :param df_sites:
-    :return:
-    """
-    df_sites["profile_x_lat_lon"] = x_val
-    return df_sites
-
 
 def parse_waves(waves_mat):
     """
@@ -403,19 +405,35 @@ def create_waves_csv(waves_mat, sites_csv, output_file):
     logger.info("Created %s", output_file)
 
 
+# @click.command(short_help="create profile_features.csv")
+# @click.option("--crest-mat", required=True, help=".mat file containing wave records")
+# @click.option("--toe-mat", required=True, help=".mat file containing wave records")
+# @click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
+# @click.option("--output-file", required=True, help="where to save waves.csv")
+# def create_profile_features(crest_mat, toe_mat, sites_csv, output_file):
+#     logger.info("Creating %s", output_file)
+#     df_sites = pd.read_csv(sites_csv, index_col=[0])
+#     df_profile_features = parse_dune_crest_toes(df_sites, crest_mat, toe_mat)
+#     df_profile_features.to_csv(output_file)
+#     logger.info("Created %s", output_file)
+
+
 @click.command(short_help="create profile_features.csv")
-@click.option("--crest-mat", required=True, help=".mat file containing wave records")
-@click.option("--toe-mat", required=True, help=".mat file containing wave records")
-@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
+@click.option("--profile-features-csv", required=True, help=".csv file of manually picked profile features")
+@click.option("--profiles-csv", required=True, help=".csv file containing beach profiles")
 @click.option("--output-file", required=True, help="where to save waves.csv")
-def create_profile_features(crest_mat, toe_mat, sites_csv, output_file):
+def create_crest_toes(profile_features_csv, profiles_csv, output_file):
     logger.info("Creating %s", output_file)
-    df_sites = pd.read_csv(sites_csv, index_col=[0])
-    df_profile_features = parse_dune_crest_toes(df_sites, crest_mat, toe_mat)
-    df_profile_features.to_csv(output_file)
+
+    # Raw features are indexed by their first column, profiles by their first three columns
+    df_raw_features = pd.read_csv(profile_features_csv, index_col=[0])
+    df_profiles = pd.read_csv(profiles_csv, index_col=[0, 1, 2])
+    df_crest_toes = parse_crest_toes(df_raw_features, df_profiles)
+    df_crest_toes.to_csv(output_file, float_format="%.3f")
     logger.info("Created %s", output_file)
 
 
+
 @click.command(short_help="create profiles.csv")
 @click.option("--profiles-mat", required=True, help=".mat file containing beach profiles")
 @click.option("--profiles-output-file", required=True, help="where to save profiles.csv")
@@ -432,7 +450,7 @@ def create_sites_and_profiles_csv(profiles_mat, profiles_output_file, sites_outp
 
     df_profiles.to_csv(profiles_output_file)
     logger.info("Created %s", profiles_output_file)
-    df_sites.to_csv(sites_output_file)
+    df_sites.to_csv(sites_output_file,float_format="%.3f")
     logger.info("Created %s", sites_output_file)