From 965dcf2ec07e071380f824dff6d900fbc46fd463 Mon Sep 17 00:00:00 2001
From: Chris Leaman
Date: Mon, 29 Apr 2019 14:06:25 +1000
Subject: [PATCH] Optimize calculation of change in volume

We now calculate the point where the pre-storm and post-storm profiles start
to change a bit more robustly.
---
 src/analysis/observed_storm_impacts.py | 194 ++++++++++++++++++++-----
 1 file changed, 157 insertions(+), 37 deletions(-)

diff --git a/src/analysis/observed_storm_impacts.py b/src/analysis/observed_storm_impacts.py
index 320610d..66f681f 100644
--- a/src/analysis/observed_storm_impacts.py
+++ b/src/analysis/observed_storm_impacts.py
@@ -5,6 +5,7 @@ from scipy.integrate import simps
 from scipy.signal import savgol_filter
 from scipy.interpolate import interp1d
 from numpy import ma as ma
+from itertools import groupby
 from tqdm import tqdm
 
 from logs import setup_logging
@@ -56,59 +57,154 @@ def volume_change(df_profiles, df_profile_features):
         params = {}
 
+        # Calculate pre and post storm volume seawards of our profile change point
+        # and store in dictionary and dataframe.
+        df_prestorm = df_profiles.loc[(site_id, "prestorm")]
+        df_poststorm = df_profiles.loc[(site_id, "poststorm")]
+
+        # Calculate total subaerial volume change.
+        # Calculate the difference between the pre and post storm profiles.
+        z_diff = (
+            df_profiles.loc[(site_id, "poststorm")].z
+            - df_profiles.loc[(site_id, "prestorm")].z
+        )
+
+        # If there are no common points between the pre and post storm profiles,
+        # we have to skip this site.
+        if z_diff.dropna().size == 0:
+            continue
+
+        # # # Debug
+        # import matplotlib.pyplot as plt
+        # plt.plot(z_diff)
+        # plt.plot(df_profiles.loc[(site_id, "prestorm")].z)
+        # plt.plot(df_profiles.loc[(site_id, "poststorm")].z)
+        # plt.show()
+
+        # First, find locations where we have a good match between pre and post storm
+        # profiles.
+        lidar_accuracy = 0.2  # m
+        good_match = start_stop(
+            (abs(z_diff) < lidar_accuracy).values, trigger_val=True, len_thresh=20
+        )
+
+        # If the entire profile has changed (usually at a lagoon entrance), take the
+        # change point as the first x-coord where we have both pre and post storm profiles.
+        if good_match.size == 0:
+            x_change_point = min(
+                set(df_prestorm.z.dropna().index.get_level_values("x"))
+                & set(df_poststorm.z.dropna().index.get_level_values("x"))
+            )
+            z_change_point = df_prestorm.loc[x_change_point].z
+
+        else:
+
+            # The minimum change point index should be the first place where we have a good match
+            idx_change_point_min = good_match[0][0]
+
+            # Identify locations where z_diff is negative, i.e. the profile has been
+            # eroded by the storm, then group them by the number of consecutive
+            # values.
+            grouped = start_stop((z_diff < 0).values, trigger_val=True, len_thresh=1)
+
+            # Sort by streak length, then get the start index of the longest streak
+            # of True values. x_change_point is then the x-coordinate where our pre and
+            # post storm profiles start to change.
+            idx_change_points = sorted(
+                [x for x in grouped if x[0] > idx_change_point_min],
+                key=lambda x: x[1] - x[0],
+                reverse=True,
+            )
+
+            if len(idx_change_points) == 0:
+                continue
+            else:
+                idx_change_point = idx_change_points[0][0]
+                x_change_point = z_diff.index[idx_change_point]
+                z_change_point = df_prestorm.loc[x_change_point].z
+
+        # Landward of the change point, set the difference between the pre and post
+        # storm profiles equal to zero.
+        z_diff[z_diff.index < x_change_point] = 0
+
+        params["prestorm_total_subaerial_vol"] = beach_volume(
+            x=df_prestorm.index.get_level_values("x"),
+            z=df_prestorm.z.values,
+            x_min=x_change_point,
+        )
+        params["poststorm_total_subaerial_vol"] = beach_volume(
+            x=df_poststorm.index.get_level_values("x"),
+            z=df_poststorm.z.values,
+            x_min=x_change_point,
+        )
+        params["total_vol_change"] = (
+            params["poststorm_total_subaerial_vol"]
+            - params["prestorm_total_subaerial_vol"]
+        )
+        params["x_change_point"] = x_change_point
+        params["z_change_point"] = z_change_point
+
+        df_vol_changes.loc[site_id, "total_vol_change"] = params["total_vol_change"]
+        df_vol_changes.loc[site_id, "x_change_point"] = params["x_change_point"]
+        df_vol_changes.loc[site_id, "z_change_point"] = params["z_change_point"]
+
         for zone in ["dune", "swash"]:
             params[zone] = {}
             for profile_type in ["prestorm", "poststorm"]:
-                params[zone][profile_type] = {}
+
+                # Store zone/profile_type results in a dictionary, then append it to
+                # the params dictionary at the end.
+                d = {}
+
+                # Get variables; this helps simplify the code below.
+                df_profile_feature = df_profile_features.loc[(site_id, profile_type)]
+                df_profile = df_profiles.loc[(site_id, profile_type)]
 
                 # Define the edges of the swash and dunes where we want to calculate subaerial volume.
                 if zone == "swash":
-                    params[zone][profile_type]["x_min"] = df_profile_features.loc[
-                        (site_id, profile_type)
-                    ].dune_toe_x
-                    params[zone][profile_type]["x_max"] = max(
-                        df_profiles.loc[(site_id, profile_type)].index.get_level_values(
-                            "x"
-                        )
-                    )
+                    d["x_min"] = df_profile_feature.dune_toe_x
+                    d["x_max"] = max(df_profile.index.get_level_values("x"))
 
                     # For profiles with no Dlow value, we take Dhigh as the minimum value to calculate swash
-                    if np.isnan(params[zone][profile_type]["x_min"]):
-                        params[zone][profile_type]["x_min"] = df_profile_features.loc[
-                            (site_id, profile_type)
-                        ].dune_crest_x
+                    if np.isnan(d["x_min"]):
+                        d["x_min"] = df_profile_feature.dune_crest_x
 
                 elif zone == "dune":
-                    params[zone][profile_type]["x_min"] = df_profile_features.loc[
-                        (site_id, profile_type)
-                    ].dune_crest_x
-                    params[zone][profile_type]["x_max"] = df_profile_features.loc[
-                        (site_id, profile_type)
-                    ].dune_toe_x
+                    d["x_min"] = df_profile_feature.dune_crest_x
+                    d["x_max"] = df_profile_feature.dune_toe_x
 
                     # For profiles with no Dlow value, the dune is undefined and we cannot calculate a dune volume.
                 # Calculate subaerial volume based on our x_min and x_max values
-                params[zone][profile_type]["subaerial_vol"] = beach_volume(
-                    x=df_profiles.loc[(site_id, profile_type)].index.get_level_values(
-                        "x"
-                    ),
-                    z=df_profiles.loc[(site_id, profile_type)].z.values,
-                    x_min=params[zone][profile_type]["x_min"],
-                    x_max=params[zone][profile_type]["x_max"],
+                d["subaerial_vol"] = beach_volume(
+                    x=df_profile.index.get_level_values("x"),
+                    z=df_profile.z.values,
+                    x_min=d["x_min"],
+                    x_max=d["x_max"],
                 )
 
-            # TODO Landward limit still needs to be incorporated
+                params[zone][profile_type] = d
 
-            params[zone]["vol_change"] = (
-                params[zone]["poststorm"]["subaerial_vol"]
-                - params[zone]["prestorm"]["subaerial_vol"]
+            # Calculate change in volumes. Use the z_diff array, which has been
+            # zeroed out landward of the x_change_point.
+            params[zone]["vol_change"] = beach_volume(
+                x=z_diff.index.values,
+                z=z_diff.values,
+                x_min=params[zone]["prestorm"]["x_min"],
+                x_max=params[zone]["prestorm"]["x_max"],
            )
+
             params[zone]["pct_change"] = (
                 params[zone]["vol_change"] / params[zone]["prestorm"]["subaerial_vol"]
             )
 
+            # params[zone]["vol_loss"] = (params[zone]["prestorm"]["subaerial_vol"] -
+            #                             params[zone]["poststorm"]["subaerial_vol"])
+            # params[zone]["pct_loss"] = \
+            #     params[zone]["vol_loss"] / params[zone]["prestorm"]["subaerial_vol"]
+
+            # Save results in our data frame
             df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = params[zone][
                 "prestorm"
             ]["subaerial_vol"]
@@ -121,7 +217,6 @@ def volume_change(df_profiles, df_profile_features):
             df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = params[zone][
                 "pct_change"
             ]
-
     return df_vol_changes
 
 
@@ -153,12 +248,8 @@ def storm_regime(df_observed_impacts):
     """
     logger.info("Getting observed storm regimes")
 
-    swash = (df_observed_impacts.dune_pct_change <= 2) & (
-        df_observed_impacts.dune_vol_change <= 3
-    )
-    collision = (df_observed_impacts.dune_pct_change >= 2) | (
-        df_observed_impacts.dune_vol_change > 3
-    )
+    swash = df_observed_impacts.dune_vol_change > -3
+    collision = df_observed_impacts.dune_vol_change < -3
 
     df_observed_impacts.loc[swash, "storm_regime"] = "swash"
     df_observed_impacts.loc[collision, "storm_regime"] = "collision"
@@ -344,3 +435,32 @@ def get_beach_width(df_profile_features, df_profiles, profile_type, ele, col_nam
         ).dune_toe_x
     df_width = (df_x_position - df_x_prestorm_dune_toe).rename(col_name)
     return df_width
+
+
+def start_stop(a, trigger_val, len_thresh=2):
+    """
+    https://stackoverflow.com/a/51259253
+
+    In [47]: myArray
+    Out[47]: array([1, 1, 0, 2, 0, 1, 1, 1, 1, 0, 0, 1, 2, 1, 1, 1])
+
+    In [48]: start_stop(myArray, trigger_val=1, len_thresh=2)
+    Out[48]:
+    array([[ 5,  8],
+           [13, 15]])
+
+    :param a: array to search for streaks of trigger_val
+    :param trigger_val: value which defines a streak
+    :param len_thresh: only streaks strictly longer than this are returned
+    :return: array of [start, stop] (inclusive) index pairs, one row per streak
+    """
+    # "Enclose" mask with sentinels to catch shifts later on
+    mask = np.r_[False, np.equal(a, trigger_val), False]
+
+    # Get the shifting indices
+    idx = np.flatnonzero(mask[1:] != mask[:-1])
+
+    # Get streak lengths
+    lens = idx[1::2] - idx[::2]
+
+    return idx.reshape(-1, 2)[lens > len_thresh] - [0, 1]
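
Reviewer note: the snippet below is a minimal, self-contained sketch of how the new change-point logic behaves. It is not part of the patch; the transect x, the synthetic z_diff values and the 60 m divergence point are invented for illustration, and start_stop is copied from the new function above so the example runs on its own with only numpy.

import numpy as np


def start_stop(a, trigger_val, len_thresh=2):
    # Copied from the patch above: returns inclusive [start, stop] index pairs
    # for runs of trigger_val in a that are longer than len_thresh.
    mask = np.r_[False, np.equal(a, trigger_val), False]
    idx = np.flatnonzero(mask[1:] != mask[:-1])
    lens = idx[1::2] - idx[::2]
    return idx.reshape(-1, 2)[lens > len_thresh] - [0, 1]


# Synthetic post-storm minus pre-storm elevation difference along a transect.
# Landward of x = 60 m the profiles agree to within lidar noise; seaward of it
# the profile has been eroded, so z_diff is negative. (Made-up numbers.)
rng = np.random.default_rng(0)
x = np.arange(0, 100, 1.0)
z_diff = np.where(x < 60, rng.uniform(-0.05, 0.05, x.size), -0.5)

lidar_accuracy = 0.2  # m, same tolerance as in the patch

# 1. Find long runs where the pre and post storm profiles match (>20 points).
good_match = start_stop(np.abs(z_diff) < lidar_accuracy, trigger_val=True, len_thresh=20)
idx_change_point_min = good_match[0][0]  # change point cannot be landward of this

# 2. Find runs of erosion (z_diff < 0) seaward of that point and take the
#    start of the longest run as the change point.
grouped = start_stop(z_diff < 0, trigger_val=True, len_thresh=1)
idx_change_points = sorted(
    [run for run in grouped if run[0] > idx_change_point_min],
    key=lambda run: run[1] - run[0],
    reverse=True,
)
idx_change_point = idx_change_points[0][0]
x_change_point = x[idx_change_point]
print(x_change_point)  # 60.0: the x-coordinate where the profiles start to diverge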