From f616f56fd3809e1b9bcdf2eb29212fa091735887 Mon Sep 17 00:00:00 2001
From: Chris Leaman <ckleaman@gmail.com>
Date: Tue, 26 Mar 2019 15:47:11 +1100
Subject: [PATCH] Optimize volume change calculations

---
 src/analysis/observed_storm_impacts.py | 238 ++++++++-----------------
 1 file changed, 76 insertions(+), 162 deletions(-)

diff --git a/src/analysis/observed_storm_impacts.py b/src/analysis/observed_storm_impacts.py
index d1009f5..320610d 100644
--- a/src/analysis/observed_storm_impacts.py
+++ b/src/analysis/observed_storm_impacts.py
@@ -6,6 +6,7 @@ from scipy.signal import savgol_filter
 from scipy.interpolate import interp1d
 from numpy import ma as ma
 
+from tqdm import tqdm
 from logs import setup_logging
 from utils import crossings, get_i_or_default
 from analysis.forecast_twl import get_mean_slope, get_intertidal_slope
@@ -34,175 +35,92 @@ def round_up_to_odd(f):
     return int(np.ceil(f) // 2 * 2 + 1)
 
 
-def volume_change(df_profiles, df_profile_features, zone):
+def volume_change(df_profiles, df_profile_features):
     """
     Calculates how much the volume change there is between prestrom and post storm profiles.
     :param df_profiles:
     :param df_profile_features:
-    :param zone: Either 'swash' or 'dune_face'
     :return:
     """
-    logger.info("Calculating change in beach volume in {} zone".format(zone))
+    logger.info("Calculating change in beach volume")
 
     df_vol_changes = pd.DataFrame(
         index=df_profile_features.index.get_level_values("site_id").unique()
     )
+
+    df_profiles = df_profiles.dropna(subset=["z"])
     df_profiles = df_profiles.sort_index()
     sites = df_profiles.groupby(level=["site_id"])
 
-    for site_id, df_site in sites:
-        logger.debug(
-            "Calculating change in beach volume at {} in {} zone".format(site_id, zone)
-        )
-
-        prestorm_row = df_profile_features.loc[(site_id, "prestorm")]
-        prestorm_dune_toe_x = prestorm_row.dune_toe_x
-        prestorm_dune_crest_x = prestorm_row.dune_crest_x
-
-        # If no dune toe has been defined, Dlow = Dhigh. Refer to Sallenger (2000).
-        if np.isnan(prestorm_dune_toe_x):
-            prestorm_dune_toe_x = prestorm_dune_crest_x
-
-        # If no prestorm and poststorm profiles, skip site and continue
-        profile_lengths = [
-            len(df_site.xs(x, level="profile_type")) for x in ["prestorm", "poststorm"]
-        ]
-        if any([length == 0 for length in profile_lengths]):
-            continue
-
-        # Find last x coordinate where we have both prestorm and poststorm measurements. If we don't do this,
-        # the prestorm and poststorm values are going to be calculated over different lengths.
-        df_zone = df_site.dropna(subset=["z"])
-        x_last_obs = min(
-            [
-                max(
-                    df_zone.query(
-                        "profile_type == '{}'".format(profile_type)
-                    ).index.get_level_values("x")
-                )
-                for profile_type in ["prestorm", "poststorm"]
-            ]
-        )
-        x_first_obs = max(
-            [
-                min(
-                    df_zone.query(
-                        "profile_type == '{}'".format(profile_type)
-                    ).index.get_level_values("x")
+    for site_id, df_site in tqdm(sites):
+
+        params = {}
+
+        for zone in ["dune", "swash"]:
+            params[zone] = {}
+
+            for profile_type in ["prestorm", "poststorm"]:
+                params[zone][profile_type] = {}
+
+                # Define the edges of the swash and dunes where we want to calculate subaeraial volume.
+                if zone == "swash":
+                    params[zone][profile_type]["x_min"] = df_profile_features.loc[
+                        (site_id, profile_type)
+                    ].dune_toe_x
+                    params[zone][profile_type]["x_max"] = max(
+                        df_profiles.loc[(site_id, profile_type)].index.get_level_values(
+                            "x"
+                        )
+                    )
+
+                    # For profiles with no Dlow value, we take Dhigh as the minimum value to calculate swash
+                    if np.isnan(params[zone][profile_type]["x_min"]):
+                        params[zone][profile_type]["x_min"] = df_profile_features.loc[
+                            (site_id, profile_type)
+                        ].dune_crest_x
+
+                elif zone == "dune":
+                    params[zone][profile_type]["x_min"] = df_profile_features.loc[
+                        (site_id, profile_type)
+                    ].dune_crest_x
+                    params[zone][profile_type]["x_max"] = df_profile_features.loc[
+                        (site_id, profile_type)
+                    ].dune_toe_x
+
+                # For profiles with no Dlow value, the dune is undefined and we cannot calculate a dune volume.
+
+                # Calculate subaerial volume based on our x min and maxes
+                params[zone][profile_type]["subaerial_vol"] = beach_volume(
+                    x=df_profiles.loc[(site_id, profile_type)].index.get_level_values(
+                        "x"
+                    ),
+                    z=df_profiles.loc[(site_id, profile_type)].z.values,
+                    x_min=params[zone][profile_type]["x_min"],
+                    x_max=params[zone][profile_type]["x_max"],
                 )
-                for profile_type in ["prestorm", "poststorm"]
-            ]
-        )
 
-        # Where we want to measure pre and post storm volume is dependant on the zone selected
-        if zone == "swash":
-            x_min = max(prestorm_dune_toe_x, x_first_obs)
-            x_max = x_last_obs
-        elif zone == "dune_face":
-            x_min = max(prestorm_dune_crest_x, x_first_obs)
-            x_max = min(prestorm_dune_toe_x, x_last_obs)
-        else:
-            logger.warning("Zone argument not properly specified. Please check")
-            x_min = None
-            x_max = None
-
-        # Now, compute the volume of sand between the x-coordinates prestorm_dune_toe_x and x_swash_last for both prestorm
-        # and post storm profiles.
-        prestorm_vol = beach_volume(
-            x=df_zone.query("profile_type=='prestorm'").index.get_level_values("x"),
-            z=df_zone.query("profile_type=='prestorm'").z,
-            x_min=x_min,
-            x_max=x_max,
-        )
-        poststorm_vol = beach_volume(
-            x=df_zone.query("profile_type=='poststorm'").index.get_level_values("x"),
-            z=df_zone.query("profile_type=='poststorm'").z,
-            x_min=x_min,
-            x_max=x_max,
-        )
+            # TODO Landward limit still needs to be incorporated
 
-        # Identify the x location where our pre and post storm profiles start to differ. This is so changes no due to
-        # the storm are not included when calculating volume.
-        df_prestorm = (
-            df_site.xs("prestorm", level="profile_type").z.rename("z_pre").to_frame()
-        )
-        df_poststorm = (
-            df_site.xs("poststorm", level="profile_type").z.rename("z_post").to_frame()
-        )
-        df_diff = df_prestorm.merge(df_poststorm, on=["site_id", "x"])
-        df_diff["z_diff"] = df_diff.z_pre - df_diff.z_post
-
-        # Find all locations where the difference in pre and post storm is zero.
-        x_crossings = crossings(df_diff.index.get_level_values("x"), df_diff.z_diff, 0)
-
-        # Determine where our pre and post storm profiles begin to change
-        if len(x_crossings) == 0:
-            # If no intersections, no change point
-            x_change_point = np.nan
-        else:
-
-            # If there is an intersection, check that the difference between the prestorm and poststorm profile is increasing.
-
-            # Find the slopes of the df_diff, this is so we can identify segments where the difference is increasing.
-            # Add to df_diff data frame
-            valid = ~ma.masked_invalid(df_diff.z_diff).mask
-            n_valid = sum(valid)
-            window_length = round_up_to_odd(min(51, n_valid / 2))
-            z_diff_slope = savgol_filter(
-                df_diff.z_diff[valid], window_length, 3, deriv=1
+            params[zone]["vol_change"] = (
+                params[zone]["poststorm"]["subaerial_vol"]
+                - params[zone]["prestorm"]["subaerial_vol"]
+            )
+            params[zone]["pct_change"] = (
+                params[zone]["vol_change"] / params[zone]["prestorm"]["subaerial_vol"]
             )
-            x_diff_slope = df_diff.index.get_level_values("x")[valid]
-            df_diff.loc[(site_id, x_diff_slope), "z_diff_slope"] = z_diff_slope
-
-            # Create an interpolated function from the slope
-            s = interp1d(df_diff.index.get_level_values("x"), df_diff.z_diff_slope)
-            x_crossings_slopes = s(x_crossings)
-
-            # Only take x_crossings where we have increasing difference in diff slope
-            x_crossings = [x for x, s in zip(x_crossings, x_crossings_slopes) if s > 0]
-
-            # Take the most seaward location as the x location where our profiles are the same and the difference in slopes is increasing
-            if len(x_crossings) != 0:
-                x_change_point = x_crossings[-1]
-            else:
-                x_change_point = np.nan
-
-        # # For debugging
-        # import matplotlib.pyplot as plt
-        # f,(ax1,ax2) = plt.subplots(2,1,sharex=True)
-        # ax1.plot(df_prestorm.index.get_level_values('x'), df_prestorm.z_pre,label='prestorm')
-        # ax1.plot(df_poststorm.index.get_level_values('x'), df_poststorm.z_post,label='poststorm')
-        # ax1.axvline(x_crossings[-1], color='red', linestyle='--', linewidth=0.5, label='Change point')
-        # ax1.legend()
-        # ax1.set_title(site_id)
-        # ax1.set_ylabel('elevation (m AHD)')
-        # ax2.plot(df_diff.index.get_level_values('x'), df_diff.z_diff)
-        # ax2.set_xlabel('x coordinate (m)')
-        # ax2.set_ylabel('elevation diff (m)')
-        # ax2.axvline(x_crossings[-1],color='red',linestyle='--',linewidth=0.5)
-        # plt.show()
-
-        diff_vol = beach_volume(
-            x=df_diff.index.get_level_values("x"),
-            z=df_diff.z_diff,
-            x_min=np.nanmax([x_min, x_change_point]),
-            x_max=np.nanmax([x_max, x_change_point]),
-        )
-
-        # Here, if cannot calculate the difference volume, assume no volume change
-        if np.isnan(diff_vol):
-            diff_vol = 0
-
-        # Base pct change on diff volume
-        if diff_vol == 0:
-            pct_change = 0
-        else:
-            pct_change = diff_vol / prestorm_vol * 100
 
-        df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = prestorm_vol
-        df_vol_changes.loc[site_id, "poststorm_{}_vol".format(zone)] = poststorm_vol
-        df_vol_changes.loc[site_id, "{}_vol_change".format(zone)] = diff_vol
-        df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = pct_change
+            df_vol_changes.loc[site_id, "prestorm_{}_vol".format(zone)] = params[zone][
+                "prestorm"
+            ]["subaerial_vol"]
+            df_vol_changes.loc[site_id, "poststorm_{}_vol".format(zone)] = params[zone][
+                "poststorm"
+            ]["subaerial_vol"]
+            df_vol_changes.loc[site_id, "{}_vol_change".format(zone)] = params[zone][
+                "vol_change"
+            ]
+            df_vol_changes.loc[site_id, "{}_pct_change".format(zone)] = params[zone][
+                "pct_change"
+            ]
 
     return df_vol_changes
 
@@ -235,11 +153,11 @@ def storm_regime(df_observed_impacts):
     """
     logger.info("Getting observed storm regimes")
 
-    swash = (df_observed_impacts.dune_face_pct_change <= 2) & (
-        df_observed_impacts.dune_face_vol_change <= 3
+    swash = (df_observed_impacts.dune_pct_change <= 2) & (
+        df_observed_impacts.dune_vol_change <= 3
     )
-    collision = (df_observed_impacts.dune_face_pct_change >= 2) | (
-        df_observed_impacts.dune_face_vol_change > 3
+    collision = (df_observed_impacts.dune_pct_change >= 2) | (
+        df_observed_impacts.dune_vol_change > 3
     )
 
     df_observed_impacts.loc[swash, "storm_regime"] = "swash"
@@ -296,13 +214,9 @@ def create_observed_impacts(
 
     # TODO Review volume change with changing dune toe/crests
     logger.info("Getting pre/post storm volumes")
-    df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash")
-    df_dune_face_vol_changes = volume_change(
-        df_profiles, df_profile_features, zone="dune_face"
-    )
-    df_observed_impacts = df_observed_impacts.join(
-        [df_swash_vol_changes, df_dune_face_vol_changes]
-    )
+
+    df_vol_changes = volume_change(df_profiles, df_profile_features)
+    df_observed_impacts = df_observed_impacts.join(df_vol_changes)
 
     # Classify regime based on volume changes
     df_observed_impacts = storm_regime(df_observed_impacts)