From a293d5d6912c0aa449b3a12bbcdcca4b86d14d6c Mon Sep 17 00:00:00 2001 From: Chris Leaman Date: Tue, 19 Mar 2019 14:56:50 +1100 Subject: [PATCH] Fix incorrect calculation of pre/post change point If pre and post storm profiles returned to similar elevations within the sampled profile length, the change point detection would select the most seaward point and incorrectly calculate the swash zone volumes. This change looks at the slope of the difference between the two profiles and assumes that the difference between the two profiles should be increasing at the location of the change point. --- src/analysis/observed_storm_impacts.py | 48 +++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/src/analysis/observed_storm_impacts.py b/src/analysis/observed_storm_impacts.py index 03bd714..d1009f5 100644 --- a/src/analysis/observed_storm_impacts.py +++ b/src/analysis/observed_storm_impacts.py @@ -2,6 +2,9 @@ import click import numpy as np import pandas as pd from scipy.integrate import simps +from scipy.signal import savgol_filter +from scipy.interpolate import interp1d +from numpy import ma as ma from logs import setup_logging from utils import crossings, get_i_or_default @@ -22,6 +25,15 @@ def return_first_or_nan(l): return l[0] +def round_up_to_odd(f): + """ + https://stackoverflow.com/a/31648815 + :param f: + :return: + """ + return int(np.ceil(f) // 2 * 2 + 1) + + def volume_change(df_profiles, df_profile_features, zone): """ Calculates how much the volume change there is between prestrom and post storm profiles. @@ -120,14 +132,40 @@ def volume_change(df_profiles, df_profile_features, zone): df_diff = df_prestorm.merge(df_poststorm, on=["site_id", "x"]) df_diff["z_diff"] = df_diff.z_pre - df_diff.z_post - # Find all locations where the difference in pre and post storm is zero. Take the most seaward location as the - # x location where our profiles are the same. + # Find all locations where the difference in pre and post storm is zero. x_crossings = crossings(df_diff.index.get_level_values("x"), df_diff.z_diff, 0) - if len(x_crossings) != 0: - x_change_point = x_crossings[-1] - else: + # Determine where our pre and post storm profiles begin to change + if len(x_crossings) == 0: + # If no intersections, no change point x_change_point = np.nan + else: + + # If there is an intersection, check that the difference between the prestorm and poststorm profile is increasing. + + # Find the slopes of the df_diff, this is so we can identify segments where the difference is increasing. + # Add to df_diff data frame + valid = ~ma.masked_invalid(df_diff.z_diff).mask + n_valid = sum(valid) + window_length = round_up_to_odd(min(51, n_valid / 2)) + z_diff_slope = savgol_filter( + df_diff.z_diff[valid], window_length, 3, deriv=1 + ) + x_diff_slope = df_diff.index.get_level_values("x")[valid] + df_diff.loc[(site_id, x_diff_slope), "z_diff_slope"] = z_diff_slope + + # Create an interpolated function from the slope + s = interp1d(df_diff.index.get_level_values("x"), df_diff.z_diff_slope) + x_crossings_slopes = s(x_crossings) + + # Only take x_crossings where we have increasing difference in diff slope + x_crossings = [x for x, s in zip(x_crossings, x_crossings_slopes) if s > 0] + + # Take the most seaward location as the x location where our profiles are the same and the difference in slopes is increasing + if len(x_crossings) != 0: + x_change_point = x_crossings[-1] + else: + x_change_point = np.nan # # For debugging # import matplotlib.pyplot as plt