Improve performance of TWL forecasting function

Use faster pandas indexing instead of the `.query` function
develop
Chris Leaman 6 years ago
parent 25a26d9e46
commit a9a5e02933

@ -35,10 +35,11 @@ def forecast_twl(
# Estimate foreshore slope. Do the analysis per site_id. This is so we only have to query the x and z
# cross-section profiles once per site.
logger.info("Calculating beach slopes")
site_ids = df_twl.index.get_level_values("site_id").unique()
if slope == "foreshore":
logger.info("Calculating foreshore slopes")
# Process each site_id with a different process and combine results at the end
with Pool(processes=n_processes) as pool:
results = pool.starmap(
@ -48,35 +49,46 @@ def forecast_twl(
df_twl["beta"] = pd.concat(results)
elif slope == "mean":
df_temp = df_twl.join(
df_profile_features.query(
"profile_type=='{}'".format(profile_type)
).reset_index(level="profile_type"),
how="inner",
)
df_temp["mhw"] = 0.5
logger.info("Calculating mean (dune toe to MHW) slopes")
btm_z = 0.5 # m AHD
# When calculating mean slope, we go from the dune toe to mhw. However, in some profiles, the dune toe is not
# defined. In these cases, we should go to the dune crest
df_temp["top_elevation"] = df_temp["dune_toe_z"]
df_temp.loc[df_temp.dune_toe_z.isnull(), "top_elevation"] = df_temp.loc[
df_temp.dune_toe_z.isnull(), "dune_crest_z"
]
df_temp["top_x"] = df_temp["dune_toe_x"]
df_temp.loc[df_temp.dune_toe_x.isnull(), "top_x"] = df_temp.loc[
df_temp.dune_toe_x.isnull(), "dune_crest_x"
]
# defined. In these cases, we should go to the dune crest. Let's make a temporary dataframe which has this
# already calculated.
df_top_ele = df_profile_features.xs(profile_type, level="profile_type").copy()
df_top_ele.loc[:, "top_ele"] = df_top_ele.dune_toe_z
df_top_ele.loc[
df_top_ele.top_ele.isnull().values, "top_ele"
] = df_top_ele.dune_crest_z
n_no_top_ele = len(df_top_ele[df_top_ele.top_ele.isnull()].index)
if n_no_top_ele != 0:
logger.warning(
"{} sites do not have dune toes/crests to calculate mean slope".format(
n_no_top_ele
)
)
with Pool(processes=n_processes) as pool:
results = pool.starmap(
mean_slope_for_site_id,
[
(site_id, df_temp, df_profiles, "top_elevation", "top_x", "mhw")
for site_id in site_ids
],
df_slopes = (
df_profiles.xs(profile_type, level="profile_type")
.dropna(subset=["z"])
.groupby("site_id")
.apply(
lambda x: slope_from_profile(
profile_x=x.index.get_level_values("x").tolist(),
profile_z=x.z.tolist(),
top_elevation=df_top_ele.loc[x.index[0][0], :].top_ele,
btm_elevation=btm_z,
method="least_squares",
)
)
.rename("beta")
.to_frame()
)
# Merge calculated slopes onto each twl timestep
df_twl = df_twl.merge(df_slopes, left_index=True, right_index=True)
df_twl["beta"] = pd.concat(results)
elif slope == "intertidal":
logger.info("Calculating intertidal slopes")

Loading…
Cancel
Save