Replace padded zeros with NaNs in beach profiles

In raw beach profile data, the end of some cross-sections can be padded with zero values. This probably shouldn't be the case - if we wanted to assume a zero elevation at these locations, we should do that in whatever calculation, not in the raw data.

The added function detects these trailing, padded zero values and replaces them with NaNs.
master^2
Chris Leaman 6 years ago
parent 8eea9c4393
commit e7d6aa8761

@ -7,6 +7,7 @@ from datetime import datetime, timedelta
import pandas as pd import pandas as pd
from mat4py import loadmat from mat4py import loadmat
import numpy as np
logging.config.fileConfig('./src/logging.conf', disable_existing_loggers=False) logging.config.fileConfig('./src/logging.conf', disable_existing_loggers=False)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -152,6 +153,25 @@ def parse_profiles(profiles_mat):
df = pd.DataFrame(rows) df = pd.DataFrame(rows)
return df return df
def remove_zeros(df_profiles):
    """
    Replace the zero padding at the end of each profile with NaNs.

    When parsing the pre/post storm profiles, the end of some profiles have constant values of zero. Let's change
    these to NaNs for consistency. Didn't use a blanket replace of zeros because 0 may still be a valid elevation
    elsewhere in the profile - only values located after the last non-zero elevation are changed.

    :param df_profiles: DataFrame with a 'z' column and an index containing the levels 'site_id', 'profile_type'
        and 'x'.
    :return: A copy of df_profiles, sorted by index, where 'z' is NaN for every point beyond the last non-zero
        elevation of each (site_id, profile_type) profile. A profile without any non-zero elevation is treated as
        entirely padding and is set to NaN.
    """
    # sort_index returns a new frame, so the caller's data is left untouched, and it orders x within each profile
    # so that the last non-zero entry is also the one furthest along the profile.
    df_profiles = df_profiles.sort_index()

    # These do not change while looping (only 'z' values are written), so look them up once.
    site_ids = df_profiles.index.get_level_values('site_id')
    profile_types = df_profiles.index.get_level_values('profile_type')
    x_values = df_profiles.index.get_level_values('x')
    # Writes inside the loop only touch rows of the profile currently being processed, so this mask stays valid
    # for every profile that has not been processed yet.
    is_nonzero = (df_profiles['z'] != 0).values

    groups = df_profiles.groupby(level=['site_id', 'profile_type'])
    for key, _ in groups:
        logger.debug('Removing zeros from {} profile at {}'.format(key[1], key[0]))
        idx_site = (site_ids == key[0]) & (profile_types == key[1])

        x_nonzero = x_values[idx_site & is_nonzero]
        if len(x_nonzero) == 0:
            # No non-zero elevation at all: previously this raised an IndexError on the [-1] lookup.
            logger.warning('No non-zero elevations in {} profile at {}'.format(key[1], key[0]))
            df_profiles.loc[idx_site, 'z'] = np.nan
            continue

        x_last_ele = x_nonzero[-1]
        df_profiles.loc[idx_site & (x_values > x_last_ele), 'z'] = np.nan
    return df_profiles
def matlab_datenum_to_datetime(matlab_datenum): def matlab_datenum_to_datetime(matlab_datenum):
# https://stackoverflow.com/a/13965852 # https://stackoverflow.com/a/13965852
@ -228,6 +248,9 @@ def main():
df_tides.set_index(['site_id', 'datetime'], inplace=True) df_tides.set_index(['site_id', 'datetime'], inplace=True)
df_sites.set_index(['site_id'], inplace=True) df_sites.set_index(['site_id'], inplace=True)
logger.info('Nanning profile zero elevations')
df_profiles = remove_zeros(df_profiles)
logger.info('Outputting .csv files') logger.info('Outputting .csv files')
df_profiles.to_csv('./data/interim/profiles.csv') df_profiles.to_csv('./data/interim/profiles.csv')
df_tides.to_csv('./data/interim/tides.csv') df_tides.to_csv('./data/interim/tides.csv')

Loading…
Cancel
Save