@ -7,6 +7,7 @@ from datetime import datetime, timedelta
import pandas as pd
from mat4py import loadmat
import numpy as np
# Configure logging from the project's config file. disable_existing_loggers=False
# keeps loggers that were created before this call (e.g. by imported modules) enabled.
logging . config . fileConfig ( ' ./src/logging.conf ' , disable_existing_loggers = False )
# Module-level logger, named after this module, used by the functions below.
logger = logging . getLogger ( __name__ )
@ -152,6 +153,25 @@ def parse_profiles(profiles_mat):
df = pd . DataFrame ( rows )
return df
def remove_zeros(df_profiles):
    """
    Replace the run of zero elevations at the end of each profile with NaN.

    When parsing the pre/post storm profiles, the end of some profiles have constant values of zero. These are
    changed to NaNs for consistency. A blanket "replace 0 with NaN" is not used because 0 may still be a valid
    elevation part-way along a profile; only the zeros located after the last non-zero elevation are replaced.

    A profile is the set of rows sharing the same ('site_id', 'profile_type') index values. If a profile has no
    non-zero elevation at all, the whole profile is treated as a trailing run of zeros and set to NaN.

    :param df_profiles: DataFrame with a 'z' column and an index containing the levels 'site_id', 'profile_type'
        and 'x'. It is not modified; the function works on the sorted copy returned by sort_index().
    :return: DataFrame sorted by index, with the trailing zeros of each profile's 'z' replaced by NaN.
    """
    df_profiles = df_profiles.sort_index()

    # The index is not changed inside the loop, so look the level values up once.
    site_ids = df_profiles.index.get_level_values('site_id')
    profile_types = df_profiles.index.get_level_values('profile_type')
    x_values = df_profiles.index.get_level_values('x')

    groups = df_profiles.groupby(level=['site_id', 'profile_type'])
    for (site_id, profile_type), _ in groups:
        logger.debug('Removing zeros from {} profile at {}'.format(profile_type, site_id))

        # Boolean mask over the whole frame selecting the rows of this profile.
        idx_site = (site_ids == site_id) & (profile_types == profile_type)
        z_profile = df_profiles.loc[idx_site, 'z']

        # x coordinates of the rows that hold a non-zero elevation (NaN counts as non-zero here).
        x_nonzero = z_profile[z_profile != 0].index.get_level_values('x')

        if len(x_nonzero) == 0:
            # No non-zero elevation: there is no "last element", so every row is a trailing zero.
            idx_trailing = idx_site
        else:
            # Everything in this profile beyond the last non-zero elevation is a trailing zero.
            idx_trailing = idx_site & (x_values > x_nonzero[-1])

        df_profiles.loc[idx_trailing, 'z'] = np.nan

    return df_profiles
def matlab_datenum_to_datetime ( matlab_datenum ) :
# https://stackoverflow.com/a/13965852
@ -228,6 +248,9 @@ def main():
df_tides . set_index ( [ ' site_id ' , ' datetime ' ] , inplace = True )
df_sites . set_index ( [ ' site_id ' ] , inplace = True )
logger . info ( ' Nanning profile zero elevations ' )
df_profiles = remove_zeros ( df_profiles )
logger . info ( ' Outputting .csv files ' )
df_profiles . to_csv ( ' ./data/interim/profiles.csv ' )
df_tides . to_csv ( ' ./data/interim/tides.csv ' )