You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

177 lines
5.0 KiB
Python

import os
import numpy as np
import pandas as pd
def read_wavestaff_file(csv_name,
                        m=None,
                        b=None,
                        length=2000,
                        raw=False,
                        abs_time=True):
    """Read data from Wave Logger ASCII data file.

    The Wave Logger III stores water level observations as unsigned 12-bit
    integers (uint12), ranging from 0 to 4095. The input variables 'm' and 'b'
    are the slope and intercept of the function:

        eta = m * x + b

    where 'x' is the raw data value. If 'm' and 'b' are not provided, the
    nominal factory calibration will be applied, based on length of the staff.
    E.g. if the staff length is 2000 mm, then x = 0 when the probe is dry, and
    x = 4095 when the depth at the staff is 2 m. This gives:

        m = 2000 / 4095
          = 0.488

    Args:
        csv_name: path to Wave Logger ASCII data file
        m: calibrated slope (mm / uint12)
        b: calibrated zero offset (mm); b=0 is a valid offset
        length: length of wave staff probe (mm)
        raw: return raw uint12 values instead of calibrated units
        abs_time: use absolute date and time (otherwise use relative seconds)

    Returns:
        pandas dataframe of all data blocks
    """
    meta, blocks = get_file_info(csv_name)
    # Hoisted out of the loop so it is defined even for a file with no blocks
    index_name = 'datetime' if abs_time else 'time (s)'
    frames = []
    for block in blocks:
        raw_data = pd.read_csv(
            csv_name,
            skiprows=block['start_line'],
            nrows=block['nrows'],
            header=None,
            usecols=range(13))
        # First 12 columns are water level samples; flatten row-major into
        # one continuous series
        val = raw_data.iloc[:, :-1].values.flatten()
        data = {}
        if raw:
            # Use raw uint12 values
            data['eta (raw)'] = val
        elif m is not None and b is not None:
            # Calculate eta from calibration values. Checked with 'is not
            # None' (not truthiness) so a zero offset b=0 is honoured.
            data['eta (mm)'] = val * m + b
        else:
            # Fall back to nominal factory calibration from staff length
            data['eta, uncalibrated (mm)'] = val * length / 4095
        # Last column is temperature in 16ths of a degree C; each row's
        # reading applies to all 12 samples of that row
        temperature = raw_data.iloc[:, -1].values / 16
        data['temperature (C)'] = np.repeat(temperature, 12)
        # Generate timestamps at the sampling interval
        dt = 1 / meta['frequency']
        t_seconds = np.arange(len(val)) * dt
        if abs_time:
            # Offset relative seconds by the block start time
            t = pd.to_timedelta(t_seconds, unit='s') + block['start_time']
        else:
            # Use relative seconds
            t = t_seconds
        frames.append(pd.DataFrame(data, index=t))
    # Concatenate once at the end (DataFrame.append was removed in pandas 2.0)
    master = pd.concat(frames) if frames else pd.DataFrame()
    master.index.name = index_name
    return master
def parse_header(line):
    """Parse header line from Wave Logger ASCII data file.

    A header line is a comma-separated list of single-letter codes, each
    followed by an integer value, e.g. 'Y21,M03,D15,H12,M30,S45,F10,...'.
    The letter 'M' is reused for both month and minute, so the first three
    items (year, month, day) are mapped through lowercase keys to keep
    them distinct from the later uppercase codes.

    Args:
        line: header string from top of file or burst block

    Returns:
        start time (datetime object), and metadata (dict)
    """
    descriptions = {
        'y': 'year',
        'm': 'month',
        'd': 'day',
        'H': 'hour',
        'M': 'minute',
        'S': 'second',
        'F': 'frequency',
        'L': 'burst_length',
        'I': 'burst_interval',
        'N': 'newfile_interval',
        'A': 'scale',
        'C': 'cycle_count',
        'T': 'logger_type',
        'R': 'reserved',
    }
    items = [s for s in line.split(',') if s]
    meta = {}
    for i, s in enumerate(items):
        if i < 3:
            # Use lowercase keys for year, month, day
            key = s[0].lower()
        else:
            # Use uppercase keys for other items
            key = s[0]
        # Add metadata value
        meta[descriptions[key]] = int(s[1:])
    # Get start time. pd.Timestamp replaces pd.datetime, which was
    # deprecated and removed in pandas >= 2.0; Timestamp is a datetime
    # subclass, so the documented return type still holds. Years are
    # stored as two digits relative to 2000.
    start_time = pd.Timestamp(
        2000 + meta['year'],
        meta['month'],
        meta['day'],
        meta['hour'],
        meta['minute'],
        meta['second'],
    )
    return start_time, meta
def get_file_info(csv_name):
    """Get file info from Wave Logger ASCII data file.

    Args:
        csv_name: path to Wave Logger ASCII data file

    Returns:
        file metadata (dict), and data blocks (list of dicts)
    """
    blocks = []
    # Get start and end lines for all data blocks
    with open(csv_name, 'r') as f:
        header, *lines = f.read().splitlines()
    n_lines = len(lines)
    # Lines are numbered from 2 because the file header occupies line 1
    for i, line in enumerate(lines, start=2):
        is_block_header = line.startswith('Y')
        is_last_line = i == n_lines + 1
        # BUG FIX: the original used bitwise '&'/'|' here; Python operator
        # precedence made the test collapse to 'len(blocks) > 0', so the
        # block-closing work re-ran on every line (same final result, but
        # only by accident). Explicit boolean logic expresses the intent.
        if blocks and (is_block_header or is_last_line):
            # Add end line and line count for previous block
            b = blocks[-1]
            b['end_line'] = i - 2
            b['nrows'] = b['end_line'] - b['start_line'] + 1
        if is_block_header:
            # Add start line for current block
            block = {'start_line': i}
            # parse_header returns (start_time, meta); keep the start time
            block['start_time'] = parse_header(line)[0]
            blocks.append(block)
    # Get metadata from file header
    meta = parse_header(header)[-1]
    return meta, blocks