import os
import numpy as np
import pandas as pd
from datetime import datetime


def read_wavestaff_file(csv_name,
                        m=None,
                        b=None,
                        length=2000,
                        raw=False,
                        abs_time=True):
    """Read data from Wave Logger ASCII data file.

    The Wave Logger III stores water level observations as unsigned 12-bit
    integers (uint12), ranging from 0 to 4095. The input variables 'm' and
    'b' are the slope and intercept of the function:

        eta = m * x + b

    where 'x' is the raw data value. If 'm' and 'b' are not provided, the
    nominal factory calibration will be applied, based on length of the
    staff. E.g. if the staff length is 2000 mm, then x = 0 when the probe is
    dry, and x = 4095 when the depth at the staff is 2 m. This gives:

        m = 2000 / 4095
          = 0.488

    Args:
        csv_name: path to Wave Logger ASCII data file
        m: calibrated slope (mm / uint12)
        b: calibrated zero offset (mm)
        length: length of wave staff probe (mm)
        raw: return raw uint12 values instead of calibrated units
        abs_time: use absolute date and time (otherwise use relative seconds)

    Returns:
        pandas dataframe of all data blocks
    """

    meta, blocks = get_file_info(csv_name)

    index_name = 'datetime' if abs_time else 'time (s)'
    frames = []
    for block in blocks:
        raw_data = pd.read_csv(
            csv_name,
            skiprows=block['start_line'],
            nrows=block['nrows'],
            header=None,
            usecols=range(13))

        # First 12 columns hold the water level samples; flatten row by row
        # so samples come out in chronological order.
        val = raw_data.iloc[:, :-1].values.flatten()

        data = {}
        if raw:
            # Use raw uint12 values
            data['eta (raw)'] = val
        elif (m is not None) and (b is not None):
            # Explicit None checks: a zero offset (b=0) or zero slope is a
            # legitimate calibration value; the previous truthiness test
            # (bool(m) & bool(b)) wrongly rejected it.
            data['eta (mm)'] = val * m + b
        else:
            # Calculate eta from the nominal factory calibration (staff
            # length spread over the full uint12 range)
            data['eta, uncalibrated (mm)'] = val * length / 4095

        # Last column is temperature in units of 1/16 degrees C, one reading
        # per row of 12 water level samples
        temperature = raw_data.iloc[:, -1].values / 16
        data['temperature (C)'] = np.repeat(temperature, 12)

        # Generate timestamps at the logger's sample frequency
        dt = 1 / meta['frequency']
        t_seconds = np.arange(len(val)) * dt

        if abs_time:
            # Add start time if absolute date and time is required
            # (pd.to_timedelta replaces the deprecated TimedeltaIndex unit
            # argument)
            t = pd.to_timedelta(t_seconds, unit='s') + block['start_time']
        else:
            # Use seconds if relative time is specified
            t = t_seconds

        frames.append(pd.DataFrame(data, index=t))

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    master = pd.concat(frames) if frames else pd.DataFrame()
    master.index.name = index_name

    return master


def parse_header(line):
    """Parse header line from Wave Logger ASCII data file.

    Args:
        line: header string from top of file or burst block

    Returns:
        start time (datetime object), and metadata (dict)
    """

    descriptions = {
        'y': 'year',
        'm': 'month',
        'd': 'day',
        'H': 'hour',
        'M': 'minute',
        'S': 'second',
        'F': 'frequency',
        'L': 'burst_length',
        'I': 'burst_interval',
        'N': 'newfile_interval',
        'A': 'scale',
        'C': 'cycle_count',
        'T': 'logger_type',
        'R': 'reserved',
    }

    items = [s for s in line.split(',') if s]
    meta = {}
    for i, s in enumerate(items):
        # The first three fields are the date (y/m/d); case disambiguates
        # the duplicate letters, e.g. 'M' = month early on, minute later
        if i < 3:
            key = s[0].lower()
        else:
            key = s[0]

        # Add metadata value
        meta[descriptions[key]] = int(s[1:])

    # Get start time; two-digit years are relative to 2000.
    # datetime.datetime replaces pd.datetime, removed in pandas 2.0.
    start_time = datetime(
        2000 + meta['year'],
        meta['month'],
        meta['day'],
        meta['hour'],
        meta['minute'],
        meta['second'],
    )

    return start_time, meta


def get_file_info(csv_name):
    """Get file info from Wave Logger ASCII data file.

    A burst block starts with a header line beginning with 'Y'. 'start_line'
    and 'end_line' are the 0-based line indices of the first and last data
    rows of each block (so 'start_line' can be passed directly to
    pd.read_csv(skiprows=...)).

    Args:
        csv_name: path to Wave Logger ASCII data file

    Returns:
        file metadata (dict), and data blocks (list of dicts)
    """

    blocks = []
    # Get start and end lines for all data blocks
    with open(csv_name, 'r') as f:
        header, *lines = f.read().splitlines()

    # i is the 1-based file line number (header occupies line 1)
    for i, line in enumerate(lines, start=2):
        if not line.startswith('Y'):
            continue

        if blocks:
            # Close the previous block: its last data row is the line just
            # before this header, i.e. 0-based index i - 2.
            # (The original condition had a precedence bug, '>' binding
            # before '&', and updated end_line on every line.)
            prev = blocks[-1]
            prev['end_line'] = i - 2
            prev['nrows'] = prev['end_line'] - prev['start_line'] + 1

        # Start a new block; its first data row is 0-based index i
        blocks.append({
            'start_line': i,
            'start_time': parse_header(line)[0],
        })

    if blocks:
        # Close the final block at end of file. The last file line is
        # 0-based index len(lines); the original used len(lines) - 1 and
        # silently dropped the last data row of the file.
        last = blocks[-1]
        last['end_line'] = len(lines)
        last['nrows'] = last['end_line'] - last['start_line'] + 1

    # Get metadata from file header
    meta = parse_header(header)[-1]

    return meta, blocks