import os
from datetime import datetime

import numpy as np
import pandas as pd


def read_wavestaff_file(csv_name, m=None, b=None, length=2000, raw=False,
                        abs_time=True):
    """Read data from Wave Logger ASCII data file.

    The Wave Logger III stores water level observations as unsigned
    12-bit integers (uint12), ranging from 0 to 4095. The input
    variables 'm' and 'b' are the slope and intercept of the function:

        eta = m * x + b

    where 'x' is the raw data value. If 'm' and 'b' are not provided,
    the nominal factory calibration will be applied, based on length of
    the staff. E.g. if the staff length is 2000 mm, then x = 0 when the
    probe is dry, and x = 4095 when the depth at the staff is 2 m. This
    gives:

        m = 2000 / 4095 = 0.488

    Args:
        csv_name: path to Wave Logger ASCII data file
        m: calibrated slope (mm / uint12)
        b: calibrated zero offset (mm)
        length: length of wave staff probe (mm)
        raw: return raw uint12 values instead of calibrated units
        abs_time: use absolute date and time (otherwise use relative seconds)

    Returns:
        pandas dataframe of all data blocks
    """
    meta, blocks = get_file_info(csv_name)

    frames = []
    index_name = 'datetime' if abs_time else 'time (s)'
    for block in blocks:
        raw_data = pd.read_csv(
            csv_name,
            skiprows=block['start_line'],
            nrows=block['nrows'],
            header=None,
            usecols=range(13))

        # First 12 columns are water level samples; flatten row-major so
        # samples stay in chronological order
        val = raw_data.iloc[:, :-1].values.flatten()

        data = {}
        if raw:
            # Use raw uint12 values
            data['eta (raw)'] = val
        elif m is not None and b is not None:
            # Calculate eta based on calibration values. Compare against
            # None (not truthiness) so a zero offset b=0 is accepted.
            data['eta (mm)'] = val * m + b
        else:
            # Calculate eta based on staff length (nominal calibration)
            data['eta, uncalibrated (mm)'] = val * length / 4095

        # Last column is temperature, stored in units of 1/16 degC; repeat
        # each reading so it lines up with the 12 samples in its row
        temperature = raw_data.iloc[:, -1].values / 16
        data['temperature (C)'] = np.repeat(temperature, 12)

        # Generate timestamps at the sampling interval
        dt = 1 / meta['frequency']
        t_seconds = np.arange(len(val)) * dt
        if abs_time:
            # Offset from the block's start time for absolute timestamps
            t = pd.to_timedelta(t_seconds, unit='s') + block['start_time']
        else:
            # Relative time in seconds (restarts at zero for each block)
            t = t_seconds

        frames.append(pd.DataFrame(data, index=t))

    if not frames:
        # No data blocks found; original code would raise here
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concatenate instead
    master = pd.concat(frames)
    master.index.name = index_name
    return master


def parse_header(line):
    """Parse header line from Wave Logger ASCII data file.

    Args:
        line: header string from top of file or burst block

    Returns:
        start time (datetime object), and metadata (dict)
    """
    descriptions = {
        'y': 'year',
        'm': 'month',
        'd': 'day',
        'H': 'hour',
        'M': 'minute',
        'S': 'second',
        'F': 'frequency',
        'L': 'burst_length',
        'I': 'burst_interval',
        'N': 'newfile_interval',
        'A': 'scale',
        'C': 'cycle_count',
        'T': 'logger_type',
        'R': 'reserved',
    }

    items = [s for s in line.split(',') if s]
    meta = {}
    for i, s in enumerate(items):
        # The first three items are year/month/day; their tag letters are
        # mapped to lowercase keys so 'M' (month) does not collide with
        # 'M' (minute) and 'S'/'D' etc. remain unambiguous
        key = s[0].lower() if i < 3 else s[0]
        meta[descriptions[key]] = int(s[1:])

    # Logger stores two-digit years relative to 2000.
    # (pd.datetime was removed in pandas 2.0; use datetime directly.)
    start_time = datetime(
        2000 + meta['year'],
        meta['month'],
        meta['day'],
        meta['hour'],
        meta['minute'],
        meta['second'],
    )
    return start_time, meta


def _close_block(block, end_line):
    """Record the (0-indexed) last data line and row count for a block."""
    block['end_line'] = end_line
    block['nrows'] = end_line - block['start_line'] + 1


def get_file_info(csv_name):
    """Get file info from Wave Logger ASCII data file.

    Args:
        csv_name: path to Wave Logger ASCII data file

    Returns:
        file metadata (dict), and data blocks (list of dicts)
    """
    blocks = []

    # Get start and end lines for all data blocks
    with open(csv_name, 'r') as f:
        header, *lines = f.read().splitlines()

    last = len(lines) + 1  # 1-indexed number of the final file line
    for i, line in enumerate(lines, start=2):
        is_block_header = line.startswith('Y')

        if blocks and is_block_header:
            # A new block header closes the previous block; its last data
            # row is the preceding file line (0-indexed: i - 2).
            # NOTE: the original condition used bitwise '&'/'|', which due
            # to precedence degenerated to len(blocks) > 0.
            _close_block(blocks[-1], i - 2)

        if is_block_header:
            # Start a new block; 'start_line' doubles as the number of
            # lines read_csv must skip to land on the first data row
            blocks.append({
                'start_line': i,
                'start_time': parse_header(line)[0],
            })
        elif i == last and blocks:
            # End of file closes the final block; this line itself is the
            # last data row (0-indexed: i - 1). The original wrote i - 2
            # here too, silently dropping the final row of the last block.
            _close_block(blocks[-1], i - 1)

    # Get metadata from file header
    meta = parse_header(header)[-1]
    return meta, blocks