You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

177 lines
5.0 KiB
Python

import os
import numpy as np
import pandas as pd
def read_wavestaff_file(csv_name,
                        m=None,
                        b=None,
                        length=2000,
                        raw=False,
                        abs_time=True):
    """Read data from Wave Logger ASCII data file.

    The Wave Logger III stores water level observations as unsigned 12-bit
    integers (uint12), ranging from 0 to 4095. The input variables 'm' and 'b'
    are the slope and intercept of the function:

        eta = m * x + b

    where 'x' is the raw data value. If 'm' and 'b' are not provided, the
    nominal factory calibration will be applied, based on length of the staff.
    E.g. if the staff length is 2000 mm, then x = 0 when the probe is dry, and
    x = 4095 when the depth at the staff is 2 m. This gives:

        m = 2000 / 4095
          = 0.488

    Args:
        csv_name: path to Wave Logger ASCII data file
        m: calibrated slope (mm / uint12)
        b: calibrated zero offset (mm); b=0 is a valid offset
        length: length of wave staff probe (mm)
        raw: return raw uint12 values instead of calibrated units
        abs_time: use absolute date and time (otherwise use relative seconds)

    Returns:
        pandas dataframe of all data blocks
    """
    meta, blocks = get_file_info(csv_name)
    # Hoisted out of the loop so it is defined even for a file with no blocks
    index_name = 'datetime' if abs_time else 'time (s)'
    frames = []
    for block in blocks:
        raw_data = pd.read_csv(
            csv_name,
            skiprows=block['start_line'],
            nrows=block['nrows'],
            header=None,
            usecols=range(13))
        # First 12 columns are water level samples; flatten row-major into
        # one continuous series
        val = raw_data.iloc[:, :-1].values.flatten()
        data = {}
        if raw:
            # Use raw uint12 values
            data['eta (raw)'] = val
        elif m is not None and b is not None:
            # Calculate eta from calibration values. Checked with 'is not
            # None' (not truthiness) so a zero offset b=0 is honoured.
            data['eta (mm)'] = val * m + b
        else:
            # Fall back to nominal factory calibration from staff length
            data['eta, uncalibrated (mm)'] = val * length / 4095
        # Last column is temperature in 16ths of a degree C; each row's
        # reading applies to all 12 samples of that row
        temperature = raw_data.iloc[:, -1].values / 16
        data['temperature (C)'] = np.repeat(temperature, 12)
        # Generate timestamps at the sampling interval
        dt = 1 / meta['frequency']
        t_seconds = np.arange(len(val)) * dt
        if abs_time:
            # Offset relative seconds by the block start time
            t = pd.to_timedelta(t_seconds, unit='s') + block['start_time']
        else:
            # Use relative seconds
            t = t_seconds
        frames.append(pd.DataFrame(data, index=t))
    # Concatenate once at the end (DataFrame.append was removed in pandas 2.0)
    master = pd.concat(frames) if frames else pd.DataFrame()
    master.index.name = index_name
    return master
def parse_header(line):
    """Parse header line from Wave Logger ASCII data file.

    A header line is a comma-separated list of single-letter codes, each
    followed by an integer value, e.g. 'Y21,M03,D15,H12,M30,S45,F10,...'.
    The letter 'M' is reused for both month and minute, so the first three
    items (year, month, day) are mapped through lowercase keys to keep
    them distinct from the later uppercase codes.

    Args:
        line: header string from top of file or burst block

    Returns:
        start time (datetime object), and metadata (dict)
    """
    descriptions = {
        'y': 'year',
        'm': 'month',
        'd': 'day',
        'H': 'hour',
        'M': 'minute',
        'S': 'second',
        'F': 'frequency',
        'L': 'burst_length',
        'I': 'burst_interval',
        'N': 'newfile_interval',
        'A': 'scale',
        'C': 'cycle_count',
        'T': 'logger_type',
        'R': 'reserved',
    }
    items = [s for s in line.split(',') if s]
    meta = {}
    for i, s in enumerate(items):
        if i < 3:
            # Use lowercase keys for year, month, day
            key = s[0].lower()
        else:
            # Use uppercase keys for other items
            key = s[0]
        # Add metadata value
        meta[descriptions[key]] = int(s[1:])
    # Get start time. pd.Timestamp replaces pd.datetime, which was
    # deprecated and removed in pandas >= 2.0; Timestamp is a datetime
    # subclass, so the documented return type still holds. Years are
    # stored as two digits relative to 2000.
    start_time = pd.Timestamp(
        2000 + meta['year'],
        meta['month'],
        meta['day'],
        meta['hour'],
        meta['minute'],
        meta['second'],
    )
    return start_time, meta
def get_file_info(csv_name):
    """Get file info from Wave Logger ASCII data file.

    Args:
        csv_name: path to Wave Logger ASCII data file

    Returns:
        file metadata (dict), and data blocks (list of dicts)
    """
    blocks = []
    # Get start and end lines for all data blocks
    with open(csv_name, 'r') as f:
        header, *lines = f.read().splitlines()
    n_lines = len(lines)
    # Lines are numbered from 2 because the file header occupies line 1
    for i, line in enumerate(lines, start=2):
        is_block_header = line.startswith('Y')
        is_last_line = i == n_lines + 1
        # BUG FIX: the original used bitwise '&'/'|' here; Python operator
        # precedence made the test collapse to 'len(blocks) > 0', so the
        # block-closing work re-ran on every line (same final result, but
        # only by accident). Explicit boolean logic expresses the intent.
        if blocks and (is_block_header or is_last_line):
            # Add end line and line count for previous block
            b = blocks[-1]
            b['end_line'] = i - 2
            b['nrows'] = b['end_line'] - b['start_line'] + 1
        if is_block_header:
            # Add start line for current block
            block = {'start_line': i}
            # parse_header returns (start_time, meta); keep the start time
            block['start_time'] = parse_header(line)[0]
            blocks.append(block)
    # Get metadata from file header
    meta = parse_header(header)[-1]
    return meta, blocks