Add 'wavestaff.py'

6 years ago · 65119499ae
commit 65119499ae
1 changed file with 176 additions and 0 deletions
--- a/wavestaff/wavestaff.py
+++ b/wavestaff/wavestaff.py
@@ -0,0 +1,176 @@
+import os
+import numpy as np
+import pandas as pd
+
+
+def read_wavestaff_file(csv_name,
+                        m=None,
+                        b=None,
+                        length=2000,
+                        raw=False,
+                        abs_time=True):
+    """Read data from Wave Logger ASCII data file.
+
+    The Wave Logger III stores water level observations as unsigned 12-bit
+    integers (uint12), ranging from 0 to 4095. The input variables 'm' and 'b'
+    are the slope and intercept of the function:
+
+        eta = m * x + b
+
+    where 'x' is the raw data value. If 'm' and 'b' are not both provided,
+    the nominal factory calibration will be applied, based on length of the
+    staff. E.g. if the staff length is 2000 mm, then x = 0 when the probe is
+    dry, and x = 4095 when the depth at the staff is 2 m. This gives:
+
+        m = 2000 / 4095
+          = 0.488
+
+    Args:
+        csv_name:  path to Wave Logger ASCII data file
+        m:         calibrated slope (mm / uint12)
+        b:         calibrated zero offset (mm); 0 is a valid offset
+        length:    length of wave staff probe (mm)
+        raw:       return raw uint12 values instead of calibrated units
+        abs_time:  use absolute date and time (otherwise use relative seconds)
+
+    Returns:
+        pandas dataframe of all data blocks (empty if there are no blocks)
+    """
+
+    meta, blocks = get_file_info(csv_name)
+    if not blocks:
+        return pd.DataFrame()
+
+    # Sampling interval (s), from the file header, is used for every block
+    dt = 1 / meta['frequency']
+    index_name = 'datetime' if abs_time else 'time (s)'
+
+    frames = []
+    for block in blocks:
+        # Each row holds 12 water level samples followed by one temperature
+        raw_data = pd.read_csv(
+            csv_name,
+            skiprows=block['start_line'],
+            nrows=block['nrows'],
+            header=None,
+            usecols=range(13))
+
+        # Get water level observations (raw uint12 values)
+        val = raw_data.iloc[:, :-1].values.flatten()
+
+        data = {}
+        if raw:
+            # Use raw values
+            data['eta (raw)'] = val
+        elif m is not None and b is not None:
+            # Calculate eta based on calibration values (zero is allowed)
+            data['eta (mm)'] = val * m + b
+        else:
+            # Calculate eta based on staff length
+            data['eta, uncalibrated (mm)'] = val * length / 4095
+
+        # Get temperature (degrees C), repeated for each sample in the row
+        temperature = raw_data.iloc[:, -1].values / 16
+        data['temperature (C)'] = np.repeat(temperature, 12)
+
+        # Generate timestamps, relative to the start of the block
+        t = np.arange(len(val)) * dt
+        if abs_time:
+            # Add start time if absolute date and time is required
+            t = pd.to_timedelta(t, unit='s') + block['start_time']
+        frames.append(pd.DataFrame(data, index=t))
+
+    # Join all blocks in one step (DataFrame.append was removed in pandas 2)
+    master = pd.concat(frames)
+    master.index.name = index_name
+
+    return master
+
+
+def parse_header(line):
+    """Parse header line from Wave Logger ASCII data file.
+
+    Each comma-separated item is a one-letter key followed by an integer. The
+    first three items (year, month, day) are identified by their position.
+
+    Args:
+        line: header string from top of file or burst block
+
+    Returns:
+        start time (datetime object), and metadata (dict)
+    """
+    # pd.datetime was removed from pandas, so use the standard library
+    from datetime import datetime
+
+    descriptions = {
+        'y': 'year',
+        'm': 'month',
+        'd': 'day',
+        'H': 'hour',
+        'M': 'minute',
+        'S': 'second',
+        'F': 'frequency',
+        'L': 'burst_length',
+        'I': 'burst_interval',
+        'N': 'newfile_interval',
+        'A': 'scale',
+        'C': 'cycle_count',
+        'T': 'logger_type',
+        'R': 'reserved',
+    }
+
+    # Ignore empty items (e.g. from a trailing comma) and stray whitespace
+    items = [s.strip() for s in line.split(',') if s.strip()]
+    meta = {}
+    for i, s in enumerate(items):
+        # Use lowercase keys for year, month, day; other keys are kept as is
+        key = s[0].lower() if i < 3 else s[0]
+        meta[descriptions[key]] = int(s[1:])
+
+    # Get start time (year is stored as an offset from 2000)
+    start_time = datetime(
+        2000 + meta['year'],
+        meta['month'],
+        meta['day'],
+        meta['hour'],
+        meta['minute'],
+        meta['second'],
+    )
+
+    return start_time, meta
+
+
+def get_file_info(csv_name):
+    """Get file info from Wave Logger ASCII data file.
+
+    The first line of the file is the file header. Every later line starting
+    with 'Y' is the header of a data block.
+
+    Args:
+        csv_name: path to Wave Logger ASCII data file
+
+    Returns:
+        file metadata (dict), and data blocks (list of dicts with the keys
+        'start_line', 'start_time', 'end_line' and 'nrows')
+    """
+    with open(csv_name, 'r') as f:
+        header, *lines = f.read().splitlines()
+
+    blocks = []
+    # Line numbers are 1-based, and the file header is line 1
+    for i, line in enumerate(lines, start=2):
+        if line.startswith('Y'):
+            start_time = parse_header(line)[0]
+            blocks.append({'start_line': i, 'start_time': start_time})
+
+    # A block ends where the next one starts; the last block stops one line
+    # short of the end of file. NOTE(review): confirm that line is not data.
+    stops = [b['start_line'] for b in blocks[1:]] + [len(lines) + 1]
+    for b, stop in zip(blocks, stops):
+        # Never negative: a header on the final line gives an empty block
+        b['end_line'] = max(stop - 2, b['start_line'] - 1)
+        b['nrows'] = b['end_line'] - b['start_line'] + 1
+
+    meta = parse_header(header)[-1]
+
+    return meta, blocks