# Source code for eaarl.analyze

# -*- coding: utf-8 -*-
# vim: set fileencoding=utf-8 :
'''Process raw data to derive point records
'''

# Boilerplate for cross-compatibility of Python 2/3
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
from future.builtins import * # pylint: disable=wildcard-import
import future.standard_library
future.standard_library.install_aliases()

import numpy as np

import eaarl.io.waveforms
import eaarl.project
import eaarl.util.utm

def remove_failed_thresh(frame, rx=True, tx=True):
    '''Drops records whose rx and/or tx threshold flag is not zero.

    Parameters
        frame : pandas.DataFrame
            DataFrame to filter. Must contain the fields thresh_rx and
            thresh_tx.
        rx : bool
            If True, only records where thresh_rx == 0 are kept.
        tx : bool
            If True, only records where thresh_tx == 0 are kept.

    Returns : pandas.DataFrame
        The filtered records. If both rx and tx are False, the input frame is
        returned as-is.
    '''
    # Apply each requested filter in turn: rx first, then tx
    for enabled, column in ((rx, 'thresh_rx'), (tx, 'thresh_tx')):
        if enabled:
            passed = frame[column] == 0
            frame = frame[passed]
    return frame

def select_eaarla_channel(frame, max_saturated=5, max_samples=12, saturation_value=250):
    '''Returns a frame with the optimal channel for each EAARL-A pulse.

    The EAARL-A system collected three channels for each laser pulse. Channel 1
    received 90% of the return energy, channel 2 received 9% of the return
    energy, and channel 3 received 1% of the return energy. This permits a
    greater overall range of sensitivity: if the first channel is saturated,
    you can use the second; if the second is saturated, you can use the third.
    (Channel 4 is unused on the EAARL-A system and contains noise, so it is
    never used.)

    This function picks the first non-saturated channel for each pulse. If all
    three channels are saturated, channel 3 is used.

    Parameters
        frame : pandas.DataFrame
            DataFrame of raster data to extract from. Must contain fields
            channel, raster_number, pulse_number, and rx. Must be sorted by
            raster_number, then pulse_number, then channel, and must contain
            channels 1, 2 and 3 for every pulse.
        max_saturated : int
            Number of saturated samples at which a waveform is considered
            saturated. Default is 5.
        max_samples : int
            Maximum number of samples at the head of the waveform to check.
            Defaults to 12.
        saturation_value : int
            Saturation value threshold. If a sample is greater than or equal to
            this, it is saturated. Default is 250.

    Returns : pandas.DataFrame
        New pandas.DataFrame with one kept record per pulse.

    Raises
        ValueError
            If, after discarding channel 4, the records do not form complete
            channel 1/2/3 triples or raster_number is not non-decreasing.
    '''
    # Get rid of channel 4; in EAARL-A, it's noise
    frame = frame[frame.channel != 4]

    # Work on positional numpy views so the checks are independent of the
    # frame's index labels
    channels = frame.channel.values
    rasters = frame.raster_number.values

    # The selection below pairs rows up in strides of three. Without this
    # check a trailing partial pulse either fails with an unrelated numpy
    # broadcasting error or, for one or two leftover rows, is silently
    # processed without its fallback channels.
    if len(channels) % 3 != 0:
        raise ValueError('frame does not contain channels 1-3 for every pulse')

    # Sanity test on sorting; not perfect, but should catch accidents
    for offset, expected in enumerate((1, 2, 3)):
        if not np.all(channels[offset::3] == expected):
            raise ValueError('frame is not sorted properly')
    if not np.all(rasters[1:] >= rasters[:-1]):
        raise ValueError('frame is not sorted properly')

    # Determine which waveforms are saturated: count saturated samples at the
    # head of each waveform
    def is_saturated(rx):
        head = np.asarray(rx)[:max_samples]
        return (head >= saturation_value).sum() >= max_saturated

    saturated = np.array([is_saturated(rx) for rx in frame.rx], dtype=bool)
    usable = np.logical_not(saturated)

    # Use the first non-saturated waveform for each pulse; fall back to the
    # third channel if all are saturated
    use_ch1 = usable[0::3]
    use_ch2 = np.logical_and(np.logical_not(use_ch1), usable[1::3])
    use_ch3 = np.logical_and(np.logical_not(use_ch1), np.logical_not(use_ch2))

    keep = np.zeros(len(usable), dtype=bool)
    keep[0::3] = use_ch1
    keep[1::3] = use_ch2
    keep[2::3] = use_ch3

    return frame[keep]

def add_mirror(frame, ops):
    '''Adds the transmit centroid and the mirror location to the records

    Adds four fields to the dataframe: tx_pos, which is the centroid of the
    transmit waveform (computed over the whole waveform, with no sample
    limit); and mir_x, mir_y, and mir_z, which are the UTM coordinates of the
    mirror on the plane. The mirror fields are added by
    eaarl.project.project_mirror.

    Parameters
        frame : pandas.DataFrame
            Dataframe with waveform records. Must contain the field tx. It is
            modified in place.
        ops : dict
            The ops data, passed through to eaarl.project.project_mirror.

    Returns : pandas.DataFrame
        The same frame that was passed in, with the new fields added.
    '''
    # Centroid of each transmit waveform, using every sample
    tx_centroids = centroid_array(frame['tx'], None)
    frame['tx_pos'] = tx_centroids

    # Projection works on the frame directly
    eaarl.project.project_mirror(frame, ops)
    return frame

def add_fs(frame, ops, prefix='fs', limit=12):
    '''Add the first surface target to the waveform data using centroid

    Detects a target using the centroid of the return waveform. This will
    generally correspond to the first surface.

    Adds five fields to the dataframe (shown here for the default prefix):
    fs_pos, which is the position in the waveform of the target; fs_range,
    which is the distance in meters between the mirror and the target; and
    fs_x, fs_y, and fs_z, which are the UTM coordinates of the target.

    Parameters
        frame : pandas.DataFrame
            DataFrame with pulse data as returned by :func:`get_for_region`,
            :func:`get_by_time`, or :func:`get_by_rasters`. Must contain the
            fields rx, range, channel, and tx_pos (the latter is added by
            :func:`add_mirror`). It is modified in place.
        ops : dict
            The ops data, available as flight.ops on an eaarl.io.flight.Flight
            or by manually loading an ops file as a dict.
        prefix : string, default "fs"
            Allows you to change the prefix for the fields added to the
            dataframe. An underscore is inserted between the prefix and the
            field suffix.
        limit : integer or None, default 12
            Limits how many samples of the waveform are used for the centroid
            calculation. By default, the first 12 samples are used.

    Returns : pandas.DataFrame
        The same frame that was passed in, with the new fields added.
    '''
    pos_field = prefix + '_pos'
    range_field = prefix + '_range'

    # Locate the target within each return waveform
    frame[pos_field] = centroid_array(frame.rx, limit)

    # Convert the waveform position into a mirror-to-target range
    target_range = eaarl.project.target_range(
        frame['range'], frame['tx_pos'], frame[pos_field], frame['channel'],
        ops)
    frame[range_field] = target_range

    # Project the range out from the mirror to get the target coordinates
    eaarl.project.project_point(frame, ops, frame[range_field], prefix)
    return frame

def centroid(wf, limit=None):
    '''Returns the centroid of the waveform

    Returns the 0-based index into the waveform where the centroid is located.
    If a centroid cannot be calculated, returns -1.

    The first sample is treated as the background level and subtracted from
    every sample before the centroid is computed.

    Parameters
        wf : sequence of numbers
            Any sequence of numbers suitable as input to np.array, representing
            the sample values for the digitized waveform. Unsigned integer
            arrays are supported.
        limit : integer or None, default None
            If given (and non-zero), only the first `limit` samples of the
            waveform are used.

    Returns : float
        The fractional 0-based sample index of the centroid, or -1 if the
        waveform is empty or has no net energy above the background.
    '''
    wf = np.array(wf)

    if wf.size == 0:
        return -1

    if limit:
        wf = wf[:limit]

    # Convert to float *before* removing the background energy. Subtracting
    # in an unsigned integer dtype would wrap samples below the first sample
    # around to large positive values.
    wf = wf.astype(float)
    wf = wf - wf[0]

    # Avoid divide-by-zero
    sum_power = wf.sum()
    if sum_power == 0:
        return -1

    # Need to index from 1 instead of 0 so that first value has weight
    weighted_idx = wf * np.arange(1, len(wf) + 1)
    weighted_sum = weighted_idx.sum()

    # -1 is necessary to convert from 1-based index to 0-based index
    return weighted_sum / sum_power - 1

# Element-wise wrapper around centroid for use on arrays of waveforms. Built
# with np.frompyfunc as a two-input (wf, limit), one-output ufunc: each element
# of the first argument is handed to centroid as one waveform. Callers in this
# module pass a single limit value (or None) as the second argument. Results
# from np.frompyfunc ufuncs are object-dtype.
centroid_array = np.frompyfunc(centroid, 2, 1) # pylint: disable=invalid-name