Source code for strax.processing.data_reduction

"""Functions to perform in-place pulse-level data reduction."""

import numpy as np
import numba
from enum import IntEnum

import strax
from strax.processing.pulse_processing import NO_RECORD_LINK, record_links

export, __all__ = strax.exporter()


[docs] @export class ReductionLevel(IntEnum): """Identifies what type of data reduction has been used on a record.""" # Record not modified NO_REDUCTION = 0 # Samples near pulse start/end were removed BASELINE_CUT = 1 # Samples far from a threshold excursion were removed HITS_ONLY = 2 # The record has been replaced with a simpler waveform WAVEFORM_REPLACED = 3 # The raw waveform has been deleted, only metadata survives METADATA_ONLY = 4
[docs] @export @numba.njit(nogil=True, cache=True) def cut_baseline(records, n_before=48, n_after=30): """Replace first n_before and last n_after samples of pulses by 0.""" # records.data.shape[1] gives a numba error (file issue?) if not len(records): return samples_per_record = len(records[0]["data"]) for d_i, d in enumerate(records): if d.record_i == 0: d.data[:n_before] = 0 clear_from = d.pulse_length - n_after clear_from -= d.record_i.astype(np.int32) * samples_per_record clear_from = max(0, clear_from) if clear_from < samples_per_record: d.data[clear_from:] = 0 d["reduction_level"] = ReductionLevel.BASELINE_CUT
[docs] @export def cut_outside_hits(records, hits, left_extension=2, right_extension=15): """Return records with waveforms zeroed if not within left_extension or right_extension of hits. These extensions properly account for breaking of pulses into records. If you pass an incomplete (e.g. cut) set of records, we will not save data around hits found in the removed records, even if this stretches into records that you did pass. """ if not len(records): return records # Create a copy of records with blanked data # Even a simple records.copy() is mightily slow in numba, # and assignments to struct arrays seem troublesome. # The obvious solution: # new_recs = records.copy() # new_recs['data'] = 0 # is quite slow. # Replacing the last = with *= gives a factor 2 speed boost. # But ~40% faster still is this: meta_fields = [x for x in records.dtype.names if x not in ["data", "reduction_level"]] new_recs = np.zeros(len(records), dtype=records.dtype) new_recs[meta_fields] = records[meta_fields] new_recs["reduction_level"] = ReductionLevel.HITS_ONLY _cut_outside_hits(records, hits, new_recs, left_extension, right_extension) return new_recs
@numba.njit(nogil=True, cache=True) def _cut_outside_hits(records, hits, new_recs, left_extension=2, right_extension=15): if not len(records): return samples_per_record = len(records[0]["data"]) previous_record, next_record = record_links(records) for hit_i, h in enumerate(hits): rec_i = h["record_i"] r = records[rec_i] # Indices to keep, with 0 at the start of this record start_keep = h["left"] - left_extension end_keep = h["right"] + right_extension # Indices of samples to keep in this record (a, b), _ = strax.overlap_indices(0, r["length"], start_keep, end_keep - start_keep) new_recs[rec_i]["data"][a:b] = records[rec_i]["data"][a:b] # Keep samples in previous record, if there was one if start_keep < 0: prev_ri = previous_record[rec_i] if prev_ri != NO_RECORD_LINK: # Note start_keep is negative, so this keeps the # last few samples of the previous record a_prev = start_keep new_recs[prev_ri]["data"][a_prev:] = records[prev_ri]["data"][a_prev:] # Same for the next record, if there is one if end_keep > samples_per_record: next_ri = next_record[rec_i] if next_ri != NO_RECORD_LINK: b_next = end_keep - samples_per_record new_recs[next_ri]["data"][:b_next] = records[next_ri]["data"][:b_next]