Inherit straxen records and peaklets (#126)

* inherit from straxen
* inherit
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
XENONnT · Apr 23, 2024 · 7e159ee · 7e159ee
1 parent dfdf4b0
commit 7e159ee
Show file tree

Hide file tree

Showing 3 changed files with 3 additions and 310 deletions.
diff --git a/jobs/job.py b/jobs/job.py
@@ -175,7 +175,7 @@ def main():
     print_versions()
     _, runid = sys.argv
     runid = int(runid)
-    
+
     # Process the saltax desired mode
     logging.info("Loading context...")
     settings = load_config()

diff --git a/saltax/plugins/peaklets.py b/saltax/plugins/peaklets.py
@@ -3,7 +3,6 @@
 import strax
 from strax.processing.peak_building import _build_hit_waveform
 from strax import utils
-from immutabledict import immutabledict
 from strax.processing.general import _touching_windows
 from strax.dtypes import DIGITAL_SUM_WAVEFORM_CHANNEL
 from .records import SCHANNEL_STARTS_AT
@@ -24,7 +23,7 @@
         help="'data', 'simu', or 'salt'",
     ),
 )
-class SPeaklets(strax.Plugin):
+class SPeaklets(straxen.Peaklets):
     """
     Split records into:
      - peaklets
@@ -45,141 +44,6 @@ class SPeaklets(strax.Plugin):
     extension overlaps with any peaks or other hits.
     """
 
-    depends_on = ("records",)
-    provides = ("peaklets", "lone_hits")
-    data_kind = dict(peaklets="peaklets", lone_hits="lone_hits")
-    parallel = "process"
-    compressor = "zstd"
-
-    __version__ = "0.0.4"
-
-    peaklet_gap_threshold = straxen.URLConfig(
-        default=700, infer_type=False, help="No hits for this many ns triggers a new peak"
-    )
-
-    peak_left_extension = straxen.URLConfig(
-        default=30, infer_type=False, help="Include this many ns left of hits in peaks"
-    )
-
-    peak_right_extension = straxen.URLConfig(
-        default=200, infer_type=False, help="Include this many ns right of hits in peaks"
-    )
-
-    peak_min_pmts = straxen.URLConfig(
-        default=2,
-        infer_type=False,
-        help="Minimum number of contributing PMTs needed to define a peak",
-    )
-
-    peak_split_gof_threshold = straxen.URLConfig(
-        # See https://xe1t-wiki.lngs.infn.it/doku.php?id=
-        # xenon:xenonnt:analysis:strax_clustering_classification
-        # #natural_breaks_splitting
-        # for more information
-        default=(None, ((0.5, 1.0), (6.0, 0.4)), ((2.5, 1.0), (5.625, 0.4))),  # Reserved
-        infer_type=False,
-        help="Natural breaks goodness of fit/split threshold to split "
-        "a peak. Specify as tuples of (log10(area), threshold).",
-    )
-
-    peak_split_filter_wing_width = straxen.URLConfig(
-        default=70,
-        infer_type=False,
-        help="Wing width of moving average filter for " "low-split natural breaks",
-    )
-
-    peak_split_min_area = straxen.URLConfig(
-        default=40.0,
-        infer_type=False,
-        help="Minimum area to evaluate natural breaks criterion. " "Smaller peaks are not split.",
-    )
-
-    peak_split_iterations = straxen.URLConfig(
-        default=20, infer_type=False, help="Maximum number of recursive peak splits to do."
-    )
-
-    diagnose_sorting = straxen.URLConfig(
-        track=False,
-        default=False,
-        infer_type=False,
-        help="Enable runtime checks for sorting and disjointness",
-    )
-
-    gain_model = straxen.URLConfig(
-        infer_type=False, help="PMT gain model. Specify as URL or explicit value"
-    )
-
-    gain_model_mc = straxen.URLConfig(
-        infer_type=False, help="PMT gain model. Specify as URL or explicit value"
-    )
-
-    tight_coincidence_window_left = straxen.URLConfig(
-        default=50,
-        infer_type=False,
-        help="Time range left of peak center to call a hit a tight coincidence (ns)",
-    )
-
-    tight_coincidence_window_right = straxen.URLConfig(
-        default=50,
-        infer_type=False,
-        help="Time range right of peak center to call a hit a tight coincidence (ns)",
-    )
-
-    n_tpc_pmts = straxen.URLConfig(type=int, help="Number of TPC PMTs")
-
-    n_top_pmts = straxen.URLConfig(type=int, help="Number of top TPC array PMTs")
-
-    sum_waveform_top_array = straxen.URLConfig(
-        default=True, type=bool, help="Digitize the sum waveform of the top array separately"
-    )
-
-    saturation_correction_on = straxen.URLConfig(
-        default=True, infer_type=False, help="On off switch for saturation correction"
-    )
-
-    saturation_reference_length = straxen.URLConfig(
-        default=100,
-        infer_type=False,
-        help="Maximum number of reference sample used " "to correct saturated samples",
-    )
-
-    saturation_min_reference_length = straxen.URLConfig(
-        default=20,
-        infer_type=False,
-        help="Minimum number of reference sample used " "to correct saturated samples",
-    )
-
-    peaklet_max_duration = straxen.URLConfig(
-        default=int(10e6), infer_type=False, help="Maximum duration [ns] of a peaklet"
-    )
-
-    channel_map = straxen.URLConfig(
-        track=False,
-        type=immutabledict,
-        help="immutabledict mapping subdetector to (min, max) " "channel number.",
-    )
-
-    hit_min_amplitude = straxen.URLConfig(
-        track=True,
-        infer_type=False,
-        default="cmt://hit_thresholds_tpc?version=ONLINE&run_id=plugin.run_id",
-        help="Minimum hit amplitude in ADC counts above baseline. "
-        "Specify as a tuple of length n_tpc_pmts, or a number,"
-        'or a string like "pmt_commissioning_initial" which means calling'
-        "hitfinder_thresholds.py"
-        "or a tuple like (correction=str, version=str, nT=boolean),"
-        "which means we are using cmt.",
-    )
-
-    def infer_dtype(self):
-        return dict(
-            peaklets=strax.peak_dtype(
-                n_channels=self.n_tpc_pmts,
-                digitize_top=self.sum_waveform_top_array,
-            ),
-            lone_hits=strax.hit_dtype,
-        )
-
     def setup(self):
         if self.peak_min_pmts > 2:
             # Can fix by re-splitting,
@@ -422,53 +286,6 @@ def compute(self, records, start, end):
 
         return dict(peaklets=peaklets, lone_hits=lone_hits)
 
-    def natural_breaks_threshold(self, peaks):
-        """
-        Pasted from https://github.com/XENONnT/straxen/blob/5f232eb2c1ab39e11fb14d4e6ee2db369ed2c2ec/straxen/plugins/peaklets/peaklets.py#L332-L348
-        """
-        rise_time = -peaks["area_decile_from_midpoint"][:, 1]
-
-        # This is ~1 for an clean S2, ~0 for a clean S1,
-        # and transitions gradually in between.
-        f_s2 = 8 * np.log10(rise_time.clip(1, 1e5) / 100)
-        f_s2 = 1 / (1 + np.exp(-f_s2))
-
-        log_area = np.log10(peaks["area"].clip(1, 1e7))
-        thresholds = self.peak_split_gof_threshold
-        return f_s2 * np.interp(log_area, *np.transpose(thresholds[2])) + (1 - f_s2) * np.interp(
-            log_area, *np.transpose(thresholds[1])
-        )
-
-    @staticmethod
-    def clip_peaklet_times(peaklets, start, end):
-        straxen.plugins.peaklets.Peaklets.clip_peaklet_times(peaklets, start, end)
-
-    @staticmethod
-    def create_outside_peaks_region(peaklets, start, end):
-        """Creates time intervals which are outside peaks.
-
-        :param peaklets: Peaklets for which intervals should be
-            computed.
-        :param start: Chunk start
-        :param end: Chunk end
-        :return: array of strax.time_fields dtype.
-        """
-        outside_peaks = straxen.plugins.peaklets.Peaklets.create_outside_peaks_region(
-            peaklets, start, end
-        )
-        return outside_peaks
-
-    @staticmethod
-    def add_hit_features(hitlets, hit_max_times, peaklets):
-        """Create hits timing features :param hitlets_max: hitlets with only
-        max height time.
-
-        :param peaklets: Peaklets for which intervals should be
-            computed.
-        :return: array of peaklet_timing dtype.
-        """
-        straxen.plugins.peaklets.Peaklets.add_hit_features(hitlets, hit_max_times, peaklets)
-
 
 @numba.jit(nopython=True, nogil=True, cache=False)
 def peak_saturation_correction(

diff --git a/saltax/plugins/records.py b/saltax/plugins/records.py
@@ -1,4 +1,3 @@
-from immutabledict import immutabledict
 from straxen.plugins.records.records import count_pulses
 import numpy as np
 import strax
@@ -11,7 +10,7 @@
 
 
 @export
-class SPulseProcessing(strax.Plugin):
+class SPulseProcessing(straxen.PulseProcessing):
     """
     Split raw_records into:
      - (tpc) records
@@ -33,131 +32,8 @@ class SPulseProcessing(strax.Plugin):
     """
 
     __version__ = "0.0.2"
-
-    parallel = "process"
-    rechunk_on_save = immutabledict(records=False, veto_regions=True, pulse_counts=True)
-    compressor = "zstd"
-
     depends_on = ("raw_records", "raw_records_simu")
 
-    provides = ("records", "veto_regions", "pulse_counts")
-    data_kind = {k: k for k in provides}
-    save_when = immutabledict(
-        records=strax.SaveWhen.TARGET,
-        veto_regions=strax.SaveWhen.TARGET,
-        pulse_counts=strax.SaveWhen.ALWAYS,
-    )
-
-    hev_gain_model = straxen.URLConfig(
-        default=None, infer_type=False, help="PMT gain model used in the software high-energy veto."
-    )
-
-    baseline_samples = straxen.URLConfig(
-        default=40,
-        infer_type=False,
-        help="Number of samples to use at the start of the pulse to determine " "the baseline",
-    )
-
-    # Tail veto options
-    tail_veto_threshold = straxen.URLConfig(
-        default=0,
-        infer_type=False,
-        help=(
-            "Minimum peakarea in PE to trigger tail veto."
-            "Set to None, 0 or False to disable veto."
-        ),
-    )
-
-    tail_veto_duration = straxen.URLConfig(
-        default=int(3e6), infer_type=False, help="Time in ns to veto after large peaks"
-    )
-
-    tail_veto_resolution = straxen.URLConfig(
-        default=int(1e3),
-        infer_type=False,
-        help="Time resolution in ns for pass-veto waveform summation",
-    )
-
-    tail_veto_pass_fraction = straxen.URLConfig(
-        default=0.05, infer_type=False, help="Pass veto if maximum amplitude above max * fraction"
-    )
-
-    tail_veto_pass_extend = straxen.URLConfig(
-        default=3,
-        infer_type=False,
-        help="Extend pass veto by this many samples (tail_veto_resolution!)",
-    )
-
-    max_veto_value = straxen.URLConfig(
-        default=None,
-        infer_type=False,
-        help="Optionally pass a HE peak that exceeds this absolute area. "
-        "(if performing a hard veto, can keep a few statistics.)",
-    )
-
-    # PMT pulse processing options
-    pmt_pulse_filter = straxen.URLConfig(
-        default=None, infer_type=False, help="Linear filter to apply to pulses, will be normalized."
-    )
-
-    save_outside_hits = straxen.URLConfig(
-        default=(3, 20),
-        infer_type=False,
-        help="Save (left, right) samples besides hits; cut the rest",
-    )
-
-    n_tpc_pmts = straxen.URLConfig(type=int, help="Number of TPC PMTs")
-
-    check_raw_record_overlaps = straxen.URLConfig(
-        default=True,
-        track=False,
-        infer_type=False,
-        help="Crash if any of the pulses in raw_records overlap with others " "in the same channel",
-    )
-
-    allow_sloppy_chunking = straxen.URLConfig(
-        default=False,
-        track=False,
-        infer_type=False,
-        help=(
-            "Use a default baseline for incorrectly chunked fragments. "
-            "This is a kludge for improperly converted XENON1T data."
-        ),
-    )
-
-    hit_min_amplitude = straxen.URLConfig(
-        track=True,
-        infer_type=False,
-        default="cmt://hit_thresholds_tpc?version=ONLINE&run_id=plugin.run_id",
-        help="Minimum hit amplitude in ADC counts above baseline. "
-        "Specify as a tuple of length n_tpc_pmts, or a number,"
-        'or a string like "pmt_commissioning_initial" which means calling'
-        "hitfinder_thresholds.py"
-        "or a tuple like (correction=str, version=str, nT=boolean),"
-        "which means we are using cmt.",
-    )
-
-    def infer_dtype(self):
-        # Get record_length from the plugin making raw_records
-        self.record_length = strax.record_length_from_dtype(
-            self.deps["raw_records"].dtype_for("raw_records")
-        )
-
-        dtype = dict()
-        for p in self.provides:
-            if "records" in p:
-                dtype[p] = strax.record_dtype(self.record_length)
-        dtype["veto_regions"] = strax.hit_dtype
-        dtype["pulse_counts"] = straxen.pulse_count_dtype(self.n_tpc_pmts)
-
-        return dtype
-
-    def setup(self):
-        self.hev_enabled = self.hev_gain_model is not None and self.tail_veto_threshold
-        if self.hev_enabled:
-            self.to_pe = self.hev_gain_model
-        self.hit_thresholds = self.hit_min_amplitude
-
     def compute(self, raw_records, raw_records_simu, start, end):
         if self.check_raw_record_overlaps:
             straxen.check_overlaps(raw_records, n_channels=3000)