From 3284d6129004460107eb94182b9efc32d7793e8e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 9 Oct 2024 17:10:29 +0200 Subject: [PATCH 01/17] add ann tier --- Snakefile | 1 + rules/ann.smk | 50 ++++++++++++++++ rules/evt.smk | 72 ++++++++++++++++++++++- scripts/build_ann.py | 124 +++++++++++++++++++++++++++++++++++++++ scripts/build_evt.py | 18 ++++-- scripts/util/patterns.py | 26 ++++++++ scripts/util/utils.py | 12 ++++ templates/config.json | 2 + 8 files changed, 298 insertions(+), 7 deletions(-) create mode 100644 rules/ann.smk create mode 100644 scripts/build_ann.py diff --git a/Snakefile b/Snakefile index 4738359..5069de0 100644 --- a/Snakefile +++ b/Snakefile @@ -59,6 +59,7 @@ include: "rules/psp.smk" include: "rules/hit.smk" include: "rules/pht.smk" include: "rules/pht_fast.smk" +include: "rules/ann.smk" include: "rules/evt.smk" include: "rules/skm.smk" include: "rules/blinding_calibration.smk" diff --git a/rules/ann.smk b/rules/ann.smk new file mode 100644 index 0000000..f7e6b1c --- /dev/null +++ b/rules/ann.smk @@ -0,0 +1,50 @@ +""" +Snakemake rules for processing ann tier. This is done only for the coax detectors +to apply the ann and risetime cuts for psd. + +""" + +from scripts.util.pars_loading import pars_catalog +from scripts.util.utils import par_dsp_path +from scripts.util.patterns import ( + get_pattern_tier_dsp, + get_pattern_tier_psp, + get_pattern_tier_ann, + get_pattern_tier, + get_pattern_log, + get_pattern_pars, + get_pattern_pars_overwrite, +) + +for tier in ["ann", "pan"]: + + rule: + input: + dsp_file=get_pattern_tier_dsp(setup) if tier == "ann" else get_pattern_tier_psp(setup), + pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + output: + tier_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, f"{tier}_db"), + log: + get_pattern_log(setup, f"tier_{tier}"), + group: + "tier-ann" + resources: + runtime=300, + mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_ann.py')} " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {input.dsp_file} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {input.pars_file} " + + set_last_rule_name(workflow, f"build_{tier}") \ No newline at end of file diff --git a/rules/evt.smk b/rules/evt.smk index ed20d2d..1026d9b 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -9,6 +9,8 @@ from scripts.util.patterns import ( get_pattern_tier_tcm, get_pattern_tier_pht, get_pattern_tier_psp, + get_pattern_tier_pan, + get_pattern_tier_ann, get_pattern_tier, get_pattern_log, get_pattern_pars, @@ -31,6 +33,18 @@ for tier in ("evt", "pet"): else get_pattern_tier_pht(setup) ), tcm_file=get_pattern_tier_tcm(setup), + ann_file=lambda wildcards: ( + get_pattern_tier_ann(setup) + if tier == "evt" + else get_pattern_tier_pan(setup) + ), + # needs snakemake >= 8.3 + # ann_file= branch( + # lambda wildcards: tier if int(wildcards["period"][1:]) <= 11 else False, + # cases = {"evt":get_pattern_tier_ann(setup), + # "pet":get_pattern_tier_pan(setup), + # } + # ), xtalk_matrix=lambda wildcards: get_svm_file( tier=tier, wildcards=wildcards, name="xtc" ), @@ -63,10 +77,66 @@ for tier in ("evt", "pet"): "--par_files {input.par_files} " "--hit_file {input.hit_file} " "--tcm_file {input.tcm_file} " + "--ann_file {input.ann_file} " 
"--dsp_file {input.dsp_file} " "--output {output.evt_file} " - set_last_rule_name(workflow, f"build_{tier}") + set_last_rule_name(workflow, f"build_{tier}_with_ann") + # ann_rule = list(workflow.rules)[-1] + + # rule: + # input: + # dsp_file=( + # get_pattern_tier_dsp(setup) + # if tier == "evt" + # else get_pattern_tier_psp(setup) + # ), + # hit_file=( + # get_pattern_tier_hit(setup) + # if tier == "evt" + # else get_pattern_tier_pht(setup) + # ), + # tcm_file=get_pattern_tier_tcm(setup), + # xtalk_matrix=lambda wildcards: get_svm_file( + # tier=tier, wildcards=wildcards, name="xtc" + # ), + # par_files=lambda wildcards: pars_catalog.get_par_file( + # setup, wildcards.timestamp, "pht" + # ), + # output: + # evt_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + # params: + # timestamp="{timestamp}", + # datatype="{datatype}", + # tier=tier, + # log: + # get_pattern_log(setup, f"tier_{tier}"), + # group: + # "tier-evt" + # resources: + # runtime=300, + # mem_swap=50, + # shell: + # "{swenv} python3 -B " + # f"{workflow.source_path('../scripts/build_evt.py')} " + # "--configs {configs} " + # "--metadata {meta} " + # "--log {log} " + # "--tier {params.tier} " + # "--datatype {params.datatype} " + # "--timestamp {params.timestamp} " + # "--xtc_file {input.xtalk_matrix} " + # "--par_files {input.par_files} " + # "--hit_file {input.hit_file} " + # "--tcm_file {input.tcm_file} " + # "--dsp_file {input.dsp_file} " + # "--output {output.evt_file} " + + # set_last_rule_name(workflow, f"build_{tier}") + # no_ann_rule = list(workflow.rules)[-1] + + # rule_order_list = [ann_rule, no_ann_rule] + # workflow._ruleorder.add(*rule_order_list) rule: wildcard_constraints: diff --git a/scripts/build_ann.py b/scripts/build_ann.py new file mode 100644 index 0000000..1f0f67f --- /dev/null +++ b/scripts/build_ann.py @@ -0,0 +1,124 @@ +import argparse +import json +import logging +import os +import pathlib +import re +import time +import warnings + +os.environ["LGDO_CACHE"] = "false" +os.environ["LGDO_BOUNDSCHECK"] = "false" +os.environ["DSPEED_CACHE"] = "false" +os.environ["DSPEED_BOUNDSCHECK"] = "false" + +import lgdo.lh5 as lh5 +import numpy as np +from dspeed import build_dsp +from legendmeta import LegendMetadata +from legendmeta.catalog import Props + + +def replace_list_with_array(dic): + for key, value in dic.items(): + if isinstance(value, dict): + dic[key] = replace_list_with_array(value) + elif isinstance(value, list): + dic[key] = np.array(value, dtype="float32") + else: + pass + return dic + + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + +argparser = argparse.ArgumentParser() +argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) +argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--input", help="input file", type=str) +argparser.add_argument("--output", help="output file", type=str) +argparser.add_argument("--db_file", help="db file", type=str) +args = argparser.parse_args() + +pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +logging.getLogger("numba").setLevel(logging.INFO) +logging.getLogger("parse").setLevel(logging.INFO) 
+logging.getLogger("lgdo").setLevel(logging.INFO) +log = logging.getLogger(__name__) + +configs = LegendMetadata(path=args.configs) +channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_ann"][ + "inputs" +]["processing_chain"] + +channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} +db_files = [ + par_file + for par_file in args.pars_file + if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yml" +] + +database_dic = Props.read_from(db_files, subst_pathvar=True) +database_dic = replace_list_with_array(database_dic) + +pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) + +rng = np.random.default_rng() +rand_num = f"{rng.integers(0,99999):05d}" +temp_output = f"{args.output}.{rand_num}" + +start = time.time() + +build_dsp( + args.input, + temp_output, + {}, + database=database_dic, + chan_config=channel_dict, + write_mode="r", + buffer_len=3200 if args.datatype == "cal" else 3200, + block_width=16, +) + +log.info(f"build_ann finished in {time.time()-start}") + +os.rename(temp_output, args.output) + +if "ann" in args.output: + key = os.path.basename(args.output).replace("-tier_ann.lh5", "") +else: + key = os.path.basename(args.output).replace("-tier_pan.lh5", "") + +raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] + +raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] + +outputs = {} +channels = [] +for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + +full_dict = { + "valid_fields": { + "ann": outputs, + }, + "valid_keys": {key: {"valid_channels": {"ann": channels}}}, +} +pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +with open(args.db_file, "w") as w: + json.dump(full_dict, w, indent=4) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 1fcd347..5a808b2 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -35,6 +35,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): argparser.add_argument("--hit_file", help="hit file", type=str) argparser.add_argument("--dsp_file", help="dsp file", type=str) argparser.add_argument("--tcm_file", help="tcm file", type=str) +argparser.add_argument("--ann_file", help="ann file") argparser.add_argument("--xtc_file", help="xtc file", type=str) argparser.add_argument("--par_files", help="par files", nargs="*") @@ -125,13 +126,18 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.output}.{rand_num}" +file_table = { + "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), + "dsp": (args.dsp_file, "dsp", "ch{}"), + "hit": (args.hit_file, "hit", "ch{}"), + "evt": (None, "evt"), +} + +if args.ann_file is not None: + file_table["ann"] = (args.ann_file, "dsp", "ch{}") + table = build_evt( - { - "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), - "dsp": (args.dsp_file, "dsp", "ch{}"), - "hit": (args.hit_file, "hit", "ch{}"), - "evt": (None, "evt"), - }, + file_table, evt_config, ) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 79bcaac..2629e7e 
100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -16,10 +16,12 @@ pars_path, plts_path, sandbox_path, + tier_ann_path, tier_daq_path, tier_dsp_path, tier_evt_path, tier_hit_path, + tier_pan_path, tier_path, tier_pet_path, tier_pht_path, @@ -137,6 +139,16 @@ def get_pattern_tier_hit(setup): ) +def get_pattern_tier_ann(setup): + return os.path.join( + f"{tier_ann_path(setup)}", + "{datatype}", + "{period}", + "{run}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_ann.lh5", + ) + + def get_pattern_tier_evt(setup): return os.path.join( f"{tier_evt_path(setup)}", @@ -175,6 +187,16 @@ def get_pattern_tier_pht(setup): ) +def get_pattern_tier_pan(setup): + return os.path.join( + f"{tier_pan_path(setup)}", + "{datatype}", + "{period}", + "{run}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pan.lh5", + ) + + def get_pattern_tier_pet(setup): return os.path.join( f"{tier_pet_path(setup)}", @@ -212,6 +234,8 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): file_pattern = get_pattern_tier_dsp(setup) elif tier == "hit": file_pattern = get_pattern_tier_hit(setup) + elif tier == "ann": + file_pattern = get_pattern_tier_ann(setup) elif tier == "evt": file_pattern = get_pattern_tier_evt(setup) elif tier == "evt_concat": @@ -220,6 +244,8 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): file_pattern = get_pattern_tier_psp(setup) elif tier == "pht": file_pattern = get_pattern_tier_pht(setup) + elif tier == "pan": + file_pattern = get_pattern_tier_pan(setup) elif tier == "pet": file_pattern = get_pattern_tier_pet(setup) elif tier == "pet_concat": diff --git a/scripts/util/utils.py b/scripts/util/utils.py index f3f3ebc..5ec88b0 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -53,6 +53,10 @@ def tier_hit_path(setup): return setup["paths"]["tier_hit"] +def tier_ann_path(setup): + return setup["paths"]["tier_ann"] + + def tier_evt_path(setup): return setup["paths"]["tier_evt"] @@ -65,6 +69,10 @@ def tier_pht_path(setup): return setup["paths"]["tier_pht"] +def tier_pan_path(setup): + return setup["paths"]["tier_pan"] + + def tier_pet_path(setup): return setup["paths"]["tier_pet"] @@ -82,12 +90,16 @@ def get_tier_path(setup, tier): return tier_dsp_path(setup) elif tier == "hit": return tier_hit_path(setup) + elif tier == "ann": + return tier_ann_path(setup) elif tier == "evt": return tier_evt_path(setup) elif tier == "psp": return tier_psp_path(setup) elif tier == "pht": return tier_pht_path(setup) + elif tier == "pan": + return tier_pan_path(setup) elif tier == "pet": return tier_pet_path(setup) elif tier == "skm": diff --git a/templates/config.json b/templates/config.json index 7d17f71..a86db97 100644 --- a/templates/config.json +++ b/templates/config.json @@ -19,9 +19,11 @@ "tier_tcm": "$_/generated/tier/tcm", "tier_dsp": "$_/generated/tier/dsp", "tier_hit": "$_/generated/tier/hit", + "tier_ann": "$_/generated/tier/ann", "tier_evt": "$_/generated/tier/evt", "tier_psp": "$_/generated/tier/psp", "tier_pht": "$_/generated/tier/pht", + "tier_pan": "$_/generated/tier/pan", "tier_pet": "$_/generated/tier/pet", "tier_skm": "$_/generated/tier/skm", From 26d52f25c6565cb8cd3af147c0e13dfb61cf1877 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 20 Oct 2024 14:55:31 +0200 Subject: [PATCH 02/17] allow more jobs --- rules/ann.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/ann.smk b/rules/ann.smk index f7e6b1c..ff24820 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -34,7 +34,7 @@ for tier in ["ann", "pan"]: 
"tier-ann" resources: runtime=300, - mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, + mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_ann.py')} " From 7918e830a4ce913166787b89f0f526bea7051ea8 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:10:29 +0200 Subject: [PATCH 03/17] pc cleanup --- rules/ann.smk | 10 +++++++--- scripts/build_ann.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/rules/ann.smk b/rules/ann.smk index ff24820..64cdd50 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -20,7 +20,11 @@ for tier in ["ann", "pan"]: rule: input: - dsp_file=get_pattern_tier_dsp(setup) if tier == "ann" else get_pattern_tier_psp(setup), + dsp_file=( + get_pattern_tier_dsp(setup) + if tier == "ann" + else get_pattern_tier_psp(setup) + ), pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"), params: timestamp="{timestamp}", @@ -46,5 +50,5 @@ for tier in ["ann", "pan"]: "--output {output.tier_file} " "--db_file {output.db_file} " "--pars_file {input.pars_file} " - - set_last_rule_name(workflow, f"build_{tier}") \ No newline at end of file + + set_last_rule_name(workflow, f"build_{tier}") diff --git a/scripts/build_ann.py b/scripts/build_ann.py index 1f0f67f..224877a 100644 --- a/scripts/build_ann.py +++ b/scripts/build_ann.py @@ -90,7 +90,7 @@ def replace_list_with_array(dic): if "ann" in args.output: key = os.path.basename(args.output).replace("-tier_ann.lh5", "") -else: +else: key = os.path.basename(args.output).replace("-tier_pan.lh5", "") raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] From e9561bdf62f0dc542721643ad8376e105e8b34c5 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:10:40 +0200 Subject: [PATCH 04/17] bump pkg versions --- templates/config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/config.json b/templates/config.json index a86db97..9fd0d0f 100644 --- a/templates/config.json +++ b/templates/config.json @@ -55,9 +55,9 @@ "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif" }, "pkg_versions": { - "pygama": "pygama==2.0.1", + "pygama": "pygama==2.0.3", "pylegendmeta": "pylegendmeta==0.10.2", - "dspeed": "dspeed==1.4.0a1", + "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.7.0", "legend-daq2lh5": "legend-daq2lh5==1.2.1" } From a3c0dae6588ac4bbaeacabceb8602c3826ef55f2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:18:39 +0200 Subject: [PATCH 05/17] add ml packages --- templates/config.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/templates/config.json b/templates/config.json index 9fd0d0f..0d1320d 100644 --- a/templates/config.json +++ b/templates/config.json @@ -59,7 +59,10 @@ "pylegendmeta": "pylegendmeta==0.10.2", "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.7.0", - "legend-daq2lh5": "legend-daq2lh5==1.2.1" + "legend-daq2lh5": "legend-daq2lh5==1.2.1", + "tensorflow": "tensorflow==2.17", + "keras": "keras==3.6.0", + "jax": "jax==0.4.30" } } } From 818511da149ae57f954a4a5fa9aaba075e1ddfa2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:15:38 +0100 Subject: [PATCH 06/17] refactor for new metadata, clean up patterns and some naming --- scripts/build_dsp.py | 4 +- scripts/create_chankeylist.py | 2 +- scripts/util/CalibCatalog.py | 128 ------ .../util/{dataset_cal.py => 
cal_grouping.py} | 13 +- scripts/util/catalog.py | 191 ++++++++ scripts/util/create_pars_keylist.py | 11 +- scripts/util/pars_loading.py | 8 +- scripts/util/patterns.py | 407 +++--------------- scripts/util/utils.py | 134 ++---- 9 files changed, 309 insertions(+), 589 deletions(-) delete mode 100644 scripts/util/CalibCatalog.py rename scripts/util/{dataset_cal.py => cal_grouping.py} (92%) create mode 100644 scripts/util/catalog.py diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 8dad8fa..cbd0794 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -51,9 +51,7 @@ def replace_list_with_array(dic): channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} db_files = [ - par_file - for par_file in args.pars_file - if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yaml" + par_file for par_file in args.pars_file if os.path.splitext(par_file)[1] in (".json", ".yaml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index c4c6cb9..435f55c 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -20,7 +20,7 @@ chmap = channel_map.channelmaps.on(args.timestamp) channels = [ - f"ch{chmap[chan].daq.rawid:03}" + chan for chan in status_map if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] diff --git a/scripts/util/CalibCatalog.py b/scripts/util/CalibCatalog.py deleted file mode 100644 index b222c5d..0000000 --- a/scripts/util/CalibCatalog.py +++ /dev/null @@ -1,128 +0,0 @@ -# -# Copyright (C) 2015 Oliver Schulz -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -""" -This module stores the scripts for leading validity files based on timestamp and system -""" - -import bisect -import collections -import copy -import json -import types -from collections import namedtuple -from pathlib import Path - -from .utils import unix_time - - -class Props: - @staticmethod - def read_from(sources): - def read_impl(sources): - if isinstance(sources, (str, Path)): - file_name = sources - with open(file_name) as file: - return json.load(file) - elif isinstance(sources, list): - result = {} - for p in map(read_impl, sources): - Props.add_to(result, p) - return result - else: - msg = f"Can't run Props.read_from on sources-value of type {type(sources)}" - raise ValueError(msg) - - return read_impl(sources) - - @staticmethod - def add_to(props_a, props_b): - a = props_a - b = props_b - - for key in b: - if key in a: - if isinstance(a[key], dict) and isinstance(b[key], dict): - Props.add_to(a[key], b[key]) - elif a[key] != b[key]: - a[key] = copy.copy(b[key]) - else: - a[key] = copy.copy(b[key]) - - -class PropsStream: - @staticmethod - def get(value): - if isinstance(value, (str, Path)): - return PropsStream.read_from(value) - elif isinstance(value, (collections.abc.Sequence, types.GeneratorType)): - return value - else: - msg = f"Can't get PropsStream from value of type {type(value)}" - raise ValueError(msg) - - @staticmethod - def read_from(file_name): - with open(file_name) as file: - for json_str in file: - yield json.loads(json_str) - - -class CalibCatalog(namedtuple("CalibCatalog", ["entries"])): - __slots__ = () - - class Entry(namedtuple("Entry", ["valid_from", "file"])): - __slots__ = () - - @staticmethod - def read_from(file_name): - entries = {} - - for props in PropsStream.get(file_name): - timestamp = props["valid_from"] - system = "all" if props.get("category") is None else props["category"] - file_key = props["apply"] - if system not in entries: - entries[system] = [] - entries[system].append(CalibCatalog.Entry(unix_time(timestamp), file_key)) - - for system in entries: - entries[system] = sorted(entries[system], key=lambda entry: entry.valid_from) - return CalibCatalog(entries) - - def calib_for(self, timestamp, category="all", allow_none=False): - if category in self.entries: - valid_from = [entry.valid_from for entry in self.entries[category]] - pos = bisect.bisect_right(valid_from, unix_time(timestamp)) - if pos > 0: - return self.entries[category][pos - 1].file - else: - if allow_none: - return None - else: - msg = f"No valid calibration found for timestamp: {timestamp}, category: {category}" - raise RuntimeError(msg) - else: - if allow_none: - return None - else: - msg = f"No calibrations found for category: {category}" - raise RuntimeError(msg) - - @staticmethod - def get_calib_files(catalog_file, timestamp, category="all"): - catalog = CalibCatalog.read_from(catalog_file) - return CalibCatalog.calib_for(catalog, timestamp, category) diff --git a/scripts/util/dataset_cal.py b/scripts/util/cal_grouping.py similarity index 92% rename from scripts/util/dataset_cal.py rename to scripts/util/cal_grouping.py index 693e934..aec1572 100644 --- a/scripts/util/dataset_cal.py +++ b/scripts/util/cal_grouping.py @@ -14,12 +14,23 @@ from .utils import filelist_path -class dataset_file: +class cal_grouping: def __init__(self, setup, input_file): with open(input_file) as r: self.datasets = json.load(r) + self.expand_runs() self.setup = setup + def expand_runs(self): + for channel, chan_dict in self.datasets.items(): + for part, part_dict in 
chan_dict.items(): + for per, runs in part_dict.items(): + if isinstance(runs, str) and ".." in runs: + start, end = runs.split("..") + self.datasets[channel][part][per] = [ + f"r{x:02}" for x in range(int(start[2:]), int(end) + 1) + ] + def get_dataset(self, dataset, channel): partition_dict = self.datasets["default"].copy() if channel in self.datasets: diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py new file mode 100644 index 0000000..1fb516b --- /dev/null +++ b/scripts/util/catalog.py @@ -0,0 +1,191 @@ +# +# Copyright (C) 2015 Oliver Schulz +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This module stores the scripts for leading validity files based on timestamp and system +""" + +import bisect +import collections +import copy +import json +import types +from collections import namedtuple +from pathlib import Path + +import yaml + +from .utils import unix_time + + +class Props: + @staticmethod + def read_from(sources): + def read_impl(sources): + if isinstance(sources, (str, Path)): + file_name = sources + if isinstance(file_name, str): + file_name = Path(file_name) + if file_name.suffix in (".yaml", ".yml"): + with file_name.open() as file: + return yaml.safe_load(file) + elif file_name.suffix == ".json": + with open(file_name) as file: + return json.load(file) + else: + msg = f"Can't run Props.read_from on file with suffix {file_name.suffix}" + raise ValueError(msg) + elif isinstance(sources, list): + result = {} + for p in map(read_impl, sources): + Props.add_to(result, p) + return result + else: + msg = f"Can't run Props.read_from on sources-value of type {type(sources)}" + raise ValueError(msg) + + return read_impl(sources) + + @staticmethod + def add_to(props_a, props_b): + a = props_a + b = props_b + + for key in b: + if key in a: + if isinstance(a[key], dict) and isinstance(b[key], dict): + Props.add_to(a[key], b[key]) + elif a[key] != b[key]: + a[key] = copy.copy(b[key]) + else: + a[key] = copy.copy(b[key]) + + +class PropsStream: + """Simple class to control loading of validity.yaml files""" + + @staticmethod + def get(value): + if isinstance(value, str): + return PropsStream.read_from(value) + + if isinstance(value, (collections.abc.Sequence, types.GeneratorType)): + return value + + msg = f"Can't get PropsStream from value of type {type(value)}" + raise ValueError(msg) + + @staticmethod + def read_from(file_name): + with Path(file_name).open() as r: + file = yaml.safe_load(r) + file = sorted(file, key=lambda item: unix_time(item["valid_from"])) + yield from file + + +class Catalog(namedtuple("Catalog", ["entries"])): + """Implementation of the `YAML metadata validity specification `_.""" + + __slots__ = () + + class Entry(namedtuple("Entry", ["valid_from", "file"])): + __slots__ = () + + @staticmethod + def get(value): + if isinstance(value, Catalog): + return value + + if isinstance(value, str): + return Catalog.read_from(value) + + msg = f"Can't get Catalog from value of type {type(value)}" + raise ValueError(msg) + + @staticmethod + def 
read_from(file_name): + """Read from a valdiity YAML file and build a Catalog object""" + entries = {} + for props in PropsStream.get(file_name): + timestamp = props["valid_from"] + system = "all" if props.get("category") is None else props["category"] + file_key = props["apply"] + if system not in entries: + entries[system] = [] + mode = "append" if props.get("mode") is None else props["mode"] + mode = "reset" if len(entries[system]) == 0 else mode + if mode == "reset": + new = file_key + elif mode == "append": + new = entries[system][-1].file.copy() + file_key + elif mode == "remove": + new = entries[system][-1].file.copy() + for file in file_key: + new.remove(file) + elif mode == "replace": + new = entries[system][-1].file.copy() + if len(file_key) != 2: + msg = f"Invalid number of elements in replace mode: {len(file_key)}" + raise ValueError(msg) + new.remove(file_key[0]) + new += [file_key[1]] + + else: + msg = f"Unknown mode for {timestamp}" + raise ValueError(msg) + + if timestamp in [entry.valid_from for entry in entries[system]]: + msg = ( + f"Duplicate timestamp: {timestamp}, use reset mode instead with a single entry" + ) + raise ValueError(msg) + entries[system].append(Catalog.Entry(unix_time(timestamp), new)) + + for system in entries: + entries[system] = sorted(entries[system], key=lambda entry: entry.valid_from) + return Catalog(entries) + + def valid_for(self, timestamp, system="all", allow_none=False): + """Get the valid entries for a given timestamp and system""" + if system in self.entries: + valid_from = [entry.valid_from for entry in self.entries[system]] + pos = bisect.bisect_right(valid_from, unix_time(timestamp)) + if pos > 0: + return self.entries[system][pos - 1].file + + if system != "all": + return self.valid_for(timestamp, system="all", allow_none=allow_none) + + if allow_none: + return None + + msg = f"No valid entries found for timestamp: {timestamp}, system: {system}" + raise RuntimeError(msg) + + if system != "all": + return self.valid_for(timestamp, system="all", allow_none=allow_none) + + if allow_none: + return None + + msg = f"No entries found for system: {system}" + raise RuntimeError(msg) + + @staticmethod + def get_files(catalog_file, timestamp, category="all"): + """Helper function to get the files for a given timestamp and category""" + catalog = Catalog.read_from(catalog_file) + return Catalog.valid_for(catalog, timestamp, category) diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index 88720ae..2fc3525 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -6,20 +6,20 @@ import json import re import warnings -from typing import ClassVar import snakemake as smk +import yaml from .FileKey import FileKey, ProcessingFileKey from .patterns import par_validity_pattern class pars_key_resolve: - name_dict: ClassVar[dict] = {"cal": ["par_dsp", "par_hit"], "lar": ["par_dsp", "par_hit"]} def __init__(self, valid_from, category, apply): self.valid_from = valid_from self.category = category + self.mode = "reset" self.apply = apply def __str__(self): @@ -34,7 +34,7 @@ def from_filekey(cls, filekey, name_dict): filekey.timestamp, "all", filekey.get_path_from_filekey( - par_validity_pattern(), processing_step=name_dict, ext="json" + par_validity_pattern(), processing_step=name_dict, ext="yaml" ), ) @@ -44,6 +44,11 @@ def write_to_jsonl(file_names, path): for file_name in file_names: of.write(f"{file_name.get_json()}\n") + @staticmethod + def write_to_yaml(file_names, path): + with 
open(path, "w") as of: + yaml.dump([file_name.__dict__ for file_name in file_names], of, sort_keys=False) + @staticmethod def match_keys(key1, key2): if ( diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index 03f242e..7a9dd87 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -5,14 +5,14 @@ import os -from .CalibCatalog import CalibCatalog +from .catalog import Catalog from .FileKey import ProcessingFileKey # from .patterns import from .utils import get_pars_path, par_overwrite_path -class pars_catalog(CalibCatalog): +class pars_catalog(Catalog): @staticmethod def match_pars_files(filelist1, filelist2): for file2 in filelist2: @@ -29,9 +29,9 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): - par_file = os.path.join(get_pars_path(setup, tier), "validity.jsonl") + par_file = os.path.join(get_pars_path(setup, tier), "validity.yaml") pars_files = pars_catalog.get_calib_files(par_file, timestamp) - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.yaml") pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: pars_files, pars_files_overwrite = pars_catalog.match_pars_files( diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index b60d73f..7f0b30c 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -5,29 +5,16 @@ import os from .utils import ( - par_dsp_path, - par_evt_path, - par_hit_path, + get_pars_path, + get_tier_path, par_overwrite_path, - par_pht_path, - par_psp_path, - par_raw_path, - par_tcm_path, pars_path, plts_path, sandbox_path, tier_daq_path, - tier_dsp_path, - tier_evt_path, - tier_hit_path, tier_path, - tier_pet_path, - tier_pht_path, - tier_psp_path, tier_raw_blind_path, - tier_raw_path, tier_skm_path, - tier_tcm_path, tmp_log_path, tmp_par_path, tmp_plts_path, @@ -87,16 +74,6 @@ def get_pattern_tier_daq(setup): ) -def get_pattern_tier_raw(setup): - return os.path.join( - f"{tier_raw_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_raw.lh5", - ) - - def get_pattern_tier_raw_blind(setup): return os.path.join( f"{tier_raw_blind_path(setup)}", @@ -107,303 +84,55 @@ def get_pattern_tier_raw_blind(setup): ) -def get_pattern_tier_tcm(setup): - return os.path.join( - f"{tier_tcm_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_tcm.lh5", - ) - - -def get_pattern_tier_dsp(setup): - return os.path.join( - f"{tier_dsp_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_dsp.lh5", - ) - - -def get_pattern_tier_hit(setup): - return os.path.join( - f"{tier_hit_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_hit.lh5", - ) - - -def get_pattern_tier_evt(setup): - return os.path.join( - f"{tier_evt_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_evt.lh5", - ) - - -def get_pattern_tier_evt_concat(setup): - return os.path.join( - f"{tier_evt_path(setup)}", - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_evt.lh5", - ) - - -def get_pattern_tier_psp(setup): - return os.path.join( - f"{tier_psp_path(setup)}", - "{datatype}", - "{period}", - "{run}", - 
"{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_psp.lh5", - ) - - -def get_pattern_tier_pht(setup): - return os.path.join( - f"{tier_pht_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pht.lh5", - ) - - -def get_pattern_tier_pet(setup): - return os.path.join( - f"{tier_pet_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pet.lh5", - ) - - -def get_pattern_tier_pet_concat(setup): - return os.path.join( - f"{tier_pet_path(setup)}", - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_pet.lh5", - ) - - -def get_pattern_tier_skm(setup): - return os.path.join( - f"{tier_skm_path(setup)}", - "phy", - "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", - ) - - def get_pattern_tier(setup, tier, check_in_cycle=True): - if tier == "daq": - file_pattern = get_pattern_tier_daq(setup) - elif tier == "raw": - file_pattern = get_pattern_tier_raw(setup) - elif tier == "tcm": - file_pattern = get_pattern_tier_tcm(setup) - elif tier == "dsp": - file_pattern = get_pattern_tier_dsp(setup) - elif tier == "hit": - file_pattern = get_pattern_tier_hit(setup) - elif tier == "evt": - file_pattern = get_pattern_tier_evt(setup) - elif tier == "evt_concat": - file_pattern = get_pattern_tier_evt_concat(setup) - elif tier == "psp": - file_pattern = get_pattern_tier_psp(setup) - elif tier == "pht": - file_pattern = get_pattern_tier_pht(setup) - elif tier == "pet": - file_pattern = get_pattern_tier_pet(setup) - elif tier == "pet_concat": - file_pattern = get_pattern_tier_pet_concat(setup) - elif tier == "skm": - file_pattern = get_pattern_tier_skm(setup) - else: - msg = "invalid tier" - raise Exception(msg) - if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: - return "/tmp/" + os.path.basename(file_pattern) - else: - return file_pattern - - -def get_pattern_par_raw(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_raw_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_raw_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_raw_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_raw" + f".{extension}", - ) - - -def get_pattern_par_tcm(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_tcm_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_tcm_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_tcm_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_tcm" + f".{extension}", - ) - - -def get_pattern_par_dsp(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_dsp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_dsp_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_dsp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_dsp" + f".{extension}", - ) - - -def get_pattern_par_hit(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_hit_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_hit_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_hit_path(setup)}", - "cal", - 
"{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_hit" + f".{extension}", - ) - - -def get_pattern_par_evt(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_evt_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_evt" + f".{extension}", - ) - - -def get_pattern_par_psp(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_psp_path(setup)}", - "cal", + if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: + file_pattern = os.path.join( + get_tier_path(setup, tier), + "{datatype}", "{period}", "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_psp_" + f"{name}.{extension}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5", ) - else: - return os.path.join( - f"{par_psp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_psp" + f".{extension}", + elif tier in ["evt_concat", "pet_concat"]: + file_pattern = os.path.join( + get_tier_path(setup, tier[:3]), + "{datatype}", + "{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5", ) - -def get_pattern_par_pht(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_pht_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pht_" + f"{name}.{extension}", + elif tier == "skm": + file_pattern = os.path.join( + f"{tier_skm_path(setup)}", + "phy", + "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", ) else: - return os.path.join( - f"{par_pht_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pht" + f".{extension}", - ) - - -def get_pattern_par_pet(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pet_" + f"{name}.{extension}", - ) + msg = "invalid tier" + raise Exception(msg) + if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: + return "/tmp/" + os.path.basename(file_pattern) else: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pet" + f".{extension}", - ) + return file_pattern -def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=True): - if tier == "raw": - file_pattern = get_pattern_par_raw(setup, name, extension) - elif tier == "tcm": - file_pattern = get_pattern_par_tcm(setup, name, extension) - elif tier == "dsp": - file_pattern = get_pattern_par_dsp(setup, name, extension) - elif tier == "hit": - file_pattern = get_pattern_par_hit(setup, name, extension) - elif tier == "evt": - file_pattern = get_pattern_par_evt(setup, name, extension) - elif tier == "psp": - file_pattern = get_pattern_par_psp(setup, name, extension) - elif tier == "pht": - file_pattern = get_pattern_par_pht(setup, name, extension) - elif tier == "pet": - file_pattern = get_pattern_par_pet(setup, name, extension) +def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=True): + if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: + if name is not None: + return os.path.join( + 
get_pars_path(setup, tier), + "cal", + "{period}", + "{run}", + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}_{name}.{extension}", + ) + else: + file_pattern = os.path.join( + get_pars_path(setup, tier), + "cal", + "{period}", + "{run}", + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}", + ) else: msg = "invalid tier" raise Exception(msg) @@ -419,7 +148,7 @@ def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=Tr return file_pattern -def get_pattern_pars_svm(setup, tier, name=None, ext="json"): +def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): if name is not None: return os.path.join( f"{par_overwrite_path(setup)}", @@ -440,7 +169,7 @@ def get_pattern_pars_svm(setup, tier, name=None, ext="json"): ) -def get_pattern_pars_overwrite(setup, tier, name=None): +def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): if name is not None: return os.path.join( f"{par_overwrite_path(setup)}", @@ -449,10 +178,7 @@ def get_pattern_pars_overwrite(setup, tier, name=None): "{period}", "{run}", "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + tier - + "_" - + name - + "-overwrite.json", + + f"{tier}_{name}-overwrite.{extension}", ) else: return os.path.join( @@ -461,32 +187,34 @@ def get_pattern_pars_overwrite(setup, tier, name=None): "{datatype}", "{period}", "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + tier + "-overwrite.json", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + + tier + + f"-overwrite.{extension}", ) -def get_pattern_pars_tmp(setup, tier, name=None, datatype=None): +def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml"): if datatype is None: datatype = "{datatype}" if name is None: return os.path.join( f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + tier + ".json", + "{experiment}-{period}-{run}-" + + datatype + + "-{timestamp}-par_" + + f"{tier}.{extension}", ) else: return os.path.join( f"{tmp_par_path(setup)}", "{experiment}-{period}-{run}-" + datatype - + "-{timestamp}-par_" - + tier - + "_" - + name - + ".json", + + "-{timestamp}" + + f"par_{tier}_{name}.{extension}", ) -def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="json"): +def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: return os.path.join( f"{tmp_par_path(setup)}", @@ -509,11 +237,7 @@ def get_pattern_plts_tmp_channel(setup, tier, name=None): else: return os.path.join( f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + tier - + "_" - + name - + ".pkl", + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl", ) @@ -538,19 +262,6 @@ def get_pattern_plts(setup, tier, name=None): ) -def get_energy_grids_pattern_combine(setup): - return os.path.join( - f"{tmp_par_path(setup)}", - "dsp", - "cal", - "{{period}}", - "{{run}}", - "par_dsp_energy_grid", - "{{channel}}", - "{{experiment}}-{{period}}-{{run}}-cal-{{timestamp}}-{{channel}}-{peak}-par_dsp_energy_grid.pkl", - ) - - def get_pattern_log(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", @@ -559,17 +270,17 @@ def get_pattern_log(setup, processing_step): ) -def get_pattern_log_concat(setup, processing_step): +def get_pattern_log_channel(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", processing_step, - "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", + 
"{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", ) -def get_pattern_log_channel(setup, processing_step): +def get_pattern_log_concat(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", processing_step, - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", + "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 894d69e..2cb53ef 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -40,135 +40,51 @@ def tier_path(setup): return setup["paths"]["tier"] -def tier_tcm_path(setup): - return setup["paths"]["tier_tcm"] - - -def tier_raw_path(setup): - return setup["paths"]["tier_raw"] - - -def tier_dsp_path(setup): - return setup["paths"]["tier_dsp"] - - -def tier_hit_path(setup): - return setup["paths"]["tier_hit"] - - -def tier_evt_path(setup): - return setup["paths"]["tier_evt"] - - -def tier_psp_path(setup): - return setup["paths"]["tier_psp"] - - -def tier_pht_path(setup): - return setup["paths"]["tier_pht"] - - -def tier_pet_path(setup): - return setup["paths"]["tier_pet"] - - -def tier_skm_path(setup): - return setup["paths"]["tier_skm"] - - def get_tier_path(setup, tier): if tier == "raw": - return tier_raw_path(setup) + return setup["paths"]["tier_raw"] elif tier == "tcm": - return tier_tcm_path(setup) + return setup["paths"]["tier_tcm"] elif tier == "dsp": - return tier_dsp_path(setup) + return setup["paths"]["tier_dsp"] elif tier == "hit": - return tier_hit_path(setup) + return setup["paths"]["tier_hit"] elif tier == "evt": - return tier_evt_path(setup) + return setup["paths"]["tier_evt"] elif tier == "psp": - return tier_psp_path(setup) + return setup["paths"]["tier_psp"] elif tier == "pht": - return tier_pht_path(setup) + return setup["paths"]["tier_pht"] elif tier == "pet": - return tier_pet_path(setup) + return setup["paths"]["tier_pet"] elif tier == "skm": - return tier_skm_path(setup) + return setup["paths"]["tier_skm"] else: msg = f"no tier matching:{tier}" raise ValueError(msg) -def config_path(setup): - return setup["paths"]["config"] - - -def chan_map_path(setup): - return setup["paths"]["chan_map"] - - -def metadata_path(setup): - return setup["paths"]["metadata"] - - -def detector_db_path(setup): - return setup["paths"]["detector_db"] - - -def par_raw_path(setup): - return setup["paths"]["par_raw"] - - -def par_tcm_path(setup): - return setup["paths"]["par_tcm"] - - -def par_dsp_path(setup): - return setup["paths"]["par_dsp"] - - -def par_hit_path(setup): - return setup["paths"]["par_hit"] - - -def par_evt_path(setup): - return setup["paths"]["par_evt"] - - -def par_psp_path(setup): - return setup["paths"]["par_psp"] - - -def par_pht_path(setup): - return setup["paths"]["par_pht"] - - -def par_pet_path(setup): - return setup["paths"]["par_pet"] - - def pars_path(setup): return setup["paths"]["par"] def get_pars_path(setup, tier): if tier == "raw": - return par_raw_path(setup) + return setup["paths"]["par_raw"] elif tier == "tcm": - return par_tcm_path(setup) + return setup["paths"]["par_tcm"] elif tier == "dsp": - return par_dsp_path(setup) + return setup["paths"]["par_dsp"] elif tier == "hit": - return par_hit_path(setup) + return setup["paths"]["par_hit"] elif tier == "evt": - return par_evt_path(setup) + return setup["paths"]["par_evt"] elif tier == "psp": - return par_psp_path(setup) + return setup["paths"]["par_psp"] elif tier == "pht": - return par_pht_path(setup) + return 
setup["paths"]["par_pht"] elif tier == "pet": - return par_pet_path(setup) + return setup["paths"]["par_pet"] else: msg = f"no tier matching:{tier}" raise ValueError(msg) @@ -190,6 +106,22 @@ def par_overwrite_path(setup): return setup["paths"]["par_overwrite"] +def config_path(setup): + return setup["paths"]["config"] + + +def chan_map_path(setup): + return setup["paths"]["chan_map"] + + +def metadata_path(setup): + return setup["paths"]["metadata"] + + +def detector_db_path(setup): + return setup["paths"]["detector_db"] + + def log_path(setup): return setup["paths"]["log"] From 41c326bca6b596a78c9da886ad76a123c3d1e507 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:22:10 +0100 Subject: [PATCH 07/17] update rules for pattern changes --- Snakefile | 2 +- rules/blinding_calibration.smk | 2 +- rules/blinding_check.smk | 2 +- rules/common.smk | 4 ++-- rules/dsp.smk | 5 +---- rules/evt.smk | 10 +++++----- rules/hit.smk | 5 ++--- rules/pht.smk | 1 - rules/pht_fast.smk | 1 - rules/psp.smk | 2 +- rules/raw.smk | 4 +++- rules/tcm.smk | 3 +-- 12 files changed, 18 insertions(+), 23 deletions(-) diff --git a/Snakefile b/Snakefile index 017f0b1..b2daaa2 100644 --- a/Snakefile +++ b/Snakefile @@ -44,7 +44,7 @@ configs = config_path(setup) chan_maps = chan_map_path(setup) meta = metadata_path(setup) swenv = runcmd(setup) -part = ds.dataset_file(setup, os.path.join(configs, "partitions.json")) +part = ds.cal_grouping(setup, os.path.join(configs, "partitions.json")) basedir = workflow.basedir diff --git a/rules/blinding_calibration.smk b/rules/blinding_calibration.smk index ef0a11e..bcf0d64 100644 --- a/rules/blinding_calibration.smk +++ b/rules/blinding_calibration.smk @@ -5,7 +5,7 @@ Snakemake rules for calibrating daq energy for blinding. Two steps: """ from scripts.util.patterns import ( - get_pattern_par_raw, + get_pattern_pars, get_pattern_plts, get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, diff --git a/rules/blinding_check.smk b/rules/blinding_check.smk index 653eb3f..ac7240c 100644 --- a/rules/blinding_check.smk +++ b/rules/blinding_check.smk @@ -8,7 +8,7 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_raw, + get_pattern_pars, get_pattern_plts, get_pattern_pars, ) diff --git a/rules/common.smk b/rules/common.smk index c74f514..b985044 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -8,7 +8,7 @@ from scripts.util.patterns import ( par_raw_path, get_pattern_unsorted_data, get_pattern_tier_daq, - get_pattern_tier_raw, + get_pattern_tier, get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey @@ -114,4 +114,4 @@ def get_tier_pattern(tier): elif tier == "raw": return get_pattern_tier_daq(setup) else: - return get_pattern_tier_raw(setup) + return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/dsp.smk b/rules/dsp.smk index 661a990..f8ea4a3 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -13,10 +13,7 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_dsp, get_pattern_plts, - get_pattern_tier_raw, - get_pattern_tier_tcm, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, @@ -386,7 +383,7 @@ rule build_pars_dsp: rule build_dsp: input: - raw_file=get_pattern_tier_raw(setup), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, 
"dsp" diff --git a/rules/evt.smk b/rules/evt.smk index d51ad39..c760b54 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -21,16 +21,16 @@ for tier in ("evt", "pet"): rule: input: dsp_file=( - get_pattern_tier_dsp(setup) + get_pattern_tier(setup, "dsp", check_in_cycle=False) if tier == "evt" - else get_pattern_tier_psp(setup) + else get_pattern_tier(setup, "psp", check_in_cycle=False) ), hit_file=( - get_pattern_tier_hit(setup) + get_pattern_tier(setup, "hit", check_in_cycle=False) if tier == "evt" - else get_pattern_tier_pht(setup) + else get_pattern_tier(setup, "pht", check_in_cycle=False) ), - tcm_file=get_pattern_tier_tcm(setup), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), xtalk_matrix=lambda wildcards: get_svm_file( tier=tier, wildcards=wildcards, name="xtc" ), diff --git a/rules/hit.smk b/rules/hit.smk index fac37a1..f1bb0ba 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -11,9 +11,8 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_hit, + get_pattern_pars, get_pattern_plts, - get_pattern_tier_dsp, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, @@ -297,7 +296,7 @@ rule build_pars_hit: rule build_hit: input: - dsp_file=get_pattern_tier_dsp(setup), + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), pars_file=lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, "hit" ), diff --git a/rules/pht.smk b/rules/pht.smk index 86646fa..76542a3 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -13,7 +13,6 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index 925d42c..5672011 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -5,7 +5,6 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, diff --git a/rules/psp.smk b/rules/psp.smk index 9a3e4af..a959cf4 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -337,7 +337,7 @@ rule build_pars_psp: rule build_psp: input: - raw_file=get_pattern_tier_raw(setup), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), pars_file=ancient( lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, "psp" diff --git a/rules/raw.smk b/rules/raw.smk index 20d1105..a81520a 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -43,7 +43,9 @@ rule build_raw_blind: and runs only if the blinding check file is on disk. Output is just the blinded raw file. 
""" input: - tier_file=get_pattern_tier_raw(setup).replace("{datatype}", "phy"), + tier_file=get_pattern_tier(setup, "raw", check_in_cycle=False).replace( + "{datatype}", "phy" + ), blind_file=get_blinding_curve_file, params: timestamp="{timestamp}", diff --git a/rules/tcm.smk b/rules/tcm.smk index 657cda3..c1164bb 100644 --- a/rules/tcm.smk +++ b/rules/tcm.smk @@ -3,7 +3,6 @@ Snakemake file containing the rules for generating the tcm """ from scripts.util.patterns import ( - get_pattern_tier_raw, get_pattern_tier, get_pattern_log, get_pattern_pars_tmp_channel, @@ -14,7 +13,7 @@ from scripts.util.patterns import ( # This rule builds the tcm files each raw file rule build_tier_tcm: input: - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), params: timestamp="{timestamp}", datatype="{datatype}", From 1698eb1561a8a49d9fd154688f3e01cda8c2cdee Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:30:19 +0100 Subject: [PATCH 08/17] add debug mode functionality --- scripts/pars_hit_aoe.py | 4 ++++ scripts/pars_hit_ecal.py | 4 ++++ scripts/pars_hit_lq.py | 4 ++++ scripts/pars_pht_aoecal.py | 4 ++++ scripts/pars_pht_fast.py | 2 ++ scripts/pars_pht_lqcal.py | 4 ++++ scripts/pars_pht_partcal.py | 8 +++++++- 7 files changed, 29 insertions(+), 1 deletion(-) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index ed33f23..be40ed5 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -66,6 +66,7 @@ def aoe_calibration( dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, + debug_mode: bool = False, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] aoe = CalAoE( @@ -82,6 +83,7 @@ def aoe_calibration( mean_func=mean_func, sigma_func=sigma_func, compt_bands_width=comptBands_width, + debug_mode=debug_mode | args.debug, ) aoe.update_cal_dicts( @@ -116,6 +118,8 @@ def aoe_calibration( argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--aoe_results", help="aoe_results", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index d19b427..f7b8be3 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -439,6 +439,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) argparser.add_argument("--save_path", help="save_path", type=str) argparser.add_argument("--results_path", help="results_path", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -565,6 +567,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): glines, guess, kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False) | args.debug, ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, etol_kev=5 if det_status == "on" else 20 @@ -575,6 +578,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): glines, guess, kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False), ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, 
etol_kev=5 if det_status == "on" else 30, n_sigma=2 diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 5a0ad96..da83623 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -54,6 +54,7 @@ def lq_calibration( cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, + debug_mode: bool = False, ): """Loads in data from the provided files and runs the LQ calibration on said files @@ -99,6 +100,7 @@ def lq_calibration( eres_func, cdf, selection_string, + debug_mode=debug_mode | args.debug, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -136,6 +138,8 @@ def lq_calibration( argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--lq_results", help="lq_results", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index bf91d38..8fb2b36 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -92,6 +92,7 @@ def aoe_calibration( dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, + debug_mode: bool = False, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] aoe = CalAoE( @@ -108,6 +109,7 @@ def aoe_calibration( mean_func=mean_func, sigma_func=sigma_func, compt_bands_width=comptBands_width, + debug_mode=debug_mode | args.debug, ) aoe.update_cal_dicts( { @@ -263,6 +265,8 @@ def eres_func(x): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 8210df7..6ab1a4b 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -66,6 +66,8 @@ def run_splitter(files): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 3d5915e..890554f 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -75,6 +75,7 @@ def lq_calibration( cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, + debug_mode: bool = False, ): """Loads in data from the provided files and runs the LQ calibration on said files @@ -119,6 +120,7 @@ def lq_calibration( eres_func, cdf, selection_string, + debug_mode=debug_mode | args.debug, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -259,6 +261,8 @@ def eres_func(x): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) 
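# A minimal sketch of how the new "-d/--debug" flag is meant to combine with the optional per-channel setting in these calibration scripts (names follow the patch; the plain "or" spelling is equivalent for bools to the bitwise "|" used above):
#
#     debug_mode = kwarg_dict.get("debug_mode", False) or args.debug
#     aoe = CalAoE(..., debug_mode=debug_mode)
#
# With debug_mode enabled the pygama calibration objects are presumably run in a stricter mode where fit failures surface instead of being silently caught.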
argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 0d74ac8..b6f12d7 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -218,7 +218,11 @@ def calibrate_partition( for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): energy = data.query(selection_string)[energy_param].to_numpy() full_object_dict[cal_energy_param] = HPGeCalibration( - energy_param, glines, 1, kwarg_dict.get("deg", 0) # , fixed={1: 1} + energy_param, + glines, + 1, + kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False) | args.debug, # , fixed={1: 1} ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( energy, @@ -426,6 +430,8 @@ def calibrate_partition( argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") From b8404444ee8fab5fbac4f871f6c8f535906c82d3 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 18:02:08 +0100 Subject: [PATCH 09/17] os to pathlib.Path --- .ruff.toml | 2 +- scripts/blinding_calibration.py | 3 +- scripts/build_dsp.py | 13 +- scripts/build_evt.py | 5 +- scripts/build_hit.py | 11 +- scripts/build_raw.py | 10 +- scripts/build_raw_blind.py | 9 +- scripts/build_skm.py | 5 +- scripts/build_tcm.py | 7 +- scripts/check_blinding.py | 9 +- scripts/complete_run.py | 49 +++--- scripts/create_chankeylist.py | 7 +- scripts/merge_channels.py | 35 ++-- scripts/par_psp.py | 18 +- scripts/pars_dsp_build_svm.py | 3 +- scripts/pars_dsp_dplms.py | 17 +- scripts/pars_dsp_eopt.py | 17 +- scripts/pars_dsp_event_selection.py | 15 +- scripts/pars_dsp_nopt.py | 13 +- scripts/pars_dsp_svm.py | 9 +- scripts/pars_dsp_tau.py | 13 +- scripts/pars_hit_aoe.py | 21 ++- scripts/pars_hit_ecal.py | 21 +-- scripts/pars_hit_lq.py | 21 ++- scripts/pars_hit_qc.py | 11 +- scripts/pars_pht_aoecal.py | 37 ++-- scripts/pars_pht_fast.py | 39 ++-- scripts/pars_pht_lqcal.py | 41 +++-- scripts/pars_pht_partcal.py | 35 ++-- scripts/pars_pht_qc.py | 19 +- scripts/pars_pht_qc_phy.py | 13 +- scripts/pars_tcm_pulser.py | 7 +- scripts/util/FileKey.py | 6 +- scripts/util/cal_grouping.py | 25 ++- scripts/util/catalog.py | 2 +- scripts/util/create_pars_keylist.py | 8 +- scripts/util/pars_loading.py | 11 +- scripts/util/patterns.py | 264 +++++++++++++++------------- scripts/util/utils.py | 6 +- tests/test_util.py | 19 +- 40 files changed, 431 insertions(+), 445 deletions(-) diff --git a/.ruff.toml b/.ruff.toml index 29f8014..8b4d420 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -12,7 +12,7 @@ lint.select = [ "PIE", # flake8-pie "PL", # pylint "PT", # flake8-pytest-style - # "PTH", # flake8-use-pathlib + "PTH", # flake8-use-pathlib "RET", # flake8-return "RUF", # Ruff-specific "SIM", # flake8-simplify diff --git a/scripts/blinding_calibration.py b/scripts/blinding_calibration.py index 6a1b0a7..62207e9 100644 --- a/scripts/blinding_calibration.py +++ b/scripts/blinding_calibration.py @@ -7,6 +7,7 @@ 
import argparse import logging import pickle as pkl +from pathlib import Path import matplotlib as mpl import matplotlib.pyplot as plt @@ -93,7 +94,7 @@ ax2.set_xlabel("energy (keV)") ax2.set_ylabel("counts") plt.suptitle(args.channel) -with open(args.plot_file, "wb") as w: +with Path(args.plot_file).open("wb") as w: pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index cbd0794..02bf6a1 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,6 +1,5 @@ import argparse import logging -import os import pathlib import re import time @@ -37,7 +36,7 @@ def replace_list_with_array(dic): argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -51,13 +50,13 @@ def replace_list_with_array(dic): channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} db_files = [ - par_file for par_file in args.pars_file if os.path.splitext(par_file)[1] in (".json", ".yaml") + par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) database_dic = replace_list_with_array(database_dic) -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.output).parent.mkdir(parents=True, exist_ok=True) rng = np.random.default_rng() rand_num = f"{rng.integers(0, 99999):05d}" @@ -78,9 +77,9 @@ def replace_list_with_array(dic): log.info(f"build_dsp finished in {time.time()-start}") -os.rename(temp_output, args.output) +pathlib.Path(temp_output).rename(args.output) -key = os.path.basename(args.output).replace("-tier_dsp.lh5", "") +key = pathlib.Path(args.output).name.replace("-tier_dsp.lh5", "") raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] @@ -109,5 +108,5 @@ def replace_list_with_array(dic): }, "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, } -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 3d993d8..6927c24 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -1,7 +1,6 @@ import argparse import json import logging -import os import time from pathlib import Path @@ -51,7 +50,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): args = argparser.parse_args() if args.log is not None: - Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") else: logging.basicConfig(level=logging.DEBUG) @@ -118,7 +117,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): log.debug(json.dumps(evt_config["channels"], indent=2)) t_start = time.time() -Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) table = build_evt( { diff --git a/scripts/build_hit.py b/scripts/build_hit.py index c550337..8e2da80 100644 --- 
a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -1,8 +1,7 @@ import argparse import logging -import os -import pathlib import time +from pathlib import Path from legendmeta import TextDB from legendmeta.catalog import Props @@ -24,7 +23,7 @@ argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -59,7 +58,7 @@ hit_dict[f"{channel}/dsp"] = chan_pars t_start = time.time() -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) build_hit(args.input, lh5_tables_config=hit_dict, outfile=args.output) t_elap = time.time() - t_start log.info(f"Done! Time elapsed: {t_elap:.2f} sec.") @@ -80,12 +79,12 @@ } hit_channels.append(channel) -key = os.path.basename(args.output).replace(f"-tier_{args.tier}.lh5", "") +key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") full_dict = { "valid_fields": {args.tier: hit_outputs}, "valid_keys": {key: {"valid_channels": {args.tier: hit_channels}}}, } -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_raw.py b/scripts/build_raw.py index c02b67b..03a4fca 100644 --- a/scripts/build_raw.py +++ b/scripts/build_raw.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import numpy as np from daq2lh5 import build_raw @@ -18,10 +17,10 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"][ @@ -83,4 +82,5 @@ build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) -os.rename(temp_output, args.output) +# rename the temp file +Path(temp_output).rename(args.output) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 0400f22..33a6c31 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -12,8 +12,7 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import numexpr as ne import numpy as np @@ -35,11 +34,11 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") logging.getLogger("lgdo").setLevel(logging.INFO) -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype) @@ -167,4 +166,4 @@ ) # rename the temp file -os.rename(temp_output,
args.output) +Path(temp_output).rename(args.output) diff --git a/scripts/build_skm.py b/scripts/build_skm.py index a327caa..10bf876 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import awkward as ak from legendmeta import TextDB @@ -32,7 +31,7 @@ def get_all_out_fields(input_table, out_fields, current_field=""): args = argparser.parse_args() if args.log is not None: - pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index c39faea..2ceb3ab 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -21,7 +20,7 @@ logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) channel_dict = configs["snakemake_rules"]["tier_tcm"]["inputs"] @@ -50,4 +49,4 @@ **settings, ) -os.rename(temp_output, args.output) +Path(temp_output).rename(args.output) diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 4d8a6fa..7d6da04 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -8,9 +8,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl +from pathlib import Path import matplotlib as mpl import matplotlib.pyplot as plt @@ -40,7 +39,7 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -85,7 +84,7 @@ ax2.set_xlabel("energy (keV)") ax2.set_ylabel("counts") plt.suptitle(args.channel) -with open(args.plot_file, "wb") as w: +with Path(args.plot_file).open("wb") as w: pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() @@ -93,7 +92,7 @@ # valid and if so create file else raise error.
if detector is in ac mode it # will always pass this check if np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) or det_status is False: - pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) + Path(args.output).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output, {}) else: msg = "peaks not found in daqenergy" diff --git a/scripts/complete_run.py b/scripts/complete_run.py index f61ba37..fe800e8 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -1,7 +1,6 @@ # ruff: noqa: F821, T201 import datetime -import glob import json import os import time @@ -20,14 +19,14 @@ def as_ro(path): def check_log_files(log_path, output_file, gen_output, warning_file=None): now = datetime.datetime.now(datetime.UTC).strftime("%d/%m/%y %H:%M") - os.makedirs(os.path.dirname(output_file), exist_ok=True) + Path(output_file).parent.mkdir(parents=True, exist_ok=True) if warning_file is not None: - os.makedirs(os.path.dirname(warning_file), exist_ok=True) - with open(warning_file, "w") as w, open(output_file, "w") as f: + Path(warning_file).parent.mkdir(parents=True, exist_ok=True) + with Path(warning_file).open("w") as w, Path(output_file).open("w") as f: n_errors = 0 n_warnings = 0 for file in Path(log_path).rglob("*.log"): - with open(file) as r: + with Path(file).open() as r: text = r.read() if "ERROR" in text or "WARNING" in text: for line in text.splitlines(): @@ -40,24 +39,24 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): w.write( f"{gen_output} successfully generated at {now} with warnings \n" ) - f.write(f"{os.path.basename(file)} : {line}\n") + f.write(f"{Path(file).name} : {line}\n") n_errors += 1 elif "WARNING" in line: - w.write(f"{os.path.basename(file)} : {line}\n") + w.write(f"{Path(file).name} : {line}\n") n_warnings += 1 else: pass - os.remove(file) + Path(file).unlink() text = None if n_errors == 0: f.write(f"{gen_output} successfully generated at {now} with no errors \n") if n_warnings == 0: w.write(f"{gen_output} successfully generated at {now} with no warnings \n") else: - with open(output_file, "w") as f: + with Path(output_file).open("w") as f: n_errors = 0 for file in Path(log_path).rglob("*.log"): - with open(file) as r: + with Path(file).open() as r: text = r.read() if "ERROR" in text: for line in text.splitlines(): @@ -66,18 +65,18 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): f.write( f"{gen_output} successfully generated at {now} with errors \n" ) - f.write(f"{os.path.basename(file)} : {line}\n") + f.write(f"{Path(file).name} : {line}\n") n_errors += 1 else: pass - os.remove(file) + Path(file).unlink() text = None if n_errors == 0: f.write(f"{gen_output} successfully generated at {now} with no errors \n") walk = list(os.walk(log_path)) for path, _, _ in walk[::-1]: if len(os.listdir(path)) == 0: - os.rmdir(path) + Path(path).rmdir() def add_spaces(n): @@ -124,7 +123,7 @@ def get_run(Filekey): key_dict = {} for file in files: - key = FileKey.get_filekey_from_filename(os.path.basename(file)) + key = FileKey.get_filekey_from_filename(Path(file).name) if get_run(key) in key_dict: key_dict[get_run(key)].append(file) else: @@ -133,24 +132,24 @@ def get_run(Filekey): def build_valid_keys(input_files, output_dir): - infiles = glob.glob(as_ro(input_files)) + infiles = list(Path(as_ro(input_files)).parent.glob(Path(input_files).name)) key_dict = get_keys(infiles) for key in list(key_dict): dtype = key.split("-")[-1] - out_file = os.path.join(output_dir, f'{key.replace(f"-{dtype}",
"")}-valid_{dtype}.json') - Path(os.path.dirname(out_file)).mkdir(parents=True, exist_ok=True) - if os.path.isfile(out_file): + out_file = Path(output_dir) / f'{key.replace(f"-{dtype}", "")}-valid_{dtype}.json' + out_file.parent.mkdir(parents=True, exist_ok=True) + if Path(out_file).is_file(): out_dict = Props.read_from([out_file] + key_dict[key]) else: out_dict = Props.read_from(key_dict[key]) out_string = readable_json(out_dict) - with open(out_file, "w") as w: + with Path(out_file).open("w") as w: w.write(out_string) for input_file in infiles: - if os.path.isfile(input_file): - os.remove(input_file) + if Path(input_file).is_file(): + Path(input_file).unlink() def find_gen_runs(gen_tier_path): @@ -268,16 +267,16 @@ def fformat(tier): if snakemake.wildcards.tier != "daq": print(f"INFO: ...building FileDBs with {snakemake.threads} threads") - os.makedirs(snakemake.params.filedb_path, exist_ok=True) + Path(snakemake.params.filedb_path).parent.makedirs(parents=True, exist_ok=True) - with open(os.path.join(snakemake.params.filedb_path, "file_db_config.json"), "w") as f: + with (Path(snakemake.params.filedb_path) / "file_db_config.json").open("w") as f: json.dump(file_db_config, f, indent=2) build_file_dbs(ut.tier_path(snakemake.params.setup), snakemake.params.filedb_path) - os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) + (Path(snakemake.params.filedb_path) / "file_db_config.json").unlink() build_valid_keys( - os.path.join(ut.tmp_par_path(snakemake.params.setup), "*_db.json"), + Path(ut.tmp_par_path(snakemake.params.setup)) / "*_db.json", snakemake.params.valid_keys_path, ) diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index 435f55c..6ed4510 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -1,6 +1,5 @@ import argparse -import os -import pathlib +from pathlib import Path from legendmeta import LegendMetadata, TextDB @@ -25,7 +24,7 @@ if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] -pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) -with open(args.output_file, "w") as f: +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) +with Path(args.output_file).open("w") as f: for chan in channels: f.write(f"{chan}\n") diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index a86d47d..e8994be 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -1,8 +1,7 @@ import argparse -import os -import pathlib import pickle as pkl import shelve +from pathlib import Path import numpy as np from legendmeta.catalog import Props @@ -19,7 +18,7 @@ def replace_path(d, old_path, new_path): d[i] = replace_path(d[i], old_path, new_path) elif isinstance(d, str) and old_path in d: d = d.replace(old_path, new_path) - d = d.replace(new_path, f"$_/{os.path.basename(new_path)}") + d = d.replace(new_path, f"$_/{Path(new_path).name}") return d @@ -45,25 +44,25 @@ def replace_path(d, old_path, new_path): channel_files = args.input.infiles if hasattr(args.input, "infiles") else args.input -file_extension = pathlib.Path(args.output).suffix +file_extension = Path(args.output).suffix if file_extension == ".dat" or file_extension == ".dir": - out_file = os.path.splitext(args.output)[0] + out_file = Path(args.output).with_suffix("") else: out_file = args.output rng = np.random.default_rng() temp_output = f"{out_file}.{rng.integers(0, 99999):05d}" -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) 
+Path(args.output).parent.mkdir(parents=True, exist_ok=True) if file_extension == ".json" or file_extension == ".yaml" or file_extension == ".yml": out_dict = {} for channel in channel_files: - if pathlib.Path(channel).suffix == file_extension: + if Path(channel).suffix == file_extension: channel_dict = Props.read_from(channel) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel out_dict[channel_name] = channel_dict else: @@ -72,29 +71,29 @@ def replace_path(d, old_path, new_path): Props.write_to(temp_output, out_dict, "json") - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) elif file_extension == ".pkl": out_dict = {} for channel in channel_files: - with open(channel, "rb") as r: + with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel out_dict[channel_name] = channel_dict - with open(temp_output, "wb") as w: + with Path(temp_output).open("wb") as w: pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) elif file_extension == ".dat" or file_extension == ".dir": common_dict = {} with shelve.open(out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: for channel in channel_files: - with open(channel, "rb") as r: + with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel if isinstance(channel_dict, dict) and "common" in list(channel_dict): chan_common_dict = channel_dict.pop("common") @@ -108,8 +107,8 @@ def replace_path(d, old_path, new_path): if args.in_db: db_dict = Props.read_from(args.in_db) for channel in channel_files: - if pathlib.Path(channel).suffix == file_extension: - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + if Path(channel).suffix == file_extension: + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel tb_in = lh5.read(f"{channel_name}", channel) @@ -128,4 +127,4 @@ def replace_path(d, old_path, new_path): if args.out_db: Props.write_to(args.out_db, db_dict) - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) diff --git a/scripts/par_psp.py b/scripts/par_psp.py index 52c2ed6..94473a0 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -1,7 +1,7 @@ import argparse -import os import pickle as pkl from datetime import datetime +from pathlib import Path import matplotlib as mpl import matplotlib.dates as mdates @@ -44,7 +44,7 @@ # partitions could be different for different channels - do separately for each channel in_dicts = {} for file in args.input: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp in_dicts[tstamp] = Props.read_from(file) plot_dict = {} @@ -109,36 +109,36 @@ plt.close() for file in args.output: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp Props.write_to(file, in_dicts[tstamp]) if args.out_plots: for file in args.out_plots: - tstamp = 
ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp if args.in_plots: for infile in args.in_plots: if tstamp in infile: - with open(infile, "rb") as f: + with Path(infile).open("rb") as f: old_plot_dict = pkl.load(f) break old_plot_dict.update({"psp": plot_dict}) new_plot_dict = old_plot_dict else: new_plot_dict = {"psp": plot_dict} - with open(file, "wb") as f: + with Path(file).open("wb") as f: pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) if args.out_obj: for file in args.out_obj: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp if args.in_obj: for infile in args.in_obj: if tstamp in infile: - with open(infile, "rb") as f: + with Path(infile).open("rb") as f: old_obj_dict = pkl.load(f) break new_obj_dict = old_obj_dict else: new_obj_dict = {} - with open(file, "wb") as f: + with Path(file).open("wb") as f: pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index df97320..0d6ada7 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -1,6 +1,7 @@ import argparse import logging import pickle as pkl +from pathlib import Path from legendmeta.catalog import Props from lgdo import lh5 @@ -45,5 +46,5 @@ log.debug("trained model") # Save trained model with pickle -with open(args.output_file, "wb") as svm_file: +with Path(args.output_file).open("wb") as svm_file: pkl.dump(svm, svm_file, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index f643e03..607613c 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -1,9 +1,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -52,7 +51,7 @@ db_dict = Props.read_from(args.database) if dplms_dict["run_dplms"] is True: - with open(args.fft_raw_filelist) as f: + with Path(args.fft_raw_filelist).open() as f: fft_files = sorted(f.read().splitlines()) t0 = time.time() @@ -91,7 +90,7 @@ display=1, ) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: inplot_dict = pkl.load(r) inplot_dict.update({"dplms": plot_dict}) @@ -115,14 +114,14 @@ out_dict = {} dplms_pars = Table(col_dict={"coefficients": Array([])}) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: inplot_dict = pkl.load(r) else: inplot_dict = {} db_dict.update(out_dict) -pathlib.Path(os.path.dirname(args.lh5_path)).mkdir(parents=True, exist_ok=True) +Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) sto.write( Table(col_dict={"dplms": dplms_pars}), name=args.channel, @@ -130,10 +129,10 @@ wo_mode="overwrite", ) -pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.dsp_pars, db_dict) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 0edf617..bcda090 100644 --- 
a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -1,10 +1,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time import warnings +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -351,19 +350,19 @@ else: db_dict.update({"ctc_params": out_alpha_dict}) - pathlib.Path(os.path.dirname(args.qbb_grid_path)).mkdir(parents=True, exist_ok=True) - with open(args.qbb_grid_path, "wb") as f: + Path(args.qbb_grid_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.qbb_grid_path).open("wb") as f: pkl.dump(optimisers, f) else: - pathlib.Path(args.qbb_grid_path).touch() + Path(args.qbb_grid_path).touch() -pathlib.Path(os.path.dirname(args.final_dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.final_dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.final_dsp_pars, db_dict) if args.plot_path: if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: plot_dict = pkl.load(r) else: plot_dict = {} @@ -383,6 +382,6 @@ "acq_space": bopt_zac.plot_acq(init_samples=sample_x), } - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as w: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as w: pkl.dump(plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index ea2bb34..2e6505b 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -1,11 +1,10 @@ import argparse import json import logging -import os -import pathlib import time import warnings from bisect import bisect_left +from pathlib import Path import lgdo import lgdo.lh5 as lh5 @@ -121,14 +120,14 @@ def get_out_data( peak_dict = Props.read_from(peak_json) db_dict = Props.read_from(args.decay_const) - pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) + Path(args.peak_file).parent.mkdir(parents=True, exist_ok=True) if peak_dict.pop("run_selection") is True: log.debug("Starting peak selection") rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.peak_file}.{rand_num}" - with open(args.raw_filelist) as f: + with Path(args.raw_filelist).open() as f: files = f.read().splitlines() raw_files = sorted(files) @@ -138,7 +137,7 @@ def get_out_data( elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -225,7 +224,7 @@ def get_out_data( } for file in raw_files: - log.debug(os.path.basename(file)) + log.debug(Path(file).name) for peak, peak_dict in pk_dicts.items(): if peak_dict["idxs"] is not None: # idx is a long continuous array @@ -358,7 +357,7 @@ def get_out_data( log.debug(f"{peak} has reached the required number of events") else: - pathlib.Path(temp_output).touch() + Path(temp_output).touch() log.debug(f"event selection completed in {time.time()-t0} seconds") - os.rename(temp_output, args.peak_file) + Path(temp_output).rename(args.peak_file) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 67ffd5f..47261d2 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -1,9 +1,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time +from pathlib import Path 
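# For reference, a sketch of the os/os.path idioms this patch replaces and their pathlib equivalents (not exhaustive; "p" stands for any path-like string):
#
#     from pathlib import Path
#     Path(p).parent                                      # os.path.dirname(p)
#     Path(p).name                                        # os.path.basename(p)
#     Path(p).suffix                                      # os.path.splitext(p)[1]
#     Path(p).parent.mkdir(parents=True, exist_ok=True)   # os.makedirs(os.path.dirname(p), exist_ok=True)
#     Path(p).open("rb")                                  # open(p, "rb")
#     Path(p).unlink()                                     # os.remove(p)
#     Path(tmp).rename(p)                                  # os.rename(tmp, p)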
import lgdo.lh5 as lh5 import numpy as np @@ -57,7 +56,7 @@ db_dict = Props.read_from(args.database) if opt_dict.pop("run_nopt") is True: - with open(args.raw_filelist) as f: + with Path(args.raw_filelist).open() as f: files = f.read().splitlines() raw_files = sorted(files) @@ -96,15 +95,15 @@ plot_dict = {} if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: old_plot_dict = pkl.load(r) plot_dict = dict(noise_optimisation=plot_dict, **old_plot_dict) else: plot_dict = {"noise_optimisation": plot_dict} - with open(args.plot_path, "wb") as f: + with Path(args.plot_path).open("wb") as f: pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) -pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.dsp_pars, dict(nopt_pars=out_dict, **db_dict)) diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm.py index 28b335e..370e320 100644 --- a/scripts/pars_dsp_svm.py +++ b/scripts/pars_dsp_svm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path from legendmeta.catalog import Props @@ -14,7 +13,7 @@ if args.log is not None: - pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") else: logging.basicConfig(level=logging.DEBUG) @@ -27,9 +26,9 @@ par_data = Props.read_from(args.input_file) -file = f"'$_/{os.path.basename(args.svm_file)}'" +file = f"'$_/{Path(args.svm_file).name}'" par_data["svm"] = {"model_file": file} -pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output_file, par_data) diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index c4750c6..82cec2d 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,8 +1,7 @@ import argparse import logging -import os -import pathlib import pickle as pkl +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -52,7 +51,7 @@ kwarg_dict.pop("run_tau") if isinstance(args.raw_files, list) and args.raw_files[0].split(".")[-1] == "filelist": input_file = args.raw_files[0] - with open(input_file) as f: + with Path(input_file).open() as f: input_file = f.read().splitlines() else: input_file = args.raw_files @@ -63,7 +62,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -113,17 +112,17 @@ tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) plot_dict = tau.plot_waveforms_after_correction( tb_data, "wf_pz", norm_param=kwarg_dict.get("norm_param", "pz_mean") ) plot_dict.update(tau.plot_slopes(slopes[idxs])) - with open(args.plot_path, "wb") as f: + with Path(args.plot_path).open("wb") as f: pkl.dump({"tau": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) else: out_dict = {} 
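# The scripts above keep the temp-file idiom through the conversion: outputs are written next to the final path under a random suffix and only renamed into place once writing succeeds, presumably so downstream rules never pick up a half-written file. A sketch, assuming "args.output" and a numpy Generator "rng" as in the surrounding scripts:
#
#     temp_output = f"{args.output}.{rng.integers(0, 99999):05d}"
#     write_result(temp_output)                  # hypothetical writer stand-in
#     Path(temp_output).rename(args.output)      # atomic replace on the same filesystem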
-pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output_file, tau.output_dict) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index be40ed5..a393868 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -2,10 +2,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path from typing import Callable import numpy as np @@ -142,7 +141,7 @@ def aoe_calibration( cal_dict = ecal_dict["pars"] eres_dict = ecal_dict["results"]["ecal"] -with open(args.eres_file, "rb") as o: +with Path(args.eres_file).open("rb") as o: object_dict = pkl.load(o) if kwarg_dict["run_aoe"] is True: @@ -158,7 +157,7 @@ def aoe_calibration( for field, item in kwarg_dict["plot_options"].items(): kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -210,7 +209,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -246,7 +245,7 @@ def eres_func(x): if args.plot_file: common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: out_plot_dict = pkl.load(r) out_plot_dict.update({"aoe": plot_dict}) else: @@ -257,11 +256,11 @@ def eres_func(x): elif common_dict is not None: out_plot_dict["common"] = common_dict - pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, exist_ok=True) - with open(args.plot_file, "wb") as w: + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) +Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) results_dict = dict(**ecal_dict["results"], aoe=out_dict) final_hit_dict = { "pars": {"operations": cal_dict}, @@ -269,10 +268,10 @@ def eres_func(x): } Props.write_to(args.hit_pars, final_hit_dict) -pathlib.Path(os.path.dirname(args.aoe_results)).mkdir(parents=True, exist_ok=True) +Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, aoe=obj, ) -with open(args.aoe_results, "wb") as w: +with Path(args.aoe_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index f7b8be3..b310500 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import warnings from datetime import datetime +from pathlib import Path import lgdo.lh5 as lh5 import matplotlib as mpl @@ -462,9 +461,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): db_files = [ par_file for par_file in args.ctc_dict - if os.path.splitext(par_file)[1] == ".json" - or os.path.splitext(par_file)[1] == ".yml" - or os.path.splitext(par_file)[1] == ".yaml" + if Path(par_file).suffix in (".json", ".yml", ".yaml") ] database_dic = Props.read_from(db_files) @@ -493,7 
+490,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): bl_plots[field]["function"] = eval(item["function"]) common_plots = kwarg_dict.pop("common_plots") - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -514,7 +511,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -725,7 +722,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): common_dict.update({key: param_dict}) if args.inplot_dict: - with open(args.inplot_dict, "rb") as f: + with Path(args.inplot_dict).open("rb") as f: total_plot_dict = pkl.load(f) else: total_plot_dict = {} @@ -737,8 +734,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): total_plot_dict.update({"ecal": plot_dict}) - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary @@ -746,6 +743,6 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): Props.write_to(args.save_path, output_dict) # save calibration objects - with open(args.results_path, "wb") as fp: - pathlib.Path(os.path.dirname(args.results_path)).mkdir(parents=True, exist_ok=True) + with Path(args.results_path).open("wb") as fp: + Path(args.results_path).parent.mkdir(parents=True, exist_ok=True) pkl.dump({"ecal": full_object_dict}, fp, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index da83623..579b34a 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -2,10 +2,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -160,7 +159,7 @@ def lq_calibration( cal_dict = ecal_dict["pars"]["operations"] eres_dict = ecal_dict["results"]["ecal"] -with open(args.eres_file, "rb") as o: +with Path(args.eres_file).open("rb") as o: object_dict = pkl.load(o) if kwarg_dict["run_lq"] is True: @@ -172,7 +171,7 @@ def lq_calibration( for field, item in kwarg_dict["plot_options"].items(): kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -213,7 +212,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -247,7 +246,7 @@ def eres_func(x): if args.plot_file: common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: out_plot_dict = pkl.load(r) out_plot_dict.update({"lq": plot_dict}) else: @@ -258,24 +257,24 @@ def eres_func(x): elif common_dict is not None: out_plot_dict["common"] = common_dict - pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, 
exist_ok=True) - with open(args.plot_file, "wb") as w: + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) results_dict = dict(**eres_dict, lq=out_dict) -pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) +Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) final_hit_dict = { "pars": {"operations": cal_dict}, "results": results_dict, } Props.write_to(args.hit_pars, final_hit_dict) -pathlib.Path(os.path.dirname(args.lq_results)).mkdir(parents=True, exist_ok=True) +Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, lq=obj, ) Props.write_to(args.lq_results, final_object_dict) -with open(args.lq_results, "wb") as w: +with Path(args.lq_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 9640087..5311c46 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np from legendmeta import LegendMetadata @@ -160,7 +159,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -226,10 +225,10 @@ hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} - pathlib.Path(os.path.dirname(args.save_path)).mkdir(parents=True, exist_ok=True) + Path(args.save_path).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.save_path, hit_dict) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 8fb2b36..e9573e3 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path from typing import Callable import numpy as np @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -289,33 +288,33 @@ def eres_func(x): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if 
args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run if isinstance(args.input_files, list): files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() else: - with open(args.input_files) as f: + with Path(args.input_files).open() as f: files = f.read().splitlines() files = sorted( @@ -325,7 +324,7 @@ def eres_func(x): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -369,7 +368,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -403,21 +402,21 @@ def eres_func(x): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": cal_dict[fk.timestamp], "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + Path(out).parent.mkdir(parents=True, exist_ok=True) Props.write_to(out, final_hit_dict) for out in args.aoe_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 6ab1a4b..4064b3c 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -3,10 +3,9 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -83,29 +82,29 @@ def run_splitter(files): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with 
open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() files = sorted( @@ -115,7 +114,7 @@ def run_splitter(files): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -179,7 +178,7 @@ def run_splitter(files): if args.pulser_files: mask = np.array([], dtype=bool) for file in args.pulser_files: - with open(file) as f: + with Path(file).open() as f: pulser_dict = json.load(f) pulser_mask = np.array(pulser_dict["mask"]) mask = np.append(mask, pulser_mask) @@ -188,7 +187,7 @@ def run_splitter(files): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -249,22 +248,22 @@ def run_splitter(files): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": {"operations": cal_dict[fk.timestamp]}, "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "w") as w: + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("w") as w: json.dump(final_hit_dict, w, indent=4) for out in args.fit_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 890554f..2ba88af 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -4,10 +4,9 @@ import copy import json import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = 
ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -285,33 +284,33 @@ def eres_func(x): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run if isinstance(args.input_files, list): files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() else: - with open(args.input_files) as f: + with Path(args.input_files).open() as f: files = f.read().splitlines() files = sorted( @@ -321,7 +320,7 @@ def eres_func(x): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -348,7 +347,7 @@ def eres_func(x): if args.pulser_files: mask = np.array([], dtype=bool) for file in args.pulser_files: - with open(file) as f: + with Path(file).open() as f: pulser_dict = json.load(f) pulser_mask = np.array(pulser_dict["mask"]) mask = np.append(mask, pulser_mask) @@ -357,7 +356,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -391,22 +390,22 @@ def eres_func(x): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": {"operations": cal_dict[fk.timestamp]}, "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "w") as w: + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("w") as w: json.dump(final_hit_dict, w, indent=4) for out in args.lq_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, 
exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index b6f12d7..a6eab18 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -34,7 +33,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -447,29 +446,29 @@ def calibrate_partition( for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() files = sorted( @@ -479,7 +478,7 @@ def calibrate_partition( final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -518,7 +517,7 @@ def calibrate_partition( elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -553,21 +552,21 @@ def calibrate_partition( if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": cal_dict[fk.timestamp], "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + Path(out).parent.mkdir(parents=True, exist_ok=True) Props.write_to(out, final_hit_dict) for out in args.fit_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - 
pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index f62da8b..790ee0a 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np from legendmeta import LegendMetadata @@ -72,10 +71,10 @@ if isinstance(args.cal_files, list): cal_files = [] for file in args.cal_files: - with open(file) as f: + with Path(file).open() as f: cal_files += f.read().splitlines() else: - with open(args.cal_files) as f: + with Path(args.cal_files).open() as f: cal_files = f.read().splitlines() cal_files = sorted( @@ -99,10 +98,10 @@ if isinstance(args.fft_files, list): fft_files = [] for file in args.fft_files: - with open(file) as f: + with Path(file).open() as f: fft_files += f.read().splitlines() else: - with open(args.fft_files) as f: + with Path(args.fft_files).open() as f: fft_files = f.read().splitlines() fft_files = sorted( @@ -223,7 +222,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, total_mask = get_tcm_pulser_ids( @@ -305,11 +304,11 @@ plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} for file in args.save_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + Path(file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(file, hit_dict) if args.plot_path: for file in args.plot_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) - with open(file, "wb") as f: + Path(file).parent.mkdir(parents=True, exist_ok=True) + with Path(file).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 10af322..48f3d9f 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -64,7 +63,7 @@ if isinstance(args.phy_files, list): phy_files = [] for file in sorted(args.phy_files): - with open(file) as f: + with Path(file).open() as f: run_files = f.read().splitlines() if len(run_files) == 0: continue @@ -78,7 +77,7 @@ ) bl_mask = np.append(bl_mask, bl_idxs) else: - with open(args.phy_files) as f: + with Path(args.phy_files).open() as f: phy_files = f.read().splitlines() phy_files = sorted(np.unique(phy_files)) bls = sto.read("ch1027200/dsp/", phy_files, field_mask=["wf_max", "bl_mean"])[0] @@ -147,11 +146,11 @@ log.debug(f"cut_dict is: {json.dumps(hit_dict, indent=2)}") for file in args.save_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + Path(file).name.mkdir(parents=True, exist_ok=True) Props.write_to(file, {"pars": {"operations": hit_dict}}) if args.plot_path: for file in args.plot_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) - with open(file, "wb") as f: + 
Path(file).parent.mkdir(parents=True, exist_ok=True) + with Path(file).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index f72a04a..27c1101 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -41,7 +40,7 @@ if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": tcm_files = args.tcm_files[0] - with open(tcm_files) as f: + with Path(tcm_files).open() as f: tcm_files = f.read().splitlines() else: tcm_files = args.tcm_files @@ -51,5 +50,5 @@ tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) -pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True) +Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/scripts/util/FileKey.py b/scripts/util/FileKey.py index 5c01f97..9f646cc 100644 --- a/scripts/util/FileKey.py +++ b/scripts/util/FileKey.py @@ -2,9 +2,9 @@ This module contains classes to convert between keys and files using the patterns defined in patterns.py """ -import os import re from collections import namedtuple +from pathlib import Path import snakemake as smk @@ -216,7 +216,7 @@ def per_grouper(files): pers = [] per_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.experiment}-{fk.period}" not in pers: pers.append(f"{fk.experiment}-{fk.period}") per_files.append([]) @@ -231,7 +231,7 @@ def run_grouper(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.experiment}-{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.experiment}-{fk.period}-{fk.run}") run_files.append([]) diff --git a/scripts/util/cal_grouping.py b/scripts/util/cal_grouping.py index aec1572..651c137 100644 --- a/scripts/util/cal_grouping.py +++ b/scripts/util/cal_grouping.py @@ -3,7 +3,7 @@ """ import json -import os +from pathlib import Path from .FileKey import ChannelProcKey, ProcessingFileKey from .patterns import ( @@ -16,7 +16,7 @@ class cal_grouping: def __init__(self, setup, input_file): - with open(input_file) as r: + with Path(input_file).open() as r: self.datasets = json.load(r) self.expand_runs() self.setup = setup @@ -43,18 +43,13 @@ def get_filelists(self, dataset, channel, tier, experiment="l200", datatype="cal for per in dataset: if dataset[per] == "all": files += [ - os.path.join( - filelist_path(self.setup), - f"all-{experiment}-{per}-*-{datatype}-{tier}.filelist", - ) + Path(filelist_path(self.setup)) + / f"all-{experiment}-{per}-*-{datatype}-{tier}.filelist" ] else: files += [ - os.path.join( - filelist_path(self.setup), - f"all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist", - ) - for run in dataset[per] + Path(filelist_path(self.setup)) + / "all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" ] return files @@ -80,7 +75,7 @@ def get_par_files( channel = "{channel}" selected_par_files = [] for par_file in all_par_files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(par_file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) if 
( fk.datatype == datatype and fk.experiment == experiment @@ -128,7 +123,7 @@ def get_plt_files( channel = "{channel}" selected_par_files = [] for par_file in all_par_files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(par_file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) if ( fk.datatype == datatype and fk.experiment == experiment @@ -170,7 +165,7 @@ def get_log_file( datatype=datatype, name=name, ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(par_files[0])) + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) if channel == "default": fk.channel = "{channel}" else: @@ -187,7 +182,7 @@ def get_timestamp(self, catalog, dataset, channel, tier, experiment="l200", data datatype=datatype, name=None, ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(par_files[0])) + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) return fk.timestamp def get_wildcard_constraints(self, dataset, channel): diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py index 1fb516b..390a7c1 100644 --- a/scripts/util/catalog.py +++ b/scripts/util/catalog.py @@ -43,7 +43,7 @@ def read_impl(sources): with file_name.open() as file: return yaml.safe_load(file) elif file_name.suffix == ".json": - with open(file_name) as file: + with file_name.open() as file: return json.load(file) else: msg = f"Can't run Props.read_from on file with suffix {file_name.suffix}" diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index 2fc3525..f347975 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -2,10 +2,10 @@ This module creates the validity files used for determining the time validity of data """ -import glob import json import re import warnings +from pathlib import Path import snakemake as smk import yaml @@ -40,13 +40,13 @@ def from_filekey(cls, filekey, name_dict): @staticmethod def write_to_jsonl(file_names, path): - with open(path, "w") as of: + with Path(path).open("w") as of: for file_name in file_names: of.write(f"{file_name.get_json()}\n") @staticmethod def write_to_yaml(file_names, path): - with open(path, "w") as of: + with Path(path).open("w") as of: yaml.dump([file_name.__dict__ for file_name in file_names], of, sort_keys=False) @staticmethod @@ -104,7 +104,7 @@ def get_keys(keypart, search_pattern): except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(search_pattern)) fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] - files = glob.glob(fn_glob_pattern) + files = Path(fn_glob_pattern).glob() keys = [] for f in files: m = tier_pattern_rx.match(f) diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index 7a9dd87..a21f6ae 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -3,7 +3,7 @@ to determine the par and par overwrite for a particular timestamp """ -import os +from pathlib import Path from .catalog import Catalog from .FileKey import ProcessingFileKey @@ -29,19 +29,18 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): - par_file = os.path.join(get_pars_path(setup, tier), "validity.yaml") + par_file = Path(get_pars_path(setup, tier)) / "validity.yaml" pars_files = pars_catalog.get_calib_files(par_file, timestamp) - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.yaml") + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" 
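The hunks above and below apply the same os.path -> pathlib substitution throughout the scripts. A minimal, self-contained sketch of the correspondences relied on here (the file name below is hypothetical, created only so the example runs):

    import os
    import tempfile
    from pathlib import Path

    # hypothetical par file path, written to a temporary directory
    p = Path(tempfile.mkdtemp()) / "cal" / "l200-p00-r000-cal-20230101T123456Z-par_dsp.json"
    p.parent.mkdir(parents=True, exist_ok=True)      # pathlib.Path(os.path.dirname(p)).mkdir(...)
    with p.open("w") as f:                           # open(p, "w")
        f.write("{}")

    assert p.name == os.path.basename(p)             # basename        -> .name
    assert str(p.parent) == os.path.dirname(p)       # dirname         -> .parent
    assert p.stem == os.path.splitext(p.name)[0]     # splitext(...)[0] -> .stem
    assert p.parent / p.name == p                    # os.path.join    -> "/" operator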
pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: pars_files, pars_files_overwrite = pars_catalog.match_pars_files( pars_files, pars_files_overwrite ) - pars_files = [os.path.join(get_pars_path(setup, tier), file) for file in pars_files] + pars_files = [Path(get_pars_path(setup, tier)) / file for file in pars_files] if len(pars_files_overwrite) > 0: pars_overwrite_files = [ - os.path.join(par_overwrite_path(setup), tier, file) - for file in pars_files_overwrite + Path(par_overwrite_path(setup)) / tier / file for file in pars_files_overwrite ] pars_files += pars_overwrite_files return pars_files diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 7f0b30c..cae1cd0 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -2,7 +2,7 @@ This module contains all the patterns needed for the data production """ -import os +from pathlib import Path from .utils import ( get_pars_path, @@ -56,61 +56,63 @@ def full_channel_pattern_with_extension(): def get_pattern_unsorted_data(setup): if sandbox_path(setup) is not None: - return os.path.join( - f"{sandbox_path(setup)}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca", + return ( + Path(f"{sandbox_path(setup)}") + / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" ) else: return None def get_pattern_tier_daq(setup): - return os.path.join( - f"{tier_daq_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca", + return ( + Path(f"{tier_daq_path(setup)}") + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" ) def get_pattern_tier_raw_blind(setup): - return os.path.join( - f"{tier_raw_blind_path(setup)}", - "phy", - "{period}", - "{run}", - "{experiment}-{period}-{run}-phy-{timestamp}-tier_raw.lh5", + return ( + Path(f"{tier_raw_blind_path(setup)}") + / "phy" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-phy-{timestamp}-tier_raw.lh5" ) def get_pattern_tier(setup, tier, check_in_cycle=True): if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: - file_pattern = os.path.join( - get_tier_path(setup, tier), - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5", + file_pattern = ( + Path(get_tier_path(setup, tier)) + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + + f"{tier}.lh5" ) elif tier in ["evt_concat", "pet_concat"]: - file_pattern = os.path.join( - get_tier_path(setup, tier[:3]), - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5", + file_pattern = ( + Path(get_tier_path(setup, tier[:3])) + / "{datatype}" + / "{experiment}-{period}-{run}-{datatype}-tier_" + + f"{tier[:3]}.lh5" ) elif tier == "skm": - file_pattern = os.path.join( - f"{tier_skm_path(setup)}", - "phy", - "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", + file_pattern = ( + Path(f"{tier_skm_path(setup)}") + / "phy" + / "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5" ) else: msg = "invalid tier" raise Exception(msg) - if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: - return "/tmp/" + os.path.basename(file_pattern) + if tier_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: + return "/tmp/" + Path(file_pattern).name else: return file_pattern @@ -118,25 +120,27 @@ def get_pattern_tier(setup, 
tier, check_in_cycle=True): def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=True): if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: if name is not None: - return os.path.join( - get_pars_path(setup, tier), - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}_{name}.{extension}", + return ( + Path(get_pars_path(setup, tier)) + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}_{name}.{extension}" ) else: - file_pattern = os.path.join( - get_pars_path(setup, tier), - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}", + file_pattern = ( + Path(get_pars_path(setup, tier)) + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}.{extension}" ) else: msg = "invalid tier" raise Exception(msg) - if pars_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: + if pars_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: if name is None: return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" else: @@ -150,46 +154,48 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): if name is not None: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-" + + f"par_{tier}_{name}.{ext}" ) else: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{ext}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-" + + f"par_{tier}.{ext}" ) def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): if name is not None: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + f"{tier}_{name}-overwrite.{extension}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + f"{tier}_{name}-overwrite.{extension}" ) else: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + tier - + f"-overwrite.{extension}", + + f"-overwrite.{extension}" ) @@ -197,90 +203,104 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" if datatype is None: datatype = "{datatype}" if name is None: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-" + return ( + Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" - + f"{tier}.{extension}", + + f"{tier}.{extension}" ) else: - return os.path.join( - f"{tmp_par_path(setup)}", - 
"{experiment}-{period}-{run}-" + return ( + Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + datatype + "-{timestamp}" - + f"par_{tier}_{name}.{extension}", + + f"par_{tier}_{name}.{extension}" ) def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}.{extension}", + return ( + Path(f"{tmp_par_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + + f"{tier}.{extension}" ) else: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" - + f"{tier}_{name}.{extension}", + return ( + Path(f"{tmp_par_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + + f"{tier}_{name}.{extension}" ) def get_pattern_plts_tmp_channel(setup, tier, name=None): if name is None: - return os.path.join( - f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + tier + ".pkl", + return ( + Path(f"{tmp_plts_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + + tier + + ".pkl" ) else: - return os.path.join( - f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl", + return ( + Path(f"{tmp_plts_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + + f"{tier}_{name}.pkl" ) def get_pattern_plts(setup, tier, name=None): if name is None: - return os.path.join( - f"{plts_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + ".dir", + return ( + Path(f"{plts_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + + tier + + ".dir" ) else: - return os.path.join( - f"{plts_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + "_" + name + ".dir", + return ( + Path(f"{plts_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + + tier + + "_" + + name + + ".dir" ) def get_pattern_log(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + + processing_step + + ".log" ) def get_pattern_log_channel(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + + processing_step + + ".log" ) def get_pattern_log_concat(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-{datatype}-" + + processing_step + + ".log" ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 2cb53ef..fd433c7 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -189,7 +189,7 @@ def subst_vars_in_snakemake_config(workflow, config): config_filename = 
workflow.overwrite_configfiles[0] # ToDo: Better way of handling this? subst_vars( config, - var_values={"_": os.path.dirname(config_filename)}, + var_values={"_": Path(config_filename).parent}, use_env=True, ignore_missing=False, ) @@ -203,8 +203,8 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - base = os.path.basename(file) - file_name = os.path.splitext(base)[0] + base = Path(file).name + file_name = Path(base).name parts = file_name.split("-") run_no = parts[3] if run_no not in runs: diff --git a/tests/test_util.py b/tests/test_util.py index 707843b..010c749 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,5 +1,4 @@ import json -import os from pathlib import Path from scripts.util import ( @@ -20,7 +19,7 @@ testprod = Path(__file__).parent / "dummy_cycle" -with open(str(testprod / "config.json")) as r: +with testprod.open() as r: setup = json.load(r) subst_vars(setup, var_values={"_": str(testprod)}) setup = setup["setups"]["test"] @@ -107,12 +106,12 @@ def test_create_pars_keylist(): def test_pars_loading(): pars_files = CalibCatalog.get_calib_files( - os.path.join(par_dsp_path(setup), "validity.jsonl"), "20230101T123456Z" + Path(par_dsp_path(setup)) / "validity.jsonl", "20230101T123456Z" ) assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] par_override_files = CalibCatalog.get_calib_files( - os.path.join(par_overwrite_path(setup), "dsp", "validity.jsonl"), "20230101T123456Z" + Path(par_overwrite_path(setup)) / "dsp" / "validity.jsonl", "20230101T123456Z" ) pars_files, pars_files_overwrite = pars_catalog.match_pars_files( @@ -122,12 +121,12 @@ def test_pars_loading(): assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] assert set(pars_catalog.get_par_file(setup, "20230101T123456Z", "dsp")) == { - os.path.join( - par_dsp_path(setup), - "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", + ( + Path(par_dsp_path(setup)) + / "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", ), - os.path.join( - par_overwrite_path(setup), - "dsp/cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json", + ( + Path(par_overwrite_path(setup)) + / "dsp/cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json", ), } From 323dd0966c02bd9486c91bebde472ed965b13517 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 28 Nov 2024 19:04:37 +0100 Subject: [PATCH 10/17] debugging --- Snakefile | 92 +++++++++--------------- rules/blinding_calibration.smk | 10 +-- rules/blinding_check.smk | 10 +-- rules/chanlist_gen.smk | 8 +-- rules/common.smk | 50 +++++++------ rules/dsp.smk | 33 +++++---- rules/evt.smk | 11 +-- rules/filelist_gen.smk | 34 ++++++--- rules/hit.smk | 24 ++++--- rules/pht.smk | 35 +++++---- rules/pht_fast.smk | 6 +- rules/psp.smk | 41 +++++++---- rules/qc_phy.smk | 11 ++- rules/raw.smk | 1 - scripts/create_chankeylist.py | 7 +- scripts/util/FileKey.py | 8 +++ scripts/util/__init__.py | 16 ++--- scripts/util/cal_grouping.py | 38 +++++++--- scripts/util/catalog.py | 2 +- scripts/util/create_pars_keylist.py | 31 ++++---- scripts/util/pars_loading.py | 8 +-- scripts/util/patterns.py | 106 +++++++++++----------------- scripts/util/utils.py | 4 ++ 23 files changed, 311 insertions(+), 275 deletions(-) diff --git a/Snakefile b/Snakefile index b2daaa2..39a3dee 100644 --- a/Snakefile +++ b/Snakefile @@ -10,7 +10,7 @@ This includes: - the same for partition level tiers """ -import pathlib +from pathlib import Path import os import json import sys @@ -20,8 +20,8 @@ from 
collections import OrderedDict import logging import scripts.util as ds -from scripts.util.pars_loading import pars_catalog -from scripts.util.patterns import get_pattern_tier_raw +from scripts.util.pars_loading import ParsCatalog +from scripts.util.patterns import get_pattern_tier from scripts.util.utils import ( subst_vars_in_snakemake_config, runcmd, @@ -31,6 +31,7 @@ from scripts.util.utils import ( metadata_path, tmp_log_path, pars_path, + det_status_path, ) # Set with `snakemake --configfile=/path/to/your/config.json` @@ -43,8 +44,9 @@ setup = config["setups"]["l200"] configs = config_path(setup) chan_maps = chan_map_path(setup) meta = metadata_path(setup) +det_status = det_status_path(setup) swenv = runcmd(setup) -part = ds.cal_grouping(setup, os.path.join(configs, "partitions.json")) +part = ds.CalGrouping(setup, Path(det_status) / "cal_partitions.yaml") basedir = workflow.basedir @@ -72,32 +74,6 @@ include: "rules/blinding_calibration.smk" include: "rules/qc_phy.smk" -# Log parameter catalogs in validity.jsonl files -hit_par_cat_file = os.path.join(pars_path(setup), "hit", "validity.jsonl") -if os.path.isfile(hit_par_cat_file): - os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) -pathlib.Path(os.path.dirname(hit_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(hit_par_catalog, hit_par_cat_file) - -pht_par_cat_file = os.path.join(pars_path(setup), "pht", "validity.jsonl") -if os.path.isfile(pht_par_cat_file): - os.remove(os.path.join(pars_path(setup), "pht", "validity.jsonl")) -pathlib.Path(os.path.dirname(pht_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(pht_par_catalog, pht_par_cat_file) - -dsp_par_cat_file = os.path.join(pars_path(setup), "dsp", "validity.jsonl") -if os.path.isfile(dsp_par_cat_file): - os.remove(dsp_par_cat_file) -pathlib.Path(os.path.dirname(dsp_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(dsp_par_catalog, dsp_par_cat_file) - -psp_par_cat_file = os.path.join(pars_path(setup), "psp", "validity.jsonl") -if os.path.isfile(psp_par_cat_file): - os.remove(psp_par_cat_file) -pathlib.Path(os.path.dirname(psp_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(psp_par_catalog, psp_par_cat_file) - - localrules: gen_filelist, autogen_output, @@ -111,36 +87,36 @@ onstart: shell('{swenv} python3 -B -c "import ' + pkg + '"') # Log parameter catalogs in validity.jsonl files - hit_par_cat_file = os.path.join(pars_path(setup), "hit", "validity.jsonl") - if os.path.isfile(hit_par_cat_file): - os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) - pathlib.Path(os.path.dirname(hit_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(hit_par_catalog, hit_par_cat_file) - - pht_par_cat_file = os.path.join(pars_path(setup), "pht", "validity.jsonl") - if os.path.isfile(pht_par_cat_file): - os.remove(os.path.join(pars_path(setup), "pht", "validity.jsonl")) - pathlib.Path(os.path.dirname(pht_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(pht_par_catalog, pht_par_cat_file) - - dsp_par_cat_file = os.path.join(pars_path(setup), "dsp", "validity.jsonl") - if os.path.isfile(dsp_par_cat_file): - os.remove(dsp_par_cat_file) - pathlib.Path(os.path.dirname(dsp_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(dsp_par_catalog, dsp_par_cat_file) - - psp_par_cat_file = os.path.join(pars_path(setup), "psp", 
"validity.jsonl") - if os.path.isfile(psp_par_cat_file): - os.remove(psp_par_cat_file) - pathlib.Path(os.path.dirname(psp_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(psp_par_catalog, psp_par_cat_file) + hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" + if hit_par_cat_file.is_file(): + hit_par_cat_file.unlink() + Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) + + pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" + if pht_par_cat_file.is_file(): + pht_par_cat_file.unlink() + Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) + + dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" + if dsp_par_cat_file.is_file(): + dsp_par_cat_file.unlink() + Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) + + psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" + if psp_par_cat_file.is_file(): + psp_par_cat_file.unlink() + Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) onsuccess: from snakemake.report import auto_report rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" - pathlib.Path(rep_dir).mkdir(parents=True, exist_ok=True) + Path(rep_dir).mkdir(parents=True, exist_ok=True) # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html") with open(os.path.join(rep_dir, "dag.txt"), "w") as f: @@ -190,12 +166,12 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier_raw(setup), - ignore_keys_file=os.path.join(configs, "ignore_keys.keylist"), - analysis_runs_file=os.path.join(configs, "analysis_runs.json"), + get_pattern_tier(setup, "raw", check_in_cycle=False), + ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", + analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - os.path.join(filelist_path(setup), "{label}-{tier}.filelist"), + Path(filelist_path(setup)) / "{label}-{tier}.filelist", run: if len(input) == 0: print( diff --git a/rules/blinding_calibration.smk b/rules/blinding_calibration.smk index bcf0d64..85ee2f6 100644 --- a/rules/blinding_calibration.smk +++ b/rules/blinding_calibration.smk @@ -11,6 +11,7 @@ from scripts.util.patterns import ( get_pattern_plts_tmp_channel, get_pattern_log_channel, ) +from pathlib import Path rule build_blinding_calibration: @@ -19,9 +20,8 @@ rule build_blinding_calibration: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), + files=Path(filelist_path(setup)) + / "all-{experiment}-{period}-{run}-cal-raw.filelist", params: timestamp="{timestamp}", datatype="cal", @@ -57,7 +57,7 @@ rule build_plts_blinding: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, name="blindcal", ), @@ -79,7 +79,7 @@ rule build_pars_blinding: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, name="blindcal", ), diff --git a/rules/blinding_check.smk 
b/rules/blinding_check.smk index ac7240c..eb3407d 100644 --- a/rules/blinding_check.smk +++ b/rules/blinding_check.smk @@ -12,6 +12,7 @@ from scripts.util.patterns import ( get_pattern_plts, get_pattern_pars, ) +from pathlib import Path rule build_blinding_check: @@ -20,9 +21,8 @@ rule build_blinding_check: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), + files=Path(filelist_path(setup)) + / "all-{experiment}-{period}-{run}-cal-raw.filelist", par_file=get_blinding_curve_file, params: timestamp="{timestamp}", @@ -59,7 +59,7 @@ rule build_plts_raw: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, ), output: @@ -80,7 +80,7 @@ rule build_pars_raw: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts( diff --git a/rules/chanlist_gen.smk b/rules/chanlist_gen.smk index 1dc4957..820d0fa 100644 --- a/rules/chanlist_gen.smk +++ b/rules/chanlist_gen.smk @@ -13,7 +13,7 @@ from scripts.util.utils import filelist_path, runcmd def get_par_chanlist( - setup, keypart, tier, basedir, configs, chan_maps, name=None, extension="json" + setup, keypart, tier, basedir, det_status, chan_maps, name=None, extension="yaml" ): tier_pattern = "((?P[^_]+)(\\_(?P[^_]+)(\\_(?P[^_]+)?)?)?)?" keypart_rx = re.compile(tier_pattern) @@ -28,7 +28,7 @@ def get_par_chanlist( f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --configs {configs}" + cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" os.system(cmd) @@ -42,7 +42,7 @@ def get_par_chanlist( return filenames -def get_plt_chanlist(setup, keypart, tier, basedir, configs, chan_maps, name=None): +def get_plt_chanlist(setup, keypart, tier, basedir, det_status, chan_maps, name=None): key = ChannelProcKey.parse_keypart(keypart) output_file = os.path.join( @@ -50,7 +50,7 @@ def get_plt_chanlist(setup, keypart, tier, basedir, configs, chan_maps, name=Non f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --configs {configs}" + cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" os.system(cmd) diff --git a/rules/common.smk b/rules/common.smk index b985044..6ba4654 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -2,16 +2,17 @@ Helper functions for running data production """ -import pathlib, os +from pathlib import Path from scripts.util.patterns import ( par_overwrite_path, - par_raw_path, + get_pars_path, get_pattern_unsorted_data, get_pattern_tier_daq, get_pattern_tier, get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey +from scripts.util.catalog import Catalog from scripts.util import utils @@ -21,8 +22,8 @@ def ro(path): def 
get_blinding_curve_file(wildcards): """func to get the blinding calibration curves from the overrides""" - par_files = pars_catalog.get_calib_files( - Path(par_overwrite_path(setup)) / "raw" / "validity.jsonl", + par_files = Catalog.get_files( + Path(par_overwrite_path(setup)) / "raw" / "validity.yaml", wildcards.timestamp, ) if isinstance(par_files, str): @@ -36,13 +37,13 @@ def get_blinding_curve_file(wildcards): def get_blinding_check_file(wildcards): """func to get the right blinding check file""" - par_files = pars_catalog.get_calib_files( - Path(par_raw_path(setup)) / "validity.jsonl", wildcards.timestamp + par_files = Catalog.get_files( + Path(get_pars_path(setup, "raw")) / "validity.yaml", wildcards.timestamp ) if isinstance(par_files, str): - return str(Path(par_raw_path(setup)) / par_files) + return Path(get_pars_path(setup, "raw")) / par_files else: - return [str(Path(par_raw_path(setup)) / par_file) for par_file in par_files] + return [Path(get_pars_path(setup, "raw")) / par_file for par_file in par_files] def set_last_rule_name(workflow, new_name): @@ -70,35 +71,38 @@ def set_last_rule_name(workflow, new_name): workflow.check_localrules() -def get_svm_file(wildcards, tier, name): - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, wildcards.timestamp +def get_input_par_file(wildcards, tier, name): + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + wildcards.timestamp, ) for pars_file in pars_files_overwrite: - if name in pars_file: - return os.path.join(par_overwrite_path(setup), tier, pars_file) + if name in str(pars_file): + return Path(par_overwrite_path(setup)) / tier / pars_file raise ValueError(f"Could not find model in {pars_files_overwrite}") def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" if timestamp is not None: - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, timestamp + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + timestamp, ) else: - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, wildcards.timestamp + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + wildcards.timestamp, ) if name is None: - fullname = f"{tier}-overwrite.json" + fullname = f"{tier}-overwrite.yaml" else: - fullname = f"{tier}_{name}-overwrite.json" + fullname = f"{tier}_{name}-overwrite.yaml" out_files = [] for pars_file in pars_files_overwrite: - if fullname in pars_file: - out_files.append(os.path.join(par_overwrite_path(setup), tier, pars_file)) + if fullname in str(pars_file): + out_files.append(Path(par_overwrite_path(setup)) / tier / pars_file) if len(out_files) == 0: raise ValueError(f"Could not find name in {pars_files_overwrite}") else: diff --git a/rules/dsp.smk b/rules/dsp.smk index f8ea4a3..3fa105c 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -6,9 +6,10 @@ Snakemake rules for processing dsp tier. 
This is done in 4 steps: - running dsp over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_dsp_path +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.create_pars_keylist import ParsKeyResolve from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -18,16 +19,20 @@ from scripts.util.patterns import ( get_pattern_pars_tmp, get_pattern_log, get_pattern_pars, - get_pattern_pars_overwrite, - get_pattern_pars_svm, ) -dsp_par_catalog = pars_key_resolve.get_par_catalog( +dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) +dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" +if dsp_par_cat_file.is_file(): + dsp_par_cat_file.unlink() +Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) + rule build_pars_dsp_tau: input: @@ -218,14 +223,16 @@ rule build_pars_dsp_eopt: rule build_svm_dsp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( + wildcards, "dsp", "svm_hyperpars" + ), + train_data=lambda wildcards: get_input_par_file( wildcards, "dsp", "svm_hyperpars" ).replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal"), + str(get_pattern_log(setup, "pars_dsp_svm")).replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -288,7 +295,7 @@ rule build_pars_dsp_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -344,7 +351,7 @@ rule build_pars_dsp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="dplms", extension="lh5", @@ -385,7 +392,7 @@ rule build_dsp: input: raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "dsp" ) ), diff --git a/rules/evt.smk b/rules/evt.smk index c760b54..91f04dd 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -2,13 +2,8 @@ Snakemake rules for processing evt tier. 
""" -from scripts.util.pars_loading import pars_catalog +from scripts.util.pars_loading import ParsCatalog from scripts.util.patterns import ( - get_pattern_tier_hit, - get_pattern_tier_dsp, - get_pattern_tier_tcm, - get_pattern_tier_pht, - get_pattern_tier_psp, get_pattern_tier, get_pattern_log, get_pattern_pars, @@ -31,10 +26,10 @@ for tier in ("evt", "pet"): else get_pattern_tier(setup, "pht", check_in_cycle=False) ), tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), - xtalk_matrix=lambda wildcards: get_svm_file( + xtalk_matrix=lambda wildcards: get_input_par_file( tier=tier, wildcards=wildcards, name="xtc" ), - par_files=lambda wildcards: pars_catalog.get_par_file( + par_files=lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "pht" ), output: diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 557d492..cb27661 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -1,6 +1,6 @@ import glob -import json -import os +import json, yaml +from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind @@ -9,9 +9,20 @@ from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): ignore_keys = [] if ignore_keys_file is not None: - if os.path.isfile(ignore_keys_file): - with open(ignore_keys_file) as f: - ignore_keys = f.read().splitlines() + if Path(ignore_keys_file).is_file(): + if Path(ignore_keys_file).suffix == ".json": + with Path(ignore_keys_file).open() as f: + ignore_keys = json.load(f) + elif Path(ignore_keys_file).suffix == ".keylist": + with Path(ignore_keys_file).open() as f: + ignore_keys = f.read().splitlines() + elif Path(ignore_keys_file).suffix in (".yaml", ".yml"): + with Path(ignore_keys_file).open() as f: + ignore_keys = yaml.safe_load(f) + else: + raise Warning( + "ignore_keys_file file not in json, yaml or keylist format" + ) ignore_keys = [ key.split("#")[0].strip() if "#" in key else key.strip() for key in ignore_keys @@ -23,9 +34,16 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): ignore_keys = [] if analysis_runs_file is not None: - if os.path.isfile(analysis_runs_file): - with open(analysis_runs_file) as f: - analysis_runs = json.load(f) + if Path(analysis_runs_file).is_file(): + if Path(ignore_keys_file).suffix == ".json": + with Path(analysis_runs_file).open() as f: + analysis_runs = json.load(f) + elif Path(ignore_keys_file).suffix in (".yaml", ".yml"): + with Path(analysis_runs_file).open() as f: + analysis_runs = yaml.safe_load(f) + else: + raise Warning("analysis_runs file not in json or yaml format") + analysis_runs = [] else: analysis_runs = [] print("no analysis_runs file found") diff --git a/rules/hit.smk b/rules/hit.smk index f1bb0ba..af1fcaf 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -6,7 +6,9 @@ Snakemake rules for processing hit tier. 
This is done in 4 steps: - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -19,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -hit_par_catalog = ds.pars_key_resolve.get_par_catalog( +hit_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_hit"], "lar": ["par_hit"]}, ) +hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" +if hit_par_cat_file.is_file(): + hit_par_cat_file.unlink() +Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) + # This rule builds the qc using the calibration dsp files and fft files rule build_qc: @@ -72,7 +80,7 @@ rule build_energy_calibration: ), pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ctc_dict=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "dsp" ) ), @@ -216,7 +224,7 @@ rule build_pars_hit_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -247,7 +255,7 @@ rule build_plts_hit: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, ), output: @@ -270,7 +278,7 @@ rule build_pars_hit: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts(setup, "hit"), @@ -297,7 +305,7 @@ rule build_pars_hit: rule build_hit: input: dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), - pars_file=lambda wildcards: pars_catalog.get_par_file( + pars_file=lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "hit" ), output: diff --git a/rules/pht.smk b/rules/pht.smk index 76542a3..dad1a24 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -6,9 +6,10 @@ Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -20,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -pht_par_catalog = ds.pars_key_resolve.get_par_catalog( +pht_par_catalog = ds.ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_pht"], "lar": ["par_pht"]}, ) +pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" +if pht_par_cat_file.is_file(): + pht_par_cat_file.unlink() +Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) + intier = "psp" @@ -50,7 +57,7 @@ for key, dataset in part.datasets.items(): cal_files=part.get_filelists(partition, key, intier), fft_files=part.get_filelists(partition, key, intier, datatype="fft"), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -207,7 +214,7 @@ rule build_per_energy_calibration: pht_dict=get_pattern_pars_tmp_channel(setup, "pht", "qc"), inplots=get_pattern_plts_tmp_channel(setup, "pht", "qc"), ctc_dict=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, intier ) ), @@ -258,7 +265,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -440,7 +447,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -620,7 +627,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -793,7 +800,7 @@ rule build_pars_pht_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -822,7 +829,7 @@ rule build_plts_pht: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, ), output: @@ -843,7 +850,7 @@ rule build_pars_pht: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts(setup, "pht"), @@ -868,7 +875,7 @@ rule build_pars_pht: rule build_pht: input: dsp_file=get_pattern_tier(setup, intier, check_in_cycle=False), - pars_file=lambda wildcards: pars_catalog.get_par_file( + pars_file=lambda wildcards: 
ParsCatalog.get_par_file( setup, wildcards.timestamp, "pht" ), output: diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index 5672011..f83e534 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -1,6 +1,6 @@ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, diff --git a/rules/psp.smk b/rules/psp.smk index a959cf4..53e8f59 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -6,9 +6,10 @@ Snakemake rules for processing pht (partition hit) tier data. This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_psp_path, par_dsp_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.utils import set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -20,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -psp_par_catalog = pars_key_resolve.get_par_catalog( +psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_psp"], "lar": ["par_psp"]}, ) +psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" +if psp_par_cat_file.is_file(): + psp_par_cat_file.unlink() +Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) + psp_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): @@ -172,14 +179,18 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_svm_psp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( + wildcards, "psp", "svm_hyperpars" + ), + train_data=lambda wildcards: get_input_par_file( wildcards, "psp", "svm_hyperpars" - ).replace("hyperpars.json", "train.lh5"), + ) + .as_posix() + .replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_psp_svm").replace("{datatype}", "cal"), + get_pattern_log(setup, "pars_psp_svm").as_posix().replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -221,7 +232,7 @@ rule build_pars_psp_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -250,7 +261,7 @@ rule build_plts_psp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, ), output: @@ -271,7 +282,7 @@ rule build_pars_psp_db: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, ), output: @@ -298,7 +309,7 @@ rule 
build_pars_psp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="dplms", extension="lh5", @@ -337,9 +348,9 @@ rule build_pars_psp: rule build_psp: input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "psp" ) ), diff --git a/rules/qc_phy.smk b/rules/qc_phy.smk index 5b9cd6f..b89d8d3 100644 --- a/rules/qc_phy.smk +++ b/rules/qc_phy.smk @@ -1,11 +1,10 @@ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, @@ -138,7 +137,7 @@ rule build_plts_pht_phy: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="qcphy", ), @@ -160,7 +159,7 @@ rule build_pars_pht_phy: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="qcphy", ), diff --git a/rules/raw.smk b/rules/raw.smk index a81520a..8239519 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,6 +1,5 @@ from scripts.util.patterns import ( get_pattern_tier_daq, - get_pattern_tier_raw, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index 6ed4510..f01c879 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -4,7 +4,7 @@ from legendmeta import LegendMetadata, TextDB argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--det_status", help="det_status", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channelmap", help="Channel Map", type=str, required=True) @@ -12,8 +12,8 @@ argparser.add_argument("--output_file", help="output_file", type=str, required=True) args = argparser.parse_args() -configs = TextDB(args.configs, lazy=True) -status_map = configs.on(args.timestamp, system=args.datatype)["analysis"] +det_status = TextDB(args.det_status, lazy=True) +status_map = det_status.statuses.on(args.timestamp, system=args.datatype) channel_map = LegendMetadata(args.channelmap, lazy=True) chmap = channel_map.channelmaps.on(args.timestamp) @@ -23,7 +23,6 @@ for chan in status_map if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] - Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) with Path(args.output_file).open("w") as f: for chan in channels: diff --git a/scripts/util/FileKey.py b/scripts/util/FileKey.py index 9f646cc..ca4573c 100644 --- a/scripts/util/FileKey.py +++ b/scripts/util/FileKey.py @@ 
-57,6 +57,8 @@ def get_filekey_from_pattern(cls, filename, pattern=None): except AttributeError: key_pattern_rx = re.compile(smk.io.regex(cls.key_pattern)) else: + if isinstance(pattern, Path): + pattern = pattern.as_posix() try: key_pattern_rx = re.compile(smk.io.regex_from_filepattern(pattern)) except AttributeError: @@ -92,6 +94,8 @@ def parse_keypart(cls, keypart): return cls(**d) def get_path_from_filekey(self, pattern, **kwargs): + if isinstance(pattern, Path): + pattern = pattern.as_posix() if kwargs is None: return smk.io.expand(pattern, **self._asdict()) else: @@ -163,6 +167,8 @@ def name(self): return f"{super().name}-{self.processing_step}" def get_path_from_filekey(self, pattern, **kwargs): + if isinstance(pattern, Path): + pattern = pattern.as_posix() if not isinstance(pattern, str): pattern = pattern(self.tier, self.identifier) if kwargs is None: @@ -198,6 +204,8 @@ def _asdict(self): @staticmethod def get_channel_files(keypart, par_pattern, chan_list): + if isinstance(par_pattern, Path): + par_pattern = par_pattern.as_posix() d = ChannelProcKey.parse_keypart(keypart) filenames = [] for chan in chan_list: diff --git a/scripts/util/__init__.py b/scripts/util/__init__.py index 90b7204..caa4dd2 100644 --- a/scripts/util/__init__.py +++ b/scripts/util/__init__.py @@ -1,8 +1,8 @@ -from .CalibCatalog import CalibCatalog, Props, PropsStream -from .create_pars_keylist import pars_key_resolve -from .dataset_cal import dataset_file +from .cal_grouping import CalGrouping +from .catalog import Catalog, Props, PropsStream +from .create_pars_keylist import ParsKeyResolve from .FileKey import ChannelProcKey, FileKey, ProcessingFileKey -from .pars_loading import pars_catalog +from .pars_loading import ParsCatalog from .utils import ( runcmd, subst_vars, @@ -14,13 +14,13 @@ __all__ = [ "Props", "PropsStream", - "CalibCatalog", - "pars_key_resolve", - "dataset_file", + "Catalog", + "ParsKeyResolve", + "CalGrouping", "FileKey", "ProcessingFileKey", "ChannelProcKey", - "pars_catalog", + "ParsCatalog", "unix_time", "runcmd", "subst_vars_impl", diff --git a/scripts/util/cal_grouping.py b/scripts/util/cal_grouping.py index 651c137..e41d5c7 100644 --- a/scripts/util/cal_grouping.py +++ b/scripts/util/cal_grouping.py @@ -5,19 +5,26 @@ import json from pathlib import Path +import yaml + from .FileKey import ChannelProcKey, ProcessingFileKey from .patterns import ( get_pattern_log_channel, + get_pattern_pars, get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) from .utils import filelist_path -class cal_grouping: +class CalGrouping: def __init__(self, setup, input_file): - with Path(input_file).open() as r: - self.datasets = json.load(r) + if Path(input_file).suffix == ".json": + with Path(input_file).open() as r: + self.datasets = json.load(r) + elif Path(input_file).suffix in (".yaml", ".yml"): + with Path(input_file).open() as r: + self.datasets = yaml.safe_load(r) self.expand_runs() self.setup = setup @@ -28,7 +35,7 @@ def expand_runs(self): if isinstance(runs, str) and ".." 
in runs: start, end = runs.split("..") self.datasets[channel][part][per] = [ - f"r{x:02}" for x in range(int(start[2:]), int(end) + 1) + f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1) ] def get_dataset(self, dataset, channel): @@ -49,7 +56,8 @@ def get_filelists(self, dataset, channel, tier, experiment="l200", datatype="cal else: files += [ Path(filelist_path(self.setup)) - / "all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" + / f"all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" + for run in dataset[per] ] return files @@ -62,14 +70,19 @@ def get_par_files( experiment="l200", datatype="cal", name=None, - extension="json", + extension="yaml", ): dataset = self.get_dataset(dataset, channel) all_par_files = [] for item in catalog: par_files = item.apply for par_file in par_files: - if par_file.split("-")[-1] == f"par_{tier}.json": + if ( + par_file.split("-")[-1] + == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( + "-" + )[-1] + ): all_par_files.append(par_file) if channel == "default": channel = "{channel}" @@ -117,7 +130,12 @@ def get_plt_files( for item in catalog: par_files = item.apply for par_file in par_files: - if par_file.split("-")[-1] == f"par_{tier}.json": + if ( + par_file.split("-")[-1] + == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( + "-" + )[-1] + ): all_par_files.append(par_file) if channel == "default": channel = "{channel}" @@ -201,6 +219,6 @@ def get_wildcard_constraints(self, dataset, channel): out_string = "" for channel in exclude_chans: out_string += f"(?!{channel})" - return out_string + r"ch\d{7}" + return out_string + r"^[VPCB]\d{1}\w{5}$" else: - return r"ch\d{7}" + return r"^[VPCB]\d{1}\w{5}$" diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py index 390a7c1..9ec9b80 100644 --- a/scripts/util/catalog.py +++ b/scripts/util/catalog.py @@ -79,7 +79,7 @@ class PropsStream: @staticmethod def get(value): - if isinstance(value, str): + if isinstance(value, (str, Path)): return PropsStream.read_from(value) if isinstance(value, (collections.abc.Sequence, types.GeneratorType)): diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index f347975..c3e1f22 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -14,7 +14,7 @@ from .patterns import par_validity_pattern -class pars_key_resolve: +class ParsKeyResolve: def __init__(self, valid_from, category, apply): self.valid_from = valid_from @@ -70,7 +70,7 @@ def generate_par_keylist(keys): keys = sorted(keys, key=FileKey.get_unix_timestamp) keylist.append(keys[0]) for key in keys[1:]: - matched_key = pars_key_resolve.match_keys(keylist[-1], key) + matched_key = ParsKeyResolve.match_keys(keylist[-1], key) if matched_key not in keylist: keylist.append(matched_key) else: @@ -89,10 +89,10 @@ def match_entries(entry1, entry2): @staticmethod def match_all_entries(entrylist, name_dict): out_list = [] - out_list.append(pars_key_resolve.from_filekey(entrylist[0], name_dict)) + out_list.append(ParsKeyResolve.from_filekey(entrylist[0], name_dict)) for entry in entrylist[1:]: - new_entry = pars_key_resolve.from_filekey(entry, name_dict) - pars_key_resolve.match_entries(out_list[-1], new_entry) + new_entry = ParsKeyResolve.from_filekey(entry, name_dict) + ParsKeyResolve.match_entries(out_list[-1], new_entry) out_list.append(new_entry) return out_list @@ -100,14 +100,17 @@ def match_all_entries(entrylist, name_dict): def get_keys(keypart, search_pattern): d = 
FileKey.parse_keypart(keypart) try: - tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(search_pattern)) + tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) + except AttributeError: - tier_pattern_rx = re.compile(smk.io.regex(search_pattern)) + tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] - files = Path(fn_glob_pattern).glob() + p = Path(fn_glob_pattern) + parts = p.parts[p.is_absolute() :] + files = Path(p.root).glob(str(Path(*parts))) keys = [] for f in files: - m = tier_pattern_rx.match(f) + m = tier_pattern_rx.match(str(f)) if m is not None: d = m.groupdict() key = FileKey(**d) @@ -118,19 +121,19 @@ def get_keys(keypart, search_pattern): def get_par_catalog(keypart, search_patterns, name_dict): if isinstance(keypart, str): keypart = [keypart] - if isinstance(search_patterns, str): + if isinstance(search_patterns, (str, Path)): search_patterns = [search_patterns] keylist = [] for search_pattern in search_patterns: for keypar in keypart: - keylist += pars_key_resolve.get_keys(keypar, search_pattern) + keylist += ParsKeyResolve.get_keys(keypar, search_pattern) if len(keylist) != 0: keys = sorted(keylist, key=FileKey.get_unix_timestamp) - keylist = pars_key_resolve.generate_par_keylist(keys) + keylist = ParsKeyResolve.generate_par_keylist(keys) - entrylist = pars_key_resolve.match_all_entries(keylist, name_dict) + entrylist = ParsKeyResolve.match_all_entries(keylist, name_dict) else: msg = "No Keys found" warnings.warn(msg, stacklevel=0) - entrylist = [pars_key_resolve("00000000T000000Z", "all", [])] + entrylist = [ParsKeyResolve("00000000T000000Z", "all", [])] return entrylist diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index a21f6ae..137ae03 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -12,7 +12,7 @@ from .utils import get_pars_path, par_overwrite_path -class pars_catalog(Catalog): +class ParsCatalog(Catalog): @staticmethod def match_pars_files(filelist1, filelist2): for file2 in filelist2: @@ -30,11 +30,11 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): par_file = Path(get_pars_path(setup, tier)) / "validity.yaml" - pars_files = pars_catalog.get_calib_files(par_file, timestamp) + pars_files = ParsCatalog.get_files(par_file, timestamp) par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" - pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) + pars_files_overwrite = ParsCatalog.get_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: - pars_files, pars_files_overwrite = pars_catalog.match_pars_files( + pars_files, pars_files_overwrite = ParsCatalog.match_pars_files( pars_files, pars_files_overwrite ) pars_files = [Path(get_pars_path(setup, tier)) / file for file in pars_files] diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index cae1cd0..2418ead 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -14,7 +14,6 @@ tier_daq_path, tier_path, tier_raw_blind_path, - tier_skm_path, tmp_log_path, tmp_par_path, tmp_plts_path, @@ -91,28 +90,26 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" - + f"{tier}.lh5" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5") ) elif tier in ["evt_concat", "pet_concat"]: 
file_pattern = ( Path(get_tier_path(setup, tier[:3])) / "{datatype}" - / "{experiment}-{period}-{run}-{datatype}-tier_" - + f"{tier[:3]}.lh5" + / ("{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5") ) elif tier == "skm": file_pattern = ( - Path(f"{tier_skm_path(setup)}") + Path(f"{get_tier_path(setup, tier)}") / "phy" / "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5" ) else: msg = "invalid tier" raise Exception(msg) - if tier_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: - return "/tmp/" + Path(file_pattern).name + if tier_path(setup) not in str(file_pattern.resolve(strict=False)) and check_in_cycle is True: + return "/tmp/" + file_pattern.name else: return file_pattern @@ -125,8 +122,10 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-par_" - + f"{tier}_{name}.{extension}" + / ( + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}_{name}.{extension}" + ) ) else: file_pattern = ( @@ -134,19 +133,21 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-par_" - + f"{tier}.{extension}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}") ) else: msg = "invalid tier" raise Exception(msg) - if pars_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: + if ( + pars_path(setup) not in str(Path(file_pattern).resolve(strict=False)) + and check_in_cycle is True + ): if name is None: return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" else: return ( "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}_{name}.{extension}" + f"par_{tier}_{name}.{extension}" ) else: return file_pattern @@ -160,8 +161,7 @@ def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}_{name}.{ext}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}") ) else: return ( @@ -170,8 +170,7 @@ def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}.{ext}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{ext}") ) @@ -183,8 +182,10 @@ def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - f"{tier}_{name}-overwrite.{extension}" + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + f"{tier}_{name}-overwrite.{extension}" + ) ) else: return ( @@ -193,9 +194,11 @@ def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + tier - + f"-overwrite.{extension}" + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + + tier + + f"-overwrite.{extension}" + ) ) @@ -203,15 +206,12 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" if datatype is None: datatype = "{datatype}" if name is None: - return ( - Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" - + datatype - + "-{timestamp}-par_" - + f"{tier}.{extension}" + return Path(f"{tmp_par_path(setup)}") / ( + 
"{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + f"{tier}.{extension}" ) else: - return ( - Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-" + datatype + "-{timestamp}" + f"par_{tier}_{name}.{extension}" @@ -220,32 +220,24 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: - return ( - Path(f"{tmp_par_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" - + f"{tier}.{extension}" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}.{extension}" ) else: - return ( - Path(f"{tmp_par_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}_{name}.{extension}" ) def get_pattern_plts_tmp_channel(setup, tier, name=None): if name is None: - return ( - Path(f"{tmp_plts_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + tier - + ".pkl" + return Path(f"{tmp_plts_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + tier + ".pkl" ) else: - return ( - Path(f"{tmp_plts_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + f"{tier}_{name}.pkl" + return Path(f"{tmp_plts_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl" ) @@ -257,9 +249,7 @@ def get_pattern_plts(setup, tier, name=None): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" - + tier - + ".dir" + / ("{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + ".dir") ) else: return ( @@ -268,11 +258,7 @@ def get_pattern_plts(setup, tier, name=None): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" - + tier - + "_" - + name - + ".dir" + / ("{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + "_" + name + ".dir") ) @@ -280,9 +266,7 @@ def get_pattern_log(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log") ) @@ -290,9 +274,7 @@ def get_pattern_log_channel(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log") ) @@ -300,7 +282,5 @@ def get_pattern_log_concat(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-{datatype}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log") ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index fd433c7..319eaa6 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -114,6 +114,10 @@ def chan_map_path(setup): return setup["paths"]["chan_map"] +def det_status_path(setup): + return setup["paths"]["detector_status"] + + def metadata_path(setup): return setup["paths"]["metadata"] From bbf65e90c9b4ead350b3761de17a473e9b2034fc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Nov 2024 
15:14:35 +0100 Subject: [PATCH 11/17] move info from readme to docs --- README.md | 112 ------------------------------------ docs/Makefile | 21 +++++++ docs/source/developer.rst | 15 +++++ docs/source/index.rst | 41 +++++++++++++ docs/source/user_manual.rst | 98 +++++++++++++++++++++++++++++++ 5 files changed, 175 insertions(+), 112 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/source/developer.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/user_manual.rst diff --git a/README.md b/README.md index 2459337..3565167 100644 --- a/README.md +++ b/README.md @@ -3,115 +3,3 @@ Implementation of an automatic data processing flow for L200 data, based on [Snakemake](https://snakemake.readthedocs.io/). - - -## Configuration - -Data processing resources are configured via a single site-dependent (and -possibly user-dependent) configuration file, named `config.json` in the -following. You may choose an arbitrary name, though. - -Use the included [templates/config.json](templates/config.json) as a template -and adjust the data base paths as necessary. Note that, when running Snakemake, -the default path to the config file is `./config.json`. - - -## Key-Lists - -Data generation is based on key-lists, which are flat text files -(extension ".keylist") containing one entry of the form -`{experiment}-{period}-{run}-{datatype}-{timestamp}` per line. - -Key-lists can be auto-generated based on the available DAQ files -using Snakemake targets of the form - -* `all-{experiment}.keylist` -* `all-{experiment}-{period}.keylist` -* `all-{experiment}-{period}-{run}.keylist` -* `all-{experiment}-{period}-{run}-{datatype}.keylist` - -which will generate the list of available file keys for all l200 files, resp. -a specific period, or a specific period and run, etc. - -For example: -```shell -$ snakemake all-l200-myper.keylist -``` -will generate a key-list with all files regarding period `myper`. - - -## File-Lists - -File-lists are flat files listing output files that should be generated, -with one file per line. A file-list will typically be generated for a given -data tier from a key-list, using the Snakemake targets of the form -`{label}-{tier}.filelist` (generated from `{label}.keylist`). - -For file lists based on auto-generated key-lists like -`all-{experiment}-{period}-{tier}.filelist`, the corresponding key-list -(`all-{experiment}-{period}.keylist` in this case) will be created -automatically, if it doesn't exist. - -Example: -```shell -$ snakemake all-mydet-mymeas-tier2.filelist -``` - -File-lists may of course also be derived from custom keylists, generated -manually or by other means, e.g. `my-dataset-raw.filelist` will be -generated from `my-dataset.keylist`. - - -## Main output generation - -Usually, the main output will be determined by a file-list, resp. a key-list -and data tier. The special output target `{label}-{tier}.gen` is used to -generate all files listed in `{label}-{tier}.filelist`. After the files -are created, the empty file `{label}-{tier}.filelist` will be created to -mark the successful data production. - -Snakemake targets like `all-{experiment}-{period}-{tier}.gen` may be used -to automatically generate key-lists and file-lists (if not already present) -and produce all possible output for the given data tier, based on available -tier0 files which match the target. 
- -Example: -```shell -$ snakemake all-mydet-mymeas-tier2.gen -``` -Targets like `my-dataset-raw.gen` (derived from a key-list -`my-dataset.keylist`) are of course allowed as well. - - -## Monitoring - -Snakemake supports monitoring by connecting to a -[panoptes](https://github.com/panoptes-organization/panoptes) server. - -Run (e.g.) -```shell -$ panoptes --port 5000 -``` -in the background to run a panoptes server instance, which comes with a -GUI that can be accessed with a web-brower on the specified port. - -Then use the Snakemake option `--wms-monitor` to instruct Snakemake to push -progress information to the panoptes server: -```shell -snakemake --wms-monitor http://127.0.0.1:5000 [...] -``` - -## Using software containers - -This dataflow doesn't use Snakemake's internal Singularity support, but -instead supports Singularity containers via -[`venv`](https://github.com/oschulz/singularity-venv) environments -for greater control. - -To use this, the path to `venv` and the name of the environment must be set -in `config.json`. - -This is only relevant then running Snakemake *outside* of the software -container, e.g. then using a batch system (see below). If Snakemake -and the whole workflow is run inside of a container instance, no -container-related settings in `config.json` are required. diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..9be493d --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,21 @@ +SHELL := /bin/bash +SOURCEDIR = source +BUILDDIR = build + +all: apidoc + sphinx-build -M html "$(SOURCEDIR)" "$(BUILDDIR)" -W --keep-going + +apidoc: clean-apidoc + sphinx-apidoc \ + --private \ + --module-first \ + --force \ + --output-dir "$(SOURCEDIR)/api" \ + ../scripts \ + ../rules + +clean-apidoc: + rm -rf "$(SOURCEDIR)/api" + +clean: clean-apidoc + rm -rf "$(BUILDDIR)" diff --git a/docs/source/developer.rst b/docs/source/developer.rst new file mode 100644 index 0000000..b6d7560 --- /dev/null +++ b/docs/source/developer.rst @@ -0,0 +1,15 @@ +Developer's Guide +================= + +Snakemake is configured around a series of rules which specify how to generate a file/files from a set of input files. +These rules are defined in the ``Snakefile`` and in the files in the ``rules`` directory. +In general the structure is that a series of rules are defined to run on some calibration data, generating +a final ``par_{tier}.yaml`` file at the end which can be used by the ``tier`` rule to generate all the files in the tier. +For most rules there are two versions: the basic version and the partition version, where the first uses a single run +while the latter groups many runs together. +This grouping is defined in the ``cal_grouping.yaml`` file in the `legend-datasets `_ repository. + +Each rule specifies its inputs and outputs along with how to generate the output, which can be +a shell command or a call to a Python function. These scripts are stored in the ``scripts`` directory. +Additional parameters can also be defined. +Full details can be found in the `Snakemake documentation <https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html>`_. diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..8534e71 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,41 @@ +Welcome to legend-dataflow's documentation! +=========================================== + +*legend-dataflow* is a Python package based on `Snakemake <https://snakemake.readthedocs.io/>`_ +for running the data production of LEGEND. 
+It is designed to calibrate and optimise hundreds of channels in parallel before +bringing them all together to process the data. It takes as input the metadata +at `legend metadata `_. + +Getting started +--------------- + +It is recommended to install and use the package through the `legend-prodenv `_. + +Next steps +---------- + +.. toctree:: + :maxdepth: 1 + + Package API reference + +.. toctree:: + :maxdepth: 1 + + tutorials + +.. toctree:: + :maxdepth: 1 + :caption: Related projects + + LEGEND Data Objects + Decoding Digitizer Data + Digital Signal Processing + Pygama + +.. toctree:: + :maxdepth: 1 + :caption: Development + + Source Code diff --git a/docs/source/user_manual.rst b/docs/source/user_manual.rst new file mode 100644 index 0000000..fb3e81b --- /dev/null +++ b/docs/source/user_manual.rst @@ -0,0 +1,98 @@ +Configuration +============= + +Data processing resources are configured via a single site-dependent (and +possibly user-dependent) configuration file, generally named ``config.json``, +although you can choose an arbitrary name. + +A template for this file is located at ``templates/config.json``, +which can be copied to the working directory +and the paths adjusted as necessary. Note that, when running Snakemake, +the default path to the config file is ``./config.json``. + +Profiles +======== + +A number of profiles are also included in the ``profiles`` directory. If none are specified, +the default profile is used. The profile can be specified by using the ``--profile`` option +when running Snakemake. These control how many jobs are run simultaneously, based on how many cores +are specified and the memory constraints of the system. A full list of all the options +that can be passed to Snakemake can be found in the `Snakemake documentation <https://snakemake.readthedocs.io/>`_. + + +Running the Dataflow +==================== + +To run the dataflow at the most basic level, all that is necessary is to tell Snakemake which target file +to generate. In a simple case this may just be a single file, e.g. +```shell +$ snakemake /data2/public/prodenv/prod-blind/ref-v1.0.0/generated/tier/dsp/p03/r000/l200-p03-r000-cal-20230401T000000Z-tier_dsp.lh5 +``` +This would generate the file and all the files that are required to generate it. +In general though we want to generate a large number of files, and we can do this using the ``gen`` target. + +Main output generation +====================== + +Usually, the main output will be determined by a file-list. +The special output target ``{label}-{tier}.gen`` is used to +generate all files that follow the label up to the specified tier. +The label is composed of the following parts: +- the filelist designator: in most cases this will be ``all``, but other options are specified in the ``runlists.yaml`` file +in the `legend-datasets `_ repository. +- experiment: the experiment name i.e. l200 +- period: the period of the data e.g. p03 +- run: the run number e.g. r000 +- datatype: the data type e.g. cal +- timestamp: the timestamp of the data e.g. 20230401T000000Z + +Example: +```shell +$ snakemake all-l200-p03-r001-cal-20230401T000000Z-dsp.gen +``` + +You can specify as many or as few of these as you like, e.g. ``all-l200-p03-dsp.gen``. +If you want to specify a lower part of the label but leave a higher part free, +you can use the ``*`` character, e.g. ``all-l200-p03-*-cal-dsp.gen``. +Additionally if you want to specify multiple options for a part of the label you can use the ``_`` character between them, +e.g. ``all-l200-p03-r000_r001-dsp.gen``. 
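+ +A dry run (plain Snakemake behaviour, nothing specific to this dataflow) is a handy way to preview which jobs such a target would trigger before actually launching it: +```shell +$ snakemake -n all-l200-p03-r001-cal-20230401T000000Z-dsp.gen  # -n (--dry-run) only lists the jobs +```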
+ +After the files +are created, the empty file ``{label}-{tier}.gen`` will be created to +mark the successful data production. + + +Monitoring +========== + +Snakemake supports monitoring by connecting to a +`panoptes <https://github.com/panoptes-organization/panoptes>`_ server. + +Run (e.g.) +```shell +$ panoptes --port 5000 +``` +in the background to run a panoptes server instance, which comes with a +GUI that can be accessed with a web browser on the specified port. + +Then use the Snakemake option ``--wms-monitor`` to instruct Snakemake to push +progress information to the panoptes server: +```shell +snakemake --wms-monitor http://127.0.0.1:5000 [...] +``` + +Using software containers +========================= + +This dataflow doesn't use Snakemake's internal Singularity support, but +instead supports Singularity containers via +`venv <https://github.com/oschulz/singularity-venv>`_ environments +for greater control. + +To use this, the path to ``venv`` and the name of the environment must be set +in ``config.json``. + +This is only relevant when running Snakemake *outside* of the software +container, e.g. when using a batch system (see below). If Snakemake +and the whole workflow is run inside of a container instance, no +container-related settings in ``config.json`` are required. From 9639200d37d4039bd74460d19665acedccdfc2c4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 3 Dec 2024 22:46:01 +0100 Subject: [PATCH 12/17] add ability to specify different file selections and cleanup --- rules/filelist_gen.smk | 127 ++++++++++++++++++++++++++++------------ 1 file changed, 86 insertions(+), 41 deletions(-) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index cb27661..d0356a8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -5,9 +5,34 @@ from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind - -def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): +concat_datatypes = ["phy"] +concat_tiers = ["skm", "pet_concat", "evt_concat"] +blind_datatypes = ["phy"] + + +def expand_runs(in_dict): + """ + This function expands out the runs if a range is specified in the dictionary + e.g. + { + "p01": "r001..r005" + } + """ + for per, run_list in in_dict.items(): + if isinstance(run_list, str) and ".." 
in runs: + start, end = runs.split("..") + in_dict[per] = [f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1)] + return in_dict + + +def get_analysis_runs( + ignore_keys_file=None, analysis_runs_file=None, file_selection="all" +): + """ + This function reads in the ignore_keys and analysis_runs files and returns the dictionaries + """ ignore_keys = [] + analysis_runs = {} if ignore_keys_file is not None: if Path(ignore_keys_file).is_file(): if Path(ignore_keys_file).suffix == ".json": @@ -20,20 +45,18 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): with Path(ignore_keys_file).open() as f: ignore_keys = yaml.safe_load(f) else: - raise Warning( + raise ValueError( "ignore_keys_file file not in json, yaml or keylist format" ) - ignore_keys = [ + ignore_keys = [ # remove any comments in the keylist key.split("#")[0].strip() if "#" in key else key.strip() for key in ignore_keys ] else: - print("no ignore_keys.keylist file found") - ignore_keys = [] - else: - ignore_keys = [] + msg = f"no ignore_keys file found: {ignore_keys_file}" + raise ValueError(msg) - if analysis_runs_file is not None: + if analysis_runs_file is not None and file_selection != "all": if Path(analysis_runs_file).is_file(): if Path(ignore_keys_file).suffix == ".json": with Path(analysis_runs_file).open() as f: @@ -42,13 +65,18 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): with Path(analysis_runs_file).open() as f: analysis_runs = yaml.safe_load(f) else: - raise Warning("analysis_runs file not in json or yaml format") - analysis_runs = [] + msg = f"analysis_runs file not in json or yaml format: {analysis_runs_file}" + raise ValueError(msg) + if file_selection in analysis_runs: + analysis_runs = expand_runs( + analysis_runs[file_selection] + ) # select the file_selection and expand out the runs + else: + msg = f"Unknown file selection: {file_selection} not in {list(analysis_runs)}" + raise ValueError(msg) else: - analysis_runs = [] - print("no analysis_runs file found") - else: - analysis_runs = [] + msg = f"no analysis_runs file found: {analysis_runs_file}" + raise ValueError(msg) return analysis_runs, ignore_keys @@ -75,9 +103,14 @@ def get_keys(keypart): def get_pattern(setup, tier): + """ + Helper function to get the search pattern for the given tier, + some tiers such as skm need to refer to a different pattern when looking for files + as only phy files are taken to skm others are only taken to pet + """ if tier == "blind": fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) - elif tier == "skm" or tier == "pet_concat": + elif tier in ("skm", "pet_concat"): fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) @@ -87,6 +120,9 @@ def get_pattern(setup, tier): def concat_phy_filenames(setup, phy_filenames, tier): + """ + This function concatenates the files from the same run together + """ fn_pattern = get_pattern(setup, tier) # group files by run sorted_phy_filenames = run_grouper(phy_filenames) @@ -110,18 +146,20 @@ def build_filelist( tier, ignore_keys=None, analysis_runs=None, - file_selection="all", ): + """ + This function builds the filelist for the given filekeys, search pattern and tier. 
+ It will ignore any keys in the ignore_keys list and only include the keys specified in the analysis_runs dict + """ fn_pattern = get_pattern(setup, tier) if ignore_keys is None: ignore_keys = [] if analysis_runs is None: - analysis_runs = [] + analysis_runs = {} phy_filenames = [] other_filenames = [] - for key in filekeys: fn_glob_pattern = key.get_path_from_filekey(search_pattern)[0] files = glob.glob(fn_glob_pattern) @@ -131,7 +169,7 @@ def build_filelist( if _key.name in ignore_keys: pass else: - if tier == "blind" and _key.datatype == "phy": + if tier == "blind" and _key.datatype in blind_datatypes: filename = FileKey.get_path_from_filekey( _key, get_pattern_tier_raw_blind(setup) ) @@ -142,32 +180,38 @@ def build_filelist( else: filename = FileKey.get_path_from_filekey(_key, fn_pattern) - if file_selection == "all": - if _key.datatype == "phy": + if analysis_runs == {}: + if ( + _key.datatype in concat_datatypes + ): # separate out phy files as some tiers these are concatenated phy_filenames += filename else: other_filenames += filename - elif file_selection == "sel": - if analysis_runs == "all" or ( - _key.period in analysis_runs + else: + if ( + _key.period + in analysis_runs # check if period in analysis_runs dicts and ( - _key.run in analysis_runs[_key.period] - or analysis_runs[_key.period] == "all" + _key.run + in analysis_runs[ + _key.period + ] # check if run in analysis_runs dicts + or analysis_runs[_key.period] + == "all" # or if runs is just specified as "all" ) ): - if _key.datatype == "phy": - phy_filenames += filename + if _key.datatype in concat_datatypes: + phy_filenames += filename # separate out phy files as some tiers these are concatenated else: other_filenames += filename - else: - msg = "unknown file selection" - raise ValueError(msg) phy_filenames = sorted(phy_filenames) other_filenames = sorted(other_filenames) - if tier == "skm" or tier == "pet_concat" or tier == "evt_concat": - phy_filenames = concat_phy_filenames(setup, phy_filenames, tier) + if tier in concat_tiers: + phy_filenames = concat_phy_filenames( + setup, phy_filenames, tier + ) # concat phy files return phy_filenames + other_filenames @@ -175,10 +219,11 @@ def build_filelist( def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): - file_selection = wildcards.label[:3] - keypart = wildcards.label[3:] - - analysis_runs, ignore_keys = get_analysis_runs(ignore_keys_file, analysis_runs_file) + file_selection = wildcards.label.split("-", 1)[0] + keypart = f'-{wildcards.label.split("-", 1)[1]}' # remove the file selection from the keypart + analysis_runs, ignore_keys = get_analysis_runs( + ignore_keys_file, analysis_runs_file, file_selection + ) filekeys = get_keys(keypart) @@ -189,7 +234,6 @@ def get_filelist( wildcards.tier, ignore_keys, analysis_runs, - file_selection, ) @@ -204,7 +248,9 @@ def get_filelist_full_wildcards( ): keypart = f"-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-{wildcards.datatype}" - analysis_runs, ignore_keys = get_analysis_runs(ignore_keys_file, analysis_runs_file) + analysis_runs, ignore_keys = get_analysis_runs( + ignore_keys_file, analysis_runs_file, file_selection + ) filekeys = get_keys(keypart) return build_filelist( @@ -214,5 +260,4 @@ def get_filelist_full_wildcards( tier, ignore_keys, analysis_runs, - file_selection, ) From 0cb28b69de8f30acf0b21fc272b9515293b2cf97 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 3 Dec 2024 22:49:33 +0100 Subject: [PATCH 13/17] updates for new meta, switch to detector 
keying in configs --- Snakefile | 23 ++++---- rules/dsp.smk | 37 ++++++++++-- rules/hit.smk | 9 +++ rules/pht.smk | 7 +++ rules/pht_fast.smk | 2 + rules/psp.smk | 13 +++-- rules/tcm.smk | 1 + scripts/build_dsp.py | 18 +++++- scripts/merge_channels.py | 48 ++++++++++++--- scripts/pars_dsp_dplms.py | 21 ++++--- scripts/pars_dsp_eopt.py | 24 ++++---- scripts/pars_dsp_event_selection.py | 19 +++--- scripts/pars_dsp_nopt.py | 17 +++--- scripts/pars_dsp_tau.py | 13 ++++- scripts/pars_hit_aoe.py | 20 +++++-- scripts/pars_hit_ecal.py | 16 ++--- scripts/pars_hit_lq.py | 29 +++++---- scripts/pars_hit_qc.py | 91 +++++++++++++++++++++-------- scripts/pars_pht_aoecal.py | 13 +++-- scripts/pars_pht_fast.py | 14 +++-- scripts/pars_pht_lqcal.py | 14 +++-- scripts/pars_pht_partcal.py | 22 +++---- scripts/pars_pht_qc.py | 37 ++++++------ scripts/pars_pht_qc_phy.py | 19 +++--- scripts/pars_tcm_pulser.py | 9 ++- scripts/util/convert_np.py | 14 +++++ 26 files changed, 385 insertions(+), 165 deletions(-) create mode 100644 scripts/util/convert_np.py diff --git a/Snakefile b/Snakefile index 39a3dee..0838a8c 100644 --- a/Snakefile +++ b/Snakefile @@ -133,15 +133,15 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists - files = glob.glob(os.path.join(filelist_path(setup), "*")) - for file in files: - if os.path.isfile(file): - os.remove(file) - if os.path.exists(filelist_path(setup)): - os.rmdir(filelist_path(setup)) - - # remove logs + # # remove filelists + # files = glob.glob(os.path.join(filelist_path(setup), "*")) + # for file in files: + # if os.path.isfile(file): + # os.remove(file) + # if os.path.exists(filelist_path(setup)): + # os.rmdir(filelist_path(setup)) + + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): @@ -171,11 +171,12 @@ rule gen_filelist: analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - Path(filelist_path(setup)) / "{label}-{tier}.filelist", + temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: if len(input) == 0: print( - "WARNING: No files found for the given pattern\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" + f"WARNING: No files found for the given pattern:{wildcards.label}", + "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", ) with open(output[0], "w") as f: for fn in input: diff --git a/rules/dsp.smk b/rules/dsp.smk index 3fa105c..34f7422 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -58,13 +58,14 @@ rule build_pars_dsp_tau: "{basedir}/../scripts/pars_dsp_tau.py " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " "--plot_path {output.plots} " "--output_file {output.decay_const} " "--pulser_file {input.pulser} " - "--raw_files {input.files}" + "--raw_files {input.files} " rule build_pars_event_selection: @@ -93,6 +94,7 @@ rule build_pars_event_selection: "{basedir}/../scripts/pars_dsp_event_selection.py " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -132,6 +134,7 @@ rule build_pars_dsp_nopt: "--database {input.database} " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -175,6 +178,7 @@ rule 
build_pars_dsp_dplms: "--inplots {input.inplots} " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -210,6 +214,7 @@ rule build_pars_dsp_eopt: "{basedir}/../scripts/pars_dsp_eopt.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -226,9 +231,9 @@ rule build_svm_dsp: hyperpars=lambda wildcards: get_input_par_file( wildcards, "dsp", "svm_hyperpars" ), - train_data=lambda wildcards: get_input_par_file( - wildcards, "dsp", "svm_hyperpars" - ).replace("hyperpars.json", "train.lh5"), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "dsp", "svm_hyperpars") + ).replace("hyperpars.yaml", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -274,9 +279,12 @@ rule build_plts_dsp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, ), + params: + timestamp="{timestamp}", + datatype="cal", output: get_pattern_plts(setup, "dsp"), group: @@ -286,6 +294,7 @@ rule build_plts_dsp: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_dsp_objects: @@ -300,6 +309,9 @@ rule build_pars_dsp_objects: name="objects", extension="pkl", ), + params: + timestamp="{timestamp}", + datatype="cal", output: get_pattern_pars( setup, @@ -315,6 +327,8 @@ rule build_pars_dsp_objects: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_pars_dsp_db: @@ -324,9 +338,12 @@ rule build_pars_dsp_db: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, ), + params: + timestamp="{timestamp}", + datatype="cal", output: temp( get_pattern_pars_tmp( @@ -342,6 +359,8 @@ rule build_pars_dsp_db: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_pars_dsp: @@ -369,6 +388,9 @@ rule build_pars_dsp: extension="dir", check_in_cycle=check_in_cycle, ), + params: + timestamp="{timestamp}", + datatype="cal", output: out_file=get_pattern_pars( setup, @@ -386,6 +408,8 @@ rule build_pars_dsp: "--in_db {input.in_db} " "--out_db {output.out_db} " "--input {input.in_files} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_dsp: @@ -415,6 +439,7 @@ rule build_dsp: "{basedir}/../scripts/build_dsp.py " "--log {log} " f"--configs {ro(configs)} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " diff --git a/rules/hit.smk b/rules/hit.smk index af1fcaf..bb42651 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -44,6 +44,7 @@ rule build_qc: filelist_path(setup), "all-{experiment}-{period}-{run}-fft-dsp.filelist" ), pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + overwrite_files=lambda wildcards: get_overwrite_file("hit", wildcards), params: timestamp="{timestamp}", datatype="cal", @@ -65,11 +66,13 @@ rule build_qc: "--timestamp {params.timestamp} " "--channel {params.channel} " "--configs {configs} " + "--metadata {meta} " "--plot_path {output.plot_file} " "--save_path {output.qc_file} " "--pulser_file 
{input.pulser} " "--cal_files {input.files} " "--fft_files {input.fft_files} " + "--overwrite_files {input.overwrite_files} " # This rule builds the energy calibration using the calibration dsp files @@ -158,6 +161,7 @@ rule build_aoe_calibration: "{basedir}/../scripts/pars_hit_aoe.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -204,6 +208,7 @@ rule build_lq_calibration: "{basedir}/../scripts/pars_hit_lq.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -246,6 +251,7 @@ rule build_pars_hit_objects: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input} " "--output {output} " + "--channelmap {meta} " rule build_plts_hit: @@ -269,6 +275,7 @@ rule build_plts_hit: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input} " "--output {output} " + "--channelmap {meta} " rule build_pars_hit: @@ -300,6 +307,7 @@ rule build_pars_hit: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input[infiles]} " "--output {output} " + "--channelmap {meta} " rule build_hit: @@ -326,6 +334,7 @@ rule build_hit: "{swenv} python3 -B " "{basedir}/../scripts/build_hit.py " f"--configs {ro(configs)} " + "--metadata {meta} " "--log {log} " "--tier {params.tier} " "--datatype {params.datatype} " diff --git a/rules/pht.smk b/rules/pht.smk index dad1a24..e638832 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -129,6 +129,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_qc.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -181,6 +182,7 @@ rule build_pht_qc: "{basedir}/../scripts/pars_pht_qc.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -536,6 +538,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_aoecal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -596,6 +599,7 @@ rule build_pht_aoe_calibrations: "{basedir}/../scripts/pars_pht_aoecal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -714,6 +718,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_lqcal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -769,6 +774,7 @@ rule build_pht_lq_calibration: "{basedir}/../scripts/pars_pht_lqcal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -896,6 +902,7 @@ rule build_pht: "{swenv} python3 -B " "{basedir}/../scripts/build_hit.py " f"--configs {ro(configs)} " + "--metadata {meta} " "--log {log} " "--tier {params.tier} " "--datatype {params.datatype} " diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index f83e534..9369b6b 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -108,6 +108,7 @@ for key, dataset in part.datasets.items(): f"{basedir}/../scripts/pars_pht_fast.py " "--log {log} " 
"--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -166,6 +167,7 @@ rule par_pht_fast: "{basedir}/../scripts/pars_pht_fast.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " diff --git a/rules/psp.smk b/rules/psp.smk index 53e8f59..260be19 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -182,11 +182,9 @@ rule build_svm_psp: hyperpars=lambda wildcards: get_input_par_file( wildcards, "psp", "svm_hyperpars" ), - train_data=lambda wildcards: get_input_par_file( - wildcards, "psp", "svm_hyperpars" - ) - .as_posix() - .replace("hyperpars.json", "train.lh5"), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "psp", "svm_hyperpars") + ).replace("hyperpars.yaml", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -252,6 +250,7 @@ rule build_pars_psp_objects: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_plts_psp: @@ -273,6 +272,7 @@ rule build_plts_psp: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_psp_db: @@ -300,6 +300,7 @@ rule build_pars_psp_db: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_psp: @@ -344,6 +345,7 @@ rule build_pars_psp: "--in_db {input.in_db} " "--out_db {output.out_db} " "--input {input.in_files} " + "--channelmap {meta} " rule build_psp: @@ -373,6 +375,7 @@ rule build_psp: "{basedir}/../scripts/build_dsp.py " "--log {log} " f"--configs {ro(configs)} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " diff --git a/rules/tcm.smk b/rules/tcm.smk index c1164bb..e3a3410 100644 --- a/rules/tcm.smk +++ b/rules/tcm.smk @@ -66,3 +66,4 @@ rule build_pulser_ids: "--channel {params.channel} " "--tcm_files {params.input} " "--pulser_file {output.pulser} " + "--metadata {meta} " diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 02bf6a1..902ac4b 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -7,7 +7,7 @@ import numpy as np from dspeed import build_dsp -from legendmeta import TextDB +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 @@ -27,11 +27,15 @@ def replace_list_with_array(dic): argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) -argparser.add_argument("--log", help="log file", type=str) argparser.add_argument("--input", help="input file", type=str) + argparser.add_argument("--output", help="output file", type=str) argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() @@ -41,14 +45,22 @@ def replace_list_with_array(dic): logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) 
logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) log = logging.getLogger(__name__) +meta = LegendMetadata(path=args.metadata) +chan_map = meta.channelmap(args.timestamp, system=args.datatype) + + configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_dsp"][ "inputs" ]["processing_chain"] -channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} +channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) + for chan, file in channel_dict.items() +} db_files = [ par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") ] diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index e8994be..5fb6d68 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -4,6 +4,7 @@ from pathlib import Path import numpy as np +from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo import lh5 from util.FileKey import ChannelProcKey @@ -37,6 +38,19 @@ def replace_path(d, old_path, new_path): type=str, required=False, ) +argparser.add_argument( + "--channelmap", + help="channelmap", + type=str, + required=False, + default=None, +) +argparser.add_argument( + "--timestamp", + help="timestamp", + type=str, + required=False, +) args = argparser.parse_args() # change to only have 1 output file for multiple inputs @@ -46,6 +60,12 @@ def replace_path(d, old_path, new_path): file_extension = Path(args.output).suffix +if args.channelmap is not None: + channel_map = LegendMetadata(args.channelmap, lazy=True) + chmap = channel_map.channelmap(args.timestamp) +else: + chmap = None + if file_extension == ".dat" or file_extension == ".dir": out_file = Path(args.output).with_suffix("") else: @@ -61,9 +81,12 @@ def replace_path(d, old_path, new_path): for channel in channel_files: if Path(channel).suffix == file_extension: channel_dict = Props.read_from(channel) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel out_dict[channel_name] = channel_dict else: msg = "Output file extension does not match input file extension" @@ -79,7 +102,11 @@ def replace_path(d, old_path, new_path): with Path(channel).open("rb") as r: channel_dict = pkl.load(r) fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel out_dict[channel_name] = channel_dict with Path(temp_output).open("wb") as w: @@ -89,12 +116,16 @@ def replace_path(d, old_path, new_path): elif file_extension == ".dat" or file_extension == ".dir": common_dict = {} - with shelve.open(out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: + with shelve.open(str(out_file), "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: for channel in channel_files: with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel_files[0]).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel if isinstance(channel_dict, dict) and "common" in list(channel_dict): chan_common_dict = 
channel_dict.pop("common") common_dict[channel_name] = chan_common_dict @@ -109,8 +140,11 @@ def replace_path(d, old_path, new_path): for channel in channel_files: if Path(channel).suffix == file_extension: fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel tb_in = lh5.read(f"{channel_name}", channel) lh5.write( diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 607613c..87403b8 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -15,10 +15,11 @@ argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) +argparser.add_argument("--database", help="database", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--database", help="database", type=str, required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -42,6 +43,10 @@ log = logging.getLogger(__name__) sto = lh5.LH5Store() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(args.configs, lazy=True).on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] @@ -56,11 +61,9 @@ t0 = time.time() log.info("\nLoad fft data") - energies = sto.read(f"{args.channel}/raw/daqenergy", fft_files)[0] + energies = sto.read(f"{channel}/raw/daqenergy", fft_files)[0] idxs = np.where(energies.nda == 0)[0] - raw_fft = sto.read( - f"{args.channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs - )[0] + raw_fft = sto.read(f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs)[0] t1 = time.time() log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") @@ -69,12 +72,12 @@ kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - raw_cal = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] + raw_cal = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") if isinstance(dsp_config, (str, list)): @@ -107,7 +110,7 @@ dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) out_dict["dplms"][ "coefficients" - ] = f"loadlh5('{args.lh5_path}', '{args.channel}/dplms/coefficients')" + ] = f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") else: @@ -124,7 +127,7 @@ Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) sto.write( 
Table(col_dict={"dplms": dplms_pars}), - name=args.channel, + name=channel, lh5_file=args.lh5_path, wo_mode="overwrite", ) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index bcda090..d4f0098 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -26,12 +26,12 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) - argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) -argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -58,6 +58,10 @@ sto = lh5.LH5Store() t0 = time.time() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_eopt"]["inputs"]["processing_chain"][ @@ -108,12 +112,12 @@ ) peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - tb_data = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] + tb_data = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] t1 = time.time() log.info(f"Data Loaded in {(t1-t0)/60} minutes") @@ -318,32 +322,32 @@ out_alpha_dict = {} out_alpha_dict["cuspEmax_ctc"] = { "expression": "cuspEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_cusp.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, } out_alpha_dict["cuspEftp_ctc"] = { "expression": "cuspEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_cusp.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, } out_alpha_dict["zacEmax_ctc"] = { "expression": "zacEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_zac.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, } out_alpha_dict["zacEftp_ctc"] = { "expression": "zacEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_zac.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, } out_alpha_dict["trapEmax_ctc"] = { "expression": "trapEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, } out_alpha_dict["trapEftp_ctc"] = { "expression": "trapEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, } if "ctc_params" in db_dict: db_dict["ctc_params"].update(out_alpha_dict) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 
2e6505b..f4dfd7d 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -83,10 +83,11 @@ def get_out_data( argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--raw_cal", help="raw_cal", type=str, nargs="*", required=True) argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -108,6 +109,10 @@ def get_out_data( sto = lh5.LH5Store() t0 = time.time() + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"][ @@ -121,11 +126,11 @@ def get_out_data( db_dict = Props.read_from(args.decay_const) Path(args.peak_file).parent.mkdir(parents=True, exist_ok=True) + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.peak_file}.{rand_num}" if peak_dict.pop("run_selection") is True: log.debug("Starting peak selection") - rng = np.random.default_rng() - rand_num = f"{rng.integers(0,99999):05d}" - temp_output = f"{args.peak_file}.{rand_num}" with Path(args.raw_filelist).open() as f: files = f.read().splitlines() @@ -141,13 +146,13 @@ def get_out_data( tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, peak_dict["pulser_multiplicity_threshold"] + tcm_files, channel, peak_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - raw_dict = Props.read_from(args.raw_cal)[args.channel]["pars"]["operations"] + raw_dict = Props.read_from(args.raw_cal)[channel]["pars"]["operations"] peaks_kev = peak_dict["peaks"] kev_widths = peak_dict["kev_widths"] @@ -156,7 +161,7 @@ def get_out_data( final_cut_field = peak_dict["final_cut_field"] energy_parameter = peak_dict.get("energy_parameter", "trapTmax") - lh5_path = f"{args.channel}/raw" + lh5_path = f"{channel}/raw" if not isinstance(kev_widths, list): kev_widths = [kev_widths] diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 47261d2..5de3a59 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -20,6 +20,7 @@ argparser.add_argument("--inplots", help="inplots", type=str) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -44,6 +45,10 @@ t0 = time.time() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = 
configs["snakemake_rules"]["pars_dsp_nopt"]["inputs"]["processing_chain"][ @@ -61,9 +66,9 @@ raw_files = sorted(files) - energies = sto.read(f"{args.channel}/raw/daqenergy", raw_files)[0] + energies = sto.read(f"{channel}/raw/daqenergy", raw_files)[0] idxs = np.where(energies.nda == 0)[0] - tb_data = sto.read(f"{args.channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs)[0] + tb_data = sto.read(f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs)[0] t1 = time.time() log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") @@ -72,7 +77,7 @@ cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars")) cut_idxs = get_cut_indexes(dsp_data, cut_dict) tb_data = sto.read( - f"{args.channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] + f"{channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] )[0] log.info(f"... {len(tb_data)} baselines after cuts") @@ -81,12 +86,10 @@ if args.plot_path: out_dict, plot_dict = pno.noise_optimization( - tb_data, dsp_config, db_dict.copy(), opt_dict, args.channel, display=1 + tb_data, dsp_config, db_dict.copy(), opt_dict, channel, display=1 ) else: - out_dict = pno.noise_optimization( - raw_files, dsp_config, db_dict.copy(), opt_dict, args.channel - ) + out_dict = pno.noise_optimization(raw_files, dsp_config, db_dict.copy(), opt_dict, channel) t2 = time.time() log.info(f"Optimiser finished in {(t2-t0)/60} minutes") diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 82cec2d..b584648 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -13,10 +13,13 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) + argparser.add_argument("--plot_path", help="plot path", type=str, required=False) argparser.add_argument("--output_file", help="output file", type=str, required=True) @@ -37,6 +40,10 @@ sto = lh5.LH5Store() log = logging.getLogger(__name__) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) config_dict = configs.on(args.timestamp, system=args.datatype) channel_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["processing_chain"][ @@ -66,14 +73,14 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" raise ValueError(msg) data = sto.read( - f"{args.channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] + f"{channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] )[0].view_as("pd") threshold = kwarg_dict.pop("threshold") @@ -89,7 +96,7 @@ cuts = np.where((data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering))[0] tb_data = sto.read( - f"{args.channel}/raw", + f"{channel}/raw", input_file, idx=cuts, 
n_rows=kwarg_dict.pop("n_events"), diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index a393868..c30c7ef 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -15,6 +15,7 @@ from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -103,17 +104,20 @@ def aoe_calibration( argparser.add_argument("files", help="files", nargs="*", type=str) argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) + argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--aoe_results", help="aoe_results", type=str) @@ -129,6 +133,10 @@ def aoe_calibration( logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_hit_aoecal" @@ -194,7 +202,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -213,7 +221,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) else: msg = "No pulser file or tcm filelist provided" @@ -231,6 +239,7 @@ def eres_func(x): sigma_func=sigma_func, **kwarg_dict, ) + obj.pdf = obj.pdf.name # need to change eres func as can't pickle lambdas try: @@ -266,6 +275,9 @@ def eres_func(x): "pars": {"operations": cal_dict}, "results": results_dict, } + +final_hit_dict = convert_dict_np_to_float(final_hit_dict) + Props.write_to(args.hit_pars, final_hit_dict) Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index b310500..c94041d 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -22,6 +22,7 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from 
scipy.stats import binned_statistic +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) mpl.use("agg") @@ -452,8 +453,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) + channel = f"ch{chmap[args.channel].daq.rawid:07}" - det_status = chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["usability"] + det_status = chmap[args.channel]["analysis"]["usability"] if args.in_hit_dict: hit_dict = Props.read_from(args.in_hit_dict) @@ -466,7 +468,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): database_dic = Props.read_from(db_files) - hit_dict.update(database_dic[args.channel]["ctc_params"]) + hit_dict.update(database_dic[channel]["ctc_params"]) # get metadata dictionary configs = LegendMetadata(path=args.configs) @@ -497,7 +499,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", hit_dict, params=[*kwarg_dict["energy_params"], kwarg_dict["cut_param"], "timestamp", "trapTmax"], threshold=kwarg_dict["threshold"], @@ -515,7 +517,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -698,14 +700,14 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if "monitoring_parameters" in kwarg_dict: monitor_dict = monitor_parameters( - files, f"{args.channel}/dsp", kwarg_dict["monitoring_parameters"] + files, f"{channel}/dsp", kwarg_dict["monitoring_parameters"] ) results_dict.update({"monitoring_parameters": monitor_dict}) # get baseline plots and save all plots to file if args.plot_path: common_dict = baseline_tracking_plots( - sorted(files), f"{args.channel}/dsp", plot_options=bl_plots + sorted(files), f"{channel}/dsp", plot_options=bl_plots ) for plot in list(common_dict): @@ -739,7 +741,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary - output_dict = {"pars": hit_dict, "results": {"ecal": results_dict}} + output_dict = convert_dict_np_to_float({"pars": hit_dict, "results": {"ecal": results_dict}}) Props.write_to(args.save_path, output_dict) # save calibration objects diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 579b34a..169b560 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -11,10 +11,12 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -128,12 +130,13 @@ def lq_calibration( argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, 
required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--lq_results", help="lq_results", type=str) @@ -148,6 +151,10 @@ def lq_calibration( logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_hit_lqcal" @@ -197,7 +204,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -216,7 +223,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) else: msg = "No pulser file or tcm filelist provided" @@ -262,19 +269,19 @@ def eres_func(x): pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -results_dict = dict(**eres_dict, lq=out_dict) +final_hit_dict = convert_dict_np_to_float( + { + "pars": {"operations": cal_dict}, + "results": dict(**eres_dict, lq=out_dict), + } +) Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) -final_hit_dict = { - "pars": {"operations": cal_dict}, - "results": results_dict, -} Props.write_to(args.hit_pars, final_hit_dict) -Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, lq=obj, ) -Props.write_to(args.lq_results, final_object_dict) +Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) with Path(args.lq_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 5311c46..320fee9 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -18,6 +18,7 @@ get_tcm_pulser_ids, ) from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,17 +29,26 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument( + "--overwrite_files", + help="overwrite_files", + type=str, + required=False, + nargs="*", + ) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + 
argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument("--tier", help="tier", type=str, default="hit") - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) argparser.add_argument("--save_path", help="save_path", type=str) args = argparser.parse_args() @@ -51,6 +61,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -58,19 +72,37 @@ kwarg_dict = Props.read_from(channel_dict) + if args.overwrite_files: + overwrite = Props.read_from(args.overwrite_files) + if channel in overwrite: + overwrite = overwrite[channel]["pars"]["operations"] + else: + overwrite = None + else: + overwrite = None + + if len(args.fft_files) == 1 and Path(args.fft_files[0]).suffix == ".filelist": + with Path(args.fft_files[0]).open() as f: + fft_files = f.read().splitlines() + else: + fft_files = args.fft_files + + if len(args.cal_files) == 1 and Path(args.cal_files[0]).suffix == ".filelist": + with Path(args.cal_files[0]).open() as f: + cal_files = f.read().splitlines() + else: + cal_files = args.fft_files + kwarg_dict_fft = kwarg_dict["fft_fields"] - if len(args.fft_files) > 0: + if len(fft_files) > 0: fft_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.fft_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(fft_files[0], f"{channel}/dsp/")], kwarg_dict_fft["cut_parameters"], ) fft_data = load_data( - args.fft_files, - f"{args.channel}/dsp", + fft_files, + f"{channel}/dsp", {}, [*fft_fields, "timestamp", "trapTmax"], ) @@ -123,31 +155,31 @@ hit_dict_fft = {} plot_dict_fft = {} + if overwrite is not None: + for name in kwarg_dict_fft["cut_parameters"]: + for cut_name, cut_dict in overwrite.items(): + if name in cut_name: + hit_dict_fft.update({cut_name: cut_dict}) + kwarg_dict_cal = kwarg_dict["cal_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], kwarg_dict_cal["cut_parameters"], ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] cut_fields += get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], init_cal["cut_parameters"], ) # load data in data, threshold_mask = load_data( - args.cal_files, - f"{args.channel}/dsp", + cal_files, + f"{channel}/dsp", {}, - [*cut_fields, "timestamp", "trapTmax"], + [*cut_fields, "timestamp", "trapTmax", "t_sat_lo"], threshold=kwarg_dict_cal.get("threshold", 0), return_selection_mask=True, cal_energy_param="trapTmax", @@ -163,7 +195,7 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, 
kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -201,16 +233,19 @@ for key in info.get("parameters", None): exp = re.sub(f"(? 500: + if len(data.query("is_pulser & ~is_recovering")) < 500: data = data.query("is_pulser & ~is_recovering") else: data = data.query("~is_pulser & ~is_recovering")[mask] @@ -222,9 +257,17 @@ display=1 if args.plot_path else 0, ) + if overwrite is not None: + for name in kwarg_dict_cal["cut_parameters"]: + for cut_name, cut_dict in overwrite.items(): + if name in cut_name: + hit_dict_cal.update({cut_name: cut_dict}) + hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} + hit_dict = convert_dict_np_to_float(hit_dict) + Path(args.save_path).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.save_path, hit_dict) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index e9573e3..ca938e5 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -255,12 +255,13 @@ def eres_func(x): argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) @@ -276,6 +277,10 @@ def eres_func(x): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_pht_aoecal" @@ -350,7 +355,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -372,7 +377,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 4064b3c..104ad05 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -54,13 +54,13 @@ def run_splitter(files): argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) 
argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) @@ -77,6 +77,10 @@ def run_splitter(files): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + cal_dict = {} results_dicts = {} for ecal in args.ecal_file: @@ -167,7 +171,7 @@ def run_splitter(files): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict["threshold"], @@ -191,7 +195,7 @@ def run_splitter(files): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -213,7 +217,7 @@ def run_splitter(files): object_dict, inplots_dict, args.timestamp, - args.metadata, + chmap, args.configs, args.channel, args.datatype, diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 2ba88af..2c67745 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -13,6 +13,7 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal @@ -251,12 +252,13 @@ def eres_func(x): argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) @@ -272,6 +274,10 @@ def eres_func(x): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_pht_lqcal" 
@@ -337,7 +343,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -360,7 +366,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index a6eab18..a2d74e4 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -142,18 +142,14 @@ def calibrate_partition( object_dicts, plot_dicts, timestamp, - metadata_path, + chmap, configs, channel, datatype, gen_plots=True, ): - # load metadata - meta = LegendMetadata(path=metadata_path) - chmap = meta.channelmap(timestamp) - - det_status = chmap.map("daq.rawid")[int(channel[2:])]["analysis"]["usability"] + det_status = chmap[channel]["analysis"]["usability"] configs = LegendMetadata(path=configs) channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"]["pars_pht_partcal"][ @@ -418,13 +414,13 @@ def calibrate_partition( argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) @@ -441,6 +437,10 @@ def calibrate_partition( logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + cal_dict = {} results_dicts = {} for ecal in args.ecal_file: @@ -498,7 +498,7 @@ def calibrate_partition( # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict["threshold"], @@ -521,7 +521,7 @@ def calibrate_partition( tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -543,7 +543,7 @@ def calibrate_partition( object_dict, inplots_dict, timestamp, - args.metadata, + chmap, args.configs, args.channel, args.datatype, diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 790ee0a..495c87b 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -18,6 +18,7 @@ get_tcm_pulser_ids, ) from pygama.pargen.utils import 
load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,6 +29,7 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument( "--tcm_filelist", help="tcm_filelist", nargs="*", type=str, required=False ) @@ -39,12 +41,13 @@ ) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) argparser.add_argument( "--save_path", @@ -62,6 +65,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -85,8 +92,8 @@ if args.overwrite_files: overwrite = Props.read_from(args.overwrite_files) - if args.channel in overwrite: - overwrite = overwrite[args.channel]["pars"]["operations"] + if channel in overwrite: + overwrite = overwrite[channel]["pars"]["operations"] else: overwrite = None else: @@ -111,15 +118,15 @@ if len(fft_files) > 0: fft_fields = get_keys( [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(fft_files[0], f"{args.channel}/dsp/") + key.replace(f"{channel}/dsp/", "") + for key in ls(fft_files[0], f"{channel}/dsp/") ], kwarg_dict_fft["cut_parameters"], ) fft_data = load_data( fft_files, - f"{args.channel}/dsp", + f"{channel}/dsp", {}, [*fft_fields, "timestamp", "trapTmax", "t_sat_lo"], ) @@ -184,26 +191,20 @@ kwarg_dict_cal = kwarg_dict["cal_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], kwarg_dict_cal["cut_parameters"], ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] cut_fields += get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], init_cal["cut_parameters"], ) # load data in data, threshold_mask = load_data( cal_files, - f"{args.channel}/dsp", + f"{channel}/dsp", {}, [*cut_fields, "timestamp", "trapTmax", "t_sat_lo"], threshold=kwarg_dict_cal.get("threshold", 0), @@ -226,7 +227,7 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, total_mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -303,6 +304,8 @@ hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, 
**plot_dict_init_cal, **plot_dict_cal} + hit_dict = convert_dict_np_to_float(hit_dict) + for file in args.save_path: Path(file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(file, hit_dict) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 48f3d9f..4f87afb 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -17,6 +17,7 @@ generate_cut_classifiers, get_keys, ) +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,12 +29,13 @@ argparser.add_argument("--phy_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) argparser.add_argument( "--save_path", @@ -51,6 +53,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -88,15 +94,12 @@ kwarg_dict_fft = kwarg_dict["fft_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(phy_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(phy_files[0], f"{channel}/dsp/")], kwarg_dict_fft["cut_parameters"], ) data = sto.read( - f"{args.channel}/dsp/", + f"{channel}/dsp/", phy_files, field_mask=[*cut_fields, "daqenergy", "t_sat_lo", "timestamp"], idx=np.where(bl_mask)[0], @@ -145,6 +148,8 @@ log.debug("fft cuts applied") log.debug(f"cut_dict is: {json.dumps(hit_dict, indent=2)}") + hit_dict = convert_dict_np_to_float(hit_dict) + for file in args.save_path: Path(file).name.mkdir(parents=True, exist_ok=True) Props.write_to(file, {"pars": {"operations": hit_dict}}) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 27c1101..9e6ad42 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -10,6 +10,7 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log file", type=str) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -36,6 +37,10 @@ config_dict = configs.on(args.timestamp, system=args.datatype) kwarg_dict = config_dict["snakemake_rules"]["pars_tcm_pulser"]["inputs"]["pulser_config"] +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid}" + kwarg_dict = Props.read_from(kwarg_dict) if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": @@ -46,9 +51,7 @@ tcm_files = args.tcm_files # get pulser mask from tcm files 
tcm_files = sorted(np.unique(tcm_files)) -ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") -) +ids, mask = get_tcm_pulser_ids(tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold")) Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/scripts/util/convert_np.py b/scripts/util/convert_np.py new file mode 100644 index 0000000..cdc363c --- /dev/null +++ b/scripts/util/convert_np.py @@ -0,0 +1,14 @@ +import numpy as np + + +def convert_dict_np_to_float(dic): + for key in dic: + if isinstance(dic[key], dict): + convert_dict_np_to_float(dic[key]) + elif isinstance(dic[key], (np.float32, np.float64)): + dic[key] = float(dic[key]) + elif isinstance(dic[key], (list, tuple)): + dic[key] = [ + float(x) if isinstance(x, (np.float32, np.float64)) else x for x in dic[key] + ] + return dic From 4f7e4058bac3836a303cb6b0ceb06cf484c30d07 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 4 Dec 2024 17:40:05 +0100 Subject: [PATCH 14/17] debugging --- rules/ann.smk | 101 ++++++++++++++--------- rules/dsp.smk | 165 +++++++++++++++++++------------------- rules/evt.smk | 142 +++++++++++++++++++++----------- rules/psp.smk | 1 + scripts/build_ann.py | 124 ---------------------------- scripts/build_dsp.py | 150 +++++++++++++++++++++------------- scripts/build_hit.py | 31 ++++--- scripts/build_tcm.py | 16 +++- scripts/merge_channels.py | 6 +- scripts/pars_dsp_tau.py | 28 +++---- scripts/pars_hit_lq.py | 2 +- 11 files changed, 380 insertions(+), 386 deletions(-) delete mode 100644 scripts/build_ann.py diff --git a/rules/ann.smk b/rules/ann.smk index 64cdd50..15558ae 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -4,51 +4,72 @@ to apply the ann and risetime cuts for psd. 
""" -from scripts.util.pars_loading import pars_catalog -from scripts.util.utils import par_dsp_path from scripts.util.patterns import ( - get_pattern_tier_dsp, - get_pattern_tier_psp, - get_pattern_tier_ann, get_pattern_tier, get_pattern_log, get_pattern_pars, - get_pattern_pars_overwrite, ) -for tier in ["ann", "pan"]: - rule: - input: - dsp_file=( - get_pattern_tier_dsp(setup) - if tier == "ann" - else get_pattern_tier_psp(setup) - ), - pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"), - params: - timestamp="{timestamp}", - datatype="{datatype}", - output: - tier_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, f"{tier}_db"), - log: - get_pattern_log(setup, f"tier_{tier}"), - group: - "tier-ann" - resources: - runtime=300, - mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, - shell: - "{swenv} python3 -B " - f"{workflow.source_path('../scripts/build_ann.py')} " - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--input {input.dsp_file} " - "--output {output.tier_file} " - "--db_file {output.db_file} " - "--pars_file {input.pars_file} " +rule build_ann: + input: + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + output: + tier_file=get_pattern_tier(setup, "ann", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, "ann_db"), + log: + get_pattern_log(setup, "tier_ann"), + group: + "tier-ann" + resources: + runtime=300, + mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_dsp.py')} " + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + f"--tier ann " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {input.dsp_file} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {input.pars_file} " - set_last_rule_name(workflow, f"build_{tier}") + +rule build_pan: + input: + dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), + pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + output: + tier_file=get_pattern_tier(setup, "pan", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, "pan_db"), + log: + get_pattern_log(setup, "tier_pan"), + group: + "tier-ann" + resources: + runtime=300, + mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_dsp.py')} " + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + f"--tier pan " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {input.dsp_file} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {input.pars_file} " diff --git a/rules/dsp.smk b/rules/dsp.smk index 34f7422..7ae67a7 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -363,86 +363,85 @@ rule build_pars_dsp_db: "--channelmap {meta} " -rule build_pars_dsp: - input: - in_files=lambda wildcards: get_par_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - name="dplms", - extension="lh5", - ), - in_db=get_pattern_pars_tmp( 
- setup, - "dsp", - datatype="cal", - ), - plts=get_pattern_plts(setup, "dsp"), - objects=get_pattern_pars( - setup, - "dsp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - out_file=get_pattern_pars( - setup, - "dsp", - extension="lh5", - check_in_cycle=check_in_cycle, - ), - out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), - group: - "merge-dsp" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--output {output.out_file} " - "--in_db {input.in_db} " - "--out_db {output.out_db} " - "--input {input.in_files} " - "--timestamp {params.timestamp} " - "--channelmap {meta} " - - -rule build_dsp: - input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), - pars_file=ancient( - lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "dsp" - ) - ), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - output: - tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "dsp_db"), - log: - get_pattern_log(setup, "tier_dsp"), - group: - "tier-dsp" - resources: - runtime=300, - mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " - "--log {log} " - f"--configs {ro(configs)} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--input {params.ro_input[raw_file]} " - "--output {output.tier_file} " - "--db_file {output.db_file} " - "--pars_file {params.ro_input[pars_file]} " +# rule build_pars_dsp: +# input: +# in_files=lambda wildcards: get_par_chanlist( +# setup, +# f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", +# "dsp", +# basedir, +# det_status, +# chan_maps, +# name="dplms", +# extension="lh5", +# ), +# in_db=get_pattern_pars_tmp( +# setup, +# "dsp", +# datatype="cal", +# ), +# plts=get_pattern_plts(setup, "dsp"), +# objects=get_pattern_pars( +# setup, +# "dsp", +# name="objects", +# extension="dir", +# check_in_cycle=check_in_cycle, +# ), +# params: +# timestamp="{timestamp}", +# datatype="cal", +# output: +# out_file=get_pattern_pars( +# setup, +# "dsp", +# extension="lh5", +# check_in_cycle=check_in_cycle, +# ), +# out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), +# group: +# "merge-dsp" +# shell: +# "{swenv} python3 -B " +# "{basedir}/../scripts/merge_channels.py " +# "--output {output.out_file} " +# "--in_db {input.in_db} " +# "--out_db {output.out_db} " +# "--input {input.in_files} " +# "--timestamp {params.timestamp} " +# "--channelmap {meta} " +# rule build_dsp: +# input: +# raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), +# pars_file=ancient( +# lambda wildcards: ParsCatalog.get_par_file( +# setup, wildcards.timestamp, "dsp" +# ) +# ), +# params: +# timestamp="{timestamp}", +# datatype="{datatype}", +# ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, +# output: +# tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), +# db_file=get_pattern_pars_tmp(setup, "dsp_db"), +# log: +# get_pattern_log(setup, "tier_dsp"), +# group: +# "tier-dsp" +# resources: +# runtime=300, +# mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, +# shell: +# "{swenv} python3 -B " +# "{basedir}/../scripts/build_dsp.py " +# "--log {log} " +# 
"--tier dsp " +# f"--configs {ro(configs)} " +# "--metadata {meta} " +# "--datatype {params.datatype} " +# "--timestamp {params.timestamp} " +# "--input {params.ro_input[raw_file]} " +# "--output {output.tier_file} " +# "--db_file {output.db_file} " +# "--pars_file {params.ro_input[pars_file]} " diff --git a/rules/evt.smk b/rules/evt.smk index 9239b96..112c92c 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -11,50 +11,91 @@ from scripts.util.patterns import ( ) -for tier in ("evt", "pet"): +rule build_evt: + input: + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + hit_file=get_pattern_tier(setup, "hit", check_in_cycle=False), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + ann_file=lambda wildcards: ( + None + if int(wildcards["period"][1:]) > 11 + else get_pattern_tier(setup, "ann", check_in_cycle=False) + ), + par_files=lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "hit" + ), + xtalk_matrix=lambda wildcards: get_input_par_file( + tier="evt", wildcards=wildcards, name="xtc" + ), + output: + get_pattern_tier(setup, "evt", check_in_cycle=check_in_cycle), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier="evt", + ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, + log: + get_pattern_log(setup, f"tier_evt"), + group: + "tier-evt" + resources: + runtime=300, + mem_swap=50, + run: + shell_string = ( + f"{swenv} python3 -B " + f"{basedir}/../scripts/build_evt.py " + f"--configs {ro(configs)} " + f"--metadata {ro(meta)} " + "--log {log} " + "--tier {params.tier} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--xtc_file {params.ro_input[xtalk_matrix]} " + "--par_files {params.ro_input[par_files]} " + "--hit_file {params.ro_input[hit_file]} " + "--tcm_file {params.ro_input[tcm_file]} " + "--dsp_file {params.ro_input[dsp_file]} " + "--output {output} " + ) + if input.ann_file is not None: + shell_string += "--ann_file {params.ro_input[ann_file]} " - rule: - input: - dsp_file=( - get_pattern_tier(setup, "dsp", check_in_cycle=False) - if tier == "evt" - else get_pattern_tier(setup, "psp", check_in_cycle=False) - ), - hit_file=( - get_pattern_tier(setup, "hit", check_in_cycle=False) - if tier == "evt" - else get_pattern_tier(setup, "pht", check_in_cycle=False) - ), - tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), - xtalk_matrix=lambda wildcards: get_input_par_file( - tier=tier, wildcards=wildcards, name="xtc" - ), - ann_file=branch( - lambda wildcards: tier if wildcards["period"][1:] <= 11 else "none", - cases={ - "evt": get_pattern_tier(setup, "ann", check_in_cycle=False), - "pet": get_pattern_tier(setup, "pan", check_in_cycle=False), - "none": None, - }, - ), - par_files=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "pht" - ), - output: - get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), - params: - timestamp="{timestamp}", - datatype="{datatype}", - tier=tier, - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - log: - get_pattern_log(setup, f"tier_{tier}"), - group: - "tier-evt" - resources: - runtime=300, - mem_swap=50, - shell: + shell(shell_string) + + +rule build_pet: + input: + dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), + hit_file=get_pattern_tier(setup, "pht", check_in_cycle=False), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + ann_file=lambda wildcards: ( + None + if int(wildcards["period"][1:]) > 11 + else get_pattern_tier(setup, "pan", 
check_in_cycle=False) + ), + par_files=lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "pht" + ), + xtalk_matrix=lambda wildcards: get_input_par_file( + tier="pet", wildcards=wildcards, name="xtc" + ), + output: + get_pattern_tier(setup, "pet", check_in_cycle=check_in_cycle), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier="pet", + ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, + log: + get_pattern_log(setup, f"tier_pet"), + group: + "tier-evt" + resources: + runtime=300, + mem_swap=50, + run: + shell_string = ( f"{swenv} python3 -B " f"{basedir}/../scripts/build_evt.py " f"--configs {ro(configs)} " @@ -68,10 +109,15 @@ for tier in ("evt", "pet"): "--hit_file {params.ro_input[hit_file]} " "--tcm_file {params.ro_input[tcm_file]} " "--dsp_file {params.ro_input[dsp_file]} " - "--ann_file {params.ro_input[ann_file]} " "--output {output} " + ) + if input.ann_file is not None: + shell_string += "--ann_file {params.ro_input[ann_file]} " + + shell(shell_string) + - set_last_rule_name(workflow, f"build_{tier}") +for evt_tier in ("evt", "pet"): rule: wildcard_constraints: @@ -87,14 +133,14 @@ for tier in ("evt", "pet"): ) ), output: - get_pattern_tier(setup, f"{tier}_concat", check_in_cycle=check_in_cycle), + get_pattern_tier(setup, f"{evt_tier}_concat", check_in_cycle=check_in_cycle), params: timestamp="all", datatype="{datatype}", lh5concat_exe=setup["paths"]["install"] + "/bin/lh5concat", ro_input=lambda _, input: utils.as_ro(setup, input), log: - get_pattern_log_concat(setup, f"tier_{tier}_concat"), + get_pattern_log_concat(setup, f"tier_{evt_tier}_concat"), group: "tier-evt" shell: @@ -102,4 +148,4 @@ for tier in ("evt", "pet"): "--output {output} " "-- {params.ro_input} &> {log}" - set_last_rule_name(workflow, f"concat_{tier}") + set_last_rule_name(workflow, f"concat_{evt_tier}") diff --git a/rules/psp.smk b/rules/psp.smk index 260be19..9fc0861 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -374,6 +374,7 @@ rule build_psp: "{swenv} python3 -B " "{basedir}/../scripts/build_dsp.py " "--log {log} " + "--tier psp " f"--configs {ro(configs)} " "--metadata {meta} " "--datatype {params.datatype} " diff --git a/scripts/build_ann.py b/scripts/build_ann.py deleted file mode 100644 index 224877a..0000000 --- a/scripts/build_ann.py +++ /dev/null @@ -1,124 +0,0 @@ -import argparse -import json -import logging -import os -import pathlib -import re -import time -import warnings - -os.environ["LGDO_CACHE"] = "false" -os.environ["LGDO_BOUNDSCHECK"] = "false" -os.environ["DSPEED_CACHE"] = "false" -os.environ["DSPEED_BOUNDSCHECK"] = "false" - -import lgdo.lh5 as lh5 -import numpy as np -from dspeed import build_dsp -from legendmeta import LegendMetadata -from legendmeta.catalog import Props - - -def replace_list_with_array(dic): - for key, value in dic.items(): - if isinstance(value, dict): - dic[key] = replace_list_with_array(value) - elif isinstance(value, list): - dic[key] = np.array(value, dtype="float32") - else: - pass - return dic - - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) - -argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs path", type=str, required=True) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) -argparser.add_argument("--log", help="log file", type=str) 
-argparser.add_argument("--input", help="input file", type=str) -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument("--db_file", help="db file", type=str) -args = argparser.parse_args() - -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -log = logging.getLogger(__name__) - -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_ann"][ - "inputs" -]["processing_chain"] - -channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} -db_files = [ - par_file - for par_file in args.pars_file - if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yml" -] - -database_dic = Props.read_from(db_files, subst_pathvar=True) -database_dic = replace_list_with_array(database_dic) - -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) - -rng = np.random.default_rng() -rand_num = f"{rng.integers(0,99999):05d}" -temp_output = f"{args.output}.{rand_num}" - -start = time.time() - -build_dsp( - args.input, - temp_output, - {}, - database=database_dic, - chan_config=channel_dict, - write_mode="r", - buffer_len=3200 if args.datatype == "cal" else 3200, - block_width=16, -) - -log.info(f"build_ann finished in {time.time()-start}") - -os.rename(temp_output, args.output) - -if "ann" in args.output: - key = os.path.basename(args.output).replace("-tier_ann.lh5", "") -else: - key = os.path.basename(args.output).replace("-tier_pan.lh5", "") - -raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] - -raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] - -outputs = {} -channels = [] -for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - "channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - -full_dict = { - "valid_fields": { - "ann": outputs, - }, - "valid_keys": {key: {"valid_channels": {"ann": channels}}}, -} -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) -with open(args.db_file, "w") as w: - json.dump(full_dict, w, indent=4) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 902ac4b..c505058 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,9 +1,10 @@ import argparse import logging -import pathlib +import logging.config import re import time import warnings +from pathlib import Path import numpy as np from dspeed import build_dsp @@ -32,6 +33,7 @@ def replace_list_with_array(dic): argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--tier", help="Tier", type=str, required=True) argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) argparser.add_argument("--input", help="input file", type=str) @@ -40,35 +42,49 @@ def replace_list_with_array(dic): 
argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(args.log).parent.mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] +if args.tier in ["dsp", "psp"]: + config_dict = config_dict["tier_dsp"] +elif args.tier in ["ann", "pan"]: + config_dict = config_dict["tier_ann"] +else: + msg = f"Tier {args.tier} not supported" + raise ValueError(msg) + +channel_dict = config_dict["inputs"]["processing_chain"] +settings_dict = config_dict["options"].get("settings", {}) +if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") meta = LegendMetadata(path=args.metadata) chan_map = meta.channelmap(args.timestamp, system=args.datatype) - -configs = TextDB(args.configs, lazy=True) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_dsp"][ - "inputs" -]["processing_chain"] - -channel_dict = { - f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) - for chan, file in channel_dict.items() -} +if args.tier in ["ann", "pan"]: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/dsp": Props.read_from(file) + for chan, file in channel_dict.items() + } +else: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) + for chan, file in channel_dict.items() + } db_files = [ - par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") + par_file for par_file in args.pars_file if Path(par_file).suffix in (".json", ".yaml", ".yml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) database_dic = replace_list_with_array(database_dic) -pathlib.Path(args.output).parent.mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) rng = np.random.default_rng() rand_num = f"{rng.integers(0, 99999):05d}" @@ -83,42 +99,66 @@ def replace_list_with_array(dic): database=database_dic, chan_config=channel_dict, write_mode="r", - buffer_len=3200 if args.datatype == "cal" else 3200, - block_width=16, + buffer_len=settings_dict.get("buffer_len", 1000), + block_width=settings_dict.get("block_width", 16), ) log.info(f"build_dsp finished in {time.time()-start}") - -pathlib.Path(temp_output).rename(args.output) - -key = pathlib.Path(args.output).name.replace("-tier_dsp.lh5", "") - -raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] - -raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] - -outputs = {} -channels = [] -for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - 
"channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - -full_dict = { - "valid_fields": { - "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, - "dsp": outputs, - }, - "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, -} -pathlib.Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) +Path(temp_output).rename(args.output) + +key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") + +if args.tier in ["dsp", "psp"]: + + raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] + raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] + + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, + "dsp": outputs, + }, + "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, + } +else: + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "ann": outputs, + }, + "valid_keys": {key: {"valid_channels": {"ann": channels}}}, + } + +Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 8e2da80..3aba4aa 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -3,7 +3,7 @@ import time from pathlib import Path -from legendmeta import TextDB +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 from pygama.hit.build_hit import build_hit @@ -13,12 +13,13 @@ argparser.add_argument("--pars_file", help="hit pars file", nargs="*") argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--tier", help="Tier", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--output", help="output file", type=str) argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() @@ -41,21 +42,27 @@ msg = "unknown tier" raise ValueError(msg) -pars_dict = Props.read_from(args.pars_file) +meta = LegendMetadata(path=args.metadata) +chan_map = meta.channelmap(args.timestamp, system=args.datatype) +pars_dict = Props.read_from(args.pars_file) pars_dict = {chan: chan_dict["pars"] for chan, chan_dict in pars_dict.items()} 
hit_dict = {} channels_present = lh5.ls(args.input) for channel in pars_dict: chan_pars = pars_dict[channel].copy() - if channel in channel_dict: - cfg_dict = Props.read_from(channel_dict[channel]) - Props.add_to(cfg_dict, chan_pars) - chan_pars = cfg_dict - - if channel in channels_present: - hit_dict[f"{channel}/dsp"] = chan_pars + try: + detector = chan_map.map("daq.rawid")[int(channel[2:])].name + if detector in channel_dict: + cfg_dict = Props.read_from(channel_dict[detector]) + Props.add_to(cfg_dict, chan_pars) + chan_pars = cfg_dict + + if channel in channels_present: + hit_dict[f"{channel}/dsp"] = chan_pars + except KeyError: + pass t_start = time.time() Path(args.output).parent.mkdir(parents=True, exist_ok=True) @@ -79,7 +86,7 @@ } hit_channels.append(channel) -key = Path(args.output).replace(f"-tier_{args.tier}.lh5", "") +key = args.output.replace(f"-tier_{args.tier}.lh5", "") full_dict = { "valid_fields": {args.tier: hit_outputs}, diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 2ceb3ab..faa39d6 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,5 +1,6 @@ import argparse import logging +import logging.config from pathlib import Path import lgdo.lh5 as lh5 @@ -18,13 +19,20 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["tier_tcm"] +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") Path(args.output).parent.mkdir(parents=True, exist_ok=True) -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -channel_dict = configs["snakemake_rules"]["tier_tcm"]["inputs"] -settings = Props.read_from(channel_dict["config"]) + +settings = Props.read_from(config_dict["inputs"]["config"]) rng = np.random.default_rng() temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index 5fb6d68..bed04d2 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -76,7 +76,7 @@ def replace_path(d, old_path, new_path): Path(args.output).parent.mkdir(parents=True, exist_ok=True) -if file_extension == ".json" or file_extension == ".yaml" or file_extension == ".yml": +if file_extension in (".json", ".yaml", ".yml"): out_dict = {} for channel in channel_files: if Path(channel).suffix == file_extension: @@ -92,9 +92,7 @@ def replace_path(d, old_path, new_path): msg = "Output file extension does not match input file extension" raise RuntimeError(msg) - Props.write_to(temp_output, out_dict, "json") - - Path(temp_output).rename(out_file) + Props.write_to(out_file, out_dict) elif file_extension == ".pkl": out_dict = {} diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index b584648..b8d9a71 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,5 +1,6 @@ import argparse import logging +import logging.config import pickle as pkl from pathlib import Path @@ -29,27 +30,24 @@ argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str, required=False) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") 
-logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) - sto = lh5.LH5Store() -log = logging.getLogger(__name__) + +configs = LegendMetadata(path=args.configs) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["pars_dsp_tau"] +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(path=args.configs) -config_dict = configs.on(args.timestamp, system=args.datatype) -channel_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["processing_chain"][ - args.channel -] -kwarg_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["tau_config"][args.channel] +channel_dict = config_dict["inputs"]["processing_chain"][args.channel] +kwarg_dict = config_dict["inputs"]["tau_config"][args.channel] kwarg_dict = Props.read_from(kwarg_dict) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 169b560..8625ed3 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -27,7 +27,7 @@ def get_results_dict(lq_class): "cal_energy_param": lq_class.cal_energy_param, "DEP_means": lq_class.timecorr_df.to_dict("index"), "rt_correction": lq_class.dt_fit_pars, - "cut_fit_pars": lq_class.cut_fit_pars, + "cut_fit_pars": lq_class.cut_fit_pars.to_dict(), "cut_value": lq_class.cut_val, "sfs": lq_class.low_side_sf.to_dict("index"), } From a2f2d7eb7d850f7ae90c2c75835521fd96845a06 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 16:44:30 +0000 Subject: [PATCH 15/17] style: pre-commit fixes --- rules/filelist_gen.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index d0356a8..c90c570 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -220,7 +220,7 @@ def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): file_selection = wildcards.label.split("-", 1)[0] - keypart = f'-{wildcards.label.split("-", 1)[1]}' # remove the file selection from the keypart + keypart = f'-{wildcards.label.split("-",1)[1]}' # remove the file selection from the keypart analysis_runs, ignore_keys = get_analysis_runs( ignore_keys_file, analysis_runs_file, file_selection ) From ce2ad8526e7aad37ec8ff5e38e982d45daa3f120 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 5 Dec 2024 14:46:29 +0100 Subject: [PATCH 16/17] add isotopes where lines are from --- scripts/pars_pht_partcal.py | 56 ++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index a2d74e4..7b6a4ed 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -166,34 +166,34 @@ def calibrate_partition( # calibrate pk_pars = [ - # (238.632, (10, 10), pgf.gauss_on_step), #double line - # (241.0, (10, 10), pgf.gauss_on_step), #double 
line - (277.371, (10, 7), pgf.gauss_on_linear), - (288.2, (7, 10), pgf.gauss_on_linear), - (300.1, (10, 10), pgf.gauss_on_linear), - (453.0, (10, 10), pgf.gauss_on_linear), - # (511, (20, 20), pgf.gauss_on_step), double line - (549.8, (10, 10), pgf.gauss_on_linear), - (583.187, (20, 20), pgf.hpge_peak), - (727.330, (20, 20), pgf.hpge_peak), - (763.13, (20, 10), pgf.gauss_on_linear), - (785.37, (10, 20), pgf.gauss_on_linear), - (860.557, (20, 20), pgf.hpge_peak), - (893.408, (20, 20), pgf.gauss_on_linear), - (927.6, (20, 20), pgf.gauss_on_linear), - (952.120, (20, 20), pgf.gauss_on_linear), - (982.7, (20, 20), pgf.gauss_on_linear), - (1078.62, (20, 7), pgf.gauss_on_linear), - (1093.9, (7, 20), pgf.gauss_on_linear), - (1512.7, (20, 20), pgf.gauss_on_linear), - (1592.511, (20, 20), pgf.hpge_peak), - (1620.50, (20, 20), pgf.hpge_peak), - (1679.7, (20, 20), pgf.gauss_on_linear), - (1806.0, (20, 20), pgf.gauss_on_linear), - (2103.511, (20, 20), pgf.hpge_peak), - (2614.511, (40, 20), pgf.hpge_peak), - (3125.511, (20, 20), pgf.gauss_on_linear), - (3197.7, (20, 20), pgf.gauss_on_linear), + # (238.632, (10, 10), pgf.gauss_on_step), #double line, Pb-212 + # (240.986, (10, 10), pgf.gauss_on_step), #double line, Ra-224 + (277.371, (10, 7), pgf.gauss_on_linear), # Tl-208 + (288.2, (7, 10), pgf.gauss_on_linear), # Bi-212 + (300.087, (10, 10), pgf.gauss_on_linear), # Pb-212 + (452.98, (10, 10), pgf.gauss_on_linear), # Bi-212 + # (511, (20, 20), pgf.gauss_on_step), double line, #e+e- + (549.73, (10, 10), pgf.gauss_on_linear), # Rn-220 + (583.187, (20, 20), pgf.hpge_peak), # Tl-208 + (727.330, (20, 20), pgf.hpge_peak), # Bi-212 + (763.13, (20, 10), pgf.gauss_on_linear), # Tl-208 + (785.37, (10, 20), pgf.gauss_on_linear), # Bi-212 + (860.557, (20, 20), pgf.hpge_peak), # Tl-208 + (893.408, (20, 20), pgf.gauss_on_linear), # Bi-212 + (927.6, (20, 20), pgf.gauss_on_linear), # Tl-208 + (952.120, (20, 20), pgf.gauss_on_linear), # Bi-212 + (982.7, (20, 20), pgf.gauss_on_linear), # Tl-208 + (1078.62, (20, 7), pgf.gauss_on_linear), # Bi-212 + (1093.9, (7, 20), pgf.gauss_on_linear), # Tl-208 + (1512.7, (20, 20), pgf.gauss_on_linear), # Bi-212 + (1592.511, (20, 20), pgf.hpge_peak), # Tl-208 DEP + (1620.50, (20, 20), pgf.hpge_peak), # Bi-212 + (1679.7, (20, 20), pgf.gauss_on_linear), # Bi-212 + (1806.0, (20, 20), pgf.gauss_on_linear), # Bi-212 + (2103.511, (20, 20), pgf.hpge_peak), # Tl-208 SEP + (2614.511, (40, 20), pgf.hpge_peak), # Tl-208 + (3125.511, (20, 20), pgf.gauss_on_linear), # Summation + (3197.7, (20, 20), pgf.gauss_on_linear), # Summation (3475.1, (20, 20), pgf.gauss_on_linear), ] From 2deac35ff8c30a90eb13835d7f8e0e447ef803e4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 5 Dec 2024 21:03:13 +0100 Subject: [PATCH 17/17] choose ctc based on no_ctc energy instead --- scripts/pars_hit_ecal.py | 2 +- scripts/pars_pht_partcal.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index c94041d..43ba644 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -636,7 +636,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): hit_dict.update( { cal_energy_param.replace("_ctc", ""): { - "expression": f"where({cal_energy_param}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", + "expression": f"where({cal_energy_param.replace('ctc','noctc')}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", 
"parameters": {}, } } diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 7b6a4ed..a454d76 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -308,7 +308,7 @@ def calibrate_partition( cal_dicts, { cal_energy_param.replace("_ctc", ""): { - "expression": f"where({cal_energy_param}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", + "expression": f"where({cal_energy_param.replace('ctc', 'noctc')}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", "parameters": {}, } },