diff --git a/.ruff.toml b/.ruff.toml index 29f8014..8b4d420 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -12,7 +12,7 @@ lint.select = [ "PIE", # flake8-pie "PL", # pylint "PT", # flake8-pytest-style - # "PTH", # flake8-use-pathlib + "PTH", # flake8-use-pathlib "RET", # flake8-return "RUF", # Ruff-specific "SIM", # flake8-simplify diff --git a/README.md b/README.md index 2459337..3565167 100644 --- a/README.md +++ b/README.md @@ -3,115 +3,3 @@ Implementation of an automatic data processing flow for L200 data, based on [Snakemake](https://snakemake.readthedocs.io/). - - -## Configuration - -Data processing resources are configured via a single site-dependent (and -possibly user-dependent) configuration file, named `config.json` in the -following. You may choose an arbitrary name, though. - -Use the included [templates/config.json](templates/config.json) as a template -and adjust the data base paths as necessary. Note that, when running Snakemake, -the default path to the config file is `./config.json`. - - -## Key-Lists - -Data generation is based on key-lists, which are flat text files -(extension ".keylist") containing one entry of the form -`{experiment}-{period}-{run}-{datatype}-{timestamp}` per line. - -Key-lists can be auto-generated based on the available DAQ files -using Snakemake targets of the form - -* `all-{experiment}.keylist` -* `all-{experiment}-{period}.keylist` -* `all-{experiment}-{period}-{run}.keylist` -* `all-{experiment}-{period}-{run}-{datatype}.keylist` - -which will generate the list of available file keys for all l200 files, resp. -a specific period, or a specific period and run, etc. - -For example: -```shell -$ snakemake all-l200-myper.keylist -``` -will generate a key-list with all files regarding period `myper`. - - -## File-Lists - -File-lists are flat files listing output files that should be generated, -with one file per line. A file-list will typically be generated for a given -data tier from a key-list, using the Snakemake targets of the form -`{label}-{tier}.filelist` (generated from `{label}.keylist`). - -For file lists based on auto-generated key-lists like -`all-{experiment}-{period}-{tier}.filelist`, the corresponding key-list -(`all-{experiment}-{period}.keylist` in this case) will be created -automatically, if it doesn't exist. - -Example: -```shell -$ snakemake all-mydet-mymeas-tier2.filelist -``` - -File-lists may of course also be derived from custom keylists, generated -manually or by other means, e.g. `my-dataset-raw.filelist` will be -generated from `my-dataset.keylist`. - - -## Main output generation - -Usually, the main output will be determined by a file-list, resp. a key-list -and data tier. The special output target `{label}-{tier}.gen` is used to -generate all files listed in `{label}-{tier}.filelist`. After the files -are created, the empty file `{label}-{tier}.filelist` will be created to -mark the successful data production. - -Snakemake targets like `all-{experiment}-{period}-{tier}.gen` may be used -to automatically generate key-lists and file-lists (if not already present) -and produce all possible output for the given data tier, based on available -tier0 files which match the target. - -Example: -```shell -$ snakemake all-mydet-mymeas-tier2.gen -``` -Targets like `my-dataset-raw.gen` (derived from a key-list -`my-dataset.keylist`) are of course allowed as well. - - -## Monitoring - -Snakemake supports monitoring by connecting to a -[panoptes](https://github.com/panoptes-organization/panoptes) server. - -Run (e.g.) 
-```shell -$ panoptes --port 5000 -``` -in the background to run a panoptes server instance, which comes with a -GUI that can be accessed with a web-brower on the specified port. - -Then use the Snakemake option `--wms-monitor` to instruct Snakemake to push -progress information to the panoptes server: -```shell -snakemake --wms-monitor http://127.0.0.1:5000 [...] -``` - -## Using software containers - -This dataflow doesn't use Snakemake's internal Singularity support, but -instead supports Singularity containers via -[`venv`](https://github.com/oschulz/singularity-venv) environments -for greater control. - -To use this, the path to `venv` and the name of the environment must be set -in `config.json`. - -This is only relevant then running Snakemake *outside* of the software -container, e.g. then using a batch system (see below). If Snakemake -and the whole workflow is run inside of a container instance, no -container-related settings in `config.json` are required. diff --git a/Snakefile b/Snakefile index 017f0b1..0174479 100644 --- a/Snakefile +++ b/Snakefile @@ -10,7 +10,7 @@ This includes: - the same for partition level tiers """ -import pathlib +from pathlib import Path import os import json import sys @@ -20,8 +20,8 @@ from collections import OrderedDict import logging import scripts.util as ds -from scripts.util.pars_loading import pars_catalog -from scripts.util.patterns import get_pattern_tier_raw +from scripts.util.pars_loading import ParsCatalog +from scripts.util.patterns import get_pattern_tier from scripts.util.utils import ( subst_vars_in_snakemake_config, runcmd, @@ -31,6 +31,7 @@ from scripts.util.utils import ( metadata_path, tmp_log_path, pars_path, + det_status_path, ) # Set with `snakemake --configfile=/path/to/your/config.json` @@ -43,8 +44,9 @@ setup = config["setups"]["l200"] configs = config_path(setup) chan_maps = chan_map_path(setup) meta = metadata_path(setup) +det_status = det_status_path(setup) swenv = runcmd(setup) -part = ds.dataset_file(setup, os.path.join(configs, "partitions.json")) +part = ds.CalGrouping(setup, Path(det_status) / "cal_partitions.yaml") basedir = workflow.basedir @@ -66,38 +68,13 @@ include: "rules/psp.smk" include: "rules/hit.smk" include: "rules/pht.smk" include: "rules/pht_fast.smk" +include: "rules/ann.smk" include: "rules/evt.smk" include: "rules/skm.smk" include: "rules/blinding_calibration.smk" include: "rules/qc_phy.smk" -# Log parameter catalogs in validity.jsonl files -hit_par_cat_file = os.path.join(pars_path(setup), "hit", "validity.jsonl") -if os.path.isfile(hit_par_cat_file): - os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) -pathlib.Path(os.path.dirname(hit_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(hit_par_catalog, hit_par_cat_file) - -pht_par_cat_file = os.path.join(pars_path(setup), "pht", "validity.jsonl") -if os.path.isfile(pht_par_cat_file): - os.remove(os.path.join(pars_path(setup), "pht", "validity.jsonl")) -pathlib.Path(os.path.dirname(pht_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(pht_par_catalog, pht_par_cat_file) - -dsp_par_cat_file = os.path.join(pars_path(setup), "dsp", "validity.jsonl") -if os.path.isfile(dsp_par_cat_file): - os.remove(dsp_par_cat_file) -pathlib.Path(os.path.dirname(dsp_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(dsp_par_catalog, dsp_par_cat_file) - -psp_par_cat_file = os.path.join(pars_path(setup), "psp", "validity.jsonl") -if 
os.path.isfile(psp_par_cat_file): - os.remove(psp_par_cat_file) -pathlib.Path(os.path.dirname(psp_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(psp_par_catalog, psp_par_cat_file) - - localrules: gen_filelist, autogen_output, @@ -111,36 +88,36 @@ onstart: shell('{swenv} python3 -B -c "import ' + pkg + '"') # Log parameter catalogs in validity.jsonl files - hit_par_cat_file = os.path.join(pars_path(setup), "hit", "validity.jsonl") - if os.path.isfile(hit_par_cat_file): - os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) - pathlib.Path(os.path.dirname(hit_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(hit_par_catalog, hit_par_cat_file) - - pht_par_cat_file = os.path.join(pars_path(setup), "pht", "validity.jsonl") - if os.path.isfile(pht_par_cat_file): - os.remove(os.path.join(pars_path(setup), "pht", "validity.jsonl")) - pathlib.Path(os.path.dirname(pht_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(pht_par_catalog, pht_par_cat_file) - - dsp_par_cat_file = os.path.join(pars_path(setup), "dsp", "validity.jsonl") - if os.path.isfile(dsp_par_cat_file): - os.remove(dsp_par_cat_file) - pathlib.Path(os.path.dirname(dsp_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(dsp_par_catalog, dsp_par_cat_file) - - psp_par_cat_file = os.path.join(pars_path(setup), "psp", "validity.jsonl") - if os.path.isfile(psp_par_cat_file): - os.remove(psp_par_cat_file) - pathlib.Path(os.path.dirname(psp_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(psp_par_catalog, psp_par_cat_file) + hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" + if hit_par_cat_file.is_file(): + hit_par_cat_file.unlink() + Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) + + pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" + if pht_par_cat_file.is_file(): + pht_par_cat_file.unlink() + Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) + + dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" + if dsp_par_cat_file.is_file(): + dsp_par_cat_file.unlink() + Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) + + psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" + if psp_par_cat_file.is_file(): + psp_par_cat_file.unlink() + Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) onsuccess: from snakemake.report import auto_report rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" - pathlib.Path(rep_dir).mkdir(parents=True, exist_ok=True) + Path(rep_dir).mkdir(parents=True, exist_ok=True) # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html") with open(os.path.join(rep_dir, "dag.txt"), "w") as f: @@ -157,15 +134,15 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists - files = glob.glob(os.path.join(filelist_path(setup), "*")) - for file in files: - if os.path.isfile(file): - os.remove(file) - if os.path.exists(filelist_path(setup)): - os.rmdir(filelist_path(setup)) + # # remove filelists + # files = glob.glob(os.path.join(filelist_path(setup), "*")) + # for file in files: + # 
if os.path.isfile(file):
+    #             os.remove(file)
+    # if os.path.exists(filelist_path(setup)):
+    #     os.rmdir(filelist_path(setup))

-    # remove logs
+    # remove logs
     files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log"))
     for file in files:
         if os.path.isfile(file):
@@ -190,16 +167,17 @@ rule gen_filelist:
         lambda wildcards: get_filelist(
             wildcards,
             setup,
-            get_pattern_tier_raw(setup),
-            ignore_keys_file=os.path.join(configs, "ignore_keys.keylist"),
-            analysis_runs_file=os.path.join(configs, "analysis_runs.json"),
+            get_pattern_tier(setup, "raw", check_in_cycle=False),
+            ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml",
+            analysis_runs_file=Path(det_status) / "runlists.yaml",
         ),
     output:
-        os.path.join(filelist_path(setup), "{label}-{tier}.filelist"),
+        temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"),
     run:
         if len(input) == 0:
             print(
-                "WARNING: No files found for the given pattern\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen"
+                f"WARNING: No files found for the given pattern:{wildcards.label}",
+                "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen",
            )
         with open(output[0], "w") as f:
             for fn in input:
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..9be493d
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,21 @@
+SHELL := /bin/bash
+SOURCEDIR = source
+BUILDDIR = build
+
+all: apidoc
+	sphinx-build -M html "$(SOURCEDIR)" "$(BUILDDIR)" -W --keep-going
+
+apidoc: clean-apidoc
+	sphinx-apidoc \
+		--private \
+		--module-first \
+		--force \
+		--output-dir "$(SOURCEDIR)/api" \
+		../scripts \
+		../rules
+
+clean-apidoc:
+	rm -rf "$(SOURCEDIR)/api"
+
+clean: clean-apidoc
+	rm -rf "$(BUILDDIR)"
diff --git a/docs/source/developer.rst b/docs/source/developer.rst
new file mode 100644
index 0000000..b6d7560
--- /dev/null
+++ b/docs/source/developer.rst
@@ -0,0 +1,15 @@
+Developers Guide
+================
+
+Snakemake is configured around a series of rules which specify how to generate a file/files from a set of input files.
+These rules are defined in the ``Snakefile`` and in the files in the ``rules`` directory.
+In general the structure is that a series of rules are defined to run on some calibration data, generating
+a final ``par_{tier}.yaml`` file at the end which can be used by the ``tier`` rule to generate all the files in the tier.
+For most rules there are two versions: the basic version, which uses a single run,
+and the partition version, which groups many runs together.
+This grouping is defined in the ``cal_grouping.yaml`` file in the ``legend-datasets`` repository.
+
+Each rule specifies its inputs and outputs, along with how to generate the output, which can be
+a shell command or a call to a Python function. These scripts are stored in the ``scripts`` directory.
+Additional parameters can also be defined.
+Full details can be found in the `Snakemake documentation <https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html>`_.
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..8534e71
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,41 @@
+Welcome to legend-dataflow's documentation!
+===========================================
+
+*legend-dataflow* is a Python package based on `Snakemake <https://snakemake.readthedocs.io/>`_
+for running the data production of LEGEND.
+It is designed to calibrate and optimise hundreds of channels in parallel before
+bringing them all together to process the data.
+It takes as input the metadata stored in the ``legend-metadata`` repository.
+
+Getting started
+---------------
+
+It is recommended to install and use the package through ``legend-prodenv``.
+
+Next steps
+----------
+
+.. toctree::
+   :maxdepth: 1
+
+   Package API reference 
+
+.. toctree::
+   :maxdepth: 1
+
+   tutorials
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Related projects
+
+   LEGEND Data Objects 
+   Decoding Digitizer Data 
+   Digital Signal Processing 
+   Pygama 
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Development
+
+   Source Code 
diff --git a/docs/source/user_manual.rst b/docs/source/user_manual.rst
new file mode 100644
index 0000000..fb3e81b
--- /dev/null
+++ b/docs/source/user_manual.rst
@@ -0,0 +1,98 @@
+Configuration
+=============
+
+Data processing resources are configured via a single site-dependent (and
+possibly user-dependent) configuration file, generally named ``config.json``,
+although you may choose an arbitrary name.
+
+A template for this file is located at ``templates/config.json``,
+which can be copied to the working directory and
+the paths adjusted as necessary. Note that, when running Snakemake,
+the default path to the config file is ``./config.json``.
+
+Profiles
+========
+
+A number of profiles are also included in the ``profiles`` directory. If none are specified,
+the default profile is used. The profile can be specified by using the ``--profile`` option
+when running Snakemake. These control how many jobs are run simultaneously, based on how many cores
+are specified and the memory constraints of the system. A full list of all the options
+that can be passed to Snakemake can be found in the `Snakemake documentation <https://snakemake.readthedocs.io/>`_.
+
+
+Running the Dataflow
+====================
+
+To run the dataflow at the most basic level, all that is necessary is to tell Snakemake the target file
+to generate. In a simple case this may just be a single file, e.g.
+```shell
+$ snakemake /data2/public/prodenv/prod-blind/ref-v1.0.0/generated/tier/dsp/p03/r000/l200-p03-r000-cal-20230401T000000Z-tier_dsp.lh5
+```
+This would generate the file and all the files that are required to generate it.
+In general though we want to generate a large number of files, and we can do this using the ``gen`` target.
+
+Main output generation
+======================
+
+Usually, the main output will be determined by a file-list.
+The special output target ``{label}-{tier}.gen`` is used to
+generate all files that follow the label up to the specified tier.
+The label is composed of the following parts:
+
+- the filelist designator: in most cases this will be ``all``, but other options are specified in the ``runlists.yaml`` file
+  in the ``legend-datasets`` repository
+- experiment: the experiment name, i.e. ``l200``
+- period: the period of the data, e.g. ``p03``
+- run: the run number, e.g. ``r000``
+- datatype: the data type, e.g. ``cal``
+- timestamp: the timestamp of the data, e.g. ``20230401T000000Z``
+
+Example:
+```shell
+$ snakemake all-l200-p03-r001-cal-20230401T000000Z-dsp.gen
+```
+
+You can specify as many or as few of these parts as you like, e.g. ``all-l200-p03-dsp.gen``.
+If you want to specify a lower part of the label but leave a higher part free,
+you can use the ``*`` character, e.g. ``all-l200-p03-*-cal-dsp.gen``.
+Additionally, if you want to specify multiple options for a part of the label, you can use the ``_`` character between them,
+e.g. ``all-l200-p03-r000_r001-dsp.gen``.
+
+After the files
+are created, the empty file ``{label}-{tier}.gen`` will be created to
+mark the successful data production.
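+
+For orientation, the sketch below shows one way such a label can be decomposed
+into the file-selection keyword and the individual key parts. This is only an
+illustration of the naming scheme described above: the helper ``split_label`` is
+hypothetical and not part of the dataflow, whose actual parsing lives in
+``rules/filelist_gen.smk``.
+```python
+# Hypothetical helper, for illustration of the label scheme only;
+# the real parsing is done in rules/filelist_gen.smk.
+def split_label(label):
+    """Split a .gen label into the file-selection keyword and its key parts."""
+    # the first token selects the run list ("all" or an entry in runlists.yaml)
+    file_selection, _, keypart = label.partition("-")
+    # remaining tokens: experiment, period, run, datatype, timestamp (any may be absent)
+    parts = keypart.split("-") if keypart else []
+    # "_" separates multiple options for the same part, e.g. "r000_r001"
+    return file_selection, [part.split("_") for part in parts]
+
+print(split_label("all-l200-p03-r000_r001-cal"))
+# ('all', [['l200'], ['p03'], ['r000', 'r001'], ['cal']])
+```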
+
+
+Monitoring
+==========
+
+Snakemake supports monitoring by connecting to a
+`panoptes <https://github.com/panoptes-organization/panoptes>`_ server.
+
+Run (e.g.)
+```shell
+$ panoptes --port 5000
+```
+in the background to run a panoptes server instance, which comes with a
+GUI that can be accessed with a web-browser on the specified port.
+
+Then use the Snakemake option ``--wms-monitor`` to instruct Snakemake to push
+progress information to the panoptes server:
+```shell
+snakemake --wms-monitor http://127.0.0.1:5000 [...]
+```
+
+Using software containers
+=========================
+
+This dataflow doesn't use Snakemake's internal Singularity support, but
+instead supports Singularity containers via
+`venv <https://github.com/oschulz/singularity-venv>`_ environments
+for greater control.
+
+To use this, the path to ``venv`` and the name of the environment must be set
+in ``config.json``.
+
+This is only relevant when running Snakemake *outside* of the software
+container, e.g. when using a batch system. If Snakemake
+and the whole workflow is run inside of a container instance, no
+container-related settings in ``config.json`` are required.
diff --git a/rules/ann.smk b/rules/ann.smk
new file mode 100644
index 0000000..15558ae
--- /dev/null
+++ b/rules/ann.smk
@@ -0,0 +1,75 @@
+"""
+Snakemake rules for processing ann tier. This is done only for the coax detectors
+to apply the ann and risetime cuts for psd.
+
+"""
+
+from scripts.util.patterns import (
+    get_pattern_tier,
+    get_pattern_log,
+    get_pattern_pars,
+)
+
+
+rule build_ann:
+    input:
+        dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False),
+        pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"),
+    params:
+        timestamp="{timestamp}",
+        datatype="{datatype}",
+    output:
+        tier_file=get_pattern_tier(setup, "ann", check_in_cycle=check_in_cycle),
+        db_file=get_pattern_pars_tmp(setup, "ann_db"),
+    log:
+        get_pattern_log(setup, "tier_ann"),
+    group:
+        "tier-ann"
+    resources:
+        runtime=300,
+        mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15,
+    shell:
+        "{swenv} python3 -B "
+        f"{workflow.source_path('../scripts/build_dsp.py')} "
+        "--log {log} "
+        "--configs {configs} "
+        "--metadata {meta} "
+        f"--tier ann "
+        "--datatype {params.datatype} "
+        "--timestamp {params.timestamp} "
+        "--input {input.dsp_file} "
+        "--output {output.tier_file} "
+        "--db_file {output.db_file} "
+        "--pars_file {input.pars_file} "
+
+
+rule build_pan:
+    input:
+        dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False),
+        pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"),
+    params:
+        timestamp="{timestamp}",
+        datatype="{datatype}",
+    output:
+        tier_file=get_pattern_tier(setup, "pan", check_in_cycle=check_in_cycle),
+        db_file=get_pattern_pars_tmp(setup, "pan_db"),
+    log:
+        get_pattern_log(setup, "tier_pan"),
+    group:
+        "tier-ann"
+    resources:
+        runtime=300,
+        mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15,
+    shell:
+        "{swenv} python3 -B "
+        f"{workflow.source_path('../scripts/build_dsp.py')} "
+        "--log {log} "
+        "--configs {configs} "
+        "--metadata {meta} "
+        f"--tier pan "
+        "--datatype {params.datatype} "
+        "--timestamp {params.timestamp} "
+        "--input {input.dsp_file} "
+        "--output {output.tier_file} "
+        "--db_file {output.db_file} "
+        "--pars_file {input.pars_file} "
diff --git a/rules/blinding_calibration.smk b/rules/blinding_calibration.smk
index ef0a11e..85ee2f6 100644
--- a/rules/blinding_calibration.smk
+++ b/rules/blinding_calibration.smk
@@ -5,12 +5,13 @@ Snakemake rules for calibrating daq energy for blinding.
Two steps: """ from scripts.util.patterns import ( - get_pattern_par_raw, + get_pattern_pars, get_pattern_plts, get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, ) +from pathlib import Path rule build_blinding_calibration: @@ -19,9 +20,8 @@ rule build_blinding_calibration: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), + files=Path(filelist_path(setup)) + / "all-{experiment}-{period}-{run}-cal-raw.filelist", params: timestamp="{timestamp}", datatype="cal", @@ -57,7 +57,7 @@ rule build_plts_blinding: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, name="blindcal", ), @@ -79,7 +79,7 @@ rule build_pars_blinding: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, name="blindcal", ), diff --git a/rules/blinding_check.smk b/rules/blinding_check.smk index 653eb3f..eb3407d 100644 --- a/rules/blinding_check.smk +++ b/rules/blinding_check.smk @@ -8,10 +8,11 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_raw, + get_pattern_pars, get_pattern_plts, get_pattern_pars, ) +from pathlib import Path rule build_blinding_check: @@ -20,9 +21,8 @@ rule build_blinding_check: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), + files=Path(filelist_path(setup)) + / "all-{experiment}-{period}-{run}-cal-raw.filelist", par_file=get_blinding_curve_file, params: timestamp="{timestamp}", @@ -59,7 +59,7 @@ rule build_plts_raw: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, ), output: @@ -80,7 +80,7 @@ rule build_pars_raw: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts( diff --git a/rules/chanlist_gen.smk b/rules/chanlist_gen.smk index 1dc4957..820d0fa 100644 --- a/rules/chanlist_gen.smk +++ b/rules/chanlist_gen.smk @@ -13,7 +13,7 @@ from scripts.util.utils import filelist_path, runcmd def get_par_chanlist( - setup, keypart, tier, basedir, configs, chan_maps, name=None, extension="json" + setup, keypart, tier, basedir, det_status, chan_maps, name=None, extension="yaml" ): tier_pattern = "((?P[^_]+)(\\_(?P[^_]+)(\\_(?P[^_]+)?)?)?)?" 
keypart_rx = re.compile(tier_pattern) @@ -28,7 +28,7 @@ def get_par_chanlist( f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --configs {configs}" + cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" os.system(cmd) @@ -42,7 +42,7 @@ def get_par_chanlist( return filenames -def get_plt_chanlist(setup, keypart, tier, basedir, configs, chan_maps, name=None): +def get_plt_chanlist(setup, keypart, tier, basedir, det_status, chan_maps, name=None): key = ChannelProcKey.parse_keypart(keypart) output_file = os.path.join( @@ -50,7 +50,7 @@ def get_plt_chanlist(setup, keypart, tier, basedir, configs, chan_maps, name=Non f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --configs {configs}" + cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" os.system(cmd) diff --git a/rules/common.smk b/rules/common.smk index c74f514..6ba4654 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -2,16 +2,17 @@ Helper functions for running data production """ -import pathlib, os +from pathlib import Path from scripts.util.patterns import ( par_overwrite_path, - par_raw_path, + get_pars_path, get_pattern_unsorted_data, get_pattern_tier_daq, - get_pattern_tier_raw, + get_pattern_tier, get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey +from scripts.util.catalog import Catalog from scripts.util import utils @@ -21,8 +22,8 @@ def ro(path): def get_blinding_curve_file(wildcards): """func to get the blinding calibration curves from the overrides""" - par_files = pars_catalog.get_calib_files( - Path(par_overwrite_path(setup)) / "raw" / "validity.jsonl", + par_files = Catalog.get_files( + Path(par_overwrite_path(setup)) / "raw" / "validity.yaml", wildcards.timestamp, ) if isinstance(par_files, str): @@ -36,13 +37,13 @@ def get_blinding_curve_file(wildcards): def get_blinding_check_file(wildcards): """func to get the right blinding check file""" - par_files = pars_catalog.get_calib_files( - Path(par_raw_path(setup)) / "validity.jsonl", wildcards.timestamp + par_files = Catalog.get_files( + Path(get_pars_path(setup, "raw")) / "validity.yaml", wildcards.timestamp ) if isinstance(par_files, str): - return str(Path(par_raw_path(setup)) / par_files) + return Path(get_pars_path(setup, "raw")) / par_files else: - return [str(Path(par_raw_path(setup)) / par_file) for par_file in par_files] + return [Path(get_pars_path(setup, "raw")) / par_file for par_file in par_files] def set_last_rule_name(workflow, new_name): @@ -70,35 +71,38 @@ def set_last_rule_name(workflow, new_name): workflow.check_localrules() -def get_svm_file(wildcards, tier, name): - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, wildcards.timestamp +def get_input_par_file(wildcards, tier, name): + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" + pars_files_overwrite = Catalog.get_files( + 
par_overwrite_file, + wildcards.timestamp, ) for pars_file in pars_files_overwrite: - if name in pars_file: - return os.path.join(par_overwrite_path(setup), tier, pars_file) + if name in str(pars_file): + return Path(par_overwrite_path(setup)) / tier / pars_file raise ValueError(f"Could not find model in {pars_files_overwrite}") def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" if timestamp is not None: - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, timestamp + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + timestamp, ) else: - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, wildcards.timestamp + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + wildcards.timestamp, ) if name is None: - fullname = f"{tier}-overwrite.json" + fullname = f"{tier}-overwrite.yaml" else: - fullname = f"{tier}_{name}-overwrite.json" + fullname = f"{tier}_{name}-overwrite.yaml" out_files = [] for pars_file in pars_files_overwrite: - if fullname in pars_file: - out_files.append(os.path.join(par_overwrite_path(setup), tier, pars_file)) + if fullname in str(pars_file): + out_files.append(Path(par_overwrite_path(setup)) / tier / pars_file) if len(out_files) == 0: raise ValueError(f"Could not find name in {pars_files_overwrite}") else: @@ -114,4 +118,4 @@ def get_tier_pattern(tier): elif tier == "raw": return get_pattern_tier_daq(setup) else: - return get_pattern_tier_raw(setup) + return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/dsp.smk b/rules/dsp.smk index 661a990..7ae67a7 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -6,31 +6,33 @@ Snakemake rules for processing dsp tier. 
This is done in 4 steps: - running dsp over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_dsp_path +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.create_pars_keylist import ParsKeyResolve from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_dsp, get_pattern_plts, - get_pattern_tier_raw, - get_pattern_tier_tcm, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, get_pattern_pars, - get_pattern_pars_overwrite, - get_pattern_pars_svm, ) -dsp_par_catalog = pars_key_resolve.get_par_catalog( +dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) +dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" +if dsp_par_cat_file.is_file(): + dsp_par_cat_file.unlink() +Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) + rule build_pars_dsp_tau: input: @@ -56,13 +58,14 @@ rule build_pars_dsp_tau: "{basedir}/../scripts/pars_dsp_tau.py " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " "--plot_path {output.plots} " "--output_file {output.decay_const} " "--pulser_file {input.pulser} " - "--raw_files {input.files}" + "--raw_files {input.files} " rule build_pars_event_selection: @@ -91,6 +94,7 @@ rule build_pars_event_selection: "{basedir}/../scripts/pars_dsp_event_selection.py " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -130,6 +134,7 @@ rule build_pars_dsp_nopt: "--database {input.database} " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -173,6 +178,7 @@ rule build_pars_dsp_dplms: "--inplots {input.inplots} " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -208,6 +214,7 @@ rule build_pars_dsp_eopt: "{basedir}/../scripts/pars_dsp_eopt.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -221,14 +228,16 @@ rule build_pars_dsp_eopt: rule build_svm_dsp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( wildcards, "dsp", "svm_hyperpars" - ).replace("hyperpars.json", "train.lh5"), + ), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "dsp", "svm_hyperpars") + ).replace("hyperpars.yaml", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal"), + str(get_pattern_log(setup, "pars_dsp_svm")).replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -270,9 +279,12 @@ rule build_plts_dsp: 
f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, ), + params: + timestamp="{timestamp}", + datatype="cal", output: get_pattern_plts(setup, "dsp"), group: @@ -282,6 +294,7 @@ rule build_plts_dsp: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_dsp_objects: @@ -291,11 +304,14 @@ rule build_pars_dsp_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", ), + params: + timestamp="{timestamp}", + datatype="cal", output: get_pattern_pars( setup, @@ -311,6 +327,8 @@ rule build_pars_dsp_objects: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_pars_dsp_db: @@ -320,9 +338,12 @@ rule build_pars_dsp_db: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, ), + params: + timestamp="{timestamp}", + datatype="cal", output: temp( get_pattern_pars_tmp( @@ -338,82 +359,89 @@ rule build_pars_dsp_db: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " -rule build_pars_dsp: - input: - in_files=lambda wildcards: get_par_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - configs, - chan_maps, - name="dplms", - extension="lh5", - ), - in_db=get_pattern_pars_tmp( - setup, - "dsp", - datatype="cal", - ), - plts=get_pattern_plts(setup, "dsp"), - objects=get_pattern_pars( - setup, - "dsp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - output: - out_file=get_pattern_pars( - setup, - "dsp", - extension="lh5", - check_in_cycle=check_in_cycle, - ), - out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), - group: - "merge-dsp" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--output {output.out_file} " - "--in_db {input.in_db} " - "--out_db {output.out_db} " - "--input {input.in_files} " - - -rule build_dsp: - input: - raw_file=get_pattern_tier_raw(setup), - pars_file=ancient( - lambda wildcards: pars_catalog.get_par_file( - setup, wildcards.timestamp, "dsp" - ) - ), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - output: - tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "dsp_db"), - log: - get_pattern_log(setup, "tier_dsp"), - group: - "tier-dsp" - resources: - runtime=300, - mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " - "--log {log} " - f"--configs {ro(configs)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--input {params.ro_input[raw_file]} " - "--output {output.tier_file} " - "--db_file {output.db_file} " - "--pars_file {params.ro_input[pars_file]} " +# rule build_pars_dsp: +# input: +# in_files=lambda wildcards: get_par_chanlist( +# setup, +# f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", +# "dsp", +# basedir, +# 
det_status, +# chan_maps, +# name="dplms", +# extension="lh5", +# ), +# in_db=get_pattern_pars_tmp( +# setup, +# "dsp", +# datatype="cal", +# ), +# plts=get_pattern_plts(setup, "dsp"), +# objects=get_pattern_pars( +# setup, +# "dsp", +# name="objects", +# extension="dir", +# check_in_cycle=check_in_cycle, +# ), +# params: +# timestamp="{timestamp}", +# datatype="cal", +# output: +# out_file=get_pattern_pars( +# setup, +# "dsp", +# extension="lh5", +# check_in_cycle=check_in_cycle, +# ), +# out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), +# group: +# "merge-dsp" +# shell: +# "{swenv} python3 -B " +# "{basedir}/../scripts/merge_channels.py " +# "--output {output.out_file} " +# "--in_db {input.in_db} " +# "--out_db {output.out_db} " +# "--input {input.in_files} " +# "--timestamp {params.timestamp} " +# "--channelmap {meta} " +# rule build_dsp: +# input: +# raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), +# pars_file=ancient( +# lambda wildcards: ParsCatalog.get_par_file( +# setup, wildcards.timestamp, "dsp" +# ) +# ), +# params: +# timestamp="{timestamp}", +# datatype="{datatype}", +# ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, +# output: +# tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), +# db_file=get_pattern_pars_tmp(setup, "dsp_db"), +# log: +# get_pattern_log(setup, "tier_dsp"), +# group: +# "tier-dsp" +# resources: +# runtime=300, +# mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, +# shell: +# "{swenv} python3 -B " +# "{basedir}/../scripts/build_dsp.py " +# "--log {log} " +# "--tier dsp " +# f"--configs {ro(configs)} " +# "--metadata {meta} " +# "--datatype {params.datatype} " +# "--timestamp {params.timestamp} " +# "--input {params.ro_input[raw_file]} " +# "--output {output.tier_file} " +# "--db_file {output.db_file} " +# "--pars_file {params.ro_input[pars_file]} " diff --git a/rules/evt.smk b/rules/evt.smk index d51ad39..112c92c 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -2,13 +2,8 @@ Snakemake rules for processing evt tier. 
""" -from scripts.util.pars_loading import pars_catalog +from scripts.util.pars_loading import ParsCatalog from scripts.util.patterns import ( - get_pattern_tier_hit, - get_pattern_tier_dsp, - get_pattern_tier_tcm, - get_pattern_tier_pht, - get_pattern_tier_psp, get_pattern_tier, get_pattern_log, get_pattern_pars, @@ -16,42 +11,91 @@ from scripts.util.patterns import ( ) -for tier in ("evt", "pet"): +rule build_evt: + input: + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + hit_file=get_pattern_tier(setup, "hit", check_in_cycle=False), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + ann_file=lambda wildcards: ( + None + if int(wildcards["period"][1:]) > 11 + else get_pattern_tier(setup, "ann", check_in_cycle=False) + ), + par_files=lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "hit" + ), + xtalk_matrix=lambda wildcards: get_input_par_file( + tier="evt", wildcards=wildcards, name="xtc" + ), + output: + get_pattern_tier(setup, "evt", check_in_cycle=check_in_cycle), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier="evt", + ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, + log: + get_pattern_log(setup, f"tier_evt"), + group: + "tier-evt" + resources: + runtime=300, + mem_swap=50, + run: + shell_string = ( + f"{swenv} python3 -B " + f"{basedir}/../scripts/build_evt.py " + f"--configs {ro(configs)} " + f"--metadata {ro(meta)} " + "--log {log} " + "--tier {params.tier} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--xtc_file {params.ro_input[xtalk_matrix]} " + "--par_files {params.ro_input[par_files]} " + "--hit_file {params.ro_input[hit_file]} " + "--tcm_file {params.ro_input[tcm_file]} " + "--dsp_file {params.ro_input[dsp_file]} " + "--output {output} " + ) + if input.ann_file is not None: + shell_string += "--ann_file {params.ro_input[ann_file]} " - rule: - input: - dsp_file=( - get_pattern_tier_dsp(setup) - if tier == "evt" - else get_pattern_tier_psp(setup) - ), - hit_file=( - get_pattern_tier_hit(setup) - if tier == "evt" - else get_pattern_tier_pht(setup) - ), - tcm_file=get_pattern_tier_tcm(setup), - xtalk_matrix=lambda wildcards: get_svm_file( - tier=tier, wildcards=wildcards, name="xtc" - ), - par_files=lambda wildcards: pars_catalog.get_par_file( - setup, wildcards.timestamp, "pht" - ), - output: - get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), - params: - timestamp="{timestamp}", - datatype="{datatype}", - tier=tier, - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - log: - get_pattern_log(setup, f"tier_{tier}"), - group: - "tier-evt" - resources: - runtime=300, - mem_swap=50, - shell: + shell(shell_string) + + +rule build_pet: + input: + dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), + hit_file=get_pattern_tier(setup, "pht", check_in_cycle=False), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + ann_file=lambda wildcards: ( + None + if int(wildcards["period"][1:]) > 11 + else get_pattern_tier(setup, "pan", check_in_cycle=False) + ), + par_files=lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "pht" + ), + xtalk_matrix=lambda wildcards: get_input_par_file( + tier="pet", wildcards=wildcards, name="xtc" + ), + output: + get_pattern_tier(setup, "pet", check_in_cycle=check_in_cycle), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier="pet", + ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, + log: + get_pattern_log(setup, f"tier_pet"), + 
group: + "tier-evt" + resources: + runtime=300, + mem_swap=50, + run: + shell_string = ( f"{swenv} python3 -B " f"{basedir}/../scripts/build_evt.py " f"--configs {ro(configs)} " @@ -66,8 +110,14 @@ for tier in ("evt", "pet"): "--tcm_file {params.ro_input[tcm_file]} " "--dsp_file {params.ro_input[dsp_file]} " "--output {output} " + ) + if input.ann_file is not None: + shell_string += "--ann_file {params.ro_input[ann_file]} " + + shell(shell_string) + - set_last_rule_name(workflow, f"build_{tier}") +for evt_tier in ("evt", "pet"): rule: wildcard_constraints: @@ -83,14 +133,14 @@ for tier in ("evt", "pet"): ) ), output: - get_pattern_tier(setup, f"{tier}_concat", check_in_cycle=check_in_cycle), + get_pattern_tier(setup, f"{evt_tier}_concat", check_in_cycle=check_in_cycle), params: timestamp="all", datatype="{datatype}", lh5concat_exe=setup["paths"]["install"] + "/bin/lh5concat", ro_input=lambda _, input: utils.as_ro(setup, input), log: - get_pattern_log_concat(setup, f"tier_{tier}_concat"), + get_pattern_log_concat(setup, f"tier_{evt_tier}_concat"), group: "tier-evt" shell: @@ -98,4 +148,4 @@ for tier in ("evt", "pet"): "--output {output} " "-- {params.ro_input} &> {log}" - set_last_rule_name(workflow, f"concat_{tier}") + set_last_rule_name(workflow, f"concat_{evt_tier}") diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 557d492..c90c570 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -1,36 +1,82 @@ import glob -import json -import os +import json, yaml +from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind +concat_datatypes = ["phy"] +concat_tiers = ["skm", "pet_concat", "evt_concat"] +blind_datatypes = ["phy"] -def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): + +def expand_runs(in_dict): + """ + This function expands out the runs if a range is specified in the dictionary + e.g. + { + "p01": "r001..r005" + } + """ + for per, run_list in in_dict.items(): + if isinstance(run_list, str) and ".." 
in run_list:
+            start, end = run_list.split("..")
+            in_dict[per] = [f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1)]
+    return in_dict
+
+
+def get_analysis_runs(
+    ignore_keys_file=None, analysis_runs_file=None, file_selection="all"
+):
+    """
+    This function reads in the ignore_keys and analysis_runs files and returns the dictionaries
+    """
     ignore_keys = []
+    analysis_runs = {}
     if ignore_keys_file is not None:
-        if os.path.isfile(ignore_keys_file):
-            with open(ignore_keys_file) as f:
-                ignore_keys = f.read().splitlines()
-            ignore_keys = [
+        if Path(ignore_keys_file).is_file():
+            if Path(ignore_keys_file).suffix == ".json":
+                with Path(ignore_keys_file).open() as f:
+                    ignore_keys = json.load(f)
+            elif Path(ignore_keys_file).suffix == ".keylist":
+                with Path(ignore_keys_file).open() as f:
+                    ignore_keys = f.read().splitlines()
+            elif Path(ignore_keys_file).suffix in (".yaml", ".yml"):
+                with Path(ignore_keys_file).open() as f:
+                    ignore_keys = yaml.safe_load(f)
+            else:
+                raise ValueError(
+                    "ignore_keys_file file not in json, yaml or keylist format"
+                )
+            ignore_keys = [  # remove any comments in the keylist
                 key.split("#")[0].strip() if "#" in key else key.strip()
                 for key in ignore_keys
             ]
         else:
-            print("no ignore_keys.keylist file found")
-            ignore_keys = []
-    else:
-        ignore_keys = []
-
-    if analysis_runs_file is not None:
-        if os.path.isfile(analysis_runs_file):
-            with open(analysis_runs_file) as f:
-                analysis_runs = json.load(f)
+            msg = f"no ignore_keys file found: {ignore_keys_file}"
+            raise ValueError(msg)
+
+    if analysis_runs_file is not None and file_selection != "all":
+        if Path(analysis_runs_file).is_file():
+            if Path(analysis_runs_file).suffix == ".json":
+                with Path(analysis_runs_file).open() as f:
+                    analysis_runs = json.load(f)
+            elif Path(analysis_runs_file).suffix in (".yaml", ".yml"):
+                with Path(analysis_runs_file).open() as f:
+                    analysis_runs = yaml.safe_load(f)
+            else:
+                msg = f"analysis_runs file not in json or yaml format: {analysis_runs_file}"
+                raise ValueError(msg)
+            if file_selection in analysis_runs:
+                analysis_runs = expand_runs(
+                    analysis_runs[file_selection]
+                )  # select the file_selection and expand out the runs
+            else:
+                msg = f"Unknown file selection: {file_selection} not in {list(analysis_runs)}"
+                raise ValueError(msg)
         else:
-            analysis_runs = []
-            print("no analysis_runs file found")
-    else:
-        analysis_runs = []
+            msg = f"no analysis_runs file found: {analysis_runs_file}"
+            raise ValueError(msg)
     return analysis_runs, ignore_keys
@@ -57,9 +103,14 @@ def get_keys(keypart):


 def get_pattern(setup, tier):
+    """
+    Helper function to get the search pattern for the given tier,
+    some tiers such as skm need to refer to a different pattern when looking for files
+    as only phy files are taken to skm others are only taken to pet
+    """
     if tier == "blind":
         fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False)
-    elif tier == "skm" or tier == "pet_concat":
+    elif tier in ("skm", "pet_concat"):
         fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False)
     elif tier == "evt_concat":
         fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False)
@@ -69,6 +120,9 @@ def get_pattern(setup, tier):


 def concat_phy_filenames(setup, phy_filenames, tier):
+    """
+    This function concatenates the files from the same run together
+    """
     fn_pattern = get_pattern(setup, tier)
     # group files by run
     sorted_phy_filenames = run_grouper(phy_filenames)
@@ -92,18 +146,20 @@ def build_filelist(
     tier,
     ignore_keys=None,
     analysis_runs=None,
-    file_selection="all",
 ):
+    """
+    This function
builds the filelist for the given filekeys, search pattern and tier. + It will ignore any keys in the ignore_keys list and only include the keys specified in the analysis_runs dict + """ fn_pattern = get_pattern(setup, tier) if ignore_keys is None: ignore_keys = [] if analysis_runs is None: - analysis_runs = [] + analysis_runs = {} phy_filenames = [] other_filenames = [] - for key in filekeys: fn_glob_pattern = key.get_path_from_filekey(search_pattern)[0] files = glob.glob(fn_glob_pattern) @@ -113,7 +169,7 @@ def build_filelist( if _key.name in ignore_keys: pass else: - if tier == "blind" and _key.datatype == "phy": + if tier == "blind" and _key.datatype in blind_datatypes: filename = FileKey.get_path_from_filekey( _key, get_pattern_tier_raw_blind(setup) ) @@ -124,32 +180,38 @@ def build_filelist( else: filename = FileKey.get_path_from_filekey(_key, fn_pattern) - if file_selection == "all": - if _key.datatype == "phy": + if analysis_runs == {}: + if ( + _key.datatype in concat_datatypes + ): # separate out phy files as some tiers these are concatenated phy_filenames += filename else: other_filenames += filename - elif file_selection == "sel": - if analysis_runs == "all" or ( - _key.period in analysis_runs + else: + if ( + _key.period + in analysis_runs # check if period in analysis_runs dicts and ( - _key.run in analysis_runs[_key.period] - or analysis_runs[_key.period] == "all" + _key.run + in analysis_runs[ + _key.period + ] # check if run in analysis_runs dicts + or analysis_runs[_key.period] + == "all" # or if runs is just specified as "all" ) ): - if _key.datatype == "phy": - phy_filenames += filename + if _key.datatype in concat_datatypes: + phy_filenames += filename # separate out phy files as some tiers these are concatenated else: other_filenames += filename - else: - msg = "unknown file selection" - raise ValueError(msg) phy_filenames = sorted(phy_filenames) other_filenames = sorted(other_filenames) - if tier == "skm" or tier == "pet_concat" or tier == "evt_concat": - phy_filenames = concat_phy_filenames(setup, phy_filenames, tier) + if tier in concat_tiers: + phy_filenames = concat_phy_filenames( + setup, phy_filenames, tier + ) # concat phy files return phy_filenames + other_filenames @@ -157,10 +219,11 @@ def build_filelist( def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): - file_selection = wildcards.label[:3] - keypart = wildcards.label[3:] - - analysis_runs, ignore_keys = get_analysis_runs(ignore_keys_file, analysis_runs_file) + file_selection = wildcards.label.split("-", 1)[0] + keypart = f'-{wildcards.label.split("-",1)[1]}' # remove the file selection from the keypart + analysis_runs, ignore_keys = get_analysis_runs( + ignore_keys_file, analysis_runs_file, file_selection + ) filekeys = get_keys(keypart) @@ -171,7 +234,6 @@ def get_filelist( wildcards.tier, ignore_keys, analysis_runs, - file_selection, ) @@ -186,7 +248,9 @@ def get_filelist_full_wildcards( ): keypart = f"-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-{wildcards.datatype}" - analysis_runs, ignore_keys = get_analysis_runs(ignore_keys_file, analysis_runs_file) + analysis_runs, ignore_keys = get_analysis_runs( + ignore_keys_file, analysis_runs_file, file_selection + ) filekeys = get_keys(keypart) return build_filelist( @@ -196,5 +260,4 @@ def get_filelist_full_wildcards( tier, ignore_keys, analysis_runs, - file_selection, ) diff --git a/rules/hit.smk b/rules/hit.smk index fac37a1..bb42651 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -6,26 
+6,33 @@ Snakemake rules for processing hit tier. This is done in 4 steps: - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_hit, + get_pattern_pars, get_pattern_plts, - get_pattern_tier_dsp, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, get_pattern_pars, ) -hit_par_catalog = ds.pars_key_resolve.get_par_catalog( +hit_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_hit"], "lar": ["par_hit"]}, ) +hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" +if hit_par_cat_file.is_file(): + hit_par_cat_file.unlink() +Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) + # This rule builds the qc using the calibration dsp files and fft files rule build_qc: @@ -37,6 +44,7 @@ rule build_qc: filelist_path(setup), "all-{experiment}-{period}-{run}-fft-dsp.filelist" ), pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + overwrite_files=lambda wildcards: get_overwrite_file("hit", wildcards), params: timestamp="{timestamp}", datatype="cal", @@ -58,11 +66,13 @@ rule build_qc: "--timestamp {params.timestamp} " "--channel {params.channel} " "--configs {configs} " + "--metadata {meta} " "--plot_path {output.plot_file} " "--save_path {output.qc_file} " "--pulser_file {input.pulser} " "--cal_files {input.files} " "--fft_files {input.fft_files} " + "--overwrite_files {input.overwrite_files} " # This rule builds the energy calibration using the calibration dsp files @@ -73,7 +83,7 @@ rule build_energy_calibration: ), pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ctc_dict=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "dsp" ) ), @@ -151,6 +161,7 @@ rule build_aoe_calibration: "{basedir}/../scripts/pars_hit_aoe.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -197,6 +208,7 @@ rule build_lq_calibration: "{basedir}/../scripts/pars_hit_lq.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -217,7 +229,7 @@ rule build_pars_hit_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -239,6 +251,7 @@ rule build_pars_hit_objects: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input} " "--output {output} " + "--channelmap {meta} " rule build_plts_hit: @@ -248,7 +261,7 @@ rule build_plts_hit: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, ), output: @@ -262,6 +275,7 @@ rule build_plts_hit: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input} " "--output {output} " + "--channelmap {meta} " rule build_pars_hit: @@ -271,7 +285,7 @@ rule build_pars_hit: 
f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts(setup, "hit"), @@ -293,12 +307,13 @@ rule build_pars_hit: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input[infiles]} " "--output {output} " + "--channelmap {meta} " rule build_hit: input: - dsp_file=get_pattern_tier_dsp(setup), - pars_file=lambda wildcards: pars_catalog.get_par_file( + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + pars_file=lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "hit" ), output: @@ -319,6 +334,7 @@ rule build_hit: "{swenv} python3 -B " "{basedir}/../scripts/build_hit.py " f"--configs {ro(configs)} " + "--metadata {meta} " "--log {log} " "--tier {params.tier} " "--datatype {params.datatype} " diff --git a/rules/pht.smk b/rules/pht.smk index 86646fa..e638832 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -6,14 +6,14 @@ Snakemake rules for processing pht (partition hit) tier data. This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, @@ -21,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -pht_par_catalog = ds.pars_key_resolve.get_par_catalog( +pht_par_catalog = ds.ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_pht"], "lar": ["par_pht"]}, ) +pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" +if pht_par_cat_file.is_file(): + pht_par_cat_file.unlink() +Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) + intier = "psp" @@ -51,7 +57,7 @@ for key, dataset in part.datasets.items(): cal_files=part.get_filelists(partition, key, intier), fft_files=part.get_filelists(partition, key, intier, datatype="fft"), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -123,6 +129,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_qc.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -175,6 +182,7 @@ rule build_pht_qc: "{basedir}/../scripts/pars_pht_qc.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -208,7 +216,7 @@ rule build_per_energy_calibration: pht_dict=get_pattern_pars_tmp_channel(setup, "pht", "qc"), inplots=get_pattern_plts_tmp_channel(setup, "pht", "qc"), ctc_dict=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, intier ) ), @@ -259,7 +267,7 @@ for key, dataset in 
part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -441,7 +449,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -530,6 +538,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_aoecal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -590,6 +599,7 @@ rule build_pht_aoe_calibrations: "{basedir}/../scripts/pars_pht_aoecal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -621,7 +631,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -708,6 +718,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_lqcal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -763,6 +774,7 @@ rule build_pht_lq_calibration: "{basedir}/../scripts/pars_pht_lqcal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -794,7 +806,7 @@ rule build_pars_pht_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -823,7 +835,7 @@ rule build_plts_pht: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, ), output: @@ -844,7 +856,7 @@ rule build_pars_pht: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts(setup, "pht"), @@ -869,7 +881,7 @@ rule build_pars_pht: rule build_pht: input: dsp_file=get_pattern_tier(setup, intier, check_in_cycle=False), - pars_file=lambda wildcards: pars_catalog.get_par_file( + pars_file=lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "pht" ), output: @@ -890,6 +902,7 @@ rule build_pht: "{swenv} python3 -B " "{basedir}/../scripts/build_hit.py " f"--configs {ro(configs)} " + "--metadata {meta} " "--log {log} " "--tier {params.tier} " "--datatype {params.datatype} " diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index 925d42c..9369b6b 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -1,11 +1,10 @@ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( 
get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, @@ -109,6 +108,7 @@ for key, dataset in part.datasets.items(): f"{basedir}/../scripts/pars_pht_fast.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -167,6 +167,7 @@ rule par_pht_fast: "{basedir}/../scripts/pars_pht_fast.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " diff --git a/rules/psp.smk b/rules/psp.smk index 9a3e4af..9fc0861 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -6,9 +6,10 @@ Snakemake rules for processing pht (partition hit) tier data. This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_psp_path, par_dsp_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.utils import set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -20,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -psp_par_catalog = pars_key_resolve.get_par_catalog( +psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_psp"], "lar": ["par_psp"]}, ) +psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" +if psp_par_cat_file.is_file(): + psp_par_cat_file.unlink() +Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) + psp_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): @@ -172,14 +179,16 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_svm_psp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( wildcards, "psp", "svm_hyperpars" - ).replace("hyperpars.json", "train.lh5"), + ), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "psp", "svm_hyperpars") + ).replace("hyperpars.yaml", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_psp_svm").replace("{datatype}", "cal"), + get_pattern_log(setup, "pars_psp_svm").as_posix().replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -221,7 +230,7 @@ rule build_pars_psp_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -241,6 +250,7 @@ rule build_pars_psp_objects: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_plts_psp: @@ -250,7 +260,7 @@ rule build_plts_psp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, ), output: @@ -262,6 +272,7 @@ rule build_plts_psp: "{basedir}/../scripts/merge_channels.py " "--input {input} " 
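The `validity.yaml` refresh that hit.smk, pht.smk and psp.smk each perform inline follows one shared pattern; a compact sketch of it, where `refresh_validity` is a hypothetical helper and the `ParsKeyResolve` calls mirror the ones used in these rules:

```python
# Hypothetical helper summarising the per-tier validity-file refresh
# (the hit/pht/psp rule files all do exactly these steps inline).
from pathlib import Path

from scripts.util.create_pars_keylist import ParsKeyResolve


def refresh_validity(par_catalog, pars_dir, tier):
    cat_file = Path(pars_dir) / tier / "validity.yaml"
    if cat_file.is_file():
        cat_file.unlink()  # drop the stale catalog before rewriting it
    cat_file.parent.mkdir(parents=True, exist_ok=True)
    ParsKeyResolve.write_to_yaml(par_catalog, cat_file)
    return cat_file
```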
"--output {output} " + "--channelmap {meta} " rule build_pars_psp_db: @@ -271,7 +282,7 @@ rule build_pars_psp_db: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, ), output: @@ -289,6 +300,7 @@ rule build_pars_psp_db: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_psp: @@ -298,7 +310,7 @@ rule build_pars_psp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="dplms", extension="lh5", @@ -333,13 +345,14 @@ rule build_pars_psp: "--in_db {input.in_db} " "--out_db {output.out_db} " "--input {input.in_files} " + "--channelmap {meta} " rule build_psp: input: - raw_file=get_pattern_tier_raw(setup), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "psp" ) ), @@ -361,7 +374,9 @@ rule build_psp: "{swenv} python3 -B " "{basedir}/../scripts/build_dsp.py " "--log {log} " + "--tier psp " f"--configs {ro(configs)} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " diff --git a/rules/qc_phy.smk b/rules/qc_phy.smk index 5b9cd6f..b89d8d3 100644 --- a/rules/qc_phy.smk +++ b/rules/qc_phy.smk @@ -1,11 +1,10 @@ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, @@ -138,7 +137,7 @@ rule build_plts_pht_phy: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="qcphy", ), @@ -160,7 +159,7 @@ rule build_pars_pht_phy: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="qcphy", ), diff --git a/rules/raw.smk b/rules/raw.smk index 20d1105..8239519 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,6 +1,5 @@ from scripts.util.patterns import ( get_pattern_tier_daq, - get_pattern_tier_raw, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, @@ -43,7 +42,9 @@ rule build_raw_blind: and runs only if the blinding check file is on disk. Output is just the blinded raw file. 
""" input: - tier_file=get_pattern_tier_raw(setup).replace("{datatype}", "phy"), + tier_file=get_pattern_tier(setup, "raw", check_in_cycle=False).replace( + "{datatype}", "phy" + ), blind_file=get_blinding_curve_file, params: timestamp="{timestamp}", diff --git a/rules/tcm.smk b/rules/tcm.smk index 657cda3..e3a3410 100644 --- a/rules/tcm.smk +++ b/rules/tcm.smk @@ -3,7 +3,6 @@ Snakemake file containing the rules for generating the tcm """ from scripts.util.patterns import ( - get_pattern_tier_raw, get_pattern_tier, get_pattern_log, get_pattern_pars_tmp_channel, @@ -14,7 +13,7 @@ from scripts.util.patterns import ( # This rule builds the tcm files each raw file rule build_tier_tcm: input: - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), params: timestamp="{timestamp}", datatype="{datatype}", @@ -67,3 +66,4 @@ rule build_pulser_ids: "--channel {params.channel} " "--tcm_files {params.input} " "--pulser_file {output.pulser} " + "--metadata {meta} " diff --git a/scripts/blinding_calibration.py b/scripts/blinding_calibration.py index 6a1b0a7..62207e9 100644 --- a/scripts/blinding_calibration.py +++ b/scripts/blinding_calibration.py @@ -7,6 +7,7 @@ import argparse import logging import pickle as pkl +from pathlib import Path import matplotlib as mpl import matplotlib.pyplot as plt @@ -93,7 +94,7 @@ ax2.set_xlabel("energy (keV)") ax2.set_ylabel("counts") plt.suptitle(args.channel) -with open(args.plot_file, "wb") as w: +with Path(args.plot_file).open("wb") as w: pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 8dad8fa..c505058 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,14 +1,14 @@ import argparse import logging -import os -import pathlib +import logging.config import re import time import warnings +from pathlib import Path import numpy as np from dspeed import build_dsp -from legendmeta import TextDB +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 @@ -28,38 +28,63 @@ def replace_list_with_array(dic): argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--tier", help="Tier", type=str, required=True) + argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) -argparser.add_argument("--log", help="log file", type=str) argparser.add_argument("--input", help="input file", type=str) + argparser.add_argument("--output", help="output file", type=str) argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -log = logging.getLogger(__name__) - configs = TextDB(args.configs, lazy=True) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_dsp"][ - "inputs" -]["processing_chain"] - -channel_dict = {chan: Props.read_from(file) 
for chan, file in channel_dict.items()} +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] +if args.tier in ["dsp", "psp"]: + config_dict = config_dict["tier_dsp"] +elif args.tier in ["ann", "pan"]: + config_dict = config_dict["tier_ann"] +else: + msg = f"Tier {args.tier} not supported" + raise ValueError(msg) + +channel_dict = config_dict["inputs"]["processing_chain"] +settings_dict = config_dict["options"].get("settings", {}) +if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") + +meta = LegendMetadata(path=args.metadata) +chan_map = meta.channelmap(args.timestamp, system=args.datatype) + +if args.tier in ["ann", "pan"]: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/dsp": Props.read_from(file) + for chan, file in channel_dict.items() + } +else: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) + for chan, file in channel_dict.items() + } db_files = [ - par_file - for par_file in args.pars_file - if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yaml" + par_file for par_file in args.pars_file if Path(par_file).suffix in (".json", ".yaml", ".yml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) database_dic = replace_list_with_array(database_dic) -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) rng = np.random.default_rng() rand_num = f"{rng.integers(0, 99999):05d}" @@ -74,42 +99,66 @@ def replace_list_with_array(dic): database=database_dic, chan_config=channel_dict, write_mode="r", - buffer_len=3200 if args.datatype == "cal" else 3200, - block_width=16, + buffer_len=settings_dict.get("buffer_len", 1000), + block_width=settings_dict.get("block_width", 16), ) log.info(f"build_dsp finished in {time.time()-start}") - -os.rename(temp_output, args.output) - -key = os.path.basename(args.output).replace("-tier_dsp.lh5", "") - -raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] - -raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] - -outputs = {} -channels = [] -for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - "channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - -full_dict = { - "valid_fields": { - "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, - "dsp": outputs, - }, - "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, -} -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +Path(temp_output).rename(args.output) + +key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") + +if args.tier in ["dsp", "psp"]: + + raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] + raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] + + outputs 
= {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, + "dsp": outputs, + }, + "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, + } +else: + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "ann": outputs, + }, + "valid_keys": {key: {"valid_channels": {"ann": channels}}}, + } + +Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 3d993d8..a02d9f8 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -1,7 +1,6 @@ import argparse import json import logging -import os import time from pathlib import Path @@ -35,6 +34,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): argparser.add_argument("--hit_file", help="hit file", type=str) argparser.add_argument("--dsp_file", help="dsp file", type=str) argparser.add_argument("--tcm_file", help="tcm file", type=str) +argparser.add_argument("--ann_file", help="ann file") argparser.add_argument("--xtc_file", help="xtc file", type=str) argparser.add_argument("--par_files", help="par files", nargs="*") @@ -51,7 +51,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): args = argparser.parse_args() if args.log is not None: - Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") else: logging.basicConfig(level=logging.DEBUG) @@ -118,15 +118,20 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): log.debug(json.dumps(evt_config["channels"], indent=2)) t_start = time.time() -Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) + +file_table = { + "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), + "dsp": (args.dsp_file, "dsp", "ch{}"), + "hit": (args.hit_file, "hit", "ch{}"), + "evt": (None, "evt"), +} + +if args.ann_file is not None: + file_table["ann"] = (args.ann_file, "dsp", "ch{}") table = build_evt( - { - "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), - "dsp": (args.dsp_file, "dsp", "ch{}"), - "hit": (args.hit_file, "hit", "ch{}"), - "evt": (None, "evt"), - }, + file_table, evt_config, ) diff --git a/scripts/build_hit.py b/scripts/build_hit.py index c550337..3aba4aa 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -1,10 +1,9 @@ import argparse import logging -import os -import pathlib import time +from pathlib import Path -from legendmeta import TextDB +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog 
import Props from lgdo import lh5 from pygama.hit.build_hit import build_hit @@ -14,17 +13,18 @@ argparser.add_argument("--pars_file", help="hit pars file", nargs="*") argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--tier", help="Tier", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--output", help="output file", type=str) argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -42,24 +42,30 @@ msg = "unknown tier" raise ValueError(msg) -pars_dict = Props.read_from(args.pars_file) +meta = LegendMetadata(path=args.metadata) +chan_map = meta.channelmap(args.timestamp, system=args.datatype) +pars_dict = Props.read_from(args.pars_file) pars_dict = {chan: chan_dict["pars"] for chan, chan_dict in pars_dict.items()} hit_dict = {} channels_present = lh5.ls(args.input) for channel in pars_dict: chan_pars = pars_dict[channel].copy() - if channel in channel_dict: - cfg_dict = Props.read_from(channel_dict[channel]) - Props.add_to(cfg_dict, chan_pars) - chan_pars = cfg_dict - - if channel in channels_present: - hit_dict[f"{channel}/dsp"] = chan_pars + try: + detector = chan_map.map("daq.rawid")[int(channel[2:])].name + if detector in channel_dict: + cfg_dict = Props.read_from(channel_dict[detector]) + Props.add_to(cfg_dict, chan_pars) + chan_pars = cfg_dict + + if channel in channels_present: + hit_dict[f"{channel}/dsp"] = chan_pars + except KeyError: + pass t_start = time.time() -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) build_hit(args.input, lh5_tables_config=hit_dict, outfile=args.output) t_elap = time.time() - t_start log.info(f"Done! 
Time elapsed: {t_elap:.2f} sec.") @@ -80,12 +86,12 @@ } hit_channels.append(channel) -key = os.path.basename(args.output).replace(f"-tier_{args.tier}.lh5", "") +key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") full_dict = { "valid_fields": {args.tier: hit_outputs}, "valid_keys": {key: {"valid_channels": {args.tier: hit_channels}}}, } -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_raw.py b/scripts/build_raw.py index c02b67b..03a4fca 100644 --- a/scripts/build_raw.py +++ b/scripts/build_raw.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import numpy as np from daq2lh5 import build_raw @@ -18,10 +17,10 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"][ @@ -83,4 +82,5 @@ build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) -os.rename(temp_output, args.output) +# rename the temp file +Path(temp_output).rename(args.output) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 0400f22..33a6c31 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -12,8 +12,7 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import numexpr as ne import numpy as np @@ -35,11 +34,11 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") logging.getLogger("lgdo").setLevel(logging.INFO) -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype) @@ -167,4 +166,4 @@ ) # rename the temp file -os.rename(temp_output, args.output) +Path(temp_output).rename(args.output) diff --git a/scripts/build_skm.py b/scripts/build_skm.py index a327caa..10bf876 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import awkward as ak from legendmeta import TextDB @@ -32,7 +31,7 @@ def get_all_out_fields(input_table, out_fields, current_field=""): args = argparser.parse_args() if args.log is not None: - pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index c39faea..faa39d6 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,7 +1,7 @@ import argparse import logging -import os -import pathlib +import logging.config +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np
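build_tcm.py (like build_dsp.py and pars_dsp_tau.py) now takes its logging setup from the config catalogue, as the following hunk shows; a minimal, self-contained sketch of the pattern, assuming a dictConfig-style config with a `handlers.file` entry (helper name is illustrative):

```python
# Point the config's file handler at the Snakemake-provided log path and apply it.
import logging
import logging.config
from pathlib import Path


def setup_file_logging(log_config, log_file):
    Path(log_file).parent.mkdir(parents=True, exist_ok=True)
    log_config["handlers"]["file"]["filename"] = log_file
    logging.config.dictConfig(log_config)
    return logging.getLogger(__name__)
```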
@@ -19,13 +19,20 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["tier_tcm"] +log_config = config_dict["options"]["logging"] -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -channel_dict = configs["snakemake_rules"]["tier_tcm"]["inputs"] -settings = Props.read_from(channel_dict["config"]) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + +settings = Props.read_from(config_dict["inputs"]["config"]) rng = np.random.default_rng() temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" @@ -50,4 +57,4 @@ **settings, ) -os.rename(temp_output, args.output) +Path(temp_output).rename(args.output) diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 4d8a6fa..7d6da04 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -8,9 +8,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl +from pathlib import Path import matplotlib as mpl import matplotlib.pyplot as plt @@ -40,7 +39,7 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -85,7 +84,7 @@ ax2.set_xlabel("energy (keV)") ax2.set_ylabel("counts") plt.suptitle(args.channel) -with open(args.plot_file, "wb") as w: +with Path(args.plot_file).open("wb") as w: pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() @@ -93,7 +92,7 @@ # valid and if so create file else raise error.
if detector is in ac mode it # will always pass this check if np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) or det_status is False: - pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) + Path(args.output).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output, {}) else: msg = "peaks not found in daqenergy" diff --git a/scripts/complete_run.py b/scripts/complete_run.py index f61ba37..fe800e8 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -1,7 +1,6 @@ # ruff: noqa: F821, T201 import datetime -import glob import json import os import time @@ -20,14 +19,14 @@ def as_ro(path): def check_log_files(log_path, output_file, gen_output, warning_file=None): now = datetime.datetime.now(datetime.UTC).strftime("%d/%m/%y %H:%M") - os.makedirs(os.path.dirname(output_file), exist_ok=True) + Path(output_file).parent.mkdir(parents=True, exist_ok=True) if warning_file is not None: - os.makedirs(os.path.dirname(warning_file), exist_ok=True) - with open(warning_file, "w") as w, open(output_file, "w") as f: + Path(warning_file).parent.mkdir(parents=True, exist_ok=True) + with Path(warning_file).open("w") as w, Path(output_file).open("w") as f: n_errors = 0 n_warnings = 0 for file in Path(log_path).rglob("*.log"): - with open(file) as r: + with Path(file).open() as r: text = r.read() if "ERROR" in text or "WARNING" in text: for line in text.splitlines(): @@ -40,24 +39,24 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): w.write( f"{gen_output} successfully generated at {now} with warnings \n" ) - f.write(f"{os.path.basename(file)} : {line}\n") + f.write(f"{Path(file).name} : {line}\n") n_errors += 1 elif "WARNING" in line: - w.write(f"{os.path.basename(file)} : {line}\n") + w.write(f"{Path(file).name} : {line}\n") n_warnings += 1 else: pass - os.remove(file) + Path(file).unlink() text = None if n_errors == 0: f.write(f"{gen_output} successfully generated at {now} with no errors \n") if n_warnings == 0: w.write(f"{gen_output} successfully generated at {now} with no warnings \n") else: - with open(output_file, "w") as f: + with Path(output_file).open("w") as f: n_errors = 0 for file in Path(log_path).rglob("*.log"): - with open(file) as r: + with Path(file).open() as r: text = r.read() if "ERROR" in text: for line in text.splitlines(): @@ -66,18 +65,18 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): f.write( f"{gen_output} successfully generated at {now} with errors \n" ) - f.write(f"{os.path.basename(file)} : {line}\n") + f.write(f"{Path(file).name} : {line}\n") n_errors += 1 else: pass - os.remove(file) + Path(file).unlink() text = None if n_errors == 0: f.write(f"{gen_output} successfully generated at {now} with no errors \n") walk = list(os.walk(log_path)) for path, _, _ in walk[::-1]: if len(os.listdir(path)) == 0: - os.rmdir(path) + Path(path).rmdir() def add_spaces(n): @@ -124,7 +123,7 @@ def get_run(Filekey): key_dict = {} for file in files: - key = FileKey.get_filekey_from_filename(os.path.basename(file)) + key = FileKey.get_filekey_from_filename(Path(file).name) if get_run(key) in key_dict: key_dict[get_run(key)].append(file) else: @@ -133,24 +132,24 @@ def get_run(Filekey): def build_valid_keys(input_files, output_dir): - infiles = glob.glob(as_ro(input_files)) + infiles = list(Path(as_ro(input_files)).parent.glob(Path(as_ro(input_files)).name)) key_dict = get_keys(infiles) for key in list(key_dict): dtype = key.split("-")[-1] - out_file = os.path.join(output_dir, f'{key.replace(f"-{dtype}",
"")}-valid_{dtype}.json') - Path(os.path.dirname(out_file)).mkdir(parents=True, exist_ok=True) - if os.path.isfile(out_file): + out_file = Path(output_dir) / f'{key.replace(f"-{dtype}", "")}-valid_{dtype}.json' + out_file.parent.mkdir(parents=True, exist_ok=True) + if Path(out_file).is_file(): out_dict = Props.read_from([out_file] + key_dict[key]) else: out_dict = Props.read_from(key_dict[key]) out_string = readable_json(out_dict) - with open(out_file, "w") as w: + with Path(out_file).open("w") as w: w.write(out_string) for input_file in infiles: - if os.path.isfile(input_file): - os.remove(input_file) + if Path(input_file).is_file(): + Path(input_file).unlink() def find_gen_runs(gen_tier_path): @@ -268,16 +267,16 @@ def fformat(tier): if snakemake.wildcards.tier != "daq": print(f"INFO: ...building FileDBs with {snakemake.threads} threads") - os.makedirs(snakemake.params.filedb_path, exist_ok=True) + Path(snakemake.params.filedb_path).parent.makedirs(parents=True, exist_ok=True) - with open(os.path.join(snakemake.params.filedb_path, "file_db_config.json"), "w") as f: + with (Path(snakemake.params.filedb_path) / "file_db_config.json").open("w") as f: json.dump(file_db_config, f, indent=2) build_file_dbs(ut.tier_path(snakemake.params.setup), snakemake.params.filedb_path) - os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) + (Path(snakemake.params.filedb_path) / "file_db_config.json").unlink() build_valid_keys( - os.path.join(ut.tmp_par_path(snakemake.params.setup), "*_db.json"), + Path(ut.tmp_par_path(snakemake.params.setup)) / "*_db.json", snakemake.params.valid_keys_path, ) diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index c4c6cb9..f01c879 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -1,11 +1,10 @@ import argparse -import os -import pathlib +from pathlib import Path from legendmeta import LegendMetadata, TextDB argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--det_status", help="det_status", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channelmap", help="Channel Map", type=str, required=True) @@ -13,19 +12,18 @@ argparser.add_argument("--output_file", help="output_file", type=str, required=True) args = argparser.parse_args() -configs = TextDB(args.configs, lazy=True) -status_map = configs.on(args.timestamp, system=args.datatype)["analysis"] +det_status = TextDB(args.det_status, lazy=True) +status_map = det_status.statuses.on(args.timestamp, system=args.datatype) channel_map = LegendMetadata(args.channelmap, lazy=True) chmap = channel_map.channelmaps.on(args.timestamp) channels = [ - f"ch{chmap[chan].daq.rawid:03}" + chan for chan in status_map if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] - -pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) -with open(args.output_file, "w") as f: +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) +with Path(args.output_file).open("w") as f: for chan in channels: f.write(f"{chan}\n") diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index a86d47d..bed04d2 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -1,10 +1,10 @@ import argparse -import os -import pathlib import pickle as pkl import 
shelve +from pathlib import Path import numpy as np +from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo import lh5 from util.FileKey import ChannelProcKey @@ -19,7 +19,7 @@ def replace_path(d, old_path, new_path): d[i] = replace_path(d[i], old_path, new_path) elif isinstance(d, str) and old_path in d: d = d.replace(old_path, new_path) - d = d.replace(new_path, f"$_/{os.path.basename(new_path)}") + d = d.replace(new_path, f"$_/{Path(new_path).name}") return d @@ -38,6 +38,19 @@ def replace_path(d, old_path, new_path): type=str, required=False, ) +argparser.add_argument( + "--channelmap", + help="channelmap", + type=str, + required=False, + default=None, +) +argparser.add_argument( + "--timestamp", + help="timestamp", + type=str, + required=False, +) args = argparser.parse_args() # change to only have 1 output file for multiple inputs @@ -45,57 +58,72 @@ def replace_path(d, old_path, new_path): channel_files = args.input.infiles if hasattr(args.input, "infiles") else args.input -file_extension = pathlib.Path(args.output).suffix +file_extension = Path(args.output).suffix + +if args.channelmap is not None: + channel_map = LegendMetadata(args.channelmap, lazy=True) + chmap = channel_map.channelmap(args.timestamp) +else: + chmap = None if file_extension == ".dat" or file_extension == ".dir": - out_file = os.path.splitext(args.output)[0] + out_file = Path(args.output).with_suffix("") else: out_file = args.output rng = np.random.default_rng() temp_output = f"{out_file}.{rng.integers(0, 99999):05d}" -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) -if file_extension == ".json" or file_extension == ".yaml" or file_extension == ".yml": +if file_extension in (".json", ".yaml", ".yml"): out_dict = {} for channel in channel_files: - if pathlib.Path(channel).suffix == file_extension: + if Path(channel).suffix == file_extension: channel_dict = Props.read_from(channel) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) - channel_name = fkey.channel + channel_name = fkey.channel out_dict[channel_name] = channel_dict else: msg = "Output file extension does not match input file extension" raise RuntimeError(msg) - Props.write_to(temp_output, out_dict, "json") - - os.rename(temp_output, out_file) + Props.write_to(out_file, out_dict) elif file_extension == ".pkl": out_dict = {} for channel in channel_files: - with open(channel, "rb") as r: + with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) - channel_name = fkey.channel + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel out_dict[channel_name] = channel_dict - with open(temp_output, "wb") as w: + with Path(temp_output).open("wb") as w: pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) elif file_extension == ".dat" or file_extension == ".dir": common_dict = {} - with shelve.open(out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: + with shelve.open(str(out_file), "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: for channel in channel_files: - with open(channel, 
"rb") as r: + with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) - channel_name = fkey.channel + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel_files[0]).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel if isinstance(channel_dict, dict) and "common" in list(channel_dict): chan_common_dict = channel_dict.pop("common") common_dict[channel_name] = chan_common_dict @@ -108,10 +136,13 @@ def replace_path(d, old_path, new_path): if args.in_db: db_dict = Props.read_from(args.in_db) for channel in channel_files: - if pathlib.Path(channel).suffix == file_extension: - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) - channel_name = fkey.channel + if Path(channel).suffix == file_extension: + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel tb_in = lh5.read(f"{channel_name}", channel) lh5.write( @@ -128,4 +159,4 @@ def replace_path(d, old_path, new_path): if args.out_db: Props.write_to(args.out_db, db_dict) - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) diff --git a/scripts/par_psp.py b/scripts/par_psp.py index 52c2ed6..94473a0 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -1,7 +1,7 @@ import argparse -import os import pickle as pkl from datetime import datetime +from pathlib import Path import matplotlib as mpl import matplotlib.dates as mdates @@ -44,7 +44,7 @@ # partitions could be different for different channels - do separately for each channel in_dicts = {} for file in args.input: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp in_dicts[tstamp] = Props.read_from(file) plot_dict = {} @@ -109,36 +109,36 @@ plt.close() for file in args.output: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp Props.write_to(file, in_dicts[tstamp]) if args.out_plots: for file in args.out_plots: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp if args.in_plots: for infile in args.in_plots: if tstamp in infile: - with open(infile, "rb") as f: + with Path(infile).open("rb") as f: old_plot_dict = pkl.load(f) break old_plot_dict.update({"psp": plot_dict}) new_plot_dict = old_plot_dict else: new_plot_dict = {"psp": plot_dict} - with open(file, "wb") as f: + with Path(file).open("wb") as f: pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) if args.out_obj: for file in args.out_obj: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp if args.in_obj: for infile in args.in_obj: if tstamp in infile: - with open(infile, "rb") as f: + with Path(infile).open("rb") as f: old_obj_dict = pkl.load(f) break new_obj_dict = old_obj_dict else: new_obj_dict = {} - with open(file, "wb") as f: + with Path(file).open("wb") as f: pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index df97320..0d6ada7 100644 --- 
a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -1,6 +1,7 @@ import argparse import logging import pickle as pkl +from pathlib import Path from legendmeta.catalog import Props from lgdo import lh5 @@ -45,5 +46,5 @@ log.debug("trained model") # Save trained model with pickle -with open(args.output_file, "wb") as svm_file: +with Path(args.output_file).open("wb") as svm_file: pkl.dump(svm, svm_file, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index f643e03..87403b8 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -1,9 +1,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -16,10 +15,11 @@ argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) +argparser.add_argument("--database", help="database", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--database", help="database", type=str, required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -43,6 +43,10 @@ log = logging.getLogger(__name__) sto = lh5.LH5Store() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(args.configs, lazy=True).on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] @@ -52,16 +56,14 @@ db_dict = Props.read_from(args.database) if dplms_dict["run_dplms"] is True: - with open(args.fft_raw_filelist) as f: + with Path(args.fft_raw_filelist).open() as f: fft_files = sorted(f.read().splitlines()) t0 = time.time() log.info("\nLoad fft data") - energies = sto.read(f"{args.channel}/raw/daqenergy", fft_files)[0] + energies = sto.read(f"{channel}/raw/daqenergy", fft_files)[0] idxs = np.where(energies.nda == 0)[0] - raw_fft = sto.read( - f"{args.channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs - )[0] + raw_fft = sto.read(f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs)[0] t1 = time.time() log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") @@ -70,12 +72,12 @@ kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - raw_cal = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] + raw_cal = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") if isinstance(dsp_config, (str, list)): @@ -91,7 +93,7 @@ display=1, ) if args.inplots: - with open(args.inplots, 
"rb") as r: + with Path(args.inplots).open("rb") as r: inplot_dict = pkl.load(r) inplot_dict.update({"dplms": plot_dict}) @@ -108,32 +110,32 @@ dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) out_dict["dplms"][ "coefficients" - ] = f"loadlh5('{args.lh5_path}', '{args.channel}/dplms/coefficients')" + ] = f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") else: out_dict = {} dplms_pars = Table(col_dict={"coefficients": Array([])}) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: inplot_dict = pkl.load(r) else: inplot_dict = {} db_dict.update(out_dict) -pathlib.Path(os.path.dirname(args.lh5_path)).mkdir(parents=True, exist_ok=True) +Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) sto.write( Table(col_dict={"dplms": dplms_pars}), - name=args.channel, + name=channel, lh5_file=args.lh5_path, wo_mode="overwrite", ) -pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.dsp_pars, db_dict) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 0edf617..d4f0098 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -1,10 +1,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time import warnings +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -27,12 +26,12 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) - argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) -argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -59,6 +58,10 @@ sto = lh5.LH5Store() t0 = time.time() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_eopt"]["inputs"]["processing_chain"][ @@ -109,12 +112,12 @@ ) peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - tb_data = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] + tb_data = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] t1 = time.time() log.info(f"Data Loaded in 
{(t1-t0)/60} minutes") @@ -319,51 +322,51 @@ out_alpha_dict = {} out_alpha_dict["cuspEmax_ctc"] = { "expression": "cuspEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_cusp.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, } out_alpha_dict["cuspEftp_ctc"] = { "expression": "cuspEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_cusp.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, } out_alpha_dict["zacEmax_ctc"] = { "expression": "zacEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_zac.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, } out_alpha_dict["zacEftp_ctc"] = { "expression": "zacEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_zac.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, } out_alpha_dict["trapEmax_ctc"] = { "expression": "trapEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, } out_alpha_dict["trapEftp_ctc"] = { "expression": "trapEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, } if "ctc_params" in db_dict: db_dict["ctc_params"].update(out_alpha_dict) else: db_dict.update({"ctc_params": out_alpha_dict}) - pathlib.Path(os.path.dirname(args.qbb_grid_path)).mkdir(parents=True, exist_ok=True) - with open(args.qbb_grid_path, "wb") as f: + Path(args.qbb_grid_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.qbb_grid_path).open("wb") as f: pkl.dump(optimisers, f) else: - pathlib.Path(args.qbb_grid_path).touch() + Path(args.qbb_grid_path).touch() -pathlib.Path(os.path.dirname(args.final_dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.final_dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.final_dsp_pars, db_dict) if args.plot_path: if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: plot_dict = pkl.load(r) else: plot_dict = {} @@ -383,6 +386,6 @@ "acq_space": bopt_zac.plot_acq(init_samples=sample_x), } - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as w: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as w: pkl.dump(plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index ea2bb34..f4dfd7d 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -1,11 +1,10 @@ import argparse import json import logging -import os -import pathlib import time import warnings from bisect import bisect_left +from pathlib import Path import lgdo import lgdo.lh5 as lh5 @@ -84,10 +83,11 @@ def get_out_data( argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--raw_cal", help="raw_cal", type=str, nargs="*", required=True) argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", 
help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -109,6 +109,10 @@ def get_out_data( sto = lh5.LH5Store() t0 = time.time() + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"][ @@ -121,14 +125,14 @@ def get_out_data( peak_dict = Props.read_from(peak_json) db_dict = Props.read_from(args.decay_const) - pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) + Path(args.peak_file).parent.mkdir(parents=True, exist_ok=True) + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.peak_file}.{rand_num}" if peak_dict.pop("run_selection") is True: log.debug("Starting peak selection") - rng = np.random.default_rng() - rand_num = f"{rng.integers(0,99999):05d}" - temp_output = f"{args.peak_file}.{rand_num}" - with open(args.raw_filelist) as f: + with Path(args.raw_filelist).open() as f: files = f.read().splitlines() raw_files = sorted(files) @@ -138,17 +142,17 @@ def get_out_data( elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, peak_dict["pulser_multiplicity_threshold"] + tcm_files, channel, peak_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - raw_dict = Props.read_from(args.raw_cal)[args.channel]["pars"]["operations"] + raw_dict = Props.read_from(args.raw_cal)[channel]["pars"]["operations"] peaks_kev = peak_dict["peaks"] kev_widths = peak_dict["kev_widths"] @@ -157,7 +161,7 @@ def get_out_data( final_cut_field = peak_dict["final_cut_field"] energy_parameter = peak_dict.get("energy_parameter", "trapTmax") - lh5_path = f"{args.channel}/raw" + lh5_path = f"{channel}/raw" if not isinstance(kev_widths, list): kev_widths = [kev_widths] @@ -225,7 +229,7 @@ def get_out_data( } for file in raw_files: - log.debug(os.path.basename(file)) + log.debug(Path(file).name) for peak, peak_dict in pk_dicts.items(): if peak_dict["idxs"] is not None: # idx is a long continuous array @@ -358,7 +362,7 @@ def get_out_data( log.debug(f"{peak} has reached the required number of events") else: - pathlib.Path(temp_output).touch() + Path(temp_output).touch() log.debug(f"event selection completed in {time.time()-t0} seconds") - os.rename(temp_output, args.peak_file) + Path(temp_output).rename(args.peak_file) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 67ffd5f..5de3a59 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -1,9 +1,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -21,6 +20,7 @@ argparser.add_argument("--inplots", help="inplots", type=str) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) 
argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -45,6 +45,10 @@ t0 = time.time() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_nopt"]["inputs"]["processing_chain"][ @@ -57,14 +61,14 @@ db_dict = Props.read_from(args.database) if opt_dict.pop("run_nopt") is True: - with open(args.raw_filelist) as f: + with Path(args.raw_filelist).open() as f: files = f.read().splitlines() raw_files = sorted(files) - energies = sto.read(f"{args.channel}/raw/daqenergy", raw_files)[0] + energies = sto.read(f"{channel}/raw/daqenergy", raw_files)[0] idxs = np.where(energies.nda == 0)[0] - tb_data = sto.read(f"{args.channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs)[0] + tb_data = sto.read(f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs)[0] t1 = time.time() log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") @@ -73,7 +77,7 @@ cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars")) cut_idxs = get_cut_indexes(dsp_data, cut_dict) tb_data = sto.read( - f"{args.channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] + f"{channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] )[0] log.info(f"... {len(tb_data)} baselines after cuts") @@ -82,12 +86,10 @@ if args.plot_path: out_dict, plot_dict = pno.noise_optimization( - tb_data, dsp_config, db_dict.copy(), opt_dict, args.channel, display=1 + tb_data, dsp_config, db_dict.copy(), opt_dict, channel, display=1 ) else: - out_dict = pno.noise_optimization( - raw_files, dsp_config, db_dict.copy(), opt_dict, args.channel - ) + out_dict = pno.noise_optimization(raw_files, dsp_config, db_dict.copy(), opt_dict, channel) t2 = time.time() log.info(f"Optimiser finished in {(t2-t0)/60} minutes") @@ -96,15 +98,15 @@ plot_dict = {} if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: old_plot_dict = pkl.load(r) plot_dict = dict(noise_optimisation=plot_dict, **old_plot_dict) else: plot_dict = {"noise_optimisation": plot_dict} - with open(args.plot_path, "wb") as f: + with Path(args.plot_path).open("wb") as f: pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) -pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.dsp_pars, dict(nopt_pars=out_dict, **db_dict)) diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm.py index 28b335e..370e320 100644 --- a/scripts/pars_dsp_svm.py +++ b/scripts/pars_dsp_svm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path from legendmeta.catalog import Props @@ -14,7 +13,7 @@ if args.log is not None: - pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") else: logging.basicConfig(level=logging.DEBUG) @@ -27,9 +26,9 @@ par_data = Props.read_from(args.input_file) -file = f"'$_/{os.path.basename(args.svm_file)}'" 
+file = f"'$_/{Path(args.svm_file).name}'" par_data["svm"] = {"model_file": file} -pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output_file, par_data) diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index c4750c6..b8d9a71 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,8 +1,8 @@ import argparse import logging -import os -import pathlib +import logging.config import pickle as pkl +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -14,10 +14,13 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) + argparser.add_argument("--plot_path", help="plot path", type=str, required=False) argparser.add_argument("--output_file", help="output file", type=str, required=True) @@ -27,23 +30,24 @@ argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str, required=False) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) - sto = lh5.LH5Store() -log = logging.getLogger(__name__) configs = LegendMetadata(path=args.configs) -config_dict = configs.on(args.timestamp, system=args.datatype) -channel_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["processing_chain"][ - args.channel -] -kwarg_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["tau_config"][args.channel] +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["pars_dsp_tau"] +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") + +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + +channel_dict = config_dict["inputs"]["processing_chain"][args.channel] +kwarg_dict = config_dict["inputs"]["tau_config"][args.channel] kwarg_dict = Props.read_from(kwarg_dict) @@ -52,7 +56,7 @@ kwarg_dict.pop("run_tau") if isinstance(args.raw_files, list) and args.raw_files[0].split(".")[-1] == "filelist": input_file = args.raw_files[0] - with open(input_file) as f: + with Path(input_file).open() as f: input_file = f.read().splitlines() else: input_file = args.raw_files @@ -63,18 +67,18 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, 
kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" raise ValueError(msg) data = sto.read( - f"{args.channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] + f"{channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] )[0].view_as("pd") threshold = kwarg_dict.pop("threshold") @@ -90,7 +94,7 @@ cuts = np.where((data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering))[0] tb_data = sto.read( - f"{args.channel}/raw", + f"{channel}/raw", input_file, idx=cuts, n_rows=kwarg_dict.pop("n_events"), @@ -113,17 +117,17 @@ tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) plot_dict = tau.plot_waveforms_after_correction( tb_data, "wf_pz", norm_param=kwarg_dict.get("norm_param", "pz_mean") ) plot_dict.update(tau.plot_slopes(slopes[idxs])) - with open(args.plot_path, "wb") as f: + with Path(args.plot_path).open("wb") as f: pkl.dump({"tau": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) else: out_dict = {} -pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output_file, tau.output_dict) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index ed33f23..c30c7ef 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -2,10 +2,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path from typing import Callable import numpy as np @@ -16,6 +15,7 @@ from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -66,6 +66,7 @@ def aoe_calibration( dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, + debug_mode: bool = False, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] aoe = CalAoE( @@ -82,6 +83,7 @@ def aoe_calibration( mean_func=mean_func, sigma_func=sigma_func, compt_bands_width=comptBands_width, + debug_mode=debug_mode | args.debug, ) aoe.update_cal_dicts( @@ -102,20 +104,25 @@ def aoe_calibration( argparser.add_argument("files", help="files", nargs="*", type=str) argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) + argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) 
-argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--aoe_results", help="aoe_results", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -126,6 +133,10 @@ def aoe_calibration( logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_hit_aoecal" @@ -138,7 +149,7 @@ def aoe_calibration( cal_dict = ecal_dict["pars"] eres_dict = ecal_dict["results"]["ecal"] -with open(args.eres_file, "rb") as o: +with Path(args.eres_file).open("rb") as o: object_dict = pkl.load(o) if kwarg_dict["run_aoe"] is True: @@ -154,7 +165,7 @@ def aoe_calibration( for field, item in kwarg_dict["plot_options"].items(): kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -191,7 +202,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -206,11 +217,11 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) else: msg = "No pulser file or tcm filelist provided" @@ -228,6 +239,7 @@ def eres_func(x): sigma_func=sigma_func, **kwarg_dict, ) + obj.pdf = obj.pdf.name # need to change eres func as can't pickle lambdas try: @@ -242,7 +254,7 @@ def eres_func(x): if args.plot_file: common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: out_plot_dict = pkl.load(r) out_plot_dict.update({"aoe": plot_dict}) else: @@ -253,22 +265,25 @@ def eres_func(x): elif common_dict is not None: out_plot_dict["common"] = common_dict - pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, exist_ok=True) - with open(args.plot_file, "wb") as w: + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) +Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) results_dict = dict(**ecal_dict["results"], aoe=out_dict) final_hit_dict = { "pars": {"operations": cal_dict}, "results": results_dict, } + +final_hit_dict = convert_dict_np_to_float(final_hit_dict) + Props.write_to(args.hit_pars, final_hit_dict) -pathlib.Path(os.path.dirname(args.aoe_results)).mkdir(parents=True, exist_ok=True) 
+Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, aoe=obj, ) -with open(args.aoe_results, "wb") as w: +with Path(args.aoe_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index d19b427..43ba644 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import warnings from datetime import datetime +from pathlib import Path import lgdo.lh5 as lh5 import matplotlib as mpl @@ -23,6 +22,7 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from scipy.stats import binned_statistic +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) mpl.use("agg") @@ -439,6 +439,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) argparser.add_argument("--save_path", help="save_path", type=str) argparser.add_argument("--results_path", help="results_path", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -451,8 +453,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) + channel = f"ch{chmap[args.channel].daq.rawid:07}" - det_status = chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["usability"] + det_status = chmap[args.channel]["analysis"]["usability"] if args.in_hit_dict: hit_dict = Props.read_from(args.in_hit_dict) @@ -460,14 +463,12 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): db_files = [ par_file for par_file in args.ctc_dict - if os.path.splitext(par_file)[1] == ".json" - or os.path.splitext(par_file)[1] == ".yml" - or os.path.splitext(par_file)[1] == ".yaml" + if Path(par_file).suffix in (".json", ".yml", ".yaml") ] database_dic = Props.read_from(db_files) - hit_dict.update(database_dic[args.channel]["ctc_params"]) + hit_dict.update(database_dic[channel]["ctc_params"]) # get metadata dictionary configs = LegendMetadata(path=args.configs) @@ -491,14 +492,14 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): bl_plots[field]["function"] = eval(item["function"]) common_plots = kwarg_dict.pop("common_plots") - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", hit_dict, params=[*kwarg_dict["energy_params"], kwarg_dict["cut_param"], "timestamp", "trapTmax"], threshold=kwarg_dict["threshold"], @@ -512,11 +513,11 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ 
-565,6 +566,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): glines, guess, kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False) | args.debug, ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, etol_kev=5 if det_status == "on" else 20 @@ -575,6 +577,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): glines, guess, kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False), ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, etol_kev=5 if det_status == "on" else 30, n_sigma=2 @@ -633,7 +636,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): hit_dict.update( { cal_energy_param.replace("_ctc", ""): { - "expression": f"where({cal_energy_param}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", + "expression": f"where({cal_energy_param.replace('ctc','noctc')}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", "parameters": {}, } } @@ -697,14 +700,14 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if "monitoring_parameters" in kwarg_dict: monitor_dict = monitor_parameters( - files, f"{args.channel}/dsp", kwarg_dict["monitoring_parameters"] + files, f"{channel}/dsp", kwarg_dict["monitoring_parameters"] ) results_dict.update({"monitoring_parameters": monitor_dict}) # get baseline plots and save all plots to file if args.plot_path: common_dict = baseline_tracking_plots( - sorted(files), f"{args.channel}/dsp", plot_options=bl_plots + sorted(files), f"{channel}/dsp", plot_options=bl_plots ) for plot in list(common_dict): @@ -721,7 +724,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): common_dict.update({key: param_dict}) if args.inplot_dict: - with open(args.inplot_dict, "rb") as f: + with Path(args.inplot_dict).open("rb") as f: total_plot_dict = pkl.load(f) else: total_plot_dict = {} @@ -733,15 +736,15 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): total_plot_dict.update({"ecal": plot_dict}) - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary - output_dict = {"pars": hit_dict, "results": {"ecal": results_dict}} + output_dict = convert_dict_np_to_float({"pars": hit_dict, "results": {"ecal": results_dict}}) Props.write_to(args.save_path, output_dict) # save calibration objects - with open(args.results_path, "wb") as fp: - pathlib.Path(os.path.dirname(args.results_path)).mkdir(parents=True, exist_ok=True) + with Path(args.results_path).open("wb") as fp: + Path(args.results_path).parent.mkdir(parents=True, exist_ok=True) pkl.dump({"ecal": full_object_dict}, fp, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 5a0ad96..8625ed3 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -2,20 +2,21 @@ import argparse import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: 
F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -26,7 +27,7 @@ def get_results_dict(lq_class): "cal_energy_param": lq_class.cal_energy_param, "DEP_means": lq_class.timecorr_df.to_dict("index"), "rt_correction": lq_class.dt_fit_pars, - "cut_fit_pars": lq_class.cut_fit_pars, + "cut_fit_pars": lq_class.cut_fit_pars.to_dict(), "cut_value": lq_class.cut_val, "sfs": lq_class.low_side_sf.to_dict("index"), } @@ -54,6 +55,7 @@ def lq_calibration( cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, + debug_mode: bool = False, ): """Loads in data from the provided files and runs the LQ calibration on said files @@ -99,6 +101,7 @@ def lq_calibration( eres_func, cdf, selection_string, + debug_mode=debug_mode | args.debug, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -127,15 +130,18 @@ def lq_calibration( argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--lq_results", help="lq_results", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -145,6 +151,10 @@ def lq_calibration( logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_hit_lqcal" @@ -156,7 +166,7 @@ def lq_calibration( cal_dict = ecal_dict["pars"]["operations"] eres_dict = ecal_dict["results"]["ecal"] -with open(args.eres_file, "rb") as o: +with Path(args.eres_file).open("rb") as o: object_dict = pkl.load(o) if kwarg_dict["run_lq"] is True: @@ -168,7 +178,7 @@ def lq_calibration( for field, item in kwarg_dict["plot_options"].items(): kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -194,7 +204,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -209,11 +219,11 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with 
Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) else: msg = "No pulser file or tcm filelist provided" @@ -243,7 +253,7 @@ def eres_func(x): if args.plot_file: common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: out_plot_dict = pkl.load(r) out_plot_dict.update({"lq": plot_dict}) else: @@ -254,24 +264,24 @@ def eres_func(x): elif common_dict is not None: out_plot_dict["common"] = common_dict - pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, exist_ok=True) - with open(args.plot_file, "wb") as w: + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -results_dict = dict(**eres_dict, lq=out_dict) -pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) -final_hit_dict = { - "pars": {"operations": cal_dict}, - "results": results_dict, -} +final_hit_dict = convert_dict_np_to_float( + { + "pars": {"operations": cal_dict}, + "results": dict(**eres_dict, lq=out_dict), + } +) +Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.hit_pars, final_hit_dict) -pathlib.Path(os.path.dirname(args.lq_results)).mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, lq=obj, ) -Props.write_to(args.lq_results, final_object_dict) -with open(args.lq_results, "wb") as w: +Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) +with Path(args.lq_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 9640087..320fee9 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np from legendmeta import LegendMetadata @@ -19,6 +18,7 @@ get_tcm_pulser_ids, ) from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -29,17 +29,26 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument( + "--overwrite_files", + help="overwrite_files", + type=str, + required=False, + nargs="*", + ) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument("--tier", help="tier", type=str, default="hit") - argparser.add_argument("--log", help="log_file", type=str) - 
argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) argparser.add_argument("--save_path", help="save_path", type=str) args = argparser.parse_args() @@ -52,6 +61,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -59,19 +72,37 @@ kwarg_dict = Props.read_from(channel_dict) + if args.overwrite_files: + overwrite = Props.read_from(args.overwrite_files) + if channel in overwrite: + overwrite = overwrite[channel]["pars"]["operations"] + else: + overwrite = None + else: + overwrite = None + + if len(args.fft_files) == 1 and Path(args.fft_files[0]).suffix == ".filelist": + with Path(args.fft_files[0]).open() as f: + fft_files = f.read().splitlines() + else: + fft_files = args.fft_files + + if len(args.cal_files) == 1 and Path(args.cal_files[0]).suffix == ".filelist": + with Path(args.cal_files[0]).open() as f: + cal_files = f.read().splitlines() + else: + cal_files = args.fft_files + kwarg_dict_fft = kwarg_dict["fft_fields"] - if len(args.fft_files) > 0: + if len(fft_files) > 0: fft_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.fft_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(fft_files[0], f"{channel}/dsp/")], kwarg_dict_fft["cut_parameters"], ) fft_data = load_data( - args.fft_files, - f"{args.channel}/dsp", + fft_files, + f"{channel}/dsp", {}, [*fft_fields, "timestamp", "trapTmax"], ) @@ -124,31 +155,31 @@ hit_dict_fft = {} plot_dict_fft = {} + if overwrite is not None: + for name in kwarg_dict_fft["cut_parameters"]: + for cut_name, cut_dict in overwrite.items(): + if name in cut_name: + hit_dict_fft.update({cut_name: cut_dict}) + kwarg_dict_cal = kwarg_dict["cal_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], kwarg_dict_cal["cut_parameters"], ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] cut_fields += get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], init_cal["cut_parameters"], ) # load data in data, threshold_mask = load_data( - args.cal_files, - f"{args.channel}/dsp", + cal_files, + f"{channel}/dsp", {}, - [*cut_fields, "timestamp", "trapTmax"], + [*cut_fields, "timestamp", "trapTmax", "t_sat_lo"], threshold=kwarg_dict_cal.get("threshold", 0), return_selection_mask=True, cal_energy_param="trapTmax", @@ -160,11 +191,11 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -202,16 +233,19 @@ for key in info.get("parameters", None): exp = re.sub(f"(? 
500: + if len(data.query("is_pulser & ~is_recovering")) < 500: data = data.query("is_pulser & ~is_recovering") else: data = data.query("~is_pulser & ~is_recovering")[mask] @@ -223,13 +257,21 @@ display=1 if args.plot_path else 0, ) + if overwrite is not None: + for name in kwarg_dict_cal["cut_parameters"]: + for cut_name, cut_dict in overwrite.items(): + if name in cut_name: + hit_dict_cal.update({cut_name: cut_dict}) + hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} - pathlib.Path(os.path.dirname(args.save_path)).mkdir(parents=True, exist_ok=True) + hit_dict = convert_dict_np_to_float(hit_dict) + + Path(args.save_path).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.save_path, hit_dict) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index bf91d38..ca938e5 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path from typing import Callable import numpy as np @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -92,6 +91,7 @@ def aoe_calibration( dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, + debug_mode: bool = False, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] aoe = CalAoE( @@ -108,6 +108,7 @@ def aoe_calibration( mean_func=mean_func, sigma_func=sigma_func, compt_bands_width=comptBands_width, + debug_mode=debug_mode | args.debug, ) aoe.update_cal_dicts( { @@ -254,15 +255,18 @@ def eres_func(x): argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -273,6 +277,10 @@ def eres_func(x): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, 
system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_pht_aoecal" @@ -285,33 +293,33 @@ def eres_func(x): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run if isinstance(args.input_files, list): files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() else: - with open(args.input_files) as f: + with Path(args.input_files).open() as f: files = f.read().splitlines() files = sorted( @@ -321,7 +329,7 @@ def eres_func(x): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -347,7 +355,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -365,11 +373,11 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -399,21 +407,21 @@ def eres_func(x): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": cal_dict[fk.timestamp], "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + Path(out).parent.mkdir(parents=True, exist_ok=True) Props.write_to(out, final_hit_dict) for out in args.aoe_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = 
ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 8210df7..104ad05 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -3,10 +3,9 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -55,17 +54,19 @@ def run_splitter(files): argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -76,34 +77,38 @@ def run_splitter(files): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + cal_dict = {} results_dicts = {} for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += 
f.read().splitlines() files = sorted( @@ -113,7 +118,7 @@ def run_splitter(files): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -166,7 +171,7 @@ def run_splitter(files): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict["threshold"], @@ -177,7 +182,7 @@ def run_splitter(files): if args.pulser_files: mask = np.array([], dtype=bool) for file in args.pulser_files: - with open(file) as f: + with Path(file).open() as f: pulser_dict = json.load(f) pulser_mask = np.array(pulser_dict["mask"]) mask = np.append(mask, pulser_mask) @@ -186,11 +191,11 @@ def run_splitter(files): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -212,7 +217,7 @@ def run_splitter(files): object_dict, inplots_dict, args.timestamp, - args.metadata, + chmap, args.configs, args.channel, args.datatype, @@ -247,22 +252,22 @@ def run_splitter(files): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": {"operations": cal_dict[fk.timestamp]}, "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "w") as w: + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("w") as w: json.dump(final_hit_dict, w, indent=4) for out in args.fit_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 3d5915e..2c67745 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -4,16 +4,16 @@ import copy import json import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal @@ -32,7 +32,7 @@ def 
run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -75,6 +75,7 @@ def lq_calibration( cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, + debug_mode: bool = False, ): """Loads in data from the provided files and runs the LQ calibration on said files @@ -119,6 +120,7 @@ def lq_calibration( eres_func, cdf, selection_string, + debug_mode=debug_mode | args.debug, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -250,15 +252,18 @@ def eres_func(x): argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -269,6 +274,10 @@ def eres_func(x): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_pht_lqcal" @@ -281,33 +290,33 @@ def eres_func(x): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run if isinstance(args.input_files, list): files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() else: - with open(args.input_files) as f: + with Path(args.input_files).open() as f: files = f.read().splitlines() files = sorted( @@ 
-317,7 +326,7 @@ def eres_func(x): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -334,7 +343,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -344,7 +353,7 @@ def eres_func(x): if args.pulser_files: mask = np.array([], dtype=bool) for file in args.pulser_files: - with open(file) as f: + with Path(file).open() as f: pulser_dict = json.load(f) pulser_mask = np.array(pulser_dict["mask"]) mask = np.append(mask, pulser_mask) @@ -353,11 +362,11 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -387,22 +396,22 @@ def eres_func(x): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": {"operations": cal_dict[fk.timestamp]}, "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "w") as w: + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("w") as w: json.dump(final_hit_dict, w, indent=4) for out in args.lq_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 0d74ac8..a454d76 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -34,7 +33,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -143,18 +142,14 @@ def calibrate_partition( object_dicts, plot_dicts, timestamp, - metadata_path, + chmap, configs, channel, datatype, gen_plots=True, ): - # load metadata - meta = LegendMetadata(path=metadata_path) - chmap = 
meta.channelmap(timestamp) - - det_status = chmap.map("daq.rawid")[int(channel[2:])]["analysis"]["usability"] + det_status = chmap[channel]["analysis"]["usability"] configs = LegendMetadata(path=configs) channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"]["pars_pht_partcal"][ @@ -171,34 +166,34 @@ def calibrate_partition( # calibrate pk_pars = [ - # (238.632, (10, 10), pgf.gauss_on_step), #double line - # (241.0, (10, 10), pgf.gauss_on_step), #double line - (277.371, (10, 7), pgf.gauss_on_linear), - (288.2, (7, 10), pgf.gauss_on_linear), - (300.1, (10, 10), pgf.gauss_on_linear), - (453.0, (10, 10), pgf.gauss_on_linear), - # (511, (20, 20), pgf.gauss_on_step), double line - (549.8, (10, 10), pgf.gauss_on_linear), - (583.187, (20, 20), pgf.hpge_peak), - (727.330, (20, 20), pgf.hpge_peak), - (763.13, (20, 10), pgf.gauss_on_linear), - (785.37, (10, 20), pgf.gauss_on_linear), - (860.557, (20, 20), pgf.hpge_peak), - (893.408, (20, 20), pgf.gauss_on_linear), - (927.6, (20, 20), pgf.gauss_on_linear), - (952.120, (20, 20), pgf.gauss_on_linear), - (982.7, (20, 20), pgf.gauss_on_linear), - (1078.62, (20, 7), pgf.gauss_on_linear), - (1093.9, (7, 20), pgf.gauss_on_linear), - (1512.7, (20, 20), pgf.gauss_on_linear), - (1592.511, (20, 20), pgf.hpge_peak), - (1620.50, (20, 20), pgf.hpge_peak), - (1679.7, (20, 20), pgf.gauss_on_linear), - (1806.0, (20, 20), pgf.gauss_on_linear), - (2103.511, (20, 20), pgf.hpge_peak), - (2614.511, (40, 20), pgf.hpge_peak), - (3125.511, (20, 20), pgf.gauss_on_linear), - (3197.7, (20, 20), pgf.gauss_on_linear), + # (238.632, (10, 10), pgf.gauss_on_step), #double line, Pb-212 + # (240.986, (10, 10), pgf.gauss_on_step), #double line, Ra-224 + (277.371, (10, 7), pgf.gauss_on_linear), # Tl-208 + (288.2, (7, 10), pgf.gauss_on_linear), # Bi-212 + (300.087, (10, 10), pgf.gauss_on_linear), # Pb-212 + (452.98, (10, 10), pgf.gauss_on_linear), # Bi-212 + # (511, (20, 20), pgf.gauss_on_step), double line, #e+e- + (549.73, (10, 10), pgf.gauss_on_linear), # Rn-220 + (583.187, (20, 20), pgf.hpge_peak), # Tl-208 + (727.330, (20, 20), pgf.hpge_peak), # Bi-212 + (763.13, (20, 10), pgf.gauss_on_linear), # Tl-208 + (785.37, (10, 20), pgf.gauss_on_linear), # Bi-212 + (860.557, (20, 20), pgf.hpge_peak), # Tl-208 + (893.408, (20, 20), pgf.gauss_on_linear), # Bi-212 + (927.6, (20, 20), pgf.gauss_on_linear), # Tl-208 + (952.120, (20, 20), pgf.gauss_on_linear), # Bi-212 + (982.7, (20, 20), pgf.gauss_on_linear), # Tl-208 + (1078.62, (20, 7), pgf.gauss_on_linear), # Bi-212 + (1093.9, (7, 20), pgf.gauss_on_linear), # Tl-208 + (1512.7, (20, 20), pgf.gauss_on_linear), # Bi-212 + (1592.511, (20, 20), pgf.hpge_peak), # Tl-208 DEP + (1620.50, (20, 20), pgf.hpge_peak), # Bi-212 + (1679.7, (20, 20), pgf.gauss_on_linear), # Bi-212 + (1806.0, (20, 20), pgf.gauss_on_linear), # Bi-212 + (2103.511, (20, 20), pgf.hpge_peak), # Tl-208 SEP + (2614.511, (40, 20), pgf.hpge_peak), # Tl-208 + (3125.511, (20, 20), pgf.gauss_on_linear), # Summation + (3197.7, (20, 20), pgf.gauss_on_linear), # Summation (3475.1, (20, 20), pgf.gauss_on_linear), ] @@ -218,7 +213,11 @@ def calibrate_partition( for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): energy = data.query(selection_string)[energy_param].to_numpy() full_object_dict[cal_energy_param] = HPGeCalibration( - energy_param, glines, 1, kwarg_dict.get("deg", 0) # , fixed={1: 1} + energy_param, + glines, + 1, + kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False) | args.debug, # , fixed={1: 1} ) 
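The `-d/--debug` switch added to these scripts is combined with the per-channel configuration in the same way everywhere: the flag handed to the pygama calibration classes is the OR of the config entry and the CLI option (`|` on two Python bools acts as a logical OR). A small illustration, with `kwarg_dict` standing in for the channel config read via `Props.read_from`:

```python
import argparse

argparser = argparse.ArgumentParser()
argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true")
args = argparser.parse_args(["-d"])  # example command line

kwarg_dict = {"debug_mode": False}  # stand-in for the per-channel config
debug_mode = kwarg_dict.get("debug_mode", False) | args.debug
print(debug_mode)  # True: either source can enable debugging
```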
full_object_dict[cal_energy_param].hpge_get_energy_peaks( energy, @@ -309,7 +308,7 @@ def calibrate_partition( cal_dicts, { cal_energy_param.replace("_ctc", ""): { - "expression": f"where({cal_energy_param}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", + "expression": f"where({cal_energy_param.replace('ctc', 'noctc')}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", "parameters": {}, } }, @@ -415,17 +414,19 @@ def calibrate_partition( argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -436,34 +437,38 @@ def calibrate_partition( logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + cal_dict = {} results_dicts = {} for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() files = sorted( @@ -473,7 +478,7 @@ def calibrate_partition( final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp 
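For reference, the corrected drift-time switch written into the hit pars now compares the *uncorrected* (`noctc`) energy against the threshold while still returning the charge-trapping-corrected energy above it. The parameter name and threshold below are hypothetical:

```python
cal_energy_param = "cuspEmax_ctc_cal"  # hypothetical calibrated-energy name
dt_threshold_kev = 100                 # default used via kwarg_dict.get("dt_theshold_kev", 100)

expression = (
    f"where({cal_energy_param.replace('ctc', 'noctc')}>{dt_threshold_kev}, "
    f"{cal_energy_param}, {cal_energy_param.replace('ctc', 'noctc')})"
)
print(expression)
# where(cuspEmax_noctc_cal>100, cuspEmax_ctc_cal, cuspEmax_noctc_cal)
```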
final_dict[timestamp] = sorted(filelist) @@ -493,7 +498,7 @@ def calibrate_partition( # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict["threshold"], @@ -512,11 +517,11 @@ def calibrate_partition( elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -538,7 +543,7 @@ def calibrate_partition( object_dict, inplots_dict, timestamp, - args.metadata, + chmap, args.configs, args.channel, args.datatype, @@ -547,21 +552,21 @@ def calibrate_partition( if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": cal_dict[fk.timestamp], "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + Path(out).parent.mkdir(parents=True, exist_ok=True) Props.write_to(out, final_hit_dict) for out in args.fit_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index f62da8b..495c87b 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np from legendmeta import LegendMetadata @@ -19,6 +18,7 @@ get_tcm_pulser_ids, ) from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -29,6 +29,7 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument( "--tcm_filelist", help="tcm_filelist", nargs="*", type=str, required=False ) @@ -40,12 +41,13 @@ ) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", 
help="plot_path", type=str, nargs="*", required=False) argparser.add_argument( "--save_path", @@ -63,6 +65,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -72,10 +78,10 @@ if isinstance(args.cal_files, list): cal_files = [] for file in args.cal_files: - with open(file) as f: + with Path(file).open() as f: cal_files += f.read().splitlines() else: - with open(args.cal_files) as f: + with Path(args.cal_files).open() as f: cal_files = f.read().splitlines() cal_files = sorted( @@ -86,8 +92,8 @@ if args.overwrite_files: overwrite = Props.read_from(args.overwrite_files) - if args.channel in overwrite: - overwrite = overwrite[args.channel]["pars"]["operations"] + if channel in overwrite: + overwrite = overwrite[channel]["pars"]["operations"] else: overwrite = None else: @@ -99,10 +105,10 @@ if isinstance(args.fft_files, list): fft_files = [] for file in args.fft_files: - with open(file) as f: + with Path(file).open() as f: fft_files += f.read().splitlines() else: - with open(args.fft_files) as f: + with Path(args.fft_files).open() as f: fft_files = f.read().splitlines() fft_files = sorted( @@ -112,15 +118,15 @@ if len(fft_files) > 0: fft_fields = get_keys( [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(fft_files[0], f"{args.channel}/dsp/") + key.replace(f"{channel}/dsp/", "") + for key in ls(fft_files[0], f"{channel}/dsp/") ], kwarg_dict_fft["cut_parameters"], ) fft_data = load_data( fft_files, - f"{args.channel}/dsp", + f"{channel}/dsp", {}, [*fft_fields, "timestamp", "trapTmax", "t_sat_lo"], ) @@ -185,26 +191,20 @@ kwarg_dict_cal = kwarg_dict["cal_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], kwarg_dict_cal["cut_parameters"], ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] cut_fields += get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], init_cal["cut_parameters"], ) # load data in data, threshold_mask = load_data( cal_files, - f"{args.channel}/dsp", + f"{channel}/dsp", {}, [*cut_fields, "timestamp", "trapTmax", "t_sat_lo"], threshold=kwarg_dict_cal.get("threshold", 0), @@ -223,11 +223,11 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, total_mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -304,12 +304,14 @@ hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} + hit_dict = convert_dict_np_to_float(hit_dict) + for file in args.save_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + 
Path(file).parent.mkdir(parents=True, exist_ok=True)
         Props.write_to(file, hit_dict)
 
     if args.plot_path:
         for file in args.plot_path:
-            pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True)
-            with open(file, "wb") as f:
+            Path(file).parent.mkdir(parents=True, exist_ok=True)
+            with Path(file).open("wb") as f:
                 pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL)
diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py
index 10af322..4f87afb 100644
--- a/scripts/pars_pht_qc_phy.py
+++ b/scripts/pars_pht_qc_phy.py
@@ -3,11 +3,10 @@
 import argparse
 import json
 import logging
-import os
-import pathlib
 import pickle as pkl
 import re
 import warnings
+from pathlib import Path
 
 import lgdo.lh5 as lh5
 import numpy as np
@@ -18,6 +17,7 @@
     generate_cut_classifiers,
     get_keys,
 )
+from util.convert_np import convert_dict_np_to_float
 
 log = logging.getLogger(__name__)
 
@@ -29,12 +29,13 @@
     argparser.add_argument("--phy_files", help="cal_files", nargs="*", type=str)
 
     argparser.add_argument("--configs", help="config", type=str, required=True)
+    argparser.add_argument("--metadata", help="metadata path", type=str, required=True)
+    argparser.add_argument("--log", help="log_file", type=str)
+
     argparser.add_argument("--datatype", help="Datatype", type=str, required=True)
     argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True)
     argparser.add_argument("--channel", help="Channel", type=str, required=True)
 
-    argparser.add_argument("--log", help="log_file", type=str)
-
     argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False)
     argparser.add_argument(
         "--save_path",
@@ -52,6 +53,10 @@
     logging.getLogger("matplotlib").setLevel(logging.INFO)
     logging.getLogger("legendmeta").setLevel(logging.INFO)
 
+    meta = LegendMetadata(path=args.metadata)
+    chmap = meta.channelmap(args.timestamp, system=args.datatype)
+    channel = f"ch{chmap[args.channel].daq.rawid:07}"
+
     # get metadata dictionary
     configs = LegendMetadata(path=args.configs)
     channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]
@@ -64,7 +69,7 @@
     if isinstance(args.phy_files, list):
         phy_files = []
         for file in sorted(args.phy_files):
-            with open(file) as f:
+            with Path(file).open() as f:
                 run_files = f.read().splitlines()
             if len(run_files) == 0:
                 continue
@@ -78,7 +83,7 @@
             )
             bl_mask = np.append(bl_mask, bl_idxs)
     else:
-        with open(args.phy_files) as f:
+        with Path(args.phy_files).open() as f:
             phy_files = f.read().splitlines()
         phy_files = sorted(np.unique(phy_files))
         bls = sto.read("ch1027200/dsp/", phy_files, field_mask=["wf_max", "bl_mean"])[0]
@@ -89,15 +94,12 @@
     kwarg_dict_fft = kwarg_dict["fft_fields"]
 
     cut_fields = get_keys(
-        [
-            key.replace(f"{args.channel}/dsp/", "")
-            for key in ls(phy_files[0], f"{args.channel}/dsp/")
-        ],
+        [key.replace(f"{channel}/dsp/", "") for key in ls(phy_files[0], f"{channel}/dsp/")],
         kwarg_dict_fft["cut_parameters"],
     )
 
     data = sto.read(
-        f"{args.channel}/dsp/",
+        f"{channel}/dsp/",
         phy_files,
         field_mask=[*cut_fields, "daqenergy", "t_sat_lo", "timestamp"],
         idx=np.where(bl_mask)[0],
@@ -146,12 +148,14 @@
     log.debug("fft cuts applied")
     log.debug(f"cut_dict is: {json.dumps(hit_dict, indent=2)}")
 
+    hit_dict = convert_dict_np_to_float(hit_dict)
+
     for file in args.save_path:
-        pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True)
+        Path(file).parent.mkdir(parents=True, exist_ok=True)
         Props.write_to(file, {"pars": {"operations": hit_dict}})
 
     if args.plot_path:
         for file in args.plot_path:
-
pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) - with open(file, "wb") as f: + Path(file).parent.mkdir(parents=True, exist_ok=True) + with Path(file).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index f72a04a..9e6ad42 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -11,6 +10,7 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log file", type=str) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -37,19 +37,21 @@ config_dict = configs.on(args.timestamp, system=args.datatype) kwarg_dict = config_dict["snakemake_rules"]["pars_tcm_pulser"]["inputs"]["pulser_config"] +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid}" + kwarg_dict = Props.read_from(kwarg_dict) if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": tcm_files = args.tcm_files[0] - with open(tcm_files) as f: + with Path(tcm_files).open() as f: tcm_files = f.read().splitlines() else: tcm_files = args.tcm_files # get pulser mask from tcm files tcm_files = sorted(np.unique(tcm_files)) -ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") -) +ids, mask = get_tcm_pulser_ids(tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold")) -pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True) +Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/scripts/util/CalibCatalog.py b/scripts/util/CalibCatalog.py deleted file mode 100644 index b222c5d..0000000 --- a/scripts/util/CalibCatalog.py +++ /dev/null @@ -1,128 +0,0 @@ -# -# Copyright (C) 2015 Oliver Schulz -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -""" -This module stores the scripts for leading validity files based on timestamp and system -""" - -import bisect -import collections -import copy -import json -import types -from collections import namedtuple -from pathlib import Path - -from .utils import unix_time - - -class Props: - @staticmethod - def read_from(sources): - def read_impl(sources): - if isinstance(sources, (str, Path)): - file_name = sources - with open(file_name) as file: - return json.load(file) - elif isinstance(sources, list): - result = {} - for p in map(read_impl, sources): - Props.add_to(result, p) - return result - else: - msg = f"Can't run Props.read_from on sources-value of type {type(sources)}" - raise ValueError(msg) - - return read_impl(sources) - - @staticmethod - def add_to(props_a, props_b): - a = props_a - b = props_b - - for key in b: - if key in a: - if isinstance(a[key], dict) and isinstance(b[key], dict): - Props.add_to(a[key], b[key]) - elif a[key] != b[key]: - a[key] = copy.copy(b[key]) - else: - a[key] = copy.copy(b[key]) - - -class PropsStream: - @staticmethod - def get(value): - if isinstance(value, (str, Path)): - return PropsStream.read_from(value) - elif isinstance(value, (collections.abc.Sequence, types.GeneratorType)): - return value - else: - msg = f"Can't get PropsStream from value of type {type(value)}" - raise ValueError(msg) - - @staticmethod - def read_from(file_name): - with open(file_name) as file: - for json_str in file: - yield json.loads(json_str) - - -class CalibCatalog(namedtuple("CalibCatalog", ["entries"])): - __slots__ = () - - class Entry(namedtuple("Entry", ["valid_from", "file"])): - __slots__ = () - - @staticmethod - def read_from(file_name): - entries = {} - - for props in PropsStream.get(file_name): - timestamp = props["valid_from"] - system = "all" if props.get("category") is None else props["category"] - file_key = props["apply"] - if system not in entries: - entries[system] = [] - entries[system].append(CalibCatalog.Entry(unix_time(timestamp), file_key)) - - for system in entries: - entries[system] = sorted(entries[system], key=lambda entry: entry.valid_from) - return CalibCatalog(entries) - - def calib_for(self, timestamp, category="all", allow_none=False): - if category in self.entries: - valid_from = [entry.valid_from for entry in self.entries[category]] - pos = bisect.bisect_right(valid_from, unix_time(timestamp)) - if pos > 0: - return self.entries[category][pos - 1].file - else: - if allow_none: - return None - else: - msg = f"No valid calibration found for timestamp: {timestamp}, category: {category}" - raise RuntimeError(msg) - else: - if allow_none: - return None - else: - msg = f"No calibrations found for category: {category}" - raise RuntimeError(msg) - - @staticmethod - def get_calib_files(catalog_file, timestamp, category="all"): - catalog = CalibCatalog.read_from(catalog_file) - return CalibCatalog.calib_for(catalog, timestamp, category) diff --git a/scripts/util/FileKey.py b/scripts/util/FileKey.py index 5c01f97..ca4573c 100644 --- a/scripts/util/FileKey.py +++ b/scripts/util/FileKey.py @@ -2,9 +2,9 @@ This module contains classes to convert between keys and files using the patterns defined in patterns.py """ -import os import re from collections import namedtuple +from pathlib import Path import snakemake as smk @@ -57,6 +57,8 @@ def get_filekey_from_pattern(cls, filename, pattern=None): except AttributeError: key_pattern_rx = re.compile(smk.io.regex(cls.key_pattern)) else: + if isinstance(pattern, Path): + pattern = pattern.as_posix() try: 
key_pattern_rx = re.compile(smk.io.regex_from_filepattern(pattern)) except AttributeError: @@ -92,6 +94,8 @@ def parse_keypart(cls, keypart): return cls(**d) def get_path_from_filekey(self, pattern, **kwargs): + if isinstance(pattern, Path): + pattern = pattern.as_posix() if kwargs is None: return smk.io.expand(pattern, **self._asdict()) else: @@ -163,6 +167,8 @@ def name(self): return f"{super().name}-{self.processing_step}" def get_path_from_filekey(self, pattern, **kwargs): + if isinstance(pattern, Path): + pattern = pattern.as_posix() if not isinstance(pattern, str): pattern = pattern(self.tier, self.identifier) if kwargs is None: @@ -198,6 +204,8 @@ def _asdict(self): @staticmethod def get_channel_files(keypart, par_pattern, chan_list): + if isinstance(par_pattern, Path): + par_pattern = par_pattern.as_posix() d = ChannelProcKey.parse_keypart(keypart) filenames = [] for chan in chan_list: @@ -216,7 +224,7 @@ def per_grouper(files): pers = [] per_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.experiment}-{fk.period}" not in pers: pers.append(f"{fk.experiment}-{fk.period}") per_files.append([]) @@ -231,7 +239,7 @@ def run_grouper(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.experiment}-{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.experiment}-{fk.period}-{fk.run}") run_files.append([]) diff --git a/scripts/util/__init__.py b/scripts/util/__init__.py index 90b7204..caa4dd2 100644 --- a/scripts/util/__init__.py +++ b/scripts/util/__init__.py @@ -1,8 +1,8 @@ -from .CalibCatalog import CalibCatalog, Props, PropsStream -from .create_pars_keylist import pars_key_resolve -from .dataset_cal import dataset_file +from .cal_grouping import CalGrouping +from .catalog import Catalog, Props, PropsStream +from .create_pars_keylist import ParsKeyResolve from .FileKey import ChannelProcKey, FileKey, ProcessingFileKey -from .pars_loading import pars_catalog +from .pars_loading import ParsCatalog from .utils import ( runcmd, subst_vars, @@ -14,13 +14,13 @@ __all__ = [ "Props", "PropsStream", - "CalibCatalog", - "pars_key_resolve", - "dataset_file", + "Catalog", + "ParsKeyResolve", + "CalGrouping", "FileKey", "ProcessingFileKey", "ChannelProcKey", - "pars_catalog", + "ParsCatalog", "unix_time", "runcmd", "subst_vars_impl", diff --git a/scripts/util/dataset_cal.py b/scripts/util/cal_grouping.py similarity index 74% rename from scripts/util/dataset_cal.py rename to scripts/util/cal_grouping.py index 693e934..e41d5c7 100644 --- a/scripts/util/dataset_cal.py +++ b/scripts/util/cal_grouping.py @@ -3,23 +3,41 @@ """ import json -import os +from pathlib import Path + +import yaml from .FileKey import ChannelProcKey, ProcessingFileKey from .patterns import ( get_pattern_log_channel, + get_pattern_pars, get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) from .utils import filelist_path -class dataset_file: +class CalGrouping: def __init__(self, setup, input_file): - with open(input_file) as r: - self.datasets = json.load(r) + if Path(input_file).suffix == ".json": + with Path(input_file).open() as r: + self.datasets = json.load(r) + elif Path(input_file).suffix in (".yaml", ".yml"): + with Path(input_file).open() as r: + self.datasets = yaml.safe_load(r) + self.expand_runs() self.setup = setup + def 
expand_runs(self): + for channel, chan_dict in self.datasets.items(): + for part, part_dict in chan_dict.items(): + for per, runs in part_dict.items(): + if isinstance(runs, str) and ".." in runs: + start, end = runs.split("..") + self.datasets[channel][part][per] = [ + f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1) + ] + def get_dataset(self, dataset, channel): partition_dict = self.datasets["default"].copy() if channel in self.datasets: @@ -32,17 +50,13 @@ def get_filelists(self, dataset, channel, tier, experiment="l200", datatype="cal for per in dataset: if dataset[per] == "all": files += [ - os.path.join( - filelist_path(self.setup), - f"all-{experiment}-{per}-*-{datatype}-{tier}.filelist", - ) + Path(filelist_path(self.setup)) + / f"all-{experiment}-{per}-*-{datatype}-{tier}.filelist" ] else: files += [ - os.path.join( - filelist_path(self.setup), - f"all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist", - ) + Path(filelist_path(self.setup)) + / f"all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" for run in dataset[per] ] return files @@ -56,20 +70,25 @@ def get_par_files( experiment="l200", datatype="cal", name=None, - extension="json", + extension="yaml", ): dataset = self.get_dataset(dataset, channel) all_par_files = [] for item in catalog: par_files = item.apply for par_file in par_files: - if par_file.split("-")[-1] == f"par_{tier}.json": + if ( + par_file.split("-")[-1] + == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( + "-" + )[-1] + ): all_par_files.append(par_file) if channel == "default": channel = "{channel}" selected_par_files = [] for par_file in all_par_files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(par_file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) if ( fk.datatype == datatype and fk.experiment == experiment @@ -111,13 +130,18 @@ def get_plt_files( for item in catalog: par_files = item.apply for par_file in par_files: - if par_file.split("-")[-1] == f"par_{tier}.json": + if ( + par_file.split("-")[-1] + == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( + "-" + )[-1] + ): all_par_files.append(par_file) if channel == "default": channel = "{channel}" selected_par_files = [] for par_file in all_par_files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(par_file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) if ( fk.datatype == datatype and fk.experiment == experiment @@ -159,7 +183,7 @@ def get_log_file( datatype=datatype, name=name, ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(par_files[0])) + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) if channel == "default": fk.channel = "{channel}" else: @@ -176,7 +200,7 @@ def get_timestamp(self, catalog, dataset, channel, tier, experiment="l200", data datatype=datatype, name=None, ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(par_files[0])) + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) return fk.timestamp def get_wildcard_constraints(self, dataset, channel): @@ -195,6 +219,6 @@ def get_wildcard_constraints(self, dataset, channel): out_string = "" for channel in exclude_chans: out_string += f"(?!{channel})" - return out_string + r"ch\d{7}" + return out_string + r"^[VPCB]\d{1}\w{5}$" else: - return r"ch\d{7}" + return r"^[VPCB]\d{1}\w{5}$" diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py new file mode 100644 index 0000000..9ec9b80 --- /dev/null +++ 
b/scripts/util/catalog.py @@ -0,0 +1,191 @@ +# +# Copyright (C) 2015 Oliver Schulz +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This module stores the scripts for leading validity files based on timestamp and system +""" + +import bisect +import collections +import copy +import json +import types +from collections import namedtuple +from pathlib import Path + +import yaml + +from .utils import unix_time + + +class Props: + @staticmethod + def read_from(sources): + def read_impl(sources): + if isinstance(sources, (str, Path)): + file_name = sources + if isinstance(file_name, str): + file_name = Path(file_name) + if file_name.suffix in (".yaml", ".yml"): + with file_name.open() as file: + return yaml.safe_load(file) + elif file_name.suffix == ".json": + with file_name.open() as file: + return json.load(file) + else: + msg = f"Can't run Props.read_from on file with suffix {file_name.suffix}" + raise ValueError(msg) + elif isinstance(sources, list): + result = {} + for p in map(read_impl, sources): + Props.add_to(result, p) + return result + else: + msg = f"Can't run Props.read_from on sources-value of type {type(sources)}" + raise ValueError(msg) + + return read_impl(sources) + + @staticmethod + def add_to(props_a, props_b): + a = props_a + b = props_b + + for key in b: + if key in a: + if isinstance(a[key], dict) and isinstance(b[key], dict): + Props.add_to(a[key], b[key]) + elif a[key] != b[key]: + a[key] = copy.copy(b[key]) + else: + a[key] = copy.copy(b[key]) + + +class PropsStream: + """Simple class to control loading of validity.yaml files""" + + @staticmethod + def get(value): + if isinstance(value, (str, Path)): + return PropsStream.read_from(value) + + if isinstance(value, (collections.abc.Sequence, types.GeneratorType)): + return value + + msg = f"Can't get PropsStream from value of type {type(value)}" + raise ValueError(msg) + + @staticmethod + def read_from(file_name): + with Path(file_name).open() as r: + file = yaml.safe_load(r) + file = sorted(file, key=lambda item: unix_time(item["valid_from"])) + yield from file + + +class Catalog(namedtuple("Catalog", ["entries"])): + """Implementation of the `YAML metadata validity specification `_.""" + + __slots__ = () + + class Entry(namedtuple("Entry", ["valid_from", "file"])): + __slots__ = () + + @staticmethod + def get(value): + if isinstance(value, Catalog): + return value + + if isinstance(value, str): + return Catalog.read_from(value) + + msg = f"Can't get Catalog from value of type {type(value)}" + raise ValueError(msg) + + @staticmethod + def read_from(file_name): + """Read from a valdiity YAML file and build a Catalog object""" + entries = {} + for props in PropsStream.get(file_name): + timestamp = props["valid_from"] + system = "all" if props.get("category") is None else props["category"] + file_key = props["apply"] + if system not in entries: + entries[system] = [] + mode = "append" if props.get("mode") is None else props["mode"] + mode = "reset" if len(entries[system]) == 0 else mode + if mode == "reset": + new = 
file_key + elif mode == "append": + new = entries[system][-1].file.copy() + file_key + elif mode == "remove": + new = entries[system][-1].file.copy() + for file in file_key: + new.remove(file) + elif mode == "replace": + new = entries[system][-1].file.copy() + if len(file_key) != 2: + msg = f"Invalid number of elements in replace mode: {len(file_key)}" + raise ValueError(msg) + new.remove(file_key[0]) + new += [file_key[1]] + + else: + msg = f"Unknown mode for {timestamp}" + raise ValueError(msg) + + if timestamp in [entry.valid_from for entry in entries[system]]: + msg = ( + f"Duplicate timestamp: {timestamp}, use reset mode instead with a single entry" + ) + raise ValueError(msg) + entries[system].append(Catalog.Entry(unix_time(timestamp), new)) + + for system in entries: + entries[system] = sorted(entries[system], key=lambda entry: entry.valid_from) + return Catalog(entries) + + def valid_for(self, timestamp, system="all", allow_none=False): + """Get the valid entries for a given timestamp and system""" + if system in self.entries: + valid_from = [entry.valid_from for entry in self.entries[system]] + pos = bisect.bisect_right(valid_from, unix_time(timestamp)) + if pos > 0: + return self.entries[system][pos - 1].file + + if system != "all": + return self.valid_for(timestamp, system="all", allow_none=allow_none) + + if allow_none: + return None + + msg = f"No valid entries found for timestamp: {timestamp}, system: {system}" + raise RuntimeError(msg) + + if system != "all": + return self.valid_for(timestamp, system="all", allow_none=allow_none) + + if allow_none: + return None + + msg = f"No entries found for system: {system}" + raise RuntimeError(msg) + + @staticmethod + def get_files(catalog_file, timestamp, category="all"): + """Helper function to get the files for a given timestamp and category""" + catalog = Catalog.read_from(catalog_file) + return Catalog.valid_for(catalog, timestamp, category) diff --git a/scripts/util/convert_np.py b/scripts/util/convert_np.py new file mode 100644 index 0000000..cdc363c --- /dev/null +++ b/scripts/util/convert_np.py @@ -0,0 +1,14 @@ +import numpy as np + + +def convert_dict_np_to_float(dic): + for key in dic: + if isinstance(dic[key], dict): + convert_dict_np_to_float(dic[key]) + elif isinstance(dic[key], (np.float32, np.float64)): + dic[key] = float(dic[key]) + elif isinstance(dic[key], (list, tuple)): + dic[key] = [ + float(x) if isinstance(x, (np.float32, np.float64)) else x for x in dic[key] + ] + return dic diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index 88720ae..c3e1f22 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -2,24 +2,24 @@ This module creates the validity files used for determining the time validity of data """ -import glob import json import re import warnings -from typing import ClassVar +from pathlib import Path import snakemake as smk +import yaml from .FileKey import FileKey, ProcessingFileKey from .patterns import par_validity_pattern -class pars_key_resolve: - name_dict: ClassVar[dict] = {"cal": ["par_dsp", "par_hit"], "lar": ["par_dsp", "par_hit"]} +class ParsKeyResolve: def __init__(self, valid_from, category, apply): self.valid_from = valid_from self.category = category + self.mode = "reset" self.apply = apply def __str__(self): @@ -34,16 +34,21 @@ def from_filekey(cls, filekey, name_dict): filekey.timestamp, "all", filekey.get_path_from_filekey( - par_validity_pattern(), processing_step=name_dict, ext="json" + par_validity_pattern(), 
processing_step=name_dict, ext="yaml" ), ) @staticmethod def write_to_jsonl(file_names, path): - with open(path, "w") as of: + with Path(path).open("w") as of: for file_name in file_names: of.write(f"{file_name.get_json()}\n") + @staticmethod + def write_to_yaml(file_names, path): + with Path(path).open("w") as of: + yaml.dump([file_name.__dict__ for file_name in file_names], of, sort_keys=False) + @staticmethod def match_keys(key1, key2): if ( @@ -65,7 +70,7 @@ def generate_par_keylist(keys): keys = sorted(keys, key=FileKey.get_unix_timestamp) keylist.append(keys[0]) for key in keys[1:]: - matched_key = pars_key_resolve.match_keys(keylist[-1], key) + matched_key = ParsKeyResolve.match_keys(keylist[-1], key) if matched_key not in keylist: keylist.append(matched_key) else: @@ -84,10 +89,10 @@ def match_entries(entry1, entry2): @staticmethod def match_all_entries(entrylist, name_dict): out_list = [] - out_list.append(pars_key_resolve.from_filekey(entrylist[0], name_dict)) + out_list.append(ParsKeyResolve.from_filekey(entrylist[0], name_dict)) for entry in entrylist[1:]: - new_entry = pars_key_resolve.from_filekey(entry, name_dict) - pars_key_resolve.match_entries(out_list[-1], new_entry) + new_entry = ParsKeyResolve.from_filekey(entry, name_dict) + ParsKeyResolve.match_entries(out_list[-1], new_entry) out_list.append(new_entry) return out_list @@ -95,14 +100,17 @@ def match_all_entries(entrylist, name_dict): def get_keys(keypart, search_pattern): d = FileKey.parse_keypart(keypart) try: - tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(search_pattern)) + tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) + except AttributeError: - tier_pattern_rx = re.compile(smk.io.regex(search_pattern)) + tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] - files = glob.glob(fn_glob_pattern) + p = Path(fn_glob_pattern) + parts = p.parts[p.is_absolute() :] + files = Path(p.root).glob(str(Path(*parts))) keys = [] for f in files: - m = tier_pattern_rx.match(f) + m = tier_pattern_rx.match(str(f)) if m is not None: d = m.groupdict() key = FileKey(**d) @@ -113,19 +121,19 @@ def get_keys(keypart, search_pattern): def get_par_catalog(keypart, search_patterns, name_dict): if isinstance(keypart, str): keypart = [keypart] - if isinstance(search_patterns, str): + if isinstance(search_patterns, (str, Path)): search_patterns = [search_patterns] keylist = [] for search_pattern in search_patterns: for keypar in keypart: - keylist += pars_key_resolve.get_keys(keypar, search_pattern) + keylist += ParsKeyResolve.get_keys(keypar, search_pattern) if len(keylist) != 0: keys = sorted(keylist, key=FileKey.get_unix_timestamp) - keylist = pars_key_resolve.generate_par_keylist(keys) + keylist = ParsKeyResolve.generate_par_keylist(keys) - entrylist = pars_key_resolve.match_all_entries(keylist, name_dict) + entrylist = ParsKeyResolve.match_all_entries(keylist, name_dict) else: msg = "No Keys found" warnings.warn(msg, stacklevel=0) - entrylist = [pars_key_resolve("00000000T000000Z", "all", [])] + entrylist = [ParsKeyResolve("00000000T000000Z", "all", [])] return entrylist diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index 03f242e..137ae03 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -3,16 +3,16 @@ to determine the par and par overwrite for a particular timestamp """ -import os +from pathlib import Path -from .CalibCatalog import CalibCatalog +from .catalog 
import Catalog from .FileKey import ProcessingFileKey # from .patterns import from .utils import get_pars_path, par_overwrite_path -class pars_catalog(CalibCatalog): +class ParsCatalog(Catalog): @staticmethod def match_pars_files(filelist1, filelist2): for file2 in filelist2: @@ -29,19 +29,18 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): - par_file = os.path.join(get_pars_path(setup, tier), "validity.jsonl") - pars_files = pars_catalog.get_calib_files(par_file, timestamp) - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") - pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) + par_file = Path(get_pars_path(setup, tier)) / "validity.yaml" + pars_files = ParsCatalog.get_files(par_file, timestamp) + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" + pars_files_overwrite = ParsCatalog.get_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: - pars_files, pars_files_overwrite = pars_catalog.match_pars_files( + pars_files, pars_files_overwrite = ParsCatalog.match_pars_files( pars_files, pars_files_overwrite ) - pars_files = [os.path.join(get_pars_path(setup, tier), file) for file in pars_files] + pars_files = [Path(get_pars_path(setup, tier)) / file for file in pars_files] if len(pars_files_overwrite) > 0: pars_overwrite_files = [ - os.path.join(par_overwrite_path(setup), tier, file) - for file in pars_files_overwrite + Path(par_overwrite_path(setup)) / tier / file for file in pars_files_overwrite ] pars_files += pars_overwrite_files return pars_files diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index b60d73f..1bfc9f7 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -2,32 +2,18 @@ This module contains all the patterns needed for the data production """ -import os +from pathlib import Path from .utils import ( - par_dsp_path, - par_evt_path, - par_hit_path, + get_pars_path, + get_tier_path, par_overwrite_path, - par_pht_path, - par_psp_path, - par_raw_path, - par_tcm_path, pars_path, plts_path, sandbox_path, tier_daq_path, - tier_dsp_path, - tier_evt_path, - tier_hit_path, tier_path, - tier_pet_path, - tier_pht_path, - tier_psp_path, tier_raw_blind_path, - tier_raw_path, - tier_skm_path, - tier_tcm_path, tmp_log_path, tmp_par_path, tmp_plts_path, @@ -69,507 +55,232 @@ def full_channel_pattern_with_extension(): def get_pattern_unsorted_data(setup): if sandbox_path(setup) is not None: - return os.path.join( - f"{sandbox_path(setup)}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca", + return ( + Path(f"{sandbox_path(setup)}") + / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" ) else: return None def get_pattern_tier_daq(setup): - return os.path.join( - f"{tier_daq_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca", - ) - - -def get_pattern_tier_raw(setup): - return os.path.join( - f"{tier_raw_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_raw.lh5", + return ( + Path(f"{tier_daq_path(setup)}") + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" ) def get_pattern_tier_raw_blind(setup): - return os.path.join( - f"{tier_raw_blind_path(setup)}", - "phy", - "{period}", - "{run}", - "{experiment}-{period}-{run}-phy-{timestamp}-tier_raw.lh5", - ) - - -def get_pattern_tier_tcm(setup): - 
return os.path.join( - f"{tier_tcm_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_tcm.lh5", - ) - - -def get_pattern_tier_dsp(setup): - return os.path.join( - f"{tier_dsp_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_dsp.lh5", - ) - - -def get_pattern_tier_hit(setup): - return os.path.join( - f"{tier_hit_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_hit.lh5", - ) - - -def get_pattern_tier_evt(setup): - return os.path.join( - f"{tier_evt_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_evt.lh5", - ) - - -def get_pattern_tier_evt_concat(setup): - return os.path.join( - f"{tier_evt_path(setup)}", - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_evt.lh5", - ) - - -def get_pattern_tier_psp(setup): - return os.path.join( - f"{tier_psp_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_psp.lh5", - ) - - -def get_pattern_tier_pht(setup): - return os.path.join( - f"{tier_pht_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pht.lh5", - ) - - -def get_pattern_tier_pet(setup): - return os.path.join( - f"{tier_pet_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pet.lh5", - ) - - -def get_pattern_tier_pet_concat(setup): - return os.path.join( - f"{tier_pet_path(setup)}", - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_pet.lh5", - ) - - -def get_pattern_tier_skm(setup): - return os.path.join( - f"{tier_skm_path(setup)}", - "phy", - "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", + return ( + Path(f"{tier_raw_blind_path(setup)}") + / "phy" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-phy-{timestamp}-tier_raw.lh5" ) def get_pattern_tier(setup, tier, check_in_cycle=True): - if tier == "daq": - file_pattern = get_pattern_tier_daq(setup) - elif tier == "raw": - file_pattern = get_pattern_tier_raw(setup) - elif tier == "tcm": - file_pattern = get_pattern_tier_tcm(setup) - elif tier == "dsp": - file_pattern = get_pattern_tier_dsp(setup) - elif tier == "hit": - file_pattern = get_pattern_tier_hit(setup) - elif tier == "evt": - file_pattern = get_pattern_tier_evt(setup) - elif tier == "evt_concat": - file_pattern = get_pattern_tier_evt_concat(setup) - elif tier == "psp": - file_pattern = get_pattern_tier_psp(setup) - elif tier == "pht": - file_pattern = get_pattern_tier_pht(setup) - elif tier == "pet": - file_pattern = get_pattern_tier_pet(setup) - elif tier == "pet_concat": - file_pattern = get_pattern_tier_pet_concat(setup) - elif tier == "skm": - file_pattern = get_pattern_tier_skm(setup) - else: - msg = "invalid tier" - raise Exception(msg) - if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: - return "/tmp/" + os.path.basename(file_pattern) - else: - return file_pattern - - -def get_pattern_par_raw(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_raw_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_raw_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_raw_path(setup)}", - "cal", - "{period}", - "{run}", - 
"{experiment}-{period}-{run}-cal-{timestamp}-par_raw" + f".{extension}", - ) - - -def get_pattern_par_tcm(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_tcm_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_tcm_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_tcm_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_tcm" + f".{extension}", + if tier in ["raw", "tcm", "dsp", "hit", "ann", "evt", "psp", "pht", "pan", "pet"]: + file_pattern = ( + Path(get_tier_path(setup, tier)) + / "{datatype}" + / "{period}" + / "{run}" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5") ) - - -def get_pattern_par_dsp(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_dsp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_dsp_" + f"{name}.{extension}", + elif tier in ["evt_concat", "pet_concat"]: + file_pattern = ( + Path(get_tier_path(setup, tier[:3])) + / "{datatype}" + / ("{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5") ) - else: - return os.path.join( - f"{par_dsp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_dsp" + f".{extension}", - ) - - -def get_pattern_par_hit(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_hit_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_hit_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_hit_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_hit" + f".{extension}", - ) - - -def get_pattern_par_evt(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_evt_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_evt" + f".{extension}", - ) - - -def get_pattern_par_psp(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_psp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_psp_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_psp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_psp" + f".{extension}", - ) - -def get_pattern_par_pht(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_pht_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pht_" + f"{name}.{extension}", + elif tier == "skm": + file_pattern = ( + Path(f"{get_tier_path(setup, tier)}") + / "phy" + / "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5" ) else: - return os.path.join( - f"{par_pht_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pht" + f".{extension}", - ) - - -def get_pattern_par_pet(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pet_" + f"{name}.{extension}", - ) + msg = "invalid tier" + 
raise Exception(msg) + if tier_path(setup) not in str(file_pattern.resolve(strict=False)) and check_in_cycle is True: + return "/tmp/" + file_pattern.name else: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pet" + f".{extension}", - ) + return file_pattern -def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=True): - if tier == "raw": - file_pattern = get_pattern_par_raw(setup, name, extension) - elif tier == "tcm": - file_pattern = get_pattern_par_tcm(setup, name, extension) - elif tier == "dsp": - file_pattern = get_pattern_par_dsp(setup, name, extension) - elif tier == "hit": - file_pattern = get_pattern_par_hit(setup, name, extension) - elif tier == "evt": - file_pattern = get_pattern_par_evt(setup, name, extension) - elif tier == "psp": - file_pattern = get_pattern_par_psp(setup, name, extension) - elif tier == "pht": - file_pattern = get_pattern_par_pht(setup, name, extension) - elif tier == "pet": - file_pattern = get_pattern_par_pet(setup, name, extension) +def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=True): + if tier in ["raw", "tcm", "dsp", "hit", "ann", "evt", "psp", "pht", "pan", "pet"]: + if name is not None: + return ( + Path(get_pars_path(setup, tier)) + / "cal" + / "{period}" + / "{run}" + / ( + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}_{name}.{extension}" + ) + ) + else: + file_pattern = ( + Path(get_pars_path(setup, tier)) + / "cal" + / "{period}" + / "{run}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}") + ) else: msg = "invalid tier" raise Exception(msg) - if pars_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: + if ( + pars_path(setup) not in str(Path(file_pattern).resolve(strict=False)) + and check_in_cycle is True + ): if name is None: return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" else: return ( "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}_{name}.{extension}" + f"par_{tier}_{name}.{extension}" ) else: return file_pattern -def get_pattern_pars_svm(setup, tier, name=None, ext="json"): +def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): if name is not None: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}") ) else: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{ext}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{ext}") ) -def get_pattern_pars_overwrite(setup, tier, name=None): +def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): if name is not None: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + tier - + "_" - + name - + "-overwrite.json", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "{datatype}" + / "{period}" + / "{run}" + / ( + 
"{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + f"{tier}_{name}-overwrite.{extension}" + ) ) else: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + tier + "-overwrite.json", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "{datatype}" + / "{period}" + / "{run}" + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + + tier + + f"-overwrite.{extension}" + ) ) -def get_pattern_pars_tmp(setup, tier, name=None, datatype=None): +def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml"): if datatype is None: datatype = "{datatype}" if name is None: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + tier + ".json", + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + f"{tier}.{extension}" ) else: - return os.path.join( - f"{tmp_par_path(setup)}", + return Path(f"{tmp_par_path(setup)}") / ( "{experiment}-{period}-{run}-" + datatype - + "-{timestamp}-par_" - + tier - + "_" - + name - + ".json", + + "-{timestamp}" + + f"par_{tier}_{name}.{extension}" ) -def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="json"): +def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}.{extension}", + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}.{extension}" ) else: - return os.path.join( - f"{tmp_par_path(setup)}", + return Path(f"{tmp_par_path(setup)}") / ( "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" - + f"{tier}_{name}.{extension}", + + f"{tier}_{name}.{extension}" ) def get_pattern_plts_tmp_channel(setup, tier, name=None): if name is None: - return os.path.join( - f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + tier + ".pkl", + return Path(f"{tmp_plts_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + tier + ".pkl" ) else: - return os.path.join( - f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + tier - + "_" - + name - + ".pkl", + return Path(f"{tmp_plts_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl" ) def get_pattern_plts(setup, tier, name=None): if name is None: - return os.path.join( - f"{plts_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + ".dir", + return ( + Path(f"{plts_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + ".dir") ) else: - return os.path.join( - f"{plts_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + "_" + name + ".dir", + return ( + Path(f"{plts_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + "_" + name + ".dir") ) -def get_energy_grids_pattern_combine(setup): - return os.path.join( - f"{tmp_par_path(setup)}", - "dsp", - "cal", - "{{period}}", - "{{run}}", - "par_dsp_energy_grid", - "{{channel}}", - 
"{{experiment}}-{{period}}-{{run}}-cal-{{timestamp}}-{{channel}}-{peak}-par_dsp_energy_grid.pkl", - ) - - def get_pattern_log(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log") ) -def get_pattern_log_concat(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", +def get_pattern_log_channel(setup, processing_step): + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / ("{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log") ) -def get_pattern_log_channel(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", +def get_pattern_log_concat(setup, processing_step): + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / ("{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log") ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 894d69e..9d64b06 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -40,135 +40,21 @@ def tier_path(setup): return setup["paths"]["tier"] -def tier_tcm_path(setup): - return setup["paths"]["tier_tcm"] - - -def tier_raw_path(setup): - return setup["paths"]["tier_raw"] - - -def tier_dsp_path(setup): - return setup["paths"]["tier_dsp"] - - -def tier_hit_path(setup): - return setup["paths"]["tier_hit"] - - -def tier_evt_path(setup): - return setup["paths"]["tier_evt"] - - -def tier_psp_path(setup): - return setup["paths"]["tier_psp"] - - -def tier_pht_path(setup): - return setup["paths"]["tier_pht"] - - -def tier_pet_path(setup): - return setup["paths"]["tier_pet"] - - -def tier_skm_path(setup): - return setup["paths"]["tier_skm"] - - def get_tier_path(setup, tier): - if tier == "raw": - return tier_raw_path(setup) - elif tier == "tcm": - return tier_tcm_path(setup) - elif tier == "dsp": - return tier_dsp_path(setup) - elif tier == "hit": - return tier_hit_path(setup) - elif tier == "evt": - return tier_evt_path(setup) - elif tier == "psp": - return tier_psp_path(setup) - elif tier == "pht": - return tier_pht_path(setup) - elif tier == "pet": - return tier_pet_path(setup) - elif tier == "skm": - return tier_skm_path(setup) + if tier in ["raw", "tcm", "dsp", "hit", "ann", "evt", "psp", "pht", "pan", "pet", "skm"]: + return setup["paths"][f"tier_{tier}"] else: msg = f"no tier matching:{tier}" raise ValueError(msg) -def config_path(setup): - return setup["paths"]["config"] - - -def chan_map_path(setup): - return setup["paths"]["chan_map"] - - -def metadata_path(setup): - return setup["paths"]["metadata"] - - -def detector_db_path(setup): - return setup["paths"]["detector_db"] - - -def par_raw_path(setup): - return setup["paths"]["par_raw"] - - -def par_tcm_path(setup): - return setup["paths"]["par_tcm"] - - -def par_dsp_path(setup): - return setup["paths"]["par_dsp"] - - -def par_hit_path(setup): - return setup["paths"]["par_hit"] - - -def par_evt_path(setup): - return setup["paths"]["par_evt"] - - -def par_psp_path(setup): - return setup["paths"]["par_psp"] - - -def par_pht_path(setup): - return setup["paths"]["par_pht"] - - -def par_pet_path(setup): - return 
setup["paths"]["par_pet"] - - def pars_path(setup): return setup["paths"]["par"] def get_pars_path(setup, tier): - if tier == "raw": - return par_raw_path(setup) - elif tier == "tcm": - return par_tcm_path(setup) - elif tier == "dsp": - return par_dsp_path(setup) - elif tier == "hit": - return par_hit_path(setup) - elif tier == "evt": - return par_evt_path(setup) - elif tier == "psp": - return par_psp_path(setup) - elif tier == "pht": - return par_pht_path(setup) - elif tier == "pet": - return par_pet_path(setup) + if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: + return setup["paths"][f"par_{tier}"] else: msg = f"no tier matching:{tier}" raise ValueError(msg) @@ -190,6 +76,26 @@ def par_overwrite_path(setup): return setup["paths"]["par_overwrite"] +def config_path(setup): + return setup["paths"]["config"] + + +def chan_map_path(setup): + return setup["paths"]["chan_map"] + + +def det_status_path(setup): + return setup["paths"]["detector_status"] + + +def metadata_path(setup): + return setup["paths"]["metadata"] + + +def detector_db_path(setup): + return setup["paths"]["detector_db"] + + def log_path(setup): return setup["paths"]["log"] @@ -257,7 +163,7 @@ def subst_vars_in_snakemake_config(workflow, config): config_filename = workflow.overwrite_configfiles[0] # ToDo: Better way of handling this? subst_vars( config, - var_values={"_": os.path.dirname(config_filename)}, + var_values={"_": Path(config_filename).parent}, use_env=True, ignore_missing=False, ) @@ -271,8 +177,8 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - base = os.path.basename(file) - file_name = os.path.splitext(base)[0] + base = Path(file).name + file_name = Path(base).name parts = file_name.split("-") run_no = parts[3] if run_no not in runs: diff --git a/templates/config.json b/templates/config.json index d3d965b..d8189ee 100644 --- a/templates/config.json +++ b/templates/config.json @@ -19,9 +19,11 @@ "tier_tcm": "$_/generated/tier/tcm", "tier_dsp": "$_/generated/tier/dsp", "tier_hit": "$_/generated/tier/hit", + "tier_ann": "$_/generated/tier/ann", "tier_evt": "$_/generated/tier/evt", "tier_psp": "$_/generated/tier/psp", "tier_pht": "$_/generated/tier/pht", + "tier_pan": "$_/generated/tier/pan", "tier_pet": "$_/generated/tier/pet", "tier_skm": "$_/generated/tier/skm", @@ -60,11 +62,14 @@ } }, "pkg_versions": { - "pygama": "pygama==2.0.2", - "pylegendmeta": "pylegendmeta==0.10.2", - "dspeed": "dspeed==1.5.0", + "pygama": "pygama==2.0.3", + "pylegendmeta": "pylegendmeta==1.1.0", + "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.9.0", - "legend-daq2lh5": "legend-daq2lh5==1.2.2" + "legend-daq2lh5": "legend-daq2lh5==1.2.2", + "tensorflow": "tensorflow==2.17", + "keras": "keras==3.6.0", + "jax": "jax==0.4.30" } } } diff --git a/tests/test_util.py b/tests/test_util.py index 707843b..010c749 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,5 +1,4 @@ import json -import os from pathlib import Path from scripts.util import ( @@ -20,7 +19,7 @@ testprod = Path(__file__).parent / "dummy_cycle" -with open(str(testprod / "config.json")) as r: +with testprod.open() as r: setup = json.load(r) subst_vars(setup, var_values={"_": str(testprod)}) setup = setup["setups"]["test"] @@ -107,12 +106,12 @@ def test_create_pars_keylist(): def test_pars_loading(): pars_files = CalibCatalog.get_calib_files( - os.path.join(par_dsp_path(setup), "validity.jsonl"), "20230101T123456Z" + Path(par_dsp_path(setup)) / "validity.jsonl", "20230101T123456Z" ) assert pars_files == 
["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] par_override_files = CalibCatalog.get_calib_files( - os.path.join(par_overwrite_path(setup), "dsp", "validity.jsonl"), "20230101T123456Z" + Path(par_overwrite_path(setup)) / "dsp" / "validity.jsonl", "20230101T123456Z" ) pars_files, pars_files_overwrite = pars_catalog.match_pars_files( @@ -122,12 +121,12 @@ def test_pars_loading(): assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] assert set(pars_catalog.get_par_file(setup, "20230101T123456Z", "dsp")) == { - os.path.join( - par_dsp_path(setup), - "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", + ( + Path(par_dsp_path(setup)) + / "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", ), - os.path.join( - par_overwrite_path(setup), - "dsp/cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json", + ( + Path(par_overwrite_path(setup)) + / "dsp/cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json", ), }