From fd93dbbc9d7a6375b2d0b1db57ac07b6e9e5d24c Mon Sep 17 00:00:00 2001 From: Alexander Held Date: Thu, 6 Jul 2023 18:20:43 +0200 Subject: [PATCH] refactor template histogram creation to accept list of tasks --- example.py | 15 +++- src/cabinetry/route.py | 107 +++++++++++++++++----------- src/cabinetry/templates/__init__.py | 43 +++++++++-- 3 files changed, 114 insertions(+), 51 deletions(-) diff --git a/example.py b/example.py index 83f7f827..69db4c6d 100644 --- a/example.py +++ b/example.py @@ -17,7 +17,20 @@ cabinetry.configuration.print_overview(config) # create template histograms - cabinetry.templates.build(config, method="uproot") + from dask.distributed import Client, LocalCluster, wait + + def produce_single_template(template): + cabinetry.templates.build(config, template_list=[template]) + + template_list = cabinetry.route.required_templates(config) + + with LocalCluster(n_workers=2) as cluster: + client = Client(cluster) + wait(client.map(produce_single_template, template_list)) + + # cabinetry.templates.build(config, template_list=template_list) + + raise SystemExit # perform histogram post-processing cabinetry.templates.postprocess(config) diff --git a/src/cabinetry/route.py b/src/cabinetry/route.py index 2dee920d..385895e2 100644 --- a/src/cabinetry/route.py +++ b/src/cabinetry/route.py @@ -2,7 +2,7 @@ import fnmatch import logging -from typing import Any, Callable, Dict, List, Literal, Optional +from typing import Any, Callable, Dict, List, Literal, Optional, Tuple import boost_histogram as bh @@ -38,6 +38,12 @@ # (which returns a histogram) into a function that returns None WrapperFunc = Callable[[UserTemplateFunc], ProcessorFunc] +# type of tuple capturing all relevant information to obtain a template histogram +# this includes region, sample, systematic and template (up/down) +TemplateHistogramInformation = Tuple[ + Dict[str, Any], Dict[str, Any], Dict[str, Any], Optional[Literal["Up", "Down"]] +] + class Router: """Holds user-defined processing 
functions and matches functions to templates. @@ -257,33 +263,18 @@ def _find_template_builder_match( return None -def apply_to_all_templates( - config: Dict[str, Any], - default_func: ProcessorFunc, - *, - match_func: Optional[MatchFunc] = None, -) -> None: - """Applies the supplied function ``default_func`` to all templates. - - The templates are specified by the configuration file. The function takes four - arguments in this order: - - - the dict specifying region information - - the dict specifying sample information - - the dict specifying systematic information - - the template being considered: "Up", "Down", or None for the nominal template - - In addition it is possible to specify a function that returns custom overrides. If - one is found for a given template, it is used instead of the default. +def required_templates(config: Dict[str, Any]) -> List[TemplateHistogramInformation]: + """Returns relevant information needed to produce all required template histograms. Args: config (Dict[str, Any]): cabinetry configuration - default_func (ProcessorFunc): function to be called for every template by - default - match_func: (Optional[MatchFunc], optional): function that returns user-defined - functions to override the call to ``default_func``, defaults to None (then - it is not used) + + Returns: + List[TemplateHistogramInformation]: list of relevant information for each + template histogram """ + all_templates = [] + for region in config["Regions"]: log.debug(f" in region {region['Name']}") @@ -321,22 +312,52 @@ def apply_to_all_templates( f"{' ' + template if template is not None else ''}" ) - func_override = None - if match_func is not None: - # check whether a user-defined function was registered that - # matches this region-sample-systematic-template - systematic_name = ( - systematic["Name"] if template is not None else "" - ) - func_override = match_func( - region["Name"], sample["Name"], systematic_name, template - ) - if func_override is not None: - # 
call the user-defined function - log.debug( - f"executing user-defined override {func_override.__name__}" - ) - func_override(region, sample, systematic, template) - else: - # call the provided default function - default_func(region, sample, systematic, template) + all_templates.append((region, sample, systematic, template)) + + return all_templates + + +def apply_to_templates( + default_func: ProcessorFunc, # BREAKING API CHANGE + template_list: List[TemplateHistogramInformation], + *, + match_func: Optional[MatchFunc] = None, +) -> None: + """Applies the supplied function ``default_func`` to all templates. + + The templates are specified by ``template_list``. The function takes four + arguments in this order: + + - the dict specifying region information + - the dict specifying sample information + - the dict specifying systematic information + - the template being considered: "Up", "Down", or None for the nominal template + + In addition it is possible to specify a function that returns custom overrides. If + one is found for a given template, it is used instead of the default. 
+ + Args: + default_func (ProcessorFunc): function to be called for every template by + default + template_list (List[TemplateHistogramInformation]): list of template information + to apply function to + match_func (Optional[MatchFunc], optional): function that returns user-defined + functions to override the call to ``default_func``, defaults to None (then + it is not used) + """ + for region, sample, systematic, template in template_list: + func_override = None + if match_func is not None: + # check whether a user-defined function was registered that + # matches this region-sample-systematic-template + systematic_name = systematic["Name"] if template is not None else "" + func_override = match_func( + region["Name"], sample["Name"], systematic_name, template + ) + if func_override is not None: + # call the user-defined function + log.debug(f"executing user-defined override {func_override.__name__}") + func_override(region, sample, systematic, template) + else: + # call the provided default function + default_func(region, sample, systematic, template) diff --git a/src/cabinetry/templates/__init__.py b/src/cabinetry/templates/__init__.py index 034fc327..013c21c1 100644 --- a/src/cabinetry/templates/__init__.py +++ b/src/cabinetry/templates/__init__.py @@ -2,7 +2,7 @@ import logging import pathlib -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from cabinetry import route from cabinetry.templates import builder @@ -18,6 +18,7 @@ def build( *, method: str = "uproot", router: Optional[route.Router] = None, + template_list: Optional[List[route.TemplateHistogramInformation]] = None, ) -> None: """Produces all required histograms specified by the configuration file. 
@@ -31,6 +32,8 @@ def build( "uproot" router (Optional[route.Router], optional): instance of cabinetry.route.Router that contains user-defined overrides, defaults to None + template_list (Optional[List[route.TemplateHistogramInformation]]): list of + information for templates to process, defaults to None (all templates) """ # create an instance of the class doing the template building histogram_folder = pathlib.Path(config["General"]["HistogramFolder"]) @@ -44,12 +47,21 @@ def build( # get a function that can be queried to return a user-defined template builder match_func = router._find_template_builder_match - route.apply_to_all_templates( - config, template_builder._create_histogram, match_func=match_func + # get list of required templates to process if not provided already + if template_list is None: + template_list = route.required_templates(config) + + route.apply_to_templates( + template_builder._create_histogram, template_list, match_func=match_func ) -def collect(config: Dict[str, Any], *, method: str = "uproot") -> None: +def collect( + config: Dict[str, Any], + *, + method: str = "uproot", + template_list: Optional[List[route.TemplateHistogramInformation]] = None, +) -> None: """Collects all required histograms specified by the configuration file. 
Histograms must already exist, and this collects and saves them in the format used @@ -60,6 +72,8 @@ def collect(config: Dict[str, Any], *, method: str = "uproot") -> None: config (Dict[str, Any]): cabinetry configuration method (str, optional): backend to use for histogram production, defaults to "uproot" + template_list (Optional[List[route.TemplateHistogramInformation]]): list of + information for templates to process, defaults to None (all templates) """ histogram_folder = pathlib.Path(config["General"]["HistogramFolder"]) general_path = config["General"]["InputPath"] @@ -71,15 +85,30 @@ def collect(config: Dict[str, Any], *, method: str = "uproot") -> None: processor = collector._collector( histogram_folder, general_path, variation_path, method ) - route.apply_to_all_templates(config, processor) + # get list of required templates to process if not provided already + if template_list is None: + template_list = route.required_templates(config) + + route.apply_to_templates(processor, template_list) -def postprocess(config: Dict[str, Any]) -> None: + +def postprocess( + config: Dict[str, Any], + template_list: Optional[List[route.TemplateHistogramInformation]] = None, +) -> None: """Applies postprocessing to all histograms. Args: config (Dict[str, Any]): cabinetry configuration + template_list (Optional[List[route.TemplateHistogramInformation]]): list of + information for templates to process, defaults to None (all templates) """ histogram_folder = pathlib.Path(config["General"]["HistogramFolder"]) processor = postprocessor._postprocessor(histogram_folder) - route.apply_to_all_templates(config, processor) + + # get list of required templates to process if not provided already + if template_list is None: + template_list = route.required_templates(config) + + route.apply_to_templates(processor, template_list)