diff --git a/docs/changes/2411.features.rst b/docs/changes/2411.features.rst deleted file mode 100644 index 49cb45370dc..00000000000 --- a/docs/changes/2411.features.rst +++ /dev/null @@ -1,3 +0,0 @@ -Add option ``override_obs_id`` to ``SimTelEventSource`` which allows -assigning new, unique ``obs_ids`` in case productions reuse CORSIKA run -numbers. diff --git a/docs/changes/2473.feature.rst b/docs/changes/2473.feature.rst new file mode 100644 index 00000000000..94106872294 --- /dev/null +++ b/docs/changes/2473.feature.rst @@ -0,0 +1,3 @@ +Add a ``ctapipe-make-irf`` tool able to produce IRFs given a cut-selection file and gamma, proton, and electron DL2 input files. + +Add a ``ctapipe-optimize-event-selection`` tool to produce cut-selection files. diff --git a/docs/conf.py b/docs/conf.py index 8941db319e4..2c397c32c39 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -118,6 +118,7 @@ def setup(app): ("py:class", "t.Type"), ("py:class", "t.List"), ("py:class", "t.Tuple"), + ("py:class", "t.Sequence"), ("py:class", "Config"), ("py:class", "traitlets.config.configurable.Configurable"), ("py:class", "traitlets.traitlets.HasTraits"), @@ -132,35 +133,34 @@ def setup(app): ("py:class", "traitlets.config.application.Application"), ("py:class", "traitlets.utils.sentinel.Sentinel"), ("py:class", "traitlets.traitlets.ObserveHandler"), + ("py:class", "traitlets.traitlets.T"), + ("py:class", "traitlets.traitlets.G"), + ("py:class", "Sentinel"), + ("py:class", "ObserveHandler"), ("py:class", "dict[K, V]"), ("py:class", "G"), ("py:class", "K"), ("py:class", "V"), - ("py:class", "t.Sequence"), ("py:class", "StrDict"), ("py:class", "ClassesType"), - ("py:class", "traitlets.traitlets.G"), + ("py:class", "re.Pattern"), + ("py:class", "re.Pattern[t.Any]"), + ("py:class", "astropy.coordinates.baseframe.BaseCoordinateFrame"), + ("py:class", "astropy.table.table.Table"), + ("py:class", "eventio.simtel.simtelfile.SimTelFile"), + ("py:class", "ctapipe.compat.StrEnum"), + ("py:class", "ctapipe.compat.StrEnum"), + ("py:obj", "traitlets.traitlets.T"), ("py:obj", "traitlets.traitlets.G"), ("py:obj", "traitlets.traitlets.S"), - ("py:obj", "traitlets.traitlets.T"), - ("py:class", "traitlets.traitlets.T"), - ("py:class", "re.Pattern[t.Any]"), - ("py:class", "re.Pattern"), - ("py:class", "Sentinel"), - ("py:class", "ObserveHandler"), ("py:obj", "traitlets.config.boolean_flag"), ("py:obj", "traitlets.TraitError"), ("py:obj", "-v"), # fix for wrong syntax in a traitlets docstring + ("py:obj", "cls"), + ("py:obj", "name"), ("py:meth", "MetaHasDescriptors.__init__"), ("py:meth", "HasTraits.__new__"), ("py:meth", "BaseDescriptor.instance_init"), - ("py:obj", "cls"), - ("py:obj", "name"), - ("py:class", "astropy.coordinates.baseframe.BaseCoordinateFrame"), - ("py:class", "astropy.table.table.Table"), - ("py:class", "eventio.simtel.simtelfile.SimTelFile"), - ("py:class", "ctapipe.compat.StrEnum"), - ("py:class", "ctapipe.compat.StrEnum"), ] # Sphinx gallery config diff --git a/environment.yml b/environment.yml index f92fcfa6e72..d4b4a79eaad 100644 --- a/environment.yml +++ b/environment.yml @@ -25,6 +25,7 @@ dependencies: - pypandoc - pre-commit - psutil + - pyirf - pytables - pytest - pytest-cov diff --git a/pyproject.toml b/pyproject.toml index 7a663d17c39..685a3be27a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ dependencies = [ "numba >=0.56", "numpy ~=1.16", "psutil", + "pyirf >0.10.1", "pyyaml >=5.1", "requests", "scikit-learn !=1.4.0", # 1.4.0 breaks with astropy tables, before and after works
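The changelog entry above introduces a two-step workflow: first derive a cut-selection file, then build IRFs from it plus the DL2 inputs. A minimal sketch of driving that workflow from Python follows; only the two tool names come from this diff, while the command-line option names and file paths are illustrative assumptions, so consult each tool's --help output for the actual traitlets-based interface.

import subprocess

# Step 1: optimize event-selection cuts and write a cut-selection file
# (option names below are hypothetical placeholders)
subprocess.run(
    [
        "ctapipe-optimize-event-selection",
        "--gamma-file=gamma.dl2.h5",
        "--proton-file=proton.dl2.h5",
        "--electron-file=electron.dl2.h5",
        "--output=cuts.fits",
    ],
    check=True,
)

# Step 2: produce IRFs from the same DL2 files plus the cut-selection file
subprocess.run(
    [
        "ctapipe-make-irf",
        "--gamma-file=gamma.dl2.h5",
        "--proton-file=proton.dl2.h5",
        "--electron-file=electron.dl2.h5",
        "--cuts=cuts.fits",
        "--output=irfs.fits.gz",
    ],
    check=True,
)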
@@ -96,6 +97,8 @@ ctapipe-dump-instrument = "ctapipe.tools.dump_instrument:main" ctapipe-display-dl1 = "ctapipe.tools.display_dl1:main" ctapipe-process = "ctapipe.tools.process:main" ctapipe-merge = "ctapipe.tools.merge:main" +ctapipe-optimize-event-selection = "ctapipe.tools.optimize_event_selection:main" +ctapipe-make-irf = "ctapipe.tools.make_irf:main" ctapipe-fileinfo = "ctapipe.tools.fileinfo:main" ctapipe-quickstart = "ctapipe.tools.quickstart:main" ctapipe-train-energy-regressor = "ctapipe.tools.train_energy_regressor:main" diff --git a/src/ctapipe/conftest.py b/src/ctapipe/conftest.py index 4d743e5d217..e9f652d5c5b 100644 --- a/src/ctapipe/conftest.py +++ b/src/ctapipe/conftest.py @@ -1,6 +1,7 @@ """ common pytest fixtures for tests in ctapipe """ + import shutil from copy import deepcopy @@ -9,7 +10,7 @@ import pytest import tables from astropy.coordinates import EarthLocation -from astropy.table import Table +from astropy.table import QTable, Table, vstack from pytest_astropy_header.display import PYTEST_HEADER_MODULES from ctapipe.core import run_tool @@ -684,3 +685,108 @@ def dl1_mon_pointing_file(dl1_file, dl1_tmp_path): f.remove_node("/configuration/telescope/pointing", recursive=True) return path + + +@pytest.fixture(scope="session") +def gamma_diffuse_full_reco_file( + gamma_train_clf, + particle_classifier_path, + model_tmp_path, +): + """ + Energy reconstruction and geometric origin reconstruction have already been done. + """ + from ctapipe.tools.apply_models import ApplyModels + + output_path = model_tmp_path / "gamma_diffuse_full_reco.dl2.h5" + run_tool( + ApplyModels(), + argv=[ + f"--input={gamma_train_clf}", + f"--output={output_path}", + f"--reconstructor={particle_classifier_path}", + "--no-dl1-parameters", + "--StereoMeanCombiner.weights=konrad", + ], + raises=True, + ) + return output_path + + +@pytest.fixture(scope="session") +def proton_full_reco_file( + proton_train_clf, + particle_classifier_path, + model_tmp_path, +): + """ + Energy reconstruction and geometric origin reconstruction have already been done. 
+ """ + from ctapipe.tools.apply_models import ApplyModels + + output_path = model_tmp_path / "proton_full_reco.dl2.h5" + run_tool( + ApplyModels(), + argv=[ + f"--input={proton_train_clf}", + f"--output={output_path}", + f"--reconstructor={particle_classifier_path}", + "--no-dl1-parameters", + "--StereoMeanCombiner.weights=konrad", + ], + raises=True, + ) + return output_path + + +@pytest.fixture(scope="session") +def irf_events_loader_test_config(): + from traitlets.config import Config + + return Config( + { + "EventPreProcessor": { + "energy_reconstructor": "ExtraTreesRegressor", + "geometry_reconstructor": "HillasReconstructor", + "gammaness_classifier": "ExtraTreesClassifier", + "quality_criteria": [ + ( + "multiplicity 4", + "np.count_nonzero(tels_with_trigger,axis=1) >= 4", + ), + ("valid classifier", "ExtraTreesClassifier_is_valid"), + ("valid geom reco", "HillasReconstructor_is_valid"), + ("valid energy reco", "ExtraTreesRegressor_is_valid"), + ], + } + } + ) + + +@pytest.fixture(scope="session") +def irf_events_table(): + from ctapipe.irf import EventPreProcessor + + N1 = 1000 + N2 = 100 + N = N1 + N2 + epp = EventPreProcessor() + tab = epp.make_empty_table() + # Add fake weight column + tab.add_column((), name="weight") + units = {c: tab[c].unit for c in tab.columns} + + empty = np.zeros((len(tab.columns), N)) * np.nan + e_tab = QTable(data=empty.T, names=tab.colnames, units=units) + # Setting values following pyirf test in pyirf/irf/tests/test_background.py + e_tab["reco_energy"] = np.append(np.full(N1, 1), np.full(N2, 2)) * u.TeV + e_tab["true_energy"] = np.append(np.full(N1, 0.9), np.full(N2, 2.1)) * u.TeV + e_tab["reco_source_fov_offset"] = ( + np.append(np.full(N1, 0.1), np.full(N2, 0.05)) * u.deg + ) + e_tab["true_source_fov_offset"] = ( + np.append(np.full(N1, 0.11), np.full(N2, 0.04)) * u.deg + ) + + ev = vstack([e_tab, tab], join_type="exact", metadata_conflicts="silent") + return ev diff --git a/src/ctapipe/irf/__init__.py b/src/ctapipe/irf/__init__.py new file mode 100644 index 00000000000..836673b31a4 --- /dev/null +++ b/src/ctapipe/irf/__init__.py @@ -0,0 +1,51 @@ +"""Top level module for the irf functionality""" + +from .benchmarks import ( + AngularResolution2dMaker, + EnergyBiasResolution2dMaker, + Sensitivity2dMaker, +) +from .binning import ( + ResultValidRange, + check_bins_in_range, + make_bins_per_decade, +) +from .irfs import ( + BackgroundRate2dMaker, + EffectiveArea2dMaker, + EnergyMigration2dMaker, + Psf3dMaker, +) +from .optimize import ( + GhPercentileCutCalculator, + OptimizationResult, + OptimizationResultStore, + PercentileCuts, + PointSourceSensitivityOptimizer, + ThetaPercentileCutCalculator, +) +from .select import EventPreProcessor, EventsLoader +from .spectra import SPECTRA, Spectra + +__all__ = [ + "AngularResolution2dMaker", + "EnergyBiasResolution2dMaker", + "Sensitivity2dMaker", + "Psf3dMaker", + "BackgroundRate2dMaker", + "EnergyMigration2dMaker", + "EffectiveArea2dMaker", + "ResultValidRange", + "OptimizationResult", + "OptimizationResultStore", + "PointSourceSensitivityOptimizer", + "PercentileCuts", + "EventsLoader", + "EventPreProcessor", + "Spectra", + "GhPercentileCutCalculator", + "ThetaPercentileCutCalculator", + "SPECTRA", + "check_bins_in_range", + "make_bins_per_decade", +] diff --git a/src/ctapipe/irf/benchmarks.py b/src/ctapipe/irf/benchmarks.py new file mode 100644 index 00000000000..a1aee0304a6 --- /dev/null +++ b/src/ctapipe/irf/benchmarks.py @@ -0,0 +1,274 @@ +"""Components to generate benchmarks""" + +from abc import 
abstractmethod + +import astropy.units as u +import numpy as np +from astropy.io.fits import BinTableHDU, Header +from astropy.table import QTable +from pyirf.benchmarks import angular_resolution, energy_bias_resolution +from pyirf.binning import calculate_bin_indices, create_histogram_table, split_bin_lo_hi +from pyirf.sensitivity import calculate_sensitivity, estimate_background + +from ..core.traits import Bool, Float +from .binning import FoVOffsetBinsBase, RecoEnergyBinsBase, TrueEnergyBinsBase +from .spectra import SPECTRA, Spectra + + +def _get_2d_result_table( + events: QTable, e_bins: u.Quantity, fov_bins: u.Quantity +) -> tuple[QTable, np.ndarray, tuple[int, int]]: + result = QTable() + result["ENERG_LO"], result["ENERG_HI"] = split_bin_lo_hi( + e_bins[np.newaxis, :].to(u.TeV) + ) + result["THETA_LO"], result["THETA_HI"] = split_bin_lo_hi( + fov_bins[np.newaxis, :].to(u.deg) + ) + fov_bin_index, _ = calculate_bin_indices(events["true_source_fov_offset"], fov_bins) + mat_shape = (len(fov_bins) - 1, len(e_bins) - 1) + return result, fov_bin_index, mat_shape + + +class EnergyBiasResolutionMakerBase(TrueEnergyBinsBase): + """ + Base class for calculating the bias and resolution of the energy prediction. + """ + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + + @abstractmethod + def make_bias_resolution_hdu( + self, events: QTable, extname: str = "ENERGY BIAS RESOLUTION" + ) -> BinTableHDU: + """ + Calculate the bias and resolution of the energy prediction. + + Parameters + ---------- + events: astropy.table.QTable + Reconstructed events to be used. + extname: str + Name of the BinTableHDU. + + Returns + ------- + BinTableHDU + """ + + +class EnergyBiasResolution2dMaker(EnergyBiasResolutionMakerBase, FoVOffsetBinsBase): + """ + Calculates the bias and the resolution of the energy prediction in bins of + true energy and fov offset. + """ + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + + def make_bias_resolution_hdu( + self, events: QTable, extname: str = "ENERGY BIAS RESOLUTION" + ) -> BinTableHDU: + result, fov_bin_idx, mat_shape = _get_2d_result_table( + events=events, + e_bins=self.true_energy_bins, + fov_bins=self.fov_offset_bins, + ) + result["N_EVENTS"] = np.zeros(mat_shape)[np.newaxis, ...] + result["BIAS"] = np.full(mat_shape, np.nan)[np.newaxis, ...] + result["RESOLUTI"] = np.full(mat_shape, np.nan)[np.newaxis, ...] + + for i in range(len(self.fov_offset_bins) - 1): + bias_resolution = energy_bias_resolution( + events=events[fov_bin_idx == i], + energy_bins=self.true_energy_bins, + bias_function=np.mean, + energy_type="true", + ) + result["N_EVENTS"][:, i, :] = bias_resolution["n_events"] + result["BIAS"][:, i, :] = bias_resolution["bias"] + result["RESOLUTI"][:, i, :] = bias_resolution["resolution"] + + return BinTableHDU(result, name=extname) + + +class AngularResolutionMakerBase(TrueEnergyBinsBase, RecoEnergyBinsBase): + """ + Base class for calculating the angular resolution. + """ + + # Use reconstructed energy by default for the sake of current pipeline comparisons + use_true_energy = Bool( + False, + help="Use true energy instead of reconstructed energy for energy binning.", + ).tag(config=True) + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + + @abstractmethod + def make_angular_resolution_hdu( + self, events: QTable, extname: str = "ANGULAR RESOLUTION" + ) -> BinTableHDU: + """ + Calculate the angular resolution.
+ + Parameters + ---------- + events: astropy.table.QTable + Reconstructed events to be used. + extname: str + Name of the BinTableHDU. + + Returns + ------- + BinTableHDU + """ + + +class AngularResolution2dMaker(AngularResolutionMakerBase, FoVOffsetBinsBase): + """ + Calculates the angular resolution in bins of either true or reconstructed energy + and fov offset. + """ + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + + def make_angular_resolution_hdu( + self, events: QTable, extname: str = "ANGULAR RESOLUTION" + ) -> BinTableHDU: + if self.use_true_energy: + e_bins = self.true_energy_bins + energy_type = "true" + else: + e_bins = self.reco_energy_bins + energy_type = "reco" + + result, fov_bin_idx, mat_shape = _get_2d_result_table( + events=events, + e_bins=e_bins, + fov_bins=self.fov_offset_bins, + ) + result["N_EVENTS"] = np.zeros(mat_shape)[np.newaxis, ...] + result["ANG_RES"] = u.Quantity( + np.full(mat_shape, np.nan)[np.newaxis, ...], events["theta"].unit + ) + + for i in range(len(self.fov_offset_bins) - 1): + ang_res = angular_resolution( + events=events[fov_bin_idx == i], + energy_bins=e_bins, + energy_type=energy_type, + ) + result["N_EVENTS"][:, i, :] = ang_res["n_events"] + result["ANG_RES"][:, i, :] = ang_res["angular_resolution"] + + header = Header() + header["E_TYPE"] = energy_type.upper() + return BinTableHDU(result, header=header, name=extname) + + +class SensitivityMakerBase(RecoEnergyBinsBase): + """Base class for calculating the point source sensitivity.""" + + alpha = Float( + default_value=0.2, help="Ratio between size of the on and the off region." + ).tag(config=True) + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + + @abstractmethod + def make_sensitivity_hdu( + self, + signal_events: QTable, + background_events: QTable, + theta_cut: QTable, + gamma_spectrum: Spectra, + extname: str = "SENSITIVITY", + ) -> BinTableHDU: + """ + Calculate the point source sensitivity + based on ``pyirf.sensitivity.calculate_sensitivity``. + + Parameters + ---------- + signal_events: astropy.table.QTable + Reconstructed signal events to be used. + background_events: astropy.table.QTable + Reconstructed background events to be used. + theta_cut: QTable + Direction cut that was applied on ``signal_events``. + gamma_spectrum: ctapipe.irf.Spectra + Spectra by which to scale the relative sensitivity to get the flux sensitivity. + extname: str + Name of the BinTableHDU. + + Returns + ------- + BinTableHDU + """ + + +class Sensitivity2dMaker(SensitivityMakerBase, FoVOffsetBinsBase): + """ + Calculates the point source sensitivity in bins of reconstructed energy + and fov offset. + """ + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + + def make_sensitivity_hdu( + self, + signal_events: QTable, + background_events: QTable, + theta_cut: QTable, + gamma_spectrum: Spectra, + extname: str = "SENSITIVITY", + ) -> BinTableHDU: + source_spectrum = SPECTRA[gamma_spectrum] + result, fov_bin_idx, mat_shape = _get_2d_result_table( + events=signal_events, + e_bins=self.reco_energy_bins, + fov_bins=self.fov_offset_bins, + ) + result["N_SIG"] = np.zeros(mat_shape)[np.newaxis, ...] + result["N_SIG_W"] = np.zeros(mat_shape)[np.newaxis, ...] + result["N_BKG"] = np.zeros(mat_shape)[np.newaxis, ...] + result["N_BKG_W"] = np.zeros(mat_shape)[np.newaxis, ...] + result["SIGNIFIC"] = np.full(mat_shape, np.nan)[np.newaxis, ...] 
+ result["REL_SEN"] = np.full(mat_shape, np.nan)[np.newaxis, ...] + result["FLUX_SEN"] = u.Quantity( + np.full(mat_shape, np.nan)[np.newaxis, ...], 1 / (u.TeV * u.s * u.cm**2) + ) + for i in range(len(self.fov_offset_bins) - 1): + signal_hist = create_histogram_table( + events=signal_events[fov_bin_idx == i], bins=self.reco_energy_bins + ) + bkg_hist = estimate_background( + events=background_events, + reco_energy_bins=self.reco_energy_bins, + theta_cuts=theta_cut, + alpha=self.alpha, + fov_offset_min=self.fov_offset_bins[i], + fov_offset_max=self.fov_offset_bins[i + 1], + ) + sens = calculate_sensitivity( + signal_hist=signal_hist, background_hist=bkg_hist, alpha=self.alpha + ) + result["N_SIG"][:, i, :] = sens["n_signal"] + result["N_SIG_W"][:, i, :] = sens["n_signal_weighted"] + result["N_BKG"][:, i, :] = sens["n_background"] + result["N_BKG_W"][:, i, :] = sens["n_background_weighted"] + result["SIGNIFIC"][:, i, :] = sens["significance"] + result["REL_SEN"][:, i, :] = sens["relative_sensitivity"] + result["FLUX_SEN"][:, i, :] = sens[ + "relative_sensitivity" + ] * source_spectrum(sens["reco_energy_center"]) + + header = Header() + header["ALPHA"] = self.alpha + return BinTableHDU(result, header=header, name=extname) diff --git a/src/ctapipe/irf/binning.py b/src/ctapipe/irf/binning.py new file mode 100644 index 00000000000..97b5e15d829 --- /dev/null +++ b/src/ctapipe/irf/binning.py @@ -0,0 +1,174 @@ +"""Collection of binning related functionality for the irf tools""" + +import logging + +import astropy.units as u +import numpy as np + +from ..core import Component +from ..core.traits import AstroQuantity, Integer + +logger = logging.getLogger(__name__) + + +def check_bins_in_range(bins, valid_range, source="result", raise_error=True): + """ + Check whether ``bins`` are within a ``valid_range`` and either warn + or raise an error if not. + + Parameters + ---------- + bins: u.Quantity + The bins to be checked. + valid_range: ctapipe.irf.ResultValidRange + Range for which bins are valid. + E.g. the range in which G/H cuts are calculated. + source: str + Description of which bins are being checked to give useful + warnings/ error messages. + raise_error: bool + Whether to raise an error (True) or give a warning (False) if + ``bins`` exceed ``valid_range``. + """ + low = bins >= valid_range.min + hig = bins <= valid_range.max + + if not all(low & hig): + with np.printoptions(edgeitems=2, threshold=6, precision=4): + bins = np.array2string(bins) + min_val = np.array2string(valid_range.min) + max_val = np.array2string(valid_range.max) + if raise_error: + raise ValueError( + f"Valid range for {source} is {min_val} to {max_val}, got {bins}" + ) + else: + logger.warning( + f"Valid range for {source} is {min_val} to {max_val}, got {bins}", + ) + + +@u.quantity_input(e_min=u.TeV, e_max=u.TeV) +def make_bins_per_decade(e_min, e_max, n_bins_per_decade=5): + """ + Create energy bins with at least ``n_bins_per_decade`` bins per decade. + The number of bins is calculated as + ``n_bins = ceil((log10(e_max) - log10(e_min)) * n_bins_per_decade)``. 
+ + Parameters + ---------- + e_min: u.Quantity[energy] + Minimum energy, inclusive + e_max: u.Quantity[energy] + Maximum energy, inclusive + n_bins_per_decade: int + Minimum number of bins per decade + + Returns + ------- + bins: u.Quantity[energy] + The created bin array, which will have units of ``e_min`` + """ + unit = e_min.unit + log_lower = np.log10(e_min.to_value(unit)) + log_upper = np.log10(e_max.to_value(unit)) + + n_bins = int(np.ceil((log_upper - log_lower) * n_bins_per_decade)) + + return u.Quantity(np.logspace(log_lower, log_upper, n_bins + 1), unit, copy=False) + + +class ResultValidRange: + def __init__(self, bounds_table, prefix): + self.min = bounds_table[f"{prefix}_min"][0] + self.max = bounds_table[f"{prefix}_max"][0] + + +class TrueEnergyBinsBase(Component): + """Base class for creating irfs or benchmarks binned in true energy.""" + + true_energy_min = AstroQuantity( + help="Minimum value for True Energy bins", + default_value=u.Quantity(0.015, u.TeV), + physical_type=u.physical.energy, + ).tag(config=True) + + true_energy_max = AstroQuantity( + help="Maximum value for True Energy bins", + default_value=u.Quantity(150, u.TeV), + physical_type=u.physical.energy, + ).tag(config=True) + + true_energy_n_bins_per_decade = Integer( + help="Number of bins per decade for True Energy bins", + default_value=10, + ).tag(config=True) + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + self.true_energy_bins = make_bins_per_decade( + self.true_energy_min.to(u.TeV), + self.true_energy_max.to(u.TeV), + self.true_energy_n_bins_per_decade, + ) + + +class RecoEnergyBinsBase(Component): + """Base class for creating irfs or benchmarks binned in reconstructed energy.""" + + reco_energy_min = AstroQuantity( + help="Minimum value for Reco Energy bins", + default_value=u.Quantity(0.015, u.TeV), + physical_type=u.physical.energy, + ).tag(config=True) + + reco_energy_max = AstroQuantity( + help="Maximum value for Reco Energy bins", + default_value=u.Quantity(150, u.TeV), + physical_type=u.physical.energy, + ).tag(config=True) + + reco_energy_n_bins_per_decade = Integer( + help="Number of bins per decade for Reco Energy bins", + default_value=5, + ).tag(config=True) + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + self.reco_energy_bins = make_bins_per_decade( + self.reco_energy_min.to(u.TeV), + self.reco_energy_max.to(u.TeV), + self.reco_energy_n_bins_per_decade, + ) + + +class FoVOffsetBinsBase(Component): + """Base class for creating radially symmetric irfs or benchmarks.""" + + fov_offset_min = AstroQuantity( + help="Minimum value for FoV Offset bins", + default_value=u.Quantity(0, u.deg), + physical_type=u.physical.angle, + ).tag(config=True) + + fov_offset_max = AstroQuantity( + help="Maximum value for FoV offset bins", + default_value=u.Quantity(5, u.deg), + physical_type=u.physical.angle, + ).tag(config=True) + + fov_offset_n_bins = Integer( + help="Number of FoV offset bins", + default_value=1, + ).tag(config=True) + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + self.fov_offset_bins = u.Quantity( + np.linspace( + self.fov_offset_min.to_value(u.deg), + self.fov_offset_max.to_value(u.deg), + self.fov_offset_n_bins + 1, + ), + u.deg, + ) diff --git a/src/ctapipe/irf/irfs.py b/src/ctapipe/irf/irfs.py new file mode 100644 index 00000000000..f0e3f8ddf95 --- /dev/null +++ b/src/ctapipe/irf/irfs.py @@ -0,0 +1,316 @@ +"""Components to generate IRFs""" +from abc import
abstractmethod + +import astropy.units as u +import numpy as np +from astropy.io.fits import BinTableHDU +from astropy.table import QTable +from pyirf.io import ( + create_aeff2d_hdu, + create_background_2d_hdu, + create_energy_dispersion_hdu, + create_psf_table_hdu, +) +from pyirf.irf import ( + background_2d, + effective_area_per_energy, + effective_area_per_energy_and_fov, + energy_dispersion, + psf_table, +) +from pyirf.simulations import SimulatedEventsInfo + +from ..core.traits import AstroQuantity, Float, Integer +from .binning import FoVOffsetBinsBase, RecoEnergyBinsBase, TrueEnergyBinsBase + + +class PsfMakerBase(TrueEnergyBinsBase): + """Base class for calculating the point spread function.""" + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + + @abstractmethod + def make_psf_hdu(self, events: QTable, extname: str = "PSF") -> BinTableHDU: + """ + Calculate the PSF and create a FITS binary table HDU in GADF format. + + Parameters + ---------- + events: astropy.table.QTable + Reconstructed events to be used. + extname: str + Name for the BinTableHDU. + + Returns + ------- + BinTableHDU + """ + + +class BackgroundRateMakerBase(RecoEnergyBinsBase): + """Base class for calculating the background rate.""" + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + + @abstractmethod + def make_bkg_hdu( + self, events: QTable, obs_time: u.Quantity, extname: str = "BACKGROUND" + ) -> BinTableHDU: + """ + Calculate the background rate and create a FITS binary table HDU + in GADF format. + + Parameters + ---------- + events: astropy.table.QTable + Reconstructed events to be used. + obs_time: astropy.units.Quantity[time] + Observation time. This must match with how the individual event + weights are calculated. + extname: str + Name for the BinTableHDU. + + Returns + ------- + BinTableHDU + """ + + +class EnergyMigrationMakerBase(TrueEnergyBinsBase): + """Base class for calculating the energy migration.""" + + energy_migration_min = Float( + help="Minimum value of Energy Migration matrix", + default_value=0.2, + ).tag(config=True) + + energy_migration_max = Float( + help="Maximum value of Energy Migration matrix", + default_value=5, + ).tag(config=True) + + energy_migration_n_bins = Integer( + help="Number of bins in log scale for Energy Migration matrix", + default_value=30, + ).tag(config=True) + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + self.migration_bins = np.linspace( + self.energy_migration_min, + self.energy_migration_max, + self.energy_migration_n_bins + 1, + ) + + @abstractmethod + def make_edisp_hdu( + self, events: QTable, point_like: bool, extname: str = "ENERGY MIGRATION" + ) -> BinTableHDU: + """ + Calculate the energy dispersion and create a FITS binary table HDU + in GADF format. + + Parameters + ---------- + events: astropy.table.QTable + Reconstructed events to be used. + point_like: bool + If a direction cut was applied on ``events``, pass ``True``, else ``False`` + for a full-enclosure energy dispersion. + extname: str + Name for the BinTableHDU.
+ + Returns + ------- + BinTableHDU + """ + + +class EffectiveAreaMakerBase(TrueEnergyBinsBase): + """Base class for calculating the effective area.""" + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + + @abstractmethod + def make_aeff_hdu( + self, + events: QTable, + point_like: bool, + signal_is_point_like: bool, + sim_info: SimulatedEventsInfo, + extname: str = "EFFECTIVE AREA", + ) -> BinTableHDU: + """ + Calculate the effective area and create a FITS binary table HDU + in GADF format. + + Parameters + ---------- + events: astropy.table.QTable + Reconstructed events to be used. + point_like: bool + If a direction cut was applied on ``events``, pass ``True``, else ``False`` + for a full-enclosure effective area. + signal_is_point_like: bool + If ``events`` were simulated only at a single point in the field of view, + pass ``True``, else ``False``. + sim_info: pyirf.simulations.SimulatedEventsInfo + The overall statistics of the simulated events. + extname: str + Name of the BinTableHDU. + + Returns + ------- + BinTableHDU + """ + + +class EffectiveArea2dMaker(EffectiveAreaMakerBase, FoVOffsetBinsBase): + """ + Creates a radially symmetric parameterization of the effective area in equidistant + bins of logarithmic true energy and field of view offset. + """ + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + + def make_aeff_hdu( + self, + events: QTable, + point_like: bool, + signal_is_point_like: bool, + sim_info: SimulatedEventsInfo, + extname: str = "EFFECTIVE AREA", + ) -> BinTableHDU: + # For point-like gammas the effective area can only be calculated + # at one point in the FoV. + if signal_is_point_like: + aeff = effective_area_per_energy( + selected_events=events, + simulation_info=sim_info, + true_energy_bins=self.true_energy_bins, + ) + # +1 dimension for FOV offset + aeff = aeff[..., np.newaxis] + else: + aeff = effective_area_per_energy_and_fov( + selected_events=events, + simulation_info=sim_info, + true_energy_bins=self.true_energy_bins, + fov_offset_bins=self.fov_offset_bins, + ) + + return create_aeff2d_hdu( + effective_area=aeff, + true_energy_bins=self.true_energy_bins, + fov_offset_bins=self.fov_offset_bins, + point_like=point_like, + extname=extname, + ) + + +class EnergyMigration2dMaker(EnergyMigrationMakerBase, FoVOffsetBinsBase): + """ + Creates a radially symmetric parameterization of the energy migration in + equidistant bins of logarithmic true energy and field of view offset. + """ + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + + def make_edisp_hdu( + self, events: QTable, point_like: bool, extname: str = "ENERGY MIGRATION" + ) -> BinTableHDU: + edisp = energy_dispersion( + selected_events=events, + true_energy_bins=self.true_energy_bins, + fov_offset_bins=self.fov_offset_bins, + migration_bins=self.migration_bins, + ) + return create_energy_dispersion_hdu( + energy_dispersion=edisp, + true_energy_bins=self.true_energy_bins, + migration_bins=self.migration_bins, + fov_offset_bins=self.fov_offset_bins, + point_like=point_like, + extname=extname, + ) + + +class BackgroundRate2dMaker(BackgroundRateMakerBase, FoVOffsetBinsBase): + """ + Creates a radially symmetric parameterization of the background rate in equidistant + bins of logarithmic reconstructed energy and field of view offset.
+ """ + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + + def make_bkg_hdu( + self, events: QTable, obs_time: u.Quantity, extname: str = "BACKGROUND" + ) -> BinTableHDU: + background_rate = background_2d( + events=events, + reco_energy_bins=self.reco_energy_bins, + fov_offset_bins=self.fov_offset_bins, + t_obs=obs_time, + ) + return create_background_2d_hdu( + background_2d=background_rate, + reco_energy_bins=self.reco_energy_bins, + fov_offset_bins=self.fov_offset_bins, + extname=extname, + ) + + +class Psf3dMaker(PsfMakerBase, FoVOffsetBinsBase): + """ + Creates a radially symmetric point spread function calculated in equidistant bins + of source offset, logarithmic true energy, and field of view offset. + """ + + source_offset_min = AstroQuantity( + help="Minimum value for Source offset", + default_value=u.Quantity(0, u.deg), + physical_type=u.physical.angle, + ).tag(config=True) + + source_offset_max = AstroQuantity( + help="Maximum value for Source offset", + default_value=u.Quantity(1, u.deg), + physical_type=u.physical.angle, + ).tag(config=True) + + source_offset_n_bins = Integer( + help="Number of bins for Source offset", + default_value=100, + ).tag(config=True) + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + self.source_offset_bins = u.Quantity( + np.linspace( + self.source_offset_min.to_value(u.deg), + self.source_offset_max.to_value(u.deg), + self.source_offset_n_bins + 1, + ), + u.deg, + ) + + def make_psf_hdu(self, events: QTable, extname: str = "PSF") -> BinTableHDU: + psf = psf_table( + events=events, + true_energy_bins=self.true_energy_bins, + fov_offset_bins=self.fov_offset_bins, + source_offset_bins=self.source_offset_bins, + ) + return create_psf_table_hdu( + psf=psf, + true_energy_bins=self.true_energy_bins, + fov_offset_bins=self.fov_offset_bins, + source_offset_bins=self.source_offset_bins, + extname=extname, + ) diff --git a/src/ctapipe/irf/optimize.py b/src/ctapipe/irf/optimize.py new file mode 100644 index 00000000000..9b8fba3debc --- /dev/null +++ b/src/ctapipe/irf/optimize.py @@ -0,0 +1,483 @@ +"""module containing optimization related functions and classes""" + +import operator +from abc import abstractmethod + +import astropy.units as u +import numpy as np +from astropy.io import fits +from astropy.table import QTable, Table +from pyirf.cut_optimization import optimize_gh_cut +from pyirf.cuts import calculate_percentile_cut, evaluate_binned_cut + +from ..core import Component, QualityQuery +from ..core.traits import AstroQuantity, Float, Integer +from .binning import ResultValidRange, make_bins_per_decade +from .select import EventPreProcessor + + +class OptimizationResult: + def __init__(self, precuts, valid_energy, valid_offset, gh, theta): + self.precuts = precuts + self.valid_energy = ResultValidRange(valid_energy, "energy") + self.valid_offset = ResultValidRange(valid_offset, "offset") + self.gh_cuts = gh + self.theta_cuts = theta + + def __repr__(self): + if self.theta_cuts is not None: + return ( + f"" + ) + else: + return ( + f"" + ) + + +class OptimizationResultStore: + def __init__(self, precuts=None): + self._init_precuts(precuts) + self._results = None + + def _init_precuts(self, precuts): + if precuts: + if isinstance(precuts, QualityQuery): + self._precuts = precuts.quality_criteria + if len(self._precuts) == 0: + self._precuts = [(" ", " ")] # Ensures table serialises with units + elif isinstance(precuts, list): + self._precuts = precuts + else: + 
self._precuts = list(precuts) + else: + self._precuts = None + + def set_result( + self, gh_cuts, valid_energy, valid_offset, clf_prefix, theta_cuts=None + ): + if not self._precuts: + raise ValueError("Precuts must be defined before results can be saved") + + gh_cuts.meta["EXTNAME"] = "GH_CUTS" + gh_cuts.meta["CLFNAME"] = clf_prefix + + energy_lim_tab = QTable(rows=[valid_energy], names=["energy_min", "energy_max"]) + energy_lim_tab.meta["EXTNAME"] = "VALID_ENERGY" + + offset_lim_tab = QTable(rows=[valid_offset], names=["offset_min", "offset_max"]) + offset_lim_tab.meta["EXTNAME"] = "VALID_OFFSET" + + self._results = [gh_cuts, energy_lim_tab, offset_lim_tab] + + if theta_cuts is not None: + theta_cuts.meta["EXTNAME"] = "RAD_MAX" + self._results += [theta_cuts] + + def write(self, output_name, overwrite=False): + if not isinstance(self._results, list): + raise ValueError( + "The results of this object" + " have not been properly initialised," + " call `set_result` before writing." + ) + + cut_expr_tab = Table( + rows=self._precuts, + names=["name", "cut_expr"], + dtype=[np.unicode_, np.unicode_], + ) + cut_expr_tab.meta["EXTNAME"] = "QUALITY_CUTS_EXPR" + + cut_expr_tab.write(output_name, format="fits", overwrite=overwrite) + + for table in self._results: + table.write(output_name, format="fits", append=True) + + def read(self, file_name): + with fits.open(file_name) as hdul: + cut_expr_tab = Table.read(hdul[1]) + cut_expr_lst = [(name, expr) for name, expr in cut_expr_tab.iterrows()] + # TODO: this crudely fixes a problem when loading non-empty tables, make it nicer + try: + cut_expr_lst.remove((" ", " ")) + except ValueError: + pass + + precuts = QualityQuery(quality_criteria=cut_expr_lst) + gh_cuts = QTable.read(hdul[2]) + valid_energy = QTable.read(hdul[3]) + valid_offset = QTable.read(hdul[4]) + theta_cuts = QTable.read(hdul[5]) if len(hdul) > 5 else None + + return OptimizationResult( + precuts, valid_energy, valid_offset, gh_cuts, theta_cuts + ) + + +class CutOptimizerBase(Component): + """Base class for cut optimization algorithms.""" + + reco_energy_min = AstroQuantity( + help="Minimum value for Reco Energy bins", + default_value=u.Quantity(0.015, u.TeV), + physical_type=u.physical.energy, + ).tag(config=True) + + reco_energy_max = AstroQuantity( + help="Maximum value for Reco Energy bins", + default_value=u.Quantity(150, u.TeV), + physical_type=u.physical.energy, + ).tag(config=True) + + reco_energy_n_bins_per_decade = Integer( + help="Number of bins per decade for Reco Energy bins", + default_value=5, + ).tag(config=True) + + min_bkg_fov_offset = AstroQuantity( + help=( + "Minimum distance from the fov center for background events " + "to be taken into account" + ), + default_value=u.Quantity(0, u.deg), + physical_type=u.physical.angle, + ).tag(config=True) + + max_bkg_fov_offset = AstroQuantity( + help=( + "Maximum distance from the fov center for background events " + "to be taken into account" + ), + default_value=u.Quantity(5, u.deg), + physical_type=u.physical.angle, + ).tag(config=True) + + @abstractmethod + def optimize_cuts( + self, + signal: QTable, + background: QTable, + alpha: float, + precuts: EventPreProcessor, + clf_prefix: str, + point_like: bool, + ) -> OptimizationResultStore: + """ + Optimize G/H (and optionally theta) cuts + and fill them in an ``OptimizationResult``.
+ + Parameters + ---------- + signal: astropy.table.QTable + Table containing signal events + background: astropy.table.QTable + Table containing background events + alpha: float + Size ratio of on region / off region + precuts: ctapipe.irf.EventPreProcessor + ``ctapipe.core.QualityQuery`` subclass containing preselection + criteria for events + clf_prefix: str + Prefix of the output from the G/H classifier for which the + cut will be optimized + point_like: bool + Whether a theta cut should be calculated (True) or only a + G/H cut (False) + """ + + +class GhPercentileCutCalculator(Component): + """Computes a percentile cut on gammaness.""" + + min_counts = Integer( + default_value=10, + help="Minimum number of events in a bin to attempt to find a cut value", + ).tag(config=True) + + smoothing = Float( + default_value=None, + allow_none=True, + help="When given, the width (in units of bins) of gaussian smoothing applied", + ).tag(config=True) + + target_percentile = Integer( + default_value=68, + help="Percent of events in each energy bin to keep after the G/H cut", + ).tag(config=True) + + def calculate_gh_cut(self, gammaness, reco_energy, reco_energy_bins): + if self.smoothing and self.smoothing < 0: + self.smoothing = None + + return calculate_percentile_cut( + gammaness, + reco_energy, + reco_energy_bins, + smoothing=self.smoothing, + percentile=100 - self.target_percentile, + fill_value=gammaness.max(), + min_events=self.min_counts, + ) + + +class ThetaPercentileCutCalculator(Component): + """Computes a percentile cut on theta.""" + + theta_min_angle = AstroQuantity( + default_value=u.Quantity(-1, u.deg), + physical_type=u.physical.angle, + help="Smallest angular cut value allowed (-1 means no cut)", + ).tag(config=True) + + theta_max_angle = AstroQuantity( + default_value=u.Quantity(0.32, u.deg), + physical_type=u.physical.angle, + help="Largest angular cut value allowed", + ).tag(config=True) + + min_counts = Integer( + default_value=10, + help="Minimum number of events in a bin to attempt to find a cut value", + ).tag(config=True) + + theta_fill_value = AstroQuantity( + default_value=u.Quantity(0.32, u.deg), + physical_type=u.physical.angle, + help="Angular cut value used for bins with too few events", + ).tag(config=True) + + smoothing = Float( + default_value=None, + allow_none=True, + help="When given, the width (in units of bins) of gaussian smoothing applied", + ).tag(config=True) + + target_percentile = Integer( + default_value=68, + help="Percent of events in each energy bin to keep after the theta cut", + ).tag(config=True) + + def calculate_theta_cut(self, theta, reco_energy, reco_energy_bins): + if self.theta_min_angle < 0 * u.deg: + theta_min_angle = None + else: + theta_min_angle = self.theta_min_angle + + if self.theta_max_angle < 0 * u.deg: + theta_max_angle = None + else: + theta_max_angle = self.theta_max_angle + + if self.smoothing and self.smoothing < 0: + self.smoothing = None + + return calculate_percentile_cut( + theta, + reco_energy, + reco_energy_bins, + min_value=theta_min_angle, + max_value=theta_max_angle, + smoothing=self.smoothing, + percentile=self.target_percentile, + fill_value=self.theta_fill_value, + min_events=self.min_counts, + ) + + +class PercentileCuts(CutOptimizerBase): + """ + Calculates G/H separation cut based on the percentile of signal events + to keep in each bin. + Optionally also calculates a percentile cut on theta based on the signal + events surviving this G/H cut. 
+ """ + + classes = [GhPercentileCutCalculator, ThetaPercentileCutCalculator] + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + self.gh = GhPercentileCutCalculator(parent=self) + self.theta = ThetaPercentileCutCalculator(parent=self) + + def optimize_cuts( + self, + signal: QTable, + background: QTable, + alpha: float, + precuts: EventPreProcessor, + clf_prefix: str, + point_like: bool, + ) -> OptimizationResultStore: + reco_energy_bins = make_bins_per_decade( + self.reco_energy_min.to(u.TeV), + self.reco_energy_max.to(u.TeV), + self.reco_energy_n_bins_per_decade, + ) + gh_cuts = self.gh.calculate_gh_cut( + signal["gh_score"], + signal["reco_energy"], + reco_energy_bins, + ) + if point_like: + gh_mask = evaluate_binned_cut( + signal["gh_score"], + signal["reco_energy"], + gh_cuts, + op=operator.ge, + ) + theta_cuts = self.theta.calculate_theta_cut( + signal["theta"][gh_mask], + signal["reco_energy"][gh_mask], + reco_energy_bins, + ) + + result_saver = OptimizationResultStore(precuts) + result_saver.set_result( + gh_cuts=gh_cuts, + valid_energy=[self.reco_energy_min, self.reco_energy_max], + # A single set of cuts is calculated for the whole fov atm + valid_offset=[0 * u.deg, np.inf * u.deg], + clf_prefix=clf_prefix, + theta_cuts=theta_cuts if point_like else None, + ) + return result_saver + + +class PointSourceSensitivityOptimizer(CutOptimizerBase): + """ + Optimizes a G/H cut for maximum point source sensitivity and + calculates a percentile cut on theta. + """ + + classes = [ThetaPercentileCutCalculator] + + initial_gh_cut_efficency = Float( + default_value=0.4, help="Start value of gamma efficiency before optimization" + ).tag(config=True) + + max_gh_cut_efficiency = Float( + default_value=0.8, help="Maximum gamma efficiency requested" + ).tag(config=True) + + gh_cut_efficiency_step = Float( + default_value=0.1, + help="Stepsize used for scanning after optimal gammaness cut", + ).tag(config=True) + + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, **kwargs) + self.theta = ThetaPercentileCutCalculator(parent=self) + + def optimize_cuts( + self, + signal: QTable, + background: QTable, + alpha: float, + precuts: EventPreProcessor, + clf_prefix: str, + point_like: bool, + ) -> OptimizationResultStore: + reco_energy_bins = make_bins_per_decade( + self.reco_energy_min.to(u.TeV), + self.reco_energy_max.to(u.TeV), + self.reco_energy_n_bins_per_decade, + ) + + if point_like: + initial_gh_cuts = calculate_percentile_cut( + signal["gh_score"], + signal["reco_energy"], + bins=reco_energy_bins, + fill_value=0.0, + percentile=100 * (1 - self.initial_gh_cut_efficency), + min_events=10, + smoothing=1, + ) + initial_gh_mask = evaluate_binned_cut( + signal["gh_score"], + signal["reco_energy"], + initial_gh_cuts, + op=operator.gt, + ) + + theta_cuts = self.theta.calculate_theta_cut( + signal["theta"][initial_gh_mask], + signal["reco_energy"][initial_gh_mask], + reco_energy_bins, + ) + self.log.info("Optimizing G/H separation cut for best sensitivity") + else: + # Create a dummy theta cut since `pyirf.cut_optimization.optimize_gh_cut` + # needs a theta cut atm. + theta_cuts = QTable() + theta_cuts["low"] = reco_energy_bins[:-1] + theta_cuts["center"] = 0.5 * (reco_energy_bins[:-1] + reco_energy_bins[1:]) + theta_cuts["high"] = reco_energy_bins[1:] + theta_cuts["cut"] = self.max_bkg_fov_offset + self.log.info( + "Optimizing G/H separation cut for best sensitivity " + "with `max_bkg_fov_offset` as theta cut." 
+ ) + + gh_cut_efficiencies = np.arange( + self.gh_cut_efficiency_step, + self.max_gh_cut_efficiency + self.gh_cut_efficiency_step / 2, + self.gh_cut_efficiency_step, + ) + opt_sens, gh_cuts = optimize_gh_cut( + signal, + background, + reco_energy_bins=reco_energy_bins, + gh_cut_efficiencies=gh_cut_efficiencies, + op=operator.ge, + theta_cuts=theta_cuts, + alpha=alpha, + fov_offset_max=self.max_bkg_fov_offset, + fov_offset_min=self.min_bkg_fov_offset, + ) + valid_energy = self._get_valid_energy_range(opt_sens) + + # Re-calculate theta cut with optimized g/h cut + if point_like: + signal["selected_gh"] = evaluate_binned_cut( + signal["gh_score"], + signal["reco_energy"], + gh_cuts, + operator.ge, + ) + theta_cuts_opt = self.theta.calculate_theta_cut( + signal[signal["selected_gh"]]["theta"], + signal[signal["selected_gh"]]["reco_energy"], + reco_energy_bins, + ) + + result_saver = OptimizationResultStore(precuts) + result_saver.set_result( + gh_cuts=gh_cuts, + valid_energy=valid_energy, + # A single set of cuts is calculated for the whole fov atm + valid_offset=[self.min_bkg_fov_offset, self.max_bkg_fov_offset], + clf_prefix=clf_prefix, + theta_cuts=theta_cuts_opt if point_like else None, + ) + return result_saver + + def _get_valid_energy_range(self, opt_sens): + keep_mask = np.isfinite(opt_sens["significance"]) + + count = np.arange(start=0, stop=len(keep_mask), step=1) + if all(np.diff(count[keep_mask]) == 1): + return [ + opt_sens["reco_energy_low"][keep_mask][0], + opt_sens["reco_energy_high"][keep_mask][-1], + ] + else: + raise ValueError("Optimal significance curve has internal NaN bins") diff --git a/src/ctapipe/irf/select.py b/src/ctapipe/irf/select.py new file mode 100644 index 00000000000..eeea407bffe --- /dev/null +++ b/src/ctapipe/irf/select.py @@ -0,0 +1,298 @@ +"""Module containing classes related to event preprocessing and selection""" + +from pathlib import Path + +import astropy.units as u +import numpy as np +from astropy.coordinates import AltAz, SkyCoord +from astropy.table import QTable, Table, vstack +from pyirf.simulations import SimulatedEventsInfo +from pyirf.spectral import PowerLaw, calculate_event_weights +from pyirf.utils import calculate_source_fov_offset, calculate_theta + +from ..coordinates import NominalFrame +from ..core import Component, QualityQuery +from ..core.traits import List, Tuple, Unicode +from ..io import TableLoader +from .spectra import SPECTRA, Spectra + + +class EventPreProcessor(QualityQuery): + """Defines preselection cuts and the necessary renaming of columns""" + + energy_reconstructor = Unicode( + default_value="RandomForestRegressor", + help="Prefix of the reco `_energy` column", + ).tag(config=True) + + geometry_reconstructor = Unicode( + default_value="HillasReconstructor", + help="Prefix of the `_alt` and `_az` reco geometry columns", + ).tag(config=True) + + gammaness_classifier = Unicode( + default_value="RandomForestClassifier", + help="Prefix of the classifier `_prediction` column", + ).tag(config=True) + + quality_criteria = List( + Tuple(Unicode(), Unicode()), + default_value=[ + ("multiplicity 4", "np.count_nonzero(tels_with_trigger,axis=1) >= 4"), + ("valid classifier", "RandomForestClassifier_is_valid"), + ("valid geom reco", "HillasReconstructor_is_valid"), + ("valid energy reco", "RandomForestRegressor_is_valid"), + ], + help=QualityQuery.quality_criteria.help, + ).tag(config=True) + + rename_columns = List( + help="List containing translation pairs of new and old column names " + "used when processing input with names
differing from the CTA prod5b format. " + "Ex: [('alt_from_new_algorithm','reco_alt')]", + default_value=[], + ).tag(config=True) + + def normalise_column_names(self, events: Table) -> QTable: + keep_columns = [ + "obs_id", + "event_id", + "true_energy", + "true_az", + "true_alt", + ] + standard_renames = { + "reco_energy": f"{self.energy_reconstructor}_energy", + "reco_az": f"{self.geometry_reconstructor}_az", + "reco_alt": f"{self.geometry_reconstructor}_alt", + "gh_score": f"{self.gammaness_classifier}_prediction", + } + rename_from = [] + rename_to = [] + # We never enter the loop if rename_columns is empty + for old, new in self.rename_columns: + if new in standard_renames.keys(): + self.log.warning( + f"Column '{old}' will be used as '{new}' " + f"instead of {standard_renames[new]}." + ) + standard_renames.pop(new) + + rename_from.append(old) + rename_to.append(new) + + for new, old in standard_renames.items(): + if old in events.colnames: + rename_from.append(old) + rename_to.append(new) + + # check that all needed reco columns are defined + for c in ["reco_energy", "reco_az", "reco_alt", "gh_score"]: + if c not in rename_to: + raise ValueError( + f"No column corresponding to {c} is defined in " + f"EventPreProcessor.rename_columns and {standard_renames[c]} " + "is not in the given data." + ) + + keep_columns.extend(rename_from) + events = QTable(events[keep_columns], copy=False) + events.rename_columns(rename_from, rename_to) + return events + + def make_empty_table(self) -> QTable: + """ + This function defines the columns later functions expect to be present + in the event table. + """ + columns = [ + "obs_id", + "event_id", + "true_energy", + "true_az", + "true_alt", + "reco_energy", + "reco_az", + "reco_alt", + "reco_fov_lat", + "reco_fov_lon", + "gh_score", + "pointing_az", + "pointing_alt", + "theta", + "true_source_fov_offset", + "reco_source_fov_offset", + ] + units = { + "true_energy": u.TeV, + "true_az": u.deg, + "true_alt": u.deg, + "reco_energy": u.TeV, + "reco_az": u.deg, + "reco_alt": u.deg, + "reco_fov_lat": u.deg, + "reco_fov_lon": u.deg, + "pointing_az": u.deg, + "pointing_alt": u.deg, + "theta": u.deg, + "true_source_fov_offset": u.deg, + "reco_source_fov_offset": u.deg, + } + descriptions = { + "obs_id": "Observation Block ID", + "event_id": "Array Event ID", + "true_energy": "Simulated Energy", + "true_az": "Simulated azimuth", + "true_alt": "Simulated altitude", + "reco_energy": "Reconstructed energy", + "reco_az": "Reconstructed azimuth", + "reco_alt": "Reconstructed altitude", + "reco_fov_lat": "Reconstructed field of view lat", + "reco_fov_lon": "Reconstructed field of view lon", + "pointing_az": "Pointing azimuth", + "pointing_alt": "Pointing altitude", + "theta": "Reconstructed angular offset from source position", + "true_source_fov_offset": "Simulated angular offset from pointing direction", + "reco_source_fov_offset": "Reconstructed angular offset from pointing direction", + "gh_score": "prediction of the classifier, defined between [0,1], where values close to 1 mean that the positive class (e.g.
gamma in gamma-ray analysis) is more likely", + } + + return QTable(names=columns, units=units, descriptions=descriptions) + + +class EventsLoader(Component): + classes = [EventPreProcessor] + + def __init__(self, kind: str, file: Path, target_spectrum: Spectra, **kwargs): + super().__init__(**kwargs) + + self.epp = EventPreProcessor(parent=self) + self.target_spectrum = SPECTRA[target_spectrum] + self.kind = kind + self.file = file + + def load_preselected_events( + self, chunk_size: int, obs_time: u.Quantity + ) -> tuple[QTable, int, dict]: + opts = dict(dl2=True, simulated=True) + with TableLoader(self.file, parent=self, **opts) as load: + header = self.epp.make_empty_table() + sim_info, spectrum, obs_conf = self.get_metadata(load, obs_time) + meta = {"sim_info": sim_info, "spectrum": spectrum} + bits = [header] + n_raw_events = 0 + for _, _, events in load.read_subarray_events_chunked(chunk_size, **opts): + selected = events[self.epp.get_table_mask(events)] + selected = self.epp.normalise_column_names(selected) + selected = self.make_derived_columns(selected, obs_conf) + bits.append(selected) + n_raw_events += len(events) + + bits.append(header) # Putting it last ensures the correct metadata is used + table = vstack(bits, join_type="exact", metadata_conflicts="silent") + return table, n_raw_events, meta + + def get_metadata( + self, loader: TableLoader, obs_time: u.Quantity + ) -> tuple[SimulatedEventsInfo, PowerLaw, Table]: + obs = loader.read_observation_information() + sim = loader.read_simulation_configuration() + show = loader.read_shower_distribution() + + for itm in ["spectral_index", "energy_range_min", "energy_range_max"]: + if len(np.unique(sim[itm])) > 1: + raise NotImplementedError( + f"Unsupported: '{itm}' differs across simulation runs" + ) + + sim_info = SimulatedEventsInfo( + n_showers=show["n_entries"].sum(), + energy_min=sim["energy_range_min"].quantity[0], + energy_max=sim["energy_range_max"].quantity[0], + max_impact=sim["max_scatter_range"].quantity[0], + spectral_index=sim["spectral_index"][0], + viewcone_max=sim["max_viewcone_radius"].quantity[0], + viewcone_min=sim["min_viewcone_radius"].quantity[0], + ) + + return ( + sim_info, + PowerLaw.from_simulation(sim_info, obstime=obs_time), + obs, + ) + + def make_derived_columns(self, events: QTable, obs_conf: Table) -> QTable: + if obs_conf["subarray_pointing_lat"].std() < 1e-3: + assert all(obs_conf["subarray_pointing_frame"] == 0) + # Let's suppose 0 means ALTAZ + events["pointing_alt"] = obs_conf["subarray_pointing_lat"][0] * u.deg + events["pointing_az"] = obs_conf["subarray_pointing_lon"][0] * u.deg + else: + raise NotImplementedError( + "No support for making irfs from varying pointings yet" + ) + + events["theta"] = calculate_theta( + events, + assumed_source_az=events["true_az"], + assumed_source_alt=events["true_alt"], + ) + events["true_source_fov_offset"] = calculate_source_fov_offset( + events, prefix="true" + ) + events["reco_source_fov_offset"] = calculate_source_fov_offset( + events, prefix="reco" + ) + + altaz = AltAz() + pointing = SkyCoord( + alt=events["pointing_alt"], az=events["pointing_az"], frame=altaz + ) + reco = SkyCoord( + alt=events["reco_alt"], + az=events["reco_az"], + frame=altaz, + ) + nominal = NominalFrame(origin=pointing) + reco_nominal = reco.transform_to(nominal) + events["reco_fov_lon"] = -reco_nominal.fov_lon # minus for GADF + events["reco_fov_lat"] = reco_nominal.fov_lat + return events + + def make_event_weights( + self, + events: QTable, + spectrum: PowerLaw, +
fov_offset_bins: u.Quantity | None = None, + ) -> QTable: + if ( + self.kind == "gammas" + and self.target_spectrum.normalization.unit.is_equivalent( + spectrum.normalization.unit * u.sr + ) + ): + if fov_offset_bins is None: + raise ValueError( + "gamma_target_spectrum is point-like, but no fov offset bins " + "for the integration of the simulated diffuse spectrum were given." + ) + + events["weight"] = 1.0 + + for low, high in zip(fov_offset_bins[:-1], fov_offset_bins[1:]): + fov_mask = events["true_source_fov_offset"] >= low + fov_mask &= events["true_source_fov_offset"] < high + + # Assign through the column so the table is modified in place + # (indexing the table with a boolean mask returns a copy) + events["weight"][fov_mask] = calculate_event_weights( + events[fov_mask]["true_energy"], + target_spectrum=self.target_spectrum, + simulated_spectrum=spectrum.integrate_cone(low, high), + ) + else: + events["weight"] = calculate_event_weights( + events["true_energy"], + target_spectrum=self.target_spectrum, + simulated_spectrum=spectrum, + ) + + return events diff --git a/src/ctapipe/irf/spectra.py b/src/ctapipe/irf/spectra.py new file mode 100644 index 00000000000..fa726eadd10 --- /dev/null +++ b/src/ctapipe/irf/spectra.py @@ -0,0 +1,17 @@ +"""Definition of spectra to be used to calculate event weights for irf computation""" +from enum import Enum + +from pyirf.spectral import CRAB_HEGRA, IRFDOC_ELECTRON_SPECTRUM, IRFDOC_PROTON_SPECTRUM + + +class Spectra(Enum): + CRAB_HEGRA = 1 + IRFDOC_ELECTRON_SPECTRUM = 2 + IRFDOC_PROTON_SPECTRUM = 3 + + +SPECTRA = { + Spectra.CRAB_HEGRA: CRAB_HEGRA, + Spectra.IRFDOC_ELECTRON_SPECTRUM: IRFDOC_ELECTRON_SPECTRUM, + Spectra.IRFDOC_PROTON_SPECTRUM: IRFDOC_PROTON_SPECTRUM, +} diff --git a/src/ctapipe/irf/tests/test_benchmarks.py b/src/ctapipe/irf/tests/test_benchmarks.py new file mode 100644 index 00000000000..95298d265d7 --- /dev/null +++ b/src/ctapipe/irf/tests/test_benchmarks.py @@ -0,0 +1,130 @@ +import astropy.units as u +from astropy.table import QTable + +from ctapipe.irf.tests.test_irfs import _check_boundaries_in_hdu + + +def test_make_2d_energy_bias_res(irf_events_table): + from ctapipe.irf import EnergyBiasResolution2dMaker + + biasResMkr = EnergyBiasResolution2dMaker( + fov_offset_n_bins=3, + fov_offset_max=3 * u.deg, + true_energy_n_bins_per_decade=7, + true_energy_max=155 * u.TeV, + ) + + bias_res_hdu = biasResMkr.make_bias_resolution_hdu(events=irf_events_table) + # min 7 bins per decade between 0.015 TeV and 155 TeV -> 7 * 4 + 1 = 29 bins + assert ( + bias_res_hdu.data["N_EVENTS"].shape + == bias_res_hdu.data["BIAS"].shape + == bias_res_hdu.data["RESOLUTI"].shape + == (1, 3, 29) + ) + _check_boundaries_in_hdu( + bias_res_hdu, + lo_vals=[0 * u.deg, 0.015 * u.TeV], + hi_vals=[3 * u.deg, 155 * u.TeV], + ) + + +def test_make_2d_ang_res(irf_events_table): + from ctapipe.irf import AngularResolution2dMaker + + angResMkr = AngularResolution2dMaker( + fov_offset_n_bins=3, + fov_offset_max=3 * u.deg, + true_energy_n_bins_per_decade=7, + true_energy_max=155 * u.TeV, + reco_energy_n_bins_per_decade=6, + reco_energy_min=0.03 * u.TeV, + ) + + ang_res_hdu = angResMkr.make_angular_resolution_hdu(events=irf_events_table) + assert ( + ang_res_hdu.data["N_EVENTS"].shape + == ang_res_hdu.data["ANG_RES"].shape + == (1, 3, 23) + ) + _check_boundaries_in_hdu( + ang_res_hdu, + lo_vals=[0 * u.deg, 0.03 * u.TeV], + hi_vals=[3 * u.deg, 150 * u.TeV], + ) + + angResMkr.use_true_energy = True + ang_res_hdu = angResMkr.make_angular_resolution_hdu(events=irf_events_table) + assert ( + ang_res_hdu.data["N_EVENTS"].shape + == ang_res_hdu.data["ANG_RES"].shape + == (1, 3, 29) + ) + _check_boundaries_in_hdu(
_check_boundaries_in_hdu( + ang_res_hdu, + lo_vals=[0 * u.deg, 0.015 * u.TeV], + hi_vals=[3 * u.deg, 155 * u.TeV], + ) + + +def test_make_2d_sensitivity( + gamma_diffuse_full_reco_file, proton_full_reco_file, irf_events_loader_test_config +): + from ctapipe.irf import EventsLoader, Sensitivity2dMaker, Spectra + + gamma_loader = EventsLoader( + config=irf_events_loader_test_config, + kind="gammas", + file=gamma_diffuse_full_reco_file, + target_spectrum=Spectra.CRAB_HEGRA, + ) + gamma_events, _, _ = gamma_loader.load_preselected_events( + chunk_size=10000, + obs_time=u.Quantity(50, u.h), + ) + proton_loader = EventsLoader( + config=irf_events_loader_test_config, + kind="protons", + file=proton_full_reco_file, + target_spectrum=Spectra.IRFDOC_PROTON_SPECTRUM, + ) + proton_events, _, _ = proton_loader.load_preselected_events( + chunk_size=10000, + obs_time=u.Quantity(50, u.h), + ) + + sensMkr = Sensitivity2dMaker( + fov_offset_n_bins=3, + fov_offset_max=3 * u.deg, + reco_energy_n_bins_per_decade=7, + reco_energy_max=155 * u.TeV, + ) + # Create a dummy theta cut since `pyirf.sensitivity.estimate_background` + # needs a theta cut atm. + theta_cuts = QTable() + theta_cuts["center"] = 0.5 * ( + sensMkr.reco_energy_bins[:-1] + sensMkr.reco_energy_bins[1:] + ) + theta_cuts["cut"] = sensMkr.fov_offset_max + + sens_hdu = sensMkr.make_sensitivity_hdu( + signal_events=gamma_events, + background_events=proton_events, + theta_cut=theta_cuts, + gamma_spectrum=Spectra.CRAB_HEGRA, + ) + assert ( + sens_hdu.data["N_SIG"].shape + == sens_hdu.data["N_SIG_W"].shape + == sens_hdu.data["N_BKG"].shape + == sens_hdu.data["N_BKG_W"].shape + == sens_hdu.data["SIGNIFIC"].shape + == sens_hdu.data["REL_SEN"].shape + == sens_hdu.data["FLUX_SEN"].shape + == (1, 3, 29) + ) + _check_boundaries_in_hdu( + sens_hdu, + lo_vals=[0 * u.deg, 0.015 * u.TeV], + hi_vals=[3 * u.deg, 155 * u.TeV], + ) diff --git a/src/ctapipe/irf/tests/test_binning.py b/src/ctapipe/irf/tests/test_binning.py new file mode 100644 index 00000000000..e905b1fe7d1 --- /dev/null +++ b/src/ctapipe/irf/tests/test_binning.py @@ -0,0 +1,112 @@ +import logging + +import astropy.units as u +import numpy as np +import pytest +from astropy.table import QTable + + +def test_check_bins_in_range(tmp_path): + from ctapipe.irf import ResultValidRange, check_bins_in_range + + valid_range = ResultValidRange( + bounds_table=QTable( + rows=[u.Quantity([0.03, 200], u.TeV)], names=["energy_min", "energy_max"] + ), + prefix="energy", + ) + + # bins are in range + bins = u.Quantity(np.logspace(-1, 2, 10), u.TeV) + check_bins_in_range(bins, valid_range) + + # bins are too small + bins = u.Quantity(np.logspace(-2, 2, 10), u.TeV) + with pytest.raises(ValueError, match="Valid range for"): + check_bins_in_range(bins, valid_range) + + # bins are too big + bins = u.Quantity(np.logspace(-1, 3, 10), u.TeV) + with pytest.raises(ValueError, match="Valid range for"): + check_bins_in_range(bins, valid_range) + + # bins are too big and too small + bins = u.Quantity(np.logspace(-2, 3, 10), u.TeV) + with pytest.raises(ValueError, match="Valid range for"): + check_bins_in_range(bins, valid_range) + + logger = logging.getLogger("ctapipe.irf.binning") + logpath = tmp_path / "test_check_bins_in_range.log" + handler = logging.FileHandler(logpath) + logger.addHandler(handler) + + check_bins_in_range(bins, valid_range, raise_error=False) + assert "Valid range for result is" in logpath.read_text() + + +def test_make_bins_per_decade(): + from ctapipe.irf import make_bins_per_decade + + bins = 
make_bins_per_decade(100 * u.GeV, 100 * u.TeV) + assert bins.unit == u.GeV + assert len(bins) == 16 + assert bins[0] == 100 * u.GeV + assert np.allclose(np.diff(np.log10(bins.to_value(u.GeV))), 0.2) + + bins = make_bins_per_decade(100 * u.GeV, 100 * u.TeV, 10) + assert len(bins) == 31 + assert np.allclose(np.diff(np.log10(bins.to_value(u.GeV))), 0.1) + + # respect boundaries over n_bins_per_decade + bins = make_bins_per_decade(100 * u.GeV, 105 * u.TeV) + assert len(bins) == 17 + assert np.isclose(bins[-1], 105 * u.TeV, rtol=1e-9) + + +def test_true_energy_bins_base(): + from ctapipe.irf.binning import TrueEnergyBinsBase + + binning = TrueEnergyBinsBase( + true_energy_min=0.02 * u.TeV, + true_energy_max=200 * u.TeV, + true_energy_n_bins_per_decade=7, + ) + assert len(binning.true_energy_bins) == 29 + assert binning.true_energy_bins.unit == u.TeV + assert np.isclose(binning.true_energy_bins[0], binning.true_energy_min, rtol=1e-9) + assert np.isclose(binning.true_energy_bins[-1], binning.true_energy_max, rtol=1e-9) + assert np.allclose( + np.diff(np.log10(binning.true_energy_bins.to_value(u.TeV))), 1 / 7 + ) + + +def test_reco_energy_bins_base(): + from ctapipe.irf.binning import RecoEnergyBinsBase + + binning = RecoEnergyBinsBase( + reco_energy_min=0.02 * u.TeV, + reco_energy_max=200 * u.TeV, + reco_energy_n_bins_per_decade=4, + ) + assert len(binning.reco_energy_bins) == 17 + assert binning.reco_energy_bins.unit == u.TeV + assert np.isclose(binning.reco_energy_bins[0], binning.reco_energy_min, rtol=1e-9) + assert np.isclose(binning.reco_energy_bins[-1], binning.reco_energy_max, rtol=1e-9) + assert np.allclose( + np.diff(np.log10(binning.reco_energy_bins.to_value(u.TeV))), 0.25 + ) + + +def test_fov_offset_bins_base(): + from ctapipe.irf.binning import FoVOffsetBinsBase + + binning = FoVOffsetBinsBase( + # use default for fov_offset_min + fov_offset_max=3 * u.deg, + fov_offset_n_bins=3, + ) + assert len(binning.fov_offset_bins) == 4 + assert binning.fov_offset_bins.unit == u.deg + assert np.isclose(binning.fov_offset_bins[0], binning.fov_offset_min, rtol=1e-9) + assert np.isclose(binning.fov_offset_bins[-1], binning.fov_offset_max, rtol=1e-9) + assert np.allclose(np.diff(binning.fov_offset_bins.to_value(u.deg)), 1) diff --git a/src/ctapipe/irf/tests/test_irfs.py b/src/ctapipe/irf/tests/test_irfs.py new file mode 100644 index 00000000000..bd5e212a2b9 --- /dev/null +++ b/src/ctapipe/irf/tests/test_irfs.py @@ -0,0 +1,128 @@ +import astropy.units as u +from astropy.io.fits import BinTableHDU +from pyirf.simulations import SimulatedEventsInfo + + +def _check_boundaries_in_hdu( + hdu: BinTableHDU, + lo_vals: list, + hi_vals: list, + colnames: list[str] = ["THETA", "ENERG"], +): + for col, val in zip(colnames, lo_vals): + assert u.isclose( + u.Quantity(hdu.data[f"{col}_LO"][0][0], hdu.columns[f"{col}_LO"].unit), val + ) + for col, val in zip(colnames, hi_vals): + assert u.isclose( + u.Quantity(hdu.data[f"{col}_HI"][0][-1], hdu.columns[f"{col}_HI"].unit), val + ) + + +def test_make_2d_bkg(irf_events_table): + from ctapipe.irf import BackgroundRate2dMaker + + bkgMkr = BackgroundRate2dMaker( + fov_offset_n_bins=3, + fov_offset_max=3 * u.deg, + reco_energy_n_bins_per_decade=7, + reco_energy_max=155 * u.TeV, + ) + + bkg_hdu = bkgMkr.make_bkg_hdu(events=irf_events_table, obs_time=1 * u.s) + # min 7 bins per decade between 0.015 TeV and 155 TeV -> 7 * 4 + 1 = 29 bins + assert bkg_hdu.data["BKG"].shape == (1, 3, 29) + + _check_boundaries_in_hdu( + bkg_hdu, lo_vals=[0 * u.deg, 0.015 * u.TeV], hi_vals=[3 * 
u.deg, 155 * u.TeV] + ) + + +def test_make_2d_energy_migration(irf_events_table): + from ctapipe.irf import EnergyMigration2dMaker + + migMkr = EnergyMigration2dMaker( + fov_offset_n_bins=3, + fov_offset_max=3 * u.deg, + true_energy_n_bins_per_decade=7, + true_energy_max=155 * u.TeV, + energy_migration_n_bins=20, + energy_migration_min=0.1, + energy_migration_max=10, + ) + mig_hdu = migMkr.make_edisp_hdu(events=irf_events_table, point_like=False) + # min 7 bins per decade between 0.015 TeV and 155 TeV -> 7 * 4 + 1 = 29 bins + assert mig_hdu.data["MATRIX"].shape == (1, 3, 20, 29) + + _check_boundaries_in_hdu( + mig_hdu, + lo_vals=[0 * u.deg, 0.015 * u.TeV, 0.1], + hi_vals=[3 * u.deg, 155 * u.TeV, 10], + colnames=["THETA", "ENERG", "MIGRA"], + ) + + +def test_make_2d_eff_area(irf_events_table): + from ctapipe.irf import EffectiveArea2dMaker + + effAreaMkr = EffectiveArea2dMaker( + fov_offset_n_bins=3, + fov_offset_max=3 * u.deg, + true_energy_n_bins_per_decade=7, + true_energy_max=155 * u.TeV, + ) + sim_info = SimulatedEventsInfo( + n_showers=3000, + energy_min=0.01 * u.TeV, + energy_max=10 * u.TeV, + max_impact=1000 * u.m, + spectral_index=-1.9, + viewcone_min=0 * u.deg, + viewcone_max=10 * u.deg, + ) + eff_area_hdu = effAreaMkr.make_aeff_hdu( + events=irf_events_table, + point_like=False, + signal_is_point_like=False, + sim_info=sim_info, + ) + # min 7 bins per decade between 0.015 TeV and 155 TeV -> 7 * 4 + 1 = 29 bins + assert eff_area_hdu.data["EFFAREA"].shape == (1, 3, 29) + + _check_boundaries_in_hdu( + eff_area_hdu, + lo_vals=[0 * u.deg, 0.015 * u.TeV], + hi_vals=[3 * u.deg, 155 * u.TeV], + ) + + # point like data -> only 1 fov offset bin + eff_area_hdu = effAreaMkr.make_aeff_hdu( + events=irf_events_table, + point_like=False, + signal_is_point_like=True, + sim_info=sim_info, + ) + assert eff_area_hdu.data["EFFAREA"].shape == (1, 1, 29) + + +def test_make_3d_psf(irf_events_table): + from ctapipe.irf import Psf3dMaker + + psfMkr = Psf3dMaker( + fov_offset_n_bins=3, + fov_offset_max=3 * u.deg, + true_energy_n_bins_per_decade=7, + true_energy_max=155 * u.TeV, + source_offset_n_bins=110, + source_offset_max=2 * u.deg, + ) + psf_hdu = psfMkr.make_psf_hdu(events=irf_events_table) + # min 7 bins per decade between 0.015 TeV and 155 TeV -> 7 * 4 + 1 = 29 bins + assert psf_hdu.data["RPSF"].shape == (1, 110, 3, 29) + + _check_boundaries_in_hdu( + psf_hdu, + lo_vals=[0 * u.deg, 0.015 * u.TeV, 0 * u.deg], + hi_vals=[3 * u.deg, 155 * u.TeV, 2 * u.deg], + colnames=["THETA", "ENERG", "RAD"], + ) diff --git a/src/ctapipe/irf/tests/test_optimize.py b/src/ctapipe/irf/tests/test_optimize.py new file mode 100644 index 00000000000..ab744cf7843 --- /dev/null +++ b/src/ctapipe/irf/tests/test_optimize.py @@ -0,0 +1,132 @@ +import astropy.units as u +import numpy as np +import pytest +from astropy.table import QTable + +from ctapipe.core import non_abstract_children +from ctapipe.irf import EventsLoader, Spectra +from ctapipe.irf.optimize import CutOptimizerBase + + +def test_optimization_result_store(tmp_path, irf_events_loader_test_config): + from ctapipe.irf import ( + EventPreProcessor, + OptimizationResult, + OptimizationResultStore, + ResultValidRange, + ) + + result_path = tmp_path / "result.h5" + epp = EventPreProcessor(irf_events_loader_test_config) + store = OptimizationResultStore(epp) + + with pytest.raises( + ValueError, + match="The results of this object have not been properly initialised", + ): + store.write(result_path) + + gh_cuts = QTable( + data=[[0.2, 0.8, 1.5] * u.TeV, [0.8, 1.5, 10] * 
u.TeV, [0.82, 0.91, 0.88]], + names=["low", "high", "cut"], + ) + store.set_result( + gh_cuts=gh_cuts, + valid_energy=[0.2 * u.TeV, 10 * u.TeV], + valid_offset=[0 * u.deg, np.inf * u.deg], + clf_prefix="ExtraTreesClassifier", + theta_cuts=None, + ) + store.write(result_path) + assert result_path.exists() + + result = store.read(result_path) + assert isinstance(result, OptimizationResult) + assert isinstance(result.valid_energy, ResultValidRange) + assert isinstance(result.valid_offset, ResultValidRange) + assert isinstance(result.gh_cuts, QTable) + assert result.gh_cuts.meta["CLFNAME"] == "ExtraTreesClassifier" + + +def test_gh_percentile_cut_calculator(): + from ctapipe.irf import GhPercentileCutCalculator + + calc = GhPercentileCutCalculator( + target_percentile=75, + min_counts=1, + smoothing=-1, + ) + cuts = calc.calculate_gh_cut( + gammaness=np.array([0.1, 0.6, 0.45, 0.98, 0.32, 0.95, 0.25, 0.87]), + reco_energy=[0.17, 0.36, 0.47, 0.22, 1.2, 5, 4.2, 9.1] * u.TeV, + reco_energy_bins=[0, 1, 10] * u.TeV, + ) + assert len(cuts) == 2 + assert np.isclose(cuts["cut"][0], 0.3625) + assert np.isclose(cuts["cut"][1], 0.3025) + assert calc.smoothing is None + + +def test_theta_percentile_cut_calculator(): + from ctapipe.irf import ThetaPercentileCutCalculator + + calc = ThetaPercentileCutCalculator( + target_percentile=75, + min_counts=1, + smoothing=-1, + ) + cuts = calc.calculate_theta_cut( + theta=[0.1, 0.07, 0.21, 0.4, 0.03, 0.08, 0.11, 0.18] * u.deg, + reco_energy=[0.17, 0.36, 0.47, 0.22, 1.2, 5, 4.2, 9.1] * u.TeV, + reco_energy_bins=[0, 1, 10] * u.TeV, + ) + assert len(cuts) == 2 + assert np.isclose(cuts["cut"][0], 0.2575 * u.deg) + assert np.isclose(cuts["cut"][1], 0.1275 * u.deg) + assert calc.smoothing is None + + +@pytest.mark.parametrize("Optimizer", non_abstract_children(CutOptimizerBase)) +def test_cut_optimizer( + Optimizer, + gamma_diffuse_full_reco_file, + proton_full_reco_file, + irf_events_loader_test_config, +): + from ctapipe.irf import OptimizationResultStore + + gamma_loader = EventsLoader( + config=irf_events_loader_test_config, + kind="gammas", + file=gamma_diffuse_full_reco_file, + target_spectrum=Spectra.CRAB_HEGRA, + ) + gamma_events, _, _ = gamma_loader.load_preselected_events( + chunk_size=10000, + obs_time=u.Quantity(50, u.h), + ) + proton_loader = EventsLoader( + config=irf_events_loader_test_config, + kind="protons", + file=proton_full_reco_file, + target_spectrum=Spectra.IRFDOC_PROTON_SPECTRUM, + ) + proton_events, _, _ = proton_loader.load_preselected_events( + chunk_size=10000, + obs_time=u.Quantity(50, u.h), + ) + + optimizer = Optimizer() + result = optimizer.optimize_cuts( + signal=gamma_events, + background=proton_events, + alpha=0.2, + precuts=gamma_loader.epp, # identical precuts for all particle types + clf_prefix="ExtraTreesClassifier", + point_like=True, + ) + assert isinstance(result, OptimizationResultStore) + assert len(result._results) == 4 + assert result._results[1]["energy_min"] >= result._results[0]["low"][0] + assert result._results[1]["energy_max"] <= result._results[0]["high"][-1] + assert result._results[3]["cut"].unit == u.deg diff --git a/src/ctapipe/irf/tests/test_select.py b/src/ctapipe/irf/tests/test_select.py new file mode 100644 index 00000000000..89d62a12279 --- /dev/null +++ b/src/ctapipe/irf/tests/test_select.py @@ -0,0 +1,100 @@ +import astropy.units as u +import pytest +from astropy.table import Table +from pyirf.simulations import SimulatedEventsInfo +from pyirf.spectral import PowerLaw + + +@pytest.fixture(scope="module") +def 
dummy_table():
+    """Dummy table to test column renaming."""
+    return Table(
+        {
+            "obs_id": [1, 1, 1, 2, 3, 3],
+            "event_id": [1, 2, 3, 1, 1, 2],
+            "true_energy": [0.99, 10, 0.37, 2.1, 73.4, 1] * u.TeV,
+            "dummy_energy": [1, 10, 0.4, 2.5, 73, 1] * u.TeV,
+            "classifier_prediction": [1, 0.3, 0.87, 0.93, 0, 0.1],
+            "true_alt": [60, 60, 60, 60, 60, 60] * u.deg,
+            "alt_geom": [58.5, 61.2, 59, 71.6, 60, 62] * u.deg,
+            "true_az": [13, 13, 13, 13, 13, 13] * u.deg,
+            "az_geom": [12.5, 13, 11.8, 15.1, 14.7, 12.8] * u.deg,
+        }
+    )
+
+
+def test_normalise_column_names(dummy_table):
+    from ctapipe.irf import EventPreProcessor
+
+    epp = EventPreProcessor(
+        energy_reconstructor="dummy",
+        geometry_reconstructor="geom",
+        gammaness_classifier="classifier",
+        rename_columns=[("alt_geom", "reco_alt"), ("az_geom", "reco_az")],
+    )
+    norm_table = epp.normalise_column_names(dummy_table)
+
+    needed_cols = [
+        "obs_id",
+        "event_id",
+        "true_energy",
+        "true_alt",
+        "true_az",
+        "reco_energy",
+        "reco_alt",
+        "reco_az",
+        "gh_score",
+    ]
+    for c in needed_cols:
+        assert c in norm_table.colnames
+
+    # error if reco_{alt,az} is missing because of a non-standard name
+    with pytest.raises(ValueError, match="No column corresponding"):
+        epp = EventPreProcessor(
+            energy_reconstructor="dummy",
+            geometry_reconstructor="geom",
+            gammaness_classifier="classifier",
+        )
+        norm_table = epp.normalise_column_names(dummy_table)
+
+
+def test_events_loader(gamma_diffuse_full_reco_file, irf_events_loader_test_config):
+    from ctapipe.irf import EventsLoader, Spectra
+
+    loader = EventsLoader(
+        config=irf_events_loader_test_config,
+        kind="gammas",
+        file=gamma_diffuse_full_reco_file,
+        target_spectrum=Spectra.CRAB_HEGRA,
+    )
+    events, count, meta = loader.load_preselected_events(
+        chunk_size=10000,
+        obs_time=u.Quantity(50, u.h),
+    )
+
+    columns = [
+        "obs_id",
+        "event_id",
+        "true_energy",
+        "true_az",
+        "true_alt",
+        "reco_energy",
+        "reco_az",
+        "reco_alt",
+        "reco_fov_lat",
+        "reco_fov_lon",
+        "gh_score",
+        "pointing_az",
+        "pointing_alt",
+        "theta",
+        "true_source_fov_offset",
+        "reco_source_fov_offset",
+    ]
+    assert sorted(columns) == sorted(events.colnames)
+
+    assert isinstance(count, int)
+    assert isinstance(meta["sim_info"], SimulatedEventsInfo)
+    assert isinstance(meta["spectrum"], PowerLaw)
+
+    events = loader.make_event_weights(events, meta["spectrum"], (0 * u.deg, 1 * u.deg))
+    assert "weight" in events.colnames
diff --git a/src/ctapipe/irf/vis_utils.py b/src/ctapipe/irf/vis_utils.py
new file mode 100644
index 00000000000..e05812e1c84
--- /dev/null
+++ b/src/ctapipe/irf/vis_utils.py
@@ -0,0 +1,64 @@
+import numpy as np
+import scipy.stats as st
+
+
+def find_columnwise_stats(table, col_bins, percentiles, density=False):
+    tab = np.squeeze(table)
+    out = np.ones((tab.shape[1], 5)) * -1
+    # This loop over the columns seems unavoidable,
+    # so having a reasonable number of bins in that
+    # direction is good
+    for idx, col in enumerate(tab.T):
+        if (col > 0).sum() == 0:
+            continue
+        col_est = st.rv_histogram((col, col_bins), density=density)
+        out[idx, 0] = col_est.mean()
+        out[idx, 1] = col_est.median()
+        out[idx, 2] = col_est.std()
+        out[idx, 3] = col_est.ppf(percentiles[0])
+        out[idx, 4] = col_est.ppf(percentiles[1])
+    return out
+
+
+def rebin_x_2d_hist(hist, xbins, x_cent, num_bins_merge=3):
+    num_y, num_x = hist.shape
+    if num_x % num_bins_merge == 0:
+        rebin_x = xbins[::num_bins_merge]
+        rebin_xcent = x_cent.reshape(-1, num_bins_merge).mean(axis=1)
+        rebin_hist = hist.reshape(num_y, -1, 
num_bins_merge).sum(axis=2)
+        return rebin_x, rebin_xcent, rebin_hist
+    else:
+        raise ValueError(
+            f"Could not merge {num_bins_merge} bins along an axis of length {num_x}"
+        )
+
+
+def get_2d_hist_from_table(x_prefix, y_prefix, table, column):
+    x_lo_name, x_hi_name = f"{x_prefix}_LO", f"{x_prefix}_HI"
+    y_lo_name, y_hi_name = f"{y_prefix}_LO", f"{y_prefix}_HI"
+
+    xbins = np.hstack((table[x_lo_name][0], table[x_hi_name][0][-1]))
+    ybins = np.hstack((table[y_lo_name][0], table[y_hi_name][0][-1]))
+
+    if isinstance(column, str):
+        mat_vals = np.squeeze(table[column])
+    else:
+        mat_vals = column
+
+    return mat_vals, xbins, ybins
+
+
+def get_bin_centers(bins):
+    return np.convolve(bins, [0.5, 0.5], mode="valid")
+
+
+def get_x_bin_values_with_rebinning(num_rebin, xbins, xcent, mat_vals, density):
+    if num_rebin > 1:
+        rebin_x, rebin_xcent, rebin_hist = rebin_x_2d_hist(
+            mat_vals, xbins, xcent, num_bins_merge=num_rebin
+        )
+        density = False
+    else:
+        rebin_x, rebin_xcent, rebin_hist = xbins, xcent, mat_vals
+
+    return rebin_x, rebin_xcent, rebin_hist, density
diff --git a/src/ctapipe/irf/visualisation.py b/src/ctapipe/irf/visualisation.py
new file mode 100644
index 00000000000..8880d0f3a53
--- /dev/null
+++ b/src/ctapipe/irf/visualisation.py
@@ -0,0 +1,224 @@
+import astropy.units as u
+import matplotlib.pyplot as plt
+import numpy as np
+from astropy.visualization import quantity_support
+from matplotlib.colors import LogNorm
+from pyirf.binning import join_bin_lo_hi
+
+from .vis_utils import (
+    find_columnwise_stats,
+    get_2d_hist_from_table,
+    get_bin_centers,
+    get_x_bin_values_with_rebinning,
+)
+
+quantity_support()
+
+
+def plot_2d_irf_table(
+    ax, table, column, x_prefix, y_prefix, x_label=None, y_label=None, **mpl_args
+):
+    mat_vals, xbins, ybins = get_2d_hist_from_table(x_prefix, y_prefix, table, column)
+
+    if not x_label:
+        x_label = x_prefix
+    if not y_label:
+        y_label = y_prefix
+    plot = plot_hist2d(
+        ax, mat_vals, xbins, ybins, xlabel=x_label, ylabel=y_label, **mpl_args
+    )
+    plt.colorbar(plot)
+    return ax
+
+
+def plot_2d_table_with_col_stats(
+    ax,
+    table,
+    column,
+    x_prefix,
+    y_prefix,
+    num_rebin=4,
+    stat_kind=2,
+    quantiles=[0.2, 0.8],
+    x_label=None,
+    y_label=None,
+    density=False,
+    mpl_args={
+        "histo": {"xscale": "log"},
+        "stats": {"color": "firebrick"},
+    },
+):
+    """Function to draw a 2d histogram along with columnwise statistics.
+    The plotted errorbars depend on stat_kind:
+    1 -> mean + standard deviation
+    2 -> median + standard deviation
+    3 -> median + user specified quantiles around the median (default 0.2 to 0.8)
+    """
+
+    mat_vals, xbins, ybins = get_2d_hist_from_table(x_prefix, y_prefix, table, column)
+    xcent = get_bin_centers(xbins)
+    rebin_x, rebin_xcent, rebin_hist, density = get_x_bin_values_with_rebinning(
+        num_rebin, xbins, xcent, mat_vals, density
+    )
+
+    plot = plot_hist2d(
+        ax,
+        rebin_hist,
+        rebin_x,
+        ybins,
+        xlabel=x_label,
+        ylabel=y_label,
+        **mpl_args["histo"],
+    )
+    plt.colorbar(plot)
+
+    ax = plot_2d_table_col_stats(
+        ax,
+        table,
+        column,
+        x_prefix,
+        y_prefix,
+        num_rebin,
+        stat_kind,
+        quantiles,
+        x_label,
+        y_label,
+        density,
+        mpl_args=mpl_args,
+        lbl_prefix="",
+    )
+    return ax
+
+
+def plot_2d_table_col_stats(
+    ax,
+    table,
+    column,
+    x_prefix,
+    y_prefix,
+    num_rebin=4,
+    stat_kind=2,
+    quantiles=[0.2, 0.8],
+    x_label=None,
+    y_label=None,
+    density=False,
+    lbl_prefix="",
+    mpl_args={"xscale": "log"},
+):
+    """Function to draw the columnwise statistics of a 2d histogram.
+    The content values shown depend on stat_kind:
+    1 -> mean + standard deviation
+    2 -> median + standard deviation
+    3 -> median + user specified quantiles around the median (default 0.2 to 0.8)
+    """
+
+    mat_vals, xbins, ybins = get_2d_hist_from_table(x_prefix, y_prefix, table, column)
+    xcent = get_bin_centers(xbins)
+    rebin_x, rebin_xcent, rebin_hist, density = get_x_bin_values_with_rebinning(
+        num_rebin, xbins, xcent, mat_vals, density
+    )
+
+    stats = find_columnwise_stats(rebin_hist, ybins, quantiles, density)
+
+    sel = stats[:, 0] > 0
+    if stat_kind == 1:
+        y_idx = 0
+        err = stats[sel, 2]
+        label = "mean + std"
+    elif stat_kind == 2:
+        y_idx = 1
+        err = stats[sel, 2]
+        label = "median + std"
+    elif stat_kind == 3:
+        y_idx = 1
+        err = np.zeros_like(stats[:, 3:])
+        err[sel, 0] = stats[sel, 1] - stats[sel, 3]
+        err[sel, 1] = stats[sel, 4] - stats[sel, 1]
+        err = err[sel, :].T
+        label = f"median + IQR[{quantiles[0]:.2f},{quantiles[1]:.2f}]"
+
+    ax.errorbar(
+        x=rebin_xcent[sel],
+        y=stats[sel, y_idx],
+        yerr=err,
+        label=f"{lbl_prefix} {label}",
+    )
+    if "xscale" in mpl_args:
+        ax.set_xscale(mpl_args["xscale"])
+
+    ax.legend(loc="best")
+
+    return ax
+
+
+def plot_irf_table(
+    ax, table, column, prefix=None, lo_name=None, hi_name=None, label=None, **mpl_args
+):
+    if isinstance(column, str):
+        vals = np.squeeze(table[column])
+    else:
+        vals = column
+
+    if prefix:
+        lo = table[f"{prefix}_LO"]
+        hi = table[f"{prefix}_HI"]
+    elif hi_name and lo_name:
+        lo = table[lo_name]
+        hi = table[hi_name]
+    else:
+        raise ValueError(
+            "Either `prefix` or both `lo_name` and `hi_name` have to be given"
+        )
+    if not label:
+        label = column
+
+    bins = np.squeeze(join_bin_lo_hi(lo, hi))
+    ax.stairs(vals, bins, label=label, **mpl_args)
+
+
+def plot_hist2d_as_contour(
+    ax,
+    hist,
+    xedges,
+    yedges,
+    xlabel,
+    ylabel,
+    levels=5,
+    xscale="linear",
+    yscale="linear",
+    norm="log",
+    cmap="Reds",
+):
+    if norm == "log":
+        norm = LogNorm(vmax=hist.max())
+    xg, yg = np.meshgrid(xedges[1:], yedges[1:])
+    out = ax.contour(xg, yg, hist.T, norm=norm, cmap=cmap, levels=levels)
+    ax.set(xscale=xscale, xlabel=xlabel, yscale=yscale, ylabel=ylabel)
+    return out
+
+
+def plot_hist2d(
+    ax,
+    hist,
+    xedges,
+    yedges,
+    xlabel,
+    ylabel,
+    xscale="linear",
+    yscale="linear",
+    norm="log",
+    cmap="viridis",
+    colorbar=False,
+):
+    if isinstance(hist, u.Quantity):
+        hist = hist.value
+
+    if norm == "log":
+        norm = LogNorm(vmax=hist.max())
+
+    xg, yg = np.meshgrid(xedges, yedges)
+    out = ax.pcolormesh(xg, yg, hist, norm=norm, cmap=cmap)
+    ax.set(xscale=xscale, xlabel=xlabel, yscale=yscale, ylabel=ylabel)
+    if colorbar:
+        plt.colorbar(out)
+    return out
diff --git a/src/ctapipe/tools/info.py b/src/ctapipe/tools/info.py
index f100b24e80e..347661cf349 100644
--- a/src/ctapipe/tools/info.py
+++ b/src/ctapipe/tools/info.py
@@ -26,6 +26,7 @@
         "iminuit",
         "tables",
         "eventio",
+        "pyirf",
     ]
 )
diff --git a/src/ctapipe/tools/make_irf.py b/src/ctapipe/tools/make_irf.py
new file mode 100644
index 00000000000..1e36276235c
--- /dev/null
+++ b/src/ctapipe/tools/make_irf.py
@@ -0,0 +1,615 @@
+"""Tool to generate IRFs"""
+
+import operator
+from functools import partial
+
+import astropy.units as u
+import numpy as np
+from astropy.io import fits
+from astropy.table import QTable, vstack
+from pyirf.cuts import evaluate_binned_cut
+from pyirf.io import create_rad_max_hdu
+
+from ..core import Provenance, Tool, ToolConfigurationError, traits
+from ..core.traits import AstroQuantity, Bool, Integer, classes_with_traits, flag
+from ..irf import (
+    EventPreProcessor,
+    EventsLoader,
+    
OptimizationResultStore,
+    Spectra,
+    check_bins_in_range,
+)
+from ..irf.benchmarks import (
+    AngularResolutionMakerBase,
+    EnergyBiasResolutionMakerBase,
+    SensitivityMakerBase,
+)
+from ..irf.irfs import (
+    BackgroundRateMakerBase,
+    EffectiveAreaMakerBase,
+    EnergyMigrationMakerBase,
+    PsfMakerBase,
+)
+
+
+class IrfTool(Tool):
+    name = "ctapipe-make-irf"
+    description = "Tool to create IRF files in GADF format"
+
+    do_background = Bool(
+        True,
+        help="Compute background rate IRF using supplied files",
+    ).tag(config=True)
+
+    do_benchmarks = Bool(
+        False,
+        help="Produce IRF related benchmarks",
+    ).tag(config=True)
+
+    range_check_error = Bool(
+        False,
+        help="Raise error if asking for IRFs outside range where cut optimisation is valid",
+    ).tag(config=True)
+
+    cuts_file = traits.Path(
+        default_value=None, directory_ok=False, help="Path to optimized cuts input file"
+    ).tag(config=True)
+
+    gamma_file = traits.Path(
+        default_value=None, directory_ok=False, help="Gamma input filename and path"
+    ).tag(config=True)
+
+    gamma_target_spectrum = traits.UseEnum(
+        Spectra,
+        default_value=Spectra.CRAB_HEGRA,
+        help="Name of the spectrum used for the simulated gamma spectrum",
+    ).tag(config=True)
+
+    proton_file = traits.Path(
+        default_value=None,
+        allow_none=True,
+        directory_ok=False,
+        help="Proton input filename and path",
+    ).tag(config=True)
+
+    proton_target_spectrum = traits.UseEnum(
+        Spectra,
+        default_value=Spectra.IRFDOC_PROTON_SPECTRUM,
+        help="Name of the spectrum used for the simulated proton spectrum",
+    ).tag(config=True)
+
+    electron_file = traits.Path(
+        default_value=None,
+        allow_none=True,
+        directory_ok=False,
+        help="Electron input filename and path",
+    ).tag(config=True)
+
+    electron_target_spectrum = traits.UseEnum(
+        Spectra,
+        default_value=Spectra.IRFDOC_ELECTRON_SPECTRUM,
+        help="Name of the spectrum used for the simulated electron spectrum",
+    ).tag(config=True)
+
+    chunk_size = Integer(
+        default_value=100000,
+        allow_none=True,
+        help="How many subarray events to load at once while selecting.",
+    ).tag(config=True)
+
+    output_path = traits.Path(
+        default_value="./IRF.fits.gz",
+        allow_none=False,
+        directory_ok=False,
+        help="Output file",
+    ).tag(config=True)
+
+    obs_time = AstroQuantity(
+        default_value=u.Quantity(50, u.hour),
+        physical_type=u.physical.time,
+        help="Observation time in the form ``<value> <unit>``",
+    ).tag(config=True)
+
+    edisp_parameterization = traits.ComponentName(
+        EnergyMigrationMakerBase,
+        default_value="EnergyMigration2dMaker",
+        help="The parameterization of the energy migration to be used.",
+    ).tag(config=True)
+
+    aeff_parameterization = traits.ComponentName(
+        EffectiveAreaMakerBase,
+        default_value="EffectiveArea2dMaker",
+        help="The parameterization of the effective area to be used.",
+    ).tag(config=True)
+
+    psf_parameterization = traits.ComponentName(
+        PsfMakerBase,
+        default_value="Psf3dMaker",
+        help="The parameterization of the point spread function to be used.",
+    ).tag(config=True)
+
+    bkg_parameterization = traits.ComponentName(
+        BackgroundRateMakerBase,
+        default_value="BackgroundRate2dMaker",
+        help="The parameterization of the background rate to be used.",
+    ).tag(config=True)
+
+    energy_bias_res_parameterization = traits.ComponentName(
+        EnergyBiasResolutionMakerBase,
+        default_value="EnergyBiasResolution2dMaker",
+        help=(
+            "The parameterization of the bias and resolution benchmark "
+            "for the energy prediction."
+        ),
+    ).tag(config=True)
+
+    ang_res_parameterization = traits.ComponentName(
+        AngularResolutionMakerBase,
+        default_value="AngularResolution2dMaker",
+        help="The parameterization of the angular resolution benchmark.",
+    ).tag(config=True)
+
+    sens_parameterization = traits.ComponentName(
+        SensitivityMakerBase,
+        default_value="Sensitivity2dMaker",
+        help="The parameterization of the point source sensitivity benchmark.",
+    ).tag(config=True)
+
+    full_enclosure = Bool(
+        False,
+        help=(
+            "Compute a full enclosure IRF by not applying a theta cut and only using"
+            " the G/H separation cut."
+        ),
+    ).tag(config=True)
+
+    aliases = {
+        "cuts": "IrfTool.cuts_file",
+        "gamma-file": "IrfTool.gamma_file",
+        "proton-file": "IrfTool.proton_file",
+        "electron-file": "IrfTool.electron_file",
+        "output": "IrfTool.output_path",
+        "chunk_size": "IrfTool.chunk_size",
+    }
+
+    flags = {
+        **flag(
+            "do-background",
+            "IrfTool.do_background",
+            "Compute background rate.",
+            "Do not compute background rate.",
+        ),
+        **flag(
+            "do-benchmarks",
+            "IrfTool.do_benchmarks",
+            "Produce IRF related benchmarks.",
+            "Do not produce IRF related benchmarks.",
+        ),
+        **flag(
+            "full-enclosure",
+            "IrfTool.full_enclosure",
+            "Compute a full-enclosure IRF.",
+            "Compute a point-like IRF.",
+        ),
+    }
+
+    classes = (
+        [
+            EventsLoader,
+        ]
+        + classes_with_traits(BackgroundRateMakerBase)
+        + classes_with_traits(EffectiveAreaMakerBase)
+        + classes_with_traits(EnergyMigrationMakerBase)
+        + classes_with_traits(PsfMakerBase)
+        + classes_with_traits(AngularResolutionMakerBase)
+        + classes_with_traits(EnergyBiasResolutionMakerBase)
+        + classes_with_traits(SensitivityMakerBase)
+    )
+
+    def setup(self):
+        self.opt_result = OptimizationResultStore().read(self.cuts_file)
+
+        if not self.full_enclosure and self.opt_result.theta_cuts is None:
+            raise ToolConfigurationError(
+                "Computing a point-like IRF requires an (optimized) theta cut."
+            )
+
+        check_e_bins = partial(
+            check_bins_in_range,
+            valid_range=self.opt_result.valid_energy,
+            raise_error=self.range_check_error,
+        )
+        self.particles = [
+            EventsLoader(
+                parent=self,
+                kind="gammas",
+                file=self.gamma_file,
+                target_spectrum=self.gamma_target_spectrum,
+            ),
+        ]
+        if self.do_background:
+            if self.proton_file:
+                self.particles.append(
+                    EventsLoader(
+                        parent=self,
+                        kind="protons",
+                        file=self.proton_file,
+                        target_spectrum=self.proton_target_spectrum,
+                    )
+                )
+            if self.electron_file:
+                self.particles.append(
+                    EventsLoader(
+                        parent=self,
+                        kind="electrons",
+                        file=self.electron_file,
+                        target_spectrum=self.electron_target_spectrum,
+                    )
+                )
+            if len(self.particles) == 1:
+                raise RuntimeError(
+                    "At least one electron or proton file is required when specifying `do_background`."
+ ) + + self.bkg = BackgroundRateMakerBase.from_name( + self.bkg_parameterization, parent=self + ) + check_e_bins( + bins=self.bkg.reco_energy_bins, source="background reco energy" + ) + + self.edisp = EnergyMigrationMakerBase.from_name( + self.edisp_parameterization, parent=self + ) + self.aeff = EffectiveAreaMakerBase.from_name( + self.aeff_parameterization, parent=self + ) + + if self.full_enclosure: + self.psf = PsfMakerBase.from_name(self.psf_parameterization, parent=self) + + if self.do_benchmarks: + self.b_output = self.output_path.with_name( + self.output_path.name.replace(".fits", "-benchmark.fits") + ) + self.ang_res = AngularResolutionMakerBase.from_name( + self.ang_res_parameterization, parent=self + ) + if not self.ang_res.use_true_energy: + check_e_bins( + bins=self.ang_res.reco_energy_bins, + source="Angular resolution energy", + ) + self.bias_res = EnergyBiasResolutionMakerBase.from_name( + self.energy_bias_res_parameterization, parent=self + ) + self.sens = SensitivityMakerBase.from_name( + self.sens_parameterization, parent=self + ) + check_e_bins( + bins=self.sens.reco_energy_bins, source="Sensitivity reco energy" + ) + + def calculate_selections(self, reduced_events: dict) -> dict: + """ + Add the selection columns to the signal and optionally background tables. + + Parameters + ---------- + reduced_events: dict + dict containing the signal (``"gammas"``) and optionally background + tables (``"protons"``, ``"electrons"``) + + Returns + ------- + dict + ``reduced_events`` with selection columns added. + """ + reduced_events["gammas"]["selected_gh"] = evaluate_binned_cut( + reduced_events["gammas"]["gh_score"], + reduced_events["gammas"]["reco_energy"], + self.opt_result.gh_cuts, + operator.ge, + ) + if not self.full_enclosure: + reduced_events["gammas"]["selected_theta"] = evaluate_binned_cut( + reduced_events["gammas"]["theta"], + reduced_events["gammas"]["reco_energy"], + self.opt_result.theta_cuts, + operator.le, + ) + reduced_events["gammas"]["selected"] = ( + reduced_events["gammas"]["selected_theta"] + & reduced_events["gammas"]["selected_gh"] + ) + else: + reduced_events["gammas"]["selected"] = reduced_events["gammas"][ + "selected_gh" + ] + + if self.do_background: + for bg_type in ("protons", "electrons"): + reduced_events[bg_type]["selected_gh"] = evaluate_binned_cut( + reduced_events[bg_type]["gh_score"], + reduced_events[bg_type]["reco_energy"], + self.opt_result.gh_cuts, + operator.ge, + ) + + if self.do_background: + self.log.debug( + "Keeping %d signal, %d proton events, and %d electron events" + % ( + sum(reduced_events["gammas"]["selected"]), + sum(reduced_events["protons"]["selected_gh"]), + sum(reduced_events["electrons"]["selected_gh"]), + ) + ) + else: + self.log.debug( + "Keeping %d signal events" % (sum(reduced_events["gammas"]["selected"])) + ) + return reduced_events + + def _stack_background(self, reduced_events): + bkgs = [] + if self.proton_file: + bkgs.append("protons") + if self.electron_file: + bkgs.append("electrons") + if len(bkgs) == 2: + background = vstack( + [reduced_events["protons"], reduced_events["electrons"]] + ) + else: + background = reduced_events[bkgs[0]] + return background + + def _make_signal_irf_hdus(self, hdus, sim_info): + hdus.append( + self.aeff.make_aeff_hdu( + events=self.signal_events[self.signal_events["selected"]], + point_like=not self.full_enclosure, + signal_is_point_like=self.signal_is_point_like, + sim_info=sim_info, + ) + ) + hdus.append( + self.edisp.make_edisp_hdu( + 
events=self.signal_events[self.signal_events["selected"]], + point_like=not self.full_enclosure, + ) + ) + if self.full_enclosure: + hdus.append( + self.psf.make_psf_hdu( + events=self.signal_events[self.signal_events["selected"]] + ) + ) + else: + # TODO: Support fov binning + self.log.debug( + "Currently multiple fov bins is not supported for RAD_MAX. " + "Using `fov_offset_bins = [valid_offset.min, valid_offset.max]`." + ) + hdus.append( + create_rad_max_hdu( + rad_max=self.opt_result.theta_cuts["cut"].reshape(-1, 1), + reco_energy_bins=np.append( + self.opt_result.theta_cuts["low"], + self.opt_result.theta_cuts["high"][-1], + ), + fov_offset_bins=u.Quantity( + [ + self.opt_result.valid_offset.min.to_value(u.deg), + self.opt_result.valid_offset.max.to_value(u.deg), + ], + u.deg, + ), + ) + ) + return hdus + + def _make_benchmark_hdus(self, hdus): + hdus.append( + self.bias_res.make_bias_resolution_hdu( + events=self.signal_events[self.signal_events["selected"]], + ) + ) + hdus.append( + self.ang_res.make_angular_resolution_hdu( + events=self.signal_events[self.signal_events["selected_gh"]], + ) + ) + if self.do_background: + if self.full_enclosure: + # Create a dummy theta cut since `pyirf.sensitivity.estimate_background` + # needs a theta cut atm. + self.log.info( + "Using all signal events with `theta < fov_offset_max` " + "to compute the sensitivity." + ) + theta_cuts = QTable() + theta_cuts["center"] = 0.5 * ( + self.sens.reco_energy_bins[:-1] + self.sens.reco_energy_bins[1:] + ) + theta_cuts["cut"] = self.sens.fov_offset_max + else: + theta_cuts = self.opt_result.theta_cuts + + hdus.append( + self.sens.make_sensitivity_hdu( + signal_events=self.signal_events[self.signal_events["selected"]], + background_events=self.background_events[ + self.background_events["selected_gh"] + ], + theta_cut=theta_cuts, + gamma_spectrum=self.gamma_target_spectrum, + ) + ) + return hdus + + def start(self): + reduced_events = dict() + for sel in self.particles: + # TODO: not very elegant to pass them this way, refactor later + if sel.epp.quality_criteria != self.opt_result.precuts.quality_criteria: + self.log.warning( + "Precuts are different from precuts used for calculating " + "g/h / theta cuts. Provided precuts:\n%s. " + "\nUsing the same precuts as g/h / theta cuts:\n%s. " + % ( + sel.epp.to_table(functions=True)["criteria", "func"], + self.opt_result.precuts.to_table(functions=True)[ + "criteria", "func" + ], + ) + ) + sel.epp = EventPreProcessor( + parent=sel, + quality_criteria=self.opt_result.precuts.quality_criteria, + ) + + if sel.epp.gammaness_classifier != self.opt_result.gh_cuts.meta["CLFNAME"]: + self.log.warning( + "G/H cuts are only valid for gammaness scores predicted by " + "the same classifier model. Requested model: %s. " + "Model used, so that g/h cuts are valid: %s." + % ( + sel.epp.gammaness_classifier, + self.opt_result.gh_cuts.meta["CLFNAME"], + ) + ) + sel.epp.gammaness_classifier = self.opt_result.gh_cuts.meta["CLFNAME"] + + self.log.debug("%s Precuts: %s" % (sel.kind, sel.epp.quality_criteria)) + evs, cnt, meta = sel.load_preselected_events(self.chunk_size, self.obs_time) + # Only calculate event weights if background or sensitivity should be calculated. + if self.do_background: + # Sensitivity is only calculated, if do_background and do_benchmarks is true. + if self.do_benchmarks: + evs = sel.make_event_weights( + evs, meta["spectrum"], self.sens.fov_offset_bins + ) + # If only background should be calculated, + # only calculate weights for protons and electrons. 
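+                # Gamma weights are only consumed by the sensitivity benchmark;
+                # the remaining signal IRF components are built from unweighted
+                # event counts.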
+                elif sel.kind in ("protons", "electrons"):
+                    evs = sel.make_event_weights(evs, meta["spectrum"])
+
+            reduced_events[sel.kind] = evs
+            reduced_events[f"{sel.kind}_count"] = cnt
+            reduced_events[f"{sel.kind}_meta"] = meta
+            self.log.debug(
+                "Loaded %d %s events" % (reduced_events[f"{sel.kind}_count"], sel.kind)
+            )
+            if sel.kind == "gammas":
+                self.signal_is_point_like = (
+                    meta["sim_info"].viewcone_max - meta["sim_info"].viewcone_min
+                ).value == 0
+
+                if self.signal_is_point_like:
+                    self.log.warning(
+                        "The gamma input file contains point-like simulations."
+                        " Therefore, the IRF is only calculated at a single point"
+                        " in the FoV. Changing `fov_offset_n_bins` to 1."
+                    )
+                    self.edisp = EnergyMigrationMakerBase.from_name(
+                        self.edisp_parameterization, parent=self, fov_offset_n_bins=1
+                    )
+                    self.aeff = EffectiveAreaMakerBase.from_name(
+                        self.aeff_parameterization,
+                        parent=self,
+                        fov_offset_n_bins=1,
+                    )
+                    if self.full_enclosure:
+                        self.psf = PsfMakerBase.from_name(
+                            self.psf_parameterization, parent=self, fov_offset_n_bins=1
+                        )
+                    if self.do_background:
+                        self.bkg = BackgroundRateMakerBase.from_name(
+                            self.bkg_parameterization, parent=self, fov_offset_n_bins=1
+                        )
+                    if self.do_benchmarks:
+                        self.ang_res = AngularResolutionMakerBase.from_name(
+                            self.ang_res_parameterization,
+                            parent=self,
+                            fov_offset_n_bins=1,
+                        )
+                        self.bias_res = EnergyBiasResolutionMakerBase.from_name(
+                            self.energy_bias_res_parameterization,
+                            parent=self,
+                            fov_offset_n_bins=1,
+                        )
+                        self.sens = SensitivityMakerBase.from_name(
+                            self.sens_parameterization, parent=self, fov_offset_n_bins=1
+                        )
+
+        reduced_events = self.calculate_selections(reduced_events)
+
+        self.signal_events = reduced_events["gammas"]
+        if self.do_background:
+            self.background_events = self._stack_background(reduced_events)
+
+        hdus = [fits.PrimaryHDU()]
+        hdus = self._make_signal_irf_hdus(
+            hdus, reduced_events["gammas_meta"]["sim_info"]
+        )
+        if self.do_background:
+            hdus.append(
+                self.bkg.make_bkg_hdu(
+                    self.background_events[self.background_events["selected_gh"]],
+                    self.obs_time,
+                )
+            )
+            if "protons" in reduced_events.keys():
+                hdus.append(
+                    self.aeff.make_aeff_hdu(
+                        events=reduced_events["protons"][
+                            reduced_events["protons"]["selected_gh"]
+                        ],
+                        point_like=not self.full_enclosure,
+                        signal_is_point_like=False,
+                        sim_info=reduced_events["protons_meta"]["sim_info"],
+                        extname="EFFECTIVE AREA PROTONS",
+                    )
+                )
+            if "electrons" in reduced_events.keys():
+                hdus.append(
+                    self.aeff.make_aeff_hdu(
+                        events=reduced_events["electrons"][
+                            reduced_events["electrons"]["selected_gh"]
+                        ],
+                        point_like=not self.full_enclosure,
+                        signal_is_point_like=False,
+                        sim_info=reduced_events["electrons_meta"]["sim_info"],
+                        extname="EFFECTIVE AREA ELECTRONS",
+                    )
+                )
+        self.hdus = hdus
+
+        if self.do_benchmarks:
+            b_hdus = [fits.PrimaryHDU()]
+            b_hdus = self._make_benchmark_hdus(b_hdus)
+            self.b_hdus = b_hdus
+
+    def finish(self):
+        self.log.info("Writing output file '%s'" % self.output_path)
+        fits.HDUList(self.hdus).writeto(
+            self.output_path,
+            overwrite=self.overwrite,
+        )
+        Provenance().add_output_file(self.output_path, role="IRF")
+        if self.do_benchmarks:
+            self.log.info("Writing benchmark file to '%s'" % self.b_output)
+            fits.HDUList(self.b_hdus).writeto(
+                self.b_output,
+                overwrite=self.overwrite,
+            )
+            Provenance().add_output_file(self.b_output, role="Benchmark")
+
+
+def main():
+    tool = IrfTool()
+    tool.run()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/ctapipe/tools/optimize_event_selection.py b/src/ctapipe/tools/optimize_event_selection.py
new file mode 100644
index 00000000000..7945ee15e3a
--- /dev/null
+++ b/src/ctapipe/tools/optimize_event_selection.py
@@ -0,0 +1,216 @@
+"""Tool to generate selections for IRF production"""
+
+import astropy.units as u
+from astropy.table import vstack
+
+from ..core import Provenance, Tool, traits
+from ..core.traits import AstroQuantity, Bool, Float, Integer, classes_with_traits, flag
+from ..irf import EventsLoader, Spectra
+from ..irf.optimize import CutOptimizerBase
+
+
+class IrfEventSelector(Tool):
+    name = "ctapipe-optimize-event-selection"
+    description = "Tool to create optimized cuts for IRF generation"
+
+    gamma_file = traits.Path(
+        default_value=None, directory_ok=False, help="Gamma input filename and path"
+    ).tag(config=True)
+
+    gamma_sim_spectrum = traits.UseEnum(
+        Spectra,
+        default_value=Spectra.CRAB_HEGRA,
+        help="Name of the pyirf spectrum used for the simulated gamma spectrum",
+    ).tag(config=True)
+
+    proton_file = traits.Path(
+        default_value=None,
+        directory_ok=False,
+        allow_none=True,
+        help=(
+            "Proton input filename and path. "
+            "Not needed, if ``optimization_algorithm = 'PercentileCuts'``."
+        ),
+    ).tag(config=True)
+
+    proton_sim_spectrum = traits.UseEnum(
+        Spectra,
+        default_value=Spectra.IRFDOC_PROTON_SPECTRUM,
+        help="Name of the pyirf spectrum used for the simulated proton spectrum",
+    ).tag(config=True)
+
+    electron_file = traits.Path(
+        default_value=None,
+        directory_ok=False,
+        allow_none=True,
+        help=(
+            "Electron input filename and path. "
+            "Not needed, if ``optimization_algorithm = 'PercentileCuts'``."
+        ),
+    ).tag(config=True)
+
+    electron_sim_spectrum = traits.UseEnum(
+        Spectra,
+        default_value=Spectra.IRFDOC_ELECTRON_SPECTRUM,
+        help="Name of the pyirf spectrum used for the simulated electron spectrum",
+    ).tag(config=True)
+
+    chunk_size = Integer(
+        default_value=100000,
+        allow_none=True,
+        help="How many subarray events to load at once when preselecting events.",
+    ).tag(config=True)
+
+    output_path = traits.Path(
+        default_value="./Selection_Cuts.fits",
+        allow_none=False,
+        directory_ok=False,
+        help="Output file storing optimization result",
+    ).tag(config=True)
+
+    obs_time = AstroQuantity(
+        default_value=u.Quantity(50, u.hour),
+        physical_type=u.physical.time,
+        help="Observation time in the form ``<value> <unit>``",
+    ).tag(config=True)
+
+    alpha = Float(
+        default_value=0.2, help="Ratio between size of on and off regions."
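+        # Forwarded to the cut optimizer, where it enters the significance
+        # estimate as the ratio of on to off exposure.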
+ ).tag(config=True) + + optimization_algorithm = traits.ComponentName( + CutOptimizerBase, + default_value="PointSourceSensitivityOptimizer", + help="The cut optimization algorithm to be used.", + ).tag(config=True) + + full_enclosure = Bool( + False, + help="Compute only the G/H separation cut needed for full enclosure IRF.", + ).tag(config=True) + + aliases = { + "gamma-file": "IrfEventSelector.gamma_file", + "proton-file": "IrfEventSelector.proton_file", + "electron-file": "IrfEventSelector.electron_file", + "output": "IrfEventSelector.output_path", + "chunk_size": "IrfEventSelector.chunk_size", + } + + flags = { + **flag( + "full-enclosure", + "IrfEventSelector.full_enclosure", + "Compute only the G/H separation cut.", + "Compute the G/H separation cut and the theta cut.", + ) + } + + classes = [EventsLoader] + classes_with_traits(CutOptimizerBase) + + def setup(self): + self.optimizer = CutOptimizerBase.from_name( + self.optimization_algorithm, parent=self + ) + self.particles = [ + EventsLoader( + parent=self, + kind="gammas", + file=self.gamma_file, + target_spectrum=self.gamma_sim_spectrum, + ) + ] + if self.optimization_algorithm != "PercentileCuts": + self.particles.append( + EventsLoader( + parent=self, + kind="protons", + file=self.proton_file, + target_spectrum=self.proton_sim_spectrum, + ) + ) + self.particles.append( + EventsLoader( + parent=self, + kind="electrons", + file=self.electron_file, + target_spectrum=self.electron_sim_spectrum, + ) + ) + + def start(self): + reduced_events = dict() + for sel in self.particles: + evs, cnt, meta = sel.load_preselected_events(self.chunk_size, self.obs_time) + if self.optimization_algorithm == "PointSourceSensitivityOptimizer": + evs = sel.make_event_weights( + evs, + meta["spectrum"], + ( + self.optimizer.min_bkg_fov_offset, + self.optimizer.max_bkg_fov_offset, + ), + ) + + reduced_events[sel.kind] = evs + reduced_events[f"{sel.kind}_count"] = cnt + if sel.kind == "gammas": + self.sim_info = meta["sim_info"] + self.gamma_spectrum = meta["spectrum"] + + self.signal_events = reduced_events["gammas"] + + if self.optimization_algorithm == "PercentileCuts": + self.log.debug("Loaded %d gammas" % reduced_events["gammas_count"]) + self.log.debug("Keeping %d gammas" % len(reduced_events["gammas"])) + self.log.info("Optimizing cuts using %d signal" % len(self.signal_events)) + else: + self.log.debug( + "Loaded %d gammas, %d protons, %d electrons" + % ( + reduced_events["gammas_count"], + reduced_events["protons_count"], + reduced_events["electrons_count"], + ) + ) + self.log.debug( + "Keeping %d gammas, %d protons, %d electrons" + % ( + len(reduced_events["gammas"]), + len(reduced_events["protons"]), + len(reduced_events["electrons"]), + ) + ) + self.background_events = vstack( + [reduced_events["protons"], reduced_events["electrons"]] + ) + self.log.info( + "Optimizing cuts using %d signal and %d background events" + % (len(self.signal_events), len(self.background_events)), + ) + + result = self.optimizer.optimize_cuts( + signal=self.signal_events, + background=self.background_events + if self.optimization_algorithm != "PercentileCuts" + else None, + alpha=self.alpha, + precuts=self.particles[0].epp, # identical precuts for all particle types + clf_prefix=self.particles[0].epp.gammaness_classifier, + point_like=not self.full_enclosure, + ) + self.result = result + + def finish(self): + self.log.info("Writing results to %s" % self.output_path) + Provenance().add_output_file(self.output_path, role="Optimization Result") + 
self.result.write(self.output_path, self.overwrite)
+
+
+def main():
+    tool = IrfEventSelector()
+    tool.run()
+
+
+if __name__ == "__main__":
+    main()
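
For reference, a minimal sketch of how the two new tools chain together, mirroring the ``run_tool`` usage in the test fixtures above; all input and output paths here are hypothetical placeholders:

    from ctapipe.core import run_tool
    from ctapipe.tools.make_irf import IrfTool
    from ctapipe.tools.optimize_event_selection import IrfEventSelector

    # 1. Optimize the G/H (and, unless --full-enclosure, theta) cuts
    #    from gamma, proton, and electron DL2 files.
    run_tool(
        IrfEventSelector(),
        argv=[
            "--gamma-file=gammas.dl2.h5",
            "--proton-file=protons.dl2.h5",
            "--electron-file=electrons.dl2.h5",
            "--output=cuts.fits",
        ],
        raises=True,
    )

    # 2. Produce the IRF (plus benchmark HDUs) using the optimized cuts.
    run_tool(
        IrfTool(),
        argv=[
            "--cuts=cuts.fits",
            "--gamma-file=gammas.dl2.h5",
            "--proton-file=protons.dl2.h5",
            "--electron-file=electrons.dl2.h5",
            "--output=irf.fits.gz",
            "--do-benchmarks",
        ],
        raises=True,
    )

The same steps are available on the command line via the ``ctapipe-optimize-event-selection`` and ``ctapipe-make-irf`` entry points registered in pyproject.toml.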