Skip to content

Commit

Permalink
Merge pull request #87 from cta-observatory/add_files_check
Browse files Browse the repository at this point in the history
Add file check for Events and RunHeaders trees.
  • Loading branch information
aleberti authored Jan 22, 2024
2 parents c0abcde + b58a944 commit f4cdf2e
Show file tree
Hide file tree
Showing 5 changed files with 160 additions and 20 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
ctapipe-version: ["v0.19.2",]
python-version: ["3.9", "3.10", "3.11"]
ctapipe-version: ["v0.19.2"]

defaults:
run:
Expand Down
90 changes: 75 additions & 15 deletions ctapipe_io_magic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@
)

from .mars_datalevels import MARSDataLevel
from .exceptions import (
MissingInputFilesError,
FailedFileCheckError,
MissingDriveReportError,
)
from .version import __version__

from .constants import (
Expand All @@ -57,7 +62,14 @@
DATA_MAGIC_LST_TRIGGER,
)

__all__ = ["MAGICEventSource", "MARSDataLevel", "__version__"]
# Names exported by `from ctapipe_io_magic import *`: the event source,
# MARSDataLevel, the three exception classes imported from `.exceptions`,
# and the package version string.
__all__ = [
"MAGICEventSource",
"MARSDataLevel",
"MissingInputFilesError",
"FailedFileCheckError",
"MissingDriveReportError",
"__version__",
]

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -88,15 +100,6 @@ def load_camera_geometry():
return CameraGeometry.from_table(f)


class MissingDriveReportError(Exception):
    """
    Exception raised when a subrun does not have drive reports.

    Parameters
    ----------
    message : str
        Human-readable description of the problem. It is stored on the
        instance as ``message`` and also forwarded to ``Exception`` so that
        ``str(err)`` and ``err.args`` carry it.
    """

    def __init__(self, message):
        # Forward the message to the base class: without this, constructing
        # the exception with ``message=...`` as a keyword leaves ``args``
        # empty and ``str(err)`` evaluates to an empty string.
        super().__init__(message)
        self.message = message


class MAGICEventSource(EventSource):
"""
EventSource for MAGIC calibrated data.
Expand Down Expand Up @@ -179,6 +182,8 @@ def __init__(self, input_url=None, config=None, parent=None, **kwargs):
reg_comp_mc = re.compile(regex_mc)

ls = Path(path).iterdir()

self.file_list = []
self.file_list_drive = []

for file_path in ls:
Expand All @@ -188,6 +193,12 @@ def __init__(self, input_url=None, config=None, parent=None, **kwargs):
):
self.file_list_drive.append(file_path)

if not len(self.file_list_drive):
raise MissingInputFilesError(
f"No input files found in {path}. Exiting."
f"Check your input: {input_url}."
)

self.file_list_drive.sort()

if self.process_run:
Expand All @@ -197,6 +208,12 @@ def __init__(self, input_url=None, config=None, parent=None, **kwargs):

# Retrieving the list of run numbers corresponding to the data files
self.files_ = [uproot.open(rootf) for rootf in self.file_list]

is_check_valid = self.check_files()

if not is_check_valid:
raise FailedFileCheckError("Validity check for the files failed. Exiting.")

run_info = self.parse_run_info()

self.run_id = run_info[0][0]
Expand Down Expand Up @@ -405,6 +422,51 @@ def get_run_info_from_name(file_name):

return run_number, is_mc, telescope, datalevel

def check_files(self):
    """Check that the input files contain the needed trees.

    Two kinds of checks are performed on ``self.files_`` (the opened ROOT
    files):

    * When exactly one file is given and it has no ``OriginalMC`` tree, the
      file must contain both a ``Drive`` and a ``Trigger`` tree.
    * Every file must contain the ``RunHeaders`` and ``Events`` trees.

    Every problem found is reported through the module logger.

    Returns
    -------
    bool
        ``True`` if all checks pass (also when there are no files at all),
        ``False`` otherwise.
    """

    needed_trees = ("RunHeaders", "Events")

    if len(self.files_) == 1:
        single_file_keys = self.files_[0].keys(cycle=False)
        # Files carrying an OriginalMC tree are exempt from the Drive/Trigger
        # requirement (presumably because they are simulations -- confirm).
        if "OriginalMC" not in single_file_keys:
            if "Drive" not in single_file_keys:
                logger.error(
                    "Cannot proceed without Drive information for a single file."
                )
                return False
            if "Trigger" not in single_file_keys:
                logger.error(
                    "Cannot proceed without Trigger information for a single file."
                )
                return False

    num_invalid_files = 0

    for rootf in self.files_:
        # Look the keys up once per file instead of once per needed tree.
        available_trees = rootf.keys(cycle=False)
        missing_trees = [
            tree for tree in needed_trees if tree not in available_trees
        ]

        for tree in missing_trees:
            logger.error(
                f"File {rootf.file_path} does not have a {tree} tree. "
                f"Please check the file and try again. If the file "
                f"cannot be recovered, exclude it from the analysis."
            )

        # Count each problematic file once, however many trees it lacks.
        if missing_trees:
            num_invalid_files += 1

    return num_invalid_files == 0

def parse_run_info(self):
"""
Parses run info from the TTrees in the ROOT file
Expand Down Expand Up @@ -1116,13 +1178,11 @@ def get_event_time_difference(self):
if self.is_hast:
event_cut = (
f"(MTriggerPattern.fPrescaled == {data_trigger_pattern})"
f" | (MTriggerPattern.fPrescaled == {DATA_TOPOLOGICAL_TRIGGER})"
f" | (MTriggerPattern.fPrescaled == {DATA_MAGIC_LST_TRIGGER})"
f" | (MTriggerPattern.fPrescaled == {DATA_TOPOLOGICAL_TRIGGER})"
f" | (MTriggerPattern.fPrescaled == {DATA_MAGIC_LST_TRIGGER})"
)
else:
event_cut = (
f"(MTriggerPattern.fPrescaled == {data_trigger_pattern})",
)
event_cut = (f"(MTriggerPattern.fPrescaled == {data_trigger_pattern})",)

for uproot_file in self.files_:
event_info = uproot_file["Events"].arrays(
Expand Down
22 changes: 22 additions & 0 deletions ctapipe_io_magic/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
class MissingInputFilesError(Exception):
    """Signal that no input files are available."""


class FailedFileCheckError(Exception):
    """Signal that the check performed on the files did not succeed."""


class MissingDriveReportError(Exception):
    """Signal that a subrun comes without drive reports."""
60 changes: 57 additions & 3 deletions ctapipe_io_magic/tests/test_magic_event_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,26 @@
test_calibrated_real_dir / "20210314_M2_05095172.002_Y_CrabNebula-W0.40+035.root",
]

# The four single-element lists below point at variants of the same M1 subrun
# file, distinguished by the suffix of the file name. They are concatenated
# into test_calibrated_missing_trees.
# NOTE(review): presumably each variant contains only the tree named in its
# suffix -- confirm against the test data.

# Variant tagged "_only_events".
test_calibrated_real_only_events = [
test_calibrated_real_dir
/ "20210314_M1_05095173.001_Y_CrabNebula-W0.40+035_only_events.root",
]

# Variant tagged "_only_drive".
test_calibrated_real_only_drive = [
test_calibrated_real_dir
/ "20210314_M1_05095173.001_Y_CrabNebula-W0.40+035_only_drive.root",
]

# Variant tagged "_only_runh".
test_calibrated_real_only_runh = [
test_calibrated_real_dir
/ "20210314_M1_05095173.001_Y_CrabNebula-W0.40+035_only_runh.root",
]

# Variant tagged "_only_trigger".
test_calibrated_real_only_trigger = [
test_calibrated_real_dir
/ "20210314_M1_05095173.001_Y_CrabNebula-W0.40+035_only_trigger.root",
]

test_calibrated_real_hast = [
test_calibrated_real_dir / "20230324_M1_05106879.001_Y_1ES0806+524-W0.40+000.root",
test_calibrated_real_dir / "20230324_M1_05106879.002_Y_1ES0806+524-W0.40+000.root",
Expand All @@ -32,6 +52,13 @@
test_calibrated_real + test_calibrated_simulated + test_calibrated_real_hast
)

# All "_only_*" file variants in one list; used to parametrize
# test_check_files, which expects FailedFileCheckError for each entry.
test_calibrated_missing_trees = (
test_calibrated_real_only_events
+ test_calibrated_real_only_drive
+ test_calibrated_real_only_runh
+ test_calibrated_real_only_trigger
)

data_dict = dict()

data_dict["20210314_M1_05095172.001_Y_CrabNebula-W0.40+035.root"] = dict()
Expand Down Expand Up @@ -335,9 +362,18 @@ def test_number_of_events(dataset):
count_2_tel_m1_m2 += 1

assert count_3_tel == data_dict[source.input_url.name]["n_events_3_tel"]
assert count_2_tel_m1_lst == data_dict[source.input_url.name]["n_events_2_tel_m1_lst"]
assert count_2_tel_m2_lst == data_dict[source.input_url.name]["n_events_2_tel_m2_lst"]
assert count_2_tel_m1_m2 == data_dict[source.input_url.name]["n_events_2_tel_m1_m2"]
assert (
count_2_tel_m1_lst
== data_dict[source.input_url.name]["n_events_2_tel_m1_lst"]
)
assert (
count_2_tel_m2_lst
== data_dict[source.input_url.name]["n_events_2_tel_m2_lst"]
)
assert (
count_2_tel_m1_m2
== data_dict[source.input_url.name]["n_events_2_tel_m1_m2"]
)

# if '_M1_' in dataset.name:
# assert run['data'].n_cosmics_stereo_events_m1 == data_dict[source.input_url.name]['n_events_stereo']
Expand Down Expand Up @@ -478,6 +514,24 @@ def test_focal_length_choice(dataset):
)


@pytest.mark.parametrize("dataset", test_calibrated_missing_trees)
def test_check_files(dataset):
    """Building the event source from ``dataset`` must raise FailedFileCheckError."""
    from ctapipe_io_magic import FailedFileCheckError
    from ctapipe_io_magic import MAGICEventSource

    expected_failure = pytest.raises(FailedFileCheckError)
    with expected_failure:
        MAGICEventSource(input_url=dataset, process_run=False)


def test_check_missing_files():
    """Building the event source from the fake file name must raise MissingInputFilesError."""
    from ctapipe_io_magic import MissingInputFilesError
    from ctapipe_io_magic import MAGICEventSource

    # NOTE(review): presumably no file with this name exists in the test data.
    fake_input = "20501312_M1_05095172.001_Y_FakeSource-W0.40+035.root"

    with pytest.raises(MissingInputFilesError):
        MAGICEventSource(input_url=fake_input, process_run=False)


# def test_eventseeker():
# dataset = get_dataset_path("20131004_M1_05029747.003_Y_MagicCrab-W0.40+035.root")
#
Expand Down
4 changes: 4 additions & 0 deletions download_test_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ echo "https://www.magic.iac.es/mcp-testdata/test_data/real/calibrated/20230324_M
echo "https://www.magic.iac.es/mcp-testdata/test_data/real/calibrated/20230324_M1_05106879.002_Y_1ES0806+524-W0.40+000.root" >> test_data_real.txt
echo "https://www.magic.iac.es/mcp-testdata/test_data/real/calibrated/20230324_M2_05106879.001_Y_1ES0806+524-W0.40+000.root" >> test_data_real.txt
echo "https://www.magic.iac.es/mcp-testdata/test_data/real/calibrated/20230324_M2_05106879.002_Y_1ES0806+524-W0.40+000.root" >> test_data_real.txt
echo "https://www.magic.iac.es/mcp-testdata/test_data/real/calibrated/20210314_M1_05095173.001_Y_CrabNebula-W0.40+035_only_events.root" >> test_data_real.txt
echo "https://www.magic.iac.es/mcp-testdata/test_data/real/calibrated/20210314_M1_05095173.001_Y_CrabNebula-W0.40+035_only_drive.root" >> test_data_real.txt
echo "https://www.magic.iac.es/mcp-testdata/test_data/real/calibrated/20210314_M1_05095173.001_Y_CrabNebula-W0.40+035_only_runh.root" >> test_data_real.txt
echo "https://www.magic.iac.es/mcp-testdata/test_data/real/calibrated/20210314_M1_05095173.001_Y_CrabNebula-W0.40+035_only_trigger.root" >> test_data_real.txt

echo "https://www.magic.iac.es/mcp-testdata/test_data/simulated/calibrated/GA_M1_za35to50_8_824318_Y_w0.root" > test_data_simulated.txt
echo "https://www.magic.iac.es/mcp-testdata/test_data/simulated/calibrated/GA_M1_za35to50_8_824319_Y_w0.root" >> test_data_simulated.txt
Expand Down

0 comments on commit f4cdf2e

Please sign in to comment.