diff --git a/Framework/include/Framework/EventFile.h b/Framework/include/Framework/EventFile.h index 37b5f022e..cfb6c0cf6 100644 --- a/Framework/include/Framework/EventFile.h +++ b/Framework/include/Framework/EventFile.h @@ -95,6 +95,26 @@ class EventFile { */ ~EventFile(); + /** + * Check if the file we have is corrupted + * + * The check on if the file is corrupted is only helpful + * for input files, but we attempt to have a reasonable + * definition for output files as well. + * + * ## Input Files + * There are two ways a file can be corrupted and these + * may not be mutually exclusive. + * 1. The LDMX_Events tree does not exist within it. + * 2. The IsZombie flag of the TFile is set + * + * ## Output Files + * For output files, we just check the IsZombie flag + * of the TFile. Again, since we are actively writing + * to this file, a corruption check is not very stable. + */ + bool isCorrupted() const; + /** * Add a rule for dropping collections from the output. * diff --git a/Framework/include/Framework/Process.h b/Framework/include/Framework/Process.h index 0b6026a81..b4a70756f 100644 --- a/Framework/include/Framework/Process.h +++ b/Framework/include/Framework/Process.h @@ -193,6 +193,11 @@ class Process { /** Maximum number of attempts to make before giving up on an event */ int maxTries_; + /** + * Allow the Process to skip input files that are corrupted + */ + bool skipCorruptedInputFiles_; + /** Storage controller */ StorageControl storageController_; diff --git a/Framework/src/Framework/EventFile.cxx b/Framework/src/Framework/EventFile.cxx index ec281a070..b5e35c87d 100644 --- a/Framework/src/Framework/EventFile.cxx +++ b/Framework/src/Framework/EventFile.cxx @@ -58,13 +58,31 @@ EventFile::EventFile(const framework::config::Parameters ¶ms, "' is not readable or does not exist."); } + bool skip_corrupted = + params.getParameter("skipCorruptedInputFiles", false); + + // make sure file is not a zombie file + // (i.e. 
process ended without closing or the file was corrupted some other + // way) + if (file_->IsZombie()) { + if (not skip_corrupted) { + EXCEPTION_RAISE("FileError", "Input file '" + fileName_ + + "' is corrupted. Framework will not " + "attempt to recover this file."); + } + return; + } + // Get the tree name from the configuration auto tree_name{params.getParameter("tree_name")}; tree_ = static_cast(file_->Get(tree_name.c_str())); if (!tree_) { - EXCEPTION_RAISE("FileError", "File '" + fileName_ + - "' does not have a TTree named '" + - tree_name + "' in it."); + if (not skip_corrupted) { + EXCEPTION_RAISE("FileError", "File '" + fileName_ + + "' does not have a TTree named '" + + tree_name + "' in it."); + } + return; } entries_ = tree_->GetEntriesFast(); } @@ -97,6 +115,11 @@ EventFile::~EventFile() { file_->Close(); } +bool EventFile::isCorrupted() const { + if (isOutputFile_) return file_->IsZombie(); + return (!tree_ or file_->IsZombie()); +} + void EventFile::addDrop(const std::string &rule) { int offset; bool isKeep = false, isDrop = false, isIgnore = false; diff --git a/Framework/src/Framework/Process.cxx b/Framework/src/Framework/Process.cxx index 12d6298e6..57d3cd74e 100644 --- a/Framework/src/Framework/Process.cxx +++ b/Framework/src/Framework/Process.cxx @@ -36,6 +36,8 @@ Process::Process(const framework::config::Parameters &configuration) configuration.getParameter("compressionSetting", 9); termLevelInt_ = configuration.getParameter("termLogLevel", 2); fileLevelInt_ = configuration.getParameter("fileLogLevel", 0); + skipCorruptedInputFiles_ = + configuration.getParameter("skipCorruptedInputFiles", false); inputFiles_ = configuration.getParameter>("inputFiles", {}); @@ -277,6 +279,19 @@ void Process::run() { int wasRun = -1; for (auto infilename : inputFiles_) { EventFile inFile(config_, infilename); + if (inFile.isCorrupted()) { + if (skipCorruptedInputFiles_) { + ldmx_log(warn) << "Input file '" << infilename + << "' was found to be corrupted. 
Skipping."; + continue; + } else { + EXCEPTION_RAISE( + "BadCode", + "We should never get here. " + "EventFile is corrupted but we aren't skipping corrupted inputs. " + "EventFile should be throwing its own exceptions in this case."); + } + } ldmx_log(info) << "Opening file " << infilename; onFileOpen(inFile);