Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

check for corrupted zombie files in ldmx sw #1443

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions Framework/include/Framework/EventFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,26 @@ class EventFile {
*/
~EventFile();

/**
* Check if the file we have is corrupted
*
* The check on if the file is corrupted is only helpful
* for input files, but we attempt to have a resonable
* definition for output files as well.
*
* ## Input Files
* There are two ways a file can be corrupted and these
* may not be mutually exclusive.
* 1. The LDMX_Events tree does not exist within it.
* 2. The IsZombie flag of the TFile is set
*
* ## Output Files
* For output files, we just check the IsZombie flag
* of the TFile. Again, since we are actively writing
* to this file, a corruption check is not very stable.
*/
bool isCorrupted() const;

/**
* Add a rule for dropping collections from the output.
*
Expand Down
5 changes: 5 additions & 0 deletions Framework/include/Framework/Process.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,11 @@ class Process {
/** Maximum number of attempts to make before giving up on an event */
int maxTries_;

/**
* allow the Process to skip input files that are corrupted
*/
bool skipCorruptedInputFiles_;

/** Storage controller */
StorageControl storageController_;

Expand Down
29 changes: 26 additions & 3 deletions Framework/src/Framework/EventFile.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,31 @@ EventFile::EventFile(const framework::config::Parameters &params,
"' is not readable or does not exist.");
}

bool skip_corrupted =
params.getParameter<bool>("skipCorruptedInputFiles", false);

// make sure file is not a zombie file
// (i.e. process ended without closing or the file was corrupted some other
// way)
if (file_->IsZombie()) {
if (not skip_corrupted) {
EXCEPTION_RAISE("FileError", "Input file '" + fileName_ +
"' is corrupted. Framework will not "
"attempt to recover this file.");
}
return;
}

// Get the tree name from the configuration
auto tree_name{params.getParameter<std::string>("tree_name")};
tree_ = static_cast<TTree *>(file_->Get(tree_name.c_str()));
if (!tree_) {
EXCEPTION_RAISE("FileError", "File '" + fileName_ +
"' does not have a TTree named '" +
tree_name + "' in it.");
if (not skip_corrupted) {
EXCEPTION_RAISE("FileError", "File '" + fileName_ +
"' does not have a TTree named '" +
tree_name + "' in it.");
}
return;
}
entries_ = tree_->GetEntriesFast();
}
Expand Down Expand Up @@ -97,6 +115,11 @@ EventFile::~EventFile() {
file_->Close();
}

bool EventFile::isCorrupted() const {
if (isOutputFile_) return file_->IsZombie();
return (!tree_ or file_->IsZombie());
}

void EventFile::addDrop(const std::string &rule) {
int offset;
bool isKeep = false, isDrop = false, isIgnore = false;
Expand Down
15 changes: 15 additions & 0 deletions Framework/src/Framework/Process.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ Process::Process(const framework::config::Parameters &configuration)
configuration.getParameter<int>("compressionSetting", 9);
termLevelInt_ = configuration.getParameter<int>("termLogLevel", 2);
fileLevelInt_ = configuration.getParameter<int>("fileLogLevel", 0);
skipCorruptedInputFiles_ =
configuration.getParameter<bool>("skipCorruptedInputFiles", false);

inputFiles_ =
configuration.getParameter<std::vector<std::string>>("inputFiles", {});
Expand Down Expand Up @@ -277,6 +279,19 @@ void Process::run() {
int wasRun = -1;
for (auto infilename : inputFiles_) {
EventFile inFile(config_, infilename);
if (inFile.isCorrupted()) {
if (skipCorruptedInputFiles_) {
ldmx_log(warn) << "Input file '" << infilename
<< "' was found to be corrupted. Skipping.";
continue;
} else {
EXCEPTION_RAISE(
"BadCode",
"We should never get here. "
"EventFile is corrupted but we aren't skipping corrupted inputs. "
"EventFile should be throwing its own exceptions in this case.");
}
}

ldmx_log(info) << "Opening file " << infilename;
onFileOpen(inFile);
Expand Down
Loading