Skip to content

Commit

Permalink
Merge pull request #33 from cms-DQM/index_capacity_print
Browse files Browse the repository at this point in the history
  • Loading branch information
nothingface0 authored Jul 23, 2024
2 parents a18102d + 8aa66a8 commit de813c8
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 44 deletions.
46 changes: 24 additions & 22 deletions bin/visDQMIndexMonitoring
Original file line number Diff line number Diff line change
Expand Up @@ -16,41 +16,46 @@ INSTALLATION_DIR=/data/srv
TMP_CATALOGUE_FILE=/tmp/dqmgui_catalogue
EMAIL_ADDRESS_TO_NOTIFY=cms-dqm-coreTeam@cern.ch
# Alarm threshold value [0.0, 1.0].
# An alarm is triggered if any of the tree sizes exceed 80% of their capacity.
ALERT_THRESHOLD="0.8"
# An alarm is triggered if any of the tree sizes exceed 90% of their capacity.
ALERT_THRESHOLD="0.9"

# Verify that the files and directories this script depends on are present.
# Prints a diagnostic on stdout and terminates the script with status 1 when
# the installation directory, the flavor's index directory or the
# VisDQMIndex.h header cannot be found.
preliminary_checks() {
    local index_dir="$INSTALLATION_DIR/state/dqmgui/$FLAVOR/ix128"
    local limits_header="$INSTALLATION_DIR/current/apps/dqmgui/128/src/cpp/DQM/VisDQMIndex.h"

    # Both the installation root and the index of the selected flavor must exist
    if ! { [ -d "$INSTALLATION_DIR" ] && [ -d "$index_dir" ]; }; then
        echo "Could not find $FLAVOR DQMGUI in $INSTALLATION_DIR"
        exit 1
    fi

    # Needed to get the index limits
    [ -f "$limits_header" ] && return
    echo "Could not find required \"VisDQMIndex.h\" file in $INSTALLATION_DIR"
    exit 1
}

# Activate the DQMGUI environment with the required env variables.
# Sources env.sh from the installation under $INSTALLATION_DIR into the
# current shell. The path is quoted so that an installation directory
# containing whitespace or glob characters is not split into several words.
# Returns the exit status of the sourced file.
activate_env() {
    source "$INSTALLATION_DIR/current/apps/dqmgui/128/etc/profile.d/env.sh"
}

# Sources the env.sh file needed for activating the DQMGUI environment,
# then dumps the Index' catalogue to "${TMP_CATALOGUE_FILE}_${FLAVOR}".
# All paths are quoted so that $INSTALLATION_DIR or $FLAVOR values containing
# whitespace or glob characters reach visDQMIndex as a single argument.
# Returns the exit status of the visDQMIndex invocation.
dump_catalogue() {
    source "$INSTALLATION_DIR/current/apps/dqmgui/128/etc/profile.d/env.sh"
    visDQMIndex dump "$INSTALLATION_DIR/state/dqmgui/$FLAVOR/ix128" catalogue >"${TMP_CATALOGUE_FILE}_${FLAVOR}"
}

# Read the capacity of each index tree from "visDQMIndex get_capacity" and
# record it in the global associative array tree_limits (tree name -> limit).
#
# Every whitespace-separated token of the command output is expected to have
# the form "TREE-NAME,LIMIT". The fields are split with parameter expansion
# instead of echo/cut pipelines, so no subprocess is spawned per entry.
# Tokens with an empty name, no comma, or a non-numeric limit are reported on
# stderr and skipped, so that a bogus limit never reaches later arithmetic.
#
# tree_name and tree_limit are deliberately left as globals (not "local"),
# as in the original implementation, to preserve its side effects for any
# other code in the script that reads them.
get_index_capacity() {
    local entry name rest limit
    for entry in $(visDQMIndex get_capacity); do
        name=${entry%%,*}   # first comma-separated field
        rest=${entry#*,}
        limit=${rest%%,*}   # second comma-separated field
        if [[ -z "$name" || "$entry" != *,* || ! "$limit" =~ ^[0-9]+$ ]]; then
            echo "Ignoring malformed index capacity entry: $entry" >&2
            continue
        fi
        tree_name=$name
        tree_limit=$limit
        tree_limits[$tree_name]=$tree_limit
    done
}

check_index_limits_and_send_email() {
visdqmindex_header_file="$INSTALLATION_DIR/current/apps/dqmgui/128/src/cpp/DQM/VisDQMIndex.h"
msg=
# Run over all tree types
for i in $(seq 0 $((${#_tree_types[@]} - 1))); do
tree_current_size=$(grep "${_tree_types[$i]}" "${TMP_CATALOGUE_FILE}_${FLAVOR}" | wc -l)
tree_limit=$(grep -oE "${_tree_types_limit_name[$i]}[[:space:]]+[0-9]+" "$visdqmindex_header_file" | awk '{print $2}')
percent_full=$(bc -l <<<"($tree_current_size/$tree_limit)*100")
echo "Found $tree_current_size ${_tree_types[$i]} in the catalogue. Limit is $tree_limit ($(printf '%.2f' $percent_full)% full)"
for tree_name in "${!tree_limits[@]}"; do
tree_current_size=$(grep "$tree_name" "${TMP_CATALOGUE_FILE}_${FLAVOR}" | wc -l)
percent_full=$(bc -l <<<"($tree_current_size/${tree_limits[$tree_name]})*100")
echo "Found $tree_current_size $tree_name in the catalogue. Limit is ${tree_limits[$tree_name]} ($(printf '%.2f' $percent_full)% full)"
# Check if alarm threshold is exceeded
threshold=$(printf '%.0f' $(bc <<<"$ALERT_THRESHOLD * $tree_limit"))
threshold=$(printf '%.0f' $(bc <<<"$ALERT_THRESHOLD * ${tree_limits[$tree_name]}"))
if [ $tree_current_size -gt $threshold ]; then
msg=$(printf "%s" "${msg}WARNING: DQMGUI's index tree ${_tree_types[$i]} has $tree_current_size entries out of the maximum ${tree_limit} ($(printf '%.2f' $percent_full)%% full)\n")
# Report the limit looked up for $tree_name, i.e. the same value the
# percentage and the alarm threshold above are computed from.
msg=$(printf "%s" "${msg}WARNING: DQMGUI's index tree $tree_name has $tree_current_size entries out of the maximum ${tree_limits[$tree_name]} ($(printf '%.2f' $percent_full)%% full)\n")
fi
done
if [ -n "$msg" ]; then
Expand All @@ -64,9 +69,11 @@ cleanup() {
}

### Main script

declare -A tree_limits=()
declare -a steps=(
preliminary_checks
activate_env
get_index_capacity
dump_catalogue
check_index_limits_and_send_email
cleanup
Expand All @@ -89,11 +96,6 @@ for ARGUMENT in "$@"; do
eval "$KEY=$VALUE"
done

# The different types of trees in the index which you get by running visDQMIndex dump catalogue
_tree_types=("CMSSW-VERSION" "DATASET-NAME" "OBJECT-NAME" "SOURCE-FILE")
# The definition names of the limits, in VisDQMIndex.h, in one-to-one mapping to the _tree_types
_tree_types_limit_name=("CMSSWNAMES" "DATASETNAMES" "OBJECTNAMES" "PATHNAMES")

# For each step, check if the appropriate flag is enabled.
for step in "${steps[@]}"; do

Expand Down
67 changes: 45 additions & 22 deletions src/cpp/DQM/index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,24 +57,25 @@ using google::protobuf::io::StringOutputStream;
// ----------------------------------------------------------------------
/** Index task to perform. */
enum TaskType {
TASK_CREATE, //< Create and initialise a new index.
TASK_ADD, //< Add data to an index.
TASK_REMOVE, //< Remove data from the index.
TASK_MERGE, //< Merge an index to another.
TASK_DUMP, //< Dump the index contents.
TASK_STREAM, //< Stream a sample from the index into an intermediate .dat
//file.
TASK_STREAMPB, //< Stream a sample from the index into an intermediate
//ProtocolBuffer .pb file.
TASK_FIXSTREAMERS //< Add missing streamerinfo to oldest ones.
TASK_CREATE, //< Create and initialise a new index.
TASK_ADD, //< Add data to an index.
TASK_REMOVE, //< Remove data from the index.
TASK_MERGE, //< Merge an index to another.
TASK_DUMP, //< Dump the index contents.
TASK_STREAM, //< Stream a sample from the index into an intermediate .dat
// file.
TASK_STREAMPB, //< Stream a sample from the index into an intermediate
// ProtocolBuffer .pb file.
TASK_FIXSTREAMERS, //< Add missing streamerinfo to oldest ones.
TASK_PRINT_CAPACITY //< Print just the capacities of each index tree
};

/** Things user can choose to dump out. */
enum DumpType {
DUMP_CATALOGUE, //< Dump the master catalogue.
DUMP_INFO, //< Dump monitor element summary information.
DUMP_DATA, //< Dump monitor element serialised data.
DUMP_ALL //< Dump everything.
DUMP_ALL, //< Dump everything.
};

/** Classification of what to do with a monitor element. */
Expand All @@ -90,7 +91,7 @@ enum DataType {
TYPE_DATA, //< DQM data for real detector data.
TYPE_RELVAL, //< DQM data for release validation simulated data.
TYPE_RELVAL_RUNDEPMC, //< DQM data for release validation RunDependent
//simulated data.
// simulated data.
TYPE_MC, //< DQM data for other simulated data.
TYPE_RUNDEPMC //< DQM data for Run Dependent simulated data.
};
Expand All @@ -114,7 +115,7 @@ struct SampleInfo {
/** Classification of input files to DQM samples. */
struct FileInfo {
Filename path; //< Path name of the input file (or File name of the root file
//inside a zip archive).
// inside a zip archive).
Filename fullpath; //< File name of the zip archive and ROOT filename.
Filename
container; //< File name of the zip archive, if any, or of the root file.
Expand Down Expand Up @@ -2262,6 +2263,16 @@ static int mergeIndexes(const Filename &indexdir,
return EXIT_SUCCESS;
}

/** Print the current limits of the index, for each tree.

    Writes one line per tree to standard output, in the form
    "TREE-NAME,LIMIT", flushing after every line.

    @return Always EXIT_SUCCESS. */
static int printIndexCapacity() {
  std::cout << "CMSSW-VERSION," << CMSSWNAMES << std::endl
            << "DATASET-NAME," << DATASETNAMES << std::endl
            << "OBJECT-NAME," << OBJECTNAMES << std::endl
            << "SOURCE-FILE," << PATHNAMES << std::endl
            << "STREAMER," << STREAMERS << std::endl;
  return EXIT_SUCCESS;
}

/** Fix the streamerInfo in the index. By policy we overwrite only the
latest streamer info present in the old index, since it is the one
referring to the ROOT version bundled with the DQM GUI. We have no
Expand Down Expand Up @@ -3055,7 +3066,8 @@ static int showusage(void) {
<< app.name()
<< " [OPTIONS] streampb --sample SAMPLE-ID INDEX-DIRECTORY\n "
<< app.name()
<< " [OPTIONS] fixstreamers [--streamer STREAMER-ID] INDEX-DIRECTORY\n";
<< " [OPTIONS] fixstreamers [--streamer STREAMER-ID] INDEX-DIRECTORY\n "
<< app.name() << " [OPTIONS] get_capacity\n";
return EXIT_FAILURE;
}

Expand Down Expand Up @@ -3087,7 +3099,7 @@ int main(int argc, char **argv) {
int arg;

// Check top-level arguments.
for (arg = 1; arg < argc; ++arg)
for (arg = 1; arg < argc; ++arg) {
if (!strcmp(argv[arg], "--no-debug"))
debug = 0;
else if (!strcmp(argv[arg], "--debug") || !strcmp(argv[arg], "-d"))
Expand All @@ -3102,6 +3114,7 @@ int main(int argc, char **argv) {
return showusage();
} else
break;
}

// Check which task we should execute.
if (arg < argc) {
Expand All @@ -3121,9 +3134,12 @@ int main(int argc, char **argv) {
++arg, task = TASK_STREAMPB;
else if (!strcmp(argv[arg], "fixstreamers"))
++arg, task = TASK_FIXSTREAMERS;
else if (!strcmp(argv[arg], "get_capacity"))
++arg, task = TASK_PRINT_CAPACITY;
else {
std::cerr << app.name() << ": unrecognised task parameter '" << argv[arg]
<< "', expected one of create, add or remove\n";
// The task names listed here must match the names accepted on the command
// line ("streampb" and "fixstreamers", as spelled in the usage text).
<< "', expected one of create, add, remove, merge, dump, "
   "stream, streampb, fixstreamers, get_capacity\n";
return showusage();
}
} else {
Expand Down Expand Up @@ -3226,12 +3242,15 @@ int main(int argc, char **argv) {
}
}

// Next option should be the index directory.
if (arg < argc)
indexdir = argv[arg++];
else {
std::cerr << app.name() << ": not enough arguments\n";
return showusage();
// Next option should be the index directory, unless
// only the capacity is needed, which does not depend on it.
if (task != TASK_PRINT_CAPACITY) {
if (arg < argc)
indexdir = argv[arg++];
else {
std::cerr << app.name() << ": not enough arguments\n";
return showusage();
}
}

// Now check for remaining (non-option) task parameters.
Expand Down Expand Up @@ -3442,6 +3461,8 @@ int main(int argc, char **argv) {
std::cerr << indexdir.name() << ": not a directory\n";
return EXIT_FAILURE;
}
} else if (task == TASK_PRINT_CAPACITY) {
// No extra arguments to parse
} else {
std::cerr << app.name() << ": internal error at line " << __LINE__ << '\n';
return EXIT_FAILURE;
Expand All @@ -3465,6 +3486,8 @@ int main(int argc, char **argv) {
return streamoutProtocolBuffer(indexdir, sampleid);
else if (task == TASK_FIXSTREAMERS)
return fixStreamerInfo(indexdir, streamerid);
else if (task == TASK_PRINT_CAPACITY)
return printIndexCapacity();
else {
std::cerr << app.name() << ": internal error, unknown task\n";
return EXIT_FAILURE;
Expand Down

0 comments on commit de813c8

Please sign in to comment.