diff --git a/helpers/node-mark-online b/helpers/node-mark-online index 4af85ad..ab612cc 100644 --- a/helpers/node-mark-online +++ b/helpers/node-mark-online @@ -60,12 +60,12 @@ elif [[ "$NHC_RM" == "slurm" ]]; then # Slurm does not run the HealthCheckProgram on nodes in the DOWN state, # but if someone runs NHC by hand, we want to be able to do the right thing. case "$STATUS" in - *'@'*|*'#'*|boot*|*-*|plnd*) + *'@'*|*'#'*|*-*|plnd*) # These states aren't handled yet. echo "$0: State \"$STATUS\" not yet handled; ignoring." exit 0 ;; - down*|drain*|drng*|fail*|maint*) + down*|drain*|drng*|fail*|maint*|boot*) # If there is no old note, and we've not been told to ignore that, do not online the node. if [[ "$OLD_NOTE_LEADER" == "none" && "$IGNORE_EMPTY_NOTE" != "1" ]]; then echo "$0: Not onlining $HOSTNAME: No note set." diff --git a/helpers/node-mark-reboot b/helpers/node-mark-reboot old mode 100755 new mode 100644 index 828bc9c..c55650c --- a/helpers/node-mark-reboot +++ b/helpers/node-mark-reboot @@ -25,7 +25,7 @@ NOTE="$*" if [[ "$NHC_RM" == "slurm" ]]; then SLURM_SINFO="${SLURM_SINFO:-sinfo}" SLURM_SCONTROL="${SLURM_SCONTROL:-scontrol}" - SLURM_SC_REBOOT_ARGS="${SLURM_SC_REBOOT_ARGS:-reboot ASAP NextState=RESUME}" + SLURM_SC_REBOOT_ARGS="${SLURM_SC_REBOOT_ARGS:-reboot ASAP NextState=DOWN}" LINE=( $($SLURM_SINFO -o '%t %E' -hn $HOSTNAME) ) STATUS="${LINE[0]}" @@ -47,7 +47,6 @@ if [[ "$NHC_RM" == "slurm" ]]; then if [[ "$OLD_NOTE_LEADER" != "none" && "$OLD_NOTE_LEADER" != "$LEADER" ]]; then LEADER="$OLD_NOTE_LEADER" NOTE="$OLD_NOTE" - SLURM_SC_REBOOT_ARGS="reboot ASAP NextState=DOWN" fi echo "$0: Marking $STATUS $HOSTNAME for reboot: $LEADER $NOTE" exec $SLURM_SCONTROL $SLURM_SC_REBOOT_ARGS Reason="$LEADER $NOTE" $HOSTNAME