From 89d50ab034c7ffa3d8ad5327f1fa28704dff63b1 Mon Sep 17 00:00:00 2001 From: lenka Date: Thu, 28 Mar 2024 17:04:52 -0700 Subject: [PATCH] try --- .buildkite/pipeline.yml | 49 +++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index c73ec9e4c4..596270aeb8 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -436,30 +436,31 @@ steps: command: - | # Submit the sbatch script and capture its job ID - JOB_ID=$(sbatch test/mpi_tests/local_checks.sh | awk '{print $4}') - echo "Submitted job with ID: $JOB_ID, output log: slurm-$JOB_ID.out" - START_TIME=$(date +%s) - - # Loop until the job finishes - while true; do - # Check the status of the job - STATUS=$(squeue -j $JOB_ID | grep $JOB_ID | awk '{print $5}') - ELAPSED_TIME=$(( $(date +%s) - $START_TIME )) - - # If the job status is 'PD' (pending) or 'R' (running), wait and continue checking - if [ "$STATUS" == "PD" ] || [ "$STATUS" == "R" ]; then - sleep 60 - echo "Job is still running... Elapsed time: $ELAPSED_TIME seconds." - # If the job status is 'CF' (completed successfully), print success message and exit - elif [ "$STATUS" == "CF" ]; then - echo "Job completed successfully." - exit 0 - # If the job status is anything else, print error message and exit - else - echo "Error: Job failed or terminated. See slurm-$JOB_ID.out for more information." - exit 1 - fi - done + + # JOB_ID=$(sbatch test/mpi_tests/local_checks.sh | awk '{print $4}') + # echo "Submitted job with ID: $JOB_ID, output log: slurm-$JOB_ID.out" + # START_TIME=$(date +%s) + + # # Loop until the job finishes + # while true; do + # # Check the status of the job + # STATUS=$(squeue -j $JOB_ID | grep $JOB_ID | awk '{print $5}') + # ELAPSED_TIME=$(( $(date +%s) - $START_TIME )) + + # # If the job status is 'PD' (pending) or 'R' (running), wait and continue checking + # if [ "$STATUS" == "PD" ] || [ "$STATUS" == "R" ]; then + # sleep 60 + # echo "Job is still running... Elapsed time: $ELAPSED_TIME seconds." + # # If the job status is 'CF' (completed successfully), print success message and exit + # elif [ "$STATUS" == "CF" ]; then + # echo "Job completed successfully." + # exit 0 + # # If the job status is anything else, print error message and exit + # else + # echo "Error: Job failed or terminated. See slurm-$JOB_ID.out for more information." + # exit 1 + # fi + # done agents: slurm_ntasks: 1