From 03d200aef060d9c734a8bdacd1c5b9f80b1666eb Mon Sep 17 00:00:00 2001 From: lenka Date: Thu, 28 Mar 2024 13:36:06 -0700 Subject: [PATCH] revs --- .buildkite/pipeline.yml | 10 +++++----- test/mpi_tests/local_checks.sh | 5 ++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 3b2dd0377..5ab8e0d35 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,6 +1,6 @@ agents: queue: new-central - slurm_time: 24:00:00 + slurm_time: 4:00:00 modules: climacommon/2024_03_18 env: @@ -430,11 +430,11 @@ steps: slurm_gpus: 1 - group: "Bash scripts" - steps: - - label: "Submit and Monitor sbatch Job on Caltech HPC" + # check that (1) the script can be successfully submitted, (2) it runs successfully command: | + " #!/bin/bash # Submit the sbatch script and capture its job ID @@ -458,13 +458,13 @@ steps: exit 0 # If the job status is anything else, print error message and exit else - echo "Error: Job failed or terminated." + echo "Error: Job failed or terminated. See slurm-$JOB_ID.out for more information."
exit 1 fi done + " agents: slurm_ntasks: 1 - slurm_mem: 16GB - wait diff --git a/test/mpi_tests/local_checks.sh b/test/mpi_tests/local_checks.sh index e105cd673..8c8e356e6 100644 --- a/test/mpi_tests/local_checks.sh +++ b/test/mpi_tests/local_checks.sh @@ -9,7 +9,7 @@ export MODULE_PATH=/groups/esm/modules:$MODULE_PATH module purge module load climacommon/2024_03_18 -export CC_PATH=$(pwd)/ +export CC_PATH=$(pwd)/ # adjust this to the path of your ClimaCoupler.jl directory export RUN_NAME=coarse_single_ft64_hourly_checkpoints_restart export CONFIG_FILE=${CC_PATH}config/model_configs/${RUN_NAME}.yml export RESTART_DIR=experiments/AMIP/output/amip/${RUN_NAME}_artifacts/ @@ -19,7 +19,6 @@ export JULIA_NVTX_CALLBACKS=gc export OMPI_MCA_opal_warn_on_missing_libcuda=0 export JULIA_MAX_NUM_PRECOMPILE_FILES=100 export JULIA_CPU_TARGET='broadwell;skylake;icelake;cascadelake;epyc' -export CLIMACORE_DISTRIBUTED="MPI" export SLURM_KILL_BAD_EXIT=1 julia --project=experiments/AMIP/ -e 'using Pkg; Pkg.instantiate(;verbose=true)' @@ -55,4 +54,4 @@ fi # Trouble shooting? # - ensure you're using the latest module file of climacommon # - ensure you're using the latest version of ClimaCoupler.jl -# - did you cd to your version of ClimaCoupler.jl? \ No newline at end of file +# - did you cd to your version of ClimaCoupler.jl?