From 1b18a88307b5c4f97bb2e79e87ae934091cdd040 Mon Sep 17 00:00:00 2001 From: nefrathenrici Date: Wed, 6 Mar 2024 14:20:29 -0800 Subject: [PATCH] Update scaling pipeline, constrain to icelake nodes --- .buildkite/pipeline.yml | 3 +-- .buildkite/scaling/pipeline.sh | 17 +++++++---------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index f89477e59f..f8e8786bf3 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -6,6 +6,7 @@ agents: env: JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite" OPENBLAS_NUM_THREADS: 1 + SLURM_KILL_BAD_EXIT: 1 JULIA_NVTX_CALLBACKS: gc JULIA_MAX_NUM_PRECOMPILE_FILES: 100 JULIA_DEPOT_PATH: "${BUILDKITE_BUILD_PATH}/${BUILDKITE_PIPELINE_SLUG}/depot/default" @@ -14,8 +15,6 @@ env: GPU_CONFIG_PATH: "config/gpu_configs/" PERF_CONFIG_PATH: "config/perf_configs" MPI_CONFIG_PATH: "config/mpi_configs" - SLURM_KILL_BAD_EXIT: 1 - JULIA_NVTX_CALLBACKS: gc steps: - label: "init :computer:" diff --git a/.buildkite/scaling/pipeline.sh b/.buildkite/scaling/pipeline.sh index e1c9bf2485..ade52390f4 100755 --- a/.buildkite/scaling/pipeline.sh +++ b/.buildkite/scaling/pipeline.sh @@ -3,7 +3,8 @@ set -euo pipefail FT="Float32" resolutions=("low" "mid" "high") -process_counts=("1 2 4 8 16 32" "1 2 4 8 16 32 64" "1 2 4 8 16 32 64 128") +# Process counts for icelake nodes +process_counts=("1 2 4 7 14 28" "1 2 4 7 14 28 56" "1 2 4 7 14 28 56 112") max_procs_per_node=16 # limit this artificially for profiling profiling=disable exclusive=true @@ -64,22 +65,18 @@ done # set up environment and agents cat << 'EOM' agents: - queue: central - modules: julia/1.10.0 cuda/12.2 ucx/1.14.1_cuda-12.2 openmpi/4.1.5_cuda-12.2 nsight-systems/2023.3.1 + queue: new-central + modules: climacommon/2024_02_27 + # This constraint is set for consistent architectures across nodes + slurm_constraint: icelake env: JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite" OPENBLAS_NUM_THREADS: 1 + SLURM_KILL_BAD_EXIT: 1 JULIA_NVTX_CALLBACKS: gc - OMPI_MCA_opal_warn_on_missing_libcuda: 0 JULIA_MAX_NUM_PRECOMPILE_FILES: 100 JULIA_CPU_TARGET: 'broadwell;skylake;icelake;cascadelake;epyc' - SLURM_KILL_BAD_EXIT: 1 - JULIA_NVTX_CALLBACKS: gc - JULIA_CUDA_MEMORY_POOL: none - JULIA_MPI_HAS_CUDA: "true" - MPITRAMPOLINE_LIB: "/groups/esm/software/MPIwrapper/ompi4.1.5_cuda-12.2/lib64/libmpiwrapper.so" - MPITRAMPOLINE_MPIEXEC: "/groups/esm/software/MPIwrapper/ompi4.1.5_cuda-12.2/bin/mpiwrapperexec" steps: - label: "init :computer:"