From 90e5748b28c27448ceb11d3ebde577810c465abc Mon Sep 17 00:00:00 2001
From: kmdeck
Date: Tue, 2 Jul 2024 13:54:45 -0700
Subject: [PATCH] add atmos long run pipeline

---
 .buildkite/longruns_gpu/pipeline.yml | 54 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 .buildkite/longruns_gpu/pipeline.yml

diff --git a/.buildkite/longruns_gpu/pipeline.yml b/.buildkite/longruns_gpu/pipeline.yml
new file mode 100644
index 0000000000..bb0b3d71ef
--- /dev/null
+++ b/.buildkite/longruns_gpu/pipeline.yml
@@ -0,0 +1,54 @@
+# Long-run GPU pipeline: instantiate the .buildkite Julia project once, then
+# run experiments/long_runs/land.jl on one GPU under Slurm.
+agents:
+  queue: clima
+  slurm_mem: 8G
+  modules: julia/1.10.0 cuda/julia-pref openmpi/4.1.5-mpitrampoline nsight-systems/2024.2.1
+
+env:
+  JULIA_MPI_HAS_CUDA: "true"
+  JULIA_NVTX_CALLBACKS: gc
+  JULIA_MAX_NUM_PRECOMPILE_FILES: 100
+  OPENBLAS_NUM_THREADS: 1
+  OMPI_MCA_opal_warn_on_missing_libcuda: 0
+  SLURM_KILL_BAD_EXIT: 1
+  SLURM_GRES_FLAGS: "allow-task-sharing"
+  CONFIG_PATH: "config/longrun_configs"
+
+timeout_in_minutes: 1440
+
+steps:
+  - label: "init :GPU:"
+    key: "init_gpu_env"
+    command:
+      - echo "--- Instantiate"
+      - julia --project=.buildkite -e 'using Pkg; Pkg.instantiate(;verbose=true)'
+      - julia --project=.buildkite -e 'using Pkg; Pkg.precompile()'
+      - julia --project=.buildkite -e 'using CUDA; CUDA.precompile_runtime()'
+      - julia --project=.buildkite -e 'using Pkg; Pkg.status()'
+
+    agents:
+      slurm_gpus: 1
+      slurm_cpus_per_task: 8
+    env:
+      JULIA_NUM_PRECOMPILE_TASKS: 8
+      # Step-level value; replaces the pipeline-level 100 for this step.
+      JULIA_MAX_NUM_PRECOMPILE_FILES: 50
+
+  - wait
+
+  - group: "Global Land Models"
+    steps:
+
+      - label: ":seedling: Soil-Canopy"
+        command:
+          - srun julia --color=yes --project=.buildkite experiments/long_runs/land.jl
+        # `$$` escapes the dollar sign so pipeline upload leaves $JOB_NAME
+        # uninterpolated (JOB_NAME is set in this step's env below).
+        artifact_paths: "$$JOB_NAME/output_active/*"
+        agents:
+          slurm_gpus: 1
+          # Quoted: YAML 1.1 loaders read a bare 12:00:00 as a base-60 integer.
+          slurm_time: "12:00:00"
+        env:
+          CLIMACOMMS_DEVICE: "CUDA"
+          JOB_NAME: "soil_canopy"