Skip to content

Commit

Permalink
add strong scaling GPU AMIP
Browse files Browse the repository at this point in the history
  • Loading branch information
juliasloan25 committed Mar 6, 2024
1 parent 0525fc0 commit 41dff6f
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 2 deletions.
87 changes: 87 additions & 0 deletions .buildkite/gpu/pipeline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Pipeline-wide agent targeting; the steps below add their own Slurm
# settings (the benchmark steps request slurm_mem: 32G instead of 8G).
agents:
  queue: clima
  slurm_mem: 8G
  modules: common nsight-systems/2023.4.1

# Pipeline-wide environment. The init step overrides
# JULIA_MAX_NUM_PRECOMPILE_FILES in its own env section.
env:
  JULIA_CUDA_MEMORY_POOL: none
  JULIA_MPI_HAS_CUDA: "true"
  JULIA_NVTX_CALLBACKS: gc
  JULIA_MAX_NUM_PRECOMPILE_FILES: 100
  OPENBLAS_NUM_THREADS: 1
  OMPI_MCA_opal_warn_on_missing_libcuda: 0
  SLURM_KILL_BAD_EXIT: 1
  # https://github.com/open-mpi/ompi/issues/11949#issuecomment-1737712291
  SLURM_GPU_BIND: none
  # Directory prefix used by the --config_file arguments in the steps below.
  GPU_CONFIG_PATH: "config/gpu_configs"
  CLIMAATMOS_GC_NSTEPS: 10

steps:
  # Instantiate and precompile both Julia projects and download artifacts
  # once; the `wait` below holds the benchmark group until this finishes.
  - label: "init :GPU:"
    key: "init_gpu_env"
    command:
      - echo "--- Instantiate experiments/AMIP"
      - julia --project=experiments/AMIP -e 'using Pkg; Pkg.instantiate(;verbose=true)'
      - julia --project=experiments/AMIP -e 'using Pkg; Pkg.precompile()'
      - julia --project=experiments/AMIP -e 'using Pkg; Pkg.status()'

      - echo "--- Download artifacts"
      - julia --project=artifacts -e 'using Pkg; Pkg.instantiate(;verbose=true)'
      - julia --project=artifacts -e 'using Pkg; Pkg.precompile()'
      - julia --project=artifacts -e 'using Pkg; Pkg.status()'
      - julia --project=artifacts artifacts/download_artifacts.jl

    agents:
      slurm_gpus: 1
      slurm_cpus_per_task: 8
    env:
      JULIA_NUM_PRECOMPILE_TASKS: 8
      # Overrides the pipeline-wide value of 100 for this step only.
      JULIA_MAX_NUM_PRECOMPILE_FILES: 50

  - wait

  # Strong scaling: the same driver is run on 1, 2 and 4 tasks with one GPU
  # per task. Each step has its own config file and artifact directory.
  # NOTE(review): `slurm_exclusive:` is deliberately left without a value, as
  # in the original; presumably it is forwarded to Slurm as a bare flag -
  # confirm against the agent's Slurm hook.
  - group: "CHAP GPU strong scaling"
    steps:

      # Baseline: a single task, so Julia is started directly (no srun).
      - label: "GPU AMIP CHAP - strong scaling - 1 GPU"
        key: "gpu_amip_chap"
        command:
          - >
            julia --threads=3 --color=yes --project=experiments/AMIP experiments/AMIP/coupler_driver.jl
            --config_file $GPU_CONFIG_PATH/gpu_amip_chap.yml
        artifact_paths: "gpu_amip_chap/*"
        agents:
          slurm_gpus_per_task: 1
          slurm_cpus_per_task: 4
          slurm_ntasks: 1
          slurm_mem: 32G
          slurm_exclusive:

      # Two tasks; the driver is started through srun.
      - label: "GPU AMIP CHAP - strong scaling - 2 GPUs"
        key: "gpu_amip_chap_2process"
        command:
          - >
            srun --cpu-bind=threads --cpus-per-task=4
            julia --threads=3 --color=yes --project=experiments/AMIP experiments/AMIP/coupler_driver.jl
            --config_file $GPU_CONFIG_PATH/gpu_amip_chap_2process.yml
        artifact_paths: "gpu_amip_chap_2process/*"
        agents:
          slurm_gpus_per_task: 1
          slurm_cpus_per_task: 4
          slurm_ntasks: 2
          slurm_mem: 32G
          slurm_exclusive:

      # Four tasks. This step must run the same coupler_driver.jl entry point
      # as the 1- and 2-GPU steps so the three timings are comparable; it
      # previously pointed at experiments/AMIP/hybrid/driver.jl.
      - label: "GPU AMIP CHAP - strong scaling - 4 GPUs"
        key: "gpu_amip_chap_4process"
        command:
          - >
            srun --cpu-bind=threads --cpus-per-task=4
            julia --threads=3 --color=yes --project=experiments/AMIP experiments/AMIP/coupler_driver.jl
            --config_file $GPU_CONFIG_PATH/gpu_amip_chap_4process.yml
        artifact_paths: "gpu_amip_chap_4process/*"
        agents:
          slurm_gpus_per_task: 1
          slurm_cpus_per_task: 4
          slurm_ntasks: 4
          slurm_mem: 32G
          slurm_exclusive:
22 changes: 22 additions & 0 deletions config/gpu_configs/gpu_amip_chap.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Coupler options passed via --config_file by the
# "GPU AMIP CHAP - strong scaling - 1 GPU" step of .buildkite/gpu/pipeline.yml.
# Quoted values are strings and must stay strings (including "true" and "Inf").
anim: false
apply_limiter: false
atmos_config_file: 'config/gpu_configs/gpu_aquaplanet_chap.yml'
dt: '100secs'
dt_cloud_fraction: '1hours'
dt_cpl: 100
dt_rad: '1hours'
dt_save_state_to_disk: 'Inf'
dt_save_to_sol: 'Inf'
energy_check: false
evolving_ocean: false
hourly_checkpoint: false
job_id: 'gpu_amip_chap'
land_albedo_type: 'map_static'
mode_name: 'amip'
mono_surface: false
run_name: 'gpu_amip_chap'
start_date: '19790301'
surface_setup: 'PrescribedSurface'
t_end: '1days'
turb_flux_partition: 'CombinedStateFluxes'
vert_diff: 'true'
22 changes: 22 additions & 0 deletions config/gpu_configs/gpu_amip_chap_2process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Coupler options passed via --config_file by the
# "GPU AMIP CHAP - strong scaling - 2 GPUs" step of .buildkite/gpu/pipeline.yml.
# Differs from gpu_amip_chap.yml only in atmos_config_file, job_id and run_name.
# Quoted values are strings and must stay strings (including "true" and "Inf").
anim: false
apply_limiter: false
atmos_config_file: 'config/gpu_configs/gpu_aquaplanet_chap_2process.yml'
dt: '100secs'
dt_cloud_fraction: '1hours'
dt_cpl: 100
dt_rad: '1hours'
dt_save_state_to_disk: 'Inf'
dt_save_to_sol: 'Inf'
energy_check: false
evolving_ocean: false
hourly_checkpoint: false
job_id: 'gpu_amip_chap_2process'
land_albedo_type: 'map_static'
mode_name: 'amip'
mono_surface: false
run_name: 'gpu_amip_chap_2process'
start_date: '19790301'
surface_setup: 'PrescribedSurface'
t_end: '1days'
turb_flux_partition: 'CombinedStateFluxes'
vert_diff: 'true'
22 changes: 22 additions & 0 deletions config/gpu_configs/gpu_amip_chap_4process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Coupler options passed via --config_file by the
# "GPU AMIP CHAP - strong scaling - 4 GPUs" step of .buildkite/gpu/pipeline.yml.
# Differs from gpu_amip_chap.yml only in atmos_config_file, job_id and run_name.
# Quoted values are strings and must stay strings (including "true" and "Inf").
anim: false
apply_limiter: false
atmos_config_file: 'config/gpu_configs/gpu_aquaplanet_chap_4process.yml'
dt: '100secs'
dt_cloud_fraction: '1hours'
dt_cpl: 100
dt_rad: '1hours'
dt_save_state_to_disk: 'Inf'
dt_save_to_sol: 'Inf'
energy_check: false
evolving_ocean: false
hourly_checkpoint: false
job_id: 'gpu_amip_chap_4process'
land_albedo_type: 'map_static'
mode_name: 'amip'
mono_surface: false
run_name: 'gpu_amip_chap_4process'
start_date: '19790301'
surface_setup: 'PrescribedSurface'
t_end: '1days'
turb_flux_partition: 'CombinedStateFluxes'
vert_diff: 'true'
8 changes: 6 additions & 2 deletions experiments/AMIP/coupler_driver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,7 @@ function solve_coupler!(cs)
end
@show walltime

return cs
return walltime
end

## exit if running performance analysis #hide
Expand All @@ -687,7 +687,11 @@ if haskey(ENV, "CI_PERF_SKIP_COUPLED_RUN") #hide
end #hide

## run the coupled simulation
solve_coupler!(cs);
walltime = solve_coupler!(cs);

# Show the simulated years per day of the simulation
es = CA.EfficiencyStats(tspan, walltime)
@info "SYPD: $(CA.simulated_years_per_day(es))"

#=
## Postprocessing
Expand Down

0 comments on commit 41dff6f

Please sign in to comment.