Skip to content

Commit

Permalink
Merge pull request #2790 from CliMA/zs/scaling
Browse files Browse the repository at this point in the history
clean up gpu target
  • Loading branch information
szy21 authored Mar 14, 2024
2 parents a9cd8bf + 22abac5 commit b14862c
Show file tree
Hide file tree
Showing 13 changed files with 119 additions and 216 deletions.
104 changes: 37 additions & 67 deletions .buildkite/gpu_pipeline/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,78 +103,79 @@ steps:
slurm_ntasks: 4
slurm_exclusive:

- group: "CHAP GPU strong scaling"
- group: "DYAMOND GPU strong scaling"
steps:

- label: "gpu_aquaplanet_chap - strong scaling - 1 GPU"
- label: "gpu_aquaplanet_dyamond - 1 GPU"
command:
- mkdir -p gpu_aquaplanet_chap
- mkdir -p gpu_aquaplanet_dyamond
- >
nsys profile --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_dyamond/report
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap.yml
artifact_paths: "gpu_aquaplanet_chap/*"
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond.yml
artifact_paths: "gpu_aquaplanet_dyamond/*"
agents:
slurm_gpus_per_task: 1
slurm_gpus: 1
slurm_cpus_per_task: 4
slurm_ntasks: 1
slurm_mem: 32G
slurm_exclusive:

- label: "gpu_aquaplanet_chap - strong scaling - 2 GPUs"
- label: "gpu_aquaplanet_dyamond - 2 GPUs"
command:
- mkdir -p gpu_aquaplanet_chap_2process
- mkdir -p gpu_aquaplanet_dyamond_2process
- >
srun --cpu-bind=threads --cpus-per-task=4
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_2process.yml
artifact_paths: "gpu_aquaplanet_chap_2process/*"
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_2process.yml
artifact_paths: "gpu_aquaplanet_dyamond_2process/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 2
slurm_mem: 32G
slurm_time: 8:00:00
slurm_exclusive:

- label: "gpu_aquaplanet_chap - strong scaling - 4 GPUs"
- label: "gpu_aquaplanet_dyamond - 4 GPUs"
command:
- mkdir -p gpu_aquaplanet_chap_4process
- mkdir -p gpu_aquaplanet_dyamond_4process
- >
srun --cpu-bind=threads --cpus-per-task=4
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_4process.yml
artifact_paths: "gpu_aquaplanet_chap_4process/*"
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_4process.yml
artifact_paths: "gpu_aquaplanet_dyamond_4process/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 4
slurm_mem: 32G
slurm_time: 8:00:00
slurm_exclusive:

- group: "CHAP GPU weak scaling"
- group: "DYAMOND GPU weak scaling"
steps:

- label: "gpu_aquaplanet_chap - weak scaling - 1 GPU"
- label: "gpu_aquaplanet_dyamond - weak scaling - 1 GPU"
command:
- mkdir -p gpu_aquaplanet_chap_ws_1process
- mkdir -p gpu_aquaplanet_dyamond_ws_1process
- >
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_ws_1process.yml
artifact_paths: "gpu_aquaplanet_chap_ws_1process/*"
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_1process.yml
artifact_paths: "gpu_aquaplanet_dyamond_ws_1process/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 1
slurm_mem: 32G
slurm_exclusive:

- label: "gpu_aquaplanet_chap - weak scaling - 2 GPUs"
- label: "gpu_aquaplanet_dyamond - weak scaling - 2 GPUs"
command:
- mkdir -p gpu_aquaplanet_chap_ws_2process
- mkdir -p gpu_aquaplanet_dyamond_ws_2process
- >
srun --cpu-bind=threads --cpus-per-task=4
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_ws_2process.yml
artifact_paths: "gpu_aquaplanet_chap_ws_2process/*"
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_2process.yml
artifact_paths: "gpu_aquaplanet_dyamond_ws_2process/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
Expand All @@ -183,14 +184,14 @@ steps:
slurm_time: 8:00:00
slurm_exclusive:

- label: "gpu_aquaplanet_chap - weak scaling - 4 GPUs"
- label: "gpu_aquaplanet_dyamond - weak scaling - 4 GPUs"
command:
- mkdir -p gpu_aquaplanet_chap_ws_4process
- mkdir -p gpu_aquaplanet_dyamond_ws_4process
- >
srun --cpu-bind=threads --cpus-per-task=4
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_ws_4process.yml
artifact_paths: "gpu_aquaplanet_chap_ws_4process/*"
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_4process.yml
artifact_paths: "gpu_aquaplanet_dyamond_ws_4process/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
Expand All @@ -199,50 +200,19 @@ steps:
slurm_time: 8:00:00
slurm_exclusive:

- group: "DYAMOND GPU strong scaling"
- group: "Diagnostic EDMF GPU"
steps:

- label: "gpu_aquaplanet_dyamond - 1 GPU"
- label: "gpu_aquaplanet_chap - 1 GPU"
command:
- mkdir -p gpu_aquaplanet_dyamond
- >
nsys profile --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_dyamond/report
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond.yml
artifact_paths: "gpu_aquaplanet_dyamond/*"
agents:
slurm_gpus: 1
slurm_cpus_per_task: 4
slurm_exclusive:

- label: "gpu_aquaplanet_dyamond - 2 GPUs"
command:
- mkdir -p gpu_aquaplanet_dyamond_2process
- >
srun --cpu-bind=threads --cpus-per-task=4
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_2process.yml
artifact_paths: "gpu_aquaplanet_dyamond_2process/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 2
slurm_mem: 32G
slurm_time: 8:00:00
slurm_exclusive:

- label: "gpu_aquaplanet_dyamond - 4 GPUs"
command:
- mkdir -p gpu_aquaplanet_dyamond_4process
- mkdir -p gpu_aquaplanet_chap
- >
srun --cpu-bind=threads --cpus-per-task=4
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_4process.yml
artifact_paths: "gpu_aquaplanet_dyamond_4process/*"
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_diagedmf.yml
artifact_paths: "gpu_aquaplanet_diagedmf/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 4
slurm_ntasks: 1
slurm_mem: 32G
slurm_time: 8:00:00
slurm_exclusive:
slurm_exclusive:
29 changes: 0 additions & 29 deletions config/gpu_configs/gpu_aquaplanet_chap_2process.yml

This file was deleted.

29 changes: 0 additions & 29 deletions config/gpu_configs/gpu_aquaplanet_chap_4process.yml

This file was deleted.

29 changes: 0 additions & 29 deletions config/gpu_configs/gpu_aquaplanet_chap_ws_1process.yml

This file was deleted.

29 changes: 0 additions & 29 deletions config/gpu_configs/gpu_aquaplanet_chap_ws_2process.yml

This file was deleted.

29 changes: 0 additions & 29 deletions config/gpu_configs/gpu_aquaplanet_chap_ws_4process.yml

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ edmfx_sgs_mass_flux: true
edmfx_sgs_diffusive_flux: true
precip_model: 0M
dt: 100secs
t_end: 1days
t_end: 12hours
toml: [toml/diagnostic_edmfx_box.toml]
5 changes: 4 additions & 1 deletion config/gpu_configs/gpu_aquaplanet_dyamond.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ precip_model: "0M"
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"
vert_diff: "true"
dt_cloud_fraction: "1hours"
vert_diff: "FriersonDiffusion"
implicit_diffusion: true
approximate_linear_solve_iters: 2
surface_setup: "DefaultMoninObukhov"
rayleigh_sponge: true
dt: "100secs"
Expand Down
5 changes: 4 additions & 1 deletion config/gpu_configs/gpu_aquaplanet_dyamond_2process.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ precip_model: "0M"
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"
vert_diff: "true"
dt_cloud_fraction: "1hours"
vert_diff: "FriersonDiffusion"
implicit_diffusion: true
approximate_linear_solve_iters: 2
surface_setup: "DefaultMoninObukhov"
rayleigh_sponge: true
dt: "100secs"
Expand Down
5 changes: 4 additions & 1 deletion config/gpu_configs/gpu_aquaplanet_dyamond_4process.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ precip_model: "0M"
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"
vert_diff: "true"
dt_cloud_fraction: "1hours"
vert_diff: "FriersonDiffusion"
implicit_diffusion: true
approximate_linear_solve_iters: 2
surface_setup: "DefaultMoninObukhov"
rayleigh_sponge: true
dt: "100secs"
Expand Down
23 changes: 23 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_dyamond_ws_1process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# GPU aquaplanet DYAMOND run: weak-scaling case on a single process.
job_id: 'gpu_aquaplanet_dyamond_ws_1process'
# TOML file list referenced by this config.
toml:
  - toml/longrun_aquaplanet_dyamond.toml

# Time stepping.
dt: '100secs'
t_end: '12hours'

# Output: both save intervals are "Inf" and default diagnostics are off
# (presumably so the run writes no state or diagnostics; confirm).
dt_save_state_to_disk: 'Inf'
dt_save_to_sol: 'Inf'
output_default_diagnostics: false

# Grid (key meanings inferred from names; confirm against the config schema).
h_elem: 30
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Moisture and precipitation.
moist: 'equil'
precip_model: '0M'

# Radiation and cloud fraction; both dt_* values are set to 1hours.
rad: 'allskywithclear'
idealized_insolation: false
dt_rad: '1hours'
dt_cloud_fraction: '1hours'

# Vertical diffusion, surface setup and sponge.
vert_diff: 'FriersonDiffusion'
implicit_diffusion: true
approximate_linear_solve_iters: 2
surface_setup: 'DefaultMoninObukhov'
rayleigh_sponge: true
Loading

0 comments on commit b14862c

Please sign in to comment.