Skip to content

Commit

Permalink
Add strong and weak scaling jobs for CHAP configuration.
Browse files Browse the repository at this point in the history
  • Loading branch information
sriharshakandala committed Mar 4, 2024
1 parent 84f7a70 commit 41b6f79
Show file tree
Hide file tree
Showing 9 changed files with 332 additions and 25 deletions.
170 changes: 146 additions & 24 deletions .buildkite/gpu_pipeline/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,30 +55,7 @@ steps:
agents:
slurm_gpus: 1
slurm_cpus_per_task: 4

- label: "gpu_aquaplanet_dyamond"
command:
- mkdir -p gpu_aquaplanet_dyamond
- >
nsys profile --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_dyamond/report
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond.yml
artifact_paths: "gpu_aquaplanet_dyamond/*"
agents:
slurm_gpus: 1
slurm_cpus_per_task: 4

- label: "gpu_aquaplanet_chap"
command:
- mkdir -p gpu_aquaplanet_chap
- >
nsys profile --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_chap/report
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap.yml
artifact_paths: "gpu_aquaplanet_chap/*"
agents:
slurm_gpus: 1
slurm_cpus_per_task: 4
slurm_exclusive:

- label: "moist Held-Suarez"
key: "gpu_hs_rhoe_equil_55km_nz63_0M"
Expand All @@ -92,6 +69,7 @@ steps:
agents:
slurm_gpus: 1
slurm_cpus_per_task: 4
slurm_exclusive:

- label: "moist Held-Suarez - 4 gpus"
key: "gpu_hs_rhoe_equil_55km_nz63_0M_4process"
Expand All @@ -107,6 +85,7 @@ steps:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 4
slurm_exclusive:

- label: "dry baroclinic wave - 4 gpus"
key: "target_gpu_implicit_baroclinic_wave_4process"
Expand All @@ -122,3 +101,146 @@ steps:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 4
slurm_exclusive:

# Strong-scaling group: runs the CHAP aquaplanet case on 1, 2 and 4 Slurm
# tasks with one GPU requested per task. The 2- and 4-process configs both
# set h_elem: 30, the value gpu_aquaplanet_chap.yml is changed to in this
# commit, so the problem size is the same in all three steps.
- group: "CHAP GPU strong scaling"
steps:

# Single-task baseline: julia is invoked directly, without srun.
- label: "gpu_aquaplanet_chap - strong scaling - 1 GPU"
command:
- mkdir -p gpu_aquaplanet_chap
# The folded scalar (>) joins the following lines into one command line.
- >
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap.yml
artifact_paths: "gpu_aquaplanet_chap/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 1
slurm_mem: 32G
# Bare key (null value).
# NOTE(review): presumably forwarded as Slurm's --exclusive flag by the
# agent launcher — confirm.
slurm_exclusive:

# Two tasks, launched through srun with the same --cpus-per-task value that
# is requested from the agent below.
- label: "gpu_aquaplanet_chap - strong scaling - 2 GPUs"
command:
- mkdir -p gpu_aquaplanet_chap_2process
- >
srun --cpu-bind=threads --cpus-per-task=4
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_2process.yml
artifact_paths: "gpu_aquaplanet_chap_2process/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 2
slurm_mem: 32G
slurm_exclusive:

# Four tasks; identical launch line, only the config file and task count
# differ from the 2-GPU step.
- label: "gpu_aquaplanet_chap - strong scaling - 4 GPUs"
command:
- mkdir -p gpu_aquaplanet_chap_4process
- >
srun --cpu-bind=threads --cpus-per-task=4
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_4process.yml
artifact_paths: "gpu_aquaplanet_chap_4process/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 4
slurm_mem: 32G
slurm_exclusive:

# Weak-scaling group: the task count and the horizontal resolution grow
# together. The three configs referenced below set h_elem to 30, 42 and 60
# for 1, 2 and 4 tasks respectively; every other setting except job_id is
# the same across them.
- group: "CHAP GPU weak scaling"
steps:

# Single-task baseline: julia is invoked directly, without srun.
- label: "gpu_aquaplanet_chap - weak scaling - 1 GPU"
command:
- mkdir -p gpu_aquaplanet_chap_ws_1process
# The folded scalar (>) joins the following lines into one command line.
- >
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_ws_1process.yml
artifact_paths: "gpu_aquaplanet_chap_ws_1process/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 1
slurm_mem: 32G
# Bare key (null value).
# NOTE(review): presumably forwarded as Slurm's --exclusive flag by the
# agent launcher — confirm.
slurm_exclusive:

# Two tasks, launched through srun with the same --cpus-per-task value that
# is requested from the agent below.
- label: "gpu_aquaplanet_chap - weak scaling - 2 GPUs"
command:
- mkdir -p gpu_aquaplanet_chap_ws_2process
- >
srun --cpu-bind=threads --cpus-per-task=4
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_ws_2process.yml
artifact_paths: "gpu_aquaplanet_chap_ws_2process/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 2
slurm_mem: 32G
# Quoted: a bare digits-and-colons scalar is read as a base-60 integer by
# YAML 1.1 loaders, which would turn the limit into 28800.
slurm_time: "8:00:00"
slurm_exclusive:

# Four tasks; identical launch line, only the config file and task count
# differ from the 2-GPU step.
- label: "gpu_aquaplanet_chap - weak scaling - 4 GPUs"
command:
- mkdir -p gpu_aquaplanet_chap_ws_4process
- >
srun --cpu-bind=threads --cpus-per-task=4
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_ws_4process.yml
artifact_paths: "gpu_aquaplanet_chap_ws_4process/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 4
slurm_mem: 32G
# Quoted for the same reason as in the 2-GPU step.
slurm_time: "8:00:00"
slurm_exclusive:

# Strong-scaling group for the DYAMOND aquaplanet case on 1, 2 and 4 Slurm
# tasks.
- group: "DYAMOND GPU strong scaling"
steps:

# Single-task baseline, run under the nsys profiler; the report is written
# into the artifact directory so it is uploaded with the step's artifacts.
# NOTE(review): the 2- and 4-GPU steps run without nsys, so timing
# comparisons against this step include profiler overhead — confirm that
# this is intended.
- label: "gpu_aquaplanet_dyamond - 1 GPU"
command:
- mkdir -p gpu_aquaplanet_dyamond
# The folded scalar (>) joins the following lines into one command line.
- >
nsys profile --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_dyamond/report
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond.yml
artifact_paths: "gpu_aquaplanet_dyamond/*"
agents:
slurm_gpus: 1
slurm_cpus_per_task: 4
# Bare key (null value).
# NOTE(review): presumably forwarded as Slurm's --exclusive flag by the
# agent launcher — confirm.
slurm_exclusive:

# Two tasks, one GPU requested per task. The command must go through srun:
# srun starts one julia process per allocated task, whereas a bare julia
# invocation starts a single process regardless of slurm_ntasks. The launch
# line matches the multi-GPU CHAP scaling steps in this pipeline.
- label: "gpu_aquaplanet_dyamond - 2 GPUs"
command:
- mkdir -p gpu_aquaplanet_dyamond_2process
- >
srun --cpu-bind=threads --cpus-per-task=4
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_2process.yml
artifact_paths: "gpu_aquaplanet_dyamond_2process/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 2
slurm_mem: 32G
# Quoted: a bare digits-and-colons scalar is read as a base-60 integer by
# YAML 1.1 loaders, which would turn the limit into 28800.
slurm_time: "8:00:00"
slurm_exclusive:

# Four tasks; launched through srun for the same reason as the 2-GPU step.
- label: "gpu_aquaplanet_dyamond - 4 GPUs"
command:
- mkdir -p gpu_aquaplanet_dyamond_4process
- >
srun --cpu-bind=threads --cpus-per-task=4
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_4process.yml
artifact_paths: "gpu_aquaplanet_dyamond_4process/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 4
slurm_mem: 32G
# Quoted for the same reason as in the 2-GPU step.
slurm_time: "8:00:00"
slurm_exclusive:
2 changes: 1 addition & 1 deletion config/gpu_configs/gpu_aquaplanet_chap.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ job_id: gpu_aquaplanet_chap
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"
output_default_diagnostics: false
h_elem: 16
h_elem: 30
z_max: 55000.0
z_elem: 63
dz_bottom: 30.0
Expand Down
29 changes: 29 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_chap_2process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Run configuration used by the 2-GPU step of the CHAP strong-scaling group.
# Every setting except job_id matches gpu_aquaplanet_chap_4process.yml.
# NOTE(review): the section headings below are inferred from key names only;
# confirm the grouping against the driver's config schema.
job_id: "gpu_aquaplanet_chap_2process"
toml:
  - toml/diagnostic_edmfx_box.toml

# Time stepping
dt: "100secs"
t_end: "1days"

# Grid
h_elem: 30
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Output
output_default_diagnostics: false
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"

# Moisture, precipitation and surface
moist: "equil"
precip_model: "0M"
surface_setup: "DefaultMoninObukhov"

# Radiation and cloud fraction
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"
dt_cloud_fraction: "1hours"

# Turbulence and convection (EDMFX)
turbconv: "diagnostic_edmfx"
prognostic_tke: true
implicit_diffusion: true
approximate_linear_solve_iters: 2
edmfx_upwinding: "first_order"
edmfx_entr_model: "Generalized"
edmfx_detr_model: "Generalized"
edmfx_nh_pressure: true
edmfx_sgs_mass_flux: true
edmfx_sgs_diffusive_flux: true
29 changes: 29 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_chap_4process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Run configuration used by the 4-GPU step of the CHAP strong-scaling group.
# Every setting except job_id matches gpu_aquaplanet_chap_2process.yml.
# NOTE(review): the section headings below are inferred from key names only;
# confirm the grouping against the driver's config schema.
job_id: "gpu_aquaplanet_chap_4process"
toml:
  - toml/diagnostic_edmfx_box.toml

# Time stepping
dt: "100secs"
t_end: "1days"

# Grid
h_elem: 30
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Output
output_default_diagnostics: false
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"

# Moisture, precipitation and surface
moist: "equil"
precip_model: "0M"
surface_setup: "DefaultMoninObukhov"

# Radiation and cloud fraction
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"
dt_cloud_fraction: "1hours"

# Turbulence and convection (EDMFX)
turbconv: "diagnostic_edmfx"
prognostic_tke: true
implicit_diffusion: true
approximate_linear_solve_iters: 2
edmfx_upwinding: "first_order"
edmfx_entr_model: "Generalized"
edmfx_detr_model: "Generalized"
edmfx_nh_pressure: true
edmfx_sgs_mass_flux: true
edmfx_sgs_diffusive_flux: true
29 changes: 29 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_chap_ws_1process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Run configuration used by the 1-GPU step of the CHAP weak-scaling group.
# The weak-scaling configs differ from one another only in job_id and
# h_elem (30 here, 42 for 2 processes, 60 for 4 processes).
# NOTE(review): the section headings below are inferred from key names only;
# confirm the grouping against the driver's config schema.
job_id: "gpu_aquaplanet_chap_ws_1process"
toml:
  - toml/diagnostic_edmfx_box.toml

# Time stepping
dt: "50secs"
t_end: "1days"

# Grid
h_elem: 30
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Output
output_default_diagnostics: false
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"

# Moisture, precipitation and surface
moist: "equil"
precip_model: "0M"
surface_setup: "DefaultMoninObukhov"

# Radiation and cloud fraction
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"
dt_cloud_fraction: "1hours"

# Turbulence and convection (EDMFX)
turbconv: "diagnostic_edmfx"
prognostic_tke: true
implicit_diffusion: true
approximate_linear_solve_iters: 2
edmfx_upwinding: "first_order"
edmfx_entr_model: "Generalized"
edmfx_detr_model: "Generalized"
edmfx_nh_pressure: true
edmfx_sgs_mass_flux: true
edmfx_sgs_diffusive_flux: true
29 changes: 29 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_chap_ws_2process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Run configuration used by the 2-GPU step of the CHAP weak-scaling group.
# The weak-scaling configs differ from one another only in job_id and
# h_elem (30 for 1 process, 42 here, 60 for 4 processes).
# NOTE(review): the section headings below are inferred from key names only;
# confirm the grouping against the driver's config schema.
job_id: "gpu_aquaplanet_chap_ws_2process"
toml:
  - toml/diagnostic_edmfx_box.toml

# Time stepping
dt: "50secs"
t_end: "1days"

# Grid
h_elem: 42
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Output
output_default_diagnostics: false
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"

# Moisture, precipitation and surface
moist: "equil"
precip_model: "0M"
surface_setup: "DefaultMoninObukhov"

# Radiation and cloud fraction
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"
dt_cloud_fraction: "1hours"

# Turbulence and convection (EDMFX)
turbconv: "diagnostic_edmfx"
prognostic_tke: true
implicit_diffusion: true
approximate_linear_solve_iters: 2
edmfx_upwinding: "first_order"
edmfx_entr_model: "Generalized"
edmfx_detr_model: "Generalized"
edmfx_nh_pressure: true
edmfx_sgs_mass_flux: true
edmfx_sgs_diffusive_flux: true
29 changes: 29 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_chap_ws_4process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Run configuration used by the 4-GPU step of the CHAP weak-scaling group.
# The weak-scaling configs differ from one another only in job_id and
# h_elem (30 for 1 process, 42 for 2 processes, 60 here).
# NOTE(review): the section headings below are inferred from key names only;
# confirm the grouping against the driver's config schema.
job_id: "gpu_aquaplanet_chap_ws_4process"
toml:
  - toml/diagnostic_edmfx_box.toml

# Time stepping
dt: "50secs"
t_end: "1days"

# Grid
h_elem: 60
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Output
output_default_diagnostics: false
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"

# Moisture, precipitation and surface
moist: "equil"
precip_model: "0M"
surface_setup: "DefaultMoninObukhov"

# Radiation and cloud fraction
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"
dt_cloud_fraction: "1hours"

# Turbulence and convection (EDMFX)
turbconv: "diagnostic_edmfx"
prognostic_tke: true
implicit_diffusion: true
approximate_linear_solve_iters: 2
edmfx_upwinding: "first_order"
edmfx_entr_model: "Generalized"
edmfx_detr_model: "Generalized"
edmfx_nh_pressure: true
edmfx_sgs_mass_flux: true
edmfx_sgs_diffusive_flux: true
20 changes: 20 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_dyamond_2process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Run configuration used by the 2-GPU step of the DYAMOND strong-scaling
# group. Every setting except job_id matches
# gpu_aquaplanet_dyamond_4process.yml.
# NOTE(review): the section headings below are inferred from key names only;
# confirm the grouping against the driver's config schema.
job_id: "gpu_aquaplanet_dyamond_2process"
toml:
  - toml/longrun_aquaplanet_dyamond.toml

# Time stepping
dt: "50secs"
t_end: "12hours"

# Grid
h_elem: 30
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Output
output_default_diagnostics: false
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"

# Moisture, precipitation and surface
moist: "equil"
precip_model: "0M"
surface_setup: "DefaultMoninObukhov"

# Radiation
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"

# Diffusion and sponge
# vert_diff stays a quoted string, exactly as originally written; do not
# convert it to a boolean without checking what the driver expects.
vert_diff: "true"
rayleigh_sponge: true
20 changes: 20 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_dyamond_4process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Run configuration used by the 4-GPU step of the DYAMOND strong-scaling
# group. Every setting except job_id matches
# gpu_aquaplanet_dyamond_2process.yml.
# NOTE(review): the section headings below are inferred from key names only;
# confirm the grouping against the driver's config schema.
job_id: "gpu_aquaplanet_dyamond_4process"
toml:
  - toml/longrun_aquaplanet_dyamond.toml

# Time stepping
dt: "50secs"
t_end: "12hours"

# Grid
h_elem: 30
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Output
output_default_diagnostics: false
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"

# Moisture, precipitation and surface
moist: "equil"
precip_model: "0M"
surface_setup: "DefaultMoninObukhov"

# Radiation
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"

# Diffusion and sponge
# vert_diff stays a quoted string, exactly as originally written; do not
# convert it to a boolean without checking what the driver expects.
vert_diff: "true"
rayleigh_sponge: true

0 comments on commit 41b6f79

Please sign in to comment.