Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update resolution for gpu_aquaplanet_chap #2724

Merged
merged 1 commit into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 146 additions & 24 deletions .buildkite/gpu_pipeline/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,30 +55,7 @@ steps:
agents:
slurm_gpus: 1
slurm_cpus_per_task: 4

- label: "gpu_aquaplanet_dyamond"
command:
- mkdir -p gpu_aquaplanet_dyamond
- >
nsys profile --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_dyamond/report
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond.yml
artifact_paths: "gpu_aquaplanet_dyamond/*"
agents:
slurm_gpus: 1
slurm_cpus_per_task: 4

- label: "gpu_aquaplanet_chap"
command:
- mkdir -p gpu_aquaplanet_chap
- >
nsys profile --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_chap/report
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap.yml
artifact_paths: "gpu_aquaplanet_chap/*"
agents:
slurm_gpus: 1
slurm_cpus_per_task: 4
slurm_exclusive:

- label: "moist Held-Suarez"
key: "gpu_hs_rhoe_equil_55km_nz63_0M"
Expand All @@ -92,6 +69,7 @@ steps:
agents:
slurm_gpus: 1
slurm_cpus_per_task: 4
slurm_exclusive:

- label: "moist Held-Suarez - 4 gpus"
key: "gpu_hs_rhoe_equil_55km_nz63_0M_4process"
Expand All @@ -107,6 +85,7 @@ steps:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 4
slurm_exclusive:

- label: "dry baroclinic wave - 4 gpus"
key: "target_gpu_implicit_baroclinic_wave_4process"
Expand All @@ -122,3 +101,146 @@ steps:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 4
slurm_exclusive:

# CHAP strong scaling: the same driver is run with 1, 2 and 4 Slurm tasks,
# one GPU per task (slurm_gpus_per_task: 1). Each step points at its own
# config file and uploads its own output directory.
- group: "CHAP GPU strong scaling"
  steps:

    # Single task: julia is started directly (no srun).
    - label: "gpu_aquaplanet_chap - strong scaling - 1 GPU"
      command:
        - mkdir -p gpu_aquaplanet_chap
        - >
          julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
          --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap.yml
      artifact_paths: "gpu_aquaplanet_chap/*"
      agents:
        slurm_gpus_per_task: 1
        slurm_cpus_per_task: 4
        slurm_ntasks: 1
        slurm_mem: 32G
        slurm_exclusive:

    # Two tasks: julia is launched through srun.
    - label: "gpu_aquaplanet_chap - strong scaling - 2 GPUs"
      command:
        - mkdir -p gpu_aquaplanet_chap_2process
        - >
          srun --cpu-bind=threads --cpus-per-task=4
          julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
          --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_2process.yml
      artifact_paths: "gpu_aquaplanet_chap_2process/*"
      agents:
        slurm_gpus_per_task: 1
        slurm_cpus_per_task: 4
        slurm_ntasks: 2
        slurm_mem: 32G
        slurm_exclusive:

    # Four tasks: julia is launched through srun.
    - label: "gpu_aquaplanet_chap - strong scaling - 4 GPUs"
      command:
        - mkdir -p gpu_aquaplanet_chap_4process
        - >
          srun --cpu-bind=threads --cpus-per-task=4
          julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
          --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_4process.yml
      artifact_paths: "gpu_aquaplanet_chap_4process/*"
      agents:
        slurm_gpus_per_task: 1
        slurm_cpus_per_task: 4
        slurm_ntasks: 4
        slurm_mem: 32G
        slurm_exclusive:

# CHAP weak scaling: 1, 2 and 4 Slurm tasks with one GPU per task, each step
# using a dedicated gpu_aquaplanet_chap_ws_<N>process.yml config.
# Only the 2- and 4-task steps set slurm_time.
- group: "CHAP GPU weak scaling"
  steps:

    # Single task: julia is started directly (no srun).
    - label: "gpu_aquaplanet_chap - weak scaling - 1 GPU"
      command:
        - mkdir -p gpu_aquaplanet_chap_ws_1process
        - >
          julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
          --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_ws_1process.yml
      artifact_paths: "gpu_aquaplanet_chap_ws_1process/*"
      agents:
        slurm_gpus_per_task: 1
        slurm_cpus_per_task: 4
        slurm_ntasks: 1
        slurm_mem: 32G
        slurm_exclusive:

    # Two tasks: julia is launched through srun.
    - label: "gpu_aquaplanet_chap - weak scaling - 2 GPUs"
      command:
        - mkdir -p gpu_aquaplanet_chap_ws_2process
        - >
          srun --cpu-bind=threads --cpus-per-task=4
          julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
          --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_ws_2process.yml
      artifact_paths: "gpu_aquaplanet_chap_ws_2process/*"
      agents:
        slurm_gpus_per_task: 1
        slurm_cpus_per_task: 4
        slurm_ntasks: 2
        slurm_mem: 32G
        slurm_time: 8:00:00
        slurm_exclusive:

    # Four tasks: julia is launched through srun.
    - label: "gpu_aquaplanet_chap - weak scaling - 4 GPUs"
      command:
        - mkdir -p gpu_aquaplanet_chap_ws_4process
        - >
          srun --cpu-bind=threads --cpus-per-task=4
          julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
          --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_ws_4process.yml
      artifact_paths: "gpu_aquaplanet_chap_ws_4process/*"
      agents:
        slurm_gpus_per_task: 1
        slurm_cpus_per_task: 4
        slurm_ntasks: 4
        slurm_mem: 32G
        slurm_time: 8:00:00
        slurm_exclusive:

# DYAMOND strong scaling: the aquaplanet DYAMOND case on 1, 2 and 4 GPUs.
# The multi-task steps launch julia through srun, matching the CHAP
# multi-GPU steps in this pipeline, so that slurm_ntasks processes are
# actually started.
# NOTE(review): assumes a bare command in these steps runs only once inside
# the Slurm allocation, as for the CHAP steps - confirm against the agent
# setup.
- group: "DYAMOND GPU strong scaling"
  steps:

    # Single GPU: run under the nsys profiler; the report is written into
    # the uploaded artifact directory.
    - label: "gpu_aquaplanet_dyamond - 1 GPU"
      command:
        - mkdir -p gpu_aquaplanet_dyamond
        - >
          nsys profile --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_dyamond/report
          julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
          --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond.yml
      artifact_paths: "gpu_aquaplanet_dyamond/*"
      agents:
        slurm_gpus: 1
        slurm_cpus_per_task: 4
        slurm_exclusive:

    # Two tasks, one GPU each: launched through srun.
    - label: "gpu_aquaplanet_dyamond - 2 GPUs"
      command:
        - mkdir -p gpu_aquaplanet_dyamond_2process
        - >
          srun --cpu-bind=threads --cpus-per-task=4
          julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
          --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_2process.yml
      artifact_paths: "gpu_aquaplanet_dyamond_2process/*"
      agents:
        slurm_gpus_per_task: 1
        slurm_cpus_per_task: 4
        slurm_ntasks: 2
        slurm_mem: 32G
        slurm_time: 8:00:00
        slurm_exclusive:

    # Four tasks, one GPU each: launched through srun.
    - label: "gpu_aquaplanet_dyamond - 4 GPUs"
      command:
        - mkdir -p gpu_aquaplanet_dyamond_4process
        - >
          srun --cpu-bind=threads --cpus-per-task=4
          julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
          --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_4process.yml
      artifact_paths: "gpu_aquaplanet_dyamond_4process/*"
      agents:
        slurm_gpus_per_task: 1
        slurm_cpus_per_task: 4
        slurm_ntasks: 4
        slurm_mem: 32G
        slurm_time: 8:00:00
        slurm_exclusive:
2 changes: 1 addition & 1 deletion config/gpu_configs/gpu_aquaplanet_chap.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ job_id: gpu_aquaplanet_chap
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"
output_default_diagnostics: false
h_elem: 16
h_elem: 30
z_max: 55000.0
z_elem: 63
dz_bottom: 30.0
Expand Down
29 changes: 29 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_chap_2process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Run config passed via --config_file by the
# "gpu_aquaplanet_chap - strong scaling - 2 GPUs" pipeline step.

# Job name and output/saving switches
job_id: "gpu_aquaplanet_chap_2process"
output_default_diagnostics: false
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"

# Element counts and vertical extent/spacing keys
h_elem: 30
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Step size and run length
dt: "100secs"
t_end: "1days"

# Model component selections
moist: "equil"
precip_model: "0M"
surface_setup: "DefaultMoninObukhov"
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"
dt_cloud_fraction: "1hours"

# Solver-related switches
implicit_diffusion: true
approximate_linear_solve_iters: 2

# turbconv / edmfx_* switches
turbconv: "diagnostic_edmfx"
prognostic_tke: true
edmfx_upwinding: "first_order"
edmfx_entr_model: "Generalized"
edmfx_detr_model: "Generalized"
edmfx_nh_pressure: true
edmfx_sgs_mass_flux: true
edmfx_sgs_diffusive_flux: true

# TOML file list
toml:
  - toml/diagnostic_edmfx_box.toml
29 changes: 29 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_chap_4process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Run config passed via --config_file by the
# "gpu_aquaplanet_chap - strong scaling - 4 GPUs" pipeline step.

# Job name and output/saving switches
job_id: "gpu_aquaplanet_chap_4process"
output_default_diagnostics: false
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"

# Element counts and vertical extent/spacing keys
h_elem: 30
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Step size and run length
dt: "100secs"
t_end: "1days"

# Model component selections
moist: "equil"
precip_model: "0M"
surface_setup: "DefaultMoninObukhov"
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"
dt_cloud_fraction: "1hours"

# Solver-related switches
implicit_diffusion: true
approximate_linear_solve_iters: 2

# turbconv / edmfx_* switches
turbconv: "diagnostic_edmfx"
prognostic_tke: true
edmfx_upwinding: "first_order"
edmfx_entr_model: "Generalized"
edmfx_detr_model: "Generalized"
edmfx_nh_pressure: true
edmfx_sgs_mass_flux: true
edmfx_sgs_diffusive_flux: true

# TOML file list
toml:
  - toml/diagnostic_edmfx_box.toml
29 changes: 29 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_chap_ws_1process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Run config passed via --config_file by the
# "gpu_aquaplanet_chap - weak scaling - 1 GPU" pipeline step.

# Job name and output/saving switches
job_id: "gpu_aquaplanet_chap_ws_1process"
output_default_diagnostics: false
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"

# Element counts and vertical extent/spacing keys
h_elem: 30
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Step size and run length
dt: "50secs"
t_end: "1days"

# Model component selections
moist: "equil"
precip_model: "0M"
surface_setup: "DefaultMoninObukhov"
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"
dt_cloud_fraction: "1hours"

# Solver-related switches
implicit_diffusion: true
approximate_linear_solve_iters: 2

# turbconv / edmfx_* switches
turbconv: "diagnostic_edmfx"
prognostic_tke: true
edmfx_upwinding: "first_order"
edmfx_entr_model: "Generalized"
edmfx_detr_model: "Generalized"
edmfx_nh_pressure: true
edmfx_sgs_mass_flux: true
edmfx_sgs_diffusive_flux: true

# TOML file list
toml:
  - toml/diagnostic_edmfx_box.toml
29 changes: 29 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_chap_ws_2process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Run config passed via --config_file by the
# "gpu_aquaplanet_chap - weak scaling - 2 GPUs" pipeline step.

# Job name and output/saving switches
job_id: "gpu_aquaplanet_chap_ws_2process"
output_default_diagnostics: false
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"

# Element counts and vertical extent/spacing keys
h_elem: 42
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Step size and run length
dt: "50secs"
t_end: "1days"

# Model component selections
moist: "equil"
precip_model: "0M"
surface_setup: "DefaultMoninObukhov"
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"
dt_cloud_fraction: "1hours"

# Solver-related switches
implicit_diffusion: true
approximate_linear_solve_iters: 2

# turbconv / edmfx_* switches
turbconv: "diagnostic_edmfx"
prognostic_tke: true
edmfx_upwinding: "first_order"
edmfx_entr_model: "Generalized"
edmfx_detr_model: "Generalized"
edmfx_nh_pressure: true
edmfx_sgs_mass_flux: true
edmfx_sgs_diffusive_flux: true

# TOML file list
toml:
  - toml/diagnostic_edmfx_box.toml
29 changes: 29 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_chap_ws_4process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Run config passed via --config_file by the
# "gpu_aquaplanet_chap - weak scaling - 4 GPUs" pipeline step.

# Job name and output/saving switches
job_id: "gpu_aquaplanet_chap_ws_4process"
output_default_diagnostics: false
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"

# Element counts and vertical extent/spacing keys
h_elem: 60
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Step size and run length
dt: "50secs"
t_end: "1days"

# Model component selections
moist: "equil"
precip_model: "0M"
surface_setup: "DefaultMoninObukhov"
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"
dt_cloud_fraction: "1hours"

# Solver-related switches
implicit_diffusion: true
approximate_linear_solve_iters: 2

# turbconv / edmfx_* switches
turbconv: "diagnostic_edmfx"
prognostic_tke: true
edmfx_upwinding: "first_order"
edmfx_entr_model: "Generalized"
edmfx_detr_model: "Generalized"
edmfx_nh_pressure: true
edmfx_sgs_mass_flux: true
edmfx_sgs_diffusive_flux: true

# TOML file list
toml:
  - toml/diagnostic_edmfx_box.toml
20 changes: 20 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_dyamond_2process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Run config passed via --config_file by the
# "gpu_aquaplanet_dyamond - 2 GPUs" pipeline step.

# Job name and output/saving switches
job_id: "gpu_aquaplanet_dyamond_2process"
output_default_diagnostics: false
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"

# Element counts and vertical extent/spacing keys
h_elem: 30
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Step size and run length
dt: "50secs"
t_end: "12hours"

# Model component selections
moist: "equil"
precip_model: "0M"
surface_setup: "DefaultMoninObukhov"
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"

# vert_diff is a quoted string here (not a YAML boolean); kept as written.
vert_diff: "true"
rayleigh_sponge: true

# TOML file list
toml:
  - toml/longrun_aquaplanet_dyamond.toml
20 changes: 20 additions & 0 deletions config/gpu_configs/gpu_aquaplanet_dyamond_4process.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Run config passed via --config_file by the
# "gpu_aquaplanet_dyamond - 4 GPUs" pipeline step.

# Job name and output/saving switches
job_id: "gpu_aquaplanet_dyamond_4process"
output_default_diagnostics: false
dt_save_state_to_disk: "Inf"
dt_save_to_sol: "Inf"

# Element counts and vertical extent/spacing keys
h_elem: 30
z_elem: 63
z_max: 55000.0
dz_bottom: 30.0
dz_top: 3000.0

# Step size and run length
dt: "50secs"
t_end: "12hours"

# Model component selections
moist: "equil"
precip_model: "0M"
surface_setup: "DefaultMoninObukhov"
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"

# vert_diff is a quoted string here (not a YAML boolean); kept as written.
vert_diff: "true"
rayleigh_sponge: true

# TOML file list
toml:
  - toml/longrun_aquaplanet_dyamond.toml
Loading