diff --git a/.buildkite/gpu_pipeline/pipeline.yml b/.buildkite/gpu_pipeline/pipeline.yml index aa22c63646..5d4d4ae175 100644 --- a/.buildkite/gpu_pipeline/pipeline.yml +++ b/.buildkite/gpu_pipeline/pipeline.yml @@ -103,63 +103,64 @@ steps: slurm_ntasks: 4 slurm_exclusive: - - group: "CHAP GPU strong scaling" + - group: "DYAMOND GPU strong scaling" steps: - - label: "gpu_aquaplanet_chap - strong scaling - 1 GPU" + - label: "gpu_aquaplanet_dyamond - 1 GPU" command: - - mkdir -p gpu_aquaplanet_chap + - mkdir -p gpu_aquaplanet_dyamond - > + nsys profile --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_dyamond/report julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl - --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap.yml - artifact_paths: "gpu_aquaplanet_chap/*" + --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond.yml + artifact_paths: "gpu_aquaplanet_dyamond/*" agents: - slurm_gpus_per_task: 1 + slurm_gpus: 1 slurm_cpus_per_task: 4 - slurm_ntasks: 1 - slurm_mem: 32G slurm_exclusive: - - label: "gpu_aquaplanet_chap - strong scaling - 2 GPUs" + - label: "gpu_aquaplanet_dyamond - 2 GPUs" command: - - mkdir -p gpu_aquaplanet_chap_2process + - mkdir -p gpu_aquaplanet_dyamond_2process - > srun --cpu-bind=threads --cpus-per-task=4 julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl - --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_2process.yml - artifact_paths: "gpu_aquaplanet_chap_2process/*" + --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_2process.yml + artifact_paths: "gpu_aquaplanet_dyamond_2process/*" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 slurm_ntasks: 2 slurm_mem: 32G + slurm_time: 8:00:00 slurm_exclusive: - - label: "gpu_aquaplanet_chap - strong scaling - 4 GPUs" + - label: "gpu_aquaplanet_dyamond - 4 GPUs" command: - - mkdir -p gpu_aquaplanet_chap_4process + - mkdir -p gpu_aquaplanet_dyamond_4process - > srun --cpu-bind=threads --cpus-per-task=4 julia --threads=3 --color=yes 
--project=examples examples/hybrid/driver.jl - --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_4process.yml - artifact_paths: "gpu_aquaplanet_chap_4process/*" + --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_4process.yml + artifact_paths: "gpu_aquaplanet_dyamond_4process/*" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 slurm_ntasks: 4 slurm_mem: 32G + slurm_time: 8:00:00 slurm_exclusive: - - group: "CHAP GPU weak scaling" + - group: "DYAMOND GPU weak scaling" steps: - - label: "gpu_aquaplanet_chap - weak scaling - 1 GPU" + - label: "gpu_aquaplanet_dyamond - weak scaling - 1 GPU" command: - - mkdir -p gpu_aquaplanet_chap_ws_1process + - mkdir -p gpu_aquaplanet_dyamond_ws_1process - > julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl - --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_ws_1process.yml - artifact_paths: "gpu_aquaplanet_chap_ws_1process/*" + --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_1process.yml + artifact_paths: "gpu_aquaplanet_dyamond_ws_1process/*" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 @@ -167,14 +168,14 @@ steps: slurm_mem: 32G slurm_exclusive: - - label: "gpu_aquaplanet_chap - weak scaling - 2 GPUs" + - label: "gpu_aquaplanet_dyamond - weak scaling - 2 GPUs" command: - - mkdir -p gpu_aquaplanet_chap_ws_2process + - mkdir -p gpu_aquaplanet_dyamond_ws_2process - > srun --cpu-bind=threads --cpus-per-task=4 julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl - --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_ws_2process.yml - artifact_paths: "gpu_aquaplanet_chap_ws_2process/*" + --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_2process.yml + artifact_paths: "gpu_aquaplanet_dyamond_ws_2process/*" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 @@ -183,14 +184,14 @@ steps: slurm_time: 8:00:00 slurm_exclusive: - - label: "gpu_aquaplanet_chap - weak scaling - 4 GPUs" + - label: "gpu_aquaplanet_dyamond - weak scaling - 4 GPUs" command: - - 
mkdir -p gpu_aquaplanet_chap_ws_4process + - mkdir -p gpu_aquaplanet_dyamond_ws_4process - > srun --cpu-bind=threads --cpus-per-task=4 julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl - --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_chap_ws_4process.yml - artifact_paths: "gpu_aquaplanet_chap_ws_4process/*" + --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_4process.yml + artifact_paths: "gpu_aquaplanet_dyamond_ws_4process/*" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 @@ -199,50 +200,19 @@ steps: slurm_time: 8:00:00 slurm_exclusive: - - group: "DYAMOND GPU strong scaling" + - group: "Diagnostic EDMF GPU" steps: - - label: "gpu_aquaplanet_dyamond - 1 GPU" + - label: "gpu_aquaplanet_chap - 1 GPU" command: - - mkdir -p gpu_aquaplanet_dyamond - - > - nsys profile --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_dyamond/report - julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl - --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond.yml - artifact_paths: "gpu_aquaplanet_dyamond/*" - agents: - slurm_gpus: 1 - slurm_cpus_per_task: 4 - slurm_exclusive: - - - label: "gpu_aquaplanet_dyamond - 2 GPUs" - command: - - mkdir -p gpu_aquaplanet_dyamond_2process - - > - srun --cpu-bind=threads --cpus-per-task=4 - julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl - --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_2process.yml - artifact_paths: "gpu_aquaplanet_dyamond_2process/*" - agents: - slurm_gpus_per_task: 1 - slurm_cpus_per_task: 4 - slurm_ntasks: 2 - slurm_mem: 32G - slurm_time: 8:00:00 - slurm_exclusive: - - - label: "gpu_aquaplanet_dyamond - 4 GPUs" - command: - - mkdir -p gpu_aquaplanet_dyamond_4process + - mkdir -p gpu_aquaplanet_chap - > - srun --cpu-bind=threads --cpus-per-task=4 julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl - --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_4process.yml - artifact_paths: 
"gpu_aquaplanet_dyamond_4process/*" + --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_diagedmf.yml + artifact_paths: "gpu_aquaplanet_chap/*" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 - slurm_ntasks: 4 + slurm_ntasks: 1 slurm_mem: 32G - slurm_time: 8:00:00 slurm_exclusive: diff --git a/config/gpu_configs/gpu_aquaplanet_chap_2process.yml b/config/gpu_configs/gpu_aquaplanet_chap_2process.yml deleted file mode 100644 index f9b649d7f0..0000000000 --- a/config/gpu_configs/gpu_aquaplanet_chap_2process.yml +++ /dev/null @@ -1,29 +0,0 @@ -job_id: gpu_aquaplanet_chap_2process -dt_save_state_to_disk: "Inf" -dt_save_to_sol: "Inf" -output_default_diagnostics: false -h_elem: 30 -z_max: 55000.0 -z_elem: 63 -dz_bottom: 30.0 -dz_top: 3000.0 -moist: equil -surface_setup: DefaultMoninObukhov -rad: allskywithclear -idealized_insolation: false -dt_rad: 1hours -dt_cloud_fraction: 1hours -turbconv: diagnostic_edmfx -implicit_diffusion: true -approximate_linear_solve_iters: 2 -prognostic_tke: true -edmfx_upwinding: first_order -edmfx_entr_model: "Generalized" -edmfx_detr_model: "Generalized" -edmfx_nh_pressure: true -edmfx_sgs_mass_flux: true -edmfx_sgs_diffusive_flux: true -precip_model: 0M -dt: 100secs -t_end: 1days -toml: [toml/diagnostic_edmfx_box.toml] diff --git a/config/gpu_configs/gpu_aquaplanet_chap_4process.yml b/config/gpu_configs/gpu_aquaplanet_chap_4process.yml deleted file mode 100644 index 0faddbf60f..0000000000 --- a/config/gpu_configs/gpu_aquaplanet_chap_4process.yml +++ /dev/null @@ -1,29 +0,0 @@ -job_id: gpu_aquaplanet_chap_4process -dt_save_state_to_disk: "Inf" -dt_save_to_sol: "Inf" -output_default_diagnostics: false -h_elem: 30 -z_max: 55000.0 -z_elem: 63 -dz_bottom: 30.0 -dz_top: 3000.0 -moist: equil -surface_setup: DefaultMoninObukhov -rad: allskywithclear -idealized_insolation: false -dt_rad: 1hours -dt_cloud_fraction: 1hours -turbconv: diagnostic_edmfx -implicit_diffusion: true 
-approximate_linear_solve_iters: 2 -prognostic_tke: true -edmfx_upwinding: first_order -edmfx_entr_model: "Generalized" -edmfx_detr_model: "Generalized" -edmfx_nh_pressure: true -edmfx_sgs_mass_flux: true -edmfx_sgs_diffusive_flux: true -precip_model: 0M -dt: 100secs -t_end: 1days -toml: [toml/diagnostic_edmfx_box.toml] diff --git a/config/gpu_configs/gpu_aquaplanet_chap_ws_1process.yml b/config/gpu_configs/gpu_aquaplanet_chap_ws_1process.yml deleted file mode 100644 index 06f70b2277..0000000000 --- a/config/gpu_configs/gpu_aquaplanet_chap_ws_1process.yml +++ /dev/null @@ -1,29 +0,0 @@ -job_id: gpu_aquaplanet_chap_ws_1process -dt_save_state_to_disk: "Inf" -dt_save_to_sol: "Inf" -output_default_diagnostics: false -h_elem: 30 -z_max: 55000.0 -z_elem: 63 -dz_bottom: 30.0 -dz_top: 3000.0 -moist: equil -surface_setup: DefaultMoninObukhov -rad: allskywithclear -idealized_insolation: false -dt_rad: 1hours -dt_cloud_fraction: 1hours -turbconv: diagnostic_edmfx -implicit_diffusion: true -approximate_linear_solve_iters: 2 -prognostic_tke: true -edmfx_upwinding: first_order -edmfx_entr_model: "Generalized" -edmfx_detr_model: "Generalized" -edmfx_nh_pressure: true -edmfx_sgs_mass_flux: true -edmfx_sgs_diffusive_flux: true -precip_model: 0M -dt: 50secs -t_end: 1days -toml: [toml/diagnostic_edmfx_box.toml] diff --git a/config/gpu_configs/gpu_aquaplanet_chap_ws_2process.yml b/config/gpu_configs/gpu_aquaplanet_chap_ws_2process.yml deleted file mode 100644 index 49f50ce316..0000000000 --- a/config/gpu_configs/gpu_aquaplanet_chap_ws_2process.yml +++ /dev/null @@ -1,29 +0,0 @@ -job_id: gpu_aquaplanet_chap_ws_2process -dt_save_state_to_disk: "Inf" -dt_save_to_sol: "Inf" -output_default_diagnostics: false -h_elem: 42 -z_max: 55000.0 -z_elem: 63 -dz_bottom: 30.0 -dz_top: 3000.0 -moist: equil -surface_setup: DefaultMoninObukhov -rad: allskywithclear -idealized_insolation: false -dt_rad: 1hours -dt_cloud_fraction: 1hours -turbconv: diagnostic_edmfx -implicit_diffusion: true 
-approximate_linear_solve_iters: 2 -prognostic_tke: true -edmfx_upwinding: first_order -edmfx_entr_model: "Generalized" -edmfx_detr_model: "Generalized" -edmfx_nh_pressure: true -edmfx_sgs_mass_flux: true -edmfx_sgs_diffusive_flux: true -precip_model: 0M -dt: 50secs -t_end: 1days -toml: [toml/diagnostic_edmfx_box.toml] diff --git a/config/gpu_configs/gpu_aquaplanet_chap_ws_4process.yml b/config/gpu_configs/gpu_aquaplanet_chap_ws_4process.yml deleted file mode 100644 index fd666d4c08..0000000000 --- a/config/gpu_configs/gpu_aquaplanet_chap_ws_4process.yml +++ /dev/null @@ -1,29 +0,0 @@ -job_id: gpu_aquaplanet_chap_ws_4process -dt_save_state_to_disk: "Inf" -dt_save_to_sol: "Inf" -output_default_diagnostics: false -h_elem: 60 -z_max: 55000.0 -z_elem: 63 -dz_bottom: 30.0 -dz_top: 3000.0 -moist: equil -surface_setup: DefaultMoninObukhov -rad: allskywithclear -idealized_insolation: false -dt_rad: 1hours -dt_cloud_fraction: 1hours -turbconv: diagnostic_edmfx -implicit_diffusion: true -approximate_linear_solve_iters: 2 -prognostic_tke: true -edmfx_upwinding: first_order -edmfx_entr_model: "Generalized" -edmfx_detr_model: "Generalized" -edmfx_nh_pressure: true -edmfx_sgs_mass_flux: true -edmfx_sgs_diffusive_flux: true -precip_model: 0M -dt: 50secs -t_end: 1days -toml: [toml/diagnostic_edmfx_box.toml] diff --git a/config/gpu_configs/gpu_aquaplanet_chap.yml b/config/gpu_configs/gpu_aquaplanet_diagedmf.yml similarity index 97% rename from config/gpu_configs/gpu_aquaplanet_chap.yml rename to config/gpu_configs/gpu_aquaplanet_diagedmf.yml index 6cb4218066..145c1cf0dc 100644 --- a/config/gpu_configs/gpu_aquaplanet_chap.yml +++ b/config/gpu_configs/gpu_aquaplanet_diagedmf.yml @@ -25,5 +25,5 @@ edmfx_sgs_mass_flux: true edmfx_sgs_diffusive_flux: true precip_model: 0M dt: 100secs -t_end: 1days +t_end: 12hours toml: [toml/diagnostic_edmfx_box.toml] diff --git a/config/gpu_configs/gpu_aquaplanet_dyamond.yml b/config/gpu_configs/gpu_aquaplanet_dyamond.yml index 468481d899..fe8684e826 
100644 --- a/config/gpu_configs/gpu_aquaplanet_dyamond.yml +++ b/config/gpu_configs/gpu_aquaplanet_dyamond.yml @@ -11,7 +11,10 @@ precip_model: "0M" rad: "allskywithclear" idealized_insolation: false dt_rad: "1hours" -vert_diff: "true" +dt_cloud_fraction: "1hours" +vert_diff: "FriersonDiffusion" +implicit_diffusion: true +approximate_linear_solve_iters: 2 surface_setup: "DefaultMoninObukhov" rayleigh_sponge: true dt: "100secs" diff --git a/config/gpu_configs/gpu_aquaplanet_dyamond_2process.yml b/config/gpu_configs/gpu_aquaplanet_dyamond_2process.yml index e754ff1a7e..a7b6dd41e7 100644 --- a/config/gpu_configs/gpu_aquaplanet_dyamond_2process.yml +++ b/config/gpu_configs/gpu_aquaplanet_dyamond_2process.yml @@ -11,7 +11,10 @@ precip_model: "0M" rad: "allskywithclear" idealized_insolation: false dt_rad: "1hours" -vert_diff: "true" +dt_cloud_fraction: "1hours" +vert_diff: "FriersonDiffusion" +implicit_diffusion: true +approximate_linear_solve_iters: 2 surface_setup: "DefaultMoninObukhov" rayleigh_sponge: true dt: "100secs" diff --git a/config/gpu_configs/gpu_aquaplanet_dyamond_4process.yml b/config/gpu_configs/gpu_aquaplanet_dyamond_4process.yml index bc554a83b2..7329f3437e 100644 --- a/config/gpu_configs/gpu_aquaplanet_dyamond_4process.yml +++ b/config/gpu_configs/gpu_aquaplanet_dyamond_4process.yml @@ -11,7 +11,10 @@ precip_model: "0M" rad: "allskywithclear" idealized_insolation: false dt_rad: "1hours" -vert_diff: "true" +dt_cloud_fraction: "1hours" +vert_diff: "FriersonDiffusion" +implicit_diffusion: true +approximate_linear_solve_iters: 2 surface_setup: "DefaultMoninObukhov" rayleigh_sponge: true dt: "100secs" diff --git a/config/gpu_configs/gpu_aquaplanet_dyamond_ws_1process.yml b/config/gpu_configs/gpu_aquaplanet_dyamond_ws_1process.yml new file mode 100644 index 0000000000..d71238e704 --- /dev/null +++ b/config/gpu_configs/gpu_aquaplanet_dyamond_ws_1process.yml @@ -0,0 +1,23 @@ +dt_save_state_to_disk: "Inf" +dt_save_to_sol: "Inf" +output_default_diagnostics: 
false +h_elem: 30 +z_max: 55000.0 +z_elem: 63 +dz_bottom: 30.0 +dz_top: 3000.0 +moist: "equil" +precip_model: "0M" +rad: "allskywithclear" +idealized_insolation: false +dt_rad: "1hours" +dt_cloud_fraction: "1hours" +vert_diff: "FriersonDiffusion" +implicit_diffusion: true +approximate_linear_solve_iters: 2 +surface_setup: "DefaultMoninObukhov" +rayleigh_sponge: true +dt: "100secs" +t_end: "12hours" +job_id: "gpu_aquaplanet_dyamond_ws_1process" +toml: [toml/longrun_aquaplanet_dyamond.toml] diff --git a/config/gpu_configs/gpu_aquaplanet_dyamond_ws_2process.yml b/config/gpu_configs/gpu_aquaplanet_dyamond_ws_2process.yml new file mode 100644 index 0000000000..f134ebee27 --- /dev/null +++ b/config/gpu_configs/gpu_aquaplanet_dyamond_ws_2process.yml @@ -0,0 +1,23 @@ +dt_save_state_to_disk: "Inf" +dt_save_to_sol: "Inf" +output_default_diagnostics: false +h_elem: 42 +z_max: 55000.0 +z_elem: 63 +dz_bottom: 30.0 +dz_top: 3000.0 +moist: "equil" +precip_model: "0M" +rad: "allskywithclear" +idealized_insolation: false +dt_rad: "1hours" +dt_cloud_fraction: "1hours" +vert_diff: "FriersonDiffusion" +implicit_diffusion: true +approximate_linear_solve_iters: 2 +surface_setup: "DefaultMoninObukhov" +rayleigh_sponge: true +dt: "100secs" +t_end: "12hours" +job_id: "gpu_aquaplanet_dyamond_ws_2process" +toml: [toml/longrun_aquaplanet_dyamond.toml] diff --git a/config/gpu_configs/gpu_aquaplanet_dyamond_ws_4process.yml b/config/gpu_configs/gpu_aquaplanet_dyamond_ws_4process.yml new file mode 100644 index 0000000000..0bc52f263b --- /dev/null +++ b/config/gpu_configs/gpu_aquaplanet_dyamond_ws_4process.yml @@ -0,0 +1,23 @@ +dt_save_state_to_disk: "Inf" +dt_save_to_sol: "Inf" +output_default_diagnostics: false +h_elem: 60 +z_max: 55000.0 +z_elem: 63 +dz_bottom: 30.0 +dz_top: 3000.0 +moist: "equil" +precip_model: "0M" +rad: "allskywithclear" +idealized_insolation: false +dt_rad: "1hours" +dt_cloud_fraction: "1hours" +vert_diff: "FriersonDiffusion" +implicit_diffusion: true 
+approximate_linear_solve_iters: 2 +surface_setup: "DefaultMoninObukhov" +rayleigh_sponge: true +dt: "100secs" +t_end: "12hours" +job_id: "gpu_aquaplanet_dyamond_ws_4process" +toml: [toml/longrun_aquaplanet_dyamond.toml]