From 2c4a70c08462937a066da144676819d962e585d2 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Mon, 16 Oct 2023 12:56:52 -0700 Subject: [PATCH 1/3] simplify benchmark --- .../gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config/gpu_configs/gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km.yml b/config/gpu_configs/gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km.yml index 4a2eb8b36d3..cd51c1c0e7e 100644 --- a/config/gpu_configs/gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km.yml +++ b/config/gpu_configs/gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km.yml @@ -1,12 +1,12 @@ dt_save_to_disk: "10days" -dt: "150secs" -t_end: "300days" -h_elem: 16 +dt: "100secs" +t_end: "1days" +h_elem: 30 z_elem: 63 dz_bottom: 30.0 dz_top: 3000.0 z_max: 55000.0 -kappa_4: 2.0e16 +kappa_4: 1.0e15 vert_diff: "true" moist: "equil" precip_model: "0M" From 3f05a5c3953670d22c181280aa17c4957a19e5c5 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Mon, 16 Oct 2023 14:43:59 -0700 Subject: [PATCH 2/3] remove soft_fail, clean up pipelines --- .buildkite/gpu_pipeline/pipeline.yml | 2 -- .buildkite/pipeline.yml | 12 +++++------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/.buildkite/gpu_pipeline/pipeline.yml b/.buildkite/gpu_pipeline/pipeline.yml index ffc44d31cff..a4993c19606 100644 --- a/.buildkite/gpu_pipeline/pipeline.yml +++ b/.buildkite/gpu_pipeline/pipeline.yml @@ -61,7 +61,6 @@ steps: artifact_paths: "gpu_aquaplanet_dyamond/*" agents: slurm_gpus: 1 - slurm_time: 23:00:00 - label: "moist Held-Suarez" key: "gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km" @@ -74,7 +73,6 @@ steps: artifact_paths: "gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km/*" agents: slurm_gpus: 1 - slurm_time: 23:00:00 - label: "dry baroclinic wave - 4 gpus" key: "target_gpu_implicit_baroclinic_wave_4process" diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index f29c745e852..98bca456ec7 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -942,21 +942,19 @@ steps: slurm_mem: 32G - label: "GPU: GPU moist Held-Suarez" - command: > - julia --project -e 'using CUDA; CUDA.versioninfo()' - - julia --color=yes --project=examples examples/hybrid/driver.jl - --config_file $GPU_CONFIG_PATH/central_gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km.yml + command: + - julia --project -e 'using CUDA; CUDA.versioninfo()' + - > + julia --color=yes --project=examples examples/hybrid/driver.jl + --config_file $GPU_CONFIG_PATH/central_gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km.yml artifact_paths: "central_gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km/*" agents: slurm_gpus: 1 - soft_fail: true - label: "GPU: gpu_aquaplanet_dyamond" command: - mkdir -p gpu_aquaplanet_dyamond - > - nsys profile --trace=nvtx,cuda --output=gpu_aquaplanet_dyamond/report julia --color=yes --project=examples examples/hybrid/driver.jl --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond.yml artifact_paths: "gpu_aquaplanet_dyamond/*" From 3cd508088b950235120e77379a0bb8ec59e76da0 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Mon, 16 Oct 2023 14:50:34 -0700 Subject: [PATCH 3/3] reduce length of run, increase diagnostic freq --- .../central_gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/gpu_configs/central_gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km.yml b/config/gpu_configs/central_gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km.yml index af11c4ca086..6235ca8979a 100644 --- a/config/gpu_configs/central_gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km.yml +++ b/config/gpu_configs/central_gpu_hs_rhoe_equilmoist_nz63_0M_55km_rs35km.yml @@ -1,6 +1,6 @@ -dt_save_to_disk: "10days" +dt_save_to_disk: "1days" dt: "150secs" -t_end: "300days" +t_end: "2days" h_elem: 16 z_elem: 63 dz_bottom: 30.0