Skip to content

Commit

Permalink
Merge branch 'main' into zs/cloud_fraction_callback
Browse files Browse the repository at this point in the history
  • Loading branch information
charleskawczynski committed Apr 3, 2024
2 parents 041a767 + 6fafae9 commit fcd66df
Show file tree
Hide file tree
Showing 72 changed files with 2,186 additions and 897 deletions.
221 changes: 110 additions & 111 deletions .buildkite/gpu_pipeline/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ agents:
modules: julia/1.10.0 cuda/julia-pref openmpi/4.1.5-mpitrampoline nsight-systems/2024.2.1

env:
JULIA_CUDA_MEMORY_POOL: none
JULIA_MPI_HAS_CUDA: "true"
JULIA_NVTX_CALLBACKS: gc
JULIA_MAX_NUM_PRECOMPILE_FILES: 100
Expand Down Expand Up @@ -51,7 +50,7 @@ steps:
# nsys profile --trace=nvtx,mpi,cuda,osrt --output=target_gpu_implicit_baroclinic_wave/report
# julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
# --config_file ${GPU_CONFIG_PATH}target_gpu_implicit_baroclinic_wave.yml
# artifact_paths: "target_gpu_implicit_baroclinic_wave/*"
# artifact_paths: "target_gpu_implicit_baroclinic_wave/output_active/*"
# agents:
# slurm_gpus: 1
# slurm_cpus_per_task: 4
Expand All @@ -65,7 +64,7 @@ steps:
nsys profile --trace=nvtx,mpi,cuda,osrt --output=gpu_hs_rhoe_equil_55km_nz63_0M/report
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_hs_rhoe_equil_55km_nz63_0M.yml
artifact_paths: "gpu_hs_rhoe_equil_55km_nz63_0M/*"
artifact_paths: "gpu_hs_rhoe_equil_55km_nz63_0M/output_active/*"
agents:
slurm_gpus: 1
slurm_cpus_per_task: 4
Expand All @@ -80,7 +79,7 @@ steps:
# nsys profile --trace=nvtx,mpi,cuda,osrt --output=gpu_hs_rhoe_equil_55km_nz63_0M_4process/report-%q{PMI_RANK}
# julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
# --config_file ${GPU_CONFIG_PATH}gpu_hs_rhoe_equil_55km_nz63_0M_4process.yml
# artifact_paths: "gpu_hs_rhoe_equil_55km_nz63_0M_4process/*"
# artifact_paths: "gpu_hs_rhoe_equil_55km_nz63_0M_4process/output_active/*"
# agents:
# slurm_gpus_per_task: 1
# slurm_cpus_per_task: 4
Expand All @@ -96,7 +95,7 @@ steps:
# nsys profile --trace=osrt,nvtx,cuda,mpi,ucx --output=target_gpu_implicit_baroclinic_wave_4process/report-%q{PMI_RANK}
# julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
# --config_file ${GPU_CONFIG_PATH}target_gpu_implicit_baroclinic_wave_4process.yml
# artifact_paths: "target_gpu_implicit_baroclinic_wave_4process/*"
# artifact_paths: "target_gpu_implicit_baroclinic_wave_4process/output_active/*"
# agents:
# slurm_gpus_per_task: 1
# slurm_cpus_per_task: 4
Expand All @@ -113,7 +112,7 @@ steps:
srun --cpu-bind=threads --cpus-per-task=4
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ss_1process.yml
artifact_paths: "gpu_aquaplanet_dyamond_ss_1process/*"
artifact_paths: "gpu_aquaplanet_dyamond_ss_1process/output_active/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
Expand All @@ -122,114 +121,114 @@ steps:
slurm_time: 8:00:00
slurm_exclusive:

# - label: "gpu_aquaplanet_dyamond - strong scaling - 2 GPUs"
# command:
# - mkdir -p gpu_aquaplanet_dyamond_ss_2process
# - >
# srun --cpu-bind=threads --cpus-per-task=4
# julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
# --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ss_2process.yml
# artifact_paths: "gpu_aquaplanet_dyamond_ss_2process/*"
# agents:
# slurm_gpus_per_task: 1
# slurm_cpus_per_task: 4
# slurm_ntasks: 2
# slurm_mem: 32G
# slurm_time: 8:00:00
# slurm_exclusive:

# - label: "gpu_aquaplanet_dyamond - strong scaling - 4 GPUs"
# command:
# - mkdir -p gpu_aquaplanet_dyamond_ss_4process
# - >
# srun --cpu-bind=threads --cpus-per-task=4
# julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
# --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ss_4process.yml
# artifact_paths: "gpu_aquaplanet_dyamond_ss_4process/*"
# agents:
# slurm_gpus_per_task: 1
# slurm_cpus_per_task: 4
# slurm_ntasks: 4
# slurm_mem: 32G
# slurm_time: 8:00:00
# slurm_exclusive:

# - wait

# - label: "gpu_aquaplanet_dyamond - strong scaling plots"
# command:
# - mkdir -p gpu_aquaplanet_dyamond_ss
# - >
# julia --color=yes --project=examples post_processing/plot_gpu_strong_scaling.jl gpu_aquaplanet_dyamond_ss
# artifact_paths: "gpu_aquaplanet_dyamond_ss/*"
# agents:
# slurm_cpus_per_task: 1
# slurm_ntasks: 1
# slurm_exclusive:
# - label: "gpu_aquaplanet_dyamond - strong scaling - 2 GPUs"
# command:
# - mkdir -p gpu_aquaplanet_dyamond_ss_2process
# - >
# srun --cpu-bind=threads --cpus-per-task=4
# julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
# --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ss_2process.yml
# artifact_paths: "gpu_aquaplanet_dyamond_ss_2process/output_active/*"
# agents:
# slurm_gpus_per_task: 1
# slurm_cpus_per_task: 4
# slurm_ntasks: 2
# slurm_mem: 32G
# slurm_time: 8:00:00
# slurm_exclusive:

# - label: "gpu_aquaplanet_dyamond - strong scaling - 4 GPUs"
# command:
# - mkdir -p gpu_aquaplanet_dyamond_ss_4process
# - >
# srun --cpu-bind=threads --cpus-per-task=4
# julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
# --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ss_4process.yml
# artifact_paths: "gpu_aquaplanet_dyamond_ss_4process/output_active/*"
# agents:
# slurm_gpus_per_task: 1
# slurm_cpus_per_task: 4
# slurm_ntasks: 4
# slurm_mem: 32G
# slurm_time: 8:00:00
# slurm_exclusive:

# - wait

# - label: "gpu_aquaplanet_dyamond - strong scaling plots"
# command:
# - mkdir -p gpu_aquaplanet_dyamond_ss
# - >
# julia --color=yes --project=examples post_processing/plot_gpu_strong_scaling.jl gpu_aquaplanet_dyamond_ss
# artifact_paths: "gpu_aquaplanet_dyamond_ss/output_active/*"
# agents:
# slurm_cpus_per_task: 1
# slurm_ntasks: 1
# slurm_exclusive:

# - group: "DYAMOND GPU weak scaling"
# steps:

# - label: "gpu_aquaplanet_dyamond - weak scaling - 1 GPU"
# command:
# - mkdir -p gpu_aquaplanet_dyamond_ws_1process
# - >
# srun --cpu-bind=threads --cpus-per-task=4
# julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
# --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_1process.yml
# artifact_paths: "gpu_aquaplanet_dyamond_ws_1process/*"
# agents:
# slurm_gpus_per_task: 1
# slurm_cpus_per_task: 4
# slurm_ntasks: 1
# slurm_mem: 32G
# slurm_time: 8:00:00
# slurm_exclusive:

# - label: "gpu_aquaplanet_dyamond - weak scaling - 2 GPUs"
# command:
# - mkdir -p gpu_aquaplanet_dyamond_ws_2process
# - >
# srun --cpu-bind=threads --cpus-per-task=4
# julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
# --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_2process.yml
# artifact_paths: "gpu_aquaplanet_dyamond_ws_2process/*"
# agents:
# slurm_gpus_per_task: 1
# slurm_cpus_per_task: 4
# slurm_ntasks: 2
# slurm_mem: 32G
# slurm_time: 8:00:00
# slurm_exclusive:

# - label: "gpu_aquaplanet_dyamond - weak scaling - 4 GPUs"
# command:
# - mkdir -p gpu_aquaplanet_dyamond_ws_4process
# - >
# srun --cpu-bind=threads --cpus-per-task=4
# julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
# --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_4process.yml
# artifact_paths: "gpu_aquaplanet_dyamond_ws_4process/*"
# agents:
# slurm_gpus_per_task: 1
# slurm_cpus_per_task: 4
# slurm_ntasks: 4
# slurm_mem: 32G
# slurm_time: 8:00:00
# slurm_exclusive:

# - wait

# - label: "gpu_aquaplanet_dyamond - weak scaling plots"
# command:
# - mkdir -p gpu_aquaplanet_dyamond_ws
# - >
# julia --color=yes --project=examples post_processing/plot_gpu_weak_scaling.jl gpu_aquaplanet_dyamond_ws
# artifact_paths: "gpu_aquaplanet_dyamond_ws/*"
# agents:
# slurm_cpus_per_task: 1
# slurm_ntasks: 1
# slurm_exclusive:
# - label: "gpu_aquaplanet_dyamond - weak scaling - 1 GPU"
# command:
# - mkdir -p gpu_aquaplanet_dyamond_ws_1process
# - >
# srun --cpu-bind=threads --cpus-per-task=4
# julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
# --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_1process.yml
# artifact_paths: "gpu_aquaplanet_dyamond_ws_1process/output_active/*"
# agents:
# slurm_gpus_per_task: 1
# slurm_cpus_per_task: 4
# slurm_ntasks: 1
# slurm_mem: 32G
# slurm_time: 8:00:00
# slurm_exclusive:

# - label: "gpu_aquaplanet_dyamond - weak scaling - 2 GPUs"
# command:
# - mkdir -p gpu_aquaplanet_dyamond_ws_2process
# - >
# srun --cpu-bind=threads --cpus-per-task=4
# julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
# --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_2process.yml
# artifact_paths: "gpu_aquaplanet_dyamond_ws_2process/output_active/*"
# agents:
# slurm_gpus_per_task: 1
# slurm_cpus_per_task: 4
# slurm_ntasks: 2
# slurm_mem: 32G
# slurm_time: 8:00:00
# slurm_exclusive:

# - label: "gpu_aquaplanet_dyamond - weak scaling - 4 GPUs"
# command:
# - mkdir -p gpu_aquaplanet_dyamond_ws_4process
# - >
# srun --cpu-bind=threads --cpus-per-task=4
# julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
# --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_4process.yml
# artifact_paths: "gpu_aquaplanet_dyamond_ws_4process/output_active/*"
# agents:
# slurm_gpus_per_task: 1
# slurm_cpus_per_task: 4
# slurm_ntasks: 4
# slurm_mem: 32G
# slurm_time: 8:00:00
# slurm_exclusive:

# - wait

# - label: "gpu_aquaplanet_dyamond - weak scaling plots"
# command:
# - mkdir -p gpu_aquaplanet_dyamond_ws
# - >
# julia --color=yes --project=examples post_processing/plot_gpu_weak_scaling.jl gpu_aquaplanet_dyamond_ws
# artifact_paths: "gpu_aquaplanet_dyamond_ws/output_active/*"
# agents:
# slurm_cpus_per_task: 1
# slurm_ntasks: 1
# slurm_exclusive:



Expand All @@ -243,7 +242,7 @@ steps:
nsys profile --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_diagedmf/report
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_diagedmf.yml
artifact_paths: "gpu_aquaplanet_diagedmf/*"
artifact_paths: "gpu_aquaplanet_diagedmf/output_active/*"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
Expand Down
19 changes: 16 additions & 3 deletions .buildkite/longruns/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ steps:
# - label: ":computer: lim ARS zalesak baroclinic wave (ρe_tot) equilmoist high resolution"
# command:
# - srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml
# artifact_paths: "$$JOB_NAME/*"
# artifact_paths: "$$JOB_NAME/output_active/*"
# agents:
# slurm_ntasks: 32
# slurm_mem_per_cpu: 32GB
Expand All @@ -58,12 +58,25 @@ steps:
- label: ":computer: SSP baroclinic wave (ρe_tot) equilmoist high resolution centered diff"
command:
- "srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml"
artifact_paths: "$$JOB_NAME/*"
artifact_paths: "$$JOB_NAME/output_active/*"
agents:
slurm_ntasks: 32
slurm_nodes: 2
slurm_time: 24:00:00
env:
JOB_NAME: "longrun_ssp_bw_rhoe_equil_highres"

- label: ":computer: aquaplanet equilmoist clearsky radiation + prognostic edmf diffusion only + 0M microphysics"
command:
- srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml
artifact_paths: "$$JOB_NAME/*"
agents:
slurm_ntasks: 64
slurm_nodes: 4
slurm_mem_per_cpu: 16GB
slurm_time: 24:00:00
env:
JOB_NAME: "longrun_aquaplanet_rhoe_equil_55km_nz63_clearsky_progedmf_diffonly_0M"

- group: "Low resolution long runs"

Expand All @@ -72,7 +85,7 @@ steps:
- label: ":computer: low resolution aquaplanet equilmoist clearsky radiation + time-varying insolation + slab ocean"
command:
- srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml
artifact_paths: "$$JOB_NAME/*"
artifact_paths: "$$JOB_NAME/output_active/*"
agents:
slurm_ntasks: 8
slurm_mem_per_cpu: 16GB
Expand Down
Loading

0 comments on commit fcd66df

Please sign in to comment.