diff --git a/.buildkite/gpu_pipeline/pipeline.yml b/.buildkite/gpu_pipeline/pipeline.yml index c164cfa423..ed1369bedd 100644 --- a/.buildkite/gpu_pipeline/pipeline.yml +++ b/.buildkite/gpu_pipeline/pipeline.yml @@ -161,7 +161,7 @@ steps: - mkdir -p gpu_aquaplanet_dyamond_ss - > julia --color=yes --project=examples post_processing/plot_gpu_strong_scaling.jl gpu_aquaplanet_dyamond_ss - artifact_paths: "gpu_aquaplanet_dyamond_ss/output_active/*" + artifact_paths: "gpu_aquaplanet_dyamond_ss/*" agents: slurm_cpus_per_task: 1 slurm_ntasks: 1 @@ -225,7 +225,7 @@ steps: - mkdir -p gpu_aquaplanet_dyamond_ws - > julia --color=yes --project=examples post_processing/plot_gpu_weak_scaling.jl gpu_aquaplanet_dyamond_ws - artifact_paths: "gpu_aquaplanet_dyamond_ws/output_active/*" + artifact_paths: "gpu_aquaplanet_dyamond_ws/*" agents: slurm_cpus_per_task: 1 slurm_ntasks: 1 diff --git a/post_processing/plot_gpu_scaling_utils.jl b/post_processing/plot_gpu_scaling_utils.jl index dffb25ead2..f24a246d79 100644 --- a/post_processing/plot_gpu_scaling_utils.jl +++ b/post_processing/plot_gpu_scaling_utils.jl @@ -1,5 +1,10 @@ using JLD2 -function get_jld2data(output_dir, job_id, s) +import PrettyTables as PT + +function get_jld2data(output_dir, job_id, t_int_days, s) + secs_per_day = 60 * 60 * 24 + secs_per_hour = 60 * 60 + days_per_year = 8760 / 24 FT = Float64 nprocs_clima_atmos = Int[] ncols_per_process = Int[] @@ -22,16 +27,46 @@ function get_jld2data(output_dir, job_id, s) push!(nprocs_clima_atmos, Int(dict["nprocs"])) push!(ncols_per_process, Int(dict["ncols_per_process"])) push!(walltime_clima_atmos, FT(dict["walltime"])) - else - @show occursin(job_id, foldername) - @show occursin(s, foldername) + found = true end end if !found @show readdir(output_dir) + for foldername in readdir(output_dir) + @show occursin(job_id, foldername) + @show occursin(s, foldername) + end end - @show nprocs_clima_atmos - @show ncols_per_process - @show walltime_clima_atmos - return (; nprocs_clima_atmos, ncols_per_process, walltime_clima_atmos) + order = sortperm(nprocs_clima_atmos) + nprocs_clima_atmos, ncols_per_process, walltime_clima_atmos = + nprocs_clima_atmos[order], + ncols_per_process[order], + walltime_clima_atmos[order] + + # simulated years per day + sypd_clima_atmos = + (secs_per_day ./ walltime_clima_atmos) * t_int_days ./ days_per_year + + # GPU hours + gpu_hours_clima_atmos = + nprocs_clima_atmos .* walltime_clima_atmos / secs_per_hour + + data = hcat( + nprocs_clima_atmos, + ncols_per_process, + walltime_clima_atmos, + sypd_clima_atmos, + ) + PT.pretty_table( + data; + header = ["N procs", "Ncols per process", "walltime (seconds)", "SYPD"], + alignment = :l, + ) + return (; + nprocs_clima_atmos, + ncols_per_process, + walltime_clima_atmos, + sypd_clima_atmos, + gpu_hours_clima_atmos, + ) end diff --git a/post_processing/plot_gpu_strong_scaling.jl b/post_processing/plot_gpu_strong_scaling.jl index 9f83908850..2b1b15e573 100644 --- a/post_processing/plot_gpu_strong_scaling.jl +++ b/post_processing/plot_gpu_strong_scaling.jl @@ -6,10 +6,6 @@ include("plot_gpu_scaling_utils.jl") job_id = "gpu_aquaplanet_dyamond_ss" output_dir = "./" -secs_per_hour = 60 * 60 -secs_per_day = 60 * 60 * 24 -days_per_year = 8760 / 24 - t_int_days = 12 / 24 # simulation integration time in days h_elem = 30 z_elem = 63 @@ -18,21 +14,14 @@ nlevels = z_elem + 1 t_int = string(t_int_days) * " days" # read ClimaAtmos scaling data -(; nprocs_clima_atmos, ncols_per_process, walltime_clima_atmos) = - get_jld2data(output_dir, job_id, "_ss_") - -order = sortperm(nprocs_clima_atmos) -nprocs_clima_atmos, ncols_per_process, walltime_clima_atmos = - nprocs_clima_atmos[order], - ncols_per_process[order], - walltime_clima_atmos[order] +(; + nprocs_clima_atmos, + ncols_per_process, + walltime_clima_atmos, + sypd_clima_atmos, + gpu_hours_clima_atmos, +) = get_jld2data(output_dir, job_id, t_int_days, "_ss_") -# simulated years per day -sypd_clima_atmos = - (secs_per_day ./ walltime_clima_atmos) * t_int_days ./ days_per_year -# GPU hours -gpu_hours_clima_atmos = - nprocs_clima_atmos .* walltime_clima_atmos / secs_per_hour # scaling efficiency single_proc_time_clima_atmos = walltime_clima_atmos[1] * nprocs_clima_atmos[1] scaling_efficiency_clima_atmos = diff --git a/post_processing/plot_gpu_weak_scaling.jl b/post_processing/plot_gpu_weak_scaling.jl index d3fc1850b1..74d0d4a5b0 100644 --- a/post_processing/plot_gpu_weak_scaling.jl +++ b/post_processing/plot_gpu_weak_scaling.jl @@ -6,10 +6,6 @@ include("plot_gpu_scaling_utils.jl") job_id = "gpu_aquaplanet_dyamond_ws" output_dir = "./" -secs_per_hour = 60 * 60 -secs_per_day = 60 * 60 * 24 -days_per_year = 8760 / 24 - t_int_days = 12 / 24 # simulation integration time in days h_elem = [30, 42, 60] z_elem = 63 @@ -18,21 +14,14 @@ nlevels = z_elem + 1 t_int = string(t_int_days) * " days" # read ClimaAtmos scaling data -(; nprocs_clima_atmos, ncols_per_process, walltime_clima_atmos) = - get_jld2data(output_dir, job_id, "_ws_") - -order = sortperm(nprocs_clima_atmos) -nprocs_clima_atmos, ncols_per_process, walltime_clima_atmos = - nprocs_clima_atmos[order], - ncols_per_process[order], - walltime_clima_atmos[order] +(; + nprocs_clima_atmos, + ncols_per_process, + walltime_clima_atmos, + sypd_clima_atmos, + gpu_hours_clima_atmos, +) = get_jld2data(output_dir, job_id, t_int_days, "_ws_") -# simulated years per day -sypd_clima_atmos = - (secs_per_day ./ walltime_clima_atmos) * t_int_days ./ days_per_year -# GPU hours -gpu_hours_clima_atmos = - nprocs_clima_atmos .* walltime_clima_atmos / secs_per_hour # weak scaling efficiency single_proc_time_clima_atmos = walltime_clima_atmos[1] * nprocs_clima_atmos[1] weak_scaling_efficiency_clima_atmos =