From 2ea093c45cd80050df4b39de172114da0f195442 Mon Sep 17 00:00:00 2001 From: Gabriele Bozzola Date: Wed, 24 Jul 2024 04:09:11 -0700 Subject: [PATCH] Do not profile setup in benchmarks --- experiments/benchmarks/bucket.jl | 35 ++++++++++++++++++++--------- experiments/benchmarks/land.jl | 36 +++++++++++++++++++++--------- experiments/benchmarks/richards.jl | 36 +++++++++++++++++++++--------- 3 files changed, 75 insertions(+), 32 deletions(-) diff --git a/experiments/benchmarks/bucket.jl b/experiments/benchmarks/bucket.jl index 31864c900d..0985d70a14 100644 --- a/experiments/benchmarks/bucket.jl +++ b/experiments/benchmarks/bucket.jl @@ -141,9 +141,7 @@ function setup_prob(t0, tf, Δt; nelements = (100, 10)) return prob, cb end -function setup_and_solve_problem(; greet = false) - # We profile the setup phase as well here. This is not intended, but it is the easiest - # to set up for both CPU/GPU at the same time +function setup_simulation(; greet = false) t0 = 0.0 tf = 7 * 86400 Δt = 3600.0 @@ -158,12 +156,12 @@ function setup_and_solve_problem(; greet = false) prob, cb = setup_prob(t0, tf, Δt; nelements) timestepper = CTS.RK4() ode_algo = CTS.ExplicitAlgorithm(timestepper) - SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) - return nothing + return prob, ode_algo, Δt, cb end # Warm up and greet -setup_and_solve_problem(; greet = true) +prob, ode_algo, Δt, cb = setup_simulation(; greet = true) +SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) @info "Starting profiling" # Stop when we profile for MAX_PROFILING_TIME_SECONDS or MAX_PROFILING_SAMPLES @@ -173,7 +171,16 @@ time_now = time() timings_s = Float64[] while (time() - time_now) < MAX_PROFILING_TIME_SECONDS && length(timings_s) < MAX_PROFILING_SAMPLES - push!(timings_s, ClimaComms.@elapsed device setup_and_solve_problem()) + lprob, lode_algo, lΔt, lcb = setup_simulation() + push!( + timings_s, + ClimaComms.@elapsed device SciMLBase.solve( + lprob, + lode_algo; + dt = lΔt, + callback = lcb, + ) + ) end num_samples = length(timings_s) average_timing_s = round(sum(timings_s) / num_samples, sigdigits = 3) @@ -190,13 +197,20 @@ std_timing_s = round( @info "Standard deviation time: $(std_timing_s) s" @info "Done profiling" -Profile.@profile setup_and_solve_problem() +prob, ode_algo, Δt, cb = setup_simulation() +Profile.@profile SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) results = Profile.fetch() flame_file = joinpath(outdir, "flame_$device_suffix.html") ProfileCanvas.html_file(flame_file, results) @info "Saved compute flame to $flame_file" -Profile.Allocs.@profile sample_rate = 0.1 setup_and_solve_problem() +prob, ode_algo, Δt, cb = setup_simulation() +Profile.Allocs.@profile sample_rate = 0.1 SciMLBase.solve( + prob, + ode_algo; + dt = Δt, + callback = cb, +) results = Profile.Allocs.fetch() profile = ProfileCanvas.view_allocs(results) alloc_flame_file = joinpath(outdir, "alloc_flame_$device_suffix.html") @@ -205,7 +219,8 @@ ProfileCanvas.html_file(alloc_flame_file, profile) if ClimaComms.device() isa ClimaComms.CUDADevice import CUDA - CUDA.@profile setup_and_solve_problem() + lprob, lode_algo, lΔt, lcb = setup_simulation() + CUDA.@profile SciMLBase.solve(lprob, lode_algo; dt = lΔt, callback = lcb) end if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaland-benchmark" diff --git a/experiments/benchmarks/land.jl b/experiments/benchmarks/land.jl index 6d7c07b983..3abec4200e 100644 --- a/experiments/benchmarks/land.jl +++ b/experiments/benchmarks/land.jl @@ -597,9 +597,7 @@ function setup_prob(t0, tf, Δt; nelements = (101, 15)) return prob, cb end -function setup_and_solve_problem(; greet = false) - # We profile the setup phase as well here. This is not intended, but it is the easiest - # to set up for both CPU/GPU at the same time +function setup_simulation(; greet = false) t0 = 0.0 tf = 60 * 60.0 * 6 Δt = 900.0 @@ -622,13 +620,12 @@ function setup_and_solve_problem(; greet = false) update_j = CTS.UpdateEvery(CTS.NewTimeStep), ), ) - - SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) - return nothing + return prob, ode_algo, Δt, cb end # Warm up and greet -setup_and_solve_problem(; greet = true) +prob, ode_algo, Δt, cb = setup_simulation(; greet = true) +SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) @info "Starting profiling" # Stop when we profile for MAX_PROFILING_TIME_SECONDS or MAX_PROFILING_SAMPLES @@ -638,7 +635,16 @@ time_now = time() timings_s = Float64[] while (time() - time_now) < MAX_PROFILING_TIME_SECONDS && length(timings_s) < MAX_PROFILING_SAMPLES - push!(timings_s, ClimaComms.@elapsed device setup_and_solve_problem()) + lprob, lode_algo, lΔt, lcb = setup_simulation() + push!( + timings_s, + ClimaComms.@elapsed device SciMLBase.solve( + lprob, + lode_algo; + dt = lΔt, + callback = lcb, + ) + ) end num_samples = length(timings_s) average_timing_s = round(sum(timings_s) / num_samples, sigdigits = 3) @@ -655,13 +661,20 @@ std_timing_s = round( @info "Standard deviation time: $(std_timing_s) s" @info "Done profiling" -Profile.@profile setup_and_solve_problem() +prob, ode_algo, Δt, cb = setup_simulation() +Profile.@profile SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) results = Profile.fetch() flame_file = joinpath(outdir, "flame_$device_suffix.html") ProfileCanvas.html_file(flame_file, results) @info "Saved compute flame to $flame_file" -Profile.Allocs.@profile sample_rate = 0.01 setup_and_solve_problem() +prob, ode_algo, Δt, cb = setup_simulation() +Profile.Allocs.@profile sample_rate = 0.01 SciMLBase.solve( + prob, + ode_algo; + dt = Δt, + callback = cb, +) results = Profile.Allocs.fetch() profile = ProfileCanvas.view_allocs(results) alloc_flame_file = joinpath(outdir, "alloc_flame_$device_suffix.html") @@ -671,7 +684,8 @@ ProfileCanvas.html_file(alloc_flame_file, profile) #= if ClimaComms.device() isa ClimaComms.CUDADevice import CUDA - CUDA.@profile setup_and_solve_problem() + lprob, lode_algo, lΔt, lcb = setup_simulation() + CUDA.@profile SciMLBase.solve(lprob, lode_algo; dt = lΔt, callback = lcb) end if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaland-benchmark" diff --git a/experiments/benchmarks/richards.jl b/experiments/benchmarks/richards.jl index c24dd998f4..c2b15f0a84 100644 --- a/experiments/benchmarks/richards.jl +++ b/experiments/benchmarks/richards.jl @@ -285,9 +285,7 @@ function setup_prob(t0, tf, Δt; nelements = (101, 15)) return prob, cb end -function setup_and_solve_problem(; greet = false) - # We profile the setup phase as well here. This is not intended, but it is the easiest - # to set up for both CPU/GPU at the same time +function setup_simulation(; greet = false) t0 = 0.0 tf = 3600.0 * 24 * 7 Δt = 1800.0 @@ -310,13 +308,12 @@ function setup_and_solve_problem(; greet = false) update_j = CTS.UpdateEvery(CTS.NewNewtonIteration), ), ) - - SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) - return nothing + return prob, ode_algo, Δt, cb end # Warm up and greet -setup_and_solve_problem(; greet = true) +prob, ode_algo, Δt, cb = setup_simulation(; greet = true) +SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) @info "Starting profiling" # Stop when we profile for MAX_PROFILING_TIME_SECONDS or MAX_PROFILING_SAMPLES @@ -326,7 +323,16 @@ time_now = time() timings_s = Float64[] while (time() - time_now) < MAX_PROFILING_TIME_SECONDS && length(timings_s) < MAX_PROFILING_SAMPLES - push!(timings_s, ClimaComms.@elapsed device setup_and_solve_problem()) + lprob, lode_algo, lΔt, lcb = setup_simulation() + push!( + timings_s, + ClimaComms.@elapsed device SciMLBase.solve( + lprob, + lode_algo; + dt = lΔt, + callback = lcb, + ) + ) end num_samples = length(timings_s) average_timing_s = round(sum(timings_s) / num_samples, sigdigits = 3) @@ -343,13 +349,20 @@ std_timing_s = round( @info "Standard deviation time: $(std_timing_s) s" @info "Done profiling" -Profile.@profile setup_and_solve_problem() +prob, ode_algo, Δt, cb = setup_simulation() +Profile.@profile SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) results = Profile.fetch() flame_file = joinpath(outdir, "flame_$device_suffix.html") ProfileCanvas.html_file(flame_file, results) @info "Saved compute flame to $flame_file" -Profile.Allocs.@profile sample_rate = 0.01 setup_and_solve_problem() +prob, ode_algo, Δt, cb = setup_simulation() +Profile.Allocs.@profile sample_rate = 0.01 SciMLBase.solve( + prob, + ode_algo; + dt = Δt, + callback = cb, +) results = Profile.Allocs.fetch() profile = ProfileCanvas.view_allocs(results) alloc_flame_file = joinpath(outdir, "alloc_flame_$device_suffix.html") @@ -358,7 +371,8 @@ ProfileCanvas.html_file(alloc_flame_file, profile) if ClimaComms.device() isa ClimaComms.CUDADevice import CUDA - CUDA.@profile setup_and_solve_problem() + lprob, lode_algo, lΔt, lcb = setup_simulation() + CUDA.@profile SciMLBase.solve(lprob, lode_algo; dt = lΔt, callback = lcb) end #= if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaland-benchmark"