From 71f32731a98ea05313a0527781b06a3a62d685ba Mon Sep 17 00:00:00 2001
From: Gabriele Bozzola
Date: Mon, 20 May 2024 17:08:15 -0700
Subject: [PATCH 1/3] Add benchmark pipeline

---
 .buildkite/pipeline.yml          |   3 +-
 .buildkite/target/pipeline.yml   |  37 ++++++
 experiments/benchmarks/bucket.jl | 227 +++++++++++++++++++++++++++++++
 3 files changed, 266 insertions(+), 1 deletion(-)
 create mode 100644 .buildkite/target/pipeline.yml
 create mode 100644 experiments/benchmarks/bucket.jl

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 138d102329..83c0318c02 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -31,7 +31,8 @@ steps:
       - "julia --project=lib/ClimaLandSimulations -e 'using Pkg; Pkg.status()'"
 
     agents:
-      slurm_ntasks: 1
+      slurm_gpus: 1
+      slurm_ntasks: 8
     env:
       JULIA_NUM_PRECOMPILE_TASKS: 8
 
diff --git a/.buildkite/target/pipeline.yml b/.buildkite/target/pipeline.yml
new file mode 100644
index 0000000000..26c0155a0e
--- /dev/null
+++ b/.buildkite/target/pipeline.yml
@@ -0,0 +1,37 @@
+agents:
+  queue: clima
+  modules: common
+
+env:
+  JULIA_NVTX_CALLBACKS: gc
+  OPENBLAS_NUM_THREADS: 1
+  SLURM_KILL_BAD_EXIT: 1
+
+steps:
+  - label: "init environment :computer:"
+    key: "init_env"
+    command:
+      - "echo $$JULIA_DEPOT_PATH"
+      - echo "--- Instantiate experiments"
+      - "julia --project=.buildkite -e 'using Pkg; Pkg.develop(;path=\".\"); Pkg.instantiate(;verbose=true)'"
+      - "julia --project=.buildkite -e 'using Pkg; Pkg.status()'"
+    agents:
+      slurm_gpus: 1
+      slurm_ntasks: 8
+    env:
+      JULIA_NUM_PRECOMPILE_TASKS: 8
+
+  - wait
+
+  - group: "Target Benchmark"
+    steps:
+      - label: ":bucket: Bucket"
+        command: "nsys launch julia --color=yes --project=.buildkite experiments/benchmarks/bucket.jl"
+        artifact_paths:
+          - "bucket_benchmark_gpu/*html"
+          - "bucket_benchmark_gpu/*nsys*"
+        env:
+          CLIMACOMMS_DEVICE: CUDA
+        agents:
+          slurm_mem: 8GB
+          slurm_gpus: 1
diff --git a/experiments/benchmarks/bucket.jl b/experiments/benchmarks/bucket.jl
new file mode 100644
index 0000000000..94f53f96cb
--- /dev/null
+++ b/experiments/benchmarks/bucket.jl
@@ -0,0 +1,227 @@
+# # Global bucket run using temporal map albedo
+
+# The code sets up and runs the bucket for 7 days using albedo read in from a file
+# containing temporally-varying data over the globe, and analytic atmospheric and radiative
+# forcings.
+
+# This code runs the bucket multiple times and collects statistics for execution time and
+# allocations
+#
+# When run with buildkite on clima, this code also compares with the previous best time
+# saved at the bottom of this file
+
+import SciMLBase
+using Dates
+using Test
+import ClimaComms
+@static pkgversion(ClimaComms) >= v"0.6" && ClimaComms.@import_required_backends
+import ClimaUtilities.TimeVaryingInputs: TimeVaryingInput
+
+import ClimaTimeSteppers as CTS
+import NCDatasets
+using ClimaCore
+import ClimaComms
+import ClimaLand
+using ClimaParams
+using ClimaLand.Bucket:
+    BucketModel, BucketModelParameters, PrescribedSurfaceAlbedo
+using ClimaLand.Domains: coordinates, Column
+using ClimaLand:
+    initialize,
+    make_update_aux,
+    make_exp_tendency,
+    make_set_initial_cache,
+    PrescribedAtmosphere,
+    PrescribedRadiativeFluxes
+
+import Profile, ProfileCanvas
+
+const FT = Float64;
+
+context = ClimaComms.context()
+device = ClimaComms.device()
+device_suffix = device isa ClimaComms.CUDADevice ? "gpu" : "cpu"
+
+earth_param_set = ClimaLand.Parameters.LandParameters(FT);
+outdir = "bucket_benchmark_$(device_suffix)"
+!ispath(outdir) && mkpath(outdir)
+
+"""
+    setup_prob(t0, tf, Δt)
+
+Build the global bucket model on a spherical shell, with analytic atmospheric and
+radiative forcings and an albedo constructed from a temporal map.
+
+Return the `SciMLBase.ODEProblem` over `(t0, tf)` and a `SciMLBase.CallbackSet` that
+updates the drivers at times `t0:(3Δt):tf`.
+"""
+function setup_prob(t0, tf, Δt)
+    # We set up the problem in a function so that we can make multiple copies (for profiling)
+
+    # Set up simulation domain
+    soil_depth = FT(3.5)
+    bucket_domain = ClimaLand.Domains.SphericalShell(;
+        radius = FT(6.3781e6),
+        depth = soil_depth,
+        nelements = (100, 10),
+        npolynomial = 1,
+        dz_tuple = FT.((1.0, 0.05)),
+    )
+    ref_time = DateTime(2005)
+
+    # Initialize parameters
+    z_0m = FT(1e-2)
+    z_0b = FT(1e-3)
+    τc = FT(3600)
+
+    surface_space = bucket_domain.space.surface
+    # Construct albedo parameter object using temporal map
+    albedo = PrescribedSurfaceAlbedo{FT}(ref_time, t0, surface_space)
+
+    bucket_parameters = BucketModelParameters(FT; albedo, z_0m, z_0b, τc)
+
+    # Precipitation:
+    precip = (t) -> 0
+    snow_precip = (t) -> -5e-7 * (t < 1 * 86400)
+    # Diurnal temperature variations:
+    T_atmos = (t) -> 275.0 + 5.0 * sin(2.0 * π * t / 86400 - π / 2)
+    # Constant otherwise:
+    u_atmos = (t) -> 3.0
+    q_atmos = (t) -> 0.001
+    h_atmos = FT(2)
+    P_atmos = (t) -> 101325
+
+    bucket_atmos = PrescribedAtmosphere(
+        TimeVaryingInput(precip),
+        TimeVaryingInput(snow_precip),
+        TimeVaryingInput(T_atmos),
+        TimeVaryingInput(u_atmos),
+        TimeVaryingInput(q_atmos),
+        TimeVaryingInput(P_atmos),
+        ref_time,
+        h_atmos,
+        earth_param_set,
+    )
+
+    # Prescribed radiation -- a prescribed downwelling SW diurnal cycle, with a
+    # peak at local noon, and a prescribed downwelling LW radiative
+    # flux, assuming the air temperature is on average 275 degrees
+    # K with a diurnal amplitude of 5 degrees K:
+    SW_d = (t) -> max(1361 * sin(2π * t / 86400 - π / 2), 0.0)
+    LW_d = (t) -> 5.67e-8 * (275.0 + 5.0 * sin(2.0 * π * t / 86400 - π / 2))^4
+    bucket_rad = PrescribedRadiativeFluxes(
+        FT,
+        TimeVaryingInput(SW_d),
+        TimeVaryingInput(LW_d),
+        ref_time,
+    )
+
+
+    model = BucketModel(
+        parameters = bucket_parameters,
+        domain = bucket_domain,
+        atmosphere = bucket_atmos,
+        radiation = bucket_rad,
+    )
+
+    Y, p, _coords = initialize(model)
+
+    Y.bucket.T .= FT(270)
+    Y.bucket.W .= FT(0.05)
+    Y.bucket.Ws .= FT(0.0)
+    Y.bucket.σS .= FT(0.08)
+
+    set_initial_cache! = make_set_initial_cache(model)
+    set_initial_cache!(p, Y, t0)
+    exp_tendency! = make_exp_tendency(model)
+    prob = SciMLBase.ODEProblem(
+        CTS.ClimaODEFunction((T_exp!) = exp_tendency!, (dss!) = ClimaLand.dss!),
+        Y,
+        (t0, tf),
+        p,
+    )
+    updateat = collect(t0:(3Δt):tf)
+    updatefunc = ClimaLand.make_update_drivers(bucket_atmos, bucket_rad)
+    driver_cb = ClimaLand.DriverUpdateCallback(updateat, updatefunc)
+    cb = SciMLBase.CallbackSet(driver_cb)
+
+    return prob, cb
+end
+
+"""
+    setup_and_solve_problem()
+
+Set up the bucket problem for a 7-day run with a 1-hour timestep and integrate it with
+the explicit `CTS.RK4` timestepper. Return `nothing`.
+"""
+function setup_and_solve_problem()
+    # We profile the setup phase as well here. This is not intended, but it is the easiest
+    # to set up for both CPU/GPU at the same time
+    t0 = 0.0
+    tf = 7 * 86400
+    Δt = 3600.0
+    prob, cb = setup_prob(t0, tf, Δt)
+    timestepper = CTS.RK4()
+    ode_algo = CTS.ExplicitAlgorithm(timestepper)
+    SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb)
+    return nothing
+end
+
+# Warm up
+setup_and_solve_problem()
+
+@info "Starting profiling"
+# Stop when we profile for MAX_PROFILING_TIME_SECONDS or MAX_PROFILING_SAMPLES
+MAX_PROFILING_TIME_SECONDS = 500
+MAX_PROFILING_SAMPLES = 100
+time_now = time()
+timings_s = Float64[]
+while (time() - time_now) < MAX_PROFILING_TIME_SECONDS &&
+          length(timings_s) < MAX_PROFILING_SAMPLES
+    push!(timings_s, ClimaComms.@elapsed device setup_and_solve_problem())
+end
+num_samples = length(timings_s)
+average_timing_s = round(sum(timings_s) / num_samples, sigdigits = 3)
+max_timing_s = round(maximum(timings_s), sigdigits = 3)
+min_timing_s = round(minimum(timings_s), sigdigits = 3)
+# Population standard deviation, measured about the unrounded mean of the samples
+std_timing_s = round(
+    sqrt(sum(abs2, timings_s .- sum(timings_s) / num_samples) / num_samples),
+    sigdigits = 3,
+)
+@info "Num samples: $num_samples"
+@info "Average time: $(average_timing_s) s"
+@info "Max time: $(max_timing_s) s"
+@info "Min time: $(min_timing_s) s"
+@info "Standard deviation time: $(std_timing_s) s"
+@info "Done profiling"
+
+Profile.@profile setup_and_solve_problem()
+results = Profile.fetch()
+flame_file = joinpath(outdir, "flame_$device_suffix.html")
+ProfileCanvas.html_file(flame_file, results)
+@info "Saved compute flame to $flame_file"
+
+Profile.Allocs.@profile sample_rate = 0.1 setup_and_solve_problem()
+results = Profile.Allocs.fetch()
+profile = ProfileCanvas.view_allocs(results)
+alloc_flame_file = joinpath(outdir, "alloc_flame_$device_suffix.html")
+ProfileCanvas.html_file(alloc_flame_file, profile)
+@info "Saved allocation flame to $alloc_flame_file"
+
+if ClimaComms.device() isa ClimaComms.CUDADevice
+    import CUDA
+    CUDA.@profile external = true setup_and_solve_problem()
+end
+
+if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaland-benchmark"
+    PREVIOUS_BEST_TIME = 5.4
+    if average_timing_s > 1.1PREVIOUS_BEST_TIME
+        @info "Possible performance regression, previous average time was $(PREVIOUS_BEST_TIME)"
+    elseif average_timing_s < 0.8PREVIOUS_BEST_TIME
+        @info "Possible significant performance improvement, please update PREVIOUS_BEST_TIME in $(@__FILE__)"
+    end
+    @testset "Performance" begin
+        @test 0.8PREVIOUS_BEST_TIME <= average_timing_s ≤ 1.1PREVIOUS_BEST_TIME
+    end
+end
From 02eead2bae43f488aa70f947121c05b347dc9b44 Mon Sep 17 00:00:00 2001
From: Gabriele Bozzola
Date: Mon, 20 May 2024 17:08:21 -0700
Subject: [PATCH 2/3] Remove unnecessary LOAD_PATH

This was needed when we had Preferences in .buildkite
---
 .buildkite/pipeline.yml        | 1 -
 .buildkite/target/pipeline.yml | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 83c0318c02..4bea9a988d 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -4,7 +4,6 @@ agents:
   modules: climacommon/2024_05_27
 
 env:
-  JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite"
   JULIA_NVTX_CALLBACKS: gc
   OPENBLAS_NUM_THREADS: 1
   SLURM_KILL_BAD_EXIT: 1
diff --git a/.buildkite/target/pipeline.yml b/.buildkite/target/pipeline.yml
index 26c0155a0e..a2491e029f 100644
--- a/.buildkite/target/pipeline.yml
+++ b/.buildkite/target/pipeline.yml
@@ -3,6 +3,7 @@ agents:
   modules: common
 
 env:
+  JULIA_DEPOT_PATH: "${BUILDKITE_BUILD_PATH}/${BUILDKITE_PIPELINE_SLUG}/depot/default"
   JULIA_NVTX_CALLBACKS: gc
   OPENBLAS_NUM_THREADS: 1
   SLURM_KILL_BAD_EXIT: 1
From db7cbd7c72668437a3cab1c0c74f01386dcd3ca3 Mon Sep 17 00:00:00 2001
From: Gabriele Bozzola
Date: Thu, 30 May 2024 19:05:28 -0700
Subject: [PATCH 3/3] Remove pin to CUDA

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index cf07b06cd7..623b01289d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -41,7 +41,7 @@ NeuralSnowExt = ["CSV", "HTTP", "Flux", "StatsBase", "cuDNN"]
 Adapt = "3, 4"
 ArtifactWrappers = "0.2"
 CSV = "0.10"
-CUDA = "<5.3"
+CUDA = "5.3"
 ClimaComms = "0.5.6, 0.6"
 ClimaCore = "0.13.2, 0.14"
 ClimaParams = "0.10.2"