From d3274bbf32852f2544b51034699a6d77f30eadf3 Mon Sep 17 00:00:00 2001 From: Gabriele Bozzola Date: Wed, 26 Jun 2024 10:27:16 -0700 Subject: [PATCH] Fix compare cpu/gpu tests The tests were only comparing the albedo function job --- .buildkite/pipeline.yml | 10 +------- .../Bucket/compare_gpu_cpu_output.jl | 23 +++++++++++++------ .../Bucket/global_bucket_function.jl | 7 ++++-- .../Bucket/global_bucket_staticmap.jl | 2 +- .../Bucket/global_bucket_temporalmap.jl | 2 +- 5 files changed, 24 insertions(+), 20 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 2b2d7e1aca..0e0497ffce 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -198,20 +198,12 @@ steps: - group: "CPU/GPU comparisons" steps: - - label: "Compare GPU bucket with CPU bucket (functional albedo)" + - label: "Compare GPU bucket with CPU bucket" command: "julia --color=yes --project=.buildkite experiments/standalone/Bucket/compare_gpu_cpu_output.jl" depends_on: - "global_bucket_function_cpu" - "global_bucket_function_gpu" - - - label: "Compare GPU bucket with CPU bucket (static map albedo)" - command: "julia --color=yes --project=.buildkite experiments/standalone/Bucket/compare_gpu_cpu_output.jl" - depends_on: - "global_bucket_staticmap_cpu" - "global_bucket_staticmap_gpu" - - - label: "Compare GPU bucket with CPU bucket (temporal map albedo)" - command: "julia --color=yes --project=.buildkite experiments/standalone/Bucket/compare_gpu_cpu_output.jl" - depends_on: - "global_bucket_temporalmap_cpu" - "global_bucket_temporalmap_gpu" diff --git a/experiments/standalone/Bucket/compare_gpu_cpu_output.jl b/experiments/standalone/Bucket/compare_gpu_cpu_output.jl index f51b89a679..c49162124d 100644 --- a/experiments/standalone/Bucket/compare_gpu_cpu_output.jl +++ b/experiments/standalone/Bucket/compare_gpu_cpu_output.jl @@ -1,10 +1,19 @@ using DelimitedFiles using Statistics import ClimaLand -outdir = joinpath(pkgdir(ClimaLand), "experiments/standalone/Bucket/artifacts") -cpu_state = readdlm(joinpath(outdir, "tf_state_cpu.txt"), ',') -gpu_state = readdlm(joinpath(outdir, "tf_state_gpu.txt"), ',') -@show abs(maximum(cpu_state .- gpu_state)) -@show abs(median(cpu_state .- gpu_state)) -@show abs(mean(cpu_state .- gpu_state)) -@assert isapprox(cpu_state, gpu_state) +function check(job) + outdir = joinpath( + pkgdir(ClimaLand), + "experiments/standalone/Bucket/artifacts_$job", + ) + cpu_state = readdlm(joinpath(outdir, "tf_state_cpu_$job.txt"), ',') + gpu_state = readdlm(joinpath(outdir, "tf_state_gpu_$job.txt"), ',') + @show abs(maximum(cpu_state .- gpu_state)) + @show abs(median(cpu_state .- gpu_state)) + @show abs(mean(cpu_state .- gpu_state)) + @assert isapprox(cpu_state, gpu_state) +end + +check("function") +check("staticmap") +check("temporalmap") diff --git a/experiments/standalone/Bucket/global_bucket_function.jl b/experiments/standalone/Bucket/global_bucket_function.jl index ada1866be1..45e03aaa1c 100644 --- a/experiments/standalone/Bucket/global_bucket_function.jl +++ b/experiments/standalone/Bucket/global_bucket_function.jl @@ -63,7 +63,10 @@ anim_plots = false FT = Float64; context = ClimaComms.context() earth_param_set = LP.LandParameters(FT); -outdir = joinpath(pkgdir(ClimaLand), "experiments/standalone/Bucket/artifacts") +outdir = joinpath( + pkgdir(ClimaLand), + "experiments/standalone/Bucket/artifacts_function", +) !ispath(outdir) && mkpath(outdir) # Construct simulation domain @@ -226,6 +229,6 @@ T_sfc = Array(Remapping.interpolate(remapper, prob.p.bucket.T_sfc)) device_suffix = typeof(ClimaComms.context().device) <: ClimaComms.CPUSingleThreaded ? "cpu" : "gpu" -open(joinpath(outdir, "tf_state_$device_suffix.txt"), "w") do io +open(joinpath(outdir, "tf_state_$(device_suffix)_function.txt"), "w") do io writedlm(io, hcat(T_sfc[:], W[:], Ws[:], σS[:]), ',') end; diff --git a/experiments/standalone/Bucket/global_bucket_staticmap.jl b/experiments/standalone/Bucket/global_bucket_staticmap.jl index cbf07ace4c..71fed1d816 100644 --- a/experiments/standalone/Bucket/global_bucket_staticmap.jl +++ b/experiments/standalone/Bucket/global_bucket_staticmap.jl @@ -305,7 +305,7 @@ sw_forcing = [ ]; # save prognostic state to CSV - for comparison between GPU and CPU output -open(joinpath(outdir, "tf_state_$device_suffix.txt"), "w") do io +open(joinpath(outdir, "tf_state_$(device_suffix)_staticmap.txt"), "w") do io writedlm(io, hcat(T_sfc[end][:], W[end][:], Ws[end][:], σS[end][:]), ',') end; # animation settings diff --git a/experiments/standalone/Bucket/global_bucket_temporalmap.jl b/experiments/standalone/Bucket/global_bucket_temporalmap.jl index 05cee96a01..b2cab6d207 100644 --- a/experiments/standalone/Bucket/global_bucket_temporalmap.jl +++ b/experiments/standalone/Bucket/global_bucket_temporalmap.jl @@ -277,7 +277,7 @@ F_sfc = [ ]; # save prognostic state to CSV - for comparison between GPU and CPU output -open(joinpath(outdir, "tf_state_$device_suffix.txt"), "w") do io +open(joinpath(outdir, "tf_state_$(device_suffix)_temporalmap.txt"), "w") do io writedlm(io, hcat(T_sfc[end][:], W[end][:], Ws[end][:], σS[end][:]), ',') end; # animation settings