Skip to content

Commit

Permalink
Merge pull request #2773 from CliMA/gb/bench_io
Browse files Browse the repository at this point in the history
Add job to benchmark IO
  • Loading branch information
Sbozzolo authored Mar 13, 2024
2 parents 7ab4dbb + 1c1f5d2 commit 8371841
Show file tree
Hide file tree
Showing 8 changed files with 232 additions and 27 deletions.
7 changes: 7 additions & 0 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,13 @@ steps:
slurm_cpus_per_task: 8

# Flame graphs
- label: ":fire: Flame graph: perf target (IO)"
command: >
julia --color=yes --project=perf perf/benchmark_netcdf_io.jl
artifact_paths: "flame_perf_io/*.html"
agents:
slurm_gpus: 1

- label: ":fire: Flame graph: perf target (default)"
command: >
julia --color=yes --project=perf perf/flame.jl
Expand Down
31 changes: 31 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
ClimaAtmos.jl Release Notes
============================

Main
-------

- ![][badge-🚀performance] Reduced the number of allocations in the NetCDF
writer. PRs [#2772](https://github.com/CliMA/ClimaAtmos.jl/pull/2772),
[#2773](https://github.com/CliMA/ClimaAtmos.jl/pull/2773).
- Added a new script, `perf/benchmark_netcdf_io.jl` to test IO performance for
the NetCDF writer. PR [#2773](https://github.com/CliMA/ClimaAtmos.jl/pull/2773).

<!--
Contributors are welcome to begin the description of changelog items with badge(s) below. Here is a brief description of when to use badges for a particular pull request / set of changes:
- 🔥behavioralΔ - behavioral changes. For example: a new model is used, yielding more accurate results.
- 🤖precisionΔ - machine-precision changes. For example, swapping the order of summed arguments can result in machine-precision changes.
- 💥breaking - breaking changes. For example: removing deprecated functions/types, removing support for functionality, API changes.
- 🚀performance - performance improvements. For example: improving type inference, reducing allocations, or code hoisting.
- ✨feature - new feature added. For example: adding support for a cubed-sphere grid
- 🐛bugfix - bugfix. For example: fixing incorrect logic, resulting in incorrect results, or fixing code that otherwise might give a `MethodError`.
-->

[badge-🔥behavioralΔ]: https://img.shields.io/badge/🔥behavioralΔ-orange.svg
[badge-🤖precisionΔ]: https://img.shields.io/badge/🤖precisionΔ-black.svg
[badge-💥breaking]: https://img.shields.io/badge/💥BREAKING-red.svg
[badge-🚀performance]: https://img.shields.io/badge/🚀performance-green.svg
[badge-✨feature/enhancement]: https://img.shields.io/badge/feature/enhancement-blue.svg
[badge-🐛bugfix]: https://img.shields.io/badge/🐛bugfix-purple.svg
8 changes: 4 additions & 4 deletions docs/Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -317,19 +317,19 @@ version = "0.5.7"

[[deps.ClimaCore]]
deps = ["Adapt", "BandedMatrices", "BlockArrays", "CUDA", "ClimaComms", "CubedSphere", "DataStructures", "DocStringExtensions", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LinearAlgebra", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "Static", "StaticArrays", "Statistics", "Unrolled"]
git-tree-sha1 = "844afbd6a7c4f112f974e0031ebf4fb13d0b4157"
git-tree-sha1 = "bc6a0154e3bcc1657d3a75f697e216fb70121969"
uuid = "d414da3d-4745-48bb-8d80-42e94e092884"
version = "0.13.1"
version = "0.13.2"
weakdeps = ["Krylov"]

[deps.ClimaCore.extensions]
KrylovExt = "Krylov"

[[deps.ClimaParams]]
deps = ["DocStringExtensions", "TOML", "Test"]
git-tree-sha1 = "323dd6c5423caf31f0da81bb9c288683cbdafb01"
git-tree-sha1 = "ec67949db856e01df4cbf7d6ddafefeda02f93ee"
uuid = "5c42b081-d73a-476f-9059-fd94b934656c"
version = "0.10.2"
version = "0.10.3"

[[deps.ClimaTimeSteppers]]
deps = ["ClimaComms", "Colors", "DataStructures", "DiffEqBase", "DiffEqCallbacks", "KernelAbstractions", "Krylov", "LinearAlgebra", "LinearOperators", "NVTX", "SciMLBase", "StaticArrays"]
Expand Down
8 changes: 4 additions & 4 deletions examples/Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -305,9 +305,9 @@ version = "0.5.7"

[[deps.ClimaCore]]
deps = ["Adapt", "BandedMatrices", "BlockArrays", "CUDA", "ClimaComms", "CubedSphere", "DataStructures", "DocStringExtensions", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LinearAlgebra", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "Static", "StaticArrays", "Statistics", "Unrolled"]
git-tree-sha1 = "844afbd6a7c4f112f974e0031ebf4fb13d0b4157"
git-tree-sha1 = "bc6a0154e3bcc1657d3a75f697e216fb70121969"
uuid = "d414da3d-4745-48bb-8d80-42e94e092884"
version = "0.13.1"
version = "0.13.2"
weakdeps = ["Krylov"]

[deps.ClimaCore.extensions]
Expand Down Expand Up @@ -345,9 +345,9 @@ version = "0.7.5"

[[deps.ClimaParams]]
deps = ["DocStringExtensions", "TOML", "Test"]
git-tree-sha1 = "323dd6c5423caf31f0da81bb9c288683cbdafb01"
git-tree-sha1 = "ec67949db856e01df4cbf7d6ddafefeda02f93ee"
uuid = "5c42b081-d73a-476f-9059-fd94b934656c"
version = "0.10.2"
version = "0.10.3"

[[deps.ClimaTimeSteppers]]
deps = ["ClimaComms", "Colors", "DataStructures", "DiffEqBase", "DiffEqCallbacks", "KernelAbstractions", "Krylov", "LinearAlgebra", "LinearOperators", "NVTX", "SciMLBase", "StaticArrays"]
Expand Down
8 changes: 4 additions & 4 deletions perf/Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -314,9 +314,9 @@ version = "0.5.7"

[[deps.ClimaCore]]
deps = ["Adapt", "BandedMatrices", "BlockArrays", "CUDA", "ClimaComms", "CubedSphere", "DataStructures", "DocStringExtensions", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LinearAlgebra", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "Static", "StaticArrays", "Statistics", "Unrolled"]
git-tree-sha1 = "844afbd6a7c4f112f974e0031ebf4fb13d0b4157"
git-tree-sha1 = "bc6a0154e3bcc1657d3a75f697e216fb70121969"
uuid = "d414da3d-4745-48bb-8d80-42e94e092884"
version = "0.13.1"
version = "0.13.2"
weakdeps = ["Krylov"]

[deps.ClimaCore.extensions]
Expand Down Expand Up @@ -354,9 +354,9 @@ version = "0.7.5"

[[deps.ClimaParams]]
deps = ["DocStringExtensions", "TOML", "Test"]
git-tree-sha1 = "323dd6c5423caf31f0da81bb9c288683cbdafb01"
git-tree-sha1 = "ec67949db856e01df4cbf7d6ddafefeda02f93ee"
uuid = "5c42b081-d73a-476f-9059-fd94b934656c"
version = "0.10.2"
version = "0.10.3"

[[deps.ClimaTimeSteppers]]
deps = ["ClimaComms", "Colors", "DataStructures", "DiffEqBase", "DiffEqCallbacks", "KernelAbstractions", "Krylov", "LinearAlgebra", "LinearOperators", "NVTX", "SciMLBase", "StaticArrays"]
Expand Down
120 changes: 120 additions & 0 deletions perf/benchmark_netcdf_io.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import ClimaAtmos as CA
import ClimaAtmos.Diagnostics as CAD
import ClimaComms
import Profile, ProfileCanvas
import NCDatasets
import Base: rm
using BenchmarkTools

# This script runs our NetCDF writer and compares its performance with
# NCDatasets. It also produces a flamegraph for IO.

# Number of target interpolation point along each dimension
const NUM = 100

timings = Dict{ClimaComms.AbstractDevice, Any}(
ClimaComms.CPUSingleThreaded() => missing,
)
# If a GPU is available, runs on CPU and GPU
if ClimaComms.device() isa ClimaComms.CUDADevice
timings[ClimaComms.CUDADevice()] = missing
end

timings_ncdataset = copy(timings)

function add_nc(nc, outarray)
v = nc["rhoa"]
temporal_size, spatial_size... = size(v)
time_index = temporal_size + 1
nc["time"][time_index] = 10.0 * time_index
v[time_index, :, :, :] = outarray
end

for device in keys(timings)
device_name = nameof(typeof(device))
config = CA.AtmosConfig(; comms_ctx = ClimaComms.context(device))
config.parsed_args["diagnostics"] =
[Dict("short_name" => "rhoa", "period" => config.parsed_args["dt"])]
config.parsed_args["netcdf_interpolation_num_points"] = [NUM, NUM, NUM]
config.parsed_args["job_id"] = "flame_perf_io"

simulation = CA.get_simulation(config)

# Cleanup pre-existing NC files
foreach(
rm,
filter(endswith(".nc"), readdir(simulation.output_dir, join = true)),
)

atmos_model = simulation.integrator.p.atmos
cspace = axes(simulation.integrator.u.c)
diagnostics, _ = CA.get_diagnostics(config.parsed_args, atmos_model, cspace)
rhoa_diag = diagnostics[end]

netcdf_writer = simulation.output_writers[2]
field = simulation.integrator.u.c.ρ

integrator = simulation.integrator

# Run once to create the file
CAD.write_field!(
netcdf_writer,
field,
rhoa_diag,
integrator.u,
integrator.p,
integrator.t,
simulation.output_dir,
)
# Now, profile
@info "Profiling ($device_name)"
prof = Profile.@profile CAD.save_diagnostic_to_disk!(
netcdf_writer,
field,
rhoa_diag,
integrator.u,
integrator.p,
integrator.t,
simulation.output_dir,
)
results = Profile.fetch()
flame_path = joinpath(simulation.output_dir, "flame_$device_name.html")
ProfileCanvas.html_file(flame_path, results)
@info "Flame saved in $flame_path"

@info "Benchmarking our NetCDF writer (only IO) ($device_name)"
timings[device] = @benchmark CAD.save_diagnostic_to_disk!(
$netcdf_writer,
$field,
$rhoa_diag,
$(integrator.u),
$(integrator.p),
$(integrator.t),
$(simulation.output_dir),
)

@info "Benchmarking NCDatasets ($device_name)"

output_path = joinpath(simulation.output_dir, "clean_netcdf.nc")
nc = NCDatasets.NCDataset(output_path, "c")
NCDatasets.defDim(nc, "time", Inf)
NCDatasets.defVar(nc, "time", Float32, ("time",))
NCDatasets.defDim(nc, "x", NUM)
NCDatasets.defDim(nc, "y", NUM)
NCDatasets.defDim(nc, "z", NUM)
v = NCDatasets.defVar(nc, "rhoa", Float32, ("time", "x", "y", "z"))
outarray = Array(netcdf_writer.remappers["rhoa"]._interpolated_values)
v[1, :, :, :] = outarray

timings_ncdataset[device] = @benchmark $add_nc($nc, $outarray)
end

for device in keys(timings)
println("DEVICE: ", device)
println("Our writer")
show(stdout, MIME"text/plain"(), timings[device])
println()
println("NCDatasets")
show(stdout, MIME"text/plain"(), timings_ncdataset[device])
println()
end
75 changes: 61 additions & 14 deletions src/diagnostics/netcdf_writer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ and `domain` (e.g., `ClimaCore.Geometry.LatLongPoint`s).
"""
function hcoords_from_horizontal_space(space, domain, hpts) end

struct NetCDFWriter{T, TS}
struct NetCDFWriter{T, TS, DI}

# TODO: At the moment, each variable gets its remapper. This is a little bit of a waste
# because we probably only need a handful of remappers since the same remapper can be
Expand Down Expand Up @@ -488,6 +488,10 @@ struct NetCDFWriter{T, TS}
# When disable_vertical_interpolation is true, the num_points on the vertical direction
# is ignored.
disable_vertical_interpolation::Bool

# Areas of memory preallocated where the interpolation output is saved. Only the root
# process uses this
preallocated_output_arrays::DI
end

"""
Expand Down Expand Up @@ -565,30 +569,30 @@ function NetCDFWriter(;
interpolated_physical_z =
interpolate(remapper, Fields.coordinate_field(space).z)

return NetCDFWriter{typeof(num_points), typeof(interpolated_physical_z)}(
preallocated_arrays =
ClimaComms.iamroot(ClimaComms.context(space)) ? Dict{String, Array}() :
Dict{String, Nothing}()

return NetCDFWriter{
typeof(num_points),
typeof(interpolated_physical_z),
typeof(preallocated_arrays),
}(
Dict{String, Remapper}(),
num_points,
compression_level,
interpolated_physical_z,
Dict{String, NCDatasets.NCDataset}(),
disable_vertical_interpolation,
preallocated_arrays,
)
end

function write_field!(
writer::NetCDFWriter,
field,
diagnostic,
u,
p,
t,
output_dir,
)
function interpolate_field!(writer::NetCDFWriter, field, diagnostic, u, p, t)

var = diagnostic.variable

space = axes(field)
FT = Spaces.undertype(space)

horizontal_space = Spaces.horizontal_space(space)

Expand Down Expand Up @@ -644,10 +648,38 @@ function write_field!(

# Now we can interpolate onto the target points
# There's an MPI call in here (to aggregate the results)
interpolated_field = interpolate(remapper, field)
#
# The first time we call this, we call interpolate and allocate a new array.
# Future calls are in-place
if haskey(writer.preallocated_output_arrays, var.short_name)
interpolate!(
writer.preallocated_output_arrays[var.short_name],
remapper,
field,
)
else
writer.preallocated_output_arrays[var.short_name] =
interpolate(remapper, field)
end
return nothing
end

function save_diagnostic_to_disk!(
writer::NetCDFWriter,
field,
diagnostic,
u,
p,
t,
output_dir,
)
# Only the root process has to write
ClimaComms.iamroot(ClimaComms.context(field)) || return
ClimaComms.iamroot(ClimaComms.context(field)) || return nothing

var = diagnostic.variable
interpolated_field = writer.preallocated_output_arrays[var.short_name]
space = axes(field)
FT = Spaces.undertype(space)

output_path = joinpath(output_dir, "$(diagnostic.output_short_name).nc")

Expand Down Expand Up @@ -709,4 +741,19 @@ function write_field!(

# Write data to disk
NCDatasets.sync(writer.open_files[output_path])
return nothing
end

function write_field!(
writer::NetCDFWriter,
field,
diagnostic,
u,
p,
t,
output_dir,
)
interpolate_field!(writer, field, diagnostic, u, p, t)
save_diagnostic_to_disk!(writer, field, diagnostic, u, p, t, output_dir)
return nothing
end
2 changes: 1 addition & 1 deletion src/diagnostics/writers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# This file defines generic writers for diagnostics with opinionated defaults.

import ClimaCore: Hypsography
import ClimaCore.Remapping: Remapper, interpolate, interpolate_array
import ClimaCore.Remapping: Remapper, interpolate, interpolate!

import NCDatasets

Expand Down

0 comments on commit 8371841

Please sign in to comment.