Benchmark suite patch (bench/).

Summary of the hunks below:
  * bench/Project.toml     - add InteractiveUtils to [deps].
  * bench/helpers.jl       - benchmark_forward_pass gains an `end_tag` argument and now
                             registers entries under
                             SUITE[tag]["cpu"]["forward"]["NamedTuple"][end_tag] and
                             SUITE[tag]["cpu"]["forward"]["ComponentArray"][end_tag].
  * bench/layers.jl (new)  - forward-pass benchmarks for Dense(n => n) and
                             Conv((3, 3), ch => ch) over a fixed set of sizes.
  * bench/runbenchmarks.jl - log `versioninfo` via @info and include layers.jl.
  * bench/vgg.jl           - rename add_vgg_benchmarks to add_vgg_benchmarks! and replace
                             the three copy-pasted batch-size setups with a loop.

NOTE(review): this patch was recovered from a copy in which every newline had been
collapsed. Line boundaries were rebuilt so that each hunk matches its header counts;
leading indentation inside lines is a reconstruction (4-space, <= 92 columns). Verify
the context lines against the target tree, or apply with whitespace-insensitive
matching, before relying on it.

NOTE(review): add_conv_benchmarks! in bench/layers.jl has no explicit `return`, unlike
add_dense_benchmarks! and add_vgg_benchmarks!. Left untouched here so the post-image
still corresponds to the recorded blob id; worth a follow-up commit.

diff --git a/bench/Project.toml b/bench/Project.toml
index 0c5972386..7c13eb117 100644
--- a/bench/Project.toml
+++ b/bench/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
+InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
diff --git a/bench/helpers.jl b/bench/helpers.jl
index d60b9e8ae..0deae669a 100644
--- a/bench/helpers.jl
+++ b/bench/helpers.jl
@@ -1,9 +1,11 @@
 # TODO: Special Handling for GPU Arrays with @sync
-function benchmark_forward_pass(tag::String, model, x, ps, st)
-    SUITE[tag]["forward"]["default"] = @benchmarkable Lux.apply($model, $x, $ps, $st)
+function benchmark_forward_pass(tag::String, end_tag::String, model, x, ps_nt::NamedTuple,
+        st)
+    SUITE[tag]["cpu"]["forward"]["NamedTuple"][end_tag] = @benchmarkable Lux.apply(
+        $model, $x, $ps_nt, $st)
 
-    ps_ca = ComponentArray(ps)
-    SUITE[tag]["forward"]["ComponentArray"] = @benchmarkable Lux.apply(
+    ps_ca = ComponentArray(ps_nt)
+    SUITE[tag]["cpu"]["forward"]["ComponentArray"][end_tag] = @benchmarkable Lux.apply(
         $model, $x, $ps_ca, $st)
 
     return
diff --git a/bench/layers.jl b/bench/layers.jl
new file mode 100644
index 000000000..f9c51b1e3
--- /dev/null
+++ b/bench/layers.jl
@@ -0,0 +1,21 @@
+function add_dense_benchmarks!()
+    for n in (2, 20, 200, 2000)
+        layer = Dense(n => n)
+        x, ps, st = general_setup(layer, (n, 128))
+        benchmark_forward_pass("Dense($n => $n)", "($n, 128)", layer, x, ps, st)
+    end
+
+    return
+end
+
+function add_conv_benchmarks!()
+    for ch in (1, 3, 16, 64)
+        layer = Conv((3, 3), ch => ch)
+        x, ps, st = general_setup(layer, (64, 64, ch, 128))
+        benchmark_forward_pass(
+            "Conv((3, 3), $ch => $ch)", "(64, 64, $ch, 128)", layer, x, ps, st)
+    end
+end
+
+add_dense_benchmarks!()
+add_conv_benchmarks!()
diff --git a/bench/runbenchmarks.jl b/bench/runbenchmarks.jl
index 5f76d244c..256698338 100644
--- a/bench/runbenchmarks.jl
+++ b/bench/runbenchmarks.jl
@@ -1,14 +1,18 @@
 using BenchmarkTools: BenchmarkTools, BenchmarkGroup, @btime, @benchmarkable
 using ComponentArrays: ComponentArray
+using InteractiveUtils: versioninfo
 using Lux: Lux, BatchNorm, Chain, Conv, Dense, Dropout, FlattenLayer, MaxPool
 using NNlib: relu
 using StableRNGs: StableRNG
 using Statistics: median
 
+@info sprint(versioninfo)
+
 const SUITE = BenchmarkGroup()
 
 include("helpers.jl")
 include("vgg.jl")
+include("layers.jl")
 
 BenchmarkTools.tune!(SUITE)
 results = BenchmarkTools.run(SUITE; verbose=true)
diff --git a/bench/vgg.jl b/bench/vgg.jl
index e12f1f9d9..1464ffd8b 100644
--- a/bench/vgg.jl
+++ b/bench/vgg.jl
@@ -1,4 +1,4 @@
-function add_vgg_benchmarks()
+function add_vgg_benchmarks!()
     vgg16 = Chain(Conv((3, 3), 3 => 64, relu; pad=(1, 1), stride=(1, 1)), BatchNorm(64),
         Conv((3, 3), 64 => 64, relu; pad=(1, 1), stride=(1, 1)), BatchNorm(64),
         MaxPool((2, 2)), Conv((3, 3), 64 => 128, relu; pad=(1, 1), stride=(1, 1)),
@@ -17,16 +17,12 @@ function add_vgg_benchmarks()
         BatchNorm(512), MaxPool((2, 2)), FlattenLayer(), Dense(512, 4096, relu),
         Dropout(0.5), Dense(4096, 4096, relu), Dropout(0.5), Dense(4096, 10))
 
-    x, ps, st = general_setup(vgg16, (32, 32, 3, 1))
-    benchmark_forward_pass("vgg16 -- batchsize = 1", vgg16, x, ps, st)
-
-    x, ps, st = general_setup(vgg16, (32, 32, 3, 16))
-    benchmark_forward_pass("vgg16 -- batchsize = 16", vgg16, x, ps, st)
-
-    x, ps, st = general_setup(vgg16, (32, 32, 3, 64))
-    benchmark_forward_pass("vgg16 -- batchsize = 64", vgg16, x, ps, st)
+    for bsize in (1, 16, 64)
+        x, ps, st = general_setup(vgg16, (32, 32, 3, bsize))
+        benchmark_forward_pass("vgg16", "(32, 32, 3, $bsize)", vgg16, x, ps, st)
+    end
 
     return
 end
 
-add_vgg_benchmarks()
+add_vgg_benchmarks!()