Merge pull request #575 from LuxDL/ap/ad_benchmarks
More Continuous Benchmarks
avik-pal authored Apr 6, 2024
2 parents 849ec40 + 8c83975 commit 80e3475
Showing 10 changed files with 269 additions and 71 deletions.
4 changes: 2 additions & 2 deletions Project.toml
@@ -1,7 +1,7 @@
name = "Lux"
uuid = "b2108857-7c20-44ae-9111-449ecde12c47"
authors = ["Avik Pal <avikpal@mit.edu> and contributors"]
version = "0.5.32"
version = "0.5.33"

[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
@@ -70,7 +70,7 @@ LuxAMDGPU = "0.2.2"
LuxCUDA = "0.3.2"
LuxCore = "0.1.12"
LuxDeviceUtils = "0.1.16"
LuxLib = "0.3.10"
LuxLib = "0.3.11"
LuxTestUtils = "0.1.15"
MacroTools = "0.5.13"
Markdown = "1.10"
9 changes: 9 additions & 0 deletions bench/Project.toml
@@ -1,9 +1,18 @@
[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
FastClosures = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
SimpleChains = "de6bee2f-e2f4-4ec7-b6ed-219cc6f6e9e5"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Tapir = "07d77754-e150-4737-8c94-cd238a1fb45b"
Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
122 changes: 117 additions & 5 deletions bench/helpers.jl
@@ -1,19 +1,131 @@
# TODO: Special Handling for GPU Arrays with @sync
function benchmark_forward_pass(tag::String, end_tag::String, model, x, ps_nt::NamedTuple,
st)
function benchmark_forward_pass(
tag::String, end_tag::String, model, x_dims; simple_chains=nothing,
flux_model=nothing)
SUITE[tag]["cpu"]["forward"]["NamedTuple"][end_tag] = @benchmarkable Lux.apply(
$model, $x, $ps_nt, $st)
$model, x, ps_nt, st) setup=((x, ps_nt, st) = general_setup($model, $x_dims))

ps_ca = ComponentArray(ps_nt)
SUITE[tag]["cpu"]["forward"]["ComponentArray"][end_tag] = @benchmarkable Lux.apply(
$model, $x, $ps_ca, $st)
$model, x, ps_ca, st) setup=((x, ps_nt, st) = general_setup($model, $x_dims); ps_ca = ComponentArray(ps_nt))

if simple_chains !== nothing
simple_chains_model = simple_chains(model)
SUITE[tag]["cpu"]["forward"]["SimpleChains"][end_tag] = @benchmarkable Lux.apply(
$simple_chains_model, x, ps_simple_chains, st_simple_chains) setup=((x, ps_simple_chains, st_simple_chains) = general_setup(
$simple_chains_model, $x_dims))
end

if flux_model !== nothing
SUITE[tag]["cpu"]["forward"]["Flux"][end_tag] = @benchmarkable fmodel(x) setup=(x = randn(
StableRNG(0), Float32, $x_dims);
fmodel = $(flux_model()))
end

return
end

function benchmark_reverse_pass(
tag::String, end_tag::String, backends, model, x_dims;
simple_chains=nothing, flux_model=nothing)
for backend in backends
__benchmark_reverse_pass(tag, end_tag, backend, model, x_dims)
end

if simple_chains !== nothing
simple_chains_model = simple_chains(model)
__benchmark_reverse_pass_simple_chains(
tag, end_tag, AutoZygote(), simple_chains_model, x_dims)
end

if flux_model !== nothing
__benchmark_reverse_pass_flux(tag, end_tag, AutoZygote(), flux_model, x_dims)
end

return
end

# Shared setup: a fixed-seed RNG makes parameters, state, and inputs identical
# across runs; passing x_dims === nothing skips input creation.
function general_setup(model, x_dims)
rng = StableRNG(0)
ps, st = Lux.setup(rng, model)
x_dims === nothing && return ps, st
x = randn(rng, Float32, x_dims)
return x, ps, st
end

# TODO: Remove these once DifferentiationInterface has been released
function __benchmark_reverse_pass(
tag::String, end_tag::String, ::AutoEnzyme, model, x_dims)
# TODO: Enable this. But enzyme doesn't handle closures well it seems...
# SUITE[tag]["cpu"]["reverse"]["Enzyme"][end_tag] = @benchmarkable Enzyme.gradient(
# $Enzyme.Reverse, $f, $x)
return error("Enzyme backend hasn't been implemented yet.")
end
function __benchmark_reverse_pass(
tag::String, end_tag::String, ::AutoTapir, model, x_dims)
SUITE[tag]["cpu"]["reverse"]["Tapir"][end_tag] = @benchmarkable Tapir.value_and_pullback!!(
trrule, 1.0f0, f, ps_ca) setup=begin
(x, ps, st) = general_setup($model, $x_dims)
ps_ca = ComponentArray(ps)
f = @closure(p->sum(abs2, first(Lux.apply($model, x, p, st))))
trrule = Tapir.build_rrule(f, ps_ca)
end
return
end
function __benchmark_reverse_pass(
tag::String, end_tag::String, ::AutoTracker, model, x_dims)
SUITE[tag]["cpu"]["reverse"]["Tracker"][end_tag] = @benchmarkable Tracker.gradient(
f, ps_ca) setup=begin
(x, ps, st) = general_setup($model, $x_dims)
ps_ca = ComponentArray(ps)
f = @closure(p->sum(abs2, first(Lux.apply($model, x, p, st))))
end
return
end
function __benchmark_reverse_pass(
tag::String, end_tag::String, ad::AutoReverseDiff, model, x_dims)
if ad.compile
SUITE[tag]["cpu"]["reverse"]["ReverseDiff (compiled)"][end_tag] = @benchmarkable ReverseDiff.gradient!(
∂ps, tape, ps_ca) setup=begin
(x, ps, st) = general_setup($model, $x_dims)
ps_ca = ComponentArray(ps)
∂ps = similar(ps_ca)
f = @closure(p->sum(abs2, first(Lux.apply($model, x, p, st))))
tape = ReverseDiff.compile(ReverseDiff.GradientTape(f, ps_ca))
end
else
SUITE[tag]["cpu"]["reverse"]["ReverseDiff"][end_tag] = @benchmarkable ReverseDiff.gradient(
f, ps_ca) setup=begin
(x, ps, st) = general_setup($model, $x_dims)
ps_ca = ComponentArray(ps)
f = @closure(p->sum(abs2, first(Lux.apply($model, x, p, st))))
end
end
end
function __benchmark_reverse_pass(tag::String, end_tag::String, ::AutoZygote, model, x_dims)
SUITE[tag]["cpu"]["reverse"]["Zygote"][end_tag] = @benchmarkable Zygote.gradient(
f, ps_ca) setup=begin
(x, ps, st) = general_setup($model, $x_dims)
ps_ca = ComponentArray(ps)
f = @closure(p->sum(abs2, first(Lux.apply($model, x, p, st))))
end
return
end
function __benchmark_reverse_pass_simple_chains(
tag::String, end_tag::String, ::AutoZygote, model, x_dims)
SUITE[tag]["cpu"]["reverse"]["SimpleChains"][end_tag] = @benchmarkable Zygote.gradient(
f, ps) setup=begin
(x, ps, st) = general_setup($model, $x_dims)
f = @closure(p->sum(abs2, first(Lux.apply($model, x, p, st))))
end
return
end
function __benchmark_reverse_pass_flux(
tag::String, end_tag::String, ::AutoZygote, model, x_dims)
SUITE[tag]["cpu"]["reverse"]["Flux"][end_tag] = @benchmarkable Zygote.gradient(
f, m) setup=begin
x = randn(StableRNG(0), Float32, $x_dims)
m = $(model)()
f = @closure(m->sum(abs2, m(x)))
end
return
end
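For orientation, the two entry points above are meant to be called once per layer configuration. A minimal sketch of the call pattern, mirroring the call sites in bench/layers.jl and bench/vgg.jl below (the Dense(4 => 4) layer and its dimensions here are hypothetical):

layer = Dense(4 => 4)

# Registers forward-pass benchmarks for NamedTuple and ComponentArray parameters,
# plus optional SimpleChains and Flux baselines.
benchmark_forward_pass("Dense(4 => 4)", "(4, 128)", layer, (4, 128);
    simple_chains=Lux.ToSimpleChainsAdaptor((static(4),)),
    flux_model=() -> Flux.Dense(4 => 4))

# Registers one reverse-pass benchmark per AD backend.
benchmark_reverse_pass("Dense(4 => 4)", "(4, 128)",
    (AutoTracker(), AutoReverseDiff(), AutoReverseDiff(true), AutoZygote()),
    layer, (4, 128))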
22 changes: 17 additions & 5 deletions bench/layers.jl
@@ -1,8 +1,14 @@
function add_dense_benchmarks!()
for n in (2, 20, 200, 2000)
layer = Dense(n => n)
x, ps, st = general_setup(layer, (n, 128))
benchmark_forward_pass("Dense($n => $n)", "($n, 128)", layer, x, ps, st)
simple_chains = n ≤ 200 ? Lux.ToSimpleChainsAdaptor((static(n),)) : nothing
flux_model = () -> Flux.Dense(n => n)
benchmark_forward_pass(
"Dense($n => $n)", "($n, 128)", layer, (n, 128); simple_chains, flux_model)
benchmark_reverse_pass(
"Dense($n => $n)", "($n, 128)",
(AutoTracker(), AutoReverseDiff(), AutoReverseDiff(true), AutoZygote()),
layer, (n, 128); simple_chains, flux_model)
end

return
@@ -11,9 +17,15 @@ end
function add_conv_benchmarks!()
for ch in (1, 3, 16, 64)
layer = Conv((3, 3), ch => ch)
x, ps, st = general_setup(layer, (64, 64, ch, 128))
benchmark_forward_pass(
"Conv((3, 3), $ch => $ch)", "(64, 64, $ch, 128)", layer, x, ps, st)
simple_chains = ch ≤ 16 ?
Lux.ToSimpleChainsAdaptor((static(64), static(64), static(ch))) :
nothing
flux_model = () -> Flux.Conv((3, 3), ch => ch)
benchmark_forward_pass("Conv((3, 3), $ch => $ch)", "(64, 64, $ch, 128)",
layer, (64, 64, ch, 128); simple_chains, flux_model)
benchmark_reverse_pass("Conv((3, 3), $ch => $ch)", "(64, 64, $ch, 128)",
(AutoTracker(), AutoReverseDiff(), AutoReverseDiff(true), AutoZygote()),
layer, (64, 64, ch, 128); simple_chains, flux_model)
end
end

20 changes: 19 additions & 1 deletion bench/runbenchmarks.jl
@@ -1,11 +1,28 @@
using ADTypes: ADTypes, AutoEnzyme, AutoTracker, AutoReverseDiff, AutoZygote
using BenchmarkTools: BenchmarkTools, BenchmarkGroup, @btime, @benchmarkable
using ComponentArrays: ComponentArray
using InteractiveUtils: versioninfo
using FastClosures: @closure
using Flux: Flux
using Lux: Lux, BatchNorm, Chain, Conv, Dense, Dropout, FlattenLayer, MaxPool
using NNlib: relu
using SimpleChains: SimpleChains, static
using StableRNGs: StableRNG
using Statistics: median

# AD Backends
using Enzyme: Enzyme
using ReverseDiff: ReverseDiff
using Tapir: Tapir
using Tracker: Tracker
using Zygote: Zygote

# BenchmarkTools Parameters
BenchmarkTools.DEFAULT_PARAMETERS.samples = 100
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 0.25

# AutoTapir is not among the ADTypes backends imported above, so define a
# minimal stand-in type purely for dispatch in bench/helpers.jl.
struct AutoTapir <: ADTypes.AbstractReverseMode end

@info sprint(versioninfo)

const SUITE = BenchmarkGroup()
@@ -14,7 +31,8 @@ include("helpers.jl")
include("vgg.jl")
include("layers.jl")

BenchmarkTools.tune!(SUITE)
BenchmarkTools.tune!(SUITE; verbose=true)
results = BenchmarkTools.run(SUITE; verbose=true)
display(median(results))

BenchmarkTools.save(joinpath(@__DIR__, "benchmark_results.json"), median(results))
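Since the suite now persists its medians, the JSON written above can be reloaded for offline comparison. A minimal sketch, assuming the benchmark_results.json produced by this script (BenchmarkTools.load returns a vector of the saved objects):

using BenchmarkTools

# Load the single saved BenchmarkGroup of median estimates.
medians = only(BenchmarkTools.load(joinpath(@__DIR__, "benchmark_results.json")))

# Index by tag, device, pass direction, parameter type, and input size.
medians["Dense(2 => 2)"]["cpu"]["forward"]["NamedTuple"]["(2, 128)"]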
28 changes: 26 additions & 2 deletions bench/vgg.jl
@@ -17,9 +17,33 @@ function add_vgg_benchmarks!()
BatchNorm(512), MaxPool((2, 2)), FlattenLayer(), Dense(512, 4096, relu),
Dropout(0.5), Dense(4096, 4096, relu), Dropout(0.5), Dense(4096, 10))

flux_model = () -> Flux.Chain(
Flux.Conv((3, 3), 3 => 64, relu; pad=(1, 1), stride=(1, 1)),
Flux.BatchNorm(64), Flux.Conv((3, 3), 64 => 64, relu; pad=(1, 1), stride=(1, 1)),
Flux.BatchNorm(64), Flux.MaxPool((2, 2)),
Flux.Conv((3, 3), 64 => 128, relu; pad=(1, 1), stride=(1, 1)), Flux.BatchNorm(128),
Flux.Conv((3, 3), 128 => 128, relu; pad=(1, 1), stride=(1, 1)), Flux.BatchNorm(128),
Flux.MaxPool((2, 2)),
Flux.Conv((3, 3), 128 => 256, relu; pad=(1, 1), stride=(1, 1)), Flux.BatchNorm(256),
Flux.Conv((3, 3), 256 => 256, relu; pad=(1, 1), stride=(1, 1)), Flux.BatchNorm(256),
Flux.Conv((3, 3), 256 => 256, relu; pad=(1, 1), stride=(1, 1)), Flux.BatchNorm(256),
Flux.MaxPool((2, 2)),
Flux.Conv((3, 3), 256 => 512, relu; pad=(1, 1), stride=(1, 1)), Flux.BatchNorm(512),
Flux.Conv((3, 3), 512 => 512, relu; pad=(1, 1), stride=(1, 1)), Flux.BatchNorm(512),
Flux.Conv((3, 3), 512 => 512, relu; pad=(1, 1), stride=(1, 1)), Flux.BatchNorm(512),
Flux.MaxPool((2, 2)),
Flux.Conv((3, 3), 512 => 512, relu; pad=(1, 1), stride=(1, 1)), Flux.BatchNorm(512),
Flux.Conv((3, 3), 512 => 512, relu; pad=(1, 1), stride=(1, 1)), Flux.BatchNorm(512),
Flux.Conv((3, 3), 512 => 512, relu; pad=(1, 1), stride=(1, 1)), Flux.BatchNorm(512),
Flux.MaxPool((2, 2)), Flux.flatten, Flux.Dense(512, 4096, relu), Flux.Dropout(0.5),
Flux.Dense(4096, 4096, relu), Flux.Dropout(0.5), Flux.Dense(4096, 10))

for bsize in (1, 16, 64)
x, ps, st = general_setup(vgg16, (32, 32, 3, bsize))
benchmark_forward_pass("vgg16", "(32, 32, 3, $bsize)", vgg16, x, ps, st)
benchmark_forward_pass(
"vgg16", "(32, 32, 3, $bsize)", vgg16, (32, 32, 3, bsize); flux_model)
benchmark_reverse_pass(
"vgg16", "(32, 32, 3, $bsize)", (AutoTracker(), AutoZygote()),
vgg16, (32, 32, 3, bsize); flux_model)
end

return
12 changes: 6 additions & 6 deletions docs/src/.vitepress/config.mts
@@ -62,9 +62,9 @@ export default defineConfig({
},
nav: [
{ text: 'Home', link: '/' },
{ text: 'Getting Started', link: '/introduction/index' },
{ text: 'Getting Started', link: '/introduction' },
{ text: 'Benchmarks', link: 'https://lux.csail.mit.edu/benchmarks/' },
{ text: 'Tutorials', link: '/tutorials/index' },
{ text: 'Tutorials', link: '/tutorials' },
{ text: 'Manual', link: '/manual/interface' },
{
text: 'API', items: [
@@ -104,22 +104,22 @@ },
},
{
text: 'Versions', items: [
{ text: 'Stable', link: 'https://lux.csail.mit.edu/stable/' },
{ text: 'Dev', link: 'https://lux.csail.mit.edu/dev/' }
{ text: 'Stable', link: 'https://lux.csail.mit.edu/stable' },
{ text: 'Dev', link: 'https://lux.csail.mit.edu/dev' }
]
}
],
sidebar: {
"/introduction/": {
text: 'Getting Started', collapsed: false, items: [
{ text: 'Introduction', link: '/introduction/index' },
{ text: 'Introduction', link: '/introduction' },
{ text: 'Overview', link: '/introduction/overview' },
{ text: 'Resources', link: '/introduction/resources' },
{ text: 'Citation', link: '/introduction/citation' }]
},
"/tutorials/": {
text: 'Tutorials', collapsed: false, items: [
{ text: 'Overview', link: '/tutorials/index' },
{ text: 'Overview', link: '/tutorials' },
{
text: 'Beginner', collapsed: false, items: [
{ text: 'Julia & Lux for the Uninitiated', link: '/tutorials/beginner/1_Basics' },
4 changes: 2 additions & 2 deletions docs/src/index.md
@@ -10,7 +10,7 @@ hero:
actions:
- theme: brand
text: Tutorials
link: /tutorials/
link: /tutorials
- theme: alt
text: Ecosystem
link: /ecosystem
@@ -28,7 +28,7 @@ features:
- icon: 🚀
title: Fast & Extendible
details: Lux.jl is written in Julia itself, making it extremely extendible. CUDA and AMDGPU are supported first-class, with experimental support for Metal Hardware.
link: /introduction/
link: /introduction
- icon: 🧑‍🔬
title: SciML ❤️ Lux
5 changes: 5 additions & 0 deletions examples/SimpleChains/main.jl
@@ -78,6 +78,11 @@ function train(model; rng=Xoshiro(0), kwargs...)
train_state = Lux.Experimental.TrainState(
rng, model, Adam(3.0f-4); transform_variables=identity)

### Warmup the model
x_proto = randn(rng, Float32, 28, 28, 1, 1)
y_proto = onehotbatch([1], 0:9)
Lux.Experimental.compute_gradients(AutoZygote(), loss, (x_proto, y_proto), train_state)

### Lets train the model
nepochs = 10
for epoch in 1:nepochs

1 comment on commit 80e3475

@github-actions (Contributor)


Benchmark Results

| Benchmark suite | Current: 80e3475 | Previous: 849ec40 | Ratio |
|---|---|---|---|
| Dense(2 => 2)/cpu/reverse/ReverseDiff (compiled)/(2, 128) | 3267.375 ns | | |
| Dense(2 => 2)/cpu/reverse/Zygote/(2, 128) | 9518 ns | | |
| Dense(2 => 2)/cpu/reverse/Tracker/(2, 128) | 16922 ns | | |
| Dense(2 => 2)/cpu/reverse/ReverseDiff/(2, 128) | 4762.5 ns | | |
| Dense(2 => 2)/cpu/reverse/Flux/(2, 128) | 7057.2 ns | | |
| Dense(2 => 2)/cpu/reverse/SimpleChains/(2, 128) | 1712.7 ns | | |
| Dense(2 => 2)/cpu/forward/NamedTuple/(2, 128) | 1091 ns | 2001.7 ns | 0.55 |
| Dense(2 => 2)/cpu/forward/ComponentArray/(2, 128) | 701.922077922078 ns | 1590.4709677419355 ns | 0.44 |
| Dense(2 => 2)/cpu/forward/Flux/(2, 128) | 1308.4 ns | | |
| Dense(2 => 2)/cpu/forward/SimpleChains/(2, 128) | 179.44851904090268 ns | | |
| Dense(20 => 20)/cpu/reverse/ReverseDiff (compiled)/(20, 128) | 17643 ns | | |
| Dense(20 => 20)/cpu/reverse/Zygote/(20, 128) | 25061.5 ns | | |
| Dense(20 => 20)/cpu/reverse/Tracker/(20, 128) | 37245 ns | | |
| Dense(20 => 20)/cpu/reverse/ReverseDiff/(20, 128) | 23073 ns | | |
| Dense(20 => 20)/cpu/reverse/Flux/(20, 128) | 21485 ns | | |
| Dense(20 => 20)/cpu/reverse/SimpleChains/(20, 128) | 13360 ns | | |
| Dense(20 => 20)/cpu/forward/NamedTuple/(20, 128) | 5101.714285714286 ns | 4869.142857142857 ns | 1.05 |
| Dense(20 => 20)/cpu/forward/ComponentArray/(20, 128) | 5107.428571428572 ns | 4718.857142857143 ns | 1.08 |
| Dense(20 => 20)/cpu/forward/Flux/(20, 128) | 5200.428571428571 ns | | |
| Dense(20 => 20)/cpu/forward/SimpleChains/(20, 128) | 1682.65 ns | | |
| Conv((3, 3), 3 => 3)/cpu/reverse/ReverseDiff (compiled)/(64, 64, 3, 128) | 39284500.5 ns | | |
| Conv((3, 3), 3 => 3)/cpu/reverse/Zygote/(64, 64, 3, 128) | 91429963.5 ns | | |
| Conv((3, 3), 3 => 3)/cpu/reverse/Tracker/(64, 64, 3, 128) | 92389439.5 ns | | |
| Conv((3, 3), 3 => 3)/cpu/reverse/ReverseDiff/(64, 64, 3, 128) | 39691452 ns | | |
| Conv((3, 3), 3 => 3)/cpu/reverse/Flux/(64, 64, 3, 128) | 89786916.5 ns | | |
| Conv((3, 3), 3 => 3)/cpu/reverse/SimpleChains/(64, 64, 3, 128) | 12259593 ns | | |
| Conv((3, 3), 3 => 3)/cpu/forward/NamedTuple/(64, 64, 3, 128) | 10225995 ns | 10357033 ns | 0.99 |
| Conv((3, 3), 3 => 3)/cpu/forward/ComponentArray/(64, 64, 3, 128) | 10142638 ns | 10383988.5 ns | 0.98 |
| Conv((3, 3), 3 => 3)/cpu/forward/Flux/(64, 64, 3, 128) | 10137133.5 ns | | |
| Conv((3, 3), 3 => 3)/cpu/forward/SimpleChains/(64, 64, 3, 128) | 6423977 ns | | |
| vgg16/cpu/reverse/Zygote/(32, 32, 3, 1) | 4365894701 ns | | |
| vgg16/cpu/reverse/Zygote/(32, 32, 3, 16) | 5118048782 ns | | |
| vgg16/cpu/reverse/Zygote/(32, 32, 3, 64) | 15719820981 ns | | |
| vgg16/cpu/reverse/Tracker/(32, 32, 3, 1) | 1394532184 ns | | |
| vgg16/cpu/reverse/Tracker/(32, 32, 3, 16) | 2737271241 ns | | |
| vgg16/cpu/reverse/Tracker/(32, 32, 3, 64) | 5392134495 ns | | |
| vgg16/cpu/reverse/Flux/(32, 32, 3, 1) | 86035309 ns | | |
| vgg16/cpu/reverse/Flux/(32, 32, 3, 16) | 750690104 ns | | |
| vgg16/cpu/reverse/Flux/(32, 32, 3, 64) | 3106551216 ns | | |
| vgg16/cpu/forward/NamedTuple/(32, 32, 3, 1) | 23365059.5 ns | 22706894 ns | 1.03 |
| vgg16/cpu/forward/NamedTuple/(32, 32, 3, 16) | 248196871 ns | 251415835 ns | 0.99 |
| vgg16/cpu/forward/NamedTuple/(32, 32, 3, 64) | 1003838362 ns | 990621987 ns | 1.01 |
| vgg16/cpu/forward/ComponentArray/(32, 32, 3, 1) | 23649034 ns | 22766979.5 ns | 1.04 |
| vgg16/cpu/forward/ComponentArray/(32, 32, 3, 16) | 248954800 ns | 249637390 ns | 1.00 |
| vgg16/cpu/forward/ComponentArray/(32, 32, 3, 64) | 1074000658 ns | 991675056.5 ns | 1.08 |
| vgg16/cpu/forward/Flux/(32, 32, 3, 1) | 22160375 ns | | |
| vgg16/cpu/forward/Flux/(32, 32, 3, 16) | 247762136 ns | | |
| vgg16/cpu/forward/Flux/(32, 32, 3, 64) | 895437036 ns | | |
| Conv((3, 3), 64 => 64)/cpu/reverse/ReverseDiff (compiled)/(64, 64, 64, 128) | 1052351659 ns | | |
| Conv((3, 3), 64 => 64)/cpu/reverse/Zygote/(64, 64, 64, 128) | 2300391794 ns | | |
| Conv((3, 3), 64 => 64)/cpu/reverse/Tracker/(64, 64, 64, 128) | 2730677981 ns | | |
| Conv((3, 3), 64 => 64)/cpu/reverse/ReverseDiff/(64, 64, 64, 128) | 1401783862 ns | | |
| Conv((3, 3), 64 => 64)/cpu/reverse/Flux/(64, 64, 64, 128) | 2328019747 ns | | |
| Conv((3, 3), 64 => 64)/cpu/forward/NamedTuple/(64, 64, 64, 128) | 375448343 ns | 371776615 ns | 1.01 |
| Conv((3, 3), 64 => 64)/cpu/forward/ComponentArray/(64, 64, 64, 128) | 374614390 ns | 372349398 ns | 1.01 |
| Conv((3, 3), 64 => 64)/cpu/forward/Flux/(64, 64, 64, 128) | 375734589 ns | | |
| Conv((3, 3), 1 => 1)/cpu/reverse/ReverseDiff (compiled)/(64, 64, 1, 128) | 13597474 ns | | |
| Conv((3, 3), 1 => 1)/cpu/reverse/Zygote/(64, 64, 1, 128) | 30506392 ns | | |
| Conv((3, 3), 1 => 1)/cpu/reverse/Tracker/(64, 64, 1, 128) | 31258806 ns | | |
| Conv((3, 3), 1 => 1)/cpu/reverse/ReverseDiff/(64, 64, 1, 128) | 13877995 ns | | |
| Conv((3, 3), 1 => 1)/cpu/reverse/Flux/(64, 64, 1, 128) | 30495395 ns | | |
| Conv((3, 3), 1 => 1)/cpu/reverse/SimpleChains/(64, 64, 1, 128) | 1149994 ns | | |
| Conv((3, 3), 1 => 1)/cpu/forward/NamedTuple/(64, 64, 1, 128) | 3880661 ns | 3900400 ns | 0.99 |
| Conv((3, 3), 1 => 1)/cpu/forward/ComponentArray/(64, 64, 1, 128) | 3880529 ns | 3899295 ns | 1.00 |
| Conv((3, 3), 1 => 1)/cpu/forward/Flux/(64, 64, 1, 128) | 3876882 ns | | |
| Conv((3, 3), 1 => 1)/cpu/forward/SimpleChains/(64, 64, 1, 128) | 195006 ns | | |
| Dense(200 => 200)/cpu/reverse/ReverseDiff (compiled)/(200, 128) | 297678 ns | | |
| Dense(200 => 200)/cpu/reverse/Zygote/(200, 128) | 315185.5 ns | | |
| Dense(200 => 200)/cpu/reverse/Tracker/(200, 128) | 397685 ns | | |
| Dense(200 => 200)/cpu/reverse/ReverseDiff/(200, 128) | 344595.5 ns | | |
| Dense(200 => 200)/cpu/reverse/Flux/(200, 128) | 471604 ns | | |
| Dense(200 => 200)/cpu/reverse/SimpleChains/(200, 128) | 395982 ns | | |
| Dense(200 => 200)/cpu/forward/NamedTuple/(200, 128) | 91626.5 ns | 87314 ns | 1.05 |
| Dense(200 => 200)/cpu/forward/ComponentArray/(200, 128) | 95404 ns | 87424 ns | 1.09 |
| Dense(200 => 200)/cpu/forward/Flux/(200, 128) | 87344 ns | | |
| Dense(200 => 200)/cpu/forward/SimpleChains/(200, 128) | 104776.5 ns | | |
| Conv((3, 3), 16 => 16)/cpu/reverse/ReverseDiff (compiled)/(64, 64, 16, 128) | 191759064 ns | | |
| Conv((3, 3), 16 => 16)/cpu/reverse/Zygote/(64, 64, 16, 128) | 511408215 ns | | |
| Conv((3, 3), 16 => 16)/cpu/reverse/Tracker/(64, 64, 16, 128) | 523965339 ns | | |
| Conv((3, 3), 16 => 16)/cpu/reverse/ReverseDiff/(64, 64, 16, 128) | 212766749 ns | | |
| Conv((3, 3), 16 => 16)/cpu/reverse/Flux/(64, 64, 16, 128) | 486845887 ns | | |
| Conv((3, 3), 16 => 16)/cpu/reverse/SimpleChains/(64, 64, 16, 128) | 320740323 ns | | |
| Conv((3, 3), 16 => 16)/cpu/forward/NamedTuple/(64, 64, 16, 128) | 64118671 ns | 56912462 ns | 1.13 |
| Conv((3, 3), 16 => 16)/cpu/forward/ComponentArray/(64, 64, 16, 128) | 64797793 ns | 56953662 ns | 1.14 |
| Conv((3, 3), 16 => 16)/cpu/forward/Flux/(64, 64, 16, 128) | 64228406 ns | | |
| Conv((3, 3), 16 => 16)/cpu/forward/SimpleChains/(64, 64, 16, 128) | 29516170.5 ns | | |
| Dense(2000 => 2000)/cpu/reverse/ReverseDiff (compiled)/(2000, 128) | 19828112 ns | | |
| Dense(2000 => 2000)/cpu/reverse/Zygote/(2000, 128) | 25262569 ns | | |
| Dense(2000 => 2000)/cpu/reverse/Tracker/(2000, 128) | 34145184 ns | | |
| Dense(2000 => 2000)/cpu/reverse/ReverseDiff/(2000, 128) | 22848976.5 ns | | |
| Dense(2000 => 2000)/cpu/reverse/Flux/(2000, 128) | 19974956 ns | | |
| Dense(2000 => 2000)/cpu/forward/NamedTuple/(2000, 128) | 6640955 ns | 6515785 ns | 1.02 |
| Dense(2000 => 2000)/cpu/forward/ComponentArray/(2000, 128) | 6637819 ns | 6560088 ns | 1.01 |
| Dense(2000 => 2000)/cpu/forward/Flux/(2000, 128) | 6568209 ns | | |
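(For reading the table: Ratio is the current median divided by the previous one, so values below 1.00 indicate a speedup; rows with an empty Previous column are benchmarks newly added by this PR.)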

This comment was automatically generated by workflow using github-action-benchmark.
