diff --git a/.github/workflows/Benchmark.yml b/.github/workflows/Benchmark.yml
new file mode 100644
index 000000000..17a36b09f
--- /dev/null
+++ b/.github/workflows/Benchmark.yml
@@ -0,0 +1,55 @@
+name: Benchmarks
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+concurrency:
+  # Skip intermediate builds: always.
+  # Cancel intermediate builds: only if it is a pull request build.
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
+
+jobs:
+  benchmark:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v1
+        with:
+          version: '1'
+          arch: x64
+      - uses: actions/cache@v4
+        env:
+          cache-name: cache-artifacts
+        with:
+          path: ~/.julia/artifacts
+          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
+          restore-keys: |
+            ${{ runner.os }}-test-${{ env.cache-name }}-
+            ${{ runner.os }}-test-
+            ${{ runner.os }}-
+      - name: Run benchmark
+        run: |
+          cd bench
+          julia --project --color=yes -e '
+            using Pkg;
+            Pkg.develop(PackageSpec(path=joinpath(pwd(), "..")));
+            Pkg.instantiate();
+            include("runbenchmarks.jl")'
+      - name: Parse & Upload Benchmark Results
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          name: Benchmark Results
+          tool: 'julia'
+          output-file-path: bench/benchmark_results.json
+          summary-always: true
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          comment-always: true
+          alert-threshold: "200%"
+          fail-on-alert: true
+          benchmark-data-dir-path: benchmarks
+          auto-push: ${{ github.event_name != 'pull_request' }}
diff --git a/.gitignore b/.gitignore
index 8590783ac..b249f6420 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,3 +29,5 @@ docs/src/tutorials/beginner
 docs/src/tutorials/intermediate
 docs/src/tutorials/advanced
 *.log
+
+bench/benchmark_results.json
diff --git a/bench/.JuliaFormatter.toml b/bench/.JuliaFormatter.toml
new file mode 100644
index 000000000..3d6dde2cb
--- /dev/null
+++ b/bench/.JuliaFormatter.toml
@@ -0,0 +1,9 @@
+style = "sciml"
+whitespace_in_kwargs = false
+always_use_return = true
+margin = 92
+indent = 4
+format_docstrings = true
+separate_kwargs_with_semicolon = true
+always_for_in = true
+annotate_untyped_fields_with_any = false
diff --git a/bench/Project.toml b/bench/Project.toml
new file mode 100644
index 000000000..0c5972386
--- /dev/null
+++ b/bench/Project.toml
@@ -0,0 +1,8 @@
+[deps]
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
+Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
+NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
diff --git a/bench/README.md b/bench/README.md
new file mode 100644
index 000000000..8f5380fc5
--- /dev/null
+++ b/bench/README.md
@@ -0,0 +1,12 @@
+# Lux.jl Continuous Benchmarking
+
+We currently use the BenchmarkTools.jl package to track the performance of Lux.jl over
+time.
+
+The setup is built on https://github.com/benchmark-action/github-action-benchmark/, which
+renders the benchmark results on GitHub Pages and posts a warning comment on a PR when a
+benchmark regresses.
+
+## Current Benchmarks
+
+1. Small VGG Net for CIFAR-10
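The `Run benchmark` CI step above can also be reproduced locally. A minimal sketch, assuming a checkout of the repository root; it mirrors the inline script in `Benchmark.yml` (`Pkg.develop` points the bench environment at the local Lux rather than a registered release):

```julia
# Local equivalent of the CI "Run benchmark" step in .github/workflows/Benchmark.yml.
using Pkg
cd("bench")                           # assumes pwd() is the Lux.jl repository root
Pkg.activate(".")                     # use bench/Project.toml as the active environment
Pkg.develop(PackageSpec(path=".."))   # benchmark the checked-out Lux, not a release
Pkg.instantiate()
include("runbenchmarks.jl")           # writes bench/benchmark_results.json
```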
diff --git a/bench/helpers.jl b/bench/helpers.jl
new file mode 100644
index 000000000..d60b9e8ae
--- /dev/null
+++ b/bench/helpers.jl
@@ -0,0 +1,21 @@
+# TODO: Special Handling for GPU Arrays with @sync
+
+# Register two forward-pass benchmarks for `model` under `tag`: one with the NamedTuple
+# parameters from `Lux.setup` and one with them converted to a flat `ComponentArray`.
+function benchmark_forward_pass(tag::String, model, x, ps, st)
+    SUITE[tag]["forward"]["default"] = @benchmarkable Lux.apply($model, $x, $ps, $st)
+
+    ps_ca = ComponentArray(ps)
+    SUITE[tag]["forward"]["ComponentArray"] = @benchmarkable Lux.apply(
+        $model, $x, $ps_ca, $st)
+
+    return
+end
+
+# Build a model's parameters and states with a fixed-seed RNG and a matching random input.
+function general_setup(model, x_dims)
+    rng = StableRNG(0)
+    ps, st = Lux.setup(rng, model)
+    x = randn(rng, Float32, x_dims)
+    return x, ps, st
+end
diff --git a/bench/runbenchmarks.jl b/bench/runbenchmarks.jl
new file mode 100644
index 000000000..5f76d244c
--- /dev/null
+++ b/bench/runbenchmarks.jl
@@ -0,0 +1,17 @@
+using BenchmarkTools: BenchmarkTools, BenchmarkGroup, @benchmarkable
+using ComponentArrays: ComponentArray
+using Lux: Lux, BatchNorm, Chain, Conv, Dense, Dropout, FlattenLayer, MaxPool
+using NNlib: relu
+using StableRNGs: StableRNG
+using Statistics: median
+
+const SUITE = BenchmarkGroup()
+
+include("helpers.jl")
+include("vgg.jl")
+
+BenchmarkTools.tune!(SUITE)
+results = BenchmarkTools.run(SUITE; verbose=true)
+
+# Save per-benchmark medians in the JSON format consumed by github-action-benchmark.
+BenchmarkTools.save(joinpath(@__DIR__, "benchmark_results.json"), median(results))
diff --git a/bench/vgg.jl b/bench/vgg.jl
new file mode 100644
index 000000000..e12f1f9d9
--- /dev/null
+++ b/bench/vgg.jl
@@ -0,0 +1,34 @@
+function add_vgg_benchmarks()
+    # VGG16-style network sized for 32x32 CIFAR-10 inputs: five convolutional blocks,
+    # each ending in 2x2 max-pooling, leave a 1x1x512 map before the dense classifier.
+    vgg16 = Chain(Conv((3, 3), 3 => 64, relu; pad=(1, 1), stride=(1, 1)), BatchNorm(64),
+        Conv((3, 3), 64 => 64, relu; pad=(1, 1), stride=(1, 1)), BatchNorm(64),
+        MaxPool((2, 2)), Conv((3, 3), 64 => 128, relu; pad=(1, 1), stride=(1, 1)),
+        BatchNorm(128), Conv((3, 3), 128 => 128, relu; pad=(1, 1), stride=(1, 1)),
+        BatchNorm(128), MaxPool((2, 2)),
+        Conv((3, 3), 128 => 256, relu; pad=(1, 1), stride=(1, 1)),
+        BatchNorm(256), Conv((3, 3), 256 => 256, relu; pad=(1, 1), stride=(1, 1)),
+        BatchNorm(256), Conv((3, 3), 256 => 256, relu; pad=(1, 1), stride=(1, 1)),
+        BatchNorm(256), MaxPool((2, 2)),
+        Conv((3, 3), 256 => 512, relu; pad=(1, 1), stride=(1, 1)), BatchNorm(512),
+        Conv((3, 3), 512 => 512, relu; pad=(1, 1), stride=(1, 1)), BatchNorm(512),
+        Conv((3, 3), 512 => 512, relu; pad=(1, 1), stride=(1, 1)), BatchNorm(512),
+        MaxPool((2, 2)), Conv((3, 3), 512 => 512, relu; pad=(1, 1), stride=(1, 1)),
+        BatchNorm(512), Conv((3, 3), 512 => 512, relu; pad=(1, 1), stride=(1, 1)),
+        BatchNorm(512), Conv((3, 3), 512 => 512, relu; pad=(1, 1), stride=(1, 1)),
+        BatchNorm(512), MaxPool((2, 2)), FlattenLayer(), Dense(512, 4096, relu),
+        Dropout(0.5), Dense(4096, 4096, relu), Dropout(0.5), Dense(4096, 10))
+
+    x, ps, st = general_setup(vgg16, (32, 32, 3, 1))
+    benchmark_forward_pass("vgg16 -- batchsize = 1", vgg16, x, ps, st)
+
+    x, ps, st = general_setup(vgg16, (32, 32, 3, 16))
+    benchmark_forward_pass("vgg16 -- batchsize = 16", vgg16, x, ps, st)
+
+    x, ps, st = general_setup(vgg16, (32, 32, 3, 64))
+    benchmark_forward_pass("vgg16 -- batchsize = 64", vgg16, x, ps, st)
+
+    return
+end
+
+add_vgg_benchmarks()
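New benchmarks would follow the same pattern as `vgg.jl`: construct a model, call `general_setup`, register it with `benchmark_forward_pass`, and `include` the file from `runbenchmarks.jl`. A sketch of a hypothetical `bench/mlp.jl` (the filename and model are illustrative only, not part of this diff):

```julia
# Hypothetical bench/mlp.jl -- illustrates the registration pattern from vgg.jl.
# general_setup and benchmark_forward_pass come from bench/helpers.jl.
function add_mlp_benchmarks()
    mlp = Chain(Dense(784, 256, relu), Dense(256, 10))

    x, ps, st = general_setup(mlp, (784, 32))  # 784 features x batchsize 32
    benchmark_forward_pass("mlp -- batchsize = 32", mlp, x, ps, st)

    return
end

add_mlp_benchmarks()
```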