From 83d37010341b2767f3354f2274564892a13e757c Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Fri, 8 Mar 2024 14:11:32 +0100 Subject: [PATCH] Add utilities that don't return the value (#27) * Add special cases * More complex backends * Fix benchmarks * More benchmarks * Reactivate JET test * Remove weird build folder * Don't import dangerous stuff before JET test * Re-skip JET * Wrong JET test * Better docstrings, more exhaustive tests, benchmark with NN layer * Add utilities that don't return the value * Structured benchmarks with dataframe export --- .gitignore | 2 + benchmark/Project.toml | 3 + benchmark/benchmarks.jl | 178 +++++++++++--------------------- benchmark/dataframe.jl | 36 +++++++ benchmark/utils.jl | 162 +++++++++++++++++++++++++++++ src/DifferentiationInterface.jl | 10 ++ src/array_array.jl | 43 ++++++-- src/array_scalar.jl | 22 +++- src/pullback.jl | 18 ++++ src/pushforward.jl | 18 ++++ src/scalar_array.jl | 20 +++- src/scalar_scalar.jl | 9 ++ test/diffractor.jl | 2 +- test/enzyme_forward.jl | 2 +- test/enzyme_reverse.jl | 2 +- test/finitediff.jl | 2 +- test/forwarddiff.jl | 2 +- test/polyesterforwarddiff.jl | 2 +- test/reversediff.jl | 2 +- test/utils.jl | 149 +++++++++++++++++--------- test/zygote.jl | 2 +- 21 files changed, 502 insertions(+), 184 deletions(-) create mode 100644 benchmark/dataframe.jl create mode 100644 benchmark/utils.jl diff --git a/.gitignore b/.gitignore index 8b14d40cd..4836da0eb 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,5 @@ /docs/Manifest.toml /test/Manifest.toml /benchmark/Manifest.toml + +*.csv \ No newline at end of file diff --git a/benchmark/Project.toml b/benchmark/Project.toml index 209efe139..36f358793 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -1,5 +1,7 @@ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" Diffractor = "9f5e2b26-1114-432f-b630-d3fe2085c51c" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" @@ -8,4 +10,5 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" PolyesterForwardDiff = "98d1487c-24ca-40b6-b7ab-df2af84e126b" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 7dab20d5e..7c9145c51 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -66,145 +66,89 @@ reverse_fallback_backends = [ ReverseDiffBackend(; custom=false), ] -all_custom_backends = vcat(forward_custom_backends, reverse_custom_backends) -all_fallback_backends = vcat(forward_fallback_backends, reverse_fallback_backends) -all_backends = vcat(all_custom_backends, all_fallback_backends) +all_backends = vcat( + forward_custom_backends, + forward_fallback_backends, + reverse_custom_backends, + reverse_fallback_backends, +) ## Suite -SUITE = BenchmarkGroup() +function make_suite() + SUITE = BenchmarkGroup() -### Scalar to scalar - -scalar_to_scalar = Layer(randn(), randn(), tanh) - -for backend in all_backends - handles_types(backend, Number, Number) || continue - SUITE["value_and_derivative"][(1, 1)][string(backend)] = @benchmarkable begin - value_and_derivative($backend, $scalar_to_scalar, x) - end setup = (x = randn()) -end - -for backend in all_fallback_backends - handles_types(backend, Number, Number) || continue - if autodiff_mode(backend) == :forward - SUITE["value_and_pushforward"][(1, 1)][string(backend)] = @benchmarkable begin - value_and_pushforward($backend, $scalar_to_scalar, x, dx) - end setup = (x = randn(); dx = randn()) - else - SUITE["value_and_pullback"][(1, 1)][string(backend)] = @benchmarkable begin - value_and_pullback($backend, $scalar_to_scalar, x, dy) - end setup = (x = randn(); dy = randn()) - end -end - -### Scalar to vector - -for m in [10] - scalar_to_vector = Layer(randn(m), randn(m), tanh) + ### Scalar to scalar + scalar_to_scalar = Layer(randn(), randn(), tanh) for backend in all_backends - handles_types(backend, Number, Vector) || continue - SUITE["value_and_multiderivative"][(1, m)][string(backend)] = @benchmarkable begin - value_and_multiderivative($backend, $scalar_to_vector, x) - end setup = (x = randn()) - SUITE["value_and_multiderivative!"][(1, m)][string(backend)] = @benchmarkable begin - value_and_multiderivative!(multider, $backend, $scalar_to_vector, x) - end setup = (x = randn(); multider = zeros($m)) + add_derivative_benchmarks!(SUITE, backend, scalar_to_scalar, 1, 1) end - - for backend in all_fallback_backends - handles_types(backend, Number, Vector) || continue - if autodiff_mode(backend) == :forward - SUITE["value_and_pushforward"][(1, m)][string(backend)] = @benchmarkable begin - value_and_pushforward($backend, $scalar_to_vector, x, dx) - end setup = (x = randn(); dx = randn()) - SUITE["value_and_pushforward!"][(1, m)][string(backend)] = @benchmarkable begin - value_and_pushforward!(dy, $backend, $scalar_to_vector, x, dx) - end setup = (x = randn(); dx = randn(); dy = zeros($m)) - else - SUITE["value_and_pullback"][(1, m)][string(backend)] = @benchmarkable begin - value_and_pullback($backend, $scalar_to_vector, x, dy) - end setup = (x = randn(); dy = ones($m)) - SUITE["value_and_pullback!"][(1, m)][string(backend)] = @benchmarkable begin - value_and_pullback!(dx, $backend, $scalar_to_vector, x, dy) - end setup = (x = randn(); dy = ones($m); dx = 0.0) - end + for backend in forward_fallback_backends + add_pushforward_benchmarks!(SUITE, backend, scalar_to_scalar, 1, 1) end -end - -### Vector to scalar - -for n in [10] - vector_to_scalar = Layer(randn(n), randn(), tanh) - - for backend in all_backends - handles_types(backend, Vector, Number) || continue - SUITE["value_and_gradient"][(n, 1)][string(backend)] = @benchmarkable begin - value_and_gradient($backend, $vector_to_scalar, x) - end setup = (x = randn($n)) - SUITE["value_and_gradient!"][(n, 1)][string(backend)] = @benchmarkable begin - value_and_gradient!(grad, $backend, $vector_to_scalar, x) - end setup = (x = randn($n); grad = zeros($n)) + for backend in reverse_fallback_backends + add_pullback_benchmarks!(SUITE, backend, scalar_to_scalar, 1, 1) end - for backend in all_fallback_backends - handles_types(backend, Vector, Number) || continue - if autodiff_mode(backend) == :forward - SUITE["value_and_pushforward"][(n, 1)][string(backend)] = @benchmarkable begin - value_and_pushforward($backend, $vector_to_scalar, x, dx) - end setup = (x = randn($n); dx = randn($n)) - SUITE["value_and_pushforward!"][(n, 1)][string(backend)] = @benchmarkable begin - value_and_pushforward!(dy, $backend, $vector_to_scalar, x, dx) - end setup = (x = randn($n); dx = randn($n); dy = 0.0) - else - SUITE["value_and_pullback"][(n, 1)][string(backend)] = @benchmarkable begin - value_and_pullback($backend, $vector_to_scalar, x, dy) - end setup = (x = randn($n); dy = randn()) - SUITE["value_and_pullback!"][(n, 1)][string(backend)] = @benchmarkable begin - value_and_pullback!(dx, $backend, $vector_to_scalar, x, dy) - end setup = (x = randn($n); dy = randn(); dx = zeros($n)) + ### Scalar to vector + for m in [10] + scalar_to_vector = Layer(randn(m), randn(m), tanh) + + for backend in all_backends + add_multiderivative_benchmarks!(SUITE, backend, scalar_to_vector, 1, m) + end + for backend in forward_fallback_backends + add_pushforward_benchmarks!(SUITE, backend, scalar_to_vector, 1, m) + end + for backend in reverse_fallback_backends + add_pullback_benchmarks!(SUITE, backend, scalar_to_vector, 1, m) end end -end - -### Vector to vector -for (n, m) in [(10, 10)] - vector_to_vector = Layer(randn(m, n), randn(m), tanh) + ### Vector to scalar + for n in [10] + vector_to_scalar = Layer(randn(n), randn(), tanh) - for backend in all_backends - handles_types(backend, Vector, Vector) || continue - SUITE["value_and_jacobian"][(n, m)][string(backend)] = @benchmarkable begin - value_and_jacobian($backend, $vector_to_vector, x) - end setup = (x = randn($n)) - SUITE["value_and_jacobian!"][(n, m)][string(backend)] = @benchmarkable begin - value_and_jacobian!(jac, $backend, $vector_to_vector, x) - end setup = (x = randn($n); jac = zeros($m, $n)) + for backend in all_backends + add_gradient_benchmarks!(SUITE, backend, vector_to_scalar, n, 1) + end + for backend in forward_fallback_backends + add_pushforward_benchmarks!(SUITE, backend, vector_to_scalar, n, 1) + end + for backend in reverse_fallback_backends + add_pullback_benchmarks!(SUITE, backend, vector_to_scalar, n, 1) + end end - for backend in all_fallback_backends - handles_types(backend, Vector, Vector) || continue - if autodiff_mode(backend) == :forward - SUITE["value_and_pushforward"][(n, m)][string(backend)] = @benchmarkable begin - value_and_pushforward($backend, $vector_to_vector, x, dx) - end setup = (x = randn($n); dx = randn($n)) - SUITE["value_and_pushforward!"][(n, m)][string(backend)] = @benchmarkable begin - value_and_pushforward!(dy, $backend, $vector_to_vector, x, dx) - end setup = (x = randn($n); dx = randn($n); dy = zeros($m)) - else - SUITE["value_and_pullback"][(n, m)][string(backend)] = @benchmarkable begin - value_and_pullback($backend, $vector_to_vector, x, dy) - end setup = (x = randn($n); dy = randn($m)) - SUITE["value_and_pullback!"][(n, m)][string(backend)] = @benchmarkable begin - value_and_pullback!(dx, $backend, $vector_to_vector, x, dy) - end setup = (x = randn($n); dy = randn($m); dx = zeros($n)) + ### Vector to vector + for (n, m) in [(10, 10)] + vector_to_vector = Layer(randn(m, n), randn(m), tanh) + + for backend in all_backends + add_jacobian_benchmarks!(SUITE, backend, vector_to_vector, n, m) + end + for backend in forward_fallback_backends + add_pushforward_benchmarks!(SUITE, backend, vector_to_vector, n, m) + end + for backend in reverse_fallback_backends + add_pullback_benchmarks!(SUITE, backend, vector_to_vector, n, m) end end + + return SUITE end +include("utils.jl") + +SUITE = make_suite() + # Run benchmarks locally # results = BenchmarkTools.run(SUITE; verbose=true) # Compare commits locally # using BenchmarkCI; BenchmarkCI.judge(baseline="origin/main"); BenchmarkCI.displayjudgement() + +# Parse into dataframe +# include("dataframe.jl") +# data = parse_benchmark_results(results; path=joinpath(@__DIR__, "results.csv")) diff --git a/benchmark/dataframe.jl b/benchmark/dataframe.jl new file mode 100644 index 000000000..01f06e717 --- /dev/null +++ b/benchmark/dataframe.jl @@ -0,0 +1,36 @@ +using BenchmarkTools +using CSV +using DataFrames +using Statistics + +function parse_benchmark_results_aux(result::BenchmarkTools.Trial, level=nothing) + data = DataFrame( + :samples => [length(result.times)], + :time_median => [median(result.times)], + :memory_median => [median(result.memory)], + :allocs_median => [median(result.allocs)], + ) + return data +end + +function parse_benchmark_results_aux(results::BenchmarkGroup, level=1) + data = DataFrame() + level_symbol = Symbol(string("level_$level")) + for (key, val) in pairs(results) + subdata = parse_benchmark_results_aux(val, level + 1) + subdata[!, level_symbol] = fill(key, size(subdata, 1)) + append!(data, subdata) + end + select!(data, level_symbol, Not(level_symbol)) + return data +end + +function parse_benchmark_results(results::BenchmarkGroup; path=nothing) + data = parse_benchmark_results_aux(results) + if !isnothing(path) + open(path, "w") do file + CSV.write(file, data) + end + end + return data +end diff --git a/benchmark/utils.jl b/benchmark/utils.jl new file mode 100644 index 000000000..cea537f28 --- /dev/null +++ b/benchmark/utils.jl @@ -0,0 +1,162 @@ +using DifferentiationInterface +using BenchmarkTools + +function add_pushforward_benchmarks!( + suite::BenchmarkGroup, backend::AbstractBackend, f::F, n::Integer, m::Integer +) where {F} + x = n == 1 ? randn() : randn(n) + dx = n == 1 ? randn() : randn(n) + dy = m == 1 ? 0.0 : zeros(m) + + if autodiff_mode(backend) != :forward || !handles_types(backend, typeof(x), typeof(dy)) + return nothing + end + + suite["value_and_pushforward"][(n, m)][string(backend)] = @benchmarkable begin + value_and_pushforward($backend, $f, $x, $dx) + end + suite["value_and_pushforward!"][(n, m)][string(backend)] = @benchmarkable begin + value_and_pushforward!($dy, $backend, $f, $x, $dx) + end + + suite["pushforward"][(n, m)][string(backend)] = @benchmarkable begin + pushforward($backend, $f, $x, $dx) + end + suite["pushforward!"][(n, m)][string(backend)] = @benchmarkable begin + pushforward!($dy, $backend, $f, $x, $dx) + end + + return nothing +end + +function add_pullback_benchmarks!( + suite::BenchmarkGroup, backend::AbstractBackend, f::F, n::Integer, m::Integer +) where {F} + x = n == 1 ? randn() : randn(n) + dx = n == 1 ? 0.0 : zeros(n) + dy = m == 1 ? randn() : randn(m) + + if autodiff_mode(backend) != :reverse || !handles_types(backend, typeof(x), typeof(dy)) + return nothing + end + + suite["value_and_pullback"][(n, m)][string(backend)] = @benchmarkable begin + value_and_pullback($backend, $f, $x, $dy) + end + suite["value_and_pullback!"][(n, m)][string(backend)] = @benchmarkable begin + value_and_pullback!($dx, $backend, $f, $x, $dy) + end + + suite["pullback"][(n, m)][string(backend)] = @benchmarkable begin + pullback($backend, $f, $x, $dy) + end + suite["pullback!"][(n, m)][string(backend)] = @benchmarkable begin + pullback!($dx, $backend, $f, $x, $dy) + end + + return nothing +end + +function add_derivative_benchmarks!( + suite::BenchmarkGroup, backend::AbstractBackend, f::F, n::Integer, m::Integer +) where {F} + @assert n == m == 1 + if !handles_types(backend, Number, Number) + return nothing + end + + x = randn() + + suite["value_and_derivative"][(1, 1)][string(backend)] = @benchmarkable begin + value_and_derivative($backend, $f, $x) + end + + suite["derivative"][(1, 1)][string(backend)] = @benchmarkable begin + derivative($backend, $f, $x) + end + + return nothing +end + +function add_multiderivative_benchmarks!( + suite::BenchmarkGroup, backend::AbstractBackend, f::F, n::Integer, m::Integer +) where {F} + @assert n == 1 + if !handles_types(backend, Number, Vector) + return nothing + end + + x = randn() + multider = zeros(m) + + suite["value_and_multiderivative"][(1, m)][string(backend)] = @benchmarkable begin + value_and_multiderivative($backend, $f, $x) + end + suite["value_and_multiderivative!"][(1, m)][string(backend)] = @benchmarkable begin + value_and_multiderivative!($multider, $backend, $f, $x) + end + + suite["multiderivative"][(1, m)][string(backend)] = @benchmarkable begin + multiderivative($backend, $f, $x) + end + suite["multiderivative!"][(1, m)][string(backend)] = @benchmarkable begin + multiderivative!($multider, $backend, $f, $x) + end + + return nothing +end + +function add_gradient_benchmarks!( + suite::BenchmarkGroup, backend::AbstractBackend, f::F, n::Integer, m::Integer +) where {F} + @assert m == 1 + if !handles_types(backend, Vector, Number) + return nothing + end + + x = randn(n) + grad = zeros(n) + + suite["value_and_gradient"][(n, 1)][string(backend)] = @benchmarkable begin + value_and_gradient($backend, $f, $x) + end + suite["value_and_gradient!"][(n, 1)][string(backend)] = @benchmarkable begin + value_and_gradient!($grad, $backend, $f, $x) + end + + suite["gradient"][(n, 1)][string(backend)] = @benchmarkable begin + gradient($backend, $f, $x) + end + suite["gradient!"][(n, 1)][string(backend)] = @benchmarkable begin + gradient!($grad, $backend, $f, $x) + end + + return nothing +end + +function add_jacobian_benchmarks!( + suite::BenchmarkGroup, backend::AbstractBackend, f::F, n::Integer, m::Integer +) where {F} + if !handles_types(backend, Vector, Vector) + return nothing + end + + x = randn(n) + jac = zeros(m, n) + + suite["value_and_jacobian"][(n, m)][string(backend)] = @benchmarkable begin + value_and_jacobian($backend, $f, $x) + end + suite["value_and_jacobian!"][(n, m)][string(backend)] = @benchmarkable begin + value_and_jacobian!($jac, $backend, $f, $x) + end + + suite["jacobian"][(n, m)][string(backend)] = @benchmarkable begin + jacobian($backend, $f, $x) + end + suite["jacobian!"][(n, m)][string(backend)] = @benchmarkable begin + jacobian!($jac, $backend, $f, $x) + end + + return nothing +end diff --git a/src/DifferentiationInterface.jl b/src/DifferentiationInterface.jl index 3e44f5f07..51e33d01c 100644 --- a/src/DifferentiationInterface.jl +++ b/src/DifferentiationInterface.jl @@ -37,11 +37,21 @@ export ChainRulesForwardBackend, ZygoteBackend export value_and_pushforward!, value_and_pushforward +export pushforward!, pushforward + export value_and_pullback!, value_and_pullback +export pullback!, pullback export value_and_derivative +export derivative + export value_and_multiderivative!, value_and_multiderivative +export multiderivative!, multiderivative + export value_and_gradient!, value_and_gradient +export gradient!, gradient + export value_and_jacobian!, value_and_jacobian +export jacobian!, jacobian end # module diff --git a/src/array_array.jl b/src/array_array.jl index 2ba82d021..38e23d2a1 100644 --- a/src/array_array.jl +++ b/src/array_array.jl @@ -1,12 +1,16 @@ +const JAC_NOTES = """ +## Notes + +Regardless of the shape of `x` and `y`, if `x` has length `n` and `y` has length `m`, then `jac` is expected to be a `m × n` matrix. +This function acts as if the input and output had been flattened with `vec`. +""" + """ value_and_jacobian!(jac, backend, f, x) -> (y, jac) Compute the primal value `y = f(x)` and the Jacobian matrix `jac = ∂f(x)` of an array-to-array function, overwriting `jac` if possible. -## Notes - -Regardless of the shape of `x` and `y`, if `x` has length `n` and `y` has length `m`, then `jac` is expected to be a `m × n` matrix. -This function acts as if the input and output had been flattened with `vec`. +$JAC_NOTES """ function value_and_jacobian!( jac::AbstractMatrix, backend::AbstractBackend, f, x::AbstractArray @@ -29,7 +33,7 @@ function _value_and_jacobian!( for (k, j) in enumerate(eachindex(IndexCartesian(), x)) dx_j = basisarray(backend, x, j) jac_col_j = reshape(view(jac, :, k), size(y)) - value_and_pushforward!(jac_col_j, backend, f, x, dx_j) + pushforward!(jac_col_j, backend, f, x, dx_j) end return y, jac end @@ -44,7 +48,7 @@ function _value_and_jacobian!( for (k, i) in enumerate(eachindex(IndexCartesian(), y)) dy_i = basisarray(backend, y, i) jac_row_i = reshape(view(jac, k, :), size(x)) - value_and_pullback!(jac_row_i, backend, f, x, dy_i) + pullback!(jac_row_i, backend, f, x, dy_i) end return y, jac end @@ -54,10 +58,7 @@ end Compute the primal value `y = f(x)` and the Jacobian matrix `jac = ∂f(x)` of an array-to-array function. -## Notes - -Regardless of the shape of `x` and `y`, if `x` has length `n` and `y` has length `m`, then `jac` is expected to be a `m × n` matrix. -This function acts as if the input and output had been flattened with `vec`. +$JAC_NOTES """ function value_and_jacobian(backend::AbstractBackend, f, x::AbstractArray) y = f(x) @@ -65,3 +66,25 @@ function value_and_jacobian(backend::AbstractBackend, f, x::AbstractArray) jac = similar(y, T, length(y), length(x)) return value_and_jacobian!(jac, backend, f, x) end + +""" + jacobian!(jac, backend, f, x) -> jac + +Compute the Jacobian matrix `jac = ∂f(x)` of an array-to-array function, overwriting `jac` if possible. + +$JAC_NOTES +""" +function jacobian!(jac::AbstractMatrix, backend::AbstractBackend, f, x::AbstractArray) + return last(value_and_jacobian!(jac, backend, f, x)) +end + +""" + jacobian(backend, f, x) -> jac + +Compute the Jacobian matrix `jac = ∂f(x)` of an array-to-array function. + +$JAC_NOTES +""" +function jacobian(backend::AbstractBackend, f, x::AbstractArray) + return last(value_and_jacobian(backend, f, x)) +end diff --git a/src/array_scalar.jl b/src/array_scalar.jl index 2224dcd77..6617bbccc 100644 --- a/src/array_scalar.jl +++ b/src/array_scalar.jl @@ -11,7 +11,7 @@ function value_and_gradient!( y = f(x) for j in eachindex(IndexCartesian(), x) dx_j = basisarray(backend, x, j) - _, grad[j] = value_and_pushforward!(grad[j], backend, f, x, dx_j) + grad[j] = pushforward!(grad[j], backend, f, x, dx_j) end return y, grad end @@ -20,7 +20,7 @@ function value_and_gradient!( grad::AbstractArray, backend::AbstractReverseBackend, f, x::AbstractArray ) y = f(x) - return value_and_pullback!(grad, backend, f, x, one(y)) + return y, pullback!(grad, backend, f, x, one(y)) end """ @@ -32,3 +32,21 @@ function value_and_gradient(backend::AbstractBackend, f, x::AbstractArray) grad = similar(x) return value_and_gradient!(grad, backend, f, x) end + +""" + gradient!(grad, backend, f, x) -> grad + +Compute the gradient `grad = ∇f(x)` of an array-to-scalar function, overwriting `grad` if possible. +""" +function gradient!(grad::AbstractArray, backend::AbstractBackend, f, x::AbstractArray) + return last(value_and_gradient!(grad, backend, f, x)) +end + +""" + gradient(backend, f, x) -> grad + +Compute the gradient `grad = ∇f(x)` of an array-to-scalar function. +""" +function gradient(backend::AbstractBackend, f, x::AbstractArray) + return last(value_and_gradient(backend, f, x)) +end diff --git a/src/pullback.jl b/src/pullback.jl index d413641ff..501ff7c57 100644 --- a/src/pullback.jl +++ b/src/pullback.jl @@ -21,3 +21,21 @@ function value_and_pullback(backend::AbstractReverseBackend, f, x, dy) dx = mysimilar(x) return value_and_pullback!(dx, backend, f, x, dy) end + +""" + pullback!(dx, backend::AbstractReverseBackend, f, x, dy) -> dx + +Compute the vector-Jacobian product `dx = ∂f(x)' * dy`, overwriting `dx` if possible. +""" +function pullback!(dx, backend::AbstractReverseBackend, f, x, dy) + return last(value_and_pullback!(dx, backend, f, x, dy)) +end + +""" + pullback(backend::AbstractReverseBackend, f, x, dy) -> dx + +Compute the vector-Jacobian product `dx = ∂f(x)' * dy`. +""" +function pullback(backend::AbstractReverseBackend, f, x, dy) + return last(value_and_pullback(backend, f, x, dy)) +end diff --git a/src/pushforward.jl b/src/pushforward.jl index 656c22f01..e71b92197 100644 --- a/src/pushforward.jl +++ b/src/pushforward.jl @@ -21,3 +21,21 @@ function value_and_pushforward(backend::AbstractForwardBackend, f, x, dx) dy = mysimilar(f(x)) return value_and_pushforward!(dy, backend, f, x, dx) end + +""" + pushforward!(dy, backend::AbstractForwardBackend, f, x, dx) -> dy + +Compute the Jacobian-vector product `dy = ∂f(x) * dx`, overwriting `dy` if possible. +""" +function pushforward!(dy, backend::AbstractForwardBackend, f, x, dx) + return last(value_and_pushforward!(dy, backend, f, x, dx)) +end + +""" + pushforward(backend::AbstractForwardBackend, f, x, dx) -> dy + +Compute the Jacobian-vector product `dy = ∂f(x) * dx`. +""" +function pushforward(backend::AbstractForwardBackend, f, x, dx) + return last(value_and_pushforward(backend, f, x, dx)) +end diff --git a/src/scalar_array.jl b/src/scalar_array.jl index aa55faff0..1b6aed54c 100644 --- a/src/scalar_array.jl +++ b/src/scalar_array.jl @@ -17,7 +17,7 @@ function value_and_multiderivative!( y = f(x) for i in eachindex(IndexCartesian(), y) dy_i = basisarray(backend, y, i) - _, multider[i] = value_and_pullback!(multider[i], backend, f, x, dy_i) + multider[i] = pullback!(multider[i], backend, f, x, dy_i) end return y, multider end @@ -31,3 +31,21 @@ function value_and_multiderivative(backend::AbstractBackend, f, x::Number) multider = similar(f(x)) return value_and_multiderivative!(multider, backend, f, x) end + +""" + multiderivative!(multider, backend, f, x) -> multider + +Compute the (array-valued) derivative `multider = f'(x)` of a scalar-to-array function, overwriting `multider` if possible. +""" +function multiderivative!(multider::AbstractArray, backend::AbstractBackend, f, x::Number) + return last(value_and_multiderivative!(multider, backend, f, x)) +end + +""" + multiderivative(backend, f, x) -> multider + +Compute the (array-valued) derivative `multider = f'(x)` of a scalar-to-array function. +""" +function multiderivative(backend::AbstractBackend, f, x::Number) + return last(value_and_multiderivative(backend, f, x)) +end diff --git a/src/scalar_scalar.jl b/src/scalar_scalar.jl index d65219df7..7cf8c9c2a 100644 --- a/src/scalar_scalar.jl +++ b/src/scalar_scalar.jl @@ -12,3 +12,12 @@ end function value_and_derivative(backend::AbstractReverseBackend, f, x::Number) return value_and_pullback!(one(x), backend, f, x, one(x)) end + +""" + derivative(backend, f, x) -> der + +Compute the derivative `der = f'(x)` of a scalar-to-scalar function. +""" +function derivative(backend::AbstractBackend, f, x::Number) + return last(value_and_derivative(backend, f, x)) +end diff --git a/test/diffractor.jl b/test/diffractor.jl index 5f70dc31f..62b293e50 100644 --- a/test/diffractor.jl +++ b/test/diffractor.jl @@ -1,5 +1,5 @@ -using Diffractor using DifferentiationInterface +using Diffractor: Diffractor # see https://github.com/JuliaDiff/Diffractor.jl/issues/277 diff --git a/test/enzyme_forward.jl b/test/enzyme_forward.jl index 4dc06e0bd..9289a07dc 100644 --- a/test/enzyme_forward.jl +++ b/test/enzyme_forward.jl @@ -1,5 +1,5 @@ using DifferentiationInterface -using Enzyme +using Enzyme: Enzyme test_pushforward(EnzymeForwardBackend(), scenarios; type_stability=true); test_jacobian_and_friends( diff --git a/test/enzyme_reverse.jl b/test/enzyme_reverse.jl index 68c6d5c4a..022e64843 100644 --- a/test/enzyme_reverse.jl +++ b/test/enzyme_reverse.jl @@ -1,5 +1,5 @@ using DifferentiationInterface -using Enzyme +using Enzyme: Enzyme test_pullback(EnzymeReverseBackend(), scenarios; type_stability=true); test_jacobian_and_friends( diff --git a/test/finitediff.jl b/test/finitediff.jl index cb9b657fc..b976b1f62 100644 --- a/test/finitediff.jl +++ b/test/finitediff.jl @@ -1,5 +1,5 @@ using DifferentiationInterface -using FiniteDiff +using FiniteDiff: FiniteDiff test_pushforward(FiniteDiffBackend(), scenarios; type_stability=true); test_jacobian_and_friends(FiniteDiffBackend(; custom=true), scenarios; type_stability=false); diff --git a/test/forwarddiff.jl b/test/forwarddiff.jl index 62744d88a..f6344fc61 100644 --- a/test/forwarddiff.jl +++ b/test/forwarddiff.jl @@ -1,5 +1,5 @@ using DifferentiationInterface -using ForwardDiff +using ForwardDiff: ForwardDiff test_pushforward(ForwardDiffBackend(), scenarios; type_stability=true); test_jacobian_and_friends( diff --git a/test/polyesterforwarddiff.jl b/test/polyesterforwarddiff.jl index d1cb88d41..1a3a51233 100644 --- a/test/polyesterforwarddiff.jl +++ b/test/polyesterforwarddiff.jl @@ -1,5 +1,5 @@ using DifferentiationInterface -using PolyesterForwardDiff +using PolyesterForwardDiff: PolyesterForwardDiff # see https://github.com/JuliaDiff/PolyesterForwardDiff.jl/issues/17 diff --git a/test/reversediff.jl b/test/reversediff.jl index aebfdbaac..748949ac6 100644 --- a/test/reversediff.jl +++ b/test/reversediff.jl @@ -1,5 +1,5 @@ using DifferentiationInterface -using ReverseDiff +using ReverseDiff: ReverseDiff test_pullback(ReverseDiffBackend(), scenarios; type_stability=false); test_jacobian_and_friends( diff --git a/test/utils.jl b/test/utils.jl index aeb8b97a1..7fed9e7e2 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -167,28 +167,37 @@ function test_pushforward( @testset "$X -> $Y" begin (; f, x, y, dx, dy_true) = scenario - y_out, dy_out = value_and_pushforward(backend, f, x, dx) - dy_in = zero(dy_out) - y_out2, dy_out2 = value_and_pushforward!(dy_in, backend, f, x, dx) + y_out1, dy_out1 = value_and_pushforward(backend, f, x, dx) + dy_in2 = zero(dy_out1) + y_out2, dy_out2 = value_and_pushforward!(dy_in2, backend, f, x, dx) + + dy_out3 = pushforward(backend, f, x, dx) + dy_in4 = zero(dy_out3) + dy_out4 = pushforward!(dy_in4, backend, f, x, dx) @testset "Primal value" begin - @test y_out ≈ y + @test y_out1 ≈ y @test y_out2 ≈ y end @testset "Tangent value" begin - @test dy_out ≈ dy_true rtol = 1e-3 + @test dy_out1 ≈ dy_true rtol = 1e-3 @test dy_out2 ≈ dy_true rtol = 1e-3 - if ismutable(dy_in) + @test dy_out3 ≈ dy_true rtol = 1e-3 + @test dy_out4 ≈ dy_true rtol = 1e-3 + if ismutable(dy_true) @testset "Mutation" begin - @test dy_in ≈ dy_true rtol = 1e-3 + @test dy_in2 ≈ dy_true rtol = 1e-3 + @test dy_in4 ≈ dy_true rtol = 1e-3 end end end allocs && @testset "Allocations" begin - @test (@allocated value_and_pushforward!(dy_in, backend, f, x, dx)) == 0 + @test iszero(@allocated value_and_pushforward!(dy_in2, backend, f, x, dx)) + @test iszero(@allocated pushforward!(dy_in4, backend, f, x, dx)) end type_stability && @testset "Type stability" begin - @test_opt value_and_pushforward!(dy_in, backend, f, x, dx) + @test_opt value_and_pushforward!(dy_in2, backend, f, x, dx) + @test_opt pushforward!(dy_in4, backend, f, x, dx) end end end @@ -213,28 +222,38 @@ function test_pullback( @testset "$X -> $Y" begin (; f, x, y, dy, dx_true) = scenario - y_out, dx_out = value_and_pullback(backend, f, x, dy) - dx_in = zero(dx_out) - y_out2, dx_out2 = value_and_pullback!(dx_in, backend, f, x, dy) + + y_out1, dx_out1 = value_and_pullback(backend, f, x, dy) + dx_in2 = zero(dx_out1) + y_out2, dx_out2 = value_and_pullback!(dx_in2, backend, f, x, dy) + + dx_out3 = pullback(backend, f, x, dy) + dx_in4 = zero(dx_out3) + dx_out4 = pullback!(dx_in4, backend, f, x, dy) @testset "Primal value" begin - @test y_out ≈ y + @test y_out1 ≈ y @test y_out2 ≈ y end @testset "Cotangent value" begin - @test dx_out ≈ dx_true rtol = 1e-3 + @test dx_out1 ≈ dx_true rtol = 1e-3 @test dx_out2 ≈ dx_true rtol = 1e-3 - if ismutable(dx_out) + @test dx_out3 ≈ dx_true rtol = 1e-3 + @test dx_out4 ≈ dx_true rtol = 1e-3 + if ismutable(dx_true) @testset "Mutation" begin - @test dx_in ≈ dx_true rtol = 1e-3 + @test dx_in2 ≈ dx_true rtol = 1e-3 + @test dx_in4 ≈ dx_true rtol = 1e-3 end end end allocs && @testset "Allocations" begin - @test (@allocated value_and_pullback!(dx_in, backend, f, x, dy)) == 0 + @test iszero(@allocated value_and_pullback!(dx_in2, backend, f, x, dy)) + @test iszero(@allocated pullback!(dx_in4, backend, f, x, dy)) end type_stability && @testset "Type stability" begin - @test_opt value_and_pullback!(dx_in, backend, f, x, dy) + @test_opt value_and_pullback!(dx_in2, backend, f, x, dy) + @test_opt pullback!(dx_in4, backend, f, x, dy) end end end @@ -257,19 +276,25 @@ function test_derivative( @testset "$X -> $Y" begin (; f, x, y, der_true) = scenario - y_out, der_out = value_and_derivative(backend, f, x) + + y_out1, der_out1 = value_and_derivative(backend, f, x) + + der_out2 = derivative(backend, f, x) @testset "Primal value" begin - @test y_out ≈ y + @test y_out1 ≈ y end @testset "Derivative value" begin - @test der_out ≈ der_true rtol = 1e-3 + @test der_out1 ≈ der_true rtol = 1e-3 + @test der_out2 ≈ der_true rtol = 1e-3 end allocs && @testset "Allocations" begin - @test (@allocated value_and_derivative(backend, f, x)) == 0 + @test iszero(@allocated value_and_derivative(backend, f, x)) + @test iszero(@allocated derivative(backend, f, x)) end type_stability && @testset "Type stability" begin @test_opt value_and_derivative(backend, f, x) + @test_opt derivative(backend, f, x) end end end @@ -292,28 +317,40 @@ function test_multiderivative( @testset "$X -> $Y" begin (; f, x, y, multider_true) = scenario - y_out, multider_out = value_and_multiderivative(backend, f, x) - multider_in = zero(multider_out) + + y_out1, multider_out1 = value_and_multiderivative(backend, f, x) + multider_in2 = zero(multider_out1) y_out2, multider_out2 = value_and_multiderivative!( - multider_in, backend, f, x + multider_in2, backend, f, x ) + multider_out3 = multiderivative(backend, f, x) + multider_in4 = zero(multider_out3) + multider_out4 = multiderivative!(multider_in4, backend, f, x) + @testset "Primal value" begin - @test y_out ≈ y + @test y_out1 ≈ y @test y_out2 ≈ y end @testset "Multiderivative value" begin - @test multider_out ≈ multider_true rtol = 1e-3 + @test multider_out1 ≈ multider_true rtol = 1e-3 @test multider_out2 ≈ multider_true rtol = 1e-3 + @test multider_out3 ≈ multider_true rtol = 1e-3 + @test multider_out4 ≈ multider_true rtol = 1e-3 @testset "Mutation" begin - @test multider_in ≈ multider_true rtol = 1e-3 + @test multider_in2 ≈ multider_true rtol = 1e-3 + @test multider_in4 ≈ multider_true rtol = 1e-3 end end allocs && @testset "Allocations" begin - @test (@allocated value_and_multiderivative!(multider_in, backend, f, x)) == 0 + @test iszero( + @allocated value_and_multiderivative!(multider_in2, backend, f, x) + ) + @test iszero(@allocated multiderivative!(multider_in4, backend, f, x)) end type_stability && @testset "Type stability" begin - @test_opt value_and_multiderivative!(multider_in, backend, f, x) + @test_opt value_and_multiderivative!(multider_in2, backend, f, x) + @test_opt multiderivative!(multider_in4, backend, f, x) end end end @@ -336,26 +373,36 @@ function test_gradient( @testset "$X -> $Y" begin (; f, x, y, grad_true) = scenario - y_out, grad_out = value_and_gradient(backend, f, x) - grad_in = zero(grad_out) - y_out2, grad_out2 = value_and_gradient!(grad_in, backend, f, x) + + y_out1, grad_out1 = value_and_gradient(backend, f, x) + grad_in2 = zero(grad_out1) + y_out2, grad_out2 = value_and_gradient!(grad_in2, backend, f, x) + + grad_out3 = gradient(backend, f, x) + grad_in4 = zero(grad_out3) + grad_out4 = gradient!(grad_in4, backend, f, x) @testset "Primal value" begin - @test y_out ≈ y + @test y_out1 ≈ y @test y_out2 ≈ y end @testset "Gradient value" begin - @test grad_out ≈ grad_true rtol = 1e-3 + @test grad_out1 ≈ grad_true rtol = 1e-3 @test grad_out2 ≈ grad_true rtol = 1e-3 + @test grad_out3 ≈ grad_true rtol = 1e-3 + @test grad_out4 ≈ grad_true rtol = 1e-3 @testset "Mutation" begin - @test grad_in ≈ grad_true rtol = 1e-3 + @test grad_in2 ≈ grad_true rtol = 1e-3 + @test grad_in4 ≈ grad_true rtol = 1e-3 end end allocs && @testset "Allocations" begin - @test (@allocated value_and_gradient!(grad_in, backend, f, x)) == 0 + @test iszero(@allocated value_and_gradient!(grad_in2, backend, f, x)) + @test iszero(@allocated gradient!(grad_in4, backend, f, x)) end type_stability && @testset "Type stability" begin - @test_opt value_and_gradient!(grad_in, backend, f, x) + @test_opt value_and_gradient!(grad_in2, backend, f, x) + @test_opt gradient!(grad_in4, backend, f, x) end end end @@ -378,26 +425,36 @@ function test_jacobian( @testset "$X -> $Y" begin (; f, x, y, jac_true) = scenario - y_out, jac_out = value_and_jacobian(backend, f, x) - jac_in = zero(jac_out) - y_out2, jac_out2 = value_and_jacobian!(jac_in, backend, f, x) + + y_out1, jac_out1 = value_and_jacobian(backend, f, x) + jac_in2 = zero(jac_out1) + y_out2, jac_out2 = value_and_jacobian!(jac_in2, backend, f, x) + + jac_out3 = jacobian(backend, f, x) + jac_in4 = zero(jac_out3) + jac_out4 = jacobian!(jac_in4, backend, f, x) @testset "Primal value" begin - @test y_out ≈ y + @test y_out1 ≈ y @test y_out2 ≈ y end @testset "Jacobian value" begin - @test jac_out ≈ jac_true rtol = 1e-3 + @test jac_out1 ≈ jac_true rtol = 1e-3 @test jac_out2 ≈ jac_true rtol = 1e-3 + @test jac_out3 ≈ jac_true rtol = 1e-3 + @test jac_out4 ≈ jac_true rtol = 1e-3 @testset "Mutation" begin - @test jac_in ≈ jac_true rtol = 1e-3 + @test jac_in2 ≈ jac_true rtol = 1e-3 + @test jac_in4 ≈ jac_true rtol = 1e-3 end end allocs && @testset "Allocations" begin - @test (@allocated value_and_jacobian!(jac_in, backend, f, x)) == 0 + @test iszero(@allocated value_and_jacobian!(jac_in2, backend, f, x)) + @test iszero(@allocated jacobian!(jac_in4, backend, f, x)) end type_stability && @testset "Type stability" begin - @test_opt value_and_jacobian!(jac_in, backend, f, x) + @test_opt value_and_jacobian!(jac_in2, backend, f, x) + @test_opt jacobian!(jac_in4, backend, f, x) end end end diff --git a/test/zygote.jl b/test/zygote.jl index 69dc583ac..503dfc4c0 100644 --- a/test/zygote.jl +++ b/test/zygote.jl @@ -1,5 +1,5 @@ using DifferentiationInterface -using Zygote +using Zygote: Zygote test_pullback(ZygoteBackend(), scenarios; type_stability=false); test_jacobian_and_friends(ZygoteBackend(; custom=true), scenarios; type_stability=false);