AD extension [WIP] #85

Merged
merged 23 commits on Jun 11, 2024
18 changes: 13 additions & 5 deletions Project.toml
@@ -1,32 +1,40 @@
name = "KrylovKit"
uuid = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
authors = ["Jutho Haegeman"]
version = "0.7.1"
version = "0.8"

[deps]
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
PackageExtensionCompat = "65ce6f38-6b18-4e1d-a461-8949797d7930"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"

[weakdeps]
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"

[extensions]
KrylovKitChainRulesCoreExt = "ChainRulesCore"

[compat]
Aqua = "0.6, 0.7, 0.8"
ChainRulesCore = "1"
ChainRulesTestUtils = "1"
FiniteDifferences = "0.12"
GPUArraysCore = "0.1"
VectorInterface = "0.4"
LinearAlgebra = "1"
Random = "1"
PackageExtensionCompat = "1"
Printf = "1"
Random = "1"
Test = "1"
TestExtras = "0.2"
VectorInterface = "0.4"
Zygote = "0.6"
julia = "1.6"

[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -35,4 +43,4 @@ TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[targets]
test = ["Test", "Aqua", "Random", "TestExtras", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
test = ["Test", "Aqua", "Random", "TestExtras", "ChainRulesTestUtils", "ChainRulesCore", "FiniteDifferences", "Zygote"]
2 changes: 1 addition & 1 deletion docs/Project.toml
@@ -2,4 +2,4 @@
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"

[compat]
Documenter = "0.25 - 0.27"
Documenter = "1"
2 changes: 1 addition & 1 deletion docs/make.jl
@@ -3,7 +3,7 @@ using KrylovKit

makedocs(; modules=[KrylovKit],
sitename="KrylovKit.jl",
authors="Jutho Haegeman",
authors="Jutho Haegeman and collaborators",
pages=["Home" => "index.md",
"Manual" => ["man/intro.md",
"man/linear.md",
23 changes: 16 additions & 7 deletions docs/src/index.md
@@ -15,10 +15,10 @@ The high level interface of KrylovKit is provided by the following functions:
* [`geneigsolve`](@ref): find a few eigenvalues and corresponding vectors of a
generalized eigenvalue problem `A*x = λ*B*x`
* [`svdsolve`](@ref): find a few singular values and corresponding left and right
singular vectors `A*x = σ * y` and `A'*y = σ*x`.
* [`exponentiate`](@ref): apply the exponential of a linear map to a vector
* [`expintegrator`](@ref): exponential integrator for a linear non-homogeneous ODE,
generalization of `exponentiate`
singular vectors `A*x = σ * y` and `A'*y = σ*x`
* [`exponentiate`](@ref): apply the exponential of a linear map to a vector `x=exp(t*A)*x₀`
* [`expintegrator`](@ref): exponential integrator for a linear non-homogeneous ODE
(generalization of `exponentiate`)

## Package features and alternatives
This section could also be titled "Why did I create KrylovKit.jl?"
@@ -76,6 +76,15 @@ However, KrylovKit.jl distinguishes itself from the previous packages in the following ways:
and corresponding norm (`norm`) of an already existing vector-like object. The
latter should help with implementing certain types of preconditioners.

3. Since version 0.8, KrylovKit.jl supports reverse-mode AD by providing `ChainRulesCore.rrule`
definitions for the most common functionality (`linsolve`, `eigsolve`, `svdsolve`).
Hence, reverse-mode AD engines that are compatible with the [ChainRules](https://juliadiff.org/ChainRulesCore.jl/dev/)
ecosystem can benefit from an optimized implementation of the adjoint of these functions.
The `rrule` definitions for the remaining functionality (`geneigsolve` and `expintegrator`, of
which `exponentiate` is a special case) will be added at a later stage. There is a dedicated
documentation page on how to configure these `rrule`s, as they also require solving large-scale
linear or eigenvalue problems.
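
As a minimal sketch of what this enables (using Zygote as a ChainRules-compatible engine; the
matrix and cost function are purely illustrative):

```julia
using KrylovKit, Zygote

A = [2.0 1.0; 1.0 3.0]
b = [1.0, 0.0]

# A cost that depends on the solution x of A*x = b; linsolve returns (x, info).
cost(b) = sum(abs2, first(linsolve(A, b)))

gb, = Zygote.gradient(cost, b)   # reverse-mode gradient through the Krylov solve
```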

## Current functionality

The following algorithms are currently implemented
@@ -94,8 +103,8 @@ The following algorithms are currently implemented
2, it becomes equivalent to the latter.
* `svdsolve`: finding largest singular values based on Golub-Kahan-Lanczos
bidiagonalization (see [`GKL`](@ref))
* `exponentiate`: a [`Lanczos`](@ref) based algorithm for the action of the exponential of
a real symmetric or complex hermitian linear map.
* `exponentiate`: a [`Lanczos`](@ref) or [`Arnoldi`](@ref) based algorithm for the action
of the exponential of a linear map.
* `expintegrator`: [exponential integrator](https://en.wikipedia.org/wiki/Exponential_integrator)
for a linear non-homogeneous ODE, computes a linear combination of the `ϕⱼ` functions which generalize `ϕ₀(z) = exp(z)`.

@@ -104,7 +113,7 @@ The following algorithms are currently implemented
Here follows a wish list / to-do list for the future. Any help is welcomed and appreciated.

* More algorithms, including biorthogonal methods:
- for `linsolve`: MINRES, BiCG, IDR(s), ...
- for `linsolve`: L-GMRES, MINRES, BiCG, IDR(s), ...
- for `eigsolve`: BiLanczos, Jacobi-Davidson JDQR/JDQZ, subspace iteration (?), ...
- for `geneigsolve`: trace minimization, ...
* Support both in-place / mutating and out-of-place functions as linear maps
46 changes: 46 additions & 0 deletions docs/src/man/eig.md
@@ -39,6 +39,47 @@ T, vecs, vals, info = schursolve(A, x₀, 1, :LM, Arnoldi(...))
and use `vecs[1]` as the real valued eigenvector (after checking `info.converged`)
corresponding to the largest magnitude eigenvalue of `A`.

More generally, if you want to compute several eigenvalues of a real linear map and you know
that all of them are real, so that the associated eigenvectors will also be real, you can use
the `realeigsolve` method. It is also restricted to the 'expert' method call and will error if
any of the requested eigenvalues turn out to be complex.
```@docs
realeigsolve
```
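
A small illustrative sketch, assuming the same positional arguments and return values as the
expert `eigsolve` / `schursolve` calls shown above:

```julia
using KrylovKit

# A real, non-symmetric map with a purely real spectrum (upper triangular).
A = [2.0 1.0 0.0;
     0.0 3.0 1.0;
     0.0 0.0 4.0]
x₀ = rand(3)

# Errors if any of the requested eigenvalues turn out to be complex.
vals, vecs, info = realeigsolve(A, x₀, 1, :LM, Arnoldi())
```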

## Automatic differentiation

The `eigsolve` (and `realeigsolve`) routine can be used in conjunction with reverse-mode automatic
differentiation, using AD engines that are compatible with the [ChainRules](https://juliadiff.org/ChainRulesCore.jl/dev/)
ecosystem. The adjoint problem of an eigenvalue problem is a linear problem, although it can also
be formulated as an eigenvalue problem. Details about this approach will be published in a
forthcoming manuscript.

In either case, the adjoint problem requires the adjoint[^1] of the linear map. If the linear map is
an `AbstractMatrix` instance, its `adjoint` will be used in the `rrule`. If the linear map is implemented
as a function `f`, then the AD engine itself is used to compute the corresponding adjoint via
`ChainRulesCore.rrule_via_ad(config, f, x)`. The specific base point `x` at which this adjoint is
computed should not affect the result if `f` properly represents a linear map. Furthermore, the linear
map is the only argument that affects the `eigsolve` output (from a theoretical perspective, the
starting vector and algorithm parameters should have no effect), so that this is where the adjoint
variables need to be propagated to and have a nonzero effect.

The adjoint problem (also referred to as cotangent problem) can thus be solved as a linear problem
or as an eigenvalue problem. Note that this eigenvalue problem is never symmetric or Hermitian,
even if the primal problem is. The different implementations of the `rrule` can be selected using
the `alg_rrule` keyword argument. If a linear solver such as `GMRES` or `BiCGStab` is specified,
the adjoint problem requires solving a number of linear problems equal to the number of requested
eigenvalues and eigenvectors. If an eigenvalue solver is specified, for which `Arnoldi` is essentially
the only option, then the adjoint problem is solved as a single (but larger) eigenvalue problem.

Note that the phase of an eigenvector is not uniquely determined. Hence, a well-defined cost function
constructed from eigenvectors should depend on these in such a way that its value is not affected
by changing the phase of those eigenvectors, i.e. the cost function should be 'gauge invariant'.
If this is not the case, the cost function is said to be 'gauge dependent', and this can be detected
in the resulting adjoint variables for those eigenvectors. The KrylovKit `rrule` for `eigsolve`
will print a warning if it detects from the incoming adjoint variables that the cost function is gauge
dependent. This warning can be suppressed by passing `alg_rrule` an algorithm with `verbosity=-1`.
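
A minimal sketch of a gauge-invariant use case with Zygote, where the cotangent problem is solved
as a linear problem via `alg_rrule` (the matrix, sizes and keyword choices are illustrative):

```julia
using KrylovKit, Zygote

x₀ = randn(50)

# Gauge-invariant cost: it depends only on the largest eigenvalue,
# not on the phase-ambiguous eigenvector.
function cost(A)
    vals, vecs, info = eigsolve(A, x₀, 1, :LR; alg_rrule=GMRES())
    return real(vals[1])
end

A = randn(50, 50)
A = (A + A') / 2               # real symmetric test matrix
gA, = Zygote.gradient(cost, A)
```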

## Generalized eigenvalue problems

Generalized eigenvalues `λ` and corresponding vectors `x` of the generalized eigenvalue
@@ -54,3 +95,8 @@ properties explicitly.
```@docs
geneigsolve
```

Currently, there is no `rrule` and thus no automatic differentiation support for `geneigsolve`.

[^1]: For a linear map, the adjoint or pullback required in the reverse-order chain rule coincides
with its (conjugate) transpose, at least with respect to the standard Euclidean inner product.
7 changes: 6 additions & 1 deletion docs/src/man/implementation.md
@@ -14,11 +14,16 @@ KrylovKit.OrthonormalBasis
```

We can orthogonalize or orthonormalize a given vector to another vector (assumed normalized)
or to a given [`KrylovKit.OrthonormalBasis`](@ref).
or to a given [`KrylovKit.OrthonormalBasis`](@ref) using
```@docs
KrylovKit.orthogonalize
KrylovKit.orthonormalize
```
or using the possibly in-place versions
```@docs
KrylovKit.orthogonalize!!
KrylovKit.orthonormalize!!
```
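
A small illustrative sketch (return values as described in the docstrings above):

```julia
using LinearAlgebra: normalize
using KrylovKit

v = randn(4)
q = normalize(randn(4))   # the vector we orthogonalize against is assumed normalized

# Returns the component of v orthogonal to q together with the overlap ⟨q, v⟩;
# ModifiedGramSchmidt is one of the orthogonalization algorithms exported by KrylovKit.
w, overlap = KrylovKit.orthogonalize(v, q, ModifiedGramSchmidt())
```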

The expansion coefficients of a general vector in terms of a given orthonormal basis can be obtained as
```@docs
22 changes: 22 additions & 0 deletions docs/src/man/linear.md
@@ -7,3 +7,25 @@ side `b`. They can be solved using the function `linsolve`:
```@docs
linsolve
```

## Automatic differentiation

The `linsolve` routine can be used in conjunction with reverse-mode automatic differentiation,
using AD engines that are compatible with the [ChainRules](https://juliadiff.org/ChainRulesCore.jl/dev/)
ecosystem. The adjoint problem of a linear problem is again a linear problem, that requires the
adjoint[^1] of the linear map. If the linear map is an `AbstractMatrix` instance, its `adjoint`
will be used in the `rrule`. If the linear map is implemented as a function `f`, then the AD engine
itself is used to compute the corresponding adjoint via `ChainRulesCore.rrule_via_ad(config, f, x)`.
The specific base point `x` at which this adjoint is computed should not affect the result if `f`
properly represents a linear map. Furthermore, the `linsolve` output is only affected by the linear
map argument and the right hand side argument `b` (from a theoretical perspective, the starting vector
and algorithm parameters should have no effect), so that these two arguments are where the adjoint
variables need to be propagated to and have a nonzero effect.

The adjoint linear problem (also referred to as cotangent problem) is by default solved using the
same algorithms as the primal problem. However, the `rrule` can be customized to use a different
Krylov algorithm, by specifying the `alg_rrule` keyword argument. Its value can take any of the values
as the `algorithm` argument in `linsolve`.
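
A minimal sketch with Zygote, selecting `BiCGStab` for the cotangent problem (the map and cost
function are illustrative):

```julia
using KrylovKit, Zygote
using LinearAlgebra: I

n = 40
A = randn(n, n) / sqrt(n) + 2I   # well-conditioned shifted random matrix
b = randn(n)

# The primal problem is solved with the default algorithm; the cotangent (adjoint)
# linear problem is solved with BiCGStab instead.
cost(b) = sum(abs2, first(linsolve(A, b; alg_rrule=BiCGStab())))

gb, = Zygote.gradient(cost, b)
```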

[^1]: For a linear map, the adjoint or pullback required in the reverse-order chain rule coincides
with its (conjugate) transpose, at least with respect to the standard Euclidean inner product.
34 changes: 34 additions & 0 deletions docs/src/man/svd.md
@@ -5,3 +5,37 @@ right singular vectors using the function `svdsolve`:
```@docs
svdsolve
```

## Automatic differentiation

The `svdsolve` routine can be used in conjunction with reverse-mode automatic differentiation,
using AD engines that are compatible with the [ChainRules](https://juliadiff.org/ChainRulesCore.jl/dev/)
ecosystem. The adjoint problem of a singular value problem contains a linear problem, although it
can also be formulated as an eigenvalue problem. Details about this approach will be published in a
forthcoming manuscript.

Both `svdsolve` and the adjoint problem associated with it require the action of the linear map as
well as of its adjoint[^1]. Hence, no new information about the linear map is required for the adjoint
problem. However, the linear map is the only argument that affects the `svdsolve` output (from a
theoretical perspective, the starting vector and algorithm parameters should have no effect), so that
this is where the adjoint variables need to be propagated to.

The adjoint problem (also referred to as cotangent problem) can thus be solved as a linear problem
or as an eigenvalue problem. Note that this eigenvalue problem is never symmetric or Hermitian.
The different implementations of the `rrule` can be selected using the `alg_rrule` keyword argument.
If a linear solver such as `GMRES` or `BiCGStab` is specified, the adjoint problem requires solving a
number of linear problems equal to the number of requested singular values and vectors. If an
eigenvalue solver is specified, for which `Arnoldi` is essentially the only option, then the adjoint
problem is solved as a single (but larger) eigenvalue problem.

Note that a pair of left and right singular vectors is only determined up to a common phase.
Hence, a well-defined cost function constructed from singular vectors should depend on these in such a way
that its value is not affected by simultaneously changing the left and right singular vector with
a common phase factor, i.e. the cost function should be 'gauge invariant'. If this is not the case,
the cost function is said to be 'gauge dependent', and this can be detected in the resulting adjoint
variables for those singular vectors. The KrylovKit `rrule` for `svdsolve` will print a warning if
it detects from the incoming adjoint variables that the cost function is gauge dependent. This
warning can be suppressed by passing `alg_rrule` an algorithm with `verbosity=-1`.
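
A minimal sketch of a gauge-invariant cost with Zygote, analogous to the `eigsolve` case (the
matrix and sizes are illustrative):

```julia
using KrylovKit, Zygote

x₀ = randn(40)

# Gauge-invariant cost: the largest singular value is unaffected by the common
# phase freedom of the left and right singular vectors.
function cost(A)
    vals, lvecs, rvecs, info = svdsolve(A, x₀, 1, :LR; alg_rrule=GMRES())
    return vals[1]
end

A = randn(40, 40)   # square, so the start vector lives in either space
gA, = Zygote.gradient(cost, A)
```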

[^1]: For a linear map, the adjoint or pullback required in the reverse-order chain rule coincides
with its (conjugate) transpose, at least with respect to the standard Euclidean inner product.
15 changes: 15 additions & 0 deletions ext/KrylovKitChainRulesCoreExt/KrylovKitChainRulesCoreExt.jl
@@ -0,0 +1,15 @@
module KrylovKitChainRulesCoreExt

using KrylovKit
using ChainRulesCore
using LinearAlgebra
using VectorInterface

using KrylovKit: apply_normal, apply_adjoint

include("utilities.jl")
include("linsolve.jl")
include("eigsolve.jl")
include("svdsolve.jl")

end # module
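
As a sketch of how this ties together with the `[weakdeps]`/`[extensions]` entries in Project.toml
above (the explicit lookup assumes Julia ≥ 1.9; on older versions PackageExtensionCompat loads the
same code eagerly):

```julia
using KrylovKit
using ChainRulesCore   # loading the weak dependency triggers the extension

ext = Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt)
@assert ext !== nothing   # the rrule definitions are now active
```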