[DOC] Add comparison with power iteration in eigenvalue example (#140)
* [DOC] Add comparison with power iteration in eigenvalue example

* [REF] Replace magic number

* [FIX] Typo

* [REF] Incorporate @werkaaa's review comments, add some links

* [FIX] Broken link

* [DOC] Update changelog
f-dangel authored Oct 25, 2024
1 parent e366829 commit f82bbd4
Showing 2 changed files with 138 additions and 1 deletion.
6 changes: 6 additions & 0 deletions changelog.md
@@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added/New

- Comparison of `eigsh` with power iteration in [eigenvalue
example](https://curvlinops.readthedocs.io/en/latest/basic_usage/example_eigenvalues.html#sphx-glr-basic-usage-example-eigenvalues-py)
([PR](https://github.com/f-dangel/curvlinops/pull/140))

### Fixed/Removed

- Deprecate Python 3.8 as it will reach its end of life in October 2024
133 changes: 132 additions & 1 deletion docs/examples/basic_usage/example_eigenvalues.py
@@ -8,6 +8,10 @@
As always, imports go first.
"""

from contextlib import redirect_stderr
from io import StringIO
from typing import List, Tuple

import numpy
import scipy
import torch
@@ -70,7 +74,7 @@
# :math:`k=3` eigenvalues.

k = 3
which = "LA" # largest algebraic
which = "LM" # largest magnitude
top_k_evals, _ = scipy.sparse.linalg.eigsh(H, k=k, which=which)

print(f"Leading {k} Hessian eigenvalues: {top_k_evals}")
@@ -104,3 +108,130 @@
# :func:`scipy.sparse.linalg.eigsh` can also compute other subsets of
# eigenvalues, and also their associated eigenvectors. Check out its
# documentation for more!
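

# %%
#
# For instance, one could query the smallest algebraic eigenvalues and their
# eigenvectors as sketched below (the ``which="SA"`` flag and the variable names
# here are illustrative; eigenvectors are returned as columns of a ``[D, k]``
# matrix, and eigenvalues at this end of the spectrum may take more iterations
# to converge):

bottom_k_evals, bottom_k_evecs = scipy.sparse.linalg.eigsh(H, k=k, which="SA")
print(f"Smallest {k} Hessian eigenvalues: {bottom_k_evals}")
print(f"Eigenvector matrix shape: {bottom_k_evecs.shape}")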


# %%
#
# Power iteration versus ``eigsh``
# --------------------------------
#
# Here, we compare the query efficiency of :func:`scipy.sparse.linalg.eigsh` with the
# `power iteration <https://en.wikipedia.org/wiki/Power_iteration>`_ method, a simple
# algorithm for computing the leading eigenvalues (in terms of magnitude). We re-use
# the implementation from the `PyHessian library <https://github.com/amirgholami/PyHessian>`_
# and adapt it to work with SciPy arrays rather than PyTorch tensors:


def power_method(
    A: scipy.sparse.linalg.LinearOperator,
    max_iterations: int = 100,
    tol: float = 1e-3,
    k: int = 1,
) -> Tuple[numpy.ndarray, numpy.ndarray]:
    """Compute the top-k eigenpairs of a linear operator using power iteration.

    Code modified from PyHessian, see
    https://github.com/amirgholami/PyHessian/blob/72e5f0a0d06142387fccdab2226b4c6bae088202/pyhessian/hessian.py#L111-L156

    Args:
        A: Linear operator of dimension ``D`` whose top eigenpairs will be computed.
        max_iterations: Maximum number of iterations. Defaults to ``100``.
        tol: Relative tolerance between two consecutive iterations that has to be
            reached for convergence. Defaults to ``1e-3``.
        k: Number of eigenpairs to compute. Defaults to ``1``.

    Returns:
        The eigenvalues as array of shape ``[k]`` in ascending order, and their
        corresponding eigenvectors as array of shape ``[D, k]``.
    """
    eigenvalues = []
    eigenvectors = []

    def normalize(v: numpy.ndarray) -> numpy.ndarray:
        return v / numpy.linalg.norm(v)

    def orthonormalize(v: numpy.ndarray, basis: List[numpy.ndarray]) -> numpy.ndarray:
        # project out previously found eigenvectors (deflation)
        for basis_vector in basis:
            v -= numpy.dot(v, basis_vector) * basis_vector
        return normalize(v)

    computed_dim = 0
    while computed_dim < k:
        eigenvalue = None
        v = normalize(numpy.random.randn(A.shape[0]))

        for _ in range(max_iterations):
            v = orthonormalize(v, eigenvectors)
            Av = A @ v

            # Rayleigh quotient of the current (unit-norm) iterate
            tmp_eigenvalue = v.dot(Av)
            v = normalize(Av)

            if eigenvalue is None:
                eigenvalue = tmp_eigenvalue
            elif abs(eigenvalue - tmp_eigenvalue) / (abs(eigenvalue) + 1e-6) < tol:
                break
            else:
                eigenvalue = tmp_eigenvalue

        eigenvalues.append(eigenvalue)
        eigenvectors.append(v)
        computed_dim += 1

    # sort in ascending order and convert into arrays; transpose the eigenvectors
    # so they are stored as columns of a [D, k] matrix, as documented above
    eigenvalues = numpy.array(eigenvalues[::-1])
    eigenvectors = numpy.array(eigenvectors[::-1]).T

    return eigenvalues, eigenvectors
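

# %%
#
# Before applying it to the Hessian, let's sanity-check :code:`power_method` on a
# small diagonal test matrix (made up for this illustration) whose spectrum we know
# exactly; its top-2 eigenvalues are 5 and 2:

A_test = scipy.sparse.linalg.aslinearoperator(numpy.diag(numpy.array([1.0, 2.0, 5.0])))
evals_test, _ = power_method(A_test, k=2)
print(f"Power iteration eigenvalues (ascending): {evals_test}")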


# %%
#
# Let's compute the top-3 eigenvalues via power iteration and verify that they
# roughly match. Note that we use a smaller :code:`tol` value than the PyHessian
# default here to achieve better convergence, and we still have to compare with
# relatively loose tolerances (which was unnecessary when comparing :code:`eigsh`
# with :code:`eigh`).

top_k_evals_power, _ = power_method(H, tol=1e-4, k=k)
print(f"Comparing leading {k} Hessian eigenvalues (eigsh vs. power).")
report_nonclose(top_k_evals_functorch, top_k_evals_power, rtol=2e-2, atol=1e-6)

# %%
#
# This indicates that the power method achieves poorer accuracy than :code:`eigsh`.
# But does it at least require fewer matrix-vector products in exchange? To answer
# this, let's turn on the linear operator's progress bar, which allows us to count
# the number of matrix-vector products invoked by both eigen-solvers:

H = HessianLinearOperator(model, loss_function, params, data, progressbar=True)

# determine number of matrix-vector products used by `eigsh`
with StringIO() as buf, redirect_stderr(buf):
    top_k_evals, _ = scipy.sparse.linalg.eigsh(H, k=k, which=which)
    # The tqdm progressbar will print "matmat" for each batch in a matrix-vector
    # product. Therefore, we need to divide by the number of batches.
    queries_eigsh = buf.getvalue().count("matmat") // len(data)
print(f"eigsh used {queries_eigsh} matrix-vector products.")

# determine number of matrix-vector products used by power iteration
with StringIO() as buf, redirect_stderr(buf):
    top_k_evals_power, _ = power_method(H, k=k)
    # The tqdm progressbar will print "matmat" for each batch in a matrix-vector
    # product. Therefore, we need to divide by the number of batches.
    queries_power = buf.getvalue().count("matmat") // len(data)
print(f"Power iteration used {queries_power} matrix-vector products.")

assert queries_power > queries_eigsh

# %%
#
# Sadly, the power iteration does not offer computational benefits either: it consumes
# more matrix-vector products than :code:`eigsh`. While it is elegant and simple, it
# cannot compete with :code:`eigsh`, at least in the comparison provided here (note
# that we ran the power iteration with a relatively loose tolerance, and its query
# count will likely deteriorate further if we decrease the tolerance).
#
# Therefore, we recommend using :code:`eigsh` for computing eigenvalues. This method
# is readily accessible because :code:`curvlinops` interfaces with SciPy's linear
# operators.
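

# %%
#
# As a final sketch of that interface: ``H`` behaves like any other
# :class:`scipy.sparse.linalg.LinearOperator` and supports matrix-vector products
# with plain NumPy arrays, which is all that :code:`eigsh` and our
# :code:`power_method` require.

v = numpy.random.randn(H.shape[1])
Hv = H @ v  # a single Hessian-vector product, without materializing the matrix
print(f"Hessian-vector product shape: {Hv.shape}")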
