Merge branch 'main' into registration-mechanism

f-dangel · Oct 30, 2023 · 16ea76c · 16ea76c
2 parents 5080ae7 + cc9f605
commit 16ea76c
Show file tree

Hide file tree

Showing 18 changed files with 580 additions and 222 deletions.
diff --git a/docs/interface.md b/docs/interface.md
@@ -0,0 +1,7 @@
+This section lists the interface for structured matrices, that is, the operations
+they need to implement to work in SINGD. It is meant **for internal purposes
+only**. This is useful for developers who wish to add a new structured matrix
+class to the code that cannot be constructed with one of the available
+templates.
+
+::: singd.structures.base.StructuredMatrix
diff --git a/docs/structures.md b/docs/structures.md
@@ -0,0 +1,59 @@
+Here we provide a list of structured matrices. This list is meant **for internal
+purposes only**. It exists because it is more convenient to read the rendered
+LaTeX code than the docstring source.
+
+::: singd.structures.dense.DenseMatrix
+    options:
+        members:
+            - __init__
+
+::: singd.structures.hierarchical.Hierarchical15_15Matrix
+    options:
+        members:
+            - __init__
+
+# DIAGONAL
+
+::: singd.structures.diagonal.DiagonalMatrix
+    options:
+        members:
+            - __init__
+
+::: singd.structures.blockdiagonal.Block30DiagonalMatrix
+    options:
+        members:
+            - __init__
+
+# LOWER-TRIANGULAR
+
+::: singd.structures.triltoeplitz.TrilToeplitzMatrix
+    options:
+        members:
+            - __init__
+
+::: singd.structures.trilbottomrightdiag.TrilBottomRightDiagonalMatrix
+    options:
+        members:
+            - __init__
+
+::: singd.structures.triltopleftdiag.TrilTopLeftDiagonalMatrix
+    options:
+        members:
+            - __init__
+
+# UPPER-TRIANGULAR
+
+::: singd.structures.triutoeplitz.TriuToeplitzMatrix
+    options:
+        members:
+            - __init__
+
+::: singd.structures.triubottomrightdiag.TriuBottomRightDiagonalMatrix
+    options:
+        members:
+            - __init__
+
+::: singd.structures.triutopleftdiag.TriuTopLeftDiagonalMatrix
+    options:
+        members:
+            - __init__
diff --git a/docs/templates.md b/docs/templates.md
@@ -0,0 +1,24 @@
+Here we provide a list of templates that can be used to create new structured
+matrices. This list is meant **for internal purposes only**. It exists because
+it is more convenient to read the rendered LaTeX code than the docstring
+source.
+
+::: singd.structures.blockdiagonal.BlockDiagonalMatrixTemplate
+    options:
+        members:
+            -
+
+::: singd.structures.hierarchical.HierarchicalMatrixTemplate
+    options:
+        members:
+            -
+
+::: singd.structures.recursive.RecursiveBottomLeftMatrixTemplate
+    options:
+        members:
+            -
+
+::: singd.structures.recursive.RecursiveTopRightMatrixTemplate
+    options:
+        members:
+            -
diff --git a/makefile b/makefile
@@ -58,10 +58,9 @@ install-test:
 .PHONY: test test-light
 
 test:
-	@pytest -vx --run-optional-tests=expensive --cov=singd test
-
+	@pytest -vx --run-optional-tests=expensive --cov=singd --doctest-modules test singd
 test-light:
-	@pytest -vx --cov=singd test
+	@pytest -vx --cov=singd --doctest-modules test singd
 
 .PHONY: install-lint
 

diff --git a/mkdocs.yml b/mkdocs.yml
@@ -10,6 +10,10 @@ nav:
     - Code Examples: generated/gallery
     - API Documentation: api.md
     - Developer Notes: develop.md
+    - Internal:
+      - Structures: structures.md
+      - Templates: templates.md
+      - Interface: interface.md
 theme:
     name: material
     features:
@@ -34,7 +38,7 @@ plugins:
                 options:
                       show_root_heading: true
                       show_source: true
-                      show_bases: false
+                      show_bases: true
                       show_signature_annotations: true
                       separate_signature: true
                       docstring_section_style: list

diff --git a/singd/structures/base.py b/singd/structures/base.py
@@ -25,8 +25,12 @@ class StructuredMatrix(ABC):
     a new structured matrix class with SINGD.
 
     The minimum amount of work to add a new structured matrix class requires
-    implementing the `to_dense`, `from_dense` methods.
-    The other operations will then use a naive implementation which internally
+    implementing the following methods:
+
+    - `to_dense`
+    - `from_dense`
+
+    All other operations will then use a naive implementation which internally
     re-constructs unstructured dense matrices. By default, these operations
     will trigger a warning which can be used to identify functions that can be
     implemented more efficiently using structure.
@@ -41,9 +45,9 @@ class StructuredMatrix(ABC):
         WARN_NAIVE: Warn the user if a method falls back to a naive implementation
             of this base class. This indicates a method that should be implemented to
             save memory and run time by considering the represented structure.
-            Default: ``True``.
+            Default: `True`.
         WARN_NAIVE_EXCEPTIONS: Set of methods that should not trigger a warning even
-            if ``WARN_NAIVE`` is ``True``. This can be used to silence warnings for
+            if `WARN_NAIVE` is `True`. This can be used to silence warnings for
             methods for which it is too complicated to leverage a specific structure
             and which should therefore call out to this class's implementation without
             performance warnings.
@@ -59,13 +63,18 @@ def __init__(self) -> None:
     def register_tensor(self, tensor: Tensor, name: str) -> None:
         """Register a tensor that represents a part of the matrix structure.
 
-        Args:
-            tensor: A tensor that represents a part of the matrix structure.
-            name: A name for the tensor. The tensor will be available under
-                `self.name`.
-
-        Raises:
-            ValueError: If the name is already in use.
+        Args:
+            tensor: A tensor that represents a part of the matrix structure.
+            name: A name for the tensor. The tensor will be available under
+                `self.name`.
+
+        Raises:
+            ValueError: If the name is already in use.
         """
         if hasattr(self, name):
             raise ValueError(f"Variable name {name!r} is already in use.")
@@ -85,9 +94,7 @@ def named_tensors(self) -> Iterator[Tuple[str, Tensor]]:
     def __matmul__(
         self, other: Union[StructuredMatrix, Tensor]
     ) -> Union[StructuredMatrix, Tensor]:
-        """Multiply onto a matrix (@ operator).
-
-        (https://peps.python.org/pep-0465/)
+        """Multiply onto a matrix ([@ operator](https://peps.python.org/pep-0465/)).
 
         Args:
             other: Another matrix which will be multiplied onto. Can be represented
@@ -116,14 +123,12 @@ def from_dense(cls, sym_mat: Tensor) -> StructuredMatrix:
         are non-zero.
 
         Warning:
-            We do not verify whether ``mat`` is symmetric internally.
+            We do not verify whether `sym_mat` is symmetric internally.
 
         Args:
             sym_mat: A symmetric dense matrix which will be converted into a structured
                 one.
 
-        # noqa: DAR202
-
         Returns:
             Structured matrix.
 
@@ -136,8 +141,6 @@ def from_dense(cls, sym_mat: Tensor) -> StructuredMatrix:
     def to_dense(self) -> Tensor:
         """Return a dense tensor representing the structured matrix.
 
-        # noqa: DAR202
-
         Returns:
             A dense PyTorch tensor representing the matrix.
 
@@ -182,7 +185,7 @@ def __sub__(self, other: StructuredMatrix) -> StructuredMatrix:
         return self + (other * (-1.0))
 
     def rmatmat(self, mat: Tensor) -> Tensor:
-        """Multiply the structured matrix's transpose onto a matrix (``self.T @ mat``).
+        """Multiply the structured matrix's transpose onto a matrix (`self.T @ mat`).
 
         Args:
             mat: A dense matrix that will be multiplied onto.
@@ -200,7 +203,7 @@ def _warn_naive_implementation(cls, fn_name: str):
         This suggests that a child class does not implement a specialized version
         that is usually more efficient.
 
-        You can turn off the warning by setting the ``WARN_NAIVE`` class attribute.
+        You can turn off the warning by setting the `WARN_NAIVE` class attribute.
 
         Args:
             fn_name: Name of the function whose naive version is being called.
@@ -224,17 +227,17 @@ def all_reduce(
         parallel training.
 
         Args:
-            op: The reduction operation to perform (default: ``dist.ReduceOp.AVG``).
-            group: The process group to work on. If ``None``, the default process group
+            op: The reduction operation to perform (default: `dist.ReduceOp.AVG`).
+            group: The process group to work on. If `None`, the default process group
                 will be used.
-            async_op: If ``True``, this function will return a
-                ``torch.distributed.Future`` object.
+            async_op: If `True`, this function will return a
+                `torch.distributed.Future` object.
                 Otherwise, it will block until the reduction completes
-                (default: ``False``).
+                (default: `False`).
 
         Returns:
-            If ``async_op`` is ``True``, a (tuple of) ``torch.distributed.Future``
-            object(s), else ``None``.
+            If `async_op` is `True`, a (tuple of) `torch.distributed.Future`
+            object(s), else `None`.
         """
         handles = []
         for _, tensor in self.named_tensors():
@@ -253,16 +256,16 @@ def all_reduce(
     ###############################################################################
 
     def from_inner(self, X: Union[Tensor, None] = None) -> StructuredMatrix:
-        """Extract the represented structure from ``self.T @ X @ X^T @ self``.
+        """Extract the represented structure from `self.T @ X @ X^T @ self`.
 
-        We can recycle terms by writing ``self.T @ X @ X^T @ self`` as ``S @ S^T``
-        with ``S := self.T @ X``.
+        We can recycle terms by writing `self.T @ X @ X^T @ self` as `S @ S^T`
+        with `S := self.T @ X`.
 
         Args:
-            X: Optional arbitrary 2d tensor. If ``None``, ``X = I`` will be used.
+            X: Optional arbitrary 2d tensor. If `None`, `X = I` will be used.
 
         Returns:
-            The structured matrix extracted from ``self.T @ X @ X^T @ self``.
+            The structured matrix extracted from `self.T @ X @ X^T @ self`.
         """
         self._warn_naive_implementation("from_inner")
         S_dense = self.to_dense().T if X is None else self.rmatmat(X)
@@ -273,13 +276,13 @@ def from_inner(self, X: Union[Tensor, None] = None) -> StructuredMatrix:
     # integrating this interface into existing implementations of sparse IF-KFAC
     # easier, as they have access to the input/gradient covariance matrices.
     def from_inner2(self, XXT: Tensor) -> StructuredMatrix:
-        """Extract the represented structure from ``self.T @ XXT @ self``.
+        """Extract the represented structure from `self.T @ XXT @ self`.
 
         Args:
             XXT: 2d square symmetric matrix.
 
         Returns:
-            The structured matrix extracted from ``self.T @ XXT @ self``.
+            The structured matrix extracted from `self.T @ XXT @ self`.
         """
         self._warn_naive_implementation("from_inner2")
         dense = self.to_dense()
@@ -308,7 +311,7 @@ def diag_add_(self, value: float) -> StructuredMatrix:
         diag_add_(dense, value)
 
         # NOTE `self` is immutable, so we have to update its state with the following
-        # hack (otherwise, the call ``a.diag_add_(b)`` will not modify ``a``). See
+        # hack (otherwise, the call `a.diag_add_(b)` will not modify `a`). See
         # https://stackoverflow.com/a/37658673 and https://stackoverflow.com/q/1015592.
         new = self.from_dense(dense)
         self.__dict__.update(new.__dict__)
@@ -319,9 +322,9 @@ def infinity_vector_norm(self) -> Tensor:
 
         The infinity vector norm is the absolute value of the largest entry.
         Note that this is different from the infinity matrix norm, compare
-        (here)[https://pytorch.org/docs/stable/generated/torch.linalg.vector_norm.html]
+        [here](https://pytorch.org/docs/stable/generated/torch.linalg.vector_norm.html)
         and
-        (here)[https://pytorch.org/docs/stable/generated/torch.linalg.matrix_norm.html].
+        [here](https://pytorch.org/docs/stable/generated/torch.linalg.matrix_norm.html).
 
         Returns:
             The matrix's infinity vector norm.