[ADD] Output-parameter Jacobian of an NN, deprecate python 3.7 #32

Merged · 11 commits · Jul 18, 2023
4 changes: 2 additions & 2 deletions .github/workflows/lint-black.yaml
@@ -16,10 +16,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Set up Python 3.7
- name: Set up Python 3.8
uses: actions/setup-python@v1
with:
python-version: 3.7
python-version: 3.8
- name: Install dependencies
run: |
python -m pip install --upgrade pip
4 changes: 2 additions & 2 deletions .github/workflows/lint-darglint.yaml
@@ -16,10 +16,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Set up Python 3.7
- name: Set up Python 3.8
uses: actions/setup-python@v1
with:
python-version: 3.7
python-version: 3.8
- name: Install dependencies
run: |
python -m pip install --upgrade pip
4 changes: 2 additions & 2 deletions .github/workflows/lint-flake8.yaml
@@ -15,10 +15,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Set up Python 3.7
- name: Set up Python 3.8
uses: actions/setup-python@v1
with:
python-version: 3.7
python-version: 3.8
- name: Install dependencies
run: |
python -m pip install --upgrade pip
4 changes: 2 additions & 2 deletions .github/workflows/lint-isort.yaml
@@ -16,10 +16,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Set up Python 3.7
- name: Set up Python 3.8
uses: actions/setup-python@v1
with:
python-version: 3.7
python-version: 3.8
- name: Install dependencies
run: |
python -m pip install --upgrade pip
4 changes: 2 additions & 2 deletions .github/workflows/lint-pydocstyle.yaml
@@ -17,10 +17,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Set up Python 3.7
- name: Set up Python 3.8
uses: actions/setup-python@v1
with:
python-version: 3.7
python-version: 3.8
- name: Install dependencies
run: |
python -m pip install --upgrade pip
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
@@ -18,7 +18,7 @@ jobs:
USING_COVERAGE: '3.8'
strategy:
matrix:
python-version: ["3.7", "3.8"]
python-version: ["3.8"]
steps:
- uses: actions/checkout@v1
- uses: actions/setup-python@v1
2 changes: 1 addition & 1 deletion .readthedocs.yaml
@@ -7,7 +7,7 @@ sphinx:
configuration: docs/rtd/conf.py

python:
version: 3.7
version: 3.8
install:
- method: pip
path: .
3 changes: 2 additions & 1 deletion README.md
@@ -1,7 +1,7 @@
# <img alt="Logo" src="./docs/rtd/assets/logo.svg" height="90"> scipy linear operators of deep learning matrices in PyTorch

[![Python
3.7+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/release/python-370/)
3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/release/python-380/)
![tests](https://github.com/f-dangel/curvature-linear-operators/actions/workflows/test.yaml/badge.svg)
[![Coveralls](https://coveralls.io/repos/github/f-dangel/curvlinops/badge.svg?branch=master)](https://coveralls.io/github/f-dangel/curvlinops)

@@ -13,6 +13,7 @@ for deep learning matrices, such as
- the Fisher/generalized Gauss-Newton (GGN)
- the Monte-Carlo approximated Fisher
- the uncentered gradient covariance (aka empirical Fisher)
- the output-parameter Jacobian of a neural net

Matrix-vector products are carried out in PyTorch, i.e. potentially on a GPU.
The library supports defining these matrices not only on a mini-batch, but
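
With the Jacobian added to this list, a minimal usage sketch follows. The constructor arguments are assumed, mirroring the base-class signature changed further down in this diff (model, parameters, data, no loss function); the actual `JacobianLinearOperator` signature is not shown in this excerpt.

```python
# Hypothetical usage sketch; the JacobianLinearOperator signature is assumed,
# not taken from this diff.
from numpy import float32
from numpy.random import rand
from torch import manual_seed, nn
from torch import rand as torch_rand

from curvlinops import JacobianLinearOperator

manual_seed(0)
N, D_in, C = 8, 5, 3  # mini-batch size, input features, model outputs
model = nn.Sequential(nn.Linear(D_in, 4), nn.ReLU(), nn.Linear(4, C))
params = [p for p in model.parameters() if p.requires_grad]
data = [(torch_rand(N, D_in), torch_rand(N, C))]  # one (inputs, labels) batch

J = JacobianLinearOperator(model, params, data)  # assumed signature

# J acts like a SciPy LinearOperator of shape (N * C, number of parameters):
# multiplying a flat parameter-space vector yields a stacked-output vector.
v = rand(J.shape[1]).astype(float32)
print((J @ v).shape)  # -> (N * C,)
```

Because the operator only exposes matrix-vector products, SciPy routines that work on `LinearOperator` objects can be applied without ever materializing the Jacobian.
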
2 changes: 1 addition & 1 deletion black.toml
@@ -1,6 +1,6 @@
[tool.black]
line-length = 88
target-version = ['py36', 'py37', 'py38']
target-version = ['py38', 'py39', 'py310', 'py311']
include = '\.pyi?$'
exclude = '''
(
2 changes: 2 additions & 0 deletions curvlinops/__init__.py
@@ -5,6 +5,7 @@
from curvlinops.gradient_moments import EFLinearOperator
from curvlinops.hessian import HessianLinearOperator
from curvlinops.inverse import CGInverseLinearOperator, NeumannInverseLinearOperator
from curvlinops.jacobian import JacobianLinearOperator
from curvlinops.papyan2020traces.spectrum import (
LanczosApproximateLogSpectrumCached,
LanczosApproximateSpectrumCached,
@@ -18,6 +19,7 @@
"GGNLinearOperator",
"EFLinearOperator",
"FisherMCLinearOperator",
"JacobianLinearOperator",
"CGInverseLinearOperator",
"NeumannInverseLinearOperator",
"SubmatrixLinearOperator",
85 changes: 56 additions & 29 deletions curvlinops/_base.py
@@ -1,6 +1,6 @@
"""Contains functionality to analyze Hessian & GGN via matrix-free multiplication."""

from typing import Callable, Iterable, List, Tuple
from typing import Callable, Iterable, List, Optional, Tuple, Union

from backpack.utils.convert_parameters import vector_to_parameter_list
from numpy import (
@@ -14,31 +14,31 @@
)
from numpy.random import rand
from scipy.sparse.linalg import LinearOperator
from torch import Tensor
from torch import Tensor, cat
from torch import device as torch_device
from torch import from_numpy, tensor, zeros_like
from torch.autograd import grad
from torch.nn import Module, Parameter
from torch.nn.utils import parameters_to_vector
from tqdm import tqdm


class _LinearOperator(LinearOperator):
"""Base class for linear operators of DNN curvature matrices.
"""Base class for linear operators of DNN matrices.

Can be used with SciPy.
"""

def __init__(
self,
model_func: Callable[[Tensor], Tensor],
loss_func: Callable[[Tensor, Tensor], Tensor],
loss_func: Union[Callable[[Tensor, Tensor], Tensor], None],
params: List[Parameter],
data: Iterable[Tuple[Tensor, Tensor]],
progressbar: bool = False,
check_deterministic: bool = True,
shape: Optional[Tuple[int, int]] = None,
):
"""Linear operator for DNN curvature matrices.
"""Linear operator for DNN matrices.

Note:
f(X; θ) denotes a neural network, parameterized by θ, that maps a mini-batch
@@ -49,10 +49,13 @@ def __init__(
model_func: A function that maps the mini-batch input X to predictions.
Could be a PyTorch module representing a neural network.
loss_func: Loss function criterion. Maps predictions and mini-batch labels
to a scalar value.
to a scalar value. If ``None``, there is no loss function and the
represented matrix is independent of the loss function.
params: List of differentiable parameters used by the prediction function.
data: Source from which mini-batches can be drawn, for instance a list of
mini-batches ``[(X, y), ...]`` or a torch ``DataLoader``.
shape: Shape of the represented matrix. If ``None``, assumes ``(D, D)``
where ``D`` is the total number of parameters.
progressbar: Show a progressbar during matrix-multiplication.
Default: ``False``.
check_deterministic: Probe that model and data are deterministic, i.e.
@@ -64,8 +67,10 @@ def __init__(
Raises:
RuntimeError: If the check for deterministic behavior fails.
"""
dim = sum(p.numel() for p in params)
super().__init__(shape=(dim, dim), dtype=float32)
if shape is None:
dim = sum(p.numel() for p in params)
shape = (dim, dim)
super().__init__(shape=shape, dtype=float32)

self._params = params
self._model_func = model_func
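
For orientation, the two constructor modes this hunk enables, with made-up sizes (the numbers below are illustrative, not from the PR):

```python
D = 1_000      # total number of parameters (sum of p.numel() over params)
N, C = 64, 10  # mini-batch size and model outputs per sample

square_shape = (D, D)        # shape=None: curvature-style operator, (1000, 1000)
jacobian_shape = (N * C, D)  # explicit shape: rectangular operator, (640, 1000)
```
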
@@ -129,22 +134,37 @@ def _check_deterministic(self):
- Two independent loss/gradient computations yield different results

Note:
Deterministic checks are performed on CPU. We noticed that even when it
passes on CPU, it can fail on GPU; probably due to non-deterministic
Deterministic checks should be performed on CPU. We noticed that even when
it passes on CPU, it can fail on GPU; probably due to non-deterministic
operations.

Raises:
RuntimeError: If non-deterministic behavior is detected.
"""
print("Performing deterministic checks")
v = rand(self.shape[1]).astype(self.dtype)
mat_v1 = self @ v
mat_v2 = self @ v

rtol, atol = 5e-5, 1e-6
if not allclose(mat_v1, mat_v2, rtol=rtol, atol=atol):
self.print_nonclose(mat_v1, mat_v2, rtol, atol)
raise RuntimeError("Check for deterministic matvec failed.")

if self._loss_func is None:
return

# only carried out if there is a loss function
grad1, loss1 = self.gradient_and_loss()
grad1, loss1 = parameters_to_vector(grad1).cpu().numpy(), loss1.cpu().numpy()
grad1, loss1 = (
self.flatten_and_concatenate(grad1).cpu().numpy(),
loss1.cpu().numpy(),
)

grad2, loss2 = self.gradient_and_loss()
grad2, loss2 = parameters_to_vector(grad2).cpu().numpy(), loss2.cpu().numpy()

rtol, atol = 5e-5, 1e-6
grad2, loss2 = (
self.flatten_and_concatenate(grad2).cpu().numpy(),
loss2.cpu().numpy(),
)

if not allclose(loss1, loss2, rtol=rtol, atol=atol):
self.print_nonclose(loss1, loss2, rtol, atol)
@@ -154,16 +174,6 @@ def _check_deterministic(self):
self.print_nonclose(grad1, grad2, rtol, atol)
raise RuntimeError("Check for deterministic gradient failed.")

v = rand(self.shape[0]).astype(self.dtype)
mat_v1 = self @ v
mat_v2 = self @ v

if not allclose(mat_v1, mat_v2, rtol=rtol, atol=atol):
self.print_nonclose(mat_v1, mat_v2, rtol, atol)
raise RuntimeError("Check for deterministic matvec failed.")

print("Deterministic checks passed")

@staticmethod
def print_nonclose(array1: ndarray, array2: ndarray, rtol: float, atol: float):
"""Check if the two arrays are element-wise equal within a tolerance and print
@@ -245,8 +255,7 @@ def _preprocess(self, x: ndarray) -> List[Tensor]:
x_torch = from_numpy(x).to(self._device)
return vector_to_parameter_list(x_torch, self._params)

@staticmethod
def _postprocess(x_list: List[Tensor]) -> ndarray:
def _postprocess(self, x_list: List[Tensor]) -> ndarray:
"""Convert torch list format to flat numpy array.

Args:
@@ -255,7 +264,7 @@ def _postprocess(x_list: List[Tensor]) -> ndarray:
Returns:
Flat vector.
"""
return parameters_to_vector([x.contiguous() for x in x_list]).cpu().numpy()
return self.flatten_and_concatenate(x_list).cpu().numpy()

def _loop_over_data(self) -> Iterable[Tuple[Tensor, Tensor]]:
"""Yield batches of the data set, loaded to the correct device.
@@ -279,7 +288,13 @@ def gradient_and_loss(self) -> Tuple[List[Tensor], Tensor]:

Returns:
Gradient and loss on the data set.

Raises:
ValueError: If there is no loss function.
"""
if self._loss_func is None:
raise ValueError("No loss function specified.")

total_loss = tensor([0.0], device=self._device)
total_grad = [zeros_like(p) for p in self._params]

@@ -317,3 +332,15 @@ def _get_normalization_factor(self, X: Tensor, y: Tensor) -> float:
return X.shape[0] / self._N_data
else:
raise ValueError("Loss must have reduction 'mean' or 'sum'.")

@staticmethod
def flatten_and_concatenate(tensors: List[Tensor]) -> Tensor:
"""Flatten then concatenate all tensors in a list.

Args:
tensors: List of tensors.

Returns:
Concatenated flattened tensors.
"""
return cat([t.flatten() for t in tensors])
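
Taken together, the optional `loss_func`, the explicit `shape`, and the `flatten_and_concatenate` helper are what allow rectangular operators on top of this base class. The sketch below wires them together for an output-parameter Jacobian using the double-backward trick; it reuses helpers visible in this diff (`_preprocess`, `_postprocess`, `_loop_over_data`), but it is an illustration under those assumptions, not the PR's actual `JacobianLinearOperator`.

```python
from numpy import ndarray
from torch import no_grad, zeros_like
from torch.autograd import grad

from curvlinops._base import _LinearOperator


class JacobianSketch(_LinearOperator):
    """Illustrative (total outputs) x (total parameters) linear operator."""

    def __init__(self, model_func, params, data, progressbar=False):
        num_params = sum(p.numel() for p in params)
        with no_grad():  # rows = model outputs stacked over the data set
            num_outputs = sum(model_func(X).numel() for X, _ in data)
        super().__init__(
            model_func,
            None,  # no loss function -> the loss/gradient check is skipped
            params,
            data,
            progressbar=progressbar,
            shape=(num_outputs, num_params),  # rectangular instead of (D, D)
        )

    def _matvec(self, x: ndarray) -> ndarray:
        """Multiply the Jacobian onto a flat vector in parameter space."""
        x_list = self._preprocess(x)  # flat numpy vector -> parameter shapes
        rows = []
        for X, _ in self._loop_over_data():
            output = self._model_func(X)
            # Jacobian-vector product via two VJPs (double-backward trick):
            # compute J^T u for a dummy u, then differentiate (J^T u) . x w.r.t. u.
            dummy = zeros_like(output, requires_grad=True)
            vjp = grad(output, self._params, grad_outputs=dummy, create_graph=True)
            jvp = grad(
                sum((v * x_i).sum() for v, x_i in zip(vjp, x_list)), dummy
            )[0]
            rows.append(jvp.detach())
        return self._postprocess(rows)  # flatten, concatenate, convert to numpy
```

Constructing `JacobianSketch(model, params, data)` exercises exactly the new code paths: the `(D, D)` fallback is bypassed by the explicit `shape`, the deterministic check stops after the matvec comparison because `loss_func` is `None`, and `_postprocess` routes through `flatten_and_concatenate`. The sketch assumes model and data live on the same device, since the row count is taken before `super().__init__` runs.
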