[ADD] Option to specify number of data points

f-dangel · Feb 4, 2024 · 4d1c85b
1 parent 9caa6bb
commit 4d1c85b
Show file tree

Hide file tree

Showing 3 changed files with 21 additions and 5 deletions.
diff --git a/curvlinops/fisher.py b/curvlinops/fisher.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from math import sqrt
-from typing import Callable, Iterable, List, Tuple, Union
+from typing import Callable, Iterable, List, Optional, Tuple, Union
 
 from numpy import ndarray
 from torch import (
@@ -120,6 +120,7 @@ def __init__(
         check_deterministic: bool = True,
         seed: int = 2147483647,
         mc_samples: int = 1,
+        num_data: Optional[int] = None,
     ):
         """Linear operator for the MC approximation of the Fisher.
 
@@ -149,6 +150,8 @@ def __init__(
                 draw samples at the beginning of each matrix-vector product.
                 Default: ``2147483647``
             mc_samples: Number of samples to use. Default: ``1``.
+            num_data: Number of data points. If ``None``, it is inferred from the data
+                at the cost of one traversal through the data loader.
 
         Raises:
             NotImplementedError: If the loss function differs from ``MSELoss`` or
@@ -168,6 +171,7 @@ def __init__(
             data,
             progressbar=progressbar,
             check_deterministic=check_deterministic,
+            num_data=num_data,
         )
 
     def _matvec(self, x: ndarray) -> ndarray:

diff --git a/curvlinops/jacobian.py b/curvlinops/jacobian.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Callable, Iterable, List, Tuple
+from typing import Callable, Iterable, List, Optional, Tuple
 
 from backpack.hessianfree.lop import transposed_jacobian_vector_product as vjp
 from backpack.hessianfree.rop import jacobian_vector_product as jvp
@@ -26,6 +26,7 @@ def __init__(
         data: Iterable[Tuple[Tensor, Tensor]],
         progressbar: bool = False,
         check_deterministic: bool = True,
+        num_data: Optional[int] = None,
     ):
         r"""Linear operator for the Jacobian as SciPy linear operator.
 
@@ -52,8 +53,10 @@ def __init__(
             data: Iterable of batched input-target pairs.
             progressbar: Show progress bar.
             check_deterministic: Check if model and data are deterministic.
+            num_data: Number of data points. If ``None``, it is inferred from the data
+                at the cost of one traversal through the data loader.
         """
-        num_data = sum(t.shape[0] for t, _ in data)
+        num_data = sum(t.shape[0] for t, _ in data) if num_data is None else num_data
         x = next(iter(data))[0].to(self._infer_device(params))
         num_outputs = model_func(x).shape[1:].numel()
         num_params = sum(p.numel() for p in params)
@@ -65,6 +68,7 @@ def __init__(
             progressbar=progressbar,
             check_deterministic=check_deterministic,
             shape=(num_data * num_outputs, num_params),
+            num_data=num_data,
         )
 
     def _check_deterministic(self):
@@ -151,6 +155,7 @@ def __init__(
         data: Iterable[Tuple[Tensor, Tensor]],
         progressbar: bool = False,
         check_deterministic: bool = True,
+        num_data: Optional[int] = None,
     ):
         r"""Linear operator for the transpose Jacobian as SciPy linear operator.
 
@@ -177,8 +182,10 @@ def __init__(
             data: Iterable of batched input-target pairs.
             progressbar: Show progress bar.
             check_deterministic: Check if model and data are deterministic.
+            num_data: Number of data points. If ``None``, it is inferred from the data
+                at the cost of one traversal through the data loader.
         """
-        num_data = sum(t.shape[0] for t, _ in data)
+        num_data = sum(t.shape[0] for t, _ in data) if num_data is None else num_data
         x = next(iter(data))[0].to(self._infer_device(params))
         num_outputs = model_func(x).shape[1:].numel()
         num_params = sum(p.numel() for p in params)
@@ -190,6 +197,7 @@ def __init__(
             progressbar=progressbar,
             check_deterministic=check_deterministic,
             shape=(num_params, num_data * num_outputs),
+            num_data=num_data,
         )
 
     def _check_deterministic(self):

diff --git a/curvlinops/kfac.py b/curvlinops/kfac.py
@@ -20,7 +20,7 @@
 
 from functools import partial
 from math import sqrt
-from typing import Dict, Iterable, List, Set, Tuple, Union
+from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
 
 from einops import rearrange, reduce
 from numpy import ndarray
@@ -107,6 +107,7 @@ def __init__(
         kfac_approx: str = "expand",
         loss_average: Union[None, str] = "batch",
         separate_weight_and_bias: bool = True,
+        num_data: Optional[int] = None,
     ):
         """Kronecker-factored approximate curvature (KFAC) proxy of the Fisher/GGN.
 
@@ -165,6 +166,8 @@ def __init__(
                 consistently with the loss and the gradient. Default: ``"batch"``.
             separate_weight_and_bias: Whether to treat weights and biases separately.
                 Defaults to ``True``.
+            num_data: Number of data points. If ``None``, it is inferred from the data
+                at the cost of one traversal through the data loader.
 
         Raises:
             ValueError: If the loss function is not supported.
@@ -241,6 +244,7 @@ def __init__(
             progressbar=progressbar,
             check_deterministic=check_deterministic,
             shape=shape,
+            num_data=num_data,
         )
 
     def _matvec(self, x: ndarray) -> ndarray: