torch lazy loading (#206)

This PR finalizes the import refactoring so that `torch` (and the dependent packages `gpytorch`/`botorch`) is loaded lazily, which improves package import speed. In addition, new import tests ensure that:

* all BayBE modules can be imported without errors
* `torch` is not eagerly loaded (to prevent regressions)
emdgroup · Apr 23, 2024 · 71d8c69 · 71d8c69
2 parents 725dca5 + a37dbef
commit 71d8c69
Show file tree

Hide file tree

Showing 12 changed files with 148 additions and 42 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,13 +15,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - `hypothesis` strategies and roundtrip test for kernels, constraints and objectives
 
 ### Changed
-- `torch` numeric types are now loaded lazily
 - Reorganized acquisition.py into `acquisition` subpackage
-- `torch` is imported lazily in `surrogates`
 - Acquisition functions are now their own objects
 - `acquisition_function_cls` constructor parameter renamed to `acquisition_function`
 - User guide now explains the new objective classes
 - Telemetry deactivation warning is only shown to developers
+- `torch`, `gpytorch` and `botorch` are lazy-loaded for improved startup time
 
 ### Removed
 - `model_params` attribute from `Surrogate` base class, `GaussianProcessSurrogate` and

diff --git a/baybe/acquisition/__init__.py b/baybe/acquisition/__init__.py
@@ -8,8 +8,6 @@
     qProbabilityOfImprovement,
     qUpperConfidenceBound,
 )
-from baybe.acquisition.adapter import AdapterModel, debotorchize
-from baybe.acquisition.partial import PartialAcquisitionFunction
 
 EI = ExpectedImprovement
 PI = ProbabilityOfImprovement
@@ -35,9 +33,4 @@
     "qEI",
     "qPI",
     "qUCB",
-    # ---------------------------
-    # Helpers
-    "debotorchize",
-    "AdapterModel",
-    "PartialAcquisitionFunction",
 ]
diff --git a/baybe/acquisition/base.py b/baybe/acquisition/base.py
@@ -8,7 +8,6 @@
 
 from attrs import define
 
-from baybe.acquisition.adapter import debotorchize
 from baybe.serialization.core import (
     converter,
     get_base_structure_hook,
@@ -29,6 +28,8 @@ def to_botorch(self, surrogate: Surrogate, best_f: float):
         """Create the botorch-ready representation of the function."""
         import botorch.acquisition as botorch_acquisition
 
+        from baybe.acquisition.adapter import debotorchize
+
         acqf_cls = getattr(botorch_acquisition, self.__class__.__name__)
 
         return debotorchize(acqf_cls)(surrogate, best_f)

diff --git a/baybe/recommenders/naive.py b/baybe/recommenders/naive.py
@@ -1,13 +1,11 @@
 """Naive recommender for hybrid spaces."""
 
 import warnings
-from typing import ClassVar, Optional, cast
+from typing import ClassVar, Optional
 
 import pandas as pd
 from attrs import define, evolve, field, fields
-from torch import Tensor
 
-from baybe.acquisition import PartialAcquisitionFunction
 from baybe.recommenders.pure.base import PureRecommender
 from baybe.recommenders.pure.bayesian.base import BayesianRecommender
 from baybe.recommenders.pure.bayesian.sequential_greedy import (
@@ -86,6 +84,8 @@ def recommend(  # noqa: D102
     ) -> pd.DataFrame:
         # See base class.
 
+        from baybe.acquisition.partial import PartialAcquisitionFunction
+
         if (not isinstance(self.disc_recommender, BayesianRecommender)) and (
             not isinstance(self.disc_recommender, NonPredictiveRecommender)
         ):
@@ -116,7 +116,7 @@ def recommend(  # noqa: D102
         # will then be attached to every discrete point when the acquisition function
         # is evaluated.
         cont_part = searchspace.continuous.samples_random(1)
-        cont_part_tensor = cast(Tensor, to_tensor(cont_part)).unsqueeze(-2)
+        cont_part_tensor = to_tensor(cont_part).unsqueeze(-2)
 
         # Get discrete candidates. The metadata flags are ignored since the search space
         # is hybrid
@@ -151,7 +151,7 @@ def recommend(  # noqa: D102
         # Get one random discrete point that will be attached when evaluating the
         # acquisition function in the discrete space.
         disc_part = searchspace.discrete.comp_rep.loc[disc_rec_idx].sample(1)
-        disc_part_tensor = cast(Tensor, to_tensor(disc_part)).unsqueeze(-2)
+        disc_part_tensor = to_tensor(disc_part).unsqueeze(-2)
 
         # Setup a fresh acquisition function for the continuous recommender
         self.cont_recommender._setup_botorch_acqf(searchspace, train_x, train_y)

diff --git a/baybe/recommenders/pure/bayesian/sequential_greedy.py b/baybe/recommenders/pure/bayesian/sequential_greedy.py
@@ -4,7 +4,6 @@
 
 import pandas as pd
 from attrs import define, field, validators
-from botorch.optim import optimize_acqf, optimize_acqf_discrete, optimize_acqf_mixed
 
 from baybe.exceptions import NoMCAcquisitionFunctionError
 from baybe.recommenders.pure.bayesian.base import BayesianRecommender
@@ -69,6 +68,8 @@ def _recommend_discrete(
     ) -> pd.Index:
         # See base class.
 
+        from botorch.optim import optimize_acqf_discrete
+
         # determine the next set of points to be tested
         candidates_tensor = to_tensor(candidates_comp)
         try:
@@ -102,7 +103,9 @@ def _recommend_continuous(
         batch_size: int,
     ) -> pd.DataFrame:
         # See base class.
+
         import torch
+        from botorch.optim import optimize_acqf
 
         try:
             points, _ = optimize_acqf(
@@ -161,6 +164,7 @@ def _recommend_hybrid(
                 is chosen.
         """
         import torch
+        from botorch.optim import optimize_acqf_mixed
 
         if len(candidates_comp) > 0:
             # Calculate the number of samples from the given percentage

diff --git a/baybe/surrogates/custom.py b/baybe/surrogates/custom.py
@@ -27,7 +27,6 @@
 from baybe.surrogates.utils import batchify, catch_constant_targets
 from baybe.surrogates.validation import validate_custom_architecture_cls
 from baybe.utils.numerical import DTypeFloatONNX
-from baybe.utils.torch import DTypeFloatTorch
 
 try:
     import onnxruntime as ort
@@ -156,6 +155,8 @@ def default_model(self) -> ort.InferenceSession:
         def _posterior(self, candidates: Tensor) -> tuple[Tensor, Tensor]:
             import torch
 
+            from baybe.utils.torch import DTypeFloatTorch
+
             model_inputs = {
                 self.onnx_input_name: candidates.numpy().astype(DTypeFloatONNX)
             }

diff --git a/baybe/surrogates/gaussian_process.py b/baybe/surrogates/gaussian_process.py
@@ -2,17 +2,9 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, ClassVar, Optional
+from typing import TYPE_CHECKING, ClassVar
 
 from attr import define, field
-from botorch import fit_gpytorch_mll
-from botorch.models import SingleTaskGP
-from botorch.models.transforms import Normalize, Standardize
-from gpytorch import ExactMarginalLogLikelihood
-from gpytorch.kernels import IndexKernel, ScaleKernel
-from gpytorch.likelihoods import GaussianLikelihood
-from gpytorch.means import ConstantMean
-from gpytorch.priors import GammaPrior
 
 from baybe.kernels import MaternKernel
 from baybe.kernels.base import Kernel
@@ -38,7 +30,9 @@ class GaussianProcessSurrogate(Surrogate):
     kernel: Kernel = field(factory=MaternKernel)
     """The kernel used by the Gaussian Process."""
 
-    _model: Optional[SingleTaskGP] = field(init=False, default=None)
+    # TODO: type should be Optional[botorch.models.SingleTaskGP] but is currently
+    #   omitted due to: https://github.com/python-attrs/cattrs/issues/531
+    _model = field(init=False, default=None)
     """The actual model."""
 
     def _posterior(self, candidates: Tensor) -> tuple[Tensor, Tensor]:
@@ -49,7 +43,10 @@ def _posterior(self, candidates: Tensor) -> tuple[Tensor, Tensor]:
     def _fit(self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor) -> None:
         # See base class.
 
+        import botorch
+        import gpytorch
         import torch
+        from gpytorch.priors import GammaPrior
 
         # identify the indexes of the task and numeric dimensions
         # TODO: generalize to multiple task parameters
@@ -63,10 +60,10 @@ def _fit(self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor) -> No
 
         # define the input and outcome transforms
         # TODO [Scaling]: scaling should be handled by search space object
-        input_transform = Normalize(
+        input_transform = botorch.models.transforms.Normalize(
             train_x.shape[1], bounds=bounds, indices=numeric_idxs
         )
-        outcome_transform = Standardize(train_y.shape[1])
+        outcome_transform = botorch.models.transforms.Standardize(train_y.shape[1])
 
         # ---------- GP prior selection ---------- #
         # TODO: temporary prior choices adapted from edbo, replace later on
@@ -105,7 +102,7 @@ def _fit(self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor) -> No
         batch_shape = train_x.shape[:-2]
 
         # create GP mean
-        mean_module = ConstantMean(batch_shape=batch_shape)
+        mean_module = gpytorch.means.ConstantMean(batch_shape=batch_shape)
 
         # define the covariance module for the numeric dimensions
         gpytorch_kernel = self.kernel.to_gpytorch(
@@ -114,7 +111,7 @@ def _fit(self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor) -> No
             batch_shape=batch_shape,
             lengthscale_prior=lengthscale_prior[0],
         )
-        base_covar_module = ScaleKernel(
+        base_covar_module = gpytorch.kernels.ScaleKernel(
             gpytorch_kernel,
             batch_shape=batch_shape,
             outputscale_prior=outputscale_prior[0],
@@ -130,22 +127,22 @@ def _fit(self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor) -> No
         if task_idx is None:
             covar_module = base_covar_module
         else:
-            task_covar_module = IndexKernel(
+            task_covar_module = gpytorch.kernels.IndexKernel(
                 num_tasks=searchspace.n_tasks,
                 active_dims=task_idx,
                 rank=searchspace.n_tasks,  # TODO: make controllable
             )
             covar_module = base_covar_module * task_covar_module
 
         # create GP likelihood
-        likelihood = GaussianLikelihood(
+        likelihood = gpytorch.likelihoods.GaussianLikelihood(
             noise_prior=noise_prior[0], batch_shape=batch_shape
         )
         if noise_prior[1] is not None:
             likelihood.noise = torch.tensor([noise_prior[1]])
 
         # construct and fit the Gaussian process
-        self._model = SingleTaskGP(
+        self._model = botorch.models.SingleTaskGP(
             train_x,
             train_y,
             input_transform=input_transform,
@@ -154,5 +151,5 @@ def _fit(self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor) -> No
             covar_module=covar_module,
             likelihood=likelihood,
         )
-        mll = ExactMarginalLogLikelihood(self._model.likelihood, self._model)
-        fit_gpytorch_mll(mll)
+        mll = gpytorch.ExactMarginalLogLikelihood(self._model.likelihood, self._model)
+        botorch.fit_gpytorch_mll(mll)
diff --git a/baybe/surrogates/utils.py b/baybe/surrogates/utils.py
@@ -5,13 +5,12 @@
 from functools import wraps
 from typing import TYPE_CHECKING, Callable, ClassVar
 
-import torch
-from torch import Tensor
-
 from baybe.scaler import DefaultScaler
 from baybe.searchspace import SearchSpace
 
 if TYPE_CHECKING:
+    from torch import Tensor
+
     from baybe.surrogates.base import Surrogate
 
 _MIN_TARGET_STD = 1e-6
@@ -90,6 +89,8 @@ def __init__(self, *args, **kwargs):
 
         def _posterior(self, candidates: Tensor) -> tuple[Tensor, Tensor]:
             """Call the posterior function of the internal model instance."""
+            import torch
+
             mean, var = self.model._posterior(candidates)
 
             # If a joint posterior is expected but the model has been overridden by one
@@ -105,6 +106,8 @@ def _fit(
             self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
         ) -> None:
             """Select a model based on the variance of the targets and fits it."""
+            import torch
+
             from baybe.surrogates.naive import MeanPredictionSurrogate
 
             # https://github.com/pytorch/pytorch/issues/29372
@@ -232,6 +235,8 @@ def sequential_posterior(model: Surrogate, candidates: Tensor) -> [Tensor, Tenso
         Returns:
             The mean and the covariance.
         """
+        import torch
+
         # If no batch dimensions are given, call the model directly
         if candidates.ndim == 2:
             return posterior(model, candidates)

diff --git a/baybe/utils/dataframe.py b/baybe/utils/dataframe.py
@@ -3,12 +3,13 @@
 from __future__ import annotations
 
 import logging
-from collections.abc import Iterable, Sequence
+from collections.abc import Iterable, Iterator, Sequence
 from typing import (
     TYPE_CHECKING,
     Literal,
     Optional,
     Union,
+    overload,
 )
 
 import numpy as np
@@ -28,7 +29,17 @@
 _logger = logging.getLogger(__name__)
 
 
-def to_tensor(*dfs: pd.DataFrame) -> Union[Tensor, Iterable[Tensor]]:
+@overload
+def to_tensor(df: pd.DataFrame) -> Tensor:
+    ...
+
+
+@overload
+def to_tensor(*dfs: pd.DataFrame) -> Iterator[Tensor]:
+    ...
+
+
+def to_tensor(*dfs: pd.DataFrame) -> Union[Tensor, Iterator[Tensor]]:
     """Convert a given set of dataframes into tensors (dropping all indices).
 
     Args:

diff --git a/streamlit/surrogate_models.py b/streamlit/surrogate_models.py
@@ -15,7 +15,7 @@
 from funcy import rpartial
 
 import streamlit as st
-from baybe.acquisition import debotorchize
+from baybe.acquisition.adapter import debotorchize
 from baybe.parameters import NumericalDiscreteParameter
 from baybe.searchspace import SearchSpace
 from baybe.surrogates import get_available_surrogates