From 8f43d432816893b2f54575458348bb8c6ab9559f Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Mon, 22 Apr 2024 20:51:39 +0200 Subject: [PATCH 01/20] adapt to newest chemprop version + implement regressor --- molpipeline/estimators/chemprop/abstract.py | 6 +- .../estimators/chemprop/component_wrapper.py | 108 +++++++++++------- molpipeline/estimators/chemprop/models.py | 10 +- .../test_chemprop/test_chemprop_pipeline.py | 10 ++ .../test_chemprop/test_component_wrapper.py | 10 ++ test_extras/test_chemprop/test_models.py | 20 +++- 6 files changed, 111 insertions(+), 53 deletions(-) diff --git a/molpipeline/estimators/chemprop/abstract.py b/molpipeline/estimators/chemprop/abstract.py index 262b5a60..5c586d90 100644 --- a/molpipeline/estimators/chemprop/abstract.py +++ b/molpipeline/estimators/chemprop/abstract.py @@ -13,10 +13,10 @@ import numpy.typing as npt try: - from chemprop.data import MoleculeDataset, MolGraphDataLoader + from chemprop.data import MoleculeDataset, build_dataloader from chemprop.models.model import MPNN from lightning import pytorch as pl -except ImportError: +except ImportError as error: pass from sklearn.base import BaseEstimator @@ -90,7 +90,7 @@ def fit( if y.ndim == 1: y = y.reshape(-1, 1) X.Y = y - training_data = MolGraphDataLoader( + training_data = build_dataloader( X, batch_size=self.batch_size, num_workers=self.n_jobs ) self.lightning_trainer.fit(self.model, training_data) diff --git a/molpipeline/estimators/chemprop/component_wrapper.py b/molpipeline/estimators/chemprop/component_wrapper.py index 93e1581e..6c62d3a1 100644 --- a/molpipeline/estimators/chemprop/component_wrapper.py +++ b/molpipeline/estimators/chemprop/component_wrapper.py @@ -1,5 +1,6 @@ """Wrapper classes for the chemprop components to make them compatible with scikit-learn.""" +import abc from typing import Any, Iterable, Self import torch @@ -9,13 +10,18 @@ from chemprop.nn.agg import MeanAggregation as _MeanAggregation from chemprop.nn.agg import SumAggregation as _SumAggregation from chemprop.nn.ffn import MLP -from chemprop.nn.loss import LossFunction +from chemprop.nn.loss import BCELoss, LossFunction, MSELoss from chemprop.nn.message_passing import BondMessagePassing as _BondMessagePassing from chemprop.nn.message_passing import MessagePassing -from chemprop.nn.metrics import BCELoss, Metric +from chemprop.nn.metrics import BinaryAUROCMetric, Metric, MSEMetric from chemprop.nn.predictors import BinaryClassificationFFN as _BinaryClassificationFFN -from chemprop.nn.predictors import Predictor +from chemprop.nn.predictors import RegressionFFN as _RegressionFFN +from chemprop.nn.predictors import ( + _FFNPredictorBase as _Predictor, # pylint: disable=protected-access +) +from chemprop.nn.transforms import UnscaleTransform from chemprop.nn.utils import Activation, get_activation_function +from chemprop.utils.registry import Factory from sklearn.base import BaseEstimator from torch import Tensor, nn @@ -115,11 +121,11 @@ def set_params(self, **params: Any) -> Self: # pylint: disable=too-many-ancestors, too-many-instance-attributes -class BinaryClassificationFFN(_BinaryClassificationFFN, BaseEstimator): - """A wrapper for the BinaryClassificationFFN class.""" +class PredictorWrapper(_Predictor, BaseEstimator, abc.ABC): # type: ignore + """Abstract wrapper for the Predictor class.""" - n_targets: int = 1 - _default_criterion = BCELoss() + _T_default_criterion: LossFunction + _T_default_metric: Metric def __init__( self, @@ -130,6 +136,9 @@ def __init__( dropout: float = 0, activation: 
str = "relu", criterion: LossFunction | None = None, + task_weights: Tensor | None = None, + threshold: float | None = None, + output_transform: UnscaleTransform | None = None, ): """Initialize the BinaryClassificationFFN class. @@ -150,6 +159,13 @@ def __init__( criterion : LossFunction or None, optional (default=None) Loss function. None defaults to BCELoss. """ + if criterion is None: + task_weights = torch.ones(n_tasks) if task_weights is None else task_weights + criterion = Factory.build( + self._T_default_criterion, + task_weights=task_weights, + threshold=threshold, + ) super().__init__( n_tasks=n_tasks, input_dim=input_dim, @@ -158,6 +174,7 @@ def __init__( dropout=dropout, activation=activation, criterion=criterion, + output_transform=output_transform, ) self.n_tasks = n_tasks self._input_dim = input_dim @@ -165,6 +182,8 @@ def __init__( self.n_layers = n_layers self.dropout = dropout self.activation = activation + self.task_weights = task_weights + self.threshold = threshold @property def input_dim(self) -> int: @@ -218,13 +237,13 @@ def reinitialize_fnn(self) -> Self: Self The reinitialized feedforward network. """ - self.ffn = MLP( - self.input_dim, - self.n_tasks * self.n_targets, - self.hidden_dim, - self.n_layers, - self.dropout, - self.activation, + self.ffn = MLP.build( + input_dim=self.input_dim, + output_dim=self.n_tasks * self.n_targets, + hidden_dim=self.hidden_dim, + n_layers=self.n_layers, + dropout=self.dropout, + activation=self.activation, ) return self @@ -246,6 +265,22 @@ def set_params(self, **params: Any) -> Self: return self +class BinaryClassificationFFN(PredictorWrapper, _BinaryClassificationFFN): # type: ignore + """A wrapper for the BinaryClassificationFFN class.""" + + n_targets: int = 1 + _T_default_criterion = BCELoss + _T_default_metric = BinaryAUROCMetric + + +class RegressionFFN(PredictorWrapper, _RegressionFFN): # type: ignore + """A wrapper for the RegressionFFN class.""" + + n_targets: int = 1 + _T_default_criterion = MSELoss + _T_default_metric = MSEMetric + + class MPNN(_MPNN, BaseEstimator): """A wrapper for the MPNN class. @@ -253,14 +288,15 @@ class MPNN(_MPNN, BaseEstimator): and a feedforward network for prediction. """ + bn: nn.BatchNorm1d | nn.Identity + def __init__( self, message_passing: MessagePassing, agg: Aggregation, - predictor: Predictor, + predictor: PredictorWrapper, batch_norm: bool = True, metric_list: Iterable[Metric] | None = None, - task_weight: Tensor | None = None, warmup_epochs: int = 2, init_lr: float = 1e-4, max_lr: float = 1e-3, @@ -280,8 +316,6 @@ def __init__( Whether to use batch normalization. metric_list : Iterable[Metric] | None, optional (default=None) The metrics to use for evaluation. - task_weight : Tensor | None, optional (default=None) - The weights to use for each task during training. If None, use uniform weights. warmup_epochs : int, optional (default=2) The number of epochs to use for the learning rate warmup. init_lr : float, optional (default=1e-4) @@ -292,20 +326,18 @@ def __init__( The final learning rate. 
""" super().__init__( - message_passing, - agg, - predictor, - batch_norm, - metric_list, - task_weight, - warmup_epochs, - init_lr, - max_lr, - final_lr, + message_passing=message_passing, + agg=agg, + predictor=predictor, + batch_norm=batch_norm, + metrics=metric_list, + warmup_epochs=warmup_epochs, + init_lr=init_lr, + max_lr=max_lr, + final_lr=final_lr, ) self.metric_list = metric_list self.batch_norm = batch_norm - self.task_weight = task_weight def reinitialize_network(self) -> Self: """Reinitialize the network with the current parameters. @@ -315,21 +347,17 @@ def reinitialize_network(self) -> Self: Self The reinitialized network. """ - self.bn = ( - nn.BatchNorm1d(self.message_passing.output_dim) - if self.batch_norm - else nn.Identity() - ) + if self.batch_norm: + self.bn = nn.BatchNorm1d(self.message_passing.output_dim) + else: + self.bn = nn.Identity() + if self.metric_list is None: # pylint: disable=protected-access - self.metrics = [self.predictor._default_metric, self.criterion] + self.metrics = [self.predictor._T_default_metric, self.criterion] else: self.metrics = list(self.metric_list) + [self.criterion] - if self.task_weight is None: - w_t = torch.ones(self.n_tasks) - else: - w_t = torch.tensor(self.task_weight) - self.w_t = nn.Parameter(w_t.unsqueeze(0), False) + return self def set_params(self, **params: Any) -> Self: diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index 6a74cc87..9ed0312a 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -13,7 +13,7 @@ from sklearn.utils.metaestimators import available_if try: - from chemprop.data import MoleculeDataset, MolGraphDataLoader + from chemprop.data import MoleculeDataset, build_dataloader from chemprop.nn.predictors import ( BinaryClassificationFFNBase, MulticlassClassificationFFN, @@ -86,9 +86,9 @@ def _predict( The predictions for the input data. """ self.model.eval() - test_data = MolGraphDataLoader(X, num_workers=self.n_jobs, shuffle=False) + test_data = build_dataloader(X, num_workers=self.n_jobs, shuffle=False) predictions = self.lightning_trainer.predict(self.model, test_data) - prediction_array = np.array([pred.numpy() for pred in predictions]) # type: ignore + prediction_array = np.array([pred.numpy() for pred in predictions[0]]) # type: ignore # Check if the predictions have the same length as the input dataset if prediction_array.shape[0] != len(X): @@ -98,11 +98,11 @@ def _predict( # If the model is a binary classifier, return the probability of the positive class if self._is_binary_classifier(): - if prediction_array.shape[1] != 1 or prediction_array.shape[2] != 1: + if prediction_array.shape[1] != 1: raise ValueError( "Binary classification model should output a single probability." 
) - prediction_array = prediction_array[:, 0, 0] + prediction_array = prediction_array[:, 0] return prediction_array def predict( diff --git a/test_extras/test_chemprop/test_chemprop_pipeline.py b/test_extras/test_chemprop/test_chemprop_pipeline.py index 31de13e6..24cfc8f4 100644 --- a/test_extras/test_chemprop/test_chemprop_pipeline.py +++ b/test_extras/test_chemprop/test_chemprop_pipeline.py @@ -3,7 +3,9 @@ import unittest import numpy as np +from chemprop.nn.loss import LossFunction from sklearn.base import clone +from torch import nn from molpipeline.any2mol import SmilesToMol from molpipeline.error_handling import ErrorFilter, FilterReinserter @@ -106,6 +108,14 @@ def test_clone(self) -> None: self.assertEqual( param.__class__, cloned_params[param_name].__class__ ) + elif isinstance(param, LossFunction): + self.assertEqual( + param.state_dict()["task_weights"], + cloned_params[param_name].state_dict()["task_weights"], + ) + self.assertEqual(type(param), type(cloned_params[param_name])) + elif isinstance(param, nn.Identity): + self.assertEqual(type(param), type(cloned_params[param_name])) else: self.assertEqual( param, cloned_params[param_name], f"Failed for {param_name}" diff --git a/test_extras/test_chemprop/test_component_wrapper.py b/test_extras/test_chemprop/test_component_wrapper.py index 02dcdd54..53a901aa 100644 --- a/test_extras/test_chemprop/test_component_wrapper.py +++ b/test_extras/test_chemprop/test_component_wrapper.py @@ -2,7 +2,9 @@ import unittest +from chemprop.nn.loss import LossFunction from sklearn.base import clone +from torch import nn from molpipeline.estimators.chemprop.component_wrapper import ( MPNN, @@ -113,6 +115,14 @@ def test_clone(self) -> None: clone_param = mpnn_clone.get_params(deep=True)[param_name] if hasattr(param, "get_params"): self.assertEqual(param.__class__, clone_param.__class__) + elif isinstance(param, LossFunction): + self.assertEqual( + param.state_dict()["task_weights"], + clone_param.state_dict()["task_weights"], + ) + self.assertEqual(type(param), type(clone_param)) + elif isinstance(param, nn.Identity): + self.assertEqual(type(param), type(clone_param)) else: self.assertEqual(param, clone_param) diff --git a/test_extras/test_chemprop/test_models.py b/test_extras/test_chemprop/test_models.py index f3a88e09..5a12625c 100644 --- a/test_extras/test_chemprop/test_models.py +++ b/test_extras/test_chemprop/test_models.py @@ -3,8 +3,10 @@ import logging import unittest +from chemprop.nn.loss import LossFunction from lightning import pytorch as pl from sklearn.base import clone +from torch import nn from molpipeline.estimators.chemprop.component_wrapper import ( MPNN, @@ -46,7 +48,6 @@ def test_get_params(self) -> None: """Test the get_params and set_params methods.""" chemprop_model = get_model() orig_params = chemprop_model.get_params(deep=True) - expected_params = { "batch_size": 64, "lightning_trainer": pl.Trainer, @@ -60,7 +61,7 @@ def test_get_params(self) -> None: "model__message_passing__bias": False, "model__message_passing__d_e": 14, "model__message_passing__d_h": 300, - "model__message_passing__d_v": 133, + "model__message_passing__d_v": 72, "model__message_passing__d_vd": None, "model__message_passing__depth": 3, "model__message_passing__dropout_rate": 0.0, @@ -79,7 +80,9 @@ def test_get_params(self) -> None: raise ValueError(f"{param_name} should be a type.") self.assertIsInstance(orig_params[param_name], param) else: - self.assertEqual(orig_params[param_name], param) + self.assertEqual( + orig_params[param_name], param, f"Test 
failed for {param_name}" + ) new_params = { "batch_size": 32, @@ -102,7 +105,6 @@ def test_clone(self) -> None: cloned_model = clone(chemprop_model) self.assertIsInstance(cloned_model, ChempropModel) cloned_model_params = cloned_model.get_params(deep=True) - for param_name, param in chemprop_model.get_params(deep=True).items(): cloned_param = cloned_model_params[param_name] if hasattr(param, "get_params"): @@ -110,8 +112,16 @@ def test_clone(self) -> None: self.assertNotEqual(id(param), id(cloned_param)) elif isinstance(param, pl.Trainer): self.assertIsInstance(cloned_param, pl.Trainer) + elif isinstance(param, LossFunction): + self.assertEqual( + param.state_dict()["task_weights"], + cloned_param.state_dict()["task_weights"], + ) + self.assertEqual(type(param), type(cloned_param)) + elif isinstance(param, nn.Identity): + self.assertEqual(type(param), type(cloned_param)) else: - self.assertEqual(param, cloned_param) + self.assertEqual(param, cloned_param, f"Test failed for {param_name}") def test_classifier_methods(self) -> None: """Test the classifier methods.""" From 0cf0eda1e2e8f5fe14e11a69071ad5e40b38b2ad Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Mon, 22 Apr 2024 20:53:31 +0200 Subject: [PATCH 02/20] remove unused variable --- molpipeline/estimators/chemprop/abstract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/molpipeline/estimators/chemprop/abstract.py b/molpipeline/estimators/chemprop/abstract.py index 5c586d90..c9018cac 100644 --- a/molpipeline/estimators/chemprop/abstract.py +++ b/molpipeline/estimators/chemprop/abstract.py @@ -16,7 +16,7 @@ from chemprop.data import MoleculeDataset, build_dataloader from chemprop.models.model import MPNN from lightning import pytorch as pl -except ImportError as error: +except ImportError: pass from sklearn.base import BaseEstimator From 07843260cade52287783042fb68fbb845e9f18f1 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Mon, 22 Apr 2024 20:55:01 +0200 Subject: [PATCH 03/20] add missing params to docstring --- molpipeline/estimators/chemprop/component_wrapper.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/molpipeline/estimators/chemprop/component_wrapper.py b/molpipeline/estimators/chemprop/component_wrapper.py index 6c62d3a1..b2d41068 100644 --- a/molpipeline/estimators/chemprop/component_wrapper.py +++ b/molpipeline/estimators/chemprop/component_wrapper.py @@ -158,6 +158,12 @@ def __init__( Activation function. criterion : LossFunction or None, optional (default=None) Loss function. None defaults to BCELoss. + task_weights : Tensor or None, optional (default=None) + Task weights. + threshold : float or None, optional (default=None) + Threshold for binary classification. + output_transform : UnscaleTransform or None, optional (default=None) + Transformations to apply to the output. None defaults to UnscaleTransform. 
""" if criterion is None: task_weights = torch.ones(n_tasks) if task_weights is None else task_weights From fb29c7a38f391e8a7b973907947ef22520306b0d Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Mon, 22 Apr 2024 20:57:14 +0200 Subject: [PATCH 04/20] remove link to custom repo and use default repo instead --- requirements_chemprop.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_chemprop.txt b/requirements_chemprop.txt index 41ccfc6a..81d53ece 100644 --- a/requirements_chemprop.txt +++ b/requirements_chemprop.txt @@ -1,2 +1,2 @@ -chemprop @ https://github.com//c-w-feldmann/chemprop/archive/v2/molpipeline_requirement.zip +chemprop lightning < 1000 \ No newline at end of file From 95aca5f1c0adab39655f95d8c18b893f6420e908 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 21:23:30 +0200 Subject: [PATCH 05/20] implement ChempropRegressor --- molpipeline/estimators/chemprop/models.py | 41 +++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index 9ed0312a..f3675395 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -28,6 +28,7 @@ MPNN, BinaryClassificationFFN, BondMessagePassing, + RegressionFFN, SumAggregation, ) from molpipeline.estimators.chemprop.neural_fingerprint import ChempropNeuralFP @@ -227,3 +228,43 @@ def set_params(self, **params: Any) -> Self: if not self._is_binary_classifier(): raise ValueError("ChempropClassifier should be a binary classifier.") return self + + +class ChempropRegressor(ChempropModel): + """Chemprop model with default parameters for regression tasks.""" + + def __init__( + self, + model: MPNN | None = None, + lightning_trainer: pl.Trainer | None = None, + batch_size: int = 64, + n_jobs: int = 1, + **kwargs: Any, # pylint: disable=unused-argument + ) -> None: + """Initialize the chemprop regressor model. + + Parameters + ---------- + model : MPNN | None, optional + The chemprop model to wrap. If None, a default model will be used. + lightning_trainer : pl.Trainer, optional + The lightning trainer to use, by default None + batch_size : int, optional (default=64) + The batch size to use. + n_jobs : int, optional (default=1) + The number of jobs to use. + kwargs : Any + Parameters set using `set_params`. + Can be used to modify components of the model. 
+ """ + if model is None: + bond_encoder = BondMessagePassing() + agg = SumAggregation() + predictor = RegressionFFN() + model = MPNN(message_passing=bond_encoder, agg=agg, predictor=predictor) + super().__init__( + model=model, + lightning_trainer=lightning_trainer, + batch_size=batch_size, + n_jobs=n_jobs, + ) From 10ec6d60ce1f1aa1b20332f3b7587ef2ed7a18a0 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 21:23:49 +0200 Subject: [PATCH 06/20] improve docstring --- molpipeline/estimators/chemprop/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index f3675395..1924f669 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -171,7 +171,7 @@ def to_encoder(self) -> ChempropNeuralFP: class ChempropClassifier(ChempropModel): - """Wrap Chemprop in a sklearn like classifier.""" + """Chemprop model with default parameters for binary classification tasks.""" def __init__( self, From 64b7cc2c085cae7d87a72accd2b111eaf8332290 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 21:50:10 +0200 Subject: [PATCH 07/20] add tests for chemprop --- test_extras/test_chemprop/test_models.py | 102 +++++++++++++++++++---- 1 file changed, 87 insertions(+), 15 deletions(-) diff --git a/test_extras/test_chemprop/test_models.py b/test_extras/test_chemprop/test_models.py index 5a12625c..4df3839b 100644 --- a/test_extras/test_chemprop/test_models.py +++ b/test_extras/test_chemprop/test_models.py @@ -3,19 +3,25 @@ import logging import unittest -from chemprop.nn.loss import LossFunction +from chemprop.nn.loss import LossFunction, BCELoss, MSELoss from lightning import pytorch as pl from sklearn.base import clone from torch import nn +from torch import Tensor from molpipeline.estimators.chemprop.component_wrapper import ( MPNN, BinaryClassificationFFN, + RegressionFFN, BondMessagePassing, MeanAggregation, SumAggregation, ) -from molpipeline.estimators.chemprop.models import ChempropModel +from molpipeline.estimators.chemprop.models import ( + ChempropClassifier, + ChempropModel, + ChempropRegressor, +) from molpipeline.estimators.chemprop.neural_fingerprint import ChempropNeuralFP logging.getLogger("lightning.pytorch.utilities.rank_zero").setLevel(logging.WARNING) @@ -41,16 +47,10 @@ def get_model() -> ChempropModel: return chemprop_model -class TestChempropModel(unittest.TestCase): - """Test the Chemprop model.""" - - def test_get_params(self) -> None: - """Test the get_params and set_params methods.""" - chemprop_model = get_model() - orig_params = chemprop_model.get_params(deep=True) - expected_params = { +DEFAULT_PARAMS = { "batch_size": 64, "lightning_trainer": pl.Trainer, + "model": MPNN, "model__agg__dim": 0, "model__agg": SumAggregation, "model__batch_norm": True, @@ -67,15 +67,45 @@ def test_get_params(self) -> None: "model__message_passing__dropout_rate": 0.0, "model__message_passing__undirected": False, "model__message_passing": BondMessagePassing, + "model__metric_list": None, + "model__predictor__activation": "relu", + "model__warmup_epochs": 2, + "model__predictor": BinaryClassificationFFN, + "model__predictor__criterion": BCELoss, + "model__predictor__dropout": 0, + "model__predictor__hidden_dim": 300, + "model__predictor__input_dim": 300, + "model__predictor__n_layers": 1, + "model__predictor__n_tasks": 1, + "model__predictor__output_transform": nn.Identity, + "model__predictor__task_weights": 
Tensor([1.0]), + "model__predictor__threshold": None, + "n_jobs": 1, } +NO_IDENTITY_CHECK = [ + "model__agg", + "model__message_passing", + "lightning_trainer", + "model", + "model__predictor", + "model__predictor__criterion", + "model__predictor__output_transform", +] + +class TestChempropModel(unittest.TestCase): + """Test the Chemprop model.""" + + def test_get_params(self) -> None: + """Test the get_params and set_params methods.""" + chemprop_model = get_model() + orig_params = chemprop_model.get_params(deep=True) + expected_params = dict(DEFAULT_PARAMS) # Shallow copy + + self.assertSetEqual(set(orig_params), set(expected_params)) # Check if the parameters are as expected for param_name, param in expected_params.items(): - if param_name in [ - "model__agg", - "model__message_passing", - "lightning_trainer", - ]: + if param_name in NO_IDENTITY_CHECK: if not isinstance(param, type): raise ValueError(f"{param_name} should be a type.") self.assertIsInstance(orig_params[param_name], param) @@ -141,3 +171,45 @@ def test_neural_fp(self) -> None: # the model should be cloned self.assertNotEqual(id(chemprop_model.model), id(neural_fp.model)) self.assertEqual(neural_fp.disable_fitting, True) + + +class TestChempropClassifier(unittest.TestCase): + """Test the Chemprop classifier model.""" + + def test_get_params(self) -> None: + """Test the get_params and set_params methods.""" + chemprop_model = ChempropClassifier() + param_dict = chemprop_model.get_params(deep=True) + expected_params = dict(DEFAULT_PARAMS) # Shallow copy + self.assertSetEqual(set(param_dict.keys()), set(expected_params.keys())) + for param_name, param in expected_params.items(): + if param_name in NO_IDENTITY_CHECK: + if not isinstance(param, type): + raise ValueError(f"{param_name} should be a type.") + self.assertIsInstance(param_dict[param_name], param) + else: + self.assertEqual( + param_dict[param_name], param, f"Test failed for {param_name}" + ) + + +class TestChempropRegressor(unittest.TestCase): + """Test the Chemprop regressor model.""" + + def test_get_params(self) -> None: + """Test the get_params and set_params methods.""" + chemprop_model = ChempropRegressor() + param_dict = chemprop_model.get_params(deep=True) + expected_params = dict(DEFAULT_PARAMS) + expected_params["model__predictor"] = RegressionFFN + expected_params["model__predictor__criterion"] = MSELoss + self.assertSetEqual(set(param_dict.keys()), set(expected_params.keys())) + for param_name, param in expected_params.items(): + if param_name in NO_IDENTITY_CHECK: + if not isinstance(param, type): + raise ValueError(f"{param_name} should be a type.") + self.assertIsInstance(param_dict[param_name], param) + else: + self.assertEqual( + param_dict[param_name], param, f"Test failed for {param_name}" + ) From d00aa3e8cd309897f4396a8a4547401f6e3963a7 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 21:50:18 +0200 Subject: [PATCH 08/20] fix requirements --- requirements_chemprop.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_chemprop.txt b/requirements_chemprop.txt index 81d53ece..0bd7e545 100644 --- a/requirements_chemprop.txt +++ b/requirements_chemprop.txt @@ -1,2 +1,2 @@ chemprop -lightning < 1000 \ No newline at end of file +lightning \ No newline at end of file From cdeba6d032507fd11f4eca99fcd5d891bb4e5405 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 21:54:19 +0200 Subject: [PATCH 09/20] unify workflows --- .github/workflows/linting.yml | 67 
++++++++++++++++++++++++++++++++- .github/workflows/unittests.yml | 56 --------------------------- 2 files changed, 66 insertions(+), 57 deletions(-) delete mode 100644 .github/workflows/unittests.yml diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 7986de7f..df576a14 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -30,7 +30,6 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install $(find . -name "requirement*" -type f -printf ' -r %p') pip install mypy mypy . || exit_code=$? mypy --install-types --non-interactive @@ -148,3 +147,69 @@ jobs: - name: Analysing the code with isort run: | isort --profile black . + + test_basis: + needs: + - pylint + - mypy + - pydocstyle + - docsig + - black + - flake8 + - interrogate + - bandit + - isort + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install package + run: | + python -m pip install --upgrade pip + pip install . + - name: Run unit-tests + run: | + python -m unittest discover -v -s tests -t . + + test_chemprop: + needs: + - test_basis + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.11 + uses: actions/setup-python@v3 + with: + python-version: "3.11" + - name: Install package + run: | + python -m pip install --upgrade pip + pip install torch + pip install .[chemprop] + - name: Run unit-tests for chemprop + run: | + python -m unittest discover -v -s test_extras/test_chemprop -t . + + test_notebooks: + needs: + - test_basis + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.11 + uses: actions/setup-python@v3 + with: + python-version: "3.11" + - name: Install package + run: | + python -m pip install --upgrade pip + pip install .[notebooks] + - name: Run unit-tests for notebooks + run: | + python test_extras/test_notebooks/test_notebooks.py --continue-on-failure diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml deleted file mode 100644 index 1faaf0f4..00000000 --- a/.github/workflows/unittests.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: Unit-Tests - -on: [push] - -jobs: - test_basis: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.10", "3.11", "3.12"] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install package - run: | - python -m pip install --upgrade pip - pip install . - - name: Run unit-tests - run: | - python -m unittest discover -v -s tests -t . - - test_chemprop: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.11 - uses: actions/setup-python@v3 - with: - python-version: "3.11" - - name: Install package - run: | - python -m pip install --upgrade pip - pip install torch - pip install .[chemprop] - - name: Run unit-tests for chemprop - run: | - python -m unittest discover -v -s test_extras/test_chemprop -t . 
- - test_notebooks: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.11 - uses: actions/setup-python@v3 - with: - python-version: "3.11" - - name: Install package - run: | - python -m pip install --upgrade pip - pip install .[notebooks] - - name: Run unit-tests for notebooks - run: | - python test_extras/test_notebooks/test_notebooks.py --continue-on-failure From f39d951a747bf5e4fd4c25e47285234d43211f71 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 21:55:38 +0200 Subject: [PATCH 10/20] isort + black --- test_extras/test_chemprop/test_models.py | 76 ++++++++++++------------ 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/test_extras/test_chemprop/test_models.py b/test_extras/test_chemprop/test_models.py index 4df3839b..7445102a 100644 --- a/test_extras/test_chemprop/test_models.py +++ b/test_extras/test_chemprop/test_models.py @@ -3,18 +3,17 @@ import logging import unittest -from chemprop.nn.loss import LossFunction, BCELoss, MSELoss +from chemprop.nn.loss import BCELoss, LossFunction, MSELoss from lightning import pytorch as pl from sklearn.base import clone -from torch import nn -from torch import Tensor +from torch import Tensor, nn from molpipeline.estimators.chemprop.component_wrapper import ( MPNN, BinaryClassificationFFN, - RegressionFFN, BondMessagePassing, MeanAggregation, + RegressionFFN, SumAggregation, ) from molpipeline.estimators.chemprop.models import ( @@ -48,40 +47,40 @@ def get_model() -> ChempropModel: DEFAULT_PARAMS = { - "batch_size": 64, - "lightning_trainer": pl.Trainer, - "model": MPNN, - "model__agg__dim": 0, - "model__agg": SumAggregation, - "model__batch_norm": True, - "model__final_lr": 0.0001, - "model__init_lr": 0.0001, - "model__max_lr": 0.001, - "model__message_passing__activation": "relu", - "model__message_passing__bias": False, - "model__message_passing__d_e": 14, - "model__message_passing__d_h": 300, - "model__message_passing__d_v": 72, - "model__message_passing__d_vd": None, - "model__message_passing__depth": 3, - "model__message_passing__dropout_rate": 0.0, - "model__message_passing__undirected": False, - "model__message_passing": BondMessagePassing, - "model__metric_list": None, - "model__predictor__activation": "relu", - "model__warmup_epochs": 2, - "model__predictor": BinaryClassificationFFN, - "model__predictor__criterion": BCELoss, - "model__predictor__dropout": 0, - "model__predictor__hidden_dim": 300, - "model__predictor__input_dim": 300, - "model__predictor__n_layers": 1, - "model__predictor__n_tasks": 1, - "model__predictor__output_transform": nn.Identity, - "model__predictor__task_weights": Tensor([1.0]), - "model__predictor__threshold": None, - "n_jobs": 1, - } + "batch_size": 64, + "lightning_trainer": pl.Trainer, + "model": MPNN, + "model__agg__dim": 0, + "model__agg": SumAggregation, + "model__batch_norm": True, + "model__final_lr": 0.0001, + "model__init_lr": 0.0001, + "model__max_lr": 0.001, + "model__message_passing__activation": "relu", + "model__message_passing__bias": False, + "model__message_passing__d_e": 14, + "model__message_passing__d_h": 300, + "model__message_passing__d_v": 72, + "model__message_passing__d_vd": None, + "model__message_passing__depth": 3, + "model__message_passing__dropout_rate": 0.0, + "model__message_passing__undirected": False, + "model__message_passing": BondMessagePassing, + "model__metric_list": None, + "model__predictor__activation": "relu", + "model__warmup_epochs": 2, + "model__predictor": BinaryClassificationFFN, + 
"model__predictor__criterion": BCELoss, + "model__predictor__dropout": 0, + "model__predictor__hidden_dim": 300, + "model__predictor__input_dim": 300, + "model__predictor__n_layers": 1, + "model__predictor__n_tasks": 1, + "model__predictor__output_transform": nn.Identity, + "model__predictor__task_weights": Tensor([1.0]), + "model__predictor__threshold": None, + "n_jobs": 1, +} NO_IDENTITY_CHECK = [ "model__agg", @@ -93,6 +92,7 @@ def get_model() -> ChempropModel: "model__predictor__output_transform", ] + class TestChempropModel(unittest.TestCase): """Test the Chemprop model.""" From e73ba53c6a2c1f218803513b5d8caea697eae392 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 22:17:44 +0200 Subject: [PATCH 11/20] improved logging --- molpipeline/estimators/chemprop/models.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index 1924f669..b88fe1d3 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -7,6 +7,7 @@ except ImportError: from typing_extensions import Self +from loguru import logger import numpy as np import numpy.typing as npt from sklearn.base import clone @@ -19,8 +20,11 @@ MulticlassClassificationFFN, ) from lightning import pytorch as pl -except ImportError: - pass +except ImportError as error: + logger.error( + "Chemprop is not installed. Please install it using `pip install chemprop`." + ) + logger.info(error) from molpipeline.estimators.chemprop.abstract import ABCChemprop From 880f55cf72414f962758e01bac2cdf0e082dcb7e Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 22:18:08 +0200 Subject: [PATCH 12/20] isort --- molpipeline/estimators/chemprop/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index b88fe1d3..5f301f08 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -7,9 +7,9 @@ except ImportError: from typing_extensions import Self -from loguru import logger import numpy as np import numpy.typing as npt +from loguru import logger from sklearn.base import clone from sklearn.utils.metaestimators import available_if From c9c74cb8f0a56ff699f1eb504cc80d6264eac5f4 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 22:18:49 +0200 Subject: [PATCH 13/20] update init --- molpipeline/estimators/chemprop/__init__.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/molpipeline/estimators/chemprop/__init__.py b/molpipeline/estimators/chemprop/__init__.py index 4c32d13b..e23b26c4 100644 --- a/molpipeline/estimators/chemprop/__init__.py +++ b/molpipeline/estimators/chemprop/__init__.py @@ -4,8 +4,18 @@ installed_packages = {pkg.name for pkg in pkgutil.iter_modules()} if "chemprop" in installed_packages: - from molpipeline.estimators.chemprop.models import ChempropModel # noqa + from molpipeline.estimators.chemprop.models import ( + ChempropClassifier, + ChempropModel, + ChempropNeuralFP, + ChempropRegressor, + ) - __all__ = ["ChempropModel"] + __all__ = [ + "ChempropClassifier", + "ChempropModel", + "ChempropNeuralFP", + "ChempropRegressor", + ] else: __all__ = [] From d041a2f6dbc2c086baf58cbf437a043b312800f9 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 22:32:18 +0200 Subject: [PATCH 14/20] ignore no imports - false positive --- 
molpipeline/estimators/chemprop/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/molpipeline/estimators/chemprop/__init__.py b/molpipeline/estimators/chemprop/__init__.py index e23b26c4..e9a932e0 100644 --- a/molpipeline/estimators/chemprop/__init__.py +++ b/molpipeline/estimators/chemprop/__init__.py @@ -4,7 +4,7 @@ installed_packages = {pkg.name for pkg in pkgutil.iter_modules()} if "chemprop" in installed_packages: - from molpipeline.estimators.chemprop.models import ( + from molpipeline.estimators.chemprop.models import ( # noqa: F401 ChempropClassifier, ChempropModel, ChempropNeuralFP, From 0840a9607cc9eb8fba837a94cd576118dcfecdbc Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 23:02:15 +0200 Subject: [PATCH 15/20] forward kwargs --- molpipeline/estimators/chemprop/models.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index 5f301f08..7f2b546e 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -183,7 +183,7 @@ def __init__( lightning_trainer: pl.Trainer | None = None, batch_size: int = 64, n_jobs: int = 1, - **kwargs: Any, # pylint: disable=unused-argument + **kwargs: Any, ) -> None: """Initialize the chemprop classifier model. @@ -211,6 +211,7 @@ def __init__( lightning_trainer=lightning_trainer, batch_size=batch_size, n_jobs=n_jobs, + **kwargs, ) if not self._is_binary_classifier(): raise ValueError("ChempropClassifier should be a binary classifier.") @@ -243,7 +244,7 @@ def __init__( lightning_trainer: pl.Trainer | None = None, batch_size: int = 64, n_jobs: int = 1, - **kwargs: Any, # pylint: disable=unused-argument + **kwargs: Any, ) -> None: """Initialize the chemprop regressor model. @@ -271,4 +272,5 @@ def __init__( lightning_trainer=lightning_trainer, batch_size=batch_size, n_jobs=n_jobs, + **kwargs, ) From b66d5db562add89a31de598e3ecb71b3f85d35de Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Wed, 24 Apr 2024 00:50:45 +0200 Subject: [PATCH 16/20] adapt to new chemprop output --- molpipeline/estimators/chemprop/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index 7f2b546e..6178b2e0 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -93,7 +93,8 @@ def _predict( self.model.eval() test_data = build_dataloader(X, num_workers=self.n_jobs, shuffle=False) predictions = self.lightning_trainer.predict(self.model, test_data) - prediction_array = np.array([pred.numpy() for pred in predictions[0]]) # type: ignore + prediction_array = np.vstack(predictions) # type: ignore + prediction_array = prediction_array.squeeze() # Check if the predictions have the same length as the input dataset if prediction_array.shape[0] != len(X): @@ -103,11 +104,10 @@ def _predict( # If the model is a binary classifier, return the probability of the positive class if self._is_binary_classifier(): - if prediction_array.shape[1] != 1: + if prediction_array.ndim != 1: raise ValueError( "Binary classification model should output a single probability." 
                )
-            prediction_array = prediction_array[:, 0]
         return prediction_array
 

From de1367446e2f2491b22ffd9eb609a8a5d9be28e3 Mon Sep 17 00:00:00 2001
From: Christian Feldmann
Date: Thu, 25 Apr 2024 16:56:42 +0200
Subject: [PATCH 17/20] implement tests for regression and classification

---
 .../test_chemprop/test_chemprop_pipeline.py   | 162 +++++++++++++++++-
 1 file changed, 161 insertions(+), 1 deletion(-)

diff --git a/test_extras/test_chemprop/test_chemprop_pipeline.py b/test_extras/test_chemprop/test_chemprop_pipeline.py
index 24cfc8f4..92fe69cc 100644
--- a/test_extras/test_chemprop/test_chemprop_pipeline.py
+++ b/test_extras/test_chemprop/test_chemprop_pipeline.py
@@ -1,9 +1,14 @@
 """Test module for behavior of chemprop in a pipeline."""
 
 import unittest
+from io import BytesIO
+from typing import TypeVar
 
+import joblib
 import numpy as np
+import pandas as pd
 from chemprop.nn.loss import LossFunction
+from lightning import pytorch as pl
 from sklearn.base import clone
 from torch import nn
 
@@ -15,7 +20,11 @@
     BondMessagePassing,
     SumAggregation,
 )
-from molpipeline.estimators.chemprop.models import ChempropModel
+from molpipeline.estimators.chemprop.models import (
+    ChempropClassifier,
+    ChempropModel,
+    ChempropRegressor,
+)
 from molpipeline.mol2any.mol2chemprop import MolToChemprop
 from molpipeline.pipeline import Pipeline
 from molpipeline.post_prediction import PostPredictionWrapper
@@ -63,6 +72,100 @@ def get_model_pipeline() -> Pipeline:
     return model_pipeline
 
 
+DEFAULT_TRAINER = pl.Trainer(
+    accelerator="cpu",
+    logger=False,
+    enable_checkpointing=False,
+    max_epochs=5,
+    enable_model_summary=False,
+    enable_progress_bar=False,
+    val_check_interval=0.0,
+)
+
+
+def get_regression_pipeline() -> Pipeline:
+    """Get the Chemprop model pipeline for regression.
+
+    Returns
+    -------
+    Pipeline
+        The Chemprop model pipeline for regression.
+    """
+
+    smiles2mol = SmilesToMol()
+    mol2chemprop = MolToChemprop()
+    error_filter = ErrorFilter(filter_everything=True)
+    filter_reinserter = FilterReinserter.from_error_filter(
+        error_filter, fill_value=np.nan
+    )
+    chemprop_model = ChempropRegressor(lightning_trainer=DEFAULT_TRAINER)
+    model_pipeline = Pipeline(
+        steps=[
+            ("smiles2mol", smiles2mol),
+            ("mol2chemprop", mol2chemprop),
+            ("error_filter", error_filter),
+            ("model", chemprop_model),
+            ("filter_reinserter", PostPredictionWrapper(filter_reinserter)),
+        ],
+    )
+    return model_pipeline
+
+
+def get_classification_pipeline() -> Pipeline:
+    """Get the Chemprop model pipeline for classification.
+
+    Returns
+    -------
+    Pipeline
+        The Chemprop model pipeline for classification.
+    """
+    smiles2mol = SmilesToMol()
+    mol2chemprop = MolToChemprop()
+    error_filter = ErrorFilter(filter_everything=True)
+    filter_reinserter = FilterReinserter.from_error_filter(
+        error_filter, fill_value=np.nan
+    )
+    chemprop_model = ChempropClassifier(lightning_trainer=DEFAULT_TRAINER)
+    model_pipeline = Pipeline(
+        steps=[
+            ("smiles2mol", smiles2mol),
+            ("mol2chemprop", mol2chemprop),
+            ("error_filter", error_filter),
+            ("model", chemprop_model),
+            ("filter_reinserter", PostPredictionWrapper(filter_reinserter)),
+        ],
+    )
+    return model_pipeline
+
+
+_T = TypeVar("_T")
+
+
+def joblib_dump_load(obj: _T) -> _T:
+    """Dump and load an object using joblib.
+
+    Notes
+    -----
+    The object is not dumped to disk but to a BytesIO object.
+
+    Parameters
+    ----------
+    obj : _T
+        The object to dump and load.
+
+    Returns
+    -------
+    _T
+        The loaded object.
+ """ + bytes_container = BytesIO() + joblib.dump(obj, bytes_container) + bytes_container.seek(0) # update to enable reading + bytes_model = bytes_container.read() + + return joblib.load(BytesIO(bytes_model)) + + class TestChempropPipeline(unittest.TestCase): """Test the Chemprop model pipeline.""" @@ -155,3 +258,60 @@ def test_error_handling(self) -> None: proba = pipeline.predict_proba(smiles) self.assertEqual(len(proba), 4) self.assertTrue(np.isnan(proba[-1]).all()) + + +class TestRegressionPipeline(unittest.TestCase): + """Test the Chemprop model pipeline for regression.""" + + def test_prediction(self) -> None: + """Test the prediction of the regression model.""" + + molecule_net_logd_df = pd.read_csv( + "https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/Lipophilicity.csv" + ).head(1000) + regression_model = get_regression_pipeline() + regression_model.fit( + molecule_net_logd_df["smiles"].tolist(), + molecule_net_logd_df["exp"].to_numpy(), + ) + pred = regression_model.predict(molecule_net_logd_df["smiles"].tolist()) + + self.assertEqual(len(pred), len(molecule_net_logd_df)) + + model_copy = joblib_dump_load(regression_model) + pred_copy = model_copy.predict(molecule_net_logd_df["smiles"].tolist()) + self.assertTrue(np.allclose(pred, pred_copy)) + + +class TestClassificationPipeline(unittest.TestCase): + """Test the Chemprop model pipeline for classification.""" + + def test_prediction(self) -> None: + """Test the prediction of the classification model.""" + + molecule_net_bbbp_df = pd.read_csv( + "https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/BBBP.csv" + ).head(1000) + classification_model = get_classification_pipeline() + classification_model.fit( + molecule_net_bbbp_df["smiles"].tolist(), + molecule_net_bbbp_df["p_np"].to_numpy(), + ) + pred = classification_model.predict(molecule_net_bbbp_df["smiles"].tolist()) + proba = classification_model.predict_proba( + molecule_net_bbbp_df["smiles"].tolist() + ) + self.assertEqual(len(pred), len(molecule_net_bbbp_df)) + self.assertEqual(proba.shape[1], 2) + self.assertEqual(proba.shape[0], len(molecule_net_bbbp_df)) + + model_copy = joblib_dump_load(classification_model) + pred_copy = model_copy.predict(molecule_net_bbbp_df["smiles"].tolist()) + proba_copy = model_copy.predict_proba(molecule_net_bbbp_df["smiles"].tolist()) + + nan_indices = np.isnan(pred) + self.assertListEqual(nan_indices.tolist(), np.isnan(pred_copy).tolist()) + self.assertTrue(np.allclose(pred[~nan_indices], pred_copy[~nan_indices])) + + self.assertEqual(proba.shape, proba_copy.shape) + self.assertTrue(np.allclose(proba[~nan_indices], proba_copy[~nan_indices])) From 11cdd8ed8713f7f0564895ad39356455b4d8c81a Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Thu, 25 Apr 2024 17:06:54 +0200 Subject: [PATCH 18/20] using try except -> pkgutil does not work with notebook --- molpipeline/estimators/chemprop/__init__.py | 7 ++----- molpipeline/mol2any/__init__.py | 7 +++---- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/molpipeline/estimators/chemprop/__init__.py b/molpipeline/estimators/chemprop/__init__.py index e9a932e0..656d1833 100644 --- a/molpipeline/estimators/chemprop/__init__.py +++ b/molpipeline/estimators/chemprop/__init__.py @@ -1,9 +1,6 @@ """Initialize Chemprop module.""" -import pkgutil - -installed_packages = {pkg.name for pkg in pkgutil.iter_modules()} -if "chemprop" in installed_packages: +try: from molpipeline.estimators.chemprop.models import ( # noqa: F401 ChempropClassifier, ChempropModel, @@ -17,5 +14,5 @@ 
"ChempropNeuralFP", "ChempropRegressor", ] -else: +except ImportError: __all__ = [] diff --git a/molpipeline/mol2any/__init__.py b/molpipeline/mol2any/__init__.py index 96509f73..da7bb760 100644 --- a/molpipeline/mol2any/__init__.py +++ b/molpipeline/mol2any/__init__.py @@ -1,7 +1,5 @@ """Init the module for mol2any pipeline elements.""" -import pkgutil - from molpipeline.mol2any.mol2bin import MolToBinary from molpipeline.mol2any.mol2concatinated_vector import MolToConcatenatedVector from molpipeline.mol2any.mol2inchi import MolToInchi, MolToInchiKey @@ -21,8 +19,9 @@ "MolToRDKitPhysChem", ] -installed_packages = {pkg.name for pkg in pkgutil.iter_modules()} -if "chemprop" in installed_packages: +try: from molpipeline.mol2any.mol2chemprop import MolToChemprop # noqa __all__.append("MolToChemprop") +except ImportError: + pass From 83cd8b5e161d41ab3009e8a2d0f3a72665e3f6e3 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Thu, 25 Apr 2024 17:37:30 +0200 Subject: [PATCH 19/20] removing unnecessary code --- molpipeline/estimators/chemprop/component_wrapper.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/molpipeline/estimators/chemprop/component_wrapper.py b/molpipeline/estimators/chemprop/component_wrapper.py index b2d41068..043c58c0 100644 --- a/molpipeline/estimators/chemprop/component_wrapper.py +++ b/molpipeline/estimators/chemprop/component_wrapper.py @@ -21,7 +21,6 @@ ) from chemprop.nn.transforms import UnscaleTransform from chemprop.nn.utils import Activation, get_activation_function -from chemprop.utils.registry import Factory from sklearn.base import BaseEstimator from torch import Tensor, nn @@ -165,13 +164,8 @@ def __init__( output_transform : UnscaleTransform or None, optional (default=None) Transformations to apply to the output. None defaults to UnscaleTransform. 
""" - if criterion is None: - task_weights = torch.ones(n_tasks) if task_weights is None else task_weights - criterion = Factory.build( - self._T_default_criterion, - task_weights=task_weights, - threshold=threshold, - ) + if task_weights is None: + task_weights = torch.ones(n_tasks) super().__init__( n_tasks=n_tasks, input_dim=input_dim, From c7a4bffd9cf3716ee5abf452a9cc2bba38b20bd7 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Thu, 25 Apr 2024 17:52:40 +0200 Subject: [PATCH 20/20] implement remaining predictors --- .../estimators/chemprop/component_wrapper.py | 82 +++++++++++++++++-- 1 file changed, 76 insertions(+), 6 deletions(-) diff --git a/molpipeline/estimators/chemprop/component_wrapper.py b/molpipeline/estimators/chemprop/component_wrapper.py index 043c58c0..f74c8824 100644 --- a/molpipeline/estimators/chemprop/component_wrapper.py +++ b/molpipeline/estimators/chemprop/component_wrapper.py @@ -10,12 +10,36 @@ from chemprop.nn.agg import MeanAggregation as _MeanAggregation from chemprop.nn.agg import SumAggregation as _SumAggregation from chemprop.nn.ffn import MLP -from chemprop.nn.loss import BCELoss, LossFunction, MSELoss +from chemprop.nn.loss import ( + BCELoss, + BinaryDirichletLoss, + CrossEntropyLoss, + EvidentialLoss, + LossFunction, + MSELoss, + MulticlassDirichletLoss, + MVELoss, + SIDLoss, +) from chemprop.nn.message_passing import BondMessagePassing as _BondMessagePassing from chemprop.nn.message_passing import MessagePassing -from chemprop.nn.metrics import BinaryAUROCMetric, Metric, MSEMetric +from chemprop.nn.metrics import ( + BinaryAUROCMetric, + CrossEntropyMetric, + Metric, + MSEMetric, + SIDMetric, +) from chemprop.nn.predictors import BinaryClassificationFFN as _BinaryClassificationFFN +from chemprop.nn.predictors import BinaryDirichletFFN as _BinaryDirichletFFN +from chemprop.nn.predictors import EvidentialFFN as _EvidentialFFN +from chemprop.nn.predictors import ( + MulticlassClassificationFFN as _MulticlassClassificationFFN, +) +from chemprop.nn.predictors import MulticlassDirichletFFN as _MulticlassDirichletFFN +from chemprop.nn.predictors import MveFFN as _MveFFN from chemprop.nn.predictors import RegressionFFN as _RegressionFFN +from chemprop.nn.predictors import SpectralFFN as _SpectralFFN from chemprop.nn.predictors import ( _FFNPredictorBase as _Predictor, # pylint: disable=protected-access ) @@ -265,6 +289,28 @@ def set_params(self, **params: Any) -> Self: return self +class RegressionFFN(PredictorWrapper, _RegressionFFN): # type: ignore + """A wrapper for the RegressionFFN class.""" + + n_targets: int = 1 + _T_default_criterion = MSELoss + _T_default_metric = MSEMetric + + +class MveFFN(PredictorWrapper, _MveFFN): # type: ignore + """A wrapper for the MveFFN class.""" + + n_targets: int = 2 + _T_default_criterion = MVELoss + + +class EvidentialFFN(PredictorWrapper, _EvidentialFFN): # type: ignore + """A wrapper for the EvidentialFFN class.""" + + n_targets: int = 4 + _T_default_criterion = EvidentialLoss + + class BinaryClassificationFFN(PredictorWrapper, _BinaryClassificationFFN): # type: ignore """A wrapper for the BinaryClassificationFFN class.""" @@ -273,12 +319,36 @@ class BinaryClassificationFFN(PredictorWrapper, _BinaryClassificationFFN): # ty _T_default_metric = BinaryAUROCMetric -class RegressionFFN(PredictorWrapper, _RegressionFFN): # type: ignore - """A wrapper for the RegressionFFN class.""" +class BinaryDirichletFFN(PredictorWrapper, _BinaryDirichletFFN): # type: ignore + """A wrapper for the BinaryDirichletFFN class.""" + + 
n_targets: int = 2 + _T_default_criterion = BinaryDirichletLoss + _T_default_metric = BinaryAUROCMetric + + +class MulticlassClassificationFFN(PredictorWrapper, _MulticlassClassificationFFN): # type: ignore + """A wrapper for the MulticlassClassificationFFN class.""" n_targets: int = 1 - _T_default_criterion = MSELoss - _T_default_metric = MSEMetric + _T_default_criterion = CrossEntropyLoss + _T_default_metric = CrossEntropyMetric + + +class MulticlassDirichletFFN(PredictorWrapper, _MulticlassDirichletFFN): # type: ignore + """A wrapper for the MulticlassDirichletFFN class.""" + + n_targets: int = 1 + _T_default_criterion = MulticlassDirichletLoss + _T_default_metric = CrossEntropyMetric + + +class SpectralFFN(PredictorWrapper, _SpectralFFN): # type: ignore + """A wrapper for the SpectralFFN class.""" + + n_targets: int = 1 + _T_default_criterion = SIDLoss + _T_default_metric = SIDMetric class MPNN(_MPNN, BaseEstimator):
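
For reference, a minimal end-to-end sketch of the ChempropRegressor introduced in this series, mirroring get_regression_pipeline() from test_chemprop_pipeline.py above. The SMILES strings and target values are made-up example data, and the trainer settings follow the DEFAULT_TRAINER used in the tests; this is an illustrative sketch, not code from the patches.

    from lightning import pytorch as pl

    from molpipeline.any2mol import SmilesToMol
    from molpipeline.estimators.chemprop import ChempropRegressor
    from molpipeline.mol2any import MolToChemprop
    from molpipeline.pipeline import Pipeline

    # Trainer settings mirroring DEFAULT_TRAINER from the tests above.
    trainer = pl.Trainer(
        accelerator="cpu",
        logger=False,
        enable_checkpointing=False,
        max_epochs=5,
        enable_model_summary=False,
        enable_progress_bar=False,
    )

    pipeline = Pipeline(
        steps=[
            ("smiles2mol", SmilesToMol()),      # SMILES -> RDKit molecule
            ("mol2chemprop", MolToChemprop()),  # molecule -> chemprop MolGraph
            ("model", ChempropRegressor(lightning_trainer=trainer)),
        ],
    )

    # Made-up example data: three SMILES with arbitrary regression targets.
    smiles = ["CCO", "c1ccccc1", "CC(=O)O"]
    targets = [0.2, 1.5, -0.3]
    pipeline.fit(smiles, targets)
    predictions = pipeline.predict(smiles)  # one float per input molecule

With the error-handling elements used in the tests (ErrorFilter plus PostPredictionWrapper around a FilterReinserter), unparseable SMILES would be returned as NaN instead of raising.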