From 8f43d432816893b2f54575458348bb8c6ab9559f Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Mon, 22 Apr 2024 20:51:39 +0200 Subject: [PATCH 01/20] adapt to newest chemprop version + implement regressor --- molpipeline/estimators/chemprop/abstract.py | 6 +- .../estimators/chemprop/component_wrapper.py | 108 +++++++++++------- molpipeline/estimators/chemprop/models.py | 10 +- .../test_chemprop/test_chemprop_pipeline.py | 10 ++ .../test_chemprop/test_component_wrapper.py | 10 ++ test_extras/test_chemprop/test_models.py | 20 +++- 6 files changed, 111 insertions(+), 53 deletions(-) diff --git a/molpipeline/estimators/chemprop/abstract.py b/molpipeline/estimators/chemprop/abstract.py index 262b5a60..5c586d90 100644 --- a/molpipeline/estimators/chemprop/abstract.py +++ b/molpipeline/estimators/chemprop/abstract.py @@ -13,10 +13,10 @@ import numpy.typing as npt try: - from chemprop.data import MoleculeDataset, MolGraphDataLoader + from chemprop.data import MoleculeDataset, build_dataloader from chemprop.models.model import MPNN from lightning import pytorch as pl -except ImportError: +except ImportError as error: pass from sklearn.base import BaseEstimator @@ -90,7 +90,7 @@ def fit( if y.ndim == 1: y = y.reshape(-1, 1) X.Y = y - training_data = MolGraphDataLoader( + training_data = build_dataloader( X, batch_size=self.batch_size, num_workers=self.n_jobs ) self.lightning_trainer.fit(self.model, training_data) diff --git a/molpipeline/estimators/chemprop/component_wrapper.py b/molpipeline/estimators/chemprop/component_wrapper.py index 93e1581e..6c62d3a1 100644 --- a/molpipeline/estimators/chemprop/component_wrapper.py +++ b/molpipeline/estimators/chemprop/component_wrapper.py @@ -1,5 +1,6 @@ """Wrapper classes for the chemprop components to make them compatible with scikit-learn.""" +import abc from typing import Any, Iterable, Self import torch @@ -9,13 +10,18 @@ from chemprop.nn.agg import MeanAggregation as _MeanAggregation from chemprop.nn.agg import SumAggregation as _SumAggregation from chemprop.nn.ffn import MLP -from chemprop.nn.loss import LossFunction +from chemprop.nn.loss import BCELoss, LossFunction, MSELoss from chemprop.nn.message_passing import BondMessagePassing as _BondMessagePassing from chemprop.nn.message_passing import MessagePassing -from chemprop.nn.metrics import BCELoss, Metric +from chemprop.nn.metrics import BinaryAUROCMetric, Metric, MSEMetric from chemprop.nn.predictors import BinaryClassificationFFN as _BinaryClassificationFFN -from chemprop.nn.predictors import Predictor +from chemprop.nn.predictors import RegressionFFN as _RegressionFFN +from chemprop.nn.predictors import ( + _FFNPredictorBase as _Predictor, # pylint: disable=protected-access +) +from chemprop.nn.transforms import UnscaleTransform from chemprop.nn.utils import Activation, get_activation_function +from chemprop.utils.registry import Factory from sklearn.base import BaseEstimator from torch import Tensor, nn @@ -115,11 +121,11 @@ def set_params(self, **params: Any) -> Self: # pylint: disable=too-many-ancestors, too-many-instance-attributes -class BinaryClassificationFFN(_BinaryClassificationFFN, BaseEstimator): - """A wrapper for the BinaryClassificationFFN class.""" +class PredictorWrapper(_Predictor, BaseEstimator, abc.ABC): # type: ignore + """Abstract wrapper for the Predictor class.""" - n_targets: int = 1 - _default_criterion = BCELoss() + _T_default_criterion: LossFunction + _T_default_metric: Metric def __init__( self, @@ -130,6 +136,9 @@ def __init__( dropout: float = 0, activation: 
str = "relu", criterion: LossFunction | None = None, + task_weights: Tensor | None = None, + threshold: float | None = None, + output_transform: UnscaleTransform | None = None, ): """Initialize the BinaryClassificationFFN class. @@ -150,6 +159,13 @@ def __init__( criterion : LossFunction or None, optional (default=None) Loss function. None defaults to BCELoss. """ + if criterion is None: + task_weights = torch.ones(n_tasks) if task_weights is None else task_weights + criterion = Factory.build( + self._T_default_criterion, + task_weights=task_weights, + threshold=threshold, + ) super().__init__( n_tasks=n_tasks, input_dim=input_dim, @@ -158,6 +174,7 @@ def __init__( dropout=dropout, activation=activation, criterion=criterion, + output_transform=output_transform, ) self.n_tasks = n_tasks self._input_dim = input_dim @@ -165,6 +182,8 @@ def __init__( self.n_layers = n_layers self.dropout = dropout self.activation = activation + self.task_weights = task_weights + self.threshold = threshold @property def input_dim(self) -> int: @@ -218,13 +237,13 @@ def reinitialize_fnn(self) -> Self: Self The reinitialized feedforward network. """ - self.ffn = MLP( - self.input_dim, - self.n_tasks * self.n_targets, - self.hidden_dim, - self.n_layers, - self.dropout, - self.activation, + self.ffn = MLP.build( + input_dim=self.input_dim, + output_dim=self.n_tasks * self.n_targets, + hidden_dim=self.hidden_dim, + n_layers=self.n_layers, + dropout=self.dropout, + activation=self.activation, ) return self @@ -246,6 +265,22 @@ def set_params(self, **params: Any) -> Self: return self +class BinaryClassificationFFN(PredictorWrapper, _BinaryClassificationFFN): # type: ignore + """A wrapper for the BinaryClassificationFFN class.""" + + n_targets: int = 1 + _T_default_criterion = BCELoss + _T_default_metric = BinaryAUROCMetric + + +class RegressionFFN(PredictorWrapper, _RegressionFFN): # type: ignore + """A wrapper for the RegressionFFN class.""" + + n_targets: int = 1 + _T_default_criterion = MSELoss + _T_default_metric = MSEMetric + + class MPNN(_MPNN, BaseEstimator): """A wrapper for the MPNN class. @@ -253,14 +288,15 @@ class MPNN(_MPNN, BaseEstimator): and a feedforward network for prediction. """ + bn: nn.BatchNorm1d | nn.Identity + def __init__( self, message_passing: MessagePassing, agg: Aggregation, - predictor: Predictor, + predictor: PredictorWrapper, batch_norm: bool = True, metric_list: Iterable[Metric] | None = None, - task_weight: Tensor | None = None, warmup_epochs: int = 2, init_lr: float = 1e-4, max_lr: float = 1e-3, @@ -280,8 +316,6 @@ def __init__( Whether to use batch normalization. metric_list : Iterable[Metric] | None, optional (default=None) The metrics to use for evaluation. - task_weight : Tensor | None, optional (default=None) - The weights to use for each task during training. If None, use uniform weights. warmup_epochs : int, optional (default=2) The number of epochs to use for the learning rate warmup. init_lr : float, optional (default=1e-4) @@ -292,20 +326,18 @@ def __init__( The final learning rate. 
""" super().__init__( - message_passing, - agg, - predictor, - batch_norm, - metric_list, - task_weight, - warmup_epochs, - init_lr, - max_lr, - final_lr, + message_passing=message_passing, + agg=agg, + predictor=predictor, + batch_norm=batch_norm, + metrics=metric_list, + warmup_epochs=warmup_epochs, + init_lr=init_lr, + max_lr=max_lr, + final_lr=final_lr, ) self.metric_list = metric_list self.batch_norm = batch_norm - self.task_weight = task_weight def reinitialize_network(self) -> Self: """Reinitialize the network with the current parameters. @@ -315,21 +347,17 @@ def reinitialize_network(self) -> Self: Self The reinitialized network. """ - self.bn = ( - nn.BatchNorm1d(self.message_passing.output_dim) - if self.batch_norm - else nn.Identity() - ) + if self.batch_norm: + self.bn = nn.BatchNorm1d(self.message_passing.output_dim) + else: + self.bn = nn.Identity() + if self.metric_list is None: # pylint: disable=protected-access - self.metrics = [self.predictor._default_metric, self.criterion] + self.metrics = [self.predictor._T_default_metric, self.criterion] else: self.metrics = list(self.metric_list) + [self.criterion] - if self.task_weight is None: - w_t = torch.ones(self.n_tasks) - else: - w_t = torch.tensor(self.task_weight) - self.w_t = nn.Parameter(w_t.unsqueeze(0), False) + return self def set_params(self, **params: Any) -> Self: diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index 6a74cc87..9ed0312a 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -13,7 +13,7 @@ from sklearn.utils.metaestimators import available_if try: - from chemprop.data import MoleculeDataset, MolGraphDataLoader + from chemprop.data import MoleculeDataset, build_dataloader from chemprop.nn.predictors import ( BinaryClassificationFFNBase, MulticlassClassificationFFN, @@ -86,9 +86,9 @@ def _predict( The predictions for the input data. """ self.model.eval() - test_data = MolGraphDataLoader(X, num_workers=self.n_jobs, shuffle=False) + test_data = build_dataloader(X, num_workers=self.n_jobs, shuffle=False) predictions = self.lightning_trainer.predict(self.model, test_data) - prediction_array = np.array([pred.numpy() for pred in predictions]) # type: ignore + prediction_array = np.array([pred.numpy() for pred in predictions[0]]) # type: ignore # Check if the predictions have the same length as the input dataset if prediction_array.shape[0] != len(X): @@ -98,11 +98,11 @@ def _predict( # If the model is a binary classifier, return the probability of the positive class if self._is_binary_classifier(): - if prediction_array.shape[1] != 1 or prediction_array.shape[2] != 1: + if prediction_array.shape[1] != 1: raise ValueError( "Binary classification model should output a single probability." 
) - prediction_array = prediction_array[:, 0, 0] + prediction_array = prediction_array[:, 0] return prediction_array def predict( diff --git a/test_extras/test_chemprop/test_chemprop_pipeline.py b/test_extras/test_chemprop/test_chemprop_pipeline.py index 31de13e6..24cfc8f4 100644 --- a/test_extras/test_chemprop/test_chemprop_pipeline.py +++ b/test_extras/test_chemprop/test_chemprop_pipeline.py @@ -3,7 +3,9 @@ import unittest import numpy as np +from chemprop.nn.loss import LossFunction from sklearn.base import clone +from torch import nn from molpipeline.any2mol import SmilesToMol from molpipeline.error_handling import ErrorFilter, FilterReinserter @@ -106,6 +108,14 @@ def test_clone(self) -> None: self.assertEqual( param.__class__, cloned_params[param_name].__class__ ) + elif isinstance(param, LossFunction): + self.assertEqual( + param.state_dict()["task_weights"], + cloned_params[param_name].state_dict()["task_weights"], + ) + self.assertEqual(type(param), type(cloned_params[param_name])) + elif isinstance(param, nn.Identity): + self.assertEqual(type(param), type(cloned_params[param_name])) else: self.assertEqual( param, cloned_params[param_name], f"Failed for {param_name}" diff --git a/test_extras/test_chemprop/test_component_wrapper.py b/test_extras/test_chemprop/test_component_wrapper.py index 02dcdd54..53a901aa 100644 --- a/test_extras/test_chemprop/test_component_wrapper.py +++ b/test_extras/test_chemprop/test_component_wrapper.py @@ -2,7 +2,9 @@ import unittest +from chemprop.nn.loss import LossFunction from sklearn.base import clone +from torch import nn from molpipeline.estimators.chemprop.component_wrapper import ( MPNN, @@ -113,6 +115,14 @@ def test_clone(self) -> None: clone_param = mpnn_clone.get_params(deep=True)[param_name] if hasattr(param, "get_params"): self.assertEqual(param.__class__, clone_param.__class__) + elif isinstance(param, LossFunction): + self.assertEqual( + param.state_dict()["task_weights"], + clone_param.state_dict()["task_weights"], + ) + self.assertEqual(type(param), type(clone_param)) + elif isinstance(param, nn.Identity): + self.assertEqual(type(param), type(clone_param)) else: self.assertEqual(param, clone_param) diff --git a/test_extras/test_chemprop/test_models.py b/test_extras/test_chemprop/test_models.py index f3a88e09..5a12625c 100644 --- a/test_extras/test_chemprop/test_models.py +++ b/test_extras/test_chemprop/test_models.py @@ -3,8 +3,10 @@ import logging import unittest +from chemprop.nn.loss import LossFunction from lightning import pytorch as pl from sklearn.base import clone +from torch import nn from molpipeline.estimators.chemprop.component_wrapper import ( MPNN, @@ -46,7 +48,6 @@ def test_get_params(self) -> None: """Test the get_params and set_params methods.""" chemprop_model = get_model() orig_params = chemprop_model.get_params(deep=True) - expected_params = { "batch_size": 64, "lightning_trainer": pl.Trainer, @@ -60,7 +61,7 @@ def test_get_params(self) -> None: "model__message_passing__bias": False, "model__message_passing__d_e": 14, "model__message_passing__d_h": 300, - "model__message_passing__d_v": 133, + "model__message_passing__d_v": 72, "model__message_passing__d_vd": None, "model__message_passing__depth": 3, "model__message_passing__dropout_rate": 0.0, @@ -79,7 +80,9 @@ def test_get_params(self) -> None: raise ValueError(f"{param_name} should be a type.") self.assertIsInstance(orig_params[param_name], param) else: - self.assertEqual(orig_params[param_name], param) + self.assertEqual( + orig_params[param_name], param, f"Test 
failed for {param_name}" + ) new_params = { "batch_size": 32, @@ -102,7 +105,6 @@ def test_clone(self) -> None: cloned_model = clone(chemprop_model) self.assertIsInstance(cloned_model, ChempropModel) cloned_model_params = cloned_model.get_params(deep=True) - for param_name, param in chemprop_model.get_params(deep=True).items(): cloned_param = cloned_model_params[param_name] if hasattr(param, "get_params"): @@ -110,8 +112,16 @@ def test_clone(self) -> None: self.assertNotEqual(id(param), id(cloned_param)) elif isinstance(param, pl.Trainer): self.assertIsInstance(cloned_param, pl.Trainer) + elif isinstance(param, LossFunction): + self.assertEqual( + param.state_dict()["task_weights"], + cloned_param.state_dict()["task_weights"], + ) + self.assertEqual(type(param), type(cloned_param)) + elif isinstance(param, nn.Identity): + self.assertEqual(type(param), type(cloned_param)) else: - self.assertEqual(param, cloned_param) + self.assertEqual(param, cloned_param, f"Test failed for {param_name}") def test_classifier_methods(self) -> None: """Test the classifier methods.""" From 0cf0eda1e2e8f5fe14e11a69071ad5e40b38b2ad Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Mon, 22 Apr 2024 20:53:31 +0200 Subject: [PATCH 02/20] remove unused variable --- molpipeline/estimators/chemprop/abstract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/molpipeline/estimators/chemprop/abstract.py b/molpipeline/estimators/chemprop/abstract.py index 5c586d90..c9018cac 100644 --- a/molpipeline/estimators/chemprop/abstract.py +++ b/molpipeline/estimators/chemprop/abstract.py @@ -16,7 +16,7 @@ from chemprop.data import MoleculeDataset, build_dataloader from chemprop.models.model import MPNN from lightning import pytorch as pl -except ImportError as error: +except ImportError: pass from sklearn.base import BaseEstimator From 07843260cade52287783042fb68fbb845e9f18f1 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Mon, 22 Apr 2024 20:55:01 +0200 Subject: [PATCH 03/20] add missing params to docstring --- molpipeline/estimators/chemprop/component_wrapper.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/molpipeline/estimators/chemprop/component_wrapper.py b/molpipeline/estimators/chemprop/component_wrapper.py index 6c62d3a1..b2d41068 100644 --- a/molpipeline/estimators/chemprop/component_wrapper.py +++ b/molpipeline/estimators/chemprop/component_wrapper.py @@ -158,6 +158,12 @@ def __init__( Activation function. criterion : LossFunction or None, optional (default=None) Loss function. None defaults to BCELoss. + task_weights : Tensor or None, optional (default=None) + Task weights. + threshold : float or None, optional (default=None) + Threshold for binary classification. + output_transform : UnscaleTransform or None, optional (default=None) + Transformations to apply to the output. None defaults to UnscaleTransform. 
""" if criterion is None: task_weights = torch.ones(n_tasks) if task_weights is None else task_weights From fb29c7a38f391e8a7b973907947ef22520306b0d Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Mon, 22 Apr 2024 20:57:14 +0200 Subject: [PATCH 04/20] remove link to custom repo and use default repo instead --- requirements_chemprop.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_chemprop.txt b/requirements_chemprop.txt index 41ccfc6a..81d53ece 100644 --- a/requirements_chemprop.txt +++ b/requirements_chemprop.txt @@ -1,2 +1,2 @@ -chemprop @ https://github.com//c-w-feldmann/chemprop/archive/v2/molpipeline_requirement.zip +chemprop lightning < 1000 \ No newline at end of file From 95aca5f1c0adab39655f95d8c18b893f6420e908 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 21:23:30 +0200 Subject: [PATCH 05/20] implement ChempropRegressor --- molpipeline/estimators/chemprop/models.py | 41 +++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index 9ed0312a..f3675395 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -28,6 +28,7 @@ MPNN, BinaryClassificationFFN, BondMessagePassing, + RegressionFFN, SumAggregation, ) from molpipeline.estimators.chemprop.neural_fingerprint import ChempropNeuralFP @@ -227,3 +228,43 @@ def set_params(self, **params: Any) -> Self: if not self._is_binary_classifier(): raise ValueError("ChempropClassifier should be a binary classifier.") return self + + +class ChempropRegressor(ChempropModel): + """Chemprop model with default parameters for regression tasks.""" + + def __init__( + self, + model: MPNN | None = None, + lightning_trainer: pl.Trainer | None = None, + batch_size: int = 64, + n_jobs: int = 1, + **kwargs: Any, # pylint: disable=unused-argument + ) -> None: + """Initialize the chemprop regressor model. + + Parameters + ---------- + model : MPNN | None, optional + The chemprop model to wrap. If None, a default model will be used. + lightning_trainer : pl.Trainer, optional + The lightning trainer to use, by default None + batch_size : int, optional (default=64) + The batch size to use. + n_jobs : int, optional (default=1) + The number of jobs to use. + kwargs : Any + Parameters set using `set_params`. + Can be used to modify components of the model. 
+ """ + if model is None: + bond_encoder = BondMessagePassing() + agg = SumAggregation() + predictor = RegressionFFN() + model = MPNN(message_passing=bond_encoder, agg=agg, predictor=predictor) + super().__init__( + model=model, + lightning_trainer=lightning_trainer, + batch_size=batch_size, + n_jobs=n_jobs, + ) From 10ec6d60ce1f1aa1b20332f3b7587ef2ed7a18a0 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 21:23:49 +0200 Subject: [PATCH 06/20] improve docstring --- molpipeline/estimators/chemprop/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index f3675395..1924f669 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -171,7 +171,7 @@ def to_encoder(self) -> ChempropNeuralFP: class ChempropClassifier(ChempropModel): - """Wrap Chemprop in a sklearn like classifier.""" + """Chemprop model with default parameters for binary classification tasks.""" def __init__( self, From 64b7cc2c085cae7d87a72accd2b111eaf8332290 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 21:50:10 +0200 Subject: [PATCH 07/20] add tests for chemprop --- test_extras/test_chemprop/test_models.py | 102 +++++++++++++++++++---- 1 file changed, 87 insertions(+), 15 deletions(-) diff --git a/test_extras/test_chemprop/test_models.py b/test_extras/test_chemprop/test_models.py index 5a12625c..4df3839b 100644 --- a/test_extras/test_chemprop/test_models.py +++ b/test_extras/test_chemprop/test_models.py @@ -3,19 +3,25 @@ import logging import unittest -from chemprop.nn.loss import LossFunction +from chemprop.nn.loss import LossFunction, BCELoss, MSELoss from lightning import pytorch as pl from sklearn.base import clone from torch import nn +from torch import Tensor from molpipeline.estimators.chemprop.component_wrapper import ( MPNN, BinaryClassificationFFN, + RegressionFFN, BondMessagePassing, MeanAggregation, SumAggregation, ) -from molpipeline.estimators.chemprop.models import ChempropModel +from molpipeline.estimators.chemprop.models import ( + ChempropClassifier, + ChempropModel, + ChempropRegressor, +) from molpipeline.estimators.chemprop.neural_fingerprint import ChempropNeuralFP logging.getLogger("lightning.pytorch.utilities.rank_zero").setLevel(logging.WARNING) @@ -41,16 +47,10 @@ def get_model() -> ChempropModel: return chemprop_model -class TestChempropModel(unittest.TestCase): - """Test the Chemprop model.""" - - def test_get_params(self) -> None: - """Test the get_params and set_params methods.""" - chemprop_model = get_model() - orig_params = chemprop_model.get_params(deep=True) - expected_params = { +DEFAULT_PARAMS = { "batch_size": 64, "lightning_trainer": pl.Trainer, + "model": MPNN, "model__agg__dim": 0, "model__agg": SumAggregation, "model__batch_norm": True, @@ -67,15 +67,45 @@ def test_get_params(self) -> None: "model__message_passing__dropout_rate": 0.0, "model__message_passing__undirected": False, "model__message_passing": BondMessagePassing, + "model__metric_list": None, + "model__predictor__activation": "relu", + "model__warmup_epochs": 2, + "model__predictor": BinaryClassificationFFN, + "model__predictor__criterion": BCELoss, + "model__predictor__dropout": 0, + "model__predictor__hidden_dim": 300, + "model__predictor__input_dim": 300, + "model__predictor__n_layers": 1, + "model__predictor__n_tasks": 1, + "model__predictor__output_transform": nn.Identity, + "model__predictor__task_weights": 
Tensor([1.0]), + "model__predictor__threshold": None, + "n_jobs": 1, } +NO_IDENTITY_CHECK = [ + "model__agg", + "model__message_passing", + "lightning_trainer", + "model", + "model__predictor", + "model__predictor__criterion", + "model__predictor__output_transform", +] + +class TestChempropModel(unittest.TestCase): + """Test the Chemprop model.""" + + def test_get_params(self) -> None: + """Test the get_params and set_params methods.""" + chemprop_model = get_model() + orig_params = chemprop_model.get_params(deep=True) + expected_params = dict(DEFAULT_PARAMS) # Shallow copy + + self.assertSetEqual(set(orig_params), set(expected_params)) # Check if the parameters are as expected for param_name, param in expected_params.items(): - if param_name in [ - "model__agg", - "model__message_passing", - "lightning_trainer", - ]: + if param_name in NO_IDENTITY_CHECK: if not isinstance(param, type): raise ValueError(f"{param_name} should be a type.") self.assertIsInstance(orig_params[param_name], param) @@ -141,3 +171,45 @@ def test_neural_fp(self) -> None: # the model should be cloned self.assertNotEqual(id(chemprop_model.model), id(neural_fp.model)) self.assertEqual(neural_fp.disable_fitting, True) + + +class TestChempropClassifier(unittest.TestCase): + """Test the Chemprop classifier model.""" + + def test_get_params(self) -> None: + """Test the get_params and set_params methods.""" + chemprop_model = ChempropClassifier() + param_dict = chemprop_model.get_params(deep=True) + expected_params = dict(DEFAULT_PARAMS) # Shallow copy + self.assertSetEqual(set(param_dict.keys()), set(expected_params.keys())) + for param_name, param in expected_params.items(): + if param_name in NO_IDENTITY_CHECK: + if not isinstance(param, type): + raise ValueError(f"{param_name} should be a type.") + self.assertIsInstance(param_dict[param_name], param) + else: + self.assertEqual( + param_dict[param_name], param, f"Test failed for {param_name}" + ) + + +class TestChempropRegressor(unittest.TestCase): + """Test the Chemprop regressor model.""" + + def test_get_params(self) -> None: + """Test the get_params and set_params methods.""" + chemprop_model = ChempropRegressor() + param_dict = chemprop_model.get_params(deep=True) + expected_params = dict(DEFAULT_PARAMS) + expected_params["model__predictor"] = RegressionFFN + expected_params["model__predictor__criterion"] = MSELoss + self.assertSetEqual(set(param_dict.keys()), set(expected_params.keys())) + for param_name, param in expected_params.items(): + if param_name in NO_IDENTITY_CHECK: + if not isinstance(param, type): + raise ValueError(f"{param_name} should be a type.") + self.assertIsInstance(param_dict[param_name], param) + else: + self.assertEqual( + param_dict[param_name], param, f"Test failed for {param_name}" + ) From d00aa3e8cd309897f4396a8a4547401f6e3963a7 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 21:50:18 +0200 Subject: [PATCH 08/20] fix requirements --- requirements_chemprop.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_chemprop.txt b/requirements_chemprop.txt index 81d53ece..0bd7e545 100644 --- a/requirements_chemprop.txt +++ b/requirements_chemprop.txt @@ -1,2 +1,2 @@ chemprop -lightning < 1000 \ No newline at end of file +lightning \ No newline at end of file From cdeba6d032507fd11f4eca99fcd5d891bb4e5405 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 21:54:19 +0200 Subject: [PATCH 09/20] unify workflows --- .github/workflows/linting.yml | 67 
++++++++++++++++++++++++++++++++- .github/workflows/unittests.yml | 56 --------------------------- 2 files changed, 66 insertions(+), 57 deletions(-) delete mode 100644 .github/workflows/unittests.yml diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 7986de7f..df576a14 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -30,7 +30,6 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install $(find . -name "requirement*" -type f -printf ' -r %p') pip install mypy mypy . || exit_code=$? mypy --install-types --non-interactive @@ -148,3 +147,69 @@ jobs: - name: Analysing the code with isort run: | isort --profile black . + + test_basis: + needs: + - pylint + - mypy + - pydocstyle + - docsig + - black + - flake8 + - interrogate + - bandit + - isort + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install package + run: | + python -m pip install --upgrade pip + pip install . + - name: Run unit-tests + run: | + python -m unittest discover -v -s tests -t . + + test_chemprop: + needs: + - test_basis + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.11 + uses: actions/setup-python@v3 + with: + python-version: "3.11" + - name: Install package + run: | + python -m pip install --upgrade pip + pip install torch + pip install .[chemprop] + - name: Run unit-tests for chemprop + run: | + python -m unittest discover -v -s test_extras/test_chemprop -t . + + test_notebooks: + needs: + - test_basis + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.11 + uses: actions/setup-python@v3 + with: + python-version: "3.11" + - name: Install package + run: | + python -m pip install --upgrade pip + pip install .[notebooks] + - name: Run unit-tests for notebooks + run: | + python test_extras/test_notebooks/test_notebooks.py --continue-on-failure diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml deleted file mode 100644 index 1faaf0f4..00000000 --- a/.github/workflows/unittests.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: Unit-Tests - -on: [push] - -jobs: - test_basis: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.10", "3.11", "3.12"] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install package - run: | - python -m pip install --upgrade pip - pip install . - - name: Run unit-tests - run: | - python -m unittest discover -v -s tests -t . - - test_chemprop: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.11 - uses: actions/setup-python@v3 - with: - python-version: "3.11" - - name: Install package - run: | - python -m pip install --upgrade pip - pip install torch - pip install .[chemprop] - - name: Run unit-tests for chemprop - run: | - python -m unittest discover -v -s test_extras/test_chemprop -t . 
- - test_notebooks: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.11 - uses: actions/setup-python@v3 - with: - python-version: "3.11" - - name: Install package - run: | - python -m pip install --upgrade pip - pip install .[notebooks] - - name: Run unit-tests for notebooks - run: | - python test_extras/test_notebooks/test_notebooks.py --continue-on-failure From f39d951a747bf5e4fd4c25e47285234d43211f71 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 21:55:38 +0200 Subject: [PATCH 10/20] isort + black --- test_extras/test_chemprop/test_models.py | 76 ++++++++++++------------ 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/test_extras/test_chemprop/test_models.py b/test_extras/test_chemprop/test_models.py index 4df3839b..7445102a 100644 --- a/test_extras/test_chemprop/test_models.py +++ b/test_extras/test_chemprop/test_models.py @@ -3,18 +3,17 @@ import logging import unittest -from chemprop.nn.loss import LossFunction, BCELoss, MSELoss +from chemprop.nn.loss import BCELoss, LossFunction, MSELoss from lightning import pytorch as pl from sklearn.base import clone -from torch import nn -from torch import Tensor +from torch import Tensor, nn from molpipeline.estimators.chemprop.component_wrapper import ( MPNN, BinaryClassificationFFN, - RegressionFFN, BondMessagePassing, MeanAggregation, + RegressionFFN, SumAggregation, ) from molpipeline.estimators.chemprop.models import ( @@ -48,40 +47,40 @@ def get_model() -> ChempropModel: DEFAULT_PARAMS = { - "batch_size": 64, - "lightning_trainer": pl.Trainer, - "model": MPNN, - "model__agg__dim": 0, - "model__agg": SumAggregation, - "model__batch_norm": True, - "model__final_lr": 0.0001, - "model__init_lr": 0.0001, - "model__max_lr": 0.001, - "model__message_passing__activation": "relu", - "model__message_passing__bias": False, - "model__message_passing__d_e": 14, - "model__message_passing__d_h": 300, - "model__message_passing__d_v": 72, - "model__message_passing__d_vd": None, - "model__message_passing__depth": 3, - "model__message_passing__dropout_rate": 0.0, - "model__message_passing__undirected": False, - "model__message_passing": BondMessagePassing, - "model__metric_list": None, - "model__predictor__activation": "relu", - "model__warmup_epochs": 2, - "model__predictor": BinaryClassificationFFN, - "model__predictor__criterion": BCELoss, - "model__predictor__dropout": 0, - "model__predictor__hidden_dim": 300, - "model__predictor__input_dim": 300, - "model__predictor__n_layers": 1, - "model__predictor__n_tasks": 1, - "model__predictor__output_transform": nn.Identity, - "model__predictor__task_weights": Tensor([1.0]), - "model__predictor__threshold": None, - "n_jobs": 1, - } + "batch_size": 64, + "lightning_trainer": pl.Trainer, + "model": MPNN, + "model__agg__dim": 0, + "model__agg": SumAggregation, + "model__batch_norm": True, + "model__final_lr": 0.0001, + "model__init_lr": 0.0001, + "model__max_lr": 0.001, + "model__message_passing__activation": "relu", + "model__message_passing__bias": False, + "model__message_passing__d_e": 14, + "model__message_passing__d_h": 300, + "model__message_passing__d_v": 72, + "model__message_passing__d_vd": None, + "model__message_passing__depth": 3, + "model__message_passing__dropout_rate": 0.0, + "model__message_passing__undirected": False, + "model__message_passing": BondMessagePassing, + "model__metric_list": None, + "model__predictor__activation": "relu", + "model__warmup_epochs": 2, + "model__predictor": BinaryClassificationFFN, + 
"model__predictor__criterion": BCELoss, + "model__predictor__dropout": 0, + "model__predictor__hidden_dim": 300, + "model__predictor__input_dim": 300, + "model__predictor__n_layers": 1, + "model__predictor__n_tasks": 1, + "model__predictor__output_transform": nn.Identity, + "model__predictor__task_weights": Tensor([1.0]), + "model__predictor__threshold": None, + "n_jobs": 1, +} NO_IDENTITY_CHECK = [ "model__agg", @@ -93,6 +92,7 @@ def get_model() -> ChempropModel: "model__predictor__output_transform", ] + class TestChempropModel(unittest.TestCase): """Test the Chemprop model.""" From e73ba53c6a2c1f218803513b5d8caea697eae392 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 22:17:44 +0200 Subject: [PATCH 11/20] improved logging --- molpipeline/estimators/chemprop/models.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index 1924f669..b88fe1d3 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -7,6 +7,7 @@ except ImportError: from typing_extensions import Self +from loguru import logger import numpy as np import numpy.typing as npt from sklearn.base import clone @@ -19,8 +20,11 @@ MulticlassClassificationFFN, ) from lightning import pytorch as pl -except ImportError: - pass +except ImportError as error: + logger.error( + "Chemprop is not installed. Please install it using `pip install chemprop`." + ) + logger.info(error) from molpipeline.estimators.chemprop.abstract import ABCChemprop From 880f55cf72414f962758e01bac2cdf0e082dcb7e Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 22:18:08 +0200 Subject: [PATCH 12/20] isort --- molpipeline/estimators/chemprop/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index b88fe1d3..5f301f08 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -7,9 +7,9 @@ except ImportError: from typing_extensions import Self -from loguru import logger import numpy as np import numpy.typing as npt +from loguru import logger from sklearn.base import clone from sklearn.utils.metaestimators import available_if From c9c74cb8f0a56ff699f1eb504cc80d6264eac5f4 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 22:18:49 +0200 Subject: [PATCH 13/20] update init --- molpipeline/estimators/chemprop/__init__.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/molpipeline/estimators/chemprop/__init__.py b/molpipeline/estimators/chemprop/__init__.py index 4c32d13b..e23b26c4 100644 --- a/molpipeline/estimators/chemprop/__init__.py +++ b/molpipeline/estimators/chemprop/__init__.py @@ -4,8 +4,18 @@ installed_packages = {pkg.name for pkg in pkgutil.iter_modules()} if "chemprop" in installed_packages: - from molpipeline.estimators.chemprop.models import ChempropModel # noqa + from molpipeline.estimators.chemprop.models import ( + ChempropClassifier, + ChempropModel, + ChempropNeuralFP, + ChempropRegressor, + ) - __all__ = ["ChempropModel"] + __all__ = [ + "ChempropClassifier", + "ChempropModel", + "ChempropNeuralFP", + "ChempropRegressor", + ] else: __all__ = [] From d041a2f6dbc2c086baf58cbf437a043b312800f9 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 22:32:18 +0200 Subject: [PATCH 14/20] ignore no imports - false positive --- 
molpipeline/estimators/chemprop/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/molpipeline/estimators/chemprop/__init__.py b/molpipeline/estimators/chemprop/__init__.py index e23b26c4..e9a932e0 100644 --- a/molpipeline/estimators/chemprop/__init__.py +++ b/molpipeline/estimators/chemprop/__init__.py @@ -4,7 +4,7 @@ installed_packages = {pkg.name for pkg in pkgutil.iter_modules()} if "chemprop" in installed_packages: - from molpipeline.estimators.chemprop.models import ( + from molpipeline.estimators.chemprop.models import ( # noqa: F401 ChempropClassifier, ChempropModel, ChempropNeuralFP, From 0840a9607cc9eb8fba837a94cd576118dcfecdbc Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Tue, 23 Apr 2024 23:02:15 +0200 Subject: [PATCH 15/20] forward kwargs --- molpipeline/estimators/chemprop/models.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index 5f301f08..7f2b546e 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -183,7 +183,7 @@ def __init__( lightning_trainer: pl.Trainer | None = None, batch_size: int = 64, n_jobs: int = 1, - **kwargs: Any, # pylint: disable=unused-argument + **kwargs: Any, ) -> None: """Initialize the chemprop classifier model. @@ -211,6 +211,7 @@ def __init__( lightning_trainer=lightning_trainer, batch_size=batch_size, n_jobs=n_jobs, + **kwargs, ) if not self._is_binary_classifier(): raise ValueError("ChempropClassifier should be a binary classifier.") @@ -243,7 +244,7 @@ def __init__( lightning_trainer: pl.Trainer | None = None, batch_size: int = 64, n_jobs: int = 1, - **kwargs: Any, # pylint: disable=unused-argument + **kwargs: Any, ) -> None: """Initialize the chemprop regressor model. @@ -271,4 +272,5 @@ def __init__( lightning_trainer=lightning_trainer, batch_size=batch_size, n_jobs=n_jobs, + **kwargs, ) From b66d5db562add89a31de598e3ecb71b3f85d35de Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Wed, 24 Apr 2024 00:50:45 +0200 Subject: [PATCH 16/20] adapt to new chemprop output --- molpipeline/estimators/chemprop/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index 7f2b546e..6178b2e0 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -93,7 +93,8 @@ def _predict( self.model.eval() test_data = build_dataloader(X, num_workers=self.n_jobs, shuffle=False) predictions = self.lightning_trainer.predict(self.model, test_data) - prediction_array = np.array([pred.numpy() for pred in predictions[0]]) # type: ignore + prediction_array = np.vstack(predictions) # type: ignore + prediction_array = prediction_array.squeeze() # Check if the predictions have the same length as the input dataset if prediction_array.shape[0] != len(X): @@ -103,11 +104,10 @@ def _predict( # If the model is a binary classifier, return the probability of the positive class if self._is_binary_classifier(): - if prediction_array.shape[1] != 1: + if prediction_array.ndim != 1: raise ValueError( "Binary classification model should output a single probability." 
                )
-            prediction_array = prediction_array[:, 0]
         return prediction_array
 

From de1367446e2f2491b22ffd9eb609a8a5d9be28e3 Mon Sep 17 00:00:00 2001
From: Christian Feldmann
Date: Thu, 25 Apr 2024 16:56:42 +0200
Subject: [PATCH 17/20] implement tests for regression and classification

---
 .../test_chemprop/test_chemprop_pipeline.py   | 162 +++++++++++++++++-
 1 file changed, 161 insertions(+), 1 deletion(-)

diff --git a/test_extras/test_chemprop/test_chemprop_pipeline.py b/test_extras/test_chemprop/test_chemprop_pipeline.py
index 24cfc8f4..92fe69cc 100644
--- a/test_extras/test_chemprop/test_chemprop_pipeline.py
+++ b/test_extras/test_chemprop/test_chemprop_pipeline.py
@@ -1,9 +1,14 @@
 """Test module for behavior of chemprop in a pipeline."""
 
 import unittest
+from io import BytesIO
+from typing import TypeVar
 
+import joblib
 import numpy as np
+import pandas as pd
 from chemprop.nn.loss import LossFunction
+from lightning import pytorch as pl
 from sklearn.base import clone
 from torch import nn
 
@@ -15,7 +20,11 @@
     BondMessagePassing,
     SumAggregation,
 )
-from molpipeline.estimators.chemprop.models import ChempropModel
+from molpipeline.estimators.chemprop.models import (
+    ChempropClassifier,
+    ChempropModel,
+    ChempropRegressor,
+)
 from molpipeline.mol2any.mol2chemprop import MolToChemprop
 from molpipeline.pipeline import Pipeline
 from molpipeline.post_prediction import PostPredictionWrapper
@@ -63,6 +72,100 @@ def get_model_pipeline() -> Pipeline:
     return model_pipeline
 
 
+DEFAULT_TRAINER = pl.Trainer(
+    accelerator="cpu",
+    logger=False,
+    enable_checkpointing=False,
+    max_epochs=5,
+    enable_model_summary=False,
+    enable_progress_bar=False,
+    val_check_interval=0.0,
+)
+
+
+def get_regression_pipeline() -> Pipeline:
+    """Get the Chemprop model pipeline for regression.
+
+    Returns
+    -------
+    Pipeline
+        The Chemprop model pipeline for regression.
+    """
+
+    smiles2mol = SmilesToMol()
+    mol2chemprop = MolToChemprop()
+    error_filter = ErrorFilter(filter_everything=True)
+    filter_reinserter = FilterReinserter.from_error_filter(
+        error_filter, fill_value=np.nan
+    )
+    chemprop_model = ChempropRegressor(lightning_trainer=DEFAULT_TRAINER)
+    model_pipeline = Pipeline(
+        steps=[
+            ("smiles2mol", smiles2mol),
+            ("mol2chemprop", mol2chemprop),
+            ("error_filter", error_filter),
+            ("model", chemprop_model),
+            ("filter_reinserter", PostPredictionWrapper(filter_reinserter)),
+        ],
+    )
+    return model_pipeline
+
+
+def get_classification_pipeline() -> Pipeline:
+    """Get the Chemprop model pipeline for classification.
+
+    Returns
+    -------
+    Pipeline
+        The Chemprop model pipeline for classification.
+    """
+    smiles2mol = SmilesToMol()
+    mol2chemprop = MolToChemprop()
+    error_filter = ErrorFilter(filter_everything=True)
+    filter_reinserter = FilterReinserter.from_error_filter(
+        error_filter, fill_value=np.nan
+    )
+    chemprop_model = ChempropClassifier(lightning_trainer=DEFAULT_TRAINER)
+    model_pipeline = Pipeline(
+        steps=[
+            ("smiles2mol", smiles2mol),
+            ("mol2chemprop", mol2chemprop),
+            ("error_filter", error_filter),
+            ("model", chemprop_model),
+            ("filter_reinserter", PostPredictionWrapper(filter_reinserter)),
+        ],
+    )
+    return model_pipeline
+
+
+_T = TypeVar("_T")
+
+
+def joblib_dump_load(obj: _T) -> _T:
+    """Dump and load an object using joblib.
+
+    Notes
+    -----
+    The object is not dumped to disk but to a BytesIO object.
+
+    Parameters
+    ----------
+    obj : _T
+        The object to dump and load.
+
+    Returns
+    -------
+    _T
+        The loaded object.
+ """ + bytes_container = BytesIO() + joblib.dump(obj, bytes_container) + bytes_container.seek(0) # update to enable reading + bytes_model = bytes_container.read() + + return joblib.load(BytesIO(bytes_model)) + + class TestChempropPipeline(unittest.TestCase): """Test the Chemprop model pipeline.""" @@ -155,3 +258,60 @@ def test_error_handling(self) -> None: proba = pipeline.predict_proba(smiles) self.assertEqual(len(proba), 4) self.assertTrue(np.isnan(proba[-1]).all()) + + +class TestRegressionPipeline(unittest.TestCase): + """Test the Chemprop model pipeline for regression.""" + + def test_prediction(self) -> None: + """Test the prediction of the regression model.""" + + molecule_net_logd_df = pd.read_csv( + "https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/Lipophilicity.csv" + ).head(1000) + regression_model = get_regression_pipeline() + regression_model.fit( + molecule_net_logd_df["smiles"].tolist(), + molecule_net_logd_df["exp"].to_numpy(), + ) + pred = regression_model.predict(molecule_net_logd_df["smiles"].tolist()) + + self.assertEqual(len(pred), len(molecule_net_logd_df)) + + model_copy = joblib_dump_load(regression_model) + pred_copy = model_copy.predict(molecule_net_logd_df["smiles"].tolist()) + self.assertTrue(np.allclose(pred, pred_copy)) + + +class TestClassificationPipeline(unittest.TestCase): + """Test the Chemprop model pipeline for classification.""" + + def test_prediction(self) -> None: + """Test the prediction of the classification model.""" + + molecule_net_bbbp_df = pd.read_csv( + "https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/BBBP.csv" + ).head(1000) + classification_model = get_classification_pipeline() + classification_model.fit( + molecule_net_bbbp_df["smiles"].tolist(), + molecule_net_bbbp_df["p_np"].to_numpy(), + ) + pred = classification_model.predict(molecule_net_bbbp_df["smiles"].tolist()) + proba = classification_model.predict_proba( + molecule_net_bbbp_df["smiles"].tolist() + ) + self.assertEqual(len(pred), len(molecule_net_bbbp_df)) + self.assertEqual(proba.shape[1], 2) + self.assertEqual(proba.shape[0], len(molecule_net_bbbp_df)) + + model_copy = joblib_dump_load(classification_model) + pred_copy = model_copy.predict(molecule_net_bbbp_df["smiles"].tolist()) + proba_copy = model_copy.predict_proba(molecule_net_bbbp_df["smiles"].tolist()) + + nan_indices = np.isnan(pred) + self.assertListEqual(nan_indices.tolist(), np.isnan(pred_copy).tolist()) + self.assertTrue(np.allclose(pred[~nan_indices], pred_copy[~nan_indices])) + + self.assertEqual(proba.shape, proba_copy.shape) + self.assertTrue(np.allclose(proba[~nan_indices], proba_copy[~nan_indices])) From 11cdd8ed8713f7f0564895ad39356455b4d8c81a Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Thu, 25 Apr 2024 17:06:54 +0200 Subject: [PATCH 18/20] using try except -> pkgutil does not work with notebook --- molpipeline/estimators/chemprop/__init__.py | 7 ++----- molpipeline/mol2any/__init__.py | 7 +++---- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/molpipeline/estimators/chemprop/__init__.py b/molpipeline/estimators/chemprop/__init__.py index e9a932e0..656d1833 100644 --- a/molpipeline/estimators/chemprop/__init__.py +++ b/molpipeline/estimators/chemprop/__init__.py @@ -1,9 +1,6 @@ """Initialize Chemprop module.""" -import pkgutil - -installed_packages = {pkg.name for pkg in pkgutil.iter_modules()} -if "chemprop" in installed_packages: +try: from molpipeline.estimators.chemprop.models import ( # noqa: F401 ChempropClassifier, ChempropModel, @@ -17,5 +14,5 @@ 
"ChempropNeuralFP", "ChempropRegressor", ] -else: +except ImportError: __all__ = [] diff --git a/molpipeline/mol2any/__init__.py b/molpipeline/mol2any/__init__.py index 96509f73..da7bb760 100644 --- a/molpipeline/mol2any/__init__.py +++ b/molpipeline/mol2any/__init__.py @@ -1,7 +1,5 @@ """Init the module for mol2any pipeline elements.""" -import pkgutil - from molpipeline.mol2any.mol2bin import MolToBinary from molpipeline.mol2any.mol2concatinated_vector import MolToConcatenatedVector from molpipeline.mol2any.mol2inchi import MolToInchi, MolToInchiKey @@ -21,8 +19,9 @@ "MolToRDKitPhysChem", ] -installed_packages = {pkg.name for pkg in pkgutil.iter_modules()} -if "chemprop" in installed_packages: +try: from molpipeline.mol2any.mol2chemprop import MolToChemprop # noqa __all__.append("MolToChemprop") +except ImportError: + pass From 83cd8b5e161d41ab3009e8a2d0f3a72665e3f6e3 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Thu, 25 Apr 2024 17:37:30 +0200 Subject: [PATCH 19/20] removing unnecessary code --- molpipeline/estimators/chemprop/component_wrapper.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/molpipeline/estimators/chemprop/component_wrapper.py b/molpipeline/estimators/chemprop/component_wrapper.py index b2d41068..043c58c0 100644 --- a/molpipeline/estimators/chemprop/component_wrapper.py +++ b/molpipeline/estimators/chemprop/component_wrapper.py @@ -21,7 +21,6 @@ ) from chemprop.nn.transforms import UnscaleTransform from chemprop.nn.utils import Activation, get_activation_function -from chemprop.utils.registry import Factory from sklearn.base import BaseEstimator from torch import Tensor, nn @@ -165,13 +164,8 @@ def __init__( output_transform : UnscaleTransform or None, optional (default=None) Transformations to apply to the output. None defaults to UnscaleTransform. 
""" - if criterion is None: - task_weights = torch.ones(n_tasks) if task_weights is None else task_weights - criterion = Factory.build( - self._T_default_criterion, - task_weights=task_weights, - threshold=threshold, - ) + if task_weights is None: + task_weights = torch.ones(n_tasks) super().__init__( n_tasks=n_tasks, input_dim=input_dim, From c7a4bffd9cf3716ee5abf452a9cc2bba38b20bd7 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Thu, 25 Apr 2024 17:52:40 +0200 Subject: [PATCH 20/20] implement remaining predictors --- .../estimators/chemprop/component_wrapper.py | 82 +++++++++++++++++-- 1 file changed, 76 insertions(+), 6 deletions(-) diff --git a/molpipeline/estimators/chemprop/component_wrapper.py b/molpipeline/estimators/chemprop/component_wrapper.py index 043c58c0..f74c8824 100644 --- a/molpipeline/estimators/chemprop/component_wrapper.py +++ b/molpipeline/estimators/chemprop/component_wrapper.py @@ -10,12 +10,36 @@ from chemprop.nn.agg import MeanAggregation as _MeanAggregation from chemprop.nn.agg import SumAggregation as _SumAggregation from chemprop.nn.ffn import MLP -from chemprop.nn.loss import BCELoss, LossFunction, MSELoss +from chemprop.nn.loss import ( + BCELoss, + BinaryDirichletLoss, + CrossEntropyLoss, + EvidentialLoss, + LossFunction, + MSELoss, + MulticlassDirichletLoss, + MVELoss, + SIDLoss, +) from chemprop.nn.message_passing import BondMessagePassing as _BondMessagePassing from chemprop.nn.message_passing import MessagePassing -from chemprop.nn.metrics import BinaryAUROCMetric, Metric, MSEMetric +from chemprop.nn.metrics import ( + BinaryAUROCMetric, + CrossEntropyMetric, + Metric, + MSEMetric, + SIDMetric, +) from chemprop.nn.predictors import BinaryClassificationFFN as _BinaryClassificationFFN +from chemprop.nn.predictors import BinaryDirichletFFN as _BinaryDirichletFFN +from chemprop.nn.predictors import EvidentialFFN as _EvidentialFFN +from chemprop.nn.predictors import ( + MulticlassClassificationFFN as _MulticlassClassificationFFN, +) +from chemprop.nn.predictors import MulticlassDirichletFFN as _MulticlassDirichletFFN +from chemprop.nn.predictors import MveFFN as _MveFFN from chemprop.nn.predictors import RegressionFFN as _RegressionFFN +from chemprop.nn.predictors import SpectralFFN as _SpectralFFN from chemprop.nn.predictors import ( _FFNPredictorBase as _Predictor, # pylint: disable=protected-access ) @@ -265,6 +289,28 @@ def set_params(self, **params: Any) -> Self: return self +class RegressionFFN(PredictorWrapper, _RegressionFFN): # type: ignore + """A wrapper for the RegressionFFN class.""" + + n_targets: int = 1 + _T_default_criterion = MSELoss + _T_default_metric = MSEMetric + + +class MveFFN(PredictorWrapper, _MveFFN): # type: ignore + """A wrapper for the MveFFN class.""" + + n_targets: int = 2 + _T_default_criterion = MVELoss + + +class EvidentialFFN(PredictorWrapper, _EvidentialFFN): # type: ignore + """A wrapper for the EvidentialFFN class.""" + + n_targets: int = 4 + _T_default_criterion = EvidentialLoss + + class BinaryClassificationFFN(PredictorWrapper, _BinaryClassificationFFN): # type: ignore """A wrapper for the BinaryClassificationFFN class.""" @@ -273,12 +319,36 @@ class BinaryClassificationFFN(PredictorWrapper, _BinaryClassificationFFN): # ty _T_default_metric = BinaryAUROCMetric -class RegressionFFN(PredictorWrapper, _RegressionFFN): # type: ignore - """A wrapper for the RegressionFFN class.""" +class BinaryDirichletFFN(PredictorWrapper, _BinaryDirichletFFN): # type: ignore + """A wrapper for the BinaryDirichletFFN class.""" + + 
n_targets: int = 2 + _T_default_criterion = BinaryDirichletLoss + _T_default_metric = BinaryAUROCMetric + + +class MulticlassClassificationFFN(PredictorWrapper, _MulticlassClassificationFFN): # type: ignore + """A wrapper for the MulticlassClassificationFFN class.""" n_targets: int = 1 - _T_default_criterion = MSELoss - _T_default_metric = MSEMetric + _T_default_criterion = CrossEntropyLoss + _T_default_metric = CrossEntropyMetric + + +class MulticlassDirichletFFN(PredictorWrapper, _MulticlassDirichletFFN): # type: ignore + """A wrapper for the MulticlassDirichletFFN class.""" + + n_targets: int = 1 + _T_default_criterion = MulticlassDirichletLoss + _T_default_metric = CrossEntropyMetric + + +class SpectralFFN(PredictorWrapper, _SpectralFFN): # type: ignore + """A wrapper for the SpectralFFN class.""" + + n_targets: int = 1 + _T_default_criterion = SIDLoss + _T_default_metric = SIDMetric class MPNN(_MPNN, BaseEstimator):
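
For reference, a minimal end-to-end sketch of the ChempropRegressor introduced in this series, mirroring get_regression_pipeline() from test_chemprop_pipeline.py above. The SMILES strings and target values are made-up example data, and the trainer settings follow the DEFAULT_TRAINER used in the tests; this is an illustrative sketch, not code from the patches.

    from lightning import pytorch as pl

    from molpipeline.any2mol import SmilesToMol
    from molpipeline.estimators.chemprop import ChempropRegressor
    from molpipeline.mol2any import MolToChemprop
    from molpipeline.pipeline import Pipeline

    # Trainer settings mirroring DEFAULT_TRAINER from the tests above.
    trainer = pl.Trainer(
        accelerator="cpu",
        logger=False,
        enable_checkpointing=False,
        max_epochs=5,
        enable_model_summary=False,
        enable_progress_bar=False,
    )

    pipeline = Pipeline(
        steps=[
            ("smiles2mol", SmilesToMol()),      # SMILES -> RDKit molecule
            ("mol2chemprop", MolToChemprop()),  # molecule -> chemprop MolGraph
            ("model", ChempropRegressor(lightning_trainer=trainer)),
        ],
    )

    # Made-up example data: three SMILES with arbitrary regression targets.
    smiles = ["CCO", "c1ccccc1", "CC(=O)O"]
    targets = [0.2, 1.5, -0.3]
    pipeline.fit(smiles, targets)
    predictions = pipeline.predict(smiles)  # one float per input molecule

With the error-handling elements used in the tests (ErrorFilter plus PostPredictionWrapper around a FilterReinserter), unparseable SMILES would be returned as NaN instead of raising.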