From 18a2543ee0575202a1c51627a96f2637e2ba6981 Mon Sep 17 00:00:00 2001
From: Jochen Sieg
Date: Mon, 9 Sep 2024 13:04:00 +0200
Subject: [PATCH 1/6] mol2morgan_fingerprint: remove deprecated fp function
- Remove AllChem.GetMorganFingerprintAsBitVect
from the code because it is deprecated.
- Add a test to ensure the bit2atom mapping works
as intended.
---
molpipeline/mol2any/mol2morgan_fingerprint.py | 13 ++++++------
.../test_mol2morgan_fingerprint.py | 21 +++++++++++++++++++
2 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/molpipeline/mol2any/mol2morgan_fingerprint.py b/molpipeline/mol2any/mol2morgan_fingerprint.py
index 1c93295d..79fa46c1 100644
--- a/molpipeline/mol2any/mol2morgan_fingerprint.py
+++ b/molpipeline/mol2any/mol2morgan_fingerprint.py
@@ -151,12 +151,11 @@ def _explain_rdmol(self, mol_obj: RDKitMol) -> dict[int, list[tuple[int, int]]]:
dict[int, list[tuple[int, int]]]
Dictionary with bit position as key and list of tuples with atom index and radius as value.
"""
- bit_info: dict[int, list[tuple[int, int]]] = {}
- _ = AllChem.GetMorganFingerprintAsBitVect(
- mol_obj,
- self.radius,
- useFeatures=self._use_features,
- bitInfo=bit_info,
- nBits=self._n_bits,
+ fp_generator = self._get_fp_generator()
+ additional_output = AllChem.AdditionalOutput()
+ additional_output.AllocateBitInfoMap()
+ _ = fp_generator.GetSparseFingerprint(
+ mol_obj, additionalOutput=additional_output
)
+ bit_info = additional_output.GetBitInfoMap()
return bit_info
diff --git a/tests/test_elements/test_mol2any/test_mol2morgan_fingerprint.py b/tests/test_elements/test_mol2any/test_mol2morgan_fingerprint.py
index 3a5e94a9..14ff2282 100644
--- a/tests/test_elements/test_mol2any/test_mol2morgan_fingerprint.py
+++ b/tests/test_elements/test_mol2any/test_mol2morgan_fingerprint.py
@@ -128,6 +128,27 @@ def test_setter_getter_error_handling(self) -> None:
}
self.assertRaises(ValueError, mol_fp.set_params, **params)
+ def test_bit2atom_mapping(self) -> None:
+ """Test that the mapping from bits to atom weights works as intended."""
+ # lower n_bit values, e.g. 2048, will lead to a bit clash during folding,
+ # for the test smiles "NCCOCCCC(=O)O".
+ # We want no folding clashes in this test to check the correct length
+ # of the bit-to-atom mapping.
+ n_bits = 2100
+ sparse_morgan = MolToMorganFP(radius=2, n_bits=n_bits, return_as="sparse")
+ dense_morgan = MolToMorganFP(radius=2, n_bits=n_bits, return_as="dense")
+ explicit_bit_vect_morgan = MolToMorganFP(
+ radius=2, n_bits=n_bits, return_as="explicit_bit_vect"
+ )
+
+ smi2mol = SmilesToMol()
+ for test_smi in test_smiles:
+ for fp_gen in [sparse_morgan, dense_morgan, explicit_bit_vect_morgan]:
+ mol = smi2mol.transform([test_smi])[0]
+ fp = fp_gen.transform([mol])
+ mapping = fp_gen.bit2atom_mapping(mol)
+ self.assertEqual(np.sum(fp), len(mapping)) # type: ignore
+
if __name__ == "__main__":
unittest.main()
From 91152e188016e5da4129ed5d2806c654cea60031 Mon Sep 17 00:00:00 2001
From: frederik-sandfort1
<129401811+frederik-sandfort1@users.noreply.github.com>
Date: Tue, 10 Sep 2024 13:23:48 +0200
Subject: [PATCH 2/6] Error handling sanitize bug (#85)
* fix molsanitize exception error catching
* linting
* isort on other stuff
---
molpipeline/estimators/chemprop/models.py | 6 +--
molpipeline/pipeline/_molpipeline.py | 2 +-
.../test_chemprop/test_chemprop_pipeline.py | 2 +-
test_extras/test_chemprop/test_models.py | 4 +-
tests/test_elements/test_error_handling.py | 46 ++++++++++++++++++-
5 files changed, 51 insertions(+), 9 deletions(-)
diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py
index b94bcb02..e720e029 100644
--- a/molpipeline/estimators/chemprop/models.py
+++ b/molpipeline/estimators/chemprop/models.py
@@ -15,9 +15,7 @@
try:
from chemprop.data import MoleculeDataset, build_dataloader
- from chemprop.nn.predictors import (
- BinaryClassificationFFNBase,
- )
+ from chemprop.nn.predictors import BinaryClassificationFFNBase
from lightning import pytorch as pl
except ImportError as error:
logger.error(
@@ -31,9 +29,9 @@
MPNN,
BinaryClassificationFFN,
BondMessagePassing,
+ MulticlassClassificationFFN,
RegressionFFN,
SumAggregation,
- MulticlassClassificationFFN,
)
from molpipeline.estimators.chemprop.neural_fingerprint import ChempropNeuralFP
diff --git a/molpipeline/pipeline/_molpipeline.py b/molpipeline/pipeline/_molpipeline.py
index 3ddb7c9b..8ff43eb5 100644
--- a/molpipeline/pipeline/_molpipeline.py
+++ b/molpipeline/pipeline/_molpipeline.py
@@ -349,7 +349,7 @@ def transform_single(self, input_value: Any) -> Any:
elif isinstance(p_element, FilterReinserter):
iter_value = p_element.transform_single(iter_value)
except MolSanitizeException as err:
- return InvalidInstance(
+ iter_value = InvalidInstance(
p_element.uuid,
f"RDKit MolSanitizeException: {err.args}",
p_element.name,
diff --git a/test_extras/test_chemprop/test_chemprop_pipeline.py b/test_extras/test_chemprop/test_chemprop_pipeline.py
index 646ac99c..32c4e677 100644
--- a/test_extras/test_chemprop/test_chemprop_pipeline.py
+++ b/test_extras/test_chemprop/test_chemprop_pipeline.py
@@ -22,8 +22,8 @@
from molpipeline.estimators.chemprop.models import (
ChempropClassifier,
ChempropModel,
- ChempropRegressor,
ChempropMulticlassClassifier,
+ ChempropRegressor,
)
from molpipeline.mol2any.mol2chemprop import MolToChemprop
from molpipeline.pipeline import Pipeline
diff --git a/test_extras/test_chemprop/test_models.py b/test_extras/test_chemprop/test_models.py
index 9afaf111..57a434b4 100644
--- a/test_extras/test_chemprop/test_models.py
+++ b/test_extras/test_chemprop/test_models.py
@@ -28,10 +28,10 @@
# pylint: disable=relative-beyond-top-level
from test_extras.test_chemprop.chemprop_test_utils.compare_models import compare_params
from test_extras.test_chemprop.chemprop_test_utils.constant_vars import (
- NO_IDENTITY_CHECK,
- DEFAULT_SET_PARAMS,
DEFAULT_BINARY_CLASSIFICATION_PARAMS,
DEFAULT_MULTICLASS_CLASSIFICATION_PARAMS,
+ DEFAULT_SET_PARAMS,
+ NO_IDENTITY_CHECK,
)
from test_extras.test_chemprop.chemprop_test_utils.default_models import (
get_chemprop_model_binary_classification_mpnn,
diff --git a/tests/test_elements/test_error_handling.py b/tests/test_elements/test_error_handling.py
index 535b256a..dd134f48 100644
--- a/tests/test_elements/test_error_handling.py
+++ b/tests/test_elements/test_error_handling.py
@@ -4,12 +4,16 @@
from typing import Any
import numpy as np
-from rdkit import RDLogger
+from rdkit import Chem, RDLogger
+from rdkit.Chem.rdchem import MolSanitizeException
from sklearn.base import clone
from molpipeline import ErrorFilter, FilterReinserter, Pipeline, PostPredictionWrapper
+from molpipeline.abstract_pipeline_elements.core import MolToMolPipelineElement
from molpipeline.any2mol import SmilesToMol
+from molpipeline.any2mol.auto2mol import AutoToMol
from molpipeline.mol2any import MolToMorganFP, MolToRDKitPhysChem, MolToSmiles
+from molpipeline.utils.molpipeline_types import OptionalMol, RDKitMol
from tests.utils.mock_element import MockTransformingPipelineElement
rdlog = RDLogger.logger()
@@ -247,3 +251,43 @@ def test_replace_mixed_datatypes_expected_failures(self) -> None:
self.assertRaises(ValueError, pipeline.fit, test_values)
self.assertRaises(ValueError, pipeline.transform, test_values)
self.assertRaises(ValueError, pipeline2.fit_transform, test_values)
+
+ def test_molsanitize_error(self) -> None:
+        """Test if MolSanitizeException is caught and filtered by ErrorFilter."""
+
+ class DummyMolSanitizeExc(MolToMolPipelineElement):
+ """MolToMolPipelineElement with dummy molsanitize exception."""
+
+ def pretransform_single(self, value: RDKitMol) -> OptionalMol:
+ """Dummy Mol.
+
+ Parameters
+ ----------
+ value: RDKitMol
+ Molecule.
+
+ Returns
+ -------
+ OptionalMol
+ Molecule.
+ """
+ if Chem.MolToSmiles(value) == "c1ccccc1":
+ raise MolSanitizeException("This is a dummy exception.")
+ return value
+
+ pipeline = Pipeline(
+ [
+ ("autotosmiles", AutoToMol()),
+ ("atomneutralizer", DummyMolSanitizeExc()),
+ ("moltosmiles", MolToSmiles()),
+ ("errorfilter", error_filter := ErrorFilter()),
+ (
+ "filterreinserter",
+ FilterReinserter.from_error_filter(error_filter, None),
+ ),
+ ],
+ n_jobs=-1,
+ )
+
+ result = pipeline.transform(["c1ccccc1", "CCCCCCC", "c1cc"])
+ self.assertEqual(result, [None, "CCCCCCC", None])
From 85cacba32450c39a8cf57e26cdfbc78aaa28ba5f Mon Sep 17 00:00:00 2001
From: frederik-sandfort1
<129401811+frederik-sandfort1@users.noreply.github.com>
Date: Tue, 10 Sep 2024 16:00:29 +0200
Subject: [PATCH 3/6] Inchitomol (#86)
* fix molsanitize exception error catching
* linting
* isort on other stuff
* add inchitomol element
---
.../any2mol/string2mol.py | 64 ++++++++++++++++++-
molpipeline/any2mol/__init__.py | 2 +
molpipeline/any2mol/auto2mol.py | 4 +-
molpipeline/any2mol/inchi2mol.py | 27 ++++++++
molpipeline/any2mol/smiles2mol.py | 36 ++---------
.../test_any2mol/test_auto2mol.py | 28 ++++++++
6 files changed, 128 insertions(+), 33 deletions(-)
create mode 100644 molpipeline/any2mol/inchi2mol.py
diff --git a/molpipeline/abstract_pipeline_elements/any2mol/string2mol.py b/molpipeline/abstract_pipeline_elements/any2mol/string2mol.py
index cc1f5c53..9bd6ac75 100644
--- a/molpipeline/abstract_pipeline_elements/any2mol/string2mol.py
+++ b/molpipeline/abstract_pipeline_elements/any2mol/string2mol.py
@@ -4,8 +4,11 @@
import abc
-from molpipeline.abstract_pipeline_elements.core import AnyToMolPipelineElement
-from molpipeline.utils.molpipeline_types import OptionalMol
+from molpipeline.abstract_pipeline_elements.core import (
+ AnyToMolPipelineElement,
+ InvalidInstance,
+)
+from molpipeline.utils.molpipeline_types import OptionalMol, RDKitMol
class StringToMolPipelineElement(AnyToMolPipelineElement, abc.ABC):
@@ -43,3 +46,60 @@ def pretransform_single(self, value: str) -> OptionalMol:
OptionalMol
RDKit molecule if representation was valid, else InvalidInstance.
"""
+
+
+class SimpleStringToMolElement(StringToMolPipelineElement, abc.ABC):
+ """Transforms string representation to RDKit Mol objects."""
+
+ def pretransform_single(self, value: str) -> OptionalMol:
+ """Transform string to molecule.
+
+ Parameters
+ ----------
+ value: str
+ string representation.
+
+ Returns
+ -------
+ OptionalMol
+            RDKit molecule if valid string representation, else InvalidInstance.
+ """
+ if value is None:
+ return InvalidInstance(
+ self.uuid,
+ f"Invalid representation: {value}",
+ self.name,
+ )
+
+ if not isinstance(value, str):
+ return InvalidInstance(
+ self.uuid,
+ f"Not a string: {value}",
+ self.name,
+ )
+
+ mol: RDKitMol = self.string_to_mol(value)
+
+ if not mol:
+ return InvalidInstance(
+ self.uuid,
+ f"Invalid representation: {value}",
+ self.name,
+ )
+ mol.SetProp("identifier", value)
+ return mol
+
+ @abc.abstractmethod
+ def string_to_mol(self, value: str) -> RDKitMol:
+ """Transform string representation to molecule.
+
+ Parameters
+ ----------
+ value: str
+ string representation
+
+ Returns
+ -------
+ RDKitMol
+ Rdkit molecule if valid representation, else None.
+ """
diff --git a/molpipeline/any2mol/__init__.py b/molpipeline/any2mol/__init__.py
index 5b8b2da3..c4dabadd 100644
--- a/molpipeline/any2mol/__init__.py
+++ b/molpipeline/any2mol/__init__.py
@@ -2,6 +2,7 @@
from molpipeline.any2mol.auto2mol import AutoToMol
from molpipeline.any2mol.bin2mol import BinaryToMol
+from molpipeline.any2mol.inchi2mol import InchiToMol
from molpipeline.any2mol.sdf2mol import SDFToMol
from molpipeline.any2mol.smiles2mol import SmilesToMol
@@ -9,5 +10,6 @@
"AutoToMol",
"BinaryToMol",
"SmilesToMol",
+ "InchiToMol",
"SDFToMol",
]
diff --git a/molpipeline/any2mol/auto2mol.py b/molpipeline/any2mol/auto2mol.py
index b33ee2d8..925b7c95 100644
--- a/molpipeline/any2mol/auto2mol.py
+++ b/molpipeline/any2mol/auto2mol.py
@@ -9,6 +9,7 @@
InvalidInstance,
)
from molpipeline.any2mol.bin2mol import BinaryToMol
+from molpipeline.any2mol.inchi2mol import InchiToMol
from molpipeline.any2mol.sdf2mol import SDFToMol
from molpipeline.any2mol.smiles2mol import SmilesToMol
from molpipeline.utils.molpipeline_types import OptionalMol, RDKitMol
@@ -29,6 +30,7 @@ def __init__(
uuid: Optional[str] = None,
elements: tuple[AnyToMolPipelineElement, ...] = (
SmilesToMol(),
+ InchiToMol(),
BinaryToMol(),
SDFToMol(),
),
@@ -44,7 +46,7 @@ def __init__(
uuid: str, optional (default=None)
Unique identifier of PipelineElement.
elements: tuple[AnyToMol, ...], optional (default=(SmilesToMol(),
- BinaryToMol(), SDFToMol()))
+ InchiToMol(), BinaryToMol(), SDFToMol()))
Elements to try to transform the input to a molecule.
"""
super().__init__(name=name, n_jobs=n_jobs, uuid=uuid)
diff --git a/molpipeline/any2mol/inchi2mol.py b/molpipeline/any2mol/inchi2mol.py
new file mode 100644
index 00000000..4c881843
--- /dev/null
+++ b/molpipeline/any2mol/inchi2mol.py
@@ -0,0 +1,27 @@
+"""Classes meant to transform a given InChI to an RDKit molecule."""
+
+from rdkit import Chem
+
+from molpipeline.abstract_pipeline_elements.any2mol.string2mol import (
+ SimpleStringToMolElement,
+)
+from molpipeline.utils.molpipeline_types import RDKitMol
+
+
+class InchiToMol(SimpleStringToMolElement):
+ """Transforms Inchi to RDKit Mol objects."""
+
+ def string_to_mol(self, value: str) -> RDKitMol:
+ """Transform Inchi string to molecule.
+
+ Parameters
+ ----------
+ value: str
+ Inchi string.
+
+ Returns
+ -------
+ RDKitMol
+ Rdkit molecule if valid Inchi, else None.
+ """
+ return Chem.MolFromInchi(value)
diff --git a/molpipeline/any2mol/smiles2mol.py b/molpipeline/any2mol/smiles2mol.py
index 79db23bd..0d7c45e6 100644
--- a/molpipeline/any2mol/smiles2mol.py
+++ b/molpipeline/any2mol/smiles2mol.py
@@ -5,16 +5,15 @@
from rdkit import Chem
from molpipeline.abstract_pipeline_elements.any2mol.string2mol import (
- StringToMolPipelineElement as _StringToMolPipelineElement,
+ SimpleStringToMolElement,
)
-from molpipeline.abstract_pipeline_elements.core import InvalidInstance
-from molpipeline.utils.molpipeline_types import OptionalMol, RDKitMol
+from molpipeline.utils.molpipeline_types import RDKitMol
-class SmilesToMol(_StringToMolPipelineElement):
+class SmilesToMol(SimpleStringToMolElement):
"""Transforms Smiles to RDKit Mol objects."""
- def pretransform_single(self, value: str) -> OptionalMol:
+ def string_to_mol(self, value: str) -> RDKitMol:
"""Transform Smiles string to molecule.
Parameters
@@ -24,30 +23,7 @@ def pretransform_single(self, value: str) -> OptionalMol:
Returns
-------
- OptionalMol
+ RDKitMol
Rdkit molecule if valid SMILES, else None.
"""
- if value is None:
- return InvalidInstance(
- self.uuid,
- f"Invalid SMILES: {value}",
- self.name,
- )
-
- if not isinstance(value, str):
- return InvalidInstance(
- self.uuid,
- f"Not a string: {value}",
- self.name,
- )
-
- mol: RDKitMol = Chem.MolFromSmiles(value)
-
- if not mol:
- return InvalidInstance(
- self.uuid,
- f"Invalid SMILES: {value}",
- self.name,
- )
- mol.SetProp("identifier", value)
- return mol
+ return Chem.MolFromSmiles(value)
diff --git a/tests/test_elements/test_any2mol/test_auto2mol.py b/tests/test_elements/test_any2mol/test_auto2mol.py
index 06726a24..9cbad60b 100644
--- a/tests/test_elements/test_any2mol/test_auto2mol.py
+++ b/tests/test_elements/test_any2mol/test_auto2mol.py
@@ -17,6 +17,9 @@
SMILES_CL_BR = "NC(Cl)(Br)C(=O)O"
SMILES_METAL_AU = "OC[C@H]1OC(S[Au])[C@H](O)[C@@H](O)[C@@H]1O"
+INCHI_BENZENE = "InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"
+INCHI_CHLOROBENZENE = "InChI=1S/C6H5Cl/c7-6-4-2-1-3-5-6/h1-5H"
+
# SDF
with gzip.open(TEST_DATA_DIR / "P86_B_400.sdf.gz") as file:
SDF_P86_B_400 = file.read()
@@ -82,6 +85,31 @@ def test_auto2mol_for_smiles(self) -> None:
)
del log_block
+ def test_auto2mol_for_inchi(self) -> None:
+ """Test molecules can be read from inchi automatically."""
+
+ test_inchis = [INCHI_BENZENE, INCHI_CHLOROBENZENE]
+ expected_mols = [MOL_BENZENE, MOL_CHLOROBENZENE]
+
+ pipeline = Pipeline(
+ [
+ (
+ "Auto2Mol",
+ AutoToMol(),
+ ),
+ ]
+ )
+ log_block = rdBase.BlockLogs()
+ actual_mols = pipeline.fit_transform(test_inchis)
+ self.assertEqual(len(test_inchis), len(actual_mols))
+ self.assertTrue(
+ all(
+ Chem.MolToInchi(smiles_mol) == Chem.MolToInchi(original_mol)
+ for smiles_mol, original_mol in zip(actual_mols, expected_mols)
+ )
+ )
+ del log_block
+
def test_auto2mol_for_sdf(self) -> None:
"""Test molecules can be read from sdf automatically."""
From 1f83de3df4ffe7547252a60936f29bf4eb5f5254 Mon Sep 17 00:00:00 2001
From: "Christian W. Feldmann"
<128160984+c-w-feldmann@users.noreply.github.com>
Date: Tue, 10 Sep 2024 16:34:39 +0200
Subject: [PATCH 4/6] Add `--check-only` flag to isort (#88)
---
.github/workflows/linting.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index 5ff97d5f..8ff938f3 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -150,7 +150,7 @@ jobs:
pip install isort
- name: Analysing the code with isort
run: |
- isort --profile black .
+ isort --profile black --check-only .
test_basis:
needs:
From 4bd1dfbd54309d56d1c18b1faad2b50cb9867827 Mon Sep 17 00:00:00 2001
From: JochenSiegWork <135010976+JochenSiegWork@users.noreply.github.com>
Date: Wed, 18 Sep 2024 10:28:37 +0200
Subject: [PATCH 5/6] docu: update readme, add image, add feature calc notebook
(#90)
* docu: update readme, add image, add feature calc notebook
* readme: add feature calculation example
* readme: add published Molpipeline paper link
* notebooks: add header to feature calculation
---
.github/molpipeline.png | Bin 0 -> 36910 bytes
README.md | 83 ++-
notebooks/04_feature_calculation.ipynb | 915 +++++++++++++++++++++++++
3 files changed, 984 insertions(+), 14 deletions(-)
create mode 100755 .github/molpipeline.png
create mode 100644 notebooks/04_feature_calculation.ipynb
diff --git a/.github/molpipeline.png b/.github/molpipeline.png
new file mode 100755
index 0000000000000000000000000000000000000000..fc6129a282fd993069d0b2110d9ea607778e41f7
GIT binary patch
literal 36910
zcmeFYc{r4P`#-K5H{B9iNJ3I+v9ywPvbUkK?{{Py#%?CtC@Bg_iXvMo3`SxMhEb_h
zmQjph#>^zkU<@*5h8bgim+F4L_w&0w&+q%!?|U5I<8vKHM~Att>wR74_Bzk=b-u4#
z=dCRz|JeSAkdTn%=~HGGg@ii
zCwuy2O3H6u&6%>ts!UGG7)$OlzFU+cx!qV)>-%lFwA8OjPQ?d8r5&aaDBc2&&gj36@1<$fbQD9FjicRcT
zef7HLs&AzSDRwobPfX3G4_O|dxO~5TTg_1fI0+nAwTCBeCl#cmuvX5Rzd8Tw#}7;4
z?*x@)nkzCgVcmfTu|>Ono)9v7kGwtQr7R;u&o5uRcl+0`((AQY_FBG?&k$bzy;>94Lg3l
z)%`6|hjCSNDJiLC>__FlemsA%??!X;oTZtW$zRIdc0q1_{tckR^~S%R?Pccvp+h3U
z%xt&Fsw)9UicFM>me#V2Os4dfpTF$1`2Tw$44_dLk7djB)LhR+;#(x04Ay^)8{jOmkjFriLP@`~aEzbt3bU@x(9Z
z=lH9hq&oa1rd`;X$g8`)+`Cd6fo6Tn(2n_<74oh0w@%R$G*+6Hg@jIuJ$YjyulfE7
zLT&4>R}vD^lin)(DfFTA<6Azbq~!q?q!Dgiph^)c<+K|2wqk0${qw
zh3~}Mn2UmEPxP)yj=tyV^frf4dOruNDRke|CqQ;`A6g
z0}nLU&j2;9Mqr(izF$3atXXV}rn!WN#umoJ$%Q>zHMVx^-`H53|JB(^0=Vd1c%-?B
z&D;4vIoG7stWx#*_c`aDvOAxBzI4+y${72Cb|;f>BDGIBqh9m}_lf~e^;!&nw+*Xq
z{??GVG%vB5c1|81sL`p9@RbOHgkE;_Rx{I9cZ6JZs%w-auA~F!{SQN%?b|Pzska{8
zs_O}4`ww#Z{qN(tgJ=>^f}^Wfh&5)Y+n0G`er`9ge%uS0qN&Byl<)q%r-7I`X*9r?
zmEDE#4n0S7|2A7a;4mQGb8g+l{8$H>%c7Gdb%wsy2a~7jA*8*9i}Ifo^`X1zhqW-%
zN6_J8X9l&3yJFuOtcrH2Cgggfs8WsLveRnn5>J&pQujJ~Y5?=fzQSRTa}KTrIy1Dt
zwniEn71dQuo2saW1`?1_Vmm^wPCKSUXQ&RslALIX<=o?eLu>|V?54U4@0k-iIDgtN
zU3U5i4Zy0C+om6aZ5}(c4~_2rgx%M|CI+63=)O8|i?}nfD^CX_si56(UCPJ_?qMr`
z40s3Wf)>heA+%0tn$osyciX=|A=d@;V!P$eNkoyn4&)y^fcR~*l;J|pkY%hk;zn_H
zZUx*iV4W6l=dlb!XBWl5EdAr^S1z;%ng!)UulZb;@1tqsk*kQba|2s?cT}^LTT4vT
zkzKZmwXTzki&wH=-;}aT+s+$@eRd_P|EYfVNXtWc>b`L?;1++SrFZ4=dHJ*|{gi>n
zP;8K`OF1sY{PzvOYDuqOmYN^c#(1AmEbVwo-doM}7<;s}WK3Z@JCtjrNOQNg24Y=>;DtB!f>XP>$Mvm;tw#-AC#KR8~S-=uP$J$8;5FiY}rj=?u9)67^)R;v*+
z-hlEVAMSsieqdxr5ieZXc5=YKXX0*&uGhJ;3UbG?C|d*OYVZ+&@n%-mb|We@@JbV_(xn&QshW36OBu6`
zhuZ?~UlcZK+U_0uwPF5chh{~^!Q+ViYGZDq4v!D$X`_P%FFLw{?=xkA8}Exm1wCl@
zGE*;{V&<*pm2_|6nW;O~+U_NKO6{XScuY;5LG9Nrx#ak+X_vZzoq79n!y;q6<9FrN
zZ2$AMW&bAcjz={Ra+AGMhW8hxsi)wIMXjwR#ztHAvzJa**t4Gfyk`Vqu<`HrD1b1L$p%X8&k-8Ow~yLZ1c^L5aKeH!9|}abkpG4#w+E|O@UPco7H*~AsTLL%p1-mq
zk9}3m-t(RR`TU%B?-2;C)hxW-RMBxPY@19+DR=q#>QhHn6)f9cwz|9>&8MaYF?Dv;
zxbM%m+eh!PcspNi)Vi9gdrnLZ9LRWg7y&+;{yRR}Ij9oBUCqKphx@|pm6By@;t;ry
z)l_}#j$ZvI@31rk@nVT_l!vTB1p@oS#{m34P`
zdE4W2e&||7xw0XO4x9rX@9*hQ_P2#c)>t@A>_ESwYc-Y&{&7cxPe*nWE_mvk`KpF?
zM8!R5AK7M)dS-Ebs##sz!9Mv)JhP;vY3ul;}i49%V)^Vd;}IHFb!RxUx*_x)viTm?~e*RR9Z-pASlZ)9N)
zDgRugr`
z9>M-MH)%!~{`G7x2mA3+dxp0iK6|?)g86d5$(|K$AN$v{-&P_zKUNU`%^R5r_FvCV
zakhVy<%r7oyXyZ(a6t9{WAXoSb;QpY_iE9HPKUapgRO=d!b&jHFFU4&fI28H4~f)l
z++k++w&mmZ-(|e*{sRWQB?t4^j9@;O6+HIo!Th#d{Ky@B&Bi~;jS;18QoD9R>vx1m
z*C^WY_N<98(3c}4Bf$(JYQABx5*mi+zXi0)YV8VUL27vHhczQRzta5#!rONrkbid(
zQi03&{Jb3gA1og5WIdS&fsOictTxhSb=Pk**|hb`Us
zZFsp+n~KK#l{WYW89Z(9KM0}car3_PmWmecR0VmR*p8hEj@$7T9uP{iGg3;XqlCMB
zv#xAJl6Tnb7~b!{uh9afmvR-?kM&o%j>X8->_Ffkr&3Q)UMQQIRdYAQu7$ldS@!K6
zcQ++I31E&h0+G$Ws7ChmWG|SXG7y8OX+Mot4jIT$@j^`-@_LA){Umex$CQ{FC6teHPHn%FiW!KrOyawu}>;wi{4Um
zG$Vq5M4KqZOcaNclGqI+ce3b~Z~H5ZYE`;;Kg?BOU+DQyKTYn@bh@p#&x;3^qiH8P
z@$cN|f1`m}bl`t*2A{B7Z)(I
z&jq!cU*I{@urqw*jW5CPr1)kX7O0*5)(7lCVKem=N17cEIwCvyoK3E!F*s9~NO5p$
z1!MW$Q-%%%q|}S;Ex1w%{&*JCVayViajoDs-#mu<;a-*Y@z9okq;G0vV1K3ai;WUB
z(?ZjZ^E?d4^Ax{4
zG3{|k4-dH^%O-A-^=Wl-Xm2t=jZ{pD?W%Y9|CClfP
zlB{`*zy+RS8mtDxTNk_^;h(|SfnGzW!`*kODt`SBVhYz@p*
ztXhE7y^Eyh8(dPlci1yZW2#MEE{iwCErkyzTYnjss!ydIF&wCznA@4*y>FLxHK5wM
ze3SElHOCy+RBajx{pP5jk~YHd%5yGke}-#Dhru6pL~A80h)s_p>FK#6#1_8YY{aS5
z4aRt`zI~oegSzUi+Xa;Cb3*?+0y!xhDA!JLuEOr2j7Qr-a%p10%66O*1Ng+mId(7d<`+J-#_C(Uf*Jyv5l#T;RT`DdtxSeTsO
z5Ky4vA2fVYjUubu7AZP@Q&Qn?%UKsl9ZO{oLhzBY<~GdNZZRoicEFOkv>yP_zxJx{
z)m6&e?>jf#m~fvSKEggKz--YIN!Qr%;zET_E>1H<Rt1}=a0Ey9srii}
z=Hh>?-=N(hs6X31lJmWz8Ui*Y`l9ht%zbYL#sl|=KtO;o
zFPiksyYw-#p%Pe^1SRMfIscTJ@>O(W{y?|DHPKC{MAVJv0*8o$(Jct*0Jfhpbxu5&
zY~8Sj0=7ft*qijQZCh|EdqgzQ<|4)B-Lai*Bq$L$SG&kva&qxY+
zQ)OWv>-@Ku!~oq|qm`*k-%y`EZ07#|77FXAs_gU4l&;fFZ8#C~z#$LO*a>wJ*Xt$C
zLT#)*7NSz8;y3PB@n1rDTT__0sHmthquZa9+@AEOr;MGC5B^&MYFpc@J9nx0M<89%
z*8P1Oi65I**FPh!SIZ303G2uYo~UC5FXSpWlv^snYE9c-LZ(-}FsUf4!MAIBqq`}X
zX+Sc4)W^+>Vol-0^HZFZ@P0Z^&3CNY^NC~po&T(%@j8YV^H6z{S3md@eYc(s$w(wW
zH@sH(I#RHRLUM|{-Dh8p1$zztL5LmQA3PmPzzlvq%x_Au2Owg-@km(jbE|OD&A#7~
z#yWC3SUQLn4%j`#NfS>49aFi>;!Z>?p=7J)S|j2(&H8#ZEwac&H5
z2B<4t({;EhTLXrBzR_-l?3aLSd1VEfPa+u_YUIe2{M5JsQSK;EL*6CnJZ0~>%ODVt
z?xeXKW7(4f%W<9g$=fofY5|zJqq7$%X6y_I6r}T=Q71cLOXsSAb}{@gPk`cB=poE>7tgHl?B=8*+0jqrdxS7
zdL%0fzldb0yVYkKf)CsE6z&DXDo
zEqU7p`-8uK_RQauNQ{}shH8DgrWhT#?lwXrN!Dz0BG|7fhWxOLw^VTX{8W$bw%4c(WnJIrwUJkh3Y@*$@_Wt$7Cdi{QteY<4KiBkXMRj~3TT@lm^
zrlR9AMMIzQUQ{WD5;C+Icjp6PbU5B-W5ab&(=(*9Y$t*p9pgt$(`VmloU83eF9*Nj
zUvd&t%HNVmWIUdt71!y315>T^jU7?PE2Pxc8_R(&jrQ@SFxT+3hHG+IqeezcOo0tA
zmIFWN##v{H$;8EXS&=y}-0r}}ak!?MYB&1t)_SBU<;bA$)MkygyJ4LBk#PI5N~-3>nGSo~3b+HXcVYAN;!p%lGTRdl|u_pF%K$MBPoL
zrCD>46Ky#S*f#4kCJqt~6>aA+=2)J2yPbzXQKy&e_{nwjmw4oQi0j$l^o+4oSFYMv8wCt0{$x550J^00p1CRo+AZ
zl6x*k<2%r{yPkJFH@;Q6+PT&ju9&|g(HcY^d!G=QMF)LP8!czxFFk#fwj4*HyUluH
zKc~Y-^mIr%FL39-ScGjv@}KgmJMKibBv*!yfcR1F3a;G)Iy#tZu<8gyEqxXFt>9~A
zp4I8%s6%y}54J;#xP%d3%l0!-sm(tMwe9bpOjXp!P!m%ID#4)57fsHasb5?ho$Fo2
z^-k!G88Nd!4Yd3VTdx$jY&9s*gXuSOqU6Xk!PwTbng^3ym8OxdeyEVviP)J0`2-8B
zc3t4iN7+R9Obyr7LtjG#C37qKj?+uVmJ!=jW?an5cvl{c(0fZ|B1lk_hZV>W1pPeu4hJgnG&i$-5^uez+
zH|-Egty4|ay4UF8^#=)g!n3*rE5o@mlHu`|3Gu#NKg)?W=LOplQ{`0Ynp%6_qyHL*
z?j}9A%H?58IOPZ}ZjTmC_`NW0h{+EiJ~$2hTb!;-A!lQ70_LRR&p-9tGe($-RU+7wy}u?x^m^U
zp48&2E{PRLK?2KWNU+H2APbr=Ie$xNZGfnUH
zd9P1Vn#ZvV@2{jjHw^sylJ-?cZ5ToB_fYh`Ly9ZZGU`uD-
zrAV9L67ExVt33-ss(tB-rNtEbuIx%R^oy3!SN
z_7=)lTltJ6yce!{(};$6RE2nRb+M!is0;us{9^+PB?Y=Q@!5jq0aUgi0ox#<#jAO@
zcCF#vsZu7Io%|R>8@3dSoAZTD+Q1r$Yi_8@hcs{oci*ooeHX&*vi-Q3RFSaKLY)a0
zONg{d;b!B=V{o+hq?Fb6ZTaW=BYKh(3|lMh3wiFFZWD3H6@1pr;}8f;jz95vRC{SO
zs<&juQ<>ivCkn&E
z)*uy_dA4*P!T6jVJGyI1yg1+AuKM6CrwOkzv4AWk4@iZC#)rhUDj@#Y+Vb%Oj@H^8
z)jEAehn-*8^r&E5>6>;ho^DY+Fv_J~LC~YW*POl^0N_#~pY)~&>rJ8WO>hJ`&M(*|
zKubKA>*)!kWH*jyvr3~mmdg@;kU;wIWd&V0ws*F&8SeFRZpq{)KED{}@3E~qjB6YE
zI5SYAsw;59Kq2HMumdBC{j5ZzJ-f93dW6fI7p#1M_U=xW3U;w>Qfs<jdnh1{*NO3)jk5M{JHtw7pI3RYl{AcFc3vohm1
z^j<;PW?VYIU@+oZ^&8v+xezNZgEZZdur1~TpF|FnO)3`9ROK*&BekW^1yNU$c{Ifw
zjDUBo_8y2q#dmDW&2>MvTqi(l$7j{fDA-sv+QT$<$a9g9%`?1&T&Q=iUcApG%
zExh)9wqz(nuLJBsdSh)3COqK8k`dB<K@naNP0_Bk5
zRQG2OH-vPGg>ne=qX|We-mW(!RC~^5Yk6u?JA6@N#QF7RFg3Y(w!VKmOkeCG>7W#Y
z*habTWJyQlnh=<8M1o(%o6X{L9)pR%?ZuV#F8
zw)X#?)M4d2fSM%D27~f!2{GQ+NS)Ma%zG3ZR@RPHyUeqX&fhgfdv|1wIOWI%h
z`Y$Mk=E{as1fy3mP`V~;B0AQh#6@2Mq_NGugPJ2)=w!`MLqOq1;(S4{?pwYx21{Sz
zxD%QL4U<&D0Dt%pJ@6x^8KM`dBI2@H*SXlg7WA|tZI5#G)~N*+;DBQLUChk72#VJMER
zhS3lfy-G8-1%HH#S8jSli3hb9CH-uFuP|5cl@ogkqZIIWMXFhZYUxdL}M?06jLK#2Bc^
zZr!4hR!c8w#=g2hY?|I)MYh=Mo5^=NVQ;lY7ytg{%_6zwubHC7-c`^*o-)Q_EYNiE
zfv4f4A&=HSTLLQzPb6IDrPjia3ub-Wa4v$S@98rMl!awPPed!Cb@n?oBo3=jyOTVA
z-6oG05i;yo0tJhM)wV_}yLm6}C0K}+VUy3tUng3tOmvRebOHTM6AWi~uI#{}V;alT
zOR3&&)bFK@-{~QzJ#63h_A{=XF7d?nvkDj25%@VF=Mv8;JeuP9B%obKV?CMHI$^zx
zsA{z9bUT)33d*NL?w8|9oSU4+Tb|@$-}VD)6T1D+fRH_h!ihlv`
zfr}}8__FF96Sv5Nj1zWmkL^dbFZiN_Y^&5#*)A2X8t8otP6^r4esC!*e%z&{O0KsX
zb$esac6%1+Ck=xqb=K|I{^NMYg+gc4p+1T&9u2+6cQQh_tW}BoY%ngI*dE0zoDu%;
zsVRQ$Vax!RG)gzYRKOx%LpWz*f8#sn)vHOlTgtdxvR2
z`}l_vJVyVx$Z^JHJCu}9N=PA7yZW;(^vm;p!bX*{t`gmq5(W3!Pn?F=h|r$Aq(ZSB
zV@;mXPZTQZ@+M~7@zP;R8&Gr;F$?G}c?m*1Wp$q2ILZw!!G
zol&revjr|VN?m4|`JLsV95!aNt5A+P+j|KCo?C-v%|0xGO5RldDr_(y*20UuFLas3^zGSH&!uhZ7EVCg48
z6N~!gBLKQj=j-#C*te{f;T9w;Ga7^`wEg~d^Uw!-YM8B5w)ev4Q3z?g;5We(T8$as
zPuUe>T*iuDv~}ELlU@z4N_Wd23NKYKq8&wY=J2O6#6wYi!I=qvr;>!JyQ+UsAvbk!
z@n@@`!%w+ADbZBL^KWN!T|$m(j-Bw8tJS_}p;5ZQCSU+q1RxLO{^EFLDba(56ogperV4%wT9OeP!)lpD=6M|8qmmx5h$QAL^=|p__oK25rjhXdUKK=K+v=Lm-fMft;4mhV5X4K6}#Z}qnie0t*xcbxy)X-P=XG^Sy70L&5|#%A
zArbe8I}q$Ge4@`~`yV|+SNkqYQ3Olpi3@a<9YuM&QkYWX0W!4aVs)_K^MQ`=)aof$
zpaH8b?Mb!$Me(d`V-|^wekFqc~gY9g`?=VHJSlD%J4A8$(bXZIbpB=Suhfp7pS@_HW
z%sYjWyluxi_%F9n%4^zxGr)lDyTN!Lucp78JF&(bNgaq#V%E2KahFl?^JAHEE83Io
zIw5%GHrZiZLDPVH+-%@I5FW?8Cmjh3?`Vo~-v^;6pePOjYO2xXriN-4pd=(}Oxtn{
zMt`TUWC*bz5Tn{KiFw&>UgNIy3pax(IQ`psO{yV&JTdab1M2C5->C6DKvUx!pW3|I
z(@<2cp;>5(Jtqe94nBR5&~z=U+u9?9R1Wl)XZkE|vhzb@=I=fqta+B6=I{ezn3XFthc?ju2l@CM(0*lv{^biqW^}MqekI
zTt^{T9MD=GSB4$M6d+^yeJxS|F^7zdZ^vpizQeL*qhqT?5vI!6-H%-R-VoH8r9TFo
z`IP_#=F<1h=fg;*3;dM#1}QMQnh&=xl&ZTfJ3zV-6O~_8zxldwwc#UINY*Yp{%S|9
zjgQGg#Vl$6W>EcFsH%s()#q_cD*qux(;UGaU&|F@#kt9)$u!d*Z05I+8`bPNiQu!`
z?_}!CJ&=}(NsnQ?I3#<;MS(>;pB?ZUv)TI8H0-jc+9jVqj8|*y^k8n-rXG9hdj^XM
zXE(EV>x7L!f}dj?HA~5l7x9Y0b1j9TE!8o3bV`V3LnTGCq~lH~nWsE#p~*w!RmHcK
zmXhCEq1BB2sF&3C*oyVtEQh8x8{Q8XeR7c8C!8_El#TvDf*(`1r_SG%jw}YZbUC%e
zlQd40)vL=@`l^YEd5}C2kSsJp>o4nbeb97x<%hsU^_R*A1zVK=xw|0^=u(xJdVAu;
z^$zLnA#EAEM5E>|jKn4tDJmq6^X*RM>04Ey0d`m!-zx2^dsuVT@uuXLzQEi$p4c#>
zrxi-;ayxlTQ)tZRK={|^O3`kbCHX*7ZO6j%>7FcO0^hN{3zC6gc%w0D=nu*-4@4ib
zW6&9w&Wz;2r+=rkbhDbLlXjm+!dpZu+Ss%}N6anHm3{`YvKlJy@l)bv?Rodrl5NHn
zKX1A{LY&yWu8hzv3`YAMOk28STUvw9*9t`eTETiIyc5~`DUu)$
zL_eb)Vb5P{l9X*I65nt@L@_8#oSS=0gukMg@ON_ulj^Mic2+wh*9De|^lbHDYa}_p
z$$6PnH2$Hq_1n$ts)thJ=VEuo;5@5y0mtWXItWqsdTmxdN_sBuA)v$4>3X3)o3?2^
zo{ca+^0HRI@M{c^k+lJFlN6Sw9o+@%OEELTaulzSV&
z9d*7(v(8R=`vncq)0&$_`O;T&9lR?{MfAmNwbSwzqjcZu%_S
zJ2vPYywU}~#(q{E??KZZF_>g`a#7nz?@Ox-SZ*_QH-B+BKOy^HuHL`mqR--RZnV2qe|b{1UDY-b$gIMI-1XEe0@|iyrnw2
zlui_+G`S9Ci7DH!>@b=Q*U>-ZB;Znm&jx4CMo)$@*Ol=DRLyvImh^R)3*TN}|JnRg
zh$CkTZVpu=#*&Y{pL=C;-CF!)OI6(6V#A{HtokU~RHgh1t~4;*4??iKv|SD7@A92m
zs+v1avf9_!&*H?EUbt4uPAV$d99y_qGsKhiy-|9hiHZkJm+V__%vn&WbLP2b!$*A>
zG{dLbZ|tgS`&TX7(^z;`i9I5$EAc&O#s)8Y(Aoy`U3uv1?+%4P^8t_8w3E;+2S$H@
zwJtiX5?A;ZIP}VZWR<{VetT;dM4qe_ot3A!4G}o|i%AYNzBlEnqzUZ=o1vEPZc=)C
zslr4gk7)?bQe`eBQ78mqex%#m_0z?R8apI=1oO*J>gRMOs<`cg+i8oa$+{HtkWS
zzB|=pz0sHfjbd@{)*mVQaP;AeNs&IiKn);LBN8{}3zjb1jl^w(&8<%=l7Boo6;C?;`S7L=){aoSXXN1sbeopz@b6U4El~*g
zmn^RF%^ygWyaIK=M)&7-Zgf4oy{?RJiqdxJ_$>?gxI~R7d3@_t!=bluUq~e+vk+39
z-?lYMzuYC^XYGW0Acwb<>X`tGgT(u+67uBV#CQAr$O-fSc^nx
zIFPZ2G{Ex3ZooG$ZXp!nX=4v`>43H^-7b5N$Z?^HBC(b)!H5dRjPK{w#JgVZ{$|f#vqJ=o%e%F*BpnHUj$+QXEPK1&
zgD+3d_>JOH0l>FWHo@Fq2_9;=ps9MlUeWct)t?Mtr%SA$Pw&CA^~ON{m@7q|u}I_|
zLh_!qH-^Ptj3)Yid+$nqHCPA*GAvHA0D4W~3a&ADzB^VC0m({_ir+
zR-d*=;C@bZ=T(Q=azIA}%o-q2-)C2yve0mG@-v=%TRrRaV#DJIsQ*AZe9487@J-AO
zV7Ed|OOdc%{esFs9TYQwkTAMmVqBB~nhes>H@ZORulU*b9>vMfK2=7@_}N#f0=AB=
zOSL9>l4AV}C3&$0?d2BgUdQ|kJkL7>l5{fEsIH3_`uq>XHle3XPvK*Ly-h$O^5{e=
zCNpG1Q4l5oRnXLd4EqZ1*R`q-prbd^9ns`|Rt20s!*%iG6G#RJBaB=2`H~?8iFp+z
zsR=(^kYhMv(-f!_Z37J63)vxfO{w+9^sd;8vrlLWwGz7c3~6HhMY>z(m!u+F+u+4F
z=|~LNx~CvV38WlO>>aj54vaPx#E!}I~e&&s3lE(_OeQ2c=S4HJH
z(w2d34VHyLg~=S2+pJN9K<2d26e@uvs9+cG5vkIdm59c?=orcCd{>ux8qk#{Yj?$}
z8x5buK28&1scwRZi~ysBT*1;_>9RBb3syCs@BOgK$M
z5?o<&o>aOu`g2bEDLGWZU8vWUKU+7ad!&(O@5cutaG=}A78xY!N6hX)17#!Hs1?I3~T;7fi{6HW{tkSp=^
zGx?DC221v9gV7;s!?n++mzwTE(?K3`R~^KP&C|pIX$c#x!MZ9MQwlEnMYOHwj)m{k
zTCLsmw{zcaW!fa`#Ck##0R|Wr0YYaM74nFQ7g7zi;MWxdAFHRbZw%%SF4fVELVbK<
zl?sAv71rx57E69)J20$%mSkzr*Uz|FInd%K+|OxXLls8d+$#(CO>qPx_@;wsaWrL9
zNc%xl_jzK}NtxHz2mnC93xhz2e#JAW)nSUSZ57&{sA$tQEjE4BpJ#%aHqZdCoMwBU
z)tTixp}_pL_}wEni!zp@oo?QNADajyP#u78=8j^a31n^spArK5NVu`!n6UC>!^swQ
z1gi;n2$*S9rp3~px9DOj5G_nnR&)EDn$F$`I5)FE_Jft-!oV`lvYLxjsOB~ro`#2a
zQ!r
z7t9+y%z{C2b^YuL)>wdtTt&3C?Fi%@wut@sNFDnb(j{acUhgL$P#pYWH_KO!KT)jh
z)0;mwI|smm_jrqPg+mo}trF(FbRB%b$A^DhFr(O=Ao4tV^ah$x84K9>41Z;_a^`R;
z0gm*L1IF$>WmMdNNZ4M~wEMVQ~r`!q*MB!kb@_2$#%ouEF;^kH*2(4>OGrbseDmL_@
zAkgMaJ*&23V`1$4%4y~C*QeHL91%f%hMS8O`Fcd;mq*>GBew}-^mopxZ6eJ+c7YWj
z#EOo27qt8Kb`$3Rcv9={c^3h{ab$F&*#*!2)sZ~uWTx&oI;*Qz0lECb#9=VwxL*OU
z28?f7b9A`gSYvsIQ!B;Leee;#{N){QDmKh`py8o1zx(=Y%NrCyUy(i|mp95q#1B3K
zMG9DIS`mWfD@OWdHIpiw(K+%ABAK$c%_gF)B^l$cx+GSURFs;>xpiiO;S^XPbp^bz
zyWyD1#OPCnrMFv=v8|0Ha#m(h^@-0`Lq?n~!E|Su0(N0)bn^QO&0S29{vIh6o9b7p
z%Noc+^4^7Wy<1l*nk?`R?YxB
za+`|5DA8?J76hCeKfxymKmvv@o&7w%%aExcrS3%io|2
zaSGXcR(K4{ri7TiomoVU<+@U=Cv#%|@nYH~U~j^@iTWA(=n<2%;!O?aZlg4Lr_m*|
zPZqS;gI3}$VH1hz!8;{K?qwEL_VXXn)^rSV+2XT`9SRy;!rf|u!rrY9ymJ1s5ZL9^g;B_Nhr%h#
zl5sMpIpkWGR{{FtxNczqpv)iCU>>kFxyVCe(N5VU&h_yi%7r1k@aBYI1{oqV*6r
zmVln09S@PHf_?>%?r2Nw#_fPA9kZF8#mSoDAckPtuiy7_91+s|Jq$g5meU{=$AEM4
zNidBoJHP0JkROQ@89l7~bD;n%IDNyG2aDwo?Be5Ix`qQbQ
zf#jEmFu-1+f}!7L(dC9|;Ierm>BO5ytEKzi*kD2f-!W$K#OTy1PjMiwiFbytbv<&v
zEHu@{b9e`4EL*aQWtd8$4t=I_oM((=JAsLO%VgkYyM_EoD
zwj63D^RS|qCR&ZS<)1`(6LEQhDw+c)fyy-E%%U}50vx%tK@d+UkpBF}M&h`}T;)bk
z0=w_sjo<5CZ`6OOk7BK
z8xsUFkl(5o4WqX{x-&T0UI6St>*`vc8^i=Esq2B=dqgDtKLA@1DZc36s@*a5W5cD7
zX#39FRvmrHkX72ExWg{6y{p-Bm%RcwxT%>VTpU6Uv5QRr)cc5#qXGcVPpZ?S9t`p|
zE}XJC#oP}009Q|ohL_Xr>1%!Qa8uma4{F3{@?7LDy@;sBE~bbMmv#y3DSlPBLpA6d
zp@e?yUpmALF$$~D7Z8(&&UP=c67NQID-7jnVIp1&6rn3sGbHqJLPkQ&xZF%D6%K5Y
z;ez1&a1Ip;C-Kp0t@S&1vR0Hi`I>Du3X*zkY#i73(Uf)jitG
zx1B&yqP;0{PPOp#`0@hpMM0~_*xs227_tk1;Ks$XNP45?%+eAti`uM1c0j9l&MbJ_
zG|tHFco1YGk)x5Mq?<4+b=CIgmTFXdlW6f-{J3d`Nu;S#XvDS{;nW@BX~;3F~w@8cmIS>ev7rq1znE_D@
zv6~pp4>)yOQn!VFfxwwU<1$2045PW3sE-T6h?a7os{{5X5|Q;>FO?+57A2=pWWVRr
zXzD#@2xBQsmFRyIHGQ5K4NNyH>a=X_0G|84H~*p?{g0pQnrR@7UA2KUZ7qSX?m(CGPu=F4yHqn;v;^=O9XQ{4mCsQ%PVQdmOw=`w
z^5^u`2n7+z^<|}!e#dc{!BEQ6{=v$Bg_nP7
z!`5g|*~4AuCNhzLLp=I#4pGNBKKoeeMD2x}F_fDnLI;%skD_`L;QElkzuXMFs7kyB!;S9=!?Z?N$TE8|ABl8~(zT
zQ;^W>E+L=CqbpE43w3({(GmUf7g5%0g~Hv;qErJJ_~B$6#_KhmIWL(rl=*uB3*D|f
z_d3sgcJ65$Fuo?)dgct5ay*swY_P~6;7bEiGi~6Qsp0y=xDa*Ig&K90iRpm`1tVVX
z$LrC{jJcMNr+Ol{tY)jGxm0Gv&F*v&!)~kP+nGgOC!z(as|c}sU0LDg$b~G=`;eBO
zOiiQ4eB!Ap&{L6}!y4bto;mtfBUk&)X$}4Rsrjv_Pt9%D4aT3^)7S6kT?MGrn|12E
znPKM;Z08s2K|%kZgZ5t>1yZum%GOV5b-X{!(*dC~q@*1->e!9}HCc;d*9pWMS)Y
z*ivrKN?VWVjG%?tbwr4-jNUP|QcidLVcuhT&^J%B6~9QU9RLRPE>LmPG>28U-WWf6
zQRn<%$PzFU?k$*{0XWMK)C98RbC3-?3}^VpYIboX(dX-z$MDfG0`mZ9Q{77|#^juz
z+dk4NQ~u?NCh%4{!Jp+22q_i7r9&vpUVG9sW@H#2dvZ>d&{21!G)-#Z9|he-P=B
zos#*{HFRX|<~mLfy6IM=bq^WfZ6=lvx;d^^C4i8wGnKBgzOX55vU`Y+EBDXA=L*}f
zbE(1Eq~RKiJko&0a0TR14ni4_`$M}y>vBc?Bel13GX3N6hg(Iyif?Fi+Nc^hFkNWx@6+@I;ZYVU+>#-|_hdJQRc7X!(~mlq%ve7-}nSacIO
z|11jluu&Ia5l)NifHo~$Eg3pzLtVx(UY{zN#6duQB^D6tjw!&(b);e53bzX2C;(zY
zxYT{;-;7*Kx-Haa7r!1L9*tzf&XzY0EM1UgF5mm&`nIht+lw-6e6)DM3&BddAAb@U
z0GhgF^PPfJfcfv-c+DzM_pZ@nZ#w`qclmAf))$8sDqr%hmXP1F(0#4Q>&ve<(}&)1
zu~@Ye>}UvtE@T=~qkcLBPW=yVmQy96tN9$`POyG^V^#fXrL<
z$4M)EAuu2q{eRkf^Khv5_kX-jDo)wz#F3D4=#-_1$vOwoYRi@_6|&CAHnPk($yP!o
zNwTzAMhIgaOGvf`l_j$f*~b`>!HnVie2r75bE^0E^SgfE>-)QY*Y%!1>W^8xp4~^QtJ)
z=ZeI_Z7ODf8gJ7d#D@;r65HlWOVZ6Z1x1K$r*%3iYj_XJxtLS?5(852zRR6{pmUXD
za#`*Ek6K4`3`LXP%~tl5kEaCbd-I13{tQc9CQs%Z@>|)>!Ab__`OB6qtDP`XS!~r8
z_tWpXv1%;Vr)Nx<9_8VmdEI7XX0Pi$JD)x0To6^hqDH8)D>{h%=UBY#1Qoe}&FRyC
zfMLJyRDFx+My8jQg{2cjfU)2>o}>CYYc{QP&gi(5ZpZqmoVU{HaFiO`C~v*sy8x;+
zkUfSNE!Cv3bcXMxmDyCw`(x6c{gC&R7lWgLgj}HkVfotb#523U+FHUrjGTb}EqC;~
zN;xYas2%E>cCL6?7E1fcZxt;~ns+$R6{ypqwG+5Vl5(vW*%E=9-QKAyBt_{5vp)
z6qlT#PRGbA^&Q;3{ksg;+zF&Ewxy^Kd+UfO-fv=6oq2P#V;)_{E1!%P>(iH#Hi`_%
z2wEQ1O(U1RAD^Z`mtCL9WPK0OK=c@iH3O3&v3i{%Wd>%ZRnu;AEWN9Vux`K#NTX1e
z#Rng_OPaAaK}TZ*z4;UIqkS$sT905uJO-dJ2$1F-xGo1xG;lCRzE<@)(^W0Pqa7U&
zDoP!G){egWOEBS{vNt69Sm^ny3FL`#I{9@qVCEbNg`!;@tX
z;_OB$up^skK@C@tZd6Ub*1AbUvX>DPEJHl)-YXua6-e1UR3W?sh1NqQZVA{l$WR$|
z^>nD12BrGD`(gvaHiEm>N#X`b+upbX9?yom1tVMeV~e~cR7eKYr;dp6g-zwl8Rg2{
z7UGk2ax_b2yZXkyX^@33l++5XfI-$0ZAwA2k0zdWcV%&Uz5P+03UDGf?x;HZc#k|K
znr06>8{HP)hJ4tYpyz8kU|0i#v3{CbL!iNKRf
zW8L$hLdm%33gBXhn(kBkDrkrK2c)L!(Vse^Fzi9{PLe4uo{gA
z4=5*VOwH>u9O|U%{u+o+a^le@)Xt|ShF!!+xy)bQN13|(U@?tEn|3hYt&Ldj0D+wK
zv(2{m;{C}Vn0#92+K+ZPMM4E)yS^MMHD#{io3xUc)iwH-#t?xs}-G_4N@-G$7H5QH;x%*OqIxWMQFv&!W|fh5#kNPtDK0b2TQpG0Pjk
z{Du5YfLC+0wNN6ObC~)n>9Ho?^&I}%r!RjE6%urr+!uzMKVBt&JK#EbFW8}s8n!H~
z&||9h2;*7mlyFra!FTS}O7Oy~Xo?GS)^TWX`AFG#<0Z9VC2*YbsX^l(n;AHP&Juh$
z0sHyN$l3~S2*Y=89UuXiQte)I<#3RZg|~`_Vu#Q9L~Qo2{P)4a-iae5HA0cx_uSXr
zk}h6gF`Tc1>XIMq5ne)}j2(r0d5*B#BYV)u
z+P5F@D}%fmV%5iF=oS5Ql|u%zyR~VxX1ZS@Cq28(4dS81PEp39bqxmXogCEnNZ9%T
zR-Lm|4s#Pv(%b^xm7!uDmG~4vvGf-g4gBZ8;pn)evfmE%$e
z17Ch9ce*2h^MWDCdwPW5_))u?{YI+@6%`djfYt;;1W*Jg8a!X&D3w)1siHT~`F#ep7ngfU%1rF^uuh&f4#
z^Ze-8TZz$PaYd&Ouc#dh`r`?)jezamc8akUkT7{&tr!jk;prgux%Z!jQ9ZQ-vy(?7
zXPq&wsZQNJG
z>boJVk&;17{d~I+olobgM+b-(sS`9pHUhNn-`Dn`XeN-fV0{w+&a;x6bnlHZF6
zSL9)DrlSE31`19&OHfK44}4>iE+o%yzJoo}7t-Ge#Vv-1d9+*e|9)yvWn__jt6o1l
zhbDR)T!()fo84SAjUQ)^rML(^p0q=DAeK&}R~}w{+90R33A_HR%RFs13ZY;;zh
zId~f~>&vAfO}tNxLP|4lw%NQYJg5m)bY>oO!RGC{`dN1HUPQS1dxu9e(AS5XM?qj}KaldLZW)FR1)Ll1x#6QkJm77Ed
zpHMmQ!x0YOiJrkZs77LDegXt|#N(_$yIv_SgkiFR5x@b3PBH@)H*DB&D|!KA;CSykm*%3wF#(u_xA?3-l6kc}erxhp-)WGu7AHHvjgX1($rR*FbtDx^Xo6hl
zR4;op)Tzj`cK{juw(e$IF^67gw^=asj(xGF?(s`E;%rJhZlZY+N|G)#)c|qq+~6BC
zlJeupb%oj8qHhW(8_#dj?u~26B)ZOD_)Q#0%NE9sSsX*M7=KL{(!hHDm`6)8I?
zgRE+u6Z7>b)T9j@Rr$$(s7_%u>4M9OmINXvIVH}JZ&JX}kXA@m)gs!G+>#{a0>ezy
zu_f+bnb8Iz5H3U;6~}xpR0vC7WC1i?c3p=k3T;uM@Ce+!HZSBps?2BQ?rT>)%CEKX
z-qtf5)(Q$LKQZvjU87QKGsC@9-V}Kb-_r!3)DiM~S79mO7EB%AL}PkLjjfC0T)Kb)
z*<8ZlPu`w&s9?PDt5^rOgr$j`s=E?wTZ5^ZbazQ!^yo#Iwr&Q_>nxVG{oiJ1(DX+;
zX1Q%D6;*>~BsZH5DpalPneRH%Q^pF+OGLDSP;_<#9bHk%nB7i<3@B3v1l^sJA=IR1
zZsScu9)}-NrmBMi1D8jyNR;j284u9Y
z`k4l$1d;qy?lCx>nfO4;bMSb~hO}l5UN50^
zRyY(yAsiu+70XD&0Rs@&hZNRCslq}zh-E818d8CCEl#_zVS_NP_K9lwuwf5t&|yBT
z9gtijVEeZmd~Ey2!90$!{q;{6JcdIw2IeatQ?
z_@0o(O#xpJq%?r`Qw)lauvga5?6AQMM!|=fjZk^
zKo0~(8@y#neHfUM#vrP_P5!igrnn_ICz$6{8GiHF!-t1^uHB&T<7g
zO5D(M;|8uaz^2N~NZ(5!+=T>gUL!`M#vlpFB|AyL=ubJFt#!M1*O58k$)*Hkka*u^
zHAr1QQIFqkgi=y3r)WkOec0N(l(l;_q;HPy9|p+gdW!eDJ=>SralZA5=;A3q2YAJS
zq>Z1Fe9sssUpTwOs>yP<%mry@PTaeCse|diWSM_PuE+i{%EJ=9i3SNrG7~2tEyEez
zigWcF%8{=!h#8W<{qYJ4hN9J`=qJ?AB2m#>5ptsl
zmvMK~gI5VmT5BKuTA5vEyz5oTo4VLBAmQ~6D6+xaAY}w9FeC9jO_$L8wEpavw
zfNuQyY8Qu>?x))&^pJ4tx~N&Jr8IhwM}^1~==5Nm(>6<1
zvda=DQ!_77)6E-_w=6WjqNq9?p-}NACWHTxB4T^!z^0nJWD9#z-zov|p(Uan%2>G(
z>BSRA`noZ%(=u0wLR{1qG#z=jG~~uE^MtOVe;o@Ay|AfK0uxY=5<~B_!}iAe>_3
z9cyf`c8^L-!Cz=jm^jjli@*D@X2pr#A3g4M%8s1t^A0Of_o)^!87{q-vxz{brA9
zpDI1yzNh0_JqWyOl@!iAtG51v+jIP1MC9}zR-Y1G_26Oqhn%sfKXt@~7N6ClOu+9zyb}mb0Z&l83*@r?t(1&b>h9ZX<|`0Jdl2qeyb<3atU(hB74kZ@wa&P7
zL9PLynv7o-E|+3DVieTRYiBvnTHmV*ndO7|jx-<#BWx&B*}s=xNBN_CI}Ly~zqd`$
z%5nnj)BSRUX;tpziY%9y#;6i;28TC;x@Y7j*czOoG`4RJ!v`vA+9$=eitqoTIdwJox
z-Z25^(mKmXpJS(;A5ad^{p!=mV;NHzWBE+6;s7{8YSj42RG)6Yii9uobzz1CK^0(8
zK&Z>}juAnEJ#qo|jOj5y=`%XQiZN}jL!$Zp>Ame^zQ_3nfUsOV(??xkHm
z=|%|f;)M#h=N3U&}$+&3A9z~jgL&fkg5y_s>kw_j1X!&yAm(RPMmn7s8I_Cm(b@^
zXWi))V*he`=sj=~86!-;J$jMPP)Oq!`n;OLS*2pS9fculUt=?wclL4_q@>&j^`pBv
z3L=m)j{~^#>(nIrtfrlPB8lxAjO^&rdH7yJ?DZC>25gtZZpDp#^H-%d=kY19RgMDb
zo8i5%ne2U(lScN1l##>(%EKAr=<`$AA6XKBm(xYMh#4Bg94P}VZz_;&q7XC++_fcy
zAQeMS%7NpCzd>>H7Xt0Rd++TFd+V=uR?PS=&I}hS5ZoS}=wi-HgANpT(0}9-BR-pZ
z5Y!~HPw&9ezJqxi6Jfo@r7w41=+fsM384C_N2Fg!g8StDe7xn?&_D!VD^QrB>TCI#
zd+!Sd)x8FrbZG6M#?q^P6w_wVS8X6KBQ3XdKXAwkcBy+q&pdhe=qUTsBY6DIBCy=s
zBD>MR-I;w~r+R81$pZzn%a++ab}OJ@ibQ~WU;|0Kcj?S1Q5M4}{&{%?KWnw(n{yw~
zZUd2vNbAXW1-cBcY@vVxJ49AyU5^_{FIPbU(f^2PVu4_t9y^yNEV}+q4=de_r0|$4
ziUWR_G_uj4;<^RnK`-%!TjUb&d>i}m=dQOZ*;!+u&%6RN+eT
zN1>*KKBld_oXfjp=^ed94!-MSUFJL+$*7#C4?6-rR5Kb;IxWFF|8VdB`Q02b{(KM@>&pI
z`OcU3e!R*Yc7()wzMYWmuSa5w_8+7QV^Q;Oh{f*1h1V3{6*vmgmP~SCM!6XpBZm-X
zLGP~;X5HAQ*ty-9$WBF_oNlpGKfU0j(z8A+S6|lzc@~Q{z+}cse7jc~pPb;{o(2TY
z?nR?T-yPSbJbV}RIxIcANJ$Bqk<$}{8L-I%Dr04@osP(Ivt{?6)YlMa7{WdcoFKnY
z2uZHgm&@sD+q}8rV;WmcWI)ZtG%~+IKa8(>=`d_&9B4SDR}b|kDoZS{_l`HGwKs_i$+&j57ei-{j_glDiy@9II@
z0sVvR@H2pp2;d+%ao_Vtoai#NFoQ=c(rJQfTq!*xo7nXdln$#zsdEF*%JizK0r@A8
zZ1M7iy)ck*3{;it2yJrgYX>7hCup?$Me)X806vrKf2gBH05^8E
ze%>iZdV~Rv{HRdqJyi74JHT8S_RpgmWJ(*Eq={;l%W$6r!*7LEK+1xUY#~7ur
z9MYvzQXGuXVp{e10DjA|d1Ts!Eq`jwABA0dsfS!*+6;N?=Py@NLmI6TdgSiIaLn%c
zWmt7Tul>FDx9Wc{F*I1&8FHTaqb#>#nSp$pQu*>fO3_WHrhY9bVQdfLj-rjq-fnu7
z=bm3>kq3VadB>{`K96=i$jRaVhs)|l)V7+-*)}p~;A%08$@k&aHl?BFdzZCgC9Xx(
zfW_qaMICnT`;7HhF0|+LO+>Vnbr>vt>MzkzNwJ^+<)D53R~VS(kV|#Em2{0B|L3XGZ{n6u*0r+tvM&LFkcEf);sfBq
zm3^TdU}pvo2;0%m(S0ciC0(V}$;&9_W#!R|0=gX?CH!g49^eRng;bIxHC&u^Qx)X(
zg4{6sjz~9v@SGFI^9}$#S&&TsPXqCo+v>@G=>4BVTzYf4M@jNr1_sER65D}excr35
zPWG32wBP-RF>%jHJQ_2NPCKe|81G~!=l(mfG7U%wBd2vmZQFsI<ZuPfA4`fIyi48&f
z;%*j@4T;A&tZOKf=?p{(q(OZjudOPVtay!Y34Q4-6P6By$T6V9L!rGMDNk~&
zQc0<~{zilXV6yqCS}X1C3^f)t=`9_!LoPf@^NX)3KcOIVrN~YbIaJhBRt>9ymv5^<
z^|cS7me`1L$ofwJXpkS2p%9B3y|pq=!t??(p#`;WmUIic+V2xPHN|BOn8U1xQ4;!*&w9TEoA=(%Ha#c$;X@7sWA_bb=4
zHxB}d8syZ}jlX-PLI-^kc(=$>Q+6u}0=k!Rb+JDK8Q1ELq*M%ynLwAh>`XHy$wlGT
zydBvhrZ#&X=2f5DeDqZDQ@2ciO`vbBPTsVPqHPhkMbCB3I2V+A54W6pm0*{%
zp1UN)JWaiKS1LIlBv4Sj8?)vj%%E_-^F{-TSKEN$K->lKe!f%oC!Q!T=@Ja2$B9^*
z!PZ}{`JU%iqkqBy0%)SB+NjboZdapQ`Fy(U9jWNX${yYj??=fMEbXfMdm8~_M_r_v
z11t8ZpL~DG-tUsC`oYPUAzsm}9i
zI6{8p{=^gk=opQ7G$>!2i2N3n_{*k+r>jVoOgNlDzI&$4cw~+W=Dr8So8kM1URDF4
zQlN(7cspZ#yw>Jo5(#lTgYDr$lcBuugW-E`nv_3vi{I%JOy)&TD)s;^rDIvv>U3u?
z3d^!u5YiO1cz1cXJpLCj`K$QpyJf+P-n}K)U;grEAY$l-7|BdC<-Caz-IB`p7YfE%
z5wLCCfanDh#B2ZY`B7VphY*qwsLysu2ut5o+~LhMYOlCyWS2nx*>krp`m}2sC9j%1
zCmwK3RBPS`Im1ABtBy=|M2S`}QS-pPc}wPv%^GWPs*}Tb`$bDQAr5M=w(LLCqQt3zS^)iSV&788
zLA6#>!XGd}hrGoS2Rycjf}1B`GCNUCC%WQ3hXEXPnKfa;O0P
zkY8H6>w@~^x~y~koTH(qvA@{EKg=9307K0CzoVuzMU1W0X}v+@%M8R5r+okxKpUzF
z>=n9(MG>+}O!4YkTB+2qUepb9D&Q#pl)>EIQnT_t0V4$05pw^7fz(YwxG*4-!lpbeW6
zHvAD$YWKK=ncP^_dzJ2|3wnfyEIblD^x>h@*p9q%y}mlb*(7X;%;hn_Z>oiSg`Ao+
zvYSMGWg6>^`1CfWiVZ8}cQvU3Hhz5q7EW-v9G7y*rUTm@-5bO9HvnhkM0reStS;2T
zs5V`#9#|M>Nm*WMm)Aw-{G01O}YjmaU9xQTsuonsl@#_!(qT2
z{T;#w-d$b2SWd0?9pC8(#Y`GkgZvn6P3l~B5$e1HjfQezZ3rNhG)U|;RgwDzXVM@P
zEBoZa+d6*xQn4@xCcB{8`l73FbIQya1uMKB#)E9oH>2J&pJ5umQHDVHST3`GgU6Fb
zKnxSOGI0pz-2kJ!QlbELCCk?9W_aUgH>**h0CjY&*)-w**h2jQBfp^ip24=)Ezdi8
z&Z9p9h{1x{A40NUzzcS&J@ax{wlhWlJPyLw{O|`Q`S7W-4?~K&E@_ddvaLJAmgjrg
z9?g(PPQ2EFGsGLbUm+tu@G9S?menZGte>}iKuAzoxf7>|qhum$c2F_qu!kAL;hdS@
zw>GAl0!qkB-|8A4e%gb)nMRZF1vlwr1vG?8k13%A5QIb3TM2L)&ulzt5dKl`#Y^+i
zZITEnuH0_@*3;?d)`nYSENF9#_13*tVia9Sjn%RRJDc?7!o}?@bHWcWg3fDS&h4q&
z+@)~%#Zyr6dD1-j+iQkoCn|PUU5V_F%g0m(+_;%LGktM06!!P!BEjocRK=9EI#y|LIRMcHPKMa5*j6tutLuQR$
z-vY^;PLvl+QtY9QX@Y_W_`1Bv-!j@cO&_hyMiW7DOYbnyqxah}|4q7t(^&Gn(0?E6
zzq188+g
zbg=h7X2p(w8?bPC+S+14H{_`ZQRKx$JCv1o@)FiOXN$1t?{2#_(a9jr{4JkU=oBp~
zA(7Y$KL26{8$0)dUxK>S{x1>5+Z2P*?GN;Qn=iQxLdxLISnSb3#nbQcH^X8udS(C>u{Bzj<
zxBn>A9CcId>vepG{^&j}xH_UWYoN!55GBR@YX%Skxa81K@e^wDo#ZCa4BH;+@XdMs
z!M*(CyxlU+XVfssvSCDfqnH`!o__F?+0%VzfnQtr{vA#*7h8Re5zX`YD*~Z6ulb3X
zsXKb8+%xa8_z2&pU%ze}_1Zg}YrHII!0y?9#?ofBZ8zsDe+o=yolXRO!z!&VSR8Qm
zzn(+fOB5SzPM|1w^0?dAOz{vUTMU$$JpG5
zlyl;|^!(G7VL<0xOgt7Byf{-qJXK@+)u4sVp0=j*X)T;hvTAtDd&jKUeCb$&%O&jw
z(5T+B2?UN#0|l4rd10MhX2b4KkN4GA*!$nMnQusNG~0{6R-2cX=Lr5@&S3MlM>>2x
z#Xsd57q2~k5j4{EQ+}eaY$p8qTMy5Iw(+tpihC368s0-~$0v>zYbU(+veh%cZUd-w
zgt}RZw9S_BUm$%|OmgqpL6nyc|ZNw07l$
zUh-EXmBQw1G}8e=FwOp6(A-}lv5(-ozC*pqvc0^jIcVo&Crv|dw@=qgPZlQ@L2)H6T*Q1eYlaIh`%zM_kE
z?u38BtMyRZcFrrnLF9Hz#+Fu%cO5!J^yd9bjn`iHJ5}{a;xjb6SBUQU38LBN$lgLH
z`*E=9#WRO5eg0;XXluI1as337-}0=BTcF(P1-sl1@mNf^m2-2og==YTP40P{v!1us
zM_@Yw_%%Nnmtxp~rQkX!ADHQm7`Apxm0bFyKi`D;{EJaStq)}nnyGeG3Po7Ibg}{}
zJTty$&+ZhNbm7-8wV5+tar3}O#*mV6?K^SZ=*bKMKvxSffD>7V~t$KLJRJx=dJ9CRnetjQhzZJWQDnP~cC
zteGR9@#oLDDBm%$vUsBSRMCC*UI}4Gy4ABIti~Dr-ed>em@gKmCCtep2z~a{G2H94
z
## Background
-The open-source package [scikit-learn](https://scikit-learn.org/) provides a large variety of machine
+The [scikit-learn](https://scikit-learn.org/) package provides a large variety of machine
learning algorithms and data processing tools, among which is the `Pipeline` class, allowing users to
prepend custom data processing steps to the machine learning model.
-`MolPipeline` extends this concept to the field of chemoinformatics by
-wrapping default functionalities of [RDKit](https://www.rdkit.org/), such as reading and writing SMILES strings
+`MolPipeline` extends this concept to the field of cheminformatics by
+wrapping standard [RDKit](https://www.rdkit.org/) functionality, such as reading and writing SMILES strings
or calculating molecular descriptors from a molecule-object.
-A notable difference to the `Pipeline` class of scikit-learn is that the Pipline from `MolPipeline` allows for
-instances to fail during processing without interrupting the whole pipeline.
-Such behaviour is useful when processing large datasets, where some SMILES strings might not encode valid molecules
-or some descriptors might not be calculable for certain molecules.
+MolPipeline aims to provide:
+- Automated end-to-end processing from molecule data sets to deployable machine learning models.
+- Scalable parallel processing and low memory usage through instance-based processing.
+- Standard pipeline building blocks for flexibly building custom pipelines for various
+cheminformatics tasks.
+- Consistent error handling for tracking, logging, and replacing failed instances (e.g., a
+SMILES string that could not be parsed correctly).
+- Integrated and self-contained pipeline serialization for easy deployment and tracking
+in version control.
## Publications
-The publication is freely available [here](https://chemrxiv.org/engage/chemrxiv/article-details/661fec7f418a5379b00ae036).
+[Sieg J, Feldmann CW, Hemmerich J, Stork C, Sandfort F, Eiden P, and Mathea M, MolPipeline: A Python package for processing
+molecules with RDKit in scikit-learn, J. Chem. Inf. Model., doi:10.1021/acs.jcim.4c00863, 2024](https://doi.org/10.1021/acs.jcim.4c00863)
+\
+Further links: [ChemRxiv](https://chemrxiv.org/engage/chemrxiv/article-details/661fec7f418a5379b00ae036)
+
+Feldmann CW, Sieg J, and Mathea M, Analysis of uncertainty of neural
+fingerprint-based models, 2024
+\
+Further links: [repository](https://github.com/basf/neural-fingerprint-uncertainty)
## Installation
```commandline
pip install molpipeline
```
-## Usage
+## Documentation
+
+The [notebooks](notebooks) folder contains many basic and advanced examples of how to use MolPipeline.
+
+A nice introduction to the basic usage is in the [01_getting_started_with_molpipeline notebook](notebooks/01_getting_started_with_molpipeline.ipynb).
-See the [notebooks](notebooks) folder for basic and advanced examples of how to use Molpipeline.
+## Quick Start
-A basic example of how to use MolPipeline to create a fingerprint-based model is shown below (see also the [notebook](notebooks/01_getting_started_with_molpipeline.ipynb)):
+### Model building
+
+Create a fingerprint-based prediction model:
```python
from molpipeline import Pipeline
from molpipeline.any2mol import AutoToMol
@@ -58,8 +79,42 @@ pipeline.predict(["CCC"])
# output: array([0.29])
```
-Molpipeline also provides custom estimators for standard cheminformatics tasks that can be integrated into pipelines,
-like clustering for scaffold splits (see also the [notebook](notebooks/02_scaffold_split_with_custom_estimators.ipynb)):
+### Feature calculation
+
+Calculating molecular descriptors from SMILES strings is straightforward. For example, physicochemical properties can
+be calculated like this:
+```python
+from molpipeline import Pipeline
+from molpipeline.any2mol import AutoToMol
+from molpipeline.mol2any import MolToRDKitPhysChem
+
+pipeline_physchem = Pipeline(
+ [
+ ("auto2mol", AutoToMol()),
+ (
+ "physchem",
+ MolToRDKitPhysChem(
+ standardizer=None,
+ descriptor_list=["HeavyAtomMolWt", "TPSA", "NumHAcceptors"],
+ ),
+ ),
+ ],
+ n_jobs=-1,
+)
+physchem_matrix = pipeline_physchem.transform(["CCCCCC", "c1ccccc1(O)"])
+physchem_matrix
+# output: array([[72.066, 0. , 0. ],
+# [88.065, 20.23 , 1. ]])
+```
+
+MolPipeline provides further features and descriptors from RDKit,
+for example Morgan (binary/count) fingerprints and MACCS keys.
+See the [04_feature_calculation notebook](notebooks/04_feature_calculation.ipynb) for more examples.
+
+### Clustering
+
+MolPipeline provides several clustering algorithms as sklearn-like estimators. For example, molecules can be
+clustered by their Murcko scaffold. See the [02_scaffold_split_with_custom_estimators notebook](notebooks/02_scaffold_split_with_custom_estimators.ipynb) for scaffold splits and further examples.
```python
from molpipeline.estimators import MurckoScaffoldClustering
diff --git a/notebooks/04_feature_calculation.ipynb b/notebooks/04_feature_calculation.ipynb
new file mode 100644
index 00000000..1bcf9ce8
--- /dev/null
+++ b/notebooks/04_feature_calculation.ipynb
@@ -0,0 +1,915 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a5e18566-ab97-4ead-b6e3-0ad930754a21",
+ "metadata": {},
+ "source": [
+ "# Feature calculation\n",
+ "\n",
+ "\n",
+ "\n",
+ "MolPipeline provides multiple molecular featurization methods and descriptors from RDKit. This notebook shows how features like\n",
+ "\n",
+ "- Morgan binary fingerprints\n",
+ "- Morgan count fingerprints\n",
+ "- MACCS keys fingerprints\n",
+ "- Physicochemical features\n",
+ "\n",
+ "can be easily calculated in parallel and in different variations with MolPipeline. If you are interested in further molecular featurization and descriptors check out the `molpipeline.mol2any` module."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "6872cc5e-5851-42ec-a63e-071d8139829e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "from molpipeline import Pipeline\n",
+ "from molpipeline.any2mol import AutoToMol\n",
+ "from molpipeline.mol2any import MolToMorganFP, MolToMACCSFP, MolToRDKitPhysChem"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8a6ba6bf-c0cd-4949-82f3-e71e538cdee0",
+ "metadata": {},
+ "source": [
+ "In this example we fetch the ESOL (delaney) data set. However, you can use any other data set."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "761f0ee7-3e66-4e86-bdac-e9dcec9ecb17",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_full = pd.read_csv(\n",
+ " \"https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/delaney-processed.csv\",\n",
+ " usecols=lambda col: col != \"num\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6853d13e-c371-49cc-8009-544022c67d34",
+ "metadata": {},
+ "source": [
+ "We use a smaller portion of the data set for illustration"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "d47ea54e-ac15-4358-ae2b-7e8428642a26",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Compound ID | \n",
+ " ESOL predicted log solubility in mols per litre | \n",
+ " Minimum Degree | \n",
+ " Molecular Weight | \n",
+ " Number of H-Bond Donors | \n",
+ " Number of Rings | \n",
+ " Number of Rotatable Bonds | \n",
+ " Polar Surface Area | \n",
+ " measured log solubility in mols per litre | \n",
+ " smiles | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Amigdalin | \n",
+ " -0.974 | \n",
+ " 1 | \n",
+ " 457.432 | \n",
+ " 7 | \n",
+ " 3 | \n",
+ " 7 | \n",
+ " 202.32 | \n",
+ " -0.77 | \n",
+ " OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Fenfuram | \n",
+ " -2.885 | \n",
+ " 1 | \n",
+ " 201.225 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 42.24 | \n",
+ " -3.30 | \n",
+ " Cc1occc1C(=O)Nc2ccccc2 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " citral | \n",
+ " -2.579 | \n",
+ " 1 | \n",
+ " 152.237 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 4 | \n",
+ " 17.07 | \n",
+ " -2.06 | \n",
+ " CC(C)=CCCC(C)=CC(=O) | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Picene | \n",
+ " -6.618 | \n",
+ " 2 | \n",
+ " 278.354 | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 0.00 | \n",
+ " -7.87 | \n",
+ " c1ccc2c(c1)ccc3c2ccc4c5ccccc5ccc43 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Thiophene | \n",
+ " -2.232 | \n",
+ " 2 | \n",
+ " 84.143 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0.00 | \n",
+ " -1.33 | \n",
+ " c1ccsc1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 95 | \n",
+ " diethylstilbestrol | \n",
+ " -5.074 | \n",
+ " 1 | \n",
+ " 268.356 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 40.46 | \n",
+ " -4.07 | \n",
+ " CCC(=C(CC)c1ccc(O)cc1)c2ccc(O)cc2 | \n",
+ "
\n",
+ " \n",
+ " 96 | \n",
+ " Chlorothalonil | \n",
+ " -3.995 | \n",
+ " 1 | \n",
+ " 265.914 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 47.58 | \n",
+ " -5.64 | \n",
+ " c1(C#N)c(Cl)c(C#N)c(Cl)c(Cl)c(Cl)1 | \n",
+ "
\n",
+ " \n",
+ " 97 | \n",
+ " 2,3',4',5-PCB | \n",
+ " -6.312 | \n",
+ " 1 | \n",
+ " 291.992 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 0.00 | \n",
+ " -7.25 | \n",
+ " Clc1ccc(Cl)c(c1)c2ccc(Cl)c(Cl)c2 | \n",
+ "
\n",
+ " \n",
+ " 98 | \n",
+ " styrene oxide | \n",
+ " -1.826 | \n",
+ " 2 | \n",
+ " 120.151 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 12.53 | \n",
+ " -1.60 | \n",
+ " C1OC1c2ccccc2 | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " Isopropylbenzene | \n",
+ " -3.265 | \n",
+ " 1 | \n",
+ " 120.195 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.00 | \n",
+ " -3.27 | \n",
+ " CC(C)c1ccccc1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
100 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Compound ID ESOL predicted log solubility in mols per litre \\\n",
+ "0 Amigdalin -0.974 \n",
+ "1 Fenfuram -2.885 \n",
+ "2 citral -2.579 \n",
+ "3 Picene -6.618 \n",
+ "4 Thiophene -2.232 \n",
+ ".. ... ... \n",
+ "95 diethylstilbestrol -5.074 \n",
+ "96 Chlorothalonil -3.995 \n",
+ "97 2,3',4',5-PCB -6.312 \n",
+ "98 styrene oxide -1.826 \n",
+ "99 Isopropylbenzene -3.265 \n",
+ "\n",
+ " Minimum Degree Molecular Weight Number of H-Bond Donors \\\n",
+ "0 1 457.432 7 \n",
+ "1 1 201.225 1 \n",
+ "2 1 152.237 0 \n",
+ "3 2 278.354 0 \n",
+ "4 2 84.143 0 \n",
+ ".. ... ... ... \n",
+ "95 1 268.356 2 \n",
+ "96 1 265.914 0 \n",
+ "97 1 291.992 0 \n",
+ "98 2 120.151 0 \n",
+ "99 1 120.195 0 \n",
+ "\n",
+ " Number of Rings Number of Rotatable Bonds Polar Surface Area \\\n",
+ "0 3 7 202.32 \n",
+ "1 2 2 42.24 \n",
+ "2 0 4 17.07 \n",
+ "3 5 0 0.00 \n",
+ "4 1 0 0.00 \n",
+ ".. ... ... ... \n",
+ "95 2 4 40.46 \n",
+ "96 1 0 47.58 \n",
+ "97 2 1 0.00 \n",
+ "98 2 1 12.53 \n",
+ "99 1 1 0.00 \n",
+ "\n",
+ " measured log solubility in mols per litre \\\n",
+ "0 -0.77 \n",
+ "1 -3.30 \n",
+ "2 -2.06 \n",
+ "3 -7.87 \n",
+ "4 -1.33 \n",
+ ".. ... \n",
+ "95 -4.07 \n",
+ "96 -5.64 \n",
+ "97 -7.25 \n",
+ "98 -1.60 \n",
+ "99 -3.27 \n",
+ "\n",
+ " smiles \n",
+ "0 OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)... \n",
+ "1 Cc1occc1C(=O)Nc2ccccc2 \n",
+ "2 CC(C)=CCCC(C)=CC(=O) \n",
+ "3 c1ccc2c(c1)ccc3c2ccc4c5ccccc5ccc43 \n",
+ "4 c1ccsc1 \n",
+ ".. ... \n",
+ "95 CCC(=C(CC)c1ccc(O)cc1)c2ccc(O)cc2 \n",
+ "96 c1(C#N)c(Cl)c(C#N)c(Cl)c(Cl)c(Cl)1 \n",
+ "97 Clc1ccc(Cl)c(c1)c2ccc(Cl)c(Cl)c2 \n",
+ "98 C1OC1c2ccccc2 \n",
+ "99 CC(C)c1ccccc1 \n",
+ "\n",
+ "[100 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = df_full.head(n=100)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "80d9843a-a702-4da5-8a4f-6c5ed7a5034b",
+ "metadata": {},
+ "source": [
+ "## Calculating fingerprints"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "15dcb6cb-2a8e-4d62-a218-826581155816",
+ "metadata": {},
+ "source": [
+ "### Morgan binary fingerprints\n",
+ "\n",
+ "Morgan fingerprints are the most popular molecular fingerprints. They are also known as [Extended-Connectivity Fingerprints (ECFP)](https://doi.org/10.1021/ci100050t). They encode circular substructures in the molecule. The binary version contains only 0s and 1s indicating the presence or absence of the substructures in the molecule."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1a838dd7-ec21-4875-a5b8-c5e0c27d9389",
+ "metadata": {},
+ "source": [
+ "Let's define the Pipeline to first read the molecule and then calculate the binary Morgan fingerprint. Then, we execute it by calling the `transform` function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "b6be019a-cc4d-45b2-b41a-9dca98d9644c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 181 ms, sys: 247 ms, total: 428 ms\n",
+ "Wall time: 12.6 s\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "# define the pipeline\n",
+ "pipeline_morgan = Pipeline(\n",
+ " [(\"auto2mol\", AutoToMol()), (\"morgan2_2048\", MolToMorganFP(n_bits=2048, radius=2))],\n",
+ " n_jobs=-1,\n",
+ ")\n",
+ "# execute the pipeline\n",
+ "morgan_matrix = pipeline_morgan.transform(df[\"smiles\"])\n",
+ "morgan_matrix"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a13cc430-1c5e-4399-ab50-4b56ce8a7c09",
+ "metadata": {},
+ "source": [
+ "By default, the `MolToMorganFP` element returns a sparse matrix. More specifically, a [csr_matrix](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html) is returned which is more memory efficient than a dense matrix since most elements in the matrix are zero."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d872a591-cbfe-4158-8960-da813249fd1b",
+ "metadata": {},
+ "source": [
+ "To get a dense matrix you can convert the `csr_matrix` to a dense numpy matrix like this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "5d9d772b-98b9-42e5-ba12-11f007a3d17f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[0, 1, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " ...,\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 1, 0, ..., 0, 0, 0]])"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "morgan_matrix.todense()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "923f168d-e6e4-418d-adb3-5451555b1303",
+ "metadata": {},
+ "source": [
+ "Alternatively, you can specify in the `MolToMorganFP` element the return type of the feature matrix by using the `return_as` option. You can choose between\n",
+ "\n",
+ "- `return_as=\"sparse\"` which returns a `csr_matrix`\n",
+ "- `return_as=\"dense\"` which returns a dense numpy matrix\n",
+ "- `return_as=\"explicit_bit_vect\"` which returns RDKit's dense [ExplicitBitVect](https://www.rdkit.org/new_docs/cppapi/classExplicitBitVect.html)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "e728cf48-10bb-4168-9229-fe48b462ac03",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 45.4 ms, sys: 11.7 ms, total: 57 ms\n",
+ "Wall time: 62.4 ms\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([[0, 1, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " ...,\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 1, 0, ..., 0, 0, 0]], dtype=uint8)"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "pipeline_morgan_dense = Pipeline(\n",
+ " [\n",
+ " (\"auto2mol\", AutoToMol()),\n",
+ " (\"morgan2_2048\", MolToMorganFP(n_bits=2048, radius=2, return_as=\"dense\")),\n",
+ " ],\n",
+ " n_jobs=-1,\n",
+ ")\n",
+ "dense_morgan_matrix = pipeline_morgan_dense.transform(df[\"smiles\"])\n",
+ "dense_morgan_matrix"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6aecd789-2198-4325-b892-6aeecf857e25",
+ "metadata": {},
+ "source": [
+ "The feature matrix can be used to train a machine learning model but also for various analyses."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "85043b30-7476-4204-8268-a9375b2ee4f8",
+ "metadata": {},
+ "source": [
+ "### Morgan count fingerprints"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9897e96f-4ffd-434b-b629-837a31a99f04",
+ "metadata": {},
+ "source": [
+ "Just set `counted=True` to compute Morgan count fingerprints instead of binary fingerprints."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "477ebba4-0fbe-46c2-8c4a-13f9051ae85b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[0, 1, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " ...,\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 1, 0, ..., 0, 0, 0]], dtype=uint32)"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pipeline_morgan_counted = Pipeline(\n",
+ " [\n",
+ " (\"auto2mol\", AutoToMol()),\n",
+ " (\n",
+ " \"morgan2_2048\",\n",
+ " MolToMorganFP(n_bits=2048, radius=2, counted=True, return_as=\"dense\"),\n",
+ " ),\n",
+ " ],\n",
+ " n_jobs=-1,\n",
+ ")\n",
+ "count_morgan_matrix = pipeline_morgan_counted.transform(df[\"smiles\"])\n",
+ "count_morgan_matrix"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0e24ea56-f0f8-4426-b3e3-da960b93d431",
+ "metadata": {},
+ "source": [
+ "When we sort the matrix values we see that some substructures are present up to 14 times in a single molecule."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "189ea2d6-9274-4097-b654-5ca88c318abf",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[14, 13, 12, 12, 11, 10, 10, 10, 10, 10, 10, 10, 9, 9, 8, 8, 8, 8, 8, 8]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sorted(count_morgan_matrix.ravel(), reverse=True)[:20]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "80fb055a-1b4c-4c69-989c-5f3e774e80e1",
+ "metadata": {},
+ "source": [
+ "### MACCS key fingerprints\n",
+ "\n",
+ "MACCS keys are a manually defined set of 166 substructures whose presence is checked in the molecule. MACCS keys contain for example common functional groups."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "d9a11c62-c8ad-470f-b40f-f5d4ddc16b61",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 43.8 ms, sys: 1.15 ms, total: 44.9 ms\n",
+ "Wall time: 70.9 ms\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([[0, 0, 0, ..., 1, 1, 0],\n",
+ " [0, 0, 0, ..., 1, 1, 0],\n",
+ " [0, 0, 0, ..., 1, 0, 0],\n",
+ " ...,\n",
+ " [0, 0, 0, ..., 0, 1, 0],\n",
+ " [0, 0, 0, ..., 1, 1, 0],\n",
+ " [0, 0, 0, ..., 0, 1, 0]])"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "pipeline_maccs_dense = Pipeline(\n",
+ " [(\"auto2mol\", AutoToMol()), (\"maccs\", MolToMACCSFP(return_as=\"dense\"))],\n",
+ " n_jobs=-1,\n",
+ ")\n",
+ "dense_maccs_matrix = pipeline_maccs_dense.transform(df[\"smiles\"])\n",
+ "dense_maccs_matrix"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7d3546ca-6d58-4a69-a252-d7deb3147a40",
+ "metadata": {},
+ "source": [
+ "## Physicochemical features\n",
+ "\n",
+ "RDKit also provides more than 200 physicochemical descriptors that can readily be computed from most molecules. In MolPipeline we can compute these features with the `MolToRDKitPhysChem` element."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "858afb55-7e24-415d-bb5a-e0d7c811d6df",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 68.1 ms, sys: 2.43 ms, total: 70.5 ms\n",
+ "Wall time: 171 ms\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([[10.25332888, 10.25332888, 0.48660209, ..., 0. ,\n",
+ " 0. , 0. ],\n",
+ " [11.72491119, 11.72491119, 0.14587963, ..., 0. ,\n",
+ " 0. , 0. ],\n",
+ " [10.02049761, 10.02049761, 0.84508976, ..., 0. ,\n",
+ " 0. , 0. ],\n",
+ " ...,\n",
+ " [ 6.08815823, 6.08815823, 0.49556374, ..., 0. ,\n",
+ " 0. , 0. ],\n",
+ " [ 5.09453704, 5.09453704, 0.40851852, ..., 0. ,\n",
+ " 0. , 0. ],\n",
+ " [ 2.2037037 , 2.2037037 , 0.65851852, ..., 0. ,\n",
+ " 0. , 0. ]])"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "pipeline_physchem = Pipeline(\n",
+ " [(\"auto2mol\", AutoToMol()), (\"physchem\", MolToRDKitPhysChem(standardizer=None))],\n",
+ " n_jobs=-1,\n",
+ ")\n",
+ "physchem_matrix = pipeline_physchem.transform(df[\"smiles\"])\n",
+ "physchem_matrix"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8746f6cb-dc30-4435-a97b-0235f2c8c47a",
+ "metadata": {},
+ "source": [
+ "We can get the name of the descriptors like this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "f0b5fe47-54f0-4cca-9a1a-aa689a0b2d0c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['MaxAbsEStateIndex',\n",
+ " 'MaxEStateIndex',\n",
+ " 'MinAbsEStateIndex',\n",
+ " 'MinEStateIndex',\n",
+ " 'qed',\n",
+ " 'SPS',\n",
+ " 'HeavyAtomMolWt',\n",
+ " 'ExactMolWt',\n",
+ " 'NumValenceElectrons',\n",
+ " 'NumRadicalElectrons',\n",
+ " 'MaxPartialCharge',\n",
+ " 'MinPartialCharge',\n",
+ " 'MaxAbsPartialCharge',\n",
+ " 'MinAbsPartialCharge',\n",
+ " 'FpDensityMorgan1',\n",
+ " 'FpDensityMorgan2',\n",
+ " 'FpDensityMorgan3',\n",
+ " 'BCUT2D_MWHI',\n",
+ " 'BCUT2D_MWLOW',\n",
+ " 'BCUT2D_CHGHI']"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pipeline_physchem[\"physchem\"].descriptor_list[:20]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b0823f4d-8a2e-4ae2-91f7-3db6ecaf0c0e",
+ "metadata": {},
+ "source": [
+ "When we only want to calculate a subset of all available descriptors we can specify this during pipeline construction"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "a3e005f3-f421-4634-9135-860e91a19de1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 41.2 ms, sys: 3.38 ms, total: 44.6 ms\n",
+ "Wall time: 47.5 ms\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([[430.216, 202.32 , 12. ],\n",
+ " [190.137, 42.24 , 2. ],\n",
+ " [136.109, 17.07 , 1. ],\n",
+ " [264.242, 0. , 0. ],\n",
+ " [ 80.111, 0. , 1. ],\n",
+ " [130.151, 12.89 , 2. ],\n",
+ " [321.397, 0. , 0. ],\n",
+ " [248.196, 40.46 , 2. ],\n",
+ " [372.849, 12.53 , 1. ],\n",
+ " [372.247, 63.22 , 6. ],\n",
+ " [ 78.05 , 29.1 , 1. ],\n",
+ " [155.563, 0. , 0. ],\n",
+ " [ 60.055, 0. , 0. ],\n",
+ " [204.144, 58.2 , 2. ],\n",
+ " [168.154, 0. , 0. ],\n",
+ " [ 71.486, 0. , 0. ],\n",
+ " [ 76.054, 20.23 , 1. ],\n",
+ " [ 98.084, 23.79 , 1. ],\n",
+ " [283.184, 53.47 , 6. ],\n",
+ " [148.12 , 20.23 , 1. ],\n",
+ " [321.397, 0. , 0. ],\n",
+ " [216.155, 54.86 , 3. ],\n",
+ " [243.25 , 18.46 , 5. ],\n",
+ " [166.115, 38.33 , 2. ],\n",
+ " [309.139, 115.54 , 6. ],\n",
+ " [100.076, 20.23 , 1. ],\n",
+ " [172.103, 72.68 , 5. ],\n",
+ " [196.121, 75.27 , 3. ],\n",
+ " [309.966, 0. , 0. ],\n",
+ " [140.097, 26.3 , 2. ],\n",
+ " [120.11 , 0. , 0. ],\n",
+ " [267.272, 18.46 , 5. ],\n",
+ " [284.186, 76.66 , 4. ],\n",
+ " [ 94.928, 0. , 0. ],\n",
+ " [168.154, 0. , 0. ],\n",
+ " [ 76.054, 17.07 , 1. ],\n",
+ " [158.139, 12.03 , 1. ],\n",
+ " [234.215, 29.54 , 3. ],\n",
+ " [325.266, 38.77 , 5. ],\n",
+ " [210.981, 0. , 0. ],\n",
+ " [179.585, 0. , 0. ],\n",
+ " [ 76.054, 20.23 , 1. ],\n",
+ " [160.088, 75.27 , 3. ],\n",
+ " [136.109, 20.23 , 1. ],\n",
+ " [ 80.042, 26.3 , 2. ],\n",
+ " [100.076, 20.23 , 1. ],\n",
+ " [205.998, 29.1 , 1. ],\n",
+ " [258.034, 60.91 , 4. ],\n",
+ " [328.195, 107.77 , 7. ],\n",
+ " [146.128, 12.89 , 1. ],\n",
+ " [ 96.088, 0. , 0. ],\n",
+ " [220.143, 75.27 , 3. ],\n",
+ " [216.198, 0. , 0. ],\n",
+ " [248.015, 54.86 , 3. ],\n",
+ " [356.85 , 0. , 0. ],\n",
+ " [100.076, 20.23 , 1. ],\n",
+ " [108.099, 0. , 0. ],\n",
+ " [144.132, 0. , 0. ],\n",
+ " [228.209, 0. , 0. ],\n",
+ " [ 76.054, 17.07 , 1. ],\n",
+ " [427.756, 0. , 0. ],\n",
+ " [104.064, 26.3 , 2. ],\n",
+ " [367.223, 115.06 , 6. ],\n",
+ " [102.072, 46.25 , 2. ],\n",
+ " [248.157, 90.06 , 5. ],\n",
+ " [347.692, 54.37 , 3. ],\n",
+ " [213.587, 53.94 , 5. ],\n",
+ " [118.075, 68.87 , 3. ],\n",
+ " [223.993, 72.19 , 2. ],\n",
+ " [215.038, 0. , 0. ],\n",
+ " [232.111, 118.05 , 6. ],\n",
+ " [277.042, 52.37 , 3. ],\n",
+ " [136.109, 17.07 , 1. ],\n",
+ " [232.154, 75.27 , 3. ],\n",
+ " [116.075, 26.3 , 2. ],\n",
+ " [116.075, 26.3 , 2. ],\n",
+ " [356.252, 75.71 , 4. ],\n",
+ " [250.491, 0. , 0. ],\n",
+ " [115.937, 0. , 0. ],\n",
+ " [325.09 , 49.17 , 5. ],\n",
+ " [245.177, 55.84 , 6. ],\n",
+ " [140.105, 51.56 , 4. ],\n",
+ " [ 72.092, 52.04 , 1. ],\n",
+ " [ 96.088, 0. , 0. ],\n",
+ " [120.11 , 0. , 0. ],\n",
+ " [236.74 , 0. , 0. ],\n",
+ " [428.285, 68.55 , 5. ],\n",
+ " [ 82.038, 43.14 , 2. ],\n",
+ " [136.109, 17.07 , 1. ],\n",
+ " [261.627, 45.23 , 3. ],\n",
+ " [188.977, 43.14 , 2. ],\n",
+ " [236.211, 58.2 , 3. ],\n",
+ " [192.176, 0. , 0. ],\n",
+ " [ 88.065, 9.23 , 1. ],\n",
+ " [144.132, 0. , 0. ],\n",
+ " [248.196, 40.46 , 2. ],\n",
+ " [265.914, 47.58 , 2. ],\n",
+ " [285.944, 0. , 0. ],\n",
+ " [112.087, 12.53 , 1. ],\n",
+ " [108.099, 0. , 0. ]])"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "pipeline_physchem_small = Pipeline(\n",
+ " [\n",
+ " (\"auto2mol\", AutoToMol()),\n",
+ " (\n",
+ " \"physchem\",\n",
+ " MolToRDKitPhysChem(\n",
+ " standardizer=None,\n",
+ " descriptor_list=[\"HeavyAtomMolWt\", \"TPSA\", \"NumHAcceptors\"],\n",
+ " ),\n",
+ " ),\n",
+ " ],\n",
+ " n_jobs=-1,\n",
+ ")\n",
+ "physchem_matrix_small = pipeline_physchem_small.transform(df[\"smiles\"])\n",
+ "physchem_matrix_small"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
From ccb5d59aef8b39f2b79d15ef8dddbc5e266639ad Mon Sep 17 00:00:00 2001
From: Jochen Sieg
Date: Fri, 20 Sep 2024 14:59:59 +0200
Subject: [PATCH 6/6] add counted fps to test
---
.../test_mol2morgan_fingerprint.py | 26 ++++++++++++-------
tests/utils/fingerprints.py | 4 ++-
2 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/tests/test_elements/test_mol2any/test_mol2morgan_fingerprint.py b/tests/test_elements/test_mol2any/test_mol2morgan_fingerprint.py
index 14ff2282..6fae46b4 100644
--- a/tests/test_elements/test_mol2any/test_mol2morgan_fingerprint.py
+++ b/tests/test_elements/test_mol2any/test_mol2morgan_fingerprint.py
@@ -10,6 +10,7 @@
from molpipeline import Pipeline
from molpipeline.any2mol import SmilesToMol
from molpipeline.mol2any import MolToMorganFP
+from tests.utils.fingerprints import fingerprints_to_numpy
test_smiles = [
"c1ccccc1",
@@ -129,11 +130,15 @@ def test_setter_getter_error_handling(self) -> None:
self.assertRaises(ValueError, mol_fp.set_params, **params)
def test_bit2atom_mapping(self) -> None:
- """Test that the mapping from bits to atom weights works as intended."""
- # lower n_bit values, e.g. 2048, will lead to a bit clash during folding,
- # for the test smiles "NCCOCCCC(=O)O".
- # We want no folding clashes in this test to check the correct length
- # of the bit-to-atom mapping.
+ """Test that the mapping from bits to atom weights works as intended.
+
+ Notes
+ -----
+ Lower n_bits values, e.g. 2048, lead to a bit clash during folding
+ for the test SMILES "NCCOCCCC(=O)O".
+ We want no folding clashes in this test so that the number of non-zero
+ fingerprint entries equals the length of the bit-to-atom mapping.
+ """
n_bits = 2100
sparse_morgan = MolToMorganFP(radius=2, n_bits=n_bits, return_as="sparse")
dense_morgan = MolToMorganFP(radius=2, n_bits=n_bits, return_as="dense")
@@ -144,10 +149,13 @@ def test_bit2atom_mapping(self) -> None:
smi2mol = SmilesToMol()
for test_smi in test_smiles:
for fp_gen in [sparse_morgan, dense_morgan, explicit_bit_vect_morgan]:
- mol = smi2mol.transform([test_smi])[0]
- fp = fp_gen.transform([mol])
- mapping = fp_gen.bit2atom_mapping(mol)
- self.assertEqual(np.sum(fp), len(mapping)) # type: ignore
+ for counted in [False, True]:
+ mol = smi2mol.transform([test_smi])[0]
+ fp_gen.set_params(counted=counted)
+ fp = fp_gen.transform([mol])
+ mapping = fp_gen.bit2atom_mapping(mol)
+ np_fp = fingerprints_to_numpy(fp)
+ self.assertEqual(np.nonzero(np_fp)[0].shape[0], len(mapping)) # type: ignore
if __name__ == "__main__":
diff --git a/tests/utils/fingerprints.py b/tests/utils/fingerprints.py
index 5973d004..1ca392a4 100644
--- a/tests/utils/fingerprints.py
+++ b/tests/utils/fingerprints.py
@@ -8,7 +8,7 @@
# pylint: disable=no-name-in-module
from rdkit.Chem import rdFingerprintGenerator as rdkit_fp
-from rdkit.DataStructs import ExplicitBitVect
+from rdkit.DataStructs import ExplicitBitVect, UIntSparseIntVect
from scipy import sparse
@@ -59,6 +59,8 @@ def fingerprints_to_numpy(
"""
if all(isinstance(fp, ExplicitBitVect) for fp in fingerprints):
return np.array(fingerprints)
+ if all(isinstance(fp, UIntSparseIntVect) for fp in fingerprints):
+ return np.array([fp.ToList() for fp in fingerprints])
if isinstance(fingerprints, sparse.csr_matrix):
return fingerprints.toarray()
if isinstance(fingerprints, np.ndarray):