From 21b1f101e4947ffb880427540b34074399809ecf Mon Sep 17 00:00:00 2001 From: JochenSiegWork <135010976+JochenSiegWork@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:59:26 +0200 Subject: [PATCH] mol2morgan_fingerprint: fix bug of mismatched indices after folding (#93) --- molpipeline/mol2any/mol2morgan_fingerprint.py | 5 ++--- .../test_mol2any/test_mol2morgan_fingerprint.py | 12 ++---------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/molpipeline/mol2any/mol2morgan_fingerprint.py b/molpipeline/mol2any/mol2morgan_fingerprint.py index 79fa46c1..36fca193 100644 --- a/molpipeline/mol2any/mol2morgan_fingerprint.py +++ b/molpipeline/mol2any/mol2morgan_fingerprint.py @@ -154,8 +154,7 @@ def _explain_rdmol(self, mol_obj: RDKitMol) -> dict[int, list[tuple[int, int]]]: fp_generator = self._get_fp_generator() additional_output = AllChem.AdditionalOutput() additional_output.AllocateBitInfoMap() - _ = fp_generator.GetSparseFingerprint( - mol_obj, additionalOutput=additional_output - ) + # using the dense fingerprint here, to get indices after folding + _ = fp_generator.GetFingerprint(mol_obj, additionalOutput=additional_output) bit_info = additional_output.GetBitInfoMap() return bit_info diff --git a/tests/test_elements/test_mol2any/test_mol2morgan_fingerprint.py b/tests/test_elements/test_mol2any/test_mol2morgan_fingerprint.py index 6fae46b4..2b765949 100644 --- a/tests/test_elements/test_mol2any/test_mol2morgan_fingerprint.py +++ b/tests/test_elements/test_mol2any/test_mol2morgan_fingerprint.py @@ -130,16 +130,8 @@ def test_setter_getter_error_handling(self) -> None: self.assertRaises(ValueError, mol_fp.set_params, **params) def test_bit2atom_mapping(self) -> None: - """Test that the mapping from bits to atom weights works as intended. - - Notes - ----- - lower n_bit values, e.g. 2048, will lead to a bit clash during folding, - for the test smiles "NCCOCCCC(=O)O". - We want no folding clashes in this test to check the correct length - of the bit-to-atom mapping. - """ - n_bits = 2100 + """Test that the mapping from bits to atom weights works as intended.""" + n_bits = 2048 sparse_morgan = MolToMorganFP(radius=2, n_bits=n_bits, return_as="sparse") dense_morgan = MolToMorganFP(radius=2, n_bits=n_bits, return_as="dense") explicit_bit_vect_morgan = MolToMorganFP(