-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add graph_descriptors * update doc * prepare release * fix version * prepare release * prepare release 0.0.8 * update Molecule subpackage * update reaction cleaning * add graph fingerprint * update graph signature * add nx_to_gml function * update format package, transforming gml to nx and reverse; transforming mol to nx and reverse * format * add new features, prepare release * fix format
- Loading branch information
1 parent
c435122
commit fa4868b
Showing
44 changed files
with
2,476 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import unittest | ||
from rdkit import Chem | ||
from synutility.SynChem.Molecule.standardize import ( | ||
normalize_molecule, | ||
canonicalize_tautomer, | ||
salts_remover, | ||
uncharge_molecule, | ||
fragments_remover, | ||
remove_explicit_hydrogens, | ||
remove_radicals_and_add_hydrogens, | ||
remove_isotopes, | ||
clear_stereochemistry, | ||
) | ||
|
||
|
||
class TestMoleculeFunctions(unittest.TestCase):
    """Tests for the molecule standardization helpers.

    Most tests parse a SMILES string with RDKit, apply a single helper and
    compare the SMILES of the returned molecule with an expected string.
    """

    def _check_transform(self, transform, smi, expect):
        """Apply ``transform`` to the molecule parsed from ``smi``.

        Asserts that the result is a ``Chem.Mol`` and that
        ``Chem.MolToSmiles`` of the result equals ``expect``.
        """
        mol = Chem.MolFromSmiles(smi)
        result = transform(mol)
        self.assertIsInstance(result, Chem.Mol)
        self.assertEqual(expect, Chem.MolToSmiles(result))

    def test_normalize_molecule(self):
        """The charge-separated sulfur group is expected as ``S(=O)=O``."""
        self._check_transform(
            normalize_molecule,
            "[Na]OC(=O)c1ccc(C[S+2]([O-])([O-]))cc1",
            "O=C(O[Na])c1ccc(C[S](=O)=O)cc1",
        )

    def test_canonicalize_tautomer(self):
        """The imine tautomer is expected to become the amino form."""
        self._check_transform(
            canonicalize_tautomer, "N=c1[nH]cc[nH]1", "Nc1ncc[nH]1"
        )

    def test_salts_remover(self):
        """The sodium counter-ion is expected to be stripped."""
        self._check_transform(salts_remover, "CC(=O).[Na+]", "CC=O")

    def test_uncharge_molecule(self):
        """The carboxylate is expected to be neutralized."""
        self._check_transform(uncharge_molecule, "CC(=O)[O-]", "CC(=O)O")

    def test_fragments_remover(self):
        """Only the acetate fragment is expected to remain."""
        self._check_transform(
            fragments_remover, "CC(=O)[O-].[Na+]", "CC(=O)[O-]"
        )

    def test_remove_explicit_hydrogens(self):
        """``[CH4]`` is expected to be written back as plain ``C``."""
        self._check_transform(remove_explicit_hydrogens, "[CH4]", "C")

    def test_remove_radicals(self):
        """The methyl radical ``[CH3]`` is expected to become ``C``."""
        self._check_transform(remove_radicals_and_add_hydrogens, "[CH3]", "C")

    def test_remove_isotopes(self):
        """Every atom of the result must report isotope 0."""
        # Molecule with isotopic labeling
        smiles = "[13CH3]C([2H])([2H])[17O][18OH]"
        expect = "[H]C([H])(C)OO"
        mol = Chem.MolFromSmiles(smiles)
        result_mol = remove_isotopes(mol)
        for atom in result_mol.GetAtoms():
            self.assertEqual(atom.GetIsotope(), 0, "Isotopes not properly removed")
        self.assertEqual(Chem.MolToSmiles(result_mol), expect)

    def test_clear_stereochemistry(self):
        """No atom of the result may still carry a ``_CIPCode`` property."""
        # Molecule with defined stereochemistry
        smiles = "C[C@H](O)[C@@H](O)C"
        mol = Chem.MolFromSmiles(smiles)
        result_mol = clear_stereochemistry(mol)
        # NOTE(review): this only inspects the `_CIPCode` property; checking
        # each atom's chiral tag as well would make the test stricter --
        # confirm against the clear_stereochemistry implementation first.
        has_stereo = any(atom.HasProp("_CIPCode") for atom in result_mol.GetAtoms())
        self.assertFalse(has_stereo, "Stereochemistry not properly cleared")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import unittest | ||
from synutility.SynChem.Reaction.cleanning import Cleanning | ||
|
||
|
||
class TestCleaning(unittest.TestCase):
    """Tests for the reaction-SMILES ``Cleanning`` helper."""

    def setUp(self):
        """Create a fresh ``Cleanning`` instance for every test."""
        self.cleaner = Cleanning()

    def test_remove_duplicates(self):
        """Two identical reaction SMILES must collapse into one entry."""
        input_smiles = ["CC>>CC", "CC>>CC"]
        expected_output = ["CC>>CC"]
        result = self.cleaner.remove_duplicates(input_smiles)
        self.assertEqual(
            result, expected_output, "Failed to remove duplicates correctly"
        )

    def test_clean_smiles(self):
        """Only a single ``CC>>CC`` entry is expected after cleaning."""
        input_smiles = ["CC>>CC", "CC>>CC", "CC>>CCC"]
        expected_output = ["CC>>CC"]  # Assuming 'CC>>CCC' is not balanced
        result = self.cleaner.clean_smiles(input_smiles)
        self.assertEqual(result, expected_output, "Failed to clean SMILES correctly")
|
||
|
||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
Empty file.
2 changes: 1 addition & 1 deletion
2
Test/SynGraph/test_graph_descriptors.py → ...raph/Descriptor/test_graph_descriptors.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import unittest | ||
from synutility.SynIO.data_type import load_from_pickle | ||
from synutility.SynGraph.Descriptor.graph_signature import GraphSignature | ||
|
||
|
||
class TestGraphSignature(unittest.TestCase):
    """Tests for ``GraphSignature`` using graphs from the pickled fixture."""

    # Expected signatures for the graph stored in ``self.rc``; shared so the
    # combined-signature test cannot drift from the individual ones.
    RC_TOPOLOGY = "114"
    RC_NODES = "BrCHN"
    RC_EDGES = "Br[-1]H/Br[1]C/C[-1]N/H[1]N"

    def setUp(self):
        """Load the "GraphRules" and "ITSGraph" entries of the first record."""
        # NOTE(review): the path is relative, so the tests presumably have to
        # be launched from the repository root -- confirm.
        data = load_from_pickle("Data/test.pkl.gz")
        self.rc = data[0]["GraphRules"][2]
        self.its = data[0]["ITSGraph"][2]

    def test_create_topology_signature(self):
        """The topology signature of ``self.rc`` must be "114"."""
        signature = GraphSignature(self.rc)
        self.assertEqual(signature.create_topology_signature(), self.RC_TOPOLOGY)

    def test_create_node_signature(self):
        """The node signature of ``self.rc`` must be "BrCHN"."""
        signature = GraphSignature(self.rc)
        self.assertEqual(signature.create_node_signature(), self.RC_NODES)

    def test_create_node_signature_condensed(self):
        """The node signature of ``self.its`` uses ``{count}`` suffixes."""
        signature = GraphSignature(self.its)
        self.assertEqual(signature.create_node_signature(), "BrC{23}ClHN{3}O{5}S")

    def test_create_edge_signature(self):
        """The edge signature of ``self.rc`` must match the expected string."""
        signature = GraphSignature(self.rc)
        self.assertEqual(signature.create_edge_signature(), self.RC_EDGES)

    def test_create_graph_signature(self):
        """The graph signature must be ``topology.nodes.edges``."""
        # Ensure the graph signature combines the results correctly
        signature = GraphSignature(self.rc)
        expected = f"{self.RC_TOPOLOGY}.{self.RC_NODES}.{self.RC_EDGES}"
        self.assertEqual(signature.create_graph_signature(), expected)
|
||
|
||
# Running the tests
if __name__ == "__main__":
    unittest.main()
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import unittest | ||
import networkx as nx | ||
from synutility.SynGraph.Fingerprint.graph_fps import GraphFP | ||
|
||
|
||
class TestGraphFP(unittest.TestCase):
    """Tests for the ``GraphFP`` fingerprint generator."""

    def setUp(self):
        """Set up a test graph for use in all test cases."""
        # A fixed seed keeps the random graph identical between runs.
        self.graph = nx.gnp_random_graph(10, 0.5, seed=42)
        self.nBits = 512
        self.hash_alg = "sha256"
        self.fp_class = GraphFP(
            graph=self.graph, nBits=self.nBits, hash_alg=self.hash_alg
        )

    def _assert_str_of_length(self, value, length):
        """Assert that ``value`` is a ``str`` with exactly ``length`` characters."""
        self.assertEqual(len(value), length)
        # assertIsInstance reports the actual type when the check fails.
        self.assertIsInstance(value, str)

    def test_spectrum_fp(self):
        """Test the spectrum-based fingerprint generation."""
        self._assert_str_of_length(self.fp_class.fingerprint("spectrum"), self.nBits)

    def test_adjacency_fp(self):
        """Test the adjacency matrix-based fingerprint generation."""
        self._assert_str_of_length(self.fp_class.fingerprint("adjacency"), self.nBits)

    def test_degree_sequence_fp(self):
        """Test the degree sequence-based fingerprint generation."""
        self._assert_str_of_length(self.fp_class.fingerprint("degree"), self.nBits)

    def test_motif_count_fp(self):
        """Test the motif count-based fingerprint generation."""
        self._assert_str_of_length(self.fp_class.fingerprint("motif"), self.nBits)

    def test_iterative_deepening(self):
        """Test the iterative deepening method."""
        # Stand-in for a fingerprint that is shorter than nBits; only its
        # length matters for the number of bits still to be generated.
        short_fingerprint = "1010101010101010"
        remaining_bits = self.nBits - len(short_fingerprint)
        extended_fingerprint = self.fp_class.iterative_deepening(remaining_bits)
        self._assert_str_of_length(extended_fingerprint, remaining_bits)

    def test_fingerprint_length(self):
        """Test that each method produces a fingerprint of exactly nBits."""
        methods = ["spectrum", "adjacency", "degree", "motif"]
        for method in methods:
            with self.subTest(method=method):
                fingerprint = self.fp_class.fingerprint(method)
                self.assertEqual(len(fingerprint), self.nBits)
|
||
|
||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import unittest | ||
import networkx as nx | ||
from synutility.SynGraph.Fingerprint.hash_fps import HashFPs | ||
from synutility.SynIO.data_type import load_from_pickle | ||
|
||
|
||
class TestHashFPs(unittest.TestCase):
    """Tests for the ``HashFPs`` fingerprint generator."""

    def setUp(self):
        """Set up a simple graph for testing."""
        self.graph = nx.cycle_graph(4)  # Simple cycle graph with 4 nodes
        self.hasher = HashFPs(self.graph, numBits=128, hash_alg="sha256")

    def _assert_binary(self, value, length):
        """Assert ``value`` has ``length`` characters, all of them "0" or "1"."""
        self.assertEqual(len(value), length)
        self.assertTrue(all(c in "01" for c in value), "Hash must be binary")

    def test_hash_fps_default(self):
        """Test the default hash generation without specifying start or end nodes."""
        result = self.hasher.hash_fps()
        self.assertIsInstance(result, str)
        self._assert_binary(result, 128)

    def test_hash_fps_path_specified(self):
        """Test hash generation with specified start and end nodes."""
        result = self.hasher.hash_fps(start_node=0, end_node=1)
        self._assert_binary(result, 128)

    def test_hash_fps_invalid_hash_algorithm(self):
        """Test initialization with an invalid hash algorithm."""
        with self.assertRaises(ValueError):
            HashFPs(self.graph, numBits=128, hash_alg="invalid256")

    def test_hash_fps_negative_numBits(self):
        """Test initialization with negative numBits."""
        with self.assertRaises(ValueError):
            HashFPs(self.graph, numBits=-1, hash_alg="sha256")

    def test_hash_fps_large_numBits(self):
        """Test hash generation with a large numBits."""
        large_hasher = HashFPs(self.graph, numBits=1024, hash_alg="sha512")
        result = large_hasher.hash_fps()
        self._assert_binary(result, 1024)

    def test_fps_rc(self):
        """A graph from the pickled fixture must give a 1024-character hash."""
        # NOTE(review): the path is relative, so the test presumably has to
        # be launched from the repository root -- confirm.
        data = load_from_pickle("Data/test.pkl.gz")
        graph = data[0]["GraphRules"][2]
        hasher = HashFPs(graph, numBits=1024, hash_alg="sha256")
        result = hasher.hash_fps()
        self.assertEqual(len(result), 1024)
|
||
|
||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import unittest | ||
import networkx as nx | ||
from synutility.SynGraph.Fingerprint.morgan_fps import MorganFPs | ||
from synutility.SynIO.data_type import load_from_pickle | ||
|
||
|
||
class TestMorganFPs(unittest.TestCase):
    """Tests for the ``MorganFPs`` fingerprint generator."""

    def setUp(self):
        """Build a radius-2, 128-bit generator on a 5-node cycle graph."""
        self.graph = nx.cycle_graph(5)  # Creates a cycle graph for testing
        self.morgan_fps = MorganFPs(self.graph, radius=2, nBits=128, hash_alg="sha256")

    def test_fingerprint_length(self):
        """Test that the fingerprint is exactly the specified bit length."""
        fingerprint = self.morgan_fps.generate_fingerprint()
        self.assertEqual(len(fingerprint), 128)

    def test_fingerprint_consistency(self):
        """Test that the same graph with the same parameters produces the same fingerprint."""
        fingerprint1 = self.morgan_fps.generate_fingerprint()
        fingerprint2 = self.morgan_fps.generate_fingerprint()
        self.assertEqual(fingerprint1, fingerprint2)

    def test_fingerprint_variation_with_radius(self):
        """Test that changing the radius changes the fingerprint."""
        # Only the radius differs from the generator built in setUp.
        new_morgan_fps = MorganFPs(self.graph, radius=1, nBits=128, hash_alg="sha256")
        fingerprint1 = self.morgan_fps.generate_fingerprint()
        fingerprint2 = new_morgan_fps.generate_fingerprint()
        self.assertNotEqual(fingerprint1, fingerprint2)

    def test_fps_rc(self):
        """A graph from the pickled fixture must give a 1024-character fingerprint."""
        # NOTE(review): the path is relative, so the test presumably has to
        # be launched from the repository root -- confirm.
        data = load_from_pickle("Data/test.pkl.gz")
        graph = data[0]["GraphRules"][2]
        hasher = MorganFPs(graph, radius=3, nBits=1024, hash_alg="sha256")
        result = hasher.generate_fingerprint()
        self.assertEqual(len(result), 1024)
|
||
|
||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import unittest | ||
import networkx as nx | ||
from synutility.SynGraph.Fingerprint.path_fps import PathFPs | ||
from synutility.SynIO.data_type import load_from_pickle | ||
|
||
|
||
class TestPathFPs(unittest.TestCase):
    """Tests for the ``PathFPs`` fingerprint generator."""

    def setUp(self):
        """Build a 64-bit generator with ``max_length=3`` on a 5-node path graph."""
        self.graph = nx.path_graph(5)  # Creates a simple path graph
        self.path_fps = PathFPs(self.graph, max_length=3, nBits=64, hash_alg="sha256")

    def test_fingerprint_length(self):
        """Test that the fingerprint has the exact length specified by nBits."""
        fingerprint = self.path_fps.generate_fingerprint()
        self.assertEqual(len(fingerprint), 64)

    def test_fingerprint_consistency(self):
        """Test that the same graph with the same parameters produces the same
        fingerprint."""
        fingerprint1 = self.path_fps.generate_fingerprint()
        fingerprint2 = self.path_fps.generate_fingerprint()
        self.assertEqual(fingerprint1, fingerprint2)

    def test_fingerprint_variation(self):
        """Test that changing the parameters changes the fingerprint."""
        # NOTE(review): nBits changes together with max_length (64 -> 128), so
        # the fingerprints presumably differ by length alone and the
        # inequality holds regardless of max_length. Keeping nBits at 64 would
        # isolate the effect of max_length -- confirm it still passes first.
        new_path_fps = PathFPs(self.graph, max_length=4, nBits=128, hash_alg="sha256")
        fingerprint1 = self.path_fps.generate_fingerprint()
        fingerprint2 = new_path_fps.generate_fingerprint()
        self.assertNotEqual(fingerprint1, fingerprint2)

    def test_fps_rc(self):
        """A graph from the pickled fixture must give a 1024-character fingerprint."""
        # NOTE(review): the path is relative, so the test presumably has to
        # be launched from the repository root -- confirm.
        data = load_from_pickle("Data/test.pkl.gz")
        graph = data[0]["GraphRules"][2]
        hasher = PathFPs(graph, max_length=5, nBits=1024, hash_alg="sha256")
        result = hasher.generate_fingerprint()
        self.assertEqual(len(result), 1024)
|
||
|
||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
Empty file.
Oops, something went wrong.