Skip to content

Commit

Permalink
Prepare release (#10)
Browse files Browse the repository at this point in the history
* add graph_descriptors

* update doc

* prepare release

* fix version

* prepare release

* prepare release 0.0.8

* update Molecule subpackage

* update reaction cleaning

* add graph fingerprint

* update graph signature

* add nx_to_gml function

* update format package, transforming gml to nx and reverse; transforming mol to nx and reverse

* format

* add new features, prepare release

* fix format
  • Loading branch information
TieuLongPhan authored Nov 4, 2024
1 parent c435122 commit fa4868b
Show file tree
Hide file tree
Showing 44 changed files with 2,476 additions and 15 deletions.
8 changes: 1 addition & 7 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,5 @@
*.csv
*/catboost_info/*
*.ipynb
test.py
rebalance_test.py
split_comparison.py
fp.py

*.json
split_benchmark_process.py
synutility/SynChem/Reaction/misc.py
test_mod.py
Binary file added Data/test.pkl.gz
Binary file not shown.
Empty file.
90 changes: 90 additions & 0 deletions Test/SynChem/Molecule/test_standardize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import unittest
from rdkit import Chem
from synutility.SynChem.Molecule.standardize import (
normalize_molecule,
canonicalize_tautomer,
salts_remover,
uncharge_molecule,
fragments_remover,
remove_explicit_hydrogens,
remove_radicals_and_add_hydrogens,
remove_isotopes,
clear_stereochemistry,
)


class TestMoleculeFunctions(unittest.TestCase):
    """Input/output checks for the molecule standardization helpers."""

    def _check_transform(self, transform, smi, expect):
        """Run ``transform`` on the molecule parsed from ``smi`` and verify it.

        The result must be an RDKit ``Chem.Mol`` whose SMILES, as written by
        ``Chem.MolToSmiles``, equals ``expect``.
        """
        outcome = transform(Chem.MolFromSmiles(smi))
        self.assertIsInstance(outcome, Chem.Mol)
        self.assertEqual(expect, Chem.MolToSmiles(outcome))

    def test_normalize_molecule(self):
        """normalize_molecule maps the charge-separated input to the expected SMILES."""
        self._check_transform(
            normalize_molecule,
            "[Na]OC(=O)c1ccc(C[S+2]([O-])([O-]))cc1",
            "O=C(O[Na])c1ccc(C[S](=O)=O)cc1",
        )

    def test_canonicalize_tautomer(self):
        """canonicalize_tautomer returns the expected tautomer SMILES."""
        self._check_transform(canonicalize_tautomer, "N=c1[nH]cc[nH]1", "Nc1ncc[nH]1")

    def test_salts_remover(self):
        """salts_remover drops the sodium counter-ion fragment."""
        self._check_transform(salts_remover, "CC(=O).[Na+]", "CC=O")

    def test_uncharge_molecule(self):
        """uncharge_molecule turns the carboxylate into the neutral acid."""
        self._check_transform(uncharge_molecule, "CC(=O)[O-]", "CC(=O)O")

    def test_fragments_remover(self):
        """fragments_remover keeps only the acetate fragment, charge intact."""
        self._check_transform(fragments_remover, "CC(=O)[O-].[Na+]", "CC(=O)[O-]")

    def test_remove_explicit_hydrogens(self):
        """remove_explicit_hydrogens reduces ``[CH4]`` to ``C``."""
        self._check_transform(remove_explicit_hydrogens, "[CH4]", "C")

    def test_remove_radicals(self):
        """remove_radicals_and_add_hydrogens turns the ``[CH3]`` radical into ``C``."""
        self._check_transform(remove_radicals_and_add_hydrogens, "[CH3]", "C")

    def test_remove_isotopes(self):
        """remove_isotopes zeroes every isotope label on an isotopically labelled input."""
        labelled = Chem.MolFromSmiles("[13CH3]C([2H])([2H])[17O][18OH]")
        stripped = remove_isotopes(labelled)
        for atom in stripped.GetAtoms():
            self.assertEqual(atom.GetIsotope(), 0, "Isotopes not properly removed")
        self.assertEqual(Chem.MolToSmiles(stripped), "[H]C([H])(C)OO")

    def test_clear_stereochemistry(self):
        """clear_stereochemistry leaves no atom carrying a ``_CIPCode`` property."""
        chiral = Chem.MolFromSmiles("C[C@H](O)[C@@H](O)C")
        flattened = clear_stereochemistry(chiral)
        cip_flags = [atom.HasProp("_CIPCode") for atom in flattened.GetAtoms()]
        self.assertFalse(any(cip_flags), "Stereochemistry not properly cleared")
26 changes: 26 additions & 0 deletions Test/SynChem/Reaction/test_cleanning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import unittest
from synutility.SynChem.Reaction.cleanning import Cleanning


class TestCleaning(unittest.TestCase):
    """Tests for the reaction-SMILES cleaning utilities of ``Cleanning``."""

    def setUp(self):
        """Create a fresh ``Cleanning`` instance for every test."""
        self.cleaner = Cleanning()

    def test_remove_duplicates(self):
        """Two identical reaction SMILES collapse to a single entry."""
        reactions = ["CC>>CC", "CC>>CC"]
        deduplicated = self.cleaner.remove_duplicates(reactions)
        self.assertEqual(
            deduplicated, ["CC>>CC"], "Failed to remove duplicates correctly"
        )

    def test_clean_smiles(self):
        """clean_smiles returns only the single ``CC>>CC`` entry."""
        reactions = ["CC>>CC", "CC>>CC", "CC>>CCC"]
        # Assuming 'CC>>CCC' is not balanced, only the deduplicated balanced
        # reaction is expected to survive.
        survivors = ["CC>>CC"]
        cleaned = self.cleaner.clean_smiles(reactions)
        self.assertEqual(cleaned, survivors, "Failed to clean SMILES correctly")


if __name__ == "__main__":
    unittest.main()
Empty file.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest
import networkx as nx
from synutility.SynGraph.graph_descriptors import GraphDescriptor
from synutility.SynGraph.Descriptor.graph_descriptors import GraphDescriptor


class TestGraphDescriptor(unittest.TestCase):
Expand Down
44 changes: 44 additions & 0 deletions Test/SynGraph/Descriptor/test_graph_signature.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import unittest
from synutility.SynIO.data_type import load_from_pickle
from synutility.SynGraph.Descriptor.graph_signature import GraphSignature


class TestGraphSignature(unittest.TestCase):
    """Checks ``GraphSignature`` output against known values for a stored graph.

    The fixture is read from ``Data/test.pkl.gz``; the path is relative, so the
    tests must be run from a working directory that contains ``Data/``.
    """

    def setUp(self):
        """Load the graphs under test from the pickled fixture."""
        first_record = load_from_pickle("Data/test.pkl.gz")[0]
        # Index 2 of each list is the sample the expected signatures refer to.
        self.rc = first_record["GraphRules"][2]
        self.its = first_record["ITSGraph"][2]

    def test_create_topology_signature(self):
        """The topology signature of the ``GraphRules`` sample is ``"114"``."""
        topology = GraphSignature(self.rc).create_topology_signature()
        self.assertEqual(topology, "114")

    def test_create_node_signature(self):
        """The node signature of the ``GraphRules`` sample is ``"BrCHN"``."""
        nodes = GraphSignature(self.rc).create_node_signature()
        self.assertEqual(nodes, "BrCHN")

    def test_create_node_signature_condensed(self):
        """The node signature of the ``ITSGraph`` sample uses ``{count}`` suffixes."""
        nodes = GraphSignature(self.its).create_node_signature()
        self.assertEqual(nodes, "BrC{23}ClHN{3}O{5}S")

    def test_create_edge_signature(self):
        """The edge signature of the ``GraphRules`` sample matches the known string."""
        edges = GraphSignature(self.rc).create_edge_signature()
        self.assertEqual(edges, "Br[-1]H/Br[1]C/C[-1]N/H[1]N")

    def test_create_graph_signature(self):
        """The full signature is ``topology.nodes.edges`` joined with dots."""
        parts = ["114", "BrCHN", "Br[-1]H/Br[1]C/C[-1]N/H[1]N"]
        expected = ".".join(parts)
        combined = GraphSignature(self.rc).create_graph_signature()
        self.assertEqual(combined, expected)


# Running the tests
if __name__ == "__main__":
    unittest.main()
Empty file.
59 changes: 59 additions & 0 deletions Test/SynGraph/Fingerprint/test_graph_fps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import unittest
import networkx as nx
from synutility.SynGraph.Fingerprint.graph_fps import GraphFP


class TestGraphFP(unittest.TestCase):
    """Tests for ``GraphFP`` fingerprint generation on a seeded random graph."""

    def setUp(self):
        """Build the graph and ``GraphFP`` instance shared by every test.

        The graph comes from ``nx.gnp_random_graph`` with a fixed seed so the
        fixture is reproducible between runs.
        """
        self.graph = nx.gnp_random_graph(10, 0.5, seed=42)
        self.nBits = 512
        self.hash_alg = "sha256"
        self.fp_class = GraphFP(
            graph=self.graph, nBits=self.nBits, hash_alg=self.hash_alg
        )

    def test_spectrum_fp(self):
        """The ``"spectrum"`` method yields a string of exactly ``nBits`` characters."""
        fingerprint = self.fp_class.fingerprint("spectrum")
        self.assertEqual(len(fingerprint), self.nBits)
        # assertIsInstance reports the offending type on failure, unlike
        # assertTrue(isinstance(...)) which only says "False is not true".
        self.assertIsInstance(fingerprint, str)

    def test_adjacency_fp(self):
        """The ``"adjacency"`` method yields a string of exactly ``nBits`` characters."""
        fingerprint = self.fp_class.fingerprint("adjacency")
        self.assertEqual(len(fingerprint), self.nBits)
        self.assertIsInstance(fingerprint, str)

    def test_degree_sequence_fp(self):
        """The ``"degree"`` method yields a string of exactly ``nBits`` characters."""
        fingerprint = self.fp_class.fingerprint("degree")
        self.assertEqual(len(fingerprint), self.nBits)
        self.assertIsInstance(fingerprint, str)

    def test_motif_count_fp(self):
        """The ``"motif"`` method yields a string of exactly ``nBits`` characters."""
        fingerprint = self.fp_class.fingerprint("motif")
        self.assertEqual(len(fingerprint), self.nBits)
        self.assertIsInstance(fingerprint, str)

    def test_iterative_deepening(self):
        """``iterative_deepening(k)`` returns a string of exactly ``k`` characters."""
        # Stand-in for a fingerprint that came out shorter than nBits; only
        # its length matters here, to derive how many bits remain to fill.
        short_fingerprint = "1010101010101010"
        remaining_bits = self.nBits - len(short_fingerprint)
        extended_fingerprint = self.fp_class.iterative_deepening(remaining_bits)
        self.assertEqual(len(extended_fingerprint), remaining_bits)
        self.assertIsInstance(extended_fingerprint, str)

    def test_fingerprint_length(self):
        """Every supported method produces a fingerprint of exactly ``nBits``."""
        methods = ["spectrum", "adjacency", "degree", "motif"]
        for method in methods:
            # subTest keeps iterating after a failure and labels it by method.
            with self.subTest(method=method):
                fingerprint = self.fp_class.fingerprint(method)
                self.assertEqual(len(fingerprint), self.nBits)


if __name__ == "__main__":
    unittest.main()
52 changes: 52 additions & 0 deletions Test/SynGraph/Fingerprint/test_hash_fps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import unittest
import networkx as nx
from synutility.SynGraph.Fingerprint.hash_fps import HashFPs
from synutility.SynIO.data_type import load_from_pickle


class TestHashFPs(unittest.TestCase):
    """Tests for ``HashFPs`` bit-string generation and argument validation."""

    def setUp(self):
        """Create a 4-node cycle graph and a 128-bit SHA-256 hasher for it."""
        self.graph = nx.cycle_graph(4)
        self.hasher = HashFPs(self.graph, numBits=128, hash_alg="sha256")

    def _assert_binary(self, bits):
        """Assert that ``bits`` consists solely of the characters 0 and 1."""
        self.assertTrue(all(c in "01" for c in bits), "Hash must be binary")

    def test_hash_fps_default(self):
        """Without start/end nodes the hash is a 128-character binary string."""
        bits = self.hasher.hash_fps()
        self.assertEqual(len(bits), 128)
        self.assertIsInstance(bits, str)
        self._assert_binary(bits)

    def test_hash_fps_path_specified(self):
        """With explicit start and end nodes the hash keeps the same shape."""
        bits = self.hasher.hash_fps(start_node=0, end_node=1)
        self.assertEqual(len(bits), 128)
        self._assert_binary(bits)

    def test_hash_fps_invalid_hash_algorithm(self):
        """An unknown hash algorithm name is rejected with ``ValueError``."""
        with self.assertRaises(ValueError):
            HashFPs(self.graph, numBits=128, hash_alg="invalid256")

    def test_hash_fps_negative_numBits(self):
        """A negative ``numBits`` is rejected with ``ValueError``."""
        with self.assertRaises(ValueError):
            HashFPs(self.graph, numBits=-1, hash_alg="sha256")

    def test_hash_fps_large_numBits(self):
        """A 1024-bit request with SHA-512 still yields exactly 1024 binary digits."""
        wide_hasher = HashFPs(self.graph, numBits=1024, hash_alg="sha512")
        bits = wide_hasher.hash_fps()
        self.assertEqual(len(bits), 1024)
        self._assert_binary(bits)

    def test_fps_rc(self):
        """A graph loaded from the pickled fixture hashes to 1024 characters."""
        # Relative path: the tests must run from a directory containing Data/.
        records = load_from_pickle("Data/test.pkl.gz")
        rule_graph = records[0]["GraphRules"][2]
        bits = HashFPs(rule_graph, numBits=1024, hash_alg="sha256").hash_fps()
        self.assertEqual(len(bits), 1024)


if __name__ == "__main__":
    unittest.main()
39 changes: 39 additions & 0 deletions Test/SynGraph/Fingerprint/test_morgan_fps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import unittest
import networkx as nx
from synutility.SynGraph.Fingerprint.morgan_fps import MorganFPs
from synutility.SynIO.data_type import load_from_pickle


class TestMorganFPs(unittest.TestCase):
    """Tests for ``MorganFPs`` fingerprints on a small cycle graph."""

    def setUp(self):
        """Create a 5-node cycle graph and a radius-2, 128-bit fingerprinter."""
        self.graph = nx.cycle_graph(5)
        self.morgan_fps = MorganFPs(self.graph, radius=2, nBits=128, hash_alg="sha256")

    def test_fingerprint_length(self):
        """The fingerprint has exactly the requested 128 characters."""
        bits = self.morgan_fps.generate_fingerprint()
        self.assertEqual(len(bits), 128)

    def test_fingerprint_consistency(self):
        """Two calls on the same instance return the same fingerprint."""
        first_run = self.morgan_fps.generate_fingerprint()
        second_run = self.morgan_fps.generate_fingerprint()
        self.assertEqual(first_run, second_run)

    def test_fingerprint_variation_with_radius(self):
        """Radius 1 and radius 2 give different fingerprints for the same graph."""
        narrower = MorganFPs(self.graph, radius=1, nBits=128, hash_alg="sha256")
        radius_two_bits = self.morgan_fps.generate_fingerprint()
        radius_one_bits = narrower.generate_fingerprint()
        self.assertNotEqual(radius_two_bits, radius_one_bits)

    def test_fps_rc(self):
        """A graph loaded from the pickled fixture yields 1024 characters."""
        # Relative path: the tests must run from a directory containing Data/.
        records = load_from_pickle("Data/test.pkl.gz")
        rule_graph = records[0]["GraphRules"][2]
        fingerprinter = MorganFPs(rule_graph, radius=3, nBits=1024, hash_alg="sha256")
        bits = fingerprinter.generate_fingerprint()
        self.assertEqual(len(bits), 1024)


if __name__ == "__main__":
    unittest.main()
40 changes: 40 additions & 0 deletions Test/SynGraph/Fingerprint/test_path_fps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import unittest
import networkx as nx
from synutility.SynGraph.Fingerprint.path_fps import PathFPs
from synutility.SynIO.data_type import load_from_pickle


class TestPathFPs(unittest.TestCase):
    """Tests for ``PathFPs`` fingerprints on a small path graph."""

    def setUp(self):
        """Create a 5-node path graph and a 64-bit fingerprinter (max_length=3)."""
        self.graph = nx.path_graph(5)
        self.path_fps = PathFPs(self.graph, max_length=3, nBits=64, hash_alg="sha256")

    def test_fingerprint_length(self):
        """The fingerprint has exactly the requested 64 characters."""
        bits = self.path_fps.generate_fingerprint()
        self.assertEqual(len(bits), 64)

    def test_fingerprint_consistency(self):
        """Two calls on the same instance return the same fingerprint."""
        first_run = self.path_fps.generate_fingerprint()
        second_run = self.path_fps.generate_fingerprint()
        self.assertEqual(first_run, second_run)

    def test_fingerprint_variation(self):
        """A fingerprinter built with different parameters gives a different result."""
        # NOTE(review): both max_length and nBits differ here (3/64 vs 4/128),
        # so the results presumably differ by length alone; confirm whether
        # the intent was to isolate the effect of max_length.
        other_fps = PathFPs(self.graph, max_length=4, nBits=128, hash_alg="sha256")
        baseline_bits = self.path_fps.generate_fingerprint()
        other_bits = other_fps.generate_fingerprint()
        self.assertNotEqual(baseline_bits, other_bits)

    def test_fps_rc(self):
        """A graph loaded from the pickled fixture yields 1024 characters."""
        # Relative path: the tests must run from a directory containing Data/.
        records = load_from_pickle("Data/test.pkl.gz")
        rule_graph = records[0]["GraphRules"][2]
        fingerprinter = PathFPs(rule_graph, max_length=5, nBits=1024, hash_alg="sha256")
        bits = fingerprinter.generate_fingerprint()
        self.assertEqual(len(bits), 1024)


if __name__ == "__main__":
    unittest.main()
Empty file added Test/SynIO/Format/__init__.py
Empty file.
Loading

0 comments on commit fa4868b

Please sign in to comment.