-
Notifications
You must be signed in to change notification settings - Fork 4
/
smiles_checker.py
60 lines (44 loc) · 1.48 KB
/
smiles_checker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from rdkit import Chem
import argparse
import warnings
# Install a blanket "ignore" filter so that no Python warning is shown
# while the script runs.
warnings.filterwarnings('ignore')

# Command-line interface: a single optional flag, --sf, naming the file
# that holds the id/SMILES pairs to be checked.
parser = argparse.ArgumentParser(description='smiles checker arguments')
parser.add_argument(
    '--sf',
    metavar='SF',
    default="input/test_id_smiles.csv",
    type=str,
    help='the name of smiles file (default: input/test_id_smiles.csv)',
)
def read_smiles(FileName):
    """Load a mapping of first field -> second field from a comma-separated file.

    The first line of the file is treated as a header and skipped. Every
    following non-blank line is split on "," and its first two fields are
    stored as ``{first_field: second_field}``; any further fields are
    ignored. When the same first field occurs more than once, the last
    occurrence wins.

    Blank (empty or whitespace-only) lines are skipped, so stray empty
    lines in the middle or at the end of the file no longer abort the read.

    Args:
        FileName: Path of the file to read.

    Returns:
        dict: Maps the first field of each data line (the id) to the
        second field (the SMILES string).

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank data line contains no comma.
    """
    smilesCompoundDict = {}
    with open(FileName) as f:
        # Discard the header row; the default keeps an empty file from
        # raising StopIteration.
        next(f, None)
        for line in f:
            # A blank line would split into a single field and make the
            # [1] lookup below fail, so skip it.
            if not line.strip():
                continue
            compound_smile_pair = line.rstrip('\n').split(",")
            smilesCompoundDict[compound_smile_pair[0]] = compound_smile_pair[1]
    return smilesCompoundDict
if __name__ == '__main__':
    # Script entry point: read the id/SMILES pairs named by --sf, check
    # each SMILES with RDKit, and print either the offending ids or "1"
    # when nothing was flagged.
    args = parser.parse_args()
    smiles_file = args.sf
    smilesCompoundDict = read_smiles(smiles_file)

    invalid_chem_lst, invalid_smiles_lst = [], []
    for key, smi in smilesCompoundDict.items():
        # Parse with sanitization turned off first, so that a string that
        # cannot be parsed at all (None result) is recorded separately
        # from one that parses but then fails the sanitization step.
        m = Chem.MolFromSmiles(smi, sanitize=False)
        if m is None:
            invalid_smiles_lst.append(key)
            continue
        try:
            Chem.SanitizeMol(m)
        except Exception:
            # Any sanitization failure marks the id as invalid chemistry.
            # `Exception` (rather than a bare except) lets Ctrl-C and
            # SystemExit propagate.
            invalid_chem_lst.append(key)

    # Report both categories independently: an id list must not be hidden
    # just because the other category also has entries.
    if invalid_chem_lst:
        print("The following id(s) have invalid chemistry:", invalid_chem_lst)
    if invalid_smiles_lst:
        print("The following id(s) have invalid smiles:", invalid_smiles_lst)
    if not invalid_chem_lst and not invalid_smiles_lst:
        # "1" is printed only when every entry passed both checks.
        print("1")