-
Notifications
You must be signed in to change notification settings - Fork 7
/
jlib.py
155 lines (121 loc) · 5.18 KB
/
jlib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import numpy as np
def hello(name='no name'):
    """
    Print a greeting for ``name`` followed by a hard-coded date/time line.

    input: name - the welcome name (defaults to 'no name')
    """
    # Pass the single field explicitly instead of splatting locals(), so the
    # template can never silently pick up an unrelated local variable.
    print('Hello {name}!'.format(name=name))
    print('2015-3-2, 2:45pm')
def check_mol_similarity():
    """
    Print fingerprint similarities for three fixed molecules.

    Fingerprints are built for the SMILES codes 'CCOC', 'CCO' and 'COC'.
    The first fingerprint is printed, followed by the similarity of the
    index pairs (0, 1), (0, 2), (1, 2) and (0, 0), one value per line.
    Nothing is returned.
    """
    from rdkit import Chem
    from rdkit import DataStructs
    from rdkit.Chem.Fingerprints import FingerprintMols

    ms = [Chem.MolFromSmiles(smiles) for smiles in ('CCOC', 'CCO', 'COC')]
    fps = [FingerprintMols.FingerprintMol(x) for x in ms]

    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3; the bare print statement is a syntax error on Python 3.
    print(fps[0])
    for i, j in ((0, 1), (0, 2), (1, 2), (0, 0)):
        print(DataStructs.FingerprintSimilarity(fps[i], fps[j]))
def mols_similarity(ms_smiles=('CCOC', 'CCO', 'COC')):
    """
    Print fingerprint similarities among the first three given molecules.

    input: ms_smiles - sequence of SMILES codes; at least three entries are
           needed because indices 0, 1 and 2 are read.

    The first fingerprint is printed, followed by the similarity of the
    index pairs (0, 1), (0, 2), (1, 2) and (0, 0), one value per line.
    Nothing is returned.
    """
    from rdkit import Chem
    from rdkit import DataStructs
    from rdkit.Chem.Fingerprints import FingerprintMols

    # The default is an immutable tuple so that it cannot be shared and
    # mutated across calls.
    ms = [Chem.MolFromSmiles(m_sm) for m_sm in ms_smiles]
    fps = [FingerprintMols.FingerprintMol(x) for x in ms]

    # print(...) with one argument works on both Python 2 and Python 3.
    print(fps[0])
    for i, j in ((0, 1), (0, 2), (1, 2), (0, 0)):
        print(DataStructs.FingerprintSimilarity(fps[i], fps[j]))
def _mols_similarity_base_r0(ms_smiles_mid, ms_smiles_base):
    """
    Print the fingerprint similarity of every (base, target) molecule pair.

    Input: dictionary type required such as {nick name: smiles code, ...}
        ms_smiles_mid  - the target molecules
        ms_smiles_base - the base molecules

    The target names and base names are printed first; then, for each base
    and each target, a "Base:..., Target:..." line followed by their
    similarity value. Nothing is returned.
    """
    from rdkit import Chem
    from rdkit import DataStructs
    from rdkit.Chem.Fingerprints import FingerprintMols

    # processing for mid
    # The names are materialised as a list: a key collection cannot be
    # concatenated to a str, and a Python 3 key view cannot be indexed.
    names_mid = list(ms_smiles_mid)
    print("Target: {0}".format(names_mid))
    ms_mid = [Chem.MolFromSmiles(m_sm) for m_sm in ms_smiles_mid.values()]
    fps_mid = [FingerprintMols.FingerprintMol(x) for x in ms_mid]

    # processing for base
    names_base = list(ms_smiles_base)
    print("Base: {0}".format(names_base))
    ms_base = [Chem.MolFromSmiles(m_sm) for m_sm in ms_smiles_base.values()]
    fps_base = [FingerprintMols.FingerprintMol(x) for x in ms_base]

    # Names and fingerprints both follow the dictionary's iteration order,
    # so they can be paired position by position.
    for name_b, f_b in zip(names_base, fps_base):
        for name_d, f_d in zip(names_mid, fps_mid):
            print("Base:{0}, Target:{1}".format(name_b, name_d))
            print(DataStructs.FingerprintSimilarity(f_b, f_d))
"""
The shared core is factored out into its own function; only the additional steps differ between the two functions that use it.
"""
def mols_similarity_base_core(ms_smiles_mid, ms_smiles_base):
    """
    Build fingerprints for the target and the base molecule dictionaries.

    Input: dictionary type required such as {nick name: smiles code, ...}
        ms_smiles_mid  - the target molecules
        ms_smiles_base - the base molecules

    Return: (fps_base, fps_mid) - two fingerprint lists, each following the
            iteration order of the corresponding dictionary's values.
            The names of both dictionaries are printed along the way.
    """
    from rdkit import Chem
    from rdkit.Chem.Fingerprints import FingerprintMols

    def fingerprints_of(smiles_by_name):
        # SMILES code -> molecule for every entry, then molecule -> fingerprint.
        molecules = [Chem.MolFromSmiles(code)
                     for code in smiles_by_name.values()]
        return [FingerprintMols.FingerprintMol(mol) for mol in molecules]

    # Targets are announced and processed first, then the bases.
    print("Target: ", ms_smiles_mid.keys())
    target_fps = fingerprints_of(ms_smiles_mid)

    print("Base: ", ms_smiles_base.keys())
    base_fps = fingerprints_of(ms_smiles_base)

    return base_fps, target_fps
def mols_similarity_base(ms_smiles_mid, ms_smiles_base):
    """
    Print the fingerprint similarity of every (base, target) molecule pair.

    Input: dictionary type required such as {nick name: smiles code, ...}
        ms_smiles_mid  - the target molecules
        ms_smiles_base - the base molecules

    For each base and each target, a "Base:..., Target:..." line is printed
    followed by their similarity value. Nothing is returned.
    """
    from rdkit import DataStructs

    fps_base, fps_mid = mols_similarity_base_core(
        ms_smiles_mid, ms_smiles_base)

    # A Python 3 dict key view cannot be indexed, so the names are
    # materialised once and paired with the fingerprints by position
    # (the fingerprints were built in the dictionaries' iteration order).
    names_base = list(ms_smiles_base)
    names_mid = list(ms_smiles_mid)

    for name_b, f_b in zip(names_base, fps_base):
        for name_d, f_d in zip(names_mid, fps_mid):
            print("Base:{0}, Target:{1}".format(name_b, name_d))
            print(DataStructs.FingerprintSimilarity(f_b, f_d))
def mols_similarity_base_return(ms_smiles_mid, ms_smiles_base, property_of_base=None):
    """
    Return the target/base similarity matrix and, optionally, a property vector.

    The results will be returned, for use in A * w = b.

    Input:
        ms_smiles_mid    - target molecules, {nick name: smiles code, ...}
        ms_smiles_base   - base molecules, {nick name: smiles code, ...}
        property_of_base - optional sequence indexed by base position

    Return:
        A       when property_of_base is None or empty
        (A, b)  otherwise
        A has shape (number of targets, number of bases); A[mx, bx] is the
        fingerprint similarity of base bx and target mx.
        b has one entry per base, copied from property_of_base.
        w is NOT computed here.

    NOTE(review): b has one entry per base while A has one row per target,
    so A * w = b is only shape-consistent when both counts are equal -
    confirm the intended formulation.
    """
    from rdkit import DataStructs

    [fps_base, fps_mid] = mols_similarity_base_core(
        ms_smiles_mid, ms_smiles_base)

    # A Python 3 dict key view cannot be indexed, so take name lists once.
    names_base = list(ms_smiles_base)
    names_mid = list(ms_smiles_mid)

    # Explicit None/length test: plain truthiness raises for NumPy arrays
    # with more than one element.
    has_property = property_of_base is not None and len(property_of_base) > 0

    Nb, Nm = len(fps_base), len(fps_mid)
    A = np.zeros((Nm, Nb))
    b = np.zeros(Nb)

    for (bx, f_b) in enumerate(fps_base):
        for (mx, f_m) in enumerate(fps_mid):
            print("Base:{0}, Target:{1}".format(names_base[bx], names_mid[mx]))
            A[mx, bx] = DataStructs.FingerprintSimilarity(f_b, f_m)
            print(A[mx, bx])
            if has_property:
                b[bx] = property_of_base[bx]
                print(b[bx])

    if has_property:
        print("b is obtained.")
        return A, b
    else:
        return A
def mols_similarity_base_get_w(ms_smiles_mid, ms_smiles_base, property_of_base):
    """
    Solve A * w = b for w using the pseudo-inverse of A.

    Input:
        ms_smiles_mid    - target molecules, {nick name: smiles code, ...}
        ms_smiles_base   - base molecules, {nick name: smiles code, ...}
        property_of_base - which is b, must be entered (non-empty)

    Return: w = pinv(A) . b, where A and b come from
            mols_similarity_base_return.

    Raises: ValueError when property_of_base is None or empty.

    NOTE(review): mols_similarity_base_return builds A with one row per
    target and b with one entry per base, so the product below is only
    shape-compatible when both counts are equal - confirm the intended
    formulation.
    """
    # Without a property vector the helper returns the bare matrix A, and
    # unpacking it as (A, b) would either fail obscurely or split its rows.
    if property_of_base is None or len(property_of_base) == 0:
        raise ValueError(
            "property_of_base (b) must be a non-empty sequence to compute w")

    A, b = mols_similarity_base_return(
        ms_smiles_mid, ms_smiles_base, property_of_base)

    w = np.dot(np.linalg.pinv(A), b)
    return w