-
Notifications
You must be signed in to change notification settings - Fork 3
/
command.txt
345 lines (232 loc) · 11.6 KB
/
command.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
#A supporting file for
#Structure-based virtual screening to identify inhibitors
#against N-Terminal Acetyltransferase NatB using natural product compounds library
#author:yanhong hong
####################################################
# #
# download compounds library from ZINC #
# #
####################################################
#Because the download link for all natural-products is broken.
#(https://zinc15.docking.org/substances/subsets/natural-products.mol2?count=all)
#I tried to download them by downloading all biogenic compounds from different catalogs
#and using the natural product list to filter it.
#command#1,2,3:download all biogenic compounds from different catalogs and the natural product list.
for i in {msnp,afronp,acdiscnp,biofacquimnp,acdiscnd,acdiscnpbb,asternp,biopurify,\
hmdbtoxin,indofinenp,molportnp,targetmolnp,nubbenp,himnp,hitnp,specsnp,npactnp,tcmnp\
,uefsnp,timtecnd};do (nohup curl -o $i.mol2 https://zinc15.docking.org/catalogs/$i/protomers.mol2?count=all &);done
for i in {msnp,afronp,acdiscnp,biofacquimnp,acdiscnd,acdiscnpbb,asternp,biopurify,\
hmdbtoxin,indofinenp,molportnp,targetmolnp,nubbenp,himnp,hitnp,specsnp,npactnp,tcmnp\
,uefsnp,timtecnd};do mkdir $i|mv $i.mol2 $i;done
wget -O np.txt https://zinc15.docking.org/substances/subsets/natural-products.txt?count=all
cat np.txt|cut -f1|sort > sorted_cutted_np.txt
#command#4:use a script(split_rename_filter.sh) to split mol2 file,rename the splited files and filter them.
#The script can be found in the supplementary files.
for i in {msnp,afronp,acdiscnp,biofacquimnp,acdiscnd,acdiscnpbb,asternp,biopurify,\
hmdbtoxin,indofinenp,molportnp,targetmolnp,nubbenp,himnp,hitnp,specsnp,npactnp,tcmnp\
,uefsnp,timtecnd};do (bash split_rename_filter.sh $i &);done
#command#5:Integrate all the files
mkdir all|cp */mol2_final/* all &
####################################################
# #
# DOCKING #
# #
####################################################
#The original pdb file(natB.pdb)contains a ligand(a short peptide),a cofactor(coA),and a protein.
#use MOE to remove the ligand and the cofactor and save the receptor as receptor.pdb.
#Note that the autodock vina is not supported for parallel running,
#so we need to divide them into different groups,and run them parallelly.
#command#7,8,9:
for f in all/*; do d=part_$(printf %03d $((i/10000+1))); mkdir -p $d;mv "$f" $d;let i++;done &
mkdir dock
cd dock
#WE USE A SCRIPT FOR PREPARE LIGANDS AND START DOCKING.
#The script is provided in the supplementary materials.(docking.sh)
#The config file(conf.txt) is generated using ADT.
#command#10:
for i in {1..19};do (bash docking.sh $i &);done
####################################################
# #
# RESCORING #
# #
####################################################
#1
#USE SMINA TO RESCORE THE POSES GENERATED BY AUTODOCK VINA.
#All poses are in the allresult directory.All files are generated using vina_split.
#Scoring functions include vinardo,dkoes_scoring,dkoes_fast,ad4_scoring
#command#10:set up the environment
mkdir -p rescorelog/vinardo rescorelog/dkoes_scoring rescorelog/dkoes_fast rescorelog/ad4_scoring
#command#11:
nohup sh -c 'for i in allresult/*;do smina --scoring dkoes_scoring --score_only -r receptor.pdbqt -l $i --log rescorelog/dkoes_scoring/`basename $i .pdbqt`.log;done' > nohup_dkoesscoring &
#command#12:
nohup sh -c 'for i in allresult/*;do smina --scoring vinardo --score_only -r receptor.pdbqt -l $i --log rescorelog/vinardo/`basename $i .pdbqt`.log;done' > nohup_vinardo &
#command#13:
nohup sh -c 'for i in allresult/*;do smina --scoring dkoes_fast --score_only -r receptor.pdbqt -l $i --log rescorelog/dkoes_fast/`basename $i .pdbqt`.log;done' > nohup_dkoesfast &
#command#14:
nohup sh -c 'for i in allresult/*;do smina --scoring ad4_scoring --score_only -r receptor.pdbqt -l $i --log rescorelog/ad4_scoring/`basename $i .pdbqt`.log;done' > nohup_ad4scoring &
#2
#USE NIB (negative image based) to rescore the poses.
#We use panther to generate the negative image and use shaep to compare the similarity between the negative image and poses.
#first,add hydrogens to the receptor.
#command#15:
./reduce -BUILD natb.pdb > natb_H.pdb
#Because our ligand is a peptide,We need to change the forth column to ZIN.
#...
#generate the default input parameter file default.in using mkdef option in panther.
#command#16:
./panther.py -mkdef default.in
#generate the model.
#command#17:
./panther.py -pfil natb_H.pdb -bmp F-2 default.in model.mol2 &
#NIB requires mol2 file format for ligands,We convert all pdbqt files to mol2 files using openbabel and integrate them into a single file all.mol2.
#START RESCORING
#command#18:
./shaep model.mol2 all.mol2 --output-file shaep_rescore_model.txt -s rescored_model.sdf --noOptimization &
#3
#sort all rescoring results and get intersection between all top0.1% results.
#e.g default
#command#19:
for i in default/*;do cat $i|egrep "Affinity:" |awk -v a=`basename $i .log` '{print a " " $2}';done > defaultlist &
#command#20:
sort -k2n defaultlist |head -n 1680|cut -d" " -f1|sort > sorted_defaulttop0.1%list
#NIB
cat shaep_rescore_model-I.txt|awk 'NR>1'|awk '{print $1"\t" $2}'|sort -k2n |tail -n 1680|cut -f1|sort > sorted_defaulttop0.1%list
#take the intersection between different groups
#command#20:
comm -1 -2 sorted_ad4scoringtop0.1% sorted_defaultlist0.1% |comm -1 -2 - sorted_dkoes_fastlist0.1% |comm -1 -2 - \ sorted_dkoes_scoringlist0.1% |comm -1 -2 - sorted_vinardolist0.1% |comm -1 -2 - sorted_vinatop0.1% |comm -1 -2 - \
sorted_NIB_rescoretop0.1% > commlist &
####################################################
# #
# VISUALIZATION #
# #
####################################################
#We visualize the chemical space using matplotlib,the commands are provided in supplementary materials(chemical_space_by_pca.ipynb)
####################################################
# #
# MOLECULAR DYNAMICS #
# #
####################################################
#We use GROMACS to do molecular dynamics,which is for validating the docking results.
#The original protein structure miss some residues,so we need to reconstruct the protein structure using swiss modeling.
#the resulting file is final_receptor.pdb.
#e.g ZINC000150350042
#1
#command#21:pdb2gmx
#force-field:ATB-specific 54a7 force field.
gmx_mpi pdb2gmx -f final_receptor.pdb -o receptor.gro -water spc -ignh
#2:We use ATB to prepare the cofactor/ligand's topology file and coodinate file.
#The resulting files are 1NFP.itp,1NFP.pdb,EM94.itp,EM94.pdb.
#command#22,23:convert pdb to gro
gmx_mpi editconf -f EM94.pdb -o EM94.gro
gmx_mpi editconf -f 1NFP.pdb -o 1NFP.gro
#Integrate the coodinate information to the receptor and rename as complex.gro.
#Include the topology information to the topol.top.
#command#24,25:construct a dodecahedron box and solvate it.
gmx_mpi editconf -f complex.gro -o newbox.gro -bt dodecahedron -d 1.0
gmx_mpi solvate -cp newbox.gro -cs spc216.gro -p topol.top -o solv.gro
#3
#Add ions to neutralize the system.
#ions.mdp is provided in the supplementary files.
#command#26,27:
gmx_mpi grompp -f ions.mdp -c solv.gro -p topol.top -o ions.tpr
gmx_mpi genion -s ions.tpr -o solv_ions.gro -p topol.top -pname NA -nname CL -neutral
#4
#Energy minimization(EM)
#WHY EM? -A:to relax the system to ensure that steric clashes or inappropriate geometry.
#command#28,29:
#create the binary file
gmx_mpi grompp -f em.mdp -c solv_ions.gro -p topol.top -o em.tpr
#start EM.
nohup gmx_mpi mdrun -v -deffnm em -nb gpu -ntomp 60 > nohupem.out &
#5
#Restraint the ligand and cofactor
#command#30:create a cofactor index file.
gmx_mpi make_ndx -f ligand/coa/coa_orig.gro -o index_1NFP.ndx
>0 & ! a H*
>q
#command#31:generate the cofactor restraint file
gmx_mpi genrestr -f ligand/coa/coa_orig.gro -n index_1NFP.ndx -o posre_1NFP.itp -fc 1000 1000 1000
#command#32:create a ligand index file.
gmx_mpi make_ndx -f ligand/zinc/EM94.gro -o index_EM94.ndx
>0 & ! a H*
>q
#command#33:generate the ligand restraint file
gmx_mpi genrestr -f ligand/zinc/EM94.gro -n index_EM94.ndx -o poses_EM94.itp -fc 1000 1000 1000
#Integrate the ligand restraint information to the topol.top
#6
#Thermostats
#command#34,35,36:
#nvt.mdp is provided in supplementary file.
gmx_mpi make_ndx -f em.gro -o index.ndx
>1|13|14
>q
gmx_mpi grompp -f nvt.mdp -c em.gro -r em.gro -p topol.top -n index.ndx -o nvt.tpr
nohup gmx_mpi mdrun -deffnm nvt -v -nb gpu -ntomp 60 >nohupnvt.out &
#7
#NPT
#npt.mdp is provided in supplementary file.
#command#37:
gmx grompp -f npt.mdp -c nvt.gro -t nvt.cpt -r nvt.gro -p topol.top -n index.ndx -o npt.tpr
#command#38:
nohup gmx_mpi mdrun -deffnm npt -v -nb gpu -ntomp 60 > nohupnpt.out &
#8
#START MD.
#md.mdp is provided in supplementary files.
#command#39,40:
gmx_mpi grompp -f md.mdp -c npt.gro -t npt.cpt -p topol.top -n index.ndx -o md_0_50.tpr
nohup gmx_mpi mdrun -deffnm md_0_50 -v -nb gpu -ntomp 60 > nohupmd.out &
####################################################
# #
# MD results analysis #
# #
####################################################
#Recentering and Rewrapping Coordinates
#In this case,the system is a protein-protein-ligand-cofactor complex,it needs a index file to recentering the system.
#Add chain A to index.ndx manually.
#command#41:
gmx_mpi make_ndx -f em.gro -n index.ndx
>0 & ri 12-17 # 0 is chainA
>name 26 center
>q
#command#42:
gmx_mpi trjconv -s md_0_50.tpr -f md_0_50.xtc -o md_0_50_center.xtc -n index.ndx -center -pbc mol -ur compact
>26
>1 #1 is system.
>q
#RMSD calculation
#command#43,44
gmx_mpi make_ndx -f em.gro -n index.ndx
>21 & !a H*
>name 27 EM94_heavy
>q
gmx_mpi rms -s em.tpr -f md_0_50_center.xtc -n index.ndx -tu ns -o rmsd.xvg
>5 #backbond
>27 #EM94_heavy
>q
#Number of hydrogen bonds.
#Protein-Ligand Interaction Energy
#ie.mdp is provided in supplementary files.
#command#45
gmx_mpi grompp -f ie.mdp -c npt.gro -t npt.cpt -p topol.top -n index.ndx -o ie.tpr
gmx_mpi mdrun -deffnm ie -rerun md_0_50.xtc -nb cpu
gmx_mpi energy -f ie.edr -o interaction_energy.xvg
>Coul-SR:Protein-EM94
>LJ-SR:Protein-EM94
#The free energy landscape is plotted by matplotlib(free_energy_landscape.ipynb)
#The commands below is to perform pca analysis and generate the free_energy_landscape.txt file for plotting.
#command#46,47
gmx_mpi trjconv -f md_0_50.xtc -s md_0_50.tpr -fit rot+trans -o mdfit.xtc
gmx_mpi trjconv -s md_0_50.tpr -f mdfit.xtc -b 0 -e 0 -o md.gro
#command#48
#covariance matrix
gmx_mpi covar -s md.gro -f mdfit.xtc -o eigenvalues.xvg -v eigenvectors.trr -xpma covapic.xpm
#command#49,50,51
#projection
gmx_mpi anaeig -f mdfit.xtc -s md.gro -v eigenvectors.trr -last 1 -proj pc1.xvg
gmx_mpi anaeig -f mdfit.xtc -s md.gro -v eigenvectors.trr -frist 2 -last 2 -proj pc2.xvg
perl sham.pl -i1 pc1.xvg -i2 pc2.xvg -data 1 -data2 1 -o gsham_input.xvg
#command#51
#calculation of gibbs free energy
#xpm2txt.py is provided in supplementary files.
gmx_mpi sham -f gsham_input.xvg
python2 xpm2txt.py -f FES.xpm -o free-energy-landscape.txt