-
Notifications
You must be signed in to change notification settings - Fork 3
/
main.bib
1889 lines (1794 loc) · 92.9 KB
/
main.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@article{Borhani:2012mi,
  author = {Borhani, David W and Shaw, David E},
  journal = {Journal of Computer-Aided Molecular Design},
  month = jan,
  number = {1},
  pages = {15--26},
  title = {The future of molecular dynamics simulations in drug
    discovery},
  volume = {26},
  year = {2012},
  abstract = {Molecular dynamics simulations can now track rapid
    processes--those occurring in less than about a
    millisecond--at atomic resolution for many
    biologically relevant systems. These simulations
    appear poised to exert a significant impact on how
    new drugs are found, perhaps even transforming the
    very process of drug discovery. We predict here
    future results we can expect from, and enhancements
    we need to make in, molecular dynamics simulations
    over the coming 25 years, and in so doing set out
    several Grand Challenges for the field. In the
    context of the problems now facing the pharmaceutical
    industry, we ask how we can best address drug
    discovery needs of the next quarter century using
    molecular dynamics simulations, and we suggest some
    possible approaches.},
  doi = {10.1007/s10822-011-9517-y},
}
@article{Dror:2012cr,
  author = {Dror, Ron O and Dirks, Robert M and Grossman, J P and
    Xu, Huafeng and Shaw, David E},
  journal = {Annual Review of Biophysics},
  pages = {429--452},
  title = {Biomolecular simulation: a computational microscope
    for molecular biology},
  volume = {41},
  year = {2012},
  abstract = {Molecular dynamics simulations capture the behavior
    of biological macromolecules in full atomic detail,
    but their computational demands, combined with the
    challenge of appropriately modeling the relevant
    physics, have historically restricted their length
    and accuracy. Dramatic recent improvements in
    achievable simulation speed and the underlying
    physical models have enabled atomic-level simulations
    on timescales as long as milliseconds that capture
    key biochemical processes such as protein folding,
    drug binding, membrane transport, and the
    conformational changes critical to protein function.
    Such simulation may serve as a computational
    microscope, revealing biomolecular mechanisms at
    spatial and temporal scales that are difficult to
    observe experimentally. We describe the rapidly
    evolving state of the art for atomic-level
    biomolecular simulation, illustrate the types of
    biological discoveries that can now be made through
    simulation, and discuss challenges motivating
    continued innovation in this field.},
  doi = {10.1146/annurev-biophys-042910-155245},
}
@article{Orozco:2014dq,
  author = {Orozco, Modesto},
  journal = {Chemical Society Reviews},
  pages = {5051--5066},
  publisher = {The Royal Society of Chemistry},
  title = {A theoretical view of protein dynamics},
  volume = {43},
  year = {2014},
  abstract = {Proteins are fascinating supramolecular structures,
    which are able to recognize ligands transforming
    binding information into chemical signals. They can
    transfer information across the cell, can catalyse
    complex chemical reactions, and are able to
    transform energy into work with much more efficiency
    than any human engine. The unique abilities of
    proteins are tightly coupled with their dynamic
    properties, which are coded in a complex way in the
    sequence and carefully refined by evolution. Despite
    its importance, our experimental knowledge of
    protein dynamics is still rather limited, and
    mostly derived from theoretical calculations. I will
    review here, in a systematic way, the current
    state-of-the-art theoretical approaches to the study
    of protein dynamics, emphasizing the most recent
    advances, examples of use and the expected lines of
    development in the near future.},
  doi = {10.1039/C3CS60474H},
}
@article{Perilla:2015kx,
  author = {Perilla, Juan R and Goh, Boon Chong and
    Cassidy, C Keith and Liu, Bo and Bernardi, Rafael C and
    Rudack, Till and Yu, Hang and Wu, Zhe and
    Schulten, Klaus},
  journal = {Current Opinion in Structural Biology},
  pages = {64--74},
  title = {Molecular dynamics simulations of large
    macromolecular complexes},
  volume = {31},
  year = {2015},
  abstract = {Connecting dynamics to structural data from diverse
    experimental sources, molecular dynamics simulations
    permit the exploration of biological phenomena in
    unparalleled detail. Advances in simulations are
    moving the atomic resolution descriptions of
    biological systems into the million-to-billion atom
    regime, in which numerous cell functions reside. In
    this opinion, we review the progress, driven by
    large-scale molecular dynamics simulations, in the
    study of viruses, ribosomes, bioenergetic systems,
    and other diverse applications. These examples
    highlight the utility of molecular dynamics
    simulations in the critical task of relating atomic
    detail to the function of supramolecular complexes, a
    task that cannot be achieved by smaller-scale
    simulations or existing experimental approaches
    alone.},
  doi = {10.1016/j.sbi.2015.03.007},
  issn = {0959-440X},
}
@article{Bottaro:2018aa,
  author    = {Bottaro, Sandro and Lindorff-Larsen, Kresten},
  title     = {Biophysical experiments and biomolecular simulations: A perfect match?},
  journal   = {Science},
  volume    = {361},
  number    = {6400},
  pages     = {355--360},
  year      = {2018},
  publisher = {American Association for the Advancement of Science},
  doi       = {10.1126/science.aat4010},
  issn      = {0036-8075},
  abstract  = {A fundamental challenge in biological research is achieving an
               atomic-level description and mechanistic understanding of the
               function of biomolecules. Techniques for biomolecular simulations
               have undergone substantial developments, and their accuracy and
               scope have expanded considerably. Progress has been made through
               an increasingly tight integration of experiments and simulations,
               with experiments being used to refine simulations and simulations
               used to interpret experiments. Here we review the underpinnings
               of this progress, including methods for more efficient
               conformational sampling, accuracy of the physical models used,
               and theoretical approaches to integrate experiments and
               simulations. These developments are enabling detailed studies of
               complex biomolecular assemblies.},
}
@book{Tuckerman:2010cr,
  author    = {Tuckerman, Mark E.},
  title     = {Statistical Mechanics: Theory and Molecular Simulation},
  publisher = {Oxford University Press},
  address   = {Oxford, UK},
  year      = {2010},
}
@article{Mura:2014kx,
  author = {Mura, Cameron and McAnany, Charles E.},
  journal = {Molecular Simulation},
  number = {10--11},
  pages = {732--764},
  title = {An introduction to biomolecular simulations and
    docking},
  volume = {40},
  year = {2014},
  abstract = {The biomolecules in and around a living cell --
    proteins, nucleic acids, lipids and carbohydrates --
    continuously sample myriad conformational states that
    are thermally accessible at physiological
    temperatures. Simultaneously, a given biomolecule
    also samples (and is sampled by) a rapidly
    fluctuating local environment comprising other
    biopolymers, small molecules, water, ions, etc. that
    diffuse to within a few nanometres, leading to
    inter-molecular contacts that stitch together large
    supramolecular assemblies. Indeed, all biological
    systems can be viewed as dynamic networks of
    molecular interactions. As a complement to
    experimentation, molecular simulation offers a
    uniquely powerful approach to analyse biomolecular
    structure, mechanism and dynamics; this is possible
    because the molecular contacts that define a
    complicated biomolecular system are governed by the
    same physical principles (forces and energetics) that
    characterise individual small molecules, and these
    simpler systems are relatively well-understood. With
    modern algorithms and computing capabilities,
    simulations are now an indispensable tool for
    examining biomolecular assemblies in atomic detail,
    from the conformational motion in an individual
    protein to the diffusional dynamics and
    inter-molecular collisions in the early stages of
    formation of cellular-scale assemblies such as the
    ribosome. This text introduces the physicochemical
    foundations of molecular simulations and docking,
    largely from the perspective of biomolecular
    interactions.},
  doi = {10.1080/08927022.2014.935372},
}
@article{Cheatham:2015,
  author = {Cheatham, III, Thomas E. and Roe, Daniel R.},
  journal = {Computing in Science \& Engineering},
  number = {2},
  pages = {30--39},
  title = {The impact of heterogeneous computing on workflows
    for biomolecular simulation and analysis},
  volume = {17},
  year = {2015},
  abstract = {The field of biomolecular simulation has matured to
    the level that detailed, accurate, and functionally
    relevant information that complements experimental
    data about the structure, dynamics, and interactions
    of biomolecules can now be routinely discovered. This
    has been enabled by access to large scale and
    heterogeneous high performance computing resources,
    including special purpose hardware. The improved
    performance of modern simulation methods coupled with
    hardware advances is shifting the rate-limiting steps
    of common biomolecular simulations of small- to
    moderately-sized systems from the generation of data
    (for example via production molecular dynamics
    simulations that used to take weeks or even months)
    to the pre- and post-processing phases of the
    workflow, namely simulation set-up and data
    processing, management, and analysis. Access to
    heterogeneous computational resources enables a
    broader exploration of biomolecular structure and
    dynamics by facilitating distinct aspects of typical
    biomolecular simulation workflows.},
  doi = {10.1109/MCSE.2015.7},
  issn = {1521-9615},
}
@article{nmoldyn,
  author = {Gerald R. Kneller and Volker Keiner and
    Meinhard Kneller and Matthias Schiller},
  journal = {Computer Physics Communications},
  number = {1},
  pages = {191--214},
  title = {{nMOLDYN}: A program package for a neutron scattering
    oriented analysis of Molecular Dynamics simulations},
  volume = {91},
  year = {1995},
  doi = {10.1016/0010-4655(95)00048-K},
  issn = {0010-4655},
}
@article{nmoldyn-2012,
  author = {Hinsen, Konrad and Pellegrini, Eric and
    Stachura, S{\l}awomir and Kneller, Gerald R.},
  journal = {Journal of Computational Chemistry},
  number = {25},
  pages = {2043--2048},
  publisher = {Wiley Subscription Services, Inc., A Wiley Company},
  title = {{nMoldyn} 3: Using task farming for a parallel
    spectroscopy-oriented analysis of molecular dynamics
    simulations},
  volume = {33},
  year = {2012},
  doi = {10.1002/jcc.23035},
  issn = {1096-987X},
}
@article{Hum96,
  author = {Humphrey, W. and Dalke, A. and Schulten, K.},
  journal = {Journal of Molecular Graphics},
  number = {1},
  pages = {33--38},
  title = {{VMD} -- {Visual} {Molecular} {Dynamics}},
  volume = {14},
  year = {1996},
  doi = {10.1016/0263-7855(96)00018-5},
}
@article{Hinsen:2000kx,
  author = {Hinsen, Konrad},
  journal = {Journal of Computational Chemistry},
  number = {2},
  pages = {79--85},
  publisher = {Wiley Online Library},
  title = {The molecular modeling toolkit: a new approach to
    molecular simulations},
  volume = {21},
  year = {2000},
}
@article{Grant:2006ud,
  author = {Grant, Barry J and Rodrigues, Ana P C and
    ElSawy, Karim M and McCammon, J Andrew and
    Caves, Leo S D},
  journal = {Bioinformatics},
  month = nov,
  number = {21},
  pages = {2695--2696},
  title = {{Bio3d}: an {R} package for the comparative analysis of
    protein structures},
  volume = {22},
  year = {2006},
  abstract = {An automated procedure for the analysis
    of homologous protein structures has been developed.
    The method facilitates the characterization of
    internal conformational differences and
    inter-conformer relationships and provides a
    framework for the analysis of protein structural
    evolution. The method is implemented in bio3d, an R
    package for the exploratory analysis of structure and
    sequence data. AVAILABILITY: The bio3d package is
    distributed with full source code as a
    platform-independent R package under a GPL2 license
    from: http://mccammon.ucsd.edu/~bgrant/bio3d/},
  doi = {10.1093/bioinformatics/btl461},
}
@inproceedings{himach-2008,
  author = {Tiankai Tu and C. A. Rendleman and D. W. Borhani and
    R. O. Dror and J. Gullingsrud and M. O. Jensen and
    J. L. Klepeis and P. Maragakis and P. Miller and
    K. A. Stafford and D. E. Shaw},
  booktitle = {2008 SC - International Conference for High
    Performance Computing, Networking, Storage and
    Analysis},
  organization = {IEEE},
  address = {Austin, TX, USA},
  month = nov,
  pages = {1--12},
  title = {A scalable parallel framework for analyzing terascale
    molecular dynamics simulation trajectories},
  year = {2008},
  doi = {10.1109/SC.2008.5214715},
  issn = {2167-4329},
}
@inproceedings{Romo:2009zr,
  address = {Minneapolis, Minnesota, USA},
  author = {Romo, Tod D. and Grossfield, Alan},
  booktitle = {31st Annual International Conference of the IEEE
    EMBS},
  organization = {IEEE},
  pages = {2332--2335},
  title = {{LOOS}: An Extensible Platform for the Structural
    Analysis of Simulations},
  year = {2009},
  abstract = {We have developed LOOS (Lightweight Object-Oriented
    Structure-analysis library) as an object-oriented
    library designed to facilitate the rapid development of
    tools for the structural analysis of simulations.
    LOOS supports the native file formats of most common
    simulation packages including AMBER, CHARMM, CNS,
    Gromacs, NAMD, Tinker, and X-PLOR. Encapsulation and
    polymorphism are used to simultaneously provide a
    stable interface to the programmer and make LOOS
    easily extensible. A rich atom selection language
    based on the C expression syntax is included as part
    of the library. LOOS enables students and casual
    programmer-scientists to rapidly write their own
    analytical tools in a compact and expressive manner
    resembling scripting. LOOS is written in C++ and
    makes extensive use of the Standard Template Library
    and Boost, and is freely available under the GNU
    General Public License (version 3)
    (http://loos.sourceforge.net). LOOS has been tested
    on Linux and MacOS X, but is written to be portable
    and should work on most Unix-based platforms.},
}
@article{Romo:2014bh,
  author   = {Romo, Tod D. and Leioatts, Nicholas and Grossfield, Alan},
  title    = {Lightweight object oriented structure analysis: Tools for building
              tools to analyze molecular dynamics simulations},
  journal  = {Journal of Computational Chemistry},
  volume   = {35},
  number   = {32},
  pages    = {2305--2318},
  year     = {2014},
  doi      = {10.1002/jcc.23753},
  issn     = {1096-987X},
  abstract = {LOOS (Lightweight Object Oriented Structure-analysis) is a C++
              library designed to facilitate making novel tools for analyzing
              molecular dynamics simulations by abstracting out the repetitive
              tasks, allowing developers to focus on the scientifically relevant
              part of the problem. LOOS supports input using the native file
              formats of most common biomolecular simulation packages, including
              CHARMM, NAMD, Amber, Tinker, and Gromacs. A dynamic atom selection
              language based on the C expression syntax is included and is
              easily accessible to the tool-writer. In addition, LOOS is bundled
              with over 140 prebuilt tools, including suites of tools for
              analyzing simulation convergence, three-dimensional histograms,
              and elastic network models. Through modern C++ design, LOOS is
              both simple to develop with (requiring knowledge of only four core
              classes and a few utility functions) and is easily extensible. A
              python interface to the core classes is also provided, further
              facilitating tool development. {\copyright} 2014 Wiley
              Periodicals, Inc.},
}
@article{Michaud-Agrawal:2011fu,
  author = {Michaud-Agrawal, Naveen and Denning, Elizabeth Jane and
    Woolf, Thomas B. and Beckstein, Oliver},
  journal = {Journal of Computational Chemistry},
  pages = {2319--2327},
  title = {{MDAnalysis}: A Toolkit for the Analysis of Molecular
    Dynamics Simulations},
  volume = {32},
  year = {2011},
  abstract = {MDAnalysis is an object-oriented library for
    structural and temporal analysis of molecular
    dynamics (MD) simulation trajectories and individual
    protein structures. It is written in the Python
    language with some performance-critical code in C. It
    uses the powerful NumPy package to expose trajectory
    data as fast and efficient NumPy arrays. It has been
    tested on systems of millions of particles. Many
    common file formats of simulation packages including
    CHARMM, Gromacs, and NAMD and the Protein Data Bank
    format can be read and written. Atoms can be selected
    with a syntax similar to CHARMM's powerful selection
    commands. MDAnalysis enables both novice and
    experienced programmers to rapidly write their own
    analytical tools and access data stored in
    trajectories in an easily accessible manner that
    facilitates interactive explorative analysis.
    MDAnalysis has been tested on and works for most
    Unix-based platforms such as Linux and Mac OS X. It
    is freely available under the GNU Public License from
    http://mdanalysis.googlecode.com.},
  doi = {10.1002/jcc.21787},
}
@inproceedings{Gowers:2016aa,
  address = {Austin, TX},
  author = {Gowers, Richard J. and Linke, Max and
    Barnoud, Jonathan and Reddy, Tyler J. E. and
    Melo, Manuel N. and Seyler, Sean L. and
    Dotson, David L and Doma{\'n}ski, Jan and
    Buchoux, S{\'e}bastien and Kenney, Ian M. and
    Beckstein, Oliver},
  booktitle = {Proceedings of the 15th {Python} in {Science}
    {Conference}},
  editor = {Benthall, Sebastian and Rostrup, Scott},
  organization = {SciPy},
  pages = {102--109},
  title = {{MDAnalysis}: A {Python} package for the Rapid
    Analysis of Molecular Dynamics Simulations},
  year = 2016,
  abstract = {MDAnalysis (http://mdanalysis.org) is a library for
    structural and temporal analysis of molecular
    dynamics (MD) simulation trajectories and individual
    protein structures. MD simulations of biological
    molecules have become an important tool to elucidate
    the relationship between molecular structure and
    physiological function. Simulations are performed
    with highly optimized software packages on HPC
    resources but most codes generate output trajectories
    in their own formats so that the development of new
    trajectory analysis algorithms is confined to
    specific user communities and widespread adoption and
    further development is delayed. MDAnalysis addresses
    this problem by abstracting access to the raw
    simulation data and presenting a uniform
    object-oriented Python interface to the user. It thus
    enables users to rapidly write code that is portable
    and immediately usable in virtually all biomolecular
    simulation communities. The user interface and
    modular design work equally well in complex scripted
    work flows, as foundations for other packages, and
    for interactive and rapid prototyping work in IPython
    / Jupyter notebooks, especially together with
    molecular visualization provided by nglview and time
    series analysis with pandas. MDAnalysis is written in
    Python and Cython and uses NumPy arrays for easy
    interoperability with the wider scientific Python
    ecosystem. It is widely used and forms the foundation
    for more specialized biomolecular simulation tools.
    MDAnalysis is available under the GNU General Public
    License v2.},
  doi = {10.25080/Majora-629e541a-00e},
}
@article{cpptraj-2013,
  author = {Roe, Daniel R. and Cheatham, III, Thomas E.},
  journal = {Journal of Chemical Theory and Computation},
  note = {PMID: 26583988},
  number = {7},
  pages = {3084--3095},
  title = {{PTRAJ} and {CPPTRAJ}: Software for Processing and
    Analysis of Molecular Dynamics Trajectory Data},
  volume = {9},
  year = {2013},
  doi = {10.1021/ct400341p},
}
@article{McGibbon:2015aa,
  author = {McGibbon, Robert T. and Beauchamp, Kyle A. and
    Harrigan, Matthew P. and Klein, Christoph and
    Swails, Jason M. and Hern{\'a}ndez, Carlos X. and
    Schwantes, Christian R. and Wang, Lee-Ping and
    Lane, Thomas J. and Pande, Vijay S.},
  journal = {Biophysical Journal},
  number = {8},
  pages = {1528--1532},
  title = {{MDTraj}: A Modern Open Library for the Analysis of
    Molecular Dynamics Trajectories},
  volume = {109},
  year = {2015},
  abstract = {As molecular dynamics (MD) simulations
    continue to evolve into powerful computational tools
    for studying complex biomolecular systems, the
    necessity of flexible and easy-to-use software tools
    for the analysis of these simulations is growing. We
    have developed MDTraj, a modern, lightweight, and
    fast software package for analyzing MD
    simulations. MDTraj reads and writes trajectory
    data in a wide variety of commonly used formats. It
    provides a large number of trajectory analysis
    capabilities including minimal
    root-mean-square-deviation calculations, secondary
    structure assignment, and the extraction of common
    order parameters. The package has a strong focus on
    interoperability with the wider scientific Python
    ecosystem, bridging the gap between MD data and
    the rapidly growing collection of industry-standard
    statistical analysis and visualization tools in
    Python. MDTraj is a powerful and user-friendly
    software package that simplifies the analysis of
    MD data and connects these datasets with the
    modern interactive data science software ecosystem in
    Python.},
  doi = {10.1016/j.bpj.2015.08.015},
  issn = {0006-3495},
}
@article{pteros2015,
  author  = {Yesylevskyy, Semen O.},
  title   = {Pteros 2.0: Evolution of the fast parallel molecular analysis
             library for {C++} and python},
  journal = {Journal of Computational Chemistry},
  volume  = {36},
  number  = {19},
  pages   = {1480--1488},
  year    = {2015},
  doi     = {10.1002/jcc.23943},
  issn    = {1096-987X},
}
@article{Doerr:2016aa,
  author = {Doerr, S. and Harvey, M. J. and No{\'e}, Frank and
    De Fabritiis, G.},
  journal = {Journal of Chemical Theory and Computation},
  month = apr,
  number = {4},
  pages = {1845--1852},
  publisher = {American Chemical Society (ACS)},
  title = {{HTMD}: High-Throughput Molecular Dynamics for
    Molecular Discovery},
  volume = {12},
  year = {2016},
  doi = {10.1021/acs.jctc.6b00049},
  issn = {1549-9626},
}
@inproceedings{Khoshlessan:2017ab,
  address = {Austin, TX},
  author = {Khoshlessan, Mahzad and Paraskevakos, Ioannis and
    Jha, Shantenu and Beckstein, Oliver},
  booktitle = {Proceedings of the 16th {Python} in {Science}
    {Conference}},
  editor = {Huff, Katy and Lippa, David and
    Niederhut, Dillon and Pacer, M},
  organization = {SciPy},
  pages = {64--72},
  title = {Parallel Analysis in {MDAnalysis} using the {Dask}
    Parallel Computing Library},
  year = {2017},
  abstract = {The analysis of biomolecular computer simulations has
    become a challenge because the amount of output data
    is now routinely in the terabyte range. We evaluated
    if this challenge can be met by a parallel map-reduce
    approach with the Dask parallel computing library for
    task-graph based computing coupled with our
    MDAnalysis Python library for the analysis of
    molecular dynamics (MD) simulations. We performed a
    representative performance evaluation, taking into
    account the highly heterogeneous computing
    environment that researchers typically work in
    together with the diversity of existing file formats
    for MD trajectory data. We found that the underlying
    storage system (solid state drives, parallel file
    systems, or simple spinning platter disks) can be a
    deciding performance factor that leads to data
    ingestion becoming the primary bottleneck in the
    analysis work flow. However, the choice of the data
    file format can mitigate the effect of the storage
    system; in particular, the commonly used Gromacs XTC
    trajectory format, which is highly compressed, can
    exhibit strong scaling close to ideal due to trading
    a decrease in global storage access load against an
    increase in local per-core CPU-intensive
    decompression. Scaling was tested on a single node
    and multiple nodes on national and local
    supercomputing resources as well as typical
    workstations. Although very good strong scaling could
    be achieved for single nodes, good scaling across
    multiple nodes was hindered by the persistent
    occurrence of ``stragglers'', tasks that take much
    longer than all other tasks, and whose ultimate cause
    could not be completely ascertained. In summary, we
    show that, due to the focus on high interoperability
    in the scientific Python eco system, it is
    straightforward to implement map-reduce with Dask in
    MDAnalysis and provide an in-depth analysis of the
    considerations to obtain good parallel performance on
    HPC resources.},
  doi = {10.25080/shinma-7f4c6e7-00a},
}
@inproceedings{ICCP-2018,
  address = {New York, NY, USA},
  author = {Paraskevakos, Ioannis and Luckow, Andre and
    Khoshlessan, Mahzad and Chantzialexiou, George and
    Cheatham, Thomas E. and Beckstein, Oliver and
    Fox, Geoffrey and Jha, Shantenu},
  booktitle = {ICPP 2018: 47th International Conference on Parallel Processing, August 13--16, 2018, Eugene, OR, USA},
  month = aug,
  organization = {Association for Computing Machinery},
  pages = {Article No. 49},
  publisher = {ACM},
  title = {Task-parallel Analysis of Molecular Dynamics Trajectories},
  year = 2018,
}
@article{Liu:2010kx,
  author = {Liu, Pu and Agrafiotis, Dimitris K and
    Theobald, Douglas L},
  journal = {Journal of Computational Chemistry},
  month = may,
  number = {7},
  pages = {1561--1563},
  title = {Fast determination of the optimal rotational matrix
    for macromolecular superpositions},
  volume = {31},
  year = {2010},
  abstract = {Finding the rotational matrix that minimizes the sum
    of squared deviations between two vectors is an
    important problem in bioinformatics and
    crystallography. Traditional algorithms involve the
    inversion or decomposition of a 3 x 3 or 4 x 4
    matrix, which can be computationally expensive and
    numerically unstable in certain cases. Here, we
    present a simple and robust algorithm to rapidly
    determine the optimal rotation using a Newton-Raphson
    quaternion-based method and an adjoint matrix. Our
    method is at least an order of magnitude more
    efficient than conventional inversion/decomposition
    methods, and it should be particularly useful for
    high-throughput analyses of molecular conformations.},
  doi = {10.1002/jcc.21439},
}
@book{Lea96,
  author    = {Leach, A. R.},
  title     = {Molecular Modelling. Principles and Applications},
  publisher = {Longman},
  year      = {1996},
}
@inproceedings{Rocklin:2015aa,
  author       = {Rocklin, Matthew},
  title        = {Dask: Parallel Computation with Blocked algorithms and Task
                  Scheduling},
  booktitle    = {Proceedings of the 14th Python in Science Conference (SciPy 2015)},
  editor       = {Kathryn Huff and James Bergstra},
  organization = {SciPy},
  address      = {Austin, TX},
  pages        = {130--136},
  year         = {2015},
  doi          = {10.25080/Majora-7b98e3ed-013},
  abstract     = {Dask enables parallel and out-of-core computation. We couple
                  blocked algorithms with dynamic and memory aware task
                  scheduling to achieve a parallel and out-of-core NumPy clone.
                  We show how this extends the effective scale of modern
                  hardware to larger datasets and discuss how these ideas can be
                  more broadly applied to other parallel collections.},
}
@article{Dalcin:2011aa,
  author = {Dalc{\'\i}n, Lisandro D. and Paz, Rodrigo R. and
    Kler, Pablo A. and Cosimo, Alejandro},
  journal = {Advances in Water Resources},
  number = {9},
  pages = {1124--1139},
  title = {Parallel distributed computing using {Python}},
  volume = {34},
  year = {2011},
  abstract = {This work presents two software components aimed to
    relieve the costs of accessing high-performance
    parallel computing resources within a Python
    programming environment: MPI for Python and PETSc for
    Python. MPI for Python is a general-purpose Python
    package that provides bindings for the Message
    Passing Interface (MPI) standard using any back-end
    MPI implementation. Its facilities allow parallel
    Python programs to easily exploit multiple processors
    using the message passing paradigm. PETSc for Python
    provides access to the Portable, Extensible Toolkit
    for Scientific Computation (PETSc) libraries. Its
    facilities allow sequential and parallel Python
    applications to exploit state of the art algorithms
    and data structures readily available in PETSc for
    the solution of large-scale problems in science and
    engineering. MPI for Python and PETSc for Python are
    fully integrated to PETSc-FEM, an MPI and PETSc based
    parallel, multiphysics, finite elements code
    developed at CIMEC laboratory. This software
    infrastructure supports research activities related
    to simulation of fluid flows with applications
    ranging from the design of microfluidic devices for
    biochemical analysis to modeling of large-scale
    stream/aquifer interactions.},
  doi = {10.1016/j.advwatres.2011.04.013},
  issn = {0309-1708},
}
% Dalcin, Paz and Storti (2005), Journal of Parallel and Distributed
% Computing 65(9): the original MPI for Python article.
% The page range uses an en-dash (--); MPI and Python are brace-protected so
% that sentence-casing styles keep their capitals.
@article{Dalcin:2005aa,
  author   = {Dalc{\'\i}n, Lisandro D. and Paz, Rodrigo and
              Storti, Mario},
  title    = {{MPI} for {Python}},
  journal  = {Journal of Parallel and Distributed Computing},
  volume   = {65},
  number   = {9},
  pages    = {1108--1115},
  year     = {2005},
  doi      = {10.1016/j.jpdc.2005.03.010},
  issn     = {0743-7315},
  abstract = {MPI for Python provides bindings of the Message
              Passing Interface (MPI) standard for the Python
              programming language and allows any Python program to
              exploit multiple processors. This package is
              constructed on top of the MPI-1 specification and
              defines an object-oriented interface which closely
              follows MPI-2 C++ bindings. It supports point-to-point
              (sends, receives) and collective (broadcasts,
              scatters, gathers) communications of general Python
              objects. Efficiency has been tested in a Beowulf
              class cluster and satisfying results were obtained.
              MPI for Python is open source and available for
              download on the web
              (http://www.cimec.org.ar/python).},
}
% Garraghan et al., IEEE Transactions on Services Computing: straggler
% root-cause and impact analysis in virtualized cloud datacenters.
% NOTE(review): volume 12 paired with year 2016 looks inconsistent (2016 may
% be the early-access year that also appears in the DOI) - confirm against
% the publisher record before changing either field.
@article{Garraghan2016,
  author  = {Garraghan, P. and Ouyang, X. and Yang, R. and McKee, D. and Xu, J.},
  title   = {Straggler Root-Cause and Impact Analysis for
             Massive-scale Virtualized Cloud Datacenters},
  journal = {IEEE Transactions on Services Computing},
  volume  = 12,
  pages   = {91--104},
  year    = 2016,
  doi     = {10.1109/TSC.2016.2611578},
}
% Towns et al. (2014), Computing in Science and Engineering 16(5): the XSEDE
% overview article.
% The two-month issue date is built from the standard month macros so the
% bibliography style controls abbreviation and language; the page range uses
% an en-dash (--).
@article{xsede,
  author  = {Towns, J. and Cockerill, T. and Dahan, M. and Foster, I. and
             Gaither, K. and Grimshaw, A. and Hazlewood, V. and
             Lathrop, S. and Lifka, D. and Peterson, G. D. and
             Roskies, R. and Scott, J. R. and Wilkins-Diehr, N.},
  title   = {{XSEDE}: Accelerating Scientific Discovery},
  journal = {Computing in Science \& Engineering},
  volume  = {16},
  number  = {5},
  pages   = {62--74},
  month   = sep # "--" # oct,
  year    = {2014},
  doi     = {10.1109/MCSE.2014.80},
  issn    = {1521-9615},
}
% Master's thesis by Jeffrey Alan Daily (Washington State University, 2009)
% on GAiN, distributed array computation with Python.
@mastersthesis{GAiN,
  title   = {{GAiN}: Distributed Array Computation with {Python}},
  author  = {Daily, Jeffrey Alan},
  year    = {2009},
  school  = {School of Electrical Engineering and Computer Science,
             Washington State University},
  address = {Pullman, WA},
}
% Nieplocha et al. (2006), International Journal of High Performance
% Computing Applications 20(2): the Global Arrays toolkit article.
% The page range uses an en-dash (--).
@article{GA,
  author  = {Nieplocha, Jarek and Palmer, Bruce and Tipparaju, Vinod and
             Krishnan, Manojkumar and Trease, Harold and
             Apr{\`a}, Edoardo},
  title   = {Advances, Applications and Performance of the {Global
             Arrays} Shared Memory Programming Toolkit},
  journal = {The International Journal of High Performance
             Computing Applications},
  volume  = {20},
  number  = {2},
  pages   = {203--231},
  year    = {2006},
}
% Dean and Ghemawat (2008), Communications of the ACM 51(1): the MapReduce
% article.
@article{Dean2008,
  author    = {Dean, Jeffrey and Ghemawat, Sanjay},
  title     = {{MapReduce}: simplified data processing on large clusters},
  journal   = {Communications of the ACM},
  publisher = {ACM},
  volume    = {51},
  number    = {1},
  pages     = {107--113},
  year      = {2008},
  doi       = {10.1145/1327452.1327492},
  keywords  = {map reduce},
  abstract  = {MapReduce is a programming model and an associated
               implementation for processing and generating large
               datasets that is amenable to a broad variety of
               real-world tasks. Users specify the computation in
               terms of a map and a reduce function, and the
               underlying runtime system automatically parallelizes
               the computation across large-scale clusters of
               machines, handles machine failures, and schedules
               inter-machine communication to make efficient use of
               the network and disks. Programmers find the system
               easy to use: more than ten thousand distinct MapReduce
               programs have been implemented internally at Google
               over the past four years, and an average of one
               hundred thousand MapReduce jobs are executed on
               Google's clusters every day, processing a total of
               more than twenty petabytes of data per day.},
}
% Kyong, Jeon and Lim (2017), ICSCA '17: scalability of Spark on a scale-up
% server using Docker container-based partitioning.
% The product names in the title are capitalised and brace-protected so that
% sentence-casing styles do not lowercase them.
@inproceedings{Kyong2017,
  author    = {Kyong, Joohyun and Jeon, Jinwoo and Lim, Sung-Soo},
  title     = {Improving scalability of {Apache Spark}-based scale-up
               server through {Docker} container-based partitioning},
  booktitle = {Proceedings of the 6th International Conference on
               Software and Computer Applications - ICSCA '17},
  pages     = {176--180},
  publisher = {ACM Press},
  address   = {New York, USA},
  year      = {2017},
  doi       = {10.1145/3056662.3056686},
  isbn      = {9781450348577},
}
% Kay Ousterhout's PhD thesis (UC Berkeley EECS, 2017), also carrying the
% report number UCB/EECS-2017-158.
% The month uses the standard macro so the style controls its rendering.
% NOTE(review): eprint holds a direct PDF link rather than an archive
% identifier; kept unchanged here - confirm how the bibliography style in use
% treats it.
@phdthesis{Ousterhout2017,
  author  = {Ousterhout, Kay},
  title   = {Architecting for Performance Clarity in Data Analytics Frameworks},
  school  = {EECS Department, University of California, Berkeley},
  address = {Berkeley, CA},
  number  = {UCB/EECS-2017-158},
  month   = oct,
  year    = 2017,
  url     = {https://www2.eecs.berkeley.edu/Pubs/TechRpts/2017/EECS-2017-158.html},
  eprint  = {https://www2.eecs.berkeley.edu/Pubs/TechRpts/2017/EECS-2017-158.pdf},
}
% Gittens et al. (2016), IEEE International Conference on Big Data:
% comparison of Spark and C+MPI for matrix factorizations.
% The month is the bare macro (a braced value is printed literally) and Spark
% is capitalised and brace-protected in the title.
@inproceedings{Gittens2016,
  author    = {Gittens, Alex and Devarakonda, Aditya and Racah, Evan and
               Ringenburg, Michael and Gerhardt, Lisa and
               Kottalam, Jey and Liu, Jialin and Maschhoff, Kristyn and
               Canon, Shane and Chhugani, Jatin and Sharma, Pramod and
               Yang, Jiyan and Demmel, James and Harrell, Jim and
               Krishnamurthy, Venkat and Mahoney, Michael W. and
               Prabhat},
  title     = {Matrix factorizations at scale: A comparison of
               scientific data analytics in {Spark} and {C+MPI} using
               three case studies},
  booktitle = {IEEE International Conference on Big Data (Big Data)},
  pages     = {204--213},
  month     = dec,
  year      = {2016},
  doi       = {10.1109/BigData.2016.7840606},
  isbn      = {978-1-4673-9005-7},
}
% Kirpichov and Denielou (18 May 2016), Google Cloud Blog post on dynamic
% work rebalancing in Google Cloud Dataflow.
% The day is concatenated onto the standard month macro so the style still
% controls how the month name is printed.
@misc{Kirpichov2016,
  author       = {Kirpichov, Eugene and Denielou, Malo},
  title        = {No shard left behind: dynamic work rebalancing in {Google Cloud Dataflow}},
  howpublished = {Google Cloud Blog},
  month        = "18~" # may,
  year         = 2016,
  note         = {accessed Aug 24, 2019},
  url          = {https://cloud.google.com/blog/products/gcp/no-shard-left-behind-dynamic-work-rebalancing-in-google-cloud-dataflow},
}
% Tien-Dat Phan's PhD thesis (2017) on energy-efficient straggler mitigation.
% The school name is spelled Rennes (the original entry dropped the final s).
@phdthesis{Tien-2017,
  author = {Phan, Tien-Dat},
  title  = {Energy-efficient Straggler Mitigation for Big Data
            Applications on the Clouds},
  school = {{\'E}cole normale sup{\'e}rieure de Rennes},
  year   = {2017},
}
% Chen, Liu and Xiao (2014), IEEE Transactions on Computers 63(4): smart
% speculative execution for MapReduce.
% The page range uses an en-dash (--) and MapReduce is brace-protected so
% that sentence-casing styles keep its internal capitals.
@article{Chen2014,
  author    = {Chen, Qi and Liu, Cheng and Xiao, Zhen},
  title     = {Improving {MapReduce} Performance Using Smart
               Speculative Execution Strategy},
  journal   = {IEEE Transactions on Computers},
  publisher = {IEEE},
  volume    = 63,
  number    = 4,
  pages     = {954--967},
  year      = 2014,
  doi       = {10.1109/TC.2013.15},
}
% Xie et al. (2012), SC '12: characterization of output bottlenecks on a
% shared Lustre file system of the Jaguar supercomputer.
@inproceedings{Xie:2012aa,
  author    = {Xie, Bing and Chase, Jeffrey and Dillow, David and
               Drokin, Oleg and Klasky, Scott and Oral, Sarp and
               Podhorszki, Norbert},
  title     = {Characterizing Output Bottlenecks in a Supercomputer},
  booktitle = {Proceedings of the International Conference on High
               Performance Computing, Networking, Storage and
               Analysis},
  series    = {SC '12},
  pages     = {8:1--8:11},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
  year      = {2012},
  isbn      = {978-1-4673-0804-5},
  abstract  = {Supercomputer I/O loads are often dominated by writes.
               HPC (High Performance Computing) file systems are
               designed to absorb these bursty outputs at high
               bandwidth through massive parallelism. However, the
               delivered write bandwidth often falls well below the
               peak. This paper characterizes the data absorption
               behavior of a center-wide shared Lustre parallel file
               system on the Jaguar supercomputer. We use a
               statistical methodology to address the challenges of
               accurately measuring a shared machine under production
               load and to obtain the distribution of bandwidth
               across samples of compute nodes, storage targets, and
               time intervals. We observe and quantify limitations
               from competing traffic, contention on storage servers
               and I/O routers, concurrency limitations in the client
               compute node operating systems, and the impact of
               variance (stragglers) on coupled output such as
               striping. We then examine the implications of our
               results for application performance and the design of
               I/O middleware systems on shared supercomputers.},
}
% Yang et al. (2016), ICALIP 2016: improving Spark performance with MPTE in
% heterogeneous environments.
% Only the proper noun and the acronym are brace-protected (the original
% double-braced the whole title, which disables style casing), and the month
% is the bare macro.
@inproceedings{Yang2016,
  author    = {Yang, Hongbin and Liu, Xianyang and Chen, Shenbo and
               Lei, Zhou and Du, Hongguang and Zhu, Caixin},
  title     = {Improving {Spark} performance with {MPTE} in
               heterogeneous environments},
  booktitle = {2016 International Conference on Audio, Language and
               Image Processing (ICALIP)},
  pages     = {28--33},
  publisher = {IEEE},
  month     = jul,
  year      = {2016},
  doi       = {10.1109/ICALIP.2016.7846627},
  isbn      = {978-1-5090-0654-0},
}
% Rosen and Zhao (2012), technical report from EECS, UC Berkeley, on
% fine-grained micro-tasks for MapReduce skew handling.
% MapReduce is brace-protected so that sentence-casing styles keep its
% internal capitals.
@techreport{Rosen2012,
  author      = {Rosen, Josh and Zhao, Bill},
  title       = {Fine-Grained Micro-Tasks for {MapReduce} Skew-Handling},
  institution = {EECS, UC Berkeley},
  year        = {2012},
  url         = {https://pdfs.semanticscholar.org/3617/916adb83f33f8df7d0b3bfc23d0de80da9b7.pdf},
}
% Kwon, Balazinska, Howe and Rolia (2012), SIGMOD '12: the SkewTune paper.
% Cleanup of an auto-exported record: the page range is removed from the
% title, the proceedings title moves from publisher to booktitle with the
% acronym in series, pages hold only the range with an en-dash (--), and the
% month is the bare macro. The conference days (May 20 to 24) are preserved
% in eventdate, which classic BibTeX styles ignore.
% NOTE(review): publisher is set to ACM based on the ACM SIGMOD proceedings
% name and the 10.1145 DOI prefix - confirm.
@inproceedings{Kwon2012,
  author    = {Kwon, YongChul and Balazinska, Magdalena and Howe, Bill and
               Rolia, Jerome},
  title     = {{SkewTune}: Mitigating Skew in {MapReduce} Applications},
  booktitle = {Proceedings of the 2012 {ACM} {SIGMOD} International
               Conference on Management of Data},
  series    = {SIGMOD '12},
  pages     = {25--36},
  publisher = {ACM},
  month     = may,
  year      = {2012},
  eventdate = {2012-05-20/2012-05-24},
  doi       = {10.1145/2213836.2213840},
}
% Ousterhout et al. (2015), NSDI '15: making sense of performance in data
% analytics frameworks.
% The ISBN moves from the number field to isbn, with the same thirteen digits
% regrouped so the final check digit stands alone; pages hold only the range
% with an en-dash (--); the conference acronym moves from booktitle to series.
@inproceedings{Ousterhout2015,
  author    = {Ousterhout, Kay and Rasti, Ryan and Ratnasamy, Sylvia and
               Shenker, Scott and Chun, Byung-Gon},
  title     = {Making Sense of Performance in Data Analytics
               Frameworks},
  booktitle = {Proceedings of the 12th {USENIX} Conference on
               Networked Systems Design and Implementation},
  series    = {NSDI '15},
  pages     = {293--307},
  year      = {2015},
  isbn      = {978-1-931971-21-8},
}
@article{AWE-WQ2014,
author = {Badi Abdul-Wahid and Haoyun Feng and Dinesh Rajan and
Ronan Costaouec and Eric Darve and Douglas Thain and
Jesu{\'s} A. Izaguirre},
journal = {Journal of Chemical Information and Modeling},
pages = {3033--3043},
title = {AWE-WQ, Fast-Forwarding Molecular Dynamics Using the
Accelerated Weighted Ensemble},