main.bib



% Perspective on the future of molecular dynamics simulations in drug discovery.
@article{Borhani:2012mi,
  author =        {Borhani, David W and Shaw, David E},
  journal =       {J Comput Aided Mol Des},
  month =         jan,
  number =        {1},
  pages =         {15--26},
  title =         {The future of molecular dynamics simulations in drug
                   discovery},
  volume =        {26},
  year =          {2012},
  abstract =      {Molecular dynamics simulations can now track rapid
                   processes--those occurring in less than about a
                   millisecond--at atomic resolution for many
                   biologically relevant systems. These simulations
                   appear poised to exert a significant impact on how
                   new drugs are found, perhaps even transforming the
                   very process of drug discovery. We predict here
                   future results we can expect from, and enhancements
                   we need to make in, molecular dynamics simulations
                   over the coming 25 years, and in so doing set out
                   several Grand Challenges for the field. In the
                   context of the problems now facing the pharmaceutical
                   industry, we ask how we can best address drug
                   discovery needs of the next quarter century using
                   molecular dynamics simulations, and we suggest some
                   possible approaches.},
  doi =           {10.1007/s10822-011-9517-y},
}

% Review of atomic-level biomolecular simulation as a "computational microscope".
@article{Dror:2012cr,
  author =        {Dror, Ron O and Dirks, Robert M and Grossman, J P and
                   Xu, Huafeng and Shaw, David E},
  journal =       {Annu Rev Biophys},
  pages =         {429--452},
  title =         {Biomolecular simulation: a computational microscope
                   for molecular biology},
  volume =        {41},
  year =          {2012},
  abstract =      {Molecular dynamics simulations capture the behavior
                   of biological macromolecules in full atomic detail,
                   but their computational demands, combined with the
                   challenge of appropriately modeling the relevant
                   physics, have historically restricted their length
                   and accuracy. Dramatic recent improvements in
                   achievable simulation speed and the underlying
                   physical models have enabled atomic-level simulations
                   on timescales as long as milliseconds that capture
                   key biochemical processes such as protein folding,
                   drug binding, membrane transport, and the
                   conformational changes critical to protein function.
                   Such simulation may serve as a computational
                   microscope, revealing biomolecular mechanisms at
                   spatial and temporal scales that are difficult to
                   observe experimentally. We describe the rapidly
                   evolving state of the art for atomic-level
                   biomolecular simulation, illustrate the types of
                   biological discoveries that can now be made through
                   simulation, and discuss challenges motivating
                   continued innovation in this field.},
  doi =           {10.1146/annurev-biophys-042910-155245},
}

% Review of theoretical approaches to the study of protein dynamics.
@article{Orozco:2014dq,
  author =        {Orozco, Modesto},
  journal =       {Chem. Soc. Rev.},
  pages =         {5051--5066},
  publisher =     {The Royal Society of Chemistry},
  title =         {A theoretical view of protein dynamics},
  volume =        {43},
  year =          {2014},
  abstract =      {Proteins are fascinating supramolecular structures,
                   which are able to recognize ligands transforming
                   binding information into chemical signals. They can
                   transfer information across the cell, can catalyse
                   complex chemical reactions, and are able to
                   transform energy into work with much more efficiency
                   than any human engine. The unique abilities of
                   proteins are tightly coupled with their dynamic
                   properties, which are coded in a complex way in the
                   sequence and carefully refined by evolution. Despite
                   its importance, our experimental knowledge of
                   protein dynamics is still rather limited, and
                   mostly derived from theoretical calculations. I will
                   review here, in a systematic way, the current
                   state-of-the-art theoretical approaches to the study
                   of protein dynamics, emphasizing the most recent
                   advances, examples of use and the expected lines of
                   development in the near future.},
  doi =           {10.1039/C3CS60474H},
}

% Opinion/review on MD simulations of large macromolecular complexes.
@article{Perilla:2015kx,
  author =        {Perilla, Juan R and Goh, Boon Chong and
                   Cassidy, C Keith and Liu, Bo and Bernardi, Rafael C and
                   Rudack, Till and Yu, Hang and Wu, Zhe and
                   Schulten, Klaus},
  journal =       {Current Opinion in Structural Biology},
  pages =         {64--74},
  title =         {Molecular dynamics simulations of large
                   macromolecular complexes},
  volume =        {31},
  year =          {2015},
  abstract =      {Connecting dynamics to structural data from diverse
                   experimental sources, molecular dynamics simulations
                   permit the exploration of biological phenomena in
                   unparalleled detail. Advances in simulations are
                   moving the atomic resolution descriptions of
                   biological systems into the million-to-billion atom
                   regime, in which numerous cell functions reside. In
                   this opinion, we review the progress, driven by
                   large-scale molecular dynamics simulations, in the
                   study of viruses, ribosomes, bioenergetic systems,
                   and other diverse applications. These examples
                   highlight the utility of molecular dynamics
                   simulations in the critical task of relating atomic
                   detail to the function of supramolecular complexes, a
                   task that cannot be achieved by smaller-scale
                   simulations or existing experimental approaches
                   alone.},
  doi =           {10.1016/j.sbi.2015.03.007},
  issn =          {0959-440X},
}

% Review of how biophysical experiments and biomolecular simulations are integrated.
@article{Bottaro:2018aa,
  author =        {Bottaro, Sandro and Lindorff-Larsen, Kresten},
  title =         {Biophysical experiments and biomolecular simulations:
                   A perfect match?},
  journal =       {Science},
  volume =        {361},
  number =        {6400},
  pages =         {355--360},
  year =          {2018},
  publisher =     {American Association for the Advancement of Science},
  doi =           {10.1126/science.aat4010},
  issn =          {0036-8075},
  abstract =      {A fundamental challenge in biological research is
                   achieving an atomic-level description and mechanistic
                   understanding of the function of biomolecules.
                   Techniques for biomolecular simulations have
                   undergone substantial developments, and their
                   accuracy and scope have expanded considerably.
                   Progress has been made through an increasingly tight
                   integration of experiments and simulations, with
                   experiments being used to refine simulations and
                   simulations used to interpret experiments. Here we
                   review the underpinnings of this progress, including
                   methods for more efficient conformational sampling,
                   accuracy of the physical models used, and theoretical
                   approaches to integrate experiments and simulations.
                   These developments are enabling detailed studies of
                   complex biomolecular assemblies.},
}

% Textbook on statistical mechanics and molecular simulation.
@book{Tuckerman:2010cr,
  author =        {Tuckerman, Mark E.},
  title =         {Statistical Mechanics: Theory and Molecular
                   Simulation},
  publisher =     {Oxford University Press},
  address =       {Oxford, UK},
  year =          {2010},
}

% Introductory text on biomolecular simulations and docking (double issue 10--11).
@article{Mura:2014kx,
  author =        {Mura, Cameron and McAnany, Charles E.},
  journal =       {Molecular Simulation},
  number =        {10--11},
  pages =         {732--764},
  title =         {An introduction to biomolecular simulations and
                   docking},
  volume =        {40},
  year =          {2014},
  abstract =      {The biomolecules in and around a living cell --
                   proteins, nucleic acids, lipids and carbohydrates --
                   continuously sample myriad conformational states that
                   are thermally accessible at physiological
                   temperatures. Simultaneously, a given biomolecule
                   also samples (and is sampled by) a rapidly
                   fluctuating local environment comprising other
                   biopolymers, small molecules, water, ions, etc. that
                   diffuse to within a few nanometres, leading to
                   inter-molecular contacts that stitch together large
                   supramolecular assemblies. Indeed, all biological
                   systems can be viewed as dynamic networks of
                   molecular interactions. As a complement to
                   experimentation, molecular simulation offers a
                   uniquely powerful approach to analyse biomolecular
                   structure, mechanism and dynamics; this is possible
                   because the molecular contacts that define a
                   complicated biomolecular system are governed by the
                   same physical principles (forces and energetics) that
                   characterise individual small molecules, and these
                   simpler systems are relatively well-understood. With
                   modern algorithms and computing capabilities,
                   simulations are now an indispensable tool for
                   examining biomolecular assemblies in atomic detail,
                   from the conformational motion in an individual
                   protein to the diffusional dynamics and
                   inter-molecular collisions in the early stages of
                   formation of cellular-scale assemblies such as the
                   ribosome. This text introduces the physicochemical
                   foundations of molecular simulations and docking,
                   largely from the perspective of biomolecular
                   interactions.},
  doi =           {10.1080/08927022.2014.935372},
}

% Article on how heterogeneous computing changes biomolecular simulation workflows.
@article{Cheatham:2015,
  author =        {Cheatham, T. and Roe, D.},
  journal =       {Computing in Science \& Engineering},
  number =        {2},
  pages =         {30--39},
  title =         {The impact of heterogeneous computing on workflows
                   for biomolecular simulation and analysis},
  volume =        {17},
  year =          {2015},
  abstract =      {The field of biomolecular simulation has matured to
                   the level that detailed, accurate, and functionally
                   relevant information that complements experimental
                   data about the structure, dynamics, and interactions
                   of biomolecules can now be routinely discovered. This
                   has been enabled by access to large scale and
                   heterogeneous high performance computing resources,
                   including special purpose hardware. The improved
                   performance of modern simulation methods coupled with
                   hardware advances is shifting the rate-limiting steps
                   of common biomolecular simulations of small- to
                   moderately-sized systems from the generation of data
                   (for example via production molecular dynamics
                   simulations that used to take weeks or even months)
                   to the pre- and post-processing phases of the
                   workflow, namely simulation set-up and data
                   processing, management, and analysis. Access to
                   heterogeneous computational resources enables a
                   broader exploration of biomolecular structure and
                   dynamics by facilitating distinct aspects of typical
                   biomolecular simulation workflows.},
  doi =           {10.1109/MCSE.2015.7},
  issn =          {1521-9615},
}

% Original nMOLDYN paper; the program name is braced so styles keep its casing.
@article{nmoldyn,
  author =        {Gerald R. Kneller and Volker Keiner and
                   Meinhard Kneller and Matthias Schiller},
  journal =       {Computer Physics Communications},
  number =        {1},
  pages =         {191--214},
  title =         {{nMOLDYN}: A program package for a neutron scattering
                   oriented analysis of Molecular Dynamics simulations},
  volume =        {91},
  year =          {1995},
  doi =           {10.1016/0010-4655(95)00048-K},
  issn =          {0010-4655},
}

% nMoldyn 3 paper; the program name is braced so styles keep its casing.
@article{nmoldyn-2012,
  author =        {Hinsen, Konrad and Pellegrini, Eric and
                   Stachura, S{\l}awomir and Kneller, Gerald R.},
  journal =       {Journal of Computational Chemistry},
  number =        {25},
  pages =         {2043--2048},
  publisher =     {Wiley Subscription Services, Inc., A Wiley Company},
  title =         {{nMoldyn} 3: Using task farming for a parallel
                   spectroscopy-oriented analysis of molecular dynamics
                   simulations},
  volume =        {33},
  year =          {2012},
  doi =           {10.1002/jcc.23035},
  issn =          {1096-987X},
}

% VMD (Visual Molecular Dynamics) paper; whole words are braced to preserve casing.
% NOTE(review): number and doi were added from memory, not from this file -- confirm.
@article{Hum96,
  author =        {Humphrey, W. and Dalke, A. and Schulten, K.},
  journal =       {J.~Mol.~Graph.},
  number =        {1},
  pages =         {33--38},
  title =         {{VMD} -- {Visual} {Molecular} {Dynamics}},
  volume =        {14},
  year =          {1996},
  doi =           {10.1016/0263-7855(96)00018-5},
}

% Molecular Modeling Toolkit paper; the toolkit name is braced as a proper noun.
@article{Hinsen:2000kx,
  author =        {Hinsen, Konrad},
  journal =       {Journal of Computational Chemistry},
  number =        {2},
  pages =         {79--85},
  publisher =     {Wiley Online Library},
  title =         {The {Molecular Modeling Toolkit}: A New Approach to
                   Molecular Simulations},
  volume =        {21},
  year =          {2000},
}

% Bio3d: R package for comparative analysis of protein structures.
@article{Grant:2006ud,
  author =        {Grant, Barry J and Rodrigues, Ana P C and
                   ElSawy, Karim M and McCammon, J Andrew and
                   Caves, Leo S D},
  journal =       {Bioinformatics},
  month =         nov,
  number =        {21},
  pages =         {2695--2696},
  title =         {{Bio3d}: an {R} package for the comparative analysis of
                   protein structures},
  volume =        {22},
  year =          {2006},
  abstract =      {UNLABELLED: An automated procedure for the analysis
                   of homologous protein structures has been developed.
                   The method facilitates the characterization of
                   internal conformational differences and
                   inter-conformer relationships and provides a
                   framework for the analysis of protein structural
                   evolution. The method is implemented in bio3d, an R
                   package for the exploratory analysis of structure and
                   sequence data. AVAILABILITY: The bio3d package is
                   distributed with full source code as a
                   platform-independent R package under a GPL2 license
                   from: http://mccammon.ucsd.edu/~bgrant/bio3d/},
  doi =           {10.1093/bioinformatics/btl461},
}

% SC 2008 paper on a scalable parallel framework for analyzing MD trajectories.
@inproceedings{himach-2008,
  author =        {Tiankai Tu and C. A. Rendleman and D. W. Borhani and
                   R. O. Dror and J. Gullingsrud and M. O. Jensen and
                   J. L. Klepeis and P. Maragakis and P. Miller and
                   K. A. Stafford and D. E. Shaw},
  booktitle =     {2008 SC - International Conference for High
                   Performance Computing, Networking, Storage and
                   Analysis},
  organization =  {IEEE},
  address =       {Austin, TX, USA},
  month =         nov,
  pages =         {1--12},
  title =         {A scalable parallel framework for analyzing terascale
                   molecular dynamics simulation trajectories},
  year =          {2008},
  doi =           {10.1109/SC.2008.5214715},
  issn =          {2167-4329},
}

% IEEE EMBS 2009 conference paper introducing the LOOS structure-analysis library.
@inproceedings{Romo:2009zr,
  address =       {Minneapolis, Minnesota, USA},
  author =        {Romo, Tod D. and Grossfield, Alan},
  booktitle =     {31st Annual International Conference of the IEEE
                   EMBS},
  organization =  {IEEE},
  pages =         {2332--2335},
  title =         {{LOOS}: An Extensible Platform for the Structural
                   Analysis of Simulations},
  year =          {2009},
  abstract =      {We have developed LOOS (Lightweight Object-Oriented
                   Structure-analysis library) as an object-oriented
                   library designed to facilitate the rapid development of
                   tools for the structural analysis of simulations.
                   LOOS supports the native file formats of most common
                   simulation packages including AMBER, CHARMM, CNS,
                   Gromacs, NAMD, Tinker, and X-PLOR. Encapsulation and
                   polymorphism are used to simultaneously provide a
                   stable interface to the programmer and make LOOS
                   easily extensible. A rich atom selection language
                   based on the C expression syntax is included as part
                   of the library. LOOS enables students and casual
                   programmer-scientists to rapidly write their own
                   analytical tools in a compact and expressive manner
                   resembling scripting. LOOS is written in C++ and
                   makes extensive use of the Standard Template Library
                   and Boost, and is freely available under the GNU
                   General Public License (version 3)
                   (http://loos.sourceforge.net). LOOS has been tested
                   on Linux and MacOS X, but is written to be portable
                   and should work on most Unix-based platforms.},
}

% Journal article describing the LOOS C++ library and its bundled analysis tools.
@article{Romo:2014bh,
  author =        {Romo, Tod D. and Leioatts, Nicholas and
                   Grossfield, Alan},
  title =         {Lightweight object oriented structure analysis: Tools
                   for building tools to analyze molecular dynamics
                   simulations},
  journal =       {Journal of Computational Chemistry},
  volume =        {35},
  number =        {32},
  pages =         {2305--2318},
  year =          {2014},
  doi =           {10.1002/jcc.23753},
  issn =          {1096-987X},
  abstract =      {LOOS (Lightweight Object Oriented Structure-analysis)
                   is a C++ library designed to facilitate making novel
                   tools for analyzing molecular dynamics simulations by
                   abstracting out the repetitive tasks, allowing
                   developers to focus on the scientifically relevant
                   part of the problem. LOOS supports input using the
                   native file formats of most common biomolecular
                   simulation packages, including CHARMM, NAMD, Amber,
                   Tinker, and Gromacs. A dynamic atom selection
                   language based on the C expression syntax is included
                   and is easily accessible to the tool-writer. In
                   addition, LOOS is bundled with over 140 prebuilt
                   tools, including suites of tools for analyzing
                   simulation convergence, three-dimensional histograms,
                   and elastic network models. Through modern C++
                   design, LOOS is both simple to develop with
                   (requiring knowledge of only four core classes and a
                   few utility functions) and is easily extensible. A
                   python interface to the core classes is also
                   provided, further facilitating tool development.
                   {\copyright} 2014 Wiley Periodicals, Inc.},
}

% Original MDAnalysis toolkit paper.
% NOTE(review): the issue number was added from memory, not from this file -- confirm.
@article{Michaud-Agrawal:2011fu,
  author =        {Michaud-Agrawal, Naveen and Denning, Elizabeth Jane and
                   Woolf, Thomas B. and Beckstein, Oliver},
  journal =       {Journal of Computational Chemistry},
  number =        {10},
  pages =         {2319--2327},
  title =         {{MDAnalysis}: A Toolkit for the Analysis of Molecular
                   Dynamics Simulations},
  volume =        {32},
  year =          {2011},
  abstract =      {MDAnalysis is an object-oriented library for
                   structural and temporal analysis of molecular
                   dynamics (MD) simulation trajectories and individual
                   protein structures. It is written in the Python
                   language with some performance-critical code in C. It
                   uses the powerful NumPy package to expose trajectory
                   data as fast and efficient NumPy arrays. It has been
                   tested on systems of millions of particles. Many
                   common file formats of simulation packages including
                   CHARMM, Gromacs, and NAMD and the Protein Data Bank
                   format can be read and written. Atoms can be selected
                   with a syntax similar to CHARMM's powerful selection
                   commands. MDAnalysis enables both novice and
                   experienced programmers to rapidly write their own
                   analytical tools and access data stored in
                   trajectories in an easily accessible manner that
                   facilitates interactive explorative analysis.
                   MDAnalysis has been tested on and works for most
                   Unix-based platforms such as Linux and Mac OS X. It
                   is freely available under the GNU Public License from
                   http://mdanalysis.googlecode.com.},
  doi =           {10.1002/jcc.21787},
}

% SciPy 2016 proceedings paper on the MDAnalysis Python package.
@inproceedings{Gowers:2016aa,
  address =       {Austin, TX},
  author =        {Gowers, Richard J. and Linke, Max and
                   Barnoud, Jonathan and Reddy, Tyler J. E. and
                   Melo, Manuel N. and Seyler, Sean L. and
                   Dotson, David L and Doma{\'n}ski, Jan and
                   Buchoux, S{\'e}bastien and Kenney, Ian M. and
                   Beckstein, Oliver},
  booktitle =     {Proceedings of the 15th {Python} in {Science}
                   {Conference}},
  editor =        {Benthall, Sebastian and Rostrup, Scott},
  organization =  {SciPy},
  pages =         {102--109},
  title =         {{MDAnalysis}: A {Python} package for the Rapid
                   Analysis of Molecular Dynamics Simulations},
  year =          {2016},
  abstract =      {MDAnalysis (http://mdanalysis.org) is a library for
                   structural and temporal analysis of molecular
                   dynamics (MD) simulation trajectories and individual
                   protein structures. MD simulations of biological
                   molecules have become an important tool to elucidate
                   the relationship between molecular structure and
                   physiological function. Simulations are performed
                   with highly optimized software packages on HPC
                   resources but most codes generate output trajectories
                   in their own formats so that the development of new
                   trajectory analysis algorithms is confined to
                   specific user communities and widespread adoption and
                   further development is delayed. MDAnalysis addresses
                   this problem by abstracting access to the raw
                   simulation data and presenting a uniform
                   object-oriented Python interface to the user. It thus
                   enables users to rapidly write code that is portable
                   and immediately usable in virtually all biomolecular
                   simulation communities. The user interface and
                   modular design work equally well in complex scripted
                   work flows, as foundations for other packages, and
                   for interactive and rapid prototyping work in IPython
                   / Jupyter notebooks, especially together with
                   molecular visualization provided by nglview and time
                   series analysis with pandas. MDAnalysis is written in
                   Python and Cython and uses NumPy arrays for easy
                   interoperability with the wider scientific Python
                   ecosystem. It is widely used and forms the foundation
                   for more specialized biomolecular simulation tools.
                   MDAnalysis is available under the GNU General Public
                   License v2.},
  doi =           {10.25080/Majora-629e541a-00e},
}

% PTRAJ/CPPTRAJ trajectory analysis software paper.
% Authors use the "Last, Jr, First" form so the "III" suffix is parsed as a suffix.
@article{cpptraj-2013,
  author =        {Roe, Daniel R. and Cheatham, III, Thomas E.},
  journal =       {Journal of Chemical Theory and Computation},
  note =          {PMID: 26583988},
  number =        {7},
  pages =         {3084--3095},
  title =         {{PTRAJ} and {CPPTRAJ}: Software for Processing and
                   Analysis of Molecular Dynamics Trajectory Data},
  volume =        {9},
  year =          {2013},
  doi =           {10.1021/ct400341p},
}

% MDTraj paper: open library for the analysis of MD trajectories.
@article{McGibbon:2015aa,
  author =        {McGibbon, Robert T. and Beauchamp, Kyle A. and
                   Harrigan, Matthew P. and Klein, Christoph and
                   Swails, Jason M. and Hern{\'a}ndez, Carlos X. and
                   Schwantes, Christian R. and Wang, Lee-Ping and
                   Lane, Thomas J. and Pande, Vijay S.},
  journal =       {Biophysical Journal},
  number =        {8},
  pages =         {1528--1532},
  title =         {{MDTraj}: A Modern Open Library for the Analysis of
                   Molecular Dynamics Trajectories},
  volume =        {109},
  year =          {2015},
  abstract =      {As molecular dynamics (MD) simulations
                   continue to evolve into powerful computational tools
                   for studying complex biomolecular systems, the
                   necessity of flexible and easy-to-use software tools
                   for the analysis of these simulations is growing. We
                   have developed MDTraj, a modern, lightweight, and
                   fast software package for analyzing MD
                   simulations. MDTraj reads and writes trajectory
                   data in a wide variety of commonly used formats. It
                   provides a large number of trajectory analysis
                   capabilities including minimal
                   root-mean-square-deviation calculations, secondary
                   structure assignment, and the extraction of common
                   order parameters. The package has a strong focus on
                   interoperability with the wider scientific Python
                   ecosystem, bridging the gap between MD data and
                   the rapidly growing collection of industry-standard
                   statistical analysis and visualization tools in
                   Python. MDTraj is a powerful and user-friendly
                   software package that simplifies the analysis of
                   MD data and connects these datasets with the
                   modern interactive data science software ecosystem in
                   Python.},
  doi =           {10.1016/j.bpj.2015.08.015},
  issn =          {0006-3495},
}

% Pteros 2.0 paper: parallel molecular analysis library for C++ and Python.
@article{pteros2015,
  author =        {Yesylevskyy, Semen O.},
  title =         {Pteros 2.0: Evolution of the fast parallel molecular
                   analysis library for {C++} and python},
  journal =       {Journal of Computational Chemistry},
  volume =        {36},
  number =        {19},
  pages =         {1480--1488},
  year =          {2015},
  doi =           {10.1002/jcc.23943},
  issn =          {1096-987X},
}

% HTMD paper: high-throughput molecular dynamics for molecular discovery.
@article{Doerr:2016aa,
  author =        {Doerr, S. and Harvey, M. J. and No{\'e}, Frank and
                   De Fabritiis, G.},
  journal =       {Journal of Chemical Theory and Computation},
  month =         apr,
  number =        {4},
  pages =         {1845--1852},
  publisher =     {American Chemical Society (ACS)},
  title =         {{HTMD}: High-Throughput Molecular Dynamics for
                   Molecular Discovery},
  volume =        {12},
  year =          {2016},
  doi =           {10.1021/acs.jctc.6b00049},
  issn =          {1549-9626},
}

% SciPy 2017 proceedings paper on parallel analysis in MDAnalysis with Dask.
@inproceedings{Khoshlessan:2017ab,
  address =       {Austin, TX},
  author =        {Khoshlessan, Mahzad and Paraskevakos, Ioannis and
                   Jha, Shantenu and Beckstein, Oliver},
  booktitle =     {Proceedings of the 16th {Python} in {Science}
                   {Conference}},
  editor =        {Huff, Katy and Lippa, David and
                   Niederhut, Dillon and Pacer, M},
  organization =  {SciPy},
  pages =         {64--72},
  title =         {Parallel Analysis in {MDAnalysis} using the {Dask}
                   Parallel Computing Library},
  year =          {2017},
  abstract =      {The analysis of biomolecular computer simulations has
                   become a challenge because the amount of output data
                   is now routinely in the terabyte range. We evaluated
                   if this challenge can be met by a parallel map-reduce
                   approach with the Dask parallel computing library for
                   task-graph based computing coupled with our
                   MDAnalysis Python library for the analysis of
                   molecular dynamics (MD) simulations. We performed a
                   representative performance evaluation, taking into
                   account the highly heterogeneous computing
                   environment that researchers typically work in
                   together with the diversity of existing file formats
                   for MD trajectory data. We found that the underlying
                   storage system (solid state drives, parallel file
                   systems, or simple spinning platter disks) can be a
                   deciding performance factor that leads to data
                   ingestion becoming the primary bottleneck in the
                   analysis work flow. However, the choice of the data
                   file format can mitigate the effect of the storage
                   system; in particular, the commonly used Gromacs XTC
                   trajectory format, which is highly compressed, can
                   exhibit strong scaling close to ideal due to trading
                   a decrease in global storage access load against an
                   increase in local per-core CPU-intensive
                   decompression. Scaling was tested on a single node
                   and multiple nodes on national and local
                   supercomputing resources as well as typical
                   workstations. Although very good strong scaling could
                   be achieved for single nodes, good scaling across
                   multiple nodes was hindered by the persistent
                   occurrence of "stragglers", tasks that take much
                   longer than all other tasks, and whose ultimate cause
                   could not be completely ascertained. In summary, we
                   show that, due to the focus on high interoperability
                   in the scientific Python eco system, it is
                   straightforward to implement map-reduce with Dask in
                   MDAnalysis and provide an in-depth analysis of the
                   considerations to obtain good parallel performance on
                   HPC resources.},
  doi =           {10.25080/shinma-7f4c6e7-00a},
}


@inproceedings{ICCP-2018,
 Address = {New York, NY, USA},
 Author = {Ioannis Paraskevakos and Andre Luckow and Mahzad Khoshlessan and George Chantzialexiou and Thomas E. Cheatham and Oliver Beckstein and Geoffrey Fox and Shantenu Jha},
 Booktitle = {ICPP 2018: 47th International Conference on Parallel Processing, August 13--16, 2018, Eugene, OR, USA},
 Month = aug # "~13--16",
 Organization = {Association for Computing Machinery},
 Pages = {Article No. 49},
 Publisher = {ACM},
 Title = {Task-parallel Analysis of Molecular Dynamics Trajectories},
 Year = 2018,
}


@article{Liu:2010kx,
  author =        {Liu, Pu and Agrafiotis, Dimitris K and
                   Theobald, Douglas L},
  journal =       {J Comput Chem},
  month =         may,
  number =        {7},
  pages =         {1561--1563},
  title =         {Fast determination of the optimal rotational matrix
                   for macromolecular superpositions},
  volume =        {31},
  year =          {2010},
  abstract =      {Finding the rotational matrix that minimizes the sum
                   of squared deviations between two vectors is an
                   important problem in bioinformatics and
                   crystallography. Traditional algorithms involve the
                   inversion or decomposition of a 3 x 3 or 4 x 4
                   matrix, which can be computationally expensive and
                   numerically unstable in certain cases. Here, we
                   present a simple and robust algorithm to rapidly
                   determine the optimal rotation using a Newton-Raphson
                   quaternion-based method and an adjoint matrix. Our
                   method is at least an order of magnitude more
                   efficient than conventional inversion/decomposition
                   methods, and it should be particularly useful for
                   high-throughput analyses of molecular conformations.},
  doi =           {10.1002/jcc.21439},
}

@book{Lea96,
  title     = {Molecular Modelling. Principles and Applications},
  author    = {Leach, A. R.},
  year      = {1996},
  publisher = {Longman},
}


@inproceedings{Rocklin:2015aa,
  title        = {Dask: Parallel Computation with Blocked algorithms
    and Task Scheduling},
  author       = {Rocklin, Matthew},
  editor       = {Kathryn Huff and James Bergstra},
  booktitle    = {Proceedings of the 14th Python in Science Conference (SciPy 2015)},
  organization = {SciPy},
  address      = {Austin, TX},
  year         = {2015},
  pages        = {130--136},
  doi          = {10.25080/Majora-7b98e3ed-013},
  abstract     = {Dask enables parallel and out-of-core computation. We
    couple blocked algorithms with dynamic and memory
    aware task scheduling to achieve a parallel and
    out-of-core NumPy clone. We show how this extends the
    effective scale of modern hardware to larger datasets
    and discuss how these ideas can be more broadly
    applied to other parallel collections.},
}

@article{Dalcin:2011aa,
  author =        {Dalc{\'\i}n, Lisandro D. and Paz, Rodrigo R. and
                   Kler, Pablo A. and Cosimo, Alejandro},
  journal =       {Advances in Water Resources},
  number =        {9},
  pages =         {1124--1139},
  title =         {Parallel distributed computing using {Python}},
  volume =        {34},
  year =          {2011},
  abstract =      {This work presents two software components aimed to
                   relieve the costs of accessing high-performance
                   parallel computing resources within a Python
                   programming environment: MPI for Python and PETSc for
                   Python. MPI for Python is a general-purpose Python
                   package that provides bindings for the Message
                   Passing Interface (MPI) standard using any back-end
                   MPI implementation. Its facilities allow parallel
                   Python programs to easily exploit multiple processors
                   using the message passing paradigm. PETSc for Python
                   provides access to the Portable, Extensible Toolkit
                   for Scientific Computation (PETSc) libraries. Its
                   facilities allow sequential and parallel Python
                   applications to exploit state of the art algorithms
                   and data structures readily available in PETSc for
                   the solution of large-scale problems in science and
                   engineering. MPI for Python and PETSc for Python are
                   fully integrated to PETSc-FEM, an MPI and PETSc based
                   parallel, multiphysics, finite elements code
                   developed at CIMEC laboratory. This software
                   infrastructure supports research activities related
                   to simulation of fluid flows with applications
                   ranging from the design of microfluidic devices for
                   biochemical analysis to modeling of large-scale
                   stream/aquifer interactions.},
  doi =           {10.1016/j.advwatres.2011.04.013},
  issn =          {0309-1708},
}

@article{Dalcin:2005aa,
  author =        {Dalc{\'\i}n, Lisandro D. and Paz, Rodrigo and
                   Storti, Mario},
  journal =       {Journal of Parallel and Distributed Computing},
  number =        {9},
  pages =         {1108--1115},
  title =         {{MPI} for {Python}},
  volume =        {65},
  year =          {2005},
  abstract =      {MPI for Python provides bindings of the Message
                   Passing Interface (MPI) standard for the Python
                   programming language and allows any Python program to
                   exploit multiple processors. This package is
                   constructed on top of the MPI-1 specification and
                   defines an object-oriented interface which closely
                   follows MPI-2 C++ bindings. It supports point-to-point
                   (sends, receives) and collective (broadcasts,
                   scatters, gathers) communications of general Python
                   objects. Efficiency has been tested in a Beowulf
                   class cluster and satisfying results were obtained.
                   MPI for Python is open source and available for
                   download on the web
                   (http://www.cimec.org.ar/python).},
  doi =           {10.1016/j.jpdc.2005.03.010},
  issn =          {0743-7315},
}

@article{Garraghan2016,
  author =        {Garraghan, P. and Ouyang, X. and Yang, R. and McKee, D. and Xu, J.},
  journal =       {IEEE Transactions on Services Computing},
  volume =        12,
  pages =         {91--104},
  title =         {Straggler Root-Cause and Impact Analysis for
                   Massive-scale Virtualized Cloud Datacenters},
  year =          2016,
  doi =           {10.1109/TSC.2016.2611578},
}

@article{xsede,
  author =        {J. Towns and T. Cockerill and M. Dahan and I. Foster and
                   K. Gaither and A. Grimshaw and V. Hazlewood and
                   S. Lathrop and D. Lifka and G. D. Peterson and
                   R. Roskies and J. R. Scott and N. Wilkins-Diehr},
  journal =       {Computing in Science \& Engineering},
  month =         sep # "--" # oct,
  number =        {5},
  pages =         {62--74},
  title =         {{XSEDE}: Accelerating Scientific Discovery},
  volume =        {16},
  year =          {2014},
  doi =           {10.1109/MCSE.2014.80},
  issn =          {1521-9615},
}

@mastersthesis{GAiN,
  title   = {{GAiN}: Distributed Array Computation with {Python}},
  author  = {Jeffrey Alan Daily},
  year    = {2009},
  school  = {School of Electrical Engineering and Computer
    Science, Washington State University},
  address = {Pullman, WA},
}

@article{GA,
  author =        {Jarek Nieplocha and Bruce Palmer and Vinod Tipparaju and
                   Manojkumar Krishnan and Harold Trease and
                   Edoardo Apr{\`a}},
  journal =       {The International Journal of High Performance
                   Computing Applications},
  number =        {2},
  pages =         {203--231},
  title =         {Advances, Applications and Performance of the {Global
                   Arrays} Shared Memory Programming Toolkit},
  volume =        {20},
  year =          {2006},
}


@article{Dean2008,
  author    = {Dean, Jeffrey and Ghemawat, Sanjay},
  title     = {{MapReduce}: simplified data processing on large clusters},
  journal   = {Communications of the ACM},
  publisher = {ACM},
  volume    = {51},
  number    = {1},
  pages     = {107--113},
  year      = {2008},
  doi       = {10.1145/1327452.1327492},
  keywords  = {map reduce},
  abstract  = {MapReduce is a programming model and an associated
    implementation for processing and generating large datasets that is
    amenable to a broad variety of real-world tasks. Users specify the
    computation in terms of a map and a reduce function, and the
    underlying runtime system automatically parallelizes the computation
    across large-scale clusters of machines, handles machine failures,
    and schedules inter-machine communication to make efficient use of
    the network and disks. Programmers find the system easy to use: more
    than ten thousand distinct MapReduce programs have been implemented
    internally at Google over the past four years, and an average of one
    hundred thousand MapReduce jobs are executed on Google's clusters
    every day, processing a total of more than twenty petabytes of data
    per day.},
}

@inproceedings{Kyong2017,
  address =       {New York, USA},
  author =        {Kyong, Joohyun and Jeon, Jinwoo and Lim, Sung-Soo},
  booktitle =     {Proceedings of the 6th International Conference on
                   Software and Computer Applications - ICSCA '17},
  pages =         {176--180},
  publisher =     {ACM Press},
  title =         {Improving scalability of {Apache Spark}-based scale-up
                   server through {Docker} container-based partitioning},
  year =          {2017},
  doi =           {10.1145/3056662.3056686},
  isbn =          {9781450348577},
}

@phdthesis{Ousterhout2017,
  author =        {Ousterhout, Kay},
  title =         {Architecting for Performance Clarity in Data Analytics Frameworks},
  number =        {UCB/EECS-2017-158},
  school =        {EECS Department, University of California, Berkeley},
  address =       {Berkeley, CA},
  year =          2017,
  month =         oct,
  url =           {https://www2.eecs.berkeley.edu/Pubs/TechRpts/2017/EECS-2017-158.html},
  eprint =        {https://www2.eecs.berkeley.edu/Pubs/TechRpts/2017/EECS-2017-158.pdf},
}

@inproceedings{Gittens2016,
  author =        {Gittens, Alex and Devarakonda, Aditya and Racah, Evan and
                   Ringenburg, Michael and Gerhardt, Lisa and
                   Kottalam, Jey and Liu, Jialin and Maschhoff, Kristyn and
                   Canon, Shane and Chhugani, Jatin and Sharma, Pramod and
                   Yang, Jiyan and Demmel, James and Harrell, Jim and
                   Krishnamurthy, Venkat and Mahoney, Michael W. and
                   Prabhat},
  booktitle =     {IEEE International Conference on Big Data (Big Data)},
  month =         dec,
  pages =         {204--213},
  title =         {Matrix factorizations at scale: A comparison of
                   scientific data analytics in {Spark} and {C+MPI} using
                   three case studies},
  year =          {2016},
  doi =           {10.1109/BigData.2016.7840606},
  isbn =          {978-1-4673-9005-7},
}


@misc{Kirpichov2016,
  author =        {Eugene Kirpichov and Malo Denielou},
  title =         {No shard left behind: dynamic work rebalancing in {Google Cloud Dataflow}},
  howpublished =  {Google Cloud Blog},
  month =         "18~" # may,
  year =          2016,
  note =          {accessed Aug 24, 2019},
  url =           {https://cloud.google.com/blog/products/gcp/no-shard-left-behind-dynamic-work-rebalancing-in-google-cloud-dataflow},
}

@phdthesis{Tien-2017,
  author =        {Tien-Dat Phan},
  school =        {{\'E}cole normale sup{\'e}rieure de Rennes},
  title =         {Energy-efficient Straggler Mitigation for Big Data
                   Applications on the Clouds},
  year =          {2017},
}

@article{Chen2014,
  author =        {Qi Chen and Cheng Liu and Zhen Xiao},
  journal =       {IEEE Transactions on Computers},
  number =        4,
  pages =         {954--967},
  publisher =     {IEEE},
  doi =           {10.1109/TC.2013.15},
  title =         {Improving {MapReduce} Performance Using Smart
                   Speculative Execution Strategy},
  volume =        63,
  year =          2014,
}

@inproceedings{Xie:2012aa,
  title     = {Characterizing Output Bottlenecks in a Supercomputer},
  author    = {Xie, Bing and Chase, Jeffrey and Dillow, David and
    Drokin, Oleg and Klasky, Scott and Oral, Sarp and
    Podhorszki, Norbert},
  booktitle = {Proceedings of the International Conference on High
    Performance Computing, Networking, Storage and
    Analysis},
  series    = {SC '12},
  year      = {2012},
  pages     = {8:1--8:11},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
  isbn      = {978-1-4673-0804-5},
  abstract  = {Supercomputer I/O loads are often dominated by
    writes. HPC (High Performance Computing) file systems
    are designed to absorb these bursty outputs at high
    bandwidth through massive parallelism. However, the
    delivered write bandwidth often falls well below the
    peak. This paper characterizes the data absorption
    behavior of a center-wide shared Lustre parallel file
    system on the Jaguar supercomputer. We use a
    statistical methodology to address the challenges of
    accurately measuring a shared machine under
    production load and to obtain the distribution of
    bandwidth across samples of compute nodes, storage
    targets, and time intervals. We observe and quantify
    limitations from competing traffic, contention on
    storage servers and I/O routers, concurrency
    limitations in the client compute node operating
    systems, and the impact of variance (stragglers) on
    coupled output such as striping. We then examine the
    implications of our results for application
    performance and the design of I/O middleware systems
    on shared supercomputers.},
}

@inproceedings{Yang2016,
  author =        {Yang, Hongbin and Liu, Xianyang and Chen, Shenbo and
                   Lei, Zhou and Du, Hongguang and Zhu, Caixin},
  booktitle =     {2016 International Conference on Audio, Language and
                   Image Processing (ICALIP)},
  month =         jul,
  pages =         {28--33},
  publisher =     {IEEE},
  title =         {Improving {Spark} performance with {MPTE} in
                   heterogeneous environments},
  year =          {2016},
  doi =           {10.1109/ICALIP.2016.7846627},
  isbn =          {978-1-5090-0654-0},
}

@techreport{Rosen2012,
  author =        {Josh Rosen and Bill Zhao},
  title =         {Fine-Grained Micro-Tasks for {MapReduce} Skew-Handling},
  year =          {2012},
  institution =   {EECS, UC Berkeley},
  url =           {https://pdfs.semanticscholar.org/3617/916adb83f33f8df7d0b3bfc23d0de80da9b7.pdf},
}

@inproceedings{Kwon2012,
  doi =           {10.1145/2213836.2213840},
  author =        {YongChul Kwon and Magdalena Balazinska and Bill Howe and
                   Jerome Rolia},
  booktitle =     {Proceedings of the 2012 {ACM} {SIGMOD} International
                   Conference on Management of Data},
  series =        {SIGMOD '12},
  month =         may # "~20--24",
  pages =         {25--36},
  publisher =     {ACM},
  title =         {{SkewTune}: Mitigating Skew in {MapReduce} Applications},
  year =          {2012},
}

@inproceedings{Ousterhout2015,
  author =        {Kay Ousterhout and Ryan Rasti and Sylvia Ratnasamy and
                   Scott Shenker and Byung-Gon Chun},
  booktitle =     {Proceedings of the 12th {USENIX} Conference on
                   Networked Systems Design and Implementation},
  series =        {NSDI '15},
  isbn =          {978-1-931971-218},
  pages =         {293--307},
  title =         {Making Sense of Performance in Data Analytics
                   Frameworks},
  year =          {2015},
}

@article{AWE-WQ2014,
  author =        {Badi Abdul-Wahid and Haoyun Feng and Dinesh Rajan and
                   Ronan Costaouec and Eric Darve and Douglas Thain and
                   Jes{\'u}s A. Izaguirre},
  journal =       {Journal of Chemical Information and Modeling},
  pages =         {3033--3043},
  title =         {{AWE-WQ}, Fast-Forwarding Molecular Dynamics Using the
                   Accelerated Weighted Ensemble},
  volume =        {54},
  year =          {2014},
}

@article{Wu_et.al,
  title   = {A scalable parallel framework for microstructure
    analysis of large-scale molecular dynamics
    simulations data},
  author  = {Guoqing Wu and Haifeng Song and Deye Lin},
  journal = {Computational Materials Science},
  year    = {2018},
  volume  = {144},
  pages   = {322--330},
}

@inproceedings{Zazen,
  author =        {Tiankai Tu and Charles A. Rendleman and
                   Patrick J. Miller and Federico Sacerdoti and
                   Ron O. Dror and David E. Shaw},
  booktitle =     {8th {USENIX} Conference on File and Storage
                   Technologies, San Jose, CA, USA},
  month =         feb # "~23--26",
  pages =         {129--142},
  title =         {Accelerating Parallel Analysis of Scientific
                   Simulation Data via {Zazen}},
  year =          {2010},
  url =           {http://www.usenix.org/events/fast10/tech/full\_papers/tu.pdf},
}

@inproceedings{VMD2013,
  address =       {Washington, DC, USA},
  author =        {John E. Stone and Barry Isralewitz and
                   Klaus Schulten},
  booktitle =     {Proceedings of the 2013 Extreme Scaling Workshop ({XSW}
                   2013)},
  month =         "15--16~" # aug,
  pages =         {43--50},
  publisher =     {IEEE Computer Society},
  title =         {Early Experiences Scaling {VMD} Molecular Visualization
                   and Analysis Jobs on {Blue Waters}},
  year =          {2013},
}

@article{pyPcazip,
  author =        {Ardita Shkurti and Ramon Goni and Pau Andrio and
                   Elena Breitmoser and Iain Bethune and
                   Modesto Orozco and Charles A. Laughton},
  journal =       {SoftwareX},
  pages =         {44--50},
  title =         {{pyPcazip}: A {PCA}-based toolkit for compression and
                   analysis of molecular simulation data},
  volume =        {5},
  year =          {2016},
}

@inproceedings{Malakar-etal,
  author =        {Preeti Malakar and Christopher Knight and Todd Munson and
                   Venkatram Vishwanath and Michael E. Papka},
  booktitle =     {Proceedings of the In Situ Infrastructures on
                   Enabling Extreme-Scale Analysis and Visualization},
  series =        {ISAV '17},
  month =         nov # "~12--17",
  pages =         {1--6},
  title =         {Scalable In situ Analysis of Molecular Dynamics
                   Simulations},
  year =          {2017},
}

@article{Johnston:2017aa,
  author =        {Johnston, Travis and Zhang, Boyu and Liwo, Adam and
                   Crivelli, Silvia and Taufer, Michela},
  journal =       {J Comput Chem},
  month =         jun,
  number =        {16},
  pages =         {1419--1430},
  title =         {\textit{In situ} data analytics and indexing of
                   protein trajectories},
  volume =        {38},
  year =          {2017},
  abstract =      {The transition toward exascale computing will be
                   accompanied by a performance dichotomy. Computational
                   peak performance will rapidly increase; I/O
                   performance will either grow slowly or be completely
                   stagnant. Essentially, the rate at which data are
                   generated will grow much faster than the rate at
                   which data can be read from and written to the disk.
                   MD simulations will soon face the I/O problem of
                   efficiently writing to and reading from disk on the
                   next generation of supercomputers. This article
                   targets MD simulations at the exascale and proposes a
                   novel technique for in situ data analysis and
                   indexing of MD trajectories. Our technique maps
                   individual trajectories' substructures (i.e.,
                   alpha-helices, beta-strands) to metadata frame by
                   frame. The metadata captures the conformational
                   properties of the substructures. The ensemble of
                   metadata can be used for automatic, strategic
                   analysis within a trajectory or across trajectories,
                   without manually identify those portions of
                   trajectories in which critical changes take place. We
                   demonstrate our technique's effectiveness by applying
                   it to 26.3k helices and 31.2k strands from 9917 PDB
                   proteins and by providing three empirical case
                   studies.},
  doi =           {10.1002/jcc.24729},
}

@article{Brooks:2009pt,
  affiliation =   {Laboratory of Computational Biology, National Heart,
                   Lung, and Blood Institute, National Institutes of
                   Health, Bethesda, Maryland 20892, USA.
                   brbrooks@helix.nih.gov},
  author =        {Brooks, B R and Brooks, III, C L and
                   MacKerell, Jr, A D and Nilsson, L and Petrella, R J and
                   Roux, B and Won, Y and Archontis, G and Bartels, C and
                   Boresch, S and Caflisch, A and Caves, L and Cui, Q and
                   Dinner, A R and Feig, M and Fischer, S and Gao, J and
                   Hodoscek, M and Im, W and Kuczera, K and Lazaridis, T and
                   Ma, J and Ovchinnikov, V and Paci, E and Pastor, R W and
                   Post, C B and Pu, J Z and Schaefer, M and Tidor, B and
                   Venable, R M and Woodcock, H L and Wu, X and Yang, W and
                   York, D M and Karplus, M},
  journal =       {J Comput Chem},
  month =         jul,
  number =        {10},
  pages =         {1545--1614},
  title =         {{CHARMM}: the biomolecular simulation program},
  volume =        {30},
  year =          {2009},
  abstract =      {CHARMM (Chemistry at HARvard Molecular Mechanics) is
                   a highly versatile and widely used molecular
                   simulation program. It has been developed over the
                   last three decades with a primary focus on molecules
                   of biological interest, including proteins, peptides,
                   lipids, nucleic acids, carbohydrates, and small
                   molecule ligands, as they occur in solution,
                   crystals, and membrane environments. For the study of
                   such systems, the program provides a large suite of
                   computational tools that include numerous
                   conformational and path sampling methods, free energy
                   estimators, molecular minimization, dynamics, and
                   analysis techniques, and model-building capabilities.
                   The CHARMM program is applicable to problems
                   involving a much broader class of many-particle
                   systems. Calculations with CHARMM can be performed
                   using a number of different energy functions and
                   models, from mixed quantum mechanical-molecular
                   mechanical force fields, to all-atom classical
                   potential energy functions with explicit solvent and
                   various boundary conditions, to implicit solvent and
                   membrane models. The program has been ported to
                   numerous platforms in both serial and parallel
                   architectures. This article provides an overview of
                   the program as it exists today with an emphasis on
                   developments since the publication of the original
                   CHARMM article in 1983.},
  annote =        {new Charmm reference},
  doi =           {10.1002/jcc.21287},
  issn =          {1096-987X},
  language =      {eng},
}

@article{Abraham:2015aa,
  author =        {Abraham, Mark James and Murtola, Teemu and
                   Schulz, Roland and P{\'a}ll, Szil{\'a}rd and
                   Smith, Jeremy C. and Hess, Berk and Lindahl, Erik},
  journal =       {SoftwareX},
  pages =         {19--25},
  title =         {{GROMACS}: High performance molecular simulations
                   through multi-level parallelism from laptops to
                   supercomputers},
  volume =        {1--2},
  year =          {2015},
  abstract =      {GROMACS is one of the most widely used
                   open-source and free software codes in chemistry,
                   used primarily for dynamical simulations of
                   biomolecules. It provides a rich set of calculation
                   types, preparation and analysis tools. Several
                   advanced techniques for free-energy calculations are
                   supported. In version 5, it reaches new performance
                   heights, through several new and enhanced
                   parallelization algorithms. These work on every
                   level; SIMD registers inside cores,
                   multithreading, heterogeneous CPU--GPU acceleration,
                   state-of-the-art 3D domain decomposition, and
                   ensemble-level parallelization through built-in
                   replica exchange and the separate Copernicus
                   framework. The latest best-in-class compressed
                   trajectory storage format is supported.},
  doi =           {10.1016/j.softx.2015.06.001},
  issn =          {2352-7110},
}

@article{Case:2005uq,
  author =        {Case, David A and Cheatham, 3rd, Thomas E and
                   Darden, Tom and Gohlke, Holger and Luo, Ray and
                   Merz, Jr, Kenneth M and Onufriev, Alexey and
                   Simmerling, Carlos and Wang, Bing and
                   Woods, Robert J},
  journal =       {J Comput Chem},
  number =        {16},
  pages =         {1668--1688},
  title =         {The {Amber} biomolecular simulation programs},
  volume =        {26},
  year =          {2005},
  abstract =      {We describe the development, current features, and
                   some directions for future development of the Amber
                   package of computer programs. This package evolved
                   from a program that was constructed in the late 1970s
                   to do Assisted Model Building with Energy Refinement,
                   and now contains a group of programs embodying a
                   number of powerful tools of modern computational
                   chemistry, focused on molecular dynamics and free
                   energy calculations of proteins, nucleic acids, and
                   carbohydrates.},
  doi =           {10.1002/jcc.20290},
}

@article{Phillips:2005ek,
  address =       {111 RIVER ST, HOBOKEN, NJ 07030 USA},
  author =        {Phillips, J C and Braun, R and Wang, W and Gumbart, J and
                   Tajkhorshid, E and Villa, E and Chipot, C and
                   Skeel, R D and Kale, L and Schulten, K},
  journal =       {J Comput Chem},
  pages =         {1781--1802},
  publisher =     {JOHN WILEY \& SONS INC},
  title =         {Scalable molecular dynamics with {NAMD}},
  volume =        {26},
  year =          {2005},
  abstract =      {NAMD is a parallel molecular dynamics code designed
                   for high-performance simulation of large biomolecular
                   systems. NAMD scales to hundreds of processors on
                   high-end parallel platforms, as well as tens of
                   processors on low-cost commodity clusters, and also
                   runs on individual desktop and laptop computers. NAMD
                   works with AMBER and CHARMM potential functions,
                   parameters, and file formats. This article, directed
                   to novices as well as experts, first introduces
                   concepts and methods used in the NAMD program,
                   describing the classical molecular dynamics force
                   field, equations of motion, and integration methods
                   along with the efficient electrostatics evaluation
                   algorithms employed and temperature and pressure
                   controls used. Features for steering the simulation
                   across barriers and for calculating both alchemical
                   and conformational free energy differences are
                   presented. The motivations for and a roadmap to the
                   internal design of NAMD, implemented in C++ and based
                   on Charm++ parallel objects, are outlined. The
                   factors affecting the serial and parallel performance
                   of a simulation are discussed. Finally, typical NAMD
                   use is illustrated with representative applications
                   to a small, a medium, and a large biomolecular
                   system, highlighting particular features of NAMD, for
                   example, the Tcl scripting language. The article also
                   provides a list of the key features of NAMD and
                   discusses the benefits of combining NAMD with the
                   molecular graphics/sequence analysis software VMD and
                   the grid computing/collaboratory software BioCoRE.
                   NAMD is distributed free of charge with source code
                   at www.ks.uiuc.edu. (c) 2005 Wiley Periodicals, Inc.},
  doi =           {10.1002/jcc.20289},
}

@article{Burley:2018aa,
  author =        {Burley, Stephen K and Berman, Helen M and
                   Bhikadiya, Charmi and Bi, Chunxiao and Chen, Li and
                   Di Costanzo, Luigi and Christie, Cole and
                   Duarte, Jose M and Dutta, Shuchismita and others},
  journal =       {Nucleic Acids Research},
  month =         oct,
  number =        {D1},
  pages =         {D520--D528},
  publisher =     {Oxford University Press (OUP)},
  title =         {{Protein Data Bank}: the single global archive for
                   {3D} macromolecular structure data},
  volume =        {47},
  year =          {2018},
  abstract =      {The Protein Data Bank (PDB) is the single global
                   archive of experimentally determined
                   three-dimensional (3D) structure data of biological
                   macromolecules. Since 2003, the PDB has been managed
                   by the Worldwide Protein Data Bank (wwPDB;
                   wwpdb.org), an international consortium that
                   collaboratively oversees deposition, validation,
                   biocuration, and open access dissemination of 3D
                   macromolecular structure data. The PDB Core Archive
                   houses 3D atomic coordinates of more than 144 000
                   structural models of proteins, DNA/RNA, and their
                   complexes with metals and small molecules and related
                   experimental data and metadata. Structure and
                   experimental data/metadata are also stored in the PDB
                   Core Archive using the readily extensible wwPDB
                   PDBx/mmCIF master data format, which will continue to
                   evolve as data/metadata from new experimental
                   techniques and structure determination methods are
                   incorporated by the wwPDB. Impacts of the recently
                   developed universal wwPDB OneDep
                   deposition/validation/biocuration system and various
                   methods-specific wwPDB Validation Task Forces on
                   improving the quality of structures and data housed
                   in the PDB Core Archive are described together with
                   current challenges and future plans.},
  doi =           {10.1093/nar/gky949},
  issn =          {1362-4962},
}

@article{Van-Der-Walt:2011aa,
  author =        {van der Walt, St{\'e}fan and Colbert, S Chris and
                   Varoquaux, Ga{\"e}l},
  journal =       {Computing in Science \& Engineering},
  number =        {2},
  pages =         {22--30},
  publisher =     {AIP Publishing},
  title =         {The {NumPy} array: a structure for efficient numerical
                   computation},
  volume =        {13},
  year =          {2011},
  abstract =      {In the Python world, NumPy arrays are the standard
                   representation for numerical data and enable
                   efficient implementation of numerical computations in
                   a high-level language. As this effort shows, NumPy
                   performance can be improved through three techniques:
                   vectorizing calculations, avoiding copying data in
                   memory, and minimizing operation counts.},
  doi =           {10.1109/MCSE.2011.37},
}

@article{Theobald:2005vn,
  author =        {Theobald, Douglas L},
  journal =       {Acta Crystallogr A},
  month =         jul,
  number =        {4},
  pages =         {478--480},
  title =         {Rapid calculation of {RMSDs} using a quaternion-based
                   characteristic polynomial},
  volume =        {61},
  year =          {2005},
  abstract =      {A common measure of conformational similarity in
                   structural bioinformatics is the minimum root mean
                   square deviation (RMSD) between the coordinates of
                   two macromolecules. In many applications, the
                   rotations relating the structures are not needed.
                   Several common algorithms for calculating RMSDs
                   require the computationally costly procedures of
                   determining either the eigen decomposition or matrix
                   inversion of a 3x3 or 4x4 matrix. Using a
                   quaternion-based method, here a simple algorithm is
                   developed that rapidly and stably determines RMSDs by
                   circumventing the decomposition and inversion
                   problems.},
  doi =           {10.1107/S0108767305015266},
}

@inproceedings{Daily:2014aa,
  author =        {J. Daily and A. Vishnu and B. Palmer and {van Dam}, H. and
                   D. Kerbyson},
  booktitle =     {2014 21st International Conference on High
                   Performance Computing (HiPC)},
  month =         dec,
  pages =         {1--10},
  title =         {On the suitability of {MPI} as a {PGAS} runtime},
  year =          {2014},
  abstract =      {Partitioned Global Address Space (PGAS) models are
                   emerging as a popular alternative to MPI models for
                   designing scalable applications. At the same time,
                   MPI remains a ubiquitous communication subsystem due
                   to its standardization, high performance, and
                   availability on leading platforms. In this paper, we
                   explore the suitability of using MPI as a scalable
                   PGAS communication subsystem. We focus on the Remote
                   Memory Access (RMA) communication in PGAS models
                   which typically includes get, put, and atomic memory
                   operations. We perform an in-depth exploration of
                   design alternatives based on MPI. These alternatives
                   include using a semantically-matching interface such
                   as MPI-RMA, as well as not-so-intuitive interfaces
                   such as MPI two-sided with a combination of
                   multi-threading and dynamic process management. With
                   an in-depth exploration of these alternatives and
                   their shortcomings, we propose a novel design which
                   is facilitated by the data-centric view in PGAS
                   models. This design leverages a combination of highly
                   tuned MPI two-sided semantics and an automatic,
                   user-transparent split of MPI communicators to
                   provide asynchronous progress. We implement the
                   asynchronous progress ranks approach and other
                   approaches within the Communication Runtime for
                   Exascale which is a communication subsystem for
                   Global Arrays. Our performance evaluation spans pure
                   communication benchmarks, graph community detection
                   and sparse matrix-vector multiplication kernels, and
                   a computational chemistry application. The utility of
                   our proposed PR-based approach is demonstrated by a
                   2.17x speedup on 1008 processors over the other
                   MPI-based designs.},
  doi =           {10.1109/HiPC.2014.7116712},
  issn =          {1094-7256},
}

@book{pythonhdf5,
  address =       {Sebastopol, CA},
  author =        {Collette, Andrew},
  publisher =     {O'Reilly Media},
  title =         {{Python} and {HDF5}},
  year =          {2014},
  internal-note = {Whole book, previously entered as incollection with
                   booktitle equal to title. The earlier record listed
                   Meghan Blanchette and Rachel Roumeliotis in the
                   editor field; kept here only as a note because a
                   book entry cannot carry both author and editor.},
}

@article{Seyler:2014il,
  author =        {Seyler, Sean L and Beckstein, Oliver},
  title =         {Sampling of large conformational transitions:
                   Adenylate kinase as a testing ground},
  journal =       {Molec. Simul.},
  volume =        {40},
  number =        {10--11},
  pages =         {855--877},
  year =          {2014},
  doi =           {10.1080/08927022.2014.919497},
}

@misc{Seyler:2017aa,
  author =        {Seyler, Sean and Beckstein, Oliver},
  month =         jun,
  title =         {Molecular dynamics trajectory for benchmarking
                   {MDAnalysis}},
  year =          {2017},
  abstract =      {MD trajectory of apo adenylate kinase with CHARMM27
                   force field and simulated with explicit water and
                   ions in NPT at 300 K and 1 bar. Saved every 240 ps
                   for a total of 1.004 $\mu$s. Produced on PSC Anton.
                   The trajectory only contains the protein and all
                   solvent stripped. Superimposed on the CORE domain of
                   AdK by RMSD fitting. The topology is contained in the
                   PSF file (CHARMM format). The trajectory is contained
                   in the DCD file (CHARMM/NAMD format).},
  doi =           {10.6084/m9.figshare.5108170},
  howpublished =  {figshare},
}

@article{Lindahl01,
  author =        {Lindahl, Erik and Hess, Berk and
                   {van der Spoel}, David},
  journal =       {Journal of Molecular Modeling},
  number =        {8},
  pages =         {306--317},
  title =         {{GROMACS} 3.0: {A} package for molecular simulation
                   and trajectory analysis},
  volume =        {7},
  year =          {2001},
  abstract =      {GROMACS 3.0 is the latest release of a versatile and
                   very well optimized package for molecular simulation.
                   Much effort has been devoted to achieving extremely
                   high performance on both workstations and parallel
                   computers. The design includes an extraction of
                   virial and periodic boundary conditions from the
                   loops over pairwise interactions, and special
                   software routines to enable rapid calculation of
                   $x^{-1/2}$. Inner loops are generated automatically
                   in C or Fortran at compile time, with optimizations
                   adapted to each architecture. Assembly loops using
                   SSE and 3DNow! Multimedia instructions are provided
                   for x86 processors, resulting in exceptional
                   performance on inexpensive PC workstations. The
                   interface is simple and easy to use (no scripting
                   language), based on standard command line arguments
                   with self-explanatory functionality and integrated
                   documentation. All binary files are independent of
                   hardware endian and can be read by versions of
                   GROMACS compiled using different floating-point
                   precision. A large collection of flexible tools for
                   trajectory analysis is included, with output in the
                   form of finished Xmgr/Grace graphs. A basic
                   trajectory viewer is included, and several external
                   visualization tools can read the GROMACS trajectory
                   format. Starting with version 3.0, GROMACS is
                   available under the GNU General Public License from
                   http://www.gromacs.org.},
  doi =           {10.1007/s008940100045},
}

@article{Spangberg:2011zr,
  author =        {Sp{\aa}ngberg, Daniel and Larsson, Daniel S D and
                   {van der Spoel}, David},
  journal =       {Journal of Molecular Modeling},
  month =         oct,
  number =        {10},
  pages =         {2669--2685},
  title =         {{Trajectory NG}: portable, compressed, general
                   molecular dynamics trajectories},
  volume =        {17},
  year =          {2011},
  abstract =      {We present general algorithms for the compression of
                   molecular dynamics trajectories. The standard ways to
                   store MD trajectories as text or as raw binary
                   floating point numbers result in very large files
                   when efficient simulation programs are used on
                   supercomputers. Our algorithms are based on the
                   observation that differences in atomic
                   coordinates/velocities, in either time or space, are
                   generally smaller than the absolute values of the
                   coordinates/velocities. Also, it is often possible to
                   store values at a lower precision. We apply several
                   compression schemes to compress the resulting
                   differences further. The most efficient algorithms
                   developed here use a block sorting algorithm in
                   combination with Huffman coding. Depending on the
                   frequency of storage of frames in the trajectory,
                   either space, time, or combinations of space and time
                   differences are usually the most efficient. We
                   compare the efficiency of our algorithms with each
                   other and with other algorithms present in the
                   literature for various systems: liquid argon, water,
                   a virus capsid solvated in 15 mM aqueous NaCl, and
                   solid magnesium oxide. We perform tests to determine
                   how much precision is necessary to obtain accurate
                   structural and dynamic properties, as well as
                   benchmark a parallelized implementation of the
                   algorithms. We obtain compression ratios (compared to
                   single precision floating point) of 1:3.3-1:35
                   depending on the frequency of storage of frames and
                   the system studied.},
  doi =           {10.1007/s00894-010-0948-5},
}

@inproceedings{Shaw:2009ly,
  address =       {New York, NY, USA},
  author =        {Shaw, David E. and Dror, Ron O. and Salmon, John K. and
                   Grossman, J. P. and Mackenzie, Kenneth M. and
                   Bank, Joseph A. and Young, Cliff and
                   Deneroff, Martin M. and Batson, Brannon and
                   Bowers, Kevin J. and Chow, Edmond and
                   Eastwood, Michael P. and Ierardi, Douglas J. and
                   Klepeis, John L. and Kuskin, Jeffrey S. and
                   Larson, Richard H. and Lindorff-Larsen, Kresten and
                   Maragakis, Paul and Moraes, Mark A. and
                   Piana, Stefano and Shan, Yibing and Towles, Brian},
  booktitle =     {SC '09: Proceedings of the Conference on High
                   Performance Computing Networking, Storage and
                   Analysis},
  pages =         {1--11},
  publisher =     {ACM},
  title =         {Millisecond-scale molecular dynamics simulations on
                   {Anton}},
  year =          {2009},
  abstract =      {Anton is a recently completed special-purpose
                   supercomputer designed for molecular dynamics (MD)
                   simulations of biomolecular systems. The machine's
                   specialized hardware dramatically increases the speed
                   of MD calculations, making possible for the first
                   time the simulation of biological molecules at an
                   atomic level of detail for periods on the order of a
                   millisecond---about two orders of magnitude beyond
                   the previous state of the art. Anton is now running
                   simulations on a timescale at which many critically
                   important, but poorly understood phenomena are known
                   to occur, allowing the observation of aspects of
                   protein dynamics that were previously inaccessible to
                   both computational and experimental study. Here, we
                   report Anton's performance when executing actual MD
                   simulations whose accuracy has been validated
                   against both existing MD software and experimental
                   observations. We also discuss the manner in which
                   novel algorithms have been coordinated with Anton's
                   co-designed, application-specific hardware to achieve
                   these results.},
  doi =           {10.1145/1654059.1654099},
  isbn =          {978-1-60558-744-8},
}

@inproceedings{Shaw:2014aa,
  author =        {D. E. {Shaw} and J. P. {Grossman} and J. A. {Bank} and
                   B. {Batson} and J. A. {Butts} and J. C. {Chao} and
                   M. M. {Deneroff} and R. O. {Dror} and A. {Even} and
                   C. H. {Fenton} and A. {Forte} and J. {Gagliardo} and
                   G. {Gill} and B. {Greskamp} and C. R. {Ho} and
                   D. J. {Ierardi} and L. {Iserovich} and J. S. {Kuskin} and
                   R. H. {Larson} and T. {Layman} and L. {Lee} and
                   A. K. {Lerer} and C. {Li} and D. {Killebrew} and
                   K. M. {Mackenzie} and S. Y. {Mok} and M. A. {Moraes} and
                   R. {Mueller} and L. J. {Nociolo} and
                   J. L. {Peticolas} and T. {Quan} and D. {Ramot} and
                   J. K. {Salmon} and D. P. {Scarpazza} and
                   U. B. {Schafer} and N. {Siddique} and C. W. {Snyder} and
                   J. {Spengler} and P. T. P. {Tang} and M. {Theobald} and
                   H. {Toma} and B. {Towles} and B. {Vitale} and
                   S. C. {Wang} and C. {Young}},
  booktitle =     {SC '14: Proceedings of the International Conference
                   for High Performance Computing, Networking, Storage
                   and Analysis},
  month =         nov,
  pages =         {41--53},
  title =         {{Anton} 2: Raising the Bar for Performance and
                   Programmability in a Special-Purpose Molecular
                   Dynamics Supercomputer},
  year =          {2014},
  abstract =      {Anton 2 is a second-generation special-purpose
                   supercomputer for molecular dynamics simulations that
                   achieves significant gains in performance,
                   programmability, and capacity compared to its
                   predecessor, Anton 1. The architecture of Anton 2 is
                   tailored for fine-grained event-driven operation,
                   which improves performance by increasing the overlap
                   of computation with communication, and also allows a
                   wider range of algorithms to run efficiently,
                   enabling many new software-based optimizations. A
                   512-node Anton 2 machine, currently in operation, is
                   up to ten times faster than Anton 1 with the same
                   number of nodes, greatly expanding the reach of
                   all-atom biomolecular simulations. Anton 2 is the
                   first platform to achieve simulation rates of
                   multiple microseconds of physical time per day for
                   systems with millions of atoms. Demonstrating strong
                   scaling, the machine simulates a standard 23,558-atom
                   benchmark system at a rate of 85 $\mu$s/day -- 180
                   times faster than any commodity hardware platform or
                   general-purpose supercomputer.},
  doi =           {10.1109/SC.2014.9},
  issn =          {2167-4337},
}

@article{Salomon-Ferrer:2013cr,
  author =        {Salomon-Ferrer, Romelia and G{\"o}tz, Andreas W. and
                   Poole, Duncan and Le Grand, Scott and
                   Walker, Ross C.},
  journal =       {Journal of Chemical Theory and Computation},
  number =        {9},
  pages =         {3878--3888},
  title =         {Routine Microsecond Molecular Dynamics Simulations
                   with {AMBER} on {GPUs}. 2. Explicit Solvent Particle
                   Mesh {Ewald}},
  volume =        {9},
  year =          {2013},
  abstract =      {We present an implementation of explicit solvent all
                   atom classical molecular dynamics (MD) within the
                   AMBER program package that runs entirely on
                   CUDA-enabled GPUs. First released publicly in April
                   2010 as part of version 11 of the AMBER MD package
                   and further improved and optimized over the last two
                   years, this implementation supports the three most
                   widely used statistical mechanical ensembles (NVE,
                   NVT, and NPT), uses particle mesh Ewald (PME) for the
                   long-range electrostatics, and runs entirely on
                   CUDA-enabled NVIDIA graphics processing units (GPUs),
                   providing results that are statistically
                   indistinguishable from the traditional CPU version of
                   the software and with performance that exceeds that
                   achievable by the CPU version of AMBER software
                   running on all conventional CPU-based clusters and
                   supercomputers. We briefly discuss three different
                   precision models developed specifically for this work
                   (SPDP, SPFP, and DPDP) and highlight the technical
                   details of the approach as it extends beyond
                   previously reported work [G{\"o}tz et al., J. Chem.
                   Theory Comput. 2012, DOI: 10.1021/ct200909j; Le Grand
                   et al., Comp. Phys. Comm. 2013, DOI:
                   10.1016/j.cpc.2012.09.022]. We highlight the
                   substantial improvements in performance that are seen
                   over traditional CPU-only machines and provide
                   validation of our implementation and precision
                   models. We also provide evidence supporting our
                   decision to deprecate the previously described fully
                   single precision (SPSP) model from the latest release
                   of the AMBER software package.},
  doi =           {10.1021/ct400314y},
}

@article{Glaser:2015ys,
  author =        {Glaser, Jens and Nguyen, Trung Dac and
                   Anderson, Joshua A. and Lui, Pak and Spiga, Filippo and
                   Millan, Jaime A. and Morse, David C. and
                   Glotzer, Sharon C.},
  journal =       {Computer Physics Communications},
  month =         jul,
  pages =         {97--107},
  title =         {Strong scaling of general-purpose molecular dynamics
                   simulations on {GPUs}},
  volume =        {192},
  year =          {2015},
  abstract =      {We describe a highly optimized
                   implementation of MPI domain decomposition in a
                   GPU-enabled, general-purpose molecular dynamics code,
                   HOOMD-blue (Anderson and Glotzer, 2013). Our approach
                   is inspired by a traditional CPU-based code, LAMMPS
                   (Plimpton, 1995), but is implemented within a code
                   that was designed for execution on GPUs from the
                   start (Anderson et al., 2008). The software supports
                   short-ranged pair force and bond force fields and
                   achieves optimal GPU performance using an autotuning
                   algorithm. We are able to demonstrate equivalent or
                   superior scaling on up to 3375 GPUs in Lennard-Jones
                   and dissipative particle dynamics (DPD) simulations
                   of up to 108 million particles. GPUDirect RDMA
                   capabilities in recent GPU generations provide better
                   performance in full double precision calculations.
                   For a representative polymer physics application,
                   HOOMD-blue 1.0 provides an effective GPU vs. CPU node
                   speed-up of 12.5$\times$.},
  doi =           {10.1016/j.cpc.2015.02.028},
  issn =          {0010-4655},
}

@inproceedings{Brown:2018ab,
  author =        {Brown, Kevin A. and Jain, Nikhil and
                   Matsuoka, Satoshi and Schulz, Martin and
                   Bhatele, Abhinav},
  title =         {Interference Between {I/O} and {MPI} Traffic on
                   Fat-tree Networks},
  booktitle =     {Proceedings of the 47th International Conference on
                   Parallel Processing},
  series =        {ICPP 2018},
  pages =         {7:1--7:10},
  publisher =     {ACM},
  address =       {New York, NY, USA},
  year =          {2018},
  isbn =          {978-1-4503-6510-9},
  doi =           {10.1145/3225058.3225144},
}

@article{scalable-IO,
  author =        {Choudhary, Alok and Liao, Wei-keng and Gao, Kui and
                   Nisar, Arifa and Ross, Robert and Thakur, Rajeev and
                   Latham, Robert},
  journal =       {Journal of Physics: Conference Series},
  pages =         {012048},
  title =         {Scalable {I/O} and analytics},
  volume =        {180},
  year =          {2009},
  doi =           {10.1088/1742-6596/180/1/012048},
}

@article{scalable-IO1,
  author =        {Son, Seung Woo and Sehrish, Saba and Liao, Wei-keng and
                   Oldfield, Ron and Choudhary, Alok},
  journal =       {Journal of Supercomputing},
  number =        {5},
  pages =         {2069--2097},
  title =         {Reducing {I/O} Variability using Dynamic {I/O} Path
                   Characterization in Petascale Storage Systems},
  volume =        {73},
  year =          {2017},
}

@inproceedings{optimize_lustre,
  author =        {Lin, Kuan-Wu and Chou, Jerry and Byna, Surendra and
                   Wu, Kesheng},
  booktitle =     {Proceedings of the 25th International Conference on
                   Scientific and Statistical Database Management},
  month =         jul,
  note =          {Article No. 29},
  series =        {SSDBM '13},
  title =         {Optimizing {FastQuery} Performance on {Lustre} File
                   System},
  year =          {2013},
}

@misc{POSIX2017,
  author =        {Lockwood, Glenn},
  howpublished =  {The Next Platform},
  month =         sep,
  note =          {accessed Aug 24, 2019},
  title =         {What is So Bad About {POSIX} {I/O}?},
  url =           {https://www.nextplatform.com/2017/09/11/whats-bad-posix-io/},
  year =          {2017},
  date =          {2017-09-11},
}

@article{Mache:2005aa,
  author =        {Mache, Jens and Lo, Virginia and Garg, Sharad},
  journal =       {Journal of Parallel and Distributed Computing},
  number =        {10},
  pages =         {1190--1203},
  title =         {The impact of spatial layout of jobs on {I/O}
                   hotspots in mesh networks},
  volume =        {65},
  year =          {2005},
  abstract =      {Network contention hotspots can limit network
                   throughput for parallel disk I/O, even when the
                   interconnection network appears to be sufficiently
                   provisioned. We studied I/O hotspots in mesh networks
                   as a function of the spatial layout of an
                   application's compute nodes relative to the I/O
                   nodes. Our analytical modeling and dynamic
                   simulations show that when I/O nodes are configured
                   on one side of a two-dimensional mesh, realizable I/O
                   throughput is at best bounded by four times the
                   network bandwidth per link. Maximal performance
                   depends on the spatial layout of jobs, and cannot be
                   further improved by adding I/O nodes. Applying these
                   results, we devised a new parallel layout allocation
                   strategy (PLAS) which minimizes I/O hotspots, and
                   approaches the theoretical best case for parallel I/O
                   throughput. Our I/O performance analysis and
                   processor allocation strategy are applicable to a
                   wide range of contemporary and emerging
                   high-performance computing systems.},
  doi =           {10.1016/j.jpdc.2005.04.020},
  issn =          {0743-7315},
}

@article{Buyl:2014aa,
  author =        {{de Buyl}, Pierre and Colberg, Peter H. and
                   H{\"o}fling, Felix},
  journal =       {Computer Physics Communications},
  number =        {6},
  pages =         {1546--1553},
  title =         {{H5MD}: A structured, efficient, and portable file
                   format for molecular data},
  volume =        {185},
  year =          {2014},
  abstract =      {We propose a new file format named ``H5MD'' for
                   storing molecular simulation data, such as
                   trajectories of particle positions and velocities,
                   along with thermodynamic observables that are
                   monitored during the course of the simulation. H5MD
                   files are HDF5 (Hierarchical Data Format) files with
                   a specific hierarchy and naming scheme. Thus, H5MD
                   inherits many benefits of HDF5, e.g., structured
                   layout of multi-dimensional datasets, data
                   compression, fast and parallel I/O, and portability
                   across many programming languages and hardware
                   platforms. H5MD files are self-contained, and foster
                   the reproducibility of scientific data and the
                   interchange of data between researchers using
                   different simulation programs and analysis software.
                   In addition, the H5MD specification can serve for
                   other kinds of data (e.g. experimental data) and is
                   extensible to supplemental data, or may be part of an
                   enclosing file structure.},
  doi =           {10.1016/j.cpc.2014.01.018},
  issn =          {0010-4655},
}


@inproceedings{Ananthanarayanan:2010aa,
  address =       {Berkeley, CA, USA},
  author =        {Ananthanarayanan, Ganesh and Kandula, Srikanth and
                   Greenberg, Albert and Stoica, Ion and Lu, Yi and
                   Saha, Bikas and Harris, Edward},
  booktitle =     {Proceedings of the 9th USENIX Conference on Operating
                   Systems Design and Implementation},
  pages =         {265--278},
  publisher =     {USENIX Association},
  series =        {OSDI'10},
  title =         {Reining in the Outliers in Map-reduce Clusters Using
                   {Mantri}},
  year =          {2010},
}


@article{GA-NUMA,
  author =        {Jaroslaw Nieplocha and Robert J. Harrison and
                   Richard J. Littlefield},
  journal =       {Journal of Supercomputing},
  number =        {2},
  pages =         {169--189},
  title =         {{Global Arrays}: A Non-Uniform-Memory-Access
                   Programming Model For High-Performance Computers},
  volume =        {10},
  year =          {1996},
  date-added =    {2019-05-27 11:25:46 -0700},
  date-modified = {2019-05-27 11:27:54 -0700},
}


@inproceedings{Fan:2019aa,
  address =       {Austin, TX},
  author =        {Fan, Shujie and Linke, Max and Paraskevakos, Ioannis and
                   Gowers, Richard J. and Gecht, Michael and
                   Beckstein, Oliver},
  booktitle =     {{Proceedings} of the 18th {Python} in {Science}
                   {Conference}},
  editor =        {Calloway, Chris and Lippa, David and
                   Niederhut, Dillon and Shupe, David},
  organization =  {SciPy},
  pages =         {134--142},
  title =         {{PMDA} - {Parallel} {Molecular} {Dynamics}
                   {Analysis}},
  year =          {2019},
  doi =           {10.25080/Majora-7ddc1dd1-013},
}


@article{Roe:2018aa,
  author =        {Roe, Daniel R. and Cheatham, III, Thomas E.},
  journal =       {Journal of Computational Chemistry},
  number =        {25},
  pages =         {2110--2117},
  title =         {Parallelization of {CPPTRAJ} enables large scale
                   analysis of molecular dynamics trajectory data},
  volume =        {39},
  year =          {2018},
}


@article{Wickham:2011aa,
  author =        {Hadley Wickham},
  journal =       {Journal of Statistical Software},
  number =        {1},
  title =         {The Split-Apply-Combine Strategy for Data Analysis},
  volume =        {40},
  year =          {2011},
}