Parallel Performance of Molecular Dynamics Trajectory Analysis. Khoshlessan, M., Paraskevakos, I., Fox, G. C., Jha, S., & Beckstein, O. Technical Report, 2019.
The performance of biomolecular molecular dynamics (MD) simulations has steadily increased on modern high performance computing (HPC) resources but acceleration of the analysis of the output trajectories has lagged behind so that analyzing simulations is increasingly becoming a bottleneck. To close this gap, we studied the performance of parallel trajectory analysis with MPI and the Python MDAnalysis library on three different XSEDE supercomputers where trajectories were read from a Lustre parallel file system. We found that strong scaling performance was impeded by stragglers, MPI processes that were slower than the typical process and that therefore dominated the overall run time. Stragglers were less prevalent for compute-bound workloads, thus pointing to file reading as a crucial bottleneck for scaling. However, a more complicated picture emerged in which both the computation and the ingestion of data exhibited close to ideal strong scaling behavior whereas stragglers were primarily caused by either large MPI communication costs or long times to open the single shared trajectory file. We improved overall strong scaling performance by two different approaches to file access, namely subfiling (splitting the trajectory into as many segments as there are processes) and MPI-IO with Parallel HDF5 trajectory files. Applying these strategies, we obtained near ideal strong scaling on up to 384 cores (16 nodes). We summarize our lessons learned as guidelines and strategies on how to take advantage of the available HPC resources to gain good scalability and potentially reduce trajectory analysis times by two orders of magnitude compared to the prevalent serial approach.
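
The core approach described in the abstract is a split-apply-combine analysis of a shared trajectory across MPI ranks with MDAnalysis. The following is a minimal sketch of that pattern, not the authors' code: it assumes mpi4py and MDAnalysis are installed, uses placeholder file names ("top.pdb", "traj.xtc"), and computes an arbitrary per-frame observable (radius of gyration) as the compute step.

from mpi4py import MPI
import numpy as np
import MDAnalysis as mda

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

# Every rank opens the same shared trajectory (the baseline case studied
# in the report, with the file residing on a Lustre parallel file system).
u = mda.Universe("top.pdb", "traj.xtc")
protein = u.select_atoms("protein")

# Split: partition the frames into contiguous blocks, one block per MPI rank.
block = np.array_split(np.arange(u.trajectory.n_frames), size)[rank]

# Apply: each rank iterates only over its own block of frames.
local = []
if len(block):
    for ts in u.trajectory[int(block[0]):int(block[-1]) + 1]:
        local.append((ts.frame, protein.radius_of_gyration()))

# Combine: gather the partial results on rank 0.
gathered = comm.gather(local, root=0)
if rank == 0:
    results = [row for part in gathered for row in part]
    print(f"analyzed {len(results)} frames on {size} MPI ranks")

Run with, e.g., mpiexec -n 16 python analyze_rgyr.py (a hypothetical script name). The per-rank costs of opening, reading, computing, and communicating are the quantities whose imbalance shows up as the stragglers discussed in the abstract.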
@techreport{khoshlessan_parallel_2019,
 title = {Parallel Performance of Molecular Dynamics Trajectory Analysis},
 type = {Technical Report},
 year = {2019},
 keywords = {Big Data,Global Arrays,HDF5,HPC,MDAnalysis,MPI,MPI I/O,Molecular Dynamics,Python,Straggler,Trajectory Analysis},
 pages = {1-60},
 abstract = {The performance of biomolecular molecular dynamics (MD) simulations has steadily increased on modern high performance computing (HPC) resources but acceleration of the analysis of the output trajectories has lagged behind so that analyzing simulations is increasingly becoming a bottleneck. To close this gap, we studied the performance of parallel trajectory analysis with MPI and the Python MDAnalysis library on three different XSEDE supercomputers where trajectories were read from a Lustre parallel file system. We found that strong scaling performance was impeded by stragglers, MPI processes that were slower than the typical process and that therefore dominated the overall run time. Stragglers were less prevalent for compute-bound workloads, thus pointing to file reading as a crucial bottleneck for scaling. However, a more complicated picture emerged in which both the computation and the ingestion of data exhibited close to ideal strong scaling behavior whereas stragglers were primarily caused by either large MPI communication costs or long times to open the single shared trajectory file. We improved overall strong scaling performance by two different approaches to file access, namely subfiling (splitting the trajectory into as many segments as there are processes) and MPI-IO with Parallel HDF5 trajectory files. Applying these strategies, we obtained near ideal strong scaling on up to 384 cores (16 nodes). We summarize our lessons learned as guidelines and strategies on how to take advantage of the available HPC resources to gain good scalability and potentially reduce trajectory analysis times by two orders of magnitude compared to the prevalent serial approach.},
 author = {Khoshlessan, Mahzad and Paraskevakos, Ioannis and Fox, Geoffrey C. and Jha, Shantenu and Beckstein, Oliver}
}
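
One of the two file-access remedies named in the abstract is MPI-IO on a Parallel HDF5 trajectory. Below is a minimal sketch of that access pattern with h5py, not the authors' implementation; the file name "traj.h5" and a "coordinates" dataset of shape (n_frames, n_atoms, 3) are assumptions for illustration, and h5py must be built against a parallel HDF5 library.

from mpi4py import MPI
import numpy as np
import h5py

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

# All ranks open the same HDF5 file through the MPI-IO ("mpio") driver.
with h5py.File("traj.h5", "r", driver="mpio", comm=comm) as f:
    coords = f["coordinates"]  # assumed layout: (n_frames, n_atoms, 3)
    block = np.array_split(np.arange(coords.shape[0]), size)[rank]
    # Each rank reads only its own contiguous slice of frames.
    if len(block):
        frames = coords[int(block[0]):int(block[-1]) + 1]
    else:
        frames = np.empty((0,) + coords.shape[1:])
    print(f"rank {rank} read {frames.shape[0]} frames via MPI-IO")

The alternative remedy, subfiling, needs no special I/O layer: the trajectory is pre-split into one segment file per rank and each rank opens only its own segment.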
