Zarrtraj: A Python package for streaming molecular dynamics trajectories from cloud services. Woods, L., MacDermott-Opeskin, H., Jakupovic, E., Zhuang, Y., Gowers, R., & Beckstein, O. November, 2024.
Zarrtraj: A Python package for streaming molecular dynamics trajectories from cloud services [link]Paper  doi  abstract   bibtex   
Molecular dynamics (MD) simulations provide a microscope into the behavior of atomic-scale environments otherwise prohibitively difficult to observe. However, the resulting trajectory data are too often siloed in a single institution's HPC environment, rendering them unusable by the broader scientific community. Additionally, it is increasingly common for trajectory data to be entirely stored in a cloud storage provider, rather than a traditional on-premise storage site. Zarrtraj enables these trajectories to be read directly from cloud storage providers like AWS, Google Cloud, and Microsoft Azure into MDAnalysis, a popular Python package for analyzing trajectory data, providing a method to open up access to trajectory data to anyone with an internet connection. Enabling cloud streaming for MD trajectories empowers easier replication of published analysis results, analyses of large, conglomerate datasets from different sources, and training machine learning models without downloading and storing trajectory data.
@comment{Zarrtraj record published on Zenodo. Fields are ordered: bibliographic core (author, title, date, publisher), then identifiers (doi, url, urldate), then annotations (keywords, abstract). The abstract carries two grammar corrections relative to the exported text: the possessive of "institution" and the pronoun agreeing with plural "data".}
@misc{woods_zarrtraj_2024,
	author     = {Woods, Lawson and MacDermott-Opeskin, Hugo and Jakupovic, Edis and Zhuang, Yuxuan and Gowers, Richard and Beckstein, Oliver},
	title      = {Zarrtraj: {A} {Python} package for streaming molecular dynamics trajectories from cloud services},
	shorttitle = {Zarrtraj},
	year       = {2024},
	month      = nov,
	publisher  = {Zenodo},
	doi        = {10.5281/zenodo.14172229},
	url        = {https://zenodo.org/records/14172229},
	urldate    = {2025-02-15},
	keywords   = {file-format, mdanalysis, molecular-dynamics, streaming, zarr},
	abstract   = {Molecular dynamics (MD) simulations provide a microscope into the behavior of atomic-scale environments otherwise prohibitively difficult to observe. However, the resulting trajectory data are too often siloed in a single institution's HPC environment, rendering them unusable by the broader scientific community. Additionally, it is increasingly common for trajectory data to be entirely stored in a cloud storage provider, rather than a traditional on-premise storage site. Zarrtraj enables these trajectories to be read directly from cloud storage providers like AWS, Google Cloud, and Microsoft Azure into MDAnalysis, a popular Python package for analyzing trajectory data, providing a method to open up access to trajectory data to anyone with an internet connection. Enabling cloud streaming for MD trajectories empowers easier replication of published analysis results, analyses of large, conglomerate datasets from different sources, and training machine learning models without downloading and storing trajectory data.},
}

Downloads: 0