Parallel zero-copy algorithms for Fast Fourier Transform and conjugate gradient using MPI datatypes. Hoefler, T. & Gottlieb, S. Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 6305 LNCS:132-141, 2010.
Parallel zero-copy algorithms for Fast Fourier Transform and conjugate gradient using MPI datatypes [link]Website  doi  abstract   bibtex   
Many parallel applications need to communicate non-contiguous data. Most applications manually copy (pack/unpack) data before communications even though MPI allows a zero-copy specification. In this work, we study two complex use-cases: (1) Fast Fourier Transformation where we express a local memory transpose as part of the datatype, and (2) a conjugate gradient solver with a checkerboard layout that requires multiple nested datatypes. We demonstrate significant speedups up to a factor of 3.8 and 18%, respectively, in both cases. Our work can be used as a template to utilize datatypes for application developers. For MPI implementers, we show two practically relevant access patterns that deserve special optimization. © 2010 Springer-Verlag.
% Conference paper published in the Lecture Notes in Computer Science series.
% - The citation key is taken from this entry's own citation_key field; the
%   exported entry had no key at all and could not be parsed or cited.
% - Typed as inproceedings because the notes field records it as a paper of the
%   EuroMPI 2010 conference; the series name moves from journal to series and
%   the volume holds the bare series number.
% - NOTE(review): booktitle is the conference name as given in the notes field;
%   confirm it against the title printed on the proceedings volume.
% - Fields from keywords downwards are not used by standard styles. The
%   reference-manager bookkeeping fields (type, bibtype, source_type, id, ...)
%   are kept exactly as exported.
@inproceedings{Hoefler2010132,
  author              = {Hoefler, T. and Gottlieb, S.},
  title               = {Parallel zero-copy algorithms for {Fast Fourier Transform} and conjugate gradient using {MPI} datatypes},
  booktitle           = {17th European {MPI} Users' Group Meeting, {EuroMPI} 2010},
  series              = {Lecture Notes in Computer Science},
  volume              = {6305},
  pages               = {132--141},
  year                = {2010},
  doi                 = {10.1007/978-3-642-15646-5_14},
  keywords            = {Access patterns, Application developers, Conjugate gradient method, Fast Fourier transform, Message passing},
  abstract            = {Many parallel applications need to communicate non-contiguous data. Most applications manually copy (pack/unpack) data before communications even though MPI allows a zero-copy specification. In this work, we study two complex use-cases: (1) Fast Fourier Transformation where we express a local memory transpose as part of the datatype, and (2) a conjugate gradient solver with a checkerboard layout that requires multiple nested datatypes. We demonstrate significant speedups up to a factor of 3.8 and 18\%, respectively, in both cases. Our work can be used as a template to utilize datatypes for application developers. For MPI implementers, we show two practically relevant access patterns that deserve special optimization. {\copyright} 2010 Springer-Verlag.},
  notes               = {cited By 28; Conference of 17th European MPI Users' Group Meeting, EuroMPI 2010 ; Conference Date: 12 September 2010 Through 15 September 2010; Conference Code:82267},
  websites            = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-78149256345&doi=10.1007%2F978-3-642-15646-5_14&partnerID=40&md5=66babc8b772a92a6b75d4ad7a5fffe8b},
  city                = {Stuttgart},
  type                = {article},
  bibtype             = {article},
  source_type         = {article},
  citation_key        = {Hoefler2010132},
  id                  = {aa3067c1-6048-3590-9fb3-780a61df0328},
  profile_id          = {42d295c0-0737-38d6-8b43-508cab6ea85d},
  folder_uuids        = {2aba6c14-9027-4f47-8627-0902e1e2342b},
  created             = {2018-01-09T20:30:40.386Z},
  last_modified       = {2018-03-12T19:03:17.517Z},
  file_attached       = {false},
  read                = {false},
  starred             = {false},
  authored            = {true},
  confirmed           = {true},
  hidden              = {false},
  private_publication = {false},
}

Downloads: 0