Optimizing a conjugate gradient solver with non-blocking collective operations. Hoefler, T., Gottschling, P., Lumsdaine, A., & Rehm, W. Parallel Computing, 33(9):624-633, 2007.
Optimizing a conjugate gradient solver with non-blocking collective operations [link]Website  doi  abstract   bibtex   
This paper presents a case study that analyzes the suitability and usage of non-blocking collective operations in parallel applications. As with their point-to-point counterparts, non-blocking collective operations provide the ability to overlap communication with computation and to avoid unnecessary synchronization. These operations are provided for MPI programs with LibNBC, a portable low-overhead implementation of non-blocking collective operations built on MPI-1. The straightforward applicability of the LibNBC is demonstrated by incorporating non-blocking collective operations into a parallel conjugate gradient solver. Although only minor changes are required to use them, non-blocking collective operations allow most of the communication costs to be hidden and provide performance improvements of up to 34%. We also show that, because of overlap, there is no significant performance difference between Gigabit Ethernet and InfiniBand™ for special cases of our calculation. © 2007 Elsevier B.V. All rights reserved.
@comment{Journal article record. The citation key below is copied from the
entry's own citation_key field. Fields such as id, profile_id, created,
last_modified, read, starred, folder_uuids and websites are not standard
BibTeX fields; they appear to be reference-manager metadata (TODO confirm)
and are kept verbatim because bibliography styles ignore unknown fields.}
@article{Hoefler2007624,
 title = {Optimizing a conjugate gradient solver with non-blocking collective operations},
 type = {article},
 year = {2007},
 keywords = {Blocking probability,Collective operations; Computation overlaps; Mess,Communication systems; Interfaces (computer); Mess},
 pages = {624--633},
 volume = {33},
 websites = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-34548698431&doi=10.1016%2Fj.parco.2007.06.006&partnerID=40&md5=a0c957b64119789a5c1a85843c694932},
 id = {4182a9b1-7d7e-36ee-b25c-f7fada028215},
 created = {2018-01-09T20:30:38.280Z},
 file_attached = {false},
 profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
 last_modified = {2018-03-12T19:03:18.886Z},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {true},
 hidden = {false},
 citation_key = {Hoefler2007624},
 source_type = {article},
 notes = {cited By 30},
 folder_uuids = {2aba6c14-9027-4f47-8627-0902e1e2342b},
 private_publication = {false},
 abstract = {This paper presents a case study that analyzes the suitability and usage of non-blocking collective operations in parallel applications. As with their point-to-point counterparts, non-blocking collective operations provide the ability to overlap communication with computation and to avoid unnecessary synchronization. These operations are provided for MPI programs with LibNBC, a portable low-overhead implementation of non-blocking collective operations built on MPI-1. The straightforward applicability of the LibNBC is demonstrated by incorporating non-blocking collective operations into a parallel conjugate gradient solver. Although only minor changes are required to use them, non-blocking collective operations allow most of the communication costs to be hidden and provide performance improvements of up to 34\%. We also show that, because of overlap, there is no significant performance difference between Gigabit Ethernet and InfiniBand{\texttrademark} for special cases of our calculation. {\copyright} 2007 Elsevier B.V. All rights reserved.},
 bibtype = {article},
 author = {Hoefler, T and Gottschling, P and Lumsdaine, A and Rehm, W},
 doi = {10.1016/j.parco.2007.06.006},
 journal = {Parallel Computing},
 number = {9},
}

Downloads: 0