Optimizing a conjugate gradient solver with non-blocking collective operations

Optimizing a conjugate gradient solver with non-blocking collective operations. Hoefler, T., Gottschling, P., Lumsdaine, A., & Rehm, W. Parallel Computing, 33(9):624-633, 2007.

Website doi abstract bibtex

This paper presents a case study that analyzes the suitability and usage of non-blocking collective operations in parallel applications. As with their point-to-point counterparts, non-blocking collective operations provide the ability to overlap communication with computation and to avoid unnecessary synchronization. These operations are provided for MPI programs with LibNBC, a portable low-overhead implementation of non-blocking collective operations built on MPI-1. The straightforward applicability of the LibNBC is demonstrated by incorporating non-blocking collective operations into a parallel conjugate gradient solver. Although only minor changes are required to use them, non-blocking collective operations allow most of the communication costs to be hidden and provide performance improvements of up to 34%. We also show that, because of overlap, there is no significant performance difference between Gigabit Ethernet and InfiniBand™ for special cases of our calculation. © 2007 Elsevier B.V. All rights reserved.

@comment{Journal article record exported from Mendeley/Scopus. The entry key is taken from the record's own citation_key field; fields such as websites, id, created, profile_id, etc. are exporter metadata that standard styles ignore.}
@article{Hoefler2007624,
 title = {Optimizing a conjugate gradient solver with non-blocking collective operations},
 type = {article},
 year = {2007},
 keywords = {Blocking probability,Collective operations; Computation overlaps; Mess,Communication systems; Interfaces (computer); Mess},
 pages = {624--633},
 volume = {33},
 websites = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-34548698431&doi=10.1016%2Fj.parco.2007.06.006&partnerID=40&md5=a0c957b64119789a5c1a85843c694932},
 id = {4182a9b1-7d7e-36ee-b25c-f7fada028215},
 created = {2018-01-09T20:30:38.280Z},
 file_attached = {false},
 profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
 last_modified = {2018-03-12T19:03:18.886Z},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {true},
 hidden = {false},
 citation_key = {Hoefler2007624},
 source_type = {article},
 notes = {cited By 30},
 folder_uuids = {2aba6c14-9027-4f47-8627-0902e1e2342b},
 private_publication = {false},
 abstract = {This paper presents a case study that analyzes the suitability and usage of non-blocking collective operations in parallel applications. As with their point-to-point counterparts, non-blocking collective operations provide the ability to overlap communication with computation and to avoid unnecessary synchronization. These operations are provided for MPI programs with LibNBC, a portable low-overhead implementation of non-blocking collective operations built on MPI-1. The straightforward applicability of the LibNBC is demonstrated by incorporating non-blocking collective operations into a parallel conjugate gradient solver. Although only minor changes are required to use them, non-blocking collective operations allow most of the communication costs to be hidden and provide performance improvements of up to 34\%. We also show that, because of overlap, there is no significant performance difference between Gigabit Ethernet and InfiniBand\texttrademark{} for special cases of our calculation. {\copyright} 2007 Elsevier B.V. All rights reserved.},
 bibtype = {article},
 author = {Hoefler, T and Gottschling, P and Lumsdaine, A and Rehm, W},
 doi = {10.1016/j.parco.2007.06.006},
 journal = {Parallel Computing},
 number = {9}
}

Downloads: 0

{"_id":"2eAscX6GshRE8nKJn","bibbaseid":"hoefler-gottschling-lumsdaine-rehm-optimizingaconjugategradientsolverwithnonblockingcollectiveoperations-2007","downloads":0,"creationDate":"2018-03-12T19:10:28.096Z","title":"Optimizing a conjugate gradient solver with non-blocking collective operations","author_short":["Hoefler, T.","Gottschling, P.","Lumsdaine, A.","Rehm, W."],"year":2007,"bibtype":"article","biburl":"https://bibbase.org/service/mendeley/42d295c0-0737-38d6-8b43-508cab6ea85d","bibdata":{"title":"Optimizing a conjugate gradient solver with non-blocking collective operations","type":"article","year":"2007","keywords":"Blocking probability,Collective operations; Computation overlaps; Mess,Communication systems; Interfaces (computer); Mess","pages":"624-633","volume":"33","websites":"https://www.scopus.com/inward/record.uri?eid=2-s2.0-34548698431&doi=10.1016%2Fj.parco.2007.06.006&partnerID=40&md5=a0c957b64119789a5c1a85843c694932","id":"4182a9b1-7d7e-36ee-b25c-f7fada028215","created":"2018-01-09T20:30:38.280Z","file_attached":false,"profile_id":"42d295c0-0737-38d6-8b43-508cab6ea85d","last_modified":"2018-03-12T19:03:18.886Z","read":false,"starred":false,"authored":"true","confirmed":"true","hidden":false,"citation_key":"Hoefler2007624","source_type":"article","notes":"cited By 30","folder_uuids":"2aba6c14-9027-4f47-8627-0902e1e2342b","private_publication":false,"abstract":"This paper presents a case study that analyzes the suitability and usage of non-blocking collective operations in parallel applications. As with their point-to-point counterparts, non-blocking collective operations provide the ability to overlap communication with computation and to avoid unnecessary synchronization. These operations are provided for MPI programs with LibNBC, a portable low-overhead implementation of non-blocking collective operations built on MPI-1. 
The straightforward applicability of the LibNBC is demonstrated by incorporating non-blocking collective operations into a parallel conjugate gradient solver. Although only minor changes are required to use them, non-blocking collective operations allow most of the communication costs to be hidden and provide performance improvements of up to 34%. We also show that, because of overlap, there is no significant performance difference between Gigabit Ethernet and InfiniBandTM for special cases of our calculation. © 2007 Elsevier B.V. All rights reserved.","bibtype":"article","author":"Hoefler, T and Gottschling, P and Lumsdaine, A and Rehm, W","doi":"10.1016/j.parco.2007.06.006","journal":"Parallel Computing","number":"9","bibtex":"@article{\n title = {Optimizing a conjugate gradient solver with non-blocking collective operations},\n type = {article},\n year = {2007},\n keywords = {Blocking probability,Collective operations; Computation overlaps; Mess,Communication systems; Interfaces (computer); Mess},\n pages = {624-633},\n volume = {33},\n websites = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-34548698431&doi=10.1016%2Fj.parco.2007.06.006&partnerID=40&md5=a0c957b64119789a5c1a85843c694932},\n id = {4182a9b1-7d7e-36ee-b25c-f7fada028215},\n created = {2018-01-09T20:30:38.280Z},\n file_attached = {false},\n profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},\n last_modified = {2018-03-12T19:03:18.886Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Hoefler2007624},\n source_type = {article},\n notes = {cited By 30},\n folder_uuids = {2aba6c14-9027-4f47-8627-0902e1e2342b},\n private_publication = {false},\n abstract = {This paper presents a case study that analyzes the suitability and usage of non-blocking collective operations in parallel applications. 
As with their point-to-point counterparts, non-blocking collective operations provide the ability to overlap communication with computation and to avoid unnecessary synchronization. These operations are provided for MPI programs with LibNBC, a portable low-overhead implementation of non-blocking collective operations built on MPI-1. The straightforward applicability of the LibNBC is demonstrated by incorporating non-blocking collective operations into a parallel conjugate gradient solver. Although only minor changes are required to use them, non-blocking collective operations allow most of the communication costs to be hidden and provide performance improvements of up to 34%. We also show that, because of overlap, there is no significant performance difference between Gigabit Ethernet and InfiniBandTM for special cases of our calculation. © 2007 Elsevier B.V. All rights reserved.},\n bibtype = {article},\n author = {Hoefler, T and Gottschling, P and Lumsdaine, A and Rehm, W},\n doi = {10.1016/j.parco.2007.06.006},\n journal = {Parallel Computing},\n number = {9}\n}","author_short":["Hoefler, T.","Gottschling, P.","Lumsdaine, A.","Rehm, W."],"urls":{"Website":"https://www.scopus.com/inward/record.uri?eid=2-s2.0-34548698431&doi=10.1016%2Fj.parco.2007.06.006&partnerID=40&md5=a0c957b64119789a5c1a85843c694932"},"biburl":"https://bibbase.org/service/mendeley/42d295c0-0737-38d6-8b43-508cab6ea85d","bibbaseid":"hoefler-gottschling-lumsdaine-rehm-optimizingaconjugategradientsolverwithnonblockingcollectiveoperations-2007","role":"author","keyword":["Blocking probability","Collective operations; Computation overlaps; Mess","Communication systems; Interfaces (computer); Mess"],"metadata":{"authorlinks":{}},"downloads":0},"search_terms":["optimizing","conjugate","gradient","solver","non","blocking","collective","operations","hoefler","gottschling","lumsdaine","rehm"],"keywords":["blocking probability","collective operations; computation overlaps; mess","communication systems; 
interfaces (computer); mess"],"authorIDs":[],"dataSources":["zgahneP4uAjKbudrQ","ya2CyA73rpZseyrZ8","2252seNhipfTmjEBQ"]}