Accurately measuring overhead, communication time and progression of blocking and nonblocking collective operations at massive scale. Hoefler, T., Schneider, T., & Lumsdaine, A. International Journal of Parallel, Emergent and Distributed Systems, 25(4):241-258, 2010.
Accurately measuring overhead, communication time and progression of blocking and nonblocking collective operations at massive scale [link]Website  doi  abstract   bibtex   
Accurate, reproducible and comparable measurement of the overheads, communication times and progression behaviour of blocking and nonblocking collective operations is a complicated task. Although different measurement schemes for blocking collective operations are implemented in well-known benchmarks, many of these schemes introduce different systematic errors in their measurements. We characterise these errors and select a window-based approach as the most accurate method. However, this approach complicates measurements significantly and introduces clock synchronisation as a new source of errors. We analyse approaches to avoid or correct those errors and develop a scalable synchronisation scheme to conduct benchmarks on massively parallel systems. Our results are compared to the window-based scheme implemented in the SKaMPI benchmarks and show a reduction of the synchronisation overhead by a factor of 16 on 128 processes. We also describe two different measurement schemes for the overhead and asynchronous progress of nonblocking collective communications. An implementation and results of both measurement schemes are presented. © 2010 Taylor & Francis.
@article{Hoefler2010241,
 title = {Accurately measuring overhead, communication time and progression of blocking and nonblocking collective operations at massive scale},
 type = {article},
 year = {2010},
 keywords = {Benchmarking; Errors; Systematic errors; Ubiquito,Collective communications; Collective operations;,Synchronization},
 pages = {241--258},
 volume = {25},
 websites = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-77954594806&doi=10.1080%2F17445760902894688&partnerID=40&md5=ac0100fb8c04aa89c272ee5e559f6126},
 id = {c3ad22a0-358a-3d40-94c0-88f445aa0857},
 created = {2018-01-09T20:30:39.297Z},
 file_attached = {false},
 profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
 last_modified = {2018-03-12T19:03:17.779Z},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {true},
 hidden = {false},
 citation_key = {Hoefler2010241},
 source_type = {article},
 notes = {cited By 10},
 folder_uuids = {2aba6c14-9027-4f47-8627-0902e1e2342b},
 private_publication = {false},
 abstract = {Accurate, reproducible and comparable measurement of the overheads, communication times and progression behaviour of blocking and nonblocking collective operations is a complicated task. Although different measurement schemes for blocking collective operations are implemented in well-known benchmarks, many of these schemes introduce different systematic errors in their measurements. We characterise these errors and select a window-based approach as the most accurate method. However, this approach complicates measurements significantly and introduces clock synchronisation as a new source of errors. We analyse approaches to avoid or correct those errors and develop a scalable synchronisation scheme to conduct benchmarks on massively parallel systems. Our results are compared to the window-based scheme implemented in the SKaMPI benchmarks and show a reduction of the synchronisation overhead by a factor of 16 on 128 processes. We also describe two different measurement schemes for the overhead and asynchronous progress of nonblocking collective communications. An implementation and results of both measurement schemes are presented. © 2010 Taylor \& Francis.},
 bibtype = {article},
 author = {Hoefler, Torsten and Schneider, Timo and Lumsdaine, Andrew},
 doi = {10.1080/17445760902894688},
 journal = {International Journal of Parallel, Emergent and Distributed Systems},
 number = {4}
}

Downloads: 0