Harp: Collective communication on Hadoop. Zhang, B., Ruan, Y., & Qiu, J. In Cloud Engineering (IC2E), 2015 IEEE International Conference on, pages 228-233, 2015. IEEE.
doi  abstract   bibtex   
© 2015 IEEE. Big data processing tools have evolved rapidly in recent years. MapReduce has proven very successful but is not optimized for many important analytics, especially those involving iteration. In this regard, Iterative MapReduce frameworks improve performance of MapReduce job chains through caching. Further, Pregel, Giraph and GraphLab abstract data as a graph and process it in iterations. But all these tools are designed with a fixed data abstraction and have limited collective communication support to synchronize application data and algorithm control states among parallel processes. In this paper, we introduce a collective communication abstraction layer which provides efficient collective communication operations on several common data abstractions such as arrays, key-values and graphs, and define a MapCollective programming model which serves the diverse collective communication demands in different parallel algorithms. We implement a library called Harp to provide the features above and plug it into Hadoop so that applications abstracted in MapCollective model can be easily developed on top of MapReduce framework and conveniently integrated with other tools in Apache Big Data Stack. With improved expressiveness in the abstraction and excellent performance on the implementation, we can simultaneously support various applications from HPC to Cloud systems together with high performance.
% Conference paper entry. The citation key mirrors the exporter's own
% citation_key field; the fields after doi are reference-manager metadata
% that standard bibliography styles ignore.
@inproceedings{Zhang2015b,
 author = {Zhang, Bingjing and Ruan, Yang and Qiu, Judy},
 title = {{Harp}: Collective Communication on {Hadoop}},
 booktitle = {2015 {IEEE} International Conference on Cloud Engineering ({IC2E})},
 year = {2015},
 pages = {228--233},
 publisher = {IEEE},
 doi = {10.1109/IC2E.2015.35},
 type = {inproceedings},
 id = {dbf7b1bb-56b9-33fd-b67c-ab39915211ab},
 created = {2017-12-18T21:44:04.370Z},
 file_attached = {false},
 profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
 last_modified = {2020-05-11T14:43:45.473Z},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {true},
 hidden = {false},
 citation_key = {Zhang2015b},
 source_type = {CONF},
 folder_uuids = {36d8ccf4-7085-47fa-8ab9-897283d082c5},
 private_publication = {false},
 abstract = {© 2015 IEEE. Big data processing tools have evolved rapidly in recent years. MapReduce has proven very successful but is not optimized for many important analytics, especially those involving iteration. In this regard, Iterative MapReduce frameworks improve performance of MapReduce job chains through caching. Further, Pregel, Giraph and GraphLab abstract data as a graph and process it in iterations. But all these tools are designed with a fixed data abstraction and have limited collective communication support to synchronize application data and algorithm control states among parallel processes. In this paper, we introduce a collective communication abstraction layer which provides efficient collective communication operations on several common data abstractions such as arrays, key-values and graphs, and define a MapCollective programming model which serves the diverse collective communication demands in different parallel algorithms. We implement a library called Harp to provide the features above and plug it into Hadoop so that applications abstracted in MapCollective model can be easily developed on top of MapReduce framework and conveniently integrated with other tools in Apache Big Data Stack. With improved expressiveness in the abstraction and excellent performance on the implementation, we can simultaneously support various applications from HPC to Cloud systems together with high performance.},
 bibtype = {inproceedings}
}

Downloads: 0