Twister2: TSet high-performance iterative dataflow. Wickramasinghe, P., Kamburugamuve, S., Govindarajan, K., Abeykoon, V., Widanage, C., Perera, N., Uyar, A., Gunduz, G., Akkas, S., & Fox, G. In 2019 International Conference on High Performance Big Data and Intelligent Systems, HPBD and IS 2019, pages 55-60, May 2019. Institute of Electrical and Electronics Engineers Inc.
doi  abstract   bibtex   
The dataflow model is slowly becoming the de facto standard for big data applications. While many popular frameworks are built around the dataflow model, very little research has been done on understanding the inner workings of the dataflow model. This has led to many inefficiencies in existing frameworks. It is important to note that understanding the relation between dataflow and HPC building blocks allows us to address and alleviate many of the fundamental inefficiencies in dataflow by learning from the extensive research literature in the HPC community. In this paper, we present TSet's, the dataflow abstraction of Twister2, which is a big data framework designed for high-performance dataflow and iterative computations. We discuss the dataflow model adopted by TSet's and the rationale behind implementing iteration handling at the worker level. Finally, we evaluate TSet's to show the performance of the framework.
@comment{Conference paper entry. The citation key is copied from the entry's own citation_key field. Fields such as id, created, profile_id, read and starred are reference-manager bookkeeping and are not standard BibTeX fields.}
@inproceedings{Wickramasinghe2019,
 author = {Wickramasinghe, Pulasthi and Kamburugamuve, Supun and Govindarajan, Kannan and Abeykoon, Vibhatha and Widanage, Chathura and Perera, Niranda and Uyar, Ahmet and Gunduz, Gurhan and Akkas, Selahattin and Fox, Geoffrey},
 title = {{Twister2}: {TSet} high-performance iterative dataflow},
 booktitle = {2019 International Conference on High Performance Big Data and Intelligent Systems, HPBD and IS 2019},
 pages = {55--60},
 year = {2019},
 month = may,
 day = {1},
 publisher = {Institute of Electrical and Electronics Engineers Inc.},
 doi = {10.1109/HPBDIS.2019.8735495},
 keywords = {Batch,Big data,Dataflow,Iterative,Mapreduce,Parallel programming,Stream},
 abstract = {The dataflow model is slowly becoming the de facto standard for big data applications. While many popular frameworks are built around the dataflow model, very little research has been done on understanding the inner workings of the dataflow model. This has led to many inefficiencies in existing frameworks. It is important to note that understanding the relation between dataflow and HPC building blocks allows us to address and alleviate many of the fundamental inefficiencies in dataflow by learning from the extensive research literature in the HPC community. In this paper, we present TSet's, the dataflow abstraction of Twister2, which is a big data framework designed for high-performance dataflow and iterative computations. We discuss the dataflow model adopted by TSet's and the rationale behind implementing iteration handling at the worker level. Finally, we evaluate TSet's to show the performance of the framework.},
 type = {inproceedings},
 bibtype = {inproceedings},
 citation_key = {Wickramasinghe2019},
 id = {f11ba317-f161-3a42-aba4-bd4d585ce01e},
 created = {2019-10-01T17:21:01.592Z},
 accessed = {2019-08-21},
 last_modified = {2020-05-11T14:43:32.134Z},
 profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
 file_attached = {true},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {false},
 hidden = {false},
 private_publication = {false}
}

Downloads: 0