Twister2: Design of a big data toolkit

Twister2: Design of a big data toolkit. Kamburugamuve, S., Govindarajan, K., Wickramasinghe, P., Abeykoon, V., & Fox, G. Concurrency and Computation: Practice and Experience, John Wiley and Sons Ltd, 2019.

Paper doi abstract bibtex

Summary: Data-driven applications are essential to handle the ever-increasing volume, velocity, and veracity of data generated by sources such as the Web and Internet of Things (IoT) devices. Simultaneously, an event-driven computational paradigm is emerging as the core of modern systems designed for database queries, data analytics, and on-demand applications. Modern big data processing runtimes and asynchronous many task (AMT) systems from high performance computing (HPC) community have adopted dataflow event-driven model. The services are increasingly moving to an event-driven model in the form of Function as a Service (FaaS) to compose services. An event-driven runtime designed for data processing consists of well-understood components such as communication, scheduling, and fault tolerance. Different design choices adopted by these components determine the type of applications a system can support efficiently. We find that modern systems are limited to specific sets of applications because they have been designed with fixed choices that cannot be changed easily. In this paper, we present a loosely coupled component-based design of a big data toolkit where each component can have different implementations to support various applications. Such a polymorphic design would allow services and data analytics to be integrated seamlessly and expand from edge to cloud to HPC environments.

@comment{Journal article describing the Twister2 big data toolkit.
 The entry key matches the Mendeley-exported citation_key field below.
 Fields such as id, created, profile_id, read, starred, authored, confirmed,
 hidden and private_publication are Mendeley export metadata; standard
 bibliography styles ignore unknown fields, so they are kept for round-tripping.
 Volume, number and pages are not present in this export.}
@article{Kamburugamuve2019,
 title = {{Twister2}: Design of a big data toolkit},
 type = {article},
 year = {2019},
 keywords = {big data,dataflow,event-driven computing,high performance computing},
 publisher = {John Wiley and Sons Ltd},
 id = {c887ff50-bb65-3ac4-b78e-0ed6f691138d},
 created = {2019-10-01T17:20:54.329Z},
 accessed = {2019-08-21},
 file_attached = {true},
 profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
 last_modified = {2020-05-11T14:43:33.121Z},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {false},
 hidden = {false},
 citation_key = {Kamburugamuve2019},
 private_publication = {false},
 abstract = {Data-driven applications are essential to handle the ever-increasing volume, velocity, and veracity of data generated by sources such as the Web and Internet of Things (IoT) devices. Simultaneously, an event-driven computational paradigm is emerging as the core of modern systems designed for database queries, data analytics, and on-demand applications. Modern big data processing runtimes and asynchronous many task (AMT) systems from high performance computing (HPC) community have adopted dataflow event-driven model. The services are increasingly moving to an event-driven model in the form of Function as a Service (FaaS) to compose services. An event-driven runtime designed for data processing consists of well-understood components such as communication, scheduling, and fault tolerance. Different design choices adopted by these components determine the type of applications a system can support efficiently. We find that modern systems are limited to specific sets of applications because they have been designed with fixed choices that cannot be changed easily. In this paper, we present a loosely coupled component-based design of a big data toolkit where each component can have different implementations to support various applications. Such a polymorphic design would allow services and data analytics to be integrated seamlessly and expand from edge to cloud to HPC environments.},
 bibtype = {article},
 author = {Kamburugamuve, Supun and Govindarajan, Kannan and Wickramasinghe, Pulasthi and Abeykoon, Vibhatha and Fox, Geoffrey},
 doi = {10.1002/cpe.5189},
 journal = {Concurrency and Computation: Practice and Experience}
}

Downloads: 0

{"_id":"ZFyWfd9eryk8nJCmv","bibbaseid":"kamburugamuve-govindarajan-wickramasinghe-abeykoon-fox-twister2designofabigdatatoolkit-2019","authorIDs":[],"author_short":["Kamburugamuve, S.","Govindarajan, K.","Wickramasinghe, P.","Abeykoon, V.","Fox, G."],"bibdata":{"title":"Twister2: Design of a big data toolkit","type":"article","year":"2019","keywords":"big data,dataflow,event-driven computing,high performance computing","publisher":"John Wiley and Sons Ltd","id":"c887ff50-bb65-3ac4-b78e-0ed6f691138d","created":"2019-10-01T17:20:54.329Z","accessed":"2019-08-21","file_attached":"true","profile_id":"42d295c0-0737-38d6-8b43-508cab6ea85d","last_modified":"2020-05-11T14:43:33.121Z","read":false,"starred":false,"authored":"true","confirmed":false,"hidden":false,"citation_key":"Kamburugamuve2019","private_publication":false,"abstract":"Summary Data-driven applications are essential to handle the ever-increasing volume, velocity, and veracity of data generated by sources such as the Web and Internet of Things (IoT) devices. Simultaneously, an event-driven computational paradigm is emerging as the core of modern systems designed for database queries, data analytics, and on-demand applications. Modern big data processing runtimes and asynchronous many task (AMT) systems from high performance computing (HPC) community have adopted dataflow event-driven model. The services are increasingly moving to an event-driven model in the form of Function as a Service (FaaS) to compose services. An event-driven runtime designed for data processing consists of well-understood components such as communication, scheduling, and fault tolerance. Different design choices adopted by these components determine the type of applications a system can support efficiently. We find that modern systems are limited to specific sets of applications because they have been designed with fixed choices that cannot be changed easily. 
In this paper, we present a loosely coupled component-based design of a big data toolkit where each component can have different implementations to support various applications. Such a polymorphic design would allow services and data analytics to be integrated seamlessly and expand from edge to cloud to HPC environments.","bibtype":"article","author":"Kamburugamuve, Supun and Govindarajan, Kannan and Wickramasinghe, Pulasthi and Abeykoon, Vibhatha and Fox, Geoffrey","doi":"10.1002/cpe.5189","journal":"Concurrency Computation: Practice and Experience","bibtex":"@article{\n title = {Twister2: Design of a big data toolkit},\n type = {article},\n year = {2019},\n keywords = {big data,dataflow,event-driven computing,high performance computing},\n publisher = {John Wiley and Sons Ltd},\n id = {c887ff50-bb65-3ac4-b78e-0ed6f691138d},\n created = {2019-10-01T17:20:54.329Z},\n accessed = {2019-08-21},\n file_attached = {true},\n profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},\n last_modified = {2020-05-11T14:43:33.121Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {false},\n hidden = {false},\n citation_key = {Kamburugamuve2019},\n private_publication = {false},\n abstract = {Summary Data-driven applications are essential to handle the ever-increasing volume, velocity, and veracity of data generated by sources such as the Web and Internet of Things (IoT) devices. Simultaneously, an event-driven computational paradigm is emerging as the core of modern systems designed for database queries, data analytics, and on-demand applications. Modern big data processing runtimes and asynchronous many task (AMT) systems from high performance computing (HPC) community have adopted dataflow event-driven model. The services are increasingly moving to an event-driven model in the form of Function as a Service (FaaS) to compose services. 
An event-driven runtime designed for data processing consists of well-understood components such as communication, scheduling, and fault tolerance. Different design choices adopted by these components determine the type of applications a system can support efficiently. We find that modern systems are limited to specific sets of applications because they have been designed with fixed choices that cannot be changed easily. In this paper, we present a loosely coupled component-based design of a big data toolkit where each component can have different implementations to support various applications. Such a polymorphic design would allow services and data analytics to be integrated seamlessly and expand from edge to cloud to HPC environments.},\n bibtype = {article},\n author = {Kamburugamuve, Supun and Govindarajan, Kannan and Wickramasinghe, Pulasthi and Abeykoon, Vibhatha and Fox, Geoffrey},\n doi = {10.1002/cpe.5189},\n journal = {Concurrency Computation: Practice and Experience}\n}","author_short":["Kamburugamuve, S.","Govindarajan, K.","Wickramasinghe, P.","Abeykoon, V.","Fox, G."],"urls":{"Paper":"https://bibbase.org/service/mendeley/42d295c0-0737-38d6-8b43-508cab6ea85d/file/2588efbb-9518-9d4d-7928-c32877664676/Kamburugamuve_et_al___2019___Twister2_Design_of_a_big_data_toolkit2.pdf.pdf"},"biburl":"https://bibbase.org/service/mendeley/42d295c0-0737-38d6-8b43-508cab6ea85d","bibbaseid":"kamburugamuve-govindarajan-wickramasinghe-abeykoon-fox-twister2designofabigdatatoolkit-2019","role":"author","keyword":["big data","dataflow","event-driven computing","high performance computing"],"metadata":{"authorlinks":{}},"downloads":0},"bibtype":"article","creationDate":"2019-08-21T20:36:59.237Z","downloads":0,"keywords":["big data","dataflow","event-driven computing","high performance computing"],"search_terms":["twister2","design","big","data","toolkit","kamburugamuve","govindarajan","wickramasinghe","abeykoon","fox"],"title":"Twister2: Design of a big data 
toolkit","year":2019,"biburl":"https://bibbase.org/service/mendeley/42d295c0-0737-38d6-8b43-508cab6ea85d","dataSources":["zgahneP4uAjKbudrQ","ya2CyA73rpZseyrZ8","2252seNhipfTmjEBQ"]}