HyMR: a hybrid MapReduce workflow system. Ruan, Y., Guo, Z., Zhou, Y., Qiu, J., & Fox, G. C. In Proceedings of the 3rd international workshop on Emerging computational methods for the life sciences, pages 39-48, 2012. ACM. doi abstract bibtex Many distributed computing models have been developed for high performance processing of large scale scientific data. Among them, MapReduce is a popular and widely used fine grain parallel runtime. Workflows integrate and coordinate distributed and heterogeneous components to solve the computation problem which may contain several MapReduce jobs. However, existing workflow solutions have limited supports for important features such as fault tolerance and efficient execution for iterative applications. In this paper, we propose HyMR: a hybrid MapReduce workflow system based on two different MapReduce frameworks. HyMR optimizes scheduling for individual jobs and supports fault tolerance for the entire workflow pipeline. A distributed file system is used for fast data sharing between jobs. We compare a pipeline using HyMR with the workflow model based on a single MapReduce framework. Our results show that the hybrid model achieves a higher efficiency. © 2012 ACM.
@comment{Conference paper entry exported from Mendeley. The citation key is
  taken from the entry's own citation_key field. Acronym-like words in the
  title are braced so sentence-casing styles keep their capitalisation. The
  lowercase bookkeeping fields (id, created, read, starred, ...) are Mendeley
  metadata that standard bibliography styles ignore.}
@inproceedings{Ruan2012a,
  title = {{HyMR}: a hybrid {MapReduce} workflow system},
  type = {inproceedings},
  year = {2012},
  pages = {39--48},
  publisher = {ACM},
  id = {bfc6454d-2e97-373d-81b8-9f1430aeadef},
  created = {2017-12-18T21:44:04.268Z},
  file_attached = {false},
  profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
  last_modified = {2020-05-11T14:43:45.824Z},
  read = {false},
  starred = {false},
  authored = {true},
  confirmed = {true},
  hidden = {false},
  citation_key = {Ruan2012a},
  source_type = {CONF},
  folder_uuids = {36d8ccf4-7085-47fa-8ab9-897283d082c5},
  private_publication = {false},
  abstract = {Many distributed computing models have been developed for high performance processing of large scale scientific data. Among them, MapReduce is a popular and widely used fine grain parallel runtime. Workflows integrate and coordinate distributed and heterogeneous components to solve the computation problem which may contain several MapReduce jobs. However, existing workflow solutions have limited supports for important features such as fault tolerance and efficient execution for iterative applications. In this paper, we propose HyMR: a hybrid MapReduce workflow system based on two different MapReduce frameworks. HyMR optimizes scheduling for individual jobs and supports fault tolerance for the entire workflow pipeline. A distributed file system is used for fast data sharing between jobs. We compare a pipeline using HyMR with the workflow model based on a single MapReduce framework. Our results show that the hybrid model achieves a higher efficiency. © 2012 ACM.},
  bibtype = {inproceedings},
  author = {Ruan, Yang and Guo, Zhenhua and Zhou, Yuduo and Qiu, Judy and Fox, Geoffrey Charles},
  doi = {10.1145/2483954.2483962},
  booktitle = {Proceedings of the 3rd international workshop on Emerging computational methods for the life sciences}
}
Downloads: 0
{"_id":"XSwQ3fPKFXri2DRxv","bibbaseid":"ruan-guo-zhou-qiu-fox-hymrahybridmapreduceworkflowsystem-2012","downloads":0,"creationDate":"2018-03-12T19:10:28.007Z","title":"Hymr: a hybrid mapreduce workflow system","author_short":["Ruan, Y.","Guo, Z.","Zhou, Y.","Qiu, J.","Fox, G., C."],"year":2012,"bibtype":"inproceedings","biburl":"https://bibbase.org/service/mendeley/42d295c0-0737-38d6-8b43-508cab6ea85d","bibdata":{"title":"Hymr: a hybrid mapreduce workflow system","type":"inproceedings","year":"2012","pages":"39-48","publisher":"ACM","id":"bfc6454d-2e97-373d-81b8-9f1430aeadef","created":"2017-12-18T21:44:04.268Z","file_attached":false,"profile_id":"42d295c0-0737-38d6-8b43-508cab6ea85d","last_modified":"2020-05-11T14:43:45.824Z","read":false,"starred":false,"authored":"true","confirmed":"true","hidden":false,"citation_key":"Ruan2012a","source_type":"CONF","folder_uuids":"36d8ccf4-7085-47fa-8ab9-897283d082c5","private_publication":false,"abstract":"Many distributed computing models have been developed for high performance processing of large scale scientific data. Among them, MapReduce is a popular and widely used fine grain parallel runtime. Workflows integrate and coordinate distributed and heterogeneous components to solve the computation problem which may contain several MapReduce jobs. However, existing workflow solutions have limited supports for important features such as fault tolerance and efficient execution for iterative applications. In this paper, we propose HyMR: a hybrid MapReduce workflow system based on two different MapReduce frameworks. HyMR optimizes scheduling for individual jobs and supports fault tolerance for the entire workflow pipeline. A distributed file system is used for fast data sharing between jobs. We compare a pipeline using HyMR with the workflow model based on a single MapReduce framework. Our results show that the hybrid model achieves a higher efficiency. 
© 2012 ACM.","bibtype":"inproceedings","author":"Ruan, Yang and Guo, Zhenhua and Zhou, Yuduo and Qiu, Judy and Fox, Geoffrey Charles","doi":"10.1145/2483954.2483962","booktitle":"Proceedings of the 3rd international workshop on Emerging computational methods for the life sciences","bibtex":"@inproceedings{\n title = {Hymr: a hybrid mapreduce workflow system},\n type = {inproceedings},\n year = {2012},\n pages = {39-48},\n publisher = {ACM},\n id = {bfc6454d-2e97-373d-81b8-9f1430aeadef},\n created = {2017-12-18T21:44:04.268Z},\n file_attached = {false},\n profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},\n last_modified = {2020-05-11T14:43:45.824Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Ruan2012a},\n source_type = {CONF},\n folder_uuids = {36d8ccf4-7085-47fa-8ab9-897283d082c5},\n private_publication = {false},\n abstract = {Many distributed computing models have been developed for high performance processing of large scale scientific data. Among them, MapReduce is a popular and widely used fine grain parallel runtime. Workflows integrate and coordinate distributed and heterogeneous components to solve the computation problem which may contain several MapReduce jobs. However, existing workflow solutions have limited supports for important features such as fault tolerance and efficient execution for iterative applications. In this paper, we propose HyMR: a hybrid MapReduce workflow system based on two different MapReduce frameworks. HyMR optimizes scheduling for individual jobs and supports fault tolerance for the entire workflow pipeline. A distributed file system is used for fast data sharing between jobs. We compare a pipeline using HyMR with the workflow model based on a single MapReduce framework. Our results show that the hybrid model achieves a higher efficiency. 
© 2012 ACM.},\n bibtype = {inproceedings},\n author = {Ruan, Yang and Guo, Zhenhua and Zhou, Yuduo and Qiu, Judy and Fox, Geoffrey Charles},\n doi = {10.1145/2483954.2483962},\n booktitle = {Proceedings of the 3rd international workshop on Emerging computational methods for the life sciences}\n}","author_short":["Ruan, Y.","Guo, Z.","Zhou, Y.","Qiu, J.","Fox, G., C."],"biburl":"https://bibbase.org/service/mendeley/42d295c0-0737-38d6-8b43-508cab6ea85d","bibbaseid":"ruan-guo-zhou-qiu-fox-hymrahybridmapreduceworkflowsystem-2012","role":"author","urls":{},"metadata":{"authorlinks":{}},"downloads":0},"search_terms":["hymr","hybrid","mapreduce","workflow","system","ruan","guo","zhou","qiu","fox"],"keywords":[],"authorIDs":[],"dataSources":["zgahneP4uAjKbudrQ","ya2CyA73rpZseyrZ8","2252seNhipfTmjEBQ"]}