Hierarchical MapReduce: Towards simplified cross-domain data processing. Luo, Y., Plale, B., Guo, Z., Li, W.W., Qiu, J., & Sun, Y. Concurrency and Computation: Practice and Experience, 26(4), 2014. doi: 10.1002/cpe.2929

The MapReduce programming model has proven useful for data-driven, high-throughput applications. However, the conventional MapReduce model limits itself to scheduling jobs within a single cluster. As job sizes become larger, single-cluster solutions grow increasingly inadequate. We present a hierarchical MapReduce framework that utilizes computation resources from multiple clusters simultaneously to run MapReduce jobs across them. Applications implemented in this framework adopt the Map-Reduce-GlobalReduce model, where computations are expressed as three functions: Map, Reduce, and GlobalReduce. Two scheduling algorithms are proposed, one targeting compute-intensive jobs and the other data-intensive jobs; both are evaluated using a life science application, AutoDock, and a simple Grep. Data management is explored through an analysis of the Gfarm file system. Copyright © 2012 John Wiley & Sons, Ltd.
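To make the Map-Reduce-GlobalReduce model concrete, below is a minimal Python sketch, assuming a word-count task: each cluster runs an ordinary local MapReduce pass (Map, then Reduce) over its own input partition, and a single GlobalReduce merges the per-cluster partial results. The function names and the in-memory driver are illustrative assumptions, not the paper's implementation, which dispatches real MapReduce jobs to multiple clusters rather than running in one process.

    from collections import defaultdict
    from typing import Dict, Iterable, List, Tuple

    def map_fn(line: str) -> Iterable[Tuple[str, int]]:
        # Map: emit (word, 1) for every word in an input record.
        for word in line.split():
            yield word, 1

    def reduce_fn(key: str, values: Iterable[int]) -> int:
        # Reduce: combine the values for one key within a single cluster.
        return sum(values)

    def local_mapreduce(lines: List[str]) -> Dict[str, int]:
        # One cluster's ordinary MapReduce pass over its local partition.
        groups: Dict[str, List[int]] = defaultdict(list)
        for line in lines:
            for key, value in map_fn(line):
                groups[key].append(value)
        return {key: reduce_fn(key, values) for key, values in groups.items()}

    def global_reduce(partials: List[Dict[str, int]]) -> Dict[str, int]:
        # GlobalReduce: merge the partial results shipped back by each cluster.
        totals: Dict[str, int] = defaultdict(int)
        for partial in partials:
            for key, value in partial.items():
                totals[key] += value
        return dict(totals)

    # Two "clusters", each holding its own partition of the input.
    cluster_a = ["hierarchical mapreduce", "mapreduce across clusters"]
    cluster_b = ["global reduce merges per-cluster outputs"]
    print(global_reduce([local_mapreduce(cluster_a), local_mapreduce(cluster_b)]))

The pattern works because the final combination step is associative: each cluster can finish its Reduce locally, and only the small partial results need to cross domain boundaries for the GlobalReduce.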
@article{Luo2014,
  title = {Hierarchical MapReduce: Towards simplified cross-domain data processing},
  author = {Luo, Y. and Plale, B. and Guo, Z. and Li, W.W. and Qiu, J. and Sun, Y.},
  journal = {Concurrency and Computation: Practice and Experience},
  year = {2014},
  volume = {26},
  number = {4},
  doi = {10.1002/cpe.2929},
  abstract = {The MapReduce programming model has proven useful for data-driven, high-throughput applications. However, the conventional MapReduce model limits itself to scheduling jobs within a single cluster. As job sizes become larger, single-cluster solutions grow increasingly inadequate. We present a hierarchical MapReduce framework that utilizes computation resources from multiple clusters simultaneously to run MapReduce jobs across them. Applications implemented in this framework adopt the Map-Reduce-GlobalReduce model, where computations are expressed as three functions: Map, Reduce, and GlobalReduce. Two scheduling algorithms are proposed, one targeting compute-intensive jobs and the other data-intensive jobs; both are evaluated using a life science application, AutoDock, and a simple Grep. Data management is explored through an analysis of the Gfarm file system. Copyright © 2012 John Wiley & Sons, Ltd.}
}
{"_id":"abBSuEpPbvSWn4Qrw","bibbaseid":"luo-plale-guo-li-qiu-sun-hierarchicalmapreducetowardssimplifiedcrossdomaindataprocessing-2014","downloads":0,"creationDate":"2018-03-12T19:10:28.005Z","title":"Hierarchical MapReduce: Towards simplified cross-domain data processing","author_short":["Luo, Y.","Plale, B.","Guo, Z.","Li, W.","Qiu, J.","Sun, Y."],"year":2014,"bibtype":"article","biburl":"https://bibbase.org/service/mendeley/42d295c0-0737-38d6-8b43-508cab6ea85d","bibdata":{"title":"Hierarchical MapReduce: Towards simplified cross-domain data processing","type":"article","year":"2014","volume":"26","id":"04793778-e898-3680-aabd-c1a18fe62276","created":"2019-10-01T17:20:59.890Z","file_attached":false,"profile_id":"42d295c0-0737-38d6-8b43-508cab6ea85d","last_modified":"2019-10-01T17:23:48.289Z","read":false,"starred":false,"authored":"true","confirmed":"true","hidden":false,"citation_key":"Luo2014","folder_uuids":"73f994b4-a3be-4035-a6dd-3802077ce863,36d8ccf4-7085-47fa-8ab9-897283d082c5","private_publication":false,"abstract":"The MapReduce programming model has proven useful for data-driven high throughput applications. However, the conventional MapReduce model limits itself to scheduling jobs within a single cluster. As job sizes become larger, single-cluster solutions grow increasingly inadequate. We present a hierarchical MapReduce framework that utilizes computation resources from multiple clusters simultaneously to run MapReduce job across them. The applications implemented in this framework adopt the Map-Reduce-GlobalReduce model where computations are expressed as three functions: Map, Reduce, and GlobalReduce. Two scheduling algorithms are proposed, one that targets compute-intensive jobs and another data-intensive jobs, evaluated using a life science application, AutoDock, and a simple Grep. Data management is explored through analysis of the Gfarm file system.Copyright © 2012 John Wiley & Sons, Ltd.","bibtype":"article","author":"Luo, Y. and Plale, B. and Guo, Z. and Li, W.W. and Qiu, J. and Sun, Y.","doi":"10.1002/cpe.2929","journal":"Concurrency Computation Practice and Experience","number":"4","bibtex":"@article{\n title = {Hierarchical MapReduce: Towards simplified cross-domain data processing},\n type = {article},\n year = {2014},\n volume = {26},\n id = {04793778-e898-3680-aabd-c1a18fe62276},\n created = {2019-10-01T17:20:59.890Z},\n file_attached = {false},\n profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},\n last_modified = {2019-10-01T17:23:48.289Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Luo2014},\n folder_uuids = {73f994b4-a3be-4035-a6dd-3802077ce863,36d8ccf4-7085-47fa-8ab9-897283d082c5},\n private_publication = {false},\n abstract = {The MapReduce programming model has proven useful for data-driven high throughput applications. However, the conventional MapReduce model limits itself to scheduling jobs within a single cluster. As job sizes become larger, single-cluster solutions grow increasingly inadequate. We present a hierarchical MapReduce framework that utilizes computation resources from multiple clusters simultaneously to run MapReduce job across them. The applications implemented in this framework adopt the Map-Reduce-GlobalReduce model where computations are expressed as three functions: Map, Reduce, and GlobalReduce. 
Two scheduling algorithms are proposed, one that targets compute-intensive jobs and another data-intensive jobs, evaluated using a life science application, AutoDock, and a simple Grep. Data management is explored through analysis of the Gfarm file system.Copyright © 2012 John Wiley & Sons, Ltd.},\n bibtype = {article},\n author = {Luo, Y. and Plale, B. and Guo, Z. and Li, W.W. and Qiu, J. and Sun, Y.},\n doi = {10.1002/cpe.2929},\n journal = {Concurrency Computation Practice and Experience},\n number = {4}\n}","author_short":["Luo, Y.","Plale, B.","Guo, Z.","Li, W.","Qiu, J.","Sun, Y."],"biburl":"https://bibbase.org/service/mendeley/42d295c0-0737-38d6-8b43-508cab6ea85d","bibbaseid":"luo-plale-guo-li-qiu-sun-hierarchicalmapreducetowardssimplifiedcrossdomaindataprocessing-2014","role":"author","urls":{},"metadata":{"authorlinks":{}},"downloads":0},"search_terms":["hierarchical","mapreduce","towards","simplified","cross","domain","data","processing","luo","plale","guo","li","qiu","sun"],"keywords":[],"authorIDs":[],"dataSources":["zgahneP4uAjKbudrQ","ya2CyA73rpZseyrZ8","2252seNhipfTmjEBQ"]}