Hierarchical MapReduce: Towards simplified cross-domain data processing. Luo, Y., Plale, B., Guo, Z., Li, W. W., Qiu, J., & Sun, Y. Concurrency and Computation: Practice and Experience, 2014.
The MapReduce programming model has proven useful for data-driven, high-throughput applications. However, the conventional MapReduce model limits itself to scheduling jobs within a single cluster. As job sizes grow larger, single-cluster solutions become increasingly inadequate. We present a hierarchical MapReduce framework that utilizes computation resources from multiple clusters simultaneously to run MapReduce jobs across them. Applications implemented in this framework adopt the Map-Reduce-GlobalReduce model, in which computations are expressed as three functions: Map, Reduce, and GlobalReduce. Two scheduling algorithms are proposed, one targeting compute-intensive jobs and the other data-intensive jobs; both are evaluated using a life-science application, AutoDock, and a simple Grep. Data management is explored through analysis of the Gfarm file system. Copyright © 2012 John Wiley & Sons, Ltd.
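The Map-Reduce-GlobalReduce model described in the abstract can be illustrated with a minimal Python sketch: each cluster runs an ordinary MapReduce job over its share of the input, and a final GlobalReduce merges the per-cluster partial results at a central site. The word-count example and all function names below are illustrative assumptions, not the paper's actual API or implementation.

# Sketch of the Map-Reduce-GlobalReduce model: per-cluster MapReduce
# runs produce partial results, and a GlobalReduce merges them.
# Function names and the word-count example are hypothetical.
from collections import defaultdict
from itertools import groupby
from operator import itemgetter

def map_fn(record):
    # Local Map: emit (key, value) pairs for one input record.
    for word in record.split():
        yield word, 1

def reduce_fn(key, values):
    # Local Reduce: combine values for a key within one cluster.
    return key, sum(values)

def global_reduce_fn(key, partials):
    # GlobalReduce: merge per-cluster partial results for a key.
    return key, sum(partials)

def run_local_mapreduce(records):
    # One cluster's MapReduce job over its partition of the input.
    pairs = sorted(kv for r in records for kv in map_fn(r))
    return [reduce_fn(k, [v for _, v in grp])
            for k, grp in groupby(pairs, key=itemgetter(0))]

def run_hierarchical(partitions):
    # Each partition stands in for the data held by one cluster.
    partials = defaultdict(list)
    for cluster_records in partitions:
        for k, v in run_local_mapreduce(cluster_records):
            partials[k].append(v)
    return dict(global_reduce_fn(k, vs) for k, vs in partials.items())

# Example: two "clusters" each processing part of a corpus.
print(run_hierarchical([["a b a"], ["b b c"]]))
# {'a': 2, 'b': 3, 'c': 1}

In the actual framework the two reduce stages run on different resources: Reduce executes inside each cluster alongside Map, while GlobalReduce runs once, after all clusters' outputs have been gathered.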
@article{Luo2014,
 title = {Hierarchical MapReduce: Towards simplified cross-domain data processing},
 year = {2014},
 volume = {26},
 abstract = {The MapReduce programming model has proven useful for data-driven, high-throughput applications. However, the conventional MapReduce model limits itself to scheduling jobs within a single cluster. As job sizes grow larger, single-cluster solutions become increasingly inadequate. We present a hierarchical MapReduce framework that utilizes computation resources from multiple clusters simultaneously to run MapReduce jobs across them. Applications implemented in this framework adopt the Map-Reduce-GlobalReduce model, in which computations are expressed as three functions: Map, Reduce, and GlobalReduce. Two scheduling algorithms are proposed, one targeting compute-intensive jobs and the other data-intensive jobs; both are evaluated using a life-science application, AutoDock, and a simple Grep. Data management is explored through analysis of the Gfarm file system. Copyright © 2012 John Wiley & Sons, Ltd.},
 author = {Luo, Y. and Plale, B. and Guo, Z. and Li, W.W. and Qiu, J. and Sun, Y.},
 doi = {10.1002/cpe.2929},
 journal = {Concurrency and Computation: Practice and Experience},
 number = {4}
}
