Horme: Random access big data analytics. Ruan, G. & Plale, B. In Proceedings - IEEE International Conference on Cluster Computing, ICCC, 2016.
doi  abstract   bibtex   
© 2016 IEEE. MapReduce is a parallel framework which has been widely adopted for conducting large-scale data analytics. In cases where analysis of multiple millions of books must be analyzed using federally funded high performance computing (HPC) resources, the framework fails to port directly. We propose a solution that builds off of MapReduce for use on a HPC system that preserves the key-value semantics of map-reduce while supporting the random access of query access for subsetting Big Data datasets, and at same time hosting the service using the storage medium found in HPC architectures (parallel file systems) for reduced latencies. Experimental results demonstrate Horme's good performance in the HPC setting, with up to 41.4% faster than NoSQL based solution in random access scenario.
@comment{Conference paper by Ruan and Plale (2016). The citation key is taken
from the entry's own citation_key field. Fields such as id, created, profile_id,
group_id, read, starred, bibtype and citation_key are reference-manager export
metadata; standard styles ignore unknown fields, so they are kept for round-tripping.}
@inproceedings{Ruan2016,
  author              = {Ruan, G. and Plale, B.},
  title               = {{Horme}: Random Access Big Data Analytics},
  booktitle           = {Proceedings - {IEEE} International Conference on Cluster Computing, {ICCC}},
  year                = {2016},
  doi                 = {10.1109/CLUSTER.2016.27},
  abstract            = {{\copyright} 2016 IEEE. MapReduce is a parallel framework which has been widely adopted for conducting large-scale data analytics. In cases where analysis of multiple millions of books must be analyzed using federally funded high performance computing (HPC) resources, the framework fails to port directly. We propose a solution that builds off of MapReduce for use on a HPC system that preserves the key-value semantics of map-reduce while supporting the random access of query access for subsetting Big Data datasets, and at same time hosting the service using the storage medium found in HPC architectures (parallel file systems) for reduced latencies. Experimental results demonstrate Horme's good performance in the HPC setting, with up to 41.4\% faster than NoSQL based solution in random access scenario.},
  type                = {inproceedings},
  bibtype             = {inproceedings},
  citation_key        = {Ruan2016},
  id                  = {42514b76-cb81-39ef-bf6d-bf3e950c6cdd},
  created             = {2018-03-05T18:20:25.036Z},
  last_modified       = {2018-03-05T18:20:25.036Z},
  profile_id          = {42d295c0-0737-38d6-8b43-508cab6ea85d},
  group_id            = {9d761a94-2f2d-31ce-a8c3-50aa6d668643},
  file_attached       = {false},
  read                = {false},
  starred             = {false},
  authored            = {false},
  confirmed           = {false},
  hidden              = {false},
  private_publication = {false},
}

Downloads: 0