Benchmarking Harp-DAAL: High Performance Hadoop on KNL Clusters. Chen, L., Peng, B., Zhang, B., Liu, T., Zou, Y., Jiang, L., Henschel, R., Stewart, C., Zhang, Z., McCallum, E., Tom, Z., Jon, O., & Qiu, J. In 2017 IEEE 10th International Conference on Cloud Computing (CLOUD), volume 2017-June, pages 82–89, June 2017. IEEE.
Benchmarking Harp-DAAL: High Performance Hadoop on KNL Clusters [link]Website  doi  abstract   bibtex   
Data analytics is undergoing a revolution in many scientific domains, and demands cost-effective parallel data analysis techniques. Traditional Java-based Big Data processing tools like Hadoop MapReduce are designed for commodity CPUs. In contrast, emerging manycore processors like the Xeon Phi have an order of magnitude greater computation power and memory bandwidth. To harness their computing capabilities, we propose the Harp-DAAL framework. We show that enhanced versions of MapReduce can be replaced by Harp, a Hadoop plug-in, that offers useful data abstractions for both high-performance iterative computation and MPI-quality communication, as well as drive Intel's native DAAL library. We select a subset of three machine learning algorithms and implement them within Harp-DAAL. Our scalability benchmarks ran on Knights Landing (KNL) clusters and achieved up to 2.5 times speedup of performance over the HPC solution in NOMAD and 15 to 40 times speedup over Java-based solutions in Spark. We further quantify the workloads on single node KNL with a performance breakdown at the micro-architecture level.
@comment{Conference paper (IEEE CLOUD 2017). The citation key is taken from
the entry's own citation_key field. Core bibliographic fields come first;
the remaining fields are reference-manager bookkeeping that standard styles
ignore.}
@inproceedings{Chen2017c,
  author              = {Chen, Langshi and Peng, Bo and Zhang, Bingjing and Liu, Tony and Zou, Yiming and Jiang, Lei and Henschel, Robert and Stewart, Craig and Zhang, Zhang and McCallum, Emily and Tom, Zahniser and Jon, Omer and Qiu, Judy},
  title               = {Benchmarking {Harp-DAAL}: High Performance {Hadoop} on {KNL} Clusters},
  booktitle           = {2017 {IEEE} 10th International Conference on Cloud Computing ({CLOUD})},
  year                = {2017},
  month               = jun,
  day                 = {8},
  volume              = {2017-June},
  pages               = {82--89},
  publisher           = {IEEE},
  doi                 = {10.1109/CLOUD.2017.19},
  url                 = {http://ieeexplore.ieee.org/document/8030575/},
  keywords            = {BigData,HPC,Xeon Phi},
  abstract            = {Data analytics is undergoing a revolution in many scientific domains, and demands cost-effective parallel data analysis techniques. Traditional Java-based Big Data processing tools like Hadoop MapReduce are designed for commodity CPUs. In contrast, emerging manycore processors like the Xeon Phi have an order of magnitude greater computation power and memory bandwidth. To harness their computing capabilities, we propose the Harp-DAAL framework. We show that enhanced versions of MapReduce can be replaced by Harp, a Hadoop plug-in, that offers useful data abstractions for both high-performance iterative computation and MPI-quality communication, as well as drive Intel's native DAAL library. We select a subset of three machine learning algorithms and implement them within Harp-DAAL. Our scalability benchmarks ran on Knights Landing (KNL) clusters and achieved up to 2.5 times speedup of performance over the HPC solution in NOMAD and 15 to 40 times speedup over Java-based solutions in Spark. We further quantify the workloads on single node KNL with a performance breakdown at the micro-architecture level.},
  type                = {inproceedings},
  bibtype             = {inproceedings},
  citation_key        = {Chen2017c},
  id                  = {711b172a-6620-334b-96d4-aa9c1dd94ceb},
  profile_id          = {42d295c0-0737-38d6-8b43-508cab6ea85d},
  folder_uuids        = {3b35931e-fb6d-48f9-8e01-87ee16ef0331},
  created             = {2020-09-09T19:45:43.999Z},
  accessed            = {2020-09-09},
  last_modified       = {2020-09-15T22:44:01.120Z},
  file_attached       = {false},
  read                = {false},
  starred             = {false},
  authored            = {true},
  confirmed           = {true},
  hidden              = {false},
  private_publication = {false}
}

Downloads: 0