A tale of two data-intensive paradigms: Applications, abstractions, and architectures. Jha, S., Qiu, J., Luckow, A., Mantha, P., & Fox, G. C. In 2014 IEEE International Congress on Big Data (BigData Congress), pages 645–652, 2014. IEEE.
doi  abstract   bibtex   
© 2014 IEEE. Scientific problems that depend on processing large amounts of data require overcoming challenges in multiple areas: managing large-scale data distribution, co-placement and scheduling of data with compute resources, and storing and transferring large volumes of data. We analyze the ecosystems of the two prominent paradigms for data-intensive applications, hereafter referred to as the high-performance computing and the Apache-Hadoop paradigm. We propose a basis, common terminology and functional factors upon which to analyze the two approaches of both paradigms. We discuss the concept of 'Big Data Ogres' and their facets as means of understanding and characterizing the most common application workloads found across the two paradigms. We then discuss the salient features of the two paradigms, and compare and contrast the two approaches. Specifically, we examine common implementation/approaches of these paradigms, shed light upon the reasons for their current 'architecture' and discuss some typical workloads that utilize them. In spite of the significant software distinctions, we believe there is architectural similarity. We discuss the potential integration of different implementations, across the different levels and components. Our comparison progresses from a fully qualitative examination of the two paradigms, to a semi-quantitative methodology. We use a simple and broadly used Ogre (K-means clustering), characterize its performance on a range of representative platforms, covering several implementations from both paradigms. Our experiments provide an insight into the relative strengths of the two paradigms. We propose that the set of Ogres will serve as a benchmark to evaluate the two paradigms along different dimensions.
@inproceedings{Jha2014,
 title = {A tale of two data-intensive paradigms: Applications, abstractions, and architectures},
 type = {inproceedings},
 year = {2014},
 pages = {645--652},
 publisher = {IEEE},
 id = {9f8cd8c0-ef36-33d5-8d01-46b03af1d0e6},
 created = {2017-12-18T21:44:04.703Z},
 file_attached = {false},
 profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
 last_modified = {2020-05-11T14:43:45.278Z},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {true},
 hidden = {false},
 citation_key = {Jha2014},
 source_type = {CONF},
 folder_uuids = {36d8ccf4-7085-47fa-8ab9-897283d082c5},
 private_publication = {false},
 abstract = {© 2014 IEEE. Scientific problems that depend on processing large amounts of data require overcoming challenges in multiple areas: managing large-scale data distribution, co-placement and scheduling of data with compute resources, and storing and transferring large volumes of data. We analyze the ecosystems of the two prominent paradigms for data-intensive applications, hereafter referred to as the high-performance computing and the Apache-Hadoop paradigm. We propose a basis, common terminology and functional factors upon which to analyze the two approaches of both paradigms. We discuss the concept of 'Big Data Ogres' and their facets as means of understanding and characterizing the most common application workloads found across the two paradigms. We then discuss the salient features of the two paradigms, and compare and contrast the two approaches. Specifically, we examine common implementation/approaches of these paradigms, shed light upon the reasons for their current 'architecture' and discuss some typical workloads that utilize them. In spite of the significant software distinctions, we believe there is architectural similarity. We discuss the potential integration of different implementations, across the different levels and components. Our comparison progresses from a fully qualitative examination of the two paradigms, to a semi-quantitative methodology. We use a simple and broadly used Ogre (K-means clustering), characterize its performance on a range of representative platforms, covering several implementations from both paradigms. Our experiments provide an insight into the relative strengths of the two paradigms. We propose that the set of Ogres will serve as a benchmark to evaluate the two paradigms along different dimensions.},
 bibtype = {inproceedings},
 author = {Jha, Shantenu and Qiu, Judy and Luckow, Andre and Mantha, Pradeep and Fox, Geoffrey C.},
 doi = {10.1109/BigData.Congress.2014.137},
 booktitle = {2014 {IEEE} International Congress on Big Data ({BigData} Congress)}
}

Downloads: 0