High performance clustering of social images in a map-collective programming model. Zhang, B. & Qiu, J. In Proceedings of the 4th annual Symposium on Cloud Computing, page 44, 2013. ACM.
doi  abstract   bibtex   
Large-scale iterative computations are common in many important data mining and machine learning algorithms. Most of these applications can be specified as iterations of MapReduce computations, leading to the Iterative MapReduce programming model [1] for efficient execution of data-intensive iterative computations interoperably between HPC and cloud environments. We observe that a systematic approach to collective communication is essential but notably missing in the current model. Thus we generalize the iterative MapReduce concept to Map-Collective on the premise that large collectives are a distinctive feature of data intensive and data mining applications. To show the necessity of Map-Collective model, this paper studies the implications of large-scale social image clustering problems, where 10-100 million images represented as points in a high dimensional (up to 2048) vector space are required to be divided into 1-10 million clusters. Copyright © 2013 by the Association for Computing Machinery, Inc.
@comment{The export omitted the entry key; it is restored here from the record's own citation_key field. Bibliographic fields come first, reference-manager metadata last.}
@inproceedings{Zhang2013a,
  author              = {Zhang, Bingjing and Qiu, Judy},
  title               = {High performance clustering of social images in a map-collective programming model},
  booktitle           = {Proceedings of the 4th annual Symposium on Cloud Computing},
  year                = {2013},
  pages               = {44},
  publisher           = {ACM},
  doi                 = {10.1145/2523616.2525952},
  abstract            = {Large-scale iterative computations are common in many important data mining and machine learning algorithms. Most of these applications can be specified as iterations of MapReduce computations, leading to the Iterative MapReduce programming model [1] for efficient execution of data-intensive iterative computations interoperably between HPC and cloud environments. We observe that a systematic approach to collective communication is essential but notably missing in the current model. Thus we generalize the iterative MapReduce concept to Map-Collective on the premise that large collectives are a distinctive feature of data intensive and data mining applications. To show the necessity of Map-Collective model, this paper studies the implications of large-scale social image clustering problems, where 10-100 million images represented as points in a high dimensional (up to 2048) vector space are required to be divided into 1-10 million clusters. Copyright © 2013 by the Association for Computing Machinery, Inc.},
  type                = {inproceedings},
  bibtype             = {inproceedings},
  source_type         = {CONF},
  citation_key        = {Zhang2013a},
  id                  = {cf291953-8de2-3e3f-a773-fe2009d69032},
  profile_id          = {42d295c0-0737-38d6-8b43-508cab6ea85d},
  folder_uuids        = {36d8ccf4-7085-47fa-8ab9-897283d082c5},
  created             = {2017-12-18T21:44:04.163Z},
  last_modified       = {2020-05-11T14:43:45.656Z},
  file_attached       = {false},
  read                = {false},
  starred             = {false},
  authored            = {true},
  confirmed           = {true},
  hidden              = {false},
  private_publication = {false},
}

Downloads: 0