Mammoth data in the cloud: Clustering social images. Qiu, J. & Zhang, B. Volume 23, 2013.
doi  abstract   bibtex   
Social image datasets have grown to dramatic size with images classified in vector spaces with high dimension (512-2048) and with potentially billions of images and corresponding classification vectors. We study the challenging problem of clustering such sets into millions of clusters using Iterative MapReduce. We introduce a new Kmeans algorithm in the Map phase which can tackle the challenge of large cluster and dimension size. Further we stress that the necessary parallelism of such data intensive problems are dominated by particular collective operations which are common to MPI and MapReduce and study different collective implementations, which enable cloud-HPC cluster interoperability. Extensive performance results are presented. © 2013 The Authors.
@comment{
  Record for Qiu and Zhang (2013), "Mammoth data in the cloud: Clustering social images".
  The entry key is taken from the citation_key field of this record.
  The series name is copied from the non-standard source field into series,
  so that the volume number is attached to a field that styles actually read.
  NOTE(review): publisher is required for book entries but is absent from this record - add once confirmed.
  NOTE(review): the DOI suffix looks like a chapter-level identifier; if this is a chapter, an incollection entry with a booktitle may fit better - confirm.
}
@book{Qiu2013a,
 title = {Mammoth data in the cloud: Clustering social images},
 type = {book},
 year = {2013},
 source = {Advances in Parallel Computing},
 series = {Advances in Parallel Computing},
 volume = {23},
 id = {03836e05-16fc-388d-9d06-b193b33472dd},
 created = {2017-11-28T17:32:48.788Z},
 file_attached = {false},
 profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
 last_modified = {2020-05-11T14:43:30.325Z},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {false},
 hidden = {false},
 citation_key = {Qiu2013a},
 folder_uuids = {36d8ccf4-7085-47fa-8ab9-897283d082c5},
 private_publication = {false},
 abstract = {Social image datasets have grown to dramatic size with images classified in vector spaces with high dimension (512-2048) and with potentially billions of images and corresponding classification vectors. We study the challenging problem of clustering such sets into millions of clusters using Iterative MapReduce. We introduce a new Kmeans algorithm in the Map phase which can tackle the challenge of large cluster and dimension size. Further we stress that the necessary parallelism of such data intensive problems are dominated by particular collective operations which are common to MPI and MapReduce and study different collective implementations, which enable cloud-HPC cluster interoperability. Extensive performance results are presented. © 2013 The Authors.},
 bibtype = {book},
 author = {Qiu, J. and Zhang, B.},
 doi = {10.3233/978-1-61499-322-3-231}
}

Downloads: 0