StreamLeader: A New Stream Clustering Algorithm not Based in Conventional Clustering. Andrés-Merino, J. & Belanche, L. A. In Villa, A. E., Masulli, P., & Pons Rivero, A. J., editors, Artificial Neural Networks and Machine Learning – ICANN 2016, Lecture Notes in Computer Science, pages 208–215, Cham, 2016. Springer International Publishing. doi abstract bibtex Stream clustering algorithms normally require two phases: an online first step that statistically summarizes the stream while forming special structures – such as micro-clusters– and a second, offline phase, that uses a conventional clustering algorithm taking the micro-clusters as pseudo-points to deliver the final clustering. This procedure tends to produce oversized or overlapping clusters in medium-to-high dimensional spaces, and typically degrades seriously in noisy data environments. In this paper we introduce StreamLeader, a novel stream clustering algorithm suitable to massive data that does not resort to a conventional clustering phase, being based on the notion of Leader Cluster and on an aggressive noise reduction process. We report an extensive systematic testing in which the new algorithm is shown to consistently outperform its contenders both in terms of quality and scalability.
@comment{
  Conference paper in the ICANN 2016 proceedings (Springer, Lecture Notes in
  Computer Science). In the title only StreamLeader is brace-protected, so a
  bibliography style is free to apply its own casing to the remaining words.
  Fields are ordered: names, titles, container, publication data,
  identifiers, then descriptive metadata.
}
@inproceedings{andres-merino_streamleader_2016,
  author     = {Andrés-Merino, Jaime and Belanche, Lluís A.},
  title      = {{StreamLeader}: A New Stream Clustering Algorithm not Based in Conventional Clustering},
  shorttitle = {{StreamLeader}},
  booktitle  = {Artificial Neural Networks and Machine Learning -- {ICANN} 2016},
  editor     = {Villa, Alessandro E. P. and Masulli, Paolo and Pons Rivero, Antonio Javier},
  series     = {Lecture Notes in Computer Science},
  publisher  = {Springer International Publishing},
  address    = {Cham},
  year       = {2016},
  pages      = {208--215},
  isbn       = {978-3-319-44781-0},
  doi        = {10.1007/978-3-319-44781-0_25},
  language   = {en},
  keywords   = {Big Data, Clustering, Stream algorithms},
  abstract   = {Stream clustering algorithms normally require two phases: an online first step that statistically summarizes the stream while forming special structures – such as micro-clusters– and a second, offline phase, that uses a conventional clustering algorithm taking the micro-clusters as pseudo-points to deliver the final clustering. This procedure tends to produce oversized or overlapping clusters in medium-to-high dimensional spaces, and typically degrades seriously in noisy data environments. In this paper we introduce StreamLeader, a novel stream clustering algorithm suitable to massive data that does not resort to a conventional clustering phase, being based on the notion of Leader Cluster and on an aggressive noise reduction process. We report an extensive systematic testing in which the new algorithm is shown to consistently outperform its contenders both in terms of quality and scalability.},
}
Downloads: 0
{"_id":"9vRNhibX4JxarMmMf","bibbaseid":"andrsmerino-belanche-streamleaderanewstreamclusteringalgorithmnotbasedinconventionalclustering-2016","author_short":["Andrés-Merino, J.","Belanche, L. A."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","address":"Cham","series":"Lecture Notes in Computer Science","title":"StreamLeader: A New Stream Clustering Algorithm not Based in Conventional Clustering","isbn":"978-3-319-44781-0","shorttitle":"StreamLeader","doi":"10.1007/978-3-319-44781-0_25","abstract":"Stream clustering algorithms normally require two phases: an online first step that statistically summarizes the stream while forming special structures – such as micro-clusters– and a second, offline phase, that uses a conventional clustering algorithm taking the micro-clusters as pseudo-points to deliver the final clustering. This procedure tends to produce oversized or overlapping clusters in medium-to-high dimensional spaces, and typically degrades seriously in noisy data environments. In this paper we introduce StreamLeader, a novel stream clustering algorithm suitable to massive data that does not resort to a conventional clustering phase, being based on the notion of Leader Cluster and on an aggressive noise reduction process. 
We report an extensive systematic testing in which the new algorithm is shown to consistently outperform its contenders both in terms of quality and scalability.","language":"en","booktitle":"Artificial Neural Networks and Machine Learning – ICANN 2016","publisher":"Springer International Publishing","author":[{"propositions":[],"lastnames":["Andrés-Merino"],"firstnames":["Jaime"],"suffixes":[]},{"propositions":[],"lastnames":["Belanche"],"firstnames":["Lluís","A."],"suffixes":[]}],"editor":[{"propositions":[],"lastnames":["Villa"],"firstnames":["Alessandro","E.P."],"suffixes":[]},{"propositions":[],"lastnames":["Masulli"],"firstnames":["Paolo"],"suffixes":[]},{"propositions":[],"lastnames":["Pons","Rivero"],"firstnames":["Antonio","Javier"],"suffixes":[]}],"year":"2016","keywords":"Big Data, Clustering, Stream algorithms","pages":"208–215","bibtex":"@inproceedings{andres-merino_streamleader_2016,\n\taddress = {Cham},\n\tseries = {Lecture {Notes} in {Computer} {Science}},\n\ttitle = {{StreamLeader}: {A} {New} {Stream} {Clustering} {Algorithm} not {Based} in {Conventional} {Clustering}},\n\tisbn = {978-3-319-44781-0},\n\tshorttitle = {{StreamLeader}},\n\tdoi = {10.1007/978-3-319-44781-0_25},\n\tabstract = {Stream clustering algorithms normally require two phases: an online first step that statistically summarizes the stream while forming special structures – such as micro-clusters– and a second, offline phase, that uses a conventional clustering algorithm taking the micro-clusters as pseudo-points to deliver the final clustering. This procedure tends to produce oversized or overlapping clusters in medium-to-high dimensional spaces, and typically degrades seriously in noisy data environments. In this paper we introduce StreamLeader, a novel stream clustering algorithm suitable to massive data that does not resort to a conventional clustering phase, being based on the notion of Leader Cluster and on an aggressive noise reduction process. 
We report an extensive systematic testing in which the new algorithm is shown to consistently outperform its contenders both in terms of quality and scalability.},\n\tlanguage = {en},\n\tbooktitle = {Artificial {Neural} {Networks} and {Machine} {Learning} – {ICANN} 2016},\n\tpublisher = {Springer International Publishing},\n\tauthor = {Andrés-Merino, Jaime and Belanche, Lluís A.},\n\teditor = {Villa, Alessandro E.P. and Masulli, Paolo and Pons Rivero, Antonio Javier},\n\tyear = {2016},\n\tkeywords = {Big Data, Clustering, Stream algorithms},\n\tpages = {208--215},\n}\n\n\n\n","author_short":["Andrés-Merino, J.","Belanche, L. A."],"editor_short":["Villa, A. E.","Masulli, P.","Pons Rivero, A. J."],"key":"andres-merino_streamleader_2016","id":"andres-merino_streamleader_2016","bibbaseid":"andrsmerino-belanche-streamleaderanewstreamclusteringalgorithmnotbasedinconventionalclustering-2016","role":"author","urls":{},"keyword":["Big Data","Clustering","Stream algorithms"],"metadata":{"authorlinks":{}},"html":""},"bibtype":"inproceedings","biburl":"https://bibbase.org/zotero/mh_lenguyen","dataSources":["iwKepCrWBps7ojhDx"],"keywords":["big data","clustering","stream algorithms"],"search_terms":["streamleader","new","stream","clustering","algorithm","based","conventional","clustering","andrés-merino","belanche"],"title":"StreamLeader: A New Stream Clustering Algorithm not Based in Conventional Clustering","year":2016}