Cluster Ensembles — A Knowledge Reuse Framework for Combining Multiple Partitions. Strehl, A. & Ghosh, J. Journal of Machine Learning Research, 3(Dec):583–617, 2002. 311 citations (Semantic Scholar/DOI) [2025-11-01]
This paper introduces the problem of combining multiple partitionings of a set of objects into a single consolidated clustering without accessing the features or algorithms that determined these partitionings. We first identify several application scenarios for the resultant 'knowledge reuse' framework that we call cluster ensembles. The cluster ensemble problem is then formalized as a combinatorial optimization problem in terms of shared mutual information. In addition to a direct maximization approach, we propose three effective and efficient techniques for obtaining high-quality combiners (consensus functions). The first combiner induces a similarity measure from the partitionings and then reclusters the objects. The second combiner is based on hypergraph partitioning. The third one collapses groups of clusters into meta-clusters which then compete for each object to determine the combined clustering. Due to the low computational costs of our techniques, it is quite feasible to use a supra-consensus function that evaluates all three approaches against the objective function and picks the best solution for a given situation. We evaluate the effectiveness of cluster ensembles in three qualitatively different application scenarios: (i) where the original clusters were formed based on non-identical sets of features, (ii) where the original clustering algorithms worked on non-identical sets of objects, and (iii) where a common data-set is used and the main purpose of combining multiple clusterings is to improve the quality and robustness of the solution. Promising results are obtained in all three situations for synthetic as well as real data-sets.
@article{strehl_cluster_2002,
	title    = {Cluster Ensembles --- A Knowledge Reuse Framework for Combining Multiple Partitions},
	volume   = {3},
	issn     = {1533-7928},
	url      = {https://www.jmlr.org/papers/v3/strehl02a.html},
	doi      = {10.1162/153244303321897735},
	abstract = {This paper introduces the problem of combining multiple partitionings of a set of objects into a single consolidated clustering without accessing the features or algorithms that determined these partitionings. We first identify several application scenarios for the resultant 'knowledge reuse' framework that we call cluster ensembles. The cluster ensemble problem is then formalized as a combinatorial optimization problem in terms of shared mutual information. In addition to a direct maximization approach, we propose three effective and efficient techniques for obtaining high-quality combiners (consensus functions). The first combiner induces a similarity measure from the partitionings and then reclusters the objects. The second combiner is based on hypergraph partitioning. The third one collapses groups of clusters into meta-clusters which then compete for each object to determine the combined clustering. Due to the low computational costs of our techniques, it is quite feasible to use a supra-consensus function that evaluates all three approaches against the objective function and picks the best solution for a given situation. We evaluate the effectiveness of cluster ensembles in three qualitatively different application scenarios: (i) where the original clusters were formed based on non-identical sets of features, (ii) where the original clustering algorithms worked on non-identical sets of objects, and (iii) where a common data-set is used and the main purpose of combining multiple clusterings is to improve the quality and robustness of the solution. Promising results are obtained in all three situations for synthetic as well as real data-sets.},
	number   = {Dec},
	urldate  = {2025-11-01},
	journal  = {Journal of Machine Learning Research},
	author   = {Strehl, Alexander and Ghosh, Joydeep},
	year     = {2002},
	note     = {311 citations (Semantic Scholar/DOI) [2025-11-01]},
	pages    = {583--617},
}
Downloads: 0
{"_id":"XBFPPBf4F8ZCH84My","bibbaseid":"strehl-ghosh-clusterensemblesaknowledgereuseframeworkforcombiningmultiplepartitions-2002","downloads":0,"creationDate":"2018-02-07T16:22:57.351Z","title":"Cluster Ensembles — A Knowledge Reuse Framework for Combining Multiple Partitions","author_short":["Strehl, A.","Ghosh, J."],"year":2002,"bibtype":"article","biburl":"https://bibbase.org/zotero/hegera","bibdata":{"bibtype":"article","type":"article","title":"Cluster Ensembles — A Knowledge Reuse Framework for Combining Multiple Partitions","volume":"3","issn":"ISSN 1533-7928","url":"https://www.jmlr.org/papers/v3/strehl02a.html","doi":"10.1162/153244303321897735","abstract":"This paper introduces the problem of combining multiple partitionings of a set of objects into a single consolidated clustering without accessing the features or algorithms that determined these partitionings. We first identify several application scenarios for the resultant 'knowledge reuse' framework that we call cluster ensembles. The cluster ensemble problem is then formalized as a combinatorial optimization problem in terms of shared mutual information. In addition to a direct maximization approach, we propose three effective and efficient techniques for obtaining high-quality combiners (consensus functions). The first combiner induces a similarity measure from the partitionings and then reclusters the objects. The second combiner is based on hypergraph partitioning. The third one collapses groups of clusters into meta-clusters which then compete for each object to determine the combined clustering. Due to the low computational costs of our techniques, it is quite feasible to use a supra-consensus function that evaluates all three approaches against the objective function and picks the best solution for a given situation. 
We evaluate the effectiveness of cluster ensembles in three qualitatively different application scenarios: (i) where the original clusters were formed based on non-identical sets of features, (ii) where the original clustering algorithms worked on non-identical sets of objects, and (iii) where a common data-set is used and the main purpose of combining multiple clusterings is to improve the quality and robustness of the solution. Promising results are obtained in all three situations for synthetic as well as real data-sets.","number":"Dec","urldate":"2025-11-01","journal":"Journal of Machine Learning Research","author":[{"propositions":[],"lastnames":["Strehl"],"firstnames":["Alexander"],"suffixes":[]},{"propositions":[],"lastnames":["Ghosh"],"firstnames":["Joydeep"],"suffixes":[]}],"year":"2002","note":"311 citations (Semantic Scholar/DOI) [2025-11-01]","pages":"583–617","bibtex":"@article{strehl_cluster_2002,\n\ttitle = {Cluster {Ensembles} --- {A} {Knowledge} {Reuse} {Framework} for {Combining} {Multiple} {Partitions}},\n\tvolume = {3},\n\tissn = {ISSN 1533-7928},\n\turl = {https://www.jmlr.org/papers/v3/strehl02a.html},\n\tdoi = {10.1162/153244303321897735},\n\tabstract = {This paper introduces the problem of combining multiple partitionings\nof a set of objects into a single consolidated clustering without accessing the features or algorithms that determined these\npartitionings. We first identify several application scenarios for\nthe resultant 'knowledge reuse' framework that we call cluster ensembles.\nThe cluster ensemble problem is then formalized as a combinatorial\noptimization problem in terms of shared mutual information. In\naddition to a direct maximization approach, we propose three effective\nand efficient techniques for obtaining high-quality combiners\n(consensus functions). The first combiner induces a similarity\nmeasure from the partitionings and then reclusters the objects. The\nsecond combiner is based on hypergraph partitioning. 
The third one\ncollapses groups of clusters into meta-clusters which then compete for\neach object to determine the combined clustering. Due to the low\ncomputational costs of our techniques, it is quite feasible to use a\nsupra-consensus function that evaluates all three approaches against\nthe objective function and picks the best solution for a given\nsituation.\nWe evaluate the effectiveness of cluster ensembles in three\nqualitatively different application scenarios: (i) where the original\nclusters were formed based on non-identical sets of features, (ii)\nwhere the original clustering algorithms worked on non-identical sets\nof objects, and (iii) where a common data-set is used and the main\npurpose of combining multiple clusterings is to improve the quality and\nrobustness of the solution. Promising results are obtained in all\nthree situations for synthetic as well as real data-sets.},\n\tnumber = {Dec},\n\turldate = {2025-11-01},\n\tjournal = {Journal of Machine Learning Research},\n\tauthor = {Strehl, Alexander and Ghosh, Joydeep},\n\tyear = {2002},\n\tnote = {311 citations (Semantic Scholar/DOI) [2025-11-01]},\n\tpages = {583--617},\n}\n\n\n\n\n\n\n\n","author_short":["Strehl, A.","Ghosh, J."],"key":"strehl_cluster_2002","id":"strehl_cluster_2002","bibbaseid":"strehl-ghosh-clusterensemblesaknowledgereuseframeworkforcombiningmultiplepartitions-2002","role":"author","urls":{"Paper":"https://www.jmlr.org/papers/v3/strehl02a.html"},"metadata":{"authorlinks":{}},"downloads":0,"html":""},"search_terms":["cluster","ensembles","knowledge","reuse","framework","combining","multiple","partitions","strehl","ghosh"],"keywords":[],"authorIDs":[],"dataSources":["MjyMYAXhzMT5PBaZ2"]}