Semi-supervised clustering with inaccurate pairwise annotations. Gribel, D., Gendreau, M., & Vidal, T. Information Sciences, 607(C):441-457, 2022.
Abstract: Pairwise relational information is a useful way of providing partial supervision in domains where class labels are difficult to acquire. This work presents a clustering model that incorporates pairwise annotations in the form of must-link and cannot-link relations and considers possible annotation inaccuracies (i.e., a common setting when experts provide pairwise supervision). We propose a generative model that assumes Gaussian-distributed data samples along with must-link and cannot-link relations generated by stochastic block models. We adopt a maximum-likelihood approach and demonstrate that, even when supervision is weak and inaccurate, accounting for relational information significantly improves clustering performance. Relational information also helps to detect meaningful groups in real-world datasets that do not fit the original data-distribution assumptions. Additionally, we extend the model to integrate prior knowledge of experts' accuracy and discuss circumstances in which the use of this knowledge is beneficial.
@comment{
  Journal article; the linked PDF is the arXiv preprint 2104.02146.
  The citation key is taken from the entry's own citation_key field.
  Fields from websites downward are reference-manager export metadata
  that standard bibliography styles ignore; they are kept for round-tripping.
}
@article{Gribel2021,
  author              = {Gribel, D. and Gendreau, M. and Vidal, T.},
  title               = {Semi-supervised clustering with inaccurate pairwise annotations},
  journal             = {Information Sciences},
  year                = {2022},
  volume              = {607},
  number              = {C},
  pages               = {441--457},
  url                 = {https://arxiv.org/pdf/2104.02146.pdf},
  eprint              = {2104.02146},
  eprinttype          = {arXiv},
  abstract            = {Pairwise relational information is a useful way of providing partial supervision in domains where class labels are difficult to acquire. This work presents a clustering model that incorporates pairwise annotations in the form of must-link and cannot-link relations and considers possible annotation inaccuracies (i.e., a common setting when experts provide pairwise supervision). We propose a generative model that assumes Gaussian-distributed data samples along with must-link and cannot-link relations generated by stochastic block models. We adopt a maximum-likelihood approach and demonstrate that, even when supervision is weak and inaccurate, accounting for relational information significantly improves clustering performance. Relational information also helps to detect meaningful groups in real-world datasets that do not fit the original data-distribution assumptions. Additionally, we extend the model to integrate prior knowledge of experts' accuracy and discuss circumstances in which the use of this knowledge is beneficial.},
  websites            = {https://arxiv.org/pdf/2104.02146.pdf},
  type                = {article},
  bibtype             = {article},
  id                  = {0330d47e-9f29-373e-ab2f-5783b08a9ae5},
  created             = {2021-04-07T13:22:58.049Z},
  file_attached       = {true},
  profile_id          = {5e3d1dc4-cb58-3af5-aff1-4d943d2eaf6a},
  last_modified       = {2022-09-09T13:57:33.625Z},
  read                = {true},
  starred             = {false},
  authored            = {true},
  confirmed           = {true},
  hidden              = {false},
  citation_key        = {Gribel2021},
  private_publication = {false},
}
Downloads: 0
{"_id":"RNQTYEvirYinRZ2aE","bibbaseid":"gribel-gendreau-vidal-semisupervisedclusteringwithinaccuratepairwiseannotations-2022","author_short":["Gribel, D.","Gendreau, M.","Vidal, T."],"bibdata":{"title":"Semi-supervised clustering with inaccurate pairwise annotations","type":"article","year":"2022","pages":"441-457","volume":"607","websites":"https://arxiv.org/pdf/2104.02146.pdf","id":"0330d47e-9f29-373e-ab2f-5783b08a9ae5","created":"2021-04-07T13:22:58.049Z","file_attached":"true","profile_id":"5e3d1dc4-cb58-3af5-aff1-4d943d2eaf6a","last_modified":"2022-09-09T13:57:33.625Z","read":"true","starred":false,"authored":"true","confirmed":"true","hidden":false,"citation_key":"Gribel2021","private_publication":false,"abstract":"Pairwise relational information is a useful way of providing partial supervision in domains where class labels are difficult to acquire. This work presents a clustering model that incorporates pairwise annotations in the form of must-link and cannot-link relations and considers possible annotation inaccuracies (i.e., a common setting when experts provide pairwise supervision). We propose a generative model that assumes Gaussian-distributed data samples along with must-link and cannot-link relations generated by stochastic block models. We adopt a maximum-likelihood approach and demonstrate that, even when supervision is weak and inaccurate, accounting for relational information significantly improves clustering performance. Relational information also helps to detect meaningful groups in real-world datasets that do not fit the original data-distribution assumptions. Additionally, we extend the model to integrate prior knowledge of experts' accuracy and discuss circumstances in which the use of this knowledge is beneficial.","bibtype":"article","author":"Gribel, D. and Gendreau, M. 
and Vidal, T.","journal":"Information Sciences","number":"C","bibtex":"@article{\n title = {Semi-supervised clustering with inaccurate pairwise annotations},\n type = {article},\n year = {2022},\n pages = {441-457},\n volume = {607},\n websites = {https://arxiv.org/pdf/2104.02146.pdf},\n id = {0330d47e-9f29-373e-ab2f-5783b08a9ae5},\n created = {2021-04-07T13:22:58.049Z},\n file_attached = {true},\n profile_id = {5e3d1dc4-cb58-3af5-aff1-4d943d2eaf6a},\n last_modified = {2022-09-09T13:57:33.625Z},\n read = {true},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Gribel2021},\n private_publication = {false},\n abstract = {Pairwise relational information is a useful way of providing partial supervision in domains where class labels are difficult to acquire. This work presents a clustering model that incorporates pairwise annotations in the form of must-link and cannot-link relations and considers possible annotation inaccuracies (i.e., a common setting when experts provide pairwise supervision). We propose a generative model that assumes Gaussian-distributed data samples along with must-link and cannot-link relations generated by stochastic block models. We adopt a maximum-likelihood approach and demonstrate that, even when supervision is weak and inaccurate, accounting for relational information significantly improves clustering performance. Relational information also helps to detect meaningful groups in real-world datasets that do not fit the original data-distribution assumptions. Additionally, we extend the model to integrate prior knowledge of experts' accuracy and discuss circumstances in which the use of this knowledge is beneficial.},\n bibtype = {article},\n author = {Gribel, D. and Gendreau, M. 
and Vidal, T.},\n journal = {Information Sciences},\n number = {C}\n}","author_short":["Gribel, D.","Gendreau, M.","Vidal, T."],"urls":{"Website":"https://arxiv.org/pdf/2104.02146.pdf"},"biburl":"https://bibbase.org/service/mendeley/1465671","bibbaseid":"gribel-gendreau-vidal-semisupervisedclusteringwithinaccuratepairwiseannotations-2022","role":"author","metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://bibbase.org/service/mendeley/1465671","dataSources":["yinfondEAJRbDM9sJ","sempRA6PhmAdGk3yG","2252seNhipfTmjEBQ"],"keywords":[],"search_terms":["semi","supervised","clustering","inaccurate","pairwise","annotations","gribel","gendreau","vidal"],"title":"Semi-supervised clustering with inaccurate pairwise annotations","year":2022}