Semi-supervised clustering with inaccurate pairwise annotations. Gribel, D., Gendreau, M., & Vidal, T. Information Sciences, 607(C):441–457, 2022. Abstract: Pairwise relational information is a useful way of providing partial supervision in domains where class labels are difficult to acquire. This work presents a clustering model that incorporates pairwise annotations in the form of must-link and cannot-link relations and considers possible annotation inaccuracies (i.e., a common setting when experts provide pairwise supervision). We propose a generative model that assumes Gaussian-distributed data samples along with must-link and cannot-link relations generated by stochastic block models. We adopt a maximum-likelihood approach and demonstrate that, even when supervision is weak and inaccurate, accounting for relational information significantly improves clustering performance. Relational information also helps to detect meaningful groups in real-world datasets that do not fit the original data-distribution assumptions. Additionally, we extend the model to integrate prior knowledge of experts' accuracy and discuss circumstances in which the use of this knowledge is beneficial.
@comment{
  Journal article, also available as an arXiv preprint (eprint 2104.02146).
  Maintenance notes:
  - The citation key says 2021 while the publication year is 2022; the key is
    kept unchanged so that existing citations keep resolving.
  - The title is no longer wrapped in a second pair of braces, so the
    bibliography style controls its casing.
  - The former number field held the letter C, which appears to be an indexer
    placeholder rather than a real issue number; it was dropped. TODO: confirm
    against the publisher record.
  - The reference-manager file path (a local Windows path) and the
    non-standard arxivId field (duplicate of eprint) were removed.
  - TODO: add the publisher DOI and full author given names once verified.
}
@article{Gribel2021,
  author        = {Gribel, D. and Gendreau, M. and Vidal, T.},
  title         = {Semi-supervised clustering with inaccurate pairwise annotations},
  journal       = {Information Sciences},
  volume        = {607},
  pages         = {441--457},
  year          = {2022},
  archivePrefix = {arXiv},
  eprint        = {2104.02146},
  url           = {https://arxiv.org/pdf/2104.02146.pdf},
  abstract      = {Pairwise relational information is a useful way of providing partial supervision in domains where class labels are difficult to acquire. This work presents a clustering model that incorporates pairwise annotations in the form of must-link and cannot-link relations and considers possible annotation inaccuracies (i.e., a common setting when experts provide pairwise supervision). We propose a generative model that assumes Gaussian-distributed data samples along with must-link and cannot-link relations generated by stochastic block models. We adopt a maximum-likelihood approach and demonstrate that, even when supervision is weak and inaccurate, accounting for relational information significantly improves clustering performance. Relational information also helps to detect meaningful groups in real-world datasets that do not fit the original data-distribution assumptions. Additionally, we extend the model to integrate prior knowledge of experts' accuracy and discuss circumstances in which the use of this knowledge is beneficial.},
}
Downloads: 0
{"_id":"RNQTYEvirYinRZ2aE","bibbaseid":"gribel-gendreau-vidal-semisupervisedclusteringwithinaccuratepairwiseannotations-2022","author_short":["Gribel, D.","Gendreau, M.","Vidal, T."],"bibdata":{"bibtype":"article","type":"article","abstract":"Pairwise relational information is a useful way of providing partial supervision in domains where class labels are difficult to acquire. This work presents a clustering model that incorporates pairwise annotations in the form of must-link and cannot-link relations and considers possible annotation inaccuracies (i.e., a common setting when experts provide pairwise supervision). We propose a generative model that assumes Gaussian-distributed data samples along with must-link and cannot-link relations generated by stochastic block models. We adopt a maximum-likelihood approach and demonstrate that, even when supervision is weak and inaccurate, accounting for relational information significantly improves clustering performance. Relational information also helps to detect meaningful groups in real-world datasets that do not fit the original data-distribution assumptions. 
Additionally, we extend the model to integrate prior knowledge of experts' accuracy and discuss circumstances in which the use of this knowledge is beneficial.","archiveprefix":"arXiv","arxivid":"2104.02146","author":[{"propositions":[],"lastnames":["Gribel"],"firstnames":["D."],"suffixes":[]},{"propositions":[],"lastnames":["Gendreau"],"firstnames":["M."],"suffixes":[]},{"propositions":[],"lastnames":["Vidal"],"firstnames":["T."],"suffixes":[]}],"eprint":"2104.02146","file":":C$\\$:/Users/Thibaut/Documents/Mendeley-Articles/Gribel, Gendreau, Vidal/Gribel, Gendreau, Vidal - 2022 - Semi-supervised clustering with inaccurate pairwise annotations.pdf:pdf","journal":"Information Sciences","number":"C","pages":"441–457","title":"Semi-supervised clustering with inaccurate pairwise annotations","url":"https://arxiv.org/pdf/2104.02146.pdf","volume":"607","year":"2022","bibtex":"@article{Gribel2021,\nabstract = {Pairwise relational information is a useful way of providing partial supervision in domains where class labels are difficult to acquire. This work presents a clustering model that incorporates pairwise annotations in the form of must-link and cannot-link relations and considers possible annotation inaccuracies (i.e., a common setting when experts provide pairwise supervision). We propose a generative model that assumes Gaussian-distributed data samples along with must-link and cannot-link relations generated by stochastic block models. We adopt a maximum-likelihood approach and demonstrate that, even when supervision is weak and inaccurate, accounting for relational information significantly improves clustering performance. Relational information also helps to detect meaningful groups in real-world datasets that do not fit the original data-distribution assumptions. 
Additionally, we extend the model to integrate prior knowledge of experts' accuracy and discuss circumstances in which the use of this knowledge is beneficial.},\narchivePrefix = {arXiv},\narxivId = {2104.02146},\nauthor = {Gribel, D. and Gendreau, M. and Vidal, T.},\neprint = {2104.02146},\nfile = {:C$\\backslash$:/Users/Thibaut/Documents/Mendeley-Articles/Gribel, Gendreau, Vidal/Gribel, Gendreau, Vidal - 2022 - Semi-supervised clustering with inaccurate pairwise annotations.pdf:pdf},\njournal = {Information Sciences},\nnumber = {C},\npages = {441--457},\ntitle = {{Semi-supervised clustering with inaccurate pairwise annotations}},\nurl = {https://arxiv.org/pdf/2104.02146.pdf},\nvolume = {607},\nyear = {2022}\n}\n","author_short":["Gribel, D.","Gendreau, M.","Vidal, T."],"key":"Gribel2021","id":"Gribel2021","bibbaseid":"gribel-gendreau-vidal-semisupervisedclusteringwithinaccuratepairwiseannotations-2022","role":"author","urls":{"Paper":"https://arxiv.org/pdf/2104.02146.pdf"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://w1.cirrelt.ca/~vidalt/resources/My%20Collection.bib","dataSources":["yinfondEAJRbDM9sJ","sempRA6PhmAdGk3yG"],"keywords":[],"search_terms":["semi","supervised","clustering","inaccurate","pairwise","annotations","gribel","gendreau","vidal"],"title":"Semi-supervised clustering with inaccurate pairwise annotations","year":2022}