Enhanced semi-supervised learning for multimodal emotion recognition. Zhang, Z., Ringeval, F., Dong, B., Coutinho, E., Marchi, E., & Schuller, B. In ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, volume 2016-May, pages 5185–5189, March 2016. IEEE.
Enhanced semi-supervised learning for multimodal emotion recognition — [link] Website · doi · abstract · bibtex
Semi-Supervised Learning (SSL) techniques have found many applications where labeled data is scarce and/or expensive to obtain. However, SSL suffers from various inherent limitations that limit its performance in practical applications. A central problem is that the low performance that a classifier can deliver on challenging recognition tasks reduces the trustability of the automatically labeled data. Another related issue is the noise accumulation problem - instances that are misclassified by the system are still used to train it in future iterations. In this paper, we propose to address both issues in the context of emotion recognition. Initially, we exploit the complementarity between audio-visual features to improve the performance of the classifier during the supervised phase. Then, we iteratively re-evaluate the automatically labeled instances to correct possibly mislabeled data and this enhances the overall confidence of the system's predictions. Experimental results performed on the RECOLA database demonstrate that our methodology delivers a strong performance in the classification of high/low emotional arousal (UAR = 76.5%), and significantly outperforms traditional SSL methods by at least 5.0% (absolute gain).
@inproceedings{zhang2016enhancedrecognition,
  author              = {Zhang, Zixing and Ringeval, Fabien and Dong, Bin and Coutinho, Eduardo and Marchi, Erik and Schuller, Bj{\"o}rn},
  title               = {Enhanced semi-supervised learning for multimodal emotion recognition},
  booktitle           = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  year                = {2016},
  month               = mar,
  volume              = {2016-May},
  pages               = {5185--5189},
  publisher           = {IEEE},
  doi                 = {10.1109/ICASSP.2016.7472666},
  abstract            = {Semi-Supervised Learning (SSL) techniques have found many applications where labeled data is scarce and/or expensive to obtain. However, SSL suffers from various inherent limitations that limit its performance in practical applications. A central problem is that the low performance that a classifier can deliver on challenging recognition tasks reduces the trustability of the automatically labeled data. Another related issue is the noise accumulation problem - instances that are misclassified by the system are still used to train it in future iterations. In this paper, we propose to address both issues in the context of emotion recognition. Initially, we exploit the complementarity between audio-visual features to improve the performance of the classifier during the supervised phase. Then, we iteratively re-evaluate the automatically labeled instances to correct possibly mislabeled data and this enhances the overall confidence of the system's predictions. Experimental results performed on the RECOLA database demonstrate that our methodology delivers a strong performance in the classification of high/low emotional arousal (UAR = 76.5\%), and significantly outperforms traditional SSL methods by at least 5.0\% (absolute gain).},
  keywords            = {article,conference},
  type                = {inproceedings},
  websites            = {http://ieeexplore.ieee.org/document/7472666/},
  id                  = {8f1f3221-0c83-30dd-84a1-4992535dbafb},
  created             = {2020-05-29T11:51:38.718Z},
  file_attached       = {true},
  profile_id          = {ffa9027c-806a-3827-93a1-02c42eb146a1},
  last_modified       = {2023-05-15T08:14:21.986Z},
  read                = {false},
  starred             = {false},
  authored            = {true},
  confirmed           = {true},
  hidden              = {false},
  citation_key        = {zhang2016enhancedrecognition},
  source_type         = {inproceedings},
  folder_uuids        = {aac08d0d-38e7-4f4e-a381-5271c5c099ce},
  private_publication = {false},
  bibtype             = {inproceedings}
}

Downloads: 0