The multiple voices of musical emotions: source separation for improving music emotion recognition models and their interpretability. de Berardinis, J., Cangelosi, A., & Coutinho, E. In Cumming, J., Lee, J. H., McFee, B., Schedl, M., Devaney, J., McKay, C., Zangerle, E., & de Reuse, T., editors, Proceedings of the 21st International Society for Music Information Retrieval Conference, pages 310-317, 2020. International Society for Music Information Retrieval.
Despite the manifold developments in music emotion recognition and related areas, estimating the emotional impact of music still poses many challenges. These are often associated with the complexity of the acoustic codes of emotion and the lack of large amounts of data with robust gold standards. In this paper, we propose a new computational model (EmoMucs) that considers the role of different musical voices in the prediction of the emotions induced by music. We combine source separation algorithms for breaking music signals up into independent song elements (vocals, bass, drums, other) with end-to-end state-of-the-art machine learning techniques for feature extraction and emotion modelling (valence and arousal regression). Through a series of computational experiments on a benchmark dataset, using source-specialised models trained independently and different fusion strategies, we demonstrate that EmoMucs outperforms state-of-the-art approaches, with the added advantage of providing insights into the relative contribution of different musical elements to the emotions perceived by listeners.
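The pipeline the abstract describes lends itself to a compact illustration. The sketch below (Python/PyTorch) is not the authors' implementation: the `SourceBackbone` CNN, its hyperparameters, and the learned softmax fusion weights are all hypothetical stand-ins. It only shows the general shape of the approach, namely one regressor per separated source (vocals, bass, drums, other) whose valence/arousal predictions are fused, with the fusion weights giving a rough view of each source's relative contribution.

```python
# Minimal sketch of a late-fusion, source-per-branch emotion regressor.
# NOT the authors' code: architecture, sizes, and the learned-weight fusion
# are illustrative assumptions; inputs are assumed pre-separated spectrograms.
import torch
import torch.nn as nn

SOURCES = ["vocals", "bass", "drums", "other"]

class SourceBackbone(nn.Module):
    """Small CNN mapping one source's mel-spectrogram to (valence, arousal)."""
    def __init__(self, n_mels: int = 128):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),  # global pooling over frequency and time
        )
        self.head = nn.Linear(32, 2)  # valence and arousal

    def forward(self, x):  # x: (batch, 1, n_mels, time)
        return self.head(self.conv(x).flatten(1))

class LateFusionEmoModel(nn.Module):
    """One backbone per separated source; predictions combined by learned weights."""
    def __init__(self):
        super().__init__()
        self.backbones = nn.ModuleDict({s: SourceBackbone() for s in SOURCES})
        self.weights = nn.Parameter(torch.zeros(len(SOURCES)))  # softmax-normalised

    def forward(self, sources: dict):
        preds = torch.stack([self.backbones[s](sources[s]) for s in SOURCES], dim=1)
        w = torch.softmax(self.weights, dim=0)        # per-source contribution
        return (w[None, :, None] * preds).sum(dim=1)  # (batch, 2)

# Usage: after training, inspecting the softmax of `weights` offers one crude
# interpretability signal of how much each source drives the prediction.
model = LateFusionEmoModel()
batch = {s: torch.randn(4, 1, 128, 256) for s in SOURCES}
print(model(batch).shape)  # torch.Size([4, 2])
```

The paper compares several fusion strategies; late fusion with scalar weights is just the simplest to read off, since the weights directly expose per-source contributions.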
