On the improvement of modulation features using multi-microphone energy tracking for robust distant speech recognition. Rodomagoulakis, I. & Maragos, P. In 2017 25th European Signal Processing Conference (EUSIPCO), pages 558–562, Aug. 2017.
Paper doi abstract bibtex In this work, we investigate robust speech energy estimation and tracking schemes aiming at improved energy-based multiband speech demodulation and feature extraction for multi-microphone distant speech recognition. Based on the spatial diversity of the speech and noise recordings of a multi-microphone setup, the proposed Multichannel, Multiband Demodulation (MMD) scheme includes: 1) energy selection across the microphones that are less affected by noise and 2) cross-signal energy estimation based on the cross-Teager energy operator. Instantaneous modulations of speech resonances are estimated on the denoised energies. Second-order frequency modulation features are measured and combined with MFCCs achieving improved distant speech recognition on simulated and real data recorded in noisy and reverberant domestic environments.
% EUSIPCO 2017 conference paper; the citation key matches the numeric suffix of the DOI.
@InProceedings{8081269,
  author    = {Rodomagoulakis, I. and Maragos, P.},
  title     = {On the improvement of modulation features using multi-microphone energy tracking for robust distant speech recognition},
  booktitle = {2017 25th European Signal Processing Conference ({EUSIPCO})},
  year      = {2017},
  month     = aug,
  pages     = {558--562},
  abstract  = {In this work, we investigate robust speech energy estimation and tracking schemes aiming at improved energy-based multiband speech demodulation and feature extraction for multi-microphone distant speech recognition. Based on the spatial diversity of the speech and noise recordings of a multi-microphone setup, the proposed Multichannel, Multiband Demodulation (MMD) scheme includes: 1) energy selection across the microphones that are less affected by noise and 2) cross-signal energy estimation based on the cross-Teager energy operator. Instantaneous modulations of speech resonances are estimated on the denoised energies. Second-order frequency modulation features are measured and combined with MFCCs achieving improved distant speech recognition on simulated and real data recorded in noisy and reverberant domestic environments.},
  keywords  = {array signal processing;demodulation;feature extraction;microphone arrays;speech recognition;multimicrophone distant speech recognition;cross-signal energy estimation;cross-Teager energy operator;second-order frequency modulation features;multimicrophone energy tracking;robust distant speech recognition;robust speech energy estimation;energy based multiband speech demodulation;speech resonances estimation;energy denoising;Speech;Feature extraction;Microphones;Noise measurement;Demodulation;Robustness},
  doi       = {10.23919/EUSIPCO.2017.8081269},
  issn      = {2076-1465},
  url       = {https://www.eurasip.org/proceedings/eusipco/eusipco2017/papers/1570347597.pdf},
}
Downloads: 0
{"_id":"nwRQm3nSSkfdo2Ggv","bibbaseid":"rodomagoulakis-maragos-ontheimprovementofmodulationfeaturesusingmultimicrophoneenergytrackingforrobustdistantspeechrecognition-2017","authorIDs":[],"author_short":["Rodomagoulakis, I.","Maragos, P."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","author":[{"firstnames":["I."],"propositions":[],"lastnames":["Rodomagoulakis"],"suffixes":[]},{"firstnames":["P."],"propositions":[],"lastnames":["Maragos"],"suffixes":[]}],"booktitle":"2017 25th European Signal Processing Conference (EUSIPCO)","title":"On the improvement of modulation features using multi-microphone energy tracking for robust distant speech recognition","year":"2017","pages":"558-562","abstract":"In this work, we investigate robust speech energy estimation and tracking schemes aiming at improved energy-based multiband speech demodulation and feature extraction for multi-microphone distant speech recognition. Based on the spatial diversity of the speech and noise recordings of a multi-microphone setup, the proposed Multichannel, Multiband Demodulation (MMD) scheme includes: 1) energy selection across the microphones that are less affected by noise and 2) cross-signal energy estimation based on the cross-Teager energy operator. Instantaneous modulations of speech resonances are estimated on the denoised energies. 
Second-order frequency modulation features are measured and combined with MFCCs achieving improved distant speech recognition on simulated and real data recorded in noisy and reverberant domestic environments.","keywords":"array signal processing;demodulation;feature extraction;microphone arrays;speech recognition;feature extraction;multimicrophone distant speech recognition;cross-signal energy estimation;cross-Teager energy operator;second-order frequency modulation features;multimicrophone energy tracking;robust distant speech recognition;robust speech energy estimation;energy based multiband speech demodulation;speech resonances estimation;energy denoising;Speech;Feature extraction;Microphones;Noise measurement;Demodulation;Robustness","doi":"10.23919/EUSIPCO.2017.8081269","issn":"2076-1465","month":"Aug","url":"https://www.eurasip.org/proceedings/eusipco/eusipco2017/papers/1570347597.pdf","bibtex":"@InProceedings{8081269,\n author = {I. Rodomagoulakis and P. Maragos},\n booktitle = {2017 25th European Signal Processing Conference (EUSIPCO)},\n title = {On the improvement of modulation features using multi-microphone energy tracking for robust distant speech recognition},\n year = {2017},\n pages = {558-562},\n abstract = {In this work, we investigate robust speech energy estimation and tracking schemes aiming at improved energy-based multiband speech demodulation and feature extraction for multi-microphone distant speech recognition. Based on the spatial diversity of the speech and noise recordings of a multi-microphone setup, the proposed Multichannel, Multiband Demodulation (MMD) scheme includes: 1) energy selection across the microphones that are less affected by noise and 2) cross-signal energy estimation based on the cross-Teager energy operator. Instantaneous modulations of speech resonances are estimated on the denoised energies. 
Second-order frequency modulation features are measured and combined with MFCCs achieving improved distant speech recognition on simulated and real data recorded in noisy and reverberant domestic environments.},\n keywords = {array signal processing;demodulation;feature extraction;microphone arrays;speech recognition;feature extraction;multimicrophone distant speech recognition;cross-signal energy estimation;cross-Teager energy operator;second-order frequency modulation features;multimicrophone energy tracking;robust distant speech recognition;robust speech energy estimation;energy based multiband speech demodulation;speech resonances estimation;energy denoising;Speech;Feature extraction;Microphones;Noise measurement;Demodulation;Robustness},\n doi = {10.23919/EUSIPCO.2017.8081269},\n issn = {2076-1465},\n month = {Aug},\n url = {https://www.eurasip.org/proceedings/eusipco/eusipco2017/papers/1570347597.pdf},\n}\n\n","author_short":["Rodomagoulakis, I.","Maragos, P."],"key":"8081269","id":"8081269","bibbaseid":"rodomagoulakis-maragos-ontheimprovementofmodulationfeaturesusingmultimicrophoneenergytrackingforrobustdistantspeechrecognition-2017","role":"author","urls":{"Paper":"https://www.eurasip.org/proceedings/eusipco/eusipco2017/papers/1570347597.pdf"},"keyword":["array signal processing;demodulation;feature extraction;microphone arrays;speech recognition;feature extraction;multimicrophone distant speech recognition;cross-signal energy estimation;cross-Teager energy operator;second-order frequency modulation features;multimicrophone energy tracking;robust distant speech recognition;robust speech energy estimation;energy based multiband speech demodulation;speech resonances estimation;energy denoising;Speech;Feature extraction;Microphones;Noise 
measurement;Demodulation;Robustness"],"metadata":{"authorlinks":{}},"downloads":0},"bibtype":"inproceedings","biburl":"https://raw.githubusercontent.com/Roznn/EUSIPCO/main/eusipco2017url.bib","creationDate":"2021-02-13T16:38:25.552Z","downloads":0,"keywords":["array signal processing;demodulation;feature extraction;microphone arrays;speech recognition;feature extraction;multimicrophone distant speech recognition;cross-signal energy estimation;cross-teager energy operator;second-order frequency modulation features;multimicrophone energy tracking;robust distant speech recognition;robust speech energy estimation;energy based multiband speech demodulation;speech resonances estimation;energy denoising;speech;feature extraction;microphones;noise measurement;demodulation;robustness"],"search_terms":["improvement","modulation","features","using","multi","microphone","energy","tracking","robust","distant","speech","recognition","rodomagoulakis","maragos"],"title":"On the improvement of modulation features using multi-microphone energy tracking for robust distant speech recognition","year":2017,"dataSources":["2MNbFYjMYTD6z7ExY","uP2aT6Qs8sfZJ6s8b"]}