Evaluation of PNCC and extended spectral subtraction methods for robust speech recognition

Evaluation of PNCC and extended spectral subtraction methods for robust speech recognition. Fux, T. & Jouvet, D. In 2015 23rd European Signal Processing Conference (EUSIPCO), pages 1416–1420, August 2015.

Paper doi abstract bibtex

This paper evaluates the robustness of different approaches for speech recognition with respect to signal-to-noise ratio (SNR), to signal level and to presence of non-speech data before and after utterances to be recognized. Three types of noise robust features are considered: Power Normalized Cepstral Coefficients (PNCC), Mel-Frequency Cepstral Coefficients (MFCC) after applying an extended spectral subtraction method, and Sphinx embedded denoising features from recent sphinx versions. Although removing C0 in MFCC-based features leads to a slight decrease in speech recognition performance, it makes the speech recognition system independent on the speech signal level. With multi-condition training, the three sets of noise-robust features lead to a rather similar behavior of performance with respect to SNR and presence of non-speech data. Overall, best performance is achieved with the extended spectral subtraction approach. Also, the performance of the PNCC features appears to be dependent on the initialization of the normalization factor.

@comment{EUSIPCO 2015 conference paper by Fux and Jouvet. The citation key is the
  numeric suffix of the DOI. Names use the "Last, First" form, acronyms in
  title and booktitle are brace-protected against style recasing, the page range
  uses a double hyphen, and month is the bare predefined macro.}
@InProceedings{7362617,
  author = {Fux, T. and Jouvet, D.},
  booktitle = {2015 23rd European Signal Processing Conference ({EUSIPCO})},
  title = {Evaluation of {PNCC} and extended spectral subtraction methods for robust speech recognition},
  year = {2015},
  pages = {1416--1420},
  abstract = {This paper evaluates the robustness of different approaches for speech recognition with respect to signal-to-noise ratio (SNR), to signal level and to presence of non-speech data before and after utterances to be recognized. Three types of noise robust features are considered: Power Normalized Cepstral Coefficients (PNCC), Mel-Frequency Cepstral Coefficients (MFCC) after applying an extended spectral subtraction method, and Sphinx embedded denoising features from recent sphinx versions. Although removing C0 in MFCC-based features leads to a slight decrease in speech recognition performance, it makes the speech recognition system independent on the speech signal level. With multi-condition training, the three sets of noise-robust features lead to a rather similar behavior of performance with respect to SNR and presence of non-speech data. Overall, best performance is achieved with the extended spectral subtraction approach. Also, the performance of the PNCC features appears to be dependent on the initialization of the normalization factor.},
  keywords = {cepstral analysis;speech recognition;extended spectral subtraction methods;robust speech recognition;signal-to-noise ratio;SNR;nonspeech data;noise robust features;power normalized cepstral coefficients;Mel-frequency cepstral coefficients;MFCC;Sphinx embedded denoising features;sphinx versions;multicondition training;PNCC features;Speech;Noise measurement;Mel frequency cepstral coefficient;Speech recognition;Training;Signal to noise ratio;Hidden Markov models;Speech recognition;Speech level robustness;Noise robustness;Spectral subtraction;PNCC},
  doi = {10.1109/EUSIPCO.2015.7362617},
  issn = {2076-1465},
  month = aug,
  url = {https://www.eurasip.org/proceedings/eusipco/eusipco2015/papers/1570104069.pdf},
}

Downloads: 0

{"_id":"kkZEEve8Rv49CLvsd","bibbaseid":"fux-jouvet-evaluationofpnccandextendedspectralsubtractionmethodsforrobustspeechrecognition-2015","authorIDs":[],"author_short":["Fux, T.","Jouvet, D."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","author":[{"firstnames":["T."],"propositions":[],"lastnames":["Fux"],"suffixes":[]},{"firstnames":["D."],"propositions":[],"lastnames":["Jouvet"],"suffixes":[]}],"booktitle":"2015 23rd European Signal Processing Conference (EUSIPCO)","title":"Evaluation of PNCC and extended spectral subtraction methods for robust speech recognition","year":"2015","pages":"1416-1420","abstract":"This paper evaluates the robustness of different approaches for speech recognition with respect to signal-to-noise ratio (SNR), to signal level and to presence of non-speech data before and after utterances to be recognized. Three types of noise robust features are considered: Power Normalized Cepstral Coefficients (PNCC), Mel-Frequency Cepstral Coefficients (MFCC) after applying an extended spectral subtraction method, and Sphinx embedded denoising features from recent sphinx versions. Although removing C0 in MFCC-based features leads to a slight decrease in speech recognition performance, it makes the speech recognition system independent on the speech signal level. With multi-condition training, the three sets of noise-robust features lead to a rather similar behavior of performance with respect to SNR and presence of non-speech data. Overall, best performance is achieved with the extended spectral subtraction approach. 
Also, the performance of the PNCC features appears to be dependent on the initialization of the normalization factor.","keywords":"cepstral analysis;speech recognition;extended spectral subtraction methods;robust speech recognition;signal-to-noise ratio;SNR;nonspeech data;noise robust features;power normalized cepstral coefficients;Mel-frequency cepstral coefficients;MFCC;Sphinx embedded denoising features;sphinx versions;multicondition training;PNCC features;Speech;Noise measurement;Mel frequency cepstral coefficient;Speech recognition;Training;Signal to noise ratio;Hidden Markov models;Speech recognition;Speech level robustness;Noise robustness;Spectral subtraction;PNCC","doi":"10.1109/EUSIPCO.2015.7362617","issn":"2076-1465","month":"Aug","url":"https://www.eurasip.org/proceedings/eusipco/eusipco2015/papers/1570104069.pdf","bibtex":"@InProceedings{7362617,\n author = {T. Fux and D. Jouvet},\n booktitle = {2015 23rd European Signal Processing Conference (EUSIPCO)},\n title = {Evaluation of PNCC and extended spectral subtraction methods for robust speech recognition},\n year = {2015},\n pages = {1416-1420},\n abstract = {This paper evaluates the robustness of different approaches for speech recognition with respect to signal-to-noise ratio (SNR), to signal level and to presence of non-speech data before and after utterances to be recognized. Three types of noise robust features are considered: Power Normalized Cepstral Coefficients (PNCC), Mel-Frequency Cepstral Coefficients (MFCC) after applying an extended spectral subtraction method, and Sphinx embedded denoising features from recent sphinx versions. Although removing C0 in MFCC-based features leads to a slight decrease in speech recognition performance, it makes the speech recognition system independent on the speech signal level. With multi-condition training, the three sets of noise-robust features lead to a rather similar behavior of performance with respect to SNR and presence of non-speech data. 
Overall, best performance is achieved with the extended spectral subtraction approach. Also, the performance of the PNCC features appears to be dependent on the initialization of the normalization factor.},\n keywords = {cepstral analysis;speech recognition;extended spectral subtraction methods;robust speech recognition;signal-to-noise ratio;SNR;nonspeech data;noise robust features;power normalized cepstral coefficients;Mel-frequency cepstral coefficients;MFCC;Sphinx embedded denoising features;sphinx versions;multicondition training;PNCC features;Speech;Noise measurement;Mel frequency cepstral coefficient;Speech recognition;Training;Signal to noise ratio;Hidden Markov models;Speech recognition;Speech level robustness;Noise robustness;Spectral subtraction;PNCC},\n doi = {10.1109/EUSIPCO.2015.7362617},\n issn = {2076-1465},\n month = {Aug},\n url = {https://www.eurasip.org/proceedings/eusipco/eusipco2015/papers/1570104069.pdf},\n}\n\n","author_short":["Fux, T.","Jouvet, D."],"key":"7362617","id":"7362617","bibbaseid":"fux-jouvet-evaluationofpnccandextendedspectralsubtractionmethodsforrobustspeechrecognition-2015","role":"author","urls":{"Paper":"https://www.eurasip.org/proceedings/eusipco/eusipco2015/papers/1570104069.pdf"},"keyword":["cepstral analysis;speech recognition;extended spectral subtraction methods;robust speech recognition;signal-to-noise ratio;SNR;nonspeech data;noise robust features;power normalized cepstral coefficients;Mel-frequency cepstral coefficients;MFCC;Sphinx embedded denoising features;sphinx versions;multicondition training;PNCC features;Speech;Noise measurement;Mel frequency cepstral coefficient;Speech recognition;Training;Signal to noise ratio;Hidden Markov models;Speech recognition;Speech level robustness;Noise robustness;Spectral 
subtraction;PNCC"],"metadata":{"authorlinks":{}},"downloads":0},"bibtype":"inproceedings","biburl":"https://raw.githubusercontent.com/Roznn/EUSIPCO/main/eusipco2015url.bib","creationDate":"2021-02-13T17:31:52.440Z","downloads":0,"keywords":["cepstral analysis;speech recognition;extended spectral subtraction methods;robust speech recognition;signal-to-noise ratio;snr;nonspeech data;noise robust features;power normalized cepstral coefficients;mel-frequency cepstral coefficients;mfcc;sphinx embedded denoising features;sphinx versions;multicondition training;pncc features;speech;noise measurement;mel frequency cepstral coefficient;speech recognition;training;signal to noise ratio;hidden markov models;speech recognition;speech level robustness;noise robustness;spectral subtraction;pncc"],"search_terms":["evaluation","pncc","extended","spectral","subtraction","methods","robust","speech","recognition","fux","jouvet"],"title":"Evaluation of PNCC and extended spectral subtraction methods for robust speech recognition","year":2015,"dataSources":["eov4vbT6mnAiTpKji","knrZsDjSNHWtA9WNT"]}