Vector quantization with memory and multi-labeling for isolated video-only automatic speech recognition

Vector quantization with memory and multi-labeling for isolated video-only automatic speech recognition. Terry, L. H., Shiell, D. J., & Katsaggelos, A. K. In 2008 15th IEEE International Conference on Image Processing, pages 1320–1323, 2008. IEEE.

Paper doi abstract bibtex

We describe a vector quantizer (VQ) with memory for automatic speech recognition (ASR) and compare the recognition performance results to those obtained with traditional memoryless VQ for ASR. Standard VQ for ASR quantizes the speech data independently of any past information. We introduce memory in a probabilistic framework for quantization state modeling. This is accomplished in the form of an ergodic hidden Markov model (HMM) in which the state occupied by the HMM represents the quantization label. We evaluate this approach in the context of video-only isolated digit ASR and implement both single stream (single labeling) and multi-stream (multi-labeling) systems. For single stream recognition, our approach increases the recognition rate from 62.67% to 66.95%. When using multi-labeling, our proposed vector quantizer with memory consistently outperforms the memoryless vector quantizer. © 2008 IEEE.

% Conference paper in the 2008 15th IEEE International Conference on Image Processing.
% The citation key is kept unchanged so existing citations of this entry keep resolving.
@inproceedings{Louis2008b,
  author    = {Terry, Louis H. and Shiell, Derek J. and Katsaggelos, Aggelos K.},
  title     = {Vector quantization with memory and multi-labeling for isolated video-only automatic speech recognition},
  booktitle = {2008 15th IEEE International Conference on Image Processing},
  year      = {2008},
  pages     = {1320--1323},
  publisher = {IEEE},
  doi       = {10.1109/ICIP.2008.4712006},
  isbn      = {978-1-4244-1765-0},
  issn      = {1522-4880},
  url       = {http://ieeexplore.ieee.org/document/4712006/},
  keywords  = {Hidden Markov models,Speech recognition,Vector quantization},
  abstract  = {We describe a vector quantizer (VQ) with memory for automatic speech recognition (ASR) and compare the recognition performance results to those obtained with traditional memoryless VQ for ASR. Standard VQ for ASR quantizes the speech data independently of any past information. We introduce memory in a probabilistic framework for quantization state modeling. This is accomplished in the form of an ergodic hidden Markov model (HMM) in which the state occupied by the HMM represents the quantization label. We evaluate this approach in the context of video-only isolated digit ASR and implement both single stream (single labeling) and multi-stream (multi-labeling) systems. For single stream recognition, our approach increases the recognition rate from 62.67\% to 66.95\%. When using multi-labeling, our proposed vector quantizer with memory consistently outperforms the memoryless vector quantizer. {\textcopyright} 2008 IEEE.},
}

Downloads: 0

{"_id":"96NFYS36dB6ct33FQ","bibbaseid":"terry-shiell-katsaggelos-vectorquantizationwithmemoryandmultilabelingforisolatedvideoonlyautomaticspeechrecognition-2008","author_short":["Terry, L. H.","Shiell, D. J.","Katsaggelos, A. K."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","abstract":"We describe a vector quantizer (VQ) with memory for automatic speech recognition (ASR) and compare the recognition performance results to those obtained with traditional mem-oryless VQ for ASR. Standard VQ for ASR quantizes the speech data independently of any past information. We introduce memory in a probabilistic framework for quantization state modeling. This is accomplished in the form of an ergodic hidden Markov model (HMM) in which the state occupied by the HMM represents the quantization label. We evaluate this approach in the context of video-only isolated digit ASR and implement both single stream (single labeling) and multi-stream (multi-labeling) systems. For single stream recognition, our approach increases the recognition rate from 62.67% to 66.95%. When using multi-labeling, our proposed vector quantizer with memory consistently outperforms the memoryless vector quantizer. 
© 2008 IEEE.","author":[{"propositions":[],"lastnames":["Terry"],"firstnames":["Louis","H."],"suffixes":[]},{"propositions":[],"lastnames":["Shiell"],"firstnames":["Derek","J."],"suffixes":[]},{"propositions":[],"lastnames":["Katsaggelos"],"firstnames":["Aggelos","K."],"suffixes":[]}],"booktitle":"2008 15th IEEE International Conference on Image Processing","doi":"10.1109/ICIP.2008.4712006","isbn":"978-1-4244-1765-0","issn":"15224880","keywords":"Hidden Markov models,Speech recognition,Vector quantization","pages":"1320–1323","publisher":"IEEE","title":"Vector quantization with memory and multi-labeling for isolated video-only automatic speech recognition","url":"http://ieeexplore.ieee.org/document/4712006/","year":"2008","bibtex":"@inproceedings{Louis2008b,\nabstract = {We describe a vector quantizer (VQ) with memory for automatic speech recognition (ASR) and compare the recognition performance results to those obtained with traditional mem-oryless VQ for ASR. Standard VQ for ASR quantizes the speech data independently of any past information. We introduce memory in a probabilistic framework for quantization state modeling. This is accomplished in the form of an ergodic hidden Markov model (HMM) in which the state occupied by the HMM represents the quantization label. We evaluate this approach in the context of video-only isolated digit ASR and implement both single stream (single labeling) and multi-stream (multi-labeling) systems. For single stream recognition, our approach increases the recognition rate from 62.67% to 66.95%. When using multi-labeling, our proposed vector quantizer with memory consistently outperforms the memoryless vector quantizer. {\\textcopyright} 2008 IEEE.},\nauthor = {Terry, Louis H. and Shiell, Derek J. 
and Katsaggelos, Aggelos K.},\nbooktitle = {2008 15th IEEE International Conference on Image Processing},\ndoi = {10.1109/ICIP.2008.4712006},\nisbn = {978-1-4244-1765-0},\nissn = {15224880},\nkeywords = {Hidden Markov models,Speech recognition,Vector quantization},\npages = {1320--1323},\npublisher = {IEEE},\ntitle = {{Vector quantization with memory and multi-labeling for isolated video-only automatic speech recognition}},\nurl = {http://ieeexplore.ieee.org/document/4712006/},\nyear = {2008}\n}\n","author_short":["Terry, L. H.","Shiell, D. J.","Katsaggelos, A. K."],"key":"Louis2008b","id":"Louis2008b","bibbaseid":"terry-shiell-katsaggelos-vectorquantizationwithmemoryandmultilabelingforisolatedvideoonlyautomaticspeechrecognition-2008","role":"author","urls":{"Paper":"http://ieeexplore.ieee.org/document/4712006/"},"keyword":["Hidden Markov models","Speech recognition","Vector quantization"],"metadata":{"authorlinks":{}}},"bibtype":"inproceedings","biburl":"https://sites.northwestern.edu/ivpl/files/2023/06/IVPL_Updated_publications-1.bib","dataSources":["KTWAakbPXLGfYseXn","ePKPjG8C6yvpk4mEK","ya2CyA73rpZseyrZ8","qhF8zxmGcJfvtdeAg","fvDEHD49E2ZRwE3fb","H7crv8NWhZup4d4by","DHqokWsryttGh7pJE","vRJd4wNg9HpoZSMHD","sYxQ6pxFgA59JRhxi","w2WahSbYrbcCKBDsC","XasdXLL99y5rygCmq","3gkSihZQRfAD2KBo3","t5XMbyZbtPBo4wBGS","bEpHM2CtrwW2qE8FP","teJzFLHexaz5AQW5z"],"keywords":["hidden markov models","speech recognition","vector quantization"],"search_terms":["vector","quantization","memory","multi","labeling","isolated","video","automatic","speech","recognition","terry","shiell","katsaggelos"],"title":"Vector quantization with memory and multi-labeling for isolated video-only automatic speech recognition","year":2008}