A Critical Assessment of Spoken Utterance Retrieval through Approximate Lattice Representations. Kazemian, S. Master's thesis, Department of Computer Science, University of Toronto, January, 2009. abstract bibtex This paper compares the performance of Position-specific Posterior Lattices (PSPL) and Confusion Networks (CN) applied to Spoken Utterance Retrieval, and tests these recent proposals against several baselines, namely 1-best transcription, using the whole lattice, and the set-of-words baseline. The set-of-words baseline is used for the first time in context of Spoken Utterance Retrieval. PSPL and CN provide compact representations that generalize the original segment lattices and provide greater recall robustness, but have yet to be evaluated against each other in multiple WER conditions for Spoken Utterance Retrieval. Our comparisons suggest that while PSPL and Confusion Networks have comparable recall, the former is slightly more precise, although its merit appears to be coupled to the assumptions of low-frequency search queries and low-WER environments. While in the low-WER environments all methods tested have comparable performance, both PSPL and CN significantly outperform the 1-best transcription in high-WER environments but perform similarly to the whole lattice and set-of-words baselines.
@comment{Master's thesis record. The non-standard "download" field is retained for any tool that already reads it; "url" carries the same link for standard bibliography styles.}
@mastersthesis{kazemian:2009:thesis,
  author   = {Kazemian, Siavash},
  title    = {A Critical Assessment of Spoken Utterance Retrieval
              through Approximate Lattice Representations},
  school   = {Department of Computer Science, University of Toronto},
  year     = {2009},
  month    = jan,
  url      = {http://ftp.cs.toronto.edu/pub/gh/Kazemian-MSc-paper.pdf},
  download = {http://ftp.cs.toronto.edu/pub/gh/Kazemian-MSc-paper.pdf},
  abstract = {This paper compares the performance of Position-specific
              Posterior Lattices (PSPL) and Confusion Networks (CN)
              applied to Spoken Utterance Retrieval, and tests these
              recent proposals against several baselines, namely 1-best
              transcription, using the whole lattice, and the
              set-of-words baseline. The set-of-words baseline is used
              for the first time in context of Spoken Utterance
              Retrieval. PSPL and CN provide compact representations that
              generalize the original segment lattices and provide
              greater recall robustness, but have yet to be evaluated
              against each other in multiple WER conditions for Spoken
              Utterance Retrieval. Our comparisons suggest that while
              PSPL and Confusion Networks have comparable recall, the
              former is slightly more precise, although its merit appears
              to be coupled to the assumptions of low-frequency search
              queries and low-WER environments. While in the low-WER
              environments all methods tested have comparable
              performance, both PSPL and CN significantly outperform the
              1-best transcription in high-WER environments but perform
              similarly to the whole lattice and set-of-words baselines.},
}
Downloads: 0
{"_id":{"_str":"53d57a48f414ae191e0003b2"},"__v":0,"authorIDs":[],"author_short":["Kazemian, S."],"bibbaseid":"kazemian-acriticalassessmentofspokenutteranceretrievalthroughapproximatelatticerepresentations-2009","bibdata":{"bibtype":"mastersthesis","type":"mastersthesis","author":[{"firstnames":["Siavash"],"propositions":[],"lastnames":["Kazemian"],"suffixes":[]}],"title":"A Critical Assessment of Spoken Utterance Retrieval through Approximate Lattice Representations","year":"2009","school":"Department of Computer Science, University of Toronto","month":"January","abstract":"This paper compares the performance of Position-specific Posterior Lattices (PSPL) and Confusion Networks (CN) applied to Spoken Utterance Retrieval, and tests these recent proposals against several baselines, namely 1-best transcription, using the whole lattice, and the set-of-words baseline. The set-of-words baseline is used for the first time in context of Spoken Utterance Retrieval. PSPL and CN provide compact representations that generalize the original segment lattices and provide greater recall robustness, but have yet to be evaluated against each other in multiple WER conditions for Spoken Utterance Retrieval. Our comparisons suggest that while PSPL and Confusion Networks have comparable recall, the former is slightly more precise, although its merit appears to be coupled to the assumptions of low-frequency search queries and low- WER environments. 
While in the low-WER environments all methods tested have comparable performance, both PSPL and CN significantly outperform the 1-best transcription in high-WER environments but perform similarly to the whole lattice and set-of-words baselines.","download":"http://ftp.cs.toronto.edu/pub/gh/Kazemian-MSc-paper.pdf","bibtex":"@MastersThesis{\t kazemian:2009:thesis,\n author\t= {Siavash Kazemian},\n title\t\t= {A Critical Assessment of Spoken Utterance Retrieval\n\t\t through Approximate Lattice Representations},\n year\t\t= {2009},\n school\t= {Department of Computer Science, University of Toronto},\n month\t\t= {January},\n abstract\t= {This paper compares the performance of Position-specific\n\t\t Posterior Lattices (PSPL) and Confusion Networks (CN)\n\t\t applied to Spoken Utterance Retrieval, and tests these\n\t\t recent proposals against several baselines, namely 1-best\n\t\t transcription, using the whole lattice, and the\n\t\t set-of-words baseline. The set-of-words baseline is used\n\t\t for the first time in context of Spoken Utterance\n\t\t Retrieval. PSPL and CN provide compact representations that\n\t\t generalize the original segment lattices and provide\n\t\t greater recall robustness, but have yet to be evaluated\n\t\t against each other in multiple WER conditions for Spoken\n\t\t Utterance Retrieval. Our comparisons suggest that while\n\t\t PSPL and Confusion Networks have comparable recall, the\n\t\t former is slightly more precise, although its merit appears\n\t\t to be coupled to the assumptions of low-frequency search\n\t\t queries and low- WER environments. 
While in the low-WER\n\t\t environments all methods tested have comparable\n\t\t performance, both PSPL and CN significantly outperform the\n\t\t 1-best transcription in high-WER environments but perform\n\t\t similarly to the whole lattice and set-of-words baselines.},\n download\t= {http://ftp.cs.toronto.edu/pub/gh/Kazemian-MSc-paper.pdf}\n}\n\n","author_short":["Kazemian, S."],"key":"kazemian:2009:thesis","id":"kazemian:2009:thesis","bibbaseid":"kazemian-acriticalassessmentofspokenutteranceretrievalthroughapproximatelatticerepresentations-2009","role":"author","urls":{},"metadata":{"authorlinks":{}}},"bibtype":"mastersthesis","biburl":"www.cs.toronto.edu/~fritz/tmp/compling.bib","creationDate":"2014-07-27T22:16:40.875Z","downloads":0,"keywords":[],"search_terms":["critical","assessment","spoken","utterance","retrieval","through","approximate","lattice","representations","kazemian"],"title":"A Critical Assessment of Spoken Utterance Retrieval through Approximate Lattice Representations","year":2009,"dataSources":["n8jB5BJxaeSmH6mtR","6b6A9kbkw4CsEGnRX"]}