SailAlign: Robust long speech-text alignment

SailAlign: Robust long speech-text alignment. Katsamanis, A., Black, M. P., Georgiou, P., Goldstein, L., & Narayanan, S. S. In Proc. of Workshop on New Tools and Methods for Very-Large Scale Phonetics Research, pages 28-31, Jan, 2011. University of Pennsylvania.
abstract bibtex

Long speech-text alignment can facilitate large-scale study of rich spoken language resources that have recently become widely accessible, e.g., collections of audio books, or multimedia documents. For such resources, the conventional Viterbi-based forced alignment may often be proven inadequate mainly due to mismatched audio and text and/or noisy audio. In this paper, we present SailAlign which is an open-source software toolkit for robust long speech-text alignment that circumvents these restrictions. It implements an adaptive, iterative speech recognition and text alignment scheme that allows for the processing of very long (and possibly noisy) audio and is robust to transcription errors. SailAlign is evaluated on artificially created long chunks of the TIMIT database. Audio is artificially contaminated with babble noise, and the corresponding transcriptions are corrupted at various levels. We present the corresponding word boundary detection results. Finally, we demonstrate the potential use of the software for the exploitation of audio books for the study of read speech.

@comment{Workshop paper introducing the SailAlign toolkit. The page range uses the double-hyphen form, month uses the standard three-letter macro, and the toolkit name in the title is brace-protected against style recasing.}
@inproceedings{Katsamanis2011SailAlign:Robustlongspeech-text,
 abstract = {Long speech-text alignment can facilitate large-scale study of rich spoken language resources that have recently become widely accessible, e.g., collections of audio books, or multimedia documents. For such resources, the conventional Viterbi-based forced alignment may often be proven inadequate mainly due to mismatched audio and text and/or noisy audio. In this paper, we present SailAlign which is an open-source software toolkit for robust long speech-text alignment that circumvents these restrictions. It implements an adaptive, iterative speech recognition and text alignment scheme that allows for the processing of very long (and possibly noisy) audio and is robust to transcription errors. SailAlign is evaluated on artificially created long chunks of the TIMIT database. Audio is artificially contaminated with babble noise, and the corresponding transcriptions are corrupted at various levels. We present the corresponding word boundary detection results. Finally, we demonstrate the potential use of the software for the exploitation of audio books for the study of read speech.},
 author = {Katsamanis, Athanasios and Black, Matthew P. and Georgiou, Panayiotis and Goldstein, Louis and Narayanan, Shrikanth S.},
 bib2html_rescat = {speechlinks,span},
 booktitle = {Proc. of Workshop on New Tools and Methods for Very-Large Scale Phonetics Research},
 link = {http://sail.usc.edu/publications/files/35448ecec01603f25919f9302f8f368a15c6.pdf},
 location = {Philadelphia, PA},
 month = jan,
 pages = {28--31},
 publisher = {University of Pennsylvania},
 title = {{SailAlign}: Robust long speech-text alignment},
 year = {2011}
}

Downloads: 0

{"_id":"mCBruYkGYZb4bcngp","bibbaseid":"katsamanis-black-georgiou-goldstein-narayanan-sailalignrobustlongspeechtextalignment-2011","downloads":0,"creationDate":"2017-05-08T08:45:46.181Z","title":"SailAlign: Robust long speech-text alignment","author_short":["Katsamanis, A.","Black, M. P.","Georgiou, P.","Goldstein, L.","Narayanan, S. S."],"year":2011,"bibtype":"inproceedings","biburl":"https://bibbase.org/f/nWhKb4SffvhfreEmj/shri-isi-edu.bib","bibdata":{"bibtype":"inproceedings","type":"inproceedings","abstract":"Long speech-text alignment can facilitate large-scale study of rich spoken language resources that have recently become widely accessible, e.g., collections of audio books, or multime- dia documents. For such resources, the conventional Viterbi- based forced alignment may often be proven inadequate mainly due to mismatched audio and text and/or noisy audio. In this paper, we present SailAlign which is an open-source software toolkit for robust long speech-text alignment that circumvents these restrictions. It implements an adaptive, iterative speech recognition and text alignment scheme that allows for the pro- cessing of very long (and possibly noisy) audio and is robust to transcription errors. SailAlign is evaluated on artificially cre- ated long chunks of the TIMIT database. Audio is artificially contaminated with babble noise, and the corresponding tran- scriptions are corrupted at various levels. We present the corre- sponding word boundary detection results. 
Finally, we demon- strate the potential use of the software for the exploitation of audio books for the study of read speech.","author":[{"propositions":[],"lastnames":["Katsamanis"],"firstnames":["Athanasios"],"suffixes":[]},{"propositions":[],"lastnames":["Black"],"firstnames":["Matthew","P."],"suffixes":[]},{"propositions":[],"lastnames":["Georgiou"],"firstnames":["Panayiotis"],"suffixes":[]},{"propositions":[],"lastnames":["Goldstein"],"firstnames":["Louis"],"suffixes":[]},{"propositions":[],"lastnames":["Narayanan"],"firstnames":["Shrikanth","S."],"suffixes":[]}],"bib2html_rescat":"speechlinks,span","booktitle":"Proc. of Workshop on New Tools and Methods for Very-Large Scale Phonetics Research","link":"http://sail.usc.edu/publications/files/35448ecec01603f25919f9302f8f368a15c6.pdf","location":"Philadelphia, PA","month":"Jan","pages":"28-31","publisher":"University of Pennsylvania","title":"SailAlign: Robust long speech-text alignment","year":"2011","bibtex":"@inproceedings{Katsamanis2011SailAlign:Robustlongspeech-text,\n abstract = {Long speech-text alignment can facilitate large-scale study of rich spoken language resources that have recently become widely accessible, e.g., collections of audio books, or multime- dia documents. For such resources, the conventional Viterbi- based forced alignment may often be proven inadequate mainly due to mismatched audio and text and/or noisy audio. In this paper, we present SailAlign which is an open-source software toolkit for robust long speech-text alignment that circumvents these restrictions. It implements an adaptive, iterative speech recognition and text alignment scheme that allows for the pro- cessing of very long (and possibly noisy) audio and is robust to transcription errors. SailAlign is evaluated on artificially cre- ated long chunks of the TIMIT database. Audio is artificially contaminated with babble noise, and the corresponding tran- scriptions are corrupted at various levels. 
We present the corre- sponding word boundary detection results. Finally, we demon- strate the potential use of the software for the exploitation of audio books for the study of read speech.},\n author = {Katsamanis, Athanasios and Black, Matthew P. and Georgiou, Panayiotis and Goldstein, Louis and Narayanan, Shrikanth S.},\n bib2html_rescat = {speechlinks,span},\n booktitle = {Proc. of Workshop on New Tools and Methods for Very-Large Scale Phonetics Research},\n link = {http://sail.usc.edu/publications/files/35448ecec01603f25919f9302f8f368a15c6.pdf},\n location = {Philadelphia, PA},\n month = {Jan},\n pages = {28-31},\n publisher = {University of Pennsylvania},\n title = {SailAlign: Robust long speech-text alignment},\n year = {2011}\n}\n\n","author_short":["Katsamanis, A.","Black, M. P.","Georgiou, P.","Goldstein, L.","Narayanan, S. S."],"bibbaseid":"katsamanis-black-georgiou-goldstein-narayanan-sailalignrobustlongspeechtextalignment-2011","role":"author","urls":{},"metadata":{"authorlinks":{}}},"search_terms":["sailalign","robust","long","speech","text","alignment","katsamanis","black","georgiou","goldstein","narayanan"],"keywords":[],"authorIDs":["5473ee10cb4bf04e39000394"],"dataSources":["zCoW8NCDfFAcyLCTb","P3nQrSLkFzGGSmKJQ","Reikhy6EiDXFTcuR9"]}