Combining Residual Networks with LSTMs for Lipreading. Stafylakis, T. & Tzimiropoulos, G. arXiv:1703.04105 [cs], March 2017.
@article{stafylakis_combining_2017,
	title = {Combining {Residual} {Networks} with {LSTMs} for {Lipreading}},
	url = {http://arxiv.org/abs/1703.04105},
	abstract = {We propose an end-to-end deep learning architecture for word-level visual speech recognition. The system is a combination of spatiotemporal convolutional, residual and bidirectional Long Short-Term Memory networks. We trained and evaluated it on the Lipreading In-The-Wild benchmark, a challenging database of 500-size vocabulary consisting of video excerpts from BBC TV broadcasts. The proposed network attains word accuracy equal to 83.0\%, yielding 6.8\% absolute improvement over the current state-of-the-art.},
	urldate = {2017-03-30},
	journal = {arXiv:1703.04105 [cs]},
	author = {Stafylakis, Themos and Tzimiropoulos, Georgios},
	month = mar,
	year = {2017},
	note = {arXiv: 1703.04105},
	keywords = {Computer Science - Computer Vision and Pattern Recognition}
}
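The abstract describes a pipeline of a spatiotemporal (3D) convolutional front-end, a residual network, and a bidirectional LSTM feeding a 500-way word classifier. A minimal sketch of the tensor-shape flow through such a pipeline is below; all layer widths and the input clip size are illustrative assumptions, not values taken from the paper.

```python
def shape_flow(frames=29, height=112, width=112):
    """Illustrative shape flow for a 3D-conv -> ResNet -> BiLSTM lipreading
    pipeline. Sizes are assumptions for illustration only."""
    # input: grayscale mouth-region video clip (T, H, W)
    x = (frames, height, width)
    # spatiotemporal (3D) convolution: keeps the time axis, downsamples space
    x = (frames, 64, height // 4, width // 4)   # (T, C, H', W')
    # 2D residual network applied per frame: collapses space to a feature vector
    x = (frames, 512)                           # (T, D)
    # bidirectional LSTM: forward and backward hidden states concatenated
    x = (frames, 2 * 256)                       # (T, 2 * hidden)
    # temporal pooling + linear classifier over the 500-word vocabulary
    x = (500,)
    return x
```

The end state is a single 500-dimensional score vector, matching the word-level (rather than sequence-level) recognition task the entry describes.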
