Exploring the Limits of Language Modeling. Jozefowicz, R., Vinyals, O., Schuster, M., Shazeer, N., & Wu, Y. 2016.
Abstract: In this work we explore recent advances in Recurrent Neural Networks for large scale Language Modeling, a task central to language understanding. We extend current models to deal with two key challenges present in this task: corpora and vocabulary sizes, and complex, long term structure of language. We perform an exhaustive study on techniques such as character Convolutional Neural Networks or Long-Short Term Memory, on the One Billion Word Benchmark. Our best single model significantly improves state-of-the-art perplexity from 51.3 down to 30.0 (whilst reducing the number of parameters by a factor of 20), while an ensemble of models sets a new record by improving perplexity from 41.0 down to 23.7. We also release these models for the NLP and ML community to study and improve upon.
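For context on the numbers quoted above: perplexity, the metric behind the reported 51.3 to 30.0 (single model) and 41.0 to 23.7 (ensemble) improvements, is the exponential of the average per-word negative log-likelihood on the test set. A minimal Python sketch, not taken from the paper; the function name and the toy probabilities are illustrative only:

    import math

    def perplexity(token_log_probs):
        # Perplexity is exp of the average negative log-likelihood per token,
        # the standard evaluation metric on the One Billion Word Benchmark.
        avg_nll = -sum(token_log_probs) / len(token_log_probs)
        return math.exp(avg_nll)

    # Toy usage with made-up per-word log probabilities (illustration only).
    print(perplexity([math.log(0.1), math.log(0.05), math.log(0.2)]))

Lower perplexity means the model assigns higher probability to the held-out text, so the drop from 51.3 to 30.0 reflects a substantially better predictive distribution over next words.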
@unpublished{Jozefowicz2016,
abstract = {In this work we explore recent advances in Recurrent Neural Networks for large scale Language Modeling, a task central to language understanding. We extend current models to deal with two key challenges present in this task: corpora and vocabulary sizes, and complex, long term structure of language. We perform an exhaustive study on techniques such as character Convolutional Neural Networks or Long-Short Term Memory, on the One Billion Word Benchmark. Our best single model significantly improves state-of-the-art perplexity from 51.3 down to 30.0 (whilst reducing the number of parameters by a factor of 20), while an ensemble of models sets a new record by improving perplexity from 41.0 down to 23.7. We also release these models for the NLP and ML community to study and improve upon.},
archivePrefix = {arXiv},
arxivId = {1602.02410},
author = {Jozefowicz, Rafal and Vinyals, Oriol and Schuster, Mike and Shazeer, Noam and Wu, Yonghui},
eprint = {1602.02410},
keywords = {model},
title = {{Exploring the Limits of Language Modeling}},
url = {https://arxiv.org/abs/1602.02410},
year = {2016}
}
{"_id":"qmLtfFF8wJSRu4BJi","bibbaseid":"jozefowicz-vinyals-schuster-shazeer-wu-exploringthelimitsoflanguagemodeling-2016","downloads":0,"creationDate":"2016-10-27T16:01:55.937Z","title":"Exploring the Limits of Language Modeling","author_short":["Jozefowicz, R.","Vinyals, O.","Schuster, M.","Shazeer, N.","Wu, Y."],"year":2016,"bibtype":"unpublished","biburl":"https://www.shane.st/teaching/575/win20/MachineLearning-interpretability.bib","bibdata":{"bibtype":"unpublished","type":"unpublished","abstract":"In this work we explore recent advances in Recurrent Neural Networks for large scale Language Modeling, a task central to language understanding. We extend current models to deal with two key challenges present in this task: corpora and vocabulary sizes, and complex, long term structure of language. We perform an exhaustive study on techniques such as character Convolutional Neural Networks or Long-Short Term Memory, on the One Billion Word Benchmark. Our best single model significantly improves state-of-the-art perplexity from 51.3 down to 30.0 (whilst reducing the number of parameters by a factor of 20), while an ensemble of models sets a new record by improving perplexity from 41.0 down to 23.7. We also release these models for the NLP and ML community to study and improve upon.","archiveprefix":"arXiv","arxivid":"1602.02410","author":[{"propositions":[],"lastnames":["Jozefowicz"],"firstnames":["Rafal"],"suffixes":[]},{"propositions":[],"lastnames":["Vinyals"],"firstnames":["Oriol"],"suffixes":[]},{"propositions":[],"lastnames":["Schuster"],"firstnames":["Mike"],"suffixes":[]},{"propositions":[],"lastnames":["Shazeer"],"firstnames":["Noam"],"suffixes":[]},{"propositions":[],"lastnames":["Wu"],"firstnames":["Yonghui"],"suffixes":[]}],"eprint":"1602.02410","file":":Users/shanest/Documents/Library/Jozefowicz et al/Unknown/Jozefowicz et al. - 2016 - Exploring the Limits of Language Modeling.pdf:pdf","keywords":"model","title":"Exploring the Limits of Language Modeling","url":"https://arxiv.org/abs/1602.02410","year":"2016","bibtex":"@unpublished{Jozefowicz2016,\nabstract = {In this work we explore recent advances in Recurrent Neural Networks for large scale Language Modeling, a task central to language understanding. We extend current models to deal with two key challenges present in this task: corpora and vocabulary sizes, and complex, long term structure of language. We perform an exhaustive study on techniques such as character Convolutional Neural Networks or Long-Short Term Memory, on the One Billion Word Benchmark. Our best single model significantly improves state-of-the-art perplexity from 51.3 down to 30.0 (whilst reducing the number of parameters by a factor of 20), while an ensemble of models sets a new record by improving perplexity from 41.0 down to 23.7. We also release these models for the NLP and ML community to study and improve upon.},\narchivePrefix = {arXiv},\narxivId = {1602.02410},\nauthor = {Jozefowicz, Rafal and Vinyals, Oriol and Schuster, Mike and Shazeer, Noam and Wu, Yonghui},\neprint = {1602.02410},\nfile = {:Users/shanest/Documents/Library/Jozefowicz et al/Unknown/Jozefowicz et al. 
- 2016 - Exploring the Limits of Language Modeling.pdf:pdf},\nkeywords = {model},\ntitle = {{Exploring the Limits of Language Modeling}},\nurl = {https://arxiv.org/abs/1602.02410},\nyear = {2016}\n}\n","author_short":["Jozefowicz, R.","Vinyals, O.","Schuster, M.","Shazeer, N.","Wu, Y."],"key":"Jozefowicz2016","id":"Jozefowicz2016","bibbaseid":"jozefowicz-vinyals-schuster-shazeer-wu-exploringthelimitsoflanguagemodeling-2016","role":"author","urls":{"Paper":"https://arxiv.org/abs/1602.02410"},"keyword":["model"],"metadata":{"authorlinks":{}}},"search_terms":["exploring","limits","language","modeling","jozefowicz","vinyals","schuster","shazeer","wu"],"keywords":["model"],"authorIDs":[],"dataSources":["okYcdTpf4JJ2zkj7A"]}