Training Compute-Optimal Large Language Models. Hoffmann, J., Borgeaud, S., Mensch, A., Buchatskaya, E., Cai, T., Rutherford, E., de Las Casas, D., Hendricks, L. A., Welbl, J., Clark, A., Hennigan, T., Noland, E., Millican, K., van den Driessche, G., Damoc, B., Guy, A., Osindero, S., Simonyan, K., Elsen, E., Rae, J. W., Vinyals, O., & Sifre, L. CoRR, 2022.
Paper doi bibtex @article{DBLP:journals/corr/abs-2203-15556,
author = {Jordan Hoffmann and
Sebastian Borgeaud and
Arthur Mensch and
Elena Buchatskaya and
Trevor Cai and
Eliza Rutherford and
Diego de Las Casas and
Lisa Anne Hendricks and
Johannes Welbl and
Aidan Clark and
Tom Hennigan and
Eric Noland and
Katie Millican and
George van den Driessche and
Bogdan Damoc and
Aurelia Guy and
Simon Osindero and
Karen Simonyan and
Erich Elsen and
Jack W. Rae and
Oriol Vinyals and
Laurent Sifre},
title = {Training Compute-Optimal Large Language Models},
journal = {CoRR},
volume = {abs/2203.15556},
year = {2022},
url = {https://doi.org/10.48550/arXiv.2203.15556},
doi = {10.48550/ARXIV.2203.15556},
eprinttype = {arXiv},
eprint = {2203.15556},
timestamp = {Sun, 19 Jan 2025 13:42:19 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2203-15556.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
Downloads: 0
{"_id":"nrGt64u4kh9A9yYsk","bibbaseid":"hoffmann-borgeaud-mensch-buchatskaya-cai-rutherford-delascasas-hendricks-etal-trainingcomputeoptimallargelanguagemodels-2022","author_short":["Hoffmann, J.","Borgeaud, S.","Mensch, A.","Buchatskaya, E.","Cai, T.","Rutherford, E.","de Las Casas, D.","Hendricks, L. A.","Welbl, J.","Clark, A.","Hennigan, T.","Noland, E.","Millican, K.","van den Driessche, G.","Damoc, B.","Guy, A.","Osindero, S.","Simonyan, K.","Elsen, E.","Rae, J. W.","Vinyals, O.","Sifre, L."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Jordan"],"propositions":[],"lastnames":["Hoffmann"],"suffixes":[]},{"firstnames":["Sebastian"],"propositions":[],"lastnames":["Borgeaud"],"suffixes":[]},{"firstnames":["Arthur"],"propositions":[],"lastnames":["Mensch"],"suffixes":[]},{"firstnames":["Elena"],"propositions":[],"lastnames":["Buchatskaya"],"suffixes":[]},{"firstnames":["Trevor"],"propositions":[],"lastnames":["Cai"],"suffixes":[]},{"firstnames":["Eliza"],"propositions":[],"lastnames":["Rutherford"],"suffixes":[]},{"firstnames":["Diego"],"propositions":["de"],"lastnames":["Las","Casas"],"suffixes":[]},{"firstnames":["Lisa","Anne"],"propositions":[],"lastnames":["Hendricks"],"suffixes":[]},{"firstnames":["Johannes"],"propositions":[],"lastnames":["Welbl"],"suffixes":[]},{"firstnames":["Aidan"],"propositions":[],"lastnames":["Clark"],"suffixes":[]},{"firstnames":["Tom"],"propositions":[],"lastnames":["Hennigan"],"suffixes":[]},{"firstnames":["Eric"],"propositions":[],"lastnames":["Noland"],"suffixes":[]},{"firstnames":["Katie"],"propositions":[],"lastnames":["Millican"],"suffixes":[]},{"firstnames":["George"],"propositions":["van","den"],"lastnames":["Driessche"],"suffixes":[]},{"firstnames":["Bogdan"],"propositions":[],"lastnames":["Damoc"],"suffixes":[]},{"firstnames":["Aurelia"],"propositions":[],"lastnames":["Guy"],"suffixes":[]},{"firstnames":["Simon"],"propositions":[],"lastnames":["Osindero"],"suffixes":[]},{"firstnames":["Karen"],"propositions":[],"lastnames":["Simonyan"],"suffixes":[]},{"firstnames":["Erich"],"propositions":[],"lastnames":["Elsen"],"suffixes":[]},{"firstnames":["Jack","W."],"propositions":[],"lastnames":["Rae"],"suffixes":[]},{"firstnames":["Oriol"],"propositions":[],"lastnames":["Vinyals"],"suffixes":[]},{"firstnames":["Laurent"],"propositions":[],"lastnames":["Sifre"],"suffixes":[]}],"title":"Training Compute-Optimal Large Language Models","journal":"CoRR","volume":"abs/2203.15556","year":"2022","url":"https://doi.org/10.48550/arXiv.2203.15556","doi":"10.48550/ARXIV.2203.15556","eprinttype":"arXiv","eprint":"2203.15556","timestamp":"Sun, 19 Jan 2025 13:42:19 +0100","biburl":"https://dblp.org/rec/journals/corr/abs-2203-15556.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/abs-2203-15556,\n author = {Jordan Hoffmann and\n Sebastian Borgeaud and\n Arthur Mensch and\n Elena Buchatskaya and\n Trevor Cai and\n Eliza Rutherford and\n Diego de Las Casas and\n Lisa Anne Hendricks and\n Johannes Welbl and\n Aidan Clark and\n Tom Hennigan and\n Eric Noland and\n Katie Millican and\n George van den Driessche and\n Bogdan Damoc and\n Aurelia Guy and\n Simon Osindero and\n Karen Simonyan and\n Erich Elsen and\n Jack W. Rae and\n Oriol Vinyals and\n Laurent Sifre},\n title = {Training Compute-Optimal Large Language Models},\n journal = {CoRR},\n volume = {abs/2203.15556},\n year = {2022},\n url = {https://doi.org/10.48550/arXiv.2203.15556},\n doi = {10.48550/ARXIV.2203.15556},\n eprinttype = {arXiv},\n eprint = {2203.15556},\n timestamp = {Sun, 19 Jan 2025 13:42:19 +0100},\n biburl = {https://dblp.org/rec/journals/corr/abs-2203-15556.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Hoffmann, J.","Borgeaud, S.","Mensch, A.","Buchatskaya, E.","Cai, T.","Rutherford, E.","de Las Casas, D.","Hendricks, L. A.","Welbl, J.","Clark, A.","Hennigan, T.","Noland, E.","Millican, K.","van den Driessche, G.","Damoc, B.","Guy, A.","Osindero, S.","Simonyan, K.","Elsen, E.","Rae, J. W.","Vinyals, O.","Sifre, L."],"key":"DBLP:journals/corr/abs-2203-15556","id":"DBLP:journals/corr/abs-2203-15556","bibbaseid":"hoffmann-borgeaud-mensch-buchatskaya-cai-rutherford-delascasas-hendricks-etal-trainingcomputeoptimallargelanguagemodels-2022","role":"author","urls":{"Paper":"https://doi.org/10.48550/arXiv.2203.15556"},"metadata":{"authorlinks":{}},"downloads":0,"html":""},"bibtype":"article","biburl":"https://bibbase.org/network/files/vr3BBX7hq2kArHnuq","dataSources":["3gTBYW5YxtNcnhN2g"],"keywords":[],"search_terms":["training","compute","optimal","large","language","models","hoffmann","borgeaud","mensch","buchatskaya","cai","rutherford","de las casas","hendricks","welbl","clark","hennigan","noland","millican","van den driessche","damoc","guy","osindero","simonyan","elsen","rae","vinyals","sifre"],"title":"Training Compute-Optimal Large Language Models","year":2022}