Training Compute-Optimal Large Language Models. Hoffmann, J., Borgeaud, S., Mensch, A., Buchatskaya, E., Cai, T., Rutherford, E., de Las Casas, D., Hendricks, L. A., Welbl, J., Clark, A., Hennigan, T., Noland, E., Millican, K., van den Driessche, G., Damoc, B., Guy, A., Osindero, S., Simonyan, K., Elsen, E., Rae, J. W., Vinyals, O., & Sifre, L. CoRR, 2022.
Training Compute-Optimal Large Language Models. Paper: https://doi.org/10.48550/arXiv.2203.15556 | DOI: 10.48550/arXiv.2203.15556 | BibTeX entry below.
% Hoffmann et al. (2022), "Training Compute-Optimal Large Language Models",
% arXiv preprint 2203.15556. Record exported from DBLP (see biburl/bibsource);
% journal/volume are kept exactly as exported so rendered output is unchanged.
% Authors use the "von Last, First" form so particle surnames
% (de Las Casas, van den Driessche) do not depend on case-based name parsing.
@article{DBLP:journals/corr/abs-2203-15556,
  author       = {Hoffmann, Jordan and
                  Borgeaud, Sebastian and
                  Mensch, Arthur and
                  Buchatskaya, Elena and
                  Cai, Trevor and
                  Rutherford, Eliza and
                  de Las Casas, Diego and
                  Hendricks, Lisa Anne and
                  Welbl, Johannes and
                  Clark, Aidan and
                  Hennigan, Tom and
                  Noland, Eric and
                  Millican, Katie and
                  van den Driessche, George and
                  Damoc, Bogdan and
                  Guy, Aurelia and
                  Osindero, Simon and
                  Simonyan, Karen and
                  Elsen, Erich and
                  Rae, Jack W. and
                  Vinyals, Oriol and
                  Sifre, Laurent},
  title        = {Training Compute-Optimal Large Language Models},
  journal      = {CoRR},
  volume       = {abs/2203.15556},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2203.15556},
  doi          = {10.48550/arXiv.2203.15556},
  eprinttype   = {arXiv},
  eprint       = {2203.15556},
  timestamp    = {Sun, 19 Jan 2025 13:42:19 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2203-15556.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0