Efficiently Scaling Transformer Inference. Pope, R., Douglas, S., Chowdhery, A., Devlin, J., Bradbury, J., Levskaya, A., Heek, J., Xiao, K., Agrawal, S., & Dean, J. CoRR, 2022.
Efficiently Scaling Transformer Inference [link]Paper  doi  bibtex   
% DBLP export of the arXiv (CoRR) preprint 2211.05102. The citation key is DBLP's record key (see biburl).
% timestamp, biburl and bibsource are DBLP provenance fields, not bibliographic data.
@article{DBLP:journals/corr/abs-2211-05102,
  author       = {Pope, Reiner and
                  Douglas, Sholto and
                  Chowdhery, Aakanksha and
                  Devlin, Jacob and
                  Bradbury, James and
                  Levskaya, Anselm and
                  Heek, Jonathan and
                  Xiao, Kefan and
                  Agrawal, Shivani and
                  Dean, Jeff},
  title        = {Efficiently Scaling Transformer Inference},
  journal      = {CoRR},
  volume       = {abs/2211.05102},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2211.05102},
  doi          = {10.48550/arXiv.2211.05102},
  eprinttype   = {arXiv},
  eprint       = {2211.05102},
  timestamp    = {Tue, 15 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-05102.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0