{"_id":"9wk73N3JBGWnjscP9","bibbaseid":"pope-douglas-chowdhery-devlin-bradbury-levskaya-heek-xiao-etal-efficientlyscalingtransformerinference-2022","author_short":["Pope, R.","Douglas, S.","Chowdhery, A.","Devlin, J.","Bradbury, J.","Levskaya, A.","Heek, J.","Xiao, K.","Agrawal, S.","Dean, J."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Reiner"],"propositions":[],"lastnames":["Pope"],"suffixes":[]},{"firstnames":["Sholto"],"propositions":[],"lastnames":["Douglas"],"suffixes":[]},{"firstnames":["Aakanksha"],"propositions":[],"lastnames":["Chowdhery"],"suffixes":[]},{"firstnames":["Jacob"],"propositions":[],"lastnames":["Devlin"],"suffixes":[]},{"firstnames":["James"],"propositions":[],"lastnames":["Bradbury"],"suffixes":[]},{"firstnames":["Anselm"],"propositions":[],"lastnames":["Levskaya"],"suffixes":[]},{"firstnames":["Jonathan"],"propositions":[],"lastnames":["Heek"],"suffixes":[]},{"firstnames":["Kefan"],"propositions":[],"lastnames":["Xiao"],"suffixes":[]},{"firstnames":["Shivani"],"propositions":[],"lastnames":["Agrawal"],"suffixes":[]},{"firstnames":["Jeff"],"propositions":[],"lastnames":["Dean"],"suffixes":[]}],"title":"Efficiently Scaling Transformer Inference","journal":"CoRR","volume":"abs/2211.05102","year":"2022","url":"https://doi.org/10.48550/arXiv.2211.05102","doi":"10.48550/ARXIV.2211.05102","eprinttype":"arXiv","eprint":"2211.05102","timestamp":"Tue, 15 Nov 2022 00:00:00 +0100","biburl":"https://dblp.org/rec/journals/corr/abs-2211-05102.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/abs-2211-05102,\n author = {Reiner Pope and\n Sholto Douglas and\n Aakanksha Chowdhery and\n Jacob Devlin and\n James Bradbury and\n Anselm Levskaya and\n Jonathan Heek and\n Kefan Xiao and\n Shivani Agrawal and\n Jeff Dean},\n title = {Efficiently Scaling Transformer Inference},\n journal = {CoRR},\n volume = {abs/2211.05102},\n year = {2022},\n url = {https://doi.org/10.48550/arXiv.2211.05102},\n doi = {10.48550/ARXIV.2211.05102},\n eprinttype = {arXiv},\n eprint = {2211.05102},\n timestamp = {Tue, 15 Nov 2022 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/corr/abs-2211-05102.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Pope, R.","Douglas, S.","Chowdhery, A.","Devlin, J.","Bradbury, J.","Levskaya, A.","Heek, J.","Xiao, K.","Agrawal, S.","Dean, J."],"key":"DBLP:journals/corr/abs-2211-05102","id":"DBLP:journals/corr/abs-2211-05102","bibbaseid":"pope-douglas-chowdhery-devlin-bradbury-levskaya-heek-xiao-etal-efficientlyscalingtransformerinference-2022","role":"author","urls":{"Paper":"https://doi.org/10.48550/arXiv.2211.05102"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"http://dblp.org/pers/tb2/d/Dean:Jeffrey","dataSources":["rMEZSGRpGTSfEZaHM"],"keywords":[],"search_terms":["efficiently","scaling","transformer","inference","pope","douglas","chowdhery","devlin","bradbury","levskaya","heek","xiao","agrawal","dean"],"title":"Efficiently Scaling Transformer Inference","year":2022}