E.T.: re-thinking self-attention for transformer models on GPUs. Chen, S., Huang, S., Pandey, S., Li, B., Gao, G. R., Zheng, L., Ding, C., & Liu, H. In de Supinski, B. R., Hall, M. W., & Gamblin, T., editors, International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2021, St. Louis, Missouri, USA, November 14-19, 2021, pages 25, 2021. ACM.
@inproceedings{DBLP:conf/sc/ChenHPLG0D021,
  author       = {Shiyang Chen and
                  Shaoyi Huang and
                  Santosh Pandey and
                  Bingbing Li and
                  Guang R. Gao and
                  Long Zheng and
                  Caiwen Ding and
                  Hang Liu},
  editor       = {Bronis R. de Supinski and
                  Mary W. Hall and
                  Todd Gamblin},
  title        = {{E.T.:} re-thinking self-attention for transformer models on GPUs},
  booktitle    = {International Conference for High Performance Computing, Networking,
                  Storage and Analysis, {SC} 2021, St. Louis, Missouri, USA, November
                  14-19, 2021},
  pages        = {25},
  publisher    = {{ACM}},
  year         = {2021},
  url          = {https://doi.org/10.1145/3458817.3476138},
  doi          = {10.1145/3458817.3476138},
  timestamp    = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/sc/ChenHPLG0D021.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}