Beyond KV Caching: Shared Attention for Efficient LLMs

Beyond KV Caching: Shared Attention for Efficient LLMs. Liao, B. & Vargas, D. V. CoRR, 2024.

% arXiv preprint as indexed by DBLP (journal "CoRR", volume = arXiv identifier).
% Acronyms in the title are brace-protected so that sentence-casing
% bibliography styles do not lowercase them.
@article{DBLP:journals/corr/abs-2407-12866,
  author       = {Liao, Bingli and
                  Vargas, Danilo Vasconcellos},
  title        = {Beyond {KV} Caching: Shared Attention for Efficient {LLMs}},
  journal      = {CoRR},
  volume       = {abs/2407.12866},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2407.12866},
  doi          = {10.48550/ARXIV.2407.12866},
  eprinttype   = {arXiv},
  eprint       = {2407.12866},
  timestamp    = {Thu, 22 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2407-12866.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0

{"_id":"nwTtJdquieHNtg5wE","bibbaseid":"liao-vargas-beyondkvcachingsharedattentionforefficientllms-2024","author_short":["Liao, B.","Vargas, D. V."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Bingli"],"propositions":[],"lastnames":["Liao"],"suffixes":[]},{"firstnames":["Danilo","Vasconcellos"],"propositions":[],"lastnames":["Vargas"],"suffixes":[]}],"title":"Beyond KV Caching: Shared Attention for Efficient LLMs","journal":"CoRR","volume":"abs/2407.12866","year":"2024","url":"https://doi.org/10.48550/arXiv.2407.12866","doi":"10.48550/ARXIV.2407.12866","eprinttype":"arXiv","eprint":"2407.12866","timestamp":"Thu, 22 Aug 2024 01:00:00 +0200","biburl":"https://dblp.org/rec/journals/corr/abs-2407-12866.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/abs-2407-12866,\n author = {Bingli Liao and\n Danilo Vasconcellos Vargas},\n title = {Beyond {KV} Caching: Shared Attention for Efficient LLMs},\n journal = {CoRR},\n volume = {abs/2407.12866},\n year = {2024},\n url = {https://doi.org/10.48550/arXiv.2407.12866},\n doi = {10.48550/ARXIV.2407.12866},\n eprinttype = {arXiv},\n eprint = {2407.12866},\n timestamp = {Thu, 22 Aug 2024 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/corr/abs-2407-12866.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Liao, B.","Vargas, D. V."],"key":"DBLP:journals/corr/abs-2407-12866","id":"DBLP:journals/corr/abs-2407-12866","bibbaseid":"liao-vargas-beyondkvcachingsharedattentionforefficientllms-2024","role":"author","urls":{"Paper":"https://doi.org/10.48550/arXiv.2407.12866"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://dblp.org/pid/40/9358.bib","dataSources":["pBz5FgvEoaZcWJkbr"],"keywords":[],"search_terms":["beyond","caching","shared","attention","efficient","llms","liao","vargas"],"title":"Beyond KV Caching: Shared Attention for Efficient LLMs","year":2024}