Beyond KV caching: Shared attention for efficient LLMs

Beyond KV caching: Shared attention for efficient LLMs. Liao, B. & Vargas, D. V. Neurocomputing, 648:130587, 2025.

@comment{Journal article record from dblp (see bibsource). Acronyms in the
  title are brace-protected so sentence-casing styles keep their capitals;
  "pages" holds the article number, as this record has no page range.}
@article{DBLP:journals/ijon/LiaoV25,
  author       = {Liao, Bingli and
                  Vargas, Danilo Vasconcellos},
  title        = {Beyond {KV} caching: Shared attention for efficient {LLMs}},
  journal      = {Neurocomputing},
  volume       = {648},
  pages        = {130587},
  year         = {2025},
  url          = {https://doi.org/10.1016/j.neucom.2025.130587},
  doi          = {10.1016/J.NEUCOM.2025.130587},
  timestamp    = {Sat, 15 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ijon/LiaoV25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0

{"_id":"Tonno5dpixuxZRkHC","bibbaseid":"liao-vargas-beyondkvcachingsharedattentionforefficientllms-2025","author_short":["Liao, B.","Vargas, D. V."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Bingli"],"propositions":[],"lastnames":["Liao"],"suffixes":[]},{"firstnames":["Danilo","Vasconcellos"],"propositions":[],"lastnames":["Vargas"],"suffixes":[]}],"title":"Beyond KV caching: Shared attention for efficient LLMs","journal":"Neurocomputing","volume":"648","pages":"130587","year":"2025","url":"https://doi.org/10.1016/j.neucom.2025.130587","doi":"10.1016/J.NEUCOM.2025.130587","timestamp":"Sat, 15 Nov 2025 00:00:00 +0100","biburl":"https://dblp.org/rec/journals/ijon/LiaoV25.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/ijon/LiaoV25,\n author = {Bingli Liao and\n Danilo Vasconcellos Vargas},\n title = {Beyond {KV} caching: Shared attention for efficient LLMs},\n journal = {Neurocomputing},\n volume = {648},\n pages = {130587},\n year = {2025},\n url = {https://doi.org/10.1016/j.neucom.2025.130587},\n doi = {10.1016/J.NEUCOM.2025.130587},\n timestamp = {Sat, 15 Nov 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/ijon/LiaoV25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Liao, B.","Vargas, D. V."],"key":"DBLP:journals/ijon/LiaoV25","id":"DBLP:journals/ijon/LiaoV25","bibbaseid":"liao-vargas-beyondkvcachingsharedattentionforefficientllms-2025","role":"author","urls":{"Paper":"https://doi.org/10.1016/j.neucom.2025.130587"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://dblp.org/pid/40/9358.bib","dataSources":["pBz5FgvEoaZcWJkbr"],"keywords":[],"search_terms":["beyond","caching","shared","attention","efficient","llms","liao","vargas"],"title":"Beyond KV caching: Shared attention for efficient LLMs","year":2025}