Beyond KV caching: Shared attention for efficient LLMs. Liao, B. & Vargas, D. V. Neurocomputing, 648:130587, 2025.
Paper doi bibtex @article{DBLP:journals/ijon/LiaoV25,
author = {Bingli Liao and
Danilo Vasconcellos Vargas},
title = {Beyond {KV} caching: Shared attention for efficient LLMs},
journal = {Neurocomputing},
volume = {648},
pages = {130587},
year = {2025},
url = {https://doi.org/10.1016/j.neucom.2025.130587},
doi = {10.1016/J.NEUCOM.2025.130587},
timestamp = {Sun, 29 Jun 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/journals/ijon/LiaoV25.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
Downloads: 0
{"_id":"ddP3HB9AwnaE4ndNj","bibbaseid":"liao-vargas-beyondkvcachingsharedattentionforefficientllms-2025","author_short":["Liao, B.","Vargas, D. V."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Bingli"],"propositions":[],"lastnames":["Liao"],"suffixes":[]},{"firstnames":["Danilo","Vasconcellos"],"propositions":[],"lastnames":["Vargas"],"suffixes":[]}],"title":"Beyond KV caching: Shared attention for efficient LLMs","journal":"Neurocomputing","volume":"648","pages":"130587","year":"2025","url":"https://doi.org/10.1016/j.neucom.2025.130587","doi":"10.1016/J.NEUCOM.2025.130587","timestamp":"Sun, 29 Jun 2025 01:00:00 +0200","biburl":"https://dblp.org/rec/journals/ijon/LiaoV25.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/ijon/LiaoV25,\n author = {Bingli Liao and\n Danilo Vasconcellos Vargas},\n title = {Beyond {KV} caching: Shared attention for efficient LLMs},\n journal = {Neurocomputing},\n volume = {648},\n pages = {130587},\n year = {2025},\n url = {https://doi.org/10.1016/j.neucom.2025.130587},\n doi = {10.1016/J.NEUCOM.2025.130587},\n timestamp = {Sun, 29 Jun 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/ijon/LiaoV25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Liao, B.","Vargas, D. V."],"key":"DBLP:journals/ijon/LiaoV25","id":"DBLP:journals/ijon/LiaoV25","bibbaseid":"liao-vargas-beyondkvcachingsharedattentionforefficientllms-2025","role":"author","urls":{"Paper":"https://doi.org/10.1016/j.neucom.2025.130587"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://dblp.org/pid/40/9358.bib","dataSources":["pBz5FgvEoaZcWJkbr"],"keywords":[],"search_terms":["beyond","caching","shared","attention","efficient","llms","liao","vargas"],"title":"Beyond KV caching: Shared attention for efficient LLMs","year":2025}