Beyond KV caching: Shared attention for efficient LLMs. Liao, B. & Vargas, D. V. Neurocomputing, 648:130587, 2025.
Beyond KV caching: Shared attention for efficient LLMs [link]Paper  doi  bibtex   
% Journal article record taken from dblp (see the biburl and bibsource fields).
% The pages field appears to hold the article number: it matches the DOI suffix.
% Acronyms in the title are brace-protected so sentence-casing styles keep them.
@article{DBLP:journals/ijon/LiaoV25,
  author       = {Liao, Bingli and
                  Vargas, Danilo Vasconcellos},
  title        = {Beyond {KV} caching: Shared attention for efficient {LLMs}},
  journal      = {Neurocomputing},
  volume       = {648},
  pages        = {130587},
  year         = {2025},
  url          = {https://doi.org/10.1016/j.neucom.2025.130587},
  doi          = {10.1016/J.NEUCOM.2025.130587},
  timestamp    = {Sun, 29 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijon/LiaoV25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0