A\({}^{\mbox{2}}\)ATS: Retrieval-Based KV Cache Reduction via Windowed Rotary Position Embedding and Query-Aware Vector Quantization. He, J., Xing, J., Wang, N., Xu, R., Wu, S., Zhou, P., Liu, Q., Xue, C. J., & Li, Q. CoRR, 2025.
Paper doi bibtex @article{DBLP:journals/corr/abs-2502-12665,
author = {Junhui He and
Junna Xing and
Nan Wang and
Rui Xu and
Shangyu Wu and
Peng Zhou and
Qiang Liu and
Chun Jason Xue and
Qingan Li},
title = {A\({}^{\mbox{2}}\)ATS: Retrieval-Based {KV} Cache Reduction via Windowed
Rotary Position Embedding and Query-Aware Vector Quantization},
journal = {CoRR},
volume = {abs/2502.12665},
year = {2025},
url = {https://doi.org/10.48550/arXiv.2502.12665},
doi = {10.48550/ARXIV.2502.12665},
eprinttype = {arXiv},
eprint = {2502.12665},
timestamp = {Tue, 01 Apr 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2502-12665.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
Downloads: 0
{"_id":"5D4TMQzMKvjcey3MF","bibbaseid":"he-xing-wang-xu-wu-zhou-liu-xue-etal-ambox2atsretrievalbasedkvcachereductionviawindowedrotarypositionembeddingandqueryawarevectorquantization-2025","author_short":["He, J.","Xing, J.","Wang, N.","Xu, R.","Wu, S.","Zhou, P.","Liu, Q.","Xue, C. J.","Li, Q."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Junhui"],"propositions":[],"lastnames":["He"],"suffixes":[]},{"firstnames":["Junna"],"propositions":[],"lastnames":["Xing"],"suffixes":[]},{"firstnames":["Nan"],"propositions":[],"lastnames":["Wang"],"suffixes":[]},{"firstnames":["Rui"],"propositions":[],"lastnames":["Xu"],"suffixes":[]},{"firstnames":["Shangyu"],"propositions":[],"lastnames":["Wu"],"suffixes":[]},{"firstnames":["Peng"],"propositions":[],"lastnames":["Zhou"],"suffixes":[]},{"firstnames":["Qiang"],"propositions":[],"lastnames":["Liu"],"suffixes":[]},{"firstnames":["Chun","Jason"],"propositions":[],"lastnames":["Xue"],"suffixes":[]},{"firstnames":["Qingan"],"propositions":[],"lastnames":["Li"],"suffixes":[]}],"title":"A\\(^\\mbox2\\)ATS: Retrieval-Based KV Cache Reduction via Windowed Rotary Position Embedding and Query-Aware Vector Quantization","journal":"CoRR","volume":"abs/2502.12665","year":"2025","url":"https://doi.org/10.48550/arXiv.2502.12665","doi":"10.48550/ARXIV.2502.12665","eprinttype":"arXiv","eprint":"2502.12665","timestamp":"Tue, 01 Apr 2025 01:00:00 +0200","biburl":"https://dblp.org/rec/journals/corr/abs-2502-12665.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/abs-2502-12665,\n author = {Junhui He and\n Junna Xing and\n Nan Wang and\n Rui Xu and\n Shangyu Wu and\n Peng Zhou and\n Qiang Liu and\n Chun Jason Xue and\n Qingan Li},\n title = {A\\({}^{\\mbox{2}}\\)ATS: Retrieval-Based {KV} Cache Reduction via Windowed\n Rotary Position Embedding and Query-Aware Vector Quantization},\n journal = {CoRR},\n volume = {abs/2502.12665},\n year = {2025},\n url = 
{https://doi.org/10.48550/arXiv.2502.12665},\n doi = {10.48550/ARXIV.2502.12665},\n eprinttype = {arXiv},\n eprint = {2502.12665},\n timestamp = {Tue, 01 Apr 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/corr/abs-2502-12665.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["He, J.","Xing, J.","Wang, N.","Xu, R.","Wu, S.","Zhou, P.","Liu, Q.","Xue, C. J.","Li, Q."],"key":"DBLP:journals/corr/abs-2502-12665","id":"DBLP:journals/corr/abs-2502-12665","bibbaseid":"he-xing-wang-xu-wu-zhou-liu-xue-etal-ambox2atsretrievalbasedkvcachereductionviawindowedrotarypositionembeddingandqueryawarevectorquantization-2025","role":"author","urls":{"paper":"https://doi.org/10.48550/arXiv.2502.12665"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://dblp.org/pid/x/ChunJasonXue.bib","dataSources":["Zmgr4hiaLbNyiBeRm"],"keywords":[],"search_terms":["mbox2","ats","retrieval","based","cache","reduction","via","windowed","rotary","position","embedding","query","aware","vector","quantization","he","xing","wang","xu","wu","zhou","liu","xue","li"],"title":"A\\(^\\mbox2\\)ATS: Retrieval-Based KV Cache Reduction via Windowed Rotary Position Embedding and Query-Aware Vector Quantization","year":2025}