No Buffer, No Bottleneck: Efficient Zero-Copy KV Cache Offloading for Long-Context LLMs. Luo, S & Shen, H. In The 20th USENIX Symposium on Operating Systems Design and Implementation (OSDI), Seattle, WA, July, 2026. bibtex @inproceedings{luo_no_2026,
address = {Seattle, WA},
title = {No {Buffer}, {No} {Bottleneck}: {Efficient} {Zero}-{Copy} {KV} {Cache} {Offloading} for {Long}-{Context} {LLMs}},
booktitle = {The 20th {USENIX} {Symposium} on {Operating} {Systems} {Design} and {Implementation} ({OSDI})},
author = {Luo, S and Shen, Haiying},
month = jul,
year = {2026},
keywords = {Foundational, SYS: CosmicAI Contact Author, WG: Explorable},
}
Downloads: 0
{"_id":"AhhrLw9g9x24YNcER","bibbaseid":"luo-shen-nobuffernobottleneckefficientzerocopykvcacheoffloadingforlongcontextllms-2026","author_short":["Luo, S","Shen, H."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","address":"Seattle, WA","title":"No Buffer, No Bottleneck: Efficient Zero-Copy KV Cache Offloading for Long-Context LLMs","booktitle":"The 20th USENIX Symposium on Operating Systems Design and Implementation (OSDI)","author":[{"propositions":[],"lastnames":["Luo"],"firstnames":["S"],"suffixes":[]},{"propositions":[],"lastnames":["Shen"],"firstnames":["Haiying"],"suffixes":[]}],"month":"July","year":"2026","keywords":"Foundational, SYS: CosmicAI Contact Author, WG: Explorable","bibtex":"@inproceedings{luo_no_2026,\n\taddress = {Seattle, WA},\n\ttitle = {No {Buffer}, {No} {Bottleneck}: {Efficient} {Zero}-{Copy} {KV} {Cache} {Offloading} for {Long}-{Context} {LLMs}},\n\tbooktitle = {The 20th {USENIX} {Symposium} on {Operating} {Systems} {Design} and {Implementation} ({OSDI})},\n\tauthor = {Luo, S and Shen, Haiying},\n\tmonth = jul,\n\tyear = {2026},\n\tkeywords = {Foundational, SYS: CosmicAI Contact Author, WG: Explorable},\n}\n\n\n\n","author_short":["Luo, S","Shen, H."],"key":"luo_no_2026","id":"luo_no_2026","bibbaseid":"luo-shen-nobuffernobottleneckefficientzerocopykvcacheoffloadingforlongcontextllms-2026","role":"author","urls":{},"keyword":["Foundational","SYS: CosmicAI Contact Author","WG: Explorable"],"metadata":{"authorlinks":{}}},"bibtype":"inproceedings","biburl":"https://bibbase.org/zotero-group/pratikmhatre/5933976","dataSources":["yJr5AAtJ5Sz3Q4WT4"],"keywords":["foundational","sys: cosmicai contact author","wg: explorable"],"search_terms":["buffer","bottleneck","efficient","zero","copy","cache","offloading","long","context","llms","luo","shen"],"title":"No Buffer, No Bottleneck: Efficient Zero-Copy KV Cache Offloading for Long-Context LLMs","year":2026}