Beyond Tokens: Semantic-Aware Speculative Decoding for Efficient Inference by Probing Internal States. Dong, X., Wang, S., Lin, D., Chen, B., & Hassan, A. E. CoRR, 2026.
Paper doi bibtex @article{DBLP:journals/corr/abs-2602-03708,
  author     = {Dong, Ximing and Wang, Shaowei and Lin, Dayi and Chen, Boyuan and Hassan, Ahmed E.},
  title      = {Beyond Tokens: Semantic-Aware Speculative Decoding for Efficient Inference by Probing Internal States},
  journal    = {CoRR},
  volume     = {abs/2602.03708},
  year       = {2026},
  url        = {https://doi.org/10.48550/arXiv.2602.03708},
  doi        = {10.48550/ARXIV.2602.03708},
  eprinttype = {arXiv},
  eprint     = {2602.03708},
  timestamp  = {Fri, 13 Mar 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2602-03708.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
Downloads: 0
{"_id":"wzh56obmh4csiMDtg","bibbaseid":"dong-wang-lin-chen-hassan-beyondtokenssemanticawarespeculativedecodingforefficientinferencebyprobinginternalstates-2026","author_short":["Dong, X.","Wang, S.","Lin, D.","Chen, B.","Hassan, A. E."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Ximing"],"propositions":[],"lastnames":["Dong"],"suffixes":[]},{"firstnames":["Shaowei"],"propositions":[],"lastnames":["Wang"],"suffixes":[]},{"firstnames":["Dayi"],"propositions":[],"lastnames":["Lin"],"suffixes":[]},{"firstnames":["Boyuan"],"propositions":[],"lastnames":["Chen"],"suffixes":[]},{"firstnames":["Ahmed","E."],"propositions":[],"lastnames":["Hassan"],"suffixes":[]}],"title":"Beyond Tokens: Semantic-Aware Speculative Decoding for Efficient Inference by Probing Internal States","journal":"CoRR","volume":"abs/2602.03708","year":"2026","url":"https://doi.org/10.48550/arXiv.2602.03708","doi":"10.48550/ARXIV.2602.03708","eprinttype":"arXiv","eprint":"2602.03708","timestamp":"Fri, 13 Mar 2026 00:00:00 +0100","biburl":"https://dblp.org/rec/journals/corr/abs-2602-03708.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/abs-2602-03708,\n author = {Ximing Dong and\n Shaowei Wang and\n Dayi Lin and\n Boyuan Chen and\n Ahmed E. Hassan},\n title = {Beyond Tokens: Semantic-Aware Speculative Decoding for Efficient Inference\n by Probing Internal States},\n journal = {CoRR},\n volume = {abs/2602.03708},\n year = {2026},\n url = {https://doi.org/10.48550/arXiv.2602.03708},\n doi = {10.48550/ARXIV.2602.03708},\n eprinttype = {arXiv},\n eprint = {2602.03708},\n timestamp = {Fri, 13 Mar 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/corr/abs-2602-03708.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Dong, X.","Wang, S.","Lin, D.","Chen, B.","Hassan, A. 
E."],"key":"DBLP:journals/corr/abs-2602-03708","id":"DBLP:journals/corr/abs-2602-03708","bibbaseid":"dong-wang-lin-chen-hassan-beyondtokenssemanticawarespeculativedecodingforefficientinferencebyprobinginternalstates-2026","role":"author","urls":{"Paper":"https://doi.org/10.48550/arXiv.2602.03708"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://dblp.org/pid/187/9420.bib","dataSources":["FLwtLJyrjR3ZC72vq"],"keywords":[],"search_terms":["beyond","tokens","semantic","aware","speculative","decoding","efficient","inference","probing","internal","states","dong","wang","lin","chen","hassan"],"title":"Beyond Tokens: Semantic-Aware Speculative Decoding for Efficient Inference by Probing Internal States","year":2026}