AIBrix: Towards Scalable, Cost-Effective Large Language Model Inference Infrastructure. Shan, J., Gupta, V., Xu, L., Shi, H., Zhang, J., Wang, N., Xu, L., Kang, R., Liu, T., Zhang, Y., Zhu, Y., Jin, S., Lim, G., Chen, B., Chen, Z., Liu, X., Chen, X., Yin, K., Chung, C., Jiang, C., Lu, Y., Chen, J., Lin, C., Xiang, W., Shi, R., & Xie, L. CoRR, 2025.
Paper doi bibtex @article{DBLP:journals/corr/abs-2504-03648,
author = {Jiaxin Shan and
Varun Gupta and
Le Xu and
Haiyang Shi and
Jingyuan Zhang and
Ning Wang and
Linhui Xu and
Rong Kang and
Tongping Liu and
Yifei Zhang and
Yiqing Zhu and
Shuowei Jin and
Gangmuk Lim and
Binbin Chen and
Zuzhi Chen and
Xiao Liu and
Xin Chen and
Kante Yin and
Chak{-}Pong Chung and
Chenyu Jiang and
Yicheng Lu and
Jianjun Chen and
Caixue Lin and
Wu Xiang and
Rui Shi and
Liguang Xie},
title = {AIBrix: Towards Scalable, Cost-Effective Large Language Model Inference
Infrastructure},
journal = {CoRR},
volume = {abs/2504.03648},
year = {2025},
url = {https://doi.org/10.48550/arXiv.2504.03648},
doi = {10.48550/ARXIV.2504.03648},
eprinttype = {arXiv},
eprint = {2504.03648},
timestamp = {Tue, 20 May 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2504-03648.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
Downloads: 0
{"_id":"QuqF63cK2v9DK3JYX","bibbaseid":"shan-gupta-xu-shi-zhang-wang-xu-kang-etal-aibrixtowardsscalablecosteffectivelargelanguagemodelinferenceinfrastructure-2025","author_short":["Shan, J.","Gupta, V.","Xu, L.","Shi, H.","Zhang, J.","Wang, N.","Xu, L.","Kang, R.","Liu, T.","Zhang, Y.","Zhu, Y.","Jin, S.","Lim, G.","Chen, B.","Chen, Z.","Liu, X.","Chen, X.","Yin, K.","Chung, C.","Jiang, C.","Lu, Y.","Chen, J.","Lin, C.","Xiang, W.","Shi, R.","Xie, L."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Jiaxin"],"propositions":[],"lastnames":["Shan"],"suffixes":[]},{"firstnames":["Varun"],"propositions":[],"lastnames":["Gupta"],"suffixes":[]},{"firstnames":["Le"],"propositions":[],"lastnames":["Xu"],"suffixes":[]},{"firstnames":["Haiyang"],"propositions":[],"lastnames":["Shi"],"suffixes":[]},{"firstnames":["Jingyuan"],"propositions":[],"lastnames":["Zhang"],"suffixes":[]},{"firstnames":["Ning"],"propositions":[],"lastnames":["Wang"],"suffixes":[]},{"firstnames":["Linhui"],"propositions":[],"lastnames":["Xu"],"suffixes":[]},{"firstnames":["Rong"],"propositions":[],"lastnames":["Kang"],"suffixes":[]},{"firstnames":["Tongping"],"propositions":[],"lastnames":["Liu"],"suffixes":[]},{"firstnames":["Yifei"],"propositions":[],"lastnames":["Zhang"],"suffixes":[]},{"firstnames":["Yiqing"],"propositions":[],"lastnames":["Zhu"],"suffixes":[]},{"firstnames":["Shuowei"],"propositions":[],"lastnames":["Jin"],"suffixes":[]},{"firstnames":["Gangmuk"],"propositions":[],"lastnames":["Lim"],"suffixes":[]},{"firstnames":["Binbin"],"propositions":[],"lastnames":["Chen"],"suffixes":[]},{"firstnames":["Zuzhi"],"propositions":[],"lastnames":["Chen"],"suffixes":[]},{"firstnames":["Xiao"],"propositions":[],"lastnames":["Liu"],"suffixes":[]},{"firstnames":["Xin"],"propositions":[],"lastnames":["Chen"],"suffixes":[]},{"firstnames":["Kante"],"propositions":[],"lastnames":["Yin"],"suffixes":[]},{"firstnames":["Chak-Pong"],"propositions":[],"lastnames":["Chung"],"suffixes":[]},{"firstnames":["Chenyu"],"propositions":[],"lastnames":["Jiang"],"suffixes":[]},{"firstnames":["Yicheng"],"propositions":[],"lastnames":["Lu"],"suffixes":[]},{"firstnames":["Jianjun"],"propositions":[],"lastnames":["Chen"],"suffixes":[]},{"firstnames":["Caixue"],"propositions":[],"lastnames":["Lin"],"suffixes":[]},{"firstnames":["Wu"],"propositions":[],"lastnames":["Xiang"],"suffixes":[]},{"firstnames":["Rui"],"propositions":[],"lastnames":["Shi"],"suffixes":[]},{"firstnames":["Liguang"],"propositions":[],"lastnames":["Xie"],"suffixes":[]}],"title":"AIBrix: Towards Scalable, Cost-Effective Large Language Model Inference Infrastructure","journal":"CoRR","volume":"abs/2504.03648","year":"2025","url":"https://doi.org/10.48550/arXiv.2504.03648","doi":"10.48550/ARXIV.2504.03648","eprinttype":"arXiv","eprint":"2504.03648","timestamp":"Tue, 20 May 2025 01:00:00 +0200","biburl":"https://dblp.org/rec/journals/corr/abs-2504-03648.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/abs-2504-03648,\n author = {Jiaxin Shan and\n Varun Gupta and\n Le Xu and\n Haiyang Shi and\n Jingyuan Zhang and\n Ning Wang and\n Linhui Xu and\n Rong Kang and\n Tongping Liu and\n Yifei Zhang and\n Yiqing Zhu and\n Shuowei Jin and\n Gangmuk Lim and\n Binbin Chen and\n Zuzhi Chen and\n Xiao Liu and\n Xin Chen and\n Kante Yin and\n Chak{-}Pong Chung and\n Chenyu Jiang and\n Yicheng Lu and\n Jianjun Chen and\n Caixue Lin and\n Wu Xiang and\n Rui Shi and\n Liguang Xie},\n title = {AIBrix: Towards Scalable, Cost-Effective Large Language Model Inference\n Infrastructure},\n journal = {CoRR},\n volume = {abs/2504.03648},\n year = {2025},\n url = {https://doi.org/10.48550/arXiv.2504.03648},\n doi = {10.48550/ARXIV.2504.03648},\n eprinttype = {arXiv},\n eprint = {2504.03648},\n timestamp = {Tue, 20 May 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/corr/abs-2504-03648.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Shan, J.","Gupta, V.","Xu, L.","Shi, H.","Zhang, J.","Wang, N.","Xu, L.","Kang, R.","Liu, T.","Zhang, Y.","Zhu, Y.","Jin, S.","Lim, G.","Chen, B.","Chen, Z.","Liu, X.","Chen, X.","Yin, K.","Chung, C.","Jiang, C.","Lu, Y.","Chen, J.","Lin, C.","Xiang, W.","Shi, R.","Xie, L."],"key":"DBLP:journals/corr/abs-2504-03648","id":"DBLP:journals/corr/abs-2504-03648","bibbaseid":"shan-gupta-xu-shi-zhang-wang-xu-kang-etal-aibrixtowardsscalablecosteffectivelargelanguagemodelinferenceinfrastructure-2025","role":"author","urls":{"Paper":"https://doi.org/10.48550/arXiv.2504.03648"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://dblp.org/pid/207/3468.bib","dataSources":["7ecbLnRSGBTHkhiqm"],"keywords":[],"search_terms":["aibrix","towards","scalable","cost","effective","large","language","model","inference","infrastructure","shan","gupta","xu","shi","zhang","wang","xu","kang","liu","zhang","zhu","jin","lim","chen","chen","liu","chen","yin","chung","jiang","lu","chen","lin","xiang","shi","xie"],"title":"AIBrix: Towards Scalable, Cost-Effective Large Language Model Inference Infrastructure","year":2025}