AIBrix: Towards Scalable, Cost-Effective Large Language Model Inference Infrastructure. Shan, J., Gupta, V., Xu, L., Shi, H., Zhang, J., Wang, N., Xu, L., Kang, R., Liu, T., Zhang, Y., Zhu, Y., Jin, S., Lim, G., Chen, B., Chen, Z., Liu, X., Chen, X., Yin, K., Chung, C., Jiang, C., Lu, Y., Chen, J., Lin, C., Xiang, W., Shi, R., & Xie, L. CoRR, 2025.
AIBrix: Towards Scalable, Cost-Effective Large Language Model Inference Infrastructure [link] Paper | doi | bibtex
@comment{arXiv preprint record taken from dblp. The system name AIBrix is
brace-protected in the title so that bibliography styles which apply sentence
casing keep its capitalization.}
@article{DBLP:journals/corr/abs-2504-03648,
  author       = {Jiaxin Shan and
                  Varun Gupta and
                  Le Xu and
                  Haiyang Shi and
                  Jingyuan Zhang and
                  Ning Wang and
                  Linhui Xu and
                  Rong Kang and
                  Tongping Liu and
                  Yifei Zhang and
                  Yiqing Zhu and
                  Shuowei Jin and
                  Gangmuk Lim and
                  Binbin Chen and
                  Zuzhi Chen and
                  Xiao Liu and
                  Xin Chen and
                  Kante Yin and
                  Chak{-}Pong Chung and
                  Chenyu Jiang and
                  Yicheng Lu and
                  Jianjun Chen and
                  Caixue Lin and
                  Wu Xiang and
                  Rui Shi and
                  Liguang Xie},
  title        = {{AIBrix}: Towards Scalable, Cost-Effective Large Language Model Inference
                  Infrastructure},
  journal      = {CoRR},
  volume       = {abs/2504.03648},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2504.03648},
  doi          = {10.48550/arXiv.2504.03648},
  eprinttype   = {arXiv},
  eprint       = {2504.03648},
  timestamp    = {Tue, 20 May 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2504-03648.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0