DeepSeek-V3 Technical Report. DeepSeek-AI, Liu, A., Feng, B., Xue, B., Wang, B., Wu, B., Lu, C., Zhao, C., Deng, C., Zhang, C., Ruan, C., Dai, D., Guo, D., Yang, D., Chen, D., Ji, D., Li, E., Lin, F., Dai, F., Luo, F., Hao, G., Chen, G., Li, G., Zhang, H., Bao, H., Xu, H., Wang, H., Zhang, H., Ding, H., Xin, H., Gao, H., Li, H., Qu, H., Cai, J. L., Liang, J., Guo, J., Ni, J., Li, J., Wang, J., Chen, J., Chen, J., Yuan, J., Qiu, J., Li, J., Song, J., Dong, K., Hu, K., Gao, K., Guan, K., Huang, K., Yu, K., Wang, L., Zhang, L., Xu, L., Xia, L., Zhao, L., Wang, L., Zhang, L., Li, M., Wang, M., Zhang, M., Zhang, M., Tang, M., Li, M., Tian, N., Huang, P., Wang, P., Zhang, P., Wang, Q., Zhu, Q., Chen, Q., Du, Q., Chen, R. J., Jin, R. L., Ge, R., Zhang, R., Pan, R., Wang, R., Xu, R., Zhang, R., Chen, R., Li, S. S., Lu, S., Zhou, S., Chen, S., Wu, S., Ye, S., Ye, S., Ma, S., Wang, S., Zhou, S., Yu, S., Zhou, S., Pan, S., Wang, T., Yun, T., Pei, T., Sun, T., Xiao, W. L., Zeng, W., Zhao, W., An, W., Liu, W., Liang, W., Gao, W., Yu, W., Zhang, W., Li, X. Q., Jin, X., Wang, X., Bi, X., Liu, X., Wang, X., Shen, X., Chen, X., Zhang, X., Chen, X., Nie, X., Sun, X., Wang, X., Cheng, X., Liu, X., Xie, X., Liu, X., Yu, X., Song, X., Shan, X., Zhou, X., Yang, X., Li, X., Su, X., Lin, X., Li, Y. K., Wang, Y. Q., Wei, Y. X., Zhu, Y. X., Zhang, Y., Xu, Y., Xu, Y., Huang, Y., Li, Y., Zhao, Y., Sun, Y., Li, Y., Wang, Y., Yu, Y., Zheng, Y., Zhang, Y., Shi, Y., Xiong, Y., He, Y., Tang, Y., Piao, Y., Wang, Y., Tan, Y., Ma, Y., Liu, Y., Guo, Y., Wu, Y., Ou, Y., Zhu, Y., Wang, Y., Gong, Y., Zou, Y., He, Y., Zha, Y., Xiong, Y., Ma, Y., Yan, Y., Luo, Y., You, Y., Liu, Y., Zhou, Y., Wu, Z. F., Ren, Z. Z., Ren, Z., Sha, Z., Fu, Z., Xu, Z., Huang, Z., Zhang, Z., Xie, Z., Zhang, Z., Hao, Z., Gou, Z., Ma, Z., Yan, Z., Shao, Z., Xu, Z., Wu, Z., Zhang, Z., Li, Z., Gu, Z., Zhu, Z., Liu, Z., Li, Z., Xie, Z., Song, Z., Gao, Z., & Pan, Z. December, 2024. arXiv:2412.19437 [cs] version: 1
[Paper] [doi] [abstract] [bibtex]

We present DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. To achieve efficient inference and cost-effective training, DeepSeek-V3 adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures, which were thoroughly validated in DeepSeek-V2. Furthermore, DeepSeek-V3 pioneers an auxiliary-loss-free strategy for load balancing and sets a multi-token prediction training objective for stronger performance. We pre-train DeepSeek-V3 on 14.8 trillion diverse and high-quality tokens, followed by Supervised Fine-Tuning and Reinforcement Learning stages to fully harness its capabilities. Comprehensive evaluations reveal that DeepSeek-V3 outperforms other open-source models and achieves performance comparable to leading closed-source models. Despite its excellent performance, DeepSeek-V3 requires only 2.788M H800 GPU hours for its full training. In addition, its training process is remarkably stable. Throughout the entire training process, we did not experience any irrecoverable loss spikes or perform any rollbacks. The model checkpoints are available at https://github.com/deepseek-ai/DeepSeek-V3.
% DeepSeek-V3 Technical Report (arXiv preprint 2412.19437, version 1).
% The arXiv identifier is stored in eprint/archiveprefix/primaryclass rather
% than in the free-text note field; the citation key is unchanged.
% NOTE(review): primaryclass is set to cs.CL; the exported note only said
%   "[cs]" and the keywords list both cs.AI and cs.CL -- confirm against the
%   arXiv listing.
% NOTE(review): "Ye, Shengfeng" and "Xu, Yanhong" each appear twice in a row
%   in the exported author list; both occurrences are kept here -- confirm
%   against the arXiv record whether they are duplicates before removing.
@misc{deepseek-ai_deepseek-v3_2024,
  author        = {{DeepSeek-AI} and Liu, Aixin and Feng, Bei and Xue, Bing and Wang, Bingxuan
                   and Wu, Bochao and Lu, Chengda and Zhao, Chenggang and Deng, Chengqi
                   and Zhang, Chenyu and Ruan, Chong and Dai, Damai and Guo, Daya
                   and Yang, Dejian and Chen, Deli and Ji, Dongjie and Li, Erhang
                   and Lin, Fangyun and Dai, Fucong and Luo, Fuli and Hao, Guangbo
                   and Chen, Guanting and Li, Guowei and Zhang, H. and Bao, Han
                   and Xu, Hanwei and Wang, Haocheng and Zhang, Haowei and Ding, Honghui
                   and Xin, Huajian and Gao, Huazuo and Li, Hui and Qu, Hui
                   and Cai, J. L. and Liang, Jian and Guo, Jianzhong and Ni, Jiaqi
                   and Li, Jiashi and Wang, Jiawei and Chen, Jin and Chen, Jingchang
                   and Yuan, Jingyang and Qiu, Junjie and Li, Junlong and Song, Junxiao
                   and Dong, Kai and Hu, Kai and Gao, Kaige and Guan, Kang
                   and Huang, Kexin and Yu, Kuai and Wang, Lean and Zhang, Lecong
                   and Xu, Lei and Xia, Leyi and Zhao, Liang and Wang, Litong
                   and Zhang, Liyue and Li, Meng and Wang, Miaojun and Zhang, Mingchuan
                   and Zhang, Minghua and Tang, Minghui and Li, Mingming and Tian, Ning
                   and Huang, Panpan and Wang, Peiyi and Zhang, Peng and Wang, Qiancheng
                   and Zhu, Qihao and Chen, Qinyu and Du, Qiushi and Chen, R. J.
                   and Jin, R. L. and Ge, Ruiqi and Zhang, Ruisong and Pan, Ruizhe
                   and Wang, Runji and Xu, Runxin and Zhang, Ruoyu and Chen, Ruyi
                   and Li, S. S. and Lu, Shanghao and Zhou, Shangyan and Chen, Shanhuang
                   and Wu, Shaoqing and Ye, Shengfeng and Ye, Shengfeng and Ma, Shirong
                   and Wang, Shiyu and Zhou, Shuang and Yu, Shuiping and Zhou, Shunfeng
                   and Pan, Shuting and Wang, T. and Yun, Tao and Pei, Tian
                   and Sun, Tianyu and Xiao, W. L. and Zeng, Wangding and Zhao, Wanjia
                   and An, Wei and Liu, Wen and Liang, Wenfeng and Gao, Wenjun
                   and Yu, Wenqin and Zhang, Wentao and Li, X. Q. and Jin, Xiangyue
                   and Wang, Xianzu and Bi, Xiao and Liu, Xiaodong and Wang, Xiaohan
                   and Shen, Xiaojin and Chen, Xiaokang and Zhang, Xiaokang and Chen, Xiaosha
                   and Nie, Xiaotao and Sun, Xiaowen and Wang, Xiaoxiang and Cheng, Xin
                   and Liu, Xin and Xie, Xin and Liu, Xingchao and Yu, Xingkai
                   and Song, Xinnan and Shan, Xinxia and Zhou, Xinyi and Yang, Xinyu
                   and Li, Xinyuan and Su, Xuecheng and Lin, Xuheng and Li, Y. K.
                   and Wang, Y. Q. and Wei, Y. X. and Zhu, Y. X. and Zhang, Yang
                   and Xu, Yanhong and Xu, Yanhong and Huang, Yanping and Li, Yao
                   and Zhao, Yao and Sun, Yaofeng and Li, Yaohui and Wang, Yaohui
                   and Yu, Yi and Zheng, Yi and Zhang, Yichao and Shi, Yifan
                   and Xiong, Yiliang and He, Ying and Tang, Ying and Piao, Yishi
                   and Wang, Yisong and Tan, Yixuan and Ma, Yiyang and Liu, Yiyuan
                   and Guo, Yongqiang and Wu, Yu and Ou, Yuan and Zhu, Yuchen
                   and Wang, Yuduan and Gong, Yue and Zou, Yuheng and He, Yujia
                   and Zha, Yukun and Xiong, Yunfan and Ma, Yunxian and Yan, Yuting
                   and Luo, Yuxiang and You, Yuxiang and Liu, Yuxuan and Zhou, Yuyang
                   and Wu, Z. F. and Ren, Z. Z. and Ren, Zehui and Sha, Zhangli
                   and Fu, Zhe and Xu, Zhean and Huang, Zhen and Zhang, Zhen
                   and Xie, Zhenda and Zhang, Zhengyan and Hao, Zhewen and Gou, Zhibin
                   and Ma, Zhicheng and Yan, Zhigang and Shao, Zhihong and Xu, Zhipeng
                   and Wu, Zhiyu and Zhang, Zhongyu and Li, Zhuoshu and Gu, Zihui
                   and Zhu, Zijia and Liu, Zijun and Li, Zilin and Xie, Ziwei
                   and Song, Ziyang and Gao, Ziyi and Pan, Zizheng},
  title         = {{DeepSeek-V3} Technical Report},
  year          = {2024},
  month         = dec,
  publisher     = {arXiv},
  eprint        = {2412.19437},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  doi           = {10.48550/arXiv.2412.19437},
  url           = {https://arxiv.org/abs/2412.19437},
  urldate       = {2025-02-03},
  note          = {Version 1},
  abstract      = {We present DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. To achieve efficient inference and cost-effective training, DeepSeek-V3 adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures, which were thoroughly validated in DeepSeek-V2. Furthermore, DeepSeek-V3 pioneers an auxiliary-loss-free strategy for load balancing and sets a multi-token prediction training objective for stronger performance. We pre-train DeepSeek-V3 on 14.8 trillion diverse and high-quality tokens, followed by Supervised Fine-Tuning and Reinforcement Learning stages to fully harness its capabilities. Comprehensive evaluations reveal that DeepSeek-V3 outperforms other open-source models and achieves performance comparable to leading closed-source models. Despite its excellent performance, DeepSeek-V3 requires only 2.788M H800 GPU hours for its full training. In addition, its training process is remarkably stable. Throughout the entire training process, we did not experience any irrecoverable loss spikes or perform any rollbacks. The model checkpoints are available at https://github.com/deepseek-ai/DeepSeek-V3.},
  keywords      = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language},
}
Downloads: 0
{"_id":"c7cAo3LMd7GAracHj","bibbaseid":"deepseekai-liu-feng-xue-wang-wu-lu-zhao-etal-deepseekv3technicalreport-2024","author_short":["DeepSeek-AI","Liu, A.","Feng, B.","Xue, B.","Wang, B.","Wu, B.","Lu, C.","Zhao, C.","Deng, C.","Zhang, C.","Ruan, C.","Dai, D.","Guo, D.","Yang, D.","Chen, D.","Ji, D.","Li, E.","Lin, F.","Dai, F.","Luo, F.","Hao, G.","Chen, G.","Li, G.","Zhang, H.","Bao, H.","Xu, H.","Wang, H.","Zhang, H.","Ding, H.","Xin, H.","Gao, H.","Li, H.","Qu, H.","Cai, J. L.","Liang, J.","Guo, J.","Ni, J.","Li, J.","Wang, J.","Chen, J.","Chen, J.","Yuan, J.","Qiu, J.","Li, J.","Song, J.","Dong, K.","Hu, K.","Gao, K.","Guan, K.","Huang, K.","Yu, K.","Wang, L.","Zhang, L.","Xu, L.","Xia, L.","Zhao, L.","Wang, L.","Zhang, L.","Li, M.","Wang, M.","Zhang, M.","Zhang, M.","Tang, M.","Li, M.","Tian, N.","Huang, P.","Wang, P.","Zhang, P.","Wang, Q.","Zhu, Q.","Chen, Q.","Du, Q.","Chen, R. J.","Jin, R. L.","Ge, R.","Zhang, R.","Pan, R.","Wang, R.","Xu, R.","Zhang, R.","Chen, R.","Li, S. S.","Lu, S.","Zhou, S.","Chen, S.","Wu, S.","Ye, S.","Ye, S.","Ma, S.","Wang, S.","Zhou, S.","Yu, S.","Zhou, S.","Pan, S.","Wang, T.","Yun, T.","Pei, T.","Sun, T.","Xiao, W. L.","Zeng, W.","Zhao, W.","An, W.","Liu, W.","Liang, W.","Gao, W.","Yu, W.","Zhang, W.","Li, X. Q.","Jin, X.","Wang, X.","Bi, X.","Liu, X.","Wang, X.","Shen, X.","Chen, X.","Zhang, X.","Chen, X.","Nie, X.","Sun, X.","Wang, X.","Cheng, X.","Liu, X.","Xie, X.","Liu, X.","Yu, X.","Song, X.","Shan, X.","Zhou, X.","Yang, X.","Li, X.","Su, X.","Lin, X.","Li, Y. K.","Wang, Y. Q.","Wei, Y. X.","Zhu, Y. X.","Zhang, Y.","Xu, Y.","Xu, Y.","Huang, Y.","Li, Y.","Zhao, Y.","Sun, Y.","Li, Y.","Wang, Y.","Yu, Y.","Zheng, Y.","Zhang, Y.","Shi, Y.","Xiong, Y.","He, Y.","Tang, Y.","Piao, Y.","Wang, Y.","Tan, Y.","Ma, Y.","Liu, Y.","Guo, Y.","Wu, Y.","Ou, Y.","Zhu, Y.","Wang, Y.","Gong, Y.","Zou, Y.","He, Y.","Zha, Y.","Xiong, Y.","Ma, Y.","Yan, Y.","Luo, Y.","You, Y.","Liu, Y.","Zhou, Y.","Wu, Z. F.","Ren, Z. 
Z.","Ren, Z.","Sha, Z.","Fu, Z.","Xu, Z.","Huang, Z.","Zhang, Z.","Xie, Z.","Zhang, Z.","Hao, Z.","Gou, Z.","Ma, Z.","Yan, Z.","Shao, Z.","Xu, Z.","Wu, Z.","Zhang, Z.","Li, Z.","Gu, Z.","Zhu, Z.","Liu, Z.","Li, Z.","Xie, Z.","Song, Z.","Gao, Z.","Pan, Z."],"bibdata":{"bibtype":"misc","type":"misc","title":"DeepSeek-V3 Technical Report","url":"http://arxiv.org/abs/2412.19437","doi":"10.48550/arXiv.2412.19437","abstract":"We present DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. To achieve efficient inference and cost-effective training, DeepSeek-V3 adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures, which were thoroughly validated in DeepSeek-V2. Furthermore, DeepSeek-V3 pioneers an auxiliary-loss-free strategy for load balancing and sets a multi-token prediction training objective for stronger performance. We pre-train DeepSeek-V3 on 14.8 trillion diverse and high-quality tokens, followed by Supervised Fine-Tuning and Reinforcement Learning stages to fully harness its capabilities. Comprehensive evaluations reveal that DeepSeek-V3 outperforms other open-source models and achieves performance comparable to leading closed-source models. Despite its excellent performance, DeepSeek-V3 requires only 2.788M H800 GPU hours for its full training. In addition, its training process is remarkably stable. Throughout the entire training process, we did not experience any irrecoverable loss spikes or perform any rollbacks. 
The model checkpoints are available at https://github.com/deepseek-ai/DeepSeek-V3.","urldate":"2025-02-03","publisher":"arXiv","author":[{"firstnames":[],"propositions":[],"lastnames":["DeepSeek-AI"],"suffixes":[]},{"propositions":[],"lastnames":["Liu"],"firstnames":["Aixin"],"suffixes":[]},{"propositions":[],"lastnames":["Feng"],"firstnames":["Bei"],"suffixes":[]},{"propositions":[],"lastnames":["Xue"],"firstnames":["Bing"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Bingxuan"],"suffixes":[]},{"propositions":[],"lastnames":["Wu"],"firstnames":["Bochao"],"suffixes":[]},{"propositions":[],"lastnames":["Lu"],"firstnames":["Chengda"],"suffixes":[]},{"propositions":[],"lastnames":["Zhao"],"firstnames":["Chenggang"],"suffixes":[]},{"propositions":[],"lastnames":["Deng"],"firstnames":["Chengqi"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Chenyu"],"suffixes":[]},{"propositions":[],"lastnames":["Ruan"],"firstnames":["Chong"],"suffixes":[]},{"propositions":[],"lastnames":["Dai"],"firstnames":["Damai"],"suffixes":[]},{"propositions":[],"lastnames":["Guo"],"firstnames":["Daya"],"suffixes":[]},{"propositions":[],"lastnames":["Yang"],"firstnames":["Dejian"],"suffixes":[]},{"propositions":[],"lastnames":["Chen"],"firstnames":["Deli"],"suffixes":[]},{"propositions":[],"lastnames":["Ji"],"firstnames":["Dongjie"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Erhang"],"suffixes":[]},{"propositions":[],"lastnames":["Lin"],"firstnames":["Fangyun"],"suffixes":[]},{"propositions":[],"lastnames":["Dai"],"firstnames":["Fucong"],"suffixes":[]},{"propositions":[],"lastnames":["Luo"],"firstnames":["Fuli"],"suffixes":[]},{"propositions":[],"lastnames":["Hao"],"firstnames":["Guangbo"],"suffixes":[]},{"propositions":[],"lastnames":["Chen"],"firstnames":["Guanting"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Guowei"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["H."],"suff
ixes":[]},{"propositions":[],"lastnames":["Bao"],"firstnames":["Han"],"suffixes":[]},{"propositions":[],"lastnames":["Xu"],"firstnames":["Hanwei"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Haocheng"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Haowei"],"suffixes":[]},{"propositions":[],"lastnames":["Ding"],"firstnames":["Honghui"],"suffixes":[]},{"propositions":[],"lastnames":["Xin"],"firstnames":["Huajian"],"suffixes":[]},{"propositions":[],"lastnames":["Gao"],"firstnames":["Huazuo"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Hui"],"suffixes":[]},{"propositions":[],"lastnames":["Qu"],"firstnames":["Hui"],"suffixes":[]},{"propositions":[],"lastnames":["Cai"],"firstnames":["J.","L."],"suffixes":[]},{"propositions":[],"lastnames":["Liang"],"firstnames":["Jian"],"suffixes":[]},{"propositions":[],"lastnames":["Guo"],"firstnames":["Jianzhong"],"suffixes":[]},{"propositions":[],"lastnames":["Ni"],"firstnames":["Jiaqi"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Jiashi"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Jiawei"],"suffixes":[]},{"propositions":[],"lastnames":["Chen"],"firstnames":["Jin"],"suffixes":[]},{"propositions":[],"lastnames":["Chen"],"firstnames":["Jingchang"],"suffixes":[]},{"propositions":[],"lastnames":["Yuan"],"firstnames":["Jingyang"],"suffixes":[]},{"propositions":[],"lastnames":["Qiu"],"firstnames":["Junjie"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Junlong"],"suffixes":[]},{"propositions":[],"lastnames":["Song"],"firstnames":["Junxiao"],"suffixes":[]},{"propositions":[],"lastnames":["Dong"],"firstnames":["Kai"],"suffixes":[]},{"propositions":[],"lastnames":["Hu"],"firstnames":["Kai"],"suffixes":[]},{"propositions":[],"lastnames":["Gao"],"firstnames":["Kaige"],"suffixes":[]},{"propositions":[],"lastnames":["Guan"],"firstnames":["Kang"],"suffixes":[]},{"propositions":[],"lastnames":["Huang"],"f
irstnames":["Kexin"],"suffixes":[]},{"propositions":[],"lastnames":["Yu"],"firstnames":["Kuai"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Lean"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Lecong"],"suffixes":[]},{"propositions":[],"lastnames":["Xu"],"firstnames":["Lei"],"suffixes":[]},{"propositions":[],"lastnames":["Xia"],"firstnames":["Leyi"],"suffixes":[]},{"propositions":[],"lastnames":["Zhao"],"firstnames":["Liang"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Litong"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Liyue"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Meng"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Miaojun"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Mingchuan"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Minghua"],"suffixes":[]},{"propositions":[],"lastnames":["Tang"],"firstnames":["Minghui"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Mingming"],"suffixes":[]},{"propositions":[],"lastnames":["Tian"],"firstnames":["Ning"],"suffixes":[]},{"propositions":[],"lastnames":["Huang"],"firstnames":["Panpan"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Peiyi"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Peng"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Qiancheng"],"suffixes":[]},{"propositions":[],"lastnames":["Zhu"],"firstnames":["Qihao"],"suffixes":[]},{"propositions":[],"lastnames":["Chen"],"firstnames":["Qinyu"],"suffixes":[]},{"propositions":[],"lastnames":["Du"],"firstnames":["Qiushi"],"suffixes":[]},{"propositions":[],"lastnames":["Chen"],"firstnames":["R.","J."],"suffixes":[]},{"propositions":[],"lastnames":["Jin"],"firstnames":["R.","L."],"suffixes":[]},{"propositions":[],"lastnames":["Ge"],"firstnames":["Ruiqi"],"suffixes":[]},{"prop
ositions":[],"lastnames":["Zhang"],"firstnames":["Ruisong"],"suffixes":[]},{"propositions":[],"lastnames":["Pan"],"firstnames":["Ruizhe"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Runji"],"suffixes":[]},{"propositions":[],"lastnames":["Xu"],"firstnames":["Runxin"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Ruoyu"],"suffixes":[]},{"propositions":[],"lastnames":["Chen"],"firstnames":["Ruyi"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["S.","S."],"suffixes":[]},{"propositions":[],"lastnames":["Lu"],"firstnames":["Shanghao"],"suffixes":[]},{"propositions":[],"lastnames":["Zhou"],"firstnames":["Shangyan"],"suffixes":[]},{"propositions":[],"lastnames":["Chen"],"firstnames":["Shanhuang"],"suffixes":[]},{"propositions":[],"lastnames":["Wu"],"firstnames":["Shaoqing"],"suffixes":[]},{"propositions":[],"lastnames":["Ye"],"firstnames":["Shengfeng"],"suffixes":[]},{"propositions":[],"lastnames":["Ye"],"firstnames":["Shengfeng"],"suffixes":[]},{"propositions":[],"lastnames":["Ma"],"firstnames":["Shirong"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Shiyu"],"suffixes":[]},{"propositions":[],"lastnames":["Zhou"],"firstnames":["Shuang"],"suffixes":[]},{"propositions":[],"lastnames":["Yu"],"firstnames":["Shuiping"],"suffixes":[]},{"propositions":[],"lastnames":["Zhou"],"firstnames":["Shunfeng"],"suffixes":[]},{"propositions":[],"lastnames":["Pan"],"firstnames":["Shuting"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["T."],"suffixes":[]},{"propositions":[],"lastnames":["Yun"],"firstnames":["Tao"],"suffixes":[]},{"propositions":[],"lastnames":["Pei"],"firstnames":["Tian"],"suffixes":[]},{"propositions":[],"lastnames":["Sun"],"firstnames":["Tianyu"],"suffixes":[]},{"propositions":[],"lastnames":["Xiao"],"firstnames":["W.","L."],"suffixes":[]},{"propositions":[],"lastnames":["Zeng"],"firstnames":["Wangding"],"suffixes":[]},{"propositions":[],"lastnames":["Zhao"
],"firstnames":["Wanjia"],"suffixes":[]},{"propositions":[],"lastnames":["An"],"firstnames":["Wei"],"suffixes":[]},{"propositions":[],"lastnames":["Liu"],"firstnames":["Wen"],"suffixes":[]},{"propositions":[],"lastnames":["Liang"],"firstnames":["Wenfeng"],"suffixes":[]},{"propositions":[],"lastnames":["Gao"],"firstnames":["Wenjun"],"suffixes":[]},{"propositions":[],"lastnames":["Yu"],"firstnames":["Wenqin"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Wentao"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["X.","Q."],"suffixes":[]},{"propositions":[],"lastnames":["Jin"],"firstnames":["Xiangyue"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Xianzu"],"suffixes":[]},{"propositions":[],"lastnames":["Bi"],"firstnames":["Xiao"],"suffixes":[]},{"propositions":[],"lastnames":["Liu"],"firstnames":["Xiaodong"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Xiaohan"],"suffixes":[]},{"propositions":[],"lastnames":["Shen"],"firstnames":["Xiaojin"],"suffixes":[]},{"propositions":[],"lastnames":["Chen"],"firstnames":["Xiaokang"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Xiaokang"],"suffixes":[]},{"propositions":[],"lastnames":["Chen"],"firstnames":["Xiaosha"],"suffixes":[]},{"propositions":[],"lastnames":["Nie"],"firstnames":["Xiaotao"],"suffixes":[]},{"propositions":[],"lastnames":["Sun"],"firstnames":["Xiaowen"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Xiaoxiang"],"suffixes":[]},{"propositions":[],"lastnames":["Cheng"],"firstnames":["Xin"],"suffixes":[]},{"propositions":[],"lastnames":["Liu"],"firstnames":["Xin"],"suffixes":[]},{"propositions":[],"lastnames":["Xie"],"firstnames":["Xin"],"suffixes":[]},{"propositions":[],"lastnames":["Liu"],"firstnames":["Xingchao"],"suffixes":[]},{"propositions":[],"lastnames":["Yu"],"firstnames":["Xingkai"],"suffixes":[]},{"propositions":[],"lastnames":["Song"],"firstnames":["Xinnan"],"suffixes"
:[]},{"propositions":[],"lastnames":["Shan"],"firstnames":["Xinxia"],"suffixes":[]},{"propositions":[],"lastnames":["Zhou"],"firstnames":["Xinyi"],"suffixes":[]},{"propositions":[],"lastnames":["Yang"],"firstnames":["Xinyu"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Xinyuan"],"suffixes":[]},{"propositions":[],"lastnames":["Su"],"firstnames":["Xuecheng"],"suffixes":[]},{"propositions":[],"lastnames":["Lin"],"firstnames":["Xuheng"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Y.","K."],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Y.","Q."],"suffixes":[]},{"propositions":[],"lastnames":["Wei"],"firstnames":["Y.","X."],"suffixes":[]},{"propositions":[],"lastnames":["Zhu"],"firstnames":["Y.","X."],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Yang"],"suffixes":[]},{"propositions":[],"lastnames":["Xu"],"firstnames":["Yanhong"],"suffixes":[]},{"propositions":[],"lastnames":["Xu"],"firstnames":["Yanhong"],"suffixes":[]},{"propositions":[],"lastnames":["Huang"],"firstnames":["Yanping"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Yao"],"suffixes":[]},{"propositions":[],"lastnames":["Zhao"],"firstnames":["Yao"],"suffixes":[]},{"propositions":[],"lastnames":["Sun"],"firstnames":["Yaofeng"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Yaohui"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Yaohui"],"suffixes":[]},{"propositions":[],"lastnames":["Yu"],"firstnames":["Yi"],"suffixes":[]},{"propositions":[],"lastnames":["Zheng"],"firstnames":["Yi"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Yichao"],"suffixes":[]},{"propositions":[],"lastnames":["Shi"],"firstnames":["Yifan"],"suffixes":[]},{"propositions":[],"lastnames":["Xiong"],"firstnames":["Yiliang"],"suffixes":[]},{"propositions":[],"lastnames":["He"],"firstnames":["Ying"],"suffixes":[]},{"propositions":[],"lastnames":["Tang"],"firstn
ames":["Ying"],"suffixes":[]},{"propositions":[],"lastnames":["Piao"],"firstnames":["Yishi"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Yisong"],"suffixes":[]},{"propositions":[],"lastnames":["Tan"],"firstnames":["Yixuan"],"suffixes":[]},{"propositions":[],"lastnames":["Ma"],"firstnames":["Yiyang"],"suffixes":[]},{"propositions":[],"lastnames":["Liu"],"firstnames":["Yiyuan"],"suffixes":[]},{"propositions":[],"lastnames":["Guo"],"firstnames":["Yongqiang"],"suffixes":[]},{"propositions":[],"lastnames":["Wu"],"firstnames":["Yu"],"suffixes":[]},{"propositions":[],"lastnames":["Ou"],"firstnames":["Yuan"],"suffixes":[]},{"propositions":[],"lastnames":["Zhu"],"firstnames":["Yuchen"],"suffixes":[]},{"propositions":[],"lastnames":["Wang"],"firstnames":["Yuduan"],"suffixes":[]},{"propositions":[],"lastnames":["Gong"],"firstnames":["Yue"],"suffixes":[]},{"propositions":[],"lastnames":["Zou"],"firstnames":["Yuheng"],"suffixes":[]},{"propositions":[],"lastnames":["He"],"firstnames":["Yujia"],"suffixes":[]},{"propositions":[],"lastnames":["Zha"],"firstnames":["Yukun"],"suffixes":[]},{"propositions":[],"lastnames":["Xiong"],"firstnames":["Yunfan"],"suffixes":[]},{"propositions":[],"lastnames":["Ma"],"firstnames":["Yunxian"],"suffixes":[]},{"propositions":[],"lastnames":["Yan"],"firstnames":["Yuting"],"suffixes":[]},{"propositions":[],"lastnames":["Luo"],"firstnames":["Yuxiang"],"suffixes":[]},{"propositions":[],"lastnames":["You"],"firstnames":["Yuxiang"],"suffixes":[]},{"propositions":[],"lastnames":["Liu"],"firstnames":["Yuxuan"],"suffixes":[]},{"propositions":[],"lastnames":["Zhou"],"firstnames":["Yuyang"],"suffixes":[]},{"propositions":[],"lastnames":["Wu"],"firstnames":["Z.","F."],"suffixes":[]},{"propositions":[],"lastnames":["Ren"],"firstnames":["Z.","Z."],"suffixes":[]},{"propositions":[],"lastnames":["Ren"],"firstnames":["Zehui"],"suffixes":[]},{"propositions":[],"lastnames":["Sha"],"firstnames":["Zhangli"],"suffixes":[]},{"propositions":[],"las
tnames":["Fu"],"firstnames":["Zhe"],"suffixes":[]},{"propositions":[],"lastnames":["Xu"],"firstnames":["Zhean"],"suffixes":[]},{"propositions":[],"lastnames":["Huang"],"firstnames":["Zhen"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Zhen"],"suffixes":[]},{"propositions":[],"lastnames":["Xie"],"firstnames":["Zhenda"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Zhengyan"],"suffixes":[]},{"propositions":[],"lastnames":["Hao"],"firstnames":["Zhewen"],"suffixes":[]},{"propositions":[],"lastnames":["Gou"],"firstnames":["Zhibin"],"suffixes":[]},{"propositions":[],"lastnames":["Ma"],"firstnames":["Zhicheng"],"suffixes":[]},{"propositions":[],"lastnames":["Yan"],"firstnames":["Zhigang"],"suffixes":[]},{"propositions":[],"lastnames":["Shao"],"firstnames":["Zhihong"],"suffixes":[]},{"propositions":[],"lastnames":["Xu"],"firstnames":["Zhipeng"],"suffixes":[]},{"propositions":[],"lastnames":["Wu"],"firstnames":["Zhiyu"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Zhongyu"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Zhuoshu"],"suffixes":[]},{"propositions":[],"lastnames":["Gu"],"firstnames":["Zihui"],"suffixes":[]},{"propositions":[],"lastnames":["Zhu"],"firstnames":["Zijia"],"suffixes":[]},{"propositions":[],"lastnames":["Liu"],"firstnames":["Zijun"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Zilin"],"suffixes":[]},{"propositions":[],"lastnames":["Xie"],"firstnames":["Ziwei"],"suffixes":[]},{"propositions":[],"lastnames":["Song"],"firstnames":["Ziyang"],"suffixes":[]},{"propositions":[],"lastnames":["Gao"],"firstnames":["Ziyi"],"suffixes":[]},{"propositions":[],"lastnames":["Pan"],"firstnames":["Zizheng"],"suffixes":[]}],"month":"December","year":"2024","note":"arXiv:2412.19437 [cs] version: 1","keywords":"Computer Science - Artificial Intelligence, Computer Science - Computation and Language","bibtex":"@misc{deepseek-ai_deepseek-v3_2024,\n\ttitle 
= {{DeepSeek}-{V3} {Technical} {Report}},\n\turl = {http://arxiv.org/abs/2412.19437},\n\tdoi = {10.48550/arXiv.2412.19437},\n\tabstract = {We present DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. To achieve efficient inference and cost-effective training, DeepSeek-V3 adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures, which were thoroughly validated in DeepSeek-V2. Furthermore, DeepSeek-V3 pioneers an auxiliary-loss-free strategy for load balancing and sets a multi-token prediction training objective for stronger performance. We pre-train DeepSeek-V3 on 14.8 trillion diverse and high-quality tokens, followed by Supervised Fine-Tuning and Reinforcement Learning stages to fully harness its capabilities. Comprehensive evaluations reveal that DeepSeek-V3 outperforms other open-source models and achieves performance comparable to leading closed-source models. Despite its excellent performance, DeepSeek-V3 requires only 2.788M H800 GPU hours for its full training. In addition, its training process is remarkably stable. Throughout the entire training process, we did not experience any irrecoverable loss spikes or perform any rollbacks. The model checkpoints are available at https://github.com/deepseek-ai/DeepSeek-V3.},\n\turldate = {2025-02-03},\n\tpublisher = {arXiv},\n\tauthor = {DeepSeek-AI and Liu, Aixin and Feng, Bei and Xue, Bing and Wang, Bingxuan and Wu, Bochao and Lu, Chengda and Zhao, Chenggang and Deng, Chengqi and Zhang, Chenyu and Ruan, Chong and Dai, Damai and Guo, Daya and Yang, Dejian and Chen, Deli and Ji, Dongjie and Li, Erhang and Lin, Fangyun and Dai, Fucong and Luo, Fuli and Hao, Guangbo and Chen, Guanting and Li, Guowei and Zhang, H. and Bao, Han and Xu, Hanwei and Wang, Haocheng and Zhang, Haowei and Ding, Honghui and Xin, Huajian and Gao, Huazuo and Li, Hui and Qu, Hui and Cai, J. L. 
and Liang, Jian and Guo, Jianzhong and Ni, Jiaqi and Li, Jiashi and Wang, Jiawei and Chen, Jin and Chen, Jingchang and Yuan, Jingyang and Qiu, Junjie and Li, Junlong and Song, Junxiao and Dong, Kai and Hu, Kai and Gao, Kaige and Guan, Kang and Huang, Kexin and Yu, Kuai and Wang, Lean and Zhang, Lecong and Xu, Lei and Xia, Leyi and Zhao, Liang and Wang, Litong and Zhang, Liyue and Li, Meng and Wang, Miaojun and Zhang, Mingchuan and Zhang, Minghua and Tang, Minghui and Li, Mingming and Tian, Ning and Huang, Panpan and Wang, Peiyi and Zhang, Peng and Wang, Qiancheng and Zhu, Qihao and Chen, Qinyu and Du, Qiushi and Chen, R. J. and Jin, R. L. and Ge, Ruiqi and Zhang, Ruisong and Pan, Ruizhe and Wang, Runji and Xu, Runxin and Zhang, Ruoyu and Chen, Ruyi and Li, S. S. and Lu, Shanghao and Zhou, Shangyan and Chen, Shanhuang and Wu, Shaoqing and Ye, Shengfeng and Ye, Shengfeng and Ma, Shirong and Wang, Shiyu and Zhou, Shuang and Yu, Shuiping and Zhou, Shunfeng and Pan, Shuting and Wang, T. and Yun, Tao and Pei, Tian and Sun, Tianyu and Xiao, W. L. and Zeng, Wangding and Zhao, Wanjia and An, Wei and Liu, Wen and Liang, Wenfeng and Gao, Wenjun and Yu, Wenqin and Zhang, Wentao and Li, X. Q. and Jin, Xiangyue and Wang, Xianzu and Bi, Xiao and Liu, Xiaodong and Wang, Xiaohan and Shen, Xiaojin and Chen, Xiaokang and Zhang, Xiaokang and Chen, Xiaosha and Nie, Xiaotao and Sun, Xiaowen and Wang, Xiaoxiang and Cheng, Xin and Liu, Xin and Xie, Xin and Liu, Xingchao and Yu, Xingkai and Song, Xinnan and Shan, Xinxia and Zhou, Xinyi and Yang, Xinyu and Li, Xinyuan and Su, Xuecheng and Lin, Xuheng and Li, Y. K. and Wang, Y. Q. and Wei, Y. X. and Zhu, Y. X. 
and Zhang, Yang and Xu, Yanhong and Xu, Yanhong and Huang, Yanping and Li, Yao and Zhao, Yao and Sun, Yaofeng and Li, Yaohui and Wang, Yaohui and Yu, Yi and Zheng, Yi and Zhang, Yichao and Shi, Yifan and Xiong, Yiliang and He, Ying and Tang, Ying and Piao, Yishi and Wang, Yisong and Tan, Yixuan and Ma, Yiyang and Liu, Yiyuan and Guo, Yongqiang and Wu, Yu and Ou, Yuan and Zhu, Yuchen and Wang, Yuduan and Gong, Yue and Zou, Yuheng and He, Yujia and Zha, Yukun and Xiong, Yunfan and Ma, Yunxian and Yan, Yuting and Luo, Yuxiang and You, Yuxiang and Liu, Yuxuan and Zhou, Yuyang and Wu, Z. F. and Ren, Z. Z. and Ren, Zehui and Sha, Zhangli and Fu, Zhe and Xu, Zhean and Huang, Zhen and Zhang, Zhen and Xie, Zhenda and Zhang, Zhengyan and Hao, Zhewen and Gou, Zhibin and Ma, Zhicheng and Yan, Zhigang and Shao, Zhihong and Xu, Zhipeng and Wu, Zhiyu and Zhang, Zhongyu and Li, Zhuoshu and Gu, Zihui and Zhu, Zijia and Liu, Zijun and Li, Zilin and Xie, Ziwei and Song, Ziyang and Gao, Ziyi and Pan, Zizheng},\n\tmonth = dec,\n\tyear = {2024},\n\tnote = {arXiv:2412.19437 [cs]\nversion: 1},\n\tkeywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language},\n}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n","author_short":["DeepSeek-AI","Liu, A.","Feng, B.","Xue, B.","Wang, B.","Wu, B.","Lu, C.","Zhao, C.","Deng, C.","Zhang, C.","Ruan, C.","Dai, D.","Guo, D.","Yang, D.","Chen, D.","Ji, D.","Li, E.","Lin, F.","Dai, F.","Luo, F.","Hao, G.","Chen, G.","Li, G.","Zhang, H.","Bao, H.","Xu, H.","Wang, H.","Zhang, H.","Ding, H.","Xin, H.","Gao, H.","Li, H.","Qu, H.","Cai, J. 
L.","Liang, J.","Guo, J.","Ni, J.","Li, J.","Wang, J.","Chen, J.","Chen, J.","Yuan, J.","Qiu, J.","Li, J.","Song, J.","Dong, K.","Hu, K.","Gao, K.","Guan, K.","Huang, K.","Yu, K.","Wang, L.","Zhang, L.","Xu, L.","Xia, L.","Zhao, L.","Wang, L.","Zhang, L.","Li, M.","Wang, M.","Zhang, M.","Zhang, M.","Tang, M.","Li, M.","Tian, N.","Huang, P.","Wang, P.","Zhang, P.","Wang, Q.","Zhu, Q.","Chen, Q.","Du, Q.","Chen, R. J.","Jin, R. L.","Ge, R.","Zhang, R.","Pan, R.","Wang, R.","Xu, R.","Zhang, R.","Chen, R.","Li, S. S.","Lu, S.","Zhou, S.","Chen, S.","Wu, S.","Ye, S.","Ye, S.","Ma, S.","Wang, S.","Zhou, S.","Yu, S.","Zhou, S.","Pan, S.","Wang, T.","Yun, T.","Pei, T.","Sun, T.","Xiao, W. L.","Zeng, W.","Zhao, W.","An, W.","Liu, W.","Liang, W.","Gao, W.","Yu, W.","Zhang, W.","Li, X. Q.","Jin, X.","Wang, X.","Bi, X.","Liu, X.","Wang, X.","Shen, X.","Chen, X.","Zhang, X.","Chen, X.","Nie, X.","Sun, X.","Wang, X.","Cheng, X.","Liu, X.","Xie, X.","Liu, X.","Yu, X.","Song, X.","Shan, X.","Zhou, X.","Yang, X.","Li, X.","Su, X.","Lin, X.","Li, Y. K.","Wang, Y. Q.","Wei, Y. X.","Zhu, Y. X.","Zhang, Y.","Xu, Y.","Xu, Y.","Huang, Y.","Li, Y.","Zhao, Y.","Sun, Y.","Li, Y.","Wang, Y.","Yu, Y.","Zheng, Y.","Zhang, Y.","Shi, Y.","Xiong, Y.","He, Y.","Tang, Y.","Piao, Y.","Wang, Y.","Tan, Y.","Ma, Y.","Liu, Y.","Guo, Y.","Wu, Y.","Ou, Y.","Zhu, Y.","Wang, Y.","Gong, Y.","Zou, Y.","He, Y.","Zha, Y.","Xiong, Y.","Ma, Y.","Yan, Y.","Luo, Y.","You, Y.","Liu, Y.","Zhou, Y.","Wu, Z. F.","Ren, Z. 
Z.","Ren, Z.","Sha, Z.","Fu, Z.","Xu, Z.","Huang, Z.","Zhang, Z.","Xie, Z.","Zhang, Z.","Hao, Z.","Gou, Z.","Ma, Z.","Yan, Z.","Shao, Z.","Xu, Z.","Wu, Z.","Zhang, Z.","Li, Z.","Gu, Z.","Zhu, Z.","Liu, Z.","Li, Z.","Xie, Z.","Song, Z.","Gao, Z.","Pan, Z."],"key":"deepseek-ai_deepseek-v3_2024-1","id":"deepseek-ai_deepseek-v3_2024-1","bibbaseid":"deepseekai-liu-feng-xue-wang-wu-lu-zhao-etal-deepseekv3technicalreport-2024","role":"author","urls":{"Paper":"http://arxiv.org/abs/2412.19437"},"keyword":["Computer Science - Artificial Intelligence","Computer Science - Computation and Language"],"metadata":{"authorlinks":{}},"html":""},"bibtype":"misc","biburl":"https://bibbase.org/zotero/pa511","dataSources":["3gTBYW5YxtNcnhN2g","MpmemwLeQzDcKDq6x"],"keywords":["computer science - artificial intelligence","computer science - computation and language"],"search_terms":["deepseek","technical","report","deepseek-ai","liu","feng","xue","wang","wu","lu","zhao","deng","zhang","ruan","dai","guo","yang","chen","ji","li","lin","dai","luo","hao","chen","li","zhang","bao","xu","wang","zhang","ding","xin","gao","li","qu","cai","liang","guo","ni","li","wang","chen","chen","yuan","qiu","li","song","dong","hu","gao","guan","huang","yu","wang","zhang","xu","xia","zhao","wang","zhang","li","wang","zhang","zhang","tang","li","tian","huang","wang","zhang","wang","zhu","chen","du","chen","jin","ge","zhang","pan","wang","xu","zhang","chen","li","lu","zhou","chen","wu","ye","ye","ma","wang","zhou","yu","zhou","pan","wang","yun","pei","sun","xiao","zeng","zhao","an","liu","liang","gao","yu","zhang","li","jin","wang","bi","liu","wang","shen","chen","zhang","chen","nie","sun","wang","cheng","liu","xie","liu","yu","song","shan","zhou","yang","li","su","lin","li","wang","wei","zhu","zhang","xu","xu","huang","li","zhao","sun","li","wang","yu","zheng","zhang","shi","xiong","he","tang","piao","wang","tan","ma","liu","guo","wu","ou","zhu","wang","gong","zou","he","zha","xiong","ma","yan","luo","you","liu"
,"zhou","wu","ren","ren","sha","fu","xu","huang","zhang","xie","zhang","hao","gou","ma","yan","shao","xu","wu","zhang","li","gu","zhu","liu","li","xie","song","gao","pan"],"title":"DeepSeek-V3 Technical Report","year":2024}