{"_id":"NfH9gdfwtkY5cGyY7","bibbaseid":"wang-ma-dong-huang-wang-ma-yang-wang-etal-bitnetscaling1bittransformersforlargelanguagemodels-2023","author_short":["Wang, H.","Ma, S.","Dong, L.","Huang, S.","Wang, H.","Ma, L.","Yang, F.","Wang, R.","Wu, Y.","Wei, F."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Hongyu"],"propositions":[],"lastnames":["Wang"],"suffixes":[]},{"firstnames":["Shuming"],"propositions":[],"lastnames":["Ma"],"suffixes":[]},{"firstnames":["Li"],"propositions":[],"lastnames":["Dong"],"suffixes":[]},{"firstnames":["Shaohan"],"propositions":[],"lastnames":["Huang"],"suffixes":[]},{"firstnames":["Huaijie"],"propositions":[],"lastnames":["Wang"],"suffixes":[]},{"firstnames":["Lingxiao"],"propositions":[],"lastnames":["Ma"],"suffixes":[]},{"firstnames":["Fan"],"propositions":[],"lastnames":["Yang"],"suffixes":[]},{"firstnames":["Ruiping"],"propositions":[],"lastnames":["Wang"],"suffixes":[]},{"firstnames":["Yi"],"propositions":[],"lastnames":["Wu"],"suffixes":[]},{"firstnames":["Furu"],"propositions":[],"lastnames":["Wei"],"suffixes":[]}],"title":"BitNet: Scaling 1-bit Transformers for Large Language Models","journal":"CoRR","volume":"abs/2310.11453","year":"2023","url":"https://doi.org/10.48550/arXiv.2310.11453","doi":"10.48550/ARXIV.2310.11453","eprinttype":"arXiv","eprint":"2310.11453","timestamp":"Tue, 16 Jul 2024 01:00:00 +0200","biburl":"https://dblp.org/rec/journals/corr/abs-2310-11453.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/abs-2310-11453,\n author = {Hongyu Wang and\n Shuming Ma and\n Li Dong and\n Shaohan Huang and\n Huaijie Wang and\n Lingxiao Ma and\n Fan Yang and\n Ruiping Wang and\n Yi Wu and\n Furu Wei},\n title = {BitNet: Scaling 1-bit Transformers for Large Language Models},\n journal = {CoRR},\n volume = {abs/2310.11453},\n year = {2023},\n url = {https://doi.org/10.48550/arXiv.2310.11453},\n doi = {10.48550/ARXIV.2310.11453},\n eprinttype = {arXiv},\n eprint = {2310.11453},\n timestamp = {Tue, 16 Jul 2024 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/corr/abs-2310-11453.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Wang, H.","Ma, S.","Dong, L.","Huang, S.","Wang, H.","Ma, L.","Yang, F.","Wang, R.","Wu, Y.","Wei, F."],"key":"DBLP:journals/corr/abs-2310-11453","id":"DBLP:journals/corr/abs-2310-11453","bibbaseid":"wang-ma-dong-huang-wang-ma-yang-wang-etal-bitnetscaling1bittransformersforlargelanguagemodels-2023","role":"author","urls":{"Paper":"https://doi.org/10.48550/arXiv.2310.11453"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://dblp.org/pid/176/0380.bib","dataSources":["pzyFFGWvxG2bs63zP"],"keywords":[],"search_terms":["bitnet","scaling","bit","transformers","large","language","models","wang","ma","dong","huang","wang","ma","yang","wang","wu","wei"],"title":"BitNet: Scaling 1-bit Transformers for Large Language Models","year":2023}