{"_id":"RgdMazDKf64qi7Rcn","bibbaseid":"wang-ma-dong-huang-zhang-wei-deepnetscalingtransformersto1000layers-2022","author_short":["Wang, H.","Ma, S.","Dong, L.","Huang, S.","Zhang, D.","Wei, F."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Hongyu"],"propositions":[],"lastnames":["Wang"],"suffixes":[]},{"firstnames":["Shuming"],"propositions":[],"lastnames":["Ma"],"suffixes":[]},{"firstnames":["Li"],"propositions":[],"lastnames":["Dong"],"suffixes":[]},{"firstnames":["Shaohan"],"propositions":[],"lastnames":["Huang"],"suffixes":[]},{"firstnames":["Dongdong"],"propositions":[],"lastnames":["Zhang"],"suffixes":[]},{"firstnames":["Furu"],"propositions":[],"lastnames":["Wei"],"suffixes":[]}],"title":"DeepNet: Scaling Transformers to 1, 000 Layers","journal":"CoRR","volume":"abs/2203.00555","year":"2022","url":"https://doi.org/10.48550/arXiv.2203.00555","doi":"10.48550/ARXIV.2203.00555","eprinttype":"arXiv","eprint":"2203.00555","timestamp":"Tue, 20 Dec 2022 00:00:00 +0100","biburl":"https://dblp.org/rec/journals/corr/abs-2203-00555.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/abs-2203-00555,\n author = {Hongyu Wang and\n Shuming Ma and\n Li Dong and\n Shaohan Huang and\n Dongdong Zhang and\n Furu Wei},\n title = {DeepNet: Scaling Transformers to 1, 000 Layers},\n journal = {CoRR},\n volume = {abs/2203.00555},\n year = {2022},\n url = {https://doi.org/10.48550/arXiv.2203.00555},\n doi = {10.48550/ARXIV.2203.00555},\n eprinttype = {arXiv},\n eprint = {2203.00555},\n timestamp = {Tue, 20 Dec 2022 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/corr/abs-2203-00555.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Wang, H.","Ma, S.","Dong, L.","Huang, S.","Zhang, D.","Wei, F."],"key":"DBLP:journals/corr/abs-2203-00555","id":"DBLP:journals/corr/abs-2203-00555","bibbaseid":"wang-ma-dong-huang-zhang-wei-deepnetscalingtransformersto1000layers-2022","role":"author","urls":{"Paper":"https://doi.org/10.48550/arXiv.2203.00555"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://dblp.org/pid/176/0380.bib","dataSources":["zbFJei64gwybfvHiC"],"keywords":[],"search_terms":["deepnet","scaling","transformers","000","layers","wang","ma","dong","huang","zhang","wei"],"title":"DeepNet: Scaling Transformers to 1, 000 Layers","year":2022}