On the Effect of Dropping Layers of Pre-trained Transformer Models. Sajjad, H., Dalvi, F., Durrani, N., & Nakov, P. Computer Speech & Language (CSL), 2022.
doi  bibtex   
@comment{Journal article. The DOI is stored bare (no resolver prefix) and the ampersand in the journal name is escaped for LaTeX.}
@article{sajjad_poorman:csl2022,
  author       = {Sajjad, Hassan and Dalvi, Fahim and Durrani, Nadir and Nakov, Preslav},
  title        = {On the Effect of Dropping Layers of Pre-trained {Transformer} Models},
  journal      = {Computer Speech \& Language},
  shortjournal = {CSL},
  year         = {2022},
  issn         = {0885-2308},
  doi          = {10.1016/j.csl.2022.101429},
}

Downloads: 0