{"_id":"MhHEnYuwScya6FLoW","bibbaseid":"sajjad-dalvi-durrani-nakov-ontheeffectofdroppinglayersofpretrainedtransformermodels-2022","author_short":["Sajjad, H.","Dalvi, F.","Durrani, N.","Nakov, P."],"bibdata":{"bibtype":"article","type":"article","title":"On the Effect of Dropping Layers of Pre-trained Transformer Models","journal":"Computer Speech & Language (CSL)","year":"2022","issn":"0885-2308","doi":"https://doi.org/10.1016/j.csl.2022.101429","author":[{"firstnames":["Hassan"],"propositions":[],"lastnames":["Sajjad"],"suffixes":[]},{"firstnames":["Fahim"],"propositions":[],"lastnames":["Dalvi"],"suffixes":[]},{"firstnames":["Nadir"],"propositions":[],"lastnames":["Durrani"],"suffixes":[]},{"firstnames":["Preslav"],"propositions":[],"lastnames":["Nakov"],"suffixes":[]}],"bibtex":"@article{sajjad_poorman:csl2022,\ntitle = {{On the Effect of Dropping Layers of Pre-trained Transformer Models}},\njournal = {Computer Speech & Language (CSL)},\nyear = {2022},\nissn = {0885-2308},\ndoi = {https://doi.org/10.1016/j.csl.2022.101429},\nauthor = {Hassan Sajjad and Fahim Dalvi and Nadir Durrani and Preslav Nakov},\n}\n\n","author_short":["Sajjad, H.","Dalvi, F.","Durrani, N.","Nakov, P."],"key":"sajjad_poorman:csl2022","id":"sajjad_poorman:csl2022","bibbaseid":"sajjad-dalvi-durrani-nakov-ontheeffectofdroppinglayersofpretrainedtransformermodels-2022","role":"author","urls":{},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://bibbase.org/network/files/dzSEA6BwTnC4hoDJj","dataSources":["ovHzf4TqfbrCLAdkj","Jzair3cWJRyYbLQPX"],"keywords":[],"search_terms":["effect","dropping","layers","pre","trained","transformer","models","sajjad","dalvi","durrani","nakov"],"title":"On the Effect of Dropping Layers of Pre-trained Transformer Models","year":2022}