UZH_CLyp at SemEval-2023 Task 9: Head-First Fine-Tuning and ChatGPT Data Generation for Cross-Lingual Learning in Tweet Intimacy Prediction. Michail, A., Konstantinou, S., & Clematide, S. arXiv.org, March 2023.
Abstract: This paper describes the submission of UZH_CLyp for the SemEval 2023 Task 9 "Multilingual Tweet Intimacy Analysis". We achieved second-best results in all 10 languages according to the official Pearson's correlation regression evaluation measure. Our cross-lingual transfer learning approach explores the benefits of using a Head-First Fine-Tuning method (HeFiT) that first updates only the regression head parameters and then also updates the pre-trained transformer encoder parameters at a reduced learning rate. Additionally, we study the impact of using a small set of automatically generated examples (in our case, from ChatGPT) for low-resource settings where no human-labeled data is available. Our study shows that HeFiT stabilizes training and consistently improves results for pre-trained models that lack domain adaptation to tweets. Our study also shows a noticeable performance increase in cross-lingual learning when synthetic data is used, confirming the usefulness of current text generation systems to improve zero-shot baseline results. Finally, we examine how possible inconsistencies in the annotated data contribute to cross-lingual interference issues.
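The abstract's Head-First Fine-Tuning (HeFiT) procedure — train only the regression head first, then continue fine-tuning the whole encoder at a reduced learning rate — can be sketched as follows. This is a minimal illustration assuming a standard Hugging Face regression setup; the model name, learning rates, and the train_loader are illustrative assumptions, not the authors' reported configuration.

import torch
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(
    "xlm-roberta-base",   # illustrative choice; any multilingual encoder works
    num_labels=1,
    problem_type="regression",
)

def train_epoch(model, loader, optimizer):
    # One pass over the data, minimizing MSE between predictions and labels.
    loss_fn = torch.nn.MSELoss()
    model.train()
    for batch in loader:
        optimizer.zero_grad()
        logits = model(input_ids=batch["input_ids"],
                       attention_mask=batch["attention_mask"]).logits
        loss = loss_fn(logits.squeeze(-1), batch["labels"].float())
        loss.backward()
        optimizer.step()

# Phase 1: freeze the encoder and update only the regression head.
for p in model.base_model.parameters():
    p.requires_grad = False
head_params = [p for p in model.parameters() if p.requires_grad]
phase1_opt = torch.optim.AdamW(head_params, lr=1e-3)   # lr is illustrative
# train_epoch(model, train_loader, phase1_opt)         # train_loader assumed to exist

# Phase 2: unfreeze the encoder and fine-tune all parameters at a reduced lr.
for p in model.base_model.parameters():
    p.requires_grad = True
phase2_opt = torch.optim.AdamW(model.parameters(), lr=2e-5)  # reduced lr
# train_epoch(model, train_loader, phase2_opt)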
@article{andrianos_michail_uzh_clyp_2023,
title = {{UZH}\_CLyp at {SemEval}-2023 {Task} 9: {Head}-{First} {Fine}-{Tuning} and {ChatGPT} {Data} {Generation} for {Cross}-{Lingual} {Learning} in {Tweet} {Intimacy} {Prediction}},
url = {https://www.proquest.com/working-papers/uzh-clyp-at-semeval-2023-task-9-head-first-fine/docview/2782019528/se-2},
abstract = {This paper describes the submission of UZH\_CLyp for the SemEval 2023 Task 9 "Multilingual Tweet Intimacy Analysis". We achieved second-best results in all 10 languages according to the official Pearson's correlation regression evaluation measure. Our cross-lingual transfer learning approach explores the benefits of using a Head-First Fine-Tuning method (HeFiT) that first updates only the regression head parameters and then also updates the pre-trained transformer encoder parameters at a reduced learning rate. Additionally, we study the impact of using a small set of automatically generated examples (in our case, from ChatGPT) for low-resource settings where no human-labeled data is available. Our study shows that HeFiT stabilizes training and consistently improves results for pre-trained models that lack domain adaptation to tweets. Our study also shows a noticeable performance increase in cross-lingual learning when synthetic data is used, confirming the usefulness of current text generation systems to improve zero-shot baseline results. Finally, we examine how possible inconsistencies in the annotated data contribute to cross-lingual interference issues.},
language = {English},
journal = {arXiv.org},
author = {Michail, Andrianos and Konstantinou, Stefanos and Clematide, Simon},
month = mar,
year = {2023},
keywords = {Artificial Intelligence, Chatbots, Learning, Computation and Language, Coders, Parameters},
annote = {Last updated - 2023-03-04},
}
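The task's official measure, Pearson's correlation between predicted and gold intimacy scores, can be computed with SciPy; the values below are placeholders, not task data.

from scipy.stats import pearsonr

gold = [1.0, 2.5, 3.0, 4.2, 1.8]   # placeholder gold intimacy scores
pred = [1.2, 2.4, 2.8, 4.0, 2.1]   # placeholder model predictions
r, p_value = pearsonr(gold, pred)
print(f"Pearson's r = {r:.3f}")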
{"_id":"77jMiigNRTrNHg9Nq","bibbaseid":"andrianosmichail-konstantinou-clematide-uzhclypatsemeval2023task9headfirstfinetuningandchatgptdatagenerationforcrosslinguallearningintweetintimacyprediction-2023","author_short":["Andrianos Michail","Konstantinou, S.","Clematide, S."],"bibdata":{"bibtype":"article","type":"article","title":"UZH_CLyp at SemEval-2023 Task 9: Head-First Fine-Tuning and ChatGPT Data Generation for Cross-Lingual Learning in Tweet Intimacy Prediction","url":"https://www.proquest.com/working-papers/uzh-clyp-at-semeval-2023-task-9-head-first-fine/docview/2782019528/se-2","abstract":"This paper describes the submission of UZH_CLyp for the SemEval 2023 Task 9 \"Multilingual Tweet Intimacy Analysis\". We achieved second-best results in all 10 languages according to the official Pearson's correlation regression evaluation measure. Our cross-lingual transfer learning approach explores the benefits of using a Head-First Fine-Tuning method (HeFiT) that first updates only the regression head parameters and then also updates the pre-trained transformer encoder parameters at a reduced learning rate. Additionally, we study the impact of using a small set of automatically generated examples (in our case, from ChatGPT) for low-resource settings where no human-labeled data is available. Our study shows that HeFiT stabilizes training and consistently improves results for pre-trained models that lack domain adaptation to tweets. Our study also shows a noticeable performance increase in cross-lingual learning when synthetic data is used, confirming the usefulness of current text generation systems to improve zero-shot baseline results. Finally, we examine how possible inconsistencies in the annotated data contribute to cross-lingual interference issues.","language":"English","journal":"arXiv.org","author":[{"firstnames":[],"propositions":[],"lastnames":["Andrianos Michail"],"suffixes":[]},{"propositions":[],"lastnames":["Konstantinou"],"firstnames":["Stefanos"],"suffixes":[]},{"propositions":[],"lastnames":["Clematide"],"firstnames":["Simon"],"suffixes":[]}],"month":"March","year":"2023","note":"Place: Ithaca Publisher: Cornell University Library, arXiv.org","keywords":"Artificial intelligence, Chatbots, Artificial Intelligence, Learning, Business And Economics–Banking And Finance, Computation and Language, Coders, Parameters","annote":"Última actualización - 2023-03-04","bibtex":"@article{andrianos_michail_uzh_clyp_2023,\n\ttitle = {{UZH}\\_CLyp at {SemEval}-2023 {Task} 9: {Head}-{First} {Fine}-{Tuning} and {ChatGPT} {Data} {Generation} for {Cross}-{Lingual} {Learning} in {Tweet} {Intimacy} {Prediction}},\n\turl = {https://www.proquest.com/working-papers/uzh-clyp-at-semeval-2023-task-9-head-first-fine/docview/2782019528/se-2},\n\tabstract = {This paper describes the submission of UZH\\_CLyp for the SemEval 2023 Task 9 \"Multilingual Tweet Intimacy Analysis\". We achieved second-best results in all 10 languages according to the official Pearson's correlation regression evaluation measure. Our cross-lingual transfer learning approach explores the benefits of using a Head-First Fine-Tuning method (HeFiT) that first updates only the regression head parameters and then also updates the pre-trained transformer encoder parameters at a reduced learning rate. Additionally, we study the impact of using a small set of automatically generated examples (in our case, from ChatGPT) for low-resource settings where no human-labeled data is available. 
Our study shows that HeFiT stabilizes training and consistently improves results for pre-trained models that lack domain adaptation to tweets. Our study also shows a noticeable performance increase in cross-lingual learning when synthetic data is used, confirming the usefulness of current text generation systems to improve zero-shot baseline results. Finally, we examine how possible inconsistencies in the annotated data contribute to cross-lingual interference issues.},\n\tlanguage = {English},\n\tjournal = {arXiv.org},\n\tauthor = {{Andrianos Michail} and Konstantinou, Stefanos and Clematide, Simon},\n\tmonth = mar,\n\tyear = {2023},\n\tnote = {Place: Ithaca\nPublisher: Cornell University Library, arXiv.org},\n\tkeywords = {Artificial intelligence, Chatbots, Artificial Intelligence, Learning, Business And Economics--Banking And Finance, Computation and Language, Coders, Parameters},\n\tannote = {Copyright - © 2023. This work is published under http://creativecommons.org/licenses/by/4.0/ (the “License”). Notwithstanding the ProQuest Terms and Conditions, you may use this content in accordance with the terms of the License.},\n\tannote = {Última actualización - 2023-03-04},\n}\n\n","author_short":["Andrianos Michail","Konstantinou, S.","Clematide, S."],"key":"andrianos_michail_uzh_clyp_2023","id":"andrianos_michail_uzh_clyp_2023","bibbaseid":"andrianosmichail-konstantinou-clematide-uzhclypatsemeval2023task9headfirstfinetuningandchatgptdatagenerationforcrosslinguallearningintweetintimacyprediction-2023","role":"author","urls":{"Paper":"https://www.proquest.com/working-papers/uzh-clyp-at-semeval-2023-task-9-head-first-fine/docview/2782019528/se-2"},"keyword":["Artificial intelligence","Chatbots","Artificial Intelligence","Learning","Business And Economics–Banking And Finance","Computation and Language","Coders","Parameters"],"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://bibbase.org/network/files/22WYpzbBvi3hDHX7Y","dataSources":["cYu6uhMkeFHgRrEty","hLMh7bwHyFsPNWAEL","LKW3iRvnztCpLNTW7","TLD9JxqHfSQQ4r268","X9BvByJrC3kGJexn8","iovNvcnNYDGJcuMq2","NjZJ5ZmWhTtMZBfje"],"keywords":["artificial intelligence","chatbots","artificial intelligence","learning","business and economics–banking and finance","computation and language","coders","parameters"],"search_terms":["uzh","clyp","semeval","2023","task","head","first","fine","tuning","chatgpt","data","generation","cross","lingual","learning","tweet","intimacy","prediction","andrianos michail","konstantinou","clematide"],"title":"UZH_CLyp at SemEval-2023 Task 9: Head-First Fine-Tuning and ChatGPT Data Generation for Cross-Lingual Learning in Tweet Intimacy Prediction","year":2023}