Adapting transformer models to morphological tagging of two highly inflectional languages: a case study on Ancient Greek and Latin

Adapting transformer models to morphological tagging of two highly inflectional languages: a case study on Ancient Greek and Latin. Keersmaekers, A. & Mercelis, W. Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024), 2024. Conference Name: Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024); Place: Hybrid in Bangkok, Thailand and online; Publisher: Association for Computational Linguistics.

Paper doi abstract bibtex

Natural language processing for Greek and Latin, inflectional languages with small corpora, requires special techniques. For morphological tagging, transformer models show promising potential, but the best approach to use these models is unclear. For both languages, this paper examines the impact of using morphological lexica, training different model types (a single model with a combined feature tag, multiple models for separate features, and a multi-task model for all features), and adding linguistic constraints. We find that, although simply fine-tuning transformers to predict a monolithic tag may already yield decent results, each of these adaptations can further improve tagging accuracy.

% Workshop paper in the ML4AL 2024 proceedings, so it is typed as
% @inproceedings: the proceedings title goes in booktitle, and the publisher
% and place have their own fields instead of a free-text note.
% address holds the place exactly as the original record's "Place:" label gave it.
@inproceedings{keersmaekers_adapting_2024,
	author = {Keersmaekers, Alek and Mercelis, Wouter},
	title = {Adapting transformer models to morphological tagging of two highly inflectional languages: a case study on {Ancient} {Greek} and {Latin}},
	shorttitle = {Adapting transformer models to morphological tagging of two highly inflectional languages},
	booktitle = {Proceedings of the 1st Workshop on Machine Learning for Ancient Languages ({ML4AL} 2024)},
	publisher = {Association for Computational Linguistics},
	address = {Hybrid in Bangkok, Thailand and online},
	year = {2024},
	pages = {165--176},
	doi = {10.18653/v1/2024.ml4al-1.17},
	url = {https://aclanthology.org/2024.ml4al-1.17},
	urldate = {2025-01-26},
	language = {en},
	abstract = {Natural language processing for Greek and Latin, inflectional languages with small corpora, requires special techniques. For morphological tagging, transformer models show promising potential, but the best approach to use these models is unclear. For both languages, this paper examines the impact of using morphological lexica, training different model types (a single model with a combined feature tag, multiple models for separate features, and a multi-task model for all features), and adding linguistic constraints. We find that, although simply fine-tuning transformers to predict a monolithic tag may already yield decent results, each of these adaptations can further improve tagging accuracy.},
}

Downloads: 0

{"_id":"WSusTm5PdAeyJnWN8","bibbaseid":"keersmaekers-mercelis-adaptingtransformermodelstomorphologicaltaggingoftwohighlyinflectionallanguagesacasestudyonancientgreekandlatin-2024","author_short":["Keersmaekers, A.","Mercelis, W."],"bibdata":{"bibtype":"article","type":"article","title":"Adapting transformer models to morphological tagging of two highly inflectional languages: a case study on Ancient Greek and Latin","shorttitle":"Adapting transformer models to morphological tagging of two highly inflectional languages","url":"https://aclanthology.org/2024.ml4al-1.17","doi":"10.18653/v1/2024.ml4al-1.17","abstract":"Natural language processing for Greek and Latin, inflectional languages with small corpora, requires special techniques. For morphological tagging, transformer models show promising potential, but the best approach to use these models is unclear. For both languages, this paper examines the impact of using morphological lexica, training different model types (a single model with a combined feature tag, multiple models for separate features, and a multi-task model for all features), and adding linguistic constraints. 
We find that, although simply fine-tuning transformers to predict a monolithic tag may already yield decent results, each of these adaptations can further improve tagging accuracy.","language":"en","urldate":"2025-01-26","journal":"Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024)","author":[{"propositions":[],"lastnames":["Keersmaekers"],"firstnames":["Alek"],"suffixes":[]},{"propositions":[],"lastnames":["Mercelis"],"firstnames":["Wouter"],"suffixes":[]}],"year":"2024","note":"Conference Name: Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024) Place: Hybrid in Bangkok, Thailand and online Publisher: Association for Computational Linguistics","pages":"165–176","bibtex":"@article{keersmaekers_adapting_2024,\n\ttitle = {Adapting transformer models to morphological tagging of two highly inflectional languages: a case study on {Ancient} {Greek} and {Latin}},\n\tshorttitle = {Adapting transformer models to morphological tagging of two highly inflectional languages},\n\turl = {https://aclanthology.org/2024.ml4al-1.17},\n\tdoi = {10.18653/v1/2024.ml4al-1.17},\n\tabstract = {Natural language processing for Greek and Latin, inflectional languages with small corpora, requires special techniques. For morphological tagging, transformer models show promising potential, but the best approach to use these models is unclear. For both languages, this paper examines the impact of using morphological lexica, training different model types (a single model with a combined feature tag, multiple models for separate features, and a multi-task model for all features), and adding linguistic constraints. 
We find that, although simply fine-tuning transformers to predict a monolithic tag may already yield decent results, each of these adaptations can further improve tagging accuracy.},\n\tlanguage = {en},\n\turldate = {2025-01-26},\n\tjournal = {Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024)},\n\tauthor = {Keersmaekers, Alek and Mercelis, Wouter},\n\tyear = {2024},\n\tnote = {Conference Name: Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024)\nPlace: Hybrid in Bangkok, Thailand and online\nPublisher: Association for Computational Linguistics},\n\tpages = {165--176},\n}\n\n\n\n","author_short":["Keersmaekers, A.","Mercelis, W."],"key":"keersmaekers_adapting_2024","id":"keersmaekers_adapting_2024","bibbaseid":"keersmaekers-mercelis-adaptingtransformermodelstomorphologicaltaggingoftwohighlyinflectionallanguagesacasestudyonancientgreekandlatin-2024","role":"author","urls":{"Paper":"https://aclanthology.org/2024.ml4al-1.17"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://bibbase.org/zotero-group/schulzkx/5158478","dataSources":["JFDnASMkoQCjjGL8E"],"keywords":[],"search_terms":["adapting","transformer","models","morphological","tagging","two","highly","inflectional","languages","case","study","ancient","greek","latin","keersmaekers","mercelis"],"title":"Adapting transformer models to morphological tagging of two highly inflectional languages: a case study on Ancient Greek and Latin","year":2024}