MIGRATE: Cross-Lingual Adaptation of Domain-Specific LLMs through Code-Switching and Embedding Transfer. Hong, S., Lee, S., Moon, H., & Lim, H. In Rambow, O., Wanner, L., Apidianaki, M., Al-Khalifa, H., Di Eugenio, B., & Schockaert, S., editors, Proceedings of the 31st International Conference on Computational Linguistics, pages 9184–9193, Abu Dhabi, UAE, January 2025. Association for Computational Linguistics.
Large Language Models (LLMs) have rapidly advanced, with domain-specific expert models emerging to handle specialized tasks across various fields. However, the predominant focus on English-centric models demands extensive data, making it challenging to develop comparable models for middle and low-resource languages. To address this limitation, we introduce Migrate, a novel method that leverages open-source static embedding models and up to 3 million tokens of code-switching data to facilitate the seamless transfer of embeddings to target languages. Migrate enables effective cross-lingual adaptation without requiring large-scale domain-specific corpora in the target language, promoting the accessibility of expert LLMs to a diverse range of linguistic communities. Our experimental results demonstrate that Migrate significantly enhances model performance in target languages, outperforming baseline and existing cross-lingual transfer methods. This approach provides a practical and efficient solution for extending the capabilities of domain-specific expert models.
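
The abstract describes initializing target-language embeddings from an open-source static embedding space. The sketch below is not the authors' Migrate procedure (consult the paper for that); it illustrates the general family of static-embedding-guided transfer, in the style of WECHSEL-like initialization, where each target token's model embedding is a similarity-weighted mix of source-token embeddings. All vocabularies, dimensions, and vectors here are hypothetical toy data.

import numpy as np

rng = np.random.default_rng(0)

# Toy aligned static embeddings for source (English) and target (German)
# words, standing in for e.g. aligned fastText vectors.
static_dim = 8
src_static = {w: rng.normal(size=static_dim) for w in ["dog", "cat", "house"]}
tgt_static = {w: rng.normal(size=static_dim) for w in ["Hund", "Katze", "Haus"]}

# Toy input embeddings of the source-language expert model.
model_dim = 16
src_model_emb = {w: rng.normal(size=model_dim) for w in src_static}

def cosine(a, b):
    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-8))

def transfer_embedding(tgt_word, k=2, temperature=0.1):
    """Initialize a target token's model embedding as a softmax-weighted
    average of the model embeddings of its k nearest source neighbours
    in the aligned static space (generic transfer sketch, not Migrate)."""
    t = tgt_static[tgt_word]
    sims = {w: cosine(t, v) for w, v in src_static.items()}
    top = sorted(sims, key=sims.get, reverse=True)[:k]
    weights = np.array([sims[w] / temperature for w in top])
    weights = np.exp(weights - weights.max())
    weights /= weights.sum()
    return sum(wt * src_model_emb[w] for wt, w in zip(weights, top))

for word in tgt_static:
    print(word, transfer_embedding(word)[:4].round(3))

In such schemes the transferred embeddings give the target-language vocabulary a sensible starting point in the source model's embedding space, so that only a small amount of adaptation data (here, per the abstract, up to 3 million tokens of code-switching data) is needed rather than a large domain-specific corpus in the target language.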
@inproceedings{hong_migrate_2025,
	address = {Abu Dhabi, UAE},
	title = {{MIGRATE}: {Cross}-{Lingual} {Adaptation} of {Domain}-{Specific} {LLMs} through {Code}-{Switching} and {Embedding} {Transfer}},
	shorttitle = {{MIGRATE}},
	url = {https://aclanthology.org/2025.coling-main.617/},
	abstract = {Large Language Models (LLMs) have rapidly advanced, with domain-specific expert models emerging to handle specialized tasks across various fields. However, the predominant focus on English-centric models demands extensive data, making it challenging to develop comparable models for middle and low-resource languages. To address this limitation, we introduce Migrate, a novel method that leverages open-source static embedding models and up to 3 million tokens of code-switching data to facilitate the seamless transfer of embeddings to target languages. Migrate enables effective cross-lingual adaptation without requiring large-scale domain-specific corpora in the target language, promoting the accessibility of expert LLMs to a diverse range of linguistic communities. Our experimental results demonstrate that Migrate significantly enhances model performance in target languages, outperforming baseline and existing cross-lingual transfer methods. This approach provides a practical and efficient solution for extending the capabilities of domain-specific expert models.},
	urldate = {2025-01-28},
	booktitle = {Proceedings of the 31st {International} {Conference} on {Computational} {Linguistics}},
	publisher = {Association for Computational Linguistics},
	author = {Hong, Seongtae and Lee, Seungyoon and Moon, Hyeonseok and Lim, Heuiseok},
	editor = {Rambow, Owen and Wanner, Leo and Apidianaki, Marianna and Al-Khalifa, Hend and Di Eugenio, Barbara and Schockaert, Steven},
	month = jan,
	year = {2025},
	keywords = {coling-25},
	pages = {9184--9193},
}
