MIGRATE: Cross-Lingual Adaptation of Domain-Specific LLMs through Code-Switching and Embedding Transfer. Hong, S., Lee, S., Moon, H., & Lim, H. In Rambow, O., Wanner, L., Apidianaki, M., Al-Khalifa, H., Di Eugenio, B., & Schockaert, S., editors, Proceedings of the 31st International Conference on Computational Linguistics, pages 9184–9193, Abu Dhabi, UAE, January 2025. Association for Computational Linguistics.
Large Language Models (LLMs) have rapidly advanced, with domain-specific expert models emerging to handle specialized tasks across various fields. However, the predominant focus on English-centric models demands extensive data, making it challenging to develop comparable models for middle and low-resource languages. To address this limitation, we introduce Migrate, a novel method that leverages open-source static embedding models and up to 3 million tokens of code-switching data to facilitate the seamless transfer of embeddings to target languages. Migrate enables effective cross-lingual adaptation without requiring large-scale domain-specific corpora in the target language, promoting the accessibility of expert LLMs to a diverse range of linguistic communities. Our experimental results demonstrate that Migrate significantly enhances model performance in target languages, outperforming baseline and existing cross-lingual transfer methods. This approach provides a practical and efficient solution for extending the capabilities of domain-specific expert models.
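
The abstract describes initializing target-language embeddings from an open-source static embedding space. The sketch below is not the authors' Migrate procedure (consult the paper for that); it illustrates the general family of static-embedding-guided transfer, in the style of WECHSEL-like initialization, where each target token's model embedding is a similarity-weighted mix of source-token embeddings. All vocabularies, dimensions, and vectors here are hypothetical toy data.

import numpy as np

rng = np.random.default_rng(0)

# Toy aligned static embeddings for source (English) and target (German)
# words, standing in for e.g. aligned fastText vectors.
static_dim = 8
src_static = {w: rng.normal(size=static_dim) for w in ["dog", "cat", "house"]}
tgt_static = {w: rng.normal(size=static_dim) for w in ["Hund", "Katze", "Haus"]}

# Toy input embeddings of the source-language expert model.
model_dim = 16
src_model_emb = {w: rng.normal(size=model_dim) for w in src_static}

def cosine(a, b):
    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-8))

def transfer_embedding(tgt_word, k=2, temperature=0.1):
    """Initialize a target token's model embedding as a softmax-weighted
    average of the model embeddings of its k nearest source neighbours
    in the aligned static space (generic transfer sketch, not Migrate)."""
    t = tgt_static[tgt_word]
    sims = {w: cosine(t, v) for w, v in src_static.items()}
    top = sorted(sims, key=sims.get, reverse=True)[:k]
    weights = np.array([sims[w] / temperature for w in top])
    weights = np.exp(weights - weights.max())
    weights /= weights.sum()
    return sum(wt * src_model_emb[w] for wt, w in zip(weights, top))

for word in tgt_static:
    print(word, transfer_embedding(word)[:4].round(3))

In such schemes the transferred embeddings give the target-language vocabulary a sensible starting point in the source model's embedding space, so that only a small amount of adaptation data (here, per the abstract, up to 3 million tokens of code-switching data) is needed rather than a large domain-specific corpus in the target language.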
@inproceedings{hong_migrate_2025,
	address = {Abu Dhabi, UAE},
	title = {{MIGRATE}: {Cross}-{Lingual} {Adaptation} of {Domain}-{Specific} {LLMs} through {Code}-{Switching} and {Embedding} {Transfer}},
	shorttitle = {{MIGRATE}},
	url = {https://aclanthology.org/2025.coling-main.617/},
	abstract = {Large Language Models (LLMs) have rapidly advanced, with domain-specific expert models emerging to handle specialized tasks across various fields. However, the predominant focus on English-centric models demands extensive data, making it challenging to develop comparable models for middle and low-resource languages. To address this limitation, we introduce Migrate, a novel method that leverages open-source static embedding models and up to 3 million tokens of code-switching data to facilitate the seamless transfer of embeddings to target languages. Migrate enables effective cross-lingual adaptation without requiring large-scale domain-specific corpora in the target language, promoting the accessibility of expert LLMs to a diverse range of linguistic communities. Our experimental results demonstrate that Migrate significantly enhances model performance in target languages, outperforming baseline and existing cross-lingual transfer methods. This approach provides a practical and efficient solution for extending the capabilities of domain-specific expert models.},
	urldate = {2025-01-28},
	booktitle = {Proceedings of the 31st {International} {Conference} on {Computational} {Linguistics}},
	publisher = {Association for Computational Linguistics},
	author = {Hong, Seongtae and Lee, Seungyoon and Moon, Hyeonseok and Lim, Heuiseok},
	editor = {Rambow, Owen and Wanner, Leo and Apidianaki, Marianna and Al-Khalifa, Hend and Di Eugenio, Barbara and Schockaert, Steven},
	month = jan,
	year = {2025},
	keywords = {coling-25},
	pages = {9184--9193},
}
