Massively Multilingual Pronunciation Modeling with WikiPron. Lee, J. L., Ashby, L. F. E., Garza, M. E., Lee-Sikka, Y., Miller, S., Wong, A., McCarthy, A. D., & Gorman, K. In Proceedings of the Twelfth Language Resources and Evaluation Conference, pages 4223–4228, Marseille, France, May 2020. European Language Resources Association.
Paper abstract bibtex We introduce WikiPron, an open-source command-line tool for extracting pronunciation data from Wiktionary, a collaborative multilingual online dictionary. We first describe the design and use of WikiPron. We then discuss the challenges faced in scaling this tool to create an automatically-generated database of 1.7 million pronunciations from 165 languages. Finally, we validate the pronunciation database by using it to train and evaluate a collection of generic grapheme-to-phoneme models. The software, pronunciation data, and models are all made available under permissive open-source licenses.
@inproceedings{lee_massively_2020,
address = {Marseille, France},
title = {Massively {Multilingual} {Pronunciation} {Modeling} with {WikiPron}},
isbn = {979-10-95546-34-4},
url = {https://aclanthology.org/2020.lrec-1.521},
abstract = {We introduce WikiPron, an open-source command-line tool for extracting pronunciation data from Wiktionary, a collaborative multilingual online dictionary. We first describe the design and use of WikiPron. We then discuss the challenges faced in scaling this tool to create an automatically-generated database of 1.7 million pronunciations from 165 languages. Finally, we validate the pronunciation database by using it to train and evaluate a collection of generic grapheme-to-phoneme models. The software, pronunciation data, and models are all made available under permissive open-source licenses.},
language = {English},
urldate = {2023-04-05},
booktitle = {Proceedings of the {Twelfth} {Language} {Resources} and {Evaluation} {Conference}},
publisher = {European Language Resources Association},
author = {Lee, Jackson L. and Ashby, Lucas F. E. and Garza, M. Elizabeth and Lee-Sikka, Yeonju and Miller, Sean and Wong, Alan and McCarthy, Arya D. and Gorman, Kyle},
month = may,
year = {2020},
pages = {4223--4228},
}
Downloads: 0
{"_id":"ARkHkk4HEzmtmEDNn","bibbaseid":"lee-ashby-garza-leesikka-miller-wong-mccarthy-gorman-massivelymultilingualpronunciationmodelingwithwikipron-2020","author_short":["Lee, J. L.","Ashby, L. F.","Garza, M. E.","Lee-Sikka, Y.","Miller, S.","Wong, A.","McCarthy, A. D.","Gorman, K."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","address":"Marseille, France","title":"Massively Multilingual Pronunciation Modeling with WikiPron","isbn":"979-10-95546-34-4","url":"https://aclanthology.org/2020.lrec-1.521","abstract":"We introduce WikiPron, an open-source command-line tool for extracting pronunciation data from Wiktionary, a collaborative multilingual online dictionary. We first describe the design and use of WikiPron. We then discuss the challenges faced scaling this tool to create an automatically-generated database of 1.7 million pronunciations from 165 languages. Finally, we validate the pronunciation database by using it to train and evaluating a collection of generic grapheme-to-phoneme models. The software, pronunciation data, and models are all made available under permissive open-source licenses.","language":"English","urldate":"2023-04-05","booktitle":"Proceedings of the Twelfth Language Resources and Evaluation Conference","publisher":"European Language Resources Association","author":[{"propositions":[],"lastnames":["Lee"],"firstnames":["Jackson","L."],"suffixes":[]},{"propositions":[],"lastnames":["Ashby"],"firstnames":["Lucas","F.E."],"suffixes":[]},{"propositions":[],"lastnames":["Garza"],"firstnames":["M.","Elizabeth"],"suffixes":[]},{"propositions":[],"lastnames":["Lee-Sikka"],"firstnames":["Yeonju"],"suffixes":[]},{"propositions":[],"lastnames":["Miller"],"firstnames":["Sean"],"suffixes":[]},{"propositions":[],"lastnames":["Wong"],"firstnames":["Alan"],"suffixes":[]},{"propositions":[],"lastnames":["McCarthy"],"firstnames":["Arya","D."],"suffixes":[]},{"propositions":[],"lastnames":["Gorman"],"firstnames":["Kyle"],"suffixes":[]}],"month":"May","year":"2020","pages":"4223–4228","bibtex":"@inproceedings{lee_massively_2020,\n\taddress = {Marseille, France},\n\ttitle = {Massively {Multilingual} {Pronunciation} {Modeling} with {WikiPron}},\n\tisbn = {979-10-95546-34-4},\n\turl = {https://aclanthology.org/2020.lrec-1.521},\n\tabstract = {We introduce WikiPron, an open-source command-line tool for extracting pronunciation data from Wiktionary, a collaborative multilingual online dictionary. We first describe the design and use of WikiPron. We then discuss the challenges faced scaling this tool to create an automatically-generated database of 1.7 million pronunciations from 165 languages. Finally, we validate the pronunciation database by using it to train and evaluating a collection of generic grapheme-to-phoneme models. The software, pronunciation data, and models are all made available under permissive open-source licenses.},\n\tlanguage = {English},\n\turldate = {2023-04-05},\n\tbooktitle = {Proceedings of the {Twelfth} {Language} {Resources} and {Evaluation} {Conference}},\n\tpublisher = {European Language Resources Association},\n\tauthor = {Lee, Jackson L. and Ashby, Lucas F.E. and Garza, M. Elizabeth and Lee-Sikka, Yeonju and Miller, Sean and Wong, Alan and McCarthy, Arya D. and Gorman, Kyle},\n\tmonth = may,\n\tyear = {2020},\n\tpages = {4223--4228},\n}\n\n\n\n\n\n\n\n","author_short":["Lee, J. L.","Ashby, L. F.","Garza, M. E.","Lee-Sikka, Y.","Miller, S.","Wong, A.","McCarthy, A. D.","Gorman, K."],"key":"lee_massively_2020","id":"lee_massively_2020","bibbaseid":"lee-ashby-garza-leesikka-miller-wong-mccarthy-gorman-massivelymultilingualpronunciationmodelingwithwikipron-2020","role":"author","urls":{"Paper":"https://aclanthology.org/2020.lrec-1.521"},"metadata":{"authorlinks":{}}},"bibtype":"inproceedings","biburl":"https://bibbase.org/zotero/abhishek-p","dataSources":["h7kKWXpJh2iaX92T5"],"keywords":[],"search_terms":["massively","multilingual","pronunciation","modeling","wikipron","lee","ashby","garza","lee-sikka","miller","wong","mccarthy","gorman"],"title":"Massively Multilingual Pronunciation Modeling with WikiPron","year":2020}