Named entity transliteration with comparable corpora

Named entity transliteration with comparable corpora. Sproat, R., Tao, T., & Zhai, C. In Proceedings of the 21st International Conference on Computational Linguistics and the 44th Annual Meeting of the Association for Computational Linguistics, pages 73–80, 2006. Association for Computational Linguistics.

Website abstract bibtex

In this paper we investigate Chinese-English name transliteration using comparable corpora, corpora where texts in the two languages deal in some of the same topics — and therefore share references to named entities — but are not translations of each other. We present two distinct methods for transliteration, one approach using phonetic transliteration, and the second using the temporal distribution of candidate pairs. Each of these approaches works quite well, but by combining the approaches one can achieve even better results. We then propose a novel score propagation method that utilizes the co-occurrence of transliteration pairs within document pairs. This propagation method achieves further improvement over the best results from the previous step.

% Conference paper by Sproat, Tao and Zhai (2006). Fields that are not standard
% BibTeX fields (id, created, profile_id, read, starred, ...) are reference-manager
% export metadata; BibTeX styles ignore unknown fields.
@inproceedings{Sproat2006,
 title = {Named entity transliteration with comparable corpora},
 type = {inProceedings},
 year = {2006},
 month = jul,
 pages = {73--80},
 url = {http://portal.acm.org/citation.cfm?id=1220185&dl=GUIDE},
 websites = {http://portal.acm.org/citation.cfm?id=1220185&dl=GUIDE},
 publisher = {Association for Computational Linguistics},
 id = {c13fc75a-09d1-37c3-aa0a-0b5e2058e729},
 created = {2011-12-28T07:04:55.000Z},
 file_attached = {false},
 profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
 group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},
 last_modified = {2017-03-14T14:36:19.698Z},
 tags = {named entities},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false},
 citation_key = {Sproat2006},
 private_publication = {false},
 abstract = {In this paper we investigate Chinese-English name transliteration using comparable corpora, corpora where texts in the two languages deal in some of the same topics --- and therefore share references to named entities --- but are not translations of each other. We present two distinct methods for transliteration, one approach using phonetic transliteration, and the second using the temporal distribution of candidate pairs. Each of these approaches works quite well, but by combining the approaches one can achieve even better results. We then propose a novel score propagation method that utilizes the co-occurrence of transliteration pairs within document pairs. This propagation method achieves further improvement over the best results from the previous step.},
 bibtype = {inProceedings},
 author = {Sproat, Richard and Tao, Tao and Zhai, ChengXiang},
 booktitle = {Proceedings of the 21st International Conference on Computational Linguistics and the 44th Annual Meeting of the Association for Computational Linguistics}
}

Downloads: 0

{"_id":"eJoP7tk6QyZfnq3ZJ","bibbaseid":"sproat-tao-zhai-namedentitytransliterationwithcomparablecorpora-2006","authorIDs":[],"author_short":["Sproat, R.","Tao, T.","Zhai, C."],"bibdata":{"title":"Named entity transliteration with comparable corpora","type":"inProceedings","year":"2006","identifiers":"[object Object]","pages":"73-80","issue":"July","websites":"http://portal.acm.org/citation.cfm?id=1220185&dl=GUIDE,","publisher":"Association for Computational Linguistics","id":"c13fc75a-09d1-37c3-aa0a-0b5e2058e729","created":"2011-12-28T07:04:55.000Z","file_attached":false,"profile_id":"5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6","group_id":"066b42c8-f712-3fc3-abb2-225c158d2704","last_modified":"2017-03-14T14:36:19.698Z","tags":"named entities","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"citation_key":"Sproat2006","private_publication":false,"abstract":"In this paper we investigate Chinese-English name transliteration using comparable corpora, corpora where texts in the two languages deal in some of the same topics --- and therefore share references to named entities --- but are not translations of each other. We present two distinct methods for transliteration, one approach using phonetic transliteration, and the second using the temporal distribution of candidate pairs. Each of these approaches works quite well, but by combining the approaches one can achieve even better results. We then propose a novel score propagation method that utilizes the co-occurrence of transliteration pairs within document pairs. 
This propagation method achieves further improvement over the best results from the previous step.","bibtype":"inProceedings","author":"Sproat, Richard and Tao, Tao and Zhai, ChengXiang","booktitle":"Proceedings of the 21st International Conference on Computational Linguistics and the 44th annual meeting of the Association for Computational Linguistics","bibtex":"@inProceedings{\n title = {Named entity transliteration with comparable corpora},\n type = {inProceedings},\n year = {2006},\n identifiers = {[object Object]},\n pages = {73-80},\n issue = {July},\n websites = {http://portal.acm.org/citation.cfm?id=1220185&dl=GUIDE,},\n publisher = {Association for Computational Linguistics},\n id = {c13fc75a-09d1-37c3-aa0a-0b5e2058e729},\n created = {2011-12-28T07:04:55.000Z},\n file_attached = {false},\n profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},\n group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},\n last_modified = {2017-03-14T14:36:19.698Z},\n tags = {named entities},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n citation_key = {Sproat2006},\n private_publication = {false},\n abstract = {In this paper we investigate Chinese-English name transliteration using comparable corpora, corpora where texts in the two languages deal in some of the same topics --- and therefore share references to named entities --- but are not translations of each other. We present two distinct methods for transliteration, one approach using phonetic transliteration, and the second using the temporal distribution of candidate pairs. Each of these approaches works quite well, but by combining the approaches one can achieve even better results. We then propose a novel score propagation method that utilizes the co-occurrence of transliteration pairs within document pairs. 
This propagation method achieves further improvement over the best results from the previous step.},\n bibtype = {inProceedings},\n author = {Sproat, Richard and Tao, Tao and Zhai, ChengXiang},\n booktitle = {Proceedings of the 21st International Conference on Computational Linguistics and the 44th annual meeting of the Association for Computational Linguistics}\n}","author_short":["Sproat, R.","Tao, T.","Zhai, C."],"urls":{"Website":"http://portal.acm.org/citation.cfm?id=1220185&dl=GUIDE,"},"bibbaseid":"sproat-tao-zhai-namedentitytransliterationwithcomparablecorpora-2006","role":"author","downloads":0,"html":""},"bibtype":"inProceedings","creationDate":"2020-02-06T23:48:11.827Z","downloads":0,"keywords":[],"search_terms":["named","entity","transliteration","comparable","corpora","sproat","tao","zhai"],"title":"Named entity transliteration with comparable corpora","year":2006}