A Multilingual Text Mining Approach Based on Self-Organizing Maps

A Multilingual Text Mining Approach Based on Self-Organizing Maps. Lee, C. & Yang, H. Applied Intelligence, 18(3):295-310, Ieee, 2003.

Website abstract bibtex

This paper describes our work on developing a language-independent technique for discovery of implicit knowledge from multilingual information sources. Text mining has been gaining popularity in the knowledge discovery field, particularly with the increasing availability of digital documents in various languages from all around the world. However, currently most text mining tools mainly focus only on processing monolingual documents (particularly English documents): little attention has been paid to apply the techniques to handle the documents in Asian languages, and further extend the mining algorithms to support the aspects of multilingual information sources. In this work, we attempt to develop a language-neutral method to tackle the linguistics difficulties in the text mining process. Using a variation of automatic clustering techniques, which apply a neural net approach, namely the Self-Organizing Maps (SOM), we have conducted several experiments to uncover associated documents based on a Chinese corpus, Chinese-English bilingual parallel corpora, and a hybrid Chinese-English corpus. The experiments show some interesting results and a couple of potential paths for future work in the field of multilingual information discovery. Besides, this work is expected to act as a starting point for exploring the impacts on linguistics issues with the machine-learning approach to mining sensible linguistics elements from multilingual text collections.

% Journal article record exported from a reference manager (Mendeley-style metadata fields).
% Changes from the raw export:
%   - added the citation key (taken from the citation_key field), without which the entry cannot be parsed or cited
%   - page range written with an en-dash (double hyphen)
%   - link moved from the non-standard "websites" field to the standard "url" field
%   - dropped the "identifiers" field, whose only content was the placeholder text [object Object]
% NOTE(review): publisher "Ieee" looks like an import artifact and may not be the real
%   publisher of this journal; confirm against the journal's own page before relying on it.
% NOTE(review): the url carries what look like session parameters (CFID/CFTOKEN); a DOI,
%   if one exists, would be a more stable identifier. TODO confirm and add.
@article{Lee2003,
 author = {Lee, Chung-Hong and Yang, Hsin-Chang},
 title = {A Multilingual Text Mining Approach Based on Self-Organizing Maps},
 journal = {Applied Intelligence},
 year = {2003},
 volume = {18},
 number = {3},
 pages = {295--310},
 publisher = {Ieee},
 url = {http://portal.acm.org/citation.cfm?id=762622.762655&coll=portal&dl=GUIDE&CFID=39731654&CFTOKEN=77798259},
 abstract = {This paper describes our work on developing a language-independent technique for discovery of implicit knowledge from multilingual information sources. Text mining has been gaining popularity in the knowledge discovery field, particularity with the increasing availability of digital documents in various languages from all around the world. However, currently most text mining tools mainly focus only on processing monolingual documents (particularly English documents): little attention has been paid to apply the techniques to handle the documents in Asian languages, and further extend the mining algorithms to support the aspects of multilingual information sources. In this work, we attempt to develop a language-neutral method to tackle the linguistics difficulties in the text mining process. Using a variation of automatic clustering techniques, which apply a neural net approach, namely the Self-Organizing Maps (SOM), we have conducted several experiments to uncover associated documents based on a Chinese corpus, Chinese-English bilingual parallel corpora, and a hybrid Chinese-English corpus. The experiments show some interesting results and a couple of potential paths for future work in the field of multilingual information discovery. Besides, this work is expected to act as a starting point for exploring the impacts on linguistics issues with the machine-learning approach to mining sensible linguistics elements from multilingual text collections.},
 type = {article},
 bibtype = {article},
 citation_key = {Lee2003},
 id = {0cb22a23-1a70-380f-b41e-1c18bf6b3f06},
 created = {2012-02-09T21:39:35.000Z},
 last_modified = {2017-03-14T14:36:19.698Z},
 profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
 group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},
 file_attached = {false},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false},
 private_publication = {false}
}

Downloads: 0

{"_id":"Kmv9M9zPQNh6ZfkKm","bibbaseid":"lee-yang-amultilingualtextminingapproachbasedonselforganizingmaps-2003","authorIDs":[],"author_short":["Lee, C.","Yang, H."],"bibdata":{"title":"A Multilingual Text Mining Approach Based on Self-Organizing Maps","type":"article","year":"2003","identifiers":"[object Object]","pages":"295-310","volume":"18","websites":"http://portal.acm.org/citation.cfm?id=762622.762655&coll=portal&dl=GUIDE&CFID=39731654&CFTOKEN=77798259","publisher":"Ieee","id":"0cb22a23-1a70-380f-b41e-1c18bf6b3f06","created":"2012-02-09T21:39:35.000Z","file_attached":false,"profile_id":"5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6","group_id":"066b42c8-f712-3fc3-abb2-225c158d2704","last_modified":"2017-03-14T14:36:19.698Z","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"citation_key":"Lee2003","private_publication":false,"abstract":"This paper describes our work on developing a language-independent technique for discovery of implicit knowledge from multilingual information sources. Text mining has been gaining popularity in the knowledge discovery field, particularity with the increasing availability of digital documents in various languages from all around the world. However, currently most text mining tools mainly focus only on processing monolingual documents (particularly English documents): little attention has been paid to apply the techniques to handle the documents in Asian languages, and further extend the mining algorithms to support the aspects of multilingual information sources. In this work, we attempt to develop a language-neutral method to tackle the linguistics difficulties in the text mining process. Using a variation of automatic clustering techniques, which apply a neural net approach, namely the Self-Organizing Maps (SOM), we have conducted several experiments to uncover associated documents based on a Chinese corpus, Chinese-English bilingual parallel corpora, and a hybrid Chinese-English corpus. 
The experiments show some interesting results and a couple of potential paths for future work in the field of multilingual information discovery. Besides, this work is expected to act as a starting point for exploring the impacts on linguistics issues with the machine-learning approach to mining sensible linguistics elements from multilingual text collections.","bibtype":"article","author":"Lee, Chung-Hong and Yang, Hsin-Chang","journal":"Applied Intelligence","number":"3","bibtex":"@article{\n title = {A Multilingual Text Mining Approach Based on Self-Organizing Maps},\n type = {article},\n year = {2003},\n identifiers = {[object Object]},\n pages = {295-310},\n volume = {18},\n websites = {http://portal.acm.org/citation.cfm?id=762622.762655&coll=portal&dl=GUIDE&CFID=39731654&CFTOKEN=77798259},\n publisher = {Ieee},\n id = {0cb22a23-1a70-380f-b41e-1c18bf6b3f06},\n created = {2012-02-09T21:39:35.000Z},\n file_attached = {false},\n profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},\n group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},\n last_modified = {2017-03-14T14:36:19.698Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n citation_key = {Lee2003},\n private_publication = {false},\n abstract = {This paper describes our work on developing a language-independent technique for discovery of implicit knowledge from multilingual information sources. Text mining has been gaining popularity in the knowledge discovery field, particularity with the increasing availability of digital documents in various languages from all around the world. However, currently most text mining tools mainly focus only on processing monolingual documents (particularly English documents): little attention has been paid to apply the techniques to handle the documents in Asian languages, and further extend the mining algorithms to support the aspects of multilingual information sources. 
In this work, we attempt to develop a language-neutral method to tackle the linguistics difficulties in the text mining process. Using a variation of automatic clustering techniques, which apply a neural net approach, namely the Self-Organizing Maps (SOM), we have conducted several experiments to uncover associated documents based on a Chinese corpus, Chinese-English bilingual parallel corpora, and a hybrid Chinese-English corpus. The experiments show some interesting results and a couple of potential paths for future work in the field of multilingual information discovery. Besides, this work is expected to act as a starting point for exploring the impacts on linguistics issues with the machine-learning approach to mining sensible linguistics elements from multilingual text collections.},\n bibtype = {article},\n author = {Lee, Chung-Hong and Yang, Hsin-Chang},\n journal = {Applied Intelligence},\n number = {3}\n}","author_short":["Lee, C.","Yang, H."],"urls":{"Website":"http://portal.acm.org/citation.cfm?id=762622.762655&coll=portal&dl=GUIDE&CFID=39731654&CFTOKEN=77798259"},"bibbaseid":"lee-yang-amultilingualtextminingapproachbasedonselforganizingmaps-2003","role":"author","downloads":0,"html":""},"bibtype":"article","creationDate":"2020-02-06T23:48:11.979Z","downloads":0,"keywords":[],"search_terms":["multilingual","text","mining","approach","based","self","organizing","maps","lee","yang"],"title":"A Multilingual Text Mining Approach Based on Self-Organizing Maps","year":2003}