Improving Topic Models with Latent Feature Word Representations

Improving Topic Models with Latent Feature Word Representations. Nguyen, D. Q., Billingsley, R., Du, L., & Johnson, M.

Improving Topic Models with Latent Feature Word Representations [link]

Probabilistic topic models are widely used to discover latent topics in document collections, while latent feature vector representations of words have been used to obtain high performance in many NLP tasks. In this paper, we extend two different Dirichlet multinomial topic models by incorporating latent feature vector representations of words trained on very large corpora to improve the word-topic mapping learnt on a smaller corpus. Experimental results show that by using information from the external corpora, our new models produce significant improvements on topic coherence, document clustering and document classification tasks, especially on datasets with few or short documents.

@comment{Journal article in TACL (ACL Anthology ID Q15-1022). The fields from
 id through private_publication, plus type, websites and bibtype, are
 Mendeley/BibBase export metadata; standard BibTeX styles ignore them.}
@article{nguyen2015improving,
 author = {Nguyen, Dat Quoc and Billingsley, Richard and Du, Lan and Johnson, Mark},
 title = {Improving Topic Models with Latent Feature Word Representations},
 journal = {Transactions of the Association for Computational Linguistics},
 volume = {3},
 pages = {299--313},
 year = {2015},
 url = {http://www.aclweb.org/anthology/Q15-1022},
 type = {article},
 websites = {http://www.aclweb.org/anthology/Q15-1022},
 id = {acbebb30-8df5-30e2-a458-dea549ec07ad},
 created = {2018-02-05T17:43:12.729Z},
 accessed = {2018-02-05},
 file_attached = {true},
 profile_id = {371589bb-c770-37ff-8193-93c6f25ffeb1},
 group_id = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},
 last_modified = {2018-02-05T17:43:15.229Z},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {false},
 hidden = {false},
 private_publication = {false},
 abstract = {Probabilistic topic models are widely used to discover latent topics in document collections, while latent feature vector representations of words have been used to obtain high performance in many NLP tasks. In this paper, we extend two different Dirichlet multinomial topic models by incorporating latent feature vector representations of words trained on very large corpora to improve the word-topic mapping learnt on a smaller corpus. Experimental results show that by using information from the external corpora, our new models produce significant improvements on topic coherence, document clustering and document classification tasks, especially on datasets with few or short documents.},
 bibtype = {article}
}

Downloads: 0

{"_id":"nqHECXbNxqY7sDPhY","bibbaseid":"nguyen-billingsley-du-johnson-improvingtopicmodelswithlatentfeaturewordrepresentations","downloads":0,"creationDate":"2018-02-07T16:22:57.300Z","title":"Improving Topic Models with Latent Feature Word Representations","author_short":["Nguyen, D., Q.","Billingsley, R.","Du, L.","Johnson, M."],"year":null,"bibtype":"article","biburl":null,"bibdata":{"title":"Improving Topic Models with Latent Feature Word Representations","type":"article","websites":"http://www.aclweb.org/anthology/Q15-1022","id":"acbebb30-8df5-30e2-a458-dea549ec07ad","created":"2018-02-05T17:43:12.729Z","accessed":"2018-02-05","file_attached":"true","profile_id":"371589bb-c770-37ff-8193-93c6f25ffeb1","group_id":"f982cd63-7ceb-3aa2-ac7e-a953963d6716","last_modified":"2018-02-05T17:43:15.229Z","read":false,"starred":false,"authored":false,"confirmed":false,"hidden":false,"private_publication":false,"abstract":"Probabilistic topic models are widely used to discover latent topics in document collec-tions, while latent feature vector representa-tions of words have been used to obtain high performance in many NLP tasks. In this pa-per, we extend two different Dirichlet multino-mial topic models by incorporating latent fea-ture vector representations of words trained on very large corpora to improve the word-topic mapping learnt on a smaller corpus. 
Exper-imental results show that by using informa-tion from the external corpora, our new mod-els produce significant improvements on topic coherence, document clustering and document classification tasks, especially on datasets with few or short documents.","bibtype":"article","author":"Nguyen, Dat Quoc and Billingsley, Richard and Du, Lan and Johnson, Mark","bibtex":"@article{\n title = {Improving Topic Models with Latent Feature Word Representations},\n type = {article},\n websites = {http://www.aclweb.org/anthology/Q15-1022},\n id = {acbebb30-8df5-30e2-a458-dea549ec07ad},\n created = {2018-02-05T17:43:12.729Z},\n accessed = {2018-02-05},\n file_attached = {true},\n profile_id = {371589bb-c770-37ff-8193-93c6f25ffeb1},\n group_id = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},\n last_modified = {2018-02-05T17:43:15.229Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {false},\n hidden = {false},\n private_publication = {false},\n abstract = {Probabilistic topic models are widely used to discover latent topics in document collec-tions, while latent feature vector representa-tions of words have been used to obtain high performance in many NLP tasks. In this pa-per, we extend two different Dirichlet multino-mial topic models by incorporating latent fea-ture vector representations of words trained on very large corpora to improve the word-topic mapping learnt on a smaller corpus. 
Exper-imental results show that by using informa-tion from the external corpora, our new mod-els produce significant improvements on topic coherence, document clustering and document classification tasks, especially on datasets with few or short documents.},\n bibtype = {article},\n author = {Nguyen, Dat Quoc and Billingsley, Richard and Du, Lan and Johnson, Mark}\n}","author_short":["Nguyen, D., Q.","Billingsley, R.","Du, L.","Johnson, M."],"urls":{"Paper":"http://bibbase.org/service/mendeley/371589bb-c770-37ff-8193-93c6f25ffeb1/file/4d6bd5b8-82a5-afe5-73c2-983887980d2f/Improving_Topic_Models_with_Latent_Feature_Word_Representations.pdf.pdf","Website":"http://www.aclweb.org/anthology/Q15-1022"},"bibbaseid":"nguyen-billingsley-du-johnson-improvingtopicmodelswithlatentfeaturewordrepresentations","role":"author","downloads":0,"html":""},"search_terms":["improving","topic","models","latent","feature","word","representations","nguyen","billingsley","du","johnson"],"keywords":[],"authorIDs":[]}