Improving Topic Models with Latent Feature Word Representations. Nguyen, D. Q., Billingsley, R., Du, L., & Johnson, M.
Improving Topic Models with Latent Feature Word Representations [pdf]Paper  Improving Topic Models with Latent Feature Word Representations [link]Website  abstract   bibtex   
Probabilistic topic models are widely used to discover latent topics in document collections, while latent feature vector representations of words have been used to obtain high performance in many NLP tasks. In this paper, we extend two different Dirichlet multinomial topic models by incorporating latent feature vector representations of words trained on very large corpora to improve the word-topic mapping learnt on a smaller corpus. Experimental results show that by using information from the external corpora, our new models produce significant improvements on topic coherence, document clustering and document classification tasks, especially on datasets with few or short documents.
@comment{
  Journal article entry. Bibliographic fields come first; the trailing
  fields (type, websites, id, created, ... bibtype) look like
  reference-manager export metadata and are kept as-is, since standard
  styles ignore unknown fields.
  NOTE(review): journal, year, volume and pages were filled in from the
  ACL Anthology record Q15-1022; confirm against the Anthology page.
}
@article{nguyen2015improving,
  author              = {Nguyen, Dat Quoc and Billingsley, Richard and Du, Lan and Johnson, Mark},
  title               = {Improving Topic Models with Latent Feature Word Representations},
  journal             = {Transactions of the Association for Computational Linguistics},
  volume              = {3},
  pages               = {299--313},
  year                = {2015},
  url                 = {http://www.aclweb.org/anthology/Q15-1022},
  abstract            = {Probabilistic topic models are widely used to discover latent topics in document collections, while latent feature vector representations of words have been used to obtain high performance in many NLP tasks. In this paper, we extend two different Dirichlet multinomial topic models by incorporating latent feature vector representations of words trained on very large corpora to improve the word-topic mapping learnt on a smaller corpus. Experimental results show that by using information from the external corpora, our new models produce significant improvements on topic coherence, document clustering and document classification tasks, especially on datasets with few or short documents.},
  type                = {article},
  websites            = {http://www.aclweb.org/anthology/Q15-1022},
  id                  = {acbebb30-8df5-30e2-a458-dea549ec07ad},
  created             = {2018-02-05T17:43:12.729Z},
  accessed            = {2018-02-05},
  file_attached       = {true},
  profile_id          = {371589bb-c770-37ff-8193-93c6f25ffeb1},
  group_id            = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},
  last_modified       = {2018-02-05T17:43:15.229Z},
  read                = {false},
  starred             = {false},
  authored            = {false},
  confirmed           = {false},
  hidden              = {false},
  private_publication = {false},
  bibtype             = {article},
}
Downloads: 0