% Evaluation of LDA, LDA-U, BTM and a word2vec Gaussian mixture model on tweets.
% - A citation key was added; the exported entry had none and could not be cited.
% - Typed as @misc: no journal/volume/year is recorded, only a URL to a PDF.
%   TODO(review): add `year` (and a venue, if any) once confirmed from the PDF.
% - `websites`/`accessed` were renamed to the standard `url`/`urldate` fields.
% - The redundant `type = {article}` was dropped (the same value is kept in
%   `bibtype`) so that styles which print `type` for @misc do not emit it.
% - Fields from `id` downwards are reference-manager bookkeeping (presumably a
%   Mendeley export); bibliography styles ignore them.
@misc{jonsson_twitter_topic_models,
  author              = {J{\'o}nsson, El{\'\i}as and Stolee, Jake},
  title               = {An Evaluation of Topic Modelling Techniques for {Twitter}},
  url                 = {https://www.cs.toronto.edu/~jstolee/projects/topic.pdf},
  urldate             = {2018-02-05},
  abstract            = {In this paper, we complete an evaluation of various topic modelling algorithms, and examine their performance when working with Twitter tweets. LDA [1] is an algorithm that is often used when modelling topics within text, and it has been proven to be effective; however, LDA may not necessarily perform well when working with documents that are short in length [2, 3, 4]. We compare LDA to three models which offer potential improvements over the downfalls of LDA when modelling tweets. This includes a variation of LDA, referred to as LDA-U, which aggregates data on a user-basis in an effort to improve the standard LDA model's performance [3]. We also evaluate two other models specifically designed to work with short text: the ``biterm topic model'' (BTM), and a ``word2vec Gaussian mixture model'', which models topics as a distribution over words in semantic space [4].},
  bibtype             = {article},
  id                  = {e0d247b8-f815-34eb-a55a-832250e28a0d},
  created             = {2018-02-05T17:31:26.539Z},
  last_modified       = {2018-02-05T17:31:28.692Z},
  file_attached       = {true},
  profile_id          = {371589bb-c770-37ff-8193-93c6f25ffeb1},
  group_id            = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},
  read                = {false},
  starred             = {false},
  authored            = {false},
  confirmed           = {false},
  hidden              = {false},
  private_publication = {false},
}