An Evaluation of Topic Modelling Techniques for Twitter

An Evaluation of Topic Modelling Techniques for Twitter. Jónsson, E. & Stolee, J.

In this paper, we complete an evaluation of various topic modelling algorithms, and examine their performance when working with Twitter tweets. LDA [1] is an algorithm that is often used when modelling topics within text, and it has been proven to be effective; however, LDA may not necessarily perform well when working with documents that are short in length [2, 3, 4]. We compare LDA to three models which offer potential improvements over the downfalls of LDA when modelling tweets. This includes a variation of LDA, referred to as LDA-U, which aggregates data on a user-basis in an effort to improve the standard LDA model's performance [3]. We also evaluate two other models specifically designed to work with short text: the "biterm topic model" (BTM), and a "word2vec Gaussian mixture model", which models topics as a distribution over words in semantic space [4].

@comment{
  Entry for the Jonsson & Stolee topic-modelling evaluation (PDF hosted at the
  URL below). The original export had no citation key; one was added so the
  entry can be parsed and cited. NOTE(review): @article normally requires
  `journal` and `year`; neither is present in the exported metadata, so they
  are left out rather than guessed -- confirm the venue/date, or switch the
  entry type to @misc if this is an unpublished report.
  `url`/`urldate` mirror the Mendeley-specific `websites`/`accessed` fields,
  which are kept for tools that read them.
}
@article{jonsson_stolee_topicmodelling_twitter,
  author              = {J{\'o}nsson, El{\'\i}as and Stolee, Jake},
  title               = {An Evaluation of Topic Modelling Techniques for {Twitter}},
  url                 = {https://www.cs.toronto.edu/~jstolee/projects/topic.pdf},
  urldate             = {2018-02-05},
  abstract            = {In this paper, we complete an evaluation of various topic modelling algorithms, and examine their performance when working with Twitter tweets. LDA [1] is an algorithm that is often used when modelling topics within text, and it has been proven to be effective; however, LDA may not necessarily perform well when working with documents that are short in length [2, 3, 4]. We compare LDA to three models which offer potential improvements over the downfalls of LDA when modelling tweets. This includes a variation of LDA, referred to as LDA-U, which aggregates data on a user-basis in an effort to improve the standard LDA model's performance [3]. We also evaluate two other models specifically designed to work with short text: the ``biterm topic model'' (BTM), and a ``word2vec Gaussian mixture model'', which models topics as a distribution over words in semantic space [4].},
  type                = {article},
  bibtype             = {article},
  websites            = {https://www.cs.toronto.edu/~jstolee/projects/topic.pdf},
  accessed            = {2018-02-05},
  id                  = {e0d247b8-f815-34eb-a55a-832250e28a0d},
  created             = {2018-02-05T17:31:26.539Z},
  last_modified       = {2018-02-05T17:31:28.692Z},
  file_attached       = {true},
  profile_id          = {371589bb-c770-37ff-8193-93c6f25ffeb1},
  group_id            = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},
  read                = {false},
  starred             = {false},
  authored            = {false},
  confirmed           = {false},
  hidden              = {false},
  private_publication = {false},
}

Downloads: 0

{"_id":"hmZdvnAPdEbF8Zmib","bibbaseid":"jnsson-stolee-anevaluationoftopicmodellingtechniquesfortwitter","downloads":0,"creationDate":"2018-02-07T16:22:57.286Z","title":"An Evaluation of Topic Modelling Techniques for Twitter","author_short":["Jónsson, E.","Stolee, J."],"year":null,"bibtype":"article","biburl":null,"bibdata":{"title":"An Evaluation of Topic Modelling Techniques for Twitter","type":"article","websites":"https://www.cs.toronto.edu/~jstolee/projects/topic.pdf","id":"e0d247b8-f815-34eb-a55a-832250e28a0d","created":"2018-02-05T17:31:26.539Z","accessed":"2018-02-05","file_attached":"true","profile_id":"371589bb-c770-37ff-8193-93c6f25ffeb1","group_id":"f982cd63-7ceb-3aa2-ac7e-a953963d6716","last_modified":"2018-02-05T17:31:28.692Z","read":false,"starred":false,"authored":false,"confirmed":false,"hidden":false,"private_publication":false,"abstract":"In this paper, we complete an evaluation of various topic modelling algorithms, and examine their performance when working with Twitter tweets. LDA [1] is an algorithm that is often used when modelling topics within text, and it has been proven to be affective; however, LDA may not necessarily perform well when working with documents that are short in length [2, 3, 4]. We compare LDA to three models which offer potential improvements over the downfalls of LDA when modelling tweets. This includes a variation of LDA, referred to as LDA-U, which aggregates data on a user-basis in an effort to improve the standard LDA model's performance[3]. 
We also evaluate two other models specifically designed to work with short text: the \" biterm topic model \" (BTM), and a \" word2vec Gaussian mix-ture model \" , which models topics as a distribution over words in semantic space [4].","bibtype":"article","author":"Jónsson, Elías and Stolee, Jake","bibtex":"@article{\n title = {An Evaluation of Topic Modelling Techniques for Twitter},\n type = {article},\n websites = {https://www.cs.toronto.edu/~jstolee/projects/topic.pdf},\n id = {e0d247b8-f815-34eb-a55a-832250e28a0d},\n created = {2018-02-05T17:31:26.539Z},\n accessed = {2018-02-05},\n file_attached = {true},\n profile_id = {371589bb-c770-37ff-8193-93c6f25ffeb1},\n group_id = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},\n last_modified = {2018-02-05T17:31:28.692Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {false},\n hidden = {false},\n private_publication = {false},\n abstract = {In this paper, we complete an evaluation of various topic modelling algorithms, and examine their performance when working with Twitter tweets. LDA [1] is an algorithm that is often used when modelling topics within text, and it has been proven to be affective; however, LDA may not necessarily perform well when working with documents that are short in length [2, 3, 4]. We compare LDA to three models which offer potential improvements over the downfalls of LDA when modelling tweets. This includes a variation of LDA, referred to as LDA-U, which aggregates data on a user-basis in an effort to improve the standard LDA model's performance[3]. 
We also evaluate two other models specifically designed to work with short text: the \" biterm topic model \" (BTM), and a \" word2vec Gaussian mix-ture model \" , which models topics as a distribution over words in semantic space [4].},\n bibtype = {article},\n author = {Jónsson, Elías and Stolee, Jake}\n}","author_short":["Jónsson, E.","Stolee, J."],"urls":{"Paper":"http://bibbase.org/service/mendeley/371589bb-c770-37ff-8193-93c6f25ffeb1/file/14661a6d-1683-be81-5e8e-0b03bb0597e3/An_Evaluation_of_Topic_Modelling_Techniques_for_Twitter.pdf.pdf","Website":"https://www.cs.toronto.edu/~jstolee/projects/topic.pdf"},"bibbaseid":"jnsson-stolee-anevaluationoftopicmodellingtechniquesfortwitter","role":"author","downloads":0},"search_terms":["evaluation","topic","modelling","techniques","twitter","jónsson","stolee"],"keywords":[],"authorIDs":[]}