Empirical Study of Topic Modeling in Twitter

Empirical Study of Topic Modeling in Twitter. Hong, L. & Davison, B. D.

Social networks such as Facebook, LinkedIn, and Twitter have been a crucial source of information for a wide spectrum of users. In Twitter, popular information that is deemed important by the community propagates through the network. Studying the characteristics of content in the messages becomes important for a number of tasks, such as breaking news detection, personalized message recommendation, friends recommendation, sentiment analysis and others. While many researchers wish to use standard text mining tools to understand messages on Twitter, the restricted length of those messages prevents them from being employed to their full potential. We address the problem of using standard topic models in micro-blogging environments by studying how the models can be trained on the dataset. We propose several schemes to train a standard topic model and compare their quality and effectiveness through a set of carefully designed experiments from both qualitative and quantitative perspectives. We show that by training a topic model on aggregated messages we can obtain a higher quality of learned model which results in significantly better performance in two real-world classification problems. We also discuss how the state-of-the-art Author-Topic model fails to model hierarchical relationships between entities in Social Media.

@comment{Citation key added (the exported entry had none). The year is inferred from the "soma2010" component of the URL path -- confirm against the paper. The url field duplicates the Mendeley-specific websites field so that standard styles can print the link.}
@article{hong2010empirical,
 title = {Empirical Study of Topic Modeling in {Twitter}},
 type = {article},
 year = {2010},
 keywords = {Design,Experimentation,Twitter,Social Media,Topic Models},
 websites = {http://snap.stanford.edu/soma2010/papers/soma2010_12.pdf},
 url = {http://snap.stanford.edu/soma2010/papers/soma2010_12.pdf},
 id = {653d9cf0-971d-3c3d-8f05-346896d8bb03},
 created = {2018-02-05T17:30:45.709Z},
 accessed = {2018-02-05},
 file_attached = {true},
 profile_id = {371589bb-c770-37ff-8193-93c6f25ffeb1},
 group_id = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},
 last_modified = {2018-02-05T17:30:50.859Z},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {false},
 hidden = {false},
 private_publication = {false},
 abstract = {Social networks such as Facebook, LinkedIn, and Twitter have been a crucial source of information for a wide spectrum of users. In Twitter, popular information that is deemed important by the community propagates through the network. Studying the characteristics of content in the messages becomes important for a number of tasks, such as breaking news detection, personalized message recommendation, friends recommendation, sentiment analysis and others. While many researchers wish to use standard text mining tools to understand messages on Twitter, the restricted length of those messages prevents them from being employed to their full potential. We address the problem of using standard topic models in micro-blogging environments by studying how the models can be trained on the dataset. We propose several schemes to train a standard topic model and compare their quality and effectiveness through a set of carefully designed experiments from both qualitative and quantitative perspectives. We show that by training a topic model on aggregated messages we can obtain a higher quality of learned model which results in significantly better performance in two real-world classification problems. We also discuss how the state-of-the-art Author-Topic model fails to model hierarchical relationships between entities in Social Media.},
 bibtype = {article},
 author = {Hong, Liangjie and Davison, Brian D}
}

Downloads: 0

{"_id":"SjJn5gsHmgCWs2tGA","bibbaseid":"hong-davison-empiricalstudyoftopicmodelingintwitter","downloads":0,"creationDate":"2018-02-07T16:22:57.284Z","title":"Empirical Study of Topic Modeling in Twitter","author_short":["Hong, L.","Davison, B., D."],"year":null,"bibtype":"article","biburl":null,"bibdata":{"title":"Empirical Study of Topic Modeling in Twitter","type":"article","keywords":"Design,Experimentation Keywords Twitter,Social Media,Topic Models","websites":"http://snap.stanford.edu/soma2010/papers/soma2010_12.pdf","id":"653d9cf0-971d-3c3d-8f05-346896d8bb03","created":"2018-02-05T17:30:45.709Z","accessed":"2018-02-05","file_attached":"true","profile_id":"371589bb-c770-37ff-8193-93c6f25ffeb1","group_id":"f982cd63-7ceb-3aa2-ac7e-a953963d6716","last_modified":"2018-02-05T17:30:50.859Z","read":false,"starred":false,"authored":false,"confirmed":false,"hidden":false,"private_publication":false,"abstract":"Social networks such as Facebook, LinkedIn, and Twitter have been a crucial source of information for a wide spectrum of users. In Twitter, popular information that is deemed important by the com-munity propagates through the network. Studying the character-istics of content in the messages becomes important for a number of tasks, such as breaking news detection, personalized message recommendation, friends recommendation, sentiment analysis and others. While many researchers wish to use standard text mining tools to understand messages on Twitter, the restricted length of those messages prevents them from being employed to their full potential. We address the problem of using standard topic models in micro-blogging environments by studying how the models can be trained on the dataset. We propose several schemes to train a standard topic model and compare their quality and effectiveness through a set of carefully designed experiments from both qualitative and quantitative perspectives. 
We show that by training a topic model on aggregated messages we can obtain a higher quality of learned model which results in significantly better performance in two real-world classification problems. We also discuss how the state-of-the-art Author-Topic model fails to model hierarchical relationships between entities in Social Media.","bibtype":"article","author":"Hong, Liangjie and Davison, Brian D","bibtex":"@article{\n title = {Empirical Study of Topic Modeling in Twitter},\n type = {article},\n keywords = {Design,Experimentation Keywords Twitter,Social Media,Topic Models},\n websites = {http://snap.stanford.edu/soma2010/papers/soma2010_12.pdf},\n id = {653d9cf0-971d-3c3d-8f05-346896d8bb03},\n created = {2018-02-05T17:30:45.709Z},\n accessed = {2018-02-05},\n file_attached = {true},\n profile_id = {371589bb-c770-37ff-8193-93c6f25ffeb1},\n group_id = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},\n last_modified = {2018-02-05T17:30:50.859Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {false},\n hidden = {false},\n private_publication = {false},\n abstract = {Social networks such as Facebook, LinkedIn, and Twitter have been a crucial source of information for a wide spectrum of users. In Twitter, popular information that is deemed important by the com-munity propagates through the network. Studying the character-istics of content in the messages becomes important for a number of tasks, such as breaking news detection, personalized message recommendation, friends recommendation, sentiment analysis and others. While many researchers wish to use standard text mining tools to understand messages on Twitter, the restricted length of those messages prevents them from being employed to their full potential. We address the problem of using standard topic models in micro-blogging environments by studying how the models can be trained on the dataset. 
We propose several schemes to train a standard topic model and compare their quality and effectiveness through a set of carefully designed experiments from both qualitative and quantitative perspectives. We show that by training a topic model on aggregated messages we can obtain a higher quality of learned model which results in significantly better performance in two real-world classification problems. We also discuss how the state-of-the-art Author-Topic model fails to model hierarchical relationships between entities in Social Media.},\n bibtype = {article},\n author = {Hong, Liangjie and Davison, Brian D}\n}","author_short":["Hong, L.","Davison, B., D."],"urls":{"Paper":"http://bibbase.org/service/mendeley/371589bb-c770-37ff-8193-93c6f25ffeb1/file/4cafdfa1-7ab7-9f1f-0733-514a9ab57f47/Empirical_Study_of_Topic_Modeling_in_Twitter.pdf.pdf","Website":"http://snap.stanford.edu/soma2010/papers/soma2010_12.pdf"},"bibbaseid":"hong-davison-empiricalstudyoftopicmodelingintwitter","role":"author","keyword":["Design","Experimentation Keywords Twitter","Social Media","Topic Models"],"downloads":0,"html":""},"search_terms":["empirical","study","topic","modeling","twitter","hong","davison"],"keywords":["design","experimentation keywords twitter","social media","topic models"],"authorIDs":[]}