Empirical Study of Topic Modeling in Twitter. Hong, L. & Davison, B. D.
Empirical Study of Topic Modeling in Twitter [pdf]Paper  Empirical Study of Topic Modeling in Twitter [pdf]Website  abstract   bibtex   
Social networks such as Facebook, LinkedIn, and Twitter have been a crucial source of information for a wide spectrum of users. In Twitter, popular information that is deemed important by the community propagates through the network. Studying the characteristics of content in the messages becomes important for a number of tasks, such as breaking news detection, personalized message recommendation, friends recommendation, sentiment analysis and others. While many researchers wish to use standard text mining tools to understand messages on Twitter, the restricted length of those messages prevents them from being employed to their full potential. We address the problem of using standard topic models in micro-blogging environments by studying how the models can be trained on the dataset. We propose several schemes to train a standard topic model and compare their quality and effectiveness through a set of carefully designed experiments from both qualitative and quantitative perspectives. We show that by training a topic model on aggregated messages we can obtain a higher quality of learned model which results in significantly better performance in two real-world classification problems. We also discuss how the state-of-the-art Author-Topic model fails to model hierarchical relationships between entities in Social Media.
@comment{
  Hong and Davison, "Empirical Study of Topic Modeling in Twitter"
  (reference-manager export, cleaned up).
  Review notes:
  - The exported entry had no citation key; hong2010empirical was added.
  - Entry type changed from article to inproceedings. The booktitle and year
    are inferred from the soma2010 path in the URL -- TODO confirm against
    the published proceedings, then add pages, publisher and doi.
  - keywords: removed the stray "Keywords" heading fused in by PDF extraction.
  - abstract: removed two line-break hyphens left by PDF extraction.
  - url/urldate mirror the exporter's websites/accessed fields, which are
    kept unchanged for the exporting tool.
  - The fields from type/bibtype down to private_publication are
    reference-manager metadata; they are not standard bibliography fields.
}
@inproceedings{hong2010empirical,
  author              = {Hong, Liangjie and Davison, Brian D.},
  title               = {Empirical Study of Topic Modeling in {Twitter}},
  booktitle           = {Proceedings of the First Workshop on Social Media Analytics ({SOMA} 2010)},
  year                = {2010},
  url                 = {http://snap.stanford.edu/soma2010/papers/soma2010_12.pdf},
  urldate             = {2018-02-05},
  keywords            = {Design, Experimentation, Twitter, Social Media, Topic Models},
  abstract            = {Social networks such as Facebook, LinkedIn, and Twitter have been a crucial source of information for a wide spectrum of users. In Twitter, popular information that is deemed important by the community propagates through the network. Studying the characteristics of content in the messages becomes important for a number of tasks, such as breaking news detection, personalized message recommendation, friends recommendation, sentiment analysis and others. While many researchers wish to use standard text mining tools to understand messages on Twitter, the restricted length of those messages prevents them from being employed to their full potential. We address the problem of using standard topic models in micro-blogging environments by studying how the models can be trained on the dataset. We propose several schemes to train a standard topic model and compare their quality and effectiveness through a set of carefully designed experiments from both qualitative and quantitative perspectives. We show that by training a topic model on aggregated messages we can obtain a higher quality of learned model which results in significantly better performance in two real-world classification problems. We also discuss how the state-of-the-art Author-Topic model fails to model hierarchical relationships between entities in Social Media.},
  type                = {inproceedings},
  bibtype             = {inproceedings},
  websites            = {http://snap.stanford.edu/soma2010/papers/soma2010_12.pdf},
  id                  = {653d9cf0-971d-3c3d-8f05-346896d8bb03},
  created             = {2018-02-05T17:30:45.709Z},
  accessed            = {2018-02-05},
  file_attached       = {true},
  profile_id          = {371589bb-c770-37ff-8193-93c6f25ffeb1},
  group_id            = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},
  last_modified       = {2018-02-05T17:30:50.859Z},
  read                = {false},
  starred             = {false},
  authored            = {false},
  confirmed           = {false},
  hidden              = {false},
  private_publication = {false},
}
Downloads: 0