Topical Clustering of Tweets

Topical Clustering of Tweets. Dela Rosa, K., Shah, R., Lin, B., Gershman, A., & Frederking, R.

In the emerging field of micro-blogging and social communication services, users post millions of short messages every day. Keeping track of all the messages posted by your friends and the conversation as a whole can become tedious or even impossible. In this paper, we presented a study on automatically clustering and classifying Twitter messages, also known as "tweets", into different categories, inspired by the approaches taken by news aggregating services like Google News. Our results suggest that the clusters produced by traditional unsupervised methods can often be incoherent from a topical perspective, but utilizing a supervised methodology that utilize the hash-tags as indicators of topics produce surprisingly good results. We also offer a discussion on temporal effects of our methodology and training set size considerations. Lastly, we describe a simple method of finding the most representative tweet in a cluster, and provide an analysis of the results.

@comment{Mendeley/BibBase export of "Topical Clustering of Tweets". The lowercase bookkeeping fields (id, created, profile_id, read, ...) are reference-manager metadata that bibliography styles ignore. Year and venue are not recorded in this export; the linked file name hints at a SIGIR 2011 workshop paper -- TODO confirm, then add year/booktitle and reconsider the entry type.}
@article{rosa-shah-lin-gershman-frederking-topicalclusteringoftweets,
 title = {Topical Clustering of Tweets},
 type = {article},
 keywords = {Clustering,Experimentation Keywords Social Media,Information Search and Retrieval -- clustering,Microblog Analysis,Summarization,information filtering,selection process I27 [Artificial Intelligence]},
 websites = {http://www.cs.cmu.edu/~kdelaros/sigir-swsm-2011.pdf},
 url = {http://www.cs.cmu.edu/~kdelaros/sigir-swsm-2011.pdf},
 urldate = {2018-02-05},
 id = {56722589-ffeb-3144-a964-eb5234299802},
 created = {2018-02-05T17:43:55.085Z},
 accessed = {2018-02-05},
 file_attached = {true},
 profile_id = {371589bb-c770-37ff-8193-93c6f25ffeb1},
 group_id = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},
 last_modified = {2018-02-05T17:44:00.502Z},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {false},
 hidden = {false},
 private_publication = {false},
 abstract = {In the emerging field of micro-blogging and social communication services, users post millions of short messages every day. Keeping track of all the messages posted by your friends and the conversation as a whole can become tedious or even impossible. In this paper, we presented a study on automatically clustering and classifying Twitter messages, also known as ``tweets'', into different categories, inspired by the approaches taken by news aggregating services like Google News. Our results suggest that the clusters produced by traditional unsupervised methods can often be incoherent from a topical perspective, but utilizing a supervised methodology that utilize the hash-tags as indicators of topics produce surprisingly good results. We also offer a discussion on temporal effects of our methodology and training set size considerations. Lastly, we describe a simple method of finding the most representative tweet in a cluster, and provide an analysis of the results.},
 bibtype = {article},
 author = {Dela Rosa, Kevin and Shah, Rushin and Lin, Bo and Gershman, Anatole and Frederking, Robert}
}

Downloads: 0

{"_id":"tApTyPjebMCw664Dx","bibbaseid":"rosa-shah-lin-gershman-frederking-topicalclusteringoftweets","downloads":0,"creationDate":"2018-02-07T16:22:57.301Z","title":"Topical Clustering of Tweets","author_short":["Rosa, K., D.","Shah, R.","Lin, B.","Gershman, A.","Frederking, R."],"year":null,"bibtype":"article","biburl":null,"bibdata":{"title":"Topical Clustering of Tweets","type":"article","keywords":"Clustering,Experimentation Keywords Social Media,Information Search and Retrieval – clustering,Microblog Analysis,Summarization,information filtering,selection process I27 [Artificial Intelligence]","websites":"http://www.cs.cmu.edu/~kdelaros/sigir-swsm-2011.pdf","id":"56722589-ffeb-3144-a964-eb5234299802","created":"2018-02-05T17:43:55.085Z","accessed":"2018-02-05","file_attached":"true","profile_id":"371589bb-c770-37ff-8193-93c6f25ffeb1","group_id":"f982cd63-7ceb-3aa2-ac7e-a953963d6716","last_modified":"2018-02-05T17:44:00.502Z","read":false,"starred":false,"authored":false,"confirmed":false,"hidden":false,"private_publication":false,"abstract":"In the emerging field of micro-blogging and social communication services, users post millions of short messages every day. Keeping track of all the messages posted by your friends and the conversation as a whole can become tedious or even impossible. In this paper, we presented a study on automatically clustering and classifying Twitter messages, also known as \" tweets \" , into different categories, inspired by the approaches taken by news aggregating services like Google News. Our results suggest that the clusters produced by traditional unsupervised methods can often be incoherent from a topical perspective, but utilizing a supervised methodology that utilize the hash-tags as indicators of topics produce surprisingly good results. We also offer a discussion on temporal effects of our methodology and training set size considerations. 
Lastly, we describe a simple method of finding the most representative tweet in a cluster, and provide an analysis of the results.","bibtype":"article","author":"Rosa, Kevin Dela and Shah, Rushin and Lin, Bo and Gershman, Anatole and Frederking, Robert","bibtex":"@article{\n title = {Topical Clustering of Tweets},\n type = {article},\n keywords = {Clustering,Experimentation Keywords Social Media,Information Search and Retrieval – clustering,Microblog Analysis,Summarization,information filtering,selection process I27 [Artificial Intelligence]},\n websites = {http://www.cs.cmu.edu/~kdelaros/sigir-swsm-2011.pdf},\n id = {56722589-ffeb-3144-a964-eb5234299802},\n created = {2018-02-05T17:43:55.085Z},\n accessed = {2018-02-05},\n file_attached = {true},\n profile_id = {371589bb-c770-37ff-8193-93c6f25ffeb1},\n group_id = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},\n last_modified = {2018-02-05T17:44:00.502Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {false},\n hidden = {false},\n private_publication = {false},\n abstract = {In the emerging field of micro-blogging and social communication services, users post millions of short messages every day. Keeping track of all the messages posted by your friends and the conversation as a whole can become tedious or even impossible. In this paper, we presented a study on automatically clustering and classifying Twitter messages, also known as \" tweets \" , into different categories, inspired by the approaches taken by news aggregating services like Google News. Our results suggest that the clusters produced by traditional unsupervised methods can often be incoherent from a topical perspective, but utilizing a supervised methodology that utilize the hash-tags as indicators of topics produce surprisingly good results. We also offer a discussion on temporal effects of our methodology and training set size considerations. 
Lastly, we describe a simple method of finding the most representative tweet in a cluster, and provide an analysis of the results.},\n bibtype = {article},\n author = {Rosa, Kevin Dela and Shah, Rushin and Lin, Bo and Gershman, Anatole and Frederking, Robert}\n}","author_short":["Rosa, K., D.","Shah, R.","Lin, B.","Gershman, A.","Frederking, R."],"urls":{"Paper":"http://bibbase.org/service/mendeley/371589bb-c770-37ff-8193-93c6f25ffeb1/file/1bbd5472-20b6-fd39-4a4d-476b10a87edb/Topical_Clustering_of_Tweets.pdf.pdf","Website":"http://www.cs.cmu.edu/~kdelaros/sigir-swsm-2011.pdf"},"bibbaseid":"rosa-shah-lin-gershman-frederking-topicalclusteringoftweets","role":"author","keyword":["Clustering","Experimentation Keywords Social Media","Information Search and Retrieval – clustering","Microblog Analysis","Summarization","information filtering","selection process I27 [Artificial Intelligence]"],"downloads":0,"html":""},"search_terms":["topical","clustering","tweets","rosa","shah","lin","gershman","frederking"],"keywords":["clustering","experimentation keywords social media","information search and retrieval – clustering","microblog analysis","summarization","information filtering","selection process i27 [artificial intelligence]"],"authorIDs":[]}