Mapping Knowledge Domains: Finding scientific topics

Mapping Knowledge Domains: Finding scientific topics. Griffiths, T. L. & Steyvers, M. Proceedings of the National Academy of Sciences of the United States of America (PNAS), 101(1):5228–5235, April, 2004.

Paper doi abstract bibtex

A first step in identifying the content of a document is determining which topics that document addresses. We describe a generative model for documents, introduced by Blei, Ng, and Jordan [Blei, D. M., Ng, A. Y. & Jordan, M. I. (2003) J. Machine Learn. Res. 3, 993-1022], in which each document is generated by choosing a distribution over topics and then choosing each word in the document from a topic selected according to this distribution. We then present a Markov chain Monte Carlo algorithm for inference in this model. We use this algorithm to analyze abstracts from PNAS by using Bayesian model selection to establish the number of topics. We show that the extracted topics capture meaningful structure in the data, consistent with the class designations provided by the authors of the articles, and outline further applications of this analysis, including identifying “hot topics” by examining temporal dynamics and tagging abstracts to illustrate semantic content.

@comment{Journal article by Griffiths and Steyvers (PNAS, vol. 101, 2004), exported via BibSonomy/CiteULike. The month uses the standard BibTeX macro and the abstract uses ASCII LaTeX quotes so the entry is safe for classic 8-bit BibTeX.}
@article{citeulike:563921,
  author               = {Griffiths, Thomas L. and Steyvers, Mark},
  title                = {Mapping Knowledge Domains: Finding scientific topics},
  journal              = {Proceedings of the National Academy of Sciences of the United States of America (PNAS)},
  volume               = {101},
  number               = {1},
  pages                = {5228--5235},
  month                = apr,
  year                 = {2004},
  doi                  = {10.1073/pnas.0307752101},
  url                  = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=387300},
  abstract             = {A first step in identifying the content of a document is determining which topics that document addresses. We describe a generative model for documents, introduced by Blei, Ng, and Jordan [Blei, D. M., Ng, A. Y. \& Jordan, M. I. (2003) J. Machine Learn. Res. 3, 993-1022], in which each document is generated by choosing a distribution over topics and then choosing each word in the document from a topic selected according to this distribution. We then present a Markov chain Monte Carlo algorithm for inference in this model. We use this algorithm to analyze abstracts from PNAS by using Bayesian model selection to establish the number of topics. We show that the extracted topics capture meaningful structure in the data, consistent with the class designations provided by the authors of the articles, and outline further applications of this analysis, including identifying ``hot topics'' by examining temporal dynamics and tagging abstracts to illustrate semantic content.},
  keywords             = {topicinference},
  added-at             = {2006-06-16T10:34:37.000+0200},
  timestamp            = {2006-06-16T10:34:37.000+0200},
  biburl               = {https://www.bibsonomy.org/bibtex/2490f368b3056ec60eae1d3d13d9e8437/ldietz},
  citeulike-article-id = {563921},
  interhash            = {0e95bb0922d76312cbd803d2433d1519},
  intrahash            = {490f368b3056ec60eae1d3d13d9e8437},
  priority             = {0},
}

Downloads: 0

{"_id":"Bfr29FrLKSqTFMaSq","bibbaseid":"griffiths-steyvers-mappingknowledgedomainsfindingscientifictopics-2004","authorIDs":[],"author_short":["Griffiths, T. L.","Steyvers, M."],"bibdata":{"bibtype":"article","type":"article","abstract":"A first step in identifying the content of a document is determining which topics that document addresses. We describe a generative model for documents, introduced by Blei, Ng, and Jordan [Blei, D. M., Ng, A. Y. & Jordan, M. I. (2003) J. Machine Learn. Res. 3, 993-1022], in which each document is generated by choosing a distribution over topics and then choosing each word in the document from a topic selected according to this distribution. We then present a Markov chain Monte Carlo algorithm for inference in this model. We use this algorithm to analyze abstracts from PNAS by using Bayesian model selection to establish the number of topics. We show that the extracted topics capture meaningful structure in the data, consistent with the class designations provided by the authors of the articles, and outline further applications of this analysis, including identifying ‘‘hot topics’’ by examining temporal dynamics and tagging abstracts to illustrate semantic content.","added-at":"2006-06-16T10:34:37.000+0200","author":[{"propositions":[],"lastnames":["Griffiths"],"firstnames":["Thomas","L."],"suffixes":[]},{"propositions":[],"lastnames":["Steyvers"],"firstnames":["Mark"],"suffixes":[]}],"biburl":"https://www.bibsonomy.org/bibtex/2490f368b3056ec60eae1d3d13d9e8437/ldietz","citeulike-article-id":"563921","doi":"10.1073/pnas.0307752101","interhash":"0e95bb0922d76312cbd803d2433d1519","intrahash":"490f368b3056ec60eae1d3d13d9e8437","journal":"Proceedings of the National Academy of Sciences of the United States of America (PNAS)","keywords":"topicinference","month":"April","number":"1","pages":"5228–5235","priority":"0","timestamp":"2006-06-16T10:34:37.000+0200","title":"Mapping Knowledge Domains: Finding scientific 
topics","url":"http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=387300","volume":"101","year":"2004","bibtex":"@article{citeulike:563921,\n abstract = {A first step in identifying the content of a document is determining which topics that document addresses. We describe a generative model for documents, introduced by Blei, Ng, and Jordan [Blei, D. M., Ng, A. Y. \\& Jordan, M. I. (2003) J. Machine Learn. Res. 3, 993-1022], in which each document is generated by choosing a distribution over topics and then choosing each word in the document from a topic selected according to this distribution. We then present a Markov chain Monte Carlo algorithm for inference in this model. We use this algorithm to analyze abstracts from PNAS by using Bayesian model selection to establish the number of topics. We show that the extracted topics capture meaningful structure in the data, consistent with the class designations provided by the authors of the articles, and outline further applications of this analysis, including identifying ‘‘hot topics’’ by examining temporal dynamics and tagging abstracts to illustrate semantic content.},\n added-at = {2006-06-16T10:34:37.000+0200},\n author = {Griffiths, Thomas L. and Steyvers, Mark},\n biburl = {https://www.bibsonomy.org/bibtex/2490f368b3056ec60eae1d3d13d9e8437/ldietz},\n citeulike-article-id = {563921},\n doi = {10.1073/pnas.0307752101},\n interhash = {0e95bb0922d76312cbd803d2433d1519},\n intrahash = {490f368b3056ec60eae1d3d13d9e8437},\n journal = {Proceedings of the National Academy of Sciences of the United States of America (PNAS)},\n keywords = {topicinference},\n month = {April},\n number = 1,\n pages = {5228--5235},\n priority = {0},\n timestamp = {2006-06-16T10:34:37.000+0200},\n title = {Mapping Knowledge Domains: Finding scientific topics},\n url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=387300},\n volume = 101,\n year = 2004\n}\n\n","author_short":["Griffiths, T. 
L.","Steyvers, M."],"key":"citeulike:563921","id":"citeulike:563921","bibbaseid":"griffiths-steyvers-mappingknowledgedomainsfindingscientifictopics-2004","role":"author","urls":{"Paper":"http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=387300"},"keyword":["topicinference"],"downloads":0},"bibtype":"article","biburl":"http://www.bibsonomy.org/bib/author/steyvers?items=1000","creationDate":"2019-06-29T20:32:00.300Z","downloads":0,"keywords":["topicinference"],"search_terms":["mapping","knowledge","domains","finding","scientific","topics","griffiths","steyvers"],"title":"Mapping Knowledge Domains: Finding scientific topics","year":2004,"dataSources":["R5kPzuC6AgTCJiyMD"]}