In *Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval - SIGIR '09*, pages 115–122, New York, New York, USA, 2009. ACM Press.

Paper doi abstract bibtex

Paper doi abstract bibtex

This paper studies document ranking under uncertainty. It is tackled in a general situation where the relevance predictions of individual documents have uncertainty, and are dependent between each other. Inspired by the Modern Portfolio Theory, an economic theory dealing with investment in financial markets, we argue that ranking under uncertainty is not just about picking individual relevant documents, but about choosing the right combination of relevant documents. This motivates us to quantify a ranked list of documents on the basis of its expected overall relevance (mean) and its variance; the latter serves as a measure of risk, which was rarely studied for document ranking in the past. Through the analysis of the mean and variance, we show that an optimal rank order is the one that balancing the overall relevance (mean) of the ranked list against its risk level (variance). Based on this principle, we then derive an efficient document ranking algorithm. It generalizes the well-known probability ranking principle (PRP) by considering both the uncertainty of relevance predictions and correlations between retrieved documents. Moreover, the benefit of diversification is mathematically quantified; we show that diversifying documents is an effective way to reduce the risk of document ranking. Experimental results in text retrieval confirm the theoretical insights with improved retrieval performance.

% Wang and Zhu, SIGIR 2009 conference paper. Abstract cleaned of PDF line-break hyphenation; acronyms brace-protected so styles keep their case.
@inproceedings{Wang2009,
  author    = {Wang, Jun and Zhu, Jianhan},
  title     = {Portfolio Theory of Information Retrieval},
  booktitle = {Proceedings of the 32nd International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval - {SIGIR} '09},
  publisher = {ACM Press},
  address   = {New York, New York, USA},
  year      = {2009},
  pages     = {115--122},
  isbn      = {978-1-60558-483-6},
  doi       = {10.1145/1571941.1571963},
  url       = {http://portal.acm.org/citation.cfm?doid=1571941.1571963},
  keywords  = {mean-variance analysis, modern portfolio theory, probability ranking principle, ranking under uncertainty},
  abstract  = {This paper studies document ranking under uncertainty. It is tackled in a general situation where the relevance predictions of individual documents have uncertainty, and are dependent between each other. Inspired by the Modern Portfolio Theory, an economic theory dealing with investment in financial markets, we argue that ranking under uncertainty is not just about picking individual relevant documents, but about choosing the right combination of relevant documents. This motivates us to quantify a ranked list of documents on the basis of its expected overall relevance (mean) and its variance; the latter serves as a measure of risk, which was rarely studied for document ranking in the past. Through the analysis of the mean and variance, we show that an optimal rank order is the one that balancing the overall relevance (mean) of the ranked list against its risk level (variance). Based on this principle, we then derive an efficient document ranking algorithm. It generalizes the well-known probability ranking principle (PRP) by considering both the uncertainty of relevance predictions and correlations between retrieved documents. Moreover, the benefit of diversification is mathematically quantified; we show that diversifying documents is an effective way to reduce the risk of document ranking. Experimental results in text retrieval confirm the theoretical insights with improved retrieval performance.},
}

Downloads: 0