All the cool kids, how do they fit in?: popularity and demographic biases in recommender evaluation and effectiveness. Ekstrand, M. D., Tian, M., Azpiazu, I. M., Ekstrand, J. D., Anuyah, O., McNeill, D., & Pera, M. S. In Friedler, S. A. & Wilson, C., editors, Proceedings of the 1st Conference on Fairness, Accountability and Transparency, volume 81 of Proceedings of Machine Learning Research, pages 172–186, 2018. PMLR. Journal Abbreviation: Proceedings of Machine Learning Research
Paper abstract bibtex In the research literature, evaluations of recommender system effectiveness typically report results over a given data set, providing an aggregate measure of effectiveness over each instance (e.g. user) in the data set. Recent advances in information retrieval evaluation, however, demonstrate the importance of considering the distribution of effectiveness across diverse groups of varying sizes. For example, do users of different ages or genders obtain similar utility from the system, particularly if their group is a relatively small subset of the user base? We apply this consideration to recommender systems, using offline evaluation and a utility-based metric of recommendation effectiveness to explore whether different user demographic groups experience similar recommendation accuracy. We find demographic differences in measured recommender effectiveness across two data sets containing different types of feedback in different domains; these differences sometimes, but not always, correlate with the size of the user group in question. Demographic effects also have a complex—and likely detrimental—interaction with popularity bias, a known deficiency of recommender evaluation. These results demonstrate the need for recommender system evaluation protocols that explicitly quantify the degree to which the system is meeting the information needs of all its users, as well as the need for researchers and operators to move beyond naïve evaluations that favor the needs of larger subsets of the user population while ignoring smaller subsets.
@inproceedings{ekstrand_all_2018,
	series    = {Proceedings of Machine Learning Research},
	title     = {All the Cool Kids, How Do They Fit In?: Popularity and Demographic Biases in Recommender Evaluation and Effectiveness},
	volume    = {81},
	url       = {https://proceedings.mlr.press/v81/ekstrand18b.html},
	abstract  = {In the research literature, evaluations of recommender system
effectiveness typically report results over a given data set, providing an
aggregate measure of effectiveness over each instance (e.g. user) in the
data set. Recent advances in information retrieval evaluation, however,
demonstrate the importance of considering the distribution of
effectiveness across diverse groups of varying sizes. For example, do
users of different ages or genders obtain similar utility from the system,
particularly if their group is a relatively small subset of the user base?
We apply this consideration to recommender systems, using offline
evaluation and a utility-based metric of recommendation effectiveness to
explore whether different user demographic groups experience similar
recommendation accuracy. We find demographic differences in measured
recommender effectiveness across two data sets containing different types
of feedback in different domains; these differences sometimes, but not
always, correlate with the size of the user group in question. Demographic
effects also have a complex---and likely detrimental---interaction with
popularity bias, a known deficiency of recommender evaluation. These
results demonstrate the need for recommender system evaluation protocols
that explicitly quantify the degree to which the system is meeting the
information needs of all its users, as well as the need for researchers
and operators to move beyond na{\"i}ve evaluations that favor the needs of
larger subsets of the user population while ignoring smaller subsets.},
	booktitle = {Proceedings of the 1st {Conference} on {Fairness}, {Accountability} and {Transparency}},
	publisher = {PMLR},
	author    = {Ekstrand, Michael D. and Tian, Mucun and Azpiazu, Ion Madrazo and Ekstrand, Jennifer D. and Anuyah, Oghenemaro and McNeill, David and Pera, Maria Soledad},
	editor    = {Friedler, Sorelle A. and Wilson, Christo},
	year      = {2018},
	pages     = {172--186},
}
Downloads: 0
{"_id":"ArFkXzB7iRRMxzahe","bibbaseid":"ekstrand-tian-azpiazu-ekstrand-anuyah-mcneill-pera-allthecoolkidshowdotheyfitinpopularityanddemographicbiasesinrecommenderevaluationandeffectiveness-2018","authorIDs":[],"author_short":["Ekstrand, M. D","Tian, M.","Azpiazu, I. M.","Ekstrand, J. D","Anuyah, O.","McNeill, D.","Pera, M. S."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","series":"Proceedings of Machine Learning Research","title":"All the cool kids, how do they fit in?: popularity and demographic biases in recommender evaluation and effectiveness","volume":"81","url":"https://proceedings.mlr.press/v81/ekstrand18b.html","abstract":"In the research literature, evaluations of recommender system effectiveness typically report results over a given data set, providing an aggregate measure of effectiveness over each instance (e.g. user) in the data set. Recent advances in information retrieval evaluation, however, demonstrate the importance of considering the distribution of effectiveness across diverse groups of varying sizes. For example, do users of different ages or genders obtain similar utility from the system, particularly if their group is a relatively small subset of the user base? We apply this consideration to recommender systems, using offline evaluation and a utility-based metric of recommendation effectiveness to explore whether different user demographic groups experience similar recommendation accuracy. We find demographic differences in measured recommender effectiveness across two data sets containing different types of feedback in different domains; these differences sometimes, but not always, correlate with the size of the user group in question. Demographic effects also have a complex—and likely detrimental—interaction with popularity bias, a known deficiency of recommender evaluation. 
These results demonstrate the need for recommender system evaluation protocols that explicitly quantify the degree to which the system is meeting the information needs of all its users, as well as the need for researchers and operators to move beyond naïve evaluations that favor the needs of larger subsets of the user population while ignoring smaller subsets.","booktitle":"Proceedings of the 1st Conference on Fairness, Accountability and Transparency","publisher":"PMLR","author":[{"propositions":[],"lastnames":["Ekstrand"],"firstnames":["Michael","D"],"suffixes":[]},{"propositions":[],"lastnames":["Tian"],"firstnames":["Mucun"],"suffixes":[]},{"propositions":[],"lastnames":["Azpiazu"],"firstnames":["Ion","Madrazo"],"suffixes":[]},{"propositions":[],"lastnames":["Ekstrand"],"firstnames":["Jennifer","D"],"suffixes":[]},{"propositions":[],"lastnames":["Anuyah"],"firstnames":["Oghenemaro"],"suffixes":[]},{"propositions":[],"lastnames":["McNeill"],"firstnames":["David"],"suffixes":[]},{"propositions":[],"lastnames":["Pera"],"firstnames":["Maria","Soledad"],"suffixes":[]}],"editor":[{"propositions":[],"lastnames":["Friedler"],"firstnames":["Sorelle","A"],"suffixes":[]},{"propositions":[],"lastnames":["Wilson"],"firstnames":["Christo"],"suffixes":[]}],"year":"2018","note":"Journal Abbreviation: Proceedings of Machine Learning Research","pages":"172–186","bibtex":"@inproceedings{ekstrand_all_2018,\n\tseries = {Proceedings of {Machine} {Learning} {Research}},\n\ttitle = {All the cool kids, how do they fit in?: popularity and demographic biases in recommender evaluation and effectiveness},\n\tvolume = {81},\n\turl = {https://proceedings.mlr.press/v81/ekstrand18b.html},\n\tabstract = {In the research literature, evaluations of recommender system\neffectiveness typically report results over a given data set, providing an\naggregate measure of effectiveness over each instance (e.g. user) in the\ndata set. 
Recent advances in information retrieval evaluation, however,\ndemonstrate the importance of considering the distribution of\neffectiveness across diverse groups of varying sizes. For example, do\nusers of different ages or genders obtain similar utility from the system,\nparticularly if their group is a relatively small subset of the user base?\nWe apply this consideration to recommender systems, using offline\nevaluation and a utility-based metric of recommendation effectiveness to\nexplore whether different user demographic groups experience similar\nrecommendation accuracy. We find demographic differences in measured\nrecommender effectiveness across two data sets containing different types\nof feedback in different domains; these differences sometimes, but not\nalways, correlate with the size of the user group in question. Demographic\neffects also have a complex—and likely detrimental—interaction with\npopularity bias, a known deficiency of recommender evaluation. These\nresults demonstrate the need for recommender system evaluation protocols\nthat explicitly quantify the degree to which the system is meeting the\ninformation needs of all its users, as well as the need for researchers\nand operators to move beyond naïve evaluations that favor the needs of\nlarger subsets of the user population while ignoring smaller subsets.},\n\tbooktitle = {Proceedings of the 1st {Conference} on {Fairness}, {Accountability} and {Transparency}},\n\tpublisher = {PMLR},\n\tauthor = {Ekstrand, Michael D and Tian, Mucun and Azpiazu, Ion Madrazo and Ekstrand, Jennifer D and Anuyah, Oghenemaro and McNeill, David and Pera, Maria Soledad},\n\teditor = {Friedler, Sorelle A and Wilson, Christo},\n\tyear = {2018},\n\tnote = {Journal Abbreviation: Proceedings of Machine Learning Research},\n\tpages = {172--186},\n}\n\n","author_short":["Ekstrand, M. D","Tian, M.","Azpiazu, I. M.","Ekstrand, J. D","Anuyah, O.","McNeill, D.","Pera, M. S."],"editor_short":["Friedler, S. 
A","Wilson, C."],"key":"ekstrand_all_2018","id":"ekstrand_all_2018","bibbaseid":"ekstrand-tian-azpiazu-ekstrand-anuyah-mcneill-pera-allthecoolkidshowdotheyfitinpopularityanddemographicbiasesinrecommenderevaluationandeffectiveness-2018","role":"author","urls":{"Paper":"https://proceedings.mlr.press/v81/ekstrand18b.html"},"metadata":{"authorlinks":{}},"downloads":0},"bibtype":"inproceedings","biburl":"https://api.zotero.org/users/6655/collections/TJPPJ92X/items?key=VFvZhZXIoHNBbzoLZ1IM2zgf&format=bibtex&limit=100","creationDate":"2020-03-27T02:34:35.402Z","downloads":0,"keywords":[],"search_terms":["cool","kids","fit","popularity","demographic","biases","recommender","evaluation","effectiveness","ekstrand","tian","azpiazu","ekstrand","anuyah","mcneill","pera"],"title":"All the cool kids, how do they fit in?: popularity and demographic biases in recommender evaluation and effectiveness","year":2018,"dataSources":["5Dp4QphkvpvNA33zi","jfoasiDDpStqkkoZB","BiuuFc45aHCgJqDLY"]}