Using the Veil of Ignorance to align AI systems with principles of justice. Weidinger, L., McKee, K. R., Everett, R., Huang, S., Zhu, T. O., Chadwick, M. J., Summerfield, C., & Gabriel, I. Proceedings of the National Academy of Sciences of the United States of America, 2023.
The philosopher John Rawls proposed the Veil of Ignorance (VoI) as a thought experiment to identify fair principles for governing a society. Here, we apply the VoI to an important governance domain: artificial intelligence (AI). In five incentive-compatible studies (N = 2,508), including two preregistered protocols, participants choose principles to govern an Artificial Intelligence (AI) assistant from behind the veil: that is, without knowledge of their own relative position in the group. Compared to participants who have this information, we find a consistent preference for a principle that instructs the AI assistant to prioritize the worst-off. Neither risk attitudes nor political preferences adequately explain these choices. Instead, they appear to be driven by elevated concerns about fairness: Without prompting, participants who reason behind the VoI more frequently explain their choice in terms of fairness, compared to those in the Control condition. Moreover, we find initial support for the ability of the VoI to elicit more robust preferences: In the studies presented here, the VoI increases the likelihood of participants continuing to endorse their initial choice in a subsequent round where they know how they will be affected by the AI intervention and have a self-interested motivation to change their mind. These results emerge in both a descriptive and an immersive game. Our findings suggest that the VoI may be a suitable mechanism for selecting distributive principles to govern AI. Copyright © 2023 the Author(s).
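For a concrete handle on the choice the abstract describes, here is a minimal, illustrative sketch in Python (not the authors' experimental code or game): it contrasts a Rawls-style rule that directs an AI assistant's help toward the worst-off player with a utilitarian rule that directs help wherever it raises total payoff the most. All function names, endowments, and productivity figures below are invented for illustration.

# Toy sketch only (assumed setup, not the paper's actual game): an AI assistant
# has a fixed budget of "help" to divide among players with unequal starting
# positions. Two candidate governing principles are compared.

def prioritize_worst_off(endowments, budget, step=1.0):
    # Rawls-style rule: repeatedly grant a small unit of help to whichever
    # player currently has the lowest total.
    allocation = [0.0] * len(endowments)
    totals = list(endowments)
    remaining = budget
    while remaining > 0:
        worst = min(range(len(totals)), key=lambda i: totals[i])
        grant = min(step, remaining)
        allocation[worst] += grant
        totals[worst] += grant
        remaining -= grant
    return allocation

def maximise_total(productivities, budget):
    # Utilitarian rule (assuming linear returns to help): give the entire
    # budget to the player with the highest payoff per unit of help.
    allocation = [0.0] * len(productivities)
    best = max(range(len(productivities)), key=lambda i: productivities[i])
    allocation[best] = budget
    return allocation

if __name__ == "__main__":
    endowments = [2.0, 5.0, 9.0]      # hypothetical starting positions
    productivities = [0.5, 1.0, 2.0]  # hypothetical payoff per unit of help
    print(prioritize_worst_off(endowments, budget=6.0))   # -> [5.0, 1.0, 0.0]
    print(maximise_total(productivities, budget=6.0))     # -> [0.0, 0.0, 6.0]

Under the first rule the help flows to the lowest-endowed player; under the second it flows to the most productive one. This is the kind of distributive contrast participants in the study chose between, with or without knowledge of their own position.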
@article{weidinger_using_2023,
title = {Using the {Veil} of {Ignorance} to align {AI} systems with principles of justice},
volume = {120},
	issn = {0027-8424},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85153686741&doi=10.1073%2fpnas.2213709120&partnerID=40&md5=b00c4f32784b75d63fb30d3befc1e82a},
doi = {10.1073/pnas.2213709120},
abstract = {The philosopher John Rawls proposed the Veil of Ignorance (VoI) as a thought experiment to identify fair principles for governing a society. Here, we apply the VoI to an important governance domain: artificial intelligence (AI). In five incentive-compatible studies (N = 2,508), including two preregistered protocols, participants choose principles to govern an Artificial Intelligence (AI) assistant from behind the veil: that is, without knowledge of their own relative position in the group. Compared to participants who have this information, we find a consistent preference for a principle that instructs the AI assistant to prioritize the worst-off. Neither risk attitudes nor political preferences adequately explain these choices. Instead, they appear to be driven by elevated concerns about fairness: Without prompting, participants who reason behind the VoI more frequently explain their choice in terms of fairness, compared to those in the Control condition. Moreover, we find initial support for the ability of the VoI to elicit more robust preferences: In the studies presented here, the VoI increases the likelihood of participants continuing to endorse their initial choice in a subsequent round where they know how they will be affected by the AI intervention and have a self-interested motivation to change their mind. These results emerge in both a descriptive and an immersive game. Our findings suggest that the VoI may be a suitable mechanism for selecting distributive principles to govern AI. Copyright © 2023 the Author(s).},
language = {English},
	number = {18},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
author = {Weidinger, Laura and McKee, Kevin R. and Everett, Richard and Huang, Saffron and Zhu, Tina O. and Chadwick, Martin J. and Summerfield, Christopher and Gabriel, Iason},
year = {2023},
pmid = {37094137},
	note = {Publisher: National Academy of Sciences; Type: Article},
	keywords = {Artificial Intelligence, Humans, Social Justice, Societies, adult, controlled study, decision making, ethics, fairness, female, human experiment, incentive, justice, major clinical study, male, motivation, organization, risk attitude},
}
{"_id":"uPGrX57WX4dtc3YBT","bibbaseid":"weidinger-mckee-everett-huang-zhu-chadwick-summerfield-gabriel-usingtheveilofignorancetoalignaisystemswithprinciplesofjustice-2023","author_short":["Weidinger, L.","McKee, K. R.","Everett, R.","Huang, S.","Zhu, T. O.","Chadwick, M. J.","Summerfield, C.","Gabriel, I."],"bibdata":{"bibtype":"article","type":"article","title":"Using the Veil of Ignorance to align AI systems with principles of justice","volume":"120","issn":"00278424","url":"https://www.scopus.com/inward/record.uri?eid=2-s2.0-85153686741&doi=10.1073%2fpnas.2213709120&partnerID=40&md5=b00c4f32784b75d63fb30d3befc1e82a","doi":"10.1073/pnas.2213709120","abstract":"The philosopher John Rawls proposed the Veil of Ignorance (VoI) as a thought experiment to identify fair principles for governing a society. Here, we apply the VoI to an important governance domain: artificial intelligence (AI). In five incentive-compatible studies (N = 2,508), including two preregistered protocols, participants choose principles to govern an Artificial Intelligence (AI) assistant from behind the veil: that is, without knowledge of their own relative position in the group. Compared to participants who have this information, we find a consistent preference for a principle that instructs the AI assistant to prioritize the worst-off. Neither risk attitudes nor political preferences adequately explain these choices. Instead, they appear to be driven by elevated concerns about fairness: Without prompting, participants who reason behind the VoI more frequently explain their choice in terms of fairness, compared to those in the Control condition. Moreover, we find initial support for the ability of the VoI to elicit more robust preferences: In the studies presented here, the VoI increases the likelihood of participants continuing to endorse their initial choice in a subsequent round where they know how they will be affected by the AI intervention and have a self-interested motivation to change their mind. These results emerge in both a descriptive and an immersive game. Our findings suggest that the VoI may be a suitable mechanism for selecting distributive principles to govern AI. 
Copyright © 2023 the Author(s).","language":"English","number":"118","journal":"Proceedings of the National Academy of Sciences of the United States of America","author":[{"propositions":[],"lastnames":["Weidinger"],"firstnames":["Laura"],"suffixes":[]},{"propositions":[],"lastnames":["McKee"],"firstnames":["Kevin","R."],"suffixes":[]},{"propositions":[],"lastnames":["Everett"],"firstnames":["Richard"],"suffixes":[]},{"propositions":[],"lastnames":["Huang"],"firstnames":["Saffron"],"suffixes":[]},{"propositions":[],"lastnames":["Zhu"],"firstnames":["Tina","O."],"suffixes":[]},{"propositions":[],"lastnames":["Chadwick"],"firstnames":["Martin","J."],"suffixes":[]},{"propositions":[],"lastnames":["Summerfield"],"firstnames":["Christopher"],"suffixes":[]},{"propositions":[],"lastnames":["Gabriel"],"firstnames":["Iason"],"suffixes":[]}],"year":"2023","pmid":"37094137","note":"Publisher: National Academy of Sciences Type: Article","keywords":"Artificial Intelligence, Humans, Social Justice, Societies, adult, article, artificial intelligence, controlled study, decision making, ethics, fairness, female, human, human experiment, incentive, justice, major clinical study, male, motivation, organization, risk attitude, social justice","bibtex":"@article{weidinger_using_2023,\n\ttitle = {Using the {Veil} of {Ignorance} to align {AI} systems with principles of justice},\n\tvolume = {120},\n\tissn = {00278424},\n\turl = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85153686741&doi=10.1073%2fpnas.2213709120&partnerID=40&md5=b00c4f32784b75d63fb30d3befc1e82a},\n\tdoi = {10.1073/pnas.2213709120},\n\tabstract = {The philosopher John Rawls proposed the Veil of Ignorance (VoI) as a thought experiment to identify fair principles for governing a society. Here, we apply the VoI to an important governance domain: artificial intelligence (AI). In five incentive-compatible studies (N = 2,508), including two preregistered protocols, participants choose principles to govern an Artificial Intelligence (AI) assistant from behind the veil: that is, without knowledge of their own relative position in the group. Compared to participants who have this information, we find a consistent preference for a principle that instructs the AI assistant to prioritize the worst-off. Neither risk attitudes nor political preferences adequately explain these choices. Instead, they appear to be driven by elevated concerns about fairness: Without prompting, participants who reason behind the VoI more frequently explain their choice in terms of fairness, compared to those in the Control condition. Moreover, we find initial support for the ability of the VoI to elicit more robust preferences: In the studies presented here, the VoI increases the likelihood of participants continuing to endorse their initial choice in a subsequent round where they know how they will be affected by the AI intervention and have a self-interested motivation to change their mind. These results emerge in both a descriptive and an immersive game. Our findings suggest that the VoI may be a suitable mechanism for selecting distributive principles to govern AI. Copyright © 2023 the Author(s).},\n\tlanguage = {English},\n\tnumber = {118},\n\tjournal = {Proceedings of the National Academy of Sciences of the United States of America},\n\tauthor = {Weidinger, Laura and McKee, Kevin R. and Everett, Richard and Huang, Saffron and Zhu, Tina O. and Chadwick, Martin J. 
and Summerfield, Christopher and Gabriel, Iason},\n\tyear = {2023},\n\tpmid = {37094137},\n\tnote = {Publisher: National Academy of Sciences\nType: Article},\n\tkeywords = {Artificial Intelligence, Humans, Social Justice, Societies, adult, article, artificial intelligence, controlled study, decision making, ethics, fairness, female, human, human experiment, incentive, justice, major clinical study, male, motivation, organization, risk attitude, social justice},\n}\n\n\n\n","author_short":["Weidinger, L.","McKee, K. R.","Everett, R.","Huang, S.","Zhu, T. O.","Chadwick, M. J.","Summerfield, C.","Gabriel, I."],"key":"weidinger_using_2023","id":"weidinger_using_2023","bibbaseid":"weidinger-mckee-everett-huang-zhu-chadwick-summerfield-gabriel-usingtheveilofignorancetoalignaisystemswithprinciplesofjustice-2023","role":"author","urls":{"Paper":"https://www.scopus.com/inward/record.uri?eid=2-s2.0-85153686741&doi=10.1073%2fpnas.2213709120&partnerID=40&md5=b00c4f32784b75d63fb30d3befc1e82a"},"keyword":["Artificial Intelligence","Humans","Social Justice","Societies","adult","article","artificial intelligence","controlled study","decision making","ethics","fairness","female","human","human experiment","incentive","justice","major clinical study","male","motivation","organization","risk attitude","social justice"],"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"http://bibbase.org/zotero-group/science_et_ignorance/1340424","dataSources":["zX4acseCDM6D58AW7"],"keywords":["artificial intelligence","humans","social justice","societies","adult","article","artificial intelligence","controlled study","decision making","ethics","fairness","female","human","human experiment","incentive","justice","major clinical study","male","motivation","organization","risk attitude","social justice"],"search_terms":["using","veil","ignorance","align","systems","principles","justice","weidinger","mckee","everett","huang","zhu","chadwick","summerfield","gabriel"],"title":"Using the Veil of Ignorance to align AI systems with principles of justice","year":2023}