Chain of thought monitorability: A new and fragile opportunity for AI safety. Korbak, T., Balesni, M., Barnes, E., Bengio, Y., Benton, J., Bloom, J., Chen, M., Cooney, A., Dafoe, A., & Dragan, A. 2025.
Paper bibtex @misc{korbak_chain_2025,
title = {Chain of thought monitorability: {A} new and fragile opportunity for {AI} safety},
url = {https://arxiv.org/pdf/2507.11473},
publisher = {arXiv},
author = {Korbak, Tomek and Balesni, Mikita and Barnes, Elizabeth and Bengio, Yoshua and Benton, Joe and Bloom, Joseph and Chen, Mark and Cooney, Alan and Dafoe, Allan and Dragan, Anca},
year = {2025},
}
Downloads: 0
{"_id":"jnu4XF8gwwqrfiYuw","bibbaseid":"korbak-balesni-barnes-bengio-benton-bloom-chen-cooney-etal-chainofthoughtmonitorabilityanewandfragileopportunityforaisafety-2025","author_short":["Korbak, T.","Balesni, M.","Barnes, E.","Bengio, Y.","Benton, J.","Bloom, J.","Chen, M.","Cooney, A.","Dafoe, A.","Dragan, A."],"bibdata":{"bibtype":"misc","type":"misc","title":"Chain of thought monitorability: A new and fragile opportunity for ai safety","url":"https://arxiv.org/pdf/2507.11473","publisher":"arXiv","author":[{"propositions":[],"lastnames":["Korbak"],"firstnames":["Tomek"],"suffixes":[]},{"propositions":[],"lastnames":["Balesni"],"firstnames":["Mikita"],"suffixes":[]},{"propositions":[],"lastnames":["Barnes"],"firstnames":["Elizabeth"],"suffixes":[]},{"propositions":[],"lastnames":["Bengio"],"firstnames":["Yoshua"],"suffixes":[]},{"propositions":[],"lastnames":["Benton"],"firstnames":["Joe"],"suffixes":[]},{"propositions":[],"lastnames":["Bloom"],"firstnames":["Joseph"],"suffixes":[]},{"propositions":[],"lastnames":["Chen"],"firstnames":["Mark"],"suffixes":[]},{"propositions":[],"lastnames":["Cooney"],"firstnames":["Alan"],"suffixes":[]},{"propositions":[],"lastnames":["Dafoe"],"firstnames":["Allan"],"suffixes":[]},{"propositions":[],"lastnames":["Dragan"],"firstnames":["Anca"],"suffixes":[]}],"year":"2025","bibtex":"@misc{korbak_chain_2025,\n\ttitle = {Chain of thought monitorability: {A} new and fragile opportunity for ai safety},\n\turl = {https://arxiv.org/pdf/2507.11473},\n\tpublisher = {arXiv},\n\tauthor = {Korbak, Tomek and Balesni, Mikita and Barnes, Elizabeth and Bengio, Yoshua and Benton, Joe and Bloom, Joseph and Chen, Mark and Cooney, Alan and Dafoe, Allan and Dragan, Anca},\n\tyear = {2025},\n}\n\n\n\n","author_short":["Korbak, T.","Balesni, M.","Barnes, E.","Bengio, Y.","Benton, J.","Bloom, J.","Chen, M.","Cooney, A.","Dafoe, A.","Dragan, A."],"key":"korbak_chain_2025","id":"korbak_chain_2025","bibbaseid":"korbak-balesni-barnes-bengio-benton-bloom-chen-cooney-etal-chainofthoughtmonitorabilityanewandfragileopportunityforaisafety-2025","role":"author","urls":{"Paper":"https://arxiv.org/pdf/2507.11473"},"metadata":{"authorlinks":{}}},"bibtype":"misc","biburl":"https://bibbase.org/zotero-group/schulzkx/5158478","dataSources":["JFDnASMkoQCjjGL8E"],"keywords":[],"search_terms":["chain","thought","monitorability","new","fragile","opportunity","safety","korbak","balesni","barnes","bengio","benton","bloom","chen","cooney","dafoe","dragan"],"title":"Chain of thought monitorability: A new and fragile opportunity for ai safety","year":2025}