Direct preference optimization: Your language model is secretly a reward model. Rafailov, R., Sharma, A., Mitchell, E., Manning, C. D, Ermon, S., & Finn, C. Advances in neural information processing systems, 36:53728–53741, 2023. bibtex @article{rafailov2023direct,
title={Direct preference optimization: Your language model is secretly a reward model},
author={Rafailov, Rafael and Sharma, Archit and Mitchell, Eric and Manning, Christopher D and Ermon, Stefano and Finn, Chelsea},
journal={Advances in neural information processing systems},
volume={36},
pages={53728--53741},
year={2023}
}
Downloads: 0
{"_id":"orpqpubZRJiwdNzeX","bibbaseid":"rafailov-sharma-mitchell-manning-ermon-finn-directpreferenceoptimizationyourlanguagemodelissecretlyarewardmodel-2023","author_short":["Rafailov, R.","Sharma, A.","Mitchell, E.","Manning, C. D","Ermon, S.","Finn, C."],"bibdata":{"bibtype":"article","type":"article","title":"Direct preference optimization: Your language model is secretly a reward model","author":[{"propositions":[],"lastnames":["Rafailov"],"firstnames":["Rafael"],"suffixes":[]},{"propositions":[],"lastnames":["Sharma"],"firstnames":["Archit"],"suffixes":[]},{"propositions":[],"lastnames":["Mitchell"],"firstnames":["Eric"],"suffixes":[]},{"propositions":[],"lastnames":["Manning"],"firstnames":["Christopher","D"],"suffixes":[]},{"propositions":[],"lastnames":["Ermon"],"firstnames":["Stefano"],"suffixes":[]},{"propositions":[],"lastnames":["Finn"],"firstnames":["Chelsea"],"suffixes":[]}],"journal":"Advances in neural information processing systems","volume":"36","pages":"53728–53741","year":"2023","bibtex":"@article{rafailov2023direct,\n title={Direct preference optimization: Your language model is secretly a reward model},\n author={Rafailov, Rafael and Sharma, Archit and Mitchell, Eric and Manning, Christopher D and Ermon, Stefano and Finn, Chelsea},\n journal={Advances in neural information processing systems},\n volume={36},\n pages={53728--53741},\n year={2023}\n}\n\n","author_short":["Rafailov, R.","Sharma, A.","Mitchell, E.","Manning, C. D","Ermon, S.","Finn, C."],"key":"rafailov2023direct","id":"rafailov2023direct","bibbaseid":"rafailov-sharma-mitchell-manning-ermon-finn-directpreferenceoptimizationyourlanguagemodelissecretlyarewardmodel-2023","role":"author","urls":{},"metadata":{"authorlinks":{}},"downloads":0,"html":""},"bibtype":"article","biburl":"https://bibbase.org/network/files/v9iLSAgPxqijWtNb2","dataSources":["HQzSNhug84wo985ng"],"keywords":[],"search_terms":["direct","preference","optimization","language","model","secretly","reward","model","rafailov","sharma","mitchell","manning","ermon","finn"],"title":"Direct preference optimization: Your language model is secretly a reward model","year":2023}