Reward Learning from Human Demonstration Improves SFT for LLM Alignment. Li, J., Zeng, S., Wai, H., Li, C., Garcia, A., & Hong, M. In NeurIPS, 2024.
Paper bibtex 2 downloads
@inproceedings{li2024getting,
author = {Li, Jiaxiang and Zeng, Siliang and Wai, Hoi-To and Li, Chenliang and Garcia, Alfredo and Hong, Mingyi},
date-added = {2024-06-01 00:29:40 +0800},
date-modified = {2024-06-01 00:29:49 +0800},
booktitle = {NeurIPS},
year = {2024},
title = {Reward Learning from Human Demonstration Improves SFT for LLM Alignment},
url_paper = {https://arxiv.org/abs/2405.17888}}
Downloads: 2
{"_id":"vB34TQeRoPMcCWodR","bibbaseid":"li-zeng-wai-li-garcia-hong-rewardlearningfromhumandemonstrationimprovessftforllmalignment-2024","author_short":["Li, J.","Zeng, S.","Wai, H.","Li, C.","Garcia, A.","Hong, M."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","author":[{"propositions":[],"lastnames":["Li"],"firstnames":["Jiaxiang"],"suffixes":[]},{"propositions":[],"lastnames":["Zeng"],"firstnames":["Siliang"],"suffixes":[]},{"propositions":[],"lastnames":["Wai"],"firstnames":["Hoi-To"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Chenliang"],"suffixes":[]},{"propositions":[],"lastnames":["Garcia"],"firstnames":["Alfredo"],"suffixes":[]},{"propositions":[],"lastnames":["Hong"],"firstnames":["Mingyi"],"suffixes":[]}],"date-added":"2024-06-01 00:29:40 +0800","date-modified":"2024-06-01 00:29:49 +0800","booktitle":"NeurIPS","year":"2024","title":"Reward Learning from Human Demonstration Improves SFT for LLM Alignment","url_paper":"https://arxiv.org/abs/2405.17888","bibtex":"@inproceedings{li2024getting,\n\tauthor = {Li, Jiaxiang and Zeng, Siliang and Wai, Hoi-To and Li, Chenliang and Garcia, Alfredo and Hong, Mingyi},\n\tdate-added = {2024-06-01 00:29:40 +0800},\n\tdate-modified = {2024-06-01 00:29:49 +0800},\n\tbooktitle = {NeurIPS},\n\tyear = {2024},\n\ttitle = {Reward Learning from Human Demonstration Improves SFT for LLM Alignment},\n\turl_paper = {https://arxiv.org/abs/2405.17888}}\n\n","author_short":["Li, J.","Zeng, S.","Wai, H.","Li, C.","Garcia, A.","Hong, M."],"key":"li2024getting","id":"li2024getting","bibbaseid":"li-zeng-wai-li-garcia-hong-rewardlearningfromhumandemonstrationimprovessftforllmalignment-2024","role":"author","urls":{" paper":"https://arxiv.org/abs/2405.17888"},"metadata":{"authorlinks":{}},"downloads":2},"bibtype":"inproceedings","biburl":"https://www1.se.cuhk.edu.hk/~htwai/bib.bib","dataSources":["q5vKAC6pFJwaDMzPQ"],"keywords":[],"search_terms":["reward","learning","human","demonstration","improves","sft","llm","alignment","li","zeng","wai","li","garcia","hong"],"title":"Reward Learning from Human Demonstration Improves SFT for LLM Alignment","year":2024,"downloads":2}