Reward-Based Environment States for Robot Manipulation Policy Learning. Mouliets, C., Ferrané, I., & Cuayahuitl, H. In NeurIPS 2021 Workshop on Deployable Decision Making in Embodied Systems (DDM), December 2021.
Paper: https://eprints.lincoln.ac.uk/id/eprint/47522/
Abstract: Training robot manipulation policies is a challenging and open problem in robotics and artificial intelligence. In this paper we propose a novel and compact state representation based on the rewards predicted from an image-based task success classifier. Our experiments, using the Pepper robot in simulation with two deep reinforcement learning algorithms on a grab-and-lift task, reveal that our proposed state representation can achieve up to 97% task success using our best policies.
@inproceedings{lincoln47522,
booktitle = {NeurIPS 2021 Workshop on Deployable Decision Making in Embodied Systems (DDM)},
month = {December},
title = {Reward-Based Environment States for Robot Manipulation Policy Learning},
author = {C{\'e}d{\'e}rick Mouliets and Isabelle Ferran{\'e} and Heriberto Cuayahuitl},
year = {2021},
url = {https://eprints.lincoln.ac.uk/id/eprint/47522/},
abstract = {Training robot manipulation policies is a challenging and open problem in robotics and artificial intelligence. In this paper we propose a novel and compact state representation based on the rewards predicted from an image-based task success classifier. Our experiments{--}using the Pepper robot in simulation with two deep reinforcement learning algorithms on a grab-and-lift task{--}reveal that our proposed state representation can achieve up to 97\% task success using our best policies.}
}
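
As a rough illustration of the idea described in the abstract, the sketch below shows how a scalar reward predicted by an image-based task success classifier could serve as a compact environment state for a reinforcement learning agent, instead of raw pixels. This is a minimal sketch under stated assumptions, not the authors' implementation: SuccessClassifier, predict_reward, make_state, and the state dimension k are all hypothetical placeholders.

# Hypothetical sketch of a reward-based state representation (Python).
# All names and shapes are illustrative assumptions, not the paper's code.
import numpy as np

class SuccessClassifier:
    """Stand-in for an image-based task success classifier.

    In the paper this would be a trained network mapping a camera image
    to a predicted task-success reward; here we fake the prediction."""
    def predict_reward(self, image: np.ndarray) -> float:
        # Placeholder: a real classifier would return P(success | image).
        return float(np.clip(image.mean() / 255.0, 0.0, 1.0))

def make_state(classifier: SuccessClassifier, image: np.ndarray,
               history: list, k: int = 4) -> np.ndarray:
    """Build a compact state from the last k predicted rewards.

    The RL policy never sees pixels directly; its observation is the
    short history of rewards predicted from recent camera frames."""
    history.append(classifier.predict_reward(image))
    window = history[-k:]
    # Left-pad with zeros so the state always has fixed dimension k.
    return np.array([0.0] * (k - len(window)) + window, dtype=np.float32)

if __name__ == "__main__":
    clf = SuccessClassifier()
    rewards = []
    for _ in range(6):  # pretend we receive six camera frames
        frame = np.random.randint(0, 256, size=(64, 64, 3), dtype=np.uint8)
        state = make_state(clf, frame, rewards)
    print("compact state for the policy:", state)

Such a k-dimensional reward-history vector is far smaller than an image observation, which is presumably what makes the representation "compact"; the choice of k and of the classifier architecture are design decisions the paper itself would specify.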
{"_id":"rRCaeBLpi2YngFNj2","bibbaseid":"cdrick-ferran-cuayahuitl-rewardbasedenvironmentstatesforrobotmanipulationpolicylearning-2021","author_short":["Cédérick, M.","Ferrané, I.","Cuayahuitl, H."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","booktitle":"NeurIPS 2021 Workshop on Deployable Decision Making in Embodied Systems (DDM)","month":"December","title":"Reward-Based Environment States for Robot Manipulation Policy Learning","author":[{"firstnames":["Mouliets"],"propositions":[],"lastnames":["Cédérick"],"suffixes":[]},{"firstnames":["Isabelle"],"propositions":[],"lastnames":["Ferrané"],"suffixes":[]},{"firstnames":["Heriberto"],"propositions":[],"lastnames":["Cuayahuitl"],"suffixes":[]}],"year":"2021","keywords":"ARRAY(0x56546f014e28)","url":"https://eprints.lincoln.ac.uk/id/eprint/47522/","abstract":"Training robot manipulation policies is a challenging and open problem in robotics and artificial intelligence. In this paper we propose a novel and compact state representation based on the rewards predicted from an image-based task success classifier. Our experiments–using the Pepper robot in simulation with two deep reinforcement learning algorithms on a grab-and-lift task–reveal that our proposed state representation can achieve up to 97% task success using our best policies.","bibtex":"@inproceedings{lincoln47522,\n booktitle = {NeurIPS 2021 Workshop on Deployable Decision Making in Embodied Systems (DDM)},\n month = {December},\n title = {Reward-Based Environment States for Robot Manipulation Policy Learning},\n author = {Mouliets C{\\'e}d{\\'e}rick and Isabelle Ferran{\\'e} and Heriberto Cuayahuitl},\n year = {2021},\n keywords = {ARRAY(0x56546f014e28)},\n url = {https://eprints.lincoln.ac.uk/id/eprint/47522/},\n abstract = {Training robot manipulation policies is a challenging and open problem in robotics and artificial intelligence. In this paper we propose a novel and compact state representation based on the rewards predicted from an image-based task success\r\nclassifier. 
Our experiments{--}using the Pepper robot in simulation with two deep reinforcement learning algorithms on a grab-and-lift task{--}reveal that our proposed state representation can achieve up to 97\\% task success using our best policies.}\n}\n\n","author_short":["Cédérick, M.","Ferrané, I.","Cuayahuitl, H."],"key":"lincoln47522","id":"lincoln47522","bibbaseid":"cdrick-ferran-cuayahuitl-rewardbasedenvironmentstatesforrobotmanipulationpolicylearning-2021","role":"author","urls":{"Paper":"https://eprints.lincoln.ac.uk/id/eprint/47522/"},"keyword":["ARRAY(0x56546f014e28)"],"metadata":{"authorlinks":{}}},"bibtype":"inproceedings","biburl":"https://eprints.lincoln.ac.uk/cgi/search/archive/advanced/export_lincoln_BibTeX.bib?screen=Search&dataset=archive&_action_export=1&output=BibTeX&exp=0%7C1%7C-date%2Fcreators_name%2Ftitle%7Carchive%7C-%7Ccreators_name%3Acreators_name%3AANY%3AEQ%3AHanheide+Al-Fadhli+Baxter+Bellotto+Bosilj+Calisti+Cielniak+Coutts+Cuayahuitl+Das+Elgeneidy+Gaju+Esfahani+Fox+From+Gao+Gould+Millard+Parsons+Pearson+Saaj+Sklar+Swainson+Valluru+Villa+Wright+Yue%7Cdate%3Adate%3AALL%3AEQ%3A2021%7C-%7Ceprint_status%3Aeprint_status%3AANY%3AEQ%3Aarchive%7Cmetadata_visibility%3Ametadata_visibility%3AANY%3AEQ%3Ashow&n=&cache=11447865","dataSources":["5gniG5JtcCSxmGw9h"],"keywords":["array(0x56546f014e28)"],"search_terms":["reward","based","environment","states","robot","manipulation","policy","learning","cédérick","ferrané","cuayahuitl"],"title":"Reward-Based Environment States for Robot Manipulation Policy Learning","year":2021}