Continuous control with deep reinforcement learning. Lillicrap, T. P., Hunt, J. J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., & Wierstra, D. arXiv:1509.02971 [cs, stat], July 2019.
Abstract: We adapt the ideas underlying the success of Deep Q-Learning to the continuous action domain. We present an actor-critic, model-free algorithm based on the deterministic policy gradient that can operate over continuous action spaces. Using the same learning algorithm, network architecture and hyper-parameters, our algorithm robustly solves more than 20 simulated physics tasks, including classic problems such as cartpole swing-up, dexterous manipulation, legged locomotion and car driving. Our algorithm is able to find policies whose performance is competitive with those found by a planning algorithm with full access to the dynamics of the domain and its derivatives. We further demonstrate that for many of the tasks the algorithm can learn policies end-to-end: directly from raw pixel inputs.
@article{lillicrap_continuous_2019,
title = {Continuous control with deep reinforcement learning},
url = {http://arxiv.org/abs/1509.02971},
abstract = {We adapt the ideas underlying the success of Deep Q-Learning to the continuous action domain. We present an actor-critic, model-free algorithm based on the deterministic policy gradient that can operate over continuous action spaces. Using the same learning algorithm, network architecture and hyper-parameters, our algorithm robustly solves more than 20 simulated physics tasks, including classic problems such as cartpole swing-up, dexterous manipulation, legged locomotion and car driving. Our algorithm is able to find policies whose performance is competitive with those found by a planning algorithm with full access to the dynamics of the domain and its derivatives. We further demonstrate that for many of the tasks the algorithm can learn policies end-to-end: directly from raw pixel inputs.},
urldate = {2022-02-22},
journal = {arXiv:1509.02971 [cs, stat]},
author = {Lillicrap, Timothy P. and Hunt, Jonathan J. and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
month = jul,
year = {2019},
note = {arXiv: 1509.02971},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}
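
As a companion to the entry, here is a minimal sketch of the update the abstract describes: an actor-critic pair trained with the deterministic policy gradient, stabilised by slowly-tracking target networks. This is an illustrative reconstruction assuming PyTorch; the layer sizes, learning rates, tau, and variable names are placeholders rather than the paper's settings, and the replay buffer and exploration noise are omitted for brevity.

# Minimal DDPG-style update sketch (illustrative; dimensions and
# hyper-parameters are placeholders, not those of the paper).
import copy
import torch
import torch.nn as nn

obs_dim, act_dim, tau, gamma = 3, 1, 0.005, 0.99

actor = nn.Sequential(nn.Linear(obs_dim, 64), nn.ReLU(),
                      nn.Linear(64, act_dim), nn.Tanh())        # mu(s) in [-1, 1]
critic = nn.Sequential(nn.Linear(obs_dim + act_dim, 64), nn.ReLU(),
                       nn.Linear(64, 1))                        # Q(s, a)
actor_targ, critic_targ = copy.deepcopy(actor), copy.deepcopy(critic)
actor_opt = torch.optim.Adam(actor.parameters(), lr=1e-4)
critic_opt = torch.optim.Adam(critic.parameters(), lr=1e-3)

def update(s, a, r, s2, done):
    # Critic: regress Q(s, a) toward a bootstrapped target built
    # from the slowly-updated *target* networks.
    with torch.no_grad():
        q_next = critic_targ(torch.cat([s2, actor_targ(s2)], dim=-1))
        target = r + gamma * (1.0 - done) * q_next
    q = critic(torch.cat([s, a], dim=-1))
    critic_loss = nn.functional.mse_loss(q, target)
    critic_opt.zero_grad(); critic_loss.backward(); critic_opt.step()

    # Actor: deterministic policy gradient -- ascend Q(s, mu(s)).
    actor_loss = -critic(torch.cat([s, actor(s)], dim=-1)).mean()
    actor_opt.zero_grad(); actor_loss.backward(); actor_opt.step()

    # Soft ("Polyak") update of the target networks, tau << 1.
    for net, targ in ((actor, actor_targ), (critic, critic_targ)):
        for p, p_t in zip(net.parameters(), targ.parameters()):
            p_t.data.mul_(1 - tau).add_(tau * p.data)

# Usage on a dummy batch of transitions (batch size 32):
B = 32
update(torch.randn(B, obs_dim), torch.rand(B, act_dim) * 2 - 1,
       torch.randn(B, 1), torch.randn(B, obs_dim), torch.zeros(B, 1))

The soft target update, the paper's adaptation of DQN's target network to the continuous-action setting, is the main stabilisation device that lets both networks bootstrap from each other without diverging.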
{"_id":"5C5iSkvD58PSNSmuG","bibbaseid":"lillicrap-hunt-pritzel-heess-erez-tassa-silver-wierstra-continuouscontrolwithdeepreinforcementlearning-2019","authorIDs":[],"author_short":["Lillicrap, T. P.","Hunt, J. J.","Pritzel, A.","Heess, N.","Erez, T.","Tassa, Y.","Silver, D.","Wierstra, D."],"bibdata":{"bibtype":"article","type":"article","title":"Continuous control with deep reinforcement learning","url":"http://arxiv.org/abs/1509.02971","abstract":"We adapt the ideas underlying the success of Deep Q-Learning to the continuous action domain. We present an actor-critic, model-free algorithm based on the deterministic policy gradient that can operate over continuous action spaces. Using the same learning algorithm, network architecture and hyper-parameters, our algorithm robustly solves more than 20 simulated physics tasks, including classic problems such as cartpole swing-up, dexterous manipulation, legged locomotion and car driving. Our algorithm is able to find policies whose performance is competitive with those found by a planning algorithm with full access to the dynamics of the domain and its derivatives. We further demonstrate that for many of the tasks the algorithm can learn policies end-to-end: directly from raw pixel inputs.","urldate":"2022-02-22","journal":"arXiv:1509.02971 [cs, stat]","author":[{"propositions":[],"lastnames":["Lillicrap"],"firstnames":["Timothy","P."],"suffixes":[]},{"propositions":[],"lastnames":["Hunt"],"firstnames":["Jonathan","J."],"suffixes":[]},{"propositions":[],"lastnames":["Pritzel"],"firstnames":["Alexander"],"suffixes":[]},{"propositions":[],"lastnames":["Heess"],"firstnames":["Nicolas"],"suffixes":[]},{"propositions":[],"lastnames":["Erez"],"firstnames":["Tom"],"suffixes":[]},{"propositions":[],"lastnames":["Tassa"],"firstnames":["Yuval"],"suffixes":[]},{"propositions":[],"lastnames":["Silver"],"firstnames":["David"],"suffixes":[]},{"propositions":[],"lastnames":["Wierstra"],"firstnames":["Daan"],"suffixes":[]}],"month":"July","year":"2019","note":"arXiv: 1509.02971","keywords":"Computer Science - Machine Learning, Statistics - Machine Learning","bibtex":"@article{lillicrap_continuous_2019,\n\ttitle = {Continuous control with deep reinforcement learning},\n\turl = {http://arxiv.org/abs/1509.02971},\n\tabstract = {We adapt the ideas underlying the success of Deep Q-Learning to the continuous action domain. We present an actor-critic, model-free algorithm based on the deterministic policy gradient that can operate over continuous action spaces. Using the same learning algorithm, network architecture and hyper-parameters, our algorithm robustly solves more than 20 simulated physics tasks, including classic problems such as cartpole swing-up, dexterous manipulation, legged locomotion and car driving. Our algorithm is able to find policies whose performance is competitive with those found by a planning algorithm with full access to the dynamics of the domain and its derivatives. We further demonstrate that for many of the tasks the algorithm can learn policies end-to-end: directly from raw pixel inputs.},\n\turldate = {2022-02-22},\n\tjournal = {arXiv:1509.02971 [cs, stat]},\n\tauthor = {Lillicrap, Timothy P. and Hunt, Jonathan J. and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},\n\tmonth = jul,\n\tyear = {2019},\n\tnote = {arXiv: 1509.02971},\n\tkeywords = {Computer Science - Machine Learning, Statistics - Machine Learning},\n}\n\n","author_short":["Lillicrap, T. P.","Hunt, J. 
J.","Pritzel, A.","Heess, N.","Erez, T.","Tassa, Y.","Silver, D.","Wierstra, D."],"key":"lillicrap_continuous_2019","id":"lillicrap_continuous_2019","bibbaseid":"lillicrap-hunt-pritzel-heess-erez-tassa-silver-wierstra-continuouscontrolwithdeepreinforcementlearning-2019","role":"author","urls":{"Paper":"http://arxiv.org/abs/1509.02971"},"keyword":["Computer Science - Machine Learning","Statistics - Machine Learning"],"metadata":{"authorlinks":{}},"html":""},"bibtype":"article","biburl":"https://bibbase.org/zotero/mxmplx","creationDate":"2020-05-12T22:54:28.451Z","downloads":0,"keywords":["computer science - machine learning","statistics - machine learning"],"search_terms":["continuous","control","deep","reinforcement","learning","lillicrap","hunt","pritzel","heess","erez","tassa","silver","wierstra"],"title":"Continuous control with deep reinforcement learning","year":2019,"dataSources":["fjacg9txEnNSDwee6","aXmRAq63YsH7a3ufx"]}