Following Newton direction in Policy Gradient with parameter exploration

Following Newton direction in Policy Gradient with parameter exploration. Manganini, G., Pirotta, M., Restelli, M., & Bascetta, L. In Proceedings of the International Joint Conference on Neural Networks (IJCNN), pages 1--8, 2015. Institute of Electrical and Electronics Engineers Inc.
doi abstract bibtex

This paper investigates the use of second-order methods to solve Markov Decision Processes (MDPs). Despite the popularity of second-order methods in optimization literature, so far little attention has been paid to the extension of such techniques to face sequential decision problems. Here we provide a model-free Reinforcement Learning method that estimates the Newton direction by sampling directly in the parameter space. In order to compute the Newton direction we provide the formulation of the Hessian of the expected return, a technique for variance reduction in the sample-based estimation and a finite sample analysis in the case of Normal distribution. Beside discussing the theoretical properties, we empirically evaluate the method on an instructional linear-quadratic regulator and on a complex dynamical quadrotor system.

@comment{Conference paper in the IJCNN proceedings (2015). Fields are grouped as: authorship, title, venue, date and pages, identifier, annotations.}
@inproceedings{manganini_following_2015,
  author    = {Manganini, Giorgio and Pirotta, Matteo and Restelli, Marcello and Bascetta, Luca},
  title     = {Following {Newton} direction in {Policy} {Gradient} with parameter exploration},
  booktitle = {Proceedings of the {International} {Joint} {Conference} on {Neural} {Networks} ({IJCNN})},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2015},
  pages     = {1--8},
  doi       = {10.1109/IJCNN.2015.7280673},
  keywords  = {AUT, INF},
  abstract  = {This paper investigates the use of second-order methods to solve Markov Decision Processes (MDPs). Despite the popularity of second-order methods in optimization literature, so far little attention has been paid to the extension of such techniques to face sequential decision problems. Here we provide a model-free Reinforcement Learning method that estimates the Newton direction by sampling directly in the parameter space. In order to compute the Newton direction we provide the formulation of the Hessian of the expected return, a technique for variance reduction in the sample-based estimation and a finite sample analysis in the case of Normal distribution. Beside discussing the theoretical properties, we empirically evaluate the method on an instructional linear-quadratic regulator and on a complex dynamical quadrotor system.},
}

Downloads: 0

{"_id":"q6GR9pimpiRcwSFqz","bibbaseid":"manganini-pirotta-restelli-bascetta-followingnewtondirectioninpolicygradientwithparameterexploration-2015","downloads":0,"creationDate":"2018-02-17T09:50:51.236Z","title":"Following Newton direction in Policy Gradient with parameter exploration","author_short":["Manganini, G.","Pirotta, M.","Restelli, M.","Bascetta, L."],"year":2015,"bibtype":"inproceedings","biburl":"https://api.zotero.org/users/1823213/collections/AMSQJK9D/items?key=Z1izgAlgKpMkoUj4bClwJq8L&format=bibtex&limit=100","bibdata":{"bibtype":"inproceedings","type":"inproceedings","title":"Following Newton direction in Policy Gradient with parameter exploration","doi":"10.1109/IJCNN.2015.7280673","abstract":"This paper investigates the use of second-order methods to solve Markov Decision Processes (MDPs). Despite the popularity of second-order methods in optimization literature, so far little attention has been paid to the extension of such techniques to face sequential decision problems. Here we provide a model-free Reinforcement Learning method that estimates the Newton direction by sampling directly in the parameter space. In order to compute the Newton direction we provide the formulation of the Hessian of the expected return, a technique for variance reduction in the sample-based estimation and a finite sample analysis in the case of Normal distribution. 
Beside discussing the theoretical properties, we empirically evaluate the method on an instructional linear-quadratic regulator and on a complex dynamical quadrotor system.","booktitle":"Proceedings of the International Joint Conference on Neural Networks (IJCNN)","publisher":"Institute of Electrical and Electronics Engineers Inc.","author":[{"propositions":[],"lastnames":["Manganini"],"firstnames":["Giorgio"],"suffixes":[]},{"propositions":[],"lastnames":["Pirotta"],"firstnames":["Matteo"],"suffixes":[]},{"propositions":[],"lastnames":["Restelli"],"firstnames":["Marcello"],"suffixes":[]},{"propositions":[],"lastnames":["Bascetta"],"firstnames":["Luca"],"suffixes":[]}],"year":"2015","keywords":"AUT, INF","pages":"1--8","bibtex":"@inproceedings{manganini_following_2015,\n\ttitle = {Following {Newton} direction in {Policy} {Gradient} with parameter exploration},\n\tdoi = {10.1109/IJCNN.2015.7280673},\n\tabstract = {This paper investigates the use of second-order methods to solve Markov Decision Processes (MDPs). Despite the popularity of second-order methods in optimization literature, so far little attention has been paid to the extension of such techniques to face sequential decision problems. Here we provide a model-free Reinforcement Learning method that estimates the Newton direction by sampling directly in the parameter space. In order to compute the Newton direction we provide the formulation of the Hessian of the expected return, a technique for variance reduction in the sample-based estimation and a finite sample analysis in the case of Normal distribution. 
Beside discussing the theoretical properties, we empirically evaluate the method on an instructional linear-quadratic regulator and on a complex dynamical quadrotor system.},\n\tbooktitle = {Proceedings of the {International} {Joint} {Conference} on {Neural} {Networks} ({IJCNN})},\n\tpublisher = {Institute of Electrical and Electronics Engineers Inc.},\n\tauthor = {Manganini, Giorgio and Pirotta, Matteo and Restelli, Marcello and Bascetta, Luca},\n\tyear = {2015},\n\tkeywords = {AUT, INF},\n\tpages = {1--8}\n}\n\n","author_short":["Manganini, G.","Pirotta, M.","Restelli, M.","Bascetta, L."],"key":"manganini_following_2015","id":"manganini_following_2015","bibbaseid":"manganini-pirotta-restelli-bascetta-followingnewtondirectioninpolicygradientwithparameterexploration-2015","role":"author","urls":{},"keyword":["AUT","INF"],"downloads":0},"search_terms":["following","newton","direction","policy","gradient","parameter","exploration","manganini","pirotta","restelli","bascetta"],"keywords":["aut","inf"],"authorIDs":["595a1330918d93027900005e"],"dataSources":["MsJAywAN5iq2JyCay"]}