Actor-Critic Algorithms. Konda, V. R. & Tsitsiklis, J. N.

Abstract: We propose and analyze a class of actor-critic algorithms for simulation-based optimization of a Markov decision process over a parameterized family of randomized stationary policies. These are two-time-scale algorithms in which the critic uses TD learning with a linear approximation architecture and the actor is updated in an approximate gradient direction based on information provided by the critic. We show that the features for the critic should span a subspace prescribed by the choice of parameterization of the actor. We conclude by discussing convergence properties and some open problems.
@article{konda_actor-critic_nodate,
title = {Actor-{Critic} {Algorithms}},
abstract = {We propose and analyze a class of actor-critic algorithms for simulation-based optimization of a Markov decision process over a parameterized family of randomized stationary policies. These are two-time-scale algorithms in which the critic uses TD learning with a linear approximation architecture and the actor is updated in an approximate gradient direction based on information provided by the critic. We show that the features for the critic should span a subspace prescribed by the choice of parameterization of the actor. We conclude by discussing convergence properties and some open problems.},
language = {en},
author = {Konda, Vijay R. and Tsitsiklis, John N.},
keywords = {Reinforcement Learning},
pages = {7}
}