Hierarchical Linearly-Solvable Markov Decision Problems

Hierarchical Linearly-Solvable Markov Decision Problems. Jonsson, A. & Gómez, V. In

We present a hierarchical reinforcement learning framework that formulates each task in the hierarchy as a special type of Markov decision process for which the Bellman equation is linear and has analytical solution. Problems of this type, called linearly-solvable MDPs (LMDPs) have interesting properties that can be exploited in a hierarchical setting, such as efficient learning of the optimal value function or task compositionality. The proposed hierarchical approach can also be seen as a novel alternative to solve LMDPs with large state spaces. We derive a hierarchical version of the so-called Z-learning algorithm that learns different tasks simultaneously and show empirically that it significantly outperforms the state-of-the-art learning methods in two classical HRL domains: the taxi domain and an autonomous guided vehicle task.

@comment{ICAPS 2016 main-track paper. booktitle and year are required for
inproceedings entries and were inferred from the citation key and the
ICAPS16 URL path; confirm against the published proceedings.}
@inproceedings{icaps16-83,
    author    = {Jonsson, Anders and Gómez, Vicenç},
    title     = {Hierarchical Linearly-Solvable {Markov} Decision Problems},
    booktitle = {Proceedings of the Twenty-Sixth International Conference on Automated Planning and Scheduling ({ICAPS} 2016)},
    year      = {2016},
    url       = {http://www.aaai.org/ocs/index.php/ICAPS/ICAPS16/paper/view/13090},
    track     = {Main Track},
    abstract  = {We present a hierarchical reinforcement learning framework that formulates each task in the hierarchy as a special type of Markov decision process for which the Bellman equation is linear and has analytical solution. Problems of this type, called linearly-solvable MDPs (LMDPs) have interesting properties that can be exploited in a hierarchical setting, such as efficient learning of the optimal value function or task compositionality. The proposed hierarchical approach can also be seen as a novel alternative to solve LMDPs with large state spaces. We derive a hierarchical version of the so-called Z-learning algorithm that learns different tasks simultaneously and show empirically that it significantly outperforms the state-of-the-art learning methods in two classical HRL domains: the taxi domain and an autonomous guided vehicle task.},
    keywords  = {Control and optimisation of dynamical systems,Probabilistic planning; MDPs and POMDPs,Planning under (non-probabilistic) uncertainty,Learning in planning and scheduling},
}

Downloads: 0

{"_id":"9thoynNCsoiFdpEnQ","bibbaseid":"jonsson-gmez-hierarchicallinearlysolvablemarkovdecisionproblems","downloads":0,"creationDate":"2016-03-09T03:04:33.001Z","title":"Hierarchical Linearly-Solvable Markov Decision Problems","author_short":["Jonsson, A.","Gómez, V."],"year":null,"bibtype":"inproceedings","biburl":"icaps16.icaps-conference.org/papers.bib","bibdata":{"bibtype":"inproceedings","type":"inproceedings","track":"Main Track","title":"Hierarchical Linearly-Solvable Markov Decision Problems","url":"http://www.aaai.org/ocs/index.php/ICAPS/ICAPS16/paper/view/13090","author":[{"firstnames":["Anders"],"propositions":[],"lastnames":["Jonsson"],"suffixes":[]},{"firstnames":["Vicenç"],"propositions":[],"lastnames":["Gómez"],"suffixes":[]}],"abstract":"We present a hierarchical reinforcement learning framework that formulates each task in the hierarchy as a special type of Markov decision process for which the Bellman equation is linear and has analytical solution. Problems of this type, called linearly-solvable MDPs (LMDPs) have interesting properties that can be exploited in a hierarchical setting, such as efficient learning of the optimal value function or task compositionality. The proposed hierarchical approach can also be seen as a novel alternative to solve LMDPs with large state spaces. 
We derive a hierarchical version of the so-called Z-learning algorithm that learns different tasks simultaneously and show empirically that it significantly outperforms the state-of-the-art learning methods in two classical HRL domains: the taxi domain and an autonomous guided vehicle task.","keywords":"Control and optimisation of dynamical systems,Probabilistic planning; MDPs and POMDPs,Planning under (non-probabilistic) uncertainty,Learning in planning and scheduling","bibtex":"@inproceedings {icaps16-83,\r\n track = {Main Track},\r\n title = {Hierarchical Linearly-Solvable Markov Decision Problems},\r\n url = {http://www.aaai.org/ocs/index.php/ICAPS/ICAPS16/paper/view/13090},\r\n author = {Anders Jonsson and Vicenç Gómez},\r\n abstract = {We present a hierarchical reinforcement learning framework that formulates each task in the hierarchy as a special type of Markov decision process for which the Bellman equation is linear and has analytical solution. Problems of this type, called linearly-solvable MDPs (LMDPs) have interesting properties that can be exploited in a hierarchical setting, such as efficient learning of the optimal value function or task compositionality. The proposed hierarchical approach can also be seen as a novel alternative to solve LMDPs with large state spaces. 
We derive a hierarchical version of the so-called Z-learning algorithm that learns different tasks simultaneously and show empirically that it significantly outperforms the state-of-the-art learning methods in two classical HRL domains: the taxi domain and an autonomous guided vehicle task.},\r\n keywords = {Control and optimisation of dynamical systems,Probabilistic planning; MDPs and POMDPs,Planning under (non-probabilistic) uncertainty,Learning in planning and scheduling}\r\n}\r\n\r\n","author_short":["Jonsson, A.","Gómez, V."],"key":"icaps16-83","id":"icaps16-83","bibbaseid":"jonsson-gmez-hierarchicallinearlysolvablemarkovdecisionproblems","role":"author","urls":{"Paper":"http://www.aaai.org/ocs/index.php/ICAPS/ICAPS16/paper/view/13090"},"keyword":["Control and optimisation of dynamical systems","Probabilistic planning; MDPs and POMDPs","Planning under (non-probabilistic) uncertainty","Learning in planning and scheduling"],"metadata":{"authorlinks":{}},"downloads":0,"html":""},"search_terms":["hierarchical","linearly","solvable","markov","decision","problems","jonsson","gómez"],"keywords":["control and optimisation of dynamical systems","probabilistic planning; mdps and pomdps","planning under (non-probabilistic) uncertainty","learning in planning and scheduling"],"authorIDs":[],"dataSources":["iMkx859KiXcegwsin","EZtZjCTnxcdTTyeij"]}