Near-Optimal Reinforcement Learning in Polynomial Time

Near-Optimal Reinforcement Learning in Polynomial Time. Kearns, M. & Singh, S. 49(2):209-232.

We present new algorithms for reinforcement learning and prove that they have polynomial bounds on the resources required to achieve near-optimal return in general Markov decision processes. After observing that the number of actions required to approach the optimal return is lower bounded by the mixing time T of the optimal policy (in the undiscounted case) or by the horizon time T (in the discounted case), we then give algorithms requiring a number of actions and total computation time that are only polynomial in T and the number of states and actions, for both the undiscounted and discounted cases. An interesting aspect of our algorithms is their explicit handling of the Exploration-Exploitation trade-off.

@article{kearnsNearOptimalReinforcementLearning2002,
  langid = {english},
  title = {Near-{{Optimal Reinforcement Learning}} in {{Polynomial Time}}},
  volume = {49},
  issn = {1573-0565},
  url = {https://doi.org/10.1023/A:1017984413808},
  doi = {10.1023/A:1017984413808},
  abstract = {We present new algorithms for reinforcement learning and prove that they have polynomial bounds on the resources required to achieve near-optimal return in general Markov decision processes. After observing that the number of actions required to approach the optimal return is lower bounded by the mixing time T of the optimal policy (in the undiscounted case) or by the horizon time T (in the discounted case), we then give algorithms requiring a number of actions and total computation time that are only polynomial in T and the number of states and actions, for both the undiscounted and discounted cases. An interesting aspect of our algorithms is their explicit handling of the Exploration-Exploitation trade-off.},
  number = {2},
  journaltitle = {Machine Learning},
  shortjournal = {Machine Learning},
  urldate = {2019-01-21},
  date = {2002-11-01},
  pages = {209-232},
  keywords = {reinforcement learning,exploration versus exploitation,Markov decision processes},
  author = {Kearns, Michael and Singh, Satinder},
  file = {/home/dimitri/Nextcloud/Zotero/storage/QHHJB8YE/Kearns and Singh - 2002 - Near-Optimal Reinforcement Learning in Polynomial .pdf}
}

Downloads: 0

{"_id":"cWvSwQdxX5GDpMEgk","bibbaseid":"kearns-singh-nearoptimalreinforcementlearninginpolynomialtime","authorIDs":[],"author_short":["Kearns, M.","Singh, S."],"bibdata":{"bibtype":"article","type":"article","langid":"english","title":"Near-Optimal Reinforcement Learning in Polynomial Time","volume":"49","issn":"1573-0565","url":"https://doi.org/10.1023/A:1017984413808","doi":"10.1023/A:1017984413808","abstract":"We present new algorithms for reinforcement learning and prove that they have polynomial bounds on the resources required to achieve near-optimal return in general Markov decision processes. After observing that the number of actions required to approach the optimal return is lower bounded by the mixing time T of the optimal policy (in the undiscounted case) or by the horizon time T (in the discounted case), we then give algorithms requiring a number of actions and total computation time that are only polynomial in T and the number of states and actions, for both the undiscounted and discounted cases. An interesting aspect of our algorithms is their explicit handling of the Exploration-Exploitation trade-off.","number":"2","journaltitle":"Machine Learning","shortjournal":"Machine Learning","urldate":"2019-01-21","date":"2002-11-01","pages":"209-232","keywords":"reinforcement learning,exploration versus exploitation,Markov decision processes","author":[{"propositions":[],"lastnames":["Kearns"],"firstnames":["Michael"],"suffixes":[]},{"propositions":[],"lastnames":["Singh"],"firstnames":["Satinder"],"suffixes":[]}],"file":"/home/dimitri/Nextcloud/Zotero/storage/QHHJB8YE/Kearns and Singh - 2002 - Near-Optimal Reinforcement Learning in Polynomial .pdf","bibtex":"@article{kearnsNearOptimalReinforcementLearning2002,\n langid = {english},\n title = {Near-{{Optimal Reinforcement Learning}} in {{Polynomial Time}}},\n volume = {49},\n issn = {1573-0565},\n url = {https://doi.org/10.1023/A:1017984413808},\n doi = {10.1023/A:1017984413808},\n abstract = {We present new algorithms for reinforcement learning and prove that they have polynomial bounds on the resources required to achieve near-optimal return in general Markov decision processes. After observing that the number of actions required to approach the optimal return is lower bounded by the mixing time T of the optimal policy (in the undiscounted case) or by the horizon time T (in the discounted case), we then give algorithms requiring a number of actions and total computation time that are only polynomial in T and the number of states and actions, for both the undiscounted and discounted cases. An interesting aspect of our algorithms is their explicit handling of the Exploration-Exploitation trade-off.},\n number = {2},\n journaltitle = {Machine Learning},\n shortjournal = {Machine Learning},\n urldate = {2019-01-21},\n date = {2002-11-01},\n pages = {209-232},\n keywords = {reinforcement learning,exploration versus exploitation,Markov decision processes},\n author = {Kearns, Michael and Singh, Satinder},\n file = {/home/dimitri/Nextcloud/Zotero/storage/QHHJB8YE/Kearns and Singh - 2002 - Near-Optimal Reinforcement Learning in Polynomial .pdf}\n}\n\n","author_short":["Kearns, M.","Singh, S."],"key":"kearnsNearOptimalReinforcementLearning2002","id":"kearnsNearOptimalReinforcementLearning2002","bibbaseid":"kearns-singh-nearoptimalreinforcementlearninginpolynomialtime","role":"author","urls":{"Paper":"https://doi.org/10.1023/A:1017984413808"},"keyword":["reinforcement learning","exploration versus exploitation","Markov decision processes"],"downloads":0},"bibtype":"article","biburl":"https://raw.githubusercontent.com/dlozeve/newblog/master/bib/all.bib","creationDate":"2020-01-08T20:39:39.231Z","downloads":0,"keywords":["reinforcement learning","exploration versus exploitation","markov decision processes"],"search_terms":["near","optimal","reinforcement","learning","polynomial","time","kearns","singh"],"title":"Near-Optimal Reinforcement Learning in Polynomial Time","year":null,"dataSources":["3XqdvqRE7zuX4cm8m"]}