When Is Partially Observable Reinforcement Learning Not Scary?. Liu, Q., Chung, A., Szepesvári, C., & Jin, C. In *COLT*, 07, 2022. Link Pdf abstract bibtex Partial observability is ubiquitous in applications of Reinforcement Learning (RL), in which agents learn to make a sequence of decisions despite lacking complete information about the latent states of the controlled system. Partially observable RL is notoriously difficult in theory—well-known complexity-theoretic results show that learning partially observable Markov decision processes (POMDPs) requires an exponential number of samples in the worst case. Yet, this does not rule out the possible existence of interesting subclasses of POMDPs, which include a large set of partial observable applications in practice while being tractable. In this paper we identify a rich family of tractable POMDPs, which we call weakly revealing POMDPs. This family rules out the pathological instances of POMDPs with non-informative observations. We prove that for weakly revealing POMDPs, a simple algorithm combining optimism and Maximum Likelihood Estimation (MLE) is sufficient to guarantee a polynomial sample complexity. To the best of our knowledge, this is the first provably sample-efficient result for learning in overcomplete POMDPs—where the number of latent states can be larger than the number of observations—in settings where exploration is necessary.

@inproceedings{LCSzJ22,
  author        = {Liu, Qinghua and Chung, Alan and Szepesv{\'a}ri, Csaba and Jin, Chi},
  title         = {When Is Partially Observable Reinforcement Learning Not Scary?},
  booktitle     = {COLT},
  acceptrate    = {155 out of 484 = 32\%},
  month         = jul,
  year          = {2022},
  doi           = {10.48550/arXiv.2204.08967},
  eprint        = {2204.08967},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url_link      = {https://doi.org/10.48550/arXiv.2204.08967},
  url_pdf       = {https://arxiv.org/pdf/2204.08967.pdf},
  abstract      = {Partial observability is ubiquitous in applications of Reinforcement Learning (RL), in which agents learn to make a sequence of decisions despite lacking complete information about the latent states of the controlled system. Partially observable RL is notoriously difficult in theory---well-known complexity-theoretic results show that learning partially observable Markov decision processes (POMDPs) requires an exponential number of samples in the worst case. Yet, this does not rule out the possible existence of interesting subclasses of POMDPs, which include a large set of partial observable applications in practice while being tractable. In this paper we identify a rich family of tractable POMDPs, which we call weakly revealing POMDPs. This family rules out the pathological instances of POMDPs with non-informative observations. We prove that for weakly revealing POMDPs, a simple algorithm combining optimism and Maximum Likelihood Estimation (MLE) is sufficient to guarantee a polynomial sample complexity. To the best of our knowledge, this is the first provably sample-efficient result for learning in overcomplete POMDPs---where the number of latent states can be larger than the number of observations---in settings where exploration is necessary.},
  keywords      = {reinforcement learning, exploration, POMDPs},
}

Downloads: 0

{"_id":"6uF48DBP8HrWR8uE6","bibbaseid":"liu-chung-szepesvri-jin-whenispartiallyobservablereinforcementlearningnotscary-2022","author_short":["Liu, Q.","Chung, A.","Szepesvári, C.","Jin, C."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","author":[{"propositions":[],"lastnames":["Liu"],"firstnames":["Qinghua"],"suffixes":[]},{"propositions":[],"lastnames":["Chung"],"firstnames":["Alan"],"suffixes":[]},{"propositions":[],"lastnames":["Szepesvári"],"firstnames":["Csaba"],"suffixes":[]},{"propositions":[],"lastnames":["Jin"],"firstnames":["Chi"],"suffixes":[]}],"title":"When Is Partially Observable Reinforcement Learning Not Scary?","booktitle":"COLT","acceptrate":"155 out of 484 = 32%","month":"07","year":"2022","url_link":"https://doi.org/10.48550/arXiv.2204.08967","url_pdf":"https://arxiv.org/pdf/2204.08967.pdf","abstract":"Partial observability is ubiquitous in applications of Reinforcement Learning (RL), in which agents learn to make a sequence of decisions despite lacking complete information about the latent states of the controlled system. Partially observable RL is notoriously difficult in theory—well-known complexity-theoretic results show that learning partially observable Markov decision processes (POMDPs) requires an exponential number of samples in the worst case. Yet, this does not rule out the possible existence of interesting subclasses of POMDPs, which include a large set of partial observable applications in practice while being tractable. In this paper we identify a rich family of tractable POMDPs, which we call weakly revealing POMDPs. This family rules out the pathological instances of POMDPs with non-informative observations. We prove that for weakly revealing POMDPs, a simple algorithm combining optimism and Maximum Likelihood Estimation (MLE) is sufficient to guarantee a polynomial sample complexity. 
To the best of our knowledge, this is the first provably sample-efficient result for learning in overcomplete POMDPs—where the number of latent states can be larger than the number of observations—in settings where exploration is necessary.","keywords":"reinforcement learning, exploration, POMDPs","bibtex":"@inproceedings{LCSzJ22,\n author = {Liu, Qinghua and Chung, Alan and Szepesv\\'ari, Csaba and Jin, Chi},\n title = {When Is Partially Observable Reinforcement Learning Not Scary?},\n booktitle = {COLT},\n acceptrate = {155 out of 484 = 32\\%},\n month = {07},\n year = {2022},\n url_link = {https://doi.org/10.48550/arXiv.2204.08967},\n url_pdf = {https://arxiv.org/pdf/2204.08967.pdf},\n abstract = {Partial observability is ubiquitous in applications of Reinforcement Learning (RL), in which agents learn to make a sequence of decisions despite lacking complete information about the latent states of the controlled system. Partially observable RL is notoriously difficult in theory---well-known complexity-theoretic results show that learning partially observable Markov decision processes (POMDPs) requires an exponential number of samples in the worst case. Yet, this does not rule out the possible existence of interesting subclasses of POMDPs, which include a large set of partial observable applications in practice while being tractable. In this paper we identify a rich family of tractable POMDPs, which we call weakly revealing POMDPs. This family rules out the pathological instances of POMDPs with non-informative observations. We prove that for weakly revealing POMDPs, a simple algorithm combining optimism and Maximum Likelihood Estimation (MLE) is sufficient to guarantee a polynomial sample complexity. 
To the best of our knowledge, this is the first provably sample-efficient result for learning in overcomplete POMDPs---where the number of latent states can be larger than the number of observations---in settings where exploration is necessary.},\nkeywords = {reinforcement learning, exploration, POMDPs} \n}\n\n","author_short":["Liu, Q.","Chung, A.","Szepesvári, C.","Jin, C."],"key":"LCSzJ22","id":"LCSzJ22","bibbaseid":"liu-chung-szepesvri-jin-whenispartiallyobservablereinforcementlearningnotscary-2022","role":"author","urls":{" link":"https://doi.org/10.48550/arXiv.2204.08967"," pdf":"https://arxiv.org/pdf/2204.08967.pdf"},"keyword":["reinforcement learning","exploration","POMDPs"],"metadata":{"authorlinks":{}},"html":""},"bibtype":"inproceedings","biburl":"https://www.ualberta.ca/~szepesva/papers/p2.bib","dataSources":["dYMomj4Jofy8t4qmm"],"keywords":["reinforcement learning","exploration","pomdps"],"search_terms":["partially","observable","reinforcement","learning","scary","liu","chung","szepesvári","jin"],"title":"When Is Partially Observable Reinforcement Learning Not Scary?","year":2022}