High Confidence Off-Policy Evaluation. Thomas, P. S., Theocharous, G., & Ghavamzadeh, M. In Proceedings of the Twenty-Ninth AAAI Conference on Artificial Intelligence (AAAI'15), pages 3000–3006. AAAI Press, 2015.

Abstract: Many reinforcement learning algorithms use trajectories collected from the execution of one or more policies to propose a new policy. Because execution of a bad policy can be costly or dangerous, techniques for evaluating the performance of the new policy without requiring its execution have been of recent interest in industry. Such off-policy evaluation methods, which estimate the performance of a policy using trajectories collected from the execution of other policies, heretofore have not provided confidences regarding the accuracy of their estimates. In this paper we propose an off-policy method for computing a lower confidence bound on the expected return of a policy.
@inproceedings{thomasHighConfidenceOffpolicy2015,
  title     = {High Confidence Off-Policy Evaluation},
  author    = {Thomas, Philip S. and Theocharous, Georgios and Ghavamzadeh, Mohammad},
  booktitle = {Proceedings of the Twenty-Ninth AAAI Conference on Artificial Intelligence},
  series    = {AAAI'15},
  publisher = {AAAI Press},
  year      = {2015},
  pages     = {3000--3006},
  isbn      = {978-0-262-51129-2},
  url       = {https://www.aaai.org/ocs/index.php/AAAI/AAAI15/paper/viewPaper/10042},
  venue     = {Austin, Texas},
  urldate   = {2019-05-17},
  abstract  = {Many reinforcement learning algorithms use trajectories collected from the execution of one or more policies to propose a new policy. Because execution of a bad policy can be costly or dangerous, techniques for evaluating the performance of the new policy without requiring its execution have been of recent interest in industry. Such off-policy evaluation methods, which estimate the performance of a policy using trajectories collected from the execution of other policies, heretofore have not provided confidences regarding the accuracy of their estimates. In this paper we propose an off-policy method for computing a lower confidence bound on the expected return of a policy.}
}
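As a rough illustration of the idea in the abstract, the sketch below computes per-trajectory importance-sampling estimates of an evaluation policy's return from behaviour-policy trajectories and turns them into a lower confidence bound using a simple Hoeffding inequality. This is not the paper's actual concentration inequality (which is tighter for the heavy-tailed importance-weighted estimates); the trajectory format, the policy interfaces pi_e/pi_b, and the clipping bound b_max are all assumptions made for the example.

```python
import numpy as np

def is_estimates(trajectories, pi_e, pi_b):
    """Per-trajectory importance-sampling estimates of the evaluation
    policy's return, using trajectories generated by the behaviour policy.

    Each trajectory is assumed to be a list of (state, action, reward)
    tuples; pi_e and pi_b map (state, action) to an action probability.
    These names and signatures are illustrative, not from the paper.
    """
    estimates = []
    for traj in trajectories:
        weight = 1.0   # product of likelihood ratios along the trajectory
        ret = 0.0      # undiscounted return of the trajectory
        for state, action, reward in traj:
            weight *= pi_e(state, action) / pi_b(state, action)
            ret += reward
        estimates.append(weight * ret)
    return np.asarray(estimates)

def hoeffding_lower_bound(estimates, b_max, delta=0.05):
    """(1 - delta) lower confidence bound on the expected return via
    Hoeffding's inequality, assuming each estimate has been clipped to
    lie in [0, b_max].  This is a simpler and much looser bound than the
    one developed in the paper; it only shows how off-policy estimates
    can be turned into a high-confidence lower bound."""
    n = len(estimates)
    return estimates.mean() - b_max * np.sqrt(np.log(1.0 / delta) / (2.0 * n))
```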