Data-Efficient Off-Policy Policy Evaluation for Reinforcement Learning. Thomas, P. and Brunskill, E. In International Conference on Machine Learning, pages 2139-2148, 2016.
Abstract: In this paper we present a new way of predicting the performance of a reinforcement learning policy given historical data that may have been generated by a different policy. The ability to evaluate...
% Conference paper by Thomas and Brunskill, International Conference on Machine Learning, 2016.
% biblatex-style entry: uses date / eventtitle / urldate / langid rather than year / month.
% The abstract is truncated as exported; the file field lists local Zotero attachments
% and is not printed by standard styles.
@inproceedings{thomasDataEfficientOffPolicyPolicy2016,
  author     = {Thomas, Philip and Brunskill, Emma},
  title      = {Data-{{Efficient Off}}-{{Policy Policy Evaluation}} for {{Reinforcement Learning}}},
  booktitle  = {International {{Conference}} on {{Machine Learning}}},
  eventtitle = {International {{Conference}} on {{Machine Learning}}},
  date       = {2016-06-11},
  pages      = {2139--2148},
  url        = {http://proceedings.mlr.press/v48/thomasa16.html},
  urldate    = {2019-05-17},
  langid     = {english},
  abstract   = {In this paper we present a new way of predicting the performance of a reinforcement learning policy given historical data that may have been generated by a different policy. The ability to evaluate...},
  file       = {/home/dimitri/Nextcloud/Zotero/storage/KRD885VU/Thomas and Brunskill - 2016 - Data-Efficient Off-Policy Policy Evaluation for Re.pdf;/home/dimitri/Nextcloud/Zotero/storage/X2R64R83/Appendix.pdf;/home/dimitri/Nextcloud/Zotero/storage/YPJPLZJ6/thomasa16.html},
}