Intentionally-underestimated Value Function at Terminal State for Temporal-difference Learning with Mis-designed Reward. Kobayashi, T. 2023. (submitted for publication) Paper bibtex @misc{kobayashi2023intentionallyunderestimated,
title={Intentionally-underestimated Value Function at Terminal State for Temporal-difference Learning with Mis-designed Reward},
author={Taisuke Kobayashi},
year={2023},
eprint={2308.12772},
archivePrefix={arXiv},
primaryClass={cs.RO},
url={https://arxiv.org/abs/2308.12772},
note={(submitted for publication)},
youtube={https://youtu.be/AxXr8uFOe7M},
}
%RSJ seminar 2023/07/06
Downloads: 0
{"_id":"4yM9LQnyR6Gph5PoB","bibbaseid":"kobayashi-intentionallyunderestimatedvaluefunctionatterminalstatefortemporaldifferencelearningwithmisdesignedreward-2023","author_short":["Kobayashi, T."],"bibdata":{"bibtype":"misc","type":"misc","title":"Intentionally-underestimated Value Function at Terminal State for Temporal-difference Learning with Mis-designed Reward","author":[{"firstnames":["Taisuke"],"propositions":[],"lastnames":["Kobayashi"],"suffixes":[]}],"year":"2023","eprint":"2308.12772","archiveprefix":"arXiv","primaryclass":"cs.RO","url":"https://arxiv.org/abs/2308.12772","note":"(submitted for publication)","youtube":"https://youtu.be/AxXr8uFOe7M","bibtex":"@misc{kobayashi2023intentionallyunderestimated,\n title={Intentionally-underestimated Value Function at Terminal State for Temporal-difference Learning with Mis-designed Reward},\n author={Taisuke Kobayashi},\n year={2023},\n eprint={2308.12772},\n archivePrefix={arXiv},\n primaryClass={cs.RO},\n url={https://arxiv.org/abs/2308.12772},\n note={(submitted for publication)},\n youtube={https://youtu.be/AxXr8uFOe7M},\n}\n\n%RSJ seminar 2023/07/06\n","author_short":["Kobayashi, T."],"key":"kobayashi2023intentionallyunderestimated","id":"kobayashi2023intentionallyunderestimated","bibbaseid":"kobayashi-intentionallyunderestimatedvaluefunctionatterminalstatefortemporaldifferencelearningwithmisdesignedreward-2023","role":"author","urls":{"Paper":"https://arxiv.org/abs/2308.12772"},"metadata":{"authorlinks":{}},"html":""},"bibtype":"misc","biburl":"https://www.dropbox.com/s/ebs0utssp5unpnm/publish.bib?dl=1","dataSources":["Fdi7RLbmsFp46nnDK"],"keywords":[],"search_terms":["intentionally","underestimated","value","function","terminal","state","temporal","difference","learning","mis","designed","reward","kobayashi"],"title":"Intentionally-underestimated Value Function at Terminal State for Temporal-difference Learning with Mis-designed Reward","year":2023}