\n
\n\n \n \n Bayer, C.; Belomestny, D.; Hager, P.; Pigato, P.; Schoenmakers, J.; and Spokoiny, V.\n\n\n \n \n \n \n \n Reinforced optimal control.\n \n \n \n \n\n\n \n\n\n\n
Commun. Math. Sci., 20(7): 1951–1978. 2022.\n
\n\n
\n\n
\n\n
\n\n \n \n
Paper\n \n \n\n \n \n doi\n \n \n\n \n link\n \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n\n
\n
@article{MR4504018,
  author     = {Bayer, Christian and Belomestny, Denis and Hager, Paul and Pigato, Paolo and Schoenmakers, John and Spokoiny, Vladimir},
  title      = {Reinforced optimal control},
  journal    = {Commun. Math. Sci.},
  fjournal   = {Communications in Mathematical Sciences},
  volume     = {20},
  year       = {2022},
  number     = {7},
  pages      = {1951--1978},
  issn       = {1539-6746},
  mrclass    = {93E20 (90C39 91G20)},
  mrnumber   = {4504018},
  mrreviewer = {Gregory Gagnon},
  doi        = {10.4310/cms.2022.v20.n7.a7},
  url        = {https://doi.org/10.4310/cms.2022.v20.n7.a7},
}
\n
\n\n\n\n
\n
\n\n \n \n Belomestny, D.; Gugushvili, S.; Schauer, M.; and Spreij, P.\n\n\n \n \n \n \n \n Nonparametric Bayesian volatility estimation for gamma-driven stochastic differential equations.\n \n \n \n \n\n\n \n\n\n\n
Bernoulli, 28(4): 2151–2180. 2022.\n
\n\n
\n\n
\n\n
\n\n \n \n
Paper\n \n \n\n \n \n doi\n \n \n\n \n link\n \n \n\n bibtex\n \n\n \n\n \n \n \n 1 download\n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n\n
\n
@article{MR4474539,
  author     = {Belomestny, Denis and Gugushvili, Shota and Schauer, Moritz and Spreij, Peter},
  title      = {Nonparametric {Bayesian} volatility estimation for gamma-driven stochastic differential equations},
  journal    = {Bernoulli},
  fjournal   = {Bernoulli. Official Journal of the Bernoulli Society for Mathematical Statistics and Probability},
  volume     = {28},
  year       = {2022},
  number     = {4},
  pages      = {2151--2180},
  issn       = {1350-7265},
  mrclass    = {62G20 (60G51 60H10 62M30)},
  mrnumber   = {4474539},
  mrreviewer = {Markus Bibinger},
  doi        = {10.3150/21-bej1413},
  url        = {https://doi.org/10.3150/21-bej1413},
}
\n
\n\n\n\n
\n
\n\n \n \n Tiapkin, D.; Belomestny, D.; Calandriello, D.; Moulines, E.; Munos, R.; Naumov, A.; Rowland, M.; Valko, M.; and Menard, P.\n\n\n \n \n \n \n \n Optimistic Posterior Sampling for Reinforcement Learning with Few Samples and Tight Guarantees.\n \n \n \n \n\n\n \n\n\n\n In Oh, A. H.; Agarwal, A.; Belgrave, D.; and Cho, K., editor(s),
Advances in Neural Information Processing Systems, 2022. \n
\n\n
\n\n
\n\n
\n\n \n \n
Paper\n \n \n\n \n\n \n link\n \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n\n
\n
@inproceedings{tiapkin2022optimistic,
  author    = {Tiapkin, Daniil and Belomestny, Denis and Calandriello, Daniele and Moulines, Eric and Munos, Remi and Naumov, Alexey and Rowland, Mark and Valko, Michal and Menard, Pierre},
  title     = {Optimistic Posterior Sampling for Reinforcement Learning with Few Samples and Tight Guarantees},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {Oh, Alice H. and Agarwal, Alekh and Belgrave, Danielle and Cho, Kyunghyun},
  year      = {2022},
  url       = {https://openreview.net/forum?id=gvwDosudtyA},
}
\n
\n\n\n\n
\n
\n\n \n \n Tiapkin, D.; Belomestny, D.; Moulines, E.; Naumov, A.; Samsonov, S.; Tang, Y.; Valko, M.; and Menard, P.\n\n\n \n \n \n \n \n From Dirichlet to Rubin: Optimistic Exploration in RL without Bonuses.\n \n \n \n \n\n\n \n\n\n\n In Chaudhuri, K.; Jegelka, S.; Song, L.; Szepesvari, C.; Niu, G.; and Sabato, S., editor(s),
Proceedings of the 39th International Conference on Machine Learning, volume 162, of
Proceedings of Machine Learning Research, pages 21380–21431, 17–23 Jul 2022. PMLR\n
\n\n
\n\n
\n\n
\n\n \n \n
Paper\n \n \n\n \n\n \n link\n \n \n\n bibtex\n \n\n \n \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n\n
\n
@inproceedings{pmlr-v162-tiapkin22a,
  title     = {From {Dirichlet} to Rubin: Optimistic Exploration in {RL} without Bonuses},
  author    = {Tiapkin, Daniil and Belomestny, Denis and Moulines, Eric and Naumov, Alexey and Samsonov, Sergey and Tang, Yunhao and Valko, Michal and Menard, Pierre},
  booktitle = {Proceedings of the 39th International Conference on Machine Learning},
  pages     = {21380--21431},
  year      = {2022},
  editor    = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},
  volume    = {162},
  series    = {Proceedings of Machine Learning Research},
  month     = {17--23 Jul},
  publisher = {PMLR},
  pdf       = {https://proceedings.mlr.press/v162/tiapkin22a/tiapkin22a.pdf},
  url       = {https://proceedings.mlr.press/v162/tiapkin22a.html},
  abstract  = {We propose the Bayes-UCBVI algorithm for reinforcement learning in tabular, stage-dependent, episodic Markov decision process: a natural extension of the Bayes-UCB algorithm by Kaufmann et al. 2012 for multi-armed bandits. Our method uses the quantile of a Q-value function posterior as upper confidence bound on the optimal Q-value function. For Bayes-UCBVI, we prove a regret bound of order $\widetilde{\mathcal{O}}(\sqrt{H^3SAT})$ where $H$ is the length of one episode, $S$ is the number of states, $A$ the number of actions, $T$ the number of episodes, that matches the lower-bound of $\Omega(\sqrt{H^3SAT})$ up to poly-$\log$ terms in $H,S,A,T$ for a large enough $T$. To the best of our knowledge, this is the first algorithm that obtains an optimal dependence on the horizon $H$ (and $S$) <em>without the need of an involved Bernstein-like bonus or noise.</em> Crucial to our analysis is a new fine-grained anti-concentration bound for a weighted Dirichlet sum that can be of independent interest. We then explain how Bayes-UCBVI can be easily extended beyond the tabular setting, exhibiting a strong link between our algorithm and Bayesian bootstrap (Rubin,1981).},
}
\n
\n\n\n
\n We propose the Bayes-UCBVI algorithm for reinforcement learning in tabular, stage-dependent, episodic Markov decision process: a natural extension of the Bayes-UCB algorithm by Kaufmann et al. 2012 for multi-armed bandits. Our method uses the quantile of a Q-value function posterior as upper confidence bound on the optimal Q-value function. For Bayes-UCBVI, we prove a regret bound of order $\\widetilde{\\mathcal{O}}(\\sqrt{H^3SAT})$ where $H$ is the length of one episode, $S$ is the number of states, $A$ the number of actions, $T$ the number of episodes, that matches the lower-bound of $\\Omega(\\sqrt{H^3SAT})$ up to poly-$\\log$ terms in $H,S,A,T$ for a large enough $T$. To the best of our knowledge, this is the first algorithm that obtains an optimal dependence on the horizon $H$ (and $S$) without the need of an involved Bernstein-like bonus or noise. Crucial to our analysis is a new fine-grained anti-concentration bound for a weighted Dirichlet sum that can be of independent interest. We then explain how Bayes-UCBVI can be easily extended beyond the tabular setting, exhibiting a strong link between our algorithm and Bayesian bootstrap (Rubin,1981).\n
\n\n\n
\n
\n\n \n \n Belomestny, D.; Hübner, T.; and Krätschmer, V.\n\n\n \n \n \n \n \n Solving optimal stopping problems under model uncertainty via empirical dual optimisation.\n \n \n \n \n\n\n \n\n\n\n
Finance Stoch., 26(3): 461–503. 2022.\n
\n\n
\n\n
\n\n
\n\n \n \n
Paper\n \n \n\n \n \n doi\n \n \n\n \n link\n \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n\n
\n
@article{MR4447251,
  author   = {Belomestny, Denis and H{\"u}bner, Tobias and Kr{\"a}tschmer, Volker},
  title    = {Solving optimal stopping problems under model uncertainty via empirical dual optimisation},
  journal  = {Finance Stoch.},
  fjournal = {Finance and Stochastics},
  volume   = {26},
  year     = {2022},
  number   = {3},
  pages    = {461--503},
  issn     = {0949-2984},
  mrclass  = {60G40 (60G17 90C47 91G20)},
  mrnumber = {4447251},
  doi      = {10.1007/s00780-022-00480-z},
  url      = {https://doi.org/10.1007/s00780-022-00480-z},
}
\n
\n\n\n\n
\n
\n\n \n \n Gauer, J.; Nagathil, A.; Eckel, K.; Belomestny, D.; and Martin, R.\n\n\n \n \n \n \n \n A versatile deep-neural-network-based music preprocessing and remixing scheme for cochlear implant listeners.\n \n \n \n \n\n\n \n\n\n\n
Journal of the Acoustical Society of America, 151(5): 2975–2986. 2022.\n
Q1 quartile\n\n
\n\n
\n\n
\n\n \n \n
Paper\n \n \n\n \n \n doi\n \n \n\n \n link\n \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n\n
\n
@article{Gauer20222975,
  author            = {Gauer, Johannes and Nagathil, Anil and Eckel, Kai and Belomestny, Denis and Martin, Rainer},
  title             = {A versatile deep-neural-network-based music preprocessing and remixing scheme for cochlear implant listeners},
  year              = {2022},
  journal           = {Journal of the Acoustical Society of America},
  volume            = {151},
  number            = {5},
  pages             = {2975--2986},
  doi               = {10.1121/10.0010371},
  url               = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85130096784&doi=10.1121%2f10.0010371&partnerID=40&md5=216e0907dbaad8513f25b1c64d1f1c31},
  publication_stage = {Final},
  note              = {Q1 quartile},
}
\n
\n\n\n\n
\n
\n\n \n \n Belomestny, D.; Iosipoi, L.; Paris, Q.; and Zhivotovskiy, N.\n\n\n \n \n \n \n \n Empirical variance minimization with applications in variance reduction and optimal control.\n \n \n \n \n\n\n \n\n\n\n
Bernoulli, 28(2): 1382–1407. 2022.\n
Q1 quartile\n\n
\n\n
\n\n
\n\n \n \n
Paper\n \n \n\n \n \n doi\n \n \n\n \n link\n \n \n\n bibtex\n \n\n \n\n \n \n \n 3 downloads\n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n\n
\n
@article{MR4388942,
  author   = {Belomestny, Denis and Iosipoi, Leonid and Paris, Quentin and Zhivotovskiy, Nikita},
  title    = {Empirical variance minimization with applications in variance reduction and optimal control},
  journal  = {Bernoulli},
  fjournal = {Bernoulli. Official Journal of the Bernoulli Society for Mathematical Statistics and Probability},
  volume   = {28},
  year     = {2022},
  number   = {2},
  pages    = {1382--1407},
  issn     = {1350-7265},
  mrclass  = {65C20 (65K10 90C25 90C48)},
  mrnumber = {4388942},
  doi      = {10.3150/21-bej1392},
  url      = {https://doi.org/10.3150/21-bej1392},
  note     = {Q1 quartile},
}
\n
\n\n\n\n