Complete Policy Regret Bounds for Tallying Bandits. Malik, D., Li, Y., & Singh, A. In Conference on Learning Theory, pages 5146–5174, 2022. PMLR.
bibtex   
@comment{Conference paper (Conference on Learning Theory, 2022). PMLR issues the proceedings, so it is stored in the publisher field rather than organization, which denotes a sponsoring body.}
@inproceedings{malik22complete,
  author    = {Malik, Dhruv and Li, Yuanzhi and Singh, Aarti},
  title     = {Complete Policy Regret Bounds for Tallying Bandits},
  booktitle = {Conference on Learning Theory},
  pages     = {5146--5174},
  year      = {2022},
  publisher = {PMLR},
}

Downloads: 0