Optimal rates for zero-order convex optimization: the power of two function evaluations. Duchi, J. C.; Jordan, M. I.; Wainwright, M. J.; and Wibisono, A. August, 2014.
Paper abstract bibtex We consider derivative-free algorithms for stochastic and non-stochastic convex optimization problems that use only function values rather than gradients. Focusing on non-asymptotic bounds on convergence rates, we show that if pairs of function values are available, algorithms for $d$-dimensional optimization that use gradient estimates based on random perturbations suffer a factor of at most $\sqrt{d}$ in convergence rate over traditional stochastic gradient methods. We establish such results for both smooth and non-smooth cases, sharpening previous analyses that suggested a worse dimension dependence, and extend our results to the case of multiple ($m \ge 2$) evaluations. We complement our algorithmic development with information-theoretic lower bounds on the minimax convergence rate of such problems, establishing the sharpness of our achievable results up to constant (sometimes logarithmic) factors.
@misc{duchi2014optimal,
  abstract             = {We consider derivative-free algorithms for stochastic and non-stochastic
                          convex optimization problems that use only function values rather than
                          gradients. Focusing on non-asymptotic bounds on convergence rates, we show that
                          if pairs of function values are available, algorithms for {$d$}-dimensional
                          optimization that use gradient estimates based on random perturbations suffer a
                          factor of at most {$\sqrt{d}$} in convergence rate over traditional stochastic
                          gradient methods. We establish such results for both smooth and non-smooth
                          cases, sharpening previous analyses that suggested a worse dimension
                          dependence, and extend our results to the case of multiple ({$m \ge 2$})
                          evaluations. We complement our algorithmic development with
                          information-theoretic lower bounds on the minimax convergence rate of such
                          problems, establishing the sharpness of our achievable results up to constant
                          (sometimes logarithmic) factors.},
  added-at             = {2018-12-07T09:10:16.000+0100},
  archiveprefix        = {arXiv},
  author               = {Duchi, John C. and Jordan, Michael I. and Wainwright, Martin J. and Wibisono, Andre},
  biburl               = {https://www.bibsonomy.org/bibtex/28b0e71de07069134b4bf4c3f3545554d/jpvaldes},
  citeulike-article-id = {12834238},
  citeulike-linkout-0  = {http://arxiv.org/abs/1312.2139},
  citeulike-linkout-1  = {http://arxiv.org/pdf/1312.2139},
  day                  = 20,
  eprint               = {1312.2139},
  interhash            = {670afef60e4881e0959d86fd8719ae01},
  intrahash            = {8b0e71de07069134b4bf4c3f3545554d},
  keywords             = {optimization derivative_free},
  month                = aug,
  posted-at            = {2017-04-04 15:49:53},
  priority             = {2},
  timestamp            = {2018-12-07T09:17:38.000+0100},
  title                = {Optimal rates for zero-order convex optimization: the power of two function evaluations},
  url                  = {http://arxiv.org/abs/1312.2139},
  year                 = 2014
}