Automatic Differentiation of Programs with Discrete Randomness. Arya, G., Schauer, M., Schäfer, F., & Rackauckas, C. January, 2023. arXiv:2210.08572 [cs, math]
Paper doi abstract bibtex Automatic differentiation (AD), a technique for constructing new programs which compute the derivative of an original program, has become ubiquitous throughout scientific computing and deep learning due to the improved performance afforded by gradient-based optimization. However, AD systems have been restricted to the subset of programs that have a continuous dependence on parameters. Programs that have discrete stochastic behaviors governed by distribution parameters, such as flipping a coin with probability $p$ of being heads, pose a challenge to these systems because the connection between the result (heads vs tails) and the parameters ($p$) is fundamentally discrete. In this paper we develop a new reparameterization-based methodology that allows for generating programs whose expectation is the derivative of the expectation of the original program. We showcase how this method gives an unbiased and low-variance estimator which is as automated as traditional AD mechanisms. We demonstrate unbiased forward-mode AD of discrete-time Markov chains, agent-based models such as Conway's Game of Life, and unbiased reverse-mode AD of a particle filter. Our code package is available at https://github.com/gaurav-arya/StochasticAD.jl.
@comment{arya2023: arXiv preprint of the paper listed in annote as appearing
  in the Proceedings of NeurIPS 2022. The arXiv identifier is stored in
  eprint / archiveprefix / primaryclass rather than in note. NOTE(review):
  primaryclass cs.LG is inferred from the first keyword (Machine Learning);
  confirm against the arXiv listing.}
@misc{arya2023,
  author        = {Arya, Gaurav and Schauer, Moritz and Sch{\"a}fer, Frank and Rackauckas, Chris},
  title         = {Automatic Differentiation of Programs with Discrete Randomness},
  publisher     = {arXiv},
  month         = jan,
  year          = {2023},
  eprint        = {2210.08572},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  doi           = {10.48550/arXiv.2210.08572},
  url           = {https://arxiv.org/abs/2210.08572},
  urldate       = {2023-08-24},
  abstract      = {Automatic differentiation (AD), a technique for constructing new programs which compute the derivative of an original program, has become ubiquitous throughout scientific computing and deep learning due to the improved performance afforded by gradient-based optimization. However, AD systems have been restricted to the subset of programs that have a continuous dependence on parameters. Programs that have discrete stochastic behaviors governed by distribution parameters, such as flipping a coin with probability $p$ of being heads, pose a challenge to these systems because the connection between the result (heads vs tails) and the parameters ($p$) is fundamentally discrete. In this paper we develop a new reparameterization-based methodology that allows for generating programs whose expectation is the derivative of the expectation of the original program. We showcase how this method gives an unbiased and low-variance estimator which is as automated as traditional AD mechanisms. We demonstrate unbiased forward-mode AD of discrete-time Markov chains, agent-based models such as Conway's Game of Life, and unbiased reverse-mode AD of a particle filter. Our code package is available at https://github.com/gaurav-arya/StochasticAD.jl.},
  keywords      = {Computer Science - Machine Learning, Mathematics - Numerical Analysis, Computer Science - Mathematical Software, Mathematics - Probability, neuroblox: fancy Julia methods},
  annote        = {Comment: In Proceedings of NeurIPS 2022},
  file          = {arXiv Fulltext PDF:/Users/lcneuro/Zotero/storage/3FDE4SZY/Arya et al. - 2023 - Automatic Differentiation of Programs with Discret.pdf:application/pdf},
}
Downloads: 0
{"_id":"NaaDrPCa4A4wsCqBb","bibbaseid":"arya-schauer-schfer-rackauckas-automaticdifferentiationofprogramswithdiscreterandomness-2023","author_short":["Arya, G.","Schauer, M.","Schäfer, F.","Rackauckas, C."],"bibdata":{"bibtype":"misc","type":"misc","title":"Automatic Differentiation of Programs with Discrete Randomness","url":"http://arxiv.org/abs/2210.08572","doi":"10.48550/arXiv.2210.08572","abstract":"Automatic differentiation (AD), a technique for constructing new programs which compute the derivative of an original program, has become ubiquitous throughout scientific computing and deep learning due to the improved performance afforded by gradient-based optimization. However, AD systems have been restricted to the subset of programs that have a continuous dependence on parameters. Programs that have discrete stochastic behaviors governed by distribution parameters, such as flipping a coin with probability $p$ of being heads, pose a challenge to these systems because the connection between the result (heads vs tails) and the parameters ($p$) is fundamentally discrete. In this paper we develop a new reparameterization-based methodology that allows for generating programs whose expectation is the derivative of the expectation of the original program. We showcase how this method gives an unbiased and low-variance estimator which is as automated as traditional AD mechanisms. We demonstrate unbiased forward-mode AD of discrete-time Markov chains, agent-based models such as Conway's Game of Life, and unbiased reverse-mode AD of a particle filter. 
Our code package is available at https://github.com/gaurav-arya/StochasticAD.jl.","urldate":"2023-08-24","publisher":"arXiv","author":[{"propositions":[],"lastnames":["Arya"],"firstnames":["Gaurav"],"suffixes":[]},{"propositions":[],"lastnames":["Schauer"],"firstnames":["Moritz"],"suffixes":[]},{"propositions":[],"lastnames":["Schäfer"],"firstnames":["Frank"],"suffixes":[]},{"propositions":[],"lastnames":["Rackauckas"],"firstnames":["Chris"],"suffixes":[]}],"month":"January","year":"2023","note":"arXiv:2210.08572 [cs, math]","keywords":"Computer Science - Machine Learning, Mathematics - Numerical Analysis, Computer Science - Mathematical Software, Mathematics - Probability, neuroblox: fancy Julia methods","annote":"Comment: In Proceedings of NeurIPS 2022","file":"arXiv Fulltext PDF:/Users/lcneuro/Zotero/storage/3FDE4SZY/Arya et al. - 2023 - Automatic Differentiation of Programs with Discret.pdf:application/pdf","bibtex":"@misc{arya2023,\n\ttitle = {Automatic {Differentiation} of {Programs} with {Discrete} {Randomness}},\n\turl = {http://arxiv.org/abs/2210.08572},\n\tdoi = {10.48550/arXiv.2210.08572},\n\tabstract = {Automatic differentiation (AD), a technique for constructing new programs which compute the derivative of an original program, has become ubiquitous throughout scientific computing and deep learning due to the improved performance afforded by gradient-based optimization. However, AD systems have been restricted to the subset of programs that have a continuous dependence on parameters. Programs that have discrete stochastic behaviors governed by distribution parameters, such as flipping a coin with probability \\$p\\$ of being heads, pose a challenge to these systems because the connection between the result (heads vs tails) and the parameters (\\$p\\$) is fundamentally discrete. 
In this paper we develop a new reparameterization-based methodology that allows for generating programs whose expectation is the derivative of the expectation of the original program. We showcase how this method gives an unbiased and low-variance estimator which is as automated as traditional AD mechanisms. We demonstrate unbiased forward-mode AD of discrete-time Markov chains, agent-based models such as Conway's Game of Life, and unbiased reverse-mode AD of a particle filter. Our code package is available at https://github.com/gaurav-arya/StochasticAD.jl.},\n\turldate = {2023-08-24},\n\tpublisher = {arXiv},\n\tauthor = {Arya, Gaurav and Schauer, Moritz and Schäfer, Frank and Rackauckas, Chris},\n\tmonth = jan,\n\tyear = {2023},\n\tnote = {arXiv:2210.08572 [cs, math]},\n\tkeywords = {Computer Science - Machine Learning, Mathematics - Numerical Analysis, Computer Science - Mathematical Software, Mathematics - Probability, neuroblox: fancy Julia methods},\n\tannote = {Comment: In Proceedings of NeurIPS 2022},\n\tfile = {arXiv Fulltext PDF:/Users/lcneuro/Zotero/storage/3FDE4SZY/Arya et al. 
- 2023 - Automatic Differentiation of Programs with Discret.pdf:application/pdf},\n}\n\n","author_short":["Arya, G.","Schauer, M.","Schäfer, F.","Rackauckas, C."],"bibbaseid":"arya-schauer-schfer-rackauckas-automaticdifferentiationofprogramswithdiscreterandomness-2023","role":"author","urls":{"Paper":"http://arxiv.org/abs/2210.08572"},"keyword":["Computer Science - Machine Learning","Mathematics - Numerical Analysis","Computer Science - Mathematical Software","Mathematics - Probability","neuroblox: fancy Julia methods"],"metadata":{"authorlinks":{}}},"bibtype":"misc","biburl":"https://bibbase.org/f/8yBxW5neHauDryu9w/LCNeuro Site.bib","dataSources":["8vRkunHPvxf9E4NkX","ZHHpjMFTPYP8NqJif"],"keywords":["computer science - machine learning","mathematics - numerical analysis","computer science - mathematical software","mathematics - probability","neuroblox: fancy julia methods"],"search_terms":["automatic","differentiation","programs","discrete","randomness","arya","schauer","schäfer","rackauckas"],"title":"Automatic Differentiation of Programs with Discrete Randomness","year":2023}