<script src="https://bibbase.org/show?bib=https://raw.githubusercontent.com/plai-group/bibliography/master/group_publications.bib&theme=dividers&group0=year&group1=type&folding=1&filter=support:D3M&jsonp=1"></script>
<?php
$contents = file_get_contents("https://bibbase.org/show?bib=https://raw.githubusercontent.com/plai-group/bibliography/master/group_publications.bib&theme=dividers&group0=year&group1=type&folding=1&filter=support:D3M");
print_r($contents);
?>
<iframe src="https://bibbase.org/show?bib=https://raw.githubusercontent.com/plai-group/bibliography/master/group_publications.bib&theme=dividers&group0=year&group1=type&folding=1&filter=support:D3M"></iframe>
For more details see the documention.
To the site owner:
Action required! Mendeley is changing its API. In order to keep using Mendeley with BibBase past April 14th, you need to:
@article{WOO-22, AUTHOR={Wood, Frank and Warrington, Andrew and Naderiparizi, Saeid and Weilbach, Christian and Masrani, Vaden and Harvey, William and Ścibior, Adam and Beronov, Boyan and Grefenstette, John and Campbell, Duncan and Nasseri, S. Ali}, TITLE={Planning as Inference in Epidemiological Dynamics Models}, JOURNAL={Frontiers in Artificial Intelligence}, VOLUME={4}, YEAR={2022}, URL_Paper={https://www.frontiersin.org/article/10.3389/frai.2021.550603}, url_ArXiv={https://arxiv.org/abs/2003.13221}, DOI={10.3389/frai.2021.550603}, ISSN={2624-8212}, support = {D3M,COVID,ETALUMIS}, ABSTRACT={In this work we demonstrate how to automate parts of the infectious disease-control policy-making process via performing inference in existing epidemiological models. The kind of inference tasks undertaken include computing the posterior distribution over controllable, via direct policy-making choices, simulation model parameters that give rise to acceptable disease progression outcomes. Among other things, we illustrate the use of a probabilistic programming language that automates inference in existing simulators. Neither the full capabilities of this tool for automating inference nor its utility for planning is widely disseminated at the current time. Timely gains in understanding about how such simulation-based models and inference automation tools applied in support of policy-making could lead to less economically damaging policy prescriptions, particularly during the current COVID-19 pandemic.} }
@InProceedings{Bateni_2022_WACV, author = {Bateni, Peyman and Barber, Jarred and van de Meent, Jan-Willem and Wood, Frank}, title = {Enhancing Few-Shot Image Classification With Unlabelled Examples}, booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)}, month = {January}, year = {2022}, pages = {2796-2805}, url_ArXiv = {https://arxiv.org/abs/2006.12245}, url_Paper = {https://ieeexplore.ieee.org/document/9706775}, support = {D3M,LwLL}, abstract={We develop a transductive meta-learning method that uses unlabelled instances to improve few-shot image classification performance. Our approach combines a regularized Mahalanobis-distance-based soft k-means clustering procedure with a modified state of the art neural adaptive feature extractor to achieve improved test-time classification accuracy using unlabelled data. We evaluate our method on transductive few-shot learning tasks, in which the goal is to jointly predict labels for query (test) examples given a set of support (training) examples. We achieve state of the art performance on the Meta-Dataset, mini-ImageNet and tiered-ImageNet benchmarks.} }
@InProceedings{9533449, author={Munk, Andreas and Harvey, William and Wood, Frank}, booktitle={2021 International Joint Conference on Neural Networks (IJCNN)}, title={Assisting the Adversary to Improve GAN Training}, year={2021}, pages={1-8}, abstract={Some of the most popular methods for improving the stability and performance of GANs involve constraining or regularizing the discriminator. In this paper we consider a largely overlooked regularization technique which we refer to as the Adversary's Assistant (AdvAs). We motivate this using a different perspective to that of prior work. Specifically, we consider a common mismatch between theoretical analysis and practice: analysis often assumes that the discriminator reaches its optimum on each iteration. In practice, this is essentially never true, often leading to poor gradient estimates for the generator. To address this, AdvAs is a penalty imposed on the generator based on the norm of the gradients used to train the discriminator. This encourages the generator to move towards points where the discriminator is optimal. We demonstrate the effect of applying AdvAs to several GAN objectives, datasets and network architectures. The results indicate a reduction in the mismatch between theory and practice and that AdvAs can lead to improvement of GAN training, as measured by FID scores.}, doi={10.1109/IJCNN52387.2021.9533449}, ISSN={2161-4407}, month={July}, url_ArXiv = {https://arxiv.org/abs/2010.01274}, url_Paper = {https://ieeexplore.ieee.org/document/9533449}, support = {D3M,ETALUMIS} }
@InProceedings{BER-21, title={Sequential core-set Monte Carlo}, author={Beronov, Boyan and Weilbach, Christian and Wood, Frank and Campbell, Trevor}, booktitle={Proceedings of the Thirty-Seventh Conference on Uncertainty in Artificial Intelligence}, pages={2165--2175}, year={2021}, editor={de Campos, Cassio and Maathuis, Marloes H.}, volume={161}, series={Proceedings of Machine Learning Research}, month={27--30 Jul}, publisher={PMLR}, pdf={https://proceedings.mlr.press/v161/beronov21a/beronov21a.pdf}, url={https://proceedings.mlr.press/v161/beronov21a.html}, url_Presentation={https://github.com/plai-group/bibliography/raw/master/presentations_posters/UAI2021_BER_presentation.pdf}, url_Poster={https://github.com/plai-group/bibliography/raw/master/presentations_posters/UAI2021_BER_poster.pdf}, support={D3M}, abstract={Sequential Monte Carlo (SMC) is a general-purpose methodology for recursive Bayesian inference, and is widely used in state space modeling and probabilistic programming. Its resample-move variant reduces the variance of posterior estimates by interleaving Markov chain Monte Carlo (MCMC) steps for particle “rejuvenation”; but this requires accessing all past observations and leads to linearly growing memory size and quadratic computation cost. Under the assumption of exchangeability, we introduce sequential core-set Monte Carlo (SCMC), which achieves constant space and linear time by rejuvenating based on sparse, weighted subsets of past data. In contrast to earlier approaches, which uniformly subsample or throw away observations, SCMC uses a novel online version of a state-of-the-art Bayesian core-set algorithm to incrementally construct a nonparametric, data- and model-dependent variational representation of the unnormalized target density. Experiments demonstrate significantly reduced approximation errors at negligible additional cost.} }
@unpublished{harvey2021image, title={Image Completion via Inference in Deep Generative Models}, author={Harvey, William and Naderiparizi, Saeid and Wood, Frank}, journal={arXiv preprint arXiv:2102.12037}, year={2021}, url_ArXiv = {https://arxiv.org/abs/2102.12037}, eprint={2102.12037}, archivePrefix={arXiv}, support = {D3M}, abstract={We consider image completion from the perspective of amortized inference in an image generative model. We leverage recent state of the art variational auto-encoder architectures that have been shown to produce photo-realistic natural images at non-trivial resolutions. Through amortized inference in such a model we can train neural artifacts that produce diverse, realistic image completions even when the vast majority of an image is missing. We demonstrate superior sample quality and diversity compared to prior art on the CIFAR-10 and FFHQ-256 datasets. We conclude by describing and demonstrating an application that requires an in-painting model with the capabilities ours exhibits: the use of Bayesian optimal experimental design to select the most informative sequence of small field of view x-rays for chest pathology detection.} }
@InProceedings{nguyen2020gaussian, title={Gaussian Process Bandit Optimization of the Thermodynamic Variational Objective}, author={Nguyen, Vu and Masrani, Vaden and Brekelmans, Rob and Osborne, Michael and Wood, Frank}, series={Advances in Neural Information Processing Systems (NeurIPS)}, year={2020}, url_Link = {https://proceedings.neurips.cc/paper/2020/hash/3f2dff7862a70f97a59a1fa02c3ec110-Abstract.html}, url_Paper = {https://proceedings.neurips.cc/paper/2020/file/3f2dff7862a70f97a59a1fa02c3ec110-Paper.pdf}, url_ArXiv={https://arxiv.org/abs/2010.15750}, support = {D3M}, abstract={Achieving the full promise of the Thermodynamic Variational Objective (TVO), a recently proposed variational lower bound on the log evidence involving a one-dimensional Riemann integral approximation, requires choosing a "schedule" of sorted discretization points. This paper introduces a bespoke Gaussian process bandit optimization method for automatically choosing these points. Our approach not only automates their one-time selection, but also dynamically adapts their positions over the course of optimization, leading to improved model learning and inference. We provide theoretical guarantees that our bandit optimization converges to the regret-minimizing choice of integration points. Empirical validation of our algorithm is provided in terms of improved learning and inference in Variational Autoencoders and Sigmoid Belief Networks.} }
@InProceedings{Le-20, title = {Revisiting Reweighted Wake-Sleep for Models with Stochastic Control Flow}, author = {Le, Tuan Anh and Kosiorek, Adam R. and Siddharth, N. and Teh, Yee Whye and Wood, Frank}, pages = {1039--1049}, year = {2020}, editor = {Ryan P. Adams and Vibhav Gogate}, volume = {115}, series = {Proceedings of the 35th conference on Uncertainty in Artificial Intelligence (UAI)}, address = {Tel Aviv, Israel}, month = {22--25 Jul}, publisher = {PMLR}, url_Link = {http://proceedings.mlr.press/v115/le20a.html}, url_Paper = {http://proceedings.mlr.press/v115/le20a/le20a.pdf}, url_ArXiv={https://arxiv.org/abs/1805.10469}, support = {D3M}, abstract = {Stochastic control-flow models (SCFMs) are a class of generative models that involve branching on choices from discrete random variables. Amortized gradient-based learning of SCFMs is challenging as most approaches targeting discrete variables rely on their continuous relaxations—which can be intractable in SCFMs, as branching on relaxations requires evaluating all (exponentially many) branching paths. Tractable alternatives mainly combine REINFORCE with complex control-variate schemes to improve the variance of naive estimators. Here, we revisit the reweighted wake-sleep (RWS) [5] algorithm, and through extensive evaluations, show that it outperforms current state-of-the-art methods in learning SCFMs. Further, in contrast to the importance weighted autoencoder, we observe that RWS learns better models and inference networks with increasing numbers of particles. Our results suggest that RWS is a competitive, often preferable, alternative for learning SCFMs.} }
@inproceedings{TEN-20, title={Semi-supervised Sequential Generative Models}, author={Teng, Michael and Le, Tuan Anh and Scibior, Adam and Wood, Frank}, booktitle={Conference on Uncertainty in Artificial Intelligence (UAI)}, eid = {arXiv:2007.00155}, archivePrefix = {arXiv}, eprint = {2007.00155}, url_Link = {http://www.auai.org/~w-auai/uai2020/accepted.php}, url_Paper={http://www.auai.org/uai2020/proceedings/272_main_paper.pdf}, url_ArXiv = {https://arxiv.org/abs/2007.00155}, support = {D3M}, year={2020} }
@inproceedings{WEI-20, title={Structured Conditional Continuous Normalizing Flows for Efficient Amortized Inference in Graphical Models}, author={Weilbach, Christian and Beronov, Boyan and Wood, Frank and Harvey, William}, booktitle={Proceedings of the Twenty Third International Conference on Artificial Intelligence and Statistics (AISTATS)}, pages={4441--4451}, year={2020}, url_Link={http://proceedings.mlr.press/v108/weilbach20a.html}, url_Paper={http://proceedings.mlr.press/v108/weilbach20a/weilbach20a.pdf}, url_Poster={https://github.com/plai-group/bibliography/blob/master/presentations_posters/PROBPROG2020_WEI.pdf}, support = {D3M}, bibbase_note = {PMLR 108:4441-4451}, abstract = {We exploit minimally faithful inversion of graphical model structures to specify sparse continuous normalizing flows (CNFs) for amortized inference. We find that the sparsity of this factorization can be exploited to reduce the numbers of parameters in the neural network, adaptive integration steps of the flow, and consequently FLOPs at both training and inference time without decreasing performance in comparison to unconstrained flows. By expressing the structure inversion as a compilation pass in a probabilistic programming language, we are able to apply it in a novel way to models as complex as convolutional neural networks. Furthermore, we extend the training objective for CNFs in the context of inference amortization to the symmetric Kullback-Leibler divergence, and demonstrate its theoretical and practical advantages.} }
@inproceedings{BRE-20, author = {{Brekelmans}, Rob and {Masrani}, Vaden and {Wood}, Frank and {Ver Steeg}, Greg and {Galstyan}, Aram}, title = {All in the Exponential Family: Bregman Duality in Thermodynamic Variational Inference}, booktitle={Thirty-seventh International Conference on Machine Learning (ICML 2020)}, keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, year = 2020, month = jul, eid = {arXiv:2007.00642}, archivePrefix = {arXiv}, eprint = {2007.00642}, url_Link = {https://proceedings.icml.cc/book/2020/hash/12311d05c9aa67765703984239511212}, url_Paper={https://proceedings.icml.cc/static/paper_files/icml/2020/2826-Paper.pdf}, url_ArXiv={https://arxiv.org/abs/2007.00642}, support = {D3M}, abstract={The recently proposed Thermodynamic Variational Objective (TVO) leverages thermodynamic integration to provide a family of variational inference objectives, which both tighten and generalize the ubiquitous Evidence Lower Bound (ELBO). However, the tightness of TVO bounds was not previously known, an expensive grid search was used to choose a "schedule" of intermediate distributions, and model learning suffered with ostensibly tighter bounds. In this work, we propose an exponential family interpretation of the geometric mixture curve underlying the TVO and various path sampling methods, which allows us to characterize the gap in TVO likelihood bounds as a sum of KL divergences. We propose to choose intermediate distributions using equal spacing in the moment parameters of our exponential family, which matches grid search performance and allows the schedule to adaptively update over the course of training. Finally, we derive a doubly reparameterized gradient estimator which improves model learning and allows the TVO to benefit from more refined bounds. To further contextualize our contributions, we provide a unified framework for understanding thermodynamic integration and the TVO using Taylor series remainders.} } %@unpublished{WOO-20, % author = {{Wood}, Frank and {Warrington}, Andrew and {Naderiparizi}, Saeid and {Weilbach}, Christian and {Masrani}, Vaden and {Harvey}, William and {Scibior}, Adam and {Beronov}, Boyan and {Nasseri}, Ali}, % title = {Planning as Inference in Epidemiological Models}, % journal = {arXiv e-prints}, % keywords = {Quantitative Biology - Populations and Evolution, Computer Science - Machine Learning, Statistics - Machine Learning}, % year = {2020}, % eid = {arXiv:2003.13221}, % archivePrefix = {arXiv}, % eprint = {2003.13221}, % support = {D3M,COVID,ETALUMIS}, % url_ArXiv={https://arxiv.org/abs/2003.13221}, % url_Paper={https://arxiv.org/pdf/2003.13221.pdf}, % abstract={In this work we demonstrate how existing software tools can be used to automate parts of infectious disease-control policy-making via performing inference in existing epidemiological dynamics models. The kind of inference tasks undertaken include computing, for planning purposes, the posterior distribution over putatively controllable, via direct policy-making choices, simulation model parameters that give rise to acceptable disease progression outcomes. Neither the full capabilities of such inference automation software tools nor their utility for planning is widely disseminated at the current time. Timely gains in understanding about these tools and how they can be used may lead to more fine-grained and less economically damaging policy prescriptions, particularly during the current COVID-19 pandemic.} %}
@inproceedings{WAR-20, title={Coping With Simulators That Don’t Always Return}, author={Warrington, A and Naderiparizi, S and Wood, F}, booktitle={The 23rd International Conference on Artificial Intelligence and Statistics (AISTATS)}, archiveprefix = {arXiv}, eprint = {1906.05462}, year={2020}, url_Link = {http://proceedings.mlr.press/v108/warrington20a.html}, url_Paper = {http://proceedings.mlr.press/v108/warrington20a/warrington20a.pdf}, url_Poster = {https://github.com/plai-group/bibliography/blob/master/presentations_posters/WAR-20.pdf}, url_ArXiv = {https://arxiv.org/abs/2003.12908}, keywords = {simulators, smc, autoregressive flow}, support = {D3M,ETALUMIS}, bibbase_note={PMLR 108:1748-1758}, abstract = {Deterministic models are approximations of reality that are easy to interpret and often easier to build than stochastic alternatives. Unfortunately, as nature is capricious, observational data can never be fully explained by deterministic models in practice. Observation and process noise need to be added to adapt deterministic models to behave stochastically, such that they are capable of explaining and extrapolating from noisy data. We investigate and address computational inefficiencies that arise from adding process noise to deterministic simulators that fail to return for certain inputs; a property we describe as "brittle." We show how to train a conditional normalizing flow to propose perturbations such that the simulator succeeds with high probability, increasing computational efficiency.} }
@inproceedings{HAR-20, title={Attention for Inference Compilation}, author={Harvey, W and Munk, A and Baydin, AG and Bergholm, A and Wood, F}, booktitle={The second International Conference on Probabilistic Programming (PROBPROG)}, year={2020}, archiveprefix = {arXiv}, eprint = {1910.11961}, support = {D3M,LwLL}, url_Paper={https://arxiv.org/pdf/1910.11961.pdf}, url_ArXiv={https://arxiv.org/abs/1910.11961}, url_Poster={https://github.com/plai-group/bibliography/blob/master/presentations_posters/PROBPROG2020_HAR.pdf}, abstract = {We present a new approach to automatic amortized inference in universal probabilistic programs which improves performance compared to current methods. Our approach is a variation of inference compilation (IC) which leverages deep neural networks to approximate a posterior distribution over latent variables in a probabilistic program. A challenge with existing IC network architectures is that they can fail to model long-range dependencies between latent variables. To address this, we introduce an attention mechanism that attends to the most salient variables previously sampled in the execution of a probabilistic program. We demonstrate that the addition of attention allows the proposal distributions to better match the true posterior, enhancing inference about latent variables in simulators.}, }
@inproceedings{MUN-20, title={Deep probabilistic surrogate networks for universal simulator approximation}, author={Munk, Andreas and Ścibior, Adam and Baydin, AG and Stewart, A and Fernlund, A and Poursartip, A and Wood, Frank}, booktitle={The second International Conference on Probabilistic Programming (PROBPROG)}, year={2020}, archiveprefix = {arXiv}, eprint = {1910.11950}, support = {D3M,ETALUMIS}, url_Paper={https://arxiv.org/pdf/1910.11950.pdf}, url_ArXiv={https://arxiv.org/abs/1910.11950}, url_Poster={https://github.com/plai-group/bibliography/blob/master/presentations_posters/PROBPROG2020_MUN.pdf}, abstract = {We present a framework for automatically structuring and training fast, approximate, deep neural surrogates of existing stochastic simulators. Unlike traditional approaches to surrogate modeling, our surrogates retain the interpretable structure of the reference simulators. The particular way we achieve this allows us to replace the reference simulator with the surrogate when undertaking amortized inference in the probabilistic programming sense. The fidelity and speed of our surrogates allow for not only faster "forward" stochastic simulation but also for accurate and substantially faster inference. We support these claims via experiments that involve a commercial composite-materials curing simulator. Employing our surrogate modeling technique makes inference an order of magnitude faster, opening up the possibility of doing simulator-based, non-invasive, just-in-time parts quality testing; in this case inferring safety-critical latent internal temperature profiles of composite materials undergoing curing from surface temperature profile measurements.}, }
@inproceedings{BAT-20, author = {{Bateni}, Peyman and {Goyal}, Raghav and {Masrani}, Vaden and {Wood}, Frank and {Sigal}, Leonid}, title = {Improved Few-Shot Visual Classification}, booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)}, keywords = {LwLL, Computer Science - Computer Vision and Pattern Recognition}, year = {2020}, eid = {arXiv:1912.03432}, archivePrefix = {arXiv}, eprint = {1912.03432}, support = {D3M,LwLL}, url_Link = {https://openaccess.thecvf.com/content_CVPR_2020/html/Bateni_Improved_Few-Shot_Visual_Classification_CVPR_2020_paper.html}, url_Paper={http://openaccess.thecvf.com/content_CVPR_2020/papers/Bateni_Improved_Few-Shot_Visual_Classification_CVPR_2020_paper.pdf}, url_ArXiv={https://arxiv.org/abs/1912.03432}, abstract={Few-shot learning is a fundamental task in computer vision that carries the promise of alleviating the need for exhaustively labeled data. Most few-shot learning approaches to date have focused on progressively more complex neural feature extractors and classifier adaptation strategies, as well as the refinement of the task definition itself. In this paper, we explore the hypothesis that a simple class-covariance-based distance metric, namely the Mahalanobis distance, adopted into a state of the art few-shot learning approach (CNAPS) can, in and of itself, lead to a significant performance improvement. We also discover that it is possible to learn adaptive feature extractors that allow useful estimation of the high dimensional feature covariances required by this metric from surprisingly few samples. The result of our work is a new "Simple CNAPS" architecture which has up to 9.2% fewer trainable parameters than CNAPS and performs up to 6.1% better than state of the art on the standard few-shot image classification benchmark dataset.} } %@inproceedings{WAN-19, % title={Safer End-to-End Autonomous Driving via Conditional Imitation Learning and Command Augmentation}, % author={Wang, R and Scibior, A and Wood F}, % booktitle={NeurIPS self-driving car workshop}, % year={2019}, % archiveprefix = {arXiv}, % eprint = {1909.09721}, % support = {D3M}, % url_Paper = {https://arxiv.org/pdf/1909.09721.pdf}, % url_ArXiv={https://arxiv.org/abs/1909.09721}, % abstract={Imitation learning is a promising approach to end-to-end training of autonomous vehicle controllers. Typically the driving process with such approaches is entirely automatic and black-box, although in practice it is desirable to control the vehicle through high-level commands, such as telling it which way to go at an intersection. In existing work this has been accomplished by the application of a branched neural architecture, since directly providing the command as an additional input to the controller often results in the command being ignored. In this work we overcome this limitation by learning a disentangled probabilistic latent variable model that generates the steering commands. We achieve faithful command-conditional generation without using a branched architecture and demonstrate improved stability of the controller, applying only a variational objective without any domain-specific adjustments. On top of that, we extend our model with an additional latent variable and augment the dataset to train a controller that is robust to unsafe commands, such as asking it to turn into a wall. 
% The main contribution of this work is a recipe for building controllable imitation driving agents that improves upon multiple aspects of the current state of the art relating to robustness and interpretability.} %}
@unpublished{yoo2020ensemble, title={Ensemble Squared: A Meta AutoML System}, author={Jason Yoo and Tony Joseph and Dylan Yung and S. Ali Nasseri and Frank Wood}, year={2020}, eprint={2012.05390}, archivePrefix={arXiv}, primaryClass={cs.LG}, url_ArXiv={https://arxiv.org/abs/2012.05390}, url_Paper={https://arxiv.org/pdf/2012.05390.pdf}, support = {D3M}, abstract = {The continuing rise in the number of problems amenable to machine learning solutions, coupled with simultaneous growth in both computing power and variety of machine learning techniques has led to an explosion of interest in automated machine learning (AutoML). This paper presents Ensemble Squared (Ensemble2), a "meta" AutoML system that ensembles at the level of AutoML systems. Ensemble2 exploits the diversity of existing, competing AutoML systems by ensembling the top-performing models simultaneously generated by a set of them. Our work shows that diversity in AutoML systems is sufficient to justify ensembling at the AutoML system level. In demonstrating this, we also establish a new state of the art AutoML result on the OpenML classification challenge.} }
@unpublished{NAD-20a, title={Uncertainty in Neural Processes}, author={Saeid Naderiparizi and Kenny Chiu and Benjamin Bloem-Reddy and Frank Wood}, journal={arXiv preprint arXiv:2010.03753}, year={2020}, eid = {arXiv:2010.03753}, archivePrefix = {arXiv}, eprint = {2010.03753}, url_ArXiv={https://arxiv.org/abs/2010.03753}, url_Paper={https://arxiv.org/pdf/2010.03753.pdf}, support = {D3M,ETALUMIS}, abstract={We explore the effects of architecture and training objective choice on amortized posterior predictive inference in probabilistic conditional generative models. We aim this work to be a counterpoint to a recent trend in the literature that stresses achieving good samples when the amount of conditioning data is large. We instead focus our attention on the case where the amount of conditioning data is small. We highlight specific architecture and objective choices that we find lead to qualitative and quantitative improvement to posterior inference in this low data regime. Specifically we explore the effects of choices of pooling operator and variational family on posterior quality in neural processes. Superior posterior predictive samples drawn from our novel neural process architectures are demonstrated via image completion/in-painting experiments.} }
@inproceedings{WAR-19a, title={Coping With Simulators That Don’t Always Return}, author={Warrington, A and Naderiparizi, S and Wood, F}, booktitle={2nd Symposium on Advances in Approximate Bayesian Inference (AABI)}, year={2019}, url_Link={https://openreview.net/forum?id=SJecKyhEKr&noteId=SJecKyhEKr}, url_Paper={https://openreview.net/pdf?id=SJecKyhEKr}, keywords = {simulators, smc, autoregressive flow}, support = {D3M,ETALUMIS}, abstract = {Deterministic models are approximations of reality that are often easier to build and interpret than stochastic alternatives. Unfortunately, as nature is capricious, observational data can never be fully explained by deterministic models in practice. Observation and process noise need to be added to adapt deterministic models to behave stochastically, such that they are capable of explaining and extrapolating from noisy data. Adding process noise to deterministic simulators can induce a failure in the simulator resulting in no return value for certain inputs -- a property we describe as ``brittle.'' We investigate and address the wasted computation that arises from these failures, and the effect of such failures on downstream inference tasks. We show that performing inference in this space can be viewed as rejection sampling, and train a conditional normalizing flow as a proposal over noise values such that there is a low probability that the simulator crashes, increasing computational efficiency and inference fidelity for a fixed sample budget when used as the proposal in an approximate inference algorithm.} }
@inproceedings{HAR-19, title={Near-Optimal Glimpse Sequences for Improved Hard Attention Neural Network Training}, author={Harvey, William and Teng, Michael and Wood, Frank}, booktitle={NeurIPS Workshop on Bayesian Deep Learning}, year={2019}, support = {D3M,LwLL}, archiveprefix = {arXiv}, eprint = {1906.05462}, url_Paper={http://bayesiandeeplearning.org/2019/papers/38.pdf}, url_ArXiv={https://arxiv.org/abs/1906.05462}, url_Poster={https://github.com/plai-group/bibliography/blob/master/presentations_posters/HAR-19.pdf}, abstract = {We introduce the use of Bayesian optimal experimental design techniques for generating glimpse sequences to use in semi-supervised training of hard attention networks. Hard attention holds the promise of greater energy efficiency and superior inference performance. Employing such networks for image classification usually involves choosing a sequence of glimpse locations from a stochastic policy. As the outputs of observations are typically non-differentiable with respect to their glimpse locations, unsupervised gradient learning of such a policy requires REINFORCE-style updates. Also, the only reward signal is the final classification accuracy. For these reasons hard attention networks, despite their promise, have not achieved the wide adoption that soft attention networks have and, in many practical settings, are difficult to train. We find that our method for semi-supervised training makes it easier and faster to train hard attention networks and correspondingly could make them practical to consider in situations where they were not before.}, }
@inproceedings{WEI-19, title={Efficient Inference Amortization in Graphical Models using Structured Continuous Conditional Normalizing Flows}, author={Weilbach, Christian and Beronov, Boyan and Harvey, William and Wood, Frank}, booktitle={2nd Symposium on Advances in Approximate Bayesian Inference (AABI)}, support = {D3M}, url_Link={https://openreview.net/forum?id=BJlhYknNFS}, url_Paper={https://openreview.net/pdf?id=BJlhYknNFS}, abstract = {We introduce a more efficient neural architecture for amortized inference, which combines continuous and conditional normalizing flows using a principled choice of structure. Our gradient flow derives its sparsity pattern from the minimally faithful inverse of its underlying graphical model. We find that this factorization reduces the necessary numbers both of parameters in the neural network and of adaptive integration steps in the ODE solver. Consequently, the throughput at training time and inference time is increased, without decreasing performance in comparison to unconstrained flows. By expressing the structural inversion and the flow construction as compilation passes of a probabilistic programming language, we demonstrate their applicability to the stochastic inversion of realistic models such as convolutional neural networks (CNN).}, year={2019} }
@inproceedings{CAM-19, title={Sparse Variational Inference: Bayesian Coresets from Scratch}, author={Campbell, Trevor and Beronov, Boyan}, booktitle={Conference on Neural Information Processing Systems (NeurIPS)}, pages={11457--11468}, year={2019}, eid = {arXiv:1906.03329}, archivePrefix = {arXiv}, eprint = {1906.03329}, support = {D3M}, url_Link={http://papers.nips.cc/paper/9322-sparse-variational-inference-bayesian-coresets-from-scratch}, url_Paper={http://papers.nips.cc/paper/9322-sparse-variational-inference-bayesian-coresets-from-scratch.pdf}, url_Poster={https://github.com/plai-group/bibliography/raw/master/presentations_posters/CAM-19.pdf}, bibbase_note={1st prize, Student poster competition, AICan (Annual Meeting, Pan-Canadian AI Strategy, Canadian Institute for Advanced Research). Vancouver, Canada, Dec. 9, 2019}, abstract={The proliferation of automated inference algorithms in Bayesian statistics has provided practitioners newfound access to fast, reproducible data analysis and powerful statistical models. Designing automated methods that are also both computationally scalable and theoretically sound, however, remains a significant challenge. Recent work on Bayesian coresets takes the approach of compressing the dataset before running a standard inference algorithm, providing both scalability and guarantees on posterior approximation error. But the automation of past coreset methods is limited because they depend on the availability of a reasonable coarse posterior approximation, which is difficult to specify in practice. In the present work we remove this requirement by formulating coreset construction as sparsity-constrained variational inference within an exponential family. This perspective leads to a novel construction via greedy optimization, and also provides a unifying information-geometric view of present and past methods. The proposed Riemannian coreset construction algorithm is fully automated, requiring no problem-specific inputs aside from the probabilistic model and dataset. In addition to being significantly easier to use than past methods, experiments demonstrate that past coreset constructions are fundamentally limited by the fixed coarse posterior approximation; in contrast, the proposed algorithm is able to continually improve the coreset, providing state-of-the-art Bayesian dataset summarization with orders-of-magnitude reduction in KL divergence to the exact posterior.} }
@inproceedings{GRA-19, title={Efficient Bayesian Inference for Nested Simulators}, author={Gram-Hansen, B and Schroeder de Witt, C and Zinkov, R and Naderiparizi, S and Scibior, A and Munk, A and Wood, F and Ghadiri, M and Torr, P and Whye Teh, Y and Gunes Baydin, A and Rainforth, T}, booktitle={2nd Symposium on Advances in Approximate Bayesian Inference (AABI)}, year={2019}, support = {D3M}, url_Link={https://openreview.net/forum?id=rJeMcy2EtH}, url_Paper={https://openreview.net/pdf?id=rJeMcy2EtH}, abstract={We introduce two approaches for conducting efficient Bayesian inference in stochastic simulators containing nested stochastic sub-procedures, i.e., internal procedures for which the density cannot be calculated directly such as rejection sampling loops. The resulting class of simulators are used extensively throughout the sciences and can be interpreted as probabilistic generative models. However, drawing inferences from them poses a substantial challenge due to the inability to evaluate even their unnormalised density, preventing the use of many standard inference procedures like Markov Chain Monte Carlo (MCMC). To address this, we introduce inference algorithms based on a two-step approach that first approximates the conditional densities of the individual sub-procedures, before using these approximations to run MCMC methods on the full program. Because the sub-procedures can be dealt with separately and are lower-dimensional than that of the overall problem, this two-step process allows them to be isolated and thus be tractably dealt with, without placing restrictions on the overall dimensionality of the problem. We demonstrate the utility of our approach on a simple, artificially constructed simulator.} }
@inproceedings{MAS-19, title={The Thermodynamic Variational Objective}, author={Masrani, Vaden and Le, Tuan Anh and Wood, Frank}, booktitle={Thirty-third Conference on Neural Information Processing Systems (NeurIPS)}, archiveprefix = {arXiv}, eprint = {1907.00031}, url_Paper={https://arxiv.org/pdf/1907.00031.pdf}, url_ArXiv={https://arxiv.org/abs/1907.00031}, url_Poster={https://github.com/plai-group/bibliography/blob/master/presentations_posters/neurips_tvo_poster.pdf}, support = {D3M}, abstract={We introduce the thermodynamic variational objective (TVO) for learning in both continuous and discrete deep generative models. The TVO arises from a key connection between variational inference and thermodynamic integration that results in a tighter lower bound to the log marginal likelihood than the standard variational evidence lower bound (ELBO) while remaining as broadly applicable. We provide a computationally efficient gradient estimator for the TVO that applies to continuous, discrete, and non-reparameterizable distributions and show that the objective functions used in variational inference, variational autoencoders, wake sleep, and inference compilation are all special cases of the TVO. We use the TVO to learn both discrete and continuous deep generative models and empirically demonstrate state of the art model and inference network learning.}, year={2019} }
@inproceedings{BAY-19, title={Etalumis: Bringing Probabilistic Programming to Scientific Simulators at Scale}, author={Baydin, At{\i}l{\i}m G{\"u}ne{\c{s}} and Shao, Lei and Bhimji, Wahid and Heinrich, Lukas and Meadows, Lawrence and Liu, Jialin and Munk, Andreas and Naderiparizi, Saeid and Gram-Hansen, Bradley and Louppe, Gilles and others}, booktitle={the International Conference for High Performance Computing, Networking, Storage and Analysis (SC ’19)}, archiveprefix = {arXiv}, eprint = {1907.03382}, support = {D3M,ETALUMIS}, url_Paper={https://arxiv.org/pdf/1907.03382.pdf}, url_ArXiv={https://arxiv.org/abs/1907.03382}, abstract={Probabilistic programming languages (PPLs) are receiving widespread attention for performing Bayesian inference in complex generative models. However, applications to science remain limited because of the impracticability of rewriting complex scientific simulators in a PPL, the computational cost of inference, and the lack of scalable implementations. To address these, we present a novel PPL framework that couples directly to existing scientific simulators through a cross-platform probabilistic execution protocol and provides Markov chain Monte Carlo (MCMC) and deep-learning-based inference compilation (IC) engines for tractable inference. To guide IC inference, we perform distributed training of a dynamic 3DCNN--LSTM architecture with a PyTorch-MPI-based framework on 1,024 32-core CPU nodes of the Cori supercomputer with a global minibatch size of 128k: achieving a performance of 450 Tflop/s through enhancements to PyTorch. We demonstrate a Large Hadron Collider (LHC) use-case with the C++ Sherpa simulator and achieve the largest-scale posterior inference in a Turing-complete PPL.}, year={2019}, doi={10.1145/3295500.3356180} }
@inproceedings{WAR-19, title={The Virtual Patch Clamp: Imputing C. elegans Membrane Potentials from Calcium Imaging}, author={Warrington, Andrew and Spencer, Arthur and Wood, Frank}, booktitle={NeurIPS 2019 Workshop Neuro AI}, archiveprefix = {arXiv}, eprint = {1907.11075}, support = {D3M}, url_Paper={https://arxiv.org/pdf/1907.11075.pdf}, url_ArXiv={https://arxiv.org/abs/1907.11075}, url_Poster={https://github.com/plai-group/bibliography/blob/master/presentations_posters/WAR-19.pdf}, abstract={We develop a stochastic whole-brain and body simulator of the nematode roundworm Caenorhabditis elegans (C. elegans) and show that it is sufficiently regularizing to allow imputation of latent membrane potentials from partial calcium fluorescence imaging observations. This is the first attempt we know of to "complete the circle," where an anatomically grounded whole-connectome simulator is used to impute a time-varying "brain" state at single-cell fidelity from covariates that are measurable in practice. The sequential Monte Carlo (SMC) method we employ not only enables imputation of said latent states but also presents a strategy for learning simulator parameters via variational optimization of the noisy model evidence approximation provided by SMC. Our imputation and parameter estimation experiments were conducted on distributed systems using novel implementations of the aforementioned techniques applied to synthetic data of dimension and type representative of that which are measured in laboratories currently.}, year={2019} }
@inproceedings{GOL-19, title={Amortized Monte Carlo Integration}, author={Goli{\'n}ski, Adam and Wood, Frank and Rainforth, Tom}, booktitle={Proceedings of the International Conference on Machine Learning (ICML)}, year={2019}, archiveprefix = {arXiv}, eprint = {1907.08082}, url_Paper={https://arxiv.org/pdf/1907.08082.pdf}, url_ArXiv={https://arxiv.org/abs/1907.08082}, url_Presentation={https://icml.cc/Conferences/2019/ScheduleMultitrack?event=4702}, support = {D3M}, abstract={Current approaches to amortizing Bayesian inference focus solely on approximating the posterior distribution. Typically, this approximation is, in turn, used to calculate expectations for one or more target functions - a computational pipeline which is inefficient when the target function(s) are known upfront. In this paper, we address this inefficiency by introducing AMCI, a method for amortizing Monte Carlo integration directly. AMCI operates similarly to amortized inference but produces three distinct amortized proposals, each tailored to a different component of the overall expectation calculation. At runtime, samples are produced separately from each amortized proposal, before being combined to an overall estimate of the expectation. We show that while existing approaches are fundamentally limited in the level of accuracy they can achieve, AMCI can theoretically produce arbitrarily small errors for any integrable target function using only a single sample from each proposal at runtime. We further show that it is able to empirically outperform the theoretically optimal self-normalized importance sampler on a number of example problems. Furthermore, AMCI allows not only for amortizing over datasets but also amortizing over target functions.} }
@inproceedings{ZHO-19, title={{LF-PPL}: A Low-Level First Order Probabilistic Programming Language for Non-Differentiable Models}, author={Zhou, Yuan and Gram-Hansen, Bradley J and Kohn, Tobias and Rainforth, Tom and Yang, Hongseok and Wood, Frank}, booktitle={Proceedings of the Twentieth International Conference on Artificial Intelligence and Statistics (AISTATS)}, year={2019}, archiveprefix = {arXiv}, eprint = {1903.02482}, support = {D3M}, url_Paper={https://arxiv.org/pdf/1903.02482.pdf}, url_ArXiv={https://arxiv.org/abs/1903.02482}, abstract={We develop a new Low-level, First-order Probabilistic Programming Language (LF-PPL) suited for models containing a mix of continuous, discrete, and/or piecewise-continuous variables. The key success of this language and its compilation scheme is in its ability to automatically distinguish parameters the density function is discontinuous with respect to, while further providing runtime checks for boundary crossings. This enables the introduction of new inference engines that are able to exploit gradient information, while remaining efficient for models which are not everywhere differentiable. We demonstrate this ability by incorporating a discontinuous Hamiltonian Monte Carlo (DHMC) inference engine that is able to deliver automated and efficient inference for non-differentiable models. Our system is backed up by a mathematical formalism that ensures that any model expressed in this language has a density with measure zero discontinuities to maintain the validity of the inference engine.} }
@techreport{WOO-19, title={Hasty: A Generative Model Compiler}, author={Wood, Frank and Teng, Michael and Zinkov, Rob}, year={2019}, institution={University of Oxford, Oxford, United Kingdom}, url_Link={https://apps.dtic.mil/sti/citations/AD1072839}, url_Paper={https://apps.dtic.mil/sti/pdfs/AD1072839.pdf}, support = {D3M}, abstract = {This work describes our contribution of proof-of-concept primitives to the D3M program and research progress made towards an initial version of Hasty. Although we were unable to complete the initial version of Hasty, or to contribute to the D3M primitive library the types of primitives that Hasty will enable, we did train a number of Highly Qualified Personnel (HQP) and have interacted with the AutoML, probabilistic programming languages, neural networking, and other communities which our work is expected to impact.} }
@unpublished{TEN-19, title={Imitation Learning of Factored Multi-agent Reactive Models}, author={Teng, Michael and Le, Tuan Anh and Scibior, Adam and Wood, Frank}, archiveprefix = {arXiv}, eprint = {1903.04714}, year={2019}, url_Paper={https://arxiv.org/pdf/1903.04714.pdf}, url_ArXiv={https://arxiv.org/abs/1903.04714}, support = {D3M}, abstract={We apply recent advances in deep generative modeling to the task of imitation learning from biological agents. Specifically, we apply variations of the variational recurrent neural network model to a multi-agent setting where we learn policies of individual uncoordinated agents acting based on their perceptual inputs and their hidden belief state. We learn stochastic policies for these agents directly from observational data, without constructing a reward function. An inference network learned jointly with the policy allows for efficient inference over the agent's belief state given a sequence of its current perceptual inputs and the prior actions it performed, which lets us extrapolate observed sequences of behavior into the future while maintaining uncertainty estimates over future trajectories. We test our approach on a dataset of flies interacting in a 2D environment, where we demonstrate better predictive performance than existing approaches which learn deterministic policies with recurrent neural networks. We further show that the uncertainty estimates over future trajectories we obtain are well calibrated, which makes them useful for a variety of downstream processing tasks.}, }