Towards control of opinion diversity by introducing zealots into a polarised social group.
Vendeville, A.; Guedj, B.; and Zhou, S.
In Benito, R. M.; Cherifi, C.; Cherifi, H.; Moro, E.; Rocha, L. M.; and Sales-Pardo, M., editor(s), Complex Networks & Their Applications X, pages 341–352, 2022. Springer International Publishing.
@inproceedings{vendeville2020opinions,
  title = {Towards control of opinion diversity by introducing zealots into a polarised social group},
  author = {Antoine Vendeville and Benjamin Guedj and Shi Zhou},
  year = {2022},
  editor = {Benito, Rosa Maria and Cherifi, Chantal and Cherifi, Hocine and Moro, Esteban and Rocha, Luis M. and Sales-Pardo, Marta},
  publisher = {Springer International Publishing},
  pages = {341--352},
  booktitle = {Complex Networks {\&} Their Applications X},
  abstract = {We explore a method to influence or even control the diversity of opinions within a polarised social group. We leverage the voter model in which users hold binary opinions and repeatedly update their beliefs based on others they connect with. Stubborn agents who never change their minds ("zealots") are also disseminated through the network, which is modelled by a connected graph. Building on earlier results, we provide a closed-form expression for the average opinion of the group at equilibrium. This leads us to a strategy to inject zealots into a polarised network in order to shift the average opinion towards any target value. We account for the possible presence of a backfire effect, which may lead the group to react negatively and reinforce its level of polarisation in response. Our results are supported by numerical experiments on synthetic data.},
  isbn = {978-3-030-93413-2},
  url = {https://link.springer.com/chapter/10.1007%2F978-3-030-93413-2_29},
  url_arXiv = {https://arxiv.org/abs/2006.07265},
  url_PDF = {https://arxiv.org/pdf/2006.07265.pdf},
  url_Code = {https://github.com/AntoineVendeville/HowOpinionsCrystallise},
  doi = {10.1007/978-3-030-93413-2_29},
  eprint = {2006.07265},
  archivePrefix = {arXiv},
  primaryClass = {cs.SI},
  keywords = {mine}
}
Abstract: We explore a method to influence or even control the diversity of opinions within a polarised social group. We leverage the voter model in which users hold binary opinions and repeatedly update their beliefs based on others they connect with. Stubborn agents who never change their minds ("zealots") are also disseminated through the network, which is modelled by a connected graph. Building on earlier results, we provide a closed-form expression for the average opinion of the group at equilibrium. This leads us to a strategy to inject zealots into a polarised network in order to shift the average opinion towards any target value. We account for the possible presence of a backfire effect, which may lead the group to react negatively and reinforce its level of polarisation in response. Our results are supported by numerical experiments on synthetic data.
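For intuition, here is a minimal simulation of this setting (plain NumPy; the complete-graph topology, zealot counts and update schedule are illustrative assumptions, not the paper's experimental setup):

import numpy as np

rng = np.random.default_rng(0)
n, steps = 200, 20000
opinions = rng.integers(0, 2, size=n)                  # binary opinions in {0, 1}
zealots_1 = rng.choice(n, size=20, replace=False)      # stubborn supporters of opinion 1
rest = np.setdiff1d(np.arange(n), zealots_1)
zealots_0 = rng.choice(rest, size=10, replace=False)   # stubborn supporters of opinion 0
opinions[zealots_1], opinions[zealots_0] = 1, 0
stubborn = np.zeros(n, dtype=bool)
stubborn[zealots_1] = stubborn[zealots_0] = True

for _ in range(steps):
    i = rng.integers(n)
    if not stubborn[i]:
        opinions[i] = opinions[rng.integers(n)]        # copy a uniformly chosen user

print("average opinion at the end:", opinions.mean())

Injecting more zealots on one side drags the long-run average opinion towards that side; the paper quantifies this lever in closed form.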
MAGMA: Inference and Prediction with Multi-Task Gaussian Processes.
Leroy, A.; Latouche, P.; Guedj, B.; and Gey, S.
Machine Learning, 2022.
@article{leroy2020magma,
  title = {{MAGMA}: Inference and Prediction with Multi-Task {Gaussian} Processes},
  author = {Arthur Leroy and Pierre Latouche and Benjamin Guedj and Servane Gey},
  year = {2022},
  journal = {Machine Learning},
  abstract = {A novel multi-task Gaussian process (GP) framework is proposed, by using a common mean process for sharing information across tasks. In particular, we investigate the problem of time series forecasting, with the objective to improve multiple-step-ahead predictions. The common mean process is defined as a GP for which the hyper-posterior distribution is tractable. Therefore an EM algorithm is derived for handling both hyper-parameters optimisation and hyper-posterior computation. Unlike previous approaches in the literature, the model fully accounts for uncertainty and can handle irregular grids of observations while maintaining explicit formulations, by modelling the mean process in a unified GP framework. Predictive analytical equations are provided, integrating information shared across tasks through a relevant prior mean. This approach greatly improves the predictive performances, even far from observations, and may reduce significantly the computational complexity compared to traditional multi-task GP models. Our overall algorithm is called MAGMA (standing for Multi tAsk GPs with common MeAn). The quality of the mean process estimation, predictive performances, and comparisons to alternatives are assessed in various simulated scenarios and on real datasets.},
  eprint = {2007.10731},
  archivePrefix = {arXiv},
  primaryClass = {stat.CO},
  url = {https://arxiv.org/abs/2007.10731},
  url_PDF = {https://arxiv.org/pdf/2007.10731.pdf},
  url_Code = {https://github.com/ArthurLeroy/MAGMA},
  doi = {10.1007/s10994-022-06172-1},
  keywords = {mine}
}
Abstract: A novel multi-task Gaussian process (GP) framework is proposed, by using a common mean process for sharing information across tasks. In particular, we investigate the problem of time series forecasting, with the objective to improve multiple-step-ahead predictions. The common mean process is defined as a GP for which the hyper-posterior distribution is tractable. Therefore an EM algorithm is derived for handling both hyper-parameters optimisation and hyper-posterior computation. Unlike previous approaches in the literature, the model fully accounts for uncertainty and can handle irregular grids of observations while maintaining explicit formulations, by modelling the mean process in a unified GP framework. Predictive analytical equations are provided, integrating information shared across tasks through a relevant prior mean. This approach greatly improves the predictive performances, even far from observations, and may reduce significantly the computational complexity compared to traditional multi-task GP models. Our overall algorithm is called MAGMA (standing for Multi tAsk GPs with common MeAn). The quality of the mean process estimation, predictive performances, and comparisons to alternatives are assessed in various simulated scenarios and on real datasets.
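As a toy illustration of the common mean mechanism: if each task is observed on a shared grid as y_i = mu + eps_i with Gaussian noise, the hyper-posterior of mu has the usual conjugate Gaussian form. The sketch below assumes a common, known noise covariance; it is not the paper's EM algorithm, which also optimises hyper-parameters and handles irregular grids:

import numpy as np

def se_kernel(x, y, var=1.0, length=0.2):
    # Squared-exponential covariance on a 1-D grid.
    return var * np.exp(-0.5 * (x[:, None] - y[None, :]) ** 2 / length ** 2)

rng = np.random.default_rng(1)
grid = np.linspace(0, 1, 30)
K0 = se_kernel(grid, grid) + 1e-6 * np.eye(30)   # prior covariance of the mean process
m0 = np.zeros(30)                                # prior mean of the mean process
tasks = [np.sin(2 * np.pi * grid) + 0.3 * rng.standard_normal(30) for _ in range(5)]
Psi_inv = np.linalg.inv(0.3 ** 2 * np.eye(30))   # assumed common task noise precision

# Conjugate Gaussian update: precisions add up across tasks.
K_hat = np.linalg.inv(np.linalg.inv(K0) + len(tasks) * Psi_inv)
m_hat = K_hat @ (np.linalg.inv(K0) @ m0 + Psi_inv @ np.sum(tasks, axis=0))

The hyper-posterior mean m_hat pools all tasks, which is what lets the common mean process transfer information to a new task even far from its own observations.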
An end-to-end data-driven optimisation framework for constrained trajectories.
Dewez, F.; Guedj, B.; Talpaert, A.; and Vandewalle, V.
Data-Centric Engineering, 3, 2022.
@article{dewez2020endtoend,
  title = {An end-to-end data-driven optimisation framework for constrained trajectories},
  author = {Florent Dewez and Benjamin Guedj and Arthur Talpaert and Vincent Vandewalle},
  year = {2022},
  journal = {Data-Centric Engineering},
  publisher = {Cambridge University Press},
  volume = {3},
  abstract = {Many real-world problems require to optimize trajectories under constraints. Classical approaches are often based on optimal control methods but require an exact knowledge of the underlying dynamics and constraints, which could be challenging or even out of reach. In view of this, we leverage data-driven approaches to design a new end-to-end framework which is dynamics-free for optimized and realistic trajectories. Trajectories are here decomposed on function basis, trading the initial infinite dimension problem on a multivariate functional space for a parameter optimization problem. Then a maximum a posteriori approach which incorporates information from data is used to obtain a new penalized optimization problem. The penalized term narrows the search on a region centered on data and includes estimated features of the problem. We apply our data-driven approach to two settings in aeronautics and sailing routes optimization. The developed approach is implemented in the Python library PyRotor.},
  url = {https://www.cambridge.org/core/journals/data-centric-engineering/article/an-endtoend-datadriven-optimization-framework-for-constrained-trajectories/B4BF729E1681F795FFE79A1F4B544CE5},
  url_arXiv = {https://arxiv.org/abs/2011.11820},
  url_PDF = {https://www.cambridge.org/core/services/aop-cambridge-core/content/view/B4BF729E1681F795FFE79A1F4B544CE5/S2632673622000065a.pdf/an-end-to-end-data-driven-optimization-framework-for-constrained-trajectories.pdf},
  url_Code = {https://github.com/bguedj/pyrotor},
  eprint = {2011.11820},
  archivePrefix = {arXiv},
  doi = {10.1017/dce.2022.6},
  primaryClass = {stat.AP},
  keywords = {mine}
}
Abstract: Many real-world problems require to optimize trajectories under constraints. Classical approaches are often based on optimal control methods but require an exact knowledge of the underlying dynamics and constraints, which could be challenging or even out of reach. In view of this, we leverage data-driven approaches to design a new end-to-end framework which is dynamics-free for optimized and realistic trajectories. Trajectories are here decomposed on function basis, trading the initial infinite dimension problem on a multivariate functional space for a parameter optimization problem. Then a maximum a posteriori approach which incorporates information from data is used to obtain a new penalized optimization problem. The penalized term narrows the search on a region centered on data and includes estimated features of the problem. We apply our data-driven approach to two settings in aeronautics and sailing routes optimization. The developed approach is implemented in the Python library PyRotor.
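The decomposition step is easy to sketch: represent the trajectory by its coefficients on a fixed function basis and optimise those coefficients under a penalty that keeps the solution close to a data-estimated reference. This is an illustrative toy problem, not PyRotor's actual API; the basis, cost and penalty below are placeholders:

import numpy as np
from scipy.optimize import minimize

t = np.linspace(0, 1, 100)
basis = np.vstack([t ** k for k in range(5)]).T     # polynomial basis (placeholder choice)
ref_coeffs = np.array([0.0, 1.0, -0.5, 0.0, 0.0])   # stands in for data-estimated coefficients

def cost(c):
    traj = basis @ c
    effort = np.sum(np.diff(traj, 2) ** 2)          # toy smoothness/effort cost
    penalty = np.sum((c - ref_coeffs) ** 2)         # narrows the search around observed data
    return effort + 10.0 * penalty

cons = [{"type": "eq", "fun": lambda c: basis[0] @ c - 0.0},    # fixed start point
        {"type": "eq", "fun": lambda c: basis[-1] @ c - 0.5}]   # fixed end point
res = minimize(cost, ref_coeffs, constraints=cons)
optimised_trajectory = basis @ res.x

The penalty plays the role described in the abstract: the infinite-dimensional functional problem becomes a finite parameter problem, biased towards realistic trajectories.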
On Margins and Derandomisation in PAC-Bayes.
Biggs, F.; and Guedj, B.
In Camps-Valls, G.; Ruiz, F. J. R.; and Valera, I., editor(s), Proceedings of The 25th International Conference on Artificial Intelligence and Statistics [AISTATS], volume 151 of Proceedings of Machine Learning Research, pages 3709–3731, 28–30 Mar 2022. PMLR.
@inproceedings{biggs2021margins,
  title = {On Margins and Derandomisation in {PAC-Bayes}},
  author = {Felix Biggs and Benjamin Guedj},
  year = {2022},
  pages = {3709--3731},
  editor = {Camps-Valls, Gustau and Ruiz, Francisco J. R. and Valera, Isabel},
  volume = {151},
  series = {Proceedings of Machine Learning Research},
  month = {28--30 Mar},
  publisher = {PMLR},
  booktitle = {Proceedings of The 25th International Conference on Artificial Intelligence and Statistics [AISTATS]},
  abstract = {We give a general recipe for derandomising PAC-Bayesian bounds using margins, with the critical ingredient being that our randomised predictions concentrate around some value. The tools we develop straightforwardly lead to margin bounds for various classifiers, including linear prediction—a class that includes boosting and the support vector machine—single-hidden-layer neural networks with an unusual erf activation function, and deep ReLU networks. Further we extend to partially-derandomised predictors where only some of the randomness of our estimators is removed, letting us extend bounds to cases where the concentration properties of our estimators are otherwise poor.},
  url = {https://proceedings.mlr.press/v151/biggs22a.html},
  url_arXiv = {https://arxiv.org/abs/2107.03955},
  url_PDF = {https://proceedings.mlr.press/v151/biggs22a/biggs22a.pdf},
  url_Code = {https://github.com/biggs/margins-and-derandomisation-code},
  url_Video = {https://virtual.aistats.org/virtual/2022/poster/3295},
  eprint = {2107.03955},
  archivePrefix = {arXiv},
  primaryClass = {cs.LG},
  keywords = {mine}
}
Abstract: We give a general recipe for derandomising PAC-Bayesian bounds using margins, with the critical ingredient being that our randomised predictions concentrate around some value. The tools we develop straightforwardly lead to margin bounds for various classifiers, including linear prediction—a class that includes boosting and the support vector machine—single-hidden-layer neural networks with an unusual erf activation function, and deep ReLU networks. Further we extend to partially-derandomised predictors where only some of the randomness of our estimators is removed, letting us extend bounds to cases where the concentration properties of our estimators are otherwise poor.
On PAC-Bayesian reconstruction guarantees for VAEs.
Chérief-Abdellatif, B.; Shi, Y.; Doucet, A.; and Guedj, B.
In Camps-Valls, G.; Ruiz, F. J. R.; and Valera, I., editor(s), Proceedings of The 25th International Conference on Artificial Intelligence and Statistics [AISTATS], volume 151 of Proceedings of Machine Learning Research, pages 3066–3079, 28–30 Mar 2022. PMLR.
@inproceedings{cherief2021vae,
  title = {On {PAC-Bayesian} reconstruction guarantees for {VAEs}},
  author = {Ch\'erief-Abdellatif, Badr-Eddine and Shi, Yuyang and Doucet, Arnaud and Guedj, Benjamin},
  year = {2022},
  pages = {3066--3079},
  editor = {Camps-Valls, Gustau and Ruiz, Francisco J. R. and Valera, Isabel},
  volume = {151},
  series = {Proceedings of Machine Learning Research},
  month = {28--30 Mar},
  publisher = {PMLR},
  booktitle = {Proceedings of The 25th International Conference on Artificial Intelligence and Statistics [AISTATS]},
  abstract = {Despite its wide use and empirical successes, the theoretical understanding and study of the behaviour and performance of the variational autoencoder (VAE) have only emerged in the past few years. We contribute to this recent line of work by analysing the VAE's reconstruction ability for unseen test data, leveraging arguments from the PAC-Bayes theory. We provide generalisation bounds on the theoretical reconstruction error, and provide insights on the regularisation effect of VAE objectives. We illustrate our theoretical results with supporting experiments on classical benchmark datasets.},
  url = {https://proceedings.mlr.press/v151/cherief-abdellatif22a.html},
  url_arXiv = {https://arxiv.org/abs/2202.11455},
  url_PDF = {https://proceedings.mlr.press/v151/cherief-abdellatif22a/cherief-abdellatif22a.pdf},
  url_Code = {https://github.com/yuyang-shi/PBVAE},
  url_Video = {https://virtual.aistats.org/virtual/2022/poster/3265},
  eprint = {2202.11455},
  archivePrefix = {arXiv},
  primaryClass = {cs.LG},
  keywords = {mine}
}
Abstract: Despite its wide use and empirical successes, the theoretical understanding and study of the behaviour and performance of the variational autoencoder (VAE) have only emerged in the past few years. We contribute to this recent line of work by analysing the VAE's reconstruction ability for unseen test data, leveraging arguments from the PAC-Bayes theory. We provide generalisation bounds on the theoretical reconstruction error, and provide insights on the regularisation effect of VAE objectives. We illustrate our theoretical results with supporting experiments on classical benchmark datasets.
KSD Aggregated Goodness-of-fit Test.
Schrab, A.; Guedj, B.; and Gretton, A.
In Koyejo, S.; Mohamed, S.; Agarwal, A.; Belgrave, D.; Cho, K.; and Oh, A., editor(s), Advances in Neural Information Processing Systems [NeurIPS], volume 35, pages 32624–32638, 2022. Curran Associates, Inc.
@inproceedings{schrab2022ksd,
  title = {{KSD} Aggregated Goodness-of-fit Test},
  author = {Antonin Schrab and Benjamin Guedj and Arthur Gretton},
  year = {2022},
  editor = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh},
  pages = {32624--32638},
  publisher = {Curran Associates, Inc.},
  volume = {35},
  booktitle = {Advances in Neural Information Processing Systems [NeurIPS]},
  abstract = {We investigate properties of goodness-of-fit tests based on the Kernel Stein Discrepancy (KSD). We introduce a strategy to construct a test, called KSDAgg, which aggregates multiple tests with different kernels. KSDAgg avoids splitting the data to perform kernel selection (which leads to a loss in test power), and rather maximises the test power over a collection of kernels. We provide theoretical guarantees on the power of KSDAgg: we show it achieves the smallest uniform separation rate of the collection, up to a logarithmic term. KSDAgg can be computed exactly in practice as it relies either on a parametric bootstrap or on a wild bootstrap to estimate the quantiles and the level corrections. In particular, for the crucial choice of bandwidth of a fixed kernel, it avoids resorting to arbitrary heuristics (such as median or standard deviation) or to data splitting. We find on both synthetic and real-world data that KSDAgg outperforms other state-of-the-art adaptive KSD-based goodness-of-fit testing procedures.},
  url = {https://papers.nips.cc/paper_files/paper/2022/hash/d241a7b1499cee1bf40769ceade2444d-Abstract-Conference.html},
  url_PDF = {https://papers.nips.cc/paper_files/paper/2022/file/d241a7b1499cee1bf40769ceade2444d-Paper-Conference.pdf},
  url_Supplementary = {https://papers.nips.cc/paper_files/paper/2022/file/d241a7b1499cee1bf40769ceade2444d-Supplemental-Conference.pdf},
  url_arXiv = {https://arxiv.org/abs/2202.00824},
  url_Code = {https://github.com/antoninschrab/ksdagg-paper},
  url_Slides = {https://antoninschrab.github.io/files/Slides-31-05-22.pdf},
  url_Poster = {https://antoninschrab.github.io/files/Poster_MMDAgg_KSDAgg.pdf},
  url_Posterbis = {https://antoninschrab.github.io/files/Simplified_Poster.pdf},
  url_Video = {https://www.youtube.com/watch?v=OWh6Hj10wsY},
  eprint = {2202.00824},
  archivePrefix = {arXiv},
  primaryClass = {stat.ML},
  keywords = {mine}
}
Abstract: We investigate properties of goodness-of-fit tests based on the Kernel Stein Discrepancy (KSD). We introduce a strategy to construct a test, called KSDAgg, which aggregates multiple tests with different kernels. KSDAgg avoids splitting the data to perform kernel selection (which leads to a loss in test power), and rather maximises the test power over a collection of kernels. We provide theoretical guarantees on the power of KSDAgg: we show it achieves the smallest uniform separation rate of the collection, up to a logarithmic term. KSDAgg can be computed exactly in practice as it relies either on a parametric bootstrap or on a wild bootstrap to estimate the quantiles and the level corrections. In particular, for the crucial choice of bandwidth of a fixed kernel, it avoids resorting to arbitrary heuristics (such as median or standard deviation) or to data splitting. We find on both synthetic and real-world data that KSDAgg outperforms other state-of-the-art adaptive KSD-based goodness-of-fit testing procedures.
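To fix ideas, here is a heavily simplified, runnable sketch of the aggregation idea for a one-dimensional standard normal model: one KSD statistic per bandwidth, wild-bootstrap quantiles, and a crude Bonferroni split of the level standing in for the paper's sharper level correction:

import numpy as np

def stein_kernel(x, h):
    # Stein kernel for the N(0, 1) model (score s(x) = -x) with a Gaussian kernel.
    d = x[:, None] - x[None, :]
    k = np.exp(-0.5 * d ** 2 / h ** 2)
    s = -x
    return (s[:, None] * s[None, :] * k + s[:, None] * (d / h ** 2) * k
            + s[None, :] * (-d / h ** 2) * k + (1 / h ** 2 - d ** 2 / h ** 4) * k)

def ksd_stat(H, w):
    # U-statistic over off-diagonal terms, sign-flipped by w for the wild bootstrap.
    n = len(w)
    return (w @ H @ w - (np.diag(H) * w * w).sum()) / (n * (n - 1))

rng = np.random.default_rng(2)
X = rng.standard_normal(300)            # drawn from the model: should typically not reject
bandwidths, alpha, B = [0.5, 1.0, 2.0], 0.05, 500

reject = False
for h in bandwidths:
    H = stein_kernel(X, h)
    stat = ksd_stat(H, np.ones(len(X)))
    boot = [ksd_stat(H, rng.choice([-1.0, 1.0], size=len(X))) for _ in range(B)]
    reject = reject or stat > np.quantile(boot, 1 - alpha / len(bandwidths))
print("reject H0:", reject)

KSDAgg replaces the Bonferroni split with a calibrated level correction, which is what yields the uniform separation rate guarantee described above.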
Non-Vacuous Generalisation Bounds for Shallow Neural Networks.
Biggs, F.; and Guedj, B.
In Chaudhuri, K.; Jegelka, S.; Song, L.; Szepesvari, C.; Niu, G.; and Sabato, S., editor(s), Proceedings of the 39th International Conference on Machine Learning [ICML], volume 162 of Proceedings of Machine Learning Research, pages 1963–1981, July 2022. PMLR.
@inproceedings{biggs2022shallow,
  booktitle = {Proceedings of the 39th International Conference on Machine Learning [ICML]},
  title = {Non-Vacuous Generalisation Bounds for Shallow Neural Networks},
  author = {Felix Biggs and Benjamin Guedj},
  year = {2022},
  pages = {1963--1981},
  editor = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},
  volume = {162},
  series = {Proceedings of Machine Learning Research},
  month = {July},
  publisher = {PMLR},
  abstract = {We focus on a specific class of shallow neural networks with a single hidden layer, namely those with $L_2$-normalised data and either a sigmoid-shaped Gaussian error function (“erf”) activation or a Gaussian Error Linear Unit (GELU) activation. For these networks, we derive new generalisation bounds through the PAC-Bayesian theory; unlike most existing such bounds they apply to neural networks with deterministic rather than randomised parameters. Our bounds are empirically non-vacuous when the network is trained with vanilla stochastic gradient descent on MNIST and Fashion-MNIST.},
  url = {https://proceedings.mlr.press/v162/biggs22a.html},
  url_arXiv = {https://arxiv.org/abs/2202.01627},
  url_PDF = {https://proceedings.mlr.press/v162/biggs22a/biggs22a.pdf},
  url_Code = {},
  url_Video = {https://icml.cc/virtual/2022/spotlight/17948},
  url_Slides = {https://icml.cc/media/icml-2022/Slides/17948.pdf},
  url_Poster = {https://icml.cc/media/PosterPDFs/ICML%202022/194cf6c2de8e00c05fcf16c498adc7bf.png},
  url_SlidesLive = {https://slideslive.com/38983983/nonvacuous-generalisation-bounds-for-shallow-neural-networks},
  eprint = {2202.01627},
  archivePrefix = {arXiv},
  primaryClass = {cs.LG},
  keywords = {mine}
}
Abstract: We focus on a specific class of shallow neural networks with a single hidden layer, namely those with $L_2$-normalised data and either a sigmoid-shaped Gaussian error function (“erf”) activation or a Gaussian Error Linear Unit (GELU) activation. For these networks, we derive new generalisation bounds through the PAC-Bayesian theory; unlike most existing such bounds they apply to neural networks with deterministic rather than randomised parameters. Our bounds are empirically non-vacuous when the network is trained with vanilla stochastic gradient descent on MNIST and Fashion-MNIST.
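The predictor class itself is easy to write down. Below is a minimal forward pass for a single-hidden-layer network on L2-normalised inputs with an erf or GELU activation (architecture sketch only; the paper's contribution is the bound computation, which is not reproduced here):

import numpy as np
from scipy.special import erf
from scipy.stats import norm

def forward(x, W1, W2, activation="erf"):
    x = x / np.linalg.norm(x)              # L2-normalised input
    pre = W1 @ x
    hidden = erf(pre) if activation == "erf" else pre * norm.cdf(pre)  # erf or GELU
    return W2 @ hidden

rng = np.random.default_rng(3)
W1 = rng.standard_normal((64, 784)) / np.sqrt(784)
W2 = rng.standard_normal((10, 64)) / np.sqrt(64)
logits = forward(rng.standard_normal(784), W1, W2)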
Measuring dissimilarity with diffeomorphism invariance.
Cantelobre, T.; Ciliberto, C.; Guedj, B.; and Rudi, A.
In Chaudhuri, K.; Jegelka, S.; Song, L.; Szepesvari, C.; Niu, G.; and Sabato, S., editor(s), Proceedings of the 39th International Conference on Machine Learning [ICML], volume 162 of Proceedings of Machine Learning Research, pages 2572–2596, 17–23 Jul 2022. PMLR.
@inproceedings{cantelobre2022dissimilarity,
  booktitle = {Proceedings of the 39th International Conference on Machine Learning [ICML]},
  title = {Measuring dissimilarity with diffeomorphism invariance},
  author = {Cantelobre, Th{\'e}ophile and Ciliberto, Carlo and Guedj, Benjamin and Rudi, Alessandro},
  year = {2022},
  pages = {2572--2596},
  editor = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},
  volume = {162},
  series = {Proceedings of Machine Learning Research},
  month = {17--23 Jul},
  publisher = {PMLR},
  abstract = {Measures of similarity (or dissimilarity) are a key ingredient to many machine learning algorithms. We introduce DID, a pairwise dissimilarity measure applicable to a wide range of data spaces, which leverages the data's internal structure to be invariant to diffeomorphisms. We prove that DID enjoys properties which make it relevant for theoretical study and practical use. By representing each datum as a function, DID is defined as the solution to an optimization problem in a Reproducing Kernel Hilbert Space and can be expressed in closed-form. In practice, it can be efficiently approximated via Nystr{\"o}m sampling. Empirical experiments support the merits of DID.},
  url = {https://proceedings.mlr.press/v162/cantelobre22a.html},
  url_arXiv = {https://arxiv.org/abs/2202.05614},
  url_PDF = {https://proceedings.mlr.press/v162/cantelobre22a/cantelobre22a.pdf},
  url_Code = {https://github.com/theophilec/diffy},
  url_Video = {https://youtu.be/WKADjgoTBBs},
  url_Videobis = {https://icml.cc/virtual/2022/spotlight/17588},
  url_Slides = {https://icml.cc/media/icml-2022/Slides/17588_MqtuoR9.pdf},
  url_Poster = {https://icml.cc/media/PosterPDFs/ICML%202022/9ff0525c64bf3d4c9957a1d4397f1b40.png},
  url_SlidesPoster = {https://icml.cc/media/icml-2022/Slides/17587.pdf},
  url_SlidesLive = {https://slideslive.com/38983838/measuring-dissimilarity-with-diffeomorphism-invariance},
  eprint = {2202.05614},
  archivePrefix = {arXiv},
  primaryClass = {stat.ML},
  keywords = {mine}
}
Abstract: Measures of similarity (or dissimilarity) are a key ingredient to many machine learning algorithms. We introduce DID, a pairwise dissimilarity measure applicable to a wide range of data spaces, which leverages the data's internal structure to be invariant to diffeomorphisms. We prove that DID enjoys properties which make it relevant for theoretical study and practical use. By representing each datum as a function, DID is defined as the solution to an optimization problem in a Reproducing Kernel Hilbert Space and can be expressed in closed-form. In practice, it can be efficiently approximated via Nyström sampling. Empirical experiments support the merits of DID.
On change of measure inequalities for $f$-divergences.
Picard-Weibel, A.; and Guedj, B.
2022. Submitted.
@unpublished{picard2022divergence,
  title = {On change of measure inequalities for $f$-divergences},
  author = {Antoine Picard-Weibel and Benjamin Guedj},
  year = {2022},
  note = {Submitted.},
  abstract = {We propose new change of measure inequalities based on $f$-divergences (of which the Kullback-Leibler divergence is a particular case). Our strategy relies on combining the Legendre transform of $f$-divergences and the Young-Fenchel inequality. By exploiting these new change of measure inequalities, we derive new PAC-Bayesian generalisation bounds with a complexity involving $f$-divergences, and holding in mostly unchartered settings (such as heavy-tailed losses). We instantiate our results for the most popular $f$-divergences.},
  url = {https://arxiv.org/abs/2202.05568},
  url_PDF = {https://arxiv.org/pdf/2202.05568.pdf},
  eprint = {2202.05568},
  archivePrefix = {arXiv},
  primaryClass = {stat.ML},
  keywords = {mine}
}
Abstract: We propose new change of measure inequalities based on $f$-divergences (of which the Kullback-Leibler divergence is a particular case). Our strategy relies on combining the Legendre transform of $f$-divergences and the Young-Fenchel inequality. By exploiting these new change of measure inequalities, we derive new PAC-Bayesian generalisation bounds with a complexity involving $f$-divergences, and holding in mostly unchartered settings (such as heavy-tailed losses). We instantiate our results for the most popular $f$-divergences.
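The generic mechanism behind such bounds can be sketched compactly. For a convex $f$ with convex conjugate $f^\star$, the Young-Fenchel inequality $f(t) \ge \phi t - f^\star(\phi)$, applied under $\mathbb{E}_P$ to $t = \mathrm{d}Q/\mathrm{d}P$, gives for suitable $\phi$ and $Q \ll P$:

$$\mathbb{E}_Q[\phi] \;\le\; D_f(Q \Vert P) + \mathbb{E}_P\left[f^\star(\phi)\right].$$

Taking $f(t) = t \log t$ recovers a Kullback-Leibler change of measure. This display is only the generic shape; the paper derives refined instantiations for the most popular $f$-divergences.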
Reprint: a randomized extrapolation based on principal components for data augmentation.
Wei, J.; Chen, Q.; Peng, P.; Guedj, B.; and Li, L.
2022. Submitted.
@unpublished{wei2022reprint,
  title = {Reprint: a randomized extrapolation based on principal components for data augmentation},
  author = {Jiale Wei and Qiyuan Chen and Pai Peng and Benjamin Guedj and Le Li},
  year = {2022},
  note = {Submitted.},
  abstract = {Data scarcity and data imbalance have attracted a lot of attention in many fields. Data augmentation, explored as an effective approach to tackle them, can improve the robustness and efficiency of classification models by generating new samples. This paper presents REPRINT, a simple and effective hidden-space data augmentation method for imbalanced data classification. Given hidden-space representations of samples in each class, REPRINT extrapolates, in a randomized fashion, augmented examples for target class by using subspaces spanned by principal components to summarize distribution structure of both source and target class. Consequently, the examples generated would diversify the target while maintaining the original geometry of target distribution. Besides, this method involves a label refinement component which allows to synthesize new soft labels for augmented examples. Compared with different NLP data augmentation approaches under a range of data imbalanced scenarios on four text classification benchmark, REPRINT shows prominent improvements. Moreover, through comprehensive ablation studies, we show that label refinement is better than label-preserving for augmented examples, and that our method suggests stable and consistent improvements in terms of suitable choices of principal components. Moreover, REPRINT is appealing for its easy-to-use since it contains only one hyperparameter determining the dimension of subspace and requires low computational resource.},
  url = {https://arxiv.org/abs/2204.12024},
  url_PDF = {https://arxiv.org/pdf/2204.12024.pdf},
  url_Code = {https://github.com/bigdata-ccnu/REPRINT},
  eprint = {2204.12024},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  keywords = {mine}
}
Abstract: Data scarcity and data imbalance have attracted a lot of attention in many fields. Data augmentation, explored as an effective approach to tackle them, can improve the robustness and efficiency of classification models by generating new samples. This paper presents REPRINT, a simple and effective hidden-space data augmentation method for imbalanced data classification. Given hidden-space representations of samples in each class, REPRINT extrapolates, in a randomized fashion, augmented examples for the target class by using subspaces spanned by principal components to summarize the distribution structure of both the source and target classes. Consequently, the generated examples diversify the target class while maintaining the original geometry of the target distribution. Besides, the method involves a label refinement component which allows new soft labels to be synthesized for augmented examples. Compared with different NLP data augmentation approaches under a range of data-imbalanced scenarios on four text classification benchmarks, REPRINT shows prominent improvements. Moreover, through comprehensive ablation studies, we show that label refinement is better than label-preserving for augmented examples, and that our method delivers stable and consistent improvements under suitable choices of principal components. Finally, REPRINT is appealing for its ease of use, since it involves only one hyperparameter, the dimension of the subspace, and requires few computational resources.
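A loose illustration of the extrapolation idea (the source/target pairing, scaling scheme and label refinement of the actual method are simplified away here) is to perturb a hidden representation along the principal subspace of its class:

import numpy as np

rng = np.random.default_rng(4)
target_reps = rng.standard_normal((50, 128))   # hidden representations of the rare class

# Principal directions of the target class via SVD of centred representations.
centred = target_reps - target_reps.mean(axis=0)
_, _, Vt = np.linalg.svd(centred, full_matrices=False)
components = Vt[:10]                           # top-10 principal components

def augment(x, scale=0.5):
    # Randomised extrapolation within the span of the principal components,
    # preserving the overall geometry of the class distribution.
    return x + scale * rng.standard_normal(len(components)) @ components

new_example = augment(target_reps[0])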
Online PAC-Bayesian Learning.
Haddouche, M.; and Guedj, B.
In Koyejo, S.; Mohamed, S.; Agarwal, A.; Belgrave, D.; Cho, K.; and Oh, A., editor(s), Advances in Neural Information Processing Systems [NeurIPS], volume 35, pages 25725–25738, 2022. Curran Associates, Inc.
@inproceedings{haddouche2022online,
  title = {Online {PAC-Bayesian} Learning},
  author = {Maxime Haddouche and Benjamin Guedj},
  year = {2022},
  booktitle = {Advances in Neural Information Processing Systems [NeurIPS]},
  editor = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh},
  abstract = {Most PAC-Bayesian bounds hold in the batch learning setting where data is collected at once, prior to inference or prediction. This somewhat departs from many contemporary learning problems where data streams are collected and the algorithms must dynamically adjust. We prove new PAC-Bayesian bounds in this online learning framework, leveraging an updated definition of regret, and we revisit classical PAC-Bayesian results with a batch-to-online conversion, extending their remit to the case of dependent data. Our results hold for bounded losses, potentially \emph{non-convex}, paving the way to promising developments in online learning.},
  pages = {25725--25738},
  publisher = {Curran Associates, Inc.},
  volume = {35},
  url = {https://papers.nips.cc/paper_files/paper/2022/hash/a4d991d581accd2955a1e1928f4e6965-Abstract-Conference.html},
  url_PDF = {https://papers.nips.cc/paper_files/paper/2022/file/a4d991d581accd2955a1e1928f4e6965-Paper-Conference.pdf},
  url_Supplementary = {https://papers.nips.cc/paper_files/paper/2022/file/a4d991d581accd2955a1e1928f4e6965-Supplemental-Conference.pdf},
  url_arXiv = {https://arxiv.org/abs/2206.00024},
  eprint = {2206.00024},
  archivePrefix = {arXiv},
  primaryClass = {cs.LG},
  keywords = {mine}
}
Abstract: Most PAC-Bayesian bounds hold in the batch learning setting where data is collected at once, prior to inference or prediction. This somewhat departs from many contemporary learning problems where data streams are collected and the algorithms must dynamically adjust. We prove new PAC-Bayesian bounds in this online learning framework, leveraging an updated definition of regret, and we revisit classical PAC-Bayesian results with a batch-to-online conversion, extending their remit to the case of dependent data. Our results hold for bounded losses, potentially non-convex, paving the way to promising developments in online learning.
On Margins and Generalisation for Voting Classifiers.
Biggs, F.; Zantedeschi, V.; and Guedj, B.
In Koyejo, S.; Mohamed, S.; Agarwal, A.; Belgrave, D.; Cho, K.; and Oh, A., editor(s), Advances in Neural Information Processing Systems [NeurIPS], volume 35, pages 9713–9726, 2022. Curran Associates, Inc.
@inproceedings{biggs2022majority,
  title = {On Margins and Generalisation for Voting Classifiers},
  author = {Felix Biggs and Valentina Zantedeschi and Benjamin Guedj},
  year = {2022},
  editor = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh},
  pages = {9713--9726},
  volume = {35},
  publisher = {Curran Associates, Inc.},
  booktitle = {Advances in Neural Information Processing Systems [NeurIPS]},
  abstract = {We study the generalisation properties of majority voting on finite ensembles of classifiers, proving margin-based generalisation bounds via the PAC-Bayes theory. These provide state-of-the-art guarantees on a number of classification tasks. Our central results leverage the Dirichlet posteriors studied recently by Zantedeschi et al. [2021] for training voting classifiers; in contrast to that work our bounds apply to non-randomised votes via the use of margins. Our contributions add perspective to the debate on the "margins theory" proposed by Schapire et al. [1998] for the generalisation of ensemble classifiers.},
  url = {https://papers.nips.cc/paper_files/paper/2022/hash/3f8675af3da6da231c9e75b889b7f047-Abstract-Conference.html},
  url_PDF = {https://papers.nips.cc/paper_files/paper/2022/file/3f8675af3da6da231c9e75b889b7f047-Paper-Conference.pdf},
  url_Supplementary = {https://papers.nips.cc/paper_files/paper/2022/file/3f8675af3da6da231c9e75b889b7f047-Supplemental-Conference.pdf},
  url_arXiv = {https://arxiv.org/abs/2206.04607},
  eprint = {2206.04607},
  archivePrefix = {arXiv},
  primaryClass = {cs.LG},
  keywords = {mine}
}
Abstract: We study the generalisation properties of majority voting on finite ensembles of classifiers, proving margin-based generalisation bounds via the PAC-Bayes theory. These provide state-of-the-art guarantees on a number of classification tasks. Our central results leverage the Dirichlet posteriors studied recently by Zantedeschi et al. [2021] for training voting classifiers; in contrast to that work our bounds apply to non-randomised votes via the use of margins. Our contributions add perspective to the debate on the "margins theory" proposed by Schapire et al. [1998] for the generalisation of ensemble classifiers.
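For concreteness, the margin of a weighted majority vote on a single input, the quantity such bounds are stated in terms of, can be computed as follows (an illustrative multiclass margin definition consistent with the margins literature, not code from the paper):

import numpy as np

def vote_margin(votes, weights, y):
    # votes: predicted labels of the m voters for one input; weights sum to 1.
    # Margin = weight on the true label minus the largest weight on any other label.
    scores = {c: weights[votes == c].sum() for c in np.unique(votes)}
    others = [s for c, s in scores.items() if c != y]
    return scores.get(y, 0.0) - (max(others) if others else 0.0)

weights = np.array([0.4, 0.3, 0.3])
print(vote_margin(np.array([1, 1, 0]), weights, y=1))   # (0.4 + 0.3) - 0.3 = 0.4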
Efficient Aggregated Kernel Tests using Incomplete $U$-statistics.
Schrab, A.; Kim, I.; Guedj, B.; and Gretton, A.
In Koyejo, S.; Mohamed, S.; Agarwal, A.; Belgrave, D.; Cho, K.; and Oh, A., editor(s), Advances in Neural Information Processing Systems [NeurIPS], volume 35, pages 18793–18807, 2022. Curran Associates, Inc.
@inproceedings{schrab2022ustats,
  title = {Efficient Aggregated Kernel Tests using Incomplete $U$-statistics},
  author = {Antonin Schrab and Ilmun Kim and Benjamin Guedj and Arthur Gretton},
  year = {2022},
  editor = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh},
  pages = {18793--18807},
  publisher = {Curran Associates, Inc.},
  volume = {35},
  booktitle = {Advances in Neural Information Processing Systems [NeurIPS]},
  abstract = {We propose a series of computationally efficient, nonparametric tests for the two-sample, independence and goodness-of-fit problems, using the Maximum Mean Discrepancy (MMD), Hilbert Schmidt Independence Criterion (HSIC), and Kernel Stein Discrepancy (KSD), respectively. Our test statistics are incomplete $U$-statistics, with a computational cost that interpolates between linear time in the number of samples, and quadratic time, as associated with classical $U$-statistic tests. The three proposed tests aggregate over several kernel bandwidths to detect departures from the null on various scales: we call the resulting tests MMDAggInc, HSICAggInc and KSDAggInc. For the test thresholds, we derive a quantile bound for wild bootstrapped incomplete $U$-statistics, which is of independent interest. We derive uniform separation rates for MMDAggInc and HSICAggInc, and quantify exactly the trade-off between computational efficiency and the attainable rates: this result is novel for tests based on incomplete $U$-statistics, to our knowledge. We further show that in the quadratic-time case, the wild bootstrap incurs no penalty to test power over more widespread permutation-based approaches, since both attain the same minimax optimal rates (which in turn match the rates that use oracle quantiles). We support our claims with numerical experiments on the trade-off between computational efficiency and test power. In the three testing frameworks, we observe that our proposed linear-time aggregated tests obtain higher power than current state-of-the-art linear-time kernel tests.},
  url = {https://papers.nips.cc/paper_files/paper/2022/hash/774164b966cc277c82a960934445140d-Abstract-Conference.html},
  url_PDF = {https://papers.nips.cc/paper_files/paper/2022/file/774164b966cc277c82a960934445140d-Paper-Conference.pdf},
  url_Supplementary = {https://papers.nips.cc/paper_files/paper/2022/file/774164b966cc277c82a960934445140d-Supplemental-Conference.pdf},
  url_arXiv = {https://arxiv.org/abs/2206.09194},
  url_Slides = {https://antoninschrab.github.io/files/Slides-31-05-22.pdf},
  url_Poster = {},
  url_Posterbis = {https://antoninschrab.github.io/files/Simplified_Poster.pdf},
  eprint = {2206.09194},
  archivePrefix = {arXiv},
  primaryClass = {stat.ML},
  keywords = {mine}
}
Abstract: We propose a series of computationally efficient, nonparametric tests for the two-sample, independence and goodness-of-fit problems, using the Maximum Mean Discrepancy (MMD), Hilbert Schmidt Independence Criterion (HSIC), and Kernel Stein Discrepancy (KSD), respectively. Our test statistics are incomplete $U$-statistics, with a computational cost that interpolates between linear time in the number of samples, and quadratic time, as associated with classical $U$-statistic tests. The three proposed tests aggregate over several kernel bandwidths to detect departures from the null on various scales: we call the resulting tests MMDAggInc, HSICAggInc and KSDAggInc. For the test thresholds, we derive a quantile bound for wild bootstrapped incomplete $U$-statistics, which is of independent interest. We derive uniform separation rates for MMDAggInc and HSICAggInc, and quantify exactly the trade-off between computational efficiency and the attainable rates: this result is novel for tests based on incomplete $U$-statistics, to our knowledge. We further show that in the quadratic-time case, the wild bootstrap incurs no penalty to test power over more widespread permutation-based approaches, since both attain the same minimax optimal rates (which in turn match the rates that use oracle quantiles). We support our claims with numerical experiments on the trade-off between computational efficiency and test power. In the three testing frameworks, we observe that our proposed linear-time aggregated tests obtain higher power than current state-of-the-art linear-time kernel tests.
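The computational trick is easy to illustrate: an incomplete U-statistic averages pair evaluations over a sampled design rather than over all pairs (a generic sketch with a placeholder core function h; the paper's designs and kernel-based statistics are more specific):

import numpy as np

def h(x, y):
    # Placeholder pair evaluation; in the paper this is a kernel-based core function.
    return (x - y) ** 2

rng = np.random.default_rng(5)
X = rng.standard_normal(2000)

# Complete U-statistic: all distinct pairs, O(n^2) evaluations.
i, j = np.triu_indices(len(X), k=1)
complete = h(X[i], X[j]).mean()

# Incomplete U-statistic: L sampled pairs, O(L) evaluations.
L = 5 * len(X)                                  # a linear-in-n budget
ii, jj = rng.integers(len(X), size=L), rng.integers(len(X), size=L)
keep = ii != jj
incomplete = h(X[ii[keep]], X[jj[keep]]).mean()
print(complete, incomplete)

Varying L between O(n) and O(n^2) interpolates between linear-time and quadratic-time tests, which is exactly the trade-off the paper quantifies.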
Generalisation under gradient descent via deterministic PAC-Bayes.
Clerico, E.; Farghly, T.; Deligiannidis, G.; Guedj, B.; and Doucet, A.
2022. Submitted.
@unpublished{clerico2022pacbayes,
  title = {Generalisation under gradient descent via deterministic {PAC-Bayes}},
  author = {Clerico, Eugenio and Farghly, Tyler and Deligiannidis, George and Guedj, Benjamin and Doucet, Arnaud},
  year = {2022},
  note = {Submitted.},
  abstract = {We establish disintegrated PAC-Bayesian generalisation bounds for models trained with gradient descent methods or continuous gradient flows. Contrary to standard practice in the PAC-Bayesian setting, our result applies to optimisation algorithms that are deterministic, without requiring any \textit{de-randomisation} step. Our bounds are fully computable, depending on the density of the initial distribution and the Hessian of the training objective over the trajectory. We show that our framework can be applied to a variety of iterative optimisation algorithms, including stochastic gradient descent (SGD), momentum-based schemes, and damped Hamiltonian dynamics.},
  url = {https://arxiv.org/abs/2209.02525},
  url_PDF = {https://arxiv.org/pdf/2209.02525.pdf},
  doi = {10.48550/ARXIV.2209.02525},
  eprint = {2209.02525},
  archivePrefix = {arXiv},
  primaryClass = {stat.ML},
  copyright = {Creative Commons Attribution 4.0 International},
  keywords = {mine}
}
Abstract: We establish disintegrated PAC-Bayesian generalisation bounds for models trained with gradient descent methods or continuous gradient flows. Contrary to standard practice in the PAC-Bayesian setting, our result applies to optimisation algorithms that are deterministic, without requiring any de-randomisation step. Our bounds are fully computable, depending on the density of the initial distribution and the Hessian of the training objective over the trajectory. We show that our framework can be applied to a variety of iterative optimisation algorithms, including stochastic gradient descent (SGD), momentum-based schemes, and damped Hamiltonian dynamics.