Non-Vacuous Generalisation Bounds for Shallow Neural Networks. Biggs, F. & Guedj, B. In Chaudhuri, K., Jegelka, S., Song, L., Szepesvari, C., Niu, G., & Sabato, S., editors, Proceedings of the 39th International Conference on Machine Learning [ICML], volume 162 of Proceedings of Machine Learning Research, pages 1963–1981, July 2022. PMLR.
Paper
Arxiv
Pdf
Code
Video
Slides
Poster
SlidesLive
We focus on a specific class of shallow neural networks with a single hidden layer, namely those with $L_2$-normalised data and either a sigmoid-shaped Gaussian error function (“erf”) activation or a Gaussian Error Linear Unit (GELU) activation. For these networks, we derive new generalisation bounds through the PAC-Bayesian theory; unlike most existing such bounds they apply to neural networks with deterministic rather than randomised parameters. Our bounds are empirically non-vacuous when the network is trained with vanilla stochastic gradient descent on MNIST and Fashion-MNIST.
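For orientation, here is a minimal, hypothetical sketch (in Python; not the authors' released code, and the names, widths, and constants are illustrative assumptions) of the network class described in the abstract, together with the classical PAC-Bayes-kl inequality (Langford–Seeger/Maurer) that bounds of this kind invert. The paper's actual bounds for deterministic networks are more refined than this generic randomised-predictor form.

import numpy as np
from scipy.special import erf

def l2_normalise(x):
    # Project inputs onto the unit L2 sphere, matching the paper's
    # assumption of L2-normalised data.
    return x / np.linalg.norm(x, axis=-1, keepdims=True)

def gelu(z):
    # Gaussian Error Linear Unit, written exactly via the Gaussian error function.
    return 0.5 * z * (1.0 + erf(z / np.sqrt(2.0)))

def shallow_net(x, W1, W2, activation="erf"):
    # Single hidden layer on normalised data: phi(W1 x), then a linear readout.
    h = W1 @ l2_normalise(x)
    h = erf(h) if activation == "erf" else gelu(h)
    return W2 @ h

def kl_bernoulli(q, p, eps=1e-12):
    # Binary KL divergence kl(q || p) between Bernoulli(q) and Bernoulli(p).
    q, p = np.clip(q, eps, 1 - eps), np.clip(p, eps, 1 - eps)
    return q * np.log(q / p) + (1 - q) * np.log((1 - q) / (1 - p))

def pac_bayes_kl_bound(emp_risk, kl_qp, n, delta=0.05, tol=1e-9):
    # Classical PAC-Bayes-kl bound: with probability at least 1 - delta,
    #   kl(emp_risk || true_risk) <= (KL(Q||P) + log(2 sqrt(n) / delta)) / n.
    # Invert by bisection to obtain an upper bound on the true risk.
    rhs = (kl_qp + np.log(2.0 * np.sqrt(n) / delta)) / n
    lo, hi = emp_risk, 1.0
    while hi - lo > tol:
        mid = 0.5 * (lo + hi)
        if kl_bernoulli(emp_risk, mid) <= rhs:
            lo = mid  # bound still satisfied: the crossing point lies higher
        else:
            hi = mid
    return hi

# Made-up numbers, only to illustrate non-vacuity: with n = 60000 (MNIST-sized),
# a 5% empirical risk and KL(Q||P) = 5000 nats give a risk bound around 0.19 < 1.
print(pac_bayes_kl_bound(emp_risk=0.05, kl_qp=5000.0, n=60000))

A bound is called vacuous when it exceeds 1, since any classifier satisfies it trivially; the title's claim is that the paper's bounds evaluate below that threshold on real networks with deterministic rather than randomised weights.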
@inproceedings{biggs2022shallow,
booktitle={Proceedings of the 39th International Conference on Machine Learning [ICML]},
title={Non-Vacuous Generalisation Bounds for Shallow Neural Networks},
author={Felix Biggs and Benjamin Guedj},
year={2022},
pages = {1963--1981},
editor = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},
volume = {162},
series = {Proceedings of Machine Learning Research},
month = {July},
publisher = {PMLR},
abstract = {We focus on a specific class of shallow neural networks with a single hidden layer, namely those with $L_2$-normalised data and either a sigmoid-shaped Gaussian error function (“erf”) activation or a Gaussian Error Linear Unit (GELU) activation. For these networks, we derive new generalisation bounds through the PAC-Bayesian theory; unlike most existing such bounds they apply to neural networks with deterministic rather than randomised parameters. Our bounds are empirically non-vacuous when the network is trained with vanilla stochastic gradient descent on MNIST and Fashion-MNIST.},
url = {https://proceedings.mlr.press/v162/biggs22a.html},
url_arXiv = {https://arxiv.org/abs/2202.01627},
url_PDF = {https://proceedings.mlr.press/v162/biggs22a/biggs22a.pdf},
url_Code = {},
url_Video = {https://icml.cc/virtual/2022/spotlight/17948},
url_Slides = {https://icml.cc/media/icml-2022/Slides/17948.pdf},
url_Poster = {https://icml.cc/media/PosterPDFs/ICML%202022/194cf6c2de8e00c05fcf16c498adc7bf.png},
url_SlidesLive = {https://slideslive.com/38983983/nonvacuous-generalisation-bounds-for-shallow-neural-networks},
eprint={2202.01627},
archivePrefix={arXiv},
primaryClass={cs.LG},
keywords={mine}
}
{"_id":"7KdiFodsEwc7JqhW7","bibbaseid":"biggs-guedj-nonvacuousgeneralisationboundsforshallowneuralnetworks-2022","author_short":["Biggs, F.","Guedj, B."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","booktitle":"Proceedings of the 39th International Conference on Machine Learning [ICML]","title":"Non-Vacuous Generalisation Bounds for Shallow Neural Networks","author":[{"firstnames":["Felix"],"propositions":[],"lastnames":["Biggs"],"suffixes":[]},{"firstnames":["Benjamin"],"propositions":[],"lastnames":["Guedj"],"suffixes":[]}],"year":"2022","pages":"1963–1981","editor":[{"propositions":[],"lastnames":["Chaudhuri"],"firstnames":["Kamalika"],"suffixes":[]},{"propositions":[],"lastnames":["Jegelka"],"firstnames":["Stefanie"],"suffixes":[]},{"propositions":[],"lastnames":["Song"],"firstnames":["Le"],"suffixes":[]},{"propositions":[],"lastnames":["Szepesvari"],"firstnames":["Csaba"],"suffixes":[]},{"propositions":[],"lastnames":["Niu"],"firstnames":["Gang"],"suffixes":[]},{"propositions":[],"lastnames":["Sabato"],"firstnames":["Sivan"],"suffixes":[]}],"volume":"162","series":"Proceedings of Machine Learning Research","month":"July","publisher":"PMLR","abstract":"We focus on a specific class of shallow neural networks with a single hidden layer, namely those with $L_2$-normalised data and either a sigmoid-shaped Gaussian error function (“erf”) activation or a Gaussian Error Linear Unit (GELU) activation. For these networks, we derive new generalisation bounds through the PAC-Bayesian theory; unlike most existing such bounds they apply to neural networks with deterministic rather than randomised parameters. Our bounds are empirically non-vacuous when the network is trained with vanilla stochastic gradient descent on MNIST and Fashion-MNIST.","url":"https://proceedings.mlr.press/v162/biggs22a.html","url_arxiv":"https://arxiv.org/abs/2202.01627","url_pdf":"https://proceedings.mlr.press/v162/biggs22a/biggs22a.pdf","url_code":"","url_video":"https://icml.cc/virtual/2022/spotlight/17948","url_slides":"https://icml.cc/media/icml-2022/Slides/17948.pdf","url_poster":"https://icml.cc/media/PosterPDFs/ICML%202022/194cf6c2de8e00c05fcf16c498adc7bf.png","url_slideslive":"https://slideslive.com/38983983/nonvacuous-generalisation-bounds-for-shallow-neural-networks","eprint":"2202.01627","archiveprefix":"arXiv","primaryclass":"cs.LG","keywords":"mine","bibtex":"@inproceedings{biggs2022shallow,\nbooktitle={Proceedings of the 39th International Conference on Machine Learning [ICML]},\ntitle={Non-Vacuous Generalisation Bounds for Shallow Neural Networks},\nauthor={Felix Biggs and Benjamin Guedj},\nyear={2022},\npages = \t {1963--1981},\neditor = \t {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},\nvolume = \t {162},\nseries = \t {Proceedings of Machine Learning Research},\nmonth = \t {July},\npublisher = {PMLR},\nabstract = {We focus on a specific class of shallow neural networks with a single hidden layer, namely those with $L_2$-normalised data and either a sigmoid-shaped Gaussian error function (“erf”) activation or a Gaussian Error Linear Unit (GELU) activation. For these networks, we derive new generalisation bounds through the PAC-Bayesian theory; unlike most existing such bounds they apply to neural networks with deterministic rather than randomised parameters. 
Our bounds are empirically non-vacuous when the network is trained with vanilla stochastic gradient descent on MNIST and Fashion-MNIST.},\nurl = {https://proceedings.mlr.press/v162/biggs22a.html},\nurl_arXiv = {https://arxiv.org/abs/2202.01627},\nurl_PDF = {https://proceedings.mlr.press/v162/biggs22a/biggs22a.pdf},\nurl_Code = {},\nurl_Video = {https://icml.cc/virtual/2022/spotlight/17948},\nurl_Slides = {https://icml.cc/media/icml-2022/Slides/17948.pdf},\nurl_Poster = {https://icml.cc/media/PosterPDFs/ICML%202022/194cf6c2de8e00c05fcf16c498adc7bf.png},\nurl_SlidesLive = {https://slideslive.com/38983983/nonvacuous-generalisation-bounds-for-shallow-neural-networks},\neprint={2202.01627},\narchivePrefix={arXiv},\nprimaryClass={cs.LG},\nkeywords={mine}\n}\n\n","author_short":["Biggs, F.","Guedj, B."],"editor_short":["Chaudhuri, K.","Jegelka, S.","Song, L.","Szepesvari, C.","Niu, G.","Sabato, S."],"key":"biggs2022shallow","id":"biggs2022shallow","bibbaseid":"biggs-guedj-nonvacuousgeneralisationboundsforshallowneuralnetworks-2022","role":"author","urls":{"Paper":"https://proceedings.mlr.press/v162/biggs22a.html"," arxiv":"https://arxiv.org/abs/2202.01627"," pdf":"https://proceedings.mlr.press/v162/biggs22a/biggs22a.pdf"," code":"https://bguedj.github.io/files/bguedj-publications.bib"," video":"https://icml.cc/virtual/2022/spotlight/17948"," slides":"https://icml.cc/media/icml-2022/Slides/17948.pdf"," poster":"https://icml.cc/media/PosterPDFs/ICML%202022/194cf6c2de8e00c05fcf16c498adc7bf.png"," slideslive":"https://slideslive.com/38983983/nonvacuous-generalisation-bounds-for-shallow-neural-networks"},"keyword":["mine"],"metadata":{"authorlinks":{}},"downloads":1,"html":""},"bibtype":"inproceedings","biburl":"https://bguedj.github.io/files/bguedj-publications.bib","dataSources":["suE7RgYeZEnSYr5Fy"],"keywords":["mine"],"search_terms":["non","vacuous","generalisation","bounds","shallow","neural","networks","biggs","guedj"],"title":"Non-Vacuous Generalisation Bounds for Shallow Neural Networks","year":2022,"downloads":1}