Stable Architectures for Deep Neural Networks. Haber, E. & Ruthotto, L. Inverse Problems, 34(1):014004, January 2018. arXiv:1705.03341 [cs, math]. 512 citations (Semantic Scholar, arXiv and DOI records) [2023-07-05].
Abstract: Deep neural networks have become invaluable tools for supervised machine learning, e.g., classification of text or images. While often offering superior results over traditional techniques and successfully expressing complicated patterns in data, deep architectures are known to be challenging to design and train such that they generalize well to new data. Critical issues with deep architectures are numerical instabilities in derivative-based learning algorithms commonly called exploding or vanishing gradients. In this paper, we propose new forward propagation techniques inspired by systems of Ordinary Differential Equations (ODE) that overcome this challenge and lead to well-posed learning problems for arbitrarily deep networks.
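As a quick sketch of the idea in the abstract (not the authors' implementation: the function names, the tanh activation, and the step size h below are illustrative assumptions), the ODE view reads a residual layer as one forward-Euler step of dY/dt = sigma(K(t) Y + b(t)). Parameterizing K antisymmetrically places the Jacobian's eigenvalues on the imaginary axis, which is one route the paper takes to forward propagation that neither explodes nor decays with depth:

import numpy as np

def antisymmetric(W):
    # Antisymmetric part of W; its eigenvalues are purely imaginary,
    # so the underlying ODE neither amplifies nor damps the state.
    return 0.5 * (W - W.T)

def forward_euler_net(Y0, weights, biases, h=0.1):
    # Each residual layer is one forward-Euler step of
    #   dY/dt = tanh(K(t) Y + b(t)).
    # Forward Euler is the simplest choice here; the paper also
    # considers integrators and regularization better suited to
    # imaginary eigenvalues.
    Y = Y0
    for W, b in zip(weights, biases):
        K = antisymmetric(W)
        Y = Y + h * np.tanh(K @ Y + b)
    return Y

# Because tanh is bounded, each step changes Y by at most h per
# component, so even a 100-layer forward pass cannot blow up
# exponentially:
rng = np.random.default_rng(0)
n, depth = 4, 100
weights = [rng.normal(size=(n, n)) for _ in range(depth)]
biases = [rng.normal(size=n) for _ in range(depth)]
Y = forward_euler_net(rng.normal(size=n), weights, biases)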
@article{haber_stable_2018,
title = {Stable {Architectures} for {Deep} {Neural} {Networks}},
volume = {34},
issn = {0266-5611, 1361-6420},
url = {http://arxiv.org/abs/1705.03341},
doi = {10.1088/1361-6420/aa9a90},
abstract = {Deep neural networks have become invaluable tools for supervised machine learning, e.g., classification of text or images. While often offering superior results over traditional techniques and successfully expressing complicated patterns in data, deep architectures are known to be challenging to design and train such that they generalize well to new data. Critical issues with deep architectures are numerical instabilities in derivative-based learning algorithms commonly called exploding or vanishing gradients. In this paper, we propose new forward propagation techniques inspired by systems of Ordinary Differential Equations (ODE) that overcome this challenge and lead to well-posed learning problems for arbitrarily deep networks.},
language = {en},
number = {1},
urldate = {2023-07-05},
journal = {Inverse Problems},
author = {Haber, Eldad and Ruthotto, Lars},
month = jan,
year = {2018},
note = {512 citations (Semantic Scholar/arXiv) [2023-07-05]
512 citations (Semantic Scholar/DOI) [2023-07-05]
arXiv:1705.03341 [cs, math]},
	keywords = {68T05, 65L09, 49N90, Computer Science - Machine Learning, I.2.6, Mathematics - Numerical Analysis, Mathematics - Optimization and Control},
pages = {014004},
}
{"_id":"3aeKLsGnnicWo5Jk7","bibbaseid":"haber-ruthotto-stablearchitecturesfordeepneuralnetworks-2018","author_short":["Haber, E.","Ruthotto, L."],"bibdata":{"bibtype":"article","type":"article","title":"Stable Architectures for Deep Neural Networks","volume":"34","issn":"0266-5611, 1361-6420","url":"http://arxiv.org/abs/1705.03341","doi":"10.1088/1361-6420/aa9a90","abstract":"Deep neural networks have become invaluable tools for supervised machine learning, e.g., classification of text or images. While often offering superior results over traditional techniques and successfully expressing complicated patterns in data, deep architectures are known to be challenging to design and train such that they generalize well to new data. Critical issues with deep architectures are numerical instabilities in derivative-based learning algorithms commonly called exploding or vanishing gradients. In this paper, we propose new forward propagation techniques inspired by systems of Ordinary Differential Equations (ODE) that overcome this challenge and lead to well-posed learning problems for arbitrarily deep networks.","language":"en","number":"1","urldate":"2023-07-05","journal":"Inverse Problems","author":[{"propositions":[],"lastnames":["Haber"],"firstnames":["Eldad"],"suffixes":[]},{"propositions":[],"lastnames":["Ruthotto"],"firstnames":["Lars"],"suffixes":[]}],"month":"January","year":"2018","note":"512 citations (Semantic Scholar/arXiv) [2023-07-05] 512 citations (Semantic Scholar/DOI) [2023-07-05] arXiv:1705.03341 [cs, math]","keywords":"/unread, 68T05, 65L09, 49N90, Computer Science - Machine Learning, I.2.6, Mathematics - Numerical Analysis, Mathematics - Optimization and Control","pages":"014004","bibtex":"@article{haber_stable_2018,\n\ttitle = {Stable {Architectures} for {Deep} {Neural} {Networks}},\n\tvolume = {34},\n\tissn = {0266-5611, 1361-6420},\n\turl = {http://arxiv.org/abs/1705.03341},\n\tdoi = {10.1088/1361-6420/aa9a90},\n\tabstract = {Deep neural networks have become invaluable tools for supervised machine learning, e.g., classification of text or images. While often offering superior results over traditional techniques and successfully expressing complicated patterns in data, deep architectures are known to be challenging to design and train such that they generalize well to new data. Critical issues with deep architectures are numerical instabilities in derivative-based learning algorithms commonly called exploding or vanishing gradients. 
In this paper, we propose new forward propagation techniques inspired by systems of Ordinary Differential Equations (ODE) that overcome this challenge and lead to well-posed learning problems for arbitrarily deep networks.},\n\tlanguage = {en},\n\tnumber = {1},\n\turldate = {2023-07-05},\n\tjournal = {Inverse Problems},\n\tauthor = {Haber, Eldad and Ruthotto, Lars},\n\tmonth = jan,\n\tyear = {2018},\n\tnote = {512 citations (Semantic Scholar/arXiv) [2023-07-05]\n512 citations (Semantic Scholar/DOI) [2023-07-05]\narXiv:1705.03341 [cs, math]},\n\tkeywords = {/unread, 68T05, 65L09, 49N90, Computer Science - Machine Learning, I.2.6, Mathematics - Numerical Analysis, Mathematics - Optimization and Control},\n\tpages = {014004},\n}\n\n","author_short":["Haber, E.","Ruthotto, L."],"key":"haber_stable_2018","id":"haber_stable_2018","bibbaseid":"haber-ruthotto-stablearchitecturesfordeepneuralnetworks-2018","role":"author","urls":{"Paper":"http://arxiv.org/abs/1705.03341"},"keyword":["/unread","68T05","65L09","49N90","Computer Science - Machine Learning","I.2.6","Mathematics - Numerical Analysis","Mathematics - Optimization and Control"],"metadata":{"authorlinks":{}},"html":""},"bibtype":"article","biburl":"https://bibbase.org/zotero/victorjhu","dataSources":["CmHEoydhafhbkXXt5"],"keywords":["/unread","68t05","65l09","49n90","computer science - machine learning","i.2.6","mathematics - numerical analysis","mathematics - optimization and control"],"search_terms":["stable","architectures","deep","neural","networks","haber","ruthotto"],"title":"Stable Architectures for Deep Neural Networks","year":2018}