Understanding Convolutional Neural Network Training with Information Theory. Yu, S., Jenssen, R., & Principe, J. C. Technical Report, 2018.
Using information theoretic concepts to understand and explore the inner organization of deep neural networks (DNNs) remains a big challenge. Recently, the concept of an information plane began to shed light on the analysis of multilayer perceptrons (MLPs). We previously provided in-depth insight into stacked autoencoders (SAEs) using a novel matrix-based Rényi's α-entropy functional, enabling for the first time the analysis of the dynamics of learning using information flow in a real-world scenario involving a complex network architecture and large data. Despite the great potential of these past works, several open questions remain when it comes to applying information theoretic concepts to understand convolutional neural networks (CNNs). These include, for instance, the accurate estimation of information quantities among multiple variables and the many different training methodologies. By extending the matrix-based Rényi's α-entropy functional to a multivariate scenario, this paper presents a systematic method for analyzing CNN training using information theory. Our results validate two fundamental data processing inequalities in CNNs and also have direct implications for previous work concerning the training and design of CNNs.
@techreport{Yu2018,
 title = {Understanding Convolutional Neural Network Training with Information Theory},
 author = {Yu, Shujian and Jenssen, Robert and Principe, Jose C.},
 year = {2018},
 abstract = {Using information theoretic concepts to understand and explore the inner organization of deep neural networks (DNNs) remains a big challenge. Recently, the concept of an information plane began to shed light on the analysis of multilayer perceptrons (MLPs). We previously provided in-depth insight into stacked autoencoders (SAEs) using a novel matrix-based R\'enyi's $\alpha$-entropy functional, enabling for the first time the analysis of the dynamics of learning using information flow in a real-world scenario involving a complex network architecture and large data. Despite the great potential of these past works, several open questions remain when it comes to applying information theoretic concepts to understand convolutional neural networks (CNNs). These include, for instance, the accurate estimation of information quantities among multiple variables and the many different training methodologies. By extending the matrix-based R\'enyi's $\alpha$-entropy functional to a multivariate scenario, this paper presents a systematic method for analyzing CNN training using information theory. Our results validate two fundamental data processing inequalities in CNNs and also have direct implications for previous work concerning the training and design of CNNs.}
}
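
For readers who want to experiment with the quantities the abstract refers to, below is a minimal NumPy sketch of the matrix-based Rényi's α-entropy functional (following Sánchez Giraldo, Rao & Príncipe, 2014) and its multivariate extension via Hadamard products of normalized Gram matrices, which is the extension this report builds on. The RBF kernel, σ = 1, and α = 1.01 are illustrative assumptions, not the authors' exact experimental settings.

import numpy as np

def rbf_gram(X, sigma=1.0):
    # RBF Gram matrix K_ij = exp(-||x_i - x_j||^2 / (2*sigma^2))
    sq = np.sum(X**2, axis=1, keepdims=True)
    d2 = np.maximum(sq + sq.T - 2.0 * X @ X.T, 0.0)
    return np.exp(-d2 / (2.0 * sigma**2))

def normalize(K):
    # Trace-normalized kernel matrix: A_ij = K_ij / (n * sqrt(K_ii * K_jj)), so tr(A) = 1
    d = np.sqrt(np.diag(K))
    return K / np.outer(d, d) / K.shape[0]

def renyi_entropy(A, alpha=1.01):
    # Matrix-based Renyi's alpha-entropy: S_alpha(A) = log2(sum_i lambda_i(A)^alpha) / (1 - alpha)
    lam = np.clip(np.linalg.eigvalsh(A), 0.0, None)
    return np.log2(np.sum(lam**alpha)) / (1.0 - alpha)

def joint_entropy(mats, alpha=1.01):
    # Multivariate extension: Hadamard product of the normalized Gram
    # matrices, re-normalized by its trace before taking the entropy
    P = mats[0]
    for A in mats[1:]:
        P = P * A  # elementwise (Hadamard) product
    return renyi_entropy(P / np.trace(P), alpha)

# Usage: mutual information I(X;Y) = S(A) + S(B) - S(A,B) on synthetic data
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
Y = X @ rng.normal(size=(5, 3))  # Y is a deterministic function of X
A, B = normalize(rbf_gram(X)), normalize(rbf_gram(Y))
mi = renyi_entropy(A) + renyi_entropy(B) - joint_entropy([A, B])

Because the entropies are computed from kernel matrix eigenvalues rather than density estimates, the same recipe scales to the many-variable case (e.g., all feature maps in a CNN layer) by multiplying additional normalized Gram matrices into the Hadamard product.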
