Linear Approximation of Deep Neural Networks for Efficient Inference on Video Data. Rueckauer, B. & Liu, S.-C. In *2019 27th European Signal Processing Conference (EUSIPCO)*, pages 1-5, Sep. 2019. Abstract: Sequential data such as video are characterized by spatio-temporal correlations. As of yet, few deep learning algorithms exploit them to decrease the often massive cost during inference. This work leverages correlations in video data to linearize part of a deep neural network and thus reduce its size and computational cost. Drawing upon the simplicity of the typically used rectifier activation function, we replace the ReLU function by dynamically updating masks. The resulting layer stack is a simple chain of matrix multiplications and bias additions, that can be contracted into a single weight matrix and bias vector. Inference then reduces to an affine transformation of the input sequence with these contracted parameters. We show that the method is akin to approximating the neural network with a first-order Taylor expansion around a dynamically updating reference point. The proposed algorithm is evaluated on a denoising convolutional autoencoder.

@inproceedings{8902997,
  author    = {Rueckauer, B. and Liu, S.-C.},
  title     = {Linear Approximation of Deep Neural Networks for Efficient Inference on Video Data},
  booktitle = {2019 27th European Signal Processing Conference (EUSIPCO)},
  year      = {2019},
  month     = sep,
  pages     = {1--5},
  doi       = {10.23919/EUSIPCO.2019.8902997},
  issn      = {2076-1465},
  url       = {https://www.eurasip.org/proceedings/eusipco/eusipco2019/proceedings/papers/1570534153.pdf},
  abstract  = {Sequential data such as video are characterized by spatio-temporal correlations. As of yet, few deep learning algorithms exploit them to decrease the often massive cost during inference. This work leverages correlations in video data to linearize part of a deep neural network and thus reduce its size and computational cost. Drawing upon the simplicity of the typically used rectifier activation function, we replace the ReLU function by dynamically updating masks. The resulting layer stack is a simple chain of matrix multiplications and bias additions, that can be contracted into a single weight matrix and bias vector. Inference then reduces to an affine transformation of the input sequence with these contracted parameters. We show that the method is akin to approximating the neural network with a first-order Taylor expansion around a dynamically updating reference point. The proposed algorithm is evaluated on a denoising convolutional autoencoder.},
  keywords  = {approximation theory;image denoising;inference mechanisms;learning (artificial intelligence);matrix multiplication;neural nets;transfer functions;vectors;video signal processing;sequential data;spatio-temporal correlations;deep learning algorithms;video data;deep neural network;rectifier activation function;ReLU function;single weight matrix;bias vector;dynamically updating reference point;linear approximation;matrix multiplications;first-order Taylor expansion;denoising convolutional autoencoder;Neurons;Biological neural networks;Taylor series;Convolution;Noise reduction;Task analysis;Correlation;Deep neural networks;video;sequential data;linearization;compression},
}