Residual Networks Behave Like Ensembles of Relatively Shallow Networks. Veit, A., Wilber, M., & Belongie, S. ArXiv e-prints, May 2016.
Residual Networks Behave Like Ensembles of Relatively Shallow Networks [link]Paper  bibtex   
% arXiv preprint 1605.06431 (primary class cs.CV), with personal reading notes.
% The arXiv identifier and class live in eprint/archiveprefix/primaryclass;
% journal is kept so classic article styles still have their required field.
% read, rating, date-added, date-modified and uri are reference-manager
% bookkeeping fields (the uri uses a papers3:// scheme); standard styles ignore them.
@article{Veit:2016tc,
  author        = {Veit, Andreas and Wilber, Michael and Belongie, Serge},
  title         = {Residual Networks Behave Like Ensembles of Relatively Shallow Networks},
  journal       = {ArXiv e-prints},
  year          = {2016},
  month         = may,
  eprint        = {1605.06431},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  annote        = {Very elegant paper giving insights on the success of ResNet. Essentially, ResNet is powerful because it is an ensemble of many relatively independent networks.

Also, being deep is not necessary, as effectively, short paths are most critical.

However, there's some doubt on Section 4.1. I think VGG doesn't have Batch Normalization layers, making the statistics of different layers very different, and it's doomed to fail. So maybe the comparison is not fair.},
  keywords      = {deep learning},
  read          = {Yes},
  rating        = {5},
  date-added    = {2017-03-28T21:38:05GMT},
  date-modified = {2017-03-30T14:58:45GMT},
  url           = {http://arxiv.org/abs/1605.06431},
  uri           = {papers3://publication/uuid/559EAEE5-EEC0-4CBD-9788-DA7DAF667964}
}

Downloads: 0