An In-depth Performance Characterization of CPU- and GPU-based DNN Training on Modern Architectures. Awan, A. A., Subramoni, H., & Panda, D. K. In Proceedings of the Machine Learning on HPC Environments, MLHPC'17, pages 8:1–8:8, New York, NY, USA, 2017. ACM.
An In-depth Performance Characterization of CPU- and GPU-based DNN Training on Modern Architectures [link]Paper  doi  bibtex   
@comment{awan-mlhpc17: MLHPC'17 workshop paper. Author names use the "Last, First" form so every name is parsed into given and family parts. Only the acronyms in the title and booktitle are brace-protected, so the bibliography style can still apply its own casing.}
@inproceedings{awan-mlhpc17,
  author    = {Awan, A. A. and Subramoni, H. and Panda, D. K.},
  title     = {An In-depth Performance Characterization of {CPU}- and {GPU}-based
               {DNN} Training on Modern Architectures},
  booktitle = {Proceedings of the Machine Learning on {HPC} Environments},
  series    = {MLHPC'17},
  year      = {2017},
  isbn      = {978-1-4503-5137-9},
  location  = {Denver, CO, USA},
  pages     = {8:1--8:8},
  articleno = {8},
  numpages  = {8},
  url       = {http://doi.acm.org/10.1145/3146347.3146356},
  doi       = {10.1145/3146347.3146356},
  acmid     = {3146356},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {Caffe, Deep Learning, High-Performance Computing, Pascal Architecture, Unified Memory},
}

Downloads: 0