Improving Generalization Performance by Switching from Adam to SGD. Keskar, N. S. & Socher, R. arXiv e-prints, 2017.
bibtex   
% arXiv preprint. The citation key is kept as-is so existing \cite commands
% still resolve. The surname is "Keskar" ("Shirish" is a given name), the
% arXiv identifier lives in eprint/archiveprefix rather than in `pages`, and
% {Adam}/{SGD} are braced so sentence-casing styles keep their capitals.
@article{shirish2017improving,
  author        = {Keskar, Nitish Shirish and Socher, Richard},
  title         = {Improving Generalization Performance by Switching from {Adam} to {SGD}},
  journal       = {arXiv e-prints},
  year          = {2017},
  month         = dec,
  eprint        = {1712.07628},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
}

Downloads: 0