On the Performance of Preconditioned Stochastic Gradient Descent. Li, Xi-Lin. 2018. arXiv:1803.09383.
Paper: http://arxiv.org/abs/1803.09383
This paper studies the performance of preconditioned stochastic gradient descent (PSGD), which can be regarded as an enhanced stochastic Newton method with the ability to handle gradient noise and non-convexity at the same time. We have improved the implementation of PSGD, unveiled its relationship to equilibrated stochastic gradient descent (ESGD) and batch normalization, and provided a software package (https://github.com/lixilinx/psgd_tf) implemented in TensorFlow to compare variations of PSGD and stochastic gradient descent (SGD) on a wide range of benchmark problems with commonly used neural network models, e.g., convolutional and recurrent neural networks. Comparison results clearly demonstrate the advantages of PSGD in terms of convergence speed and generalization performance.
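To make the idea concrete, here is a minimal NumPy sketch of diagonal preconditioned SGD on a noisy, ill-conditioned quadratic. The probe-based fitting rule p_i = sqrt(E[dtheta_i^2] / E[dg_i^2]), the toy problem, and all constants are illustrative assumptions for the diagonal special case (the case where the connection to ESGD arises); the paper's full method fits a more general preconditioner, and the psgd_tf package is the reference implementation.

import numpy as np

# Toy problem (assumed for illustration): minimize f(x) = 0.5 x^T H x
# from noisy gradients, with a badly conditioned diagonal Hessian.
rng = np.random.default_rng(0)
H = np.diag([100.0, 1.0])                 # ill-conditioned Hessian
theta = np.array([1.0, 1.0])

def noisy_grad(x):
    # Stochastic gradient: exact gradient H @ x plus additive noise.
    return H @ x + 1e-3 * rng.standard_normal(x.shape)

mu, beta = 0.5, 0.9                       # step size and smoothing (assumed values)
m_dtheta2 = np.zeros_like(theta)          # running estimate of E[dtheta^2]
m_dg2 = np.zeros_like(theta)              # running estimate of E[dg^2]

for step in range(100):
    g = noisy_grad(theta)
    # Probe with a small random perturbation; dg approximates H @ dtheta,
    # so the pair (dtheta, dg) carries curvature information.
    dtheta = 1e-2 * rng.standard_normal(theta.shape)
    dg = noisy_grad(theta + dtheta) - g
    m_dtheta2 = beta * m_dtheta2 + (1 - beta) * dtheta**2
    m_dg2 = beta * m_dg2 + (1 - beta) * dg**2
    p = np.sqrt(m_dtheta2 / (m_dg2 + 1e-12))   # diagonal preconditioner
    theta = theta - mu * p * g                 # preconditioned SGD step

print(theta)  # both coordinates shrink toward 0 at similar rates

Because dg is approximately H @ dtheta, the fitted p roughly equalizes curvature across coordinates, so the stiff and the shallow direction converge at similar rates despite the gradient noise; plain SGD with a single step size cannot do this on such a problem.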
@misc{li2018performance,
  author = {Li, Xi-Lin},
  title = {On the Performance of Preconditioned Stochastic Gradient Descent},
  year = {2018},
  eprint = {1803.09383},
  archiveprefix = {arXiv},
  url = {http://arxiv.org/abs/1803.09383},
  keywords = {SGD theory}
}