SPARQ-SGD: Event-Triggered and Compressed Communication in Decentralized Stochastic Optimization. Singh, N., Data, D., George, J., & Diggavi, S. 2020 59th IEEE Conference on Decision and Control (CDC), 2020.
Abstract: In this paper, we propose and analyze SPARQ-SGD, which is an event-triggered and compressed algorithm for decentralized training of large-scale machine learning models. Each node can locally compute a condition (event) which triggers a communication where quantized and sparsified local model parameters are sent. In SPARQ-SGD each node takes at least a fixed number (H) of local gradient steps and then checks if the model parameters have significantly changed compared to its last update; it communicates further compressed model parameters only when there is a significant change, as specified by a (design) criterion. We prove that SPARQ-SGD converges as O(1/(nT)) and O(1/√(nT)) in the strongly-convex and non-convex settings, respectively, demonstrating that such aggressive compression, including event-triggered communication, model sparsification and quantization, does not affect the overall convergence rate compared to uncompressed decentralized training; thereby theoretically yielding communication efficiency for "free". We evaluate SPARQ-SGD over real datasets to demonstrate significant savings in communication over the state-of-the-art.
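The abstract describes the mechanism only at a high level. Below is a minimal, illustrative Python sketch of how such an event-triggered, compressed update rule could look, assuming a top-k sparsifier, a uniform quantizer, a check every H steps, and a fixed triggering threshold; the names (top_k_sparsify, quantize, maybe_communicate, H, threshold, k) are illustrative assumptions, not the paper's actual implementation or triggering criterion.

import numpy as np

def top_k_sparsify(v, k):
    # Keep the k largest-magnitude entries of v, zero out the rest.
    out = np.zeros_like(v)
    idx = np.argsort(np.abs(v))[-k:]
    out[idx] = v[idx]
    return out

def quantize(v, levels=256):
    # Uniform scalar quantization to a fixed number of levels (illustrative).
    scale = np.max(np.abs(v)) + 1e-12
    half = levels // 2
    return np.round(v / scale * half) / half * scale

def maybe_communicate(x_local, x_last_sent, step, H, threshold, k):
    # Event-triggered rule (sketch): after H local SGD steps, send a
    # compressed update only if the local model has changed enough.
    if step % H != 0:
        return None  # still in the local-update phase
    delta = x_local - x_last_sent
    if np.linalg.norm(delta) ** 2 <= threshold:
        return None  # change too small: event not triggered, skip communication
    # Compress the change (sparsify, then quantize) before sending to neighbors.
    return quantize(top_k_sparsify(delta, k))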
@inproceedings{singh2019sparq,
 abstract = {In this paper, we propose and analyze SPARQ-SGD, which is an event-triggered and compressed algorithm for decentralized training of large-scale machine learning models. Each node can locally compute a condition (event) which triggers a communication where quantized and sparsified local model parameters are sent. In SPARQ-SGD each node takes at least a fixed number (H) of local gradient steps and then checks if the model parameters have significantly changed compared to its last update; it communicates further compressed model parameters only when there is a significant change, as specified by a (design) criterion. We prove that SPARQ-SGD converges as O(1/(nT)) and O(1/√(nT)) in the strongly-convex and non-convex settings, respectively, demonstrating that such aggressive compression, including event-triggered communication, model sparsification and quantization, does not affect the overall convergence rate compared to uncompressed decentralized training; thereby theoretically yielding communication efficiency for "free". We evaluate SPARQ-SGD over real datasets to demonstrate significant savings in communication over the state-of-the-art.},
 author = {Singh, Navjot and Data, Deepesh and George, Jemin and Diggavi, Suhas},
 booktitle = {2020 59th IEEE Conference on Decision and Control (CDC)},
 tags = {conf,CEDL,DML},
 title = {SPARQ-SGD: Event-Triggered and Compressed Communication in Decentralized Stochastic Optimization},
 type = {4},
 url_arxiv = {https://arxiv.org/abs/1910.14280},
 year = {2020},
 pages = {3449--3456},
 doi = {10.1109/CDC42340.2020.9303828},
 issn = {2576-2370},
}
{"_id":"eEstbuzQtDA4d8HuQ","bibbaseid":"singh-data-george-diggavi-sparqsgdeventtriggeredandcompressedcommunicationindecentralizedstochasticoptimization-2020","author_short":["Singh, N.","Data, D.","George, J.","Diggavi, S."],"bibdata":{"bibtype":"article","type":"4","abstract":"In this paper, we propose and analyze SPARQ-SGD, which is an event-triggered and compressed algorithm for decentralized training of large-scale machine learning models. Each node can locally compute a condition (event) which triggers a communication where quantized and sparsified local model parameters are sent. In SPARQ-SGD each node takes at least a fixed number (H) of local gradient steps and then checks if the model parameters have significantly changed compared to its last update; it communicates further compressed model parameters only when there is a significant change, as specified by a (design) criterion. We prove that the SPARQ-SGD converges as O(1nT) and O(1nT√) in the strongly-convex and non-convex settings, respectively, demonstrating that such aggressive compression, including event-triggered communication, model sparsification and quantization does not affect the overall convergence rate as compared to uncompressed decentralized training; thereby theoretically yielding communication efficiency for \"free\". We evaluate SPARQ-SGD over real datasets to demonstrate significant amount of savings in communication over the state-of-the-art.","author":[{"propositions":[],"lastnames":["Singh"],"firstnames":["Navjot"],"suffixes":[]},{"propositions":[],"lastnames":["Data"],"firstnames":["Deepesh"],"suffixes":[]},{"propositions":[],"lastnames":["George"],"firstnames":["Jemin"],"suffixes":[]},{"propositions":[],"lastnames":["Diggavi"],"firstnames":["Suhas"],"suffixes":[]}],"journal":"2020 59th IEEE Conference on Decision and Control (CDC)","tags":"conf,CEDL,DML","title":"SPARQ-SGD: Event-Triggered and Compressed Communication in Decentralized Stochastic Optimization","url_arxiv":"https://arxiv.org/abs/1910.14280","year":"2020","pages":"3449-3456","doi":"10.1109/CDC42340.2020.9303828","issn":"2576-2370","bibtex":"@article{singh2019sparq,\n abstract = {In this paper, we propose and analyze SPARQ-SGD, which is an event-triggered and compressed algorithm for decentralized training of large-scale machine learning models. Each node can locally compute a condition (event) which triggers a communication where quantized and sparsified local model parameters are sent. In SPARQ-SGD each node takes at least a fixed number (H) of local gradient steps and then checks if the model parameters have significantly changed compared to its last update; it communicates further compressed model parameters only when there is a significant change, as specified by a (design) criterion. We prove that the SPARQ-SGD converges as O(1nT) and O(1nT√) in the strongly-convex and non-convex settings, respectively, demonstrating that such aggressive compression, including event-triggered communication, model sparsification and quantization does not affect the overall convergence rate as compared to uncompressed decentralized training; thereby theoretically yielding communication efficiency for \"free\". 
We evaluate SPARQ-SGD over real datasets to demonstrate significant amount of savings in communication over the state-of-the-art.},\n author = {Singh, Navjot and Data, Deepesh and George, Jemin and Diggavi, Suhas},\n journal = {2020 59th IEEE Conference on Decision and Control (CDC)},\n tags = {conf,CEDL,DML},\n title = {SPARQ-SGD: Event-Triggered and Compressed Communication in Decentralized Stochastic Optimization},\n type = {4},\n url_arxiv = {https://arxiv.org/abs/1910.14280},\n year = {2020},\n pages={3449-3456},\n doi={10.1109/CDC42340.2020.9303828},\n ISSN={2576-2370},\n}\n\n","author_short":["Singh, N.","Data, D.","George, J.","Diggavi, S."],"key":"singh2019sparq","id":"singh2019sparq","bibbaseid":"singh-data-george-diggavi-sparqsgdeventtriggeredandcompressedcommunicationindecentralizedstochasticoptimization-2020","role":"author","urls":{" arxiv":"https://arxiv.org/abs/1910.14280"},"metadata":{"authorlinks":{}},"downloads":2,"html":""},"bibtype":"article","biburl":"https://bibbase.org/network/files/e2kjGxYgtBo8SWSbC","dataSources":["hicKnsKYNEFXC4CgH","jxCYzXXYRqw2fiEXQ","wCByFFrQMyRwfzrJ6","yuqM5ah4HMsTyDrMa","YaM87hGQiepg5qijZ","n9wmfkt5w8CPqCepg","soj2cS6PgG8NPmWGr","FaDBDiyFAJY5pL28h","ycfdiwWPzC2rE6H77"],"keywords":[],"search_terms":["sparq","sgd","event","triggered","compressed","communication","decentralized","stochastic","optimization","singh","data","george","diggavi"],"title":"SPARQ-SGD: Event-Triggered and Compressed Communication in Decentralized Stochastic Optimization","year":2020,"downloads":2}