iDARTS: Differentiable Architecture Search with Stochastic Implicit Gradients. Zhang, M., Su, S., Pan, S., Chang, X., Abbasnejad, E., & Haffari, R. In International Conference on Machine Learning (ICML), pages 12557-12566, 2021. CORE Ranked A*.
Abstract: Differentiable ARchiTecture Search (DARTS) has recently become the mainstream of neural architecture search (NAS) due to its efficiency and simplicity. With a gradient-based bi-level optimization, DARTS alternately optimizes the inner model weights and the outer architecture parameters in a weight-sharing supernet. A key challenge to the scalability and quality of the learned architectures is the need to differentiate through the inner-loop optimization. While several potentially fatal factors in DARTS have been widely discussed, the architecture gradient, a.k.a. the hypergradient, has received less attention. In this paper, we tackle the hypergradient computation in DARTS based on the implicit function theorem, making it depend only on the solution obtained from the inner-loop optimization and agnostic to the optimization path. To further reduce the computational requirements, we formulate a stochastic hypergradient approximation for differentiable NAS, and theoretically show that the architecture optimization with the proposed method, named iDARTS, is expected to converge to a stationary point. Comprehensive experiments on two NAS benchmark search spaces and the common NAS search space verify the effectiveness of the proposed method. It finds architectures that outperform, by large margins, those learned by the baseline methods.
@inproceedings{
title = {iDARTS: Differentiable Architecture Search with Stochastic Implicit Gradients},
type = {inproceedings},
year = {2021},
pages = {12557--12566},
note = {CORE Ranked A*},
id = {7fed38ad-d5cb-3a46-8ba4-e88a314a543b},
created = {2021-06-25T11:28:09.692Z},
file_attached = {false},
profile_id = {079852a8-52df-3ac8-a41c-8bebd97d6b2b},
last_modified = {2022-04-10T12:11:37.265Z},
read = {false},
starred = {false},
authored = {true},
confirmed = {true},
hidden = {false},
citation_key = {Zhang2021},
folder_uuids = {f3b8cf54-f818-49eb-a899-33ac83c5e58d,2327f56c-ffc0-4246-bac0-b9fa6098ebfb},
private_publication = {false},
abstract = {\textit{Differentiable ARchiTecture Search} (DARTS) has recently become the mainstream of neural architecture search (NAS) due to its efficiency and simplicity. With a gradient-based bi-level optimization, DARTS alternately optimizes the inner model weights and the outer architecture parameters in a weight-sharing supernet. A key challenge to the scalability and quality of the learned architectures is the need to differentiate through the inner-loop optimization. While several potentially fatal factors in DARTS have been widely discussed, the architecture gradient, a.k.a. the hypergradient, has received less attention. In this paper, we tackle the hypergradient computation in DARTS based on the implicit function theorem, making it depend only on the solution obtained from the inner-loop optimization and agnostic to the optimization path. To further reduce the computational requirements, we formulate a stochastic hypergradient approximation for differentiable NAS, and theoretically show that the architecture optimization with the proposed method, named iDARTS, is expected to converge to a stationary point. Comprehensive experiments on two NAS benchmark search spaces and the common NAS search space verify the effectiveness of the proposed method. It finds architectures that outperform, by large margins, those learned by the baseline methods.},
bibtype = {inproceedings},
author = {Zhang, Miao and Su, Steven and Pan, Shirui and Chang, Xiaojun and Abbasnejad, Ehsan and Haffari, Reza},
booktitle = {International Conference on Machine Learning (ICML)}
}
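For context, the bi-level objective referenced in the abstract and the implicit-function-theorem (IFT) hypergradient it builds on can be sketched as follows. The notation here is ours rather than the paper's (alpha: architecture parameters, w: supernet weights, L_train/L_val: training and validation losses), and the paper's stochastic approximation of the inverse Hessian term is not shown.

\documentclass{article}
\usepackage{amsmath}
\begin{document}

% Bi-level DARTS objective: the outer problem tunes the architecture
% parameters \alpha while the inner problem trains the supernet weights w.
\begin{equation}
  \min_{\alpha}\; \mathcal{L}_{\mathrm{val}}\bigl(w^{*}(\alpha),\,\alpha\bigr)
  \quad \text{s.t.} \quad
  w^{*}(\alpha) \in \arg\min_{w}\, \mathcal{L}_{\mathrm{train}}(w,\,\alpha).
\end{equation}

% IFT hypergradient: differentiating the inner stationarity condition
% \nabla_{w} L_train(w^{*}(\alpha), \alpha) = 0 yields an expression that
% depends only on the inner solution w^{*}(\alpha), not on the path taken
% to reach it.
\begin{equation}
  \nabla_{\alpha} \mathcal{L}_{\mathrm{val}}
  = \partial_{\alpha} \mathcal{L}_{\mathrm{val}}
  - \nabla^{2}_{\alpha w} \mathcal{L}_{\mathrm{train}}
    \bigl(\nabla^{2}_{w w} \mathcal{L}_{\mathrm{train}}\bigr)^{-1}
    \nabla_{w} \mathcal{L}_{\mathrm{val}},
  \qquad \text{evaluated at } \bigl(w^{*}(\alpha),\,\alpha\bigr).
\end{equation}

\end{document}

The inverse Hessian above is what makes the exact hypergradient expensive; the paper's contribution, per the abstract, is a stochastic approximation of this computation with a convergence guarantee.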
{"_id":"vfQGqjBx5L6hLXJcr","bibbaseid":"zhang-su-pan-chang-abbasnejad-haffari-idartsdifferentiablearchitecturesearchwithstochasticimplicitgradients-2021","author_short":["Zhang, M.","Su, S.","Pan, S.","Chang, X.","Abbasnejad, E.","Haffari, R."],"bibdata":{"title":"iDARTS: Differentiable Architecture Search with Stochastic Implicit Gradients","type":"inproceedings","year":"2021","pages":"12557-12566 (CORE Ranked A*)","id":"7fed38ad-d5cb-3a46-8ba4-e88a314a543b","created":"2021-06-25T11:28:09.692Z","file_attached":false,"profile_id":"079852a8-52df-3ac8-a41c-8bebd97d6b2b","last_modified":"2022-04-10T12:11:37.265Z","read":false,"starred":false,"authored":"true","confirmed":"true","hidden":false,"citation_key":"Zhang2021","folder_uuids":"f3b8cf54-f818-49eb-a899-33ac83c5e58d,2327f56c-ffc0-4246-bac0-b9fa6098ebfb","private_publication":false,"abstract":"\\textitDifferentiable ARchiTecture Search (DARTS) has recently become the mainstream of neural architecture search (NAS) due to its efficiency and simplicity. With a gradient-based bi-level optimization, DARTS alternately optimizes the inner model weights and the outer architecture parameter in a weight-sharing supernet. A key challenge to the scalability and quality of the learned architectures is the need for differentiating through the inner-loop optimisation. While much has been discussed about several potentially fatal factors in DARTS, the architecture gradient, a.k.a. hypergradient, has received less attention. In this paper, we tackle the hypergradient computation in DARTS based on the implicit function theorem, making it only depends on the obtained solution to the inner-loop optimization and agnostic to the optimization path. To further reduce the computational requirements, we formulate a stochastic hypergradient approximation for differentiable NAS, and theoretically show that the architecture optimization with the proposed method, named iDARTS, is expected to converge to a stationary point. Comprehensive experiments on two NAS benchmark search spaces and the common NAS search space verify the effectiveness of our proposed method. It leads to architectures outperforming, with large margins, those learned by the baseline methods.","bibtype":"inproceedings","author":"Zhang, Miao and Su, Steven and Pan, Shirui and Chang, Xiaojun and Abbasnejad, Ehsan and Haffari, Reza","booktitle":"International Conference on Machine Learning (ICML)","bibtex":"@inproceedings{\n title = {iDARTS: Differentiable Architecture Search with Stochastic Implicit Gradients},\n type = {inproceedings},\n year = {2021},\n pages = {12557-12566 (CORE Ranked A*)},\n id = {7fed38ad-d5cb-3a46-8ba4-e88a314a543b},\n created = {2021-06-25T11:28:09.692Z},\n file_attached = {false},\n profile_id = {079852a8-52df-3ac8-a41c-8bebd97d6b2b},\n last_modified = {2022-04-10T12:11:37.265Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Zhang2021},\n folder_uuids = {f3b8cf54-f818-49eb-a899-33ac83c5e58d,2327f56c-ffc0-4246-bac0-b9fa6098ebfb},\n private_publication = {false},\n abstract = {\\textitDifferentiable ARchiTecture Search (DARTS) has recently become the mainstream of neural architecture search (NAS) due to its efficiency and simplicity. With a gradient-based bi-level optimization, DARTS alternately optimizes the inner model weights and the outer architecture parameter in a weight-sharing supernet. 
A key challenge to the scalability and quality of the learned architectures is the need for differentiating through the inner-loop optimisation. While much has been discussed about several potentially fatal factors in DARTS, the architecture gradient, a.k.a. hypergradient, has received less attention. In this paper, we tackle the hypergradient computation in DARTS based on the implicit function theorem, making it only depends on the obtained solution to the inner-loop optimization and agnostic to the optimization path. To further reduce the computational requirements, we formulate a stochastic hypergradient approximation for differentiable NAS, and theoretically show that the architecture optimization with the proposed method, named iDARTS, is expected to converge to a stationary point. Comprehensive experiments on two NAS benchmark search spaces and the common NAS search space verify the effectiveness of our proposed method. It leads to architectures outperforming, with large margins, those learned by the baseline methods.},\n bibtype = {inproceedings},\n author = {Zhang, Miao and Su, Steven and Pan, Shirui and Chang, Xiaojun and Abbasnejad, Ehsan and Haffari, Reza},\n booktitle = {International Conference on Machine Learning (ICML)}\n}","author_short":["Zhang, M.","Su, S.","Pan, S.","Chang, X.","Abbasnejad, E.","Haffari, R."],"biburl":"https://bibbase.org/service/mendeley/079852a8-52df-3ac8-a41c-8bebd97d6b2b","bibbaseid":"zhang-su-pan-chang-abbasnejad-haffari-idartsdifferentiablearchitecturesearchwithstochasticimplicitgradients-2021","role":"author","urls":{},"metadata":{"authorlinks":{}}},"bibtype":"inproceedings","biburl":"https://bibbase.org/service/mendeley/079852a8-52df-3ac8-a41c-8bebd97d6b2b","dataSources":["mKA5vx6kcS6ikoYhW","ya2CyA73rpZseyrZ8","fcdT59YHNhp9Euu5k","m7B7iLMuqoXuENyof","AoeZNpAr9D2ciGMwa","Byqq56wkTmdCSSibG","gmNB3pprCEczjrwyo","SRK2HijFQemp6YcG3","dJWKgXqQFEYPXFiST","HPBzCWvwA7wkE6Dnk","uEtXodz95HRDCHN22","2252seNhipfTmjEBQ","vpu5W6z2tNtLkKjsj","HmWAviNezgcH2jK9X","ukuCjJZTpTcMx84Tz","AcaDrFjGvc6GmT8Yb"],"keywords":[],"search_terms":["idarts","differentiable","architecture","search","stochastic","implicit","gradients","zhang","su","pan","chang","abbasnejad","haffari"],"title":"iDARTS: Differentiable Architecture Search with Stochastic Implicit Gradients","year":2021}