Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer. Shazeer, N., Mirhoseini, A., Maziarz, K., Davis, A., Le, Q. V., Hinton, G. E., & Dean, J. CoRR, 2017.  
Paper  bibtex
@article{DBLP:journals/corr/ShazeerMMDLHD17,
  author       = {Noam Shazeer and
                  Azalia Mirhoseini and
                  Krzysztof Maziarz and
                  Andy Davis and
                  Quoc V. Le and
                  Geoffrey E. Hinton and
                  Jeff Dean},
  title        = {Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts
                  Layer},
  journal      = {CoRR},
  volume       = {abs/1701.06538},
  year         = {2017},
  url          = {http://arxiv.org/abs/1701.06538},
  eprinttype    = {arXiv},
  eprint       = {1701.06538},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/ShazeerMMDLHD17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
} 
Downloads: 0
{"_id":"CKmFTepme4QhHuHt8","bibbaseid":"shazeer-mirhoseini-maziarz-davis-le-hinton-dean-outrageouslylargeneuralnetworksthesparselygatedmixtureofexpertslayer-2017","author_short":["Shazeer, N.","Mirhoseini, A.","Maziarz, K.","Davis, A.","Le, Q. V.","Hinton, G. E.","Dean, J."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Noam"],"propositions":[],"lastnames":["Shazeer"],"suffixes":[]},{"firstnames":["Azalia"],"propositions":[],"lastnames":["Mirhoseini"],"suffixes":[]},{"firstnames":["Krzysztof"],"propositions":[],"lastnames":["Maziarz"],"suffixes":[]},{"firstnames":["Andy"],"propositions":[],"lastnames":["Davis"],"suffixes":[]},{"firstnames":["Quoc","V."],"propositions":[],"lastnames":["Le"],"suffixes":[]},{"firstnames":["Geoffrey","E."],"propositions":[],"lastnames":["Hinton"],"suffixes":[]},{"firstnames":["Jeff"],"propositions":[],"lastnames":["Dean"],"suffixes":[]}],"title":"Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer","journal":"CoRR","volume":"abs/1701.06538","year":"2017","url":"http://arxiv.org/abs/1701.06538","eprinttype":"arXiv","eprint":"1701.06538","timestamp":"Mon, 13 Aug 2018 01:00:00 +0200","biburl":"https://dblp.org/rec/journals/corr/ShazeerMMDLHD17.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/ShazeerMMDLHD17,\n  author       = {Noam Shazeer and\n                  Azalia Mirhoseini and\n                  Krzysztof Maziarz and\n                  Andy Davis and\n                  Quoc V. Le and\n                  Geoffrey E. Hinton and\n                  Jeff Dean},\n  title        = {Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts\n                  Layer},\n  journal      = {CoRR},\n  volume       = {abs/1701.06538},\n  year         = {2017},\n  url          = {http://arxiv.org/abs/1701.06538},\n  eprinttype    = {arXiv},\n  eprint       = {1701.06538},\n  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},\n  biburl       = {https://dblp.org/rec/journals/corr/ShazeerMMDLHD17.bib},\n  bibsource    = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Shazeer, N.","Mirhoseini, A.","Maziarz, K.","Davis, A.","Le, Q. V.","Hinton, G. E.","Dean, J."],"key":"DBLP:journals/corr/ShazeerMMDLHD17","id":"DBLP:journals/corr/ShazeerMMDLHD17","bibbaseid":"shazeer-mirhoseini-maziarz-davis-le-hinton-dean-outrageouslylargeneuralnetworksthesparselygatedmixtureofexpertslayer-2017","role":"author","urls":{"Paper":"http://arxiv.org/abs/1701.06538"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"http://dblp.org/pers/tb2/d/Dean:Jeffrey","dataSources":["rMEZSGRpGTSfEZaHM","h7kKWXpJh2iaX92T5","cx4WvnDhXJhiLqdQo"],"keywords":[],"search_terms":["outrageously","large","neural","networks","sparsely","gated","mixture","experts","layer","shazeer","mirhoseini","maziarz","davis","le","hinton","dean"],"title":"Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer","year":2017}