Mega: Moving Average Equipped Gated Attention. Ma, X., Zhou, C., Kong, X., He, J., Gui, L., Neubig, G., May, J., & Zettlemoyer, L. 2022.
Mega: Moving Average Equipped Gated Attention. Paper: https://arxiv.org/abs/2209.10655. DOI: 10.48550/ARXIV.2209.10655.
% arXiv preprint; the eprint/archiveprefix/primaryclass fields expose the arXiv identifier to eprint-aware styles.
@misc{https://doi.org/10.48550/arxiv.2209.10655,
  author        = {Ma, Xuezhe and Zhou, Chunting and Kong, Xiang and He, Junxian and Gui, Liangke and Neubig, Graham and May, Jonathan and Zettlemoyer, Luke},
  title         = {Mega: Moving Average Equipped Gated Attention},
  publisher     = {arXiv},
  year          = {2022},
  eprint        = {2209.10655},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  doi           = {10.48550/ARXIV.2209.10655},
  url           = {https://arxiv.org/abs/2209.10655},
  keywords      = {Machine Learning (cs.LG), FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}

Downloads: 0