Learn Your Tokens: Word-Pooled Tokenization for Language Modeling. Thawani, A., Ghanekar, S., Zhu, X., & Pujara, J. In Findings of the Association for Computational Linguistics: EMNLP, 2023.
bibtex   
@comment{
  thawani:emnlpf23 -- conference paper entry.
  The doi and eprint/eprinttype fields hold the identifiers in the canonical
  bare form that bibliography styles read; they repeat the identifiers already
  embedded in doi_url and arxiv_url.
  acceptrate, arxiv_url, bib_url, doi_url, pdf_url and sec are not standard
  BibTeX fields and are ignored by standard styles. Presumably they are read by
  the tooling that generates the publication listing -- confirm before renaming
  or removing any of them.
}
@inproceedings{thawani:emnlpf23,
    author = "Thawani, Avijit and Ghanekar, Saurabh and Zhu, Xiaoyuan and Pujara, Jay",
    acceptrate = "46.2\%",
    arxiv_url = "https://arxiv.org/pdf/2310.11628",
    bib_url = "/pubs/bib/thawani-emnlpf23.bib",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP",
    doi = "10.18653/v1/2023.findings-emnlp.662",
    doi_url = "https://doi.org/10.18653/v1/2023.findings-emnlp.662",
    eprint = "2310.11628",
    eprinttype = "arXiv",
    pdf_url = "/pubs/2023/thawani-emnlpf23/thawani-emnlpf23.pdf",
    sec = "conf",
    title = "Learn Your Tokens: Word-Pooled Tokenization for Language Modeling",
    year = "2023"
}

Downloads: 0