LiveCodeBench: Holistic and Contamination Free Evaluation of Large Language Models for Code. Jain, N., Han, K., Gu, A., Li, W., Yan, F., Zhang, T., Wang, S., Solar-Lezama, A., Sen, K., & Stoica, I. CoRR, 2024.
LiveCodeBench: Holistic and Contamination Free Evaluation of Large Language Models for Code [link] Paper | doi | bibtex
@comment{arXiv preprint record (CoRR) exported from dblp. The braces around
  LiveCodeBench in the title keep its camelCase spelling under bibliography
  styles that convert titles to sentence case.}
@article{DBLP:journals/corr/abs-2403-07974,
  author       = {Jain, Naman and
                  Han, King and
                  Gu, Alex and
                  Li, Wen{-}Ding and
                  Yan, Fanjia and
                  Zhang, Tianjun and
                  Wang, Sida and
                  Solar{-}Lezama, Armando and
                  Sen, Koushik and
                  Stoica, Ion},
  title        = {{LiveCodeBench}: Holistic and Contamination Free Evaluation of Large
                  Language Models for Code},
  journal      = {CoRR},
  volume       = {abs/2403.07974},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2403.07974},
  doi          = {10.48550/arXiv.2403.07974},
  eprinttype   = {arXiv},
  eprint       = {2403.07974},
  timestamp    = {Thu, 04 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2403-07974.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0