CRUXEval: A Benchmark for Code Reasoning, Understanding and Execution. Gu, A., Rozière, B., Leather, H., Solar-Lezama, A., Synnaeve, G., & Wang, S. I. CoRR, 2024.
CRUXEval: A Benchmark for Code Reasoning, Understanding and Execution [Paper] [doi] [bibtex]
% arXiv preprint 2401.03065 (CoRR abs/2401.03065); record exported from DBLP.
% The acronym in the title is brace-protected so sentence-casing styles keep
% its capitalisation; author names use the unambiguous "Last, First" form.
@article{DBLP:journals/corr/abs-2401-03065,
  author       = {Gu, Alex and
                  Rozi{\`{e}}re, Baptiste and
                  Leather, Hugh and
                  Solar{-}Lezama, Armando and
                  Synnaeve, Gabriel and
                  Wang, Sida I.},
  title        = {{CRUXEval}: {A} Benchmark for Code Reasoning, Understanding and Execution},
  journal      = {CoRR},
  volume       = {abs/2401.03065},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2401.03065},
  doi          = {10.48550/ARXIV.2401.03065},
  eprinttype   = {arXiv},
  eprint       = {2401.03065},
  timestamp    = {Wed, 24 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2401-03065.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0