CRUXEval: A Benchmark for Code Reasoning, Understanding and Execution. Gu, A., Rozière, B., Leather, H. J., Solar-Lezama, A., Synnaeve, G., & Wang, S. In Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024, pages 16568–16621, 2024.
CRUXEval: A Benchmark for Code Reasoning, Understanding and Execution. Paper: https://proceedings.mlr.press/v235/gu24c.html (BibTeX record below)
@comment{
  ICML 2024 conference paper, record exported from DBLP.
  The entry is self-contained: series, volume and publisher are given here
  directly instead of being inherited through a crossref, because no parent
  proceedings entry accompanies this record. The volume number matches the
  v235 path component of the url field.
  The benchmark name in the title is brace-protected so that sentence-casing
  bibliography styles do not lowercase it.
}
@inproceedings{DBLP:conf/icml/GuRLSS024,
  author       = {Gu, Alex and
                  Rozi{\`e}re, Baptiste and
                  Leather, Hugh James and
                  Solar{-}Lezama, Armando and
                  Synnaeve, Gabriel and
                  Wang, Sida},
  title        = {{CRUXEval}: {A} Benchmark for Code Reasoning, Understanding and Execution},
  booktitle    = {Forty-first International Conference on Machine Learning, {ICML} 2024,
                  Vienna, Austria, July 21--27, 2024},
  series       = {Proceedings of Machine Learning Research},
  volume       = {235},
  pages        = {16568--16621},
  publisher    = {{PMLR}},
  year         = {2024},
  url          = {https://proceedings.mlr.press/v235/gu24c.html},
  timestamp    = {Mon, 09 Feb 2026 15:35:36 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/GuRLSS024.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0