LiveCodeBench: Holistic and Contamination Free Evaluation of Large Language Models for Code

LiveCodeBench: Holistic and Contamination Free Evaluation of Large Language Models for Code. Jain, N., Han, K., Gu, A., Li, W., Yan, F., Zhang, T., Wang, S., Solar-Lezama, A., Sen, K., & Stoica, I. CoRR, 2024.

Paper doi bibtex

@comment{
  arXiv preprint of the LiveCodeBench paper, exported from DBLP.
  DBLP files arXiv preprints under the pseudo-journal CoRR and uses
  abs/<arXiv id> as the volume. The arXiv identifier itself is carried
  by the eprint and eprinttype fields. The benchmark name in the title
  is brace-protected so sentence-casing styles keep its capitalisation.
}
@article{DBLP:journals/corr/abs-2403-07974,
  author       = {Jain, Naman and
                  Han, King and
                  Gu, Alex and
                  Li, Wen{-}Ding and
                  Yan, Fanjia and
                  Zhang, Tianjun and
                  Wang, Sida and
                  Solar{-}Lezama, Armando and
                  Sen, Koushik and
                  Stoica, Ion},
  title        = {{LiveCodeBench}: Holistic and Contamination Free Evaluation of Large
                  Language Models for Code},
  journal      = {CoRR},
  volume       = {abs/2403.07974},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2403.07974},
  doi          = {10.48550/ARXIV.2403.07974},
  eprinttype   = {arXiv},
  eprint       = {2403.07974},
  timestamp    = {Thu, 04 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2403-07974.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0

{"_id":"fGSfjMhaSxD9BtMDH","bibbaseid":"jain-han-gu-li-yan-zhang-wang-solarlezama-etal-livecodebenchholisticandcontaminationfreeevaluationoflargelanguagemodelsforcode-2024","author_short":["Jain, N.","Han, K.","Gu, A.","Li, W.","Yan, F.","Zhang, T.","Wang, S.","Solar-Lezama, A.","Sen, K.","Stoica, I."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Naman"],"propositions":[],"lastnames":["Jain"],"suffixes":[]},{"firstnames":["King"],"propositions":[],"lastnames":["Han"],"suffixes":[]},{"firstnames":["Alex"],"propositions":[],"lastnames":["Gu"],"suffixes":[]},{"firstnames":["Wen-Ding"],"propositions":[],"lastnames":["Li"],"suffixes":[]},{"firstnames":["Fanjia"],"propositions":[],"lastnames":["Yan"],"suffixes":[]},{"firstnames":["Tianjun"],"propositions":[],"lastnames":["Zhang"],"suffixes":[]},{"firstnames":["Sida"],"propositions":[],"lastnames":["Wang"],"suffixes":[]},{"firstnames":["Armando"],"propositions":[],"lastnames":["Solar-Lezama"],"suffixes":[]},{"firstnames":["Koushik"],"propositions":[],"lastnames":["Sen"],"suffixes":[]},{"firstnames":["Ion"],"propositions":[],"lastnames":["Stoica"],"suffixes":[]}],"title":"LiveCodeBench: Holistic and Contamination Free Evaluation of Large Language Models for Code","journal":"CoRR","volume":"abs/2403.07974","year":"2024","url":"https://doi.org/10.48550/arXiv.2403.07974","doi":"10.48550/ARXIV.2403.07974","eprinttype":"arXiv","eprint":"2403.07974","timestamp":"Thu, 04 Apr 2024 01:00:00 +0200","biburl":"https://dblp.org/rec/journals/corr/abs-2403-07974.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/abs-2403-07974,\n author = {Naman Jain and\n King Han and\n Alex Gu and\n Wen{-}Ding Li and\n Fanjia Yan and\n Tianjun Zhang and\n Sida Wang and\n Armando Solar{-}Lezama and\n Koushik Sen and\n Ion Stoica},\n title = {LiveCodeBench: Holistic and Contamination Free Evaluation of Large\n Language Models for Code},\n journal = {CoRR},\n volume = 
{abs/2403.07974},\n year = {2024},\n url = {https://doi.org/10.48550/arXiv.2403.07974},\n doi = {10.48550/ARXIV.2403.07974},\n eprinttype = {arXiv},\n eprint = {2403.07974},\n timestamp = {Thu, 04 Apr 2024 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/corr/abs-2403-07974.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Jain, N.","Han, K.","Gu, A.","Li, W.","Yan, F.","Zhang, T.","Wang, S.","Solar-Lezama, A.","Sen, K.","Stoica, I."],"key":"DBLP:journals/corr/abs-2403-07974","id":"DBLP:journals/corr/abs-2403-07974","bibbaseid":"jain-han-gu-li-yan-zhang-wang-solarlezama-etal-livecodebenchholisticandcontaminationfreeevaluationoflargelanguagemodelsforcode-2024","role":"author","urls":{"Paper":"https://doi.org/10.48550/arXiv.2403.07974"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"http://dblp.org/pers/tb2/s/Solar=Lezama:Armando","dataSources":["DAnxudWukpMXCtWy8"],"keywords":[],"search_terms":["livecodebench","holistic","contamination","free","evaluation","large","language","models","code","jain","han","gu","li","yan","zhang","wang","solar-lezama","sen","stoica"],"title":"LiveCodeBench: Holistic and Contamination Free Evaluation of Large Language Models for Code","year":2024}