AgentQuest: A Modular Benchmark Framework to Measure Progress and Improve LLM Agents

AgentQuest: A Modular Benchmark Framework to Measure Progress and Improve LLM Agents. Gioacchini, L., Siracusano, G., Sanvito, D., Gashteovski, K., Friede, D., Bifulco, R., & Lawrence, C. CoRR, 2024.

Paper doi bibtex

@article{DBLP:journals/corr/abs-2404-06411,
  author       = {Luca Gioacchini and
                  Giuseppe Siracusano and
                  Davide Sanvito and
                  Kiril Gashteovski and
                  David Friede and
                  Roberto Bifulco and
                  Carolin Lawrence},
  title        = {AgentQuest: {A} Modular Benchmark Framework to Measure Progress and
                  Improve {LLM} Agents},
  journal      = {CoRR},
  volume       = {abs/2404.06411},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2404.06411},
  doi          = {10.48550/ARXIV.2404.06411},
  eprinttype    = {arXiv},
  eprint       = {2404.06411},
  timestamp    = {Sun, 04 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2404-06411.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0

{"_id":"TiMPukGsajbWTXEAi","bibbaseid":"gioacchini-siracusano-sanvito-gashteovski-friede-bifulco-lawrence-agentquestamodularbenchmarkframeworktomeasureprogressandimprovellmagents-2024","author_short":["Gioacchini, L.","Siracusano, G.","Sanvito, D.","Gashteovski, K.","Friede, D.","Bifulco, R.","Lawrence, C."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Luca"],"propositions":[],"lastnames":["Gioacchini"],"suffixes":[]},{"firstnames":["Giuseppe"],"propositions":[],"lastnames":["Siracusano"],"suffixes":[]},{"firstnames":["Davide"],"propositions":[],"lastnames":["Sanvito"],"suffixes":[]},{"firstnames":["Kiril"],"propositions":[],"lastnames":["Gashteovski"],"suffixes":[]},{"firstnames":["David"],"propositions":[],"lastnames":["Friede"],"suffixes":[]},{"firstnames":["Roberto"],"propositions":[],"lastnames":["Bifulco"],"suffixes":[]},{"firstnames":["Carolin"],"propositions":[],"lastnames":["Lawrence"],"suffixes":[]}],"title":"AgentQuest: A Modular Benchmark Framework to Measure Progress and Improve LLM Agents","journal":"CoRR","volume":"abs/2404.06411","year":"2024","url":"https://doi.org/10.48550/arXiv.2404.06411","doi":"10.48550/ARXIV.2404.06411","eprinttype":"arXiv","eprint":"2404.06411","timestamp":"Sun, 04 Aug 2024 01:00:00 +0200","biburl":"https://dblp.org/rec/journals/corr/abs-2404-06411.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/abs-2404-06411,\n author = {Luca Gioacchini and\n Giuseppe Siracusano and\n Davide Sanvito and\n Kiril Gashteovski and\n David Friede and\n Roberto Bifulco and\n Carolin Lawrence},\n title = {AgentQuest: {A} Modular Benchmark Framework to Measure Progress and\n Improve {LLM} Agents},\n journal = {CoRR},\n volume = {abs/2404.06411},\n year = {2024},\n url = {https://doi.org/10.48550/arXiv.2404.06411},\n doi = {10.48550/ARXIV.2404.06411},\n eprinttype = {arXiv},\n eprint = {2404.06411},\n timestamp = {Sun, 04 Aug 2024 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/corr/abs-2404-06411.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Gioacchini, L.","Siracusano, G.","Sanvito, D.","Gashteovski, K.","Friede, D.","Bifulco, R.","Lawrence, C."],"key":"DBLP:journals/corr/abs-2404-06411","id":"DBLP:journals/corr/abs-2404-06411","bibbaseid":"gioacchini-siracusano-sanvito-gashteovski-friede-bifulco-lawrence-agentquestamodularbenchmarkframeworktomeasureprogressandimprovellmagents-2024","role":"author","urls":{"Paper":"https://doi.org/10.48550/arXiv.2404.06411"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://dblp.org/pid/81/8239.bib","dataSources":["7uFqemKiCfDrN67xY","2SkuutMJrBjfhnDPL"],"keywords":[],"search_terms":["agentquest","modular","benchmark","framework","measure","progress","improve","llm","agents","gioacchini","siracusano","sanvito","gashteovski","friede","bifulco","lawrence"],"title":"AgentQuest: A Modular Benchmark Framework to Measure Progress and Improve LLM Agents","year":2024}