AgentQuest: A Modular Benchmark Framework to Measure Progress and Improve LLM Agents. Gioacchini, L., Siracusano, G., Sanvito, D., Gashteovski, K., Friede, D., Bifulco, R., & Lawrence, C. CoRR, 2024.
AgentQuest: A Modular Benchmark Framework to Measure Progress and Improve LLM Agents [link]Paper  doi  bibtex   
% DBLP record for an arXiv preprint (eprint 2404.06411).
% The camelCase name in the title is brace-protected so sentence-casing
% styles do not lowercase it to "Agentquest".
@article{DBLP:journals/corr/abs-2404-06411,
  author       = {Gioacchini, Luca and
                  Siracusano, Giuseppe and
                  Sanvito, Davide and
                  Gashteovski, Kiril and
                  Friede, David and
                  Bifulco, Roberto and
                  Lawrence, Carolin},
  title        = {{AgentQuest}: {A} Modular Benchmark Framework to Measure Progress and
                  Improve {LLM} Agents},
  journal      = {CoRR},
  volume       = {abs/2404.06411},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2404.06411},
  doi          = {10.48550/ARXIV.2404.06411},
  eprinttype   = {arXiv},
  eprint       = {2404.06411},
  timestamp    = {Sun, 04 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2404-06411.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0