OSWorld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments. Xie, T., Zhang, D., Chen, J., Li, X., Zhao, S., Cao, R., Hua, T. J., Cheng, Z., Shin, D., Lei, F., Liu, Y., Xu, Y., Zhou, S., Savarese, S., Xiong, C., Zhong, V., & Yu, T. 2024.
Paper: https://arxiv.org/abs/2404.07972
BibTeX:
@misc{xie2024osworldbenchmarkingmultimodalagents,
title={OSWorld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments},
author={Tianbao Xie and Danyang Zhang and Jixuan Chen and Xiaochuan Li and Siheng Zhao and Ruisheng Cao and Toh Jing Hua and Zhoujun Cheng and Dongchan Shin and Fangyu Lei and Yitao Liu and Yiheng Xu and Shuyan Zhou and Silvio Savarese and Caiming Xiong and Victor Zhong and Tao Yu},
year={2024},
eprint={2404.07972},
archivePrefix={arXiv},
primaryClass={cs.AI},
url={https://arxiv.org/abs/2404.07972},
}
Downloads: 0
{"_id":"QyXwfj6ahcGR2m36A","bibbaseid":"xie-zhang-chen-li-zhao-cao-hua-cheng-etal-osworldbenchmarkingmultimodalagentsforopenendedtasksinrealcomputerenvironments-2024","author_short":["Xie, T.","Zhang, D.","Chen, J.","Li, X.","Zhao, S.","Cao, R.","Hua, T. J.","Cheng, Z.","Shin, D.","Lei, F.","Liu, Y.","Xu, Y.","Zhou, S.","Savarese, S.","Xiong, C.","Zhong, V.","Yu, T."],"bibdata":{"bibtype":"misc","type":"misc","title":"OSWorld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments","author":[{"firstnames":["Tianbao"],"propositions":[],"lastnames":["Xie"],"suffixes":[]},{"firstnames":["Danyang"],"propositions":[],"lastnames":["Zhang"],"suffixes":[]},{"firstnames":["Jixuan"],"propositions":[],"lastnames":["Chen"],"suffixes":[]},{"firstnames":["Xiaochuan"],"propositions":[],"lastnames":["Li"],"suffixes":[]},{"firstnames":["Siheng"],"propositions":[],"lastnames":["Zhao"],"suffixes":[]},{"firstnames":["Ruisheng"],"propositions":[],"lastnames":["Cao"],"suffixes":[]},{"firstnames":["Toh","Jing"],"propositions":[],"lastnames":["Hua"],"suffixes":[]},{"firstnames":["Zhoujun"],"propositions":[],"lastnames":["Cheng"],"suffixes":[]},{"firstnames":["Dongchan"],"propositions":[],"lastnames":["Shin"],"suffixes":[]},{"firstnames":["Fangyu"],"propositions":[],"lastnames":["Lei"],"suffixes":[]},{"firstnames":["Yitao"],"propositions":[],"lastnames":["Liu"],"suffixes":[]},{"firstnames":["Yiheng"],"propositions":[],"lastnames":["Xu"],"suffixes":[]},{"firstnames":["Shuyan"],"propositions":[],"lastnames":["Zhou"],"suffixes":[]},{"firstnames":["Silvio"],"propositions":[],"lastnames":["Savarese"],"suffixes":[]},{"firstnames":["Caiming"],"propositions":[],"lastnames":["Xiong"],"suffixes":[]},{"firstnames":["Victor"],"propositions":[],"lastnames":["Zhong"],"suffixes":[]},{"firstnames":["Tao"],"propositions":[],"lastnames":["Yu"],"suffixes":[]}],"year":"2024","eprint":"2404.07972","archiveprefix":"arXiv","primaryclass":"cs.AI","url":"https://arxiv.org/abs/2404.07972",
"bibtex":"@misc{xie2024osworldbenchmarkingmultimodalagents,\n title={OSWorld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments}, \n author={Tianbao Xie and Danyang Zhang and Jixuan Chen and Xiaochuan Li and Siheng Zhao and Ruisheng Cao and Toh Jing Hua and Zhoujun Cheng and Dongchan Shin and Fangyu Lei and Yitao Liu and Yiheng Xu and Shuyan Zhou and Silvio Savarese and Caiming Xiong and Victor Zhong and Tao Yu},\n year={2024},\n eprint={2404.07972},\n archivePrefix={arXiv},\n primaryClass={cs.AI},\n url={https://arxiv.org/abs/2404.07972}, \n}\n","author_short":["Xie, T.","Zhang, D.","Chen, J.","Li, X.","Zhao, S.","Cao, R.","Hua, T. J.","Cheng, Z.","Shin, D.","Lei, F.","Liu, Y.","Xu, Y.","Zhou, S.","Savarese, S.","Xiong, C.","Zhong, V.","Yu, T."],"key":"xie2024osworldbenchmarkingmultimodalagents","id":"xie2024osworldbenchmarkingmultimodalagents","bibbaseid":"xie-zhang-chen-li-zhao-cao-hua-cheng-etal-osworldbenchmarkingmultimodalagentsforopenendedtasksinrealcomputerenvironments-2024","role":"author","urls":{"Paper":"https://arxiv.org/abs/2404.07972"},"metadata":{"authorlinks":{}},"html":""},"bibtype":"misc","biburl":"https://bibbase.org/network/files/eLFhJWeFgePnGyx8Z","dataSources":["FCFsT25DRsdsN8aKw","pMrRPKXepAmKuGcND","d72xZhMLNvhbmocKT"],"keywords":[],"search_terms":["osworld","benchmarking","multimodal","agents","open","ended","tasks","real","computer","environments","xie","zhang","chen","li","zhao","cao","hua","cheng","shin","lei","liu","xu","zhou","savarese","xiong","zhong","yu"],"title":"OSWorld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments","year":2024}