BabyVLM-V2: Toward Developmentally Grounded Pretraining and Benchmarking of Vision Foundation Models. Wang, S., Wang, W., Wang, Z., Whitton, M., Wakeham, M., Chandra, A., Huang, J., Zhu, P., Chen, H., Li, D., Li, J., Li, S., Zagula, A., Zhao, A., Zhu, A., Nakamura, S., Yamamoto, Y., Yokono, J. J., Mueller, A., Plummer, B. A., Saenko, K., Saligrama, V., & Gong, B. CoRR, 2025.
Paper doi bibtex @article{DBLP:journals/corr/abs-2512-10932,
author = {Shengao Wang and
Wenqi Wang and
Zecheng Wang and
Max Whitton and
Michael Wakeham and
Arjun Chandra and
Joey Huang and
Pengyue Zhu and
Helen Chen and
David Li and
Jeffrey Li and
Shawn Li and
Andrew Zagula and
Amy Zhao and
Andrew Zhu and
Sayaka Nakamura and
Yuki Yamamoto and
Jerry Jun Yokono and
Aaron Mueller and
Bryan A. Plummer and
Kate Saenko and
Venkatesh Saligrama and
Boqing Gong},
title = {BabyVLM-V2: Toward Developmentally Grounded Pretraining and Benchmarking
of Vision Foundation Models},
journal = {CoRR},
volume = {abs/2512.10932},
year = {2025},
url = {https://doi.org/10.48550/arXiv.2512.10932},
doi = {10.48550/ARXIV.2512.10932},
eprinttype = {arXiv},
eprint = {2512.10932},
timestamp = {Fri, 23 Jan 2026 00:00:00 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2512-10932.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
Downloads: 0
{"_id":"5QZLaFBp3TSJArbHF","bibbaseid":"wang-wang-wang-whitton-wakeham-chandra-huang-zhu-etal-babyvlmv2towarddevelopmentallygroundedpretrainingandbenchmarkingofvisionfoundationmodels-2025","author_short":["Wang, S.","Wang, W.","Wang, Z.","Whitton, M.","Wakeham, M.","Chandra, A.","Huang, J.","Zhu, P.","Chen, H.","Li, D.","Li, J.","Li, S.","Zagula, A.","Zhao, A.","Zhu, A.","Nakamura, S.","Yamamoto, Y.","Yokono, J. J.","Mueller, A.","Plummer, B. A.","Saenko, K.","Saligrama, V.","Gong, B."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Shengao"],"propositions":[],"lastnames":["Wang"],"suffixes":[]},{"firstnames":["Wenqi"],"propositions":[],"lastnames":["Wang"],"suffixes":[]},{"firstnames":["Zecheng"],"propositions":[],"lastnames":["Wang"],"suffixes":[]},{"firstnames":["Max"],"propositions":[],"lastnames":["Whitton"],"suffixes":[]},{"firstnames":["Michael"],"propositions":[],"lastnames":["Wakeham"],"suffixes":[]},{"firstnames":["Arjun"],"propositions":[],"lastnames":["Chandra"],"suffixes":[]},{"firstnames":["Joey"],"propositions":[],"lastnames":["Huang"],"suffixes":[]},{"firstnames":["Pengyue"],"propositions":[],"lastnames":["Zhu"],"suffixes":[]},{"firstnames":["Helen"],"propositions":[],"lastnames":["Chen"],"suffixes":[]},{"firstnames":["David"],"propositions":[],"lastnames":["Li"],"suffixes":[]},{"firstnames":["Jeffrey"],"propositions":[],"lastnames":["Li"],"suffixes":[]},{"firstnames":["Shawn"],"propositions":[],"lastnames":["Li"],"suffixes":[]},{"firstnames":["Andrew"],"propositions":[],"lastnames":["Zagula"],"suffixes":[]},{"firstnames":["Amy"],"propositions":[],"lastnames":["Zhao"],"suffixes":[]},{"firstnames":["Andrew"],"propositions":[],"lastnames":["Zhu"],"suffixes":[]},{"firstnames":["Sayaka"],"propositions":[],"lastnames":["Nakamura"],"suffixes":[]},{"firstnames":["Yuki"],"propositions":[],"lastnames":["Yamamoto"],"suffixes":[]},{"firstnames":["Jerry","Jun"],"propositions":[],"lastnames":["Yokono"],"suffixes":[]},{"firstnames":["Aaron"],"propositions":[],"lastnames":["Mueller"],"suffixes":[]},{"firstnames":["Bryan","A."],"propositions":[],"lastnames":["Plummer"],"suffixes":[]},{"firstnames":["Kate"],"propositions":[],"lastnames":["Saenko"],"suffixes":[]},{"firstnames":["Venkatesh"],"propositions":[],"lastnames":["Saligrama"],"suffixes":[]},{"firstnames":["Boqing"],"propositions":[],"lastnames":["Gong"],"suffixes":[]}],"title":"BabyVLM-V2: Toward Developmentally Grounded Pretraining and Benchmarking of Vision Foundation Models","journal":"CoRR","volume":"abs/2512.10932","year":"2025","url":"https://doi.org/10.48550/arXiv.2512.10932","doi":"10.48550/ARXIV.2512.10932","eprinttype":"arXiv","eprint":"2512.10932","timestamp":"Fri, 23 Jan 2026 00:00:00 +0100","biburl":"https://dblp.org/rec/journals/corr/abs-2512-10932.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/abs-2512-10932,\n author = {Shengao Wang and\n Wenqi Wang and\n Zecheng Wang and\n Max Whitton and\n Michael Wakeham and\n Arjun Chandra and\n Joey Huang and\n Pengyue Zhu and\n Helen Chen and\n David Li and\n Jeffrey Li and\n Shawn Li and\n Andrew Zagula and\n Amy Zhao and\n Andrew Zhu and\n Sayaka Nakamura and\n Yuki Yamamoto and\n Jerry Jun Yokono and\n Aaron Mueller and\n Bryan A. Plummer and\n Kate Saenko and\n Venkatesh Saligrama and\n Boqing Gong},\n title = {BabyVLM-V2: Toward Developmentally Grounded Pretraining and Benchmarking\n of Vision Foundation Models},\n journal = {CoRR},\n volume = {abs/2512.10932},\n year = {2025},\n url = {https://doi.org/10.48550/arXiv.2512.10932},\n doi = {10.48550/ARXIV.2512.10932},\n eprinttype = {arXiv},\n eprint = {2512.10932},\n timestamp = {Fri, 23 Jan 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/corr/abs-2512-10932.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Wang, S.","Wang, W.","Wang, Z.","Whitton, M.","Wakeham, M.","Chandra, A.","Huang, J.","Zhu, P.","Chen, H.","Li, D.","Li, J.","Li, S.","Zagula, A.","Zhao, A.","Zhu, A.","Nakamura, S.","Yamamoto, Y.","Yokono, J. J.","Mueller, A.","Plummer, B. A.","Saenko, K.","Saligrama, V.","Gong, B."],"key":"DBLP:journals/corr/abs-2512-10932","id":"DBLP:journals/corr/abs-2512-10932","bibbaseid":"wang-wang-wang-whitton-wakeham-chandra-huang-zhu-etal-babyvlmv2towarddevelopmentallygroundedpretrainingandbenchmarkingofvisionfoundationmodels-2025","role":"author","urls":{"Paper":"https://doi.org/10.48550/arXiv.2512.10932"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://dblp.org/pid/67/4721.bib","dataSources":["vDgwZPGAKNTHeEubz","X3pp7jX6PpQ4DfzpE"],"keywords":[],"search_terms":["babyvlm","toward","developmentally","grounded","pretraining","benchmarking","vision","foundation","models","wang","wang","wang","whitton","wakeham","chandra","huang","zhu","chen","li","li","li","zagula","zhao","zhu","nakamura","yamamoto","yokono","mueller","plummer","saenko","saligrama","gong"],"title":"BabyVLM-V2: Toward Developmentally Grounded Pretraining and Benchmarking of Vision Foundation Models","year":2025}