{"_id":"sGjDGJKiZdjBpeTEY","bibbaseid":"huang-pei-aliannejadi-sun-ahsan-yu-ren-csar-etal-legocobuilderexploringfinegrainedvisionlanguagemodelingformultimodallegoassemblyassistants-2025","author_short":["Huang, H.","Pei, J.","Aliannejadi, M.","Sun, X.","Ahsan, M.","Yu, C.","Ren, Z.","César, P.","Wang, J."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Haochen"],"propositions":[],"lastnames":["Huang"],"suffixes":[]},{"firstnames":["Jiahuan"],"propositions":[],"lastnames":["Pei"],"suffixes":[]},{"firstnames":["Mohammad"],"propositions":[],"lastnames":["Aliannejadi"],"suffixes":[]},{"firstnames":["Xin"],"propositions":[],"lastnames":["Sun"],"suffixes":[]},{"firstnames":["Moonisa"],"propositions":[],"lastnames":["Ahsan"],"suffixes":[]},{"firstnames":["Chuang"],"propositions":[],"lastnames":["Yu"],"suffixes":[]},{"firstnames":["Zhaochun"],"propositions":[],"lastnames":["Ren"],"suffixes":[]},{"firstnames":["Pablo"],"propositions":[],"lastnames":["César"],"suffixes":[]},{"firstnames":["Junxiao"],"propositions":[],"lastnames":["Wang"],"suffixes":[]}],"title":"LEGO Co-builder: Exploring Fine-Grained Vision-Language Modeling for Multimodal LEGO Assembly Assistants","journal":"CoRR","volume":"abs/2507.05515","year":"2025","url":"https://doi.org/10.48550/arXiv.2507.05515","doi":"10.48550/ARXIV.2507.05515","eprinttype":"arXiv","eprint":"2507.05515","timestamp":"Sat, 15 Nov 2025 00:00:00 +0100","biburl":"https://dblp.org/rec/journals/corr/abs-2507-05515.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/abs-2507-05515,\n author = {Haochen Huang and\n Jiahuan Pei and\n Mohammad Aliannejadi and\n Xin Sun and\n Moonisa Ahsan and\n Chuang Yu and\n Zhaochun Ren and\n Pablo C{\\'{e}}sar and\n Junxiao Wang},\n title = {{LEGO} Co-builder: Exploring Fine-Grained Vision-Language Modeling\n for Multimodal {LEGO} Assembly Assistants},\n journal = {CoRR},\n volume = {abs/2507.05515},\n year = {2025},\n url = {https://doi.org/10.48550/arXiv.2507.05515},\n doi = {10.48550/ARXIV.2507.05515},\n eprinttype = {arXiv},\n eprint = {2507.05515},\n timestamp = {Sat, 15 Nov 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/corr/abs-2507-05515.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Huang, H.","Pei, J.","Aliannejadi, M.","Sun, X.","Ahsan, M.","Yu, C.","Ren, Z.","César, P.","Wang, J."],"key":"DBLP:journals/corr/abs-2507-05515","id":"DBLP:journals/corr/abs-2507-05515","bibbaseid":"huang-pei-aliannejadi-sun-ahsan-yu-ren-csar-etal-legocobuilderexploringfinegrainedvisionlanguagemodelingformultimodallegoassemblyassistants-2025","role":"author","urls":{"Paper":"https://doi.org/10.48550/arXiv.2507.05515"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://dblp.org/pid/294/7094.bib","dataSources":["aR5WQWfNHnbm4WC3F"],"keywords":[],"search_terms":["lego","builder","exploring","fine","grained","vision","language","modeling","multimodal","lego","assembly","assistants","huang","pei","aliannejadi","sun","ahsan","yu","ren","césar","wang"],"title":"LEGO Co-builder: Exploring Fine-Grained Vision-Language Modeling for Multimodal LEGO Assembly Assistants","year":2025}