AlanaVLM: A Multimodal Embodied AI Foundation Model for Egocentric Video Understanding. Suglia, A., Greco, C., Baker, K., Part, J. L., Papaioannou, I., Eshghi, A., Konstas, I., & Lemon, O. In Al-Onaizan, Y., Bansal, M., & Chen, Y., editors, Findings of the Association for Computational Linguistics: EMNLP 2024, Miami, Florida, USA, November 12-16, 2024, pages 11101–11122, 2024. Association for Computational Linguistics.
Paper doi bibtex @inproceedings{DBLP:conf/emnlp/Suglia0BPPEKL24,
author = {Alessandro Suglia and
Claudio Greco and
Katie Baker and
Jose L. Part and
Ioannis Papaioannou and
Arash Eshghi and
Ioannis Konstas and
Oliver Lemon},
editor = {Yaser Al{-}Onaizan and
Mohit Bansal and
Yun{-}Nung Chen},
title = {AlanaVLM: {A} Multimodal Embodied {AI} Foundation Model for Egocentric
Video Understanding},
booktitle = {Findings of the Association for Computational Linguistics: {EMNLP}
2024, Miami, Florida, USA, November 12-16, 2024},
pages = {11101--11122},
publisher = {Association for Computational Linguistics},
year = {2024},
url = {https://doi.org/10.18653/v1/2024.findings-emnlp.649},
doi = {10.18653/V1/2024.FINDINGS-EMNLP.649},
timestamp = {Fri, 13 Jun 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/conf/emnlp/Suglia0BPPEKL24.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
Downloads: 0
{"_id":"6jSuvK2beTni2Hhff","bibbaseid":"suglia-greco-baker-part-papaioannou-eshghi-konstas-lemon-alanavlmamultimodalembodiedaifoundationmodelforegocentricvideounderstanding-2024","author_short":["Suglia, A.","Greco, C.","Baker, K.","Part, J. L.","Papaioannou, I.","Eshghi, A.","Konstas, I.","Lemon, O."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","author":[{"firstnames":["Alessandro"],"propositions":[],"lastnames":["Suglia"],"suffixes":[]},{"firstnames":["Claudio"],"propositions":[],"lastnames":["Greco"],"suffixes":[]},{"firstnames":["Katie"],"propositions":[],"lastnames":["Baker"],"suffixes":[]},{"firstnames":["Jose","L."],"propositions":[],"lastnames":["Part"],"suffixes":[]},{"firstnames":["Ioannis"],"propositions":[],"lastnames":["Papaioannou"],"suffixes":[]},{"firstnames":["Arash"],"propositions":[],"lastnames":["Eshghi"],"suffixes":[]},{"firstnames":["Ioannis"],"propositions":[],"lastnames":["Konstas"],"suffixes":[]},{"firstnames":["Oliver"],"propositions":[],"lastnames":["Lemon"],"suffixes":[]}],"editor":[{"firstnames":["Yaser"],"propositions":[],"lastnames":["Al-Onaizan"],"suffixes":[]},{"firstnames":["Mohit"],"propositions":[],"lastnames":["Bansal"],"suffixes":[]},{"firstnames":["Yun-Nung"],"propositions":[],"lastnames":["Chen"],"suffixes":[]}],"title":"AlanaVLM: A Multimodal Embodied AI Foundation Model for Egocentric Video Understanding","booktitle":"Findings of the Association for Computational Linguistics: EMNLP 2024, Miami, Florida, USA, November 12-16, 2024","pages":"11101–11122","publisher":"Association for Computational Linguistics","year":"2024","url":"https://doi.org/10.18653/v1/2024.findings-emnlp.649","doi":"10.18653/V1/2024.FINDINGS-EMNLP.649","timestamp":"Fri, 13 Jun 2025 01:00:00 +0200","biburl":"https://dblp.org/rec/conf/emnlp/Suglia0BPPEKL24.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@inproceedings{DBLP:conf/emnlp/Suglia0BPPEKL24,\n author = {Alessandro Suglia and\n Claudio Greco and\n Katie Baker and\n Jose L. Part and\n Ioannis Papaioannou and\n Arash Eshghi and\n Ioannis Konstas and\n Oliver Lemon},\n editor = {Yaser Al{-}Onaizan and\n Mohit Bansal and\n Yun{-}Nung Chen},\n title = {AlanaVLM: {A} Multimodal Embodied {AI} Foundation Model for Egocentric\n Video Understanding},\n booktitle = {Findings of the Association for Computational Linguistics: {EMNLP}\n 2024, Miami, Florida, USA, November 12-16, 2024},\n pages = {11101--11122},\n publisher = {Association for Computational Linguistics},\n year = {2024},\n url = {https://doi.org/10.18653/v1/2024.findings-emnlp.649},\n doi = {10.18653/V1/2024.FINDINGS-EMNLP.649},\n timestamp = {Fri, 13 Jun 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/conf/emnlp/Suglia0BPPEKL24.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Suglia, A.","Greco, C.","Baker, K.","Part, J. L.","Papaioannou, I.","Eshghi, A.","Konstas, I.","Lemon, O."],"editor_short":["Al-Onaizan, Y.","Bansal, M.","Chen, Y."],"key":"DBLP:conf/emnlp/Suglia0BPPEKL24","id":"DBLP:conf/emnlp/Suglia0BPPEKL24","bibbaseid":"suglia-greco-baker-part-papaioannou-eshghi-konstas-lemon-alanavlmamultimodalembodiedaifoundationmodelforegocentricvideounderstanding-2024","role":"author","urls":{"Paper":"https://doi.org/10.18653/v1/2024.findings-emnlp.649"},"metadata":{"authorlinks":{}}},"bibtype":"inproceedings","biburl":"https://dblp.org/pid/170/6698.bib","dataSources":["TwxQ8NsSNCEBhLh2T"],"keywords":[],"search_terms":["alanavlm","multimodal","embodied","foundation","model","egocentric","video","understanding","suglia","greco","baker","part","papaioannou","eshghi","konstas","lemon"],"title":"AlanaVLM: A Multimodal Embodied AI Foundation Model for Egocentric Video Understanding","year":2024}