AlanaVLM: A Multimodal Embodied AI Foundation Model for Egocentric Video Understanding

AlanaVLM: A Multimodal Embodied AI Foundation Model for Egocentric Video Understanding. Suglia, A., Greco, C., Baker, K., Part, J. L., Papaioannou, I., Eshghi, A., Konstas, I., & Lemon, O. CoRR, 2024.

Paper doi bibtex

% DBLP record for the arXiv (CoRR) preprint of AlanaVLM.
% Case-sensitive title words (the model name, the post-colon "A", and the
% acronym "AI") are brace-protected so sentence-casing styles keep them intact.
@article{DBLP:journals/corr/abs-2406-13807,
  author       = {Alessandro Suglia and
                  Claudio Greco and
                  Katie Baker and
                  Jose L. Part and
                  Ioannis Papaioannou and
                  Arash Eshghi and
                  Ioannis Konstas and
                  Oliver Lemon},
  title        = {{AlanaVLM}: {A} Multimodal Embodied {AI} Foundation Model for Egocentric
                  Video Understanding},
  journal      = {CoRR},
  volume       = {abs/2406.13807},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2406.13807},
  doi          = {10.48550/ARXIV.2406.13807},
  eprinttype   = {arXiv},
  eprint       = {2406.13807},
  timestamp    = {Tue, 14 Oct 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2406-13807.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0

{"_id":"6jSuvK2beTni2Hhff","bibbaseid":"suglia-greco-baker-part-papaioannou-eshghi-konstas-lemon-alanavlmamultimodalembodiedaifoundationmodelforegocentricvideounderstanding-2024","author_short":["Suglia, A.","Greco, C.","Baker, K.","Part, J. L.","Papaioannou, I.","Eshghi, A.","Konstas, I.","Lemon, O."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Alessandro"],"propositions":[],"lastnames":["Suglia"],"suffixes":[]},{"firstnames":["Claudio"],"propositions":[],"lastnames":["Greco"],"suffixes":[]},{"firstnames":["Katie"],"propositions":[],"lastnames":["Baker"],"suffixes":[]},{"firstnames":["Jose","L."],"propositions":[],"lastnames":["Part"],"suffixes":[]},{"firstnames":["Ioannis"],"propositions":[],"lastnames":["Papaioannou"],"suffixes":[]},{"firstnames":["Arash"],"propositions":[],"lastnames":["Eshghi"],"suffixes":[]},{"firstnames":["Ioannis"],"propositions":[],"lastnames":["Konstas"],"suffixes":[]},{"firstnames":["Oliver"],"propositions":[],"lastnames":["Lemon"],"suffixes":[]}],"title":"AlanaVLM: A Multimodal Embodied AI Foundation Model for Egocentric Video Understanding","journal":"CoRR","volume":"abs/2406.13807","year":"2024","url":"https://doi.org/10.48550/arXiv.2406.13807","doi":"10.48550/ARXIV.2406.13807","eprinttype":"arXiv","eprint":"2406.13807","timestamp":"Tue, 14 Oct 2025 01:00:00 +0200","biburl":"https://dblp.org/rec/journals/corr/abs-2406-13807.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/abs-2406-13807,\n author = {Alessandro Suglia and\n Claudio Greco and\n Katie Baker and\n Jose L. 
Part and\n Ioannis Papaioannou and\n Arash Eshghi and\n Ioannis Konstas and\n Oliver Lemon},\n title = {AlanaVLM: {A} Multimodal Embodied {AI} Foundation Model for Egocentric\n Video Understanding},\n journal = {CoRR},\n volume = {abs/2406.13807},\n year = {2024},\n url = {https://doi.org/10.48550/arXiv.2406.13807},\n doi = {10.48550/ARXIV.2406.13807},\n eprinttype = {arXiv},\n eprint = {2406.13807},\n timestamp = {Tue, 14 Oct 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/corr/abs-2406-13807.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Suglia, A.","Greco, C.","Baker, K.","Part, J. L.","Papaioannou, I.","Eshghi, A.","Konstas, I.","Lemon, O."],"key":"DBLP:journals/corr/abs-2406-13807","id":"DBLP:journals/corr/abs-2406-13807","bibbaseid":"suglia-greco-baker-part-papaioannou-eshghi-konstas-lemon-alanavlmamultimodalembodiedaifoundationmodelforegocentricvideounderstanding-2024","role":"author","urls":{"Paper":"https://doi.org/10.48550/arXiv.2406.13807"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://dblp.org/pid/170/6698.bib","dataSources":["TwxQ8NsSNCEBhLh2T"],"keywords":[],"search_terms":["alanavlm","multimodal","embodied","foundation","model","egocentric","video","understanding","suglia","greco","baker","part","papaioannou","eshghi","konstas","lemon"],"title":"AlanaVLM: A Multimodal Embodied AI Foundation Model for Egocentric Video Understanding","year":2024}