SnapNTell: Enhancing Entity-Centric Visual Question Answering with Retrieval Augmented Multimodal LLM. Qiu, J., Madotto, A., Lin, Z., Crook, P. A., Xu, Y. E., Dong, X. L., Faloutsos, C., Li, L., Damavandi, B., & Moon, S. CoRR, 2024.
SnapNTell: Enhancing Entity-Centric Visual Question Answering with Retrieval Augmented Multimodal LLM [link]Paper  doi  bibtex   
@comment{arXiv preprint record as exported by DBLP (see the biburl and bibsource fields). The citation key is DBLP's own and is kept unchanged so existing cite commands keep resolving.}
@article{DBLP:journals/corr/abs-2403-04735,
  author       = {Qiu, Jielin and
                  Madotto, Andrea and
                  Lin, Zhaojiang and
                  Crook, Paul A. and
                  Xu, Yifan Ethan and
                  Dong, Xin Luna and
                  Faloutsos, Christos and
                  Li, Lei and
                  Damavandi, Babak and
                  Moon, Seungwhan},
  title        = {{SnapNTell}: Enhancing Entity-Centric Visual Question Answering with
                  Retrieval Augmented Multimodal {LLM}},
  journal      = {CoRR},
  volume       = {abs/2403.04735},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2403.04735},
  doi          = {10.48550/ARXIV.2403.04735},
  eprinttype   = {arXiv},
  eprint       = {2403.04735},
  timestamp    = {Thu, 25 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2403-04735.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0