SnapNTell: Enhancing Entity-Centric Visual Question Answering with Retrieval Augmented Multimodal LLM

SnapNTell: Enhancing Entity-Centric Visual Question Answering with Retrieval Augmented Multimodal LLM. Qiu, J., Madotto, A., Lin, Z., Crook, P. A., Xu, Y. E., Dong, X. L., Faloutsos, C., Li, L., Damavandi, B., & Moon, S. CoRR, 2024.

Paper doi bibtex

% arXiv preprint record (dblp "CoRR" convention: journal = CoRR, volume = abs/<arXiv id>).
% The camelCase name in the title is braced so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2403-04735,
  author       = {Qiu, Jielin and
                  Madotto, Andrea and
                  Lin, Zhaojiang and
                  Crook, Paul A. and
                  Xu, Yifan Ethan and
                  Dong, Xin Luna and
                  Faloutsos, Christos and
                  Li, Lei and
                  Damavandi, Babak and
                  Moon, Seungwhan},
  title        = {{SnapNTell}: Enhancing Entity-Centric Visual Question Answering with
                  Retrieval Augmented Multimodal {LLM}},
  journal      = {CoRR},
  volume       = {abs/2403.04735},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2403.04735},
  doi          = {10.48550/ARXIV.2403.04735},
  eprinttype   = {arXiv},
  eprint       = {2403.04735},
  timestamp    = {Thu, 25 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2403-04735.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Downloads: 0

{"_id":"gTsyqNdki4W9SyMrv","bibbaseid":"qiu-madotto-lin-crook-xu-dong-faloutsos-li-etal-snapntellenhancingentitycentricvisualquestionansweringwithretrievalaugmentedmultimodalllm-2024","author_short":["Qiu, J.","Madotto, A.","Lin, Z.","Crook, P. A.","Xu, Y. E.","Dong, X. L.","Faloutsos, C.","Li, L.","Damavandi, B.","Moon, S."],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Jielin"],"propositions":[],"lastnames":["Qiu"],"suffixes":[]},{"firstnames":["Andrea"],"propositions":[],"lastnames":["Madotto"],"suffixes":[]},{"firstnames":["Zhaojiang"],"propositions":[],"lastnames":["Lin"],"suffixes":[]},{"firstnames":["Paul","A."],"propositions":[],"lastnames":["Crook"],"suffixes":[]},{"firstnames":["Yifan","Ethan"],"propositions":[],"lastnames":["Xu"],"suffixes":[]},{"firstnames":["Xin","Luna"],"propositions":[],"lastnames":["Dong"],"suffixes":[]},{"firstnames":["Christos"],"propositions":[],"lastnames":["Faloutsos"],"suffixes":[]},{"firstnames":["Lei"],"propositions":[],"lastnames":["Li"],"suffixes":[]},{"firstnames":["Babak"],"propositions":[],"lastnames":["Damavandi"],"suffixes":[]},{"firstnames":["Seungwhan"],"propositions":[],"lastnames":["Moon"],"suffixes":[]}],"title":"SnapNTell: Enhancing Entity-Centric Visual Question Answering with Retrieval Augmented Multimodal LLM","journal":"CoRR","volume":"abs/2403.04735","year":"2024","url":"https://doi.org/10.48550/arXiv.2403.04735","doi":"10.48550/ARXIV.2403.04735","eprinttype":"arXiv","eprint":"2403.04735","timestamp":"Thu, 25 Apr 2024 01:00:00 +0200","biburl":"https://dblp.org/rec/journals/corr/abs-2403-04735.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@article{DBLP:journals/corr/abs-2403-04735,\n author = {Jielin Qiu and\n Andrea Madotto and\n Zhaojiang Lin and\n Paul A. 
Crook and\n Yifan Ethan Xu and\n Xin Luna Dong and\n Christos Faloutsos and\n Lei Li and\n Babak Damavandi and\n Seungwhan Moon},\n title = {SnapNTell: Enhancing Entity-Centric Visual Question Answering with\n Retrieval Augmented Multimodal {LLM}},\n journal = {CoRR},\n volume = {abs/2403.04735},\n year = {2024},\n url = {https://doi.org/10.48550/arXiv.2403.04735},\n doi = {10.48550/ARXIV.2403.04735},\n eprinttype = {arXiv},\n eprint = {2403.04735},\n timestamp = {Thu, 25 Apr 2024 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/corr/abs-2403-04735.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Qiu, J.","Madotto, A.","Lin, Z.","Crook, P. A.","Xu, Y. E.","Dong, X. L.","Faloutsos, C.","Li, L.","Damavandi, B.","Moon, S."],"key":"DBLP:journals/corr/abs-2403-04735","id":"DBLP:journals/corr/abs-2403-04735","bibbaseid":"qiu-madotto-lin-crook-xu-dong-faloutsos-li-etal-snapntellenhancingentitycentricvisualquestionansweringwithretrievalaugmentedmultimodalllm-2024","role":"author","urls":{"Paper":"https://doi.org/10.48550/arXiv.2403.04735"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://dblp.org/pid/f/CFaloutsos.bib","dataSources":["zM4wMHSu34vjmb6Cm"],"keywords":[],"search_terms":["snapntell","enhancing","entity","centric","visual","question","answering","retrieval","augmented","multimodal","llm","qiu","madotto","lin","crook","xu","dong","faloutsos","li","damavandi","moon"],"title":"SnapNTell: Enhancing Entity-Centric Visual Question Answering with Retrieval Augmented Multimodal LLM","year":2024}