Seeing in Words: Learning to Classify through Language Bottlenecks. Saifullah, K., Wen, Y., Geiping, J., Goldblum, M., & Goldstein, T. June, 2023. arXiv:2307.00028 [cs]
Seeing in Words: Learning to Classify through Language Bottlenecks [link]Paper  doi  abstract   bibtex   
Neural networks for computer vision extract uninterpretable features despite achieving high accuracy on benchmarks. In contrast, humans can explain their predictions using succinct and intuitive descriptions. To incorporate explainability into neural networks, we train a vision model whose feature representations are text. We show that such a model can effectively classify ImageNet images, and we discuss the challenges we encountered when training it.
@comment{arXiv preprint. The arXiv identifier is stored in the structured eprint, archiveprefix and primaryclass fields instead of a free-text note, so the bibliography style decides how to print it. The primaryclass value is only the top-level archive given by the original export; confirm the full subject class against the arXiv listing.}
@misc{saifullah_seeing_2023,
	author = {Saifullah, Khalid and Wen, Yuxin and Geiping, Jonas and Goldblum, Micah and Goldstein, Tom},
	title = {Seeing in Words: Learning to Classify through Language Bottlenecks},
	shorttitle = {Seeing in Words},
	year = {2023},
	month = jun,
	publisher = {arXiv},
	eprint = {2307.00028},
	archiveprefix = {arXiv},
	primaryclass = {cs},
	doi = {10.48550/arXiv.2307.00028},
	url = {https://arxiv.org/abs/2307.00028},
	urldate = {2024-05-02},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning},
	abstract = {Neural networks for computer vision extract uninterpretable features despite achieving high accuracy on benchmarks. In contrast, humans can explain their predictions using succinct and intuitive descriptions. To incorporate explainability into neural networks, we train a vision model whose feature representations are text. We show that such a model can effectively classify ImageNet images, and we discuss the challenges we encountered when training it.},
}

Downloads: 0