AMECON: Abstract Meta-Concept Features for Text-Illustration. Chami, I., Tamaazousti, Y., & Le Borgne, H. In ACM International Conference on Multimedia Retrieval (ICMR), Bucharest, 2017.
Pdf
Slides doi abstract bibtex Cross-media retrieval is a problem of high interest that is at the frontier between computer vision and natural language processing. The state-of-the-art in the domain consists of learning a common space with regard to some constraints of correlation or similarity from two textual and visual modalities that are processed in parallel and possibly jointly. This paper proposes a different approach that considers the cross-modal problem as a supervised mapping of visual modalities to textual ones. Each modality is thus seen as a particular projection of an abstract meta-concept, each of its dimension subsuming several semantic concepts ("meta" aspect) but may not correspond to an actual one ("abstract" aspect). In practice, the textual modality is used to generate a multi-label representation, further used to map the visual modality through a simple shallow neural network. While being quite easy to implement, the experiments show that our approach significantly outperforms the state-of-the-art on Flickr-8K and Flickr-30K datasets for the text-illustration task.
@inproceedings{chami2017icmr,
  author     = {Chami, Ines and Tamaazousti, Youssef and Le Borgne, Herv{\'e}},
  title      = {{AMECON}: Abstract Meta-Concept Features for Text-Illustration},
  booktitle  = {{ACM} International Conference on Multimedia Retrieval ({ICMR})},
  year       = {2017},
  address    = {Bucharest},
  doi        = {10.1145/3078971.3078993},
  url_pdf    = {http://people.csail.mit.edu/ytamaaz/files/pdf/AMECON_Abstract_Meta_Concept_Features_for_Text_Illustration.pdf},
  url_slides = {http://people.csail.mit.edu/ytamaaz/files/slides/Chami_Tamaazousti_LeBorgne_ICMR17.pdf},
  abstract   = {Cross-media retrieval is a problem of high interest that is at the frontier between computer vision and natural language processing. The state-of-the-art in the domain consists of learning a common space with regard to some constraints of correlation or similarity from two textual and visual modalities that are processed in parallel and possibly jointly. This paper proposes a different approach that considers the cross-modal problem as a supervised mapping of visual modalities to textual ones. Each modality is thus seen as a particular projection of an abstract meta-concept, each of its dimension subsuming several semantic concepts (``meta'' aspect) but may not correspond to an actual one (``abstract'' aspect). In practice, the textual modality is used to generate a multi-label representation, further used to map the visual modality through a simple shallow neural network. While being quite easy to implement, the experiments show that our approach significantly outperforms the state-of-the-art on Flickr-8K and Flickr-30K datasets for the text-illustration task.},
  keywords   = {vision-language},
}
Downloads: 0
{"_id":"eXfTNuSRfP3GPjKoT","bibbaseid":"chami-tamaazousti-leborgne-ameconabstractmetaconceptfeaturesfortextillustration-2017","author_short":["Chami, I.","Tamaazousti, Y.","Le Borgne, H."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","author":[{"propositions":[],"lastnames":["Chami"],"firstnames":["Ines"],"suffixes":[]},{"propositions":[],"lastnames":["Tamaazousti"],"firstnames":["Youssef"],"suffixes":[]},{"propositions":[],"lastnames":["Le","Borgne"],"firstnames":["Hervé"],"suffixes":[]}],"title":"AMECON: Abstract Meta-Concept Features for Text-Illustration","booktitle":"ACM International Conference on Multimedia Retrieval (ICMR)","year":"2017","address":"Bucharest","doi":"10.1145/3078971.3078993","url_pdf":"http://people.csail.mit.edu/ytamaaz/files/pdf/AMECON_Abstract_Meta_Concept_Features_for_Text_Illustration.pdf","url_slides":"http://people.csail.mit.edu/ytamaaz/files/slides/Chami_Tamaazousti_LeBorgne_ICMR17.pdf","abstract":"Cross-media retrieval is a problem of high interest that is at the frontier between computer vision and natural language processing. The state-of-the-art in the domain consists of learning a common space with regard to some constraints of correlation or similarity from two textual and visual modalities that are processed in parallel and possibly jointly. This paper proposes a different approach that considers the cross-modal problem as a supervised mapping of visual modalities to textual ones. Each modality is thus seen as a particular projection of an abstract meta-concept, each of its dimension subsuming several semantic concepts (``meta'' aspect) but may not correspond to an actual one (``abstract'' aspect). In practice, the textual modality is used to generate a multi-label representation, further used to map the visual modality through a simple shallow neural network. 
While being quite easy to implement, the experiments show that our approach significantly outperforms the state-of-the-art on Flickr-8K and Flickr-30K datasets for the text-illustration task","keywords":"vision-language","bibtex":"@InProceedings{chami2017icmr,\n author = {Chami,Ines and Tamaazousti, Youssef and Le Borgne, Herv{\\'e}},\n title = {AMECON: Abstract Meta-Concept Features for Text-Illustration},\n booktitle = {ACM International Conference on Multimedia Retrieval (ICMR)},\n year = {2017},\n address = {Bucharest},\n doi = {10.1145/3078971.3078993},\n url_PDF = {http://people.csail.mit.edu/ytamaaz/files/pdf/AMECON_Abstract_Meta_Concept_Features_for_Text_Illustration.pdf},\n url_slides = {http://people.csail.mit.edu/ytamaaz/files/slides/Chami_Tamaazousti_LeBorgne_ICMR17.pdf},\n abstract = {Cross-media retrieval is a problem of high interest that is at the frontier between computer vision and natural language processing. The state-of-the-art in the domain consists of learning a common space with regard to some constraints of correlation or similarity from two textual and visual modalities that are processed in parallel and possibly jointly. This paper proposes a different approach that considers the cross-modal problem as a supervised mapping of visual modalities to textual ones. Each modality is thus seen as a particular projection of an abstract meta-concept, each of its dimension subsuming several semantic concepts (``meta'' aspect) but may not correspond to an actual one (``abstract'' aspect). In practice, the textual modality is used to generate a multi-label representation, further used to map the visual modality through a simple shallow neural network. 
While being quite easy to implement, the experiments show that our approach significantly outperforms the state-of-the-art on Flickr-8K and Flickr-30K datasets for the text-illustration task},\n keywords = {vision-language}\n}\n\n","author_short":["Chami, I.","Tamaazousti, Y.","Le Borgne, H."],"key":"chami2017icmr","id":"chami2017icmr","bibbaseid":"chami-tamaazousti-leborgne-ameconabstractmetaconceptfeaturesfortextillustration-2017","role":"author","urls":{" pdf":"http://people.csail.mit.edu/ytamaaz/files/pdf/AMECON_Abstract_Meta_Concept_Features_for_Text_Illustration.pdf"," slides":"http://people.csail.mit.edu/ytamaaz/files/slides/Chami_Tamaazousti_LeBorgne_ICMR17.pdf"},"keyword":["vision-language"],"metadata":{"authorlinks":{}},"html":""},"bibtype":"inproceedings","biburl":"https://hleborgne.github.io/files/hleborgne-publications.bib","dataSources":["sJzmxoNKfHCgQoayi"],"keywords":["vision-language"],"search_terms":["amecon","abstract","meta","concept","features","text","illustration","chami","tamaazousti","le borgne"],"title":"AMECON: Abstract Meta-Concept Features for Text-Illustration","year":2017}