Retina enhanced bag of words descriptors for video classification. Strat, S. T., Benoit, A., & Lambert, P. In 2014 22nd European Signal Processing Conference (EUSIPCO), pages 1307–1311, Sep. 2014.
Paper abstract bibtex This paper addresses the task of detecting diverse semantic concepts in videos. Within this context, the Bag Of Visual Words (BoW) model, inherited from sampled video keyframes analysis, is among the most popular methods. However, in the case of image sequences, this model faces new difficulties such as the added motion information, the extra computational cost and the increased variability of content and concepts to handle. Considering this spatio-temporal context, we propose to extend the BoW model by introducing video preprocessing strategies with the help of a retina model, before extracting BoW descriptors. This preprocessing increases the robustness of local features to disturbances such as noise and lighting variations. Additionally, the retina model is used to detect potentially salient areas and to construct spatio-temporal descriptors. We experiment with three state of the art local features, SIFT, SURF and FREAK, and we evaluate our results on the TRECVid 2012 Semantic Indexing (SIN) challenge.
@comment{Conference paper entry (EUSIPCO 2014). The citation key is kept unchanged so existing \cite commands still resolve.}
@InProceedings{6952461,
author = {Strat, S. T. and Benoit, A. and Lambert, P.},
booktitle = {2014 22nd European Signal Processing Conference (EUSIPCO)},
title = {Retina enhanced bag of words descriptors for video classification},
year = {2014},
pages = {1307--1311},
abstract = {This paper addresses the task of detecting diverse semantic concepts in videos. Within this context, the Bag Of Visual Words (BoW) model, inherited from sampled video keyframes analysis, is among the most popular methods. However, in the case of image sequences, this model faces new difficulties such as the added motion information, the extra computational cost and the increased variability of content and concepts to handle. Considering this spatio-temporal context, we propose to extend the BoW model by introducing video preprocessing strategies with the help of a retina model, before extracting BoW descriptors. This preprocessing increases the robustness of local features to disturbances such as noise and lighting variations. Additionally, the retina model is used to detect potentially salient areas and to construct spatio-temporal descriptors. We experiment with three state of the art local features, SIFT, SURF and FREAK, and we evaluate our results on the TRECVid 2012 Semantic Indexing (SIN) challenge.},
keywords = {image classification;image motion analysis;image sequences;video signal processing;retina enhanced bag of words descriptors;bag of visual words model;diverse semantic concept detection;video keyframes analysis;image sequences;motion information;computational cost;spatiotemporal context;video classification;video preprocessing strategies;BoW descriptors;local features;noise variations;lighting variations;SIFT;SURF;FREAK;TRECVid 2012 semantic indexing;SIN challenge;Retina;Feature extraction;Semantics;Visualization;Histograms;Streaming media;Computational modeling;Video;classification;retina;saliency;Bag of Words},
issn = {2076-1465},
month = sep,
url = {https://www.eurasip.org/proceedings/eusipco/eusipco2014/html/papers/1569925953.pdf},
}
Downloads: 0
{"_id":"bFmxypHSL9T3DDegd","bibbaseid":"strat-benoit-lambert-retinaenhancedbagofwordsdescriptorsforvideoclassification-2014","authorIDs":[],"author_short":["Strat, S. T.","Benoit, A.","Lambert, P."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","author":[{"firstnames":["S.","T."],"propositions":[],"lastnames":["Strat"],"suffixes":[]},{"firstnames":["A."],"propositions":[],"lastnames":["Benoit"],"suffixes":[]},{"firstnames":["P."],"propositions":[],"lastnames":["Lambert"],"suffixes":[]}],"booktitle":"2014 22nd European Signal Processing Conference (EUSIPCO)","title":"Retina enhanced bag of words descriptors for video classification","year":"2014","pages":"1307-1311","abstract":"This paper addresses the task of detecting diverse semantic concepts in videos. Within this context, the Bag Of Visual Words (BoW) model, inherited from sampled video keyframes analysis, is among the most popular methods. However, in the case of image sequences, this model faces new difficulties such as the added motion information, the extra computational cost and the increased variability of content and concepts to handle. Considering this spatio-temporal context, we propose to extend the BoW model by introducing video preprocessing strategies with the help of a retina model, before extracting BoW descriptors. This preprocessing increases the robustness of local features to disturbances such as noise and lighting variations. Additionally, the retina model is used to detect potentially salient areas and to construct spatio-temporal descriptors. 
We experiment with three state of the art local features, SIFT, SURF and FREAK, and we evaluate our results on the TRECVid 2012 Semantic Indexing (SIN) challenge.","keywords":"image classification;image motion analysis;image sequences;video signal processing;retina enhanced bag of words descriptors;bag of visual words model;diverse semantic concept detection;video keyframes analysis;image sequences;motion information;computational cost;spatiotemporal context;video classification;video preprocessing strategies;BoW descriptors;local features;noise variations;lighting variations;SIFT;SURF;FREAK;TRECVid 2012 semantic indexing;SIN challenge;Retina;Feature extraction;Semantics;Visualization;Histograms;Streaming media;Computational modeling;Video;classification;retina;saliency;Bag of Words","issn":"2076-1465","month":"Sep.","url":"https://www.eurasip.org/proceedings/eusipco/eusipco2014/html/papers/1569925953.pdf","bibtex":"@InProceedings{6952461,\n author = {S. T. Strat and A. Benoit and P. Lambert},\n booktitle = {2014 22nd European Signal Processing Conference (EUSIPCO)},\n title = {Retina enhanced bag of words descriptors for video classification},\n year = {2014},\n pages = {1307-1311},\n abstract = {This paper addresses the task of detecting diverse semantic concepts in videos. Within this context, the Bag Of Visual Words (BoW) model, inherited from sampled video keyframes analysis, is among the most popular methods. However, in the case of image sequences, this model faces new difficulties such as the added motion information, the extra computational cost and the increased variability of content and concepts to handle. Considering this spatio-temporal context, we propose to extend the BoW model by introducing video preprocessing strategies with the help of a retina model, before extracting BoW descriptors. This preprocessing increases the robustness of local features to disturbances such as noise and lighting variations. 
Additionally, the retina model is used to detect potentially salient areas and to construct spatio-temporal descriptors. We experiment with three state of the art local features, SIFT, SURF and FREAK, and we evaluate our results on the TRECVid 2012 Semantic Indexing (SIN) challenge.},\n keywords = {image classification;image motion analysis;image sequences;video signal processing;retina enhanced bag of words descriptors;bag of visual words model;diverse semantic concept detection;video keyframes analysis;image sequences;motion information;computational cost;spatiotemporal context;video classification;video preprocessing strategies;BoW descriptors;local features;noise variations;lighting variations;SIFT;SURF;FREAK;TRECVid 2012 semantic indexing;SIN challenge;Retina;Feature extraction;Semantics;Visualization;Histograms;Streaming media;Computational modeling;Video;classification;retina;saliency;Bag of Words},\n issn = {2076-1465},\n month = {Sep.},\n url = {https://www.eurasip.org/proceedings/eusipco/eusipco2014/html/papers/1569925953.pdf},\n}\n\n","author_short":["Strat, S. 
T.","Benoit, A.","Lambert, P."],"key":"6952461","id":"6952461","bibbaseid":"strat-benoit-lambert-retinaenhancedbagofwordsdescriptorsforvideoclassification-2014","role":"author","urls":{"Paper":"https://www.eurasip.org/proceedings/eusipco/eusipco2014/html/papers/1569925953.pdf"},"keyword":["image classification;image motion analysis;image sequences;video signal processing;retina enhanced bag of words descriptors;bag of visual words model;diverse semantic concept detection;video keyframes analysis;image sequences;motion information;computational cost;spatiotemporal context;video classification;video preprocessing strategies;BoW descriptors;local features;noise variations;lighting variations;SIFT;SURF;FREAK;TRECVid 2012 semantic indexing;SIN challenge;Retina;Feature extraction;Semantics;Visualization;Histograms;Streaming media;Computational modeling;Video;classification;retina;saliency;Bag of Words"],"metadata":{"authorlinks":{}},"downloads":0},"bibtype":"inproceedings","biburl":"https://raw.githubusercontent.com/Roznn/EUSIPCO/main/eusipco2014url.bib","creationDate":"2021-02-13T17:43:41.683Z","downloads":0,"keywords":["image classification;image motion analysis;image sequences;video signal processing;retina enhanced bag of words descriptors;bag of visual words model;diverse semantic concept detection;video keyframes analysis;image sequences;motion information;computational cost;spatiotemporal context;video classification;video preprocessing strategies;bow descriptors;local features;noise variations;lighting variations;sift;surf;freak;trecvid 2012 semantic indexing;sin challenge;retina;feature extraction;semantics;visualization;histograms;streaming media;computational modeling;video;classification;retina;saliency;bag of words"],"search_terms":["retina","enhanced","bag","words","descriptors","video","classification","strat","benoit","lambert"],"title":"Retina enhanced bag of words descriptors for video 
classification","year":2014,"dataSources":["A2ezyFL6GG6na7bbs","oZFG3eQZPXnykPgnE"]}