Spatial pyramid pooling in deep convolutional networks for visual recognition. He, K., Zhang, X., Ren, S., & Sun, J. Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 8691 LNCS(PART 3):346-361, 2014.

Existing deep convolutional neural networks (CNNs) require a fixed-size (e.g. 224×224) input image. This requirement is "artificial" and may hurt the recognition accuracy for the images or sub-images of an arbitrary size/scale. In this work, we equip the networks with a more principled pooling strategy, "spatial pyramid pooling", to eliminate the above requirement. The new network structure, called SPP-net, can generate a fixed-length representation regardless of image size/scale. By removing the fixed-size limitation, we can improve all CNN-based image classification methods in general. Our SPP-net achieves state-of-the-art accuracy on the datasets of ImageNet 2012, Pascal VOC 2007, and Caltech101. The power of SPP-net is more significant in object detection. Using SPP-net, we compute the feature maps from the entire image only once, and then pool features in arbitrary regions (sub-images) to generate fixed-length representations for training the detectors. This method avoids repeatedly computing the convolutional features. In processing test images, our method computes convolutional features 30-170× faster than the recent leading method R-CNN (and 24-64× faster overall), while achieving better or comparable accuracy on Pascal VOC 2007. © 2014 Springer International Publishing.
@article{He2014,
  title = {Spatial pyramid pooling in deep convolutional networks for visual recognition},
  author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  year = {2014},
  journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  volume = {8691 LNCS},
  number = {PART 3},
  pages = {346--361},
  doi = {10.1007/978-3-319-10578-9_23},
  url = {https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c/file/43d9a004-27e0-8081-6ad5-ca8497b87afe/He_et_al___2014___Spatial_Pyramid_Pooling_in_Deep_Convolutional_Netw.pdf.pdf},
  abstract = {Existing deep convolutional neural networks (CNNs) require a fixed-size (e.g. 224×224) input image. This requirement is "artificial" and may hurt the recognition accuracy for the images or sub-images of an arbitrary size/scale. In this work, we equip the networks with a more principled pooling strategy, "spatial pyramid pooling", to eliminate the above requirement. The new network structure, called SPP-net, can generate a fixed-length representation regardless of image size/scale. By removing the fixed-size limitation, we can improve all CNN-based image classification methods in general. Our SPP-net achieves state-of-the-art accuracy on the datasets of ImageNet 2012, Pascal VOC 2007, and Caltech101. The power of SPP-net is more significant in object detection. Using SPP-net, we compute the feature maps from the entire image only once, and then pool features in arbitrary regions (sub-images) to generate fixed-length representations for training the detectors. This method avoids repeatedly computing the convolutional features. In processing test images, our method computes convolutional features 30-170× faster than the recent leading method R-CNN (and 24-64× faster overall), while achieving better or comparable accuracy on Pascal VOC 2007. © 2014 Springer International Publishing.}
}
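The abstract describes the core mechanism: instead of pooling the last convolutional feature map over a single fixed grid, SPP-net pools it over a pyramid of grids, so the concatenated output length depends only on the channel count and the number of bins, never on the input image size. The NumPy sketch below illustrates that idea; it is not taken from the paper, and the pyramid levels (1, 2, 4) and the integer bin edges are illustrative assumptions rather than the paper's exact configuration.

```python
import numpy as np

def spatial_pyramid_pool(feature_map, levels=(1, 2, 4)):
    """Max-pool a C x H x W feature map into a fixed-length vector.

    For each pyramid level n, the map is split into an n x n grid and
    max-pooled per cell, so the output length is C * sum(n * n) over the
    levels, independent of H and W (assumes H, W >= max(levels)).
    """
    c, h, w = feature_map.shape
    pooled = []
    for n in levels:
        # Integer bin edges; each cell spans at least one position when H, W >= n.
        h_edges = np.linspace(0, h, n + 1).astype(int)
        w_edges = np.linspace(0, w, n + 1).astype(int)
        for i in range(n):
            for j in range(n):
                cell = feature_map[:, h_edges[i]:h_edges[i + 1],
                                   w_edges[j]:w_edges[j + 1]]
                pooled.append(cell.max(axis=(1, 2)))
    return np.concatenate(pooled)

# Feature maps of different spatial sizes map to vectors of identical length.
v1 = spatial_pyramid_pool(np.random.rand(256, 13, 13))
v2 = spatial_pyramid_pool(np.random.rand(256, 10, 18))
assert v1.shape == v2.shape == (256 * (1 + 4 + 16),)
```

Applying the same pooling to a cropped window of a shared full-image feature map is what lets SPP-net handle arbitrary candidate regions without recomputing the convolutions for each one, which is the source of the speedup over R-CNN reported in the abstract.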
{"_id":"CpJjkaPWKQ969Fv2J","bibbaseid":"he-zhang-ren-sun-spatialpyramidpoolingindeepconvolutionalnetworksforvisualrecognition-2014","downloads":0,"creationDate":"2017-03-02T15:49:35.286Z","title":"Spatial pyramid pooling in deep convolutional networks for visual recognition","author_short":["He, K.","Zhang, X.","Ren, S.","Sun, J."],"year":2014,"bibtype":"article","biburl":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c","bibdata":{"title":"Spatial pyramid pooling in deep convolutional networks for visual recognition","type":"article","year":"2014","pages":"346-361","volume":"8691 LNCS","id":"97047425-7e0a-3473-a2a7-99d3ff6a77f2","created":"2021-11-01T10:14:38.921Z","file_attached":"true","profile_id":"235249c2-3ed4-314a-b309-b1ea0330f5d9","group_id":"1ff583c0-be37-34fa-9c04-73c69437d354","last_modified":"2022-03-28T09:45:09.334Z","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"citation_key":"He2014","folder_uuids":"cd02f564-0123-4236-a320-b339927f085a","private_publication":false,"abstract":"Existing deep convolutional neural networks (CNNs) require a fixed-size (e.g. 224×224) input image. This requirement is \"artificial\" and may hurt the recognition accuracy for the images or sub-images of an arbitrary size/scale. In this work, we equip the networks with a more principled pooling strategy, \"spatial pyramid pooling\", to eliminate the above requirement. The new network structure, called SPP-net, can generate a fixed-length representation regardless of image size/scale. By removing the fixed-size limitation, we can improve all CNN-based image classification methods in general. Our SPP-net achieves state-of-the-art accuracy on the datasets of ImageNet 2012, Pascal VOC 2007, and Caltech101. The power of SPP-net is more significant in object detection. Using SPP-net, we compute the feature maps from the entire image only once, and then pool features in arbitrary regions (sub-images) to generate fixed-length representations for training the detectors. This method avoids repeatedly computing the convolutional features. In processing test images, our method computes convolutional features 30-170× faster than the recent leading method R-CNN (and 24-64× faster overall), while achieving better or comparable accuracy on Pascal VOC 2007. © 2014 Springer International Publishing.","bibtype":"article","author":"He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian","doi":"10.1007/978-3-319-10578-9_23","journal":"Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)","number":"PART 3","bibtex":"@article{\n title = {Spatial pyramid pooling in deep convolutional networks for visual recognition},\n type = {article},\n year = {2014},\n pages = {346-361},\n volume = {8691 LNCS},\n id = {97047425-7e0a-3473-a2a7-99d3ff6a77f2},\n created = {2021-11-01T10:14:38.921Z},\n file_attached = {true},\n profile_id = {235249c2-3ed4-314a-b309-b1ea0330f5d9},\n group_id = {1ff583c0-be37-34fa-9c04-73c69437d354},\n last_modified = {2022-03-28T09:45:09.334Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n citation_key = {He2014},\n folder_uuids = {cd02f564-0123-4236-a320-b339927f085a},\n private_publication = {false},\n abstract = {Existing deep convolutional neural networks (CNNs) require a fixed-size (e.g. 224×224) input image. 
This requirement is \"artificial\" and may hurt the recognition accuracy for the images or sub-images of an arbitrary size/scale. In this work, we equip the networks with a more principled pooling strategy, \"spatial pyramid pooling\", to eliminate the above requirement. The new network structure, called SPP-net, can generate a fixed-length representation regardless of image size/scale. By removing the fixed-size limitation, we can improve all CNN-based image classification methods in general. Our SPP-net achieves state-of-the-art accuracy on the datasets of ImageNet 2012, Pascal VOC 2007, and Caltech101. The power of SPP-net is more significant in object detection. Using SPP-net, we compute the feature maps from the entire image only once, and then pool features in arbitrary regions (sub-images) to generate fixed-length representations for training the detectors. This method avoids repeatedly computing the convolutional features. In processing test images, our method computes convolutional features 30-170× faster than the recent leading method R-CNN (and 24-64× faster overall), while achieving better or comparable accuracy on Pascal VOC 2007. © 2014 Springer International Publishing.},\n bibtype = {article},\n author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},\n doi = {10.1007/978-3-319-10578-9_23},\n journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},\n number = {PART 3}\n}","author_short":["He, K.","Zhang, X.","Ren, S.","Sun, J."],"urls":{"Paper":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c/file/43d9a004-27e0-8081-6ad5-ca8497b87afe/He_et_al___2014___Spatial_Pyramid_Pooling_in_Deep_Convolutional_Netw.pdf.pdf"},"biburl":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c","bibbaseid":"he-zhang-ren-sun-spatialpyramidpoolingindeepconvolutionalnetworksforvisualrecognition-2014","role":"author","metadata":{"authorlinks":{}},"downloads":0},"search_terms":["spatial","pyramid","pooling","deep","convolutional","networks","visual","recognition","he","zhang","ren","sun"],"keywords":[],"authorIDs":[],"dataSources":["bzxc3uBcwMv3h47xE","ya2CyA73rpZseyrZ8","am4cuScX42bDPukDn","KZFesWbmGy4yc4ZLC","2252seNhipfTmjEBQ"]}