Multi-view convolutional neural networks for 3D shape recognition

Multi-view convolutional neural networks for 3D shape recognition. Su, H., Maji, S., Kalogerakis, E., & Learned-Miller, E. In Proceedings of the IEEE International Conference on Computer Vision, pages 945–953, 2015.

Paper doi abstract bibtex

A longstanding question in computer vision concerns the representation of 3D shapes for recognition: should 3D shapes be represented with descriptors operating on their native 3D formats, such as voxel grid or polygon mesh, or can they be effectively represented with view-based descriptors? We address this question in the context of learning to recognize 3D shapes from a collection of their rendered views on 2D images. We first present a standard CNN architecture trained to recognize the shapes' rendered views independently of each other, and show that a 3D shape can be recognized even from a single view at an accuracy far higher than using state-of-the-art 3D shape descriptors. Recognition rates further increase when multiple views of the shapes are provided. In addition, we present a novel CNN architecture that combines information from multiple views of a 3D shape into a single and compact shape descriptor offering even better recognition performance. The same architecture can be applied to accurately recognize human hand-drawn sketches of shapes. We conclude that a collection of 2D views can be highly informative for 3D shape recognition and is amenable to emerging CNN architectures and their derivatives.

@comment{Su, Maji, Kalogerakis and Learned-Miller, ICCV 2015 conference paper. The fields from id through private_publication (plus type and bibtype) appear to be bookkeeping metadata from the exporting reference manager; standard bibliography styles ignore unknown fields. The export's truncated value volume = 2015 Inter was dropped because it is not a real volume number.}
@inproceedings{su2015multiview,
 author = {Su, Hang and Maji, Subhransu and Kalogerakis, Evangelos and Learned-Miller, Erik},
 title = {Multi-view Convolutional Neural Networks for {3D} Shape Recognition},
 booktitle = {Proceedings of the {IEEE} International Conference on Computer Vision},
 year = {2015},
 pages = {945--953},
 doi = {10.1109/ICCV.2015.114},
 abstract = {A longstanding question in computer vision concerns the representation of 3D shapes for recognition: should 3D shapes be represented with descriptors operating on their native 3D formats, such as voxel grid or polygon mesh, or can they be effectively represented with view-based descriptors? We address this question in the context of learning to recognize 3D shapes from a collection of their rendered views on 2D images. We first present a standard CNN architecture trained to recognize the shapes' rendered views independently of each other, and show that a 3D shape can be recognized even from a single view at an accuracy far higher than using state-of-the-art 3D shape descriptors. Recognition rates further increase when multiple views of the shapes are provided. In addition, we present a novel CNN architecture that combines information from multiple views of a 3D shape into a single and compact shape descriptor offering even better recognition performance. The same architecture can be applied to accurately recognize human hand-drawn sketches of shapes. We conclude that a collection of 2D views can be highly informative for 3D shape recognition and is amenable to emerging CNN architectures and their derivatives.},
 type = {inproceedings},
 bibtype = {inproceedings},
 id = {0a7a65da-51ed-3993-b272-2cfb587116d0},
 created = {2021-01-25T08:45:25.102Z},
 file_attached = {true},
 profile_id = {48fc0258-023d-3602-860e-824092d62c56},
 group_id = {1ff583c0-be37-34fa-9c04-73c69437d354},
 last_modified = {2021-01-25T08:45:42.413Z},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false},
 folder_uuids = {a89f4866-a7e8-4ea9-aa98-e3f470892f7c},
 private_publication = {false}
}

Downloads: 0

{"_id":"xPbYrCD5RCvEXWepj","bibbaseid":"su-maji-kalogerakis-learnedmiller-multiviewconvolutionalneuralnetworksfor3dshaperecognition-2015","author_short":["Su, H.","Maji, S.","Kalogerakis, E.","Learned-Miller, E."],"bibdata":{"title":"Multi-view convolutional neural networks for 3D shape recognition","type":"article","year":"2015","pages":"945-953","volume":"2015 Inter","id":"0a7a65da-51ed-3993-b272-2cfb587116d0","created":"2021-01-25T08:45:25.102Z","file_attached":"true","profile_id":"48fc0258-023d-3602-860e-824092d62c56","group_id":"1ff583c0-be37-34fa-9c04-73c69437d354","last_modified":"2021-01-25T08:45:42.413Z","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"folder_uuids":"a89f4866-a7e8-4ea9-aa98-e3f470892f7c","private_publication":false,"abstract":"A longstanding question in computer vision concerns the representation of 3D shapes for recognition: should 3D shapes be represented with descriptors operating on their native 3D formats, such as voxel grid or polygon mesh, or can they be effectively represented with view-based descriptors? We address this question in the context of learning to recognize 3D shapes from a collection of their rendered views on 2D images. We first present a standard CNN architecture trained to recognize the shapes' rendered views independently of each other, and show that a 3D shape can be recognized even from a single view at an accuracy far higher than using state-of-the-art 3D shape descriptors. Recognition rates further increase when multiple views of the shapes are provided. In addition, we present a novel CNN architecture that combines information from multiple views of a 3D shape into a single and compact shape descriptor offering even better recognition performance. The same architecture can be applied to accurately recognize human hand-drawn sketches of shapes. 
We conclude that a collection of 2D views can be highly informative for 3D shape recognition and is amenable to emerging CNN architectures and their derivatives.","bibtype":"article","author":"Su, Hang and Maji, Subhransu and Kalogerakis, Evangelos and Learned-Miller, Erik","doi":"10.1109/ICCV.2015.114","journal":"Proceedings of the IEEE International Conference on Computer Vision","bibtex":"@article{\n title = {Multi-view convolutional neural networks for 3D shape recognition},\n type = {article},\n year = {2015},\n pages = {945-953},\n volume = {2015 Inter},\n id = {0a7a65da-51ed-3993-b272-2cfb587116d0},\n created = {2021-01-25T08:45:25.102Z},\n file_attached = {true},\n profile_id = {48fc0258-023d-3602-860e-824092d62c56},\n group_id = {1ff583c0-be37-34fa-9c04-73c69437d354},\n last_modified = {2021-01-25T08:45:42.413Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n folder_uuids = {a89f4866-a7e8-4ea9-aa98-e3f470892f7c},\n private_publication = {false},\n abstract = {A longstanding question in computer vision concerns the representation of 3D shapes for recognition: should 3D shapes be represented with descriptors operating on their native 3D formats, such as voxel grid or polygon mesh, or can they be effectively represented with view-based descriptors? We address this question in the context of learning to recognize 3D shapes from a collection of their rendered views on 2D images. We first present a standard CNN architecture trained to recognize the shapes' rendered views independently of each other, and show that a 3D shape can be recognized even from a single view at an accuracy far higher than using state-of-the-art 3D shape descriptors. Recognition rates further increase when multiple views of the shapes are provided. 
In addition, we present a novel CNN architecture that combines information from multiple views of a 3D shape into a single and compact shape descriptor offering even better recognition performance. The same architecture can be applied to accurately recognize human hand-drawn sketches of shapes. We conclude that a collection of 2D views can be highly informative for 3D shape recognition and is amenable to emerging CNN architectures and their derivatives.},\n bibtype = {article},\n author = {Su, Hang and Maji, Subhransu and Kalogerakis, Evangelos and Learned-Miller, Erik},\n doi = {10.1109/ICCV.2015.114},\n journal = {Proceedings of the IEEE International Conference on Computer Vision}\n}","author_short":["Su, H.","Maji, S.","Kalogerakis, E.","Learned-Miller, E."],"urls":{"Paper":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c/file/7087c7c3-4a7e-df9d-4f28-4fe773f58ce2/150500880.pdf.pdf"},"biburl":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c","bibbaseid":"su-maji-kalogerakis-learnedmiller-multiviewconvolutionalneuralnetworksfor3dshaperecognition-2015","role":"author","metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c","dataSources":["z5vjAHmuibbBBnvdc","2252seNhipfTmjEBQ"],"keywords":[],"search_terms":["multi","view","convolutional","neural","networks","shape","recognition","su","maji","kalogerakis","learned-miller"],"title":"Multi-view convolutional neural networks for 3D shape recognition","year":2015}