ScanNet: Richly-annotated 3D reconstructions of indoor scenes

ScanNet: Richly-annotated 3D reconstructions of indoor scenes. Dai, A., Chang, A. X., Savva, M., Halber, M., Funkhouser, T., & Nießner, M. Proceedings - 30th IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017, 2017-Janua:2432-2443, 2017.

Paper doi abstract bibtex

A key requirement for leveraging supervised deep learning methods is the availability of large, labeled datasets. Unfortunately, in the context of RGB-D scene understanding, very little data is available - current datasets cover a small range of scene views and have limited semantic annotations. To address this issue, we introduce ScanNet, an RGB-D video dataset containing 2.5M views in 1513 scenes annotated with 3D camera poses, surface reconstructions, and semantic segmentations. To collect this data, we designed an easy-to-use and scalable RGB-D capture system that includes automated surface reconstruction and crowd-sourced semantic annotation. We show that using this data helps achieve state-of-the-art performance on several 3D scene understanding tasks, including 3D object classification, semantic voxel labeling, and CAD model retrieval.

@article{
 title = {ScanNet: Richly-annotated 3D reconstructions of indoor scenes},
 type = {article},
 year = {2017},
 pages = {2432-2443},
 volume = {2017-Janua},
 id = {0edc9ce5-bde5-3914-8e02-9a4097a5380b},
 created = {2021-01-27T10:09:33.305Z},
 file_attached = {true},
 profile_id = {48fc0258-023d-3602-860e-824092d62c56},
 group_id = {1ff583c0-be37-34fa-9c04-73c69437d354},
 last_modified = {2022-03-18T10:02:59.816Z},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false},
 folder_uuids = {a89f4866-a7e8-4ea9-aa98-e3f470892f7c,1e7b477c-c241-48c3-a542-ad06e3d39dd5},
 private_publication = {false},
 abstract = {A key requirement for leveraging supervised deep learning methods is the availability of large, labeled datasets. Unfortunately, in the context of RGB-D scene understanding, very little data is available - current datasets cover a small range of scene views and have limited semantic annotations. To address this issue, we introduce ScanNet, an RGB-D video dataset containing 2.5M views in 1513 scenes annotated with 3D camera poses, surface reconstructions, and semantic segmentations. To collect this data, we designed an easy-to-use and scalable RGB-D capture system that includes automated surface reconstruction and crowd-sourced semantic annotation. We show that using this data helps achieve state-of-the-art performance on several 3D scene understanding tasks, including 3D object classification, semantic voxel labeling, and CAD model retrieval.},
 bibtype = {article},
 author = {Dai, Angela and Chang, Angel X. and Savva, Manolis and Halber, Maciej and Funkhouser, Thomas and Nießner, Matthias},
 doi = {10.1109/CVPR.2017.261},
 journal = {Proceedings - 30th IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017}
}

Downloads: 0

{"_id":"JX3AmFpjjdfFgw3P9","bibbaseid":"dai-chang-savva-halber-funkhouser-niener-scannetrichlyannotated3dreconstructionsofindoorscenes-2017","author_short":["Dai, A.","Chang, A., X.","Savva, M.","Halber, M.","Funkhouser, T.","Nießner, M."],"bibdata":{"title":"ScanNet: Richly-annotated 3D reconstructions of indoor scenes","type":"article","year":"2017","pages":"2432-2443","volume":"2017-Janua","id":"0edc9ce5-bde5-3914-8e02-9a4097a5380b","created":"2021-01-27T10:09:33.305Z","file_attached":"true","profile_id":"48fc0258-023d-3602-860e-824092d62c56","group_id":"1ff583c0-be37-34fa-9c04-73c69437d354","last_modified":"2022-03-18T10:02:59.816Z","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"folder_uuids":"a89f4866-a7e8-4ea9-aa98-e3f470892f7c,1e7b477c-c241-48c3-a542-ad06e3d39dd5","private_publication":false,"abstract":"A key requirement for leveraging supervised deep learning methods is the availability of large, labeled datasets. Unfortunately, in the context of RGB-D scene understanding, very little data is available - current datasets cover a small range of scene views and have limited semantic annotations. To address this issue, we introduce ScanNet, an RGB-D video dataset containing 2.5M views in 1513 scenes annotated with 3D camera poses, surface reconstructions, and semantic segmentations. To collect this data, we designed an easy-to-use and scalable RGB-D capture system that includes automated surface reconstruction and crowd-sourced semantic annotation. We show that using this data helps achieve state-of-the-art performance on several 3D scene understanding tasks, including 3D object classification, semantic voxel labeling, and CAD model retrieval.","bibtype":"article","author":"Dai, Angela and Chang, Angel X. and Savva, Manolis and Halber, Maciej and Funkhouser, Thomas and Nießner, Matthias","doi":"10.1109/CVPR.2017.261","journal":"Proceedings - 30th IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017","bibtex":"@article{\n title = {ScanNet: Richly-annotated 3D reconstructions of indoor scenes},\n type = {article},\n year = {2017},\n pages = {2432-2443},\n volume = {2017-Janua},\n id = {0edc9ce5-bde5-3914-8e02-9a4097a5380b},\n created = {2021-01-27T10:09:33.305Z},\n file_attached = {true},\n profile_id = {48fc0258-023d-3602-860e-824092d62c56},\n group_id = {1ff583c0-be37-34fa-9c04-73c69437d354},\n last_modified = {2022-03-18T10:02:59.816Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n folder_uuids = {a89f4866-a7e8-4ea9-aa98-e3f470892f7c,1e7b477c-c241-48c3-a542-ad06e3d39dd5},\n private_publication = {false},\n abstract = {A key requirement for leveraging supervised deep learning methods is the availability of large, labeled datasets. Unfortunately, in the context of RGB-D scene understanding, very little data is available - current datasets cover a small range of scene views and have limited semantic annotations. To address this issue, we introduce ScanNet, an RGB-D video dataset containing 2.5M views in 1513 scenes annotated with 3D camera poses, surface reconstructions, and semantic segmentations. To collect this data, we designed an easy-to-use and scalable RGB-D capture system that includes automated surface reconstruction and crowd-sourced semantic annotation. We show that using this data helps achieve state-of-the-art performance on several 3D scene understanding tasks, including 3D object classification, semantic voxel labeling, and CAD model retrieval.},\n bibtype = {article},\n author = {Dai, Angela and Chang, Angel X. and Savva, Manolis and Halber, Maciej and Funkhouser, Thomas and Nießner, Matthias},\n doi = {10.1109/CVPR.2017.261},\n journal = {Proceedings - 30th IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017}\n}","author_short":["Dai, A.","Chang, A., X.","Savva, M.","Halber, M.","Funkhouser, T.","Nießner, M."],"urls":{"Paper":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c/file/3c9f6344-17ee-c58b-75ca-35a71b693892/170204405.pdf.pdf"},"biburl":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c","bibbaseid":"dai-chang-savva-halber-funkhouser-niener-scannetrichlyannotated3dreconstructionsofindoorscenes-2017","role":"author","metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c","dataSources":["fPsJaJjuyvc65MnRr","2252seNhipfTmjEBQ"],"keywords":[],"search_terms":["scannet","richly","annotated","reconstructions","indoor","scenes","dai","chang","savva","halber","funkhouser","nießner"],"title":"ScanNet: Richly-annotated 3D reconstructions of indoor scenes","year":2017}