E3D: Event-Based 3D Shape Reconstruction. Baudron, A., Wang, Z. W., Cossairt, O., & Katsaggelos, A. K. arXiv preprint arXiv:2012.05214, December 2020.
3D shape reconstruction is a primary component of augmented/virtual reality. Despite being highly advanced, existing solutions based on RGB, RGB-D and Lidar sensors are power and data intensive, which introduces challenges for deployment in edge devices. We approach 3D reconstruction with an event camera, a sensor with significantly lower power, latency and data expense while enabling high dynamic range. While previous event-based 3D reconstruction methods are primarily based on stereo vision, we cast the problem as multi-view shape from silhouette using a monocular event camera. The output from a moving event camera is a sparse point set of space-time gradients, largely sketching scene/object edges and contours. We first introduce an event-to-silhouette (E2S) neural network module to transform a stack of event frames to the corresponding silhouettes, with additional neural branches for camera pose regression. Second, we introduce E3D, which employs a 3D differentiable renderer (PyTorch3D) to enforce cross-view 3D mesh consistency and fine-tune the E2S and pose network. Lastly, we introduce a 3D-to-events simulation pipeline and apply it to publicly available object datasets and generate synthetic event/silhouette training pairs for supervised learning.
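The central mechanism described in the abstract, fitting a mesh so that its rendered silhouettes agree with silhouettes predicted from events across views, can be sketched with PyTorch3D's soft silhouette renderer. The following is a minimal illustration under assumed settings, not the authors' implementation: target_sil stands in for silhouettes produced by the E2S module, the camera poses and pose sampling are placeholders for the paper's pose-regression branch, and the soft IoU loss and all hyperparameters are assumptions.

import math
import torch
from pytorch3d.utils import ico_sphere
from pytorch3d.renderer import (
    FoVPerspectiveCameras, RasterizationSettings, MeshRasterizer,
    MeshRenderer, SoftSilhouetteShader, BlendParams, look_at_view_transform,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Deformable mesh initialised as a sphere; per-vertex offsets are optimised.
src_mesh = ico_sphere(4, device)
deform = torch.zeros(src_mesh.verts_packed().shape, device=device, requires_grad=True)

# Soft rasterisation settings: the alpha channel becomes a differentiable silhouette.
blend = BlendParams(sigma=1e-4, gamma=1e-4)
raster_settings = RasterizationSettings(
    image_size=128,
    blur_radius=math.log(1.0 / 1e-4 - 1.0) * blend.sigma,
    faces_per_pixel=50,
)
renderer = MeshRenderer(
    rasterizer=MeshRasterizer(raster_settings=raster_settings),
    shader=SoftSilhouetteShader(blend_params=blend),
)

# Hypothetical targets: silhouettes the E2S network would predict at known poses.
n_views = 8
target_sil = torch.rand(n_views, 128, 128, device=device)  # stand-in for E2S output
R, T = look_at_view_transform(dist=2.7, elev=10.0, azim=torch.linspace(-180.0, 180.0, n_views))
cameras = FoVPerspectiveCameras(R=R, T=T, device=device)

optimizer = torch.optim.Adam([deform], lr=1e-2)
for _ in range(200):
    optimizer.zero_grad()
    mesh = src_mesh.offset_verts(deform).extend(n_views)
    rendered = renderer(mesh, cameras=cameras)[..., 3]  # (n_views, H, W) alpha = silhouette
    # Soft IoU loss between rendered and predicted silhouettes, averaged over views.
    inter = (rendered * target_sil).sum(dim=(1, 2))
    union = (rendered + target_sil - rendered * target_sil).sum(dim=(1, 2))
    loss = (1.0 - inter / union).mean()
    loss.backward()
    optimizer.step()

In the paper's pipeline, the target silhouettes come from the E2S module applied to stacked event frames, and the camera poses come from the pose-regression branches; the cross-view consistency signal above is what fine-tunes both.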
@article{Baudron2020,
abstract = {3D shape reconstruction is a primary component of augmented/virtual reality. Despite being highly advanced, existing solutions based on RGB, RGB-D and Lidar sensors are power and data intensive, which introduces challenges for deployment in edge devices. We approach 3D reconstruction with an event camera, a sensor with significantly lower power, latency and data expense while enabling high dynamic range. While previous event-based 3D reconstruction methods are primarily based on stereo vision, we cast the problem as multi-view shape from silhouette using a monocular event camera. The output from a moving event camera is a sparse point set of space-time gradients, largely sketching scene/object edges and contours. We first introduce an event-to-silhouette (E2S) neural network module to transform a stack of event frames to the corresponding silhouettes, with additional neural branches for camera pose regression. Second, we introduce E3D, which employs a 3D differentiable renderer (PyTorch3D) to enforce cross-view 3D mesh consistency and fine-tune the E2S and pose network. Lastly, we introduce a 3D-to-events simulation pipeline and apply it to publicly available object datasets and generate synthetic event/silhouette training pairs for supervised learning.},
archivePrefix = {arXiv},
arxivId = {2012.05214},
author = {Baudron, Alexis and Wang, Zihao W. and Cossairt, Oliver and Katsaggelos, Aggelos K.},
eprint = {2012.05214},
journal = {arXiv preprint arXiv:2012.05214},
month = {dec},
title = {{E3D: Event-Based 3D Shape Reconstruction}},
url = {http://arxiv.org/abs/2012.05214},
year = {2020}
}
