Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments. Ionescu, C., Papava, D., Olaru, V., & Sminchisescu, C. IEEE Transactions on Pattern Analysis and Machine Intelligence, 36(7):1325-1339, July 2014. doi: 10.1109/TPAMI.2013.248. Abstract: We introduce a new dataset, Human3.6M, of 3.6 million accurate 3D human poses, acquired by recording the performance of 5 female and 6 male subjects, under 4 different viewpoints, for training realistic human sensing systems and for evaluating the next generation of human pose estimation models and algorithms. Besides increasing the size of the datasets in the current state of the art by several orders of magnitude, we also aim to complement such datasets with a diverse set of motions and poses encountered as part of typical human activities (taking photos, talking on the phone, posing, greeting, eating, etc.), with additional synchronized image, human motion capture, and time-of-flight (depth) data, and with accurate 3D body scans of all the subject actors involved. We also provide controlled mixed-reality evaluation scenarios where 3D human models are animated using motion capture and inserted, with correct 3D geometry, into complex real environments viewed with moving cameras and under occlusion. Finally, we provide a set of large-scale statistical models and detailed evaluation baselines for the dataset, illustrating its diversity and the scope for improvement by future work in the research community. Our experiments show that our best large-scale model can leverage our full training set to obtain a 20% improvement in performance compared to a training set of the scale of the largest existing public dataset for this problem. Yet the potential for improvement by leveraging higher-capacity, more complex models with our large dataset is substantially greater and should stimulate future research. The dataset, together with code for the associated large-scale learning models, features, visualization tools, and the evaluation server, is available online at http://vision.imar.ro/human3.6m.
@article{ionescuHuman36MLarge2014,
title = {Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments},
year = {2014},
keywords = {3D human pose estimation,Cameras,Estimation,Fourier kernel approximations,Joints,Modeling and recovery of physical attributes,Motion,Sensors,Solid modeling,Three-dimensional displays,Training,articulated body modeling,human motion capture data,large-scale learning,optimization,structured prediction},
pages = {1325--1339},
volume = {36},
month = {7},
short_title = {Human3.6M},
abstract = {We introduce a new dataset, Human3.6M, of 3.6 Million accurate 3D Human poses, acquired by recording the performance of 5 female and 6 male subjects, under 4 different viewpoints, for training realistic human sensing systems and for evaluating the next generation of human pose estimation models and algorithms. Besides increasing the size of the datasets in the current state-of-the-art by several orders of magnitude, we also aim to complement such datasets with a diverse set of motions and poses encountered as part of typical human activities (taking photos, talking on the phone, posing, greeting, eating, etc.), with additional synchronized image, human motion capture, and time of flight (depth) data, and with accurate 3D body scans of all the subject actors involved. We also provide controlled mixed reality evaluation scenarios where 3D human models are animated using motion capture and inserted using correct 3D geometry, in complex real environments, viewed with moving cameras, and under occlusion. Finally, we provide a set of large-scale statistical models and detailed evaluation baselines for the dataset illustrating its diversity and the scope for improvement by future work in the research community. Our experiments show that our best large-scale model can leverage our full training set to obtain a 20\% improvement in performance compared to a training set of the scale of the largest existing public dataset for this problem. Yet the potential for improvement by leveraging higher capacity, more complex models with our large dataset, is substantially vaster and should stimulate future research. The dataset together with code for the associated large-scale learning models, features, visualization tools, as well as the evaluation server, is available online at http://vision.imar.ro/human3.6m.},
author = {Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian},
doi = {10.1109/TPAMI.2013.248},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
number = {7}
}
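The abstract describes multi-view recordings with ground-truth 3D poses, large-scale baselines, and an evaluation server for reporting results. As a rough illustration of how predictions against such 3D pose data are commonly scored, the sketch below computes a mean per-joint position error between predicted and ground-truth joint positions. This metric, the array shapes, the joint count, and the units are assumptions made for the example only; they are not taken from the entry above and this is not the official Human3.6M toolkit.

```python
# Illustrative sketch (not the official Human3.6M code): score predicted 3D
# poses against ground truth with a mean per-joint position error.
# Assumptions: poses are stored as (num_frames, num_joints, 3) arrays in a
# shared metric unit (e.g. millimeters); joint count below is hypothetical.
import numpy as np


def mean_per_joint_position_error(pred, gt):
    """Average Euclidean distance between corresponding joints.

    pred, gt: arrays of shape (num_frames, num_joints, 3), same units.
    Returns a single scalar error in those units.
    """
    assert pred.shape == gt.shape and pred.shape[-1] == 3
    per_joint = np.linalg.norm(pred - gt, axis=-1)  # (num_frames, num_joints)
    return per_joint.mean()


# Toy usage with random data standing in for one recorded sequence.
rng = np.random.default_rng(0)
gt = rng.normal(size=(100, 17, 3)) * 100.0        # hypothetical ground truth
pred = gt + rng.normal(scale=5.0, size=gt.shape)  # hypothetical predictions
print(f"Mean per-joint error: {mean_per_joint_position_error(pred, gt):.2f}")
```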
{"_id":"j2JLq9sDYDMALK49P","bibbaseid":"ionescu-papava-olaru-sminchisescu-human36mlargescaledatasetsandpredictivemethodsfor3dhumansensinginnaturalenvironments-2014","downloads":0,"creationDate":"2015-09-03T07:18:49.377Z","title":"Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments","author_short":["Ionescu, C.","Papava, D.","Olaru, V.","Sminchisescu, C."],"year":2014,"bibtype":"article","biburl":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c","bibdata":{"title":"Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments","type":"article","year":"2014","keywords":"3D human pose estimation,Cameras,Estimation,Fourier kernel approximations,Joints,Modeling and recovery of physical attributes,Motion,Sensors,Solid modeling,Three-dimensional displays,Training,articulated body modeling,human motion capture data,large-scale learning,optimization,structured prediction","pages":"1325-1339","volume":"36","month":"7","id":"5c22c9ee-ff8a-3f65-8f2e-b0e226821844","created":"2022-03-28T09:45:02.757Z","file_attached":"true","profile_id":"235249c2-3ed4-314a-b309-b1ea0330f5d9","group_id":"1ff583c0-be37-34fa-9c04-73c69437d354","last_modified":"2022-03-29T08:04:24.355Z","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"citation_key":"ionescuHuman36MLarge2014","source_type":"article","short_title":"Human3.6M","notes":"Conference Name: IEEE Transactions on Pattern Analysis and Machine Intelligence","private_publication":false,"abstract":"We introduce a new dataset, Human3.6M, of 3.6 Million accurate 3D Human poses, acquired by recording the performance of 5 female and 6 male subjects, under 4 different viewpoints, for training realistic human sensing systems and for evaluating the next generation of human pose estimation models and algorithms. Besides increasing the size of the datasets in the current state-of-the-art by several orders of magnitude, we also aim to complement such datasets with a diverse set of motions and poses encountered as part of typical human activities (taking photos, talking on the phone, posing, greeting, eating, etc.), with additional synchronized image, human motion capture, and time of flight (depth) data, and with accurate 3D body scans of all the subject actors involved. We also provide controlled mixed reality evaluation scenarios where 3D human models are animated using motion capture and inserted using correct 3D geometry, in complex real environments, viewed with moving cameras, and under occlusion. Finally, we provide a set of large-scale statistical models and detailed evaluation baselines for the dataset illustrating its diversity and the scope for improvement by future work in the research community. Our experiments show that our best large-scale model can leverage our full training set to obtain a 20\\% improvement in performance compared to a training set of the scale of the largest existing public dataset for this problem. Yet the potential for improvement by leveraging higher capacity, more complex models with our large dataset, is substantially vaster and should stimulate future research. 
The dataset together with code for the associated large-scale learning models, features, visualization tools, as well as the evaluation server, is available online at http://vision.imar.ro/human3.6m.","bibtype":"article","author":"Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian","doi":"10.1109/TPAMI.2013.248","journal":"IEEE Transactions on Pattern Analysis and Machine Intelligence","number":"7","bibtex":"@article{\n title = {Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments},\n type = {article},\n year = {2014},\n keywords = {3D human pose estimation,Cameras,Estimation,Fourier kernel approximations,Joints,Modeling and recovery of physical attributes,Motion,Sensors,Solid modeling,Three-dimensional displays,Training,articulated body modeling,human motion capture data,large-scale learning,optimization,structured prediction},\n pages = {1325-1339},\n volume = {36},\n month = {7},\n id = {5c22c9ee-ff8a-3f65-8f2e-b0e226821844},\n created = {2022-03-28T09:45:02.757Z},\n file_attached = {true},\n profile_id = {235249c2-3ed4-314a-b309-b1ea0330f5d9},\n group_id = {1ff583c0-be37-34fa-9c04-73c69437d354},\n last_modified = {2022-03-29T08:04:24.355Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n citation_key = {ionescuHuman36MLarge2014},\n source_type = {article},\n short_title = {Human3.6M},\n notes = {Conference Name: IEEE Transactions on Pattern Analysis and Machine Intelligence},\n private_publication = {false},\n abstract = {We introduce a new dataset, Human3.6M, of 3.6 Million accurate 3D Human poses, acquired by recording the performance of 5 female and 6 male subjects, under 4 different viewpoints, for training realistic human sensing systems and for evaluating the next generation of human pose estimation models and algorithms. Besides increasing the size of the datasets in the current state-of-the-art by several orders of magnitude, we also aim to complement such datasets with a diverse set of motions and poses encountered as part of typical human activities (taking photos, talking on the phone, posing, greeting, eating, etc.), with additional synchronized image, human motion capture, and time of flight (depth) data, and with accurate 3D body scans of all the subject actors involved. We also provide controlled mixed reality evaluation scenarios where 3D human models are animated using motion capture and inserted using correct 3D geometry, in complex real environments, viewed with moving cameras, and under occlusion. Finally, we provide a set of large-scale statistical models and detailed evaluation baselines for the dataset illustrating its diversity and the scope for improvement by future work in the research community. Our experiments show that our best large-scale model can leverage our full training set to obtain a 20\\% improvement in performance compared to a training set of the scale of the largest existing public dataset for this problem. Yet the potential for improvement by leveraging higher capacity, more complex models with our large dataset, is substantially vaster and should stimulate future research. 
The dataset together with code for the associated large-scale learning models, features, visualization tools, as well as the evaluation server, is available online at http://vision.imar.ro/human3.6m.},\n bibtype = {article},\n author = {Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian},\n doi = {10.1109/TPAMI.2013.248},\n journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},\n number = {7}\n}","author_short":["Ionescu, C.","Papava, D.","Olaru, V.","Sminchisescu, C."],"urls":{"Paper":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c/file/292acecf-b6b9-f0db-7c52-0be55b89e73a/6682899.html.pdf"},"biburl":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c","bibbaseid":"ionescu-papava-olaru-sminchisescu-human36mlargescaledatasetsandpredictivemethodsfor3dhumansensinginnaturalenvironments-2014","role":"author","keyword":["3D human pose estimation","Cameras","Estimation","Fourier kernel approximations","Joints","Modeling and recovery of physical attributes","Motion","Sensors","Solid modeling","Three-dimensional displays","Training","articulated body modeling","human motion capture data","large-scale learning","optimization","structured prediction"],"metadata":{"authorlinks":{}},"downloads":0},"search_terms":["human3","large","scale","datasets","predictive","methods","human","sensing","natural","environments","ionescu","papava","olaru","sminchisescu"],"keywords":["3d human pose estimation","cameras","estimation","fourier kernel approximations","joints","modeling and recovery of physical attributes","motion","sensors","solid modeling","three-dimensional displays","training","articulated body modeling","human motion capture data","large-scale learning","optimization","structured prediction"],"authorIDs":[],"dataSources":["9cexBw6hrwgyZphZZ","ya2CyA73rpZseyrZ8","2252seNhipfTmjEBQ"]}