Machine Learning Models for GPU Error Prediction in a Large Scale HPC System. Nie, B., Xue, J., Gupta, S., Patel, T., Engelmann, C., Smirni, E., & Tiwari, D. In 48th Annual IEEE/IFIP International Conference on Dependable Systems and Networks, DSN 2018, Luxembourg City, Luxembourg, June 25-28, 2018, pages 95–106, 2018. IEEE Computer Society. Paper doi bibtex 2 downloads @inproceedings{DBLP:conf/dsn/NieXGPEST18,
author = {Bin Nie and
Ji Xue and
Saurabh Gupta and
Tirthak Patel and
Christian Engelmann and
Evgenia Smirni and
Devesh Tiwari},
title = {Machine Learning Models for {GPU} Error Prediction in a Large Scale
{HPC} System},
booktitle = {48th Annual {IEEE/IFIP} International Conference on Dependable Systems
and Networks, {DSN} 2018, Luxembourg City, Luxembourg, June 25-28,
2018},
pages = {95--106},
publisher = {{IEEE} Computer Society},
year = {2018},
url = {https://doi.org/10.1109/DSN.2018.00022},
doi = {10.1109/DSN.2018.00022},
timestamp = {Mon, 05 Feb 2024 00:00:00 +0100},
biburl = {https://dblp.org/rec/conf/dsn/NieXGPEST18.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
Downloads: 2
{"_id":"52wSgAZ6C4bDaFkft","bibbaseid":"nie-xue-gupta-patel-engelmann-smirni-tiwari-machinelearningmodelsforgpuerrorpredictioninalargescalehpcsystem-2018","authorIDs":["2QiDCXhv2QGyie2QN","3xGygrmDYXjuNNHyn","5DgG8jkhX87tCSdm4","5e2f552126e5cadf01000132","5e2f5fbf26e5cadf01000315","5e2f612f26e5cadf01000346","5e2f7b699ca24fdf0100019e","5e2f7d879ca24fdf010001eb","5e2f9bcf48b7a4df010001b9","5e2fa117a21e47de01000020","5e2fbb65e075a2df01000011","5e30414346a666df01000240","5e30415e46a666df01000245","5e320db1f80a24de01000052","5e33ba037bf3ecde0100007c","5e34f6685978bef20100008f","5e35111a8c7375de01000081","5e37dd98e8908edf01000012","5e38c98e87bbe9de010000d0","5e39a211ad94f9de01000055","5e39d3bfd6538cdf010001d9","5e39e1f03687dddf010000fb","5e3b1ba3ba2e16df01000108","5e3d7fbb96e576de010000c2","5e3dac6bf33211df010001b9","5e41917e8491fadf0100010e","5e444f03df3c2af3010000d1","5e46ba6b8573d1de010000c6","5e53a128c02a31de01000160","5e55cc368061d1de01000089","5e5f4bb5fff7e8de01000007","5e6064e09119f0de01000002","5e6180f61d4ccede01000039","5e61c00fabaeaede010000b7","5e61ca42abaeaede01000116","5e61d3bc0c95fdde01000012","5e62680311ac5fde010000a7","5e62beeecb259cde010000c3","5e6594945dd5c8de0100003b","5e682bc3dfcfe3de01000316","5e693ad16964dedf01000179","72oGmS7b3stRzZXkW","8vbikqKEWgGnHTmGb","9BDTTNRk94S3bhskA","AeggDu2icY2XXKwR3","Byxh4o9Z23T9P3pMb","CzW52LigcXjFKHxDT","DKRDJyKsoxgtQ6N9k","DjyJP4wnnDTjP4M3Y","EJQkkaCQKrJvDykKk","EQBko8vjbwsAGdTvG","EnrA3NXuvegX3E6oC","Et46wYojnY3pXTyr6","GLoE2piskDMZvre39","GTq6u35ynQfwkRbyu","HBp5aMMiidNAsjyA7","HXpLZtw9J9xRxb7Gd","K6CCYxfRY9A8b2kPP","KzvEarqeLBzwgp7hk","PkskKxTDAiBT8T8GG","QqeHmAxgkB9rwymkn","SxZbjqsfRLZ578LCY","W29uDfXPrzLyegQaX","W3KENv4RLt598XuAh","WpJ4QoPiDGt7Daxtn","XCSmSJtJp6jSfYtp6","XKb7iToGC3zB9DDM3","YaqP3J4RHfku26brQ","a3HHBb7sqg5sb8B5W","apmdAndN2HA7gRBB7","b3SNsciSeudhEvynN","efuvw3p5ts9Hbvzdk","gBtfF9823uTbyDQHf","hWMnzs9jXJFYbjgkS","jGXGRo5tvcrr6qeRY","jQTcykihtd5DAjCbr","kXSf5mgpZaraN8gSv","m3ctWMDKg3y8NdyWS","mbpHgiMheu5RNTSLX","mppy8soatfZor5KbN","njCYcJPQBLEq24kux","oXQxpw98fQ2yrscSw","oiusWP9mcB5WhuQPH","pYTyQKZZNWNYik37p","q34ciE8updxX3hMHv","u5EQ8Fw8QKsrxpNoR","w6Jmx4MLyquTDe6xR","wBfqrgFcH7q27wRAQ","wPHj8cW9axeC9zPjn","x2CXFayjYsAQWYCCj","x53z9uLDsmgBxG4P4","y7p2o8s3ASjXGEmmj","yBuKZocLWuyhk8nYC","yPJT58zzE8E8tYLRJ"],"author_short":["Nie, B.","Xue, J.","Gupta, S.","Patel, T.","Engelmann, C.","Smirni, E.","Tiwari, D."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","author":[{"firstnames":["Bin"],"propositions":[],"lastnames":["Nie"],"suffixes":[]},{"firstnames":["Ji"],"propositions":[],"lastnames":["Xue"],"suffixes":[]},{"firstnames":["Saurabh"],"propositions":[],"lastnames":["Gupta"],"suffixes":[]},{"firstnames":["Tirthak"],"propositions":[],"lastnames":["Patel"],"suffixes":[]},{"firstnames":["Christian"],"propositions":[],"lastnames":["Engelmann"],"suffixes":[]},{"firstnames":["Evgenia"],"propositions":[],"lastnames":["Smirni"],"suffixes":[]},{"firstnames":["Devesh"],"propositions":[],"lastnames":["Tiwari"],"suffixes":[]}],"title":"Machine Learning Models for GPU Error Prediction in a Large Scale HPC System","booktitle":"48th Annual IEEE/IFIP International Conference on Dependable Systems and Networks, DSN 2018, Luxembourg City, Luxembourg, June 25-28, 2018","pages":"95–106","publisher":"IEEE Computer Society","year":"2018","url":"https://doi.org/10.1109/DSN.2018.00022","doi":"10.1109/DSN.2018.00022","timestamp":"Mon, 05 Feb 2024 00:00:00 +0100","biburl":"https://dblp.org/rec/conf/dsn/NieXGPEST18.bib","bibsource":"dblp computer science bibliography, https://dblp.org","bibtex":"@inproceedings{DBLP:conf/dsn/NieXGPEST18,\n author = {Bin Nie and\n Ji Xue and\n Saurabh Gupta and\n Tirthak Patel and\n Christian Engelmann and\n Evgenia Smirni and\n Devesh Tiwari},\n title = {Machine Learning Models for {GPU} Error Prediction in a Large Scale\n {HPC} System},\n booktitle = {48th Annual {IEEE/IFIP} International Conference on Dependable Systems\n and Networks, {DSN} 2018, Luxembourg City, Luxembourg, June 25-28,\n 2018},\n pages = {95--106},\n publisher = {{IEEE} Computer Society},\n year = {2018},\n url = {https://doi.org/10.1109/DSN.2018.00022},\n doi = {10.1109/DSN.2018.00022},\n timestamp = {Mon, 05 Feb 2024 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/dsn/NieXGPEST18.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n","author_short":["Nie, B.","Xue, J.","Gupta, S.","Patel, T.","Engelmann, C.","Smirni, E.","Tiwari, D."],"key":"DBLP:conf/dsn/NieXGPEST18","id":"DBLP:conf/dsn/NieXGPEST18","bibbaseid":"nie-xue-gupta-patel-engelmann-smirni-tiwari-machinelearningmodelsforgpuerrorpredictioninalargescalehpcsystem-2018","role":"author","urls":{"Paper":"https://doi.org/10.1109/DSN.2018.00022"},"metadata":{"authorlinks":{"patel, t":"https://bibbase.org/show?bib=https://dblp.org/pid/208/1839.bib","tiwari, d":"https://web.northeastern.edu/"}},"downloads":2},"bibtype":"inproceedings","biburl":"https://dblp.org/pid/208/1839.bib","creationDate":"2020-01-27T21:24:49.439Z","downloads":2,"keywords":[],"search_terms":["machine","learning","models","gpu","error","prediction","large","scale","hpc","system","nie","xue","gupta","patel","engelmann","smirni","tiwari"],"title":"Machine Learning Models for GPU Error Prediction in a Large Scale HPC System","year":2018,"dataSources":["GNKKSXvrD6ZcYybbR","WbzXEMXNWXWatjweM","F8XADc7Wv7MiLSAzj"]}