XGBoost: A Scalable Tree Boosting System. Chen, T. & Guestrin, C. In Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pages 785–794, August 2016. arXiv:1603.02754 [cs]
Paper doi abstract bibtex Tree boosting is a highly effective and widely used machine learning method. In this paper, we describe a scalable end-to-end tree boosting system called XGBoost, which is used widely by data scientists to achieve state-of-the-art results on many machine learning challenges. We propose a novel sparsity-aware algorithm for sparse data and weighted quantile sketch for approximate tree learning. More importantly, we provide insights on cache access patterns, data compression and sharding to build a scalable tree boosting system. By combining these insights, XGBoost scales beyond billions of examples using far fewer resources than existing systems.
@comment{KDD 2016 conference paper. The published version is identified by
the DOI; the arXiv preprint is recorded in the eprint fields rather than in
a free-text note, so styles can format or suppress it consistently.}
@inproceedings{chenXGBoostScalableTree2016,
  author        = {Chen, Tianqi and Guestrin, Carlos},
  title         = {{XGBoost}: A Scalable Tree Boosting System},
  shorttitle    = {{XGBoost}},
  booktitle     = {Proceedings of the 22nd {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining}},
  publisher     = {Association for Computing Machinery},
  year          = {2016},
  month         = aug,
  pages         = {785--794},
  doi           = {10.1145/2939672.2939785},
  eprint        = {1603.02754},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {http://arxiv.org/abs/1603.02754},
  urldate       = {2025-04-03},
  keywords      = {Computer Science - Machine Learning},
  abstract      = {Tree boosting is a highly effective and widely used machine learning method. In this paper, we describe a scalable end-to-end tree boosting system called XGBoost, which is used widely by data scientists to achieve state-of-the-art results on many machine learning challenges. We propose a novel sparsity-aware algorithm for sparse data and weighted quantile sketch for approximate tree learning. More importantly, we provide insights on cache access patterns, data compression and sharding to build a scalable tree boosting system. By combining these insights, XGBoost scales beyond billions of examples using far fewer resources than existing systems.},
  annote        = {Comment: KDD'16 changed all figures to type1},
  file          = {Preprint PDF:/Users/tyuan/Zotero/storage/7GBRDQI2/Chen and Guestrin - 2016 - XGBoost A Scalable Tree Boosting System.pdf:application/pdf;Snapshot:/Users/tyuan/Zotero/storage/LBCM5Y7E/1603.html:text/html},
}
Downloads: 0
{"_id":"6QP4KLmtx4ntWW8ZT","bibbaseid":"chen-guestrin-xgboostascalabletreeboostingsystem-2016","author_short":["Chen, T.","Guestrin, C."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","title":"XGBoost: A Scalable Tree Boosting System","shorttitle":"XGBoost","url":"http://arxiv.org/abs/1603.02754","doi":"10.1145/2939672.2939785","abstract":"Tree boosting is a highly effective and widely used machine learning method. In this paper, we describe a scalable end-to-end tree boosting system called XGBoost, which is used widely by data scientists to achieve state-of-the-art results on many machine learning challenges. We propose a novel sparsity-aware algorithm for sparse data and weighted quantile sketch for approximate tree learning. More importantly, we provide insights on cache access patterns, data compression and sharding to build a scalable tree boosting system. By combining these insights, XGBoost scales beyond billions of examples using far fewer resources than existing systems.","urldate":"2025-04-03","booktitle":"Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","author":[{"propositions":[],"lastnames":["Chen"],"firstnames":["Tianqi"],"suffixes":[]},{"propositions":[],"lastnames":["Guestrin"],"firstnames":["Carlos"],"suffixes":[]}],"month":"August","year":"2016","note":"arXiv:1603.02754 [cs]","keywords":"Computer Science - Machine Learning","pages":"785–794","annote":"Comment: KDD'16 changed all figures to type1","file":"Preprint PDF:/Users/tyuan/Zotero/storage/7GBRDQI2/Chen and Guestrin - 2016 - XGBoost A Scalable Tree Boosting System.pdf:application/pdf;Snapshot:/Users/tyuan/Zotero/storage/LBCM5Y7E/1603.html:text/html","bibtex":"@inproceedings{chenXGBoostScalableTree2016,\n\ttitle = {{XGBoost}: {A} {Scalable} {Tree} {Boosting} {System}},\n\tshorttitle = {{XGBoost}},\n\turl = {http://arxiv.org/abs/1603.02754},\n\tdoi = {10.1145/2939672.2939785},\n\tabstract = {Tree boosting is a highly effective 
and widely used machine learning method. In this paper, we describe a scalable end-to-end tree boosting system called XGBoost, which is used widely by data scientists to achieve state-of-the-art results on many machine learning challenges. We propose a novel sparsity-aware algorithm for sparse data and weighted quantile sketch for approximate tree learning. More importantly, we provide insights on cache access patterns, data compression and sharding to build a scalable tree boosting system. By combining these insights, XGBoost scales beyond billions of examples using far fewer resources than existing systems.},\n\turldate = {2025-04-03},\n\tbooktitle = {Proceedings of the 22nd {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining}},\n\tauthor = {Chen, Tianqi and Guestrin, Carlos},\n\tmonth = aug,\n\tyear = {2016},\n\tnote = {arXiv:1603.02754 [cs]},\n\tkeywords = {Computer Science - Machine Learning},\n\tpages = {785--794},\n\tannote = {Comment: KDD'16 changed all figures to type1},\n\tfile = {Preprint PDF:/Users/tyuan/Zotero/storage/7GBRDQI2/Chen and Guestrin - 2016 - XGBoost A Scalable Tree Boosting System.pdf:application/pdf;Snapshot:/Users/tyuan/Zotero/storage/LBCM5Y7E/1603.html:text/html},\n}\n\n","author_short":["Chen, T.","Guestrin, C."],"key":"chenXGBoostScalableTree2016","id":"chenXGBoostScalableTree2016","bibbaseid":"chen-guestrin-xgboostascalabletreeboostingsystem-2016","role":"author","urls":{"Paper":"http://arxiv.org/abs/1603.02754"},"keyword":["Computer Science - Machine Learning"],"metadata":{"authorlinks":{}},"html":""},"bibtype":"inproceedings","biburl":"https://metatl.github.io/yuan/bib/publications.bib","dataSources":["iwKepCrWBps7ojhDx","cx4WvnDhXJhiLqdQo","3yGioK9jzJGERGht9"],"keywords":["computer science - machine learning"],"search_terms":["xgboost","scalable","tree","boosting","system","chen","guestrin"],"title":"XGBoost: A Scalable Tree Boosting System","year":2016}