Bayes Point Machines. Herbrich, R., Graepel, T., & Campbell, C. Journal of Machine Learning Research, 1:245--279, 2001.

Abstract: Kernel-classifiers comprise a powerful class of non-linear decision functions for binary classification. The support vector machine is an example of a learning algorithm for kernel classifiers that singles out the consistent classifier with the largest margin, i.e. minimal real-valued output on the training sample, within the set of consistent hypotheses, the so-called version space. We suggest the Bayes point machine as a well-founded improvement which approximates the Bayes-optimal decision by the centre of mass of version space. We present two algorithms to stochastically approximate the centre of mass of version space: a billiard sampling algorithm and a sampling algorithm based on the well known perceptron algorithm. It is shown how both algorithms can be extended to allow for soft-boundaries in order to admit training errors. Experimentally, we find that — for the zero training error case — Bayes point machines consistently outperform support vector machines on both surrogate data and real-world benchmark data sets. In the soft-boundary/soft-margin case, the improvement over support vector machines is shown to be reduced. Finally, we demonstrate that the real-valued output of single Bayes points on novel test points is a valid confidence measure and leads to a steady decrease in generalisation error when used as a rejection criterion.
@article{DBLP:journals/jmlr/HerbrichGC01,
abstract = {Kernel-classifiers comprise a powerful class of non-linear decision functions for binary classification. The support vector machine is an example of a learning algorithm for kernel classifiers that singles out the consistent classifier with the largest margin, i.e. minimal real-valued output on the training sample, within the set of consistent hypotheses, the so-called version space. We suggest the Bayes point machine as a well-founded improvement which approximates the Bayes-optimal decision by the centre of mass of version space. We present two algorithms to stochastically approximate the centre of mass of version space: a billiard sampling algorithm and a sampling algorithm based on the well known perceptron algorithm. It is shown how both algorithms can be extended to allow for soft-boundaries in order to admit training errors. Experimentally, we find that — for the zero training error case — Bayes point machines consistently outperform support vector machines on both surrogate data and real-world benchmark data sets. In the soft-boundary/soft-margin case, the improvement over support vector machines is shown to be reduced. Finally, we demonstrate that the real-valued output of single Bayes points on novel test points is a valid confidence measure and leads to a steady decrease in generalisation error when used as a rejection criterion.},
author = {Herbrich, Ralf and Graepel, Thore and Campbell, Colin},
journal = {Journal of Machine Learning Research},
pages = {245--279},
title = {{Bayes Point Machines}},
url = {http://www.herbrich.me/papers/bpm.pdf},
volume = {1},
year = {2001}
}
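The abstract's perceptron-based algorithm estimates the Bayes point by sampling consistent classifiers from version space and averaging them. Below is a minimal sketch of that idea, assuming a linearly separable sample and plain (non-kernel) perceptron updates; all function and variable names are illustrative and this is not the paper's exact procedure:

```python
import numpy as np

def perceptron(X, y, rng, max_epochs=100):
    """Run the perceptron on one random permutation of (X, y).

    For separable data this converges to a weight vector consistent
    with the whole sample, i.e. a single point in version space.
    """
    n, d = X.shape
    w = np.zeros(d)
    order = rng.permutation(n)
    for _ in range(max_epochs):
        mistakes = 0
        for i in order:
            if y[i] * (X[i] @ w) <= 0:  # misclassified (or on the boundary)
                w += y[i] * X[i]        # standard perceptron update
                mistakes += 1
        if mistakes == 0:               # consistent: w lies in version space
            break
    return w / np.linalg.norm(w)        # version space lives on the unit sphere

def bayes_point(X, y, n_samples=100, seed=0):
    """Average many sampled version-space points to approximate the
    Bayes point, i.e. the centre of mass of version space."""
    rng = np.random.default_rng(seed)
    samples = [perceptron(X, y, rng) for _ in range(n_samples)]
    w_bp = np.mean(samples, axis=0)
    return w_bp / np.linalg.norm(w_bp)

# Usage on a toy separable problem:
if __name__ == "__main__":
    rng = np.random.default_rng(1)
    X = rng.normal(size=(40, 2))
    y = np.sign(X @ np.array([1.0, -0.5]))
    print("approximate Bayes point:", bayes_point(X, y))
```

Each permutation steers the perceptron to a different consistent solution, so the runs act as rough samples from version space; their normalised mean approximates its centre of mass. The paper's billiard sampling algorithm and the soft-boundary extensions for non-separable data are not reflected in this sketch.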
{"_id":{"_str":"53421b61ecd21cdc070003fb"},"__v":8,"authorIDs":["5456e9a38b01c8193000005e","54576c282abc8e9f370003ae"],"author_short":["Herbrich, R.","Graepel, T.","Campbell, C."],"bibbaseid":"herbrich-graepel-campbell-bayespointmachines-2001","bibdata":{"bibtype":"article","type":"article","abstract":"Kernel-classifiers comprise a powerful class of non-linear decision functions for binary classification. The support vector machine is an example of a learning algorithm for kernel classifiers that singles out the consistent classifier with the largest margin, i.e. minimal real-valued output on the training sample, within the set of consistent hypotheses, the so-called version space. We suggest the Bayes point machine as a well-founded improvement which approximates the Bayes-optimal decision by the centre of mass of version space. We present two algorithms to stochastically approximate the centre of mass of version space: a billiard sampling algorithm and a sampling algorithm based on the well known perceptron algorithm. It is shown how both algorithms can be extended to allow for soft-boundaries in order to admit training errors. Experimentally, we find that — for the zero training error case — Bayes point machines consistently outperform support vector machines on both surrogate data and real-world benchmark data sets. In the soft-boundary/soft-margin case, the improvement over support vector machines is shown to be reduced. Finally, we demonstrate that the real-valued output of single Bayes points on novel test points is a valid confidence measure and leads to a steady decrease in generalisation error when used as a rejection criterion.","author":[{"propositions":[],"lastnames":["Herbrich"],"firstnames":["Ralf"],"suffixes":[]},{"propositions":[],"lastnames":["Graepel"],"firstnames":["Thore"],"suffixes":[]},{"propositions":[],"lastnames":["Campbell"],"firstnames":["Colin"],"suffixes":[]}],"file":":Users/rherb/Code/herbrich.me/papers/bpm.pdf:pdf","journal":"Journal of Machine Learning Research","pages":"245--279","title":"Bayes Point Machines","url":"http://www.herbrich.me/papers/bpm.pdf","volume":"1","year":"2001","bibtex":"@article{DBLP:journals/jmlr/HerbrichGC01,\nabstract = {Kernel-classifiers comprise a powerful class of non-linear decision functions for binary classification. The support vector machine is an example of a learning algorithm for kernel classifiers that singles out the consistent classifier with the largest margin, i.e. minimal real-valued output on the training sample, within the set of consistent hypotheses, the so-called version space. We suggest the Bayes point machine as a well-founded improvement which approximates the Bayes-optimal decision by the centre of mass of version space. We present two algorithms to stochastically approximate the centre of mass of version space: a billiard sampling algorithm and a sampling algorithm based on the well known perceptron algorithm. It is shown how both algorithms can be extended to allow for soft-boundaries in order to admit training errors. Experimentally, we find that — for the zero training error case — Bayes point machines consistently outperform support vector machines on both surrogate data and real-world benchmark data sets. In the soft-boundary/soft-margin case, the improvement over support vector machines is shown to be reduced. 
Finally, we demonstrate that the real-valued output of single Bayes points on novel test points is a valid confidence measure and leads to a steady decrease in generalisation error when used as a rejection criterion.},\nauthor = {Herbrich, Ralf and Graepel, Thore and Campbell, Colin},\nfile = {:Users/rherb/Code/herbrich.me/papers/bpm.pdf:pdf},\njournal = {Journal of Machine Learning Research},\npages = {245--279},\ntitle = {{Bayes Point Machines}},\nurl = {http://www.herbrich.me/papers/bpm.pdf},\nvolume = {1},\nyear = {2001}\n}\n","author_short":["Herbrich, R.","Graepel, T.","Campbell, C."],"key":"DBLP:journals/jmlr/HerbrichGC01","id":"DBLP:journals/jmlr/HerbrichGC01","bibbaseid":"herbrich-graepel-campbell-bayespointmachines-2001","role":"author","urls":{"Paper":"http://www.herbrich.me/papers/bpm.pdf"},"downloads":2,"html":""},"bibtype":"article","biburl":"http://herbrich.me/bib/herbrich.bib","downloads":2,"keywords":[],"search_terms":["bayes","point","machines","herbrich","graepel","campbell"],"title":"Bayes Point Machines","year":2001,"dataSources":["y2DvMgAcqeDpXQ6ds"]}