An Efficient Implementation of Distance-Based Diversity Measures Based on $k$-d Trees. Agrafiotis, D. K. & Lobanov, V. S. J.~Chem.~Inf.~Comput.~Sci., 39:51--58, 1999. doi abstract bibtex The problem of quantifying molecular diversity continues to attract significant interest among computational chemists. Most algorithms reported to date are distance-based and scale to the square of the size of the data set. This paper reports an alternative algorithm based on k-dimensional (or k-d) trees. k-d trees are combinatorial data structures that allow expedient location of nearest neighbors in multivariate spaces. Nearest neighbor detection forms the basis of many popular diversity measures, such as maximin, minimum spanning trees, and many others. In this report, we demonstrate that k-d trees exhibit excellent scaling characteristics and can be used to accelerate diversity estimation without compromising the quality of the design. The advantages of this approach are contrasted with an alternative algorithm that was recently proposed by Turner et al. based on the cosine similarity coefficient.
@article{Agrafiotis:1999aa,
Abstract = { The problem of quantifying molecular diversity continues to attract
significant interest among computational chemists. Most algorithms
reported to date are distance-based and scale to the square of the
size of the data set. This paper reports an alternative algorithm
based on k-dimensional (or k-d) trees. k-d trees are combinatorial
data structures that allow expedient location of nearest neighbors
in multivariate spaces. Nearest neighbor detection forms the basis
of many popular diversity measures, such as maximin, minimum spanning
trees, and many others. In this report, we demonstrate that k-d trees
exhibit excellent scaling characteristics and can be used to accelerate
diversity estimation without compromising the quality of the design.
The advantages of this approach are contrasted with an alternative
algorithm that was recently proposed by Turner et al. based on the
cosine similarity coefficient. },
Author = {Agrafiotis, D. K. and Lobanov, V. S.},
Date-Added = {2007-12-11 17:01:03 -0500},
Date-Modified = {2008-08-21 14:27:42 -0400},
Doi = {10.1021/ci980100c},
Journal = {J.~Chem.~Inf.~Comput.~Sci.},
Keywords = {diversity analysis; kd tree; spatial index},
Pages = {51--58},
Title = {An Efficient Implementation of Distance-Based Diversity Measures Based on $k$-d Trees},
Volume = {39},
Year = {1999},
Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAW4AAAAAAW4AAgAAA212IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMU5bQNIKwAAABCNbQ1jaTk4MDEwMGMucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEI6UxNMr5AAAAAAAAAAAAAIAAwAACSAAAAAAAAAAAAAAAAAAAAAIYXJ0aWNsZXMAEAAIAADFObNTAAAAEQAIAADE02QkAAAAAQAQABCNbQAKTIAACkxpAAB8EwACADBtdiA6VXNlcnM6cmd1aGE6RG9jdW1lbnRzOmFydGljbGVzOmNpOTgwMTAwYy5wZGYADgAcAA0AYwBpADkAOAAwADEAMAAwAGMALgBwAGQAZgAPAAgAAwBtAHYAIAASACxVc2Vycy9yZ3VoYS9Eb2N1bWVudHMvYXJ0aWNsZXMvY2k5ODAxMDBjLnBkZgATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAmLi4vLi4vRG9jdW1lbnRzL2FydGljbGVzL2NpOTgwMTAwYy5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACEgIUAhkCIgItAjECPwJGAk8CeAJ9AoACjQKSAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAqQ=}}
Downloads: 0
{"_id":"64ka4JkWKYP4WwALm","bibbaseid":"agrafiotis-lobanov-anefficientimplementationofdistancebaseddiversitymeasuresbasedonkdtrees-1999","downloads":0,"creationDate":"2016-02-18T13:03:35.812Z","title":"An Efficient Implementation of Distance-Based Diversity Measures Based on $k$-d Trees","author_short":["Agrafiotis, D. K.","Lobanov, V. S."],"year":1999,"bibtype":"article","biburl":"https://dl.dropboxusercontent.com/u/26998770/main.bib","bibdata":{"bibtype":"article","type":"article","abstract":"The problem of quantifying molecular diversity continues to attract significant interest among computational chemists. Most algorithms reported to date are distance-based and scale to the square of the size of the data set. This paper reports an alternative algorithm based on k-dimensional (or k-d) trees. k-d trees are combinatorial data structures that allow expedient location of nearest neighbors in multivariate spaces. Nearest neighbor detection forms the basis of many popular diversity measures, such as maximin, minimum spanning trees, and many others. In this report, we demonstrate that k-d trees exhibit excellent scaling characteristics and can be used to accelerate diversity estimation without compromising the quality of the design. The advantages of this approach are contrasted with an alternative algorithm that was recently proposed by Turner et al. based on the cosine similarity coefficient. ","author":[{"propositions":[],"lastnames":["Agrafiotis"],"firstnames":["D.","K."],"suffixes":[]},{"propositions":[],"lastnames":["Lobanov"],"firstnames":["V.","S."],"suffixes":[]}],"date-added":"2007-12-11 17:01:03 -0500","date-modified":"2008-08-21 14:27:42 -0400","doi":"10.1021/ci980100c","journal":"J.~Chem.~Inf.~Comput.~Sci.","keywords":"diversity analysis; kd tree; spatial index","pages":"51--58","title":"An Efficient Implementation of Distance-Based Diversity Measures Based on $k$-d Trees","volume":"39","year":"1999","bdsk-file-1":"YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAW4AAAAAAW4AAgAAA212IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMU5bQNIKwAAABCNbQ1jaTk4MDEwMGMucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEI6UxNMr5AAAAAAAAAAAAAIAAwAACSAAAAAAAAAAAAAAAAAAAAAIYXJ0aWNsZXMAEAAIAADFObNTAAAAEQAIAADE02QkAAAAAQAQABCNbQAKTIAACkxpAAB8EwACADBtdiA6VXNlcnM6cmd1aGE6RG9jdW1lbnRzOmFydGljbGVzOmNpOTgwMTAwYy5wZGYADgAcAA0AYwBpADkAOAAwADEAMAAwAGMALgBwAGQAZgAPAAgAAwBtAHYAIAASACxVc2Vycy9yZ3VoYS9Eb2N1bWVudHMvYXJ0aWNsZXMvY2k5ODAxMDBjLnBkZgATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAmLi4vLi4vRG9jdW1lbnRzL2FydGljbGVzL2NpOTgwMTAwYy5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACEgIUAhkCIgItAjECPwJGAk8CeAJ9AoACjQKSAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAqQ=","bibtex":"@article{Agrafiotis:1999aa,\n\tAbstract = { The problem of quantifying molecular diversity continues to attract\n\tsignificant interest among computational chemists. Most algorithms\n\treported to date are distance-based and scale to the square of the\n\tsize of the data set. This paper reports an alternative algorithm\n\tbased on k-dimensional (or k-d) trees. k-d trees are combinatorial\n\tdata structures that allow expedient location of nearest neighbors\n\tin multivariate spaces. Nearest neighbor detection forms the basis\n\tof many popular diversity measures, such as maximin, minimum spanning\n\ttrees, and many others. In this report, we demonstrate that k-d trees\n\texhibit excellent scaling characteristics and can be used to accelerate\n\tdiversity estimation without compromising the quality of the design.\n\tThe advantages of this approach are contrasted with an alternative\n\talgorithm that was recently proposed by Turner et al. based on the\n\tcosine similarity coefficient. },\n\tAuthor = {Agrafiotis, D. K. and Lobanov, V. S.},\n\tDate-Added = {2007-12-11 17:01:03 -0500},\n\tDate-Modified = {2008-08-21 14:27:42 -0400},\n\tDoi = {10.1021/ci980100c},\n\tJournal = {J.~Chem.~Inf.~Comput.~Sci.},\n\tKeywords = {diversity analysis; kd tree; spatial index},\n\tPages = {51--58},\n\tTitle = {An Efficient Implementation of Distance-Based Diversity Measures Based on $k$-d Trees},\n\tVolume = {39},\n\tYear = {1999},\n\tBdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAW4AAAAAAW4AAgAAA212IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMU5bQNIKwAAABCNbQ1jaTk4MDEwMGMucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEI6UxNMr5AAAAAAAAAAAAAIAAwAACSAAAAAAAAAAAAAAAAAAAAAIYXJ0aWNsZXMAEAAIAADFObNTAAAAEQAIAADE02QkAAAAAQAQABCNbQAKTIAACkxpAAB8EwACADBtdiA6VXNlcnM6cmd1aGE6RG9jdW1lbnRzOmFydGljbGVzOmNpOTgwMTAwYy5wZGYADgAcAA0AYwBpADkAOAAwADEAMAAwAGMALgBwAGQAZgAPAAgAAwBtAHYAIAASACxVc2Vycy9yZ3VoYS9Eb2N1bWVudHMvYXJ0aWNsZXMvY2k5ODAxMDBjLnBkZgATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAmLi4vLi4vRG9jdW1lbnRzL2FydGljbGVzL2NpOTgwMTAwYy5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACEgIUAhkCIgItAjECPwJGAk8CeAJ9AoACjQKSAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAqQ=}}\n\n","author_short":["Agrafiotis, D. K.","Lobanov, V. S."],"key":"Agrafiotis:1999aa","id":"Agrafiotis:1999aa","bibbaseid":"agrafiotis-lobanov-anefficientimplementationofdistancebaseddiversitymeasuresbasedonkdtrees-1999","role":"author","urls":{},"keyword":["diversity analysis; kd tree; spatial index"],"downloads":0},"search_terms":["efficient","implementation","distance","based","diversity","measures","based","trees","agrafiotis","lobanov"],"keywords":["diversity analysis; kd tree; spatial index"],"authorIDs":[],"dataSources":["c5japf9eAQRaeMS4h"]}