A Constant Time Algorithm for Estimating the Diversity of Large Chemical Libraries. Agrafiotis, D. K. J. Chem. Inf. Comput. Sci., 41:156-167, 2001. doi:10.1021/ci000091j. Abstract: We describe a novel diversity metric for use in the design of combinatorial chemistry and high-throughput screening experiments. The method estimates the cumulative probability distribution of intermolecular dissimilarities in the collection of interest and then measures the deviation of that distribution from the respective distribution of a uniform sample using the Kolmogorov-Smirnov statistic. The distinct advantage of this approach is that the cumulative distribution can be easily estimated using probability sampling and does not require exhaustive enumeration of all pairwise distances in the data set. The function is intuitive, very fast to compute, does not depend on the size of the collection, and can be used to perform diversity estimates on both global and local scale. More importantly, it allows meaningful comparison of data sets of different cardinality and is not affected by the curse of dimensionality, which plagues many other diversity indices. The advantages of this approach are demonstrated using examples from the combinatorial chemistry literature.
% Journal article: a sampling-based diversity metric for chemical libraries
% (see the abstract field for the authors' own summary).
% Keywords are comma-separated so that each one is parsed as its own keyword.
% date-added, date-modified and bdsk-file-1 are carried over unchanged; they look
% like reference-manager bookkeeping fields (presumably BibDesk -- confirm).
% bdsk-file-1 is an opaque base64 payload: do not re-wrap or hand-edit it.
@article{Agrafiotis:2001aa,
  author        = {Agrafiotis, D. K.},
  title         = {A Constant Time Algorithm for Estimating the Diversity of Large Chemical Libraries},
  journal       = {J.~Chem.~Inf.~Comput.~Sci.},
  volume        = {41},
  pages         = {156--167},
  year          = {2001},
  doi           = {10.1021/ci000091j},
  keywords      = {diversity, ks, kolmogorov, smirnov, distribution},
  abstract      = {We describe a novel diversity metric for use in the design of combinatorial
                   chemistry and high-throughput screening experiments. The method estimates
                   the cumulative probability distribution of intermolecular dissimilarities
                   in the collection of interest and then measures the deviation of
                   that distribution from the respective distribution of a uniform sample
                   using the Kolmogorov-Smirnov statistic. The distinct advantage of
                   this approach is that the cumulative distribution can be easily estimated
                   using probability sampling and does not require exhaustive enumeration
                   of all pairwise distances in the data set. The function is intuitive,
                   very fast to compute, does not depend on the size of the collection,
                   and can be used to perform diversity estimates on both global and
                   local scale. More importantly, it allows meaningful comparison of
                   data sets of different cardinality and is not affected by the curse
                   of dimensionality, which plagues many other diversity indices. The
                   advantages of this approach are demonstrated using examples from
                   the combinatorial chemistry literature.},
  date-added    = {2007-12-11 17:01:03 -0500},
  date-modified = {2009-04-27 17:32:57 -0400},
  bdsk-file-1   = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAW4AAAAAAW4AAgAAA212IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMU5bQNIKwAAABCNbQ1jaTAwMDA5MWoucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEI3xxNMuPwAAAAAAAAAAAAIAAwAACSAAAAAAAAAAAAAAAAAAAAAIYXJ0aWNsZXMAEAAIAADFObNTAAAAEQAIAADE02Z/AAAAAQAQABCNbQAKTIAACkxpAAB8EwACADBtdiA6VXNlcnM6cmd1aGE6RG9jdW1lbnRzOmFydGljbGVzOmNpMDAwMDkxai5wZGYADgAcAA0AYwBpADAAMAAwADAAOQAxAGoALgBwAGQAZgAPAAgAAwBtAHYAIAASACxVc2Vycy9yZ3VoYS9Eb2N1bWVudHMvYXJ0aWNsZXMvY2kwMDAwOTFqLnBkZgATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAmLi4vLi4vRG9jdW1lbnRzL2FydGljbGVzL2NpMDAwMDkxai5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACEgIUAhkCIgItAjECPwJGAk8CeAJ9AoACjQKSAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAqQ=},
}
Downloads: 0
{"_id":"5oDdPhQ2meJghNwpR","bibbaseid":"agrafiotis-aconstanttimealgorithmforestimatingthediversityoflargechemicallibraries-2001","downloads":0,"creationDate":"2016-02-18T13:03:35.807Z","title":"A Constant Time Algorithm for Estimating the Diversity of Large Chemical Libraries","author_short":["Agrafiotis, D. K."],"year":2001,"bibtype":"article","biburl":"https://dl.dropboxusercontent.com/u/26998770/main.bib","bibdata":{"bibtype":"article","type":"article","abstract":"We describe a novel diversity metric for use in the design of combinatorial chemistry and high-throughput screening experiments. The method estimates the cumulative probability distribution of intermolecular dissimilarities in the collection of interest and then measures the deviation of that distribution from the respective distribution of a uniform sample using the Kolmogorov-Smirnov statistic. The distinct advantage of this approach is that the cumulative distribution can be easily estimated using probability sampling and does not require exhaustive enumeration of all pairwise distances in the data set. The function is intuitive, very fast to compute, does not depend on the size of the collection, and can be used to perform diversity estimates on both global and local scale. More importantly, it allows meaningful comparison of data sets of different cardinality and is not affected by the curse of dimensionality, which plagues many other diversity indices. The advantages of this approach are demonstrated using examples from the combinatorial chemistry literature. 
","author":[{"propositions":[],"lastnames":["Agrafiotis"],"firstnames":["D.","K."],"suffixes":[]}],"date-added":"2007-12-11 17:01:03 -0500","date-modified":"2009-04-27 17:32:57 -0400","doi":"10.1021/ci000091j","journal":"J.~Chem.~Inf.~Comput.~Sci.","keywords":"diversity; ks; kolmogorov; smirnov; distribution","pages":"156--167","title":"A Constant Time Algorithm for Estimating the Diversity of Large Chemical Libraries","volume":"41","year":"2001","bdsk-file-1":"YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAW4AAAAAAW4AAgAAA212IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMU5bQNIKwAAABCNbQ1jaTAwMDA5MWoucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEI3xxNMuPwAAAAAAAAAAAAIAAwAACSAAAAAAAAAAAAAAAAAAAAAIYXJ0aWNsZXMAEAAIAADFObNTAAAAEQAIAADE02Z/AAAAAQAQABCNbQAKTIAACkxpAAB8EwACADBtdiA6VXNlcnM6cmd1aGE6RG9jdW1lbnRzOmFydGljbGVzOmNpMDAwMDkxai5wZGYADgAcAA0AYwBpADAAMAAwADAAOQAxAGoALgBwAGQAZgAPAAgAAwBtAHYAIAASACxVc2Vycy9yZ3VoYS9Eb2N1bWVudHMvYXJ0aWNsZXMvY2kwMDAwOTFqLnBkZgATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAmLi4vLi4vRG9jdW1lbnRzL2FydGljbGVzL2NpMDAwMDkxai5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACEgIUAhkCIgItAjECPwJGAk8CeAJ9AoACjQKSAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAqQ=","bibtex":"@article{Agrafiotis:2001aa,\n\tAbstract = { We describe a novel diversity metric for use in the design of combinatorial\n\tchemistry and high-throughput screening experiments. The method estimates\n\tthe cumulative probability distribution of intermolecular dissimilarities\n\tin the collection of interest and then measures the deviation of\n\tthat distribution from the respective distribution of a uniform sample\n\tusing the Kolmogorov-Smirnov statistic. 
The distinct advantage of\n\tthis approach is that the cumulative distribution can be easily estimated\n\tusing probability sampling and does not require exhaustive enumeration\n\tof all pairwise distances in the data set. The function is intuitive,\n\tvery fast to compute, does not depend on the size of the collection,\n\tand can be used to perform diversity estimates on both global and\n\tlocal scale. More importantly, it allows meaningful comparison of\n\tdata sets of different cardinality and is not affected by the curse\n\tof dimensionality, which plagues many other diversity indices. The\n\tadvantages of this approach are demonstrated using examples from\n\tthe combinatorial chemistry literature. },\n\tAuthor = {Agrafiotis, D. K.},\n\tDate-Added = {2007-12-11 17:01:03 -0500},\n\tDate-Modified = {2009-04-27 17:32:57 -0400},\n\tDoi = {10.1021/ci000091j},\n\tJournal = {J.~Chem.~Inf.~Comput.~Sci.},\n\tKeywords = {diversity; ks; kolmogorov; smirnov; distribution},\n\tPages = {156--167},\n\tTitle = {A Constant Time Algorithm for Estimating the Diversity of Large Chemical Libraries},\n\tVolume = {41},\n\tYear = {2001},\n\tBdsk-File-1 = 
{YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RElpOUy5vYmplY3RzViRjbGFzc1dOUy5rZXlzog8QgASABoAHohMUgAKAA1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgMGRpXTlMuZGF0YU8RAW4AAAAAAW4AAgAAA212IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMU5bQNIKwAAABCNbQ1jaTAwMDA5MWoucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEI3xxNMuPwAAAAAAAAAAAAIAAwAACSAAAAAAAAAAAAAAAAAAAAAIYXJ0aWNsZXMAEAAIAADFObNTAAAAEQAIAADE02Z/AAAAAQAQABCNbQAKTIAACkxpAAB8EwACADBtdiA6VXNlcnM6cmd1aGE6RG9jdW1lbnRzOmFydGljbGVzOmNpMDAwMDkxai5wZGYADgAcAA0AYwBpADAAMAAwADAAOQAxAGoALgBwAGQAZgAPAAgAAwBtAHYAIAASACxVc2Vycy9yZ3VoYS9Eb2N1bWVudHMvYXJ0aWNsZXMvY2kwMDAwOTFqLnBkZgATAAEvAAAVAAIADP//AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAmLi4vLi4vRG9jdW1lbnRzL2FydGljbGVzL2NpMDAwMDkxai5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGQAbABvAHEAcwB1AHgAegB8AIYAkwCYAKACEgIUAhkCIgItAjECPwJGAk8CeAJ9AoACjQKSAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAqQ=}}\n\n","author_short":["Agrafiotis, D. K."],"key":"Agrafiotis:2001aa","id":"Agrafiotis:2001aa","bibbaseid":"agrafiotis-aconstanttimealgorithmforestimatingthediversityoflargechemicallibraries-2001","role":"author","urls":{},"keyword":["diversity; ks; kolmogorov; smirnov; distribution"],"downloads":0},"search_terms":["constant","time","algorithm","estimating","diversity","large","chemical","libraries","agrafiotis"],"keywords":["diversity; ks; kolmogorov; smirnov; distribution"],"authorIDs":[],"dataSources":["c5japf9eAQRaeMS4h"]}