Data mining applied to linkage disequilibrium mapping

Data mining applied to linkage disequilibrium mapping. Toivonen, H. T., Onkamo, P., Vasko, K., Ollikainen, V., Sevon, P., Mannila, H., Herr, M., & Kere, J. Am J Hum Genet, 67(1):133-145, 2000.
abstract bibtex

We introduce a new method for linkage disequilibrium mapping: haplotype pattern mining (HPM). The method, inspired by data mining methods, is based on discovery of recurrent patterns. We define a class of useful haplotype patterns in genetic case-control data and use the algorithm for finding disease-associated haplotypes. The haplotypes are ordered by their strength of association with the phenotype, and all haplotypes exceeding a given threshold level are used for prediction of disease susceptibility-gene location. The method is model-free, in the sense that it does not require (and is unable to utilize) any assumptions about the inheritance model of the disease. The statistical model is nonparametric. The haplotypes are allowed to contain gaps, which improves the method's robustness to mutations and to missing and erroneous data. Experimental studies with simulated microsatellite and SNP data show that the method has good localization power in data sets with large degrees of phenocopies and with lots of missing and erroneous data. The power of HPM is roughly identical for marker maps at a density of 3 single-nucleotide polymorphisms/cM or 1 microsatellite/cM. The capacity to handle high proportions of phenocopies makes the method promising for complex disease mapping. An example of correct disease susceptibility-gene localization with HPM is given with real marker data from families from the United Kingdom affected by type 1 diabetes. The method is extendable to include environmental covariates or phenotype measurements or to find several genes simultaneously.

@comment{Journal-article record. The bibliographic core (author, title, journal, year, volume, number, pages) comes first; the remaining fields (id, created, profile_id, group_id, tags, read, ...) appear to be exporter metadata and are not part of the standard article field set, so standard styles ignore them.}
@article{toivonen2000data,
 author = {Toivonen, H T and Onkamo, P and Vasko, K and Ollikainen, V and Sevon, P and Mannila, H and Herr, M and Kere, J},
 title = {Data mining applied to linkage disequilibrium mapping},
 journal = {Am J Hum Genet},
 year = {2000},
 volume = {67},
 number = {1},
 pages = {133--145},
 type = {article},
 bibtype = {article},
 keywords = {Adolescence,Adult,Aged,Aged, 80 and over,Algorithms,Alleles,Child,Child, Preschool,Chromosome Mapping/*methods/statistics \& numerical,Computer Simulation,Diabetes Mellitus, Insulin-Dependent/genetics,Female,Founder Effect,Genes, Dominant/genetics,Genetic Predisposition to Disease/genetics,Great Britain,HLA Antigens/genetics,Haplotypes/*genetics,Human,Infant,Linkage Disequilibrium/*genetics,Male,Microsatellite Repeats/genetics,Middle Age,Models, Genetic,Mutation/genetics,Phenotype,Polymorphism, Single Nucleotide/genetics,Statistics, Nonparametric,Support, Non-U.S. Gov't},
 abstract = {We introduce a new method for linkage disequilibrium mapping: haplotype pattern mining (HPM). The method, inspired by data mining methods, is based on discovery of recurrent patterns. We define a class of useful haplotype patterns in genetic case-control data and use the algorithm for finding disease-associated haplotypes. The haplotypes are ordered by their strength of association with the phenotype, and all haplotypes exceeding a given threshold level are used for prediction of disease susceptibility-gene location. The method is model-free, in the sense that it does not require (and is unable to utilize) any assumptions about the inheritance model of the disease. The statistical model is nonparametric. The haplotypes are allowed to contain gaps, which improves the method's robustness to mutations and to missing and erroneous data. Experimental studies with simulated microsatellite and SNP data show that the method has good localization power in data sets with large degrees of phenocopies and with lots of missing and erroneous data. The power of HPM is roughly identical for marker maps at a density of 3 single-nucleotide polymorphisms/cM or 1 microsatellite/cM. The capacity to handle high proportions of phenocopies makes the method promising for complex disease mapping. An example of correct disease susceptibility-gene localization with HPM is given with real marker data from families from the United Kingdom affected by type 1 diabetes. The method is extendable to include environmental covariates or phenotype measurements or to find several genes simultaneously.},
 notes = {<m:note>eng<m:linebreak/>Journal Article</m:note>},
 source_type = {Journal Article},
 id = {f0c72e95-0269-3a20-8f92-79ced23957fb},
 created = {2017-06-19T13:43:38.237Z},
 last_modified = {2017-06-19T13:43:38.346Z},
 file_attached = {false},
 profile_id = {de68dde1-2ff3-3a4e-a214-ef424d0c7646},
 group_id = {b2078731-0913-33b9-8902-a53629a24e83},
 tags = {01/11/30},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false}
}

Downloads: 0

{"_id":"7ktEHQ5acLjkGbHDx","bibbaseid":"toivonen-onkamo-vasko-ollikainen-sevon-mannila-herr-kere-dataminingappliedtolinkagedisequilibriummapping-2000","downloads":0,"creationDate":"2017-06-19T14:46:34.253Z","title":"Data mining applied to linkage disequilibrium mapping","author_short":["Toivonen, H., T.","Onkamo, P.","Vasko, K.","Ollikainen, V.","Sevon, P.","Mannila, H.","Herr, M.","Kere, J."],"year":2000,"bibtype":"article","biburl":null,"bibdata":{"title":"Data mining applied to linkage disequilibrium mapping","type":"article","year":"2000","identifiers":"[object Object]","keywords":"Adolescence,Adult,Aged,Aged, 80 and over,Algorithms,Alleles,Child,Child, Preschool,Chromosome Mapping/*methods/statistics & numerical,Computer Simulation,Diabetes Mellitus, Insulin-Dependent/genetics,Female,Founder Effect,Genes, Dominant/genetics,Genetic Predisposition to Disease/genetics,Great Britain,HLA Antigens/genetics,Haplotypes/*genetics,Human,Infant,Linkage Disequilibrium/*genetics,Male,Microsatellite Repeats/genetics,Middle Age,Models, Genetic,Mutation/genetics,Phenotype,Polymorphism, Single Nucleotide/genetics,Statistics, Nonparametric,Support, Non-U.S. Gov't","pages":"133-45.","volume":"67","id":"f0c72e95-0269-3a20-8f92-79ced23957fb","created":"2017-06-19T13:43:38.237Z","file_attached":false,"profile_id":"de68dde1-2ff3-3a4e-a214-ef424d0c7646","group_id":"b2078731-0913-33b9-8902-a53629a24e83","last_modified":"2017-06-19T13:43:38.346Z","tags":"01/11/30","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"source_type":"Journal Article","notes":"<m:note>eng<m:linebreak/>Journal Article</m:note>","abstract":"We introduce a new method for linkage disequilibrium mapping: haplotype pattern mining (HPM). The method, inspired by data mining methods, is based on discovery of recurrent patterns. We define a class of useful haplotype patterns in genetic case-control data and use the algorithm for finding disease-associated haplotypes. 
The haplotypes are ordered by their strength of association with the phenotype, and all haplotypes exceeding a given threshold level are used for prediction of disease susceptibility-gene location. The method is model-free, in the sense that it does not require (and is unable to utilize) any assumptions about the inheritance model of the disease. The statistical model is nonparametric. The haplotypes are allowed to contain gaps, which improves the method's robustness to mutations and to missing and erroneous data. Experimental studies with simulated microsatellite and SNP data show that the method has good localization power in data sets with large degrees of phenocopies and with lots of missing and erroneous data. The power of HPM is roughly identical for marker maps at a density of 3 single-nucleotide polymorphisms/cM or 1 microsatellite/cM. The capacity to handle high proportions of phenocopies makes the method promising for complex disease mapping. An example of correct disease susceptibility-gene localization with HPM is given with real marker data from families from the United Kingdom affected by type 1 diabetes. 
The method is extendable to include environmental covariates or phenotype measurements or to find several genes simultaneously.","bibtype":"article","author":"Toivonen, H T and Onkamo, P and Vasko, K and Ollikainen, V and Sevon, P and Mannila, H and Herr, M and Kere, J","journal":"Am J Hum Genet","number":"1","bibtex":"@article{\n title = {Data mining applied to linkage disequilibrium mapping},\n type = {article},\n year = {2000},\n identifiers = {[object Object]},\n keywords = {Adolescence,Adult,Aged,Aged, 80 and over,Algorithms,Alleles,Child,Child, Preschool,Chromosome Mapping/*methods/statistics & numerical,Computer Simulation,Diabetes Mellitus, Insulin-Dependent/genetics,Female,Founder Effect,Genes, Dominant/genetics,Genetic Predisposition to Disease/genetics,Great Britain,HLA Antigens/genetics,Haplotypes/*genetics,Human,Infant,Linkage Disequilibrium/*genetics,Male,Microsatellite Repeats/genetics,Middle Age,Models, Genetic,Mutation/genetics,Phenotype,Polymorphism, Single Nucleotide/genetics,Statistics, Nonparametric,Support, Non-U.S. Gov't},\n pages = {133-45.},\n volume = {67},\n id = {f0c72e95-0269-3a20-8f92-79ced23957fb},\n created = {2017-06-19T13:43:38.237Z},\n file_attached = {false},\n profile_id = {de68dde1-2ff3-3a4e-a214-ef424d0c7646},\n group_id = {b2078731-0913-33b9-8902-a53629a24e83},\n last_modified = {2017-06-19T13:43:38.346Z},\n tags = {01/11/30},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n source_type = {Journal Article},\n notes = {<m:note>eng<m:linebreak/>Journal Article</m:note>},\n abstract = {We introduce a new method for linkage disequilibrium mapping: haplotype pattern mining (HPM). The method, inspired by data mining methods, is based on discovery of recurrent patterns. We define a class of useful haplotype patterns in genetic case-control data and use the algorithm for finding disease-associated haplotypes. 
The haplotypes are ordered by their strength of association with the phenotype, and all haplotypes exceeding a given threshold level are used for prediction of disease susceptibility-gene location. The method is model-free, in the sense that it does not require (and is unable to utilize) any assumptions about the inheritance model of the disease. The statistical model is nonparametric. The haplotypes are allowed to contain gaps, which improves the method's robustness to mutations and to missing and erroneous data. Experimental studies with simulated microsatellite and SNP data show that the method has good localization power in data sets with large degrees of phenocopies and with lots of missing and erroneous data. The power of HPM is roughly identical for marker maps at a density of 3 single-nucleotide polymorphisms/cM or 1 microsatellite/cM. The capacity to handle high proportions of phenocopies makes the method promising for complex disease mapping. An example of correct disease susceptibility-gene localization with HPM is given with real marker data from families from the United Kingdom affected by type 1 diabetes. 
The method is extendable to include environmental covariates or phenotype measurements or to find several genes simultaneously.},\n bibtype = {article},\n author = {Toivonen, H T and Onkamo, P and Vasko, K and Ollikainen, V and Sevon, P and Mannila, H and Herr, M and Kere, J},\n journal = {Am J Hum Genet},\n number = {1}\n}","author_short":["Toivonen, H., T.","Onkamo, P.","Vasko, K.","Ollikainen, V.","Sevon, P.","Mannila, H.","Herr, M.","Kere, J."],"bibbaseid":"toivonen-onkamo-vasko-ollikainen-sevon-mannila-herr-kere-dataminingappliedtolinkagedisequilibriummapping-2000","role":"author","urls":{},"keyword":["Adolescence","Adult","Aged","Aged","80 and over","Algorithms","Alleles","Child","Child","Preschool","Chromosome Mapping/*methods/statistics & numerical","Computer Simulation","Diabetes Mellitus","Insulin-Dependent/genetics","Female","Founder Effect","Genes","Dominant/genetics","Genetic Predisposition to Disease/genetics","Great Britain","HLA Antigens/genetics","Haplotypes/*genetics","Human","Infant","Linkage Disequilibrium/*genetics","Male","Microsatellite Repeats/genetics","Middle Age","Models","Genetic","Mutation/genetics","Phenotype","Polymorphism","Single Nucleotide/genetics","Statistics","Nonparametric","Support","Non-U.S. 
Gov't"],"downloads":0},"search_terms":["data","mining","applied","linkage","disequilibrium","mapping","toivonen","onkamo","vasko","ollikainen","sevon","mannila","herr","kere"],"keywords":["adolescence","adult","aged","aged","80 and over","algorithms","alleles","child","child","preschool","chromosome mapping/*methods/statistics & numerical","computer simulation","diabetes mellitus","insulin-dependent/genetics","female","founder effect","genes","dominant/genetics","genetic predisposition to disease/genetics","great britain","hla antigens/genetics","haplotypes/*genetics","human","infant","linkage disequilibrium/*genetics","male","microsatellite repeats/genetics","middle age","models","genetic","mutation/genetics","phenotype","polymorphism","single nucleotide/genetics","statistics","nonparametric","support","non-u.s. gov't"],"authorIDs":[]}