Comparison of multi-sample variant calling methods for whole genome sequencing. Nho, K., West, J. D., Li, H., Henschel, R., Tavares, M. C., Bharthur, A., Weiner, M. W., Green, R. C., Toga, A. W., & Saykin, A. J. International Conference on Systems Biology, ISB, IEEE Computer Society, 2014.
Comparison of multi-sample variant calling methods for whole genome sequencing [link]Website  doi  abstract   bibtex   
Rapid advancement of next-generation sequencing (NGS) technologies has facilitated the search for genetic susceptibility factors that influence disease risk in the field of human genetics. In particular whole genome sequencing (WGS) has been used to obtain the most comprehensive genetic variation of an individual and perform detailed evaluation of all genetic variation. To this end, sophisticated methods to accurately call high-quality variants and genotypes simultaneously on a cohort of individuals from raw sequence data are required. On chromosome 22 of 818 WGS data from the Alzheimer's Disease Neuroimaging Initiative (ADNI), which is the largest WGS related to a single disease, we compared two multi-sample variant calling methods for the detection of single nucleotide variants (SNVs) and short insertions and deletions (indels) in WGS: (1) reduce the analysis-ready reads (BAM) file to a manageable size by keeping only essential information for variant calling ('REDUCE') and (2) call variants individually on each sample and then perform a joint genotyping analysis of the variant files produced for all samples in a cohort ('JOINT'). JOINT identified 515,210 SNVs and 60,042 indels, while REDUCE identified 358,303 SNVs and 52,855 indels. JOINT identified many more SNVs and indels compared to REDUCE. Both methods had concordance rate of 99.60% for SNVs and 99.06% for indels. For SNVs, evaluation with HumanOmni 2.5M genotyping arrays revealed a concordance rate of 99.68% for JOINT and 99.50% for REDUCE. REDUCE needed more computational time and memory compared to JOINT. Our findings indicate that the multi-sample variant calling method using the JOINT process is a promising strategy for the variant detection, which should facilitate our understanding of the underlying pathogenesis of human diseases. © 2014 IEEE.
% Conference paper (8th International Conference on Systems Biology, ISB 2014; see `notes`).
% Entered as @inproceedings with the proceedings name in `booktitle`.
% The citation key is taken from the exported `citation_key` field.
% Reference-manager bookkeeping fields (type, id, created, profile_id, group_id,
% bibtype, source_type, ...) are kept exactly as exported; they are not standard
% BibTeX fields and standard styles ignore unknown fields.
% NOTE(review): two keywords look truncated by the exporter ("Whole", "Neuroimag");
% left as-is because the full terms cannot be confirmed from this record.
@inproceedings{Nho201459,
 author = {Nho, K and West, J D and Li, H and Henschel, R and Tavares, M C and Bharthur, A and Weiner, M W and Green, R C and Toga, A W and Saykin, A J},
 title = {Comparison of multi-sample variant calling methods for whole genome sequencing},
 booktitle = {International Conference on Systems Biology, ISB},
 editor = {Wu, L.-Y. and Wang, Y. and Chen, L. and Zhang, X.-S.},
 publisher = {IEEE Computer Society},
 year = {2014},
 pages = {59--62},
 doi = {10.1109/ISB.2014.6990432},
 websites = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84920124381&doi=10.1109%2FISB.2014.6990432&partnerID=40&md5=7f2e10dbcf90763fd58afbdecb65be6a},
 keywords = {ADNI; GATK; HaplotypeCaller; Multi-samples; Whole,Chromosomes; Neurodegenerative diseases; Neuroimag,Genes},
 notes = {cited By 5; Conference of 8th International Conference on Systems Biology, ISB 2014 ; Conference Date: 24 August 2014 Through 27 August 2014; Conference Code:109794},
 abstract = {Rapid advancement of next-generation sequencing (NGS) technologies has facilitated the search for genetic susceptibility factors that influence disease risk in the field of human genetics. In particular whole genome sequencing (WGS) has been used to obtain the most comprehensive genetic variation of an individual and perform detailed evaluation of all genetic variation. To this end, sophisticated methods to accurately call high-quality variants and genotypes simultaneously on a cohort of individuals from raw sequence data are required. On chromosome 22 of 818 WGS data from the Alzheimer's Disease Neuroimaging Initiative (ADNI), which is the largest WGS related to a single disease, we compared two multi-sample variant calling methods for the detection of single nucleotide variants (SNVs) and short insertions and deletions (indels) in WGS: (1) reduce the analysis-ready reads (BAM) file to a manageable size by keeping only essential information for variant calling ('REDUCE') and (2) call variants individually on each sample and then perform a joint genotyping analysis of the variant files produced for all samples in a cohort ('JOINT'). JOINT identified 515,210 SNVs and 60,042 indels, while REDUCE identified 358,303 SNVs and 52,855 indels. JOINT identified many more SNVs and indels compared to REDUCE. Both methods had concordance rate of 99.60% for SNVs and 99.06% for indels. For SNVs, evaluation with HumanOmni 2.5M genotyping arrays revealed a concordance rate of 99.68% for JOINT and 99.50% for REDUCE. REDUCE needed more computational time and memory compared to JOINT. Our findings indicate that the multi-sample variant calling method using the JOINT process is a promising strategy for the variant detection, which should facilitate our understanding of the underlying pathogenesis of human diseases. © 2014 IEEE.},
 type = {article},
 id = {256801ef-1a33-39c9-a162-43d043912aa6},
 created = {2018-02-27T18:07:26.286Z},
 file_attached = {false},
 profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
 group_id = {27e0553c-8ec0-31bd-b42c-825b8a5a9ae8},
 last_modified = {2018-02-27T18:07:26.286Z},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false},
 citation_key = {Nho201459},
 source_type = {article},
 private_publication = {false},
 bibtype = {article}
}

Downloads: 0