RAPSearch: a fast protein similarity search tool for short reads. Ye, Y., Choi, J., & Tang, H.
Paper
Website abstract bibtex Background: Next Generation Sequencing (NGS) is producing enormous corpuses of short DNA reads, affecting emerging fields like metagenomics. Protein similarity search–a key step to achieve annotation of protein-coding genes in these short reads, and identification of their biological functions–faces daunting challenges because of the very sizes of the short read datasets. Results: We developed a fast protein similarity search tool RAPSearch that utilizes a reduced amino acid alphabet and suffix array to detect seeds of flexible length. For short reads (translated in 6 frames) we tested, RAPSearch achieved ~20-90 times speedup as compared to BLASTX. RAPSearch missed only a small fraction (~1.3-3.2%) of BLASTX similarity hits, but it also discovered additional homologous proteins (~0.3-2.1%) that BLASTX missed. By contrast, BLAT, a tool that is even slightly faster than RAPSearch, had significant loss of sensitivity as compared to RAPSearch and BLAST.
@comment{
  RAPSearch paper by Ye, Choi and Tang; record exported from Mendeley.
  - The citation key reuses the value of the existing citation_key field;
    the export had no key, so parsers took the first field as the key.
  - doi replaces the placeholder identifiers field, whose value was the
    literal text [object Object] (a failed serialization, not data).
    The DOI is the one embedded in the websites URL.
  - journal, volume and pages (article number) are inferred from the
    websites URL host and the DOI; year is the publication year of that
    volume. TODO: confirm these four values against the publisher record.
  - Mendeley bookkeeping fields (id, created, read, ...) are kept as
    exported; bibliography styles ignore field names they do not know.
}
@article{Ye,
  author              = {Ye, Yuzhen and Choi, Jeong-Hyeon and Tang, Haixu},
  title               = {{RAPSearch}: a fast protein similarity search tool for short reads},
  journal             = {BMC Bioinformatics},
  year                = {2011},
  volume              = {12},
  pages               = {159},
  doi                 = {10.1186/1471-2105-12-159},
  type                = {article},
  keywords            = {metagenomics,reduced amino acid alphabet,short reads,similarity search,suffix array},
  websites            = {https://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/1471-2105-12-159?site=bmcbioinformatics.biomedcentral.com},
  id                  = {054dc7cd-c948-36f3-897d-78c1b3b99d67},
  created             = {2017-09-05T15:47:54.661Z},
  accessed            = {2017-09-05},
  file_attached       = {true},
  profile_id          = {5db6d3e7-562f-3ec2-a249-16ecf1e747e4},
  group_id            = {49665d18-5720-3154-b3f7-40652b55b7b9},
  last_modified       = {2017-09-07T01:37:58.910Z},
  read                = {false},
  starred             = {false},
  authored            = {false},
  confirmed           = {false},
  hidden              = {false},
  citation_key        = {Ye},
  private_publication = {false},
  abstract            = {Background: Next Generation Sequencing (NGS) is producing enormous corpuses of short DNA reads, affecting emerging fields like metagenomics. Protein similarity search–a key step to achieve annotation of protein-coding genes in these short reads, and identification of their biological functions–faces daunting challenges because of the very sizes of the short read datasets. Results: We developed a fast protein similarity search tool RAPSearch that utilizes a reduced amino acid alphabet and suffix array to detect seeds of flexible length. For short reads (translated in 6 frames) we tested, RAPSearch achieved ~20-90 times speedup as compared to BLASTX. RAPSearch missed only a small fraction (~1.3-3.2%) of BLASTX similarity hits, but it also discovered additional homologous proteins (~0.3-2.1%) that BLASTX missed. By contrast, BLAT, a tool that is even slightly faster than RAPSearch, had significant loss of sensitivity as compared to RAPSearch and BLAST.},
  bibtype             = {article},
}
Downloads: 0
{"_id":"KEjheGvfxG6SnSB6F","bibbaseid":"ye-choi-tang-rapsearchafastproteinsimilaritysearchtoolforshortreads","authorIDs":[],"author_short":["Ye, Y.","Choi, J.","Tang, H."],"bibdata":{"title":"RAPSearch: a fast protein similarity search tool for short reads","type":"article","identifiers":"[object Object]","keywords":"metagenomics,reduced amino acid alphabet,short reads,similarity search,suffix array","websites":"https://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/1471-2105-12-159?site=bmcbioinformatics.biomedcentral.com","id":"054dc7cd-c948-36f3-897d-78c1b3b99d67","created":"2017-09-05T15:47:54.661Z","accessed":"2017-09-05","file_attached":"true","profile_id":"5db6d3e7-562f-3ec2-a249-16ecf1e747e4","group_id":"49665d18-5720-3154-b3f7-40652b55b7b9","last_modified":"2017-09-07T01:37:58.910Z","read":false,"starred":false,"authored":false,"confirmed":false,"hidden":false,"citation_key":"Ye","private_publication":false,"abstract":"Background: Next Generation Sequencing (NGS) is producing enormous corpuses of short DNA reads, affecting emerging fields like metagenomics. Protein similarity search–a key step to achieve annotation of protein-coding genes in these short reads, and identification of their biological functions–faces daunting challenges because of the very sizes of the short read datasets. Results: We developed a fast protein similarity search tool RAPSearch that utilizes a reduced amino acid alphabet and suffix array to detect seeds of flexible length. For short reads (translated in 6 frames) we tested, RAPSearch achieved ~20-90 times speedup as compared to BLASTX. RAPSearch missed only a small fraction (~1.3-3.2%) of BLASTX similarity hits, but it also discovered additional homologous proteins (~0.3-2.1%) that BLASTX missed. 
By contrast, BLAT, a tool that is even slightly faster than RAPSearch, had significant loss of sensitivity as compared to RAPSearch and BLAST.","bibtype":"article","author":"Ye, Yuzhen and Choi, Jeong-Hyeon and Tang, Haixu","bibtex":"@article{\n title = {RAPSearch: a fast protein similarity search tool for short reads},\n type = {article},\n identifiers = {[object Object]},\n keywords = {metagenomics,reduced amino acid alphabet,short reads,similarity search,suffix array},\n websites = {https://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/1471-2105-12-159?site=bmcbioinformatics.biomedcentral.com},\n id = {054dc7cd-c948-36f3-897d-78c1b3b99d67},\n created = {2017-09-05T15:47:54.661Z},\n accessed = {2017-09-05},\n file_attached = {true},\n profile_id = {5db6d3e7-562f-3ec2-a249-16ecf1e747e4},\n group_id = {49665d18-5720-3154-b3f7-40652b55b7b9},\n last_modified = {2017-09-07T01:37:58.910Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {false},\n hidden = {false},\n citation_key = {Ye},\n private_publication = {false},\n abstract = {Background: Next Generation Sequencing (NGS) is producing enormous corpuses of short DNA reads, affecting emerging fields like metagenomics. Protein similarity search–a key step to achieve annotation of protein-coding genes in these short reads, and identification of their biological functions–faces daunting challenges because of the very sizes of the short read datasets. Results: We developed a fast protein similarity search tool RAPSearch that utilizes a reduced amino acid alphabet and suffix array to detect seeds of flexible length. For short reads (translated in 6 frames) we tested, RAPSearch achieved ~20-90 times speedup as compared to BLASTX. RAPSearch missed only a small fraction (~1.3-3.2%) of BLASTX similarity hits, but it also discovered additional homologous proteins (~0.3-2.1%) that BLASTX missed. 
By contrast, BLAT, a tool that is even slightly faster than RAPSearch, had significant loss of sensitivity as compared to RAPSearch and BLAST.},\n bibtype = {article},\n author = {Ye, Yuzhen and Choi, Jeong-Hyeon and Tang, Haixu}\n}","author_short":["Ye, Y.","Choi, J.","Tang, H."],"urls":{"Paper":"https://bibbase.org/service/mendeley/4b66b327-35ad-3956-a9a2-307331dd9988/file/49d43adf-978d-7501-58bc-7b48ae38f042/full_text.pdf.pdf","Website":"https://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/1471-2105-12-159?site=bmcbioinformatics.biomedcentral.com"},"bibbaseid":"ye-choi-tang-rapsearchafastproteinsimilaritysearchtoolforshortreads","role":"author","keyword":["metagenomics","reduced amino acid alphabet","short reads","similarity search","suffix array"],"downloads":0},"bibtype":"article","creationDate":"2020-04-29T21:11:31.521Z","downloads":0,"keywords":["metagenomics","reduced amino acid alphabet","short reads","similarity search","suffix array"],"search_terms":["rapsearch","fast","protein","similarity","search","tool","short","reads","ye","choi","tang"],"title":"RAPSearch: a fast protein similarity search tool for short reads","year":null}