Trinity RNA-Seq assembler performance optimization. Henschel, R., Nista, P. M., Lieber, M., Haas, B. J., Wu, L.-S., & LeDuc, R. D. In ACM International Conference Proceeding Series, 2012. Website doi abstract bibtex RNA-sequencing is a technique to study RNA expression in biological material. It is quickly gaining popularity in the field of transcriptomics. Trinity is a software tool that was developed for efficient de novo reconstruction of transcriptomes from RNA-Seq data. In this paper we first conduct a performance study of Trinity and compare it to previously published data from 2011. The version from 2011 is much slower than many other de novo assemblers and biologists have thus been forced to choose between quality and speed. We examine the runtime behavior of Trinity as a whole as well as its individual components and then optimize the most performance critical parts. We find that standard best practices for HPC applications can also be applied to Trinity, especially on systems with large amounts of memory. When combining best practices for HPC applications along with our specific performance optimization, we can decrease the runtime of Trinity by a factor of 3.9. This brings the runtime of Trinity in line with other de novo assemblers while maintaining superior quality. The purpose of this paper is to describe a series of improvements to Trinity, quantify the execution improvements achieved, and document the new version of the software. © 2012 ACM.
@comment{
  Conference paper exported from a Mendeley library (see the profile_id field).
  The citation key is taken from the entry's own citation_key field.
  The author list holds the six authors named in the notes field; a trailing
  comma-separated repeat of the same list (a duplicate-merge artifact) was removed.
  Fields such as id, created, read, starred and folder_uuids are reference-manager
  bookkeeping; they are not standard BibTeX fields and are kept unchanged.
  NOTE(review): the keyword "DNA seque" looks truncated in the export; it is left
  as-is until the original keyword can be confirmed.
}
@inproceedings{Henschel2012,
  author = {Henschel, R. and Nista, P. M. and Lieber, M. and Haas, B. J. and Wu, L.-S. and LeDuc, R. D.},
  title = {{Trinity} {RNA-Seq} assembler performance optimization},
  booktitle = {ACM International Conference Proceeding Series},
  year = {2012},
  doi = {10.1145/2335755.2335842},
  type = {inproceedings},
  keywords = {Application performance,Biological materials,Critical parts,DNA seque,Management,Optimization,RNA},
  websites = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84865314650&doi=10.1145%2F2335755.2335842&partnerID=40&md5=30fc3625a3985dfa9953003f6cd44c42},
  city = {Chicago, IL},
  id = {4749ed2c-0c56-3a0d-ac73-b9bd336e601b},
  created = {2019-10-01T18:06:10.748Z},
  file_attached = {false},
  profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
  last_modified = {2019-10-01T18:06:27.278Z},
  read = {false},
  starred = {false},
  authored = {true},
  confirmed = {true},
  hidden = {false},
  citation_key = {Henschel2012},
  source_type = {conference},
  notes = {<b>From Duplicate 2 (<i>Trinity RNA-Seq assembler performance optimization</i> - Henschel, R; Nista, P M; Lieber, M; Haas, B J; Wu, L.-S.; Leduc, R D)<br/></b><br/>cited By 0; Conference of 1st Conference of the Extreme Science and Engineering Discovery Environment: Bridging from the eXtreme to the Campus and Beyond, XSEDE12 ; Conference Date: 16 July 2012 Through 19 July 2012; Conference Code:92061},
  folder_uuids = {22c3b665-9e84-4884-8172-710aa9082eaf,b4e18ac7-7050-4c86-a2a4-a9c35bdc74d4},
  private_publication = {false},
  abstract = {RNA-sequencing is a technique to study RNA expression in biological material. It is quickly gaining popularity in the field of transcriptomics. Trinity is a software tool that was developed for efficient de novo reconstruction of transcriptomes from RNA-Seq data. In this paper we first conduct a performance study of Trinity and compare it to previously published data from 2011. The version from 2011 is much slower than many other de novo assemblers and biologists have thus been forced to choose between quality and speed. We examine the runtime behavior of Trinity as a whole as well as its individual components and then optimize the most performance critical parts. We find that standard best practices for HPC applications can also be applied to Trinity, especially on systems with large amounts of memory. When combining best practices for HPC applications along with our specific performance optimization, we can decrease the runtime of Trinity by a factor of 3.9. This brings the runtime of Trinity in line with other de novo assemblers while maintaining superior quality. The purpose of this paper is to describe a series of improvements to Trinity, quantify the execution improvements achieved, and document the new version of the software. © 2012 ACM.},
  bibtype = {inproceedings}
}
Downloads: 0
{"_id":"aRnoN8oDpkq29vJbW","bibbaseid":"henschel-nista-lieber-haas-wu-leduc-henschel-trinityrnaseqassemblerperformanceoptimization-2012","downloads":0,"creationDate":"2018-03-12T19:10:27.398Z","title":"Trinity RNA-Seq assembler performance optimization","author_short":["Henschel, R.","Nista, P., M.","Lieber, M.","Haas, B., J.","Wu, L.","Leduc, R., D.","Henschel, R., Lieber, M., Wu, L-S, Nista, P. M., Haas, B.J., and LeDuc, R."],"year":2012,"bibtype":"inproceedings","biburl":"https://bibbase.org/service/mendeley/42d295c0-0737-38d6-8b43-508cab6ea85d","bibdata":{"title":"Trinity RNA-Seq assembler performance optimization","type":"inproceedings","year":"2012","keywords":"Application performance,Biological materials,Critical parts,DNA seque,Management,Optimization,RNA","websites":"https://www.scopus.com/inward/record.uri?eid=2-s2.0-84865314650&doi=10.1145%2F2335755.2335842&partnerID=40&md5=30fc3625a3985dfa9953003f6cd44c42","city":"Chicago, IL","id":"4749ed2c-0c56-3a0d-ac73-b9bd336e601b","created":"2019-10-01T18:06:10.748Z","file_attached":false,"profile_id":"42d295c0-0737-38d6-8b43-508cab6ea85d","last_modified":"2019-10-01T18:06:27.278Z","read":false,"starred":false,"authored":"true","confirmed":"true","hidden":false,"citation_key":"Henschel2012","source_type":"conference","notes":"<b>From Duplicate 2 (<i>Trinity RNA-Seq assembler performance optimization</i> - Henschel, R; Nista, P M; Lieber, M; Haas, B J; Wu, L.-S.; Leduc, R D)<br/></b><br/>cited By 0; Conference of 1st Conference of the Extreme Science and Engineering Discovery Environment: Bridging from the eXtreme to the Campus and Beyond, XSEDE12 ; Conference Date: 16 July 2012 Through 19 July 2012; Conference Code:92061","folder_uuids":"22c3b665-9e84-4884-8172-710aa9082eaf,b4e18ac7-7050-4c86-a2a4-a9c35bdc74d4","private_publication":false,"abstract":"RNA-sequencing is a technique to study RNA expression in biological material. It is quickly gaining popularity in the field of transcriptomics. 
Trinity is a software tool that was developed for efficient de novo reconstruction of transcriptomes from RNA-Seq data. In this paper we first conduct a performance study of Trinity and compare it to previously published data from 2011. The version from 2011 is much slower than many other de novo assemblers and biologists have thus been forced to choose between quality and speed. We examine the runtime behavior of Trinity as a whole as well as its individual components and then optimize the most performance critical parts. We find that standard best practices for HPC applications can also be applied to Trinity, especially on systems with large amounts of memory. When combining best practices for HPC applications along with our specific performance optimization, we can decrease the runtime of Trinity by a factor of 3.9. This brings the runtime of Trinity in line with other de novo assemblers while maintaining superior quality. The purpose of this paper is to describe a series of improvements to Trinity, quantify the execution improvements achieved, and document the new version of the software. © 2012 ACM.","bibtype":"inproceedings","author":"Henschel, R and Nista, P M and Lieber, M and Haas, B J and Wu, L.-S. and Leduc, R D and Henschel, R., Lieber, M., Wu, L-S, Nista, P. 
M., Haas, B.J., and LeDuc, R.D","doi":"10.1145/2335755.2335842","booktitle":"ACM International Conference Proceeding Series","bibtex":"@inproceedings{\n title = {Trinity RNA-Seq assembler performance optimization},\n type = {inproceedings},\n year = {2012},\n keywords = {Application performance,Biological materials,Critical parts,DNA seque,Management,Optimization,RNA},\n websites = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84865314650&doi=10.1145%2F2335755.2335842&partnerID=40&md5=30fc3625a3985dfa9953003f6cd44c42},\n city = {Chicago, IL},\n id = {4749ed2c-0c56-3a0d-ac73-b9bd336e601b},\n created = {2019-10-01T18:06:10.748Z},\n file_attached = {false},\n profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},\n last_modified = {2019-10-01T18:06:27.278Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Henschel2012},\n source_type = {conference},\n notes = {<b>From Duplicate 2 (<i>Trinity RNA-Seq assembler performance optimization</i> - Henschel, R; Nista, P M; Lieber, M; Haas, B J; Wu, L.-S.; Leduc, R D)<br/></b><br/>cited By 0; Conference of 1st Conference of the Extreme Science and Engineering Discovery Environment: Bridging from the eXtreme to the Campus and Beyond, XSEDE12 ; Conference Date: 16 July 2012 Through 19 July 2012; Conference Code:92061},\n folder_uuids = {22c3b665-9e84-4884-8172-710aa9082eaf,b4e18ac7-7050-4c86-a2a4-a9c35bdc74d4},\n private_publication = {false},\n abstract = {RNA-sequencing is a technique to study RNA expression in biological material. It is quickly gaining popularity in the field of transcriptomics. Trinity is a software tool that was developed for efficient de novo reconstruction of transcriptomes from RNA-Seq data. In this paper we first conduct a performance study of Trinity and compare it to previously published data from 2011. 
The version from 2011 is much slower than many other de novo assemblers and biologists have thus been forced to choose between quality and speed. We examine the runtime behavior of Trinity as a whole as well as its individual components and then optimize the most performance critical parts. We find that standard best practices for HPC applications can also be applied to Trinity, especially on systems with large amounts of memory. When combining best practices for HPC applications along with our specific performance optimization, we can decrease the runtime of Trinity by a factor of 3.9. This brings the runtime of Trinity in line with other de novo assemblers while maintaining superior quality. The purpose of this paper is to describe a series of improvements to Trinity, quantify the execution improvements achieved, and document the new version of the software. © 2012 ACM.},\n bibtype = {inproceedings},\n author = {Henschel, R and Nista, P M and Lieber, M and Haas, B J and Wu, L.-S. and Leduc, R D and Henschel, R., Lieber, M., Wu, L-S, Nista, P. M., Haas, B.J., and LeDuc, R.D},\n doi = {10.1145/2335755.2335842},\n booktitle = {ACM International Conference Proceeding Series}\n}","author_short":["Henschel, R.","Nista, P., M.","Lieber, M.","Haas, B., J.","Wu, L.","Leduc, R., D.","Henschel, R., Lieber, M., Wu, L-S, Nista, P. 
M., Haas, B.J., and LeDuc, R."],"urls":{"Website":"https://www.scopus.com/inward/record.uri?eid=2-s2.0-84865314650&doi=10.1145%2F2335755.2335842&partnerID=40&md5=30fc3625a3985dfa9953003f6cd44c42"},"biburl":"https://bibbase.org/service/mendeley/42d295c0-0737-38d6-8b43-508cab6ea85d","bibbaseid":"henschel-nista-lieber-haas-wu-leduc-henschel-trinityrnaseqassemblerperformanceoptimization-2012","role":"author","keyword":["Application performance","Biological materials","Critical parts","DNA seque","Management","Optimization","RNA"],"metadata":{"authorlinks":{}},"downloads":0},"search_terms":["trinity","rna","seq","assembler","performance","optimization","henschel","nista","lieber","haas","wu","leduc","henschel"],"keywords":["application performance","biological materials","critical parts","dna seque","management","optimization","rna"],"authorIDs":["5aa81b632f2aff8f1100005b"],"dataSources":["zgahneP4uAjKbudrQ","ya2CyA73rpZseyrZ8","2252seNhipfTmjEBQ"]}