Alleleauto: a pipeline for allele identification and analysis of allele-specific gene expression with haplotype-resolved diploid genome assemblies. Shi, T., Nie, S., Bao, Y., Li, Z., Chen, Z., Zhao, S., Yan, X., Ma, H., Tian, X., Jia, K., Guo, J., Zhang, J., & Mao, J. aBIOTECH, 7(3):100056, September, 2026.
Paper doi abstract bibtex Advanced sequencing now enables haplotype resolution of genomes from non-model diploid plant species, facilitating allele identification and the use of allele-specific expression (ASE) analysis to uncover the relationships between genes and phenotypes in heterozygous genomes. However, identification of true allelic pairs remains challenging due to the presence of paralogous genes from ancient genome duplications, and existing methods lack systematic, reproducible filtering criteria. In this study, we developed Alleleauto, a workflow integrating the parametric 3σ rule and the non-parametric Tukey's method as two complementary outlier detection methods to precisely identify alleles and perform ASE analysis from haplotype-resolved assemblies. Alleleauto first searches for homologous genes across homologous chromosomes, then applies statistical filtering criteria based on synonymous substitution rates (Ks) and synteny to systematically remove false alleles (paralogs). This dual-method framework offers flexible filtering strategies with adjustable parameters, enabling optimization for diverse genomes. We validated the workflow on tea plant (Camellia sinensis), ginger (Zingiber officinale), and lychee (Litchi chinensis), three plant species with distinct genomic features, demonstrating that statistical filtering significantly improves accuracy over the use of sequence similarity alone. Using the alleles identified by Alleleauto, we performed ASE analysis and calculated sequence divergence parameters to investigate ASE and heterosis mechanisms. Our open-source and easy-to-use pipeline provides significant value for reproducible, scalable investigation of ASE and heterosis with haplotype-resolved genome assemblies.
@article{shi_alleleauto_2026,
  author     = {Shi, Tian-Le and Nie, Shuai and Bao, Yu-Tao and Li, Zhi-Chao and Chen, Zhao-Yang and Zhao, Shi-Wei and Yan, Xue-Mei and Ma, Hai-Yao and Tian, Xue-Chan and Jia, Kai-Hua and Guo, Jing-Fang and Zhang, Jun-Ke and Mao, Jian-Feng},
  title      = {Alleleauto: a pipeline for allele identification and analysis of allele-specific gene expression with haplotype-resolved diploid genome assemblies},
  shorttitle = {Alleleauto},
  journal    = {aBIOTECH},
  volume     = {7},
  number     = {3},
  pages      = {100056},
  month      = sep,
  year       = {2026},
  issn       = {2662-1738},
  doi        = {10.1016/j.abiote.2026.100056},
  url        = {https://www.sciencedirect.com/science/article/pii/S2662173826000706},
  urldate    = {2026-06-12},
  keywords   = {Allele identification, Allele-specific expression, Haplotype-resolved, Non-model diploid plant genome, Sequence similarity},
  abstract   = {Advanced sequencing now enables haplotype resolution of genomes from non-model diploid plant species, facilitating allele identification and the use of allele-specific expression (ASE) analysis to uncover the relationships between genes and phenotypes in heterozygous genomes. However, identification of true allelic pairs remains challenging due to the presence of paralogous genes from ancient genome duplications, and existing methods lack systematic, reproducible filtering criteria. In this study, we developed Alleleauto, a workflow integrating the parametric 3σ rule and the non-parametric Tukey's method as two complementary outlier detection methods to precisely identify alleles and perform ASE analysis from haplotype-resolved assemblies. Alleleauto first searches for homologous genes across homologous chromosomes, then applies statistical filtering criteria based on synonymous substitution rates (Ks) and synteny to systematically remove false alleles (paralogs). This dual-method framework offers flexible filtering strategies with adjustable parameters, enabling optimization for diverse genomes. We validated the workflow on tea plant (Camellia sinensis), ginger (Zingiber officinale), and lychee (Litchi chinensis), three plant species with distinct genomic features, demonstrating that statistical filtering significantly improves accuracy over the use of sequence similarity alone. Using the alleles identified by Alleleauto, we performed ASE analysis and calculated sequence divergence parameters to investigate ASE and heterosis mechanisms. Our open-source, and easy-to-use pipeline provides significant value for reproducible, scalable investigation of ASE and heterosis with haplotype-resolved genome assemblies.},
}
Downloads: 0
{"_id":"KE8iNvbP7Nyk4hDSf","bibbaseid":"shi-nie-bao-li-chen-zhao-yan-ma-etal-alleleautoapipelineforalleleidentificationandanalysisofallelespecificgeneexpressionwithhaplotyperesolveddiploidgenomeassemblies-2026","author_short":["Shi, T.","Nie, S.","Bao, Y.","Li, Z.","Chen, Z.","Zhao, S.","Yan, X.","Ma, H.","Tian, X.","Jia, K.","Guo, J.","Zhang, J.","Mao, J."],"bibdata":{"bibtype":"article","type":"article","title":"Alleleauto: a pipeline for allele identification and analysis of allele-specific gene expression with haplotype-resolved diploid genome assemblies","volume":"7","issn":"2662-1738","shorttitle":"Alleleauto","url":"https://www.sciencedirect.com/science/article/pii/S2662173826000706","doi":"10.1016/j.abiote.2026.100056","abstract":"Advanced sequencing now enables haplotype resolution of genomes from non-model diploid plant species, facilitating allele identification and the use of allele-specific expression (ASE) analysis to uncover the relationships between genes and phenotypes in heterozygous genomes. However, identification of true allelic pairs remains challenging due to the presence of paralogous genes from ancient genome duplications, and existing methods lack systematic, reproducible filtering criteria. In this study, we developed Alleleauto, a workflow integrating the parametric 3σ rule and the non-parametric Tukey's method as two complementary outlier detection methods to precisely identify alleles and perform ASE analysis from haplotype-resolved assemblies. Alleleauto first searches for homologous genes across homologous chromosomes, then applies statistical filtering criteria based on synonymous substitution rates (Ks) and synteny to systematically remove false alleles (paralogs). This dual-method framework offers flexible filtering strategies with adjustable parameters, enabling optimization for diverse genomes. 
We validated the workflow on tea plant (Camellia sinensis), ginger (Zingiber officinale), and lychee (Litchi chinensis), three plant species with distinct genomic features, demonstrating that statistical filtering significantly improves accuracy over the use of sequence similarity alone. Using the alleles identified by Alleleauto, we performed ASE analysis and calculated sequence divergence parameters to investigate ASE and heterosis mechanisms. Our open-source, and easy-to-use pipeline provides significant value for reproducible, scalable investigation of ASE and heterosis with haplotype-resolved genome assemblies.","number":"3","urldate":"2026-06-12","journal":"aBIOTECH","author":[{"propositions":[],"lastnames":["Shi"],"firstnames":["Tian-Le"],"suffixes":[]},{"propositions":[],"lastnames":["Nie"],"firstnames":["Shuai"],"suffixes":[]},{"propositions":[],"lastnames":["Bao"],"firstnames":["Yu-Tao"],"suffixes":[]},{"propositions":[],"lastnames":["Li"],"firstnames":["Zhi-Chao"],"suffixes":[]},{"propositions":[],"lastnames":["Chen"],"firstnames":["Zhao-Yang"],"suffixes":[]},{"propositions":[],"lastnames":["Zhao"],"firstnames":["Shi-Wei"],"suffixes":[]},{"propositions":[],"lastnames":["Yan"],"firstnames":["Xue-Mei"],"suffixes":[]},{"propositions":[],"lastnames":["Ma"],"firstnames":["Hai-Yao"],"suffixes":[]},{"propositions":[],"lastnames":["Tian"],"firstnames":["Xue-Chan"],"suffixes":[]},{"propositions":[],"lastnames":["Jia"],"firstnames":["Kai-Hua"],"suffixes":[]},{"propositions":[],"lastnames":["Guo"],"firstnames":["Jing-Fang"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Jun-Ke"],"suffixes":[]},{"propositions":[],"lastnames":["Mao"],"firstnames":["Jian-Feng"],"suffixes":[]}],"month":"September","year":"2026","keywords":"Allele identification, Allele-specific expression, Haplotype-resolved, Non-model diploid plant genome, Sequence similarity","pages":"100056","bibtex":"@article{shi_alleleauto_2026,\n\ttitle = {Alleleauto: a pipeline for allele 
identification and analysis of allele-specific gene expression with haplotype-resolved diploid genome assemblies},\n\tvolume = {7},\n\tissn = {2662-1738},\n\tshorttitle = {Alleleauto},\n\turl = {https://www.sciencedirect.com/science/article/pii/S2662173826000706},\n\tdoi = {10.1016/j.abiote.2026.100056},\n\tabstract = {Advanced sequencing now enables haplotype resolution of genomes from non-model diploid plant species, facilitating allele identification and the use of allele-specific expression (ASE) analysis to uncover the relationships between genes and phenotypes in heterozygous genomes. However, identification of true allelic pairs remains challenging due to the presence of paralogous genes from ancient genome duplications, and existing methods lack systematic, reproducible filtering criteria. In this study, we developed Alleleauto, a workflow integrating the parametric 3σ rule and the non-parametric Tukey's method as two complementary outlier detection methods to precisely identify alleles and perform ASE analysis from haplotype-resolved assemblies. Alleleauto first searches for homologous genes across homologous chromosomes, then applies statistical filtering criteria based on synonymous substitution rates (Ks) and synteny to systematically remove false alleles (paralogs). This dual-method framework offers flexible filtering strategies with adjustable parameters, enabling optimization for diverse genomes. We validated the workflow on tea plant (Camellia sinensis), ginger (Zingiber officinale), and lychee (Litchi chinensis), three plant species with distinct genomic features, demonstrating that statistical filtering significantly improves accuracy over the use of sequence similarity alone. Using the alleles identified by Alleleauto, we performed ASE analysis and calculated sequence divergence parameters to investigate ASE and heterosis mechanisms. 
Our open-source, and easy-to-use pipeline provides significant value for reproducible, scalable investigation of ASE and heterosis with haplotype-resolved genome assemblies.},\n\tnumber = {3},\n\turldate = {2026-06-12},\n\tjournal = {aBIOTECH},\n\tauthor = {Shi, Tian-Le and Nie, Shuai and Bao, Yu-Tao and Li, Zhi-Chao and Chen, Zhao-Yang and Zhao, Shi-Wei and Yan, Xue-Mei and Ma, Hai-Yao and Tian, Xue-Chan and Jia, Kai-Hua and Guo, Jing-Fang and Zhang, Jun-Ke and Mao, Jian-Feng},\n\tmonth = sep,\n\tyear = {2026},\n\tkeywords = {Allele identification, Allele-specific expression, Haplotype-resolved, Non-model diploid plant genome, Sequence similarity},\n\tpages = {100056},\n}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n","author_short":["Shi, T.","Nie, S.","Bao, Y.","Li, Z.","Chen, Z.","Zhao, S.","Yan, X.","Ma, H.","Tian, X.","Jia, K.","Guo, J.","Zhang, J.","Mao, J."],"key":"shi_alleleauto_2026","id":"shi_alleleauto_2026","bibbaseid":"shi-nie-bao-li-chen-zhao-yan-ma-etal-alleleautoapipelineforalleleidentificationandanalysisofallelespecificgeneexpressionwithhaplotyperesolveddiploidgenomeassemblies-2026","role":"author","urls":{"Paper":"https://www.sciencedirect.com/science/article/pii/S2662173826000706"},"keyword":["Allele identification","Allele-specific expression","Haplotype-resolved","Non-model diploid plant genome","Sequence similarity"],"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://bibbase.org/zotero/upscpub","dataSources":["9cGcv2t8pRzC92kzs"],"keywords":["allele identification","allele-specific expression","haplotype-resolved","non-model diploid plant genome","sequence similarity"],"search_terms":["alleleauto","pipeline","allele","identification","analysis","allele","specific","gene","expression","haplotype","resolved","diploid","genome","assemblies","shi","nie","bao","li","chen","zhao","yan","ma","tian","jia","guo","zhang","mao"],"title":"Alleleauto: a pipeline for allele identification and analysis of allele-specific gene expression with 
haplotype-resolved diploid genome assemblies","year":2026}