Microbiome meta-analysis and cross-disease comparison enabled by the SIAMCAT machine learning toolbox. Wirbel, J., Zych, K., Essex, M., Karcher, N., Kartal, E., Salazar, G., Bork, P., Sunagawa, S., & Zeller, G. Genome Biology, 22(1), BioMed Central Ltd, December 2021.
Paper doi abstract bibtex The human microbiome is increasingly mined for diagnostic and therapeutic biomarkers using machine learning (ML). However, metagenomics-specific software is scarce, and overoptimistic evaluation and limited cross-study generalization are prevailing issues. To address these, we developed SIAMCAT, a versatile R toolbox for ML-based comparative metagenomics. We demonstrate its capabilities in a meta-analysis of fecal metagenomic studies (10,803 samples). When naively transferred across studies, ML models lost accuracy and disease specificity, which could however be resolved by a novel training set augmentation strategy. This reveals some biomarkers to be disease-specific, with others shared across multiple conditions. SIAMCAT is freely available from siamcat.embl.de.
% Journal article: SIAMCAT machine learning toolbox (Genome Biology 22(1), 2021).
% Bibliographic fields come first; the trailing block (type ... private_publication)
% is reference-manager bookkeeping carried over unchanged. Standard styles ignore
% unknown fields, so it is harmless to keep.
@article{wirbel2021siamcat,
  author              = {Wirbel, Jakob and Zych, Konrad and Essex, Morgan and Karcher, Nicolai and Kartal, Ece and Salazar, Guillem and Bork, Peer and Sunagawa, Shinichi and Zeller, Georg},
  title               = {Microbiome meta-analysis and cross-disease comparison enabled by the {SIAMCAT} machine learning toolbox},
  journal             = {Genome Biology},
  volume              = {22},
  number              = {1},
  year                = {2021},
  month               = dec,
  day                 = {1},
  publisher           = {BioMed Central Ltd},
  doi                 = {10.1186/s13059-021-02306-1},
  keywords            = {Machine learning,Meta-analysis,Microbiome data analysis,Microbiome-wide association studies (MWAS),Statistical modeling},
  abstract            = {The human microbiome is increasingly mined for diagnostic and therapeutic biomarkers using machine learning (ML). However, metagenomics-specific software is scarce, and overoptimistic evaluation and limited cross-study generalization are prevailing issues. To address these, we developed SIAMCAT, a versatile R toolbox for ML-based comparative metagenomics. We demonstrate its capabilities in a meta-analysis of fecal metagenomic studies (10,803 samples). When naively transferred across studies, ML models lost accuracy and disease specificity, which could however be resolved by a novel training set augmentation strategy. This reveals some biomarkers to be disease-specific, with others shared across multiple conditions. SIAMCAT is freely available from siamcat.embl.de.},
  type                = {article},
  bibtype             = {article},
  id                  = {455ad83a-64a7-345b-8ad4-aaa95b4bbcda},
  created             = {2025-10-30T12:05:03.035Z},
  accessed            = {2025-10-30},
  last_modified       = {2025-10-30T12:05:12.321Z},
  file_attached       = {true},
  profile_id          = {9c1a206b-6cfb-375e-a257-a4c31f5a0791},
  group_id            = {89bece75-0a7e-3035-98e1-71b82260b8e8},
  read                = {false},
  starred             = {false},
  authored            = {false},
  confirmed           = {false},
  hidden              = {false},
  private_publication = {false},
}
Downloads: 0
{"_id":"DRp8xTvTTpsDcKMfg","bibbaseid":"wirbel-zych-essex-karcher-kartal-salazar-bork-sunagawa-etal-microbiomemetaanalysisandcrossdiseasecomparisonenabledbythesiamcatmachinelearningtoolbox-2021","author_short":["Wirbel, J.","Zych, K.","Essex, M.","Karcher, N.","Kartal, E.","Salazar, G.","Bork, P.","Sunagawa, S.","Zeller, G."],"bibdata":{"title":"Microbiome meta-analysis and cross-disease comparison enabled by the SIAMCAT machine learning toolbox","type":"article","year":"2021","keywords":"Machine learning,Meta-analysis,Microbiome data analysis,Microbiome-wide association studies (MWAS),Statistical modeling","volume":"22","month":"12","publisher":"BioMed Central Ltd","day":"1","id":"455ad83a-64a7-345b-8ad4-aaa95b4bbcda","created":"2025-10-30T12:05:03.035Z","accessed":"2025-10-30","file_attached":"true","profile_id":"9c1a206b-6cfb-375e-a257-a4c31f5a0791","group_id":"89bece75-0a7e-3035-98e1-71b82260b8e8","last_modified":"2025-10-30T12:05:12.321Z","read":false,"starred":false,"authored":false,"confirmed":false,"hidden":false,"private_publication":false,"abstract":"The human microbiome is increasingly mined for diagnostic and therapeutic biomarkers using machine learning (ML). However, metagenomics-specific software is scarce, and overoptimistic evaluation and limited cross-study generalization are prevailing issues. To address these, we developed SIAMCAT, a versatile R toolbox for ML-based comparative metagenomics. We demonstrate its capabilities in a meta-analysis of fecal metagenomic studies (10,803 samples). When naively transferred across studies, ML models lost accuracy and disease specificity, which could however be resolved by a novel training set augmentation strategy. This reveals some biomarkers to be disease-specific, with others shared across multiple conditions. 
SIAMCAT is freely available from siamcat.embl.de.","bibtype":"article","author":"Wirbel, Jakob and Zych, Konrad and Essex, Morgan and Karcher, Nicolai and Kartal, Ece and Salazar, Guillem and Bork, Peer and Sunagawa, Shinichi and Zeller, Georg","doi":"10.1186/S13059-021-02306-1","journal":"Genome Biology","number":"1","bibtex":"@article{\n title = {Microbiome meta-analysis and cross-disease comparison enabled by the SIAMCAT machine learning toolbox},\n type = {article},\n year = {2021},\n keywords = {Machine learning,Meta-analysis,Microbiome data analysis,Microbiome-wide association studies (MWAS),Statistical modeling},\n volume = {22},\n month = {12},\n publisher = {BioMed Central Ltd},\n day = {1},\n id = {455ad83a-64a7-345b-8ad4-aaa95b4bbcda},\n created = {2025-10-30T12:05:03.035Z},\n accessed = {2025-10-30},\n file_attached = {true},\n profile_id = {9c1a206b-6cfb-375e-a257-a4c31f5a0791},\n group_id = {89bece75-0a7e-3035-98e1-71b82260b8e8},\n last_modified = {2025-10-30T12:05:12.321Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {false},\n hidden = {false},\n private_publication = {false},\n abstract = {The human microbiome is increasingly mined for diagnostic and therapeutic biomarkers using machine learning (ML). However, metagenomics-specific software is scarce, and overoptimistic evaluation and limited cross-study generalization are prevailing issues. To address these, we developed SIAMCAT, a versatile R toolbox for ML-based comparative metagenomics. We demonstrate its capabilities in a meta-analysis of fecal metagenomic studies (10,803 samples). When naively transferred across studies, ML models lost accuracy and disease specificity, which could however be resolved by a novel training set augmentation strategy. This reveals some biomarkers to be disease-specific, with others shared across multiple conditions. 
SIAMCAT is freely available from siamcat.embl.de.},\n bibtype = {article},\n author = {Wirbel, Jakob and Zych, Konrad and Essex, Morgan and Karcher, Nicolai and Kartal, Ece and Salazar, Guillem and Bork, Peer and Sunagawa, Shinichi and Zeller, Georg},\n doi = {10.1186/S13059-021-02306-1},\n journal = {Genome Biology},\n number = {1}\n}","author_short":["Wirbel, J.","Zych, K.","Essex, M.","Karcher, N.","Kartal, E.","Salazar, G.","Bork, P.","Sunagawa, S.","Zeller, G."],"urls":{"Paper":"https://bibbase.org/service/mendeley/9c1a206b-6cfb-375e-a257-a4c31f5a0791/file/2606a905-4e85-7077-7b0c-87e4bed89adb/full_text.pdf.pdf"},"biburl":"https://bibbase.org/service/mendeley/9c1a206b-6cfb-375e-a257-a4c31f5a0791","bibbaseid":"wirbel-zych-essex-karcher-kartal-salazar-bork-sunagawa-etal-microbiomemetaanalysisandcrossdiseasecomparisonenabledbythesiamcatmachinelearningtoolbox-2021","role":"author","keyword":["Machine learning","Meta-analysis","Microbiome data analysis","Microbiome-wide association studies (MWAS)","Statistical modeling"],"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://bibbase.org/service/mendeley/9c1a206b-6cfb-375e-a257-a4c31f5a0791","dataSources":["HEHSH48E7jnzCpz7K","ipXoPhmRmrab9fjQu","2252seNhipfTmjEBQ"],"keywords":["machine learning","meta-analysis","microbiome data analysis","microbiome-wide association studies (mwas)","statistical modeling"],"search_terms":["microbiome","meta","analysis","cross","disease","comparison","enabled","siamcat","machine","learning","toolbox","wirbel","zych","essex","karcher","kartal","salazar","bork","sunagawa","zeller"],"title":"Microbiome meta-analysis and cross-disease comparison enabled by the SIAMCAT machine learning toolbox","year":2021}