Learning seasonal phytoplankton communities with topic models. Kalmbach, A., Sosik, H., Dudek, G., & Girdhar, Y. 2017.
abstract   bibtex   
Copyright © 2017, arXiv, All rights reserved. In this work we develop and demonstrate a probabilistic generative model for phytoplankton communities. The proposed model takes counts of a set of phytoplankton taxa in a timeseries as its training data, and models communities by learning sparse co-occurrence structure between the taxa. Our model is probabilistic, where communities are represented by probability distributions over the species, and each time-step is represented by a probability distribution over the communities. The proposed approach uses a non-parametric, spatiotemporal topic model to encourage the communities to form an interpretable representation of the data, without making strong assumptions about the communities. We demonstrate the quality and interpretability of our method by its ability to improve performance of a simplistic regression model. We show that simple linear regression is sufficient to predict the community distribution learned by our method, and therefore the taxon distributions, from a set of naively chosen environment variables. In contrast, a similar regression model is insufficient to predict the taxon distributions directly or through PCA with the same level of accuracy.
@comment{
 Preprint record for "Learning seasonal phytoplankton communities with
 topic models" (2017). The entry key below mirrors the value of the
 citation_key field. Fields such as id, profile_id, created,
 last_modified, read, starred, authored, confirmed, hidden,
 file_attached, private_publication, source and bibtype are not standard
 BibTeX fields (presumably reference-manager bookkeeping; verify before
 relying on them) and are kept unchanged.
}
@misc{Kalmbach2017a,
 author = {Kalmbach, A. and Sosik, H. M. and Dudek, G. and Girdhar, Y.},
 title = {Learning seasonal phytoplankton communities with topic models},
 type = {misc},
 year = {2017},
 source = {arXiv},
 id = {7f552baa-89d3-3ee6-b6d3-ff2fca76b037},
 created = {2020-10-27T23:59:00.000Z},
 file_attached = {false},
 profile_id = {2331788d-b144-3e67-ab8c-4abd7ab569c5},
 last_modified = {2021-09-14T14:07:16.285Z},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {false},
 hidden = {false},
 citation_key = {Kalmbach2017a},
 private_publication = {false},
 abstract = {Copyright © 2017, arXiv, All rights reserved. In this work we develop and demonstrate a probabilistic generative model for phytoplankton communities. The proposed model takes counts of a set of phytoplankton taxa in a timeseries as its training data, and models communities by learning sparse co-occurrence structure between the taxa. Our model is probabilistic, where communities are represented by probability distributions over the species, and each time-step is represented by a probability distribution over the communities. The proposed approach uses a non-parametric, spatiotemporal topic model to encourage the communities to form an interpretable representation of the data, without making strong assumptions about the communities. We demonstrate the quality and interpretability of our method by its ability to improve performance of a simplistic regression model. We show that simple linear regression is sufficient to predict the community distribution learned by our method, and therefore the taxon distributions, from a set of naively chosen environment variables. In contrast, a similar regression model is insufficient to predict the taxon distributions directly or through PCA with the same level of accuracy.},
 bibtype = {misc},
}

Downloads: 0