Mining lake time series using symbolic representation. Ruan, G., Hanson, P. C., Dugan, H. A., & Plale, B. A. Ecological Informatics, 39, 2017.
doi  abstract   bibtex   
© 2017 Elsevier B.V. Sensor networks deployed in lakes and reservoirs, when combined with simulation models and expert knowledge from the global community, are creating deeper understanding of the ecological dynamics of lakes. However, the amount of data and the complex patterns in the data demand substantial compute resources and efficient data mining algorithms, both of which are beyond the realm of traditional limnological research. This paper uniquely adapts methods from computer science for application to data intensive ecological questions, in order to provide ecologists with approachable methodology to facilitate knowledge discovery in lake ecology. We apply a state-of-the-art time series mining technique based on symbolic representation (SAX) to high-frequency time series of phycocyanin (PHYCO) and chlorophyll (CHLORO) fluorescence, both of which are indicators of algal biomass in lakes, as well as model predictions of algal biomass (MODEL). We use data mining techniques to demonstrate that MODEL predicts PHYCO better than it predicts CHLORO. All time series have high redundancy, resulting in a relatively small subset of unique patterns. However, MODEL is much less complex than either PHYCO or CHLORO and fails to reproduce high biomass periods indicative of algal blooms. We develop a set of tools in R to enable motif discovery and anomaly detection within a single lake time series, and relationship study among multiple lake time series through distance metrics, clustering and classification. Furthermore, to improve computation times, we provision web services to launch R tools remotely on high performance computing (HPC) resources. Comprehensive experimental results on observational and simulated lake data demonstrate the effectiveness of our approach.
@comment{Journal article record. The citation key is copied from the record's own citation_key field; fields that are not standard BibTeX (id, created, profile_id, ...) are reference-manager metadata and are ignored by bibliography styles.}
@article{Ruan2017,
 title = {Mining lake time series using symbolic representation},
 type = {article},
 year = {2017},
 volume = {39},
 id = {fb016cdf-ce82-3c9f-a370-cc423fc408d7},
 created = {2019-10-01T17:21:02.259Z},
 file_attached = {false},
 profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
 last_modified = {2019-10-01T17:24:12.946Z},
 read = {true},
 starred = {false},
 authored = {true},
 confirmed = {true},
 hidden = {false},
 citation_key = {Ruan2017},
 folder_uuids = {73f994b4-a3be-4035-a6dd-3802077ce863,3b35931e-fb6d-48f9-8e01-87ee16ef0331},
 private_publication = {false},
 abstract = {© 2017 Elsevier B.V. Sensor networks deployed in lakes and reservoirs, when combined with simulation models and expert knowledge from the global community, are creating deeper understanding of the ecological dynamics of lakes. However, the amount of data and the complex patterns in the data demand substantial compute resources and efficient data mining algorithms, both of which are beyond the realm of traditional limnological research. This paper uniquely adapts methods from computer science for application to data intensive ecological questions, in order to provide ecologists with approachable methodology to facilitate knowledge discovery in lake ecology. We apply a state-of-the-art time series mining technique based on symbolic representation (SAX) to high-frequency time series of phycocyanin (PHYCO) and chlorophyll (CHLORO) fluorescence, both of which are indicators of algal biomass in lakes, as well as model predictions of algal biomass (MODEL). We use data mining techniques to demonstrate that MODEL predicts PHYCO better than it predicts CHLORO. All time series have high redundancy, resulting in a relatively small subset of unique patterns. However, MODEL is much less complex than either PHYCO or CHLORO and fails to reproduce high biomass periods indicative of algal blooms. We develop a set of tools in R to enable motif discovery and anomaly detection within a single lake time series, and relationship study among multiple lake time series through distance metrics, clustering and classification. Furthermore, to improve computation times, we provision web services to launch R tools remotely on high performance computing (HPC) resources. Comprehensive experimental results on observational and simulated lake data demonstrate the effectiveness of our approach.},
 bibtype = {article},
 author = {Ruan, G. and Hanson, P. C. and Dugan, H. A. and Plale, Beth A.},
 doi = {10.1016/j.ecoinf.2017.03.001},
 journal = {Ecological Informatics}
}

Downloads: 0