Analysis of memory constrained live provenance. Chen, P., Evans, T., & Plale, B. Volume 9672, 2016. © Springer International Publishing Switzerland 2016. We conjecture that meaningful analysis of large-scale provenance can be preserved by analyzing provenance data in limited memory while the data is still in motion; that the provenance needs not be fully resident before analysis can occur. As a proof of concept, this paper defines a stream model for reasoning about provenance data in motion for Big Data provenance. We propose a novel streaming algorithm for the backward provenance query, and apply it to the live provenance captured from agent-based simulations. The performance test demonstrates high throughput, low latency and good scalability, in a distributed stream processing framework built on Apache Kafka and Spark Streaming.
% Paper published as a chapter of Lecture Notes in Computer Science vol. 9672
% (the DOI carries the chapter suffix "_4"), so it is recorded as
% @inproceedings rather than @book. The citation key is taken from the
% exporter's own citation_key field.
% NOTE(review): booktitle is not part of the original export; it was filled in
% from the DOI's parent volume -- confirm against the publisher record.
% NOTE(review): pages and full author given names are unknown here -- add them
% once verified.
@inproceedings{Chen2016,
  author    = {Chen, P. and Evans, T. and Plale, B.},
  title     = {Analysis of memory constrained live provenance},
  booktitle = {Provenance and Annotation of Data and Processes},
  series    = {Lecture Notes in Computer Science},
  volume    = {9672},
  publisher = {Springer International Publishing},
  year      = {2016},
  doi       = {10.1007/978-3-319-40593-3_4},
  copyright = {Springer International Publishing Switzerland 2016},
  abstract  = {We conjecture that meaningful analysis of large-scale provenance can be preserved by analyzing provenance data in limited memory while the data is still in motion; that the provenance needs not be fully resident before analysis can occur. As a proof of concept, this paper defines a stream model for reasoning about provenance data in motion for Big Data provenance. We propose a novel streaming algorithm for the backward provenance query, and apply it to the live provenance captured from agent-based simulations. The performance test demonstrates high throughput, low latency and good scalability, in a distributed stream processing framework built on Apache Kafka and Spark Streaming.},
  type                = {inproceedings},
  bibtype             = {inproceedings},
  id                  = {7aabf41b-617c-3c37-90d9-0dac98577034},
  created             = {2019-10-01T17:20:59.241Z},
  file_attached       = {false},
  profile_id          = {42d295c0-0737-38d6-8b43-508cab6ea85d},
  last_modified       = {2019-10-01T17:23:58.080Z},
  read                = {false},
  starred             = {false},
  authored            = {true},
  confirmed           = {true},
  hidden              = {false},
  citation_key        = {Chen2016},
  folder_uuids        = {73f994b4-a3be-4035-a6dd-3802077ce863},
  private_publication = {false},
}