Analysis of memory constrained live provenance. Chen, P., Evans, T., & Plale, B. Volume 9672, 2016.
doi  abstract   bibtex   
© Springer International Publishing Switzerland 2016. We conjecture that meaningful analysis of large-scale provenance can be preserved by analyzing provenance data in limited memory while the data is still in motion; that the provenance needs not be fully resident before analysis can occur. As a proof of concept, this paper defines a stream model for reasoning about provenance data in motion for Big Data provenance. We propose a novel streaming algorithm for the backward provenance query, and apply it to the live provenance captured from agent-based simulations. The performance test demonstrates high throughput, low latency and good scalability, in a distributed stream processing framework built on Apache Kafka and Spark Streaming.
@inproceedings{Chen2016,
  author              = {Chen, P. and Evans, T. and Plale, B.},
  title               = {Analysis of Memory Constrained Live Provenance},
  booktitle           = {Provenance and Annotation of Data and Processes},
  series              = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  volume              = {9672},
  publisher           = {Springer International Publishing},
  year                = {2016},
  doi                 = {10.1007/978-3-319-40593-3_4},
  abstract            = {We conjecture that meaningful analysis of large-scale provenance can be preserved by analyzing provenance data in limited memory while the data is still in motion; that the provenance needs not be fully resident before analysis can occur. As a proof of concept, this paper defines a stream model for reasoning about provenance data in motion for Big Data provenance. We propose a novel streaming algorithm for the backward provenance query, and apply it to the live provenance captured from agent-based simulations. The performance test demonstrates high throughput, low latency and good scalability, in a distributed stream processing framework built on Apache Kafka and Spark Streaming.},
  internal-note       = {Retyped from book to inproceedings (DOI suffix _4 marks a chapter in LNCS 9672); booktitle added by reviewer -- confirm against the publisher record. Reference-manager bookkeeping fields below are ignored by BibTeX styles.},
  id                  = {7aabf41b-617c-3c37-90d9-0dac98577034},
  created             = {2019-10-01T17:20:59.241Z},
  last_modified       = {2019-10-01T17:23:58.080Z},
  profile_id          = {42d295c0-0737-38d6-8b43-508cab6ea85d},
  folder_uuids        = {73f994b4-a3be-4035-a6dd-3802077ce863},
  citation_key        = {Chen2016},
  file_attached       = {false},
  read                = {false},
  starred             = {false},
  authored            = {true},
  confirmed           = {true},
  hidden              = {false},
  private_publication = {false},
}

Downloads: 0