Provenance from log files: a BigData problem. Ghoshal, D. & Plale, B. In Proceedings of the Joint EDBT/ICDT 2013 Workshops, pages 290-297, 2013. ACM.
Provenance from log files: a BigData problem [link]Website  doi  abstract   bibtex   
As new data products of research increasingly become the product or output of complex processes, the lineage of the resulting products takes on greater importance as a description of the processes that contributed to the result. Without adequate description of data products, their reuse is lessened. The act of instrumenting an application for provenance capture is burdensome, however. This paper explores the option of deriving provenance from existing log files, an approach that reduces the instrumentation task substantially but raises questions about sifting through huge amounts of information for what may or may not be complete provenance. In this paper we study the tradeoff of ease of capture and provenance completeness, and show that under some circumstances capture through logs can result in high quality provenance.
@comment{Ghoshal2013 -- workshop paper entry. Fields such as id, created, read, starred and folder_uuids are not standard BibTeX fields; standard styles ignore unknown fields, so they are kept only as reference-manager bookkeeping.}
@inproceedings{Ghoshal2013,
 title = {Provenance from log files: a {BigData} problem},
 type = {inproceedings},
 year = {2013},
 keywords = {2013,pti},
 pages = {290--297},
 websites = {http://d2i.indiana.edu/pubs/provenance-log-files-bigdata-problem},
 url = {http://d2i.indiana.edu/pubs/provenance-log-files-bigdata-problem},
 publisher = {ACM},
 city = {Genoa, Italy},
 id = {cfb14e97-898b-372c-9622-b5dfeb44ce8f},
 created = {2019-10-01T17:21:00.145Z},
 file_attached = {false},
 profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
 last_modified = {2019-10-01T17:23:45.800Z},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {true},
 hidden = {false},
 citation_key = {Ghoshal2013},
 source_type = {proceedings},
 folder_uuids = {73f994b4-a3be-4035-a6dd-3802077ce863},
 private_publication = {false},
 abstract = {As new data products of research increasingly become the product or output of complex processes, the lineage of the resulting products takes on greater importance as a description of the processes that contributed to the result. Without adequate description of data products, their reuse is lessened. The act of instrumenting an application for provenance capture is burdensome, however. This paper explores the option of deriving provenance from existing log files, an approach that reduces the instrumentation task substantially but raises questions about sifting through huge amounts of information for what may or may not be complete provenance. In this paper we study the tradeoff of ease of capture and provenance completeness, and show that under some circumstances capture through logs can result in high quality provenance.},
 bibtype = {inproceedings},
 author = {Ghoshal, Devarshi and Plale, Beth},
 doi = {10.1145/2457317.2457366},
 booktitle = {Proceedings of the Joint {EDBT/ICDT} 2013 Workshops}
}

Downloads: 0