Towards Collaborative Data Reduction in Stream-Processing Systems. Li, M. & Kotz, D. International Journal of Communication Networks and Distributed Systems (IJCNDS), 2(4):375-400, Inderscience, 2009. Website abstract bibtex We consider a distributed system that disseminates high-volume event streams to many simultaneous monitoring applications over a low-bandwidth network. For bandwidth efficiency, we propose a collaborative data-reduction mechanism, "group-aware stream filtering", used together with multicast, to select a small set of necessary data that satisfy the needs of a group of subscribers simultaneously. We turn data-compressing filters into group-aware filters by exploiting two overlooked, yet important, properties of monitoring applications: 1) many of them can tolerate some degree of "slack" in their data quality requirements, and 2) there may exist multiple subsets of the source data satisfying the quality needs of an application. We can thus choose the "best alternative" subset for each application to maximize the data overlap within the group to best benefit from multicasting. We provide a general framework that treats the group-aware stream filtering problem completely; we prove the problem NP-hard and thus provide a suite of heuristic algorithms that ensure data quality (specifically, granularity and timeliness) while collaboratively reducing data. The framework is extensible and supports a diverse range of filters. Our prototype-based evaluation shows that group-aware stream filtering is effective in trading CPU time for data reduction, compared with self-interested filtering.
@comment{Journal article by Li and Kotz, 2009. The citation key is taken from the entry's own citation_key field. Standard bibliographic fields come first; the remaining fields are export metadata that BibTeX styles ignore.}
@article{li:ijcnds,
  author              = {Li, Ming and Kotz, David},
  title               = {Towards Collaborative Data Reduction in Stream-Processing Systems},
  journal             = {International Journal of Communication Networks and Distributed Systems (IJCNDS)},
  volume              = {2},
  number              = {4},
  pages               = {375--400},
  year                = {2009},
  publisher           = {Inderscience},
  note                = {Invited paper},
  url                 = {http://www.cs.dartmouth.edu/~dfk/papers/internal/li-ijcnds.pdf},
  keywords            = {dartmouth-cs,distributed-computing},
  abstract            = {We consider a distributed system that disseminates high-volume event streams to many simultaneous monitoring applications over a low-bandwidth network. For bandwidth efficiency, we propose a collaborative data-reduction mechanism, ``group-aware stream filtering'', used together with multicast, to select a small set of necessary data that satisfy the needs of a group of subscribers simultaneously. We turn data-compressing filters into group-aware filters by exploiting two overlooked, yet important, properties of monitoring applications: 1)~many of them can tolerate some degree of ``slack'' in their data quality requirements, and 2)~there may exist multiple subsets of the source data satisfying the quality needs of an application. We can thus choose the ``best alternative'' subset for each application to maximize the data overlap within the group to best benefit from multicasting. We provide a general framework that treats the group-aware stream filtering problem completely; we prove the problem NP-hard and thus provide a suite of heuristic algorithms that ensure data quality (specifically, granularity and timeliness) while collaboratively reducing data. The framework is extensible and supports a diverse range of filters. Our prototype-based evaluation shows that group-aware stream filtering is effective in trading CPU time for data reduction, compared with self-interested filtering.},
  type                = {article},
  bibtype             = {article},
  source_type         = {article},
  citation_key        = {li:ijcnds},
  id                  = {df8b23d8-f43b-396b-bf8e-11a6b15d5977},
  profile_id          = {f954d000-ce94-3da6-bd26-b983145a920f},
  group_id            = {b0b145a3-980e-3ad7-a16f-c93918c606ed},
  created             = {2018-07-12T21:30:51.766Z},
  last_modified       = {2018-07-12T21:30:51.766Z},
  file_attached       = {false},
  read                = {false},
  starred             = {false},
  authored            = {false},
  confirmed           = {true},
  hidden              = {false},
  private_publication = {false},
}
Downloads: 0
{"_id":"gEFSZZY2EveunyCG3","bibbaseid":"li-kotz-towardscollaborativedatareductioninstreamprocessingsystems-2009","downloads":0,"creationDate":"2019-02-15T15:14:57.484Z","title":"Towards Collaborative Data Reduction in Stream-Processing Systems","author_short":["Li, M.","Kotz, D."],"year":2009,"bibtype":"article","biburl":null,"bibdata":{"title":"Towards Collaborative Data Reduction in Stream-Processing Systems","type":"article","year":"2009","identifiers":"[object Object]","keywords":"dartmouth-cs,distributed-computing","pages":"375-400","volume":"2","websites":"http://www.cs.dartmouth.edu/~dfk/papers/internal/li-ijcnds.pdf","publisher":"Inderscience","id":"df8b23d8-f43b-396b-bf8e-11a6b15d5977","created":"2018-07-12T21:30:51.766Z","file_attached":false,"profile_id":"f954d000-ce94-3da6-bd26-b983145a920f","group_id":"b0b145a3-980e-3ad7-a16f-c93918c606ed","last_modified":"2018-07-12T21:30:51.766Z","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"citation_key":"li:ijcnds","source_type":"article","notes":"Invited paper","private_publication":false,"abstract":"We consider a distributed system that disseminates high-volume event streams to many simultaneous monitoring applications over a low-bandwidth network. For bandwidth efficiency, we propose a collaborative data-reduction mechanism, ``group-aware stream filtering'', used together with multicast, to select a small set of necessary data that satisfy the needs of a group of subscribers simultaneously. We turn data-compressing filters into group-aware filters by exploiting two overlooked, yet important, properties of monitoring applications: 1)~many of them can tolerate some degree of ``slack'' in their data quality requirements, and 2)~there may exist multiple subsets of the source data satisfying the quality needs of an application. We can thus choose the ``best alternative'' subset for each application to maximize the data overlap within the group to best benefit from multicasting. 
We provide a general framework that treats the group-aware stream filtering problem completely; we prove the problem NP-hard and thus provide a suite of heuristic algorithms that ensure data quality (specifically, granularity and timeliness) while collaboratively reducing data. The framework is extensible and supports a diverse range of filters. Our prototype-based evaluation shows that group-aware stream filtering is effective in trading CPU time for data reduction, compared with self-interested filtering.","bibtype":"article","author":"Li, Ming and Kotz, David","journal":"International Journal of Communication Networks and Distributed Systems (IJCNDS)","number":"4","bibtex":"@article{\n title = {Towards Collaborative Data Reduction in Stream-Processing Systems},\n type = {article},\n year = {2009},\n identifiers = {[object Object]},\n keywords = {dartmouth-cs,distributed-computing},\n pages = {375-400},\n volume = {2},\n websites = {http://www.cs.dartmouth.edu/~dfk/papers/internal/li-ijcnds.pdf},\n publisher = {Inderscience},\n id = {df8b23d8-f43b-396b-bf8e-11a6b15d5977},\n created = {2018-07-12T21:30:51.766Z},\n file_attached = {false},\n profile_id = {f954d000-ce94-3da6-bd26-b983145a920f},\n group_id = {b0b145a3-980e-3ad7-a16f-c93918c606ed},\n last_modified = {2018-07-12T21:30:51.766Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n citation_key = {li:ijcnds},\n source_type = {article},\n notes = {Invited paper},\n private_publication = {false},\n abstract = {We consider a distributed system that disseminates high-volume event streams to many simultaneous monitoring applications over a low-bandwidth network. For bandwidth efficiency, we propose a collaborative data-reduction mechanism, ``group-aware stream filtering'', used together with multicast, to select a small set of necessary data that satisfy the needs of a group of subscribers simultaneously. 
We turn data-compressing filters into group-aware filters by exploiting two overlooked, yet important, properties of monitoring applications: 1)~many of them can tolerate some degree of ``slack'' in their data quality requirements, and 2)~there may exist multiple subsets of the source data satisfying the quality needs of an application. We can thus choose the ``best alternative'' subset for each application to maximize the data overlap within the group to best benefit from multicasting. We provide a general framework that treats the group-aware stream filtering problem completely; we prove the problem NP-hard and thus provide a suite of heuristic algorithms that ensure data quality (specifically, granularity and timeliness) while collaboratively reducing data. The framework is extensible and supports a diverse range of filters. Our prototype-based evaluation shows that group-aware stream filtering is effective in trading CPU time for data reduction, compared with self-interested filtering.},\n bibtype = {article},\n author = {Li, Ming and Kotz, David},\n journal = {International Journal of Communication Networks and Distributed Systems (IJCNDS)},\n number = {4}\n}","author_short":["Li, M.","Kotz, D."],"urls":{"Website":"http://www.cs.dartmouth.edu/~dfk/papers/internal/li-ijcnds.pdf"},"bibbaseid":"li-kotz-towardscollaborativedatareductioninstreamprocessingsystems-2009","role":"author","keyword":["dartmouth-cs","distributed-computing"],"downloads":0},"search_terms":["towards","collaborative","data","reduction","stream","processing","systems","li","kotz"],"keywords":["dartmouth-cs","distributed-computing","distributed computing"],"authorIDs":["5c66d9b9e4b1db10000001ba","5c69efe6ac591010000000e0"]}