generated by bibbase.org

You can create a new website with this list, or embed it in an existing web page by copying and pasting any of the following snippets.

JavaScript (easiest):

<script src="https://bibbase.org/show?bib=https%3A%2F%2Fapi.zotero.org%2Fgroups%2F762173%2Fitems%3Fkey%3DVoBqRAZqV5r5GUZQw2NHKmbx%26format%3Dbibtex%26limit%3D300&jsonp=1&theme=default"></script>

PHP:

<?php
$contents = file_get_contents("https://bibbase.org/show?bib=https%3A%2F%2Fapi.zotero.org%2Fgroups%2F762173%2Fitems%3Fkey%3DVoBqRAZqV5r5GUZQw2NHKmbx%26format%3Dbibtex%26limit%3D300&theme=default");
print_r($contents);
?>

iFrame (not recommended):

<iframe src="https://bibbase.org/show?bib=https%3A%2F%2Fapi.zotero.org%2Fgroups%2F762173%2Fitems%3Fkey%3DVoBqRAZqV5r5GUZQw2NHKmbx%26format%3Dbibtex%26limit%3D300&theme=default"></iframe>

For more details see the documentation.

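If you would rather work with the raw BibTeX than with the rendered page, the same records can be fetched directly from the Zotero feed that the snippets above point at. Below is a minimal sketch in Python, assuming the third-party requests package; the URL is simply the percent-decoded bib parameter from the snippets.

import requests  # third-party HTTP client (pip install requests)

# Decoded form of the "bib" parameter used in the snippets above:
# Zotero group 762173, BibTeX export, at most 300 items.
url = "https://api.zotero.org/groups/762173/items"
params = {"key": "VoBqRAZqV5r5GUZQw2NHKmbx", "format": "bibtex", "limit": 300}

response = requests.get(url, params=params, timeout=30)
response.raise_for_status()  # fail on HTTP errors instead of saving an error page

# The response body is plain BibTeX -- the same records listed below.
with open("publications.bib", "w", encoding="utf-8") as fh:
    fh.write(response.text)
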
2017 (1)

Raw Data Queries during Data-intensive Parallel Workflow Execution. Silva, V.; Leite, J.; Camata, J.; Oliveira, D.; Coutinho, A.; Valduriez, P.; and Mattoso, M. Under revision in Special Issue on Workflows for Data-Driven Research in the Future Generation Computer Systems Journal. 2017.

@article{silva_raw_2017,
  title = {Raw {Data} {Queries} during {Data}-intensive {Parallel} {Workflow} {Execution}},
  journal = {Under revision in Special Issue on Workflows for Data-Driven Research in the Future Generation Computer Systems Journal},
  author = {Silva, Vitor and Leite, Jose and Camata, Jose and Oliveira, Daniel and Coutinho, A.L.G.A and Valduriez, Patrick and Mattoso, Marta},
  year = {2017},
}

2016 (3)

Analyzing related raw data files through dataflows. Silva, V.; de Oliveira, D.; Valduriez, P.; and Mattoso, M. CCPE, 28(8): 2528–2545. 2016.

@article{silva_analyzing_2016,
  title = {Analyzing related raw data files through dataflows},
  volume = {28},
  issn = {15320626},
  shorttitle = {Analyzing related raw data files through dataflows},
  url = {http://doi.wiley.com/10.1002/cpe.3616},
  doi = {10.1002/cpe.3616},
  language = {en},
  number = {8},
  urldate = {2015-08-05},
  journal = {CCPE},
  author = {Silva, Vítor and de Oliveira, Daniel and Valduriez, Patrick and Mattoso, Marta},
  year = {2016},
  pages = {2528--2545},
}

Uncertainty quantification in numerical simulation of particle-laden flows. Guerra, G. M.; Zio, S.; Camata, J. J.; Dias, J.; Elias, R. N.; Mattoso, M.; B. Paraizo, P. L.; G. A. Coutinho, A. L.; and Rochinha, F. A. Computational Geosciences, 20(1): 265–281. February 2016.

@article{guerra_uncertainty_2016,
  title = {Uncertainty quantification in numerical simulation of particle-laden flows},
  volume = {20},
  issn = {1420-0597, 1573-1499},
  url = {http://link.springer.com/10.1007/s10596-016-9563-6},
  doi = {10.1007/s10596-016-9563-6},
  language = {en},
  number = {1},
  urldate = {2016-03-30},
  journal = {Computational Geosciences},
  author = {Guerra, Gabriel M. and Zio, Souleymane and Camata, Jose J. and Dias, Jonas and Elias, Renato N. and Mattoso, Marta and B. Paraizo, Paulo L. and G. A. Coutinho, Alvaro L. and Rochinha, Fernando A.},
  month = feb,
  year = {2016},
  pages = {265--281},
}

Analyzing Provenance Across Heterogeneous Provenance Graphs. Oliveira, W.; Missier, P.; Ocaña, K.; Oliveira, D. d.; and Braganholo, V. In Mattoso, M.; and Glavic, B., editors, Provenance and Annotation of Data and Processes, Lecture Notes in Computer Science, pages 57–70. Springer International Publishing, June 2016.

@incollection{oliveira_analyzing_2016,
  series = {Lecture {Notes} in {Computer} {Science}},
  title = {Analyzing {Provenance} {Across} {Heterogeneous} {Provenance} {Graphs}},
  copyright = {©2016 Springer International Publishing Switzerland},
  isbn = {978-3-319-40592-6 978-3-319-40593-3},
  url = {http://link.springer.com/chapter/10.1007/978-3-319-40593-3_5},
  abstract = {Provenance generated by different workflow systems is generally expressed using different formats. This is not an issue when scientists analyze provenance graphs in isolation, or when they use the same workflow system. However, analyzing heterogeneous provenance graphs from multiple systems poses a challenge. To address this problem we adopt ProvONE as an integration model, and show how different provenance databases can be converted to a global ProvONE schema. Scientists can then query this integrated database, exploring and linking provenance across several different workflows that may represent different implementations of the same experiment. To illustrate the feasibility of our approach, we developed conceptual mappings between the provenance databases of two workflow systems (e-Science Central and SciCumulus). We provide cartridges that implement these mappings and generate an integrated provenance database expressed as Prolog facts. To demonstrate its usage, we have developed Prolog rules that enable scientists to query the integrated database.},
  language = {en},
  number = {9672},
  urldate = {2016-08-01},
  booktitle = {Provenance and {Annotation} of {Data} and {Processes}},
  publisher = {Springer International Publishing},
  author = {Oliveira, Wellington and Missier, Paolo and Ocaña, Kary and Oliveira, Daniel de and Braganholo, Vanessa},
  editor = {Mattoso, Marta and Glavic, Boris},
  month = jun,
  year = {2016},
  doi = {10.1007/978-3-319-40593-3_5},
  keywords = {Artificial Intelligence (incl. Robotics), Computers and Society, Database Management, Information Storage and Retrieval, Information Systems Applications (incl. Internet), Management of Computing and Information Systems},
  pages = {57--70},
}

2015 (6)

How Much Domain Data Should Be in Provenance Databases? De Oliveira, D.; Silva, V.; and Mattoso, M. In Workshop on Theory and Practice of Provenance (TaPP), Edinburgh, Scotland, 2015. USENIX Association.

@inproceedings{de_oliveira_how_2015,
  address = {Edinburgh, Scotland},
  title = {How {Much} {Domain} {Data} {Should} {Be} in {Provenance} {Databases}?},
  booktitle = {Workshop on {Theory} and {Practice} of {Provenance} ({TaPP})},
  publisher = {USENIX Association},
  author = {De Oliveira, Daniel and Silva, Vítor and Mattoso, Marta},
  year = {2015},
}

Data-centric iteration in dynamic workflows. Dias, J.; Guerra, G.; Rochinha, F.; Coutinho, A. L. G. A.; Valduriez, P.; and Mattoso, M. FGCS, 46: 114–126. 2015.

@article{dias_data-centric_2015,
  title = {Data-centric iteration in dynamic workflows},
  volume = {46},
  issn = {0167-739X},
  url = {http://www.sciencedirect.com/science/article/pii/S0167739X14002155},
  doi = {10.1016/j.future.2014.10.021},
  abstract = {Dynamic workflows are scientific workflows to support computational science simulations, typically using dynamic processes based on runtime scientific data analyses. They require the ability of adapting the workflow, at runtime, based on user input and dynamic steering. Supporting data-centric iteration is an important step towards dynamic workflows because user interaction with workflows is iterative. However, current support for iteration in scientific workflows is static and does not allow for changing data at runtime. In this paper, we propose a solution based on algebraic operators and a dynamic execution model to enable workflow adaptation based on user input and dynamic steering. We introduce the concept of iteration lineage that makes provenance data management consistent with dynamic iterative workflow changes. Lineage enables scientists to interact with workflow data and configuration at runtime through an API that triggers steering. We evaluate our approach using a novel and real large-scale workflow for uncertainty quantification on a 640-core cluster. The results show impressive execution time savings from 2.5 to 24 days, compared to non-iterative workflow execution. We verify that the maximum overhead introduced by our iterative model is less than 5\% of execution time. Also, our proposed steering algorithms are very efficient and run in less than 1 millisecond, in the worst-case scenario.},
  urldate = {2015-06-01},
  journal = {FGCS},
  author = {Dias, Jonas and Guerra, Gabriel and Rochinha, Fernando and Coutinho, Alvaro L. G. A. and Valduriez, Patrick and Mattoso, Marta},
  year = {2015},
  keywords = {Dynamic workflows, Iteration, Scientific workflows, steering},
  pages = {114--126},
}

Monitoramento de Desempenho usando Dados de Proveniência e de Domínio durante a Execução de Aplicações Científicas. Souza, R.; Silva, V.; Neves, L.; De Oliveira, D.; and Mattoso, M. In XIV Workshop em Desempenho de Sistemas Computacionais e de Comunicação, Recife, PE, 2015.

@inproceedings{souza_monitoramento_2015,
  address = {Recife, PE},
  title = {Monitoramento de {Desempenho} usando {Dados} de {Proveniência} e de {Domínio} durante a {Execução} de {Aplicações} {Científicas}},
  booktitle = {{XIV} {Workshop} em {Desempenho} de {Sistemas} {Computacionais} e de {Comunicação}},
  author = {Souza, Renan and Silva, Vítor and Neves, Leonardo and De Oliveira, Daniel and Mattoso, Marta},
  year = {2015},
}

Parallel Execution of Workflows Driven by a Distributed Database Management System. Souza, R.; Silva, V.; Oliveira, D.; Valduriez, P.; Lima, A. A. B.; and Mattoso, M. Poster in ACM 27th SC Conference, Austin, Texas, USA, 2015.

@inproceedings{souza_parallel_2015,
  address = {Austin, Texas, USA},
  title = {Parallel {Execution} of {Workflows} {Driven} by a {Distributed} {Database} {Management} {System}},
  booktitle = {Poster in {ACM} 27th {SC} {Conference}},
  author = {Souza, Renan and Silva, Vítor and Oliveira, Daniel and Valduriez, Patrick and Lima, Alexandre A. B. and Mattoso, Marta},
  year = {2015},
}

Controlling the Parallel Execution of Workflows Relying on a Distributed Database. Souza, R. F. S.; and Mattoso, M. L. d. Q. Ph.D. Thesis, UFRJ/COPPE, Rio de Janeiro, 2015.

@phdthesis{souza_controlling_2015,
  address = {Rio de Janeiro},
  title = {{CONTROLLING} {THE} {PARALLEL} {EXECUTION} {OF} {WORKFLOWS} {RELYING} {ON} {A} {DISTRIBUTED} {DATABASE}},
  abstract = {managed by a Scientific Workflow Management System (SWfMS). In a parallel execution, a SWfMS schedules many tasks to the computing resources and Many Task Computing (MTC) is the paradigm that contemplates this scenario. In order to manage the execution data necessary for the parallel execution management and tasks’ scheduling in MTC, an execution engine needs a scalable data structure to accommodate those many tasks. In addition to managing execution data, it has been shown that storing provenance and domain data at runtime enables powerful advantages, such as execution monitoring, discovery of anticipated results, and user steering. Although all these data may be managed using different approaches (e.g., flat log files, DBMS, or a hybrid approach), using a centralized DBMS has shown to deliver enhanced analytical capabilities at runtime, which is very valuable for end-users. However, if on the one hand using a centralized DBMS enables important advantages, on the other hand, it introduces a single point of failure and of contention, which jeopardizes performance in a large scenario. To cope with this, in this work, we propose a novel SWfMS architecture that removes the responsibility of a central node to which all other nodes need to communicate for tasks’ scheduling, which generates a point of contention; and transfer such responsibility to a distributed DBMS. By doing this, we show that our solution frequently attains an efficiency of over 80\% and more than 90\% of gains in relation to an architecture that relies on a centralized DBMS, in a 1,000 cores cluster. More importantly, we achieve all these results without abdicating the advantages of using a DBMS to manage execution, provenance, and domain data, jointly, at runtime.},
  school = {UFRJ/COPPE},
  author = {Souza, Renan Francisco Santos and Mattoso, Marta Lima de Queirós},
  year = {2015},
}

Dynamic steering of HPC scientific workflows: A survey. Mattoso, M.; Dias, J.; Ocaña, K. A.; Ogasawara, E.; Costa, F.; Horta, F.; Silva, V.; and de Oliveira, D. FGCS, 46: 100–113. May 2015.

@article{mattoso_dynamic_2015,
  title = {Dynamic steering of {HPC} scientific workflows: {A} survey},
  volume = {46},
  issn = {0167739X},
  shorttitle = {Dynamic steering of {HPC} scientific workflows},
  url = {http://linkinghub.elsevier.com/retrieve/pii/S0167739X14002519},
  doi = {10.1016/j.future.2014.11.017},
  language = {en},
  urldate = {2015-03-12},
  journal = {FGCS},
  author = {Mattoso, Marta and Dias, Jonas and Ocaña, Kary A.C.S. and Ogasawara, Eduardo and Costa, Flavio and Horta, Felipe and Silva, Vítor and de Oliveira, Daniel},
  month = may,
  year = {2015},
  pages = {100--113},
}

2013 (8)

Capturing and Querying Workflow Runtime Provenance with PROV: A Practical Approach. Costa, F.; Silva, V.; de Oliveira, D.; Ocaña, K.; Ogasawara, E.; Dias, J.; and Mattoso, M. In EDBT/ICDT Workshops, pages 282–289, New York, NY, USA, 2013. ACM Press.

@inproceedings{costa_capturing_2013,
  address = {New York, NY, USA},
  title = {Capturing and {Querying} {Workflow} {Runtime} {Provenance} with {PROV}: {A} {Practical} {Approach}},
  isbn = {978-1-4503-1599-9},
  shorttitle = {Capturing and {Querying} {Workflow} {Runtime} {Provenance} with {PROV}},
  doi = {10.1145/2457317.2457365},
  abstract = {Scientific workflows are commonly used to model and execute large-scale scientific experiments. They represent key resources for scientists and are enacted and managed by Scientific Workflow Management Systems (SWfMS). Each SWfMS has its particular approach to execute workflows and to capture and manage their provenance data. Due to the large scale of experiments, it may be unviable to analyze provenance data only after the end of the execution. A single experiment may demand weeks to run, even in high performance computing environments. Thus scientists need to monitor the experiment during its execution, and this can be done through provenance data. Runtime provenance analysis allows for scientists to monitor workflow execution and to take actions before the end of it (i.e. workflow steering). This provenance data can also be used to fine-tune the parallel execution of the workflow dynamically. We use the PROV data model as a basic framework for modeling and providing runtime provenance as a database that can be queried even during the execution. This database is agnostic of SWfMS and workflow engine. We show the benefits of representing and sharing runtime provenance data for improving the experiment management as well as the analysis of the scientific data.},
  booktitle = {{EDBT}/{ICDT} {Workshops}},
  publisher = {ACM Press},
  author = {Costa, Flavio and Silva, Vítor and de Oliveira, Daniel and Ocaña, Kary and Ogasawara, Eduardo and Dias, Jonas and Mattoso, Marta},
  year = {2013},
  keywords = {PROV model, Scientific workflows, import cartridge},
  pages = {282--289},
}

Algebraic dataflows for big data analysis. Dias, J.; Ogasawara, E.; de Oliveira, D.; Porto, F.; Valduriez, P.; and Mattoso, M. In 2013 IEEE International Conference on Big Data, pages 150–155, 2013.

@inproceedings{dias_algebraic_2013,
  title = {Algebraic dataflows for big data analysis},
  doi = {10.1109/BigData.2013.6691567},
  abstract = {Analyzing big data requires the support of dataflows with many activities to extract and explore relevant information from the data. Recent approaches such as Pig Latin propose a high-level language to model such dataflows. However, the dataflow execution is typically delegated to a MapReduce implementation such as Hadoop, which does not follow an algebraic approach, thus it cannot take advantage of the optimization opportunities of PigLatin algebra. In this paper, we propose an approach for big data analysis based on algebraic workflows, which yields optimization and parallel execution of activities and supports user steering using provenance queries. We illustrate how a big data processing dataflow can be modeled using the algebra. Through an experimental evaluation using real datasets and the execution of the dataflow with Chiron, an engine that supports our algebra, we show that our approach yields performance gains of up to 19.6\% using algebraic optimizations in the dataflow and up to 39.1\% of time saved on a user steering scenario.},
  booktitle = {2013 {IEEE} {International} {Conference} on {Big} {Data}},
  author = {Dias, J. and Ogasawara, E. and de Oliveira, D. and Porto, F. and Valduriez, P. and Mattoso, M.},
  year = {2013},
  note = {00001},
  keywords = {Algebra, Chiron, Data storage systems, Hadoop, High level languages, History, Information Management, MapReduce implementation, PigLatin algebra, Runtime, activities optimization, activities parallel execution, algebraic dataflow execution, algebraic optimizations, algebraic workflow, big data, big data analysis, big data processing dataflow, data analysis, data flow computing, data handling, dataflow, high-level language, information extraction, optimization, performance evaluation, provenance queries, query processing, user steering scenario},
  pages = {150--155},
}

Prov-Vis: Large-Scale Scientific Data Visualization Using Provenance (Abstract). Horta, F.; Dias, J.; Elias, R.; Oliveira, D.; Coutinho, A. L. G. A.; and Mattoso, M. In Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, Denver, CO, USA, 2013.

@inproceedings{horta_prov-vis:_2013,
  address = {Denver, CO, USA},
  title = {Prov-{Vis}: {Large}-{Scale} {Scientific} {Data} {Visualization} {Using} {Provenance} ({Abstract})},
  shorttitle = {Abstract},
  abstract = {Large-scale scientific computing often relies on intensive tasks chained through a workflow. Scientists need to check the status of the execution at particular points, to discover if anything odd has happened and take actions. To achieve that, they need to track partial result files, which is usually complex and laborious. When using a scientific workflow system, provenance data keeps track of every step of the execution. If traversing provenance data is allowed at runtime, it is easier to monitor and analyze partial results. However, visualization of partial results is necessary to be done in sync to the workflow provenance. Prov-Vis is a scientific data visualization tool for large-scale workflows that is based on runtime provenance queries to organize and aggregate data for visualization. Prov-Vis helps scientists to follow the steps of the running workflow and visualize the produced partial results. This innovates because several systems execute workflows “offline” and do not allow for runtime analysis and workflow steering. To evaluate Prov-Vis, a finite element computational fluid dynamics workflow is executed on a supercomputer. Prov-Vis supported the visualization, on a tiled-wall, of several simulation steps and different views based on runtime provenance queries.},
  booktitle = {Proceedings of the {International} {Conference} on {High} {Performance} {Computing}, {Networking}, {Storage} and {Analysis}},
  author = {Horta, Felipe and Dias, Jonas and Elias, Renato and Oliveira, Daniel and Coutinho, Alvaro L. G. A. and Mattoso, Marta},
  year = {2013},
}

Provenance Traces from Chiron Parallel Workflow Engine. Horta, F.; Silva, V.; Costa, F.; de Oliveira, D.; Ocaña, K.; Ogasawara, E.; Dias, J.; and Mattoso, M. In EDBT/ICDT, EDBT '13, pages 337–338, New York, NY, USA, 2013. ACM.

@inproceedings{horta_provenance_2013,
  address = {New York, NY, USA},
  series = {{EDBT} '13},
  title = {Provenance {Traces} from {Chiron} {Parallel} {Workflow} {Engine}},
  isbn = {978-1-4503-1599-9},
  url = {http://doi.acm.org/10.1145/2457317.2457379},
  doi = {10.1145/2457317.2457379},
  abstract = {Scientific workflows are commonly used to model and execute large-scale scientific experiments. They represent key resources for scientists and are managed by Scientific Workflow Management Systems (SWfMS). The different languages used by SWfMS may impact in the way the workflow engine executes the workflow, sometimes limiting optimization opportunities. To tackle this issue, we recently proposed a scientific workflow algebra [1]. This algebra is inspired by database relational algebra and it enables automatic optimization of scientific workflows to be executed in parallel in high performance computing (HPC) environments. This way, the experiments presented in this paper were executed in Chiron, a parallel scientific workflow engine implemented to support the scientific workflow algebra. Before executing the workflow, Chiron stores the prospective provenance [2] of the workflow on its provenance database. Each workflow is composed by several activities, and each activity consumes relations. Similarly to relational databases, a relation contains a set of attributes and it is composed by a set of tuples. Each tuple in a relation contains a series of values, each one associated to a specific attribute. The tuples of a relation are distributed to be consumed in parallel over the computing resources according to the workflow activity. During and after the execution, the retrospective provenance [2] is also stored.},
  urldate = {2014-04-01},
  booktitle = {{EDBT}/{ICDT}},
  publisher = {ACM},
  author = {Horta, Felipe and Silva, Vítor and Costa, Flavio and de Oliveira, Daniel and Ocaña, Kary and Ogasawara, Eduardo and Dias, Jonas and Mattoso, Marta},
  year = {2013},
  pages = {337--338},
}

SciLightning: a Cloud Provenance-based Event Notification for Parallel Workflows. Pintas, J.; Oliveira, D.; Ocaña, K. A. C. S.; Ogasawara, E.; and Mattoso, M. In Proceedings of the 3rd International Workshop on Cloud Computing and Scientific Applications (CCSA), pages 352–365, Berlin, Germany, 2013.

@inproceedings{pintas_scilightning:_2013,
  address = {Berlin, Germany},
  title = {{SciLightning}: a {Cloud} {Provenance}-based {Event} {Notification} for {Parallel} {Workflows}},
  booktitle = {Proceedings of the 3rd {International} {Workshop} on {Cloud} {Computing} and {Scientific} {Applications} ({CCSA})},
  author = {Pintas, Julliano and Oliveira, Daniel and Ocaña, Kary A. C. S. and Ogasawara, Eduardo and Mattoso, Marta},
  year = {2013},
  pages = {352--365},
}

On the Performance of the Position() XPath Function. Silva, L.; Silva, L.; Mattoso, M.; and Braganholo, V. In ACM Symposium on Document Engineering (DocEng), pages 229–230, Florence, Italy, 2013.

@inproceedings{silva_performance_2013,
  address = {Florence, Italy},
  title = {On the {Performance} of the {Position}() {XPath} {Function}},
  booktitle = {{ACM} {Symposium} on {Document} {Engineering} ({DocEng})},
  author = {Silva, Luiz and Silva, Luiz and Mattoso, Marta and Braganholo, Vanessa},
  year = {2013},
  pages = {229--230},
}

Uma Arquitetura P2P de Distribuição de Atividades para Execução Paralela de Workflows Científicos. Silva, V.; Dias, J.; Oliveira, D.; Ogasawara, E.; and Mattoso, M. In VII e-Science, pages 1–8, Maceió, Alagoas, Brazil, 2013.

@inproceedings{silva_uma_2013,
  address = {Maceió, Alagoas, Brazil},
  title = {Uma {Arquitetura} {P2P} de {Distribuição} de {Atividades} para {Execução} {Paralela} de {Workflows} {Científicos}},
  booktitle = {{VII} e-{Science}},
  author = {Silva, Vitor and Dias, Jonas and Oliveira, Daniel and Ogasawara, Eduardo and Mattoso, Marta},
  year = {2013},
  keywords = {unified modeling language (uml), version control},
  pages = {1--8},
}

Designing a parallel cloud based comparative genomics workflow to improve phylogenetic analyses. Ocaña, K. A.; de Oliveira, D.; Dias, J.; Ogasawara, E.; and Mattoso, M. Future Generation Computer Systems, 29(8): 2205–2219. October 2013.

@article{ocana_designing_2013,
  title = {Designing a parallel cloud based comparative genomics workflow to improve phylogenetic analyses},
  volume = {29},
  issn = {0167739X},
  url = {http://linkinghub.elsevier.com/retrieve/pii/S0167739X13000654},
  doi = {10.1016/j.future.2013.04.005},
  language = {en},
  number = {8},
  urldate = {2014-04-02},
  journal = {Future Generation Computer Systems},
  author = {Ocaña, Kary A.C.S. and de Oliveira, Daniel and Dias, Jonas and Ogasawara, Eduardo and Mattoso, Marta},
  month = oct,
  year = {2013},
  pages = {2205--2219},
}

2012 (6)

Evaluating Parameter Sweep Workflows in High Performance Computing. Chirigati, F.; Silva, V.; Ogasawara, E.; Oliveira, D.; Dias, J.; Porto, F.; Valduriez, P.; and Mattoso, M. In Proceedings of the 1st International Workshop on Scalable Workflow Enactment Engines and Technologies (SWEET'12), page 10, Scottsdale, AZ, USA, 2012.

@inproceedings{chirigati_evaluating_2012,
  address = {Scottsdale, AZ, EUA},
  title = {Evaluating {Parameter} {Sweep} {Workflows} in {High} {Performance} {Computing}},
  isbn = {978-1-4503-1876-1},
  doi = {10.1145/2443416.2443418},
  abstract = {Scientific experiments based on computer simulations can be defined, executed and monitored using Scientific Workflow Management Systems (SWfMS). Several SWfMS are available, each with a different goal and a different engine. Due to the exploratory analysis, scientists need to run parameter sweep (PS) workflows, which are workflows that are invoked repeatedly using different input data. These workflows generate a large amount of tasks that are submitted to High Performance Computing (HPC) environments. Different execution models for a workflow may have significant differences in performance in HPC. However, selecting the best execution model for a given workflow is difficult due to the existence of many characteristics of the workflow that may affect the parallel execution. We developed a study to show performance impacts of using different execution models in running PS workflows in HPC. Our study contributes by presenting a characterization of PS workflow patterns (the basis for many existing scientific workflows) and its behavior under different execution models in HPC. We evaluated four execution models to run workflows in parallel. Our study measures the performance behavior of small, large and complex workflows among the evaluated execution models. The results can be used as a guideline to select the best model for a given scientific workflow execution in HPC. Our evaluation may also serve as a basis for workflow designers to analyze the expected behavior of an HPC workflow engine based on the characteristics of PS workflows.},
  booktitle = {Proceedings of the 1st {International} {Workshop} on {Scalable} {Workflow} {Enactment} {Engines} and {Technologies} ({SWEET}'12)},
  author = {Chirigati, Fernando and Silva, Vítor and Ogasawara, Eduardo and Oliveira, Daniel and Dias, Jonas and Porto, Fabio and Valduriez, Patrick and Mattoso, Marta},
  year = {2012},
  pages = {10},
}

MTCProv: a practical provenance query framework for many-task scientific computing. Gadelha, L. M. R.; Wilde, M.; Mattoso, M.; and Foster, I. Distributed and Parallel Databases, 30(5-6): 351–370. 2012.

@article{gadelha_mtcprov:_2012,
  title = {{MTCProv}: a practical provenance query framework for many-task scientific computing},
  volume = {30},
  issn = {0926-8782, 1573-7578},
  shorttitle = {{MTCProv}},
  doi = {10.1007/s10619-012-7104-4},
  abstract = {Scientific research is increasingly assisted by computer-based experiments. Such experiments are often composed of a vast number of loosely-coupled computational tasks that are specified and automated as scientific workflows. This large scale is also characteristic of the data that flows within such “many-task” computations (MTC). Provenance information can record the behavior of such computational experiments via the lineage of process and data artifacts. However, work to date has focused on lineage data models, leaving unsolved issues of recording and querying other aspects, such as domain-specific information about the experiments, MTC behavior given by resource consumption and failure information, or the impact of environment on performance and accuracy. In this work we contribute with MTCProv, a provenance query framework for many-task scientific computing that captures the runtime execution details of MTC workflow tasks on parallel and distributed systems, in addition to standard prospective and data derivation provenance. To help users query provenance data we provide a high level interface that hides relational query complexities. We evaluate MTCProv using an application in protein science, and describe how important query patterns such as correlations between provenance, runtime data, and scientific parameters are simplified and expressed.},
  language = {en},
  number = {5-6},
  journal = {Distributed and Parallel Databases},
  author = {Gadelha, Luiz M. R. and Wilde, Michael and Mattoso, Marta and Foster, Ian},
  year = {2012},
  keywords = {Data structures, Database Management, Information Systems Applications (incl. Internet), Many-task computing, Memory Structures, Operating Systems, Parallel and distributed computing, database queries, provenance},
  pages = {351--370},
}

Cloud-based Phylogenomic Inference of Evolutionary Relationships: A Performance Study. Oliveira, D.; Ocaña, K. A. C. S.; Ogasawara, E.; Dias, J.; Goncalves, J.; and Mattoso, M. In Proceedings of the 2nd International Workshop on Cloud Computing and Scientific Applications (CCSA), Ottawa, Canada, 2012.

@inproceedings{oliveira_cloud-based_2012,
  address = {Ottawa, Canadá},
  title = {Cloud-based {Phylogenomic} {Inference} of {Evolutionary} {Relationships}: {A} {Performance} {Study}},
  booktitle = {Proceedings of the 2nd {International} {Workshop} on {Cloud} {Computing} and {Scientific} {Applications} ({CCSA})},
  author = {Oliveira, Daniel and Ocaña, Kary A. C. S. and Ogasawara, Eduardo and Dias, Jonas and Goncalves, Joao and Mattoso, Marta},
  year = {2012},
  note = {00003},
}

A Provenance-based Adaptive Scheduling Heuristic for Parallel Scientific Workflows in Clouds. Oliveira, D. d.; Ocaña, K. A. C. S.; Baião, F.; and Mattoso, M. Journal of Grid Computing, 10(3): 521–552. 2012.

@article{oliveira_provenance-based_2012,
  title = {A {Provenance}-based {Adaptive} {Scheduling} {Heuristic} for {Parallel} {Scientific} {Workflows} in {Clouds}},
  volume = {10},
  issn = {1570-7873, 1572-9184},
  doi = {10.1007/s10723-012-9227-2},
  abstract = {In the last years, scientific workflows have emerged as a fundamental abstraction for structuring and executing scientific experiments in computational environments. Scientific workflows are becoming increasingly complex and more demanding in terms of computational resources, thus requiring the usage of parallel techniques and high performance computing (HPC) environments. Meanwhile, clouds have emerged as a new paradigm where resources are virtualized and provided on demand. By using clouds, scientists have expanded beyond single parallel computers to hundreds or even thousands of virtual machines. Although the initial focus of clouds was to provide high throughput computing, clouds are already being used to provide an HPC environment where elastic resources can be instantiated on demand during the course of a scientific workflow. However, this model also raises many open, yet important, challenges such as scheduling workflow activities. Scheduling parallel scientific workflows in the cloud is a very complex task since we have to take into account many different criteria and to explore the elasticity characteristic for optimizing workflow execution. In this paper, we introduce an adaptive scheduling heuristic for parallel execution of scientific workflows in the cloud that is based on three criteria: total execution time (makespan), reliability and financial cost. Besides scheduling workflow activities based on a 3-objective cost model, this approach also scales resources up and down according to the restrictions imposed by scientists before workflow execution. This tuning is based on provenance data captured and queried at runtime. We conducted a thorough validation of our approach using a real bioinformatics workflow. The experiments were performed in SciCumulus, a cloud workflow engine for managing scientific workflow execution.},
  language = {en},
  number = {3},
  urldate = {2013-08-28},
  journal = {Journal of Grid Computing},
  author = {Oliveira, Daniel de and Ocaña, Kary A. C. S. and Baião, Fernanda and Mattoso, Marta},
  year = {2012},
  keywords = {Management of Computing and Information Systems, Processor Architectures, Scientific experiment, Scientific workflow, User Interfaces and Human Computer Interaction, cloud computing, provenance},
  pages = {521--552},
}

Recomendações para Fragmentação Horizontal de Bases de Dados XML. Silva, T.; Baião, F.; Sampaio, J.; Mattoso, M.; and Braganholo, V. Pages 145–152, São Paulo, Brazil, 2012.

@inproceedings{silva_recomendacoes_2012,
  address = {São Paulo, Brasil},
  title = {Recomendações para {Fragmentação} {Horizontal} de {Bases} de {Dados} {XML}},
  author = {Silva, Tatiane and Baião, Fernanda and Sampaio, Jonice and Mattoso, Marta and Braganholo, Vanessa},
  year = {2012},
  pages = {145--152},
}

\n \n\n \n \n \n \n \n \n Exploring Molecular Evolution Reconstruction Using a Parallel Cloud Based Scientific Workflow.\n \n \n \n \n\n\n \n Ocaña, K. A. C. S.; Oliveira, D. d.; Horta, F.; Dias, J.; Ogasawara, E.; and Mattoso, M.\n\n\n \n\n\n\n In Souto, M. C. d.; and Kann, M. G., editor(s), Advances in Bioinformatics and Computational Biology, of Lecture Notes in Computer Science, pages 179–191. Springer Berlin Heidelberg, January 2012.\n \n\n\n\n
\n\n\n\n \n \n \"ExploringPaper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n\n\n
\n
@incollection{ocana_exploring_2012,\n\tseries = {Lecture {Notes} in {Computer} {Science}},\n\ttitle = {Exploring {Molecular} {Evolution} {Reconstruction} {Using} a {Parallel} {Cloud} {Based} {Scientific} {Workflow}},\n\tcopyright = {©2012 Springer-Verlag Berlin Heidelberg},\n\tisbn = {978-3-642-31926-6 978-3-642-31927-3},\n\turl = {http://link.springer.com/chapter/10.1007/978-3-642-31927-3_16},\n\tabstract = {Recent studies of evolution at molecular level address two important issues: reconstruction of the evolutionary relationships between species and investigation of the forces of the evolutionary process. Both issues experienced an explosive growth in the last two decades due to massive generation of genomic data, novel statistical methods and computational approaches to process and analyze this large volume of data. Most experiments in molecular evolution are based on computing intensive simulations preceded by other computation tools and post-processed by computing validators. All these tools can be modeled as scientific workflows to improve the experiment management while capturing provenance data. However, these evolutionary analyses experiments are very complex and may execute for weeks. These workflows need to be executed in parallel in High Performance Computing (HPC) environments such as clouds. Clouds are becoming adopted for bioinformatics experiments due to its characteristics, such as, elasticity and availability. Clouds are evolving into HPC environments. In this paper, we introduce SciEvol, a bioinformatics scientific workflow for molecular evolution reconstruction that aims at inferring evolutionary relationships (i.e. to detect positive Darwinian selection) on genomic data. SciEvol is designed and implemented to execute in parallel over the clouds using SciCumulus workflow engine. Our experiments show that SciEvol can help scientists by enabling the reconstruction of evolutionary relationships using the cloud environment. Results present performance improvements of up to 94.64\\% in the execution time when compared to the sequential execution, which drops from around 10 days to 12 hours.},\n\tnumber = {7409},\n\turldate = {2014-02-18},\n\tbooktitle = {Advances in {Bioinformatics} and {Computational} {Biology}},\n\tpublisher = {Springer Berlin Heidelberg},\n\tauthor = {Ocaña, Kary A. C. S. and Oliveira, Daniel de and Horta, Felipe and Dias, Jonas and Ogasawara, Eduardo and Mattoso, Marta},\n\teditor = {Souto, Marcilio C. de and Kann, Maricel G.},\n\tmonth = jan,\n\tyear = {2012},\n\tkeywords = {Algorithm Analysis and Problem Complexity, Artificial Intelligence (incl. Robotics), Cloud, Computation by Abstract Devices, Computational Biology/Bioinformatics, Database Management, Molecular Evolution Reconstruction, Pattern Recognition, Scientific workflow},\n\tpages = {179--191},\n}\n\n
Recent studies of evolution at molecular level address two important issues: reconstruction of the evolutionary relationships between species and investigation of the forces of the evolutionary process. Both issues experienced an explosive growth in the last two decades due to massive generation of genomic data, novel statistical methods and computational approaches to process and analyze this large volume of data. Most experiments in molecular evolution are based on computing intensive simulations preceded by other computation tools and post-processed by computing validators. All these tools can be modeled as scientific workflows to improve the experiment management while capturing provenance data. However, these evolutionary analyses experiments are very complex and may execute for weeks. These workflows need to be executed in parallel in High Performance Computing (HPC) environments such as clouds. Clouds are becoming adopted for bioinformatics experiments due to its characteristics, such as, elasticity and availability. Clouds are evolving into HPC environments. In this paper, we introduce SciEvol, a bioinformatics scientific workflow for molecular evolution reconstruction that aims at inferring evolutionary relationships (i.e. to detect positive Darwinian selection) on genomic data. SciEvol is designed and implemented to execute in parallel over the clouds using SciCumulus workflow engine. Our experiments show that SciEvol can help scientists by enabling the reconstruction of evolutionary relationships using the cloud environment. Results present performance improvements of up to 94.64% in the execution time when compared to the sequential execution, which drops from around 10 days to 12 hours.

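The closing figures are internally consistent; as a quick editorial check using only the numbers quoted in the abstract (not additional data from the paper), a 94.64% reduction ending at a 12-hour parallel run implies a sequential baseline of

\[
T_{\text{seq}} = \frac{T_{\text{par}}}{1 - 0.9464} = \frac{12\,\text{h}}{0.0536} \approx 224\,\text{h} \approx 9.3\,\text{days},
\]

which matches the stated drop "from around 10 days to 12 hours".
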
2011 (15)

Supporting Dynamic Parameter Sweep in Adaptive and User-Steered Workflow.
Dias, J.; Ogasawara, E.; Oliveira, D.; Porto, F.; Coutinho, A.; and Mattoso, M.
In WORKS (WORKS '11), pages 31–36, Seattle, WA, USA, 2011. ACM. ACM ID: 1645178.

@inproceedings{dias_supporting_2011,
  address = {Seattle, WA, USA},
  series = {{WORKS} '11},
  title = {Supporting {Dynamic} {Parameter} {Sweep} in {Adaptive} and {User}-{Steered} {Workflow}},
  isbn = {978-1-4503-1100-7},
  doi = {10.1145/2110497.2110502},
  booktitle = {{WORKS}},
  publisher = {ACM},
  author = {Dias, Jonas and Ogasawara, Eduardo and Oliveira, Daniel and Porto, Fabio and Coutinho, Alvaro and Mattoso, Marta},
  year = {2011},
  note = {ACM ID: 1645178},
  keywords = {Workflow management, design, logistics, management, strategic information systems planning, systems analysis and design, systems development, theory},
  pages = {31--36},
}

Provenance Query Patterns for Many-Task Scientific Computing.
Gadelha, L.; Mattoso, M.; Wilde, M.; and Foster, I.
In USENIX Workshop on the Theory and Practice of Provenance, Heraklion, Crete, Greece, 2011.

@inproceedings{gadelha_provenance_2011,
  address = {Heraklion, Crete, Greece},
  title = {Provenance {Query} {Patterns} for {Many}-{Task} {Scientific} {Computing}},
  booktitle = {{USENIX} {Workshop} on the {Theory} and {Practice} of {Provenance}},
  author = {Gadelha, Luiz and Mattoso, Marta and Wilde, Michael and Foster, Ian},
  year = {2011},
}

SciPhy: A Cloud-Based Workflow for Phylogenetic Analysis of Drug Targets in Protozoan Genomes.
Ocaña, K.; Oliveira, D. d.; Ogasawara, E.; Dávila, A.; Lima, A.; and Mattoso, M.
In Advances in Bioinformatics and Computational Biology, Lecture Notes in Computer Science, pages 66–70, 2011. Springer.

@inproceedings{ocana_sciphy:_2011,
  series = {Lecture {Notes} in {Computer} {Science}},
  title = {{SciPhy}: {A} {Cloud}-{Based} {Workflow} for {Phylogenetic} {Analysis} of {Drug} {Targets} in {Protozoan} {Genomes}},
  copyright = {©2011 Springer-Verlag GmbH Berlin Heidelberg},
  isbn = {978-3-642-22824-7 978-3-642-22825-4},
  shorttitle = {{SciPhy}},
  url = {http://link.springer.com/chapter/10.1007/978-3-642-22825-4_9},
  abstract = {Bioinformatics experiments are rapidly evolving with genomic projects that analyze large amounts of data. This fact demands high performance computation and opens up for exploring new approaches to provide better control and performance when running experiments, including Phylogeny/Phylogenomics. We designed a phylogenetic scientific workflow, named SciPhy, to construct phylogenetic trees from a set of drug target enzymes found in protozoan genomes. Our contribution is the development, implementation and test of SciPhy in public cloud computing environments. SciPhy can be used in other Bioinformatics experiments to control a systematic execution with high performance while producing provenance data.},
  language = {en},
  urldate = {2014-03-14},
  booktitle = {Advances in {Bioinformatics} and {Computational} {Biology}},
  publisher = {Springer},
  author = {Ocaña, Kary and Oliveira, Daniel de and Ogasawara, Eduardo and Dávila, A. and Lima, A. and Mattoso, Marta},
  year = {2011},
  keywords = {Algorithm Analysis and Problem Complexity, Artificial Intelligence (incl. Robotics), Computation by Abstract Devices, Computational Biology/Bioinformatics, Database Management, Pattern Recognition, Phylogeny, Protozoa, Scientific workflow, cloud computing},
  pages = {66--70},
}

Bioinformatics experiments are rapidly evolving with genomic projects that analyze large amounts of data. This fact demands high performance computation and opens up for exploring new approaches to provide better control and performance when running experiments, including Phylogeny/Phylogenomics. We designed a phylogenetic scientific workflow, named SciPhy, to construct phylogenetic trees from a set of drug target enzymes found in protozoan genomes. Our contribution is the development, implementation and test of SciPhy in public cloud computing environments. SciPhy can be used in other Bioinformatics experiments to control a systematic execution with high performance while producing provenance data.

Virtual Partitioning ad-hoc Queries over Distributed XML Databases.
Rodrigues, C.; Braganholo, V.; and Mattoso, M.
Journal of Information and Data Management, 2(3): 495–510. 2011.

@article{rodrigues_virtual_2011,
  title = {Virtual {Partitioning} ad-hoc {Queries} over {Distributed} {XML} {Databases}},
  volume = {2},
  issn = {2178-7107},
  number = {3},
  journal = {Journal of Information and Data Management},
  author = {Rodrigues, Carla and Braganholo, Vanessa and Mattoso, Marta},
  year = {2011},
  keywords = {XML, distributed query processing, virtual partitioning},
  pages = {495--510},
}

Migrating Scientific Experiments to the Cloud.
Oliveira, D.; Baiao, F.; and Mattoso, M.
HPC in the Cloud. March 2011.

@article{oliveira_migrating_2011,
  title = {Migrating {Scientific} {Experiments} to the {Cloud}},
  journal = {HPC in the Cloud},
  author = {Oliveira, Daniel and Baiao, Fernanda and Mattoso, Marta},
  month = mar,
  year = {2011},
}

Provenance management in Swift.
Gadelha, L. M.; Clifford, B.; Mattoso, M.; Wilde, M.; and Foster, I.
Future Generation Computer Systems, 27(6): 775–780. June 2011.

@article{gadelha_provenance_2011-1,
  title = {Provenance management in {Swift}},
  volume = {27},
  issn = {0167-739X},
  url = {http://www.sciencedirect.com/science/article/pii/S0167739X1000083X},
  doi = {10.1016/j.future.2010.05.003},
  abstract = {The Swift parallel scripting language allows for the specification, execution and analysis of large-scale computations in parallel and distributed environments. It incorporates a data model for recording and querying provenance information. In this article we describe these capabilities and evaluate the interoperability with other systems through the use of the Open Provenance Model. We describe Swift's provenance data model and compare it to the Open Provenance Model. We also describe and evaluate activities performed within the Third Provenance Challenge, which consisted of implementing a specific scientific workflow, capturing and recording provenance information of its execution, performing provenance queries, and exchanging provenance information with other systems. Finally, we propose improvements to both the Open Provenance Model and Swift's provenance system.},
  number = {6},
  urldate = {2011-06-04},
  journal = {Future Generation Computer Systems},
  author = {Gadelha, Luiz M. R. and Clifford, Ben and Mattoso, Marta and Wilde, Michael and Foster, Ian},
  month = jun,
  year = {2011},
  keywords = {Parallel scripting languages, Scientific workflows, provenance},
  pages = {775--780},
}

The Swift parallel scripting language allows for the specification, execution and analysis of large-scale computations in parallel and distributed environments. It incorporates a data model for recording and querying provenance information. In this article we describe these capabilities and evaluate the interoperability with other systems through the use of the Open Provenance Model. We describe Swift's provenance data model and compare it to the Open Provenance Model. We also describe and evaluate activities performed within the Third Provenance Challenge, which consisted of implementing a specific scientific workflow, capturing and recording provenance information of its execution, performing provenance queries, and exchanging provenance information with other systems. Finally, we propose improvements to both the Open Provenance Model and Swift's provenance system.

A Performance Evaluation of X-Ray Crystallography Scientific Workflow Using SciCumulus.
Oliveira, D.; Ocaña, K.; Ogasawara, E.; Dias, J.; Baião, F.; and Mattoso, M.
In IEEE International Conference on Cloud Computing (CLOUD), pages 708–715, Washington, D.C., USA, July 2011. IEEE.

@inproceedings{oliveira_performance_2011,
  address = {Washington, D.C., USA},
  title = {A {Performance} {Evaluation} of {X}-{Ray} {Crystallography} {Scientific} {Workflow} {Using} {SciCumulus}},
  isbn = {978-1-4577-0836-7},
  doi = {10.1109/CLOUD.2011.99},
  abstract = {X-ray crystallography is an important field due to its role in drug discovery and its relevance in bioinformatics experiments of comparative genomics, phylogenomics, evolutionary analysis, ortholog detection, and three-dimensional structure determination. Managing these experiments is a challenging task due to the orchestration of legacy tools and the management of several variations of the same experiment. Workflows can model a coherent flow of activities that are managed by scientific workflow management systems (SWfMS). Due to the huge amount of variations of the workflow to be explored (parameters, input data) it is often necessary to execute X-ray crystallography experiments in High Performance Computing (HPC) environments. Cloud computing is well known for its scalable and elastic HPC model. In this paper, we present a performance evaluation for the X-ray crystallography workflow defined by the PC4 (Provenance Challenge series). The workflow was executed using the SciCumulus middleware at the Amazon EC2 cloud environment. SciCumulus is a layer for SWfMS that offers support for the parallel execution of scientific workflows in cloud environments with provenance mechanisms. Our results reinforce the benefits (total execution time × monetary cost) of parallelizing the X-ray crystallography workflow using SciCumulus. The results show a consistent way to execute X-ray crystallography workflows that need HPC using cloud computing. The evaluated workflow shares features of many scientific workflows and can be applied to other experiments.},
  language = {English},
  booktitle = {{IEEE} {International} {Conference} on {Cloud} {Computing} ({CLOUD})},
  publisher = {IEEE},
  author = {Oliveira, D. and Ocaña, K. and Ogasawara, E. and Dias, Jonas and Baião, F. and Mattoso, M.},
  month = jul,
  year = {2011},
  keywords = {Amazon EC2 cloud environment, Middleware, Phylogenomics, Reflection, SciCumulus middleware, Scientific workflows, X-ray crystallography, X-ray crystallography scientific workflow, X-ray diffraction, X-ray imaging, bioinformatics, cloud computing, crystallography, drug discovery, evolutionary analysis, execution time, high performance computing, monetary cost, ortholog detection, parallel execution, performance evaluation, provenance challenge series, scalable elastic HPC model, scientific information systems, scientific workflow management system, three-dimensional structure determination, workflow management software},
  pages = {708--715},
}

X-ray crystallography is an important field due to its role in drug discovery and its relevance in bioinformatics experiments of comparative genomics, phylogenomics, evolutionary analysis, ortholog detection, and three-dimensional structure determination. Managing these experiments is a challenging task due to the orchestration of legacy tools and the management of several variations of the same experiment. Workflows can model a coherent flow of activities that are managed by scientific workflow management systems (SWfMS). Due to the huge amount of variations of the workflow to be explored (parameters, input data) it is often necessary to execute X-ray crystallography experiments in High Performance Computing (HPC) environments. Cloud computing is well known for its scalable and elastic HPC model. In this paper, we present a performance evaluation for the X-ray crystallography workflow defined by the PC4 (Provenance Challenge series). The workflow was executed using the SciCumulus middleware at the Amazon EC2 cloud environment. SciCumulus is a layer for SWfMS that offers support for the parallel execution of scientific workflows in cloud environments with provenance mechanisms. Our results reinforce the benefits (total execution time × monetary cost) of parallelizing the X-ray crystallography workflow using SciCumulus. The results show a consistent way to execute X-ray crystallography workflows that need HPC using cloud computing. The evaluated workflow shares features of many scientific workflows and can be applied to other experiments.

Optimizing Phylogenetic Analysis Using SciHmm Cloud-based Scientific Workflow.
Ocaña, K. A. C. S.; Oliveira, D.; Dias, J.; Ogasawara, E.; and Mattoso, M.
In Proceedings of the 7th IEEE International Conference on e-Science (e-Science), pages 190–197, Stockholm, Sweden, December 2011. IEEE.

@inproceedings{ocana_optimizing_2011,
  address = {Stockholm, Sweden},
  title = {Optimizing {Phylogenetic} {Analysis} {Using} {SciHmm} {Cloud}-based {Scientific} {Workflow}},
  language = {English},
  booktitle = {Proceedings of the 7th {IEEE} {International} {Conference} on e-{Science} (e-{Science})},
  publisher = {IEEE},
  author = {Ocaña, Kary A. C. S. and Oliveira, Daniel and Dias, Jonas and Ogasawara, Eduardo and Mattoso, Marta},
  month = dec,
  year = {2011},
  keywords = {Auditing, Computational modeling, Computers, Distributed databases, Electronic mail, Pedigree, Sketch, authoritative local repository, central server, data models, data-intensive scientific applications, decentralized architecture, distributed, distributed data provenance, lineage, meta data, metadata, monitoring, provenance, provenance management system, provenance records, query formulation, querying, remote hosts, tracking},
  pages = {190--197},
}

Heurísticas para Controle de Execução de Atividades de Workflows Científicos na Nuvem.
Costa, F.; Oliveira, D.; and Mattoso, M.
In Anais do Workshop de Teses e Dissertações em Bancos de Dados - SBBD 2011, Florianópolis, SC, Brasil, 2011. Sociedade Brasileira de Computação.

@inproceedings{costa_heuristicas_2011,
  address = {Florianópolis, SC, Brasil},
  title = {Heurísticas para {Controle} de {Execução} de {Atividades} de {Workflows} {Científicos} na {Nuvem}},
  booktitle = {Anais do {Workshop} de {Teses} e {Dissertações} em {Bancos} de {Dados} - {SBBD} 2011},
  publisher = {Sociedade Brasileira de Computação},
  author = {Costa, Flávio and Oliveira, Daniel and Mattoso, Marta},
  year = {2011},
}

Exploring Provenance in High Performance Scientific Computing.
Gadelha Junior, L. M. R.; Wilde, M.; Mattoso, M.; and Foster, I.
In Proceedings of the First Annual Workshop on High Performance Computing Meets Databases, HPCDB '11, pages 17–20, New York, NY, USA, 2011. ACM.

@inproceedings{gadelhajunior_exploring_2011,
  address = {New York, NY, USA},
  series = {{HPCDB} '11},
  title = {Exploring {Provenance} in {High} {Performance} {Scientific} {Computing}},
  isbn = {978-1-4503-1157-1},
  url = {http://doi.acm.org/10.1145/2125636.2125643},
  doi = {10.1145/2125636.2125643},
  abstract = {Large-scale scientific computations are often organized as a composition of many computational tasks linked through data flow. After the completion of a computational scientific experiment, a scientist has to analyze its outcome, for instance, by checking inputs and outputs of computational tasks that are part of the experiment. This analysis can be automated using provenance management systems that describe, for instance, the production and consumption relationships between data artifacts, such as files, and the computational tasks that compose the scientific application. In this article, we explore the relationship between high performance computing and provenance management systems, observing that storing provenance as structured data enriched with information about the runtime behavior of computational tasks in high performance computing environments can enable interesting and useful queries to correlate computational resource usage, scientific parameters, and data set derivation. We briefly describe how provenance of many-task scientific computations specified and coordinated by the Swift parallel scripting system is gathered and queried.},
  urldate = {2014-05-03},
  booktitle = {Proceedings of the {First} {Annual} {Workshop} on {High} {Performance} {Computing} {Meets} {Databases}},
  publisher = {ACM},
  author = {Gadelha Junior, Luiz Manoel Rocha and Wilde, Michael and Mattoso, Marta and Foster, Ian},
  year = {2011},
  keywords = {Databases, high performance computing, provenance},
  pages = {17--20},
}

Large-scale scientific computations are often organized as a composition of many computational tasks linked through data flow. After the completion of a computational scientific experiment, a scientist has to analyze its outcome, for instance, by checking inputs and outputs of computational tasks that are part of the experiment. This analysis can be automated using provenance management systems that describe, for instance, the production and consumption relationships between data artifacts, such as files, and the computational tasks that compose the scientific application. In this article, we explore the relationship between high performance computing and provenance management systems, observing that storing provenance as structured data enriched with information about the runtime behavior of computational tasks in high performance computing environments can enable interesting and useful queries to correlate computational resource usage, scientific parameters, and data set derivation. We briefly describe how provenance of many-task scientific computations specified and coordinated by the Swift parallel scripting system is gathered and queried.

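The abstract's central claim, that provenance stored as structured data supports queries correlating resource usage with scientific parameters, is easy to picture with a small example. The sketch below is an editorial illustration only: the two-table schema, the table and column names, and the sample rows are invented for this example and are not Swift's actual provenance model.

import sqlite3

# Hypothetical provenance store: one row per executed task, one row per
# (task, parameter) binding. Invented schema; not Swift's data model.
db = sqlite3.connect(":memory:")
db.executescript("""
CREATE TABLE task  (task_id TEXT PRIMARY KEY, app TEXT, runtime_s REAL);
CREATE TABLE param (task_id TEXT, name TEXT, value TEXT);
""")
db.executemany("INSERT INTO task VALUES (?, ?, ?)",
               [("t1", "blast", 120.0), ("t2", "blast", 340.0),
                ("t3", "blast", 125.0)])
db.executemany("INSERT INTO param VALUES (?, ?, ?)",
               [("t1", "evalue", "1e-5"), ("t2", "evalue", "1e-10"),
                ("t3", "evalue", "1e-5")])

# The query pattern: correlate a parameter value with resource usage.
for value, avg_runtime in db.execute("""
        SELECT p.value, AVG(t.runtime_s)
        FROM task t JOIN param p ON p.task_id = t.task_id
        WHERE t.app = 'blast' AND p.name = 'evalue'
        GROUP BY p.value"""):
    print(value, avg_runtime)

One relational join plus a GROUP BY ties runtime back to an experiment input, which is the kind of query the article argues becomes possible once provenance is recorded as structured data.
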
Challenges in managing implicit and abstract provenance data: experiences with ProvManager.
Marinho, A.; Mattoso, M.; Werner, C.; Braganholo, V.; and Murta, L.
In TaPP, pages 1–6, Crete, Greece, 2011. USENIX Association.

@inproceedings{marinho_challenges_2011,
  address = {Crete, Greece},
  title = {Challenges in managing implicit and abstract provenance data: experiences with {ProvManager}},
  language = {en},
  booktitle = {{TaPP}},
  publisher = {USENIX Association},
  author = {Marinho, Anderson and Mattoso, Marta and Werner, Claudia and Braganholo, Vanessa and Murta, Leonardo},
  year = {2011},
  pages = {1--6},
}

Distributed Database Research at COPPE/UFRJ.
Mattoso, M.; Braganholo, V.; Lima, A.; and Murta, L.
Journal of Information and Data Management (JIDM), 2(2): 123–138. 2011.

@article{mattoso_distributed_2011,
  title = {Distributed {Database} {Research} at {COPPE}/{UFRJ}},
  volume = {2},
  number = {2},
  journal = {Journal of Information and Data Management (JIDM)},
  author = {Mattoso, Marta and Braganholo, Vanessa and Lima, Alexandre and Murta, Leonardo},
  year = {2011},
  pages = {123--138},
}

An Algebraic Approach for Data-Centric Scientific Workflows.
Ogasawara, E.; Dias, J.; Oliveira, D.; Porto, F.; Valduriez, P.; and Mattoso, M.
PVLDB, 4(12): 1328–1339. 2011.

@article{ogasawara_algebraic_2011,
  title = {An {Algebraic} {Approach} for {Data}-{Centric} {Scientific} {Workflows}},
  volume = {4},
  issn = {2150-8097},
  number = {12},
  urldate = {2011-05-20},
  journal = {PVLDB},
  author = {Ogasawara, Eduardo and Dias, Jonas and Oliveira, Daniel and Porto, Fabio and Valduriez, Patrick and Mattoso, Marta},
  year = {2011},
  keywords = {Measurement, Workflow management, design, experimentation, performance, scientific databases},
  pages = {1328--1339},
}

Adding Ontologies to Scientific Workflow Composition.
Oliveira, D.; Ogasawara, E.; Baiao, F.; and Mattoso, M.
In XXVI SBBD, pages 1–8, Florianópolis, SC, Brazil, 2011.

@inproceedings{oliveira_adding_2011,
  address = {Florianópolis, SC, Brazil},
  title = {Adding {Ontologies} to {Scientific} {Workflow} {Composition}},
  booktitle = {{XXVI} {SBBD}},
  author = {Oliveira, Daniel and Ogasawara, Eduardo and Baiao, Fernanda and Mattoso, Marta},
  year = {2011},
  pages = {1--8},
}

Capturing Distributed Provenance Metadata from Cloud-Based Scientific Workflows.
Paulino, C.; Cruz, S.; Oliveira, D.; Campos, M. L. M.; and Mattoso, M.
Journal of Information and Data Management, 2(1): 43–50. 2011.

@article{paulino_capturing_2011,
  title = {Capturing {Distributed} {Provenance} {Metadata} from {Cloud}-{Based} {Scientific} {Workflows}},
  volume = {2},
  number = {1},
  journal = {Journal of Information and Data Management},
  author = {Paulino, C. E. and Cruz, S. M. S. and Oliveira, D. and Campos, M. L. M. and Mattoso, M.},
  year = {2011},
  pages = {43--50},
}

2010 (2)

Adaptive Normalization: A novel data normalization approach for non-stationary time series.
Ogasawara, E.; Martinez, L. C.; de Oliveira, D.; Zimbrão, G.; Pappa, G. L.; and Mattoso, M.
In International Joint Conference on Neural Networks (IJCNN), pages 1–8, July 2010. IEEE.

@inproceedings{ogasawara_adaptive_2010,
  title = {Adaptive {Normalization}: {A} novel data normalization approach for non-stationary time series},
  isbn = {978-1-4244-6916-1},
  shorttitle = {Adaptive {Normalization}},
  doi = {10.1109/IJCNN.2010.5596746},
  abstract = {Data normalization is a fundamental preprocessing step for mining and learning from data. However, finding an appropriated method to deal with time series normalization is not a simple task. This is because most of the traditional normalization methods make assumptions that do not hold for most time series. The first assumption is that all time series are stationary, i.e., their statistical properties, such as mean and standard deviation, do not change over time. The second assumption is that the volatility of the time series is considered uniform. None of the methods currently available in the literature address these issues. This paper proposes a new method for normalizing non-stationary heteroscedastic (with non-uniform volatility) time series. The method, named Adaptive Normalization (AN), was tested together with an Artificial Neural Network (ANN) in three forecast problems. The results were compared to other four traditional normalization methods, and showed AN improves ANN accuracy in both short- and long-term predictions.},
  language = {English},
  booktitle = {International {Joint} {Conference} on {Neural} {Networks} ({IJCNN})},
  publisher = {IEEE},
  author = {Ogasawara, E. and Martinez, L. C. and de Oliveira, D. and Zimbrão, G. and Pappa, G. L. and Mattoso, M.},
  month = jul,
  year = {2010},
  keywords = {Artificial neural networks, Computational efficiency, Exchange rates, Real time systems, Time series analysis, Training, adaptive normalization, artificial neural network, data mining, data normalization, forecast problem, forecasting theory, learning, learning (artificial intelligence), long-term prediction, mean deviation, neural nets, nonstationary heteroscedastic time series, short-term prediction, standard deviation, statistical analysis, statistical property, time series},
  pages = {1--8},
}

Data normalization is a fundamental preprocessing step for mining and learning from data. However, finding an appropriated method to deal with time series normalization is not a simple task. This is because most of the traditional normalization methods make assumptions that do not hold for most time series. The first assumption is that all time series are stationary, i.e., their statistical properties, such as mean and standard deviation, do not change over time. The second assumption is that the volatility of the time series is considered uniform. None of the methods currently available in the literature address these issues. This paper proposes a new method for normalizing non-stationary heteroscedastic (with non-uniform volatility) time series. The method, named Adaptive Normalization (AN), was tested together with an Artificial Neural Network (ANN) in three forecast problems. The results were compared to other four traditional normalization methods, and showed AN improves ANN accuracy in both short- and long-term predictions.

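The two assumptions the abstract identifies, stationarity and uniform volatility, are exactly what a single global z-score bakes in. The sketch below illustrates the alternative direction, normalizing each window of a series by its own local statistics; it is an editorial illustration of plain disjoint-window z-scoring, with the function name and window size as assumptions for this example, not the paper's actual Adaptive Normalization procedure.

import numpy as np

def windowed_zscore(series, window=30):
    # Normalize each disjoint window by its own mean and standard deviation,
    # so drifts in level and volatility do not distort the whole series.
    # Illustrative only; not the AN algorithm from the paper.
    series = np.asarray(series, dtype=float)
    out = np.empty_like(series)
    for start in range(0, len(series), window):
        chunk = series[start:start + window]
        std = chunk.std() or 1.0  # guard against a zero-variance window
        out[start:start + window] = (chunk - chunk.mean()) / std
    return out

# A series whose mean and volatility both drift over time.
t = np.arange(300)
x = 0.05 * t + (1 + t / 300) * np.sin(t / 5)
print(windowed_zscore(x, window=30)[:5])

A global z-score would use one mean and one standard deviation for all 300 points, which is the stationarity assumption the paper argues against; per-window statistics adapt as the series evolves.
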
Improving Many-Task computing in scientific workflows using P2P techniques.
Dias, J.; Ogasawara, E.; de Oliveira, D.; Pacitti, E.; and Mattoso, M.
In Proceedings of the 3rd IEEE Workshop on Many-Task Computing on Grids and Supercomputers, pages 1–10, New Orleans, Louisiana, USA, November 2010. IEEE.

@inproceedings{dias_improving_2010,
  address = {New Orleans, Louisiana, USA},
  title = {Improving {Many}-{Task} computing in scientific workflows using {P2P} techniques},
  isbn = {978-1-4244-9704-1},
  doi = {10.1109/MTAGS.2010.5699430},
  abstract = {Large-scale scientific experiments are usually supported by scientific workflows that may demand high performance computing infrastructure. Within a given experiment, the same workflow may be explored with different sets of parameters. However, the parallelization of the workflow instances is hard to be accomplished mainly due to the heterogeneity of its activities. Many-Task computing paradigm seems to be a candidate approach to support workflow activity parallelism. However, scheduling a huge amount of workflow activities on large clusters may be susceptible to resource failures and overloading. In this paper, we propose Heracles, an approach to apply consolidated P2P techniques to improve Many-Task computing of workflow activities on large clusters. We present a fault tolerance mechanism, a dynamic resource management and a hierarchical organization of computing nodes to handle workflow instances execution properly. We have evaluated Heracles by executing experimental analysis regarding the benefits of P2P techniques on the workflow execution time.},
  language = {English},
  booktitle = {Proceedings of the 3rd {IEEE} {Workshop} on {Many}-{Task} {Computing} on {Grids} and {Supercomputers}},
  publisher = {IEEE},
  author = {Dias, Jonas and Ogasawara, E. and de Oliveira, D. and Pacitti, E. and Mattoso, M.},
  month = nov,
  year = {2010},
  keywords = {Processor scheduling, Scientific workflows, dynamic resource management, fault tolerance, fault tolerant computing, many task computing, overloading, p2p, parallelization, peer-to-peer computing, resource failures, scheduling, task analysis, workflow execution time, workflow management software},
  pages = {1--10},
}

Large-scale scientific experiments are usually supported by scientific workflows that may demand high performance computing infrastructure. Within a given experiment, the same workflow may be explored with different sets of parameters. However, the parallelization of the workflow instances is hard to be accomplished mainly due to the heterogeneity of its activities. Many-Task computing paradigm seems to be a candidate approach to support workflow activity parallelism. However, scheduling a huge amount of workflow activities on large clusters may be susceptible to resource failures and overloading. In this paper, we propose Heracles, an approach to apply consolidated P2P techniques to improve Many-Task computing of workflow activities on large clusters. We present a fault tolerance mechanism, a dynamic resource management and a hierarchical organization of computing nodes to handle workflow instances execution properly. We have evaluated Heracles by executing experimental analysis regarding the benefits of P2P techniques on the workflow execution time.

No year (2)

Database Clusters.
Mattoso, M.
In Liu, L.; and Özsu, M. T., editors, Encyclopedia of Database Systems, pages 700–704. Springer.

@incollection{mattoso_database_nodate,
  title = {Database {Clusters}},
  booktitle = {Encyclopedia of {Database} {Systems}},
  publisher = {Springer},
  editor = {Liu, Ling and Özsu, M. Tamer},
  author = {Mattoso, M.},
  pages = {700--704},
}

SciCumulus-ECM: Um Serviço de Custos para a Execução de Workflows Científicos em Nuvens Computacionais.
Viana, V.; de Oliveira, D.; Ogasawara, E.; and Mattoso, M.

@article{viana_scicumulus-ecm:_nodate,
  title = {{SciCumulus}-{ECM}: {Um} {Serviço} de {Custos} para a {Execução} de {Workflows} {Científicos} em {Nuvens} {Computacionais}},
  shorttitle = {{SciCumulus}-{ECM}},
  url = {http://www.lbd.dcc.ufmg.br/colecoes/sbbd/2011/001.pdf},
  urldate = {2013-08-15},
  author = {Viana, Vitor and de Oliveira, Daniel and Ogasawara, Eduardo and Mattoso, Marta},
}