Noise in Mylyn Interaction Traces and Its Impact on Developers and Recommendation Systems. Soh, Z., Khomh, F., Guéhéneuc, Y., & Antoniol, G. Journal of Empirical Software Engineering (EMSE), Springer, June, 2017. 49 pages. Paper abstract bibtex Interaction traces (ITs) are developers' logs collected while developers maintain or evolve software systems. Researchers use ITs to study developers' editing styles and recommend relevant program entities when developers perform changes on source code. However, when using ITs, they make assumptions that may not necessarily be true. This article assesses the extent to which researchers' assumptions are true and examines noise in ITs. It also investigates the impact of noise on previous studies. This article describes a quasi-experiment collecting both Mylyn ITs and video-screen captures while 15 participants performed four realistic software maintenance tasks. It assesses the noise in ITs by comparing Mylyn ITs and the ITs obtained from the video captures. It proposes an approach to correct noise and uses this approach to revisit previous studies. The collected data show that Mylyn ITs can miss, on average, about 6% of the time spent by participants performing tasks and can contain, on average, about 85% of false edit events, which are not real changes to the source code. The approach to correct noise reveals about 45% of misclassification of ITs. It can improve the precision and recall of recommendation systems from the literature by up to 56% and 62%, respectively. Mylyn ITs include noise that biases subsequent studies and, thus, can prevent researchers from assisting developers effectively. They must be cleaned before use in studies and recommendation systems. The results on Mylyn ITs open new perspectives for the investigation of noise in ITs generated by other monitoring tools such as DFlow, FeedBag, and Mimec, and for future studies based on ITs.
@comment{Journal article. Volume, number and editor are not yet known; they are kept below as MISSING* placeholder fields, which are unknown field names and therefore ignored by bibliography styles.}
@ARTICLE{Soh17-EMSE-MylynNoise,
author = {Z{\'e}phyrin Soh and Foutse Khomh and Yann-Ga{\"e}l Gu{\'e}h{\'e}neuc and Giuliano Antoniol},
title = {Noise in {Mylyn} Interaction Traces and Its Impact on Developers and Recommendation Systems},
journal = {Journal of Empirical Software Engineering ({EMSE})},
year = {2017},
month = jun,
MISSINGvolume = {XXX},
MISSINGnumber = {XXX},
pages = {1--48},
note = {49 pages.},
abstract = {Interaction traces (ITs) are developers' logs collected while developers maintain or evolve software systems. Researchers use ITs to study developers' editing styles and recommend relevant program entities when developers perform changes on source code. However, when using ITs, they make assumptions that may not necessarily be true. This article assesses the extent to which researchers' assumptions are true and examines noise in ITs. It also investigates the impact of noise on previous studies. This article describes a quasi-experiment collecting both Mylyn ITs and video-screen captures while 15 participants performed four realistic software maintenance tasks. It assesses the noise in ITs by comparing Mylyn ITs and the ITs obtained from the video captures. It proposes an approach to correct noise and uses this approach to revisit previous studies. The collected data show that Mylyn ITs can miss, on average, about 6\% of the time spent by participants performing tasks and can contain, on average, about 85\% of false edit events, which are not real changes to the source code. The approach to correct noise reveals about 45\% of misclassification of ITs. It can improve the precision and recall of recommendation systems from the literature by up to 56\% and 62\%, respectively. Mylyn ITs include noise that biases subsequent studies and, thus, can prevent researchers from assisting developers effectively. They must be cleaned before use in studies and recommendation systems. The results on Mylyn ITs open new perspectives for the investigation of noise in ITs generated by other monitoring tools such as DFlow, FeedBag, and Mimec, and for future studies based on ITs.},
MISSINGeditor = {XXX},
grant = {NSERC DG and CRC on Multi-language Systems},
keywords = {Understanding program comprehension ; EMSE},
kind = {RIAS},
language = {english},
publisher = {Springer},
url = {http://www.ptidej.net/publications/documents/EMSE17a.doc.pdf}
}
Downloads: 0
{"_id":"AmAcFzQm68FAf9i2k","bibbaseid":"soh-khomh-guhneuc-antoniol-noiseinmylyninteractiontracesanditsimpactondevelopersandrecommendationsystems-2017","downloads":0,"creationDate":"2018-01-17T20:29:42.157Z","title":"Noise in Mylyn Interaction Traces and Its Impact on Developers and Recommendation Systems","author_short":["Soh, Z.","Khomh, F.","Guéhéneuc, Y.","Antoniol, G."],"year":2017,"bibtype":"article","biburl":"http://www.yann-gael.gueheneuc.net/Work/BibBase/guehene (automatically cleaned).bib","bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Zéphyrin"],"propositions":[],"lastnames":["Soh"],"suffixes":[]},{"firstnames":["Foutse"],"propositions":[],"lastnames":["Khomh"],"suffixes":[]},{"firstnames":["Yann-Gaël"],"propositions":[],"lastnames":["Guéhéneuc"],"suffixes":[]},{"firstnames":["Giuliano"],"propositions":[],"lastnames":["Antoniol"],"suffixes":[]}],"title":"Noise in Mylyn Interaction Traces and Its Impact on Developers and Recommendation Systems","journal":"Journal of Empirical Software Engineering (EMSE)","year":"2017","month":"June","missingvolume":"XXX","missingnumber":"XXX","pages":"1–48","note":"49 pages.","abstract":"Interaction traces (ITs) are developers' logs collected while developers maintain or evolve software systems. Researchers use ITs to study developers' editing styles and recommend relevant program entities when developers perform changes on source code. However, when using ITs, they make assumptions that may not necessarily be true. This article assesses the extent to which researchers' assumptions are true and examines noise in ITs. It also investigates the impact of noise on previous studies. This article describes a quasi-experiment collecting both Mylyn ITs and video-screen captures while 15 participants performed four realistic software maintenance tasks. It assesses the noise in ITs by comparing Mylyn ITs and the ITs obtained from the video captures. 
It proposes an approach to correct noise and uses this approach to revisit previous studies. The collected data show that Mylyn ITs can miss, on average, about 6% of the time spent by participants performing tasks and can contain, on average, about 85% of false edit events, which are not real changes to the source code. The approach to correct noise reveals about 45% of misclassification of ITs. It can improve the precision and recall of recommendation systems from the literature by up to 56% and 62%, respectively. Mylyn ITs include noise that biases subsequent studies and, thus, can prevent researchers from assisting developers effectively. They must be cleaned before use in studies and recommendation systems. The results on Mylyn ITs open new perspectives for the investigation of noise in ITs generated by other monitoring tools such as DFlow, FeedBag, and Mimec, and for future studies based on ITs.","missingeditor":"XXX","grant":"NSERC DG and CRC on Multi-language Systems","keywords":"Understanding program comprehension ; EMSE","kind":"RIAS","language":"english","publisher":"Springer","url":"http://www.ptidej.net/publications/documents/EMSE17a.doc.pdf","bibtex":"@ARTICLE{Soh17-EMSE-MylynNoise,\n author = {Z{\\'e}phyrin Soh and Foutse Khomh and Yann-Ga{\\\"e}l Gu{\\'e}h{\\'e}neuc and Giuliano Antoniol},\n title = {Noise in Mylyn Interaction Traces and Its Impact on Developers and Recommendation Systems},\n journal = {Journal of Empirical Software Engineering ({EMSE})},\n year = {2017},\n month = {June},\n MISSINGvolume = {XXX},\n MISSINGnumber = {XXX},\n pages = {1--48},\n note = {49 pages.},\n abstract = {Interaction traces (ITs) are developers' logs collected while developers maintain or evolve software systems. Researchers use ITs to study developers' editing styles and recommend relevant program entities when developers perform changes on source code. However, when using ITs, they make assumptions that may not necessarily be true. 
This article assesses the extent to which researchers' assumptions are true and examines noise in ITs. It also investigates the impact of noise on previous studies. This article describes a quasi-experiment collecting both Mylyn ITs and video-screen captures while 15 participants performed four realistic software maintenance tasks. It assesses the noise in ITs by comparing Mylyn ITs and the ITs obtained from the video captures. It proposes an approach to correct noise and uses this approach to revisit previous studies. The collected data show that Mylyn ITs can miss, on average, about 6\\% of the time spent by participants performing tasks and can contain, on average, about 85\\% of false edit events, which are not real changes to the source code. The approach to correct noise reveals about 45\\% of misclassification of ITs. It can improve the precision and recall of recommendation systems from the literature by up to 56\\% and 62\\%, respectively. Mylyn ITs include noise that biases subsequent studies and, thus, can prevent researchers from assisting developers effectively. They must be cleaned before use in studies and recommendation systems. 
The results on Mylyn ITs open new perspectives for the investigation of noise in ITs generated by other monitoring tools such as DFlow, FeedBag, and Mimec, and for future studies based on ITs.},\n MISSINGeditor = {XXX},\n grant = {NSERC DG and CRC on Multi-language Systems},\n keywords = {Understanding program comprehension ; EMSE},\n kind = {RIAS},\n language = {english},\n publisher = {Springer},\n url = {http://www.ptidej.net/publications/documents/EMSE17a.doc.pdf}\n}\n\n","author_short":["Soh, Z.","Khomh, F.","Guéhéneuc, Y.","Antoniol, G."],"key":"Soh17-EMSE-MylynNoise","id":"Soh17-EMSE-MylynNoise","bibbaseid":"soh-khomh-guhneuc-antoniol-noiseinmylyninteractiontracesanditsimpactondevelopersandrecommendationsystems-2017","role":"author","urls":{"Paper":"http://www.ptidej.net/publications/documents/EMSE17a.doc.pdf"},"keyword":["Understanding program comprehension ; EMSE"],"metadata":{"authorlinks":{"guéhéneuc, y":"https://bibbase.org/show?bib=http://www.yann-gael.gueheneuc.net/Work/BibBase/guehene%20(automatically%20cleaned).bib"}},"downloads":0,"html":""},"search_terms":["noise","mylyn","interaction","traces","impact","developers","recommendation","systems","soh","khomh","guéhéneuc","antoniol"],"keywords":["understanding program comprehension ; emse"],"authorIDs":["xkviMnkrGBneANvMr"],"dataSources":["Sed98LbBeGaXxenrM"]}