Performance Analysis of Machine Learning Approaches in Software Complexity Prediction

Performance Analysis of Machine Learning Approaches in Software Complexity Prediction. Moshin Reza, S., Mahfujur Rahman, M., Parvez, H., Badreddin, O., & Al Mamun, S. In Kaiser, M. S., Bandyopadhyay, A., Mahmud, M., & Ray, K., editors, Proceedings of International Conference on Trends in Computational and Cognitive Engineering, Advances in Intelligent Systems and Computing, pages 27–39, Singapore, 2021. Springer.
doi abstract bibtex

Software design is one of the core concepts in software engineering. This covers insights and intuitions of software evolution, reliability, and maintainability. Effective software design facilitates software reliability and better quality management during development which reduces software development cost. Therefore, it is required to detect and maintain these issues earlier. Class complexity is one of the ways of detecting software quality. The objective of this paper is to predict class complexity from source code metrics using machine learning (ML) approaches and compare the performance of the approaches. In order to do that, we collect ten popular and quality maintained open source repositories and extract 18 source code metrics that relate to complexity for class-level analysis. First, we apply statistical correlation to find out the source code metrics that impact most on class complexity. Second, we apply five alternative ML techniques to build complexity predictors and compare the performances. The results report that the following source code metrics: Depth inheritance tree (DIT), response for class (RFC), weighted method count (WMC), lines of code (LOC), and coupling between objects (CBO) have the most impact on class complexity. Also, we evaluate the performance of the techniques, and results show that random forest (RF) significantly improves accuracy without providing additional false negative or false positive that work as false alarms in complexity prediction.

@inproceedings{moshin_reza_performance_2021,
	author = {Moshin Reza, Sayed and Mahfujur Rahman, Md. and Parvez, Hasnat and Badreddin, Omar and Al Mamun, Shamim},
	editor = {Kaiser, M. Shamim and Bandyopadhyay, Anirban and Mahmud, Mufti and Ray, Kanad},
	title = {Performance {Analysis} of {Machine} {Learning} {Approaches} in {Software} {Complexity} {Prediction}},
	booktitle = {Proceedings of {International} {Conference} on {Trends} in {Computational} and {Cognitive} {Engineering}},
	series = {Advances in {Intelligent} {Systems} and {Computing}},
	publisher = {Springer},
	address = {Singapore},
	year = {2021},
	pages = {27--39},
	isbn = {978-981-334-673-4},
	doi = {10.1007/978-981-33-4673-4_3},
	language = {en},
	keywords = {Machine learning, Software complexity, Software design, Software quality, Software reliability},
	abstract = {Software design is one of the core concepts in software engineering. This covers insights and intuitions of software evolution, reliability, and maintainability. Effective software design facilitates software reliability and better quality management during development which reduces software development cost. Therefore, it is required to detect and maintain these issues earlier. Class complexity is one of the ways of detecting software quality. The objective of this paper is to predict class complexity from source code metrics using machine learning (ML) approaches and compare the performance of the approaches. In order to do that, we collect ten popular and quality maintained open source repositories and extract 18 source code metrics that relate to complexity for class-level analysis. First, we apply statistical correlation to find out the source code metrics that impact most on class complexity. Second, we apply five alternative ML techniques to build complexity predictors and compare the performances. The results report that the following source code metrics: Depth inheritance tree (DIT), response for class (RFC), weighted method count (WMC), lines of code (LOC), and coupling between objects (CBO) have the most impact on class complexity. Also, we evaluate the performance of the techniques, and results show that random forest (RF) significantly improves accuracy without providing additional false negative or false positive that work as false alarms in complexity prediction.},
}

Downloads: 0

{"_id":"grcznXchYkGPxvwYu","bibbaseid":"moshinreza-mahfujurrahman-parvez-badreddin-almamun-performanceanalysisofmachinelearningapproachesinsoftwarecomplexityprediction-2021","author_short":["Moshin Reza, S.","Mahfujur Rahman, M.","Parvez, H.","Badreddin, O.","Al Mamun, S."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","address":"Singapore","series":"Advances in Intelligent Systems and Computing","title":"Performance Analysis of Machine Learning Approaches in Software Complexity Prediction","isbn":"978-981-334-673-4","doi":"10.1007/978-981-33-4673-4_3","abstract":"Moshin Reza, SayedMahfujur Rahman, Md.Parvez, HasnatBadreddin, OmarAl Mamun, ShamimSoftware design is one of the core concepts in software engineering. This covers insights and intuitions of software evolution, reliability, and maintainability. Effective software design facilitates software reliability and better quality management during development which reduces software development cost. Therefore, it is required to detect and maintain these issues earlier. Class complexity is one of the ways of detecting software quality. The objective of this paper is to predict class complexity from source code metrics using machine learning (ML) approaches and compare the performance of the approaches. In order to do that, we collect ten popular and quality maintained open source repositories and extract 18 source code metrics that relate to complexity for class-level analysis. First, we apply statistical correlation to find out the source code metrics that impact most on class complexity. Second, we apply five alternative ML techniques to build complexity predictors and compare the performances. The results report that the following source code metrics: Depth inheritance tree (DIT), response for class (RFC), weighted method count (WMC), lines of code (LOC), and coupling between objects (CBO) have the most impact on class complexity. 
Also, we evaluate the performance of the techniques, and results show that random forest (RF) significantly improves accuracy without providing additional false negative or false positive that work as false alarms in complexity prediction.","language":"en","booktitle":"Proceedings of International Conference on Trends in Computational and Cognitive Engineering","publisher":"Springer","author":[{"propositions":[],"lastnames":["Moshin","Reza"],"firstnames":["Sayed"],"suffixes":[]},{"propositions":[],"lastnames":["Mahfujur","Rahman"],"firstnames":["Md."],"suffixes":[]},{"propositions":[],"lastnames":["Parvez"],"firstnames":["Hasnat"],"suffixes":[]},{"propositions":[],"lastnames":["Badreddin"],"firstnames":["Omar"],"suffixes":[]},{"propositions":[],"lastnames":["Al","Mamun"],"firstnames":["Shamim"],"suffixes":[]}],"editor":[{"propositions":[],"lastnames":["Kaiser"],"firstnames":["M.","Shamim"],"suffixes":[]},{"propositions":[],"lastnames":["Bandyopadhyay"],"firstnames":["Anirban"],"suffixes":[]},{"propositions":[],"lastnames":["Mahmud"],"firstnames":["Mufti"],"suffixes":[]},{"propositions":[],"lastnames":["Ray"],"firstnames":["Kanad"],"suffixes":[]}],"year":"2021","keywords":"Machine learning, Software complexity, Software design, Software quality, Software reliability","pages":"27–39","bibtex":"@inproceedings{moshin_reza_performance_2021,\n\taddress = {Singapore},\n\tseries = {Advances in {Intelligent} {Systems} and {Computing}},\n\ttitle = {Performance {Analysis} of {Machine} {Learning} {Approaches} in {Software} {Complexity} {Prediction}},\n\tisbn = {978-981-334-673-4},\n\tdoi = {10.1007/978-981-33-4673-4_3},\n\tabstract = {Moshin Reza, SayedMahfujur Rahman, Md.Parvez, HasnatBadreddin, OmarAl Mamun, ShamimSoftware design is one of the core concepts in software engineering. This covers insights and intuitions of software evolution, reliability, and maintainability. 
Effective software design facilitates software reliability and better quality management during development which reduces software development cost. Therefore, it is required to detect and maintain these issues earlier. Class complexity is one of the ways of detecting software quality. The objective of this paper is to predict class complexity from source code metrics using machine learning (ML) approaches and compare the performance of the approaches. In order to do that, we collect ten popular and quality maintained open source repositories and extract 18 source code metrics that relate to complexity for class-level analysis. First, we apply statistical correlation to find out the source code metrics that impact most on class complexity. Second, we apply five alternative ML techniques to build complexity predictors and compare the performances. The results report that the following source code metrics: Depth inheritance tree (DIT), response for class (RFC), weighted method count (WMC), lines of code (LOC), and coupling between objects (CBO) have the most impact on class complexity. Also, we evaluate the performance of the techniques, and results show that random forest (RF) significantly improves accuracy without providing additional false negative or false positive that work as false alarms in complexity prediction.},\n\tlanguage = {en},\n\tbooktitle = {Proceedings of {International} {Conference} on {Trends} in {Computational} and {Cognitive} {Engineering}},\n\tpublisher = {Springer},\n\tauthor = {Moshin Reza, Sayed and Mahfujur Rahman, Md. and Parvez, Hasnat and Badreddin, Omar and Al Mamun, Shamim},\n\teditor = {Kaiser, M. 
Shamim and Bandyopadhyay, Anirban and Mahmud, Mufti and Ray, Kanad},\n\tyear = {2021},\n\tkeywords = {Machine learning, Software complexity, Software design, Software quality, Software reliability},\n\tpages = {27--39},\n}\n\n","author_short":["Moshin Reza, S.","Mahfujur Rahman, M.","Parvez, H.","Badreddin, O.","Al Mamun, S."],"editor_short":["Kaiser, M. S.","Bandyopadhyay, A.","Mahmud, M.","Ray, K."],"key":"moshin_reza_performance_2021","id":"moshin_reza_performance_2021","bibbaseid":"moshinreza-mahfujurrahman-parvez-badreddin-almamun-performanceanalysisofmachinelearningapproachesinsoftwarecomplexityprediction-2021","role":"author","urls":{},"keyword":["Machine learning","Software complexity","Software design","Software quality","Software reliability"],"metadata":{"authorlinks":{}}},"bibtype":"inproceedings","biburl":"https://api.zotero.org/users/6538716/collections/HQSSXXRW/items?key=eWyANMLLCecfw2Uc56WwG3XK&format=bibtex&limit=100","dataSources":["9EghT8XChGii7gpni"],"keywords":["machine learning","software complexity","software design","software quality","software reliability"],"search_terms":["performance","analysis","machine","learning","approaches","software","complexity","prediction","moshin reza","mahfujur rahman","parvez","badreddin","al mamun"],"title":"Performance Analysis of Machine Learning Approaches in Software Complexity Prediction","year":2021}