An Apache Spark-Based Platform for Predicting the Performance of Undergraduate Students. Mai, T. L., Do, P. T., Chung, M. T., & Thoai, N. In 2019 IEEE 21st International Conference on High Performance Computing and Communications; IEEE 17th International Conference on Smart City; IEEE 5th International Conference on Data Science and Systems (HPCC/SmartCity/DSS), pages 191–199, August, 2019. ZSCC: 0000001 [doi] [abstract] [bibtex] Nowadays, Education Data Mining (EDM) plays a very important role in higher education institutions. Plenty of algorithms have been employed to measure student's GPA in the next semester's courses. The results can be used to early identify dropout students or help students choose the elective courses which are appropriate for them. The most widely used methods are machine learning, however, the problem is the accuracy which can be changed from dataset to dataset. More importantly, the performance of prediction models can be affected by the characteristic of dataset associated with the applied model. In this paper, we build a distributed platform on Spark to predict missing grades of elective courses for undergraduate students. The paper compares several methods that are based on the combination of Collaborative Filtering & Matrix Factorization (namely Alternative Least Square). We evaluate the performance of these algorithms using a dataset provided by Ho Chi Minh University of Technology (HCMUT). The dataset consists of information about undergraduate students from 2006 to 2017. Depending on the characteristics of our dataset, the paper highlights that Alternative Least Square with non-negative constraint achieves the better results than others in comparison.
@comment{
  Conference paper from the 2019 HPCC/SmartCity/DSS proceedings.
  The citation key is kept unchanged so existing citations keep resolving.
  Only the proper nouns Apache and Spark are brace-protected in the title,
  which leaves the casing of every other word to the bibliography style.
  The ZSCC citation-count tag is stored in the ignored internal-note field
  so that it is not printed as part of the reference.
  NOTE(review): doi holds a shortDOI alias, not the full registered DOI;
  replace it with the full DOI once confirmed against the publisher record.
}
@inproceedings{mai_apache_2019,
  author        = {Mai, Thong Le and Do, Phat Thanh and Chung, Minh Thanh and Thoai, Nam},
  title         = {An {Apache} {Spark}-Based Platform for Predicting the Performance of Undergraduate Students},
  booktitle     = {2019 {IEEE} 21st {International} {Conference} on {High} {Performance} {Computing} and {Communications}; {IEEE} 17th {International} {Conference} on {Smart} {City}; {IEEE} 5th {International} {Conference} on {Data} {Science} and {Systems} ({HPCC}/{SmartCity}/{DSS})},
  pages         = {191--199},
  month         = aug,
  year          = {2019},
  doi           = {10/ggx735},
  keywords      = {Apache Spark, Collaboration, Data mining, Educational Data Mining, Spark, prediction, student performance, distributed system, machine learning, Filtering, Ho Chi Minh University of Technology, Matrix decomposition, Prediction algorithms, Predictive models, Sparks, alternative least square, collaborative filtering, data mining, distributed platform, education data mining, educational administrative data processing, educational courses, educational institutions, elective courses, further education, higher education institutions, learning (artificial intelligence), least squares approximations, matrix decomposition, matrix factorization, parallel algorithms, student GPA, undergraduate students},
  abstract      = {Nowadays, Education Data Mining (EDM) plays a very important role in higher education institutions. Plenty of algorithms have been employed to measure student's GPA in the next semester's courses. The results can be used to early identify dropout students or help students choose the elective courses which are appropriate for them. The most widely used methods are machine learning, however, the problem is the accuracy which can be changed from dataset to dataset. More importantly, the performance of prediction models can be affected by the characteristic of dataset associated with the applied model. In this paper, we build a distributed platform on Spark to predict missing grades of elective courses for undergraduate students. The paper compares several methods that are based on the combination of Collaborative Filtering \& Matrix Factorization (namely Alternative Least Square). We evaluate the performance of these algorithms using a dataset provided by Ho Chi Minh University of Technology (HCMUT). The dataset consists of information about undergraduate students from 2006 to 2017. Depending on the characteristics of our dataset, the paper highlights that Alternative Least Square with non-negative constraint achieves the better results than others in comparison.},
  internal-note = {ZSCC: 0000001},
}
Downloads: 0
{"_id":"5jffFjYrFmcQEvTCh","bibbaseid":"mai-do-chung-thoai-anapachesparkbasedplatformforpredictingtheperformanceofundergraduatestudents-2019","authorIDs":[],"author_short":["Mai, T. L.","Do, P. T.","Chung, M. T.","Thoai, N."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","title":"An Apache Spark-Based Platform for Predicting the Performance of Undergraduate Students","doi":"10/ggx735","abstract":"Nowadays, Education Data Mining (EDM) plays a very important role in higher education institutions. Plenty of algorithms have been employed to measure student's GPA in the next semester's courses. The results can be used to early identify dropout students or help students choose the elective courses which are appropriate for them. The most widely used methods are machine learning, however, the problem is the accuracy which can be changed from dataset to dataset. More importantly, the performance of prediction models can be affected by the characteristic of dataset associated with the applied model. In this paper, we build a distributed platform on Spark to predict missing grades of elective courses for undergraduate students. The paper compares several methods that are based on the combination of Collaborative Filtering & Matrix Factorization (namely Alternative Least Square). We evaluate the performance of these algorithms using a dataset provided by Ho Chi Minh University of Technology (HCMUT). The dataset consists of information about undergraduate students from 2006 to 2017. 
Depending on the characteristics of our dataset, the paper highlights that Alternative Least Square with non-negative constraint achieves the better results than others in comparison.","booktitle":"2019 IEEE 21st International Conference on High Performance Computing and Communications; IEEE 17th International Conference on Smart City; IEEE 5th International Conference on Data Science and Systems (HPCC/SmartCity/DSS)","author":[{"propositions":[],"lastnames":["Mai"],"firstnames":["Thong","Le"],"suffixes":[]},{"propositions":[],"lastnames":["Do"],"firstnames":["Phat","Thanh"],"suffixes":[]},{"propositions":[],"lastnames":["Chung"],"firstnames":["Minh","Thanh"],"suffixes":[]},{"propositions":[],"lastnames":["Thoai"],"firstnames":["Nam"],"suffixes":[]}],"month":"August","year":"2019","note":"ZSCC: 0000001","keywords":"Apache Spark, Collaboration, Data mining, Educational Data Mining, Spark, prediction, student performance, distributed system, machine learning, Filtering, Ho Chi Minh University of Technology, Matrix decomposition, Prediction algorithms, Predictive models, Sparks, alternative least square, collaborative filtering, data mining, distributed platform, education data mining, educational administrative data processing, educational courses, educational institutions, elective courses, further education, higher education institutions, learning (artificial intelligence), least squares approximations, machine learning, matrix decomposition, matrix factorization, parallel algorithms, student GPA, undergraduate students","pages":"191–199","bibtex":"@inproceedings{mai_apache_2019,\n\ttitle = {An {Apache} {Spark}-{Based} {Platform} for {Predicting} the {Performance} of {Undergraduate} {Students}},\n\tdoi = {10/ggx735},\n\tabstract = {Nowadays, Education Data Mining (EDM) plays a very important role in higher education institutions. Plenty of algorithms have been employed to measure student's GPA in the next semester's courses. 
The results can be used to early identify dropout students or help students choose the elective courses which are appropriate for them. The most widely used methods are machine learning, however, the problem is the accuracy which can be changed from dataset to dataset. More importantly, the performance of prediction models can be affected by the characteristic of dataset associated with the applied model. In this paper, we build a distributed platform on Spark to predict missing grades of elective courses for undergraduate students. The paper compares several methods that are based on the combination of Collaborative Filtering \\& Matrix Factorization (namely Alternative Least Square). We evaluate the performance of these algorithms using a dataset provided by Ho Chi Minh University of Technology (HCMUT). The dataset consists of information about undergraduate students from 2006 to 2017. Depending on the characteristics of our dataset, the paper highlights that Alternative Least Square with non-negative constraint achieves the better results than others in comparison.},\n\tbooktitle = {2019 {IEEE} 21st {International} {Conference} on {High} {Performance} {Computing} and {Communications}; {IEEE} 17th {International} {Conference} on {Smart} {City}; {IEEE} 5th {International} {Conference} on {Data} {Science} and {Systems} ({HPCC}/{SmartCity}/{DSS})},\n\tauthor = {Mai, Thong Le and Do, Phat Thanh and Chung, Minh Thanh and Thoai, Nam},\n\tmonth = aug,\n\tyear = {2019},\n\tnote = {ZSCC: 0000001},\n\tkeywords = {Apache Spark, Collaboration, Data mining, Educational Data Mining, Spark, prediction, student performance, distributed system, machine learning, Filtering, Ho Chi Minh University of Technology, Matrix decomposition, Prediction algorithms, Predictive models, Sparks, alternative least square, collaborative filtering, data mining, distributed platform, education data mining, educational administrative data processing, educational courses, educational institutions, 
elective courses, further education, higher education institutions, learning (artificial intelligence), least squares approximations, machine learning, matrix decomposition, matrix factorization, parallel algorithms, student GPA, undergraduate students},\n\tpages = {191--199}\n}","author_short":["Mai, T. L.","Do, P. T.","Chung, M. T.","Thoai, N."],"key":"mai_apache_2019","id":"mai_apache_2019","bibbaseid":"mai-do-chung-thoai-anapachesparkbasedplatformforpredictingtheperformanceofundergraduatestudents-2019","role":"author","urls":{},"keyword":["Apache Spark","Collaboration","Data mining","Educational Data Mining","Spark","prediction","student performance","distributed system","machine learning","Filtering","Ho Chi Minh University of Technology","Matrix decomposition","Prediction algorithms","Predictive models","Sparks","alternative least square","collaborative filtering","data mining","distributed platform","education data mining","educational administrative data processing","educational courses","educational institutions","elective courses","further education","higher education institutions","learning (artificial intelligence)","least squares approximations","machine learning","matrix decomposition","matrix factorization","parallel algorithms","student GPA","undergraduate students"],"downloads":0},"bibtype":"inproceedings","biburl":"https://api.zotero.org/users/6725895/collections/I7A997Q2/items?key=aPL9p0reGZrSDB4p9mxtpzP8&format=bibtex&limit=100","creationDate":"2020-07-19T05:05:47.056Z","downloads":0,"keywords":["apache spark","collaboration","data mining","educational data mining","spark","prediction","student performance","distributed system","machine learning","filtering","ho chi minh university of technology","matrix decomposition","prediction algorithms","predictive models","sparks","alternative least square","collaborative filtering","data mining","distributed platform","education data mining","educational administrative data processing","educational 
courses","educational institutions","elective courses","further education","higher education institutions","learning (artificial intelligence)","least squares approximations","machine learning","matrix decomposition","matrix factorization","parallel algorithms","student gpa","undergraduate students"],"search_terms":["apache","spark","based","platform","predicting","performance","undergraduate","students","mai","do","chung","thoai"],"title":"An Apache Spark-Based Platform for Predicting the Performance of Undergraduate Students","year":2019,"dataSources":["rrP7iMs5e3NAiMaDH"]}