Detecting Machine-Obfuscated Plagiarism. Foltýnek, T., Ruas, T., Scharpf, P., Meuschke, N., Schubotz, M., Grosky, W., & Gipp, B. In Sundqvist, A., Berget, G., Nolin, J., & Skjerdingstad, K. I., editors, Sustainable Digital Communities, volume 12051 of LNCS, pages 816–827. Springer International Publishing, Cham, March 2020. Links: Paper, Data, Demo, doi, abstract, bibtex. 5 downloads. Abstract: Research on academic integrity has identified online paraphrasing tools as a severe threat to the effectiveness of plagiarism detection systems. To enable the automated identification of machine-paraphrased text, we make three contributions. First, we evaluate the effectiveness of six prominent word embedding models in combination with five classifiers for distinguishing human-written from machine-paraphrased text. The best performing classification approach achieves an accuracy of 99.0% for documents and 83.4% for paragraphs. Second, we show that the best approach outperforms human experts and established plagiarism detection systems for these classification tasks. Third, we provide a Web application that uses the best performing classification approach to indicate whether a text underwent machine-paraphrasing. The data and code of our study are openly available.
@comment{
  FoltynekRSM20: chapter in the edited volume "Sustainable Digital Communities".
  The series name lives in `series`; `volume` holds only the number within
  that series, so styles can format "volume N of SERIES" themselves.
  The `url` value keeps its labelled `paper=... data=... demo=...` form
  unchanged; presumably the publication-list tool splits it into separate
  links -- verify before normalising it to a single raw URL.
}
@incollection{FoltynekRSM20,
  author    = {Foltýnek, Tomáš and Ruas, Terry and Scharpf, Philipp and Meuschke, Norman and Schubotz, Moritz and Grosky, William and Gipp, Bela},
  title     = {Detecting {Machine}-{Obfuscated} {Plagiarism}},
  editor    = {Sundqvist, Anneli and Berget, Gerd and Nolin, Jan and Skjerdingstad, Kjell Ivar},
  booktitle = {Sustainable {Digital} {Communities}},
  series    = {Lecture Notes in Computer Science},
  volume    = {12051},
  pages     = {816--827},
  publisher = {Springer International Publishing},
  address   = {Cham},
  month     = mar,
  year      = {2020},
  isbn      = {978-3-030-43686-5 978-3-030-43687-2},
  doi       = {10.1007/978-3-030-43687-2_68},
  url       = {paper=https://www.gipp.com/wp-content/papercite-data/pdf/foltynek2020.pdf data=https://doi.org/10.7302/bewj-qx93 demo=https://purl.org/spindetector},
  keywords  = {Plagiarism Detection},
  abstract  = {Research on academic integrity has identified online paraphrasing tools as a severe threat to the effectiveness of plagiarism detection systems. To enable the automated identification of machine-paraphrased text, we make three contributions. First, we evaluate the effectiveness of six prominent word embedding models in combination with five classifiers for distinguishing human-written from machine-paraphrased text. The best performing classification approach achieves an accuracy of 99.0\% for documents and 83.4\% for paragraphs. Second, we show that the best approach outperforms human experts and established plagiarism detection systems for these classification tasks. Third, we provide a Web application that uses the best performing classification approach to indicate whether a text underwent machine-paraphrasing. The data and code of our study are openly available.},
}
Downloads: 5
{"_id":"Kgr9hb3mYYtw7KXXA","bibbaseid":"foltnek-ruas-scharpf-meuschke-schubotz-grosky-gipp-detectingmachineobfuscatedplagiarism-2020","authorIDs":["3aamy24wTzcQoTPGY","7Crs4B84W7BbduMmq","97o4RCsEFAoSxEQqt","9dzP7gNRTLKvc9aPR","GYqCNzAZv2xc9nhmD","KLLNwF6yrTvRfDhAP","LKQ5pS2Y8Pc7FTkr7","TuCkHmKovwKzF3y8Z","ZDet9tokdva7KFSEH","ZJvJiH6kd887XEnz3","gBWY7RvNrDhhspCGi","nLJ4c698vfAyWRWTr","pCb6WupcebiMmhw8Y","qNrPNpAwKg5fp598G","s7Z2R2uTWDHRHN2bE","tFwG3DWb6fYeXs3sL","yiM4TojQ7StGdi2iD"],"author_short":["Foltýnek, T.","Ruas, T.","Scharpf, P.","Meuschke, N.","Schubotz, M.","Grosky, W.","Gipp, B."],"bibdata":{"bibtype":"incollection","type":"incollection","address":"Cham","title":"Detecting Machine-Obfuscated Plagiarism","volume":"12051 LNCS","isbn":"978-3-030-43686-5 978-3-030-43687-2","abstract":"Research on academic integrity has identified online paraphrasing tools as a severe threat to the effectiveness of plagiarism detection systems. To enable the automated identification of machine-paraphrased text, we make three contributions. First, we evaluate the effectiveness of six prominent word embedding models in combination with five classifiers for distinguishing human-written from machine-paraphrased text. The best performing classification approach achieves an accuracy of 99.0% for documents and 83.4% for paragraphs. Second, we show that the best approach outperforms human experts and established plagiarism detection systems for these classification tasks. Third, we provide a Web application that uses the best performing classification approach to indicate whether a text underwent machine-paraphrasing. 
The data and code of our study are openly available.","booktitle":"Sustainable Digital Communities","publisher":"Springer International Publishing","author":[{"propositions":[],"lastnames":["Foltýnek"],"firstnames":["Tomáš"],"suffixes":[]},{"propositions":[],"lastnames":["Ruas"],"firstnames":["Terry"],"suffixes":[]},{"propositions":[],"lastnames":["Scharpf"],"firstnames":["Philipp"],"suffixes":[]},{"propositions":[],"lastnames":["Meuschke"],"firstnames":["Norman"],"suffixes":[]},{"propositions":[],"lastnames":["Schubotz"],"firstnames":["Moritz"],"suffixes":[]},{"propositions":[],"lastnames":["Grosky"],"firstnames":["William"],"suffixes":[]},{"propositions":[],"lastnames":["Gipp"],"firstnames":["Bela"],"suffixes":[]}],"editor":[{"propositions":[],"lastnames":["Sundqvist"],"firstnames":["Anneli"],"suffixes":[]},{"propositions":[],"lastnames":["Berget"],"firstnames":["Gerd"],"suffixes":[]},{"propositions":[],"lastnames":["Nolin"],"firstnames":["Jan"],"suffixes":[]},{"propositions":[],"lastnames":["Skjerdingstad"],"firstnames":["Kjell","Ivar"],"suffixes":[]}],"month":"March","year":"2020","doi":"10.1007/978-3-030-43687-2_68","keywords":"Plagiarism Detection","pages":"816–827","bibtex":"@incollection{FoltynekRSM20,\n\taddress = {Cham},\n\ttitle = {Detecting {Machine}-{Obfuscated} {Plagiarism}},\n\tvolume = {12051 LNCS},\n\tisbn = {978-3-030-43686-5 978-3-030-43687-2},\n\turl = {paper=https://www.gipp.com/wp-content/papercite-data/pdf/foltynek2020.pdf data=https://doi.org/10.7302/bewj-qx93 demo=https://purl.org/spindetector},\n\tabstract = {Research on academic integrity has identified online paraphrasing tools as a severe threat to the effectiveness of plagiarism detection systems. To enable the automated identification of machine-paraphrased text, we make three contributions. First, we evaluate the effectiveness of six prominent word embedding models in combination with five classifiers for distinguishing human-written from machine-paraphrased text. 
The best performing classification approach achieves an accuracy of 99.0\\% for documents and 83.4\\% for paragraphs. Second, we show that the best approach outperforms human experts and established plagiarism detection systems for these classification tasks. Third, we provide a Web application that uses the best performing classification approach to indicate whether a text underwent machine-paraphrasing. The data and code of our study are openly available.},\n\tbooktitle = {Sustainable {Digital} {Communities}},\n\tpublisher = {Springer International Publishing},\n\tauthor = {Foltýnek, Tomáš and Ruas, Terry and Scharpf, Philipp and Meuschke, Norman and Schubotz, Moritz and Grosky, William and Gipp, Bela},\n\teditor = {Sundqvist, Anneli and Berget, Gerd and Nolin, Jan and Skjerdingstad, Kjell Ivar},\n\tmonth = mar,\n\tyear = {2020},\n\tdoi = {10.1007/978-3-030-43687-2_68},\n\tkeywords = {Plagiarism Detection},\n\tpages = {816--827},\n}\n\n","author_short":["Foltýnek, T.","Ruas, T.","Scharpf, P.","Meuschke, N.","Schubotz, M.","Grosky, W.","Gipp, B."],"editor_short":["Sundqvist, A.","Berget, G.","Nolin, J.","Skjerdingstad, K. 
I."],"urlpaper":"https://www.gipp.com/wp-content/papercite-data/pdf/foltynek2020.pdf","urldata":"https://doi.org/10.7302/bewj-qx93","urldemo":"https://purl.org/spindetector","key":"FoltynekRSM20","id":"FoltynekRSM20","bibbaseid":"foltnek-ruas-scharpf-meuschke-schubotz-grosky-gipp-detectingmachineobfuscatedplagiarism-2020","role":"author","urls":{"Paper":"https://www.gipp.com/wp-content/papercite-data/pdf/foltynek2020.pdf","Data":"https://doi.org/10.7302/bewj-qx93","Demo":"https://purl.org/spindetector"},"keyword":["Plagiarism Detection"],"metadata":{"authorlinks":{"meuschke, n":"https://bibbase.org/show?bib=https%3A%2F%2Fapi.zotero.org%2Fgroups%2F2532143%2Fitems%3Fkey%3DDOjJ33bOgISaFjBIBr7jCV5S%26format%3Dbibtex%26limit%3D100"}},"downloads":5},"bibtype":"incollection","biburl":"https://api.zotero.org/groups/2532143/items?key=DOjJ33bOgISaFjBIBr7jCV5S&format=bibtex&limit=100","creationDate":"2020-05-17T17:01:38.196Z","downloads":5,"keywords":["plagiarism detection"],"search_terms":["detecting","machine","obfuscated","plagiarism","foltýnek","ruas","scharpf","meuschke","schubotz","grosky","gipp"],"title":"Detecting Machine-Obfuscated Plagiarism","year":2020,"dataSources":["xteq4cdC6ATE2G6Fg","JNgeyAG2vQ8k88oYh","FPjHiAkAja6XvmScK","RTGAqwGfLTSqYQMsS","Y7kZGjoN5Erk3Lo2J","jnWJCpbQCoWvxj9kz","F32umBkhFrpeJbp7A","BWzEyLkMvdMGpHpr6","e3AdWzdxYmb85Fn5D","MtqPmSRuq4X8FJqNT","YCwvFifyPbazBYMQD","6oZMeYhGKA2Mp8xhF","gYMS6DBXsNosXKcRC","SzFkcrpurPzNHEyqX","6KJgnNtYZiwwFkcGq","Zp98Nuv7ftsXLefzT","F3AfGZZbixwqNK4mj","XJBi8b8xDjDoWPzcZ","kHqqD8pzLteJJWS2X","hG7rv86o2PDG2z44d","aJH3D6QaHCDgg2JGg","dHLtmS5G7GmooD755","EvZZTzAZvA3EsuMjm"]}