Are Neural Language Models Good Plagiarists? A Benchmark for Neural Paraphrase Detection. Wahle, J. P., Ruas, T., Meuschke, N., & Gipp, B. In 2021 ACM/IEEE Joint Conference on Digital Libraries (JCDL), pages 226–229. Abstract: The rise of language models such as BERT allows for high-quality text paraphrasing. This poses a problem for academic integrity, as it is difficult to differentiate between original and machine-generated content. We propose a benchmark consisting of articles paraphrased with recent language models based on the Transformer architecture. Our contribution fosters future research on paraphrase detection systems: it offers a large collection of aligned original and paraphrased documents, a study of their structure, and classification experiments with state-of-the-art systems, and we make our findings publicly available.
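The classification experiments mentioned in the abstract treat paraphrase detection as sentence-pair classification with Transformer models such as BERT. A minimal sketch of that setup, not the authors' code: the model name, label mapping, and example sentences below are illustrative assumptions.

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Hypothetical cross-encoder fine-tuned for sentence-pair classification (MRPC);
# substitute any paraphrase classifier available on the Hugging Face hub.
model_name = "bert-base-cased-finetuned-mrpc"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model.eval()

original = "The rise of language models such as BERT allows for high-quality text paraphrasing."
candidate = "Language models like BERT make it possible to paraphrase text at high quality."

# Encode the aligned pair jointly; the classifier outputs logits over
# {not paraphrase, paraphrase} (label index 1 assumed to mean "paraphrase").
inputs = tokenizer(original, candidate, return_tensors="pt", truncation=True)
with torch.no_grad():
    logits = model(**inputs).logits
print(f"P(paraphrase) = {torch.softmax(logits, dim=-1)[0, 1].item():.3f}")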
@inproceedings{WahleRMG21,
title = {Are {{Neural Language Models Good Plagiarists}}? {{A Benchmark}} for {{Neural Paraphrase Detection}}},
shorttitle = {Are {{Neural Language Models Good Plagiarists}}?},
booktitle = {2021 {{ACM}}/{{IEEE Joint Conference}} on {{Digital Libraries}} ({{JCDL}})},
author = {Wahle, Jan Philip and Ruas, Terry and Meuschke, Norman and Gipp, Bela},
date = {2021-09},
eprint = {2103.12450},
eprinttype = {arxiv},
eprintclass = {cs},
pages = {226--229},
doi = {10.1109/JCDL52503.2021.00065},
url = {https://arxiv.org/abs/2103.12450},
urldate = {2022-11-04},
abstract = {The rise of language models such as BERT allows for high-quality text paraphrasing. This is a problem to academic integrity, as it is difficult to differentiate between original and machine-generated content. We propose a benchmark consisting of paraphrased articles using recent language models relying on the Transformer architecture. Our contribution fosters future research of paraphrase detection systems as it offers a large collection of aligned original and paraphrased documents, a study regarding its structure, classification experiments with state-of-the-art systems, and we make our findings publicly available.},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Digital Libraries}
}
{"_id":"o4eHzE9JQJLfrgunR","bibbaseid":"wahle-ruas-meuschke-gipp-areneurallanguagemodelsgoodplagiaristsabenchmarkforneuralparaphrasedetection","author_short":["Wahle, J. P.","Ruas, T.","Meuschke, N.","Gipp, B."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","title":"Are Neural Language Models Good Plagiarists? A Benchmark for Neural Paraphrase Detection","shorttitle":"Are Neural Language Models Good Plagiarists?","booktitle":"2021 ACM/IEEE Joint Conference on Digital Libraries (JCDL)","author":[{"propositions":[],"lastnames":["Wahle"],"firstnames":["Jan","Philip"],"suffixes":[]},{"propositions":[],"lastnames":["Ruas"],"firstnames":["Terry"],"suffixes":[]},{"propositions":[],"lastnames":["Meuschke"],"firstnames":["Norman"],"suffixes":[]},{"propositions":[],"lastnames":["Gipp"],"firstnames":["Bela"],"suffixes":[]}],"date":"2021-09","eprint":"2103.12450","eprinttype":"arxiv","eprintclass":"cs","pages":"226–229","doi":"10.1109/JCDL52503.2021.00065","url":"https://aclanthology.org/2022.emnlp-main.62","urldate":"2022-11-04","abstract":"The rise of language models such as BERT allows for high-quality text paraphrasing. This is a problem to academic integrity, as it is difficult to differentiate between original and machine-generated content. We propose a benchmark consisting of paraphrased articles using recent language models relying on the Transformer architecture. Our contribution fosters future research of paraphrase detection systems as it offers a large collection of aligned original and paraphrased documents, a study regarding its structure, classification experiments with state-of-the-art systems, and we make our findings publicly available.","keywords":"Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Digital Libraries","file":"C\\:\\\\Users\\ůast\\\\Zotero\\\\storage\\\\7P6V49B7\\\\WahleRMG21–tr–are_neural_language_models_good_plagiarists_a_benchmark_for_neural_paraphrase_detection.pdf;C\\:\\\\Users\\ůast\\\\Zotero\\\\storage\\\\CXPL4A8B\\\\2103.html","bibtex":"@inproceedings{WahleRMG21,\n title = {Are {{Neural Language Models Good Plagiarists}}? {{A Benchmark}} for {{Neural Paraphrase Detection}}},\n shorttitle = {Are {{Neural Language Models Good Plagiarists}}?},\n booktitle = {2021 {{ACM}}/{{IEEE Joint Conference}} on {{Digital Libraries}} ({{JCDL}})},\n author = {Wahle, Jan Philip and Ruas, Terry and Meuschke, Norman and Gipp, Bela},\n date = {2021-09},\n eprint = {2103.12450},\n eprinttype = {arxiv},\n eprintclass = {cs},\n pages = {226--229},\n doi = {10.1109/JCDL52503.2021.00065},\n url = {https://aclanthology.org/2022.emnlp-main.62},\n urldate = {2022-11-04},\n abstract = {The rise of language models such as BERT allows for high-quality text paraphrasing. This is a problem to academic integrity, as it is difficult to differentiate between original and machine-generated content. We propose a benchmark consisting of paraphrased articles using recent language models relying on the Transformer architecture. 
Our contribution fosters future research of paraphrase detection systems as it offers a large collection of aligned original and paraphrased documents, a study regarding its structure, classification experiments with state-of-the-art systems, and we make our findings publicly available.},\n keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Digital Libraries},\n file = {C\\:\\\\Users\\\\ruast\\\\Zotero\\\\storage\\\\7P6V49B7\\\\WahleRMG21--tr--are_neural_language_models_good_plagiarists_a_benchmark_for_neural_paraphrase_detection.pdf;C\\:\\\\Users\\\\ruast\\\\Zotero\\\\storage\\\\CXPL4A8B\\\\2103.html}\n}\n\n","author_short":["Wahle, J. P.","Ruas, T.","Meuschke, N.","Gipp, B."],"bibbaseid":"wahle-ruas-meuschke-gipp-areneurallanguagemodelsgoodplagiaristsabenchmarkforneuralparaphrasedetection","role":"author","urls":{"Paper":"https://aclanthology.org/2022.emnlp-main.62"},"keyword":["Computer Science - Artificial Intelligence","Computer Science - Computation and Language","Computer Science - Digital Libraries"],"metadata":{"authorlinks":{}}},"bibtype":"inproceedings","biburl":"https://bibbase.org/f/6vn4PfYLY2SBaJEoL/Exported Items.bib","dataSources":["efHmkeMNnQ4e8rskR"],"keywords":["computer science - artificial intelligence","computer science - computation and language","computer science - digital libraries"],"search_terms":["neural","language","models","good","plagiarists","benchmark","neural","paraphrase","detection","wahle","ruas","meuschke","gipp"],"title":"Are Neural Language Models Good Plagiarists? A Benchmark for Neural Paraphrase Detection","year":null}