Authorship verification with entity coherence and other rich linguistic features. Feng, V. W. & Hirst, G. In Proceedings, PAN 2013 Lab: Uncovering Plagiarism, Authorship and Social Software Misuse — at the CLEF 2013 Conference and Labs of the Evaluation Forum: Information Access Evaluation meets Multilinguality, Multimodality, and Visualization, Valencia, Spain, September, 2013. Paper abstract bibtex We adopt Koppel et al.'s unmasking approach as the major framework of our authorship verification system. We enrich Koppel et al.'s original word frequency features with a novel set of coherence features, derived from our earlier work, together with a full set of stylometric features. For texts written in languages other than English, some stylometric features are unavailable due to the lack of appropriate NLP tools, and their coherence features are derived from their translations produced by Google Translate service. Evaluated on the training corpus, we achieve an overall accuracy of 65.7%: 100.0% for both English and Spanish texts, while only 40% for Greek texts; evaluated on the test corpus, we achieve an overall accuracy of 68.2%, and roughly the same performance across three languages.
% Conference paper entry. Names are in "Last, First" form, the month uses the
% standard three-letter macro, and acronyms in the booktitle are brace-protected.
@inproceedings{feng2013p,
  author    = {Feng, Vanessa Wei and Hirst, Graeme},
  title     = {Authorship verification with entity coherence and other
               rich linguistic features},
  booktitle = {Proceedings, {PAN} 2013 Lab: Uncovering Plagiarism,
               Authorship and Social Software Misuse --- at the {CLEF} 2013
               Conference and Labs of the Evaluation Forum: Information
               Access Evaluation meets Multilinguality, Multimodality, and
               Visualization},
  address   = {Valencia, Spain},
  year      = {2013},
  month     = sep,
  url       = {http://www.clef-initiative.eu/documents/71612/278c06fc-20c1-4340-a6f9-eeec1e87913c},
  abstract  = {We adopt Koppel et al.'s unmasking approach as the major
               framework of our authorship verification system. We enrich
               Koppel et al.'s original word frequency features with a
               novel set of coherence features, derived from our earlier
               work, together with a full set of stylometric features. For
               texts written in languages other than English, some
               stylometric features are unavailable due to the lack of
               appropriate NLP tools, and their coherence features are
               derived from their translations produced by Google
               Translate service. Evaluated on the training corpus, we
               achieve an overall accuracy of 65.7\%: 100.0\% for both
               English and Spanish texts, while only 40\% for Greek texts;
               evaluated on the test corpus, we achieve an overall
               accuracy of 68.2\%, and roughly the same performance across
               three languages.},
}
Downloads: 0
{"_id":{"_str":"53d57a49f414ae191e0003c6"},"__v":0,"authorIDs":[],"author_short":["Feng, V. W.","Hirst, G."],"bibbaseid":"feng-hirst-authorshipverificationwithentitycoherenceandotherrichlinguisticfeatures-2013","bibdata":{"bibtype":"inproceedings","type":"inproceedings","author":[{"firstnames":["Vanessa","Wei"],"propositions":[],"lastnames":["Feng"],"suffixes":[]},{"firstnames":["Graeme"],"propositions":[],"lastnames":["Hirst"],"suffixes":[]}],"title":"Authorship verification with entity coherence and other rich linguistic features","address":"Valencia, Spain","booktitle":"Proceedings, PAN 2013 Lab: Uncovering Plagiarism, Authorship and Social Software Misuse — at the CLEF 2013 Conference and Labs of the Evaluation Forum: Information Access Evaluation meets Multilinguality, Multimodality, and Visualization)","year":"2013","month":"September","url":"http://www.clef-initiative.eu/documents/71612/278c06fc-20c1-4340-a6f9-eeec1e87913c","abstract":"We adopt Koppel et al.'s unmasking approach as the major framework of our authorship verification system. We enrich Koppel et al.'s original word frequency features with a novel set of coherence features, derived from our earlier work, together with a full set of stylometric features. For texts written in languages other than English, some stylometric features are unavailable due to the lack of appropriate NLP tools, and their coherence features are derived from their translations produced by Google Translate service. 
Evaluated on the training corpus, we achieve an overall accuracy of 65.7%: 100.0% for both English and Spanish texts, while only 40% for Greek texts; evaluated on the test corpus, we achieve an overall accuracy of 68.2%, and roughly the same performance across three languages.","bibtex":"@InProceedings{\t feng2013p,\n author\t= {Vanessa Wei Feng and Graeme Hirst},\n title\t\t= {Authorship verification with entity coherence and other\n\t\t rich linguistic features},\n address\t= {Valencia, Spain},\n booktitle\t= {Proceedings, PAN 2013 Lab: Uncovering Plagiarism,\n\t\t Authorship and Social Software Misuse --- at the CLEF 2013\n\t\t Conference and Labs of the Evaluation Forum: Information\n\t\t Access Evaluation meets Multilinguality, Multimodality, and\n\t\t Visualization)},\n year\t\t= {2013},\n month\t\t= {September},\n url\t\t= {http://www.clef-initiative.eu/documents/71612/278c06fc-20c1-4340-a6f9-eeec1e87913c}\n\t\t ,\n abstract\t= {We adopt Koppel et al.'s unmasking approach as the major\n\t\t framework of our authorship verification system. We enrich\n\t\t Koppel et al.'s original word frequency features with a\n\t\t novel set of coherence features, derived from our earlier\n\t\t work, together with a full set of stylometric features. For\n\t\t texts written in languages other than English, some\n\t\t stylometric features are unavailable due to the lack of\n\t\t appropriate NLP tools, and their coherence features are\n\t\t derived from their translations produced by Google\n\t\t Translate service. Evaluated on the training corpus, we\n\t\t achieve an overall accuracy of 65.7\\%: 100.0\\% for both\n\t\t English and Spanish texts, while only 40\\% for Greek texts;\n\t\t evaluated on the test corpus, we achieve an overall\n\t\t accuracy of 68.2\\%, and roughly the same performance across\n\t\t three languages.}\n}\n\n","author_short":["Feng, V. 
W.","Hirst, G."],"key":"feng2013p","id":"feng2013p","bibbaseid":"feng-hirst-authorshipverificationwithentitycoherenceandotherrichlinguisticfeatures-2013","role":"author","urls":{"Paper":"http://www.clef-initiative.eu/documents/71612/278c06fc-20c1-4340-a6f9-eeec1e87913c"},"metadata":{"authorlinks":{}}},"bibtype":"inproceedings","biburl":"www.cs.toronto.edu/~fritz/tmp/compling.bib","creationDate":"2014-07-27T22:16:41.268Z","downloads":0,"keywords":[],"search_terms":["authorship","verification","entity","coherence","rich","linguistic","features","feng","hirst"],"title":"Authorship verification with entity coherence and other rich linguistic features","year":2013,"dataSources":["n8jB5BJxaeSmH6mtR","6b6A9kbkw4CsEGnRX"]}