Authorship attribution via network motifs identification. Marinho, V. Q., Hirst, G., & Amancio, D. R. In Proceedings, 5th Brazilian Conference on Intelligent Systems (BRACIS), pages ???--???, Recife, Brazil, October, 2016. abstract bibtex Concepts and methods of complex networks can be used to analyse texts at their different complexity levels. Examples of natural language processing (NLP) tasks studied via topological analysis of networks are keyword identification, automatic extractive summarization and authorship attribution. Even though a myriad of network measurements have been applied to study the authorship attribution problem, the use of motifs for text analysis has been restricted to a few works. The goal of this paper is to apply the concept of motifs, recurrent interconnection patterns, in the authorship attribution task. The absolute frequencies of all thirteen directed motifs with three nodes were extracted from the co-occurrence networks and used as classification features. The effectiveness of these features was verified with four machine learning methods. The results show that motifs are able to distinguish the writing style of different authors. In our best scenario, 57.5% of the books were correctly classified. The chance baseline for this problem is 12.5%. In addition, we have found that function words play an important role in these recurrent patterns. Taken together, our findings suggest that motifs should be further explored in other related linguistic tasks.
@comment{Conference paper entry (key Marinho2016BRACIS).
  NOTE(review): the pages field still holds the placeholder ???--??? and
  should be replaced with the final page range once it is confirmed.
  The download field is non-standard; it is kept unchanged on the
  assumption that external tooling reads it -- TODO confirm.}
@inproceedings{Marinho2016BRACIS,
  author    = {Marinho, Vanessa Queiroz and Hirst, Graeme and Amancio, Diego Raphael},
  title     = {Authorship attribution via network motifs identification},
  address   = {Recife, Brazil},
  booktitle = {Proceedings, 5th Brazilian Conference on Intelligent Systems ({BRACIS})},
  pages     = {???--???},
  year      = {2016},
  month     = oct,
  download  = {http://ftp.cs.toronto.edu/pub/gh/Marinho-etal-BRACIS-2016.pdf},
  abstract  = {Concepts and methods of complex networks can be used to
               analyse texts at their different complexity levels.
               Examples of natural language processing (NLP) tasks
               studied via topological analysis of networks are keyword
               identification, automatic extractive summarization and
               authorship attribution. Even though a myriad of network
               measurements have been applied to study the authorship
               attribution problem, the use of motifs for text analysis
               has been restricted to a few works. The goal of this
               paper is to apply the concept of motifs, recurrent
               interconnection patterns, in the authorship attribution
               task. The absolute frequencies of all thirteen directed
               motifs with three nodes were extracted from the
               co-occurrence networks and used as classification
               features. The effectiveness of these features was
               verified with four machine learning methods. The results
               show that motifs are able to distinguish the writing
               style of different authors. In our best scenario, 57.5\%
               of the books were correctly classified. The chance
               baseline for this problem is 12.5\%. In addition, we have
               found that function words play an important role in these
               recurrent patterns. Taken together, our findings suggest
               that motifs should be further explored in other related
               linguistic tasks.}
}
Downloads: 0
{"_id":"DfJXBmh3PqW2dLPeC","bibbaseid":"marinho-hirst-amancio-authorshipattributionvianetworkmotifsidentification-2016","downloads":0,"creationDate":"2016-10-19T19:03:54.018Z","title":"Authorship attribution via network motifs identification","author_short":["Marinho, V. Q.","Hirst, G.","Amancio, D. R."],"year":2016,"bibtype":"inproceedings","biburl":"http://www.cs.toronto.edu/compling/all_bib.bib","bibdata":{"bibtype":"inproceedings","type":"inproceedings","author":[{"firstnames":["Vanessa","Queiroz"],"propositions":[],"lastnames":["Marinho"],"suffixes":[]},{"firstnames":["Graeme"],"propositions":[],"lastnames":["Hirst"],"suffixes":[]},{"firstnames":["Diego","Raphael"],"propositions":[],"lastnames":["Amancio"],"suffixes":[]}],"title":"Authorship attribution via network motifs identification","address":"Recife, Brazil","booktitle":"Proceedings, 5th Brazilian Conference on Intelligent Systems (BRACIS)","pages":"???--???","year":"2016","month":"October","download":"http://ftp.cs.toronto.edu/pub/gh/Marinho-etal-BRACIS-2016.pdf","abstract":"Concepts and methods of complex networks can be used to analyse texts at their different complexity levels. Examples of natural language processing (NLP) tasks studied via topological analysis of networks are keyword identification, automatic extractive summarization and authorship attribution. Even though a myriad of network measurements have been applied to study the authorship attribution problem, the use of motifs for text analysis has been restricted to a few works. The goal of this paper is to apply the concept of motifs, recurrent interconnection patterns, in the authorship attribution task. The absolute frequencies of all thirteen directed motifs with three nodes were extracted from the co-occurrence networks and used as classification features. The effectiveness of these features was verified with four machine learning methods. The results show that motifs are able to distinguish the writing style of different authors. 
In our best scenario, 57.5% of the books were correctly classified. The chance baseline for this problem is 12.5%. In addition, we have found that function words play an important role in these recurrent patterns. Taken together, our findings suggest that motifs should be further explored in other related linguistic tasks. ","bibtex":"@inproceedings{Marinho2016BRACIS,\n author = {Vanessa Queiroz Marinho and Graeme Hirst and Diego Raphael Amancio},\n title = {Authorship attribution via network motifs identification},\n address = {Recife, Brazil},\n booktitle = {Proceedings, 5th Brazilian Conference on Intelligent Systems\n(BRACIS)},\n pages = {???--???},\n year = {2016},\n month = {October},\n download = {http://ftp.cs.toronto.edu/pub/gh/Marinho-etal-BRACIS-2016.pdf},\n abstract = {Concepts and methods of complex networks can be used to\n analyse texts at their different complexity\n levels. Examples of natural language processing\n (NLP) tasks studied via topological analysis of\n networks are keyword identification, automatic\n extractive summarization and authorship\n attribution. Even though a myriad of network\n measurements have been applied to study the \n authorship attribution problem, the use of motifs\n for text analysis has been restricted to a few \n works. The goal of this paper is to apply the \n concept of motifs, recurrent interconnection\n patterns, in the authorship attribution task. The \n absolute frequencies of all thirteen directed motifs\n with three nodes were extracted from the \n co-occurrence networks and used as classification\n features. The effectiveness of these features was \n verified with four machine learning methods. The \n results show that motifs are able to distinguish the \n writing style of different authors. In our best\n scenario, 57.5\\% of the books were correctly\n classified. The chance baseline for this problem is\n 12.5\\%. 
In addition, we have found that function\n words play an important role in these recurrent\n patterns. Taken together, our findings suggest that\n motifs should be further explored in other related\n linguistic tasks. }\n}\n\n\n","author_short":["Marinho, V. Q.","Hirst, G.","Amancio, D. R."],"key":"Marinho2016BRACIS","id":"Marinho2016BRACIS","bibbaseid":"marinho-hirst-amancio-authorshipattributionvianetworkmotifsidentification-2016","role":"author","urls":{},"downloads":0},"search_terms":["authorship","attribution","via","network","motifs","identification","marinho","hirst","amancio"],"keywords":[],"authorIDs":[],"dataSources":["2vBSdbWEoTEQZtb6g"]}