High accuracy citation extraction and named entity recognition for a heterogeneous corpus of academic papers. Powley, B. & Dale, R. In 2007 International Conference on Natural Language Processing and Knowledge Engineering, pages 119–124, 2007. IEEE.
Website abstract bibtex Citation indices are increasingly being used not only as navigational tools for researchers, but also as the basis for measurement of academic performance and research impact. This means that the reliability of tools used to extract citations and construct such indices is becoming more critical; however, existing approaches to citation extraction still fall short of the high accuracy required if critical assessments are to be based on them. In this paper, we present techniques for high accuracy extraction of citations from academic papers, designed for applicability across a broad range of disciplines and document styles. We integrate citation extraction, reference parsing, and author named entity recognition to significantly improve performance in citation extraction, and demonstrate this performance on a cross-disciplinary heterogeneous corpus. Applying our algorithm to previously unseen documents, we demonstrate high F-measure performance of 0.98 for author named entity recognition and 0.97 for citation extraction.
@comment{
  Powley and Dale (2007): conference paper on citation extraction and
  author named entity recognition.
  The fields after "booktitle/year/pages/publisher/url/abstract" (id, created,
  profile_id, group_id, read, starred, ...) are not standard BibTeX fields;
  they appear to be reference-manager export metadata (TODO confirm origin)
  and are ignored by standard bibliography styles.
}
@inproceedings{Powley2007,
  author              = {Powley, Brett and Dale, Robert},
  title               = {High accuracy citation extraction and named entity recognition for a heterogeneous corpus of academic papers},
  booktitle           = {2007 International Conference on Natural Language Processing and Knowledge Engineering},
  year                = {2007},
  pages               = {119--124},
  publisher           = {{IEEE}},
  institution         = {IEEE},
  url                 = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4368021},
  abstract            = {Citation indices are increasingly being used not only as navigational tools for researchers, but also as the basis for measurement of academic performance and research impact. This means that the reliability of tools used to extract citations and construct such indices is becoming more critical; however, existing approaches to citation extraction still fall short of the high accuracy required if critical assessments are to be based on them. In this paper, we present techniques for high accuracy extraction of citations from academic papers, designed for applicability across a broad range of disciplines and document styles. We integrate citation extraction, reference parsing, and author named entity recognition to significantly improve performance in citation extraction, and demonstrate this performance on a cross-disciplinary heterogeneous corpus. Applying our algorithm to previously unseen documents, we demonstrate high F-measure performance of 0.98 for author named entity recognition and 0.97 for citation extraction.},
  tags                = {citation extraction,named entity recognition},
  type                = {inProceedings},
  bibtype             = {inProceedings},
  citation_key        = {Powley2007},
  id                  = {a533c015-d4cd-3c83-822e-4684afdd8157},
  profile_id          = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
  group_id            = {066b42c8-f712-3fc3-abb2-225c158d2704},
  created             = {2012-12-24T15:02:36.000Z},
  last_modified       = {2017-03-14T14:36:19.698Z},
  file_attached       = {false},
  read                = {false},
  starred             = {false},
  authored            = {false},
  confirmed           = {true},
  hidden              = {false},
  private_publication = {false}
}
Downloads: 0
{"_id":"hdHMFQD6yPgH3WP5S","bibbaseid":"powley-dale-highaccuracycitationextractionandnamedentityrecognitionforaheterogeneouscorpusofacademicpapers-2007","authorIDs":[],"author_short":["Powley, B.","Dale, R."],"bibdata":{"title":"High accuracy citation extraction and named entity recognition for a heterogeneous corpus of academic papers","type":"inProceedings","year":"2007","identifiers":"[object Object]","pages":"119-124","websites":"http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4368021","publisher":"Ieee","institution":"IEEE","id":"a533c015-d4cd-3c83-822e-4684afdd8157","created":"2012-12-24T15:02:36.000Z","file_attached":false,"profile_id":"5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6","group_id":"066b42c8-f712-3fc3-abb2-225c158d2704","last_modified":"2017-03-14T14:36:19.698Z","tags":"citation extraction,named entity recognition","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"citation_key":"Powley2007","private_publication":false,"abstract":"Citation indices are increasingly being used not only as navigational tools for re- searchers, but also as the basis for mea- surement of academic performance and re- search impact. This means that the reliabil- ity of tools used to extract citations and con- struct such indices is becoming more crit- ical; however, existing approaches to cita- tion extraction still fall short of the high ac- curacy required if critical assessments are to be based on them. In this paper, we present techniques for high accuracy extrac- tion of citations from academic papers, de- signed for applicability across a broad range of disciplines and document styles. We in- tegrate citation extraction, reference pars- ing, and author named entity recognition to significantly improve performance in cita- tion extraction, and demonstrate this per- formance on a cross-disciplinary heteroge- neous corpus. 
Applying our algorithm to previously unseen documents, we demon- strate high F-measure performance of 0.98 for author named entity recognition and 0.97 for citation extraction.","bibtype":"inProceedings","author":"Powley, Brett and Dale, Robert","booktitle":"2007 International Conference on Natural Language Processing and Knowledge Engineering","bibtex":"@inProceedings{\n title = {High accuracy citation extraction and named entity recognition for a heterogeneous corpus of academic papers},\n type = {inProceedings},\n year = {2007},\n identifiers = {[object Object]},\n pages = {119-124},\n websites = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4368021},\n publisher = {Ieee},\n institution = {IEEE},\n id = {a533c015-d4cd-3c83-822e-4684afdd8157},\n created = {2012-12-24T15:02:36.000Z},\n file_attached = {false},\n profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},\n group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},\n last_modified = {2017-03-14T14:36:19.698Z},\n tags = {citation extraction,named entity recognition},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n citation_key = {Powley2007},\n private_publication = {false},\n abstract = {Citation indices are increasingly being used not only as navigational tools for re- searchers, but also as the basis for mea- surement of academic performance and re- search impact. This means that the reliabil- ity of tools used to extract citations and con- struct such indices is becoming more crit- ical; however, existing approaches to cita- tion extraction still fall short of the high ac- curacy required if critical assessments are to be based on them. In this paper, we present techniques for high accuracy extrac- tion of citations from academic papers, de- signed for applicability across a broad range of disciplines and document styles. 
We in- tegrate citation extraction, reference pars- ing, and author named entity recognition to significantly improve performance in cita- tion extraction, and demonstrate this per- formance on a cross-disciplinary heteroge- neous corpus. Applying our algorithm to previously unseen documents, we demon- strate high F-measure performance of 0.98 for author named entity recognition and 0.97 for citation extraction.},\n bibtype = {inProceedings},\n author = {Powley, Brett and Dale, Robert},\n booktitle = {2007 International Conference on Natural Language Processing and Knowledge Engineering}\n}","author_short":["Powley, B.","Dale, R."],"urls":{"Website":"http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4368021"},"bibbaseid":"powley-dale-highaccuracycitationextractionandnamedentityrecognitionforaheterogeneouscorpusofacademicpapers-2007","role":"author","downloads":0,"html":""},"bibtype":"inProceedings","creationDate":"2020-02-06T23:48:12.211Z","downloads":0,"keywords":[],"search_terms":["high","accuracy","citation","extraction","named","entity","recognition","heterogeneous","corpus","academic","papers","powley","dale"],"title":"High accuracy citation extraction and named entity recognition for a heterogeneous corpus of academic papers","year":2007}