Multi-criteria-based active learning for named entity recognition. Shen, D., Zhang, J., Su, J., Zhou, G., & Tan, C.-L. In Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics (ACL-04), pages 589–596. Association for Computational Linguistics, 2004.
Multi-criteria-based active learning for named entity recognition [link]Website  abstract   bibtex   
In this paper, we propose a multi-criteria-based active learning approach and effectively apply it to named entity recognition. Active learning targets to minimize the human annotation efforts by selecting examples for labeling. To maximize the contribution of the selected examples, we consider the multiple criteria: informativeness, representativeness and diversity and propose measures to quantify them. More comprehensively, we incorporate all the criteria using two selection strategies, both of which result in less labeling cost than single-criterion-based method. The results of the named entity recognition in both MUC-6 and GENIA show that the labeling cost can be reduced by at least 80% without degrading the performance.
@comment{
  Shen et al., ACL 2004 conference paper, cleaned up from a Mendeley/BibBase export.
  Changes from the export: added the missing citation key (taken from the
  citation_key field); retyped from article to inproceedings and moved the
  venue from journal to booktitle; dropped the junk fields identifiers
  ([object Object]), volume (pages), type and bibtype; renamed websites to url;
  escaped the percent sign in the abstract.
  NOTE(review): the end page (596) and the DOI prefix (10.3115) come from the
  publisher record, not from the export, which only had "589-es" and the
  doid fragment in the URL. Confirm against the ACL Anthology before relying on them.
}
@inproceedings{Shen2004,
  author              = {Shen, Dan and Zhang, Jie and Su, Jian and Zhou, Guodong and Tan, Chew-Lim},
  title               = {Multi-Criteria-Based Active Learning for Named Entity Recognition},
  booktitle           = {Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics ({ACL}-04)},
  pages               = {589--596},
  year                = {2004},
  publisher           = {Association for Computational Linguistics},
  doi                 = {10.3115/1218955.1219030},
  url                 = {http://portal.acm.org/citation.cfm?doid=1218955.1219030},
  abstract            = {In this paper, we propose a multi-criteria-based active learning approach and effectively apply it to named entity recognition. Active learning targets to minimize the human annotation efforts by selecting examples for labeling. To maximize the contribution of the selected examples, we consider the multiple criteria: informativeness, representativeness and diversity and propose measures to quantify them. More comprehensively, we incorporate all the criteria using two selection strategies, both of which result in less labeling cost than single-criterion-based method. The results of the named entity recognition in both MUC-6 and GENIA show that the labeling cost can be reduced by at least 80\% without degrading the performance.},
  tags                = {named entities},
  citation_key        = {Shen2004},
  id                  = {f2e8f577-b452-3b03-82f7-7dbf62087011},
  created             = {2011-12-28T07:04:55.000Z},
  last_modified       = {2017-03-14T14:36:19.698Z},
  profile_id          = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
  group_id            = {066b42c8-f712-3fc3-abb2-225c158d2704},
  file_attached       = {false},
  read                = {false},
  starred             = {false},
  authored            = {false},
  confirmed           = {true},
  hidden              = {false},
  private_publication = {false},
}

Downloads: 0