Generative vs Discriminative approaches to entity Recognition from label deficient data. Goutte, C., Gaussier, E., Cancedda, N., & Dejean, H. Machine Learning, 2004.
Generative vs Discriminative approaches to entity Recognition from label deficient data [link]Website  abstract   bibtex   
Annotating biomedical text for Named Entity Recognition (NER) is usually a tedious and expensive process, while unannotated data is freely available in large quantities. It therefore seems relevant to address biomedical NER using Machine Learning techniques that learn from a combination of labelled and unlabelled data. We consider two approaches: one is discriminative, using Support Vector Machines, the other generative, using mixture models. We compare the two on a biomedical NER task with various levels of annotation, and different similarity measures. We also investigate the use of Fisher kernels as a way to leverage the strength of both approaches. Overall the discriminative approach using standard similarity measures seems to out-perform both the generative approach and the Fisher kernels.
@comment{Goutte2004: journal-article record exported from a reference manager.
 The citation key is copied from the entry's own citation_key field.
 The non-standard websites field is kept for round-tripping and mirrored into
 the standard url field so that bibliography styles can print the link.
 Fields such as id, created, profile_id, group_id, read, starred and hidden are
 reference-manager bookkeeping; standard styles ignore unknown fields.
 NOTE(review): volume, number and pages are absent from this record -- confirm
 against the publisher's page before citing.}
@article{Goutte2004,
 title = {Generative vs Discriminative Approaches to Entity Recognition from Label Deficient Data},
 type = {article},
 year = {2004},
 keywords = {information retrieval \& textual information access,learning,natural language processing,statistics \& optimisation},
 websites = {http://eprints.pascal-network.org/archive/00000551/},
 url = {http://eprints.pascal-network.org/archive/00000551/},
 id = {691554a5-93c7-38c3-81c8-e3d59aed7ed9},
 created = {2012-01-21T12:35:31.000Z},
 file_attached = {false},
 profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
 group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},
 last_modified = {2017-03-14T14:36:19.698Z},
 tags = {named entity recognition},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false},
 citation_key = {Goutte2004},
 private_publication = {false},
 abstract = {Annotating biomedical text for Named Entity Recognition (NER) is usually a tedious and expensive process, while unannotated data is freely available in large quantities. It therefore seems relevant to address biomedical NER using Machine Learning techniques that learn from a combination of labelled and unlabelled data. We consider two approaches: one is discriminative, using Support Vector Machines, the other generative, using mixture models. We compare the two on a biomedical NER task with various levels of annotation, and different similarity measures. We also investigate the use of Fisher kernels as a way to leverage the strength of both approaches. Overall the discriminative approach using standard similarity measures seems to out-perform both the generative approach and the Fisher kernels.},
 bibtype = {article},
 author = {Goutte, Cyril and Gaussier, Eric and Cancedda, Nicola and Dejean, Herve},
 journal = {Machine Learning}
}
Downloads: 0