Generative vs Discriminative approaches to entity Recognition from label deficient data

Generative vs Discriminative approaches to entity Recognition from label deficient data. Goutte, C., Gaussier, E., Cancedda, N., & Dejean, H. Machine Learning, 2004.

Website abstract bibtex

Annotating biomedical text for Named Entity Recognition (NER) is usually a tedious and expensive process, while unannotated data is freely available in large quantities. It therefore seems relevant to address biomedical NER using Machine Learning techniques that learn from a combination of labelled and unlabelled data. We consider two approaches: one is discriminative, using Support Vector Machines, the other generative, using mixture models. We compare the two on a biomedical NER task with various levels of annotation, and different similarity measures. We also investigate the use of Fisher kernels as a way to leverage the strength of both approaches. Overall the discriminative approach using standard similarity measures seems to out-perform both the generative approach and the Fisher kernels.

% Journal article record. The citation key is taken from the entry's own
% citation_key field; url duplicates the non-standard websites field so that
% standard styles can pick up the link.
@article{Goutte2004,
 title = {Generative vs Discriminative approaches to entity Recognition from label deficient data},
 type = {article},
 year = {2004},
 keywords = {information retrieval & textual information access,learning,natural language processing,statistics & optimisation},
 websites = {http://eprints.pascal-network.org/archive/00000551/},
 url = {http://eprints.pascal-network.org/archive/00000551/},
 id = {691554a5-93c7-38c3-81c8-e3d59aed7ed9},
 created = {2012-01-21T12:35:31.000Z},
 file_attached = {false},
 profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
 group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},
 last_modified = {2017-03-14T14:36:19.698Z},
 tags = {named entity recognition},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false},
 citation_key = {Goutte2004},
 private_publication = {false},
 abstract = {Annotating biomedical text for Named Entity Recognition (NER) is usually a tedious and expensive process, while unannotated data is freely available in large quantities. It therefore seems relevant to address biomedical NER using Machine Learning techniques that learn from a combination of labelled and unlabelled data. We consider two approaches: one is discriminative, using Support Vector Machines, the other generative, using mixture models. We compare the two on a biomedical NER task with various levels of annotation, and different similarity measures. We also investigate the use of Fisher kernels as a way to leverage the strength of both approaches. Overall the discriminative approach using standard similarity measures seems to out-perform both the generative approach and the Fisher kernels.},
 bibtype = {article},
 author = {Goutte, Cyril and Gaussier, Eric and Cancedda, Nicola and Dejean, Herve},
 journal = {Machine Learning},
}

Downloads: 0

{"_id":"mskcGQGq6bRTy26mR","bibbaseid":"goutte-gaussier-cancedda-dejean-generativevsdiscriminativeapproachestoentityrecognitionfromlabeldeficientdata-2004","authorIDs":[],"author_short":["Goutte, C.","Gaussier, E.","Cancedda, N.","Dejean, H."],"bibdata":{"title":"Generative vs Discriminative approaches to entity Recognition from label deficient data","type":"article","year":"2004","keywords":"information retrieval & textual information access,learning,natural language processing,statistics & optimisation","websites":"http://eprints.pascal-network.org/archive/00000551/","id":"691554a5-93c7-38c3-81c8-e3d59aed7ed9","created":"2012-01-21T12:35:31.000Z","file_attached":false,"profile_id":"5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6","group_id":"066b42c8-f712-3fc3-abb2-225c158d2704","last_modified":"2017-03-14T14:36:19.698Z","tags":"named entity recognition","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"citation_key":"Goutte2004","private_publication":false,"abstract":"Annotating biomedical text for Named Entity Recognition (NER) is usually a tedious and expensive process, while unannotated data is freely available in large quantities. It therefore seems relevant to address biomedical NER using Machine Learning techniques that learn from a combination of labelled and unlabelled data. We consider two approaches: one is discriminative, using Support Vector Machines, the other generative, using mixture models. We compare the two on a biomedical NER task with various levels of annotation, and different similarity measures. We also investigate the use of Fisher kernels as a way to leverage the strength of both approaches. 
Overall the discriminative approach using standard similarity measures seems to out-perform both the generative approach and the Fisher kernels.","bibtype":"article","author":"Goutte, Cyril and Gaussier, Eric and Cancedda, Nicola and Dejean, Herve","journal":"Machine Learning","bibtex":"@article{\n title = {Generative vs Discriminative approaches to entity Recognition from label deficient data},\n type = {article},\n year = {2004},\n keywords = {information retrieval & textual information access,learning,natural language processing,statistics & optimisation},\n websites = {http://eprints.pascal-network.org/archive/00000551/},\n id = {691554a5-93c7-38c3-81c8-e3d59aed7ed9},\n created = {2012-01-21T12:35:31.000Z},\n file_attached = {false},\n profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},\n group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},\n last_modified = {2017-03-14T14:36:19.698Z},\n tags = {named entity recognition},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n citation_key = {Goutte2004},\n private_publication = {false},\n abstract = {Annotating biomedical text for Named Entity Recognition (NER) is usually a tedious and expensive process, while unannotated data is freely available in large quantities. It therefore seems relevant to address biomedical NER using Machine Learning techniques that learn from a combination of labelled and unlabelled data. We consider two approaches: one is discriminative, using Support Vector Machines, the other generative, using mixture models. We compare the two on a biomedical NER task with various levels of annotation, and different similarity measures. We also investigate the use of Fisher kernels as a way to leverage the strength of both approaches. 
Overall the discriminative approach using standard similarity measures seems to out-perform both the generative approach and the Fisher kernels.},\n bibtype = {article},\n author = {Goutte, Cyril and Gaussier, Eric and Cancedda, Nicola and Dejean, Herve},\n journal = {Machine Learning}\n}","author_short":["Goutte, C.","Gaussier, E.","Cancedda, N.","Dejean, H."],"urls":{"Website":"http://eprints.pascal-network.org/archive/00000551/"},"bibbaseid":"goutte-gaussier-cancedda-dejean-generativevsdiscriminativeapproachestoentityrecognitionfromlabeldeficientdata-2004","role":"author","keyword":["information retrieval & textual information access","learning","natural language processing","statistics & optimisation"],"downloads":0,"html":""},"bibtype":"article","creationDate":"2020-02-06T23:48:11.935Z","downloads":0,"keywords":["information retrieval & textual information access","learning","natural language processing","statistics & optimisation"],"search_terms":["generative","discriminative","approaches","entity","recognition","label","deficient","data","goutte","gaussier","cancedda","dejean"],"title":"Generative vs Discriminative approaches to entity Recognition from label deficient data","year":2004}