Trained named entity recognition using distributional clusters. Freitag, D. Proceedings of EMNLP, 2004. Website abstract bibtex This work applies boosted wrapper induction (BWI), a machine learning algorithm for information extraction from semi-structured documents, to the problem of named entity recognition. The default feature set of BWI is augmented with features based on distributional term clusters induced from a large unlabeled text corpus. Using no traditional linguistic resources, such as syntactic tags or special-purpose gazetteers, this approach yields results near the state of the art in the MUC 6 named entity domain. Supervised learning using features derived through unsupervised corpus analysis may be regarded as an alternative to bootstrapping methods.
% Freitag (2004), conference paper in the EMNLP proceedings.
% The citation key is taken from the entry's own citation_key field.
% Fields such as id, created, profile_id, group_id, last_modified, read,
% starred, authored, confirmed, hidden, file_attached, private_publication,
% type, bibtype and websites appear to be reference-manager export metadata;
% standard styles ignore unknown fields, so they are kept unchanged.
% NOTE(review): volume = {4} looks like an export artifact -- confirm against
% the published proceedings before relying on it.
@inproceedings{Freitag2004,
  author = {Freitag, Dayne},
  title = {Trained named entity recognition using distributional clusters},
  booktitle = {Proceedings of EMNLP},
  year = {2004},
  volume = {4},
  url = {http://acl.ldc.upenn.edu/acl2004/emnlp/pdf/Freitag.pdf},
  abstract = {This work applies boosted wrapper induction (BWI), a machine learning algorithm for information extraction from semi-structured documents, to the problem of named entity recognition. The default feature set of BWI is augmented with features based on distributional term clusters induced from a large unlabeled text corpus. Using no traditional linguistic resources, such as syntactic tags or special-purpose gazetteers, this approach yields results near the state of the art in the MUC 6 named entity domain. Supervised learning using features derived through unsupervised corpus analysis may be regarded as an alternative to bootstrapping methods.},
  tags = {named entity recognition},
  type = {article},
  bibtype = {article},
  websites = {http://acl.ldc.upenn.edu/acl2004/emnlp/pdf/Freitag.pdf},
  id = {d65ce704-28d3-36bc-a753-57e773bef807},
  created = {2012-02-28T00:51:15.000Z},
  file_attached = {false},
  profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
  group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},
  last_modified = {2017-03-14T14:36:19.698Z},
  read = {false},
  starred = {false},
  authored = {false},
  confirmed = {true},
  hidden = {false},
  citation_key = {Freitag2004},
  private_publication = {false},
}
Downloads: 0
{"_id":"inkCEWxQoY8h7nvQa","bibbaseid":"freitag-trainednamedentityrecognitionusingdistributionalclusters-2004","authorIDs":[],"author_short":["Freitag, D."],"bibdata":{"title":"Trained named entity recognition using distributional clusters","type":"article","year":"2004","volume":"4","websites":"http://acl.ldc.upenn.edu/acl2004/emnlp/pdf/Freitag.pdf","id":"d65ce704-28d3-36bc-a753-57e773bef807","created":"2012-02-28T00:51:15.000Z","file_attached":false,"profile_id":"5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6","group_id":"066b42c8-f712-3fc3-abb2-225c158d2704","last_modified":"2017-03-14T14:36:19.698Z","tags":"named entity recognition","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"citation_key":"Freitag2004","private_publication":false,"abstract":"This work applies boosted wrapper induction (BWI), a machine learning algorithm for information extraction from semi-structured documents, to the problem of named entity recognition. The default feature set of BWI is augmented with features based on distributional term clusters induced from a large unlabeled text corpus. Using no traditional linguistic resources, such as syntactic tags or specialpurpose gazetteers, this approach yields results near the state of the art in the MUC 6 named entity domain. 
Supervised learning using features derived through unsupervised corpus analysis may be regarded as an alternative to bootstrapping methods.","bibtype":"article","author":"Freitag, Dayne","journal":"Proceedings of EMNLP","bibtex":"@article{\n title = {Trained named entity recognition using distributional clusters},\n type = {article},\n year = {2004},\n volume = {4},\n websites = {http://acl.ldc.upenn.edu/acl2004/emnlp/pdf/Freitag.pdf},\n id = {d65ce704-28d3-36bc-a753-57e773bef807},\n created = {2012-02-28T00:51:15.000Z},\n file_attached = {false},\n profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},\n group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},\n last_modified = {2017-03-14T14:36:19.698Z},\n tags = {named entity recognition},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n citation_key = {Freitag2004},\n private_publication = {false},\n abstract = {This work applies boosted wrapper induction (BWI), a machine learning algorithm for information extraction from semi-structured documents, to the problem of named entity recognition. The default feature set of BWI is augmented with features based on distributional term clusters induced from a large unlabeled text corpus. Using no traditional linguistic resources, such as syntactic tags or specialpurpose gazetteers, this approach yields results near the state of the art in the MUC 6 named entity domain. 
Supervised learning using features derived through unsupervised corpus analysis may be regarded as an alternative to bootstrapping methods.},\n bibtype = {article},\n author = {Freitag, Dayne},\n journal = {Proceedings of EMNLP}\n}","author_short":["Freitag, D."],"urls":{"Website":"http://acl.ldc.upenn.edu/acl2004/emnlp/pdf/Freitag.pdf"},"bibbaseid":"freitag-trainednamedentityrecognitionusingdistributionalclusters-2004","role":"author","downloads":0,"html":""},"bibtype":"article","creationDate":"2020-02-06T23:48:12.095Z","downloads":0,"keywords":[],"search_terms":["trained","named","entity","recognition","using","distributional","clusters","freitag"],"title":"Trained named entity recognition using distributional clusters","year":2004}