Trained named entity recognition using distributional clusters

Trained named entity recognition using distributional clusters. Freitag, D. Proceedings of EMNLP, 2004.

This work applies boosted wrapper induction (BWI), a machine learning algorithm for information extraction from semi-structured documents, to the problem of named entity recognition. The default feature set of BWI is augmented with features based on distributional term clusters induced from a large unlabeled text corpus. Using no traditional linguistic resources, such as syntactic tags or special-purpose gazetteers, this approach yields results near the state of the art in the MUC 6 named entity domain. Supervised learning using features derived through unsupervised corpus analysis may be regarded as an alternative to bootstrapping methods.

@article{
 title = {Trained named entity recognition using distributional clusters},
 type = {article},
 year = {2004},
 volume = {4},
 websites = {http://acl.ldc.upenn.edu/acl2004/emnlp/pdf/Freitag.pdf},
 id = {d65ce704-28d3-36bc-a753-57e773bef807},
 created = {2012-02-28T00:51:15.000Z},
 file_attached = {false},
 profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
 group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},
 last_modified = {2017-03-14T14:36:19.698Z},
 tags = {named entity recognition},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false},
 citation_key = {Freitag2004},
 private_publication = {false},
 abstract = {This work applies boosted wrapper induction (BWI), a machine learning algorithm for information extraction from semi-structured documents, to the problem of named entity recognition. The default feature set of BWI is augmented with features based on distributional term clusters induced from a large unlabeled text corpus. Using no traditional linguistic resources, such as syntactic tags or special-purpose gazetteers, this approach yields results near the state of the art in the MUC 6 named entity domain. Supervised learning using features derived through unsupervised corpus analysis may be regarded as an alternative to bootstrapping methods.},
 bibtype = {article},
 author = {Freitag, Dayne},
 journal = {Proceedings of EMNLP}
}

Downloads: 0

{"_id":"inkCEWxQoY8h7nvQa","bibbaseid":"freitag-trainednamedentityrecognitionusingdistributionalclusters-2004","authorIDs":[],"author_short":["Freitag, D."],"bibdata":{"title":"Trained named entity recognition using distributional clusters","type":"article","year":"2004","volume":"4","websites":"http://acl.ldc.upenn.edu/acl2004/emnlp/pdf/Freitag.pdf","id":"d65ce704-28d3-36bc-a753-57e773bef807","created":"2012-02-28T00:51:15.000Z","file_attached":false,"profile_id":"5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6","group_id":"066b42c8-f712-3fc3-abb2-225c158d2704","last_modified":"2017-03-14T14:36:19.698Z","tags":"named entity recognition","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"citation_key":"Freitag2004","private_publication":false,"abstract":"This work applies boosted wrapper induction (BWI), a machine learning algorithm for information extraction from semi-structured documents, to the problem of named entity recognition. The default feature set of BWI is augmented with features based on distributional term clusters induced from a large unlabeled text corpus. Using no traditional linguistic resources, such as syntactic tags or specialpurpose gazetteers, this approach yields results near the state of the art in the MUC 6 named entity domain. \nSupervised learning using features derived through unsupervised corpus analysis may be regarded as an alternative to bootstrapping methods.","bibtype":"article","author":"Freitag, Dayne","journal":"Proceedings of EMNLP","bibtex":"@article{\n title = {Trained named entity recognition using distributional clusters},\n type = {article},\n year = {2004},\n volume = {4},\n websites = {http://acl.ldc.upenn.edu/acl2004/emnlp/pdf/Freitag.pdf},\n id = {d65ce704-28d3-36bc-a753-57e773bef807},\n created = {2012-02-28T00:51:15.000Z},\n file_attached = {false},\n profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},\n group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},\n last_modified = {2017-03-14T14:36:19.698Z},\n tags = {named entity recognition},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n citation_key = {Freitag2004},\n private_publication = {false},\n abstract = {This work applies boosted wrapper induction (BWI), a machine learning algorithm for information extraction from semi-structured documents, to the problem of named entity recognition. The default feature set of BWI is augmented with features based on distributional term clusters induced from a large unlabeled text corpus. Using no traditional linguistic resources, such as syntactic tags or specialpurpose gazetteers, this approach yields results near the state of the art in the MUC 6 named entity domain. \nSupervised learning using features derived through unsupervised corpus analysis may be regarded as an alternative to bootstrapping methods.},\n bibtype = {article},\n author = {Freitag, Dayne},\n journal = {Proceedings of EMNLP}\n}","author_short":["Freitag, D."],"urls":{"Website":"http://acl.ldc.upenn.edu/acl2004/emnlp/pdf/Freitag.pdf"},"bibbaseid":"freitag-trainednamedentityrecognitionusingdistributionalclusters-2004","role":"author","downloads":0,"html":""},"bibtype":"article","creationDate":"2020-02-06T23:48:12.095Z","downloads":0,"keywords":[],"search_terms":["trained","named","entity","recognition","using","distributional","clusters","freitag"],"title":"Trained named entity recognition using distributional clusters","year":2004}