Acquisition of categorized named entities for web search. Pasca, M. In Proceedings of the Thirteenth ACM Conference on Information and Knowledge Management, CIKM '04, pages 137-145, 2004. ACM Press.
Acquisition of categorized named entities for web search [link]Website  abstract   bibtex   
The recognition of names and their associated categories within unstructured text traditionally relies on semantic lexicons and gazetteers. The amount of effort required to assemble large lexicons confines the recognition to either a limited domain (e.g., medical imaging), or a small set of pre-defined, broader categories of interest (e.g., persons, countries, organizations, products). This constitutes a serious limitation in an information seeking context. In this case, the categories of potential interest to users are more diverse (universities, agencies, retailers, celebrities), often refined (e.g., SLR digital cameras, programming languages, multinational oil companies), and usually overlapping (e.g., the same entity may be concurrently a brand name, a technology company, and an industry leader). We present a lightly supervised method for acquiring named entities in arbitrary categories. The method applies lightweight lexico-syntactic extraction patterns to the unstructured text of Web documents. The method is a departure from traditional approaches to named entity recognition in that: 1) it does not require any start-up seed names or training; 2) it does not encode any domain knowledge in its extraction patterns; 3) it is only lightly supervised, and data-driven; 4) it does not impose any a-priori restriction on the categories of extracted names. We illustrate applications of the method in Web search, and describe experiments on 500 million Web documents and news articles.
@comment{Conference paper (CIKM 2004). The citation key is taken from the entry's own citation_key field; the doi is derived from the doid parameter of the ACM portal URL. Fields from id through private_publication are reference-manager metadata that bibliography styles ignore.}
@inproceedings{Pasca2004,
 author = {Pasca, Marius},
 title = {Acquisition of categorized named entities for web search},
 booktitle = {Proceedings of the Thirteenth {ACM} Conference on Information and Knowledge Management},
 series = {CIKM '04},
 pages = {137--145},
 year = {2004},
 publisher = {ACM Press},
 doi = {10.1145/1031171.1031194},
 url = {http://portal.acm.org/citation.cfm?doid=1031171.1031194},
 keywords = {entity extraction,information,lightweight text processing,named,related names categories,web information retrieval},
 type = {inProceedings},
 id = {ac4878ca-d282-3f69-956e-bed85660e197},
 created = {2011-12-28T07:04:55.000Z},
 file_attached = {false},
 profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
 group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},
 last_modified = {2017-03-14T14:36:19.698Z},
 tags = {named entities},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false},
 citation_key = {Pasca2004},
 private_publication = {false},
 abstract = {The recognition of names and their associated categories within unstructured text traditionally relies on semantic lexicons and gazetteers. The amount of effort required to assemble large lexicons confines the recognition to either a limited domain (e.g., medical imaging), or a small set of pre-defined, broader categories of interest (e.g., persons, countries, organizations, products). This constitutes a serious limitation in an information seeking context. In this case, the categories of potential interest to users are more diverse (universities, agencies, retailers, celebrities), often refined (e.g., SLR digital cameras, programming languages, multinational oil companies), and usually overlapping (e.g., the same entity may be concurrently a brand name, a technology company, and an industry leader). We present a lightly supervised method for acquiring named entities in arbitrary categories. The method applies lightweight lexico-syntactic extraction patterns to the unstructured text of Web documents. The method is a departure from traditional approaches to named entity recognition in that: 1) it does not require any start-up seed names or training; 2) it does not encode any domain knowledge in its extraction patterns; 3) it is only lightly supervised, and data-driven; 4) it does not impose any a-priori restriction on the categories of extracted names. We illustrate applications of the method in Web search, and describe experiments on 500 million Web documents and news articles.},
 bibtype = {inProceedings}
}

Downloads: 0