Organizing and Searching the World Wide Web of Facts - Step One: The One-Million Fact Extraction Challenge. Pasca, M., Lin, D., Bigham, J., Lifchits, A., & Jain, A. In AAAI 2006, 2006.
abstract   bibtex   
Due to the inherent difficulty of processing noisy text, the potential of the Web as a decentralized repository of human knowledge remains largely untapped during Web search. The access to billions of binary relations among named entities would enable new search paradigms and alternative methods for presenting the search results. A first concrete step towards building large searchable repositories of factual knowledge is to derive such knowledge automatically at large scale from textual documents. Generalized contextual extraction patterns allow for fast iterative progression towards extracting one million facts of a given type (e.g., Person-BornIn-Year) from 100 million Web documents of arbitrary quality. The extraction starts from as few as 10 seed facts, requires no additional input knowledge or annotated text, and emphasizes scale and coverage by avoiding the use of syntactic parsers, named entity recognizers, gazetteers, and similar text processing tools and resources.
@inproceedings{Pasca2006,
  title               = {Organizing and Searching the {World Wide Web} of Facts - Step One: The One-Million Fact Extraction Challenge},
  type                = {inProceedings},
  year                = {2006},
  id                  = {e805ca89-ca66-38aa-88b0-66898b4288bd},
  created             = {2011-02-24T21:47:51.000Z},
  file_attached       = {false},
  profile_id          = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
  group_id            = {066b42c8-f712-3fc3-abb2-225c158d2704},
  last_modified       = {2017-03-14T14:36:19.698Z},
  read                = {false},
  starred             = {false},
  authored            = {false},
  confirmed           = {true},
  hidden              = {false},
  citation_key        = {Pasca2006},
  private_publication = {false},
  abstract            = {Due to the inherent difficulty of processing noisy text, the potential of the Web as a decentralized repository of human knowledge remains largely untapped during Web search. The access to billions of binary relations among named entities would enable new search paradigms and alternative methods for presenting the search results. A first concrete step towards building large searchable repositories of factual knowledge is to derive such knowledge automatically at large scale from textual documents. Generalized contextual extraction patterns allow for fast iterative progression towards extracting one million facts of a given type (e.g., Person-BornIn-Year) from 100 million Web documents of arbitrary quality. The extraction starts from as few as 10 seed facts, requires no additional input knowledge or annotated text, and emphasizes scale and coverage by avoiding the use of syntactic parsers, named entity recognizers, gazetteers, and similar text processing tools and resources.},
  bibtype             = {inProceedings},
  author              = {Pasca, Marius and Lin, Dekang and Bigham, Jeffrey and Lifchits, Andrei and Jain, Alpa},
  booktitle           = {AAAI 2006}
}

Downloads: 0