WebSets: extracting sets of entities from the web using unsupervised information extraction

WebSets: extracting sets of entities from the web using unsupervised information extraction. Dalvi, B. B., Cohen, W. W., & Callan, J. In Proceedings of the fifth ACM international conference on Web search and data mining - WSDM '12, pages 243, 2012. ACM Press.

Website abstract bibtex

We describe an open-domain information extraction method for extracting concept-instance pairs from an HTML corpus. Most earlier approaches to this problem rely on combining clusters of distributionally similar terms and concept-instance pairs obtained with Hearst patterns. In contrast, our method relies on a novel approach for clustering terms found in HTML tables, and then assigning concept names to these clusters using Hearst patterns. The method can be efficiently applied to a large corpus, and experimental results on several datasets show that our method can accurately extract large numbers of concept-instance pairs.

@comment{
  Conference paper exported from a Mendeley/BibBase profile.
  The citation key is taken from the entry's own citation_key field.
  The doi value is the identifier carried in the doid parameter of the url.
  The pages field holds only the first page as exported; the full page
  range is not recorded here and should be confirmed against the publisher.
  Fields after url are reference-manager bookkeeping and are ignored by
  standard bibliography styles.
}
@inproceedings{Dalvi2012,
 author = {Dalvi, Bhavana Bharat and Cohen, William W. and Callan, Jamie},
 title = {{WebSets}: extracting sets of entities from the web using unsupervised information extraction},
 booktitle = {Proceedings of the fifth {ACM} international conference on Web search and data mining - {WSDM} '12},
 pages = {243},
 year = {2012},
 publisher = {ACM Press},
 address = {New York, New York, USA},
 doi = {10.1145/2124295.2124327},
 url = {http://dl.acm.org/citation.cfm?doid=2124295.2124327},
 type = {inProceedings},
 id = {0778e0d0-e371-3ede-8eba-aac11d02526a},
 created = {2012-04-01T16:32:49.000Z},
 accessed = {2012-03-28},
 file_attached = {false},
 profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
 group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},
 last_modified = {2017-03-14T14:36:19.698Z},
 tags = {named entities},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false},
 citation_key = {Dalvi2012},
 private_publication = {false},
 abstract = {We describe a open-domain information extraction method for extracting concept-instance pairs from an HTML corpus. Most earlier approaches to this problem rely on combining clusters of distributionally similar terms and concept-instance pairs obtained with Hearst patterns. In contrast, our method relies on a novel approach for clustering terms found in HTML tables, and then assigning concept names to these clusters using Hearst patterns. The method can be efficiently applied to a large corpus, and experimental results on several datasets show that our method can accurately extract large numbers of concept-instance pairs.},
 bibtype = {inProceedings}
}

Downloads: 0

{"_id":"pWWDBY5rtFiAknWLg","bibbaseid":"dalvi-cohen-callan-websetsextractingsetsofentitiesfromthewebusingunsupervisedinformationextraction-2012","authorIDs":[],"author_short":["Dalvi, B., B.","Cohen, W., W.","Callan, J."],"bibdata":{"title":"WebSets: extracting sets of entities from the web using unsupervised information extraction","type":"inProceedings","year":"2012","identifiers":"[object Object]","pages":"243","websites":"http://dl.acm.org/citation.cfm?doid=2124295.2124327","publisher":"ACM Press","city":"New York, New York, USA","id":"0778e0d0-e371-3ede-8eba-aac11d02526a","created":"2012-04-01T16:32:49.000Z","accessed":"2012-03-28","file_attached":false,"profile_id":"5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6","group_id":"066b42c8-f712-3fc3-abb2-225c158d2704","last_modified":"2017-03-14T14:36:19.698Z","tags":"named entities","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"citation_key":"Dalvi2012","private_publication":false,"abstract":"We describe a open-domain information extraction method for extracting concept-instance pairs from an HTML corpus. Most earlier approaches to this problem rely on combining clusters of distributionally similar terms and concept-instance pairs obtained with Hearst patterns. In contrast, our method relies on a novel approach for clustering terms found in HTML tables, and then assigning concept names to these clusters using Hearst patterns. The method can be efficiently applied to a large corpus, and experimental results on several datasets show that our method can accurately extract large numbers of concept-instance pairs.","bibtype":"inProceedings","author":"Dalvi, Bhavana Bharat and Cohen, William W. 
and Callan, Jamie","booktitle":"Proceedings of the fifth ACM international conference on Web search and data mining - WSDM '12","bibtex":"@inProceedings{\n title = {WebSets: extracting sets of entities from the web using unsupervised information extraction},\n type = {inProceedings},\n year = {2012},\n identifiers = {[object Object]},\n pages = {243},\n websites = {http://dl.acm.org/citation.cfm?doid=2124295.2124327},\n publisher = {ACM Press},\n city = {New York, New York, USA},\n id = {0778e0d0-e371-3ede-8eba-aac11d02526a},\n created = {2012-04-01T16:32:49.000Z},\n accessed = {2012-03-28},\n file_attached = {false},\n profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},\n group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},\n last_modified = {2017-03-14T14:36:19.698Z},\n tags = {named entities},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n citation_key = {Dalvi2012},\n private_publication = {false},\n abstract = {We describe a open-domain information extraction method for extracting concept-instance pairs from an HTML corpus. Most earlier approaches to this problem rely on combining clusters of distributionally similar terms and concept-instance pairs obtained with Hearst patterns. In contrast, our method relies on a novel approach for clustering terms found in HTML tables, and then assigning concept names to these clusters using Hearst patterns. The method can be efficiently applied to a large corpus, and experimental results on several datasets show that our method can accurately extract large numbers of concept-instance pairs.},\n bibtype = {inProceedings},\n author = {Dalvi, Bhavana Bharat and Cohen, William W. 
and Callan, Jamie},\n booktitle = {Proceedings of the fifth ACM international conference on Web search and data mining - WSDM '12}\n}","author_short":["Dalvi, B., B.","Cohen, W., W.","Callan, J."],"urls":{"Website":"http://dl.acm.org/citation.cfm?doid=2124295.2124327"},"bibbaseid":"dalvi-cohen-callan-websetsextractingsetsofentitiesfromthewebusingunsupervisedinformationextraction-2012","role":"author","downloads":0,"html":""},"bibtype":"inProceedings","creationDate":"2020-02-06T23:48:12.133Z","downloads":0,"keywords":[],"search_terms":["websets","extracting","sets","entities","web","using","unsupervised","information","extraction","dalvi","cohen","callan"],"title":"WebSets: extracting sets of entities from the web using unsupervised information extraction","year":2012}