Combining linguistic and statistical analysis to extract relations from web documents

Combining linguistic and statistical analysis to extract relations from web documents. Suchanek, F. M., Ifrim, G., & Weikum, G. In Proceedings of the 12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD '06), page 712, 2006. ACM Press.

Paper

Combining linguistic and statistical analysis to extract relations from web documents [link]

Website abstract bibtex

The World Wide Web provides a nearly endless source of knowledge, which is mostly given in natural language. A first step towards exploiting this data automatically could be to extract pairs of a given semantic relation from text documents - for example all pairs of a person and her birthdate. One strategy for this task is to find text patterns that express the semantic relation, to generalize these patterns, and to apply them to a corpus to find new pairs. In this paper, we show that this approach profits significantly when deep linguistic structures are used instead of surface text patterns. We demonstrate how linguistic structures can be represented for machine learning, and we provide a theoretical analysis of the pattern matching approach. We show the benefits of our approach by extensive experiments with our prototype system LEILA.

@comment{Cleaned Mendeley/BibBase export. The citation key was restored from
the entry's own citation_key field, the DOI was derived from the ACM portal
URL, and the unparseable "identifiers" artifact was removed. Mendeley
bookkeeping fields are kept; BibTeX styles ignore unknown fields.}
@inproceedings{Suchanek2006,
  author              = {Suchanek, Fabian M. and Ifrim, Georgiana and Weikum, Gerhard},
  title               = {Combining linguistic and statistical analysis to extract relations from web documents},
  booktitle           = {Proceedings of the 12th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining ({KDD} '06)},
  pages               = {712},
  publisher           = {ACM Press},
  year                = {2006},
  doi                 = {10.1145/1150402.1150492},
  url                 = {http://portal.acm.org/citation.cfm?doid=1150402.1150492},
  abstract            = {The World Wide Web provides a nearly endless source of knowledge, which is mostly given in natural language. A first step towards exploiting this data automatically could be to extract pairs of a given semantic relation from text documents - for example all pairs of a person and her birthdate. One strategy for this task is to find text patterns that express the semantic relation, to generalize these patterns, and to apply them to a corpus to find new pairs. In this paper, we show that this approach profits significantly when deep linguistic structures are used instead of surface text patterns. We demonstrate how linguistic structures can be represented for machine learning, and we provide a theoretical analysis of the pattern matching approach. We show the benefits of our approach by extensive experiments with our prototype system LEILA.},
  type                = {inProceedings},
  bibtype             = {inProceedings},
  issue               = {April},
  websites            = {http://portal.acm.org/citation.cfm?doid=1150402.1150492},
  id                  = {a21db1fe-e3ac-3f64-a84c-4efa34b130ea},
  created             = {2011-03-13T12:54:42.000Z},
  file_attached       = {true},
  profile_id          = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
  group_id            = {066b42c8-f712-3fc3-abb2-225c158d2704},
  last_modified       = {2017-03-14T14:36:19.698Z},
  read                = {false},
  starred             = {false},
  authored            = {false},
  confirmed           = {true},
  hidden              = {false},
  citation_key        = {Suchanek2006},
  private_publication = {false},
}

Downloads: 0

{"_id":"EY9sJshCcmsw5Duem","bibbaseid":"suchanek-ifrim-weikum-combininglinguisticandstatisticalanalysistoextractrelationsfromwebdocuments-2006","authorIDs":[],"author_short":["Suchanek, F., M.","Ifrim, G.","Weikum, G."],"bibdata":{"title":"Combining linguistic and statistical analysis to extract relations from web documents","type":"inProceedings","year":"2006","identifiers":"[object Object]","pages":"712","issue":"April","websites":"http://portal.acm.org/citation.cfm?doid=1150402.1150492","publisher":"ACM Press","id":"a21db1fe-e3ac-3f64-a84c-4efa34b130ea","created":"2011-03-13T12:54:42.000Z","file_attached":"true","profile_id":"5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6","group_id":"066b42c8-f712-3fc3-abb2-225c158d2704","last_modified":"2017-03-14T14:36:19.698Z","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"citation_key":"Suchanek2006","private_publication":false,"abstract":"The World Wide Web provides a nearly endless source of knowledge, which is mostly given in natural language. A first step towards exploiting this data automatically could be to extract pairs of a given semantic relation from text documents - for example all pairs of a person and her birthdate. One strategy for this task is to find text patterns that express the semantic relation, to generalize these patterns, and to apply them to a corpus to find new pairs. In this paper, we show that this approach profits significantly when deep linguistic structures are used instead of surface text patterns. We demonstrate how linguistic structures can be represented for machine learning, and we provide a theoretical analysis of the pattern matching approach. 
We show the benefits of our approach by extensive experiments with our prototype system LEILA.","bibtype":"inProceedings","author":"Suchanek, Fabian M and Ifrim, Georgiana and Weikum, Gerhard","booktitle":"Proceedings of the 12th ACM SIGKDD international conference on Knowledge discovery and data mining KDD 06","bibtex":"@inProceedings{\n title = {Combining linguistic and statistical analysis to extract relations from web documents},\n type = {inProceedings},\n year = {2006},\n identifiers = {[object Object]},\n pages = {712},\n issue = {April},\n websites = {http://portal.acm.org/citation.cfm?doid=1150402.1150492},\n publisher = {ACM Press},\n id = {a21db1fe-e3ac-3f64-a84c-4efa34b130ea},\n created = {2011-03-13T12:54:42.000Z},\n file_attached = {true},\n profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},\n group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},\n last_modified = {2017-03-14T14:36:19.698Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n citation_key = {Suchanek2006},\n private_publication = {false},\n abstract = {The World Wide Web provides a nearly endless source of knowledge, which is mostly given in natural language. A first step towards exploiting this data automatically could be to extract pairs of a given semantic relation from text documents - for example all pairs of a person and her birthdate. One strategy for this task is to find text patterns that express the semantic relation, to generalize these patterns, and to apply them to a corpus to find new pairs. In this paper, we show that this approach profits significantly when deep linguistic structures are used instead of surface text patterns. We demonstrate how linguistic structures can be represented for machine learning, and we provide a theoretical analysis of the pattern matching approach. 
We show the benefits of our approach by extensive experiments with our prototype system LEILA.},\n bibtype = {inProceedings},\n author = {Suchanek, Fabian M and Ifrim, Georgiana and Weikum, Gerhard},\n booktitle = {Proceedings of the 12th ACM SIGKDD international conference on Knowledge discovery and data mining KDD 06}\n}","author_short":["Suchanek, F., M.","Ifrim, G.","Weikum, G."],"urls":{"Paper":"https://bibbase.org/service/mendeley/bfdabac2-d7f2-3c5b-aa7a-06431c0ae35e/file/6d4127c5-181f-38b5-3c70-4e84ee17c118/2006-Combining_linguistic_and_statistical_analysis_to_extract_relations_from_web_documents.pdf.pdf","Website":"http://portal.acm.org/citation.cfm?doid=1150402.1150492"},"bibbaseid":"suchanek-ifrim-weikum-combininglinguisticandstatisticalanalysistoextractrelationsfromwebdocuments-2006","role":"author","downloads":0,"html":""},"bibtype":"inProceedings","creationDate":"2020-02-06T23:48:11.764Z","downloads":0,"keywords":[],"search_terms":["combining","linguistic","statistical","analysis","extract","relations","web","documents","suchanek","ifrim","weikum"],"title":"Combining linguistic and statistical analysis to extract relations from web documents","year":2006}