Scalable and Distributed Methods for Resolving, Consolidating, Matching and Disambiguating Entities in Linked Data Corpora. Hogan, A., Zimmermann, A., Umbrich, J., Polleres, A., & Decker, S. Journal of Web Semantics (JWS), 10:76–110, Elsevier, January, 2012. abstract bibtex With respect to large-scale, static, Linked Data corpora, in this paper we discuss scalable and distributed methods for: (i) entity consolidation—identifying entities that signify the same referent, aka. smushing, entity resolution, object consolidation, etc.—using explicit owl:sameAs relations; (ii) extended entity consolidation based on a subset of OWL 2 RL/RDF rules—particularly over inverse-functional properties, functional-properties and (max-)cardinality restrictions with value one; (iii) deriving weighted concurrence measures between entities in the corpus based on shared inlinks/outlinks and attribute values using statistical analyses; (iv) disambiguating (initially) consolidated entities based on inconsistency detection using OWL 2 RL/RDF rules. Our methods are based upon distributed sorts and scans of the corpus, where we purposefully avoid the requirement for indexing all data. Throughout, we offer evaluation over a diverse Linked Data corpus consisting of 1.118 billion quadruples derived from a domain-agnostic, open crawl of 3.985 million RDF/XML Web documents, demonstrating the feasibility of our methods at that scale, and giving insights into the quality of the results for real-world data.
@comment{Journal article by Hogan et al., volume 10 (2012). The bare values JWS and JOURNAL are string macros that are not defined in this excerpt; presumably they are declared elsewhere in the bibliography (to be confirmed).}
@article{hoga-etal-2011-ent-cons-JWS,
  author    = {Hogan, Aidan and Zimmermann, Antoine and Umbrich, J{\"u}rgen and Polleres, Axel and Decker, Stefan},
  title     = {Scalable and Distributed Methods for Resolving, Consolidating, Matching and Disambiguating Entities in Linked Data Corpora},
  journal   = JWS,
  type      = JOURNAL,
  volume    = {10},
  pages     = {76--110},
  month     = jan,
  year      = {2012},
  publisher = {Elsevier},
  projects  = {lion2},
  abstract  = {With respect to large-scale, static, Linked Data corpora, in this paper we discuss scalable and distributed methods for: (i) entity consolidation---identifying entities that signify the same referent, aka. smushing, entity resolution, object consolidation, etc.---using explicit \texttt{owl{:}sameAs} relations; (ii) extended entity consolidation based on a subset of OWL 2 RL/RDF rules---particularly over inverse-functional properties, functional-properties and (max-)cardinality restrictions with value one; (iii) deriving weighted concurrence measures between entities in the corpus based on shared inlinks/outlinks and attribute values using statistical analyses; (iv) disambiguating (initially) consolidated entities based on inconsistency detection using OWL 2 RL/RDF rules. Our methods are based upon distributed sorts and scans of the corpus, where we purposefully avoid the requirement for indexing all data. Throughout, we offer evaluation over a diverse Linked Data corpus consisting of 1.118 billion quadruples derived from a domain-agnostic, open crawl of 3.985 million RDF/XML Web documents, demonstrating the feasibility of our methods at that scale, and giving insights into the quality of the results for real-world data.},
}
Downloads: 0
{"_id":"qqTARhYjg6beGWJ9N","bibbaseid":"hogan-zimmermann-umbrich-polleres-decker-scalableanddistributedmethodsforresolvingconsolidatingmatchinganddisambiguatingentitiesinlinkeddatacorpora-2012","downloads":0,"creationDate":"2015-12-16T06:35:17.671Z","title":"Scalable and Distributed Methods for Resolving, Consolidating, Matching and Disambiguating Entities in Linked Data Corpora","author_short":["Hogan, A.","Zimmermann, A.","Umbrich, J.","Polleres, A.","Decker, S."],"year":2012,"bibtype":"article","biburl":"www.polleres.net/mypublications.bib","bibdata":{"bibtype":"article","type":"journal","abstract":"With respect to large-scale, static, Linked Data corpora, in this paper we discuss scalable and distributed methods for: (i) entity consolidation—identifying entities that signify the same referent, aka. smushing, entity resolution, object consolidation, etc.—using explicit owl:sameAs relations; (ii) extended entity consolidation based on a subset of OWL 2 RL/RDF rules—particularly over inverse-functional properties, functional-properties and (max-)cardinality restrictions with value one; (iii) deriving weighted concurrence measures between entities in the corpus based on shared inlinks/outlinks and attribute values using statistical analyses; (iv) disambiguating (initially) consolidated entities based on inconsistency detection using OWL 2 RL/RDF rules. Our methods are based upon distributed sorts and scans of the corpus, where we purposefully avoid the requirement for indexing all data. 
Throughout, we offer evaluation over a diverse Linked Data corpus consisting of 1.118 billion quadruples derived from a domain-agnostic, open crawl of 3.985 million RDF/XML Web documents, demonstrating the feasibility of our methods at that scale, and giving insights into the quality of the results for real-world data.","author":[{"firstnames":["Aidan"],"propositions":[],"lastnames":["Hogan"],"suffixes":[]},{"firstnames":["Antoine"],"propositions":[],"lastnames":["Zimmermann"],"suffixes":[]},{"firstnames":["Jürgen"],"propositions":[],"lastnames":["Umbrich"],"suffixes":[]},{"firstnames":["Axel"],"propositions":[],"lastnames":["Polleres"],"suffixes":[]},{"firstnames":["Stefan"],"propositions":[],"lastnames":["Decker"],"suffixes":[]}],"journal":"Journal of Web Semantics (JWS)","month":"January","pages":"76–110","projects":"lion2","publisher":"Elsevier","title":"Scalable and Distributed Methods for Resolving, Consolidating, Matching and Disambiguating Entities in Linked Data Corpora","volume":"10","year":"2012","bibtex":"@article{hoga-etal-2011-ent-cons-JWS,\n\tAbstract = {With respect to large-scale, static, Linked Data corpora, in this paper we discuss scalable and distributed methods for: (i) entity consolidation---identifying entities that signify the same referent, aka. smushing, entity resolution, object consolidation, etc.---using explicit \\texttt{owl{:}sameAs} relations; (ii) extended entity consolidation based on a subset of OWL 2 RL/RDF rules---particularly over inverse-functional properties, functional-properties and (max-)cardinality restrictions with value one; (iii) deriving weighted concurrence measures between entities in the corpus based on shared inlinks/outlinks and attribute values using statistical analyses; (iv) disambiguating (initially) consolidated entities based on inconsistency detection using OWL 2 RL/RDF rules. 
Our methods are based upon distributed sorts and scans of the corpus, where we purposefully avoid the requirement for indexing all data. Throughout, we offer evaluation over a diverse Linked Data corpus consisting of 1.118 billion quadruples derived from a domain-agnostic, open crawl of 3.985 million RDF/XML Web documents, demonstrating the feasibility of our methods at that scale, and giving insights into the quality of the results for real-world data.},\n\tAuthor = {Aidan Hogan and Antoine Zimmermann and J{\\\"u}rgen Umbrich and Axel Polleres and Stefan Decker},\n\tJournal = JWS,\n\tMonth = jan,\n\tPages = {76--110},\n\tProjects = {lion2},\n\tPublisher = {Elsevier},\n\tTitle = {Scalable and Distributed Methods for Resolving, Consolidating, Matching and Disambiguating Entities in Linked Data Corpora},\n\tType = JOURNAL,\n\tVolume = {10},\n\tYear = 2012}\n\n","author_short":["Hogan, A.","Zimmermann, A.","Umbrich, J.","Polleres, A.","Decker, S."],"key":"hoga-etal-2011-ent-cons-JWS","id":"hoga-etal-2011-ent-cons-JWS","bibbaseid":"hogan-zimmermann-umbrich-polleres-decker-scalableanddistributedmethodsforresolvingconsolidatingmatchinganddisambiguatingentitiesinlinkeddatacorpora-2012","role":"author","urls":{},"metadata":{"authorlinks":{"polleres, 
a":"https://bibbase.org/show?bib=www.polleres.net/mypublications.bib"}}},"search_terms":["scalable","distributed","methods","resolving","consolidating","matching","disambiguating","entities","linked","data","corpora","hogan","zimmermann","umbrich","polleres","decker"],"keywords":[],"authorIDs":["545720922abc8e9f370000ae","5PFMiHGwfvbGBZwWF","5de7280d97054edf010000c3","5e02b1a419da8edf01000028","5e048450db7916df010000b1","5e06d565a0810cde0100009b","5e10e27445c12cde01000062","5e123345c196d3de01000074","5e14ba61e55ed8de01000072","5e189b4e779abfdf0100013f","5e216f7e5a651cdf010000eb","5e25b9fdf299d4de01000001","5e2d64605e7586df01000083","5e36e5e9b26a0fde0100005e","5e37d23b56571fde010000de","5e4ded1052c311f20100018e","5e51a3102793ecde010000e0","5e59a6b5ad6c7fde01000114","5e5d588ead47bcde01000072","5e60e857839e59df010000f1","A5AFuDAiNR4HEYiFD","BtzwZ6TFPsASbdqvo","DLdeXAmrbA4niYQzH","FyLDFGg993nDS2Spf","NCjPvWahWRjdP3ghB","XcyP3jptz7zE4ZLws","aiXjXMLP63k5WCt84","fTDcT5K3oSTcdxSBj","fbKNfWffDzdzubrER","haaAs2rQaQA7EaZva","nQX2P8WzFeKwcpLqd","nuWuyLnGu7YzMrn4d","pfENTBFWo85mRy3ik","rX6EShFR2rMFmQL2C","w6wHZukTjqqera7BR","woa42kCD35yCmdQTj","yPgvarsL7KAT9yfZd","yzkCNJMYNL8B3bni2","zDG3tj87ZfYXo7u9c"],"dataSources":["cBfwyqsLFQQMc4Fss","QfLT6siHZuHw9MqvK"]}