Scalable and distributed methods for entity matching, consolidation and disambiguation over linked data corpora. Hogan, A., Zimmermann, A., Umbrich, J., Polleres, A., & Decker, S. Journal of Web Semantics (JWS), 10:76–110, Elsevier, January, 2012. doi abstract bibtex With respect to large-scale, static, Linked Data corpora, in this paper we discuss scalable and distributed methods for: (i) entity consolidation—identifying entities that signify the same referent, aka. smushing, entity resolution, object consolidation, etc.—using explicit owl:sameAs relations; (ii) extended entity consolidation based on a subset of OWL 2 RL/RDF rules—particularly over inverse-functional properties, functional-properties and (max-)cardinality restrictions with value one; (iii) deriving weighted concurrence measures between entities in the corpus based on shared inlinks/outlinks and attribute values using statistical analyses; (iv) disambiguating (initially) consolidated entities based on inconsistency detection using OWL 2 RL/RDF rules. Our methods are based upon distributed sorts and scans of the corpus, where we purposefully avoid the requirement for indexing all data. Throughout, we offer evaluation over a diverse Linked Data corpus consisting of 1.118 billion quadruples derived from a domain-agnostic, open crawl of 3.985 million RDF/XML Web documents, demonstrating the feasibility of our methods at that scale, and giving insights into the quality of the results for real-world data.
@comment{Journal article by Hogan et al. (volume 10, 2012). JWS and JOURNAL are bare string macros, presumably declared elsewhere in this bibliography (TODO confirm). The doi field holds the bare DOI with no resolver prefix; styles add the link themselves.}
@article{hoga-etal-2011-ent-cons-JWS,
  author    = {Hogan, Aidan and Zimmermann, Antoine and Umbrich, J{\"u}rgen and Polleres, Axel and Decker, Stefan},
  title     = {Scalable and distributed methods for entity matching, consolidation and disambiguation over linked data corpora},
  journal   = JWS,
  volume    = {10},
  pages     = {76--110},
  month     = jan,
  year      = 2012,
  publisher = {Elsevier},
  doi       = {10.1016/j.websem.2011.11.002},
  type      = JOURNAL,
  projects  = {lion2},
  abstract  = {With respect to large-scale, static, Linked Data corpora, in this paper we discuss scalable and distributed methods for: (i) entity consolidation---identifying entities that signify the same referent, aka. smushing, entity resolution, object consolidation, etc.---using explicit \texttt{owl{:}sameAs} relations; (ii) extended entity consolidation based on a subset of OWL 2 RL/RDF rules---particularly over inverse-functional properties, functional-properties and (max-)cardinality restrictions with value one; (iii) deriving weighted concurrence measures between entities in the corpus based on shared inlinks/outlinks and attribute values using statistical analyses; (iv) disambiguating (initially) consolidated entities based on inconsistency detection using OWL 2 RL/RDF rules. Our methods are based upon distributed sorts and scans of the corpus, where we purposefully avoid the requirement for indexing all data. Throughout, we offer evaluation over a diverse Linked Data corpus consisting of 1.118 billion quadruples derived from a domain-agnostic, open crawl of 3.985 million RDF/XML Web documents, demonstrating the feasibility of our methods at that scale, and giving insights into the quality of the results for real-world data.}
}
Downloads: 0
{"_id":"cisepHqqvmXsDgQvf","bibbaseid":"hogan-zimmermann-umbrich-polleres-decker-scalableanddistributedmethodsforentitymatchingconsolidationanddisambiguationoverlinkeddatacorpora-2012","downloads":0,"creationDate":"2015-06-01T14:50:03.154Z","title":"Scalable and distributed methods for entity matching, consolidation and disambiguation over linked data corpora","author_short":["Hogan, A.","Zimmermann, A.","Umbrich, J.","Polleres, A.","Decker, S."],"year":2012,"bibtype":"article","biburl":"www.polleres.net/mypublications.bib","bibdata":{"bibtype":"article","type":"journal","abstract":"With respect to large-scale, static, Linked Data corpora, in this paper we discuss scalable and distributed methods for: (i) entity consolidation—identifying entities that signify the same referent, aka. smushing, entity resolution, object consolidation, etc.—using explicit \\textttowl:sameAs relations; (ii) extended entity consolidation based on a subset of OWL 2 RL/RDF rules—particularly over inverse-functional properties, functional-properties and (max-)cardinality restrictions with value one; (iii) deriving weighted concurrence measures between entities in the corpus based on shared inlinks/outlinks and attribute values using statistical analyses; (iv) disambiguating (initially) consolidated entities based on inconsistency detection using OWL 2 RL/RDF rules. Our methods are based upon distributed sorts and scans of the corpus, where we purposefully avoid the requirement for indexing all data. 
Throughout, we offer evaluation over a diverse Linked Data corpus consisting of 1.118 billion quadruples derived from a domain-agnostic, open crawl of 3.985 million RDF/XML Web documents, demonstrating the feasibility of our methods at that scale, and giving insights into the quality of the results for real-world data.","author":[{"firstnames":["Aidan"],"propositions":[],"lastnames":["Hogan"],"suffixes":[]},{"firstnames":["Antoine"],"propositions":[],"lastnames":["Zimmermann"],"suffixes":[]},{"firstnames":["Jürgen"],"propositions":[],"lastnames":["Umbrich"],"suffixes":[]},{"firstnames":["Axel"],"propositions":[],"lastnames":["Polleres"],"suffixes":[]},{"firstnames":["Stefan"],"propositions":[],"lastnames":["Decker"],"suffixes":[]}],"journal":"Journal of Web Semantics (JWS)","month":"January","pages":"76–110","projects":"lion2","publisher":"Elsevier","title":"Scalable and distributed methods for entity matching, consolidation and disambiguation over linked data corpora","doi":"https://doi.org/10.1016/j.websem.2011.11.002","volume":"10","year":"2012","bibtex":"@article{hoga-etal-2011-ent-cons-JWS,\n\tAbstract = {With respect to large-scale, static, Linked Data corpora, in this paper we discuss scalable and distributed methods for: (i) entity consolidation---identifying entities that signify the same referent, aka. smushing, entity resolution, object consolidation, etc.---using explicit \\texttt{owl{:}sameAs} relations; (ii) extended entity consolidation based on a subset of OWL 2 RL/RDF rules---particularly over inverse-functional properties, functional-properties and (max-)cardinality restrictions with value one; (iii) deriving weighted concurrence measures between entities in the corpus based on shared inlinks/outlinks and attribute values using statistical analyses; (iv) disambiguating (initially) consolidated entities based on inconsistency detection using OWL 2 RL/RDF rules. 
Our methods are based upon distributed sorts and scans of the corpus, where we purposefully avoid the requirement for indexing all data. Throughout, we offer evaluation over a diverse Linked Data corpus consisting of 1.118 billion quadruples derived from a domain-agnostic, open crawl of 3.985 million RDF/XML Web documents, demonstrating the feasibility of our methods at that scale, and giving insights into the quality of the results for real-world data.},\n\tAuthor = {Aidan Hogan and Antoine Zimmermann and J{\\\"u}rgen Umbrich and Axel Polleres and Stefan Decker},\n\tJournal = JWS,\n\tMonth = jan,\n\tPages = {76--110},\n\tProjects = {lion2},\n\tPublisher = {Elsevier},\n\tTitle = {Scalable and distributed methods for entity matching, consolidation and disambiguation over linked data corpora},\n\tdoi = {https://doi.org/10.1016/j.websem.2011.11.002},\n\tType = JOURNAL,\n\tVolume = {10},\n\tYear = 2012\n\t}\n\n","author_short":["Hogan, A.","Zimmermann, A.","Umbrich, J.","Polleres, A.","Decker, S."],"key":"hoga-etal-2011-ent-cons-JWS","id":"hoga-etal-2011-ent-cons-JWS","bibbaseid":"hogan-zimmermann-umbrich-polleres-decker-scalableanddistributedmethodsforentitymatchingconsolidationanddisambiguationoverlinkeddatacorpora-2012","role":"author","urls":{},"metadata":{"authorlinks":{}},"downloads":0,"html":""},"search_terms":["scalable","distributed","methods","entity","matching","consolidation","disambiguation","over","linked","data","corpora","hogan","zimmermann","umbrich","polleres","decker"],"keywords":[],"authorIDs":["5461bd8e8a9aab071c00009d"],"dataSources":["CjYzfxp6QM8GorK7b","cBfwyqsLFQQMc4Fss","gixxkiKt6rtWGoKSh"]}