{"_id":"8gtx3r2Go3svpTFvi","bibbaseid":"cohen-ravikumar-fienberg-acomparisonofstringmetricsformatchingnamesandrecords-2003","downloads":0,"creationDate":"2015-12-18T06:38:06.185Z","title":"A Comparison of String Metrics for Matching Names and Records","author_short":["Cohen, W. W.","Ravikumar, P.","Fienberg, S. E."],"year":2003,"bibtype":"inproceedings","biburl":"http://www.bibsonomy.org/bib/author/cohen?items=1000","bibdata":{"bibtype":"inproceedings","type":"inproceedings","abstract":"We describe an open-source Java toolkit of methods for matching names and records. We summarize results obtained from using various string distance metrics on the task of matching entity names. These metrics include distance functions proposed by several different communities, such as edit-distance metrics, fast heuristic string comparators, token-based distance metrics, and hybrid methods. We then describe an extension to the toolkit which allows records to be compared. We discuss some issues involved in performing a similar comparison for record-matching techniques, and finally present results for some baseline record-matching algorithms that aggregate string comparisons between fields","added-at":"2015-11-04T11:59:08.000+0100","author":[{"propositions":[],"lastnames":["Cohen"],"firstnames":["William","W."],"suffixes":[]},{"propositions":[],"lastnames":["Ravikumar"],"firstnames":["Pradeep"],"suffixes":[]},{"propositions":[],"lastnames":["Fienberg"],"firstnames":["Stephen","E."],"suffixes":[]}],"biburl":"http://www.bibsonomy.org/bibtex/2f0330353b21308916dbb49e1af7c4f1c/jaeschke","booktitle":"KDD Workshop on Data Cleaning and Object Consolidation","interhash":"45ce8df32fdc0f589510bb315a46bce0","intrahash":"f0330353b21308916dbb49e1af7c4f1c","keywords":"measure comparison distance string record similarity name disambiguation matching","timestamp":"2015-11-04T11:59:08.000+0100","title":"A Comparison of String Metrics for Matching Names and Records","url":"https://www.cs.cmu.edu/afs/cs/Web/People/wcohen/postscript/kdd-2003-match-ws.pdf","year":"2003","bibtex":"@inproceedings{cohen2003comparison,\n abstract = {We describe an open-source Java toolkit of methods for matching names and records. We summarize results obtained from using various string distance metrics on the task of matching entity names. These metrics include distance functions proposed by several different communities, such as edit-distance metrics, fast heuristic string comparators, token-based distance metrics, and hybrid methods. We then describe an extension to the toolkit which allows records to be compared. We discuss some issues involved in performing a similar comparison for record-matching techniques, and finally present results for some baseline record-matching algorithms that aggregate string comparisons between fields},\n added-at = {2015-11-04T11:59:08.000+0100},\n author = {Cohen, William W. and Ravikumar, Pradeep and Fienberg, Stephen E.},\n biburl = {http://www.bibsonomy.org/bibtex/2f0330353b21308916dbb49e1af7c4f1c/jaeschke},\n booktitle = {KDD Workshop on Data Cleaning and Object Consolidation},\n interhash = {45ce8df32fdc0f589510bb315a46bce0},\n intrahash = {f0330353b21308916dbb49e1af7c4f1c},\n keywords = {measure comparison distance string record similarity name disambiguation matching},\n timestamp = {2015-11-04T11:59:08.000+0100},\n title = {A Comparison of String Metrics for Matching Names and Records},\n url = {https://www.cs.cmu.edu/afs/cs/Web/People/wcohen/postscript/kdd-2003-match-ws.pdf},\n year = 2003\n}\n\n","author_short":["Cohen, W. W.","Ravikumar, P.","Fienberg, S. E."],"key":"cohen2003comparison","id":"cohen2003comparison","bibbaseid":"cohen-ravikumar-fienberg-acomparisonofstringmetricsformatchingnamesandrecords-2003","role":"author","urls":{"Paper":"https://www.cs.cmu.edu/afs/cs/Web/People/wcohen/postscript/kdd-2003-match-ws.pdf"},"keyword":["measure comparison distance string record similarity name disambiguation matching"],"downloads":0},"search_terms":["comparison","string","metrics","matching","names","records","cohen","ravikumar","fienberg"],"keywords":["measure comparison distance string record similarity name disambiguation matching"],"authorIDs":[],"dataSources":["DY7iBA8E4Z4ZyQgtk"]}