A globally optimal k-anonymity method for the de-identification of health data. El Emam, K., Dankar, F. K., Issa, R., Jonker, E., Amyot, D., Cogo, E., Corriveau, J., Walker, M., Chowdhury, S., Vaillancourt, R., Roffey, T., & Bottomley, J. Journal of the American Medical Informatics Association : JAMIA, 16(5):670--682, 2009. 00000 Paper doi abstract bibtex BACKGROUND: Explicit patient consent requirements in privacy laws can have a negative impact on health research, leading to selection bias and reduced recruitment. Often legislative requirements to obtain consent are waived if the information collected or disclosed is de-identified. OBJECTIVE: The authors developed and empirically evaluated a new globally optimal de-identification algorithm that satisfies the k-anonymity criterion and that is suitable for health datasets. DESIGN: Authors compared OLA (Optimal Lattice Anonymization) empirically to three existing k-anonymity algorithms, Datafly, Samarati, and Incognito, on six public, hospital, and registry datasets for different values of k and suppression limits. MEASUREMENT: Three information loss metrics were used for the comparison: precision, discernability metric, and non-uniform entropy. Each algorithm's performance speed was also evaluated. RESULTS: The Datafly and Samarati algorithms had higher information loss than OLA and Incognito; OLA was consistently faster than Incognito in finding the globally optimal de-identification solution. CONCLUSIONS: For the de-identification of health datasets, OLA is an improvement on existing k-anonymity algorithms in terms of information loss and performance.
@comment{El Emam et al. 2009, Journal of the American Medical Informatics Association 16(5): the OLA (Optimal Lattice Anonymization) k-anonymity paper. Cite as el_emam_globally_2009.}
@article{el_emam_globally_2009,
  author   = {El Emam, Khaled and Dankar, Fida Kamal and Issa, Romeo and Jonker, Elizabeth and Amyot, Daniel and Cogo, Elise and Corriveau, Jean-Pierre and Walker, Mark and Chowdhury, Sadrul and Vaillancourt, Regis and Roffey, Tyson and Bottomley, Jim},
  title    = {A globally optimal k-anonymity method for the de-identification of health data},
  journal  = {Journal of the American Medical Informatics Association},
  volume   = {16},
  number   = {5},
  pages    = {670--682},
  year     = {2009},
  doi      = {10.1197/jamia.M3144},
  url      = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2744718&tool=pmcentrez&rendertype=abstract},
  issn     = {1527-974X},
  pmid     = {19567795},
  keywords = {Adolescent, Adult, Algorithms, Computerized, Confidentiality, Female, Humans, Information Storage and Retrieval, Male, Medical Records Systems},
  abstract = {BACKGROUND: Explicit patient consent requirements in privacy laws can have a negative impact on health research, leading to selection bias and reduced recruitment. Often legislative requirements to obtain consent are waived if the information collected or disclosed is de-identified. OBJECTIVE: The authors developed and empirically evaluated a new globally optimal de-identification algorithm that satisfies the k-anonymity criterion and that is suitable for health datasets. DESIGN: Authors compared OLA (Optimal Lattice Anonymization) empirically to three existing k-anonymity algorithms, Datafly, Samarati, and Incognito, on six public, hospital, and registry datasets for different values of k and suppression limits. MEASUREMENT: Three information loss metrics were used for the comparison: precision, discernability metric, and non-uniform entropy. Each algorithm's performance speed was also evaluated. RESULTS: The Datafly and Samarati algorithms had higher information loss than OLA and Incognito; OLA was consistently faster than Incognito in finding the globally optimal de-identification solution. CONCLUSIONS: For the de-identification of health datasets, OLA is an improvement on existing k-anonymity algorithms in terms of information loss and performance.},
}
Downloads: 0
{"_id":"TwkrkyH4bRffTn3Jh","bibbaseid":"elemam-dankar-issa-jonker-amyot-cogo-corriveau-walker-etal-agloballyoptimalkanonymitymethodforthedeidentificationofhealthdata-2009","downloads":0,"creationDate":"2017-04-24T18:33:15.052Z","title":"A globally optimal k-anonymity method for the de-identification of health data.","author_short":["El Emam, K.","Dankar, F. K.","Issa, R.","Jonker, E.","Amyot, D.","Cogo, E.","Corriveau, J.","Walker, M.","Chowdhury, S.","Vaillancourt, R.","Roffey, T.","Bottomley, J."],"year":2009,"bibtype":"article","biburl":"http://bibbase.org/zotero/BobWong","bibdata":{"bibtype":"article","type":"article","title":"A globally optimal k-anonymity method for the de-identification of health data.","volume":"16","issn":"1527-974X","url":"http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2744718&tool=pmcentrez&rendertype=abstract","doi":"10.1197/jamia.M3144","abstract":"BACKGROUND: Explicit patient consent requirements in privacy laws can have a negative impact on health research, leading to selection bias and reduced recruitment. Often legislative requirements to obtain consent are waived if the information collected or disclosed is de-identified. OBJECTIVE: The authors developed and empirically evaluated a new globally optimal de-identification algorithm that satisfies the k-anonymity criterion and that is suitable for health datasets. DESIGN: Authors compared OLA (Optimal Lattice Anonymization) empirically to three existing k-anonymity algorithms, Datafly, Samarati, and Incognito, on six public, hospital, and registry datasets for different values of k and suppression limits. Measurement Three information loss metrics were used for the comparison: precision, discernability metric, and non-uniform entropy. Each algorithm's performance speed was also evaluated. 
RESULTS: The Datafly and Samarati algorithms had higher information loss than OLA and Incognito; OLA was consistently faster than Incognito in finding the globally optimal de-identification solution. CONCLUSIONS: For the de-identification of health datasets, OLA is an improvement on existing k-anonymity algorithms in terms of information loss and performance.","number":"5","journal":"Journal of the American Medical Informatics Association : JAMIA","author":[{"propositions":[],"lastnames":["El","Emam"],"firstnames":["Khaled"],"suffixes":[]},{"propositions":[],"lastnames":["Dankar"],"firstnames":["Fida","Kamal"],"suffixes":[]},{"propositions":[],"lastnames":["Issa"],"firstnames":["Romeo"],"suffixes":[]},{"propositions":[],"lastnames":["Jonker"],"firstnames":["Elizabeth"],"suffixes":[]},{"propositions":[],"lastnames":["Amyot"],"firstnames":["Daniel"],"suffixes":[]},{"propositions":[],"lastnames":["Cogo"],"firstnames":["Elise"],"suffixes":[]},{"propositions":[],"lastnames":["Corriveau"],"firstnames":["Jean-Pierre"],"suffixes":[]},{"propositions":[],"lastnames":["Walker"],"firstnames":["Mark"],"suffixes":[]},{"propositions":[],"lastnames":["Chowdhury"],"firstnames":["Sadrul"],"suffixes":[]},{"propositions":[],"lastnames":["Vaillancourt"],"firstnames":["Regis"],"suffixes":[]},{"propositions":[],"lastnames":["Roffey"],"firstnames":["Tyson"],"suffixes":[]},{"propositions":[],"lastnames":["Bottomley"],"firstnames":["Jim"],"suffixes":[]}],"year":"2009","pmid":"19567795","note":"00000 ","keywords":"Adolescent, Adult, Algorithms, Computerized, Confidentiality, Female, Humans, Information Storage and Retrieval, Male, Medical Records Systems","pages":"670--82","bibtex":"@article{el_emam_globally_2009,\n\ttitle = {A globally optimal k-anonymity method for the de-identification of health data.},\n\tvolume = {16},\n\tissn = {1527-974X},\n\turl = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2744718&tool=pmcentrez&rendertype=abstract},\n\tdoi = 
{10.1197/jamia.M3144},\n\tabstract = {BACKGROUND: Explicit patient consent requirements in privacy laws can have a negative impact on health research, leading to selection bias and reduced recruitment. Often legislative requirements to obtain consent are waived if the information collected or disclosed is de-identified. OBJECTIVE: The authors developed and empirically evaluated a new globally optimal de-identification algorithm that satisfies the k-anonymity criterion and that is suitable for health datasets. DESIGN: Authors compared OLA (Optimal Lattice Anonymization) empirically to three existing k-anonymity algorithms, Datafly, Samarati, and Incognito, on six public, hospital, and registry datasets for different values of k and suppression limits. Measurement Three information loss metrics were used for the comparison: precision, discernability metric, and non-uniform entropy. Each algorithm's performance speed was also evaluated. RESULTS: The Datafly and Samarati algorithms had higher information loss than OLA and Incognito; OLA was consistently faster than Incognito in finding the globally optimal de-identification solution. CONCLUSIONS: For the de-identification of health datasets, OLA is an improvement on existing k-anonymity algorithms in terms of information loss and performance.},\n\tnumber = {5},\n\tjournal = {Journal of the American Medical Informatics Association : JAMIA},\n\tauthor = {El Emam, Khaled and Dankar, Fida Kamal and Issa, Romeo and Jonker, Elizabeth and Amyot, Daniel and Cogo, Elise and Corriveau, Jean-Pierre and Walker, Mark and Chowdhury, Sadrul and Vaillancourt, Regis and Roffey, Tyson and Bottomley, Jim},\n\tyear = {2009},\n\tpmid = {19567795},\n\tnote = {00000 },\n\tkeywords = {Adolescent, Adult, Algorithms, Computerized, Confidentiality, Female, Humans, Information Storage and Retrieval, Male, Medical Records Systems},\n\tpages = {670--82}\n}\n\n","author_short":["El Emam, K.","Dankar, F. 
K.","Issa, R.","Jonker, E.","Amyot, D.","Cogo, E.","Corriveau, J.","Walker, M.","Chowdhury, S.","Vaillancourt, R.","Roffey, T.","Bottomley, J."],"key":"el_emam_globally_2009","id":"el_emam_globally_2009","bibbaseid":"elemam-dankar-issa-jonker-amyot-cogo-corriveau-walker-etal-agloballyoptimalkanonymitymethodforthedeidentificationofhealthdata-2009","role":"author","urls":{"Paper":"http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2744718&tool=pmcentrez&rendertype=abstract"},"keyword":["Adolescent","Adult","Algorithms","Computerized","Confidentiality","Female","Humans","Information Storage and Retrieval","Male","Medical Records Systems"],"downloads":0},"search_terms":["globally","optimal","anonymity","method","identification","health","data","el emam","dankar","issa","jonker","amyot","cogo","corriveau","walker","chowdhury","vaillancourt","roffey","bottomley"],"keywords":["adolescent","adult","algorithms","computerized","confidentiality","female","humans","information storage and retrieval","male","medical records systems"],"authorIDs":[],"dataSources":["PRu9F5KuG5QWYnRZL"]}