Privacy-preserving generative deep neural networks support clinical data sharing. Beaulieu-Jones, B. K., Wu, Z. S., Williams, C., Lee, R., Bhavnani, S. P., Byrd, J. B., & Greene, C. S. Circulation: Cardiovascular Quality and Outcomes, 12(7):e005122, 2019. Publisher: American Heart Association
Privacy-preserving generative deep neural networks support clinical data sharing [link]Paper  doi  abstract   bibtex   
Background: Data sharing accelerates scientific progress but sharing individual-level data while preserving patient privacy presents a barrier. Methods and Results: Using pairs of deep neural networks, we generated simulated, synthetic participants that closely resemble participants of the SPRINT trial (Systolic Blood Pressure Trial). We showed that such paired networks can be trained with differential privacy, a formal privacy framework that limits the likelihood that queries of the synthetic participants' data could identify a real participant in the trial. Machine learning predictors built on the synthetic population generalize to the original data set. This finding suggests that the synthetic data can be shared with others, enabling them to perform hypothesis-generating analyses as though they had the original trial data. Conclusions: Deep neural networks that generate synthetic participants facilitate secondary analyses and reproducible investigation of clinical data sets by enhancing data sharing while preserving participant privacy.
@comment{Journal article in Circulation: Cardiovascular Quality and Outcomes, volume 12, issue 7. The pages field holds the article number e005122 rather than a page range. The url field carries a single DOI resolver link; the PubMed record is identified by pmid.}
@article{beaulieu-jones_privacy-preserving_2019,
	author = {Beaulieu-Jones, Brett K. and Wu, Zhiwei Steven and Williams, Chris and Lee, Ran and Bhavnani, Sanjeev P. and Byrd, James Brian and Greene, Casey S.},
	title = {Privacy-preserving generative deep neural networks support clinical data sharing},
	journal = {Circulation: Cardiovascular Quality and Outcomes},
	volume = {12},
	number = {7},
	pages = {e005122},
	year = {2019},
	issn = {1941-7705},
	doi = {10.1161/CIRCOUTCOMES.118.005122},
	pmid = {31284738},
	url = {https://doi.org/10.1161/CIRCOUTCOMES.118.005122},
	note = {Publisher: American Heart Association},
	keywords = {blood pressure, deep learning, machine learning, privacy, propensity score},
	abstract = {Background: Data sharing accelerates scientific progress but sharing individual-level data while preserving patient privacy presents a barrier. Methods and Results: Using pairs of deep neural networks, we generated simulated, synthetic participants that closely resemble participants of the SPRINT trial (Systolic Blood Pressure Trial). We showed that such paired networks can be trained with differential privacy, a formal privacy framework that limits the likelihood that queries of the synthetic participants' data could identify a real participant in the trial. Machine learning predictors built on the synthetic population generalize to the original data set. This finding suggests that the synthetic data can be shared with others, enabling them to perform hypothesis-generating analyses as though they had the original trial data. Conclusions: Deep neural networks that generate synthetic participants facilitate secondary analyses and reproducible investigation of clinical data sets by enhancing data sharing while preserving participant privacy.},
}

Downloads: 0