Publication list (keyword filter: Health care), generated by bibbase.org from david_sontag_papers_all.bib.
2018 (3)
Why Is My Classifier Discriminatory? Chen, I.; Johansson, F. D.; and Sontag, D. ArXiv e-prints arXiv:1805.12002. 2018.
\n\n\n\n \n \n \"Why paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 8 downloads\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@article{ChenJohanssonSontag_arxiv18,
  author = {Irene Chen and Fredrik D. Johansson and David Sontag},
  title = {Why Is My Classifier Discriminatory?},
  journal = {ArXiv e-prints arXiv:1805.12002},
  archivePrefix = "arXiv",
  eprint = {1805.12002},
  primaryClass = "stat.ML",
  year = 2018,
  keywords = {Machine learning, Health care},
  url_Paper = {https://arxiv.org/pdf/1805.12002.pdf},
  abstract = {Recent attempts to achieve fairness in predictive models focus on the balance between fairness and accuracy. In sensitive applications such as healthcare or criminal justice, this trade-off is often undesirable as any increase in prediction error could have devastating consequences. In this work, we argue that the fairness of predictions should be evaluated in context of the data, and that unfairness induced by inadequate samples sizes or unmeasured predictive variables should be addressed through data collection, rather than by constraining the model. We decompose cost-based metrics of discrimination into bias, variance, and noise, and propose actions aimed at estimating and reducing each term. Finally, we perform case-studies on prediction of income, mortality, and review ratings, confirming the value of this analysis. We find that data collection is often a means to reduce discrimination without sacrificing accuracy.}
}
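The abstract above describes decomposing cost-based discrimination metrics into bias, variance, and noise. Below is a minimal, illustrative Python sketch of that flavor of analysis on synthetic data: it measures each group's zero-one loss and uses bootstrap refits to gauge how much of that loss is variance (model instability). The synthetic data, the logistic-regression stand-in model, and the bootstrap estimator are assumptions for illustration, not the paper's estimator.

# Sketch only: synthetic data and a logistic-regression stand-in model.
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
n = 4000
group = rng.integers(0, 2, size=n)              # protected attribute: 0 or 1
X = rng.normal(size=(n, 5)) + group[:, None] * 0.3
y = (X[:, 0] + 0.5 * X[:, 1] + rng.normal(size=n) > 0).astype(int)

test = rng.random(n) < 0.3                      # simple train/test split
Xtr, ytr = X[~test], y[~test]
Xte, yte, gte = X[test], y[test], group[test]

# Point estimate of each group's cost (zero-one loss).
base = LogisticRegression(max_iter=1000).fit(Xtr, ytr)
err = (base.predict(Xte) != yte).astype(float)

# Bootstrap refits estimate how much of each group's error is driven by
# variance, i.e. instability of the fitted model under training-set resampling.
B = 20
preds = np.empty((B, len(yte)))
for b in range(B):
    idx = rng.integers(0, len(ytr), size=len(ytr))
    preds[b] = LogisticRegression(max_iter=1000).fit(Xtr[idx], ytr[idx]).predict(Xte)
main_pred = (preds.mean(axis=0) > 0.5).astype(int)        # majority vote
variance = (preds != main_pred).mean(axis=0)               # disagreement rate

for g in (0, 1):
    mask = gte == g
    print(f"group {g}: error={err[mask].mean():.3f}  variance~{variance[mask].mean():.3f}")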
Recurrent Neural Networks for Multivariate Time Series with Missing Values. Che, Z.; Purushotham, S.; Cho, K.; Sontag, D.; and Liu, Y. Nature Scientific Reports, 8(1): 6085. 2018.
\n\n\n\n \n \n \"Recurrent paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 5 downloads\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@article{CheEtAl_nature_sr18,
  Author = {Che, Zhengping and Purushotham, Sanjay and Cho, Kyunghyun and Sontag, David and Liu, Yan},
  Journal = {Nature Scientific Reports},
  Number = {1},
  Pages = {6085},
  Title = {Recurrent Neural Networks for Multivariate Time Series with Missing Values},
  Volume = {8},
  Year = {2018},
  keywords = {Health care, Machine learning, Deep learning},
  url_Paper = {https://www.nature.com/articles/s41598-018-24271-9},
  abstract = {Multivariate time series data in practical applications, such as health care, geoscience, and biology, are characterized by a variety of missing values. In time series prediction and other related tasks, it has been noted that missing values and their missing patterns are often correlated with the target labels, a.k.a., informative missingness. There is very limited work on exploiting the missing patterns for effective imputation and improving prediction performance. In this paper, we develop novel deep learning models, namely GRU-D, as one of the early attempts. GRU-D is based on Gated Recurrent Unit (GRU), a state-of-the-art recurrent neural network. It takes two representations of missing patterns, i.e., masking and time interval, and effectively incorporates them into a deep model architecture so that it not only captures the long-term temporal dependencies in time series, but also utilizes the missing patterns to achieve better prediction results. Experiments of time series classification tasks on real-world clinical datasets (MIMIC-III, PhysioNet) and synthetic datasets demonstrate that our models achieve state-of-the-art performance and provide useful insights for better understanding and utilization of missing values in time series analysis.},
}
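The GRU-D description above (masking and time-interval inputs, decay toward the empirical mean) can be sketched compactly in PyTorch. The following is a simplified stand-in, not the authors' implementation: a per-variable exponential decay driven by the time gap, decayed imputation of missing values, and the mask concatenated to the GRU input. Tensor shapes, the toy delta, and the single sigmoid output are assumptions.

import torch
import torch.nn as nn

class DecayGRU(nn.Module):
    """Simplified GRU-D-style model: decay toward the variable mean as the gap
    since the last observation grows, and feed the mask to the GRU cell."""
    def __init__(self, n_vars, hidden):
        super().__init__()
        self.w = nn.Parameter(torch.zeros(n_vars))      # per-variable decay rate
        self.cell = nn.GRUCell(2 * n_vars, hidden)      # input: [imputed values, mask]
        self.out = nn.Linear(hidden, 1)

    def forward(self, x, mask, delta, x_mean):
        # x, mask, delta: (batch, time, n_vars); x_mean: (n_vars,)
        b, T, d = x.shape
        h = x.new_zeros(b, self.cell.hidden_size)
        last = x_mean.expand(b, d).clone()              # last observed value per variable
        for t in range(T):
            m = mask[:, t]
            decay = torch.exp(-torch.relu(self.w) * delta[:, t])
            last = m * x[:, t] + (1 - m) * last
            x_hat = m * x[:, t] + (1 - m) * (decay * last + (1 - decay) * x_mean)
            h = self.cell(torch.cat([x_hat, m], dim=-1), h)
        return torch.sigmoid(self.out(h)).squeeze(-1)

# Toy usage with random data and a stand-in time-gap tensor.
b, T, d = 4, 10, 6
x = torch.randn(b, T, d)
mask = (torch.rand(b, T, d) > 0.3).float()
delta = torch.cumsum(torch.ones(b, T, d), dim=1)
model = DecayGRU(d, hidden=32)
print(model(x, mask, delta, x.mean(dim=(0, 1))).shape)   # torch.Size([4])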
Cell-specific prediction and application of drug-induced gene expression profiles. Hodos, R.; Zhang, P.; Lee, H. C.; Duan, Q.; Wang, Z.; Clark, N. R.; Ma'ayan, A.; Wang, F.; Kidd, B.; Hu, J.; Sontag, D.; and Dudley, J. In Proceedings of the Pacific Symposium on Biocomputing (PSB), 2018.
\n\n\n\n \n \n \"Cell-specific paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@inproceedings{HodosEtAl_PSB17,
  author = {Rachel Hodos and Ping Zhang and Hao Chih Lee and Qiaonan Duan and Zichen Wang and Neil R. Clark and Avi Ma'ayan and Fei Wang and Brian Kidd and Jianying Hu and David Sontag and Joel Dudley},
  title = {Cell-specific prediction and application of drug-induced gene expression profiles},
  booktitle = {Proceedings of the Pacific Symposium on Biocomputing (PSB)},
  year = {2018},
  url_Paper = {http://people.csail.mit.edu/dsontag/papers/HodosEtAl_PSB18.pdf},
  keywords = {Computational biology, Health care},
  abstract = {Gene expression profiling of in vitro drug perturbations is useful for many biomedical discovery applications including drug repurposing and elucidation of drug mechanisms. However, limited data availability across cell types has hindered our capacity to leverage or explore the cell specificity of these perturbations. While recent efforts have generated a large number of drug perturbation profiles across a variety of human cell types, many gaps remain in this combinatorial drug-cell space. Hence, we asked whether it is possible to fill these gaps by predicting cell-specific drug perturbation profiles using available expression data from related conditions -- i.e. from other drugs and cell types. We developed a computational framework that first arranges existing profiles into a three-dimensional array (or tensor) indexed by drugs, genes, and cell types, and then uses either local (nearest-neighbors) or global (tensor completion) information to predict unmeasured profiles. We evaluate prediction accuracy using a variety of metrics, and find that the two methods have complementary performance, each superior in different regions in the drug-cell space. Predictions achieve correlations of 0.68 with true values, and maintain accurate differentially expressed genes (AUC 0.81). Finally, we demonstrate that the predicted profiles add value for making downstream associations with drug targets and therapeutic classes.}
}
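A small numpy sketch, under stated assumptions, of the "local (nearest-neighbors)" idea in the abstract above: arrange profiles in a drugs x cell types x genes array and predict an unmeasured (drug, cell type) profile from the same drug's profiles in the most similar cell types. The data are synthetic and the similarity and weighting choices are illustrative; the global tensor-completion variant is not shown.

import numpy as np

rng = np.random.default_rng(1)
n_drugs, n_cells, n_genes = 30, 5, 50
T = rng.normal(size=(n_drugs, n_cells, n_genes))      # toy "true" profiles
observed = rng.random((n_drugs, n_cells)) < 0.6       # which (drug, cell) pairs are measured

def predict_profile(T, observed, d, c, k=3):
    """Weighted average of drug d's profiles in cell types similar to cell c,
    with similarity estimated on drugs measured in both cell types."""
    sims = []
    for c2 in range(T.shape[1]):
        if c2 == c or not observed[d, c2]:
            continue
        both = observed[:, c] & observed[:, c2]
        if both.sum() < 2:
            continue
        r = np.corrcoef(T[both, c].ravel(), T[both, c2].ravel())[0, 1]
        sims.append((r, c2))
    sims.sort(reverse=True)
    top = sims[:k]
    if not top:
        return None
    w = np.array([max(s, 0) + 1e-6 for s, _ in top])
    profiles = np.stack([T[d, c2] for _, c2 in top])
    return (w[:, None] * profiles).sum(axis=0) / w.sum()

pred = predict_profile(T, observed, d=0, c=0)
if pred is not None:
    print("correlation with held-out truth:", round(float(np.corrcoef(pred, T[0, 0])[0, 1]), 2))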
2017 (9)
Learning a Health Knowledge Graph from Electronic Medical Records. Rotmensch, M.; Halpern, Y.; Tlimat, A.; Horng, S.; and Sontag, D. Nature Scientific Reports, 7(1): 5994. 2017.
\n\n\n\n \n \n \"Learning paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{rotmensch_nature_sr17,
  Author = {Rotmensch, Maya and Halpern, Yoni and Tlimat, Abdulhakim and Horng, Steven and Sontag, David},
  Journal = {Nature Scientific Reports},
  Number = {1},
  Pages = {5994},
  Title = {Learning a Health Knowledge Graph from Electronic Medical Records},
  Volume = {7},
  Year = {2017},
  keywords = {Health care},
  url_Paper = {https://www.nature.com/articles/s41598-017-05778-z.pdf},
  abstract = {Demand for clinical decision support systems in medicine and self-diagnostic symptom checkers has substantially increased in recent years. Existing platforms rely on knowledge bases manually compiled through a labor-intensive process or automatically derived using simple pairwise statistics. This study explored an automated process to learn high quality knowledge bases linking diseases and symptoms directly from electronic medical records. Medical concepts were extracted from 273,174 deidentified patient records and maximum likelihood estimation of three probabilistic models was used to automatically construct knowledge graphs: logistic regression, naive Bayes classifier and a Bayesian network using noisy OR gates. A graph of disease-symptom relationships was elicited from the learned parameters and the constructed knowledge graphs were evaluated and validated, with permission, against Google's manually-constructed knowledge graph and against expert physician opinions. Our study shows that direct and automated construction of high quality health knowledge graphs from medical records using rudimentary concept extraction is feasible. The noisy OR model produces a high quality knowledge graph reaching precision of 0.85 for a recall of 0.6 in the clinical evaluation. Noisy OR significantly outperforms all tested models across evaluation frameworks (p<0.01).}
}
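The best-performing model in the abstract above is a Bayesian network with noisy-OR gates linking diseases to symptoms. Below is a minimal sketch of the noisy-OR likelihood with invented diseases, symptoms, and parameters; in the paper these parameters are learned by maximum likelihood from patient records, which is not shown here.

import numpy as np

# Illustrative noisy-OR parameters (not learned from real records):
# failure[d, s] = probability that disease d, when present, fails to turn on symptom s.
diseases = ["pneumonia", "heart failure", "uti"]
symptoms = ["fever", "cough", "dyspnea"]
failure = np.array([[0.20, 0.10, 0.50],
                    [0.70, 0.60, 0.15],
                    [0.30, 0.95, 0.90]])
leak = np.array([0.02, 0.05, 0.03])   # symptom can fire with no modeled disease

def symptom_probs(active):
    """P(symptom on | set of active diseases) under a noisy-OR gate."""
    idx = [diseases.index(d) for d in active]
    p_off = (1 - leak) * failure[idx].prod(axis=0) if idx else (1 - leak)
    return 1 - p_off

for s, p in zip(symptoms, symptom_probs({"pneumonia", "heart failure"})):
    print(f"P({s} | pneumonia, heart failure) = {p:.2f}")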
Grounded Recurrent Neural Networks. Vani, A.; Jernite, Y.; and Sontag, D. ArXiv e-prints arXiv:1705.08557. 2017.
\n\n\n\n \n \n \"Grounded paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 1 download\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n \n \n\n\n\n
\n
@article{VaniEtAl_arxiv17,
  author = {{Vani}, A. and {Jernite}, Y. and {Sontag}, D.},
  title = "{Grounded Recurrent Neural Networks}",
  journal = {ArXiv e-prints arXiv:1705.08557},
  archivePrefix = "arXiv",
  eprint = {1705.08557},
  primaryClass = "stat.ML",
  year = 2017,
  keywords = {Machine learning, Health care, Natural language processing, Deep learning},
  url_Paper = {https://arxiv.org/pdf/1705.08557.pdf},
  abstract = {In this work, we present the Grounded Recurrent Neural Network (GRNN), a recurrent neural network architecture for multi-label prediction which explicitly ties labels to specific dimensions of the recurrent hidden state (we call this process "grounding"). The approach is particularly well-suited for extracting large numbers of concepts from text. We apply the new model to address an important problem in healthcare of understanding what medical concepts are discussed in clinical text. Using a publicly available dataset derived from Intensive Care Units, we learn to label a patient's diagnoses and procedures from their discharge summary. Our evaluation shows a clear advantage to using our proposed architecture over a variety of strong baselines.}
}
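A toy PyTorch sketch of the "grounding" idea in the abstract above: each label is tied to one dimension of the recurrent hidden state and read out directly through a sigmoid. This is a simplification for illustration rather than the GRNN architecture itself; the vocabulary size, dimensions, and readout bias are assumptions.

import torch
import torch.nn as nn

class GroundedRNN(nn.Module):
    """Toy grounding: label k is predicted from hidden dimension k directly,
    so those hidden dimensions stay interpretable."""
    def __init__(self, vocab, emb, n_labels, extra_dims=32):
        super().__init__()
        self.embed = nn.Embedding(vocab, emb)
        self.rnn = nn.GRU(emb, n_labels + extra_dims, batch_first=True)
        self.n_labels = n_labels
        self.bias = nn.Parameter(torch.zeros(n_labels))

    def forward(self, tokens):                  # tokens: (batch, seq_len) of word ids
        _, h = self.rnn(self.embed(tokens))     # h: (1, batch, hidden)
        grounded = h[0, :, : self.n_labels]     # one hidden dimension per label
        return torch.sigmoid(grounded + self.bias)

model = GroundedRNN(vocab=5000, emb=64, n_labels=10)
tokens = torch.randint(0, 5000, (8, 120))
probs = model(tokens)                           # (8, 10) per-label probabilities
loss = nn.functional.binary_cross_entropy(probs, torch.randint(0, 2, (8, 10)).float())
print(probs.shape, float(loss))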
Structured Inference Networks for Nonlinear State Space Models. Krishnan, R. G.; Shalit, U.; and Sontag, D. In Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence, pages 2101-2109, 2017.
\n\n\n\n \n \n \"Structured paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 3 downloads\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n \n \n \n \n\n\n\n
\n
@inproceedings{KrishnanEtAl_aaai17,
  author = {Rahul G. Krishnan and Uri Shalit and David Sontag},
  title = {Structured Inference Networks for Nonlinear State Space Models},
  booktitle = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence},
  pages = {2101-2109},
  year = {2017},
  keywords = {Machine learning, Unsupervised learning, Deep learning, Health care, Approximate inference in graphical models},
  url_Paper = {https://arxiv.org/pdf/1609.09869.pdf},
  abstract = {Gaussian state space models have been used for decades as generative models of sequential data. They admit an intuitive probabilistic interpretation, have a simple functional form, and enjoy widespread adoption. We introduce a unified algorithm to efficiently learn a broad class of linear and non-linear state space models, including variants where the emission and transition distributions are modeled by deep neural networks. Our learning algorithm simultaneously learns a compiled inference network and the generative model, leveraging a structured variational approximation parameterized by recurrent neural networks to mimic the posterior distribution. We apply the learning algorithm to both synthetic and real-world datasets, demonstrating its scalability and versatility. We find that using the structured approximation to the posterior results in models with significantly higher held-out likelihood.}
}
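A compact PyTorch sketch, under stated assumptions, of a structured inference network for a nonlinear state space model: Gaussian transition and emission distributions parameterized by small MLPs, and a variational posterior q(z_t | z_{t-1}, x_{t:T}) built from the previous latent and a backward RNN over the observations, trained by maximizing the ELBO. It illustrates the general recipe rather than reproducing the paper's model; all dimensions and network sizes are assumptions.

import torch
import torch.nn as nn

class DeepMarkovSketch(nn.Module):
    def __init__(self, x_dim, z_dim, h_dim=64):
        super().__init__()
        self.trans = nn.Sequential(nn.Linear(z_dim, h_dim), nn.Tanh(), nn.Linear(h_dim, 2 * z_dim))
        self.emit = nn.Sequential(nn.Linear(z_dim, h_dim), nn.Tanh(), nn.Linear(h_dim, x_dim))
        self.back_rnn = nn.GRU(x_dim, h_dim, batch_first=True)
        self.combine = nn.Linear(z_dim + h_dim, 2 * z_dim)
        self.z_dim = z_dim

    def forward(self, x):                        # x: (batch, T, x_dim); returns mean ELBO
        b, T, _ = x.shape
        h_back, _ = self.back_rnn(torch.flip(x, dims=[1]))
        h_back = torch.flip(h_back, dims=[1])    # h_back[:, t] summarizes x_{t:T}
        z_prev = x.new_zeros(b, self.z_dim)
        elbo = 0.0
        for t in range(T):
            q_mu, q_logvar = self.combine(torch.cat([z_prev, h_back[:, t]], -1)).chunk(2, -1)
            z = q_mu + torch.randn_like(q_mu) * torch.exp(0.5 * q_logvar)   # reparameterized sample
            p_mu, p_logvar = self.trans(z_prev).chunk(2, -1)
            # Gaussian KL(q || p) and unit-variance Gaussian log-likelihood of x_t.
            kl = 0.5 * (p_logvar - q_logvar
                        + (q_logvar.exp() + (q_mu - p_mu) ** 2) / p_logvar.exp() - 1).sum(-1)
            log_px = -0.5 * ((x[:, t] - self.emit(z)) ** 2).sum(-1)
            elbo = elbo + (log_px - kl)
            z_prev = z
        return elbo.mean()

model = DeepMarkovSketch(x_dim=5, z_dim=3)
loss = -model(torch.randn(16, 20, 5))            # maximize ELBO = minimize its negative
loss.backward()
print(float(loss))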
Electronic phenotyping with APHRODITE and the Observational Health Sciences and Informatics (OHDSI) data network. Banda, J. M.; Halpern, Y.; Sontag, D.; and Shah, N. H. In Proceedings of the AMIA Summit on Clinical Research Informatics (CRI), 2017.
\n\n\n\n \n \n \"Electronic paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@inproceedings{banda_cri17,
  author = {Juan M. Banda and Yoni Halpern and David Sontag and Nigam H. Shah},
  title = {Electronic phenotyping with APHRODITE and the Observational Health Sciences and Informatics ({OHDSI}) data network},
  booktitle = {Proceedings of the AMIA Summit on Clinical Research Informatics (CRI)},
  year = {2017},
  keywords = {Health care},
  url_Paper = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5543379/pdf/2611061.pdf},
  abstract = {The widespread usage of electronic health records (EHRs) for clinical research has produced multiple electronic phenotyping approaches. Methods for electronic phenotyping range from those needing extensive specialized medical expert supervision to those based on semi-supervised learning techniques. We present Automated PHenotype Routine for Observational Definition, Identification, Training and Evaluation (APHRODITE), an R package phenotyping framework that combines noisy labeling and anchor learning. APHRODITE makes these cutting-edge phenotyping approaches available for use with the Observational Health Data Sciences and Informatics (OHDSI) data model for standardized and scalable deployment. APHRODITE uses EHR data available in the OHDSI Common Data Model to build classification models for electronic phenotyping. We demonstrate the utility of APHRODITE by comparing its performance versus traditional rule-based phenotyping approaches. Finally, the resulting phenotype models and model construction workflows built with APHRODITE can be shared between multiple OHDSI sites. Such sharing allows their application on large and diverse patient populations.}
}
Objective Assessment of Depressive Symptoms with Machine Learning and Wearable Sensors Data. Ghandeharioun, A.; Fedor, S.; Sangermano, L.; Ionescu, D.; Alpert, J.; Dale, C.; Sontag, D.; and Picard, R. In Proceedings of the Seventh International Conference on Affective Computing and Intelligent Interaction (ACII), 2017.
\n\n\n\n \n \n \"Objective paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@inproceedings{GhandehariounEtAl_ACII17,
  Author = {Asma Ghandeharioun and Szymon Fedor and Lisa Sangermano and Dawn Ionescu and Jonathan Alpert and Chelsea Dale and David Sontag and Rosalind Picard},
  Title = {Objective Assessment of Depressive Symptoms with Machine Learning and Wearable Sensors Data},
  Year = {2017},
  Booktitle = {Proceedings of the Seventh International Conference on Affective Computing and Intelligent Interaction (ACII)},
  keywords = {Health care},
  url_Paper = {http://affect.media.mit.edu/pdfs/17.ghandeharioun_etal_objective_ACII.pdf},
  abstract = {Depression is the major cause of years lived in disability world-wide; however, its diagnosis and tracking methods still rely mainly on assessing self-reported depressive symptoms, methods that originated more than fifty years ago. These methods, which usually involve filling out surveys or engaging in face-to-face interviews, provide limited accuracy and reliability and are costly to track and scale. In this paper, we develop and test the efficacy of machine learning techniques applied to objective data captured passively and continuously from E4 wearable wristbands and from sensors in an Android phone for predicting the Hamilton Depression Rating Scale (HDRS). Input data include electrodermal activity (EDA), sleep behavior, motion, phone-based communication, location changes, and phone usage patterns. We introduce our feature generation and transformation process, imputing missing clinical scores from self-reported measures, and predicting depression severity from continuous sensor measurements. While HDRS ranges between 0 and 52, we were able to impute it with 2.8 RMSE and predict it with 4.5 RMSE which are low relative errors. Analyzing the features and their relation to depressive symptoms, we found that poor mental health was accompanied by more irregular sleep, less motion, fewer incoming messages, less variability in location patterns, and higher asymmetry of EDA between the right and the left wrists.}
}
Early Identification of Patients with Acute Decompensated Heart Failure. Blecker, S.; Sontag, D.; Horwitz, L.; Kuperman, G.; Park, H.; Reyentovich, A.; and Katz, S. Journal of Cardiac Failure. 2017.
\n\n\n\n \n \n \"Early paper\n  \n \n\n \n \n doi\n  \n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{BleckerEtAl_JCF17,
  Author = {Blecker, Saul and Sontag, David and Horwitz, Leora and Kuperman, Gilad and Park, Hannah and Reyentovich, Alex and Katz, Stuart},
  Booktitle = {Journal of Cardiac Failure},
  Doi = {10.1016/j.cardfail.2017.08.458},
  Isbn = {1071-9164},
  Journal = {Journal of Cardiac Failure},
  Year = {2017},
  Publisher = {Elsevier},
  Title = {Early Identification of Patients with Acute Decompensated Heart Failure},
  Ty = {JOUR},
  keywords = {Health care},
  url_Paper = {http://www.onlinejcf.com/article/S1071-9164(17)31161-2/pdf},
  abstract = {Interventions to reduce readmissions following acute heart failure hospitalization require early identification of patients. The purpose of this study was to develop and test accuracies of various approaches to identify patients with acute decompensated heart failure (ADHF) using data derived from the electronic health record. We included 37,229 hospitalizations of adult patients at a single hospital in 2013-2015. We developed four algorithms to identify hospitalization with a principal discharge diagnosis of ADHF: 1) presence of one of three clinical characteristics; 2) logistic regression of 31 structured data elements; 3) machine learning with unstructured data; 4) machine learning with both structured and unstructured data. In data validation, Algorithm 1 had a sensitivity of 0.98 and positive predictive value (PPV) of 0.14 for ADHF. Algorithm 2 had an area under the receiver operating characteristic curve (AUC) of 0.96, while both machine learning algorithms had AUCs of 0.99. Based on a brief survey of three providers who perform chart review for ADHF, we estimated providers spent 8.6 minutes per chart review; using this parameter, we estimated providers would spend 61.4, 57.3, 28.7, and 25.3 minutes on secondary chart review for each case of ADHF if initial screening was done with algorithms 1, 2, 3, and 4, respectively. In conclusion, machine learning algorithms with unstructured notes had best performance for identification of ADHF and can improve provider efficiency for delivery of quality improvement interventions.}
}
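The abstract above compares rule-based, structured-data, and free-text approaches for identifying ADHF hospitalizations. Below is a minimal scikit-learn sketch of the free-text flavor, with invented toy notes in place of real discharge documentation; the features, labels, and model choice are assumptions, not the study's pipeline.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

# Toy stand-ins for clinical notes; real use would pull de-identified EHR text.
notes = [
    "acute decompensated heart failure, started iv furosemide, bnp elevated",
    "copd exacerbation, wheezing improved with nebulizers, no edema",
    "volume overload with reduced ejection fraction, diuresis continued",
    "community acquired pneumonia, levofloxacin, afebrile at discharge",
]
labels = [1, 0, 1, 0]                           # 1 = principal diagnosis of ADHF

clf = make_pipeline(TfidfVectorizer(ngram_range=(1, 2), min_df=1),
                    LogisticRegression(max_iter=1000))
clf.fit(notes, labels)
print(clf.predict_proba(["worsening dyspnea and leg edema, bnp 1200, iv diuretics begun"])[:, 1])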
Using Machine Learning to Recommend Oncology Clinical Trials. Das, A.; Thorbergsson, L.; Grigorenko, A.; Sontag, D.; and Huerga, I. In Machine Learning for Health Care (Clinical abstract), 2017.
\n\n\n\n \n \n \"Using paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 1 download\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@inproceedings{DasEtAl_mlhc17,
  author = {Anasuya Das and Leifur Thorbergsson and Aleksandr Grigorenko and David Sontag and Iker Huerga},
  title = {Using Machine Learning to Recommend Oncology Clinical Trials},
  booktitle = {Machine Learning for Health Care (Clinical abstract)},
  year = {2017},
  keywords = {Health care},
  url_Paper = {http://mucmd.org/CameraReadySubmissions/21\%5Cclinical_abstracts\%201.pdf},
  abstract = {Clinical trials serve an important role in oncology, not only advancing medical science but also offering patients promising therapy before it is widely available. Memorial Sloan Kettering Cancer Center (MSK) conducts over 500 therapeutic trials at one time; most are focused on a single type of cancer (e.g. breast, lung) reflecting subspecialized nature of care. However, clinical trial accrual is a challenge as patient-trial matching is a slow and manual process. We address this challenge via a machine learning-powered clinical trial recommendation engine designed to be deployed at the point of care.}
}
Creating an Automated Trigger for Sepsis Clinical Decision Support at Emergency Department Triage using Machine Learning. Horng, S.; Sontag, D.; Halpern, Y.; Jernite, Y.; Shapiro, N. I.; and Nathanson, L. A. PLoS ONE, 12(4): e0174708. 2017.
\n\n\n\n \n \n \"Creating paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{HorngEtAl_plos17,
  author = {Steven Horng and David Sontag and Yoni Halpern and Yacine Jernite and Nathan I. Shapiro and Larry A. Nathanson},
  title = {Creating an Automated Trigger for Sepsis Clinical Decision Support at Emergency Department Triage using Machine Learning},
  journal = {PLoS ONE},
  volume = {12},
  number = {4},
  pages = {e0174708},
  year = {2017},
  keywords = {Health care},
  url_Paper = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0174708},
  abstract = {Our objective is to demonstrate the incremental benefit of using free text data in addition to vital sign and demographic data to identify patients with suspected infection in the emergency department. Compared to previous work that only used structured data such as vital signs and demographic information, utilizing free text drastically improves the discriminatory ability (increase in AUC from 0.67 to 0.86) of identifying infection.}
}
Contextual Autocomplete: A Novel User Interface Using Machine Learning to Improve Ontology Usage and Structured Data Capture for Presenting Problems in the Emergency Department. Greenbaum, N. R.; Jernite, Y.; Halpern, Y.; Calder, S.; Nathanson, L. A.; Sontag, D.; and Horng, S. bioRxiv:10.1101/127092. 2017.
\n\n\n\n \n \n \"Contextual paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{GreenbaumEtAl17,
  author = {Greenbaum, Nathaniel R and Jernite, Yacine and Halpern, Yoni and Calder, Shelley and Nathanson, Larry A. and Sontag, David and Horng, Steven},
  title = {Contextual Autocomplete: A Novel User Interface Using Machine Learning to Improve Ontology Usage and Structured Data Capture for Presenting Problems in the Emergency Department},
  year = {2017},
  journal = {bioRxiv:10.1101/127092},
  keywords = {Health care},
  url_Paper = {https://www.biorxiv.org/content/early/2017/04/12/127092.full.pdf},
  abstract = {Our objective is to determine the effect of contextual autocomplete, a user interface that uses machine learning, on the efficiency and quality of documentation of presenting problems (chief complaints) in the emergency department (ED). We used contextual autocomplete, a user interface that ranks concepts by their predicted probability, to help nurses enter data about a patient’s reason for visiting the ED. Predicted probabilities were calculated using a previously derived model based on triage vital signs and a brief free text note. We evaluated the percentage and quality of structured data captured using a prospective before-and-after study design. A total of 279,231 patient encounters were analyzed. Structured data capture improved from 26.2\% to 97.2\% (p<0.0001). During the post-implementation period, presenting problems were more complete (3.35 vs 3.66; p=0.0004), as precise (3.59 vs. 3.74; p=0.1), and higher in overall quality (3.38 vs. 3.72; p=0.0002). Our system reduced the mean number of keystrokes required to document a presenting problem from 11.6 to 0.6 (p<0.0001), a 95\% improvement. We have thus demonstrated a technique that captures structured data on nearly all patients. We estimate that our system reduces the number of man-hours required annually to type presenting problems at our institution from 92.5 hours to 4.8 hours. In conclusion, implementation of a contextual autocomplete system resulted in improved structured data capture, ontology usage compliance, and data quality.}
}
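A toy sketch of the contextual-autocomplete idea in the abstract above: score ontology concepts from the triage context, then rank the concepts matching the typed prefix by that score. The concept list and the hand-set scoring function are stand-ins for the paper's learned probability model.

# Toy contextual autocomplete: rank ontology concepts by a context score,
# then filter by what the nurse has typed so far. Scores are hand-set
# stand-ins for a model's predicted probabilities.
concepts = ["chest pain", "chest wall injury", "cough", "shortness of breath", "syncope"]

def context_scores(age, heart_rate, note):
    score = {c: 0.01 for c in concepts}
    if "sob" in note or heart_rate > 100:
        score["shortness of breath"] += 0.5
    if "cp" in note or "pain" in note:
        score["chest pain"] += 0.6
    if age > 60:
        score["syncope"] += 0.1
    return score

def autocomplete(prefix, scores, k=3):
    matches = [c for c in concepts if c.startswith(prefix.lower())]
    return sorted(matches, key=lambda c: scores[c], reverse=True)[:k]

scores = context_scores(age=72, heart_rate=110, note="sob and cp on exertion")
print(autocomplete("ch", scores))   # "chest pain" ranks above "chest wall injury" here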
2016 (7)
Multi-task Prediction of Disease Onsets from Longitudinal Laboratory Tests. Razavian, N.; Marcus, J.; and Sontag, D. In Doshi-Velez, F.; Fackler, J.; Kale, D.; Wallace, B.; and Wiens, J., editors, Proceedings of the 1st Machine Learning for Healthcare Conference, volume 56 of Proceedings of Machine Learning Research, pages 73-100, 2016. PMLR.
\n\n\n\n \n \n \"Multi-task paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@InProceedings{RazavianEtAl_mlhc16,
  title = {Multi-task Prediction of Disease Onsets from Longitudinal Laboratory Tests},
  author = {Narges Razavian and Jake Marcus and David Sontag},
  booktitle = {Proceedings of the 1st Machine Learning for Healthcare Conference},
  pages = {73-100},
  year = {2016},
  editor = {Finale Doshi-Velez and Jim Fackler and David Kale and Byron Wallace and Jenna Wiens},
  volume = {56},
  series = {Proceedings of Machine Learning Research},
  publisher = {PMLR},
  keywords = {Health care, Deep learning},
  url_Paper = {http://arxiv.org/pdf/1608.00647.pdf},
  abstract = {Disparate areas of machine learning have benefited from models that can take raw data with little preprocessing as input and learn rich representations of that raw data in order to perform well on a given prediction task. We evaluate this approach in healthcare by using longitudinal measurements of lab tests, one of the more raw signals of a patient's health state widely available in clinical data, to predict disease onsets. In particular, we train a Long Short-Term Memory (LSTM) recurrent neural network and two novel convolutional neural networks for multi-task prediction of disease onset for 133 conditions based on 18 common lab tests measured over time in a cohort of 298K patients derived from 8 years of administrative claims data. We compare the neural networks to a logistic regression with several hand-engineered, clinically relevant features. We find that the representation-based learning approaches significantly outperform this baseline. We believe that our work suggests a new avenue for patient risk stratification based solely on lab results.}
}
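A compact PyTorch sketch of the multi-task setup described above: a shared LSTM over sequences of 18 lab values with 133 per-disease sigmoid heads. The 18 and 133 dimensions come from the abstract; everything else (hidden size, toy data, single-layer trunk) is an assumption.

import torch
import torch.nn as nn

class MultiTaskLabLSTM(nn.Module):
    """Shared LSTM trunk over lab-test sequences with one logit per condition."""
    def __init__(self, n_labs=18, n_diseases=133, hidden=128):
        super().__init__()
        self.lstm = nn.LSTM(n_labs, hidden, batch_first=True)
        self.heads = nn.Linear(hidden, n_diseases)

    def forward(self, labs):                         # labs: (batch, time steps, n_labs)
        _, (h, _) = self.lstm(labs)
        return self.heads(h[-1])                     # (batch, n_diseases) logits

model = MultiTaskLabLSTM()
labs = torch.randn(32, 24, 18)                       # e.g. 24 months of 18 lab values
onsets = (torch.rand(32, 133) < 0.05).float()        # toy multi-label onset targets
loss = nn.functional.binary_cross_entropy_with_logits(model(labs), onsets)
loss.backward()
print(float(loss))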
Clinical Tagging with Joint Probabilistic Models. Halpern, Y.; Horng, S.; and Sontag, D. In Doshi-Velez, F.; Fackler, J.; Kale, D.; Wallace, B.; and Wiens, J., editors, Proceedings of the 1st Machine Learning for Healthcare Conference, volume 56 of Proceedings of Machine Learning Research, pages 209-225, 2016.
\n\n\n\n \n \n \"Clinical paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@InProceedings{HalpernEtAl_mlhc16,
  title = {Clinical Tagging with Joint Probabilistic Models},
  author = {Yoni Halpern and Steven Horng and David Sontag},
  booktitle = {Proceedings of the 1st Machine Learning for Healthcare Conference},
  pages = {209-225},
  year = {2016},
  editor = {Finale Doshi-Velez and Jim Fackler and David Kale and Byron Wallace and Jenna Wiens},
  volume = {56},
  series = {Proceedings of Machine Learning Research},
  keywords = {Health care, Unsupervised learning},
  url_Paper = {https://arxiv.org/pdf/1608.00686.pdf},
  abstract = {We describe a method for parameter estimation in bipartite probabilistic graphical models for joint prediction of clinical conditions from the electronic medical record. The method does not rely on the availability of gold-standard labels, but rather uses noisy labels, called anchors, for learning. We provide a likelihood-based objective and a moments-based initialization that are effective at learning the model parameters. The learned model is evaluated in a task of assigning a heldout clinical condition to patients based on retrospective analysis of the records, and outperforms baselines which do not account for the noisiness in the labels or do not model the conditions jointly.}
}
Comparison of approaches for heart failure case identification from electronic health record data. Blecker, S.; Katz, S.; Horwitz, L.; Kuperman, G.; Park, H.; Gold, A.; and Sontag, D. JAMA Cardiology, 1(9): 1014-1020. 2016.
\n\n\n\n \n \n \"Comparison paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{BleckerEtAl_jama16,
  author = {Blecker, Saul and Katz, Stuart and Horwitz, LI and Kuperman, Gilad and Park, H and Gold, A and Sontag, David},
  title = {Comparison of approaches for heart failure case identification from electronic health record data},
  journal = {JAMA Cardiology},
  volume = {1},
  number = {9},
  pages = {1014-1020},
  year = {2016},
  keywords = {Health care},
  url_Paper = {http://jamanetwork.com/journals/jamacardiology/article-abstract/2557840},
  abstract = {Accurate, real-time case identification is needed to target interventions to improve quality and outcomes for hospitalized patients with heart failure. Problem lists may be useful for case identification but are often inaccurate or incomplete. Machine-learning approaches may improve accuracy of identification but can be limited by complexity of implementation. Our objective was to develop algorithms that use readily available clinical data to identify patients with heart failure while in the hospital. In this study of 47,119 hospitalizations, inclusion of heart failure on the problem list had a sensitivity of 0.40 and a positive predictive value (PPV) of 0.96. A logistic regression model with clinical data was associated with a sensitivity of 0.68 and PPV of 0.90, whereas a machine-learning algorithm that used free text had a sensitivity of 0.83 and a PPV of 0.90. The high predictive accuracy of machine learning using free text demonstrates that support of such analytics in future electronic health record systems can improve cohort identification.}
}
Identifiable Phenotyping using Constrained Non-Negative Matrix Factorization. Joshi, S.; Gunasekar, S.; Sontag, D.; and Joydeep, G. In Doshi-Velez, F.; Fackler, J.; Kale, D.; Wallace, B.; and Wiens, J., editors, Proceedings of the 1st Machine Learning for Healthcare Conference, volume 56 of Proceedings of Machine Learning Research, pages 17-41, 2016. PMLR.
\n\n\n\n \n \n \"Identifiable paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@InProceedings{JoshiEtAl_MLHC16,
  title = {Identifiable Phenotyping using Constrained Non-Negative Matrix Factorization},
  author = {Shalmali Joshi and Suriya Gunasekar and David Sontag and Ghosh Joydeep},
  booktitle = {Proceedings of the 1st Machine Learning for Healthcare Conference},
  pages = {17--41},
  year = {2016},
  editor = {Finale Doshi-Velez and Jim Fackler and David Kale and Byron Wallace and Jenna Wiens},
  volume = {56},
  series = {Proceedings of Machine Learning Research},
  publisher = {PMLR},
  keywords = {Health care},
  url_Paper = {http://proceedings.mlr.press/v56/Joshi16.pdf},
  abstract = {This work proposes a new algorithm for automated and simultaneous phenotyping of multiple co-occurring medical conditions, also referred to as comorbidities, using clinical notes from electronic health records (EHRs). A latent factor estimation technique, non-negative matrix factorization (NMF), is augmented with domain constraints from weak supervision to obtain sparse latent factors that are grounded to a fixed set of chronic conditions. The proposed grounding mechanism ensures a one-to-one identifiable and interpretable mapping between the latent factors and the target comorbidities. Qualitative assessment of the empirical results by clinical experts show that the proposed model learns clinically interpretable phenotypes which are also shown to have competitive performance on 30 day mortality prediction task. The proposed method can be readily adapted to any non-negative EHR data across various healthcare institutions.}
}
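A small numpy sketch, under stated assumptions, of NMF with a grounding constraint like the one described above: multiplicative updates for the Frobenius objective, with each latent factor's loadings masked to a fixed set of "anchor" terms so that factors stay identified with specific conditions. The data and anchor sets are synthetic, and the masking scheme is an illustrative stand-in for the paper's constraints.

import numpy as np

rng = np.random.default_rng(0)
n_notes, n_terms, k = 200, 50, 4
X = rng.random((n_notes, n_terms))           # toy nonnegative note-term matrix

# Weak supervision: factor j may only load on its condition's anchor terms
# (synthetic anchor sets; in practice these would come from clinical term lists).
allowed = np.zeros((k, n_terms))
for j in range(k):
    allowed[j, j * 10 : (j + 1) * 10] = 1.0

W = rng.random((n_notes, k)) + 1e-3
H = (rng.random((k, n_terms)) + 1e-3) * allowed

for _ in range(200):                         # multiplicative updates for ||X - WH||^2
    H *= (W.T @ X) / (W.T @ W @ H + 1e-9)
    H *= allowed                             # grounding constraint: keep the support fixed
    W *= (X @ H.T) / (W @ H @ H.T + 1e-9)

print("reconstruction error:", round(float(np.linalg.norm(X - W @ H)), 2))
print("factor 0 loads only on its anchor terms:", bool((H[0, 10:] == 0).all()))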
Electronic Medical Record Phenotyping using the Anchor & Learn Framework. Halpern, Y.; Horng, S.; Choi, Y.; and Sontag, D. Journal of the American Medical Informatics Association (JAMIA), 2016.
\n\n\n\n \n \n \"Electronic paper\n  \n \n\n \n \n doi\n  \n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 1 download\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@inproceedings{HalpernEtAl_jamia16,
  author = {Yoni Halpern and Steven Horng and Youngduck Choi and David Sontag},
  title = {Electronic Medical Record Phenotyping using the Anchor \& Learn Framework},
  booktitle = {Journal of the American Medical Informatics Association (JAMIA)},
  year = {2016},
  keywords = {Health care, Unsupervised learning},
  doi = {10.1093/jamia/ocw011},
  url_Paper = {http://jamia.oxfordjournals.org/content/early/2016/04/26/jamia.ocw011.full.pdf},
  abstract = {Electronic medical records (EMRs) hold a tremendous amount of information about patients that is relevant to determining the optimal approach to patient care. As medicine becomes increasingly precise, a patient’s electronic medical record phenotype will play an important role in triggering clinical decision support systems that can deliver personalized recommendations in real time. Learning with anchors presents a method of efficiently learning statistically driven phenotypes with minimal manual intervention. We developed a phenotype library that uses both structured and unstructured data from the EMR to represent patients for real-time clinical decision support. Eight of the phenotypes were evaluated using retrospective EMR data on emergency department patients using a set of prospectively gathered gold standard labels. We built the phenotype library with 42 publicly available phenotype definitions. Using information from triage time, the phenotype classifiers have an area under the ROC curve (AUC) of infection 0.89, cancer 0.88, immunosuppressed 0.85, septic shock 0.93, nursing home 0.87, anticoagulated 0.83, cardiac etiology 0.89, and pneumonia 0.90. Using information available at the time of disposition from the emergency department, the AUC values are infection 0.91, cancer 0.95, immunosuppressed 0.90, septic shock 0.97, nursing home 0.91, anticoagulated 0.94, cardiac etiology 0.92, and pneumonia 0.97. The resulting phenotypes are interpretable and fast to build, and perform comparably to statistically learned phenotypes developed with 5000 manually labeled patients. Learning with anchors is an attractive option for building a large public repository of phenotype definitions that can be used for a range of health IT applications, including real-time decision support.}
}
%	publisher = {The Oxford University Press},
%	issn = {1067-5027},
%	URL = {http://jamia.oxfordjournals.org/content/early/2016/04/26/jamia.ocw011},
\n \n\n \n \n \n \n \n \n Population-Level Prediction of Type 2 Diabetes using Claims Data and Analysis of Risk Factors.\n \n \n \n \n\n\n \n Razavian, N.; Blecker, S.; Schmidt, A. M.; Smith-McLallen, A.; Nigam, S.; and Sontag, D.\n\n\n \n\n\n\n Big Data, Data and Healthcare Special Issue. 2016.\n \n\n\n\n
\n\n\n\n \n \n \"Population-Level paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{RazavianEtAl_bigdata16,\n author = {Narges Razavian and Saul Blecker and Ann Marie Schmidt and Aaron Smith-McLallen and Somesh Nigam and David Sontag},\n title = {Population-Level Prediction of Type 2 Diabetes using Claims Data and Analysis of Risk Factors},\n journal = {Big Data},\n year = {2016},\n volume = {Data and Healthcare Special Issue},\n keywords = {Health care},\n url_Paper = {http://online.liebertpub.com/doi/pdf/10.1089/big.2015.0020},\n abstract = {We present a new approach to population health, in which data-driven predictive models are learned for outcomes such as type 2 diabetes. Our approach enables risk assessment from readily available electronic claims data on large populations, without additional screening cost. Proposed model uncovers early and late-stage risk factors. Using administrative claims, pharmacy records, healthcare utilization, and laboratory results of 4.1 million individuals between 2005 and 2009, an initial set of 42,000 variables were derived that together describe the full health status and history of every individual. Machine learning was then used to methodically enhance predictive variable set and fit models predicting onset of type 2 diabetes in 2009-2011, 2010-2012, and 2011-2013. We compared the enhanced model with a parsimonious model consisting of known diabetes risk factors in a real-world environment, where missing values are common and prevalent. Furthermore, we analyzed novel and known risk factors emerging from the model at different age groups at different stages before the onset. Parsimonious model using 21 classic diabetes risk factors resulted in area under ROC curve (AUC) of 0.75 for diabetes prediction within a 2-year window following the baseline. The enhanced model increased the AUC to 0.80, with about 900 variables selected as predictive ( p < 0.0001 for differences between AUCs). Similar improvements were observed for models predicting diabetes onset 1–3 years and 2–4 years after baseline. The enhanced model improved positive predictive value by at least 50\\% and identified novel surrogate risk factors for type 2 diabetes, such as chronic liver disease (odds ratio [OR] 3.71), high alanine aminotransferase (OR 2.26), esophageal reflux (OR 1.85), and history of acute bronchitis (OR 1.45). Liver risk factors emerge later in the process of diabetes development compared with obesity-related factors such as hypertension and high hemoglobin A1c. In conclusion, population-level risk prediction for type 2 diabetes using readily available administrative data is feasible and has better prediction performance than classical diabetes risk prediction algorithms on very large populations with missing data. The new model enables intervention allocation at national scale quickly and accurately and recovers potentially novel risk factors at different stages before the disease onset.}\n}\n\n
\n
\n\n\n
\n We present a new approach to population health, in which data-driven predictive models are learned for outcomes such as type 2 diabetes. Our approach enables risk assessment from readily available electronic claims data on large populations, without additional screening cost. The proposed model uncovers early and late-stage risk factors. Using administrative claims, pharmacy records, healthcare utilization, and laboratory results of 4.1 million individuals between 2005 and 2009, an initial set of 42,000 variables was derived that together describe the full health status and history of every individual. Machine learning was then used to methodically enhance the predictive variable set and fit models predicting onset of type 2 diabetes in 2009-2011, 2010-2012, and 2011-2013. We compared the enhanced model with a parsimonious model consisting of known diabetes risk factors in a real-world environment, where missing values are common. Furthermore, we analyzed novel and known risk factors emerging from the model at different age groups and at different stages before onset. The parsimonious model using 21 classic diabetes risk factors resulted in an area under the ROC curve (AUC) of 0.75 for diabetes prediction within a 2-year window following the baseline. The enhanced model increased the AUC to 0.80, with about 900 variables selected as predictive (p < 0.0001 for differences between AUCs). Similar improvements were observed for models predicting diabetes onset 1–3 years and 2–4 years after baseline. The enhanced model improved positive predictive value by at least 50% and identified novel surrogate risk factors for type 2 diabetes, such as chronic liver disease (odds ratio [OR] 3.71), high alanine aminotransferase (OR 2.26), esophageal reflux (OR 1.85), and history of acute bronchitis (OR 1.45). Liver risk factors emerge later in the process of diabetes development compared with obesity-related factors such as hypertension and high hemoglobin A1c. In conclusion, population-level risk prediction for type 2 diabetes using readily available administrative data is feasible and has better prediction performance than classical diabetes risk prediction algorithms on very large populations with missing data. The new model enables intervention allocation at national scale quickly and accurately and recovers potentially novel risk factors at different stages before disease onset.\n
\n\n\n
\n\n\n
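The pipeline summarized above (tens of thousands of claims-derived variables, a sparse model selecting roughly 900 of them, evaluation by AUC against a parsimonious baseline) can be illustrated with a short, hedged sketch. Everything below is synthetic and generic: the L1-penalized logistic regression, the feature matrix, and the hyperparameters are stand-ins chosen for illustration, not the paper's actual model or data.

# Sketch: L1-regularized logistic regression on sparse claims-style features,
# evaluated by ROC AUC. All data below is synthetic.
import numpy as np
from scipy.sparse import random as sparse_random
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

rng = np.random.default_rng(0)
n_patients, n_features = 20000, 5000          # stand-ins for the cohort / variable counts
X = sparse_random(n_patients, n_features, density=0.01, random_state=0, format="csr")
true_w = np.zeros(n_features)
true_w[:200] = 3.0                             # only a subset of variables carries signal
logits = X @ true_w - 3.0
y = (rng.random(n_patients) < 1 / (1 + np.exp(-logits))).astype(int)

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=0)

# The L1 penalty drives most coefficients to zero, leaving a compact predictive set.
clf = LogisticRegression(penalty="l1", solver="liblinear", C=0.1, max_iter=1000)
clf.fit(X_tr, y_tr)

auc = roc_auc_score(y_te, clf.predict_proba(X_te)[:, 1])
n_selected = int((clf.coef_ != 0).sum())
print(f"test AUC = {auc:.3f}, non-zero coefficients = {n_selected}")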
\n \n\n \n \n \n \n \n \n Learning Low-Dimensional Representations of Medical Concepts.\n \n \n \n \n\n\n \n Choi, Y.; Chiu, Y.; and Sontag, D.\n\n\n \n\n\n\n In Proceedings of the AMIA Summit on Clinical Research Informatics (CRI), 2016. \n \n\n\n\n
\n\n\n\n \n \n \"Learning paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@inproceedings{ChoiChiuSon_amia16,\n  author    = {Youngduck Choi and Yi-I Chiu and David Sontag},\n  title     = {Learning Low-Dimensional Representations of Medical Concepts},\n booktitle = {Proceedings of the AMIA Summit on Clinical Research Informatics (CRI)},\n year = {2016},\n keywords = {Health care},\n url_Paper = {http://people.csail.mit.edu/dsontag/papers/ChoiChiuSontag_AMIA_CRI16.pdf},\n abstract = {We show how to learn low-dimensional representations (embeddings) of a wide range of concepts in medicine, including diseases (e.g., ICD9 codes), medications, procedures, and laboratory tests. We expect that these embeddings will be useful across medical informatics for tasks such as cohort selection and patient summarization. These embeddings are learned using a technique called neural language modeling from the natural language processing community. However, rather than learning the embeddings solely from text, we show how to learn the embeddings from claims data, which is widely available both to providers and to payers. We also show that with a simple algorithmic adjustment, it is possible to learn medical concept embeddings in a privacy preserving manner from co-occurrence counts derived from clinical narratives. Finally, we establish a methodological framework, arising from standard medical ontologies such as UMLS, NDF-RT, and CCS, to further investigate the embeddings and precisely characterize their quantitative properties.}\n}\n\n
\n
\n\n\n
\n We show how to learn low-dimensional representations (embeddings) of a wide range of concepts in medicine, including diseases (e.g., ICD9 codes), medications, procedures, and laboratory tests. We expect that these embeddings will be useful across medical informatics for tasks such as cohort selection and patient summarization. These embeddings are learned using a technique called neural language modeling from the natural language processing community. However, rather than learning the embeddings solely from text, we show how to learn the embeddings from claims data, which is widely available both to providers and to payers. We also show that with a simple algorithmic adjustment, it is possible to learn medical concept embeddings in a privacy preserving manner from co-occurrence counts derived from clinical narratives. Finally, we establish a methodological framework, arising from standard medical ontologies such as UMLS, NDF-RT, and CCS, to further investigate the embeddings and precisely characterize their quantitative properties.\n
\n\n\n
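The abstract above notes that the embeddings can be learned purely from co-occurrence counts. One common way to do that, shown here as a hedged stand-in rather than the paper's exact procedure, is to form a positive pointwise mutual information (PPMI) matrix from the counts and take a truncated SVD; the toy 8-concept count matrix is invented for illustration.

# Sketch: concept embeddings from a co-occurrence count matrix via PPMI + truncated SVD.
import numpy as np

def ppmi_svd_embeddings(counts, dim):
    """counts: (n_concepts, n_concepts) symmetric co-occurrence counts."""
    counts = np.asarray(counts, dtype=float)
    total = counts.sum()
    row = counts.sum(axis=1, keepdims=True)
    col = counts.sum(axis=0, keepdims=True)
    with np.errstate(divide="ignore"):
        pmi = np.log(counts * total / (row * col))
    ppmi = np.where(np.isfinite(pmi) & (pmi > 0), pmi, 0.0)   # clip -inf and negatives to 0
    U, S, _ = np.linalg.svd(ppmi)
    return U[:, :dim] * np.sqrt(S[:dim])                       # scaled left singular vectors

# Toy counts for 8 "concepts": 0-3 form one group, 4-7 another; concepts co-occur
# often within a group and rarely across groups.
group = np.array([0, 0, 0, 0, 1, 1, 1, 1])
counts = np.where(group[:, None] == group[None, :], 30, 2)
np.fill_diagonal(counts, 0)

emb = ppmi_svd_embeddings(counts, dim=2)

def cosine(a, b):
    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))

print("same-group similarity  sim(0, 1):", round(cosine(emb[0], emb[1]), 2))
print("cross-group similarity sim(0, 4):", round(cosine(emb[0], emb[4]), 2))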
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2015\n \n \n (5)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n Visual Exploration of Temporal Data in Electronic Medical Records.\n \n \n \n \n\n\n \n Krause, J.; Razavian, N.; Bertini, E.; and Sontag, D.\n\n\n \n\n\n\n In Proceedings of the American Medical Informatics Association (AMIA) Annual Symposium (Abstract), pages 1538, 2015. \n \n\n\n\n
\n\n\n\n \n \n \"Visual paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n  \n \n 1 download\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@inproceedings{KrauseEtAl_amia15,\n  author    = {Josua Krause and Narges Razavian and Enrico Bertini and David Sontag},\n  title     = {Visual Exploration of Temporal Data in Electronic Medical Records},\n booktitle = {Proceedings of the American Medical Informatics Association (AMIA) Annual Symposium (Abstract)},\n pages = {1538},\n year = {2015},\n keywords = {Health care},\n url_Paper = {http://people.csail.mit.edu/dsontag/papers/KrauseEtAl_PatientViz_AMIA15_abstract.pdf},\n}\n\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Predicting chronic comorbid conditions of type 2 diabetes in Newly-Diagnosed Diabetic Patients.\n \n \n \n \n\n\n \n Razavian, N.; Smith-McLallen, A.; Nigam, S.; Blecker, S.; Schmidt, A. M.; and Sontag, D.\n\n\n \n\n\n\n Value in Health (Abstract), 18(3): A53. 2015.\n \n\n\n\n
\n\n\n\n \n \n \"Predicting paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{RazavianEtAl_ispor15,\n\tAuthor = {Razavian, N. and Smith-McLallen, A. and Nigam, S. and Blecker, S. and Schmidt, A. M. and Sontag, D.},\n\tJournal = {Value in Health (Abstract)},\n\tyear = {2015},\n\tNumber = {3},\n\tPages = {A53},\n\tTitle = {Predicting chronic comorbid conditions of type 2 diabetes in Newly-Diagnosed Diabetic Patients},\n\tVolume = {18},\n        keywords = {Health care},\n        url_Paper = {https://www.ispor.org/awards/20Meet/prediction_posterID_PDB5.pdf},\n}\n\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Deep Kalman Filters.\n \n \n \n \n\n\n \n Krishnan, R. G.; Shalit, U.; and Sontag, D.\n\n\n \n\n\n\n In arXiv:1511.05121, 2015. \n \n\n\n\n
\n\n\n\n \n \n \"Deep paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n \n \n\n\n\n
\n
@inproceedings{KriShaSon_arxiv15,\n author = {Rahul G. Krishnan and Uri Shalit and David Sontag},\n title = {Deep Kalman Filters},\n booktitle = {arXiv:1511.05121},\n year = {2015},\n keywords = {Machine learning, Unsupervised learning, Health care, Deep learning},\n url_Paper = {http://arxiv.org/pdf/1511.05121.pdf},\n abstract = {Kalman Filters are one of the most influential models of time-varying phenomena. They admit an intuitive probabilistic interpretation, have a simple functional form, and enjoy widespread adoption in a variety of disciplines. Motivated by recent variational methods for learning deep generative models, we introduce a unified algorithm to efficiently learn a broad spectrum of Kalman filters. Of particular interest is the use of temporal generative models for counterfactual inference. We investigate the efficacy of such models for counterfactual inference, and to that end we introduce the "Healing MNIST" dataset where long-term structure, noise and actions are applied to sequences of digits. We show the efficacy of our method for modeling this dataset. We further show how our model can be used for counterfactual inference for patients, based on electronic health record data of 8,000 patients over 4.5 years.}\n}\n\n
\n
\n\n\n
\n Kalman Filters are one of the most influential models of time-varying phenomena. They admit an intuitive probabilistic interpretation, have a simple functional form, and enjoy widespread adoption in a variety of disciplines. Motivated by recent variational methods for learning deep generative models, we introduce a unified algorithm to efficiently learn a broad spectrum of Kalman filters. Of particular interest is the use of temporal generative models for counterfactual inference. We investigate the efficacy of such models for counterfactual inference, and to that end we introduce the \"Healing MNIST\" dataset where long-term structure, noise and actions are applied to sequences of digits. We show the efficacy of our method for modeling this dataset. We further show how our model can be used for counterfactual inference for patients, based on electronic health record data of 8,000 patients over 4.5 years.\n
\n\n\n
\n\n\n
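A minimal sketch of the general recipe described above, i.e. a Gaussian state-space model whose transition and emission functions are neural networks, trained with a variational (ELBO) objective. The layer sizes, the per-time-step Gaussian approximate posterior produced by a bidirectional GRU, and the synthetic random-walk sequences are illustrative assumptions, not the paper's architecture, data, or training setup.

# Sketch of a deep-Kalman-filter-style model in PyTorch.
import torch
import torch.nn as nn

class DeepKalmanFilter(nn.Module):
    def __init__(self, x_dim=4, z_dim=3, h_dim=32):
        super().__init__()
        self.z_dim = z_dim
        # p(z_t | z_{t-1}): MLP outputs mean and log-variance of a diagonal Gaussian.
        self.trans = nn.Sequential(nn.Linear(z_dim, h_dim), nn.ReLU(), nn.Linear(h_dim, 2 * z_dim))
        # p(x_t | z_t): Gaussian emission with learned mean and fixed unit variance.
        self.emit = nn.Sequential(nn.Linear(z_dim, h_dim), nn.ReLU(), nn.Linear(h_dim, x_dim))
        # q(z_t | x_{1:T}): bidirectional GRU over the observations, then a per-step Gaussian.
        self.enc_rnn = nn.GRU(x_dim, h_dim, batch_first=True, bidirectional=True)
        self.enc_out = nn.Linear(2 * h_dim, 2 * z_dim)

    @staticmethod
    def gauss_kl(mu_q, logvar_q, mu_p, logvar_p):
        # KL( N(mu_q, var_q) || N(mu_p, var_p) ) for diagonal Gaussians, summed over dims.
        return 0.5 * (logvar_p - logvar_q
                      + (logvar_q.exp() + (mu_q - mu_p) ** 2) / logvar_p.exp() - 1.0).sum(-1)

    def elbo(self, x):
        B, T, _ = x.shape
        h, _ = self.enc_rnn(x)
        mu_q, logvar_q = self.enc_out(h).chunk(2, dim=-1)
        z = mu_q + torch.randn_like(mu_q) * (0.5 * logvar_q).exp()   # reparameterized sample

        # Reconstruction term: unit-variance Gaussian log-likelihood (up to a constant).
        recon = -0.5 * ((x - self.emit(z)) ** 2).sum(-1)

        # Prior: standard normal at t=1, transition MLP applied to sampled z_{t-1} afterwards.
        mu_trans, logvar_trans = self.trans(z[:, :-1]).chunk(2, dim=-1)
        zeros = torch.zeros(B, 1, self.z_dim)
        mu_p = torch.cat([zeros, mu_trans], dim=1)
        logvar_p = torch.cat([zeros, logvar_trans], dim=1)
        kl = self.gauss_kl(mu_q, logvar_q, mu_p, logvar_p)

        return (recon - kl).sum(-1).mean()      # average ELBO per sequence

# Train briefly on synthetic smooth random-walk sequences just to exercise the objective.
torch.manual_seed(0)
x = torch.cumsum(0.1 * torch.randn(64, 20, 4), dim=1)
model = DeepKalmanFilter()
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
for step in range(200):
    loss = -model.elbo(x)
    opt.zero_grad(); loss.backward(); opt.step()
print("final negative ELBO per sequence:", round(float(loss), 3))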
\n \n\n \n \n \n \n \n \n Anchored Discrete Factor Analysis.\n \n \n \n \n\n\n \n Halpern, Y.; Horng, S.; and Sontag, D.\n\n\n \n\n\n\n In arXiv:1511.03299, 2015. \n \n\n\n\n
\n\n\n\n \n \n \"Anchored paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 1 download\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@inproceedings{HalpernEtAl_arxiv15,\n author = {Yoni Halpern and Steven Horng and David Sontag},\n title = {Anchored Discrete Factor Analysis},\n booktitle = {arXiv:1511.03299},\n year = {2015},\n keywords = {Machine learning, Unsupervised learning, Health care},\n url_Paper = {http://arxiv.org/pdf/1511.03299.pdf},\n abstract = {We present a semi-supervised learning algorithm for learning discrete factor analysis models with arbitrary structure on the latent variables. Our algorithm assumes that every latent variable has an "anchor", an observed variable with only that latent variable as its parent. Given such anchors, we show that it is possible to consistently recover moments of the latent variables and use these moments to learn complete models. We also introduce a new technique for improving the robustness of method-of-moment algorithms by optimizing over the marginal polytope or its relaxations. We evaluate our algorithm using two real-world tasks, tag prediction on questions from the Stack Overflow website and medical diagnosis in an emergency department.}\n}\n\n
\n
\n\n\n
\n We present a semi-supervised learning algorithm for learning discrete factor analysis models with arbitrary structure on the latent variables. Our algorithm assumes that every latent variable has an \"anchor\", an observed variable with only that latent variable as its parent. Given such anchors, we show that it is possible to consistently recover moments of the latent variables and use these moments to learn complete models. We also introduce a new technique for improving the robustness of method-of-moment algorithms by optimizing over the marginal polytope or its relaxations. We evaluate our algorithm using two real-world tasks, tag prediction on questions from the Stack Overflow website and medical diagnosis in an emergency department.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Temporal Convolutional Neural Networks for Diagnosis from Lab Tests.\n \n \n \n \n\n\n \n Razavian, N.; and Sontag, D.\n\n\n \n\n\n\n In arXiv:1511.07938, 2015. \n \n\n\n\n
\n\n\n\n \n \n \"Temporal paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@inproceedings{RazavianSontag_arxiv15,\n author = {Narges Razavian and David Sontag},\n title = {Temporal Convolutional Neural Networks for Diagnosis from Lab Tests},\n booktitle = {arXiv:1511.07938},\n year = {2015},\n keywords = {Health care, Machine learning, Deep learning},\n url_Paper = {http://arxiv.org/pdf/1511.07938.pdf},\n abstract = {Early diagnosis of treatable diseases is essential for improving healthcare, and many diseases’ onsets are predictable from annual lab tests and their temporal trends. We introduce a multi-resolution convolutional neural network for early detection of multiple diseases from irregularly measured sparse lab values. Our novel architecture takes as input both an imputed version of the data and a binary observation matrix. For imputing the temporal sparse observations, we develop a flexible, fast to train method for differentiable multivariate kernel regression. Our experiments on data from 298K individuals over 8 years, 18 common lab measurements, and 171 diseases show that the temporal signatures learned via convolution are significantly more predictive than baselines commonly used for early disease diagnosis.}\n}\n\n
\n
\n\n\n
\n Early diagnosis of treatable diseases is essential for improving healthcare, and many diseases’ onsets are predictable from annual lab tests and their temporal trends. We introduce a multi-resolution convolutional neural network for early detection of multiple diseases from irregularly measured sparse lab values. Our novel architecture takes as input both an imputed version of the data and a binary observation matrix. For imputing the temporal sparse observations, we develop a flexible, fast to train method for differentiable multivariate kernel regression. Our experiments on data from 298K individuals over 8 years, 18 common lab measurements, and 171 diseases show that the temporal signatures learned via convolution are significantly more predictive than baselines commonly used for early disease diagnosis.\n
\n\n\n
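The abstract describes feeding the network both an imputed version of the lab series and a binary observation matrix. A minimal sketch of that input convention with a small 1-D convolutional network follows; the layer sizes, the zero-fill imputation (the paper uses a differentiable kernel-regression imputation), and the toy tensor shapes are illustrative only.

# Sketch: a 1-D conv net over time taking, per lab test, an imputed value channel
# and a binary "was this observed?" mask channel.
import torch
import torch.nn as nn

class LabConvNet(nn.Module):
    def __init__(self, n_labs=18, n_diseases=171, hidden=64):
        super().__init__()
        in_channels = 2 * n_labs            # imputed values + observation mask per lab
        self.net = nn.Sequential(
            nn.Conv1d(in_channels, hidden, kernel_size=5, padding=2), nn.ReLU(),
            nn.Conv1d(hidden, hidden, kernel_size=5, padding=2, dilation=2), nn.ReLU(),
            nn.AdaptiveMaxPool1d(1),        # pool over the time axis
        )
        self.out = nn.Linear(hidden, n_diseases)

    def forward(self, values, mask):
        # values, mask: (batch, n_labs, time)
        x = torch.cat([values, mask], dim=1)
        h = self.net(x).squeeze(-1)
        return self.out(h)                  # per-disease logits

# Toy batch: 8 patients, 18 labs, 96 time steps, ~90% of entries unobserved.
torch.manual_seed(0)
mask = (torch.rand(8, 18, 96) < 0.1).float()
values = torch.randn(8, 18, 96) * mask      # crude zero-fill where unobserved
logits = LabConvNet()(values, mask)
print(logits.shape)                         # torch.Size([8, 171])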
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2014\n \n \n (2)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n Using Anchors to Estimate Clinical State without Labeled Data.\n \n \n \n \n\n\n \n Halpern, Y.; Choi, Y.; Horng, S.; and Sontag, D.\n\n\n \n\n\n\n In Proceedings of the American Medical Informatics Association (AMIA) Annual Symposium, pages 606–615, 2014. \n \n\n\n\n
\n\n\n\n \n \n \"Using paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 1 download\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@inproceedings{HalpernEtAl_amia14,\n author = {Yoni Halpern and Youngduck Choi and Steven Horng and David Sontag},\n title = {Using Anchors to Estimate Clinical State without Labeled Data},\n booktitle = {Proceedings of the American Medical Informatics Association (AMIA) Annual Symposium},\n pages = {606--615},\n year = {2014},\n keywords = {Health care, Machine learning, Unsupervised learning},\n url_Paper = {http://people.csail.mit.edu/dsontag/papers/HalpernEtAl_amia14.pdf},\n abstract = {We present a novel framework for learning to estimate and predict clinical state variables without labeled data. The resulting models can used for electronic phenotyping, triggering clinical decision support, and cohort selection. The framework relies on key observations which we characterize and term "anchor variables". By specifying anchor variables, an expert encodes a certain amount of domain knowledge about the problem while the rest of learning proceeds in an unsupervised manner. The ability to build anchors upon standardized ontologies and the framework's ability to learn from unlabeled data promote generalizability across institutions. We additionally develop a user interface to enable experts to choose anchor variables in an informed manner. The framework is applied to electronic medical record-based phenotyping to enable real-time decision support in the emergency department. We validate the learned models using a prospectively gathered set of gold-standard responses from emergency physicians for nine clinically relevant variables.}\n}\n\n
\n
\n\n\n
\n We present a novel framework for learning to estimate and predict clinical state variables without labeled data. The resulting models can be used for electronic phenotyping, triggering clinical decision support, and cohort selection. The framework relies on key observations which we characterize and term \"anchor variables\". By specifying anchor variables, an expert encodes a certain amount of domain knowledge about the problem while the rest of learning proceeds in an unsupervised manner. The ability to build anchors upon standardized ontologies and the framework's ability to learn from unlabeled data promote generalizability across institutions. We additionally develop a user interface to enable experts to choose anchor variables in an informed manner. The framework is applied to electronic medical record-based phenotyping to enable real-time decision support in the emergency department. We validate the learned models using a prospectively gathered set of gold-standard responses from emergency physicians for nine clinically relevant variables.\n
\n\n\n
\n\n\n
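One way to operationalize the anchor idea above, closely related to positive-unlabeled learning with the Elkan–Noto calibration, is to treat the anchor as a noisy positive label: train a classifier to predict the anchor from the remaining features, then rescale its scores by an estimate of P(anchor | condition). The synthetic data, classifier choice, and calibration details below are illustrative, not the paper's exact estimator.

# Sketch: anchor-as-noisy-label learning with a simple calibration step.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

rng = np.random.default_rng(0)
n = 20000
y = rng.random(n) < 0.2                                   # hidden clinical state (never used for training)
X = rng.normal(size=(n, 10)) + 1.5 * y[:, None] * (np.arange(10) < 3)   # a few informative features
anchor = (rng.random(n) < 0.5) & y                        # anchor fires only when y=1, but misses half

X_tr, X_te, a_tr, a_te, y_tr, y_te = train_test_split(X, anchor, y, test_size=0.3, random_state=0)

# Step 1: predict the anchor from the remaining features.
clf = LogisticRegression(max_iter=1000).fit(X_tr, a_tr)

# Step 2: calibrate. Among anchor-positive cases, the mean predicted score estimates
# c = P(anchor=1 | y=1); dividing by c turns anchor scores into estimates of P(y=1 | X).
c = clf.predict_proba(X_tr[a_tr])[:, 1].mean()
p_y = np.clip(clf.predict_proba(X_te)[:, 1] / c, 0, 1)

print("AUC against the hidden state:", round(roc_auc_score(y_te, p_y), 3))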
\n \n\n \n \n \n \n \n \n Unsupervised Learning of Disease Progression Models.\n \n \n \n \n\n\n \n Wang, X.; Sontag, D.; and Wang, F.\n\n\n \n\n\n\n In Proceedings of the 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, of KDD '14, pages 85–94, New York, NY, USA, 2014. ACM\n \n\n\n\n
\n\n\n\n \n \n \"Unsupervised paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@inproceedings{WanSonWan_kdd14,\n author = {Xiang Wang and David Sontag and Fei Wang},\n title = {Unsupervised Learning of Disease Progression Models},\n booktitle = {Proceedings of the 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},\n series = {KDD '14},\n isbn = {978-1-4503-2956-9},\n pages = {85--94},\n numpages = {10},\n publisher = {ACM},\n address = {New York, NY, USA},\n keywords = {Health care, Unsupervised learning},\n year = {2014},\n url_Paper = {http://people.csail.mit.edu/dsontag/papers/WanSonWan_kdd14.pdf},\n abstract = {Chronic diseases, such as Alzheimer's Disease, Diabetes, and Chronic Obstructive Pulmonary Disease, usually progress slowly over a long period of time, causing increasing burden to the patients, their families, and the healthcare system. A better understanding of their progression is instrumental in early diagnosis and personalized care. Modeling disease progression based on real-world evidence is a very challenging task due to the incompleteness and irregularity of the observations, as well as the heterogeneity of the patient conditions. In this paper, we propose a probabilistic disease progression model that address these challenges. As compared to existing disease progression models, the advantage of our model is three-fold: 1) it learns a continuous-time progression model from discrete-time observations with non-equal intervals; 2) it learns the full progression trajectory from a set of incomplete records that only cover short segments of the progression; 3) it learns a compact set of medical concepts as the bridge between the hidden progression process and the observed medical evidence, which are usually extremely sparse and noisy. We demonstrate the capabilities of our model by applying it to a real-world COPD patient cohort and deriving some interesting clinical insights.}\n}\n\n
\n
\n\n\n
\n Chronic diseases, such as Alzheimer's Disease, Diabetes, and Chronic Obstructive Pulmonary Disease, usually progress slowly over a long period of time, causing increasing burden to the patients, their families, and the healthcare system. A better understanding of their progression is instrumental in early diagnosis and personalized care. Modeling disease progression based on real-world evidence is a very challenging task due to the incompleteness and irregularity of the observations, as well as the heterogeneity of the patient conditions. In this paper, we propose a probabilistic disease progression model that addresses these challenges. As compared to existing disease progression models, the advantage of our model is three-fold: 1) it learns a continuous-time progression model from discrete-time observations with non-equal intervals; 2) it learns the full progression trajectory from a set of incomplete records that only cover short segments of the progression; 3) it learns a compact set of medical concepts as the bridge between the hidden progression process and the observed medical evidence, which are usually extremely sparse and noisy. We demonstrate the capabilities of our model by applying it to a real-world COPD patient cohort and deriving some interesting clinical insights.\n
\n\n\n
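A core ingredient of continuous-time progression models of this kind is a continuous-time Markov chain over disease stages: transition probabilities over an arbitrary visit gap dt are obtained as the matrix exponential exp(Q·dt) of a rate matrix Q, which is what lets irregularly spaced observations share one set of parameters. The 4-stage rate matrix below is made up for illustration and is not from the paper.

# Sketch: the continuous-time Markov chain ingredient of a progression model.
import numpy as np
from scipy.linalg import expm

# Monotone 4-stage progression: each stage can only move to the next one.
Q = np.array([[-0.30, 0.30, 0.00, 0.00],
              [ 0.00,-0.20, 0.20, 0.00],
              [ 0.00, 0.00,-0.10, 0.10],
              [ 0.00, 0.00, 0.00, 0.00]])   # rows sum to zero; the last stage is absorbing

def transition_matrix(dt):
    """P[i, j] = probability of being in stage j after a gap of dt, starting from stage i."""
    return expm(Q * dt)

for dt in (0.5, 2.0, 10.0):
    p = transition_matrix(dt)[0]            # stage distribution starting from stage 0
    print(f"dt={dt:>4}: " + "  ".join(f"{x:.2f}" for x in p))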
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2013\n \n \n (3)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n Discovering Hidden Variables in Noisy-Or Networks using Quartet Tests.\n \n \n \n \n\n\n \n Jernite, Y.; Halpern, Y.; and Sontag, D.\n\n\n \n\n\n\n In Advances in Neural Information Processing Systems 26, pages 2355–2363. MIT Press, 2013.\n \n\n\n\n
\n\n\n\n \n \n paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@incollection{JerHalSon_nips13,\n author = {Yacine Jernite and Yoni Halpern and David Sontag},\n title = {Discovering Hidden Variables in Noisy-Or Networks using Quartet Tests},\n booktitle = {Advances in Neural Information Processing Systems 26},\n pages = {2355--2363},\n publisher = {MIT Press},\n year = {2013},\n keywords = {Machine learning, Unsupervised learning, Health care},\n url_Paper = {http://people.csail.mit.edu/dsontag/papers/JerHalSon_nips13.pdf},\n abstract = {We give a polynomial-time algorithm for provably learning the structure and parameters of bipartite noisy-or Bayesian networks of binary variables where the top layer is completely hidden. Unsupervised learning of these models is a form of discrete factor analysis, enabling the discovery of hidden variables and their causal relationships with observed data. We obtain an efficient learning algorithm for a family of Bayesian networks that we call quartet-learnable. For each latent variable, the existence of a singly-coupled quartet allows us to uniquely identify and learn all parameters involving that latent variable. We give a proof of the polynomial sample complexity of our learning algorithm, and experimentally compare it to variational EM.}\n}\n\n
\n
\n\n\n
\n We give a polynomial-time algorithm for provably learning the structure and parameters of bipartite noisy-or Bayesian networks of binary variables where the top layer is completely hidden. Unsupervised learning of these models is a form of discrete factor analysis, enabling the discovery of hidden variables and their causal relationships with observed data. We obtain an efficient learning algorithm for a family of Bayesian networks that we call quartet-learnable. For each latent variable, the existence of a singly-coupled quartet allows us to uniquely identify and learn all parameters involving that latent variable. We give a proof of the polynomial sample complexity of our learning algorithm, and experimentally compare it to variational EM.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Predicting Chief Complaints at Triage Time in the Emergency Department.\n \n \n \n \n\n\n \n Jernite, Y.; Halpern, Y.; Horng, S.; and Sontag, D.\n\n\n \n\n\n\n NIPS Workshop on Machine Learning for Clinical Data Analysis and Healthcare. 2013.\n \n\n\n\n
\n\n\n\n \n \n \"Predicting paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{JerniteEtAl_nips13health,\n  author = {Yacine Jernite and Yoni Halpern and Steven Horng and David Sontag},\n  title = {Predicting Chief Complaints at Triage Time in the Emergency Department},\n  journal={NIPS Workshop on Machine Learning for Clinical Data Analysis and Healthcare},\n  year={2013},\n  keywords = {Health care},\n  url_Paper = {http://people.csail.mit.edu/dsontag/papers/JerniteEtAl_nips13health.pdf},\n  abstract = {As hospitals increasingly use electronic medical records for research and quality improvement, it is important to provide ways to structure medical data without losing either expressiveness or time. We present a system that helps achieve this goal by building an extended ontology of chief complaints and automatically predicting a patient's chief complaint, based on their vitals and the nurses' description of their state at arrival.}\n}\n\n
\n
\n\n\n
\n As hospitals increasingly use electronic medical records for research and quality improvement, it is important to provide ways to structure medical data without losing either expressiveness or time. We present a system that helps achieve this goal by building an extended ontology of chief complaints and automatically predicting a patient's chief complaint, based on their vitals and the nurses' description of their state at arrival.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Unsupervised Learning of Noisy-Or Bayesian Networks.\n \n \n \n \n\n\n \n Halpern, Y.; and Sontag, D.\n\n\n \n\n\n\n In Proceedings of the Twenty-Ninth Conference on Uncertainty in Artificial Intelligence (UAI-13), pages 272–281, Corvallis, Oregon, 2013. AUAI Press\n \n\n\n\n
\n\n\n\n \n \n \"Unsupervised paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 1 download\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@inproceedings{HalpernSontag_uai13,\n author = {Yoni Halpern and David Sontag},\n title = {Unsupervised Learning of Noisy-Or Bayesian Networks},\n booktitle = {Proceedings of the Twenty-Ninth Conference on Uncertainty in Artificial Intelligence ({UAI}-13)},\n publisher = {AUAI Press},\n address = {Corvallis, Oregon},\n pages = {272--281},\n year = {2013},\n keywords = {Machine learning, Unsupervised learning, Health care},\n url_Paper = {http://people.csail.mit.edu/dsontag/papers/HalpernSontag_uai13.pdf},\n abstract = {This paper considers the problem of learning the parameters in Bayesian networks of discrete variables with known structure and hidden variables. Previous approaches in these settings typically use expectation maximization; when the network has high treewidth, the required expectations might be approximated using Monte Carlo or variational methods. We show how to avoid inference altogether during learning by giving a polynomial-time algorithm based on the method-of-moments, building upon recent work on learning discrete-valued mixture models. In particular, we show how to learn the parameters for a family of bipartite noisy-or Bayesian networks. In our experimental results, we demonstrate an application of our algorithm to learning QMR-DT, a large Bayesian network used for medical diagnosis. We show that it is possible to fully learn the parameters of QMR-DT even when only the findings are observed in the training data (ground truth diseases unknown).}\n}\n\n
\n
\n\n\n
\n This paper considers the problem of learning the parameters in Bayesian networks of discrete variables with known structure and hidden variables. Previous approaches in these settings typically use expectation maximization; when the network has high treewidth, the required expectations might be approximated using Monte Carlo or variational methods. We show how to avoid inference altogether during learning by giving a polynomial-time algorithm based on the method-of-moments, building upon recent work on learning discrete-valued mixture models. In particular, we show how to learn the parameters for a family of bipartite noisy-or Bayesian networks. In our experimental results, we demonstrate an application of our algorithm to learning QMR-DT, a large Bayesian network used for medical diagnosis. We show that it is possible to fully learn the parameters of QMR-DT even when only the findings are observed in the training data (ground truth diseases unknown).\n
\n\n\n
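For concreteness, the noisy-or conditional distribution that such learning algorithms must recover can be written down and sampled directly: with binary diseases d, per-edge failure probabilities f, and a leak term, P(finding = 0 | d) = (1 - leak) · prod_i f_i^{d_i}. The small synthetic disease–finding network below only illustrates this parametrization; it is not the QMR-DT network or the paper's estimator.

# Sketch: sampling from a bipartite noisy-or network with a leak term.
import numpy as np

rng = np.random.default_rng(0)
n_diseases, n_findings = 5, 8
prior = rng.uniform(0.05, 0.2, size=n_diseases)               # P(disease_i = 1)
fail = rng.uniform(0.2, 0.9, size=(n_diseases, n_findings))   # failure prob per disease->finding edge
leak = np.full(n_findings, 0.01)                              # findings occasionally fire on their own

def sample_patients(n):
    d = (rng.random((n, n_diseases)) < prior).astype(float)
    # Probability each finding stays OFF: leak term times the product of failures over active diseases.
    p_off = (1 - leak) * np.exp(d @ np.log(fail))
    x = (rng.random((n, n_findings)) < 1 - p_off).astype(int)
    return d, x

diseases, findings = sample_patients(100000)
print("empirical P(finding):", np.round(findings.mean(axis=0), 3))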
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2012\n \n \n (1)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n A Comparison of Dimensionality Reduction Techniques for Unstructured Clinical Text.\n \n \n \n \n\n\n \n Halpern, Y.; Horng, S.; Nathanson, L. A.; Shapiro, N. I.; and Sontag, D.\n\n\n \n\n\n\n ICML 2012 Workshop on Clinical Data Analysis. 2012.\n \n\n\n\n
\n\n\n\n \n \n \"A paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{HalpernEtAl_ICML_clinical_workshop12,\n  author = {Yoni Halpern and Steven Horng and Larry A. Nathanson and Nathan I. Shapiro and David Sontag},\n  title = {A Comparison of Dimensionality Reduction Techniques for Unstructured Clinical Text},\n  journal={ICML 2012 Workshop on Clinical Data Analysis},\n  year={2012},\n  keywords = {Health care},\n  url_Paper = {http://people.csail.mit.edu/dsontag/papers/HalpernEtAl_icml12_workshop.pdf},\n  abstract = {Much of clinical data is free text, which is challenging to use together with machine learning, visualization tools, and clinical decision rules. In this paper, we compare supervised and unsupervised dimensionality reduction techniques, including the recently proposed sLDA and MedLDA algorithms, on clinical texts. We evaluate each dimensionality reduction method by using them as features for two important prediction problems that arise in emergency departments: predicting whether a patient has an infection, which can progress to sepsis, and predicting the likelihood of a patient being admitted to the Intensive Care Unit (used for risk stratification). We find that, on this data, existing supervised dimensionality reduction techniques perform better than unsupervise techniques only for very low dimensional representations.}\n}\n\n
\n
\n\n\n
\n Much clinical data is free text, which is challenging to use together with machine learning, visualization tools, and clinical decision rules. In this paper, we compare supervised and unsupervised dimensionality reduction techniques, including the recently proposed sLDA and MedLDA algorithms, on clinical texts. We evaluate each dimensionality reduction method by using its output as features for two important prediction problems that arise in emergency departments: predicting whether a patient has an infection, which can progress to sepsis, and predicting the likelihood of a patient being admitted to the Intensive Care Unit (used for risk stratification). We find that, on this data, existing supervised dimensionality reduction techniques perform better than unsupervised techniques only for very low-dimensional representations.\n
\n\n\n
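A sketch of the unsupervised half of such a comparison: fit LDA topics on free text and feed the per-document topic proportions to a downstream classifier. Since sLDA and MedLDA are not available in scikit-learn, only plain LDA is shown here, and a public newsgroup corpus (downloaded by fetch_20newsgroups) stands in for clinical notes.

# Sketch: unsupervised LDA topic proportions as features for a downstream classifier.
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

data = fetch_20newsgroups(subset="train", categories=["sci.med", "sci.space"],
                          remove=("headers", "footers", "quotes"))
X_text, y = data.data, data.target

counts = CountVectorizer(max_features=5000, stop_words="english").fit_transform(X_text)
topics = LatentDirichletAllocation(n_components=20, random_state=0).fit_transform(counts)

X_tr, X_te, y_tr, y_te = train_test_split(topics, y, test_size=0.3, random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)
auc = roc_auc_score(y_te, clf.predict_proba(X_te)[:, 1])
print("AUC with 20 unsupervised topics as features:", round(auc, 3))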
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2007\n \n \n (1)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n Probabilistic Modeling of Systematic Errors in Two-Hybrid Experiments.\n \n \n \n \n\n\n \n Sontag, D.; Singh, R.; and Berger, B.\n\n\n \n\n\n\n In Pacific Symposium on Biocomputing, volume 12, pages 445-457, 2007. \n \n\n\n\n
\n\n\n\n \n \n \"Probabilistic paper\n  \n \n \n \"Probabilistic link\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@inproceedings{SonSinBer_psb07,\n title  = {Probabilistic Modeling of Systematic Errors in Two-Hybrid Experiments},\n author = {David Sontag and Rohit Singh and Bonnie Berger},\n booktitle = {Pacific Symposium on Biocomputing},\n volume  = {12},\n year   = {2007},\n pages  = {445-457},\n keywords = {Computational biology, Health care},\n url_Paper = {http://psb.stanford.edu/psb-online/proceedings/psb07/sontag.pdf},\n url_Link = {http://groups.csail.mit.edu/cb/probmod2H/},\n abstract = {We describe a novel probabilistic approach to estimating errors in two-hybrid (2H) experiments. Such experiments are frequently used to elucidate protein-protein interaction networks in a high-throughput fashion; however, a significant challenge with these is their relatively high error rate, specifically, a high false-positive rate. We describe a comprehensive error model for 2H data, accounting for both random\nand systematic errors. The latter arise from limitations of the 2H experimental protocol: in theory, the reporting mechanism of a 2H experiment should be activated if and only if the two proteins being tested truly interact; in practice, even in the absence of a true interaction, it may be activated by some proteins -- either by themselves or through promiscuous interaction with other proteins. We describe a probabilistic relational model that explicitly models the above phenomenon and use Markov Chain Monte Carlo (MCMC) algorithms to compute both the probability of an observed 2H interaction being true as well as the probability of individual proteins being self-activating/promiscuous. This is the first approach that explicitly models systematic errors in protein-protein interaction data; in contrast, previous work on this topic has modeled errors as being independent and random. By explicitly modeling the sources of noise in 2H systems, we find that we are better able to make use of the available experimental data. In comparison with Bader et al.’s method for estimating confidence in 2H predicted interactions, the proposed method performed 5-10\\% better overall, and in particular regimes improved prediction accuracy by as much as 76\\%.}\n}\n\n
\n
\n\n\n
\n We describe a novel probabilistic approach to estimating errors in two-hybrid (2H) experiments. Such experiments are frequently used to elucidate protein-protein interaction networks in a high-throughput fashion; however, a significant challenge with these is their relatively high error rate, specifically, a high false-positive rate. We describe a comprehensive error model for 2H data, accounting for both random and systematic errors. The latter arise from limitations of the 2H experimental protocol: in theory, the reporting mechanism of a 2H experiment should be activated if and only if the two proteins being tested truly interact; in practice, even in the absence of a true interaction, it may be activated by some proteins – either by themselves or through promiscuous interaction with other proteins. We describe a probabilistic relational model that explicitly models the above phenomenon and use Markov Chain Monte Carlo (MCMC) algorithms to compute both the probability of an observed 2H interaction being true as well as the probability of individual proteins being self-activating/promiscuous. This is the first approach that explicitly models systematic errors in protein-protein interaction data; in contrast, previous work on this topic has modeled errors as being independent and random. By explicitly modeling the sources of noise in 2H systems, we find that we are better able to make use of the available experimental data. In comparison with Bader et al.’s method for estimating confidence in 2H predicted interactions, the proposed method performed 5-10% better overall, and in particular regimes improved prediction accuracy by as much as 76%.\n
\n\n\n
\n\n\n\n\n\n
\n
\n\n\n\n\n
\n\n\n \n\n \n \n \n \n\n
\n"}; document.write(bibbase_data.data);