var bibbase_data = {"data":"\"Loading..\"\n\n
\n\n \n\n \n\n \n \n\n \n\n \n \n\n \n\n \n
\n generated by\n \n \"bibbase.org\"\n\n \n
\n \n\n
\n\n \n\n\n
\n\n Excellent! Next you can\n create a new website with this list, or\n embed it in an existing web page by copying & pasting\n any of the following snippets.\n\n
\n JavaScript\n (easiest)\n
\n \n <script src=\"https://bibbase.org/show?bib=http://web.mit.edu/~blengeri/www/lengerich_website.bib&jsonp=1&jsonp=1\"></script>\n \n
\n\n PHP\n
\n \n <?php\n $contents = file_get_contents(\"https://bibbase.org/show?bib=http://web.mit.edu/~blengeri/www/lengerich_website.bib&jsonp=1\");\n print_r($contents);\n ?>\n \n
\n\n iFrame\n (not recommended)\n
\n \n <iframe src=\"https://bibbase.org/show?bib=http://web.mit.edu/~blengeri/www/lengerich_website.bib&jsonp=1\"></iframe>\n \n
\n\n

\n For more details see the documention.\n

\n
\n
\n\n
\n\n This is a preview! To use this list on your own web site\n or create a new web site from it,\n create a free account. The file will be added\n and you will be able to edit it in the File Manager.\n We will show you instructions once you've created your account.\n
\n\n
\n\n

To the site owner:

\n\n

Action required! Mendeley is changing its\n API. In order to keep using Mendeley with BibBase past April\n 14th, you need to:\n

    \n
  1. renew the authorization for BibBase on Mendeley, and
  2. \n
  3. update the BibBase URL\n in your page the same way you did when you initially set up\n this page.\n
  4. \n
\n

\n\n

\n \n \n Fix it now\n

\n
\n\n
\n\n\n
\n \n \n
\n
\n  \n 2023\n \n \n (6)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n LLMs Understand Glass-Box Models, Discover Surprises, and Suggest Repairs.\n \n \n \n \n\n\n \n Lengerich, B. J.; Bordt, S.; Nori, H.; Nunnally, M. E.; Aphinyanaphongs, Y.; Kellis, M.; and Caruana, R.\n\n\n \n\n\n\n . 2023.\n \n\n\n\n
\n\n\n\n \n \n \"LLMs preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 4 downloads\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{lengerich2023llms,\n    author = {Lengerich, Benjamin J. and Bordt, Sebastian and Nori, Harsha and Nunnally, Mark E. and Aphinyanaphongs, Yin and Kellis, Manolis and Caruana, Rich},\n    title = {LLMs Understand Glass-Box Models, Discover Surprises, and Suggest Repairs},\n    informal_venue = {arXiv},\n    journal = {},\n    year = {2023},\n    url_preprint={https://arxiv.org/abs/2308.01157},\n    abstract={We show that large language models (LLMs) are remarkably good at working with interpretable models that decompose complex outcomes into univariate graph-represented components. By adopting a hierarchical approach to reasoning, LLMs can provide comprehensive model-level summaries without ever requiring the entire model to fit in context. This approach enables LLMs to apply their extensive background knowledge to automate common tasks in data science such as detecting anomalies that contradict prior knowledge, describing potential reasons for the anomalies, and suggesting repairs that would remove the anomalies. We use multiple examples in healthcare to demonstrate the utility of these new capabilities of LLMs, with particular emphasis on Generalized Additive Models (GAMs). Finally, we present the package TalkToEBM as an open-source LLM-GAM interface.},\n    keywords={LLMs}\n}\n\n
\n
\n\n\n
\n We show that large language models (LLMs) are remarkably good at working with interpretable models that decompose complex outcomes into univariate graph-represented components. By adopting a hierarchical approach to reasoning, LLMs can provide comprehensive model-level summaries without ever requiring the entire model to fit in context. This approach enables LLMs to apply their extensive background knowledge to automate common tasks in data science such as detecting anomalies that contradict prior knowledge, describing potential reasons for the anomalies, and suggesting repairs that would remove the anomalies. We use multiple examples in healthcare to demonstrate the utility of these new capabilities of LLMs, with particular emphasis on Generalized Additive Models (GAMs). Finally, we present the package TalkToEBM as an open-source LLM-GAM interface.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Contextualized Policy Recovery: Modeling and Interpreting Medical Decisions with Adaptive Imitation Learning.\n \n \n \n \n\n\n \n Deuschel, J.; Ellington, C.; Lengerich, B.; Luo, Y.; Friederich, P.; and Xing, E.\n\n\n \n\n\n\n . 2023.\n \n\n\n\n
\n\n\n\n \n \n \"Contextualized preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@article{deuschel2023contextualized,\n    title={Contextualized Policy Recovery: Modeling and Interpreting Medical Decisions with Adaptive Imitation Learning},\n    author={Deuschel, Jannik and Ellington, Caleb and Lengerich, Ben and Luo, Yingtao and Friederich, Pascal and Xing, Eric},\n    informal_venue = {arXiv},\n    year={2023},\n    url_preprint={https://arxiv.org/abs/2310.07918},\n    abstract={Interpretable policy learning seeks to estimate intelligible decision policies from observed actions; however, existing models fall short by forcing a tradeoff between accuracy and interpretability. This tradeoff limits data-driven interpretations of human decision-making process. e.g. to audit medical decisions for biases and suboptimal practices, we require models of decision processes which provide concise descriptions of complex behaviors. Fundamentally, existing approaches are burdened by this tradeoff because they represent the underlying decision process as a universal policy, when in fact human decisions are dynamic and can change drastically with contextual information. Thus, we propose Contextualized Policy Recovery (CPR), which re-frames the problem of modeling complex decision processes as a multi-task learning problem in which complex decision policies are comprised of context-specific policies. CPR models each context-specific policy as a linear observation-to-action mapping, and generates new decision models on-demand as contexts are updated with new observations. CPR is compatible with fully offline and partially observable decision environments, and can be tailored to incorporate any recurrent black-box model or interpretable decision model. We assess CPR through studies on simulated and real data, achieving state-of-the-art performance on the canonical tasks of predicting antibiotic prescription in intensive care units (+22% AUROC vs. previous SOTA) and predicting MRI prescription for Alzheimer's patients (+7.7% AUROC vs. previous SOTA). 
With this improvement in predictive performance, CPR closes the accuracy gap between interpretable and black-box methods for policy learning, allowing high-resolution exploration and analysis of context-specific decision models.},\n    keywords={Contextualized, Healthcare, Policy Learning}\n}\n\n\n
\n
\n\n\n
\n Interpretable policy learning seeks to estimate intelligible decision policies from observed actions; however, existing models fall short by forcing a tradeoff between accuracy and interpretability. This tradeoff limits data-driven interpretations of human decision-making process. e.g. to audit medical decisions for biases and suboptimal practices, we require models of decision processes which provide concise descriptions of complex behaviors. Fundamentally, existing approaches are burdened by this tradeoff because they represent the underlying decision process as a universal policy, when in fact human decisions are dynamic and can change drastically with contextual information. Thus, we propose Contextualized Policy Recovery (CPR), which re-frames the problem of modeling complex decision processes as a multi-task learning problem in which complex decision policies are comprised of context-specific policies. CPR models each context-specific policy as a linear observation-to-action mapping, and generates new decision models on-demand as contexts are updated with new observations. CPR is compatible with fully offline and partially observable decision environments, and can be tailored to incorporate any recurrent black-box model or interpretable decision model. We assess CPR through studies on simulated and real data, achieving state-of-the-art performance on the canonical tasks of predicting antibiotic prescription in intensive care units (+22% AUROC vs. previous SOTA) and predicting MRI prescription for Alzheimer's patients (+7.7% AUROC vs. previous SOTA). With this improvement in predictive performance, CPR closes the accuracy gap between interpretable and black-box methods for policy learning, allowing high-resolution exploration and analysis of context-specific decision models.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Contextualized Machine Learning.\n \n \n \n \n\n\n \n Lengerich, B.; Ellington, C.; Rubbi, A.; Kellis, M.; and Xing, E.\n\n\n \n\n\n\n . 2023.\n \n\n\n\n
\n\n\n\n \n \n \"Contextualized preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{lengerich2023contextualized,\n    title={Contextualized Machine Learning},\n    author={Lengerich, Ben and Ellington, Caleb and Rubbi, Andrea and Kellis, Manolis and Xing, Eric},\n    informal_venue = {arXiv},\n    year={2023},\n    url_preprint={https://arxiv.org/abs/2310.11340},\n    abstract={We examine Contextualized Machine Learning (ML), a paradigm for learning heterogeneous and context-dependent effects. Contextualized ML estimates heterogeneous functions by applying deep learning to the meta-relationship between contextual information and context-specific parametric models. This is a form of varying-coefficient modeling that unifies existing frameworks including cluster analysis and cohort modeling by introducing two reusable concepts: a context encoder which translates sample context into model parameters, and sample-specific model which operates on sample predictors. We review the process of developing contextualized models, nonparametric inference from contextualized models, and identifiability conditions of contextualized models. Finally, we present the open-source PyTorch package ContextualizedML.},\n    keywords={Contextualized}\n}\n\n
\n
\n\n\n
\n We examine Contextualized Machine Learning (ML), a paradigm for learning heterogeneous and context-dependent effects. Contextualized ML estimates heterogeneous functions by applying deep learning to the meta-relationship between contextual information and context-specific parametric models. This is a form of varying-coefficient modeling that unifies existing frameworks including cluster analysis and cohort modeling by introducing two reusable concepts: a context encoder which translates sample context into model parameters, and sample-specific model which operates on sample predictors. We review the process of developing contextualized models, nonparametric inference from contextualized models, and identifiability conditions of contextualized models. Finally, we present the open-source PyTorch package ContextualizedML.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Recent Advances, Applications and Open Challenges in Machine Learning for Health: Reflections from Research Roundtables at ML4H 2022 Symposium.\n \n \n \n \n\n\n \n Hegselmann, S.; Zhou, H.; Zhou, Y.; Chien, J.; Nagaraj, S.; Hulkund, N.; Bhave, S.; Oberst, M.; Pai, A.; Ellington, C.; Ikezogwo, W.; Dou, J. X.; Agrawal, M.; Li, C.; Argaw, P.; Biswas, A.; Gupta, M.; Li, X.; Lemanczyk, M.; Zhang, Y.; Garbin, C.; Healey, E.; Kim, H.; Boone, C.; Daneshjou, R.; Shi, S.; Pezzotti, N.; Pfohl, S. R.; Fong, E.; Naik, A.; Lengerich, B.; Xu, Y.; Bidwell, J.; Sendak, M.; Kim, B.; Hendrix, N.; Spathis, D.; Seita, J.; Quast, B.; Coffee, M.; Stultz, C.; Chen, I. Y.; Joshi, S.; and Tadesse, G. A.\n\n\n \n\n\n\n . May 2023.\n \n\n\n\n
\n\n\n\n \n \n \"RecentPaper\n  \n \n\n \n \n doi\n  \n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{stefan_hegselmann_2023_7951122,\n  title        = {{Recent Advances, Applications and Open Challenges\n                   in Machine Learning for Health: Reflections from\n                   Research Roundtables at ML4H 2022 Symposium}},\n    author       = {Stefan Hegselmann and\n                  Helen Zhou and\n                  Yuyin Zhou and\n                  Jennifer Chien and\n                  Sujay Nagaraj and\n                  Neha Hulkund and\n                  Shreyas Bhave and\n                  Michael Oberst and\n                  Amruta Pai and\n                  Caleb Ellington and\n                  Wisdom Ikezogwo and\n                  Jason Xiaotian Dou and\n                  Monica Agrawal and\n                  Changye Li and\n                  Peniel Argaw and\n                  Arpita Biswas and\n                  Mehak Gupta and\n                  Xinhui Li and\n                  Marta Lemanczyk and\n                  Yuhui Zhang and\n                  Christian Garbin and\n                  Elizabeth Healey and\n                  Heejong Kim and\n                  Claire Boone and\n                  Roxana Daneshjou and\n                  Siyu Shi and\n                  Nicola Pezzotti and\n                  Stephen R. Pfohl and\n                  Edwin Fong and\n                  Aakanksha Naik and\n                  Ben Lengerich and\n                  Ying Xu and\n                  Jonathan Bidwell and\n                  Mark Sendak and\n                  Byung-Hak Kim and\n                  Nathaniel Hendrix and\n                  Dimitris Spathis and\n                  Jun Seita and\n                  Bastiaan Quast and\n                  Megan Coffee and\n                  Collin Stultz and\n                  Irene Y. 
Chen and\n                  Shalmali Joshi and\n                  Girmaw Abebe Tadesse},\n  month        = May,\n  year         = 2023,\n  publisher    = {Zenodo},\n  doi          = {10.5281/zenodo.7951122},\n  url          = {https://doi.org/10.5281/zenodo.7951122},\n  keywords     = {Healthcare},\n}\n\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Interpretable Predictive Models to Understand Risk Factors for Maternal and Fetal Outcomes.\n \n \n \n \n\n\n \n Bosschieter, T. M.; Xu, Z.; Lan, H.; Lengerich, B. J.; Nori, H.; Painter, I.; Souter, V.; and Caruana, R.\n\n\n \n\n\n\n Journal of Healthcare Informatics Research. 2023.\n \n\n\n\n
\n\n\n\n \n \n \"Interpretable paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@article{bosschieter2023interpretable,\n    title={Interpretable Predictive Models to Understand Risk Factors for Maternal and Fetal Outcomes},\n    author={Bosschieter, Tomas M. and Xu, Zifei and Lan, Hui and Lengerich, Benjamin J. and Nori, Harsha and Painter, Ian and Souter, Vivienne and Caruana, Rich},\n    abstract={Although most pregnancies result in a good outcome, complications are not uncommon and can be associated with serious implications for mothers and babies. Predictive modeling has the potential to improve outcomes through a better understanding of risk factors, heightened surveillance for high-risk patients, and more timely and appropriate interventions, thereby helping obstetricians deliver better care. We identify and study the most important risk factors for four types of pregnancy complications: (i) severe maternal morbidity, (ii) shoulder dystocia, (iii) preterm preeclampsia, and (iv) antepartum stillbirth. We use an Explainable Boosting Machine (EBM), a high-accuracy glass-box learning method, for the prediction and identification of important risk factors. We undertake external validation and perform an extensive robustness analysis of the EBM models. EBMs match the accuracy of other black-box ML methods, such as deep neural networks and random forests, and outperform logistic regression, while being more interpretable. EBMs prove to be robust. The interpretability of the EBM models reveal surprising insights into the features contributing to risk (e.g., maternal height is the second most important feature for shoulder dystocia) and may have potential for clinical application in the prediction and prevention of serious complications in pregnancy.},\n    year={2023},\n    journal={Journal of Healthcare Informatics Research},\n    url_paper={https://link.springer.com/article/10.1007/s41666-023-00151-4},\n    informal_venue = {JHIR},\n    keywords={Healthcare, Pregnancy}\n}\n\n
\n
\n\n\n
\n Although most pregnancies result in a good outcome, complications are not uncommon and can be associated with serious implications for mothers and babies. Predictive modeling has the potential to improve outcomes through a better understanding of risk factors, heightened surveillance for high-risk patients, and more timely and appropriate interventions, thereby helping obstetricians deliver better care. We identify and study the most important risk factors for four types of pregnancy complications: (i) severe maternal morbidity, (ii) shoulder dystocia, (iii) preterm preeclampsia, and (iv) antepartum stillbirth. We use an Explainable Boosting Machine (EBM), a high-accuracy glass-box learning method, for the prediction and identification of important risk factors. We undertake external validation and perform an extensive robustness analysis of the EBM models. EBMs match the accuracy of other black-box ML methods, such as deep neural networks and random forests, and outperform logistic regression, while being more interpretable. EBMs prove to be robust. The interpretability of the EBM models reveal surprising insights into the features contributing to risk (e.g., maternal height is the second most important feature for shoulder dystocia) and may have potential for clinical application in the prediction and prevention of serious complications in pregnancy.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Data Science with LLMs and Interpretable Models.\n \n \n \n\n\n \n Bordt, S.; Lengerich, B.; Nori, H.; and Caruana, R.\n\n\n \n\n\n\n AAAI Explainable AI for Science. 2023.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@article{bordt2024data,\n    author = {Bordt, Sebastian and Lengerich, Ben and Nori, Harsha and Caruana, Rich},\n    title = {Data Science with LLMs and Interpretable Models},\n    journal = {AAAI Explainable AI for Science},\n    year = {2023},\n    informal_venue = {AAAI XAI4Sci},\n    abstract = {Recent years have seen important advances in the building of interpretable models, machine learning models that are designed to be easily understood by humans. In this work, we show that large language models (LLMs) are remarkably good at working with interpretable models, too. In particular, we show that LLMs can describe, interpret, and debug Generalized Additive Models (GAMs). Combining the flexibility of LLMs with the breadth of statistical patterns accurately described by GAMs enables dataset summarization, question answering, and model critique. LLMs can also improve the interaction between domain experts and interpretable models, and generate hypotheses about the underlying phenomenon. We release TalkToEBM as an open-source LLM-GAM interface.},\nkeywords = {Interpretable, LLMs}\n}\n\n
\n
\n\n\n
\n Recent years have seen important advances in the building of interpretable models, machine learning models that are designed to be easily understood by humans. In this work, we show that large language models (LLMs) are remarkably good at working with interpretable models, too. In particular, we show that LLMs can describe, interpret, and debug Generalized Additive Models (GAMs). Combining the flexibility of LLMs with the breadth of statistical patterns accurately described by GAMs enables dataset summarization, question answering, and model critique. LLMs can also improve the interaction between domain experts and interpretable models, and generate hypotheses about the underlying phenomenon. We release TalkToEBM as an open-source LLM-GAM interface.\n
\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2022\n \n \n (10)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n Death by Round Numbers and Sharp Thresholds: Glass-Box Machine Learning Uncovers Biases in Medical Practice.\n \n \n \n \n\n\n \n Lengerich, B.; Caruana, R.; Nunnally, M. E.; and Kellis, M.\n\n\n \n\n\n\n . 2022.\n \n\n\n\n
\n\n\n\n \n \n \"Death preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@article{lengerich2022death,\n  title={Death by Round Numbers and Sharp Thresholds: Glass-Box Machine Learning Uncovers Biases in Medical Practice},\n  author={Lengerich, Benjamin and Caruana, Rich and Nunnally, Mark E. and Kellis, Manolis},\n  informal_venue = {medrxiv},\n    url_preprint={https://www.medrxiv.org/content/10.1101/2022.04.30.22274520v2},\n    abstract={Real-world evidence is confounded by treatments, so data-driven systems can learn to recapitulate biases that influenced treatment decisions. This confounding presents a challenge: uninterpretable black-box systems can put patients at risk by confusing treatment benefits with intrinsic risk, but also an opportunity: interpretable “glass-box” models can improve medical practice by highlighting unexpected patterns which suggest biases in medical practice. We propose a glass-box model that enables clinical experts to find unexpected changes in patient mortality risk. By applying this model to four datasets, we identify two characteristic types of biases: (1) discontinuities where sharp treatment thresholds produce step-function changes in risk near clinically-important round-number cutoffs, and (2) counter-causal paradoxes where aggressive treatment produces non-monotone risk curves that contradict underlying causal risk by lowering the risk of treated patients below that of healthier, but untreated, patients. While these effects are learned by all accurate models, they are only revealed by interpretable models. We show that because these effects are the result of clinical practice rather than statistical aberration, they are pervasive even in large, canonical datasets. 
Finally, we apply this method to uncover opportunities for improvements in clinical practice, including 8000 excess deaths per year in the US, where paradoxically, patients with moderately-elevated serum creatinine have higher mortality risk than patients with severely-elevated serum creatinine.},\n    keywords={Interpretable, Healthcare, Causal Inference},\n    year={2022}\n}\n\n
\n
\n\n\n
\n Real-world evidence is confounded by treatments, so data-driven systems can learn to recapitulate biases that influenced treatment decisions. This confounding presents a challenge: uninterpretable black-box systems can put patients at risk by confusing treatment benefits with intrinsic risk, but also an opportunity: interpretable “glass-box” models can improve medical practice by highlighting unexpected patterns which suggest biases in medical practice. We propose a glass-box model that enables clinical experts to find unexpected changes in patient mortality risk. By applying this model to four datasets, we identify two characteristic types of biases: (1) discontinuities where sharp treatment thresholds produce step-function changes in risk near clinically-important round-number cutoffs, and (2) counter-causal paradoxes where aggressive treatment produces non-monotone risk curves that contradict underlying causal risk by lowering the risk of treated patients below that of healthier, but untreated, patients. While these effects are learned by all accurate models, they are only revealed by interpretable models. We show that because these effects are the result of clinical practice rather than statistical aberration, they are pervasive even in large, canonical datasets. Finally, we apply this method to uncover opportunities for improvements in clinical practice, including 8000 excess deaths per year in the US, where paradoxically, patients with moderately-elevated serum creatinine have higher mortality risk than patients with severely-elevated serum creatinine.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n NOTMAD: Estimating Bayesian Networks with Sample-Specific Structures and Parameters.\n \n \n \n \n\n\n \n Lengerich, B.; Ellington, C.; Aragam, B.; Xing, E. P.; and Kellis, M.\n\n\n \n\n\n\n . 2022.\n \n\n\n\n
\n\n\n\n \n \n \"NOTMAD: preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@article{lengerich2022notmad,\n  title={NOTMAD: Estimating Bayesian Networks with Sample-Specific Structures and Parameters},\n  author={Lengerich, Benjamin and Ellington, Caleb and Aragam, Bryon and Xing, Eric P. and Kellis, Manolis},\n  informal_venue = {arXiv},\n  url_preprint={https://arxiv.org/abs/2111.01104},\n  abstract={Context-specific Bayesian networks (i.e. directed acyclic graphs, DAGs) identify context-dependent relationships between variables, but the non-convexity induced by the acyclicity requirement makes it difficult to share information between context-specific estimators (e.g. with graph generator functions). For this reason, existing methods for inferring context-specific Bayesian networks have favored breaking datasets into subsamples, limiting statistical power and resolution, and preventing the use of multidimensional and latent contexts. To overcome this challenge, we propose NOTEARS-optimized Mixtures of Archetypal DAGs (NOTMAD). NOTMAD models context-specific Bayesian networks as the output of a function which learns to mix archetypal networks according to sample context. The archetypal networks are estimated jointly with the context-specific networks and do not require any prior knowledge. We encode the acyclicity constraint as a smooth regularization loss which is back-propagated to the mixing function; in this way, NOTMAD shares information between context-specific acyclic graphs, enabling the estimation of Bayesian network structures and parameters at even single-sample resolution. We demonstrate the utility of NOTMAD and sample-specific network inference through analysis and experiments, including patient-specific gene expression networks which correspond to morphological variation in cancer.},\n  keywords={Bayesian Networks, Contextualized},\n  year={2022}\n}\n\n
\n
\n\n\n
\n Context-specific Bayesian networks (i.e. directed acyclic graphs, DAGs) identify context-dependent relationships between variables, but the non-convexity induced by the acyclicity requirement makes it difficult to share information between context-specific estimators (e.g. with graph generator functions). For this reason, existing methods for inferring context-specific Bayesian networks have favored breaking datasets into subsamples, limiting statistical power and resolution, and preventing the use of multidimensional and latent contexts. To overcome this challenge, we propose NOTEARS-optimized Mixtures of Archetypal DAGs (NOTMAD). NOTMAD models context-specific Bayesian networks as the output of a function which learns to mix archetypal networks according to sample context. The archetypal networks are estimated jointly with the context-specific networks and do not require any prior knowledge. We encode the acyclicity constraint as a smooth regularization loss which is back-propagated to the mixing function; in this way, NOTMAD shares information between context-specific acyclic graphs, enabling the estimation of Bayesian network structures and parameters at even single-sample resolution. We demonstrate the utility of NOTMAD and sample-specific network inference through analysis and experiments, including patient-specific gene expression networks which correspond to morphological variation in cancer.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Automated interpretable discovery of heterogeneous treatment effectiveness: A COVID-19 case study.\n \n \n \n \n\n\n \n Lengerich, B. J; Nunnally, M. E; Aphinyanaphongs, Y.; Ellington, C.; and Caruana, R.\n\n\n \n\n\n\n Journal of biomedical informatics, 130: 104086. 2022.\n \n\n\n\n
\n\n\n\n \n \n \"Automated paper\n  \n \n \n \"Automated preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@article{lengerich2022automated,\n  title={Automated interpretable discovery of heterogeneous treatment effectiveness: A COVID-19 case study},\n  author={Lengerich, Benjamin J and Nunnally, Mark E and Aphinyanaphongs, Yin and Ellington, Caleb and Caruana, Rich},\n  journal={Journal of biomedical informatics},\n  volume={130},\n  pages={104086},\n  year={2022},\n  publisher={Elsevier},\n  informal_venue = {JBI},\n  url_paper={https://www.sciencedirect.com/science/article/abs/pii/S1532046422001022},\n  url_preprint={https://www.medrxiv.org/content/10.1101/2021.10.30.21265430v1},\n  abstract={Testing multiple treatments for heterogeneous (varying) effectiveness with respect to many underlying risk factors requires many pairwise tests; we would like to instead automatically discover and visualize patient archetypes and predictors of treatment effectiveness using multitask machine learning. In this paper, we present a method to estimate these heterogeneous treatment effects with an interpretable hierarchical framework that uses additive models to visualize expected treatment benefits as a function of patient factors (identifying personalized treatment benefits) and concurrent treatments (identifying combinatorial treatment benefits). This method achieves state-of-the-art predictive power for COVID-19 in-hospital mortality and interpretable identification of heterogeneous treatment benefits. We first validate this method on the large public MIMIC-IV dataset of ICU patients to test recovery of heterogeneous treatment effects. Next we apply this method to a proprietary dataset of over 3000 patients hospitalized for COVID-19, and find evidence of heterogeneous treatment effectiveness predicted largely by indicators of inflammation and thrombosis risk: patients with few indicators of thrombosis risk benefit most from treatments against inflammation, while patients with few indicators of inflammation risk benefit most from treatments against thrombosis. 
This approach provides an automated methodology to discover heterogeneous and individualized effectiveness of treatments.},\n  keywords={Interpretable, Healthcare, COVID-19}\n}\n\n
\n
\n\n\n
\n Testing multiple treatments for heterogeneous (varying) effectiveness with respect to many underlying risk factors requires many pairwise tests; we would like to instead automatically discover and visualize patient archetypes and predictors of treatment effectiveness using multitask machine learning. In this paper, we present a method to estimate these heterogeneous treatment effects with an interpretable hierarchical framework that uses additive models to visualize expected treatment benefits as a function of patient factors (identifying personalized treatment benefits) and concurrent treatments (identifying combinatorial treatment benefits). This method achieves state-of-the-art predictive power for COVID-19 in-hospital mortality and interpretable identification of heterogeneous treatment benefits. We first validate this method on the large public MIMIC-IV dataset of ICU patients to test recovery of heterogeneous treatment effects. Next we apply this method to a proprietary dataset of over 3000 patients hospitalized for COVID-19, and find evidence of heterogeneous treatment effectiveness predicted largely by indicators of inflammation and thrombosis risk: patients with few indicators of thrombosis risk benefit most from treatments against inflammation, while patients with few indicators of inflammation risk benefit most from treatments against thrombosis. This approach provides an automated methodology to discover heterogeneous and individualized effectiveness of treatments.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Dropout as a Regularizer of Interaction Effects.\n \n \n \n \n\n\n \n Lengerich, B.; Xing, E. P.; and Caruana, R.\n\n\n \n\n\n\n In Proceedings of the Twenty Fifth International Conference on Artificial Intelligence and Statistics, 2022. \n \n\n\n\n
\n\n\n\n \n \n \"Dropout paper\n  \n \n \n \"Dropout preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 5 downloads\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@InProceedings{lengerich2022dropout,\n  title={Dropout as a Regularizer of Interaction Effects},\n  author={Lengerich, Benjamin and Xing, Eric P. and Caruana, Rich},\n  journal={{Proceedings of the Twenty Fifth International Conference on Artificial Intelligence and Statistics}},\n  year={2022},\n  informal_venue = {AISTATS},\n  booktitle = {Proceedings of the Twenty Fifth International Conference on Artificial Intelligence and Statistics},\n  url_paper= {https://proceedings.mlr.press/v151/lengerich22a.html},\n  url_preprint = {https://arxiv.org/abs/2007.00823},\n  abstract = {We examine Dropout through the perspective of interactions: effects that require multiple variables. Given $N$ variables, there are ${N \\choose k}$ possible sets of $k$ variables ($N$ univariate effects, $\\mathcal{O}(N^2)$ pairwise interactions, $\\mathcal{O}(N^3)$ 3-way interactions); we can thus imagine that models with large representational capacity could be dominated by high-order interactions. In this paper, we show that Dropout contributes a regularization effect which helps neural networks (NNs) explore functions of lower-order interactions before considering functions of higher-order interactions. Dropout imposes this regularization by reducing the effective learning rate of higher-order interactions. As a result, Dropout encourages models to learn lower-order functions of additive components. This understanding of Dropout has implications for choosing Dropout rates: higher Dropout rates should be used when we need stronger regularization against interactions. This perspective also issues caution against using Dropout to measure term salience because Dropout regularizes against high-order interactions. Finally, this view of Dropout as a regularizer of interactions provides insight into the varying effectiveness of Dropout across architectures and datasets. 
We also compare Dropout to weight decay and early stopping and find that it is difficult to obtain the same regularization with these alternatives.},\n  keywords = {Deep Learning, Theory}\n}\n\n
\n
\n\n\n
\n We examine Dropout through the perspective of interactions: effects that require multiple variables. Given $N$ variables, there are ${N \\choose k}$ possible sets of $k$ variables ($N$ univariate effects, $\\mathcal{O}(N^2)$ pairwise interactions, $\\mathcal{O}(N^3)$ 3-way interactions); we can thus imagine that models with large representational capacity could be dominated by high-order interactions. In this paper, we show that Dropout contributes a regularization effect which helps neural networks (NNs) explore functions of lower-order interactions before considering functions of higher-order interactions. Dropout imposes this regularization by reducing the effective learning rate of higher-order interactions. As a result, Dropout encourages models to learn lower-order functions of additive components. This understanding of Dropout has implications for choosing Dropout rates: higher Dropout rates should be used when we need stronger regularization against interactions. This perspective also issues caution against using Dropout to measure term salience because Dropout regularizes against high-order interactions. Finally, this view of Dropout as a regularizer of interactions provides insight into the varying effectiveness of Dropout across architectures and datasets. We also compare Dropout to weight decay and early stopping and find that it is difficult to obtain the same regularization with these alternatives.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Ten quick tips for deep learning in biology.\n \n \n \n \n\n\n \n Lee, B. D; Gitter, A.; Greene, C. S; Raschka, S.; Maguire, F.; Titus, A. J; Kessler, M. D; Lee, A. J; Chevrette, M. G; Stewart, P. A.; Britto-Borges, T.; Cofer, E. M.; Yu, K.; Carmona, J. J.; Fertig, E. J.; Kalinin, A. A.; Signal, B.; Lengerich, B. J.; Triche, T. J. J.; and Boca, S. M.\n\n\n \n\n\n\n PLoS computational biology, 18(3): e1009803. 2022.\n \n\n\n\n
\n\n\n\n \n \n \"Ten paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@article{lee2022ten,\n  title={Ten quick tips for deep learning in biology},\n  author={Lee, Benjamin D and Gitter, Anthony and Greene, Casey S and Raschka, Sebastian and Maguire, Finlay and Titus, Alexander J and Kessler, Michael D and Lee, Alexandra J and Chevrette, Marc G and Stewart, Paul Allen and Britto-Borges, Thiago and Cofer, Evan M. and Yu, Kun-Hsing and Carmona, Juan Jose and Fertig, Elana J. and Kalinin, Alexandr A. and Signal, Brandon and \\underline{Lengerich}, \\underline{Benjamin J.} and Triche, Timothy J. Jr. and Boca, Simina M.}\n  ,\n  journal={PLoS computational biology},\n  informal_venue = {PLoS CompBio},\n  volume={18},\n  number={3},\n  pages={e1009803},\n  year={2022},\n  publisher={Public Library of Science San Francisco, CA USA},\n    url_paper={https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1009803},\n    keywords={Deep Learning, Biology, Computational Genomics}\n}\n\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Unique insights into risk factors for antepartum stillbirth using explainable AI.\n \n \n \n \n\n\n \n Bosschieter, T.; Xu, Z.; Lan, H.; Lengerich, B.; Nori, H.; Sitcov, K.; Painter, I.; Souter, V.; and Caruana, R.\n\n\n \n\n\n\n American Journal of Obstetrics & Gynecology. 2022.\n \n\n\n\n
\n\n\n\n \n \n \"Unique paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@article{bosschieter2022smfm2,\n  title={Unique insights into risk factors for antepartum stillbirth using explainable AI},\n  author={Bosschieter, Tomas and Xu, Zifei and Lan, Hui and Lengerich, Benjamin and Nori, Harsha and Sitcov, Kristin and Painter, Ian and Souter, Vivienne and Caruana, Rich},\n  journal={American Journal of Obstetrics \\& Gynecology},\n  informal_venue = {SMFM},\n  volume={},\n  number={},\n  pages={},\n  year={2022},\n  publisher={Elsevier},\n    url_paper={https://www.ajog.org/article/S0002-9378(22)01587-3/abstract},\n    keywords={Healthcare, Pregnancy}\n}\n\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Understanding risk factors for shoulder dystocia using interpretable machine learning.\n \n \n \n \n\n\n \n Lan, H.; Xu, Z.; Bosschieter, T.; Lengerich, B.; Nori, H.; Sitcov, K.; Painter, I.; Souter, V.; and Caruana, R.\n\n\n \n\n\n\n American Journal of Obstetrics & Gynecology. 2022.\n \n\n\n\n
\n\n\n\n \n \n \"Understanding paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@article{lan2022smfm,\n  title={Understanding risk factors for shoulder dystocia using interpretable machine learning},\n  author={Lan, Hui and Xu, Zifei and Bosschieter, Tomas and Lengerich, Benjamin and Nori, Harsha and Sitcov, Kristin and Painter, Ian and Souter, Vivienne and Caruana, Rich},\n  journal={American Journal of Obstetrics \\& Gynecology},\n  informal_venue = {SMFM},\n  volume={},\n  number={},\n  pages={},\n  year={2022},\n  publisher={Elsevier},\n    url_paper={https://www.ajog.org/article/S0002-9378(22)02139-1/abstract},\n    keywords={Healthcare, Pregnancy}\n}\n\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Preterm preeclampsia prediction using intelligible machine learning.\n \n \n \n \n\n\n \n Bosschieter, T.; Xu, Z.; Lan, H.; Lengerich, B.; Nori, H.; Sitcov, K.; Painter, I.; Souter, V.; and Caruana, R.\n\n\n \n\n\n\n American Journal of Obstetrics & Gynecology. 2022.\n \n\n\n\n
\n\n\n\n \n \n \"Preterm paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@article{bosschieter2022smfm,\n  title={Preterm preeclampsia prediction using intelligible machine learning},\n  author={Bosschieter, Tomas and Xu, Zifei and Lan, Hui and Lengerich, Benjamin and Nori, Harsha and Sitcov, Kristin and Painter, Ian and Souter, Vivienne and Caruana, Rich},\n  journal={American Journal of Obstetrics \\& Gynecology},\n  informal_venue = {SMFM},\n  volume={},\n  number={},\n  pages={},\n  year={2022},\n  publisher={Elsevier},\n    url_paper={https://www.ajog.org/article/S0002-9378(22)01596-4/abstract},\n    keywords={Healthcare, Pregnancy}\n}\n\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Predicting severe maternal morbidity at admission for delivery using intelligible machine learning.\n \n \n \n \n\n\n \n Xu, Z.; Bosschieter, T.; Lan, H.; Lengerich, B.; Nori, H.; Sitcov, K.; Painter, I.; Souter, V.; and Caruana, R.\n\n\n \n\n\n\n American Journal of Obstetrics & Gynecology. 2022.\n \n\n\n\n
\n\n\n\n \n \n \"Predicting paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@article{xu2022smfm,\n  title={Predicting severe maternal morbidity at admission for delivery using intelligible machine learning},\n  author={Xu, Zifei and Bosschieter, Tomas and Lan, Hui and Lengerich, Benjamin and Nori, Harsha and Sitcov, Kristin and Painter, Ian and Souter, Vivienne and Caruana, Rich},\n  journal={American Journal of Obstetrics \\& Gynecology},\n  informal_venue = {SMFM},\n  volume={},\n  number={},\n  pages={},\n  year={2022},\n  publisher={Elsevier},\n    url_paper={https://www.ajog.org/article/S0002-9378(22)01588-5/abstract},\n    keywords={Healthcare, Pregnancy}\n}\n\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Time-Varying Mortality Risk Suggests Increased Impact of Thrombosis in Hospitalized Covid-19 Patients.\n \n \n \n \n\n\n \n Lengerich, B. J; Nunnally, M. E; Aphinyanaphongs, Y. J; and Caruana, R.\n\n\n \n\n\n\n In Machine Learning for Health, 2022. \n \n\n\n\n
\n\n\n\n \n \n \"Time-Varying preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@InProceedings{lengerich2021time,\n  title={Time-Varying Mortality Risk Suggests Increased Impact of Thrombosis in Hospitalized Covid-19 Patients},\n  author={Lengerich, Benjamin J and Nunnally, Mark E and Aphinyanaphongs, Yin J and Caruana, Rich},\n  booktitle={Machine Learning for Health},\n  year={2022},\n  url_preprint={https://www.medrxiv.org/content/10.1101/2021.12.11.21267259v1},\n  abstract={Treatment protocols, treatment availability, disease understanding, and viral characteristics have changed over the course of the Covid-19 pandemic; as a result, the risks associated with patient comorbidities and biomarkers have also changed. We add to the ongoing conversation regarding inflammation, hemostasis and vascular function in Covid-19 by performing a time-varying observational analysis of over 4000 patients hospitalized for Covid-19 in a New York City hospital system from March 2020 to August 2021 to elucidate the changing impact of thrombosis, inflammation, and other risk factors on in-hospital mortality. We find that the predictive power of biomarkers of thrombosis risk have increased over time, suggesting an opportunity for improved care by identifying and targeting therapies for patients with elevated thrombophilic propensity.},\n    keywords={Healthcare, Covid-19}\n}\n\n
\n
\n\n\n
\n Treatment protocols, treatment availability, disease understanding, and viral characteristics have changed over the course of the Covid-19 pandemic; as a result, the risks associated with patient comorbidities and biomarkers have also changed. We add to the ongoing conversation regarding inflammation, hemostasis and vascular function in Covid-19 by performing a time-varying observational analysis of over 4000 patients hospitalized for Covid-19 in a New York City hospital system from March 2020 to August 2021 to elucidate the changing impact of thrombosis, inflammation, and other risk factors on in-hospital mortality. We find that the predictive power of biomarkers of thrombosis risk have increased over time, suggesting an opportunity for improved care by identifying and targeting therapies for patients with elevated thrombophilic propensity.\n
\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2021\n \n \n (6)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n Neural Additive Models: Interpretable Machine Learning with Neural Nets.\n \n \n \n \n\n\n \n Agarwal, R.; Melnick, L.; Frosst, N.; Zhang, X.; Lengerich, B.; Caruana, R.; and Hinton, G. E\n\n\n \n\n\n\n Advances in Neural Information Processing Systems, 34: 4699–4711. 2021.\n \n\n\n\n
\n\n\n\n \n \n \"Neural preprint\n  \n \n \n \"Neural paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@article{agarwal2022neural,\n  title={Neural Additive Models: Interpretable Machine Learning with Neural Nets},\n  author={Agarwal, Rishabh and Melnick, Levi and Frosst, Nicholas and Zhang, Xuezhou and Lengerich, Ben and Caruana, Rich and Hinton, Geoffrey E},\n  journal={Advances in Neural Information Processing Systems},\n  volume={34},\n  pages={4699--4711},\n  year={2021},\n  informal_venue = {NeurIPS},\n  url_preprint={https://arxiv.org/abs/2004.13912},\n  url_paper={https://proceedings.neurips.cc/paper/2021/hash/251bd0442dfcc53b5a761e050f8022b8-Abstract.html},\n  abstract={Deep neural networks (DNNs) are powerful black-box predictors that have achieved impressive performance on a wide variety of tasks. However, their accuracy comes at the cost of intelligibility: it is usually unclear how they make their decisions. This hinders their applicability to high stakes decision-making domains such as healthcare. We propose Neural Additive Models (NAMs) which combine some of the expressivity of DNNs with the inherent intelligibility of generalized additive models. NAMs learn a linear combination of neural networks that each attend to a single input feature. These networks are trained jointly and can learn arbitrarily complex relationships between their input feature and the output. Our experiments on regression and classification datasets show that NAMs are more accurate than widely used intelligible models such as logistic regression and shallow decision trees. They perform similarly to existing state-of-the-art generalized additive models in accuracy, but are more flexible because they are based on neural nets instead of boosted trees. 
To demonstrate this, we show how NAMs can be used for multitask learning on synthetic data and on the COMPAS recidivism data due to their composability, and demonstrate that the differentiability of NAMs allows them to train more complex interpretable models for COVID-19.},\n  keywords = {Interpretable, Deep Learning, Generalized Additive Models}\n}\n\n
\n
\n\n\n
\n Deep neural networks (DNNs) are powerful black-box predictors that have achieved impressive performance on a wide variety of tasks. However, their accuracy comes at the cost of intelligibility: it is usually unclear how they make their decisions. This hinders their applicability to high stakes decision-making domains such as healthcare. We propose Neural Additive Models (NAMs) which combine some of the expressivity of DNNs with the inherent intelligibility of generalized additive models. NAMs learn a linear combination of neural networks that each attend to a single input feature. These networks are trained jointly and can learn arbitrarily complex relationships between their input feature and the output. Our experiments on regression and classification datasets show that NAMs are more accurate than widely used intelligible models such as logistic regression and shallow decision trees. They perform similarly to existing state-of-the-art generalized additive models in accuracy, but are more flexible because they are based on neural nets instead of boosted trees. To demonstrate this, we show how NAMs can be used for multitask learning on synthetic data and on the COMPAS recidivism data due to their composability, and demonstrate that the differentiability of NAMs allows them to train more complex interpretable models for COVID-19.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n How Interpretable and Trustworthy are GAMs?.\n \n \n \n \n\n\n \n Chang, C.; Tan, S.; Lengerich, B.; Goldenberg, A.; and Caruana, R.\n\n\n \n\n\n\n In Proceedings of the 27th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, 2021. \n \n\n\n\n
\n\n\n\n \n \n \"How paper\n  \n \n \n \"How preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@InProceedings{chang2021how,\n  title={How Interpretable and Trustworthy are GAMs?},\n  author={Chang, Chun-Hao and Tan, Sarah and Lengerich, Ben and Goldenberg, Anna and Caruana, Rich},\n  journal={Proceedings of the 27th ACM SIGKDD International Conference on Knowledge Discovery \\& Data Mining},\n  booktitle={Proceedings of the 27th ACM SIGKDD International Conference on Knowledge Discovery \\& Data Mining},\n  year={2021},\n  informal_venue = {KDD},\n  abstract = {Generalized additive models (GAMs) have become a leading model class for interpretable machine learning. However, there are many algorithms for training GAMs, and these can learn different or even contradictory models, while being equally accurate. Which GAM should we trust? In this paper, we quantitatively and qualitatively investigate a variety of GAM algorithms on real and simulated datasets. We find that GAMs with high feature sparsity (only using a few variables to make predictions) can miss patterns in the data and be unfair to rare subpopulations. Our results suggest that inductive bias plays a crucial role in what interpretable models learn and that tree-based GAMs represent the best balance of sparsity, fidelity and accuracy and thus appear to be the most trustworthy GAM models.},\n  url_paper = {https://dl.acm.org/doi/abs/10.1145/3447548.3467453},\n  url_preprint={https://arxiv.org/abs/2006.06466},\n  keywords={Interpretable, Generalized Additive Models}\n}\n\n
\n
\n\n\n
\n Generalized additive models (GAMs) have become a leading model class for interpretable machine learning. However, there are many algorithms for training GAMs, and these can learn different or even contradictory models, while being equally accurate. Which GAM should we trust? In this paper, we quantitatively and qualitatively investigate a variety of GAM algorithms on real and simulated datasets. We find that GAMs with high feature sparsity (only using a few variables to make predictions) can miss patterns in the data and be unfair to rare subpopulations. Our results suggest that inductive bias plays a crucial role in what interpretable models learn and that tree-based GAMs represent the best balance of sparsity, fidelity and accuracy and thus appear to be the most trustworthy GAM models.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Length of labor and severe maternal morbidity in the NTSV population.\n \n \n \n \n\n\n \n Lengerich, B. J.; Caruana, R.; Weeks, W. B; Painter, I.; Spencer, S.; Sitcov, K.; Daly, C.; and Souter, V.\n\n\n \n\n\n\n American Journal of Obstetrics & Gynecology, 224(2): S33. 2021.\n \n\n\n\n
\n\n\n\n \n \n \"Length paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@article{lengerich2021length,\n  title={Length of labor and severe maternal morbidity in the NTSV population},\n  author={Lengerich, Benjamin J. and Caruana, Rich and Weeks, William B and Painter, Ian and Spencer, Sydney and Sitcov, Kristin and Daly, Colleen and Souter, Vivienne},\n  journal={American Journal of Obstetrics \\& Gynecology},\n  volume={224},\n  number={2},\n  pages={S33},\n  year={2021},\n  publisher={Elsevier},\n  url_paper={https://www.ajog.org/article/S0002-9378(20)31386-7/pdf},\n    keywords={Healthcare, Pregnancy}\n}\n\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Insights into severe maternal morbidity in the NTSV population.\n \n \n \n \n\n\n \n Lengerich, B. J.; Caruana, R.; Weeks, W. B; Painter, I.; Spencer, S.; Sitcov, K.; Daly, C.; and Souter, V.\n\n\n \n\n\n\n American Journal of Obstetrics & Gynecology, 224(2): S629–S630. 2021.\n \n\n\n\n
\n\n\n\n \n \n \"Insights paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@article{lengerich2021insights,\n  title={Insights into severe maternal morbidity in the NTSV population},\n  author={Lengerich, Benjamin J. and Caruana, Rich and Weeks, William B and Painter, Ian and Spencer, Sydney and Sitcov, Kristin and Daly, Colleen and Souter, Vivienne},\n  journal={American Journal of Obstetrics \\& Gynecology},\n  volume={224},\n  number={2},\n  pages={S629--S630},\n  year={2021},\n  publisher={Elsevier},\n  url_paper={https://www.ajog.org/article/S0002-9378(20)32418-2/fulltext},\n    keywords={Healthcare, Pregnancy}\n}\n\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Neutrophil Lymphocyte Ratio as a Determinant of Glucocorticoid Effectiveness in Covid-19 Treatment.\n \n \n \n \n\n\n \n Lengerich, B.; Caruana, R.; and Aphinyanaphongs, Y.\n\n\n \n\n\n\n MedRXiv. 2021.\n \n\n\n\n
\n\n\n\n \n \n \"Neutrophil preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@article{lengerich2021neutrophil,\n  title={Neutrophil Lymphocyte Ratio as a Determinant of Glucocorticoid Effectiveness in Covid-19 Treatment},\n  author={Lengerich, Benjamin and Caruana, Rich and Aphinyanaphongs, Yin},\n  journal={MedRXiv},\n  year={2021},\n  url_preprint = {https://www.medrxiv.org/content/10.1101/2021.06.15.21251794v1},\n  abstract = {Glucocorticoids have been shown to improve outcomes of patients with severe cases of Covid-19. However, criteria for prescribing glucocorticoids are currently limited. To identify potential for targeting, we perform an observational analysis of mortality of hospitalized patients. Our results agree with current clinical understanding that glucocorticoids benefit patients with severe cases of Covid-19, and that elevated Neutrophil/Lymphocyte Ratio (NLR) is associated with mortality. Furthermore, our results suggest that glucocorticoids could be targeted to patients with elevated NLR (especially in the range 6-25) at time of admission. Finally, we note there are also high-risk patients with low NLR, suggesting varying presentations of severe Covid-19.},\n    keywords={Healthcare, Covid-19}\n}\n\n\n
\n
\n\n\n
\n Glucocorticoids have been shown to improve outcomes of patients with severe cases of Covid-19. However, criteria for prescribing glucocorticoids are currently limited. To identify potential for targeting, we perform an observational analysis of mortality of hospitalized patients. Our results agree with current clinical understanding that glucocorticoids benefit patients with severe cases of Covid-19, and that elevated Neutrophil/Lymphocyte Ratio (NLR) is associated with mortality. Furthermore, our results suggest that glucocorticoids could be targeted to patients with elevated NLR (especially in the range 6-25) at time of admission. Finally, we note there are also high-risk patients with low NLR, suggesting varying presentations of severe Covid-19.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Data-Driven Patterns in Protective Effects of Ibuprofen and Ketorolac on Hospitalized Covid-19 Patients.\n \n \n \n \n\n\n \n Caruana, R.; Lengerich, B.; and Aphinyanaphongs, Y.\n\n\n \n\n\n\n In 2021. \n \n\n\n\n
\n\n\n\n \n \n \"Data-Driven preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@InProceedings{caruana2021data,\n  title={Data-Driven Patterns in Protective Effects of Ibuprofen and Ketorolac on Hospitalized Covid-19 Patients},\n  author={Caruana, Rich and Lengerich, Benjamin and Aphinyanaphongs, Yin},\n  journal={American Medical Informatics Association (AMIA) Annual Symposium},\n  year={2021},\n  url_preprint={https://www.medrxiv.org/content/10.1101/2021.06.15.21258991v1},\n  abstract={The impact of nonsteroidal anti-inflammatory drugs (NSAIDs) on patients with Covid-19 has been unclear. A major reason for this uncertainty is the confounding between treatments, patient comorbidities, and illness severity. Here, we perform an observational analysis of over 3000 patients hospitalized for Covid-19 in a New York hospital system to identify the relationship between in-patient treatment with Ibuprofen or Ketorolac and mortality. Our analysis finds evidence consistent with a protective effect for Ibuprofen and Ketorolac, with evidence stronger for a protective effect of Ketorolac than for a protective effect of Ibuprofen.},\n    keywords={Healthcare, Covid-19}\n}\n
\n
\n\n\n
\n The impact of nonsteroidal anti-inflammatory drugs (NSAIDs) on patients with Covid-19 has been unclear. A major reason for this uncertainty is the confounding between treatments, patient comorbidities, and illness severity. Here, we perform an observational analysis of over 3000 patients hospitalized for Covid-19 in a New York hospital system to identify the relationship between in-patient treatment with Ibuprofen or Ketorolac and mortality. Our analysis finds evidence consistent with a protective effect for Ibuprofen and Ketorolac, with evidence stronger for a protective effect of Ketorolac than for a protective effect of Ibuprofen.\n
\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2020\n \n \n (3)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n Discriminative Subtyping of Lung Cancers from Histopathology Images via Contextual Deep Learning.\n \n \n \n \n\n\n \n Lengerich*, B.; Al-Shedivat*, M.; Alavi, A.; Williams, J.; Labakki, S.; and Xing, E.\n\n\n \n\n\n\n . 2020.\n \n\n\n\n
\n\n\n\n \n \n \"Discriminative preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n \n \n\n\n\n
\n
@article{lengerich2020discriminative,\n  title={Discriminative Subtyping of Lung Cancers from Histopathology Images via Contextual Deep Learning},\n  author={Lengerich*, Benjamin and Al-Shedivat*, Maruan and Alavi, Amir and Williams, Jennifer and Labakki, Sami and Xing, Eric},\n  year={2020},\n  informal_venue = {arXiv},\n  url_preprint={https://www.medrxiv.org/content/10.1101/2020.06.25.20140053v2},\n  abstract={Summarizing multiple data modalities into a parsimonious cancer “subtype” is difficult because the most informative representation of each patient’s disease is not observed. We propose to model these latent summaries as discriminative subtypes: sample representations which induce accurate and interpretable sample-specific models for downstream predictions. In this way, discriminative subtypes, which are shared between data modalities, can be estimated from one data modality and optimized according to the predictions induced in another modality. We apply this approach to lung cancer by training a deep neural network to predict discriminative subtypes from histopathology images, and use these predicted subtypes to generate models which classify adenocarcinoma, squamous cell carcinoma, and healthy tissue based on transcriptomic signatures. In this way, we optimize the latent discriminative subtypes through induced prediction loss, and the discriminative subtypes are interpreted with standard interpretation of transcriptomic predictive models. Our framework achieves state-of-the-art classification accuracy (F1-score of 0.97) and identifies discriminative subtypes which link histopathology images to transcriptomic explanations without requiring pre-specification of morphological patterns or transcriptomic processes.},\n  keywords={Interpretable, Contextualized, Computational Genomics, Cancer},\n}\n\n
\n
\n\n\n
\n Summarizing multiple data modalities into a parsimonious cancer “subtype” is difficult because the most informative representation of each patient’s disease is not observed. We propose to model these latent summaries as discriminative subtypes: sample representations which induce accurate and interpretable sample-specific models for downstream predictions. In this way, discriminative subtypes, which are shared between data modalities, can be estimated from one data modality and optimized according to the predictions induced in another modality. We apply this approach to lung cancer by training a deep neural network to predict discriminative subtypes from histopathology images, and use these predicted subtypes to generate models which classify adenocarcinoma, squamous cell carcinoma, and healthy tissue based on transcriptomic signatures. In this way, we optimize the latent discriminative subtypes through induced prediction loss, and the discriminative subtypes are interpreted with standard interpretation of transcriptomic predictive models. Our framework achieves state-of-the-art classification accuracy (F1-score of 0.97) and identifies discriminative subtypes which link histopathology images to transcriptomic explanations without requiring pre-specification of morphological patterns or transcriptomic processes.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Purifying Interaction Effects with the Functional ANOVA: An Efficient Algorithm for Recovering Identifiable Additive Models.\n \n \n \n \n\n\n \n Lengerich, B.; Tan, S.; Chang, C.; Hooker, G.; and Caruana, R.\n\n\n \n\n\n\n In Chiappa, S.; and Calandra, R., editor(s), Proceedings of the Twenty Third International Conference on Artificial Intelligence and Statistics, volume 108, of Proceedings of Machine Learning Research, pages 2402–2412, 26–28 Aug 2020. PMLR\n \n\n\n\n
\n\n\n\n \n \n \"Purifying paper\n  \n \n \n \"Purifying preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@InProceedings{lengerich2020purifying,\n  title =    {Purifying Interaction Effects with the Functional ANOVA: An Efficient Algorithm for Recovering Identifiable Additive Models},\n  author =       {Lengerich, Benjamin and Tan, Sarah and Chang, Chun-Hao and Hooker, Giles and Caruana, Rich},\n  booktitle =    {Proceedings of the Twenty Third International Conference on Artificial Intelligence and Statistics},\n  pages =    {2402--2412},\n  year =     {2020},\n  editor =   {Chiappa, Silvia and Calandra, Roberto},\n  volume =   {108},\n  series =   {Proceedings of Machine Learning Research},\n  month =    {26--28 Aug},\n  publisher =    {PMLR},\n  informal_venue = {AISTATS},\n  pdf =      {http://proceedings.mlr.press/v108/lengerich20a/lengerich20a.pdf},\n  abstract =     {Models which estimate main effects of individual variables alongside interaction effects have an identifiability challenge: effects can be freely moved between main effects and interaction effects without changing the model prediction. This is a critical problem for interpretability because it permits “contradictory" models to represent the same function. To solve this problem, we propose pure interaction effects: variance in the outcome which cannot be represented by any subset of features. This definition has an equivalence with the Functional ANOVA decomposition. To compute this decomposition, we present a fast, exact algorithm that transforms any piecewise-constant function (such as a tree-based model) into a purified, canonical representation. We apply this algorithm to Generalized Additive Models with interactions trained on several datasets and show large disparity, including contradictions, between the apparent and the purified effects. 
These results underscore the need to specify data distributions and ensure identifiability before interpreting model parameters.},\n  url_paper =      {http://proceedings.mlr.press/v108/lengerich20a.html},\n    url_preprint={https://arxiv.org/abs/1911.04974},\n    keywords={Interpretable, Generalized Additive Models, Theory}\n}\n\n
\n
\n\n\n
\n Models which estimate main effects of individual variables alongside interaction effects have an identifiability challenge: effects can be freely moved between main effects and interaction effects without changing the model prediction. This is a critical problem for interpretability because it permits “contradictory\" models to represent the same function. To solve this problem, we propose pure interaction effects: variance in the outcome which cannot be represented by any subset of features. This definition has an equivalence with the Functional ANOVA decomposition. To compute this decomposition, we present a fast, exact algorithm that transforms any piecewise-constant function (such as a tree-based model) into a purified, canonical representation. We apply this algorithm to Generalized Additive Models with interactions trained on several datasets and show large disparity, including contradictions, between the apparent and the purified effects. These results underscore the need to specify data distributions and ensure identifiability before interpreting model parameters.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Disentangling Increased Testing from Covid-19 Epidemic Spread.\n \n \n \n \n\n\n \n Lengerich, B. J.; Neiswanger, W.; Lengerich, E. J.; and Xing, E. P.\n\n\n \n\n\n\n MedRXiv. 2020.\n \n\n\n\n
\n\n\n\n \n \n \"Disentangling preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@article{lengerich2020disentangling,\n  title={Disentangling Increased Testing from Covid-19 Epidemic Spread},\n  author={Lengerich, Benjamin J. and Neiswanger, Willie and Lengerich, Eugene J. and Xing, Eric P.},\n  journal={MedRXiv},\n  informal_venue={medrxiv},\n  year={2020},\n  abstract={To design effective disease control strategies, it is critical to understand the incidence of diseases. In the Covid-19 epidemic in the United States (caused by outbreak of the SARS-CoV-2 virus), testing capacity was initially very limited and has been increasing at the same time as the virus has been spreading. When estimating the incidence, it can be difficult to distinguish whether increased numbers of positive tests stem from increases in the spread of the virus or increases in testing. This has made it very difficult to identify locations in which the epidemic poses the largest public health risks. Here, we use a probabilistic model to quantify beliefs about testing strategies and understand implications regarding incidence. We apply this model to estimate the incidence in each state of the United States, and find that: (1) the Covid-19 epidemic is likely to be more widespread than reported by limited testing, (2) the Covid-19 epidemic growth in the summer months is likely smaller than it was during the spring months, and (3) the regions which are at highest risk of Covid-19 epidemic outbreaks are not always those with the largest number of positive test results.},\n  url_preprint={https://www.medrxiv.org/content/10.1101/2020.07.09.20141762v1},\n    keywords={Healthcare, Covid-19}\n}\n\n\n
\n
\n\n\n
\n To design effective disease control strategies, it is critical to understand the incidence of diseases. In the Covid-19 epidemic in the United States (caused by outbreak of the SARS-CoV-2 virus), testing capacity was initially very limited and has been increasing at the same time as the virus has been spreading. When estimating the incidence, it can be difficult to distinguish whether increased numbers of positive tests stem from increases in the spread of the virus or increases in testing. This has made it very difficult to identify locations in which the epidemic poses the largest public health risks. Here, we use a probabilistic model to quantify beliefs about testing strategies and understand implications regarding incidence. We apply this model to estimate the incidence in each state of the United States, and find that: (1) the Covid-19 epidemic is likely to be more widespread than reported by limited testing, (2) the Covid-19 epidemic growth in the summer months is likely smaller than it was during the spring months, and (3) the regions which are at highest risk of Covid-19 epidemic outbreaks are not always those with the largest number of positive test results.\n
\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2019\n \n \n (1)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n Learning Sample-Specific Models with Low-Rank Personalized Regression.\n \n \n \n \n\n\n \n Lengerich, B. J.; Aragam, B.; and Xing, E. P.\n\n\n \n\n\n\n In Advances in Neural Information Processing Systems (NeurIPS), 2019. \n \n\n\n\n
\n\n\n\n \n \n \"Learning paper\n  \n \n \n \"Learning preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 1 download\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@InProceedings{lengerich2019learning,\n  title={Learning Sample-Specific Models with Low-Rank Personalized Regression},\n  author={Lengerich, Benjamin J. and Aragam, Bryon and Xing, Eric P.},\n  journal={Advances in Neural Information Processing Systems (NeurIPS)},\n  booktitle={Advances in Neural Information Processing Systems (NeurIPS)},\n  year={2019},\n  informal_venue = {NeurIPS},\n  abstract = {\n    Modern applications of machine learning (ML) deal with increasingly heterogeneous datasets comprised of data collected from overlapping latent subpopulations. As a result, traditional models trained over large datasets may fail to recognize highly predictive localized effects in favour of weakly predictive global patterns. This is a problem because localized effects are critical to developing individualized policies and treatment plans in applications ranging from precision medicine to advertising. To address this challenge, we propose to estimate sample-specific models that tailor inference and prediction at the individual level. In contrast to classical ML models that estimate a single, complex model (or only a few complex models), our approach produces a model personalized to each sample. These sample-specific models can be studied to understand subgroup dynamics that go beyond coarse-grained class labels. Crucially, our approach does not assume that relationships between samples (e.g. a similarity network) are known a priori. Instead, we use unmodeled covariates to learn a latent distance metric over the samples. 
We apply this approach to financial, biomedical, and electoral data as well as simulated data and show that sample-specific models provide fine-grained interpretations of complicated phenomena without sacrificing predictive accuracy compared to state-of-the-art models such as deep neural networks.\n  },\n  url_paper = {https://proceedings.neurips.cc/paper/2019/file/52d2752b150f9c35ccb6869cbf074e48-Paper.pdf},\n  url_preprint = {https://arxiv.org/abs/1910.06939},\n  keywords = {Interpretable, Contextualized}\n}\n\n
\n
\n\n\n
\n Modern applications of machine learning (ML) deal with increasingly heterogeneous datasets comprised of data collected from overlapping latent subpopulations. As a result, traditional models trained over large datasets may fail to recognize highly predictive localized effects in favour of weakly predictive global patterns. This is a problem because localized effects are critical to developing individualized policies and treatment plans in applications ranging from precision medicine to advertising. To address this challenge, we propose to estimate sample-specific models that tailor inference and prediction at the individual level. In contrast to classical ML models that estimate a single, complex model (or only a few complex models), our approach produces a model personalized to each sample. These sample-specific models can be studied to understand subgroup dynamics that go beyond coarse-grained class labels. Crucially, our approach does not assume that relationships between samples (e.g. a similarity network) are known a priori. Instead, we use unmodeled covariates to learn a latent distance metric over the samples. We apply this approach to financial, biomedical, and electoral data as well as simulated data and show that sample-specific models provide fine-grained interpretations of complicated phenomena without sacrificing predictive accuracy compared to state-of-the-art models such as deep neural networks. \n
\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2018\n \n \n (4)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n Precision Lasso: Accounting for Correlations and Linear Dependencies in High-Dimensional Genomic Data.\n \n \n \n \n\n\n \n Wang, H.; Lengerich, B. J.; Aragam, B.; and Xing, E. P\n\n\n \n\n\n\n Bioinformatics, 35(7): 1181–1187. 2018.\n \n\n\n\n
\n\n\n\n \n \n \"Precision paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@article{wang2018precision,\n  title={Precision Lasso: Accounting for Correlations and Linear Dependencies in High-Dimensional Genomic Data},\n  author={Wang, Haohan and Lengerich, Benjamin J. and Aragam, Bryon and Xing, Eric P},\n  journal={Bioinformatics},\n  volume={35},\n  number={7},\n  pages={1181--1187},\n  year={2018},\n  informal_venue = {Bioinformatics},\n  publisher={Oxford University Press},\n  abstract = {Association studies to discover links between genetic markers and phenotypes are central to bioinformatics. Methods of regularized regression, such as variants of the Lasso, are popular for this task. Despite the good predictive performance of these methods in the average case, they suffer from unstable selections of correlated variables and inconsistent selections of linearly dependent variables. Unfortunately, as we demonstrate empirically, such problematic situations of correlated and linearly dependent variables often exist in genomic datasets and lead to under-performance of classical methods of variable selection. To address these challenges, we propose the Precision Lasso. Precision Lasso is a Lasso variant that promotes sparse variable selection by regularization governed by the covariance and inverse covariance matrices of explanatory variables. We illustrate its capacity for stable and consistent variable selection in simulated data with highly correlated and linearly dependent variables. We then demonstrate the effectiveness of the Precision Lasso to select meaningful variables from transcriptomic profiles of breast cancer patients. Our results indicate that in settings with correlated and linearly dependent variables, the Precision Lasso outperforms popular methods of variable selection such as the Lasso, the Elastic Net and Minimax Concave Penalty (MCP) regression.},\n  url_paper = {https://academic.oup.com/bioinformatics/article/35/7/1181/5089232?login=true},\n  keywords={Statistical Genetics, Genomics}\n}\n\n
\n
\n\n\n
\n Association studies to discover links between genetic markers and phenotypes are central to bioinformatics. Methods of regularized regression, such as variants of the Lasso, are popular for this task. Despite the good predictive performance of these methods in the average case, they suffer from unstable selections of correlated variables and inconsistent selections of linearly dependent variables. Unfortunately, as we demonstrate empirically, such problematic situations of correlated and linearly dependent variables often exist in genomic datasets and lead to under-performance of classical methods of variable selection. To address these challenges, we propose the Precision Lasso. Precision Lasso is a Lasso variant that promotes sparse variable selection by regularization governed by the covariance and inverse covariance matrices of explanatory variables. We illustrate its capacity for stable and consistent variable selection in simulated data with highly correlated and linearly dependent variables. We then demonstrate the effectiveness of the Precision Lasso to select meaningful variables from transcriptomic profiles of breast cancer patients. Our results indicate that in settings with correlated and linearly dependent variables, the Precision Lasso outperforms popular methods of variable selection such as the Lasso, the Elastic Net and Minimax Concave Penalty (MCP) regression.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Retrofitting Distributional Embeddings to Knowledge Graphs with Functional Relations.\n \n \n \n \n\n\n \n Lengerich, B. J.; Maas, A.; and Potts, C.\n\n\n \n\n\n\n In Proceedings of the 27th International Conference on Computational Linguistics (COLING), 2018. \n \n\n\n\n
\n\n\n\n \n \n \"Retrofitting paper\n  \n \n \n \"Retrofitting preprint\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@InProceedings{lengerich2018retrofitting,\n    title={Retrofitting Distributional Embeddings to Knowledge Graphs with Functional Relations},\n    author={Lengerich, Benjamin J. and Maas, Andrew and Potts, Christopher},\n    journal={Proceedings of the 27th International Conference on Computational Linguistics (COLING)},\n    booktitle={Proceedings of the 27th International Conference on Computational Linguistics (COLING)},\n    year={2018},\n    informal_venue = {COLING},\n    abstract = {Knowledge graphs are a versatile framework to encode richly structured data relationships, but it can be challenging to combine these graphs with unstructured data. Methods for retrofitting pre-trained entity representations to the structure of a knowledge graph typically assume that entities are embedded in a connected space and that relations imply similarity. However, useful knowledge graphs often contain diverse entities and relations (with potentially disjoint underlying corpora) which do not accord with these assumptions. To overcome these limitations, we present Functional Retrofitting, a framework that generalizes current retrofitting methods by explicitly modeling pairwise relations. Our framework can directly incorporate a variety of pairwise penalty functions previously developed for knowledge graph completion. Further, it allows users to encode, learn, and extract information about relation semantics. We present both linear and neural instantiations of the framework. Functional Retrofitting significantly outperforms existing retrofitting methods on complex knowledge graphs and loses no accuracy on simpler graphs (in which relations do imply similarity). 
Finally, we demonstrate the utility of the framework by predicting new drug--disease treatment pairs in a large, complex health knowledge graph.},\n    url_paper = {https://aclanthology.org/C18-1205/},\n    url_preprint = {https://arxiv.org/abs/1708.00112},\n    keywords = {Natural Language Processing, Knowledge Graphs}\n}\n\n
\n
\n\n\n
\n Knowledge graphs are a versatile framework to encode richly structured data relationships, but it can be challenging to combine these graphs with unstructured data. Methods for retrofitting pre-trained entity representations to the structure of a knowledge graph typically assume that entities are embedded in a connected space and that relations imply similarity. However, useful knowledge graphs often contain diverse entities and relations (with potentially disjoint underlying corpora) which do not accord with these assumptions. To overcome these limitations, we present Functional Retrofitting, a framework that generalizes current retrofitting methods by explicitly modeling pairwise relations. Our framework can directly incorporate a variety of pairwise penalty functions previously developed for knowledge graph completion. Further, it allows users to encode, learn, and extract information about relation semantics. We present both linear and neural instantiations of the framework. Functional Retrofitting significantly outperforms existing retrofitting methods on complex knowledge graphs and loses no accuracy on simpler graphs (in which relations do imply similarity). Finally, we demonstrate the utility of the framework by predicting new drug–disease treatment pairs in a large, complex health knowledge graph.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Personalized Regression Enables Sample-specific Pan-cancer Analysis.\n \n \n \n \n\n\n \n Lengerich, B. J.; Aragam, B.; and Xing, E. P\n\n\n \n\n\n\n Bioinformatics, 34(13): i178-i186. 2018.\n \n\n\n\n
\n\n\n\n \n \n \"PersonalizedPaper\n  \n \n \n \"Personalized paper\n  \n \n \n \"Personalized preprint\n  \n \n\n \n \n doi\n  \n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n \n \n \n \n\n\n\n
\n
@article{lengerich2018personalized,\n  author = {Lengerich, Benjamin J. and Aragam, Bryon and Xing, Eric P},\n  title = {Personalized Regression Enables Sample-specific Pan-cancer Analysis},\n  journal = {Bioinformatics},\n  volume = {34},\n  number = {13},\n  pages = {i178-i186},\n  year = {2018},\n  informal_venue = {ISMB},\n  doi = {10.1093/bioinformatics/bty250},\n  URL = {http://dx.doi.org/10.1093/bioinformatics/bty250},\n  eprint = {/oup/backfile/content_public/journal/bioinformatics/34/13/10.1093_bioinformatics_bty250/1/bty250.pdf},\n  url_paper = {http://dx.doi.org/10.1093/bioinformatics/bty250},\n  abstract = {In many applications, inter-sample heterogeneity is crucial to understanding the complex biological processes under study. For example, in genomic analysis of cancers, each patient in a cohort may have a different driver mutation, making it difficult or impossible to identify causal mutations from an averaged view of the entire cohort. Unfortunately, many traditional methods for genomic analysis seek to estimate a single model which is shared by all samples in a population, ignoring this inter-sample heterogeneity entirely. In order to better understand patient heterogeneity, it is necessary to develop practical, personalized statistical models.\n  To uncover this inter-sample heterogeneity, we propose a novel regularizer for achieving patient-specific personalized estimation. This regularizer operates by learning two latent distance metrics—one between personalized parameters and one between clinical covariates—and attempting to match the induced distances as closely as possible. Crucially, we do not assume these distance metrics are already known. Instead, we allow the data to dictate the structure of these latent distance metrics. 
Finally, we apply our method to learn patient-specific, interpretable models for a pan-cancer gene expression dataset containing samples from more than 30 distinct cancer types and find strong evidence of personalization effects between cancer types as well as between individuals. Our analysis uncovers sample-specific aberrations that are overlooked by population-level methods, suggesting a promising new path for precision analysis of complex diseases such as cancer.},\n    url_preprint={https://www.biorxiv.org/content/early/2018/04/05/294496?},\n    keywords = {Interpretable, Contextualized, Statistical Genetics, Genomics, Cancer}\n}\n\n
\n
\n\n\n
\n In many applications, inter-sample heterogeneity is crucial to understanding the complex biological processes under study. For example, in genomic analysis of cancers, each patient in a cohort may have a different driver mutation, making it difficult or impossible to identify causal mutations from an averaged view of the entire cohort. Unfortunately, many traditional methods for genomic analysis seek to estimate a single model which is shared by all samples in a population, ignoring this inter-sample heterogeneity entirely. In order to better understand patient heterogeneity, it is necessary to develop practical, personalized statistical models. To uncover this inter-sample heterogeneity, we propose a novel regularizer for achieving patient-specific personalized estimation. This regularizer operates by learning two latent distance metrics—one between personalized parameters and one between clinical covariates—and attempting to match the induced distances as closely as possible. Crucially, we do not assume these distance metrics are already known. Instead, we allow the data to dictate the structure of these latent distance metrics. Finally, we apply our method to learn patient-specific, interpretable models for a pan-cancer gene expression dataset containing samples from more than 30 distinct cancer types and find strong evidence of personalization effects between cancer types as well as between individuals. Our analysis uncovers sample-specific aberrations that are overlooked by population-level methods, suggesting a promising new path for precision analysis of complex diseases such as cancer.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Opportunities and Obstacles for Deep Learning in Biology and Medicine.\n \n \n \n \n\n\n \n Ching, T.; Himmelstein, D. S.; Beaulieu-Jones, B. K.; Kalinin, A. A.; Do, B. T.; Way, G. P.; Ferrero, E.; Agapow, P.; Zietz, M.; Hoffman, M. M.; Xie, W.; Rosen, G. L.; ˘nderlineLengerich, ˘.; Israeli, J.; Lanchantin, J.; Woloszynek, S.; Carpenter, A. E.; Shrikumar, A.; Xu, J.; Cofer, E. M.; Lavender, C. A.; Turaga, S. C.; Alexandari, A. M.; Lu, Z.; Harris, D. J.; DeCaprio, D.; Qi, Y.; Kundaje, A.; Peng, Y.; Wiley, L. K.; Segler, M. H. S.; Boca, S. M.; Swamidass, S. J.; Huang, A.; Gitter, A.; and Greene, C. S.\n\n\n \n\n\n\n Journal of The Royal Society Interface, 15(141). 2018.\n \n\n\n\n
\n\n\n\n \n \n \"OpportunitiesPaper\n  \n \n \n \"Opportunities paper\n  \n \n \n \"Opportunities preprint\n  \n \n\n \n \n doi\n  \n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@article{ching2018opportunities,\n    author = {Ching, Travers and Himmelstein, Daniel S. and Beaulieu-Jones, Brett K. and Kalinin, Alexandr A. and Do, Brian T. and Way, Gregory P. and Ferrero, Enrico and Agapow, Paul-Michael and Zietz, Michael and Hoffman, Michael M. and Xie, Wei and Rosen, Gail L. and \\underline{Lengerich}, \\underline{Benjamin J.} and Israeli, Johnny and Lanchantin, Jack and Woloszynek, Stephen and Carpenter, Anne E. and Shrikumar, Avanti and Xu, Jinbo and Cofer, Evan M. and Lavender, Christopher A. and Turaga, Srinivas C. and Alexandari, Amr M. and Lu, Zhiyong and Harris, David J. and DeCaprio, Dave and Qi, Yanjun and Kundaje, Anshul and Peng, Yifan and Wiley, Laura K. and Segler, Marwin H. S. and Boca, Simina M. and Swamidass, S. Joshua and Huang, Austin and Gitter, Anthony and Greene, Casey S.},\n    title = {Opportunities and Obstacles for Deep Learning in Biology and Medicine},\n    volume = {15},\n    number = {141},\n    year = {2018},\n    doi = {10.1098/rsif.2017.0387},\n    publisher = {The Royal Society},\n    informal_venue = {JRSI},\n    abstract = {Deep learning describes a class of machine learning algorithms that are capable of combining raw inputs into layers of intermediate features. These algorithms have recently shown impressive results across a variety of domains. Biology and medicine are data-rich disciplines, but the data are complex and often ill-understood. Hence, deep learning techniques may be particularly well suited to solve problems of these fields. We examine applications of deep learning to a variety of biomedical problems{\\textemdash}patient classification, fundamental biological processes and treatment of patients{\\textemdash}and discuss whether deep learning will be able to transform these tasks or if the biomedical sphere poses unique challenges. 
Following from an extensive literature review, we find that deep learning has yet to revolutionize biomedicine or definitively resolve any of the most pressing challenges in the field, but promising advances have been made on the prior state of the art. Even though improvements over previous baselines have been modest in general, the recent progress indicates that deep learning methods will provide valuable means for speeding up or aiding human investigation. Though progress has been made linking a specific neural network{\\textquoteright}s prediction to input features, understanding how users should interpret these models to make testable hypotheses about the system under study remains an open challenge. Furthermore, the limited amount of labelled data for training presents problems in some domains, as do legal and privacy constraints on work with sensitive health records. Nonetheless, we foresee deep learning enabling changes at both bench and bedside with the potential to transform several areas of biology and medicine.},\n    issn = {1742-5689},\n    URL = {http://rsif.royalsocietypublishing.org/content/15/141/20170387},\n    eprint = {http://rsif.royalsocietypublishing.org/content/15/141/20170387.full.pdf},\n    journal = {Journal of The Royal Society Interface},\n    url_paper = {http://rsif.royalsocietypublishing.org/content/15/141/20170387.full.pdf},\n    url_preprint = {http://www.biorxiv.org/content/biorxiv/early/2017/05/28/142760.full.pdf},\n    keywords = {Deep Learning, Biology, Computational Genomics}\n}\n\n
\n
\n\n\n
\n Deep learning describes a class of machine learning algorithms that are capable of combining raw inputs into layers of intermediate features. These algorithms have recently shown impressive results across a variety of domains. Biology and medicine are data-rich disciplines, but the data are complex and often ill-understood. Hence, deep learning techniques may be particularly well suited to solve problems of these fields. We examine applications of deep learning to a variety of biomedical problems—patient classification, fundamental biological processes and treatment of patients—and discuss whether deep learning will be able to transform these tasks or if the biomedical sphere poses unique challenges. Following from an extensive literature review, we find that deep learning has yet to revolutionize biomedicine or definitively resolve any of the most pressing challenges in the field, but promising advances have been made on the prior state of the art. Even though improvements over previous baselines have been modest in general, the recent progress indicates that deep learning methods will provide valuable means for speeding up or aiding human investigation. Though progress has been made linking a specific neural network's prediction to input features, understanding how users should interpret these models to make testable hypotheses about the system under study remains an open challenge. Furthermore, the limited amount of labelled data for training presents problems in some domains, as do legal and privacy constraints on work with sensitive health records. Nonetheless, we foresee deep learning enabling changes at both bench and bedside with the potential to transform several areas of biology and medicine.\n
\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2014\n \n \n (1)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n Experimental and Computational Mutagenesis to Investigate the Positioning of a General Base Within an Enzyme Active Site.\n \n \n \n \n\n\n \n Schwans, J. P; Hanoian, P.; Lengerich, B. J.; Sunden, F.; Gonzalez, A.; Tsai, Y.; Hammes-Schiffer, S.; and Herschlag, D.\n\n\n \n\n\n\n Biochemistry, 53(15): 2541–2555. 2014.\n \n\n\n\n
\n\n\n\n \n \n \"Experimental paper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n\n\n\n
\n
@article{schwans2014experimental,\n  title={Experimental and Computational Mutagenesis to Investigate the Positioning of a General Base Within an Enzyme Active Site},\n  author={Schwans, Jason P and Hanoian, Philip and \\underline{Lengerich}, \\underline{Benjamin J.} and Sunden, Fanny and Gonzalez, Ana and Tsai, Yingssu and Hammes-Schiffer, Sharon and Herschlag, Daniel},\n  journal={Biochemistry},\n  volume={53},\n  number={15},\n  pages={2541--2555},\n  year={2014},\n  informal_venue = {Biochemistry},\n  publisher={American Chemical Society},\n  abstract = {The positioning of catalytic groups within proteins plays an important role in enzyme catalysis, and here we investigate the positioning of the general base in the enzyme ketosteroid isomerase (KSI). The oxygen atoms of Asp38, the general base in KSI, were previously shown to be involved in anion–aromatic interactions with two neighboring Phe residues. Here we ask whether those interactions are sufficient, within the overall protein architecture, to position Asp38 for catalysis or whether the side chains that pack against Asp38 and/or the residues of the structured loop that is capped by Asp38 are necessary to achieve optimal positioning for catalysis. To test positioning, we mutated each of the aforementioned residues, alone and in combinations, in a background with the native Asp general base and in a D38E mutant background, as Glu at position 38 was previously shown to be mispositioned for general base catalysis. These double-mutant cycles reveal positioning effects as large as 103-fold, indicating that structural features in addition to the overall protein architecture and the Phe residues neighboring the carboxylate oxygen atoms play roles in positioning. X-ray crystallography and molecular dynamics simulations suggest that the functional effects arise from both restricting dynamic fluctuations and disfavoring potential mispositioned states. 
Whereas it may have been anticipated that multiple interactions would be necessary for optimal general base positioning, the energetic contributions from positioning and the nonadditive nature of these interactions are not revealed by structural inspection and require functional dissection. Recognizing the extent, type, and energetic interconnectivity of interactions that contribute to positioning catalytic groups has implications for enzyme evolution and may help reveal the nature and extent of interactions required to design enzymes that rival those found in biology.},\n  url_paper = {https://pubs.acs.org/doi/pdf/10.1021/bi401671t},\n  keywords = {Molecular Dynamics, Computational Chemistry}\n}\n\n
\n
\n\n\n
\n The positioning of catalytic groups within proteins plays an important role in enzyme catalysis, and here we investigate the positioning of the general base in the enzyme ketosteroid isomerase (KSI). The oxygen atoms of Asp38, the general base in KSI, were previously shown to be involved in anion–aromatic interactions with two neighboring Phe residues. Here we ask whether those interactions are sufficient, within the overall protein architecture, to position Asp38 for catalysis or whether the side chains that pack against Asp38 and/or the residues of the structured loop that is capped by Asp38 are necessary to achieve optimal positioning for catalysis. To test positioning, we mutated each of the aforementioned residues, alone and in combinations, in a background with the native Asp general base and in a D38E mutant background, as Glu at position 38 was previously shown to be mispositioned for general base catalysis. These double-mutant cycles reveal positioning effects as large as 103-fold, indicating that structural features in addition to the overall protein architecture and the Phe residues neighboring the carboxylate oxygen atoms play roles in positioning. X-ray crystallography and molecular dynamics simulations suggest that the functional effects arise from both restricting dynamic fluctuations and disfavoring potential mispositioned states. Whereas it may have been anticipated that multiple interactions would be necessary for optimal general base positioning, the energetic contributions from positioning and the nonadditive nature of these interactions are not revealed by structural inspection and require functional dissection. Recognizing the extent, type, and energetic interconnectivity of interactions that contribute to positioning catalytic groups has implications for enzyme evolution and may help reveal the nature and extent of interactions required to design enzymes that rival those found in biology.\n
\n\n\n
\n\n\n\n\n\n
\n
\n\n\n\n\n
\n\n\n \n\n \n \n \n \n\n
\n"}; document.write(bibbase_data.data);