doi abstract bibtex

To many, the foundations of statistical inference are cryptic and irrelevant to routine statistical practice. The analysis of 2 x 2 contingency tables, omnipresent in the scientific literature, is a case in point. Fisher's exact test is routinely used even though it has been fraught with controversy for over 70 years. The problem, not widely acknowledged, is that several different p-values can be associated with a single table, making scientific inference inconsistent. The root cause of this controversy lies in the table's origins and the manner in which nuisance parameters are eliminated. However, fundamental statistical principles (e.g., sufficiency, ancillarity, conditionality, and likelihood) can shed light on the controversy and guide our approach in using this test. In this paper, we use these fundamental principles to show how much information is lost when the table's origins are ignored and when various approaches are used to eliminate unknown nuisance parameters. We present novel likelihood contours to aid in the visualization of information loss and show that the information loss is often virtually non-existent. We find that problems arising from the discreteness of the sample space are exacerbated by p-value-based inference. Accordingly, methods that are less sensitive to this discreteness - likelihood ratios, posterior probabilities and mid-p-values - lead to more consistent inferences.

% Journal article: Choi, Blume and Dupont (2015), PLoS ONE 10(4).
% `pages` holds the electronic article locator (it matches the DOI suffix).
% The citeulike-*, posted-at and priority fields are reference-manager
% metadata; standard styles ignore unknown fields.
@article{cho15elu,
  author                = {Choi, Leena and Blume, Jeffrey D. and Dupont, William D.},
  title                 = {Elucidating the Foundations of Statistical Inference with 2 x 2 Tables},
  journal               = {PLoS ONE},
  year                  = {2015},
  month                 = apr,
  day                   = {7},
  volume                = {10},
  number                = {4},
  pages                 = {e0121263},
  publisher             = {Public Library of Science},
  doi                   = {10.1371/journal.pone.0121263},
  abstract              = {To many, the foundations of statistical inference are cryptic and irrelevant to routine statistical practice. The analysis of 2 x 2 contingency tables, omnipresent in the scientific literature, is a case in point. Fisher's exact test is routinely used even though it has been fraught with controversy for over 70 years. The problem, not widely acknowledged, is that several different p-values can be associated with a single table, making scientific inference inconsistent. The root cause of this controversy lies in the table's origins and the manner in which nuisance parameters are eliminated. However, fundamental statistical principles (e.g., sufficiency, ancillarity, conditionality, and likelihood) can shed light on the controversy and guide our approach in using this test. In this paper, we use these fundamental principles to show how much information is lost when the table's origins are ignored and when various approaches are used to eliminate unknown nuisance parameters. We present novel likelihood contours to aid in the visualization of information loss and show that the information loss is often virtually non-existent. We find that problems arising from the discreteness of the sample space are exacerbated by p-value-based inference. Accordingly, methods that are less sensitive to this discreteness - likelihood ratios, posterior probabilities and mid-p-values - lead to more consistent inferences.},
  keywords              = {ctsafac},
  citeulike-article-id  = {14033215},
  citeulike-linkout-0   = {http://dx.doi.org/10.1371/journal.pone.0121263},
  posted-at             = {2016-05-11 12:50:32},
  priority              = {2},
}

Downloads: 0